Made input sound normalization an option in AIConfiguration.

dialogflow · Mar 15, 2016 · b7eae46 · b7eae46
1 parent 2c0c4e1
commit b7eae46
Show file tree

Hide file tree

Showing 2 changed files with 47 additions and 0 deletions.
diff --git a/ailib/src/main/java/ai/api/AIConfiguration.java b/ailib/src/main/java/ai/api/AIConfiguration.java
@@ -143,6 +143,8 @@ public static SupportedLanguages fromLanguageTag(final String languageTag) {
 
     private boolean voiceActivityDetectionEnabled = true;
 
+    private boolean normalizeInputSound = false;
+
     public AIConfiguration(final String apiKey, final String subscriptionKey, final SupportedLanguages language, final RecognitionEngine recognitionEngine) {
         this.apiKey = apiKey;
         this.subscriptionKey = subscriptionKey;
@@ -229,6 +231,19 @@ public void setServiceUrl(final String serviceUrl) {
         this.serviceUrl = serviceUrl;
     }
 
+    /**
+     * Enables or disables amplification of input sound that is too quiet. This option improves recognition quality on some devices.
+     * This option does not affect System recognition.
+     * @param normalizeInputSound {@code true} to enable input sound normalization, {@code false} to disable it
+     */
+    public void setNormalizeInputSound(final boolean normalizeInputSound) {
+        this.normalizeInputSound = normalizeInputSound;
+    }
+
+    /**
+     * Returns whether input sound normalization is enabled.
+     * @return the value last passed to {@link #setNormalizeInputSound(boolean)}, or {@code false} if it was never set
+     */
+    public boolean isNormalizeInputSound() {
+        return normalizeInputSound;
+    }
+
     public String getQuestionUrl() {
         if (!TextUtils.isEmpty(protocolVersion)) {
             return String.format("%s%s?v=%s", serviceUrl, QUESTION_ENDPOINT, protocolVersion);

diff --git a/ailib/src/main/java/ai/api/services/SpeaktoitRecognitionServiceImpl.java b/ailib/src/main/java/ai/api/services/SpeaktoitRecognitionServiceImpl.java
@@ -33,6 +33,9 @@
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.ShortBuffer;
 import java.util.List;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -247,11 +250,20 @@ public void run() {
 
     private class RecorderStream extends InputStream {
 
+        @SuppressWarnings("MagicNumber")
+        private final float dbLevel = (float) Math.pow(10.0, -1.0 / 20.0); // linear gain for -1 dB, 10^(-1/20); target peak as a fraction of Short.MAX_VALUE
+
         private final AudioRecord audioRecord;
 
         private byte[] bytes;
         private final Object bytesLock = new Object();
 
+        int max = 0; // largest sample value seen so far across all normalize() calls (never below its initial 0)
+        int min = 0; // smallest sample value seen so far across all normalize() calls (never above its initial 0)
+        float offset = 0; // running mean of every sample seen so far; subtracted from each sample before scaling
+        float count = 1; // 1-based ordinal of the next sample to fold into the running mean
+        int extent; // max(|max|, |min|); recomputed on every normalize() call
+
         private RecorderStream(final AudioRecord audioRecord) {
             this.audioRecord = audioRecord;
         }
@@ -268,6 +280,9 @@ public int read(@NonNull final byte[] buffer, final int byteOffset, final int by
             final int bytesRead = audioRecord.read(buffer, byteOffset, byteCount);
             if (bytesRead > 0) {
                 synchronized (bytesLock) {
+                    if (config.isNormalizeInputSound())
+                        normalize(buffer, bytesRead);
+
                     byte[] temp = bytes;
                     int tempLength = temp != null ? temp.length : 0;
                     bytes = new byte[tempLength + bytesRead];
@@ -292,6 +307,23 @@ public int read(@NonNull final byte[] buffer, final int byteOffset, final int by
             }
             return bytesRead != 0 ? bytesRead : AudioRecord.ERROR_INVALID_OPERATION;
         }
+
+        /**
+         * Amplifies, in place, the little-endian 16-bit samples stored in the first
+         * {@code bytesRead} bytes of {@code buffer}.
+         *
+         * <p>The samples are first folded into the running statistics kept by this stream
+         * ({@code max}, {@code min} and the running mean {@code offset}). Each sample then has
+         * the running mean subtracted and is scaled so that the largest amplitude seen so far
+         * maps to {@code dbLevel * Short.MAX_VALUE}. A trailing odd byte, if any, is left
+         * untouched.
+         *
+         * <p>Results are clamped to the 16-bit range: once the mean is removed a sample can
+         * exceed the recorded extent, and an unclamped narrowing cast would wrap around to the
+         * opposite sign instead of clipping.
+         *
+         * <p>NOTE(review): this method always processes {@code buffer} from index 0, while the
+         * caller fills the buffer starting at its own {@code byteOffset}; the two only agree
+         * when that offset is 0 - confirm against the callers of {@code read}.
+         *
+         * @param buffer    array holding the freshly recorded bytes; modified in place
+         * @param bytesRead number of valid bytes in {@code buffer}, counted from index 0
+         */
+        private void normalize(@NonNull final byte[] buffer, final int bytesRead) {
+            final ShortBuffer shorts = ByteBuffer.wrap(buffer, 0, bytesRead)
+                    .order(ByteOrder.LITTLE_ENDIAN)
+                    .asShortBuffer();
+            final int sampleCount = shorts.limit();
+
+            // Pass 1: update the running extremes and the incremental mean.
+            for (int i = 0; i < sampleCount; i++) {
+                final short sample = shorts.get(i);
+                max = Math.max(max, sample);
+                min = Math.min(min, sample);
+                offset = (count - 1) / count * offset + sample / count;
+                count += 1;
+            }
+
+            extent = Math.max(Math.abs(max), Math.abs(min));
+            if (extent == 0) {
+                // Only silence has been seen so far: every sample is already 0 and there is
+                // nothing to amplify. Returning here also avoids dividing by zero below.
+                return;
+            }
+
+            // Pass 2: remove the mean, scale, and write back through the short view so that
+            // the read and write indices cannot drift apart.
+            final float factor = dbLevel * Short.MAX_VALUE / extent;
+            for (int i = 0; i < sampleCount; i++) {
+                final float scaled = (shorts.get(i) - offset) * factor;
+                final float clamped = Math.max(Short.MIN_VALUE, Math.min(Short.MAX_VALUE, scaled));
+                shorts.put(i, (short) clamped);
+            }
+        }
     }
 
     private class RecognizeTask extends AsyncTask<Void, Void, AIResponse> {