diff --git a/ailib/src/main/java/ai/api/AIConfiguration.java b/ailib/src/main/java/ai/api/AIConfiguration.java index 46a2d11..a43be53 100644 --- a/ailib/src/main/java/ai/api/AIConfiguration.java +++ b/ailib/src/main/java/ai/api/AIConfiguration.java @@ -143,6 +143,8 @@ public static SupportedLanguages fromLanguageTag(final String languageTag) { private boolean voiceActivityDetectionEnabled = true; + private boolean normalizeInputSound = false; + public AIConfiguration(final String apiKey, final String subscriptionKey, final SupportedLanguages language, final RecognitionEngine recognitionEngine) { this.apiKey = apiKey; this.subscriptionKey = subscriptionKey; @@ -229,6 +231,19 @@ public void setServiceUrl(final String serviceUrl) { this.serviceUrl = serviceUrl; } + /** + * With setting this field to true you can enable sound amplification if it's too quiet. This option improves recognition quality on some devices. + * This option does not affect System recognition. + * @param normalizeInputSound + */ + public void setNormalizeInputSound(final boolean normalizeInputSound) { + this.normalizeInputSound = normalizeInputSound; + } + + public boolean isNormalizeInputSound() { + return normalizeInputSound; + } + public String getQuestionUrl() { if (!TextUtils.isEmpty(protocolVersion)) { return String.format("%s%s?v=%s", serviceUrl, QUESTION_ENDPOINT, protocolVersion); diff --git a/ailib/src/main/java/ai/api/services/SpeaktoitRecognitionServiceImpl.java b/ailib/src/main/java/ai/api/services/SpeaktoitRecognitionServiceImpl.java index 118b4ff..22850be 100644 --- a/ailib/src/main/java/ai/api/services/SpeaktoitRecognitionServiceImpl.java +++ b/ailib/src/main/java/ai/api/services/SpeaktoitRecognitionServiceImpl.java @@ -33,6 +33,9 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.ShortBuffer; import java.util.List; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -247,11 +250,20 @@ public void run() { private class RecorderStream extends InputStream { + @SuppressWarnings("MagicNumber") + private final float dbLevel = (float) Math.pow(10.0, -1.0 / 20.0); + private final AudioRecord audioRecord; private byte[] bytes; private final Object bytesLock = new Object(); + int max = 0; + int min = 0; + float offset = 0; + float count = 1; + int extent; + private RecorderStream(final AudioRecord audioRecord) { this.audioRecord = audioRecord; } @@ -268,6 +280,9 @@ public int read(@NonNull final byte[] buffer, final int byteOffset, final int by final int bytesRead = audioRecord.read(buffer, byteOffset, byteCount); if (bytesRead > 0) { synchronized (bytesLock) { + if (config.isNormalizeInputSound()) + normalize(buffer, bytesRead); + byte[] temp = bytes; int tempLength = temp != null ? temp.length : 0; bytes = new byte[tempLength + bytesRead]; @@ -292,6 +307,23 @@ public int read(@NonNull final byte[] buffer, final int byteOffset, final int by } return bytesRead != 0 ? bytesRead : AudioRecord.ERROR_INVALID_OPERATION; } + + private void normalize(@NonNull final byte[] buffer, final int bytesRead) { + final ByteBuffer byteBuffer = ByteBuffer.wrap(buffer, 0, bytesRead).order(ByteOrder.LITTLE_ENDIAN); + final ShortBuffer shorts = byteBuffer.asShortBuffer(); + for (int i = 0; i < shorts.limit(); i++) { + final short sample = shorts.get(i); + max = Math.max(max, sample); + min = Math.min(min, sample); + offset = (count - 1) / count * offset + sample / count; + count += 1; + } + extent = Math.max(Math.abs(max), Math.abs(min)); + final float factor = dbLevel * Short.MAX_VALUE / extent; + for (int i = 0; i < shorts.limit(); i++) { + byteBuffer.putShort((short) ((shorts.get(i) - offset) * factor)); + } + } } private class RecognizeTask extends AsyncTask {