Skip to content

Commit

Permalink
Made input sound normalization an option in AIConfiguration.
Browse files Browse the repository at this point in the history
  • Loading branch information
Gusarov Roman committed Mar 15, 2016
1 parent 2c0c4e1 commit b7eae46
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 0 deletions.
15 changes: 15 additions & 0 deletions ailib/src/main/java/ai/api/AIConfiguration.java
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ public static SupportedLanguages fromLanguageTag(final String languageTag) {

private boolean voiceActivityDetectionEnabled = true;

// When true, recorded input sound is normalized (amplified if too quiet); disabled by default.
private boolean normalizeInputSound = false;

public AIConfiguration(final String apiKey, final String subscriptionKey, final SupportedLanguages language, final RecognitionEngine recognitionEngine) {
this.apiKey = apiKey;
this.subscriptionKey = subscriptionKey;
Expand Down Expand Up @@ -229,6 +231,19 @@ public void setServiceUrl(final String serviceUrl) {
this.serviceUrl = serviceUrl;
}

/**
 * Enables or disables normalization of the input sound.
 * When enabled, input that is too quiet is amplified, which improves recognition quality on some devices.
 * This option does not affect System recognition.
 *
 * @param normalizeInputSound {@code true} to amplify quiet input sound, {@code false} to leave it unchanged
 */
public void setNormalizeInputSound(final boolean normalizeInputSound) {
this.normalizeInputSound = normalizeInputSound;
}

/**
 * Reports whether input sound normalization is enabled.
 *
 * @return the value last passed to {@code setNormalizeInputSound}, or {@code false} if it was never set
 */
public boolean isNormalizeInputSound() {
return normalizeInputSound;
}

public String getQuestionUrl() {
if (!TextUtils.isEmpty(protocolVersion)) {
return String.format("%s%s?v=%s", serviceUrl, QUESTION_ENDPOINT, protocolVersion);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.ShortBuffer;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
Expand Down Expand Up @@ -247,11 +250,20 @@ public void run() {

private class RecorderStream extends InputStream {

// Target peak as a fraction of full scale: 10^(-1/20), about 0.891, i.e. -1 dB expressed as an amplitude ratio.
@SuppressWarnings("MagicNumber")
private final float dbLevel = (float) Math.pow(10.0, -1.0 / 20.0);

// Recorder supplied to the constructor; read(...) pulls audio data from it.
private final AudioRecord audioRecord;

// Buffer that read(...) regrows on every successful read, under bytesLock
// (appears to accumulate everything read so far; the full logic is outside this view).
private byte[] bytes;
private final Object bytesLock = new Object();

// Running statistics maintained by normalize(...) across calls.
// Largest and smallest sample seen so far; both start at 0.
int max = 0;
int min = 0;
// Running mean of all samples seen so far; subtracted from each sample before scaling.
float offset = 0;
// One more than the number of samples folded into the running mean.
float count = 1;
// Larger of |max| and |min|; divisor used when computing the gain factor.
int extent;

/**
 * Creates a stream that reads its data from the given recorder.
 *
 * @param audioRecord recorder whose {@code read} method supplies the bytes returned by this stream
 */
private RecorderStream(final AudioRecord audioRecord) {
this.audioRecord = audioRecord;
}
Expand All @@ -268,6 +280,9 @@ public int read(@NonNull final byte[] buffer, final int byteOffset, final int by
final int bytesRead = audioRecord.read(buffer, byteOffset, byteCount);
if (bytesRead > 0) {
synchronized (bytesLock) {
if (config.isNormalizeInputSound())
normalize(buffer, bytesRead);

byte[] temp = bytes;
int tempLength = temp != null ? temp.length : 0;
bytes = new byte[tempLength + bytesRead];
Expand All @@ -292,6 +307,23 @@ public int read(@NonNull final byte[] buffer, final int byteOffset, final int by
}
return bytesRead != 0 ? bytesRead : AudioRecord.ERROR_INVALID_OPERATION;
}

/**
 * Normalizes, in place, the 16-bit little-endian samples stored in the first
 * {@code bytesRead} bytes of {@code buffer}.
 *
 * <p>Each call first folds the new samples into the running statistics kept on this
 * stream ({@code max}, {@code min}, the running mean {@code offset} and {@code count}),
 * then rewrites every sample as {@code (sample - offset) * factor}, where
 * {@code factor = dbLevel * Short.MAX_VALUE / extent} and {@code extent} is the largest
 * absolute sample value seen so far. The result is clamped to the {@code short} range,
 * because a plain cast of an out-of-range value wraps around instead of clipping.
 *
 * <p>If {@code bytesRead} is odd, the trailing byte is left untouched.
 *
 * <p>NOTE(review): samples are always read from index 0 of {@code buffer}. The visible
 * call site passes only {@code bytesRead}; confirm the caller never fills the buffer at
 * a non-zero offset.
 *
 * @param buffer    array holding the audio data; modified in place
 * @param bytesRead number of valid bytes at the start of {@code buffer}
 */
private void normalize(@NonNull final byte[] buffer, final int bytesRead) {
    final ByteBuffer byteBuffer = ByteBuffer.wrap(buffer, 0, bytesRead).order(ByteOrder.LITTLE_ENDIAN);
    final ShortBuffer shorts = byteBuffer.asShortBuffer();
    final int sampleCount = shorts.limit();

    // First pass: update the running peak values and the running mean with this chunk.
    for (int i = 0; i < sampleCount; i++) {
        final short sample = shorts.get(i);
        max = Math.max(max, sample);
        min = Math.min(min, sample);
        offset = (count - 1) / count * offset + sample / count;
        count += 1;
    }

    extent = Math.max(Math.abs(max), Math.abs(min));
    if (extent == 0) {
        // Only silence has been seen so far: every sample is zero and the gain would be a
        // division by zero, so there is nothing to scale.
        return;
    }

    final float factor = dbLevel * Short.MAX_VALUE / extent;

    // Second pass: remove the mean, apply the gain and clip to the 16-bit range.
    for (int i = 0; i < sampleCount; i++) {
        final float scaled = (shorts.get(i) - offset) * factor;
        final float clipped = Math.max(Short.MIN_VALUE, Math.min(Short.MAX_VALUE, scaled));
        // The view shares storage and byte order with byteBuffer, so this writes back into buffer.
        shorts.put(i, (short) clipped);
    }
}
}

private class RecognizeTask extends AsyncTask<Void, Void, AIResponse> {
Expand Down

0 comments on commit b7eae46

Please sign in to comment.