Commit

Merge pull request #42 from linto-ai/next

Next

Jeronymous authored Apr 15, 2024
2 parents c1f0ada + c78366a commit 5fedd86
Showing 29 changed files with 1,654 additions and 72 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
start_container.sh
.env*
test/*
tmp*
test.log
__pycache__
1 change: 1 addition & 0 deletions README.md
@@ -7,6 +7,7 @@ LinTO-STT can either be used as a standalone transcription service or deployed w
The following families of STT models are currently supported (please refer to respective documentation for more details):
* [Kaldi models](kaldi/README.md)
* [Whisper models](whisper/README.md)
* [Test scripts](test/README.md)

LinTO-STT can either be used as a standalone transcription service or deployed within a micro-services infrastructure using a message broker connector.

14 changes: 11 additions & 3 deletions http_server/ingress.py
@@ -9,7 +9,7 @@
from flask import Flask, json, request
from serving import GeventServing, GunicornServing
from stt import logger as stt_logger
from stt.processing import MODEL, USE_GPU, decode, load_wave_buffer
from stt.processing import MODEL, USE_GPU, decode, load_wave_buffer, warmup
from swagger import setupSwaggerUI

app = Flask("__stt-standalone-worker__")
@@ -24,7 +24,7 @@
logger.setLevel(logging.INFO)

# If websocket streaming route is enabled
if os.environ.get("ENABLE_STREAMING", False) in [True, "true", 1]:
if os.environ.get("ENABLE_STREAMING", "false").lower() in ["true", "1"]:
from flask_sock import Sock
from stt.processing.streaming import ws_streaming

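The `ENABLE_STREAMING` change above tightens the env-flag check. A minimal stdlib sketch (illustrative, not the repo's code) of why the old truthiness test missed common values; `flags` stands in for `os.environ`, which always holds strings:

```python
def new_check(flags):
    # New behaviour from the diff: case-insensitive match on "true" / "1"
    return flags.get("ENABLE_STREAMING", "false").lower() in ["true", "1"]

def old_check(flags):
    # Old behaviour: exact membership test against [True, "true", 1]
    return flags.get("ENABLE_STREAMING", False) in [True, "true", 1]

# Values users commonly set that the old check silently rejected:
print(old_check({"ENABLE_STREAMING": "True"}))  # False: "True" != "true"
print(new_check({"ENABLE_STREAMING": "True"}))  # True
print(old_check({"ENABLE_STREAMING": "1"}))     # False: string "1" != int 1
print(new_check({"ENABLE_STREAMING": "1"}))     # True
```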
@@ -84,7 +84,9 @@ def transcribe():

logger.error(traceback.format_exc())
logger.error(repr(error))
return "Server Error: {}".format(str(error)), 400 if isinstance(error, ValueError) else 500
return "Server Error: {}".format(str(error)), (
400 if isinstance(error, ValueError) else 500
)
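The handler above maps the exception type to a status code. Isolated as a tiny helper (a sketch, not the repo's actual code), the rule is:

```python
def http_status_for(error: Exception) -> int:
    # ValueError signals bad client input (e.g. an unreadable audio buffer),
    # so it maps to 400; anything else is treated as a server-side 500.
    return 400 if isinstance(error, ValueError) else 500

print(http_status_for(ValueError("bad audio")))  # 400
print(http_status_for(RuntimeError("oom")))      # 500
```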


@app.errorhandler(405)
@@ -128,12 +130,18 @@ def server_error(error):
serving_type = GunicornServing
logger.debug("Serving with gunicorn")

def post_worker_init(worker):
logger.info(f"Worker {worker.pid} init")
warmup()
logger.info(f"Worker {worker.pid} fully initialized")

serving = serving_type(
app,
{
"bind": f"0.0.0.0:{args.service_port}",
"workers": args.workers,
"timeout": 3600 * 24,
"post_worker_init": post_worker_init,
},
)
logger.info(args)
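`GunicornServing`'s internals are not shown in this diff, but the options dict above (including the `post_worker_init` key, a standard gunicorn server hook) suggests it follows gunicorn's documented custom-application pattern. A hedged sketch of such a wrapper, assuming that pattern:

```python
# Sketch of a gunicorn wrapper accepting an options dict like the one above
# (bind, workers, timeout, post_worker_init). The class name and wiring are
# assumptions, not the repo's actual GunicornServing implementation.
try:
    from gunicorn.app.base import BaseApplication
except ImportError:          # keep the sketch importable without gunicorn
    BaseApplication = object

class StandaloneServing(BaseApplication):
    def __init__(self, app, options=None):
        self.application = app
        self.options = options or {}
        super().__init__()

    def load_config(self):
        # Forward only keys gunicorn knows about; "post_worker_init" is a
        # standard server hook called in each worker process after it boots.
        for key, value in self.options.items():
            if key in self.cfg.settings and value is not None:
                self.cfg.set(key, value)

    def load(self):
        return self.application
```

With this pattern, gunicorn calls the `post_worker_init` hook once per forked worker, which is what lets each worker run `warmup()` before taking traffic.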
4 changes: 2 additions & 2 deletions kaldi/.envdefault
@@ -7,8 +7,8 @@ ENABLE_STREAMING=true

# TASK PARAMETERS
SERVICE_NAME=stt
SERVICES_BROKER=redis://192.168.0.1:6379
BROKER_PASS=password
SERVICES_BROKER=redis://172.17.0.1:6379
BROKER_PASS=

# WEBSOCKET PARAMETERS
STREAMING_PORT=80
9 changes: 6 additions & 3 deletions kaldi/README.md
@@ -68,7 +68,7 @@ cp kaldi/.envdefault kaldi/.env

STT can be used in three ways:
* Through an [HTTP API](#http-server) using the **http**'s mode.
* Through a [message broker](#micro-service-within-linto-platform-stack) using the **task**'s mode.
* Through a [message broker](#celery-task) using the **task**'s mode.
* Through a [websocket server](#websocket-server) **websocket**'s mode.

Mode is specified using the .env value or environment variable ```SERVING_MODE```.
@@ -99,7 +99,7 @@ This will run a container providing an [HTTP API](#http-api) bound on the host
| LM_PATH | Path to the language model on the host machine mounted to /opt/LM | /my/path/to/models/fr-FR_big-v2.2.0 |
| MODEL_PATH | Path to the model (using MODEL_TYPE=vosk) mounted to /opt/model | /my/path/to/models/vosk-model |

### Micro-service within LinTO-Platform stack
### Celery task
The TASK serving mode connects a celery worker to a message broker.

The SERVICE_MODE value in the .env should be set to ```task```.
@@ -205,7 +205,10 @@ On a successful transcription the returned object is a JSON object structured as
* The <ins>confidence</ins> field contains the overall confidence for the transcription. (0.0 if with_metadata=False)


## Test
## Tests

See [Test scripts](../test/README.md) for more details about testing.

### Curl
You can test your HTTP API using curl:
```bash
3 changes: 3 additions & 0 deletions kaldi/RELEASE.md
@@ -1,3 +1,6 @@
# 1.0.2
- Fix task mode for kaldi by updating SERVICES_BROKER and BROKER_PASS in .envdefault

# 1.0.1
- Fix streaming mode (websocket) in linto-stt-kaldi

4 changes: 2 additions & 2 deletions kaldi/docker-entrypoint.sh
@@ -25,7 +25,7 @@ fi
# Launch parameters, environment variables and dependencies check
if [ -z "$SERVICE_MODE" ]
then
echo "ERROR: Must specify a serving mode: [ http | task | websocket ]"
echo "ERROR: Must specify an environment variable SERVICE_MODE in [ http | task | websocket ] (None was specified)"
exit -1
else
if [ "$SERVICE_MODE" = "http" ]
@@ -48,7 +48,7 @@ else
echo "Running Websocket server on port ${STREAMING_PORT:=80}"
python websocket/websocketserver.py
else
echo "ERROR: Wrong serving command: $1"
echo "ERROR: Must specify an environment variable SERVICE_MODE in [ http | task | websocket ] (got SERVICE_MODE=$SERVICE_MODE)"
exit -1
fi
fi
3 changes: 3 additions & 0 deletions kaldi/stt/processing/__init__.py
@@ -29,5 +29,8 @@
sys.exit(-1)
logger.info("Acoustic model and decoding graph loaded. (t={}s)".format(time() - start))

def warmup():
pass

# Not implemented yet in Kaldi
USE_GPU = False
70 changes: 70 additions & 0 deletions test/README.md
@@ -0,0 +1,70 @@
# LinTO-STT-Tests

## Usage tests

### HTTP - transcribe

You can test your HTTP server by using:

```bash
test_deployment.sh
```

> ⚠️ Be sure to check that you use the right port (default port for testing: 8080).

### HTTP - streaming

You can test your HTTP streaming route by using:
```bash
test_streaming.py
```
Be sure to have a working microphone.

> ⚠️ Be sure to check that you use the right port (default port for testing: 8080).

If you want to test the streaming on a file:
```bash
test_streaming.py --audio_file bonjour.wav
```

### Task

You can test your deployment of the task service mode by using:

```bash
test_celery.py AUDIO.wav
```

where AUDIO.wav is the file you want to transcribe; for example, you can use bonjour.wav.

> ⚠️ Be sure to check that you use the same port in your .env and in test_celery.py (default port for testing: 6379).
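Since the task test depends on reaching the broker, a quick stdlib check (purely illustrative, not part of the repo) can confirm the redis port from your .env is open before running test_celery.py:

```python
import socket

def broker_reachable(host="localhost", port=6379, timeout=0.5):
    """Return True if a TCP connection to the broker can be opened."""
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False

print(broker_reachable())  # True only if redis is listening on 6379
```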

## Unit tests

You will need to install:
```bash
pip3 install ddt
```

To test the Kaldi models, you will need to download the models (see [Kaldi models](../kaldi/README.md)) and then fill the AM_PATH and LM_PATH fields in the [test_config.ini file](test_config.ini).
> ⚠️ If you don't specify the models, the Kaldi tests will fail.

To launch the tests, run:
```bash
python test/test.py
```

> ⚠️ Be sure to launch it from the root folder of the repository.

If you want the tests to stop at the first failure, use the -f flag:
```bash
python test/test.py -f
```
If you want to run a subset of tests, you can use -k with a part of a test name. For example, to run only the Kaldi tests:
```bash
python test/test.py -k kaldi
```
or to test with VAD=auditok and DEVICE=cuda:
```bash
python test/test.py -k VAD_auditok_DEVICE_cuda
```
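test/test.py itself is not shown in this diff, but the -f and -k flags above match unittest's own command-line options (failfast and test-name pattern matching), which such a script typically forwards. A self-contained sketch with made-up test names:

```python
import unittest

class TestKaldi(unittest.TestCase):
    def test_kaldi_smoke(self):
        self.assertTrue(True)

class TestWhisper(unittest.TestCase):
    def test_VAD_auditok_DEVICE_cuda(self):
        self.assertTrue(True)

if __name__ == "__main__":
    # -f maps to unittest's failfast; -k filters by a substring of the test
    # name, so `-k kaldi` would select only test_kaldi_smoke here.
    unittest.main()
```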
