Merge pull request #44 from linto-ai/next

merge next -> master
linto-ai · Sep 27, 2024 · a146aa4 · a146aa4
2 parents defb842 + f9832b6
commit a146aa4
Show file tree

Hide file tree

Showing 24 changed files with 1,140 additions and 277 deletions.
diff --git a/Jenkinsfile b/Jenkinsfile
@@ -20,7 +20,7 @@ def buildDockerfile(main_folder, dockerfilePath, image_name, version, changedFil
 pipeline {
     agent any
     environment {
-        DOCKER_HUB_REPO_PYBK   = "lintoai/linto-diarization-pybk"
+        // DOCKER_HUB_REPO_PYBK   = "lintoai/linto-diarization-pybk" // DEPRECATED
         DOCKER_HUB_REPO_PYANNOTE = "lintoai/linto-diarization-pyannote"
         DOCKER_HUB_REPO_SIMPLE = "lintoai/linto-diarization-simple"
     }
@@ -35,7 +35,14 @@ pipeline {
                 script {
                     def changedFiles = sh(returnStdout: true, script: 'git diff --name-only HEAD^ HEAD').trim()
                     echo "My changed files: ${changedFiles}"
-
+
+                    // // DEPRECATED
+                    // version = sh(
+                    //     returnStdout: true, 
+                    //     script: "awk -v RS='' '/#/ {print; exit}' pybk/RELEASE.md | head -1 | sed 's/#//' | sed 's/ //'"
+                    // ).trim()
+                    // buildDockerfile('pybk', 'pybk/Dockerfile', env.DOCKER_HUB_REPO_PYBK, version, changedFiles)
+
                     version = sh(
                         returnStdout: true, 
                         script: "awk -v RS='' '/#/ {print; exit}' simple/RELEASE.md | head -1 | sed 's/#//' | sed 's/ //'"
@@ -47,12 +54,6 @@ pipeline {
                         script: "awk -v RS='' '/#/ {print; exit}' pyannote/RELEASE.md | head -1 | sed 's/#//' | sed 's/ //'"
                     ).trim()
                     buildDockerfile('pyannote', 'pyannote/Dockerfile', env.DOCKER_HUB_REPO_PYANNOTE, version, changedFiles)
-
-                    version = sh(
-                        returnStdout: true, 
-                        script: "awk -v RS='' '/#/ {print; exit}' pybk/RELEASE.md | head -1 | sed 's/#//' | sed 's/ //'"
-                    ).trim()
-                    buildDockerfile('pybk', 'pybk/Dockerfile', env.DOCKER_HUB_REPO_PYBK, version, changedFiles)
                 }
             }
         }
@@ -69,9 +70,9 @@ pipeline {
 
                     version = 'latest-unstable'
 
+                    // buildDockerfile('pybk', 'pybk/Dockerfile', env.DOCKER_HUB_REPO_PYBK, version, changedFiles) // DEPRECATED
                     buildDockerfile('simple', 'simple/Dockerfile', env.DOCKER_HUB_REPO_SIMPLE, version, changedFiles)
                     buildDockerfile('pyannote', 'pyannote/Dockerfile', env.DOCKER_HUB_REPO_PYANNOTE, version, changedFiles)
-                    buildDockerfile('pybk', 'pybk/Dockerfile', env.DOCKER_HUB_REPO_PYBK, version, changedFiles)
                 }
             }
         }

diff --git a/README.md b/README.md
@@ -1,27 +1,29 @@
 # LinTO-diarization
 
-LinTO-diarization is an API for Speaker Diarization,
-which can currently work with several technologies.
+LinTO-diarization is an API for Speaker Diarization (segmenting an audio stream into homogeneous segments according to the speaker identity),
+with some capabilities for Speaker Identification when audio samples of known speakers are provided.
+
+LinTO-diarization can currently work with several technologies.
 The following families of technologies are currently supported (please refer to respective documentation for more details):
-* [PyBK](pybk/README.md) 
 * [PyAnnote](pyannote/README.md)
 * [simple_diarizer](simple/README.md)
+* [PyBK](pybk/README.md) (deprecated)
 
 LinTO-diarization can either be used as a standalone transcription service or deployed within a micro-services infrastructure using a message broker connector.
 
 ## Quick test
 
 Below are examples of how to test diarization with "simple_diarizer", on Linux OS with docker installed.
 
-"simple_diarizer" is the recommended diarization method.
-In what follow, you can replace "simple" by "pybk" or "pyannote" to try other methods.
+"PyAnnote" is the recommended diarization method.
+In what follows, you can replace "pyannote" with "simple" or "pybk" to try other methods.
 
 ### HTTP Server
 
 1. If needed, build docker image 
 
 ```bash
-docker build . -t linto-diarization-simple:latest -f simple/Dockerfile
+docker build . -t linto-diarization-pyannote:latest -f pyannote/Dockerfile
 ```
 
 2. Launch docker container (and keep it running)
@@ -31,13 +33,13 @@ docker run -it --rm \
     -p 8080:80 \
     --shm-size=1gb --tmpfs /run/user/0 \
     --env SERVICE_MODE=http \
-    linto-diarization-simple:latest
+    linto-diarization-pyannote:latest
 ```
 
 3. Open the swagger in a browser: [http://localhost:8080/docs](http://localhost:8080/docs)
    Unfold `/diarization` route and click "Try it out". Then
    - Choose a file
-   - Specify either `spk_number` (Fixed number of speaker) or `max_speaker` (Max number of speakers)
+   - Specify either `speaker_count` (Fixed number of speakers) or `max_speaker` (Max number of speakers)
    - Click `Execute`
 
 ### Celery worker
@@ -47,7 +49,7 @@ In the following we assume we want to test on an audio that is in `$HOME/test.wa
 1. If needed, build docker image 
 
 ```bash
-docker build . -t linto-diarization-simple:latest -f simple/Dockerfile
+docker build . -t linto-diarization-pyannote:latest -f pyannote/Dockerfile
 ```
 
 2. Run Redis server
@@ -69,7 +71,7 @@ docker run -it --rm \
     --env SERVICES_BROKER=redis://172.17.0.1:6379 \
     --env BROKER_PASS= \
     --env CONCURRENCY=2 \
-    linto-diarization-simple:latest
+    linto-diarization-pyannote:latest
 ```
 
 3. Testing with a given audio file can be done using python3 (with packages `celery` and `redis` installed).

diff --git a/celery_app/tasks.py b/celery_app/tasks.py
@@ -6,12 +6,15 @@
 
 @celery.task(name="diarization_task")
 def diarization_task(
-    file_name: str, speaker_count: int = None, max_speaker: int = None
+    file: str,
+    speaker_count: int = None,
+    max_speaker: int = None,
+    speaker_names: str = None,
 ):
     """transcribe_task do a synchronous call to the transcribe worker API"""
-    logger.info(f"Received transcription task for {file_name} ({speaker_count=}, {max_speaker=})")
+    logger.info(f"Received transcription task for {file} ({speaker_count=}, {max_speaker=})")
 
-    file_path = os.path.join("/opt/audio", file_name)
+    file_path = os.path.join("/opt/audio", file)
     if not os.path.isfile(file_path):
         raise Exception("Could not find ressource {}".format(file_path))
 
@@ -26,8 +29,9 @@ def diarization_task(
     try:
         result = diarizationworker.run(
             file_path,
-            number_speaker=speaker_count,
+            speaker_count=speaker_count,
             max_speaker=max_speaker,
+            speaker_names=speaker_names,
         )
     except Exception as e:
         import traceback

diff --git a/document/swagger.yml b/document/swagger.yml
@@ -30,24 +30,28 @@ paths:
         description: "Audio File - WaveFile"
         required: true
         type: "file"
-      - name: "spk_number"
+      - name: "speaker_count"
         in: "formData"
-        description: "Fixed number of speaker (Optional)"
+        description: "Fixed number of speakers (Optional)"
         required: false
         type: integer
       - name: "max_speaker"
         in: "formData"
-        description: "Max number of speakers if spk_number is unknown"
+        description: "Maximum number of speakers, if the number of speakers is unknown"
         required: false
-        type: integer
+        type: integer      
+      - name: "speaker_names"
+        description: "List of candidate speakers (if samples were provided to perform speaker identification)"
+        in: formData
+        type: string        
       responses:
         200:
           description: Successfully transcribe the audio
         400:
           description: Request error
         500:
           description: Server error
-
+     
   /healthcheck:
     get:
       tags:

diff --git a/http_server/ingress.py b/http_server/ingress.py
@@ -43,12 +43,13 @@ def transcribe():
 
         # get input file
         if "file" in request.files.keys():
-            spk_number = request.form.get("spk_number", None)
-            if spk_number is not None:
-                spk_number = int(spk_number)
-            max_spk_number = request.form.get("max_speaker", None)
-            if max_spk_number is not None:
-                max_spk_number = int(max_spk_number)
+            speaker_count = request.form.get("speaker_count", None)
+            if speaker_count is not None:
+                speaker_count = int(speaker_count)
+            max_speaker = request.form.get("max_speaker", None)
+            if max_speaker is not None:
+                max_speaker = int(max_speaker)
+            speaker_names = request.form.get('speaker_names')            #speakers input will be ["jean-pierre","abdel","ilyes-rebai","samir-tanfous"]  
             start_t = time()
         else:
             raise ValueError("No audio file was uploaded")
@@ -61,7 +62,7 @@ def transcribe():
     # Diarization
     try:
         result = diarizationworker.run(
-            request.files["file"], number_speaker=spk_number, max_speaker=max_spk_number
+            request.files["file"], speaker_count=speaker_count, max_speaker=max_speaker, speaker_names=speaker_names
         )
     except Exception as e:
         import traceback