From 8c6aec154a813d4b39519a1308b0e697e882c033 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Thu, 21 Nov 2024 00:18:08 +0530 Subject: [PATCH 01/11] feat(examples): basic functionality --- .vscode/settings.json | 3 +- examples/quivr-whisper/.gitignore | 1 + examples/quivr-whisper/app.py | 85 +++-- examples/quivr-whisper/pyproject.toml | 2 +- examples/quivr-whisper/requirements-dev.lock | 2 + examples/quivr-whisper/requirements.lock | 2 + examples/quivr-whisper/static/app.js | 319 +++++++++---------- examples/quivr-whisper/static/styles.css | 45 +++ examples/quivr-whisper/templates/index.html | 7 +- 9 files changed, 257 insertions(+), 209 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 86370d352832..bb7120ff5c99 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -44,5 +44,6 @@ "reportUnusedImport": "warning", "reportGeneralTypeIssues": "warning" }, - "makefile.configureOnOpen": false + "makefile.configureOnOpen": false, + "djlint.showInstallError": false } diff --git a/examples/quivr-whisper/.gitignore b/examples/quivr-whisper/.gitignore index 4c49bd78f1d0..727370b46a43 100644 --- a/examples/quivr-whisper/.gitignore +++ b/examples/quivr-whisper/.gitignore @@ -1 +1,2 @@ .env +uploads \ No newline at end of file diff --git a/examples/quivr-whisper/app.py b/examples/quivr-whisper/app.py index 1ae27eac2399..05401706b54c 100644 --- a/examples/quivr-whisper/app.py +++ b/examples/quivr-whisper/app.py @@ -4,37 +4,78 @@ import os import requests from dotenv import load_dotenv +from quivr_core import Brain +from quivr_core.rag.entities.config import RetrievalConfig from tempfile import NamedTemporaryFile +from werkzeug.utils import secure_filename +from asyncio import to_thread +import asyncio + + +UPLOAD_FOLDER = 'uploads' +ALLOWED_EXTENSIONS = {'txt'} + +os.makedirs(UPLOAD_FOLDER, exist_ok=True) app = Flask(__name__) +app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER load_dotenv() -openai.api_key = os.getenv("OPENAI_API_KEY") - -quivr_token = os.getenv("QUIVR_API_KEY", "") -quivr_chat_id = os.getenv("QUIVR_CHAT_ID", "") -quivr_brain_id = os.getenv("QUIVR_BRAIN_ID", "") -quivr_url = ( - os.getenv("QUIVR_URL", "https://api.quivr.app") - + f"/chat/{quivr_chat_id}/question?brain_id={quivr_brain_id}" -) -headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {quivr_token}", -} +openai.api_key = os.getenv("OPENAI_API_KEY") +def allowed_file(filename): + return '.' 
in filename and \ + filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS @app.route("/") def index(): return render_template("index.html") -@app.route("/transcribe", methods=["POST"]) -def transcribe_audio(): +def run_in_event_loop(func, *args, **kwargs): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + if asyncio.iscoroutinefunction(func): + result = loop.run_until_complete(func(*args, **kwargs)) + else: + result = func(*args, **kwargs) + loop.close() + return result + + +@app.route('/ask', methods=['POST']) +async def ask(): + if 'file' not in request.files: + return "No file part", 400 + + file = request.files['file'] + + if file.filename == '': + return "No selected file", 400 + if not (file and file.filename and allowed_file(file.filename)): + return "Invalid file type", 400 + + filename = secure_filename(file.filename) + filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename) + file.save(filepath) + + print("Uploading file...") + brain: Brain = await to_thread(run_in_event_loop, Brain.from_files, name="user_brain", file_paths=[filepath]) + + print(f"{filepath} saved to brain.") + + print("Speech to text...") audio_file = request.files["audio_data"] transcript = transcribe_audio_file(audio_file) - quivr_response = ask_quivr_question(transcript) - audio_base64 = synthesize_speech(quivr_response) + print("Transcript result: ", transcript) + + print("Getting response...") + quivr_response = await to_thread(run_in_event_loop, brain.ask, transcript) + + print("Text to speech...") + audio_base64 = synthesize_speech(quivr_response.answer) + + print("Done") return jsonify({"audio_base64": audio_base64}) @@ -55,16 +96,6 @@ def transcribe_audio_file(audio_file): return transcript -def ask_quivr_question(transcript): - response = requests.post(quivr_url, headers=headers, json={"question": transcript}) - if response.status_code == 200: - quivr_response = response.json().get("assistant") - return quivr_response - else: - print(f"Error from Quivr API: {response.status_code}, {response.text}") - return "Sorry, I couldn't understand that." 
- - def synthesize_speech(text): speech_response = openai.audio.speech.create( model="tts-1", voice="nova", input=text diff --git a/examples/quivr-whisper/pyproject.toml b/examples/quivr-whisper/pyproject.toml index 457e6c90e392..692d1df09e8c 100644 --- a/examples/quivr-whisper/pyproject.toml +++ b/examples/quivr-whisper/pyproject.toml @@ -6,7 +6,7 @@ authors = [ { name = "Stan Girard", email = "stan@quivr.app" } ] dependencies = [ - "flask>=3.1.0", + "flask[async]>=3.1.0", "openai>=1.54.5", "quivr-core>=0.0.24", ] diff --git a/examples/quivr-whisper/requirements-dev.lock b/examples/quivr-whisper/requirements-dev.lock index 8e93ec1b5ff3..901ea6e170b7 100644 --- a/examples/quivr-whisper/requirements-dev.lock +++ b/examples/quivr-whisper/requirements-dev.lock @@ -32,6 +32,8 @@ anyio==4.6.2.post1 # via httpx # via openai # via starlette +asgiref==3.8.1 + # via flask attrs==24.2.0 # via aiohttp backoff==2.2.1 diff --git a/examples/quivr-whisper/requirements.lock b/examples/quivr-whisper/requirements.lock index 8e93ec1b5ff3..901ea6e170b7 100644 --- a/examples/quivr-whisper/requirements.lock +++ b/examples/quivr-whisper/requirements.lock @@ -32,6 +32,8 @@ anyio==4.6.2.post1 # via httpx # via openai # via starlette +asgiref==3.8.1 + # via flask attrs==24.2.0 # via aiohttp backoff==2.2.1 diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index 0d788544dc23..1732bcbc79cd 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -1,209 +1,174 @@ -const recordBtn = document.getElementById('record-btn'); -const audioVisualizer = document.getElementById('audio-visualizer'); -const audioPlayback = document.getElementById('audio-playback'); -const canvasCtx = audioVisualizer.getContext('2d'); +const recordBtn = document.getElementById("record-btn"); +const fileInput = document.getElementById("fileInput"); + +const audioVisualizer = document.getElementById("audio-visualizer"); +const audioPlayback = document.getElementById("audio-playback"); +const canvasCtx = audioVisualizer.getContext("2d"); -let isRecording = false; -let mediaRecorder; -let audioChunks = []; -let audioContext; let analyser; -let dataArray; let bufferLength; +let dataArray; let lastAudioLevel = 0; let silenceTimer; -recordBtn.addEventListener('click', toggleRecording); - -function toggleRecording() { - if (!isRecording) { - recordBtn.classList.add('hidden'); - audioVisualizer.classList.remove('hidden'); - startRecording(); - } else { - audioVisualizer.classList.add('hidden'); - stopRecording(); - } -} - -function drawWaveform() { - if (!analyser) return; - - requestAnimationFrame(drawWaveform); - - analyser.getByteTimeDomainData(dataArray); - - canvasCtx.fillStyle = 'rgb(255, 255, 255)'; - canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height); - - canvasCtx.lineWidth = 2; - canvasCtx.strokeStyle = 'rgb(0, 0, 0)'; - - canvasCtx.beginPath(); +let isRecording = false; - let sliceWidth = audioVisualizer.width * 1.0 / bufferLength; - let x = 0; +if (navigator.mediaDevices) { + const constraints = { audio: true }; + let chunks = []; + + navigator.mediaDevices + .getUserMedia(constraints) + .then((stream) => { + const mediaRecorder = new MediaRecorder(stream); + + const startRecording = () => { + mediaRecorder.start(); + console.log(mediaRecorder.state); + console.log("recorder started"); + // recordBtn.classList.add("hidden"); + audioVisualizer.classList.remove("hidden"); + + const audioContext = new (window.AudioContext || + 
window.webkitAudioContext)(); + analyser = audioContext.createAnalyser(); + const source = audioContext.createMediaStreamSource(stream); + + source.connect(analyser); + analyser.fftSize = 2048; + bufferLength = analyser.frequencyBinCount; + dataArray = new Uint8Array(bufferLength); + + drawWaveform(); + }; + + const stopRecording = () => { + mediaRecorder.stop(); + console.log(mediaRecorder.state); + console.log("recorder stopped"); + audioVisualizer.classList.add("hidden"); + }; + + recordBtn.onclick = () => { + if (mediaRecorder.state === "inactive") { + startRecording(); + } else if (mediaRecorder.state === "recording") { + stopRecording(); + } + }; - let sum = 0; + mediaRecorder.onstop = async (e) => { + console.log("STOPP"); - for (let i = 0; i < bufferLength; i++) { - let v = dataArray[i] / 128.0; - let y = v * audioVisualizer.height / 2; + // The mediaRecorder has stopped; now we can process the chunks + const audioBlob = new Blob(chunks, { type: "audio/wav" }); + const formData = new FormData(); - sum += v; + formData.append("audio_data", audioBlob); - if (i === 0) { - canvasCtx.moveTo(x, y); + // Append the file to the FormData object + if (fileInput.files.length > 0) { + formData.append("file", fileInput.files[0]); } else { - canvasCtx.lineTo(x, y); + alert("Please select a file."); + return; } - x += sliceWidth; - } - - canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2); - canvasCtx.stroke(); - - let currentAudioLevel = sum / bufferLength; - - if (isRecording && Math.abs(currentAudioLevel - lastAudioLevel) < 0.01) { - if (!silenceTimer) { - silenceTimer = setTimeout(stopRecording, 1000); - } - } else { - clearTimeout(silenceTimer); - silenceTimer = null; - } - - lastAudioLevel = currentAudioLevel; -} - -async function startRecording() { - audioChunks = []; - const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - mediaRecorder = new MediaRecorder(stream); - mediaRecorder.ondataavailable = event => { - audioChunks.push(event.data); - }; - mediaRecorder.start(); - isRecording = true; - - audioContext = new (window.AudioContext || window.webkitAudioContext)(); - analyser = audioContext.createAnalyser(); - const source = audioContext.createMediaStreamSource(stream); - - source.connect(analyser); - analyser.fftSize = 2048; - bufferLength = analyser.frequencyBinCount; - dataArray = new Uint8Array(bufferLength); - - drawWaveform(); -} - -function stopRecording() { - mediaRecorder.stop(); - mediaRecorder.onstop = async () => { - // The mediaRecorder has stopped; now we can process the chunks - const audioBlob = new Blob(audioChunks, { type: 'audio/wav' }); - const formData = new FormData(); - formData.append('audio_data', audioBlob); - // Now we're sending the audio to the server and waiting for a response try { - const response = await fetch('/transcribe', { - method: 'POST', - body: formData - }); - const data = await response.json(); - - // Once we have the response, we can source the playback element and play it - audioPlayback.src = 'data:audio/wav;base64,' + data.audio_base64; - audioPlayback.classList.remove('hidden'); - audioVisualizer.classList.add('hidden'); // hide the visualizer while playing back the response - setupAIResponseVisualization(); - audioPlayback.onloadedmetadata = () => { - // When metadata is loaded, start playback - audioPlayback.play(); - visualizeAIResponse(); - }; - - // We only reset the UI after the audio has finished playing - // audioPlayback.onended = () => { - // resetUI(); - // }; + const response = await 
fetch("/ask", { + method: "POST", + body: formData, + }); + const data = await response.json(); + console.log(data); + + // Once we have the response, we can source the playback element and play it + audioPlayback.src = "data:audio/wav;base64," + data.audio_base64; + audioPlayback.classList.remove("hidden"); + audioVisualizer.classList.add("hidden"); // hide the visualizer while playing back the response + // setupAIResponseVisualization(); + audioPlayback.onloadedmetadata = () => { + // When metadata is loaded, start playback + audioPlayback.play(); + // visualizeAIResponse(); + }; + + // We only reset the UI after the audio has finished playing + // audioPlayback.onended = () => { + // resetUI(); + // }; } catch (error) { - console.error('Error during fetch/transcription:', error); - resetUI(); + console.error("Error during fetch/transcription:", error); + console.log(error); + + // resetUI(); } finally { - if (analyser) { - analyser.disconnect(); - analyser = null; - } - isRecording = false; + if (analyser) { + analyser.disconnect(); + analyser = null; + } + isRecording = false; } - }; -} -function resetUI() { - document.getElementById('record-btn').classList.remove('hidden'); - document.getElementById('audio-visualizer').classList.add('hidden'); - document.getElementById('audio-playback').classList.add('hidden'); - // Reset any other UI elements as necessary + }; + + mediaRecorder.ondataavailable = (e) => { + chunks.push(e.data); + }; + }) + .catch((err) => { + console.error(`The following error occurred: ${err}`); + }); } -function setupAIResponseVisualization() { - try { - // Create a new audio context for playback if it doesn't exist - if (!audioContext) { - audioContext = new (window.AudioContext || window.webkitAudioContext)(); - } - // Resume the audio context in case it's in a suspended state - audioContext.resume().then(() => { - analyser = audioContext.createAnalyser(); - const source = audioContext.createMediaElementSource(audioPlayback); - source.connect(analyser); - analyser.connect(audioContext.destination); - analyser.fftSize = 2048; - bufferLength = analyser.frequencyBinCount; - dataArray = new Uint8Array(bufferLength); - }); - } catch (error) { - console.error('Error setting up AI response visualization:', error); - } -} +function drawWaveform() { + if (!analyser) return; -function visualizeAIResponse() { - const draw = () => { - requestAnimationFrame(draw); + requestAnimationFrame(drawWaveform); - analyser.getByteTimeDomainData(dataArray); + analyser.getByteTimeDomainData(dataArray); - canvasCtx.fillStyle = 'rgb(255, 255, 255)'; - canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height); + canvasCtx.fillStyle = "rgb(255, 255, 255)"; + canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height); - canvasCtx.lineWidth = 2; - canvasCtx.strokeStyle = 'rgb(0, 0, 0)'; + canvasCtx.lineWidth = 2; + canvasCtx.strokeStyle = "rgb(0, 0, 0)"; - canvasCtx.beginPath(); + canvasCtx.beginPath(); - let sliceWidth = audioVisualizer.width * 1.0 / bufferLength; - let x = 0; + let sliceWidth = (audioVisualizer.width * 1.0) / bufferLength; + let x = 0; - for (let i = 0; i < bufferLength; i++) { - let v = dataArray[i] / 128.0; - let y = v * audioVisualizer.height / 2; + let sum = 0; - if (i === 0) { - canvasCtx.moveTo(x, y); - } else { - canvasCtx.lineTo(x, y); - } + for (let i = 0; i < bufferLength; i++) { + let v = dataArray[i] / 128.0; + let y = (v * audioVisualizer.height) / 2; - x += sliceWidth; - } + sum += v; + + if (i === 0) { + canvasCtx.moveTo(x, y); + } 
else { + canvasCtx.lineTo(x, y); + } + + x += sliceWidth; + } + + canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2); + canvasCtx.stroke(); - canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2); - canvasCtx.stroke(); - }; + let currentAudioLevel = sum / bufferLength; - draw(); -} \ No newline at end of file + if (isRecording && Math.abs(currentAudioLevel - lastAudioLevel) < 0.01) { + if (!silenceTimer) { + silenceTimer = setTimeout(stopRecording, 1000); + } + } else { + clearTimeout(silenceTimer); + silenceTimer = null; + } + + lastAudioLevel = currentAudioLevel; +} diff --git a/examples/quivr-whisper/static/styles.css b/examples/quivr-whisper/static/styles.css index b7a0d74d47f0..c5381b83edd6 100644 --- a/examples/quivr-whisper/static/styles.css +++ b/examples/quivr-whisper/static/styles.css @@ -1,3 +1,48 @@ +body { + color: #f4f4f4 +} + +.bg-background { + background-color: #252525; +} + +.paper { + background-color: #1f1f1f; +} + +.bg-primary { + background-color: #6142d4; +} + +/* +# Override default MUI light theme. (Check theme.ts) +[UI.theme.light] + background = "#fcfcfc" + paper = "#f8f8f8" + + [UI.theme.light.primary] + main = "#6142d4" + dark = "#6e53cf" + light = "#6e53cf30" + [UI.theme.light.text] + primary = "#1f1f1f" + secondary = "#818080" + +# Override default MUI dark theme. (Check theme.ts) +[UI.theme.dark] + background = "#252525" + paper = "#1f1f1f" + + [UI.theme.dark.primary] + main = "#6142d4" + dark = "#6e53cf" + light = "#6e53cf30" + [UI.theme.dark.text] + primary = "#f4f4f4" + secondary = "#c8c8c8" + +*/ + .loader { border: 4px solid #f3f3f3; border-radius: 50%; diff --git a/examples/quivr-whisper/templates/index.html b/examples/quivr-whisper/templates/index.html index 6f508034c5e8..f4df5b887796 100644 --- a/examples/quivr-whisper/templates/index.html +++ b/examples/quivr-whisper/templates/index.html @@ -9,11 +9,12 @@ - +

[index.html hunk body garbled (markup stripped); surviving text: "Quivr.app"]
- Date: Thu, 21 Nov 2024 00:35:23 +0530 Subject: [PATCH 02/11] feat(examples): audio vis --- examples/quivr-whisper/static/app.js | 139 ++++++++++---------- examples/quivr-whisper/templates/index.html | 6 +- 2 files changed, 74 insertions(+), 71 deletions(-) diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index 1732bcbc79cd..4a608aedb874 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -10,6 +10,8 @@ let bufferLength; let dataArray; let lastAudioLevel = 0; let silenceTimer; +let silenceThreshold = 0.05; // Adjustable silence threshold +let silenceDuration = 1500; // Milliseconds of silence before stopping let isRecording = false; @@ -22,11 +24,72 @@ if (navigator.mediaDevices) { .then((stream) => { const mediaRecorder = new MediaRecorder(stream); + const drawWaveform = () => { + if (!analyser) return; + + requestAnimationFrame(drawWaveform); + + analyser.getByteTimeDomainData(dataArray); + + canvasCtx.fillStyle = "rgb(255, 255, 255)"; + canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height); + + canvasCtx.lineWidth = 2; + canvasCtx.strokeStyle = "rgb(0, 0, 0)"; + + canvasCtx.beginPath(); + + let sliceWidth = (audioVisualizer.width * 1.0) / bufferLength; + let x = 0; + + let sum = 0; + let maxAmplitude = 0; + + for (let i = 0; i < bufferLength; i++) { + let v = dataArray[i] / 128.0 - 1; // Normalize to [-1, 1] + let y = (v * audioVisualizer.height) / 2; + + // Calculate absolute amplitude + sum += Math.abs(v); + maxAmplitude = Math.max(maxAmplitude, Math.abs(v)); + + if (i === 0) { + canvasCtx.moveTo(x, y + audioVisualizer.height / 2); + } else { + canvasCtx.lineTo(x, y + audioVisualizer.height / 2); + } + + x += sliceWidth; + } + + canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2); + canvasCtx.stroke(); + + // More robust silence detection + let averageAmplitude = sum / bufferLength; + + if ( + isRecording && + averageAmplitude < silenceThreshold && + maxAmplitude < silenceThreshold * 2 + ) { + if (!silenceTimer) { + silenceTimer = setTimeout(stopRecording, silenceDuration); + } + } else { + clearTimeout(silenceTimer); + silenceTimer = null; + } + + lastAudioLevel = averageAmplitude; + }; + const startRecording = () => { + chunks = []; // Reset chunks at start of recording mediaRecorder.start(); + isRecording = true; console.log(mediaRecorder.state); console.log("recorder started"); - // recordBtn.classList.add("hidden"); audioVisualizer.classList.remove("hidden"); const audioContext = new (window.AudioContext || @@ -43,10 +106,13 @@ if (navigator.mediaDevices) { }; const stopRecording = () => { - mediaRecorder.stop(); - console.log(mediaRecorder.state); - console.log("recorder stopped"); - audioVisualizer.classList.add("hidden"); + if (isRecording) { + mediaRecorder.stop(); + isRecording = false; + console.log(mediaRecorder.state); + console.log("recorder stopped due to silence"); + audioVisualizer.classList.add("hidden"); + } }; recordBtn.onclick = () => { @@ -87,28 +153,17 @@ if (navigator.mediaDevices) { audioPlayback.src = "data:audio/wav;base64," + data.audio_base64; audioPlayback.classList.remove("hidden"); audioVisualizer.classList.add("hidden"); // hide the visualizer while playing back the response - // setupAIResponseVisualization(); audioPlayback.onloadedmetadata = () => { // When metadata is loaded, start playback audioPlayback.play(); - // visualizeAIResponse(); }; - - // We only reset the UI after the audio has finished playing - // audioPlayback.onended 
= () => { - // resetUI(); - // }; } catch (error) { console.error("Error during fetch/transcription:", error); - console.log(error); - - // resetUI(); } finally { if (analyser) { analyser.disconnect(); analyser = null; } - isRecording = false; } }; @@ -120,55 +175,3 @@ if (navigator.mediaDevices) { console.error(`The following error occurred: ${err}`); }); } - -function drawWaveform() { - if (!analyser) return; - - requestAnimationFrame(drawWaveform); - - analyser.getByteTimeDomainData(dataArray); - - canvasCtx.fillStyle = "rgb(255, 255, 255)"; - canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height); - - canvasCtx.lineWidth = 2; - canvasCtx.strokeStyle = "rgb(0, 0, 0)"; - - canvasCtx.beginPath(); - - let sliceWidth = (audioVisualizer.width * 1.0) / bufferLength; - let x = 0; - - let sum = 0; - - for (let i = 0; i < bufferLength; i++) { - let v = dataArray[i] / 128.0; - let y = (v * audioVisualizer.height) / 2; - - sum += v; - - if (i === 0) { - canvasCtx.moveTo(x, y); - } else { - canvasCtx.lineTo(x, y); - } - - x += sliceWidth; - } - - canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2); - canvasCtx.stroke(); - - let currentAudioLevel = sum / bufferLength; - - if (isRecording && Math.abs(currentAudioLevel - lastAudioLevel) < 0.01) { - if (!silenceTimer) { - silenceTimer = setTimeout(stopRecording, 1000); - } - } else { - clearTimeout(silenceTimer); - silenceTimer = null; - } - - lastAudioLevel = currentAudioLevel; -} diff --git a/examples/quivr-whisper/templates/index.html b/examples/quivr-whisper/templates/index.html index f4df5b887796..729e03ea4a6d 100644 --- a/examples/quivr-whisper/templates/index.html +++ b/examples/quivr-whisper/templates/index.html @@ -10,9 +10,9 @@ -

[index.html hunk body garbled (markup stripped); surviving fragments: "Quivr.app", "Quivr", class="bg-background rounded-lg cursor-pointer"]
From 29c9b284bd291a1b88e300ab982332ad412363b0 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Thu, 21 Nov 2024 01:42:36 +0530 Subject: [PATCH 04/11] fix(examples): dataArray null --- examples/quivr-whisper/static/app.js | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index c3ed0cf3b3e6..711c3a540d3b 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -6,7 +6,7 @@ const audioPlayback = document.getElementById("audio-playback"); const canvasCtx = audioVisualizer.getContext("2d"); // Configuration -const SILENCE_THRESHOLD = 0.01; +const SILENCE_THRESHOLD = 128; // Adjusted for byte data (128 is middle) const SILENCE_DURATION = 1500; const FFT_SIZE = 2048; @@ -31,9 +31,8 @@ class AudioAnalyzer { this.analyser.fftSize = FFT_SIZE; source.connect(this.analyser); - // Change to Float32Array for time domain data this.bufferLength = this.analyser.frequencyBinCount; - this.dataArray = new Float32Array(this.bufferLength); + this.dataArray = new Uint8Array(this.bufferLength); // Changed to Uint8Array return this.analyser; } @@ -64,12 +63,12 @@ class Visualizer { } draw(currentAnalyser, onSilence) { - if (!currentAnalyser) return; + if (!currentAnalyser || this.analyzer.dataArray === null) return; requestAnimationFrame(() => this.draw(currentAnalyser, onSilence)); - // Use getFloatTimeDomainData instead of getByteTimeDomainData - currentAnalyser.getFloatTimeDomainData(this.analyzer.dataArray); + // Use getByteTimeDomainData instead of getFloatTimeDomainData + currentAnalyser.getByteTimeDomainData(this.analyzer.dataArray); // Clear canvas this.ctx.fillStyle = "#252525"; @@ -82,16 +81,14 @@ class Visualizer { const sliceWidth = (this.canvas.width * 1.0) / this.analyzer.bufferLength; let x = 0; let sum = 0; - let maxAmplitude = 0; // Draw waveform for (let i = 0; i < this.analyzer.bufferLength; i++) { - // Values are already normalized (-1 to 1), no need to normalize - const v = this.analyzer.dataArray[i]; - const y = (v * this.canvas.height) / 2 + this.canvas.height / 2; + // Scale byte data (0-255) to canvas height + const v = this.analyzer.dataArray[i] / 128.0; // normalize to 0-2 + const y = (v - 1) * (this.canvas.height / 2) + this.canvas.height / 2; - sum += Math.abs(v); - maxAmplitude = Math.max(maxAmplitude, Math.abs(v)); + sum += Math.abs(v - 1); // Calculate distance from center (128) if (i === 0) { this.ctx.moveTo(x, y); @@ -105,12 +102,13 @@ class Visualizer { this.ctx.lineTo(this.canvas.width, this.canvas.height / 2); this.ctx.stroke(); - // Check for silence during recording with proper thresholds + // Check for silence during recording with adjusted thresholds for byte data if (state.isRecording) { const averageAmplitude = sum / this.analyzer.bufferLength; - if (averageAmplitude < SILENCE_THRESHOLD) { + if (averageAmplitude < 0.1) { + // Adjusted threshold for normalized data // Reset silence timer if we detect sound - if (averageAmplitude > SILENCE_THRESHOLD / 2) { + if (averageAmplitude > 0.05) { clearTimeout(state.silenceTimer); state.silenceTimer = null; } else { From 0d3cd226c1588abfaadd6eb7789cc7799bb29990 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Thu, 21 Nov 2024 01:58:47 +0530 Subject: [PATCH 05/11] feat(examples): status update --- examples/quivr-whisper/static/app.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index 
711c3a540d3b..3a4fe1d52bb0 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -169,6 +169,9 @@ class RecordingHandler { ); } }); + + recordBtn.innerText = "Listening..."; + recordBtn.classList.add("processing"); } stopRecording() { @@ -181,6 +184,7 @@ class RecordingHandler { } async handleRecordingStop() { + recordBtn.innerText = "Processing..."; console.log("Processing recording..."); const audioBlob = new Blob(state.chunks, { type: "audio/wav" }); @@ -210,6 +214,7 @@ class RecordingHandler { const data = await response.json(); await this.handleResponse(data); + recordBtn.innerText = "Ask a question to Quivr"; } async handleResponse(data) { From b8c65009fe5a4a4c220ae84fceb1d095d7c4ee99 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Thu, 21 Nov 2024 22:23:12 +0530 Subject: [PATCH 06/11] feat(examples): ui improvement --- examples/quivr-whisper/static/app.js | 94 ++++++++++++---- examples/quivr-whisper/static/loader.svg | 1 + examples/quivr-whisper/static/mic-off.svg | 1 + examples/quivr-whisper/static/mic.svg | 1 + examples/quivr-whisper/static/styles.css | 114 ++++++++++++++++++-- examples/quivr-whisper/templates/index.html | 47 ++++---- 6 files changed, 210 insertions(+), 48 deletions(-) create mode 100644 examples/quivr-whisper/static/loader.svg create mode 100644 examples/quivr-whisper/static/mic-off.svg create mode 100644 examples/quivr-whisper/static/mic.svg diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index 3a4fe1d52bb0..b9edf532de9d 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -1,10 +1,28 @@ // DOM Elements const recordBtn = document.getElementById("record-btn"); const fileInput = document.getElementById("fileInput"); +const fileName = document.getElementById("fileName"); + const audioVisualizer = document.getElementById("audio-visualizer"); const audioPlayback = document.getElementById("audio-playback"); const canvasCtx = audioVisualizer.getContext("2d"); +window.addEventListener("load", () => { + audioVisualizer.width = window.innerWidth; + audioVisualizer.height = window.innerHeight; +}); + +window.addEventListener("resize", (e) => { + audioVisualizer.width = window.innerWidth; + audioVisualizer.height = window.innerHeight; +}); + +fileInput.addEventListener("change", () => { + fileName.textContent = + fileInput.files.length > 0 ? 
fileInput.files[0].name : "No file chosen"; + fileName.classList.toggle("file-selected", fileInput.files.length > 0); +}); + // Configuration const SILENCE_THRESHOLD = 128; // Adjusted for byte data (128 is middle) const SILENCE_DURATION = 1500; @@ -24,6 +42,7 @@ class AudioAnalyzer { this.analyser = null; this.dataArray = null; this.bufferLength = null; + this.source = null; } setup(source, audioContext) { @@ -32,23 +51,56 @@ class AudioAnalyzer { source.connect(this.analyser); this.bufferLength = this.analyser.frequencyBinCount; - this.dataArray = new Uint8Array(this.bufferLength); // Changed to Uint8Array + this.dataArray = new Uint8Array(this.bufferLength); return this.analyser; } setupForPlayback(audioElement, audioContext) { - const source = audioContext.createMediaElementSource(audioElement); - const analyser = this.setup(source, audioContext); - analyser.connect(audioContext.destination); - return analyser; + // Disconnect existing source if it exists + if (this.source) { + try { + this.source.disconnect(); + } catch (e) { + // Ignore if already disconnected + } + } + + // Create a new source, ignoring previous connections + audioElement.pause(); + audioElement.currentTime = 0; + this.source = audioContext.createMediaElementSource(audioElement); + + this.analyser = audioContext.createAnalyser(); + this.analyser.fftSize = FFT_SIZE; + + // Connect the source to the analyser and then to destination + this.source.connect(this.analyser); + this.analyser.connect(audioContext.destination); + + this.bufferLength = this.analyser.frequencyBinCount; + this.dataArray = new Uint8Array(this.bufferLength); + + return this.analyser; } cleanup() { + if (this.source) { + try { + this.source.disconnect(); + } catch (e) { + // Ignore disconnect errors + } + } if (this.analyser) { - this.analyser.disconnect(); - this.analyser = null; + try { + this.analyser.disconnect(); + } catch (e) { + // Ignore disconnect errors + } } + this.source = null; + this.analyser = null; this.dataArray = null; this.bufferLength = null; } @@ -73,12 +125,13 @@ class Visualizer { // Clear canvas this.ctx.fillStyle = "#252525"; this.ctx.fillRect(0, 0, this.canvas.width, this.canvas.height); + if (!state.isRecording) return; this.ctx.lineWidth = 2; this.ctx.strokeStyle = "#6142d4"; this.ctx.beginPath(); - const sliceWidth = (this.canvas.width * 1.0) / this.analyzer.bufferLength; + const sliceWidth = (this.canvas.width * 1) / this.analyzer.bufferLength; let x = 0; let sum = 0; @@ -125,6 +178,7 @@ class RecordingHandler { this.mediaRecorder = null; this.audioAnalyzer = new AudioAnalyzer(); this.visualizer = new Visualizer(audioVisualizer, this.audioAnalyzer); + this.audioContext = null; } async initialize() { @@ -132,6 +186,9 @@ class RecordingHandler { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); this.mediaRecorder = new MediaRecorder(stream); this.setupRecordingEvents(); + if (!this.audioContext) + this.audioContext = new (window.AudioContext || + window.webkitAudioContext)(); } catch (err) { console.error(`Media device error: ${err}`); } @@ -152,13 +209,11 @@ class RecordingHandler { state.isRecording = true; this.mediaRecorder.start(); - const audioContext = new (window.AudioContext || - window.webkitAudioContext)(); - const source = audioContext.createMediaStreamSource( + const source = this.audioContext.createMediaStreamSource( this.mediaRecorder.stream ); - const analyser = this.audioAnalyzer.setup(source, audioContext); + const analyser = this.audioAnalyzer.setup(source, this.audioContext); 
audioVisualizer.classList.remove("hidden"); this.visualizer.draw(analyser, () => { @@ -170,7 +225,7 @@ class RecordingHandler { } }); - recordBtn.innerText = "Listening..."; + recordBtn.dataset.recording = true; recordBtn.classList.add("processing"); } @@ -180,15 +235,17 @@ class RecordingHandler { this.mediaRecorder.stop(); clearTimeout(state.silenceTimer); state.silenceTimer = null; + recordBtn.dataset.recording = false; } } async handleRecordingStop() { - recordBtn.innerText = "Processing..."; console.log("Processing recording..."); + recordBtn.dataset.pending = true; - const audioBlob = new Blob(state.chunks, { type: "audio/wav" }); + const audioBlob = new Blob(state.chunks, { type: "audio/mpeg" }); if (!fileInput.files.length) { + recordBtn.dataset.pending = false; alert("Please select a file."); return; } @@ -214,19 +271,15 @@ class RecordingHandler { const data = await response.json(); await this.handleResponse(data); - recordBtn.innerText = "Ask a question to Quivr"; } async handleResponse(data) { audioPlayback.src = "data:audio/wav;base64," + data.audio_base64; - const audioContext = new (window.AudioContext || - window.webkitAudioContext)(); - audioPlayback.onloadedmetadata = () => { const analyser = this.audioAnalyzer.setupForPlayback( audioPlayback, - audioContext + this.audioContext ); audioVisualizer.classList.remove("hidden"); @@ -236,6 +289,7 @@ class RecordingHandler { audioPlayback.onended = () => { this.audioAnalyzer.cleanup(); + recordBtn.dataset.pending = false; }; } } diff --git a/examples/quivr-whisper/static/loader.svg b/examples/quivr-whisper/static/loader.svg new file mode 100644 index 000000000000..1390bc478d8e --- /dev/null +++ b/examples/quivr-whisper/static/loader.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/examples/quivr-whisper/static/mic-off.svg b/examples/quivr-whisper/static/mic-off.svg new file mode 100644 index 000000000000..46d151fca867 --- /dev/null +++ b/examples/quivr-whisper/static/mic-off.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/examples/quivr-whisper/static/mic.svg b/examples/quivr-whisper/static/mic.svg new file mode 100644 index 000000000000..726d9f11b643 --- /dev/null +++ b/examples/quivr-whisper/static/mic.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/examples/quivr-whisper/static/styles.css b/examples/quivr-whisper/static/styles.css index c5381b83edd6..deb34d8f7f9c 100644 --- a/examples/quivr-whisper/static/styles.css +++ b/examples/quivr-whisper/static/styles.css @@ -1,17 +1,119 @@ -body { - color: #f4f4f4 +* { + box-sizing: border-box; + margin: 0; + padding: 0; } -.bg-background { + +body { + color: #f4f4f4; background-color: #252525; + display: flex; + gap: 1rem; + align-items: center; + flex-direction: column; + justify-content: center; + min-height: 100vh; } -.paper { - background-color: #1f1f1f; +.primary { + background-color: #6142d4; } -.bg-primary { +button { background-color: #6142d4; + border: none; + padding: .75rem 2rem; + border-radius: 0.5rem; + color: #f4f4f4; + cursor: pointer; +} + +canvas { + position: absolute; + width: 100%; + height: 100%; + top: 0; + left: 0; + background-color: #252525; + z-index: -1; +} + +.record-btn { + background-color: #f5f5f5; + border: none; + outline: none; + width: 256px; + height: 256px; + background-repeat: no-repeat; + background-position: center; + border-radius: 50%; + background-size: 50%; + transition: background-color 200ms ease-in, transform 200ms ease-out; +} + +.record-btn:hover { + background-color: #fff; + transform: 
scale(1.025); +} + +.record-btn:active { + background-color: #e2e2e2; + transform: scale(0.975); +} + +.record-btn[data-recording="true"] { + background-image: url("./mic.svg"); +} + +.record-btn[data-recording="false"] { + background-image: url("./mic-off.svg"); +} + +.record-btn[data-pending="true"] { + background-image: url("./loader.svg") !important; + animation: spin 1s linear infinite; +} + +.hidden { + display: none; +} + +.custom-file-input { + display: flex; + flex-direction: column; + align-items: center; + gap: 10px; +} + +.custom-file-input input[type="file"] { + display: none; +} + +.custom-file-input label { + background-color: #6142d4; + color: white; + padding: 8px 16px; + border-radius: 4px; + cursor: pointer; + font-size: 14px; + font-weight: bold; + transition: background-color 0.3s; +} + +.custom-file-input label:hover { + background-color: #6142d4; +} + +.custom-file-input span { + font-size: 14px; + color: #f4f4f4; +} + +/* Adjust appearance when a file is selected */ +.custom-file-input span.file-selected { + color: #ffffff; + font-weight: bold; } /* diff --git a/examples/quivr-whisper/templates/index.html b/examples/quivr-whisper/templates/index.html index ad2ba7b665e7..fa18d2d9a27f 100644 --- a/examples/quivr-whisper/templates/index.html +++ b/examples/quivr-whisper/templates/index.html @@ -1,27 +1,30 @@ - + - - - - + + + Audio Interaction WebApp - - - + + - -

[index.html hunk body garbled (markup stripped); surviving text: "Quivr", "No file chosen"]
+ + - - - \ No newline at end of file + + From f75d7448065f21191a0911a5c7e6d77a8f424505 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Fri, 22 Nov 2024 01:27:07 +0530 Subject: [PATCH 07/11] fix(examples): visualisation --- examples/quivr-whisper/static/app.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index b9edf532de9d..dc7be1bae604 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -31,6 +31,7 @@ const FFT_SIZE = 2048; // State const state = { isRecording: false, + isVisualizing: false, chunks: [], silenceTimer: null, lastAudioLevel: 0, @@ -125,7 +126,7 @@ class Visualizer { // Clear canvas this.ctx.fillStyle = "#252525"; this.ctx.fillRect(0, 0, this.canvas.width, this.canvas.height); - if (!state.isRecording) return; + if (!state.isVisualizing) return; this.ctx.lineWidth = 2; this.ctx.strokeStyle = "#6142d4"; @@ -205,6 +206,7 @@ class RecordingHandler { } startRecording() { + state.isVisualizing = true; state.chunks = []; state.isRecording = true; this.mediaRecorder.start(); @@ -231,6 +233,7 @@ class RecordingHandler { stopRecording() { if (state.isRecording) { + state.isVisualizing = false; state.isRecording = false; this.mediaRecorder.stop(); clearTimeout(state.silenceTimer); @@ -285,11 +288,13 @@ class RecordingHandler { this.visualizer.draw(analyser, () => {}); audioPlayback.play(); + state.isVisualizing = true; }; audioPlayback.onended = () => { this.audioAnalyzer.cleanup(); recordBtn.dataset.pending = false; + state.isVisualizing = false; }; } } From f01fa47e06975638f58cf74524a7afdde611f175 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Fri, 22 Nov 2024 23:54:59 +0530 Subject: [PATCH 08/11] feat(example): separate upload route --- examples/quivr-whisper/app.py | 67 +++++++++++++++----- examples/quivr-whisper/pyproject.toml | 1 + examples/quivr-whisper/requirements-dev.lock | 5 ++ examples/quivr-whisper/requirements.lock | 5 ++ examples/quivr-whisper/static/app.js | 30 ++++++++- examples/quivr-whisper/static/styles.css | 3 +- examples/quivr-whisper/templates/index.html | 3 +- 7 files changed, 95 insertions(+), 19 deletions(-) diff --git a/examples/quivr-whisper/app.py b/examples/quivr-whisper/app.py index 05401706b54c..79031b9019d3 100644 --- a/examples/quivr-whisper/app.py +++ b/examples/quivr-whisper/app.py @@ -1,4 +1,4 @@ -from flask import Flask, render_template, request, jsonify +from flask import Flask, render_template, request, jsonify, session import openai import base64 import os @@ -12,20 +12,22 @@ import asyncio -UPLOAD_FOLDER = 'uploads' -ALLOWED_EXTENSIONS = {'txt'} +UPLOAD_FOLDER = "uploads" +ALLOWED_EXTENSIONS = {"txt"} os.makedirs(UPLOAD_FOLDER, exist_ok=True) app = Flask(__name__) -app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER +app.secret_key = "secret" +app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER +app.config["CACHE_TYPE"] = "SimpleCache" # In-memory cache for development +app.config["CACHE_DEFAULT_TIMEOUT"] = 60 * 60 # 1 hour cache timeout load_dotenv() openai.api_key = os.getenv("OPENAI_API_KEY") -def allowed_file(filename): - return '.' in filename and \ - filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS +brains = {} + @app.route("/") def index(): @@ -43,26 +45,59 @@ def run_in_event_loop(func, *args, **kwargs): return result -@app.route('/ask', methods=['POST']) -async def ask(): - if 'file' not in request.files: +def allowed_file(filename): + return "." 
in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS + + +@app.route("/upload", methods=["POST"]) +async def upload_file(): + if "file" not in request.files: return "No file part", 400 - file = request.files['file'] + file = request.files["file"] - if file.filename == '': + if file.filename == "": return "No selected file", 400 if not (file and file.filename and allowed_file(file.filename)): return "Invalid file type", 400 filename = secure_filename(file.filename) - filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename) + filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename) file.save(filepath) - print("Uploading file...") - brain: Brain = await to_thread(run_in_event_loop, Brain.from_files, name="user_brain", file_paths=[filepath]) + print(f"File uploaded and saved at: {filepath}") + + print("Creating brain instance...") + + brain: Brain = await to_thread( + run_in_event_loop, Brain.from_files, name="user_brain", file_paths=[filepath] + ) + + # Store brain instance in cache + session_id = session.sid if hasattr(session, "sid") else os.urandom(16).hex() + session["session_id"] = session_id + # cache.set(session_id, brain) # Store the brain instance in the cache + brains[session_id] = brain + print(f"Brain instance created and stored in cache for session ID: {session_id}") + + return jsonify({"message": "Brain created successfully"}) + + +@app.route("/ask", methods=["POST"]) +async def ask(): + if "audio_data" not in request.files: + return "Missing audio data", 400 + + # Retrieve the brain instance from the cache using the session ID + session_id = session.get("session_id") + if not session_id: + return "Session ID not found. Upload a file first.", 400 + + brain = brains.get(session_id) + if not brain: + return "Brain instance not found in dict. 
Upload a file first.", 400 - print(f"{filepath} saved to brain.") + print("Brain instance loaded from cache.") print("Speech to text...") audio_file = request.files["audio_data"] diff --git a/examples/quivr-whisper/pyproject.toml b/examples/quivr-whisper/pyproject.toml index 692d1df09e8c..3c48b90c6529 100644 --- a/examples/quivr-whisper/pyproject.toml +++ b/examples/quivr-whisper/pyproject.toml @@ -9,6 +9,7 @@ dependencies = [ "flask[async]>=3.1.0", "openai>=1.54.5", "quivr-core>=0.0.24", + "flask-caching>=2.3.0", ] readme = "README.md" requires-python = ">= 3.11" diff --git a/examples/quivr-whisper/requirements-dev.lock b/examples/quivr-whisper/requirements-dev.lock index 901ea6e170b7..716aa161b38c 100644 --- a/examples/quivr-whisper/requirements-dev.lock +++ b/examples/quivr-whisper/requirements-dev.lock @@ -44,6 +44,8 @@ beautifulsoup4==4.12.3 # via unstructured blinker==1.9.0 # via flask +cachelib==0.9.0 + # via flask-caching cachetools==5.5.0 # via google-auth certifi==2024.8.30 @@ -114,6 +116,9 @@ filetype==1.2.0 # via llama-index-core # via unstructured flask==3.1.0 + # via flask-caching + # via quivr-whisper +flask-caching==2.3.0 # via quivr-whisper flatbuffers==24.3.25 # via onnxruntime diff --git a/examples/quivr-whisper/requirements.lock b/examples/quivr-whisper/requirements.lock index 901ea6e170b7..716aa161b38c 100644 --- a/examples/quivr-whisper/requirements.lock +++ b/examples/quivr-whisper/requirements.lock @@ -44,6 +44,8 @@ beautifulsoup4==4.12.3 # via unstructured blinker==1.9.0 # via flask +cachelib==0.9.0 + # via flask-caching cachetools==5.5.0 # via google-auth certifi==2024.8.30 @@ -114,6 +116,9 @@ filetype==1.2.0 # via llama-index-core # via unstructured flask==3.1.0 + # via flask-caching + # via quivr-whisper +flask-caching==2.3.0 # via quivr-whisper flatbuffers==24.3.25 # via onnxruntime diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index dc7be1bae604..b6ac04094007 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -1,6 +1,7 @@ // DOM Elements const recordBtn = document.getElementById("record-btn"); const fileInput = document.getElementById("fileInput"); +const fileInputContainer = document.querySelector(".custom-file-input"); const fileName = document.getElementById("fileName"); const audioVisualizer = document.getElementById("audio-visualizer"); @@ -246,7 +247,7 @@ class RecordingHandler { console.log("Processing recording..."); recordBtn.dataset.pending = true; - const audioBlob = new Blob(state.chunks, { type: "audio/mpeg" }); + const audioBlob = new Blob(state.chunks, { type: "audio/wav" }); if (!fileInput.files.length) { recordBtn.dataset.pending = false; alert("Please select a file."); @@ -299,6 +300,33 @@ class RecordingHandler { } } +const uploadFile = async (e) => { + e.preventDefault(); + const file = fileInput.files[0]; + + if (!file) { + alert("Please select a file."); + return; + } + const formData = new FormData(); + formData.append("file", file); + try { + await fetch("/upload", { + method: "POST", + body: formData, + }); + recordBtn.classList.remove("hidden"); + fileInputContainer.classList.add("hidden"); + } catch (error) { + recordBtn.classList.add("hidden"); + fileInputContainer.classList.remove("hidden"); + console.error("Error uploading file:", error); + } +}; + +const uploadBtn = document.getElementById("upload-btn"); +uploadBtn.addEventListener("click", uploadFile); + // Main initialization async function initializeApp() { if (!navigator.mediaDevices) { 
diff --git a/examples/quivr-whisper/static/styles.css b/examples/quivr-whisper/static/styles.css index deb34d8f7f9c..2a51e32321b3 100644 --- a/examples/quivr-whisper/static/styles.css +++ b/examples/quivr-whisper/static/styles.css @@ -76,7 +76,8 @@ canvas { } .hidden { - display: none; + display: none !important; + visibility: hidden; } .custom-file-input { diff --git a/examples/quivr-whisper/templates/index.html b/examples/quivr-whisper/templates/index.html index fa18d2d9a27f..ef632b9afdc1 100644 --- a/examples/quivr-whisper/templates/index.html +++ b/examples/quivr-whisper/templates/index.html @@ -14,7 +14,7 @@ @@ -22,6 +22,7 @@ No file chosen +
From c259070046c2bbe9b0b72d683bd11c157220f5e8 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Sat, 23 Nov 2024 00:06:36 +0530 Subject: [PATCH 09/11] style(example): uploading state --- examples/quivr-whisper/static/app.js | 2 ++ examples/quivr-whisper/static/styles.css | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index b6ac04094007..04a9f34235d0 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -301,6 +301,7 @@ class RecordingHandler { } const uploadFile = async (e) => { + uploadBtn.innerText = "Uploading File..."; e.preventDefault(); const file = fileInput.files[0]; @@ -321,6 +322,7 @@ const uploadFile = async (e) => { recordBtn.classList.add("hidden"); fileInputContainer.classList.remove("hidden"); console.error("Error uploading file:", error); + uploadBtn.innerText = "Upload Failed. Try again"; } }; diff --git a/examples/quivr-whisper/static/styles.css b/examples/quivr-whisper/static/styles.css index 2a51e32321b3..e250adda1028 100644 --- a/examples/quivr-whisper/static/styles.css +++ b/examples/quivr-whisper/static/styles.css @@ -92,7 +92,7 @@ canvas { } .custom-file-input label { - background-color: #6142d4; + border: solid 2px #6142d4; color: white; padding: 8px 16px; border-radius: 4px; From 9a262e4355b190288845d8f2d3771ef6c98dbad1 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Sun, 24 Nov 2024 15:04:08 +0530 Subject: [PATCH 10/11] fix(examples): consecutive questions --- examples/quivr-whisper/static/app.js | 74 +++++++++++++++------------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index 04a9f34235d0..513acc88180e 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -41,70 +41,74 @@ const state = { // Audio Analysis class AudioAnalyzer { constructor() { + this.reset(); + } + + reset() { this.analyser = null; this.dataArray = null; this.bufferLength = null; this.source = null; + this.cleanup(); } setup(source, audioContext) { - this.analyser = audioContext.createAnalyser(); - this.analyser.fftSize = FFT_SIZE; - source.connect(this.analyser); + this.cleanup(); - this.bufferLength = this.analyser.frequencyBinCount; - this.dataArray = new Uint8Array(this.bufferLength); + this.analyser = this._createAnalyser(audioContext); + source.connect(this.analyser); + this._initializeBuffer(); return this.analyser; } - setupForPlayback(audioElement, audioContext) { - // Disconnect existing source if it exists - if (this.source) { - try { - this.source.disconnect(); - } catch (e) { - // Ignore if already disconnected - } + setupForPlayback(audioElement, audioContext, connectToDestination = true) { + // Reuse existing MediaElementSourceNode if it already exists for this audio element + if (!this.source || this.source.mediaElement !== audioElement) { + this.cleanup(); // Ensure any previous connections are cleaned up + this.source = audioContext.createMediaElementSource(audioElement); } - // Create a new source, ignoring previous connections - audioElement.pause(); - audioElement.currentTime = 0; - this.source = audioContext.createMediaElementSource(audioElement); - - this.analyser = audioContext.createAnalyser(); - this.analyser.fftSize = FFT_SIZE; + this.analyser = this._createAnalyser(audioContext); - // Connect the source to the analyser and then to destination this.source.connect(this.analyser); - 
this.analyser.connect(audioContext.destination); - this.bufferLength = this.analyser.frequencyBinCount; - this.dataArray = new Uint8Array(this.bufferLength); + if (connectToDestination) { + this.analyser.connect(audioContext.destination); + } + this._initializeBuffer(); return this.analyser; } cleanup() { if (this.source) { - try { - this.source.disconnect(); - } catch (e) { - // Ignore disconnect errors - } + this._safeDisconnect(this.source); } if (this.analyser) { + this._safeDisconnect(this.analyser); + } + } + + _createAnalyser(audioContext) { + const analyser = audioContext.createAnalyser(); + analyser.fftSize = FFT_SIZE; + return analyser; + } + + _initializeBuffer() { + this.bufferLength = this.analyser.frequencyBinCount; + this.dataArray = new Uint8Array(this.bufferLength); + } + + _safeDisconnect(node) { + if (node) { try { - this.analyser.disconnect(); - } catch (e) { + node.disconnect(); + } catch { // Ignore disconnect errors } } - this.source = null; - this.analyser = null; - this.dataArray = null; - this.bufferLength = null; } } From cdbc0fae36f9cb98899a42e47b8c266672c58fe4 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Sun, 24 Nov 2024 15:08:02 +0530 Subject: [PATCH 11/11] fix(examples): disable button during pending and only accept .txt --- examples/quivr-whisper/static/app.js | 3 +++ examples/quivr-whisper/templates/index.html | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index 513acc88180e..5f9a7064c6cd 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -250,10 +250,12 @@ class RecordingHandler { async handleRecordingStop() { console.log("Processing recording..."); recordBtn.dataset.pending = true; + recordBtn.disabled = true; const audioBlob = new Blob(state.chunks, { type: "audio/wav" }); if (!fileInput.files.length) { recordBtn.dataset.pending = false; + recordBtn.disabled = false; alert("Please select a file."); return; } @@ -299,6 +301,7 @@ class RecordingHandler { audioPlayback.onended = () => { this.audioAnalyzer.cleanup(); recordBtn.dataset.pending = false; + recordBtn.disabled = false; state.isVisualizing = false; }; } diff --git a/examples/quivr-whisper/templates/index.html b/examples/quivr-whisper/templates/index.html index ef632b9afdc1..d1ae47eb08d4 100644 --- a/examples/quivr-whisper/templates/index.html +++ b/examples/quivr-whisper/templates/index.html @@ -20,7 +20,13 @@ >
[index.html hunk body garbled (markup stripped); surviving text: "No file chosen"]