From 8c6aec154a813d4b39519a1308b0e697e882c033 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Thu, 21 Nov 2024 00:18:08 +0530 Subject: [PATCH 01/11] feat(examples): basic functionality --- .vscode/settings.json | 3 +- examples/quivr-whisper/.gitignore | 1 + examples/quivr-whisper/app.py | 85 +++-- examples/quivr-whisper/pyproject.toml | 2 +- examples/quivr-whisper/requirements-dev.lock | 2 + examples/quivr-whisper/requirements.lock | 2 + examples/quivr-whisper/static/app.js | 319 +++++++++---------- examples/quivr-whisper/static/styles.css | 45 +++ examples/quivr-whisper/templates/index.html | 7 +- 9 files changed, 257 insertions(+), 209 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 86370d352832..bb7120ff5c99 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -44,5 +44,6 @@ "reportUnusedImport": "warning", "reportGeneralTypeIssues": "warning" }, - "makefile.configureOnOpen": false + "makefile.configureOnOpen": false, + "djlint.showInstallError": false } diff --git a/examples/quivr-whisper/.gitignore b/examples/quivr-whisper/.gitignore index 4c49bd78f1d0..727370b46a43 100644 --- a/examples/quivr-whisper/.gitignore +++ b/examples/quivr-whisper/.gitignore @@ -1 +1,2 @@ .env +uploads \ No newline at end of file diff --git a/examples/quivr-whisper/app.py b/examples/quivr-whisper/app.py index 1ae27eac2399..05401706b54c 100644 --- a/examples/quivr-whisper/app.py +++ b/examples/quivr-whisper/app.py @@ -4,37 +4,78 @@ import os import requests from dotenv import load_dotenv +from quivr_core import Brain +from quivr_core.rag.entities.config import RetrievalConfig from tempfile import NamedTemporaryFile +from werkzeug.utils import secure_filename +from asyncio import to_thread +import asyncio + + +UPLOAD_FOLDER = 'uploads' +ALLOWED_EXTENSIONS = {'txt'} + +os.makedirs(UPLOAD_FOLDER, exist_ok=True) app = Flask(__name__) +app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER load_dotenv() -openai.api_key = os.getenv("OPENAI_API_KEY") - -quivr_token = os.getenv("QUIVR_API_KEY", "") -quivr_chat_id = os.getenv("QUIVR_CHAT_ID", "") -quivr_brain_id = os.getenv("QUIVR_BRAIN_ID", "") -quivr_url = ( - os.getenv("QUIVR_URL", "https://api.quivr.app") - + f"/chat/{quivr_chat_id}/question?brain_id={quivr_brain_id}" -) -headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {quivr_token}", -} +openai.api_key = os.getenv("OPENAI_API_KEY") +def allowed_file(filename): + return '.' 
in filename and \ + filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS @app.route("/") def index(): return render_template("index.html") -@app.route("/transcribe", methods=["POST"]) -def transcribe_audio(): +def run_in_event_loop(func, *args, **kwargs): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + if asyncio.iscoroutinefunction(func): + result = loop.run_until_complete(func(*args, **kwargs)) + else: + result = func(*args, **kwargs) + loop.close() + return result + + +@app.route('/ask', methods=['POST']) +async def ask(): + if 'file' not in request.files: + return "No file part", 400 + + file = request.files['file'] + + if file.filename == '': + return "No selected file", 400 + if not (file and file.filename and allowed_file(file.filename)): + return "Invalid file type", 400 + + filename = secure_filename(file.filename) + filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename) + file.save(filepath) + + print("Uploading file...") + brain: Brain = await to_thread(run_in_event_loop, Brain.from_files, name="user_brain", file_paths=[filepath]) + + print(f"{filepath} saved to brain.") + + print("Speech to text...") audio_file = request.files["audio_data"] transcript = transcribe_audio_file(audio_file) - quivr_response = ask_quivr_question(transcript) - audio_base64 = synthesize_speech(quivr_response) + print("Transcript result: ", transcript) + + print("Getting response...") + quivr_response = await to_thread(run_in_event_loop, brain.ask, transcript) + + print("Text to speech...") + audio_base64 = synthesize_speech(quivr_response.answer) + + print("Done") return jsonify({"audio_base64": audio_base64}) @@ -55,16 +96,6 @@ def transcribe_audio_file(audio_file): return transcript -def ask_quivr_question(transcript): - response = requests.post(quivr_url, headers=headers, json={"question": transcript}) - if response.status_code == 200: - quivr_response = response.json().get("assistant") - return quivr_response - else: - print(f"Error from Quivr API: {response.status_code}, {response.text}") - return "Sorry, I couldn't understand that." 
- - def synthesize_speech(text): speech_response = openai.audio.speech.create( model="tts-1", voice="nova", input=text diff --git a/examples/quivr-whisper/pyproject.toml b/examples/quivr-whisper/pyproject.toml index 457e6c90e392..692d1df09e8c 100644 --- a/examples/quivr-whisper/pyproject.toml +++ b/examples/quivr-whisper/pyproject.toml @@ -6,7 +6,7 @@ authors = [ { name = "Stan Girard", email = "stan@quivr.app" } ] dependencies = [ - "flask>=3.1.0", + "flask[async]>=3.1.0", "openai>=1.54.5", "quivr-core>=0.0.24", ] diff --git a/examples/quivr-whisper/requirements-dev.lock b/examples/quivr-whisper/requirements-dev.lock index 8e93ec1b5ff3..901ea6e170b7 100644 --- a/examples/quivr-whisper/requirements-dev.lock +++ b/examples/quivr-whisper/requirements-dev.lock @@ -32,6 +32,8 @@ anyio==4.6.2.post1 # via httpx # via openai # via starlette +asgiref==3.8.1 + # via flask attrs==24.2.0 # via aiohttp backoff==2.2.1 diff --git a/examples/quivr-whisper/requirements.lock b/examples/quivr-whisper/requirements.lock index 8e93ec1b5ff3..901ea6e170b7 100644 --- a/examples/quivr-whisper/requirements.lock +++ b/examples/quivr-whisper/requirements.lock @@ -32,6 +32,8 @@ anyio==4.6.2.post1 # via httpx # via openai # via starlette +asgiref==3.8.1 + # via flask attrs==24.2.0 # via aiohttp backoff==2.2.1 diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index 0d788544dc23..1732bcbc79cd 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -1,209 +1,174 @@ -const recordBtn = document.getElementById('record-btn'); -const audioVisualizer = document.getElementById('audio-visualizer'); -const audioPlayback = document.getElementById('audio-playback'); -const canvasCtx = audioVisualizer.getContext('2d'); +const recordBtn = document.getElementById("record-btn"); +const fileInput = document.getElementById("fileInput"); + +const audioVisualizer = document.getElementById("audio-visualizer"); +const audioPlayback = document.getElementById("audio-playback"); +const canvasCtx = audioVisualizer.getContext("2d"); -let isRecording = false; -let mediaRecorder; -let audioChunks = []; -let audioContext; let analyser; -let dataArray; let bufferLength; +let dataArray; let lastAudioLevel = 0; let silenceTimer; -recordBtn.addEventListener('click', toggleRecording); - -function toggleRecording() { - if (!isRecording) { - recordBtn.classList.add('hidden'); - audioVisualizer.classList.remove('hidden'); - startRecording(); - } else { - audioVisualizer.classList.add('hidden'); - stopRecording(); - } -} - -function drawWaveform() { - if (!analyser) return; - - requestAnimationFrame(drawWaveform); - - analyser.getByteTimeDomainData(dataArray); - - canvasCtx.fillStyle = 'rgb(255, 255, 255)'; - canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height); - - canvasCtx.lineWidth = 2; - canvasCtx.strokeStyle = 'rgb(0, 0, 0)'; - - canvasCtx.beginPath(); +let isRecording = false; - let sliceWidth = audioVisualizer.width * 1.0 / bufferLength; - let x = 0; +if (navigator.mediaDevices) { + const constraints = { audio: true }; + let chunks = []; + + navigator.mediaDevices + .getUserMedia(constraints) + .then((stream) => { + const mediaRecorder = new MediaRecorder(stream); + + const startRecording = () => { + mediaRecorder.start(); + console.log(mediaRecorder.state); + console.log("recorder started"); + // recordBtn.classList.add("hidden"); + audioVisualizer.classList.remove("hidden"); + + const audioContext = new (window.AudioContext || + 
window.webkitAudioContext)(); + analyser = audioContext.createAnalyser(); + const source = audioContext.createMediaStreamSource(stream); + + source.connect(analyser); + analyser.fftSize = 2048; + bufferLength = analyser.frequencyBinCount; + dataArray = new Uint8Array(bufferLength); + + drawWaveform(); + }; + + const stopRecording = () => { + mediaRecorder.stop(); + console.log(mediaRecorder.state); + console.log("recorder stopped"); + audioVisualizer.classList.add("hidden"); + }; + + recordBtn.onclick = () => { + if (mediaRecorder.state === "inactive") { + startRecording(); + } else if (mediaRecorder.state === "recording") { + stopRecording(); + } + }; - let sum = 0; + mediaRecorder.onstop = async (e) => { + console.log("STOPP"); - for (let i = 0; i < bufferLength; i++) { - let v = dataArray[i] / 128.0; - let y = v * audioVisualizer.height / 2; + // The mediaRecorder has stopped; now we can process the chunks + const audioBlob = new Blob(chunks, { type: "audio/wav" }); + const formData = new FormData(); - sum += v; + formData.append("audio_data", audioBlob); - if (i === 0) { - canvasCtx.moveTo(x, y); + // Append the file to the FormData object + if (fileInput.files.length > 0) { + formData.append("file", fileInput.files[0]); } else { - canvasCtx.lineTo(x, y); + alert("Please select a file."); + return; } - x += sliceWidth; - } - - canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2); - canvasCtx.stroke(); - - let currentAudioLevel = sum / bufferLength; - - if (isRecording && Math.abs(currentAudioLevel - lastAudioLevel) < 0.01) { - if (!silenceTimer) { - silenceTimer = setTimeout(stopRecording, 1000); - } - } else { - clearTimeout(silenceTimer); - silenceTimer = null; - } - - lastAudioLevel = currentAudioLevel; -} - -async function startRecording() { - audioChunks = []; - const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - mediaRecorder = new MediaRecorder(stream); - mediaRecorder.ondataavailable = event => { - audioChunks.push(event.data); - }; - mediaRecorder.start(); - isRecording = true; - - audioContext = new (window.AudioContext || window.webkitAudioContext)(); - analyser = audioContext.createAnalyser(); - const source = audioContext.createMediaStreamSource(stream); - - source.connect(analyser); - analyser.fftSize = 2048; - bufferLength = analyser.frequencyBinCount; - dataArray = new Uint8Array(bufferLength); - - drawWaveform(); -} - -function stopRecording() { - mediaRecorder.stop(); - mediaRecorder.onstop = async () => { - // The mediaRecorder has stopped; now we can process the chunks - const audioBlob = new Blob(audioChunks, { type: 'audio/wav' }); - const formData = new FormData(); - formData.append('audio_data', audioBlob); - // Now we're sending the audio to the server and waiting for a response try { - const response = await fetch('/transcribe', { - method: 'POST', - body: formData - }); - const data = await response.json(); - - // Once we have the response, we can source the playback element and play it - audioPlayback.src = 'data:audio/wav;base64,' + data.audio_base64; - audioPlayback.classList.remove('hidden'); - audioVisualizer.classList.add('hidden'); // hide the visualizer while playing back the response - setupAIResponseVisualization(); - audioPlayback.onloadedmetadata = () => { - // When metadata is loaded, start playback - audioPlayback.play(); - visualizeAIResponse(); - }; - - // We only reset the UI after the audio has finished playing - // audioPlayback.onended = () => { - // resetUI(); - // }; + const response = await 
fetch("/ask", { + method: "POST", + body: formData, + }); + const data = await response.json(); + console.log(data); + + // Once we have the response, we can source the playback element and play it + audioPlayback.src = "data:audio/wav;base64," + data.audio_base64; + audioPlayback.classList.remove("hidden"); + audioVisualizer.classList.add("hidden"); // hide the visualizer while playing back the response + // setupAIResponseVisualization(); + audioPlayback.onloadedmetadata = () => { + // When metadata is loaded, start playback + audioPlayback.play(); + // visualizeAIResponse(); + }; + + // We only reset the UI after the audio has finished playing + // audioPlayback.onended = () => { + // resetUI(); + // }; } catch (error) { - console.error('Error during fetch/transcription:', error); - resetUI(); + console.error("Error during fetch/transcription:", error); + console.log(error); + + // resetUI(); } finally { - if (analyser) { - analyser.disconnect(); - analyser = null; - } - isRecording = false; + if (analyser) { + analyser.disconnect(); + analyser = null; + } + isRecording = false; } - }; -} -function resetUI() { - document.getElementById('record-btn').classList.remove('hidden'); - document.getElementById('audio-visualizer').classList.add('hidden'); - document.getElementById('audio-playback').classList.add('hidden'); - // Reset any other UI elements as necessary + }; + + mediaRecorder.ondataavailable = (e) => { + chunks.push(e.data); + }; + }) + .catch((err) => { + console.error(`The following error occurred: ${err}`); + }); } -function setupAIResponseVisualization() { - try { - // Create a new audio context for playback if it doesn't exist - if (!audioContext) { - audioContext = new (window.AudioContext || window.webkitAudioContext)(); - } - // Resume the audio context in case it's in a suspended state - audioContext.resume().then(() => { - analyser = audioContext.createAnalyser(); - const source = audioContext.createMediaElementSource(audioPlayback); - source.connect(analyser); - analyser.connect(audioContext.destination); - analyser.fftSize = 2048; - bufferLength = analyser.frequencyBinCount; - dataArray = new Uint8Array(bufferLength); - }); - } catch (error) { - console.error('Error setting up AI response visualization:', error); - } -} +function drawWaveform() { + if (!analyser) return; -function visualizeAIResponse() { - const draw = () => { - requestAnimationFrame(draw); + requestAnimationFrame(drawWaveform); - analyser.getByteTimeDomainData(dataArray); + analyser.getByteTimeDomainData(dataArray); - canvasCtx.fillStyle = 'rgb(255, 255, 255)'; - canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height); + canvasCtx.fillStyle = "rgb(255, 255, 255)"; + canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height); - canvasCtx.lineWidth = 2; - canvasCtx.strokeStyle = 'rgb(0, 0, 0)'; + canvasCtx.lineWidth = 2; + canvasCtx.strokeStyle = "rgb(0, 0, 0)"; - canvasCtx.beginPath(); + canvasCtx.beginPath(); - let sliceWidth = audioVisualizer.width * 1.0 / bufferLength; - let x = 0; + let sliceWidth = (audioVisualizer.width * 1.0) / bufferLength; + let x = 0; - for (let i = 0; i < bufferLength; i++) { - let v = dataArray[i] / 128.0; - let y = v * audioVisualizer.height / 2; + let sum = 0; - if (i === 0) { - canvasCtx.moveTo(x, y); - } else { - canvasCtx.lineTo(x, y); - } + for (let i = 0; i < bufferLength; i++) { + let v = dataArray[i] / 128.0; + let y = (v * audioVisualizer.height) / 2; - x += sliceWidth; - } + sum += v; + + if (i === 0) { + canvasCtx.moveTo(x, y); + } 
else { + canvasCtx.lineTo(x, y); + } + + x += sliceWidth; + } + + canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2); + canvasCtx.stroke(); - canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2); - canvasCtx.stroke(); - }; + let currentAudioLevel = sum / bufferLength; - draw(); -} \ No newline at end of file + if (isRecording && Math.abs(currentAudioLevel - lastAudioLevel) < 0.01) { + if (!silenceTimer) { + silenceTimer = setTimeout(stopRecording, 1000); + } + } else { + clearTimeout(silenceTimer); + silenceTimer = null; + } + + lastAudioLevel = currentAudioLevel; +} diff --git a/examples/quivr-whisper/static/styles.css b/examples/quivr-whisper/static/styles.css index b7a0d74d47f0..c5381b83edd6 100644 --- a/examples/quivr-whisper/static/styles.css +++ b/examples/quivr-whisper/static/styles.css @@ -1,3 +1,48 @@ +body { + color: #f4f4f4 +} + +.bg-background { + background-color: #252525; +} + +.paper { + background-color: #1f1f1f; +} + +.bg-primary { + background-color: #6142d4; +} + +/* +# Override default MUI light theme. (Check theme.ts) +[UI.theme.light] + background = "#fcfcfc" + paper = "#f8f8f8" + + [UI.theme.light.primary] + main = "#6142d4" + dark = "#6e53cf" + light = "#6e53cf30" + [UI.theme.light.text] + primary = "#1f1f1f" + secondary = "#818080" + +# Override default MUI dark theme. (Check theme.ts) +[UI.theme.dark] + background = "#252525" + paper = "#1f1f1f" + + [UI.theme.dark.primary] + main = "#6142d4" + dark = "#6e53cf" + light = "#6e53cf30" + [UI.theme.dark.text] + primary = "#f4f4f4" + secondary = "#c8c8c8" + +*/ + .loader { border: 4px solid #f3f3f3; border-radius: 50%; diff --git a/examples/quivr-whisper/templates/index.html b/examples/quivr-whisper/templates/index.html index 6f508034c5e8..f4df5b887796 100644 --- a/examples/quivr-whisper/templates/index.html +++ b/examples/quivr-whisper/templates/index.html @@ -9,11 +9,12 @@ - +

[index.html hunk body garbled (markup stripped); surviving text: "Quivr.app"]
- Date: Thu, 21 Nov 2024 00:35:23 +0530 Subject: [PATCH 02/11] feat(examples): audio vis --- examples/quivr-whisper/static/app.js | 139 ++++++++++---------- examples/quivr-whisper/templates/index.html | 6 +- 2 files changed, 74 insertions(+), 71 deletions(-) diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index 1732bcbc79cd..4a608aedb874 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -10,6 +10,8 @@ let bufferLength; let dataArray; let lastAudioLevel = 0; let silenceTimer; +let silenceThreshold = 0.05; // Adjustable silence threshold +let silenceDuration = 1500; // Milliseconds of silence before stopping let isRecording = false; @@ -22,11 +24,72 @@ if (navigator.mediaDevices) { .then((stream) => { const mediaRecorder = new MediaRecorder(stream); + const drawWaveform = () => { + if (!analyser) return; + + requestAnimationFrame(drawWaveform); + + analyser.getByteTimeDomainData(dataArray); + + canvasCtx.fillStyle = "rgb(255, 255, 255)"; + canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height); + + canvasCtx.lineWidth = 2; + canvasCtx.strokeStyle = "rgb(0, 0, 0)"; + + canvasCtx.beginPath(); + + let sliceWidth = (audioVisualizer.width * 1.0) / bufferLength; + let x = 0; + + let sum = 0; + let maxAmplitude = 0; + + for (let i = 0; i < bufferLength; i++) { + let v = dataArray[i] / 128.0 - 1; // Normalize to [-1, 1] + let y = (v * audioVisualizer.height) / 2; + + // Calculate absolute amplitude + sum += Math.abs(v); + maxAmplitude = Math.max(maxAmplitude, Math.abs(v)); + + if (i === 0) { + canvasCtx.moveTo(x, y + audioVisualizer.height / 2); + } else { + canvasCtx.lineTo(x, y + audioVisualizer.height / 2); + } + + x += sliceWidth; + } + + canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2); + canvasCtx.stroke(); + + // More robust silence detection + let averageAmplitude = sum / bufferLength; + + if ( + isRecording && + averageAmplitude < silenceThreshold && + maxAmplitude < silenceThreshold * 2 + ) { + if (!silenceTimer) { + silenceTimer = setTimeout(stopRecording, silenceDuration); + } + } else { + clearTimeout(silenceTimer); + silenceTimer = null; + } + + lastAudioLevel = averageAmplitude; + }; + const startRecording = () => { + chunks = []; // Reset chunks at start of recording mediaRecorder.start(); + isRecording = true; console.log(mediaRecorder.state); console.log("recorder started"); - // recordBtn.classList.add("hidden"); audioVisualizer.classList.remove("hidden"); const audioContext = new (window.AudioContext || @@ -43,10 +106,13 @@ if (navigator.mediaDevices) { }; const stopRecording = () => { - mediaRecorder.stop(); - console.log(mediaRecorder.state); - console.log("recorder stopped"); - audioVisualizer.classList.add("hidden"); + if (isRecording) { + mediaRecorder.stop(); + isRecording = false; + console.log(mediaRecorder.state); + console.log("recorder stopped due to silence"); + audioVisualizer.classList.add("hidden"); + } }; recordBtn.onclick = () => { @@ -87,28 +153,17 @@ if (navigator.mediaDevices) { audioPlayback.src = "data:audio/wav;base64," + data.audio_base64; audioPlayback.classList.remove("hidden"); audioVisualizer.classList.add("hidden"); // hide the visualizer while playing back the response - // setupAIResponseVisualization(); audioPlayback.onloadedmetadata = () => { // When metadata is loaded, start playback audioPlayback.play(); - // visualizeAIResponse(); }; - - // We only reset the UI after the audio has finished playing - // audioPlayback.onended 
= () => { - // resetUI(); - // }; } catch (error) { console.error("Error during fetch/transcription:", error); - console.log(error); - - // resetUI(); } finally { if (analyser) { analyser.disconnect(); analyser = null; } - isRecording = false; } }; @@ -120,55 +175,3 @@ if (navigator.mediaDevices) { console.error(`The following error occurred: ${err}`); }); } - -function drawWaveform() { - if (!analyser) return; - - requestAnimationFrame(drawWaveform); - - analyser.getByteTimeDomainData(dataArray); - - canvasCtx.fillStyle = "rgb(255, 255, 255)"; - canvasCtx.fillRect(0, 0, audioVisualizer.width, audioVisualizer.height); - - canvasCtx.lineWidth = 2; - canvasCtx.strokeStyle = "rgb(0, 0, 0)"; - - canvasCtx.beginPath(); - - let sliceWidth = (audioVisualizer.width * 1.0) / bufferLength; - let x = 0; - - let sum = 0; - - for (let i = 0; i < bufferLength; i++) { - let v = dataArray[i] / 128.0; - let y = (v * audioVisualizer.height) / 2; - - sum += v; - - if (i === 0) { - canvasCtx.moveTo(x, y); - } else { - canvasCtx.lineTo(x, y); - } - - x += sliceWidth; - } - - canvasCtx.lineTo(audioVisualizer.width, audioVisualizer.height / 2); - canvasCtx.stroke(); - - let currentAudioLevel = sum / bufferLength; - - if (isRecording && Math.abs(currentAudioLevel - lastAudioLevel) < 0.01) { - if (!silenceTimer) { - silenceTimer = setTimeout(stopRecording, 1000); - } - } else { - clearTimeout(silenceTimer); - silenceTimer = null; - } - - lastAudioLevel = currentAudioLevel; -} diff --git a/examples/quivr-whisper/templates/index.html b/examples/quivr-whisper/templates/index.html index f4df5b887796..729e03ea4a6d 100644 --- a/examples/quivr-whisper/templates/index.html +++ b/examples/quivr-whisper/templates/index.html @@ -10,9 +10,9 @@ -

[index.html hunk body garbled (markup stripped); surviving fragments: "Quivr.app", "Quivr", class="bg-background rounded-lg cursor-pointer"]
From 29c9b284bd291a1b88e300ab982332ad412363b0 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Thu, 21 Nov 2024 01:42:36 +0530 Subject: [PATCH 04/11] fix(examples): dataArray null --- examples/quivr-whisper/static/app.js | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index c3ed0cf3b3e6..711c3a540d3b 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -6,7 +6,7 @@ const audioPlayback = document.getElementById("audio-playback"); const canvasCtx = audioVisualizer.getContext("2d"); // Configuration -const SILENCE_THRESHOLD = 0.01; +const SILENCE_THRESHOLD = 128; // Adjusted for byte data (128 is middle) const SILENCE_DURATION = 1500; const FFT_SIZE = 2048; @@ -31,9 +31,8 @@ class AudioAnalyzer { this.analyser.fftSize = FFT_SIZE; source.connect(this.analyser); - // Change to Float32Array for time domain data this.bufferLength = this.analyser.frequencyBinCount; - this.dataArray = new Float32Array(this.bufferLength); + this.dataArray = new Uint8Array(this.bufferLength); // Changed to Uint8Array return this.analyser; } @@ -64,12 +63,12 @@ class Visualizer { } draw(currentAnalyser, onSilence) { - if (!currentAnalyser) return; + if (!currentAnalyser || this.analyzer.dataArray === null) return; requestAnimationFrame(() => this.draw(currentAnalyser, onSilence)); - // Use getFloatTimeDomainData instead of getByteTimeDomainData - currentAnalyser.getFloatTimeDomainData(this.analyzer.dataArray); + // Use getByteTimeDomainData instead of getFloatTimeDomainData + currentAnalyser.getByteTimeDomainData(this.analyzer.dataArray); // Clear canvas this.ctx.fillStyle = "#252525"; @@ -82,16 +81,14 @@ class Visualizer { const sliceWidth = (this.canvas.width * 1.0) / this.analyzer.bufferLength; let x = 0; let sum = 0; - let maxAmplitude = 0; // Draw waveform for (let i = 0; i < this.analyzer.bufferLength; i++) { - // Values are already normalized (-1 to 1), no need to normalize - const v = this.analyzer.dataArray[i]; - const y = (v * this.canvas.height) / 2 + this.canvas.height / 2; + // Scale byte data (0-255) to canvas height + const v = this.analyzer.dataArray[i] / 128.0; // normalize to 0-2 + const y = (v - 1) * (this.canvas.height / 2) + this.canvas.height / 2; - sum += Math.abs(v); - maxAmplitude = Math.max(maxAmplitude, Math.abs(v)); + sum += Math.abs(v - 1); // Calculate distance from center (128) if (i === 0) { this.ctx.moveTo(x, y); @@ -105,12 +102,13 @@ class Visualizer { this.ctx.lineTo(this.canvas.width, this.canvas.height / 2); this.ctx.stroke(); - // Check for silence during recording with proper thresholds + // Check for silence during recording with adjusted thresholds for byte data if (state.isRecording) { const averageAmplitude = sum / this.analyzer.bufferLength; - if (averageAmplitude < SILENCE_THRESHOLD) { + if (averageAmplitude < 0.1) { + // Adjusted threshold for normalized data // Reset silence timer if we detect sound - if (averageAmplitude > SILENCE_THRESHOLD / 2) { + if (averageAmplitude > 0.05) { clearTimeout(state.silenceTimer); state.silenceTimer = null; } else { From 0d3cd226c1588abfaadd6eb7789cc7799bb29990 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Thu, 21 Nov 2024 01:58:47 +0530 Subject: [PATCH 05/11] feat(examples): status update --- examples/quivr-whisper/static/app.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index 
711c3a540d3b..3a4fe1d52bb0 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -169,6 +169,9 @@ class RecordingHandler { ); } }); + + recordBtn.innerText = "Listening..."; + recordBtn.classList.add("processing"); } stopRecording() { @@ -181,6 +184,7 @@ class RecordingHandler { } async handleRecordingStop() { + recordBtn.innerText = "Processing..."; console.log("Processing recording..."); const audioBlob = new Blob(state.chunks, { type: "audio/wav" }); @@ -210,6 +214,7 @@ class RecordingHandler { const data = await response.json(); await this.handleResponse(data); + recordBtn.innerText = "Ask a question to Quivr"; } async handleResponse(data) { From b8c65009fe5a4a4c220ae84fceb1d095d7c4ee99 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Thu, 21 Nov 2024 22:23:12 +0530 Subject: [PATCH 06/11] feat(examples): ui improvement --- examples/quivr-whisper/static/app.js | 94 ++++++++++++---- examples/quivr-whisper/static/loader.svg | 1 + examples/quivr-whisper/static/mic-off.svg | 1 + examples/quivr-whisper/static/mic.svg | 1 + examples/quivr-whisper/static/styles.css | 114 ++++++++++++++++++-- examples/quivr-whisper/templates/index.html | 47 ++++---- 6 files changed, 210 insertions(+), 48 deletions(-) create mode 100644 examples/quivr-whisper/static/loader.svg create mode 100644 examples/quivr-whisper/static/mic-off.svg create mode 100644 examples/quivr-whisper/static/mic.svg diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index 3a4fe1d52bb0..b9edf532de9d 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -1,10 +1,28 @@ // DOM Elements const recordBtn = document.getElementById("record-btn"); const fileInput = document.getElementById("fileInput"); +const fileName = document.getElementById("fileName"); + const audioVisualizer = document.getElementById("audio-visualizer"); const audioPlayback = document.getElementById("audio-playback"); const canvasCtx = audioVisualizer.getContext("2d"); +window.addEventListener("load", () => { + audioVisualizer.width = window.innerWidth; + audioVisualizer.height = window.innerHeight; +}); + +window.addEventListener("resize", (e) => { + audioVisualizer.width = window.innerWidth; + audioVisualizer.height = window.innerHeight; +}); + +fileInput.addEventListener("change", () => { + fileName.textContent = + fileInput.files.length > 0 ? 
fileInput.files[0].name : "No file chosen"; + fileName.classList.toggle("file-selected", fileInput.files.length > 0); +}); + // Configuration const SILENCE_THRESHOLD = 128; // Adjusted for byte data (128 is middle) const SILENCE_DURATION = 1500; @@ -24,6 +42,7 @@ class AudioAnalyzer { this.analyser = null; this.dataArray = null; this.bufferLength = null; + this.source = null; } setup(source, audioContext) { @@ -32,23 +51,56 @@ class AudioAnalyzer { source.connect(this.analyser); this.bufferLength = this.analyser.frequencyBinCount; - this.dataArray = new Uint8Array(this.bufferLength); // Changed to Uint8Array + this.dataArray = new Uint8Array(this.bufferLength); return this.analyser; } setupForPlayback(audioElement, audioContext) { - const source = audioContext.createMediaElementSource(audioElement); - const analyser = this.setup(source, audioContext); - analyser.connect(audioContext.destination); - return analyser; + // Disconnect existing source if it exists + if (this.source) { + try { + this.source.disconnect(); + } catch (e) { + // Ignore if already disconnected + } + } + + // Create a new source, ignoring previous connections + audioElement.pause(); + audioElement.currentTime = 0; + this.source = audioContext.createMediaElementSource(audioElement); + + this.analyser = audioContext.createAnalyser(); + this.analyser.fftSize = FFT_SIZE; + + // Connect the source to the analyser and then to destination + this.source.connect(this.analyser); + this.analyser.connect(audioContext.destination); + + this.bufferLength = this.analyser.frequencyBinCount; + this.dataArray = new Uint8Array(this.bufferLength); + + return this.analyser; } cleanup() { + if (this.source) { + try { + this.source.disconnect(); + } catch (e) { + // Ignore disconnect errors + } + } if (this.analyser) { - this.analyser.disconnect(); - this.analyser = null; + try { + this.analyser.disconnect(); + } catch (e) { + // Ignore disconnect errors + } } + this.source = null; + this.analyser = null; this.dataArray = null; this.bufferLength = null; } @@ -73,12 +125,13 @@ class Visualizer { // Clear canvas this.ctx.fillStyle = "#252525"; this.ctx.fillRect(0, 0, this.canvas.width, this.canvas.height); + if (!state.isRecording) return; this.ctx.lineWidth = 2; this.ctx.strokeStyle = "#6142d4"; this.ctx.beginPath(); - const sliceWidth = (this.canvas.width * 1.0) / this.analyzer.bufferLength; + const sliceWidth = (this.canvas.width * 1) / this.analyzer.bufferLength; let x = 0; let sum = 0; @@ -125,6 +178,7 @@ class RecordingHandler { this.mediaRecorder = null; this.audioAnalyzer = new AudioAnalyzer(); this.visualizer = new Visualizer(audioVisualizer, this.audioAnalyzer); + this.audioContext = null; } async initialize() { @@ -132,6 +186,9 @@ class RecordingHandler { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); this.mediaRecorder = new MediaRecorder(stream); this.setupRecordingEvents(); + if (!this.audioContext) + this.audioContext = new (window.AudioContext || + window.webkitAudioContext)(); } catch (err) { console.error(`Media device error: ${err}`); } @@ -152,13 +209,11 @@ class RecordingHandler { state.isRecording = true; this.mediaRecorder.start(); - const audioContext = new (window.AudioContext || - window.webkitAudioContext)(); - const source = audioContext.createMediaStreamSource( + const source = this.audioContext.createMediaStreamSource( this.mediaRecorder.stream ); - const analyser = this.audioAnalyzer.setup(source, audioContext); + const analyser = this.audioAnalyzer.setup(source, this.audioContext); 
audioVisualizer.classList.remove("hidden"); this.visualizer.draw(analyser, () => { @@ -170,7 +225,7 @@ class RecordingHandler { } }); - recordBtn.innerText = "Listening..."; + recordBtn.dataset.recording = true; recordBtn.classList.add("processing"); } @@ -180,15 +235,17 @@ class RecordingHandler { this.mediaRecorder.stop(); clearTimeout(state.silenceTimer); state.silenceTimer = null; + recordBtn.dataset.recording = false; } } async handleRecordingStop() { - recordBtn.innerText = "Processing..."; console.log("Processing recording..."); + recordBtn.dataset.pending = true; - const audioBlob = new Blob(state.chunks, { type: "audio/wav" }); + const audioBlob = new Blob(state.chunks, { type: "audio/mpeg" }); if (!fileInput.files.length) { + recordBtn.dataset.pending = false; alert("Please select a file."); return; } @@ -214,19 +271,15 @@ class RecordingHandler { const data = await response.json(); await this.handleResponse(data); - recordBtn.innerText = "Ask a question to Quivr"; } async handleResponse(data) { audioPlayback.src = "data:audio/wav;base64," + data.audio_base64; - const audioContext = new (window.AudioContext || - window.webkitAudioContext)(); - audioPlayback.onloadedmetadata = () => { const analyser = this.audioAnalyzer.setupForPlayback( audioPlayback, - audioContext + this.audioContext ); audioVisualizer.classList.remove("hidden"); @@ -236,6 +289,7 @@ class RecordingHandler { audioPlayback.onended = () => { this.audioAnalyzer.cleanup(); + recordBtn.dataset.pending = false; }; } } diff --git a/examples/quivr-whisper/static/loader.svg b/examples/quivr-whisper/static/loader.svg new file mode 100644 index 000000000000..1390bc478d8e --- /dev/null +++ b/examples/quivr-whisper/static/loader.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/examples/quivr-whisper/static/mic-off.svg b/examples/quivr-whisper/static/mic-off.svg new file mode 100644 index 000000000000..46d151fca867 --- /dev/null +++ b/examples/quivr-whisper/static/mic-off.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/examples/quivr-whisper/static/mic.svg b/examples/quivr-whisper/static/mic.svg new file mode 100644 index 000000000000..726d9f11b643 --- /dev/null +++ b/examples/quivr-whisper/static/mic.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/examples/quivr-whisper/static/styles.css b/examples/quivr-whisper/static/styles.css index c5381b83edd6..deb34d8f7f9c 100644 --- a/examples/quivr-whisper/static/styles.css +++ b/examples/quivr-whisper/static/styles.css @@ -1,17 +1,119 @@ -body { - color: #f4f4f4 +* { + box-sizing: border-box; + margin: 0; + padding: 0; } -.bg-background { + +body { + color: #f4f4f4; background-color: #252525; + display: flex; + gap: 1rem; + align-items: center; + flex-direction: column; + justify-content: center; + min-height: 100vh; } -.paper { - background-color: #1f1f1f; +.primary { + background-color: #6142d4; } -.bg-primary { +button { background-color: #6142d4; + border: none; + padding: .75rem 2rem; + border-radius: 0.5rem; + color: #f4f4f4; + cursor: pointer; +} + +canvas { + position: absolute; + width: 100%; + height: 100%; + top: 0; + left: 0; + background-color: #252525; + z-index: -1; +} + +.record-btn { + background-color: #f5f5f5; + border: none; + outline: none; + width: 256px; + height: 256px; + background-repeat: no-repeat; + background-position: center; + border-radius: 50%; + background-size: 50%; + transition: background-color 200ms ease-in, transform 200ms ease-out; +} + +.record-btn:hover { + background-color: #fff; + transform: 
scale(1.025); +} + +.record-btn:active { + background-color: #e2e2e2; + transform: scale(0.975); +} + +.record-btn[data-recording="true"] { + background-image: url("./mic.svg"); +} + +.record-btn[data-recording="false"] { + background-image: url("./mic-off.svg"); +} + +.record-btn[data-pending="true"] { + background-image: url("./loader.svg") !important; + animation: spin 1s linear infinite; +} + +.hidden { + display: none; +} + +.custom-file-input { + display: flex; + flex-direction: column; + align-items: center; + gap: 10px; +} + +.custom-file-input input[type="file"] { + display: none; +} + +.custom-file-input label { + background-color: #6142d4; + color: white; + padding: 8px 16px; + border-radius: 4px; + cursor: pointer; + font-size: 14px; + font-weight: bold; + transition: background-color 0.3s; +} + +.custom-file-input label:hover { + background-color: #6142d4; +} + +.custom-file-input span { + font-size: 14px; + color: #f4f4f4; +} + +/* Adjust appearance when a file is selected */ +.custom-file-input span.file-selected { + color: #ffffff; + font-weight: bold; } /* diff --git a/examples/quivr-whisper/templates/index.html b/examples/quivr-whisper/templates/index.html index ad2ba7b665e7..fa18d2d9a27f 100644 --- a/examples/quivr-whisper/templates/index.html +++ b/examples/quivr-whisper/templates/index.html @@ -1,27 +1,30 @@ - + - - - - + + + Audio Interaction WebApp - - - + + - -

[index.html hunk body garbled (markup stripped); surviving text: "Quivr", "No file chosen"]
+ + - - - \ No newline at end of file + + From f75d7448065f21191a0911a5c7e6d77a8f424505 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Fri, 22 Nov 2024 01:27:07 +0530 Subject: [PATCH 07/11] fix(examples): visualisation --- examples/quivr-whisper/static/app.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index b9edf532de9d..dc7be1bae604 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -31,6 +31,7 @@ const FFT_SIZE = 2048; // State const state = { isRecording: false, + isVisualizing: false, chunks: [], silenceTimer: null, lastAudioLevel: 0, @@ -125,7 +126,7 @@ class Visualizer { // Clear canvas this.ctx.fillStyle = "#252525"; this.ctx.fillRect(0, 0, this.canvas.width, this.canvas.height); - if (!state.isRecording) return; + if (!state.isVisualizing) return; this.ctx.lineWidth = 2; this.ctx.strokeStyle = "#6142d4"; @@ -205,6 +206,7 @@ class RecordingHandler { } startRecording() { + state.isVisualizing = true; state.chunks = []; state.isRecording = true; this.mediaRecorder.start(); @@ -231,6 +233,7 @@ class RecordingHandler { stopRecording() { if (state.isRecording) { + state.isVisualizing = false; state.isRecording = false; this.mediaRecorder.stop(); clearTimeout(state.silenceTimer); @@ -285,11 +288,13 @@ class RecordingHandler { this.visualizer.draw(analyser, () => {}); audioPlayback.play(); + state.isVisualizing = true; }; audioPlayback.onended = () => { this.audioAnalyzer.cleanup(); recordBtn.dataset.pending = false; + state.isVisualizing = false; }; } } From f01fa47e06975638f58cf74524a7afdde611f175 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Fri, 22 Nov 2024 23:54:59 +0530 Subject: [PATCH 08/11] feat(example): separate upload route --- examples/quivr-whisper/app.py | 67 +++++++++++++++----- examples/quivr-whisper/pyproject.toml | 1 + examples/quivr-whisper/requirements-dev.lock | 5 ++ examples/quivr-whisper/requirements.lock | 5 ++ examples/quivr-whisper/static/app.js | 30 ++++++++- examples/quivr-whisper/static/styles.css | 3 +- examples/quivr-whisper/templates/index.html | 3 +- 7 files changed, 95 insertions(+), 19 deletions(-) diff --git a/examples/quivr-whisper/app.py b/examples/quivr-whisper/app.py index 05401706b54c..79031b9019d3 100644 --- a/examples/quivr-whisper/app.py +++ b/examples/quivr-whisper/app.py @@ -1,4 +1,4 @@ -from flask import Flask, render_template, request, jsonify +from flask import Flask, render_template, request, jsonify, session import openai import base64 import os @@ -12,20 +12,22 @@ import asyncio -UPLOAD_FOLDER = 'uploads' -ALLOWED_EXTENSIONS = {'txt'} +UPLOAD_FOLDER = "uploads" +ALLOWED_EXTENSIONS = {"txt"} os.makedirs(UPLOAD_FOLDER, exist_ok=True) app = Flask(__name__) -app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER +app.secret_key = "secret" +app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER +app.config["CACHE_TYPE"] = "SimpleCache" # In-memory cache for development +app.config["CACHE_DEFAULT_TIMEOUT"] = 60 * 60 # 1 hour cache timeout load_dotenv() openai.api_key = os.getenv("OPENAI_API_KEY") -def allowed_file(filename): - return '.' in filename and \ - filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS +brains = {} + @app.route("/") def index(): @@ -43,26 +45,59 @@ def run_in_event_loop(func, *args, **kwargs): return result -@app.route('/ask', methods=['POST']) -async def ask(): - if 'file' not in request.files: +def allowed_file(filename): + return "." 
in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS + + +@app.route("/upload", methods=["POST"]) +async def upload_file(): + if "file" not in request.files: return "No file part", 400 - file = request.files['file'] + file = request.files["file"] - if file.filename == '': + if file.filename == "": return "No selected file", 400 if not (file and file.filename and allowed_file(file.filename)): return "Invalid file type", 400 filename = secure_filename(file.filename) - filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename) + filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename) file.save(filepath) - print("Uploading file...") - brain: Brain = await to_thread(run_in_event_loop, Brain.from_files, name="user_brain", file_paths=[filepath]) + print(f"File uploaded and saved at: {filepath}") + + print("Creating brain instance...") + + brain: Brain = await to_thread( + run_in_event_loop, Brain.from_files, name="user_brain", file_paths=[filepath] + ) + + # Store brain instance in cache + session_id = session.sid if hasattr(session, "sid") else os.urandom(16).hex() + session["session_id"] = session_id + # cache.set(session_id, brain) # Store the brain instance in the cache + brains[session_id] = brain + print(f"Brain instance created and stored in cache for session ID: {session_id}") + + return jsonify({"message": "Brain created successfully"}) + + +@app.route("/ask", methods=["POST"]) +async def ask(): + if "audio_data" not in request.files: + return "Missing audio data", 400 + + # Retrieve the brain instance from the cache using the session ID + session_id = session.get("session_id") + if not session_id: + return "Session ID not found. Upload a file first.", 400 + + brain = brains.get(session_id) + if not brain: + return "Brain instance not found in dict. 
Upload a file first.", 400 - print(f"{filepath} saved to brain.") + print("Brain instance loaded from cache.") print("Speech to text...") audio_file = request.files["audio_data"] diff --git a/examples/quivr-whisper/pyproject.toml b/examples/quivr-whisper/pyproject.toml index 692d1df09e8c..3c48b90c6529 100644 --- a/examples/quivr-whisper/pyproject.toml +++ b/examples/quivr-whisper/pyproject.toml @@ -9,6 +9,7 @@ dependencies = [ "flask[async]>=3.1.0", "openai>=1.54.5", "quivr-core>=0.0.24", + "flask-caching>=2.3.0", ] readme = "README.md" requires-python = ">= 3.11" diff --git a/examples/quivr-whisper/requirements-dev.lock b/examples/quivr-whisper/requirements-dev.lock index 901ea6e170b7..716aa161b38c 100644 --- a/examples/quivr-whisper/requirements-dev.lock +++ b/examples/quivr-whisper/requirements-dev.lock @@ -44,6 +44,8 @@ beautifulsoup4==4.12.3 # via unstructured blinker==1.9.0 # via flask +cachelib==0.9.0 + # via flask-caching cachetools==5.5.0 # via google-auth certifi==2024.8.30 @@ -114,6 +116,9 @@ filetype==1.2.0 # via llama-index-core # via unstructured flask==3.1.0 + # via flask-caching + # via quivr-whisper +flask-caching==2.3.0 # via quivr-whisper flatbuffers==24.3.25 # via onnxruntime diff --git a/examples/quivr-whisper/requirements.lock b/examples/quivr-whisper/requirements.lock index 901ea6e170b7..716aa161b38c 100644 --- a/examples/quivr-whisper/requirements.lock +++ b/examples/quivr-whisper/requirements.lock @@ -44,6 +44,8 @@ beautifulsoup4==4.12.3 # via unstructured blinker==1.9.0 # via flask +cachelib==0.9.0 + # via flask-caching cachetools==5.5.0 # via google-auth certifi==2024.8.30 @@ -114,6 +116,9 @@ filetype==1.2.0 # via llama-index-core # via unstructured flask==3.1.0 + # via flask-caching + # via quivr-whisper +flask-caching==2.3.0 # via quivr-whisper flatbuffers==24.3.25 # via onnxruntime diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index dc7be1bae604..b6ac04094007 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -1,6 +1,7 @@ // DOM Elements const recordBtn = document.getElementById("record-btn"); const fileInput = document.getElementById("fileInput"); +const fileInputContainer = document.querySelector(".custom-file-input"); const fileName = document.getElementById("fileName"); const audioVisualizer = document.getElementById("audio-visualizer"); @@ -246,7 +247,7 @@ class RecordingHandler { console.log("Processing recording..."); recordBtn.dataset.pending = true; - const audioBlob = new Blob(state.chunks, { type: "audio/mpeg" }); + const audioBlob = new Blob(state.chunks, { type: "audio/wav" }); if (!fileInput.files.length) { recordBtn.dataset.pending = false; alert("Please select a file."); @@ -299,6 +300,33 @@ class RecordingHandler { } } +const uploadFile = async (e) => { + e.preventDefault(); + const file = fileInput.files[0]; + + if (!file) { + alert("Please select a file."); + return; + } + const formData = new FormData(); + formData.append("file", file); + try { + await fetch("/upload", { + method: "POST", + body: formData, + }); + recordBtn.classList.remove("hidden"); + fileInputContainer.classList.add("hidden"); + } catch (error) { + recordBtn.classList.add("hidden"); + fileInputContainer.classList.remove("hidden"); + console.error("Error uploading file:", error); + } +}; + +const uploadBtn = document.getElementById("upload-btn"); +uploadBtn.addEventListener("click", uploadFile); + // Main initialization async function initializeApp() { if (!navigator.mediaDevices) { 
diff --git a/examples/quivr-whisper/static/styles.css b/examples/quivr-whisper/static/styles.css index deb34d8f7f9c..2a51e32321b3 100644 --- a/examples/quivr-whisper/static/styles.css +++ b/examples/quivr-whisper/static/styles.css @@ -76,7 +76,8 @@ canvas { } .hidden { - display: none; + display: none !important; + visibility: hidden; } .custom-file-input { diff --git a/examples/quivr-whisper/templates/index.html b/examples/quivr-whisper/templates/index.html index fa18d2d9a27f..ef632b9afdc1 100644 --- a/examples/quivr-whisper/templates/index.html +++ b/examples/quivr-whisper/templates/index.html @@ -14,7 +14,7 @@ @@ -22,6 +22,7 @@ No file chosen +
From c259070046c2bbe9b0b72d683bd11c157220f5e8 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Sat, 23 Nov 2024 00:06:36 +0530 Subject: [PATCH 09/11] style(example): uploading state --- examples/quivr-whisper/static/app.js | 2 ++ examples/quivr-whisper/static/styles.css | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index b6ac04094007..04a9f34235d0 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -301,6 +301,7 @@ class RecordingHandler { } const uploadFile = async (e) => { + uploadBtn.innerText = "Uploading File..."; e.preventDefault(); const file = fileInput.files[0]; @@ -321,6 +322,7 @@ const uploadFile = async (e) => { recordBtn.classList.add("hidden"); fileInputContainer.classList.remove("hidden"); console.error("Error uploading file:", error); + uploadBtn.innerText = "Upload Failed. Try again"; } }; diff --git a/examples/quivr-whisper/static/styles.css b/examples/quivr-whisper/static/styles.css index 2a51e32321b3..e250adda1028 100644 --- a/examples/quivr-whisper/static/styles.css +++ b/examples/quivr-whisper/static/styles.css @@ -92,7 +92,7 @@ canvas { } .custom-file-input label { - background-color: #6142d4; + border: solid 2px #6142d4; color: white; padding: 8px 16px; border-radius: 4px; From 9a262e4355b190288845d8f2d3771ef6c98dbad1 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Sun, 24 Nov 2024 15:04:08 +0530 Subject: [PATCH 10/11] fix(examples): consecutive questions --- examples/quivr-whisper/static/app.js | 74 +++++++++++++++------------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index 04a9f34235d0..513acc88180e 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -41,70 +41,74 @@ const state = { // Audio Analysis class AudioAnalyzer { constructor() { + this.reset(); + } + + reset() { this.analyser = null; this.dataArray = null; this.bufferLength = null; this.source = null; + this.cleanup(); } setup(source, audioContext) { - this.analyser = audioContext.createAnalyser(); - this.analyser.fftSize = FFT_SIZE; - source.connect(this.analyser); + this.cleanup(); - this.bufferLength = this.analyser.frequencyBinCount; - this.dataArray = new Uint8Array(this.bufferLength); + this.analyser = this._createAnalyser(audioContext); + source.connect(this.analyser); + this._initializeBuffer(); return this.analyser; } - setupForPlayback(audioElement, audioContext) { - // Disconnect existing source if it exists - if (this.source) { - try { - this.source.disconnect(); - } catch (e) { - // Ignore if already disconnected - } + setupForPlayback(audioElement, audioContext, connectToDestination = true) { + // Reuse existing MediaElementSourceNode if it already exists for this audio element + if (!this.source || this.source.mediaElement !== audioElement) { + this.cleanup(); // Ensure any previous connections are cleaned up + this.source = audioContext.createMediaElementSource(audioElement); } - // Create a new source, ignoring previous connections - audioElement.pause(); - audioElement.currentTime = 0; - this.source = audioContext.createMediaElementSource(audioElement); - - this.analyser = audioContext.createAnalyser(); - this.analyser.fftSize = FFT_SIZE; + this.analyser = this._createAnalyser(audioContext); - // Connect the source to the analyser and then to destination this.source.connect(this.analyser); - 
this.analyser.connect(audioContext.destination); - this.bufferLength = this.analyser.frequencyBinCount; - this.dataArray = new Uint8Array(this.bufferLength); + if (connectToDestination) { + this.analyser.connect(audioContext.destination); + } + this._initializeBuffer(); return this.analyser; } cleanup() { if (this.source) { - try { - this.source.disconnect(); - } catch (e) { - // Ignore disconnect errors - } + this._safeDisconnect(this.source); } if (this.analyser) { + this._safeDisconnect(this.analyser); + } + } + + _createAnalyser(audioContext) { + const analyser = audioContext.createAnalyser(); + analyser.fftSize = FFT_SIZE; + return analyser; + } + + _initializeBuffer() { + this.bufferLength = this.analyser.frequencyBinCount; + this.dataArray = new Uint8Array(this.bufferLength); + } + + _safeDisconnect(node) { + if (node) { try { - this.analyser.disconnect(); - } catch (e) { + node.disconnect(); + } catch { // Ignore disconnect errors } } - this.source = null; - this.analyser = null; - this.dataArray = null; - this.bufferLength = null; } } From cdbc0fae36f9cb98899a42e47b8c266672c58fe4 Mon Sep 17 00:00:00 2001 From: adityanandanx Date: Sun, 24 Nov 2024 15:08:02 +0530 Subject: [PATCH 11/11] fix(examples): disable button during pending and only accept .txt --- examples/quivr-whisper/static/app.js | 3 +++ examples/quivr-whisper/templates/index.html | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/examples/quivr-whisper/static/app.js b/examples/quivr-whisper/static/app.js index 513acc88180e..5f9a7064c6cd 100644 --- a/examples/quivr-whisper/static/app.js +++ b/examples/quivr-whisper/static/app.js @@ -250,10 +250,12 @@ class RecordingHandler { async handleRecordingStop() { console.log("Processing recording..."); recordBtn.dataset.pending = true; + recordBtn.disabled = true; const audioBlob = new Blob(state.chunks, { type: "audio/wav" }); if (!fileInput.files.length) { recordBtn.dataset.pending = false; + recordBtn.disabled = false; alert("Please select a file."); return; } @@ -299,6 +301,7 @@ class RecordingHandler { audioPlayback.onended = () => { this.audioAnalyzer.cleanup(); recordBtn.dataset.pending = false; + recordBtn.disabled = false; state.isVisualizing = false; }; } diff --git a/examples/quivr-whisper/templates/index.html b/examples/quivr-whisper/templates/index.html index ef632b9afdc1..d1ae47eb08d4 100644 --- a/examples/quivr-whisper/templates/index.html +++ b/examples/quivr-whisper/templates/index.html @@ -20,7 +20,13 @@ >
[index.html hunk body garbled (markup stripped); surviving text: "No file chosen"]