Skip to content

Commit

Permalink
refactor: update model name and improve media handling logic
Browse files (browse the repository at this point in the history)
- Changed MODEL_NAME to "gemini-1.5-flash-002" in Config.py
- Added new variable for audio in setup.js
- Refactored frame and audio upload logic in setup.js
- Improved audio playback control during speech detection
- Removed unnecessary blank lines in main.py
  • Loading branch information
IRedDragonICY committed Oct 11, 2024
1 parent f15bebf commit 0a00d49
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 22 deletions.
2 changes: 1 addition & 1 deletion src/Config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
class Config:
MODEL_NAME = "gemini-1.5-pro-exp-0827"
MODEL_NAME = "gemini-1.5-flash-002"
HARM_CATEGORIES = ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH", "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"HARM_CATEGORY_DANGEROUS_CONTENT"]
BLOCK_NONE = "BLOCK_NONE"
Expand Down
31 changes: 23 additions & 8 deletions src/app/js/setup.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import ModelController from './model.js';
let audioPlaying = false;
let isProcessing = false;
let chunks = [];
let audio;

const stream = await navigator.mediaDevices.getUserMedia({ video: true, audio: true });
const video = document.createElement('video');
Expand All @@ -28,10 +29,16 @@ import ModelController from './model.js';
canvas.width = video.videoWidth;
canvas.height = video.videoHeight;
context.drawImage(video, 0, 0, canvas.width, canvas.height);
canvas.toBlob(blob => blob && fetch('/api/upload_frame', {
method: 'POST',
body: new FormData().append('image', blob, 'frame.jpg')
}), 'image/jpeg');
canvas.toBlob(blob => {
if (blob) {
const formData = new FormData();
formData.append('image', blob, 'frame.jpg');
fetch('/api/upload_frame', {
method: 'POST',
body: formData
});
}
}, 'image/jpeg');
}
setTimeout(captureFrame, 2500);
})();
Expand All @@ -42,9 +49,11 @@ import ModelController from './model.js';
mediaRecorder.addEventListener('stop', () => {
const blob = new Blob(chunks, { type: 'audio/wav' });
chunks = [];
const formData = new FormData();
formData.append('audio', blob, 'audio.wav');
fetch('/api/upload_audio', {
method: 'POST',
body: new FormData().append('audio', blob, 'audio.wav')
body: formData
}).then(() => {
statusDiv.textContent = "";
isProcessing = false;
Expand All @@ -53,7 +62,7 @@ import ModelController from './model.js';
await new Promise(r => setTimeout(r, 500));
}
audioPlaying = true;
const audio = new Audio(audioLink);
audio = new Audio(audioLink);
audio.addEventListener('ended', () => {
fetch('/api/reset_audio_status', { method: 'POST' });
audioPlaying = false;
Expand All @@ -77,13 +86,19 @@ import ModelController from './model.js';

vad.MicVAD.new({
onSpeechStart: () => {
if (!isProcessing && !audioPlaying) {
if (audioPlaying) {
audio.pause();
audio.currentTime = 0;
fetch('/api/reset_audio_status', { method: 'POST' });
audioPlaying = false;
}
if (!isProcessing && mediaRecorder.state !== 'recording') {
mediaRecorder.start();
statusDiv.textContent = "Listening...";
}
},
onSpeechEnd: () => {
if (!isProcessing && !audioPlaying) {
if (mediaRecorder.state === 'recording') {
mediaRecorder.stop();
isProcessing = true;
statusDiv.textContent = "Processing...";
Expand Down
15 changes: 2 additions & 13 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,10 @@
import cv2
import numpy as np
import uvicorn

from pyngrok import ngrok

logging.disable(logging.CRITICAL)


class ServerApp:
def __init__(self):
self.app = FastAPI()
Expand All @@ -32,7 +30,6 @@ def setup_routes_and_middlewares(self):
allow_methods=["*"],
allow_headers=["*"]
)

directories = {
"/app": "app",
"/assets": "app/assets",
Expand All @@ -43,7 +40,6 @@ def setup_routes_and_middlewares(self):
}
for mount_point, directory in directories.items():
self.app.mount(mount_point, StaticFiles(directory=directory), name=mount_point.strip("/"))

self.app.get("/")(self.index)
self.app.get("/api/audio_status")(self.get_audio_status)
self.app.post("/api/reset_audio_status")(self.reset_audio_status)
Expand Down Expand Up @@ -88,20 +84,15 @@ async def upload_audio(self, audio: UploadFile = File(...)):
async def start_ngrok(self, api_key: str = Form(...)):
if not self.check_internet_connection():
return JSONResponse(content={"message": "Tidak ada koneksi internet."}, status_code=500)

if self.ngrok_process:
return JSONResponse(content={"message": "Ngrok sudah berjalan.", "public_url": self.public_url},
status_code=200)

return JSONResponse(content={"message": "Ngrok sudah berjalan.", "public_url": self.public_url}, status_code=200)
try:
ngrok.set_auth_token(api_key)
tunnel = ngrok.connect(8000)
self.public_url = tunnel.public_url
self.ngrok_process = ngrok.get_ngrok_process()
threading.Thread(target=self.ngrok_process.proc.wait).start()

return JSONResponse(content={"message": "Ngrok berhasil dimulai.", "public_url": self.public_url},
status_code=200)
return JSONResponse(content={"message": "Ngrok berhasil dimulai.", "public_url": self.public_url}, status_code=200)
except Exception as e:
logging.error(f"Error starting ngrok: {e}")
return JSONResponse(content={"message": f"Error starting ngrok: {str(e)}"}, status_code=500)
Expand All @@ -116,7 +107,6 @@ async def stop_ngrok(self):
except Exception as e:
logging.error(f"Error stopping ngrok: {e}")
return JSONResponse(content={"message": f"Error stopping ngrok: {str(e)}"}, status_code=500)

return JSONResponse(content={"message": "Ngrok tidak berjalan."}, status_code=400)

@staticmethod
Expand All @@ -135,7 +125,6 @@ def run(self):
def open_browser():
webbrowser.open_new("http://localhost:8000")


if __name__ == "__main__":
server_app = ServerApp()
server_app.run()

0 comments on commit 0a00d49

Please sign in to comment.