Added Hindi voice assistant #892

Closed
62 changes: 62 additions & 0 deletions Hindi Voice Assistant/app.py
@@ -0,0 +1,62 @@
import streamlit as st
import os
from utils import get_answer, text_to_speech, autoplay_audio, speech_to_text
from audio_recorder_streamlit import audio_recorder
from streamlit_float import *

float_init()

def initialize_session_state():
if "messages" not in st.session_state:
        st.session_state.messages = [
            {"role": "assistant",
             "content": "हेल्लो, मैं आपकी कैसे मदद करूं?"}]  # "Hello, how can I help you?"

initialize_session_state()

st.title("Hindi Voice Assistant")

footer_container = st.container()
with footer_container:
audio_bytes = audio_recorder()


for message in st.session_state.messages:
with st.chat_message(message['role']):
st.write(message['content'])

# Take the recorded audio, convert speech to text, send the text to the model, and display the chat
if audio_bytes:
    # Write the recorded audio bytes to a temporary file
    with st.spinner("Transcribing..."):
        audio_file_path = "temp_audio.mp3"
        with open(audio_file_path, "wb") as f:
            f.write(audio_bytes)

        transcript = speech_to_text(audio_file_path)
        if transcript:
            st.session_state.messages.append({'role': 'user', 'content': transcript})

            with st.chat_message('user'):
                st.write(transcript)
            os.remove(audio_file_path)

# If the last message is not from the assistant, generate a response
if st.session_state.messages[-1]['role'] != 'assistant':
with st.chat_message('assistant'):
with st.spinner("Thinking...."):
final_response = get_answer(st.session_state.messages)

with st.spinner("Generating audio response...."):
audio_file = text_to_speech(final_response)
autoplay_audio(audio_file)

st.write(final_response)
st.session_state.messages.append({
"role":"assistant",
"content": final_response
})

os.remove(audio_file)

footer_container.float("bottom: 0rem;")
6 changes: 6 additions & 0 deletions Hindi Voice Assistant/requirements.txt
@@ -0,0 +1,6 @@
streamlit
groq
streamlit_float
audio_recorder_streamlit
python-dotenv
gtts
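
Note: utils.py calls load_dotenv() and reads the key via os.getenv("API_key"), so a .env file is expected alongside the code; the dotenv import resolves to the python-dotenv package listed above. A minimal local setup, run from inside the Hindi Voice Assistant folder, might look like the following sketch (the key value is a placeholder, not a real credential):

pip install -r requirements.txt
echo "API_key=<your_groq_api_key>" > .env
streamlit run app.py
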
57 changes: 57 additions & 0 deletions Hindi Voice Assistant/utils.py
@@ -0,0 +1,57 @@
from groq import Groq
import streamlit as st
import base64
import os
from dotenv import load_dotenv
from gtts import gTTS
import tempfile

load_dotenv()

client = Groq(api_key=os.getenv("API_key"))

def speech_to_text(audio):
with open(audio, 'rb') as audio_file:
transcription = client.audio.transcriptions.create(
            file=audio_file,
            model="whisper-large-v3",
language='hi',
response_format='text'
)
return transcription

def get_answer(message):
system_message = [
{
"role" : "system",
"content": """You are a helpfull AI chatbot, that answers questions, in Hindi language, asked by User.
1. You must Avoid discussing sensitive, offensive, or harmful content. Refrain from engaging in any form of discrimination, harassment, or inappropriate behavior.
3. If the user expresses gratitude or indicates the end of the conversation, respond with a polite farewell.
"""
}
]
messages = system_message + message

response = client.chat.completions.create(
model='llama3-8b-8192',
        messages=messages
)
return response.choices[0].message.content


def text_to_speech(input_text):
    tts = gTTS(input_text, lang='hi')
    audio_file_path = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
    tts.save(audio_file_path)
    return audio_file_path

def autoplay_audio(file_path):
with open(file_path, 'rb') as f:
data = f.read()
b64 = base64.b64encode(data).decode('utf-8')
md = f"""
<audio autoplay>
<source src='data:audio/mp3;base64,{b64}' type='audio/mp3'>
</audio>
"""
st.markdown(md, unsafe_allow_html=True)
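
For reviewers who want to exercise utils.py without the Streamlit UI, a rough smoke test is sketched below. It is not part of this PR: sample_hi.wav is a hypothetical local recording, the script is assumed to run from the Hindi Voice Assistant folder so the import resolves, and the .env file described above must be in place.

# smoke_test.py — hypothetical, not included in this PR
from utils import speech_to_text, get_answer, text_to_speech

# Hindi speech -> text (Groq Whisper), text -> answer (Llama 3), answer -> mp3 (gTTS)
transcript = speech_to_text("sample_hi.wav")
reply = get_answer([{"role": "user", "content": transcript}])
audio_path = text_to_speech(reply)

print("Transcript:", transcript)
print("Reply:", reply)
print("Audio saved to:", audio_path)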