Language portuguese #5

Open
rothbr opened this issue Feb 19, 2024 · 2 comments

rothbr commented Feb 19, 2024

I would like to send audio spoken in Portuguese. I saw that Deepgram can transcribe it, but I've already tried using a different model in main.py and it doesn't work. Could you help me?

"""Main file for the Jarvis project"""
import os
from os import PathLike
from time import time
import asyncio
from typing import Union

import aiohttp
from dotenv import load_dotenv
import openai
from deepgram import Deepgram
import pygame
from pygame import mixer
import elevenlabs

from record import speech_to_text

# Load API keys

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
DEEPGRAM_API_KEY = "c60a9288752c18057da16e3c894b7ebbefa551ab"
elevenlabs.set_api_key(os.getenv("ELEVENLABS_API_KEY"))

# Initialize APIs

gpt_client = openai.Client(api_key=OPENAI_API_KEY)
deepgram = Deepgram(DEEPGRAM_API_KEY)

# mixer is a pygame module for playing audio

mixer.init()

# Change the context if you want to change Jarvis' personality

context = "Você é Jarvis, assistente humano de Alex. Você é espirituoso e cheio de personalidade. Suas respostas devem ser limitadas a uma ou duas frases curtas."
conversation = {"Conversation": []}
RECORDING_PATH = "audio/recording.wav"

def request_gpt(prompt: str) -> str:
    """
    Send a prompt to the GPT-3 API and return the response.

    Args:
        - prompt: The prompt to send to the API.

    Returns:
        The response from the API.
    """
    response = gpt_client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": f"{prompt}",
            }
        ],
        model="gpt-3.5-turbo",
    )
    return response.choices[0].message.content

async def transcribe(file_name: Union[str, bytes, PathLike[str], PathLike[bytes]], language='pt-BR'):
    """
    Transcribe audio using the Deepgram API.

    Args:
        - file_name: The name of the file to transcribe.
        - language: The language to detect and transcribe. Default is 'pt-BR' for Brazilian Portuguese.

    Returns:
        The list of word objects from the first transcription alternative.
    """
    with open(file_name, "rb") as audio:
        source = {"buffer": audio, "mimetype": "audio/wav"}
        params = {'model': 'nova-2-general', 'detect_language': 'true', 'language': language}
        response = await deepgram.transcription.prerecorded(source, parameters=params)
        detected_language = None
        if "alternatives" in response["results"]["channels"][0]:
            detected_language = response["results"]["channels"][0]["alternatives"][0].get("language_code")
        # Retry with the requested language if detection is missing or disagrees
        if detected_language is None or detected_language != language:
            params['language'] = language
            async with aiohttp.ClientSession() as session:
                response = await deepgram.transcription.prerecorded(source, parameters=params, session=session)
        return response["results"]["channels"][0]["alternatives"][0]["words"]

def log(log: str):
    """
    Print and write to status.txt
    """
    print(log)
    with open("status.txt", "w") as f:
        f.write(log)

if name == "main":
while True:
# Record audio
log("Listening...")
speech_to_text()
log("Done listening")

    # Transcribe audio
    current_time = time()
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    words = loop.run_until_complete(transcribe(RECORDING_PATH, language='pt-BR'))
    string_words = " ".join(word_dict.get("word") for word_dict in words if "word" in word_dict)
    with open("conv.txt", "a") as f:
        f.write(f"{string_words}\n")
    transcription_time = time() - current_time
    log(f"Finished transcribing in {transcription_time:.2f} seconds.")

    # Get response from GPT-3
    current_time = time()
    context += f"\nAlex: {string_words}\nJarvis: "
    response = request_gpt(context)
    context += response
    gpt_time = time() - current_time
    log(f"Finished generating response in {gpt_time:.2f} seconds.")

    # Convert response to audio
    current_time = time()
    audio = elevenlabs.generate(
        text=response, voice="Adam", model="eleven_monolingual_v1"
    )
    elevenlabs.save(audio, "audio/response.wav")
    audio_time = time() - current_time
    log(f"Finished generating audio in {audio_time:.2f} seconds.")

    # Play response
    log("Speaking...")
    sound = mixer.Sound("audio/response.wav")
    # Add response as a new line to conv.txt
    with open("conv.txt", "a") as f:
        f.write(f"{response}\n")
    sound.play()
    pygame.time.wait(int(sound.get_length() * 1000))
    print(f"\n --- USER: {string_words}\n --- JARVIS: {response}\n")

AlexandreSajus (Owner) commented

This ticket is tough to read: what part of my code did you change, what errors are you getting, and what exactly do you need help with?

rothbr (Author) commented Feb 23, 2024

Basically, I tried to add a different language for Deepgram to transcribe, but it doesn't work.
