-
Notifications
You must be signed in to change notification settings - Fork 0
/
demo-Dialog.py
52 lines (36 loc) · 2.02 KB
/
demo-Dialog.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
from IPython.display import Audio
import nltk # we'll use this to split into sentences
import numpy as np
from bark.generation import (
generate_text_semantic,
preload_models,
)
from bark.api import semantic_to_waveform
from bark import generate_audio, SAMPLE_RATE
from scipy.io.wavfile import write as write_wav
# Fetch the NLTK "punkt" resource.
nltk.download("punkt")

# Load the Bark models up front; every *_use_gpu / *_use_small flag is enabled
# (the codec model is only given a GPU flag).
preload_models(
    text_use_gpu=True,
    text_use_small=True,
    coarse_use_gpu=True,
    coarse_use_small=True,
    fine_use_gpu=True,
    fine_use_small=True,
    codec_use_gpu=True,
)

# Maps each speaker name used in the script to the Bark history prompt
# (voice preset) that is passed to generate_audio for that speaker.
speaker_lookup = {
    "Samantha": "v2/en_speaker_9",
    "John": "v2/en_speaker_2",
}
def split_script_lines(raw):
    """Split a raw dialog script into clean, non-empty lines.

    Args:
        raw: The whole script as one string, one "Speaker: text" entry per
            line. Leading/trailing whitespace and blank lines are allowed.

    Returns:
        A list of the lines with surrounding whitespace removed. Lines that
        are empty or contain only whitespace are dropped.
    """
    # Strip first, then filter: filtering before stripping would let
    # whitespace-only lines through as empty strings.
    stripped = (line.strip() for line in raw.split("\n"))
    return [line for line in stripped if line]


# Script generated by ChatGPT
script = """
Samantha: Hey, have you heard about this new text-to-audio model called "Bark"?
John: No, I haven't. What's so special about it?
Samantha: Well, apparently it's the most realistic and natural-sounding text-to-audio model out there right now. People are saying it sounds just like a real person speaking.
John: Wow, that sounds amazing. How does it work?
Samantha: I think it uses advanced machine learning algorithms to analyze and understand the nuances of human speech, and then replicates those nuances in its own speech output.
John: That's pretty impressive. Do you think it could be used for things like audiobooks or podcasts?
Samantha: Definitely! In fact, I heard that some publishers are already starting to use Bark to create audiobooks. And I bet it would be great for podcasts too.
John: I can imagine. It would be like having your own personal voiceover artist.
Samantha: Exactly! I think Bark is going to be a game-changer in the world of text-to-audio technology."""

# From here on `script` is a list of "Speaker: text" strings, one per line.
script = split_script_lines(script)
# Synthesize every script line with its speaker's voice and join the clips
# into a single track, with half a second of silence after each line.
silence_samples = int(0.5 * SAMPLE_RATE)
pieces = []
for line in script:
    # Split on the first ": " only, so spoken text that itself contains ": "
    # is kept whole instead of breaking the two-value unpack.
    speaker, text = line.split(": ", 1)
    audio_array = generate_audio(text, history_prompt=speaker_lookup[speaker])
    # np.zeros defaults to float64; create the pause in the clip's own dtype
    # so np.concatenate does not upcast the whole track before it is written.
    pause = np.zeros(silence_samples, dtype=audio_array.dtype)
    pieces += [audio_array, pause]

# Write the assembled dialog as one WAV file in the working directory.
write_wav("bark_generation.wav", rate=SAMPLE_RATE, data=np.concatenate(pieces))