I am trying to do Text to Speech (TTS) using pyttsx3 and Speech to Text (STT) using Vosk (as I want everything to work offline). I have these two parts running in two separate threads, but they are not able to operate simultaneously. That is, if I try acoustic feedback (letting the microphone listen to the speech generated by pyttsx3 on the same machine), the generated speech is not picked up and converted to text. But if I use a separate device to generate the speech, then it is converted to text correctly. Any help is appreciated. Thank you.
from vosk import Model, KaldiRecognizer
import pyaudio
import pyttsx3
import threading
import time
def text_to_speech():
    """Speak a fixed test phrase ten times using a pyttsx3 engine.

    Takes no arguments and returns nothing; the only effect is audio output
    produced through the engine created by ``pyttsx3.init()``.
    """
    phrase = "start 0xFF1234"
    engine = pyttsx3.init()
    # NOTE(review): the original paste had lost its indentation; runAndWait()
    # is assumed to belong inside the loop (one call per queued phrase).
    for _ in range(10):
        engine.say(phrase)
        engine.runAndWait()
def start_speech_to_text():
    """Capture microphone audio and print the phrases Vosk recognizes.

    Loads the Vosk model from a fixed path, opens an input stream as 16 kHz
    mono 16-bit PCM, and feeds 20 chunks of 4096 frames each (roughly five
    seconds of audio) to a ``KaldiRecognizer``. Every utterance the
    recognizer reports as complete is printed as ``' <text> '``.

    After the last chunk the recognizer is flushed with ``FinalResult()`` so
    that speech still in progress when capture stops is printed as well
    instead of being discarded. The audio stream and the PyAudio instance are
    always released, even if reading or recognition raises.

    Takes no arguments and returns nothing.
    """
    import json  # local import: only needed to decode recognizer results

    model = Model(r"C:\vosk-model-small-en-us-0.15")
    recognizer = KaldiRecognizer(model, 16000)
    mic = pyaudio.PyAudio()
    try:
        stream = mic.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=16000,
            input=True,
            frames_per_buffer=8192,
        )
        try:
            stream.start_stream()
            for _ in range(20):
                data = stream.read(4096)
                if recognizer.AcceptWaveform(data):
                    # Results are JSON documents; decode them rather than
                    # slicing the string at fixed offsets, which breaks as
                    # soon as the layout of the result changes.
                    text = json.loads(recognizer.Result()).get("text", "")
                    print(f"' {text} '")
            # Flush audio the recognizer is still holding: without this, an
            # utterance that has not ended by the last chunk is never shown.
            trailing = json.loads(recognizer.FinalResult()).get("text", "")
            if trailing:
                print(f"' {trailing} '")
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        mic.terminate()
#-- Build one worker thread per task: speech-to-text first, text-to-speech second
thread1_STT, thread2_TTS = (
    threading.Thread(target=task)
    for task in (start_speech_to_text, text_to_speech)
)
#-- Launch both workers, STT before TTS
for worker in (thread1_STT, thread2_TTS):
    worker.start()
#-- Block until both workers have finished
for worker in (thread1_STT, thread2_TTS):
    worker.join()