OpenAI announced that they will have an app for Mac, but there is no info about Windows. I wanted to try using voice commands on Windows, but it is not working. I tried to do it using Python; I don't know why, but it is still an issue. Has anyone tried this before?
Here is some of my code; if you notice any bugs, please let me know.
import openai
import speech_recognition as sr
import pyttsx3
Initialize the recognizer
recognizer = sr.Recognizer()
engine = pyttsx3.init()
Set your OpenAI API key
openai.api_key = ‘REMOVED FOR SECURITY PURPOSES’
def recognize_speech():
    """Capture one utterance from the default microphone and return it as text.

    Returns:
        The recognized text, or None if the audio could not be understood
        or the recognition service could not be reached.
    """
    with sr.Microphone() as source:
        print("Say something…")
        audio = recognizer.listen(source)
    try:
        text = recognizer.recognize_google(audio)
        print(f"You said: {text}")
        return text
    except sr.UnknownValueError:
        print("Sorry, I did not understand that.")
        return None
    except sr.RequestError as err:
        # recognize_google is a web service; surface network/API failures
        # instead of letting them crash the caller's loop.
        print(f"Speech recognition service error: {err}")
        return None
def chat_with_gpt(prompt):
    """Send *prompt* to the OpenAI completion endpoint and return the reply text.

    NOTE(review): ``openai.Completion`` and the ``davinci-codex`` engine are
    deprecated/removed; with openai>=1.0 this should be migrated to
    ``client.chat.completions.create`` (see the second script in this thread).
    """
    response = openai.Completion.create(
        engine="davinci-codex",
        prompt=prompt,
        max_tokens=150,
    )
    # The completion text often carries leading/trailing whitespace.
    return response.choices[0].text.strip()
# Main loop: listen, ask the model, and speak the answer.
# Guarded so importing this file does not immediately grab the microphone.
if __name__ == "__main__":
    while True:
        user_input = recognize_speech()
        if user_input:
            response = chat_with_gpt(user_input)
            print(f"ChatGPT: {response}")
            engine.say(response)
            engine.runAndWait()
Well, I believe this is something like what you wanted to create. I used pygame
to handle sound playback on my machine, because playsound
was not working for me for some reason and I didn't have the energy or time to troubleshoot it.
If you have time, you could also add a feature that automatically detects when the user is talking.
import os
import time
from pathlib import Path

from dotenv import load_dotenv
from openai import OpenAI
from pygame import mixer
import sounddevice as sd
import soundfile as sf
# Pull OPENAI_API_KEY (and anything else) from a local .env file.
load_dotenv()

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Audio files live next to this script.
_script_dir = Path(__file__).parent
speech_file_path = _script_dir / "speech.mp3"
recorded_file_path = _script_dir / "recorded.wav"
def generate_speech(text, output_path):
    """Synthesize *text* with the OpenAI TTS endpoint and save it to *output_path*."""
    tts_response = client.audio.speech.create(
        model="tts-1",
        voice="nova",
        input=text,
    )
    Path(output_path).write_bytes(tts_response.content)
    print(f"Audio saved to {output_path}")
def play_audio(file_path):
    """Play an audio file with pygame's mixer, blocking until playback finishes.

    Args:
        file_path: Path to the audio file (anything pygame's music loader accepts).
    """
    mixer.init()
    mixer.music.load(str(file_path))
    mixer.music.play()
    # Sleep between polls instead of spinning: the original
    # `while get_busy(): continue` busy-wait pegged a CPU core for the
    # entire duration of playback.
    while mixer.music.get_busy():
        time.sleep(0.1)
def record_audio(duration, output_path):
    """Record *duration* seconds of stereo audio and write it to *output_path*."""
    rate_hz = 44100
    frame_count = int(duration * rate_hz)
    print("Recording...")
    captured = sd.rec(frame_count, samplerate=rate_hz, channels=2)
    sd.wait()  # block until the capture buffer is full
    sf.write(output_path, captured, rate_hz)
    print(f"Recording saved to {output_path}")
def transcribe_audio(file_path):
    """Transcribe the audio file at *file_path* with Whisper and return the text."""
    with open(file_path, "rb") as audio:
        result = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio,
            response_format="json",
        )
    print(result)  # debug: show the full transcription response
    return result.text
def generate_chat_response(messages):
    """Return the assistant's reply for *messages* via the chat completions API."""
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def interaction_loop(record_duration=10):
    """Voice-chat loop: record, transcribe, query the model, speak the reply.

    Args:
        record_duration: Seconds of audio to capture per turn. Defaults to 10,
            matching the previously hard-coded value.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."}
    ]
    while True:
        record_audio(record_duration, recorded_file_path)
        transcribed_text = transcribe_audio(recorded_file_path)
        print(f"You said: {transcribed_text}")
        # Silence or failed transcription yields nothing useful; record again
        # instead of sending an empty user message to the model.
        if not transcribed_text:
            continue
        if "exit" in transcribed_text.lower():
            print("Exiting the interaction loop.")
            break
        conversation.append({"role": "user", "content": transcribed_text})
        response_text = generate_chat_response(conversation)
        conversation.append({"role": "assistant", "content": response_text})
        print(f"AI response: {response_text}")
        generate_speech(response_text, speech_file_path)
        play_audio(speech_file_path)


# Guarded so importing this module does not start recording immediately.
if __name__ == "__main__":
    interaction_loop()
1 Like