OpenAI announced that they will have an app for Macs, but there is no info about Windows. I wanted to try using voice commands on Windows, but it is not working. I tried to build it in Python and I don't know why it is still failing. Has anyone tried this before?

Here is my code; if you notice any bugs, please let me know.

import openai
import speech_recognition as sr
import pyttsx3

# Initialize the recognizer and the text-to-speech engine
recognizer = sr.Recognizer()
engine = pyttsx3.init()

# Set your OpenAI API key
openai.api_key = 'REMOVED FOR SECURITY PURPOSES'

def recognize_speech():
    with sr.Microphone() as source:
        print("Say something...")
        audio = recognizer.listen(source)
        try:
            text = recognizer.recognize_google(audio)
            print(f"You said: {text}")
            return text
        except sr.UnknownValueError:
            print("Sorry, I did not understand that.")
            return None

def chat_with_gpt(prompt):
    response = openai.Completion.create(
        engine="davinci-codex",
        prompt=prompt,
        max_tokens=150
    )
    return response.choices[0].text.strip()

while True:
    user_input = recognize_speech()
    if user_input:
        response = chat_with_gpt(user_input)
        print(f"ChatGPT: {response}")
        engine.say(response)
        engine.runAndWait()

Well, I believe you wanted to create something like this. I used pygame to handle sound playback on my machine, because playsound was not working for me for some reason and I didn't have the time or energy to dig into it.

If you have time, you can also add a feature that automatically detects when the user is talking instead of recording for a fixed duration; there is a rough sketch of that after the main script.

import os
import time
from pathlib import Path
from openai import OpenAI
from dotenv import load_dotenv
from pygame import mixer
import sounddevice as sd
import soundfile as sf

load_dotenv()

client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
)

speech_file_path = Path(__file__).parent / "speech.mp3"
recorded_file_path = Path(__file__).parent / "recorded.wav"

def generate_speech(text, output_path):
    response = client.audio.speech.create(
        model="tts-1",
        voice="nova",
        input=text
    )
    with open(output_path, "wb") as file:
        file.write(response.content)
    print(f"Audio saved to {output_path}")

def play_audio(file_path):
    mixer.init()
    mixer.music.load(str(file_path))
    mixer.music.play()
    while mixer.music.get_busy():
        time.sleep(0.1)  # avoid a busy-wait while the audio plays

def record_audio(duration, output_path):
    sample_rate = 44100
    print("Recording...")
    recording = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=2)
    sd.wait()
    sf.write(output_path, recording, sample_rate)
    print(f"Recording saved to {output_path}")

def transcribe_audio(file_path):
    with open(file_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            response_format="json"
        )
    return transcription.text

def generate_chat_response(messages):
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages
    )
    return response.choices[0].message.content

def interaction_loop():
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."}
    ]

    while True:
        record_duration = 10
        record_audio(record_duration, recorded_file_path)

        transcribed_text = transcribe_audio(recorded_file_path)
        print(f"You said: {transcribed_text}")

        if "exit" in transcribed_text.lower():
            print("Exiting the interaction loop.")
            break

        conversation.append({"role": "user", "content": transcribed_text})
        response_text = generate_chat_response(conversation)
        conversation.append({"role": "assistant", "content": response_text})

        print(f"AI response: {response_text}")

        generate_speech(response_text, speech_file_path)
        play_audio(speech_file_path)

interaction_loop()
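
As for automatically detecting when the user is talking: here is a minimal sketch of an energy-based approach, using a hypothetical record_until_silence helper that could replace record_audio above. The SILENCE_THRESHOLD and block sizes are illustrative guesses you would need to tune for your microphone, and it only needs numpy on top of the packages already imported.

import numpy as np
import sounddevice as sd
import soundfile as sf

SAMPLE_RATE = 44100
BLOCK_SECONDS = 0.5
SILENCE_THRESHOLD = 0.01  # RMS below this counts as silence; tune for your mic
MAX_SILENT_BLOCKS = 4     # stop after ~2 seconds of continuous silence

def record_until_silence(output_path):
    blocks = []
    silent_blocks = 0
    heard_speech = False
    print("Listening... speak now.")
    while True:
        block = sd.rec(int(BLOCK_SECONDS * SAMPLE_RATE),
                       samplerate=SAMPLE_RATE, channels=1)
        sd.wait()
        blocks.append(block)
        # Root-mean-square level of the block as a rough loudness measure
        rms = float(np.sqrt(np.mean(block ** 2)))
        if rms >= SILENCE_THRESHOLD:
            heard_speech = True
            silent_blocks = 0
        else:
            silent_blocks += 1
        # Stop once we have heard speech followed by a run of silence
        if heard_speech and silent_blocks >= MAX_SILENT_BLOCKS:
            break
    sf.write(output_path, np.concatenate(blocks), SAMPLE_RATE)
    print(f"Recording saved to {output_path}")

In interaction_loop you would then call record_until_silence(recorded_file_path) in place of record_audio(record_duration, recorded_file_path).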