I am trying to implement the OpenAI Realtime API in an Expo mobile app using the react-native-webrtc-web-shim
library. The functionality works. However, whenever audio plays on an iPhone, it comes out of the earpiece (the top speaker) instead of the loudspeaker. I understand this routing is meant to prevent a feedback loop, but I need the audio to play through the loudspeaker so the user does not have to raise the phone to their ear. Here is the code I am using:
import {mediaDevices, RTCPeerConnection, MediaStream, RTCView} from 'react-native-webrtc-web-shim';
...
// Control channel for exchanging JSON events with the realtime API;
// created inside startSession() via pc.createDataChannel('oai-events').
const [dataChannel, setDataChannel] = useState<null | ReturnType<
RTCPeerConnection['createDataChannel']
>>(null);
// The active WebRTC peer connection; assigned at the end of startSession().
const peerConnection = useRef<null | RTCPeerConnection>(null);
// Local microphone capture stream obtained from getUserMedia in startSession().
const [localMediaStream, setLocalMediaStream] = useState<null | MediaStream>(
null
);
// Accumulates remote audio tracks delivered by the peer connection's
// 'track' events (kept in a ref so track additions don't trigger re-renders).
const remoteMediaStream = useRef<MediaStream>(new MediaStream());
/**
 * Opens a realtime session with the OpenAI API over WebRTC: configures
 * the device audio session, captures the microphone, performs the SDP
 * offer/answer exchange with the realtime endpoint, and stores the
 * resulting peer connection in `peerConnection`.
 *
 * @throws Error when the SDP exchange request fails (non-2xx response).
 */
async function startSession(): Promise<void> {
  // Enable recording and silent-mode playback.
  // NOTE(review): on iOS, `allowsRecordingIOS: true` puts the audio
  // session into play-and-record mode, which routes output to the
  // earpiece (top speaker) — this is the cause of the reported issue.
  // expo-av exposes no iOS loudspeaker override; forcing the bottom
  // speaker requires AVAudioSession's `overrideOutputAudioPort`
  // (e.g. react-native-incall-manager's setForceSpeakerphoneOn(true))
  // — confirm against your audio stack before shipping.
  await Audio.setAudioModeAsync({
    allowsRecordingIOS: true,
    playsInSilentModeIOS: true,
  });

  // Create the peer connection and wire up diagnostics + remote audio.
  const pc = new RTCPeerConnection();
  pc.addEventListener('connectionstatechange', (e) => {
    console.log('connectionstatechange', e);
  });
  pc.addEventListener('track', (event) => {
    // Collect every remote track into the shared MediaStream ref.
    if (event.track) remoteMediaStream.current.addTrack(event.track);
  });

  // Capture microphone input. The request is audio-only, so the stream
  // can never contain video tracks — the previous `isVoiceOnly` branch
  // that awaited getVideoTracks()[0] (a synchronous, always-undefined
  // value here) was dead code and has been removed.
  const ms = await mediaDevices.getUserMedia({
    audio: true,
  });
  setLocalMediaStream(ms);
  pc.addTrack(ms.getTracks()[0]);

  // Data channel carries JSON events to/from the realtime API.
  const dc = pc.createDataChannel('oai-events');
  setDataChannel(dc);

  // Start the session using the Session Description Protocol (SDP).
  const offer = await pc.createOffer({});
  await pc.setLocalDescription(offer);

  const baseUrl = 'https://api.openai.com/v1/realtime';
  const model = 'gpt-4o-realtime-preview-2024-12-17';
  // SECURITY(review): EXPO_PUBLIC_* variables are embedded in the
  // client bundle. Ship a short-lived ephemeral token minted by your
  // own backend instead of the raw API key.
  const sdpResponse = await fetch(`${baseUrl}?model=${model}`, {
    method: 'POST',
    body: offer.sdp,
    headers: {
      Authorization: `Bearer ${process.env.EXPO_PUBLIC_OPEN_AI_KEY}`,
      'Content-Type': 'application/sdp',
    },
  });
  // Fail loudly instead of handing an HTML/JSON error body to
  // setRemoteDescription, which produces an opaque SDP parse error.
  if (!sdpResponse.ok) {
    throw new Error(`SDP exchange failed with status ${sdpResponse.status}`);
  }

  const answer = {
    type: 'answer' as const,
    sdp: await sdpResponse.text(),
  };
  await pc.setRemoteDescription(answer);
  peerConnection.current = pc;
}
useEffect(() => {
async function configureTools() {
console.log('Configuring the client side tools');
let instructions = `You are a virtual friend.`;
const event = {
type: 'session.update',
session: {
modalities: ['text', 'audio'],
instructions: instructions,
tools: clientToolsSchema,
voice:voiceToUse,
input_audio_transcription: {
model: "whisper-1"
},
},
};
setIsSessionActive(true);
setLoading(false);
dataChannel.send(JSON.stringify(event));
}
if (dataChannel) {
// Set session active when the data channel is opened
dataChannel.addEventListener('open', () => {
setEvents([]);
// Configure the client side tools
configureTools();
});
}
}, [dataChannel, voiceToUse]);