I’m using Azure’s Speaker Recognition API for speaker identification in my Python script, but I’m encountering a 404 error with the message:
Resource not found
This error occurs when I try to identify speakers in a diarized audio file. My script works fine when checking the enrollment status of speaker profiles, but when I send an audio segment for identification, the API responds with a 404 error.
Here is my `main.py`:
from identify_speakers import identify_speaker, check_profiles_enrollment
def main():
    """Transcribe the meeting recording, label each segment, and save the result.

    Steps:
      1. Diarize/transcribe ``Recording.wav`` into segments.
      2. Fetch the enrolled speaker profiles.
      3. Identify the speaker of every segment that has an ``audio_path``.
      4. Print the labeled transcription and write it to
         ``final_transcription.txt``.

    Each segment is read as a mapping with the keys ``audio_path``,
    ``start_time``, ``end_time`` and ``text``.
    """
    meeting_audio = "Recording.wav"

    print("\nTranscribing meeting audio...")
    # NOTE(review): transcribe_meeting_audio is not imported in the visible
    # part of main.py — confirm where it is defined.
    diarized_segments = transcribe_meeting_audio(meeting_audio)
    print("Diarized segments:", diarized_segments)

    print("\nChecking enrolled speaker profiles...")
    # The helper may hand back a falsy non-dict value (e.g. False when the
    # credentials are missing); normalise it so the .get() lookup below
    # cannot raise AttributeError.
    enrolled_profiles = check_profiles_enrollment() or {}

    print("\nLabeling speakers...")
    labeled_transcription = []
    for segment in diarized_segments:
        audio_segment = segment.get("audio_path")
        if not audio_segment:
            print(f"Skipping segment {segment} due to missing audio.")
            continue

        identified_profile_id = identify_speaker(audio_segment, enrolled_profiles)
        # An unidentified segment (None or an unknown id) is labeled "Unknown".
        speaker_name = enrolled_profiles.get(identified_profile_id, "Unknown")
        labeled_transcription.append(
            f"time from {segment['start_time']:.1f}s to {segment['end_time']:.1f}s:\n{speaker_name}: {segment['text']}\n"
        )

    final_output = "\n".join(labeled_transcription)
    print("\nFinal Transcription:\n", final_output)

    # Explicit UTF-8 so transcripts containing non-ASCII text are written
    # identically on every platform instead of depending on the locale.
    with open("final_transcription.txt", "w", encoding="utf-8") as file:
        file.write(final_output)
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
And here is `identify_speakers.py`:
import json
import os
import requests
import io
from pydub import AudioSegment
# Azure Speech credentials and base URL, read once from the environment.
SPEECH_KEY = os.getenv("SPEECH_KEY")

# URLs in this module are built as f"{ENDPOINT}/speaker/...", so a trailing
# slash on the configured endpoint would produce "//speaker/...". Strip it
# here; an unset (None) or empty value is passed through unchanged.
_raw_endpoint = os.getenv("SPEECH_ENDPOINT")
ENDPOINT = _raw_endpoint.rstrip("/") if _raw_endpoint else _raw_endpoint

# Subscription-key header sent with every request.
HEADERS = {"Ocp-Apim-Subscription-Key": SPEECH_KEY}
def load_speaker_profiles():
    """Load the speaker profiles from ``speaker_profiles.json``.

    The file is looked up relative to the current working directory and is
    read as UTF-8 so that non-ASCII speaker names decode the same way on
    every platform.

    Returns:
        The parsed JSON content, or an empty dict when the file does not
        exist (an error message is printed in that case).
    """
    try:
        with open("speaker_profiles.json", "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print("❌ Error: speaker_profiles.json file not found.")
        return {}
def check_profiles_enrollment():
    """
    Check the enrollment status of every stored speaker profile.

    Loads the ``{speaker_name: profile_id}`` mapping with
    ``load_speaker_profiles`` and issues one GET request per profile against
    the text-independent identification profile endpoint. Nothing is cached:
    every call queries the service again.

    Returns:
        dict: ``{profile_id: speaker_name}`` for the profiles considered fully
        enrolled. The dict is empty when ``SPEECH_KEY`` or ``ENDPOINT`` is not
        set, when no profiles are stored, or when none is enrolled.
    """
    if not SPEECH_KEY or not ENDPOINT:
        print("❌ Error: SPEECH_KEY or ENDPOINT is not set.")
        # An empty dict is still falsy, but unlike False it lets callers keep
        # using .get()/.keys() on the result, as the documented return type
        # promises.
        return {}

    speaker_profiles = load_speaker_profiles()
    enrolled_profiles = {}

    for speaker_name, profile_id in speaker_profiles.items():
        url = f"{ENDPOINT}/speaker/identification/v2.0/text-independent/profiles/{profile_id}"
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, headers=HEADERS, timeout=30)
            if response.status_code != 200:
                print(f"❌ Failed to check profile {profile_id}: {response.text}")
                continue

            profile_data = response.json()
            enrollment_status = profile_data.get("enrollmentStatus", "").lower()
            # Default to None (not 0): a response that lacks this field must
            # not be mistaken for "no speech remaining to enroll".
            remaining_speech_length = profile_data.get("remainingEnrollmentsSpeechLength")

            if enrollment_status == "enrolled" or remaining_speech_length == 0:
                print(f"✅ Profile {profile_id} ({speaker_name}) is fully enrolled.")
                enrolled_profiles[profile_id] = speaker_name
            else:
                print(f"⚠️ Profile {profile_id} ({speaker_name}) is not fully enrolled: {enrollment_status}.")
        except Exception as e:
            # Deliberate best-effort: one failing profile (network error,
            # malformed response) must not abort the checks for the others.
            print(f"❌ Error checking profile {profile_id}: {e}")

    return enrolled_profiles  # Returns only enrolled profiles
def identify_speaker(audio_segment, enrolled_profiles):
"""Identify speaker for an audio segment using the REST API."""
if not enrolled_profiles:
print("❌ No enrolled profiles available for identification.")
return None
url = f"{ENDPOINT}/speaker/identification/v2.0/text-independent/profiles:identifySingleSpeaker"
params = {"api-version": "2021-09-05", "profileIds": ",".join(enrolled_profiles.keys())}
try:
# Convert audio to correct format using pydub
if isinstance(audio_segment, io.BytesIO):
audio_segment.seek(0)
audio = AudioSegment.from_file(audio_segment, format="wav")
else:
audio = AudioSegment.from_file(audio_segment)
audio = audio.set_channels(1) # Mono
audio = audio.set_frame_rate(16000) # 16kHz
audio = audio.set_sample_width(2) # 16-bit
audio_bytes = io.BytesIO()
audio.export(audio_bytes, format="wav")
audio_bytes.seek(0)
response = requests.post(
url,
headers={**HEADERS, "Content-Type": "audio/wav"},
params=params,
data=audio_bytes
)
print(f"Identification API response: {response.status_code} - {response.text}")
if response.status_code == 200:
identified_profile_id = response.json().get("identifiedProfileId")
if identified_profile_id:
speaker_name = enrolled_profiles.get(identified_profile_id, "Unknown")
print(f"