最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

http status code 404 - 404 Error during Azure Speaker Identification despite valid profiles - Stack Overflow

programmeradmin0浏览0评论

I’m using Azure’s Speaker Recognition API for speaker identification in my Python script, but I’m encountering a 404 error with the message "Resource not found". This error occurs when I try to identify speakers in a diarized audio file. My script works fine when checking the enrollment status of speaker profiles, but when I send an audio segment for identification, the API responds with a 404 error.

main.py is:

from identify_speakers import identify_speaker, check_profiles_enrollment

def main():
    """Transcribe a meeting recording and label each segment with a speaker.

    Steps:
      1. Diarize/transcribe ``Recording.wav`` into segments.
      2. Fetch the enrolled speaker profiles ({profile_id: speaker_name}).
      3. Identify the speaker of every segment that carries an ``audio_path``.
      4. Print the labeled transcription and write it to
         ``final_transcription.txt``.

    Segments without an ``audio_path`` are skipped. A segment whose speaker
    cannot be matched to an enrolled profile is labeled "Unknown".
    """
    meeting_audio = "Recording.wav"

    print("\nTranscribing meeting audio...")
    # NOTE(review): transcribe_meeting_audio is not imported in the visible
    # part of this file — confirm it is imported from the module defining it.
    diarized_segments = transcribe_meeting_audio(meeting_audio)
    print("Diarized segments:", diarized_segments)

    print("\nChecking enrolled speaker profiles...")
    # check_profiles_enrollment() can return a falsy non-dict value when the
    # service is not configured; normalize to an empty dict so the .get()
    # lookup below never fails with AttributeError.
    enrolled_profiles = check_profiles_enrollment() or {}

    print("\nLabeling speakers...")
    labeled_transcription = []

    for segment in diarized_segments:
        audio_segment = segment.get("audio_path")

        if not audio_segment:
            print(f"Skipping segment {segment} due to missing audio.")
            continue

        identified_profile_id = identify_speaker(audio_segment, enrolled_profiles)

        # Unmatched (or None) profile ids fall back to the "Unknown" label.
        speaker_name = enrolled_profiles.get(identified_profile_id, "Unknown")

        labeled_transcription.append(
            f"time from {segment['start_time']:.1f}s to {segment['end_time']:.1f}s:\n{speaker_name}: {segment['text']}\n"
        )

    final_output = "\n".join(labeled_transcription)
    print("\nFinal Transcription:\n", final_output)

    # Explicit UTF-8 so non-ASCII speaker names or transcript text are written
    # correctly regardless of the platform's default encoding.
    with open("final_transcription.txt", "w", encoding="utf-8") as file:
        file.write(final_output)

# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()

identify_speakers.py is:


import json
import os
import requests
import io
from pydub import AudioSegment

# Azure Speech credentials are read from the environment; each is None when
# the corresponding variable is unset.
SPEECH_KEY = os.environ.get("SPEECH_KEY")
ENDPOINT = os.environ.get("SPEECH_ENDPOINT")

# Subscription-key header shared by the HTTP requests in this module.
HEADERS = {
    "Ocp-Apim-Subscription-Key": SPEECH_KEY,
}

def load_speaker_profiles():
    """Load the speaker profiles from ``speaker_profiles.json``.

    The file is read from the current working directory and decoded as UTF-8
    so that non-ASCII speaker names load the same way on every platform.

    Returns:
        The parsed JSON content, or an empty dict when the file does not
        exist (an error message is printed in that case).

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    try:
        with open("speaker_profiles.json", "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print("❌ Error: speaker_profiles.json file not found.")
        return {}

def check_profiles_enrollment(timeout=30.0):
    """Query the enrollment status of every profile in speaker_profiles.json.

    Each profile is fetched once so callers can reuse the result instead of
    repeating the API calls.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A dictionary of {profile_id: speaker_name} containing only the
        profiles considered enrolled. An empty dictionary is returned when
        SPEECH_KEY or ENDPOINT is not configured, so callers can always
        treat the result as a dict.
    """
    if not SPEECH_KEY or not ENDPOINT:
        print("❌ Error: SPEECH_KEY or ENDPOINT is not set.")
        # Return an empty dict (still falsy) rather than False so that
        # dict operations such as .get()/.keys() on the result do not fail.
        return {}

    speaker_profiles = load_speaker_profiles()
    enrolled_profiles = {}

    for speaker_name, profile_id in speaker_profiles.items():
        url = f"{ENDPOINT}/speaker/identification/v2.0/text-independent/profiles/{profile_id}"

        try:
            response = requests.get(url, headers=HEADERS, timeout=timeout)
            if response.status_code == 200:
                profile_data = response.json()
                enrollment_status = profile_data.get("enrollmentStatus", "").lower()
                remaining_speech_length = profile_data.get("remainingEnrollmentsSpeechLength", 0)

                # A profile counts as enrolled when the status says so or when
                # no enrollment speech remains. The remaining length defaults
                # to 0, so a response without that field also counts.
                if enrollment_status == "enrolled" or remaining_speech_length == 0.0:
                    print(f"✅ Profile {profile_id} ({speaker_name}) is fully enrolled.")
                    enrolled_profiles[profile_id] = speaker_name
                else:
                    print(f"⚠️ Profile {profile_id} ({speaker_name}) is not fully enrolled: {enrollment_status}.")
            else:
                print(f"❌ Failed to check profile {profile_id}: {response.text}")

        except Exception as e:
            # Best effort per profile: report the failure (including request
            # timeouts) and keep checking the remaining profiles.
            print(f"❌ Error checking profile {profile_id}: {e}")

    return enrolled_profiles  # Returns only enrolled profiles

def identify_speaker(audio_segment, enrolled_profiles):
    """Identify speaker for an audio segment using the REST API."""
    if not enrolled_profiles:
        print("❌ No enrolled profiles available for identification.")
        return None

    url = f"{ENDPOINT}/speaker/identification/v2.0/text-independent/profiles:identifySingleSpeaker"
    params = {"api-version": "2021-09-05", "profileIds": ",".join(enrolled_profiles.keys())}

    try:
        # Convert audio to correct format using pydub
        if isinstance(audio_segment, io.BytesIO):
            audio_segment.seek(0)
            audio = AudioSegment.from_file(audio_segment, format="wav")
        else:
            audio = AudioSegment.from_file(audio_segment)

        audio = audio.set_channels(1)  # Mono
        audio = audio.set_frame_rate(16000)  # 16kHz
        audio = audio.set_sample_width(2)  # 16-bit

        audio_bytes = io.BytesIO()
        audio.export(audio_bytes, format="wav")
        audio_bytes.seek(0)
        
        response = requests.post(
            url,
            headers={**HEADERS, "Content-Type": "audio/wav"},
            params=params,
            data=audio_bytes  
        )

        print(f"Identification API response: {response.status_code} - {response.text}")

        if response.status_code == 200:
            identified_profile_id = response.json().get("identifiedProfileId")
            if identified_profile_id:
                speaker_name = enrolled_profiles.get(identified_profile_id, "Unknown")
                print(f"
发布评论

评论列表(0)

  1. 暂无评论