
Flutter: Voice command 'open' not activating microphone listening state with Azure Speech Services - Stack Overflow


I'm building a Flutter application that uses Azure Speech Services for voice commands. When I say "open", the microphone should start listening (indicated by turning red), but it's not working correctly. Here's my complete implementation:

AudioRecorder Provider:


final isListeningProvider = StateProvider<bool>((ref) => false);

final audioRecorderProvider = Provider<AudioRecorder>((ref) => AudioRecorder(ref));

class AudioRecorder {
  final FlutterSoundRecorder _recorder = FlutterSoundRecorder();
  bool _isInitialized = false;
  String? _path;
  final Ref _ref;

  AudioRecorder(this._ref);

  bool get isListening => _ref.read(isListeningProvider);

  Future<void> init() async {
    if (!_isInitialized) {
      final status = await Permission.microphone.request();
      if (status != PermissionStatus.granted) {
        throw RecordingPermissionException('Microphone permission not granted');
      }
      await _recorder.openRecorder();
      _isInitialized = true;
    }
  }

  Future<void> startListening(String command) async {
    if (!_isInitialized) await init();
    
    if (command.toLowerCase() == "open") {
      try {
        final dir = await getTemporaryDirectory();
        _path = '${dir.path}/audio_${DateTime.now().millisecondsSinceEpoch}.aac';
        await _recorder.startRecorder(
          toFile: _path,
          codec: Codec.aacADTS,
        );
        _ref.read(isListeningProvider.notifier).state = true;
      } catch (e) {
        debugPrint('Error starting recording: $e');
      }
    }
  }

  Future<String?> stopListening() async {
    try {
      if (_recorder.isRecording) {
        await _recorder.stopRecorder();
        _ref.read(isListeningProvider.notifier).state = false;
        return _path;
      }
      return null;
    } catch (e) {
      debugPrint('Error stopping recording: $e');
      return null;
    }
  }

  Future<void> start() async {
    if (!_isInitialized) await init();
    try {
      final dir = await getTemporaryDirectory();
      _path = '${dir.path}/audio_${DateTime.now().millisecondsSinceEpoch}.aac';
      await _recorder.startRecorder(
        toFile: _path,
        codec: Codec.aacADTS,
      );
      _ref.read(isListeningProvider.notifier).state = true;
    } catch (e) {
      debugPrint('Error recording audio: $e');
    }
  }

  Future<String?> stop() async {
    try {
      if (_recorder.isRecording) {
        await _recorder.stopRecorder();
        _ref.read(isListeningProvider.notifier).state = false;
        return _path;
      }
      return null;
    } catch (e) {
      debugPrint('Error stopping recording: $e');
      return null;
    }
  }

  Future<bool> isRecording() async {
    return _recorder.isRecording;
  }

  Future<void> dispose() async {
    if (_isInitialized) {
      await _recorder.closeRecorder();
      _isInitialized = false;
    }
  }
}

Voice Command State and Provider:


class VoiceCommandState {
  final bool isListening;
  final String? lastCommand;
  final String? error;
  final bool isProcessing; 

  VoiceCommandState({
    this.isListening = false,
    this.lastCommand,
    this.error,
    this.isProcessing = false,
  });

  
  VoiceCommandState copyWith({
    bool? isListening,
    String? lastCommand,
    String? error,
    bool? isProcessing,
  }) {
    return VoiceCommandState(
      isListening: isListening ?? this.isListening,
      lastCommand: lastCommand ?? this.lastCommand,
      error: error ?? this.error,
      isProcessing: isProcessing ?? this.isProcessing,
    );
  }
}

class VoiceCommandNotifier extends StateNotifier<VoiceCommandState> {
  final AudioRecorder _recorder;
  final TranslationRepository _repository;
  final Ref _ref;

  VoiceCommandNotifier(this._recorder, this._repository, this._ref)
      : super(VoiceCommandState());

  Future<void> processVoiceCommand(String command) async {
    try {
      final commandLower = command.toLowerCase();
      
      if (commandLower == "open") {
        // First update prompt screen state
        _ref.read(promptScreenProvider.notifier).setListening(true);
        
        // Start recording first
        try {
          await _recorder.startListening(command);
          // Only update state after successful start of listening
          state = state.copyWith(
            isListening: true,
            lastCommand: command,
            isProcessing: false
          );
        } catch (e) {
          // If recording fails, update both states accordingly
          _ref.read(promptScreenProvider.notifier).setListening(false);
          state = state.copyWith(
            isListening: false,
            error: e.toString(),
            isProcessing: false
          );
          throw e; // Re-throw to be caught by outer try-catch
        }
      } else if (commandLower == "stop") {
        if (state.isListening) {
          try {
            final audioPath = await _recorder.stopListening();
            _ref.read(promptScreenProvider.notifier).setListening(false);
            
            if (audioPath != null) {
              state = state.copyWith(isProcessing: true);
              final text = await _repository.processAudioInput(audioPath);
              _ref.read(promptScreenProvider.notifier).updateText(text);
              
              state = state.copyWith(
                isListening: false,
                lastCommand: text,
                isProcessing: false
              );
            } else {
              state = state.copyWith(
                isListening: false,
                error: "Failed to get audio path",
                isProcessing: false
              );
            }
          } catch (e) {
            state = state.copyWith(
              isListening: false,
              error: e.toString(),
              isProcessing: false
            );
          }
        }
      }
    } catch (e) {
      state = state.copyWith(
        isListening: false,
        error: e.toString(),
        isProcessing: false
      );
    }
  }

  Future<void> handleSpeechRecognition(String audioPath) async {
    try {
      final text = await _repository.processAudioInput(audioPath);
      if (text.toLowerCase() == "open") {
        await processVoiceCommand("open");
      } else if (text.toLowerCase() == "stop") {
        await processVoiceCommand("stop");
      }
    } catch (e) {
      state = state.copyWith(
        isListening: false,
        error: e.toString(),
        isProcessing: false
      );
    }
  }
}

final voiceCommandProvider = StateNotifierProvider<VoiceCommandNotifier, VoiceCommandState>((ref) {
  return VoiceCommandNotifier(
    ref.watch(audioRecorderProvider),
    ref.watch(translationRepositoryProvider),
    ref,
  );
});


Prompt Screen Implementation:

final isListeningProvider = StateProvider<bool>((ref) => false);

class PromptScreen extends ConsumerStatefulWidget {
  const PromptScreen({super.key});

  @override
  ConsumerState<PromptScreen> createState() => _PromptScreenState();
}

class _PromptScreenState extends ConsumerState<PromptScreen> {
  late final TextEditingController _textController;
  late final AudioRecorder _recorder;

  @override
  void initState() {
    super.initState();
    _textController = TextEditingController();
    _recorder = ref.read(audioRecorderProvider);

    _initializeRecorder();
  }

  Future<void> _initializeRecorder() async {
    try {
      await _recorder.init();
    } catch (e) {
      debugPrint('Recorder init error: $e');
    }
  }

  void _handleVoiceCommand(VoiceCommandState state) {
    if (!mounted) return;
    setState(() {}); // Force UI update

    if (state.lastCommand?.toLowerCase() == "open") {
      _startVoiceRecording();
    } else if (state.lastCommand?.toLowerCase() == "stop") {
      _stopVoiceRecording();
    }

    if (state.error != null) {
      ScaffoldMessenger.of(context)
          .showSnackBar(SnackBar(content: Text(state.error!)));
    }
  }

  Future<void> _startVoiceRecording() async {
    try {
      await _recorder.startListening("open");
      ref.read(isListeningProvider.notifier).state = true;
      final currentState = ref.read(voiceCommandProvider);
      ref.read(voiceCommandProvider.notifier).state =
          currentState.copyWith(isListening: true);
    } catch (e) {
      debugPrint('Recording start error: $e');
    }
  }

  Future<void> _stopVoiceRecording() async {
    try {
      final path = await _recorder.stopListening();
      if (path != null) {
        final text = await ref
            .read(translationRepositoryProvider)
            .processAudioInput(path);
        _textController.text = text;
      }
    } catch (e) {
      debugPrint('Recording stop error: $e');
    } finally {
      ref.read(isListeningProvider.notifier).state = false;
      final currentState = ref.read(voiceCommandProvider);
      ref.read(voiceCommandProvider.notifier).state =
          currentState.copyWith(isListening: false);
    }
  }

  @override
  void dispose() {
    _recorder.dispose();
    _textController.dispose();
    super.dispose();
  }

  @override
  Widget build(BuildContext context) {
    final voiceState = ref.watch(voiceCommandProvider);

    // Add listener for voice commands
    ref.listen<VoiceCommandState>(voiceCommandProvider, (_, state) {
      if (!mounted) return;
      _handleVoiceCommand(state);
    });


    return Scaffold(
      // ... scaffold code
        Row(
              children: [
                Expanded(
                  child: ElevatedButton(
                    onPressed: () async {
                      // Make onPressed async
                      if (_textController.text.isNotEmpty) {
                        // Play sound before navigation
                        await ref
                            .read(translationRepositoryProvider)
                            .playUISound('start_conversation');

                        // Navigate after sound plays
                        if (mounted) {
                          // Check if widget is still mounted
                          Navigator.pushNamed(
                            context,
                            '/conversation',
                            arguments: _textController.text,
                          ).then((_) => _textController.clear());
                        }
                      }
                    },
                    style: ElevatedButton.styleFrom(
                      backgroundColor: const Color.fromARGB(255, 61, 62, 63),
                      minimumSize: const Size(double.infinity, 50),
                    ),
                    child: const Text('start conversation',
                        style: TextStyle(color: Colors.white)),
                  ),
                ),
                const SizedBox(width: 16),
                Consumer(
                  builder: (context, ref, child) {
                    final voiceState = ref.watch(voiceCommandProvider);
                    return ElevatedButton(
                      onPressed: () => _toggleRecording(voiceState.isListening),
                      style: ElevatedButton.styleFrom(
                        backgroundColor:
                            voiceState.isListening ? Colors.red : Colors.white,
                        shape: const CircleBorder(),
                        padding: const EdgeInsets.all(16),
                      ),
                      child: const Icon(Icons.mic, size: 28),
                    );
                  },
                ),
              ],
            ),
          ],
        ),
      ),
    );
  }

  Future<void> _toggleRecording(bool isCurrentlyListening) async {
    if (isCurrentlyListening) {
      // Play sound before stopping
      await ref.read(translationRepositoryProvider).playUISound('mic_off');
      await _stopVoiceRecording();
    } else {
      // Play sound before starting
      await ref.read(translationRepositoryProvider).playUISound('mic_on');
      await _startVoiceRecording();
    }
  }
}

Backend Speech Service (Python/FastAPI):



class SpeechService:
    def __init__(self):
        self.speech_key = os.getenv("AZURE_SPEECH_KEY")
        self.speech_region = os.getenv("AZURE_SPEECH_REGION")
        
        if not self.speech_key or not self.speech_region:
            raise ValueError("Azure Speech credentials not found")
            
        self.speech_config = speechsdk.SpeechConfig(
            subscription=self.speech_key,
            region=self.speech_region
        )
        self.speech_config.speech_recognition_language = "en-EN"
        
        # Initialize speech recognizer for general audio processing
        self.recognizer = sr.Recognizer()
        self.recognizer.energy_threshold = 300
        self.recognizer.dynamic_energy_threshold = True
        
        # Define wake words/commands
        self.WAKE_WORDS = {
            "open": "START_RECORDING",
            "stop": "STOP_RECORDING"
        }
        
        # Audio format configuration
        self.supported_formats = [".wav", ".aac", ".mp3", ".ogg", ".mp4", ".m4a"]
        self.valid_mime_types = [
            "audio/wav", "audio/aac", "audio/mpeg", "audio/ogg",
            "audio/mp4", "audio/x-m4a"
        ]
        
        self.translation_service = TranslationService()

    async def process_command(self, audio_path: str) -> str:
        """Process audio for wake word detection using Azure Speech Services"""
        working_path = audio_path
        converted_path = None
        
        try:
            # Convert to WAV if needed
            if not working_path.lower().endswith(".wav"):
                converted_path = await self._convert_to_wav(working_path)
                working_path = converted_path

            # Set up Azure speech recognition
            audio_config = speechsdk.AudioConfig(filename=working_path)
            speech_recognizer = speechsdk.SpeechRecognizer(
                speech_config=self.speech_config,
                audio_config=audio_config
            )

            # Use promise for async recognition
            done = False
            recognized_text = None

            def handle_result(evt):
                nonlocal done, recognized_text
                if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
                    recognized_text = evt.result.text.lower().strip()
                done = True

            speech_recognizer.recognized.connect(handle_result)
            
            # Start recognition
            speech_recognizer.start_continuous_recognition()
            
            # Wait for result with timeout
            timeout = 5  # 5 seconds timeout
            start_time = asyncio.get_event_loop().time()
            
            while not done:
                if asyncio.get_event_loop().time() - start_time > timeout:
                    speech_recognizer.stop_continuous_recognition()
                    raise HTTPException(
                        status_code=408,
                        detail="Recognition timeout"
                    )
                await asyncio.sleep(0.1)
            
            speech_recognizer.stop_continuous_recognition()

            # Check if recognized text matches any wake words
            if recognized_text in self.WAKE_WORDS:
                return recognized_text
            
            return "UNKNOWN_COMMAND"

        except Exception as e:
            logger.error(f"Command processing error: {str(e)}")
            raise HTTPException(
                status_code=500,
                detail=f"Command processing failed: {str(e)}"
            )
        finally:
            # Cleanup temporary files
            await self._cleanup_temp_files(converted_path)

Expected behavior:

  1. When I say "open", the microphone should start listening (turn red)
  2. The mic should stay in listening state until I say "stop"
  3. While in listening state, it should perform speech recognition

For example:

"esto es una prueba" should be recognized and translated as "this is a test"

Actual behavior:

  • The voice command "open" is recognized but doesn't activate the listening state
  • The microphone icon stays white instead of turning red
  • The state doesn't properly update across the application

I suspect there might be an issue with the state management or how the voice commands are being processed, but I can't figure out where the problem lies.

asked Feb 10 at 20:52 by pomoworko, edited Feb 12 at 1:27
  • VoiceCommandNotifier should update state after calling _recorder.startListening(command), but currently, you set the state before waiting for the function to complete. – Suresh Chikkam Commented Feb 11 at 3:31
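A minimal sketch of the ordering that comment suggests, reusing the question's AudioRecorder and VoiceCommandState names; the state is flipped only after startListening has completed and the recorder confirms it is actually recording:

// Hypothetical reordering inside VoiceCommandNotifier.processVoiceCommand.
if (commandLower == "open") {
  // Await the recorder first, so a failed start never leaves the UI red.
  await _recorder.startListening(command);

  // Only report "listening" once the recorder is actually recording.
  final recording = await _recorder.isRecording();
  state = state.copyWith(
    isListening: recording,
    lastCommand: command,
    isProcessing: false,
  );
}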

2 Answers


Use state = state.copyWith() instead of state = VoiceCommandState().

In VoiceCommandNotifier:

state = state.copyWith(
  isListening: true,
  lastCommand: command,
  isProcessing: false
);

In PromptScreen, the button's color depends on voiceState.isListening, so if that state never updates, the UI won't change.

Add setState(() {}); inside _handleVoiceCommand, and also wrap the ElevatedButton in a Consumer so that it reacts to provider changes.

Consumer(
  builder: (context, ref, child) {
    final voiceState = ref.watch(voiceCommandProvider);
    return ElevatedButton(
      onPressed: () => _toggleRecording(voiceState.isListening),
      style: ElevatedButton.styleFrom(
        backgroundColor: voiceState.isListening ? Colors.red : Colors.white,
        shape: const CircleBorder(),
        padding: const EdgeInsets.all(16),
      ),
      child: const Icon(Icons.mic, size: 28),
    );
  },
),

Since _isListening is a private variable, the UI might not update correctly. Make it a StateProvider instead:

final isListening = StateProvider<bool>((ref) => false);

Then update the state inside startListening: ref.read(isListening.notifier).state = true;
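A minimal sketch of that wiring, assuming the isListeningProvider already declared in the question and a standard flutter_riverpod setup: the recorder writes to the provider only after recording has actually started, and the mic button watches the provider so its color updates on every change.

// Inside AudioRecorder.startListening, after startRecorder succeeds:
_ref.read(isListeningProvider.notifier).state = true;

// In the UI, watch the provider so the button rebuilds whenever it changes.
Consumer(
  builder: (context, ref, child) {
    final isListening = ref.watch(isListeningProvider);
    return ElevatedButton(
      onPressed: () => _toggleRecording(isListening),
      style: ElevatedButton.styleFrom(
        backgroundColor: isListening ? Colors.red : Colors.white,
        shape: const CircleBorder(),
        padding: const EdgeInsets.all(16),
      ),
      child: const Icon(Icons.mic, size: 28),
    );
  },
)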

This is the correct implementation logic, but it currently uses Picovoice, which is relatively expensive for extensive use. I will demonstrate the logic with Picovoice here; however, I aim to replace it with Azure, since that is more cost-effective, and I would like to learn how to implement the same flow with Azure.

prompt_screen.dart

import 'package:flutter/cupertino.dart';
import 'package:flutter/material.dart';
import 'package:flutter_riverpod/flutter_riverpod.dart';
import 'package:porcupine_flutter/porcupine.dart';
import 'package:porcupine_flutter/porcupine_error.dart';
import 'package:porcupine_flutter/porcupine_manager.dart';
import 'package:speech_to_text/speech_to_text.dart' as stt;
import '../../domain/repositories/translation_repository.dart';
import '../providers/audio_recorder_provider.dart';
import '../providers/voice_command_provider.dart';
import '../widgets/voice_command_status_inficator.dart';

final isListeningProvider = StateProvider<bool>((ref) => false);

class PromptScreen extends ConsumerStatefulWidget {
  const PromptScreen({super.key});

  @override
  ConsumerState<PromptScreen> createState() => _PromptScreenState();
}

class _PromptScreenState extends ConsumerState<PromptScreen> {
  late final TextEditingController _textController;
  late final AudioRecorder _recorder;
  late PorcupineManager _porcupineManager;
  late stt.SpeechToText _speech;
  bool _isWakeWordMode = true;

  @override
  void initState() {
    super.initState();
    _textController = TextEditingController();
    _recorder = ref.read(audioRecorderProvider);
    _speech = stt.SpeechToText();

    _initializeRecorder();
    _initPorcupine();
  }

  Future<void> _initializeRecorder() async {
    try {
      await _recorder.init();
    } catch (e) {
      debugPrint('Recorder init error: $e');
    }
  }

  void _initPorcupine() async {
    try {
      _porcupineManager = await PorcupineManager.fromBuiltInKeywords(
        'PICOVOICE_API_KEY',
        [BuiltInKeyword.JARVIS, BuiltInKeyword.ALEXA],
        _wakeWordCallback,
      );
      await _porcupineManager.start();
      debugPrint("Porcupine initialized successfully");
    } on PorcupineException catch (err) {
      debugPrint("Failed to initialize Porcupine: ${err.message}");
    }
  }

  Future<void> _startConversation() async {
    if (_textController.text.isNotEmpty) {
      await ref.read(translationRepositoryProvider).playUISound('start_conversation');

      if (mounted) {
        Navigator.pushNamed(
          context,
          '/conversation',
          arguments: _textController.text,
        ).then((_) => _textController.clear());
      }
    }
  }

  void _wakeWordCallback(int keywordIndex) async {
    if (!mounted) return;

    // JARVIS detected
    if (keywordIndex == 0 && _isWakeWordMode) {
      await _startVoiceRecording();
      _isWakeWordMode = false;
    }
    // ALEXA detected
    else if (keywordIndex == 1 && !_isWakeWordMode) {
      await _stopVoiceRecording();
      _isWakeWordMode = true;
      
      // Automatically start conversation after stopping recording
      if (_textController.text.isNotEmpty) {
        await _startConversation();
      }
    }
  }

  void _handleVoiceCommand(VoiceCommandState state) {
    if (!mounted) return;
    setState(() {});

    if (state.error != null) {
      ScaffoldMessenger.of(context)
          .showSnackBar(SnackBar(content: Text(state.error!)));
    }
  }

  Future<void> _startVoiceRecording() async {
    try {
      await ref.read(translationRepositoryProvider).playUISound('mic_on');
      await _recorder.startListening("open");
      ref.read(isListeningProvider.notifier).state = true;
      final currentState = ref.read(voiceCommandProvider);
      ref.read(voiceCommandProvider.notifier).state =
          currentState.copyWith(isListening: true);
    } catch (e) {
      debugPrint('Recording start error: $e');
    }
  }

  Future<void> _stopVoiceRecording() async {
    try {
      await ref.read(translationRepositoryProvider).playUISound('mic_off');
      final path = await _recorder.stopListening();
      if (path != null) {
        var text = await ref
            .read(translationRepositoryProvider)
            .processAudioInput(path);

        // Filter out wake words from the recognized text
        text = text.replaceAll(RegExp(r'\b(?:jarvis|alexa)\b', caseSensitive: false), '').trim();

        // Only update text if there's actual content after filtering
        if (text.isNotEmpty) {
          _textController.text = text;
        }
      }
    } catch (e) {
      debugPrint('Recording stop error: $e');
    } finally {
      ref.read(isListeningProvider.notifier).state = false;
      final currentState = ref.read(voiceCommandProvider);
      ref.read(voiceCommandProvider.notifier).state =
          currentState.copyWith(isListening: false);
    }
  }

  @override
  void dispose() {
    _porcupineManager.delete();
    _recorder.dispose();
    _textController.dispose();
    super.dispose();
  }

  @override
  Widget build(BuildContext context) {
    final voiceState = ref.watch(voiceCommandProvider);

    ref.listen<VoiceCommandState>(voiceCommandProvider, (_, state) {
      if (!mounted) return;
      _handleVoiceCommand(state);
    });

    return Scaffold(
      backgroundColor: const Color(0xFF000000),
      appBar: CupertinoNavigationBar(
        backgroundColor: const Color(0xFF1C1C1E),
        border: null,
        middle: const Text('AI Chat Assistant',
            style: TextStyle(
                color: Colors.white,
                fontSize: 17,
                fontWeight: FontWeight.w600)),
        trailing: CupertinoButton(
          padding: EdgeInsets.zero,
          child: const Icon(CupertinoIcons.gear,
              color: CupertinoColors.systemGrey, size: 28),
          onPressed: () => Navigator.pushNamed(context, '/settings'),
        ),
      ),
      body: Padding(
        padding: const EdgeInsets.all(16.0),
        child: Column(
          children: [
            VoiceCommandStatusIndicator(
              isListening: voiceState.isListening,
            ),
            Text(
              _isWakeWordMode 
                ? 'Say "Jarvis" to start listening'
                : 'Say "Alexa" to stop listening and start conversation',
              style: const TextStyle(color: Colors.white, fontSize: 14),
            ),
            const SizedBox(height: 12),
            Expanded(
              child: Align(
                alignment: Alignment.topLeft,
                child: CupertinoTextField(
                  controller: _textController,
                  maxLines: null,
                  style: const TextStyle(color: Colors.white, fontSize: 17),
                  placeholder: 'write your prompt here',
                  placeholderStyle: const TextStyle(
                      color: CupertinoColors.placeholderText, fontSize: 17),
                  decoration: BoxDecoration(
                    color: const Color(0xFF2C2C2E),
                    borderRadius: BorderRadius.circular(12),
                    border: Border.all(
                      color: const Color(0xFF3A3A3C),
                      width: 0.5,
                    ),
                  ),
                  padding: const EdgeInsets.all(16),
                ),
              ),
            ),
            const SizedBox(height: 20),
            Row(
              children: [
                Expanded(
                  child: ElevatedButton(
                    onPressed: _startConversation,
                    style: ElevatedButton.styleFrom(
                      backgroundColor: const Color.fromARGB(255, 61, 62, 63),
                      minimumSize: const Size(double.infinity, 50),
                    ),
                    child: const Text('start conversation',
                        style: TextStyle(color: Colors.white)),
                  ),
                ),
                const SizedBox(width: 16),
                Consumer(
                  builder: (context, ref, child) {
                    final voiceState = ref.watch(voiceCommandProvider);
                    return ElevatedButton(
                      onPressed: () => _toggleRecording(voiceState.isListening),
                      style: ElevatedButton.styleFrom(
                        backgroundColor:
                            voiceState.isListening ? Colors.red : Colors.white,
                        shape: const CircleBorder(),
                        padding: const EdgeInsets.all(16),
                      ),
                      child: const Icon(Icons.mic, size: 28, color: Colors.black,),
                    );
                  },
                ),
              ],
            ),
          ],
        ),
      ),
    );
  }

  Future<void> _toggleRecording(bool isCurrentlyListening) async {
    if (isCurrentlyListening) {
      await _stopVoiceRecording();
      _isWakeWordMode = true;
    } else {
      await _startVoiceRecording();
      _isWakeWordMode = false;
    }
  }
}
