I'm running into two bugs with Speech Synthesis on Google Chrome Version 135.0.7049.42 on macOS 15.1.1 (24B91). The first is that when using the asynchronous/online voices provided by Google (e.g. Google US English), no events are fired, such as `start`, `end`, etc. However, the macOS native voices (e.g. Samantha) do emit these events as expected.
My second issue is that the first time `speak()` is called after starting Chrome, it does not play any audio aloud. After a refresh or a second attempt, `speak()` suddenly works. I've also noticed that if I wait for a while before calling `speak()` again, it stops working again until another refresh or another attempt happens.
It is also interesting to note that even after the Google voice has stopped speaking, the utterance never really seems to finish: if you try to start another one, the page reports that speech is already in progress.
I've created a small demo file to test this out. Simply create a new HTML file and paste the code, then open it with Chrome (make sure it's the first time Chrome is opening, i.e. quit and reopen it to reproduce).
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>SpeechSynthesis API Test</title>
<style>
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
}
button {
padding: 10px;
margin: 10px 0;
}
select {
padding: 5px;
margin-bottom: 15px;
width: 100%;
}
textarea {
width: 100%;
height: 100px;
padding: 10px;
margin-bottom: 15px;
}
</style>
</head>
<body>
<h1>SpeechSynthesis API Test</h1>
<div>
<label for="text">Text to speak:</label>
<textarea id="text">Hello! This is a test of the SpeechSynthesis API.</textarea>
</div>
<div>
<label for="voice-select">Select Voice:</label>
<select id="voice-select">
<option value="">Loading voices...</option>
</select>
</div>
<div>
<button id="speak-btn">Speak</button>
<button id="pause-btn">Pause</button>
<button id="resume-btn">Resume</button>
<button id="cancel-btn">Cancel</button>
</div>
<div id="status"></div>
<script>
// Check if browser supports speech synthesis
if ("speechSynthesis" in window) {
// Shared handles used by the functions and listeners below: the browser's
// speech synthesis controller and the page controls, looked up by element id.
const synth = window.speechSynthesis;
const textInput = document.getElementById("text");
const voiceSelect = document.getElementById("voice-select");
const speakBtn = document.getElementById("speak-btn");
const pauseBtn = document.getElementById("pause-btn");
const resumeBtn = document.getElementById("resume-btn");
const cancelBtn = document.getElementById("cancel-btn");
// Element whose text content shows the latest status message.
const statusEl = document.getElementById("status");
// Latest result of synth.getVoices(); reassigned by populateVoiceList() and
// indexed by the <select> option values when speaking.
let voices = [];
/**
 * Refreshes the shared `voices` array from `synth.getVoices()` and rebuilds
 * the voice dropdown to match it.
 *
 * With no voices, the dropdown is reduced to a single placeholder option and
 * the status line is left untouched. Otherwise one option is added per voice
 * (its value is the voice's index in `voices`), any voice flagged as
 * `default` is pre-selected, and the status line reports the voice count.
 */
function populateVoiceList() {
  voices = synth.getVoices();

  if (voices.length === 0) {
    voiceSelect.innerHTML = '<option value="">No voices available</option>';
    return;
  }

  // Drop the previous options before appending the fresh list.
  voiceSelect.innerHTML = "";

  for (const [position, candidate] of voices.entries()) {
    const entry = document.createElement("option");
    entry.value = position;
    entry.textContent = `${candidate.name} (${candidate.lang})`;
    if (candidate.default) {
      entry.selected = true;
    }
    voiceSelect.appendChild(entry);
  }

  statusEl.textContent = `Loaded ${voices.length} voices.`;
}
// Initial population of voices. If getVoices() returns nothing yet, this
// leaves the "No voices available" placeholder in the dropdown.
populateVoiceList();
// Chrome loads voices asynchronously, so rebuild the dropdown whenever the
// voice list changes. The guard skips the assignment when the
// onvoiceschanged property is undefined on this synth object.
if (synth.onvoiceschanged !== undefined) {
synth.onvoiceschanged = populateVoiceList;
}
/**
 * Speaks the textarea contents with the voice chosen in the dropdown.
 *
 * Does nothing except update the status line when `synth.speaking` is
 * already true or when the text is empty. Otherwise it builds an utterance,
 * attaches start/end/error handlers that log a timestamp to the console and
 * update the status line, and hands the utterance to `synth.speak()`.
 */
function speak() {
  const timestamp = () => new Date().toLocaleTimeString();
  const showStatus = (message) => {
    statusEl.textContent = message;
  };

  if (synth.speaking) {
    showStatus("Speech synthesis already in progress");
    return;
  }

  const phrase = textInput.value;
  if (!phrase) {
    showStatus("Please enter text to speak");
    return;
  }

  const request = new SpeechSynthesisUtterance(phrase);

  // Option values are indexes into `voices`; an empty value means no choice,
  // in which case the utterance keeps whatever voice it was created with.
  if (voices.length > 0 && voiceSelect.value !== "") {
    request.voice = voices[voiceSelect.value];
  }

  // Lifecycle handlers: each logs when it fired and mirrors it in the UI.
  Object.assign(request, {
    onstart: () => {
      console.log("Speech started at:", timestamp());
      showStatus("Speaking...");
    },
    onend: () => {
      console.log("Speech ended at:", timestamp());
      showStatus("Speech synthesis finished");
    },
    onerror: (event) => {
      console.log("Speech error at:", timestamp(), "Error:", event.error);
      showStatus(`Error occurred: ${event.error}`);
    },
  });

  synth.speak(request);
}
// Wire each control button to its speech-synthesis action. Every handler
// also reflects what it did in the status line.

// Pause only when the synthesizer reports that it is speaking.
const handlePauseClick = () => {
  if (!synth.speaking) {
    return;
  }
  synth.pause();
  statusEl.textContent = "Speech synthesis paused";
};

// Resume only when the synthesizer reports that it is paused.
const handleResumeClick = () => {
  if (!synth.paused) {
    return;
  }
  synth.resume();
  statusEl.textContent = "Speech synthesis resumed";
};

// Cancel unconditionally, whatever the current state is.
const handleCancelClick = () => {
  synth.cancel();
  statusEl.textContent = "Speech synthesis canceled";
};

speakBtn.addEventListener("click", speak);
pauseBtn.addEventListener("click", handlePauseClick);
resumeBtn.addEventListener("click", handleResumeClick);
cancelBtn.addEventListener("click", handleCancelClick);
} else {
document.body.innerHTML =
"<h1>Sorry, your browser does not support Speech Synthesis</h1>";
}
</script>
</body>
</html>