I'm using the Microsoft.CognitiveServices.Speech SDK to convert text to speech in a custom API written in C#. The return type is 'AudioDataStream'. However, I need to convert it to a Stream or to bytes so that I can use it in my UI. How can I convert it to a Stream or bytes, or is there any other way I can use a stream output in my UI?
Edit: Here is the code I'm using. It gives me the error 'Cannot convert Microsoft.CognitiveServices.Speech.AudioDataStream to System.IO.Stream'. I also tried using AudioDataStream.ReadData(), but its return type is 'uint', not bytes.
/// <summary>
/// Accepts an uploaded audio file, round-trips it through a temporary file into an
/// in-memory stream, passes that stream to ConversationService.ConversationIOStreamAsync,
/// and tries to return the result to the caller as an "audio/wav" file.
/// </summary>
/// <param name="audioFile">The uploaded form file; null or zero-length input yields a 400 response.</param>
/// <returns>
/// 400 when no audio was received, 500 with the exception message when the try block throws,
/// otherwise the file result built from the service output.
/// </returns>
/// <remarks>
/// As posted, this snippet is the one reported to fail with "Cannot convert
/// AudioDataStream to System.IO.Stream" - see the note on the File(...) call below.
/// </remarks>
[HttpPost]
public async Task<IActionResult> ConversationIOStreamAsync(IFormFile audioFile)
{
// Reject a missing or empty upload before touching the file system.
if (audioFile == null || audioFile.Length == 0)
{
return BadRequest("No audio file received.");
}
// Save the audio file to a temporary location.
// NOTE(review): audioFile.FileName is supplied by the client and is combined into the
// path unmodified - confirm it cannot contain directory segments or a rooted path.
var filePath = Path.Combine(Path.GetTempPath(), audioFile.FileName);
using (var stream = new FileStream(filePath, FileMode.Create))
{
await audioFile.CopyToAsync(stream);
}
// Read the temporary file straight back into memory.
// NOTE(review): nothing visible here deletes the temporary file afterwards, and the
// upload could presumably be copied into memoryStream directly - confirm the disk
// round trip is intentional.
var memoryStream = new MemoryStream();
using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read))
{
await stream.CopyToAsync(memoryStream);
}
// Rewind so the service reads the audio from the beginning.
memoryStream.Position = 0;
try
{
var audioStream = await ConversationService.ConversationIOStreamAsync(memoryStream);
// 'stream' is an AudioDataStream here, not a System.IO.Stream; this is the value the
// reported conversion error refers to when it is handed to File(...) on the next line.
// NOTE(review): the using declaration releases 'stream' when this scope exits, which
// looks like it happens before the returned result is written - confirm.
using var stream = AudioDataStream.FromResult(audioStream);
return File(stream, "audio/wav", audioFile.FileName);
}
catch (System.Exception ex)
{
// Any failure is reported as a 500 whose body includes the exception message.
return StatusCode(500, $"Internal server error: {ex.Message}");
}
}
I'm using the Microsoft.CognitiveServices.Speech SDK to convert text to speech in a custom API written in C#. The return type is 'AudioDataStream'. However, I need to convert it to a Stream or to bytes so that I can use it in my UI. How can I convert it to a Stream or bytes, or is there any other way I can use a stream output in my UI?
Edit: Here is the code I'm using. It gives me the error 'Cannot convert Microsoft.CognitiveServices.Speech.AudioDataStream to System.IO.Stream'. I also tried using AudioDataStream.ReadData(), but its return type is 'uint', not bytes.
/// <summary>
/// Accepts an uploaded audio file, round-trips it through a temporary file into an
/// in-memory stream, passes that stream to ConversationService.ConversationIOStreamAsync,
/// and tries to return the result to the caller as an "audio/wav" file.
/// </summary>
/// <param name="audioFile">The uploaded form file; null or zero-length input yields a 400 response.</param>
/// <returns>
/// 400 when no audio was received, 500 with the exception message when the try block throws,
/// otherwise the file result built from the service output.
/// </returns>
/// <remarks>
/// As posted, this snippet is the one reported to fail with "Cannot convert
/// AudioDataStream to System.IO.Stream" - see the note on the File(...) call below.
/// </remarks>
[HttpPost]
public async Task<IActionResult> ConversationIOStreamAsync(IFormFile audioFile)
{
// Reject a missing or empty upload before touching the file system.
if (audioFile == null || audioFile.Length == 0)
{
return BadRequest("No audio file received.");
}
// Save the audio file to a temporary location.
// NOTE(review): audioFile.FileName is supplied by the client and is combined into the
// path unmodified - confirm it cannot contain directory segments or a rooted path.
var filePath = Path.Combine(Path.GetTempPath(), audioFile.FileName);
using (var stream = new FileStream(filePath, FileMode.Create))
{
await audioFile.CopyToAsync(stream);
}
// Read the temporary file straight back into memory.
// NOTE(review): nothing visible here deletes the temporary file afterwards, and the
// upload could presumably be copied into memoryStream directly - confirm the disk
// round trip is intentional.
var memoryStream = new MemoryStream();
using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read))
{
await stream.CopyToAsync(memoryStream);
}
// Rewind so the service reads the audio from the beginning.
memoryStream.Position = 0;
try
{
var audioStream = await ConversationService.ConversationIOStreamAsync(memoryStream);
// 'stream' is an AudioDataStream here, not a System.IO.Stream; this is the value the
// reported conversion error refers to when it is handed to File(...) on the next line.
// NOTE(review): the using declaration releases 'stream' when this scope exits, which
// looks like it happens before the returned result is written - confirm.
using var stream = AudioDataStream.FromResult(audioStream);
return File(stream, "audio/wav", audioFile.FileName);
}
catch (System.Exception ex)
{
// Any failure is reported as a 500 whose body includes the exception message.
return StatusCode(500, $"Internal server error: {ex.Message}");
}
}
Share
Improve this question
edited Mar 20 at 8:11
Maryam Mirza
asked Mar 19 at 7:40
Maryam MirzaMaryam Mirza
52 bronze badges
4
|
1 Answer
Reset to default
0
I have created an ASP.NET Web API project that converts the AudioDataStream to a MemoryStream by reading the audio data in chunks using ReadData(), then wraps it in a WAV container before returning it as a Stream. This allows the API to send the audio as a downloadable file in the response.
Below is my complete code.
SpeechService.cs :
using Microsoft.CognitiveServices.Speech;
namespace WebApplication15.Services
{
    /// <summary>
    /// Synthesizes speech with the Azure Speech SDK and exposes the audio as a regular
    /// <see cref="Stream"/> containing a WAV (RIFF) file.
    /// </summary>
    public class SpeechService
    {
        // PCM layout requested from the service and described by the fallback WAV header.
        private const int SampleRate = 16000;
        private const short BitsPerSample = 16;
        private const short Channels = 1;

        // Size of the canonical PCM WAV header written by ConvertToWaveFormat.
        private const int WaveHeaderLength = 44;

        // "RIFF" + 4-byte size + "WAVE": the minimum needed to recognise an existing header.
        private const int RiffProbeLength = 12;

        private readonly string _subscriptionKey;
        private readonly string _region;

        /// <summary>
        /// Creates the service, reading the Speech resource key and region from configuration.
        /// </summary>
        /// <param name="configuration">
        /// Application configuration. The keys <c>Speech:SubscriptionKey</c> and
        /// <c>Speech:Region</c> are used when present; otherwise the original placeholder
        /// values are kept so existing setups behave as before.
        /// </param>
        public SpeechService(IConfiguration configuration)
        {
            _subscriptionKey = ReadSetting(configuration, "Speech:SubscriptionKey", "<speechKey>");
            _region = ReadSetting(configuration, "Speech:Region", "<speechRegion>");
        }

        /// <summary>
        /// Converts <paramref name="text"/> to speech and returns it as a WAV stream
        /// positioned at the start.
        /// </summary>
        /// <param name="text">The plain text to synthesize.</param>
        /// <returns>A readable, seekable in-memory stream holding the WAV file. The caller owns it.</returns>
        /// <exception cref="InvalidOperationException">
        /// Synthesis did not finish with <see cref="ResultReason.SynthesizingAudioCompleted"/>.
        /// </exception>
        public async Task<Stream> ConvertTextToSpeechAsync(string text)
        {
            var config = SpeechConfig.FromSubscription(_subscriptionKey, _region);

            // Pin the output format so the audio always matches the 16 kHz / 16-bit / mono
            // parameters the WAV header below is built from, rather than relying on the
            // SDK default.
            config.SetSpeechSynthesisOutputFormat(SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm);

            // A null audio config keeps the SDK from playing the result on the server's speaker.
            using var synthesizer = new SpeechSynthesizer(config, null);
            using var result = await synthesizer.SpeakTextAsync(text);

            if (result.Reason != ResultReason.SynthesizingAudioCompleted)
            {
                throw new InvalidOperationException(DescribeFailure(result));
            }

            using var audioStream = AudioDataStream.FromResult(result);
            using var rawAudio = new MemoryStream();

            // ReadData fills the buffer and returns how many bytes it wrote; zero means end of data.
            var buffer = new byte[4096];
            uint bytesRead;
            while ((bytesRead = audioStream.ReadData(buffer)) > 0)
            {
                rawAudio.Write(buffer, 0, (int)bytesRead);
            }

            return ConvertToWaveFormat(rawAudio);
        }

        /// <summary>
        /// Returns the audio as a WAV stream. Audio that already starts with a RIFF/WAVE
        /// header is copied unchanged; headerless PCM gets a 44-byte PCM header prepended.
        /// </summary>
        /// <param name="rawAudioStream">The audio bytes collected from the SDK. Not disposed here.</param>
        /// <returns>A new stream, independent of <paramref name="rawAudioStream"/>, positioned at 0.</returns>
        private static Stream ConvertToWaveFormat(MemoryStream rawAudioStream)
        {
            // RIFF output formats carry their own header; wrapping them again would embed
            // the first header in the sample data and corrupt the start of playback.
            if (HasRiffWaveHeader(rawAudioStream))
            {
                return new MemoryStream(rawAudioStream.ToArray());
            }

            int blockAlign = Channels * (BitsPerSample / 8);
            int byteRate = SampleRate * blockAlign;
            int dataSize = checked((int)rawAudioStream.Length);

            var wavStream = new MemoryStream(WaveHeaderLength + dataSize);

            // leaveOpen: the writer must not close the stream that is being returned.
            using (var writer = new BinaryWriter(wavStream, System.Text.Encoding.ASCII, leaveOpen: true))
            {
                writer.Write("RIFF".ToCharArray());
                writer.Write(36 + dataSize);          // RIFF chunk size = header remainder + data
                writer.Write("WAVE".ToCharArray());
                writer.Write("fmt ".ToCharArray());
                writer.Write(16);                     // fmt chunk size for PCM
                writer.Write((short)1);               // audio format 1 = PCM
                writer.Write(Channels);
                writer.Write(SampleRate);
                writer.Write(byteRate);
                writer.Write((short)blockAlign);
                writer.Write(BitsPerSample);
                writer.Write("data".ToCharArray());
                writer.Write(dataSize);
            }

            rawAudioStream.Position = 0;
            rawAudioStream.CopyTo(wavStream);
            wavStream.Position = 0;
            return wavStream;
        }

        /// <summary>Checks whether the stream begins with "RIFF....WAVE".</summary>
        private static bool HasRiffWaveHeader(MemoryStream audio)
        {
            if (audio.Length < RiffProbeLength)
            {
                return false;
            }

            var probe = new byte[RiffProbeLength];
            audio.Position = 0;
            if (audio.Read(probe, 0, probe.Length) != probe.Length)
            {
                return false;
            }

            return probe[0] == 'R' && probe[1] == 'I' && probe[2] == 'F' && probe[3] == 'F'
                && probe[8] == 'W' && probe[9] == 'A' && probe[10] == 'V' && probe[11] == 'E';
        }

        /// <summary>Builds the failure message, adding cancellation details when available.</summary>
        private static string DescribeFailure(SpeechSynthesisResult result)
        {
            if (result.Reason == ResultReason.Canceled)
            {
                var details = SpeechSynthesisCancellationDetails.FromResult(result);
                return $"Speech synthesis failed: {result.Reason} ({details.Reason}, {details.ErrorCode}): {details.ErrorDetails}";
            }

            return $"Speech synthesis failed: {result.Reason}";
        }

        /// <summary>Returns the configured value for <paramref name="key"/>, or the fallback when unset or blank.</summary>
        private static string ReadSetting(IConfiguration configuration, string key, string fallback)
        {
            var value = configuration?[key];
            return string.IsNullOrWhiteSpace(value) ? fallback : value;
        }
    }
}
SpeechController.cs :
using Microsoft.AspNetCore.Mvc;
using WebApplication15.Services;
namespace WebApplication15.Controllers
{
    /// <summary>
    /// Text-to-speech endpoints served under <c>api/speech</c>.
    /// </summary>
    [ApiController]
    [Route("api/speech")]
    public class SpeechController : ControllerBase
    {
        private readonly SpeechService _speechService;

        /// <summary>Stores the speech service supplied by dependency injection.</summary>
        public SpeechController(SpeechService speechService) => _speechService = speechService;

        /// <summary>
        /// Synthesizes the text in the request body and returns it as a file named
        /// <c>speech.wav</c> with content type <c>audio/wav</c>.
        /// </summary>
        /// <param name="request">The JSON body carrying the text to speak.</param>
        /// <returns>
        /// 400 when the text is null or empty, 500 with the exception message when the
        /// service throws, otherwise the audio file.
        /// </returns>
        [HttpPost("convert")]
        public async Task<IActionResult> ConvertTextToSpeech([FromBody] SpeechRequest request)
        {
            var text = request.Text;
            if (text is null || text.Length == 0)
            {
                return BadRequest("Text cannot be empty.");
            }

            try
            {
                var wav = await _speechService.ConvertTextToSpeechAsync(text);
                return File(wav, "audio/wav", "speech.wav");
            }
            catch (System.Exception ex)
            {
                // Every failure is reported to the caller as a 500 carrying the message text.
                return StatusCode(500, $"Internal server error: {ex.Message}");
            }
        }
    }

    /// <summary>Request body for the <c>convert</c> endpoint.</summary>
    public class SpeechRequest
    {
        /// <summary>The text to convert to speech.</summary>
        public string Text { get; set; }
    }
}
Add the line builder.Services.AddSingleton<SpeechService>(); to Program.cs.
Output :
I successfully converted the text to speech; the output in the browser is shown below.
[…] ReadData? i.e. create a loop that reads a buffer and writes it to the stream until the return value from ReadData is zero. – JonasH Commented Mar 19 at 7:45
[…] AudioDataStream.ReadData() to read the stream into a MemoryStream and convert it to byte[] or Stream. – Dasari Kamali Commented Mar 19 at 7:48