I have a problem when I try to run an application using Speech-to-Text v2. I want to convert speech to text from a microphone. I authenticate through the GOOGLE_APPLICATION_CREDENTIALS environment variable, and with v1 everything works for me. However, after migrating to v2 I get the following exception (a sketch of the v1 setup I am migrating from follows the stack trace):
com.google.api.gax.rpc.InvalidArgumentException: io.grpc.StatusRuntimeException: INVALID_ARGUMENT: Invalid resource field value in the request.
at com.google.api.gax.rpc.ApiExceptionFactory.createException(ApiExceptionFactory.java:92)
at com.google.api.gax.grpc.GrpcApiExceptionFactory.create(GrpcApiExceptionFactory.java:98)
at com.google.api.gax.grpc.GrpcApiExceptionFactory.create(GrpcApiExceptionFactory.java:66)
at com.google.api.gax.grpc.ExceptionResponseObserver.onErrorImpl(ExceptionResponseObserver.java:82)
at com.google.api.gax.rpc.StateCheckingResponseObserver.onError(StateCheckingResponseObserver.java:84)
at com.google.api.gax.grpc.GrpcDirectStreamController$ResponseObserverAdapter.onClose(GrpcDirectStreamController.java:148)
at io.grpc.PartialForwardingClientCallListener.onClose(PartialForwardingClientCallListener.java:39)
at io.grpc.ForwardingClientCallListener.onClose(ForwardingClientCallListener.java:23)
at io.grpc.ForwardingClientCallListener$SimpleForwardingClientCallListener.onClose(ForwardingClientCallListener.java:40)
at com.google.api.gax.grpc.ChannelPool$ReleasingClientCall$1.onClose(ChannelPool.java:569)
at io.grpc.internal.DelayedClientCall$DelayedListener$3.run(DelayedClientCall.java:489)
at io.grpc.internal.DelayedClientCall$DelayedListener.delayOrExecute(DelayedClientCall.java:453)
at io.grpc.internal.DelayedClientCall$DelayedListener.onClose(DelayedClientCall.java:486)
at io.grpc.internal.ClientCallImpl.closeObserver(ClientCallImpl.java:564)
at io.grpc.internal.ClientCallImpl.access$100(ClientCallImpl.java:72)
at io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1StreamClosed.runInternal(ClientCallImpl.java:729)
at io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1StreamClosed.runInContext(ClientCallImpl.java:710)
at io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)
at io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:133)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: io.grpc.StatusRuntimeException: INVALID_ARGUMENT: Invalid resource field value in the request.
at io.grpc.Status.asRuntimeException(Status.java:532)
... 17 more
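For comparison, the relevant part of my working v1 setup is roughly this (a trimmed sketch, not my exact code; the microphone loop around it is the same as in the v2 program below):

import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.StreamingRecognitionConfig;

// v1: language and encoding go directly on RecognitionConfig,
// and no recognizer resource is involved anywhere
RecognitionConfig v1Config = RecognitionConfig.newBuilder()
        .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
        .setSampleRateHertz(16000)
        .setLanguageCode("en-US")
        .build();
StreamingRecognitionConfig v1StreamingConfig = StreamingRecognitionConfig.newBuilder()
        .setConfig(v1Config)
        .build();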
My code:
package com.mycompany.testgoogleversio2;
import com.google.api.gax.rpc.ClientStream;
import com.google.api.gax.rpc.ResponseObserver;
import com.google.api.gax.rpc.StreamController;
import com.google.cloud.speech.v2.ExplicitDecodingConfig;
import com.google.cloud.speech.v2.RecognitionConfig;
import com.google.cloud.speech.v2.SpeechClient;
import com.google.cloud.speech.v2.SpeechRecognitionAlternative;
import com.google.cloud.speech.v2.StreamingRecognitionConfig;
import com.google.cloud.speech.v2.StreamingRecognitionResult;
import com.google.cloud.speech.v2.StreamingRecognizeRequest;
import com.google.cloud.speech.v2.StreamingRecognizeResponse;
import com.google.protobuf.ByteString;
import java.util.ArrayList;
import java.util.List;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.TargetDataLine;
public class TestGoogleMicrofone {
/**
* @param args the command line arguments
*/
public static void main(String[] args) {
ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
try (SpeechClient client = SpeechClient.create()) {
responseObserver
= new ResponseObserver<StreamingRecognizeResponse>() {
ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();
                @Override
                public void onStart(StreamController controller) {
                    System.out.println("Starting");
}
                @Override
                public void onResponse(StreamingRecognizeResponse response) {
                    System.out.println("Loading");
StreamingRecognitionResult result = response.getResultsList().get(0);
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
System.out.printf("Transcript : %s\n", alternative.getTranscript() + ", is final: " + result.getIsFinal());
responses.add(response);
}
                @Override
                public void onComplete() {
for (StreamingRecognizeResponse response : responses) {
StreamingRecognitionResult result = response.getResultsList().get(0);
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
System.out.printf("Transcript : %s\n", alternative.getTranscript());
}
}
                @Override
                public void onError(Throwable t) {
t.printStackTrace();
}
};
ClientStream<StreamingRecognizeRequest> clientStream
= client.streamingRecognizeCallable().splitCall(responseObserver);
            List<String> allLanguageCodes = new ArrayList<>(); // currently unused; the config below hard-codes "en-US"
allLanguageCodes.add("pl-PL");
            RecognitionConfig recognitionConfig
                    = RecognitionConfig.newBuilder()
                            .setExplicitDecodingConfig(
                                    ExplicitDecodingConfig.newBuilder()
                                            .setEncoding(ExplicitDecodingConfig.AudioEncoding.LINEAR16)
                                            .setSampleRateHertz(16000)
                                            .setAudioChannelCount(1))
                            .addLanguageCodes("en-US")
                            .setModel("chirp")
                            .build();
            StreamingRecognitionConfig streamingRecognitionConfig
                    = StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build();
StreamingRecognizeRequest request
= StreamingRecognizeRequest.newBuilder()
.setStreamingConfig(streamingRecognitionConfig)
.build(); // The first request in a streaming call has to be a config
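            // (From the v2 proto, as far as I can tell: StreamingRecognizeRequest
            // has a top-level `recognizer` string field plus a oneof holding either
            // streaming_config or audio. I never set `recognizer`, since nothing
            // like it existed in v1.)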
            // SampleRate: 16000 Hz, SampleSizeInBits: 16, channels: 1, signed: true, bigEndian: false
            AudioFormat format = new AudioFormat(16000.0f, 16, 1, true, false);
            DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
            TargetDataLine microphone = (TargetDataLine) AudioSystem.getLine(info);
microphone.open(format);
            int numBytesRead;
            byte[] data = new byte[microphone.getBufferSize() / 5];
microphone.start();
clientStream.send(request);
long startTime = System.currentTimeMillis();
int bytesRead = 0;
try {
System.out.println("Start speaking");
while (true) {
System.out.println("Jestem2");
long estimatedTime = System.currentTimeMillis() - startTime;
                    numBytesRead = microphone.read(data, 0, data.length);
bytesRead += numBytesRead;
if (estimatedTime > 60000) { // 60 seconds
System.out.println("Stop speakingaaa.");
microphone.close();
//clientStream.closeSend();
break;
}
                    // Subsequent requests carry only audio (streaming_config and
                    // audio share a oneof, so setting both keeps only audio anyway)
                    request = StreamingRecognizeRequest.newBuilder()
                            .setAudio(ByteString.copyFrom(data, 0, numBytesRead))
                            .build();
                    System.out.println(request.getRecognizer()); // always prints an empty string
                    clientStream.send(request);
//Thread.sleep(100);
}
} catch (Exception e) {
e.printStackTrace();
}
} catch (Exception e) {
e.printStackTrace();
}
responseObserver.onComplete();
}
}
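One more observation: the request.getRecognizer() print in the loop always shows an empty string. From the v2 API reference I understand that requests carry a recognizer resource name of the form projects/{project}/locations/{location}/recognizers/{recognizer}, and I have seen "_" mentioned as a default recognizer ID, so I wonder whether the first request needs something like the sketch below (PROJECT_ID is a placeholder, and the "_" part is my guess, so this may well be wrong):

StreamingRecognizeRequest configRequest = StreamingRecognizeRequest.newBuilder()
        // guess: point the stream at a recognizer resource; "_" is supposedly
        // the ad-hoc default recognizer in v2 (PROJECT_ID is a placeholder)
        .setRecognizer("projects/PROJECT_ID/locations/global/recognizers/_")
        .setStreamingConfig(streamingRecognitionConfig)
        .build();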
I have no idea what I'm doing wrong, but with v1 everything works fine. Could someone point out the source of this problem?