// SpeechRecognizer.cs
  2. namespace Yuuna.Recognition.Speech
  3. {
  4. using Google.Apis.Auth.OAuth2;
  5. using Google.Cloud.Speech.V1;
  6. using Grpc.Auth;
  7. using Grpc.Core;
  8. using System;
  9. using System.Collections.Generic;
  10. using System.IO;
  11. using System.Linq;
  12. using System.Threading;
  13. public sealed class SpeechRecognizer : ISpeechRecognizer
  14. {
  15. private readonly SpeechClient _speech;
  16. private readonly RecognitionConfig _config;
  17. private readonly IRecorder _recorder;
  18. private readonly object _lock = new object();
  19. private volatile bool _started;
  20. /// <summary>
  21. /// 建立新 <see cref="ISpeechRecognizer"/> 實體。
  22. /// </summary>
  23. /// <param name="secret"></param>
  24. /// <returns></returns>
  25. public static ISpeechRecognizer Create(string secret)
  26. {
  27. if (string.IsNullOrWhiteSpace(secret))
  28. throw new ArgumentException("secret is null or empty.", nameof(secret));
  29. return new SpeechRecognizer(new FileInfo(secret));
  30. }
  31. private SpeechRecognizer(FileInfo secret)
  32. {
  33. if (secret is null)
  34. throw new ArgumentNullException(nameof(secret));
  35. if (!secret.Exists)
  36. throw new FileNotFoundException("secret file not found.", secret.FullName);
  37. var credential = GoogleCredential.FromFile(secret.FullName);
  38. var channel = new Channel(SpeechClient.DefaultEndpoint.Host, credential.ToChannelCredentials());
  39. this._speech = SpeechClient.Create(channel);
  40. this._config = new RecognitionConfig
  41. {
  42. Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
  43. SampleRateHertz = 16_000,
  44. LanguageCode = Thread.CurrentThread.CurrentCulture.Name,
  45. };
  46. this._recorder = new WaveRecorder();
  47. this._recorder.Completed += this.OnComplete;
  48. }
  49. private void OnComplete(Stream stream)
  50. {
  51. lock (this._lock)
  52. {
  53. var wavStream = RecognitionAudio.FromStream(stream);
  54. var response = this._speech.Recognize(this._config, wavStream);
  55. var alternatives =
  56. from r in response.Results
  57. from a in r.Alternatives
  58. select new AlternativeBridge(a) as IAlternative;
  59. this.RecognizeCompleted?.Invoke(alternatives.ToList());
  60. }
  61. }
  62. public event Action<IReadOnlyList<IAlternative>> RecognizeCompleted;
  63. public IDisposable Recognize()
  64. {
  65. lock (this._lock)
  66. {
  67. if (this._started)
  68. return null;
  69. this._started = true;
  70. return new Duration(this._recorder, () =>
  71. {
  72. this._started = false;
  73. });
  74. }
  75. }
  76. }
  77. }