Просмотр исходного кода

Integrate Google Cloud Speech (STT) Service

0x0001F36D 6 лет назад
Родитель
Сommit
aa45d06c7f

+ 9 - 2
src/Yuuna.Contracts/Plugins/PluginGateway.cs

@@ -11,14 +11,21 @@ namespace Yuuna.Contracts.Plugins
     using Yuuna.Contracts.Patterns;
     using Yuuna.Contracts.TextSegmention;
     using Yuuna.Contracts.Utilities;
+    using Yuuna.Recognition.Speech;
 
     public class PluginGateway
     { 
         public static void Main(string[] args)
-        { 
+        {
             // Send("打開門");
             // Send("開燈");
-              Send("打開燈"); 
+
+            // Build the speech-to-text recognizer from the credential file "secret.json".
+            var stt = SpeechRecognizer.Create("secret.json");
+
+            // The result list can be empty when nothing was recognized, so only
+            // forward a transcript when at least one alternative is present.
+            stt.RecognizeCompleted += a =>
+            {
+                if (a != null && a.Count > 0)
+                    Send(a[0].Transcript);
+            };
+
+            // Recording runs until a key is pressed; disposing the handle stops it.
+            using (stt.Recognize())
+            {
+                Console.ReadKey();
+            }
         }
 
         public static void Send(string text)

+ 23 - 0
src/Yuuna.Contracts/Recognition/Speech/AlternativeBridge.cs

@@ -0,0 +1,23 @@
+
+
+namespace Yuuna.Recognition.Speech
+{
+    using Google.Cloud.Speech.V1;
+
+    /// <summary>
+    /// Adapts a Google <see cref="SpeechRecognitionAlternative"/> to <see cref="IAlternative"/>
+    /// by copying its confidence and transcript when constructed.
+    /// </summary>
+    internal struct AlternativeBridge : IAlternative
+    {
+        /// <summary>Copies the values exposed by <paramref name="s"/>.</summary>
+        /// <param name="s">Alternative taken from a recognition response.</param>
+        internal AlternativeBridge(SpeechRecognitionAlternative s)
+        {
+            this.Transcript = s.Transcript;
+            this.Confidence = s.Confidence;
+        }
+
+        /// <summary>Confidence value copied from the wrapped alternative.</summary>
+        public double Confidence { get; }
+
+        /// <summary>Transcript text copied from the wrapped alternative.</summary>
+        public string Transcript { get; }
+
+        /// <summary>Splits this value into its confidence and transcript parts.</summary>
+        /// <param name="confidence">Receives <see cref="Confidence"/>.</param>
+        /// <param name="transcript">Receives <see cref="Transcript"/>.</param>
+        public void Deconstruct(out double confidence, out string transcript)
+        {
+            transcript = this.Transcript;
+            confidence = this.Confidence;
+        }
+    }
+}

+ 22 - 0
src/Yuuna.Contracts/Recognition/Speech/Duration.cs

@@ -0,0 +1,22 @@
+
+
+namespace Yuuna.Recognition.Speech
+{
+    using System;
+
+    /// <summary>
+    /// Scope object for a single recording session: starts the recorder when constructed
+    /// and, when disposed, stops the recorder and then runs the supplied callback.
+    /// </summary>
+    internal sealed class Duration : IDisposable
+    {
+        private readonly IRecorder _recorder;
+        private readonly Action _onStop;
+
+        // 0 while the session is live, 1 once Dispose has run.
+        private int _disposed;
+
+        /// <summary>Starts <paramref name="recorder"/> and remembers how to end the session.</summary>
+        /// <param name="recorder">Recorder to start now and stop on dispose.</param>
+        /// <param name="onStop">Optional callback run after the recorder has been stopped.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="recorder"/> is null.</exception>
+        internal Duration(IRecorder recorder, Action onStop)
+        {
+            if (recorder is null)
+                throw new ArgumentNullException(nameof(recorder));
+
+            this._recorder = recorder;
+            this._onStop = onStop;
+            recorder.Start();
+        }
+
+        /// <summary>
+        /// Stops the recorder and runs the stop callback. Only the first call has an effect.
+        /// </summary>
+        void IDisposable.Dispose()
+        {
+            if (System.Threading.Interlocked.Exchange(ref this._disposed, 1) != 0)
+                return;
+
+            try
+            {
+                this._recorder.Stop();
+            }
+            finally
+            {
+                // Run the callback even when Stop throws so the owner can reset its state.
+                this._onStop?.Invoke();
+            }
+        }
+    }
+}

+ 15 - 0
src/Yuuna.Contracts/Recognition/Speech/IAlternative.cs

@@ -0,0 +1,15 @@
+
+
+namespace Yuuna.Recognition.Speech
+{
+    using System.ComponentModel;
+
+    /// <summary>
+    /// One candidate result of a speech recognition request.
+    /// </summary>
+    public interface IAlternative
+    {
+        /// <summary>Confidence value reported for this candidate.</summary>
+        double Confidence { get; }
+        /// <summary>Recognized text of this candidate.</summary>
+        string Transcript { get; }
+
+        /// <summary>
+        /// Supports deconstruction syntax, e.g. <c>var (confidence, transcript) = alternative;</c>.
+        /// Marked so editors do not list it in completion.
+        /// </summary>
+        [EditorBrowsable(EditorBrowsableState.Never)]
+        void Deconstruct(out double confidence, out string transcript);
+    }
+}

+ 21 - 0
src/Yuuna.Contracts/Recognition/Speech/IRecorder.cs

@@ -0,0 +1,21 @@
+
+
+namespace Yuuna.Recognition.Speech
+{
+    using System;
+    using System.IO;
+
+    /// <summary>
+    /// Audio recorder that can be started and stopped and that publishes the recorded audio as a stream.
+    /// </summary>
+    public interface IRecorder
+    {
+        /// <summary>Raised with a stream of the recorded audio once a recording has been stopped.</summary>
+        event Action<Stream> Completed;
+
+        /// <summary>Begins recording.</summary>
+        void Start();
+        /// <summary>Ends recording.</summary>
+        void Stop();
+    }
+}
+namespace Yuuna.Recognition.Speech
+{
+    using System.Collections.Concurrent;
+    using System.Diagnostics;
+    using System.Runtime.CompilerServices;
+}

+ 14 - 0
src/Yuuna.Contracts/Recognition/Speech/ISpeechRecognizer.cs

@@ -0,0 +1,14 @@
+
+
+namespace Yuuna.Recognition.Speech
+{
+    using System;
+    using System.Collections.Generic;
+
+    /// <summary>
+    /// Speech-to-text service: records audio and reports the recognized alternatives.
+    /// </summary>
+    public interface ISpeechRecognizer
+    {
+        /// <summary>Raised with the list of alternatives produced for a finished recording.</summary>
+        event Action<IReadOnlyList<IAlternative>> RecognizeCompleted;
+
+        /// <summary>
+        /// Starts a recognition session. Dispose the returned handle to end it.
+        /// </summary>
+        /// <returns>
+        /// A handle for the session. NOTE(review): the SpeechRecognizer implementation returns
+        /// <c>null</c> when a session is already running; confirm whether callers must handle that.
+        /// </returns>
+        IDisposable Recognize();
+    }
+}

+ 91 - 0
src/Yuuna.Contracts/Recognition/Speech/SpeechRecognizer.cs

@@ -0,0 +1,91 @@
+
+
+namespace Yuuna.Recognition.Speech
+{
+    using Google.Apis.Auth.OAuth2;
+    using Google.Cloud.Speech.V1;
+    using Grpc.Auth;
+    using Grpc.Core;
+    using System;
+    using System.Collections.Generic;
+    using System.IO;
+    using System.Linq;
+    using System.Threading;
+
+    /// <summary>
+    /// <see cref="ISpeechRecognizer"/> backed by Google Cloud Speech: audio captured by a
+    /// <see cref="WaveRecorder"/> is sent to the service when the recording stops and the
+    /// returned alternatives are published through <see cref="RecognizeCompleted"/>.
+    /// </summary>
+    public sealed class SpeechRecognizer : ISpeechRecognizer
+    {
+        private readonly SpeechClient _speech;
+        private readonly RecognitionConfig _config;
+        private readonly IRecorder _recorder;
+        private readonly object _lock = new object();
+
+        // True while a session handed out by Recognize() has not been disposed yet.
+        private volatile bool _started;
+
+        /// <summary>
+        /// Creates a new <see cref="ISpeechRecognizer"/> instance.
+        /// </summary>
+        /// <param name="secret">Path of the Google credential JSON file.</param>
+        /// <returns>A recognizer configured with the given credential file.</returns>
+        /// <exception cref="ArgumentException"><paramref name="secret"/> is null, empty or whitespace.</exception>
+        /// <exception cref="FileNotFoundException">The credential file does not exist.</exception>
+        public static ISpeechRecognizer Create(string secret)
+        {
+            if (string.IsNullOrWhiteSpace(secret))
+                throw new ArgumentException("secret is null or empty.", nameof(secret));
+            return new SpeechRecognizer(new FileInfo(secret));
+        }
+
+        private SpeechRecognizer(FileInfo secret)
+        {
+            if (secret is null)
+                throw new ArgumentNullException(nameof(secret));
+
+            if (!secret.Exists)
+                throw new FileNotFoundException("secret file not found.", secret.FullName);
+
+            var credential = GoogleCredential.FromFile(secret.FullName);
+            var channel = new Channel(SpeechClient.DefaultEndpoint.Host, credential.ToChannelCredentials());
+            this._speech = SpeechClient.Create(channel);
+
+            // The encoding and sample rate here are meant to match what WaveRecorder captures (16 kHz).
+            this._config = new RecognitionConfig
+            {
+                Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
+                SampleRateHertz = 16_000,
+                LanguageCode = Thread.CurrentThread.CurrentCulture.Name,
+            };
+            this._recorder = new WaveRecorder();
+            this._recorder.Completed += this.OnComplete;
+        }
+
+        /// <summary>
+        /// Sends the recorded audio to the speech service and raises
+        /// <see cref="RecognizeCompleted"/> with every alternative of every result.
+        /// </summary>
+        /// <param name="stream">Recorded audio supplied by the recorder.</param>
+        private void OnComplete(Stream stream)
+        {
+            lock (this._lock)
+            {
+                var wavStream = RecognitionAudio.FromStream(stream);
+                var response = this._speech.Recognize(this._config, wavStream);
+
+                var alternatives =
+                    from r in response.Results
+                    from a in r.Alternatives
+                    select new AlternativeBridge(a) as IAlternative;
+
+                this.RecognizeCompleted?.Invoke(alternatives.ToList());
+            }
+        }
+
+        /// <summary>Raised with the alternatives recognized from a finished recording.</summary>
+        public event Action<IReadOnlyList<IAlternative>> RecognizeCompleted;
+
+        /// <summary>
+        /// Starts recording. Disposing the returned handle stops the recording, which in turn
+        /// triggers recognition.
+        /// </summary>
+        /// <returns>
+        /// The session handle, or <c>null</c> when a session is already in progress.
+        /// </returns>
+        public IDisposable Recognize()
+        {
+            lock (this._lock)
+            {
+                if (this._started)
+                    return null;
+
+                this._started = true;
+                try
+                {
+                    return new Duration(this._recorder, () =>
+                    {
+                        this._started = false;
+                    });
+                }
+                catch
+                {
+                    // Starting the recorder failed: release the flag so a later call can retry.
+                    this._started = false;
+                    throw;
+                }
+            }
+        }
+    }
+}

+ 68 - 0
src/Yuuna.Contracts/Recognition/Speech/WaveRecorder.cs

@@ -0,0 +1,68 @@
+
+
+namespace Yuuna.Recognition.Speech
+{
+    using NAudio.Wave;
+    using System;
+    using System.Collections.Generic;
+    using System.IO;
+
+    /// <summary>
+    /// <see cref="IRecorder"/> that captures audio with NAudio's <see cref="WaveInEvent"/>
+    /// (16000 Hz, 1 channel), buffers it in memory and hands the result to
+    /// <see cref="Completed"/> subscribers when stopped.
+    /// </summary>
+    public class WaveRecorder : IRecorder
+    {
+        // Bytes captured since the last Start(); guarded by _lock.
+        private readonly List<byte> _raw;
+        private readonly WaveInEvent _wave;
+        private readonly object _lock = new object();
+        private volatile bool _started = false;
+
+        /// <summary>Raised from <see cref="Stop"/> with a stream over the captured audio.</summary>
+        public event Action<Stream> Completed;
+
+        /// <summary>Appends a freshly captured chunk of audio to the in-memory buffer.</summary>
+        /// <param name="sender">Event source (unused).</param>
+        /// <param name="e">Capture buffer together with the number of valid bytes in it.</param>
+        protected virtual void OnDataAvailable(object sender, WaveInEventArgs e)
+        {
+            lock (this._lock)
+            {
+                // Only the first BytesRecorded bytes are valid; the buffer may not be full.
+                this._raw.AddRange(new ArraySegment<byte>(e.Buffer, 0, e.BytesRecorded));
+            }
+        }
+
+        /// <summary>Creates the recorder and wires up the capture device callback.</summary>
+        public WaveRecorder()
+        {
+            this._raw = new List<byte>(4096);
+            this._wave = new WaveInEvent
+            {
+                WaveFormat = new WaveFormat(16000, 1)
+            };
+
+            this._wave.DataAvailable += this.OnDataAvailable;
+        }
+
+        /// <summary>
+        /// Clears previously captured data and begins recording. Does nothing when already recording.
+        /// </summary>
+        public void Start()
+        {
+            lock (this._lock)
+            {
+                if (this._started)
+                    return;
+                this._raw.Clear();
+                this._wave.StartRecording();
+                this._started = true;
+            }
+        }
+
+        /// <summary>
+        /// Stops recording and raises <see cref="Completed"/> with the captured audio.
+        /// Does nothing when not recording.
+        /// </summary>
+        public void Stop()
+        {
+            lock (this._lock)
+            {
+                if (!this._started)
+                    return;
+                this._wave.StopRecording();
+
+                try
+                {
+                    var captured = new MemoryStream(this._raw.ToArray());
+                    var wavStream = new RawSourceWaveStream(captured, this._wave.WaveFormat);
+
+                    // No subscribers is a valid state; avoid a NullReferenceException.
+                    this.Completed?.Invoke(wavStream);
+                }
+                finally
+                {
+                    // Always leave the "recording" state, even when a subscriber throws,
+                    // so the recorder can be started again.
+                    this._started = false;
+                }
+            }
+        }
+    }
+}

+ 12 - 0
src/Yuuna.Contracts/Recognition/Speech/readme.md

@@ -0,0 +1,12 @@
+# Google Cloud Speech V1
+## 安裝方式
+1. 進入 [**官網**](https://cloud.google.com/speech-to-text/docs/quickstart-client-libraries)。
+2. 照著 **事前準備** 做完會產生一份帶有 secret 的 json 檔。
+3. 把 json 檔改為 ***secret.json*** 並複製到執行檔的所在目錄就算完成安裝。
+
+注意: ***不要把你的 secret 檔案公開、外流,有很大的機會害自己荷包失血。***
+
+## 遺失 Secret 檔
+1. 進入 [IAM 管理頁](https://console.cloud.google.com/iam-admin/serviceaccounts)。
+2. 選擇你的專案,並在 **服務帳戶** 列表中的右方 **動作** 行中選擇 **建立金鑰**。
+3. 選擇 JSON 格式,並照 [**安裝方式**](#安裝方式) 中的第三點處理。

+ 2 - 0
src/Yuuna.Contracts/Yuuna.Contracts.csproj

@@ -6,7 +6,9 @@
   </PropertyGroup>
 
   <ItemGroup>
+    <PackageReference Include="Google.Cloud.Speech.V1" Version="1.3.1" />
     <PackageReference Include="Jieba.Net.Core" Version="1.1.0" />
+    <PackageReference Include="NAudio" Version="1.9.0" />
   </ItemGroup>
 
 </Project>