Kinect for Windows语音识别(Speech)
Kinect for Windows提供了语音识别的能力,它依靠Kinect的语音采集流进行识别,这是建立在微软语音识别库的基础之上的,关于微软语音识别可以参考http://msdn.microsoft.com/en-us/library/hh361572(v=office.14).aspx。对于Kinect的语音识别,目前只支持如下语言:en-US,de-DE,en-AU,en-CA,en-GB,en-IE,en-NZ,es-ES,es-MX,fr-CA,fr-FR,it-IT,ja-JP(很遗憾没有中文,相信在不远的将来会有的)。下列代码为语音识别:
// The Kinect sensor this form works with; remains null until a connected sensor is found.
private KinectSensor kinectsensor = null;

// Speech recognition engine; only created once a Kinect recognizer has been located.
private SpeechRecognitionEngine speechEngine;
/// <summary>
/// Runs when the form is shown: starts the first connected Kinect sensor that can be
/// started and, if a Kinect speech recognizer is installed, begins continuous speech
/// recognition on the sensor's audio stream.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void Form1_Shown(object sender, EventArgs e)
{
    // Walk the sensor collection and take the first connected sensor that starts successfully.
    foreach (KinectSensor ks in KinectSensor.KinectSensors)
    {
        if (ks.Status != KinectStatus.Connected)
        {
            continue;
        }

        try
        {
            ks.Start();
        }
        catch (IOException)
        {
            // Start() throws IOException when another application is already
            // using this sensor; skip it and try the next one.
            continue;
        }

        kinectsensor = ks;
        this.Text = "Kinect开始工作……";
        break;
    }

    if (kinectsensor == null)
    {
        return;
    }

    // Recognizer description used to create the speech engine.
    RecognizerInfo recognizer = GetKinectRecognizer();
    if (recognizer == null)
    {
        return;
    }

    this.speechEngine = new SpeechRecognitionEngine(recognizer.Id);

    // Load the grammar from the XML resource. UTF-8 is used instead of ASCII so that
    // non-ASCII phrases (e.g. a Japanese grammar) are not replaced with '?'.
    using (var memoryStream = new MemoryStream(Encoding.UTF8.GetBytes(Properties.Resources.SpeechGrammar)))
    {
        var g = new Grammar(memoryStream);
        speechEngine.LoadGrammar(g);
    }

    #region Alternative: build the grammar in code instead of loading the XML resource
    //var directions = new Choices();
    //directions.Add(new SemanticResultValue("した", "した"));
    //directions.Add(new SemanticResultValue("うえ", "うえ"));
    //directions.Add(new SemanticResultValue("はじめ", "はじめ"));
    //directions.Add(new SemanticResultValue("おわり", "おわり"));

    //var grammarbuilder = new GrammarBuilder { Culture = recognizer.Culture };
    //grammarbuilder.Append(directions);

    //Grammar grammar = new Grammar(grammarbuilder);
    //speechEngine.LoadGrammar(grammar);
    #endregion

    // Subscribe to recognition results.
    speechEngine.SpeechRecognized += SpeechRecognized;

    // Feed the Kinect audio stream to the engine as 16 kHz, 16-bit, mono PCM
    // (32000 average bytes per second, block alignment 2).
    speechEngine.SetInputToAudioStream(
        kinectsensor.AudioSource.Start(),
        new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null));

    // Keep recognizing until explicitly stopped.
    speechEngine.RecognizeAsync(RecognizeMode.Multiple);
}
/// <summary>
/// Searches the installed speech recognizers for the one to use with the Kinect.
/// </summary>
/// <returns>
/// The first installed recognizer whose "Kinect" additional-info entry equals "True"
/// (case-insensitive) and whose culture name is "en-US" (case-insensitive);
/// <c>null</c> when no such recognizer is installed.
/// </returns>
private static RecognizerInfo GetKinectRecognizer()
{
    foreach (RecognizerInfo candidate in SpeechRecognitionEngine.InstalledRecognizers())
    {
        // Installed recognizers include Kinect and non-Kinect ones; the "Kinect"
        // entry tells them apart. A missing entry leaves the flag null.
        string kinectFlag;
        candidate.AdditionalInfo.TryGetValue("Kinect", out kinectFlag);

        bool isKinectRecognizer = "True".Equals(kinectFlag, StringComparison.OrdinalIgnoreCase);
        if (!isKinectRecognizer)
        {
            continue;
        }

        // Language selection: "en-US" for English (use "ja-JP" here for Japanese).
        bool isWantedCulture = "en-US".Equals(candidate.Culture.Name, StringComparison.OrdinalIgnoreCase);
        if (isWantedCulture)
        {
            return candidate;
        }
    }

    return null;
}
[*]
/// <summary>
/// Handles a recognition result: when the confidence is high enough, shows the
/// command matching the recognized semantic value in <c>Message_Lab</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Recognition result, including confidence and semantic value.</param>
private void SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
    // Confidence ranges from 0 to 1; higher means a more reliable match.
    const double ConfidenceThreshold = 0.4;
    if (e.Result.Confidence < ConfidenceThreshold)
    {
        return;
    }

    // A phrase recognized without a semantic tag carries no value; ignore it
    // instead of failing on a null reference.
    if (e.Result.Semantics == null || e.Result.Semantics.Value == null)
    {
        return;
    }

    switch (e.Result.Semantics.Value.ToString())
    {
        // Tags from the English XML grammar.
        case "Next":
            Message_Lab.Text = "向下";
            break;
        case "Previous":
            Message_Lab.Text = "向上";
            break;
        case "Run":
            Message_Lab.Text = "开始";
            break;
        case "Close":
            Message_Lab.Text = "停止";
            break;
        // Values for the Japanese grammar alternative.
        //case "した":
        //    Message_Lab.Text = "向下";
        //    break;
        //case "うえ":
        //    Message_Lab.Text = "向上";
        //    break;
        //case "はじめ":
        //    Message_Lab.Text = "开始";
        //    break;
        //case "おわり":
        //    Message_Lab.Text = "停止";
        //    break;
    }
}
[*]
/// <summary>
/// Runs when the form is closing: stops speech recognition, then stops the Kinect
/// audio source and the sensor itself.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Closing event data (not used).</param>
private void Form1_FormClosing(object sender, FormClosingEventArgs e)
{
    // Stop the recognizer first so it no longer reads from the audio stream that is
    // about to be closed, and detach the handler so no further results reach the form.
    if (speechEngine != null)
    {
        speechEngine.SpeechRecognized -= SpeechRecognized;
        speechEngine.RecognizeAsyncCancel();
    }

    if (kinectsensor != null && kinectsensor.Status == KinectStatus.Connected)
    {
        kinectsensor.AudioSource.Stop();
        kinectsensor.Stop(); // end the Kinect capture work
        MessageBox.Show("Kinect结束工作!");
    }
}
其中语音识别的匹配xml为:
<grammar version="1.0" xml:lang="en-US" root="rootRule" tag-format="semantics/1.0-literals" xmlns="http://www.w3.org/2001/06/grammar">
  <!-- Root rule: each top-level item pairs a semantic tag with the spoken phrases that produce it. -->
  <rule id="rootRule">
    <one-of>
      <!-- Phrases reported as "Next" -->
      <item>
        <tag>Next</tag>
        <one-of>
          <item>next</item>
          <item>n</item>
          <item>down</item>
          <item>qian</item>
        </one-of>
      </item>
      <!-- Phrases reported as "Previous" -->
      <item>
        <tag>Previous</tag>
        <one-of>
          <item>previous</item>
          <item>p</item>
          <item>up</item>
          <item>hou</item>
        </one-of>
      </item>
      <!-- Phrases reported as "Run" -->
      <item>
        <tag>Run</tag>
        <one-of>
          <item>run</item>
          <item>open</item>
          <item>r</item>
        </one-of>
      </item>
      <!-- Phrases reported as "Close" -->
      <item>
        <tag>Close</tag>
        <one-of>
          <item>close</item>
          <item>exit</item>
          <item>c</item>
        </one-of>
      </item>
    </one-of>
  </rule>
</grammar>
页:
[1]