2194 lines
76 KiB
C++
2194 lines
76 KiB
C++
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
|
|
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
|
|
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
|
|
// PARTICULAR PURPOSE.
|
|
//
|
|
// Copyright © Microsoft Corporation. All rights reserved
|
|
|
|
/******************************************************************************
|
|
* srengobj.cpp
|
|
* This file contains the implementation of the CSrEngine class.
|
|
* This implements ISpSREngine, ISpSREngine2 and ISpObjectWithToken.
|
|
* This is the main recognition object
|
|
******************************************************************************/
|
|
|
|
#include "stdafx.h"
|
|
#include "SampleSrEngine.h"
|
|
#include "srengobj.h"
|
|
#include "SpHelper.h"
|
|
#ifndef _WIN32_WCE
|
|
#include "shlobj.h"
|
|
#endif
|
|
|
|
static const WCHAR DICT_WORD[] = L"Blah"; // This is the default word the sample engine uses for dictation
|
|
static const WCHAR ALT_WORD[] = L"Alt"; // This is the default word used for alternates
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::FinalConstruct *
|
|
*---------------------------*
|
|
* Description:
|
|
* The ATL FinalConstruct method. Called after the standard C++ constructor
|
|
* Return:
|
|
* S_OK
|
|
* FAILED(hr)
|
|
*****************************************************************************/
|
|
HRESULT CSrEngine::FinalConstruct()
|
|
{
|
|
|
|
HRESULT hr = S_OK;
|
|
|
|
// This event is used to indicate when space is available for data to be given to the recognition thread
|
|
m_hQueueHasRoom = ::CreateEvent(NULL, TRUE, TRUE, NULL);
|
|
m_FrameQueue.SetSpaceAvailEvent(m_hQueueHasRoom);
|
|
|
|
// Create a thread control which will be used for the recognition thread
|
|
CComPtr<ISpTaskManager> cpTaskMgr;
|
|
hr = cpTaskMgr.CoCreateInstance(CLSID_SpResourceManager);
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
hr = cpTaskMgr->CreateThreadControl(this, this, THREAD_PRIORITY_NORMAL, &m_cpDecoderThread);
|
|
}
|
|
|
|
if(SUCCEEDED(hr))
|
|
{
|
|
// Create the SAPI lexicon which holds user and app pronounciations
|
|
hr = m_cpLexicon.CoCreateInstance(CLSID_SpLexicon);
|
|
}
|
|
return hr;
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::FinalRelease *
|
|
*---------------------------*
|
|
* Description:
|
|
* The ATL FinalRelease method. Clean up any resources not automatically destructed.
|
|
* Return:
|
|
* S_OK
|
|
*****************************************************************************/
|
|
HRESULT CSrEngine::FinalRelease()
|
|
{
|
|
|
|
::CloseHandle(m_hQueueHasRoom);
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::SetObjectToken *
|
|
*---------------------------*
|
|
* Description:
|
|
* This method is called by SAPI immediately after the engine is created.
|
|
* It can be used to get registry information specific to this engine.
|
|
* The engine can recover from the token file paths stored there during installation.
|
|
* The engine also can recover user set defaults for accuracy, rejection etc.
|
|
* Also could have different engines sharing the same code base (CLSID) but having different registry info
|
|
* e.g. if engine supports different languages
|
|
* Return:
|
|
* S_OK
|
|
* FAILED(hr)
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::SetObjectToken(ISpObjectToken * pToken)
|
|
{
|
|
|
|
HRESULT hr = S_OK;
|
|
|
|
// Helper function that copies the token reference.
|
|
hr = SpGenericSetObjectToken(pToken, m_cpEngineObjectToken);
|
|
if(FAILED(hr))
|
|
{
|
|
return hr;
|
|
}
|
|
|
|
// Read attribute information from the token in the registry
|
|
CComPtr<ISpDataKey> cpAttribKey;
|
|
hr = pToken->OpenKey(L"Attributes", &cpAttribKey);
|
|
|
|
if(SUCCEEDED(hr))
|
|
{
|
|
WCHAR *psz = NULL;
|
|
hr = cpAttribKey->GetStringValue(L"Desktop", &psz);
|
|
::CoTaskMemFree(psz);
|
|
if(SUCCEEDED(hr))
|
|
{
|
|
// This instance of the engine is for doing desktop recognition
|
|
}
|
|
else if(hr == SPERR_NOT_FOUND)
|
|
{
|
|
hr = cpAttribKey->GetStringValue(L"Telephony", &psz);
|
|
::CoTaskMemFree(psz);
|
|
if(SUCCEEDED(hr))
|
|
{
|
|
// This instance of the engine is for doing telephony recognition
|
|
}
|
|
}
|
|
}
|
|
|
|
// Read what language is set in the registry
|
|
if(SUCCEEDED(hr))
|
|
{
|
|
WCHAR *pszLangID = NULL;
|
|
hr = cpAttribKey->GetStringValue(L"Language", &pszLangID);
|
|
if(SUCCEEDED(hr))
|
|
{
|
|
// We could use this language id in recognition.
|
|
m_LangID = (unsigned short)wcstol(pszLangID, NULL, 16);
|
|
::CoTaskMemFree(pszLangID);
|
|
}
|
|
else
|
|
{
|
|
// Default language (US English)
|
|
m_LangID = 0x409;
|
|
}
|
|
}
|
|
|
|
// Read data-file location
|
|
WCHAR *pszPath = NULL;
|
|
|
|
// Looks for the file path stored in the registry
|
|
hr = pToken->GetStorageFileName(CLSID_SampleSREngine, L"SampleEngDataFile", NULL, 0, &pszPath);
|
|
// Could now load engine data-files from this given path
|
|
|
|
::CoTaskMemFree(pszPath);
|
|
|
|
return hr;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::GetObjectToken *
|
|
*---------------------------*
|
|
* Description:
|
|
* This method is called if SAPI wants to find which object token this engine is using
|
|
* Return:
|
|
* S_OK
|
|
* FAILED(hr)
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::GetObjectToken(ISpObjectToken ** ppToken)
|
|
{
|
|
|
|
// Generic helper function
|
|
return SpGenericGetObjectToken(ppToken, m_cpEngineObjectToken);
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::SetSite *
|
|
*---------------------------*
|
|
* Description:
|
|
* This is called to give the engine a reference to the ISpSREngineSite.
|
|
* The engine uses this to call back to SAPI.
|
|
* Return:
|
|
* S_OK
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::SetSite(ISpSREngineSite *pSite)
|
|
{
|
|
m_cpSite = pSite;
|
|
return S_OK;
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::SetRecoProfile *
|
|
*---------------------------*
|
|
* Description:
|
|
* The RecoProfile is an object token holding information on the current
|
|
* user and enrollment session. The engine can store whatever information here
|
|
* it likes. It should store this in a key under the RecoProfile key named with the engine class id.
|
|
* Return:
|
|
* S_OK
|
|
* FAILED(hr)
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::SetRecoProfile(ISpObjectToken *pProfile)
|
|
{
|
|
|
|
|
|
// First find if our engine already has info in this profile
|
|
HRESULT hr = S_OK;
|
|
WCHAR *pszCLSID, *pszPath = NULL;
|
|
CComPtr<ISpDataKey> dataKey;
|
|
|
|
m_cpUserObjectToken = pProfile;
|
|
|
|
hr = ::StringFromCLSID(CLSID_SampleSREngine, &pszCLSID);
|
|
if(FAILED(hr))
|
|
{
|
|
return hr;
|
|
}
|
|
|
|
hr = pProfile->OpenKey(pszCLSID, &dataKey);
|
|
if(hr == SPERR_NOT_FOUND)
|
|
{
|
|
// We haven't seen this user profile before, so create a new registry key to hold info for it
|
|
hr = pProfile->CreateKey(pszCLSID, &dataKey);
|
|
|
|
// Now we can set some default values
|
|
if(SUCCEEDED(hr))
|
|
{
|
|
hr = dataKey->SetStringValue(L"GENDER", L"UNKNOWN");
|
|
}
|
|
if(SUCCEEDED(hr))
|
|
{
|
|
hr = dataKey->SetStringValue(L"AGE", L"UNKNOWN");
|
|
}
|
|
|
|
// Now we can create some temporary file storage (e.g. for trained models)
|
|
if(SUCCEEDED(hr))
|
|
{
|
|
pProfile->GetStorageFileName(CLSID_SampleSREngine, L"SampleEngTrainingFile", NULL, CSIDL_FLAG_CREATE | CSIDL_LOCAL_APPDATA, &pszPath);
|
|
}
|
|
|
|
// Now we request a UI for user training (or properties - SPDUI_RecoProfileProperties)
|
|
// The engine cannot directly create UI it must request it.
|
|
hr = AddEventString(SPEI_REQUEST_UI, 0, SPDUI_UserTraining);
|
|
|
|
}
|
|
else if(SUCCEEDED(hr))
|
|
{
|
|
// We've already seen this profile so read values
|
|
WCHAR *pszGender = NULL, *pszAge = NULL;
|
|
hr = dataKey->GetStringValue(L"GENDER", &pszGender);
|
|
if(SUCCEEDED(hr))
|
|
{
|
|
hr = dataKey->GetStringValue(L"AGE", &pszAge);
|
|
}
|
|
|
|
// Now we could read training file
|
|
if(SUCCEEDED(hr))
|
|
{
|
|
hr = pProfile->GetStorageFileName(CLSID_SampleSREngine, L"SampleEngTrainingFile", NULL, 0, &pszPath);
|
|
}
|
|
|
|
::CoTaskMemFree(pszGender);
|
|
::CoTaskMemFree(pszAge);
|
|
}
|
|
|
|
::CoTaskMemFree(pszPath);
|
|
::CoTaskMemFree(pszCLSID);
|
|
return hr;
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::OnCreateRecoContext *
|
|
*---------------------------*
|
|
* Description:
|
|
* This method is called each time a new reco context is created in
|
|
* an application using this engine.
|
|
* This sample engine does not strictly need info about reco contexts
|
|
* but for reference we will a keep list of them.
|
|
* Return:
|
|
* S_OK
|
|
* FAILED(hr)
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::OnCreateRecoContext(SPRECOCONTEXTHANDLE hSapiContext, void ** ppvDrvCtxt)
|
|
{
|
|
|
|
|
|
CContext * pContext = new CContext(hSapiContext);
|
|
|
|
// Store a reference to the CContext structure
|
|
*ppvDrvCtxt = pContext;
|
|
m_ContextList.InsertHead(pContext);
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::OnDeleteRecoContext *
|
|
*---------------------------*
|
|
* Description:
|
|
* This method is called each time a reco context is deleted.
|
|
* Return:
|
|
* S_OK
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::OnDeleteRecoContext(void * pvDrvCtxt)
|
|
{
|
|
|
|
|
|
CContext * pContext = (CContext *) pvDrvCtxt;
|
|
m_ContextList.Remove(pContext);
|
|
delete pContext;
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::OnCreateGrammar *
|
|
*---------------------------*
|
|
* Description:
|
|
* This method is called each time a new reco grammar is created in
|
|
* an application using this engine.
|
|
* We keep a list of grammars - storing a pointer to the list entry in ppvEngineGrammar.
|
|
* Return:
|
|
* S_OK
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::OnCreateGrammar(void * pvEngineRecoContext, SPGRAMMARHANDLE hSapiGrammar, void ** ppvEngineGrammar)
|
|
{
|
|
|
|
|
|
// Each grammar will be associated with a context
|
|
CContext * pContext = (CContext *) pvEngineRecoContext;
|
|
_ASSERT(m_ContextList.Find(pContext->m_hSapiContext));
|
|
|
|
// Keep a list of grammars
|
|
CDrvGrammar * pGrammar = new CDrvGrammar(hSapiGrammar);
|
|
// Store a reference to the CDrvGrammar structure
|
|
*ppvEngineGrammar = pGrammar;
|
|
m_GrammarList.InsertHead(pGrammar);
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::OnDeleteGrammar *
|
|
*---------------------------*
|
|
* Description:
|
|
* This method is called each time a reco grammar is deleted.
|
|
* Return:
|
|
* S_OK
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::OnDeleteGrammar(void * pvDrvGrammar)
|
|
{
|
|
|
|
|
|
CDrvGrammar * pGrammar = (CDrvGrammar *)pvDrvGrammar;
|
|
m_GrammarList.Remove(pGrammar);
|
|
delete pGrammar;
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::WordNotify *
|
|
*---------------------------*
|
|
* Description:
|
|
* This method is called by SAPI to inform the engine of the words in
|
|
* command & control (C&C) grammars. When words are added or removed (e.g. by
|
|
* the application loading or unloading grammars) this method is called.
|
|
* Here we examine the word text, see if it has an associated pronunciation,
|
|
* and see if there is a pronunciation in the lexicon.
|
|
* Return:
|
|
* S_OK
|
|
* FAILED(hr)
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::WordNotify(SPCFGNOTIFY Action, ULONG cWords, const SPWORDENTRY * pWords)
|
|
{
|
|
|
|
HRESULT hr = S_OK;
|
|
ULONG i;
|
|
WCHAR *wordPron;
|
|
|
|
switch(Action){
|
|
case SPCFGN_ADD:
|
|
SPWORDENTRY WordEntry;
|
|
for(i = 0; SUCCEEDED(hr) && i < cWords; i++)
|
|
{
|
|
WordEntry = pWords[i];
|
|
hr = m_cpSite->GetWordInfo(&WordEntry, SPWIO_WANT_TEXT);
|
|
if(SUCCEEDED(hr) && WordEntry.aPhoneId)
|
|
{
|
|
// Word entry contains a specific pronounciation from CFG
|
|
// Engine should use this in recognition
|
|
size_t cWordPron = wcslen(WordEntry.aPhoneId) + 1;
|
|
wordPron = new WCHAR[cWordPron];
|
|
wcscpy_s(wordPron, cWordPron, WordEntry.aPhoneId);
|
|
::CoTaskMemFree((void*)WordEntry.aPhoneId);
|
|
}
|
|
else
|
|
{
|
|
// See if word is in lexicon
|
|
SPWORDPRONUNCIATIONLIST PronList;
|
|
PronList.pFirstWordPronunciation = 0;
|
|
PronList.pvBuffer = 0;
|
|
PronList.ulSize = 0;
|
|
hr = m_cpLexicon->GetPronunciations(WordEntry.pszLexicalForm, eLEXTYPE_APP | eLEXTYPE_USER, pWords[i].LangID, &PronList);
|
|
if(SUCCEEDED(hr))
|
|
{
|
|
if(PronList.pFirstWordPronunciation != NULL)
|
|
{
|
|
// Pronounciation(s) found in SAPI lexicon
|
|
// Engines should use these prons in recognition and also language and POS info
|
|
// For sample just copy first pron
|
|
size_t cWordPron = wcslen(PronList.pFirstWordPronunciation->szPronunciation) + 1;
|
|
wordPron = new WCHAR[cWordPron];
|
|
wcscpy_s(wordPron, cWordPron, PronList.pFirstWordPronunciation->szPronunciation);
|
|
::CoTaskMemFree(PronList.pvBuffer);
|
|
}
|
|
else
|
|
{
|
|
// Word is in lexicon bu no pronunciation is present.
|
|
// Engine should generate its own pronounciation or fail
|
|
// Here we generate a default NULL pronunciation
|
|
wordPron = NULL;
|
|
}
|
|
}
|
|
else if(hr == SPERR_NOT_IN_LEX)
|
|
{
|
|
// Word is not present in SAPI lexicon.
|
|
// Engine should generate its own pronounciation or fail
|
|
// Here we generate a default NULL pronunciation
|
|
wordPron = NULL;
|
|
hr = S_OK;
|
|
}
|
|
else
|
|
{
|
|
break; // Unexpected error - break;
|
|
}
|
|
|
|
if(SUCCEEDED(hr))
|
|
{
|
|
// Associate the pronunciation information with the SAPI word handle so it can be recovered later
|
|
// An engine can store any arbitrary pointer with each word.
|
|
hr = m_cpSite->SetWordClientContext(WordEntry.hWord, wordPron);
|
|
}
|
|
}
|
|
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
// When calling GetWordInfo SAPI allocates the strings which we must free
|
|
::CoTaskMemFree((void*)WordEntry.pszDisplayText);
|
|
::CoTaskMemFree((void*)WordEntry.pszLexicalForm);
|
|
}
|
|
}
|
|
break;
|
|
case SPCFGN_REMOVE:
|
|
for(i = 0; i < cWords; i++)
|
|
{
|
|
WordEntry = pWords[i];
|
|
// Client context already on word entry
|
|
wordPron = (WCHAR *) WordEntry.pvClientContext;
|
|
if(wordPron)
|
|
{
|
|
delete[] wordPron;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
return hr;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::RuleNotify *
|
|
*---------------------------*
|
|
* Description:
|
|
* This method is called by SAPI to inform the engine of the rules in
|
|
* command & control grammars. The order or actions of CFG grammars is of this form:
|
|
* WordNotify(SPCFGN_ADD) - add words
|
|
* RuleNotify(SPCFGN_ADD) - add rules
|
|
* RuleNotify(SPCFGN_ACTIVATE) - activate rules to indicate they are to be used for recognition
|
|
* RuleNotify(SPCFGN_INVALIDATE) - if a rule gets edited by the app then this is called
|
|
* RuleNotify(SPCFGN_DEACTIVE) - deactivate rules
|
|
* RuleNotify(SPCFGN_REMOVE) - remove rules
|
|
* WordNotify(SPCFGN_REMOVE) - remove words
|
|
*
|
|
* The engine can call GetRuleInfo to find the initial state in the rule, and
|
|
* then GetStateInfo to find the information about subsequent states and transitions in the rule.
|
|
* If a rule is edited then SPCFGN_INVALIDATE is called to indicate rule has changed so the engine
|
|
* must reparse the rule information.
|
|
*
|
|
* The engine can obtain all the information about the rule either before or during recognition.
|
|
* In this sample engine we just keep a list of rules initially and then wait
|
|
* until we want to generate a result and then find a random path through the rule.
|
|
*
|
|
* Return:
|
|
* S_OK
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::RuleNotify(SPCFGNOTIFY Action, ULONG cRules, const SPRULEENTRY * pRules)
|
|
{
|
|
|
|
ULONG i;
|
|
CRuleEntry *pRuleEntry;
|
|
|
|
switch (Action)
|
|
{
|
|
case SPCFGN_ADD:
|
|
for (i = 0; i < cRules; i++)
|
|
{
|
|
// Obtain information on the rule and store in a CRuleEntry structure
|
|
pRuleEntry = new CRuleEntry;
|
|
pRuleEntry->m_hRule = pRules[i].hRule;
|
|
pRuleEntry->m_fTopLevel = (pRules[i].Attributes & SPRAF_TopLevel);
|
|
pRuleEntry->m_fActive = (pRules[i].Attributes & SPRAF_Active);
|
|
|
|
// Keep a list of rules
|
|
m_RuleList.InsertHead(pRuleEntry);
|
|
|
|
// Engine can store information associated with rule handle if desired
|
|
m_cpSite->SetRuleClientContext(pRules[i].hRule, (void *)pRuleEntry);
|
|
}
|
|
break;
|
|
case SPCFGN_REMOVE:
|
|
for (i = 0; i < cRules; i++)
|
|
{
|
|
pRuleEntry = m_RuleList.Find(pRules[i].hRule);
|
|
_ASSERT(pRuleEntry); // The rule must have been added before being removed
|
|
m_RuleList.Remove(pRuleEntry);
|
|
delete pRuleEntry;
|
|
}
|
|
break;
|
|
case SPCFGN_ACTIVATE:
|
|
for (i = 0; i < cRules; i++)
|
|
{
|
|
pRuleEntry = m_RuleList.Find(pRules[i].hRule);
|
|
// Only top-level rules can be activated
|
|
_ASSERT(pRuleEntry && !pRuleEntry->m_fActive && pRuleEntry->m_fTopLevel);
|
|
if (pRuleEntry != NULL)
|
|
{
|
|
pRuleEntry->m_fActive = TRUE;
|
|
}
|
|
}
|
|
break;
|
|
case SPCFGN_DEACTIVATE:
|
|
for (i = 0; i < cRules; i++)
|
|
{
|
|
pRuleEntry = m_RuleList.Find(pRules[i].hRule);
|
|
_ASSERT(pRuleEntry && pRuleEntry->m_fActive && pRuleEntry->m_fTopLevel);
|
|
if (pRuleEntry != NULL)
|
|
{
|
|
pRuleEntry->m_fActive = FALSE;
|
|
}
|
|
}
|
|
break;
|
|
case SPCFGN_INVALIDATE:
|
|
for (i = 0; i < cRules; i++)
|
|
{
|
|
pRuleEntry = m_RuleList.Find(pRules[i].hRule);
|
|
_ASSERT(pRuleEntry);
|
|
if (pRuleEntry != NULL)
|
|
{
|
|
pRuleEntry->m_fTopLevel = (pRules[i].Attributes & SPRAF_TopLevel);
|
|
pRuleEntry->m_fActive = (pRules[i].Attributes & SPRAF_Active);
|
|
// Don't need to do anything here as we don't start parsing the rule until recognition time
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::LoadSLM *
|
|
*---------------------------*
|
|
* Description:
|
|
* Called when SAPI wants the engine to load a dictaion language model (SLM).
|
|
* For each reco gramar one dictation as well as C&C rules can be loaded.
|
|
* Return:
|
|
* S_OK
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::LoadSLM(void * pvEngineGrammar, const WCHAR * pszTopicName)
|
|
{
|
|
|
|
|
|
if (pszTopicName)
|
|
{
|
|
// Engines should load the named language model - e.g. Spelling using
|
|
// this parameter. If NULL load the default dictation model.
|
|
// Since just a sample ignore this parameter.
|
|
}
|
|
|
|
// pvEngineGrammar is the pointer ppvEngineGrammar we set in OnCreateGrammar
|
|
// Use this to find out on which grammar the SLM is being asked for.
|
|
CDrvGrammar * pGrammar = (CDrvGrammar *)pvEngineGrammar;
|
|
pGrammar->m_SLMLoaded = TRUE;
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::UnloadSLM *
|
|
*---------------------------*
|
|
* Description:
|
|
* Called when SAPI wants the engine to delete an SLM.
|
|
* Return:
|
|
* S_OK
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::UnloadSLM(void *pvEngineGrammar)
|
|
{
|
|
|
|
|
|
CDrvGrammar * pGrammar = (CDrvGrammar *)pvEngineGrammar;
|
|
pGrammar->m_SLMLoaded = FALSE;
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::SetSLMState *
|
|
*---------------------------*
|
|
* Description:
|
|
* Called to activate or deactivate an SLM for recognition.
|
|
* NewState is either SPRS_ACTIVE or SPRS_INACTIVE.
|
|
* Return:
|
|
* S_OK
|
|
*****************************************************************************/
|
|
HRESULT CSrEngine::SetSLMState(void * pvDrvGrammar, SPRULESTATE NewState)
|
|
{
|
|
|
|
|
|
// pvDrvGrammar is the pointer ppvEngineGrammar we set in OnCreateGrammar
|
|
CDrvGrammar * pGrammar = (CDrvGrammar *)pvDrvGrammar;
|
|
if (NewState != SPRS_INACTIVE)
|
|
{
|
|
pGrammar->m_SLMActive = TRUE;
|
|
}
|
|
else
|
|
{
|
|
pGrammar->m_SLMActive = FALSE;
|
|
}
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::SetWordSequenceData *
|
|
*---------------------------*
|
|
* Description:
|
|
* If the app submits a text buffer to SAPI this method is called.
|
|
* The text buffer supplied here can either be used in CFGs with the text buffer transition,
|
|
* or in dictation to supply information to the engine about the prior text visible on screen.
|
|
* This sample engine is just using the text buffer with the text buffer transition.
|
|
* Return:
|
|
* S_OK
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::SetWordSequenceData(void *pvEngineGrammar, const WCHAR *pText, ULONG cchText, const SPTEXTSELECTIONINFO *pInfo)
|
|
{
|
|
|
|
|
|
//For each grammar object going to be released, SAPI would call SetWordSequenceData(NULL, 0, NULL).
|
|
|
|
// Recover the data we have associated with this grammar.
|
|
CDrvGrammar * pGrammar = (CDrvGrammar*)pvEngineGrammar;
|
|
|
|
// Delete previous grammar text buffer
|
|
if(pGrammar->m_pWordSequenceText)
|
|
{
|
|
delete pGrammar->m_pWordSequenceText;
|
|
}
|
|
|
|
// Make a copy of the text data
|
|
if(cchText)
|
|
{
|
|
pGrammar->m_pWordSequenceText = new WCHAR[cchText];
|
|
wmemcpy_s(pGrammar->m_pWordSequenceText, cchText, pText, cchText);
|
|
pGrammar->m_cchText = cchText;
|
|
}
|
|
else
|
|
{
|
|
pGrammar->m_pWordSequenceText = NULL;
|
|
pGrammar->m_cchText = NULL;
|
|
}
|
|
|
|
// Engines can use the SPTEXTSELECTIONINFO to determine which
|
|
// parts of the text buffer are visible on screen and / or selected.
|
|
// This sample engine is using the active text selection only.
|
|
SetTextSelection(pvEngineGrammar, pInfo);
|
|
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::SetTextSelection *
|
|
*---------------------------*
|
|
* Description:
|
|
* This method tells engines if the SPTEXTSELECTIONINFO structure
|
|
* has been updated. This sample engine is using only fields ulStartActiveOffset and cchActiveChars of SPTEXTSELECTIONINFO.
|
|
* Return:
|
|
* S_OK
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::SetTextSelection(void * pvEngineGrammar, const SPTEXTSELECTIONINFO * pInfo)
|
|
{
|
|
|
|
|
|
// Recover the data we have associated with this grammar.
|
|
CDrvGrammar * pGrammar = (CDrvGrammar*)pvEngineGrammar;
|
|
|
|
if (pGrammar->m_pInfo)
|
|
{
|
|
delete pGrammar->m_pInfo;
|
|
}
|
|
|
|
if (pInfo)
|
|
{
|
|
pGrammar->m_pInfo = new SPTEXTSELECTIONINFO(*pInfo);
|
|
}
|
|
else
|
|
{
|
|
pGrammar->m_pInfo = NULL;
|
|
}
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::IsPronounceable *
|
|
*---------------------------*
|
|
* Description:
|
|
* Engines should return whether it has or will be able to
|
|
* generate a pronounciation for this word.
|
|
* In this sample engine, this is always true.
|
|
* Return:
|
|
* S_OK
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::IsPronounceable(void * pDrvGrammar, const WCHAR * pszWord, SPWORDPRONOUNCEABLE * pWordPronounceable)
|
|
{
|
|
|
|
*pWordPronounceable = SPWP_KNOWN_WORD_PRONOUNCEABLE;
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::SetAdaptationData *
|
|
*---------------------------*
|
|
* Description:
|
|
* This method can be used by the app to give text data to the engine
|
|
* for language model adaptation etc. This method can only be called
|
|
* by the app after if has received an SPEI_ADAPTATION event. Since
|
|
* we never fire that event this method should never be called.
|
|
|
|
* Return:
|
|
* E_UNEXPECTED
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::SetAdaptationData(void * pvEngineCtxtCookie, const WCHAR *pAdaptationData, const ULONG cch)
|
|
{
|
|
|
|
|
|
_ASSERT(0); // This method should never be called
|
|
return E_UNEXPECTED;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::AddEvent *
|
|
*---------------------------*
|
|
* Description:
|
|
* Internal helper method to send an event to SAPI.
|
|
* Return:
|
|
* S_OK
|
|
* FAILED(hr)
|
|
*****************************************************************************/
|
|
HRESULT CSrEngine::AddEvent(SPEVENTENUM eEventId, ULONGLONG ullStreamPos, WPARAM wParam, LPARAM lParam)
|
|
{
|
|
|
|
HRESULT hr = S_OK;
|
|
|
|
SPEVENT Event;
|
|
Event.eEventId = eEventId;
|
|
Event.elParamType = SPET_LPARAM_IS_UNDEFINED;
|
|
Event.ulStreamNum = 0; // Always set this to zero - SAPI fills this in
|
|
Event.ullAudioStreamOffset = ullStreamPos;
|
|
Event.wParam = wParam;
|
|
Event.lParam = lParam;
|
|
|
|
hr = m_cpSite->AddEvent(&Event, NULL);
|
|
|
|
return hr;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::AddEventString *
|
|
*---------------------------*
|
|
* Description:
|
|
* Internal helper method to send an event with a string LParam to SAPI.
|
|
* Request UI is the only event in the sample to need this.
|
|
* Return:
|
|
* S_OK
|
|
* FAILED(hr)
|
|
*****************************************************************************/
|
|
HRESULT CSrEngine::AddEventString(SPEVENTENUM eEventId, ULONGLONG ullStreamPos, const WCHAR * psz, WPARAM wParam)
|
|
{
|
|
|
|
HRESULT hr = S_OK;
|
|
|
|
SPEVENT Event;
|
|
Event.eEventId = eEventId;
|
|
Event.elParamType = SPET_LPARAM_IS_STRING;
|
|
Event.ulStreamNum = 0; // Always set this to zero - SAPI fills this in
|
|
Event.ullAudioStreamOffset = ullStreamPos;
|
|
Event.wParam = wParam;
|
|
Event.lParam = (LPARAM)psz;
|
|
|
|
hr = m_cpSite->AddEvent(&Event, NULL);
|
|
|
|
return hr;
|
|
}
|
|
|
|
|
|
#define BLOCKSIZE 220 // 1/100 of a second
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::RecognizeStream *
|
|
*---------------------------*
|
|
* Description:
|
|
* This is the method that SAPI calls for recognition to take place.
|
|
* Engines must only return from this method after they have read all the data
|
|
* and completed all the recognition they are going to do on this stream.
|
|
* Thus this method is giving a thread to the engine to do recognition on,
|
|
* and engines may create additional threads.
|
|
*
|
|
* In this sample we constantly read data using this thread, and then perform
|
|
* very basic speech detection and pass data to a recognizer thread, which
|
|
* generates hypotheses and results.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* - REFGUID rguidFormatId - this is the GUID of the input audio format
|
|
* - const WAVEFORMATEX * pWaveFormatEx - this is the extended wav format information of the audio format
|
|
* - HANDLE hRequestSync - this Win32 event is used to indicate that there are pending tasks
|
|
* and the engine should call Synchronize() for SAPI to process these.
|
|
* - HANDLE hDataAvailable - this Win32 event is used to tell the engine that data is available to be read.
|
|
* The frequency this is set can be controlled by the SetBufferNotifySize method.
|
|
* - HANDLE hExit - this Win32 event indicates the engine is being closed down and should exit immediately.
|
|
* - BOOL fNewAudioStream - this indicates this is a new input stream
|
|
* e.g. the app has done a new SetInput call rather than just restarting the previous stream.
|
|
* - BOOL fRealTimeAudio - this indicates the input is from a real-time ISpAudio stream, rather than, say, a file
|
|
* - ISpObjectToken * pAudioObjectToken - this is the object token representing the audio input device
|
|
* the engine may want to query this.
|
|
*
|
|
* Return:
|
|
* S_OK
|
|
* FAILED(hr)
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::RecognizeStream(REFGUID rguidFormatId,
|
|
const WAVEFORMATEX * pWaveFormatEx,
|
|
HANDLE hRequestSync,
|
|
HANDLE hDataAvailable,
|
|
HANDLE hExit,
|
|
BOOL fNewAudioStream,
|
|
BOOL fRealTimeAudio,
|
|
ISpObjectToken * pAudioObjectToken)
|
|
{
|
|
|
|
HRESULT hr = S_OK;
|
|
|
|
m_hRequestSync = hRequestSync;
|
|
|
|
// Start the recognition thread
|
|
hr = m_cpDecoderThread->StartThread(0, NULL);
|
|
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
const HANDLE aWait[] = { hExit, m_hQueueHasRoom };
|
|
|
|
while (TRUE) // sit in this loop until there is no more data
|
|
{
|
|
// The Read method is used to read data. This will block until the required
|
|
// amount of data is available. If the stream has ended either a fail code
|
|
// will be returned or the amount read will be less than the amount asked for.
|
|
// To see how much data is available to be read without blocking the DataAvailable
|
|
// method can be used or hDataAvailable event.
|
|
BYTE aData[BLOCKSIZE];
|
|
ULONG cbRead;
|
|
hr = m_cpSite->Read(aData, sizeof(aData), &cbRead);
|
|
if (hr != S_OK || cbRead < sizeof(aData))
|
|
{
|
|
break;
|
|
}
|
|
|
|
// Decide if the frame of data is noise or silence with a simple level detector
|
|
BOOL bNoiseDetected = FALSE;
|
|
SHORT * pBuffer = (SHORT *)aData;
|
|
for (ULONG i = 0; i < cbRead; i += 2, pBuffer++)
|
|
{
|
|
if (*pBuffer < (SHORT)-3000 || *pBuffer > (SHORT)3000)
|
|
{
|
|
bNoiseDetected = TRUE;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// If there no space on the frame queue then wait
|
|
BOOL bBlock = m_FrameQueue.IsFull();
|
|
if(bBlock)
|
|
{
|
|
// Wait for space to appear on the queue to be passed to the recognizer.
|
|
// Real engines should not wait - the data reading should be as real-time as possible
|
|
// Also detect if the hExit event is set to indicate we should stop processing.
|
|
if (::WaitForMultipleObjects(sp_countof(aWait), aWait, FALSE, INFINITE) == WAIT_OBJECT_0)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Add the frame to the queue and notify the decoder thread
|
|
m_FrameQueue.InsertTail(bNoiseDetected);
|
|
m_cpDecoderThread->Notify();
|
|
|
|
}
|
|
|
|
// Once we've stopped reading data we must wait for the recognizer thread to finish
|
|
// All processing must be done before returning from the RecognizeStream method.
|
|
m_cpDecoderThread->WaitForThreadDone(TRUE, &hr, 30 * 1000);
|
|
}
|
|
|
|
m_hRequestSync = NULL;
|
|
|
|
return hr;
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::ThreadProc *
|
|
*-----------------------*
|
|
* Description:
|
|
* This is the main thread for the recognition process.
|
|
* This hExitThreadEvent indicates this thread should complete,
|
|
* and the hNotifyEvent indicates the RecognizeStream thread is notifying
|
|
* that data is available for processing. The m_hRequestSync indicates
|
|
* SAPI is requesting the engine should call Synchronize.
|
|
*
|
|
* Return:
|
|
* S_OK
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::ThreadProc(void *, HANDLE hExitThreadEvent, HANDLE hNotifyEvent, HWND hwndWorker, volatile const BOOL * pfContinueProcessing)
|
|
{
|
|
|
|
HRESULT hr = S_OK;
|
|
const HANDLE aWait[] = { hExitThreadEvent, hNotifyEvent, m_hRequestSync };
|
|
ULONG block = 0;
|
|
ULONG silenceafternoise = 0;
|
|
DWORD waitres;
|
|
|
|
m_bSoundStarted = FALSE;
|
|
m_bPhraseStarted = FALSE;
|
|
m_cBlahBlah = 0;
|
|
|
|
m_ullStart = 0;
|
|
m_ullEnd = 0;
|
|
while (*pfContinueProcessing) // sit in this loop until exit
|
|
{
|
|
ULONG cEvents = sp_countof(aWait);
|
|
// Respond to the request sync event only if speech not detected,
|
|
// as we don't want to allow grammar changes during recognition.
|
|
// However always respond after several seconds.
|
|
if(m_bPhraseStarted && (block - m_ullStart / BLOCKSIZE) < 5 * 100)
|
|
{
|
|
--cEvents;
|
|
}
|
|
waitres = ::WaitForMultipleObjects(cEvents, aWait, FALSE, INFINITE);
|
|
switch (waitres)
|
|
{
|
|
|
|
case WAIT_OBJECT_0: // Exit thread
|
|
break;
|
|
|
|
case WAIT_OBJECT_0 + 1: // Notify (data is available)
|
|
|
|
// Engines should regularly call UpdateRecoPos to indicate how far through the
|
|
// stream they have recognized.
|
|
m_cpSite->UpdateRecoPos((ULONGLONG)block * BLOCKSIZE);
|
|
|
|
if (m_ullStart == 0 && !m_bPhraseStarted)
|
|
{
|
|
// Engines should also call Synchronize to indicate they are ready
|
|
// to be notified about grammar changes and other tasks. Within Synchronize
|
|
// if grammars have changed WordNotify, RuleNotify etc. will be called before Synchronize returns.
|
|
// Here Synchronize is called only if processing silence, not speech
|
|
// to avoid having to deal with grammar changes during recognition.
|
|
|
|
m_cpSite->Synchronize((ULONGLONG)block * BLOCKSIZE);
|
|
// A return code of S_FALSE from synchronize means the engine can stop recognizing
|
|
// This engine ignores this.
|
|
}
|
|
while (m_FrameQueue.HasData())
|
|
{
|
|
BOOL bNoise = m_FrameQueue.RemoveHead();
|
|
block++; // Update the position in stream
|
|
if (bNoise)
|
|
{
|
|
// Found some speech - update start and end positions
|
|
silenceafternoise = 0;
|
|
if (m_ullStart == 0)
|
|
{
|
|
m_ullStart = (ULONGLONG)block * BLOCKSIZE;
|
|
}
|
|
m_ullEnd = (ULONGLONG)block * BLOCKSIZE;
|
|
_CheckRecognition(); // this will generate hypotheses and events
|
|
}
|
|
else
|
|
{
|
|
// Found some silence
|
|
silenceafternoise++;
|
|
if (silenceafternoise > 50)
|
|
{
|
|
if (m_bSoundStarted)
|
|
{
|
|
// We've heard 1/2 sec of silence since the last noise, so send the
|
|
// final recognition if we had previously started a phrase
|
|
if (m_bPhraseStarted)
|
|
{
|
|
_NotifyRecognition(FALSE, m_cBlahBlah);
|
|
}
|
|
AddEvent(SPEI_SOUND_END, m_ullEnd); // send the sound end event
|
|
m_bSoundStarted = FALSE;
|
|
m_bPhraseStarted = FALSE;
|
|
}
|
|
m_ullStart = 0;
|
|
m_ullEnd = 0;
|
|
}
|
|
}
|
|
|
|
//--- Generate random interference start at 20 seconds into the stream
|
|
// and saying it's gone at 22 seconds
|
|
if (block % (100 * 30) == 20 * 100)
|
|
{
|
|
const SPINTERFERENCE rgspi[] =
|
|
{ SPINTERFERENCE_NOISE, SPINTERFERENCE_NOSIGNAL, SPINTERFERENCE_TOOLOUD, SPINTERFERENCE_TOOQUIET };
|
|
|
|
AddEvent(SPEI_INTERFERENCE, block*BLOCKSIZE, 0, rgspi[rand() % 4]);
|
|
}
|
|
else if (block % (100 * 30) == 22 * 100)
|
|
{
|
|
AddEvent(SPEI_INTERFERENCE, block*BLOCKSIZE, 0, SPINTERFERENCE_NONE);
|
|
}
|
|
// Ask for UI at 10 seconds into the stream
|
|
// and cancel request one second later
|
|
else if (block == 10 * 100)
|
|
{
|
|
AddEventString(SPEI_REQUEST_UI, block * BLOCKSIZE, SPDUI_UserTraining);
|
|
}
|
|
else if (block == 11 * 100) // Cancle the UI request at 11 seconds into the stream
|
|
{
|
|
AddEventString(SPEI_REQUEST_UI, block * BLOCKSIZE, NULL);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case WAIT_OBJECT_0 + 2:
|
|
// SAPI has explicitly requested we call Synchronize
|
|
m_cpSite->Synchronize((ULONGLONG)block * BLOCKSIZE);
|
|
// Once synchronize is called the engine cannot fire events prior to that stream position
|
|
// so update the stored stream start position
|
|
m_ullStart = block * BLOCKSIZE;
|
|
if(m_ullEnd < m_ullStart)
|
|
{
|
|
m_ullEnd = m_ullStart;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
_ASSERT(FALSE);
|
|
// Something strange!
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Before exiting we must make sure every phrase start has been paired with a recognition,
|
|
// and that every sound start event has a corresponding sound end.
|
|
if (m_bPhraseStarted)
|
|
{
|
|
_NotifyRecognition(FALSE, m_cBlahBlah);
|
|
}
|
|
if (m_bSoundStarted)
|
|
{
|
|
AddEvent(SPEI_SOUND_END, m_ullEnd);
|
|
}
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::_CheckRecognition *
|
|
*---------------------------*
|
|
* Description:
|
|
* This internal method decides when to fire sound start events,
|
|
* phrase start events and hypotheses.
|
|
*****************************************************************************/
|
|
void CSrEngine::_CheckRecognition()
|
|
{
|
|
|
|
ULONG duration, blahs;
|
|
|
|
if (m_ullEnd > m_ullStart)
|
|
{
|
|
duration = (ULONG)(m_ullEnd - m_ullStart);
|
|
if (duration >= BLOCKSIZE * 100 * 1 / 8)
|
|
{
|
|
if (!m_bSoundStarted)
|
|
{
|
|
// Heard something that was longer than 1/8th second, so do sound start
|
|
AddEvent(SPEI_SOUND_START, m_ullStart);
|
|
m_bSoundStarted = TRUE;
|
|
m_cBlahBlah = 0;
|
|
}
|
|
if (duration >= BLOCKSIZE * 100 * 1 / 4)
|
|
{
|
|
// Heard something that was longer than 1/4 second, so do phrase start
|
|
// and then generate a hypothesis every 1/4 second after that.
|
|
blahs = duration / (BLOCKSIZE * 100 * 1 / 4);
|
|
if (blahs != m_cBlahBlah)
|
|
{
|
|
m_cBlahBlah = blahs;
|
|
if (!m_bPhraseStarted)
|
|
{
|
|
m_bPhraseStarted = TRUE;
|
|
AddEvent(SPEI_PHRASE_START, m_ullStart);
|
|
}
|
|
_NotifyRecognition(TRUE, blahs);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::_NotifyRecognition *
|
|
*-------------------------------*
|
|
* Description:
|
|
* This internal method is used to generate a recognition result. The contents
|
|
* are random since we don't actually know how to recognize anything.*
|
|
****************************************************************************/
|
|
void CSrEngine::_NotifyRecognition( BOOL fHypothesis, ULONG nWords )
|
|
{
|
|
|
|
HRESULT hr = S_OK;
|
|
|
|
// First count the active CFG rules
|
|
ULONG cActiveCFGRules = 0;
|
|
CRuleEntry * pRule = m_RuleList.GetHead();
|
|
for(; pRule; pRule = m_RuleList.GetNext(pRule))
|
|
{
|
|
if( pRule->m_fActive )
|
|
{
|
|
cActiveCFGRules++;
|
|
}
|
|
}
|
|
|
|
// Then count all the grammars with active dictation
|
|
ULONG cActiveSLM = 0;
|
|
CDrvGrammar * pGram = m_GrammarList.GetHead();
|
|
for(; pGram; pGram = m_GrammarList.GetNext(pGram))
|
|
{
|
|
if(pGram->m_SLMActive)
|
|
{
|
|
cActiveSLM++;
|
|
}
|
|
}
|
|
|
|
// If both CFG and dictation are active, randomly do one or the other
|
|
if(cActiveCFGRules && cActiveSLM)
|
|
{
|
|
if(rand() % 2)
|
|
{
|
|
cActiveSLM = 0;
|
|
}
|
|
else
|
|
{
|
|
cActiveCFGRules = 0;
|
|
}
|
|
}
|
|
|
|
//--- Compose reco result info
|
|
SPRECORESULTINFO Result;
|
|
memset(&Result, 0, sizeof(SPRECORESULTINFO));
|
|
Result.cbSize = sizeof(SPRECORESULTINFO);
|
|
Result.fHypothesis = fHypothesis;
|
|
Result.ullStreamPosStart = m_ullStart;
|
|
Result.ullStreamPosEnd = m_ullEnd;
|
|
|
|
if( cActiveCFGRules )
|
|
{
|
|
// Generate a CFG result phrase
|
|
hr = WalkCFGRule(&Result, cActiveCFGRules, fHypothesis, nWords, m_ullStart, (ULONG)(m_ullEnd - m_ullStart));
|
|
if( SUCCEEDED(hr) )
|
|
{
|
|
// Pass the results info to SAPI
|
|
m_cpSite->Recognition(&Result);
|
|
// Cleanup any memory allocated for alternates and release phrases
|
|
for(ULONG i = 0; i < Result.ulNumAlts; i++)
|
|
{
|
|
Result.aPhraseAlts[i].pPhrase->Release();
|
|
}
|
|
Result.pPhrase->Release();
|
|
delete[] Result.aPhraseAlts;
|
|
}
|
|
}
|
|
else if(cActiveSLM)
|
|
{
|
|
// Generate a dictation result phrase
|
|
hr = WalkSLM(&Result, cActiveSLM, nWords, m_ullStart, (ULONG)(m_ullEnd - m_ullStart));
|
|
if( SUCCEEDED(hr) )
|
|
{
|
|
// Pass the results info to SAPI
|
|
m_cpSite->Recognition(&Result);
|
|
// Release the result phrase
|
|
Result.pPhrase->Release();
|
|
delete[] Result.pvEngineData;
|
|
}
|
|
}
|
|
else if(!fHypothesis)
|
|
{
|
|
// No rules were active - return a false recognition
|
|
// RecognizeStream can still be called if no rules are active
|
|
// - the engine is free to do anything it wants with the speech data.
|
|
Result.eResultType = SPRT_FALSE_RECOGNITION;
|
|
m_cpSite->Recognition(&Result);
|
|
}
|
|
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::CreatePhraseFromRule *
|
|
*---------------------------------*
|
|
* Description:
|
|
* This method is used to produce the result phrase for a CFG result.
|
|
* It selects a random path through an active rule, and
|
|
* then calls ParseFromTransition to generate an ISpPhraseBuilder object.
|
|
*
|
|
* Return:
|
|
* S_OK
|
|
* FAIL(hr)
|
|
****************************************************************************/
|
|
HRESULT CSrEngine::CreatePhraseFromRule( CRuleEntry * pRule, BOOL fHypothesis,
|
|
ULONGLONG ullAudioPos, ULONG ulAudioSize,
|
|
ISpPhraseBuilder** ppPhrase )
|
|
{
|
|
HRESULT hr = S_OK;
|
|
SPRULEENTRY RuleInfo;
|
|
RuleInfo.hRule = pRule->m_hRule;
|
|
hr = m_cpSite->GetRuleInfo(&RuleInfo, SPRIO_NONE);
|
|
if( SUCCEEDED(hr) )
|
|
{
|
|
// Limit of 200 transitions in grammar!
|
|
const ULONG MAXPATH = 200;
|
|
SPPATHENTRY Path[MAXPATH];
|
|
ULONG cTrans;
|
|
// Recursively generate random path
|
|
hr = RecurseWalk(RuleInfo.hInitialState, Path, &cTrans);
|
|
|
|
//Fill in the audio offset and audio size for each element in the path, while each element has equal size and silence in between
|
|
if (cTrans)
|
|
{
|
|
ULONG ulInterval = ulAudioSize/cTrans;
|
|
for (ULONG ul = 0; ul < cTrans && ul < MAXPATH; ul++)
|
|
{
|
|
Path[ul].elem.ulAudioStreamOffset = ul * ulInterval;
|
|
Path[ul].elem.ulAudioSizeBytes = ulInterval/2;
|
|
}
|
|
}
|
|
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
// generate a SPPARSEINFO structure
|
|
SPPARSEINFO ParseInfo;
|
|
memset(&ParseInfo, 0, sizeof(ParseInfo));
|
|
ParseInfo.cbSize = sizeof(SPPARSEINFO);
|
|
ParseInfo.hRule = pRule->m_hRule;
|
|
ParseInfo.ullAudioStreamPosition = ullAudioPos;
|
|
ParseInfo.ulAudioSize = ulAudioSize;
|
|
ParseInfo.cTransitions = cTrans;
|
|
ParseInfo.pPath = Path;
|
|
ParseInfo.fHypothesis = fHypothesis;
|
|
ParseInfo.SREngineID = CLSID_SampleSREngine;
|
|
ParseInfo.ulSREnginePrivateDataSize = 0;
|
|
ParseInfo.pSREnginePrivateData = NULL;
|
|
|
|
// Generate a phrase object from the parse info.
|
|
hr = m_cpSite->ParseFromTransitions(&ParseInfo, ppPhrase );
|
|
if(SUCCEEDED(hr))
|
|
{
|
|
// delete any allocated memory
|
|
for(ULONG i = 0; i < cTrans; i++)
|
|
{
|
|
if(Path[i].elem.pszDisplayText)
|
|
{
|
|
delete const_cast<WCHAR*>(Path[i].elem.pszDisplayText);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return hr;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::FindRule *
|
|
*---------------------*
|
|
* Description:
|
|
* This method is used to locate an active rule in the rule list by index
|
|
*
|
|
****************************************************************************/
|
|
CRuleEntry* CSrEngine::FindRule( ULONG ulRuleIndex )
|
|
{
|
|
CRuleEntry * pRule = m_RuleList.GetHead();
|
|
ULONG ulRule = 0;
|
|
while( pRule )
|
|
{
|
|
if( pRule->m_fActive && ( ulRule++ == ulRuleIndex ) )
|
|
{
|
|
break;
|
|
}
|
|
pRule = m_RuleList.GetNext( pRule );
|
|
}
|
|
_ASSERT(pRule && pRule->m_fActive);
|
|
return pRule;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::NextRuleAlt *
|
|
*------------------------*
|
|
* Description:
|
|
* This method is used to locate a rule alternate in the rule list
|
|
*
|
|
****************************************************************************/
|
|
CRuleEntry* CSrEngine::NextRuleAlt( CRuleEntry * pPriRule, CRuleEntry * pLastRule )
|
|
{
|
|
CRuleEntry * pRule = (pLastRule)?(pLastRule):(m_RuleList.GetHead());
|
|
for(; pRule; pRule = m_RuleList.GetNext(pRule))
|
|
{
|
|
if( pRule->m_fActive &&
|
|
( m_cpSite->IsAlternate( pPriRule->m_hRule, pRule->m_hRule ) == S_OK ) )
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
return pRule;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::WalkCFGRule *
|
|
*------------------------*
|
|
* Description:
|
|
* This method is used to produce the results information for a CFG result.
|
|
* It creates a result phrase and then some alternates phrases.
|
|
* Return:
|
|
* S_OK
|
|
* FAILED(hr)
|
|
****************************************************************************/
|
|
HRESULT CSrEngine::WalkCFGRule( SPRECORESULTINFO * pResult, ULONG cRulesActive, BOOL fHypothesis,
|
|
ULONG nWords, ULONGLONG ullAudioPos, ULONG ulAudioSize)
|
|
{
|
|
HRESULT hr = E_FAIL;
|
|
CRuleEntry * pPriRule = NULL;
|
|
pResult->ulSizeEngineData = 0;
|
|
pResult->pvEngineData = NULL;
|
|
pResult->eResultType = SPRT_CFG;
|
|
pResult->hGrammar = NULL;
|
|
|
|
while (hr == E_FAIL)
|
|
{
|
|
// Randomly pick a rule and locate it in the list
|
|
pPriRule = FindRule( rand() % cRulesActive );
|
|
|
|
// Create a phrase from the rule
|
|
hr = CreatePhraseFromRule( pPriRule, fHypothesis, ullAudioPos,
|
|
ulAudioSize, &pResult->pPhrase );
|
|
// E_FAIL means the random path generated came to a dead end. Most likely an empty
|
|
// dynamic rule. We are not allowed to recognize through empty rules.
|
|
// Hence we choose another random toplevel rule and try again until we succeed.
|
|
}
|
|
|
|
if (hr != S_OK)
|
|
{
|
|
_ASSERT(FALSE);
|
|
}
|
|
|
|
// Get the phrase info
|
|
SPPHRASE* pPriPhraseInfo = NULL;
|
|
if( SUCCEEDED( hr ) )
|
|
{
|
|
hr = pResult->pPhrase->GetPhrase( &pPriPhraseInfo );
|
|
}
|
|
|
|
// Ask the site how many alternates to generate
|
|
ULONG ulNumAlts = 0;
|
|
if( SUCCEEDED( hr ) )
|
|
{
|
|
hr = m_cpSite->GetMaxAlternates( pPriRule->m_hRule, &ulNumAlts );
|
|
}
|
|
|
|
// Randomly create some alternates
|
|
if( SUCCEEDED( hr ) && ulNumAlts )
|
|
{
|
|
pResult->aPhraseAlts = new SPPHRASEALT[ulNumAlts];
|
|
if( pResult->aPhraseAlts )
|
|
{
|
|
memset( pResult->aPhraseAlts, 0, ulNumAlts * sizeof(SPPHRASEALT) );
|
|
CRuleEntry * pAltRule = NULL;
|
|
|
|
for( ULONG i = 0; SUCCEEDED( hr ) && (i < ulNumAlts); ++i )
|
|
{
|
|
// Try to find an alternate rule
|
|
pAltRule = NextRuleAlt( pPriRule, pAltRule );
|
|
if( !pAltRule )
|
|
{
|
|
break;
|
|
}
|
|
|
|
// Create an alternate phrase from the rule
|
|
hr = CreatePhraseFromRule( pAltRule, fHypothesis, ullAudioPos,
|
|
ulAudioSize, &pResult->aPhraseAlts[i].pPhrase );
|
|
|
|
// Get the alternate phrase info
|
|
SPPHRASE* pAltPhraseInfo = NULL;
|
|
if( SUCCEEDED( hr ) )
|
|
{
|
|
hr = pResult->aPhraseAlts[i].pPhrase->GetPhrase( &pAltPhraseInfo );
|
|
}
|
|
|
|
if( SUCCEEDED( hr ) )
|
|
{
|
|
++pResult->ulNumAlts;
|
|
|
|
// Fill out relationship info
|
|
pResult->aPhraseAlts[i].cElementsInParent = pPriPhraseInfo->Rule.ulCountOfElements;
|
|
pResult->aPhraseAlts[i].cElementsInAlternate = pAltPhraseInfo->Rule.ulCountOfElements;
|
|
|
|
// Point to some extra data for testing
|
|
static BYTE AltData[] = { 0xED, 0xED, 0xED, 0xED };
|
|
pResult->aPhraseAlts[i].pvAltExtra = &AltData;
|
|
pResult->aPhraseAlts[i].cbAltExtra = sp_countof( AltData );
|
|
}
|
|
|
|
if( pAltPhraseInfo )
|
|
{
|
|
::CoTaskMemFree( pAltPhraseInfo );
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
E_OUTOFMEMORY;
|
|
}
|
|
}
|
|
|
|
// Cleanup main phrase information
|
|
if( pPriPhraseInfo )
|
|
{
|
|
::CoTaskMemFree( pPriPhraseInfo );
|
|
}
|
|
|
|
// Cleanup on failure
|
|
if( FAILED( hr ) )
|
|
{
|
|
if( pResult->pPhrase )
|
|
{
|
|
pResult->pPhrase->Release();
|
|
pResult->pPhrase = NULL;
|
|
}
|
|
|
|
for( ULONG i = 0; i < pResult->ulNumAlts; ++i )
|
|
{
|
|
pResult->aPhraseAlts[i].pPhrase->Release();
|
|
pResult->aPhraseAlts[i].pPhrase = NULL;
|
|
}
|
|
pResult->ulNumAlts = 0;
|
|
delete[] pResult->aPhraseAlts;
|
|
pResult->aPhraseAlts = NULL;
|
|
}
|
|
|
|
return hr;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::WalkSLM *
|
|
*---------------------------*
|
|
* Description:
|
|
* This method is used to produce the results information for a dictation result.
|
|
* It creates a result phrase, and then serializes some alternates information
|
|
* that the CSrEngineAlternates object uses to generate alternates.
|
|
* Return:
|
|
* S_OK
|
|
* FAILED(hr)
|
|
*****************************************************************************/
|
|
HRESULT CSrEngine::WalkSLM(SPRECORESULTINFO * pResult, ULONG cSLMActive,
|
|
ULONG nWords, ULONGLONG ullAudioPos, ULONG ulAudioSize)
|
|
{
|
|
HRESULT hr = S_OK;
|
|
|
|
// If several dictation grammars are active pick one at random
|
|
ULONG ulGramIndex = rand() % cSLMActive;
|
|
CDrvGrammar * pGram = m_GrammarList.GetHead();
|
|
ULONG nGram = 0;
|
|
for(; pGram; pGram = m_GrammarList.GetNext(pGram))
|
|
{
|
|
if(pGram->m_SLMActive)
|
|
{
|
|
if(nGram == ulGramIndex)
|
|
{
|
|
break;
|
|
}
|
|
nGram++;
|
|
}
|
|
}
|
|
_ASSERT(pGram && pGram->m_SLMActive);
|
|
|
|
if( pGram == NULL )
|
|
{
|
|
return E_FAIL;
|
|
}
|
|
|
|
// create and fill SPPHRASE structure
|
|
SPPHRASE phrase;
|
|
memset(&phrase, 0, sizeof(SPPHRASE));
|
|
phrase.cbSize = sizeof(SPPHRASE);
|
|
phrase.LangID = m_LangID;
|
|
phrase.ullAudioStreamPosition = ullAudioPos;
|
|
phrase.ulAudioSizeBytes = ulAudioSize;
|
|
phrase.SREngineID = CLSID_SampleSREngine;
|
|
|
|
// allocate elements
|
|
ULONG cb = nWords * sizeof(SPPHRASEELEMENT);
|
|
SPPHRASEELEMENT* pElements = (SPPHRASEELEMENT *)_malloca(cb);
|
|
memset(pElements, 0, cb);
|
|
|
|
// fill in word info into elements
|
|
for (ULONG n = 0; n < nWords; n++)
|
|
{
|
|
ULONG ulInterval = ulAudioSize/nWords;
|
|
|
|
pElements[n].bDisplayAttributes = SPAF_ONE_TRAILING_SPACE;
|
|
pElements[n].pszDisplayText = DICT_WORD;
|
|
pElements[n].ulAudioStreamOffset = n * ulInterval;
|
|
pElements[n].ulAudioSizeBytes = ulInterval/2;
|
|
|
|
}
|
|
|
|
// add elements to phrase
|
|
phrase.Rule.ulCountOfElements = nWords;
|
|
phrase.pElements = pElements;
|
|
|
|
// make phrase builder and add phrase info
|
|
CComPtr<ISpPhraseBuilder> cpBuilder;
|
|
hr = cpBuilder.CoCreateInstance(CLSID_SpPhraseBuilder);
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
hr = cpBuilder->InitFromPhrase(&phrase);
|
|
}
|
|
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
// Store alternates string in extra engine data
|
|
// We just serialize an equal number of copies of ALT_WORD as DICT_WORD.
|
|
// Engines can store any information (e.g. serialized lattice) here.
|
|
pResult->ulSizeEngineData = sizeof(ALT_WORD) * nWords;
|
|
pResult->pvEngineData = new WCHAR[(sizeof(ALT_WORD) / sizeof(WCHAR )) * nWords];
|
|
if (pResult->pvEngineData == NULL)
|
|
{
|
|
hr = E_OUTOFMEMORY;
|
|
}
|
|
}
|
|
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
WCHAR *pC = (WCHAR *)pResult->pvEngineData;
|
|
size_t cRemainingSize = (size_t)pResult->ulSizeEngineData / sizeof(WCHAR);
|
|
for(ULONG i = 0; i < nWords; i++)
|
|
{
|
|
wcscpy_s(pC, cRemainingSize, ALT_WORD);
|
|
size_t cAltWord = wcslen(ALT_WORD);
|
|
pC += cAltWord;
|
|
*pC = ' ';
|
|
pC++;
|
|
cRemainingSize -= cAltWord + 1;
|
|
}
|
|
*(--pC) = '\0';
|
|
|
|
pResult->eResultType = SPRT_SLM;
|
|
pResult->hGrammar = pGram->m_hSapiGrammar;
|
|
}
|
|
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
pResult->pPhrase = cpBuilder.Detach();
|
|
}
|
|
else
|
|
{
|
|
delete[] pResult->pvEngineData;
|
|
pResult->pvEngineData = NULL;
|
|
pResult->ulSizeEngineData = 0;
|
|
}
|
|
|
|
_freea(pElements);
|
|
|
|
return hr;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::WalkTextBuffer *
|
|
*---------------------------*
|
|
* Description:
|
|
* This is called when RecurseWalk hits a text-buffer transition.
|
|
* At such a transition the engine should try to recognize any sequence
|
|
* of words from the active text selection the app has supplied.
|
|
* Any word can be the start word and then recognition can continue to
|
|
* the next \0 end of sentence marker or at the end of the active text selection.
|
|
* In this sample we pick a random string of words from the buffer.
|
|
* Return:
|
|
* S_OK
|
|
* FAILED(hr)
|
|
*****************************************************************************/
|
|
HRESULT CSrEngine::WalkTextBuffer(void* pvGrammarCookie, SPPATHENTRY * pPath, SPTRANSITIONID hId, ULONG * pcTrans)
|
|
{
|
|
HRESULT hr = S_OK;
|
|
// Check if text buffer present
|
|
CDrvGrammar * pGrammar = (CDrvGrammar *) pvGrammarCookie;
|
|
_ASSERT(pGrammar->m_pWordSequenceText && pGrammar->m_cchText >= 2); //m_cchText counts the trailing "/0/0"
|
|
if (!pGrammar->m_pWordSequenceText || pGrammar->m_cchText < 2)
|
|
{
|
|
return E_UNEXPECTED;
|
|
}
|
|
|
|
*pcTrans = 0;
|
|
|
|
// Count sentences
|
|
ULONG nPhrase = 0;
|
|
const WCHAR *cPhrase;
|
|
|
|
ULONG ulStartActiveOffset = 0; //The default value with text selection
|
|
ULONG cchActiveChars = pGrammar->m_cchText - 2; //The default value with text selection
|
|
ULONG ccChars = 0;
|
|
|
|
if (pGrammar->m_pInfo)
|
|
{
|
|
ulStartActiveOffset = pGrammar->m_pInfo->ulStartActiveOffset;
|
|
cchActiveChars = pGrammar->m_pInfo->cchActiveChars;
|
|
}
|
|
|
|
for (cPhrase = pGrammar->m_pWordSequenceText + ulStartActiveOffset;
|
|
ccChars < cchActiveChars && (*cPhrase != L'\0' || *(cPhrase + 1) != '\0');
|
|
ccChars++, cPhrase++)
|
|
{
|
|
if(*cPhrase != L'\0' && *(cPhrase + 1) == L'\0')
|
|
{
|
|
nPhrase++;
|
|
}
|
|
}
|
|
|
|
if (nPhrase == 0)
|
|
{
|
|
return E_FAIL;
|
|
}
|
|
|
|
// Randomly pick a sentence between the first active sentence and the last active sentence
|
|
nPhrase = rand() % nPhrase; //nPhrase would be 0 index
|
|
ULONG nP = 0;
|
|
for(cPhrase = pGrammar->m_pWordSequenceText + ulStartActiveOffset; nP != nPhrase; cPhrase++)
|
|
{
|
|
if(*cPhrase == L'\0')
|
|
{
|
|
nP++;
|
|
}
|
|
}
|
|
|
|
// Count words in sentence
|
|
ULONG nWord = 1;
|
|
const WCHAR *cWord;
|
|
for(cWord = cPhrase; *cWord != L'\0' && ULONG(cWord - pGrammar->m_pWordSequenceText) < ulStartActiveOffset + cchActiveChars; cWord++)
|
|
{
|
|
if(iswspace(*cWord))
|
|
{
|
|
while(*(cWord+1) && iswspace(*(cWord+1)) && ULONG(cWord - pGrammar->m_pWordSequenceText) < ulStartActiveOffset + cchActiveChars - 1) {cWord++;}
|
|
nWord++;
|
|
}
|
|
}
|
|
|
|
// Pick entry and exit word
|
|
ULONG startWord = rand() % nWord;
|
|
ULONG countWord = rand() % (nWord - startWord) + 1;
|
|
|
|
// Find the entry word
|
|
for(nWord = 0, cWord = cPhrase; nWord != startWord; cWord++)
|
|
{
|
|
if(iswspace(*cWord))
|
|
{
|
|
while(*(cWord+1) && iswspace(*(cWord+1)) && ULONG(cWord - pGrammar->m_pWordSequenceText) < ulStartActiveOffset + cchActiveChars - 1) {cWord++;}
|
|
nWord++;
|
|
}
|
|
}
|
|
|
|
// Build paths to end word
|
|
const WCHAR *cW = cWord;
|
|
for(nWord = 0; nWord != countWord; cWord++)
|
|
{
|
|
if(*cWord == L'\0' || iswspace(*cWord) || ULONG(cWord - pGrammar->m_pWordSequenceText) == ulStartActiveOffset + cchActiveChars)
|
|
{
|
|
// Build a path entry
|
|
pPath->hTransition = hId;
|
|
memset(&pPath->elem, 0, sizeof(pPath->elem));
|
|
pPath->elem.bDisplayAttributes = SPAF_ONE_TRAILING_SPACE;
|
|
WCHAR *pszWord = new WCHAR[cWord - cW + 1];
|
|
wcsncpy_s(pszWord, cWord - cW + 1, cW, cWord - cW);
|
|
pszWord[cWord - cW] = '\0';
|
|
pPath->elem.pszDisplayText = pszWord;
|
|
|
|
pPath++;
|
|
(*pcTrans)++;
|
|
|
|
while(*(cWord+1) && iswspace(*(cWord+1)) && ULONG(cWord - pGrammar->m_pWordSequenceText) < ulStartActiveOffset + cchActiveChars - 1) {cWord++;}
|
|
cW = cWord + 1; // first char of next word
|
|
nWord++;
|
|
}
|
|
}
|
|
|
|
return hr;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::RecurseWalk *
|
|
*----------------------*
|
|
* Description:
|
|
* This method produces a random path through an active CFG. If the path contains
|
|
* a rule reference transition RecurseWalk is recursively called to produce a path though
|
|
* sub-rules. The result is an array of SPPATHENTRY elements containing the transitions.
|
|
*
|
|
* The initial state in each rule is obtained by calling GetRuleInfo. Then for each
|
|
* state GetStateInfo can be called. This gives an array of SPTRANSITION entries
|
|
* the contain information on the type of transition, the transition id and
|
|
* the next state the transition goes to. The transition id is the main information
|
|
* included in the SPPATHENTRY. Only for word transitions are SPPATHENTRY created,
|
|
* as this is all that is required by ParseFromTransitions.
|
|
*
|
|
* Return:
|
|
* S_OK
|
|
* FAILED(hr)
|
|
****************************************************************************/
|
|
HRESULT CSrEngine::RecurseWalk(SPSTATEHANDLE hState, SPPATHENTRY * pPath, ULONG * pcTrans)
|
|
{
|
|
HRESULT hr = S_OK;
|
|
|
|
CSpStateInfo StateInfo;
|
|
*pcTrans = 0;
|
|
while (SUCCEEDED(hr) && hState)
|
|
{
|
|
ULONG cTrans;
|
|
hr = m_cpSite->GetStateInfo(hState, &StateInfo);
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
// Now randomly decide which transition to take.
|
|
ULONG cTransInState = StateInfo.cEpsilons + StateInfo.cWords + StateInfo.cRules + StateInfo.cSpecialTransitions;
|
|
if (cTransInState == 0)
|
|
{
|
|
// This path is a dead-end. Most likely this is due to an empty dynamic rule.
|
|
hr = E_FAIL;
|
|
break;
|
|
}
|
|
SPTRANSITIONENTRY * pTransEntry = StateInfo.pTransitions + (rand() % cTransInState);
|
|
|
|
switch(pTransEntry->Type)
|
|
{
|
|
case SPTRANSEPSILON:
|
|
// Epsilon transition - don't need to create a path entry
|
|
// Advance to the next state.
|
|
break;
|
|
case SPTRANSRULE:
|
|
// Rule transition - we recursively descend into the rule and add onto the path array
|
|
hr = RecurseWalk(pTransEntry->hRuleInitialState, pPath, &cTrans);
|
|
*pcTrans += cTrans;
|
|
pPath += cTrans;
|
|
break;
|
|
case SPTRANSWORD:
|
|
case SPTRANSWILDCARD:
|
|
// For a word transition we complete an SPPATHENTRY structure with the transition id.
|
|
// A wildcard transition indicates the engine should match against any speech, so we do the same thing.
|
|
pPath->hTransition = pTransEntry->ID;
|
|
memset(&pPath->elem, 0, sizeof(pPath->elem));
|
|
pPath->elem.bDisplayAttributes = SPAF_ONE_TRAILING_SPACE;
|
|
pPath++;
|
|
(*pcTrans)++;
|
|
break;
|
|
case SPTRANSTEXTBUF:
|
|
// Text Buffer transition - produce a path from WalkTextBuffer
|
|
hr = WalkTextBuffer(pTransEntry->pvGrammarCookie, pPath, pTransEntry->ID, &cTrans);
|
|
*pcTrans += cTrans;
|
|
pPath += cTrans;
|
|
break;
|
|
case SPTRANSDICTATION:
|
|
// Dictation transition - indicating the recognizer should do dictation at
|
|
// this point in the grammar. We generate the DICT_WORD as a path entry.
|
|
// The word text is indicated by setting pszDisplayText, which otherwise can be left NULL.
|
|
pPath->hTransition = pTransEntry->ID;
|
|
memset(&pPath->elem, 0, sizeof(pPath->elem));
|
|
pPath->elem.bDisplayAttributes = SPAF_ONE_TRAILING_SPACE;
|
|
size_t cDictWord = wcslen(DICT_WORD);
|
|
WCHAR *pszWord = new WCHAR[cDictWord + 1];
|
|
wcscpy_s(pszWord, cDictWord + 1, DICT_WORD);
|
|
pPath->elem.pszDisplayText = pszWord;
|
|
pPath++;
|
|
(*pcTrans)++;
|
|
break;
|
|
}
|
|
|
|
// Move to the next state - a transition to NULL indicates the end of the rule.
|
|
hState = pTransEntry->hNextState;
|
|
}
|
|
}
|
|
return hr;
|
|
}
|
|
|
|
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::GetInputAudioFormat *
|
|
*---------------------------*
|
|
* Description:
|
|
* This method is called for SAPI to find what audio formats the engine
|
|
* can support for recognition.
|
|
* Audio formats a represented by a GUID and an optional WAVEFORMATEX
|
|
* structure for wav file formats. This method may be called
|
|
* either with pSourceFormatId and pSourceWaveFormatEx NULL, in which
|
|
* case the engine should return its most preferred format. It may also
|
|
* be called with these set, in which case the engine should return
|
|
* that format if it can support or the closest it can.
|
|
*
|
|
* If the engines returned format is incompatible with the input audio
|
|
* object format SAPI will try and create a format convertor to convert the audio
|
|
*
|
|
* Return:
|
|
* S_OK
|
|
* FAILED(hr)
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::GetInputAudioFormat(const GUID * pSourceFormatId, const WAVEFORMATEX * pSourceWaveFormatEx,
|
|
GUID * pDesiredFormatId, WAVEFORMATEX ** ppCoMemDesiredWFEX)
|
|
{
|
|
|
|
|
|
// Helper function which fills in the sample engine desired format (PCM 11kHz Mono).
|
|
return SpConvertStreamFormatEnum(SPSF_11kHz16BitMono, pDesiredFormatId, ppCoMemDesiredWFEX);
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
* CSrEngine::SetPropertyNum *
|
|
*---------------------*
|
|
* Description:
|
|
* The following methods are used for the app to send and receive
|
|
* real-time attribute information to the engine. This differs from the
|
|
* ISpObjectWithToken mechanism which is used to query static registry information.
|
|
* If the app tries to set or query an attribute the engine should return
|
|
* okay if it supports that attribute and S_FALSE if it does not.
|
|
* In this method the app is trying to set a numeric property value.
|
|
* Return:
|
|
* S_FALSE
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::
|
|
SetPropertyNum( SPPROPSRC eSrc, PVOID pvSrcObj, const WCHAR* pName, LONG lValue )
|
|
{
|
|
|
|
HRESULT hr = S_OK;
|
|
|
|
hr = S_FALSE; // We don't support any properties
|
|
|
|
return hr;
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* CSrEngine::GetPropertyNum *
|
|
*---------------------*
|
|
* Description:
|
|
* In this method the app is trying to get a numeric property value.
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::
|
|
GetPropertyNum( SPPROPSRC eSrc, PVOID pvSrcObj, const WCHAR* pName, LONG * plValue )
|
|
{
|
|
|
|
HRESULT hr = S_OK;
|
|
|
|
hr = S_FALSE;
|
|
*plValue = 0;
|
|
|
|
return hr;
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* CSrEngine::SetPropertyString *
|
|
*----------------------*
|
|
* Description:
|
|
* In this method the app is trying to set a string property value.
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::
|
|
SetPropertyString( SPPROPSRC eSrc, PVOID pvSrcObj, const WCHAR* pName, const WCHAR* pValue )
|
|
{
|
|
|
|
HRESULT hr = S_OK;
|
|
|
|
hr = S_FALSE;
|
|
|
|
return hr;
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* CSrEngine::GetPropertyString *
|
|
*----------------------*
|
|
* Description:
|
|
* In this method the app is trying to get a string property value.
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::
|
|
GetPropertyString( SPPROPSRC eSrc, PVOID pvSrcObj, const WCHAR* pName, __deref_out_opt WCHAR** ppCoMemValue )
|
|
{
|
|
|
|
HRESULT hr = S_OK;
|
|
|
|
hr = S_FALSE;
|
|
*ppCoMemValue = NULL;
|
|
|
|
return hr;
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::PrivateCall *
|
|
*---------------------------*
|
|
* Description:
|
|
* This method is used for a reco extension to send private, engine-specific information
|
|
* to an engine it knows about. The app QI's off the reco context for a private
|
|
* interface, and SAPI creates the engine' extension object (in this case CSampleSRExtension).
|
|
* The app can then call methods on this interface. If the extension object wants to
|
|
* communicate with the engine it calls CallEngine. The data from that call is then
|
|
* passed to this method on the engine.
|
|
*
|
|
* Similarly the alternates class and UI class can also use this mechanism to
|
|
* communicate with their main engine class.
|
|
*
|
|
* The single buffer is in/out. This call can update the buffer, and the updated buffer
|
|
* will be returned to the caller of _ISpPrivateEngineCall::CallEngine(). To return
|
|
* variable size buffers, use _ISpPrivateEngineCall::CallEngineEx() and
|
|
* ISpSREngine::PrivateCallEx().
|
|
*
|
|
* Return:
|
|
* S_OK
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::PrivateCall(void * pvEngineContext, void * pCallFrame, ULONG ulCallFrameSize)
|
|
{
|
|
|
|
// Just an example - do nothing here
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::PrivateCallEx *
|
|
*--------------------------*
|
|
* Description:
|
|
* This method is similar to PrivateCall except that the call frame is an
|
|
* input only parameter. This function should CoTaskMemAlloc a response block
|
|
* and return it in *ppvCoMemResponse. You must also return the size of the
|
|
* allocated block (in bytes) in *pcbResponse.
|
|
*
|
|
* While this is slightly more work than CallEngine/PrivateCall, it allows for
|
|
* variable size responses, which could be more efficent in some cases.
|
|
*
|
|
* Returns:
|
|
* S_OK
|
|
*
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::PrivateCallEx(void * pvEngineContext, const void * pInCallFrame, ULONG ulCallFrameSize,
|
|
void ** ppvCoMemResponse, ULONG * pcbResponse)
|
|
{
|
|
|
|
HRESULT hr = S_OK;
|
|
|
|
*ppvCoMemResponse = NULL;
|
|
*pcbResponse = 0;
|
|
|
|
return hr;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::PrivateCallImmediate *
|
|
*--------------------------*
|
|
* Description:
|
|
* This method is supposed to do the same thing as the PrivateCallEx
|
|
* faster but this sample engine does nothing.
|
|
* Returns:
|
|
* S_OK
|
|
*
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::PrivateCallImmediate(
|
|
void *pvEngineContext,
|
|
const void *pInCallFrame,
|
|
ULONG ulInCallFrameSize,
|
|
void **ppvCoMemResponse,
|
|
ULONG *pulResponseSize)
|
|
{
|
|
*ppvCoMemResponse = NULL;
|
|
*pulResponseSize = 0;
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::SetAdaptationData2 *
|
|
*--------------------------*
|
|
* Description:
|
|
* This method is called when an application calls
|
|
* ISpRecoContext2::SetAdaptationData2.
|
|
* Returns:
|
|
* S_OK
|
|
*
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::SetAdaptationData2(
|
|
void *pvEngineContext,
|
|
__in_ecount(cch) const WCHAR *pAdaptationData,
|
|
const ULONG cch,
|
|
LPCWSTR pTopicName,
|
|
SPADAPTATIONSETTINGS eSettings,
|
|
SPADAPTATIONRELEVANCE eRelevance)
|
|
{
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::SetGrammarPrefix *
|
|
*--------------------------*
|
|
* Description:
|
|
* This method is used to specify a prefix word that the recognizer
|
|
* should recognize before any paths in the grammar. This sample engine
|
|
* ignores it.
|
|
* Returns:
|
|
* S_OK
|
|
*
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::SetGrammarPrefix(
|
|
void *pvEngineGrammar,
|
|
__in_opt LPCWSTR pszPrefix,
|
|
BOOL fIsPrefixRequired)
|
|
{
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::SetRulePriority *
|
|
*--------------------------*
|
|
* Description:
|
|
* This method sets the priority of a particular rule. Rules with higher
|
|
* priority have precedence. This sample engine ignores this.
|
|
* Returns:
|
|
* S_OK
|
|
*
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::SetRulePriority(
|
|
SPRULEHANDLE hRule,
|
|
void *pvClientRuleContext,
|
|
int nRulePriority)
|
|
{
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::EmulateRecognition *
|
|
*--------------------------*
|
|
* Description:
|
|
* This method is supposed to support emulation by the recognition
|
|
* engine and bypass any inconsistencies that may exist between a
|
|
* particular engine's recognition logic and SAPI's emulation. This
|
|
* sample engine simply returns E_NOTIMPL to let SAPI do its own
|
|
* emulation.
|
|
* Returns:
|
|
* E_NOTIMPL
|
|
*
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::EmulateRecognition(
|
|
ISpPhrase *pPhrase,
|
|
DWORD dwCompareFlags)
|
|
{
|
|
// Let SAPI do its own emulation.
|
|
return E_NOTIMPL;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::SetSLMWeight *
|
|
*--------------------------*
|
|
* Description:
|
|
* This method sets the weight of the dictation grammar versus that of
|
|
* CFGs. This sample engine ignores this.
|
|
* Returns:
|
|
* S_OK
|
|
*
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::SetSLMWeight(
|
|
void *pvEngineGrammar,
|
|
float flWeight)
|
|
{
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::SetRuleWeight *
|
|
*--------------------------*
|
|
* Description:
|
|
* This method sets the weight of a rule in a CFG. This sample engine
|
|
* ignores this.
|
|
* Returns:
|
|
* S_OK
|
|
*
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::SetRuleWeight(
|
|
SPRULEHANDLE hRule,
|
|
void *pvClientRuleContext,
|
|
float flWeight)
|
|
{
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::SetTrainingState *
|
|
*--------------------------*
|
|
* Description:
|
|
* This method puts the engine into training mode. This sample engine
|
|
* ignores this.
|
|
* Returns:
|
|
* S_OK
|
|
*
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::SetTrainingState(
|
|
BOOL fDoingTraining,
|
|
BOOL fAdaptFromTrainingData)
|
|
{
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::ResetAcousticModelAdaptation *
|
|
*--------------------------*
|
|
* Description:
|
|
* This method resets the profile back to its original state. This
|
|
* sample engine ignores this.
|
|
* Returns:
|
|
* S_OK
|
|
*
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::ResetAcousticModelAdaptation( void)
|
|
{
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::OnLoadCFG *
|
|
*--------------------------*
|
|
* Description:
|
|
* This method passes the binary CFG data to the engine so that the
|
|
* engine can consume it directly rather than call SAPI to retrieve
|
|
* information from the grammar. This sample engine ignores this.
|
|
* Returns:
|
|
* S_OK
|
|
*
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::OnLoadCFG(
|
|
void *pvEngineGrammar,
|
|
const SPBINARYGRAMMAR *pGrammarData,
|
|
ULONG ulGrammarID)
|
|
{
|
|
return S_OK;
|
|
}
|
|
|
|
/****************************************************************************
|
|
* CSrEngine::OnUnloadCFG *
|
|
*--------------------------*
|
|
* Description:
|
|
* This method is the reverse of the OnLoadCFG function and is called by
|
|
* SAPI when the grammar is unloaded. This sample engine ignores this.
|
|
* Returns:
|
|
* S_OK
|
|
*
|
|
*****************************************************************************/
|
|
STDMETHODIMP CSrEngine::OnUnloadCFG(
|
|
void *pvEngineGrammar,
|
|
ULONG ulGrammarID)
|
|
{
|
|
return S_OK;
|
|
}
|