// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A // PARTICULAR PURPOSE. // // Copyright © Microsoft Corporation. All rights reserved /****************************************************************************** * srengobj.cpp * This file contains the implementation of the CSrEngine class. * This implements ISpSREngine, ISpSREngine2 and ISpObjectWithToken. * This is the main recognition object ******************************************************************************/ #include "stdafx.h" #include "SampleSrEngine.h" #include "srengobj.h" #include "SpHelper.h" #ifndef _WIN32_WCE #include "shlobj.h" #endif static const WCHAR DICT_WORD[] = L"Blah"; // This is the default word the sample engine uses for dictation static const WCHAR ALT_WORD[] = L"Alt"; // This is the default word used for alternates /**************************************************************************** * CSrEngine::FinalConstruct * *---------------------------* * Description: * The ATL FinalConstruct method. Called after the standard C++ constructor * Return: * S_OK * FAILED(hr) *****************************************************************************/ HRESULT CSrEngine::FinalConstruct() { HRESULT hr = S_OK; // This event is used to indicate when space is available for data to be given to the recognition thread m_hQueueHasRoom = ::CreateEvent(NULL, TRUE, TRUE, NULL); m_FrameQueue.SetSpaceAvailEvent(m_hQueueHasRoom); // Create a thread control which will be used for the recognition thread CComPtr cpTaskMgr; hr = cpTaskMgr.CoCreateInstance(CLSID_SpResourceManager); if (SUCCEEDED(hr)) { hr = cpTaskMgr->CreateThreadControl(this, this, THREAD_PRIORITY_NORMAL, &m_cpDecoderThread); } if(SUCCEEDED(hr)) { // Create the SAPI lexicon which holds user and app pronounciations hr = m_cpLexicon.CoCreateInstance(CLSID_SpLexicon); } return hr; } /**************************************************************************** * CSrEngine::FinalRelease * *---------------------------* * Description: * The ATL FinalRelease method. Clean up any resources not automatically destructed. * Return: * S_OK *****************************************************************************/ HRESULT CSrEngine::FinalRelease() { ::CloseHandle(m_hQueueHasRoom); return S_OK; } /**************************************************************************** * CSrEngine::SetObjectToken * *---------------------------* * Description: * This method is called by SAPI immediately after the engine is created. * It can be used to get registry information specific to this engine. * The engine can recover from the token file paths stored there during installation. * The engine also can recover user set defaults for accuracy, rejection etc. * Also could have different engines sharing the same code base (CLSID) but having different registry info * e.g. if engine supports different languages * Return: * S_OK * FAILED(hr) *****************************************************************************/ STDMETHODIMP CSrEngine::SetObjectToken(ISpObjectToken * pToken) { HRESULT hr = S_OK; // Helper function that copies the token reference. hr = SpGenericSetObjectToken(pToken, m_cpEngineObjectToken); if(FAILED(hr)) { return hr; } // Read attribute information from the token in the registry CComPtr cpAttribKey; hr = pToken->OpenKey(L"Attributes", &cpAttribKey); if(SUCCEEDED(hr)) { WCHAR *psz = NULL; hr = cpAttribKey->GetStringValue(L"Desktop", &psz); ::CoTaskMemFree(psz); if(SUCCEEDED(hr)) { // This instance of the engine is for doing desktop recognition } else if(hr == SPERR_NOT_FOUND) { hr = cpAttribKey->GetStringValue(L"Telephony", &psz); ::CoTaskMemFree(psz); if(SUCCEEDED(hr)) { // This instance of the engine is for doing telephony recognition } } } // Read what language is set in the registry if(SUCCEEDED(hr)) { WCHAR *pszLangID = NULL; hr = cpAttribKey->GetStringValue(L"Language", &pszLangID); if(SUCCEEDED(hr)) { // We could use this language id in recognition. m_LangID = (unsigned short)wcstol(pszLangID, NULL, 16); ::CoTaskMemFree(pszLangID); } else { // Default language (US English) m_LangID = 0x409; } } // Read data-file location WCHAR *pszPath = NULL; // Looks for the file path stored in the registry hr = pToken->GetStorageFileName(CLSID_SampleSREngine, L"SampleEngDataFile", NULL, 0, &pszPath); // Could now load engine data-files from this given path ::CoTaskMemFree(pszPath); return hr; } /**************************************************************************** * CSrEngine::GetObjectToken * *---------------------------* * Description: * This method is called if SAPI wants to find which object token this engine is using * Return: * S_OK * FAILED(hr) *****************************************************************************/ STDMETHODIMP CSrEngine::GetObjectToken(ISpObjectToken ** ppToken) { // Generic helper function return SpGenericGetObjectToken(ppToken, m_cpEngineObjectToken); } /**************************************************************************** * CSrEngine::SetSite * *---------------------------* * Description: * This is called to give the engine a reference to the ISpSREngineSite. * The engine uses this to call back to SAPI. * Return: * S_OK *****************************************************************************/ STDMETHODIMP CSrEngine::SetSite(ISpSREngineSite *pSite) { m_cpSite = pSite; return S_OK; } /**************************************************************************** * CSrEngine::SetRecoProfile * *---------------------------* * Description: * The RecoProfile is an object token holding information on the current * user and enrollment session. The engine can store whatever information here * it likes. It should store this in a key under the RecoProfile key named with the engine class id. * Return: * S_OK * FAILED(hr) *****************************************************************************/ STDMETHODIMP CSrEngine::SetRecoProfile(ISpObjectToken *pProfile) { // First find if our engine already has info in this profile HRESULT hr = S_OK; WCHAR *pszCLSID, *pszPath = NULL; CComPtr dataKey; m_cpUserObjectToken = pProfile; hr = ::StringFromCLSID(CLSID_SampleSREngine, &pszCLSID); if(FAILED(hr)) { return hr; } hr = pProfile->OpenKey(pszCLSID, &dataKey); if(hr == SPERR_NOT_FOUND) { // We haven't seen this user profile before, so create a new registry key to hold info for it hr = pProfile->CreateKey(pszCLSID, &dataKey); // Now we can set some default values if(SUCCEEDED(hr)) { hr = dataKey->SetStringValue(L"GENDER", L"UNKNOWN"); } if(SUCCEEDED(hr)) { hr = dataKey->SetStringValue(L"AGE", L"UNKNOWN"); } // Now we can create some temporary file storage (e.g. for trained models) if(SUCCEEDED(hr)) { pProfile->GetStorageFileName(CLSID_SampleSREngine, L"SampleEngTrainingFile", NULL, CSIDL_FLAG_CREATE | CSIDL_LOCAL_APPDATA, &pszPath); } // Now we request a UI for user training (or properties - SPDUI_RecoProfileProperties) // The engine cannot directly create UI it must request it. hr = AddEventString(SPEI_REQUEST_UI, 0, SPDUI_UserTraining); } else if(SUCCEEDED(hr)) { // We've already seen this profile so read values WCHAR *pszGender = NULL, *pszAge = NULL; hr = dataKey->GetStringValue(L"GENDER", &pszGender); if(SUCCEEDED(hr)) { hr = dataKey->GetStringValue(L"AGE", &pszAge); } // Now we could read training file if(SUCCEEDED(hr)) { hr = pProfile->GetStorageFileName(CLSID_SampleSREngine, L"SampleEngTrainingFile", NULL, 0, &pszPath); } ::CoTaskMemFree(pszGender); ::CoTaskMemFree(pszAge); } ::CoTaskMemFree(pszPath); ::CoTaskMemFree(pszCLSID); return hr; } /**************************************************************************** * CSrEngine::OnCreateRecoContext * *---------------------------* * Description: * This method is called each time a new reco context is created in * an application using this engine. * This sample engine does not strictly need info about reco contexts * but for reference we will a keep list of them. * Return: * S_OK * FAILED(hr) *****************************************************************************/ STDMETHODIMP CSrEngine::OnCreateRecoContext(SPRECOCONTEXTHANDLE hSapiContext, void ** ppvDrvCtxt) { CContext * pContext = new CContext(hSapiContext); // Store a reference to the CContext structure *ppvDrvCtxt = pContext; m_ContextList.InsertHead(pContext); return S_OK; } /**************************************************************************** * CSrEngine::OnDeleteRecoContext * *---------------------------* * Description: * This method is called each time a reco context is deleted. * Return: * S_OK *****************************************************************************/ STDMETHODIMP CSrEngine::OnDeleteRecoContext(void * pvDrvCtxt) { CContext * pContext = (CContext *) pvDrvCtxt; m_ContextList.Remove(pContext); delete pContext; return S_OK; } /**************************************************************************** * CSrEngine::OnCreateGrammar * *---------------------------* * Description: * This method is called each time a new reco grammar is created in * an application using this engine. * We keep a list of grammars - storing a pointer to the list entry in ppvEngineGrammar. * Return: * S_OK *****************************************************************************/ STDMETHODIMP CSrEngine::OnCreateGrammar(void * pvEngineRecoContext, SPGRAMMARHANDLE hSapiGrammar, void ** ppvEngineGrammar) { // Each grammar will be associated with a context CContext * pContext = (CContext *) pvEngineRecoContext; _ASSERT(m_ContextList.Find(pContext->m_hSapiContext)); // Keep a list of grammars CDrvGrammar * pGrammar = new CDrvGrammar(hSapiGrammar); // Store a reference to the CDrvGrammar structure *ppvEngineGrammar = pGrammar; m_GrammarList.InsertHead(pGrammar); return S_OK; } /**************************************************************************** * CSrEngine::OnDeleteGrammar * *---------------------------* * Description: * This method is called each time a reco grammar is deleted. * Return: * S_OK *****************************************************************************/ STDMETHODIMP CSrEngine::OnDeleteGrammar(void * pvDrvGrammar) { CDrvGrammar * pGrammar = (CDrvGrammar *)pvDrvGrammar; m_GrammarList.Remove(pGrammar); delete pGrammar; return S_OK; } /**************************************************************************** * CSrEngine::WordNotify * *---------------------------* * Description: * This method is called by SAPI to inform the engine of the words in * command & control (C&C) grammars. When words are added or removed (e.g. by * the application loading or unloading grammars) this method is called. * Here we examine the word text, see if it has an associated pronunciation, * and see if there is a pronunciation in the lexicon. * Return: * S_OK * FAILED(hr) *****************************************************************************/ STDMETHODIMP CSrEngine::WordNotify(SPCFGNOTIFY Action, ULONG cWords, const SPWORDENTRY * pWords) { HRESULT hr = S_OK; ULONG i; WCHAR *wordPron; switch(Action){ case SPCFGN_ADD: SPWORDENTRY WordEntry; for(i = 0; SUCCEEDED(hr) && i < cWords; i++) { WordEntry = pWords[i]; hr = m_cpSite->GetWordInfo(&WordEntry, SPWIO_WANT_TEXT); if(SUCCEEDED(hr) && WordEntry.aPhoneId) { // Word entry contains a specific pronounciation from CFG // Engine should use this in recognition size_t cWordPron = wcslen(WordEntry.aPhoneId) + 1; wordPron = new WCHAR[cWordPron]; wcscpy_s(wordPron, cWordPron, WordEntry.aPhoneId); ::CoTaskMemFree((void*)WordEntry.aPhoneId); } else { // See if word is in lexicon SPWORDPRONUNCIATIONLIST PronList; PronList.pFirstWordPronunciation = 0; PronList.pvBuffer = 0; PronList.ulSize = 0; hr = m_cpLexicon->GetPronunciations(WordEntry.pszLexicalForm, eLEXTYPE_APP | eLEXTYPE_USER, pWords[i].LangID, &PronList); if(SUCCEEDED(hr)) { if(PronList.pFirstWordPronunciation != NULL) { // Pronounciation(s) found in SAPI lexicon // Engines should use these prons in recognition and also language and POS info // For sample just copy first pron size_t cWordPron = wcslen(PronList.pFirstWordPronunciation->szPronunciation) + 1; wordPron = new WCHAR[cWordPron]; wcscpy_s(wordPron, cWordPron, PronList.pFirstWordPronunciation->szPronunciation); ::CoTaskMemFree(PronList.pvBuffer); } else { // Word is in lexicon bu no pronunciation is present. // Engine should generate its own pronounciation or fail // Here we generate a default NULL pronunciation wordPron = NULL; } } else if(hr == SPERR_NOT_IN_LEX) { // Word is not present in SAPI lexicon. // Engine should generate its own pronounciation or fail // Here we generate a default NULL pronunciation wordPron = NULL; hr = S_OK; } else { break; // Unexpected error - break; } if(SUCCEEDED(hr)) { // Associate the pronunciation information with the SAPI word handle so it can be recovered later // An engine can store any arbitrary pointer with each word. hr = m_cpSite->SetWordClientContext(WordEntry.hWord, wordPron); } } if (SUCCEEDED(hr)) { // When calling GetWordInfo SAPI allocates the strings which we must free ::CoTaskMemFree((void*)WordEntry.pszDisplayText); ::CoTaskMemFree((void*)WordEntry.pszLexicalForm); } } break; case SPCFGN_REMOVE: for(i = 0; i < cWords; i++) { WordEntry = pWords[i]; // Client context already on word entry wordPron = (WCHAR *) WordEntry.pvClientContext; if(wordPron) { delete[] wordPron; } } break; } return hr; } /**************************************************************************** * CSrEngine::RuleNotify * *---------------------------* * Description: * This method is called by SAPI to inform the engine of the rules in * command & control grammars. The order or actions of CFG grammars is of this form: * WordNotify(SPCFGN_ADD) - add words * RuleNotify(SPCFGN_ADD) - add rules * RuleNotify(SPCFGN_ACTIVATE) - activate rules to indicate they are to be used for recognition * RuleNotify(SPCFGN_INVALIDATE) - if a rule gets edited by the app then this is called * RuleNotify(SPCFGN_DEACTIVE) - deactivate rules * RuleNotify(SPCFGN_REMOVE) - remove rules * WordNotify(SPCFGN_REMOVE) - remove words * * The engine can call GetRuleInfo to find the initial state in the rule, and * then GetStateInfo to find the information about subsequent states and transitions in the rule. * If a rule is edited then SPCFGN_INVALIDATE is called to indicate rule has changed so the engine * must reparse the rule information. * * The engine can obtain all the information about the rule either before or during recognition. * In this sample engine we just keep a list of rules initially and then wait * until we want to generate a result and then find a random path through the rule. * * Return: * S_OK *****************************************************************************/ STDMETHODIMP CSrEngine::RuleNotify(SPCFGNOTIFY Action, ULONG cRules, const SPRULEENTRY * pRules) { ULONG i; CRuleEntry *pRuleEntry; switch (Action) { case SPCFGN_ADD: for (i = 0; i < cRules; i++) { // Obtain information on the rule and store in a CRuleEntry structure pRuleEntry = new CRuleEntry; pRuleEntry->m_hRule = pRules[i].hRule; pRuleEntry->m_fTopLevel = (pRules[i].Attributes & SPRAF_TopLevel); pRuleEntry->m_fActive = (pRules[i].Attributes & SPRAF_Active); // Keep a list of rules m_RuleList.InsertHead(pRuleEntry); // Engine can store information associated with rule handle if desired m_cpSite->SetRuleClientContext(pRules[i].hRule, (void *)pRuleEntry); } break; case SPCFGN_REMOVE: for (i = 0; i < cRules; i++) { pRuleEntry = m_RuleList.Find(pRules[i].hRule); _ASSERT(pRuleEntry); // The rule must have been added before being removed m_RuleList.Remove(pRuleEntry); delete pRuleEntry; } break; case SPCFGN_ACTIVATE: for (i = 0; i < cRules; i++) { pRuleEntry = m_RuleList.Find(pRules[i].hRule); // Only top-level rules can be activated _ASSERT(pRuleEntry && !pRuleEntry->m_fActive && pRuleEntry->m_fTopLevel); if (pRuleEntry != NULL) { pRuleEntry->m_fActive = TRUE; } } break; case SPCFGN_DEACTIVATE: for (i = 0; i < cRules; i++) { pRuleEntry = m_RuleList.Find(pRules[i].hRule); _ASSERT(pRuleEntry && pRuleEntry->m_fActive && pRuleEntry->m_fTopLevel); if (pRuleEntry != NULL) { pRuleEntry->m_fActive = FALSE; } } break; case SPCFGN_INVALIDATE: for (i = 0; i < cRules; i++) { pRuleEntry = m_RuleList.Find(pRules[i].hRule); _ASSERT(pRuleEntry); if (pRuleEntry != NULL) { pRuleEntry->m_fTopLevel = (pRules[i].Attributes & SPRAF_TopLevel); pRuleEntry->m_fActive = (pRules[i].Attributes & SPRAF_Active); // Don't need to do anything here as we don't start parsing the rule until recognition time } } break; } return S_OK; } /**************************************************************************** * CSrEngine::LoadSLM * *---------------------------* * Description: * Called when SAPI wants the engine to load a dictaion language model (SLM). * For each reco gramar one dictation as well as C&C rules can be loaded. * Return: * S_OK *****************************************************************************/ STDMETHODIMP CSrEngine::LoadSLM(void * pvEngineGrammar, const WCHAR * pszTopicName) { if (pszTopicName) { // Engines should load the named language model - e.g. Spelling using // this parameter. If NULL load the default dictation model. // Since just a sample ignore this parameter. } // pvEngineGrammar is the pointer ppvEngineGrammar we set in OnCreateGrammar // Use this to find out on which grammar the SLM is being asked for. CDrvGrammar * pGrammar = (CDrvGrammar *)pvEngineGrammar; pGrammar->m_SLMLoaded = TRUE; return S_OK; } /**************************************************************************** * CSrEngine::UnloadSLM * *---------------------------* * Description: * Called when SAPI wants the engine to delete an SLM. * Return: * S_OK *****************************************************************************/ STDMETHODIMP CSrEngine::UnloadSLM(void *pvEngineGrammar) { CDrvGrammar * pGrammar = (CDrvGrammar *)pvEngineGrammar; pGrammar->m_SLMLoaded = FALSE; return S_OK; } /**************************************************************************** * CSrEngine::SetSLMState * *---------------------------* * Description: * Called to activate or deactivate an SLM for recognition. * NewState is either SPRS_ACTIVE or SPRS_INACTIVE. * Return: * S_OK *****************************************************************************/ HRESULT CSrEngine::SetSLMState(void * pvDrvGrammar, SPRULESTATE NewState) { // pvDrvGrammar is the pointer ppvEngineGrammar we set in OnCreateGrammar CDrvGrammar * pGrammar = (CDrvGrammar *)pvDrvGrammar; if (NewState != SPRS_INACTIVE) { pGrammar->m_SLMActive = TRUE; } else { pGrammar->m_SLMActive = FALSE; } return S_OK; } /**************************************************************************** * CSrEngine::SetWordSequenceData * *---------------------------* * Description: * If the app submits a text buffer to SAPI this method is called. * The text buffer supplied here can either be used in CFGs with the text buffer transition, * or in dictation to supply information to the engine about the prior text visible on screen. * This sample engine is just using the text buffer with the text buffer transition. * Return: * S_OK *****************************************************************************/ STDMETHODIMP CSrEngine::SetWordSequenceData(void *pvEngineGrammar, const WCHAR *pText, ULONG cchText, const SPTEXTSELECTIONINFO *pInfo) { //For each grammar object going to be released, SAPI would call SetWordSequenceData(NULL, 0, NULL). // Recover the data we have associated with this grammar. CDrvGrammar * pGrammar = (CDrvGrammar*)pvEngineGrammar; // Delete previous grammar text buffer if(pGrammar->m_pWordSequenceText) { delete pGrammar->m_pWordSequenceText; } // Make a copy of the text data if(cchText) { pGrammar->m_pWordSequenceText = new WCHAR[cchText]; wmemcpy_s(pGrammar->m_pWordSequenceText, cchText, pText, cchText); pGrammar->m_cchText = cchText; } else { pGrammar->m_pWordSequenceText = NULL; pGrammar->m_cchText = NULL; } // Engines can use the SPTEXTSELECTIONINFO to determine which // parts of the text buffer are visible on screen and / or selected. // This sample engine is using the active text selection only. SetTextSelection(pvEngineGrammar, pInfo); return S_OK; } /**************************************************************************** * CSrEngine::SetTextSelection * *---------------------------* * Description: * This method tells engines if the SPTEXTSELECTIONINFO structure * has been updated. This sample engine is using only fields ulStartActiveOffset and cchActiveChars of SPTEXTSELECTIONINFO. * Return: * S_OK *****************************************************************************/ STDMETHODIMP CSrEngine::SetTextSelection(void * pvEngineGrammar, const SPTEXTSELECTIONINFO * pInfo) { // Recover the data we have associated with this grammar. CDrvGrammar * pGrammar = (CDrvGrammar*)pvEngineGrammar; if (pGrammar->m_pInfo) { delete pGrammar->m_pInfo; } if (pInfo) { pGrammar->m_pInfo = new SPTEXTSELECTIONINFO(*pInfo); } else { pGrammar->m_pInfo = NULL; } return S_OK; } /**************************************************************************** * CSrEngine::IsPronounceable * *---------------------------* * Description: * Engines should return whether it has or will be able to * generate a pronounciation for this word. * In this sample engine, this is always true. * Return: * S_OK *****************************************************************************/ STDMETHODIMP CSrEngine::IsPronounceable(void * pDrvGrammar, const WCHAR * pszWord, SPWORDPRONOUNCEABLE * pWordPronounceable) { *pWordPronounceable = SPWP_KNOWN_WORD_PRONOUNCEABLE; return S_OK; } /**************************************************************************** * CSrEngine::SetAdaptationData * *---------------------------* * Description: * This method can be used by the app to give text data to the engine * for language model adaptation etc. This method can only be called * by the app after if has received an SPEI_ADAPTATION event. Since * we never fire that event this method should never be called. * Return: * E_UNEXPECTED *****************************************************************************/ STDMETHODIMP CSrEngine::SetAdaptationData(void * pvEngineCtxtCookie, const WCHAR *pAdaptationData, const ULONG cch) { _ASSERT(0); // This method should never be called return E_UNEXPECTED; } /**************************************************************************** * CSrEngine::AddEvent * *---------------------------* * Description: * Internal helper method to send an event to SAPI. * Return: * S_OK * FAILED(hr) *****************************************************************************/ HRESULT CSrEngine::AddEvent(SPEVENTENUM eEventId, ULONGLONG ullStreamPos, WPARAM wParam, LPARAM lParam) { HRESULT hr = S_OK; SPEVENT Event; Event.eEventId = eEventId; Event.elParamType = SPET_LPARAM_IS_UNDEFINED; Event.ulStreamNum = 0; // Always set this to zero - SAPI fills this in Event.ullAudioStreamOffset = ullStreamPos; Event.wParam = wParam; Event.lParam = lParam; hr = m_cpSite->AddEvent(&Event, NULL); return hr; } /**************************************************************************** * CSrEngine::AddEventString * *---------------------------* * Description: * Internal helper method to send an event with a string LParam to SAPI. * Request UI is the only event in the sample to need this. * Return: * S_OK * FAILED(hr) *****************************************************************************/ HRESULT CSrEngine::AddEventString(SPEVENTENUM eEventId, ULONGLONG ullStreamPos, const WCHAR * psz, WPARAM wParam) { HRESULT hr = S_OK; SPEVENT Event; Event.eEventId = eEventId; Event.elParamType = SPET_LPARAM_IS_STRING; Event.ulStreamNum = 0; // Always set this to zero - SAPI fills this in Event.ullAudioStreamOffset = ullStreamPos; Event.wParam = wParam; Event.lParam = (LPARAM)psz; hr = m_cpSite->AddEvent(&Event, NULL); return hr; } #define BLOCKSIZE 220 // 1/100 of a second /**************************************************************************** * CSrEngine::RecognizeStream * *---------------------------* * Description: * This is the method that SAPI calls for recognition to take place. * Engines must only return from this method after they have read all the data * and completed all the recognition they are going to do on this stream. * Thus this method is giving a thread to the engine to do recognition on, * and engines may create additional threads. * * In this sample we constantly read data using this thread, and then perform * very basic speech detection and pass data to a recognizer thread, which * generates hypotheses and results. * * Parameters: * * - REFGUID rguidFormatId - this is the GUID of the input audio format * - const WAVEFORMATEX * pWaveFormatEx - this is the extended wav format information of the audio format * - HANDLE hRequestSync - this Win32 event is used to indicate that there are pending tasks * and the engine should call Synchronize() for SAPI to process these. * - HANDLE hDataAvailable - this Win32 event is used to tell the engine that data is available to be read. * The frequency this is set can be controlled by the SetBufferNotifySize method. * - HANDLE hExit - this Win32 event indicates the engine is being closed down and should exit immediately. * - BOOL fNewAudioStream - this indicates this is a new input stream * e.g. the app has done a new SetInput call rather than just restarting the previous stream. * - BOOL fRealTimeAudio - this indicates the input is from a real-time ISpAudio stream, rather than, say, a file * - ISpObjectToken * pAudioObjectToken - this is the object token representing the audio input device * the engine may want to query this. * * Return: * S_OK * FAILED(hr) *****************************************************************************/ STDMETHODIMP CSrEngine::RecognizeStream(REFGUID rguidFormatId, const WAVEFORMATEX * pWaveFormatEx, HANDLE hRequestSync, HANDLE hDataAvailable, HANDLE hExit, BOOL fNewAudioStream, BOOL fRealTimeAudio, ISpObjectToken * pAudioObjectToken) { HRESULT hr = S_OK; m_hRequestSync = hRequestSync; // Start the recognition thread hr = m_cpDecoderThread->StartThread(0, NULL); if (SUCCEEDED(hr)) { const HANDLE aWait[] = { hExit, m_hQueueHasRoom }; while (TRUE) // sit in this loop until there is no more data { // The Read method is used to read data. This will block until the required // amount of data is available. If the stream has ended either a fail code // will be returned or the amount read will be less than the amount asked for. // To see how much data is available to be read without blocking the DataAvailable // method can be used or hDataAvailable event. BYTE aData[BLOCKSIZE]; ULONG cbRead; hr = m_cpSite->Read(aData, sizeof(aData), &cbRead); if (hr != S_OK || cbRead < sizeof(aData)) { break; } // Decide if the frame of data is noise or silence with a simple level detector BOOL bNoiseDetected = FALSE; SHORT * pBuffer = (SHORT *)aData; for (ULONG i = 0; i < cbRead; i += 2, pBuffer++) { if (*pBuffer < (SHORT)-3000 || *pBuffer > (SHORT)3000) { bNoiseDetected = TRUE; break; } } // If there no space on the frame queue then wait BOOL bBlock = m_FrameQueue.IsFull(); if(bBlock) { // Wait for space to appear on the queue to be passed to the recognizer. // Real engines should not wait - the data reading should be as real-time as possible // Also detect if the hExit event is set to indicate we should stop processing. if (::WaitForMultipleObjects(sp_countof(aWait), aWait, FALSE, INFINITE) == WAIT_OBJECT_0) { break; } } // Add the frame to the queue and notify the decoder thread m_FrameQueue.InsertTail(bNoiseDetected); m_cpDecoderThread->Notify(); } // Once we've stopped reading data we must wait for the recognizer thread to finish // All processing must be done before returning from the RecognizeStream method. m_cpDecoderThread->WaitForThreadDone(TRUE, &hr, 30 * 1000); } m_hRequestSync = NULL; return hr; } /**************************************************************************** * CSrEngine::ThreadProc * *-----------------------* * Description: * This is the main thread for the recognition process. * This hExitThreadEvent indicates this thread should complete, * and the hNotifyEvent indicates the RecognizeStream thread is notifying * that data is available for processing. The m_hRequestSync indicates * SAPI is requesting the engine should call Synchronize. * * Return: * S_OK *****************************************************************************/ STDMETHODIMP CSrEngine::ThreadProc(void *, HANDLE hExitThreadEvent, HANDLE hNotifyEvent, HWND hwndWorker, volatile const BOOL * pfContinueProcessing) { HRESULT hr = S_OK; const HANDLE aWait[] = { hExitThreadEvent, hNotifyEvent, m_hRequestSync }; ULONG block = 0; ULONG silenceafternoise = 0; DWORD waitres; m_bSoundStarted = FALSE; m_bPhraseStarted = FALSE; m_cBlahBlah = 0; m_ullStart = 0; m_ullEnd = 0; while (*pfContinueProcessing) // sit in this loop until exit { ULONG cEvents = sp_countof(aWait); // Respond to the request sync event only if speech not detected, // as we don't want to allow grammar changes during recognition. // However always respond after several seconds. if(m_bPhraseStarted && (block - m_ullStart / BLOCKSIZE) < 5 * 100) { --cEvents; } waitres = ::WaitForMultipleObjects(cEvents, aWait, FALSE, INFINITE); switch (waitres) { case WAIT_OBJECT_0: // Exit thread break; case WAIT_OBJECT_0 + 1: // Notify (data is available) // Engines should regularly call UpdateRecoPos to indicate how far through the // stream they have recognized. m_cpSite->UpdateRecoPos((ULONGLONG)block * BLOCKSIZE); if (m_ullStart == 0 && !m_bPhraseStarted) { // Engines should also call Synchronize to indicate they are ready // to be notified about grammar changes and other tasks. Within Synchronize // if grammars have changed WordNotify, RuleNotify etc. will be called before Synchronize returns. // Here Synchronize is called only if processing silence, not speech // to avoid having to deal with grammar changes during recognition. m_cpSite->Synchronize((ULONGLONG)block * BLOCKSIZE); // A return code of S_FALSE from synchronize means the engine can stop recognizing // This engine ignores this. } while (m_FrameQueue.HasData()) { BOOL bNoise = m_FrameQueue.RemoveHead(); block++; // Update the position in stream if (bNoise) { // Found some speech - update start and end positions silenceafternoise = 0; if (m_ullStart == 0) { m_ullStart = (ULONGLONG)block * BLOCKSIZE; } m_ullEnd = (ULONGLONG)block * BLOCKSIZE; _CheckRecognition(); // this will generate hypotheses and events } else { // Found some silence silenceafternoise++; if (silenceafternoise > 50) { if (m_bSoundStarted) { // We've heard 1/2 sec of silence since the last noise, so send the // final recognition if we had previously started a phrase if (m_bPhraseStarted) { _NotifyRecognition(FALSE, m_cBlahBlah); } AddEvent(SPEI_SOUND_END, m_ullEnd); // send the sound end event m_bSoundStarted = FALSE; m_bPhraseStarted = FALSE; } m_ullStart = 0; m_ullEnd = 0; } } //--- Generate random interference start at 20 seconds into the stream // and saying it's gone at 22 seconds if (block % (100 * 30) == 20 * 100) { const SPINTERFERENCE rgspi[] = { SPINTERFERENCE_NOISE, SPINTERFERENCE_NOSIGNAL, SPINTERFERENCE_TOOLOUD, SPINTERFERENCE_TOOQUIET }; AddEvent(SPEI_INTERFERENCE, block*BLOCKSIZE, 0, rgspi[rand() % 4]); } else if (block % (100 * 30) == 22 * 100) { AddEvent(SPEI_INTERFERENCE, block*BLOCKSIZE, 0, SPINTERFERENCE_NONE); } // Ask for UI at 10 seconds into the stream // and cancel request one second later else if (block == 10 * 100) { AddEventString(SPEI_REQUEST_UI, block * BLOCKSIZE, SPDUI_UserTraining); } else if (block == 11 * 100) // Cancle the UI request at 11 seconds into the stream { AddEventString(SPEI_REQUEST_UI, block * BLOCKSIZE, NULL); } } break; case WAIT_OBJECT_0 + 2: // SAPI has explicitly requested we call Synchronize m_cpSite->Synchronize((ULONGLONG)block * BLOCKSIZE); // Once synchronize is called the engine cannot fire events prior to that stream position // so update the stored stream start position m_ullStart = block * BLOCKSIZE; if(m_ullEnd < m_ullStart) { m_ullEnd = m_ullStart; } break; default: _ASSERT(FALSE); // Something strange! break; } } // Before exiting we must make sure every phrase start has been paired with a recognition, // and that every sound start event has a corresponding sound end. if (m_bPhraseStarted) { _NotifyRecognition(FALSE, m_cBlahBlah); } if (m_bSoundStarted) { AddEvent(SPEI_SOUND_END, m_ullEnd); } return S_OK; } /**************************************************************************** * CSrEngine::_CheckRecognition * *---------------------------* * Description: * This internal method decides when to fire sound start events, * phrase start events and hypotheses. *****************************************************************************/ void CSrEngine::_CheckRecognition() { ULONG duration, blahs; if (m_ullEnd > m_ullStart) { duration = (ULONG)(m_ullEnd - m_ullStart); if (duration >= BLOCKSIZE * 100 * 1 / 8) { if (!m_bSoundStarted) { // Heard something that was longer than 1/8th second, so do sound start AddEvent(SPEI_SOUND_START, m_ullStart); m_bSoundStarted = TRUE; m_cBlahBlah = 0; } if (duration >= BLOCKSIZE * 100 * 1 / 4) { // Heard something that was longer than 1/4 second, so do phrase start // and then generate a hypothesis every 1/4 second after that. blahs = duration / (BLOCKSIZE * 100 * 1 / 4); if (blahs != m_cBlahBlah) { m_cBlahBlah = blahs; if (!m_bPhraseStarted) { m_bPhraseStarted = TRUE; AddEvent(SPEI_PHRASE_START, m_ullStart); } _NotifyRecognition(TRUE, blahs); } } } } } /**************************************************************************** * CSrEngine::_NotifyRecognition * *-------------------------------* * Description: * This internal method is used to generate a recognition result. The contents * are random since we don't actually know how to recognize anything.* ****************************************************************************/ void CSrEngine::_NotifyRecognition( BOOL fHypothesis, ULONG nWords ) { HRESULT hr = S_OK; // First count the active CFG rules ULONG cActiveCFGRules = 0; CRuleEntry * pRule = m_RuleList.GetHead(); for(; pRule; pRule = m_RuleList.GetNext(pRule)) { if( pRule->m_fActive ) { cActiveCFGRules++; } } // Then count all the grammars with active dictation ULONG cActiveSLM = 0; CDrvGrammar * pGram = m_GrammarList.GetHead(); for(; pGram; pGram = m_GrammarList.GetNext(pGram)) { if(pGram->m_SLMActive) { cActiveSLM++; } } // If both CFG and dictation are active, randomly do one or the other if(cActiveCFGRules && cActiveSLM) { if(rand() % 2) { cActiveSLM = 0; } else { cActiveCFGRules = 0; } } //--- Compose reco result info SPRECORESULTINFO Result; memset(&Result, 0, sizeof(SPRECORESULTINFO)); Result.cbSize = sizeof(SPRECORESULTINFO); Result.fHypothesis = fHypothesis; Result.ullStreamPosStart = m_ullStart; Result.ullStreamPosEnd = m_ullEnd; if( cActiveCFGRules ) { // Generate a CFG result phrase hr = WalkCFGRule(&Result, cActiveCFGRules, fHypothesis, nWords, m_ullStart, (ULONG)(m_ullEnd - m_ullStart)); if( SUCCEEDED(hr) ) { // Pass the results info to SAPI m_cpSite->Recognition(&Result); // Cleanup any memory allocated for alternates and release phrases for(ULONG i = 0; i < Result.ulNumAlts; i++) { Result.aPhraseAlts[i].pPhrase->Release(); } Result.pPhrase->Release(); delete[] Result.aPhraseAlts; } } else if(cActiveSLM) { // Generate a dictation result phrase hr = WalkSLM(&Result, cActiveSLM, nWords, m_ullStart, (ULONG)(m_ullEnd - m_ullStart)); if( SUCCEEDED(hr) ) { // Pass the results info to SAPI m_cpSite->Recognition(&Result); // Release the result phrase Result.pPhrase->Release(); delete[] Result.pvEngineData; } } else if(!fHypothesis) { // No rules were active - return a false recognition // RecognizeStream can still be called if no rules are active // - the engine is free to do anything it wants with the speech data. Result.eResultType = SPRT_FALSE_RECOGNITION; m_cpSite->Recognition(&Result); } } /**************************************************************************** * CSrEngine::CreatePhraseFromRule * *---------------------------------* * Description: * This method is used to produce the result phrase for a CFG result. * It selects a random path through an active rule, and * then calls ParseFromTransition to generate an ISpPhraseBuilder object. * * Return: * S_OK * FAIL(hr) ****************************************************************************/ HRESULT CSrEngine::CreatePhraseFromRule( CRuleEntry * pRule, BOOL fHypothesis, ULONGLONG ullAudioPos, ULONG ulAudioSize, ISpPhraseBuilder** ppPhrase ) { HRESULT hr = S_OK; SPRULEENTRY RuleInfo; RuleInfo.hRule = pRule->m_hRule; hr = m_cpSite->GetRuleInfo(&RuleInfo, SPRIO_NONE); if( SUCCEEDED(hr) ) { // Limit of 200 transitions in grammar! const ULONG MAXPATH = 200; SPPATHENTRY Path[MAXPATH]; ULONG cTrans; // Recursively generate random path hr = RecurseWalk(RuleInfo.hInitialState, Path, &cTrans); //Fill in the audio offset and audio size for each element in the path, while each element has equal size and silence in between if (cTrans) { ULONG ulInterval = ulAudioSize/cTrans; for (ULONG ul = 0; ul < cTrans && ul < MAXPATH; ul++) { Path[ul].elem.ulAudioStreamOffset = ul * ulInterval; Path[ul].elem.ulAudioSizeBytes = ulInterval/2; } } if (SUCCEEDED(hr)) { // generate a SPPARSEINFO structure SPPARSEINFO ParseInfo; memset(&ParseInfo, 0, sizeof(ParseInfo)); ParseInfo.cbSize = sizeof(SPPARSEINFO); ParseInfo.hRule = pRule->m_hRule; ParseInfo.ullAudioStreamPosition = ullAudioPos; ParseInfo.ulAudioSize = ulAudioSize; ParseInfo.cTransitions = cTrans; ParseInfo.pPath = Path; ParseInfo.fHypothesis = fHypothesis; ParseInfo.SREngineID = CLSID_SampleSREngine; ParseInfo.ulSREnginePrivateDataSize = 0; ParseInfo.pSREnginePrivateData = NULL; // Generate a phrase object from the parse info. hr = m_cpSite->ParseFromTransitions(&ParseInfo, ppPhrase ); if(SUCCEEDED(hr)) { // delete any allocated memory for(ULONG i = 0; i < cTrans; i++) { if(Path[i].elem.pszDisplayText) { delete const_cast(Path[i].elem.pszDisplayText); } } } } } return hr; } /**************************************************************************** * CSrEngine::FindRule * *---------------------* * Description: * This method is used to locate an active rule in the rule list by index * ****************************************************************************/ CRuleEntry* CSrEngine::FindRule( ULONG ulRuleIndex ) { CRuleEntry * pRule = m_RuleList.GetHead(); ULONG ulRule = 0; while( pRule ) { if( pRule->m_fActive && ( ulRule++ == ulRuleIndex ) ) { break; } pRule = m_RuleList.GetNext( pRule ); } _ASSERT(pRule && pRule->m_fActive); return pRule; } /**************************************************************************** * CSrEngine::NextRuleAlt * *------------------------* * Description: * This method is used to locate a rule alternate in the rule list * ****************************************************************************/ CRuleEntry* CSrEngine::NextRuleAlt( CRuleEntry * pPriRule, CRuleEntry * pLastRule ) { CRuleEntry * pRule = (pLastRule)?(pLastRule):(m_RuleList.GetHead()); for(; pRule; pRule = m_RuleList.GetNext(pRule)) { if( pRule->m_fActive && ( m_cpSite->IsAlternate( pPriRule->m_hRule, pRule->m_hRule ) == S_OK ) ) { break; } } return pRule; } /**************************************************************************** * CSrEngine::WalkCFGRule * *------------------------* * Description: * This method is used to produce the results information for a CFG result. * It creates a result phrase and then some alternates phrases. * Return: * S_OK * FAILED(hr) ****************************************************************************/ HRESULT CSrEngine::WalkCFGRule( SPRECORESULTINFO * pResult, ULONG cRulesActive, BOOL fHypothesis, ULONG nWords, ULONGLONG ullAudioPos, ULONG ulAudioSize) { HRESULT hr = E_FAIL; CRuleEntry * pPriRule = NULL; pResult->ulSizeEngineData = 0; pResult->pvEngineData = NULL; pResult->eResultType = SPRT_CFG; pResult->hGrammar = NULL; while (hr == E_FAIL) { // Randomly pick a rule and locate it in the list pPriRule = FindRule( rand() % cRulesActive ); // Create a phrase from the rule hr = CreatePhraseFromRule( pPriRule, fHypothesis, ullAudioPos, ulAudioSize, &pResult->pPhrase ); // E_FAIL means the random path generated came to a dead end. Most likely an empty // dynamic rule. We are not allowed to recognize through empty rules. // Hence we choose another random toplevel rule and try again until we succeed. } if (hr != S_OK) { _ASSERT(FALSE); } // Get the phrase info SPPHRASE* pPriPhraseInfo = NULL; if( SUCCEEDED( hr ) ) { hr = pResult->pPhrase->GetPhrase( &pPriPhraseInfo ); } // Ask the site how many alternates to generate ULONG ulNumAlts = 0; if( SUCCEEDED( hr ) ) { hr = m_cpSite->GetMaxAlternates( pPriRule->m_hRule, &ulNumAlts ); } // Randomly create some alternates if( SUCCEEDED( hr ) && ulNumAlts ) { pResult->aPhraseAlts = new SPPHRASEALT[ulNumAlts]; if( pResult->aPhraseAlts ) { memset( pResult->aPhraseAlts, 0, ulNumAlts * sizeof(SPPHRASEALT) ); CRuleEntry * pAltRule = NULL; for( ULONG i = 0; SUCCEEDED( hr ) && (i < ulNumAlts); ++i ) { // Try to find an alternate rule pAltRule = NextRuleAlt( pPriRule, pAltRule ); if( !pAltRule ) { break; } // Create an alternate phrase from the rule hr = CreatePhraseFromRule( pAltRule, fHypothesis, ullAudioPos, ulAudioSize, &pResult->aPhraseAlts[i].pPhrase ); // Get the alternate phrase info SPPHRASE* pAltPhraseInfo = NULL; if( SUCCEEDED( hr ) ) { hr = pResult->aPhraseAlts[i].pPhrase->GetPhrase( &pAltPhraseInfo ); } if( SUCCEEDED( hr ) ) { ++pResult->ulNumAlts; // Fill out relationship info pResult->aPhraseAlts[i].cElementsInParent = pPriPhraseInfo->Rule.ulCountOfElements; pResult->aPhraseAlts[i].cElementsInAlternate = pAltPhraseInfo->Rule.ulCountOfElements; // Point to some extra data for testing static BYTE AltData[] = { 0xED, 0xED, 0xED, 0xED }; pResult->aPhraseAlts[i].pvAltExtra = &AltData; pResult->aPhraseAlts[i].cbAltExtra = sp_countof( AltData ); } if( pAltPhraseInfo ) { ::CoTaskMemFree( pAltPhraseInfo ); } } } else { E_OUTOFMEMORY; } } // Cleanup main phrase information if( pPriPhraseInfo ) { ::CoTaskMemFree( pPriPhraseInfo ); } // Cleanup on failure if( FAILED( hr ) ) { if( pResult->pPhrase ) { pResult->pPhrase->Release(); pResult->pPhrase = NULL; } for( ULONG i = 0; i < pResult->ulNumAlts; ++i ) { pResult->aPhraseAlts[i].pPhrase->Release(); pResult->aPhraseAlts[i].pPhrase = NULL; } pResult->ulNumAlts = 0; delete[] pResult->aPhraseAlts; pResult->aPhraseAlts = NULL; } return hr; } /**************************************************************************** * CSrEngine::WalkSLM * *---------------------------* * Description: * This method is used to produce the results information for a dictation result. * It creates a result phrase, and then serializes some alternates information * that the CSrEngineAlternates object uses to generate alternates. * Return: * S_OK * FAILED(hr) *****************************************************************************/ HRESULT CSrEngine::WalkSLM(SPRECORESULTINFO * pResult, ULONG cSLMActive, ULONG nWords, ULONGLONG ullAudioPos, ULONG ulAudioSize) { HRESULT hr = S_OK; // If several dictation grammars are active pick one at random ULONG ulGramIndex = rand() % cSLMActive; CDrvGrammar * pGram = m_GrammarList.GetHead(); ULONG nGram = 0; for(; pGram; pGram = m_GrammarList.GetNext(pGram)) { if(pGram->m_SLMActive) { if(nGram == ulGramIndex) { break; } nGram++; } } _ASSERT(pGram && pGram->m_SLMActive); if( pGram == NULL ) { return E_FAIL; } // create and fill SPPHRASE structure SPPHRASE phrase; memset(&phrase, 0, sizeof(SPPHRASE)); phrase.cbSize = sizeof(SPPHRASE); phrase.LangID = m_LangID; phrase.ullAudioStreamPosition = ullAudioPos; phrase.ulAudioSizeBytes = ulAudioSize; phrase.SREngineID = CLSID_SampleSREngine; // allocate elements ULONG cb = nWords * sizeof(SPPHRASEELEMENT); SPPHRASEELEMENT* pElements = (SPPHRASEELEMENT *)_malloca(cb); memset(pElements, 0, cb); // fill in word info into elements for (ULONG n = 0; n < nWords; n++) { ULONG ulInterval = ulAudioSize/nWords; pElements[n].bDisplayAttributes = SPAF_ONE_TRAILING_SPACE; pElements[n].pszDisplayText = DICT_WORD; pElements[n].ulAudioStreamOffset = n * ulInterval; pElements[n].ulAudioSizeBytes = ulInterval/2; } // add elements to phrase phrase.Rule.ulCountOfElements = nWords; phrase.pElements = pElements; // make phrase builder and add phrase info CComPtr cpBuilder; hr = cpBuilder.CoCreateInstance(CLSID_SpPhraseBuilder); if (SUCCEEDED(hr)) { hr = cpBuilder->InitFromPhrase(&phrase); } if (SUCCEEDED(hr)) { // Store alternates string in extra engine data // We just serialize an equal number of copies of ALT_WORD as DICT_WORD. // Engines can store any information (e.g. serialized lattice) here. pResult->ulSizeEngineData = sizeof(ALT_WORD) * nWords; pResult->pvEngineData = new WCHAR[(sizeof(ALT_WORD) / sizeof(WCHAR )) * nWords]; if (pResult->pvEngineData == NULL) { hr = E_OUTOFMEMORY; } } if (SUCCEEDED(hr)) { WCHAR *pC = (WCHAR *)pResult->pvEngineData; size_t cRemainingSize = (size_t)pResult->ulSizeEngineData / sizeof(WCHAR); for(ULONG i = 0; i < nWords; i++) { wcscpy_s(pC, cRemainingSize, ALT_WORD); size_t cAltWord = wcslen(ALT_WORD); pC += cAltWord; *pC = ' '; pC++; cRemainingSize -= cAltWord + 1; } *(--pC) = '\0'; pResult->eResultType = SPRT_SLM; pResult->hGrammar = pGram->m_hSapiGrammar; } if (SUCCEEDED(hr)) { pResult->pPhrase = cpBuilder.Detach(); } else { delete[] pResult->pvEngineData; pResult->pvEngineData = NULL; pResult->ulSizeEngineData = 0; } _freea(pElements); return hr; } /**************************************************************************** * CSrEngine::WalkTextBuffer * *---------------------------* * Description: * This is called when RecurseWalk hits a text-buffer transition. * At such a transition the engine should try to recognize any sequence * of words from the active text selection the app has supplied. * Any word can be the start word and then recognition can continue to * the next \0 end of sentence marker or at the end of the active text selection. * In this sample we pick a random string of words from the buffer. * Return: * S_OK * FAILED(hr) *****************************************************************************/ HRESULT CSrEngine::WalkTextBuffer(void* pvGrammarCookie, SPPATHENTRY * pPath, SPTRANSITIONID hId, ULONG * pcTrans) { HRESULT hr = S_OK; // Check if text buffer present CDrvGrammar * pGrammar = (CDrvGrammar *) pvGrammarCookie; _ASSERT(pGrammar->m_pWordSequenceText && pGrammar->m_cchText >= 2); //m_cchText counts the trailing "/0/0" if (!pGrammar->m_pWordSequenceText || pGrammar->m_cchText < 2) { return E_UNEXPECTED; } *pcTrans = 0; // Count sentences ULONG nPhrase = 0; const WCHAR *cPhrase; ULONG ulStartActiveOffset = 0; //The default value with text selection ULONG cchActiveChars = pGrammar->m_cchText - 2; //The default value with text selection ULONG ccChars = 0; if (pGrammar->m_pInfo) { ulStartActiveOffset = pGrammar->m_pInfo->ulStartActiveOffset; cchActiveChars = pGrammar->m_pInfo->cchActiveChars; } for (cPhrase = pGrammar->m_pWordSequenceText + ulStartActiveOffset; ccChars < cchActiveChars && (*cPhrase != L'\0' || *(cPhrase + 1) != '\0'); ccChars++, cPhrase++) { if(*cPhrase != L'\0' && *(cPhrase + 1) == L'\0') { nPhrase++; } } if (nPhrase == 0) { return E_FAIL; } // Randomly pick a sentence between the first active sentence and the last active sentence nPhrase = rand() % nPhrase; //nPhrase would be 0 index ULONG nP = 0; for(cPhrase = pGrammar->m_pWordSequenceText + ulStartActiveOffset; nP != nPhrase; cPhrase++) { if(*cPhrase == L'\0') { nP++; } } // Count words in sentence ULONG nWord = 1; const WCHAR *cWord; for(cWord = cPhrase; *cWord != L'\0' && ULONG(cWord - pGrammar->m_pWordSequenceText) < ulStartActiveOffset + cchActiveChars; cWord++) { if(iswspace(*cWord)) { while(*(cWord+1) && iswspace(*(cWord+1)) && ULONG(cWord - pGrammar->m_pWordSequenceText) < ulStartActiveOffset + cchActiveChars - 1) {cWord++;} nWord++; } } // Pick entry and exit word ULONG startWord = rand() % nWord; ULONG countWord = rand() % (nWord - startWord) + 1; // Find the entry word for(nWord = 0, cWord = cPhrase; nWord != startWord; cWord++) { if(iswspace(*cWord)) { while(*(cWord+1) && iswspace(*(cWord+1)) && ULONG(cWord - pGrammar->m_pWordSequenceText) < ulStartActiveOffset + cchActiveChars - 1) {cWord++;} nWord++; } } // Build paths to end word const WCHAR *cW = cWord; for(nWord = 0; nWord != countWord; cWord++) { if(*cWord == L'\0' || iswspace(*cWord) || ULONG(cWord - pGrammar->m_pWordSequenceText) == ulStartActiveOffset + cchActiveChars) { // Build a path entry pPath->hTransition = hId; memset(&pPath->elem, 0, sizeof(pPath->elem)); pPath->elem.bDisplayAttributes = SPAF_ONE_TRAILING_SPACE; WCHAR *pszWord = new WCHAR[cWord - cW + 1]; wcsncpy_s(pszWord, cWord - cW + 1, cW, cWord - cW); pszWord[cWord - cW] = '\0'; pPath->elem.pszDisplayText = pszWord; pPath++; (*pcTrans)++; while(*(cWord+1) && iswspace(*(cWord+1)) && ULONG(cWord - pGrammar->m_pWordSequenceText) < ulStartActiveOffset + cchActiveChars - 1) {cWord++;} cW = cWord + 1; // first char of next word nWord++; } } return hr; } /**************************************************************************** * CSrEngine::RecurseWalk * *----------------------* * Description: * This method produces a random path through an active CFG. If the path contains * a rule reference transition RecurseWalk is recursively called to produce a path though * sub-rules. The result is an array of SPPATHENTRY elements containing the transitions. * * The initial state in each rule is obtained by calling GetRuleInfo. Then for each * state GetStateInfo can be called. This gives an array of SPTRANSITION entries * the contain information on the type of transition, the transition id and * the next state the transition goes to. The transition id is the main information * included in the SPPATHENTRY. Only for word transitions are SPPATHENTRY created, * as this is all that is required by ParseFromTransitions. * * Return: * S_OK * FAILED(hr) ****************************************************************************/ HRESULT CSrEngine::RecurseWalk(SPSTATEHANDLE hState, SPPATHENTRY * pPath, ULONG * pcTrans) { HRESULT hr = S_OK; CSpStateInfo StateInfo; *pcTrans = 0; while (SUCCEEDED(hr) && hState) { ULONG cTrans; hr = m_cpSite->GetStateInfo(hState, &StateInfo); if (SUCCEEDED(hr)) { // Now randomly decide which transition to take. ULONG cTransInState = StateInfo.cEpsilons + StateInfo.cWords + StateInfo.cRules + StateInfo.cSpecialTransitions; if (cTransInState == 0) { // This path is a dead-end. Most likely this is due to an empty dynamic rule. hr = E_FAIL; break; } SPTRANSITIONENTRY * pTransEntry = StateInfo.pTransitions + (rand() % cTransInState); switch(pTransEntry->Type) { case SPTRANSEPSILON: // Epsilon transition - don't need to create a path entry // Advance to the next state. break; case SPTRANSRULE: // Rule transition - we recursively descend into the rule and add onto the path array hr = RecurseWalk(pTransEntry->hRuleInitialState, pPath, &cTrans); *pcTrans += cTrans; pPath += cTrans; break; case SPTRANSWORD: case SPTRANSWILDCARD: // For a word transition we complete an SPPATHENTRY structure with the transition id. // A wildcard transition indicates the engine should match against any speech, so we do the same thing. pPath->hTransition = pTransEntry->ID; memset(&pPath->elem, 0, sizeof(pPath->elem)); pPath->elem.bDisplayAttributes = SPAF_ONE_TRAILING_SPACE; pPath++; (*pcTrans)++; break; case SPTRANSTEXTBUF: // Text Buffer transition - produce a path from WalkTextBuffer hr = WalkTextBuffer(pTransEntry->pvGrammarCookie, pPath, pTransEntry->ID, &cTrans); *pcTrans += cTrans; pPath += cTrans; break; case SPTRANSDICTATION: // Dictation transition - indicating the recognizer should do dictation at // this point in the grammar. We generate the DICT_WORD as a path entry. // The word text is indicated by setting pszDisplayText, which otherwise can be left NULL. pPath->hTransition = pTransEntry->ID; memset(&pPath->elem, 0, sizeof(pPath->elem)); pPath->elem.bDisplayAttributes = SPAF_ONE_TRAILING_SPACE; size_t cDictWord = wcslen(DICT_WORD); WCHAR *pszWord = new WCHAR[cDictWord + 1]; wcscpy_s(pszWord, cDictWord + 1, DICT_WORD); pPath->elem.pszDisplayText = pszWord; pPath++; (*pcTrans)++; break; } // Move to the next state - a transition to NULL indicates the end of the rule. hState = pTransEntry->hNextState; } } return hr; } /**************************************************************************** * CSrEngine::GetInputAudioFormat * *---------------------------* * Description: * This method is called for SAPI to find what audio formats the engine * can support for recognition. * Audio formats a represented by a GUID and an optional WAVEFORMATEX * structure for wav file formats. This method may be called * either with pSourceFormatId and pSourceWaveFormatEx NULL, in which * case the engine should return its most preferred format. It may also * be called with these set, in which case the engine should return * that format if it can support or the closest it can. * * If the engines returned format is incompatible with the input audio * object format SAPI will try and create a format convertor to convert the audio * * Return: * S_OK * FAILED(hr) *****************************************************************************/ STDMETHODIMP CSrEngine::GetInputAudioFormat(const GUID * pSourceFormatId, const WAVEFORMATEX * pSourceWaveFormatEx, GUID * pDesiredFormatId, WAVEFORMATEX ** ppCoMemDesiredWFEX) { // Helper function which fills in the sample engine desired format (PCM 11kHz Mono). return SpConvertStreamFormatEnum(SPSF_11kHz16BitMono, pDesiredFormatId, ppCoMemDesiredWFEX); } /***************************************************************************** * CSrEngine::SetPropertyNum * *---------------------* * Description: * The following methods are used for the app to send and receive * real-time attribute information to the engine. This differs from the * ISpObjectWithToken mechanism which is used to query static registry information. * If the app tries to set or query an attribute the engine should return * okay if it supports that attribute and S_FALSE if it does not. * In this method the app is trying to set a numeric property value. * Return: * S_FALSE *****************************************************************************/ STDMETHODIMP CSrEngine:: SetPropertyNum( SPPROPSRC eSrc, PVOID pvSrcObj, const WCHAR* pName, LONG lValue ) { HRESULT hr = S_OK; hr = S_FALSE; // We don't support any properties return hr; } /***************************************************************************** * CSrEngine::GetPropertyNum * *---------------------* * Description: * In this method the app is trying to get a numeric property value. *****************************************************************************/ STDMETHODIMP CSrEngine:: GetPropertyNum( SPPROPSRC eSrc, PVOID pvSrcObj, const WCHAR* pName, LONG * plValue ) { HRESULT hr = S_OK; hr = S_FALSE; *plValue = 0; return hr; } /***************************************************************************** * CSrEngine::SetPropertyString * *----------------------* * Description: * In this method the app is trying to set a string property value. *****************************************************************************/ STDMETHODIMP CSrEngine:: SetPropertyString( SPPROPSRC eSrc, PVOID pvSrcObj, const WCHAR* pName, const WCHAR* pValue ) { HRESULT hr = S_OK; hr = S_FALSE; return hr; } /***************************************************************************** * CSrEngine::GetPropertyString * *----------------------* * Description: * In this method the app is trying to get a string property value. *****************************************************************************/ STDMETHODIMP CSrEngine:: GetPropertyString( SPPROPSRC eSrc, PVOID pvSrcObj, const WCHAR* pName, __deref_out_opt WCHAR** ppCoMemValue ) { HRESULT hr = S_OK; hr = S_FALSE; *ppCoMemValue = NULL; return hr; } /**************************************************************************** * CSrEngine::PrivateCall * *---------------------------* * Description: * This method is used for a reco extension to send private, engine-specific information * to an engine it knows about. The app QI's off the reco context for a private * interface, and SAPI creates the engine' extension object (in this case CSampleSRExtension). * The app can then call methods on this interface. If the extension object wants to * communicate with the engine it calls CallEngine. The data from that call is then * passed to this method on the engine. * * Similarly the alternates class and UI class can also use this mechanism to * communicate with their main engine class. * * The single buffer is in/out. This call can update the buffer, and the updated buffer * will be returned to the caller of _ISpPrivateEngineCall::CallEngine(). To return * variable size buffers, use _ISpPrivateEngineCall::CallEngineEx() and * ISpSREngine::PrivateCallEx(). * * Return: * S_OK *****************************************************************************/ STDMETHODIMP CSrEngine::PrivateCall(void * pvEngineContext, void * pCallFrame, ULONG ulCallFrameSize) { // Just an example - do nothing here return S_OK; } /**************************************************************************** * CSrEngine::PrivateCallEx * *--------------------------* * Description: * This method is similar to PrivateCall except that the call frame is an * input only parameter. This function should CoTaskMemAlloc a response block * and return it in *ppvCoMemResponse. You must also return the size of the * allocated block (in bytes) in *pcbResponse. * * While this is slightly more work than CallEngine/PrivateCall, it allows for * variable size responses, which could be more efficent in some cases. * * Returns: * S_OK * *****************************************************************************/ STDMETHODIMP CSrEngine::PrivateCallEx(void * pvEngineContext, const void * pInCallFrame, ULONG ulCallFrameSize, void ** ppvCoMemResponse, ULONG * pcbResponse) { HRESULT hr = S_OK; *ppvCoMemResponse = NULL; *pcbResponse = 0; return hr; } /**************************************************************************** * CSrEngine::PrivateCallImmediate * *--------------------------* * Description: * This method is supposed to do the same thing as the PrivateCallEx * faster but this sample engine does nothing. * Returns: * S_OK * *****************************************************************************/ STDMETHODIMP CSrEngine::PrivateCallImmediate( void *pvEngineContext, const void *pInCallFrame, ULONG ulInCallFrameSize, void **ppvCoMemResponse, ULONG *pulResponseSize) { *ppvCoMemResponse = NULL; *pulResponseSize = 0; return S_OK; } /**************************************************************************** * CSrEngine::SetAdaptationData2 * *--------------------------* * Description: * This method is called when an application calls * ISpRecoContext2::SetAdaptationData2. * Returns: * S_OK * *****************************************************************************/ STDMETHODIMP CSrEngine::SetAdaptationData2( void *pvEngineContext, __in_ecount(cch) const WCHAR *pAdaptationData, const ULONG cch, LPCWSTR pTopicName, SPADAPTATIONSETTINGS eSettings, SPADAPTATIONRELEVANCE eRelevance) { return S_OK; } /**************************************************************************** * CSrEngine::SetGrammarPrefix * *--------------------------* * Description: * This method is used to specify a prefix word that the recognizer * should recognize before any paths in the grammar. This sample engine * ignores it. * Returns: * S_OK * *****************************************************************************/ STDMETHODIMP CSrEngine::SetGrammarPrefix( void *pvEngineGrammar, __in_opt LPCWSTR pszPrefix, BOOL fIsPrefixRequired) { return S_OK; } /**************************************************************************** * CSrEngine::SetRulePriority * *--------------------------* * Description: * This method sets the priority of a particular rule. Rules with higher * priority have precedence. This sample engine ignores this. * Returns: * S_OK * *****************************************************************************/ STDMETHODIMP CSrEngine::SetRulePriority( SPRULEHANDLE hRule, void *pvClientRuleContext, int nRulePriority) { return S_OK; } /**************************************************************************** * CSrEngine::EmulateRecognition * *--------------------------* * Description: * This method is supposed to support emulation by the recognition * engine and bypass any inconsistencies that may exist between a * particular engine's recognition logic and SAPI's emulation. This * sample engine simply returns E_NOTIMPL to let SAPI do its own * emulation. * Returns: * E_NOTIMPL * *****************************************************************************/ STDMETHODIMP CSrEngine::EmulateRecognition( ISpPhrase *pPhrase, DWORD dwCompareFlags) { // Let SAPI do its own emulation. return E_NOTIMPL; } /**************************************************************************** * CSrEngine::SetSLMWeight * *--------------------------* * Description: * This method sets the weight of the dictation grammar versus that of * CFGs. This sample engine ignores this. * Returns: * S_OK * *****************************************************************************/ STDMETHODIMP CSrEngine::SetSLMWeight( void *pvEngineGrammar, float flWeight) { return S_OK; } /**************************************************************************** * CSrEngine::SetRuleWeight * *--------------------------* * Description: * This method sets the weight of a rule in a CFG. This sample engine * ignores this. * Returns: * S_OK * *****************************************************************************/ STDMETHODIMP CSrEngine::SetRuleWeight( SPRULEHANDLE hRule, void *pvClientRuleContext, float flWeight) { return S_OK; } /**************************************************************************** * CSrEngine::SetTrainingState * *--------------------------* * Description: * This method puts the engine into training mode. This sample engine * ignores this. * Returns: * S_OK * *****************************************************************************/ STDMETHODIMP CSrEngine::SetTrainingState( BOOL fDoingTraining, BOOL fAdaptFromTrainingData) { return S_OK; } /**************************************************************************** * CSrEngine::ResetAcousticModelAdaptation * *--------------------------* * Description: * This method resets the profile back to its original state. This * sample engine ignores this. * Returns: * S_OK * *****************************************************************************/ STDMETHODIMP CSrEngine::ResetAcousticModelAdaptation( void) { return S_OK; } /**************************************************************************** * CSrEngine::OnLoadCFG * *--------------------------* * Description: * This method passes the binary CFG data to the engine so that the * engine can consume it directly rather than call SAPI to retrieve * information from the grammar. This sample engine ignores this. * Returns: * S_OK * *****************************************************************************/ STDMETHODIMP CSrEngine::OnLoadCFG( void *pvEngineGrammar, const SPBINARYGRAMMAR *pGrammarData, ULONG ulGrammarID) { return S_OK; } /**************************************************************************** * CSrEngine::OnUnloadCFG * *--------------------------* * Description: * This method is the reverse of the OnLoadCFG function and is called by * SAPI when the grammar is unloaded. This sample engine ignores this. * Returns: * S_OK * *****************************************************************************/ STDMETHODIMP CSrEngine::OnUnloadCFG( void *pvEngineGrammar, ULONG ulGrammarID) { return S_OK; }