// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A // PARTICULAR PURPOSE. // // Copyright © Microsoft Corporation. All rights reserved /****************************************************************************** * srengobj.h * This file contains the declaration of the CSrEngine class. * This implements ISpSREngine, ISpSREngine2 and ISpObjectWithToken. * This is the main recognition object ******************************************************************************/ #pragma once #include "stdafx.h" #include "SampleSrEngine.h" #include "resource.h" // A list of reco contexts is stored. Each entry in the list is an instance of this class. class CContext { public: CContext * m_pNext; BOOL operator==(SPRECOCONTEXTHANDLE hContext) { return (m_hSapiContext == hContext); } CContext(SPRECOCONTEXTHANDLE hSapiContext) : m_hSapiContext(hSapiContext) {} SPRECOCONTEXTHANDLE m_hSapiContext; // The reco context handle given by SAPI }; // A list of reco grammars is stored. Each entry in the list is an instance of this class. class CDrvGrammar { public: CDrvGrammar * m_pNext; SPGRAMMARHANDLE m_hSapiGrammar; // The grammar handle given by SAPI BOOL m_SLMLoaded; // Does the grammar have an associated SLM for dictation BOOL m_SLMActive; // Is the dictation active WCHAR* m_pWordSequenceText; // The text of the word sequence buffer if one is set ULONG m_cchText; // The size of the word sequence buffer SPTEXTSELECTIONINFO* m_pInfo; // The text selection of the word sequence buffer CDrvGrammar(SPGRAMMARHANDLE hSapiGrammar) : m_hSapiGrammar(hSapiGrammar), m_SLMLoaded(FALSE), m_SLMActive(FALSE), m_pWordSequenceText(NULL), m_cchText(0), m_pInfo(NULL) { } ~CDrvGrammar() { // Free up resources //For each grammar object going to be released, SAPI would call SetWordSequenceData(NULL, 0, NULL). //SetWordSequenceData and SetTextSelection would release the memories. //There is no need to release memories referred by m_pWordSequenceText and m_pInfo here. } #ifdef _WIN32_WCE CDrvGrammar() { } static LONG Compare(const CDrvGrammar *, const CDrvGrammar *) { return 0; } #endif }; // The RecognizeStream thread read audio data in blocks. For each block // it decides if the data is speech or silence and adds that value to this queue. // The decoder thread reads these and processes them. // A critical section is used to make the queue thread-safe, and an event is used to // show if the buffer has space or not. // This very roughtly simulates the idea of doing features extraction on // one thread and passes the feature stream to the decoder. class CFrameQueue { public: BOOL m_aFrames[100]; // The queue of speech/silence values ULONG m_cFrames; ULONG m_ulHeadIndex; HANDLE m_hSpaceAvailEvent; CRITICAL_SECTION m_cs; CFrameQueue() { m_cFrames = 0; m_ulHeadIndex = 0; m_hSpaceAvailEvent = NULL; InitializeCriticalSection(&m_cs); } ~CFrameQueue() { DeleteCriticalSection(&m_cs); } void SetSpaceAvailEvent(HANDLE h) { m_hSpaceAvailEvent = h; } void InsertTail(BOOL b) { EnterCriticalSection(&m_cs); ULONG ulTailIndex = (m_ulHeadIndex + m_cFrames) % sp_countof(m_aFrames); m_aFrames[ulTailIndex] = b; m_cFrames++; if (m_cFrames == sp_countof(m_aFrames)) { ResetEvent(m_hSpaceAvailEvent); } LeaveCriticalSection(&m_cs); } BOOL IsFull() { EnterCriticalSection(&m_cs); BOOL b = (m_cFrames == sp_countof(m_aFrames)); LeaveCriticalSection(&m_cs); return b; } BOOL RemoveHead() { EnterCriticalSection(&m_cs); BOOL b = m_aFrames[m_ulHeadIndex]; m_ulHeadIndex = (m_ulHeadIndex + 1) % sp_countof(m_aFrames); m_cFrames--; SetEvent(m_hSpaceAvailEvent); LeaveCriticalSection(&m_cs); return b; } BOOL HasData() { EnterCriticalSection(&m_cs); ULONG cFrames = m_cFrames; LeaveCriticalSection(&m_cs); return cFrames; } }; // Class so we can use CSpBasicQueue to store rule information class CRuleEntry { public: BOOL operator==(SPRULEHANDLE rh) { return (m_hRule == rh); } CRuleEntry * m_pNext; SPRULEHANDLE m_hRule; // SAPI rule handle BOOL m_fTopLevel; // Shows if rule can be activated BOOL m_fActive; // Shows if rule is currectly active }; // The main CSrEngine class class ATL_NO_VTABLE CSrEngine : public CComObjectRootEx, public CComCoClass, public ISpSREngine2, public ISpObjectWithToken, public ISpThreadTask { public: CSrEngine() : m_ulNextGrammarIndex(0), m_cActive(0), m_bPhraseStarted(FALSE), m_bSoundStarted(FALSE), m_hQueueHasRoom(NULL), m_hRequestSync(NULL), m_LangID(0) {} DECLARE_REGISTRY_RESOURCEID(IDR_SRENG) DECLARE_PROTECT_FINAL_CONSTRUCT() BEGIN_COM_MAP(CSrEngine) COM_INTERFACE_ENTRY(ISpSREngine) COM_INTERFACE_ENTRY(ISpSREngine2) COM_INTERFACE_ENTRY(ISpObjectWithToken) END_COM_MAP() private: HANDLE m_hRequestSync; CFrameQueue m_FrameQueue; ULONG m_cBlahBlah; CSpBasicQueue m_GrammarList; CSpBasicQueue m_ContextList; ULONG m_ulNextGrammarIndex; ULONG m_cActive; ULONGLONG m_ullStart; ULONGLONG m_ullEnd; BOOL m_bSoundStarted:1; BOOL m_bPhraseStarted:1; CComPtr m_cpSite; CComPtr m_cpDecoderThread; HANDLE m_hQueueHasRoom; CSpBasicQueue m_RuleList; CComPtr m_cpLexicon; CComPtr m_cpEngineObjectToken; CComPtr m_cpUserObjectToken; LANGID m_LangID; public: HRESULT RandomlyWalkRule(SPRECORESULTINFO * pResult, ULONG nWords, ULONGLONG ullAudioPos, ULONG ulAudioSize); HRESULT RecurseWalk(SPSTATEHANDLE hState, SPPATHENTRY * pPath, ULONG * pcTrans); HRESULT WalkCFGRule(SPRECORESULTINFO * pResult, ULONG cRulesActive, BOOL fHypothesis, ULONG nWords, ULONGLONG ullAudioPos, ULONG ulAudioSize); HRESULT WalkSLM(SPRECORESULTINFO * pResult, ULONG cSLMActive, ULONG nWords, ULONGLONG ullAudioPos, ULONG ulAudioSize); HRESULT WalkTextBuffer(void* pvGrammarCookie, SPPATHENTRY * pPath, SPTRANSITIONID hId, ULONG * pcTrans); HRESULT AddEvent(SPEVENTENUM eEvent, ULONGLONG ullStreamPos, WPARAM wParam = 0, LPARAM lParam = 0); HRESULT AddEventString(SPEVENTENUM eEvent, ULONGLONG ulLStreamPos, const WCHAR * psz, WPARAM = 0); HRESULT CreatePhraseFromRule( CRuleEntry * pRule, BOOL fHypothesis, ULONGLONG ullAudioPos, ULONG ulAudioSize, ISpPhraseBuilder** ppPhrase ); CRuleEntry* FindRule( ULONG ulRuleIndex ); CRuleEntry* NextRuleAlt( CRuleEntry * pPriRule, CRuleEntry * pLastRule ); void _CheckRecognition(); void _NotifyRecognition(BOOL fHypothesis, ULONG nWords); // ATL contstructor / destructor HRESULT FinalConstruct(); HRESULT FinalRelease(); // Initialization methods STDMETHODIMP SetObjectToken(ISpObjectToken * pToken); STDMETHODIMP GetObjectToken(ISpObjectToken ** ppToken); STDMETHODIMP SetRecoProfile(ISpObjectToken * pProfileToken); STDMETHODIMP SetSite(ISpSREngineSite *pSite); STDMETHODIMP GetInputAudioFormat(const GUID * pSrcFormatId, const WAVEFORMATEX * pSrcWFEX, GUID * pDesiredFormatId, WAVEFORMATEX ** ppCoMemDesiredWFEX); STDMETHODIMP OnCreateRecoContext(SPRECOCONTEXTHANDLE hSAPIRecoContext, void ** ppvDrvCtxt); STDMETHODIMP OnDeleteRecoContext(void * pvDrvCtxt); STDMETHODIMP OnCreateGrammar(void * pvEngineRecoContext, SPGRAMMARHANDLE hSAPIGrammar, void ** ppvEngineGrammar); STDMETHODIMP OnDeleteGrammar(void * pvEngineGrammar); // CFG methods STDMETHODIMP WordNotify(SPCFGNOTIFY Action, ULONG cWords, const SPWORDENTRY * pWords); STDMETHODIMP RuleNotify(SPCFGNOTIFY Action, ULONG cRules, const SPRULEENTRY * pRules); // Proprietary grammar methods // - used to implement an engine-specific grammar format // - this sample does not implement these STDMETHODIMP LoadProprietaryGrammar(void * pvEngineGrammar, REFGUID rguidParam, const WCHAR * pszStringParam, const void * pvDataParam, ULONG ulDataSize, SPLOADOPTIONS Options) { return E_NOTIMPL; } STDMETHODIMP UnloadProprietaryGrammar(void * pvEngineGrammar) { return E_NOTIMPL; } STDMETHODIMP SetProprietaryRuleState(void * pvEngineGrammar, const WCHAR * pszName, void * pvReserved, SPRULESTATE NewState, ULONG * pcRulesChanged) { return E_NOTIMPL; } STDMETHODIMP SetProprietaryRuleIdState(void * pvEngineGrammar, DWORD dwRuleId, SPRULESTATE NewState) { return E_NOTIMPL; } // Since this engine does not support proprietary grammars, we do not need to implement // this method other than just returning S_OK. Note to implementors: Do NOT return // E_NOTIMPL. Just return S_OK and ignore this data if you do not need it to implement // proprietary grammars. STDMETHODIMP SetGrammarState(void * pvEngineGrammar, SPGRAMMARSTATE eGrammarState) { return S_OK; } STDMETHODIMP SetContextState(void * pvEngineContxt, SPCONTEXTSTATE eCtxtState) { return S_OK; } // Dictation methods STDMETHODIMP LoadSLM(void * pvEngineGrammar, const WCHAR * pszTopicName); STDMETHODIMP UnloadSLM(void * pvEngineGrammar); STDMETHODIMP SetSLMState(void * pvEngineGrammar, SPRULESTATE NewState); STDMETHODIMP IsPronounceable(void *pDrvGrammar, const WCHAR *pszWord, SPWORDPRONOUNCEABLE * pWordPronounceable); STDMETHODIMP SetWordSequenceData(void * pvEngineGrammar, const WCHAR * pText, ULONG cchText, const SPTEXTSELECTIONINFO * pInfo); STDMETHODIMP SetTextSelection(void * pvEngineGrammar, const SPTEXTSELECTIONINFO * pInfo); STDMETHODIMP SetAdaptationData(void * pvEngineCtxtCookie, const WCHAR * pText, const ULONG cch); // Property methods STDMETHODIMP SetPropertyNum( SPPROPSRC eSrc, void* pvSrcObj, const WCHAR* pName, LONG lValue ); STDMETHODIMP GetPropertyNum( SPPROPSRC eSrc, void* pvSrcObj, const WCHAR* pName, LONG * plValue ); STDMETHODIMP SetPropertyString( SPPROPSRC eSrc, void* pvSrcObj, const WCHAR* pName, const WCHAR* pValue ); STDMETHODIMP GetPropertyString( SPPROPSRC eSrc, void* pvSrcObj, const WCHAR* pName, __deref_out_opt WCHAR** ppCoMemValue ); // The main recognition method STDMETHODIMP RecognizeStream(REFGUID rguidFmtId, const WAVEFORMATEX * pWaveFormatEx, HANDLE hRequestSync, HANDLE hDataAvailable, HANDLE hExit, BOOL fNewAudioStream, BOOL fRealTimeAudio, ISpObjectToken * pAudioObjectToken); STDMETHODIMP PrivateCall(void * pvEngineContext, void * pCallFrame, ULONG ulCallFrameSize); STDMETHODIMP PrivateCallEx(void * pvEngineContext, const void * pInCallFrame, ULONG ulCallFrameSize, void ** ppvCoMemResponse, ULONG * pcbResponse); // ISpThreadTask methods STDMETHODIMP InitThread( void * pvTaskData, HWND hwnd ) { return S_OK; } LRESULT STDMETHODCALLTYPE WindowMessage( void *pvTaskData, HWND hWnd, UINT Msg, WPARAM wParam, LPARAM lParam ) { return E_UNEXPECTED; } STDMETHODIMP ThreadProc( void *pvTaskData, HANDLE hExitThreadEvent, HANDLE hNotifyEvent, HWND hwndWorker, volatile const BOOL * pfContinueProcessing ); // ISpSREngine2 methods STDMETHODIMP PrivateCallImmediate( void *pvEngineContext, const void *pInCallFrame, ULONG ulInCallFrameSize, void **ppvCoMemResponse, ULONG *pulResponseSize); STDMETHODIMP SetAdaptationData2( void *pvEngineContext, __in_ecount(cch) const WCHAR *pAdaptationData, const ULONG cch, LPCWSTR pTopicName, SPADAPTATIONSETTINGS eSettings, SPADAPTATIONRELEVANCE eRelevance); STDMETHODIMP SetGrammarPrefix( void *pvEngineGrammar, __in_opt LPCWSTR pszPrefix, BOOL fIsPrefixRequired); STDMETHODIMP SetRulePriority( SPRULEHANDLE hRule, void *pvClientRuleContext, int nRulePriority); STDMETHODIMP EmulateRecognition( ISpPhrase *pPhrase, DWORD dwCompareFlags); STDMETHODIMP SetSLMWeight( void *pvEngineGrammar, float flWeight); STDMETHODIMP SetRuleWeight( SPRULEHANDLE hRule, void *pvClientRuleContext, float flWeight); STDMETHODIMP SetTrainingState( BOOL fDoingTraining, BOOL fAdaptFromTrainingData); STDMETHODIMP ResetAcousticModelAdaptation( void); STDMETHODIMP OnLoadCFG( void *pvEngineGrammar, const SPBINARYGRAMMAR *pGrammarData, ULONG ulGrammarID); STDMETHODIMP OnUnloadCFG( void *pvEngineGrammar, ULONG ulGrammarID); };