// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright © Microsoft Corporation. All rights reserved

/******************************************************************************
* srengobj.h
*   This file contains the declaration of the CSrEngine class.
*   This implements ISpSREngine, ISpSREngine2 and ISpObjectWithToken.
*   This is the main recognition object.
******************************************************************************/
|
|
|
|
#pragma once
|
|
|
|
#include "stdafx.h"
|
|
#include "SampleSrEngine.h"
|
|
#include "resource.h"
|
|
|
|
// A list of reco contexts is stored. Each entry in the list is an instance of this class.
|
|
class CContext
|
|
{
|
|
public:
|
|
CContext * m_pNext;
|
|
BOOL operator==(SPRECOCONTEXTHANDLE hContext)
|
|
{
|
|
return (m_hSapiContext == hContext);
|
|
}
|
|
|
|
CContext(SPRECOCONTEXTHANDLE hSapiContext) :
|
|
m_hSapiContext(hSapiContext)
|
|
{}
|
|
|
|
SPRECOCONTEXTHANDLE m_hSapiContext; // The reco context handle given by SAPI
|
|
};
|
|
|
|
// A list of reco grammars is stored. Each entry in the list is an instance of this class.
|
|
class CDrvGrammar
|
|
{
|
|
public:
|
|
CDrvGrammar * m_pNext;
|
|
SPGRAMMARHANDLE m_hSapiGrammar; // The grammar handle given by SAPI
|
|
BOOL m_SLMLoaded; // Does the grammar have an associated SLM for dictation
|
|
BOOL m_SLMActive; // Is the dictation active
|
|
WCHAR* m_pWordSequenceText; // The text of the word sequence buffer if one is set
|
|
ULONG m_cchText; // The size of the word sequence buffer
|
|
SPTEXTSELECTIONINFO* m_pInfo; // The text selection of the word sequence buffer
|
|
|
|
CDrvGrammar(SPGRAMMARHANDLE hSapiGrammar) :
|
|
m_hSapiGrammar(hSapiGrammar),
|
|
m_SLMLoaded(FALSE),
|
|
m_SLMActive(FALSE),
|
|
m_pWordSequenceText(NULL),
|
|
m_cchText(0),
|
|
m_pInfo(NULL)
|
|
{
|
|
}
|
|
|
|
~CDrvGrammar()
|
|
{
|
|
// Free up resources
|
|
//For each grammar object going to be released, SAPI would call SetWordSequenceData(NULL, 0, NULL).
|
|
//SetWordSequenceData and SetTextSelection would release the memories.
|
|
//There is no need to release memories referred by m_pWordSequenceText and m_pInfo here.
|
|
}
|
|
|
|
#ifdef _WIN32_WCE
|
|
CDrvGrammar()
|
|
{
|
|
}
|
|
|
|
static LONG Compare(const CDrvGrammar *, const CDrvGrammar *)
|
|
{
|
|
return 0;
|
|
}
|
|
#endif
|
|
};
|
|
|
|
// The RecognizeStream thread reads audio data in blocks. For each block
// it decides if the data is speech or silence and adds that value to this queue.
// The decoder thread reads these and processes them.
// A critical section is used to make the queue thread-safe, and an event is used to
// show if the buffer has space or not.
// This very roughly simulates the idea of doing feature extraction on
// one thread and passing the feature stream to the decoder.
|
|
class CFrameQueue
|
|
{
|
|
public:
|
|
BOOL m_aFrames[100]; // The queue of speech/silence values
|
|
ULONG m_cFrames;
|
|
ULONG m_ulHeadIndex;
|
|
HANDLE m_hSpaceAvailEvent;
|
|
CRITICAL_SECTION m_cs;
|
|
|
|
CFrameQueue()
|
|
{
|
|
m_cFrames = 0;
|
|
m_ulHeadIndex = 0;
|
|
m_hSpaceAvailEvent = NULL;
|
|
InitializeCriticalSection(&m_cs);
|
|
}
|
|
~CFrameQueue()
|
|
{
|
|
DeleteCriticalSection(&m_cs);
|
|
}
|
|
void SetSpaceAvailEvent(HANDLE h)
|
|
{
|
|
m_hSpaceAvailEvent = h;
|
|
}
|
|
void InsertTail(BOOL b)
|
|
{
|
|
EnterCriticalSection(&m_cs);
|
|
ULONG ulTailIndex = (m_ulHeadIndex + m_cFrames) % sp_countof(m_aFrames);
|
|
m_aFrames[ulTailIndex] = b;
|
|
m_cFrames++;
|
|
if (m_cFrames == sp_countof(m_aFrames))
|
|
{
|
|
ResetEvent(m_hSpaceAvailEvent);
|
|
}
|
|
LeaveCriticalSection(&m_cs);
|
|
}
|
|
BOOL IsFull()
|
|
{
|
|
EnterCriticalSection(&m_cs);
|
|
BOOL b = (m_cFrames == sp_countof(m_aFrames));
|
|
LeaveCriticalSection(&m_cs);
|
|
return b;
|
|
}
|
|
BOOL RemoveHead()
|
|
{
|
|
EnterCriticalSection(&m_cs);
|
|
BOOL b = m_aFrames[m_ulHeadIndex];
|
|
m_ulHeadIndex = (m_ulHeadIndex + 1) % sp_countof(m_aFrames);
|
|
m_cFrames--;
|
|
SetEvent(m_hSpaceAvailEvent);
|
|
LeaveCriticalSection(&m_cs);
|
|
return b;
|
|
}
|
|
BOOL HasData()
|
|
{
|
|
EnterCriticalSection(&m_cs);
|
|
ULONG cFrames = m_cFrames;
|
|
LeaveCriticalSection(&m_cs);
|
|
return cFrames;
|
|
}
|
|
};
|
|
|
|
|
|
// Class so we can use CSpBasicQueue to store rule information
|
|
class CRuleEntry
|
|
{
|
|
public:
|
|
BOOL operator==(SPRULEHANDLE rh)
|
|
{
|
|
return (m_hRule == rh);
|
|
}
|
|
CRuleEntry * m_pNext;
|
|
SPRULEHANDLE m_hRule; // SAPI rule handle
|
|
BOOL m_fTopLevel; // Shows if rule can be activated
|
|
BOOL m_fActive; // Shows if rule is currectly active
|
|
};
|
|
|
|
|
|
|
|
// The main CSrEngine class
|
|
|
|
// CSrEngine is the ATL COM class of the sample recognition engine.
// It derives from ISpSREngine2, ISpObjectWithToken and ISpThreadTask; the COM
// map below exposes ISpSREngine, ISpSREngine2 and ISpObjectWithToken.
// Most methods are only declared here; the short stubs that always return a
// fixed HRESULT are implemented inline.
class ATL_NO_VTABLE CSrEngine :
    public CComObjectRootEx<CComMultiThreadModel>,
    public CComCoClass<CSrEngine, &CLSID_SampleSREngine>,
    public ISpSREngine2,
    public ISpObjectWithToken,
    public ISpThreadTask
{
public:
    // Puts every scalar member into a defined state. The initializers are
    // listed in member declaration order, which is the order in which C++
    // actually runs them. Members with their own default constructors
    // (queues, smart pointers, frame queue) are left to those constructors.
    CSrEngine() :
        m_hRequestSync(NULL),
        m_cBlahBlah(0),
        m_ulNextGrammarIndex(0),
        m_cActive(0),
        m_ullStart(0),
        m_ullEnd(0),
        m_bSoundStarted(FALSE),
        m_bPhraseStarted(FALSE),
        m_hQueueHasRoom(NULL),
        m_LangID(0)
    {}

DECLARE_REGISTRY_RESOURCEID(IDR_SRENG)

DECLARE_PROTECT_FINAL_CONSTRUCT()

// NOTE(review): ISpThreadTask is a base class but is not listed in the COM map;
// confirm that it is meant to stay unreachable through QueryInterface.
BEGIN_COM_MAP(CSrEngine)
    COM_INTERFACE_ENTRY(ISpSREngine)
    COM_INTERFACE_ENTRY(ISpSREngine2)
    COM_INTERFACE_ENTRY(ISpObjectWithToken)
END_COM_MAP()

private:
    HANDLE                      m_hRequestSync;
    CFrameQueue                 m_FrameQueue;           // Queue of speech/silence frame values
    ULONG                       m_cBlahBlah;            // NOTE(review): purpose is not evident from this header
    CSpBasicQueue<CDrvGrammar>  m_GrammarList;          // List of CDrvGrammar entries
    CSpBasicQueue<CContext>     m_ContextList;          // List of CContext entries
    ULONG                       m_ulNextGrammarIndex;
    ULONG                       m_cActive;
    ULONGLONG                   m_ullStart;
    ULONGLONG                   m_ullEnd;
    // NOTE(review): BOOL is presumably a signed integer typedef, so these 1-bit
    // fields may read back as -1 rather than TRUE; test them for non-zero
    // instead of comparing against TRUE.
    BOOL                        m_bSoundStarted:1;
    BOOL                        m_bPhraseStarted:1;
    CComPtr<ISpSREngineSite>    m_cpSite;
    CComPtr<ISpThreadControl>   m_cpDecoderThread;
    HANDLE                      m_hQueueHasRoom;
    CSpBasicQueue<CRuleEntry>   m_RuleList;             // List of CRuleEntry entries
    CComPtr<ISpLexicon>         m_cpLexicon;
    CComPtr<ISpObjectToken>     m_cpEngineObjectToken;
    CComPtr<ISpObjectToken>     m_cpUserObjectToken;
    LANGID                      m_LangID;

public:

    // Result generation helpers
    HRESULT RandomlyWalkRule(SPRECORESULTINFO * pResult, ULONG nWords, ULONGLONG ullAudioPos, ULONG ulAudioSize);
    HRESULT RecurseWalk(SPSTATEHANDLE hState, SPPATHENTRY * pPath, ULONG * pcTrans);
    HRESULT WalkCFGRule(SPRECORESULTINFO * pResult, ULONG cRulesActive, BOOL fHypothesis,
                        ULONG nWords, ULONGLONG ullAudioPos, ULONG ulAudioSize);
    HRESULT WalkSLM(SPRECORESULTINFO * pResult, ULONG cSLMActive,
                    ULONG nWords, ULONGLONG ullAudioPos, ULONG ulAudioSize);
    HRESULT WalkTextBuffer(void* pvGrammarCookie, SPPATHENTRY * pPath, SPTRANSITIONID hId, ULONG * pcTrans);

    // Event helpers
    HRESULT AddEvent(SPEVENTENUM eEvent, ULONGLONG ullStreamPos, WPARAM wParam = 0, LPARAM lParam = 0);
    HRESULT AddEventString(SPEVENTENUM eEvent, ULONGLONG ulLStreamPos, const WCHAR * psz, WPARAM = 0);

    HRESULT CreatePhraseFromRule( CRuleEntry * pRule, BOOL fHypothesis,
                                  ULONGLONG ullAudioPos, ULONG ulAudioSize,
                                  ISpPhraseBuilder** ppPhrase );

    // Rule list lookups
    CRuleEntry* FindRule( ULONG ulRuleIndex );
    CRuleEntry* NextRuleAlt( CRuleEntry * pPriRule, CRuleEntry * pLastRule );

    void _CheckRecognition();
    void _NotifyRecognition(BOOL fHypothesis, ULONG nWords);

    // ATL constructor / destructor
    HRESULT FinalConstruct();
    HRESULT FinalRelease();

    // Initialization methods
    STDMETHODIMP SetObjectToken(ISpObjectToken * pToken);
    STDMETHODIMP GetObjectToken(ISpObjectToken ** ppToken);

    STDMETHODIMP SetRecoProfile(ISpObjectToken * pProfileToken);
    STDMETHODIMP SetSite(ISpSREngineSite *pSite);
    STDMETHODIMP GetInputAudioFormat(const GUID * pSrcFormatId, const WAVEFORMATEX * pSrcWFEX,
                                     GUID * pDesiredFormatId, WAVEFORMATEX ** ppCoMemDesiredWFEX);

    STDMETHODIMP OnCreateRecoContext(SPRECOCONTEXTHANDLE hSAPIRecoContext, void ** ppvDrvCtxt);
    STDMETHODIMP OnDeleteRecoContext(void * pvDrvCtxt);

    STDMETHODIMP OnCreateGrammar(void * pvEngineRecoContext,
                                 SPGRAMMARHANDLE hSAPIGrammar,
                                 void ** ppvEngineGrammar);

    STDMETHODIMP OnDeleteGrammar(void * pvEngineGrammar);

    // CFG methods
    STDMETHODIMP WordNotify(SPCFGNOTIFY Action, ULONG cWords, const SPWORDENTRY * pWords);
    STDMETHODIMP RuleNotify(SPCFGNOTIFY Action, ULONG cRules, const SPRULEENTRY * pRules);

    // Proprietary grammar methods
    // - used to implement an engine-specific grammar format
    // - this sample does not implement these; each one returns E_NOTIMPL
    STDMETHODIMP LoadProprietaryGrammar(void * pvEngineGrammar,
                                        REFGUID rguidParam,
                                        const WCHAR * pszStringParam,
                                        const void * pvDataParam,
                                        ULONG ulDataSize,
                                        SPLOADOPTIONS Options)
    {
        return E_NOTIMPL;
    }

    STDMETHODIMP UnloadProprietaryGrammar(void * pvEngineGrammar)
    {
        return E_NOTIMPL;
    }

    STDMETHODIMP SetProprietaryRuleState(void * pvEngineGrammar,
                                         const WCHAR * pszName,
                                         void * pvReserved,
                                         SPRULESTATE NewState,
                                         ULONG * pcRulesChanged)
    {
        return E_NOTIMPL;
    }

    STDMETHODIMP SetProprietaryRuleIdState(void * pvEngineGrammar,
                                           DWORD dwRuleId,
                                           SPRULESTATE NewState)
    {
        return E_NOTIMPL;
    }

    // Since this engine does not support proprietary grammars, we do not need to implement
    // this method other than just returning S_OK. Note to implementors: Do NOT return
    // E_NOTIMPL. Just return S_OK and ignore this data if you do not need it to implement
    // proprietary grammars.
    STDMETHODIMP SetGrammarState(void * pvEngineGrammar, SPGRAMMARSTATE eGrammarState)
    {
        return S_OK;
    }

    // Ignores the context state change and reports success.
    STDMETHODIMP SetContextState(void * pvEngineContxt, SPCONTEXTSTATE eCtxtState)
    {
        return S_OK;
    }

    // Dictation methods
    STDMETHODIMP LoadSLM(void * pvEngineGrammar, const WCHAR * pszTopicName);
    STDMETHODIMP UnloadSLM(void * pvEngineGrammar);
    STDMETHODIMP SetSLMState(void * pvEngineGrammar, SPRULESTATE NewState);

    STDMETHODIMP IsPronounceable(void *pDrvGrammar, const WCHAR *pszWord, SPWORDPRONOUNCEABLE * pWordPronounceable);
    STDMETHODIMP SetWordSequenceData(void * pvEngineGrammar, const WCHAR * pText, ULONG cchText, const SPTEXTSELECTIONINFO * pInfo);
    STDMETHODIMP SetTextSelection(void * pvEngineGrammar, const SPTEXTSELECTIONINFO * pInfo);
    STDMETHODIMP SetAdaptationData(void * pvEngineCtxtCookie, const WCHAR * pText, const ULONG cch);

    // Property methods
    STDMETHODIMP SetPropertyNum( SPPROPSRC eSrc, void* pvSrcObj, const WCHAR* pName, LONG lValue );
    STDMETHODIMP GetPropertyNum( SPPROPSRC eSrc, void* pvSrcObj, const WCHAR* pName, LONG * plValue );
    STDMETHODIMP SetPropertyString( SPPROPSRC eSrc, void* pvSrcObj, const WCHAR* pName, const WCHAR* pValue );
    STDMETHODIMP GetPropertyString( SPPROPSRC eSrc, void* pvSrcObj, const WCHAR* pName, __deref_out_opt WCHAR** ppCoMemValue );

    // The main recognition method
    STDMETHODIMP RecognizeStream(REFGUID rguidFmtId, const WAVEFORMATEX * pWaveFormatEx,
                                 HANDLE hRequestSync, HANDLE hDataAvailable,
                                 HANDLE hExit, BOOL fNewAudioStream, BOOL fRealTimeAudio,
                                 ISpObjectToken * pAudioObjectToken);

    STDMETHODIMP PrivateCall(void * pvEngineContext, void * pCallFrame, ULONG ulCallFrameSize);
    STDMETHODIMP PrivateCallEx(void * pvEngineContext, const void * pInCallFrame, ULONG ulCallFrameSize,
                               void ** ppvCoMemResponse, ULONG * pcbResponse);

    // ISpThreadTask methods

    // No per-thread setup is performed; always reports success.
    STDMETHODIMP InitThread( void * pvTaskData, HWND hwnd )
    {
        return S_OK;
    }

    // Window messages are not handled by this task; E_UNEXPECTED is returned as the LRESULT.
    LRESULT STDMETHODCALLTYPE WindowMessage( void *pvTaskData, HWND hWnd, UINT Msg, WPARAM wParam, LPARAM lParam )
    {
        return E_UNEXPECTED;
    }

    STDMETHODIMP ThreadProc( void *pvTaskData, HANDLE hExitThreadEvent, HANDLE hNotifyEvent, HWND hwndWorker, volatile const BOOL * pfContinueProcessing );

    // ISpSREngine2 methods
    STDMETHODIMP PrivateCallImmediate(
        void *pvEngineContext,
        const void *pInCallFrame,
        ULONG ulInCallFrameSize,
        void **ppvCoMemResponse,
        ULONG *pulResponseSize);

    STDMETHODIMP SetAdaptationData2(
        void *pvEngineContext,
        __in_ecount(cch) const WCHAR *pAdaptationData,
        const ULONG cch,
        LPCWSTR pTopicName,
        SPADAPTATIONSETTINGS eSettings,
        SPADAPTATIONRELEVANCE eRelevance);

    STDMETHODIMP SetGrammarPrefix(
        void *pvEngineGrammar,
        __in_opt LPCWSTR pszPrefix,
        BOOL fIsPrefixRequired);

    STDMETHODIMP SetRulePriority(
        SPRULEHANDLE hRule,
        void *pvClientRuleContext,
        int nRulePriority);

    STDMETHODIMP EmulateRecognition(
        ISpPhrase *pPhrase,
        DWORD dwCompareFlags);

    STDMETHODIMP SetSLMWeight(
        void *pvEngineGrammar,
        float flWeight);

    STDMETHODIMP SetRuleWeight(
        SPRULEHANDLE hRule,
        void *pvClientRuleContext,
        float flWeight);

    STDMETHODIMP SetTrainingState(
        BOOL fDoingTraining,
        BOOL fAdaptFromTrainingData);

    STDMETHODIMP ResetAcousticModelAdaptation( void);

    STDMETHODIMP OnLoadCFG(
        void *pvEngineGrammar,
        const SPBINARYGRAMMAR *pGrammarData,
        ULONG ulGrammarID);

    STDMETHODIMP OnUnloadCFG(
        void *pvEngineGrammar,
        ULONG ulGrammarID);
};
|
|
|