devices/devicemocap/speech/kinectaudiostream.h

devices/devicemocap/speech/kinectaudiostream.h
#ifndef __KINECTAUDIOSTREAM_H__
#define __KINECTAUDIOSTREAM_H__
/***************************************************************************************
Autodesk(R) Open Reality(R) Samples
(C) 2013 Autodesk, Inc. and/or its licensors
All rights reserved.
AUTODESK SOFTWARE LICENSE AGREEMENT
Autodesk, Inc. licenses this Software to you only upon the condition that
you accept all of the terms contained in the Software License Agreement ("Agreement")
that is embedded in or that is delivered with this Software. By selecting
the "I ACCEPT" button at the end of the Agreement or by copying, installing,
uploading, accessing or using all or any portion of the Software you agree
to enter into the Agreement. A contract is then formed between Autodesk and
either you personally, if you acquire the Software for yourself, or the company
or other legal entity for which you are acquiring the software.
AUTODESK, INC., MAKES NO WARRANTY, EITHER EXPRESS OR IMPLIED, INCLUDING BUT
NOT LIMITED TO ANY IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR
PURPOSE REGARDING THESE MATERIALS, AND MAKES SUCH MATERIALS AVAILABLE SOLELY ON AN
"AS-IS" BASIS.
IN NO EVENT SHALL AUTODESK, INC., BE LIABLE TO ANYONE FOR SPECIAL, COLLATERAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES IN CONNECTION WITH OR ARISING OUT OF PURCHASE
OR USE OF THESE MATERIALS. THE SOLE AND EXCLUSIVE LIABILITY TO AUTODESK, INC.,
REGARDLESS OF THE FORM OF ACTION, SHALL NOT EXCEED THE PURCHASE PRICE OF THE
MATERIALS DESCRIBED HEREIN.
Autodesk, Inc., reserves the right to revise and improve its products as it sees fit.
Autodesk and Open Reality are registered trademarks or trademarks of Autodesk, Inc.,
in the U.S.A. and/or other countries. All other brand names, product names, or
trademarks belong to their respective holders.
GOVERNMENT USE
Use, duplication, or disclosure by the U.S. Government is subject to restrictions as
set forth in FAR 12.212 (Commercial Computer Software-Restricted Rights) and
DFAR 227.7202 (Rights in Technical Data and Computer Software), as applicable.
Manufacturer is Autodesk, Inc., 10 Duke Street, Montreal, Quebec, Canada, H3C 2L7.
***************************************************************************************/
#pragma once
// For IMediaObject and related interfaces
#include <dmo.h>
// For WAVEFORMATEX
#include <mmreg.h>
// For MMCSS functionality such as AvSetMmThreadCharacteristics
#include <avrt.h>
#include <stack>
#include <queue>
// ---------------------------------------------------------------------------
// Wave format parameters of the Kinect audio stream.
// The two derived values below (block alignment and average byte rate) are
// computed from the primary parameters so they cannot drift out of sync.
// ---------------------------------------------------------------------------
// Sample encoding of the Kinect audio stream
static const WORD AudioFormat = WAVE_FORMAT_PCM;
// Channel count of the Kinect audio stream
static const WORD AudioChannels = 1;
// Sampling rate (samples per second) of the Kinect audio stream
static const DWORD AudioSamplesPerSecond = 16000;
// Width, in bits, of one audio sample in the Kinect audio stream
static const WORD AudioBitsPerSample = 16;
// Block alignment: bytes per sample frame = channels * bits per sample / 8 (evaluates to 2)
static const WORD AudioBlockAlign = static_cast<WORD>(AudioChannels * AudioBitsPerSample / 8);
// Average bytes per second = samples per second * block alignment (evaluates to 32000)
static const DWORD AudioAverageBytesPerSecond = AudioSamplesPerSecond * AudioBlockAlign;
// IMediaBuffer implementation whose storage is a fixed-size byte array embedded
// directly in the object (sized AudioSamplesPerSecond * AudioBlockAlign bytes).
//
// Reference counting is deliberately inert: AddRef/Release return constants and
// never destroy the object, so whoever creates an instance controls its lifetime.
class CStaticMediaBuffer : public IMediaBuffer
{
public:
    // Constructor: the buffer starts out holding no valid data.
    CStaticMediaBuffer() : m_dataLength(0) {}

    // IUnknown methods
    // Fixed return values; the object is never deleted through Release().
    STDMETHODIMP_(ULONG) AddRef() { return 2; }
    STDMETHODIMP_(ULONG) Release() { return 1; }

    // Returns this object as IUnknown or IMediaBuffer.
    //   E_POINTER      - ppv is NULL
    //   E_NOINTERFACE  - riid is not supported (*ppv is set to NULL, as COM requires)
    STDMETHODIMP QueryInterface(REFIID riid, void **ppv)
    {
        if (ppv == NULL)
        {
            return E_POINTER;
        }
        if (riid == IID_IUnknown)
        {
            AddRef();
            *ppv = static_cast<IUnknown*>(this);
            return NOERROR;
        }
        else if (riid == IID_IMediaBuffer)
        {
            AddRef();
            *ppv = static_cast<IMediaBuffer*>(this);
            return NOERROR;
        }
        else
        {
            // COM rule: the out pointer must be NULL when the query fails.
            *ppv = NULL;
            return E_NOINTERFACE;
        }
    }

    // IMediaBuffer methods

    // Records how many bytes of m_pData hold valid data.
    //   E_INVALIDARG - length exceeds the capacity of the backing array
    STDMETHODIMP SetLength(DWORD length)
    {
        if (length > sizeof(m_pData))
        {
            return E_INVALIDARG;
        }
        m_dataLength = length;
        return NOERROR;
    }

    // Reports the capacity of the backing array in bytes.
    //   E_POINTER - pMaxLength is NULL
    STDMETHODIMP GetMaxLength(DWORD *pMaxLength)
    {
        if (pMaxLength == NULL)
        {
            return E_POINTER;
        }
        *pMaxLength = static_cast<DWORD>(sizeof(m_pData));
        return NOERROR;
    }

    // Retrieves the buffer address and/or the current valid data length.
    // Either out parameter may be NULL, but not both.
    //   E_POINTER - both ppBuffer and pLength are NULL
    STDMETHODIMP GetBufferAndLength(BYTE **ppBuffer, DWORD *pLength)
    {
        if (ppBuffer == NULL && pLength == NULL)
        {
            return E_POINTER;
        }
        if (ppBuffer)
        {
            *ppBuffer = m_pData;
        }
        if (pLength)
        {
            *pLength = m_dataLength;
        }
        return NOERROR;
    }

    // Non-COM helper: directly sets the valid data length (no range check is performed).
    void Init(ULONG ulData)
    {
        m_dataLength = ulData;
    }

protected:
    // Statically allocated buffer used to hold audio data returned by IMediaObject
    BYTE m_pData[AudioSamplesPerSecond * AudioBlockAlign];
    // Amount of data currently being held in m_pData
    ULONG m_dataLength;
};
// Reference-counted IStream implementation built around an IMediaObject that
// captures Kinect audio. Per the member comments below, a capture thread fills
// buffers taken from a pool and queues them for stream clients to read.
// Only the inline members are defined here; the rest are implemented elsewhere.
class KinectAudioStream : public IStream
{
public:
    // KinectAudioStream methods
    // pKinectDmo: media object used to capture audio (ownership semantics are
    // defined by the out-of-line constructor - not visible in this header).
    KinectAudioStream(IMediaObject *pKinectDmo);
    virtual ~KinectAudioStream();
    HRESULT StartCapture();
    HRESULT StopCapture();

    // IUnknown methods
    // Atomically increments the reference count and returns the new value.
    STDMETHODIMP_(ULONG) AddRef() { return InterlockedIncrement(&m_cRef); }
    // Atomically decrements the reference count; deletes the object when it reaches zero.
    STDMETHODIMP_(ULONG) Release()
    {
        UINT ref = InterlockedDecrement(&m_cRef);
        if (ref == 0)
        {
            delete this;
        }
        return ref;
    }
    // Returns this object as IUnknown or IStream, adding a reference on success.
    //   E_POINTER      - ppv is NULL
    //   E_NOINTERFACE  - riid is not supported (*ppv is set to NULL, as COM requires)
    STDMETHODIMP QueryInterface(REFIID riid, void **ppv)
    {
        if (ppv == NULL)
        {
            return E_POINTER;
        }
        if (riid == IID_IUnknown)
        {
            AddRef();
            *ppv = static_cast<IUnknown*>(this);
            return S_OK;
        }
        else if (riid == IID_IStream)
        {
            AddRef();
            *ppv = static_cast<IStream*>(this);
            return S_OK;
        }
        else
        {
            // COM rule: the out pointer must be NULL when the query fails.
            *ppv = NULL;
            return E_NOINTERFACE;
        }
    }

    // IStream methods (implemented out of line)
    STDMETHODIMP Read(void *,ULONG,ULONG *);
    STDMETHODIMP Write(const void *,ULONG,ULONG *);
    STDMETHODIMP Seek(LARGE_INTEGER,DWORD,ULARGE_INTEGER *);
    STDMETHODIMP SetSize(ULARGE_INTEGER);
    STDMETHODIMP CopyTo(IStream *,ULARGE_INTEGER,ULARGE_INTEGER *,ULARGE_INTEGER *);
    STDMETHODIMP Commit(DWORD);
    STDMETHODIMP Revert();
    STDMETHODIMP LockRegion(ULARGE_INTEGER,ULARGE_INTEGER,DWORD);
    STDMETHODIMP UnlockRegion(ULARGE_INTEGER,ULARGE_INTEGER,DWORD);
    STDMETHODIMP Stat(STATSTG *,DWORD);
    STDMETHODIMP Clone(IStream **);

private:
    // Number of audio buffers (chunks of overall ring buffer) used to capture Kinect audio data
    static const UINT NumBuffers = 20;
    typedef std::stack<CStaticMediaBuffer*> BufferPool;
    typedef std::queue<CStaticMediaBuffer*> CircularBufferQueue;

    // Number of references to this object
    UINT m_cRef;
    // Media object used to capture audio
    IMediaObject* m_pKinectDmo;
    // Event used to signal that capture thread should stop capturing audio
    HANDLE m_hStopEvent;
    // Event used to signal that there's captured audio data ready to be read
    HANDLE m_hDataReady;
    // Audio capture thread
    HANDLE m_hCaptureThread;
    // Pool of unused buffers ready to be used for writing captured audio data
    BufferPool m_BufferPool;
    // Circular buffer queue that contains audio data ready for reading by stream clients
    CircularBufferQueue m_ReadBufferQueue;
    // Buffer where most recently captured audio data is being written
    CStaticMediaBuffer* m_CurrentWriteBuffer;
    // Buffer from which stream client is currently reading audio data
    CStaticMediaBuffer* m_CurrentReadBuffer;
    // Next index to be read within current read buffer
    ULONG m_CurrentReadBufferIndex;
    // Total number of bytes read so far by audio stream client
    ULONG m_BytesRead;
    // Critical section used to synchronize multithreaded access to captured audio data
    CRITICAL_SECTION m_Lock;

    // Buffer management helpers (implemented out of line)
    CStaticMediaBuffer* GetWriteBuffer();
    void ReleaseBuffer(CStaticMediaBuffer* pBuffer);
    void ReleaseAllBuffers();
    void QueueCapturedData(BYTE *pData, UINT cbData);
    void QueueCapturedBuffer(CStaticMediaBuffer *pBuffer);
    void ReadOneBuffer(BYTE **ppbData, ULONG* pcbData);

    // Static overload has the thread-procedure shape (LPVOID parameter);
    // presumably it forwards to the instance overload - confirm in the .cpp.
    static DWORD WINAPI CaptureThread(LPVOID pParam);
    DWORD WINAPI CaptureThread();

    // TRUE while the stop event exists and has not been signaled.
    // The zero timeout makes this a non-blocking poll of the event state.
    BOOL IsCapturing()
    {
        return (m_hStopEvent != NULL) && (WaitForSingleObject(m_hStopEvent,0) != WAIT_OBJECT_0);
    }
};
#endif /* __KINECTAUDIOSTREAM_H__ */