Ceci est une ancienne révision du document !
Cette page est en cours de rédaction
Installation
à partir des dépôts
Les paquets présents dans raring semblent ne pas fonctionner.
Installer les paquets python-pocketsphinx libpocketsphinx1 gstreamer0.10-pocketsphinx python-pyaudio
apt-get install python-pocketsphinx libpocketsphinx1 gstreamer0.10-pocketsphinx python-pyaudio
à partir des sources
D'abord il faut installer les dépendances
sudo apt-get build-dep pocketsphinx
sudo apt-get install bison python-pyaudio
sphinxbase
On récupère ensuite les sources de sphinxbase-0.8
wget -O sphinxbase-0.8.tar.gz http://sourceforge.net/projects/cmusphinx/files/sphinxbase/0.8/sphinxbase-0.8.tar.gz/download
On les décompresse
tar -xvzf sphinxbase-0.8.tar.gz
On compile
Si vous avez l'erreur :
import pocketsphinx as ps File "sphinxbase.pxd", line 150, in init pocketsphinx (pocketsphinx.c:7935) ValueError: PyCapsule_GetPointer called with invalid PyCapsule object
il faut régénérer le fichier python/sphinxbase.c ref
sudo apt-get install cython
mv python/sphinxbase.c ~
puis relancer la compilation à partir de ./configure
Par défaut au moment de la configuration de sphinxbase, si les librairies de compilation de pulseaudio sont installées elles seront utilisées.
Si comme moi vous devez utiliser Alsa, il faut supprimer la librairie pulseaudio :
sudo apt-get remove libpulse-dev
./configure --prefix=/usr/local
make
sudo make install
pocketsphinx
wget -O pocketsphinx-0.8.tar.gz http://sourceforge.net/projects/cmusphinx/files/pocketsphinx/0.8/pocketsphinx-0.8.tar.gz/download
tar -xvzf pocketsphinx-0.8.tar.gz
cd pocketsphinx-0.8
./configure --prefix=/usr/local
make
sudo make install
modèles français
Téléchargement:
wget -O lium_french_f0.tar.gz http://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/French%20F0%20Broadcast%20News%20Acoustic%20Model/lium_french_f0.tar.gz/download
tar -xvzf lium_french_f0.tar.gz
cd lium_french_f0/
sudo mkdir /usr/share/pocketsphinx/model/FR/
sudo mv * /usr/share/pocketsphinx/model/FR/
wget -O french3g62K.lm.dmp.bz2 http://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/French%20Language%20Model/french3g62K.lm.dmp.bz2/download
bzip2 -d french3g62K.lm.dmp.bz2
sudo mv french3g62K.lm.dmp /usr/share/pocketsphinx/model/FR/
wget -O frenchWords62K.dic http://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/French%20Language%20Model/frenchWords62K.dic/download
sudo mv frenchWords62K.dic /usr/share/pocketsphinx/model/FR/
wget -O lium_french_f2.tar.gz http://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/French%20F2%20Telephone%20Acoustic%20Model/lium_french_f2.tar.gz/download
Bon alors, est-ce que ça marche ?
pocketsphinx_continuous
pocketsphinx_continuous -dict /usr/share/pocketsphinx/model/FR/frenchWords62K.dic -hmm /usr/share/pocketsphinx/model/FR/ -lm /usr/share/pocketsphinx/model/FR/french3g62K.lm.dmp
scripts python
lister tous les périphériques audio
#!/usr/bin/python
"""
PyAudio Example:
Query and print PortAudio HostAPIs, Devices, and their
support rates.
"""
import pyaudio
# Candidate sample rates that are probed against every device further down.
standard_sample_rates = [
    8000.0, 9600.0, 11025.0, 12000.0, 16000.0, 22050.0, 24000.0,
    32000.0, 44100.0, 48000.0, 88200.0, 96000.0, 192000.0,
]

# One PyAudio instance is shared by the whole script.
p = pyaudio.PyAudio()
max_apis = p.get_host_api_count()
max_devs = p.get_device_count()

# --- General PortAudio information -----------------------------------------
print("\nPortAudio System Info:\n======================")
print("Version: %d" % pyaudio.get_portaudio_version())
print("Version Text: %s" % pyaudio.get_portaudio_version_text())
print("Number of Host APIs: %d" % max_apis)
print("Number of Devices : %d" % max_devs)

# --- One "key: value" listing per host API ---------------------------------
print("\nHost APIs:\n==========")
for api_index in range(max_apis):
    apiinfo = p.get_host_api_info_by_index(api_index)
    for key, val in apiinfo.items():
        print("%s: %s" % (key, val))
    print("--------------------------")
print("\nDevices:\n========")


def _rate_is_supported(rate, **stream_args):
    """Return True when ``p.is_format_supported`` accepts *rate*.

    *stream_args* are forwarded unchanged as keyword arguments. A ValueError
    raised by the call is treated as "not supported".
    """
    try:
        return bool(p.is_format_supported(rate, **stream_args))
    except ValueError:
        return False


for dev_index in range(max_devs):
    devinfo = p.get_device_info_by_index(dev_index)

    # Dump every device parameter; the host API index also gets its name.
    for name, value in devinfo.items():
        if name == 'hostApi':
            api_name = p.get_host_api_info_by_index(value)['name']
            value = str(value) + " (%s)" % api_name
        print("\t%s: %s" % (name, value))

    # Keyword arguments describing this device as an input / as an output.
    has_input = devinfo['maxInputChannels'] > 0
    has_output = devinfo['maxOutputChannels'] > 0
    input_args = {
        'input_device': devinfo['index'],
        'input_channels': devinfo['maxInputChannels'],
        'input_format': pyaudio.paInt16,
    }
    output_args = {
        'output_device': devinfo['index'],
        'output_channels': devinfo['maxOutputChannels'],
        'output_format': pyaudio.paInt16,
    }
    duplex_args = dict(input_args, **output_args)

    # Probe each candidate rate: input only, output only, then both at once.
    input_supported_rates = []
    output_supported_rates = []
    full_duplex_rates = []
    for rate in standard_sample_rates:
        if has_input and _rate_is_supported(rate, **input_args):
            input_supported_rates.append(rate)
        if has_output and _rate_is_supported(rate, **output_args):
            output_supported_rates.append(rate)
        if has_input and has_output \
                and _rate_is_supported(rate, **duplex_args):
            full_duplex_rates.append(rate)

    # Only non-empty rate lists are reported.
    if input_supported_rates:
        print("\tInput rates: %s" % input_supported_rates)
    if output_supported_rates:
        print("\tOutput rates: %s" % output_supported_rates)
    if full_duplex_rates:
        print("\tFull duplex: %s" % full_duplex_rates)
    print("\t--------------------------------")
def _print_device_details(devinfo):
    """Print every field of *devinfo*, one tab-indented line per field.

    The 'hostApi' field is printed as "<index> (<host API name>)", the name
    being looked up through the shared PyAudio instance ``p``.
    """
    for name, value in devinfo.items():
        if name == 'hostApi':
            value = str(value) + \
                " (%s)" % p.get_host_api_info_by_index(value)['name']
        print("\t%s: %s" % (name, value))
    print("\t--------------------------------")


def _first_error_arg(exc):
    """Return the first argument of *exc*, or *exc* itself when it has none.

    Exceptions cannot be indexed directly on Python 3 (``exc[0]`` raises
    TypeError), so the value is read from ``exc.args`` instead.
    """
    return exc.args[0] if exc.args else exc


print("\nDefault Devices:\n================")

# Default input device, if PortAudio reports one.
try:
    def_index = p.get_default_input_device_info()['index']
    print("Default Input Device : %s" % def_index)
    _print_device_details(p.get_device_info_by_index(def_index))
except IOError as e:
    print("No Input devices: %s" % (_first_error_arg(e),))

# Default output device, if PortAudio reports one.
try:
    def_index = p.get_default_output_device_info()['index']
    print("Default Output Device: %s" % def_index)
    _print_device_details(p.get_device_info_by_index(def_index))
except IOError as e:
    print("No Output devices: %s" % (_first_error_arg(e),))

# Release the PyAudio instance now that the report is complete.
p.terminate()
essayer de faire une reconnaissance vocale
#!/usr/bin/python
import sys,os
import pyaudio
import wave
# Locations of the French model files; all three are handed to decodeSpeech().
# Directory given to the decoder as its `hmm` argument.
hmdir = "/usr/share/pocketsphinx/model/FR/"
# File given to the decoder as its `lm` argument.
lmd = "/usr/share/pocketsphinx/model/FR/french3g62K.lm.dmp"
# File given to the decoder as its `dict` argument.
dictd = "/usr/share/pocketsphinx/model/FR/frenchWords62K.dic"
def decodeSpeech(hmmd, lmdir, dictp, wavfile):
    """Decode a recorded WAV file with PocketSphinx and return the hypothesis.

    Args:
        hmmd: value passed to ``Decoder`` as ``hmm``.
        lmdir: value passed to ``Decoder`` as ``lm``.
        dictp: value passed to ``Decoder`` as ``dict``.
        wavfile: path of the audio file to decode.

    Returns:
        The first element of the value returned by ``Decoder.get_hyp()``.
    """
    import pocketsphinx as ps
    import sphinxbase  # kept from the original; not referenced directly here

    speechRec = ps.Decoder(hmm=hmmd, lm=lmdir, dict=dictp)
    # open() instead of the Python 2-only file() builtin; the with-block
    # guarantees the handle is closed even when decoding fails.
    with open(wavfile, 'rb') as wavFile:
        # Skip the first 44 bytes before decoding -- presumably the WAV
        # header, so that only raw samples reach decode_raw (TODO confirm).
        wavFile.seek(44)
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return result[0]
# Local import: used below to run espeak without going through a shell.
import subprocess

# Audio capture parameters.
CHUNK = 512
# paInt16 is a sample-format constant, which is what p.open(format=...) and
# get_sample_size() expect. The original used pyaudio.paALSA, a host-API
# identifier; NOTE(review): the two appear to share the same numeric value,
# which would explain why the original worked -- confirm against PyAudio.
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
RECORD_SECONDS = 10


def _record_wav(path):
    """Record RECORD_SECONDS of audio from the default input into *path*.

    The stream, the PyAudio instance and the wave writer are all released
    even if reading or writing fails.
    """
    # Floor division keeps the chunk count an int on both Python 2 and 3
    # (plain "/" yields a float on Python 3).
    num_chunks = RATE // CHUNK * RECORD_SECONDS

    p = pyaudio.PyAudio()
    try:
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                        input=True, frames_per_buffer=CHUNK)
        try:
            print("* recording")
            print(str(num_chunks) + " size\n")
            frames = [stream.read(CHUNK) for _ in range(num_chunks)]
            print("* done recording")
            stream.stop_stream()
        finally:
            stream.close()
    finally:
        p.terminate()

    wf = wave.open(path, 'wb')
    try:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
    finally:
        wf.close()


# Record ten clips (o0.wav .. o9.wav); each one is decoded and spoken back.
for x in range(10):
    fn = "o" + str(x) + ".wav"
    _record_wav(fn)
    recognised = decodeSpeech(hmdir, lmd, dictd, fn)
    print(recognised)
    # Nothing to speak when the decoder produced no hypothesis.
    if recognised:
        # Argument list instead of a shell string: quotes or shell
        # metacharacters in the recognised text cannot alter the command.
        subprocess.call(["espeak", recognised])
références
Site officiel : http://cmusphinx.sourceforge.net/wiki/download/
Python audio : http://people.csail.mit.edu/hubert/pyaudio/docs/
Exemple en python: http://pythonism.wordpress.com/2013/06/06/pocketsphinx-voice-recognition-with-python/