Ich nutze aktuell Porcupine (https://github.com/Picovoice/porcupine) zur Wakeword-Erkennung und parallel dazu speech_recognition, um anschließend die Aufnahme zu starten.
1. Problem:
Das Mikrofon nimmt das Audio mit 48000 Hz auf, Porcupine benötigt es jedoch mit 16000 Hz.
fehlgeschlagene Lösungsansätze zu 1.:
- Gerät mit eigener Samplerate in der .asoundrc definiert
- Gerät mit eigener Samplerate in der /etc/asound.conf definiert
Lösung zu 1.:
Da die Lösungsansätze nicht funktioniert haben, verwende ich den PvRecorder (https://github.com/Picovoice/pvrecorder), welcher ebenfalls von Picovoice (so wie auch Porcupine) stammt. Damit funktioniert alles super.
2. Problem
Ich verwende speech_recognition, um nach einem Wakeword das Audio aufzunehmen. Sobald ich in die Zeile
Code: Alles auswählen
with sr.Microphone(...) as source
Code: Alles auswählen
Expression 'ret' failed in 'src/hostapi/alsa/pa_linux_alsa.c', line: 1736
Expression 'AlsaOpen( &alsaApi->baseHostApiRep, params, streamDir, &self->pcm )' failed in 'src/hostapi/alsa/pa_linux_alsa.c', line: 1904
Expression 'PaAlsaStreamComponent_Initialize( &self->capture, alsaApi, inParams, StreamDirection_In, NULL != callback )' failed in 'src/hostapi/alsa/pa_linux_alsa.c', line: 2171
Expression 'PaAlsaStream_Initialize( stream, alsaHostApi, inputParameters, outputParameters, sampleRate, framesPerBuffer, callback, streamFlags, userData )' failed in 'src/hostapi/alsa/pa_linux_alsa.c', line: 2839
14.03.2023 00:11:28 [ DEBUG ] [Errno -9985] Device unavailable
Daher habe ich vor der Zeile
Code: Alles auswählen
with sr.Microphone(...) as source
Hat jemand von euch eine Idee, wie ich den Code umbauen kann, damit das Gerät verfügbar ist? Vielleicht ohne die Nutzung von PvRecorder?
Lieben Dank
Jakob
Codebase:
File input.py
Code: Alles auswählen
from typing import Callable
import pyaudio
from src.io.impl.audio.wakeword.porcupine import PorcupineWakeWordDetector
from src.io.definitions.audio_interface import IAudioInput
from src import log
import speech_recognition as sr
class AudioInput(IAudioInput):
    """Audio input that waits for a wake word, then records and transcribes speech.

    Wake-word detection is delegated to ``PorcupineWakeWordDetector``; the
    utterance that follows is captured with ``speech_recognition`` and sent to
    the Google recognizer.
    """

    def __init__(self, hot_word_detected_callback: Callable,
                 config: dict) -> None:
        """Store the configuration and calibrate the speech recognizer.

        Args:
            hot_word_detected_callback: Awaitable callable that is awaited
                with the recognized text after each wake-word detection.
            config: Application configuration. ``config["audio"]["input"]``
                is read by this class; the full dict is passed on to the
                wake-word detector.
        """
        self.__hot_word_detected_callback: Callable = hot_word_detected_callback
        self.speech_engine: sr.Recognizer = sr.Recognizer()
        self.config = config
        self.audio_config = config["audio"]["input"]
        self.__configure_microphone()
        self.running: bool = False
        self.recording: bool = False
        self.sensitivity: float = self.audio_config.get("sensitivity")
        log.info("Audio Input initialized.")

    async def start(self) -> None:
        """Mark the input as running and enter the wake-word loop."""
        self.running = True
        log.info("Audio Input started.")
        await self.run()

    async def run(self) -> None:
        """Run the wake-word detector, restarting it once on ``MemoryError``."""
        wake_word_engine = PorcupineWakeWordDetector(self.hot_word_detected_callback, self.config)
        try:
            await wake_word_engine.start()
        except MemoryError:
            log.warning("Memory is full!")
            log.action("Restart porcupine...")
            wake_word_engine.stop()
            await wake_word_engine.start()

    async def hot_word_detected_callback(self):
        """Record and transcribe one utterance, then forward the text."""
        text: str = await self.recognize_input()
        await self.__hot_word_detected_callback(text)

    async def recognize_input(self, play_bling_before_listen: bool = None) -> str:
        """Record one utterance and return its transcription.

        Args:
            play_bling_before_listen: Passed through unchanged to the
                listen-signal hook.

        Returns:
            The recognized text, ``""`` when nothing intelligible was heard,
            or a fixed German fallback sentence when recording or recognition
            failed with an exception.
        """
        self.recording = True
        self.__signalize_to_listen(play_bling_before_listen)
        try:
            return self.__record_input_and_recognize()
        except Exception as e:
            log.debug(e)
            log.warning("Text could not be translated...")
            return "Das habe ich nicht verstanden."
        finally:
            self.recording = False

    def __record_input_and_recognize(self):
        """Capture audio from the configured microphone and transcribe it."""
        with sr.Microphone(device_index=self.audio_config["device_index"]) as source:
            audio = self.speech_engine.listen(source, timeout=7)
        # The microphone is released before the (network-bound) recognition
        # call so the capture device is not held longer than necessary.
        return self.__recognize_input_from_audio_data(audio)

    def __recognize_input_from_audio_data(self, audio: sr.AudioData) -> str:
        """Transcribe ``audio`` with the Google recognizer.

        Returns:
            The transcription, or ``""`` when the speech was unintelligible
            or the recognition service could not be reached.
        """
        try:
            text: str = self.speech_engine.recognize_google(
                audio, language=self.audio_config.get("language")
            )
        except sr.UnknownValueError:
            return ""
        except sr.RequestError as error:
            # Previously hidden by a ``finally: return``; still reported as an
            # empty result, but now visible in the log. Any other exception
            # propagates to the caller instead of being silently discarded.
            log.warning(error)
            return ""
        log.info("[USER INPUT]\t" + text)
        return text

    def __signalize_to_listen(self, play_bling_before_listen: bool = None) -> None:
        """Hook invoked right before recording starts (no implementation here)."""
        ...

    async def stop(self) -> None:
        """Stop the audio input (currently a no-op)."""
        pass

    def __configure_microphone(self) -> None:
        """Apply recognizer tuning from the config and calibrate for ambient noise."""
        tuning_options = (
            "pause_threshold",
            "energy_threshold",
            "dynamic_energy_threshold",
            "dynamic_energy_adjustment_damping",
        )
        for option in tuning_options:
            value = self.audio_config.get(option)
            # A missing key must not replace the recognizer's own default
            # with None.
            if value is not None:
                setattr(self.speech_engine, option, value)
        # Calibrate on the same device that is used for recording; a missing
        # "device_index" yields None, which selects the default microphone.
        with sr.Microphone(device_index=self.audio_config.get("device_index")) as source:
            self.speech_engine.adjust_for_ambient_noise(source)

    def get_audio_input_stream(self, config: dict) -> pyaudio.Stream:
        """Open a raw 48 kHz mono 16-bit PyAudio input stream on the configured device.

        Args:
            config: Application configuration; the device index is read from
                ``config["audio"]["input"]["device_index"]``.

        Returns:
            The opened input stream.
        """
        device_index = config["audio"]["input"]["device_index"]
        audio_object = pyaudio.PyAudio()
        log.warning(audio_object.get_device_info_by_index(device_index))
        # NOTE(review): frames_per_buffer=2 is unusually small - confirm it is
        # intended.
        return audio_object.open(
            rate=48000,
            channels=1,
            format=pyaudio.paInt16,
            input=True,
            frames_per_buffer=2,
            input_device_index=device_index,
        )
Code: Alles auswählen
from pvrecorder import PvRecorder
from datetime import datetime
import pvporcupine
from typing import Callable, Optional
from src import log
class PorcupineWakeWordDetector:
    """Wake-word detector built on Porcupine, fed by a ``PvRecorder``.

    While a detection is being handled, the recorder is deleted so that the
    capture device is free for other consumers, and a fresh recorder is
    created afterwards.
    """

    def __init__(self, hot_word_detected_callback: Callable,
                 config: dict) -> None:
        """Read the detector settings from ``config``.

        Args:
            hot_word_detected_callback: Awaitable callable that is awaited
                (without arguments) after each detection.
            config: Application configuration. Reads ``config["api"]["porcupine"]``
                and, from ``config["audio"]["input"]``, the keys
                ``"sensitivity"``, ``"keywords"`` and ``"device_index"``.
        """
        self.active = True
        self.__api_key = config["api"]["porcupine"]
        self.__audio_config: dict = config["audio"]["input"]
        self.__hot_word_detected_callback = hot_word_detected_callback
        self.__sensitivity = self.__audio_config["sensitivity"]
        self.__keyword: list[str] = self.__audio_config["keywords"]
        self.__porcupine: Optional[pvporcupine.Porcupine] = None
        self.__recorder: Optional[PvRecorder] = None

    async def start(self) -> None:
        """Create the engine and recorder, then listen until deactivated.

        Raises:
            MemoryError: Re-raised after the resources were released, so the
                caller can decide to restart. Every other exception is logged
                and swallowed after cleanup.
        """
        log.action("Starting PorcupineWakeWordDetector...")
        if self.__porcupine is not None or self.__recorder is not None:
            log.debug("stopping old instances")
            self.stop()
        try:
            log.debug("try in porcupine")
            self.__porcupine = pvporcupine.create(
                access_key=self.__api_key,
                keywords=self.__keyword,
                # One sensitivity value is required per keyword.
                sensitivities=[self.__sensitivity] * len(self.__keyword),
            )
            log.debug("Porcupine instance created")
            self.__recorder = self.__create_recorder()
            log.debug("PvRecorder instance created")
            self.__recorder.start()
            log.debug("PvRecorder started")
            await self.__listen()
        except MemoryError:
            self.stop()
            raise
        except Exception as e:
            log.debug(e)
            # stop() already releases the recorder; a second delete on the
            # cleared reference would raise AttributeError.
            self.stop()

    def stop(self) -> None:
        """Release the Porcupine engine and the recorder; safe to call repeatedly."""
        # Clear the references first so a failing delete() cannot lead to a
        # second delete of the same native object later on.
        porcupine, self.__porcupine = self.__porcupine, None
        recorder, self.__recorder = self.__recorder, None
        if porcupine is not None:
            porcupine.delete()
        if recorder is not None:
            recorder.delete()

    def __create_recorder(self) -> "PvRecorder":
        """Build a recorder for the configured device and the engine's frame length."""
        # Keyword arguments keep the call valid across pvrecorder releases,
        # which differ in the positional order of these two parameters.
        return PvRecorder(
            device_index=self.__audio_config["device_index"],
            frame_length=self.__porcupine.frame_length,
        )

    async def __listen(self) -> None:
        """Feed recorded frames to Porcupine and dispatch detections.

        Runs while ``self.active`` is true. ``MemoryError`` propagates to the
        caller; every other exception is logged and ends the loop.
        """
        try:
            while self.active:
                # NOTE(review): read() is a synchronous call, so other tasks
                # on the event loop do not run while it waits for a frame.
                pcm = self.__recorder.read()
                keyword_index: int = self.__porcupine.process(pcm)
                if keyword_index < 0:
                    continue
                # Fully release the capture device for the duration of the
                # callback; a deleted recorder must not be used again.
                self.__recorder.stop()
                self.__recorder.delete()
                self.__recorder = None
                log.info(
                    f"Detected {self.__keyword[keyword_index]} at "
                    f"{datetime.now()}"
                )
                await self.__hot_word_detected_callback()
                if not self.active or self.__porcupine is None:
                    # Deactivated or stopped while the callback was running.
                    break
                self.__recorder = self.__create_recorder()
                self.__recorder.start()
        except MemoryError:
            raise
        except Exception as e:
            log.error(e)