Record audio while holding a button, replay it on release and overwrite the previous file-CodePudding

I want to record audio while I'm pressing a button and replay it when I release the button.

It works fine the first time, but when I record again, the new recording is appended to the existing one. I can't figure out how to replace the file instead of appending to it. I tried deleting the file when recording starts, but then a new file can't be created. Basically, I'd like to overwrite the file instead of appending the sound to it.

from pynput import keyboard
import time
import pyaudio
import wave
import sched
from pygame import mixer

# Settings handed to p.open() when the input stream is created.
CHUNK = 8192  # passed as frames_per_buffer
FORMAT = pyaudio.paInt16  # passed as format; also used to look up the WAV sample width
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5  # not referenced anywhere else in the visible script
WAVE_OUTPUT_FILENAME = "output.wav"  # opened for writing in MyListener.__init__

# Shared PyAudio instance: opens the stream and reports the sample size.
p = pyaudio.PyAudio()
# Raw buffers appended by callback() and joined when recording stops.
frames = []


def callback(in_data, frame_count, time_info, status):
    """Store one captured buffer in the module-level ``frames`` list.

    Passed as ``stream_callback`` when the input stream is opened.  The
    buffer is handed back unchanged together with ``pyaudio.paContinue``.
    """
    frames.append(in_data)
    result = (in_data, pyaudio.paContinue)
    return result


class MyListener(keyboard.Listener):
    """Keyboard listener that tracks whether the 'r' key is currently held.

    ``key_pressed`` is ``None`` until the first 'r' event, ``True`` while the
    key is down and ``False`` after it is released.  The output WAV file is
    opened once, in ``__init__``, and kept on ``self.wf``.
    """

    def __init__(self):
        super(MyListener, self).__init__(self.on_press, self.on_release)
        self.key_pressed = None
        self.wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
        self.wf.setnchannels(CHANNELS)
        self.wf.setsampwidth(p.get_sample_size(FORMAT))
        self.wf.setframerate(RATE)

    def on_press(self, key):
        """Mark 'r' as held; every other key is ignored.

        Always returns ``True`` so the listener keeps running.
        """
        # Special keys (Shift, Ctrl, ...) have no ``char`` attribute; reading
        # ``key.char`` directly would raise AttributeError inside the listener.
        if getattr(key, 'char', None) == 'r':
            self.key_pressed = True
        return True

    def on_release(self, key):
        """Mark 'r' as released; every other key is ignored.

        Always returns ``True`` so the listener keeps running.
        """
        if getattr(key, 'char', None) == 'r':
            self.key_pressed = False
        return True


# Create and start the keyboard listener; its callbacks update
# listener.key_pressed, which recorder() polls.
listener = MyListener()
listener.start()
# True while a recording is in progress (toggled by recorder()).
started = False
# The PyAudio input stream; None until the first recording is started.
stream = None


def recorder():
    """Poll the 'r' key state and start or stop recording when it changes.

    While the key is held and nothing is being recorded, an input stream is
    opened with ``callback`` collecting the audio.  Once the key is released
    during a recording, the stream is stopped and closed, the collected
    frames are written to ``listener.wf`` and ``output.wav`` is played with
    pygame's mixer.  Every call re-queues itself on ``task`` 100 ms later.
    """
    global started, p, stream, frames

    if not started and listener.key_pressed:
        # Start the recording
        stream = p.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK,
            stream_callback=callback,
        )
        print("Stream active:", stream.is_active())
        started = True
        print("start Stream")

    elif started and not listener.key_pressed:
        # Key released: finish the take, write it out and play it back.
        started = False
        print("Stop recording")
        stream.stop_stream()
        stream.close()
        recorded = b''.join(frames)
        listener.wf.writeframes(recorded)

        mixer.init()
        mixer.music.load("output.wav")
        mixer.music.set_volume(0.8)
        mixer.music.play()

    # Reschedule the recorder function in 100 ms.
    task.enter(0.1, 1, recorder, ())


print("Press and hold the 'r' key to begin recording")
print("Release the 'r' key to end recording")
# Drive recorder() from a scheduler: the first call is queued here, and
# recorder() re-queues itself at the end of every call.
task = sched.scheduler(time.time, time.sleep)
task.enter(0.1, 1, recorder, ())
task.run()

CodePudding user response：

Some issues I found:

You don't close the wave file after writing to it
You don't reset the frames buffer between writes
Pygame seems to be locking the file so a 2nd write attempt fails. I could not figure out how to get Pygame to unlock the file, so I switched to Playsound.

from pynput import keyboard
import time
import pyaudio
import wave
import sched
from playsound import playsound

# Settings handed to p.open() and to the WAV writer in recorder().
CHUNK = 8192  # passed as frames_per_buffer
FORMAT = pyaudio.paInt16  # passed as format; also used to look up the WAV sample width
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5  # not referenced anywhere else in the visible script
WAVE_OUTPUT_FILENAME = "output.wav"  # written and then played back by recorder()

# Shared PyAudio instance: opens the stream and reports the sample size.
p = pyaudio.PyAudio()
# Raw buffers appended by callback(); reset by recorder() when a take starts.
frames = []

def callback(in_data, frame_count, time_info, status):
    """Collect a captured buffer into the module-level ``frames`` list.

    Used as the ``stream_callback`` of the input stream; returns the same
    buffer plus ``pyaudio.paContinue``.
    """
    frames.append(in_data)
    outcome = (in_data, pyaudio.paContinue)
    return outcome

class MyListener(keyboard.Listener):
    """Keyboard listener that tracks whether the 'r' key is currently held.

    ``key_pressed`` is ``None`` until the first 'r' event, ``True`` while the
    key is down and ``False`` after it is released.
    """

    def __init__(self):
        super(MyListener, self).__init__(self.on_press, self.on_release)
        self.key_pressed = None

    def on_press(self, key):
        """Mark 'r' as held; every other key is ignored.

        Always returns ``True`` so the listener keeps running.
        """
        # Special keys (Shift, Ctrl, ...) have no ``char`` attribute; reading
        # ``key.char`` directly would raise AttributeError inside the listener.
        if getattr(key, 'char', None) == 'r':
            self.key_pressed = True
        return True

    def on_release(self, key):
        """Mark 'r' as released; every other key is ignored.

        Always returns ``True`` so the listener keeps running.
        """
        if getattr(key, 'char', None) == 'r':
            self.key_pressed = False
        return True

# Create and start the keyboard listener; its callbacks update
# listener.key_pressed, which recorder() polls.
listener = MyListener()
listener.start()
# True while a recording is in progress (toggled by recorder()).
started = False
# The PyAudio input stream; None until the first recording is started.
stream = None

def recorder():
    """Poll the 'r' key state and start or stop recording when it changes.

    Key held and not yet recording: clear ``frames`` and open an input
    stream fed by ``callback``.  Key released while recording: stop and
    close the stream, write the frames to a freshly opened
    ``WAVE_OUTPUT_FILENAME`` and play that file with ``playsound``.
    Every call re-queues itself on ``task`` 100 ms later.
    """
    global started, p, stream, frames

    if not started and listener.key_pressed:
        # Start the recording
        # Reset buffer
        frames = []
        stream = p.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK,
            stream_callback=callback,
        )
        print("Stream active:", stream.is_active())
        started = True
        print("start Stream")

    elif started and not listener.key_pressed:
        started = False
        print("Stop recording")
        stream.stop_stream()
        stream.close()

        # Using with will close the file automatically
        with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wav_out:
            wav_out.setnchannels(CHANNELS)
            wav_out.setsampwidth(p.get_sample_size(FORMAT))
            wav_out.setframerate(RATE)
            audio = b''.join(frames)
            wav_out.writeframes(audio)

        # No issues with playsound locking the file
        playsound(WAVE_OUTPUT_FILENAME)

    # Reschedule the recorder function in 100 ms.
    task.enter(0.1, 1, recorder, ())


print("Press and hold the 'r' key to begin recording")
print("Release the 'r' key to end recording")
# Drive recorder() from a scheduler: the first call is queued here, and
# recorder() re-queues itself at the end of every call.
task = sched.scheduler(time.time, time.sleep)
task.enter(0.1, 1, recorder, ())
task.run()

And just for fun, I changed your program to use callbacks only instead of constantly polling:

from pynput import keyboard
import pyaudio
import wave
from playsound import playsound

# Settings used by the recorder class for the input stream and the WAV file.
CHUNK = 8192  # passed as frames_per_buffer
FORMAT = pyaudio.paInt16  # passed as format; also used to look up the WAV sample width
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5  # not referenced anywhere else in the visible script
WAVE_OUTPUT_FILENAME = "output.wav"  # written and then played back by recorder.stop()

# Shared PyAudio instance: opens the stream and reports the sample size.
p = pyaudio.PyAudio()

class recorder():
    """Record audio between ``start()`` and ``stop()``, then save and replay it.

    Attributes:
        recording: ``True`` while an input stream is open and capturing.
        frames: raw buffers collected by ``callback`` for the current take.
        stream: the PyAudio input stream, or ``None`` before the first take.
    """

    def __init__(self):
        self.recording = False
        # Created here so the attributes exist before the first start() call.
        self.frames = []
        self.stream = None

    def start(self):
        """Open the input stream and begin collecting audio.

        Does nothing when a recording is already in progress.
        """
        if self.recording:
            return
        # Fresh buffer for every take so earlier audio is never written again.
        self.frames = []
        self.stream = p.open(format=FORMAT,
                             channels=CHANNELS,
                             rate=RATE,
                             input=True,
                             frames_per_buffer=CHUNK,
                             stream_callback=self.callback)
        print("Stream active:", self.stream.is_active())
        print("start Stream")
        self.recording = True

    def stop(self):
        """Stop capturing, write the take to ``WAVE_OUTPUT_FILENAME`` and play it.

        Does nothing when no recording is in progress.
        """
        if not self.recording:
            return
        self.recording = False
        print("Stop recording")
        try:
            self.stream.stop_stream()
        finally:
            # Release the stream even if stopping it failed.
            self.stream.close()

        # Opening with 'wb' inside a with-block re-creates the file on every
        # take and closes it before playback.
        with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(p.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(b''.join(self.frames))
        playsound(WAVE_OUTPUT_FILENAME)

    def callback(self, in_data, frame_count, time_info, status):
        """Stream callback: keep the captured buffer and ask PyAudio to continue."""
        self.frames.append(in_data)
        return in_data, pyaudio.paContinue

class MyListener(keyboard.Listener):
    """Keyboard listener that records while 'r' is held and replays on release.

    Releasing any key other than 'r' returns ``False`` from ``on_release``,
    which ends the program.
    """

    def __init__(self):
        super(MyListener, self).__init__(self.on_press, self.on_release)
        self.recorder = recorder()

    def on_press(self, key):
        """Start recording when 'r' goes down; other keys are ignored."""
        # Special keys (Shift, Ctrl, ...) have no ``char`` attribute; reading
        # ``key.char`` directly would raise AttributeError inside the listener.
        if getattr(key, 'char', None) == 'r':
            self.recorder.start()
        return True

    def on_release(self, key):
        """Stop recording when 'r' is released; any other key ends the listener."""
        if getattr(key, 'char', None) == 'r':
            self.recorder.stop()
            return True
        # Any other key ends the program
        return False

print("Press and hold the 'r' key to begin recording")
print("Release the 'r' key to end recording")

# Collect events until released
# (on_release returns False for any key other than 'r', which per its own
# comment is what ends the program.)
with MyListener() as listener:
    listener.join()

CodePudding user response：

audiomath provides some high-level classes that can help with tasks like this. Like pyaudio, it also wraps around PortAudio to allow recording and playback. Below is a shorter audiomath-based listing.

I'm guessing you probably don't sit with your finger on the 'r' button for longer than a few seconds. In that case, it's more efficient to pre-allocate several seconds of space in memory (provided it's more than enough) and record into/play back from that. For the sound you want to keep, you can always say player.sound.Write('output.wav') at the end. Or you can uncomment the line that saves every attempt.

If for any reason you definitely do want to stream open-endedly to a file while recording, you can supply a filename in the constructor and that will get passed to the Recorder instance (the memory-resident self.buffer still exists: it's a circular buffer retaining the last N seconds). Streaming to file requires an external ffmpeg installation, though. Note that audiomath's playback loads the entire thing back into memory, so nothing has really been made more memory-efficient overall...

Because of the use of ffmpeg, you can also stream directly to audio formats other than .wav if you want.

import time
import pynput
import audiomath as am  

class MyListener(pynput.keyboard.Listener):
    def __init__(self, filename=None, buffer_length_seconds=10):
        """
        Note:
        
        If you specify a long enough `buffer_length_seconds` for your
        purposes, then you do not need to record to file and so do not
        need to specify `filename`.
        
        If you specify `filename`, you need to have installed `ffmpeg`
        (see the help for `audiomath.ffmpeg.Install`).
        """
        super(MyListener, self).__init__(self.on_press, self.on_release)
        self.key_pressed = None

        # Players for finished takes; filled by on_release() and drained by
        # the caller's playback loop.
        self.queue = []
        self.filename = filename
        self.buffer_length_seconds = buffer_length_seconds
        self.new_recorder()

    def new_recorder(self):
        """Allocate a fresh buffer and an idle Recorder for the next take."""
        self.buffer = am.Sound(self.buffer_length_seconds, nChannels=2, fs=44100)
        self.recorder = am.Recorder(self.buffer, loop=True, recording=False, filename=self.filename)

    def on_press(self, key):
        """Start recording on the first 'r' press; repeats and other keys are ignored."""
        # Special keys have no ``char`` attribute; getattr treats them as
        # "not 'r'" without a catch-all except clause.
        if getattr(key, 'char', None) == 'r' and not self.key_pressed:
            print('recording...')
            self.recorder.Record()
            self.key_pressed = True
        return True

    def on_release(self, key):
        """Stop recording when 'r' is released and queue a Player for the take."""
        if getattr(key, 'char', None) == 'r' and self.key_pressed:
            print('finished')
            self.recorder.Stop()
            self.recorder = None # seems to be necessary when filename is used, to ensure the ffmpeg process and file are closed and cleaned up
            if self.filename:
                player = am.Player( self.filename )
            else:
                player = am.Player( self.buffer )
                # self.buffer.Write( 'output.wav')  # if you want to ensure every attempt gets saved. File will be overwritten.
            self.queue.append( player )
            self.new_recorder()
            self.key_pressed = False
        return True

#with MyListener('output.wav') as listener:  # this variant uses a file (and needs ffmpeg)
with MyListener() as listener:               # this variant just records to memory
    # Poll for finished takes: on_release() appends a Player to
    # listener.queue, and each one is popped and played here, outside the
    # listener callback.
    while True:
        if listener.queue:
            player = listener.queue.pop( 0 )
            player.Play()
        time.sleep(0.1)