def createDecoder(freq, channels):
    """Allocate and initialise an Opus decoder.

    The decoder state lives in a buffer allocated from Python, so that
    Python (rather than the Opus library) is responsible for releasing
    it.

    Args:
        freq: Sampling frequency; converted to ``opus.opus_int32``.
        channels: Number of channels; converted to ``opus.opus_int32``.

    Returns:
        A ``ctypes`` pointer to the initialised ``opus.OpusDecoder``.

    Raises:
        Exception: if ``opus_decoder_init`` does not return
            ``opus.OPUS_OK``.
    """
    # Opus expects both values as 32-bit integers.
    sample_rate = opus.opus_int32(freq)
    channel_count = opus.opus_int32(channels)

    # Ask Opus how large the decoder state is and reserve that many
    # bytes ourselves.
    state_bytes = opus.opus_decoder_get_size(channel_count)
    backing_store = ctypes.create_string_buffer(state_bytes)

    # View the raw buffer as a decoder.  (opus.od_p names the same
    # pointer type; it is spelled out here for clarity.)
    decoder_ptr = ctypes.cast(
        backing_store,
        ctypes.POINTER(opus.OpusDecoder)
    )

    status = opus.opus_decoder_init(decoder_ptr, sample_rate, channel_count)
    if status == opus.OPUS_OK:
        return decoder_ptr

    # Initialisation failed: report the library's description of it.
    reason = opus.opus_strerror(status).decode("utf")
    raise Exception("An error occurred while creating the decoder: " + reason)
def create_encoder(npBufSource, samples_per_second):
    """Allocate and initialise an Opus encoder for a PCM buffer.

    The encoder state lives in a buffer allocated from Python, so that
    Python (rather than the Opus library) is responsible for releasing
    it.

    Args:
        npBufSource: NumPy array holding the source audio; its second
            dimension (``shape[1]``) is taken as the channel count.
        samples_per_second: Sampling frequency; converted to
            ``opus.opus_int32``.

    Returns:
        A ``ctypes`` pointer to the initialised ``opus.OpusEncoder``.

    Raises:
        Exception: if ``opus_encoder_init`` does not return
            ``opus.OPUS_OK``.
    """
    # OPUS_APPLICATION_AUDIO lets Opus handle both speech and music and
    # detect switches between them automatically.
    mode = opus.OPUS_APPLICATION_AUDIO

    # Opus expects the frequency as a 32-bit integer.
    sample_rate = opus.opus_int32(samples_per_second)

    # One column of the source array per channel.
    channel_count = npBufSource.shape[1]

    # Ask Opus how large the encoder state is and reserve that many
    # bytes ourselves.
    state_bytes = opus.opus_encoder_get_size(channel_count)
    backing_store = ctypes.create_string_buffer(state_bytes)

    # View the raw buffer as an encoder.  (opus.oe_p names the same
    # pointer type; it is spelled out here for clarity.)
    encoder_ptr = ctypes.cast(
        backing_store,
        ctypes.POINTER(opus.OpusEncoder)
    )

    status = opus.opus_encoder_init(
        encoder_ptr,
        sample_rate,
        channel_count,
        mode
    )
    if status == opus.OPUS_OK:
        return encoder_ptr

    # Initialisation failed: report the library's description of it.
    reason = opus.opus_strerror(status).decode("utf")
    raise Exception("An error occurred while creating the encoder: "+ reason)
def record(self):
    """Play the starting sound and backing track while recording.

    A sounddevice input/output stream is opened and driven by the
    nested ``callback``, which walks through the numbered steps listed
    further down.  Complete microphone frames of ``samples_per_frame``
    samples are handed to the queue ``q`` and, during step 2, a copy
    of every output block is placed on ``q2``; both queues are defined
    outside this method.  The method blocks until the stream's
    finished callback fires and then stores the warm-up sample count
    in ``self._pre_skip``.

    Raises:
        Exception: if the encoder's lookahead cannot be queried.
    """
    # Define the stream's callback
    count = 0 # DEBUG
    last_inputBufferAdcTime = None

    def callback(indata, outdata, samples, time, status):
        # Called by the stream for every audio block: fills `outdata`
        # with what should be played and consumes `indata`.
        nonlocal step_no, current_pos, stream, warmup_samples
        nonlocal samples_per_frame
        nonlocal count
        nonlocal frame_buffer
        nonlocal last_inputBufferAdcTime

        # Used only for the duration diagnostics printed below.
        start_time = t.time()

        # internal_latency = (time.outputBufferDacTime
        #                     - time.inputBufferAdcTime)
        # print("internal latency:", internal_latency,
        #       "samples:", samples)
        # if last_inputBufferAdcTime is not None:
        #     print("time diff:", time.inputBufferAdcTime - last_inputBufferAdcTime)
        # last_inputBufferAdcTime = time.inputBufferAdcTime

        if status:
            print(status)

        count += 1

        # Grab the lock so that we're threadsafe
        with self._lock:
            # TODO: Need to add monitoring
            # NOTE(review): inx is never read again in this function;
            # it looks like a placeholder for the monitoring above.
            inx = []
            inx[:] = indata[:]

            # Step No 0
            # =========
            if step_no == 0:
                # Input glitches are tolerated here because nothing is
                # being recorded yet; any other status aborts.
                if status.input_underflow:
                    print("INPUT UNDERFLOW: Not a problem as we're just writing out. Count:",count)
                elif status.input_overflow:
                    print("INPUT OVERFLOW: Not a problem as we're just writing out. Count:",count)
                elif status:
                    print(status)
                    print("count: ",count)
                    print("ABORTING")
                    raise sd.CallbackAbort

                # If the number of output channels does not match
                # our starting sound's PCM, we'll need to adjust
                # it.  (For now a mismatch is only reported.)
                if outdata.shape[1] != self._starting_sound_pcm.shape[1]:
                    # Number of channels does not match
                    print("Number of channels does not match")

                # Copy the starting sound to the output
                remaining = len(self._starting_sound_pcm) - current_pos
                if remaining >= samples:
                    outdata[:] = self._starting_sound_pcm[
                        current_pos : current_pos+samples
                    ]
                else:
                    # Copy what's left of the starting sound, and fill
                    # the rest with silence
                    # NOTE(review): the silence rows are hard-coded as
                    # two channels wide.
                    outdata[:remaining] = self._starting_sound_pcm[
                        current_pos : len(self._starting_sound_pcm)
                    ]
                    outdata[remaining:samples] = [[0.0, 0.0]] * (samples-remaining)

                # Adjust the starting sound position
                current_pos += samples

                if current_pos >= len(self._starting_sound_pcm):
                    print("Finished playing starting sound; moving to next step")
                    # NOTE(review): this jumps straight to step 2, so
                    # the warm-up in step 1 is currently skipped.
                    step_no = 2#+= 1 FIXME
                    current_pos = 0

            # Step No 1
            # =========
            elif step_no == 1:
                if status:
                    print(status, "in step #1; ignoring")

                # Play silence
                outdata[:] = [[0.0, 0.0]] * samples
                current_pos += samples

                # Warm-up the encoder.  See "Encoder Guidelines"
                # at https://tools.ietf.org/html/rfc7845#page-27
                frame_buffer.put(indata)
                if frame_buffer.size() >= samples_per_frame:
                    # Pass the complete frame to another thread for processing
                    q.put(frame_buffer.get(samples_per_frame))

                if current_pos >= warmup_samples:
                    print("Finished warming up the encoder; moving to next step")
                    step_no += 1
                    # Remember how many samples were actually played
                    # during warm-up (whole blocks, so possibly more
                    # than requested).
                    warmup_samples = current_pos
                    current_pos = 0

            # Step No 2
            # =========
            elif step_no == 2:
                # Any stream status during the actual take is fatal.
                if status:
                    print(status, "in step #2; aborting")
                    print("count: ",count)
                    print("ABORTING")
                    raise sd.CallbackAbort

                # Play backing track and record voice

                # Copy the backing track to the output
                remaining = len(self._backing_track_pcm) - current_pos
                if remaining >= samples:
                    outdata[:] = self._backing_track_pcm[
                        current_pos : current_pos+samples
                    ]
                else:
                    # Copy what's left of the backing track, and fill
                    # the rest with silence
                    outdata[:remaining] = self._backing_track_pcm[
                        current_pos : len(self._backing_track_pcm)
                    ]
                    outdata[remaining:samples] = [[0.0, 0.0]] * (samples-remaining)

                # Adjust the position
                current_pos += samples

                # DEBUG send outdata to q2
                q2.put_nowait(outdata.copy())

                # Record the microphone
                frame_buffer.put(indata)

                # DEBUG send the microphone data straight to the queue
                #q.put_nowait(indata.copy())

                while frame_buffer.size() >= samples_per_frame:
                    # Pass complete frames to another thread for processing
                    frame = frame_buffer.get(samples_per_frame)
                    q.put_nowait(frame)

                if current_pos >= len(self._backing_track_pcm):
                    print("Finished playing backing track; moving to next step")
                    step_no += 1
                    current_pos = 0

                # Diagnostic: report callbacks that took more than 2 ms.
                end_time = t.time()
                duration = end_time - start_time
                if duration > 2/1000:
                    print(" thread call duration at end of step 2(ms):", round(duration*1000, 2))

            # Step No 3
            # =========
            elif step_no == 3:
                if status:
                    print(status, "in step #3; ignoring")

                # Play one frame's worth of silence, just to
                # ensure we can keep the frame size constant.
                outdata[:] = [[0.0, 0.0]] * samples
                current_pos += samples

                # Record the microphone
                frame_buffer.put(indata)
                if frame_buffer.size() >= samples_per_frame:
                    # Pass the complete frame to another thread for processing
                    q.put(frame_buffer.get(samples_per_frame))

                if current_pos >= warmup_samples:
                    print("Finished playing a frame's worth of silence; moving to next step")
                    step_no += 1
                    current_pos = 0

            else:
                # No steps left: ask the stream to stop.
                print("Stopping")
                raise sd.CallbackStop

        if stream.cpu_load > 0.2:
            print("CPU Load above 20% during playback")

        # Diagnostic: report callbacks that took more than 2 ms.
        end_time = t.time()
        duration = end_time - start_time
        if duration > 2/1000:
            print(" thread call duration (ms):", round(duration*1000, 2))

    # Step number indicates where we are in the recording process
    # 0: play starting sound
    # 1: silence used for warming up the encoder
    # 2: backing track and recording
    # 3: silence + recording to ensure we finish on a clean
    #    (presumably complete) frame
    step_no = 0

    # Step 0: Set the current position for the sound being played
    current_pos = 0

    # Step 1: Encoder warmup
    # Obtain the algorithmic delay of the Opus encoder
    delay = opus.opus_int32()
    result = opus.opus_encoder_ctl(
        self._encoder,
        opus.OPUS_GET_LOOKAHEAD_REQUEST,
        ctypes.pointer(delay)
    )
    if result != opus.OPUS_OK:
        raise Exception("Failed in OPUS_GET_LOOKAHEAD_REQUEST")
    delay_samples = delay.value

    # The encoder guidelines recommend that at least an extra 120
    # samples is added to delay_samples.  See
    # https://tools.ietf.org/html/rfc7845#page-27
    extra_samples = 120
    warmup_samples = delay_samples + extra_samples

    # Create a buffer for incoming samples.  It is currently sized at
    # 48000 samples rather than the two frames originally intended
    # (see FIXME).
    samples_per_frame = 960
    channels = 2
    frame_buffer = FrameBuffer(
        48000, # one second FIXME
        channels
    )

    # Create an event for communication between threads
    finished = threading.Event()

    # Create an input-output sounddevice stream
    # NOTE(review): the channel count is not passed to the stream (the
    # argument is commented out), while the callback writes two-channel
    # silence.
    print("Creating stream")
    stream = sd.Stream(
        samplerate=48000,
        #channels=channels,
        dtype=numpy.float32,
        latency=100/1000, #"high",
        callback=callback,
        finished_callback=finished.set
    )

    with stream:
        finished.wait()  # Wait until playback is finished

    # Store the final number of pre-skip warmup samples
    self._pre_skip = warmup_samples
def write_opus(self, output_filename):
    """Encode the queued PCM frames and write them to an OggOpus file.

    Frames are drained from the queue ``q`` (defined outside this
    method), converted to 16-bit PCM, encoded with ``self._encoder``
    and wrapped in Ogg pages that are written to *output_filename*.
    The identification header carries ``self._pre_skip``.

    Args:
        output_filename: Path of the file to create.  It is opened in
            binary write mode and is closed even if encoding fails.

    Raises:
        Exception: if a header or audio packet cannot be placed in the
            Ogg stream, or if the Opus encoder reports an error.
        AssertionError: if a queued frame does not hold exactly 960
            samples.
    """
    # Every queued frame is expected to hold this many samples.
    samples_per_frame = 960

    # Range of a signed 16-bit sample, used to clip before conversion.
    int16_min = -(2**15)
    int16_max = 2**15 - 1

    # Create a new stream state with a random serial number
    stream_state = opus_helpers.create_stream_state()

    # Packet and page structures, reused for every pass
    ogg_packet = ogg.ogg_packet()
    ogg_page = ogg.ogg_page()

    # Flag to indicate the start of stream
    start_of_stream = 1

    # Packet and PCM sample counters
    count_packets = 0
    count_samples = 0

    # Allocate storage space for the encoded frame.  4,000 bytes
    # is the recommended maximum buffer size for the encoded
    # frame.
    max_bytes_in_encoded_frame = opus.opus_int32(4000)
    EncodedFrameType = ctypes.c_ubyte * max_bytes_in_encoded_frame.value
    encoded_frame = EncodedFrameType()

    # Create a pointer to the first byte of the buffer for the
    # encoded frame.
    encoded_frame_ptr = ctypes.cast(
        ctypes.pointer(encoded_frame),
        ctypes.POINTER(ctypes.c_ubyte)
    )

    def write_pages(out, next_page, message):
        # Write every page that next_page (ogg_stream_flush or
        # ogg_stream_pageout) is ready to hand back.
        while next_page(ctypes.pointer(stream_state),
                        ctypes.pointer(ogg_page)) != 0:
            print(message)
            out.write(bytes(ogg_page.header[0:ogg_page.header_len]))
            out.write(bytes(ogg_page.body[0:ogg_page.body_len]))

    # The context manager guarantees the file is closed when any of
    # the steps below raises.
    with open(output_filename, "wb") as f:
        # Headers
        # =======

        # Specify the identification header
        id_header = opus.make_identification_header(
            pre_skip = self._pre_skip
        )

        # Specify the packet containing the identification header
        ogg_packet.packet = ctypes.cast(id_header, ogg.c_uchar_p)
        ogg_packet.bytes = len(id_header)
        ogg_packet.b_o_s = start_of_stream
        ogg_packet.e_o_s = 0
        ogg_packet.granulepos = 0
        ogg_packet.packetno = count_packets
        start_of_stream = 0
        count_packets += 1

        # Write the header
        result = ogg.ogg_stream_packetin(
            stream_state,
            ogg_packet
        )
        if result != 0:
            raise Exception("Failed to write Opus identification header")

        # Specify the comment header
        comment_header = opus.make_comment_header()

        # Specify the packet containing the comment header
        ogg_packet.packet = ctypes.cast(comment_header, ogg.c_uchar_p)
        ogg_packet.bytes = len(comment_header)
        ogg_packet.b_o_s = start_of_stream
        ogg_packet.e_o_s = 0
        ogg_packet.granulepos = 0
        ogg_packet.packetno = count_packets
        count_packets += 1

        # Write the header
        result = ogg.ogg_stream_packetin(
            stream_state,
            ogg_packet
        )
        if result != 0:
            raise Exception("Failed to write Opus comment header")

        # Force the headers out so the audio starts on a fresh page
        write_pages(f, ogg.ogg_stream_flush, "Writing header page")

        # Frames
        # ======

        # Loop through the PCM frames in the queue
        while not q.empty():
            # Get the frame from the queue
            frame_pcm = q.get_nowait()

            # Convert to opus_int16.  The scaled samples are clipped
            # first: a full-scale sample (presumably 1.0) scales to
            # 32768, which does not fit in 16 bits and would otherwise
            # wrap around to the most negative value.
            frame_pcm = numpy.array(
                numpy.clip(frame_pcm * 2**15, int16_min, int16_max),
                dtype=opus.opus_int16
            )

            # Create a pointer to the start of the frame's data
            source_ptr = frame_pcm.ctypes.data_as(ctypes.c_void_p)

            # The encoder is always given a whole frame
            assert len(frame_pcm) == samples_per_frame

            # Encode the audio
            num_bytes = opus.opus_encode(
                self._encoder,
                ctypes.cast(source_ptr, ctypes.POINTER(opus.opus_int16)),
                samples_per_frame,
                encoded_frame_ptr,
                max_bytes_in_encoded_frame
            )

            # Check for any errors during encoding
            if num_bytes < 0:
                raise Exception("Encoder error detected: "+
                                opus.opus_strerror(num_bytes).decode("utf"))

            # Writing OggOpus
            # ===============

            # Increase the number of samples
            count_samples += samples_per_frame

            # Place data into the packet
            ogg_packet.packet = encoded_frame_ptr
            ogg_packet.bytes = num_bytes
            ogg_packet.b_o_s = start_of_stream
            ogg_packet.e_o_s = 0 # FIXME: It needs to end!
            ogg_packet.granulepos = count_samples
            ogg_packet.packetno = count_packets

            # No longer the start of stream
            start_of_stream = 0

            # Increase the number of packets
            count_packets += 1

            # Place the packet in to the stream
            result = ogg.ogg_stream_packetin(
                stream_state,
                ogg_packet
            )

            # Check for errors
            if result != 0:
                raise Exception("Error while placing packet in Ogg stream")

            # Write out any pages that are now complete
            write_pages(f, ogg.ogg_stream_pageout, "Writing page")

        # Force the writing of the final page
        write_pages(f, ogg.ogg_stream_flush, "Writing final page")

        # Make sure the queue is empty
        if not q.empty():
            print("WARNING: Failed to completely process all the recorded frames")

    # Finished
    print("Finished writing file")
# Specify the desired frame size. This will be used for the vast # majority of the encoding, except possibly at the end of the # buffer (as there may not be sufficient data left to fill a # frame.) frame_size_index = 5 frame_size = frame_sizes[frame_size_index] # Function to calculate the size of a frame in bytes def frame_size_bytes(frame_size): return frame_size * source_channels * bytes_per_sample # Allocate storage space for the encoded frame. 4,000 bytes is # the recommended maximum buffer size for the encoded frame. max_encoded_frame_bytes = opus.opus_int32(4000) EncodedFrameType = ctypes.c_ubyte * max_encoded_frame_bytes.value encoded_frame = EncodedFrameType() # Create a pointer to the first byte of the buffer for the encoded # frame. encoded_frame_ptr = ctypes.cast(ctypes.pointer(encoded_frame), ctypes.POINTER(ctypes.c_ubyte)) # Number of bytes to process in buffer length_bytes = (np_buf_source.shape[0] * np_buf_source.shape[1] * bytes_per_sample)
def encodeThenDecode(npBufSource, npBufTarget, freq):
    """Encode a PCM buffer with Opus, save it as OggOpus and decode it back.

    The source buffer is encoded frame by frame.  Each encoded frame is
    placed in an Ogg stream that is written to ``test.opus`` in the
    current directory, and is also decoded straight into
    ``npBufTarget``.  Samples at the very end of the source that do
    not fill even the smallest frame are dropped.

    Args:
        npBufSource: NumPy array of 16-bit PCM, one row per sample and
            one column per channel.
        npBufTarget: NumPy array that receives the decoded 16-bit PCM;
            its second dimension gives the decoder's channel count.
            NOTE(review): the decoder is told it may write up to 5760
            samples per call, so this buffer must have enough room
            for that -- confirm at the call site.
        freq: Sampling frequency passed to ``createEncoder``.

    Raises:
        Exception: if the encoder or decoder reports an error, or if a
            packet cannot be placed in the Ogg stream.
    """
    # Encoding
    # ========

    # Extract the number of channels in the source
    sourceChannels = npBufSource.shape[1]

    # Create an encoder
    encoder = createEncoder(npBufSource, freq)

    # Frame sizes are measured in number of samples.  There are only a
    # specified number of possible valid frame durations for Opus,
    # which (assuming a frequency of 48kHz) gives the following valid
    # sizes.
    frameSizes = [120, 240, 480, 960, 1920, 2880]

    # Specify the desired frame size.  This will be used for the vast
    # majority of the encoding, except possibly at the end of the
    # buffer (as there may not be sufficient data left to fill a
    # frame.)
    frameSizeIndex = 5
    frameSize = frameSizes[frameSizeIndex]

    # Samples are 16-bit.  This is defined before the helper below so
    # that the helper can read it as an ordinary closure variable.
    bytesPerSample = 2

    # Function to calculate the size of a frame in bytes
    def frameSizeBytes(frameSize):
        return frameSize * sourceChannels * bytesPerSample

    # Allocate storage space for the encoded frame.  4,000 bytes is
    # the recommended maximum buffer size for the encoded frame.
    maxEncodedFrameBytes = opus.opus_int32(4000)
    encodedFrameType = ctypes.c_ubyte * maxEncodedFrameBytes.value
    encodedFrame = encodedFrameType()

    # Create a pointer to the first byte of the buffer for the encoded
    # frame.
    encodedFramePtr = ctypes.cast(ctypes.pointer(encodedFrame),
                                  ctypes.POINTER(ctypes.c_ubyte))

    # Number of bytes to process in the source buffer
    lengthBytes = (npBufSource.shape[0]
                   * npBufSource.shape[1]
                   * bytesPerSample)

    # Saving
    # ======

    # Create a new stream state with a random serial number
    stream_state = createStreamState()

    # Packet and page structures, reused for every pass
    ogg_packet = ogg.ogg_packet()
    ogg_page = ogg.ogg_page()

    # Flag to indicate the start of stream
    start_of_stream = 1

    # Packet and PCM sample counters
    count_packets = 0
    count_samples = 0

    def writePages(out, nextPage):
        # Write every page that nextPage (ogg_stream_flush or
        # ogg_stream_pageout) is ready to hand back.
        while nextPage(ctypes.pointer(stream_state),
                       ctypes.pointer(ogg_page)) != 0:
            print("Writing page")
            out.write(bytes(ogg_page.header[0:ogg_page.header_len]))
            out.write(bytes(ogg_page.body[0:ogg_page.body_len]))

    # The context manager guarantees the file is closed when any of
    # the steps below raises.
    output_filename = "test.opus"
    with open(output_filename, "wb") as f:
        # Specify the identification header
        id_header = opus.make_identification_header(pre_skip=312)

        # Specify the packet containing the identification header
        ogg_packet.packet = ctypes.cast(id_header, ogg.c_uchar_p)
        ogg_packet.bytes = len(id_header)
        ogg_packet.b_o_s = start_of_stream
        ogg_packet.e_o_s = 0
        ogg_packet.granulepos = 0
        ogg_packet.packetno = count_packets
        start_of_stream = 0
        count_packets += 1

        # Write the header
        result = ogg.ogg_stream_packetin(stream_state, ogg_packet)
        if result != 0:
            raise Exception("Failed to write Opus identification header")

        # Specify the comment header
        comment_header = opus.make_comment_header()

        # Specify the packet containing the comment header
        ogg_packet.packet = ctypes.cast(comment_header, ogg.c_uchar_p)
        ogg_packet.bytes = len(comment_header)
        ogg_packet.b_o_s = start_of_stream
        ogg_packet.e_o_s = 0
        ogg_packet.granulepos = 0
        ogg_packet.packetno = count_packets
        count_packets += 1

        # Write the header
        result = ogg.ogg_stream_packetin(stream_state, ogg_packet)
        if result != 0:
            raise Exception("Failed to write Opus comment header")

        # Force the headers out so the audio starts on a fresh page
        writePages(f, ogg.ogg_stream_flush)

        # Decoding
        # ========

        # Extract the number of channels for the target
        targetChannels = npBufTarget.shape[1]

        # Create a decoder
        decoderFreq = 48000 # TODO: Test changes to this
        decoderPtr = createDecoder(decoderFreq, targetChannels)

        # Encode and re-decode the audio
        # ==============================

        # Pointer to a location in the source buffer.  We will
        # increment this as we progress through the encoding of the
        # buffer.  It starts pointing to the first byte.
        sourcePtr = npBufSource.ctypes.data_as(ctypes.c_void_p)
        sourcePtr_init = sourcePtr

        # The number of bytes processed will be the difference between
        # the pointer's current location and the address of the first
        # byte.
        bytesProcessed = sourcePtr.value - sourcePtr_init.value

        # Pointer to a location in the target buffer.  We will
        # increment this as we progress through re-decoding each
        # encoded frame.
        targetPtr = npBufTarget.ctypes.data_as(ctypes.c_void_p)

        # Loop through the source buffer
        while bytesProcessed < lengthBytes:
            print("Processing frame at sourcePtr ", sourcePtr.value)

            # Check if we have enough source data remaining to process
            # at the current frame size
            print("lengthBytes: ", lengthBytes)
            print("bytesProcessed: ", bytesProcessed)
            print("bytes remaining (lengthBytes - bytesProcessed):",
                  lengthBytes - bytesProcessed)
            print("frameSizeBytes(frameSize):", frameSizeBytes(frameSize))
            while lengthBytes - bytesProcessed < frameSizeBytes(frameSize):
                print("Warning! Not enough data for frame.")
                frameSizeIndex -= 1
                if frameSizeIndex < 0:
                    # The data is less than the smallest number of
                    # samples in a frame.  Either we ignore the
                    # remaining samples and shorten the audio, or we
                    # pad the frame with zeros and lengthen the audio.
                    # We'll take the easy option and shorten the audio.
                    break
                frameSize = frameSizes[frameSizeIndex]
                print("Decreased frame size to ", frameSize)

            if frameSizeIndex < 0:
                print("Warning! Ignoring samples at the end of the audio\n" +
                      "as they do not fit into even the smallest frame.")
                break

            # Encode the audio
            print("Encoding audio")
            numBytes = opus.opus_encode(
                encoder,
                ctypes.cast(sourcePtr, ctypes.POINTER(opus.opus_int16)),
                frameSize,
                encodedFramePtr,
                maxEncodedFrameBytes
            )
            print("numBytes: ", numBytes)

            # Check for any errors during encoding
            if numBytes < 0:
                raise Exception("Encoder error detected: " +
                                opus.opus_strerror(numBytes).decode("utf"))

            # Move to next position in the buffer: encoder
            sourcePtr = ctypes.c_void_p(
                sourcePtr.value + frameSizeBytes(frameSize)
            )
            bytesProcessed = sourcePtr.value - sourcePtr_init.value

            # Writing OggOpus
            # ===============

            # Increase the number of samples
            count_samples += frameSize

            # Place data into the packet
            ogg_packet.packet = encodedFramePtr
            ogg_packet.bytes = numBytes
            ogg_packet.b_o_s = start_of_stream
            ogg_packet.e_o_s = 0 # FIXME: It needs to end!
            ogg_packet.granulepos = count_samples
            ogg_packet.packetno = count_packets

            # No longer the start of stream
            start_of_stream = 0

            # Increase the number of packets
            count_packets += 1

            # Place the packet in to the stream
            result = ogg.ogg_stream_packetin(stream_state, ogg_packet)

            # Check for errors
            if result != 0:
                raise Exception("Error while placing packet in Ogg stream")

            # Write out any pages that are now complete
            writePages(f, ogg.ogg_stream_pageout)

            # Decode the audio
            print("Decoding audio")
            numSamples = opus.opus_decode(
                decoderPtr,
                encodedFramePtr,
                numBytes,
                ctypes.cast(targetPtr, ctypes.POINTER(ctypes.c_short)),
                5760, # Max space required in PCM
                0 # decode_fec flag: 0 means no forward error correction
            )
            print("numSamples: ", numSamples)

            # Check for any errors during decoding
            if numSamples < 0:
                raise Exception("Decoder error detected: " +
                                opus.opus_strerror(numSamples).decode("utf"))

            # Move to next position in the buffer: decoder
            targetPtr.value += numSamples * targetChannels * bytesPerSample

        # Force out whatever is still buffered.  ogg_stream_pageout
        # only returns pages that are already full, so without a flush
        # the tail of the audio would never reach the file.
        # TODO: the final packet is still not flagged as end-of-stream.
        writePages(f, ogg.ogg_stream_flush)
def encodeThenDecode(npBufSource, npBufTarget, freq):
    """Encode a PCM buffer with Opus and decode it straight back.

    The source buffer is encoded frame by frame and each encoded frame
    is immediately decoded into ``npBufTarget``.  Samples at the very
    end of the source that do not fill even the smallest frame are
    dropped.

    Args:
        npBufSource: NumPy array of 16-bit PCM, one row per sample and
            one column per channel.
        npBufTarget: NumPy array that receives the decoded 16-bit PCM;
            its second dimension gives the decoder's channel count.
            NOTE(review): the decoder is told it may write up to 5760
            samples per call, so this buffer must have enough room
            for that -- confirm at the call site.
        freq: Sampling frequency passed to ``createEncoder``.

    Raises:
        Exception: if the encoder or decoder reports an error.
    """
    # Encoding
    # ========

    # Extract the number of channels in the source
    sourceChannels = npBufSource.shape[1]

    # Create an encoder
    encoder = createEncoder(npBufSource, freq)

    # Frame sizes are measured in number of samples.  There are only a
    # specified number of possible valid frame durations for Opus,
    # which (assuming a frequency of 48kHz) gives the following valid
    # sizes.
    frameSizes = [120, 240, 480, 960, 1920, 2880]

    # Specify the desired frame size.  This will be used for the vast
    # majority of the encoding, except possibly at the end of the
    # buffer (as there may not be sufficient data left to fill a
    # frame.)
    frameSizeIndex = 5
    frameSize = frameSizes[frameSizeIndex]

    # Samples are 16-bit.  This is defined before the helper below so
    # that the helper can read it as an ordinary closure variable.
    bytesPerSample = 2

    # Function to calculate the size of a frame in bytes
    def frameSizeBytes(frameSize):
        return frameSize * sourceChannels * bytesPerSample

    # Allocate storage space for the encoded frame.  4,000 bytes is
    # the recommended maximum buffer size for the encoded frame.
    maxEncodedFrameBytes = opus.opus_int32(4000)
    encodedFrameType = ctypes.c_ubyte * maxEncodedFrameBytes.value
    encodedFrame = encodedFrameType()

    # Create a pointer to the first byte of the buffer for the encoded
    # frame.
    encodedFramePtr = ctypes.cast(ctypes.pointer(encodedFrame),
                                  ctypes.POINTER(ctypes.c_ubyte))

    # Number of bytes to process in the source buffer
    lengthBytes = (npBufSource.shape[0]
                   * npBufSource.shape[1]
                   * bytesPerSample)

    # Decoding
    # ========

    # Extract the number of channels for the target
    targetChannels = npBufTarget.shape[1]

    # Create a decoder
    decoderFreq = 48000 # TODO: Test changes to this
    decoderPtr = createDecoder(decoderFreq, targetChannels)

    # Encode and re-decode the audio
    # ==============================

    # Pointer to a location in the source buffer.  We will increment
    # this as we progress through the encoding of the buffer.  It
    # starts pointing to the first byte.
    sourcePtr = npBufSource.ctypes.data_as(ctypes.c_void_p)
    sourcePtr_init = sourcePtr

    # The number of bytes processed will be the difference between the
    # pointer's current location and the address of the first byte.
    bytesProcessed = sourcePtr.value - sourcePtr_init.value

    # Pointer to a location in the target buffer.  We will increment
    # this as we progress through re-decoding each encoded frame.
    targetPtr = npBufTarget.ctypes.data_as(ctypes.c_void_p)

    # Loop through the source buffer
    count = 0 # FIXME: debugging only
    while bytesProcessed < lengthBytes:
        print("processing frame: ", count)
        count += 1

        print("Processing frame at sourcePtr ", sourcePtr.value)

        # Check if we have enough source data remaining to process at
        # the current frame size
        print("lengthBytes: ", lengthBytes)
        print("bytesProcessed: ", bytesProcessed)
        print("bytes remaining (lengthBytes - bytesProcessed):",
              lengthBytes - bytesProcessed)
        print("frameSizeBytes(frameSize):", frameSizeBytes(frameSize))
        while lengthBytes - bytesProcessed < frameSizeBytes(frameSize):
            print("Warning! Not enough data for frame.")
            frameSizeIndex -= 1
            if frameSizeIndex < 0:
                # The data is less than the smallest number of samples
                # in a frame.  Either we ignore the remaining samples
                # and shorten the audio, or we pad the frame with
                # zeros and lengthen the audio.  We'll take the easy
                # option and shorten the audio.
                break
            frameSize = frameSizes[frameSizeIndex]
            print("Decreased frame size to ", frameSize)

        if frameSizeIndex < 0:
            print("Warning! Ignoring samples at the end of the audio\n" +
                  "as they do not fit into even the smallest frame.")
            break

        # Encode the audio
        print("Encoding audio")
        numBytes = opus.opus_encode(
            encoder,
            ctypes.cast(sourcePtr, ctypes.POINTER(opus.opus_int16)),
            frameSize,
            encodedFramePtr,
            maxEncodedFrameBytes
        )
        print("numBytes: ", numBytes)

        # Check for any errors during encoding
        if numBytes < 0:
            raise Exception("Encoder error detected: " +
                            opus.opus_strerror(numBytes).decode("utf"))

        # Move to next position in the buffer: encoder
        sourcePtr = ctypes.c_void_p(
            sourcePtr.value + frameSizeBytes(frameSize)
        )
        bytesProcessed = sourcePtr.value - sourcePtr_init.value

        # Decode the audio
        print("Decoding audio")
        numSamples = opus.opus_decode(
            decoderPtr,
            encodedFramePtr,
            numBytes,
            ctypes.cast(targetPtr, ctypes.POINTER(ctypes.c_short)),
            5760, # Max space required in PCM
            0 # decode_fec flag: 0 means no forward error correction
        )
        print("numSamples: ", numSamples)

        # Check for any errors during decoding
        if numSamples < 0:
            raise Exception("Decoder error detected: " +
                            opus.opus_strerror(numSamples).decode("utf"))

        # Move to next position in the buffer: decoder
        targetPtr.value += numSamples * targetChannels * bytesPerSample