def test_find(self):
    # Build a well-formed header from the last size test case, then check
    # what id3_header.find_size reports for empty input, junk-only input,
    # and input where the header is preceded and/or followed by junk.
    raw_size, cooked_size = self.size_test_cases[-1]
    id3 = self.prefix + raw_size
    filler = "x" * 10

    # Each entry pairs the expected result with the data handed to
    # find_size; entries are checked in this order.
    cases = (
        ((None, 0), ""),
        ((None, 1), "x"),
        ((None, 1), "xI"),
        ((None, 12), "xI" + filler),
        ((cooked_size, 0), id3),
        ((cooked_size, 0), id3 + filler),
        ((cooked_size, 3), "xIx" + id3),
        ((cooked_size, 3), "xIx" + id3 + filler),
    )
    for expected, data in cases:
        self.assertEqual(expected, id3_header.find_size(data))
def split_blocks(block_iter, expected_hdr=None):
    """Extract a sequence of MPEG audio frames from a stream of data blocks.

    Args:
      block_iter: An iterable (or iterator) that yields a sequence of data
        blocks.
      expected_hdr: If given, only yield frames matching this MP3Header
        template.  It is passed through to mp3_header.find().

    Yields:
      A (hdr, data_buffer) pair.  If 'hdr' is None, data buffer contains
      non-MPEG-audio junk that was found inside the stream.  Otherwise
      'hdr' is an MP3Header object and 'data_buffer' contains the MP3
      frame.
    """
    # The contract above promises "an iterable", so normalise the argument
    # to an iterator first.  iter() hands an iterator back unchanged, so
    # callers that already pass one see no difference.  Using the next()
    # builtin instead of the .next() method keeps this working with both
    # old- and new-style iterators.
    block_iter = iter(block_iter)

    buffered = ''
    current_hdr = None
    at_end_of_stream = False
    to_be_skipped = 0
    while True:
        # First we skip data if necessary.
        while to_be_skipped > 0:
            assert current_hdr is None
            # If we don't have anything in our buffer, pull in the
            # next block.
            if not buffered:
                try:
                    buffered = next(block_iter)
                except StopIteration:
                    sys.stderr.write(
                        "Stream ended while skipping data "
                        "between frames (probably ID3 headers).\n")
                    at_end_of_stream = True
                    break
            # If the buffer contains no more than the amount of data to
            # be skipped, yield it all and update to_be_skipped.
            # Otherwise slice the amount to be skipped off of the
            # front of the buffer.
            if len(buffered) <= to_be_skipped:
                yield None, buffered
                to_be_skipped -= len(buffered)
                buffered = ''
            else:
                yield None, buffered[:to_be_skipped]
                buffered = buffered[to_be_skipped:]
                to_be_skipped = 0

        # We try to have at least _READ_SIZE bytes of data buffered.
        if len(buffered) < _READ_SIZE:
            # To avoid excess string copies, we collect data in a list
            # until we have the desired amount, then concatenate it all
            # at the end.
            buffered_list = [buffered]
            buffered_size = len(buffered)
            while buffered_size < _READ_SIZE:
                try:
                    next_block = next(block_iter)
                except StopIteration:
                    at_end_of_stream = True
                    break
                buffered_list.append(next_block)
                buffered_size += len(next_block)
            buffered = ''.join(buffered_list)

        # Are we at the end of the file?  If so, break out of the
        # "while True:" loop.
        if not buffered:
            break

        # Do we have an MP3 header?  If so, yield the frame and then
        # slice it off of our buffer.
        if current_hdr:
            current_frame = buffered[:current_hdr.frame_size]
            # If we found a full-length frame, yield it.  Otherwise
            # return the truncated frame as junk.  (We can be sure not
            # to throw away a valid frame since we buffer at least the
            # next _READ_SIZE bytes, and _READ_SIZE is larger than any
            # possible MP3 frame.)
            if len(current_frame) != current_hdr.frame_size:
                current_hdr = None
            yield current_hdr, current_frame
            current_hdr = None
            buffered = buffered[len(current_frame):]

        # Look for the next ID3 header.
        id3_size, id3_offset = id3_header.find_size(buffered)

        # Look for the next MP3 header.
        next_hdr, offset = mp3_header.find(buffered, expected_hdr=expected_hdr)

        # If we see an ID3 header before the next MP3 header, skip past the
        # ID3.  We do this out of paranoia, since an ID3 header might contain
        # false synch.
        if id3_size is not None and id3_offset < offset:
            to_be_skipped = id3_offset + id3_size
            continue

        # We are starting on this header.
        current_hdr = next_hdr

        # If we cannot make any progress and are at the end of the
        # stream, just return what we have buffered as junk and then
        # break out of the loop.
        if (current_hdr, offset) == (None, 0) and at_end_of_stream:
            if buffered:
                yield None, buffered
            break

        # Did we find junk before the next frame?  If so, yield it.
        if offset > 0:
            yield None, buffered[:offset]
            buffered = buffered[offset:]
def split_blocks(block_iter, expected_hdr=None):
    """Extract a sequence of MPEG audio frames from a stream of data blocks.

    Args:
      block_iter: An iterable (or iterator) that yields a sequence of data
        blocks.
      expected_hdr: If given, only yield frames matching this MP3Header
        template.  It is passed through to mp3_header.find().

    Yields:
      A (hdr, data_buffer) pair.  If 'hdr' is None, data buffer contains
      non-MPEG-audio junk that was found inside the stream.  Otherwise
      'hdr' is an MP3Header object and 'data_buffer' contains the MP3
      frame.
    """
    # The contract above promises "an iterable", so normalise the argument
    # to an iterator first.  iter() hands an iterator back unchanged, so
    # callers that already pass one see no difference.  Using the next()
    # builtin instead of the .next() method keeps this working with both
    # old- and new-style iterators.
    block_iter = iter(block_iter)

    buffered = ''
    current_hdr = None
    at_end_of_stream = False
    to_be_skipped = 0
    while True:
        # First we skip data if necessary.
        while to_be_skipped > 0:
            assert current_hdr is None
            # If we don't have anything in our buffer, pull in the
            # next block.
            if not buffered:
                try:
                    buffered = next(block_iter)
                except StopIteration:
                    sys.stderr.write(
                        "Stream ended while skipping data "
                        "between frames (probably ID3 headers).\n")
                    at_end_of_stream = True
                    break
            # If the buffer contains no more than the amount of data to
            # be skipped, yield it all and update to_be_skipped.
            # Otherwise slice the amount to be skipped off of the
            # front of the buffer.
            if len(buffered) <= to_be_skipped:
                yield None, buffered
                to_be_skipped -= len(buffered)
                buffered = ''
            else:
                yield None, buffered[:to_be_skipped]
                buffered = buffered[to_be_skipped:]
                to_be_skipped = 0

        # We try to have at least _READ_SIZE bytes of data buffered.
        if len(buffered) < _READ_SIZE:
            # To avoid excess string copies, we collect data in a list
            # until we have the desired amount, then concatenate it all
            # at the end.
            buffered_list = [buffered]
            buffered_size = len(buffered)
            while buffered_size < _READ_SIZE:
                try:
                    next_block = next(block_iter)
                except StopIteration:
                    at_end_of_stream = True
                    break
                buffered_list.append(next_block)
                buffered_size += len(next_block)
            buffered = ''.join(buffered_list)

        # Are we at the end of the file?  If so, break out of the
        # "while True:" loop.
        if not buffered:
            break

        # Do we have an MP3 header?  If so, yield the frame and then
        # slice it off of our buffer.
        if current_hdr:
            current_frame = buffered[:current_hdr.frame_size]
            # If we found a full-length frame, yield it.  Otherwise
            # return the truncated frame as junk.  (We can be sure not
            # to throw away a valid frame since we buffer at least the
            # next _READ_SIZE bytes, and _READ_SIZE is larger than any
            # possible MP3 frame.)
            if len(current_frame) != current_hdr.frame_size:
                current_hdr = None
            yield current_hdr, current_frame
            current_hdr = None
            buffered = buffered[len(current_frame):]

        # Look for the next ID3 header.
        id3_size, id3_offset = id3_header.find_size(buffered)

        # Look for the next MP3 header.
        next_hdr, offset = mp3_header.find(buffered, expected_hdr=expected_hdr)

        # If we see an ID3 header before the next MP3 header, skip past the
        # ID3.  We do this out of paranoia, since an ID3 header might contain
        # false synch.
        if id3_size is not None and id3_offset < offset:
            to_be_skipped = id3_offset + id3_size
            continue

        # We are starting on this header.
        current_hdr = next_hdr

        # If we cannot make any progress and are at the end of the
        # stream, just return what we have buffered as junk and then
        # break out of the loop.
        if (current_hdr, offset) == (None, 0) and at_end_of_stream:
            if buffered:
                yield None, buffered
            break

        # Did we find junk before the next frame?  If so, yield it.
        if offset > 0:
            yield None, buffered[:offset]
            buffered = buffered[offset:]