def check_format_info(info):
    """
    Validate the derived fields of the format info.

    Two consistency rules are enforced, in this order:

    * ``nBlockAlign == int(wBitsPerSample * nChannels / 8)``
    * ``nAvgBytesPerSec == nSamplesPerSec * nBlockAlign``

    Args:
        info: Format info from fmt chunk.

    Raises:
        wavy.WaveFileIsCorrupted: If either rule is violated.
    """
    bits = info.wBitsPerSample
    actual_align = info.nBlockAlign

    # size of one frame in bytes: one sample for every channel
    expected_align = int(bits * info.nChannels / 8)
    if expected_align != actual_align:
        raise wavy.WaveFileIsCorrupted(
            f"Block align is incorrect for {bits} bits. "
            f"Expected: {expected_align}, "
            f"Actual: {actual_align}.")

    # byte rate: frames per second times the size of one frame
    actual_rate = info.nAvgBytesPerSec
    expected_rate = info.nSamplesPerSec * actual_align
    if expected_rate != actual_rate:
        raise wavy.WaveFileIsCorrupted(f"Avg. bytes per sec. is incorrect. "
                                       f"Expected: {expected_rate}, "
                                       f"Actual: {actual_rate}.")
def get_data_from_chunk(chunk, format, handler):
    """
    Read data from data chunk.

    Args:
        chunk: Data chunk; its ``getsize()`` gives the data size in bytes.
        format: File format information (``nBlockAlign``,
            ``wBitsPerSample``, ``wFormatTag`` and ``nChannels`` are used).
        handler: Object whose ``read_data(chunk, size, n_bytes, is_float)``
            decodes the raw chunk data.

    Returns:
        numpy.array: Data read from chunk. Returned as delivered by the
        handler for a single channel, otherwise reshaped to
        ``(-1, nChannels)``.

    Raises:
        wavy.WaveFileIsCorrupted: If the block align is not positive or
            the data size is not a whole number of frames.
    """
    # get size of data
    size = chunk.getsize()

    # a zero block align comes from a malformed header; report it as
    # corruption instead of letting the modulo below raise
    # ZeroDivisionError
    if format.nBlockAlign <= 0:
        raise wavy.WaveFileIsCorrupted(
            f"Block align must be positive, got {format.nBlockAlign}.")

    # the data must consist of whole frames
    if size % format.nBlockAlign != 0:
        raise wavy.WaveFileIsCorrupted("Data size does not match frame size of"
                                       f" {format.wBitsPerSample} bits")

    # number of bytes for data type to be parsed
    n_bytes = format.wBitsPerSample // 8

    # read data from raw
    is_float = format.wFormatTag == WAVE_FORMAT_IEEE_FLOAT
    data = handler.read_data(chunk, size, n_bytes, is_float)

    # a single channel needs no reshaping
    if format.nChannels == 1:
        return data

    # more than one channel: one row per frame, one column per channel
    return data.reshape(-1, format.nChannels)
def get_chunk(stream, bigendian):
    """
    Get the next chunk from the stream of a wave file.

    Args:
        stream: The stream to read.
        bigendian: Forwarded unchanged as the ``bigendian`` keyword of
            ``chunk.Chunk``.

    Returns:
        The ``chunk.Chunk`` created on top of the stream.

    Raises:
        wavy.WaveFileIsCorrupted: If creating the chunk raises EOFError.
    """
    try:
        return chunk.Chunk(stream, bigendian=bigendian)
    except EOFError as err:
        # chain explicitly so the original EOFError stays attached as the
        # cause of the domain error
        raise wavy.WaveFileIsCorrupted(
            'Reached end of file prematurely.') from err
def get_fmt_chunk(stream, handler):
    """
    Locate the format chunk in the stream, validate it and return the
    format information it carries.

    Chunks in front of the format chunk are skipped. After a successful
    return the format chunk itself has been skipped as well.

    Args:
        stream: The stream to read.
        handler: Supplies ``little_endian`` and ``read(fmt, data)``; it is
            also handed on to ``get_sub_format``.

    Returns:
        FormatInfo: Format info from fmt chunk.

    Raises:
        wavy.WaveFileIsCorrupted: If a data chunk precedes the fmt chunk
            or the fmt chunk size is not in FMT_CHUNK_SIZES.
        wavy.WaveFileNotSupported: If the format tag is not in
            SUPPORTED_WAVE_FORMATS or the sample width is not supported
            for that format.
    """
    # walk over the chunks until the format chunk shows up
    while True:
        fmt_chunk = get_chunk(stream, not handler.little_endian)
        chunk_id = fmt_chunk.getname()

        if chunk_id == FMT:
            break

        # data must never come before the format description
        if chunk_id == DATA:
            raise wavy.WaveFileIsCorrupted(
                'Found data chunk before fmt chunk.')

        fmt_chunk.skip()

    # only known format chunk sizes are accepted
    chunk_size = fmt_chunk.getsize()
    if chunk_size not in FMT_CHUNK_SIZES:
        raise wavy.WaveFileIsCorrupted(
            'Format chunk is of unexpected size: {}.'.format(chunk_size))

    # the first 16 bytes hold the fields common to all formats
    header = FormatInfo(*handler.read('HHLLHH', fmt_chunk.read(16)))
    tag = header.wFormatTag

    # for the extensible format the real tag lives in the sub format;
    # put it in place of the first field of the info tuple
    if tag == WAVE_FORMAT_EXTENSIBLE and chunk_size == 40:
        tag = get_sub_format(fmt_chunk, handler)
        _, *remaining_fields = header
        header = FormatInfo(tag, *remaining_fields)

    # reject every format that is not explicitly supported
    if tag not in SUPPORTED_WAVE_FORMATS:
        raise wavy.WaveFileNotSupported(
            'The wave format is not of supported type.')

    # the sample width has to be one that is supported for this format
    if header.wBitsPerSample not in SUPPORTED_SAMPLE_WIDTH_FOR_FORMAT[tag]:
        raise wavy.WaveFileNotSupported(
            "Sample width '{}' is not supported for "
            "given type.".format(header.wBitsPerSample))

    fmt_chunk.skip()
    return header