Пример #1
0
def read_header(filehandle):
    """
    Read and validate the configuration datagram found in the first block of the file.

    The length field stored in front of the datagram is compared with the length field
    stored behind it and with the number of bytes the datagram spans. When all three agree,
    the handle is moved to the absolute offset just past the trailing length field
    (this equals "just past the datagram" when reading began at offset 0).

    @param filehandle binary file object to read from
    @return the result of read_config_header for the configuration header bytes
    @throws InstrumentDataException if the two length fields and the byte count disagree
    """
    # A single block is read; the whole configuration datagram is expected to be inside it
    block = filehandle.read(BLOCK_SIZE)

    # Length field that precedes the datagram
    leading_length = unpack_from('<l', block)[0]

    # Skip the leading length field and the datagram header to reach the configuration header
    config_start = LENGTH_SIZE + DATAGRAM_HEADER_SIZE
    config_end = config_start + CONFIG_HEADER_SIZE
    config_header = read_config_header(block[config_start:config_end])

    # CONFIG_TRANSDUCER_SIZE bytes are skipped for every transducer named in the header
    trailer_offset = config_end + CONFIG_TRANSDUCER_SIZE * config_header['transducer_count']

    # Length field that follows the datagram. A mismatch between the leading length, the
    # trailing length and the bytes spanned can indicate an invalid, corrupt, misaligned or
    # missing configuration datagram, or a reverse byte order binary data file.
    trailing_length = unpack_from('<l', block, trailer_offset)[0]
    datagram_size = trailer_offset - LENGTH_SIZE
    if leading_length != trailing_length or trailing_length != datagram_size:
        raise InstrumentDataException(
            "Length of configuration datagram and number of bytes read do not match: length1: %s"
            ", length2: %s, byte_cnt: %s. Possible file corruption or format incompatibility." %
            (leading_length, trailing_length, trailer_offset + LENGTH_SIZE))

    filehandle.seek(trailer_offset + LENGTH_SIZE)
    return config_header
Пример #2
0
def read_header(filehandle):
    """
    Read and validate the configuration datagram found in the first block of the file.

    The length field stored in front of the datagram is compared with the length field
    stored behind it and with the number of bytes the datagram spans. When all three agree,
    the handle is moved to the absolute offset just past the trailing length field
    (this equals "just past the datagram" when reading began at offset 0).

    @param filehandle binary file object to read from
    @return the result of read_config_header for the configuration header bytes
    @throws InstrumentDataException if the two length fields and the byte count disagree
    """
    # A single block is read; the whole configuration datagram is expected to be inside it
    block = filehandle.read(BLOCK_SIZE)

    # Length field that precedes the datagram
    leading_length = unpack_from('<l', block)[0]

    # Skip the leading length field and the datagram header to reach the configuration header
    config_start = LENGTH_SIZE + DATAGRAM_HEADER_SIZE
    config_end = config_start + CONFIG_HEADER_SIZE
    config_header = read_config_header(block[config_start:config_end])

    # CONFIG_TRANSDUCER_SIZE bytes are skipped for every transducer named in the header
    trailer_offset = config_end + CONFIG_TRANSDUCER_SIZE * config_header['transducer_count']

    # Length field that follows the datagram. A mismatch between the leading length, the
    # trailing length and the bytes spanned can indicate an invalid, corrupt, misaligned or
    # missing configuration datagram, or a reverse byte order binary data file.
    trailing_length = unpack_from('<l', block, trailer_offset)[0]
    datagram_size = trailer_offset - LENGTH_SIZE
    if leading_length != trailing_length or trailing_length != datagram_size:
        raise InstrumentDataException(
            "Length of configuration datagram and number of bytes read do not match: length1: %s"
            ", length2: %s, byte_cnt: %s. Possible file corruption or format incompatibility." %
            (leading_length, trailing_length, trailer_offset + LENGTH_SIZE))

    filehandle.seek(trailer_offset + LENGTH_SIZE)
    return config_header
Пример #3
0
def parse_echogram_file(input_file_path, output_file_path=None):
    """
    Parse the *.raw file and start one echogram plotting process per transducer channel.

    The configuration datagram at the start of the file is validated first. The rest of
    the file is then scanned for Sample datagrams; time tags and decompressed power data
    are collected per channel and passed to generate_echogram_plot in separate processes.
    The input file is closed before the plotting processes are started.

    @param input_file_path absolute path/name to file to be parsed
    @param output_file_path optional path to directory to write output
    If omitted outputs are written to path of input file
    @throws IOError if the input file cannot be opened
    @throws InstrumentDataException if the file name carries no timestamp or the lengths
    of the configuration datagram are inconsistent
    """

    try:
        input_file = open(input_file_path, 'rb')
    except IOError as e:
        log.error('Could not open Raw Echogram file: %r %r', input_file_path, e)
        raise

    # Values copied from the first ping of every channel: (particle key, sample datagram field)
    first_ping_fields = (
        (ZplscBParticleKey.TRANSDUCER_DEPTH, 'transducer_depth'),
        (ZplscBParticleKey.FREQUENCY, 'frequency'),
        (ZplscBParticleKey.TRANSMIT_POWER, 'transmit_power'),
        (ZplscBParticleKey.PULSE_LENGTH, 'pulse_length'),
        (ZplscBParticleKey.BANDWIDTH, 'bandwidth'),
        (ZplscBParticleKey.SAMPLE_INTERVAL, 'sample_interval'),
        (ZplscBParticleKey.SOUND_VELOCITY, 'sound_velocity'),
        (ZplscBParticleKey.ABSORPTION_COEF, 'absorption_coefficient'),
        (ZplscBParticleKey.TEMPERATURE, 'temperature'),
    )

    # 4 byte int (long) that closes every datagram
    len_dtype = numpy.dtype([('length2', '<i4')])

    # The file is only needed while scanning. 'with' closes it on every exit path,
    # including the exceptions raised for a bad file name or configuration datagram.
    with input_file:
        # Extract the file time from the file name
        input_file_name = input_file.name
        file_path, filename = os.path.split(input_file_name)

        if output_file_path is None:
            output_file_path = file_path

        # Name of the input file without its extension, used as prefix of the plot names
        outfile = filename.rpartition('.')[0]

        match = FILE_NAME_MATCHER.match(input_file_name)
        if not match:
            # Files retrieved from the instrument should always match the timestamp naming convention
            message = ("Unable to extract file time from input file name: %s."
                       "Expected format *-DYYYYmmdd-THHMMSS.raw" % input_file_name)
            log.error(message)
            raise InstrumentDataException(message)
        file_time = match.group('Date') + match.group('Time')

        # Read binary file a block at a time
        raw = input_file.read(BLOCK_SIZE)

        # Read the configuration datagram, output at the beginning of the file
        length1, = unpack_from('<l', raw)
        byte_cnt = LENGTH_SIZE

        # Configuration datagram header. The parsed result is not used here; the call is
        # kept in case read_datagram_header rejects malformed input (not visible here).
        read_datagram_header(raw[byte_cnt:byte_cnt+DATAGRAM_HEADER_SIZE])
        byte_cnt += DATAGRAM_HEADER_SIZE

        # Configuration: header
        config_header = read_config_header(raw[byte_cnt:byte_cnt+CONFIG_HEADER_SIZE])
        byte_cnt += CONFIG_HEADER_SIZE

        transducer_count = config_header['transducer_count']
        byte_cnt += CONFIG_TRANSDUCER_SIZE * transducer_count

        # Compare length1 (from beginning of datagram) to length2 (from the end of datagram) to
        # the actual number of bytes read. A mismatch can indicate an invalid, corrupt, misaligned,
        # or missing configuration datagram or a reverse byte order binary data file.
        # A bad/missing configuration datagram header is a significant error.
        length2, = unpack_from('<l', raw, byte_cnt)
        if not (length1 == length2 == byte_cnt-LENGTH_SIZE):
            raise InstrumentDataException(
                "Length of configuration datagram and number of bytes read do not match: length1: %s"
                ", length2: %s, byte_cnt: %s. Possible file corruption or format incompatibility." %
                (length1, length2, byte_cnt+LENGTH_SIZE))

        first_ping_metadata = defaultdict(list)
        trans_keys = range(1, transducer_count+1)
        trans_array = dict((key, []) for key in trans_keys)         # transducer power data
        trans_array_time = dict((key, []) for key in trans_keys)    # transducer time data
        td_f = dict.fromkeys(trans_keys)                            # transducer frequency
        td_dr = dict.fromkeys(trans_keys)                           # transducer depth measurement
        # Plot path per channel. Keyed by channel so the lookup does not depend on the
        # order in which channels first appear in the file.
        echogram_paths = {}

        # File offset at which the block currently held in 'raw' starts
        position = 0

        while raw:
            # We only care for the Sample datagrams, skip over all the other datagrams
            match = SAMPLE_MATCHER.search(raw)

            if not match:
                # Read in the next block w/ a token sized overlap and remember where that
                # block starts, so later match offsets are translated to the right file offset.
                position = max(input_file.tell() - 4, 0)
                input_file.seek(position)
                raw = input_file.read(BLOCK_SIZE)

                # The last 4 bytes is just the length2 of the last datagram
                if len(raw) <= 4:
                    break

                # Search the new block; there is no match object to use for this one yet.
                continue

            # Offset by size of length value
            match_start = match.start() - LENGTH_SIZE

            # Seek to the position of the length data before the token to read into numpy array
            input_file.seek(position + match_start)

            # Read and unpack the Sample Datagram into numpy array
            sample_data = numpy.fromfile(input_file, dtype=sample_dtype, count=1)
            if sample_data.size == 0:
                # Fewer bytes than one sample datagram header remain: nothing more to parse.
                log.warn("Truncated sample datagram at end of file: %s", input_file_name)
                break
            channel = sample_data['channel_number'][0]

            # Check for a valid channel number that is within the number of transducers config
            # to prevent incorrectly indexing into the dictionaries (their keys start at 1).
            # An out of bounds channel number can indicate invalid, corrupt,
            # or misaligned datagram or a reverse byte order binary data file.
            # Log warning and continue to try and process the rest of the file.
            if channel < 1 or channel > transducer_count:
                log.warn("Invalid channel: %s for transducer count: %s."
                         "Possible file corruption or format incompatibility.", channel, transducer_count)

                # Need current position in file to increment for next regex search offset
                position = input_file.tell()

                # Read the next block for regex search
                raw = input_file.read(BLOCK_SIZE)
                continue

            # Convert high and low bytes to internal time. int() performs the shift on
            # Python integers so the result cannot be limited by the width of the numpy field.
            internal_time = ((int(sample_data['high_date_time'][0]) << 32) +
                             int(sample_data['low_date_time'][0]))
            # Note: Strictly sequential time tags are not guaranteed.
            trans_array_time[channel].append(internal_time)

            # Gather metadata once per transducer channel number
            if not trans_array[channel]:
                # Floor division keeps the whole-number kHz label on every Python version
                frequency_khz = int(sample_data['frequency'][0]) // 1000
                echogram_path = os.path.join(
                    output_file_path, outfile + '_' + str(frequency_khz) + 'k.png')
                echogram_paths[channel] = echogram_path

                first_ping_metadata[ZplscBParticleKey.FILE_TIME] = file_time
                first_ping_metadata[ZplscBParticleKey.ECHOGRAM_PATH].append(echogram_path)
                first_ping_metadata[ZplscBParticleKey.CHANNEL].append(channel)
                for particle_key, field in first_ping_fields:
                    first_ping_metadata[particle_key].append(sample_data[field][0])

                # Make only one particle for the first ping series containing data for all channels
                if channel == transducer_count:
                    # Convert from Windows time to NTP time.
                    time_stamp = internal_time / 10000.0 - windows_ntp_diff

                    # Put the metadata and timestamp in a tuple for creation of a particle.
                    # NOTE(review): the visible code never returns or uses particle_data;
                    # confirm whether the caller is expected to receive it.
                    particle_data = (first_ping_metadata, time_stamp)

                # Extract various calibration parameters used for generating echogram plot
                # This data doesn't change so extract it once per channel
                td_f[channel] = sample_data['frequency'][0]
                td_dr[channel] = sample_data['sound_velocity'][0] * sample_data['sample_interval'][0] / 2

            count = sample_data['count'][0]

            # Extract array of power data
            power_data = numpy.fromfile(input_file, dtype=power_dtype, count=count)

            # Decompress power data to dB
            trans_array[channel].append(power_data['power_data'] * 10. * numpy.log10(2) / 256.)

            # The athwartship and alongship angle measurements are not used, but they must
            # be consumed so that the next read lands on the trailing length field.
            if sample_data['mode'][0] > 1:
                numpy.fromfile(input_file, dtype=angle_dtype, count=count)

            # Read and compare length1 (from beginning of datagram) to length2
            # (from the end of datagram). A mismatch can indicate an invalid, corrupt,
            # or misaligned datagram or a reverse byte order binary data file.
            # Log warning and continue to try and process the rest of the file.
            length2_data = numpy.fromfile(input_file, dtype=len_dtype, count=1)
            if length2_data.size == 0:
                log.warn("Sample datagram is missing its end length value: length1: %s. "
                         "Possible file truncation.", sample_data['length1'][0])
            elif sample_data['length1'][0] != length2_data['length2'][0]:
                log.warn("Mismatching beginning and end length values in sample datagram: length1"
                         ": %s, length2: %s. Possible file corruption or format incompatibility."
                         , sample_data['length1'][0], length2_data['length2'][0])

            # Need current position in file to increment for next regex search offset
            position = input_file.tell()

            # Read the next block for regex search
            raw = input_file.read(BLOCK_SIZE)

    # Driver spends most of the time plotting,
    # this can take longer for more transducers so lets break out the work
    processes = []

    for channel in td_f:
        if channel not in echogram_paths:
            # No ping was read for this channel, so there is nothing to plot.
            log.warn("No sample data found for channel: %s. Skipping echogram plot.", channel)
            continue

        try:
            # The stored path already contains output_file_path; it is passed on unchanged.
            process = Process(target=generate_echogram_plot,
                              args=(trans_array_time[channel], trans_array[channel],
                                    td_f[channel], td_dr[channel], channel,
                                    echogram_paths[channel]))
            process.start()
            processes.append(process)

        except Exception as e:
            log.error("Error: Unable to start process: %s", e)
Пример #4
0
def parse_echogram_file(input_file_path, output_file_path=None):
    """
    Parse the *.raw file and start one echogram plotting process per transducer channel.

    The configuration datagram at the start of the file is validated first. The rest of
    the file is then scanned for Sample datagrams; time tags and decompressed power data
    are collected per channel and passed to generate_echogram_plot in separate processes.
    The input file is closed before the plotting processes are started.

    @param input_file_path absolute path/name to file to be parsed
    @param output_file_path optional path to directory to write output
    If omitted outputs are written to path of input file
    @throws IOError if the input file cannot be opened
    @throws InstrumentDataException if the file name carries no timestamp or the lengths
    of the configuration datagram are inconsistent
    """

    try:
        input_file = open(input_file_path, 'rb')
    except IOError as e:
        log.error('Could not open Raw Echogram file: %r %r', input_file_path,
                  e)
        raise

    # Values copied from the first ping of every channel: (particle key, sample datagram field)
    first_ping_fields = (
        (ZplscBParticleKey.TRANSDUCER_DEPTH, 'transducer_depth'),
        (ZplscBParticleKey.FREQUENCY, 'frequency'),
        (ZplscBParticleKey.TRANSMIT_POWER, 'transmit_power'),
        (ZplscBParticleKey.PULSE_LENGTH, 'pulse_length'),
        (ZplscBParticleKey.BANDWIDTH, 'bandwidth'),
        (ZplscBParticleKey.SAMPLE_INTERVAL, 'sample_interval'),
        (ZplscBParticleKey.SOUND_VELOCITY, 'sound_velocity'),
        (ZplscBParticleKey.ABSORPTION_COEF, 'absorption_coefficient'),
        (ZplscBParticleKey.TEMPERATURE, 'temperature'),
    )

    # 4 byte int (long) that closes every datagram
    len_dtype = numpy.dtype([('length2', '<i4')])

    # The file is only needed while scanning. 'with' closes it on every exit path,
    # including the exceptions raised for a bad file name or configuration datagram.
    with input_file:
        # Extract the file time from the file name
        input_file_name = input_file.name
        file_path, filename = os.path.split(input_file_name)

        if output_file_path is None:
            output_file_path = file_path

        # Name of the input file without its extension, used as prefix of the plot names
        outfile = filename.rpartition('.')[0]

        match = FILE_NAME_MATCHER.match(input_file_name)
        if not match:
            # Files retrieved from the instrument should always match the timestamp naming convention
            message = (
                "Unable to extract file time from input file name: %s."
                "Expected format *-DYYYYmmdd-THHMMSS.raw" % input_file_name)
            log.error(message)
            raise InstrumentDataException(message)
        file_time = match.group('Date') + match.group('Time')

        # Read binary file a block at a time
        raw = input_file.read(BLOCK_SIZE)

        # Read the configuration datagram, output at the beginning of the file
        length1, = unpack_from('<l', raw)
        byte_cnt = LENGTH_SIZE

        # Configuration datagram header. The parsed result is not used here; the call is
        # kept in case read_datagram_header rejects malformed input (not visible here).
        read_datagram_header(raw[byte_cnt:byte_cnt + DATAGRAM_HEADER_SIZE])
        byte_cnt += DATAGRAM_HEADER_SIZE

        # Configuration: header
        config_header = read_config_header(raw[byte_cnt:byte_cnt +
                                               CONFIG_HEADER_SIZE])
        byte_cnt += CONFIG_HEADER_SIZE

        transducer_count = config_header['transducer_count']
        byte_cnt += CONFIG_TRANSDUCER_SIZE * transducer_count

        # Compare length1 (from beginning of datagram) to length2 (from the end of datagram) to
        # the actual number of bytes read. A mismatch can indicate an invalid, corrupt, misaligned,
        # or missing configuration datagram or a reverse byte order binary data file.
        # A bad/missing configuration datagram header is a significant error.
        length2, = unpack_from('<l', raw, byte_cnt)
        if not (length1 == length2 == byte_cnt - LENGTH_SIZE):
            raise InstrumentDataException(
                "Length of configuration datagram and number of bytes read do not match: length1: %s"
                ", length2: %s, byte_cnt: %s. Possible file corruption or format incompatibility."
                % (length1, length2, byte_cnt + LENGTH_SIZE))

        first_ping_metadata = defaultdict(list)
        trans_keys = range(1, transducer_count + 1)
        trans_array = dict(
            (key, []) for key in trans_keys)  # transducer power data
        trans_array_time = dict(
            (key, []) for key in trans_keys)  # transducer time data
        td_f = dict.fromkeys(trans_keys)  # transducer frequency
        td_dr = dict.fromkeys(trans_keys)  # transducer depth measurement
        # Plot path per channel. Keyed by channel so the lookup does not depend on the
        # order in which channels first appear in the file.
        echogram_paths = {}

        # File offset at which the block currently held in 'raw' starts
        position = 0

        while raw:
            # We only care for the Sample datagrams, skip over all the other datagrams
            match = SAMPLE_MATCHER.search(raw)

            if not match:
                # Read in the next block w/ a token sized overlap and remember where that
                # block starts, so later match offsets are translated to the right file offset.
                position = max(input_file.tell() - 4, 0)
                input_file.seek(position)
                raw = input_file.read(BLOCK_SIZE)

                # The last 4 bytes is just the length2 of the last datagram
                if len(raw) <= 4:
                    break

                # Search the new block; there is no match object to use for this one yet.
                continue

            # Offset by size of length value
            match_start = match.start() - LENGTH_SIZE

            # Seek to the position of the length data before the token to read into numpy array
            input_file.seek(position + match_start)

            # Read and unpack the Sample Datagram into numpy array
            sample_data = numpy.fromfile(input_file,
                                         dtype=sample_dtype,
                                         count=1)
            if sample_data.size == 0:
                # Fewer bytes than one sample datagram header remain: nothing more to parse.
                log.warn("Truncated sample datagram at end of file: %s",
                         input_file_name)
                break
            channel = sample_data['channel_number'][0]

            # Check for a valid channel number that is within the number of transducers config
            # to prevent incorrectly indexing into the dictionaries (their keys start at 1).
            # An out of bounds channel number can indicate invalid, corrupt,
            # or misaligned datagram or a reverse byte order binary data file.
            # Log warning and continue to try and process the rest of the file.
            if channel < 1 or channel > transducer_count:
                log.warn(
                    "Invalid channel: %s for transducer count: %s."
                    "Possible file corruption or format incompatibility.",
                    channel, transducer_count)

                # Need current position in file to increment for next regex search offset
                position = input_file.tell()

                # Read the next block for regex search
                raw = input_file.read(BLOCK_SIZE)
                continue

            # Convert high and low bytes to internal time. int() performs the shift on
            # Python integers so the result cannot be limited by the width of the numpy field.
            internal_time = ((int(sample_data['high_date_time'][0]) << 32) +
                             int(sample_data['low_date_time'][0]))
            # Note: Strictly sequential time tags are not guaranteed.
            trans_array_time[channel].append(internal_time)

            # Gather metadata once per transducer channel number
            if not trans_array[channel]:
                # Floor division keeps the whole-number kHz label on every Python version
                frequency_khz = int(sample_data['frequency'][0]) // 1000
                echogram_path = os.path.join(
                    output_file_path,
                    outfile + '_' + str(frequency_khz) + 'k.png')
                echogram_paths[channel] = echogram_path

                first_ping_metadata[ZplscBParticleKey.FILE_TIME] = file_time
                first_ping_metadata[ZplscBParticleKey.ECHOGRAM_PATH].append(
                    echogram_path)
                first_ping_metadata[ZplscBParticleKey.CHANNEL].append(channel)
                for particle_key, field in first_ping_fields:
                    first_ping_metadata[particle_key].append(
                        sample_data[field][0])

                # Make only one particle for the first ping series containing data for all channels
                if channel == transducer_count:
                    # Convert from Windows time to NTP time.
                    time_stamp = internal_time / 10000.0 - windows_ntp_diff

                    # Put the metadata and timestamp in a tuple for creation of a particle.
                    # NOTE(review): the visible code never returns or uses particle_data;
                    # confirm whether the caller is expected to receive it.
                    particle_data = (first_ping_metadata, time_stamp)

                # Extract various calibration parameters used for generating echogram plot
                # This data doesn't change so extract it once per channel
                td_f[channel] = sample_data['frequency'][0]
                td_dr[channel] = sample_data['sound_velocity'][
                    0] * sample_data['sample_interval'][0] / 2

            count = sample_data['count'][0]

            # Extract array of power data
            power_data = numpy.fromfile(input_file,
                                        dtype=power_dtype,
                                        count=count)

            # Decompress power data to dB
            trans_array[channel].append(power_data['power_data'] * 10. *
                                        numpy.log10(2) / 256.)

            # The athwartship and alongship angle measurements are not used, but they must
            # be consumed so that the next read lands on the trailing length field.
            if sample_data['mode'][0] > 1:
                numpy.fromfile(input_file, dtype=angle_dtype, count=count)

            # Read and compare length1 (from beginning of datagram) to length2
            # (from the end of datagram). A mismatch can indicate an invalid, corrupt,
            # or misaligned datagram or a reverse byte order binary data file.
            # Log warning and continue to try and process the rest of the file.
            length2_data = numpy.fromfile(input_file, dtype=len_dtype, count=1)
            if length2_data.size == 0:
                log.warn(
                    "Sample datagram is missing its end length value: length1: %s. "
                    "Possible file truncation.", sample_data['length1'][0])
            elif sample_data['length1'][0] != length2_data['length2'][0]:
                log.warn(
                    "Mismatching beginning and end length values in sample datagram: length1"
                    ": %s, length2: %s. Possible file corruption or format incompatibility.",
                    sample_data['length1'][0], length2_data['length2'][0])

            # Need current position in file to increment for next regex search offset
            position = input_file.tell()

            # Read the next block for regex search
            raw = input_file.read(BLOCK_SIZE)

    # Driver spends most of the time plotting,
    # this can take longer for more transducers so lets break out the work
    processes = []

    for channel in td_f:
        if channel not in echogram_paths:
            # No ping was read for this channel, so there is nothing to plot.
            log.warn(
                "No sample data found for channel: %s. Skipping echogram plot.",
                channel)
            continue

        try:
            # The stored path already contains output_file_path; it is passed on unchanged.
            process = Process(
                target=generate_echogram_plot,
                args=(trans_array_time[channel], trans_array[channel],
                      td_f[channel], td_dr[channel], channel,
                      echogram_paths[channel]))
            process.start()
            processes.append(process)

        except Exception as e:
            log.error("Error: Unable to start process: %s", e)