def __init__(self, raw_data,
             port_timestamp=None,
             internal_timestamp=None,
             preferred_timestamp=DataParticleKey.PORT_TIMESTAMP,
             quality_flag=DataParticleValue.OK,
             new_sequence=None):
    # forward the caller's arguments rather than re-passing the defaults,
    # so explicitly supplied timestamps and flags are not silently dropped
    super(PhsenParserDataParticle, self).__init__(raw_data,
                                                  port_timestamp,
                                                  internal_timestamp,
                                                  preferred_timestamp,
                                                  quality_flag,
                                                  new_sequence)

    timestamp_match = TIMESTAMP_MATCHER.match(self.raw_data[:8])
    if not timestamp_match:
        raise RecoverableSampleException(
            "PhsenParserDataParticle: No regex match of timestamp [%s]"
            % self.raw_data[:8])

    self._data_match = DATA_MATCHER.match(self.raw_data[8:])
    if not self._data_match:
        raise RecoverableSampleException(
            "PhsenParserDataParticle: No regex match of parsed sample data [%s]"
            % self.raw_data[8:])

    # use the timestamp from the sio header as internal timestamp
    sec_since_1970 = int(self.raw_data[:8], 16)
    self.set_internal_timestamp(unix_time=sec_since_1970)
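# A minimal sketch of the SIO-header timestamp decoding used above: the first
# 8 characters of raw_data are hex-encoded seconds since the Unix epoch. The
# header value below is invented for illustration.
import time

sio_header_prefix = '51EC763C'               # hypothetical 8-char hex timestamp
sec_since_1970 = int(sio_header_prefix, 16)  # 1374451260
time.gmtime(sec_since_1970)                  # 2013-07-22 00:01:00 UTC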
def __init__(self, raw_data,
             port_timestamp=None,
             internal_timestamp=None,
             preferred_timestamp=DataParticleKey.PORT_TIMESTAMP,
             quality_flag=DataParticleValue.OK,
             new_sequence=None):
    super(FlortdCommonParserDataParticle, self).__init__(raw_data,
                                                         port_timestamp,
                                                         internal_timestamp,
                                                         preferred_timestamp,
                                                         quality_flag,
                                                         new_sequence)

    # the raw data has the timestamp from the sio header pre-pended to it,
    # match the first 8 bytes
    timestamp_match = TIMESTAMP_MATCHER.match(self.raw_data[:8])
    if not timestamp_match:
        raise RecoverableSampleException(
            "FlortdParserDataParticle: No regex match of timestamp [%s]"
            % self.raw_data[:8])

    # now match the flort data, excluding the sio header timestamp in the
    # first 8 bytes
    self._data_match = DATA_MATCHER.match(self.raw_data[8:])
    if not self._data_match:
        # use % interpolation here; passing the value as a second constructor
        # argument would leave the message unformatted
        raise RecoverableSampleException(
            "FlortdParserDataParticle: No regex match of parsed sample data [%s]"
            % self.raw_data[8:])

    # use the timestamp from the sio header as internal timestamp
    sec_since_1970 = int(self.raw_data[:8], 16)
    self.set_internal_timestamp(unix_time=sec_since_1970)
def __init__(self, raw_data,
             port_timestamp=None,
             internal_timestamp=None,
             preferred_timestamp=DataParticleKey.PORT_TIMESTAMP,
             quality_flag=DataParticleValue.OK,
             new_sequence=None):
    # forward the caller's arguments instead of re-passing the defaults, so
    # explicitly supplied timestamps and flags are not silently dropped
    super(DostadParserDataParticle, self).__init__(raw_data,
                                                   port_timestamp,
                                                   internal_timestamp,
                                                   preferred_timestamp,
                                                   quality_flag,
                                                   new_sequence)

    # the raw data has the timestamp from the sio header pre-pended to it,
    # match the first 8 bytes
    timestamp_match = TIMESTAMP_MATCHER.match(self.raw_data[:8])
    if not timestamp_match:
        raise RecoverableSampleException(
            "DostaParserDataParticle: No regex match of timestamp [%s]"
            % self.raw_data[:8])

    # now match the dosta data, excluding the sio header timestamp in the
    # first 8 bytes
    self._data_match = DATA_MATCHER.match(self.raw_data[8:])
    if not self._data_match:
        raise RecoverableSampleException(
            "DostaParserDataParticle: No regex match of parsed sample data [%s]"
            % self.raw_data[8:])

    posix_time = int(timestamp_match.group(0), 16)
    self.set_internal_timestamp(unix_time=float(posix_time))
def _process_data_match(self, data_match):
    """
    This method processes a data match. It will extract a metadata particle
    and insert it into the record_buffer when we have not already extracted
    the metadata and all header values exist. This method will also extract
    a data particle and append it to the record_buffer.
    @param data_match A regular expression match object for a cspp data record
    """
    params = data_match.group(0).split('\t')
    frame_type = params[3]

    data_particle = None

    # Extract the data record particle
    if frame_type == 'SLB':  # light frame
        data_particle = self._extract_sample(self._light_particle_class,
                                             None,
                                             data_match)
    elif frame_type == 'SDB':  # dark frame
        data_particle = self._extract_sample(self._dark_particle_class,
                                             None,
                                             data_match)
    else:
        log.warn('got invalid frame type %s', frame_type)
        self._exception_callback(RecoverableSampleException(
            'got invalid frame type %s' % frame_type))

    # If we created a data particle, let's append the particle to the result
    # particles to return and increment the state data positioning
    if data_particle:
        if not self._metadata_extracted:
            # Once the first data particle is read, all available header lines
            # will have been read and inserted into the header state
            # dictionary. Only the source file is required to create a
            # metadata particle.
            if self._header_state[DefaultHeaderKey.SOURCE_FILE] is not None:
                metadata_particle = self._extract_sample(
                    self._metadata_particle_class,
                    None,
                    (copy.copy(self._header_state), data_match))

                if metadata_particle:
                    # We're going to insert the metadata particle so that it
                    # is the first in the list and set the position to 0, as
                    # it cannot have the same position as the non-metadata
                    # particle
                    self._record_buffer.insert(0, metadata_particle)
                else:
                    # metadata particle was not created successfully
                    log.warn('Unable to create metadata particle')
                    self._exception_callback(RecoverableSampleException(
                        'Unable to create metadata particle'))
            else:
                # no source file path, don't create metadata particle
                log.warn('No source file, not creating metadata particle')
                self._exception_callback(RecoverableSampleException(
                    'No source file, not creating metadata particle'))

            # need to set metadata extracted to true so we don't keep creating
            # the metadata, even if it failed
            self._metadata_extracted = True

        self._record_buffer.append(data_particle)
def _build_parsed_values(self):
    """
    Encode the common and bands parameters from the raw data using the
    particle maps, and extract the non directional psd array
    """
    particle_parameters = super(WavssADclNonDirectionalDataParticle,
                                self)._build_parsed_values()

    band_len = len(self.band_parameter_types)
    if len(self.payload) < (band_len + 2):
        raise RecoverableSampleException(
            'missing bands particle map header data')

    bands_header = self.payload[:band_len]
    psd_payload = self.payload[band_len:]

    num_bands = int(self.payload[0])
    expected_payload_len = band_len + num_bands
    if len(self.payload) != expected_payload_len:
        raise RecoverableSampleException(
            'unexpected number of non-directional parameters (got %d, expected %d)'
            % (len(self.payload), expected_payload_len))

    # append the band description parameters
    for value, (name, ptype) in zip(bands_header, self.band_parameter_types):
        particle_parameters.append(self._encode_value(name, value, ptype))

    # append the non-directional PSD array, from the end of the frequency
    # spacing group to the last floating point match
    particle_parameters.append(
        self._encode_value(ArrayParticleKeys.PSD_NON_DIRECTIONAL,
                           psd_payload,
                           list_encoder_factory(float)))

    return particle_parameters
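# A small illustration of the payload split above, using invented values.
# Suppose the particle map defines four band parameters (band_len == 4) and
# the first payload field is the number of PSD bands.
payload = ['3', '0.05', '0.01', '0', '1.2e-3', '4.5e-3', '2.1e-3']
band_len = 4
bands_header = payload[:band_len]  # ['3', '0.05', '0.01', '0']
psd_payload = payload[band_len:]   # ['1.2e-3', '4.5e-3', '2.1e-3']
num_bands = int(payload[0])        # 3, so len(psd_payload) == num_bands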
def _build_parsed_values(self):
    """
    Set the timestamp and encode the common particles from the raw data
    using COMMON_PARTICLE_MAP
    """
    utc_time, self.dcl_data, checksum = self.extract_dcl_parts(self.raw_data)

    if utc_time:
        # DCL controller timestamp is the port_timestamp
        self.set_port_timestamp(unix_time=utc_time)

    if not self.dcl_data:
        raise RecoverableSampleException('Missing DCL data segment')

    if not checksum or checksum != self.compute_checksum(self.dcl_data):
        self.contents[DataParticleKey.QUALITY_FLAG] = DataParticleValue.CHECKSUM_FAILED

    csv = self.dcl_data.split(',')
    if len(csv) < 7:
        raise RecoverableSampleException(
            'DCL format error: missing items from common wavss header')

    self.marker, self.date, self.time, self.serial_number, self.buoy_id, \
        self.latitude, self.longitude = csv[:7]

    # Instrument timestamp is the internal_timestamp
    instrument_timestamp = timestamp_yyyymmddhhmmss_to_ntp(self.date + self.time)
    self.set_internal_timestamp(instrument_timestamp)

    self.payload = csv[7:]

    return [self._encode_value('serial_number', self.serial_number, str)]
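# A hedged sketch of what a yyyymmddhhmmss-to-NTP conversion involves; the
# real timestamp_yyyymmddhhmmss_to_ntp helper may differ in details. NTP time
# counts seconds from 1900-01-01, Unix time from 1970-01-01.
import calendar
import time

def yyyymmddhhmmss_to_ntp_sketch(date_time_string):
    # e.g. date_time_string == '20140910' + '235959'
    unix_time = calendar.timegm(time.strptime(date_time_string, '%Y%m%d%H%M%S'))
    return unix_time + 2208988800  # seconds between the 1900 and 1970 epochs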
def _process_chunk_not_containing_data_record_or_header_part(self, chunk):
    """
    This method processes a chunk that does not contain a data record or
    header. This case is not applicable to "non_data". For cspp file streams,
    we expect some lines in the file that we do not care about, and we will
    not consider them "non_data".
    @param chunk A regular expression match object for a cspp header row
    """
    if HEX_ASCII_LINE_MATCHER.match(chunk):
        # we found a line starting with the timestamp, depth, and
        # suspect timestamp, followed by all hex ascii chars
        log.warn('got hex ascii corrupted data %s at position %s',
                 chunk, self._read_state[StateKey.POSITION])
        self._exception_callback(RecoverableSampleException(
            "Found hex ascii corrupted data: %s" % chunk))

    # ignore the expected timestamp line and any lines matching the ignore
    # regex, otherwise data is unexpected
    elif not TIMESTAMP_LINE_MATCHER.match(chunk) and not \
            (self._ignore_matcher is not None and self._ignore_matcher.match(chunk)):
        # Unexpected data was found
        log.warn('got unrecognized row %s at position %s',
                 chunk, self._read_state[StateKey.POSITION])
        self._exception_callback(RecoverableSampleException(
            "Found an invalid chunk: %s" % chunk))
def parse_file(self):
    """
    Entry point into parsing the file
    Loop through the file one ensemble at a time
    """
    position = 0  # set position to beginning of file
    header_id_bytes = self._stream_handle.read(2)  # read the first two bytes of the file

    while header_id_bytes:  # will be an empty string at EOF

        if header_id_bytes == ADCPS_PD0_HEADER_REGEX:
            # get the ensemble size from the next 2 bytes (excludes checksum bytes)
            num_bytes = struct.unpack("<H", self._stream_handle.read(2))[0]

            self._stream_handle.seek(position)  # reset to beginning of ensemble
            input_buffer = self._stream_handle.read(num_bytes + 2)  # read entire ensemble

            if len(input_buffer) == num_bytes + 2:  # make sure there are enough bytes including checksum

                try:
                    pd0 = AdcpPd0Record(input_buffer, glider=self._glider)

                    velocity = self._particle_classes['velocity'](pd0)
                    self._record_buffer.append(velocity)

                    config = self._particle_classes['config'](pd0)
                    engineering = self._particle_classes['engineering'](pd0)

                    for particle in [config, engineering]:
                        if self._changed(particle):
                            self._record_buffer.append(particle)

                    if hasattr(pd0, 'bottom_track'):
                        bt = self._particle_classes['bottom_track'](pd0)
                        bt_config = self._particle_classes['bottom_track_config'](pd0)

                        self._record_buffer.append(bt)
                        if self._changed(bt_config):
                            self._record_buffer.append(bt_config)

                except PD0ParsingException:
                    # seek to just past this header match
                    # self._stream_handle.seek(position + 2)
                    self._exception_callback(RecoverableSampleException("Exception parsing PD0"))

            else:  # reached EOF
                log.warn("not enough bytes left for complete ensemble")
                self._exception_callback(UnexpectedDataException(
                    "Found incomplete ensemble at end of file"))

        else:  # did not get header ID bytes
            log.warn('did not find header ID bytes')
            self._exception_callback(RecoverableSampleException(
                "Did not find Header ID bytes where expected, trying next 2 bytes"))

        position = self._stream_handle.tell()  # set the new file position
        header_id_bytes = self._stream_handle.read(2)  # read the next two bytes of the file
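# A minimal sketch of the little-endian size decode used above, on a
# fabricated four-byte PD0 fragment: two header ID bytes followed by a
# two-byte ensemble size.
import struct

fragment = b'\x7f\x7f\x52\x03'                     # hypothetical header + size
num_bytes = struct.unpack("<H", fragment[2:4])[0]  # 0x0352 == 850 bytes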
def _process_data_match(self, particle_class, data_match, result_particles):
    """
    This method processes a data match. It will extract a metadata particle
    and insert it into result_particles when we have not already extracted
    the metadata and all header values exist. This method will also extract
    a data particle and append it to the result_particles.
    @param particle_class is the class of particle to be created
    @param data_match A regular expression match object for a cspp data record
    @param result_particles A list which should be updated to include any
        particles extracted
    """
    # Extract the data record particle
    data_particle = self._extract_sample(particle_class, None, data_match, None)

    # If we created a data particle, let's append the particle to the result
    # particles to return and increment the state data positioning
    if data_particle:
        if not self._read_state[StateKey.METADATA_EXTRACTED]:
            # once the first data particle is read, all header lines should
            # have also been read
            # Source File is the only part of the header that is required
            if self._header_state[DefaultHeaderKey.SOURCE_FILE] is not None:
                metadata_particle = self._extract_sample(
                    self._metadata_particle_class,
                    None,
                    (copy.copy(self._header_state), data_match),
                    None)

                if metadata_particle:
                    # We're going to insert the metadata particle so that it
                    # is the first in the list and set the position to 0, as
                    # it cannot have the same position as the non-metadata
                    # particle
                    result_particles.insert(0, (metadata_particle,
                                                {StateKey.POSITION: 0,
                                                 StateKey.METADATA_EXTRACTED: True}))
                else:
                    # metadata particle was not created successfully
                    log.warn('Unable to create metadata particle')
                    self._exception_callback(RecoverableSampleException(
                        'Unable to create metadata particle'))
            else:
                # no source file path, don't create metadata particle
                log.warn('No source file, not creating metadata particle')
                self._exception_callback(RecoverableSampleException(
                    'No source file, not creating metadata particle'))

            # need to set metadata extracted to true so we don't keep creating
            # the metadata, even if it failed
            self._read_state[StateKey.METADATA_EXTRACTED] = True

        result_particles.append((data_particle, copy.copy(self._read_state)))
def _build_parsed_values(self):
    """
    Encode the common and bands parameters from the raw data using the
    particle maps, and extract the 3 mean directional arrays
    """
    particle_parameters = super(WavssADclMeanDirectionalDataParticle,
                                self)._build_parsed_values()

    band_len = len(self.band_parameter_types)
    if len(self.payload) < (band_len + 2):
        raise RecoverableSampleException(
            'missing bands particle map header data')

    bands_header = self.payload[:band_len]

    num_bands = int(self.payload[0])
    expected_payload_len = band_len + num_bands * 3 + 2
    if len(self.payload) != expected_payload_len:
        raise RecoverableSampleException(
            'unexpected number of mean-directional parameters (got %d, expected %d)'
            % (len(self.payload), expected_payload_len))

    # append the band description parameters
    for value, (name, ptype) in zip(bands_header, self.band_parameter_types):
        particle_parameters.append(self._encode_value(name, value, ptype))

    # append the mean directional specific parameters
    mean_header = self.payload[band_len:]
    for value, (name, ptype) in zip(mean_header, self.parameter_types):
        particle_parameters.append(self._encode_value(name, value, ptype))

    # split up the array into 3 arrays, each number of bands in length, taking
    # every 3rd item; size of array checked in wavss parser
    spectra_payload = self.payload[band_len + 2:]
    psd = spectra_payload[0:num_bands * 3:3]
    mean_dir = spectra_payload[1:num_bands * 3:3]
    dir_spread = spectra_payload[2:num_bands * 3:3]

    # to match with non-directional data, the mean directional arrays must be
    # padded with NaNs so they are the same size
    for i in xrange(num_bands, MEAN_DIR_NUMBER_BANDS):
        psd.append(np.nan)
        mean_dir.append(np.nan)
        dir_spread.append(np.nan)

    # append and encode the particle mean directional arrays
    particle_parameters.extend((
        self._encode_value(ArrayParticleKeys.PSD_MEAN_DIRECTIONAL,
                           psd, float_list_encoder),
        self._encode_value(ArrayParticleKeys.MEAN_DIRECTION_ARRAY,
                           mean_dir, float_list_encoder),
        self._encode_value(ArrayParticleKeys.DIRECTIONAL_SPREAD_ARRAY,
                           dir_spread, float_list_encoder)))

    return particle_parameters
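# A toy illustration of the stride-of-3 de-interleaving above: the spectra
# payload alternates PSD, mean direction, and directional spread values for
# each band. Values here are invented placeholders.
spectra_payload = ['p0', 'm0', 's0', 'p1', 'm1', 's1']
num_bands = 2
psd = spectra_payload[0:num_bands * 3:3]         # ['p0', 'p1']
mean_dir = spectra_payload[1:num_bands * 3:3]    # ['m0', 'm1']
dir_spread = spectra_payload[2:num_bands * 3:3]  # ['s0', 's1']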
def process_velocity_data(self):
    """
    Handles the processing of velocity data particles and handles error
    processing if events which should have occurred prior to receiving a
    velocity record did not happen.
    """
    # Get the timestamp of the velocity record in case we need it for the
    # metadata particle.
    timestamp = VelptAbDclDataParticle.get_timestamp(self._current_record)

    # If this flag is still indicating TRUE, it means we found NO diagnostic
    # records. That's an error!
    if self._first_diagnostics_record:
        self._first_diagnostics_record = False
        log.warning('No diagnostic records present, just a header. '
                    'No particles generated')
        self._exception_callback(RecoverableSampleException(
            'No diagnostic records present, just a header. '
            'No particles generated'))

    # This flag indicates that diagnostics were being produced and now that
    # the first velocity record has been encountered, it's time to match the
    # number of diagnostics particles produced against the number of
    # diagnostic records expected from the diagnostics header.
    if self._sending_diagnostics:
        self._sending_diagnostics = False
        if self._total_diagnostic_records != self._diagnostics_count:
            if self._diagnostics_count < self._total_diagnostic_records:
                log.warning('Not enough diagnostics records, got %s, expected %s',
                            self._diagnostics_count, self._total_diagnostic_records)
                self._exception_callback(RecoverableSampleException(
                    'Not enough diagnostics records'))
            elif self._diagnostics_count > self._total_diagnostic_records:
                log.warning('Too many diagnostics records, got %s, expected %s',
                            self._diagnostics_count, self._total_diagnostic_records)
                self._exception_callback(RecoverableSampleException(
                    'Too many diagnostics records'))

        self._diagnostics_count = 0
        self._total_diagnostic_records = 0

    velocity_data_dict = VelptAbDclDataParticle.generate_data_dict(self._current_record)

    particle = self._extract_sample(self._velocity_data_class,
                                    None,
                                    velocity_data_dict,
                                    internal_timestamp=timestamp)

    self._record_buffer.append(particle)
def build_instrument_metadata_particle(self, timestamp):
    """
    The instrument metadata particle is built from three separate records:
    the hardware configuration record, the head configuration record and the
    user configuration record. These should all be concentrated at the very
    beginning of the recovered data file. This assumption is made because the
    instrument is configured before being deployed, so the records holding
    this data would be stored first. The data files seen as of the date this
    code was written all start with the three config records, then a quantity
    of velocity data records, then a group of diagnostics (header plus data
    records) followed by more velocity data records. This sequence can be
    repeated a number of times in one file, though the config data will only
    occur once. It is remotely possible that a group of diagnostics could
    occur before any velocity data records are encountered. Publishing of
    the instrument metadata particle is triggered by encountering either the
    first velocity data record or the first diagnostics data record.
    Counting to see if all three configuration records were encountered and
    then producing the instrument metadata particle was rejected, as it is
    remotely possible that one of the configuration records could be missing.
    """
    self._config_metadata_built = True

    date_time_group = VelptAbDataParticle.get_date_time_string(self._current_record)
    instrument_metadata_dict = VelptAbDataParticle.generate_instrument_metadata_dict(
        date_time_group, self._hardware_config_dict,
        self._head_config_dict, self._user_config_dict)

    particle = self._extract_sample(self._instrument_metadata_class,
                                    None,
                                    instrument_metadata_dict,
                                    timestamp)
    self._record_buffer.append(particle)

    # Check to see if all the configuration records were found
    if not self._hardware_config_dict_generated:
        # Log a warning for the missing hardware config record
        log.warning('Hardware configuration record invalid or not present in recovered data')
        self._exception_callback(RecoverableSampleException(
            'Hardware configuration record invalid or not present in recovered data'))

    if not self._head_config_dict_generated:
        # Log a warning for the missing head config record
        log.warning('Head configuration record invalid or not present in recovered data')
        self._exception_callback(RecoverableSampleException(
            'Head configuration record invalid or not present in recovered data'))

    if not self._user_config_dict_generated:
        # Log a warning for the missing user config record
        log.warning('User configuration record invalid or not present in recovered data')
        self._exception_callback(RecoverableSampleException(
            'User configuration record invalid or not present in recovered data'))
def parse_file(self):
    """
    Parse through the file, pulling single lines and comparing to the
    established patterns, generating particles for data lines
    """
    for line in self._stream_handle:

        # check for a match against the corrected Endurance pattern
        match = ENDURANCE_CORR_MATCHER.match(line)

        if match is not None:
            log.debug('record found')
            data_particle = self._extract_sample(self._particle_class,
                                                 None, match, None)
            self._record_buffer.append(data_particle)
        else:
            # check to see if this is any other expected format
            test_uncorr = UNCORR_MATCHER.match(line)
            test_pioneer = PIONEER_MATCHER.match(line)
            test_meta = METADATA_MATCHER.match(line)

            if test_uncorr is None and test_meta is None and test_pioneer is None:
                # something in the data didn't match a required regex, so
                # raise an exception and press on
                message = "Error while decoding parameters in data: [%s]" % line
                self._exception_callback(RecoverableSampleException(message))
def _build_parsed_values(self):
    """
    Take something in the data format and turn it into an array of
    dictionaries defining the data in the particle with the appropriate tag.
    @throws RecoverableSampleException If there is a problem with sample creation
    """
    results = []

    try:
        # Append the base metadata parsed values to the results to return
        results += self._build_metadata_parsed_values()

        data_match = self.raw_data[MetadataRawDataKey.DATA_MATCH]

        # Set the internal timestamp
        internal_timestamp_unix = numpy.float(data_match.group(
            DataMatchesGroupNumber.PROFILER_TIMESTAMP))
        self.set_internal_timestamp(unix_time=internal_timestamp_unix)

    except (ValueError, TypeError, IndexError) as ex:
        log.warn("Exception when building parsed values")
        raise RecoverableSampleException(
            "Error (%s) while decoding parameters in data: [%s]"
            % (ex, self.raw_data))

    return results
def _build_parsed_values(self):
    """
    Take something in the data format and turn it into an array of
    dictionaries defining the data in the particle with the appropriate tag.
    @throws RecoverableSampleException If there is a problem with sample creation
    """
    results = []

    try:
        # Process each of the instrument particle parameters
        for name, group, function in ENGINEERING_PARTICLE_ENCODING_RULES:
            results.append(self._encode_value(name,
                                              self.raw_data.group(group),
                                              function))

        # Set the internal timestamp
        internal_timestamp_unix = numpy.float(self.raw_data.group(
            DataMatchesGroupNumber.PROFILER_TIMESTAMP))
        self.set_internal_timestamp(unix_time=internal_timestamp_unix)

    # We shouldn't end up with an exception due to the strongly specified
    # regex, but we will ensure we catch any potential errors just in case
    except (ValueError, TypeError, IndexError) as ex:
        log.warn("Exception when building parsed values")
        raise RecoverableSampleException(
            "Error (%s) while decoding parameters in data: [%s]"
            % (ex, self.raw_data))

    return results
def __init__(self, raw_data,
             port_timestamp=None,
             internal_timestamp=None,
             preferred_timestamp=DataParticleKey.PORT_TIMESTAMP,
             quality_flag=DataParticleValue.OK,
             new_sequence=None):
    super(AdcpsJlnSioDataParticle, self).__init__(raw_data,
                                                  port_timestamp,
                                                  internal_timestamp,
                                                  preferred_timestamp,
                                                  quality_flag,
                                                  new_sequence)

    self._data_match = DATA_MATCHER.match(self.raw_data[8:])

    if not self._data_match:
        raise RecoverableSampleException(
            "AdcpsJlnSioParserDataParticle: No regex match of "
            "parsed sample data [%s]" % self.raw_data[8:])

    date_str = self.unpack_date(self._data_match.group(0)[11:19])
    unix_time = utilities.zulu_timestamp_to_utc_time(date_str)
    self.set_internal_timestamp(unix_time=unix_time)
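# A hedged sketch of the zulu-timestamp conversion used above; the actual
# utilities.zulu_timestamp_to_utc_time helper and the on-wire date format may
# differ from what is assumed here.
import calendar
import time

def zulu_to_unix_sketch(zulu_string):
    # e.g. zulu_string == '2014-09-10T23:59:59.00Z'
    base = zulu_string.split('.')[0]
    return calendar.timegm(time.strptime(base, '%Y-%m-%dT%H:%M:%S'))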
def log_warning(self, msg_text, which_line):
    """
    :param msg_text: The text to display in the log
    :param which_line: The line number where the problem occurred
    """
    # interpolate the line number into the message before constructing the
    # exception; passing it as a second argument would leave '%d' unformatted
    error_message = (msg_text + ' %d - No particle generated') % which_line
    log.warn(error_message)
    self._exception_callback(RecoverableSampleException(error_message))
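# Example use of the helper above, assuming a parser instance with an
# exception callback wired up; the message and line number are invented:
#
#     self.log_warning('Found data with wrong number of columns on line', 42)
#
# produces: 'Found data with wrong number of columns on line 42 - No particle generated'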
def parse_file(self):
    """
    Parse through the file, pulling single lines and comparing to the
    established patterns, generating particles for data lines
    """
    for line in self._stream_handle:

        # first check for a match against the uncorrected pattern
        match = DATA_MATCHER.match(line)

        if match is not None:
            log.debug('record found')
            data_particle = self._extract_sample(self._particle_class,
                                                 None, match, None)
            self._record_buffer.append(data_particle)
        else:
            # just ignore the status messages
            test_status = STATUS_MATCHER.match(line)

            if test_status is None:
                # empty lines exist in all sample files; suppress the warning
                # due to an empty line
                test_empty = EMPTY_MATCHER.match(line)

                if test_empty is None:
                    # something in the data didn't match a required regex, so
                    # raise an exception and press on
                    message = "Error while decoding parameters in data: [%s]" % line
                    self._exception_callback(RecoverableSampleException(message))
def parse_file(self):
    """
    Parse through the file, pulling single lines and comparing to the
    established patterns, generating particles for data lines
    """
    for line in self._stream_handle:

        match = BEGIN_MATCHER.match(line)

        if match is not None:
            count = match.group(DataMatchesGroupNumber.NUM_WAVELENGTHS)
            data_regex = self._build_data_regex(BEGIN_REGEX, count)

            fields = re.match(data_regex, line)

            if fields is not None:
                self._process_data_match(fields, self._record_buffer)
            else:  # did not match the regex
                log.warn("line did not match regex %s", line)
                self._exception_callback(RecoverableSampleException(
                    "Found an invalid line: %s" % line))
        else:
            # Check for header part match
            header_part_match = HEADER_PART_MATCHER.match(line)

            if header_part_match is not None:
                self._process_header_part_match(header_part_match)
            else:
                self._process_line_not_containing_data_record_or_header_part(line)
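# A simplified sketch of building a data regex from a wavelength count, in
# the spirit of _build_data_regex above; the real helper and patterns differ.
import re

def build_data_regex_sketch(begin_regex, count):
    # append one tab-separated float group per wavelength, anchored at line end
    return begin_regex + (r'\t(\d+\.\d+)' * int(count)) + r'\s*$'

pattern = re.compile(build_data_regex_sketch(r'^(\d+)', '3'))
pattern.match('3\t412.0\t443.5\t490.2')  # matches, with one group per wavelength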
def _build_parsed_values(self):
    """
    Take something in the data format and turn it into an array of
    dictionaries defining the data in the particle with the appropriate tag.
    @throws RecoverableSampleException If there is a problem with sample creation
    """
    try:
        # Generate a particle by calling encode_value for each entry in the
        # Instrument Particle Mapping table, where each entry is a tuple
        # containing the particle field name and a function to use for data
        # conversion.
        return [self._encode_value(name, self.raw_data[name], function)
                for name, function in WINCH_CSPP_PARTICLE_ENCODING_RULES]

    except (ValueError, TypeError, IndexError) as ex:
        log.warn("Exception when building parsed values")
        raise RecoverableSampleException(
            "Error (%s) while encoding parameters in data: [%s]"
            % (ex, self.raw_data))
def _process_chunk_not_containing_data_record_or_header_part(self, chunk):
    """
    This method processes a chunk that does not contain a data record or
    header. This case is not applicable to "non_data". For cspp file streams,
    we expect some lines in the file that we do not care about, and we will
    not consider them "non_data".
    @param chunk A regular expression match object for a cspp header row
    """
    # Check for the expected timestamp line we will ignore
    timestamp_line_match = TIMESTAMP_LINE_MATCHER.match(chunk)
    # Check for other status messages we can ignore
    ignore_match = IGNORE_MATCHER.match(chunk)

    if timestamp_line_match is not None or ignore_match is not None:
        # Ignore
        pass
    else:
        # OK. We got unexpected data
        log.warn('got unrecognized row %s at position %s',
                 chunk, self._read_state[StateKey.POSITION])
        self._exception_callback(RecoverableSampleException(
            "Found an invalid chunk: %s" % chunk))
def _build_parsed_values(self):
    """
    Take something in the data format and turn it into an array of
    dictionaries defining the data in the particle with the appropriate tag.
    @throws RecoverableSampleException If there is a problem with sample creation
    """
    results = []

    # split the entire matched line by tabs, which will return each
    # parameter's value as an array of strings
    params = self.raw_data.group(0).split('\t')
    if len(params) < NUM_FIELDS:
        log.warn('Not enough fields could be parsed from the data %s',
                 self.raw_data.group(0))
        raise RecoverableSampleException(
            'Not enough fields could be parsed from the data %s'
            % self.raw_data.group(0))

    for name, index, encode_function in self._parameter_map:
        if name == self._spectral_channels:
            # spectral channels is an array of ints, need to do the extra map
            results.append(self._encode_value(
                name, map(int, params[index:GRP_SPECTRAL_END]), encode_function))
        else:
            results.append(self._encode_value(name, params[index], encode_function))

    internal_timestamp_unix = float(params[GRP_PROFILER_TIMESTAMP])
    self.set_internal_timestamp(unix_time=internal_timestamp_unix)

    return results
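# A toy version of the spectral-channel handling above: the channel counts
# arrive as tab-separated strings and are converted to a list of ints. The
# index values are invented for the example.
params = ['42.0', '100', '101', '102', '103']
index, spectral_end = 1, 5
channels = map(int, params[index:spectral_end])  # [100, 101, 102, 103] in Python 2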
def parse_file(self):
    """
    Parse Winch CSPP text file.
    """
    # loop over all lines in the data file and parse the data to generate
    # Winch CSPP particles
    for line in self._stream_handle:
        match = WINCH_DATA_MATCHER.match(line)
        if not match:
            # If it is not a valid Winch Cspp record, ignore it.
            error_message = 'Winch Cspp data regex does not match for line: %s' % line
            log.warn(error_message)
            self._exception_callback(RecoverableSampleException(error_message))
        else:
            date = match.group(WinchCsppParserDataParticleKey.DATE)
            year, month, day = date.split('-')
            hour, minute, second = match.group(
                WinchCsppParserDataParticleKey.TIME).split(':')

            unix_time = calendar.timegm((int(year), int(month), int(day),
                                         int(hour), int(minute), float(second)))
            time_stamp = ntplib.system_to_ntp_time(unix_time)

            # Generate a Winch CSPP particle using the group dictionary and
            # add it to the internal buffer
            particle = self._extract_sample(WinchCsppDataParticle, None,
                                            match.groupdict(), time_stamp)
            if particle is not None:
                self._record_buffer.append(particle)
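# A minimal run of the date/time-to-NTP conversion above, with an invented
# timestamp. calendar.timegm interprets the tuple as UTC, and ntplib shifts
# the epoch from 1970 back to 1900.
import calendar
import ntplib

unix_time = calendar.timegm((2014, 9, 10, 23, 59, 59.0))
ntp_time = ntplib.system_to_ntp_time(unix_time)  # unix_time + 2208988800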
def _build_parsed_values(self):
    """
    Take something in the data format and turn it into an array of
    dictionaries defining the data in the particle with the appropriate tag.
    @throws SampleException If there is a problem with sample creation
    """
    results = []

    try:
        # Process each of the instrument particle parameters
        for rule in INSTRUMENT_PARTICLE_ENCODING_RULES:
            results.append(self._encode_value(
                rule[PARTICLE_KEY_INDEX],
                self.raw_data.group(rule[DATA_MATCHES_GROUP_NUMBER_INDEX]),
                rule[TYPE_ENCODING_INDEX]))

        # Set the internal timestamp
        internal_timestamp_unix = numpy.float(
            self.raw_data.group(DataMatchesGroupNumber.PROFILER_TIMESTAMP))
        self.set_internal_timestamp(unix_time=internal_timestamp_unix)

    except (ValueError, TypeError, IndexError) as ex:
        log.warn("Exception when building parsed values")
        raise RecoverableSampleException(
            "Error (%s) while decoding parameters in data: %s"
            % (ex, self.raw_data))

    log.debug('FlortDjCsppInstrumentDataParticle: particle=%s', results)

    return results
def _generate_metadata_particle(self):
    """
    This function generates a metadata particle.
    """
    if self._metadata_matches_dict[MetadataMatchKey.FILE_TIME_MATCH] is None:
        message = "Unable to create metadata particle due to missing file time"
        log.warn(message)
        self._exception_callback(RecoverableSampleException(message))
    else:
        particle_data = dict()

        for key in self._metadata_matches_dict.keys():
            log.trace('key: %s, particle_data: %s', key, particle_data)
            if self._metadata_matches_dict[key]:
                self._process_metadata_match_dict(key, particle_data)

        utc_time = formatted_timestamp_utc_time(
            particle_data[PhsenAbcdefImodemDataParticleKey.FILE_TIME],
            "%Y%m%d %H%M%S")
        ntp_timestamp = ntplib.system_to_ntp_time(utc_time)

        # Generate the metadata particle class and add the result to the list
        # of particles to be returned.
        particle = self._extract_sample(self._metadata_particle_class,
                                        None,
                                        particle_data,
                                        internal_timestamp=ntp_timestamp)
        if particle is not None:
            log.trace("Appending metadata particle to record buffer: %s",
                      particle.generate())
            self._record_buffer.append(particle)
def _build_parsed_values(self):
    """
    Take something in the data format and turn it into an array of
    dictionaries defining the data in the particle with the appropriate tag.
    @throws RecoverableSampleException If there is a problem with sample creation
    """
    results = []

    try:
        results.append(self._encode_value(
            CtdpfJCsppParserDataParticleKey.PROFILER_TIMESTAMP,
            self.raw_data.group(DataMatchesGroupNumber.PROFILER_TIMESTAMP),
            numpy.float))
        results.append(self._encode_value(
            CtdpfJCsppParserDataParticleKey.SUSPECT_TIMESTAMP,
            self.raw_data.group(DataMatchesGroupNumber.SUSPECT_TIMESTAMP),
            encode_y_or_n))
        results.append(self._encode_value(
            CtdpfJCsppParserDataParticleKey.TEMPERATURE,
            self.raw_data.group(DataMatchesGroupNumber.TEMPERATURE),
            float))
        results.append(self._encode_value(
            CtdpfJCsppParserDataParticleKey.CONDUCTIVITY,
            self.raw_data.group(DataMatchesGroupNumber.CONDUCTIVITY),
            float))
        results.append(self._encode_value(
            CtdpfJCsppParserDataParticleKey.PRESSURE,
            self.raw_data.group(DataMatchesGroupNumber.PRESSURE),
            float))
        results.append(self._encode_value(
            CtdpfJCsppParserDataParticleKey.SALINITY,
            self.raw_data.group(DataMatchesGroupNumber.SALINITY),
            float))

        # Set the internal timestamp
        internal_timestamp_unix = numpy.float(
            self.raw_data.group(DataMatchesGroupNumber.PROFILER_TIMESTAMP))
        self.set_internal_timestamp(unix_time=internal_timestamp_unix)

    except (ValueError, TypeError, IndexError) as ex:
        log.warn("Exception when building parsed values")
        raise RecoverableSampleException(
            "Error (%s) while decoding parameters in data: [%s]"
            % (ex, self.raw_data))

    return results
def _validate_checksum(self, input_buffer):
    # the number of bytes in the record is immediately after the sentinel
    # bytes and does not include the 2 checksum bytes
    num_bytes = struct.unpack("<H", input_buffer[2:4])[0]

    record_start = 0
    record_end = num_bytes

    # if there is enough in the buffer, check the record
    if record_end <= len(input_buffer[0:-CHECKSUM_BYTES]):
        # make sure the checksum bytes are in the buffer too

        total = 0
        for i in range(record_start, record_end):
            total += ord(input_buffer[i])  # add up all the bytes in the record

        # masking with CHECKSUM_MODULO (0xFFFF) is equivalent to taking the
        # sum modulo 65536
        checksum = total & CHECKSUM_MODULO
        # log.debug("checksum & total = %d %d ", checksum, total)

        if checksum == struct.unpack(
                "<H", input_buffer[record_end:record_end + CHECKSUM_BYTES])[0]:
            return True
        else:
            err_msg = 'ADCPT ACFGM DCL RECORD FAILED CHECKSUM'
            self._exception_callback(RecoverableSampleException(err_msg))
            log.warn(err_msg)
            return False
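# A toy run of the checksum arithmetic above: sum every byte of the record,
# mask to 16 bits (equivalent to mod 65536), and compare against the
# little-endian checksum word that follows the record. The record bytes are
# fabricated.
import struct

record = b'\x7f\x7f\x06\x00\x01\x02'
total = sum(ord(c) for c in record)  # Python 2 bytes iterate as 1-char strings
checksum = total & 0xFFFF            # 263
struct.pack("<H", checksum)          # '\x07\x01'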
def _build_parsed_values(self):
    """
    Take something in the data format and turn it into a particle with the
    appropriate tag.
    @throws SampleException If there is a problem with sample creation
    """
    match = HEADER_FOOTER_MATCHER.search(self.raw_data)
    if not match:
        # use % interpolation here; passing the value as a second constructor
        # argument would leave the message unformatted
        raise RecoverableSampleException(
            "AdcpsJlnStcMetadataParserDataParticle: No regex match of "
            "parsed sample data [%s]" % self.raw_data)

    result = [self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_TIMESTAMP,
                                 match.group(1), str),
              self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_ID,
                                 match.group(2), int),
              self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_SERIAL_NUMBER,
                                 match.group(3), int),
              self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_VOLTS,
                                 match.group(4), float),
              self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_RECORDS,
                                 match.group(5), int),
              self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_LENGTH,
                                 match.group(6), int),
              self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_EVENTS,
                                 match.group(7), int),
              self._encode_value(AdcpsJlnStcMetadataParserDataParticleKey.ADCPS_JLN_SAMPLES_WRITTEN,
                                 match.group(8), int)]

    return result
def parse_file(self):
    """
    Parse through the file, pulling single lines and comparing to the
    established patterns, generating particles for data lines
    """
    for line in self._stream_handle:

        data_match = NEW_DATA_MATCHER.match(line)
        if data_match:
            # DCL controller timestamp is the port_timestamp
            port_timestamp = dcl_time_to_ntp(data_match.groups()[DCL_TIMESTAMP])

            # particle-ize the data block received, return the record
            data_particle = self._extract_sample(
                self._particle_class,
                None,
                data_match,
                port_timestamp=port_timestamp,
                preferred_ts=DataParticleKey.PORT_TIMESTAMP)

            # append the particle for this record to the buffer
            self._record_buffer.append(data_particle)
        else:
            # NOTE: Need to check for the metadata line last, since the
            # corrected Endurance record also has the [*] pattern
            test_meta = METADATA_MATCHER.match(line)

            if test_meta is None:
                # something in the data didn't match a required regex, so
                # raise an exception and press on
                message = "Error while decoding parameters in data: [%s]" % line
                self._exception_callback(RecoverableSampleException(message))
def recov_exception(self, error_message):
    """
    Add a warning log message and use the exception callback to pass a
    recoverable exception
    @param error_message: The error message to use in the log and callback
    """
    log.warn(error_message)
    self._exception_callback(RecoverableSampleException(error_message))