def parse_file(self):
    """
    Read the file line by line, matching each line against the known
    patterns and appending a data particle to the record buffer for
    every data line.  Unmatched, non-metadata lines are reported
    through the exception callback.
    """
    for raw_line in self._stream_handle:
        matched = NEW_DATA_MATCHER.match(raw_line)
        if matched is None:
            # NOTE: Need to check for the metadata line last, since the
            # corrected Endurance record also has the [*] pattern
            if METADATA_MATCHER.match(raw_line) is None:
                # unexpected content: report it and press on
                self._exception_callback(RecoverableSampleException(
                    "Error while decoding parameters in data: [%s]" % raw_line))
            continue

        # DCL controller timestamp is the port_timestamp
        port_timestamp = dcl_time_to_ntp(matched.groups()[DCL_TIMESTAMP])

        # particle-ize the data block received, return the record
        particle = self._extract_sample(
            self._particle_class,
            None,
            matched,
            port_timestamp=port_timestamp,
            preferred_ts=DataParticleKey.PORT_TIMESTAMP)

        # increment state for this chunk even if we don't get a particle
        self._record_buffer.append(particle)
def parse_file(self):
    """
    Parse through the file, pulling single lines and comparing to the
    established patterns, generating particles for data lines.

    Matched data lines become particles appended to self._record_buffer;
    lines matching neither the data pattern nor the metadata pattern are
    reported via the exception callback.
    """
    for line in self._stream_handle:
        data_match = NEW_DATA_MATCHER.match(line)
        if data_match:
            # DCL controller timestamp is the port_timestamp
            port_timestamp = dcl_time_to_ntp(data_match.groups()[DCL_TIMESTAMP])

            # particle-ize the data block received, return the record
            data_particle = self._extract_sample(self._particle_class,
                                                 None,
                                                 data_match,
                                                 port_timestamp=port_timestamp,
                                                 preferred_ts=DataParticleKey.PORT_TIMESTAMP)
            # increment state for this chunk even if we don't get a particle
            self._record_buffer.append(data_particle)
        else:
            # NOTE: Need to check for the metadata line last, since the corrected Endurance
            # record also has the [*] pattern
            test_meta = METADATA_MATCHER.match(line)
            if test_meta is None:
                # something in the data didn't match a required regex, so raise an exception and press on.
                message = "Error while decoding parameters in data: [%s]" % line
                self._exception_callback(RecoverableSampleException(message))
def parse_file(self):
    """
    Walk the file line by line, producing a particle for every line that
    matches the sensor data pattern and reporting anything unexpected
    through the exception callback.
    """
    for raw_line in self._stream_handle:
        # try the sensor data pattern first
        sensor_match = SENSOR_DATA_MATCHER.match(raw_line)

        if sensor_match is None:
            # not sensor data: the only other accepted format is metadata,
            # and even a metadata line must not contain a tab character
            if METADATA_MATCHER.match(raw_line) is None or raw_line.find(TAB) != -1:
                # something in the data didn't match a required regex,
                # so report it and press on
                self._exception_callback(UnexpectedDataException(
                    "Error while decoding parameters in data: [%s]" % raw_line))
            continue

        log.debug('record found')

        # DCL controller timestamp is the port_timestamp
        port_timestamp = dcl_time_to_ntp(
            sensor_match.groups()[SENSOR_GROUP_TIMESTAMP])

        particle = self._extract_sample(
            self._particle_class,
            None,
            sensor_match.groups(),
            port_timestamp=port_timestamp,
            preferred_ts=DataParticleKey.PORT_TIMESTAMP)
        self._record_buffer.append(particle)
def parse_file(self):
    """
    Main parsing loop: walk the file one line at a time, emitting a
    particle for every data line and raising a SampleException through
    the callback for any line that is neither data nor an explicitly
    ignored DCL logger line.
    """
    for line in self._stream_handle:
        data_match = DATA_LINE_MATCHER.match(line)

        if data_match:
            # found a data line, extract this particle;
            # DCL controller timestamp is the port_timestamp
            port_timestamp = dcl_time_to_ntp(
                data_match.groups()[DCL_TIMESTAMP_GROUP])

            self._record_buffer.append(self._extract_sample(
                self.particle_class,
                None,
                data_match,
                port_timestamp=port_timestamp,
                preferred_ts=DataParticleKey.PORT_TIMESTAMP))
        elif IGNORE_LINE_MATCHER.match(line) is None:
            # unknown format: warn and report via the exception callback
            error_message = 'Found line with unknown format %s' % line
            log.warn(error_message)
            self._exception_callback(SampleException(error_message))
def parse_file(self):
    """
    Parse through the file, pulling single lines and comparing to the
    established patterns, generating particles for data lines.

    Sensor data lines become particles appended to self._record_buffer.
    Metadata lines are tolerated (unless they contain a tab); all other
    content is reported via the exception callback.
    """
    for line in self._stream_handle:
        # check for a match against the sensor data pattern
        match = SENSOR_DATA_MATCHER.match(line)

        if match is not None:
            log.debug('record found')

            # DCL controller timestamp is the port_timestamp
            port_timestamp = dcl_time_to_ntp(match.groups()[SENSOR_GROUP_TIMESTAMP])

            data_particle = self._extract_sample(self._particle_class,
                                                 None,
                                                 match.groups(),
                                                 port_timestamp=port_timestamp,
                                                 preferred_ts=DataParticleKey.PORT_TIMESTAMP)
            self._record_buffer.append(data_particle)
        else:
            # check to see if this is any other expected format
            test_meta = METADATA_MATCHER.match(line)

            if test_meta is None or line.find(TAB) != -1:
                # something in the data didn't match a required regex, so raise an exception and press on.
                message = "Error while decoding parameters in data: [%s]" % line
                self._exception_callback(UnexpectedDataException(message))
def parse_file(self):
    """
    Parse the zplsc_c log file (averaged condensed data).

    Read the file line by line.  Values are extracted from lines containing
    condensed ASCII data; each such line yields one instrument particle
    appended to self._record_buffer.  DCL status log lines and instrument
    phase status log lines are recognized and skipped.  Any other content
    is reported through the exception callback.
    """
    # Loop over all lines in the data file and parse the data to generate particles
    for number, line in enumerate(self._stream_handle, start=1):

        # Check if this is the dcl status log
        match = DCL_LOG_MATCHER.match(line)
        if match is not None:
            log.trace("MATCHED DCL_LOG_MATCHER: %s: %s", number, match.groups())
            # No data to extract, move on to the next line
            continue

        # Check if this is the instrument phase status log
        match = PHASE_STATUS_MATCHER.match(line)
        if match is not None:
            log.trace("MATCHED PHASE_STATUS_MATCHER: %s: %s", number, match.groups())
            # No data to extract, move on to the next line
            continue

        # Check if this is the instrument condensed ASCII data
        match = SENSOR_DATA_MATCHER.match(line)
        if match is not None:
            log.trace("MATCHED SENSOR_DATA_MATCHER: %s: %s", number, match.groups())

            # Extract the condensed ASCII data from this line
            data_dict = self.parse_line(match)
            if data_dict is None:
                log.error('Erroneous data found in line %s: %s', number, line)
                continue

            # dcl_timestamp is the port_timestamp
            dcl_timestamp = data_dict[ZplscCDataKey.DCL_TIMESTAMP]
            port_timestamp = dcl_time_to_ntp(dcl_timestamp)

            # transmission_timestamp is the internal_timestamp
            transmission_timestamp = data_dict[ZplscCParticleKey.TRANS_TIMESTAMP]
            internal_timestamp = timestamp_yyyymmddhhmmss_to_ntp(transmission_timestamp)

            # Extract a particle and append it to the record buffer.
            particle = self._extract_sample(ZplscCInstrumentDataParticle,
                                            None,
                                            data_dict,
                                            internal_timestamp=internal_timestamp,
                                            port_timestamp=port_timestamp,
                                            preferred_ts=DataParticleKey.PORT_TIMESTAMP)
            if particle is not None:
                log.trace('Parsed particle: %s' % particle.generate_dict())
                self._record_buffer.append(particle)
            continue

        # Error, line did not match any expected regex
        self._exception_callback(
            RecoverableSampleException('Unknown data found in line %s:%s' % (number, line)))
def __init__(self, raw_data, instrument_particle_map, *args, **kwargs):
    """
    Build the particle, remember the instrument particle map, and derive
    the port timestamp from the DCL controller timestamp embedded in the
    raw data.
    """
    super(DclInstrumentDataParticle, self).__init__(raw_data, *args, **kwargs)

    self.instrument_particle_map = instrument_particle_map

    # The DCL controller timestamp serves as the port_timestamp.
    self.set_port_timestamp(
        dcl_time_to_ntp(self.raw_data[SENSOR_GROUP_TIMESTAMP]))
def _generate_port_timestamp(record_dict):
    """
    Generates the port_timestamp from the given DCL Controller Timestamp.
    :param record_dict: dictionary containing the dcl controller timestamp str parameter
    :return: the port_timestamp as a float
    """
    dcl_timestamp_str = record_dict[Pco2wAbcDataParticleKey.DCL_CONTROLLER_TIMESTAMP]
    return float(dcl_time_to_ntp(dcl_timestamp_str))
def _extract_dcl_controller_ntp_timestamp(self, inst_match):
    """
    Create the NTP timestamp to be used as the port_timestamp when the
    instrument particle is generated.
    :param inst_match: regex match containing the DCL timestamp group
    :return: NTP time derived from the DCL controller timestamp
    """
    dcl_time_str = inst_match.group(
        InstrumentDataMatchGroups.INST_GROUP_DCL_TIMESTAMP)
    return dcl_time_to_ntp(dcl_time_str)
def parse_file(self):
    """
    Walk the file line by line; each line is tested against the known data
    patterns in priority order and a particle is produced on a match.
    Metadata lines are tolerated; anything else is reported via the
    exception callback.
    """
    # data patterns tried in the same order as before: uncorrected,
    # corrected Endurance, Pioneer, then combined CTDBP/FLORT
    data_matchers = (UNCORR_MATCHER,
                     ENDURANCE_CORR_MATCHER,
                     PIONEER_MATCHER,
                     CTDBP_FLORT_MATCHER)

    for raw_line in self._stream_handle:
        record_match = None
        for matcher in data_matchers:
            record_match = matcher.match(raw_line)
            if record_match is not None:
                break

        if record_match is not None:
            log.debug('record found')

            # DCL Controller timestamp is the port_timestamp
            port_timestamp = dcl_time_to_ntp(
                record_match.group('dcl_controller_timestamp'))

            # Instrument timestamp is the internal_timestamp
            internal_timestamp = timestamp_ddmmyyyyhhmmss_to_ntp(
                record_match.group('date_time_string'))

            self._record_buffer.append(self._extract_sample(
                self._particle_class,
                None,
                record_match,
                port_timestamp=port_timestamp,
                internal_timestamp=internal_timestamp,
                preferred_ts=DataParticleKey.PORT_TIMESTAMP))
        else:
            # NOTE: Need to check for the metadata line last, since the
            # corrected Endurance record also has the [*] pattern
            if METADATA_MATCHER.match(raw_line) is None:
                # unmatched, non-metadata content: report and press on
                self._exception_callback(RecoverableSampleException(
                    "Error while decoding parameters in data: [%s]" % raw_line))
def parse_file(self):
    """
    Parse through the file, pulling single lines and comparing to the
    established patterns, generating particles for data lines.

    Each line is tested against the uncorrected, corrected Endurance,
    Pioneer, and combined CTDBP/FLORT patterns in that order; the first
    match wins.  Metadata lines are tolerated; all other unmatched
    content is reported via the exception callback.
    """
    for line in self._stream_handle:
        # first check for a match against the uncorrected pattern
        match = UNCORR_MATCHER.match(line)

        if match is None:
            # check for a match against corrected Endurance pattern
            match = ENDURANCE_CORR_MATCHER.match(line)

            if match is None:
                # check for a match against Pioneer pattern
                match = PIONEER_MATCHER.match(line)

                if match is None:
                    # check for a match against CTDBP_FLORT pattern
                    match = CTDBP_FLORT_MATCHER.match(line)

        if match is not None:
            log.debug('record found')

            # DCL Controller timestamp is the port_timestamp
            port_timestamp = dcl_time_to_ntp(match.group('dcl_controller_timestamp'))

            # Instrument timestamp is the internal_timestamp
            internal_timestamp = timestamp_ddmmyyyyhhmmss_to_ntp(match.group('date_time_string'))

            data_particle = self._extract_sample(self._particle_class,
                                                 None,
                                                 match,
                                                 port_timestamp=port_timestamp,
                                                 internal_timestamp=internal_timestamp,
                                                 preferred_ts=DataParticleKey.PORT_TIMESTAMP)
            self._record_buffer.append(data_particle)
        else:
            # NOTE: Need to check for the metadata line last, since the corrected Endurance
            # record also has the [*] pattern
            test_meta = METADATA_MATCHER.match(line)

            if test_meta is None:
                # something in the data didn't match a required regex, so raise an exception and press on.
                message = "Error while decoding parameters in data: [%s]" % line
                self._exception_callback(RecoverableSampleException(message))
def __init__(self, raw_data, port_timestamp=None, internal_timestamp=None,
             preferred_timestamp=DataParticleKey.PORT_TIMESTAMP,
             quality_flag=DataParticleValue.OK, new_sequence=None):
    """
    Construct the tide particle, then override both timestamps using the
    values embedded in the matched raw data.
    """
    super(PresfAbcDclParserTideDataParticle, self).__init__(raw_data,
                                                            port_timestamp,
                                                            internal_timestamp,
                                                            preferred_timestamp,
                                                            quality_flag,
                                                            new_sequence)

    # port_timestamp comes from the DCL Controller timestamp group
    self.set_port_timestamp(
        dcl_time_to_ntp(self.raw_data.group(TIDE_GROUP_DCL_TIMESTAMP)))

    # internal_timestamp comes from the instrument's own time string
    self.set_internal_timestamp(
        timestamp_ddmmyyyyhhmmss_to_ntp(
            self.raw_data.group(TIDE_GROUP_DATA_TIME_STRING)))
def __init__(self, raw_data, port_timestamp=None, internal_timestamp=None, preferred_timestamp=DataParticleKey.PORT_TIMESTAMP, quality_flag=DataParticleValue.OK, new_sequence=None): super(PresfAbcDclParserTideDataParticle, self).__init__(raw_data, port_timestamp, internal_timestamp, preferred_timestamp, quality_flag, new_sequence) # DCL Controller timestamp is the port_timestamp dcl_controller_timestamp = dcl_time_to_ntp(self.raw_data.group(TIDE_GROUP_DCL_TIMESTAMP)) self.set_port_timestamp(dcl_controller_timestamp) # Instrument timestamp is the internal_timestamp instrument_timestamp = timestamp_ddmmyyyyhhmmss_to_ntp(self.raw_data.group(TIDE_GROUP_DATA_TIME_STRING)) self.set_internal_timestamp(instrument_timestamp)
def parse_file(self): """ The main parsing function which loops over each line in the file and extracts particles if the correct format is found. """ # read the first line in the file line = self._stream_handle.readline() while line: # check for a data line or a dcl logger line we specifically ignore data_match = DATA_LINE_MATCHER.match(line) ignore_match = IGNORE_LINE_MATCHER.match(line) if data_match: # found a data line, extract this particle # DCL controller timestamp is the port_timestamp dcl_controller_timestamp = data_match.groups( )[DCL_TIMESTAMP_GROUP] port_timestamp = dcl_time_to_ntp(dcl_controller_timestamp) particle = self._extract_sample( self.particle_class, None, data_match, port_timestamp=port_timestamp, preferred_ts=DataParticleKey.PORT_TIMESTAMP) self._record_buffer.append(particle) elif not ignore_match: # we found a line with an unknown format, call an exception error_message = 'Found line with unknown format %s' % line log.warn(error_message) self._exception_callback(SampleException(error_message)) # read the next line line = self._stream_handle.readline()
def parse_file(self):
    """
    Parse through the file, pulling single lines and comparing to the
    established patterns, generating particles for data lines.

    Received (non-sent) records are scanned for modem header values
    (range, DSP battery, transmit battery) and NMEA sentences.  NMEA
    sentences with a valid checksum either accumulate engineering data
    in self._eng_data or are emitted immediately as particles.  After
    the whole file is read, at most one modem particle and one
    engineering particle are emitted from the accumulated values.
    """
    # initialize data for modem particle
    first_timestamp = None
    date_timestamp = None
    distance = None
    dsp_bat = None
    xmit_bat = None

    # initialize raw_data for CsppEngDclEngDataParticle
    self._eng_data = [None] * 10

    for line in self._stream_handle:

        data_match = RECORD_MATCHER.match(line)
        if data_match is None:
            message = 'got malformed line %s ' % line
            log.warn(message)
            self._exception_callback(RecoverableSampleException(message))
            continue

        if data_match.group('mode') == 'sent':
            continue  # skip sent messages, go to next line

        timestamp_str = data_match.group('timestamp')
        message = data_match.group('message')

        if first_timestamp is None:
            first_timestamp = timestamp_str  # save the first timestamp for the modem particle

        # save off header information for modem particle
        # modem particle created after processing entire file.
        range_match = RANGE_MATCHER.match(message)
        if range_match:
            distance = range_match.group('range')
            continue  # go to next line

        dsp_match = DSP_MATCHER.match(message)
        if dsp_match:
            dsp_bat = dsp_match.group('dsp_bat')
            continue  # go to next line

        xmit_match = XMIT_MATCHER.match(message)
        if xmit_match:
            # NOTE(review): group name 'dsp_bat' here looks copy-pasted from
            # DSP_MATCHER above -- confirm XMIT_MATCHER really names its
            # capture group 'dsp_bat' and not 'xmit_bat'.
            xmit_bat = xmit_match.group('dsp_bat')
            continue  # go to next line

        # process NMEA sentences
        nmea_match = NMEA_MATCHER.match(message)
        if nmea_match:
            sentence = nmea_match.group('sentence')
            checksum = int(nmea_match.group('checksum'), 16)  # Convert to integer

            # Note: NMEA checksums typically do not include the $ at the
            # beginning of the sentence but it appears Wetlabs implemented
            # it that way.
            comp_checksum = self.calc_checksum(sentence)

            if comp_checksum == checksum:
                fields = sentence.split(',')
                command = fields[5]
                count = fields[6]

                sentence_params = NMEA_SENTENCE_MAP.get(command)
                if sentence_params is None:
                    # skip NMEA sentences we are not looking for
                    log.debug('NMEA sentence skipped %s', line)
                    continue  # go to next line

                expected_count, particle_class = sentence_params
                if int(count) != expected_count:
                    message = 'did not get expected number of fields on line %s' % line
                    log.warn(message)
                    self._exception_callback(RecoverableSampleException(message))
                    continue  # go to next line

                if particle_class == CsppEngDclEngDataParticle:
                    # engineering data accumulates in self._eng_data and is
                    # published once at the end of the file
                    if command == 'DATE':
                        date_timestamp = timestamp_str  # save timestamp from the DATE record
                        self.process_date(fields[7:])
                    elif command == 'PFS':
                        self._eng_data[1:3] = fields[7:9]
                    elif command == 'PST':
                        self.process_start(fields[7:])
                    elif command == 'ENA':
                        self._eng_data[5:7] = fields[7:9]
                    elif command == 'WHE':
                        self.process_wave(fields[7:])
                else:
                    # Create particle and add to buffer
                    timestamp = dcl_time_to_ntp(timestamp_str)
                    data_particle = self._extract_sample(particle_class,
                                                         None,
                                                         fields[7:],
                                                         port_timestamp=timestamp,
                                                         preferred_ts=DataParticleKey.PORT_TIMESTAMP)
                    self._record_buffer.append(data_particle)
            else:
                message = 'checksum failed on line %s' % line
                log.warn(message)
                self._exception_callback(RecoverableSampleException(message))

    # end for loop

    # only send modem particle if we have a timestamp
    # and at least one parameter
    if first_timestamp and (distance or dsp_bat or xmit_bat):
        timestamp = dcl_time_to_ntp(first_timestamp)
        data_particle = self._extract_sample(CsppEngDclModemParticle,
                                             None,
                                             [distance, dsp_bat, xmit_bat],
                                             port_timestamp=timestamp,
                                             preferred_ts=DataParticleKey.PORT_TIMESTAMP)
        self._record_buffer.append(data_particle)

    if any(self._eng_data):
        # Publish CsppEngDclEngDataParticle if we have any data
        if date_timestamp:
            # preference is DATE timestamp
            timestamp = dcl_time_to_ntp(date_timestamp)
        else:
            timestamp = dcl_time_to_ntp(first_timestamp)

        data_particle = self._extract_sample(CsppEngDclEngDataParticle,
                                             None,
                                             self._eng_data,
                                             port_timestamp=timestamp,
                                             preferred_ts=DataParticleKey.PORT_TIMESTAMP)
        self._record_buffer.append(data_particle)
def parse_file(self):
    """
    Parse through the file, pulling single lines and comparing to the
    established patterns, generating particles for data lines.

    Each line is first tested against the FLORT DJ DCL pattern, then
    against the combined CTDBP/FLORT pattern; the particle class's
    _data_particle_map is switched to match whichever pattern hit.
    Metadata lines are tolerated; anything else is reported through the
    exception callback.
    """
    for line in self._stream_handle:

        message = 'data line \n%s' % line
        log.debug(message)

        # First check for valid FLORT DJ DCL data
        # If this is a valid sensor data record,
        # use the extracted fields to generate a particle.
        sensor_match = SENSOR_DATA_MATCHER.match(line)

        if sensor_match is not None:
            # NOTE(review): this mutates a class attribute on the particle
            # class, not an instance attribute -- all particles built from
            # this class share the active map.  Confirm this is intended.
            self._particle_class._data_particle_map = INSTRUMENT_PARTICLE_MAP
            log.debug('FLORT DJ match found')
        else:
            log.debug('FLORT DJ match NOT found')

            # check for a match against the FLORT D data in a combined
            # CTDBP FLORT instrument record
            sensor_match = CTDBP_FLORT_MATCHER.match(line)

            if sensor_match is not None:
                self._particle_class._data_particle_map = CTDBP_FLORT_PARTICLE_MAP
                log.debug('check for CTDBP/FLORT match')

        if sensor_match is not None:
            # FLORT data matched against one of the patterns
            log.debug('record found')

            # DCL Controller timestamp is the port_timestamp
            dcl_controller_timestamp = sensor_match.groups()[SENSOR_GROUP_TIMESTAMP]
            port_timestamp = dcl_time_to_ntp(dcl_controller_timestamp)

            if self._particle_class._data_particle_map == INSTRUMENT_PARTICLE_MAP:
                # For valid FLORT DJ data, Instrument timestamp is the internal_timestamp
                instrument_timestamp = sensor_match.groups()[SENSOR_GROUP_SENSOR_DATE] \
                    + ' ' + sensor_match.groups()[SENSOR_GROUP_SENSOR_TIME]
                internal_timestamp = timestamp_mmddyyhhmmss_to_ntp(instrument_timestamp)
            else:
                # _data_particle_map is CTDBP_FLORT_PARTICLE_MAP
                utc_time = formatted_timestamp_utc_time(
                    sensor_match.groups()[CTDBP_FLORT_GROUP_DATE_TIME],
                    "%d %b %Y %H:%M:%S")
                instrument_timestamp = ntplib.system_to_ntp_time(utc_time)
                internal_timestamp = instrument_timestamp

            # using port_timestamp as preferred_ts because internal_timestamp is not accurate
            particle = self._extract_sample(self._particle_class,
                                            None,
                                            sensor_match.groups(),
                                            port_timestamp=port_timestamp,
                                            internal_timestamp=internal_timestamp,
                                            preferred_ts=DataParticleKey.PORT_TIMESTAMP)

            # increment state for this chunk even if we don't
            # get a particle
            self._record_buffer.append(particle)

        # It's not a sensor data record, see if it's a metadata record.
        else:
            log.debug('No data recs found, check for meta record')

            # If it's a valid metadata record, ignore it.
            # Otherwise generate warning for unknown data.
            meta_match = METADATA_MATCHER.match(line)

            if meta_match is None:
                error_message = 'Unknown data found in chunk %s' % line
                log.warn(error_message)
                self._exception_callback(UnexpectedDataException(error_message))
def parse_file(self):
    """
    Read the entire file and extract a particle for every sensor data
    record found by SENSOR_DATA_MATCHER.

    Data between consecutive sensor records is expected to be metadata;
    skipped content that is not a complete metadata record is reported
    through the exception callback as UnexpectedDataException.
    """
    data = self._stream_handle.read()
    position = 0  # keep track of where we are in the file

    for sensor_match in SENSOR_DATA_MATCHER.finditer(data):
        start = sensor_match.start()

        # check to see if we skipped over any data
        if start != position:
            skipped_data = data[position:start]
            meta_match = METADATA_MATCHER.match(skipped_data)

            # BUG FIX: the original called meta_match.group(0) without a
            # None check, so skipped data that was not metadata at all
            # raised AttributeError instead of being reported.
            if meta_match is None or meta_match.group(0) != skipped_data:
                error_message = 'Unknown data found in line %s' % skipped_data
                log.warn(error_message)
                self._exception_callback(UnexpectedDataException(error_message))
            # else: complete metadata record -- ignore it

        position = sensor_match.end()  # increment the position

        groups = sensor_match.groups()

        # See if the checksum is correct.
        # Checksum is the modulo 256 sum of all data bytes.
        # If calculated checksum is zero, the record checksum is valid.
        checksum = sum(map(ord, groups[SENSOR_GROUP_CHECKSUM_SECTION])) % 256
        checksum_status = CHECKSUM_PASSED if checksum == 0 else CHECKSUM_FAILED

        # Create a tuple containing all the data to be used when
        # creating the particle.
        # The order of the particle data matches the PARTICLE_GROUPS.
        particle_data = (
            groups[SENSOR_GROUP_TIMESTAMP],
            groups[SENSOR_GROUP_YEAR],
            groups[SENSOR_GROUP_MONTH],
            groups[SENSOR_GROUP_DAY],
            groups[SENSOR_GROUP_HOUR],
            groups[SENSOR_GROUP_MINUTE],
            groups[SENSOR_GROUP_SECOND],
            groups[SENSOR_GROUP_ID],
            groups[SENSOR_GROUP_SERIAL],
            groups[SENSOR_GROUP_TIMER],
            struct.unpack('>h', groups[SENSOR_GROUP_DELAY])[0],
            list(struct.unpack('>7I', groups[SENSOR_GROUP_ADC_COUNTS])),
            struct.unpack('>H', groups[SENSOR_GROUP_SUPPLY_VOLTAGE])[0],
            struct.unpack('>H', groups[SENSOR_GROUP_ANALOG_VOLTAGE])[0],
            struct.unpack('>H', groups[SENSOR_GROUP_TEMPERATURE])[0],
            struct.unpack('>B', groups[SENSOR_GROUP_FRAME_COUNT])[0],
            checksum_status
        )

        # DCL Controller timestamp is the port_timestamp
        port_timestamp = dcl_time_to_ntp(groups[SENSOR_GROUP_TIMESTAMP])

        particle = self._extract_sample(self.particle_class,
                                        None,
                                        particle_data,
                                        port_timestamp=port_timestamp,
                                        preferred_ts=DataParticleKey.PORT_TIMESTAMP)
        self._record_buffer.append(particle)
def _process_instrument_data(self, working_record):
    """
    Determines which particle to produce, calls extract_sample to create the given particle.

    :param working_record: the aggregated record bytes/characters,
        including the leading * character
    Instrument records must be exactly 465 characters; control records
    must be 39 or 43 characters (with/without voltage & battery).  Any
    other size, or an unknown record type, is reported through the
    exception callback as a RecoverableSampleException.
    """
    log.debug(
        "PhsenAbcdefDclParser._process_instrument_data(): aggregate working_record size %s is %s",
        len(working_record), working_record)

    # this size includes the leading * character
    instrument_record_length = 465
    # this size includes the leading * character
    control_record_length_without_voltage_battery = 39
    # this size includes the leading * character
    control_record_length_with_voltage_battery = 43

    data_type = self._determine_data_type(working_record)

    # DCL controller timestamp is the port_timestamp
    port_timestamp = dcl_time_to_ntp(self.latest_dcl_time)

    if data_type is not DataTypeEnum.UNKNOWN:

        # Create a tuple for the particle composed of the working record and latest DCL time
        # The tuple allows for DCL time to be available when EXTERNAL calls each particle's
        # build_parse_values method
        particle_data = (self.latest_dcl_time, working_record)

        if data_type is DataTypeEnum.INSTRUMENT:

            # Per the IDD, if the candidate data is not the proper size, throw a recoverable exception
            if len(working_record) == instrument_record_length:

                # Create particle mule (to be used later to create the instrument particle)
                particle = self._extract_sample(self._instrument_data_particle_class,
                                                None,
                                                particle_data,
                                                port_timestamp=port_timestamp)
                self._record_buffer.append(particle)
            else:
                self._exception_callback(
                    RecoverableSampleException(
                        "PhsenAbcdefDclParser._process_instrument_data(): "
                        "Throwing RecoverableSampleException, Size of data "
                        "record is not the length of an instrument data record"))

        elif data_type is DataTypeEnum.CONTROL:

            # Per the IDD, if the candidate data is not the proper size, throw a recoverable exception
            if len(working_record) == control_record_length_without_voltage_battery or \
                    len(working_record) == control_record_length_with_voltage_battery:

                # Create particle mule (to be used later to create the metadata particle)
                particle = self._extract_sample(self._metadata_particle_class,
                                                None,
                                                particle_data,
                                                port_timestamp=port_timestamp)
                self._record_buffer.append(particle)
            else:
                log.warn(
                    "PhsenAbcdefDclParser._process_instrument_data(): "
                    "Size of data record is not the length of a control data record")
                self._exception_callback(
                    RecoverableSampleException(
                        "PhsenAbcdefDclParser._process_instrument_data(): "
                        "Throwing RecoverableSampleException, Size of data "
                        "record is not the length of a control data record"))
    else:
        log.warn(
            "PhsenAbcdefDclParser._process_instrument_data(): "
            "Throwing RecoverableSampleException, Record is neither instrument or control")
        self._exception_callback(
            RecoverableSampleException(
                "PhsenAbcdefDclParser._process_instrument_data(): "
                "Data Type is neither Control or Instrument"))
def parse_file(self):
    """
    Entry point into parsing the file, loop over each line and interpret it
    until the entire file is parsed.

    An "Instrument Started" log line stores its timestamp; the next data
    line pairs that stored timestamp with the DCL timestamp and the
    parsed fields to build one particle.  Unknown lines are reported
    through the exception callback.
    """
    stored_start_timestamp = None

    # read the first line in the file
    line = self._stream_handle.readline()

    while line:
        # data will be at start of line so use match
        data_match = DATA_START_MATCHER.match(line)
        # instrument started may be in middle so use search
        # NOTE(review): despite the comment above, the code calls .match()
        # here, which only matches at the start of the line -- confirm
        # whether LOG_START_MATCHER was meant to be used with .search().
        log_match = LOG_START_MATCHER.match(line)

        if data_match:
            # found a data line
            dcl_timestamp = data_match.group(1)

            # Note Bug #10002 found early deployments created data missing commas
            # between some fields. Replace commas with space and then split to
            # correctly parse files from deployments with either firmware
            fields_set = line[START_N_CHARS:].replace(',', ' ')
            fields = fields_set.split()

            if len(fields) != N_FIELDS:
                msg = 'Expected %d fields but received %d' % (N_FIELDS, len(fields))
                log.warn(msg)
                self._exception_callback(SampleException(msg))
            else:
                # create an array of the fields to parse in the particle
                raw_data = [stored_start_timestamp, dcl_timestamp]
                raw_data.extend(fields)

                # DCL controller timestamp is the port_timestamp
                port_timestamp = dcl_time_to_ntp(raw_data[DCL_CONTROLLER_TIMESTAMP])

                # datacollection time is the internal_timestamp
                unix_ts = float(raw_data[DATA_COLLECTION_TIME])
                internal_timestamp = ntplib.system_to_ntp_time(unix_ts)

                # extract this particle
                particle = self._extract_sample(self.particle_class,
                                                None,
                                                raw_data,
                                                port_timestamp=port_timestamp,
                                                internal_timestamp=internal_timestamp,
                                                preferred_ts=DataParticleKey.PORT_TIMESTAMP)
                self._record_buffer.append(particle)
                # the stored start timestamp applies to one data line only
                stored_start_timestamp = None

        elif log_match:
            # pull out whatever text is within the log
            log_contents = log_match.group(2)

            # there are two cases, a log message simply contains the 'Instrument Started' text, or it contains
            # an entire other log message which may contain 'Instrument Started'
            instr_log_match = INSTRUMENT_STARTED_MATCHER.match(log_contents)
            full_log_instr_match = INSTRUMENT_START_LOG_MATCHER.match(log_contents)

            # text other than instrument started is ignored within log messages
            if instr_log_match:
                # found a line containing a single log instrument started, hold on to it until we get a data line
                stored_start_timestamp = log_match.group(1)
            elif full_log_instr_match:
                # found a log within a log, use the inner timestamp associated with the instrument start
                stored_start_timestamp = full_log_instr_match.group(1)
        else:
            msg = 'Data with unexpected format received: %s' % line
            log.warn(msg)
            self._exception_callback(UnexpectedDataException(msg))

        line = self._stream_handle.readline()
def parse_file(self):
    """
    Parse fuel cell data out of the input file, one line at a time.

    (NOTE(review): the original docstring said "Parser for velpt_ab_dcl
    data", which appears to be copy-pasted -- the code below clearly
    handles fuel cell records.)

    Each properly time-stamped line holding checksummed fuel cell data
    yields one particle appended to self._record_buffer; malformed lines
    are logged via self.log_warning with the line number.
    """
    line_count = 0

    # Read a single line from the input file
    fuelcell_input_row = self._file_handle.readline()

    # Read the file, one line at a time
    while fuelcell_input_row:

        line_count += 1

        # Check to see if this record contains fuel cell data
        if not NON_DATA_MATCHER.search(fuelcell_input_row):

            # Is the record properly time stamped?
            found_date_time_group = DATE_MATCHER.search(fuelcell_input_row)

            # If so, continue processing
            if found_date_time_group:

                # Grab the time stamp data from the data
                date_time_group = found_date_time_group.group(1)

                # Now get the fuel cell data from the input line
                found_data = START_DATA_MATCHER.search(fuelcell_input_row)

                # If an integer was found, followed by a comma, the line has fuel cell data.
                if found_data:
                    data_string = fuelcell_input_row[found_data.start(1) + 1:]

                    # Need to find the colon near the end of the line which marks the
                    # end of the actual fuel cell data. The colon marks the end of the
                    # fuel cell data followed by the checksum for that data. Following
                    # that there will be a space then a hexadecimal number. If any of those
                    # elements are missing, the data is suspect.
                    found_end = END_DATA_MATCHER.search(data_string)

                    if found_end:
                        # first find the last space in the data_string (start of the terminator)
                        terminator_index = data_string.rfind(' ')
                        the_data = data_string[:terminator_index]

                        # Now replace any extraneous spaces in the data
                        the_data = the_data.replace(' ', '')

                        data_plus_checksum = the_data.split(':')
                        actual_data = data_plus_checksum[0]
                        read_checksum = int(data_plus_checksum[1])

                        if self.good_checksum(actual_data, read_checksum):
                            the_fields = actual_data.split(',')

                            if self.good_field(the_fields):
                                # DCL controller timestamp is the port_timestamp
                                dcl_controller_timestamp = date_time_group
                                port_timestamp = dcl_time_to_ntp(dcl_controller_timestamp)

                                raw_data = [date_time_group]
                                raw_data.extend(the_fields)

                                particle = self._extract_sample(
                                    self._fuelcell_data_class,
                                    None,
                                    raw_data,
                                    port_timestamp=port_timestamp,
                                    preferred_ts=DataParticleKey.PORT_TIMESTAMP)
                                self._record_buffer.append(particle)
                            else:
                                self.log_warning('Improper format line', line_count)
                        else:
                            self.log_warning('Bad checksum line', line_count)
                    else:
                        self.log_warning('No terminator found on line', line_count)
                else:
                    self.log_warning('No data found on line', line_count)
            else:
                self.log_warning('Bad/Missing Timestamp on line', line_count)
        else:
            # No FC Data is an expected occurance, do not raise exception
            log.debug('No fuel cell data on line %d', line_count)

        # Read another line from the input file
        fuelcell_input_row = self._file_handle.readline()
def parse_file(self):
    """
    Open and read the file and parse the data within; at the end of this
    method self._record_buffer will be filled with all the particles
    produced from the file.

    Each ADCP data burst is expected as a fixed sequence of lines:
      1. DCL timestamp line (SENSOR_TIME_MATCHER)
      2. heading/pitch/roll line (SENSOR_HEAD_MATCHER)
      3. temperature/speed-of-sound/BIT line (SENSOR_TEMP_MATCHER)
      4. an ignored header line (IGNORE_HEADING_MATCHER)
      5. one or more sensor data rows (SENSOR_DATA_MATCHER), terminated
         by the first non-matching line.
    Any line that breaks the expected sequence triggers
    recov_exception_callback and restarts the scan at the outer loop.
    """
    while True:  # loop through file looking for beginning of an adcp data burst
        line = self._stream_handle.readline()  # READ NEXT LINE
        if line == "":
            # readline() returns the empty string only at EOF
            break

        # Check if this is a DCL Log message
        dcl_log_match = DCL_LOG_MATCHER.match(line)
        if dcl_log_match:
            # verified to be a regular DCL Log. Discard & move to next line.
            continue  # skip to next line in outer loop

        line_match = SENSOR_TIME_MATCHER.match(line)
        if line_match is None:
            self.recov_exception_callback("Expected starting DCL Timestamp, received: %r" % line)
            continue  # skip to next line in outer loop
        matches = line_match.groups()
        sensor_data_list = []

        # Save timestamp from the DCL controller log and it's parts
        parsed_data = list(matches[SENSOR_GROUP_TIMESTAMP:SENSOR_TIME_SENSOR_DATE_TIME])
        port_timestamp = matches[SENSOR_GROUP_TIMESTAMP]
        port_timestamp = dcl_time_to_ntp(port_timestamp)

        # Get instrument_timestamp & ensemble_number
        parsed_data.append(matches[SENSOR_TIME_SENSOR_DATE_TIME])
        instrument_timestamp = matches[SENSOR_TIME_SENSOR_DATE_TIME]
        internal_timestamp = dcl_time_to_ntp(instrument_timestamp)
        parsed_data.append(matches[SENSOR_TIME_ENSEMBLE])

        line = self._stream_handle.readline()  # READ NEXT LINE
        line_match = SENSOR_HEAD_MATCHER.match(line)
        if line_match is None:
            self.recov_exception_callback("Expecting Heading, Pitch, & Roll data, received: %r" % line)
            continue  # skip to next line in outer loop
        matches = line_match.groups()

        # Get head, pitch, & roll
        parsed_data.append(matches[HEAD_HEADING])
        parsed_data.append(matches[HEAD_PITCH])
        parsed_data.append(matches[HEAD_ROLL])

        line = self._stream_handle.readline()  # READ NEXT LINE
        line_match = SENSOR_TEMP_MATCHER.match(line)
        if line_match is None:
            self.recov_exception_callback("Expecting Temperature, Speed of Sound, & BIT data,"
                                          " received: %r" % line)
            continue  # skip to next line in outer loop
        matches = line_match.groups()

        # Get temperature, speed of sound, & BIT values
        parsed_data.append(matches[TEMP_TEMP])
        parsed_data.append(matches[TEMP_SOS])
        # Expand the hex BIT field to 8 binary digits and keep only the
        # individual flag bits at positions 3, 4 and 6.
        binary_string = '{0:08b}'.format(int(matches[TEMP_HEX], 16))
        parsed_data.append(binary_string[3])
        parsed_data.append(binary_string[4])
        parsed_data.append(binary_string[6])

        line = self._stream_handle.readline()  # READ NEXT LINE
        line_match = IGNORE_HEADING_MATCHER.match(line)
        if line_match is None:
            self.recov_exception_callback("Expecting Header, received: %s" % line)
            continue  # skip to next line in outer loop

        # Start looking for sensor data
        while True:  # loop through all the velocity and echo data records
            line = self._stream_handle.readline()  # READ NEXT LINE
            line_match = SENSOR_DATA_MATCHER.match(line)
            if line_match is not None:
                # Collect velocity data sextets and echo power quartets
                sensor_data_list.append(line_match.groups()[SENSOR_DATA_BIN:])
            else:
                # NOTE(review): the non-matching line that terminates this
                # inner loop has already been consumed and is never
                # re-examined by the outer loop -- confirm the data format
                # guarantees it is an ignorable line.
                try:
                    # Transpose velocity data sextets and echo power quartets
                    # (an empty sensor_data_list raises here and is reported
                    # through the except branch below)
                    np_array = numpy.array(sensor_data_list)
                    parsed_data.extend(np_array.transpose().tolist()[1:])

                    # Get number of cells
                    parsed_data.append(sensor_data_list[-1][0])

                    particle = self._extract_sample(self._particle_class,
                                                    None,
                                                    parsed_data,
                                                    port_timestamp=port_timestamp,
                                                    internal_timestamp=internal_timestamp,
                                                    preferred_ts=DataParticleKey.PORT_TIMESTAMP)
                    if particle is not None:
                        self._record_buffer.append(particle)
                except Exception:
                    self.recov_exception_callback("Error parsing sensor data row,"
                                                  " received: %s" % line)
                break  # exit inner loop once a particle has been produced
def parse_file(self):
    """
    Parser for velpt_ab_dcl data.

    Walks the input file one row at a time; every row that passes all
    validation steps (timestamp, data marker, terminator, checksum and
    field format) yields a particle appended to self._record_buffer.
    Failures are reported through self.log_warning().
    """

    def handle_row(row, row_number):
        # Rows with no fuel cell data are expected; just note and move on.
        if NON_DATA_MATCHER.search(row):
            # No FC Data is an expected occurance, do not raise exception
            log.debug('No fuel cell data on line %d', row_number)
            return

        # The record must carry a valid time stamp.
        stamp_match = DATE_MATCHER.search(row)
        if stamp_match is None:
            self.log_warning('Bad/Missing Timestamp on line', row_number)
            return
        date_time_group = stamp_match.group(1)

        # An integer followed by a comma marks the start of fuel cell data.
        start_match = START_DATA_MATCHER.search(row)
        if start_match is None:
            self.log_warning('No data found on line', row_number)
            return
        payload = row[start_match.start(1) + 1:]

        # A colon near the end of the line terminates the fuel cell data;
        # it is followed by the checksum, a space, and a hexadecimal
        # number.  Without that terminator the data is suspect.
        if END_DATA_MATCHER.search(payload) is None:
            self.log_warning('No terminator found on line', row_number)
            return

        # Everything before the last space (start of the terminator) is
        # "<data>:<checksum>"; drop any extraneous spaces inside it.
        trimmed = payload[:payload.rfind(' ')].replace(' ', '')
        pieces = trimmed.split(':')
        measurements = pieces[0]
        checksum_value = int(pieces[1])

        if not self.good_checksum(measurements, checksum_value):
            self.log_warning('Bad checksum line', row_number)
            return

        fields = measurements.split(',')
        if not self.good_field(fields):
            self.log_warning('Improper format line', row_number)
            return

        # DCL controller timestamp is the port_timestamp
        port_timestamp = dcl_time_to_ntp(date_time_group)
        raw_data = [date_time_group] + fields

        particle = self._extract_sample(self._fuelcell_data_class,
                                        None,
                                        raw_data,
                                        port_timestamp=port_timestamp,
                                        preferred_ts=DataParticleKey.PORT_TIMESTAMP)
        self._record_buffer.append(particle)

    row_count = 0
    current_row = self._file_handle.readline()
    while current_row:
        row_count += 1
        handle_row(current_row, row_count)
        current_row = self._file_handle.readline()
def _process_instrument_data(self, working_record):
    """
    Determines which particle to produce, calls extract_sample to create
    the given particle.

    The candidate record must match one of two known layouts before a
    particle is created:
      - instrument record: 465 characters (including the leading '*')
      - control record: 39 or 43 characters (without/with the voltage &
        battery fields, including the leading '*')
    Records of the wrong size, or of unknown type, are reported through
    the exception callback as a RecoverableSampleException.

    :param working_record: the aggregated raw data record (starts with '*')
    """
    log.debug("PhsenAbcdefDclParser._process_instrument_data(): aggregate working_record size %s is %s",
              len(working_record), working_record)

    # this size includes the leading * character
    instrument_record_length = 465
    # this size includes the leading * character
    control_record_length_without_voltage_battery = 39
    # this size includes the leading * character
    control_record_length_with_voltage_battery = 43

    data_type = self._determine_data_type(working_record)

    # DCL controller timestamp is the port_timestamp
    port_timestamp = dcl_time_to_ntp(self.latest_dcl_time)

    if data_type is not DataTypeEnum.UNKNOWN:
        # Create a tuple for the particle composed of the working record and latest DCL time
        # The tuple allows for DCL time to be available when EXTERNAL calls each particle's
        # build_parse_values method
        particle_data = (self.latest_dcl_time, working_record)

        if data_type is DataTypeEnum.INSTRUMENT:
            # Per the IDD, if the candidate data is not the proper size, throw a recoverable exception
            if len(working_record) == instrument_record_length:
                # Create particle mule (to be used later to create the instrument particle)
                particle = self._extract_sample(self._instrument_data_particle_class,
                                                None,
                                                particle_data,
                                                port_timestamp=port_timestamp)
                self._record_buffer.append(particle)
            else:
                # Log before invoking the callback, for consistency with
                # the control-record branch below.
                log.warn("PhsenAbcdefDclParser._process_instrument_data(): "
                         "Size of data record is not the length of an instrument data record")
                self._exception_callback(RecoverableSampleException(
                    "PhsenAbcdefDclParser._process_instrument_data(): "
                    "Throwing RecoverableSampleException, Size of data "
                    "record is not the length of an instrument data record"))
        elif data_type is DataTypeEnum.CONTROL:
            # Per the IDD, if the candidate data is not the proper size, throw a recoverable exception
            if len(working_record) == control_record_length_without_voltage_battery or \
                    len(working_record) == control_record_length_with_voltage_battery:
                # Create particle mule (to be used later to create the metadata particle)
                particle = self._extract_sample(self._metadata_particle_class,
                                                None,
                                                particle_data,
                                                port_timestamp=port_timestamp)
                self._record_buffer.append(particle)
            else:
                log.warn("PhsenAbcdefDclParser._process_instrument_data(): "
                         "Size of data record is not the length of a control data record")
                self._exception_callback(RecoverableSampleException(
                    "PhsenAbcdefDclParser._process_instrument_data(): "
                    "Throwing RecoverableSampleException, Size of data "
                    "record is not the length of a control data record"))
    else:
        log.warn("PhsenAbcdefDclParser._process_instrument_data(): "
                 "Throwing RecoverableSampleException, Record is neither instrument or control")
        self._exception_callback(RecoverableSampleException("PhsenAbcdefDclParser._process_instrument_data(): "
                                                            "Data Type is neither Control or Instrument"))