def procall(fns):
    """
    Process a list of .mdd files
    :param fns: List of .mdd files to process
    :return: sections
    """
    # Prepare object to collect data into
    db = mdd_data.mdd_data()
    db.reset()
    sects = db.sects()
    stats = db.stats()
    changed_files = []

    # Ingest all sections in all input files
    for fn in fns:
        d = mdd(fn)
        for sect in d.sections:
            sect.glider = d.glider
            sect.time = d.time
            #print fn, sect.node, sect.start, sect.end

            # Basic validation: we know gliders make these...
            if sect.end <= sect.start:
                #print 'start > end?? node %d port %d start %d end %d' % (sect.node, sect.port, sect.start, sect.end)
                continue

            # Create new or open existing output file
            filename = 'node%dp%d.dat' % (sect.node, sect.port)
            ofn = mdd_config.datafile(filename)

            # keep track of which port 1 node files change so sio block
            # parsing can be done on them
            if sect.port == 1 and filename not in changed_files:
                changed_files.append(filename)

            try:
                of = open(ofn, 'r+b')
            except IOError:
                of = open(ofn, 'wb')

            # If we are past end of file, fill with zeroes
            of.seek(0, 2)
            if sect.start > of.tell():
                of.write('\0' * (sect.start - of.tell()))

            # Write each section out at its address
            of.seek(sect.start)
            of.write(sect.data)
            of.close()

            # Keep metadata for what we have processed
            sects.append(sect)
            stats.accumulate(sect.node, sect.glider, 'bytes', 1 + sect.end - sect.start)
            stats.max(sect.node, sect.glider, 'last', sect.time)

    # Merge adjacent sections into one, start sorted by node/port/start
    sects.sort(lambda a, b: cmp(a.node, b.node) or cmp(a.port, b.port) or cmp(a.start, b.start))
    n = 0
    while n < len(sects) - 1:
        curr = sects[n]
        # Merge subsequent sections into this one until we can't anymore
        while n < len(sects) - 1:
            next_sect = sects[n + 1]
            if curr.node != next_sect.node or curr.port != next_sect.port:
                break
            elif curr.end < next_sect.start - 1:
                break
            curr.end = max(curr.end, next_sect.end)
            curr.time = max(curr.time, next_sect.time)
            del sects[n + 1]
        n += 1
    #print '\n'.join([repr(s) for s in sects])

    db.save()

    # the following section of code was added to parse the initially created
    # node files, locate complete sio blocks, and copy those into fixed
    # instrument-specific files
    sio_parse = SioParse()
    # loop over each node file that has changed with this run and parse it
    for changed_file in changed_files:
        sio_parse.parse_file(changed_file)
    # save the sio parse database
    sio_parse.save()

    return sects
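# A minimal driver sketch for procall (hypothetical; not part of the original
# module). It assumes a flat directory of delivered .mdd files; the glob
# pattern and the printed fields are illustrative assumptions only.
if __name__ == '__main__':
    import glob
    # process every .mdd file found, oldest naming first
    merged = procall(sorted(glob.glob('*.mdd')))
    for s in merged:
        # each merged section records the node/port byte range it covers
        print 'node %d port %d: bytes %d-%d' % (s.node, s.port, s.start, s.end)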
# mdp_config.py:
# Read and parse gliders / nodes configuration file
# 08jul2013 [email protected] Initial

import mdd_config
import os
from xml.dom.minidom import parse

xml_config_fn = mdd_config.datafile('ooi.xml')

# All info about a node in an object (just data)
class nodeInfo(object):
    def __init__(self, n, posn, name, rate):
        self.id = int(n)
        self.lat = posn[0]
        self.lon = posn[1]
        self.depth = posn[2]
        self.name = name
        self.rate = rate

# Shortcut to get subelement from a node
def getElementData(doc, name):
    return str(doc.getElementsByTagName(name)[0].childNodes[0].data)

# Read xml config file into usable glider and node lists
def getSysConfig():
    config = parse(xml_config_fn).getElementsByTagName('modemConfig')[0]
    deployments = config.getElementsByTagName('deployment')
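    # getSysConfig is truncated in this excerpt. A hedged sketch of how the
    # deployment elements might be reduced to nodeInfo objects follows; the
    # child tag names ('node', 'lat', 'lon', 'depth', 'name', 'rate') are
    # illustrative assumptions, not confirmed by the snippet above:
    #
    #   nodes = []
    #   for dep in deployments:
    #       posn = (float(getElementData(dep, 'lat')),
    #               float(getElementData(dep, 'lon')),
    #               float(getElementData(dep, 'depth')))
    #       nodes.append(nodeInfo(getElementData(dep, 'node'), posn,
    #                             getElementData(dep, 'name'),
    #                             getElementData(dep, 'rate')))
    #   return nodes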
def parse_file(self, file_name):
    """
    Find any complete sio blocks in input file and copy them to their
    respective output files
    :param file_name: The input file name to parse
    """
    # get the current file state from the dictionary, or initialize
    # it if it doesn't exist
    file_state = self.sio_db.get_file_state(file_name)
    if file_state is None:
        file_state = self.sio_db.init_file_state(file_name)

    # insert the file index at the end of the file name before the extension
    file_out_start = file_name[:-4] + '_' + str(file_state[StateKey.OUTPUT_INDEX])
    file_out_end = file_name[-4:]

    # get the full input file path and find the current file size
    full_path_in = mdd_config.datafile(file_name)
    file_len = os.stat(full_path_in).st_size

    # update the file size and unprocessed data based on the input file length
    if file_state[StateKey.UNPROCESSED_DATA] is None:
        file_state[StateKey.UNPROCESSED_DATA] = [[0, file_len]]
        file_state[StateKey.FILE_SIZE] = file_len
    else:
        self.update_state_file_length(file_state, file_len)

    # increment output index each time we read this file
    file_state[StateKey.OUTPUT_INDEX] += 1

    fid_in = open(full_path_in, 'rb')
    newly_processed_blocks = []

    # loop over unprocessed blocks
    for unproc in file_state[StateKey.UNPROCESSED_DATA]:
        # read the next unprocessed data block from the file
        fid_in.seek(unproc[START_IDX])
        block_len = unproc[END_IDX] - unproc[START_IDX]
        data_block = fid_in.read(block_len)

        # loop and find each sio header in this unprocessed block
        for match in SIO_HEADER_MATCHER.finditer(data_block):
            # get the file string associated with this instrument ID from the sio header
            file_type = ID_MAP.get(match.group(SIO_HEADER_GROUP_ID))
            # insert the file type into the file name
            full_path_out = mdd_config.datafile(file_out_start + '.' + file_type + file_out_end)
            # open the output file in append mode, creating it if it doesn't exist
            fid_out = open(full_path_out, 'a+')

            # get length of data packet carried within this sio header
            data_len = int(match.group(SIO_HEADER_GROUP_DATA_LENGTH), 16)
            # end index relative to the unprocessed block
            end_block_idx = match.end(0) + data_len + 1
            # end index relative to the match
            end_match_idx = SIO_HEADER_LENGTH - 1 + data_len

            match_block = data_block[match.start(0):end_block_idx]
            orig_len = len(match_block)
            # replace escaped modem chars
            match_block = match_block.replace(b'\x18\x6b', b'\x2b')
            match_block = match_block.replace(b'\x18\x58', b'\x18')
            # store how many chars were replaced in this block for updating the state
            n_replaced = orig_len - len(match_block)
            # extend the block by the number of replaced characters so it
            # still covers the rest of the packet
            match_block += data_block[end_block_idx:end_block_idx + n_replaced]

            if end_match_idx < len(match_block) and match_block[end_match_idx] == SIO_BLOCK_END:
                # found the matching end of the packet, this block is complete,
                # write it to the output file
                fid_out.write(match_block[:end_match_idx + 1])
                # adjust the start and end indices to be relative to the file
                # rather than the block
                start_file_idx = match.start(0) + unproc[START_IDX]
                end_file_idx = end_block_idx + n_replaced + unproc[START_IDX]
                newly_processed_blocks.append([start_file_idx, end_file_idx])

            # close the output file whether or not the block was complete
            fid_out.close()

    # pre-combine blocks so there aren't so many to loop over
    newly_processed_blocks = SioParse._combine_adjacent_packets(newly_processed_blocks)
    # remove the processed blocks from the unprocessed file state
    for new_block in newly_processed_blocks:
        self.update_processed_file_state(file_state, new_block[START_IDX], new_block[END_IDX])

    fid_in.close()
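# _combine_adjacent_packets is called above but not shown in this excerpt.
# A minimal sketch of the assumed behavior, given that the [start, end]
# pairs arrive in ascending file order: merge pairs whose ranges touch, so
# fewer blocks are carried in the state. The real method may differ.
#
#   @staticmethod
#   def _combine_adjacent_packets(packets):
#       combined = []
#       for packet in packets:
#           if combined and combined[-1][END_IDX] == packet[START_IDX]:
#               # this packet starts where the previous one ended: merge them
#               combined[-1][END_IDX] = packet[END_IDX]
#           else:
#               combined.append(list(packet))
#       return combined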
# mdd_data.py:
# Persistent data object for mdd data processor
# 08jul2013 [email protected] Initial
# 04sep2013 [email protected] only one database object, added offsets

import mdd_config
import os
import pickle

# Where we save data
dbfile = mdd_config.datafile('mdd.pckl')
db = None

class matrix(object):
    def __init__(self):
        self.x = {}
        self.y = set()

    # Increment value at (xkey, ykey, tag) by increment
    def accumulate(self, xkey, ykey, tag, increment=1):
        if xkey not in self.x:
            self.x[xkey] = {}
        column = self.x[xkey]
        if ykey not in column:
            column[ykey] = {}
        loc = column[ykey]
        # record the ykey in the set of known columns (add is idempotent)
        self.y.add(ykey)
        if tag not in loc:
            loc[tag] = 0
        loc[tag] += increment
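# Usage sketch (hypothetical values): accumulate() builds a sparse 2-D matrix
# of counters keyed by (xkey, ykey, tag); mdd.procall() feeds it one
# (node, glider) pair per ingested section.
#
#   m = matrix()
#   m.accumulate(12, 'glider-1', 'bytes', 512)
#   m.accumulate(12, 'glider-1', 'bytes', 256)
#   # m.x[12]['glider-1']['bytes'] == 768
#   # m.y == set(['glider-1'])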
SIO_HEADER_REGEX += b'[0-9A-Za-z]'       # MFLM Processing Flag (coded value)
SIO_HEADER_REGEX += b'([0-9a-fA-F]{8})'  # POSIX Timestamp of Controller (hex)
SIO_HEADER_REGEX += b'_'                 # Spacer (0x5F)
SIO_HEADER_REGEX += b'([0-9a-fA-F]{2})'  # Block Number (hex)
SIO_HEADER_REGEX += b'_'                 # Spacer (0x5F)
SIO_HEADER_REGEX += b'([0-9a-fA-F]{4})'  # CRC Checksum (hex)
SIO_HEADER_REGEX += b'\x02'              # End of SIO Header (binary data follows)
SIO_HEADER_MATCHER = re.compile(SIO_HEADER_REGEX)

SIO_HEADER_LENGTH = 34

# sio header group match index
SIO_HEADER_GROUP_ID = 1           # Instrument ID
SIO_HEADER_GROUP_DATA_LENGTH = 2  # Number of Data Bytes

sio_db_file = mdd_config.datafile('sio.pckl')

# constants for accessing unprocessed data
START_IDX = 0
END_IDX = 1

# map of instrument ID to file type to place instrument data in
ID_MAP = {
    'AD': 'adcps',
    'CT': 'ctdmo',
    'CO': 'ctdmo',
    'DO': 'dosta',
    'FL': 'flort',
    'PH': 'phsen',
    'CS': 'status',
    'PS': 'status',
def parse_file(self, file_name):
    """
    Find any complete sio blocks in input file and copy them to their
    respective output files
    :param file_name: The input file name to parse
    """
    # get the current file state from the dictionary, or initialize
    # it if it doesn't exist
    file_state = self.sio_db.get_file_state(file_name)
    if file_state is None:
        file_state = self.sio_db.init_file_state(file_name)

    # insert the file index at the end of the file name before the extension
    file_out_start = file_name[:-4] + '_' + str(file_state[StateKey.OUTPUT_INDEX])
    file_out_end = file_name[-4:]

    # get the full input file path and find the current file size
    full_path_in = mdd_config.datafile(file_name)
    file_len = os.stat(full_path_in).st_size

    # update the file size and unprocessed data based on the input file length
    if file_state[StateKey.UNPROCESSED_DATA] is None:
        file_state[StateKey.UNPROCESSED_DATA] = [[0, file_len]]
        file_state[StateKey.FILE_SIZE] = file_len
    else:
        self.update_state_file_length(file_state, file_len)

    fid_in = open(full_path_in, 'rb')
    newly_processed_blocks = []

    # loop over unprocessed blocks
    for unproc in file_state[StateKey.UNPROCESSED_DATA]:
        # read the next unprocessed data block from the file
        fid_in.seek(unproc[START_IDX])
        block_len = unproc[END_IDX] - unproc[START_IDX]
        data_block = fid_in.read(block_len)

        # loop and find each sio header in this unprocessed block
        for match in SIO_HEADER_MATCHER.finditer(data_block):
            # get the file string associated with this instrument ID from the sio header
            file_type = ID_MAP.get(match.group(SIO_HEADER_GROUP_ID))
            # get length of data packet carried within this sio header
            data_len = int(match.group(SIO_HEADER_GROUP_DATA_LENGTH), 16)
            # end index relative to the unprocessed block
            end_block_idx = match.end(0) + data_len + 1
            # end index relative to the match
            end_match_idx = SIO_HEADER_LENGTH - 1 + data_len

            match_block = data_block[match.start(0):end_block_idx]
            orig_len = len(match_block)
            # replace escaped modem chars
            match_block = match_block.replace(b'\x18\x6b', b'\x2b')
            match_block = match_block.replace(b'\x18\x58', b'\x18')
            # store how many chars were replaced in this block for updating the state
            n_replaced = orig_len - len(match_block)
            # extend the block by the number of replaced characters so it
            # still covers the rest of the packet
            match_block += data_block[end_block_idx:end_block_idx + n_replaced]

            if end_match_idx < len(match_block) and match_block[end_match_idx] == SIO_BLOCK_END:
                # found the matching end of the packet, this block is complete.
                # include the controller / instrument number in the file name so
                # different instruments end up in different files
                ctrl_id = match.group(SIO_HEADER_GROUP_CTRL_ID)
                # insert the file type and controller id into the file name
                file_out = file_out_start + '.' + file_type + '_' + ctrl_id + file_out_end
                full_path_out = mdd_config.datafile(file_out)
                # open the output file in append mode, creating it if it doesn't exist
                fid_out = open(full_path_out, 'a+')
                # write the complete block to the output file
                fid_out.write(match_block[:end_match_idx + 1])
                # adjust the start and end indices to be relative to the file
                # rather than the block
                start_file_idx = match.start(0) + unproc[START_IDX]
                end_file_idx = end_block_idx + n_replaced + unproc[START_IDX]
                newly_processed_blocks.append([start_file_idx, end_file_idx])
                fid_out.close()

    # check for newly processed blocks
    if newly_processed_blocks:
        # increment the output index only when new data was parsed from this file
        file_state[StateKey.OUTPUT_INDEX] += 1
        # pre-combine blocks so there aren't so many to loop over
        newly_processed_blocks = SioParse._combine_adjacent_packets(newly_processed_blocks)
        # remove the processed blocks from the unprocessed file state
        for new_block in newly_processed_blocks:
            self.update_processed_file_state(file_state, new_block[START_IDX], new_block[END_IDX])

    fid_in.close()
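# update_processed_file_state is called above but not defined in this
# excerpt. Assumed behavior: subtract the processed [start, end) range from
# the UNPROCESSED_DATA interval list, keeping any leading or trailing gaps.
# A minimal sketch under that assumption (the real method may differ):
#
#   def update_processed_file_state(self, file_state, start, end):
#       remaining = []
#       for u_start, u_end in file_state[StateKey.UNPROCESSED_DATA]:
#           if end <= u_start or start >= u_end:
#               # no overlap with this unprocessed range: keep it whole
#               remaining.append([u_start, u_end])
#           else:
#               if u_start < start:
#                   remaining.append([u_start, start])  # keep leading gap
#               if end < u_end:
#                   remaining.append([end, u_end])      # keep trailing gap
#       file_state[StateKey.UNPROCESSED_DATA] = remaining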
SIO_HEADER_REGEX += b'([0-9a-fA-F]{8})'  # POSIX Timestamp of Controller (hex)
SIO_HEADER_REGEX += b'_'                 # Spacer (0x5F)
SIO_HEADER_REGEX += b'([0-9a-fA-F]{2})'  # Block Number (hex)
SIO_HEADER_REGEX += b'_'                 # Spacer (0x5F)
SIO_HEADER_REGEX += b'([0-9a-fA-F]{4})'  # CRC Checksum (hex)
SIO_HEADER_REGEX += b'\x02'              # End of SIO Header (binary data follows)
SIO_HEADER_MATCHER = re.compile(SIO_HEADER_REGEX)

SIO_HEADER_LENGTH = 34

# sio header group match index
SIO_HEADER_GROUP_ID = 1           # Instrument ID
SIO_HEADER_GROUP_CTRL_ID = 2      # Controller and instrument number
SIO_HEADER_GROUP_DATA_LENGTH = 3  # Number of Data Bytes

sio_db_file = mdd_config.datafile('sio.pckl')

# constants for accessing unprocessed data
START_IDX = 0
END_IDX = 1

# map of instrument ID to file type to place instrument data in
ID_MAP = {
    'AD': 'adcps',
    'CT': 'ctdmo',
    'CO': 'ctdmo',
    'DO': 'dosta',
    'FL': 'flort',
    'PH': 'phsen',
    'CS': 'status',
    'PS': 'status',
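# Filename sketch (hypothetical values, tying these constants to the revised
# parse_file above): a complete 'CT' block whose SIO_HEADER_GROUP_CTRL_ID
# group captured '1234501', found in node58p1.dat at output index 2, would
# be appended to:
#
#   node58p1_2.ctdmo_1234501.dat
#
# (file_out_start = 'node58p1_2', file_type = ID_MAP['CT'] = 'ctdmo',
#  file_out_end = '.dat')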