def _pull_damaged_segment_data(connection, work_dir, node_name):
    """
    Write one sized-pickle entry per damaged (unified_id, conjoined_part).

    Rows from _damaged_segment_generator(connection) are grouped by
    _group_key_function into (unified_id, conjoined_part) groups; all
    sequence numbers in a group are merged into one dict which is stored
    via store_sized_pickle into a gzip file at
    compute_damaged_segment_file_path(work_dir, node_name).

    Raises ValueError if a group yields no sequence numbers.
    """
    # NOTE(review): this function is defined twice in this module; the
    # later definition shadows this one -- one copy should be removed.
    log = logging.getLogger("_pull_damaged_segment_data")
    damaged_segment_count = 0
    damaged_segment_file_path = \
        compute_damaged_segment_file_path(work_dir, node_name)

    # groupby requires its input ordered by the group key; the generator
    # is presumed to yield rows in that order -- TODO confirm upstream query
    group_object = itertools.groupby(_damaged_segment_generator(connection),
                                     _group_key_function)

    # context manager ensures the gzip footer is flushed and the handle
    # closed even if store_sized_pickle raises (the original leaked the
    # open file on error)
    with gzip.GzipFile(filename=damaged_segment_file_path,
                       mode="wb") as damaged_segment_file:
        for (unified_id, conjoined_part, ), damaged_segment_group in \
                group_object:
            sequence_numbers = list()
            for damaged_segment_row in damaged_segment_group:
                sequence_numbers.extend(damaged_segment_row.sequence_numbers)
            # explicit raise instead of assert: asserts vanish under -O
            if not sequence_numbers:
                raise ValueError(
                    "no damaged sequence numbers for ({0}, {1})".format(
                        unified_id, conjoined_part))
            damaged_segment_dict = {"unified_id"       : unified_id,
                                    "conjoined_part"   : conjoined_part,
                                    "sequence_numbers" : sequence_numbers, }
            store_sized_pickle(damaged_segment_dict, damaged_segment_file)
            damaged_segment_count += 1

    log.info("stored {0} damaged segment entries".format(
        damaged_segment_count))
def _pull_damaged_segment_data(connection, work_dir, node_name):
    """
    Write one sized-pickle entry per damaged (unified_id, conjoined_part).

    Rows from _damaged_segment_generator(connection) are grouped by
    _group_key_function into (unified_id, conjoined_part) groups; the
    sequence numbers of each group are merged and stored as a dict via
    store_sized_pickle into a gzip file at
    compute_damaged_segment_file_path(work_dir, node_name).

    Raises ValueError if a group yields no sequence numbers.
    """
    # NOTE(review): this function is defined twice in this module; this
    # later copy shadows the earlier one -- one should be removed.
    log = logging.getLogger("_pull_damaged_segment_data")
    damaged_segment_count = 0
    damaged_segment_file_path = \
        compute_damaged_segment_file_path(work_dir, node_name)

    # groupby requires input sorted by the group key; the generator is
    # presumed to yield rows in that order -- TODO confirm upstream query
    group_object = itertools.groupby(_damaged_segment_generator(connection),
                                     _group_key_function)

    # 'with' guarantees the gzip footer is written and the handle closed
    # even if store_sized_pickle raises (the original never closed it)
    with gzip.GzipFile(filename=damaged_segment_file_path,
                       mode="wb") as damaged_segment_file:
        for (unified_id, conjoined_part, ), damaged_segment_group \
                in group_object:
            sequence_numbers = list()
            for damaged_segment_row in damaged_segment_group:
                sequence_numbers.extend(
                    damaged_segment_row.sequence_numbers)
            # explicit raise instead of assert: asserts vanish under -O
            if not sequence_numbers:
                raise ValueError(
                    "no damaged sequence numbers for ({0}, {1})".format(
                        unified_id, conjoined_part))
            damaged_segment_dict = {
                "unified_id": unified_id,
                "conjoined_part": conjoined_part,
                "sequence_numbers": sequence_numbers,
            }
            store_sized_pickle(damaged_segment_dict, damaged_segment_file)
            damaged_segment_count += 1

    log.info(
        "stored {0} damaged segment entries".format(damaged_segment_count))
def __init__(self, work_dir, node_name):
    """
    Open the gzipped segment and damaged-segment files for *node_name*
    under *work_dir* (read mode) and prime the first entries.
    """
    path = compute_segment_file_path(work_dir, node_name)
    self._segment_file = gzip.GzipFile(filename=path, mode="rb")
    path = compute_damaged_segment_file_path(work_dir, node_name)
    self._damaged_file = gzip.GzipFile(filename=path, mode="rb")
    # current segment entry; None until advance() loads one
    self.segment_dict = None
    try:
        # pre-load the first damaged-segment entry; an empty damaged
        # file is represented by None rather than an exception
        self._damaged_dict = retrieve_sized_pickle(self._damaged_file)
    except EOFError:
        self._damaged_dict = None
    # load the first segment entry -- presumably sets self.segment_dict;
    # advance() is not visible in this chunk, so confirm there
    self.advance()