def _parse_table(self, config_file):
    """Parse the sample table found in `config_file` into this object.

    Lines are stripped and scanned in order. Everything before the line
    matching `self._begin_pattern` is ignored; scanning stops at the line
    matching `self._end_pattern`. Inside the table, blank lines and lines
    starting with '#' are skipped, and every other line is handed to
    `AlignmentConfig`.

    On success `self.alignments` maps taxon name -> OrderedDict of
    locus name -> AlignmentConfig, and `self._ordering` lists the
    (taxon_name, locus_name) pairs in the order the rows appeared.

    `config_file` is passed unchanged to `fileio.process_file_arg`, which
    returns the stream to read and a flag saying whether this method is
    responsible for closing it.

    Raises:
        errors.SampleTableError: if the end marker precedes the begin
            marker, the table has no rows, a taxon/locus pair occurs
            twice, or the begin or end marker is missing.
        errors.SampleTableRowError: re-raised (after logging the row
            number) when `AlignmentConfig` rejects a row.
    """
    self.alignments = OrderedDict()
    # Reset together with `alignments` so that parsing a second time cannot
    # leave stale (taxon, locus) pairs from an earlier parse behind.
    self._ordering = []
    cfg_stream, close = fileio.process_file_arg(config_file)
    try:
        table_started = False
        table_finished = False
        row_num = 0
        for raw_line in cfg_stream:
            line = raw_line.strip()
            # The end marker is checked first so that a stray end marker
            # before the table begins is reported rather than skipped.
            if self._end_pattern.match(line):
                if not table_started:
                    raise errors.SampleTableError(
                            'hit end of sample table before beginning')
                if not self.alignments:
                    raise errors.SampleTableError(
                            'no rows found in sample table')
                table_finished = True
                break
            if self._begin_pattern.match(line):
                table_started = True
                continue
            if not table_started:
                continue
            if (line == '') or line.startswith('#'):
                continue
            # Only real data rows are counted, so `row_num` is the row's
            # position within the table, not the line number in the file.
            row_num += 1
            try:
                al = AlignmentConfig(line)
            except errors.SampleTableRowError:
                _LOG.error('sample table row {0} is invalid'.format(
                        row_num))
                # Bare raise keeps the original traceback intact.
                raise
            taxon_loci = self.alignments.setdefault(al.taxon_name,
                    OrderedDict())
            if al.locus_name in taxon_loci:
                raise errors.SampleTableError('locus {0} found twice '
                        'for taxon {1} at row {2} of sample '
                        'table'.format(al.locus_name, al.taxon_name,
                            row_num))
            taxon_loci[al.locus_name] = al
            self._ordering.append((al.taxon_name, al.locus_name))
        if not table_started:
            raise errors.SampleTableError('no sample table found')
        if not table_finished:
            raise errors.SampleTableError('no end of table found')
    finally:
        if close:
            cfg_stream.close()
class SampleTable(object):
    """In-memory form of the ``sample_tbl`` section of a config file.

    The section is delimited by lines matching ``begin sample_tbl`` and
    ``end sample_tbl`` (case-insensitive). Each data row in between is
    parsed by `AlignmentConfig`.

    Attributes:
        alignments: OrderedDict mapping taxon name -> OrderedDict mapping
            locus name -> AlignmentConfig, in first-seen order.
        _ordering: list of (taxon_name, locus_name) tuples in the order the
            rows appeared in the table; used to write the table back out.
    """

    _begin_pattern = re.compile(r'^begin\s*sample_tbl$', re.IGNORECASE)
    _end_pattern = re.compile(r'^end\s*sample_tbl$', re.IGNORECASE)

    def __init__(self, config_file):
        """Build the table by parsing `config_file`.

        `config_file` is forwarded to `fileio.process_file_arg`; see
        `_parse_table` for the errors that can be raised.
        """
        self.alignments = None
        self._ordering = []
        self._parse_table(config_file)

    def _parse_table(self, config_file):
        """Parse the sample table found in `config_file` into this object.

        Lines are stripped and scanned in order. Everything before the
        begin marker is ignored; scanning stops at the end marker. Inside
        the table, blank lines and lines starting with '#' are skipped, and
        every other line is handed to `AlignmentConfig`.

        Raises:
            errors.SampleTableError: if the end marker precedes the begin
                marker, the table has no rows, a taxon/locus pair occurs
                twice, or the begin or end marker is missing.
            errors.SampleTableRowError: re-raised (after logging the row
                number) when `AlignmentConfig` rejects a row.
        """
        self.alignments = OrderedDict()
        # Reset together with `alignments` so that parsing a second time
        # cannot leave stale (taxon, locus) pairs from an earlier parse.
        self._ordering = []
        cfg_stream, close = fileio.process_file_arg(config_file)
        try:
            table_started = False
            table_finished = False
            row_num = 0
            for raw_line in cfg_stream:
                line = raw_line.strip()
                # The end marker is checked first so that a stray end
                # marker before the table begins is reported, not skipped.
                if self._end_pattern.match(line):
                    if not table_started:
                        raise errors.SampleTableError(
                                'hit end of sample table before beginning')
                    if not self.alignments:
                        raise errors.SampleTableError(
                                'no rows found in sample table')
                    table_finished = True
                    break
                if self._begin_pattern.match(line):
                    table_started = True
                    continue
                if not table_started:
                    continue
                if (line == '') or line.startswith('#'):
                    continue
                # Only real data rows are counted, so `row_num` is the
                # row's position within the table, not the file line.
                row_num += 1
                try:
                    al = AlignmentConfig(line)
                except errors.SampleTableRowError:
                    _LOG.error('sample table row {0} is invalid'.format(
                            row_num))
                    # Bare raise keeps the original traceback intact.
                    raise
                taxon_loci = self.alignments.setdefault(al.taxon_name,
                        OrderedDict())
                if al.locus_name in taxon_loci:
                    raise errors.SampleTableError('locus {0} found twice '
                            'for taxon {1} at row {2} of sample '
                            'table'.format(al.locus_name, al.taxon_name,
                                row_num))
                taxon_loci[al.locus_name] = al
                self._ordering.append((al.taxon_name, al.locus_name))
            if not table_started:
                raise errors.SampleTableError('no sample table found')
            if not table_finished:
                raise errors.SampleTableError('no end of table found')
        finally:
            if close:
                cfg_stream.close()

    def _get_taxa(self):
        """Return the taxon names as a list, in first-seen order."""
        # list(...) gives the same result on Python 2 and a real list
        # (rather than a keys view) on Python 3.
        return list(self.alignments)

    taxa = property(_get_taxa)

    def _get_loci(self):
        """Return every distinct locus name, in first-seen order."""
        ordered = []
        seen = set()
        # .values() and plain dict iteration work on both Python 2 and 3,
        # unlike iteritems()/iterkeys().
        for taxon_loci in self.alignments.values():
            for locus in taxon_loci:
                if locus not in seen:
                    seen.add(locus)
                    ordered.append(locus)
        return ordered

    loci = property(_get_loci)

    def _get_number_of_taxa(self):
        """Return the number of distinct taxa in the table."""
        return len(self.taxa)

    # Exposed under the name `npairs`; the value is the taxon count.
    npairs = property(_get_number_of_taxa)

    def get_sample_table_string(self):
        """Return the table as text, rows in their original order.

        The result is ``BEGIN SAMPLE_TBL``, one ``str()`` of each row per
        line, then ``END SAMPLE_TBL``, joined by newlines.
        """
        rows = '\n'.join(
                str(self.alignments[t][l]) for t, l in self._ordering)
        return '\n'.join(('BEGIN SAMPLE_TBL', rows, 'END SAMPLE_TBL'))

    def __str__(self):
        return self.get_sample_table_string()

    def equals(self, other):
        """Return True if `other` is a SampleTable with identical content.

        Two tables are equal when they hold the same taxa in the same
        order, each taxon holds the same loci in the same order, every
        pair of corresponding rows satisfies ``row.equals(other_row)``,
        and the recorded row ordering is the same.
        """
        if not isinstance(other, SampleTable):
            return False
        if len(self.alignments) != len(other.alignments):
            return False
        for (taxon1, loci1), (taxon2, loci2) in zip(
                self.alignments.items(), other.alignments.items()):
            if taxon1 != taxon2:
                return False
            if len(loci1) != len(loci2):
                return False
            for (locus1, al1), (locus2, al2) in zip(
                    loci1.items(), loci2.items()):
                if locus1 != locus2:
                    return False
                if not al1.equals(al2):
                    return False
        # Compare against the *other* table's ordering; comparing
        # self._ordering with itself could never detect a difference.
        if self._ordering != other._ordering:
            return False
        return True