def test_dict_from_param(self):
    """Exercise dict_from_param with empty, dict, string and file inputs."""
    # empty inputs give an empty dict
    for blank in (None, ''):
        self.assertEqual(dict_from_param(blank), {})

    # input that is already a dict
    self.assertEqual(dict_from_param({'a': 1}), {'a': 1})

    # comma-separated "key:value" string
    expected = {'a': '1', 'b': '2', 'c': '3'}
    self.assertDictEqual(dict_from_param('a:1,b:2,c:3'), expected)

    # string that cannot be parsed as a dictionary
    with self.assertRaises(ValueError) as ctx:
        dict_from_param('test')
    self.assertEqual(
        str(ctx.exception), 'Invalid dictionary string: "test".')

    # file with one tab-separated key / value pair per line
    fp = join(self.tmpdir, 'test.txt')
    with open(fp, 'w') as fh:
        for key, value in expected.items():
            print(key, value, sep='\t', file=fh)
    self.assertDictEqual(dict_from_param(fp), expected)

    # file whose lines hold a single field each
    with open(fp, 'w') as fh:
        for _ in expected:
            print('test', file=fh)
    with self.assertRaises(ValueError) as ctx:
        dict_from_param(fp)
    self.assertEqual(
        str(ctx.exception), f'Invalid dictionary file: "{fp}".')
    remove(fp)
def assign_taxonomy(self):
    """Assign a TaxID to every input genome and find their common ancestor.

    User-specified TaxIDs in ``self.input_tax`` are used first; genomes in
    ``self.data`` that are still unassigned get a TaxID from
    ``self.infer_genome_tax``. Afterwards ``self.taxdump`` is handed to
    ``refine_taxdump`` and ``add_children``, and the result of ``find_lca``
    over all assigned TaxIDs is stored in ``self.lca``. Progress is printed
    to stdout.

    Raises
    ------
    ValueError
        If the user-specified taxonomy cannot be parsed and there is more
        than one genome, if a user-specified TaxID is not in
        ``self.taxdump``, or if a TaxID cannot be inferred for a genome.
    """
    # start from the TaxIDs supplied by the user, if any
    if not self.input_tax:
        self.input_tax = {}
    else:
        try:
            self.input_tax = dict_from_param(self.input_tax)
        except ValueError:
            if len(self.data) > 1:
                raise ValueError('Invalid input taxonomy format.')
            # with at most one genome, the raw value is taken as its TaxID
            only_genome = max(self.data.keys())
            self.input_tax = {only_genome: self.input_tax}
        print('User-specified TaxIDs of input genomes:')
        for genome, taxid in sorted(self.input_tax.items()):
            if taxid not in self.taxdump:
                # TODO: read from both temp and master taxdump
                raise ValueError('TaxID {} is not present in taxonomy '
                                 'database.'.format(taxid))
            name = self.taxdump[taxid]['name']
            print(' {}: {} ({}).'.format(genome, taxid, name))

    # infer TaxIDs for genomes the user did not cover
    pending = sorted(x for x in self.data if x not in self.input_tax)
    if pending:
        print('Auto-inferring plausible taxIds for input genomes based on '
              'taxonomy of search results...')
    for genome in pending:
        try:
            taxid, cov = self.infer_genome_tax(
                self.data[genome], self.taxdump, self.input_cov)
            self.input_tax[genome] = taxid
        except ValueError:
            raise ValueError('Cannot auto-infer taxonomy for {}. '
                             'Please specify manually.'.format(genome))
        name = self.taxdump[taxid]['name']
        print(' {}: {} ({}) (covering {:2g}% best hits).'.format(
            genome, taxid, name, cov))

    # refine the taxonomy database
    print('Refining taxonomy database...')
    refine_taxdump(self.sum_taxids(), self.taxdump)
    add_children(self.taxdump)
    retained = len(self.taxdump)
    print('Done. Retained {} taxa.'.format(retained))

    # lowest common ancestor (LCA) of all assigned TaxIDs
    self.lca = find_lca(self.input_tax.values(), self.taxdump)
    description = describe_taxon(self.lca, self.taxdump)
    print('All input genomes belong to {} ({}).'.format(
        self.lca, description))