def test_infer_close_group(self):
    """Check `infer_close_group` against the `taxdump_proteo` fixture.

    Three scenarios are exercised in sequence on the same `Analyze`
    instance: a single self taxon with no size limit, the same self
    taxon with a minimum close group size of 5, and two self taxa with
    no size limit.
    """
    analyzer = Analyze()
    tree = taxdump_from_text(taxdump_proteo)
    analyzer.taxdump = tree
    add_children(tree)
    analyzer.groups = {}

    # scenario 1: one self taxon, no size limit; the close group is
    # expected one level above the self taxon
    analyzer.self_tax = ['562']  # E. coli
    analyzer.groups['self'] = {'562', *get_descendants('562', tree)}
    analyzer.close_tax = analyzer.close_size = None
    analyzer.infer_close_group()
    self.assertListEqual(analyzer.close_tax, ['561'])  # Escherichia
    self.assertSetEqual(analyzer.groups['close'], {'561', '2580236'})

    # scenario 2: same self taxon, but the close group must contain at
    # least 5 taxa, which pushes it further up the hierarchy
    analyzer.close_size = 5
    analyzer.close_tax = analyzer.groups['close'] = None
    analyzer.infer_close_group()
    self.assertListEqual(analyzer.close_tax, ['543'])  # Enterobacteriaceae
    self.assertSetEqual(
        analyzer.groups['close'],
        {'543', '620', '622', '570', '548', '561', '2580236'})

    # scenario 3: two self taxa, no size limit; the close group is
    # expected at their LCA
    analyzer.self_tax = ['561', '620']  # Escherichia and Shigella
    self_members = set()
    for tid in analyzer.self_tax:
        self_members.add(tid)
        self_members.update(get_descendants(tid, tree))
    analyzer.groups['self'] = self_members
    analyzer.close_size = None
    analyzer.close_tax = analyzer.groups['close'] = None
    analyzer.infer_close_group()
    self.assertListEqual(analyzer.close_tax, ['543'])  # Enterobacteriaceae
    self.assertSetEqual(analyzer.groups['close'], {'543', '570', '548'})
def define_groups(self):
    """Build the "self" and "close" taxon groups.

    Notes
    -----
    For each of the two groups, in the order "self" then "close":

    1. If `<group>_tax` is already set (truthy), it is normalized with
       `list_from_param`; otherwise `infer_<group>_group()` is called.
    2. Unless `groups[<group>]` was already filled in during step 1, it
       is assigned the top-level taxIds plus all of their descendants.
    3. For the close group, members of the self group are removed.
    4. The top-level taxIds and the group size are printed.
    """
    self.groups = {}
    for name in ('self', 'close'):
        attr = '{}_tax'.format(name)
        given = getattr(self, attr)

        if not given:
            # nothing supplied by the user: infer the group
            infer = getattr(self, 'infer_{}_group'.format(name))
            infer()
            print('Auto-inferred {} group:'.format(name))
        else:
            # user supplied the top-level taxIds
            setattr(self, attr, list_from_param(given))
            print('User-defined {} group:'.format(name))

        # gather every taxId under the top-level taxIds, unless the
        # inference step has already stored the members
        tops = getattr(self, attr)
        if name not in self.groups:
            members = set()
            for top in tops:
                members.add(top)
                members.update(get_descendants(top, self.taxdump))
            self.groups[name] = members

        # the close group must not overlap with the self group
        if name == 'close':
            own = self.groups['self']
            self.groups['close'] = self.groups['close'].difference(own)

        # report what ended up in the group
        for top in tops:
            label = describe_taxon(top, self.taxdump)
            print(' {} ({})'.format(top, label))
        size = len(self.groups[name])
        print('{} group has {} taxa.'.format(name.capitalize(), size))
def infer_close_group(self):
    """Pick the close group by walking up from the LCA of the self group.

    Notes
    -----
    Starting at the LCA of `self_tax`, the candidate close group is the
    current taxon plus its descendants, minus the self group. The walk
    stops at the first taxon whose candidate set is non-empty and, when
    `close_size` is set, has at least `close_size` members. It also stops
    when the current taxon's parent is itself or '0'.

    On return:

    1. `close_tax` is a one-element list holding the taxon where the
       walk stopped.
    2. `groups['close']` is the candidate set computed at that taxon.
    """
    taxdump = self.taxdump

    # begin at the LCA of the self group
    node = find_lca(self.self_tax, taxdump)
    while True:
        # everything under the current taxon that is not in the self group
        clade = {node, *get_descendants(node, taxdump)}
        candidates = clade.difference(self.groups['self'])

        # accept a non-empty candidate set that meets the size limit
        # (any non-empty set is accepted when no limit is given)
        if candidates:
            if not self.close_size or len(candidates) >= self.close_size:
                break

        # otherwise climb one level, unless there is nowhere left to go
        parent = taxdump[node]['parent']
        if parent in (node, '0'):
            break
        node = parent

    self.close_tax = [node]
    self.groups['close'] = candidates
def define_groups(self):
    """Build the "self" and "close" taxon groups.

    Notes
    -----
    Assigns these attributes:

    1. `self_tax`: top-level taxId(s) of the self group.
    2. `close_tax`: top-level taxId(s) of the close group.
    3. `groups` (keys assigned here: self, close): all taxIds under each
       group, with self group members removed from the close group.

    A group whose `<group>_tax` is already set (truthy) is taken as
    user-defined and normalized with `list_from_param`; otherwise
    `infer_<group>_group()` is called. A short report is printed for
    each group.
    """
    self.groups = {}
    for name in ('self', 'close'):
        attr = f'{name}_tax'
        given = getattr(self, attr)

        if not given:
            # nothing supplied by the user: infer the group
            infer = getattr(self, f'infer_{name}_group')
            infer()
            print(f'Auto-inferred {name} group:')
        else:
            # user supplied the top-level taxIds
            setattr(self, attr, list_from_param(given))
            print(f'User-defined {name} group:')

        # gather every taxId under the top-level taxIds, unless the
        # inference step has already stored the members
        tops = getattr(self, attr)
        if name not in self.groups:
            members = set()
            for top in tops:
                members.add(top)
                members.update(get_descendants(top, self.taxdump))
            self.groups[name] = members

        # the close group must not overlap with the self group
        if name == 'close':
            own = self.groups['self']
            self.groups['close'] = self.groups['close'].difference(own)

        # report what ended up in the group
        for top in tops:
            label = describe_taxon(top, self.taxdump)
            print(f' {top} ({label})')
        size = len(self.groups[name])
        print(f'{name.capitalize()} group has {size} taxa.')
def test_get_descendants(self):
    """Check the descendants reported for taxon 1935183 (Asgard group)."""
    tree = taxdump_from_text(taxdump_archaea)
    add_children(tree)
    self.assertListEqual(
        get_descendants('1935183', tree),  # Asgard group
        ['1655434', '1655637', '1538547'])
def test_get_descendants(self):
    """Check the descendants reported for taxon 1935183 (Asgard group)."""
    # work on a new top-level dict built from the fixture's items
    tree = dict(taxdump_archaea.items())
    add_children(tree)
    self.assertListEqual(
        get_descendants('1935183', tree),  # Asgard group
        ['1655434', '1655637', '1538547'])