def download_nexson_from_phylografter(paths, download_db, lock_policy):
    '''Downloads the gzipped NexSON for one study from phylografter.

    `paths` must have a 'nexson' key (destination path whose basename is the
    phylografter study id) and a 'nexson_state_db' key. Returns False if
    another process held the lock; True otherwise.
    '''
    DOMAIN = os.environ.get('PHYLOGRAFTER_DOMAIN_PREF')
    if DOMAIN is None:
        DOMAIN = 'http://www.reelab.net/phylografter'
    headers = {
        'accept-encoding': 'gzip',
        'content-type': 'application/json',
        'accept': 'application/json',
    }
    nexson = paths['nexson']
    lockfile = nexson + '.lock'
    was_locked, owns_lock = lock_policy.wait_for_lock(lockfile)
    try:
        if not owns_lock:
            return False
        study = os.path.split(nexson)[-1]
        if VERBOSE:
            sys.stderr.write('Downloading %s...\n' % study)
        SUBMIT_URI = DOMAIN + '/study/export_gzipNexSON.json/' + study
        resp = requests.get(SUBMIT_URI,
                            headers=headers,
                            allow_redirects=True)
        resp.raise_for_status()
        results = gzip.GzipFile(mode='rb',
                                fileobj=StringIO(resp.content)).read()
        if isinstance(results, unicode) or isinstance(results, str):
            er = json.loads(results)
        else:
            raise RuntimeError('Non gzipped response, but not a string is:',
                               results)
        # Only rewrite the file if the content has changed.
        should_write = False
        if not os.path.exists(study):
            should_write = True
        else:
            prev_content = json.load(open(study, 'rU'))
            if prev_content != er:
                should_write = True
        if should_write:
            store_state_JSON(er, study)
        if download_db is not None:
            try:
                download_db['studies'].remove(int(study))
            except:
                warn('%s not in %s' % (study, paths['nexson_state_db']))
            else:
                store_state_JSON(download_db, paths['nexson_state_db'])
    finally:
        lock_policy.remove_lock()
    return True

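# --- Usage sketch (added for illustration; not part of the original module) ---
# Shows how the downloader above is typically driven: `paths['nexson']` is the
# destination file whose basename is the phylografter study id, and the lock
# policy object only needs the wait_for_lock()/remove_lock() interface used
# above. The directory layout here is hypothetical.
def _example_download_one_study(lock_policy, study_id='9'):
    paths = {
        'nexson': 'phylografter/nexson/' + study_id,  # hypothetical layout
        'nexson_state_db': 'phylografter/nexson/.to_download.json',
    }
    # Passing download_db=None skips the bookkeeping that removes the study
    # from the "still to download" list.
    return download_nexson_from_phylografter(paths, None, lock_policy)
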
def target_is_dirty(src_path_list, dest_path_list, trigger=None):
    '''Returns True if the files in `dest_path_list` need to be regenerated.

    A target is "dirty" if `trigger` is truthy, if any source or destination
    path is missing, or if the newest source is at least as new as the oldest
    destination.
    '''
    if bool(trigger):
        return True
    for p in src_path_list:
        if not os.path.exists(p):
            warn('Source path "%s" does not exist' % p)
            return True
    for dest_path in dest_path_list:
        if not os.path.exists(dest_path):
            return True
    smt = max([os.path.getmtime(i) for i in src_path_list])
    dmt = min([os.path.getmtime(i) for i in dest_path_list])
    return smt >= dmt

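# --- Usage sketch (added for illustration; not part of the original module) ---
# `target_is_dirty` implements a make-style freshness check: regenerate when a
# trigger is set, when any path is missing, or when the newest source is at
# least as new as the oldest destination. File names below are hypothetical.
def _example_regenerate_if_stale(study_id='9'):
    src = ['phylografter/nexson/%s' % study_id]
    dest = ['status/%s_status.html' % study_id]
    if target_is_dirty(src, dest):
        pass  # e.g. re-run the treemachine-log -> HTML pipeline (hypothetical step)
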
def _process_tree_log_info(curr_tree, tree_list, tree_id2tree, tree_notes, nexson_obj):
    TREE_INFO_LABELS = ('tree info',
                        'ingested tree',
                        'property added', )
    TREE_ID_BEARING_LABELS = ('checking for uniqueness of ott ids',
                              'name fixing on tree',
                              'checking if tree could be added to graph',
                              'null or duplicate names. skipping tree')
    for el in tree_notes:
        try:
            label = el['label'].lower()
        except:
            # Elements without a 'label' are nested lists of per-node notes.
            _process_tree_part_log_info(curr_tree, el, nexson_obj)
        else:
            if label in TREE_INFO_LABELS:
                for k, v in el.items():
                    if k != 'label':
                        if k == 'ot:tag':
                            curr_tree.setdefault(k, []).append(v)
                        else:
                            if k in curr_tree:
                                warn('Overwriting "%s" -> "%s" with value "%s"'
                                     % (k, curr_tree[k], v))
                            curr_tree[k] = v
            elif label in TREE_ID_BEARING_LABELS:
                # check index in case a tree has been omitted...
                tid = el['tree id']
                if tid != curr_tree['id']:
                    debug('%s swapping curr_tree to tree with id %s' % (str(el), tid))
                    curr_tree = tree_id2tree[tid]
                if label == 'null or duplicate names. skipping tree':
                    curr_tree['can be ingested'] = False
            elif label == 'all nodes have ottolids':
                curr_tree['had_ott_ids_before_tnrs'] = True
            elif label == 'postpruning newick':
                curr_tree['postpruning newick'] = el['tree']
            elif label not in TO_IGNORE_LABELS:
                warn('Ignored label "%s" in tree' % label)

def write_status_obj_as_html(status_obj, output):
    '''Writes the status HTML report.

    Takes:
        `status_obj` a dict parsed from "status JSON"
        `output` a file-like obj
    '''
    study_info = status_obj['study_info']
    tree_list = status_obj['tree_list']
    phylografter_study_id = study_info['phylografter_study_id']
    phylografter_study_link = study_info['phylografter_study_link']
    output.write('''<html>
<head>
<title>Snapshot of treemachine status for phylografter study %s</title>
</head>''' % phylografter_study_id)
    output.write('''<body>
<h2>Study Info</h2>
<p><a href="%s">%s</a></p>
<table border="1">
''' % (phylografter_study_link, phylografter_study_link))
    kl = study_info.keys()
    kl.sort()
    for k in kl:
        v = study_info[k]
        output.write(' <tr><td>' + str(k) + '</td><td>' + proc_val_for_html(v) + '</td></tr>\n')
    output.write('''</table>
''')
    ##############################
    # Write tree info
    ##############################
    to_ignore_keys = ['tree_index', ]
    detail_keys = ['number of external nodes',
                   'number edges',
                   'number nodes',
                   'ot:tag',  # [tag, tag]
                   ]
    verbose_keys = ['postpruning newick',  # newick string
                    'subtrees',  # dict. Keys newick -> dict with name and node keys
                    'by_ott',  # {ottID -> {'taxonomy', }}
                    ]
    valuable_tree_keys = ['id',
                          'ot:branchLengthMode',
                          'ingroup',
                          'ot:inGroupClade',
                          'ot:focalClade']
    special_keys = ['status',
                    'had_ott_ids_before_tnrs',  # bool
                    'deprecated',  # bool
                    'reused_tip_labels',  #
                    'pruned_dup',  # = [name, name, ...]
                    'null_ottol_ids_in_final_mapping',  # ['original name']
                    'null ott id for node',  # ['name']
                    'overlapping_tip_taxa',
                    'importable_into_treemachine',
                    'phylografter_tree_link',
                    'ott id missing',
                    'tnrs resolved ottolid',
                    'pruned unmapped',
                    ]
    keys_to_lists = ['ot:tag']
    all_known_tree_keys = (special_keys + verbose_keys + valuable_tree_keys
                           + detail_keys + to_ignore_keys)
    num_importable = study_info['num_trees_importable']
    for tree in tree_list:
        tid = tree['id']
        phylografter_tree_link = tree['phylografter_tree_link']
        output.write('''<h3>Tree %s</h3>
<p><a href="%s">%s</a></p>
''' % (tid, phylografter_tree_link, phylografter_tree_link))
        # STATUS
        output.write('''<p><b>Status: </b>%s</p>
''' % (tree['status']['text']))
        r = tree['status']['reasons']
        if r:
            output.write('''<table border="1">
''')
            for row in r:
                rc = row['rc']
                if rc == 'USER':
                    _display_user_status_code(output, row)
                elif rc == 'DUPLICATE':
                    _display_duplicate_taxon_status_code(output, row)
                elif rc == 'OVERLAPPING':
                    _display_overlapping_taxon_status_code(output, row)
                elif rc in ['PRUNING', 'PRUNING_UNMAPPED']:
                    _display_pruning_taxon_status_code(output, row)
                elif rc == 'TNRS':
                    _display_tnrs_taxon_status_code(output, row)
                else:
                    assert rc == 'NULLID'
                    _display_null_taxon_status_code(output, row)
            output.write('''</table>
''')
        # INFO
        output.write('''<p><b>Info</b></p>
''')
        output.write('''<table border="1">
''')
        kl = tree.keys()
        for k in kl:
            if k not in all_known_tree_keys:
                warn('Unexpected key in tree: ' + k)
        for k in keys_to_lists:
            tl = tree.get(k)
            if tl is not None:
                tree[k] = '"%s"' % '", "'.join(tl)
        for k in valuable_tree_keys + detail_keys:
            try:
                v = tree[k]
                output.write(' <tr><td>%s</td><td>%s</td></tr>\n' % (k, proc_val_for_html(v)))
            except:
                output.write(' <tr><td>%s</td><td>%s</td></tr>\n' % (k, warn_html("Missing!")))
        output.write('''</table>
<hr/>
''')
    if num_importable != 1:
        output.write('<p>' + warn_html(str(num_importable) + ' trees')
                     + ' importable from this study!</p>\n')
    else:
        output.write('<p>1 tree importable from this study.</p>\n')
    output.write('''</body>
</html>
''')

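# --- Usage sketch (added for illustration; not part of the original module) ---
# Renders a previously stored "status JSON" blob (the structure produced by
# process_treemachine_log_info below) to an HTML report. File names are
# hypothetical.
def _example_render_status_html(json_path='status/9.json', html_path='status/9.html'):
    with open(json_path) as inp:
        status_obj = json.load(inp)
    with open(html_path, 'w') as out:
        write_status_obj_as_html(status_obj, out)
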
def process_treemachine_log_info(log_obj, nexson_obj, default_study_id):
    '''Returns a dict with 'study_info' (a dict) and 'tree_list' (a list of
    tree info dicts) summarizing a treemachine log for one study.
    '''
    nexson_obj.incoming_ottid_to_otu = get_ott_id_to_taxon_map(nexson_obj.otu_list)
    study = {}
    tree_list = []
    context_el = None
    tree_id2tree = {}
    for el in log_obj:
        try:
            label = el['label'].lower()
            if label == 'otus':
                _add_unique(el, study, 'number', 'num_otus')
                context_el = study
            elif label == 'study tagged as deprecated. ignore.':
                study['Deprecated'] = 'Study tagged as deprecated.'
            elif label == 'processing tree':
                tree_list.append({})
                context_el = tree_list[-1]
                context_el['tree_index'] = len(tree_list) - 1
                _add_unique(el, context_el, '@id', 'id')
                tree_id2tree[context_el['id']] = context_el
            elif label == 'tree tagged as deprecated. ignoring.':
                tid = el['@id']
                tree_id2tree[tid]['deprecated'] = True
            elif label not in TO_IGNORE_LABELS:
                warn('Ignored label "%s"' % label)
        except:
            # Elements that are not labeled dicts are lists of notes about the
            # current context (usually the tree being processed).
            _process_tree_log_info(context_el, tree_list, tree_id2tree, el, nexson_obj)
    # move study info to study dict
    STUDY_INFO_PREFIXES = ['ot:study', 'ot:dataDeposit', 'ot:curatorName']
    for tree in tree_list:
        for k in tree.keys():
            is_study_info = False
            for p in STUDY_INFO_PREFIXES:
                if k.startswith(p):
                    is_study_info = True
            if is_study_info:
                if k in study:
                    assert study[k] == tree[k]
                else:
                    study[k] = tree[k]
                del tree[k]
    # process NexSON and tm out and add some helpful fields to the dicts
    importable_count = 0
    phylografter_domain = 'http://www.reelab.net/phylografter/'
    for tree in tree_list:
        i = diagnose_tree_status(tree, nexson_obj)
        tree['importable_into_treemachine'] = 'true' if i else 'false'
        if i:
            importable_count += 1
        tid = tree['id']
        tree['phylografter_tree_link'] = phylografter_domain + 'stree/svgView/' + tid
    study['num_trees_importable'] = importable_count
    study['phylografter_study_id'] = study.get('ot:studyId', default_study_id)
    study['phylografter_study_link'] = (phylografter_domain + 'study/view/'
                                        + study['phylografter_study_id'])
    return {'study_info': study, 'tree_list': tree_list}

def _process_tree_part_log_info(curr_tree, part_info, nexson_obj):
    prev_subtree = None
    for el in part_info:
        if isinstance(el, list):
            dup_dict = {}
            overlap_dict = {}
            for sub in el:
                label = sub['label'].lower()
                if label == 'matched anc':
                    assert prev_subtree is not None
                    prev_subtree['name'] = sub.get('name')
                    prev_subtree['node'] = sub.get('node')
                elif label == 'duplicate':
                    _update_keys(dup_dict, sub, ['name', 'OTT ID', 'nexsonid'])
                elif label == 'overlapping retained':
                    _update_keys(overlap_dict.setdefault('retained', {}), sub, ['name', 'nexsonid'])
                elif label == 'overlapping pruned':
                    _update_keys(overlap_dict.setdefault('pruned', {}), sub, ['name', 'nexsonid'])
                elif label not in TO_IGNORE_LABELS:
                    warn('Ignored label "%s" in tree' % label)
            if len(dup_dict) > 0:
                _add_otu_from_node_nexsonid(dup_dict, dup_dict.get('nexsonid'), nexson_obj)
                curr_tree['reused_tip_labels'].append(dup_dict)
            if len(overlap_dict) > 0:
                r = overlap_dict['retained']
                _add_otu_from_node_nexsonid(r, r.get('nexsonid'), nexson_obj)
                p = overlap_dict['pruned']
                _add_otu_from_node_nexsonid(p, p.get('nexsonid'), nexson_obj)
                curr_tree['overlapping_tip_taxa'].append(overlap_dict)
        else:
            label = el['label'].lower()
            if label == 'taxon mapping':
                by_ott = curr_tree.setdefault('by_ott', {})
                ott_id = el['OTT ID']
                by_ott[ott_id] = {'taxonomy': el['taxonomy']}
            elif label == 'subtree':
                sd = {}  # Change the following to the rvalue to retain the subtrees... => curr_tree.setdefault('subtrees', {})
                n = el['newick']
                prev_subtree = {}
                sd[n] = prev_subtree
            elif label == 'ott id reused in tree':
                curr_tree.setdefault('reused_tip_labels', [])
            elif label == 'overlapping tips':
                curr_tree.setdefault('overlapping_tip_taxa', [])
            elif label == 'pruning dups and overlapping':
                _add_otu_from_node_nexsonid(el, el.get('nexsonid'), nexson_obj)
                curr_tree.setdefault('pruned_dup', []).append(el)
            elif label == 'pruning unmapped':
                _add_otu_from_node_nexsonid(el, el.get('nexsonid'), nexson_obj)
                curr_tree.setdefault('pruned unmapped', []).append(el)
            elif label == 'error ottolid indexed to a null node!':
                curr_tree.setdefault('null_ottol_ids_in_final_mapping', []).append(el)
            elif label == 'null ott id for node':
                curr_tree.setdefault('null ott id for node', []).append(el)
            elif label == 'ott id missing':
                curr_tree.setdefault('ott id missing', []).append({'name': el['name'],
                                                                   'nexsonid': el['nexsonid']})
            elif label == 'tnrs resolved ottolid':
                nid = el['nexsonid']
                searched_on = el['searched on']
                ott_id = el['OTT ID']
                name = el['name']
                c = nexson_obj.__dict__.get('corrected', {})
                node = nexson_obj.node_for_nexsonid(nid)
                nexsontree = nexson_obj.tree_nexsonid_to_tree[curr_tree['id']]
                oid2node = nexsontree.ottid_to_node_list
                # Detach the node from its pre-TNRS OTT ID...
                nl = oid2node[node.otu.ott_id]
                assert node in nl
                nl.remove(node)
                # ...and re-index it under the TNRS-resolved OTT ID.
                new_otu_list = oid2node.get(ott_id)
                if new_otu_list is None or len(new_otu_list) == 0:
                    o = OTU({'@id': None, '@label': name, 'meta': None})
                    o.original_label = searched_on
                    o.ott_id = ott_id
                    nexson_obj.otu_list.append(o)
                    oid2node[ott_id] = [node]
                else:
                    node.otu = new_otu_list[0].otu
                    new_otu_list.append(node)
                curr_tree.setdefault('tnrs resolved ottolid', []).append({'searched on': searched_on,
                                                                          'OTT ID': ott_id,
                                                                          'name': name,
                                                                          'nexsonid': nid})
            elif label not in TO_IGNORE_LABELS:
                warn('Ignored label "%s" in part' % label)