示例#1
0
def download_nexson_from_phylografter(paths, download_db, lock_policy):
    '''
    Downloads the gzipped NexSON export of one study from phylografter and
    stores it if it differs from what is already on disk.

    Takes:
        `paths` a dict. 'nexson' is the local path of the study; its last
            path component is used as the study id. 'nexson_state_db' is
            the path the download db is written back to.
        `download_db` None, or a dict whose 'studies' entry supports
            remove(int); the id of the downloaded study is removed from it.
        `lock_policy` an object with wait_for_lock(lockfile), returning a
            (was_locked, owns_lock) pair, and remove_lock().

    Returns False if the lock is not owned after waiting, True otherwise.
    The server is taken from the PHYLOGRAFTER_DOMAIN_PREF environment
    variable when it is set. HTTP errors (via raise_for_status), gunzip
    errors and JSON parse errors propagate to the caller.
    '''
    domain = os.environ.get('PHYLOGRAFTER_DOMAIN_PREF')
    if domain is None:
        domain = 'http://www.reelab.net/phylografter'

    headers = {
        'accept-encoding': 'gzip',
        'content-type': 'application/json',
        'accept': 'application/json',
    }
    nexson = paths['nexson']
    lockfile = nexson + '.lock'
    _was_locked, owns_lock = lock_policy.wait_for_lock(lockfile)
    try:
        # NOTE(review): remove_lock() in the finally clause also runs on this
        # early return, i.e. when the lock is not owned -- confirm that the
        # lock policy tolerates that.
        if not owns_lock:
            return False
        study = os.path.split(nexson)[-1]
        if VERBOSE:
            sys.stderr.write('Downloading %s...\n' % study)
        submit_uri = domain + '/study/export_gzipNexSON.json/' + study
        resp = requests.get(submit_uri, headers=headers, allow_redirects=True)
        resp.raise_for_status()
        results = gzip.GzipFile(mode='rb',
                                fileobj=StringIO(resp.content)).read()
        if isinstance(results, (unicode, str)):
            er = json.loads(results)
        else:
            raise RuntimeError('Non gzipped response, but not a string is:',
                               results)
        # NOTE(review): the comparison and the write below use the bare study
        # id as a path (relative to the working directory), not
        # paths['nexson'] -- confirm this is intended.
        if os.path.exists(study):
            # context manager so the previous copy is closed promptly
            with open(study, 'rU') as prev_file:
                prev_content = json.load(prev_file)
            should_write = prev_content != er
        else:
            should_write = True
        if should_write:
            store_state_JSON(er, study)
        if download_db is not None:
            try:
                download_db['studies'].remove(int(study))
            except (KeyError, ValueError):
                # non-numeric study id, no 'studies' entry, or id not listed
                warn('%s not in %s' % (study, paths['nexson_state_db']))
            else:
                store_state_JSON(download_db, paths['nexson_state_db'])
    finally:
        lock_policy.remove_lock()
    return True
def download_nexson_from_phylografter(paths, download_db, lock_policy):
    '''
    Downloads the gzipped NexSON export of one study from phylografter and
    stores it if it differs from what is already on disk.

    Takes:
        `paths` a dict. 'nexson' is the local path of the study; its last
            path component is used as the study id. 'nexson_state_db' is
            the path the download db is written back to.
        `download_db` None, or a dict whose 'studies' entry supports
            remove(int); the id of the downloaded study is removed from it.
        `lock_policy` an object with wait_for_lock(lockfile), returning a
            (was_locked, owns_lock) pair, and remove_lock().

    Returns False if the lock is not owned after waiting, True otherwise.
    The server is taken from the PHYLOGRAFTER_DOMAIN_PREF environment
    variable when it is set. HTTP errors (via raise_for_status), gunzip
    errors and JSON parse errors propagate to the caller.
    '''
    domain = os.environ.get('PHYLOGRAFTER_DOMAIN_PREF')
    if domain is None:
        domain = 'http://www.reelab.net/phylografter'

    headers = {
        'accept-encoding': 'gzip',
        'content-type': 'application/json',
        'accept': 'application/json',
    }
    nexson = paths['nexson']
    lockfile = nexson + '.lock'
    _was_locked, owns_lock = lock_policy.wait_for_lock(lockfile)
    try:
        # NOTE(review): remove_lock() in the finally clause also runs on this
        # early return, i.e. when the lock is not owned -- confirm that the
        # lock policy tolerates that.
        if not owns_lock:
            return False
        study = os.path.split(nexson)[-1]
        if VERBOSE:
            sys.stderr.write('Downloading %s...\n' % study)
        submit_uri = domain + '/study/export_gzipNexSON.json/' + study
        resp = requests.get(submit_uri, headers=headers, allow_redirects=True)
        resp.raise_for_status()
        results = gzip.GzipFile(mode='rb',
                                fileobj=StringIO(resp.content)).read()
        if isinstance(results, (unicode, str)):
            er = json.loads(results)
        else:
            raise RuntimeError('Non gzipped response, but not a string is:',
                               results)
        # NOTE(review): the comparison and the write below use the bare study
        # id as a path (relative to the working directory), not
        # paths['nexson'] -- confirm this is intended.
        if os.path.exists(study):
            # context manager so the previous copy is closed promptly
            with open(study, 'rU') as prev_file:
                prev_content = json.load(prev_file)
            should_write = prev_content != er
        else:
            should_write = True
        if should_write:
            store_state_JSON(er, study)
        if download_db is not None:
            try:
                download_db['studies'].remove(int(study))
            except (KeyError, ValueError):
                # non-numeric study id, no 'studies' entry, or id not listed
                warn('%s not in %s' % (study, paths['nexson_state_db']))
            else:
                store_state_JSON(download_db, paths['nexson_state_db'])
    finally:
        lock_policy.remove_lock()
    return True
示例#3
0
def target_is_dirty(src_path_list, dest_path_list, trigger=None):
    '''
    Decides whether the destination files need to be regenerated.

    Takes:
        `src_path_list` paths of the inputs
        `dest_path_list` paths of the outputs
        `trigger` any truthy value forces a True result

    Returns True if `trigger` is truthy, if any source is missing (a warning
    is issued for the first one found), if any destination is missing, or if
    the newest source was modified at or after the oldest destination.
    Otherwise returns False.
    '''
    if trigger:
        return True
    for source in src_path_list:
        if os.path.exists(source):
            continue
        warn('Source path "%s" does not exist' % source)
        return True
    if not all(os.path.exists(target) for target in dest_path_list):
        return True
    newest_source = max(os.path.getmtime(source) for source in src_path_list)
    oldest_target = min(os.path.getmtime(target) for target in dest_path_list)
    return newest_source >= oldest_target
def target_is_dirty(src_path_list, dest_path_list, trigger=None):
    '''
    Decides whether the destination files need to be regenerated.

    Takes:
        `src_path_list` paths of the inputs
        `dest_path_list` paths of the outputs
        `trigger` any truthy value forces a True result

    Returns True if `trigger` is truthy, if any source is missing (a warning
    is issued for the first one found), if any destination is missing, or if
    the newest source was modified at or after the oldest destination.
    Otherwise returns False.
    '''
    if trigger:
        return True
    for source in src_path_list:
        if os.path.exists(source):
            continue
        warn('Source path "%s" does not exist' % source)
        return True
    if not all(os.path.exists(target) for target in dest_path_list):
        return True
    newest_source = max(os.path.getmtime(source) for source in src_path_list)
    oldest_target = min(os.path.getmtime(target) for target in dest_path_list)
    return newest_source >= oldest_target
示例#5
0
def _process_tree_log_info(curr_tree, tree_list, tree_id2tree, tree_notes,
                           nexson_obj):
    TREE_INFO_LABELS = (
        'tree info',
        'ingested tree',
        'property added',
    )
    TREE_ID_BEARING_LABELS = ('checking for uniqueness of ott ids',
                              'name fixing on tree',
                              'checking if tree could be added to graph',
                              'null or duplicate names. skipping tree')
    for el in tree_notes:
        try:
            label = el['label'].lower()
        except:
            _process_tree_part_log_info(curr_tree, el, nexson_obj)
        else:
            if label in TREE_INFO_LABELS:
                for k, v in el.items():
                    if k != 'label':
                        if k == 'ot:tag':
                            curr_tree.setdefault(k, []).append(v)
                        else:
                            if k in curr_tree:
                                warn('Overwriting "%s" -> "%s" with value "%s"'
                                     % (k, curr_tree[k], v))
                            curr_tree[k] = v
            elif label in TREE_ID_BEARING_LABELS:  # check index in case a tree has been omitted...
                tid = el['tree id']
                if tid != curr_tree['id']:
                    debug('%s swapping curr_tree to tree with id %s' %
                          (str(el), tid))
                    curr_tree = tree_id2tree[tid]
                if label == 'null or duplicate names. skipping tree':
                    curr_tree['can be ingested'] = False
            elif label == 'all nodes have ottolids':
                curr_tree['had_ott_ids_before_tnrs'] = True
            elif label == 'postpruning newick':
                curr_tree['postpruning newick'] = el['tree']
            elif label not in TO_IGNORE_LABELS:
                warn('Ignored label "%s" in tree' % label)
def _process_tree_log_info(curr_tree, tree_list, tree_id2tree, tree_notes, nexson_obj):
    TREE_INFO_LABELS = ('tree info',
                        'ingested tree',
                        'property added',
                        )
    TREE_ID_BEARING_LABELS = ('checking for uniqueness of ott ids',
                              'name fixing on tree',
                              'checking if tree could be added to graph',
                              'null or duplicate names. skipping tree'
                                )
    for el in tree_notes:
        try:
            label = el['label'].lower()
        except:
            _process_tree_part_log_info(curr_tree, el, nexson_obj)
        else:
            if label in TREE_INFO_LABELS:
                for k, v in el.items():
                    if k != 'label':
                        if k == 'ot:tag':
                            curr_tree.setdefault(k, []).append(v)
                        else:
                            if k in curr_tree:
                                warn('Overwriting "%s" -> "%s" with value "%s"' %(k, curr_tree[k], v))
                            curr_tree[k] = v
            elif label in TREE_ID_BEARING_LABELS: # check index in case a tree has been omitted...
                tid = el['tree id']
                if tid != curr_tree['id']:
                    debug('%s swapping curr_tree to tree with id %s' % (str(el), tid))
                    curr_tree = tree_id2tree[tid]
                if label == 'null or duplicate names. skipping tree':
                    curr_tree['can be ingested'] = False
            elif label == 'all nodes have ottolids':
                curr_tree['had_ott_ids_before_tnrs'] = True
            elif label == 'postpruning newick':
                curr_tree['postpruning newick'] = el['tree']
            elif label not in TO_IGNORE_LABELS:
                warn('Ignored label "%s" in tree' % label)
示例#7
0
def write_status_obj_as_html(status_obj, output):
    '''
    Writes the status html

    Takes:
        `status_obj` a dict parsed from "status JSON"; its 'study_info' dict
            and its 'tree_list' list are read. Every tree dict must carry
            'id', 'phylografter_tree_link' and a 'status' dict holding
            'text' and 'reasons'.
        `output` a file-like obj (only its write() method is used)

    NOTE: an 'ot:tag' list found in a tree is replaced in place by its
    quoted, comma-separated string form, so `status_obj` is modified.
    '''
    study_info = status_obj['study_info']
    tree_list = status_obj['tree_list']
    phylografter_study_id = study_info['phylografter_study_id']
    phylografter_study_link = study_info['phylografter_study_link']

    output.write('''<html>
<head>
    <title>Snaphsot of treemachine status for phylografter study %s</title>
</head>''' % phylografter_study_id)
    output.write('''<body>
<h2>Study Info</h2>
    <p><a href="%s">%s</a></p>
    <table border="1">
''' % (phylografter_study_link, phylografter_study_link))
    # sorted() works whether keys() returns a list or a view
    for k in sorted(study_info.keys()):
        v = study_info[k]
        output.write('        <tr><td>' + str(k) + '</td><td>' +
                     proc_val_for_html(v) + '</td></tr>\n')
    output.write('''    </table>
''')

    ##############################
    # Write tree info
    ##############################
    to_ignore_keys = [
        'tree_index',
    ]
    detail_keys = [
        'number of external nodes',
        'number edges',
        'number nodes',
        'ot:tag',  # [tag, tag]
    ]
    verbose_keys = [
        'postpruning newick',  # newick string
        'subtrees',  # dict. Keys newick -> dict with name and node keys
        'by_ott'  # {ottID -> {'taxonomy', }}
    ]
    valuable_tree_keys = [
        'id', 'ot:branchLengthMode', 'ingroup', 'ot:inGroupClade',
        'ot:focalClade'
    ]
    special_keys = [
        'status',
        'had_ott_ids_before_tnrs',  # bool
        'deprecated',  # bool
        'reused_tip_labels',
        'pruned_dup',  # = [name, name, ...]
        'null_ottol_ids_in_final_mapping',  # ['original name']
        'null ott id for node',  # ['name']
        'overlapping_tip_taxa',
        'importable_into_treemachine',
        'phylografter_tree_link',
        'ott id missing',
        'tnrs resolved ottolid',
        'pruned unmapped',
    ]
    keys_to_lists = ['ot:tag']
    # only used for membership tests, hence a set
    all_known_tree_keys = frozenset(special_keys + verbose_keys +
                                    valuable_tree_keys + detail_keys +
                                    to_ignore_keys)
    num_importable = study_info['num_trees_importable']
    for tree in tree_list:
        tid = tree['id']
        phylografter_tree_link = tree['phylografter_tree_link']
        output.write('''<h3>Tree %s</h3>
    <p><a href="%s">%s</a></p>
    ''' % (tid, phylografter_tree_link, phylografter_tree_link))
        # STATUS
        output.write('''<p><b>Status: </b>%s</p>
    ''' % (tree['status']['text']))
        reasons = tree['status']['reasons']
        if reasons:
            # the table is opened and closed together, so a tree without
            # reasons no longer gets a stray </table>
            output.write('''<table border="1">
''')
            for row in reasons:
                rc = row['rc']
                if rc == 'USER':
                    _display_user_status_code(output, row)
                elif rc == 'DUPLICATE':
                    _display_duplicate_taxon_status_code(output, row)
                elif rc == 'OVERLAPPING':
                    _display_overlapping_taxon_status_code(output, row)
                elif rc in ['PRUNING', 'PRUNING_UNMAPPED']:
                    _display_pruning_taxon_status_code(output, row)
                elif rc == 'TNRS':
                    _display_tnrs_taxon_status_code(output, row)
                else:
                    assert rc == 'NULLID'
                    _display_null_taxon_status_code(output, row)
            output.write('''    </table>
''')

        # INFO
        output.write('''<p><b>Info</b></p>
    ''')
        output.write('''<table border="1">\n    ''')
        for k in tree:
            if k not in all_known_tree_keys:
                warn('Unexpected key in tree: ' + k)
        for k in keys_to_lists:
            tl = tree.get(k)
            if tl is not None:
                # NOTE(review): this overwrites the caller's list with a
                # string, so a second call on the same object would join
                # the characters of that string -- confirm callers do not
                # reuse status_obj.
                tree[k] = '"%s"' % '", "'.join(tl)
        for k in valuable_tree_keys + detail_keys:
            # best effort: a key that is absent, or a value that cannot be
            # rendered, is shown as "Missing!" instead of aborting the page
            try:
                cell = proc_val_for_html(tree[k])
            except Exception:
                cell = warn_html("Missing!")
            output.write('        <tr><td>%s</td><td>%s</td></tr>\n' %
                         (k, cell))
        output.write('''    </table>
    <hr/>
''')
    if num_importable != 1:
        output.write('<p>' + warn_html(str(num_importable) + ' trees') +
                     ' importable from this study!</p>\n')
    else:
        output.write('<p>1 tree importable from this study.</p>\n')
    output.write('''</body>
</html>
''')
示例#8
0
def process_treemachine_log_info(log_obj, nexson_obj, default_study_id):
    '''
    Digests a parsed treemachine log into study-level and per-tree info.

    Takes:
        `log_obj` an iterable of log records. A record with a string 'label'
            is handled here; anything that fails in that handling is handed
            to _process_tree_log_info together with the current context.
        `nexson_obj` the NexSON wrapper; its `otu_list` is read and its
            `incoming_ottid_to_otu` attribute is set
        `default_study_id` used as the study id when the log did not
            provide an 'ot:studyId'

    Returns a dict {'study_info': study_dict, 'tree_list': list_of_tree_dicts}
    '''
    nexson_obj.incoming_ottid_to_otu = get_ott_id_to_taxon_map(
        nexson_obj.otu_list)
    study = {}
    tree_list = []
    context_el = None
    tree_id2tree = {}
    for el in log_obj:
        # NOTE(review): the try covers the whole dispatch, so an error raised
        # while handling a labelled record (e.g. an unknown tree id) also
        # falls through to _process_tree_log_info -- confirm this is wanted.
        try:
            label = el['label'].lower()
            if label == 'otus':
                _add_unique(el, study, 'number', 'num_otus')
                context_el = study
            elif label == 'study tagged as deprecated. ignore.':
                study['Deprecated'] = 'Study tagged as deprecated.'
            elif label == 'processing tree':
                tree_list.append({})
                context_el = tree_list[-1]
                context_el['tree_index'] = len(tree_list) - 1
                _add_unique(el, context_el, '@id', 'id')
                tree_id2tree[context_el['id']] = context_el
            elif label == 'tree tagged as deprecated. ignoring.':
                tid = el['@id']
                tree_id2tree[tid]['deprecated'] = True
            elif label not in TO_IGNORE_LABELS:
                warn('Ignored label "%s"' % label)
        except Exception:
            # Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit are not swallowed
            _process_tree_log_info(context_el, tree_list, tree_id2tree, el,
                                   nexson_obj)

    # move study info to study dict
    STUDY_INFO_PREFIXES = ('ot:study', 'ot:dataDeposit', 'ot:curatorName')
    for tree in tree_list:
        # iterate over a snapshot of the keys: entries are deleted in the loop
        for k in list(tree.keys()):
            if not k.startswith(STUDY_INFO_PREFIXES):
                continue
            if k in study:
                assert study[k] == tree[k]
            else:
                study[k] = tree[k]
            del tree[k]

    # process NexSON and tm out and add some helpful fields to the dicts
    importable_count = 0
    phylografter_domain = 'http://www.reelab.net/phylografter/'
    for tree in tree_list:
        i = diagnose_tree_status(tree, nexson_obj)
        tree['importable_into_treemachine'] = 'true' if i else 'false'
        if i:
            importable_count += 1
        tid = tree['id']
        tree['phylografter_tree_link'] = (phylografter_domain +
                                          'stree/svgView/' + tid)
    study['num_trees_importable'] = importable_count

    study['phylografter_study_id'] = study.get('ot:studyId', default_study_id)
    study['phylografter_study_link'] = (phylografter_domain + 'study/view/' +
                                        study['phylografter_study_id'])
    return {'study_info': study, 'tree_list': tree_list}
示例#9
0
def _process_tree_part_log_info(curr_tree, part_info, nexson_obj):
    '''
    Folds the records of `part_info` into the info dict `curr_tree`.

    Takes:
        `curr_tree` the info dict of the tree being described; updated in
            place
        `part_info` an iterable whose items are either a list of labelled
            dicts or a single labelled dict
        `nexson_obj` passed to _add_otu_from_node_nexsonid; for
            'tnrs resolved ottolid' records its node_for_nexsonid(),
            tree_nexsonid_to_tree and otu_list are used as well

    Returns None. Raises KeyError if a record has no 'label' (or lacks one
    of the other keys that are indexed directly below).
    '''
    # dict created for the most recent 'subtree' record; a later
    # 'matched anc' record fills in its 'name' and 'node'
    prev_subtree = None
    for el in part_info:
        if isinstance(el, list):
            # A list groups sub-records; duplicate and overlap details are
            # collected first and attached to the tree after the inner loop.
            dup_dict = {}
            overlap_dict = {}
            for sub in el:
                label = sub['label'].lower()
                if label == 'matched anc':
                    assert prev_subtree is not None
                    prev_subtree['name'] = sub.get('name')
                    prev_subtree['node'] = sub.get('node')
                elif label == 'duplicate':
                    _update_keys(dup_dict, sub, ['name', 'OTT ID', 'nexsonid'])
                elif label == 'overlapping retained':
                    _update_keys(overlap_dict.setdefault('retained', {}), sub,
                                 ['name', 'nexsonid'])
                elif label == 'overlapping pruned':
                    _update_keys(overlap_dict.setdefault('pruned', {}), sub,
                                 ['name', 'nexsonid'])
                elif label not in TO_IGNORE_LABELS:
                    warn('Ignored label "%s" in tree' % label)
            if len(dup_dict) > 0:
                _add_otu_from_node_nexsonid(dup_dict, dup_dict.get('nexsonid'),
                                            nexson_obj)
                # plain indexing: relies on an earlier 'ott id reused in tree'
                # record having created the list
                curr_tree['reused_tip_labels'].append(dup_dict)
            if len(overlap_dict) > 0:
                # NOTE(review): both 'retained' and 'pruned' are indexed
                # directly, so a group holding only one of them raises
                # KeyError -- confirm the log always supplies both.
                r = overlap_dict['retained']
                _add_otu_from_node_nexsonid(r, r.get('nexsonid'), nexson_obj)
                p = overlap_dict['pruned']
                _add_otu_from_node_nexsonid(p, p.get('nexsonid'), nexson_obj)
                # plain indexing: relies on an earlier 'overlapping tips'
                # record having created the list
                curr_tree['overlapping_tip_taxa'].append(overlap_dict)
        else:
            label = el['label'].lower()
            if label == 'taxon mapping':
                by_ott = curr_tree.setdefault('by_ott', {})
                # NOTE(review): `ott_id` is not used in this branch
                ott_id = el['OTT ID']
                by_ott[el['OTT ID']] = {'taxonomy': el['taxonomy']}
            elif label == 'subtree':
                # `sd` is a throw-away dict, so subtrees are not retained on
                # curr_tree; prev_subtree is still kept for 'matched anc'.
                sd = {
                }  # Change the following to the rvalue to retain the subtrees... => curr_tree.setdefault('subtrees', {})
                n = el['newick']
                prev_subtree = {}
                sd[n] = prev_subtree
            elif label == 'ott id reused in tree':
                # creates the list that 'duplicate' groups append to
                curr_tree.setdefault('reused_tip_labels', [])
            elif label == 'overlapping tips':
                # creates the list that overlap groups append to
                curr_tree.setdefault('overlapping_tip_taxa', [])
            elif label == 'pruning dups and overlapping':
                _add_otu_from_node_nexsonid(el, el.get('nexsonid'), nexson_obj)
                curr_tree.setdefault('pruned_dup', []).append(el)
            elif label == 'pruning unmapped':
                _add_otu_from_node_nexsonid(el, el.get('nexsonid'), nexson_obj)
                curr_tree.setdefault('pruned unmapped', []).append(el)
            elif label == 'error ottolid indexed to a null node!':
                curr_tree.setdefault('null_ottol_ids_in_final_mapping',
                                     []).append(el)
            elif label == 'null ott id for node':
                curr_tree.setdefault('null ott id for node', []).append(el)
            elif label == 'ott id missing':
                curr_tree.setdefault('ott id missing', []).append({
                    'name':
                    el['name'],
                    'nexsonid':
                    el['nexsonid']
                })
            elif label == 'tnrs resolved ottolid':
                nid = el['nexsonid']
                searched_on = el['searched on']
                ott_id = el['OTT ID']
                name = el['name']
                # NOTE(review): `c` is never used below
                c = nexson_obj.__dict__.get('corrected', {})
                node = nexson_obj.node_for_nexsonid(nid)
                nexsontree = nexson_obj.tree_nexsonid_to_tree[curr_tree['id']]
                oid2node = nexsontree.ottid_to_node_list
                # take the node out of the list kept for its current ott id
                nl = oid2node[node.otu.ott_id]
                assert node in nl
                nl.remove(node)
                new_otu_list = oid2node.get(ott_id)
                if new_otu_list is None or len(new_otu_list) == 0:
                    # no node carries the resolved id yet: register a new OTU
                    # NOTE(review): node.otu is not reassigned to `o` here,
                    # unlike the else branch -- confirm this is intended.
                    o = OTU({'@id': None, '@label': name, 'meta': None})
                    o.original_label = searched_on
                    o.ott_id = ott_id
                    nexson_obj.otu_list.append(o)
                    oid2node[ott_id] = [node]
                else:
                    # share the otu of the first node already mapped to it
                    node.otu = new_otu_list[0].otu
                    new_otu_list.append(node)
                curr_tree.setdefault('tnrs resolved ottolid', []).append({
                    'searched on':
                    searched_on,
                    'OTT ID':
                    ott_id,
                    'name':
                    name,
                    'nexsonid':
                    nid
                })
            elif label not in TO_IGNORE_LABELS:
                warn('Ignored label "%s" in part' % label)
def write_status_obj_as_html(status_obj, output):
    '''
    Writes the status html

    Takes:
        `status_obj` a dict parsed from "status JSON"; its 'study_info' dict
            and its 'tree_list' list are read. Every tree dict must carry
            'id', 'phylografter_tree_link' and a 'status' dict holding
            'text' and 'reasons'.
        `output` a file-like obj (only its write() method is used)

    NOTE: an 'ot:tag' list found in a tree is replaced in place by its
    quoted, comma-separated string form, so `status_obj` is modified.
    '''
    study_info = status_obj['study_info']
    tree_list = status_obj['tree_list']
    phylografter_study_id = study_info['phylografter_study_id']
    phylografter_study_link = study_info['phylografter_study_link']

    output.write('''<html>
<head>
    <title>Snaphsot of treemachine status for phylografter study %s</title>
</head>''' % phylografter_study_id)
    output.write('''<body>
<h2>Study Info</h2>
    <p><a href="%s">%s</a></p>
    <table border="1">
''' % (phylografter_study_link, phylografter_study_link))
    # sorted() works whether keys() returns a list or a view
    for k in sorted(study_info.keys()):
        v = study_info[k]
        output.write('        <tr><td>' + str(k) + '</td><td>' + proc_val_for_html(v) + '</td></tr>\n')
    output.write('''    </table>
''')

    ##############################
    # Write tree info
    ##############################
    to_ignore_keys = ['tree_index', ]
    detail_keys = ['number of external nodes',
                   'number edges',
                   'number nodes',
                   'ot:tag',  # [tag, tag]
                   ]
    verbose_keys = ['postpruning newick',  # newick string
                    'subtrees',  # dict. Keys newick -> dict with name and node keys
                    'by_ott'  # {ottID -> {'taxonomy', }}
                    ]
    valuable_tree_keys = ['id',
                          'ot:branchLengthMode',
                          'ingroup',
                          'ot:inGroupClade',
                          'ot:focalClade']
    special_keys = ['status',
                    'had_ott_ids_before_tnrs',  # bool
                    'deprecated',  # bool
                    'reused_tip_labels',
                    'pruned_dup',  # = [name, name, ...]
                    'null_ottol_ids_in_final_mapping',  # ['original name']
                    'null ott id for node',  # ['name']
                    'overlapping_tip_taxa',
                    'importable_into_treemachine',
                    'phylografter_tree_link',
                    'ott id missing',
                    'tnrs resolved ottolid',
                    'pruned unmapped',
                    ]
    keys_to_lists = ['ot:tag']
    # only used for membership tests, hence a set
    all_known_tree_keys = frozenset(special_keys + verbose_keys + valuable_tree_keys + detail_keys + to_ignore_keys)
    num_importable = study_info['num_trees_importable']
    for tree in tree_list:
        tid = tree['id']
        phylografter_tree_link = tree['phylografter_tree_link']
        output.write('''<h3>Tree %s</h3>
    <p><a href="%s">%s</a></p>
    ''' % (tid, phylografter_tree_link, phylografter_tree_link))
        # STATUS
        output.write('''<p><b>Status: </b>%s</p>
    ''' % (tree['status']['text']))
        reasons = tree['status']['reasons']
        if reasons:
            # the table is opened and closed together, so a tree without
            # reasons no longer gets a stray </table>
            output.write('''<table border="1">
''')
            for row in reasons:
                rc = row['rc']
                if rc == 'USER':
                    _display_user_status_code(output, row)
                elif rc == 'DUPLICATE':
                    _display_duplicate_taxon_status_code(output, row)
                elif rc == 'OVERLAPPING':
                    _display_overlapping_taxon_status_code(output, row)
                elif rc in ['PRUNING', 'PRUNING_UNMAPPED']:
                    _display_pruning_taxon_status_code(output, row)
                elif rc == 'TNRS':
                    _display_tnrs_taxon_status_code(output, row)
                else:
                    assert rc == 'NULLID'
                    _display_null_taxon_status_code(output, row)
            output.write('''    </table>
''')

        # INFO
        output.write('''<p><b>Info</b></p>
    ''')
        output.write('''<table border="1">\n    ''')
        for k in tree:
            if k not in all_known_tree_keys:
                warn('Unexpected key in tree: ' + k)
        for k in keys_to_lists:
            tl = tree.get(k)
            if tl is not None:
                # NOTE(review): this overwrites the caller's list with a
                # string, so a second call on the same object would join
                # the characters of that string -- confirm callers do not
                # reuse status_obj.
                tree[k] = '"%s"' % '", "'.join(tl)
        for k in valuable_tree_keys + detail_keys:
            # best effort: a key that is absent, or a value that cannot be
            # rendered, is shown as "Missing!" instead of aborting the page
            try:
                cell = proc_val_for_html(tree[k])
            except Exception:
                cell = warn_html("Missing!")
            output.write('        <tr><td>%s</td><td>%s</td></tr>\n' % (k, cell))
        output.write('''    </table>
    <hr/>
''')
    if num_importable != 1:
        output.write('<p>' + warn_html(str(num_importable) + ' trees') + ' importable from this study!</p>\n')
    else:
        output.write('<p>1 tree importable from this study.</p>\n')
    output.write('''</body>
</html>
''')
def process_treemachine_log_info(log_obj, nexson_obj, default_study_id):
    '''
    Digests a parsed treemachine log into study-level and per-tree info.

    Takes:
        `log_obj` an iterable of log records. A record with a string 'label'
            is handled here; anything that fails in that handling is handed
            to _process_tree_log_info together with the current context.
        `nexson_obj` the NexSON wrapper; its `otu_list` is read and its
            `incoming_ottid_to_otu` attribute is set
        `default_study_id` used as the study id when the log did not
            provide an 'ot:studyId'

    Returns a dict {'study_info': study_dict, 'tree_list': list_of_tree_dicts}
    '''
    nexson_obj.incoming_ottid_to_otu = get_ott_id_to_taxon_map(nexson_obj.otu_list)
    study = {}
    tree_list = []
    context_el = None
    tree_id2tree = {}
    for el in log_obj:
        # NOTE(review): the try covers the whole dispatch, so an error raised
        # while handling a labelled record (e.g. an unknown tree id) also
        # falls through to _process_tree_log_info -- confirm this is wanted.
        try:
            label = el['label'].lower()
            if label == 'otus':
                _add_unique(el, study, 'number', 'num_otus')
                context_el = study
            elif label == 'study tagged as deprecated. ignore.':
                study['Deprecated'] = 'Study tagged as deprecated.'
            elif label == 'processing tree':
                tree_list.append({})
                context_el = tree_list[-1]
                context_el['tree_index'] = len(tree_list) - 1
                _add_unique(el, context_el, '@id', 'id')
                tree_id2tree[context_el['id']] = context_el
            elif label == 'tree tagged as deprecated. ignoring.':
                tid = el['@id']
                tree_id2tree[tid]['deprecated'] = True
            elif label not in TO_IGNORE_LABELS:
                warn('Ignored label "%s"' % label)
        except Exception:
            # Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit are not swallowed
            _process_tree_log_info(context_el, tree_list, tree_id2tree, el, nexson_obj)

    # move study info to study dict
    STUDY_INFO_PREFIXES = ('ot:study', 'ot:dataDeposit', 'ot:curatorName')
    for tree in tree_list:
        # iterate over a snapshot of the keys: entries are deleted in the loop
        for k in list(tree.keys()):
            if not k.startswith(STUDY_INFO_PREFIXES):
                continue
            if k in study:
                assert study[k] == tree[k]
            else:
                study[k] = tree[k]
            del tree[k]

    # process NexSON and tm out and add some helpful fields to the dicts
    importable_count = 0
    phylografter_domain = 'http://www.reelab.net/phylografter/'
    for tree in tree_list:
        i = diagnose_tree_status(tree, nexson_obj)
        tree['importable_into_treemachine'] = 'true' if i else 'false'
        if i:
            importable_count += 1
        tid = tree['id']
        tree['phylografter_tree_link'] = phylografter_domain + 'stree/svgView/' + tid
    study['num_trees_importable'] = importable_count

    study['phylografter_study_id'] = study.get('ot:studyId', default_study_id)
    study['phylografter_study_link'] = phylografter_domain + 'study/view/' + study['phylografter_study_id']
    return {'study_info': study,
            'tree_list': tree_list}
def _process_tree_part_log_info(curr_tree, part_info, nexson_obj):
    '''
    Folds the records of `part_info` into the info dict `curr_tree`.

    Takes:
        `curr_tree` the info dict of the tree being described; updated in
            place
        `part_info` an iterable whose items are either a list of labelled
            dicts or a single labelled dict
        `nexson_obj` passed to _add_otu_from_node_nexsonid; for
            'tnrs resolved ottolid' records its node_for_nexsonid(),
            tree_nexsonid_to_tree and otu_list are used as well

    Returns None. Raises KeyError if a record has no 'label' (or lacks one
    of the other keys that are indexed directly below).
    '''
    # dict created for the most recent 'subtree' record; a later
    # 'matched anc' record fills in its 'name' and 'node'
    prev_subtree = None
    for el in part_info:
        if isinstance(el, list):
            # A list groups sub-records; duplicate and overlap details are
            # collected first and attached to the tree after the inner loop.
            dup_dict = {}
            overlap_dict = {}
            for sub in el:
                label = sub['label'].lower()
                if label == 'matched anc':
                    assert prev_subtree is not None
                    prev_subtree['name'] = sub.get('name')
                    prev_subtree['node'] = sub.get('node')
                elif label == 'duplicate':
                    _update_keys(dup_dict, sub, ['name', 'OTT ID', 'nexsonid'])
                elif label == 'overlapping retained':
                    _update_keys(overlap_dict.setdefault('retained', {}), sub, ['name', 'nexsonid'])
                elif label == 'overlapping pruned':
                    _update_keys(overlap_dict.setdefault('pruned', {}), sub, ['name', 'nexsonid'])
                elif label not in TO_IGNORE_LABELS:
                    warn('Ignored label "%s" in tree' % label)
            if len(dup_dict) > 0:
                _add_otu_from_node_nexsonid(dup_dict, dup_dict.get('nexsonid'), nexson_obj)
                # plain indexing: relies on an earlier 'ott id reused in tree'
                # record having created the list
                curr_tree['reused_tip_labels'].append(dup_dict)
            if len(overlap_dict) > 0:
                # NOTE(review): both 'retained' and 'pruned' are indexed
                # directly, so a group holding only one of them raises
                # KeyError -- confirm the log always supplies both.
                r = overlap_dict['retained']
                _add_otu_from_node_nexsonid(r, r.get('nexsonid'), nexson_obj)
                p = overlap_dict['pruned']
                _add_otu_from_node_nexsonid(p, p.get('nexsonid'), nexson_obj)
                # plain indexing: relies on an earlier 'overlapping tips'
                # record having created the list
                curr_tree['overlapping_tip_taxa'].append(overlap_dict)
        else:
            label = el['label'].lower()
            if label == 'taxon mapping':
                by_ott = curr_tree.setdefault('by_ott', {})
                # NOTE(review): `ott_id` is not used in this branch
                ott_id = el['OTT ID']
                by_ott[el['OTT ID']] = {'taxonomy': el['taxonomy']}
            elif label == 'subtree':
                # `sd` is a throw-away dict, so subtrees are not retained on
                # curr_tree; prev_subtree is still kept for 'matched anc'.
                sd = {} # Change the following to the rvalue to retain the subtrees... => curr_tree.setdefault('subtrees', {})
                n = el['newick']
                prev_subtree = {}
                sd[n] = prev_subtree
            elif label == 'ott id reused in tree':
                # creates the list that 'duplicate' groups append to
                curr_tree.setdefault('reused_tip_labels', [])
            elif label == 'overlapping tips':
                # creates the list that overlap groups append to
                curr_tree.setdefault('overlapping_tip_taxa', [])
            elif label == 'pruning dups and overlapping':
                _add_otu_from_node_nexsonid(el, el.get('nexsonid'), nexson_obj)
                curr_tree.setdefault('pruned_dup', []).append(el)
            elif label == 'pruning unmapped':
                _add_otu_from_node_nexsonid(el, el.get('nexsonid'), nexson_obj)
                curr_tree.setdefault('pruned unmapped', []).append(el)
            elif label == 'error ottolid indexed to a null node!':
                curr_tree.setdefault('null_ottol_ids_in_final_mapping', []).append(el)
            elif label == 'null ott id for node':
                curr_tree.setdefault('null ott id for node', []).append(el)
            elif label == 'ott id missing':
                curr_tree.setdefault('ott id missing', []).append({'name': el['name'], 'nexsonid': el['nexsonid']})
            elif label == 'tnrs resolved ottolid':
                nid = el['nexsonid']
                searched_on = el['searched on']
                ott_id = el['OTT ID']
                name = el['name']
                # NOTE(review): `c` is never used below
                c = nexson_obj.__dict__.get('corrected', {})
                node = nexson_obj.node_for_nexsonid(nid)
                nexsontree = nexson_obj.tree_nexsonid_to_tree[curr_tree['id']]
                oid2node = nexsontree.ottid_to_node_list
                # take the node out of the list kept for its current ott id
                nl = oid2node[node.otu.ott_id]
                assert node in nl
                nl.remove(node)
                new_otu_list = oid2node.get(ott_id)
                if new_otu_list is None or len(new_otu_list) == 0:
                    # no node carries the resolved id yet: register a new OTU
                    # NOTE(review): node.otu is not reassigned to `o` here,
                    # unlike the else branch -- confirm this is intended.
                    o = OTU({'@id': None, '@label':name, 'meta':None})
                    o.original_label = searched_on
                    o.ott_id = ott_id
                    nexson_obj.otu_list.append(o)
                    oid2node[ott_id] = [node]
                else:
                    # share the otu of the first node already mapped to it
                    node.otu = new_otu_list[0].otu
                    new_otu_list.append(node)
                curr_tree.setdefault('tnrs resolved ottolid', []).append({'searched on': searched_on, 
                                                                          'OTT ID': ott_id,
                                                                          'name': name,
                                                                          'nexsonid': nid})
            elif label not in TO_IGNORE_LABELS:
                warn('Ignored label "%s" in part' % label)