Пример #1
0
def load_all_results(input_settings,
                     alg_settings,
                     output_settings,
                     prec_rec_str="",
                     **kwargs):
    """
    Load all of the results for the datasets and algs specified in the config file.

    *input_settings*: dict with 'datasets' (list of dataset dicts) and 'input_dir'
    *alg_settings*: dict mapping algorithm name -> its parameter settings
    *output_settings*: dict with 'output_dir' where results were written
    *prec_rec_str*: optional string passed through to load_alg_results
    *kwargs*: expects at least 'exp_type'; optionally 'cv_seed', 'num_reps',
        'most_specific_terms', and anything load_alg_results consumes

    Returns a single pandas DataFrame with all loaded results concatenated
    (with a 'rep' column added for cross-validation repetitions).
    """
    df_all = pd.DataFrame()
    algs = get_algs_to_run(alg_settings, **kwargs)
    # Keep the configured seed untouched; per-rep seeds are derived from it below.
    base_seed = kwargs.get('cv_seed')
    for dataset in input_settings['datasets']:
        if kwargs.get('most_specific_terms'):
            # load the ontology
            obo_file = "%s/%s" % (input_settings['input_dir'],
                                  dataset['obo_file'])
            go_dags = go_examples.parse_obo_file_and_build_dags(obo_file)
        for alg in algs:
            if alg not in alg_settings:
                print("%s not found in config file. Skipping" % (alg))
                continue
            alg_params = alg_settings[alg]
            if 'cv-' in kwargs['exp_type']:
                for rep in range(1, kwargs.get('num_reps', 1) + 1):
                    # BUGFIX: the original did `curr_seed += rep - 1` on a variable
                    # shared across reps/algs/datasets, yielding cumulative seeds
                    # (seed, seed+1, seed+3, seed+6, ...). Each rep's seed should be
                    # base_seed + rep - 1.
                    curr_seed = base_seed + rep - 1 if base_seed is not None else None
                    # NOTE(review): curr_exp_type is built but never passed on --
                    # exp_type travels inside **kwargs. Left as-is to preserve behavior.
                    curr_exp_type = "%s-rep%s%s" % (
                        kwargs['exp_type'], rep, "-seed%s" %
                        (curr_seed) if curr_seed is not None else "")
                    df = load_alg_results(
                        dataset,
                        alg,
                        alg_params,
                        prec_rec_str=prec_rec_str,
                        results_dir=output_settings['output_dir'],
                        **kwargs,  #exp_type=curr_exp_type,
                        #only_terms=kwargs.get('only_terms'), postfix=kwargs.get('postfix',''),
                    )
                    add_dataset_settings(dataset, df)
                    df['rep'] = rep
                    df_all = pd.concat([df_all, df])
            else:
                df = load_alg_results(
                    dataset,
                    alg,
                    alg_params,
                    prec_rec_str=prec_rec_str,
                    results_dir=output_settings['output_dir'],
                    **kwargs,  #exp_type=kwargs['exp_type'],
                    #only_terms=kwargs.get('only_terms'), postfix=kwargs.get('postfix',''),
                )
                if kwargs.get('most_specific_terms') and len(df) > 0:
                    df = get_most_specific_sp_term_pairs(df, go_dags, **kwargs)
                    print("MAD: %s" % (mad(df['fmax'].values)))
                add_dataset_settings(dataset, df)
                df_all = pd.concat([df_all, df])
    return df_all
Пример #2
0
def setup_obo_dag_matrix(obo_file, goterms):
    """
    Build the hierarchy matrix for the sub-ontology containing the given terms.

    *goterms*: if a set of goterms are given, then limit the dag to
        the sub-ontology which has the given terms. Currently just returns the DAG
        containing the first term.
        TODO allow for multiple

    Returns (dag_matrix, goids); exits the process if the first term is not
    found in any sub-ontology.
    """
    go_dags = go_examples.parse_obo_file_and_build_dags(obo_file)
    # BUGFIX: pick the representative term once, outside the loop. The original
    # rebuilt list(goterms)[0] on every iteration, and `t` was unbound in the
    # error message below whenever go_dags was empty (NameError hid the real error).
    t = next(iter(goterms))
    dag_matrix = None
    goids = None
    for h, dag in go_dags.items():
        if not dag.has_node(t):
            continue
        # If the term appears in multiple sub-ontologies, the last match wins
        # (preserves original behavior).
        dag_matrix, goids = build_hierarchy_matrix(dag, goterms, h=h)
    if dag_matrix is None:
        print("ERROR: term %s not found in any of the sub-ontologies" % (t))
        sys.exit("Quitting")
    return dag_matrix, goids
Пример #3
0
def setup_h_ann_matrices(prots, obo_file, pos_neg_file, goterms=None):
    """
    Build the hierarchy and annotation matrices for the category (bp/mf/cc)
    inferred from *pos_neg_file*'s name.

    *prots*: ordered list of proteins (rows of the annotation matrix)
    *obo_file*: GO OBO file to parse
    *pos_neg_file*: positive/negative annotations file; its path must contain
        'bp', 'mf' or 'cc' so the sub-ontology can be determined
    *goterms*: optional set of GO terms to restrict to

    Returns (dag_matrix, ann_matrix, goids).
    Raises ValueError if the category cannot be inferred from the filename.
    """
    # parse the go_dags first as it also sets up the goid_to_category dictionary
    go_dags = go_examples.parse_obo_file_and_build_dags(obo_file)

    # TODO build a matrix with the direct annotations (i.e., from the gaf file)
    # for now, just use all of the propagated annotations
    # and then evaluate using the scores
    #for pos_neg_file in pos_neg_files:
    if 'bp' in pos_neg_file:
        h = 'bp'
    elif 'mf' in pos_neg_file:
        h = 'mf'
    elif 'cc' in pos_neg_file:
        h = 'cc'
    else:
        # BUGFIX: the original left `h` unbound here, causing a confusing
        # NameError at the call below. Fail with an explicit message instead.
        raise ValueError(
            "Unable to determine the GO category (bp/mf/cc) from "
            "pos_neg_file: %s" % (pos_neg_file))
    dag_matrix, ann_matrix, goids = build_h_ann_matrices(
        prots, go_dags, pos_neg_file=pos_neg_file, h=h, goterms=goterms)

    return dag_matrix, ann_matrix, goids
def main(sparse_net_file, obo_file, pos_neg_file=None, gaf_file=None, ignore_ec=None,
         alpha=.5, theta=.5, mu=.5, h="bp", out_pref=None):
    """
    Load a sparse network and GO annotations, optionally export them as a
    MATLAB .mat file, then run BirgRank and write the first term's scores.

    *sparse_net_file*: file readable by alg_utils.setup_sparse_network
    *obo_file*: GO OBO file
    *pos_neg_file* / *gaf_file*: annotation sources for build_h_ann_matrices
    *ignore_ec*: evidence codes to ignore (defaults to ["IEA"]);
        NOTE(review): currently unused in this body -- kept for interface
        compatibility
    *alpha*, *theta*, *mu*: BirgRank hyperparameters
    *h*: GO category (bp/mf/cc), used for filenames and matrix construction
    *out_pref*: output file prefix; if None, nothing is written to disk
    """
    # BUGFIX: mutable default argument replaced with None sentinel.
    if ignore_ec is None:
        ignore_ec = ["IEA"]

    W, prots = alg_utils.setup_sparse_network(sparse_net_file)
    # parse the go_dags first as it also sets up the goid_to_category dictionary
    go_dags = go_examples.parse_obo_file_and_build_dags(obo_file)

    # BUGFIX: pass h through instead of hard-coding 'bp' (output filenames
    # below already use h, so the two could disagree).
    dag_matrix, ann_matrix, goids = build_h_ann_matrices(
        prots, go_dags, pos_neg_file=pos_neg_file, gaf_file=gaf_file, h=h)
    # make sure they're type float so matlab will parse them correctly
    sparse_net = W.astype('float')
    ann_matrix = ann_matrix.astype('float')
    dag_matrix = dag_matrix.astype('float')

    if out_pref is not None:
        out_file = "%s%s-annotations-and-go-dag.mat" % (out_pref, h)
        utils.checkDir(os.path.dirname(out_file))

        print("\twriting graph, annotation, and hierarchy matrices to %s" % (out_file))
        # write these to a file to run the matlab BirgRank
        savemat(out_file, {"G": sparse_net, "R": ann_matrix, "H": dag_matrix}, do_compression=True)

        goids_file = "%s%s-goids.txt" % (out_pref, h)
        print("\twriting goids to %s" % (goids_file))
        with open(goids_file, 'w') as out:
            out.write(''.join("%s\n" % (goid) for goid in goids))

    run_birgrank = True
    if run_birgrank is True:
        # BUGFIX: use the alpha/theta/mu parameters instead of hard-coded .5s.
        Xh = birgRank(sparse_net, ann_matrix.transpose(), dag_matrix,
                      alpha=alpha, theta=theta, mu=mu,
                      eps=0.0001, max_iters=1000, verbose=True)
        Xh = Xh.T
        print(Xh.shape)

        # BUGFIX: only write scores when an output prefix was given; the
        # original built the path with a None prefix ("None<h>-pred-scores.txt").
        if out_pref is not None:
            out_file = "%s%s-pred-scores.txt" % (out_pref, h)
            print("\twriting scores to %s" % (out_file))
            # write the results for a single GO term
            with open(out_file, 'w') as out:
                for i in range(Xh.shape[0]):
                    print("writing results for goterm %s" % (goids[i]))
                    out.write(''.join("%s\t%s\t%s\n" % (goids[i], prots[j], score)
                                      for j, score in enumerate(Xh[i].toarray().flatten())))
                    break
    return