Example #1
def create_sparse_net_file(out_pref,
                           net_files=None,
                           string_net_files=[],
                           string_nets=STRING_NETWORKS,
                           string_cutoff=None,
                           forcenet=False):
    if net_files is None:
        net_files = []
    # if there aren't any string net files, then set the string nets to empty
    if len(string_net_files) == 0:
        string_nets = []
    # if there are string_net_files, and string_nets is None, set it back to its default
    elif string_nets is None:
        string_nets = STRING_NETWORKS
    string_nets = list(string_nets)
    num_networks = len(net_files) + len(string_nets)
    # if there is only 1 string network, then write the name instead of the number
    if len(string_nets) == 1:
        num_networks = string_nets[0]
    sparse_nets_file = "%s%s-sparse-nets.mat" % (out_pref, num_networks)
    # the node IDs should be the same for each of the networks,
    # so no need to include the # in the ids file
    node_ids_file = "%snode-ids.txt" % (out_pref)
    net_names_file = "%s%s-net-names.txt" % (out_pref, num_networks)
    if forcenet is False \
       and os.path.isfile(sparse_nets_file) and os.path.isfile(node_ids_file) \
       and os.path.isfile(net_names_file):
        # read the files
        print("\treading sparse nets from %s" % (sparse_nets_file))
        sparse_networks = list(loadmat(sparse_nets_file)['Networks'][0])
        print("\treading node ids file from %s" % (node_ids_file))
        nodes = utils.readItemList(node_ids_file, 1)
        print("\treading network_names from %s" % (net_names_file))
        network_names = utils.readItemList(net_names_file, 1)

    else:
        print("\tcreating sparse nets and writing to %s" % (sparse_nets_file))
        sparse_networks, network_names, nodes = setup_sparse_networks(
            net_files=net_files,
            string_net_files=string_net_files,
            string_nets=string_nets,
            string_cutoff=string_cutoff)

        # now write them to a file
        write_sparse_net_file(sparse_networks, sparse_nets_file, network_names,
                              net_names_file, nodes, node_ids_file)

    return sparse_networks, network_names, nodes
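
A minimal sketch of how this helper might be called; the output prefix and edge-list path below are hypothetical, and STRING_NETWORKS is assumed to be a module-level default list:

# Hypothetical call (out_pref and net file paths are assumptions):
sparse_networks, network_names, nodes = create_sparse_net_file(
    "outputs/2018_01/",                  # files are written as <out_pref><num>-sparse-nets.mat, etc.
    net_files=["inputs/ppi-edges.txt"],  # hypothetical edge-list file
    string_net_files=[],                 # no STRING networks in this sketch
    forcenet=False)                      # reuse the cached .mat/.txt files if they exist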
Example #2
'default outputs/version/weighted/plots/edge-weights/edge-weight-dist-version.png.'
)
#parser.add_option('-o', '--out-file', type='string', default="viz/assays/zscore-rectfs-assays.png",
#        help='path/to/output_file.png. Default:')
parser.add_option('',
                  '--pdf',
                  action='store_true',
                  help='Also store a pdf of the figures')
(opts, args) = parser.parse_args()

for i, version in enumerate(opts.version):
    print("")
    print("-" * 30)
    t_settings.set_version(version)
    chemicals = sorted(
        utils.readItemList("%s/chemicals.txt" % (t_settings.INPUTSPREFIX)))

    interactome = t_settings.INTERACTOME

    print(
        "Getting the weight, cost and direction of each edge from the interactome %s"
        % (interactome))
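    # interactome columns: tail node, head node, edge weight, True/False for directed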
    lines = utils.readColumns(interactome, 1, 2, 3, 4)
    edge_weights = {(u, v): float(w) for u, v, w, d in lines}
    edge_dir = {(u, v): d.lower() in ['true', 't', 'dir', 'directed']
                for u, v, w, d in lines}

    # get the evidence from get_interaction_evidence.py
    #evidence_file = getev.getEvidenceFile(evidence_version, t_settings.DATADIR)
    #edge_dir = getev.getEdgeDir(edge_weights.keys(), evidence_file, split_family_nodes=False, add_ev_to_family_edges=False)
    # TODO try just the directed edges
Example #3
def parseArguments():
    # parse the command line arguments
    (opts, args) = parser.parse_args()
    return opts


opts = parseArguments()

#if __name__ != "__main__":
#    os.chdir("/data/jeff-law/projects/2016-02-toxcast/")

for version in opts.version:
    print("")
    print("-"*30)
    t_settings.set_version(version)
    chemicals = sorted(readItemList("%s/chemicals.txt" % (t_settings.INPUTSPREFIX)))
    #sig_chemicals = utils.readItemList("inputs/%s/sig-chemicals.txt" % (version), 1)
    #unsig_chemicals = set(chemicals).difference(set(sig_chemicals))

    #summary_file = "outputs/%s/weighted/stats/network-summaries.csv" % (version)
    df = summary_stats.get_summary_stats(version=version, forced=opts.forced)
    #df = t_utils.get_summary_stats(version=version, forced=True)

    # loop through the chemicals (and optionally the significant and non-significant subsets)
    for chemicals, postfix in [(chemicals, '')]:
        if opts.out_file is None:
            out_file_name = "summary-network-stats-%s.png" % (version)
            out_dir = "%s/plots/summary-stats/" % (t_settings.RESULTSPREFIX)
            t_utils.checkDir(out_dir)
            out_file = "%s/%s" % (out_dir, out_file_name)
            # if specified, copy the file to the compare versions dir
Example #4
def load_net_ann_datasets(out_dir, taxon, dataset, input_settings,
                          alg_settings, uniprot_taxon_file, **kwargs):
    sparse_net_file = "%s/%s-net.npz" % (out_dir, taxon)
    node2idx_file = sparse_net_file + "-node-ids.txt"
    swsn_weights_file = sparse_net_file + "-swsn-weights.txt"
    sparse_ann_file = "%s/ann.npz" % (out_dir)
    if not kwargs.get('forcenet') and \
            (os.path.isfile(sparse_net_file) and os.path.isfile(node2idx_file)) and \
            os.path.isfile(sparse_ann_file):
        print("Reading network from %s" % (sparse_net_file))
        W = sp.load_npz(sparse_net_file)
        print("\t%d nodes and %d edges" % (W.shape[0], len(W.data) / 2))
        print("Reading node names from %s" % (node2idx_file))
        prots = utils.readItemList(node2idx_file, 1)
        new_net_obj = setup.Sparse_Networks(W, prots)
        if os.path.isfile(swsn_weights_file):
            print("Reading swsn weights file %s" % (swsn_weights_file))
            weights = [
                float(w) for w in utils.readItemList(swsn_weights_file, 1)
            ]
            # also load the original networks to get the edge weights for the STRING networks
            net_obj = run_eval_algs.setup_net(input_settings['input_dir'],
                                              dataset, **kwargs)
            net_obj.swsn_weights = weights
        else:
            net_obj = new_net_obj
        print("\nReading annotation matrix from %s" % (sparse_ann_file))
        loaded_data = np.load(sparse_ann_file, allow_pickle=True)
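        # np.savez_compressed was called with positional arrays (see the else
        # branch below), so the keys here are arr_0=dag_matrix, arr_1=ann_matrix,
        # arr_2=goids, arr_3=prots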
        dag_matrix = setup.make_csr_from_components(loaded_data['arr_0'])
        ann_matrix = setup.make_csr_from_components(loaded_data['arr_1'])
        goids, prots = loaded_data['arr_2'], loaded_data['arr_3']
        ann_obj = setup.Sparse_Annotations(dag_matrix, ann_matrix, goids,
                                           prots)
        species_to_uniprot_idx = eval_loso.get_uniprot_species(
            uniprot_taxon_file, ann_obj)
        # TODO eval ann obj
        eval_ann_obj = None
    else:
        # load the network
        # TODO if a subset of the network was run, need to get that subset
        net_obj, ann_obj, eval_ann_obj = run_eval_algs.setup_dataset(
            dataset, input_settings['input_dir'], alg_settings, **kwargs)
        species_to_uniprot_idx = eval_loso.get_uniprot_species(
            uniprot_taxon_file, ann_obj)
        new_net_obj = net_obj
        # run SWSN if needed
        #if net_obj.multi_net:
        # TODO if LOSO was run, need to leave out the taxon for edge weights to be accurate
        if taxon is not None:
            if kwargs.get('limit_to_taxons_file'):
                # limit the network to the specified species
                # read in the specified taxons from the file
                _, net_taxons = eval_loso.get_selected_species(
                    species_to_uniprot_idx, kwargs['limit_to_taxons_file'])
                net_taxon_prots = net_exp.get_taxon_prots(
                    net_obj.nodes, net_taxons, species_to_uniprot_idx)
                net_obj, ann_obj = net_exp.limit_to_taxons(net_taxon_prots,
                                                           net_obj=net_obj,
                                                           ann_obj=ann_obj,
                                                           **kwargs)
            # leave out the annotations for this taxon ID
            train_ann_mat, test_ann_mat, sp_goterms = eval_loso.leave_out_taxon(
                taxon,
                ann_obj,
                species_to_uniprot_idx,
                eval_ann_obj=eval_ann_obj,
                **kwargs)
            taxon_prots = net_exp.get_taxon_prots(net_obj.nodes, [taxon],
                                                  species_to_uniprot_idx)
            new_net_obj = net_exp.limit_net_to_target_taxon(
                train_ann_mat, taxon_prots, net_obj, ann_obj, **kwargs)
            W = new_net_obj.W
        #    else:
        #        W, _ = net_obj.weight_SWSN(ann_obj.ann_matrix)
        #        #new_net_obj =
        else:
            W = net_obj.W
        print("\twriting sparse matrix to %s" % (sparse_net_file))
        sp.save_npz(sparse_net_file, W)
        print("\twriting node2idx labels to %s" % (node2idx_file))
        with open(node2idx_file, 'w') as out:
            out.write(''.join([
                "%s\t%d\n" % (prot, i) for i, prot in enumerate(net_obj.nodes)
            ]))
        if net_obj.multi_net:
            print("\twriting swsn weights file to %s" % (swsn_weights_file))
            with open(swsn_weights_file, 'w') as out:
                out.write('\n'.join([str(w)
                                     for w in new_net_obj.swsn_weights]) +
                          '\n')
            net_obj.swsn_weights = new_net_obj.swsn_weights
        # now store them to a file
        print("\twriting sparse annotations to %s" % (sparse_ann_file))
        # store all the data in the same file
        dag_matrix_data = setup.get_csr_components(ann_obj.dag_matrix)
        ann_matrix_data = setup.get_csr_components(ann_obj.ann_matrix)
        #np.savez_compressed(
        #    sparse_ann_file, dag_matrix_data=dag_matrix_data,
        #    ann_matrix_data=ann_matrix_data, goids=goids, prots=prots)
        np.savez_compressed(sparse_ann_file, dag_matrix_data, ann_matrix_data,
                            ann_obj.goids, ann_obj.prots)
    return net_obj, new_net_obj, ann_obj, eval_ann_obj, species_to_uniprot_idx
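
The ann.npz round-trip above goes through the repository's setup.get_csr_components / setup.make_csr_from_components helpers, whose internals are not shown. A self-contained sketch of the same idea in plain scipy (an assumption about what those helpers do):

import numpy as np
import scipy.sparse as sp

# Decompose a CSR matrix into its raw arrays so np.savez can store it,
# then rebuild it on load.
def get_csr_components(m):
    comps = np.empty(4, dtype=object)
    comps[:] = [m.data, m.indices, m.indptr, np.array(m.shape)]
    return comps

def make_csr_from_components(c):
    data, indices, indptr, shape = c
    return sp.csr_matrix((data, indices, indptr), shape=tuple(shape))

mat = sp.random(5, 4, density=0.3, format='csr')
np.savez_compressed("ann-sketch.npz", get_csr_components(mat))
loaded = np.load("ann-sketch.npz", allow_pickle=True)
rebuilt = make_csr_from_components(loaded['arr_0'])
assert (mat != rebuilt).nnz == 0  # same values and sparsity pattern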
Example #5
def get_summary_stats(version="2018_01-toxcast-d2d-p1_5-u1_25",
                      summary_file="network_summaries.csv",
                      scope="permute-dir-undir",
                      forced=False):
    """ Function to aggregate summary statistics for every network
    returns a dataframe containing the counted metrics for each chemical
    """
    TOXCAST_DATA = t_utils.loadToxcastData(t_settings.INTERACTOMES[version])
    #inputs_dir = "inputs/%s/" % (version)
    t_settings.set_version(version)
    inputs_dir = t_settings.INPUTSPREFIX
    outputs_dir = "outputs/%s/weighted" % (version)
    chemicals = utils.readItemList("%s/chemicals.txt" % (inputs_dir), 1)
    #hits_template = "%s/hit-prots/%%s-hit-prots.txt" % (inputs_dir)
    #nonhits_template = "%s/hit-prots/%%s-nonhit-prots.txt" % (inputs_dir)
    #rec_tfs_template = "%s/rec-tfs/%%s-rec-tfs.txt" % (inputs_dir)
    chem_rec, chem_tfs = TOXCAST_DATA.chemical_rec, TOXCAST_DATA.chemical_tfs
    chem_prot_hit_vals = TOXCAST_DATA.chemical_protein_hit
    paths_dir = "%s/edgelinker" % (outputs_dir)
    paths_template = "%s/%%s-paths.txt" % (paths_dir)

    out_dir = "%s/stats/summary" % outputs_dir
    t_utils.checkDir(out_dir)
    summary_file = "%s/%s" % (out_dir, summary_file)
    if os.path.isfile(summary_file) and not forced:
        print(
            "Reading network summary stats from '%s'. Set forced to True to overwrite it."
            % (summary_file))
        df = pd.read_csv(summary_file, index_col=0)
    else:
        print("Reading in the stats from the response networks in", paths_dir)
        chemical_names, chemical_name_to_id = t_utils.getChemicalNameMaps()
        chemical_names = {
            chemical: chemical_names[chemical]
            for chemical in chemicals
        }
        chemical_prots = {}
        chemical_num_paths = {}
        chemical_num_edges = {}
        chemical_avg_path_lengths = {}
        chemical_rec = {}
        chemical_tfs = {}
        chemical_net_rec = {}
        chemical_net_tfs = {}
        chemical_hits = {}
        chemical_nonhits = {}
        chemical_net_hits = {}
        chemical_net_nonhits = {}
        chemical_inter_hits = {}
        chemical_inter_nonhits = {}
        chemical_inter_net_hits = {}
        chemical_inter_net_nonhits = {}
        # also get the q-value for each chemical
        chemical_pvals = {}
        pvals_file = "%s/stats/stat-sig-%s/gpd-pval.txt" % (outputs_dir, scope)
        if os.path.isfile(pvals_file):
            with open(pvals_file, 'r') as file_handle:
                header = file_handle.readline().rstrip().split('\t')
            pval_col = header.index("200") + 1
            chemical_pvals = {
                chem: pval
                for chem, pval in utils.readColumns(pvals_file, 1, pval_col)
            }
        chemical_qvals = {}
        qvals_file = "%s/stats/stat-sig-%s/bfcorr_pval_qval.txt" % (
            outputs_dir, scope)
        if os.path.isfile(qvals_file):
            chemical_qvals = t_utils.getPvals(outputs_dir,
                                              scope,
                                              sig_cutoff_type="FDR")
        for chemical in tqdm(chemicals):
            #prots, paths = getProteins(paths=paths_template % chemical, max_k=200, ties=True)
            paths = t_utils.getPaths(paths_template % chemical,
                                     max_k=200,
                                     ties=True)
            prots = set()
            num_paths = len(paths)
            edges = set()
            path_lengths = []
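            # each path is a '|'-delimited string of node IDs,
            # e.g. "A|B|C" has 3 nodes and 2 edges (IDs here are hypothetical)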
            for path in paths:
                path = path.split('|')
                # path length is the number of edges in a path
                path_lengths.append(len(path) - 1)
                prots = prots.union(set(path))
                for i in range(len(path) - 1):
                    edges.add((path[i], path[i + 1]))

            chemical_prots[chemical] = len(prots)
            chemical_num_paths[chemical] = len(paths)
            chemical_avg_path_lengths[chemical] = np.mean(path_lengths)
            chemical_num_edges[chemical] = len(edges)
            #rec, tfs = t_utils.getRecTFs(rec_tfs_template % chemical)
            rec, tfs = chem_rec[chemical], chem_tfs[chemical]
            chemical_rec[chemical] = len(rec)
            chemical_tfs[chemical] = len(tfs)
            chemical_net_rec[chemical] = len(prots.intersection(rec))
            chemical_net_tfs[chemical] = len(prots.intersection(tfs))
            # read the hits and nonhits for each chemical to calculate how many of them are in the network
            #hits = utils.readItemSet(hits_template % chemical, 1)
            #nonhits = utils.readItemSet(nonhits_template % chemical, 1)
            hits = {p for p, hit_val in chem_prot_hit_vals[chemical].items()
                    if hit_val == 1}
            nonhits = {p for p, hit_val in chem_prot_hit_vals[chemical].items()
                       if hit_val == 0}
            chemical_hits[chemical] = len(hits)
            chemical_nonhits[chemical] = len(nonhits)
            chemical_net_hits[chemical] = len(hits.intersection(prots))
            chemical_net_nonhits[chemical] = len(nonhits.intersection(prots))
            # subtract the rec and tfs to get just the intermediate hits and nonhits
            chemical_inter_hits[chemical] = len(hits.difference(
                rec.union(tfs)))
            chemical_inter_nonhits[chemical] = len(
                nonhits.difference(rec.union(tfs)))
            chemical_inter_net_hits[chemical] = len(
                hits.intersection(prots).difference(rec.union(tfs)))
            chemical_inter_net_nonhits[chemical] = len(
                nonhits.intersection(prots).difference(rec.union(tfs)))

        # write these metrics to a file
        df = pd.DataFrame({
            "name": chemical_names,
            "prots": chemical_prots,
            "num_paths": chemical_num_paths,
            "pvals": chemical_pvals,
            "qvals": chemical_qvals,
            "num_edges": chemical_num_edges,
            "avg_path_lengths": chemical_avg_path_lengths,
            "net_rec": chemical_net_rec,
            "net_tfs": chemical_net_tfs,
            "hit_rec": chemical_rec,
            "hit_tfs": chemical_tfs,
            "net_hits": chemical_net_hits,
            "net_nonhits": chemical_net_nonhits,
            'hits': chemical_hits,
            'nonhits': chemical_nonhits,
            "inter_net_hits": chemical_inter_net_hits,
            "inter_net_nonhits": chemical_inter_net_nonhits,
            "inter_hits": chemical_inter_hits,
            "inter_nonhits": chemical_inter_nonhits,
        })
        print("Writing: ", summary_file)
        df.to_csv(summary_file,
                  header=True,
                  columns=[
                      'name', 'prots', 'num_paths', 'num_edges',
                      'avg_path_lengths', 'hits', 'nonhits', 'net_hits',
                      'net_nonhits', 'hit_rec', 'hit_tfs', 'net_rec',
                      'net_tfs', 'inter_net_hits', 'inter_net_nonhits',
                      'inter_hits', 'inter_nonhits', 'pvals', 'qvals'
                  ])

    # change the index or chemical id to unicode (string)
    #df.index = df.index.map(unicode)

    return df
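
A short sketch of calling this function; the version string is the function's own default and the column selection is illustrative:

df = get_summary_stats(version="2018_01-toxcast-d2d-p1_5-u1_25", forced=False)
print(df[['name', 'prots', 'num_paths', 'pvals']].head())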
Example #6
def permute_and_run_edgelinker(opts, random_index):
    if opts.write_score_counts:
        rand_scores_k = "%s/rand-networks/rand-%d-med-scores-k.txt" % (
            opts.write_score_counts, random_index)
        # if the final score counts file already exists, then don't do anything
        if os.path.isfile(rand_scores_k) and not opts.forced:
            print("%s already exists. Skipping." % (rand_scores_k))
            return
        chemical_k_scores = "%s/chemical-k-median-scores.txt" % (
            opts.write_score_counts)
        if not os.path.isfile(chemical_k_scores):
            print(
                "Error: %s does not exist. Run compute_stat_sig.py with the --write-counts option to write it. Quitting"
                % (chemical_k_scores))
            return

    t_utils.checkDir("%s/networks" % (opts.out_dir))
    rec_tfs_file_template = "%s/rec-tfs/%%s-rec-tfs.txt" % (opts.inputs_dir)
    chemicals = sorted(
        utils.readItemList("%s/chemicals.txt" % opts.inputs_dir, col=1))
    if opts.single_chem:
        chemicals = opts.single_chem

    if opts.permute_rec_tfs is not None:
        # if specified, "permute" the sets of receptors and tfs for each chemical instead of the interactome
        print("Writing random sets of rec/tfs for each chemical to %s" %
              (opts.out_dir))
        rec_tfs_file_template = "%s/%%s/%d-random-rec-tfs.txt" % (opts.out_dir,
                                                                  random_index)
        all_rec, all_tfs = t_utils.getRecTFs(opts.permute_rec_tfs)
        #chemical_num_rectfs_file = "%s/chemical_num_rectfs.txt" % (opts.inputs_dir)
        #lines = utils.readColumns(chemical_num_rectfs_file, 2, 3, 4)
        #for chem, num_rec, num_tfs in tqdm(lines):
        for chemical in tqdm(chemicals, disable=opts.verbose):
            out_file = rec_tfs_file_template % (chemical)
            if not os.path.isfile(out_file) or opts.forced:
                rec, tfs, costs, zscores = t_utils.getRecTFs(
                    t_settings.REC_TFS_FILE % (opts.inputs_dir, chemical),
                    costs=True)
                rec = list(rec)
                tfs = list(tfs)

                out_dir = "%s/%s" % (opts.out_dir, chemical)
                t_utils.checkDir(out_dir)
                # random.sample needs a sequence (not a set) on Python 3.11+
                random_rec = random.sample(list(all_rec), len(rec))
                # apply the costs to the random rec and tfs
                for i in range(len(rec)):
                    costs[random_rec[i]] = costs[rec[i]]
                    zscores[random_rec[i]] = zscores[rec[i]]
                random_tfs = random.sample(list(all_tfs), len(tfs))
                for i in range(len(tfs)):
                    costs[random_tfs[i]] = costs[tfs[i]]
                    zscores[random_tfs[i]] = zscores[tfs[i]]
                t_utils.writeRecTFs(out_file,
                                    random_rec,
                                    random_tfs,
                                    costs=costs,
                                    zscores=zscores)
        # use the original interactome
        permuted_network_out_file = opts.interactome
        print("Using the original interactome %s" %
              (permuted_network_out_file))
    else:
        # default is to permute the interactome
        permuted_network_out_file = '%s/networks/permuted-network%d.txt' % (
            opts.out_dir, random_index)
        if not os.path.isfile(permuted_network_out_file) or opts.forced:
            # don't log transform. The weights will be log transformed by the edgelinker code
            #G = cycLinker.readNetwork(opts.interactome, weight=True, logtransform=False)
            # UPDATE: 2017-12-07: try using the direction of the edges from the fourth column of the interactome instead of splitting based on if the edge is bidirected or not
            G = nx.DiGraph()
            dir_edges = []
            undir_edges = []
            lines = utils.readColumns(opts.interactome, 1, 2, 3, 4)
            if len(lines) == 0:
                print(
                    "ERROR: interactome should have 4 columns: a, b, w, and True/False for directed/undirected. Quitting"
                )
                sys.exit()
            for u, v, w, directed in lines:
                G.add_edge(u, v, weight=float(w))
                if directed.lower() in ["true", "t", "dir", 'directed']:
                    dir_edges.append((u, v))
                elif directed.lower() not in [
                        "false", 'f', 'undir', 'undirected'
                ]:
                    print(
                        "ERROR: Unknown directed edge type '%s'. 4th column should be T/F to indicate directed/undirected"
                        % (directed.lower()))
                    print("Quitting.")
                    sys.exit()
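                # undirected edges are presumably listed in both orientations,
                # so only the (u < v) copy is kept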
                elif u < v:
                    undir_edges.append((u, v))

            if opts.undirected:
                # swap all edges as undirected edges
                permG = permute_network.permute_network(
                    G.to_undirected(), num_iterations=opts.num_iterations)
                permG = permG.to_directed()
            elif opts.split_by_weight:
                # split the edges into bins by weight and swap the directed and undirected edges separately
                # if specified by the user
                permG = permute_network.permute_network(
                    G,
                    swap_phys_sig_sep=opts.swap_phys_sig_sep,
                    split_weight=opts.split_by_weight,
                    num_iterations=opts.num_iterations)
            elif opts.swap_phys_sig_sep:
                # swap the directed and undirected edges separately
                permG = permute_network.permute_network(
                    G,
                    swap_phys_sig_sep=opts.swap_phys_sig_sep,
                    num_iterations=opts.num_iterations,
                    edge_lists=(undir_edges, dir_edges))
            else:
                # if none of the options are specified, then swap everything as directed edges
                permG = permute_network.permute_network(
                    G, num_iterations=opts.num_iterations)
            print("Writing %s" % (permuted_network_out_file))
            nx.write_weighted_edgelist(permG,
                                       permuted_network_out_file,
                                       comments='#',
                                       delimiter='\t')
        else:
            print("Using %s" % (permuted_network_out_file))

    # now run edgelinker on each of the chemicals using the permuted network
    # if version is netpath, use the different type of input file
    # TODO fix this
    # PATHLINKERDATAVERSIONS
    #if 'kegg' in opts.inputs_dir or 'netpath' in opts.inputs_dir:
    #    rec_tfs_file_template = "%s/rec-tfs/%%s-nodes.txt" % (opts.inputs_dir)
    in_files = []
    out_files = []
    for chemical in tqdm(chemicals, disable=opts.verbose):
        rec_tfs_file = rec_tfs_file_template % (chemical)
        in_files.append(os.path.abspath(rec_tfs_file))
        out_dir = "%s/%s" % (opts.out_dir, chemical)
        t_utils.checkDir(out_dir)
        out_pref = "%s/%d-random" % (out_dir, random_index)
        out_files.append(os.path.abspath(out_pref))
        # python implementation of edgelinker is taking too long. Switching to java for now.
        #run_write_edgelinker(permG, rec_tfs_file, opts.k, out_pref)
        # run the java implementation of edgelinker below

    # write the in and out files to the networks dir
    edgelinker_in_files = '%s/networks/permuted-network%d-infiles.txt' % (
        opts.out_dir, random_index)
    with open(edgelinker_in_files, 'w') as out:
        out.write('\n'.join(in_files))
    edgelinker_out_files = '%s/networks/permuted-network%d-outfiles.txt' % (
        opts.out_dir, random_index)
    with open(edgelinker_out_files, 'w') as out:
        out.write('\n'.join(out_files))
    print("Running edgelinker on chemical %s: %s" % (chemical, out_pref))
    run_edgelinker.runEdgeLinker(permuted_network_out_file,
                                 edgelinker_in_files,
                                 edgelinker_out_files,
                                 opts.k,
                                 edge_penalty=EDGE_PENALTY,
                                 rec_tfs_penalty=REC_TFS_PENALTY,
                                 multi_run=True)

    if opts.write_score_counts:
        # now that edgelinker has been run on all of the chemical sources/targets,
        # get the path counts for the chemical network's path scores
        # import compute_stat_sig.py and run the code directly. This avoids the issues of re-importing the libraries from baobab
        print(
            "Writing the counts for each of the scores for random index: '%d'"
            % (random_index))
        stat_sig = compute_stat_sig.StatSig(random_paths_dir=opts.out_dir,
                                            k_limit=opts.k,
                                            num_random=(random_index,
                                                        random_index),
                                            out_dir=opts.write_score_counts)
        stat_sig.write_rand_counts(chemicals=chemicals, forced=opts.forced)
#        cmd = "python src/compute_stat_sig.py " + \
#              " --chemicals %s/chemicals.txt " % (opts.inputs_dir) + \
#              " --random-paths-dir %s/ " % (opts.out_dir) + \
#              " -P --k-limit %d " % (opts.k) + \
#              " --num-random %d %d" % (random_index, random_index) + \
#              " --group-by-prob " + \
#              " --write-rand-counts " + \
#              " --out-dir %s " % (opts.write_score_counts)
#        if opts.forced:
#            cmd += " --forced "
#        print(cmd)
#        subprocess.check_call(cmd.split())

#if opts.run_mgsa_random:
#    run_mgsa_random(random_index)

    if opts.cleanup:
        print(
            "Deleting the generated permuted network and the edgelinker output files"
        )
        if permuted_network_out_file != opts.interactome:
            os.remove(permuted_network_out_file)
        os.remove(edgelinker_in_files)
        # remove the individual output files
        for cyc_out_file in out_files:
            # 2017-02-17 - temporarily don't remove the paths file for running MGSA
            os.remove(cyc_out_file + "-paths.txt")
            os.remove(cyc_out_file + "-ranked-edges.txt")
        os.remove(edgelinker_out_files)
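
permute_network.permute_network is the repository's own permuter and its internals are not shown here; the general technique it names (degree-preserving edge swaps) is what networkx ships as double_edge_swap. A minimal sketch for an undirected graph:

import networkx as nx

# Degree-preserving randomization: repeatedly pick two edges (a,b) and (c,d)
# and rewire them to (a,d) and (c,b), keeping every node's degree fixed.
G = nx.gnm_random_graph(100, 400, seed=0)
degrees_before = dict(G.degree())
nx.double_edge_swap(G, nswap=4 * G.number_of_edges(), max_tries=10**6, seed=0)
assert dict(G.degree()) == degrees_before  # degrees are unchanged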