def trn_stats(genes, trn, t_factors, version):
    """Compute global statistics and per-node distributions of a TRN.

    Parameters
    ----------
    genes:
        Collection of genes in this database version (only its length is used).
    trn:
        Transcriptional regulatory network; must provide ``nodes_iter``,
        ``to_grn`` and ``out_degree`` (networkx 1.x style API).
    t_factors:
        Collection of transcription factors (only its length is used).
    version:
        Database release identifier; used to look up a date in ``RELEASE``
        and to index ``TranscriptionFactor``.

    Returns
    -------
    tuple
        ``(stats, dists)`` where ``stats`` is a one-row ``pandas.DataFrame``
        of network-level quantities and ``dists`` is a per-node ``DataFrame``
        of degree and betweenness distributions.
    """
    LOGGER.info("Computing TRN statistics")
    nodes = sorted(trn.nodes_iter())
    # NOTE(review): `id2node` enumerates the sorted TRN nodes but is indexed
    # below with node ids of the projected `grn`; this assumes `to_simple`
    # assigns ids in the same enumeration order — confirm against `to_simple`.
    id2node = {i: n for (i, n) in enumerate(nodes)}
    # Topological quantities are evaluated on the simple projected network.
    (grn, node2id) = to_simple(trn.to_grn(), return_map=True)
    nodes = sorted(grn.nodes_iter())
    regulating = {node for (node, deg) in grn.out_degree_iter() if deg > 0}
    regulated = set(nodes) - regulating
    components = sorted(nx.weakly_connected_components(grn), key=len,
                        reverse=True)
    data = dict()
    # Degree assortativity for all four (in/out, in/out) combinations.
    for (a, b) in itertools.product(("in", "out"), repeat=2):
        data["{a}_{b}_ass".format(a=a, b=b)] = \
            nx.degree_assortativity_coefficient(grn, x=a, y=b)
    census = triadic_census(grn)
    forward = census["030T"]   # feed-forward triad count
    feedback = census["030C"]  # three-node cyclic triad count
    # Ignore trivial two-node cycles (mutual regulation pairs).
    num_cycles = sum(1 for cyc in nx.simple_cycles(grn) if len(cyc) > 2)
    in_deg = [grn.in_degree(node) for node in regulated]
    out_deg = [grn.out_degree(node) for node in regulating]
    # BUG FIX: each of the following assignments originally ended in a
    # trailing comma, which wrapped every value in a one-element tuple
    # (note the last one, "hub_out_deg", had none — the commas were
    # accidental). The tuples corrupted the DataFrame's column values and
    # dtypes; the commas are removed here.
    data["version"] = version
    data["release"] = pd.to_datetime(RELEASE[version])
    data["num_genes"] = len(genes)
    data["num_tf"] = len(t_factors)
    data["num_nodes"] = len(nodes)
    data["num_regulating"] = len(regulating)
    data["num_regulated"] = len(regulated)
    data["num_links"] = grn.size()
    data["density"] = nx.density(grn)
    data["num_components"] = len(components)
    data["largest_component"] = len(components[0])
    data["feed_forward"] = forward
    data["feedback"] = feedback
    data["fis_out"] = trn.out_degree(TranscriptionFactor[FIS_ID, version])
    data["hns_out"] = trn.out_degree(TranscriptionFactor[HNS_ID, version])
    data["cycles"] = num_cycles
    data["regulated_in_deg"] = mean(in_deg)
    data["regulating_out_deg"] = mean(out_deg)
    data["hub_out_deg"] = max(out_deg)
    stats = pd.DataFrame(data, index=[1])
    # Per-node distributions over *all* nodes of the projected network.
    in_deg = [grn.in_degree(node) for node in nodes]
    out_deg = [grn.out_degree(node) for node in nodes]
    bc = nx.betweenness_centrality(grn)
    bc = [bc[node] for node in nodes]
    dists = pd.DataFrame({
        "version": version,
        "release": [pd.to_datetime(RELEASE[version])] * len(nodes),
        "node": [id2node[node].unique_id for node in nodes],
        "regulated_in_degree": in_deg,
        "regulating_out_degree": out_deg,
        "betweenness": bc
    })
    return (stats, dists)
def main_random(args):
    """Generate randomized null-model networks for every TRN found under a path.

    For each directory matching ``args.glob`` under ``args.in_path`` that
    contains a pickled ``trn.pkl``, this loads the TRN, projects it to a
    simple network, and (depending on ``args.run_rewire`` /
    ``args.run_switch``) produces ``args.rnd_num`` randomized copies via a
    worker pool, pickling the results under ``args.out_path/<version>``.

    Side effects: creates output directories, writes pickle files, spawns a
    ``multiprocessing.Pool`` (closed at the end), logs progress.
    """
    locations = sorted(glob(os.path.join(args.in_path, args.glob)))
    locations = [os.path.abspath(loc) for loc in locations]
    pool = multiprocessing.Pool(args.nproc)
    # One progress bar instance reused per randomization run (rnd_num tasks).
    bar = ProgressBar(maxval=args.rnd_num, widgets=[Timer(), " ",
            SimpleProgress(), " ", Percentage(), " ", Bar(), " ", ETA()])
    # Output file names encode the randomization parameter used.
    rewire_name = "grn_rewired_{0:.1f}.pkl".format(args.prob)
    switch_name = "grn_switched_{0:d}.pkl".format(args.flip_num)
    for path in locations:
        filename = os.path.join(path, "trn.pkl")
        # Skip directories that have no pickled TRN.
        if not os.path.exists(filename):
            continue
        ver = version_from_path(path)
        base_path = os.path.join(args.out_path, ver)
        if not os.path.isdir(base_path):
            os.makedirs(base_path)
        LOGGER.info(ver)
        trn = pyorg.read_pickle(filename)
        # we consider null-models on the projected level since that is the level
        # on which we evaluate topological quantities
        net = pyreg.to_simple(trn.to_grn())
        if args.run_rewire:
            LOGGER.info("Rewiring with probability %.1f", args.prob)
            # Every worker receives the same (network, probability) task.
            tasks = [(net, args.prob)] * args.rnd_num
            res_it = pool.imap_unordered(rewire, tasks)
            rands = list()
            bar.start()
            for rnd in res_it:
                rands.append(rnd)
                bar += 1
            bar.finish()
            pyorg.write_pickle(rands, os.path.join(base_path, rewire_name))
        if args.run_switch:
            LOGGER.info("Switch-randomizing each edge %d times", args.flip_num)
            tasks = [(net, args.flip_num)] * args.rnd_num
            res_it = pool.imap_unordered(switch, tasks)
            # `switch` workers return (network, success-rate) pairs.
            success = list()
            rands = list()
            bar.start()
            for (rnd, rate) in res_it:
                rands.append(rnd)
                success.append(rate)
                bar += 1
            bar.finish()
            pyorg.write_pickle(rands, os.path.join(base_path, switch_name))
            LOGGER.info("mean flip success rate: %.3G +/- %.3G",
                        np.mean(success), np.std(success))
    pool.close()