def update_map(map_path, frame_path, update_from, version=""):
    LOGGER.info("{0:*^78s}".format("Update Gene IDs"))
    map_name = os.path.splitext(os.path.basename(map_path))[0]
    LOGGER.info("{0:*^78s}".format(map_name))
    base_path = os.path.dirname(map_path)
    if not version:
        version = os.path.basename(base_path)
    LOGGER.info("{0:*^78s}".format(version))
    # load the gene objects into memory so that Gene.get can resolve them
    LOGGER.info("Loading genes")
    pyorganism.read_pickle(os.path.join(base_path, "genes.pkl"))
    LOGGER.info("Reading data frame")
    hdf_key = "/%s" % (map_name,)
    mapping = pandas.read_hdf(frame_path, hdf_key)
#    frame_info(mapping)
    id2gene = dict()
    # row[0] is the frame index, row[1] the current version's ID, row[2]
    # the ID to update from; prefer the updated ID whenever the current one
    # is empty or the two disagree
    for row in mapping[[version, update_from]].itertuples():
        if not row[1] or row[1] != row[2]:
            id2gene[row[0]] = pyreg.Gene.get(row[2], None, version)
        else:
            id2gene[row[0]] = pyreg.Gene.get(row[1], None, version)
    pyorganism.write_pickle(id2gene, os.path.join(base_path,
            "corrected_" + map_name + ".pkl"))
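# A hypothetical helper, not part of the original pipeline: a minimal,
# self-contained illustration (with made-up data) of the ``itertuples``
# indexing that ``update_map`` relies on -- position 0 is the frame index,
# positions 1 and 2 are the two selected columns.
def _demo_itertuples():
    demo = pandas.DataFrame({"old": ["b1", ""], "new": ["b1", "b2"]},
            index=["g1", "g2"])
    for row in demo[["old", "new"]].itertuples():
        # prints: g1 b1 b1, then g2 <empty> b2
        print("%s %s %s" % (row[0], row[1], row[2]))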
def construct_trn(path):
    LOGGER.info("{0:*^78s}".format("Construct TRN"))
    version = os.path.basename(path)
    if not version:
        version = os.path.basename(os.path.dirname(path))
    LOGGER.info("{0:*^78s}".format(version))
    # load objects so that they are in memory
    pyorganism.read_pickle(os.path.join(path, "conformations.pkl"))
    t_units = pyorganism.read_pickle(os.path.join(path,
            "transcription_units.pkl"))
    interactions = pyorganism.read_pickle(os.path.join(path,
            "interactions.pkl"))
    assert all(pyreg.Conformation.has_key(triple[0], version)
            for triple in interactions),\
            "unknown conformation in regulatory interactions"
    assert all(pyreg.Promoter.has_key(triple[1], version)
            for triple in interactions),\
            "unknown promoter in regulatory interactions"
    # conformation-promoter directed regulatory network
    cpn = nx.MultiDiGraph()
    for (u, v, inter) in interactions:
        first = pyreg.Conformation[u, version]
        second = pyreg.Promoter[v, version]
        cpn.add_edge(first, second, key=inter)
    # project the CPN down to a TRN between transcription factors and genes
    trn = pyreg.TRN()
    node_converter = NodeConverter(t_units)
    for (u, v, k, d) in cpn.edges_iter(keys=True, data=True):
        first = u.t_factor
        for nbr in node_converter(v.unique_id):
            trn.add_edge(first, nbr, key=k, **d.copy())
    LOGGER.info("%d Nodes", len(trn))
    LOGGER.info("%d Links", trn.size())
    # sort components by size so that the largest comes first
    components = sorted(nx.connected_components(trn.to_undirected()),
            key=len, reverse=True)
    LOGGER.info("%d Components", len(components))
    LOGGER.info("Largest Component: %d", len(components[0]))
    if len(components) > 1:
        LOGGER.info("Second Largest Component: %d", len(components[1]))
    pyorganism.write_pickle(trn, os.path.join(path, "trn.pkl"))
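# ``NodeConverter`` is defined elsewhere in this repository. A minimal
# sketch of the interface ``construct_trn`` assumes -- a callable that maps
# a promoter's unique ID to the genes of the transcription units driven by
# that promoter (the attribute names ``promoter``, ``unique_id``, and
# ``genes`` are assumptions):
class _NodeConverterSketch(object):

    def __init__(self, t_units):
        self.t_units = t_units

    def __call__(self, promoter_id):
        return (gene for tu in self.t_units
                if tu.promoter is not None
                and tu.promoter.unique_id == promoter_id
                for gene in tu.genes)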
def store_joint_ids(map_path, frame_path, version=""):
    LOGGER.info("{0:*^78s}".format("Store Gene IDs"))
    map_name = os.path.splitext(os.path.basename(map_path))[0]
    LOGGER.info("{0:*^78s}".format(map_name))
    base_path = os.path.dirname(map_path)
    if not version:
        version = os.path.basename(base_path)
    LOGGER.info("{0:*^78s}".format(version))
    # load the gene objects into memory
    LOGGER.info("Loading genes")
    pyorganism.read_pickle(os.path.join(base_path, "genes.pkl"))
    LOGGER.info("Loading feature map")
    id2gene = pyorganism.read_pickle(map_path)
    LOGGER.info("Creating data frame")
    df = create_dataframe(id2gene, version)
    hdf_key = "/%s" % (map_name,)
    LOGGER.info("Assembling joint frame")
    # extend an existing joint frame with this version's column, or start a
    # new frame if none exists under that key
    if os.path.exists(frame_path):
        try:
            mapping = pandas.read_hdf(frame_path, hdf_key)
            mapping[version] = pandas.Series(df[version], index=mapping.index)
        except KeyError:
            mapping = df
    else:
        mapping = df
    frame_info(mapping)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", pandas.io.pytables.PerformanceWarning)
        mapping.to_hdf(frame_path, hdf_key, format="table")
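# Reading a joint frame back (sketch): store_joint_ids files each map under
# the HDF key "/<map name>", so a stored frame can be recovered with pandas
# alone, e.g. (the file and map names here are placeholders):
#
#     mapping = pandas.read_hdf("feature_maps.h5", "/feature2gene")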
def null_stats(base_dir, task):
    # these names are expected at module level, e.g. set by a worker
    # initializer before this function runs
    glbls = globals()
    prob = glbls["prob"]
    choose = glbls["choose"]
    logger = glbls["LOGGER"]
    ver = os.path.basename(base_dir)
    if not ver:
        ver = os.path.basename(os.path.dirname(base_dir))
    if task == "rewired":
        desc = "rewired {0:.1f}".format(prob)
        filename = "trn_rewired_{0:.1f}.pkl".format(prob)
    elif task == "switch":
        desc = "switch"
        filename = "trn_random.pkl"
    else:
        raise ValueError("unknown task '{0}'".format(task))
    try:
        nets = pyorg.read_pickle(os.path.join(base_dir, filename))
    except (OSError, IOError, EOFError):
        (err, msg, trace) = sys.exc_info()
        logger.error("Version: '%s' Task: '%s'", ver, task)
        logger.error(str(msg))
        return err_stats(ver, desc)
    chosen = random.sample(nets, choose)
    logger.info("%d/%d random networks", len(chosen), len(nets))
    nets = [trn2grn(net) for net in chosen]
    return pd.concat([stats(net, ver, desc) for net in nets],
            ignore_index=True)
def compile_feature2gene(objects_path):
    LOGGER.info("{0:*^78s}".format("Compile Gene Feature Map"))
    version = os.path.basename(objects_path)
    if not version:
        version = os.path.basename(os.path.dirname(objects_path))
    global VERSION
    VERSION = version
    LOGGER.info("{0:*^78s}".format(version))
    names = compile_names(CONFIG["data_paths"], CONFIG["data_load"])
    LOGGER.info("Loading genes")
    genes = pyorganism.read_pickle(os.path.join(objects_path, "genes.pkl"))
    LOGGER.info("Finding gene names")
    finder = pyorganism.FindObject(genes,
            targets=[gene.name.lower() for gene in genes])
    (feature2gene, gap) = compile_feature_map(genes, names, finder)
    synonyms = synonym_finder(genes)
    LOGGER.info("Finding gene names by synonyms")
    gap = extend_feature_map(feature2gene, gap, synonyms)
    LOGGER.info("Missing %d gene names", len(gap))
    LOGGER.info("Fuzzy search of gene names (threshold %d%%)",
            CONFIG["threshold"])
    LOGGER.setLevel(logging.DEBUG)
    gap = fuzzy_extension(feature2gene, gap, finder, CONFIG["threshold"])
    LOGGER.info("Fuzzy search of gene names by synonyms (threshold %d%%)",
            CONFIG["threshold"])
    gap = fuzzy_extension(feature2gene, gap, synonyms, CONFIG["threshold"])
    LOGGER.info("Manual update")
    manual_name_updates(feature2gene)
    num = sum(1 for gene in feature2gene.itervalues() if gene)
    LOGGER.info("Final map contains %d names and %d genes (%3.2f%%)",
            len(feature2gene), num, 100.0 * num / len(feature2gene))
    pyorganism.write_pickle(feature2gene,
            os.path.join(objects_path, "feature2gene.pkl"))
def load_data(locations):
    tr_nets = dict()
    versions = list()
    drop = list()
    for path in locations:
        ver = os.path.basename(path)
        if not ver:
            ver = os.path.basename(os.path.dirname(path))
        try:
            # genes and transcription factors are loaded only so that the
            # pickled TRN can resolve its node objects in memory
            pyorg.read_pickle(os.path.join(path, "genes.pkl"))
            pyorg.read_pickle(os.path.join(path, "transcription_factors.pkl"))
            trn = pyorg.read_pickle(os.path.join(path, "trn.pkl"))
            tr_nets[ver] = trn
            versions.append(ver)
        except IOError:
            drop.append(path)
            continue
    for path in drop:
        locations.remove(path)
    return (tr_nets, versions)
def main(in_path, out_path, version=""):
    # derive the version from the path unless one was given explicitly
    if not version:
        version = os.path.basename(in_path)
    if not version:
        version = os.path.basename(os.path.dirname(in_path))
    LOGGER.info("{0:*^78s}".format(version))
    LOGGER.info("Loading genes")
    genes = pyorganism.read_pickle(os.path.join(in_path, "genes.pkl"))
    LOGGER.info("Loading TRN")
    trn = pyorganism.read_pickle(os.path.join(in_path, "trn.pkl"))
    LOGGER.info("Loading TFs")
    t_factors = pyorganism.read_pickle(os.path.join(in_path,
            "transcription_factors.pkl"))
    LOGGER.info("Loading GPN")
    gpn = pyorganism.read_pickle(os.path.join(in_path, "gpn_5000.pkl"))
    (stats, dists) = gpn_stats(genes, gpn, version)
    store_results(os.path.join(out_path, "gpn_5000_statistics.csv"), stats)
    store_results(os.path.join(out_path, "gpn_5000_distributions.csv"), dists)
    (stats, dists) = trn_stats(genes, trn, t_factors, version)
    store_results(os.path.join(out_path, "trn_statistics.csv"), stats)
    store_results(os.path.join(out_path, "trn_distributions.csv"), dists)
def compile_name2gene(objects_path):
    LOGGER.info("{0:*^78s}".format("Compile Gene Name Map"))
    full_data = compile_names(CONFIG["data_paths"], CONFIG["data_load"])
    version = os.path.basename(objects_path)
    if not version:
        version = os.path.basename(os.path.dirname(objects_path))
    global VERSION
    VERSION = version
    LOGGER.info("{0:*^78s}".format(version))
    LOGGER.info("Loading genes")
    genes = pyorganism.read_pickle(os.path.join(objects_path, "genes.pkl"))
    LOGGER.info("Finding gene names")
    names = set(full_data["name"].unique())
    if numpy.nan in names:
        names.remove(numpy.nan)
    names = sorted(names)
    finder = pyorganism.FindObject(genes, "name")
    (name2gene, name_gap) = compile_feature_map(genes, names, finder)
    synonyms = synonym_finder(genes)
    LOGGER.info("Finding gene names by synonyms")
    name_gap = extend_feature_map(name2gene, name_gap, synonyms)
    LOGGER.info("Missing %d gene names", len(name_gap))
    LOGGER.info("Fuzzy search of gene names (threshold %d%%)",
            CONFIG["threshold"])
    name_gap = fuzzy_extension(name2gene, name_gap, finder,
            CONFIG["threshold"])
#    LOGGER.info("Fuzzy search of gene names by synonyms (threshold %d%%)",
#            CONFIG["threshold"])
#    name_gap = fuzzy_extension(name2gene, name_gap, synonyms,
#            CONFIG["threshold"])
    manual_name_updates(name2gene)
    num = sum(1 for gene in name2gene.itervalues() if gene)
    LOGGER.info("Final map contains %d names and %d genes (%3.2f%%)",
            len(name2gene), num, 100.0 * num / len(name2gene))
    LOGGER.info("Finding gene blattner numbers")
    bnumbers = set(full_data["blattner"].unique())
    if numpy.nan in bnumbers:
        bnumbers.remove(numpy.nan)
    bnumbers = sorted(bnumbers)
    gene_finder = pyorganism.FindObject(genes, "bnumber")
    (blattner2gene, blattner_gap) = compile_feature_map(genes, bnumbers,
            gene_finder)
    LOGGER.info("Finding gene blattner numbers by synonyms")
    blattner_gap = extend_feature_map(blattner2gene, blattner_gap, synonyms)
    LOGGER.info("Missing %d gene blattner numbers", len(blattner_gap))
    LOGGER.info("Fuzzy search of gene blattner numbers (threshold %d%%)",
            CONFIG["threshold"])
    blattner_gap = fuzzy_extension(blattner2gene, blattner_gap, gene_finder,
            CONFIG["threshold"])
#    LOGGER.info("Fuzzy search of gene blattner numbers by synonyms (threshold %d%%)",
#            CONFIG["threshold"])
#    blattner_gap = fuzzy_extension(blattner2gene, blattner_gap, synonyms,
#            CONFIG["threshold"])
    num = sum(1 for gene in blattner2gene.itervalues() if gene)
    LOGGER.info("Final map contains %d blattner numbers and %d genes (%3.2f%%)",
            len(blattner2gene), num, 100.0 * num / len(blattner2gene))
    verify_union(name2gene, blattner2gene, full_data)
    pyorganism.write_pickle(name2gene,
            os.path.join(objects_path, "name2gene.pkl"))
    pyorganism.write_pickle(blattner2gene,
            os.path.join(objects_path, "blattner2gene.pkl"))
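# ``version_from_path`` (used by main_random and main_analysis below) is
# defined elsewhere; a minimal sketch consistent with the inline idiom
# repeated throughout these scripts would be:
def _version_from_path_sketch(path):
    # the basename is empty when the path ends with a separator, so fall
    # back to the parent directory's name
    ver = os.path.basename(path)
    if not ver:
        ver = os.path.basename(os.path.dirname(path))
    return ver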
def main_random(args):
    locations = sorted(glob(os.path.join(args.in_path, args.glob)))
    locations = [os.path.abspath(loc) for loc in locations]
    pool = multiprocessing.Pool(args.nproc)
    bar = ProgressBar(maxval=args.rnd_num, widgets=[Timer(), " ",
            SimpleProgress(), " ", Percentage(), " ", Bar(), " ", ETA()])
    rewire_name = "grn_rewired_{0:.1f}.pkl".format(args.prob)
    switch_name = "grn_switched_{0:d}.pkl".format(args.flip_num)
    for path in locations:
        filename = os.path.join(path, "trn.pkl")
        if not os.path.exists(filename):
            continue
        ver = version_from_path(path)
        base_path = os.path.join(args.out_path, ver)
        if not os.path.isdir(base_path):
            os.makedirs(base_path)
        LOGGER.info(ver)
        trn = pyorg.read_pickle(filename)
        # we consider null-models on the projected level since that is the
        # level on which we evaluate topological quantities
        net = pyreg.to_simple(trn.to_grn())
        if args.run_rewire:
            LOGGER.info("Rewiring with probability %.1f", args.prob)
            tasks = [(net, args.prob)] * args.rnd_num
            res_it = pool.imap_unordered(rewire, tasks)
            rands = list()
            bar.start()
            for rnd in res_it:
                rands.append(rnd)
                bar += 1
            bar.finish()
            pyorg.write_pickle(rands, os.path.join(base_path, rewire_name))
        if args.run_switch:
            LOGGER.info("Switch-randomizing each edge %d times", args.flip_num)
            tasks = [(net, args.flip_num)] * args.rnd_num
            res_it = pool.imap_unordered(switch, tasks)
            success = list()
            rands = list()
            bar.start()
            for (rnd, rate) in res_it:
                rands.append(rnd)
                success.append(rate)
                bar += 1
            bar.finish()
            pyorg.write_pickle(rands, os.path.join(base_path, switch_name))
            LOGGER.info("mean flip success rate: %.3G +/- %.3G",
                    np.mean(success), np.std(success))
    pool.close()
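# The ``rewire`` and ``switch`` workers are defined elsewhere in this
# repository. For illustration only, one degree-preserving "switch" step on
# a simple networkx.DiGraph could look like this (the real implementation
# may differ):
def _switch_once_sketch(net):
    import random
    # pick two distinct edges and exchange their targets, provided the
    # exchange creates neither a parallel edge nor a self-loop
    ((a, b), (c, d)) = random.sample(list(net.edges()), 2)
    if a != d and c != b and not net.has_edge(a, d) and not net.has_edge(c, b):
        net.remove_edges_from([(a, b), (c, d)])
        net.add_edges_from([(a, d), (c, b)])
        return True
    return False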
def construct_gpn(path, window_sizes):
    LOGGER.info("{0:*^78s}".format("Construct GPN"))
    version = os.path.basename(path)
    if not version:
        version = os.path.basename(os.path.dirname(path))
    LOGGER.info("{0:*^78s}".format(version))
    genes = pyorganism.read_pickle(os.path.join(path, "genes.pkl"))
    gpn_gen = pyreg.GPNGenerator()
    gpn_gen.parse_genes(genes)
    for window in window_sizes:
        LOGGER.info("Window Size: %d", window)
        gpn = gpn_gen.generate_gpn(window)
        LOGGER.info("%d Nodes", len(gpn))
        LOGGER.info("%d Links", gpn.size())
        # sort components by size so that the largest comes first
        components = sorted(nx.connected_components(gpn), key=len,
                reverse=True)
        LOGGER.info("%d Components", len(components))
        LOGGER.info("Largest Component: %d", len(components[0]))
        if len(components) > 1:
            LOGGER.info("Second Largest Component: %d", len(components[1]))
        pyorganism.write_pickle(gpn, os.path.join(path, "gpn_%d.pkl" % window))
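# Example invocation (paths assumed): build gene proximity networks with
# 5 kbp and 50 kbp windows from one version's pickled genes; the first
# window matches the "gpn_5000.pkl" file consumed by the statistics
# script above.
#
#     construct_gpn("objects/8.1", [5000, 50000])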
def main_continuous(args):
    glbls = globals()
    engine = create_engine(args.engine)
    pymodels.Base.metadata.bind = engine
    pymodels.Session.configure(bind=engine)
    session = pymodels.Session()
    tasks = session.query(pymodels.Job).\
            options(joinedload("analysis"), joinedload("control"),
            joinedload("experiment")).filter(~pymodels.Job.complete).all()
    if len(tasks) == 0:
        LOGGER.warn("Nothing to do")
        return
    analysis_configs = {job.analysis for job in tasks}
    control_configs = {job.control for job in tasks}
    experiments = {job.experiment for job in tasks}
    preparations = {job.preparation for job in tasks}
    sampling = {job.sampling for job in tasks}
    projections = {job.projection for job in tasks}
    LOGGER.debug("%d analysis configurations", len(analysis_configs))
    LOGGER.debug("%d control configurations", len(control_configs))
    LOGGER.debug("%d experiments", len(experiments))
    LOGGER.debug("%d setup cases", len(preparations))
    LOGGER.debug("%d sampling methods", len(sampling))
    LOGGER.debug("%d network projections", len(projections))
    num_prep = len(analysis_configs) * len(control_configs) * len(experiments)\
            * len(preparations) * len(sampling) * len(projections)
    LOGGER.debug("%d total configurations", num_prep)
    LOGGER.info("Preparing Data")
    task_args = dict()
    bar = ProgressBar(maxval=num_prep, widgets=[Timer(), " ",
            SimpleProgress(), " ", Percentage(), " ", Bar(), " ",
            ETA()]).start()
    for anal in analysis_configs:
        LOGGER.debug(" %s:", anal.version)
        feature2node = pyorg.read_pickle(os.path.join(anal.objects, anal.map))
        for cntrl in control_configs:
            LOGGER.debug(" %s", cntrl.type)
            net = pyorg.read_pickle(os.path.join(anal.objects, cntrl.network))
            tu_net = pyreg.to_transcription_unit_based(net)
            op_net = pyreg.to_operon_based(net)
            for exp in experiments:
                LOGGER.debug(" %s", exp.strain)
                for prep in preparations:
                    LOGGER.debug(" %s", prep)
                    series = glbls[prep](session, exp)
                    for sampl in sampling:
                        LOGGER.debug(" %s", sampl)
                        for prj in projections:
                            LOGGER.debug(" %s", prj)
                            control = pyreg.ContinuousControl()
                            if prj == "tu":
                                control.setup(tu_net, series, feature2node,
                                        sampl)
                            elif prj == "operon":
                                control.setup(op_net, series, feature2node,
                                        sampl)
                            else:
                                control.setup(net, series, feature2node,
                                        sampl)
                            if cntrl.type == "analog":
                                control.from_gpn()
                            elif cntrl.type == "digital":
                                control.from_trn()
                            else:
                                raise ValueError("'{}'".format(cntrl.type))
                            task_args[(anal.id, cntrl.id, exp.id, prep,
                                    sampl, prj)] = (control, series.columns)
                            bar += 1
    bar.finish()
    LOGGER.info("Running Jobs")
    tasks = [task_args[(job.analysis.id, job.control.id, job.experiment.id,
            job.preparation, job.sampling, job.projection)] + (job.measure,
            job.random_num, job.delay, job.id) for job in tasks]
    pool = multiprocessing.Pool(args.nproc)
    result_it = pool.imap_unordered(continuous_exec, tasks)
    bar = ProgressBar(maxval=len(tasks), widgets=[Timer(), " ",
            SimpleProgress(), " ", Percentage(), " ", Bar(), " ",
            ETA()]).start()
    for (job_id, z_scores, cntrl_scores, samples, points) in result_it:
        results = list()
        try:
            job = session.query(pymodels.Job).filter_by(id=job_id).one()
            for (i, name) in enumerate(points):
                res = pymodels.Result(control=cntrl_scores[i],
                        ctc=z_scores[i], point=name, job=job)
                session.add(res)
                results.append(res)
            job.complete = True
            session.commit()
        except Exception:
            session.rollback()
            bar += 1
            continue
        if job.selection > 0:
            try:
                for (i, res) in enumerate(results):
                    # use a more low-level insert for speed
                    session.execute(pymodels.RandomSample.__table__.insert(),
                            [{"control": val, "result_id": res.id}\
                            for val in np.random.choice(samples[i],
                            job.selection, replace=False)])
                session.commit()
            except Exception:
                session.rollback()
                bar += 1
                continue
        bar += 1
    bar.finish()
    session.close()
def main(argv):
    LOGGER.info("{0:*^78s}".format("Compile RegulonDB Content"))
    if not os.path.exists(argv[0]):
        sys.exit(1)
    if not os.path.exists(argv[1]):
        os.makedirs(argv[1])
    version = argv[2] if len(argv) == 3 else os.path.basename(argv[0])
    if not version:
        version = os.path.basename(os.path.dirname(argv[0]))
    LOGGER.info("{0:*^78s}".format(version))
    global VERSION
    VERSION = version
    version = tuple([int(num) for num in version.split(".")])
    # compile genes and gene products
    gene_file = os.path.join(argv[1], "genes.pkl")
    if os.path.exists(gene_file):
        genes = pyorganism.read_pickle(gene_file)
    else:
        genes = compile_genes(argv[0])
    product_file = os.path.join(argv[1], "products.pkl")
    if os.path.exists(product_file):
        products = pyorganism.read_pickle(product_file)
    else:
        products = compile_products(argv[0])
    regdb.link_gene_product(os.path.join(argv[0], "gene_product_link.xml"),
            VERSION)
    tf_file = os.path.join(argv[1], "transcription_factors.pkl")
    if os.path.exists(tf_file):
        t_factors = pyorganism.read_pickle(tf_file)
    else:
        t_factors = compile_transcription_factors(argv[0], version)
    num = sum(1 for gene in genes if isinstance(gene.regulatory_product,
            pyreg.TranscriptionFactor))
    norm = float(len(genes))
    LOGGER.info("Found {0:d} genes that code for transcription factors"\
            " ({1:.2%})".format(num, num / norm))
    # sigma factors are only available from version 7.2 onwards
    if version >= (7, 2):
        sigma_file = os.path.join(argv[1], "sigma_factors.pkl")
        if os.path.exists(sigma_file):
            sigma_factors = pyorganism.read_pickle(sigma_file)
        else:
            sigma_factors = compile_sigma_factors(argv[0])
    conf_file = os.path.join(argv[1], "conformations.pkl")
    if os.path.exists(conf_file):
        conformations = pyorganism.read_pickle(conf_file)
    else:
        conformations = compile_conformations(argv[0])
    prom_file = os.path.join(argv[1], "promoters.pkl")
    if os.path.exists(prom_file):
        promoters = pyorganism.read_pickle(prom_file)
    else:
        promoters = compile_promoters(argv[0])
    op_file = os.path.join(argv[1], "operons.pkl")
    if os.path.exists(op_file):
        operons = pyorganism.read_pickle(op_file)
    else:
        operons = compile_operons(argv[0])
    regdb.update_operons(operons, promoters, genes)
    tu_file = os.path.join(argv[1], "transcription_units.pkl")
    if os.path.exists(tu_file):
        t_units = pyorganism.read_pickle(tu_file)
    else:
        t_units = compile_transcription_units(argv[0])
    inter_file = os.path.join(argv[1], "interactions.pkl")
    if os.path.exists(inter_file):
        interactions = pyorganism.read_pickle(inter_file)
    else:
        interactions = compile_regulation(argv[0])
    # write out all pickles again since object attributes may have been updated
    pyorganism.write_pickle(genes, gene_file)
    pyorganism.write_pickle(products, product_file)
    pyorganism.write_pickle(t_factors, tf_file)
    if version >= (7, 2):
        pyorganism.write_pickle(sigma_factors, sigma_file)
    pyorganism.write_pickle(conformations, conf_file)
    pyorganism.write_pickle(promoters, prom_file)
    pyorganism.write_pickle(operons, op_file)
    pyorganism.write_pickle(t_units, tu_file)
    pyorganism.write_pickle(interactions, inter_file)
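# Expected invocation of this compilation script (the script name here is
# assumed):
#
#     python compile_regulondb.py <input_dir> <output_dir> [version]
#
# so that main(sys.argv[1:]) receives argv[0] = RegulonDB input directory,
# argv[1] = output directory for the pickles, and argv[2] = an optional
# version string that otherwise defaults to the input directory's basename.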
def main_analysis(args):
    locations = sorted(glob(os.path.join(args.in_path, args.glob)))
    locations = [os.path.abspath(loc) for loc in locations]
    out_name = os.path.join(args.out_path, args.file)
    if not os.path.isdir(args.out_path):
        os.makedirs(args.out_path)
    # resume from an existing results table if one exists
    if os.path.exists(out_name):
        result = pd.read_csv(out_name, sep=str(";"), dtype={"version": str},
                encoding=args.encoding)
    else:
        result = pd.DataFrame(columns=["version", "num_components",
                "largest_component", "feed_forward", "feedback", "cycles",
                "regulated_in_deg", "regulating_out_deg", "null_model"])
    pool = multiprocessing.Pool(args.nproc)
    rewire_name = "grn_rewired_{0:.1f}.pkl".format(args.prob)
    rewire_descr = "rewired {0:.1f}".format(args.prob)
    switch_name = "grn_switched_{0:d}.pkl".format(args.flip_num)
    switch_descr = "switch {0:d}".format(args.flip_num)
    for path in locations:
        ver = version_from_path(path)
        LOGGER.info(ver)
        if args.run_rewire:
            filename = os.path.join(path, rewire_name)
            if os.path.exists(filename):
                LOGGER.info("Analyzing rewired networks (probability %.1f)",
                        args.prob)
                nets = pyorg.read_pickle(filename)
                if args.choose is not None:
                    chosen_num = min(args.choose, len(nets))
                    LOGGER.info("Using %d/%d random networks", chosen_num,
                            len(nets))
                    nets = random.sample(nets, chosen_num)
                tasks = [(net, ver, rewire_descr) for net in nets]
                res_it = pool.imap_unordered(stats, tasks)
                frames = list()
                bar = ProgressBar(maxval=len(tasks), widgets=[Timer(), " ",
                        SimpleProgress(), " ", Percentage(), " ", Bar(), " ",
                        ETA()]).start()
                for df in res_it:
                    frames.append(df)
                    bar += 1
                bar.finish()
                result = result.append(pd.concat(frames, ignore_index=True),
                        ignore_index=True)
                result.to_csv(out_name, header=True, index=False,
                        sep=str(";"), encoding=args.encoding)
        if args.run_switch:
            filename = os.path.join(path, switch_name)
            if os.path.exists(filename):
                LOGGER.info("Analyzing switched networks (flip number %d)",
                        args.flip_num)
                nets = pyorg.read_pickle(filename)
                if args.choose is not None:
                    chosen_num = min(args.choose, len(nets))
                    LOGGER.info("Using %d/%d random networks", chosen_num,
                            len(nets))
                    nets = random.sample(nets, chosen_num)
                tasks = [(net, ver, switch_descr) for net in nets]
                res_it = pool.imap_unordered(stats, tasks)
                frames = list()
                bar = ProgressBar(maxval=len(tasks), widgets=[Timer(), " ",
                        SimpleProgress(), " ", Percentage(), " ", Bar(), " ",
                        ETA()]).start()
                for df in res_it:
                    frames.append(df)
                    bar += 1
                bar.finish()
                result = result.append(pd.concat(frames, ignore_index=True),
                        ignore_index=True)
                result.to_csv(out_name, header=True, index=False,
                        sep=str(";"), encoding=args.encoding)
def main(remote_client, args):
    config = json.load(codecs.open(args.config, encoding=args.encoding,
            mode="rb"))
    if config["continuous"]:
        load_func = load_continuous
        table_key = "/Continuous"
        job_gen = continuous_jobs
        worker = continuous_worker
        result = continuous_result
    else:
        load_func = load_discrete
        table_key = "/Discrete"
        job_gen = discrete_jobs
        worker = discrete_worker
        result = discrete_result
    organism = pyorg.Organism(name=config["organism"])
    load_func(organism, config)
    LOGGER.info("Load data")
    glob_vars = globals()
    data = config["data"]
    network = config["network"]
    analysis = config["analysis"]
    namespace = dict()
    namespace["genes"] = dict()
    namespace["networks"] = dict()
    namespace["prepared"] = dict()
    for version in config["versions"]:
        LOGGER.info("{0:*^78s}".format(version))
        namespace["genes"][version] = pyorg.read_pickle(os.path.join(
                data["base"], version, data["gene_path"]))
        id2gene = pyorg.read_pickle(os.path.join(data["base"], version,
                data["mapping_path"]))
        namespace["networks"][version] = dict()
        for (cntrl_type, net_file, projections) in izip(
                analysis["control_types"], network["paths"],
                network["projections"]):
            net = pyorg.read_pickle(os.path.join(data["base"], version,
                    net_file))
            namespace["networks"][version][cntrl_type] = dict()
            for basis in projections:
                if basis == "gene":
                    namespace["networks"][version][cntrl_type][basis] = net
                elif basis == "tu":
                    namespace["networks"][version][cntrl_type][basis] =\
                            pyreg.to_transcription_unit_based(net)
                elif basis == "operon":
                    namespace["networks"][version][cntrl_type][basis] =\
                            pyreg.to_operon_based(net)
        namespace["prepared"][version] = dict()
        for (cntrl_type, experiments, setups) in izip(
                analysis["control_types"], analysis["experimental_sets"],
                analysis["experimental_setups"]):
            LOGGER.info("{0:*^78s}".format(cntrl_type))
            namespace["prepared"][version][cntrl_type] = dict()
            for (exp_name, exp_setup) in izip(experiments, setups):
                LOGGER.info("{0:*^78s}".format(exp_name))
                df = organism.activity[exp_name]
                setup_func = glob_vars[exp_setup]
                namespace["prepared"][version][cntrl_type][exp_name] =\
                        setup_func(cntrl_type, df, id2gene)
                if any(method.startswith("delayed") for method in
                        chain(analysis["control"], *analysis["ctc"])):
                    namespace["prepared"][version][cntrl_type][exp_name]["delayed"] = dict()
                    for delta in analysis["delays"]:
                        delayed_continuous(namespace["prepared"][version][cntrl_type][exp_name],
                                delta)
                if any(ms_name.endswith("comparison") for ms_name in
                        chain(*analysis["measures"])):
                    rate_continuous(namespace["prepared"][version][cntrl_type][exp_name])
    LOGGER.debug("\n".join(print_dict(namespace)))
    # shelve the prepared namespace so that the remote engines can load it
    # from disk instead of receiving it over the wire
    db = shelve.open(config["shelve"], protocol=pickle.HIGHEST_PROTOCOL)
    for (key, value) in namespace.iteritems():
        db[key] = value
    db.close()
    # general parallel setup using IPython.parallel
    LOGGER.info("Remote imports")
    d_view = remote_client.direct_view()
    d_view.execute("import numpy as np; "\
            "import shelve; import pickle;"\
            "import pyorganism as pyorg; import pyorganism.regulation as pyreg;"\
            "import logging; from IPython.config import Application;"\
            "LOGGER = Application.instance().log;"\
            "LOGGER.setLevel(logging.{level});".format(level=args.log_level),
            block=True)
    LOGGER.info("Transfer data")
#    d_view.push(namespace, block=True)
    d_view.execute("db = shelve.open('{shelve}', protocol=pickle.HIGHEST_PROTOCOL);"\
            "globals().update(db);db.close()".format(shelve=config["shelve"]),
            block=True)
    LOGGER.info("Generate job descriptions")
    jobs = job_gen(organism, config, namespace)
    l_view = remote_client.load_balanced_view()
    bar = ProgressBar(maxval=len(jobs), widgets=[Timer(), " ", Percentage(),
            " ", Bar(), " ", ETA()]).start()
    result_mngr = ResultManager(config["output"], table_key)
    results_it = l_view.map(worker, jobs, ordered=False, block=False)
    for (spec, res_cntrl, res_ctc, samples) in results_it:
        LOGGER.debug(res_cntrl)
        LOGGER.debug(res_ctc)
        result(result_mngr, spec, res_cntrl, res_ctc, samples)
        bar += 1
    result_mngr.finalize()
    bar.finish()
    LOGGER.info("parallel speed-up was %.3g",
            results_it.serial_time / results_it.wall_time)