def main_continuous(args):
    """Prepare and execute all incomplete continuous-control jobs.

    Connects to the database given by ``args.engine``, loads every
    ``pymodels.Job`` that is not yet complete, builds one configured
    ``pyreg.ContinuousControl`` per unique combination of (analysis,
    control, experiment, preparation, sampling, projection), then runs
    the jobs in a ``multiprocessing.Pool`` of ``args.nproc`` workers and
    commits per-point results (and optional random samples) back to the
    database.

    Parameters
    ----------
    args : argparse.Namespace-like
        Must provide ``engine`` (SQLAlchemy connection URL) and
        ``nproc`` (worker-process count).
    """
    glbls = globals()
    engine = create_engine(args.engine)
    pymodels.Base.metadata.bind = engine
    pymodels.Session.configure(bind=engine)
    session = pymodels.Session()
    # Eager-load the relationships that are dereferenced repeatedly below.
    tasks = session.query(pymodels.Job).\
            options(joinedload("analysis"), joinedload("control"),
                    joinedload("experiment")).\
            filter(~pymodels.Job.complete).all()
    if len(tasks) == 0:
        # fix: Logger.warn is a deprecated alias of Logger.warning
        LOGGER.warning("Nothing to do")
        return
    # Unique configuration axes across all pending jobs.
    analysis_configs = {job.analysis for job in tasks}
    control_configs = {job.control for job in tasks}
    experiments = {job.experiment for job in tasks}
    preparations = {job.preparation for job in tasks}
    sampling = {job.sampling for job in tasks}
    projections = {job.projection for job in tasks}
    LOGGER.debug("%d analysis configurations", len(analysis_configs))
    LOGGER.debug("%d control configurations", len(control_configs))
    LOGGER.debug("%d experiments", len(experiments))
    LOGGER.debug("%d setup cases", len(preparations))
    LOGGER.debug("%d sampling methods", len(sampling))
    LOGGER.debug("%d network projections", len(projections))
    num_prep = len(analysis_configs) * len(control_configs)\
            * len(experiments) * len(preparations) * len(sampling)\
            * len(projections)
    LOGGER.debug("%d total configurations", num_prep)
    LOGGER.info("Preparing Data")
    task_args = dict()
    bar = ProgressBar(maxval=num_prep, widgets=[Timer(), " ",
            SimpleProgress(), " ", Percentage(), " ", Bar(), " ",
            ETA()]).start()
    for anal in analysis_configs:
        LOGGER.debug("  %s:", anal.version)
        feature2node = pyorg.read_pickle(os.path.join(anal.objects, anal.map))
        for cntrl in control_configs:
            LOGGER.debug("  %s", cntrl.type)
            net = pyorg.read_pickle(os.path.join(anal.objects, cntrl.network))
            # Precompute both alternative projections of the network once
            # per control configuration; reused in the innermost loop.
            tu_net = pyreg.to_transcription_unit_based(net)
            op_net = pyreg.to_operon_based(net)
            for exp in experiments:
                LOGGER.debug("  %s", exp.strain)
                for prep in preparations:
                    LOGGER.debug("  %s", prep)
                    # ``prep`` names a module-level preparation function.
                    series = glbls[prep](session, exp)
                    for sampl in sampling:
                        LOGGER.debug("  %s", sampl)
                        for prj in projections:
                            LOGGER.debug("  %s", prj)
                            control = pyreg.ContinuousControl()
                            if prj == "tu":
                                control.setup(tu_net, series, feature2node,
                                        sampl)
                            elif prj == "operon":
                                control.setup(op_net, series, feature2node,
                                        sampl)
                            else:
                                control.setup(net, series, feature2node,
                                        sampl)
                            if cntrl.type == "analog":
                                control.from_gpn()
                            elif cntrl.type == "digital":
                                control.from_trn()
                            else:
                                raise ValueError("'{}'".format(cntrl.type))
                            task_args[(anal.id, cntrl.id, exp.id, prep,
                                    sampl, prj)] = (control, series.columns)
                            bar += 1
    bar.finish()
    LOGGER.info("Running Jobs")
    # Expand each pending job into the argument tuple consumed by
    # ``continuous_exec``: the prepared control + per-job parameters.
    tasks = [task_args[(job.analysis.id, job.control.id, job.experiment.id,
            job.preparation, job.sampling, job.projection)]
            + (job.measure, job.random_num, job.delay, job.id)
            for job in tasks]
    pool = multiprocessing.Pool(args.nproc)
    result_it = pool.imap_unordered(continuous_exec, tasks)
    bar = ProgressBar(maxval=len(tasks), widgets=[Timer(), " ",
            SimpleProgress(), " ", Percentage(), " ", Bar(), " ",
            ETA()]).start()
    for (job_id, z_scores, cntrl_scores, samples, points) in result_it:
        results = list()
        try:
            job = session.query(pymodels.Job).filter_by(id=job_id).one()
            for (i, name) in enumerate(points):
                res = pymodels.Result(control=cntrl_scores[i],
                        ctc=z_scores[i], point=name, job=job)
                session.add(res)
                results.append(res)
            job.complete = True
            session.commit()
        except Exception:
            # fix: previously swallowed silently; log before skipping so
            # database failures are visible, then continue best-effort.
            LOGGER.exception("failed to store results for job %s", job_id)
            session.rollback()
            bar += 1
            continue
        if job.selection > 0:
            try:
                for (i, res) in enumerate(results):
                    # use a more low-level insert for speed
                    session.execute(pymodels.RandomSample.__table__.insert(),
                            [{"control": val, "result_id": res.id}\
                            for val in np.random.choice(samples[i],
                            job.selection, replace=False)])
                session.commit()
            except Exception:
                # fix: log instead of silently discarding the samples.
                LOGGER.exception("failed to store random samples for job %s",
                        job_id)
                session.rollback()
                bar += 1
                continue
        bar += 1
    bar.finish()
    session.close()
def main(remote_client, args):
    """Drive a full analysis run on an IPython.parallel cluster.

    Reads the JSON configuration from ``args.config``, loads organism
    data, prepares per-version gene/network/experiment namespaces,
    persists them in a shelve file, pushes that state to the remote
    engines, and finally maps the per-job worker over the cluster's
    load-balanced view while collecting results into a
    ``ResultManager``.

    Parameters
    ----------
    remote_client : IPython.parallel client
        Provides ``direct_view`` and ``load_balanced_view``.
    args : argparse.Namespace-like
        Must provide ``config`` (path), ``encoding``, and ``log_level``.
    """
    # fix: the file handle returned by codecs.open was never closed.
    with codecs.open(args.config, encoding=args.encoding, mode="rb") as file_h:
        config = json.load(file_h)
    # Select the continuous or discrete pipeline implementations.
    if config["continuous"]:
        load_func = load_continuous
        table_key = "/Continuous"
        job_gen = continuous_jobs
        worker = continuous_worker
        result = continuous_result
    else:
        load_func = load_discrete
        table_key = "/Discrete"
        job_gen = discrete_jobs
        worker = discrete_worker
        result = discrete_result
    organism = pyorg.Organism(name=config["organism"])
    load_func(organism, config)
    LOGGER.info("Load data")
    glob_vars = globals()
    data = config["data"]
    network = config["network"]
    analysis = config["analysis"]
    namespace = dict()
    namespace["genes"] = dict()
    namespace["networks"] = dict()
    namespace["prepared"] = dict()
    for version in config["versions"]:
        LOGGER.info("{0:*^78s}".format(version))
        namespace["genes"][version] = pyorg.read_pickle(os.path.join(
                data["base"], version, data["gene_path"]))
        id2gene = pyorg.read_pickle(os.path.join(data["base"], version,
                data["mapping_path"]))
        namespace["networks"][version] = dict()
        for (cntrl_type, net_file, projections) in izip(
                analysis["control_types"], network["paths"],
                network["projections"]):
            net = pyorg.read_pickle(os.path.join(data["base"], version,
                    net_file))
            namespace["networks"][version][cntrl_type] = dict()
            for basis in projections:
                if basis == "gene":
                    namespace["networks"][version][cntrl_type][basis] = net
                elif basis == "tu":
                    namespace["networks"][version][cntrl_type][basis] =\
                            pyreg.to_transcription_unit_based(net)
                elif basis == "operon":
                    namespace["networks"][version][cntrl_type][basis] =\
                            pyreg.to_operon_based(net)
        namespace["prepared"][version] = dict()
        for (cntrl_type, experiments, setups) in izip(
                analysis["control_types"], analysis["experimental_sets"],
                analysis["experimental_setups"]):
            LOGGER.info("{0:*^78s}".format(cntrl_type))
            namespace["prepared"][version][cntrl_type] = dict()
            for (exp_name, exp_setup) in izip(experiments, setups):
                LOGGER.info("{0:*^78s}".format(exp_name))
                df = organism.activity[exp_name]
                # ``exp_setup`` names a module-level setup function.
                setup_func = glob_vars[exp_setup]
                namespace["prepared"][version][cntrl_type][exp_name] =\
                        setup_func(cntrl_type, df, id2gene)
                if any(method.startswith("delayed") for method in chain(
                        analysis["control"], *analysis["ctc"])):
                    namespace["prepared"][version][cntrl_type][exp_name]\
                            ["delayed"] = dict()
                    for delta in analysis["delays"]:
                        delayed_continuous(namespace["prepared"][version]\
                                [cntrl_type][exp_name], delta)
                if any(ms_name.endswith("comparison") for ms_name in chain(
                        *analysis["measures"])):
                    rate_continuous(namespace["prepared"][version]\
                            [cntrl_type][exp_name])
    LOGGER.debug("\n".join(print_dict(namespace)))
    # Persist the prepared namespace so remote engines can load it from
    # the shared shelve file instead of receiving a (large) push.
    db = shelve.open(config["shelve"], protocol=pickle.HIGHEST_PROTOCOL)
    for (key, value) in namespace.iteritems():
        db[key] = value
    db.close()
    # general parallel setup using IPython.parallel
    LOGGER.info("Remote imports")
    d_view = remote_client.direct_view()
    d_view.execute("import numpy as np; "\
            "import shelve; import pickle;"\
            "import pyorganism as pyorg; import pyorganism.regulation as pyreg;"\
            "import logging; from IPython.config import Application;"\
            "LOGGER = Application.instance().log;"\
            "LOGGER.setLevel(logging.{level});".format(level=args.log_level),
            block=True)
    LOGGER.info("Transfer data")
    # d_view.push(namespace, block=True)
    d_view.execute("db = shelve.open('{shelve}', protocol=pickle.HIGHEST_PROTOCOL);"\
            "globals().update(db);db.close()".format(shelve=config["shelve"]),
            block=True)
    LOGGER.info("Generate job descriptions")
    jobs = job_gen(organism, config, namespace)
    l_view = remote_client.load_balanced_view()
    bar = ProgressBar(maxval=len(jobs), widgets=[Timer(), " ", Percentage(),
            " ", Bar(), " ", ETA()]).start()
    result_mngr = ResultManager(config["output"], table_key)
    results_it = l_view.map(worker, jobs, ordered=False, block=False)
    for (spec, res_cntrl, res_ctc, samples) in results_it:
        LOGGER.debug(res_cntrl)
        LOGGER.debug(res_ctc)
        result(result_mngr, spec, res_cntrl, res_ctc, samples)
        bar += 1
    result_mngr.finalize()
    bar.finish()
    LOGGER.info("parallel speed-up was %.3g",
            results_it.serial_time / results_it.wall_time)