Python normalize_id示例，intogensm.utils.normalize_id Python示例

示例#1

0

显示文件

文件： analysis.py 项目： chris-zen/phd-thesis

	def __init__(self):
		"""Initialise the command from the parsed command line arguments.

		Stores the variants files given as arguments in ``self.variants_files``
		(relative paths are joined to the current working directory) and the
		normalized project identifier in ``self.project_id``.

		Logs an error and calls ``exit(-1)`` when a variants file does not
		exist, is not a regular file, or when no project identifier is given.
		"""
		RunCommand.__init__(
			self,
			args_usage="<variants-file> [<variants-file> ...]",
			epilog=PROJECT_EPILOG)

		# Validate every variants file before accepting it
		self.variants_files = []

		for file_arg in self.args.files:
			if os.path.isabs(file_arg):
				file_path = file_arg
			else:
				file_path = os.path.join(os.getcwd(), file_arg)

			if not os.path.exists(file_path):
				self.log.error("Variants file not found: {}".format(file_path))
				exit(-1)

			if not os.path.isfile(file_path):
				self.log.error("A file is expected: {}".format(file_path))
				exit(-1)

			self.variants_files.append(file_path)

		# The project identifier is mandatory
		requested_id = self.args.project_id
		if requested_id is None:
			self.log.error("Project identifier not specified.")
			exit(-1)

		self.project_id = normalize_id(requested_id)

示例#2

0

显示文件

文件： summary.py 项目： chris-zen/phd-thesis

def datasets(projects_set):
	"""Append a summary row for one classifier group to its datasets file.

	Sums the affected samples reported by every project in the group and
	appends one line (group names, total samples, comma separated project
	ids) to ``<combination_path>/<normalized classifier id>.tsv``. The header
	line is written first when the file does not exist yet.

	:param projects_set: a ``(classifier, projects)`` pair. ``classifier`` is
		a mapping read for the keys "id", "name", "group_name",
		"group_short_name" and "group_long_name"; ``projects`` is an iterable
		of mappings read for the keys "id" and "db".
	"""
	log = task.logger

	config = GlobalConfig(task.conf)
	paths = PathsConfig(config)

	classifier, projects = projects_set

	classifier_id = classifier["id"]

	group_name = classifier["group_name"]
	group_short_name = classifier["group_short_name"]
	group_long_name = classifier["group_long_name"]

	group_file_prefix = normalize_id(classifier_id)

	log.info("--- [{0} ({1}) ({2}) ({3})] {4}".format(
		classifier["name"], group_long_name, group_short_name, group_name, "-" * 30))

	log.info("Reading number of samples per project ...")

	project_ids = []
	total_samples = 0
	for project in projects:
		project_id = project["id"]
		project_ids.append(project_id)

		log.info("  Project {0}".format(project_id))

		projdb = ProjectDb(project["db"])
		try:
			num_samples = projdb.get_total_affected_samples()
		finally:
			# Release the project database even if the query fails
			projdb.close()

		total_samples += num_samples

		log.debug("    {0} samples".format(num_samples))

	log.debug("  {0} samples in total".format(total_samples))

	log.info("Updating ...")

	combination_path = paths.combination_path()

	path = os.path.join(combination_path, "{0}.tsv".format(group_file_prefix))

	# Decide about the header before opening: append mode creates the file
	write_header = not os.path.exists(path)

	with open(path, "a") as f:
		if write_header:
			tsv.write_line(f, "NAME", "SHORT_NAME", "LONG_NAME", "SAMPLES_TOTAL", "PROJECT_IDS")
		tsv.write_line(f, group_name, group_short_name, group_long_name, total_samples, ",".join(project_ids))

示例#3

0

显示文件

文件： command.py 项目： chris-zen/phd-thesis

	def __init__(self, args_usage="", epilog="", logger_name=None):
		"""Initialise the command and derive its run settings from the arguments.

		Installs ``keyboardinterrupt_handler`` for SIGINT and SIGTERM, builds
		the engine and case configuration arguments, and stores the workspace,
		the normalized case name (when one is given) and the maximum number of
		cores (0 when not specified).

		:param args_usage: forwarded to ``Command.__init__``
		:param epilog: forwarded to ``Command.__init__``
		:param logger_name: forwarded to ``Command.__init__``
		"""
		Command.__init__(self, args_usage, epilog, logger_name)

		# Both termination signals share the same handler
		for signum in (signal.SIGINT, signal.SIGTERM):
			signal.signal(signum, keyboardinterrupt_handler)

		# Disabled: override configuration path if required
		#
		# if self.args.conf_path is not None:
		# 	self.conf_path = os.path.abspath(self.args.conf_path)

		args = self.args

		# Engine configuration: defaults plus the files/data given by the user
		self.engine_conf_args = ConfArgs(
			self.log, self.conf_path,
			args.engine_conf_files, args.engine_conf_data,
			self.DEFAULT_CONF_FILES, self.DEFAULT_REQUIRED_CONF)
		self.engine_conf_builder = self.engine_conf_args.conf_builder

		# Case configuration: only what the user gave
		self.case_conf_args = ConfArgs(
			self.log, self.conf_path,
			args.case_conf_files, args.case_conf_data)
		self.case_conf_builder = self.case_conf_args.conf_builder

		self.workspace = args.workspace

		# The case name is optional; normalize it only when present
		case_name = args.case_name
		self.case_name = case_name if case_name is None else normalize_id(case_name)

		# No limit given means 0
		self.max_cores = 0 if args.max_cores is None else args.max_cores

示例#4

0

显示文件

文件： batch_analysis.py 项目： chris-zen/phd-thesis

	def execute(self):
		"""Scan the given paths for projects and run the mutations workflow.

		Every path argument must exist (otherwise an error is logged and
		``exit(-1)`` is called). Projects found by ``list_projects`` are
		discarded when they lack an 'id' or 'files' entry, get their id
		normalized, and are kept when the id matches the include patterns and
		none of the exclude patterns. A repeated id among the included
		projects is reported as an error and stops the command. The selected
		projects are added to the case configuration under "projects" and the
		workflow is started.
		"""

		def collect_patterns(literals, regexes, list_files):
			# Build the regular expression sources for one filter (includes or
			# excludes). Literal identifiers, given directly or read one per
			# line from files, are escaped and anchored with ^ and $; user
			# supplied regular expressions are taken verbatim.
			patterns = []
			if literals is not None:
				patterns += ["^{0}$".format(re.escape(value)) for value in literals]
			if regexes is not None:
				patterns += list(regexes)
			if list_files is not None:
				for list_file in list_files:
					with open(list_file, "r") as f:
						for line in f:
							line = line.strip()
							# Skip blank lines and '#' comments
							if len(line) == 0 or line.startswith("#"):
								continue
							patterns += ["^{0}$".format(re.escape(line))]
			return patterns

		# Gather scan paths from arguments

		scan_paths = []

		for scan_path in self.args.paths:
			if not os.path.isabs(scan_path):
				scan_path = os.path.join(os.getcwd(), scan_path)

			if not os.path.exists(scan_path):
				self.log.error("Path not found: {}".format(scan_path))
				exit(-1)

			scan_paths += [scan_path]

		# Gather includes and excludes from options

		includes = collect_patterns(
			self.args.include, self.args.include_regex, self.args.include_from)

		# Without any include pattern every project is a candidate
		if len(includes) == 0:
			includes = ["^.*$"]

		excludes = collect_patterns(
			self.args.exclude, self.args.exclude_regex, self.args.exclude_from)

		# compile regular expressions

		includes = [re.compile(inc) for inc in includes]
		excludes = [re.compile(exc) for exc in excludes]

		# scan paths

		self.projects = []
		project_ids = set()
		file_object = {}

		self.log.info("Looking for data projects ...")

		for scan_path in scan_paths:
			for path, project in list_projects(self.log, scan_path):
				if "id" not in project:
					self.log.warn("Discarding project missing 'id': {0}".format(path))
					continue
				if "files" not in project:
					self.log.warn("Discarding project missing 'files': {0}".format(path))
					continue

				project["id"] = normalize_id(project["id"])
				project_id = project["id"]
				if "name" in project:
					project_name = ": " + project["name"]
				else:
					project_name = ""

				if match_id(project_id, includes) and not match_id(project_id, excludes):
					if project_id in project_ids:
						self.log.error("Duplicated project id at {0}".format(path))
						exit(-1)

					# Remember the id, otherwise the duplicate check above
					# would never find anything
					project_ids.add(project_id)

					self.log.info("  {0}{1} (included)".format(project_id, project_name))

					project = init_project_files(project, os.path.dirname(path), file_object)
					self.projects += [project]
				else:
					self.log.info("  {0}{1} (excluded)".format(project_id, project_name))

		# Create the wok engine and the workflow instance

		self.case_conf_builder.add_value("projects", self.projects)

		self._wok_run(MUTATIONS_FLOW_NAME, container="{}-{}".format(self.user_id, self.workspace))

示例#5

0

显示文件

文件： analysis.py 项目： chris-zen/phd-thesis

def run(type):
	"""Show the analysis form (GET) or submit a new analysis (other methods).

	On submission the uploaded mutations file and the form fields are turned
	into a case configuration, a case is created through
	``current_app.wok``, the input file is uploaded to the case storages and
	the case is started.

	:param type: analysis type; anything other than COHORT_ANALYSIS or
		SINGLE_TUMOR_ANALYSIS aborts with HTTP 400
	:return: the rendered "analysis.html" template on GET, otherwise a
		redirect to "cases.index"
	"""
	if type not in [COHORT_ANALYSIS, SINGLE_TUMOR_ANALYSIS]:
		abort(400)

	if request.method == "GET":
		form = dict(
			ofm_genes_threshold=ONCODRIVEFM_GENES_THRESHOLD,
			ofm_pathways_threshold=ONCODRIVEFM_PATHWAYS_THRESHOLD,
			oclust_genes_threshold=ONCODRIVECLUST_MUTATIONS_THRESHOLD)

		return render_template("analysis.html", type=type, form=form)

	if current_app.wok.cases_count(current_user) >= current_app.config.get("LIMIT_NUM_CASES", 100):
		flash("""There is a limit on the number of simultaneous analysis that can be managed.
		You must remove finished analysis before running new ones.""", "error")
		return redirect(url_for("cases.index"))

	mutations_file = request.files['mutations_file']
	file_name = os.path.basename(mutations_file.filename)

	# Fall back to the file name (without extension) when no name is given
	project_id = request.form['project_name']
	if len(project_id) == 0:
		project_id = os.path.splitext(file_name)[0]

	project_id = unique_project_id(normalize_id(project_id))

	# Disabled: refuse analysis for users that are not validated
	#
	# if not current_user.validated:
	# 	flash("""You can not run an analysis with your data until you are completely registered.
	# 	Please check your email and follow the instructions to validate this account.""", "error")
	# 	flash("Meanwhile you can play with the included examples.")
	# 	return redirect(url_for("examples"))

	cb = ConfigBuilder()
	cb.add_value("user_id", current_user.nick)
	cb.add_value("workspace", DEFAULT_WORKSPACE)
	cb.add_value("project.id", project_id)

	#case_name = "-".join([current_user.nick, project_id])
	#cb.add_value("wok.instance.name", case_name)

	results_path, project_path, project_temp_path = get_paths(project_id)

	if not current_user.is_anonymous():
		cb.add_value("website.user_id", current_user.nick)

	if type == SINGLE_TUMOR_ANALYSIS: #request.form.get("variants_only") == "1":
		cb.add_value("variants_only", True)
		cb.add_value("skip_oncodrivefm", True)
		cb.add_value("skip_oncodriveclust", True)

	def warn_wrong_input(field):
		# A failing threshold field is only reported for cohort analysis
		if type == COHORT_ANALYSIS:
			current_app.logger.warn("[{}] Wrong form input: {}={}".format(
				current_user.nick, field, request.form.get(field)))

	# OncodriveFM thresholds: a positive integer optionally followed by '%'.
	# Values that do not match are ignored so the defaults apply.
	for field, conf_key in (
			("ofm_genes_threshold", ONCODRIVEFM_GENES_THRESHOLD_KEY),
			("ofm_pathways_threshold", ONCODRIVEFM_PATHWAYS_THRESHOLD_KEY)):
		try:
			threshold = request.form[field]
			if re.match(r"^[1-9]\d*%?$", threshold):
				cb.add_value(conf_key, threshold)
		except Exception:
			warn_wrong_input(field)

	# OncodriveCLUST threshold: an integer >= 1
	try:
		threshold = int(request.form["oclust_genes_threshold"])
		if threshold >= 1:
			cb.add_value(ONCODRIVECLUST_GENES_THRESHOLD_KEY, threshold)
	except Exception:
		warn_wrong_input("oclust_genes_threshold")

	genes_filter_enabled = request.form.get('genes_filter_enabled') == "1"
	cb.add_value(ONCODRIVEFM_FILTER_ENABLED_KEY, genes_filter_enabled)
	cb.add_value(ONCODRIVECLUST_FILTER_ENABLED_KEY, genes_filter_enabled)
	if genes_filter_enabled:
		# Best effort: a failing genes filter upload is logged, not fatal
		try:
			genes_filter_file = request.files['genes_filter_file']
			genes_filter_file_path = os.path.join(project_temp_path, "genes-filter.txt")
			genes_filter_file.save(genes_filter_file_path)
			# An empty upload means no custom filter
			if os.path.getsize(genes_filter_file_path) != 0:
				cb.add_value(ONCODRIVEFM_GENES_FILTER_KEY, genes_filter_file_path)
				cb.add_value(ONCODRIVECLUST_GENES_FILTER_KEY, genes_filter_file_path)
		except Exception:
			current_app.logger.exception("Error retrieving genes filter from form")

	assembly = request.form.get("assembly", DEFAULT_ASSEMBLY).lower()

	project = dict(
		id=project_id,
		assembly=assembly,
		files=[file_name])

	projects = [init_project_files(project, check_paths=False)]
	cb.add_value("projects", projects)

	properties = dict(
		analysis_type=type,
		path=os.path.relpath(project_path, results_path))

	current_app.logger.info("[{}] Starting analysis {} ...".format(current_user.nick, project_id))

	# The case is created stopped so the input can be uploaded before it runs
	case = current_app.wok.create_case(current_user, project_id, cb, PROJECT_NAME, MUTATIONS_FLOW_NAME,
									   properties=properties, start=False)

	engine_case = current_app.wok.engine.case(case.engine_name)

	#TODO use a background thread
	upload_files(current_app.logger, case.engine_name, engine_case.storages, projects, streams=[mutations_file.stream])

	current_app.logger.info("[{}] Analysis {} started on case {}...".format(
							current_user.nick, project_id, case.engine_name))

	engine_case.start()

	return redirect(url_for("cases.index", highlight=case.id))

示例#6

0

显示文件

文件： recurrences.py 项目： chris-zen/phd-thesis

def combination_recurrences(projects_set):
    """Combine the recurrences of all the projects of a classifier group.

    The sample frequencies of variant genes, genes and pathways of every
    project are accumulated in a temporary SQLite database, divided by the
    total number of affected samples to obtain proportions, and written to
    three files under ``paths.combination_path("recurrences")``:
    ``variant_gene-<prefix>.tsv.gz``, ``gene-<prefix>.tsv.gz`` and
    ``pathway-<prefix>.tsv.gz``. The temporary database is removed at the end.

    :param projects_set: a ``(classifier, projects)`` pair. ``classifier`` is
        a mapping read for "id", "name" and the "group_*" keys below;
        ``projects`` is an iterable of mappings read for "id" and "db".
    """
    log = task.logger

    config = GlobalConfig(task.conf)
    paths = PathsConfig(config)

    classifier, projects = projects_set

    classifier_id = classifier["id"]

    # NOTE: short_values and long_values are read but not used below
    group_values = classifier["group_values"]
    short_values = classifier["group_short_values"]
    long_values = classifier["group_long_values"]

    group_name = classifier["group_name"]
    group_short_name = classifier["group_short_name"]
    group_long_name = classifier["group_long_name"]

    # Output files are named after the classifier, plus the group short name
    # when the classifier has group values
    if len(group_values) == 0:
        group_file_prefix = classifier_id
    else:
        group_file_prefix = "{0}-{1}".format(classifier_id, group_short_name)

    group_file_prefix = normalize_id(group_file_prefix)

    log.info(
        "--- [{0} ({1}) ({2}) ({3})] {4}".format(
            classifier["name"], group_long_name, group_short_name, group_name, "-" * 30
        )
    )

    log.info("Creating database ...")

    db_path = make_temp_file(task, suffix="-{0}.db".format(group_file_prefix))
    log.debug("  > {0}".format(db_path))

    conn = sqlite3.connect(db_path)
    # Rows are accessed by column name when saving the results
    conn.row_factory = sqlite3.Row

    # Presumably creates the variants, variant_genes, genes and pathways
    # tables used below -- defined elsewhere, confirm there
    create_db(conn)

    log.info("Combining recurrences ...")

    c = conn.cursor()

    sample_total = 0

    project_ids = []
    for project in projects:
        project_ids += [project["id"]]

        log.info("  Project {0}:".format(project["id"]))

        projdb = ProjectDb(project["db"])

        project_sample_total = projdb.get_total_affected_samples()

        sample_total += project_sample_total

        log.info("    Total samples = {0}".format(project_sample_total))

        log.info("    Variant genes ...")

        count = 0
        for afg in projdb.affected_genes(join_variant=True, join_xrefs=True, join_rec=True):
            var = afg.var
            rec = afg.rec

            if rec.sample_freq is None:
                log.warn("Discarding variant gene without sample frequency: {0}".format(repr(afg)))
                continue

            start, end, ref, alt = var_to_tab(var)

            # Insert the variant; if the insert is rejected with an integrity
            # error (presumably the variant already exists -- depends on the
            # constraints set by create_db) reuse the id of the stored one
            try:
                c.execute(
                    "INSERT INTO variants (chr, strand, start, ref, alt, xrefs) VALUES (?,?,?,?,?,?)",
                    (var.chr, var.strand, start, ref, alt, ",".join(var.xrefs)),
                )
                var_id = c.lastrowid
            except sqlite3.IntegrityError:
                c.execute(
                    "SELECT var_id FROM variants WHERE chr=? AND strand=? AND start=? AND ref=? AND alt=?",
                    (var.chr, var.strand, start, ref, alt),
                )
                r = c.fetchone()
                var_id = r[0]

            # Same insert-or-accumulate approach for the variant/gene pair:
            # on an integrity error the sample frequency is added to the
            # stored one
            try:
                c.execute(
                    "INSERT INTO variant_genes (var_id, gene_id, impact, coding_region, prot_changes, sample_freq) VALUES (?,?,?,?,?,?)",
                    (var_id, afg.gene_id, afg.impact, afg.coding_region, afg.prot_changes, rec.sample_freq),
                )
            except sqlite3.IntegrityError:
                c.execute(
                    """
					UPDATE variant_genes
					SET sample_freq=sample_freq + ?
					WHERE var_id=? AND gene_id=?""",
                    (rec.sample_freq, var_id, afg.gene_id),
                )

            count += 1

        log.info("      {0} variant genes".format(count))

        log.info("    Genes ...")

        count = 0
        for gene in projdb.genes(join_xrefs=True, join_rec=True):
            rec = gene.rec

            # Genes without sample frequency are skipped silently
            if rec.sample_freq is None:
                continue

            # Insert the gene the first time it is seen, accumulate afterwards
            c.execute("SELECT COUNT(*) FROM genes WHERE gene_id=?", (gene.id,))
            r = c.fetchone()
            if r[0] == 0:
                c.execute("INSERT INTO genes (gene_id, sample_freq) VALUES (?,?)", (gene.id, rec.sample_freq))
            else:
                c.execute("UPDATE genes SET sample_freq=sample_freq + ? WHERE gene_id=?", (rec.sample_freq, gene.id))
            count += 1

        log.info("      {0} genes".format(count))

        log.info("    Pathways ...")

        count = 0
        for pathway in projdb.pathways(join_rec=True):
            rec = pathway.rec

            # Pathways without sample frequency are skipped silently
            if rec.sample_freq is None:
                continue

            # Insert the pathway the first time it is seen, accumulate afterwards
            c.execute("SELECT COUNT(*) FROM pathways WHERE pathway_id=?", (pathway.id,))
            r = c.fetchone()
            if r[0] == 0:
                c.execute("INSERT INTO pathways (pathway_id, sample_freq) VALUES (?,?)", (pathway.id, rec.sample_freq))
            else:
                c.execute(
                    "UPDATE pathways SET sample_freq=sample_freq + ? WHERE pathway_id=?", (rec.sample_freq, pathway.id)
                )
            count += 1

        log.info("      {0} pathways".format(count))

        projdb.close()

    log.info("Calculating proportions with {0} samples in total among projects ...".format(sample_total))

    # sample_prop = sample_freq / sample_total; skipped when there are no
    # samples to avoid dividing by zero (sample_prop is then left untouched)
    if sample_total > 0:
        c.execute("UPDATE variant_genes SET sample_prop=CAST(sample_freq AS REAL)/{0}.0".format(sample_total))
        c.execute("UPDATE genes SET sample_prop=CAST(sample_freq AS REAL)/{0}.0".format(sample_total))
        c.execute("UPDATE pathways SET sample_prop=CAST(sample_freq AS REAL)/{0}.0".format(sample_total))

    c.close()
    conn.commit()

    log.info("Saving results ...")

    c = conn.cursor()

    base_path = paths.combination_path("recurrences")

    log.info("  Variant genes ...")

    with tsv.open(os.path.join(base_path, "variant_gene-{0}.tsv.gz".format(group_file_prefix)), "w") as f:
        # Metadata parameters first, then the column header, then the rows
        tsv.write_param(f, "classifier", classifier["id"])
        tsv.write_param(f, "group_id", group_name)
        tsv.write_param(f, "group_short_name", group_short_name)
        tsv.write_param(f, "group_long_name", group_long_name)
        tsv.write_param(f, "projects", ",".join(project_ids))
        tsv.write_param(f, "SAMPLE_TOTAL", sample_total)
        tsv.write_line(
            f,
            "CHR",
            "STRAND",
            "START",
            "ALLELE",
            "GENE_ID",
            "IMPACT",
            "IMPACT_CLASS",
            "SAMPLE_FREQ",
            "SAMPLE_PROP",
            "PROT_CHANGES",
            "XREFS",
        )
        # "chr*1" orders by the numeric value of chr first; the plain "chr"
        # that follows orders the rows that tie on it by their text value
        for r in c.execute(
            "SELECT * FROM variant_genes JOIN variants USING (var_id) ORDER BY chr*1, chr, strand, start, gene_id"
        ):
            strand, ref, alt = r["strand"], r["ref"], r["alt"]
            allele = "{0}/{1}".format(ref, alt)
            tsv.write_line(
                f,
                r["chr"],
                strand,
                r["start"],
                allele,
                r["gene_id"],
                r["impact"],
                TransFIC.class_name(r["impact"]),
                r["sample_freq"],
                r["sample_prop"],
                r["prot_changes"],
                r["xrefs"],
                null_value="-",
            )

    log.info("  Genes ...")

    with tsv.open(os.path.join(base_path, "gene-{0}.tsv.gz".format(group_file_prefix)), "w") as f:
        tsv.write_param(f, "classifier", classifier["id"])
        tsv.write_param(f, "group_id", group_name)
        tsv.write_param(f, "group_short_name", group_short_name)
        tsv.write_param(f, "group_long_name", group_long_name)
        tsv.write_param(f, "projects", ",".join(project_ids))
        tsv.write_param(f, "SAMPLE_TOTAL", sample_total)
        tsv.write_line(f, "GENE_ID", "SAMPLE_FREQ", "SAMPLE_PROP")
        for r in c.execute("SELECT * FROM genes ORDER BY gene_id"):
            tsv.write_line(f, r["gene_id"], r["sample_freq"], r["sample_prop"], null_value="-")

    log.info("  Pathways ...")

    with tsv.open(os.path.join(base_path, "pathway-{0}.tsv.gz".format(group_file_prefix)), "w") as f:
        tsv.write_param(f, "classifier", classifier["id"])
        tsv.write_param(f, "group_id", group_name)
        tsv.write_param(f, "group_short_name", group_short_name)
        tsv.write_param(f, "group_long_name", group_long_name)
        tsv.write_param(f, "projects", ",".join(project_ids))
        tsv.write_param(f, "SAMPLE_TOTAL", sample_total)
        tsv.write_line(f, "PATHWAY_ID", "SAMPLE_FREQ", "SAMPLE_PROP")
        for r in c.execute("SELECT * FROM pathways ORDER BY pathway_id"):
            tsv.write_line(f, r["pathway_id"], r["sample_freq"], r["sample_prop"], null_value="-")

    conn.close()

    # The database was only a working area for the combination
    remove_temp(task, db_path)

示例#7

0

显示文件

文件： analysis.py 项目： chris-zen/phd-thesis

def run(type):
	"""Show the analysis form (GET) or submit a new analysis (other methods).

	On submission the uploaded mutations file is saved into the temporary
	path of the project, the form fields are turned into configuration
	values and a case is created and started through ``current_app.wok``.

	:param type: analysis type; anything other than COHORT_ANALYSIS or
		SINGLE_TUMOR_ANALYSIS aborts with HTTP 400
	:return: the rendered "analysis.html" template on GET or when the
		mutations file can not be saved, otherwise a redirect to
		"cases.index"
	"""
	if type not in [COHORT_ANALYSIS, SINGLE_TUMOR_ANALYSIS]:
		abort(400)

	if request.method == "GET":
		form = dict(
			ofm_genes_threshold=ONCODRIVEFM_GENES_THRESHOLD,
			ofm_pathways_threshold=ONCODRIVEFM_PATHWAYS_THRESHOLD,
			oclust_genes_threshold=ONCODRIVECLUST_MUTATIONS_THRESHOLD)

		return render_template("analysis.html", type=type, form=form)

	mutations_file = request.files['mutations_file']
	file_name = os.path.basename(mutations_file.filename)

	# Fall back to the file name (without extension) when no name is given
	project_id = request.form['project_name']
	if len(project_id) == 0:
		project_id = os.path.splitext(file_name)[0]

	project_id = normalize_id(project_id)

	# Make the id unique for this user by appending -1, -2, ...
	i = 0
	base_id = project_id
	while current_app.wok.exists_case(current_user, project_id):
		i += 1
		project_id = "{}-{}".format(base_id, i)

	# Disabled: reject repeated names instead of renaming
	#
	# if current_app.wok.exists_case(current_user, project_id):
	# 	flash("An analysis with this name already exists. Please give it a different name or remove the previous one before starting again.", "error")
	# 	return render_template("analysis.html", type=type, form=request.form)

	# Disabled: demo limits and account validation
	#
	# if g.demo and current_user.max_analysis != -1 and proj_manager.get_projects_count(g.conn, g.user_id) >= current_user.max_analysis:
	# 	flash("""The online version is for demo only and there is a limit for the number of simultaneous analysis a user can manage.
	# 	You must remove finished analysis before running new ones.
	# 	Please download the pipeline and install in your system to avoid these limitations.""", "error")
	# 	return redirect(url_for("download"))
	#
	# if not current_user.validated:
	# 	flash("""You can not run an analysis with your data until you are completely registered.
	# 	Please check your email and follow the instructions to validate this account.""", "error")
	# 	flash("Meanwhile you can play with the included examples.")
	# 	return redirect(url_for("examples"))

	cb = ConfigBuilder(current_app.wok.conf_builder)
	cb.add_value("workspace", "default")
	cb.add_value("project.id", project_id)

	#case_name = "-".join([current_user.nick, project_id])
	#cb.add_value("wok.instance.name", case_name)

	conf = cb.get_conf()

	# Results and temporary data are kept under a per-user folder
	results_path = os.path.join(conf["results_path"], current_user.nick)
	cb.add_value("results_path", results_path)

	temp_path = os.path.join(conf["temp_path"], current_user.nick)
	cb.add_value("temp_path", temp_path)

	# Rebuild the configuration so it includes the per-user paths
	conf = cb.get_conf()

	project_path = get_project_path(conf, project_id)
	if not os.path.exists(project_path):
		os.makedirs(project_path)

	project_temp_path = get_temp_path(conf, project_id)
	if not os.path.exists(project_temp_path):
		os.makedirs(project_temp_path)

	if not current_user.is_anonymous():
		cb.add_value("website.user_id", current_user.nick)

	# FIXME ? type == SINGLE_TUMOR_ANALYSIS
	if request.form.get("variants_only") == "1":
		cb.add_value("variants_only", True)
		cb.add_value("skip_oncodrivefm", True)
		cb.add_value("skip_oncodriveclust", True)

	def warn_undefined_input(field):
		# A failing threshold field is only reported for cohort analysis
		if type == COHORT_ANALYSIS:
			current_app.logger.warn("Undefined form input: {}".format(field))

	# OncodriveFM thresholds: a positive integer optionally followed by '%'.
	# Values that do not match are ignored so the defaults apply.
	for field, conf_key in (
			("ofm_genes_threshold", ONCODRIVEFM_GENES_THRESHOLD_KEY),
			("ofm_pathways_threshold", ONCODRIVEFM_PATHWAYS_THRESHOLD_KEY)):
		try:
			threshold = request.form[field]
			if re.match(r"^[1-9]\d*%?$", threshold):
				cb.add_value(conf_key, threshold)
		except Exception:
			warn_undefined_input(field)

	# OncodriveCLUST threshold: an integer >= 1
	try:
		threshold = int(request.form["oclust_genes_threshold"])
		if threshold >= 1:
			cb.add_value(ONCODRIVECLUST_GENES_THRESHOLD_KEY, threshold)
	except Exception:
		warn_undefined_input("oclust_genes_threshold")

	genes_filter_enabled = request.form.get('genes_filter_enabled') == "1"
	cb.add_value(ONCODRIVEFM_FILTER_ENABLED_KEY, genes_filter_enabled)
	cb.add_value(ONCODRIVECLUST_FILTER_ENABLED_KEY, genes_filter_enabled)
	if genes_filter_enabled:
		# Best effort: a failing genes filter upload is logged, not fatal
		try:
			genes_filter_file = request.files['genes_filter_file']
			genes_filter_file_path = os.path.join(project_temp_path, "genes-filter.txt")
			genes_filter_file.save(genes_filter_file_path)
			# An empty upload means no custom filter
			if os.path.getsize(genes_filter_file_path) != 0:
				cb.add_value(ONCODRIVEFM_GENES_FILTER_KEY, genes_filter_file_path)
				cb.add_value(ONCODRIVECLUST_GENES_FILTER_KEY, genes_filter_file_path)
		except Exception:
			current_app.logger.exception("Error retrieving genes filter from form")

	mutations_path = os.path.join(project_temp_path, file_name)
	try:
		mutations_file.save(mutations_path)
	except Exception:
		current_app.logger.exception("Error while saving mutations file {} into {}".format(mutations_file.filename, mutations_path))
		flash("""There were some problem with the input file for mutations.
			Please check that a file has been loaded before submitting a new analysis.
			This error has been already submitted to the application administrators
			who will take care of it as soon as possible.""")
		return render_template("analysis.html", type=type, form=request.form)

	assembly = request.form.get("assembly", "hg19").lower()

	cb, flow_uri = project_analysis(
				mutations_path,
				assembly=assembly,
				conf_builder=cb)

	properties = dict(
		analysis_type=type,
		path=project_path,
		temp_path=project_temp_path,
		data_file=mutations_path)

	current_app.logger.info("[{}] Starting analysis {} ...".format(
							current_user.nick, project_id))

	case = current_app.wok.create_case(current_user, project_id, cb, flow_uri,
									   properties=properties, start=True)

	current_app.logger.info("[{}] Analysis {} started on case {}...".format(
							current_user.nick, project_id, case.engine_name))

	return redirect(url_for("cases.index", highlight=case.id))

示例#8

0

显示文件

文件： oncodrivefm.py 项目： chris-zen/phd-thesis

def combination_oncodrivefm(projects_set):
	"""Combine the OncodriveFM results of all the projects of a classifier group.

	The gene p-values and pathway z-scores of every project are exported to
	files in a temporary folder, then ``oncodrivefm-combine`` is run once for
	genes and once for pathways, writing into
	``paths.combination_path("oncodrivefm")``. The temporary folder is
	removed when both combinations succeed.

	:param projects_set: a ``(classifier, projects)`` pair. ``classifier`` is
		a mapping read for "id", "name", "group_values", "group_name",
		"group_short_name" and "group_long_name"; ``projects`` is an iterable
		of mappings read for "id" and "db".
	:raises Exception: when ``oncodrivefm-combine`` returns a non zero code
	"""
	log = task.logger

	config = GlobalConfig(task.conf)
	paths = PathsConfig(config)

	classifier, projects = projects_set

	classifier_id = classifier["id"]

	group_values = classifier["group_values"]

	group_name = classifier["group_name"]
	group_short_name = classifier["group_short_name"]
	group_long_name = classifier["group_long_name"]

	# Output files are named after the classifier, plus the group short name
	# when the classifier has group values
	if len(group_values) == 0:
		group_file_prefix = classifier_id
	else:
		group_file_prefix = "{0}-{1}".format(classifier_id, group_short_name)

	group_file_prefix = normalize_id(group_file_prefix)

	log.info("--- [{0} ({1}) ({2}) ({3})] {4}".format(
		classifier["name"], group_long_name, group_short_name, group_name, "-" * 30))

	log.info("Exporting project data ...")

	base_path = make_temp_dir(task, suffix=".{0}".format(group_file_prefix))

	log.debug("> {0}".format(base_path))

	project_ids = []
	gene_files = []
	pathway_files = []
	for project in projects:
		project_id = project["id"]
		project_ids.append(project_id)

		log.info("  Project {0}:".format(project_id))

		projdb = ProjectDb(project["db"])
		try:
			log.info("    Genes ...")

			count = 0
			file_path = os.path.join(base_path, "{0}-genes.tsv".format(project_id))
			gene_files.append(file_path)
			with open(file_path, "w") as f:
				tsv.write_param(f, "classifier", classifier_id)
				tsv.write_param(f, "group_id", group_name)
				tsv.write_param(f, "slice", project_id)
				tsv.write_line(f, "GENE_ID", "PVALUE")
				for gene in projdb.genes():
					# Only genes with a p-value take part in the combination
					if gene.fm_pvalue is not None:
						tsv.write_line(f, gene.id, gene.fm_pvalue, null_value="-")
						count += 1

			log.info("      {0} genes".format(count))

			log.info("    Pathways ...")

			count = 0
			file_path = os.path.join(base_path, "{0}-pathways.tsv".format(project_id))
			pathway_files.append(file_path)
			with open(file_path, "w") as f:
				tsv.write_param(f, "classifier", classifier_id)
				tsv.write_param(f, "group_id", group_name)
				tsv.write_param(f, "slice", project_id)
				tsv.write_line(f, "PATHWAY_ID", "ZSCORE")
				for pathway in projdb.pathways():
					# Only pathways with a z-score take part in the combination
					if pathway.fm_zscore is not None:
						tsv.write_line(f, pathway.id, pathway.fm_zscore, null_value="-")
						count += 1

			log.info("      {0} pathways".format(count))
		finally:
			# Release the project database even if the export fails
			projdb.close()

	log.info("Combining ...")

	combination_path = paths.combination_path("oncodrivefm")

	def combine(method, name_prefix, input_files, what):
		# Run oncodrivefm-combine for one kind of feature. The arguments are
		# passed as a list without a shell, so quotes or other special
		# characters in names and paths reach the program unchanged.
		args = [
			"oncodrivefm-combine",
			"-m", method,
			"-o", combination_path,
			"-n", "{0}-{1}".format(name_prefix, group_file_prefix),
			"-D", "classifier={0}".format(classifier_id),
			"-D", "group_id={0}".format(group_name),
			"-D", "group_short_name={0}".format(group_short_name),
			"-D", "group_long_name={0}".format(group_long_name),
			"--output-format", "tsv.gz"
		] + input_files

		# Only for logging and error reporting, never executed as a string
		cmd = " ".join(args)

		log.debug(cmd)

		ret_code = subprocess.call(args)
		if ret_code != 0:
			raise Exception("OncodriveFM error while combining {0}:\n{1}".format(what, cmd))

	log.info("  Genes ...")

	combine("median-empirical", "gene", gene_files, "gene pvalues")

	log.info("  Pathways ...")

	combine("median-zscore", "pathway", pathway_files, "pathway zscores")

	remove_temp(task, base_path)

示例#9

0

显示文件

文件： command.py 项目： chris-zen/phd-thesis

	def __init__(self, args_usage="", epilog="", flow_file=None, conf_files=None, conf_keys=None, logger_name=None):
		"""Initialise the command and derive its run settings from the arguments.

		Installs ``keyboardinterrupt_handler`` for SIGINT and SIGTERM and
		stores the flow file, the configuration files and keys (class defaults
		followed by the ones given here), the workspace, the normalized
		instance name (when one is given), the required and user configuration
		files, the user configuration data and the maximum number of cores
		(0 when not specified).

		:param args_usage: forwarded to ``Command.__init__``
		:param epilog: forwarded to ``Command.__init__``
		:param flow_file: stored as ``self.flow_file``
		:param conf_files: extra configuration files appended to
			``DEFAULT_CONF_FILES``; None means no extra files
		:param conf_keys: extra required keys appended to
			``DEFAULT_REQUIRED_CONF``; None means no extra keys
		:param logger_name: forwarded to ``Command.__init__``
		"""
		Command.__init__(self, args_usage, epilog, logger_name)

		# Both termination signals share the same handler
		for signum in (signal.SIGINT, signal.SIGTERM):
			signal.signal(signum, keyboardinterrupt_handler)

		conf_files = [] if conf_files is None else conf_files
		conf_keys = [] if conf_keys is None else conf_keys

		self.flow_file = flow_file
		self.conf_files = self.DEFAULT_CONF_FILES + conf_files
		self.conf_keys = self.DEFAULT_REQUIRED_CONF + conf_keys

		args = self.args

		self.workspace = args.workspace

		# The instance name is optional; normalize it only when present
		instance_name = args.instance_name
		self.instance_name = instance_name if instance_name is None else normalize_id(instance_name)

		# Disabled: override configuration path if required
		#
		# if self.args.conf_path is not None:
		# 	self.conf_path = os.path.abspath(self.args.conf_path)
		#
		# Disabled: get required configuration files and override system.conf if required
		#
		# if self.args.system_conf is not None:
		# 	req_conf_files = []
		# 	for cf in self.conf_files:
		# 		if cf == "system.conf":
		# 			req_conf_files += [self.args.system_conf]
		# 		else:
		# 			req_conf_files += [cf]
		# else:
		# 	req_conf_files = self.conf_files

		req_conf_files = self.conf_files

		# Required files live in the configuration path
		self.required_conf_files = [
			os.path.join(self.conf_path, conf_name) for conf_name in req_conf_files]

		# User files are taken relative to the current working directory
		requested_files = args.conf_files
		if requested_files is None:
			self.user_conf_files = []
		else:
			self.user_conf_files = [
				conf_path if os.path.isabs(conf_path) else os.path.join(os.getcwd(), conf_path)
				for conf_path in requested_files]

		self.user_conf_data = [] if args.conf_data is None else args.conf_data

		# No limit given means 0
		self.max_cores = 0 if args.max_cores is None else args.max_cores

		# Extra configuration data starts empty
		self.extra_conf_data = []