def stat_cor(input={"correlation_R": "", "cor_pdf": "", "venn": "", }, output={"json": ""}, param=None):
    # TODO: merge this into stat_venn
    """ReplicateQC: describe the similarity of replicate experiments.

    Parses the replicate signal vectors out of the R script named by
    ``input["correlation_R"]`` and dumps a JSON stat stub to ``output["json"]``.

    NOTE(review): the actual pairwise-correlation computation (running R on
    each replicate pair and judging Pass/Fail against the 0.6 cutoff) is
    currently disabled; only the cutoff is recorded in the output. The
    "judge" and "min_cor" keys are intentionally absent until that is
    restored.

    :param input: dict with the R-script path under "correlation_R"
        (plus "cor_pdf"/"venn" paths, unused here).
    :param output: dict with the destination JSON path under "json".
    :param param: opaque parameters, recorded verbatim in the output.
    """
    # Use a context manager so the file handle is always closed.
    with open(input["correlation_R"]) as r_script:
        correlation_result_r_code = r_script.read()
    # Each replicate signal vector appears in the script as e.g. `p1 <- c(...)`
    # or `c2 <- c(...)`; capture the right-hand side of each assignment.
    # Kept (despite being unused below) so a malformed script still surfaces
    # here rather than in the disabled correlation step.
    signal_list = re.findall(r"[pc]\d+ <- (.*)$", correlation_result_r_code, re.MULTILINE)
    result_dict = {"stat": {}, "input": input, "output": output, "param": param}
    # Hard-coded minimum acceptable replicate correlation.
    result_dict["stat"]["cutoff"] = 0.6
    json_dump(result_dict)
def stat_macs2(input={"macs2_peaks_xls": "", "db": "", "template": ""}, output={"R": "", "json": "", "pdf": ""}, param={"id": ""}):
    """Summarize a MACS2 peak-calling run and render its QC plot.

    Parses the MACS2 peaks XLS, judges the run against fixed cutoffs
    (unique locations > 5,000,000; peaks with fold-change >= 10 at least
    1,000), renders the high-confidence-peak distribution against the
    historic values stored in the QC database, and dumps the stats as JSON.

    :param input: paths for the MACS2 XLS, the sqlite QC database, and the
        Jinja R template.
    :param output: paths for the rendered R script, the JSON stats, and the pdf.
    :param param: dict carrying the sample "id" (recorded in the output).
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    json_dict["stat"] = _peaks_parse(input["macs2_peaks_xls"])
    json_dict["stat"]["cutoff"] = {"uni_loc": 5000000, "high_conf_peaks": 1000}
    json_dict["stat"]["judge"] = {
        "uni_loc": "Pass" if json_dict["stat"]["treat_unic"] > 5000000 else "Fail",
        "high_conf_peaks": "Pass" if json_dict["stat"]["peaksge10"] >= 1000 else "Fail"}
    # Close the connection explicitly; the original kept only the cursor and
    # leaked the sqlite connection.
    conn = sqlite3.connect(input["db"])
    try:
        cursor = conn.cursor()
        cursor.execute("select peak_fc_10 from peak_calling")
        # log10 of historic high-confidence peak counts; +0.001 guards log(0),
        # and non-positive entries are skipped entirely.
        historyData = [math.log(i[0] + 0.001, 10) for i in cursor.fetchall() if i[0] > 0]
    finally:
        conn.close()
    high_confident_peaks_r = JinjaTemplateCommand(
        name="highpeaksQC",
        template=input["template"],
        param={'historic_data': historyData,
               'current_data': [math.log(json_dict["stat"]["peaksge10"] + 0.01, 10)],
               # 'ids': [param["id"]],  # sample labels currently not rendered
               'cutoff': 3,
               'main': 'High confidence peaks distribution',
               'xlab': 'log(Number of Peaks fold greater than 10)',
               'ylab': 'fn(log(Number of Peaks fold greater than 10))',
               "pdf": output["pdf"],
               "render_dump": output["R"]})
    template_dump(high_confident_peaks_r)
    r_exec(high_confident_peaks_r)
    json_dump(json_dict)
def stat_fastqc(input={"db": "", "fastqc_summaries": [], "template": ""}, output={"R": "", "json": "", "pdf": ""}, param={"ids": [], "id": ""}):
    """Summarize per-sample FastQC results and render the quality plot.

    For each (summary file, sample id) pair, records the median sequence
    quality, the sequence length, and a Pass/Fail judgement against the
    fixed cutoff of 25. Then renders a cumulative-percentage plot of the
    current medians against historic medians from the QC database, and
    dumps the stats as JSON.

    :param input: the sqlite QC database path, a list of FastQC summary
        file paths, and the Jinja R template path.
    :param output: paths for the rendered R script, the JSON stats, and the pdf.
    :param param: "ids" — sample ids aligned with fastqc_summaries;
        "id" — run id (recorded verbatim in the output).
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    stat = json_dict["stat"]
    quality_medians = []
    for a_summary, a_id in zip(input["fastqc_summaries"], param["ids"]):
        parsed = _python_fastqc_parse(input=a_summary)
        stat[a_id] = {
            "median": parsed["median"],
            "cutoff": 25,
            # strictly greater than 25 passes; exactly 25 fails
            "judge": "Pass" if parsed["median"] > 25 else "Fail",
            "sequence_length": parsed["sequence_length"],
        }
        quality_medians.append(parsed["median"])
    # Historic medians for the background distribution in the plot.
    # Close the connection explicitly; the original kept only the cursor and
    # leaked the sqlite connection.
    conn = sqlite3.connect(input["db"])
    try:
        qc_cursor = conn.cursor()
        qc_cursor.execute("SELECT median_quality FROM fastqc_info")
        history_data = [float(i[0]) for i in qc_cursor.fetchall()]
    finally:
        conn.close()
    fastqc_dist_r = JinjaTemplateCommand(
        template=input["template"],
        param={'historic_data': history_data,
               'current_data': quality_medians,
               'ids': [underline_to_space(i) for i in param["ids"]],
               'cutoff': 25,
               'main': 'Sequence Quality Score Cumulative Percentage',
               'xlab': 'sequence quality score',
               'ylab': 'fn(sequence quality score)',
               "need_smooth_curve": True,
               "pdf": output["pdf"],
               "render_dump": output["R"]})
    template_dump(fastqc_dist_r)
    r_exec(fastqc_dist_r)
    json_dump(json_dict)
def stat_venn(input={"venn": ""}, output={"json": ""}, param=None):
    """Record the venn-diagram inputs/outputs as a JSON stat stub.

    NOTE(review): no statistics are computed yet; this only echoes
    input/output/param into ``output["json"]`` (see the merge TODO on
    stat_cor).

    BUGFIX: the ``output`` default was the *set* literal ``{"json", ""}``
    instead of the dict ``{"json": ""}`` used by every sibling function —
    a set has no "json" key and is not JSON-serializable.

    :param input: dict with the venn diagram path under "venn".
    :param output: dict with the destination JSON path under "json".
    :param param: opaque parameters, recorded verbatim in the output.
    """
    result_dict = {"stat": {}, "input": input, "output": output, "param": param}
    json_dump(result_dict)