def write(self, name, values):
    """Write one row: ``name``, then a tab-prefixed cell per value, then a newline.

    Every value is converted to text with the function returned by
    ``self._value_to_str_func(self.dtype)`` before being written.
    """
    to_text = self._value_to_str_func(self.dtype)
    emit = FileWriter.write

    emit(self, name)
    for item in values:
        emit(self, "\t")
        emit(self, to_text(item))
    emit(self, "\n")
def write_header(self, header = None):
    """Write the header line: the column names joined by tabs, then a newline.

    ``header`` may be a ``MatrixHeader`` (its ``columns`` are used) or a
    list/tuple of column names. Anything else, including the default
    ``None``, raises ``Exception``.
    """
    if isinstance(header, MatrixHeader):
        column_names = header.columns
    elif isinstance(header, (list, tuple)):
        column_names = header
    else:
        raise Exception("Unsupported headers type: {}".format(str(type(header))))

    FileWriter.write(self, "\t".join(column_names))
    FileWriter.write(self, "\n")
def merge(log, input, output, gitools_output):
    """
    Merge repeated rows of a tab-separated file and write the result in two layouts.

    Merge repeated rows by the lowest pvalue, in case the pvalue is the same
    take the one with greater n (the selection itself is delegated to
    ``merge_data`` -- NOTE(review): confirm against that helper).

    The first line of ``input`` is the header. For every following non-empty
    line the first field is the row name, fields ``1..7`` are merged into the
    upreg map and fields ``8..`` into the downreg map.

    Two files are written:

    * ``output``: the original header, then one line per row name with the
      upreg fields followed by the downreg fields.
    * ``gitools_output``: a ``column``/``row`` header followed by the
      ``upreg_*`` header names with that prefix removed, then two lines per
      row name, one tagged ``upreg`` and one tagged ``downreg``.

    Returns a tuple ``(upreg_count, downreg_count)`` with the accumulated
    values returned by ``merge_data`` for each side.
    """
    # Fields [1:mid_index] of each line are the upreg fields, the rest are downreg.
    mid_index = 8

    upreg = {}
    downreg = {}
    upreg_count = 0
    downreg_count = 0

    f = FileReader(input)
    try:
        hdr = f.readline().rstrip().split("\t")

        for line in f:
            line = line.rstrip()
            if not line:
                continue

            fields = line.split("\t")
            row_name = fields[0]

            upreg_count += merge_data(row_name, fields[1:mid_index], upreg)
            downreg_count += merge_data(row_name, fields[mid_index:], downreg)
    finally:
        # Release the reader even when a malformed line makes parsing fail.
        f.close()

    log.debug("Total rows: upreg = {}, downreg = {}".format(len(upreg), len(downreg)))
    log.debug("Merged rows: upreg = {}, downreg = {}".format(upreg_count, downreg_count))

    ofile = FileWriter(output)
    try:
        ofile.write("\t".join(hdr))
        ofile.write("\n")

        gfile = FileWriter(gitools_output)
        try:
            gfile.write("column\trow\t")
            # Drop the 6-character "upreg_" prefix to get the bare attribute names.
            gfile.write("\t".join([x[6:] for x in hdr if x.startswith("upreg_")]))
            gfile.write("\n")

            for row_name in upreg:
                upreg_data_join = "\t".join(upreg[row_name])
                downreg_data_join = "\t".join(downreg[row_name])

                ofile.write(row_name)
                ofile.write("\t")
                ofile.write(upreg_data_join)
                ofile.write("\t")
                ofile.write(downreg_data_join)
                ofile.write("\n")

                gfile.write("upreg\t")
                gfile.write(row_name)
                gfile.write("\t")
                gfile.write(upreg_data_join)
                gfile.write("\n")

                gfile.write("downreg\t")
                gfile.write(row_name)
                gfile.write("\t")
                gfile.write(downreg_data_join)
                gfile.write("\n")
        finally:
            gfile.close()
    finally:
        ofile.close()

    return (upreg_count, downreg_count)
def write_data_map(dmap, path):
    """
    Write ``dmap`` to ``path`` as a tab-separated table.

    The header is ``id`` followed by one ``gain_<field>`` column and then one
    ``loss_<field>`` column per entry of ``FIELDS`` (field names lower-cased,
    with ``-`` replaced by ``_``).

    Each entry of ``dmap`` becomes one line: the key followed by its values.
    A row holding exactly ``len(FIELDS)`` values fills only the first half of
    the columns, so the remaining ``len(FIELDS)`` columns are padded with ``-``.
    """
    hdr = ["id"]
    for prefix in ("gain", "loss"):
        hdr.extend(["_".join((prefix, f.replace("-", "_").lower())) for f in FIELDS])

    rf = FileWriter(path)
    try:
        rf.write("\t".join(hdr) + "\n")

        for row, values in dmap.iteritems():
            rf.write(row)
            for v in values:
                rf.write("\t")
                rf.write(v)

            if len(values) == len(FIELDS):
                # Every placeholder needs its own leading tab; otherwise the
                # first "-" is glued to the last value and the row comes out
                # one column short.
                rf.write("\t-" * len(FIELDS))

            rf.write("\n")
    finally:
        # Close the writer even if a row cannot be written.
        rf.close()
def combination(log, conf, rs, c, data_repo, results_path, conditions):
    """
    Combine several result files with gitools-combination and store the output.

    Steps:

    1. Every file in ``c["files"]`` is unflattened into a single ``data.tdm``
       inside a temporary directory. The first file is written directly and
       its header is reused when appending the remaining ones.
    2. A ``columns.gmt`` file is written with one line per condition: the
       condition, two tabs, then ``<source id>_<condition>`` for each id in
       ``c["source/ids"]``.
    3. ``gitools-combination`` (under ``conf["bin_paths.gitools"]``) is run
       through the shell on those two files.
    4. ``<c["id"]>-results.tdm.gz`` is flattened into the local copy of
       ``results_path`` and published with ``data_repo.put_local``.

    The temporary directory is always removed on exit.

    Raises ``Exception`` if gitools-combination exits with a non-zero code.
    Failures while flattening or publishing the results are propagated after
    the local results file has been closed.
    """
    cid = c["id"]
    ids = c["source/ids"]
    files = c["files"]

    results_url = data_repo.url(results_path)

    # Created before the try block: if mkdtemp fails there is nothing to
    # remove, and the cleanup below must not hit an unbound tmp_path.
    tmp_path = mkdtemp(prefix = "cnv_combination_")

    try:
        # prepare temporary files

        data_file = os.path.join(tmp_path, "data.tdm")
        columns_file = os.path.join(tmp_path, "columns.gmt")
        tmp_file = os.path.join(tmp_path, "tmp.tdm")

        log.debug("Temporary directory: {}".format(tmp_path))

        # join files to combine in a single TDM file

        log.info("Joining files ...")

        outpf = FileWriter(data_file)
        try:
            log.debug("\t{} ...".format(files[0]))
            repo, path = rs.from_url(files[0])
            local_path = repo.get_local(path)
            try:
                ref_hdr = tdm.unflatten(local_path, outpf, row_column = "id",
                    column_and_attr_func = lambda name: unflatten_filtered_names(name, ids[0]))
            finally:
                repo.close_local(path)

            for i in xrange(1, len(files)):
                log.debug("\t{} ...".format(files[i]))
                repo, path = rs.from_url(files[i])
                local_path = repo.get_local(path)
                try:
                    # The lambda is only used during this call, so reading
                    # ids[i] lazily is safe here.
                    tdm.unflatten(local_path, tmp_file, row_column = "id",
                        column_and_attr_func = lambda name: unflatten_filtered_names(name, ids[i]))
                    tdm.append(outpf, tmp_file, ref_hdr)
                finally:
                    repo.close_local(path)
        finally:
            outpf.close()

        # prepare conditions columns file in GMT format

        outpf = FileWriter(columns_file)
        try:
            for cond in conditions:
                outpf.write(cond)
                outpf.write("\t\t")
                outpf.write("\t".join(["_".join((sid, cond)) for sid in ids]))
                outpf.write("\n")
        finally:
            outpf.close()

        # run gitools-combination with data.tdm

        log.info("Running gitools combination ...")
        log.debug("\tData: {}".format(data_file))
        log.debug("\tColumns: {}".format(columns_file))

        gitools_combination_bin = os.path.join(conf["bin_paths.gitools"], "bin", "gitools-combination")

        # NOTE(review): the command is a plain string run through the shell;
        # ids or paths containing spaces or shell metacharacters would break it.
        cmd = " ".join([
            gitools_combination_bin,
            "-N", cid, "-w", tmp_path,
            "-d", data_file, "-c", columns_file,
            "-pn", P_VALUE_FIELD, "-sn n", "-p 1", "-debug"])

        log.debug(cmd)

        retcode = subprocess.call(args = cmd, shell = True)

        sys.stdout.write("\n")
        sys.stdout.flush()

        if retcode != 0:
            raise Exception("Combination exit code = {}".format(retcode))

        # flatten results

        log.info("Flattening results into {} ...".format(results_url))

        # Created outside the try block so the handler never touches an
        # unbound name when create_local itself fails.
        results_local_path = data_repo.create_local(results_path)
        try:
            tdm.flatten(os.path.join(tmp_path, cid + "-results.tdm.gz"), results_local_path,
                None, ["N", "z-score", "p-value"])

            data_repo.put_local(results_local_path)
        except:
            # Release the local file, then let the failure propagate instead
            # of silently reporting success without results.
            data_repo.close_local(results_local_path)
            raise

    finally:
        shutil.rmtree(tmp_path)