def ReadTSV(filename):
    """Load an SNV table and return its header line, chromosomes and rows.

    The table reader is chosen from the (case-insensitive) file extension:
    ``csv``, ``tsv``, ``xls``, ``xlsx`` or ``txt``.  For ``txt`` files the
    required header names are handed to the reader as its ``headers``
    argument.

    Args:
        filename: Path of the SNV file; its extension selects the reader.

    Returns:
        A 3-tuple ``(header_lines, chrom, snvdata)`` where ``header_lines``
        is a one-element list holding the tab-joined header names,
        ``chrom`` is the set of first-column values seen, and ``snvdata``
        is a list of rows, each a list of values in header order.

    Raises:
        RuntimeError: If the file name has no extension or an unsupported
            one, if a required header is missing, or if the required
            headers are not the first four columns in the expected order.
    """
    snvheaders = ["CHROM", "POS", "REF", "ALT"]

    # rpartition always yields three parts, so a name without any '.' is
    # reported as a bad extension instead of failing on tuple unpacking.
    _, dot, extn = filename.rpartition('.')
    if not dot:
        raise RuntimeError("Unexpected SNV file extension: %s" % filename)
    extn = extn.lower()

    # Kept as an if/elif chain so that only the reader actually needed is
    # looked up by name.
    if extn == 'csv':
        snvs = CSVFileTable(filename=filename)
    elif extn == 'tsv':
        snvs = TSVFileTable(filename=filename)
    elif extn == 'xls':
        snvs = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        snvs = XLSXFileTable(filename=filename)
    elif extn == 'txt':
        snvs = TXTFileTable(filename=filename, headers=snvheaders)
    else:
        raise RuntimeError("Unexpected SNV file extension: %s" % filename)

    # NOTE(review): assumes headers() returns the same list on every call,
    # so it is read once here rather than once per row -- confirm.
    headers = snvs.headers()
    for h in snvheaders:
        if h not in headers:
            raise RuntimeError(
                "Required header: %s missing from SNV file %s" % (h, filename))

    # The first column is taken as the chromosome below, so the required
    # headers must lead the table in order.  This is an explicit raise
    # rather than an assert so the check survives ``python -O``.
    if headers[:4] != snvheaders:
        raise RuntimeError(
            "Required headers %s must be the first columns of SNV file %s"
            % (" ".join(snvheaders), filename))

    chrom = set()
    snvdata = []
    for r in snvs:
        ri = [r.get(h) for h in headers]
        chrom.add(ri[0])
        snvdata.append(ri)
    return ["\t".join(headers)], chrom, snvdata
# Load every SNV file named in opt.snvs with the reader matching its
# (case-insensitive) file extension.
for filename in opt.snvs:
    base, extn = filename.rsplit('.', 1)
    extn = extn.lower()

    # Reject unsupported extensions up front.
    if extn not in ('csv', 'vcf', 'tsv', 'xls', 'xlsx', 'txt'):
        raise RuntimeError("Unexpected SNV file extension: %s" % filename)

    if extn == 'csv':
        snvs = CSVFileTable(filename=filename)
    elif extn == 'vcf':
        snvs = VCFFile(filename=filename)
    elif extn == 'tsv':
        snvs = TSVFileTable(filename=filename)
    elif extn == 'xls':
        snvs = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        snvs = XLSXFileTable(filename=filename)
    else:
        # Only 'txt' is left; the required header names are passed to the
        # reader as its ``headers`` argument.
        snvs = TXTFileTable(filename=filename, headers=snvheaders)

    # Every required header must be present in the file.
    for h in snvheaders:
        if h in snvs.headers():
            continue
        raise RuntimeError(
            "Required header: %s missing from SNV file %s" % (h, filename))

    # Walk the file's headers, skipping the required ones.  Recording the
    # remaining ones in ``extrasnvheaders`` is currently disabled, so this
    # loop has no effect beyond leaving ``h`` bound.
    for h in snvs.headers():
        if h in snvheaders:
            continue

    for r in snvs:
        # Value of the first required header for this row, whitespace
        # trimmed (presumably the chromosome name -- verify against the
        # definition of snvheaders).
        chr = r[snvheaders[0]].strip()
# Two-level mapping whose inner dicts are created on first access.
vafmatrix = defaultdict(dict)

# Load every ReadCounts file named in opt.counts with the reader matching
# its (case-insensitive) file extension.
for filename in opt.counts:
    base, extn = filename.rsplit('.', 1)
    extn = extn.lower()

    # Reject unsupported extensions up front.
    if extn not in ('csv', 'vcf', 'tsv', 'xls', 'xlsx', 'txt'):
        raise RuntimeError(
            "Unexpected ReadCounts file extension: %s" % filename)

    if extn == 'csv':
        counts = CSVFileTable(filename=filename)
    elif extn == 'vcf':
        counts = VCFFile(filename=filename)
    elif extn == 'tsv':
        counts = TSVFileTable(filename=filename)
    elif extn == 'xls':
        counts = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        counts = XLSXFileTable(filename=filename)
    else:
        # Only 'txt' is left; ``txtheaders`` is passed to the reader as its
        # ``headers`` argument.
        counts = TXTFileTable(filename=filename, headers=txtheaders)

    # Every required header must be present in the file.
    for h in headers:
        if h in counts.headers():
            continue
        raise RuntimeError(
            "Required header: %s missing from ReadCounts file %s" % (h, filename))

    for r in counts:
        # Normalise a numeric-looking first-header value (e.g. "1.0") to
        # an integer string; a value that raises ValueError on conversion
        # is used as whitespace-trimmed text instead.
        try:
            chr = str(int(float(r[headers[0]])))
        except ValueError:
            chr = r[headers[0]].strip()