Пример #1
0
def ReadTSV(filename):
    """Load an SNV table and return its header line, chromosomes and rows.

    The table reader is chosen from the (case-insensitive) filename
    extension: csv, tsv, xls, xlsx or txt.  For txt files the required
    column names are handed to the reader as its headers.

    Args:
        filename: Name of the SNV file; its extension selects the reader.

    Returns:
        A 3-tuple ``(header_lines, chrom, snvdata)`` where ``header_lines``
        is a one-element list holding the tab-joined table headers,
        ``chrom`` is the set of values found in the first column, and
        ``snvdata`` is a list of rows, each a list of values ordered like
        the table headers.

    Raises:
        RuntimeError: If the filename has no recognised extension, if a
            required header is missing, or if the first four headers are
            not CHROM, POS, REF, ALT in that order.
    """
    snvheaders = ["CHROM", "POS", "REF", "ALT"]

    # rpartition always yields three parts, so a name without any '.' is
    # reported below as an unexpected extension instead of failing with an
    # unrelated unpacking ValueError.
    _, dot, suffix = filename.rpartition('.')
    extn = suffix.lower() if dot else ''

    if extn == 'csv':
        snvs = CSVFileTable(filename=filename)
    elif extn == 'tsv':
        snvs = TSVFileTable(filename=filename)
    elif extn == 'xls':
        snvs = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        snvs = XLSXFileTable(filename=filename)
    elif extn == 'txt':
        snvs = TXTFileTable(filename=filename, headers=snvheaders)
    else:
        raise RuntimeError("Unexpected SNV file extension: %s" % filename)

    # Fetch the headers once; they are reused for validation, for every
    # row and for the returned header line.
    headers = snvs.headers()

    for h in snvheaders:
        if h not in headers:
            raise RuntimeError("Required header: %s missing from SNV file %s" %
                               (h, filename))

    # Explicit check rather than assert, so it still runs under ``-O``:
    # the code below relies on column 0 being CHROM.
    if list(headers[:4]) != snvheaders:
        raise RuntimeError(
            "SNV file %s must start with headers: %s" %
            (filename, " ".join(snvheaders)))

    chrom = set()
    snvdata = []
    for r in snvs:
        ri = [r.get(h) for h in headers]
        chrom.add(ri[0])
        snvdata.append(ri)

    return ["\t".join(headers)], chrom, snvdata
Пример #2
0
# Open each file listed in opt.snvs with the table reader that matches its
# filename extension, then check that the required SNV headers are present.
for filename in opt.snvs:

    # Split on the last '.' to get the extension.
    # NOTE(review): a filename without any '.' makes this unpacking raise
    # ValueError before the "Unexpected SNV file extension" check is reached.
    base, extn = filename.rsplit('.', 1)
    extn = extn.lower()  # extension matching is case-insensitive
    if extn == 'csv':
        snvs = CSVFileTable(filename=filename)
    elif extn == 'vcf':
        snvs = VCFFile(filename=filename)
    elif extn == 'tsv':
        snvs = TSVFileTable(filename=filename)
    elif extn == 'xls':
        snvs = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        snvs = XLSXFileTable(filename=filename)
    elif extn == 'txt':
        # Only the txt reader is given the header names explicitly.
        snvs = TXTFileTable(filename=filename, headers=snvheaders)
    else:
        raise RuntimeError("Unexpected SNV file extension: %s" % filename)

    # Every name in snvheaders (defined outside this fragment) must be a
    # header of the opened table.
    for h in snvheaders:
        if h not in snvs.headers():
            raise RuntimeError("Required header: %s missing from SNV file %s" %
                               (h, filename))

    # NOTE(review): with the collection of extra headers commented out
    # below, this loop currently has no effect.
    for h in snvs.headers():
        if h in snvheaders:
            continue
        # if h not in extrasnvheaders:
        #     extrasnvheaders.append(h)
    for r in snvs:
        # Value of the first required column with surrounding whitespace
        # removed.  NOTE(review): the name `chr` shadows the builtin chr().
        chr = r[snvheaders[0]].strip()
Пример #3
0
# Two-level mapping; a missing first-level key yields a fresh empty dict.
vafmatrix = defaultdict(dict)
# Open each file listed in opt.counts with the table reader that matches its
# filename extension, then check that the required headers are present.
for filename in opt.counts:
    # Split on the last '.' to get the extension.
    # NOTE(review): a filename without any '.' makes this unpacking raise
    # ValueError before the extension check below is reached.
    base, extn = filename.rsplit('.', 1)
    extn = extn.lower()  # extension matching is case-insensitive
    if extn == 'csv':
        counts = CSVFileTable(filename=filename)
    elif extn == 'vcf':
        counts = VCFFile(filename=filename)
    elif extn == 'tsv':
        counts = TSVFileTable(filename=filename)
    elif extn == 'xls':
        counts = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        counts = XLSXFileTable(filename=filename)
    elif extn == 'txt':
        # Only the txt reader is given header names explicitly (txtheaders,
        # defined outside this fragment).
        counts = TXTFileTable(filename=filename, headers=txtheaders)
    else:
        raise RuntimeError("Unexpected ReadCounts file extension: %s" %
                           filename)

    # Every name in headers (defined outside this fragment) must be a header
    # of the opened table.
    for h in headers:
        if h not in counts.headers():
            raise RuntimeError(
                "Required header: %s missing from ReadCounts file %s" %
                (h, filename))

    for r in counts:
        # Normalise the first required column: a numeric value such as
        # "1.0" becomes "1"; anything float() rejects with ValueError is
        # kept as text with surrounding whitespace stripped.
        # NOTE(review): the name `chr` shadows the builtin chr().
        try:
            chr = str(int(float(r[headers[0]])))
        except ValueError:
            chr = r[headers[0]].strip()