Exemplo n.º 1
0
        types2files["GDNA"].add(filename)
        files2types[filename].add("GDNA")
    # Apply identical bookkeeping to each of the remaining read types: when
    # the filename matches the type's regular expression and this key has not
    # been stored for that type yet, keep the row and cross-index the
    # filename and the read type in both lookup tables.
    for _rtype, _stored in (("NRNA", NRNA), ("SDNA", SDNA), ("TRNA", TRNA)):
        if not re.search(regex[_rtype], filename) or key in _stored:
            continue
        _stored[key] = row
        types2files[_rtype].add(filename)
        files2types[filename].add(_rtype)
# Reading is finished: close f and finalize the progress display.
f.close()
progress.done()

# Nothing to test when none of the four read-type containers received an
# entry. The message goes to stderr, but the exit status is 0.
if not any(len(_counts) for _counts in (GDNA, SDNA, NRNA, TRNA)):
    print("No read counts available for testing", file=sys.stderr)
    sys.exit(0)

fatal = False
for f in files2types:
    if len(files2types[f]) < 1:
        print("Filename %s does not match any read type regular expression." %
              (f, ),
              file=sys.stderr)
        fatal = True
    elif len(files2types[f]) > 1:
        print(
            "Filename %s matches more than one read type regular expression." %
Exemplo n.º 2
0
        alt = r[snvheaders[3]].strip()
        if r.get('INFO:INDEL'):
            continue
        if len(ref) != 1:
            continue
        if not re.search(r'^[ACGT](,[ACGT])*$', alt):
            continue
        # for h in r:
        #     if r.get(h):
        #         usedsnvheaders.add(h)
        snvkey = (filename, chr, locus, ref, alt)
        if snvkey not in snvdata:
            snvdata[snvkey] = r

    progress.update()
progress.done()

# chrreg collects the chromosome labels of every SNV file so that
# label2chrom() can translate a (file, label) pair below.
# NOTE(review): presumably this yields a canonical chromosome name shared
# across files -- confirm against ChromLabelRegistry.
chrreg = ChromLabelRegistry()

for snvfile in snvchroms:
    chrreg.add_labels(snvfile, snvchroms[snvfile])

# Re-key the SNV records by (chrom, locus, ref, alt), dropping the source
# file from the key. When several records map to the same key, only the
# first one encountered in iteration order is kept.
snvdata1 = {}
# items() instead of iteritems(): iteritems() does not exist on Python 3
# dicts, while items() is available on both Python 2 and Python 3.
for (sf, chr, locus, ref, alt), r in snvdata.items():
    chrom = chrreg.label2chrom(sf, chr)
    # Every label was registered above, so a falsy result is unexpected.
    assert chrom, "label2chrom returned no chromosome for label %r from %s" % (chr, sf)
    snvkey = (chrom, locus, ref, alt)
    if snvkey not in snvdata1:
        snvdata1[snvkey] = (chrom, locus, ref, alt, r)

for bamfile in opt.alignments: