Exemplo n.º 1
0
# Store three of the *re command-line option values under their short tags.
regex.update(
    NRNA=opt.normaltransre,
    SDNA=opt.tumordnare,
    TRNA=opt.tumortransre,
)

progress = ProgressText()

# Directory part of the absolute path of the read-counts file.
base = os.path.dirname(os.path.abspath(opt.counts))

# Four independent, initially empty dicts.
TRNA, NRNA, GDNA, SDNA = {}, {}, {}, {}

from chromreg import ChromLabelRegistry

# Register the labels "1".."99" plus X, Y and MT for the counts file and
# ask the registry to set up its default chromosome ordering.
chrreg = ChromLabelRegistry()
labels = [str(n) for n in range(1, 100)] + ["X", "Y", "MT"]
chrreg.add_labels(opt.counts, labels)
chrreg.default_chrom_order()


def chrorder(l):
    """Return the registry's order value for counts-file label *l*."""
    return chrreg.chrom_order(chrreg.label2chrom(opt.counts, l))

# Set up parsing of the read-counts file: a tab-delimited table read row by
# row as dicts keyed by the column headers.
progress.stage("Parsing read-counts")
# NOTE(review): no close() for this handle is visible in this excerpt --
# confirm it is closed once the rows have been consumed.
f = open(opt.counts, mode='rt', encoding='utf8')
reader = csv.DictReader(f, delimiter='\t')
# Two set-valued lookup tables; presumably inverse mappings between sample
# types and file names, filled by the loop below -- TODO confirm.
types2files = defaultdict(set)
files2types = defaultdict(set)
for row in reader:
    key = (row['CHROM'], row['POS'])
    filename = row['AlignedReads']
    for k in row:
        if k.endswith('Count') and row[k] != "":
Exemplo n.º 2
0
        if r.get('INFO:INDEL'):
            continue
        if len(ref) != 1:
            continue
        if not re.search(r'^[ACGT](,[ACGT])*$', alt):
            continue
    # for h in r:
    #     if r.get(h):
    #         usedsnvheaders.add(h)
        snvkey = (filename, chr, locus, ref, alt)
        if snvkey not in snvdata:
            snvdata[snvkey] = r
    progress.update()
progress.done()

# Start a fresh registry and register, per SNV file, the chromosome labels
# collected for that file in snvchroms.
chrreg = ChromLabelRegistry()

for snvfile in snvchroms:
    chrreg.add_labels(snvfile, snvchroms[snvfile])

# Re-key the SNV records from (file, label, locus, ref, alt) to
# (chrom, locus, ref, alt) using the registry's label -> chromosome lookup.
# When several records map to the same new key, the first one iterated wins.
snvdata1 = {}
# .items() rather than .iteritems(): the latter exists only on Python 2 dicts
# and raises AttributeError on Python 3; .items() works on both.
for (sf, chr, locus, ref, alt), r in snvdata.items():
    chrom = chrreg.label2chrom(sf, chr)
    assert chrom, "label2chrom returned a false value for label %r (file %s)" % (chr, sf)
    snvkey = (chrom, locus, ref, alt)
    if snvkey not in snvdata1:
        snvdata1[snvkey] = (chrom, locus, ref, alt, r)

# Register the labels of every alignment file with the same registry.
for bamfile in opt.alignments:
    chrreg.add_bamlabels(bamfile)
Exemplo n.º 3
0
# Extension sanity checks: a .vcf output requires a .vcf input, and a
# non-.vcf input requires a .tsv output.
assert not outfile.endswith('.vcf') or filename.endswith('.vcf')
assert filename.endswith('.vcf') or outfile.endswith('.tsv')

# Progress message without a trailing newline; flush so it shows immediately.
print("Reading", filename, "...", end=' ')
sys.stdout.flush()

# Choose the parser from the (case-insensitive) text after the last dot.
fileheader, chrlab, snvdata = (
    ReadVCF if filename.rsplit('.', 1)[-1].lower() == 'vcf' else ReadTSV
)(filename)

from chromreg import ChromLabelRegistry

# Register the labels found in the input file under that file's name.
chrreg = ChromLabelRegistry()
chrreg.add_labels(filename, chrlab)

# Walk the rows from last to first so deleting a row never shifts the index
# of a row that is still to be visited.
for i in reversed(range(len(snvdata))):
    chrlab = snvdata[i][0]
    chrom = chrreg.label2chrom(filename, chrlab)
    if chrreg.isnumberedchrom(chrom) or chrreg.issexchrom(chrom):
        # Keep the row: replace the label by the registry chromosome and
        # convert the second field to int.
        snvdata[i][0] = chrom
        snvdata[i][1] = int(snvdata[i][1])
    else:
        # Neither a numbered nor a sex chromosome: drop the row.
        del snvdata[i]

# Expected chromosome labels from UCSC file...
# Extra numeric chromosomes might make this more robust for organisms
# with more chromosomes? Doesn't hurt normal (human) case either way.
Exemplo n.º 4
0
            continue
        if len(ref) != 1:
            continue
        if not re.search(r'^[ACGT](,[ACGT])*$', alt):
            continue
        # for h in r:
        #     if r.get(h):
        #         usedsnvheaders.add(h)
        snvkey = (filename, chr, locus, ref, alt)
        if snvkey not in snvdata:
            snvdata[snvkey] = r

    progress.update()
progress.done()

# Start a fresh registry and register, per SNV file, the chromosome labels
# collected for that file in snvchroms.
chrreg = ChromLabelRegistry()

for snvfile in snvchroms:
    chrreg.add_labels(snvfile, snvchroms[snvfile])

# Re-key the SNV records from (file, label, locus, ref, alt) to
# (chrom, locus, ref, alt) using the registry's label -> chromosome lookup.
# When several records map to the same new key, the first one iterated wins.
snvdata1 = {}
# .items() rather than .iteritems(): the latter exists only on Python 2 dicts
# and raises AttributeError on Python 3; .items() works on both.
for (sf, chr, locus, ref, alt), r in snvdata.items():
    chrom = chrreg.label2chrom(sf, chr)
    assert chrom, "label2chrom returned a false value for label %r (file %s)" % (chr, sf)
    snvkey = (chrom, locus, ref, alt)
    if snvkey not in snvdata1:
        snvdata1[snvkey] = (chrom, locus, ref, alt, r)

# Register the labels of every alignment file with the same registry.
for bamfile in opt.alignments:
    chrreg.add_bamlabels(bamfile)
Exemplo n.º 5
0
opt, args = parser.parse_args()

# Collect the four *re option values under their short tags.
regex = {
    "GDNA": opt.normaldnare,
    "NRNA": opt.normaltransre,
    "SDNA": opt.tumordnare,
    "TRNA": opt.tumortransre,
}

progress = ProgressText()

# Directory part of the absolute path of the read-counts file.
base = os.path.dirname(os.path.abspath(opt.counts))

# Four independent, initially empty dicts.
TRNA = {}
NRNA = {}
GDNA = {}
SDNA = {}

from chromreg import ChromLabelRegistry
# Register the labels "1".."99" plus X, Y and MT for the counts file and
# ask the registry to set up its default chromosome ordering.
chrreg = ChromLabelRegistry()
# list(...) is required: on Python 3 map() returns an iterator, and
# `iterator + list` raises TypeError. On Python 2 the wrapper is a no-op copy.
labels = list(map(str, range(1, 100))) + ["X", "Y", "MT"]
chrreg.add_labels(opt.counts, labels)
chrreg.default_chrom_order()


def chrorder(l):
    """Return the registry's order value for counts-file label *l*."""
    return chrreg.chrom_order(chrreg.label2chrom(opt.counts, l))

# Set up parsing of the read-counts file: a tab-delimited table read row by
# row as dicts keyed by the column headers.
progress.stage("Parsing read-counts")
# NOTE(review): no close() for this handle is visible in this excerpt --
# confirm it is closed once the rows have been consumed.
f = open(opt.counts, 'r')
reader = csv.DictReader(f, delimiter='\t')
# Two set-valued lookup tables; presumably inverse mappings between sample
# types and file names, filled by the loop below -- TODO confirm.
types2files = defaultdict(set)
files2types = defaultdict(set)
for row in reader:
    key = (row['CHROM'],row['POS'])
    filename = row['AlignedReads']
    for k in row:
        if k.endswith('Count') and row[k] != "":