# NOTE(review): this chunk begins mid-call -- the opening
# `regexs.add_option("--tumortransre",` is outside this view; the dangling
# keyword arguments below are kept verbatim.
type="str", dest="tumortransre", default=r'TRNA', help="Tumor transcriptome filename regular expression. Default: TRNA.", remember=True, name="Tumor Transcr. RE")
parser.add_option_group(regexs)
opt, args = parser.parse_args()

# Map each read-count category (germline/somatic DNA, normal/tumor RNA)
# to the user-supplied filename regular expression.
regex = {}
regex["GDNA"] = opt.normaldnare
regex["NRNA"] = opt.normaltransre
regex["SDNA"] = opt.tumordnare
regex["TRNA"] = opt.tumortransre

progress = ProgressText()

# Directory containing the counts file; one accumulator per category.
base = os.path.split(os.path.abspath(opt.counts))[0]
TRNA = {}
NRNA = {}
GDNA = {}
SDNA = {}

# Register the chromosome labels expected in the counts file
# (1..99, X, Y, MT) and derive a sort key over those labels.
from chromreg import ChromLabelRegistry

chrreg = ChromLabelRegistry()
labels = list(map(str, list(range(1, 100)))) + ["X", "Y", "MT"]
chrreg.add_labels(opt.counts, labels)
chrreg.default_chrom_order()
# Sort key: map a label from the counts file to its canonical chromosome order.
chrorder = lambda l: chrreg.chrom_order(chrreg.label2chrom(opt.counts, l))
# NOTE(review): this chunk begins mid-`while True:` option-parsing loop --
# the loop header and `opt = None` initialization are outside this view;
# the `break` below belongs to that unseen loop.
if 'exit' in error_kwargs:
    try:
        # GUI/interactive mode: re-parse with previous values as defaults.
        opt, args = parser.parse_args(opts=opt)
    except UserCancelledError:
        sys.exit(0)
else:
    opt, args = parser.parse_args()
break

progress = None
# No output file implies quiet mode (progress would pollute stdout).
if not opt.output:
    opt.quiet = True
# NOTE(review): prefer `opt.maxreads is None` over `== None`.
if opt.maxreads == None:
    opt.maxreads = 1e+20  # sentinel: effectively "no read limit"
progress = ProgressText(quiet=opt.quiet)

from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile

progress.stage("Read SNV data", len(opt.snvs))
# Required SNV table columns.
# NOTE(review): under Python 3 `filter(...)` is a lazy iterator; confirm no
# downstream code needs `len()`/re-iteration of snvheaders.
snvheaders = filter(None, """ CHROM POS REF ALT """.split())

snvdata = {}
# extrasnvheaders = []
# usedsnvheaders = set()
snvchroms = defaultdict(set)
# Per-file SNV loading; dispatch on file extension.
# NOTE(review): the `for` body continues beyond this view.
for filename in opt.snvs:
    base, extn = filename.rsplit('.', 1)
# Option-parsing loop: in interactive mode ('exit' present in error_kwargs)
# re-prompt with previous values until the user confirms or cancels;
# otherwise parse once. The loop always exits via `break` or `sys.exit`.
opt = None
while True:
    if 'exit' in error_kwargs:
        try:
            opt, args = parser.parse_args(opts=opt)
        except UserCancelledError:
            sys.exit(0)
    else:
        opt, args = parser.parse_args()
    break

progress = None
# No output file implies quiet mode.
if not opt.output:
    opt.quiet = True
progress = ProgressText(quiet=opt.quiet)

import pysam
from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile

progress.stage("Read SNP data", len(opt.snps))
# Required SNP table columns.
# NOTE(review): `filter` here suggests Python 2 (list result); under
# Python 3 this is a lazy iterator -- confirm target interpreter.
snpheaders = filter(None, """ CHROM POS REF ALT """.split())

snpdata = {}
extrasnpheaders = []   # headers found beyond the required four
usedsnpheaders = set() # extra headers actually carried through
# Per-file SNP loading; dispatch on file extension.
# NOTE(review): the `for` body continues beyond this view.
for filename in opt.snps:
    base, extn = filename.rsplit('.', 1)
# Option-parsing loop: in interactive mode ('exit' present in error_kwargs)
# re-prompt with previous values until the user confirms or cancels;
# otherwise parse once. The loop always exits via `break` or `sys.exit`.
opt = None
while True:
    if 'exit' in error_kwargs:
        try:
            opt, args = parser.parse_args(opts=opt)
        except UserCancelledError:
            sys.exit(0)
    else:
        opt, args = parser.parse_args()
    break

progress = None
# No output file implies quiet mode.
if not opt.output:
    opt.quiet = True
progress = ProgressText(quiet=opt.quiet)

# pysam imported via a project wrapper module (presumably handles
# availability/version differences -- confirm against pysamimport).
from pysamimport import pysam
from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile

progress.stage("Read SNV data", len(opt.snvs))
# Required SNV table columns (Python-3-safe list comprehension form).
snvheaders = [_f for _f in """ CHROM POS REF ALT """.split() if _f]

snvdata = {}
extrasnvheaders = []   # headers found beyond the required four
usedsnvheaders = set() # extra headers actually carried through
# Per-file SNV loading; dispatch on file extension.
# NOTE(review): the `for` body continues beyond this view.
for filename in opt.snvs:
    base, extn = filename.rsplit('.', 1)
# Filename-classification regex options: each category of input file is
# recognized by a user-configurable regular expression.
regexs.add_option("--tumordnare", type="str", dest="tumordnare", default=r'SDNA', help="Somatic/Tumor DNA filename regular expression. Default: SDNA.", remember=True, name="Somatic DNA RE")
regexs.add_option("--tumortransre", type="str", dest="tumortransre", default=r'TRNA', help="Tumor transcriptome filename regular expression. Default: TRNA.", remember=True, name="Tumor Transcr. RE")
parser.add_option_group(regexs)
opt, args = parser.parse_args()

# Map each read-count category to its filename regex.
regex = {}
regex["GDNA"] = opt.normaldnare
regex["NRNA"] = opt.normaltransre
regex["SDNA"] = opt.tumordnare
regex["TRNA"] = opt.tumortransre

progress = ProgressText()

# Directory containing the counts file; one accumulator per category.
base = os.path.split(os.path.abspath(opt.counts))[0]
TRNA = {}; NRNA = {}; GDNA = {}; SDNA = {}

# Register expected chromosome labels (1..99, X, Y, MT) and derive a
# sort key over those labels.
from chromreg import ChromLabelRegistry
chrreg = ChromLabelRegistry()
# NOTE(review): `map(...) + [...]` only works on Python 2, where map
# returns a list; under Python 3 this raises TypeError.
labels = map(str,range(1,100)) + ["X","Y","MT"]
chrreg.add_labels(opt.counts,labels)
chrreg.default_chrom_order()
chrorder = lambda l: chrreg.chrom_order(chrreg.label2chrom(opt.counts,l))

progress.stage("Parsing read-counts")
# NOTE(review): handle is not closed in this view -- consider a `with` block.
f = open(opt.counts, 'r')
reader = csv.DictReader(f, delimiter='\t')
# NOTE(review): this chunk begins with a dangling `break` -- the enclosing
# option-parsing loop is outside this view; kept verbatim.
break

# Select an optional matrix-cell formatter based on the --matrix option.
# Each formatter renders a per-locus dict (with Ref/Var/VAF keys) to a string.
matrix = None
if opt.matrix:
    if opt.matrix == "Ref:Var":
        matrix = (lambda d: "%(Ref)s:%(Var)s" % d)
    elif opt.matrix == "Ref;Var":
        matrix = (lambda d: "%(Ref)s;%(Var)s" % d)
    elif opt.matrix == "VAF":
        matrix = (lambda d: "%(VAF)s" % d)

progress = None
# No output file implies quiet mode.
if not opt.output:
    opt.quiet = True
progress = ProgressText(quiet=opt.quiet)

# Helpers for reconstructing an equivalent command line for logging/replay.
doublequote = lambda s: '"%s"' % (s, )
indent = lambda s, n: "\n".join([(" " * n) + l for l in s.splitlines()])

# Rebuild the argument vector, emitting only options that differ from
# their defaults.
args = []
args.extend(["-c", doublequote(" ".join(opt.counts))])
if matrix:
    args.extend(["-M", opt.matrix])
if opt.minreads != minreads_default:
    args.extend(["-m", str(opt.minreads)])
args.extend(["-o", doublequote(opt.output)])
if opt.quiet:
    args.extend(["-q"])
cmdargs = " ".join(args)
# NOTE(review): this chunk begins mid-`while True:` option-parsing loop --
# the loop header is outside this view; the `break` below belongs to it.
if 'exit' in error_kwargs:
    try:
        opt, args = parser.parse_args(opts=opt)
    except UserCancelledError:
        sys.exit(0)
else:
    opt, args = parser.parse_args()
break

# NOTE(review): `opts.mates` -- every other reference in this chunk uses
# `opt`; suspected typo for `opt.mates = False`. Verify `opts` is not
# defined elsewhere in the file before changing.
opts.mates = False

progress = None
# No output file implies quiet mode.
if not opt.output:
    opt.quiet = True
progress = ProgressText(quiet=opt.quiet)

import pysam
from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile

progress.stage("Read SNP data", len(opt.snps))
# Required SNP table columns.
snpheaders = [_f for _f in """ CHROM POS REF ALT """.split() if _f]

# NOTE(review): container is named `snvdata` while the headers/loop use
# `snp*` naming -- confirm this mixed naming is intentional.
snvdata = {}
snvchroms = defaultdict(set)
extrasnpheaders = []   # headers found beyond the required four
usedsnpheaders = set() # extra headers actually carried through
# Per-file SNP loading.
# NOTE(review): the `for` body continues beyond this view.
for filename in opt.snps:
    filename0 = filename
# Option-parsing loop: in interactive mode ("exit" present in error_kwargs)
# re-prompt with previous values until the user confirms or cancels;
# otherwise parse once. The loop always exits via `break` or `sys.exit`.
opt = None
while True:
    if "exit" in error_kwargs:
        try:
            opt, args = parser.parse_args(opts=opt)
        except UserCancelledError:
            sys.exit(0)
    else:
        opt, args = parser.parse_args()
    break

progress = None
# No output file implies quiet mode.
if not opt.output:
    opt.quiet = True
progress = ProgressText(quiet=opt.quiet)

# pysam imported via a project wrapper module (presumably handles
# availability/version differences -- confirm against pysamimport).
from pysamimport import pysam
from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile

progress.stage("Read SNV data", len(opt.snvs))
# Required SNV table columns.
# NOTE(review): under Python 3 `filter(...)` is a lazy iterator; confirm no
# downstream code needs `len()`/re-iteration of snvheaders.
snvheaders = filter(
    None,
    """ CHROM POS REF ALT """.split(),
)

snvdata = {}
extrasnvheaders = []   # headers found beyond the required four
usedsnvheaders = set() # extra headers actually carried through
# Option-parsing loop: in interactive mode ('exit' present in error_kwargs)
# re-prompt with previous values until the user confirms or cancels;
# otherwise parse once. The loop always exits via `break` or `sys.exit`.
opt = None
while True:
    if 'exit' in error_kwargs:
        try:
            opt, args = parser.parse_args(opts=opt)
        except UserCancelledError:
            sys.exit(0)
    else:
        opt, args = parser.parse_args()
    break

progress = None
# No output file implies quiet mode.
if not opt.output:
    opt.quiet = True
progress = ProgressText(quiet=opt.quiet)

# Column names of the per-locus counters to be summed across count files
# (Python-3-safe comprehension form).
sumkeys = [
    _f
    for _f in map(str.strip, """ SNPJuncIntronCount SNPJuncNoIntronCount NoSNPJuncIntronCount NoSNPJuncNoIntronCount SNPMateCount NoSNPMateCount SNPCount NoSNPCount MatesCount NotMatesCount IntronCount NoIntronCount SpanningReads RemovedDuplicateReads SNPLociReads""" .split())
    if _f
]

countdata = defaultdict(dict)
progress.stage("Read SNP/Junction counts")

from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable

countheaders = None
# Per-file counts loading; dispatch on file extension.
# NOTE(review): the `for` body continues beyond this view.
for filename in opt.counts:
    base, extn = filename.rsplit('.', 1)
    path, base = os.path.split(base)
    extn = extn.lower()
# NOTE(review): this chunk begins mid-`try` inside an option-validation
# loop -- the `try:` header and loop are outside this view; kept verbatim.
except ValueError:
    parser.error("Bad Max. Read option", **error_kwargs)
    continue
break

# Resolve filter/read-group strategy objects from their configured names.
readfilter = filterFactory.get(opt.filter)
if opt.readgroup:
    readgroup = groupFactory.get(opt.readgroup)
else:
    readgroup = None

progress = None
# No output file implies quiet mode.
if not opt.output:
    opt.quiet = True
progress = ProgressText(quiet=opt.quiet)

# Helpers for reconstructing an equivalent command line for logging/replay.
doublequote = lambda s: '"%s"' % (s, )
indent = lambda s, n: "\n".join([(" " * n) + l for l in s.splitlines()])

# Rebuild the argument vector, emitting only options that differ from
# their defaults.
args = []
args.extend(["-s", doublequote(" ".join(opt.snvs))])
args.extend(["-r", doublequote(" ".join(opt.alignments))])
if opt.filter != filter_default:
    args.extend(["-f", doublequote(opt.filter)])
if opt.minreads != minreads_default:
    args.extend(["-m", str(opt.minreads)])
if opt.maxreads != maxreads_default:
    args.extend(["-M", str(opt.maxreads)])
if opt.readgroup != readgroup_default:
    # NOTE(review): this call is truncated mid-arguments -- the remainder
    # is outside this view.
    args.extend(
# Option-parsing loop: in interactive mode ('exit' present in error_kwargs)
# re-prompt with previous values until the user confirms or cancels;
# otherwise parse once. The loop always exits via `break` or `sys.exit`.
opt = None
while True:
    if 'exit' in error_kwargs:
        try:
            opt, args = parser.parse_args(opts=opt)
        except UserCancelledError:
            sys.exit(0)
    else:
        opt, args = parser.parse_args()
    break

progress = None
# No output file implies quiet mode.
if not opt.output:
    opt.quiet = True
progress = ProgressText(quiet=opt.quiet)

# Column names of the per-locus counters to be summed across count files.
# NOTE(review): `filter`/`map` here suggest Python 2 (list results); under
# Python 3 these are lazy iterators -- confirm target interpreter.
sumkeys = filter(None, map(str.strip, """ SNPJuncIntronCount SNPJuncNoIntronCount NoSNPJuncIntronCount NoSNPJuncNoIntronCount SNPMateCount NoSNPMateCount SNPCount NoSNPCount MatesCount NotMatesCount IntronCount NoIntronCount SpanningReads RemovedDuplicateReads SNPLociReads""".split()))

countdata = defaultdict(dict)
progress.stage("Read SNP/Junction counts")

from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable

countheaders = None
# Per-file counts loading; dispatch on file extension.
# NOTE(review): the `elif` chain and `for` body continue beyond this view.
for filename in opt.counts:
    base, extn = filename.rsplit('.', 1)
    path, base = os.path.split(base)
    extn = extn.lower()
    if extn == 'csv':
        counts = CSVFileTable(filename=filename)
    elif extn == 'tsv':
        counts = TSVFileTable(filename=filename)