def main(args=None): """ 1. get read counts at different positions either all of same length or from genomic regions from the BED file 2. compute the scores """ args = process_args(args) if 'BED' in args: bed_regions = args.BED else: bed_regions = None if len(args.bwfiles) == 1 and not args.outRawCounts: sys.stderr.write("You've input a single bigWig file and not specified " "--outRawCounts. The resulting output will NOT be " "useful with any deepTools program!\n") # Preload deepBlue files, which need to then be deleted deepBlueFiles = [] for idx, fname in enumerate(args.bwfiles): if db.isDeepBlue(fname): deepBlueFiles.append([fname, idx]) if len(deepBlueFiles) > 0: sys.stderr.write( "Preloading the following deepBlue files: {}\n".format(",".join( [x[0] for x in deepBlueFiles]))) if 'BED' in args: regs = db.makeRegions(args.BED, args) else: foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey) regs = db.makeTiles(foo, args) del foo for x in deepBlueFiles: x.extend([args, regs]) if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1: pool = multiprocessing.Pool(args.numberOfProcessors) res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999) else: res = list(map(db.preloadWrapper, deepBlueFiles)) # substitute the file names with the temp files for (ftuple, r) in zip(deepBlueFiles, res): args.bwfiles[ftuple[1]] = r deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles] del regs num_reads_per_bin = score_bw.getScorePerBin( args.bwfiles, args.binSize, blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, stepSize=args.binSize + args.distanceBetweenBins, verbose=args.verbose, region=args.region, bedFile=bed_regions, chrsToSkip=args.chromosomesToSkip, out_file_for_raw_data=args.outRawCounts, allArgs=args) sys.stderr.write("Number of bins " "found: {}\n".format(num_reads_per_bin.shape[0])) if num_reads_per_bin.shape[0] < 2: exit("ERROR: too few non zero bins found.\n" "If using --region please check that this " "region is covered by reads.\n") f = open(args.outFileName, "wb") np.savez_compressed(f, matrix=num_reads_per_bin, labels=args.labels) f.close() if args.outRawCounts: # append to the generated file the # labels header = "#'chr'\t'start'\t'end'\t" header += "'" + "'\t'".join(args.labels) + "'\n" f = open(args.outRawCounts, "r+") content = f.read() f.seek(0, 0) f.write(header + content) """ if bed_regions: bed_regions.seek(0) reg_list = bed_regions.readlines() args.outRawCounts.write("#'chr'\t'start'\t'end'\t") args.outRawCounts.write("'" + "'\t'".join(args.labels) + "'\n") fmt = "\t".join(np.repeat('%s', num_reads_per_bin.shape[1])) + "\n" for idx, row in enumerate(num_reads_per_bin): args.outRawCounts.write("{}\t{}\t{}\t".format(*reg_list[idx].strip().split("\t")[0:3])) args.outRawCounts.write(fmt % tuple(row)) else: args.outRawCounts.write("'" + "'\t'".join(args.labels) + "'\n") fmt = "\t".join(np.repeat('{}', num_reads_per_bin.shape[1])) + "\n" for row in num_reads_per_bin: args.outRawCounts.write(fmt.format(*tuple(row))) """ f.close() # Clean up temporary bigWig files, if applicable if not args.deepBlueKeepTemp: for k, v in deepBlueFiles: os.remove(args.bwfiles[v]) else: for k, v in deepBlueFiles: print("{} is stored in {}".format(k, args.bwfiles[v]))
# Entry point in the style of deepTools' bigwigCompare.
def main(args=None):
    args = parse_arguments().parse_args(args)

    # --scaleFactors is given as a colon-separated pair, e.g. "0.5:1";
    # with no value supplied, both files keep a neutral factor of 1.
    if args.scaleFactors:
        scaleFactors = [float(x) for x in args.scaleFactors.split(":")]
    else:
        scaleFactors = [1, 1]

    # getRatio is called on every tile that is considered and receives
    # these function_args.
    FUNC = getRatio
    function_args = {'valueType': args.ratio,
                     'scaleFactors': scaleFactors,
                     'pseudocount': args.pseudocount}

    # Preload deepBlue files; the resulting temporary files are deleted later.
    deepBlueFiles = []
    for idx, fname in enumerate([args.bigwig1, args.bigwig2]):
        if db.isDeepBlue(fname):
            deepBlueFiles.append([fname, idx])
    if len(deepBlueFiles) > 0:
        sys.stderr.write("Preloading the following deepBlue files: "
                         "{}\n".format(",".join([x[0] for x in deepBlueFiles])))
        foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL,
                          userKey=args.userKey)
        regs = db.makeChromTiles(foo)
        for x in deepBlueFiles:
            x.extend([args, regs])
        if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1:
            pool = multiprocessing.Pool(args.numberOfProcessors)
            res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999)
        else:
            res = list(map(db.preloadWrapper, deepBlueFiles))

        # Substitute the deepBlue names with the temporary bigWig files.
        for (ftuple, r) in zip(deepBlueFiles, res):
            if ftuple[1] == 0:
                args.bigwig1 = r
            else:
                args.bigwig2 = r
        deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles]
        del regs

    writeBedGraph_bam_and_bw.writeBedGraph(
        [(args.bigwig1, getType(args.bigwig1)),
         (args.bigwig2, getType(args.bigwig2))],
        args.outFileName, 0, FUNC,
        function_args, tileSize=args.binSize, region=args.region,
        blackListFileName=args.blackListFileName,
        verbose=args.verbose,
        numberOfProcessors=args.numberOfProcessors,
        format=args.outFileFormat,
        smoothLength=False,
        missingDataAsZero=not args.skipNonCoveredRegions,
        extendPairedEnds=False)

    # Clean up temporary bigWig files, if applicable.
    if not args.deepBlueKeepTemp:
        for k, v in deepBlueFiles:
            if v == 0:
                os.remove(args.bigwig1)
            else:
                os.remove(args.bigwig2)
    else:
        for k, v in deepBlueFiles:
            foo = args.bigwig1
            if v == 1:
                foo = args.bigwig2
            print("{} is stored in {}".format(k, foo))
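
# The scale-factor handling above expects a colon-separated pair such as
# "0.5:1". A self-contained sketch of that parsing (the helper name is
# illustrative only, not part of the deepTools API):
def parse_scale_factors(spec):
    """Parse a colon-separated --scaleFactors value; default to [1, 1]."""
    if spec:
        return [float(x) for x in spec.split(":")]
    return [1, 1]

assert parse_scale_factors("0.5:1") == [0.5, 1.0]
assert parse_scale_factors(None) == [1, 1]
assert parse_scale_factors("") == [1, 1]
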
def main(args=None): """ 1. get read counts at different positions either all of same length or from genomic regions from the BED file 2. compute the scores """ args = process_args(args) if 'BED' in args: bed_regions = args.BED else: bed_regions = None if len(args.bwfiles) == 1 and not args.outRawCounts: sys.stderr.write("You've input a single bigWig file and not specified " "--outRawCounts. The resulting output will NOT be " "useful with any deepTools program!\n") # Preload deepBlue files, which need to then be deleted deepBlueFiles = [] for idx, fname in enumerate(args.bwfiles): if db.isDeepBlue(fname): deepBlueFiles.append([fname, idx]) if len(deepBlueFiles) > 0: sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles]))) if 'BED' in args: regs = db.makeRegions(args.BED, args) else: foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey) regs = db.makeTiles(foo, args) del foo for x in deepBlueFiles: x.extend([args, regs]) if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1: pool = multiprocessing.Pool(args.numberOfProcessors) res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999) else: res = list(map(db.preloadWrapper, deepBlueFiles)) # substitute the file names with the temp files for (ftuple, r) in zip(deepBlueFiles, res): args.bwfiles[ftuple[1]] = r deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles] del regs num_reads_per_bin = score_bw.getScorePerBin( args.bwfiles, args.binSize, blackListFileName=args.blackListFileName, numberOfProcessors=args.numberOfProcessors, stepSize=args.binSize + args.distanceBetweenBins, verbose=args.verbose, region=args.region, bedFile=bed_regions, chrsToSkip=args.chromosomesToSkip, out_file_for_raw_data=args.outRawCounts, allArgs=args) sys.stderr.write("Number of bins " "found: {}\n".format(num_reads_per_bin.shape[0])) if num_reads_per_bin.shape[0] < 2: exit("ERROR: too few non zero bins found.\n" "If using --region please check that this " "region is covered by reads.\n") f = open(args.outFileName, "wb") np.savez_compressed(f, matrix=num_reads_per_bin, labels=args.labels) f.close() if args.outRawCounts: # append to the generated file the # labels header = "#'chr'\t'start'\t'end'\t" header += "'" + "'\t'".join(args.labels) + "'\n" f = open(args.outRawCounts, "r+") content = f.read() f.seek(0, 0) f.write(header + content) """ if bed_regions: bed_regions.seek(0) reg_list = bed_regions.readlines() args.outRawCounts.write("#'chr'\t'start'\t'end'\t") args.outRawCounts.write("'" + "'\t'".join(args.labels) + "'\n") fmt = "\t".join(np.repeat('%s', num_reads_per_bin.shape[1])) + "\n" for idx, row in enumerate(num_reads_per_bin): args.outRawCounts.write("{}\t{}\t{}\t".format(*reg_list[idx].strip().split("\t")[0:3])) args.outRawCounts.write(fmt % tuple(row)) else: args.outRawCounts.write("'" + "'\t'".join(args.labels) + "'\n") fmt = "\t".join(np.repeat('{}', num_reads_per_bin.shape[1])) + "\n" for row in num_reads_per_bin: args.outRawCounts.write(fmt.format(*tuple(row))) """ f.close() # Clean up temporary bigWig files, if applicable if not args.deepBlueKeepTemp: for k, v in deepBlueFiles: os.remove(args.bwfiles[v]) else: for k, v in deepBlueFiles: print("{} is stored in {}".format(k, args.bwfiles[v]))