def load_deepblue_files(regionsFileName, scoreFileName, args=None):
    """
    Preload the deepBlue entries of ``scoreFileName``.

    Every entry for which ``db.isDeepBlue`` is true is handed to
    ``db.preloadWrapper`` and then replaced, in place, by the value that
    wrapper returned (the temp file, according to the original comment).

    :param regionsFileName: forwarded to ``db.makeRegions`` to build the
        regions passed to the preload step.
    :param scoreFileName: list of score file names; modified in place.
    :param args: parsed command-line namespace. ``numberOfProcessors`` is
        read from it and it is forwarded to ``db.makeRegions`` and
        ``db.preloadWrapper``. When omitted, the module-level ``args`` is
        used, which is what this function previously relied on implicitly.
    :return: list of ``[original_name, index]`` pairs, one per preloaded
        entry; empty when nothing had to be preloaded.
    :raises NameError: if deepBlue entries are present but no ``args`` was
        given and no module-level ``args`` exists.
    """
    deepBlueFiles = [[fname, idx]
                     for idx, fname in enumerate(scoreFileName)
                     if db.isDeepBlue(fname)]
    if not deepBlueFiles:
        return deepBlueFiles

    sys.stderr.write(
        "Preloading the following deepBlue files: {}\n".format(",".join(
            [name for name, _ in deepBlueFiles])))

    if args is None:
        # Backward compatibility: callers that never passed ``args`` depended
        # on a global of that name being defined in this module.
        if "args" not in globals():
            raise NameError("load_deepblue_files() needs 'args': pass it "
                            "explicitly or define a module-level 'args'")
        args = globals()["args"]

    regs = db.makeRegions(regionsFileName, args)

    # Build the work items separately so the returned bookkeeping list never
    # carries the (potentially large) args/regs objects.
    jobs = [[name, idx, args, regs] for name, idx in deepBlueFiles]
    if len(jobs) > 1 and args.numberOfProcessors > 1:
        pool = multiprocessing.Pool(args.numberOfProcessors)
        try:
            res = pool.map_async(db.preloadWrapper, jobs).get(9999999)
        finally:
            # All results are collected (or the call failed) at this point,
            # so the workers can be torn down instead of being leaked.
            pool.terminate()
            pool.join()
    else:
        res = list(map(db.preloadWrapper, jobs))

    # substitute the file names with the temp files
    for (_, idx), preloaded in zip(deepBlueFiles, res):
        scoreFileName[idx] = preloaded
    del jobs, regs
    return deepBlueFiles
Exemplo n.º 2
0
def main(args=None):
    """
    1. get read counts at different positions either
    all of same length or from genomic regions from the BED file

    2. compute  the scores

    deepBlue entries in ``args.bwfiles`` are preloaded first and replaced in
    place by what ``db.preloadWrapper`` returns. Unless
    ``args.deepBlueKeepTemp`` is set, those replacements are removed again
    before returning, including when scoring or writing fails.

    :param args: command-line arguments, forwarded to ``process_args``.
    """
    args = process_args(args)

    bed_regions = args.BED if 'BED' in args else None

    if len(args.bwfiles) == 1 and not args.outRawCounts:
        sys.stderr.write("You've input a single bigWig file and not specified "
                         "--outRawCounts. The resulting output will NOT be "
                         "useful with any deepTools program!\n")

    # Preload deepBlue files, which need to then be deleted
    deepBlueFiles = [[fname, idx]
                     for idx, fname in enumerate(args.bwfiles)
                     if db.isDeepBlue(fname)]
    if deepBlueFiles:
        sys.stderr.write(
            "Preloading the following deepBlue files: {}\n".format(",".join(
                [name for name, _ in deepBlueFiles])))
        if 'BED' in args:
            regs = db.makeRegions(args.BED, args)
        else:
            # No BED file: derive the tiles from the first deepBlue entry.
            first = db.deepBlue(deepBlueFiles[0][0],
                                url=args.deepBlueURL,
                                userKey=args.userKey)
            regs = db.makeTiles(first, args)
            del first

        jobs = [[name, idx, args, regs] for name, idx in deepBlueFiles]
        if len(jobs) > 1 and args.numberOfProcessors > 1:
            pool = multiprocessing.Pool(args.numberOfProcessors)
            try:
                res = pool.map_async(db.preloadWrapper, jobs).get(9999999)
            finally:
                # Results are in (or the call failed); release the workers
                # instead of leaking them for the rest of the run.
                pool.terminate()
                pool.join()
        else:
            res = list(map(db.preloadWrapper, jobs))

        # substitute the file names with the temp files
        for (_, idx), preloaded in zip(deepBlueFiles, res):
            args.bwfiles[idx] = preloaded
        del jobs, regs

    try:
        num_reads_per_bin = score_bw.getScorePerBin(
            args.bwfiles,
            args.binSize,
            blackListFileName=args.blackListFileName,
            numberOfProcessors=args.numberOfProcessors,
            stepSize=args.binSize + args.distanceBetweenBins,
            verbose=args.verbose,
            region=args.region,
            bedFile=bed_regions,
            chrsToSkip=args.chromosomesToSkip,
            out_file_for_raw_data=args.outRawCounts,
            allArgs=args)

        sys.stderr.write("Number of bins "
                         "found: {}\n".format(num_reads_per_bin.shape[0]))

        if num_reads_per_bin.shape[0] < 2:
            # sys.exit rather than the site-provided ``exit`` helper, which
            # is not guaranteed to exist in every interpreter setup.
            sys.exit("ERROR: too few non zero bins found.\n"
                     "If using --region please check that this "
                     "region is covered by reads.\n")

        with open(args.outFileName, "wb") as out:
            np.savez_compressed(out, matrix=num_reads_per_bin,
                                labels=args.labels)

        if args.outRawCounts:
            # prepend the column labels to the raw counts file that
            # getScorePerBin was asked to write
            header = "#'chr'\t'start'\t'end'\t"
            header += "'" + "'\t'".join(args.labels) + "'\n"
            with open(args.outRawCounts, "r+") as raw:
                content = raw.read()
                raw.seek(0, 0)
                raw.write(header + content)
    finally:
        # Clean up temporary bigWig files, if applicable. Done in a
        # ``finally`` so a failure or early exit above does not leave them
        # behind.
        if not args.deepBlueKeepTemp:
            for _, idx in deepBlueFiles:
                os.remove(args.bwfiles[idx])
        else:
            for name, idx in deepBlueFiles:
                print("{} is stored in {}".format(name, args.bwfiles[idx]))
Exemplo n.º 3
0
def main(args=None):
    """
    Combine ``args.bigwig1`` and ``args.bigwig2`` tile by tile with
    ``getRatio`` and write the result via
    ``writeBedGraph_bam_and_bw.writeBedGraph``.

    deepBlue inputs are preloaded first and ``args.bigwig1`` /
    ``args.bigwig2`` are replaced by what ``db.preloadWrapper`` returns.
    Unless ``args.deepBlueKeepTemp`` is set, those replacements are removed
    again before returning, including when writing fails.

    :param args: command-line arguments, forwarded to
        ``parse_arguments().parse_args``.
    """
    args = parse_arguments().parse_args(args)

    if args.scaleFactors:
        scaleFactors = [float(x) for x in args.scaleFactors.split(":")]
    else:
        scaleFactors = [1, 1]

    # the getRatio function is called and receives
    # the function_args per each tile that is considered
    FUNC = getRatio
    function_args = {'valueType': args.ratio,
                     'scaleFactors': scaleFactors,
                     'pseudocount': args.pseudocount}

    # Preload deepBlue files, which need to then be deleted.
    # Index 0 refers to bigwig1, index 1 to bigwig2.
    deepBlueFiles = [[fname, idx]
                     for idx, fname in enumerate([args.bigwig1, args.bigwig2])
                     if db.isDeepBlue(fname)]
    if deepBlueFiles:
        sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([name for name, _ in deepBlueFiles])))
        first = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey)
        regs = db.makeChromTiles(first)

        jobs = [[name, idx, args, regs] for name, idx in deepBlueFiles]
        if len(jobs) > 1 and args.numberOfProcessors > 1:
            pool = multiprocessing.Pool(args.numberOfProcessors)
            try:
                res = pool.map_async(db.preloadWrapper, jobs).get(9999999)
            finally:
                # Results are in (or the call failed); release the workers
                # instead of leaking them for the rest of the run.
                pool.terminate()
                pool.join()
        else:
            res = list(map(db.preloadWrapper, jobs))

        # substitute the file names with the temp files
        for (_, idx), preloaded in zip(deepBlueFiles, res):
            if idx == 0:
                args.bigwig1 = preloaded
            else:
                args.bigwig2 = preloaded
        del jobs, regs

    try:
        writeBedGraph_bam_and_bw.writeBedGraph(
            [(args.bigwig1, getType(args.bigwig1)),
             (args.bigwig2, getType(args.bigwig2))],
            args.outFileName, 0, FUNC,
            function_args, tileSize=args.binSize, region=args.region,
            blackListFileName=args.blackListFileName,
            verbose=args.verbose,
            numberOfProcessors=args.numberOfProcessors,
            format=args.outFileFormat,
            smoothLength=False,
            missingDataAsZero=not args.skipNonCoveredRegions,
            extendPairedEnds=False)
    finally:
        # Clean up temporary bigWig files, if applicable. Done in a
        # ``finally`` so a failing write does not leave them behind.
        keep = args.deepBlueKeepTemp
        for name, idx in deepBlueFiles:
            local_path = args.bigwig1 if idx == 0 else args.bigwig2
            if keep:
                print("{} is stored in {}".format(name, local_path))
            else:
                os.remove(local_path)
def main(args=None):
    """
    Compute a score matrix over regions with ``heatmapper`` and save it.

    Steps, as visible here: collect the matrix parameters from ``args``,
    preload deepBlue score files, call ``computeMatrix``, optionally sort
    the regions, then write the matrix and the optional value / sorted-BED
    outputs.

    deepBlue entries in ``args.scoreFileName`` are replaced in place by what
    ``db.preloadWrapper`` returns. Unless ``args.deepBlueKeepTemp`` is set,
    those replacements are removed again before returning, including when a
    later step fails or exits early.

    :param args: command-line arguments, forwarded to ``process_args``.
    """
    args = process_args(args)

    parameters = {'upstream': args.beforeRegionStartLength,
                  'downstream': args.afterRegionStartLength,
                  'body': args.regionBodyLength,
                  'bin size': args.binSize,
                  'ref point': args.referencePoint,
                  'verbose': args.verbose,
                  'bin avg type': args.averageTypeBins,
                  'missing data as zero': args.missingDataAsZero,
                  'min threshold': args.minThreshold,
                  'max threshold': args.maxThreshold,
                  'scale': args.scale,
                  'skip zeros': args.skipZeros,
                  'nan after end': args.nanAfterEnd,
                  'proc number': args.numberOfProcessors,
                  'sort regions': args.sortRegions,
                  'sort using': args.sortUsing,
                  'unscaled 5 prime': args.unscaled5prime,
                  'unscaled 3 prime': args.unscaled3prime
                  }

    hm = heatmapper.heatmapper()

    # Preload deepBlue files, which need to then be deleted
    deepBlueFiles = [[fname, idx]
                     for idx, fname in enumerate(args.scoreFileName)
                     if db.isDeepBlue(fname)]
    if deepBlueFiles:
        sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([name for name, _ in deepBlueFiles])))
        regs = db.makeRegions(args.regionsFileName, args)

        jobs = [[name, idx, args, regs] for name, idx in deepBlueFiles]
        if len(jobs) > 1 and args.numberOfProcessors > 1:
            pool = multiprocessing.Pool(args.numberOfProcessors)
            try:
                res = pool.map_async(db.preloadWrapper, jobs).get(9999999)
            finally:
                # Results are in (or the call failed); release the workers
                # instead of leaking them for the rest of the run.
                pool.terminate()
                pool.join()
        else:
            res = list(map(db.preloadWrapper, jobs))

        # substitute the file names with the temp files
        for (_, idx), preloaded in zip(deepBlueFiles, res):
            args.scoreFileName[idx] = preloaded
        del jobs, regs

    try:
        scores_file_list = args.scoreFileName
        hm.computeMatrix(scores_file_list, args.regionsFileName, parameters, blackListFileName=args.blackListFileName, verbose=args.verbose, allArgs=args)
        if args.sortRegions not in ['no', 'keep']:
            sortUsingSamples = []
            if args.sortUsingSamples is not None:
                num_samples = hm.matrix.get_num_samples()
                for i in args.sortUsingSamples:
                    if 0 < i <= num_samples:
                        # user-facing sample numbers are 1-based
                        sortUsingSamples.append(i - 1)
                    else:
                        # Report the offending value itself rather than the
                        # whole option list.
                        sys.exit("The value {0} for --sortUsingSamples is not valid. Only values from 1 to {1} are allowed.".format(i, num_samples))
                print('Samples used for ordering within each group: ', sortUsingSamples)

            hm.matrix.sort_groups(sort_using=args.sortUsing, sort_method=args.sortRegions, sample_list=sortUsingSamples)
        elif args.sortRegions == 'keep':
            hm.parameters['group_labels'] = hm.matrix.group_labels
            hm.parameters["group_boundaries"] = hm.matrix.group_boundaries
            cmo.sortMatrix(hm, args.regionsFileName, args.transcriptID, args.transcript_id_designator, verbose=not args.quiet)

        hm.save_matrix(args.outFileName)

        if args.outFileNameMatrix:
            hm.save_matrix_values(args.outFileNameMatrix)

        if args.outFileSortedRegions:
            hm.save_BED(args.outFileSortedRegions)
    finally:
        # Clean up temporary bigWig files, if applicable. Done in a
        # ``finally`` so a failure or early exit above does not leave them
        # behind.
        if not args.deepBlueKeepTemp:
            for _, idx in deepBlueFiles:
                os.remove(args.scoreFileName[idx])
        else:
            for name, idx in deepBlueFiles:
                print("{} is stored in {}".format(name, args.scoreFileName[idx]))
Exemplo n.º 5
0
def main(args=None):
    """
    Compare two bigWig inputs tile by tile using ``getRatio`` and write the
    outcome through ``writeBedGraph_bam_and_bw.writeBedGraph``.

    If either input is a deepBlue entry it is preloaded first, and
    ``args.bigwig1`` / ``args.bigwig2`` are pointed at what
    ``db.preloadWrapper`` returns. Those replacements are deleted before
    returning (also on failure) unless ``args.deepBlueKeepTemp`` is set, in
    which case their locations are printed instead.

    :param args: command-line arguments, forwarded to
        ``parse_arguments().parse_args``.
    """
    args = parse_arguments().parse_args(args)

    if args.scaleFactors:
        scaleFactors = [float(x) for x in args.scaleFactors.split(":")]
    else:
        scaleFactors = [1, 1]

    # the getRatio function is called and receives
    # the function_args per each tile that is considered
    FUNC = getRatio
    function_args = {'valueType': args.ratio,
                     'scaleFactors': scaleFactors,
                     'pseudocount': args.pseudocount}

    # Preload deepBlue files, which need to then be deleted.
    # Position 0 stands for bigwig1, position 1 for bigwig2.
    inputs = [args.bigwig1, args.bigwig2]
    deepBlueFiles = [[fname, pos] for pos, fname in enumerate(inputs)
                     if db.isDeepBlue(fname)]
    if deepBlueFiles:
        sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([entry[0] for entry in deepBlueFiles])))
        handle = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey)
        regs = db.makeChromTiles(handle)

        tasks = [[fname, pos, args, regs] for fname, pos in deepBlueFiles]
        if len(tasks) > 1 and args.numberOfProcessors > 1:
            pool = multiprocessing.Pool(args.numberOfProcessors)
            try:
                res = pool.map_async(db.preloadWrapper, tasks).get(9999999)
            finally:
                # The map has finished or failed; shut the workers down so
                # they are not leaked.
                pool.terminate()
                pool.join()
        else:
            res = list(map(db.preloadWrapper, tasks))

        # substitute the file names with the temp files
        for (_, pos), tmp_path in zip(deepBlueFiles, res):
            if pos == 0:
                args.bigwig1 = tmp_path
            else:
                args.bigwig2 = tmp_path
        del tasks, regs

    try:
        writeBedGraph_bam_and_bw.writeBedGraph(
            [(args.bigwig1, getType(args.bigwig1)),
             (args.bigwig2, getType(args.bigwig2))],
            args.outFileName, 0, FUNC,
            function_args, tileSize=args.binSize, region=args.region,
            blackListFileName=args.blackListFileName,
            verbose=args.verbose,
            numberOfProcessors=args.numberOfProcessors,
            format=args.outFileFormat,
            smoothLength=False,
            missingDataAsZero=not args.skipNonCoveredRegions,
            extendPairedEnds=False)
    finally:
        # Clean up temporary bigWig files, if applicable. Running this in a
        # ``finally`` keeps a failed write from leaving them on disk.
        if not args.deepBlueKeepTemp:
            for _, pos in deepBlueFiles:
                os.remove(args.bigwig1 if pos == 0 else args.bigwig2)
        else:
            for fname, pos in deepBlueFiles:
                stored = args.bigwig2 if pos == 1 else args.bigwig1
                print("{} is stored in {}".format(fname, stored))
def main(args=None):
    """
    1. get read counts at different positions either
    all of same length or from genomic regions from the BED file

    2. compute  the scores

    Any deepBlue entry of ``args.bwfiles`` is preloaded beforehand and
    swapped, in place, for what ``db.preloadWrapper`` returns. Those
    replacements are deleted before returning (also on failure or early
    exit) unless ``args.deepBlueKeepTemp`` is set, in which case their
    locations are printed instead.

    :param args: command-line arguments, forwarded to ``process_args``.
    """
    args = process_args(args)

    if 'BED' in args:
        bed_regions = args.BED
    else:
        bed_regions = None

    if len(args.bwfiles) == 1 and not args.outRawCounts:
        sys.stderr.write("You've input a single bigWig file and not specified "
                         "--outRawCounts. The resulting output will NOT be "
                         "useful with any deepTools program!\n")

    # Preload deepBlue files, which need to then be deleted
    deepBlueFiles = [[fname, pos] for pos, fname in enumerate(args.bwfiles)
                     if db.isDeepBlue(fname)]
    if deepBlueFiles:
        sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([entry[0] for entry in deepBlueFiles])))
        if 'BED' in args:
            regs = db.makeRegions(args.BED, args)
        else:
            # Without a BED file the tiles come from the first deepBlue entry.
            handle = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey)
            regs = db.makeTiles(handle, args)
            del handle

        tasks = [[fname, pos, args, regs] for fname, pos in deepBlueFiles]
        if len(tasks) > 1 and args.numberOfProcessors > 1:
            pool = multiprocessing.Pool(args.numberOfProcessors)
            try:
                res = pool.map_async(db.preloadWrapper, tasks).get(9999999)
            finally:
                # The map has finished or failed; shut the workers down so
                # they are not leaked.
                pool.terminate()
                pool.join()
        else:
            res = list(map(db.preloadWrapper, tasks))

        # substitute the file names with the temp files
        for (_, pos), tmp_path in zip(deepBlueFiles, res):
            args.bwfiles[pos] = tmp_path
        del tasks, regs

    try:
        num_reads_per_bin = score_bw.getScorePerBin(
            args.bwfiles,
            args.binSize,
            blackListFileName=args.blackListFileName,
            numberOfProcessors=args.numberOfProcessors,
            stepSize=args.binSize + args.distanceBetweenBins,
            verbose=args.verbose,
            region=args.region,
            bedFile=bed_regions,
            chrsToSkip=args.chromosomesToSkip,
            out_file_for_raw_data=args.outRawCounts,
            allArgs=args)

        sys.stderr.write("Number of bins "
                         "found: {}\n".format(num_reads_per_bin.shape[0]))

        if num_reads_per_bin.shape[0] < 2:
            # ``sys.exit`` instead of the site-provided ``exit`` helper,
            # which is not available in every interpreter configuration.
            sys.exit("ERROR: too few non zero bins found.\n"
                     "If using --region please check that this "
                     "region is covered by reads.\n")

        with open(args.outFileName, "wb") as npz_out:
            np.savez_compressed(npz_out,
                                matrix=num_reads_per_bin,
                                labels=args.labels)

        if args.outRawCounts:
            # put the column labels in front of the raw counts that
            # getScorePerBin was asked to write to this file
            header = "#'chr'\t'start'\t'end'\t"
            header += "'" + "'\t'".join(args.labels) + "'\n"
            with open(args.outRawCounts, "r+") as raw_counts:
                content = raw_counts.read()
                raw_counts.seek(0, 0)
                raw_counts.write(header + content)
    finally:
        # Clean up temporary bigWig files, if applicable. Running this in a
        # ``finally`` keeps an error or early exit from leaving them on disk.
        if not args.deepBlueKeepTemp:
            for _, pos in deepBlueFiles:
                os.remove(args.bwfiles[pos])
        else:
            for fname, pos in deepBlueFiles:
                print("{} is stored in {}".format(fname, args.bwfiles[pos]))