Пример #1
0
def test_build_matrix_cooler():
    """Build a matrix with a ``.cool`` output name and compare it to the reference.

    Calls ``hicBuildMatrix.main`` with ``sam_R1``/``sam_R2``, ``-bs 5000``,
    ``-b /tmp/test.bam``, a fresh QC folder and ``--threads 4``. The result is
    checked against ``small_test_matrix_parallel.h5`` (matrix data and the
    first three fields of every cut interval) and against the reference
    ``QC/`` folder under ``ROOT``.
    """
    # The handle is closed right away; only the file name is handed on.
    cool_file = NamedTemporaryFile(suffix='.cool', delete=False)
    cool_file.close()
    qc_dir = mkdtemp(prefix="testQC_")

    cmd_template = "-s {} {} --outFileName {} -bs 5000 -b /tmp/test.bam --QCfolder {} --threads 4"
    cmd = cmd_template.format(sam_R1, sam_R2, cool_file.name, qc_dir)
    hicBuildMatrix.main(cmd.split())

    expected = hm.hiCMatrix(ROOT + "small_test_matrix_parallel.h5")
    observed = hm.hiCMatrix(cool_file.name)

    nt.assert_equal(expected.matrix.data, observed.matrix.data)

    # Whole cut intervals are not compared here; only their first three
    # fields are, after confirming both lists have the same length.
    nt.assert_equal(len(observed.cut_intervals), len(expected.cut_intervals))
    observed_intervals = [interval[:3] for interval in observed.cut_intervals]
    expected_intervals = [interval[:3] for interval in expected.cut_intervals]
    nt.assert_equal(observed_intervals, expected_intervals)

    # QC output: identical log and identical set of file names.
    assert are_files_equal(ROOT + "QC/QC.log", qc_dir + "/QC.log")
    assert set(os.listdir(ROOT + "QC/")) == set(os.listdir(qc_dir))

    os.unlink(cool_file.name)
    shutil.rmtree(qc_dir)
Пример #2
0
def test_build_matrix_rf():
    """Build a matrix using the ``-rs`` file and compare it to the reference.

    Calls ``hicBuildMatrix.main`` with ``sam_R1``/``sam_R2``, ``dpnii_file``
    as the ``-rs`` argument, GATC as restriction and dangling sequence,
    ``--minDistance 150``, ``--maxLibraryInsertSize 1500`` and four threads.
    The result must match ``small_test_rf_matrix.h5`` and the reference
    ``QC_rc/`` folder under ``ROOT``.
    """
    # The handle is closed right away; only the file name is handed on.
    h5_file = NamedTemporaryFile(suffix='.h5', delete=False)
    h5_file.close()
    qc_dir = mkdtemp(prefix="testQC_")

    cmd_template = ("-s {} {} -rs {} --outFileName {}  --QCfolder {} "
                    "--restrictionSequence GATC "
                    "--danglingSequence GATC "
                    "--minDistance 150 "
                    "--maxLibraryInsertSize 1500 --threads 4")
    cmd = cmd_template.format(sam_R1, sam_R2, dpnii_file, h5_file.name, qc_dir)
    hicBuildMatrix.main(cmd.split())

    expected = hm.hiCMatrix(ROOT + "small_test_rf_matrix.h5")
    observed = hm.hiCMatrix(h5_file.name)

    nt.assert_equal(expected.matrix.data, observed.matrix.data)
    nt.assert_equal(expected.cut_intervals, observed.cut_intervals)

    # Print the reference QC file names, then check the QC output against them.
    reference_qc = ROOT + "QC_rc/"
    print(set(os.listdir(reference_qc)))
    assert are_files_equal(ROOT + "QC_rc/QC.log", qc_dir + "/QC.log")
    assert set(os.listdir(reference_qc)) == set(os.listdir(qc_dir))

    os.unlink(h5_file.name)
    shutil.rmtree(qc_dir)
def test_build_matrix_restrictionCutFile_eight(
        sam1, sam2, outFile, qcFolder, outBam, binSize, restrictionCutFile,
        minDistance, maxDistance, maxLibraryInsertSize, restrictionSequence,
        danglingSequence, region, removeSelfLigation, minMappingQuality,
        threads, inputBufferSize):
    """Smoke-test ``hicBuildMatrix`` with a cut file and ``--skipDuplicationCheck``.

    The command line also carries ``--keepSelfCircles`` and ``--doTestRun``.
    No result is inspected; the test passes when ``hicBuildMatrix.main``
    returns without raising. Afterwards ``outFile.name`` and ``qcFolder`` are
    removed.

    ``sam1``, ``sam2``, ``outBam``, ``binSize``, ``maxDistance`` and ``region``
    are accepted but not used in this body.
    """
    cmd_template = (
        "-s {} {} --restrictionCutFile {} --outFileName {} --QCfolder {} "
        "--restrictionSequence {} "
        "--danglingSequence {} "
        "--minDistance {} "
        "--maxLibraryInsertSize {} --threads {} "
        "--removeSelfLigation {} --keepSelfCircles "
        "--minMappingQuality {} --inputBufferSize {} "
        "--doTestRun --skipDuplicationCheck "
    )
    # NOTE(review): the inputs are bam_R1/bam_R2 from the enclosing scope,
    # not the sam1/sam2 parameters -- confirm this is intended.
    values = (
        bam_R1, bam_R2,
        restrictionCutFile, outFile.name, qcFolder,
        restrictionSequence, danglingSequence,
        minDistance, maxLibraryInsertSize, threads,
        removeSelfLigation, minMappingQuality, inputBufferSize,
    )
    cli_args = cmd_template.format(*values).split()

    hicBuildMatrix.main(cli_args)

    os.unlink(outFile.name)
    shutil.rmtree(qcFolder)
Пример #4
0
def test_build_matrix(capsys):
    """Build a 5 kb ``.h5`` matrix with four threads and compare it to the reference.

    Besides the matrix data and cut intervals, the QC folder is compared with
    the reference ``QC/`` folder under ``ROOT``, and the size of
    ``/tmp/test.bam`` is compared with ``small_test_matrix_result.bam``.
    ``capsys`` is accepted but not used in this body.
    """
    bam_path = "/tmp/test.bam"

    # The handle is closed right away; only the file name is handed on.
    h5_file = NamedTemporaryFile(suffix='.h5', delete=False)
    h5_file.close()
    qc_dir = mkdtemp(prefix="testQC_")

    cmd_template = "-s {} {} --outFileName {} -bs 5000 -b /tmp/test.bam --QCfolder {} --threads 4"
    cmd = cmd_template.format(sam_R1, sam_R2, h5_file.name, qc_dir)
    hicBuildMatrix.main(cmd.split())

    expected = hm.hiCMatrix(ROOT + "small_test_matrix_parallel.h5")
    observed = hm.hiCMatrix(h5_file.name)
    nt.assert_equal(expected.matrix.data, observed.matrix.data)
    nt.assert_equal(expected.cut_intervals, observed.cut_intervals)

    # Print the reference QC file names, then check the QC output against them.
    reference_qc = ROOT + "QC/"
    print(set(os.listdir(reference_qc)))
    assert are_files_equal(ROOT + "QC/QC.log", qc_dir + "/QC.log")
    assert set(os.listdir(reference_qc)) == set(os.listdir(qc_dir))

    # accept delta of 60 kb, file size is around 4.5 MB
    reference_size = os.path.getsize(ROOT + "small_test_matrix_result.bam")
    produced_size = os.path.getsize(bam_path)
    assert abs(reference_size - produced_size) < 64000

    os.unlink(h5_file.name)
    shutil.rmtree(qc_dir)
    os.unlink(bam_path)
Пример #5
0
def main(args=None):
    """Assemble a ``hicBuildMatrix`` test-run command line and execute it.

    The parsed options are translated into an argument list for
    ``hicBuildMatrix.main``: the two SAM files, a temporary ``.h5`` output
    name, the QC folder, ``--doTestRun`` with ``--doTestRunLines`` taken from
    ``args.lines``, one thread and a bin size of 10000. Restriction
    sequences, dangling sequences and restriction cut files are forwarded
    only when they were given.

    Args:
        args: Argument list for the parser returned by ``parse_arguments()``;
            ``None`` is passed through to ``parse_args`` unchanged.

    Raises:
        OSError: If the QC folder cannot be created for a reason other than
            it already existing.
    """
    args = parse_arguments().parse_args(args)
    if not os.path.exists(args.QCfolder):
        try:
            os.makedirs(args.QCfolder)
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    # Only the generated name is needed, so release the handle immediately
    # instead of keeping it open for the whole run.
    outFile = NamedTemporaryFile(suffix='.h5', delete=False)
    outFile.close()

    args_hicBuildMatrix = "--samFiles {} {} --outFileName {}  --QCfolder {} --doTestRun --doTestRunLines {} --threads 1 ".format(
        args.samFiles[0], args.samFiles[1], outFile.name, args.QCfolder,
        str(args.lines)).split()

    args_hicBuildMatrix.extend(['--binSize', str(10000)])

    if args.restrictionSequence:
        args_hicBuildMatrix.append('--restrictionSequence')
        args_hicBuildMatrix.extend(args.restrictionSequence)

    if args.danglingSequence:
        args_hicBuildMatrix.append('--danglingSequence')
        args_hicBuildMatrix.extend(args.danglingSequence)

    # Guard on the cut files themselves. The previous check looked at
    # args.danglingSequence, which dropped the cut files when no dangling
    # sequence was given and iterated over None when only a dangling
    # sequence was given.
    if args.restrictionCutFile:
        args_hicBuildMatrix.append('--restrictionCutFile')
        args_hicBuildMatrix.extend(
            cut_file.name for cut_file in args.restrictionCutFile)

    log.debug('args_hicBuildMatrix {}'.format(args_hicBuildMatrix))

    hicBuildMatrix.main(args_hicBuildMatrix)
def test_build_matrix_restrictionCutFile_four(
        sam1, sam2, outFile, qcFolder, outBam, binSize, restrictionCutFile,
        minDistance, maxDistance, maxLibraryInsertSize, restrictionSequence,
        danglingSequence, region, removeSelfLigation, minMappingQuality,
        threads, inputBufferSize):
    """Smoke-test ``hicBuildMatrix`` with a cut file and further options, without a region.

    The command line carries the restriction and dangling sequences, minimum
    distance, maximum library insert size, thread count,
    ``--removeSelfLigation`` and ``--keepSelfCircles``. No result is
    inspected; the test passes when ``hicBuildMatrix.main`` returns without
    raising. Afterwards ``outFile.name`` and ``qcFolder`` are removed.

    ``sam1``, ``sam2``, ``outBam``, ``binSize``, ``maxDistance``, ``region``,
    ``minMappingQuality`` and ``inputBufferSize`` are accepted but not used
    in this body.
    """
    cmd_template = (
        "-s {} {} --restrictionCutFile {} --outFileName {} --QCfolder {} "
        "--restrictionSequence {} "
        "--danglingSequence {} "
        "--minDistance {} "
        "--maxLibraryInsertSize {} --threads {} "
        "--removeSelfLigation {} --keepSelfCircles "
    )
    # NOTE(review): the inputs are bam_R1/bam_R2 from the enclosing scope,
    # not the sam1/sam2 parameters -- confirm this is intended.
    values = (
        bam_R1, bam_R2,
        restrictionCutFile, outFile.name, qcFolder,
        restrictionSequence, danglingSequence,
        minDistance, maxLibraryInsertSize, threads,
        removeSelfLigation,
    )
    cli_args = cmd_template.format(*values).split()

    hicBuildMatrix.main(cli_args)

    os.unlink(outFile.name)
    shutil.rmtree(qcFolder)
def test_build_matrix_restrictionCutFile_one(
        sam1, sam2, outFile, qcFolder, outBam, binSize, restrictionCutFile,
        minDistance, maxDistance, maxLibraryInsertSize, restrictionSequence,
        danglingSequence, region, removeSelfLigation, minMappingQuality,
        threads, inputBufferSize):
    """Smoke-test ``hicBuildMatrix`` with only a restriction cut file.

    Just the two input files, the cut file, the output name and the QC folder
    are passed on the command line; every other parameter is accepted but not
    used in this body. No result is inspected. Afterwards ``outFile.name``
    and ``qcFolder`` are removed.
    """
    cmd_template = ("-s {} {} --restrictionCutFile {} --outFileName {} "
                    "--QCfolder {} ")
    cmd = cmd_template.format(
        sam1, sam2, restrictionCutFile, outFile.name, qcFolder)
    cli_args = cmd.split()

    hicBuildMatrix.main(cli_args)

    os.unlink(outFile.name)
    shutil.rmtree(qcFolder)
Пример #8
0
def test_build_matrix():
    """Build a 5 kb ``.h5`` matrix and compare it to the stored reference.

    Calls ``hicBuildMatrix.main`` with ``sam_R1``/``sam_R2``, ``-bs 5000``,
    ``-b /tmp/test.bam`` and a fresh QC folder. The matrix data and cut
    intervals must match ``small_test_matrix.h5``, and the QC folder must
    hold the same file names as the reference ``QC/`` folder under ``ROOT``.
    """
    bam_path = "/tmp/test.bam"

    # The handle is closed right away; only the file name is handed on.
    outfile = NamedTemporaryFile(suffix='.h5', delete=False)
    outfile.close()
    qc_folder = mkdtemp(prefix="testQC_")
    args = "-s {} {} -o {} -bs 5000 -b /tmp/test.bam --QCfolder {}".format(
        sam_R1, sam_R2, outfile.name, qc_folder).split()
    hicBuildMatrix.main(args)

    test = hm.hiCMatrix(ROOT + "small_test_matrix.h5")
    new = hm.hiCMatrix(outfile.name)
    nt.assert_equal(test.matrix.data, new.matrix.data)
    assert test.cut_intervals == new.cut_intervals

    # Call print as a function: the former print statement is a SyntaxError
    # on Python 3, while this form prints the same text on Python 2.
    print(set(os.listdir(ROOT + "QC/")))
    assert set(os.listdir(ROOT + "QC/")) == set(os.listdir(qc_folder))

    os.unlink(outfile.name)
    shutil.rmtree(qc_folder)
    # Remove the file named by -b as well so it does not pile up in /tmp;
    # guarded in case it was never written.
    if os.path.exists(bam_path):
        os.unlink(bam_path)
def test_build_matrix_bin_size(sam1, sam2, outFile, qcFolder, outBam, binSize,
                               restrictionCutFile, minDistance, maxDistance,
                               maxLibraryInsertSize, restrictionSequence,
                               danglingSequence, region, removeSelfLigation,
                               minMappingQuality, threads, inputBufferSize):
    """
    Smoke-test hicBuildMatrix with a bin size instead of a restriction cut file.

    Only sam1, sam2, binSize, outFile and qcFolder reach the command line;
    the other parameters are accepted but not used in this body. No result
    is inspected. Afterwards outFile.name and qcFolder are removed.

    Note: Test will take some time.

    Note: the parameter value lists can be expanded so that more combinations
          of command line args are exercised.
    """
    cmd_template = "-s {} {} --binSize {} --outFileName {}  --QCfolder {} "
    cmd = cmd_template.format(sam1, sam2, binSize, outFile.name, qcFolder)
    cli_args = cmd.split()

    hicBuildMatrix.main(cli_args)

    os.unlink(outFile.name)
    shutil.rmtree(qcFolder)