Пример #1
0
def run_rtc(rtc):
    """Copy the input fofn to the output and write a report about it.

    The first input file of ``rtc.task`` is read with ``readFofn``; the
    number of entries it yields is handed to ``fofn_to_report``.  The input
    file is then copied verbatim to the first output file and the report is
    written to the second output file.

    :param rtc: object exposing ``task.input_files`` and
        ``task.output_files`` (presumably a resolved tool contract --
        confirm against the caller)
    :returns: 0 once every step has completed
    """
    task = rtc.task
    inputs = task.input_files
    outputs = task.output_files

    entry_count = len(list(readFofn(inputs[0])))
    fofn_report = fofn_to_report(entry_count)

    # Outputs are indexed only when needed, so a missing second output
    # still fails after the copy has happened.
    shutil.copy(inputs[0], outputs[0])
    write_report_and_log(fofn_report, outputs[1])

    log.info("Completed running {i}".format(i=task))
    return 0
Пример #2
0
    def test_read_ccs_from_multiple_smrtcells_write_one_bam(self):
        """
        test_read_ccs_from_multiple_smrtcells_write_one_bam:
        Take the ccs bam files listed in a fofn (several SMRTCells),
        pick the records of a handful of zmws from every movie, and
        write them all to a single output bam file.
        """
        movies = [
            "m140802_021814_42161_c110036822550000001823106706241500_s1_p0",
            "m140802_043938_42161_c110036822550000001823106706241501_s1_p0",
            "m140802_070303_42161_c110036822550000001823106706241502_s1_p0"
        ]
        # Hole numbers to select, one list per movie, in ``movies`` order.
        first_holes, second_holes, third_holes = (
            [54434, 54440, 54493, 80328, 163395],
            [1784, 7201, 40789, 79704, 152904],
            [705, 4838, 40197, 84197, 126136],
        )
        first_movie, second_movie, third_movie = movies
        hole_table = [(first_movie, first_holes),
                      (second_movie, second_holes),
                      (third_movie, third_holes)]
        # ZMW names have the form "<movie>/<hole number>".
        zmws = ["%s/%d" % (movie, hole)
                for movie, holes in hole_table
                for hole in holes]

        input_fofn = op.join(self.bigDataDir, "ccsbam.fofn")
        ccs_bam_files = list(readFofn(input_fofn))
        output_bam = op.join(self.outDir, "test_writebam_ccs_3.bam")

        expected_total = sum([13821, 13457, 17016,
                              13705, 13581, 14900,
                              13477, 12238, 11318])  #123513

        _verify_write_compare_ccs(self,
                                  ccs_bam_files,
                                  zmws,
                                  output_bam,
                                  expected_movies=movies,
                                  expected_len=expected_total)
Пример #3
0
    def test_read_ccs_from_multiple_smrtcells_write_one_bam(self):
        """
        test_read_ccs_from_multiple_smrtcells_write_one_bam:
        Read ccs reads out of the bam files of several SMRTCells,
        keep the records of a few zmws per movie, and write the
        selection to one output bam file.
        """
        movies = ["m140802_021814_42161_c110036822550000001823106706241500_s1_p0",
                  "m140802_043938_42161_c110036822550000001823106706241501_s1_p0",
                  "m140802_070303_42161_c110036822550000001823106706241502_s1_p0"]
        movie_a, movie_b, movie_c = movies

        # Pair every movie with the hole numbers selected from it.
        selections = (
            (movie_a, [54434, 54440, 54493, 80328, 163395]),
            (movie_b, [1784, 7201, 40789, 79704, 152904]),
            (movie_c, [705, 4838, 40197, 84197, 126136]),
        )
        zmws = []
        for movie_name, hole_numbers in selections:
            zmws.extend("%s/%d" % (movie_name, hole) for hole in hole_numbers)

        bam_paths = list(readFofn(op.join(self.bigDataDir, "ccsbam.fofn")))
        out_path = op.join(self.outDir, "test_writebam_ccs_3.bam")

        read_counts = (13821, 13457, 17016, 13705, 13581,
                       14900, 13477, 12238, 11318)
        expected_total = sum(read_counts) #123513

        _verify_write_compare_ccs(self, bam_paths, zmws, out_path,
                                  expected_movies=movies,
                                  expected_len=expected_total)
Пример #4
0
def fromSmrtPortalPath(jobPath):
    """Build a Fixture from the files found around a job directory.

    The paths listed in ``<jobPath>/input.fofn`` are mapped through
    ``updir`` to obtain the reports path; the job must resolve to a single
    such path.  The individual file names are then looked up with
    ``findOneOrNone`` relative to that path (and to ``<jobPath>/data`` for
    the alignment file).

    :param jobPath: path of the job directory containing ``input.fofn``
    :returns: ``Fixture`` built from the trc, pls, bas and aln file names
    :raises ValueError: if the fofn entries resolve to more than one
        reports path
    :raises IndexError: if the fofn yields no entries at all
    """
    # reckon the reports path from the input fofn ...
    reportsPaths = set(updir(path)
                       for path in readFofn(op.join(jobPath, "input.fofn")))
    if len(reportsPaths) > 1:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("No support for multi-movie jobs yet")

    reportsPath = list(reportsPaths)[0]
    # For bas/pls, fall back to the second pattern when the first lookup
    # returns a falsy result.
    basFname = (findOneOrNone("*.bas.h5", reportsPath) or
                findOneOrNone("*.bax.h5", reportsPath))
    plsFname = (findOneOrNone("*.pls.h5", reportsPath) or
                findOneOrNone("*.plx.h5", reportsPath))
    # The trace file is searched one level above the reports path.
    trcFname = findOneOrNone("*.trc.h5", updir(reportsPath))
    alnFname = findOneOrNone("*.cmp.h5", op.join(jobPath, "data"))
    return Fixture(trcFname=trcFname, plsFname=plsFname,
                   basFname=basFname, alnFname=alnFname)
Пример #5
0
def parse_input_file(i_file):
    """Parse input file, get input bam or bax.h5 file.

    The kind of input is decided purely from the file name suffix:

    * ``.bam``    -- returned as the bam file name.
    * ``.bax.h5`` -- returned as the bax.h5 file name.
    * ``.fofn``   -- read with ``readFofn``; it must list exactly one
      entry and that entry must end with ``.bax.h5``.

    :param i_file: input file name
    :returns: tuple ``(bam_fn, bax_fn)``; exactly one of the two is set,
        the other is ``None``
    :raises ValueError: if the suffix is not supported, or if a fofn does
        not contain exactly one bax.h5 file
    """
    bam_fn, bax_fn = None, None
    if i_file.endswith(".bam"):
        bam_fn = i_file
    elif i_file.endswith(".fofn"):
        fns = list(readFofn(i_file))
        if len(fns) != 1 or not fns[0].endswith(".bax.h5"):
            # Interpolate the fofn name so the message says which file
            # is at fault (the placeholder used to be left unfilled).
            raise ValueError(
                "%s fofn should contain exactly one bax.h5 file." % i_file)
        bax_fn = fns[0]
    elif i_file.endswith(".bax.h5"):
        bax_fn = i_file
    else:
        raise ValueError("Unsupported file format %s" % i_file)
    return bam_fn, bax_fn
Пример #6
0
def parse_input_file(i_file):
    """Parse input file, get input bam or bax.h5 file.

    Dispatches on the suffix of ``i_file``: a ``.bam`` name is returned as
    the bam file, a ``.bax.h5`` name as the bax.h5 file, and a ``.fofn``
    is read with ``readFofn`` and must list exactly one ``.bax.h5`` entry,
    which is then returned as the bax.h5 file.

    :param i_file: input file name
    :returns: tuple ``(bam_fn, bax_fn)`` in which the entry that does not
        apply is ``None``
    :raises ValueError: for an unsupported suffix, or for a fofn that does
        not contain exactly one bax.h5 file
    """
    if i_file.endswith(".bam"):
        return i_file, None
    if i_file.endswith(".fofn"):
        fns = list(readFofn(i_file))
        if len(fns) != 1 or not fns[0].endswith(".bax.h5"):
            # The message carries a %s placeholder; fill it with the fofn
            # name instead of emitting a literal "%s".
            raise ValueError(
                "%s fofn should contain exactly one bax.h5 file." % i_file)
        return None, fns[0]
    if i_file.endswith(".bax.h5"):
        return None, i_file
    raise ValueError("Unsupported file format %s" % i_file)
Пример #7
0
def fromSmrtPortalPath(jobPath):
    """Assemble a Fixture from the files belonging to a job directory.

    Every path yielded by ``readFofn`` for ``<jobPath>/input.fofn`` is
    passed through ``updir``; the distinct results are the candidate
    reports paths, of which there must be exactly one.  File names are
    then resolved with ``findOneOrNone``.

    :param jobPath: job directory that contains ``input.fofn``
    :returns: ``Fixture`` carrying the trc, pls, bas and aln file names
    :raises ValueError: when more than one reports path is found
    :raises IndexError: when the fofn yields no paths
    """
    # reckon the reports path from the input fofn ...
    fofnPath = op.join(jobPath, "input.fofn")
    reportsPaths = set(updir(path) for path in readFofn(fofnPath))
    if len(reportsPaths) > 1:
        # Exception is instantiated with call syntax so the statement
        # parses on Python 3 as well as Python 2.
        raise ValueError("No support for multi-movie jobs yet")

    reportsPath = list(reportsPaths)[0]
    # bas/pls lookups try a second pattern when the first one comes back
    # falsy.
    basFname = findOneOrNone("*.bas.h5", reportsPath) or findOneOrNone(
        "*.bax.h5", reportsPath)
    plsFname = findOneOrNone("*.pls.h5", reportsPath) or findOneOrNone(
        "*.plx.h5", reportsPath)
    # The trace file is looked for one directory above the reports path.
    trcFname = findOneOrNone("*.trc.h5", updir(reportsPath))
    alnFname = findOneOrNone("*.cmp.h5", op.join(jobPath, "data"))
    return Fixture(trcFname=trcFname,
                   plsFname=plsFname,
                   basFname=basFname,
                   alnFname=alnFname)
Пример #8
0
 def __init__(self, fname, *rest):
     """Open every part of a possibly multi-part input.

     ``fname`` is expanded (``~``) and made absolute, then stored as
     ``self.filename``.  The list of part files is determined from it and
     one ``PART_READER_CLASS`` instance is created per part, stored in
     ``self._parts``.

     :param fname: name of a ``.h5`` file or of a ``.fofn`` file
     :param rest: extra positional arguments forwarded unchanged to
         ``PART_READER_CLASS`` for every part
     :raises ValueError: if the name ends with neither ``.h5`` nor
         ``.fofn``
     """
     # Can read any of the following:
     #  ~ fofn
     #  ~ h5, multipart
     #  ~ h5, single part
     self.filename = op.abspath(op.expanduser(fname))
     if fname.endswith(".h5"):
         directory = op.dirname(self.filename)
         with h5py.File(self.filename, "r") as f:
             if f.get("MultiPart"):
                 # Part names are joined onto the directory of the
                 # file that lists them.
                 partFilenames = [ op.join(directory, fn)
                                   for fn in f["/MultiPart/Parts"] ]
             else:
                 partFilenames = [ self.filename ]
     elif self.filename.endswith(".fofn"):
         partFilenames = list(readFofn(self.filename))
     else:
         # Call form of raise: accepted by both Python 2 and Python 3.
         raise ValueError("Invalid file name for multipart reader")
     self._parts = [ self.PART_READER_CLASS(pfn, *rest)
                     for pfn in partFilenames ]
Пример #9
0
def test_simple():
    """A two-line fofn stream yields its two paths in order."""
    stream = StringIO("/a/b\n/c/d")
    paths = [entry for entry in readFofn(stream)]
    expected = ["/a/b", "/c/d"]
    assert_array_equal(expected, paths)
Пример #10
0
def test_absolutifying():
    """Every path read from each of the data fofns is absolute."""
    for fofn_file in data.getFofns():
        for entry in readFofn(fofn_file):
            entry_is_absolute = isabs(entry)
            assert_true(entry_is_absolute)
Пример #11
0
def test_empty_lines():
    """Whitespace-only lines in a fofn stream produce no entries."""
    stream = StringIO("/a/b\n \n/c/d\n ")
    paths = [entry for entry in readFofn(stream)]
    expected = ["/a/b", "/c/d"]
    assert_array_equal(expected, paths)
Пример #12
0
def test_simple():
    """Reading a stream of two paths gives back exactly those paths."""
    wanted = ["/a/b", "/c/d"]
    source = StringIO("/a/b\n/c/d")
    assert_array_equal(wanted, list(readFofn(source)))
Пример #13
0
def test_absolutifying():
    """Paths yielded for the data fofns must all be absolute."""
    for listing in data.getFofns():
        for member in readFofn(listing):
            assert_true(isabs(member))
Пример #14
0
def test_simple():
    """A two-line fofn stream yields its two paths in order."""
    stream = StringIO("/a/b\n/c/d")
    paths = [entry for entry in readFofn(stream)]
    assert ["/a/b", "/c/d"] == paths
Пример #15
0
def test_empty_lines():
    """Whitespace-only lines in a fofn stream produce no entries."""
    stream = StringIO(u"/a/b\n \n/c/d\n ")
    paths = [entry for entry in readFofn(stream)]
    assert ["/a/b", "/c/d"] == paths