def test_split_references(self):
        """Split a three-file AlignmentSet by reference and verify the chunks.

        One lambda BAM and two E. coli alignment sets are opened together.
        Asking for a single chunk must keep every record in one dataset and
        leave the parent without filters; asking for 12 chunks is expected to
        give two datasets, one per reference, whose sizes match the inputs.
        """
        lambda_bam = ('/pbi/dept/secondary/siv/testdata/SA3-RS/lambda/'
                      '2372215/0007_tiny/Alignment_Results/m150404_1016'
                      '26_42267_c100807920800000001823174110291514_s1_p'
                      '0.1.aligned.bam')
        ecoli_sequel_xml = ('/pbi/dept/secondary/siv/testdata/SA3-Sequel/ecoli/'
                            '315/3150204/r54049_20160508_152025/1_A01/Alignment'
                            '_Results/m54049_160508_155917.alignmentset.xml')
        ecoli_rs_xml = ('/pbi/dept/secondary/siv/testdata/SA3-RS/ecoli/'
                        'tiny-multimovie/Alignment_Results/'
                        'combined.alignmentset.xml')

        # Record counts of each input when opened on its own.
        n_lambda = len(AlignmentSet(lambda_bam))
        n_sequel = len(AlignmentSet(ecoli_sequel_xml))
        n_rs = len(AlignmentSet(ecoli_rs_xml))
        n_total = n_lambda + n_sequel + n_rs
        for count in (n_lambda, n_sequel, n_rs, n_total):
            self.assertNotEqual(count, 0)

        merged = AlignmentSet(lambda_bam, ecoli_sequel_xml, ecoli_rs_xml)
        # The total was formerly obtained by iterating over the records;
        # len() is used instead.
        self.assertEqual(len(merged), n_total)

        # A single chunk holds everything and leaves the parent untouched.
        chunks = merged.split_references(1)
        self.assertEqual(len(chunks), 1)
        self.assertEqual(sum(len(chunk) for chunk in chunks), n_total)
        self.assertEqual(len(merged), n_total)
        self.assertFalse(merged.filters)

        # Requesting 12 chunks yields two here, each covering one tId.
        chunks = merged.split_references(12)
        self.assertEqual(len(chunks), 2)
        self.assertEqual(sum(len(chunk) for chunk in chunks), n_total)
        head, tail = chunks[0], chunks[-1]
        head_tids = set(head.index.tId)
        self.assertEqual(len(head_tids), 1)
        tail_tids = set(tail.index.tId)
        self.assertEqual(len(tail_tids), 1)

        # First chunk: both E. coli inputs; last chunk: the lambda input.
        head_name = head.tid2rname[next(iter(head_tids))]
        self.assertEqual(head_name, 'ecoliK12_pbi_March2013')
        self.assertEqual(len(head), n_sequel + n_rs)
        tail_name = tail.tid2rname[next(iter(tail_tids))]
        self.assertEqual(tail_name, 'lambda_NEB3011')
        self.assertEqual(len(tail), n_lambda)
示例#2
0
    def test_split_references(self):
        """Check that splitting by reference preserves and groups records.

        Three alignment inputs (one lambda, two E. coli) are combined into a
        single AlignmentSet. The record total must survive a split into one
        chunk and a split requested at 12 chunks; the latter is expected to
        produce exactly two datasets, each restricted to a single reference.
        """
        paths = (
            # lambda BAM
            ('/pbi/dept/secondary/siv/testdata/SA3-RS/lambda/'
             '2372215/0007_tiny/Alignment_Results/m150404_1016'
             '26_42267_c100807920800000001823174110291514_s1_p'
             '0.1.aligned.bam'),
            # E. coli (Sequel) alignment set
            ('/pbi/dept/secondary/siv/testdata/SA3-Sequel/ecoli/'
             '315/3150204/r54049_20160508_152025/1_A01/Alignment'
             '_Results/m54049_160508_155917.alignmentset.xml'),
            # E. coli (RS, multi-movie) alignment set
            ('/pbi/dept/secondary/siv/testdata/SA3-RS/ecoli/'
             'tiny-multimovie/Alignment_Results/'
             'combined.alignmentset.xml'),
        )

        # Size of every input opened individually, in the order above.
        sizes = [len(AlignmentSet(path)) for path in paths]
        lambda_size, sequel_size, rs_size = sizes
        expected_total = lambda_size + sequel_size + rs_size
        for size in sizes:
            self.assertNotEqual(size, 0)
        self.assertNotEqual(expected_total, 0)

        combined = AlignmentSet(*paths)
        # len() supplies the total that was once counted by iteration.
        self.assertEqual(len(combined), expected_total)

        # One chunk: nothing is lost and the source set gains no filters.
        parts = combined.split_references(1)
        self.assertEqual(len(parts), 1)
        self.assertEqual(sum(len(part) for part in parts), expected_total)
        self.assertEqual(len(combined), expected_total)
        self.assertFalse(combined.filters)

        # Twelve requested chunks: two datasets come back.
        parts = combined.split_references(12)
        self.assertEqual(len(parts), 2)
        self.assertEqual(sum(len(part) for part in parts), expected_total)
        first_part = parts[0]
        last_part = parts[-1]
        first_tids = set(first_part.index.tId)
        self.assertEqual(len(first_tids), 1)
        last_tids = set(last_part.index.tId)
        self.assertEqual(len(last_tids), 1)

        # The single tId of each part maps to the expected reference name.
        (first_tid,) = first_tids
        self.assertEqual(first_part.tid2rname[first_tid],
                         'ecoliK12_pbi_March2013')
        self.assertEqual(len(first_part), sequel_size + rs_size)
        (last_tid,) = last_tids
        self.assertEqual(last_part.tid2rname[last_tid], 'lambda_NEB3011')
        self.assertEqual(len(last_part), lambda_size)
示例#3
0
    def test_split_references(self):
        """Verify reference-based splitting of a combined AlignmentSet.

        A lambda BAM and two E. coli alignment sets are loaded together. The
        result of ``split_references`` is materialised with ``list`` before
        being measured. A one-chunk split must keep all records and leave the
        parent without filters; a split requested at 12 chunks is expected to
        give two datasets, each with a single tId and the matching record
        count.
        """
        lambda_path = ('/pbi/dept/secondary/siv/testdata/SA3-RS/lambda/'
                       '2372215/0007_tiny/Alignment_Results/m150404_1016'
                       '26_42267_c100807920800000001823174110291514_s1_p'
                       '0.1.aligned.bam')
        sequel_path = ('/pbi/dept/secondary/siv/testdata/SA3-Sequel/ecoli/'
                       '315/3150204/r54049_20160508_152025/1_A01/Alignment'
                       '_Results/m54049_160508_155917.alignmentset.xml')
        rs_path = ('/pbi/dept/secondary/siv/testdata/SA3-RS/ecoli/'
                   'tiny-multimovie/Alignment_Results/'
                   'combined.alignmentset.xml')

        # Per-input record counts, each input opened separately.
        lambda_count = len(AlignmentSet(lambda_path))
        sequel_count = len(AlignmentSet(sequel_path))
        rs_count = len(AlignmentSet(rs_path))
        total_count = lambda_count + sequel_count + rs_count
        assert lambda_count != 0
        assert sequel_count != 0
        assert rs_count != 0
        assert total_count != 0

        everything = AlignmentSet(lambda_path, sequel_path, rs_path)
        # The total used to be counted by iterating; len() replaces that.
        assert len(everything) == total_count

        # Single chunk: all records present, parent still has no filters.
        pieces = list(everything.split_references(1))
        assert len(pieces) == 1
        assert sum(len(piece) for piece in pieces) == total_count
        assert len(everything) == total_count
        assert not everything.filters

        # Twelve requested chunks: two pieces are returned.
        pieces = list(everything.split_references(12))
        assert len(pieces) == 2
        assert sum(len(piece) for piece in pieces) == total_count
        leading, trailing = pieces[0], pieces[-1]
        leading_tids = set(leading.index.tId)
        assert len(leading_tids) == 1
        trailing_tids = set(trailing.index.tId)
        assert len(trailing_tids) == 1

        # Leading piece carries both E. coli inputs, trailing the lambda one.
        leading_ref = leading.tid2rname[next(iter(leading_tids))]
        assert leading_ref == 'ecoliK12_pbi_March2013'
        assert len(leading) == sequel_count + rs_count
        trailing_ref = trailing.tid2rname[next(iter(trailing_tids))]
        assert trailing_ref == 'lambda_NEB3011'
        assert len(trailing) == lambda_count