Exemplo n.º 1
0
    def test_cumulative_count_distribution(self):
        # load file
        ac = ADAMContext(self.ss)
        testFile = self.resourceFile("small.sam")
        # read alignments

        reads = ac.loadAlignments(testFile)

        # convert to coverage
        coverage = reads.toCoverage()

        qc = CoverageDistribution(self.ss, coverage)

        _, cd = qc.plotDistributions(testMode = True, cumulative = True, normalize = False)

        # first sample
        items = list(cd.popitem()[1])
        assert(len(items) == 1)
        assert(items.pop()[1] == 1500)

        _, cd = qc.plotDistributions(testMode = True, cumulative = False, normalize = False)

        # first sample
        items = list(cd.popitem()[1])
        assert(len(items) == 1)
        assert(items.pop()[1] == 1500)
Exemplo n.º 2
0
    def test_collapse(self):
        testFile = self.resourceFile("sorted.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        collapsed = coverage.collapse()
        self.assertEquals(collapsed.toDF().count(), coverage.toDF().count())
Exemplo n.º 3
0
    def test_aggregatedCoverage(self):
        testFile = self.resourceFile("small.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        collapsed = coverage.aggregatedCoverage(10)
        self.assertEquals(collapsed.toDF().count(), 166)
Exemplo n.º 4
0
    def test_flatten(self):
        testFile = self.resourceFile("small.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        flattened = coverage.flatten()
        self.assertEquals(flattened.toDF().count(), 1500)
Exemplo n.º 5
0
    def test_collapse(self):
        testFile = self.resourceFile("sorted.sam")
        ac = ADAMContext(self.sc)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        collapsed = coverage.collapse()
        self.assertEquals(collapsed.toDF().count(), coverage.toDF().count())
Exemplo n.º 6
0
    def test_aggregatedCoverage(self):
        testFile = self.resourceFile("small.sam")
        ac = ADAMContext(self.sc)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        collapsed = coverage.aggregatedCoverage(10)
        self.assertEquals(collapsed.toDF().count(), 166)
Exemplo n.º 7
0
    def test_load_coverage(self):

        testFile = self.resourceFile("sample_coverage.bed")
        ac = ADAMContext(self.sc)

        coverage = ac.loadCoverage(testFile)

        self.assertEqual(coverage.toDF().count(), 3)
Exemplo n.º 8
0
    def test_flatten(self):
        testFile = self.resourceFile("small.sam")
        ac = ADAMContext(self.sc)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        flattened = coverage.flatten()
        self.assertEquals(flattened.toDF().count(), 1500)
Exemplo n.º 9
0
    def test_load_slices(self):

        testFile = self.resourceFile("HLA_DQB1_05_01_01_02.fa")
        ac = ADAMContext(self.ss)

        slices = ac.loadSlices(testFile, 10000)

        self.assertEqual(slices.toDF().count(), 1)
        self.assertEqual(slices._jvmDataset.jrdd().count(), 1)
Exemplo n.º 10
0
    def test_load_coverage(self):


        testFile = self.resourceFile("sample_coverage.bed")
        ac = ADAMContext(self.ss)

        coverage = ac.loadCoverage(testFile)

        self.assertEqual(coverage.toDF().count(), 3)
Exemplo n.º 11
0
    def test_load_interval_list(self):

        testFile = self.resourceFile("SeqCap_EZ_Exome_v3.hg19.interval_list")
        ac = ADAMContext(self.ss)
        
        reads = ac.loadFeatures(testFile)

        self.assertEqual(reads.toDF().count(), 369)
        self.assertEqual(reads._jvmRdd.jrdd().count(), 369)
Exemplo n.º 12
0
    def test_load_bed(self):

        testFile = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        ac = ADAMContext(self.ss)
        
        reads = ac.loadFeatures(testFile)

        self.assertEqual(reads.toDF().count(), 10)
        self.assertEqual(reads._jvmRdd.jrdd().count(), 10)
Exemplo n.º 13
0
    def test_load_gtf(self):

        testFile = self.resourceFile("Homo_sapiens.GRCh37.75.trun20.gtf")
        ac = ADAMContext(self.ss)
        
        reads = ac.loadFeatures(testFile)

        self.assertEqual(reads.toDF().count(), 15)
        self.assertEqual(reads._jvmRdd.jrdd().count(), 15)
Exemplo n.º 14
0
    def test_load_alignments(self):
        
        testFile = self.resourceFile("small.sam")
        ac = ADAMContext(self.ss)
        
        reads = ac.loadAlignments(testFile)

        self.assertEqual(reads.toDF().count(), 20)
        self.assertEqual(reads._jvmRdd.jrdd().count(), 20)
Exemplo n.º 15
0
    def test_load_dna_sequences(self):

        testFile = self.resourceFile("HLA_DQB1_05_01_01_02.fa")
        ac = ADAMContext(self.ss)

        sequences = ac.loadDnaSequences(testFile)

        self.assertEqual(sequences.toDF().count(), 1)
        self.assertEqual(sequences._jvmDataset.jrdd().count(), 1)
Exemplo n.º 16
0
    def test_VariantsPerSampleDistribution(self):
        ac = ADAMContext(self.ss)
        testFile = self.resourceFile("genodata.v3.test.vcf")

        genotypes = ac.loadGenotypes(testFile)
        _, data = VariantsPerSampleDistribution(self.ss, genotypes).plotDistributions(testMode= True)

        expected = [6, 8, 8, 1, 7, 8]
        assert(sum(data) == sum(expected))
Exemplo n.º 17
0
    def test_load_interval_list(self):

        testFile = self.resourceFile("SeqCap_EZ_Exome_v3.hg19.interval_list")
        ac = ADAMContext(self.ss)
        
        reads = ac.loadFeatures(testFile)

        self.assertEqual(reads.toDF().count(), 369)
        self.assertEqual(reads._jvmRdd.jrdd().count(), 369)
Exemplo n.º 18
0
    def test_load_bed(self):

        testFile = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        ac = ADAMContext(self.ss)
        
        reads = ac.loadFeatures(testFile)

        self.assertEqual(reads.toDF().count(), 10)
        self.assertEqual(reads._jvmRdd.jrdd().count(), 10)
Exemplo n.º 19
0
    def test_transform(self):
        testFile = self.resourceFile("random.vcf")
        ac = ADAMContext(self.sc)

        genotypes = ac.loadGenotypes(testFile)

        transformedGenotypes = genotypes.transform(lambda x: x.filter(x.contigName == '1'))

        self.assertEquals(transformedGenotypes.toDF().count(), 9)
Exemplo n.º 20
0
    def test_count_kmers(self):

        testFile = self.resourceFile("small.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(testFile)
        kmers = reads.countKmers(6)

        self.assertEqual(kmers.count(), 1040)
Exemplo n.º 21
0
    def test_load_alignments(self):
        
        testFile = self.resourceFile("small.sam")
        ac = ADAMContext(self.ss)
        
        reads = ac.loadAlignments(testFile)

        self.assertEqual(reads.toDF().count(), 20)
        self.assertEqual(reads._jvmRdd.jrdd().count(), 20)
Exemplo n.º 22
0
    def test_load_genotypes(self):

        testFile = self.resourceFile("small.vcf")
        ac = ADAMContext(self.ss)

        reads = ac.loadGenotypes(testFile)

        self.assertEqual(reads.toDF().count(), 18)
        self.assertEqual(reads._jvmRdd.jrdd().count(), 18)
Exemplo n.º 23
0
    def test_to_fragments(self):

        readsPath = self.resourceFile("unsorted.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(readsPath)

        fragments = reads.toFragments()
        self.assertEqual(fragments.toDF().count(), 5)
Exemplo n.º 24
0
    def test_load_contig_fragments(self):

        testFile = self.resourceFile("HLA_DQB1_05_01_01_02.fa")
        ac = ADAMContext(self.sc)

        reads = ac.loadContigFragments(testFile)

        self.assertEqual(reads.toDF().count(), 1)
        self.assertEqual(reads._jvmRdd.jrdd().count(), 1)
Exemplo n.º 25
0
    def test_load_gtf(self):

        testFile = self.resourceFile("Homo_sapiens.GRCh37.75.trun20.gtf")
        ac = ADAMContext(self.ss)
        
        reads = ac.loadFeatures(testFile)

        self.assertEqual(reads.toDF().count(), 15)
        self.assertEqual(reads._jvmRdd.jrdd().count(), 15)
Exemplo n.º 26
0
    def test_load_variants(self):

        testFile = self.resourceFile("small.vcf")
        ac = ADAMContext(self.sc)

        reads = ac.loadVariants(testFile)

        self.assertEqual(reads.toDF().count(), 6)
        self.assertEqual(reads._jvmRdd.jrdd().count(), 6)
Exemplo n.º 27
0
    def test_transform(self):
        testFile = self.resourceFile("random.vcf")
        ac = ADAMContext(self.ss)

        genotypes = ac.loadGenotypes(testFile)

        transformedGenotypes = genotypes.transform(lambda x: x.filter(x.contigName == '1'))

        self.assertEquals(transformedGenotypes.toDF().count(), 9)
Exemplo n.º 28
0
    def test_realignIndels_reads(self):

        readsPath = self.resourceFile("small.1.sam")

        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(readsPath)
        realigned = reads.realignIndels()

        self.assertEqual(realigned.toDF().count(), 20)
Exemplo n.º 29
0
    def test_transform(self):

        readsPath = self.resourceFile("unsorted.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(readsPath)

        transformedReads = reads.transform(lambda x: x.filter(x.referenceName == "1"))

        self.assertEqual(transformedReads.toDF().count(), 1)
Exemplo n.º 30
0
    def test_transform(self):

        featurePath = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        ac = ADAMContext(self.ss)

        features = ac.loadFeatures(featurePath)

        transformedFeatures = features.transform(lambda x: x.filter(x.start < 12613))

        self.assertEqual(transformedFeatures.toDF().count(), 6)
Exemplo n.º 31
0
    def test_GenotypeCallRatesDistribution(self):
        ac = ADAMContext(self.ss)
        testFile = self.resourceFile("genodata.v3.test.vcf")

        genotypes = ac.loadGenotypes(testFile)
        _, data =  GenotypeCallRatesDistribution(self.ss, genotypes, sample=1.0).plot(testMode= True)
        expected = sorted([0.95, 0.88, 0.89, 0.94, 0.93, 0.90])
        sorted_data = sorted(data)

        assert( expected == [ round(x,2) for x in sorted_data] )
    def test_load_indexed_bam(self):

        readsPath = self.resourceFile("indexed_bams/sorted.bam")
        ac = ADAMContext(self.ss)

        querys = [ReferenceRegion("chr2", 100, 101), ReferenceRegion("3", 10, 17)]

        reads = ac.loadIndexedBam(readsPath, querys)

        self.assertEqual(reads.toDF().count(), 2)
Exemplo n.º 33
0
    def test_load_narrowPeak(self):

        
        testFile = self.resourceFile("wgEncodeOpenChromDnaseGm19238Pk.trunc10.narrowPeak")
        ac = ADAMContext(self.ss)
        
        reads = ac.loadFeatures(testFile)

        self.assertEqual(reads.toDF().count(), 10)
        self.assertEqual(reads._jvmRdd.jrdd().count(), 10)
Exemplo n.º 34
0
    def test_transform(self):

        variantPath = self.resourceFile("small.vcf")
        ac = ADAMContext(self.ss)

        variants = ac.loadVariants(variantPath)

        transformedVariants = variants.transform(lambda x: x.filter(x.start < 19190))

        self.assertEqual(transformedVariants.toDF().count(), 3)
Exemplo n.º 35
0
    def test_HetHomRatioDistribution(self):
        ac = ADAMContext(self.ss)
        testFile = self.resourceFile("genodata.v3.test.vcf")

        genotypes = ac.loadGenotypes(testFile)
        _, data =  HetHomRatioDistribution(self.ss, genotypes, sample=1.0).plot(testMode= True)
        expected = sorted([5.0, 0.6, 0.14, 0.17, 1.67])
        sorted_data = sorted(data)

        assert( expected == [ round(x,2) for x in sorted_data ])
Exemplo n.º 36
0
    def test_load_variants(self):

        
        testFile = self.resourceFile("small.vcf")
        ac = ADAMContext(self.ss)
        
        reads = ac.loadVariants(testFile)

        self.assertEqual(reads.toDF().count(), 6)
        self.assertEqual(reads._jvmRdd.jrdd().count(), 6)
Exemplo n.º 37
0
    def test_toFeatures(self):
        testFile = self.resourceFile("sorted.sam")
        ac = ADAMContext(self.sc)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        features = coverage.toFeatures()

        assert (isinstance(features, FeatureRDD))
        self.assertEquals(features.toDF().count(), coverage.toDF().count())
Exemplo n.º 38
0
    def test_load_contig_fragments(self):


        testFile = self.resourceFile("HLA_DQB1_05_01_01_02.fa")
        ac = ADAMContext(self.ss)
        
        reads = ac.loadContigFragments(testFile)

        self.assertEqual(reads.toDF().count(), 1)
        self.assertEqual(reads._jvmRdd.jrdd().count(), 1)
Exemplo n.º 39
0
    def test_toFeatures(self):
        testFile = self.resourceFile("sorted.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        features = coverage.toFeatures()

        assert(isinstance(features, FeatureRDD))
        self.assertEquals(features.toDF().count(), coverage.toDF().count())
Exemplo n.º 40
0
    def test_save(self):

        testFile = self.resourceFile("sorted.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        tmpPath = self.tmpFile() + ".coverage.adam"
        coverage.save(tmpPath, asSingleFile=True, disableFastConcat=True)
        assert (os.listdir(tmpPath) != [])
Exemplo n.º 41
0
    def test_save_unordered_sam(self):

        testFile = self.resourceFile("unordered.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(testFile)
        tmpPath = self.tmpFile() + ".sam"
        reads.saveAsSam(tmpPath, asSingleFile=True)

        self.checkFiles(testFile, tmpPath)
Exemplo n.º 42
0
    def test_transform(self):

        featurePath = self.resourceFile("gencode.v7.annotation.trunc10.bed")
        ac = ADAMContext(self.ss)

        features = ac.loadFeatures(featurePath)

        transformedFeatures = features.transform(lambda x: x.filter(x.start < 12613))

        self.assertEquals(transformedFeatures.toDF().count(), 6)
Exemplo n.º 43
0
    def test_transform(self):

        variantPath = self.resourceFile("small.vcf")
        ac = ADAMContext(self.ss)

        variants = ac.loadVariants(variantPath)

        transformedVariants = variants.transform(lambda x: x.filter(x.start < 19190))

        self.assertEquals(transformedVariants.toDF().count(), 3)
Exemplo n.º 44
0
    def test_load_indexed_bam(self):

        testFile = self.resourceFile("indexed_bams/sorted.bam")
        ac = ADAMContext(self.ss)

        reads = ac.loadIndexedBam(testFile,
                                  [ReferenceRegion("chr2", 100, 101),
                                   ReferenceRegion("3", 10, 17)])

        self.assertEqual(reads.toDF().count(), 2)
Exemplo n.º 45
0
    def test_load_narrowPeak(self):

        
        testFile = self.resourceFile("wgEncodeOpenChromDnaseGm19238Pk.trunc10.narrowPeak")
        ac = ADAMContext(self.ss)
        
        reads = ac.loadFeatures(testFile)

        self.assertEqual(reads.toDF().count(), 10)
        self.assertEqual(reads._jvmRdd.jrdd().count(), 10)
Exemplo n.º 46
0
    def test_filterByOverlappingRegion(self):

        readsPath = self.resourceFile("unsorted.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(readsPath)

        query = ReferenceRegion("chr2", 1, 400)

        filtered = reads.filterByOverlappingRegion(query)
        self.assertEqual(filtered.toDF().count(), 1)
    def test_save_unordered_sam(self):

        testFile = self.resourceFile("unordered.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(testFile)
        tmpPath = self.tmpFile() + ".sam"
        reads.saveAsSam(tmpPath,
                        asSingleFile=True)

        self.checkFiles(testFile, tmpPath)
Exemplo n.º 48
0
    def test_vcf_sort_lex(self):
    
        testFile = self.resourceFile("random.vcf")
        ac = ADAMContext(self.ss)
        
        genotypes = ac.loadGenotypes(testFile)

        tmpPath = self.tmpFile() + ".vcf"
        genotypes.toVariantContexts().sortLexicographically().saveAsVcf(tmpPath,
                                                                        asSingleFile=True)

        self.checkFiles(tmpPath, self.resourceFile("sorted.lex.vcf", module='adam-cli'))
Exemplo n.º 49
0
    def test_save(self):

        testFile = self.resourceFile("sorted.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(testFile)
        coverage = reads.toCoverage()
        tmpPath = self.tmpFile() + ".coverage.adam"
        coverage.save(tmpPath,
                              asSingleFile=True,
                            disableFastConcat=True)
    	assert(os.listdir(tmpPath) != [])
Exemplo n.º 50
0
    def test_vcf_sort(self):
    
        testFile = self.resourceFile("random.vcf")
        ac = ADAMContext(self.sc)
        
        genotypes = ac.loadGenotypes(testFile)

        tmpPath = self.tmpFile() + ".vcf"
        genotypes.toVariantContextRDD().sort().saveAsVcf(tmpPath,
                                                         asSingleFile=True)

        self.checkFiles(tmpPath, self.resourceFile("sorted.vcf"))
Exemplo n.º 51
0
    def test_filterByOverlappingRegions(self):

        readsPath = self.resourceFile("unsorted.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(readsPath)

        querys = [ReferenceRegion("chr2", 1L, 400L),
                    ReferenceRegion("3", 1L, 100L)]

        filtered = reads.filterByOverlappingRegions(querys)
        self.assertEquals(filtered.toDF().count(), 2)
    def test_union(self):

        testFile1 = self.resourceFile("sorted.sam")
        testFile2 = self.resourceFile("unordered.sam")
        ac = ADAMContext(self.ss)

        reads1 = ac.loadAlignments(testFile1)
        reads2 = ac.loadAlignments(testFile2)

        unionReads = reads1.union([reads2])

        self.assertEqual(unionReads.toDF().count(), 13)
Exemplo n.º 53
0
    def test_vcf_add_filter(self):
        
        testFile = self.resourceFile("small.vcf")
        ac = ADAMContext(self.ss)
        
        genotypes = ac.loadGenotypes(testFile)

        tmpPath = self.tmpFile() + ".vcf"
        genotypes.toVariantContexts().addFilterHeaderLine("BAD",
                                                          "Bad variant.").saveAsVcf(tmpPath)

        self.check_for_line_in_file(tmpPath, '##FILTER=<ID=BAD,Description="Bad variant.">')
Exemplo n.º 54
0
    def test_to_coverage(self):

        readsPath = self.resourceFile("unsorted.sam")
        ac = ADAMContext(self.ss)

        reads = ac.loadAlignments(readsPath)

        coverage = reads.toCoverage()
        self.assertEquals(coverage.toDF().count(), 42)

        coverage = reads.toCoverage(collapse = False)
        self.assertEquals(coverage.toDF().count(), 46)
Exemplo n.º 55
0
    def test_to_variants(self):
        testFile = self.resourceFile("small.vcf")
        ac = ADAMContext(self.ss)

        genotypes = ac.loadGenotypes(testFile)

        variants = genotypes.toVariants()

        self.assertEquals(variants.toDF().count(), 18)

        variants = genotypes.toVariants(dedupe=True)

        self.assertEquals(variants.toDF().count(), 6)