def test_round_trip_gtf(self):
    """Check that GTF features keep their record count across save/reload.

    Loads the GTF test resource, saves it to a temporary ``.gtf`` path as a
    single file, reads the saved file back, and asserts that the original
    and reloaded datasets contain the same number of records.
    """
    testFile = self.resourceFile("Homo_sapiens.GRCh37.75.trun20.gtf")
    ac = ADAMContext(self.sc)

    features = ac.loadFeatures(testFile)
    tmpPath = self.tmpFile() + ".gtf"
    features.save(tmpPath, asSingleFile=True)

    # Reload the file that was just written. Re-reading testFile would only
    # compare the input with itself and never exercise the saved output.
    savedFeatures = ac.loadFeatures(tmpPath)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(features._jvmRdd.jrdd().count(),
                     savedFeatures._jvmRdd.jrdd().count())
def test_round_trip_bed(self):
    """Check that BED features keep their record count across save/reload.

    Loads the BED test resource, saves it to a temporary ``.bed`` path as a
    single file, reads the saved file back, and asserts that the original
    and reloaded datasets contain the same number of records.
    """
    testFile = self.resourceFile("gencode.v7.annotation.trunc10.bed")
    ac = ADAMContext(self.sc)

    features = ac.loadFeatures(testFile)
    tmpPath = self.tmpFile() + ".bed"
    features.save(tmpPath, asSingleFile=True)

    # Reload the file that was just written. Re-reading testFile would only
    # compare the input with itself and never exercise the saved output.
    savedFeatures = ac.loadFeatures(tmpPath)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(features._jvmRdd.jrdd().count(),
                     savedFeatures._jvmRdd.jrdd().count())
def test_round_trip_interval_list(self):
    """Check that interval_list features keep their count across save/reload.

    Loads the interval_list test resource, saves it to a temporary
    ``.interval_list`` path as a single file, reads the saved file back, and
    asserts that the original and reloaded datasets contain the same number
    of records.
    """
    testFile = self.resourceFile("SeqCap_EZ_Exome_v3.hg19.interval_list")
    ac = ADAMContext(self.sc)

    features = ac.loadFeatures(testFile)
    tmpPath = self.tmpFile() + ".interval_list"
    features.save(tmpPath, asSingleFile=True)

    # Reload the file that was just written. Re-reading testFile would only
    # compare the input with itself and never exercise the saved output.
    savedFeatures = ac.loadFeatures(tmpPath)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(features._jvmRdd.jrdd().count(),
                     savedFeatures._jvmRdd.jrdd().count())
def test_round_trip_bed(self):
    """Check that BED features keep their record count across save/reload.

    Loads the BED test resource, saves it to a temporary ``.bed`` path as a
    single file, reads the saved file back, and asserts that the original
    and reloaded datasets contain the same number of records.
    """
    testFile = self.resourceFile("gencode.v7.annotation.trunc10.bed")
    ac = ADAMContext(self.ss)

    features = ac.loadFeatures(testFile)
    tmpPath = self.tmpFile() + ".bed"
    features.save(tmpPath, asSingleFile=True)

    # Reload the file that was just written. Re-reading testFile would only
    # compare the input with itself and never exercise the saved output.
    savedFeatures = ac.loadFeatures(tmpPath)

    self.assertEqual(features._jvmRdd.jrdd().count(),
                     savedFeatures._jvmRdd.jrdd().count())
def test_round_trip_gtf(self):
    """Check that GTF features keep their record count across save/reload.

    Loads the GTF test resource, saves it to a temporary ``.gtf`` path as a
    single file, reads the saved file back, and asserts that the original
    and reloaded datasets contain the same number of records.
    """
    testFile = self.resourceFile("Homo_sapiens.GRCh37.75.trun20.gtf")
    ac = ADAMContext(self.ss)

    features = ac.loadFeatures(testFile)
    tmpPath = self.tmpFile() + ".gtf"
    features.save(tmpPath, asSingleFile=True)

    # Reload the file that was just written. Re-reading testFile would only
    # compare the input with itself and never exercise the saved output.
    savedFeatures = ac.loadFeatures(tmpPath)

    self.assertEqual(features._jvmRdd.jrdd().count(),
                     savedFeatures._jvmRdd.jrdd().count())
def test_round_trip_interval_list(self):
    """Check that interval_list features keep their count across save/reload.

    Loads the interval_list test resource, saves it to a temporary
    ``.interval_list`` path as a single file, reads the saved file back, and
    asserts that the original and reloaded datasets contain the same number
    of records.
    """
    testFile = self.resourceFile("SeqCap_EZ_Exome_v3.hg19.interval_list")
    ac = ADAMContext(self.ss)

    features = ac.loadFeatures(testFile)
    tmpPath = self.tmpFile() + ".interval_list"
    features.save(tmpPath, asSingleFile=True)

    # Reload the file that was just written. Re-reading testFile would only
    # compare the input with itself and never exercise the saved output.
    savedFeatures = ac.loadFeatures(tmpPath)

    self.assertEqual(features._jvmRdd.jrdd().count(),
                     savedFeatures._jvmRdd.jrdd().count())
def test_round_trip_narrowPeak(self):
    """Check that narrowPeak features keep their count across save/reload.

    Loads the narrowPeak test resource, saves it to a temporary
    ``.narrowPeak`` path as a single file, reads the saved file back, and
    asserts that the original and reloaded datasets contain the same number
    of records.
    """
    testFile = self.resourceFile(
        "wgEncodeOpenChromDnaseGm19238Pk.trunc10.narrowPeak")
    ac = ADAMContext(self.sc)

    features = ac.loadFeatures(testFile)
    tmpPath = self.tmpFile() + ".narrowPeak"
    features.save(tmpPath, asSingleFile=True)

    # Reload the file that was just written. Re-reading testFile would only
    # compare the input with itself and never exercise the saved output.
    savedFeatures = ac.loadFeatures(tmpPath)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(features._jvmRdd.jrdd().count(),
                     savedFeatures._jvmRdd.jrdd().count())
def test_round_trip_narrowPeak(self):
    """Check that narrowPeak features keep their count across save/reload.

    Loads the narrowPeak test resource, saves it to a temporary
    ``.narrowPeak`` path as a single file, reads the saved file back, and
    asserts that the original and reloaded datasets contain the same number
    of records.
    """
    testFile = self.resourceFile(
        "wgEncodeOpenChromDnaseGm19238Pk.trunc10.narrowPeak")
    ac = ADAMContext(self.ss)

    features = ac.loadFeatures(testFile)
    tmpPath = self.tmpFile() + ".narrowPeak"
    features.save(tmpPath, asSingleFile=True)

    # Reload the file that was just written. Re-reading testFile would only
    # compare the input with itself and never exercise the saved output.
    savedFeatures = ac.loadFeatures(tmpPath)

    self.assertEqual(features._jvmRdd.jrdd().count(),
                     savedFeatures._jvmRdd.jrdd().count())
def test_load_interval_list(self):
    """Loading the interval_list resource must yield 369 records.

    The count is checked twice: once through ``toDF()`` and once through
    the ``_jvmRdd`` handle.
    """
    expected_count = 369

    interval_list_path = self.resourceFile(
        "SeqCap_EZ_Exome_v3.hg19.interval_list")
    loaded = ADAMContext(self.ss).loadFeatures(interval_list_path)

    self.assertEqual(loaded.toDF().count(), expected_count)
    self.assertEqual(loaded._jvmRdd.jrdd().count(), expected_count)
def test_load_gtf(self):
    """Loading the truncated GTF resource must yield 15 records.

    The count is checked twice: once through ``toDF()`` and once through
    the ``_jvmRdd`` handle.
    """
    expected_count = 15

    gtf_path = self.resourceFile("Homo_sapiens.GRCh37.75.trun20.gtf")
    loaded = ADAMContext(self.ss).loadFeatures(gtf_path)

    self.assertEqual(loaded.toDF().count(), expected_count)
    self.assertEqual(loaded._jvmRdd.jrdd().count(), expected_count)
def test_load_bed(self):
    """Loading the truncated BED resource must yield 10 records.

    The count is checked twice: once through ``toDF()`` and once through
    the ``_jvmRdd`` handle.
    """
    expected_count = 10

    bed_path = self.resourceFile("gencode.v7.annotation.trunc10.bed")
    loaded = ADAMContext(self.ss).loadFeatures(bed_path)

    self.assertEqual(loaded.toDF().count(), expected_count)
    self.assertEqual(loaded._jvmRdd.jrdd().count(), expected_count)
def test_transform(self):
    """Filtering loaded BED features by ``start < 12613`` must leave 6 rows."""

    def starts_before_cutoff(frame):
        # ``frame`` is whatever ``transform`` hands to its callback
        # (presumably a Spark DataFrame -- confirm against the ADAM API).
        return frame.filter(frame.start < 12613)

    bed_path = self.resourceFile("gencode.v7.annotation.trunc10.bed")
    loaded = ADAMContext(self.ss).loadFeatures(bed_path)

    filtered = loaded.transform(starts_before_cutoff)

    self.assertEqual(filtered.toDF().count(), 6)
def test_load_narrowPeak(self):
    """Loading the truncated narrowPeak resource must yield 10 records.

    The count is checked twice: once through ``toDF()`` and once through
    the ``_jvmRdd`` handle.
    """
    expected_count = 10

    narrow_peak_path = self.resourceFile(
        "wgEncodeOpenChromDnaseGm19238Pk.trunc10.narrowPeak")
    loaded = ADAMContext(self.ss).loadFeatures(narrow_peak_path)

    self.assertEqual(loaded.toDF().count(), expected_count)
    self.assertEqual(loaded._jvmRdd.jrdd().count(), expected_count)
def test_transform(self):
    """Filtering loaded BED features by ``start < 12613`` must leave 6 rows.

    Loads the truncated BED resource, applies ``transform`` with a filter on
    the ``start`` column, and checks the row count of the result.
    """
    featurePath = self.resourceFile("gencode.v7.annotation.trunc10.bed")
    ac = ADAMContext(self.ss)

    features = ac.loadFeatures(featurePath)

    transformedFeatures = features.transform(
        lambda x: x.filter(x.start < 12613))

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(transformedFeatures.toDF().count(), 6)
def test_shuffle_right_outer_join_groupBy_left(self):
    """Right-outer shuffle join grouped by left must produce 21 rows.

    Joins the alignments from ``small.1.sam`` against the features from
    ``small.1.bed`` and checks the row count of the joined result.
    """
    sam_path = self.resourceFile("small.1.sam")
    bed_path = self.resourceFile("small.1.bed")

    adam_ctx = ADAMContext(self.ss)
    alignments = adam_ctx.loadAlignments(sam_path)
    regions = adam_ctx.loadFeatures(bed_path)

    joined = alignments.rightOuterShuffleRegionJoinAndGroupByLeft(regions)
    row_count = joined.toDF().count()

    self.assertEqual(row_count, 21)
def test_shuffle_inner_join(self):
    """Shuffle region join of reads against targets must produce 5 rows.

    Joins the alignments from ``small.1.sam`` against the features from
    ``small.1.bed`` and checks the row count of the joined result.
    """
    sam_path = self.resourceFile("small.1.sam")
    bed_path = self.resourceFile("small.1.bed")

    adam_ctx = ADAMContext(self.ss)
    alignments = adam_ctx.loadAlignments(sam_path)
    regions = adam_ctx.loadFeatures(bed_path)

    joined = alignments.shuffleRegionJoin(regions)
    row_count = joined.toDF().count()

    self.assertEqual(row_count, 5)
def test_broadcast_right_outer_join(self):
    """Right-outer broadcast region join must produce 6 rows.

    Joins the alignments from ``small.1.sam`` against the features from
    ``small.1.bed`` and checks the row count of the joined result.
    """
    sam_path = self.resourceFile("small.1.sam")
    bed_path = self.resourceFile("small.1.bed")

    adam_ctx = ADAMContext(self.ss)
    alignments = adam_ctx.loadAlignments(sam_path)
    regions = adam_ctx.loadFeatures(bed_path)

    joined = alignments.rightOuterBroadcastRegionJoin(regions)
    row_count = joined.toDF().count()

    self.assertEqual(row_count, 6)
def test_visualize_features(self):
    """Smoke-test ``GenomicVizRDD.ViewFeatures`` on a small BED file.

    The final assertion is vacuous, so this test passes whenever loading
    the features and calling ``ViewFeatures`` complete without raising.
    """
    viz = GenomicVizRDD(self.ss)

    # Load the features that will be viewed.
    adam_ctx = ADAMContext(self.ss)
    bed_path = self.resourceFile("smalltest.bed")
    loaded = adam_ctx.loadFeatures(bed_path)

    # Region handed to the viewer.
    contig, start, end = "chrM", 1, 2000

    x = viz.ViewFeatures(loaded, contig, start, end)
    assert True
def test_visualize_features(self):
    """Check that ``FeatureSummary.viewPileup`` returns a result.

    Loads features from ``smalltest.bed``, builds a ``FeatureSummary`` over
    them, requests a pileup view for ``chrM:1-2000``, and asserts that the
    returned object is not ``None``.
    """
    # load file
    ac = ADAMContext(self.ss)
    testFile = self.resourceFile("smalltest.bed")

    # read features
    features = ac.loadFeatures(testFile)

    featureViz = FeatureSummary(ac, features)

    contig = "chrM"
    start = 1
    end = 2000

    x = featureViz.viewPileup(contig, start, end)

    # Identity check: ``!= None`` goes through the object's __ne__, which a
    # returned type may override; ``is not None`` cannot be fooled that way.
    assert x is not None