Python SimpleOutputRenderer示例，oncotator.output.SimpleOutputRenderer.SimpleOutputRenderer Python示例

示例#1

0

显示文件

文件： MafliteInputMutationCreatorTest.py 项目： alexramos/oncotator

    def test_simple_seg_file_input(self):
        """Read a seg file, apply no annotation, and render it as SIMPLE_TSV.

        The input is read twice: the first pass only counts the segments, and a
        freshly created input creator feeds the renderer (presumably because the
        stream returned by ``createMutations()`` is single-pass -- confirm).
        """
        inputFilename = "testdata/seg/Patient0.seg.txt"
        output_filename = "out/test_simple_seg_file_input.tsv"
        config_filename = 'configs/seg_file_input.config'

        # Remove any stale output so the checks below only see this run's file.
        if os.path.exists(output_filename):
            os.remove(output_filename)

        # Count by consuming the stream.  Tracking the last enumerate() index
        # with a pre-seeded ``i = 1`` reported 2 segments for an empty stream.
        ic = MafliteInputMutationCreator(inputFilename, config_filename)
        num_segs = sum(1 for _ in ic.createMutations())
        self.assertEqual(num_segs, 27, "Found %d segments when there should have been 27." % num_segs)

        ic = MafliteInputMutationCreator(inputFilename, config_filename)
        segs = ic.createMutations()

        outputRenderer = SimpleOutputRenderer(output_filename, '')
        outputRenderer.renderMutations(segs)

        # Now check the output
        output_reader = GenericTsvReader(output_filename)

        required_cols = ["Sample", "Num_Probes", "Segment_Mean"]
        headers = output_reader.getFieldNames()
        for rcol in required_cols:
            self.assertIn(rcol, headers)

        # Every rendered row must carry non-blank start and end values.
        for line_dict in output_reader:
            for pos_col in ('start', 'end'):
                self.assertIsNotNone(line_dict[pos_col])
                self.assertNotEqual(line_dict[pos_col].strip(), "")

示例#2

0

显示文件

文件： AnnotatorTest.py 项目： ihuerga/oncotator

    def testBlankAnnotatorInit(self):
        """Annotate a tiny maflite file with no datasources and check the output line count."""
        self.logger.info("Starting Blank Annotator Init Test...")

        inputCreator = MafliteInputMutationCreator(
            'testdata/maflite/tiny_maflite.maf.txt')
        outputRenderer = SimpleOutputRenderer(
            "out/testBlankAnnotatorTestFile.tsv")

        # Wire the input creator and output renderer into the annotator and run it.
        annotator = Annotator()
        annotator.setInputCreator(inputCreator)
        annotator.setOutputRenderer(outputRenderer)
        testOutputFilename = annotator.annotate()

        # Expected number of lines in the output file: one per variant per
        # sample, plus the extra (header and comment) lines.
        numSamples = 1
        numExtraLines = 3  # one for header, two for comment lines
        numDoubleLines = 0  # Number of lines with two alt alleles
        numVariants = 9
        gt = numSamples * numVariants + numDoubleLines * numSamples + numExtraLines

        # The with-block closes the handle even if reading the file raises.
        with open(testOutputFilename, 'r') as fp:
            ctr = sum(1 for _ in fp)

        self.assertEqual(
            ctr, gt, "Number of lines read was not correct: " + str(ctr) +
            " -- should have been: " + str(gt))

示例#3

0

显示文件

    def testSNPsAndIndelStartAndEndPos(self):
        """
        Check the rendered start/end positions of selected SNPs and indels against the values expected under
        the NCI MAF specification
        (https://wiki.nci.nih.gov/display/TCGA/Mutation+Annotation+Format+(MAF)+Specification).

        Rows whose start position is not one of the three listed below are not checked.
        """
        vcfPath = os.path.join("testdata", "vcf", "example.snps.indels.vcf")
        tsvPath = os.path.join("out", "example.snps.indels.out.tsv")

        mutationCreator = VcfInputMutationCreator(vcfPath)
        mutationCreator.createMutations()
        tsvRenderer = SimpleOutputRenderer(tsvPath)
        pipeline = Annotator()
        pipeline.setInputCreator(mutationCreator)
        pipeline.setOutputRenderer(tsvRenderer)
        pipeline.annotate()

        # Start position -> end position that the rendered row must report.
        expectedEndByStart = {
            "16890445": "16890445",
            "154524458": "154524459",
            "114189432": "114189433",
        }

        for row in GenericTsvReader(tsvPath):
            expectedEnd = expectedEndByStart.get(row['start'])
            if expectedEnd is None:
                continue
            self.assertEqual(row["end"], expectedEnd,
                             "The value should be %s but it was %s." % (expectedEnd, row["end"]))

示例#4

0

显示文件

    def testDuplicateAnnotation(self):
        """
        Annotate a VCF with duplicate annotations and check that both ``variant_status`` and
        ``sample_variant_status`` appear in the header and in the first row with the expected values.
        """
        vcfPath = os.path.join("testdata", "vcf", "example.duplicate_annotation.vcf")
        tsvPath = os.path.join("out", "example.duplicate_annotation.out.tsv")

        mutationCreator = VcfInputMutationCreator(vcfPath)
        mutationCreator.createMutations()
        tsvRenderer = SimpleOutputRenderer(tsvPath)
        pipeline = Annotator()
        pipeline.setInputCreator(mutationCreator)
        pipeline.setOutputRenderer(tsvRenderer)
        pipeline.annotate()

        reader = GenericTsvReader(tsvPath)

        # Header must expose both columns.
        headerNames = reader.getFieldNames()
        headerChecks = (
            ("variant_status", "variant_status field is missing in the header."),
            ("sample_variant_status", "sample_variant_status is missing in the header."),
        )
        for column, failureMessage in headerChecks:
            self.assertTrue(column in headerNames, failureMessage)

        # Only the first data row is inspected.
        firstRow = reader.next()
        rowChecks = (
            ("variant_status", "variant_status field is missing in the row."),
            ("sample_variant_status", "sample_variant_status is missing in the row."),
        )
        for column, failureMessage in rowChecks:
            self.assertTrue(column in firstRow, failureMessage)

        self.assertEqual("2", firstRow["variant_status"], "Incorrect value of variant_status.")
        self.assertEqual("0", firstRow["sample_variant_status"], "Incorrect value of sample_variant_status")

示例#5

0

显示文件

    def testBasicAnnotation(self):
        """Annotate from a generic transcript-keyed TSV datasource; only the output headers are verified."""
        # A transcript-provider (gaf) datasource is needed to annotate the gene.
        transcriptProvider = TestUtils.createTranscriptProviderDatasource(config=self.config)
        transcriptTsvDS = DatasourceFactory.createDatasource(
            "testdata/small_transcript_tsv_ds/small_transcript_tsv_ds.config",
            "testdata/small_transcript_tsv_ds/")

        pipeline = Annotator()
        pipeline.setInputCreator(MafliteInputMutationCreator('testdata/maflite/Patient0.snp.maf.txt'))
        pipeline.setOutputRenderer(SimpleOutputRenderer('out/genericTranscriptTest.out.tsv'))
        pipeline.addDatasource(transcriptProvider)
        pipeline.addDatasource(transcriptTsvDS)
        renderedPath = pipeline.annotate()

        reader = GenericTsvReader(renderedPath)
        columnNames = reader.getFieldNames()
        for wanted in ("refseq_test_mRNA_Id", "refseq_test_prot_Id"):
            self.assertTrue(wanted in columnNames,
                            wanted + " not found in headers: " + str(columnNames))

示例#6

0

显示文件

文件： GenericGeneProteinPositionDatasourceTest.py 项目： ihuerga/oncotator

    def testCreationAndAnnotation(self):
        """Create the gene/protein-position datasource, annotate a tiny maflite file, and check the single result row."""
        renderTarget = 'out/genericGeneProteinPositionTest.out.tsv'

        transcriptProvider = TestUtils.createTranscriptProviderDatasource(self.config)
        natVarDS = DatasourceFactory.createDatasource(
            "testdata/simple_uniprot_natvar/simple_uniprot_natvar.config",
            "testdata/simple_uniprot_natvar/")

        pipeline = Annotator()
        pipeline.setInputCreator(MafliteInputMutationCreator('testdata/maflite/tiny_maflite_natvar.maf.tsv'))
        pipeline.setOutputRenderer(SimpleOutputRenderer(renderTarget))
        pipeline.addDatasource(transcriptProvider)
        pipeline.addDatasource(natVarDS)
        renderedPath = pipeline.annotate()

        # Make sure that some values were populated
        self.assertTrue(os.path.exists(renderedPath))
        reader = GenericTsvReader(renderedPath)

        natVarColumn = "UniProt_NatVar_natural_variations"
        # The "|"-separated values are compared order-insensitively.
        expectedVariations = sorted("R -> RR (in EDMD2).|R -> Q (in EDMD2).".split("|"))

        rowsSeen = 0
        for record in reader:
            observedVariations = sorted(record[natVarColumn].split("|"))
            self.assertTrue(observedVariations == expectedVariations,
                            "Annotation value did not match: " + record[natVarColumn])
            rowsSeen += 1

        self.assertTrue(rowsSeen == 1, "Number of mutations incorrect (1): " + str(rowsSeen))

示例#7

0

显示文件

文件： AnnotatorTest.py 项目： Yixf-Self/oncotator

    def testManualAnnotations(self):
        """Check that manual annotations set on the Annotator appear, unchanged, on every output row."""
        pipeline = Annotator()
        overrides = {'source': 'Capture', 'status': 'Somatic', 'phase': 'Phase_I', 'sequencer': 'Illumina GAIIx'}
        pipeline.setManualAnnotations(overrides)
        pipeline.setInputCreator(MafliteInputMutationCreator('testdata/maflite/Patient0.snp.maf.txt'))
        pipeline.setOutputRenderer(SimpleOutputRenderer("out/testManualAnnotationsFile.tsv"))

        renderedPath = pipeline.annotate()

        overriddenFields = overrides.keys()

        # An empty file would make the per-row loop below pass vacuously.
        self.assertTrue(os.stat(renderedPath).st_size > 0,
                        "Generated TSV file (" + renderedPath + ") is empty.")

        for lineNumber, record in enumerate(GenericTsvReader(renderedPath), 1):
            lineLabel = str(lineNumber)
            for field in overriddenFields:
                populatedSuffix = ", when it should be populated: " + field
                self.assertTrue(record[field] != "__UNKNOWN__",
                                "__UNKNOWN__ value seen on line " + lineLabel + populatedSuffix)
                self.assertTrue(record[field] != "",
                                "Blank value seen on line " + lineLabel + populatedSuffix)
                self.assertTrue(record[field] == overrides[field],
                                "Value for " + field + " on line " + lineLabel + " did not match override: " +
                                str(record[field]) + " <> " + str(overrides[field]))

示例#8

0

显示文件

文件： MafliteInputMutationCreatorTest.py 项目： xingtech/oncotator

    def test_simple_seg_file_annotations(self):
        """Read a seg file, annotate each segment with the GENCODE datasource, and render as SIMPLE_TSV.

        The input is read twice: the first pass only counts the segments, and a
        freshly created input creator supplies the segments that are annotated
        (presumably because the stream from ``createMutations()`` is
        single-pass -- confirm).
        """
        inputFilename = "testdata/seg/Patient0.seg.txt"
        output_filename = "out/test_simple_seg_file_annotations.tsv"
        config_filename = 'configs/seg_file_input.config'

        # Remove any stale output so the checks below only see this run's file.
        if os.path.exists(output_filename):
            os.remove(output_filename)

        # Count by consuming the stream.  Tracking the last enumerate() index
        # with a pre-seeded ``i = 1`` reported 2 segments for an empty stream.
        ic = MafliteInputMutationCreator(inputFilename, None, config_filename)
        num_segs = sum(1 for _ in ic.createMutations())
        self.assertEqual(
            num_segs, 27,
            "Found %d segments when there should have been 27." % num_segs)

        ic = MafliteInputMutationCreator(inputFilename, None, config_filename)
        segs = ic.createMutations()

        gencode_ds = TestUtils._create_test_gencode_v19_ds(
            "out/seg_file_gencode_ds")

        # Segments are annotated directly through the datasource; no Annotator
        # instance is involved.
        segs_annotated = [gencode_ds.annotate_segment(seg) for seg in segs]

        outputRenderer = SimpleOutputRenderer(output_filename, '')
        outputRenderer.renderMutations(iter(segs_annotated))

        # Now check the output
        output_reader = GenericTsvReader(output_filename)

        required_cols = ["Sample", "Num_Probes", "Segment_Mean"]
        headers = output_reader.getFieldNames()
        for rcol in required_cols:
            self.assertIn(rcol, headers)

        # Every rendered row must carry non-blank start and end values and a
        # "genes" column.
        for line_dict in output_reader:
            for pos_col in ('start', 'end'):
                self.assertIsNotNone(line_dict[pos_col])
                self.assertNotEqual(line_dict[pos_col].strip(), "")
            self.assertIn("genes", line_dict)

示例#9

0

显示文件

文件： GafDatasourceTest.py 项目： xingtech/oncotator

    def testMulticoreAnnotateFromChunkedFile(self):
        """Annotate a maflite file in chunks across a worker pool and check the rendered row count.

        Mutations are pulled from the input creator in rounds of ``numChunks``
        chunks of up to ``chunkSize`` mutations each; every round is mapped over
        the pool, flattened, and collected.  The collected results are rendered
        and the output is expected to contain 730 rows.
        """
        #TODO: Add unit test that Mutation data is pickle-able
        inputFile = "testdata/maflite/Patient0.snp.maf.txt"
        outputFile = "out/testGAFMulticorePatient0.snp.maf.txt"
        chunkSize = 200
        numChunks = 4

        gafDatasource = TestUtils.createGafDatasourceProxy(self.config)
        ic = MafliteInputMutationCreator(inputFile)
        oc = SimpleOutputRenderer(outputFile)

        # createChunks
        muts = ic.createMutations()

        allAnnotatedChunksFlat = []
        are_mutations_remaining = True
        p = LoggingPool(processes=numChunks)
        # try/finally so the pool is closed and joined even when mapping or
        # flattening raises; otherwise the workers would be left behind.
        try:
            while are_mutations_remaining:

                chunks = []
                for _ in xrange(0, numChunks):
                    chunk = []
                    for _ in xrange(0, chunkSize):
                        try:
                            chunk.append(next(muts))
                        except StopIteration:
                            # Input exhausted: finish this round, then stop.
                            are_mutations_remaining = False
                            break

                    # Each work item pairs a chunk with the datasource.
                    chunks.append((chunk, gafDatasource))

                annotatedChunks = p.map(annotate_mutations_global, chunks)
                annotatedChunksFlat = self._flattenChunks(annotatedChunks)
                allAnnotatedChunksFlat.append(annotatedChunksFlat)
        finally:
            p.close()
            p.join()

        annotatedMuts = chain.from_iterable(allAnnotatedChunksFlat)

        oc.renderMutations(annotatedMuts, Metadata())
        tsvReader = GenericTsvReader(outputFile)
        ctr = sum(1 for _ in tsvReader)
        self.assertEqual(ctr, 730,
                         "Should have read 730 variants, but read " + str(ctr))

示例#10

0

显示文件

文件： GafDatasourceTest.py 项目： Tmacme/oncotator

    def testMulticoreAnnotateFromChunkedFile(self):
        """Annotate a maflite file in chunks across a worker pool and check the rendered row count.

        Mutations are pulled from the input creator in rounds of ``numChunks``
        chunks of up to ``chunkSize`` mutations each; every round is mapped over
        the pool, flattened, and collected.  The collected results are rendered
        and the output is expected to contain 730 rows.
        """
        #TODO: Add unit test that Mutation data is pickle-able
        inputFile = "testdata/maflite/Patient0.snp.maf.txt"
        outputFile = "out/testGAFMulticorePatient0.snp.maf.txt"
        chunkSize = 200
        numChunks = 4

        gafDatasource = TestUtils.createGafDatasourceProxy(self.config)
        ic = MafliteInputMutationCreator(inputFile)
        oc = SimpleOutputRenderer(outputFile)

        # createChunks
        muts = ic.createMutations()

        allAnnotatedChunksFlat = []
        are_mutations_remaining = True
        p = LoggingPool(processes=numChunks)
        # Guarantee pool shutdown on every exit path; without the finally
        # clause a failure inside the loop skipped close()/join().
        try:
            while are_mutations_remaining:

                chunks = []
                for _ in xrange(0, numChunks):
                    chunk = []
                    for _ in xrange(0, chunkSize):
                        try:
                            chunk.append(next(muts))
                        except StopIteration:
                            # No more mutations: complete this round and exit the while loop.
                            are_mutations_remaining = False
                            break

                    # Each work item pairs a chunk with the datasource.
                    chunks.append((chunk, gafDatasource))

                annotatedChunks = p.map(annotate_mutations_global, chunks)
                annotatedChunksFlat = self._flattenChunks(annotatedChunks)
                allAnnotatedChunksFlat.append(annotatedChunksFlat)
        finally:
            p.close()
            p.join()

        annotatedMuts = chain.from_iterable(allAnnotatedChunksFlat)

        oc.renderMutations(annotatedMuts, Metadata())
        tsvReader = GenericTsvReader(outputFile)
        ctr = sum(1 for _ in tsvReader)
        self.assertEqual(ctr, 730, "Should have read 730 variants, but read " + str(ctr))

示例#11

0

显示文件

文件： GenericGeneDataSourceTest.py 项目： xingtech/oncotator

    def testBasicAnnotation(self):
        """Annotate from a basic TSV gene datasource and check that rows for known genes are populated.

        The transcript-provider (GAF) datasource is added before the TSV gene
        datasource because the gene annotation must be populated first.
        Uses a trimmed CancerGeneCensus table as the basis for this test.
        """

        # cut -f 1 oncotator/test/testdata/small_tsv_ds/CancerGeneCensus_Table_1_full_2012-03-15_trim.txt | egrep -v Symbol | sed -r "s/^/'/g" | sed ':a;N;$!ba;s/\n/,/g' | sed -r "s/,'/','/g"
        genesAvailable = [
            'ABL1', 'ABL2', 'ACSL3', 'AF15Q14', 'AF1Q', 'AF3p21', 'AF5q31',
            'AKAP9', 'AKT1', 'AKT2', 'ALDH2', 'ALK', 'ALO17', 'APC',
            'ARHGEF12', 'ARHH', 'ARID1A', 'ARID2', 'ARNT', 'ASPSCR1', 'ASXL1',
            'ATF1', 'ATIC', 'ATM', 'ATRX', 'BAP1', 'BCL10', 'BCL11A', 'BCL11B'
        ]

        # We need a gaf data source to annotate gene

        gafDatasource = TestUtils.createTranscriptProviderDatasource(
            config=self.config)
        geneDS = DatasourceFactory.createDatasource(
            "testdata/small_tsv_ds/small_tsv_ds.config",
            "testdata/small_tsv_ds/")
        outputFilename = 'out/genericGeneTest.out.tsv'

        annotator = Annotator()
        annotator.setInputCreator(
            MafliteInputMutationCreator(
                'testdata/maflite/Patient0.snp.maf.txt'))
        annotator.setOutputRenderer(SimpleOutputRenderer(outputFilename))
        annotator.addDatasource(gafDatasource)
        annotator.addDatasource(geneDS)
        annotator.annotate()

        # Check that there were actual annotations performed.
        tsvReader = GenericTsvReader(outputFilename)

        fields = tsvReader.getFieldNames()
        # The failure message now names the same column the assertion checks.
        self.assertTrue(
            'CGC_Abridged_Other Syndrome/Disease' in fields,
            "'CGC_Abridged_Other Syndrome/Disease' was not present in the header")
        self.assertTrue(
            'CGC_Abridged_Mutation Type' in fields,
            "'CGC_Abridged_Mutation Type' was not present in the header")

        linesThatShouldBeAnnotated = 0
        for ctr, lineDict in enumerate(tsvReader, 1):
            self.assertIn('gene', lineDict)
            if lineDict['gene'] in genesAvailable:
                self.assertTrue(
                    lineDict['CGC_Abridged_GeneID'] != '',
                    "'CGC_Abridged_GeneID' was missing on a row that should have been populated.  Line: "
                    + str(ctr))
                linesThatShouldBeAnnotated += 1

        # Guard against input that never hits a listed gene, which would make
        # the per-row check above pass vacuously.
        self.assertTrue(linesThatShouldBeAnnotated > 0,
                        "Bad data -- cannot test missed detects.")

示例#12

0

显示文件

    def testNumberGRenderingOfRandomVcf(self):
        """
        Run the annotator over ``number_g.random.vcf`` and render the result as a simple TSV.

        No assertions are made on the output; the test fails only if one of the calls raises.
        """
        vcfPath = os.path.join("testdata", "vcf", "number_g.random.vcf")
        tsvPath = os.path.join("out", "number_g.random.out.tsv")

        mutationCreator = VcfInputMutationCreator(vcfPath)
        mutationCreator.createMutations()
        tsvRenderer = SimpleOutputRenderer(tsvPath)

        pipeline = Annotator()
        pipeline.setInputCreator(mutationCreator)
        pipeline.setOutputRenderer(tsvRenderer)
        pipeline.annotate()

示例#13

0

显示文件

    def testAnnotationWithNoSampleNameExampleVcf(self):
        """
        Annotate ``example.sampleName.removed.vcf`` (a VCF with no samples) and render it as a simple TSV.

        No assertions are made on the output; the test fails only if one of the calls raises.
        """
        vcfPath = os.path.join("testdata", "vcf", "example.sampleName.removed.vcf")
        tsvPath = os.path.join("out", "example.sampleName.removed.out.tsv")

        mutationCreator = VcfInputMutationCreator(vcfPath)
        tsvRenderer = SimpleOutputRenderer(tsvPath)

        pipeline = Annotator()
        pipeline.setInputCreator(mutationCreator)
        pipeline.setOutputRenderer(tsvRenderer)
        pipeline.annotate()

示例#14

0

显示文件

    def testSwitchedFieldsWithExampleVcf(self):
        """
        Tests whether the switched tags are ignored.

        The test makes no assertions on the output; it passes when annotating
        ``example.bad.switched.fields.vcf`` completes without raising.
        """
        inputFilename = os.path.join("testdata", "vcf", "example.bad.switched.fields.vcf")
        outputFilename = os.path.join("out", "example.switched.out.tsv")

        creator = VcfInputMutationCreator(inputFilename)
        creator.createMutations()
        renderer = SimpleOutputRenderer(outputFilename, [])
        annotator = Annotator()
        annotator.setInputCreator(creator)
        annotator.setOutputRenderer(renderer)
        # Actually run the pipeline: without this call the annotator was only
        # configured and the test could never fail on the input file.
        annotator.annotate()

示例#15

0

显示文件

    def testSimpleAnnotationWithAComplexVcf(self):
        """
        Annotate the rather complex ``random.vcf`` and render it as a simple TSV, expecting no errors.

        No assertions are made on the output; the test fails only if one of the calls raises.
        """
        vcfPath = os.path.join("testdata", "vcf", "random.vcf")
        tsvPath = os.path.join("out", "random.tsv")

        mutationCreator = VcfInputMutationCreator(vcfPath)
        mutationCreator.createMutations()
        tsvRenderer = SimpleOutputRenderer(tsvPath, [])

        pipeline = Annotator()
        pipeline.setInputCreator(mutationCreator)
        pipeline.setOutputRenderer(tsvRenderer)
        pipeline.annotate()

示例#16

0

显示文件

    def testAnnotationWithDuplicateValuesInVcf(self):
        """
        Annotate ``example.duplicate_fields.vcf``, a VCF in which fields of different kinds (e.g. INFO and
        FILTER) share the same name, and render it as a simple TSV.

        No assertions are made on the output; the test fails only if one of the calls raises.
        """
        vcfPath = os.path.join("testdata", "vcf", "example.duplicate_fields.vcf")
        tsvPath = os.path.join("out", "example.duplicate_fields2.tsv")

        mutationCreator = VcfInputMutationCreator(vcfPath)
        mutationCreator.createMutations()
        tsvRenderer = SimpleOutputRenderer(tsvPath, [])

        pipeline = Annotator()
        pipeline.setInputCreator(mutationCreator)
        pipeline.setOutputRenderer(tsvRenderer)
        pipeline.annotate()

示例#17

0

显示文件

    def testSimpleAnnotationWithExampleVcf(self):
        """
        Annotate ``example.vcf`` with the transcript-provider datasource (a simple Gaf 3.0 annotation) and
        render it as a simple TSV.

        No assertions are made on the output; the test fails only if one of the calls raises.
        """
        vcfPath = os.path.join("testdata", "vcf", "example.vcf")
        tsvPath = os.path.join("out", "simpleVCF.Gaf.annotated.out.tsv")

        mutationCreator = VcfInputMutationCreator(vcfPath)
        mutationCreator.createMutations()
        tsvRenderer = SimpleOutputRenderer(tsvPath, [])

        pipeline = Annotator()
        pipeline.setInputCreator(mutationCreator)
        pipeline.setOutputRenderer(tsvRenderer)
        transcriptProvider = TestUtils.createTranscriptProviderDatasource(self.config)
        pipeline.addDatasource(transcriptProvider)
        pipeline.annotate()

示例#18

0

显示文件

文件： VcfInputMutationCreatorTest.py 项目： Tmacme/oncotator

    def testAnnotationWithExampleVcf(self):
        """
        Tests whether parsed annotations match the actual annotations in a simple TSV.  Missing format fields yield -->""  ".,." --> ","

        The rendered TSV is loaded with pandas (skipping the leading comment
        lines) and compared against the expected TSV: same column names, same
        number of rows, and equal values per column, where two nulls count as
        equal.
        """
        inputFilename = os.path.join("testdata", "vcf", "example.vcf")
        outputFilename = os.path.join("out", "example.out.tsv")
        expectedOutputFilename = os.path.join(
            "testdata", "vcf", "example.expected.out.tsv")

        creator = VcfInputMutationCreator(inputFilename)
        creator.createMutations()
        renderer = SimpleOutputRenderer(outputFilename)
        annotator = Annotator()
        annotator.setInputCreator(creator)
        annotator.setOutputRenderer(renderer)
        annotator.annotate()

        tsvReader = GenericTsvReader(outputFilename)

        # The header row sits right after the comment lines, so its index is
        # the number of comment lines.
        current = pandas.read_csv(outputFilename,
                                  sep='\t',
                                  header=len(tsvReader.getCommentsAsList()))
        expected = pandas.read_csv(expectedOutputFilename, sep='\t')

        currentColNames = set(current.columns)
        expectedColNames = set(expected.columns)

        # Compare by value: ``len(...) is 0`` relied on int object identity.
        self.assertEqual(
            len(currentColNames.symmetric_difference(expectedColNames)), 0,
            "Should have the same columns")
        self.assertEqual(
            len(current.index), len(expected.index),
            "Should have the same number of rows")

        for colName in currentColNames:
            # A cell matches when the values are equal or both are null
            # (null never compares equal to null).
            matches = ((current[colName] == expected[colName])
                       | (pandas.isnull(current[colName])
                          & pandas.isnull(expected[colName])))
            self.assertTrue(
                matches.all(),
                "Should have the same values in column " + colName + ": \n" +
                str(current[colName]) + "\nvs\n" + str(expected[colName]))

示例#19

0

显示文件

文件： VcfInputMutationCreatorTest.py 项目： Tmacme/oncotator

    def testMissingFilter(self):
        """
        Tests that the missing FILTER fields are parsed correctly.

        The rendered TSV is loaded with pandas (skipping the leading comment
        lines) and compared against the expected TSV: same column names, same
        number of rows, and equal values per column, where two nulls count as
        equal.
        """
        inputFilename = os.path.join(
            "testdata", "vcf", "example.missing_filters.vcf")
        outputFilename = os.path.join("out", "example.missing_filters.out.tsv")
        expectedOutputFilename = os.path.join(
            "testdata", "vcf", "example.expected.missing_filters.out.tsv")

        creator = VcfInputMutationCreator(inputFilename)
        creator.createMutations()
        renderer = SimpleOutputRenderer(outputFilename)
        annotator = Annotator()
        annotator.setInputCreator(creator)
        annotator.setOutputRenderer(renderer)
        annotator.annotate()

        tsvReader = GenericTsvReader(outputFilename)

        # The header row sits right after the comment lines, so its index is
        # the number of comment lines.
        current = pandas.read_csv(outputFilename,
                                  sep='\t',
                                  header=len(tsvReader.getCommentsAsList()))
        expected = pandas.read_csv(expectedOutputFilename, sep='\t')

        currentColNames = set(current.columns)
        expectedColNames = set(expected.columns)

        # Compare by value: ``len(...) is 0`` relied on int object identity.
        self.assertEqual(
            len(currentColNames.symmetric_difference(expectedColNames)), 0,
            "Should have the same columns")
        self.assertEqual(
            len(current.index), len(expected.index),
            "Should have the same number of rows")

        for colName in currentColNames:
            # A cell matches when the values are equal or both are null
            # (null never compares equal to null).
            matches = ((current[colName] == expected[colName])
                       | (pandas.isnull(current[colName])
                          & pandas.isnull(expected[colName])))
            self.assertTrue(
                matches.all(),
                "Should have the same values in column " + colName)

示例#20

0

显示文件

文件： GenericGenomicPositionDatasourceTest.py 项目： xingtech/oncotator

    def testDoubleAnnotationError(self):
        """Annotate a maf file that used to trigger a duplicate-annotation exception; no exception may be raised."""
        renderTarget = 'out/genericGenomePositionDoubleAnnotationTest.out.tsv'

        positionDS = DatasourceFactory.createDatasource(
            "testdata/small_genome_position_tsv_ds/oreganno_trim.config",
            "testdata/small_genome_position_tsv_ds/")

        pipeline = Annotator()
        pipeline.setInputCreator(MafliteInputMutationCreator('testdata/maflite/testDoubleAnnotate.maf.tsv'))
        pipeline.setOutputRenderer(SimpleOutputRenderer(renderTarget))
        pipeline.addDatasource(positionDS)
        renderedPath = pipeline.annotate()

        # The only explicit check is that the output file was written.
        self.assertTrue(os.path.exists(renderedPath))

示例#21

0

显示文件

文件： GenericGenomicPositionDatasourceTest.py 项目： xingtech/oncotator

    def testBasicAnnotation(self):
        """Annotate from a basic TSV of genomic positions and check the ORegAnno id on every output row.

        Covers both single- and multiple-nucleotide variants.  The TSV datasource is already installed
        (i.e. its config file exists).
        """
        renderTarget = 'out/genericGenomePositionTest.out.tsv'

        positionDS = DatasourceFactory.createDatasource(
            "testdata/small_genome_position_tsv_ds/oreganno_trim.config",
            "testdata/small_genome_position_tsv_ds/")

        pipeline = Annotator()
        pipeline.setInputCreator(MafliteInputMutationCreator('testdata/maflite/tiny_maflite.maf.txt'))
        pipeline.setOutputRenderer(SimpleOutputRenderer(renderTarget))
        pipeline.addDatasource(positionDS)
        testFilename = pipeline.annotate()

        # Make sure that some values were populated
        self.assertTrue(os.path.exists(testFilename))
        tsvReader = GenericTsvReader(testFilename)

        idColumn = "ORegAnno_hg19.oreganno.id"
        ctr = 1
        # Rows come in groups of three: the first two overlap a record, the third does not.
        for lineDict in tsvReader:
            oregId = lineDict[idColumn]
            linePrefix = "Line " + str(ctr)
            if ctr % 3 != 0:
                self.assertFalse(
                    oregId == '',
                    linePrefix + " should not have had blank value, but did.")
                self.assertTrue(
                    oregId == 'OREG0013034',
                    linePrefix + " did not have correct value: " + oregId)
            else:
                self.assertTrue(
                    oregId == '',
                    linePrefix + " should have had blank value, but did not: " + oregId)
            ctr += 1