示例#1
0
    def test_WhenDepracatedHeadersAreUse_Return_correspondingValue(self):
        deprecatedTsvInputRow = {
            'alt_allele': '-',
            'read_depth': '11572',
            'ensembl_gene_id': '',
            'rs_id_Variant': '',
            'Platform': 'Targeted NGS',
            'ucsc_gene_id': '',
            'ref_allele': 'T',
            'chromosome': '19',
            'ncbi_gene_id': '',
            'nucleotide_change': '1225delA',
            'hgnc_symbol': 'KEAP1',
            'consequence': 'stop gain',
            'Allele_frequency': '96,47',
            'Passage': '27',
            'amino_acid_change': 'M409X',
            'Model_ID': 'LCF16',
            'datasource': 'CURIE-LC',
            'ensembl_transcript_id': '',
            'Sample_ID': 'LCF16p27:26/08/2016',
            'seq_start_position': '10491677',
            'genome_assembly': 'GRCh38',
            'sample_origin': 'xenograft'
        }
        tsvInputRow = {
            'alt_allele': '-',
            'read_depth': '11572',
            'ensembl_gene_id': '',
            'rs_id_Variant': '',
            'platform': 'Targeted NGS',
            'ucsc_gene_id': '',
            'ref_allele': 'T',
            'chromosome': '19',
            'ncbi_gene_id': '',
            'nucleotide_change': '1225delA',
            'symbol': 'KEAP1',
            'consequence': 'stop gain',
            'allele_frequency': '96,47',
            'passage': '27',
            'amino_acid_change': 'M409X',
            'model_id': 'LCF16',
            'datasource': 'CURIE-LC',
            'ensembl_transcript_id': '',
            'sample_ID': 'LCF16p27:26/08/2016',
            'seq_start_position': '10491677',
            'genome_assembly': 'GRCh38',
            'sample_origin': 'xenograft'
        }

        actualDeprecatedHeader = AnnotationMerger.getEitherFromRow(
            deprecatedTsvInputRow, "Model_ID", "model_id")
        actualUpdatedHeader = AnnotationMerger.getEitherFromRow(
            tsvInputRow, "Model_ID", "model_id")

        self.assertEquals(actualDeprecatedHeader, 'LCF16')
        self.assertEquals(actualUpdatedHeader, 'LCF16')
示例#2
0
    def test_blankRowsFilter_When_buildFinalTemplateIsCalled_Return_blankList(
            self):
        blankrow1 = []
        blankrow2 = []
        tsvInputRow = {
            'alt_allele': '-',
            'read_depth': '11572',
            'ensembl_gene_id': '',
            'rs_id_Variant': '',
            'Platform': 'Targeted NGS',
            'ucsc_gene_id': '',
            'ref_allele': 'T',
            'chromosome': '19',
            'ncbi_gene_id': '',
            'nucleotide_change': '1225delA',
            'hgnc_symbol': 'KEAP1',
            'consequence': 'stop gain',
            'Allele_frequency': '96,47',
            'Passage': '27',
            'amino_acid_change': 'M409X',
            'Model_ID': 'LCF16',
            'datasource': 'CURIE-LC',
            'ensembl_transcript_id': '',
            'Sample_ID': 'LCF16p27:26/08/2016',
            'seq_start_position': '10491677',
            'genome_assembly': 'GRCh38',
            'sample_origin': 'xenograft'
        }

        inputRows = ps.DataFrame([blankrow1, blankrow2])
        actualRow = AnnotationMerger.buildFinalTemplate(inputRows, tsvInputRow)
        self.assertEquals(len(actualRow), 0)
示例#3
0
def test_givenNonMatchingFirstNucleotideAlleles_WhenformatAlleles_ThenReturnInput(
        self):
    basicRefAllele = "A"
    basicAltAllele = "T"
    row1 = {"ref_allele": basicRefAllele, "alt_allele": basicAltAllele}

    refAllele = "ATC"
    altAllele = "CTT"
    row2 = {"alt_allele": altAllele, "ref_allele": refAllele}

    actualRef1, actualAlt1 = AnnotationMerger.formatAlleles(row1)
    actualRef2, actualAlt2 = AnnotationMerger.formatAlleles(row2)

    self.assertEquals(actualRef1, basicRefAllele)
    self.assertEquals(actualRef2, refAllele)
    self.assertEquals(actualAlt1, basicAltAllele)
    self.assertEquals(actualAlt2, altAllele)
示例#4
0
    def test_GivenTwoEmblRow_When_buildFinalTemplateIsCalled_Return_BuiltRowWithoutNCBIdata(
            self):
        emblGene = "ENSG00000068078"
        emblFeature = "ENST00000260795"
        emblGene2 = "ENSG0000000999"
        emblFeature2 = 'ENST000000111'

        emblRow = [
            "4_1804915_A/G", "4:1804915", "G", emblGene, emblFeature,
            "Transcript", "3_prime_UTR_variant", "1509", "-", "-", "-", "-",
            "rs1466726466",
            "IMPACT=MODIFIER;STRAND=1;VARIANT_CLASS=SNV;SYMBOL=FGFR3;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC "
        ]
        emblRow2 = [
            "4_1804915_A/G", "4:1804916", "G", emblGene2, emblFeature2,
            "Transcript", "missense", "1509", "-", "-", "-", "-",
            "rs1466726466",
            "IMPACT=High;STRAND=1;VARIANT_CLASS=SNV;SYMBOL=FGFR3;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC "
        ]

        colNames = [
            "#Uploaded_variation", "Location", "Allele", "Gene", "Feature",
            "Feature_type", "Consequence", "cDNA_position", "CDS_position",
            "Protein_position", "Amino_acids", "Codons", "Existing_variation",
            "Extra"
        ]
        tsvInputRow = {
            'alt_allele': '-',
            'read_depth': '11572',
            'ensembl_gene_id': '',
            'rs_id_Variant': '',
            'Platform': 'Targeted NGS',
            'ucsc_gene_id': '',
            'ref_allele': 'T',
            'chromosome': '19',
            'ncbi_gene_id': '',
            'nucleotide_change': '1225delA',
            'hgnc_symbol': 'KEAP1',
            'consequence': 'stop gain',
            'Allele_frequency': '96,47',
            'Passage': '27',
            'amino_acid_change': 'M409X',
            'Model_ID': 'LCF16',
            'datasource': 'CURIE-LC',
            'ensembl_transcript_id': '',
            'Sample_ID': 'LCF16p27:26/08/2016',
            'seq_start_position': '10491677',
            'genome_assembly': 'GRCh38',
            'sample_origin': 'xenograft'
        }

        inputRows = ps.DataFrame([emblRow, emblRow2], columns=colNames)
        actualRow = AnnotationMerger.buildFinalTemplate(inputRows, tsvInputRow)

        self.assertEquals(actualRow[20], None)
        self.assertEquals(actualRow[21], None)
        self.assertEquals(actualRow[22], emblGene)
        self.assertEquals(actualRow[23], emblFeature)
示例#5
0
def test_givenMatchingFirstNucleotideAlleles_WhenformatAlleles_ThenReturnFirstDroppedAlleles(
        self):
    refAllele = "NC"
    altAllele = "NT"
    row = {"alt_allele": altAllele, "ref_allele": refAllele}
    refAllele2 = "NATC"
    altAllele2 = "NCTT"
    row2 = {"alt_allele": altAllele2, "ref_allele": refAllele2}

    expectedRefAllele1 = "C"
    expectedAltAllele1 = "T"
    expectedRefAllele2 = "ATC"
    expectedAltAllele2 = "CTT"

    actualRef1, actualAlt1 = AnnotationMerger.formatAlleles(row)
    actualRef2, actualAlt2 = AnnotationMerger.formatAlleles(row2)

    self.assertEquals(actualRef1, expectedRefAllele1)
    self.assertEquals(actualRef2, expectedRefAllele2)
    self.assertEquals(actualAlt1, expectedAltAllele1)
    self.assertEquals(actualAlt2, expectedAltAllele2)
示例#6
0
    def test_GivenChrPosKeyWithInsertionWithMatchingFirstNucleotides_WhenChrPosKeyIsused_Then_ReturnShiftedAndHyphonatedResult(
            self):
        chromo = '1'
        seq_start = '10000'
        refAllele = 'A'
        altAllele = 'ATT'
        expectedAnnoId = "{0}_{1}_{2}_{3}".format(chromo, seq_start, refAllele,
                                                  altAllele)

        rowDic = {
            'chromosome': chromo,
            'seq_start_position': seq_start,
            'ref_allele': refAllele,
            'alt_allele': altAllele
        }
        row = ps.Series(rowDic)

        annoId = AnnotationMerger.createAnnotationKey(row)

        self.assertEqual(annoId, expectedAnnoId)
示例#7
0
def test_buildFinalAnnotation_realDroppedAnnotations_Debug(self):

    AnnotationMerger.buildFinalTemplate()
示例#8
0
    def test_Given_MultipleSNVs_WhenMatchingKeys_returnRowsWithMatchChrosomeAndRefAndAltAllele(
            self):
        expectedRef = "C"
        expectedAlt = "T"

        annoFileHeaders = [
            "#Uploaded_variation", "Location", "Allele", "Gene", "Feature",
            "Feature_type", "Consequence", "cDNA_position", "CDS_position",
            "Protein_position", "Amino_acids", "Codons", "Existing_variation",
            "Extra"
        ]

        annoFile = [
            [
                "12_25245350_C_A", "12:25245350", "A", "ENSG00000133703",
                "ENST00000311936", "Transcript", "missense_variant", "225",
                "35", "12", "G/V", "gGt/gTt",
                "rs121913529,COSM1135366,COSM1140133,COSM1140134,COSM12657,COSM49168,COSM520,COSM521,COSM522",
                "IMPACT=MODERATE;STRAND=-1;VARIANT_CLASS=SNV;SYMBOL=KRAS;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:6407;BIOTYPE=protein_coding;TSL=1;APPRIS=A1;CCDS=CCDS8702.1;ENSP=ENSP00000308495;SWISSPROT=P01116;TREMBL=A0A024RAV5;UNIPARC=UPI0000001252;SOURCE=Ensembl;GIVEN_REF=C;USED_REF=C;GENE_PHENO=1;SIFT=deleterious(0);PolyPhen=probably_damaging(0.96);EXON=2/5;DOMAINS=Low_complexity_(Seg):seg,PROSITE_profiles:PS51421,CDD:cd04138,PANTHER:PTHR24070,PANTHER:PTHR24070:SF186,Pfam:PF00071,TIGRFAM:TIGR00231,Gene3D:3.40.50.300,SMART:SM00173,SMART:SM00174,SMART:SM00175,Superfamily:SSF52540,Prints:PR00449;HGVSc=ENST00000311936.8:c.35G>T;HGVSp=ENSP00000308495.3:p.Gly12Val;gnomAD_AF=0;gnomAD_AFR_AF=0;gnomAD_AMR_AF=0;gnomAD_ASJ_AF=0;gnomAD_EAS_AF=0;gnomAD_FIN_AF=0;gnomAD_NFE_AF=0;gnomAD_OTH_AF=0;gnomAD_SAS_AF=0;MAX_AF=0;MAX_AF_POPS=gnomAD_AFR,gnomAD_AMR,gnomAD_ASJ,gnomAD_EAS,gnomAD_FIN,gnomAD_NFE,gnomAD_OTH,gnomAD_SAS;CLIN_SIG=pathogenic,pathogenic/likely_pathogenic,likely_pathogenic;SOMATIC=0,1,1,1,1,1,1,1,1;PHENO=1,1,1,1,1,1,1,1,1;PUBMED=25157968,19029981,22499344,17332249,21079152,2278970,3122217,12460918,16434492,19075190,22407852,17384584,18794081,19047918,19255327,19773371,23406027,22683711,19018267,21975775,17704260,15696205,16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335,19794967,21398618,23182985,7773929,8439212,15842656,17910045,19358724,19881948,20609353,20805368,20949522,21169357,22025163,22235099,22282465,22897852,23014527,25044103,26372703,27872090,29525983,30463544"
            ],
            [
                "12_25245350_C/_", "12:25245350", "A", "ENSG00000133703",
                "ENST00000556131", "Transcript", "missense_variant", "212",
                "35", "12", "G/V", "gGt/gTt",
                "rs121913529,COSM1135366,COSM1140133,COSM1140134,COSM12657,COSM49168,COSM520,COSM521,COSM522",
                "IMPACT=MODERATE;STRAND=-1;VARIANT_CLASS=SNV;SYMBOL=KRAS;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:6407;BIOTYPE=protein_coding;TSL=1;ENSP=ENSP00000451856;TREMBL=G3V4K2;UNIPARC=UPI00001FB7B5;SOURCE=Ensembl;GIVEN_REF=C;USED_REF=C;GENE_PHENO=1;SIFT=deleterious(0);PolyPhen=possibly_damaging(0.884);EXON=2/3;DOMAINS=Low_complexity_(Seg):seg,PROSITE_profiles:PS51421,PANTHER:PTHR24070,PANTHER:PTHR24070:SF397,Pfam:PF00071,Gene3D:3.40.50.300,Superfamily:SSF52540,Prints:PR00449;HGVSc=ENST00000556131.1:c.35G>T;HGVSp=ENSP00000451856.1:p.Gly12Val;gnomAD_AF=0;gnomAD_AFR_AF=0;gnomAD_AMR_AF=0;gnomAD_ASJ_AF=0;gnomAD_EAS_AF=0;gnomAD_FIN_AF=0;gnomAD_NFE_AF=0;gnomAD_OTH_AF=0;gnomAD_SAS_AF=0;MAX_AF=0;MAX_AF_POPS=gnomAD_AFR,gnomAD_AMR,gnomAD_ASJ,gnomAD_EAS,gnomAD_FIN,gnomAD_NFE,gnomAD_OTH,gnomAD_SAS;CLIN_SIG=pathogenic,pathogenic/likely_pathogenic,likely_pathogenic;SOMATIC=0,1,1,1,1,1,1,1,1;PHENO=1,1,1,1,1,1,1,1,1;PUBMED=25157968,19029981,22499344,17332249,21079152,2278970,3122217,12460918,16434492,19075190,22407852,17384584,18794081,19047918,19255327,19773371,23406027,22683711,19018267,21975775,17704260,15696205,16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335,19794967,21398618,23182985,7773929,8439212,15842656,17910045,19358724,19881948,20609353,20805368,20949522,21169357,22025163,22235099,22282465,22897852,23014527,25044103,26372703,27872090,29525983,30463544"
            ],
            [
                "12_25245350_C_A", "12:25245350", "A", "ENSG00000133703",
                "ENST00000557334", "Transcript", "missense_variant", "232",
                "35", "12", "G/V", "gGt/gTt",
                "rs121913529,COSM1135366,COSM1140133,COSM1140134,COSM12657,COSM49168,COSM520,COSM521,COSM522",
                "IMPACT=MODERATE;STRAND=-1;VARIANT_CLASS=SNV;SYMBOL=KRAS;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:6407;BIOTYPE=protein_coding;TSL=5;ENSP=ENSP00000452512;TREMBL=G3V5T7;UNIPARC=UPI00021CF477;SOURCE=Ensembl;GIVEN_REF=C;USED_REF=C;GENE_PHENO=1;SIFT=deleterious_low_confidence(0);PolyPhen=probably_damaging(0.979);EXON=2/3;DOMAINS=Gene3D:3.40.50.300,Pfam:PF00071,Prints:PR00449,PROSITE_profiles:PS51421,PANTHER:PTHR24070,PANTHER:PTHR24070:SF186,SMART:SM00173,Superfamily:SSF52540,Low_complexity_(Seg):seg;HGVSc=ENST00000557334.5:c.35G>T;HGVSp=ENSP00000452512.1:p.Gly12Val;gnomAD_AF=0;gnomAD_AFR_AF=0;gnomAD_AMR_AF=0;gnomAD_ASJ_AF=0;gnomAD_EAS_AF=0;gnomAD_FIN_AF=0;gnomAD_NFE_AF=0;gnomAD_OTH_AF=0;gnomAD_SAS_AF=0;MAX_AF=0;MAX_AF_POPS=gnomAD_AFR,gnomAD_AMR,gnomAD_ASJ,gnomAD_EAS,gnomAD_FIN,gnomAD_NFE,gnomAD_OTH,gnomAD_SAS;CLIN_SIG=pathogenic,pathogenic/likely_pathogenic,likely_pathogenic;SOMATIC=0,1,1,1,1,1,1,1,1;PHENO=1,1,1,1,1,1,1,1,1;PUBMED=25157968,19029981,22499344,17332249,21079152,2278970,3122217,12460918,16434492,19075190,22407852,17384584,18794081,19047918,19255327,19773371,23406027,22683711,19018267,21975775,17704260,15696205,16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335,19794967,21398618,23182985,7773929,8439212,15842656,17910045,19358724,19881948,20609353,20805368,20949522,21169357,22025163,22235099,22282465,22897852,23014527,25044103,26372703,27872090,29525983,30463544"
            ],
            [
                "12_25245350_C_T", "12:25245350", "T", "ENSG00000133703",
                "ENST00000311936", "Transcript", "missense_variant", "225",
                "35", "12", "G/D", "gGt/gAt",
                "rs121913529,COSM1135366,COSM1140133,COSM1140134,COSM12657,COSM49168,COSM520,COSM521,COSM522",
                "IMPACT=MODERATE;STRAND=-1;VARIANT_CLASS=SNV;SYMBOL=KRAS;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:6407;BIOTYPE=protein_coding;TSL=1;APPRIS=A1;CCDS=CCDS8702.1;ENSP=ENSP00000308495;SWISSPROT=P01116;TREMBL=A0A024RAV5;UNIPARC=UPI0000001252;SOURCE=Ensembl;GIVEN_REF=C;USED_REF=C;GENE_PHENO=1;SIFT=deleterious(0);PolyPhen=benign(0.21);EXON=2/5;DOMAINS=Low_complexity_(Seg):seg,PROSITE_profiles:PS51421,CDD:cd04138,PANTHER:PTHR24070,PANTHER:PTHR24070:SF186,Pfam:PF00071,TIGRFAM:TIGR00231,Gene3D:3.40.50.300,SMART:SM00173,SMART:SM00174,SMART:SM00175,Superfamily:SSF52540,Prints:PR00449;HGVSc=ENST00000311936.8:c.35G>A;HGVSp=ENSP00000308495.3:p.Gly12Asp;gnomAD_AF=4.011e-06;gnomAD_AFR_AF=0;gnomAD_AMR_AF=0;gnomAD_ASJ_AF=0;gnomAD_EAS_AF=0;gnomAD_FIN_AF=0;gnomAD_NFE_AF=8.883e-06;gnomAD_OTH_AF=0;gnomAD_SAS_AF=0;MAX_AF=8.883e-06;MAX_AF_POPS=gnomAD_NFE;CLIN_SIG=pathogenic,likely_pathogenic;SOMATIC=0,1,1,1,1,1,1,1,1;PHENO=1,1,1,1,1,1,1,1,1;PUBMED=25157968,19029981,22499344,17332249,21079152,2278970,3122217,12460918,16434492,19075190,22407852,17384584,18794081,19047918,19255327,19773371,23406027,22683711,19018267,21975775,17704260,15696205,16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335,19794967,21398618,23182985,7773929,8439212,15842656,17910045,19358724,19881948,20609353,20805368,20949522,21169357,22025163,22235099,22282465,22897852,23014527,25044103,26372703,27872090,29525983,30463544"
            ],
            [
                "12_25245350_C_T", "12:25245350", "T", "ENSG00000133703",
                "ENST00000556131", "Transcript", "missense_variant", "212",
                "35", "12", "G/D", "gGt/gAt",
                "rs121913529,COSM1135366,COSM1140133,COSM1140134,COSM12657,COSM49168,COSM520,COSM521,COSM522",
                "IMPACT=MODERATE;STRAND=-1;VARIANT_CLASS=SNV;SYMBOL=KRAS;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:6407;BIOTYPE=protein_coding;TSL=1;ENSP=ENSP00000451856;TREMBL=G3V4K2;UNIPARC=UPI00001FB7B5;SOURCE=Ensembl;GIVEN_REF=C;USED_REF=C;GENE_PHENO=1;SIFT=deleterious(0);PolyPhen=benign(0.039);EXON=2/3;DOMAINS=Low_complexity_(Seg):seg,PROSITE_profiles:PS51421,PANTHER:PTHR24070,PANTHER:PTHR24070:SF397,Pfam:PF00071,Gene3D:3.40.50.300,Superfamily:SSF52540,Prints:PR00449;HGVSc=ENST00000556131.1:c.35G>A;HGVSp=ENSP00000451856.1:p.Gly12Asp;gnomAD_AF=4.011e-06;gnomAD_AFR_AF=0;gnomAD_AMR_AF=0;gnomAD_ASJ_AF=0;gnomAD_EAS_AF=0;gnomAD_FIN_AF=0;gnomAD_NFE_AF=8.883e-06;gnomAD_OTH_AF=0;gnomAD_SAS_AF=0;MAX_AF=8.883e-06;MAX_AF_POPS=gnomAD_NFE;CLIN_SIG=pathogenic,likely_pathogenic;SOMATIC=0,1,1,1,1,1,1,1,1;PHENO=1,1,1,1,1,1,1,1,1;PUBMED=25157968,19029981,22499344,17332249,21079152,2278970,3122217,12460918,16434492,19075190,22407852,17384584,18794081,19047918,19255327,19773371,23406027,22683711,19018267,21975775,17704260,15696205,16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335,19794967,21398618,23182985,7773929,8439212,15842656,17910045,19358724,19881948,20609353,20805368,20949522,21169357,22025163,22235099,22282465,22897852,23014527,25044103,26372703,27872090,29525983,30463544"
            ],
            [
                "12_25245350_C_T", "12:25245350", "T", "ENSG00000133703",
                "ENST00000557334", "Transcript", "missense_variant", "232",
                "35", "12", "G/D", "gGt/gAt",
                "rs121913529,COSM1135366,COSM1140133,COSM1140134,COSM12657,COSM49168,COSM520,COSM521,COSM522",
                "IMPACT=MODERATE;STRAND=-1;VARIANT_CLASS=SNV;SYMBOL=KRAS;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:6407;BIOTYPE=protein_coding;TSL=5;ENSP=ENSP00000452512;TREMBL=G3V5T7;UNIPARC=UPI00021CF477;SOURCE=Ensembl;GIVEN_REF=C;USED_REF=C;GENE_PHENO=1;SIFT=deleterious_low_confidence(0);PolyPhen=probably_damaging(0.969);EXON=2/3;DOMAINS=Gene3D:3.40.50.300,Pfam:PF00071,Prints:PR00449,PROSITE_profiles:PS51421,PANTHER:PTHR24070,PANTHER:PTHR24070:SF186,SMART:SM00173,Superfamily:SSF52540,Low_complexity_(Seg):seg;HGVSc=ENST00000557334.5:c.35G>A;HGVSp=ENSP00000452512.1:p.Gly12Asp;gnomAD_AF=4.011e-06;gnomAD_AFR_AF=0;gnomAD_AMR_AF=0;gnomAD_ASJ_AF=0;gnomAD_EAS_AF=0;gnomAD_FIN_AF=0;gnomAD_NFE_AF=8.883e-06;gnomAD_OTH_AF=0;gnomAD_SAS_AF=0;MAX_AF=8.883e-06;MAX_AF_POPS=gnomAD_NFE;CLIN_SIG=pathogenic,likely_pathogenic;SOMATIC=0,1,1,1,1,1,1,1,1;PHENO=1,1,1,1,1,1,1,1,1;PUBMED=25157968,19029981,22499344,17332249,21079152,2278970,3122217,12460918,16434492,19075190,22407852,17384584,18794081,19047918,19255327,19773371,23406027,22683711,19018267,21975775,17704260,15696205,16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335,19794967,21398618,23182985,7773929,8439212,15842656,17910045,19358724,19881948,20609353,20805368,20949522,21169357,22025163,22235099,22282465,22897852,23014527,25044103,26372703,27872090,29525983,30463544"
            ],
            [
                "12_25245350_C_G", "12:25245350", "G", "ENSG00000133703",
                "ENST00000311936", "Transcript", "missense_variant", "225",
                "35", "12", "G/A", "gGt/gCt",
                "rs121913529,COSM1135366,COSM1140133,COSM1140134,COSM12657,COSM49168,COSM520,COSM521,COSM522",
                "IMPACT=MODERATE;STRAND=-1;VARIANT_CLASS=SNV;SYMBOL=KRAS;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:6407;BIOTYPE=protein_coding;TSL=1;APPRIS=A1;CCDS=CCDS8702.1;ENSP=ENSP00000308495;SWISSPROT=P01116;TREMBL=A0A024RAV5;UNIPARC=UPI0000001252;SOURCE=Ensembl;GIVEN_REF=C;USED_REF=C;GENE_PHENO=1;SIFT=deleterious(0.02);PolyPhen=possibly_damaging(0.597);EXON=2/5;DOMAINS=Low_complexity_(Seg):seg,PROSITE_profiles:PS51421,CDD:cd04138,PANTHER:PTHR24070,PANTHER:PTHR24070:SF186,Pfam:PF00071,TIGRFAM:TIGR00231,Gene3D:3.40.50.300,SMART:SM00173,SMART:SM00174,SMART:SM00175,Superfamily:SSF52540,Prints:PR00449;HGVSc=ENST00000311936.8:c.35G>C;HGVSp=ENSP00000308495.3:p.Gly12Ala;gnomAD_AF=0;gnomAD_AFR_AF=0;gnomAD_AMR_AF=0;gnomAD_ASJ_AF=0;gnomAD_EAS_AF=0;gnomAD_FIN_AF=0;gnomAD_NFE_AF=0;gnomAD_OTH_AF=0;gnomAD_SAS_AF=0;MAX_AF=0;MAX_AF_POPS=gnomAD_AFR,gnomAD_AMR,gnomAD_ASJ,gnomAD_EAS,gnomAD_FIN,gnomAD_NFE,gnomAD_OTH,gnomAD_SAS;CLIN_SIG=not_provided,likely_pathogenic,pathogenic;SOMATIC=0,1,1,1,1,1,1,1,1;PHENO=1,1,1,1,1,1,1,1,1;PUBMED=25157968,19029981,22499344,17332249,21079152,2278970,3122217,12460918,16434492,19075190,22407852,17384584,18794081,19047918,19255327,19773371,23406027,22683711,19018267,21975775,17704260,15696205,16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335,19794967,21398618,23182985,7773929,8439212,15842656,17910045,19358724,19881948,20609353,20805368,20949522,21169357,22025163,22235099,22282465,22897852,23014527,25044103,26372703,27872090,29525983,30463544"
            ],
            [
                "12_25245350_C_G", "12:25245350", "G", "ENSG00000133703",
                "ENST00000556131", "Transcript", "missense_variant", "212",
                "35", "12", "G/A", "gGt/gCt",
                "rs121913529,COSM1135366,COSM1140133,COSM1140134,COSM12657,COSM49168,COSM520,COSM521,COSM522",
                "IMPACT=MODERATE;STRAND=-1;VARIANT_CLASS=SNV;SYMBOL=KRAS;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:6407;BIOTYPE=protein_coding;TSL=1;ENSP=ENSP00000451856;TREMBL=G3V4K2;UNIPARC=UPI00001FB7B5;SOURCE=Ensembl;GIVEN_REF=C;USED_REF=C;GENE_PHENO=1;SIFT=deleterious(0.04);PolyPhen=possibly_damaging(0.614);EXON=2/3;DOMAINS=Low_complexity_(Seg):seg,PROSITE_profiles:PS51421,PANTHER:PTHR24070,PANTHER:PTHR24070:SF397,Pfam:PF00071,Gene3D:3.40.50.300,Superfamily:SSF52540,Prints:PR00449;HGVSc=ENST00000556131.1:c.35G>C;HGVSp=ENSP00000451856.1:p.Gly12Ala;gnomAD_AF=0;gnomAD_AFR_AF=0;gnomAD_AMR_AF=0;gnomAD_ASJ_AF=0;gnomAD_EAS_AF=0;gnomAD_FIN_AF=0;gnomAD_NFE_AF=0;gnomAD_OTH_AF=0;gnomAD_SAS_AF=0;MAX_AF=0;MAX_AF_POPS=gnomAD_AFR,gnomAD_AMR,gnomAD_ASJ,gnomAD_EAS,gnomAD_FIN,gnomAD_NFE,gnomAD_OTH,gnomAD_SAS;CLIN_SIG=not_provided,likely_pathogenic,pathogenic;SOMATIC=0,1,1,1,1,1,1,1,1;PHENO=1,1,1,1,1,1,1,1,1;PUBMED=25157968,19029981,22499344,17332249,21079152,2278970,3122217,12460918,16434492,19075190,22407852,17384584,18794081,19047918,19255327,19773371,23406027,22683711,19018267,21975775,17704260,15696205,16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335,19794967,21398618,23182985,7773929,8439212,15842656,17910045,19358724,19881948,20609353,20805368,20949522,21169357,22025163,22235099,22282465,22897852,23014527,25044103,26372703,27872090,29525983,30463544"
            ],
            [
                "12_25245350_C_G", "12:25245350", "G", "ENSG00000133703",
                "ENST00000557334", "Transcript", "missense_variant", "232",
                "35", "12", "G/A", "gGt/gCt",
                "rs121913529,COSM1135366,COSM1140133,COSM1140134,COSM12657,COSM49168,COSM520,COSM521,COSM522",
                "IMPACT=MODERATE;STRAND=-1;VARIANT_CLASS=SNV;SYMBOL=KRAS;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC:6407;BIOTYPE=protein_coding;TSL=5;ENSP=ENSP00000452512;TREMBL=G3V5T7;UNIPARC=UPI00021CF477;SOURCE=Ensembl;GIVEN_REF=C;USED_REF=C;GENE_PHENO=1;SIFT=tolerated_low_confidence(0.13);PolyPhen=possibly_damaging(0.899);EXON=2/3;DOMAINS=Gene3D:3.40.50.300,Pfam:PF00071,Prints:PR00449,PROSITE_profiles:PS51421,PANTHER:PTHR24070,PANTHER:PTHR24070:SF186,SMART:SM00173,Superfamily:SSF52540,Low_complexity_(Seg):seg;HGVSc=ENST00000557334.5:c.35G>C;HGVSp=ENSP00000452512.1:p.Gly12Ala;gnomAD_AF=0;gnomAD_AFR_AF=0;gnomAD_AMR_AF=0;gnomAD_ASJ_AF=0;gnomAD_EAS_AF=0;gnomAD_FIN_AF=0;gnomAD_NFE_AF=0;gnomAD_OTH_AF=0;gnomAD_SAS_AF=0;MAX_AF=0;MAX_AF_POPS=gnomAD_AFR,gnomAD_AMR,gnomAD_ASJ,gnomAD_EAS,gnomAD_FIN,gnomAD_NFE,gnomAD_OTH,gnomAD_SAS;CLIN_SIG=not_provided,likely_pathogenic,pathogenic;SOMATIC=0,1,1,1,1,1,1,1,1;PHENO=1,1,1,1,1,1,1,1,1;PUBMED=25157968,19029981,22499344,17332249,21079152,2278970,3122217,12460918,16434492,19075190,22407852,17384584,18794081,19047918,19255327,19773371,23406027,22683711,19018267,21975775,17704260,15696205,16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335,19794967,21398618,23182985,7773929,8439212,15842656,17910045,19358724,19881948,20609353,20805368,20949522,21169357,22025163,22235099,22282465,22897852,23014527,25044103,26372703,27872090,29525983,30463544"
            ]
        ]

        headers = [
            "datasource", "Model_ID", "Sample_ID", "sample_origin", "Passage",
            "host_strain_name", "hgnc_symbol", "amino_acid_change",
            "nucleotide_change", "consequence", "read_depth",
            "Allele_frequency", "chromosome", "seq_start_position",
            "ref_allele", "alt_allele", "ucsc_gene_id", "ncbi_gene_id",
            "ensembl_gene_id", "ensembl_transcript_id", "rs_id_Variant",
            "genome_assembly", "Platform"
        ]
        data = [
            "IRCC-CRC", "CRC1082PR", "CRC1082PRX0A01001TUMD05000", "xenograft",
            "1", "NOD.Cg-Prkdc<sup>scid</sup> Il2rg<sup>tm1Wjl</sup>/SzJ",
            "KRAS", "G12D", "", "missense", "", "0.96", "12", "25245350",
            expectedRef, expectedAlt, "", "", "", "", "rs121913529", "GRCh38",
            "TargetedNGS_MUT"
        ]

        testAnnotations = ps.DataFrame(annoFile, columns=annoFileHeaders)
        row = ps.DataFrame([data], columns=headers).to_dict("index").pop(0)

        actualMatches = AnnotationMerger.returnMatchingRows(
            row, testAnnotations)
        actualAllele = actualMatches['Allele'].unique()
        self.assertEquals(len(actualAllele), 1)
示例#9
0
    def test_GivenTwoMatchingColumns_WhenRowBuilderIsCalled_Then_ReturnCorrectlyBuiltRow(
            self):
        emblGene = "ENSG00000068078"
        emblFeature = "ENST00000260795"
        ncbiGene = "9817"
        ncbiFeature = 'NM_203500.2'

        emblRow = ["4_1804915_A/G", "4:1804915", "G", emblGene, emblFeature, "Transcript",
                   "3_prime_UTR_variant", \
                   "1509", "-", "-", "-", "-", "rs1466726466", \
                   "IMPACT=MODIFIER;STRAND=1;VARIANT_CLASS=SNV;SYMBOL=FGFR3;SYMBOL_SOURCE=HGNC;HGNC_ID=HGNC "]
        ncbiRow = {
            'Feature_type': 'Transcript',
            'Extra': 'IMPACT=HIGH;STRAND=-1;VARIANT_'
            'CLASS=deletion;SYMBOL=KEAP1;SYMBOL_SOURCE=EntrezGene;HGNC_ID=HGNC:23177;BIOTYPE=protein_coding;CANONICAL=YES;ENSP=NP_987096.1;SOURCE=RefSeq;GIVEN_REF=T;USED_REF=T;EXON=3/6;HGVSc=NM_203500.2:c.1225del;HGVSp=NP_987096.1:p.Met409Ter',
            '#Uploaded_variation': '19_10491677_T/-',
            'cDNA_position': '1388',
            'Feature': ncbiFeature,
            'Codons': 'Atg/tg',
            'Existing_variation': '-',
            'Location': '19:10491677',
            'CDS_position': '1225',
            'Protein_position': '409',
            'Consequence': 'frameshift_variant',
            'Allele': '-',
            'Gene': ncbiGene,
            'Amino_acids': 'M/X'
        }
        colNames = [
            "#Uploaded_variation", "Location", "Allele", "Gene", "Feature",
            "Feature_type", "Consequence", "cDNA_position", "CDS_position",
            "Protein_position", "Amino_acids", "Codons", "Existing_variation",
            "Extra"
        ]
        tsvInputRow = {
            'alt_allele': '-',
            'read_depth': '11572',
            'ensembl_gene_id': '',
            'rs_id_Variant': '',
            'Platform': 'Targeted NGS',
            'ucsc_gene_id': '',
            'ref_allele': 'T',
            'chromosome': '19',
            'ncbi_gene_id': '',
            'nucleotide_change': '1225delA',
            'hgnc_symbol': 'KEAP1',
            'consequence': 'stop gain',
            'Allele_frequency': '96,47',
            'Passage': '27',
            'amino_acid_change': 'M409X',
            'Model_ID': 'LCF16',
            'datasource': 'CURIE-LC',
            'ensembl_transcript_id': '',
            'Sample_ID': 'LCF16p27:26/08/2016',
            'seq_start_position': '10491677',
            'genome_assembly': 'GRCh38',
            'sample_origin': 'xenograft'
        }

        EMBLdf = ps.DataFrame([emblRow], columns=colNames)
        NCBIdf = ps.DataFrame([ncbiRow], columns=colNames)
        inputRows = ps.concat([EMBLdf, NCBIdf])

        actualRow = AnnotationMerger.buildFinalTemplate(inputRows, tsvInputRow)

        self.assertEquals(actualRow[20], ncbiGene)
        self.assertEquals(actualRow[21], ncbiFeature)
        self.assertEquals(actualRow[22], emblGene)
        self.assertEquals(actualRow[23], emblFeature)