示例#1
0
def gatherSignificantSites():
    import scipy.stats
    import CPTAC.Endometrial as en

    acetyl = en.get_acetylproteomics()
    clinical_attribute = "Proteomics_Tumor_Normal"
    acetyle_clinical_df = en.compare_clinical(acetyl, clinical_attribute)

    acetyle_clinical_df.loc[acetyle_clinical_df.Proteomics_Tumor_Normal ==
                            'Adjacent_normal',
                            'Proteomics_Tumor_Normal'] = 'Non_Tumor'
    acetyle_clinical_df.loc[acetyle_clinical_df.Proteomics_Tumor_Normal ==
                            'Enriched_normal',
                            'Proteomics_Tumor_Normal'] = 'Non_Tumor'
    acetyle_clinical_df.replace('Myometrium_normal', np.nan, inplace=True)
    tumor = acetyle_clinical_df.loc[acetyle_clinical_df[clinical_attribute] ==
                                    "Tumor"]
    non_tumor = acetyle_clinical_df.loc[acetyle_clinical_df[clinical_attribute]
                                        == "Non_Tumor"]

    threshold = .05 / len(acetyle_clinical_df.columns)
    P_VALUE_INDEX = 1
    print("Threshold:", threshold)
    significantSites = []
    for num in range(1, len(acetyle_clinical_df.columns)):
        site = acetyle_clinical_df.columns[num]
        ttestRes = scipy.stats.ttest_ind(tumor[site], non_tumor[site])
        if (ttestRes[P_VALUE_INDEX] < threshold):
            significantSites.append(site)
    return significantSites
示例#2
0
 def evaluate(self, data, trait):
     data_trait = en.compare_clinical(data, trait)
     threshold = .05 / len(data.columns)
     tscutoff = .5
     significantTests = []
     significantGenes = []
     for num in range(1,len(data_trait.columns)):
         gene = data_trait.columns[num]
         oneGene = data_trait[[trait, gene]]
         oneGene = oneGene.dropna(axis=0)
         spearmanrTest = stats.spearmanr(oneGene[trait], oneGene[gene])
         if (abs(spearmanrTest[0]) >= tscutoff) and (spearmanrTest[1] <= threshold):
             significantTests.append(spearmanrTest)
             significantGenes.append(gene)
     if len(significantGenes) > 0:
         return "PASS"
     else:
         return "FAIL"
示例#3
0
 def evaluate_utilities(self): #compare_**** functions
     print("Evaluating utilities...")
     results = []
     functions = {}
     results.append(en.compare_gene(en.get_proteomics(), en.get_transcriptomics(), "A1BG")); functions[len(results)] = "compare_gene"
     results.append(en.compare_gene(en.get_proteomics(), en.get_transcriptomics(), ["A1BG","RPL11"])); functions[len(results)] = "compare_genes"
     results.append(en.compare_clinical(en.get_proteomics(), "BMI")); functions[len(results)] = "compare_clinical"
     results.append(en.compare_mutations(en.get_proteomics(),"TP53")); functions[len(results)] = "compare_mutations(Proteomics)"
     results.append(en.compare_mutations(en.get_proteomics(),"TP53","AURKA")); functions[len(results)] = "compare_mutations(Proteomics with Somatic)"
     results.append(en.compare_mutations(en.get_phosphoproteomics(), "IRS2")); functions[len(results)] = "compare_mutations(Phosphoproteomics)"
     results.append(en.compare_mutations(en.get_phosphoproteomics(), "IRS2","PIK3CA")); functions[len(results)] = "compare_mutations(Phosphoproteomics with Somatic)"
     results.append(en.compare_mutations_full(en.get_proteomics(),"TP53")); functions[len(results)] = "compare_mutations_full(Proteomics)"
     results.append(en.compare_mutations_full(en.get_proteomics(),"TP53","AURKA")); functions[len(results)] = "compare_mutations_full(Proteomics with Somatic)"
     results.append(en.compare_mutations_full(en.get_phosphoproteomics(), "IRS2")); functions[len(results)] = "compare_mutations_full(Phosphoproteomics)"
     results.append(en.compare_mutations_full(en.get_phosphoproteomics(), "IRS2","PIK3CA")); functions[len(results)] = "compare_mutations_full(Phosphoproteomics with Somatic)"
     results.append(en.compare_phosphosites("TP53")); functions[len(results)] = "compare_phosphosites"
     PASS = True
     for x in range(0,len(results)):
         if results[x] is None:
             print("Error with",functions[x+1],"function")
             PASS = False
     if PASS:
         print("PASS")
     else:
         print("FAIL")
示例#4
0
 def evaluate_special_getters(self):
     print("Evaluating special getters...")
     results = []
     functions = {}
     results.append(en.get_clinical_cols()); functions[len(results)] = "clinical_cols"
     results.append(en.get_cohort_clinical(["Diabetes","BMI"])); functions[len(results)] = "cohort_meta"
     results.append(en.get_proteomics_quant(["S018","S100"])); functions[len(results)] = "proteomics_quant"
     results.append(en.get_proteomics_cols()); functions[len(results)] = "proteomics_cols"
     results.append(en.get_transcriptomics_cols()); functions[len(results)] = "transcriptomics_cols"
     results.append(en.get_cohort_proteomics(["A1BG","TP53"])); functions[len(results)] = "cohort_proteomics"
     results.append(en.get_cohort_transcriptomics(["A1BG","TP53"])); functions[len(results)] = "cohort_transcriptomics"
     results.append(en.get_cohort_cna(["SASS6","TTTY22"])); functions[len(results)] = "cohort_cna"
     results.append(en.get_cohort_phosphoproteomics(["TP53-S315","AAAS-S541"])); functions[len(results)] = "cohort_phosphoproteomics"
     results.append(en.get_patient_mutations("C3L-00157")); functions[len(results)] = "patient_mutations(Patient_Id)"
     results.append(en.get_patient_mutations("S013")); functions[len(results)] = "patient_mutations(Clinical_Patient_Key)"
     results.append(en.get_phosphosites("TP53")); functions[len(results)] = "phosphosites"
     PASS = True
     for x in range(0,len(results)):
         if results[x] is None:
             print("Error with get",functions[x+1], "function")
             PASS = False
     if PASS:
         print("PASS")
     else:
         print("FAIL")
示例#5
0
 def evaluate_getters(self):
     print("Evaluating getters...")
     dataframes = []
     file_names = {}
     dataframes.append(en.get_clinical()); file_names[len(dataframes)] = "clinical"
     dataframes.append(en.get_proteomics()); file_names[len(dataframes)] = "proteomics"
     dataframes.append(en.get_phosphoproteomics()); file_names[len(dataframes)] = "site phosphoproteomics"
     dataframes.append(en.get_phosphoproteomics(gene_level = True)); file_names[len(dataframes)] = "gene phosphoproteomics"
     dataframes.append(en.get_transcriptomics()); file_names[len(dataframes)] = "linear transcriptomics"
     dataframes.append(en.get_transcriptomics(data_type="circular")); file_names[len(dataframes)] = "circular transcriptomics"
     dataframes.append(en.get_transcriptomics(data_type="miRNA")); file_names[len(dataframes)] = "miRNA"
     dataframes.append(en.get_CNA()); file_names[len(dataframes)] = "CNA"
     dataframes.append(en.get_somatic()); file_names[len(dataframes)] = "parsed somatic maf"
     dataframes.append(en.get_somatic(binary=True)); file_names[len(dataframes)] = "binary somatic"
     dataframes.append(en.get_somatic(unparsed=True)); file_names[len(dataframes)] = "unparsed somatic maf"
     PASS = True
     for x in range(0,len(dataframes)):
         if dataframes[x] is None:
             print("Error reading", file_names[x+1], "data")
             PASS = False
     if PASS:
         print("PASS")
     else:
         print("FAIL")
示例#6
0
    def evaluate_getters_v2(self):
        # ***UNDER CONSTRUCTION***

        print("Evaluating getters v2...")
        tester = Basic()
        PASS = True

        # Test get_clinical() with the default parameter excluded=False
        if not tester.evaluate_getter(
            "Clinical",
            en.get_clinical(),
            (144, 171),
            ['Proteomics_Participant_ID', 'Case_excluded', 'Proteomics_TMT_batch', 'Proteomics_TMT_plex', 'Proteomics_TMT_channel', 'Proteomics_Parent_Sample_IDs', 'Proteomics_Aliquot_ID', 'Proteomics_Tumor_Normal', 'Proteomics_OCT', 'Country', 'RNAseq_R1_sample_type', 'RNAseq_R1_filename', 'RNAseq_R1_UUID', 'RNAseq_R2_sample_type', 'RNAseq_R2_filename', 'RNAseq_R2_UUID', 'miRNAseq_sample_type', 'miRNAseq_UUID', 'Methylation_available', 'Methylation_quality'],
            ((79, 81), (15, 146), (88, 12)),
            (-1.03, 8.888888889, 'Serous')
        ):
            PASS = False

        # Test get_clinical(excluded=True)
        if not tester.evaluate_getter(
            "Clinical (with excluded cases)",
            en.get_clinical(excluded=True),
            (153, 171),
            ['Proteomics_Participant_ID', 'Case_excluded', 'Proteomics_TMT_batch', 'Proteomics_TMT_plex', 'Proteomics_TMT_channel', 'Proteomics_Parent_Sample_IDs', 'Proteomics_Aliquot_ID', 'Proteomics_Tumor_Normal', 'Proteomics_OCT', 'Country', 'RNAseq_R1_sample_type', 'RNAseq_R1_filename', 'RNAseq_R1_UUID', 'RNAseq_R2_sample_type', 'RNAseq_R2_filename', 'RNAseq_R2_UUID', 'miRNAseq_sample_type', 'miRNAseq_UUID', 'Methylation_available', 'Methylation_quality'],
            ((23, 44), (151, 6), (32, 165)),
            (0.004118258, 'CPT0230400002,CPT0230400003,CPT0230400004,CPT0230410002,CPT0230410003,CPT0230410004,CPT0230420002,CPT0230420003,CPT0230420004', '171011_UNC31-K00269_0086_AHLJLCBBXX_CTTGTA_S7_L003_R2_001.fastq.gz')
        ):
            PASS = False

        # Test get_proteomics()
        if not  tester.evaluate_getter(
            "Proteomics",
            en.get_proteomics(),
            (153, 10999),
            ['A1BG', 'A2M', 'A2ML1', 'A4GALT', 'AAAS', 'AACS', 'AADAT', 'AAED1', 'AAGAB', 'AAK1', 'ZSWIM8', 'ZSWIM9', 'ZW10', 'ZWILCH', 'ZWINT', 'ZXDC', 'ZYG11B', 'ZYX', 'ZZEF1', 'ZZZ3'],
            ((34, 6003), (99, 9544), (152, 32)),
            (-0.8170000000000001, -1.28, 0.904)
        ):
            PASS = False

        # Test get_transcriptomics() with default parameter data_type="linear"
        if not tester.evaluate_getter(
            "Transcriptomics (linear)",
            en.get_transcriptomics(),
            (115, 28057),
            ['A1BG', 'A1BG-AS1', 'A1CF', 'A2M', 'A2M-AS1', 'A2ML1', 'A2MP1', 'A3GALT2', 'A4GALT', 'A4GNT', 'ZWILCH', 'ZWINT', 'ZXDA', 'ZXDB', 'ZXDC', 'ZYG11A', 'ZYG11B', 'ZYX', 'ZZEF1', 'ZZZ3'],
            ((22, 25483), (110, 23), (101, 17748)),
            (0.89, 11.83, 7.02)
        ):
            PASS = False

        # Test get_transcriptomics(data_type="circular")
        if not tester.evaluate_getter(
            "Transcriptomics (circular)",
            en.get_transcriptomics(data_type="circular"),
            (115, 4945),
            ['circ_chr10_100260218_100262063_CWF19L1', 'circ_chr10_100923975_100926019_SLF2', 'circ_chr10_100923978_100926019_SLF2', 'circ_chr10_100937402_100944128_SLF2', 'circ_chr10_100937402_100950753_SLF2', 'circ_chr10_101584602_101586156_POLL', 'circ_chr10_101667886_101676436_FBXW4', 'circ_chr10_101672915_101676436_FBXW4', 'circ_chr10_101792839_101807901_OGA', 'circ_chr10_101792839_101810314_OGA', 'circ_chrX_80288906_80310233_CHMP1B2P', 'circ_chrX_80289664_80310233_CHMP1B2P', 'circ_chrX_80707427_80719656_BRWD3', 'circ_chrX_80791854_80793772_BRWD3', 'circ_chrX_84096194_84164387_RPS6KA6', 'circ_chrX_84134782_84164387_RPS6KA6', 'circ_chrX_85067127_85074391_APOOL', 'circ_chrX_85978767_85981809_CHM', 'circ_chrX_91414904_91418871_PABPC5-AS1', 'circ_chrX_9691579_9693419_TBL1X'],
            ((110, 1), (34, 4935), (73, 2003)),
            (8.85, 6.48, 0.0)
        ):
            PASS = False

        # Test get_transcriptomics(data_type="miRNA")
        if not tester.evaluate_getter(
            "Transcriptomics (miRNA)",
            en.get_transcriptomics(data_type="miRNA"),
            (104, 2337),
            ['hsa-let-7a-2-3p', 'hsa-let-7a-3p', 'hsa-let-7a-5p', 'hsa-let-7b-3p', 'hsa-let-7b-5p', 'hsa-let-7c-3p', 'hsa-let-7c-5p', 'hsa-let-7d-3p', 'hsa-let-7d-5p', 'hsa-let-7e-3p', 'hsa-miR-9901', 'hsa-miR-9902', 'hsa-miR-9903', 'hsa-miR-9983-3p', 'hsa-miR-9985', 'hsa-miR-9986', 'hsa-miR-99a-3p', 'hsa-miR-99a-5p', 'hsa-miR-99b-3p', 'hsa-miR-99b-5p'],
            ((5, 0), (100, 1597), (54, 2231)),
            (1.79, 1.25, 1.86)
        ):
            PASS = False

        # Test get_transcriptomics() with an invalid parameter, and make sure that it raises an exception
        try:
            en.get_transcriptomics("lorem ipsum")
        except ValueError:
            pass
        else:
            print("Error: get_transcriptomics() did not raise ValueError as expected, when given invalid parameter.")
            PASS = False

        # Test get_CNA()
        if not tester.evaluate_getter(
            "CNA",
            en.get_CNA(),
            (103, 28057),
            ['MFSD14A', 'SASS6', 'TRMT13', 'LRRC39', 'DBT', 'RTCA-AS1', 'RTCA', 'MIR553', 'UBE4B', 'CDC14A', 'TSPY8', 'FAM197Y2', 'FAM197Y4', 'FAM197Y5', 'FAM197Y7', 'FAM197Y8', 'FAM197Y6', 'FAM197Y3', 'RBMY3AP', 'TTTY22'],
            ((12, 27865), (67, 8), (102, 15439)),
            (-0.19, 0.01, 0.03)
        ):
            PASS = False

        # Test get_phosphoproteomics() with default parameter gene_level=False
        if not tester.evaluate_getter(
            "Phosphoproteomics (site)",
            en.get_phosphoproteomics(),
            (153, 73212),
            ['AAAS-S495', 'AAAS-S541', 'AAAS-Y485', 'AACS-S618', 'AAED1-S12', 'AAGAB-S310', 'AAGAB-S311', 'AAK1-S14', 'AAK1-S18', 'AAK1-S20', 'ZZZ3-S397', 'ZZZ3-S411', 'ZZZ3-S420', 'ZZZ3-S424', 'ZZZ3-S426', 'ZZZ3-S468', 'ZZZ3-S89', 'ZZZ3-T415', 'ZZZ3-T418', 'ZZZ3-Y399'],
            ((46, 45), (12, 72435), (96, 45362)),
            (0.195, -0.27899999999999997, -0.13)
        ):
            PASS = False

        # Test get_phosphoproteomics(gene_level=True)
        if not tester.evaluate_getter(
            "Phosphoproteomics (gene)",
            en.get_phosphoproteomics(gene_level=True),
            (153, 8466),
            ['AAAS', 'AACS', 'AAED1', 'AAGAB', 'AAK1', 'AAMDC', 'AARS', 'AASDH', 'AATF', 'ABCA1', 'ZSCAN5C', 'ZSWIM3', 'ZSWIM8', 'ZUP1', 'ZW10', 'ZXDA', 'ZXDC', 'ZYX', 'ZZEF1', 'ZZZ3'],
            ((2, 7999), (148, 1045), (78, 6543)),
            (-0.0879, 1.33, 0.153)
        ):
            PASS = False

        # Test get_phosphosites
        if not tester.evaluate_getter(
            "Phosphosites for the AAK1-S14 gene",
            en.get_phosphosites('AAK1-S14'),
            (153, 1),
            ['AAK1-S14'],
            ((34, 0), (76, 0), (143, 0)),
            (1.46, -0.0511, 0.9940000000000001)
        ):
            PASS = False

        # Test get_somatic() with default parameters binary=False, unparsed=False (this will return the Somatic Maf dataframe)
        if not tester.evaluate_getter(
            "Somatic (maf)",
            en.get_somatic(),
            (53101, 5),
            ['Patient_Id', 'Gene', 'Mutation', 'Location', 'Clinical_Patient_Key'],
            ((53000, 3), (12, 4), (34567, 0)),
            ('p.M1259I', 'S001', 'C3N-00151')
        ):
            PASS = False

        # Test get_somatic(binary=True) and therefore unparsed=False
        somatic_binary_name = "Somatic (binary)"
        somatic_binary_df = en.get_somatic(binary=True)
        somatic_binary_dim = (103, 51559)
        somatic_binary_headers = ['A1BG_p.E298K', 'A1BG_p.S181N', 'A1CF_p.F487L', 'A1CF_p.S236Y', 'A2ML1_p.A8V', 'A2ML1_p.G1306D', 'A2ML1_p.L1347F', 'A2ML1_p.L82I', 'A2ML1_p.P712S', 'A2ML1_p.R443Q', 'ZYG11A_p.Q442H', 'ZYG11B_p.H315R', 'ZYG11B_p.R495M', 'ZYG11B_p.R728C', 'ZYX_p.C447Y', 'ZZEF1_p.A2723V', 'ZZEF1_p.D845Y', 'ZZEF1_p.K1251E', 'ZZEF1_p.K2387Sfs*40', 'ZZZ3_p.Y891C']
        somatic_binary_test_coord = ((102, 51558), (0, 0), (45, 25436))
        somatic_binary_test_vals = (0, 0, 0)

        if not tester.evaluate_getter(somatic_binary_name, somatic_binary_df, somatic_binary_dim, somatic_binary_headers, somatic_binary_test_coord, somatic_binary_test_vals):
            PASS = False

        
        # Test get_somatic(unparsed=True) and therefore binary=False
        somatic_unparsed_name = "Somatic (unparsed)"
        somatic_unparsed_df = en.get_somatic(unparsed=True)
        somatic_unparsed_dim = (53101, 124)
        somatic_unparsed_headers = ['Hugo_Symbol', 'Entrez_Gene_Id', 'Center', 'NCBI_Build', 'Chromosome', 'Start_Position', 'End_Position', 'Strand', 'Variant_Classification', 'Variant_Type', 'ExAC_AC_AN_Adj', 'ExAC_AC_AN', 'ExAC_AC_AN_AFR', 'ExAC_AC_AN_AMR', 'ExAC_AC_AN_EAS', 'ExAC_AC_AN_FIN', 'ExAC_AC_AN_NFE', 'ExAC_AC_AN_OTH', 'ExAC_AC_AN_SAS', 'ExAC_FILTER']
        somatic_unparsed_test_coord = ((52265, 45), (12, 70), (27658, 1))
        somatic_unparsed_test_vals = ('strelkasnv-varssnv-mutectsnv', 'UPI0000167B91', 0)

        if not tester.evaluate_getter(somatic_unparsed_name, somatic_unparsed_df, somatic_unparsed_dim, somatic_unparsed_headers, somatic_unparsed_test_coord, somatic_unparsed_test_vals):
            PASS = False

        # Indicate whether the overall test passed
        if PASS:
            print("PASS")
        else:
            print("FAIL")
示例#7
0
        if method == "scatterplot":
            plot = sns.relplot(x = column1, y = column2, data = data)
        elif method == "barplot":
            plot = sns.barplot(x = column1, y = column2, data = data)
        elif method == "boxplot":
            plot = sns.boxplot(x = column1, y = column2, data = data)
        else:
            message = method + " not a recognized method"
            print(message)
            return ""
        plt.show()

print("\nRunning tests:\n")

Basic().evaluate_getters()
Basic().evaluate_special_getters()
Basic().evaluate_utilities()
Basic().evaluate_getters_v2()

print("Plotting...")
Plotter().plot(en.get_proteomics(), "A1BG","PTEN","scatterplot")
Plotter().plot(en.get_clinical(), "Diabetes","BMI","barplot")
Plotter().plot(en.get_clinical(), "Diabetes","BMI","boxplot")
print("PASS")

print("Running statistics...")
message = Stats().evaluate(en.get_proteomics(), "Pathway_activity_p53")
print(message)

print("Version:",en.version())