def main():
    """Build the PDX mutation hyphal network and write its summary tables.

    Reads the interactome pickle named by ``args.graph``, builds a
    ``hyphalNetwork`` from ``getMutationalData()``, runs forest and community
    enrichment, and writes the network pickle plus CSV reports to the current
    directory. All output names are prefixed with ``'mpnstPDXmuts'`` except the
    two ``panPDX*`` tables.
    """
    args = parser.parse_args()
    #get mutational data
    mvals = getMutationalData()
    ##load up interactome
    gfile = args.graph
    ##TODO: replace this with Docker image call
    # NOTE: pickle executes arbitrary code on load; only pass trusted graph files.
    with open(gfile, 'rb') as handle:
        g = pickle.load(handle)
    key = 'mpnstPDXmuts'
    this_hyp = hyphalNetwork(mvals, g)
    # Save the freshly built network before the enrichment steps run.
    this_hyp._to_file(key + '_hypha.pkl')
    ##read from file
    ###this is all we need to do in a single eval, then we can do tests later
    this_hyp.node_stats().to_csv(key + '_nodelist.csv')
    for_e = hyEnrich.go_enrich_forests(this_hyp)
    this_hyp.assign_enrichment(for_e, type='forest')
    for_e.to_csv(key + 'enrichedForestGoTerms.csv')
    com_e = hyEnrich.go_enrich_communities(this_hyp)
    this_hyp.assign_enrichment(com_e, type='community')
    # Save again so the pickle carries both enrichment assignments.
    this_hyp._to_file(key + '_hypha.pkl')
    com_e.to_csv(key + 'enrichedCommunityGOterms.csv')
    this_hyp.community_stats(prefix=key).to_csv(key + '_communityStats.csv')
    res = hyStats.compute_all_distances({'mutations': this_hyp})
    res.to_csv('panPDXDistances.csv')
    nmi = hyStats.compute_all_nmi({'mutations': this_hyp}, g)
    nmi.to_csv('panPDXNMI.csv')
def significant_genes(data_frame, group, subgroup, value):
    """Build the 'proteomics' hyphal network from high z-score rows.

    A ``'zscore'`` column (``stats.zscore`` of ``data_frame[value]``) is added
    to ``data_frame`` in place. Rows with ``zscore >= 2.58`` are kept; the
    cutoff is one-sided (an earlier revision used the absolute value).
    The kept rows are turned into ``{group: {subgroup: value}}`` and handed to
    ``hyphalNetwork``. The network is written to ``'proteomics_hypha.pkl'``.

    The interactome is built from the module-level ``gfile``.

    Returns:
        dict with the single entry ``{'proteomics': <hyphalNetwork>}``.
    """
    data_frame['zscore'] = stats.zscore(data_frame[value])
    above_cutoff = data_frame['zscore'] >= 2.58
    hits = data_frame[above_cutoff]

    def _subgroup_values(rows):
        # one {subgroup: value} mapping per group
        return dict(zip(rows[subgroup], rows[value]))

    per_group = hits.groupby(group).apply(_subgroup_values)
    gene_dictionary = per_group.to_dict()

    # Flagged by an earlier reviewer as old code that was causing an issue.
    g = hyp.make_graph_from_dict(gfile)
    beta = 0.5
    # hyphalNetwork takes the whole gene dictionary, so no per-key loop.
    label = 'proteomics'
    network = hyphalNetwork(gene_dictionary, g.copy(), beta)
    hyphae = {label: network}
    network._to_file(label + '_hypha.pkl')
    return hyphae
def main():
    """Build a pan-cancer hyphal network from CPTAC data and write its tables.

    Writes ``clinicalData.csv`` and ``mutationData.csv``, then builds one
    ``hyphalNetwork`` over all proteomics measurements (``byType=False``).
    The network is saved to ``<refName>_hypNet.pkl`` and its ``distVals``
    to ``<refName>_DistanceVals.csv``.
    """
    args = parser.parse_args()
    qval = args.quant
    ##first we run a helper function to make sure we have all cptac data
    fdict = pdata.cptacData()
    ##first get proteomics measurements
    allDat = pdata.getCancerData(fdict, qval, byType=False)
    patientData = pdata.getCombinedClinicalData(fdict)
    patientData.to_csv('clinicalData.csv')
    mutationData = pdata.getCombinedMutationData(fdict)
    mutationData.to_csv('mutationData.csv')
    ##make sure this file is built!
    # NOTE: pickle executes arbitrary code on load; only use a trusted graph file.
    with open('../odata/igraphPPI.pkl', 'rb') as handle:
        g = pickle.load(handle)
    beta = .5
    #build hyphal network of network communities
    phyph = hyphalNetwork(allDat, g, beta)
    # Was `phpyh._to_file(...)`, an undefined name that raised NameError.
    phyph._to_file(args.refName + '_hypNet.pkl')
    #write out distances within communities
    res = phyph.distVals
    fname = args.refName + '_DistanceVals.csv'
    res.to_csv(fname)
def main():
    """Build a hyphal network from a CSV of values and pickle it.

    Reads the table at ``args.df``, converts it with ``df2dict``, loads the
    interactome pickle at ``args.graph``, and writes the resulting network to
    ``<args.output>_hypha.pkl``.
    """
    args = parser.parse_args()
    #get mutational data
    mdf = pd.read_csv(args.df)
    key = args.output
    mvals = df2dict(mdf)
    ##load up interactome
    gfile = args.graph
    ##TODO: replace this with Docker image call
    # NOTE: pickle executes arbitrary code on load; only pass trusted graph files.
    with open(gfile, 'rb') as handle:
        g = pickle.load(handle)
    this_hyp = hyphalNetwork(mvals, g)
    this_hyp._to_file(key + '_hypha.pkl')
def significant_genes(data_frame, group, subgroup, value):
    """Build the 'proteomics' hyphal network from high z-score rows.

    A ``'zscore'`` column (``stats.zscore`` of ``data_frame[value]``) is added
    to ``data_frame`` in place. Rows with ``zscore >= 2.58`` are grouped into
    ``{group: {subgroup: value}}`` and passed to ``hyphalNetwork``. The
    network is written to ``'proteomics_hypha.pkl'``.

    ``g`` is not defined in this function; it is read from the enclosing
    (module) scope and copied before use.

    Returns:
        dict with the single entry ``{'proteomics': <hyphalNetwork>}``.
    """
    data_frame['zscore'] = stats.zscore(data_frame[value])
    above_cutoff = data_frame['zscore'] >= 2.58
    hits = data_frame[above_cutoff]

    def _subgroup_values(rows):
        # one {subgroup: value} mapping per group
        return dict(zip(rows[subgroup], rows[value]))

    per_group = hits.groupby(group).apply(_subgroup_values)
    gene_dictionary = per_group.to_dict()

    beta = 0.5
    label = 'proteomics'
    network = hyphalNetwork(gene_dictionary, g.copy(), beta)
    hyphae = {label: network}
    network._to_file(label + '_hypha.pkl')
    return hyphae
def build_hyphae_from_data(qt, g, sample=False):
    """ Temp function to load data from local directory

    Builds one ``hyphalNetwork`` per entry returned by ``loadCancerData(qt)``
    and pickles each to ``<key><qt>_hypha.pkl``.

    Args:
        qt: passed to ``loadCancerData`` and appended (as ``str``) to each key.
        g: interactome graph; a copy is handed to every network.
        sample: when true, each entry is reduced to a random subset of at most
            300 of its items before the network is built.

    Returns:
        dict mapping ``key + str(qt)`` to the network built for that entry.
    """
    ##this is the framework for the PDC data parser.
    #now we want to build network communities for each
    max_sampled = 300
    hyphae = dict()
    patDiffs = loadCancerData(qt)
    beta = 0.5
    for key, vals in patDiffs.items():
        if sample:
            # Cap at the available count: random.sample raises ValueError when
            # asked for more items than the population holds.
            chosen = random.sample(list(vals), min(max_sampled, len(vals)))
            vals = {v: vals[v] for v in chosen}
            print(len(vals))
        name = key + str(qt)
        # `g=3` is a keyword of hyphalNetwork, unrelated to the graph argument.
        this_hyp = hyphalNetwork(vals, g.copy(), beta=beta, g=3,
                                 do_forest=False, noComms=False)
        hyphae[name] = this_hyp
        this_hyp._to_file(name + '_hypha.pkl')
    return hyphae
def build_hyphae_from_data():
    """ Temp function to load data from local directory

    Parses four PDC proteomics files (breast, lung, colon, glioblastoma),
    computes tumor-vs-normal values per cancer type with ``prot.getTumorNorm``,
    builds a ``hyphalNetwork`` for each, and pickles it to ``<key>_hypha.pkl``.

    Returns:
        dict keyed by 'brca', 'luad', 'coad', 'gbm' holding the networks.
    """
    ##this is the framework for the PDC data parser.
    norms = prot.normals_from_manifest('data/PDC_biospecimen_manifest_05112020_184928.csv')

    # Alternative breast input, currently disabled:
    #   prot.parsePDCfile('data/TCGA_Breast_BI_Proteome.itraq.tsv')
    breast = prot.parsePDCfile('data/CPTAC2_Breast_Prospective_Collection_BI_Proteome.tmt10.tsv')
    lung = prot.parsePDCfile('data/CPTAC3_Lung_Adeno_Carcinoma_Proteome.tmt10.tsv')
    colon = prot.parsePDCfile('data/CPTAC2_Colon_Prospective_Collection_PNNL_Proteome.tmt10.tsv')
    glioma = prot.parsePDCfile('data/CPTAC3_Glioblastoma_Multiforme_Proteome.tmt11.tsv')

    def _normal_patients(frame, disease):
        # patients of this data set that the manifest lists under `disease`
        return {pat for pat in frame['Patient'] if pat in norms[disease]}

    normPats = {
        'brca': _normal_patients(breast, 'Breast Invasive Carcinoma'),
        'coad': _normal_patients(colon, 'Colon Adenocarcinoma'),
        'luad': _normal_patients(lung, 'Lung Adenocarcinoma'),
        'gbm': _normal_patients(glioma, 'Other'),
    }

    gfile = '../../../OmicsIntegrator2/interactomes/inbiomap.9.12.2016.full.oi2'
    g = hyp.make_graph(gfile)
    namemapper = None  #hyp.mapHGNCtoNetwork()

    ##here we get the top values for each patient
    # NOTE(review): patVals is not used below; kept so the calls still run.
    patVals = {
        'brca': prot.getProtsByPatient(breast, namemapper),
        'luad': prot.getProtsByPatient(lung, namemapper),
        'coad': prot.getProtsByPatient(colon, namemapper),
        'gbm': prot.getProtsByPatient(glioma, namemapper),
    }

    #here we get the top most distinguished from normals
    patDiffs = {
        'brca': prot.getTumorNorm(breast, normPats['brca'], namemapper),
        'luad': prot.getTumorNorm(lung, normPats['luad'], namemapper),
        'coad': prot.getTumorNorm(colon, normPats['coad'], namemapper),
        'gbm': prot.getTumorNorm(glioma, normPats['gbm'], namemapper),
    }

    #now we want to build network communities for each
    hyphae = {}
    for key, diffs in patDiffs.items():
        network = hyphalNetwork(diffs, g)
        hyphae[key] = network
        network._to_file(key + '_hypha.pkl')
    return hyphae
def main():
    """Compute NMI between a saved pan-cancer network and per-type networks.

    Loads the pickled network at ``args.hyph`` as ``'panCan'``, builds one
    ``hyphalNetwork`` per entry of the by-type CPTAC data, and writes the
    result of ``hyStats.compute_all_nmi`` to ``<refName>_nmi.csv``.
    """
    args = parser.parse_args()
    qval = args.quant
    ##first we run a helper function to make sure we have all cptac data
    fdict = pdat.cptacData()
    ##first get proteomics measurements
    allDat = pdat.getCancerData(fdict, qval, byType=True)
    ##make sure this file is built!
    # NOTE: pickle executes arbitrary code on load; only use trusted files.
    with open('../odata/igraphPPI.pkl', 'rb') as handle:
        g = pickle.load(handle)
    beta = .5
    # Load the previously built pan-cancer network rather than rebuilding it.
    with open(args.hyph, 'rb') as handle:
        phyph = pickle.load(handle)
    hDict = {'panCan': phyph}
    for ct, dat in allDat.items():
        hDict[ct] = hyphalNetwork(dat, g, beta)
    nmi = hyStats.compute_all_nmi(hDict, g)
    nmi.to_csv(args.refName + '_nmi.csv')
def main():
    """Build or load the AML hyphal networks, then write and upload reports.

    Networks for ``'mutations'`` and ``'proteomics'`` are built from the
    module-level ``data`` unless ``args.fromFile`` is given, in which case they
    are loaded with ``loadFromFile``. With ``args.getDist`` the distance and
    NMI tables are written and stored through ``syn``. For every network the
    node list is written and stored. With ``args.doEnrich`` any missing forest
    or community enrichment is computed and stored. Finally the community and
    tree statistics are written and uploaded with the network pickle.
    """
    gfile = '../../data/igraphPPI.pkl'
    # NOTE: pickle executes arbitrary code on load; only use a trusted graph file.
    with open(gfile, 'rb') as handle:
        g = pickle.load(handle)  #hyp.make_graph_from_dict(gfile)

    args = parser.parse_args()
    beta = 0.5
    proteomics_dictionary = significant_prots(data, 'AML sample', 'Gene',
                                              'LogFoldChange')
    gene_dictionary = tumor_genes(data, 'AML sample', 'Gene', 'Tumor VAF')

    pkl_suffix = '_amlPatientData_hypha.pkl'
    results_parent = 'syn22269875'

    if args.fromFile is None:
        hyphae = dict()
        hyphae['mutations'] = hyphalNetwork(gene_dictionary, g.copy(), beta)
        hyphae['proteomics'] = hyphalNetwork(proteomics_dictionary, g.copy(),
                                             beta)
        for key, this_hyp in hyphae.items():
            this_hyp._to_file(key + pkl_suffix)
    else:
        hyphae = loadFromFile(args.fromFile)

    #now compute graph distances to ascertain fidelity
    if args.getDist:
        res = hyStats.compute_all_distances(hyphae)
        res.to_csv('amlNetworkdistances.csv')
        tab = table.build_table("AML Network Distances", 'syn22128879', res)
        syn.store(tab)
        nmi = hyStats.compute_all_nmi(hyphae, g)
        nmi.to_csv('amlNMI.csv')
        syn.store(File('amlNMI.csv', parent=results_parent))

    #store distances
    for key, this_hyp in hyphae.items():
        node_stats = this_hyp.node_stats()
        node_stats.to_csv(key + '_nodelist.csv')
        tab = table.build_table("AML Network Nodes", 'syn22128879', node_stats)
        syn.store(tab)

        if args.doEnrich:
            # len(...) == 0 is kept on purpose: if these attributes are
            # DataFrames, plain truthiness would raise.
            if len(this_hyp.forest_enrichment) == 0:
                for_e = hyEnrich.go_enrich_forests(this_hyp)  #SG, ncbi)
                this_hyp.assign_enrichment(for_e, type='forest')
                for_e.to_csv(key + 'enrichedForestGoTerms.csv')
                syn.store(
                    File(key + 'enrichedForestGoTerms.csv',
                         parent=results_parent))
                # Re-save so the pickle carries the new enrichment.
                this_hyp._to_file(key + pkl_suffix)
            if len(this_hyp.community_enrichment) == 0:
                com_e = hyEnrich.go_enrich_communities(this_hyp)
                this_hyp.assign_enrichment(com_e, type='community')
                com_e.to_csv(key + 'enrichedCommunityGOterms.csv')
                syn.store(
                    File(key + 'enrichedCommunityGOterms.csv',
                         parent=results_parent))
                this_hyp._to_file(key + pkl_suffix)

        ##next: compare enrichment between patients mapped to communities
        this_hyp.community_stats(prefix=key).to_csv(key + '_communityStats.csv')
        this_hyp.forest_stats().to_csv(key + '_TreeStats.csv')
        for files in [
                key + pkl_suffix, key + '_communityStats.csv',
                key + '_TreeStats.csv'
        ]:
            syn.store(File(files, parent=results_parent))