def get_basic_set(self):
    """
    Returns the annotations restricted to SwissProt entries.

    Fetches the SwissProt IDs of ``self.organism`` by
    ``dataio.all_uniprots`` and keeps only those items of the
    annotation mapping (the attribute of ``self.annotation`` named
    by the lowercase ``self.aspect``) whose key is among these IDs.

    :return: dict with the retained key-value pairs.
    """
    swissprots = set(
        dataio.all_uniprots(organism=self.organism, swissprot='yes')
    )
    annotations = getattr(self.annotation, self.aspect.lower())
    # ``iteritems`` exists only on Python 2 dicts; prefer it when it is
    # available (lazy iteration, as before) and fall back to ``items``
    # so that the method works on Python 3 too.
    iter_pairs = getattr(annotations, 'iteritems', None) or annotations.items
    return {
        key: value
        for key, value in iter_pairs()
        if key in swissprots
    }
def load_uniprots(self):
    """
    Builds the base set representing the entire proteome:
    collects all UniProt IDs for the organism ``self.ncbi_tax_id``
    and stores them as a set in ``self.uniprots``.
    """
    all_ids = dataio.all_uniprots(organism=self.ncbi_tax_id)
    self.uniprots = set(all_ids)
# Reference lists stored in ``net.lists``: identifiers from each resource
# are translated to UniProt by ``net.mapper.map_name``, then the results are
# flattened and made unique.
net.lists['dgb'] = uniqList(flatList([
    net.mapper.map_name(dgb, 'genesymbol', 'uniprot')
    for dgb in dataio.get_dgidb()
]))
net.lists['kin'] = uniqList(flatList([
    net.mapper.map_name(kin, 'genesymbol', 'uniprot')
    for kin in dataio.get_kinases()
]))
net.lists['tfs'] = uniqList(flatList([
    net.mapper.map_name(tf, 'ensg', 'uniprot')
    for tf in dataio.get_tfcensus()['ensg']
]))
net.lists['dis'] = uniqList(flatList([
    net.mapper.map_name(dis['genesymbol'], 'genesymbol', 'uniprot')
    for dis in dataio.get_disgenet()
]))

# defining the proteome as the set of all human swissprot ids
# (no organism is passed here, so this presumably relies on the default
# of ``all_uniprots`` being human -- TODO confirm)
console(':: Loading the human proteome')
proteome = dataio.all_uniprots(swissprot='yes')

# The output file is not closed within this section.
# NOTE(review): make sure it is closed once all results have been written.
fi = open(fisherFile, 'w')

# Fisher's exact test for enrichment of disease related proteins
# in OmniPath compared to their ratio in the whole proteome
console(
    ':: Fisher\'s exact test for enrichment of disease related proteins '
    'in the network compared to their abundance in the proteome'
)
# Columns: whole proteome, network; rows: all proteins, disease related ones.
# NOTE(review): the first row holds totals (``len(proteome)`` and
# ``vcount()``) instead of the counts of non-disease proteins, so the cells
# are not disjoint as a 2x2 contingency table normally requires -- confirm
# that this is intended.
contDisg = np.array([
    [len(proteome), net.graph.vcount()],
    [
        len(net.lists['dis']),
        sum(1 for v in net.graph.vs if len(v['dis']) > 0),
    ],
])
# ``fisher_exact`` yields two values which fill the two ``%s`` fields.
fi.write(
    'Disease related proteins:\t%s\t%s\n' % stats.fisher_exact(contDisg)
)

# Fisher's exact test for enrichment of cancer driver proteins
# in OmniPath compared to their ratio in the whole proteome