def genomeAnalysis(datadir, label, gname, method):
    """Summarise the predicted binders of a genome per protein.

    Binders are collected with ``getAllBinders`` (n=5) from the directory
    ``datadir/label/gname/method`` and aggregated per protein name. The
    summary is then merged with the CDS annotation read from the genbank
    file ``<genomespath>/<gname>.gb``.

    TODO: this method should be made independent of web app paths etc
    (it still depends on the module-level ``genomespath``).

    Args:
        datadir: root directory of the stored prediction results.
        label: sub-directory of ``datadir`` for this set of results.
        gname: genome name, also the base name of the genbank file.
        method: predictor name, passed to ``base.getPredictor``.

    Returns:
        DataFrame with the mean, size and max of the predictor score per
        protein, the annotation columns ``locus_tag``, ``length``, ``gene``,
        ``product`` and ``order``, and ``perc`` (number of binders as a
        percentage of the protein length). When ``findClusters`` returns a
        result, the maximum cluster ``density`` per protein is merged in.
    """
    path = os.path.join(datadir, '%s/%s/%s' % (label, gname, method))
    gfile = os.path.join(genomespath, '%s.gb' % gname)
    g = sequtils.genbank2Dataframe(gfile, cds=True)
    b = getAllBinders(path, method=method, n=5)
    P = base.getPredictor(method)

    # Per-protein mean, count and max of the predictor score.
    res = b.groupby('name').agg({
        P.scorekey: [np.mean, np.size, np.max]
    }).sort()
    # Drop the score-key level so the columns are named after the aggregates.
    res.columns = res.columns.get_level_values(1)
    res = res.merge(g[['locus_tag', 'length', 'gene', 'product', 'order']],
                    left_index=True, right_on='locus_tag')
    res['perc'] = res['size'] / res.length * 100
    res = res.sort('perc', ascending=False)

    cl = findClusters(b, method, dist=9, minsize=3)
    if cl is not None:
        gc = cl.groupby('name').agg({'density': np.max})
        # NOTE(review): merge defaults to an inner join, so proteins without
        # any cluster are dropped from the result here - confirm intended.
        res = res.merge(gc, left_on='locus_tag', right_index=True)
    return res
def testrun(gname):
    """Run a trial prediction for protein VP24 of genome ``gname``.

    Predictions are made with the 'tepitope' predictor and saved under the
    'test' directory; the saved file is then read back, attached to the
    predictor and the score distributions are computed for that directory.
    """
    method = 'tepitope'  # alternatives tried: 'iedbmhc1', 'netmhciipan'
    path = 'test'
    genbank_file = os.path.join(genomespath, '%s.gb' % gname)
    proteins = sequtils.genbank2Dataframe(genbank_file, cds=True)
    names = ['VP24']
    # Alternative allele set, not used with the current method.
    alleles1 = [
        "HLA-A*02:02", "HLA-A*11:01", "HLA-A*32:07", "HLA-B*15:17",
        "HLA-B*51:01", "HLA-C*04:01", "HLA-E*01:03",
    ]
    alleles2 = [
        "HLA-DRB1*0101", "HLA-DRB1*0305", "HLA-DRB1*0812", "HLA-DRB1*1196",
        "HLA-DRB1*1346", "HLA-DRB1*1455", "HLA-DRB1*1457", "HLA-DRB1*1612",
        "HLA-DRB4*0107", "HLA-DRB5*0203",
    ]

    predictor = base.getPredictor(method)
    predictor.iedbmethod = 'IEDB_recommended'  # or 'netmhcpan'
    predictor.predictProteins(proteins, length=11, alleles=alleles2,
                              names=names, save=True, path=path)

    # Reload the results that were just written for the first protein.
    saved = os.path.join('test', names[0] + '.mpk')
    predictor.data = pd.read_msgpack(saved)
    base.getScoreDistributions(method, path)
def genomeAnalysis(datadir, label, gname, method):
    """Summarise the predicted binders of a genome per protein.

    Binders are collected with ``getAllBinders`` (n=5) from the directory
    ``datadir/label/gname/method`` and aggregated per protein name. The
    summary is then merged with the CDS annotation read from the genbank
    file ``<genomespath>/<gname>.gb``.

    TODO: this method should be made independent of web app paths etc
    (it still depends on the module-level ``genomespath``).

    Args:
        datadir: root directory of the stored prediction results.
        label: sub-directory of ``datadir`` for this set of results.
        gname: genome name, also the base name of the genbank file.
        method: predictor name, passed to ``base.getPredictor``.

    Returns:
        DataFrame with the mean, size and max of the predictor score per
        protein, the annotation columns ``locus_tag``, ``length``, ``gene``,
        ``product`` and ``order``, and ``perc`` (number of binders as a
        percentage of the protein length). When ``findClusters`` returns a
        result, the maximum cluster ``density`` per protein is merged in.
    """
    path = os.path.join(datadir, '%s/%s/%s' % (label, gname, method))
    gfile = os.path.join(genomespath, '%s.gb' % gname)
    g = sequtils.genbank2Dataframe(gfile, cds=True)
    b = getAllBinders(path, method=method, n=5)
    P = base.getPredictor(method)

    # Per-protein mean, count and max of the predictor score.
    res = b.groupby('name').agg({
        P.scorekey: [np.mean, np.size, np.max]
    }).sort()
    # Drop the score-key level so the columns are named after the aggregates.
    res.columns = res.columns.get_level_values(1)
    res = res.merge(g[['locus_tag', 'length', 'gene', 'product', 'order']],
                    left_index=True, right_on='locus_tag')
    res['perc'] = res['size'] / res.length * 100
    res = res.sort('perc', ascending=False)

    cl = findClusters(b, method, dist=9, minsize=3)
    if cl is not None:
        gc = cl.groupby('name').agg({'density': np.max})
        # NOTE(review): merge defaults to an inner join, so proteins without
        # any cluster are dropped from the result here - confirm intended.
        res = res.merge(gc, left_on='locus_tag', right_index=True)
    return res
def testLoad(self):
    """Test re-loading predictions.

    Reads the saved 'ZEBOVgp1.mpk' file from the test directory and
    attaches it as the data of an 'iedbmhc1' predictor.
    """
    saved = pd.read_msgpack(os.path.join(self.testdir, 'ZEBOVgp1.mpk'))
    predictor = base.getPredictor('iedbmhc1')
    predictor.data = saved
def testBcell(self):
    """IEDB BCell test.

    Runs the 'bcell' predictor with the 'Chou-Fasman' method on protein
    VP24 and saves the results in the test directory.
    """
    proteins = self.df
    predictor = base.getPredictor('bcell')
    predictor.iedbmethod = 'Chou-Fasman'
    predictor.predictProteins(proteins, names=['VP24'], save=True,
                              path=self.testdir)
def testFasta(self):
    """Test fasta predictions.

    Loads the proteins from a fasta file and runs the 'tepitope' predictor
    on them for a single allele, saving into the test directory.
    """
    proteins = sequtils.fasta2Dataframe('testing/zaire-ebolavirus.faa')
    predictor = base.getPredictor('tepitope')
    predictor.predictProteins(proteins, length=11, alleles=["HLA-DRB1*0101"],
                              save=True, path=self.testdir)
def testBcell(gname):
    """Run the 'bcell' predictor on protein VP24 of genome ``gname``.

    Uses the 'Chou-Fasman' method, saves the results under the 'test'
    directory and prints the predictor's data.
    """
    genbank_file = os.path.join(genomespath, '%s.gb' % gname)
    proteins = sequtils.genbank2Dataframe(genbank_file, cds=True)
    predictor = base.getPredictor('bcell')
    predictor.iedbmethod = 'Chou-Fasman'
    predictor.predictProteins(proteins, names=['VP24'], save=True, path='test')
    print(predictor.data)
def testBcell(gname):
    """Run the 'bcell' predictor on protein VP24 of genome ``gname``.

    Uses the 'Chou-Fasman' method, saves the results under the 'test'
    directory and prints the predictor's data.
    """
    genbank_file = os.path.join(genomespath, '%s.gb' % gname)
    proteins = sequtils.genbank2Dataframe(genbank_file, cds=True)
    predictor = base.getPredictor('bcell')
    predictor.iedbmethod = 'Chou-Fasman'
    predictor.predictProteins(proteins, names=['VP24'], save=True, path='test')
    print(predictor.data)
def testTepitope(self):
    """Tepitope test.

    Predicts 11-mers for two alleles with the 'tepitope' predictor, saves
    the results in the test directory and extracts the binders.
    """
    proteins = self.df
    predictor = base.getPredictor('tepitope')
    allele_names = ["HLA-DRB1*0101", "HLA-DRB1*0305"]
    print(predictor)
    predictor.predictProteins(proteins, length=11, alleles=allele_names,
                              save=True, path=self.testdir)
    predictor.getBinders(data=predictor.data)
def testIEDB(self):
    """IEDB MHCI test.

    Predicts 11-mers with the 'iedbmhc1' predictor for a fixed set of
    alleles and saves the results in the test directory.
    """
    proteins = self.df
    predictor = base.getPredictor('iedbmhc1')
    print(predictor)
    allele_names = [
        "HLA-A*02:02", "HLA-A*11:01", "HLA-B*15:17",
        "HLA-B*51:01", "HLA-C*04:01", "HLA-E*01:03",
    ]
    predictor.predictProteins(proteins, length=11, alleles=allele_names,
                              save=True, path=self.testdir)
def testnetMHCIIpan(self):
    """netMHCIIpan test.

    Requires that netMHCIIpan is installed. Predicts 11-mers for protein
    ZEBOVgp1 and one allele, saves the results in the test directory and
    extracts the binders.
    """
    proteins = self.df
    predictor = base.getPredictor('netmhciipan')
    allele_names = ["HLA-DRB1*0101"]
    protein_names = ['ZEBOVgp1']
    print(predictor)
    predictor.predictProteins(proteins, length=11, alleles=allele_names,
                              names=protein_names, save=True,
                              path=self.testdir)
    predictor.getBinders(data=predictor.data)
def getPredictions(path, tag, method='tepitope', q=0.96):
    """Get predictions from file system.

    Loads the saved results ``<path>/<tag>.mpk`` into a predictor of the
    given method and attaches the allele cutoffs for quantile ``q``.

    Args:
        path: directory containing the saved ``.mpk`` result files.
        tag: base name (without extension) of the result file to load.
        method: predictor name passed to ``base.getPredictor``.
        q: quantile passed to ``getCutoffs``; rounded to 2 decimal places.

    Returns:
        The predictor with ``data`` loaded and ``allelecutoffs`` set, or
        None if the result file does not exist.
    """
    q = round(q, 2)
    filename = os.path.join(path, tag + '.mpk')
    if not os.path.exists(filename):
        return None
    df = pd.read_msgpack(filename)
    pred = base.getPredictor(name=method, data=df)
    pred.allelecutoffs = getCutoffs(path, method, q)
    return pred
def getPredictions(path, tag, method='tepitope', q=0.96):
    """Get predictions from file system.

    Loads the saved results ``<path>/<tag>.mpk`` into a predictor of the
    given method and attaches the allele cutoffs for quantile ``q``.

    Args:
        path: directory containing the saved ``.mpk`` result files.
        tag: base name (without extension) of the result file to load.
        method: predictor name passed to ``base.getPredictor``.
        q: quantile passed to ``getCutoffs``; rounded to 2 decimal places.

    Returns:
        The predictor with ``data`` loaded and ``allelecutoffs`` set, or
        None if the result file does not exist.
    """
    q = round(q, 2)
    filename = os.path.join(path, tag + '.mpk')
    if not os.path.exists(filename):
        return None
    df = pd.read_msgpack(filename)
    pred = base.getPredictor(name=method, data=df)
    pred.allelecutoffs = getCutoffs(path, method, q)
    return pred
def testrun(gname):
    """Run a trial prediction for protein VP24 of genome ``gname``.

    Predictions are made with the 'tepitope' predictor and saved under the
    'test' directory; the saved file is then read back, attached to the
    predictor and the score distributions are computed for that directory.
    """
    method = 'tepitope'  # alternatives tried: 'iedbmhc1', 'netmhciipan'
    path = 'test'
    genbank_file = os.path.join(genomespath, '%s.gb' % gname)
    proteins = sequtils.genbank2Dataframe(genbank_file, cds=True)
    names = ['VP24']
    # Alternative allele set, not used with the current method.
    alleles1 = [
        "HLA-A*02:02", "HLA-A*11:01", "HLA-A*32:07", "HLA-B*15:17",
        "HLA-B*51:01", "HLA-C*04:01", "HLA-E*01:03",
    ]
    alleles2 = [
        "HLA-DRB1*0101", "HLA-DRB1*0305", "HLA-DRB1*0812", "HLA-DRB1*1196",
        "HLA-DRB1*1346", "HLA-DRB1*1455", "HLA-DRB1*1457", "HLA-DRB1*1612",
        "HLA-DRB4*0107", "HLA-DRB5*0203",
    ]

    predictor = base.getPredictor(method)
    predictor.iedbmethod = 'IEDB_recommended'  # or 'netmhcpan'
    predictor.predictProteins(proteins, length=11, alleles=alleles2,
                              names=names, save=True, path=path)

    # Reload the results that were just written for the first protein.
    saved = os.path.join('test', names[0] + '.mpk')
    predictor.data = pd.read_msgpack(saved)
    base.getScoreDistributions(method, path)
def getAllBinders(path, method='tepitope', n=3, cutoff=0.95, promiscuous=True):
    """Get all promiscuous binders from a set of proteins in path.

    Every ``*.mpk`` result file in ``path`` is loaded and its binders are
    extracted with allele specific cutoffs obtained from ``getCutoffs``
    (called with ``overwrite=True``).

    Args:
        path: directory containing the saved ``.mpk`` result files.
        method: predictor name passed to ``base.getPredictor``.
        n: passed to ``getPromiscuousBinders``; only used when
            ``promiscuous`` is true.
        cutoff: cutoff value passed to ``getCutoffs``.
        promiscuous: if true use ``getPromiscuousBinders``, otherwise
            ``getBinders``.

    Returns:
        DataFrame of the binders of all files, with ``start`` set to ``pos``
        and ``end`` set to ``pos`` plus the peptide length. None when
        ``method`` is 'bcell', for which this is not applicable.

    Raises:
        ValueError: from ``pd.concat`` if ``path`` has no ``.mpk`` files.
    """
    print('getting binders..')
    if method == 'bcell':
        return None  # not applicable
    P = base.getPredictor(method)
    files = glob.glob(os.path.join(path, '*.mpk'))
    # get allele specific cutoffs
    P.allelecutoffs = getCutoffs(path, method, cutoff, overwrite=True)
    binders = []
    for f in files:
        df = pd.read_msgpack(f)
        if promiscuous:
            b = P.getPromiscuousBinders(data=df, n=n)
        else:
            b = P.getBinders(data=df)
        binders.append(b)
    result = pd.concat(binders)
    result['start'] = result.pos
    result['end'] = result.pos + result.peptide.str.len()
    return result
def getAllBinders(path, method='tepitope', n=3, cutoff=0.95, promiscuous=True):
    """Get all promiscuous binders from a set of proteins in path.

    Every ``*.mpk`` result file in ``path`` is loaded and its binders are
    extracted with allele specific cutoffs obtained from ``getCutoffs``
    (called with ``overwrite=True``).

    Args:
        path: directory containing the saved ``.mpk`` result files.
        method: predictor name passed to ``base.getPredictor``.
        n: passed to ``getPromiscuousBinders``; only used when
            ``promiscuous`` is true.
        cutoff: cutoff value passed to ``getCutoffs``.
        promiscuous: if true use ``getPromiscuousBinders``, otherwise
            ``getBinders``.

    Returns:
        DataFrame of the binders of all files, with ``start`` set to ``pos``
        and ``end`` set to ``pos`` plus the peptide length. None when
        ``method`` is 'bcell', for which this is not applicable.

    Raises:
        ValueError: from ``pd.concat`` if ``path`` has no ``.mpk`` files.
    """
    print('getting binders..')
    if method == 'bcell':
        return None  # not applicable
    P = base.getPredictor(method)
    files = glob.glob(os.path.join(path, '*.mpk'))
    # get allele specific cutoffs
    P.allelecutoffs = getCutoffs(path, method, cutoff, overwrite=True)
    binders = []
    for f in files:
        df = pd.read_msgpack(f)
        if promiscuous:
            b = P.getPromiscuousBinders(data=df, n=n)
        else:
            b = P.getBinders(data=df)
        binders.append(b)
    result = pd.concat(binders)
    result['start'] = result.pos
    result['end'] = result.pos + result.peptide.str.len()
    return result