def main(argv=sys.argv):
    """Unpickle an object, pass it through ``dist.PTS`` and pickle the result.

    Args:
        argv: Argument list in ``sys.argv`` layout: program name, path of the
            pickled input, path the pickled ``dist.PTS`` result is written to.

    Prints a usage message and exits with status 1 unless ``argv`` holds
    exactly three items.
    """
    if len(argv) != 3:
        print("\n Usage: %s num_pairs.dat potential.dat" % argv[0])
        print("")
        sys.exit(1)

    # NOTE(review): pickle.load can execute arbitrary code; only feed this
    # script files produced by a trusted run.
    # The files are deliberately kept in text mode so that pickles written by
    # earlier runs of this script stay readable on the same platform.
    with open(argv[1], "r") as inf:
        dc = pickle.load(inf)

    p = dist.PTS(dc)

    # The context manager closes the output even when pickle.dump raises.
    with open(argv[2], "w") as outf:
        pickle.dump(p, outf)
def main():
    """Score every ligand/protein pair returned by ``GetFileList()``.

    Each entry supplies the ligand path, the protein path and an identifier
    at indices 0, 1 and 2.  Entries whose ligand file does not exist are
    skipped; for the others the two values returned by ``GetFinger`` are
    printed on one line.
    """
    entries = GetFileList()
    raw_counts = dist.UnformatedInput('dist_splited_train_0525.dat')
    potentials = dist.PTS(raw_counts)

    for entry in entries:
        ligand_path, protein_path, pdb_code = entry[0], entry[1], entry[2]
        if not os.path.isfile(ligand_path):
            continue
        # Only the first molecule of each file is used.
        ligand = next(pybel.readfile('sdf', ligand_path))
        protein = next(pybel.readfile('pdb', protein_path))
        score, scorev = GetFinger(ligand, protein, potentials, pdb_code)
        print("%s %s" % (score, scorev))
def singleTest(dc_fn):
    """Compute and print the descriptors of the hard-coded 3U5L complex.

    Args:
        dc_fn: Path handed to ``dist.UnformatedInput``.

    Returns:
        The descriptor values as strings, in ``DESCRIPTOR`` order.
    """
    ligand_path = 'E:/brd4/PMF_Yolanda/dataSet/version2/trainsetLigand/PDB_3U5L.sdf'
    pocket_path = 'E:/brd4/PMF_Yolanda/dataSet/version2/trainsetPocket/PDB_3U5L.pdb'

    # Only the first molecule of each file is used.
    ligand = next(pybel.readfile('sdf', ligand_path))
    pocket = next(pybel.readfile('pdb', pocket_path))

    potentials = dist.PTS(dist.UnformatedInput(dc_fn))
    values = GetDscrptor(ligand, pocket, potentials)

    as_text = [str(values[name]) for name in DESCRIPTOR]
    for name in DESCRIPTOR:
        print("%s %s" % (name, values[name]))
    return as_text
def dscrptorOut(dc_fn):
    """Write raw and scaled descriptor tables for the train and test sets.

    For each set, every file in the ligand directory is paired with the
    ``.pdb`` file of the same stem in the pocket directory, its descriptors
    are computed with ``GetDscrptor`` and one tab-separated row is written.
    A scaler obtained from the raw training table is then applied to both raw
    tables and the results are written to the ``exp_s`` files.

    Args:
        dc_fn: Path handed to ``dist.UnformatedInput``.
    """
    dirs = {
        'Train': {
            'lig': 'e:/brd4/PMF_Yolanda/dataSet/version4/trainsetLigand/',
            'pro': 'e:/brd4/PMF_Yolanda/dataSet/version4/trainsetPocket/',
            'exp': 'e:/brd4/PMF_Yolanda/dataSet/version4/dscrptorValueLE_train.txt',
            'exp_s': 'e:/brd4/PMF_Yolanda/dataSet/version4/dscrptorValueLEScaled_train.txt',
        },
        'Test': {
            'lig': 'e:/brd4/PMF_Yolanda/dataSet/version4/testsetLigand/',
            'pro': 'e:/brd4/PMF_Yolanda/dataSet/version4/testsetPocket/',
            'exp': 'e:/brd4/PMF_Yolanda/dataSet/version4/dscrptorValueLE_test.txt',
            'exp_s': 'e:/brd4/PMF_Yolanda/dataSet/version4/dscrptorValueLEScaled_test.txt',
        },
    }
    pts = dist.PTS(dist.UnformatedInput(dc_fn))

    def writeDES(ligdir, prodir, expdir):
        # One header line, then one row per file found in ligdir.
        with open(expdir, 'w') as out:
            out.write('\t'.join(['Name', 'Active'] + DESCRIPTOR) + '\n')
            for index, fname in enumerate(os.listdir(ligdir)):
                print("%s %s" % (fname, index))
                stem = fname[:-4]  # file name without its 4-character suffix
                lig = next(pybel.readfile('sdf', ligdir + fname))
                pro = next(pybel.readfile('pdb', prodir + stem + '.pdb'))
                active = 1 if lig.data['Active'] == '1' else 0
                dd = GetDscrptor(lig, pro, pts)
                cells = [stem.replace('ss', '/'), str(active)]
                cells += [str(dd[d]) for d in DESCRIPTOR]
                out.write('\t'.join(cells) + '\n')

    for split in ('Train', 'Test'):
        writeDES(dirs[split]['lig'], dirs[split]['pro'], dirs[split]['exp'])

    # The scaler comes from the training table and is applied to both sets.
    scaler = getScaler_fromFile(dirs['Train']['exp'])
    table_train = scaleDES_fromFile(scaler, dirs['Train']['exp'])
    table_test = scaleDES_fromFile(scaler, dirs['Test']['exp'])

    for split, table in (('Train', table_train), ('Test', table_test)):
        with open(dirs[split]['exp_s'], 'w') as out:
            out.write('\t'.join(['Name', 'Active'] + DESCRIPTOR) + '\n')
            for row in table:
                out.write('\t'.join([str(cell) for cell in row]) + '\n')
def dscrptorOut_robscale(dc_fn):
    """Write scaled descriptor tables for the train and test sets.

    Descriptors are collected in memory for each set, scaled with
    ``scaleDES`` and written as tab-separated rows of
    ``Name, Active, <descriptors...>``.

    Args:
        dc_fn: Path handed to ``dist.UnformatedInput``.
    """
    dirs = {
        'Train': {
            'lig': 'e:/brd4/PMF_Yolanda/dataSet/version3/trainsetLigand/',
            'pro': 'e:/brd4/PMF_Yolanda/dataSet/version3/trainsetPocket/',
            'exp': 'e:/brd4/PMF_Yolanda/dataSet/version3/dscrptorValueSscaled_train.txt',
        },
        'Test': {
            'lig': 'e:/brd4/PMF_Yolanda/dataSet/version3/testsetLigand/',
            'pro': 'e:/brd4/PMF_Yolanda/dataSet/version3/testsetPocket/',
            'exp': 'e:/brd4/PMF_Yolanda/dataSet/version3/dscrptorValueSscaled_test.txt',
        },
    }
    trainScaler = None
    dc = dist.UnformatedInput(dc_fn)
    pts = dist.PTS(dc)

    def tableGen(ligdir, prodir):
        # Returns (descriptor rows, [name, active] rows) in matching order.
        dscrptorTable, activTable = [], []
        for num, p in enumerate(os.listdir(ligdir)):
            print("%s %s" % (p, num))
            stem = p[:-4]  # file name without its 4-character suffix
            lig = next(pybel.readfile('sdf', ligdir + p))
            pro = next(pybel.readfile('pdb', prodir + stem + '.pdb'))
            a = 1 if lig.data['Active'] == '1' else 0
            activTable.append([stem.replace('ss', '/'), str(a)])
            dd = GetDscrptor(lig, pro, pts)
            dscrptorTable.append([dd[d] for d in DESCRIPTOR])
        return dscrptorTable, activTable

    def tableOut(dscTable, acTable, expdir):
        with open(expdir, 'w') as f:
            f.write('\t'.join(['Name', 'Active'] + DESCRIPTOR) + '\n')
            # Unpack in the order the tables are zipped so that Name/Active
            # come first, matching the header.  list() keeps the
            # concatenation valid whatever sequence type scaleDES returns
            # for a row.
            for dsc, ac in zip(dscTable, acTable):
                entry = list(ac) + list(dsc)
                f.write('\t'.join([str(i) for i in entry]) + '\n')

    # NOTE(review): presumably scaleDES fits a new scaler when given None and
    # reuses the supplied one otherwise - confirm against scaleDES.
    dscTabTrain, acTabTrain = tableGen(dirs['Train']['lig'], dirs['Train']['pro'])
    # Pass the table built above; ``dscrptorTable`` only exists inside tableGen.
    dscTabTrain_s, trainScaler = scaleDES(dscTabTrain, trainScaler)
    tableOut(dscTabTrain_s, acTabTrain, dirs['Train']['exp'])

    dscTabTest, acTabTest = tableGen(dirs['Test']['lig'], dirs['Test']['pro'])
    dscTabTest_s, trainScaler = scaleDES(dscTabTest, trainScaler)
    tableOut(dscTabTest_s, acTabTest, dirs['Test']['exp'])
def main(argv=sys.argv):
    """Write a ``name<TAB>PMF.score`` table for the given input files.

    Args:
        argv: Argument list in ``sys.argv`` layout: program name, output
            path, then one or more input paths or glob patterns.

    Prints a message and exits with status 1 when fewer than three arguments
    are given or when the output path already exists.  The module-level
    ``pts`` is (re)bound before ``do_for_each`` is called for every file
    matched by the input patterns.
    """
    global pts

    if len(argv) < 3:
        print("\n Usage: %s outfile in.num[...]" % argv[0])
        print(" in.num: generated by pmf_atom_pairs.py")
        print("")
        sys.exit(1)
    if os.path.exists(argv[1]):
        # Refuse to overwrite: an existing "outfile" usually means the output
        # argument was forgotten and an input file took its place.
        print("Warning: %s already exists. Are you sure you've specified {outfile}?" % argv[1])
        sys.exit(1)

    # The data file is looked up next to the script itself.
    script_dir = os.path.dirname(argv[0])
    dat_name = os.path.join(script_dir, "dist_splited_train_0525.dat")
    dc = dist.UnformatedInput(dat_name)
    pts = dist.PTS(dc)

    # The context manager closes the output even if do_for_each raises.
    with open(argv[1], "w") as outf:
        outf.write("name\tPMF.score\n")
        for pattern in argv[2:]:
            for each_name in glob(pattern):
                do_for_each(each_name, outf)
def main(argv=sys.argv):
    """Score the ligand/protein pairs listed in the file named by ``argv[1]``.

    Args:
        argv: Argument list in ``sys.argv`` layout: program name and the path
            passed to ``GetFileList``.

    Prints a usage message and exits with status 1 unless exactly one
    argument follows the program name.  For every entry whose ligand file
    exists, the identifier and the two values returned by ``GetFinger`` are
    printed on one line.
    """
    if len(argv) != 2:
        print("\n Usage: %s in.list" % argv[0])
        print(" in.list: each line should be 'proname ligname'")
        print("")
        sys.exit(1)

    entries = GetFileList(argv[1])

    # Load the atom-pair potential A_ij(r) from the data file that sits next
    # to the script.
    data_path = os.path.join(os.path.dirname(argv[0]),
                             'dist_splited_train_0525.dat')
    potentials = dist.PTS(dist.UnformatedInput(data_path))

    for entry in entries:
        ligand_path, protein_path, pdb_code = entry[0], entry[1], entry[2]
        # The text after the last "." selects the pybel reader format.
        ligand_format = ligand_path.rsplit(".", 1)[-1]
        protein_format = protein_path.rsplit(".", 1)[-1]
        if not os.path.isfile(ligand_path):
            continue
        ligand = next(pybel.readfile(ligand_format, ligand_path))
        protein = next(pybel.readfile(protein_format, protein_path))
        score, scorev = GetFinger(ligand, protein, potentials, pdb_code)
        print("%s %s %s" % (pdb_code, score, scorev))
def molGen(sdf, proString, pts, model, scaler):
    """Yield one work item per ``$$$$``-terminated record of an SD file.

    Args:
        sdf: Path of the SD file to split.
        proString, pts, model, scaler: Passed through unchanged in every
            yielded tuple.

    Yields:
        ``(record_text, proString, pts, model, scaler)`` where ``record_text``
        is every line of the record, including the ``$$$$`` terminator line.
        Lines after the last terminator are not yielded.
    """
    with open(sdf) as sdfile:
        record = []  # collect lines and join once per record
        for line in sdfile:
            record.append(line)
            if line.strip() == '$$$$':
                yield (''.join(record), proString, pts, model, scaler)
                record = []


if __name__ == '__main__':
    import time

    # pr, dc_fn, modelfile, expdir, expsdf, sdf and scalerFit are expected to
    # be defined elsewhere in this module.
    with open(pr) as pro_file:
        proString = pro_file.read()
    pts = dist.PTS(dist.UnformatedInput(dc_fn))
    model = loadModel(modelfile)

    # Both outputs are closed on exit, including when a worker fails.
    with open(expdir, 'w') as f, open(expsdf, 'w') as fsdout:
        f.write('\t'.join(['Name'] + DESCRIPTOR + ['Prob']) + '\n')
        poo = mul.Pool(2)
        try:
            t = time.time()
            scaler = rdg.getScaler_fromFile(scalerFit)
            work = molGen(sdf, proString, pts, model, scaler)
            for n, v in enumerate(poo.imap(singleRun, work)):
                entry, molString = v
                f.write('\t'.join(entry) + '\n')
                fsdout.write(molString)
                if n % 100 == 0:
                    # Progress line every 100 molecules.
                    print("%s %s %s %s" % (n, molString[:15], entry[-1],
                                           time.time() - t))
        finally:
            # All results have been consumed on the normal path, so stopping
            # the workers here cannot lose output.
            poo.terminate()
            poo.join()
    print(time.time() - t)
import dist
import pmf_atom_typer as pat
import raw_print_dist as rpd
import resiDescriptorGen_Scaled_LE as rdg
import extract_pdb1 as ep
from pybel import *
import os
from math import exp
import mpi4py.MPI as MPI
import time

# Module-level inputs, loaded once at import time.
sdfile = '../'
DESCRIPTOR = rdg.DESCRIPTOR
proString = open('4GPJ_confPred.pdb').read()
pts = dist.PTS(dist.UnformatedInput('dist_scPDB_train_4.dat'))
scaler = rdg.getScaler_fromFile('dscrptorValueLE_train.txt')


def loadModel(modelfile):
    """Read a two-column ``name value`` model file.

    Every non-blank line contributes its first whitespace-separated field as
    a name and its second as a float.  The last line supplies the intercept;
    the lines before it supply the descriptor names and their parameters.

    Args:
        modelfile: Path of the model file.

    Returns:
        ``(descriptor, para, intercept)``: list of names, list of floats and
        the float taken from the last line.

    Raises:
        IndexError: If a non-blank line has fewer than two fields or the file
            holds no such line.
        ValueError: If a second field is not a valid float.
    """
    names, values = [], []
    # Single pass under a context manager: the file is read once and closed.
    with open(modelfile) as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines such as a trailing newline
            names.append(fields[0])
            values.append(float(fields[1]))
    descriptor = names[:-1]
    para = values[:-1]
    intercept = values[-1]
    return descriptor, para, intercept


model = loadModel('./model160126.txt')
E = exp(1.0)