示例#1
0
def main(argv=sys.argv):
    """Load a pickled object, wrap it with ``dist.PTS`` and pickle the result.

    Args:
        argv: Command-line style list ``[program, input_path, output_path]``.
            The object unpickled from ``input_path`` is passed to
            ``dist.PTS`` and the returned value is pickled to
            ``output_path``.

    Exits with status 1 after printing a usage message when ``argv`` does
    not contain exactly three items.
    """
    if len(argv) != 3:
        print("\n  Usage: %s num_pairs.dat potential.dat" % argv[0])
        print("")
        sys.exit(1)

    # NOTE(review): unpickling runs code embedded in the file, so only use
    # input files that come from a trusted source.
    # The "r"/"w" modes are left as they were so previously written data
    # files keep loading the same way; `with` guarantees the handles are
    # closed even when loading, conversion or dumping raises.
    with open(argv[1], "r") as inf:
        dc = pickle.load(inf)

    p = dist.PTS(dc)
    with open(argv[2], "w") as outf:
        pickle.dump(p, outf)
示例#2
0
def main():
    """Score every entry returned by ``GetFileList()`` and print the result.

    Each entry is indexed as ``(ligand_path, protein_path, pdb_id)``.  The
    ligand is read as SDF and the protein as PDB; entries whose ligand path
    is not an existing file are skipped without any message.  The two values
    returned by ``GetFinger`` are printed on one line, separated by a space.
    """
    entries = GetFileList()
    potential_path = 'dist_splited_train_0525.dat'
    pts = dist.PTS(dist.UnformatedInput(potential_path))
    for entry in entries:
        lig_fn, pro_fn, pdbid = entry[0], entry[1], entry[2]
        if not os.path.isfile(lig_fn):
            continue
        # Only the first molecule of each file is used.
        lig = next(pybel.readfile('sdf', lig_fn))
        pro = next(pybel.readfile('pdb', pro_fn))
        score, scorev = GetFinger(lig, pro, pts, pdbid)
        print("%s %s" % (score, scorev))
def singleTest(dc_fn):
    """Compute and print the descriptors for one hard-coded complex.

    Args:
        dc_fn: Path handed to ``dist.UnformatedInput``; the result is
            wrapped by ``dist.PTS`` and passed on to ``GetDscrptor``.

    Returns:
        A list with the ``str()`` of each descriptor value, ordered like
        ``DESCRIPTOR``.
    """
    ligand_path = 'E:/brd4/PMF_Yolanda/dataSet/version2/trainsetLigand/PDB_3U5L.sdf'
    pocket_path = 'E:/brd4/PMF_Yolanda/dataSet/version2/trainsetPocket/PDB_3U5L.pdb'

    # Only the first molecule of each file is used.
    lig = next(pybel.readfile('sdf', ligand_path))
    pro = next(pybel.readfile('pdb', pocket_path))

    pts = dist.PTS(dist.UnformatedInput(dc_fn))
    values = GetDscrptor(lig, pro, pts)

    # Build the full return value first, then report each descriptor.
    as_text = []
    for name in DESCRIPTOR:
        as_text.append(str(values[name]))
    for name in DESCRIPTOR:
        print("%s %s" % (name, values[name]))
    return as_text
def dscrptorOut(dc_fn):
    """Write raw and scaled descriptor tables for the train and test sets.

    For each set, every file name listed in the ligand directory is read as
    SDF, paired with the ``.pdb`` file of the same stem in the pocket
    directory, and its descriptors are written as one tab-separated row to
    the set's ``exp`` file.  A scaler is then obtained from the training
    ``exp`` file, applied to both ``exp`` files, and the scaled tables are
    written to the ``exp_s`` files.

    Args:
        dc_fn: Path handed to ``dist.UnformatedInput``; the result is
            wrapped by ``dist.PTS`` and passed on to ``GetDscrptor``.
    """
    dirs = {
        'Train': {
            'lig': 'e:/brd4/PMF_Yolanda/dataSet/version4/trainsetLigand/',
            'pro': 'e:/brd4/PMF_Yolanda/dataSet/version4/trainsetPocket/',
            'exp': 'e:/brd4/PMF_Yolanda/dataSet/version4/dscrptorValueLE_train.txt',
            'exp_s': 'e:/brd4/PMF_Yolanda/dataSet/version4/dscrptorValueLEScaled_train.txt',
        },
        'Test': {
            'lig': 'e:/brd4/PMF_Yolanda/dataSet/version4/testsetLigand/',
            'pro': 'e:/brd4/PMF_Yolanda/dataSet/version4/testsetPocket/',
            'exp': 'e:/brd4/PMF_Yolanda/dataSet/version4/dscrptorValueLE_test.txt',
            'exp_s': 'e:/brd4/PMF_Yolanda/dataSet/version4/dscrptorValueLEScaled_test.txt',
        },
    }
    pts = dist.PTS(dist.UnformatedInput(dc_fn))

    def header_line():
        # Column titles shared by all four output files.
        return '\t'.join(['Name', 'Active'] + DESCRIPTOR) + '\n'

    def write_raw(lig_dir, pro_dir, out_path):
        # One row per ligand file: name, activity flag, descriptor values.
        with open(out_path, 'w') as out:
            out.write(header_line())
            for index, fname in enumerate(os.listdir(lig_dir)):
                print("%s %s" % (fname, index))
                stem = fname[:-4]  # drops the last four characters (".sdf")
                lig = next(pybel.readfile('sdf', lig_dir + fname))
                pro = next(pybel.readfile('pdb', pro_dir + stem + '.pdb'))
                active = 1 if lig.data['Active'] == '1' else 0
                values = GetDscrptor(lig, pro, pts)
                value_cells = '\t'.join([str(values[d]) for d in DESCRIPTOR])
                # "ss" in the file stem is written back as "/" in the name.
                row = [stem.replace('ss', '/'), str(active), value_cells]
                out.write('\t'.join(row) + '\n')

    def write_scaled(rows, out_path):
        # Dump an already scaled table under the common header.
        with open(out_path, 'w') as out:
            out.write(header_line())
            for row in rows:
                out.write('\t'.join([str(cell) for cell in row]) + '\n')

    for subset in ('Train', 'Test'):
        paths = dirs[subset]
        write_raw(paths['lig'], paths['pro'], paths['exp'])

    # The scaler comes from the training table only and is reused for test.
    scaler = getScaler_fromFile(dirs['Train']['exp'])
    table_train = scaleDES_fromFile(scaler, dirs['Train']['exp'])
    table_test = scaleDES_fromFile(scaler, dirs['Test']['exp'])

    write_scaled(table_train, dirs['Train']['exp_s'])
    write_scaled(table_test, dirs['Test']['exp_s'])
def dscrptorOut_robscale(dc_fn):
    """Write scaled descriptor tables for the train and test sets.

    Descriptors are computed for every file listed in each ligand directory,
    scaled with ``scaleDES`` and written as tab-separated rows
    (``Name``, ``Active``, then one column per ``DESCRIPTOR`` entry).

    ``scaleDES`` is first called with ``None`` for the training table and
    then with the scaler it returned for the test table.
    NOTE(review): presumably ``None`` means "fit a new scaler" and a given
    scaler is reused unchanged -- confirm against ``scaleDES``.

    Args:
        dc_fn: Path handed to ``dist.UnformatedInput``; the result is
            wrapped by ``dist.PTS`` and passed on to ``GetDscrptor``.
    """
    dirs = {'Train':{'lig':'e:/brd4/PMF_Yolanda/dataSet/version3/trainsetLigand/',\
        'pro':'e:/brd4/PMF_Yolanda/dataSet/version3/trainsetPocket/',\
        'exp':'e:/brd4/PMF_Yolanda/dataSet/version3/dscrptorValueSscaled_train.txt'},\
      'Test':{'lig':'e:/brd4/PMF_Yolanda/dataSet/version3/testsetLigand/',\
        'pro':'e:/brd4/PMF_Yolanda/dataSet/version3/testsetPocket/',\
        'exp':'e:/brd4/PMF_Yolanda/dataSet/version3/dscrptorValueSscaled_test.txt'}
      }
    trainScaler = None
    dc = dist.UnformatedInput(dc_fn)
    pts = dist.PTS(dc)

    def tableGen(ligdir, prodir):
        # Returns (descriptor rows, [name, active] rows) in listing order.
        dscrptorTable, activTable = [], []
        for num, p in enumerate(os.listdir(ligdir)):
            print("%s %s" % (p, num))
            a = 0
            lig = next(pybel.readfile('sdf', ligdir + p))
            pro = next(pybel.readfile('pdb', prodir + p[:-4] + '.pdb'))
            if lig.data['Active'] == '1':
                a = 1
            # "ss" in the file stem is written back as "/" in the name.
            activTable.append([p[:-4].replace('ss', '/'), str(a)])
            dd = GetDscrptor(lig, pro, pts)
            dscrptorTable.append([dd[d] for d in DESCRIPTOR])
        return dscrptorTable, activTable

    def tableOut(dscTable, acTable, expdir):
        # Writes name/activity columns first so rows line up with the header.
        with open(expdir, 'w') as f:
            f.write('\t'.join(['Name', 'Active'] + DESCRIPTOR) + '\n')
            # Fix: the original zipped (dscTable, acTable) into (ac, dsc),
            # which put the descriptor values before Name/Active.
            for ac, dsc in zip(acTable, dscTable):
                # list() keeps this a concatenation even if the scaled rows
                # are array-like rather than plain lists.
                entry = list(ac) + list(dsc)
                f.write('\t'.join([str(i) for i in entry]) + '\n')

    dscTabTrain, acTabTrain = tableGen(dirs['Train']['lig'],
                                       dirs['Train']['pro'])
    # Fix: the original passed `dscrptorTable`, a name local to tableGen,
    # which raised NameError here.
    dscTabTrain_s, trainScaler = scaleDES(dscTabTrain, trainScaler)
    tableOut(dscTabTrain_s, acTabTrain, dirs['Train']['exp'])
    dscTabTest, acTabTest = tableGen(dirs['Test']['lig'], dirs['Test']['pro'])
    dscTabTest_s, trainScaler = scaleDES(dscTabTest, trainScaler)
    tableOut(dscTabTest_s, acTabTest, dirs['Test']['exp'])
示例#6
0
def main(argv=sys.argv):
    """Score every input file matched by the given patterns into one table.

    Args:
        argv: Command-line style list ``[program, outfile, pattern, ...]``.
            Each pattern is expanded with ``glob`` and every match is
            handed to ``do_for_each`` together with the open output file.

    Exits with status 1 when fewer than three items are given, or when
    ``outfile`` already exists (to avoid overwriting an input by mistake).

    Side effect: rebinds the module-level ``pts``.
    NOTE(review): presumably ``do_for_each`` reads that global -- confirm.
    """
    global pts

    if len(argv) < 3:
        print("\n  Usage: %s outfile in.num[...]" % argv[0])
        print("  in.num: generated by pmf_atom_pairs.py")
        print("")
        sys.exit(1)

    if os.path.exists(argv[1]):
        print("Warning: %s already exists. Are you sure you've specified {outfile}?" % argv[1])
        sys.exit(1)

    # The potential data file is looked up next to the script itself.
    script_dir = os.path.dirname(argv[0])
    dat_name = os.path.join(script_dir, "dist_splited_train_0525.dat")
    dc = dist.UnformatedInput(dat_name)
    pts = dist.PTS(dc)

    # `with` closes the output even when scoring one of the inputs raises.
    with open(argv[1], "w") as outf:
        outf.write("name\tPMF.score\n")
        for name in argv[2:]:
            for each_name in glob(name):
                do_for_each(each_name, outf)
示例#7
0
def main(argv=sys.argv):
    """Score every complex listed in a file and print one line per complex.

    Args:
        argv: Command-line style list ``[program, list_file]``.
            ``GetFileList(list_file)`` supplies entries indexed as
            ``(ligand_path, protein_path, pdb_id)``.

    The file format passed to ``pybel.readfile`` is the text after the last
    "." of each path.  Entries whose ligand path is not an existing file are
    skipped without any message.  Exits with status 1 after printing a usage
    message when ``argv`` does not contain exactly two items.
    """
    if len(argv) != 2:
        print("\n  Usage: %s in.list" % argv[0])
        print("  in.list: each line should be 'proname ligname'")
        print("")
        sys.exit(1)

    entries = GetFileList(argv[1])

    # Load the atom-pair potential A_ij(r) from the file next to the script.
    dc_fn = os.path.join(os.path.dirname(argv[0]),
                         'dist_splited_train_0525.dat')
    pts = dist.PTS(dist.UnformatedInput(dc_fn))

    for entry in entries:
        lig_fn, pro_fn, pdbid = entry[0], entry[1], entry[2]
        # Text after the last dot; the whole path when there is no dot.
        lig_format = lig_fn.rsplit(".", 1)[-1]
        pro_format = pro_fn.rsplit(".", 1)[-1]
        if not os.path.isfile(lig_fn):
            continue
        # Only the first molecule of each file is used.
        lig = next(pybel.readfile(lig_format, lig_fn))
        pro = next(pybel.readfile(pro_format, pro_fn))
        score, scorev = GetFinger(lig, pro, pts, pdbid)
        print("%s %s %s" % (pdbid, score, scorev))
示例#8
0

def molGen(sdf, proString, pts, model, scaler):
    """Yield one work item per ``$$$$``-terminated record of a text file.

    Args:
        sdf: Path of the file to read.
        proString, pts, model, scaler: Passed through untouched into every
            yielded tuple.

    Yields:
        ``(record_text, proString, pts, model, scaler)`` where
        ``record_text`` holds all lines of one record, including the line
        that reads ``$$$$`` once stripped.  Lines after the last such
        terminator are not yielded.
    """
    pending = []
    with open(sdf) as handle:
        for text_line in handle:
            pending.append(text_line)
            if text_line.strip() != '$$$$':
                continue
            yield (''.join(pending), proString, pts, model, scaler)
            pending = []



if __name__ == '__main__':
    # Script entry point: run singleRun over every record of `sdf` in a
    # two-worker pool, writing one table row and one molecule block per
    # result and reporting progress every 100 records.
    import time

    # Read the protein file once; `with` closes the handle right away.
    with open(pr) as proFile:
        proString = proFile.read()
    pts = dist.PTS(dist.UnformatedInput(dc_fn))
    model = loadModel(modelfile)

    # Both outputs are closed (and therefore flushed) even if a worker fails.
    with open(expdir, 'w') as f, open(expsdf, 'w') as fsdout:
        f.write('\t'.join(['Name']+DESCRIPTOR+['Prob'])+'\n')

        poo = mul.Pool(2)
        t = time.time()
        scaler = rdg.getScaler_fromFile(scalerFit)
        finished = False
        try:
            jobs = molGen(sdf, proString, pts, model, scaler)
            # imap yields results in input order, so rows follow the file.
            for n, v in enumerate(poo.imap(singleRun, jobs)):
                entry, molString = v
                f.write('\t'.join(entry)+'\n')
                fsdout.write(molString)
                if n % 100 == 0:
                    print("%s %s %s %s" % (n, molString[:15], entry[-1],
                                           time.time()-t))
            print(time.time()-t)
            finished = True
        finally:
            # Stop the workers: normally after a clean run, immediately
            # when the loop was aborted by an error.
            if finished:
                poo.close()
            else:
                poo.terminate()
            poo.join()
示例#9
0
import dist
import pmf_atom_typer as pat
import raw_print_dist as rpd
import resiDescriptorGen_Scaled_LE as rdg
import extract_pdb1 as ep
from pybel import *
import os
from math import exp
import mpi4py.MPI as MPI
import time

# NOTE(review): not referenced in the visible part of this file.
sdfile = '../'

# Descriptor names, shared with the descriptor-generation module.
DESCRIPTOR = rdg.DESCRIPTOR
# Whole protein file kept in memory as text; `with` closes the handle
# instead of leaving it to the garbage collector.
with open('4GPJ_confPred.pdb') as _pdb_file:
    proString = _pdb_file.read()
# Built once at import time from files in the current working directory.
pts = dist.PTS(dist.UnformatedInput('dist_scPDB_train_4.dat'))
scaler = rdg.getScaler_fromFile('dscrptorValueLE_train.txt')


def loadModel(modelfile):
    """Read a whitespace-separated ``name value`` parameter file.

    Every non-blank line contributes its first field as a name and its
    second field as a float.  The last row is returned separately as the
    intercept (its name is discarded).

    Args:
        modelfile: Path of the parameter file.

    Returns:
        ``(descriptor, para, intercept)``: the list of names and the list of
        floats for all rows but the last, and the float of the last row.

    Raises:
        IndexError: If a non-blank line has fewer than two fields, or the
            file contains no rows at all.
        ValueError: If a second field is not a valid float.
    """
    names, values = [], []
    # Single pass with a context manager: the original opened the file twice
    # and never closed either handle.
    with open(modelfile) as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no parameter.
                continue
            names.append(fields[0])
            values.append(float(fields[1]))
    return names[:-1], values[:-1], values[-1]


# (descriptor names, coefficients, intercept) read once at import time from
# a file in the current working directory.
model = loadModel('./model160126.txt')

# Euler's number, evaluated as exp(1.0).
E = exp(1.0)