def alternativeAbundance(matchf, pds, topn = 50):
    """Return the median Z-score of the query RMSDs relative to the hit-vs-hit RMSDs.

    The external `master` program is run to write the structures of the top
    `topn` matches into the directory '<General.getBase(pds)>tmp'. Every pair
    of those structures is superimposed on backbone atoms and the pairwise
    RMSDs form a reference distribution. Column 0 of `matchf` (presumably the
    query-to-match RMSD of each hit) is then standardized against that
    distribution.

    Parameters:
        matchf: match file passed to `master --matchIn`; its column 0 is read.
        pds: query PDS file passed to `master --query`. It must already exist
            (it used to be generated here with `createPDS`; that step is no
            longer performed by this function).
        topn: number of top matches to extract and to read from `matchf`.

    Returns:
        The median of the Z-scores, rounded to 3 decimals. With fewer than two
        extracted structures, or zero spread among the pairwise RMSDs, the
        result is not a finite number; this case is not guarded here.
    """
    MASTER = '/home/anthill/fzheng/home/scripts/termanal_updating'

    # generate the original structures of the topn hits
    ndir = General.getBase(pds) + 'tmp'
    cmd = [MASTER + '/master', '--query', pds, '--matchIn', matchf, '--structOut', ndir, '--outType', 'match', '--bbRMSD', '--topN', str(topn)]
    os.system(' '.join(cmd))

    # for these N structures, calculate RMSD between any two: O(N^2) pairs
    odir = os.getcwd()
    os.chdir(ndir)
    try:
        mpdbs = sorted(glob.glob('*.pdb'))
        print('calculating pairwise RMSD')
        # parse every file once instead of once per pair
        backbones = [parsePDB(name).select('backbone').copy() for name in mpdbs]
        RMSDs = []
        for i in range(len(backbones) - 1):
            target = backbones[i]
            for j in range(i + 1, len(backbones)):
                # applyTransformation moves the atoms it is given, so work on
                # a fresh copy to keep the cached coordinates pristine
                mobile = backbones[j].copy()
                trans = calcTransformation(mobile, target)
                mobile = applyTransformation(trans, mobile)
                RMSDs.append(round(calcRMSD(target, mobile), 3))
        print('finish calculating RMSD')
    finally:
        # always return to the caller's directory, even if a step above fails
        os.chdir(odir)

    # standardize the query RMSDs against the hit-vs-hit distribution and
    # summarize them by their median
    RMSDs = np.array(RMSDs)
    qRMSD = Analyze.readColumn(matchf, 0, top = topn)
    qRMSD = np.array([float(x) for x in qRMSD])
    meanRMSD, stdRMSD = np.mean(RMSDs), np.std(RMSDs)
    Z_qRMSD = (qRMSD - meanRMSD) / stdRMSD
    return round(np.median(Z_qRMSD), 3)
Пример #2
0
# Per-sequence-file preparation loop. `seqfs`, `args`, `resnum` and `cid` are
# defined outside this fragment. Each iteration locates the PDB file that
# belongs to a sequence file and reads the column(s) of interest from it;
# iterations whose input files are missing are skipped with a message.
for seqf in seqfs:
	# PDB name = sequence file name with the '<args.head>_' prefix removed and
	# the extension switched to 'pdb' (via General.changeExt)
	pdbf = General.changeExt( seqf.replace(args.head + '_', ''), 'pdb')
	if not os.path.isfile(pdbf):
		print(pdbf + ' doesn\'t exist!')
		continue

	# output file name is derived from the PDB name *before* any wgap
	# redirection below
	outf = General.changeExt(pdbf, args.o)

	if args.wgap != None: # specific to gap
		assert args.conR == False, 'wgap and conR cannot be specified simultaneously'
		# re-point pdbf to <args.wgap>/<base of pdbf>/<pdbf>
		# NOTE(review): the existence check above ran on the original path,
		# not on this redirected one - confirm that is intended
		dirname = General.getBase(pdbf)
		pdbf = args.wgap + '/' + dirname + '/'+ pdbf

	# column index in seqf for residue (resnum, cid), then read that column;
	# `top` presumably caps the number of rows read - verify in Analyze
	index = PDB.findPositionInPDB(pdbf, resnum, cid)
	aacol = Analyze.readColumn(seqf, index, top = args.uplimit)

	if args.conR: # should contacting residue be constrained?
		# the last '_'-separated token of the sequence file's base name encodes
		# the contacting residue: first character = chain id, rest = residue number
		conid = General.getBase(seqf).split('_')[-1]
		ccid, cresnum = conid[0], conid[1:]
		cindex = PDB.findPositionInPDB(pdbf, cresnum, ccid)
		cres = PDB.getResByInd(pdbf, ccid, cresnum).getResname()
		# presumably converts the three-letter residue name to one letter
		cres = PDB.t2s(cres)
		caacol = Analyze.readColumn(seqf, cindex, top = args.uplimit)

	if args.env != None: # environment corrected counts
		# environment file: base of seqf with args.head swapped for
		# args.envhead, plus '.<args.env>' as extension
		envf = General.getBase(seqf.replace(args.head, args.envhead)) + '.' + args.env
		if not os.path.isfile(envf):
			print(envf + ' doesn\'t exist!')
			continue
Пример #3
0
import glob
import Analyze

# For every '*.dsc50.TR*.dat' file in the current directory, build
# '<modelname>.dat': the first three whitespace-separated fields of each line
# of the old 'allfeatures.<modelname>.dat' table, followed by the last column
# of the matching dsc / abd / ssc files. A header label is prepended to each
# new column, so row 0 of the old table is paired with those labels.
dscdat = sorted(glob.glob('*.dsc50.TR*.dat'))

oldpath = '/home/anthill/fzheng/home/designScore/allfeatures_individual/'
for d in dscdat:
	# the model name is the second-to-last '.'-separated token of the file name
	modelname = d.split('.')[-2]
	oldfile = oldpath + 'allfeatures.' + modelname + '.dat'

	# the abundance / structure-score files share the name of the dsc file
	# with 'dsc' replaced by 'abd' / 'ssc'
	ad = d.replace('dsc', 'abd')
	sd = d.replace('dsc', 'ssc')
	newdsc = Analyze.readColumn(d, -1)
	newabd = Analyze.readColumn(ad, -1)
	newssc = Analyze.readColumn(sd, -1)
	newdsc.insert(0, 'new_designscore')
	newabd.insert(0, 'new_abundance')
	newssc.insert(0, 'new_structurescore')

	# read the old table before creating the output, so a missing input does
	# not leave an empty output file behind
	with open(oldfile) as oldfh:
		array = oldfh.readlines()

	newfile = modelname + '.dat'
	# the context manager guarantees the output is flushed and closed
	with open(newfile, 'w') as newfh:
		for i, line in enumerate(array):
			# indexing (not zip) keeps the IndexError when a new column is
			# shorter than the old table
			outstr = '\t'.join(line.split()[0:3] + [newdsc[i], newabd[i], newssc[i]])
			newfh.write(outstr + '\n')