def alternativeAbundance(matchf, pds, topn=50):
    """Return the median Z-score of the query RMSDs against hit-vs-hit RMSDs.

    Runs the external ``master`` program to write the structures of the top
    ``topn`` matches into ``General.getBase(pds) + 'tmp'``, computes the
    backbone RMSD between every pair of those structures after superposition,
    and then expresses the values in column 0 of ``matchf`` as Z-scores
    relative to the mean and standard deviation of the pairwise RMSDs.

    ``parsePDB``, ``calcTransformation``, ``applyTransformation`` and
    ``calcRMSD`` must be available in module scope (they look like ProDy
    functions -- TODO confirm against the file's imports).

    Args:
        matchf: match file passed to ``master --matchIn``; its column 0 is
            read as floats (presumably the query-to-match RMSD -- confirm).
        pds: query PDS file passed to ``master --query``. It must already
            exist; the ``createPDS`` step that would build it from a PDB
            file is not run here.
        topn: number of top matches to extract and to read from ``matchf``.

    Returns:
        The median Z-score, rounded to 3 decimals.

    NOTE(review): with fewer than two extracted structures the pairwise RMSD
    list is empty, and with identical RMSDs the standard deviation is 0; in
    both cases the result is nan/inf rather than an exception.
    """
    # Local import: the file's top-level import block is not visible here.
    import subprocess

    MASTER = '/home/anthill/fzheng/home/scripts/termanal_updating'
    ndir = General.getBase(pds) + 'tmp'

    # Generate the original structures of the topn hits. The arguments are
    # passed as a list (no shell), so paths containing spaces or shell
    # metacharacters are handed to the program unchanged. The exit status
    # is ignored, as it was with os.system.
    cmd = [MASTER + '/master',
           '--query', pds,
           '--matchIn', matchf,
           '--structOut', ndir,
           '--outType', 'match',
           '--bbRMSD',
           '--topN', str(topn)]
    subprocess.call(cmd)

    # For these N structures, calculate the RMSD between any two: O(N^2)
    # superpositions, but each file is parsed only once.
    odir = os.getcwd()
    os.chdir(ndir)
    try:
        mpdbs = sorted(glob.glob('*.pdb'))
        print('calculating pairwise RMSD')
        backbones = [parsePDB(name).select('backbone').copy()
                     for name in mpdbs]
        RMSDs = []
        for i, reference in enumerate(backbones[:-1]):
            for target in backbones[i + 1:]:
                # Work on a fresh copy so the stored backbone keeps its
                # original coordinates in case applyTransformation moves
                # atoms in place (the original code also transformed a copy).
                mobile = target.copy()
                trans = calcTransformation(mobile, reference)
                mobile_t = applyTransformation(trans, mobile)
                RMSDs.append(round(calcRMSD(reference, mobile_t), 3))
        print('finish calculating RMSD')
    finally:
        # Always return to the caller's directory, even if parsing or
        # superposition fails part-way through.
        os.chdir(odir)

    # Now calculate the Z-scores of all the RMSDs of the query and take
    # their median.
    RMSDs = np.array(RMSDs)
    qRMSD = np.array([float(x)
                      for x in Analyze.readColumn(matchf, 0, top=topn)])
    meanRMSD, stdRMSD = np.mean(RMSDs), np.std(RMSDs)
    Z_qRMSD = (qRMSD - meanRMSD) / stdRMSD
    return round(np.median(Z_qRMSD), 3)
# For every sequence file: locate the matching PDB file, look up the column
# index of the residue of interest and read that column from the sequence
# file, optionally doing the same for a contacting residue and locating an
# environment file. Uses seqfs, args, resnum and cid from the enclosing script.
for seqf in seqfs:
    pdbf = General.changeExt(seqf.replace(args.head + '_', ''), 'pdb')
    if not os.path.isfile(pdbf):
        print(pdbf + ' doesn\'t exist!')
        continue
    outf = General.changeExt(pdbf, args.o)

    if args.wgap is not None:  # specific to gap
        # Explicit check instead of assert so it is not stripped under -O.
        # Truthiness matches the `if args.conR:` test below, so an unset
        # (None) conR no longer counts as a conflict.
        if args.conR:
            raise ValueError(
                'wgap and conR cannot be specified simultaneously')
        dirname = General.getBase(pdbf)
        # NOTE(review): the existence check above was made on the path
        # before this rewrite; the rewritten path is not re-checked.
        pdbf = args.wgap + '/' + dirname + '/' + pdbf

    index = PDB.findPositionInPDB(pdbf, resnum, cid)
    aacol = Analyze.readColumn(seqf, index, top=args.uplimit)

    if args.conR:  # should contacting residue be constrained?
        # The last '_'-separated field of the base name is the contact id:
        # its first character is the chain id, the rest the residue number.
        conid = General.getBase(seqf).split('_')[-1]
        ccid, cresnum = conid[0], conid[1:]
        cindex = PDB.findPositionInPDB(pdbf, cresnum, ccid)
        cres = PDB.getResByInd(pdbf, ccid, cresnum).getResname()
        cres = PDB.t2s(cres)
        caacol = Analyze.readColumn(seqf, cindex, top=args.uplimit)

    if args.env is not None:  # environment corrected counts
        envf = (General.getBase(seqf.replace(args.head, args.envhead))
                + '.' + args.env)
        if not os.path.isfile(envf):
            print(envf + ' doesn\'t exist!')
            continue
import glob
import Analyze

# For every '*.dsc50.TR*.dat' file in the current directory, read the last
# column of it and of its 'abd' and 'ssc' counterparts, and write
# '<modelname>.dat' holding the first three fields of each line of the old
# all-features file followed by the three new columns (tab separated).
dscdat = sorted(glob.glob('*.dsc50.TR*.dat'))
oldpath = '/home/anthill/fzheng/home/designScore/allfeatures_individual/'

for d in dscdat:
    modelname = d.split('.')[-2]
    oldfile = oldpath + 'allfeatures.' + modelname + '.dat'

    # The abundance and structure-score files share the name of the design
    # score file with 'dsc' replaced.
    newdsc = Analyze.readColumn(d, -1)
    newabd = Analyze.readColumn(d.replace('dsc', 'abd'), -1)
    newssc = Analyze.readColumn(d.replace('dsc', 'ssc'), -1)

    # Prepend the header names so that row i of each new column lines up
    # with line i of the old file (line 0 is presumably its header row --
    # confirm).
    newdsc.insert(0, 'new_designscore')
    newabd.insert(0, 'new_abundance')
    newssc.insert(0, 'new_structurescore')

    # Read the old file before opening the output, so a missing input does
    # not leave an empty output file behind; `with` closes both handles.
    with open(oldfile) as oldfh:
        oldlines = oldfh.readlines()

    newfile = modelname + '.dat'
    with open(newfile, 'w') as newfh:
        for i, line in enumerate(oldlines):
            # Indexing (rather than zip) keeps the IndexError raised when a
            # new column is shorter than the old file.
            fields = line.split()[0:3] + [newdsc[i], newabd[i], newssc[i]]
            newfh.write('\t'.join(fields) + '\n')