def resh(self):
    """Write the per-residue minimum-distance profile of the input structure.

    Loads the PDB named by ``self.args.pdb``. When ``self.load_file`` reports
    success, two text tables are written to the working directory:

    * ``gbest.txt``        -- residue number and distance, in residue order;
    * ``gbest_sorted.txt`` -- the same rows ordered by ascending distance.

    Nothing is written (and nothing is logged) when loading fails.
    """
    if not self.load_file(self.args.pdb):
        return

    row_format = "%03d, %02.9f"
    column_header = "residue, distance (angstrom)"

    distances = self.residst()
    residue_numbers = np.arange(1, len(distances) + 1)
    np.savetxt("gbest.txt",
               np.column_stack((residue_numbers, distances)),
               fmt=row_format,
               header=column_header)
    logger.info("Final gbest residue min-distance profile: gbest.txt ")

    # The sorted table is built from the file that was just written, so it
    # contains the values as rounded on disk, not the in-memory floats.
    profile = np.loadtxt("gbest.txt",
                         dtype=[('resn', int), ('dest', float)],
                         delimiter=",")
    profile.sort(order=['dest'])
    np.savetxt("gbest_sorted.txt",
               profile,
               fmt=row_format,
               header=column_header)
    logger.info("Sorted by the distance of each residue: gbest_sorted.txt ")
def protation(self):
    """Sample protein orientations on a ``steps`` x ``steps`` rotation grid.

    For every grid point the current state of the "protein" object is copied
    into the next state of the "mov" object, then the protein is rotated by
    ``360 / steps`` degrees about x (inner loop) through ``self.protate``.
    After each full x sweep it is rotated once about y (outer loop).
    When the grid is exhausted the last state is copied to "final" and
    everything is written out with ``self.savefile(pdb=True)``.

    Raises:
        ZeroDivisionError: if ``self.args.steps`` is 0.
    """
    steps = self.args.steps
    angle = 360.0 / steps
    # Single running counter: it is both the target state index in "mov"
    # and the "count" shown in the log (equals yi * steps + xi + 1).
    state_no = 0
    for yi in range(steps):
        for xi in range(steps):
            state_no += 1
            # save state
            self.cmd.create("mov", "protein", self.cmd.get_state(), state_no)
            logger.debug(
                "[Current state ({})]Saved new state (count: {}), after rotate x"
                .format(self.cmd.get_state(), state_no))
            logger.debug(
                "[protation] for loop i: {}, j: {}, ia: {}, ja: {}".format(
                    yi, xi, yi * angle, xi * angle))
            logger.debug("{:=^70}".format("next round"))
            self.protate("x", angle)
        logger.debug("{:=^70}".format("Y AXIS"))
        self.protate("y", angle)
    logger.debug("{:=^70}".format("N TURN"))

    self.cmd.ending()
    logger.debug("[protation] Goes to the last state ({}).".format(
        self.cmd.get_state()))
    self.cmd.create("final", "protein", self.cmd.get_state())
    logger.debug("Saved the final structure.")
    self.savefile(pdb=True)
    logger.info(
        "[protation] Saved file (count: {}), rotation sampling is done".
        format(self.svcount))
def main(self):
    """Entry point; at present it only logs that the main routine started.

    The input files named by ``self.args.proteinf``, ``self.args.surfacef``
    and ``self.args.topolf`` are not checked here (see the TODO below).
    """
    # TODO: verify that the protein, surface and topology files exist
    # before doing any work; for now all of them are assumed to be present.
    startup_message = "Starting the main function..."
    logger.info(startup_message)
def face2surface(self, resi, cutoff=5):
    """Rotate the protein so that the residue group ``resi`` faces the surface.

    The vector from the protein centre to the centre of the selected residues
    is measured, its angles to the x and y axes are computed, and the protein
    is rotated by ``-90 - degx`` about y and ``-90 - degy`` about x through
    ``self.protate``. The rotated protein is then copied to "protein_bak".

    Args:
        resi: list of residue numbers; must contain at least one element.
            The list is assumed to be valid -- it is not range checked.
        cutoff: length added to the largest centre-to-residue distance when
            computing the returned angle (the adsorption-site cut-off).
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        Theta, in degrees: ``arccos(adjacent / (hypoten + cutoff))``, where
        ``adjacent`` is the distance from the protein centre to the centre of
        the residue group and ``hypoten`` is the largest distance from the
        protein centre to any single residue of the group.
    """
    # ASSUMED: the list of residues (resi) is valid.
    # TODO: check resi in range
    # TODO: should project the vector to x-z and y-z plane
    self.cmd.center("protein")
    proteinc = np.array(self.cmd.get_position())

    self.cmd.center("protein and resi {}".format("+".join(
        str(x) for x in resi)))
    residuec = np.array(self.cmd.get_position())

    rvt = residuec - proteinc
    adjacent = np.linalg.norm(rvt)
    # The axes are unit vectors, so the cosine is just the projection over
    # the vector length. Clip so rounding can never push the value outside
    # arccos' domain and turn the angle into NaN.
    cosx = np.clip(np.dot(rvt, [1, 0, 0]) / adjacent, -1.0, 1.0)
    cosy = np.clip(np.dot(rvt, [0, 1, 0]) / adjacent, -1.0, 1.0)
    degx = np.degrees(np.arccos(cosx))
    degy = np.degrees(np.arccos(cosy))
    logger.info("Rotate the residue group to face to surface.")
    logger.info("degx: {}, degy: {}".format(degx, degy))
    self.protate("y", -90 - degx)
    self.protate("x", -90 - degy)

    self.cmd.create("protein_bak", "protein")
    logger.debug(
        "backup the initial protein position after face to surface")

    # Compute the radius: cosine = adjacent over hypotenuse.
    # hypoten is the largest distance from the protein centre to any of the
    # requested residues; this also works for a single-residue list.
    hypoten = 0
    for i in resi:
        self.cmd.center("protein and resi {}".format(str(i)))
        ric = np.array(self.cmd.get_position())
        length = np.linalg.norm(ric - proteinc)
        if length > hypoten:
            hypoten = length

    cosTheta = np.clip(adjacent / (hypoten + cutoff), -1.0, 1.0)
    Theta = np.degrees(np.arccos(cosTheta))
    logger.debug("Adjacent({}) over hypoten({}) +{}".format(
        adjacent, hypoten, cutoff))
    logger.debug("Computed cosTheta: {}, and Theta: {}".format(
        cosTheta, Theta))
    return Theta
def load_top(self):
    """Collect dihedral definitions from the gromacs topology file.

    Reads ``self.args.topolf`` line by line. Inside the ``[ dihedrals ]``
    section (which ends at the first empty line) every record whose fifth
    column (funct) is ``"1"`` has its first four columns (the atom ids)
    appended to ``self.topol``. Lines starting with ``;`` are comments and
    are skipped. ``self.topol`` is shuffled in place at the end.

    Lines with fewer than five columns are ignored.
    """
    logger.info("loading dihedral angles from gromacs top file.")
    in_dihedrals = False  # True while inside the [ dihedrals ] section
    # 'with' guarantees the topology file is closed, even on error.
    with open(self.args.topolf) as topfile:
        logger.info("opened file: " + self.args.topolf)
        logger.info("reading dihedrals ...")
        for line in topfile:
            if line.startswith(";"):
                continue  # skip all comments
            stripped = line.strip()
            if stripped == "[ dihedrals ]":
                in_dihedrals = True
            if in_dihedrals and stripped == "":
                in_dihedrals = False  # the section ends at an empty line
            if not in_dihedrals:
                continue

            r = line.split()  # also drops leading and trailing whitespace
            logger.debug("line: " + ", ".join(r))
            # funct lives in the fifth column, so at least five are needed.
            if len(r) < 5 or r[4] != "1":  # we need funct 1 only.
                continue

            # NOTE(review): backbone filtering is not implemented yet --
            # is_bkbon is never set, so every funct-1 record is kept. The
            # loop below only logs the atoms behind each id.
            is_bkbon = 0
            for atom_id in r[:4]:
                logger.debug("get_model protein and id " + atom_id)
                atoms = self.cmd.get_model("protein and id " + atom_id)
                for idx, a in enumerate(atoms.atom):
                    logger.debug(
                        "({})~ [ID: {}] chain {} res {} ({}) atom {} ({})"
                        .format(idx, atom_id, a.chain, a.resn, a.resi,
                                a.name, a.index))
            if not is_bkbon:
                self.topol.append(r[:4])  # we only need non-backbone
                logger.debug("record was just added: " +
                             " ".join(map(str, self.topol[-1])))
    logger.info("finished reading top !!")
    random.shuffle(self.topol)  # TODO: randomly insert on the fly.
def gtha(self, atoms):
    """Log every atom of ``atoms`` and return a residue-name lookup.

    Args:
        atoms: object with an ``atom`` sequence whose items expose ``chain``,
            ``resn``, ``resi``, ``name``, ``index``, ``b`` and ``coord``.

    Returns:
        dict mapping each residue name (``resn``) to the ``resi`` of the last
        atom seen with that name.
    """
    resi_by_resn = {}
    for atom in atoms.atom:
        # chain and resn are used as-is (they must already be strings);
        # every other field is converted explicitly.
        fields = [
            atom.chain,
            atom.resn,
            str(atom.resi),
            str(atom.name),
            str(atom.index),
            str(atom.b),
            str(atom.coord[0]),
            str(atom.coord[1]),
            str(atom.coord[2]),
        ]
        logger.info("ATOM DEFINITION: " + " ".join(fields))
        resi_by_resn[atom.resn] = atom.resi
    return resi_by_resn
def savefile(self, diry=None, pdb=False):
    """Save the current PyMOL session and, optionally, one PDB per state.

    The ``confn``/``confd``/``confs`` entries of ``self.jsdb`` are reset
    first. The session is always written to ``<diry>/systemNNNNN.pse``,
    where NNNNN is ``self.svcount``. With ``pdb=True`` every state of the
    "mov" object is additionally exported to
    ``<diry>/confNNNNN/systemMMMMM.pdb`` and described in
    ``self.jsdb["confs"]`` by its grid indices ``xi``/``yi`` and its ``id``.

    Args:
        diry: output directory; created when missing. Defaults to ".".
        pdb: when true, also export the per-state PDB files.

    If the ``confNNNNN`` directory can neither be created nor already exists,
    a warning is logged and the method returns without exporting any PDB and
    without increasing ``self.svcount``.
    """
    self.jsdb["confn"] = 0  # TODO may need to clear the db?
    self.jsdb["confd"] = ""
    self.jsdb["confs"] = []
    if not diry:
        diry = "."
    if not os.path.isdir(diry):
        os.mkdir(diry)
    self.cmd.save("{}/system{:05}.pse".format(diry, self.svcount))
    logger.info("Saved state file file.")

    if pdb:
        confd = "conf{:05}".format(self.svcount)
        confpath = "{}/{}".format(diry, confd)
        try:
            os.mkdir(confpath)
            logger.debug("Creating dir: conf{:05}".format(self.svcount))
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(confpath):
                logger.debug("Already exist, pass.")
            else:
                # Really skip the export: there is no directory to write
                # into. (A 'finally' clause here would still run it.)
                logger.warning("Can not create directory, skip...")
                return

        steps = self.args.steps
        nstates = self.cmd.count_states("mov")
        for i in range(nstates):
            self.cmd.create("system", "mov", i + 1, 1)
            self.cmd.order("system", location="top")
            self.cmd.save(
                "{}/system{:05}.pdb".format(confpath, i),
                "system surface")
            logger.debug(
                "Saved system{:05}.pdb (state: {}) under directory: conf{:05}."
                .format(i, i + 1, self.svcount))
            pdbfs = {}
            pdbfs["xi"] = i % steps
            # Floor division keeps the row index an integer on Python 3 too.
            pdbfs["yi"] = i // steps
            pdbfs["id"] = "system{:05}".format(i)
            self.jsdb["confs"].append(pdbfs)
            logger.debug("append ({}) confs, xi: {} yi: {}".format(
                pdbfs["id"], pdbfs["xi"], pdbfs["yi"]))
        self.jsdb["confn"] = nstates
        self.jsdb["steps"] = steps
        self.jsdb["confd"] = confd
        logger.debug(
            "Saved tmp data, {} number of confs under {} directory.".
            format(self.jsdb["confn"], self.jsdb["confd"]))
    self.svcount += 1
def scoring(self):
    """Score every saved conformation and store the result in the json db.

    For each entry of ``self.jsdb["confs"]`` the matching
    ``<confd>/<id>.pdb`` file is passed to ``self.scoringone``. The returned
    ``stepN``, ``coul``, ``ljsr`` and ``file`` values are copied into the
    entry. An entry is updated only when all four values are present;
    otherwise a warning is logged and the entry is left untouched.
    ``self.savejdb()`` is called once all conformations were processed.
    """
    logger.info("Now we call gromacs to calculate the score.")
    wanted = ("stepN", "coul", "ljsr", "file")
    for con in self.jsdb["confs"]:
        f = con["id"] + ".pdb"
        logger.debug("processing file: {}".format(f))
        fn = os.path.join(self.jsdb["confd"], f)
        logger.debug("scoringone the file: {}".format(fn))
        r = self.scoringone(fn)
        try:
            # Read everything before writing, so a partial result cannot
            # leave the record half-updated.
            scores = dict((key, r[key]) for key in wanted)
        except (KeyError, IndexError, TypeError):
            logger.warning("scoringone return incorrect data!!!")
            continue
        con.update(scores)
        logger.debug("updated the configuration db, {}:{}".format(
            con["coul"], con["ljsr"]))
    self.savejdb()
def __init__(self):
    """Parse the command line and start a PyMOL session.

    Sets ``self.parser`` and ``self.args`` (options ``--pdb`` and ``--jdb``)
    and ``self.cmd`` (the ``pymol.cmd`` module, with ``retain_order`` and
    ``pdb_retain_ids`` switched on).
    """
    parser = argparse.ArgumentParser(description="This is simpleANS.py")
    self.parser = parser
    parser.add_argument("--pdb",
                        help="set the input protein pdb filename.",
                        required=False,
                        default="gbest.pdb")
    parser.add_argument("--jdb",
                        help="the filename of json db file.",
                        required=False,
                        default="db.json")
    self.args = parser.parse_args()

    # pymol launching: imported here, so the module is only required once
    # an instance is actually created.
    import pymol
    pymol.pymol_argv = ['pymol', '-qc']
    pymol.finish_launching()

    pymol_cmd = pymol.cmd
    self.cmd = pymol_cmd
    for setting in ("retain_order", "pdb_retain_ids"):
        pymol_cmd.set(setting, 1)
    logger.info(
        "Starting to analyze the lowest energy orientation and search trajectory "
    )
def initcmd(self):
    """Build the PSO command-line parser and parse the arguments.

    Sets ``self.parser``, ``self.initpg`` (the mutually exclusive group
    holding ``--resi``, ``--offset`` and ``--init``) and ``self.args``.
    """
    logger.info("Initialized command line arguments")
    parser = argparse.ArgumentParser(description="This is PSO testing")
    self.parser = parser
    add = parser.add_argument

    # Input and output locations.
    add("--outdir",
        help="set the output configuration directory.",
        required=False,
        default="pso_conf")
    add("--jsdbf", help="set the json db file path.", default="db.json")
    add("--proteinf",
        help="set the input protein pdb filename.",
        default="protein.pdb",
        metavar="protein.pdb")
    add("--surfacef",
        help="set the input surface pdb filename.",
        default="surface.pdb",
        metavar="surface.pdb")

    # Swarm parameters.
    add("--n",
        help="set the number of birds.",
        default=200,
        required=False,
        type=int)
    add("--r",
        help="set the total iteration number.",
        default=10,
        required=False,
        type=int)
    add("--w",
        help="set the weight for updating velocity.",
        default=0.721,
        required=False,
        type=float)
    add("--c1",
        help="set the parameter C1.",
        default=1.193,
        required=False,
        type=float)
    add("--c2",
        help="set the parameter C2.",
        default=1.193,
        required=False,
        type=float)
    add("--maxitr",
        help="set the maximum number of iteration.",
        default=0,
        required=False,
        type=int)

    # Energy minimization and housekeeping.
    add("--emdir",
        help="set the directory for EM, energy minimization using Gromacs.",
        default="EM",
        required=False,
        type=str)
    add("--keep-pdb",
        help="keep all of the pdb files generated during the searching.",
        default=False,
        action="store_true",
        required=False)

    # Only one way of choosing the initial orientation may be given.
    self.initpg = parser.add_mutually_exclusive_group()
    self.initpg.add_argument(
        "--resi",
        help="searching only the preferred residue surface",
        type=int,
        nargs="+")
    self.initpg.add_argument("--offset",
                             help="set the initial searching orientation",
                             nargs=6,
                             type=float)
    self.initpg.add_argument(
        "--init",
        help="use the exactly same molecule conformation as input",
        action="store_true",
        default=False)

    self.args = parser.parse_args()
    logger.debug("args: {}".format(self.args))
def __init__(self):
    """Initialise the base class, then attach a ``measurem`` helper.

    ``self.measure`` is built from ``self.cmd``, so the base ``__init__``
    (which is expected to provide ``self.cmd``) has to run first.
    """
    super(simplePSO, self).__init__()
    pymol_cmd = self.cmd
    self.measure = measurem(pymol_cmd)
    logger.info("Initialized simplePSO object ")
def plot(self):
    '''Plot the sampled energies stored in the json db.

    Written to the working directory:

    * ``diagram_0a.pdf`` -- energy contour over the X/Y rotation angles;
    * ``diagram_0k.pdf`` -- energy per conformation, minimum highlighted;
    * ``energy.txt.gz``  -- energy and conformation id, one row each;
    * ``diagram_0e.pdf`` -- sorted, normalized energies;
    * ``diagram_0h.pdf`` -- histogram of the energies (100 bins).

    Unless ``self.args.no_plot_re`` is set, one
    ``<confd>/<conformation id>.pdf`` is also written per conformation. It
    shows, for every residue, the smallest z distance between its atoms and
    ``self.surfaceh()["stop"]``.

    The energy of a conformation is ``coul + ljsr``. Records with missing or
    invalid values are plotted with the placeholder energy 7777777.

    Returns:
        0 when plotting is skipped (``no_plot`` or ``no_plot_re``),
        1 when the json db lacks ``confs``, ``steps`` or ``mindst``,
        None after the residue diagrams have been written.
    '''
    if self.args.no_plot:
        logger.info("skipped the plot progress...")
        return 0
    try:
        confs = self.jsdb["confs"]
        steps = self.jsdb["steps"]
        mindst = self.jsdb["mindst"]
    except KeyError:
        logger.error(
            "Can not find the confs/steps from the json db file. It might be corrupted."
        )
        return 1

    count = 0
    # x/y/z: grid rows for the contour plot; xt/yt/zt: the row being filled.
    x = []
    y = []
    z = []
    xt = []
    yt = []
    zt = []
    ang = 360.0 / steps
    # lx/ly/ld: running index, energy and id of every conformation.
    lx = []
    ly = []
    ld = []
    minres = "UNKNOW"
    mineng = 7777777
    mincnt = 0
    maxeng = -7777777
    maxres = "UNKNOW"
    minxra = 0
    minyra = 0
    logger.debug("preparing X Y Z")
    for conf in confs:
        try:
            enegy = float(conf["coul"]) + float(conf["ljsr"])
            resnm = conf["id"]
            xi = int(conf["xi"])
            yi = int(conf["yi"])
        except (ValueError, KeyError):
            enegy = 7777777
            resnm = "UNKNOW"
            xi = 0
            yi = 0
        if enegy < mineng:
            mineng = enegy
            minres = resnm
            mincnt = count
            minxra = xi
            minyra = yi
            logger.debug("Found a lower energy residue: {}@{}".format(
                minres, mineng))
        if enegy > maxeng:
            maxeng = enegy
            maxres = resnm
            logger.debug("Found a higher energy residue: {}@{}".format(
                maxres, maxeng))
        xt.append(float(xi * ang - self.args.init_xr))
        yt.append(float(yi * ang - self.args.init_yr))
        zt.append(enegy)
        lx.append(count)
        ly.append(enegy)
        ld.append(resnm)
        count += 1
        if (count % steps) == 0:
            # One full sweep collected: start the next grid row.
            x.append(xt)
            xt = []
            y.append(yt)
            yt = []
            z.append(zt)
            zt = []
    x = np.array(x)
    y = np.array(y)
    z = np.array(z)
    logger.debug("get X, Y and Z, then plot the graph.")

    # --- contour diagram --------------------------------------------------
    plt.figure()
    cmap = plt.cm.get_cmap("RdBu_r")
    minlv = self.args.minlv
    maxlv = self.args.maxlv
    if minlv is not None and maxlv is not None:
        # ASSUMED: the maxlv is always bigger than minlv.
        # True division plus arange works for int and float bounds alike.
        lvstep = float(maxlv - minlv) / self.ctlvs
        levels = list(np.arange(minlv, maxlv, lvstep))
        logger.debug("The contour map levels: {}".format(levels))
    else:
        levels = None
        logger.debug("Use the default contour map levels.")
    # NOTE(review): hard-coded marker position, labelled "the PSO ans".
    plt.plot(112, -65, 'ko')
    # plot the minimum location
    plt.plot(float(minxra * ang - self.args.init_xr),
             float(minyra * ang - self.args.init_yr), 'kx')
    # 9 is the linear threshold (formerly the Python 2 octal literal 011).
    Da = plt.contourf(x,
                      y,
                      z,
                      cmap=cmap,
                      levels=levels,
                      norm=mpl.colors.SymLogNorm(9))
    plt.title(u"contour diagram\ndistance={}Å".format(mindst))
    plt.xlabel("X rotation angle")
    plt.ylabel("Y rotation angle")
    cbar = plt.colorbar(Da)
    cbar.ax.set_ylabel("energy level")
    plt.savefig("diagram_0a.pdf")
    logger.debug("plot contour diagram and save as pdf file.")

    # --- energy line --------------------------------------------------------
    plt.figure()
    plt.title(u"energy line\ndistance={}Å".format(mindst))
    plt.xlabel("Iteration Number")
    plt.ylabel("Energy Value")
    plt.plot(lx, ly, 'k')
    plt.plot(mincnt, mineng, 'bo')
    plt.plot([mincnt, mincnt * 1.1], [mineng, mineng], 'k')
    plt.text(mincnt * 1.13,
             mineng,
             "id: {}\nen: {}".format(minres, mineng),
             verticalalignment="center",
             horizontalalignment="left")
    logger.debug("plot energy line only.")
    if minlv is not None:
        plt.ylim(ymin=minlv)
        logger.debug("set the y-axis minimum range.")
    if maxlv is not None:
        plt.ylim(ymax=maxlv)
        logger.debug("set the y-axis maximum range.")
    plt.savefig("diagram_0k.pdf")

    ly = np.array(ly)
    ld = np.array(ld)
    L = np.column_stack((ly, ld))
    np.savetxt("energy.txt.gz", L, delimiter=" ", fmt="%11s %11s")
    logger.debug("plot energy line diagram and save as pdf file.")

    # --- normalized, sorted energies ---------------------------------------
    plt.figure()
    plt.title(u"normalized energy line\ndistance={}Å".format(mindst))
    plt.ylabel("Energy Value")
    plt.xlabel("residues")
    plt.axis("off")
    plt.grid("on")
    plt.xticks([])
    plt.yticks([])
    sry = np.sort(ly)[::-1]
    nly = (sry - mineng) / (maxeng - mineng)
    plt.plot(range(len(nly)), nly)
    plt.text(0, 0, "id: {}@{}".format(minres, mineng))
    logger.debug("plot energy histogram.")
    plt.savefig("diagram_0e.pdf")
    logger.debug("plot energy histogram diagram and saved as pdf file.")

    # --- histogram ----------------------------------------------------------
    plt.figure()
    plt.title(u"normalized energy line\ndistance={}Å".format(mindst))
    plt.hist(ly, 100)
    plt.xlabel("The lowest configuration is id: {}@eng: {}".format(
        minres, mineng))
    plt.savefig("diagram_0h.pdf")
    logger.debug("plot another histogram diagram and saved as pdf file.")

    if self.args.no_plot_re:
        logger.info("skipped the residues plot progress...")
        return 0

    # --- one residue diagram per conformation -------------------------------
    logger.debug(
        "plot residues configuration graph, go through all configurations..."
    )
    # NOTE(review): the session file name is hard-coded -- confirm that this
    # is the intended one.
    self.cmd.load("system00003.pse")
    for con in confs:
        cid = con["id"]
        try:
            nid = int(re.findall(r'\d+', cid)[0])
        except (IndexError, TypeError, ValueError):
            logger.warning("function plot - can not find digit from cid.")
            nid = 0
        logger.debug("find the digit: {} from cid.".format(nid))
        cen = float(con["coul"]) + float(con["ljsr"])
        logger.debug("processing conf: {}, the energy value is: {}".format(
            cid, cen))
        logger.debug("Now create the system state for processing...")
        self.cmd.create("system", "mov", nid + 1, 1)
        atoms = self.cmd.get_model("system")
        resds = atoms.get_residues()
        # Smallest z distance to the surface, per residue. This presumes the
        # residues are numbered 1..len(resds) -- TODO confirm.
        mindt = [999999.99] * len(resds)
        surfc = self.surfaceh()
        szv = surfc["stop"]
        for atom in atoms.atom:
            rid = int(atom.resi) - 1
            dst = atom.coord[2] - szv
            if dst < mindt[rid]:
                mindt[rid] = dst
        # save the graph under jsdb["confd"] directory
        figs = plt.figure()
        resno = range(1, len(mindt) + 1)
        plt.plot(resno, mindt, 'r+', resno, mindt, 'k')
        plt.title(
            u"residues configuraions diagram\ndistance={}Å; energy={}kj".
            format(self.jsdb["mindst"], cen))
        plt.savefig(os.path.join(self.jsdb["confd"], cid + ".pdf"))
        # One figure per conformation: release it, or they pile up in memory.
        plt.close(figs)
        logger.debug("plot a residues diagram and saved as pdf.")
def sampling(self):
    """Place the protein, run the rotation sampling and save the json db.

    Steps, in order:

    1. load the protein and surface objects (``self.load_file``);
    2. apply the initial x/y/z rotations and x/y/z translations taken from
       ``self.args`` and save the session (``self.savefile()``);
    3. run ``self.protation()`` once when ``self.args.stepd`` is positive,
       otherwise save the current conformation with
       ``self.savefile(pdb=True)``;
    4. store ``mindst`` and ``svcount`` in ``self.jsdb`` and dump the db to
       ``self.args.jsdbf``.
    """
    args = self.args

    draw = random.random()
    logger.info("the random number is :" + str(draw))
    self.cmd.do('print "Hajimaru yo ~♪"')
    self.load_file()  # Load surface object and protein object

    logger.debug(
        "SECRET thing, rotate X/Y slightly... {}/{} degrees...".format(
            args.init_xr, args.init_yr))
    for axis, degrees in (("x", args.init_xr), ("y", args.init_yr)):
        self.protate(axis, degrees)

    logger.info(
        "ONE more thing, translate X/Y {}/{} and rotate Z {}".format(
            args.init_xt, args.init_yt, args.init_zr))
    self.protate("z", args.init_zr)
    for axis, offset in (("x", args.init_xt), ("y", args.init_yt),
                         ("z", args.init_height)):
        self.ptransl(axis, offset)

    logger.info(
        "FINALLY, The minimal Z distance after adjustment is : {}".format(
            self.jsdb["mindst"]))
    self.savefile()
    logger.info(
        "Saved file (count: {}), after rotation and translation".format(
            self.svcount))

    # Ready to start sampling. Only a single rotation pass is run for now,
    # and only when at least one pass was requested.
    if 0 < args.stepd:
        self.protation()  # rotate the protein
        logger.info(
            "The final configuration should be the last count ({}).".
            format(self.svcount))
    else:
        self.savefile(pdb=True)
        logger.info("Saved file (count: {}), final configuration.".format(
            self.svcount))

    self.jsdb["mindst"] = self.mindst(state=0, z=True)
    logger.debug("updated the mini distance Z: {}".format(
        self.jsdb["mindst"]))
    self.jsdb["svcount"] = self.svcount
    with open(args.jsdbf, "w") as dbfile:
        json.dump(self.jsdb, dbfile)
    logger.info("Saved the jsdb file, after sampling all confs!")
def load_file(self):
    """Load the protein and the surface PDB files into PyMOL.

    ``self.args.proteinf`` becomes the object "protein" and
    ``self.args.surfacef`` the object "surface", in that order.
    """
    sources = (
        (self.args.proteinf, "protein"),
        (self.args.surfacef, "surface"),
    )
    for path, object_name in sources:
        logger.debug("loading " + path)
        self.cmd.load(path, object_name)
    logger.info("loaded protein and surface pdb files.")
def savejdb(self):
    """Write ``self.jsdb`` as JSON to the file named by ``self.args.jsdbf``.

    An existing file is overwritten.
    """
    # ASSUMED: the parsed arguments carry a jsdbf entry.
    db_path = self.args.jsdbf
    with open(db_path, "w") as db_file:
        json.dump(self.jsdb, db_file)
    saved_message = "Saved the jsdb file: {}.".format(db_path)
    logger.info(saved_message)
def energyP(self, target=-1):
    """Export the energy and orientation of one bird as a function of iteration.

    The json db named by ``self.args.jdb`` is loaded and its ``birds``
    records are scanned in order. Two tables are written to the working
    directory:

    * ``gbest_energy.txt`` -- row number and energy (the stored energy
      multiplied by -1);
    * ``gbest_vector.txt`` -- row number followed by the six position
      values: rotation x, y, z and translation x, y, z.

    Args:
        target: id of the bird to follow. With -1 (default) the records
            flagged ``gbest`` are followed instead. One row is emitted
            whenever the iteration number increases, holding the most
            recent gbest record seen so far, plus one closing row after
            the last record. For any other target a row is emitted for
            every record of that bird.

    Returns:
        1 on error (non-integer target, db cannot be loaded, ``birds``
        missing or not a list, malformed record), otherwise None.

    An empty ``birds`` list yields tables that contain only the header.
    """
    try:
        target = int(target)
    except ValueError:
        logger.error("Please gives the target ID, should be integer!")
        return 1
    if self.loaddb(self.args.jdb) != 0:
        logger.error("Please check the db file existed or not!")
        return 1
    try:
        birds = self.jsdb["birds"]
    except KeyError:
        logger.error("jsdb does not have birds!")
        return 1
    if not isinstance(birds, list):
        logger.error("there are no birds in it!")
        return 1

    follow_gbest = target == -1
    lasti = 0
    # Most recent (energy, xr, yr, zr, xt, yt, zt) of the followed bird. It
    # stays all-zero until a matching record has been seen.
    last = (0, 0, 0, 0, 0, 0, 0)
    rows = []
    for bird in birds:
        try:
            n = int(bird["bird"])
            i = int(bird["iteration"])
            e = float(bird["energy"]) * -1
            g = bool(bird["gbest"])
            _ = bird["velocity"]  # not used; its presence is still required
            p = bird["position"]
            xr = float(p[0])
            yr = float(p[1])
            zr = float(p[2])
            xt = float(p[3])
            yt = float(p[4])
            zt = float(p[5])
        except (TypeError, IndexError, ValueError, KeyError):
            logger.error("There are alien in the birds!")
            return 1

        is_target = (not follow_gbest) and target == n
        if (follow_gbest and g) or is_target:
            last = (e, xr, yr, zr, xt, yt, zt)
        # Non-gbest targets are recorded on the fly; gbest is recorded when
        # the iteration number moves on, because one iteration can hold
        # more than one gbest record.
        if is_target or (follow_gbest and i > lasti):
            rows.append(last)
            lasti = i

    # Closing gbest row for the last iteration; skipped when there was no
    # record at all, so no bogus zero row is written.
    if follow_gbest and birds:
        rows.append(last)

    table = np.array(rows, dtype=float).reshape(-1, 7)

    # save all the numbers into txt
    EN = np.arange(1, len(table) + 1)
    ES = np.column_stack((EN, table[:, 0]))
    np.savetxt("gbest_energy.txt",
               ES,
               fmt="%03d, %2.9e",
               header="iteration, ProtPOS score (kJ/mol)")
    logger.info("Gbest energy evolution: gbest_energy.txt")
    EV = np.column_stack((EN, table[:, 1:]))
    np.savetxt("gbest_vector.txt",
               EV,
               fmt="%03d, %2.9e, %2.9e, %2.9e, %2.9e, %2.9e, %2.9e",
               header="iteration, (orientation:) rotation-X, rotation-Y, rotation-Z, translation-X, translation-Y, translation-Z")
    logger.info("Gbest orientation evolution: gbest_vector.txt")
def printoutcluster(self):
    """Log the clustering report: a summary table, then one section per cluster.

    Clusters are reported in the order given by ``self.cs_eid``; each id is
    looked up in ``self.clusters``.

    Summary table, one row per cluster::

        Cluster id  Size  Average E  Median E  Lowest E  Highest E  Min-distance profile
        01           35   -842.843   -855.286  -1002.338   ...      <path of the profile plot>
        02           10   -788.975   -806.487  -1015.308   ...      <path of the profile plot>

    Per-cluster section::

        === Cluster 01 ===
        #  ProtPOS Score   Predicted PDB
        1   -1002.33846    ./200/trun-PSO-072112-test03/gbest.pdb
        2    -800.39498    ./250/trun-PSO-072112-test03/gbest.pdb
        ...
        Contacting Residues Within 5 Angstrom from Surface:
        128,20,18
        ResID Freq %
         128  35 100.00%   <-- occurrences among the members, and share
         020  25 071.43%   <-- 25 / 35 * 100, two decimal places
         018  10 028.57%

    Members are listed by ascending score; residues by descending frequency.
    """
    logger.info("Assume homogeneous surface, performing clustering analysis based on the residue min-distance profiles:")
    logger.info("=== Clusters summary ===\n")
    labels = ["Cluster id", "Size", "Average E", "Median E", "Lowest E", "Highest E", "Min-distance profile"]
    # Column formats for the header row (labelp) and the value rows (labelw).
    labelp = ["{:<12s}"] + ["{:^7s}"] + ["{:^12s}"] * 4 + ["{}"]
    labelw = ["{:<12s}"] + ["{:^7d}"] + ["{:^12.3f}"] * 4 + ["{}"]
    logger.info("".join(f.format(v) for f, v in zip(labelp, labels)))

    for c in self.cs_eid:
        cs = self.clusters[c]
        enl = np.array(cs["englt"])
        csl = [cs["coden"], enl.size, np.average(enl), np.median(enl), enl.min(), enl.max(), cs["fpath"]]
        logger.info("".join(f.format(v) for f, v in zip(labelw, csl)))
    logger.info("\nEnergy (E): in kJ/mol")
    logger.info("noise: not grouped into any cluster\n")

    for c in self.cs_eid:
        cs = self.clusters[c]
        logger.info("\n=== Cluster {} ===\n".format(cs["coden"]))
        englt = cs["englt"]
        csize = len(englt) * 1.0
        # cs["gbest"] holds indices into self.labels (the profile files).
        gbest_pdb = [re.sub(r"\.txt", ".pdb", self.labels[x]) for x in cs["gbest"]]
        # Pair each score with its PDB; the lowest score comes first.
        cs_sorted = sorted(zip(englt, gbest_pdb), key=lambda x: x[0])
        logger.info("{:<2s} {:^15s} {}".format("#", "ProtPOS Score", "Predicted PDB"))
        for rank, (score, pdbname) in enumerate(cs_sorted, 1):
            logger.info("{:<2d} {:^15.5f} {}".format(rank, score, pdbname))

        logger.info("\nContacting Residues Within 5 Angstrom from Surface:")
        logger.info(",".join(["{:.0f}".format(x) for x in cs["adsit"]]))
        logger.info("\nResID Freq % ")
        cc = Counter(cs["adslt"])
        for resid in sorted(cc, key=cc.get, reverse=True):
            logger.info(" {:03.0f} {:>3d} {:06.2f}%".format(resid, cc[resid], cc[resid] / csize * 100))