def __init__(self, filename, database, length):
    """Record the run parameters, build the helper objects and read the log.

    ``filename``, ``database`` and ``length`` are stored unchanged as
    attributes of the same name.  An ``Efficiency`` instance is kept as
    ``self.eff`` and an ``Admin`` built from the database as ``self.admin``.
    ``readlogfile()`` is called last, once every attribute is in place.
    """
    self.filename, self.database, self.length = filename, database, length
    # Helpers are created in the same order as before: Efficiency, then Admin.
    self.eff = Efficiency()
    self.admin = Admin(self.database)
    self.readlogfile()
def find_max(pcbm_lumo):
    """Find the maximum efficiency value for a particular pcbm_lumo.

    Scans HOMO values from -8.5 to about -4.91 in steps of 0.005 while the
    LUMO is pinned at ``pcbm_lumo + 0.3``, evaluates the efficiency for each
    point, prints the best ``(efficiency, homo, lumo, bandgap)`` tuple and
    plots efficiency against HOMO with pylab.

    Parameters:
        pcbm_lumo: LUMO level of the acceptor, forwarded to the efficiency
            calculator as the ``pcbm_lumo`` keyword.

    Returns:
        None.  The result is only printed and plotted.
    """
    effcalc = Eff.Efficiency()
    delta = 0.005
    homo_lo, homo_hi = -8.5, -4.91
    # Only the HOMO axis is scanned here; the band gap follows from the
    # pinned LUMO, so no second range is needed.
    homos = numpy.arange(homo_lo, homo_hi + delta, delta)
    cutoff = pcbm_lumo + 0.3

    eff = []
    maxe = (0, 0, 0, 0)
    for homo in homos:
        lumo = cutoff
        bandgap = lumo - homo
        # NOTE(review): the first positional argument here is the LUMO,
        # whereas plot() passes the HOMO -- confirm against the
        # Efficiency.efficiency signature this file targets.
        e = effcalc.efficiency(lumo, bandgap, False, False,
                               pcbm_lumo=pcbm_lumo)
        eff.append(e)
        if e > maxe[0]:
            maxe = (e, homo, lumo, bandgap)

    # Single-argument form prints identically under Python 2 and 3.
    print("Max %s" % (maxe,))
    pylab.plot(homos, eff)
def plot(pcbm_lumo, colorbar=True):
    """Plot the landscape of efficiency values as a contour map.

    This is imported by other scripts.

    The efficiency is evaluated on a grid of HOMO values (y axis, -11.5 to
    -4.7) and band gaps (x axis, 5.3 down to 0.5) with a step of 0.05, and
    drawn with ``pylab.contour``.  A black line starting at
    ``pcbm_lumo + 0.3`` with slope -1 is overlaid.

    Parameters:
        pcbm_lumo: acceptor LUMO level passed to the efficiency calculator.
        colorbar: when true, add a colour bar labelled "% Efficiency".

    Returns:
        The colour bar object when ``colorbar`` is true, otherwise None.
    """
    effcalc = Eff.Efficiency()
    delta = 0.05
    # [[HOMO start, HOMO stop], [band gap start, band gap stop]]
    lims = [[-11.5, -4.7], [5.3, 0.5]]
    homos = numpy.arange(lims[0][0], lims[0][1] + delta, delta)
    bandgaps = numpy.arange(lims[1][0], lims[1][1] - delta, -delta)
    cutoff = pcbm_lumo + 0.3

    contours = [
        0, 0.0001, 0.01, 0.04, 0.1, 0.2, 0.4, 0.7, 1, 2, 3, 4, 5, 6, 8, 10,
        12, 14
    ]
    # One tick label per contour level: hand-written below 1, integers above.
    labels = ["0", "1E-4", "0.01", "0.04", "0.1", "0.2", "0.4", "0.7"]
    labels += ["%d" % x for x in contours if x >= 1.0]

    alleff = []
    for homo in homos:
        alleff.append([
            effcalc.efficiency(homo, bandgap, pcbm_lumo, verbose=False,
                               cutoff=False)
            for bandgap in bandgaps
        ])

    cs = pylab.contour(bandgaps, homos, numpy.array(alleff), contours)
    pylab.plot([0, 6.5], [cutoff, -6.5 + cutoff], color="black")
    # The x axis is drawn from the small band gap to the large one.
    pylab.xlim(lims[1][1], lims[1][0])
    pylab.ylim(lims[0][0], lims[0][1])

    if not colorbar:
        return None
    CB = pylab.colorbar(
        cs,
        shrink=0.8,
        extend='both',
        ticks=contours,
        format=matplotlib.ticker.FixedFormatter(labels),
    )
    CB.set_label("% Efficiency")
    return CB
def plot_heeger(pcbm_lumo, colorbar=True):
    """Plot the efficiency landscape on (band gap, HOMO + band gap) axes.

    This is imported by other scripts.

    The y axis runs over ``homo_plus_bg`` from ``cutoff + 1.0`` down to
    ``cutoff`` (where ``cutoff = pcbm_lumo + 0.3``) and the x axis over band
    gaps from 1.0 to 3.1, both with a step of 0.05.  For every grid point the
    HOMO handed to the efficiency calculator is ``homo_plus_bg - bandgap``.

    Parameters:
        pcbm_lumo: acceptor LUMO level passed to the efficiency calculator.
        colorbar: when true, add a colour bar labelled "% Efficiency".

    Returns:
        The colour bar object when ``colorbar`` is true, otherwise None.
    """
    cutoff = pcbm_lumo + 0.3
    effcalc = Eff.Efficiency()
    delta = 0.05
    # [[y start, y stop], [band gap start, band gap stop]]
    lims = [[cutoff + 1.0, cutoff], [1.0, 3.1]]
    homo_plus_bgs = numpy.arange(lims[0][0], lims[0][1] - delta, -delta)
    bandgaps = numpy.arange(lims[1][0], lims[1][1] + delta, delta)

    contours = [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
    labels = ["0", "0.5"]
    labels += ["%d" % x for x in contours if x >= 1.0]

    alleff = []
    for homo_plus_bg in homo_plus_bgs:
        row = []
        for bandgap in bandgaps:
            homo = homo_plus_bg - bandgap
            row.append(
                effcalc.efficiency(homo, bandgap, pcbm_lumo, verbose=False,
                                   cutoff=False))
        alleff.append(row)

    cs = pylab.contour(bandgaps, homo_plus_bgs, numpy.array(alleff),
                       contours)
    # Axis limits are passed as (stop, start) for x and (start, stop) for y,
    # exactly as before.
    pylab.xlim(lims[1][1], lims[1][0])
    pylab.ylim(lims[0][0], lims[0][1])

    if not colorbar:
        return None
    CB = pylab.colorbar(
        cs,
        shrink=0.8,
        extend='both',
        ticks=contours,
        format=matplotlib.ticker.FixedFormatter(labels),
    )
    CB.set_label("% Efficiency")
    return CB
def hist():
    """Draw histograms of the distribution of HOMOs and transition Es for
    the dimers vs the tetramers.

    Reads every record from the 'alldimers' and 'alltetramers' databases via
    ``Admin(...).getalldata()``, converts the third field of each record with
    ``getHplusBG`` into a (HOMO, transition) pair, prints the mean of each
    quantity per database, and saves one normalised bar chart per quantity
    to ``pictures/Figure1_hist_<i>.png``.
    """
    homos = [[], []]
    trans = [[], []]
    # Index 0 collects dimer data, index 1 tetramer data.
    for i, db in enumerate(['alldimers', 'alltetramers']):
        admin = Admin(db)
        for d in admin.getalldata():
            homo, tran = getHplusBG(d[2])
            homos[i].append(homo)
            trans[i].append(tran)
        print("%s %s" % (np.mean(homos[i]), np.mean(trans[i])))

    delta = 0.2
    bins = [np.arange(-10, -5.5, delta), np.arange(1, 7, delta)]
    titles = ['HOMO (eV)', 'Lowest energy significant transition (eV)']
    for i, data in enumerate([homos, trans]):
        # Tetramers: grey bars, nudged right so they sit beside the dimers.
        n, bns = np.histogram(data[1], bins[i])
        c = pylab.bar(bns[:-1], n / float(n.sum()), delta * .4, color="gray")
        for rect in c:
            rect.set_x(rect.get_x() + delta / 2. * 0.8)
        # Dimers: black bars at the bin edges.
        n, bns = np.histogram(data[0], bins[i])
        f = pylab.bar(bns[:-1], n / float(n.sum()), delta * .4, color="k")

        pylab.legend([f[0], c[0]], ["Dimers", "Tetramers"])
        pylab.ylabel("Fraction")
        pylab.xlabel(titles[i])
        pylab.savefig(os.path.join("pictures", "Figure1_hist_%d.png" % i))
        pylab.clf()
def initscorefn(self):
    """Create the Efficiency scorer, keep it as ``self.efficiency`` and run
    its ``unittest()`` method."""
    scorer = Efficiency()
    self.efficiency = scorer
    scorer.unittest()
class GA(object):
    """Genetic algorithm over two-monomer units, scored from g09 results.

    A population member is a tuple ``(monomers, dir_a, dir_b)`` where
    ``monomers`` is a two-element list of monomer names and the two
    direction entries come from ``randomdirs``.  Scores are computed from
    data stored through ``admin``; members without stored data get a
    Gaussian input file written, are run through ``g09`` locally and have
    their results parsed and stored.
    """

    # Hand-picked monomer SMILES that can be used to seed the population
    # instead of a random draw (see the commented alternative in initpop).
    selected_initial_population = [
        '[CH]/C=C\\1/OCC[C](OCC1)',
        'C\\1=C/O/C=C(/O/C=C/O/C=C/Oc2c(cccc2)O1)',
        'C=C(C)CN',
        'C1=C(C=C/C/1=C\\1/C=CC(=C1)N)N',
        'C1=C(F)C=C/C/1=C\\1/C(=CC(=C1)F)',
        'C1=C(N(=O)=O)C=C/C/1=C\\1/C=CC(N(=O)=O)=C1',
        'C1=C/C(/C(=C1)C#N)=C\\1/C=C(C=C1C#N)',
        'C1=C/C(/C(=C1)F)=C\\1/C=C(C=C1F)',
        'C1=C/C(/C(=C1)OC)=C\\1/C=C(C=C1OC)',
        'C1=C2OCCSC2=C([S@@]1NC)',
        'C1=CC(OC)=C(/C/1=C\\1/C=C(OC)C=C1)',
        'C1=CC2=C(OC=CO2)/C/1=C/1\\C2=C(OC=CO2)C=C1',
        'c1c([n]c2[CH]Sc12)',
        'c1c(C(=O)C(F)(F)F)scc1',
        'c1c2c(=O)oc(=O)c2c(c2c1c(=O)oc2=O)',
        'c1c2c3c4c(c1CC)c(=O)[nH]c(=O)c4cc(CC)c3c(=O)n(c2=O)',
        'c1cc2c(C=C/C/2=C\\2/C=Cc3c2cccc3)c(c1)',
        'c1cnc(c2c1nsn2)',
        'c1nnc(nn1)',
        'c1sc(c(c1C(F)(F)F)C(F)(F)F)',
        'c1sc(c(c1N(=O)=O)C#N)',
        'c1sc(c2c1CCC[C@H]2N(=O)=O)',
        'c1sc(S[CH2])c(c1)',
        'C1SC[C@@H]2[C@H]1[C](O[C@@H]2[O])NCC',
        'N1[C@H]2[C@H](N[C@@H]3[C@@H](CSC3)N2)N(S1)',
        'N1[C@H]2CSC[C@H]2N(S1)',
        'n1cc2c3c(c1)ccc1c3c(cc2)cn(c1)',
        'N1N[C@H]2[C@@H](C)[C@@H]3[C@@H](NSN3)[C@H](C)[C@H]2N1',
        'C1=C[C@@H]2[C@H](C1)[C@@H]1[C@@H](C=CC1)C2=C',
        'C1=CC=C(S1(=O)=O)',
        'c1c(C)cc(c(c1)C)',
        'c1c(C#N)sc(OC)c1',
        'c1c(ncc2c1non2)',
        'c1c2c(=O)n(c(=O)c2cc2c1c(=O)[nH]c2=O)',
        'c1cc(C)cc(c1C)',
        'c1cc(N(=O)=O)c(cc1)',
        'c1ccc2c(c1)C(=O)c1c2ccc(c1)',
        'c1cccc2c1N(C)C(=O)[C]2[C]1C(=O)N(C)c2cc(ccc12)',
        'c1cscc1C(=O)O',
        'c1oc(c(c1C#N)C#N)',
        'C1S[C]2[C]3SCC[C@@H]3C(=C(CN)CN)[C@H]2C1',
        'C1S[C]2[C]3SCC[C@@H]3C(=O)[C@H]2C1',
        'c1sc(c(c1C#N)C#N)',
        'c1sc(c(c1O)C#N)',
        'c1sc(c2c1[C@H](C(=O)C(F)(F)F)CCC2)',
        'c1sc(c2c1C(=O)CC2=O)',
        'c1sc(c2c1oc(=S)o2)',
        'c1sc(nn1)',
        'c1scc2c1C(=C[S@@]2[O])',
        'N1C(=O)[C](c2c1cc(cc2)C)[C]1C(=O)N(c2c1ccc(C)c2)',
        'N1C(=S)C=C(C1=S)',
        'Sc1cscc1S',
        '[C]1C(=O)Oc2c1cc1c(c2)[C](C(=O)O1)',
        'C1=[S]C(=S)C2=C1C=C(S2(=O)=O)',
        'C1=C(CC)C(CC)=C(S1(=O)=O)',
        'C1=C2C(C(=O)N1)=C(OC2=O)',
        'c1c(C(=O)C)scc1',
        'c1c(F)cccc1',
        'C1C[C@@H]2[C@@H](CC1)N(CC)[C@@H]1[C@@H](S2)CCCC1',
        'c1c2c(=O)n(c(=O)c2cc2c1c(=O)sc2=O)',
        'c1c2C(=O)OCc2ccc1',
        'c1c2c(nccn2)c(c2c1nccn2)',
        'c1c2nonc2c(cc1)',
        'c1cc(c(cc1)N(c1ccccc1)c1ccccc1)',
        'c1oc(c(c1C#N)C(F)(F)F)',
        'C1Oc2cscc2OC(C1=O)',
        'c1sc(C(F)(F)F)cc1',
        'c1sc(c2c1[CH][N]2)',
        'c1sc(c2c1C(=O)NC2=O)',
        'c1sc(c2c1CC(=O)C(=O)C2)',
        'c1sc(c2c1oc(=O)c(=O)s2)',
        'C1SC[C@@H]2[C@@]1(S)C[C@H](OC)[C@@H](OC)C2',
        'c1sc2cc(C(=O)O)sc2c1',
        'c1scc(N(=O)=O)c1N',
        'C1SCN2[C@H]1NCC2',
        'N1[C@]2(SC)CSC[C@H]2NC(=C1)',
        'N1[C@@H]2C[C@@H]3NSN[C@@H]3C[C@@H]2N(S1)',
        'N1c2cscc2N(CC(=O)C1)',
        'c1[n]c2c(c(=O)c3c2[n]cc3)c1',
        'c1[nH]cc2c1S(=O)(=O)C(=C2)',
        'C1=[S]C(=O)C2=C1C(=O)[S]=C2',
        'C1=CC(=C2[C@H]1CCS2)',
        'C1=CC=C(C1=S)',
        'c1c([CH2])c(c2c1ccsc2)',
        'c1c(C(F)(F)F)sc(C#N)c1',
        'c1c(F)c(F)c(c(c1F)F)',
        'c1c(F)sc(F)c1',
        'c1c(O)sc2c1[nH]c1c2sc(c1)',
        'c1c(OC)c(cc(c1)OC)',
        'c1c2[CH][S]([CH2])[CH]c2c(cc1)',
        'c1c2c(=O)n(c(=O)c2cc2c1c(=O)oc2=O)',
        'c1c2c(ncc(C)n2)c(s1)',
        'c1cc2c(=O)n(C)c(=O)c3c2c2c1C1=C4[C@@H](c2cc3)C=CC2=C4[C@@H](C(=O)N(C2=O)C)C(=C1)',
        'c1cc2c(s1)c1c(c(=O)[nH]c2=O)cc(s1)',
        'c1coc(N(=O)=O)c1',
        'c1oc(c(c1)N(=O)=O)',
        'c1oc2c(c1)C(=O)c1c2sc(c1)',
        'C1S[C@@H]2[C@@H]1NCC2',
        'c1sc(C(=O)O)cc1O',
        'c1sc(c(c1)C=O)',
        'c1sc(c(c1)N(=O)=O)',
        'c1sc(c(c1C#N)C(F)(F)F)',
        'c1sc(c2c1C[C@H](N)[C@@H](N(=O)=O)C2)',
        'c1sc(c2c1C=[S]C=C2)',
        'c1sc(c2c1nc(OCC)c(CN)n2)',
        'c1sc(c2c1nc1c3ccccc3c3ccccc3c1n2)',
        'c1sc(c2c1nc1c3sccc3c3ccsc3c1n2)',
        'c1sc(c2c1oc(N(=O)=O)c2)',
        'c1sc(c2c1sc(=O)s2)',
        'c1sc(c2c1sc(N(=O)=O)c2)',
        'c1sc(c2c1SCCS2)',
        'c1scc2c1C(=[S@@](OC)C(=C2)OC)',
        'N(CC)c1ccc(c(c1)C=C)N(CC)',
        'N1CN[C@@H]2[C@H]1N(CN2)',
        'N1CN[C@@H]2C[C@H]3[C@@H](C[C@H]12)N(CN3)',
    ]

    def __init__(self, admin, length, Nchromos, R, simmatrix, objectivefn,
                 logmessage=""):
        """Set up the GA state and the scoring function.

        Parameters:
            admin: storage/logging object; must provide ``log.write``,
                ``getdata`` and ``storedata``.
            length: forwarded to ``randomdirs``, ``molname_to_mol``,
                ``createAllCombinations`` and ``get_comb``.
            Nchromos: population size, stored as ``self.N``.
            R: number of neighbours taken from each ``simmatrix`` entry
                when mutating.
            simmatrix: mapping of monomer -> sequence of similar monomers;
                its sorted keys become ``self.monomers``.
            objectivefn: ``"eff"`` or ``"distance"`` (see ``getscore``).
            logmessage: optional message logged right after set-up.
        """
        self.admin = admin
        self.N = Nchromos
        self.R = R  # the number of nbrs
        self.simmatrix = simmatrix
        self.monomers = sorted(self.simmatrix.keys())
        self.length = length
        self.objectivefn = objectivefn
        self.gen = 0
        self.initscorefn()
        if logmessage:
            self.log(logmessage)

    def initscorefn(self):
        """Create the Efficiency scorer and run its ``unittest()``."""
        self.efficiency = Efficiency()
        self.efficiency.unittest()

    def log(self, msg):
        """Write ``msg`` plus a newline to the admin log and echo it."""
        self.admin.log.write(msg + "\n")
        print(msg)

    def initpop(self):
        """Create ``self.N`` random population members and log them."""
        self.log("\tInitialising population")
        # Make self.N polymers of length self.length
        dimerunits = []
        for _ in range(self.N):
            monos = [random.choice(self.monomers) for _j in range(2)]
            # To seed from the hand-picked list instead of a random set of
            # monomers, draw from self.selected_initial_population here:
            # monos = [random.choice(self.selected_initial_population)
            #          for _j in range(2)]
            if monos[1] > monos[0]:
                # Canonical order: the larger name always comes first.
                monos = [monos[1], monos[0]]
            directions = randomdirs(monos, self.length)
            dimerunits.append((monos, directions[0], directions[1]))
        self.pop = dimerunits
        self.logpop(self.pop)

    def initallpop(self, chosen_monos=None):
        """Use every not-yet-calculated combination as the population.

        ``chosen_monos`` defaults to ``self.monomers``.  ``self.N`` is reset
        to 0, and only combinations for which ``admin.getdata`` returns
        nothing are kept.
        """
        if chosen_monos is None:
            chosen_monos = self.monomers
            # To select an initial population manually, use instead:
            # chosen_monos = self.selected_initial_population
        self.log("\tInitialising population")
        self.N = 0
        dimerunits = createAllCombinations(chosen_monos, self.length)
        self.log("\tTotal size of potential pop is %d" % len(dimerunits))
        newunits = [
            dimerunit for dimerunit in dimerunits
            if not self.admin.getdata(polname(dimerunit))
        ]
        self.pop = newunits
        self.log("\tTotal size of uncalc pop is %d" % len(newunits))
        self.logpop(self.pop)

    def logpop(self, pop):
        """Log the size of ``pop`` followed by the name of each member."""
        self.log("Population of size %d" % len(pop))
        for pol in pop:
            self.log(polname(pol))

    def loggjf(self):
        """Log which population indices map to each pending GJF file."""
        self.log("%d GJF files created" % len(self.gjfs))
        for j, x in enumerate(self.gjforder):
            self.log("GJF %d: polymer numbers %s" % (j, self.gjfs[x]))

    def logfitness(self, pop, text):
        """Log every member of ``pop`` with its score, best first."""
        self.log("\t%s population fitness" % text)
        ranked = sorted(pop, key=lambda x: self.getscore(polname(x)),
                        reverse=True)
        for j, x in enumerate(ranked):
            score, logtext = self.getscore(polname(x), log=True)
            if score is not None:
                self.log("%d: %s with %.3f %s" % (j, polname(x), score,
                                                  logtext))
            else:
                self.log("%d: %s with FAIL" % (j, polname(x)))

    def makeGJF(self, pop, length):
        """Write a Gaussian input file for every unscored member of ``pop``.

        Fills ``self.gjfs`` (name -> list of population indices) and
        ``self.gjforder`` (names sorted by descending molecular weight), then
        writes ``gaussian/<idx>.gjf`` for each name in that order.
        """
        self.log("\tCreating txt files")
        if not os.path.isdir("gaussian"):
            os.mkdir("gaussian")
        self.gjfs = {}
        for i, x in enumerate(pop):
            if self.getscore(polname(x)) is None:
                self.gjfs.setdefault(polname(x), []).append(i)
                print("showing the pol: %s" % polname(x))
        # Sort the gjfs by molecular weight
        self.gjforder = sorted(
            self.gjfs.keys(),
            key=lambda x: molname_to_mol(str(x), self.length).molwt,
            reverse=True)

        header = "%nproc=1\n%mem=1GB\n#n ZINDO(NStates=10,Singlets)"
        header_b = "\n"
        for idx, smi in enumerate(self.gjforder):
            mol = molname_to_mol(smi, length)
            mol.make3D()
            globalopt(mol)
            # Drop the first three lines of the generated input and force
            # the "0 3" charge/multiplicity line to "0 1".
            body = mol.write("gau").replace("0 3\n", "0 1\n").split("\n")[3:]
            gaussian = (header + "\n\n" + smi + "\n" + "\n".join(body) +
                        header_b)
            with open(os.path.join("gaussian", "%s.gjf" % idx),
                      "w") as output:
                output.write(gaussian)
            print("finished creating %s.gjf" % idx)

    def runGaussian(self):
        """Run g09 on every pending GJF file and gzip the resulting logs.

        Does nothing when there are no pending files.  Exits the process
        with status 0 if ``gaussian/end.txt`` exists.
        """
        if not self.gjfs:
            return
        # Create gaussian/end.txt to terminate the GA
        if os.path.isfile(os.path.join("gaussian", "end.txt")):
            self.log("\nFound end.txt. Finishing")
            sys.exit(0)
        self.log("\tRunning Gaussian")
        # loop through the gjf input files and run g09
        for i in range(len(self.gjfs)):
            # if there are old .gz files (e.g., previous generation)..
            # remove them
            filename = os.path.join("gaussian", "%d.log.gz" % i)
            if os.path.isfile(filename):
                os.remove(filename)
            # run g09 as a subprocess; only integers are interpolated into
            # the shell string
            subprocess.call("(cd gaussian; g09 %d.gjf %d.log)" % (i, i),
                            shell=True)
        # Compress all freshly written logs in one go.
        subprocess.call("(cd gaussian; gzip -f *.log)", shell=True)

    def extractcalcdata(self):
        """Parse the gzipped g09 logs and store the results via ``admin``.

        Logs that are missing, fail to parse, lack the needed attributes or
        have no positive transition energy are skipped silently.
        """
        if not self.gjfs:
            return
        self.log("\tExtracting data from log files")
        tostore = []
        for j, pname in enumerate(self.gjforder):
            print('on pname=%s, j=%s' % (pname, j))
            mylogfile = os.path.join("gaussian", "%d.log.gz" % j)
            if not os.path.isfile(mylogfile):
                continue
            logfile = ccopen(mylogfile)
            logfile.logger.setLevel(logging.ERROR)
            try:
                data = logfile.parse()
            except AssertionError:
                continue
            try:
                # Values rounded to reduce size of output file
                lumo = round(data.moenergies[0][data.homos[0] + 1], 3)
                homo = round(data.moenergies[0][data.homos[0]], 3)
                # cm-1 to eV
                etens = [round(x * convert, 3) for x in data.etenergies]
                etoscs = [round(x, 3) for x in data.etoscs]
            except Exception:
                # Best effort: an incomplete calculation is simply skipped.
                continue
            if not etens or max(etens) <= 0:
                continue
            # Only the frontier energies and transitions are stored; the
            # full orbital list is too large for big data sets.
            myjson = json.dumps(
                [float(homo), float(lumo), etens, list(etoscs)])
            tostore.append((pname, myjson))

        for pname, myjson in tostore:
            # get the sequence
            seq = get_comb(pname, self.length)[0]
            # Shorten the sequence: (qu) -> A, (uq) -> B, (di) -> D,
            # (id) -> E, to make sequences easier to read
            seqSym = (seq.replace("(qu)", "A").replace("(uq)", "B")
                      .replace("(di)", "D").replace("(id)", "E"))
            self.admin.storedata(pname, self.gen, seqSym, myjson)

    def getscore(self, polname, log=False):
        """Return the score of the polymer called ``polname``.

        Returns None (or ``(None, None)`` when ``log`` is true) if no data is
        stored for it.  Otherwise returns the score, or ``(score, logtext)``
        when ``log`` is true.  For objective ``"eff"`` the score is the
        scaled efficiency; for ``"distance"`` it is the negated sum of the
        distance to (HOMO -5.70, transition 1.39) and ``1 - scale``.

        Raises:
            ValueError: if ``self.objectivefn`` is neither value above.
        """
        data = self.admin.getdata(polname)
        logtext = ""
        if not data:
            return (None, None) if log else None

        gen, sequence, myjson = data
        jsondata = json.loads(myjson)
        # Older records carry two extra fields (orbital energies, HOMO idx).
        homo, lumo, etens, etoscs = jsondata[:4] if len(
            jsondata) == 4 else jsondata[:4]
        if len(jsondata) not in (4, 6):
            raise ValueError(
                "unexpected record length %d for %s" % (len(jsondata),
                                                        polname))

        scale, trans = besttrans_revised(etens, etoscs)
        if scale < 1.0:
            logtext += "Os=%.1f" % (scale * 100,)

        if self.objectivefn == "eff":
            score = scale * self.efficiency.efficiency(homo, trans, -4.61)
        elif self.objectivefn == "distance":
            penalty = 1.0 - scale
            distance = math.sqrt((homo - (-5.70)) ** 2 + (trans - 1.39) ** 2)
            score = -(distance + penalty)  # We are finding the maximum
        else:
            raise ValueError("unknown objective function %r" %
                             (self.objectivefn,))

        return (score, logtext) if log else score

    def makechildren(self, moverandomly=False):
        """Breed ``self.N`` new children into ``self.children``.

        It should be possible for a single monomer to mutate.
        The mutations should always allow the exploration of local space.

        Parents are picked by 3-way tournaments without replacement until
        the pool holds ``self.N // 5`` members.  Each monomer of a crossed
        child is replaced with probability 0.75, either by any monomer
        (``moverandomly``) or by one of its first ``self.R`` neighbours in
        ``self.simmatrix``.
        """
        self.gen += 1
        scores = [(chromo, self.getscore(polname(chromo)))
                  for chromo in self.pop]

        poolsize = self.N // 5
        pool = []
        for _ in range(poolsize):
            tournament = random.sample(scores, 3)
            tournament.sort(reverse=True, key=lambda x: x[1])
            select = tournament[0]
            scores.remove(select)
            pool.append(select[0])

        self.children = []
        while len(self.children) < self.N:
            # Crossover to make two children
            x = copy.deepcopy(random.choice(pool))
            y = copy.deepcopy(random.choice(pool))
            children = [[x[0][0], y[0][1]], [x[0][1], y[0][0]]]
            for child in children:
                newchild = [child[0], child[1]]
                # Mutate backbone
                for i, mon in enumerate(child):
                    if random.random() > 0.25:
                        if moverandomly:
                            newchild[i] = random.choice(self.monomers)
                        else:
                            newchild[i] = random.choice(
                                self.simmatrix[mon][:self.R])
                if newchild[1] > newchild[0]:
                    # Canonical order: the larger name always comes first.
                    newchild = [newchild[1], newchild[0]]
                # Create random dirs
                directions = randomdirs(newchild, self.length)
                fullchild = (newchild, directions[0], directions[1])
                # Don't add a duplicate
                if fullchild not in self.pop + self.children:
                    self.children.append(fullchild)

    def nextgen(self):
        """Keep the best half of parents and the best half of children."""
        self.pop.sort(key=lambda x: self.getscore(polname(x)), reverse=True)
        self.children.sort(key=lambda x: self.getscore(polname(x)),
                           reverse=True)
        half = self.N // 2
        self.pop = self.pop[:half] + self.children[:half]
def __init__(self):
    """Attach a freshly constructed ``effmod.Efficiency`` as ``self.eff``."""
    calculator = effmod.Efficiency()
    self.eff = calculator
class GA(object):
    """Genetic algorithm over two-monomer units, run through a batch queue.

    A population member is a tuple ``(monomers, dir_a, dir_b)`` where
    ``monomers`` is a two-element list of monomer names and the two
    direction entries come from ``randomdirs``.  Members without stored data
    are written to ``gaussian/<n>.txt``, calculated by a ``qsub`` job, and
    the gzipped outputs are parsed and stored through ``admin``.
    """

    def __init__(self, admin, length, Nchromos, R, simmatrix, objectivefn,
                 logmessage=""):
        """Set up the GA state and the scoring function.

        Parameters:
            admin: storage/logging object; must provide ``log.write``,
                ``getdata`` and ``storedata``.
            length: forwarded to ``randomdirs``, ``molname_to_mol`` and
                ``createAllCombinations``; also written into the task file.
            Nchromos: population size, stored as ``self.N``.
            R: number of neighbours taken from each ``simmatrix`` entry
                when mutating.
            simmatrix: mapping of monomer -> sequence of similar monomers;
                its sorted keys become ``self.monomers``.
            objectivefn: ``"eff"`` or ``"distance"`` (see ``getscore``).
            logmessage: optional message logged right after set-up.
        """
        self.admin = admin
        self.N = Nchromos
        self.R = R  # the number of nbrs
        self.simmatrix = simmatrix
        self.monomers = sorted(self.simmatrix.keys())
        self.length = length
        self.objectivefn = objectivefn
        self.gen = 0
        self.initscorefn()
        if logmessage:
            self.log(logmessage)

    def initscorefn(self):
        """Create the Efficiency scorer and run its ``unittest()``."""
        self.efficiency = Efficiency()
        self.efficiency.unittest()

    def log(self, msg):
        """Write ``msg`` plus a newline to the admin log and echo it."""
        self.admin.log.write(msg + "\n")
        print(msg)

    def initpop(self):
        """Create ``self.N`` random population members and log them."""
        self.log("\tInitialising population")
        # Make self.N polymers of length self.length
        dimerunits = []
        for _ in range(self.N):
            monos = [random.choice(self.monomers) for _j in range(2)]
            if monos[1] > monos[0]:
                # Canonical order: the larger name always comes first.
                monos = [monos[1], monos[0]]
            directions = randomdirs(monos, self.length)
            dimerunits.append((monos, directions[0], directions[1]))
        self.pop = dimerunits
        self.logpop(self.pop)

    def initallpop(self, chosen_monos=None):
        """Use every not-yet-calculated combination as the population.

        ``chosen_monos`` defaults to ``self.monomers``.  ``self.N`` is reset
        to 0, and only combinations for which ``admin.getdata`` returns
        nothing are kept.
        """
        if chosen_monos is None:
            chosen_monos = self.monomers
        self.log("\tInitialising population")
        self.N = 0
        dimerunits = createAllCombinations(chosen_monos, self.length)
        self.log("\tTotal size of potential pop is %d" % len(dimerunits))
        newunits = [
            dimerunit for dimerunit in dimerunits
            if not self.admin.getdata(polname(dimerunit))
        ]
        self.pop = newunits
        self.log("\tTotal size of uncalc pop is %d" % len(newunits))
        self.logpop(self.pop)

    def logpop(self, pop):
        """Log the size of ``pop`` followed by the name of each member."""
        self.log("Population of size %d" % len(pop))
        for pol in pop:
            self.log(polname(pol))

    def loggjf(self):
        """Log which population indices map to each pending GJF file."""
        self.log("%d GJF files created" % len(self.gjfs))
        for j, x in enumerate(self.gjforder):
            self.log("GJF %d: polymer numbers %s" % (j, self.gjfs[x]))

    def logfitness(self, pop, text):
        """Log every member of ``pop`` with its score, best first."""
        self.log("\t%s population fitness" % text)
        ranked = sorted(pop, key=lambda x: self.getscore(polname(x)),
                        reverse=True)
        for j, x in enumerate(ranked):
            score, logtext = self.getscore(polname(x), log=True)
            if score is not None:
                self.log("%d: %s with %.3f %s" % (j, polname(x), score,
                                                  logtext))
            else:
                self.log("%d: %s with FAIL" % (j, polname(x)))

    def makeGJF(self, pop):
        """Write ``gaussian/<n>.txt`` for every unscored member of ``pop``.

        Fills ``self.gjfs`` (name -> list of population indices) and
        ``self.gjforder`` (names sorted by descending molecular weight); each
        text file contains just the polymer name.
        """
        self.log("\tCreating txt files")
        if not os.path.isdir("gaussian"):
            os.mkdir("gaussian")
        self.gjfs = {}
        for i, x in enumerate(pop):
            if self.getscore(polname(x)) is None:
                self.gjfs.setdefault(polname(x), []).append(i)
        # Sort the gjfs by molecular weight
        self.gjforder = sorted(
            self.gjfs.keys(),
            key=lambda x: molname_to_mol(str(x), self.length).molwt,
            reverse=True)
        self.loggjf()
        for j, pname in enumerate(self.gjforder):
            with open(os.path.join("gaussian", "%d.txt" % j), "w") as output:
                output.write(pname)

    @staticmethod
    def _plan_nodes_and_walltime(n_jobs, length, cpus_per_node=4,
                                 time_per_job=3):
        """Return ``(nodes, hours, mins)`` to request for ``n_jobs`` tasks.

        For ``length >= 8`` with at most 64 jobs the "large variation"
        scheme is used: long jobs are scheduled first and get double time.
        Otherwise at most ``8 * cpus_per_node`` tasks run at once and the
        wall time grows with the number of rounds.  A request for 3 nodes is
        bumped to 4 (3 nodes are not allowed on Stokes).
        """
        if length >= 8 and n_jobs <= 64:
            # Schedule the long jobs first, and give those slow jobs
            # extra time
            n_parallel_jobs = n_jobs * 5 // cpus_per_node
            if n_parallel_jobs == 0:
                n_parallel_jobs = 1
            mins = time_per_job * 2
            hours = 0
            n_nodes = 1 + (n_parallel_jobs - 1) // cpus_per_node
        else:
            max_tasks = cpus_per_node * 8
            if n_jobs <= max_tasks:
                rounds = 1
                n_nodes = 1 + (n_jobs - 1) // cpus_per_node
            else:
                rounds = 1 + (n_jobs - 1) // max_tasks
                n_nodes = max_tasks // cpus_per_node
            walltime = int(rounds * time_per_job)
            mins = walltime % 60
            hours = walltime // 60
        # Workaround for Stokes (3 nodes not allowed)
        if n_nodes == 3:
            n_nodes = 4
        return n_nodes, hours, mins

    def runGaussian(self):
        """Submit the pending calculations with qsub and wait for the job.

        Writes one shell line per pending file to ``tasks``, fills in
        ``template.sh`` to produce ``runwith1.sh``, removes stale
        ``gaussian/<n>.out.gz`` files, submits the script and then polls
        ``qstat`` every 10 seconds until it reports something on stderr.
        Does nothing when there are no pending files.  Exits the process
        with status 0 if ``gaussian/end.txt`` exists.
        """
        CPUS_PER_NODE = 4
        if not self.gjfs:
            return
        if os.path.isfile(os.path.join("gaussian", "end.txt")):
            self.log("\nFound end.txt. Finishing")
            sys.exit(0)
        self.log("\tRunning Gaussian")
        n_jobs = len(self.gjfs)

        with open("tasks", "w") as output:
            for i in range(n_jobs):
                output.write(
                    'echo -n "%d "; date; cd $SGE_O_WORKDIR; python %s/smi23D.py %d %d; cd gaussian; g09 < %d.gjf > %d.out; gzip %d.out; echo -n "%d "; date; rm -f %d.chk\n'
                    % (i, relpath, i, self.length, i, i, i, i, i))

        with open("template.sh", "r") as template_file:
            template = template_file.read()
        time_per_job = 3  # 16 for 8mers, 11 for 6, 6 for 4, 3 for dimers
        N_nodes, hours, mins = self._plan_nodes_and_walltime(
            n_jobs, self.length, CPUS_PER_NODE, time_per_job)

        template = template.replace("REPLACENODES", str(N_nodes))
        template = template.replace("REPLACEHOUR", str(hours))
        template = template.replace("REPLACEMIN", str(mins))
        with open("runwith1.sh", "w") as output:
            output.write(template + "\n")

        # Remove outputs left over from a previous generation.
        for i in range(n_jobs):
            filename = os.path.join("gaussian", "%d.out.gz" % i)
            if os.path.isfile(filename):
                os.remove(filename)

        qsub = subprocess.Popen(["qsub", "runwith1.sh"],
                                stdout=subprocess.PIPE)
        stdout = qsub.stdout.read()
        self.log(stdout)
        pid = stdout.split(".")[0]
        # qstat is taken to be done once it writes to stderr for this id.
        stderr = ""
        while not stderr.strip():
            time.sleep(10)
            qstat = subprocess.Popen(["qstat", pid],
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            stdout = qstat.stdout.read()
            stderr = qstat.stderr.read()

    def extractcalcdata(self):
        """Parse the gzipped outputs and store the results via ``admin``.

        Each output is expected to contain a " #T PM6 OPT" section followed
        by a second section starting after " Initial command"; only that
        second part is written to ``tmp.out`` and parsed.  Outputs that are
        missing, incomplete, unparsable or without a positive transition
        energy are skipped silently.
        """
        if not self.gjfs:
            return
        self.log("\tExtracting data from log files")
        tostore = []
        for j, pname in enumerate(self.gjforder):
            mylogfile = os.path.join("gaussian", "%d.out.gz" % j)
            if not os.path.isfile(mylogfile):
                continue
            gz = gzip.open(mylogfile, "r")
            try:
                text = gz.read()
            finally:
                gz.close()
            if text.find("Excitation energies and oscillator strength") < 0:
                continue

            lines = iter(text.split("\n"))
            for line in lines:
                if line.startswith(" #T PM6 OPT"):
                    # Skip the three lines after the route line; a default
                    # keeps a truncated file from raising StopIteration.
                    next(lines, None)
                    next(lines, None)
                    next(lines, None)
                    break
            for line in lines:
                if line.startswith(" Initial command"):
                    break
            zindofile = list(lines)
            if len(zindofile) == 0:
                # All the PM6 data is missing
                continue
            with open("tmp.out", "w") as f:
                f.write("\n".join(zindofile))

            logfile = ccopen("tmp.out")
            logfile.logger.setLevel(logging.ERROR)
            try:
                data = logfile.parse()
            except AssertionError:
                continue
            try:
                lumo = data.moenergies[0][data.homos[0] + 1]
                homo = data.moenergies[0][data.homos[0]]
                etens = [x * convert for x in data.etenergies]  # cm-1 to eV
                etoscs = data.etoscs
            except Exception:
                # Best effort: an incomplete calculation is simply skipped.
                continue
            if not etens or max(etens) <= 0:
                continue
            myjson = json.dumps([
                homo, lumo, etens, etoscs, data.moenergies[0], data.homos[0]
            ])
            tostore.append((pname, myjson))

        for pname, myjson in tostore:
            self.admin.storedata(pname, self.gen, myjson)

    def getscore(self, polname, log=False):
        """Return the score of the polymer called ``polname``.

        Returns None (or ``(None, None)`` when ``log`` is true) if no data is
        stored for it.  Otherwise returns the score, or ``(score, logtext)``
        when ``log`` is true.  For objective ``"eff"`` the score is the
        scaled efficiency; for ``"distance"`` it is the negated sum of the
        distance to (HOMO -5.70, transition 1.39) and ``1 - scale``.

        Raises:
            ValueError: if ``self.objectivefn`` is neither value above.
        """
        data = self.admin.getdata(polname)
        logtext = ""
        if not data:
            return (None, None) if log else None

        gen, myjson = data
        jsondata = json.loads(myjson)
        if len(jsondata) == 4:
            homo, lumo, etens, etoscs = jsondata
        else:
            homo, lumo, etens, etoscs, moenergies, homo_idx = jsondata

        scale, trans = besttrans(etens, etoscs)
        if scale < 1.0:
            logtext += "Os=%.1f" % (scale * 100, )

        if self.objectivefn == "eff":
            score = scale * self.efficiency.efficiency(homo, trans, -4.61)
        elif self.objectivefn == "distance":
            penalty = 1.0 - scale
            distance = math.sqrt((homo - (-5.70))**2 + (trans - 1.39)**2)
            score = -(distance + penalty)  # We are finding the maximum
        else:
            raise ValueError("unknown objective function %r" %
                             (self.objectivefn,))

        return (score, logtext) if log else score

    def makechildren(self, moverandomly=False):
        """Breed ``self.N`` new children into ``self.children``.

        It should be possible for a single monomer to mutate.
        The mutations should always allow the exploration of local space.

        Parents are picked by 3-way tournaments without replacement until
        the pool holds ``self.N // 5`` members.  Each monomer of a crossed
        child is replaced with probability 0.75, either by any monomer
        (``moverandomly``) or by one of its first ``self.R`` neighbours in
        ``self.simmatrix``.
        """
        self.gen += 1
        scores = [(chromo, self.getscore(polname(chromo)))
                  for chromo in self.pop]

        poolsize = self.N // 5
        pool = []
        for _ in range(poolsize):
            tournament = random.sample(scores, 3)
            tournament.sort(reverse=True, key=lambda x: x[1])
            select = tournament[0]
            scores.remove(select)
            pool.append(select[0])

        self.children = []
        while len(self.children) < self.N:
            # Crossover to make two children
            x = copy.deepcopy(random.choice(pool))
            y = copy.deepcopy(random.choice(pool))
            children = [[x[0][0], y[0][1]], [x[0][1], y[0][0]]]
            for child in children:
                newchild = [child[0], child[1]]
                # Mutate backbone
                for i, mon in enumerate(child):
                    if random.random() > 0.25:
                        if moverandomly:
                            newchild[i] = random.choice(self.monomers)
                        else:
                            newchild[i] = random.choice(
                                self.simmatrix[mon][:self.R])
                if newchild[1] > newchild[0]:
                    # Canonical order: the larger name always comes first.
                    newchild = [newchild[1], newchild[0]]
                # Create random dirs
                directions = randomdirs(newchild, self.length)
                fullchild = (newchild, directions[0], directions[1])
                # Don't add a duplicate
                if fullchild not in self.pop + self.children:
                    self.children.append(fullchild)

    def nextgen(self):
        """Keep the best half of parents and the best half of children."""
        self.pop.sort(key=lambda x: self.getscore(polname(x)), reverse=True)
        self.children.sort(key=lambda x: self.getscore(polname(x)),
                           reverse=True)
        half = self.N // 2
        self.pop = self.pop[:half] + self.children[:half]
from cclib.parser import Gaussian
from Efficiency import *
import json
import logging

efficient = Efficiency()

# Per-SMILES lookup tables filled from the hexamer database.
homos = {}
exState = {}
exStr = {}

# Each record is split on double quotes: field 1 is "<smiles>_<suffix>" and
# field 5 is a JSON list whose items 0, 2 and 3 are read below as the HOMO,
# the transition energies and the transition strengths.
with open('hexamer/hexamersDB.txt', 'r') as h:
    for line in h:
        data = line.split('"')
        smiles = data[1].split('_')[0]
        eData = json.loads(data[5])
        homos[smiles] = float(eData[0])
        eTrans = eData[2]
        eStr = eData[3]

        # Index of the strongest transition; stays at 0 when no strength is
        # positive, and the first one wins on ties.
        bestExcitationIdx = 0
        bestExcitationStr = 0.0
        for j, excitation in enumerate(eStr):
            if excitation > bestExcitationStr:
                bestExcitationStr = excitation
                bestExcitationIdx = j

        # OK, now bestExcitationIdx has the best index
        exState[smiles] = homos[smiles] + float(eTrans[bestExcitationIdx])
#!/usr/bin/env python
"""Command-line front end for the Efficiency calculator.

Usage:
    script FILE                 print the ZINDO efficiency for every
                                "homo bandgap" line of FILE
    script HOMO BANDGAP         print the ZINDO efficiency
    script HOMO BANDGAP FLAG    print the B3LYP efficiency (any extra
                                argument selects this mode)
"""
import sys
from Efficiency import *

efficient = Efficiency()

if len(sys.argv) < 2:
    # Previously this fell through to the ZINDO branch and died with an
    # IndexError; report the expected arguments instead.
    sys.stderr.write(
        "usage: %s FILE | HOMO BANDGAP [FLAG]\n" % sys.argv[0])
    sys.exit(1)
elif len(sys.argv) == 2:
    with open(sys.argv[1]) as infile:
        for line in infile:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            homo, bandgap = line.split()
            print(efficient.zindoEff(float(homo), float(bandgap)))
elif len(sys.argv) > 3:
    # Two spaces after the colon reproduce the original output exactly.
    print("B3LYP Efficiency:  %s" %
          (efficient.b3lypEff(float(sys.argv[1]), float(sys.argv[2])),))
else:
    print("ZINDO Efficiency:  %s" %
          (efficient.zindoEff(float(sys.argv[1]), float(sys.argv[2])),))
#!/usr/bin/env python import os import sys import SimpleUtils as utils from Efficiency import * efficient = Efficiency() if __name__ == "__main__": monodatafile = open("monomer-zindo.txt", 'r') homos = {} lumos = {} for line in monodatafile: dataList = line.split(' ') if len(dataList) < 5: continue # (number, smiles, h**o, lumo, oscstrs) smiles = dataList[1] homos[smiles] = float(dataList[2]) lumos[smiles] = float(dataList[3]) dimerdata = open("../dims_and_tets/alldimerDB.txt") dimHOMO = {} dimTrans = {} dimEff = {} dimHab = {} # skip line with column names dimerdata.readline()