示例#1
0
 def __init__(self, filename, database, length):
     """Store the constructor arguments, build the helpers and read the log.

     ``filename``, ``database`` and ``length`` are kept on the instance
     under the same names.  An ``Efficiency`` object and an ``Admin`` for
     ``database`` are created, and ``self.readlogfile()`` is called last,
     once every attribute has been assigned.
     """
     self.filename, self.database, self.length = filename, database, length
     self.eff = Efficiency()
     self.admin = Admin(database)
     self.readlogfile()
示例#2
0
文件: solar.py 项目: gpilania/lamarck
def find_max(pcbm_lumo):
    """Find the maximum efficiency value for a particular pcbm_lumo.

    The LUMO passed to the calculator is fixed at ``pcbm_lumo + 0.3`` and
    the HOMO is swept from -8.5 up to roughly -4.91 in steps of 0.005.
    For every HOMO the band gap is ``lumo - homo``.  The best
    ``(efficiency, homo, lumo, bandgap)`` tuple is printed and the whole
    efficiency curve is drawn with ``pylab.plot``.

    Nothing is returned.
    """
    effcalc = Eff.Efficiency()
    delta = 0.005
    homo_lims = [-8.5, -4.91]
    homos = numpy.arange(homo_lims[0], homo_lims[1] + delta, delta)

    cutoff = pcbm_lumo + 0.3
    lumo = cutoff  # constant over the whole sweep
    eff = []
    maxe = (0, 0, 0, 0)
    for homo in homos:
        bandgap = lumo - homo
        # NOTE(review): other callers in this file invoke efficiency() as
        # (homo, bandgap, pcbm_lumo, verbose=..., cutoff=...); here the first
        # positional argument is the LUMO -- confirm which signature is meant.
        e = effcalc.efficiency(lumo,
                               bandgap,
                               False,
                               False,
                               pcbm_lumo=pcbm_lumo)
        eff.append(e)
        if e > maxe[0]:
            maxe = (e, homo, lumo, bandgap)
    # Same text as the statement form ``print "Max", maxe``.
    print("Max %s" % (maxe,))

    pylab.plot(homos, eff)
示例#3
0
文件: solar.py 项目: gpilania/lamarck
def plot(pcbm_lumo, colorbar=True):
    """Plot the landscape of efficiency values.

    This is imported by other scripts.

    The efficiency is evaluated on a grid of HOMO values (y axis, -11.5 to
    about -4.7) and band gaps (x axis, 5.3 down to about 0.5) with a step
    of 0.05, and drawn as a contour plot.  A black line from
    ``(0, cutoff)`` to ``(6.5, cutoff - 6.5)`` is overlaid, where
    ``cutoff = pcbm_lumo + 0.3``.

    Args:
        pcbm_lumo: forwarded to ``Efficiency.efficiency`` as its third
            argument and used to place the cutoff line.
        colorbar: when true, a colorbar labelled "% Efficiency" is added.

    Returns:
        The colorbar object when ``colorbar`` is true, otherwise ``None``.
    """
    effcalc = Eff.Efficiency()
    delta = 0.05

    # lims[0] is the HOMO span (ascending), lims[1] the band-gap span
    # (descending, hence the negative step below).
    lims = [[-11.5, -4.7], [5.3, 0.5]]
    ranges = [
        numpy.arange(lims[0][0], lims[0][1] + delta, delta),
        numpy.arange(lims[1][0], lims[1][1] - delta, -delta)
    ]

    cutoff = pcbm_lumo + 0.3
    contours = [
        0, 0.0001, 0.01, 0.04, 0.1, 0.2, 0.4, 0.7, 1, 2, 3, 4, 5, 6, 8, 10, 12,
        14
    ]
    # One label per contour level: hand-written below 1, "%d" from 1 upwards.
    labels = ["0", "1E-4", "0.01", "0.04", "0.1", "0.2", "0.4", "0.7"]
    labels += ["%d" % x for x in contours if x >= 1.0]

    alleff = []
    for homo in ranges[0]:
        alleff.append([
            effcalc.efficiency(homo,
                               bandgap,
                               pcbm_lumo,
                               verbose=False,
                               cutoff=False)
            for bandgap in ranges[1]
        ])
    cs = pylab.contour(ranges[1], ranges[0], numpy.array(alleff), contours)
    pylab.plot([0, 6.5], [cutoff, -6.5 + cutoff], color="black")
    pylab.xlim(lims[1][1], lims[1][0])
    pylab.ylim(lims[0][0], lims[0][1])
    if colorbar:
        CB = pylab.colorbar(
            cs,
            shrink=0.8,
            extend='both',
            ticks=contours,
            format=matplotlib.ticker.FixedFormatter(labels),
        )
        CB.set_label("% Efficiency")
        return CB
示例#4
0
文件: solar.py 项目: gpilania/lamarck
def plot_heeger(pcbm_lumo, colorbar=True):
    """Plot efficiency contours against band gap and HOMO-plus-band-gap.

    This is imported by other scripts.

    The y axis runs over ``homo + bandgap`` from ``cutoff + 1.0`` down to
    about ``cutoff`` (``cutoff = pcbm_lumo + 0.3``) and the x axis over
    band gaps from 1.0 to about 3.1, both with a step of 0.05.  For each
    grid point the HOMO handed to the calculator is
    ``homo_plus_bg - bandgap``.

    Args:
        pcbm_lumo: forwarded to ``Efficiency.efficiency`` as its third
            argument and used to derive ``cutoff``.
        colorbar: when true, a colorbar labelled "% Efficiency" is added.

    Returns:
        The colorbar object when ``colorbar`` is true, otherwise ``None``.
    """
    cutoff = pcbm_lumo + 0.3
    effcalc = Eff.Efficiency()
    delta = 0.05

    # lims[0]: homo + bandgap (descending); lims[1]: band gap (ascending).
    lims = [[cutoff + 1.0, cutoff], [1.0, 3.1]]
    ranges = [
        numpy.arange(lims[0][0], lims[0][1] - delta, -delta),
        numpy.arange(lims[1][0], lims[1][1] + delta, delta)
    ]
    # The original emitted an empty line here with a bare ``print``.
    print("")

    contours = [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
    labels = ["0", "0.5"]
    labels += ["%d" % x for x in contours if x >= 1.0]

    alleff = []
    for homo_plus_bg in ranges[0]:
        eff = []
        for bandgap in ranges[1]:
            homo = homo_plus_bg - bandgap
            eff.append(
                effcalc.efficiency(homo,
                                   bandgap,
                                   pcbm_lumo,
                                   verbose=False,
                                   cutoff=False))
        alleff.append(eff)
    cs = pylab.contour(ranges[1], ranges[0], numpy.array(alleff), contours)
    pylab.xlim(lims[1][1], lims[1][0])
    pylab.ylim(lims[0][0], lims[0][1])
    if colorbar:
        CB = pylab.colorbar(
            cs,
            shrink=0.8,
            extend='both',
            ticks=contours,
            format=matplotlib.ticker.FixedFormatter(labels),
        )
        CB.set_label("% Efficiency")
        return CB
示例#5
0
def hist():
    """Draw histograms of the distribution of HOMOs and transition Es
    for the dimers vs the tetramers.

    Data are read from the 'alldimers' and 'alltetramers' databases through
    ``Admin.getalldata()``; the third field of every record is converted
    with ``getHplusBG``.  The mean of each quantity is printed per
    database.  Two figures are saved under ``pictures/`` as
    ``Figure1_hist_0.png`` and ``Figure1_hist_1.png``, each showing the
    dimer (black) and tetramer (gray) fractions side by side.
    """
    # NOTE(review): this instance is never used below; it is kept only in
    # case constructing Efficiency has side effects -- confirm and remove.
    eff = Efficiency()
    homos = [[], []]
    trans = [[], []]
    for i, db in enumerate(['alldimers', 'alltetramers']):
        admin = Admin(db)
        for d in admin.getalldata():
            homo, tran = getHplusBG(d[2])
            homos[i].append(homo)
            trans[i].append(tran)
        print("%s %s" % (np.mean(homos[i]), np.mean(trans[i])))
    delta = 0.2
    bins = [np.arange(-10, -5.5, delta), np.arange(1, 7, delta)]

    titles = ['HOMO (eV)', 'Lowest energy significant transition (eV)']
    for i, data in enumerate([homos, trans]):
        # Tetramers (index 1) first, then shifted right so that the dimer
        # bars drawn afterwards sit next to them instead of on top.
        n, bns = np.histogram(data[1], bins[i])
        c = pylab.bar(bns[:-1], n / float(n.sum()), delta * .4, color="gray")
        for rect in c:
            rect.set_x(rect.get_x() + delta / 2. * 0.8)

        n, bns = np.histogram(data[0], bins[i])
        f = pylab.bar(bns[:-1], n / float(n.sum()), delta * .4, color="k")

        pylab.legend([f[0], c[0]], ["Dimers", "Tetramers"])
        pylab.ylabel("Fraction")
        pylab.xlabel(titles[i])
        pylab.savefig(os.path.join("pictures", "Figure1_hist_%d.png" % i))
        pylab.clf()
示例#6
0
 def initscorefn(self):
     """Create the ``Efficiency`` scorer and run its ``unittest()`` check."""
     scorer = Efficiency()
     self.efficiency = scorer
     scorer.unittest()
示例#7
0
class GA(object):
    selected_initial_population = ['[CH]/C=C\\1/OCC[C](OCC1)', 'C\\1=C/O/C=C(/O/C=C/O/C=C/Oc2c(cccc2)O1)',
                               'C=C(C)CN', 'C1=C(C=C/C/1=C\\1/C=CC(=C1)N)N',
                                'C1=C(F)C=C/C/1=C\\1/C(=CC(=C1)F)', 'C1=C(N(=O)=O)C=C/C/1=C\\1/C=CC(N(=O)=O)=C1',
                                'C1=C/C(/C(=C1)C#N)=C\\1/C=C(C=C1C#N)', 'C1=C/C(/C(=C1)F)=C\\1/C=C(C=C1F)',
                                 'C1=C/C(/C(=C1)OC)=C\\1/C=C(C=C1OC)', 'C1=C2OCCSC2=C([S@@]1NC)',
                                 'C1=CC(OC)=C(/C/1=C\\1/C=C(OC)C=C1)', 'C1=CC2=C(OC=CO2)/C/1=C/1\\C2=C(OC=CO2)C=C1',
                                  'c1c([n]c2[CH]Sc12)', 'c1c(C(=O)C(F)(F)F)scc1', 'c1c2c(=O)oc(=O)c2c(c2c1c(=O)oc2=O)',
                                   'c1c2c3c4c(c1CC)c(=O)[nH]c(=O)c4cc(CC)c3c(=O)n(c2=O)',
                                   'c1cc2c(C=C/C/2=C\\2/C=Cc3c2cccc3)c(c1)', 'c1cnc(c2c1nsn2)', 'c1nnc(nn1)',
                                    'c1sc(c(c1C(F)(F)F)C(F)(F)F)', 'c1sc(c(c1N(=O)=O)C#N)', 'c1sc(c2c1CCC[C@H]2N(=O)=O)',
                                     'c1sc(S[CH2])c(c1)', 'C1SC[C@@H]2[C@H]1[C](O[C@@H]2[O])NCC',
                                     'N1[C@H]2[C@H](N[C@@H]3[C@@H](CSC3)N2)N(S1)', 'N1[C@H]2CSC[C@H]2N(S1)',
                                     'n1cc2c3c(c1)ccc1c3c(cc2)cn(c1)', 'N1N[C@H]2[C@@H](C)[C@@H]3[C@@H](NSN3)[C@H](C)[C@H]2N1',
                                     'C1=C[C@@H]2[C@H](C1)[C@@H]1[C@@H](C=CC1)C2=C',
                                      'C1=CC=C(S1(=O)=O)', 'c1c(C)cc(c(c1)C)', 'c1c(C#N)sc(OC)c1', 'c1c(ncc2c1non2)',
                                       'c1c2c(=O)n(c(=O)c2cc2c1c(=O)[nH]c2=O)', 'c1cc(C)cc(c1C)', 'c1cc(N(=O)=O)c(cc1)',
                                        'c1ccc2c(c1)C(=O)c1c2ccc(c1)', 'c1cccc2c1N(C)C(=O)[C]2[C]1C(=O)N(C)c2cc(ccc12)',
                                         'c1cscc1C(=O)O', 'c1oc(c(c1C#N)C#N)', 'C1S[C]2[C]3SCC[C@@H]3C(=C(CN)CN)[C@H]2C1',
                                          'C1S[C]2[C]3SCC[C@@H]3C(=O)[C@H]2C1', 'c1sc(c(c1C#N)C#N)', 'c1sc(c(c1O)C#N)',
                                           'c1sc(c2c1[C@H](C(=O)C(F)(F)F)CCC2)', 'c1sc(c2c1C(=O)CC2=O)',
                                            'c1sc(c2c1oc(=S)o2)', 'c1sc(nn1)', 'c1scc2c1C(=C[S@@]2[O])',
                                            'N1C(=O)[C](c2c1cc(cc2)C)[C]1C(=O)N(c2c1ccc(C)c2)', 'N1C(=S)C=C(C1=S)',
                                             'Sc1cscc1S', '[C]1C(=O)Oc2c1cc1c(c2)[C](C(=O)O1)', 'C1=[S]C(=S)C2=C1C=C(S2(=O)=O)',
                                              'C1=C(CC)C(CC)=C(S1(=O)=O)', 'C1=C2C(C(=O)N1)=C(OC2=O)', 'c1c(C(=O)C)scc1',
                                               'c1c(F)cccc1', 'C1C[C@@H]2[C@@H](CC1)N(CC)[C@@H]1[C@@H](S2)CCCC1',
                                                'c1c2c(=O)n(c(=O)c2cc2c1c(=O)sc2=O)', 'c1c2C(=O)OCc2ccc1',
                                                'c1c2c(nccn2)c(c2c1nccn2)', 'c1c2nonc2c(cc1)', 'c1cc(c(cc1)N(c1ccccc1)c1ccccc1)',
                                                 'c1oc(c(c1C#N)C(F)(F)F)', 'C1Oc2cscc2OC(C1=O)', 'c1sc(C(F)(F)F)cc1',
                                                  'c1sc(c2c1[CH][N]2)', 'c1sc(c2c1C(=O)NC2=O)', 'c1sc(c2c1CC(=O)C(=O)C2)',
                                                   'c1sc(c2c1oc(=O)c(=O)s2)', 'C1SC[C@@H]2[C@@]1(S)C[C@H](OC)[C@@H](OC)C2',
                                                    'c1sc2cc(C(=O)O)sc2c1', 'c1scc(N(=O)=O)c1N', 'C1SCN2[C@H]1NCC2',
                                                     'N1[C@]2(SC)CSC[C@H]2NC(=C1)', 'N1[C@@H]2C[C@@H]3NSN[C@@H]3C[C@@H]2N(S1)',
                                                      'N1c2cscc2N(CC(=O)C1)',
                                                       'c1[n]c2c(c(=O)c3c2[n]cc3)c1', 'c1[nH]cc2c1S(=O)(=O)C(=C2)',
                                                        'C1=[S]C(=O)C2=C1C(=O)[S]=C2', 'C1=CC(=C2[C@H]1CCS2)',
                                                         'C1=CC=C(C1=S)', 'c1c([CH2])c(c2c1ccsc2)', 'c1c(C(F)(F)F)sc(C#N)c1',
                                                          'c1c(F)c(F)c(c(c1F)F)', 'c1c(F)sc(F)c1', 'c1c(O)sc2c1[nH]c1c2sc(c1)',
                                                           'c1c(OC)c(cc(c1)OC)', 'c1c2[CH][S]([CH2])[CH]c2c(cc1)',
                                                           'c1c2c(=O)n(c(=O)c2cc2c1c(=O)oc2=O)', 'c1c2c(ncc(C)n2)c(s1)',
                                                            'c1cc2c(=O)n(C)c(=O)c3c2c2c1C1=C4[C@@H](c2cc3)C=CC2=C4[C@@H](C(=O)N(C2=O)C)C(=C1)',
                                                            'c1cc2c(s1)c1c(c(=O)[nH]c2=O)cc(s1)', 'c1coc(N(=O)=O)c1', 'c1oc(c(c1)N(=O)=O)',
                                                            'c1oc2c(c1)C(=O)c1c2sc(c1)', 'C1S[C@@H]2[C@@H]1NCC2',
                                                            'c1sc(C(=O)O)cc1O', 'c1sc(c(c1)C=O)', 'c1sc(c(c1)N(=O)=O)',
                                                            'c1sc(c(c1C#N)C(F)(F)F)', 'c1sc(c2c1C[C@H](N)[C@@H](N(=O)=O)C2)',
                                                            'c1sc(c2c1C=[S]C=C2)', 'c1sc(c2c1nc(OCC)c(CN)n2)',
                                                            'c1sc(c2c1nc1c3ccccc3c3ccccc3c1n2)', 'c1sc(c2c1nc1c3sccc3c3ccsc3c1n2)',
                                                            'c1sc(c2c1oc(N(=O)=O)c2)', 'c1sc(c2c1sc(=O)s2)', 'c1sc(c2c1sc(N(=O)=O)c2)',
                                                            'c1sc(c2c1SCCS2)', 'c1scc2c1C(=[S@@](OC)C(=C2)OC)', 'N(CC)c1ccc(c(c1)C=C)N(CC)',
                                                            'N1CN[C@@H]2[C@H]1N(CN2)', 'N1CN[C@@H]2C[C@H]3[C@@H](C[C@H]12)N(CN3)']

    def __init__(self, admin, length, Nchromos, R, simmatrix, objectivefn,
                 logmessage=""):
        """Set up the genetic algorithm state.

        Args:
            admin: object used for logging (``admin.log``) and for reading
                and storing calculation data.
            length: polymer length forwarded to the helper functions.
            Nchromos: population size, stored as ``self.N``.
            R: how many leading entries of a monomer's ``simmatrix`` list
                are candidates when mutating (the number of neighbours).
            simmatrix: mapping whose sorted keys become ``self.monomers``.
            objectivefn: name of the objective read by ``getscore``.
            logmessage: optional message logged once set-up is complete.
        """
        self.admin, self.length = admin, length
        self.N, self.R = Nchromos, R
        self.simmatrix = simmatrix
        self.monomers = sorted(simmatrix.keys())
        self.objectivefn = objectivefn
        self.gen = 0
        self.initscorefn()
        if not logmessage:
            return
        self.log(logmessage)

    def initscorefn(self):
        """Create the ``Efficiency`` scorer and run its ``unittest()`` check."""
        scorer = Efficiency()
        self.efficiency = scorer
        scorer.unittest()

    def log(self, msg):
        """Append ``msg`` plus a newline to ``self.admin.log`` and echo it."""
        line = msg + "\n"
        self.admin.log.write(line)
        print(msg)

    def initpop(self):
        """Create and log a random starting population of ``self.N`` units.

        Each unit is ``(monomer_pair, dir_a, dir_b)`` where the pair is two
        random monomers and the directions come from ``randomdirs``.
        """
        self.log("\tInitialising population")
        population = []
        for _ in range(self.N):
            # To seed from the class-level selected_initial_population
            # instead of all monomers, draw from that list here.
            first = random.choice(self.monomers)
            second = random.choice(self.monomers)
            # Canonical order: the monomer that compares greater goes first.
            pair = [second, first] if second > first else [first, second]
            dirs = randomdirs(pair, self.length)
            population.append((pair, dirs[0], dirs[1]))
        self.pop = population
        self.logpop(self.pop)

    def initallpop(self, chosen_monos=None):
        """Build the population from every combination not yet calculated.

        All combinations of ``chosen_monos`` (default: ``self.monomers``)
        are generated with ``createAllCombinations``; those for which
        ``self.admin.getdata`` already returns data are dropped.
        ``self.N`` is reset to 0.
        """
        # Pass a hand-picked list (for example the class-level
        # selected_initial_population) as chosen_monos to restrict the search.
        monos = self.monomers if chosen_monos is None else chosen_monos
        self.log("\tInitialising population")
        self.N = 0

        candidates = createAllCombinations(monos, self.length)
        self.log("\tTotal size of potential pop is %d" % len(candidates))

        uncalculated = [unit for unit in candidates
                        if not self.admin.getdata(polname(unit))]
        self.pop = uncalculated
        self.log("\tTotal size of uncalc pop is %d" % len(uncalculated))
        self.logpop(self.pop)

    def logpop(self, pop):
        """Log the size of ``pop`` followed by the name of every member."""
        self.log("Population of size %d" % len(pop))
        for member in pop:
            self.log(polname(member))

    def loggjf(self):
        """Log how many input files exist and which polymers share each one."""
        total = len(self.gjfs)
        self.log("%d GJF files created" % total)
        for position, name in enumerate(self.gjforder):
            members = self.gjfs[name]
            self.log("GJF %d: polymer numbers %s" % (position, members))

    def logfitness(self, pop, text):
        """Log every member of ``pop`` with its score, best score first.

        Members without a score are reported as ``FAIL``.
        """
        self.log("\t%s population fitness" % text)
        ranked = sorted(pop,
                        key=lambda member: self.getscore(polname(member)),
                        reverse=True)
        for rank, member in enumerate(ranked):
            name = polname(member)
            score, logtext = self.getscore(name, log=True)
            if score is None:
                self.log("%d: %s with FAIL" % (rank, name))
            else:
                self.log("%d: %s with %.3f %s" % (rank, name, score, logtext))

    def makeGJF(self, pop, length):
        """Write a Gaussian input file for every unscored polymer in ``pop``.

        ``self.gjfs`` maps each polymer name without a score to the list of
        its indices in ``pop``; ``self.gjforder`` holds those names sorted
        by descending molecular weight.  For each name a 3D structure is
        generated and written to ``gaussian/<idx>.gjf`` with a ZINDO header.

        Args:
            pop: population to inspect.
            length: polymer length used when building the molecules.
                NOTE(review): the sort key below uses ``self.length``
                instead -- confirm the two are always equal.
        """
        self.log("\tCreating txt files")
        if not os.path.isdir("gaussian"):
            os.mkdir("gaussian")
        self.gjfs = {}

        for i, x in enumerate(pop):
            name = polname(x)
            if self.getscore(name) is None:
                self.gjfs.setdefault(name, []).append(i)

                print("showing the pol: %s" % name)

        # Sort the gjfs by molecular weight
        self.gjforder = sorted(self.gjfs.keys(), key=lambda x: molname_to_mol(str(x), self.length).molwt, reverse=True)

        # An earlier two-step variant (PM6 OPT followed by a Link1 ZINDO job
        # with a per-index checkpoint file) used a longer trailer; that is
        # why the trailer is still kept separate from the header.
        header = "%nproc=1\n%mem=1GB\n#n ZINDO(NStates=10,Singlets)"
        header_b = "\n"

        for idx, smi in enumerate(self.gjforder):
            mol = molname_to_mol(smi, length)
            mol.make3D()
            globalopt(mol)

            # Drop the first three lines of the generated input and replace
            # "0  3" by "0  1" (presumably the charge/multiplicity line --
            # TODO confirm).
            body = "\n".join(mol.write("gau").replace("0  3\n", "0  1\n").split("\n")[3:])
            gaussian = header + "\n\n" + smi + "\n" + body + header_b
            with open(os.path.join("gaussian", "%s.gjf" % idx), "w") as output:
                output.write(gaussian)
            print("finished creating %s.gjf" % idx)

    def runGaussian(self):
        """Run g09 on every generated input file, then gzip the logs.

        Does nothing when ``self.gjfs`` is empty.  If ``gaussian/end.txt``
        exists the program exits with status 0 before running anything.
        """
        if not len(self.gjfs) > 0:
            return

        # Create gaussian/end.txt to terminate the GA
        if os.path.isfile(os.path.join("gaussian", "end.txt")):
            self.log("\nFound end.txt. Finishing")
            sys.exit(0)

        self.log("\tRunning Gaussian")

        for job in range(len(self.gjfs)):
            # Remove a compressed log left over from a previous generation.
            stale = os.path.join("gaussian", "%d.log.gz" % job)
            if os.path.isfile(stale):
                os.remove(stale)
            # The exit status of g09 is not inspected.
            subprocess.call("(cd gaussian; g09  %d.gjf  %d.log)" % (job, job), shell=True)

        subprocess.call("(cd gaussian; gzip -f  *.log)", shell=True)

    def extractcalcdata(self):
        """Parse the compressed Gaussian logs and store the results.

        For every name in ``self.gjforder`` the file ``gaussian/<j>.log.gz``
        is parsed with ``ccopen``.  Jobs whose log is missing, fails to
        parse, lacks the required attributes or has no positive transition
        energy are skipped.  For the others a JSON list
        ``[homo, lumo, etens, etoscs]`` is stored through
        ``self.admin.storedata`` together with the generation number and a
        symbolic sequence string.
        """
        if not self.gjfs:
            return
        self.log("\tExtracting data from log files")
        tostore = []
        for j, pname in enumerate(self.gjforder):

            print('on pname=%s, j=%s' % (pname, j))

            mylogfile = os.path.join("gaussian", "%d.log.gz" % j)
            if not os.path.isfile(mylogfile):
                continue
            logfile = ccopen(mylogfile)
            logfile.logger.setLevel(logging.ERROR)
            try:
                data = logfile.parse()
            except AssertionError:
                continue
            try:
                # Values rounded to reduce size of output file
                lumo = round(data.moenergies[0][data.homos[0] + 1], 3)
                homo = round(data.moenergies[0][data.homos[0]], 3)
                etens = [round(x * convert, 3) for x in data.etenergies]  # cm-1 to eV
                etoscs = [round(x, 3) for x in data.etoscs]
            except Exception:
                # Incomplete results: skip the job.  Unlike a bare except,
                # this lets KeyboardInterrupt/SystemExit through.
                continue
            if not etens or max(etens) <= 0:
                continue

            # Only these four values are stored; keeping all MO energies
            # made the data file too large for big data sets.
            myjson = json.dumps([float(homo), float(lumo), etens, list(etoscs)])
            tostore.append((pname, myjson))

        for pname, myjson in tostore:
            # get the sequence
            seq = get_comb(pname, self.length)[0]
            # Shorten the sequence for readability:
            # (qu) -> A, (uq) -> B, (di) -> D, (id) -> E
            seqSym = seq.replace("(qu)", "A").replace("(uq)", "B").replace("(di)", "D").replace("(id)", "E")
            self.admin.storedata(pname, self.gen, seqSym, myjson)

    def getscore(self, polname, log=False):
        """Return the fitness of the polymer called ``polname``.

        Args:
            polname: name used to look the polymer up via
                ``self.admin.getdata``.
            log: when true, return ``(score, logtext)`` instead of just the
                score.

        Returns:
            ``None`` (or ``(None, None)`` with ``log=True``) when no data is
            stored.  Otherwise the score for ``self.objectivefn``:

            * ``"eff"``: ``scale`` times the efficiency computed from the
              HOMO and the chosen transition, with -4.61 as third argument.
            * ``"distance"``: minus the sum of the Euclidean distance from
              the point (HOMO -5.70, transition 1.39) and the penalty
              ``1 - scale``, so that larger is better.

        Raises:
            ValueError: if ``self.objectivefn`` is neither of the above, or
                if the stored record has neither 4 nor 6 fields.
        """
        data = self.admin.getdata(polname)
        if not data:
            return (None, None) if log else None
        gen, sequence, myjson = data
        jsondata = json.loads(myjson)
        # Short records hold 4 values; long ones also carry the MO energies
        # and the HOMO index, which are not needed here.
        if len(jsondata) == 4:
            homo, lumo, etens, etoscs = jsondata
        else:
            homo, lumo, etens, etoscs, moenergies, homo_idx = jsondata
        scale, trans = besttrans_revised(etens, etoscs)
        logtext = ""
        if scale < 1.0:
            logtext += "Os=%.1f" % (scale * 100,)

        if self.objectivefn == "eff":
            score = scale * self.efficiency.efficiency(homo, trans, -4.61)
        elif self.objectivefn == "distance":
            penalty = 1.0 - scale
            distance = math.sqrt((homo - (-5.70)) ** 2 + (trans - 1.39) ** 2)
            score = -(distance + penalty)  # We are finding the maximum
        else:
            raise ValueError("Unknown objective function: %r" % (self.objectivefn,))
        if log:
            return score, logtext
        return score

    def makechildren(self, moverandomly=False):
        """Breed ``self.children`` from the current population.

        Design notes: it should be possible for a single monomer to mutate,
        and the mutations should always allow the exploration of local
        space.

        Steps: the generation counter is incremented; a mating pool of
        ``self.N // 5`` members is filled by 3-way tournaments without
        replacement; pairs drawn from the pool exchange monomers; each
        monomer of a child is then replaced whenever ``random.random()``
        exceeds 0.25; the pair is ordered, random directions are attached,
        and the child is kept unless it duplicates a member of the
        population or an earlier child.  Two children are attempted per
        iteration, so the result may hold ``self.N + 1`` entries.

        Args:
            moverandomly: when true a mutated monomer is drawn from all
                monomers; otherwise from the first ``self.R`` entries of
                ``self.simmatrix[monomer]``.
        """
        self.gen += 1
        scores = []
        for chromo in self.pop:
            x = polname(chromo)
            scores.append((chromo, self.getscore(x)))

        # Floor division keeps the pool size an int even under true division.
        poolsize = self.N // 5
        pool = []
        for i in range(poolsize):
            tournament = random.sample(scores, 3)
            tournament.sort(reverse=True, key=lambda x: x[1])
            select = tournament[0]
            scores.remove(select)
            pool.append(select[0])

        self.children = []
        while len(self.children) < self.N:
            # Crossover to make two children
            x = copy.deepcopy(random.choice(pool))
            y = copy.deepcopy(random.choice(pool))
            children = [[x[0][0], y[0][1]], [x[0][1], y[0][0]]]
            for child in children:
                newchild = [child[0], child[1]]
                # Mutate backbone
                for i, mon in enumerate(child):
                    if random.random() > 0.25:
                        if moverandomly:
                            newchild[i] = random.choice(self.monomers)
                        else:
                            newchild[i] = random.choice(
                                self.simmatrix[mon][:self.R])

                # Canonical order: the monomer that compares greater first.
                if newchild[1] > newchild[0]:
                    newchild = [newchild[1], newchild[0]]

                # Create random dirs
                directions = randomdirs(newchild, self.length)
                fullchild = (newchild, directions[0], directions[1])
                # Don't add a duplicate
                if fullchild not in self.pop + self.children:
                    self.children.append(fullchild)

    def nextgen(self):
        """Form the next population from the best parents and children.

        Both ``self.pop`` and ``self.children`` are sorted in place by
        descending score; the new population is the top ``self.N // 2`` of
        each.
        """
        def fitness(member):
            return self.getscore(polname(member))

        self.pop.sort(key=fitness, reverse=True)
        self.children.sort(key=fitness, reverse=True)

        # Floor division: slice bounds must be integers.
        half = self.N // 2
        self.pop = self.pop[:half] + self.children[:half]
示例#8
0
文件: Utils.py 项目: gpilania/lamarck
 def __init__(self):
     """Create the ``effmod.Efficiency`` instance stored as ``self.eff``."""
     calculator = effmod.Efficiency()
     self.eff = calculator
示例#9
0
class GA(object):
    def __init__(self,
                 admin,
                 length,
                 Nchromos,
                 R,
                 simmatrix,
                 objectivefn,
                 logmessage=""):
        """Set up the genetic algorithm state.

        Args:
            admin: object used for logging (``admin.log``) and for reading
                and storing calculation data.
            length: polymer length forwarded to the helper functions.
            Nchromos: population size, stored as ``self.N``.
            R: how many leading entries of a monomer's ``simmatrix`` list
                are candidates when mutating (the number of neighbours).
            simmatrix: mapping whose sorted keys become ``self.monomers``.
            objectivefn: name of the objective read by ``getscore``.
            logmessage: optional message logged once set-up is complete.
        """
        self.admin, self.length = admin, length
        self.N, self.R = Nchromos, R
        self.simmatrix = simmatrix
        self.monomers = sorted(simmatrix.keys())
        self.objectivefn = objectivefn
        self.gen = 0
        self.initscorefn()
        if not logmessage:
            return
        self.log(logmessage)

    def initscorefn(self):
        """Create the ``Efficiency`` scorer and run its ``unittest()`` check."""
        scorer = Efficiency()
        self.efficiency = scorer
        scorer.unittest()

    def log(self, msg):
        """Append ``msg`` plus a newline to ``self.admin.log`` and echo it."""
        line = msg + "\n"
        self.admin.log.write(line)
        print(msg)

    def initpop(self):
        """Create and log a random starting population of ``self.N`` units.

        Each unit is ``(monomer_pair, dir_a, dir_b)`` where the pair is two
        random monomers and the directions come from ``randomdirs``.
        """
        self.log("\tInitialising population")
        # Make self.N polymers of length self.length
        dimerunits = []
        for _ in range(self.N):
            monos = [random.choice(self.monomers) for _ in range(2)]
            # Canonical order: the monomer that compares greater goes first.
            if monos[1] > monos[0]:
                monos = [monos[1], monos[0]]
            directions = randomdirs(monos, self.length)
            dimerunits.append((monos, directions[0], directions[1]))
        self.pop = dimerunits
        self.logpop(self.pop)

    def initallpop(self, chosen_monos=None):
        """Build the population from every combination not yet calculated.

        All combinations of ``chosen_monos`` (default: ``self.monomers``)
        are generated with ``createAllCombinations``; those for which
        ``self.admin.getdata`` already returns data are dropped.
        ``self.N`` is reset to 0.
        """
        monos = self.monomers if chosen_monos is None else chosen_monos
        self.log("\tInitialising population")
        self.N = 0

        candidates = createAllCombinations(monos, self.length)
        self.log("\tTotal size of potential pop is %d" % len(candidates))

        uncalculated = [unit for unit in candidates
                        if not self.admin.getdata(polname(unit))]
        self.pop = uncalculated
        self.log("\tTotal size of uncalc pop is %d" % len(uncalculated))
        self.logpop(self.pop)

    def logpop(self, pop):
        """Log the size of ``pop`` followed by the name of every member."""
        self.log("Population of size %d" % len(pop))
        for member in pop:
            self.log(polname(member))

    def loggjf(self):
        """Log how many input files exist and which polymers share each one."""
        total = len(self.gjfs)
        self.log("%d GJF files created" % total)
        for position, name in enumerate(self.gjforder):
            members = self.gjfs[name]
            self.log("GJF %d: polymer numbers %s" % (position, members))

    def logfitness(self, pop, text):
        """Log every member of ``pop`` with its score, best score first.

        Members without a score are reported as ``FAIL``.
        """
        self.log("\t%s population fitness" % text)
        ranked = sorted(pop,
                        key=lambda member: self.getscore(polname(member)),
                        reverse=True)
        for rank, member in enumerate(ranked):
            name = polname(member)
            score, logtext = self.getscore(name, log=True)
            if score is None:
                self.log("%d: %s with FAIL" % (rank, name))
            else:
                self.log("%d: %s with %.3f %s" % (rank, name, score, logtext))

    def makeGJF(self, pop):
        """Write one name file per unscored polymer in ``pop``.

        ``self.gjfs`` maps each polymer name without a score to the list of
        its indices in ``pop``; ``self.gjforder`` holds those names sorted
        by descending molecular weight.  The name at position ``j`` is
        written to ``gaussian/<j>.txt``.
        """
        self.log("\tCreating txt files")
        if not os.path.isdir("gaussian"):
            os.mkdir("gaussian")
        self.gjfs = {}
        for i, x in enumerate(pop):
            name = polname(x)
            if self.getscore(name) is None:
                self.gjfs.setdefault(name, []).append(i)

        # Sort the gjfs by molecular weight
        self.gjforder = sorted(
            self.gjfs.keys(),
            key=lambda x: molname_to_mol(str(x), self.length).molwt,
            reverse=True)

        self.loggjf()
        for j, pname in enumerate(self.gjforder):
            with open(os.path.join("gaussian", "%d.txt" % j), "w") as output:
                output.write(pname)

    def runGaussian(self):
        """Submit the pending calculations to the batch queue and wait.

        Does nothing when ``self.gjfs`` is empty.  If ``gaussian/end.txt``
        exists the program exits with status 0.  Otherwise a ``tasks`` file
        with one shell command line per job is written, ``template.sh`` is
        filled in (node count, hours, minutes) and saved as ``runwith1.sh``,
        stale ``gaussian/<i>.out.gz`` files are removed, the script is
        submitted with ``qsub`` and ``qstat`` is polled every 10 seconds
        until it writes something to stderr (presumably because the job id
        is no longer known to the scheduler -- TODO confirm).
        """
        CPUS_PER_NODE = 4

        njobs = len(self.gjfs)
        if njobs == 0:
            return

        if os.path.isfile(os.path.join("gaussian", "end.txt")):
            self.log("\nFound end.txt. Finishing")
            sys.exit(0)
        self.log("\tRunning Gaussian")

        with open("tasks", "w") as output:
            for i in range(njobs):
                output.write(
                    'echo -n "%d "; date; cd $SGE_O_WORKDIR; python %s/smi23D.py %d %d; cd gaussian; g09 < %d.gjf > %d.out; gzip %d.out; echo -n "%d "; date; rm -f %d.chk\n'
                    % (i, relpath, i, self.length, i, i, i, i, i))

        with open("template.sh", "r") as template_file:
            template = template_file.read()
        time_per_job = 3  # 16 for 8mers, 11 for 6, 6 for 4, 3 for dimers

        if self.length >= 8 and njobs <= 64:
            # Schedule the long jobs first, and give those slow jobs
            # extra time
            scheme = "largevariation"
        else:
            scheme = "normal"

        # All divisions below are floor divisions: node counts and times
        # must stay integers.
        if scheme == "largevariation":
            N_parallel_jobs = max(1, njobs * 5 // CPUS_PER_NODE)
            mins = time_per_job * 2
            hours = 0
            N_nodes = 1 + (N_parallel_jobs - 1) // CPUS_PER_NODE
        else:  # Normal
            MAX = CPUS_PER_NODE * 8
            if njobs <= MAX:
                M = 1
                N_nodes = 1 + (njobs - 1) // CPUS_PER_NODE
            else:
                M = 1 + (njobs - 1) // MAX
                N_nodes = MAX // CPUS_PER_NODE
            walltime = int(M * time_per_job)
            mins = walltime % 60
            hours = walltime // 60
        # Workaround for Stokes (3 nodes not allowed)
        if N_nodes == 3:
            N_nodes = 4
        template = template.replace("REPLACENODES", str(N_nodes))
        template = template.replace("REPLACEHOUR", str(hours))
        template = template.replace("REPLACEMIN", str(mins))
        with open("runwith1.sh", "w") as output:
            # Same bytes as ``print >> output, template``.
            output.write(template + "\n")

        for i in range(njobs):
            filename = os.path.join("gaussian", "%d.out.gz" % i)
            if os.path.isfile(filename):
                os.remove(filename)

        # communicate() reads the pipe and waits for the child to exit.
        qsub = subprocess.Popen(["qsub", "runwith1.sh"],
                                stdout=subprocess.PIPE)
        stdout, _ = qsub.communicate()
        self.log(stdout)
        pid = stdout.split(".")[0]
        stderr = ""
        while not stderr.strip():
            time.sleep(10)
            qstat = subprocess.Popen(["qstat", pid],
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            stdout, stderr = qstat.communicate()

    def extractcalcdata(self):
        """Parse the compressed Gaussian outputs and store the results.

        For every name in ``self.gjforder`` the file ``gaussian/<j>.out.gz``
        is read.  Everything up to and including the line starting with
        " Initial command" that follows the " #T PM6 OPT" route line is
        discarded; the remainder is written to ``tmp.out`` and parsed with
        ``ccopen``.  Jobs whose output is missing, lacks the excitation
        section, fails to parse, lacks the required attributes or has no
        positive transition energy are skipped.  For the others a JSON list
        ``[homo, lumo, etens, etoscs, moenergies, homo_index]`` is stored
        through ``self.admin.storedata`` with the generation number.
        """
        if not self.gjfs:
            return
        self.log("\tExtracting data from log files")
        tostore = []
        for j, pname in enumerate(self.gjforder):
            mylogfile = os.path.join("gaussian", "%d.out.gz" % j)
            if not os.path.isfile(mylogfile):
                continue
            gzfile = gzip.open(mylogfile, "r")
            try:
                text = gzfile.read()
            finally:
                gzfile.close()
            if text.find(
                    "Excitation energies and oscillator strength") < 0:
                continue
            lines = iter(text.split("\n"))
            for line in lines:
                if line.startswith(" #T PM6 OPT"):
                    # Skip the three lines after the route line; a default
                    # avoids StopIteration escaping on a truncated file.
                    for _ in range(3):
                        next(lines, None)
                    break
            for line in lines:
                if line.startswith(" Initial command"):
                    break
            zindofile = list(lines)
            if len(zindofile) == 0:
                # All the PM6 data is missing
                continue
            with open("tmp.out", "w") as f:
                f.write("\n".join(zindofile))
            logfile = ccopen("tmp.out")
            logfile.logger.setLevel(logging.ERROR)
            try:
                data = logfile.parse()
            except AssertionError:
                continue
            try:
                lumo = data.moenergies[0][data.homos[0] + 1]
                homo = data.moenergies[0][data.homos[0]]
                etens = [x * convert
                         for x in data.etenergies]  # cm-1 to eV
                etoscs = data.etoscs
            except Exception:
                # Incomplete results: skip the job.  Unlike a bare except,
                # this lets KeyboardInterrupt/SystemExit through.
                continue
            if not etens or max(etens) <= 0:
                continue

            myjson = json.dumps([
                homo, lumo, etens, etoscs, data.moenergies[0],
                data.homos[0]
            ])
            tostore.append((pname, myjson))

        for pname, myjson in tostore:
            self.admin.storedata(pname, self.gen, myjson)

    def getscore(self, polname, log=False):
        """Return the fitness of the polymer called ``polname``.

        Args:
            polname: name used to look the polymer up via
                ``self.admin.getdata``.
            log: when true, return ``(score, logtext)`` instead of just the
                score.

        Returns:
            ``None`` (or ``(None, None)`` with ``log=True``) when no data is
            stored.  Otherwise the score for ``self.objectivefn``:

            * ``"eff"``: ``scale`` times the efficiency computed from the
              HOMO and the chosen transition, with -4.61 as third argument.
            * ``"distance"``: minus the sum of the Euclidean distance from
              the point (HOMO -5.70, transition 1.39) and the penalty
              ``1 - scale``, so that larger is better.

        Raises:
            ValueError: if ``self.objectivefn`` is neither of the above, or
                if the stored record has neither 4 nor 6 fields.
        """
        data = self.admin.getdata(polname)
        if not data:
            return (None, None) if log else None
        gen, myjson = data
        jsondata = json.loads(myjson)
        # Short records hold 4 values; long ones also carry the MO energies
        # and the HOMO index, which are not needed here.
        if len(jsondata) == 4:
            homo, lumo, etens, etoscs = jsondata
        else:
            homo, lumo, etens, etoscs, moenergies, homo_idx = jsondata
        scale, trans = besttrans(etens, etoscs)
        logtext = ""
        if scale < 1.0:
            logtext += "Os=%.1f" % (scale * 100, )

        if self.objectivefn == "eff":
            score = scale * self.efficiency.efficiency(homo, trans, -4.61)
        elif self.objectivefn == "distance":
            penalty = 1.0 - scale
            distance = math.sqrt((homo - (-5.70))**2 + (trans - 1.39)**2)
            score = -(distance + penalty)  # We are finding the maximum
        else:
            raise ValueError("Unknown objective function: %r" % (self.objectivefn,))
        if log:
            return score, logtext
        return score

    def makechildren(self, moverandomly=False):
        """Breed ``self.children`` from the current population.

        Design notes: it should be possible for a single monomer to mutate,
        and the mutations should always allow the exploration of local
        space.

        Steps: the generation counter is incremented; a mating pool of
        ``self.N // 5`` members is filled by 3-way tournaments without
        replacement; pairs drawn from the pool exchange monomers; each
        monomer of a child is then replaced whenever ``random.random()``
        exceeds 0.25; the pair is ordered, random directions are attached,
        and the child is kept unless it duplicates a member of the
        population or an earlier child.  Two children are attempted per
        iteration, so the result may hold ``self.N + 1`` entries.

        Args:
            moverandomly: when true a mutated monomer is drawn from all
                monomers; otherwise from the first ``self.R`` entries of
                ``self.simmatrix[monomer]``.
        """
        self.gen += 1
        scores = []
        for chromo in self.pop:
            x = polname(chromo)
            scores.append((chromo, self.getscore(x)))

        # Floor division keeps the pool size an int even under true division.
        poolsize = self.N // 5
        pool = []
        for i in range(poolsize):
            tournament = random.sample(scores, 3)
            tournament.sort(reverse=True, key=lambda x: x[1])
            select = tournament[0]
            scores.remove(select)
            pool.append(select[0])

        self.children = []
        while len(self.children) < self.N:
            # Crossover to make two children
            x = copy.deepcopy(random.choice(pool))
            y = copy.deepcopy(random.choice(pool))
            children = [[x[0][0], y[0][1]], [x[0][1], y[0][0]]]
            for child in children:
                newchild = [child[0], child[1]]
                # Mutate backbone
                for i, mon in enumerate(child):
                    if random.random() > 0.25:
                        if moverandomly:
                            newchild[i] = random.choice(self.monomers)
                        else:
                            newchild[i] = random.choice(
                                self.simmatrix[mon][:self.R])

                # Canonical order: the monomer that compares greater first.
                if newchild[1] > newchild[0]:
                    newchild = [newchild[1], newchild[0]]

                # Create random dirs
                directions = randomdirs(newchild, self.length)
                fullchild = (newchild, directions[0], directions[1])
                # Don't add a duplicate
                if fullchild not in self.pop + self.children:
                    self.children.append(fullchild)

    def nextgen(self):
        """Form the next population from the best parents and children.

        Both ``self.pop`` and ``self.children`` are sorted in place by
        descending score; the new population is the top ``self.N // 2`` of
        each.
        """
        def fitness(member):
            return self.getscore(polname(member))

        self.pop.sort(key=fitness, reverse=True)
        self.children.sort(key=fitness, reverse=True)

        # Floor division: slice bounds must be integers.
        half = self.N // 2
        self.pop = self.pop[:half] + self.children[:half]
示例#10
0
from cclib.parser import Gaussian
from Efficiency import *
import json
import logging

efficient = Efficiency()

# Per-molecule tables, keyed by the identifier parsed from each line.
homos = {}
exState = {}
exStr = {}  # not filled in by the code below

# Use a context manager so the database file is closed once it has been
# read, even if a line fails to parse.
with open('hexamer/hexamersDB.txt', 'r') as h:
    for line in h:
        # Fields are delimited by double quotes: field 1 carries the key
        # (everything before its first underscore) and field 5 is a JSON
        # list of values.
        data = line.split('"')
        smiles = data[1].split('_')[0]
        eData = json.loads(data[5])
        homos[smiles] = float(eData[0])

        # Two parallel lists taken from the JSON payload.
        # NOTE(review): presumably transition energies and their
        # strengths -- confirm against the database writer.
        eTrans = eData[2]
        eStr = eData[3]

        # Index of the first strictly largest positive entry of eStr;
        # stays at 0 when no entry is greater than 0.0.
        bestExcitationIdx = 0
        bestExcitationStr = 0.0
        for j, excitation in enumerate(eStr):
            if excitation > bestExcitationStr:
                bestExcitationStr = excitation
                bestExcitationIdx = j

        # OK, now bestExcitationIdx has the best index
        exState[smiles] = homos[smiles] + float(eTrans[bestExcitationIdx])
示例#11
0
#!/usr/bin/env python

import sys
from Efficiency import *

efficient = Efficiency()

# Every print below takes a single parenthesised argument, so the output is
# the same whether print is a statement (Python 2) or a function (Python 3).
if len(sys.argv) == 2:
    # One argument: a file with two whitespace-separated numbers per line.
    # Print the ZINDO efficiency for each line.  The context manager closes
    # the file when the loop finishes or a line fails to parse.
    with open(sys.argv[1]) as datafile:
        for line in datafile:
            (homo, bandgap) = line.split()
            print(efficient.zindoEff(float(homo), float(bandgap)))
elif len(sys.argv) > 3:
    # Three or more arguments: the first two are passed to the B3LYP
    # calculation; the extra argument(s) only select this branch.
    print("%s %s" % ("B3LYP Efficiency: ",
                     efficient.b3lypEff(float(sys.argv[1]),
                                        float(sys.argv[2]))))
else:
    # Exactly two arguments: ZINDO calculation on the two numbers.
    # NOTE(review): running with no arguments also reaches this branch and
    # fails with IndexError on sys.argv[1].
    print("%s %s" % ("ZINDO Efficiency: ",
                     efficient.zindoEff(float(sys.argv[1]),
                                        float(sys.argv[2]))))
示例#12
0
#!/usr/bin/env python

import os
import sys

import SimpleUtils as utils
from Efficiency import *

efficient = Efficiency()

if __name__ == "__main__":

    monodatafile = open("monomer-zindo.txt", 'r')
    homos = {}
    lumos = {}
    for line in monodatafile:
        dataList = line.split(' ')
        if len(dataList) < 5:
            continue
        # (number, smiles, homo, lumo, oscstrs)
        smiles = dataList[1]
        homos[smiles] = float(dataList[2])
        lumos[smiles] = float(dataList[3])

    dimerdata = open("../dims_and_tets/alldimerDB.txt")
    dimHOMO = {}
    dimTrans = {}
    dimEff = {}
    dimHab = {}
    # skip line with column names
    dimerdata.readline()