Example #1
import os

import ase.db
from ase import Atoms

def get_structure(formula, prototype=None, base_dir="./", c=None, **filters):
    db_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           "../c2db.db")
    # Serial version, only on rank 0
    candidates = {}
    # if world.rank == 0:
    db = ase.db.connect(db_file)
    if prototype is None:
        res = db.select(formula=formula, **filters)
    else:
        res = db.select(formula=formula, prototype=prototype)
    for mol in res:
        symbol = mol.formula
        pos = mol.positions
        cell = mol.cell
        pbc = mol.pbc
        # Change distance
        if (c is not None) and (isinstance(c, (float, int))):
            cell.setflags(write=True)
            cell[-1][-1] = c
            pbc = (True, True, True)  # Use full periodic
        name = "{}-{}".format(symbol, mol.prototype)
        # name = os.path.join(os.path.abspath(base_dir),
        # "{}-{}.traj".format(symbol, mol.prototype))
        atoms = Atoms(symbol, positions=pos, cell=cell, pbc=pbc)
        candidates[name] = atoms

        if (c is not None) and (isinstance(c, (float, int))):
            atoms.center()  # center the atoms, although not really needed
    return candidates
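A hedged usage sketch, not part of the original source; the formula, prototype and 18 Å cell height are illustrative values:

# Hypothetical call; each returned ase.Atoms supports .write() directly.
candidates = get_structure("MoS2", prototype="MoS2", c=18.0)
for name, atoms in candidates.items():
    atoms.write("{}.traj".format(name))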
Example #2
from collections import defaultdict
from math import inf

import ase.db

def analyze(filename, tag='results'):
    energies = defaultdict(list)
    mintimes = defaultdict(lambda: 999999)
    formulas = []
    db = ase.db.connect(filename)
    for row in db.select(sort='formula'):
        if row.formula not in formulas:
            formulas.append(row.formula)
        energies[row.formula].append(row.get('energy', inf))
    emin = {formula: min(energies[formula]) for formula in energies}

    data = defaultdict(list)
    for row in db.select(sort='formula'):
        if row.get('energy', inf) - emin[row.formula] < 0.01:
            t = row.t
            if row.n < 100:
                nsteps = row.n
                mintimes[row.formula] = min(mintimes[row.formula], t)
            else:
                nsteps = 9999
                t = inf
        else:
            nsteps = 9999
            t = inf
        data[row.optimizer].append((nsteps, t))

    print(formulas)

    D = sorted(data.items(), key=lambda x: sum(y[0] for y in x[1]))
    with open(tag + '-iterations.csv', 'w') as f:
        print('optimizer,' + ','.join(formulas), file=f)
        for o, d in D:
            print('{:18},{}'.format(
                o, ','.join('{:3}'.format(x[0]) if x[0] < 100 else '   '
                            for x in d)),
                  file=f)

    data = {
        opt: [(n, t / mintimes[f]) for (n, t), f in zip(x, formulas)]
        for opt, x in data.items()
    }
    D = sorted(data.items(), key=lambda x: sum(min(y[1], 999) for y in x[1]))
    with open(tag + '-time.csv', 'w') as f:
        print('optimizer,' + ','.join(formulas), file=f)
        for o, d in D:
            print('{:18},{}'.format(
                o,
                ','.join('{:8.1f}'.format(x[1]) if x[0] < 100 else '        '
                         for x in d)),
                  file=f)
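A minimal invocation sketch, assuming a benchmark database whose rows carry the formula, energy, n (step count), t (wall time) and optimizer keys read above:

# Hypothetical call; writes results-iterations.csv and results-time.csv.
analyze('optimizer-benchmark.db', tag='results')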
Example #3
import os
import json

import ase.db
import ase.io

# mkdir_p() and publication_data_from_row() are assumed project helpers.
def main(db_filename, folder_name):
    db = ase.db.connect(db_filename)

    mkdir_p(folder_name)
    for row in db.select():
        if not os.path.exists(os.path.join(folder_name, 'publication.txt')):
            with open(os.path.join(folder_name, 'publication.txt'),
                      'w') as outfile:
                data = publication_data_from_row(row)
                json.dump(data, outfile)

        atoms = row.toatoms()
        dft_code = row.key_value_pairs.get('dft_code', '')
        dft_functional = row.key_value_pairs.get('dft_functional', '')
        reaction = row.key_value_pairs.get('reaction', '')
        substrate = row.key_value_pairs.get('substrate', '')
        facet = row.key_value_pairs.get('facet', '').strip('()')
        adsorbate = row.key_value_pairs.get('adsorbate', '')

        out_dirname = "{folder_name}/{dft_code}/{dft_functional}/{reaction}/{substrate}/{facet}".format(
            **locals())
        out_dirname = out_dirname.replace('/None', '')
        print(out_dirname)
        out_trajname = "{out_dirname}/{adsorbate}.traj".format(**locals())

        mkdir_p(out_dirname)
        ase.io.write(out_trajname, atoms)
Example #4
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress
import ase.db

# volume(), mg_conc() and fcc_lattice_parameter_from_volume_primitive_cell()
# are assumed project helpers.
def main():
    db = ase.db.connect("ce_hydrostatic.db")
    volumes = []
    concs = []
    for row in db.select(converged=1):
        atoms = db.get_atoms(id=row.id)
        volumes.append(volume(atoms))
        concs.append(mg_conc(atoms))
    lattice_params = [fcc_lattice_parameter_from_volume_primitive_cell(V, 64)
                      for V in volumes]

    fname = "almg_lattice_parameter.csv" # From J. L. Murray, The Al-Mg system, 1982
    data = np.loadtxt( fname, delimiter=",")

    mg_conc_exp = data[:, 0]
    lattice_param_exp = data[:, 1] * 10
    slope, intercept, r_value, p_value, stderr = linregress(concs, lattice_params)
    print(slope, intercept)
    x = np.linspace(0.0, 0.6, 10)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(x, intercept + slope * x)
    ax.plot(concs, lattice_params, 'o', label="DFT", mfc="none")
    ax.plot(mg_conc_exp, lattice_param_exp, 'x', label="Exp")
    ax.legend(loc="best", labelspacing=0.05, frameon=False)
    ax.set_xlabel("Mg concentration")
    ax.set_ylabel("FCC lattice parameter")
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    plt.show()
Example #5
def choose_bulk(bulk_database, n_elems):
    '''
    Chooses a bulk from our database at random, so long as the bulk contains
    the specified number of elements.

    Args:
        bulk_database   A string pointing to the ASE *.db object that contains
                        the bulks you want to consider.
        n_elems         An integer indicating how many elements should be
                        inside the bulk for it to be selected.
    Returns:
        atoms   `ase.Atoms` of the chosen bulk structure.
        mpid    A string indicating which MPID the bulk is.
    '''
    db = ase.db.connect(bulk_database)
    rows = list(db.select(n_elements=n_elems))
    try:
        # np.random.choice raises ValueError on an empty list, so catch both.
        row = rows[np.random.choice(range(len(rows)))]
        return row.toatoms(), row.mpid

    except (IndexError, ValueError):
        raise ValueError('Randomly chose to look for a %i-component material, '
                         'but no such materials exist in %s. Please add one '
                         'to the database or change the weights to exclude '
                         'this number of components.' %
                         (n_elems, bulk_database))
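A hedged usage sketch; "bulks.db" is an illustrative filename for a database whose rows carry the n_elements and mpid keys queried above:

# Hypothetical call returning a random two-element bulk and its MPID.
atoms, mpid = choose_bulk('bulks.db', n_elems=2)
print(mpid, atoms.get_chemical_formula())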
def get_bulk(name, proto, id=None, method="gpaw"):
    # Get bulk properties; `db` is assumed to be a module-level
    # ase.db connection.
    if id is None:
        res = list(db.select(formula=name, prototype=proto))
        if len(res) == 0:
            return None
        r = res[0]
    else:
        r = db.get(id)
    try:
        if method.lower() == "gpaw":
            L = r.bulk_L
            eps_para = (r.bulk_eps_x + r.bulk_eps_y) / 2
            eps_perp = r.bulk_eps_z
            e = r.gap_hse
        # VASP version below:
        elif method.lower() == "vasp":
            L = r.bulk_L_vasp
            eps_para = (r.bulk_eps_x_vasp + r.bulk_eps_y_vasp) / 2
            eps_perp = r.bulk_eps_z_vasp
            if r.bulk_gap < 0:
                e = r.gap_hse
            else:
                e = r.bulk_gap
        else:
            return None
        if eps_para < 0 or eps_perp < 0:
            return None
    except Exception:
        return None
    return L, eps_para, eps_perp, e
def choose_elements(bulk_database, n):
    '''
    Chooses `n` elements at random from the set of elements inside the given
    database.

    Args:
        bulk_database   A string pointing to the ASE *.db object that contains
                        the bulks you want to consider.
        n               A positive integer indicating how many elements you
                        want to choose.
    Returns:
        elements    A list of strings indicating the chosen elements
    '''
    db = ase.db.connect(bulk_database)
    all_elements = {
        ELEMENTS[number]
        for row in db.select() for number in row.numbers
    }
    # sorted() because random.sample() rejects sets on newer Python versions
    elements = random.sample(sorted(all_elements), n)

    # Make sure we choose a combination of elements that exists in our bulk
    # database
    while db.count(elements) == 0:
        warnings.warn(
            'Sampled the elements %s, but could not find any matching '
            'bulks in the database (%s). Trying to re-sample' %
            (elements, bulk_database), RuntimeWarning)
        elements = random.sample(sorted(all_elements), n)

    return elements
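A hedged usage sketch against the same assumed "bulks.db":

# Hypothetical call; returns two element symbols that co-occur in some bulk.
elements = choose_elements('bulks.db', n=2)
print(elements)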
Example #8
def prep_runfolders(dbname, query):
    import os
    from ase.db import connect

    db = connect(dbname)

    prevdir = os.getcwd()

    for row in db.select(query):
        folder = str(row.id)
        try:
            os.mkdir(folder)
        except FileExistsError:
            print(f'Keeping folder {folder}')
        else:
            print(f'Creating folder {folder}')
        os.chdir(folder)
        try:
            os.symlink('../run.sh', 'run.sh')
        except FileExistsError:
            pass
        with open('db_id', 'w') as out:
            out.write(folder)
        os.chdir(prevdir)
    print('Done')
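A hedged usage sketch; 'jobs.db' and the queued=False query are illustrative:

# Hypothetical call; creates one run folder per matching row id.
prep_runfolders('jobs.db', 'queued=False')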
Example #9
def get_data():
    # db, numpy, warnings and get_thick() are assumed module-level names here.
    candidates = db.select(selection="gap_gw>0.05")
    materials = []
    alpha_x = []
    alpha_z = []
    Eg_HSE = []
    Eg_GW = []
    Eg_PBE = []
    thick = []
    n_2D = []
    polar = []

    for mol in candidates:
        if "Cr" in mol.formula:  # CrS2 stuffs are not correct?
            continue
        print("{0}-{1}".format(mol.formula, mol.prototype))
        togo = True
        for attrib in ("gap", "gap_hse", "gap_gw", "alphax", "alphaz"):
            if not hasattr(mol, attrib):
                warnings.warn("{0} doesn't have attribute {1}!".format(
                    mol.formula, attrib))
                togo = False
        if togo is not True:
            warnings.warn("{0} not calculated!".format(mol.formula))
            continue
        materials.append("{0}-{1}".format(mol.formula, mol.prototype))
        alpha_x.append(mol.alphax)
        alpha_z.append(mol.alphaz)
        Eg_HSE.append(mol.gap_hse)
        Eg_GW.append(mol.gap_gw)
        Eg_PBE.append(mol.gap)
        delta, n, apol = get_thick(mol)
        thick.append(delta)
        n_2D.append(n)
        polar.append(apol)

    print(len(alpha_x))
    alpha_x = numpy.array(alpha_x)
    alpha_z = numpy.array(alpha_z)
    Eg_HSE = numpy.array(Eg_HSE)
    Eg_GW = numpy.array(Eg_GW)
    Eg_PBE = numpy.array(Eg_PBE)
    thick = numpy.array(thick)
    n_2D = numpy.array(n_2D)
    polar = numpy.array(polar)
    return alpha_x, alpha_z, Eg_HSE, thick
Example #10
def tofile(query, type, limit=0):
    fd, name = tempfile.mkstemp(suffix="." + type)
    con = ase.db.connect(name, use_lock_file=False)
    for dct in db.select(query, limit=limit):
        con.write(dct, data=dct.get("data", {}), **dct.get("key_value_pairs", {}))
    os.close(fd)
    with open(name) as f:
        data = f.read()
    os.unlink(name)
    return data
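A sketch of a call; the function reads a module-level `db` handle, so a connection such as db = ase.db.connect('source.db') is assumed to exist:

# Hypothetical call; dumps up to five matching rows to JSON text.
text = tofile('natoms>2', type='json', limit=5)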
Example #11
def main():
    db = ase.db.connect("ceTest.db")

    # Remove all entries that do not have a gen field

    delID = []
    for row in db.select():
        if (row.get("gen") is None):
            delID.append(row.id)
    db.delete(delID)
Example #12
def get_atoms(db, formula, phase):
    system_list = list(db.select('formula={},xc=PBE,phase={}'.format(formula, phase)))
    if len(system_list) > 1:
        # TODO - handle this better
        raise ValueError("found multiple matches for {}, PBE, H phase".format(formula))

    atoms = system_list[0]
    #if atoms["hform"] > 0.0:
    #    print("Warning: hform {} > 0".format(formula))

    return atoms
Example #15
File: app.py  Project: essil1/ase-laser
def tofile(project, query, type, limit=0):
    fd, name = tempfile.mkstemp(suffix='.' + type)
    con = ase.db.connect(name, use_lock_file=False)
    db = databases[project]
    for row in db.select(query, limit=limit):
        con.write(row,
                  data=row.get('data', {}),
                  **row.get('key_value_pairs', {}))
    os.close(fd)
    data = open(name, 'rb').read()
    os.unlink(name)
    return data
Example #16
File: db.py  Project: btodac/ase
def read_db(filename, index, **kwargs):
    # string2index is assumed imported from ase.io.formats
    db = ase.db.connect(filename, serial=True, **kwargs)

    if isinstance(index, str):
        try:
            index = string2index(index)
        except ValueError:
            pass

    if isinstance(index, int):
        index = slice(index, index + 1 or None)

    if isinstance(index, str):
        # index is a database query string:
        for row in db.select(index):
            yield row.toatoms()
    else:
        start, stop, step = index.indices(db.count())
        if start == stop:
            return
        assert step == 1
        for row in db.select(offset=start, limit=stop - start):
            yield row.toatoms()
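A hedged usage sketch: read_db() is a generator, and the index argument may be a query string or a slice string parsed by string2index (filenames here are illustrative):

# Query-string form and slice form of the same generator.
water_images = list(read_db('structures.db', 'formula=H2O'))
first_five = list(read_db('structures.db', '0:5'))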
Example #18
def get_layer_system(db, formula, phase):
    system_list = list(
        db.select('formula={},xc=PBE,phase={}'.format(formula, phase)))
    if len(system_list) > 1:
        # TODO - handle this better
        raise ValueError("found multiple matches for {}, PBE, {} phase".format(
            formula, phase))
    elif len(system_list) == 0:
        raise ValueError("found no matches for {}, PBE, {} phase".format(
            formula, phase))

    layer_system = system_list[0].toatoms()

    return layer_system
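A hedged usage sketch; assumes an open handle such as db = ase.db.connect('layers.db') whose rows carry the formula, xc and phase keys queried above:

# Hypothetical call; raises ValueError on zero or multiple matches.
layer = get_layer_system(db, 'MoS2', 'H')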
Example #19
    def get_data(self):
        """Get the atoms objects."""
        db = ase.db.connect('data/cubic_perovskites.db')
        atoms = list(db.select(combination='ABO3'))[:10]

        # Compile a list of atoms and target values.
        alist = []
        for row in atoms:
            try:
                alist.append(row.toatoms())
            except AttributeError:
                continue

        return alist
Example #20
def analyze():
    # db_name() is an assumed project helper returning the database filename.
    db = ase.db.connect(db_name())
    kpts = []
    cutoff = []
    energy = []
    for row in db.select():
        try:
            new_kpt = row.n_kpt
            new_cut = row.cutoff
            new_eng = row.trial_energy
            kpts.append(new_kpt)
            cutoff.append(new_cut)
            energy.append(new_eng)
        except AttributeError:
            pass

    kpt_kpt = []
    eng_kpt = []
    for i in range(len(energy)):
        if (cutoff[i] == 500):
            kpt_kpt.append(kpts[i])
            eng_kpt.append(energy[i])

    srt_indx = np.argsort(kpt_kpt)
    kpt_kpt = [kpt_kpt[indx] for indx in srt_indx]
    eng_kpt = [eng_kpt[indx] for indx in srt_indx]

    cut_cut = []
    eng_cut = []
    for i in range(len(energy)):
        if (kpts[i] == 1):
            cut_cut.append(cutoff[i])
            eng_cut.append(energy[i])
    srt_indx = np.argsort(cut_cut)
    cut_cut = [cut_cut[indx] for indx in srt_indx]
    eng_cut = [eng_cut[indx] for indx in srt_indx]

    fig1 = plt.figure()
    ax1 = fig1.add_subplot(1, 1, 1)
    ax1.plot(kpt_kpt, eng_kpt, "-o")
    ax1.set_xlabel("Number of k-points")
    ax1.set_ylabel("Energy (eV)")

    fig2 = plt.figure()
    ax2 = fig2.add_subplot(1, 1, 1)
    ax2.plot(cut_cut, eng_cut, "-o")
    ax2.set_xlabel("Plane wave cutoff (eV)")
    ax2.set_ylabel("Energy (eV)")
    plt.show()
def get_bulk_inverted_index_1(input_bulk_database, max_num_elements):
    '''
    Converts an input ASE.db to an inverted index to efficiently sample bulks
    '''
    assert max_num_elements > 0
    db = ase.db.connect(input_bulk_database)

    index = {}
    total_entries = 0
    for i in range(1, max_num_elements + 1):
        index[i] = []
        rows = list(db.select(n_elements=i))
        print(len(rows))
        for r in range(len(rows)):
            index[i].append((rows[r].toatoms(), rows[r].mpid))
            total_entries += 1

    return index, total_entries
Example #22
    def getJobIDs(self):
        """
        Returns all the job IDs
        """
        if (not hasASE):
            raise ImportError("Could not find ASE")

        db = ase.db.connect(self.args["dbname"])

        # Default condition is to run new jobs
        condition = "queued=False, started=False"

        if "restart" in self.args:
            if self.args["restart"] == "True":
                # Re-start an old simulation that was not converged
                condition = "converged=False"
        ids = [row.id for row in db.select(condition)]
        return ids
def convert_adsorbate(input_adsorbate_database, output_pkl):
    '''
    Converts an input ASE.db to an inverted index to efficiently sample adsorbates
    '''
    db = ase.db.connect(input_adsorbate_database)

    index = {}

    for i, row in enumerate(db.select()):
        atoms = row.toatoms()
        data = row.data
        smiles = data['SMILE']
        bond_indices = data['bond_idx']
        index[i] = (atoms, smiles, bond_indices)

    with open(output_pkl, 'wb') as f:
        pickle.dump(index, f)

    # As of adsorbates.db file in master on April 28 2020
    assert len(index) == 82
def convert_bulk(input_bulk_database, max_num_elements, output_pkl):
    '''
    Converts an input ASE.db to an inverted index to efficiently sample bulks
    '''
    assert max_num_elements > 0
    db = ase.db.connect(input_bulk_database)

    index = {}
    total_entries = 0
    for i in range(1, max_num_elements + 1):
        index[i] = []
        rows = list(db.select(n_elements=i))
        for r in range(len(rows)):
            index[i].append((rows[r].toatoms(), rows[r].mpid))
            total_entries += 1
    with open(output_pkl, 'wb') as f:
        pickle.dump(index, f)

    # As of bulk.db file from Kevin on 01 May 2020
    assert total_entries == 11010
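A hedged round-trip check for the pickled inverted index written above; 'bulks.pkl' is an illustrative output_pkl name:

import pickle

# Reload the index and count entries per element count.
with open('bulks.pkl', 'rb') as f:
    index = pickle.load(f)
print({n: len(entries) for n, entries in index.items()})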
def choose_bulk(bulk_database, elements):
    '''
    Chooses a bulk from our database at random as long as the bulk contains
    all the specified elements.

    Args:
        bulk_database   A string pointing to the ASE *.db object that contains
                        the bulks you want to consider.
        elements        A list of strings indicating the elements you want to
                        show up in the bulk. The strings must match one of the
                        values in the `ELEMENTS` constant in this submodule.
    Returns:
        atoms   `ase.Atoms` of the chosen bulk structure.
        mpid    String indicating the Materials Project ID number of the
                bulk that was selected.
    '''
    db = ase.db.connect(bulk_database)
    bulks_subset = [(row.toatoms(), row.mpid) for row in db.select(elements)]
    atoms, mpid = random.choice(bulks_subset)
    return atoms, mpid
Example #26
    def train(self,
              label,
              dbfile,
              nepochs=10,
              learning_rate=0.001,
              shuffle=True,
              percenttest=0.1):
        """Train the potential against the data in a database.

    Parameters
    ----------
    label: string, used for saving the results.
    dbfile: the path to an ase database containing training examples.
    shuffle: boolean, if True, shuffle the data.
    percenttest: float, fraction of data to use only for testing
    """

        with ase.db.connect(dbfile) as db:
            data = [(row.toatoms(), row.energy) for row in db.select()]

        if shuffle:
            import random
            random.shuffle(data)

        N_train = int(len(data) * (1 - percenttest))

        train_data = data[0:N_train]
        test_data = data[N_train:]

        known_energies = tf.placeholder(tf.float64, None)
        tf_energies = tf.placeholder(tf.float64, None)

        #loss = tf.reduce_mean(tf.square(tf_energies - known_energies))
        #opt = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        for i in range(nepochs):
            for atoms, ke in train_data:
                atoms.set_calculator(self)
                te = atoms.get_calculator()._energy

                _loss = self.sess.run([te])
def get_bulk_inverted_index_2(input_bulk_database, max_num_elements):
    '''
    Converts an input ASE.db to an inverted index to efficiently sample bulks
    '''
    assert max_num_elements > 0
    db = ase.db.connect(input_bulk_database)
    rows = list(db.select())

    index = {}
    total_entries = 0
    for r in range(len(rows)):
        bulk = rows[r].toatoms()
        mpid = rows[r].mpid
        formula_str = str(bulk.symbols)
        num_ele = sum(1 for c in formula_str if c.isupper())
        if num_ele > max_num_elements:
            continue
        if num_ele not in index:
            index[num_ele] = []
        index[num_ele].append((bulk, mpid))
        total_entries += 1

    return index, total_entries
emass = []
hmass = []

valence = numpy.load("../post_processing/valence.npy")
pol = numpy.load("../post_processing/valence.npy")

def get_thick(atom_row):
    pos = atom_row.positions[:, -1]
    diff = covalent_radii[atom_row.numbers]
    zmax = numpy.max(pos + diff) - numpy.min(pos - diff)
    vals = valence[atom_row.numbers]  # valence electrons
    atom_pol = pol[atom_row.numbers]
    A = atom_row.cell_area
    return zmax, sum(vals) / A, sum(atom_pol) / A

candidates = db.select(selection="gap_gw>0.5")


for mol in candidates:
    if "Cr" in mol.formula:     # CrS2 stuffs are not correct?
        continue
    print("{0}-{1}".format(mol.formula, mol.prototype))
    togo = True
    for attrib in ("gap_hse", "emass1",
                   "alphax", "alphaz",
    ):
        if not hasattr(mol, attrib):
            warnings.warn("{0} doesn't have attribute {1}!".format(mol.formula,
                                                                   attrib))
            togo = False
    if togo is not True:
        warnings.warn("{0} not calculated!".format(mol.formula))
        continue
# creates: band_alignment.png
from math import floor, ceil
import re
import numpy as np
import matplotlib.pyplot as plt
import ase.db

# Connect to database
db = ase.db.connect('c2dm.db')

# Select the rows that have G0W0 results
rows = db.select('xc=LDA,ind_gap_g0w0>0')

data = []
for row in rows:
    name = row.name
    phase = row.phase
    # Use regular expressions to get the atomic species from the name
    m = re.search('([A-Z][a-z]?)([A-Z][a-z]?)2', name)
    M = m.group(1)
    X = m.group(2)
    
    label = ''
    if phase == 'H':
        label += '2H-'
    elif phase == 'T':
        label += '1T-'
    label += name.replace('2', '$_2$')
    
    # Store data as tuples - easier to sort
    data.append((M, X, label, row.vbm_g0w0, row.cbm_g0w0))
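A hedged continuation: because each row was stored as a sortable tuple, one plausible next step for the band_alignment.png figure is to sort and unpack the columns:

# Sort by metal, then chalcogen, then unpack what the plot needs.
data.sort()
labels = [d[2] for d in data]
vbm = np.array([d[3] for d in data])
cbm = np.array([d[4] for d in data])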
import sys
sys.path.insert(0, '../../../')
import random

import ase.db
from src.discoverers.adsorption.values import calc_co2rr_activities
from src.discoverers.adsorption.mms import MultiscaleDiscoverer
from src.discoverers.adsorption.models import NullModel

# Discoverer settings
adsorbate = 'CO'
initial_training_size = 1000
batch_size = 200
quantile_cutoff = 0.95

# Data loading
db_dir = '../../pull_data/%s/' % adsorbate
db = ase.db.connect(db_dir + '%s.db' % adsorbate)
rows = list(db.select())
random.Random(42).shuffle(rows)


def parse_rows(rows):
    features = []
    labels = []
    surfaces = []

    for row in rows:
        features.append(row.id)
        data = row.data
        labels.append(data['adsorption_energy'])
        surface = (data['mpid'], data['miller'], data['shift'], data['top'])
        surfaces.append(surface)
Example #31
csv_file = "../../data/gpaw_data/gpaw_vasp_aa.csv"

with open(csv_file, "r", encoding="utf-8") as f:
    while True:
        line = f.readline()
        if len(line) == 0:
            break
        sys, gap, L, ex, ey, ez = line.strip().split(",")
        if any(len(s) == 0 for s in [ex, ey, ez]):  # bad results, discard
            continue
        try:
            gap, L = float(gap), float(L)
            ex, ey, ez = float(ex), float(ey), float(ez)
        except ValueError:
            continue
        formula, proto = sys.split("-")
        # print(sys.encode("utf8"), gap, L, ex, ey, ez)
        res = list(db.select(formula=formula, prototype=proto))
        
        if len(res) == 0:
            continue
        mol = res[0]
        db_id = mol.id
        db.update(db_id, bulk_L_vasp=L,
                  bulk_gap_vasp=gap,
                  bulk_eps_x_vasp=ex,
                  bulk_eps_y_vasp=ey,
                  bulk_eps_z_vasp=ez)
        print(sys, "Suscessful!")

Example #32
    def transfer(self,
                 filename_sqlite,
                 block_size=1000,
                 start_block=0,
                 write_ase=True,
                 write_publication=True,
                 write_reaction=True,
                 write_reaction_system=True,
                 check=False):
        """ Transfer data from local sqlite3 .db file to the
        catalysis-hub postgreSQL server

        Parameters:
        filename_sqlite: str
            name of .db file
        block_size: int (default 1000)
            Number of atomic structures and reactions to write together
            in each block.
        start_block: int (default 0)
            Block to start with
        write_ase: bool
            whether or not to write atomic structures
        write_publication: bool
            whether or not to transfer publication table
        write_reaction: bool
            whether or not to transfer reaction table
        write_reaction_system: bool
            whether or not to write reaction_system table
        """

        self.stdout.write('Starting transfer\n')
        con = self.connection or self._connect()
        self._initialize(con)
        self.stdout.write('Finished initialization\n')
        cur = con.cursor()
        self.stdout.write('Got a cursor\n')
        self.stdout.write('Connecting to {0}\n'.format(self.server_name))

        nrows = 0
        if write_ase:
            self.stdout.write('Transferring atomic structures\n')
            db = ase.db.connect(filename_sqlite)
            n_structures = db.count()
            n_blocks = n_structures // block_size + 1
            t_av = 0
            for block_id in range(start_block, n_blocks):
                i = block_id - start_block
                t1 = time.time()
                b0 = block_id * block_size
                b1 = (block_id + 1) * block_size + 1

                if block_id + 1 == n_blocks:
                    b1 = n_structures + 1

                rows = list(db.select('{}<id<{}'.format(b0, b1)))

                with ase.db.connect(self.server_name,
                                    type='postgresql') as db2:
                    # write one row at the time until ase is updated
                    # db2.write(rows)
                    for row in rows:
                        db2.write(row)

                nrows += len(rows)
                t2 = time.time()
                dt = t2 - t1
                t_av = (t_av * i + dt) / (i + 1)

                self.stdout.write(
                    '  Finished Block {0} / {1} in {2} sec\n'.format(
                        block_id + 1, n_blocks, dt))
                self.stdout.write(
                    '    Completed transfer of {0} atomic structures\n'.format(
                        nrows))
                self.stdout.write('    Estimated time left: {0} sec\n'.format(
                    t_av * (n_blocks - block_id - 1)))

        db = CathubSQLite(filename_sqlite)
        con_lite = db._connect()
        cur_lite = con_lite.cursor()

        Npub = 0
        Npubstruc = 0
        if write_publication:
            self.stdout.write('Transferring publications\n')
            try:
                npub = db.get_last_pub_id(cur_lite)
            except BaseException:
                npub = 1
            for id_lite in range(1, npub + 1):
                Npub += 1
                row = db.read(id=id_lite, table='publication')
                if len(row) == 0:
                    continue
                values = row[0]
                pid, pub_id = self.write_publication(values)

            # Publication structures connection
            cur_lite.execute("""SELECT * from publication_system;""")
            publication_system_values = []
            rows = cur_lite.fetchall()
            for row in rows:
                Npubstruc += 1
                values = list(row)
                value_list = get_value_list(values)
                publication_system_values += [tuple(value_list)]

            # Insert into publication_system table
            key_str = get_key_str(table='publication_system')
            insert_command = """INSERT INTO publication_system ({0})
            VALUES %s ON CONFLICT DO NOTHING;"""\
                .format(key_str)

            execute_values(cur=cur,
                           sql=insert_command,
                           argslist=publication_system_values,
                           page_size=1000)

            # Write pub_id to systems table
            cur.execute("""UPDATE systems SET
            key_value_pairs=jsonb_set(key_value_pairs, '{{"pub_id"}}', '"{pub_id}"')
            WHERE unique_id IN
            (SELECT ase_id from publication_system WHERE pub_id='{pub_id}')"""\
                        .format(pub_id=pub_id))

            con.commit()
            self.stdout.write('  Completed transfer of publications\n')

        Ncat = 0
        Ncatstruc = 0

        if write_reaction:
            self.stdout.write('Transferring reactions\n')
            cur.execute('SELECT max(id) from reaction;')
            ID = cur.fetchone()[0] or 0

            n_react = db.get_last_id(cur_lite)

            n_blocks = int(n_react / block_size) + 1
            t_av = 0
            for block_id in range(start_block, n_blocks):
                reaction_values = []
                reaction_system_values = []
                Ncat0 = Ncat
                Ncatstruc0 = Ncatstruc

                i = block_id - start_block
                t1 = time.time()
                b0 = block_id * block_size + 1
                b1 = (block_id + 1) * block_size + 1
                if block_id + 1 == n_blocks:
                    b1 = n_react + 1

                for id_lite in range(b0, b1):
                    row = db.read(id_lite)
                    if len(row) == 0:
                        continue
                    values = row[0]

                    # id = self.check(values[13], values[1], values[6], values[7],
                    #                values[8], strict=True)
                    id = None
                    update_rs = False
                    if id is not None:
                        id = self.update(id, values)
                        self.stdout.write(
                            'Updated reaction db with row id = {}\n'.format(
                                id))
                        update_rs = True
                    else:
                        ID += 1
                        Ncat += 1
                        value_list = get_value_list(values)
                        value_list[0] = ID  # set new ID
                        reaction_values += [tuple(value_list)]
                        if write_reaction_system:
                            cur_lite.execute(
                                "SELECT * from reaction_system where id={};".
                                format(id_lite))
                            rows = cur_lite.fetchall()
                            if update_rs:
                                cur.execute("""Delete from reaction_system
                                where id={0}""".format(id))
                            for row in rows:
                                Ncatstruc += 1
                                values = list(row)
                                if len(values) == 3:
                                    values.insert(1, None)
                                value_list = get_value_list(values)
                                value_list[3] = ID
                                reaction_system_values += [tuple(value_list)]

                q = ', '.join('?' * 14)
                q = '({})'.format(q.replace('?', '%s'))

                key_str = get_key_str()
                insert_command = """INSERT INTO reaction
                ({0}) VALUES %s;""".format(key_str)

                execute_values(cur=cur,
                               sql=insert_command,
                               argslist=reaction_values,
                               template=q,
                               page_size=block_size)

                key_str = get_key_str('reaction_system')
                insert_command = """INSERT INTO reaction_system
                ({0}) VALUES %s ON CONFLICT DO NOTHING;""".format(key_str)

                execute_values(cur=cur,
                               sql=insert_command,
                               argslist=reaction_system_values,
                               page_size=1000)
                con.commit()

                t2 = time.time()
                dt = t2 - t1
                t_av = (t_av * i + dt) / (i + 1)

                self.stdout.write(
                    '  Finished Block {0} / {1} in {2} sec\n'.format(
                        block_id + 1, n_blocks, dt))
                self.stdout.write(
                    '    Completed transfer of {0} reactions. \n'.format(
                        Ncat - Ncat0))
                self.stdout.write('    Estimated time left: {0} sec \n'.format(
                    t_av * (n_blocks - block_id - 1)))

            self.stdout.write('  Completed transfer of reactions\n')

        for statement in tsvector_update:
            cur.execute(statement)

        if self.connection is None:
            con.commit()
            con.close()

        self.stdout.write('Inserted into:\n')
        self.stdout.write('  systems: {0}\n'.format(nrows))
        self.stdout.write('  publication: {0}\n'.format(Npub))
        self.stdout.write('  publication_system: {0}\n'.format(Npubstruc))
        self.stdout.write('  reaction: {0}\n'.format(Ncat))
        self.stdout.write('  reaction_system: {0}\n'.format(Ncatstruc))
import sys
sys.path.insert(0, '../../../')
import random
from multiprocessing import Pool

import ase.db
from tqdm import tqdm
from src.discoverers.adsorption.values import calc_co2rr_activities
from src.discoverers.adsorption.mms import MultiscaleDiscoverer
from src.discoverers.adsorption.models import PrimeModel

# Discoverer settings
adsorbate = 'CO'
initial_training_size = 1000
batch_size = 200
quantile_cutoff = 0.9

# Data loading
db_dir = '../../pull_data/%s_synthesized/' % adsorbate
db = ase.db.connect(db_dir + '%s.db' % adsorbate)
rows = list(tqdm(db.select(), desc='reading ASE db', total=db.count()))
random.Random(42).shuffle(rows)


def parse_row(row):
    feature = row.id
    data = row.data
    label = data['adsorption_energy']
    surface = (data['mpid'], data['miller'], data['shift'], data['top'])
    return feature, label, surface


def parse_rows(rows):
    with Pool(processes=32, maxtasksperchild=1000) as pool:
        iterator = pool.imap(parse_row, rows, chunksize=100)
        iterator_tracked = tqdm(iterator, desc='parsing rows', total=len(rows))
Example #34
    if row[4] != "":  # Eps calculated
        name, proto = row[:2]
        print(name, proto)
        L, E, ex, ey, ez, E_direct, E_min = map(float, row[2:])
        # Elements
        key = (name, proto)
        e_xy = numpy.sqrt(ex * ey)
        ax = (e_xy - 1) / (4 * pi) * L
        az = (1 - 1 / ez) * L / (4 * pi)

        if proto == "ABX3":  # perovskite?
            delta = 14.24
            n_2D = None
            mol = None
        else:
            mol = list(db.select(formula=name, prototype=proto))[0]
            delta, n_2D = get_thick(mol)
        # 3D
        try:
            L_3D, epsx, epsz, E_3D = get_bulk(name, proto)
        except TypeError:
            L_3D, epsx, epsz, E_3D = (None, None, None, None)
            # QC
        if (name, proto) in QC_res:
            qc_n, qc_p = QC_res[(name, proto)]
        else:
            qc_n = None
            qc_p = None
        # emass
        try:
            emass = mol.emass1