def test_formulas(x):
    """Round-trip the formula string *x* and exercise Formula arithmetic."""
    formula = Formula(x)

    # Parsing followed by str() must reproduce the input verbatim.
    assert str(formula) == x

    print(formula.count(), format(formula, 'latex'))

    # Recombining the divmod result with the divisor must give back the
    # original formula.
    quotient, remainder = divmod(formula, 'H2O')
    assert quotient * Formula('H2O') + remainder == formula

    # check that formula can be compared to non-formula object
    assert formula != 117
def check(key_value_pairs):
    """Validate the keys and values of *key_value_pairs*.

    The ``"external_tables"`` entry is skipped.  For every other entry:

    * the key must match ``word`` and must not be in ``reserved_keys``,
      otherwise ``ValueError`` is raised;
    * a key that also parses as a strict chemical formula only triggers a
      warning;
    * the value must be a real number, a string or a ``np.bool_``,
      otherwise ``ValueError`` is raised;
    * a string value that could be read as an ``int`` or a ``float`` is
      rejected with ``ValueError``.
    """
    for key, value in key_value_pairs.items():
        if key == "external_tables":
            # Checks for external_tables are not performed
            continue

        if not word.match(key) or key in reserved_keys:
            raise ValueError('Bad key: {}'.format(key))

        try:
            Formula(key, strict=True)
        except ValueError:
            # Not a chemical formula: nothing to warn about.
            pass
        else:
            # Message fixed: "wil" -> "will" and the missing space after
            # the comma between the two sentences' literals.
            warnings.warn(
                'It is best not to use keys ({0}) that are also a '
                'chemical formula. If you do a "db.select({0!r})", '
                'you will not find rows with your key. Instead, you will '
                'get rows containing the atoms in the formula!'.format(key))

        if not isinstance(value, (numbers.Real, str, np.bool_)):
            raise ValueError('Bad value for {!r}: {}'.format(key, value))

        if isinstance(value, str):
            for t in [int, float]:
                if str_represents(value, t):
                    raise ValueError(
                        'Value ' + value + ' is put in as string '
                        'but can be interpreted as '
                        '{0}! Please convert to {0} using {0}(value) before '
                        'writing to the database OR change '
                        'to a different string.'.format(t.__name__))
def test_refilling(self):
    """With bag_refills=5, placing a single H must not end the episode."""
    env = MolecularEnvironment(
        reward=self.reward,
        observation_space=self.observation_space,
        action_space=self.action_space,
        formulas=[Formula('H2O')],
        max_h_distance=1.0,
        bag_refills=5,
        initial_formula=Formula('H2O'),
    )

    hydrogen = Atom(symbol='H', position=(1.0, 0, 0))
    step_result = env.step(action=self.action_space.from_atom(atom=hydrogen))

    # step() returns (obs, reward, done, info); only `done` is checked.
    _obs, _reward, done, _info = step_result
    self.assertFalse(done)
def __add__(self, other) -> "MullikenContribution":
    """Return the sum of two Mulliken contributions.

    The contribution values are added, the element symbols of both
    operands are merged into one formula string (``"reduce"`` format) and
    the ``l`` labels are joined.

    The labels are de-duplicated in first-seen order (``self`` before
    ``other``).  The previous ``"".join(set(...))`` relied on set iteration
    order, which for strings varies between interpreter runs, so the
    resulting label was not reproducible.
    """
    label = "".join(dict.fromkeys([self.l, other.l]))
    total = self.contribution + other.contribution
    symbols = string2symbols(self.symbol) + string2symbols(other.symbol)
    symbol = Formula.from_list(symbols).format("reduce")
    return MullikenContribution(symbol, total, label)
def test_invalid_formula(self):
    """Creating an environment for the formula 'He2' must raise ValueError."""
    # The formula is built outside the assertRaises block on purpose: only
    # the environment constructor is expected to raise.
    helium_dimer = Formula('He2')

    with self.assertRaises(ValueError):
        MolecularEnvironment(
            reward=self.reward,
            observation_space=self.observation_space,
            action_space=self.action_space,
            formulas=[helium_dimer],
        )
def test_formula(self):
    """remove_from_formula drops a present symbol and rejects an absent one."""
    original = Formula('HCO')

    without_h = remove_from_formula(original, 'H')
    self.assertEqual(without_h.count()['H'], 0)

    # 'He' is not part of 'HCO', so removing it must fail with KeyError.
    self.assertRaises(KeyError, remove_from_formula, original, 'He')
def from_formula(self, formula: Formula) -> BagType:
    """Convert *formula* into a bag tuple indexed by the symbol table.

    The bag has one slot per entry of ``self.symbol_table``; every symbol
    counted in *formula* writes its count into the slot given by
    ``symbol_table.get_index``; all other slots stay 0.
    """
    element_counts = formula.count()
    table = self.symbol_table
    slots = [0] * table.count()
    for symbol, amount in element_counts.items():
        slots[table.get_index(symbol)] = amount
    return tuple(slots)
def plot_output(jobn, latoms, dpos_all, gamma, Asf, ibulk):
    """Plot the displacement profile of every displaced job to a PDF.

    For each job ``i`` whose displacement array ``dpos_all[i, :]`` has a
    norm above 1e-10, the three displacement components are plotted
    against the third Cartesian coordinate of the atoms in
    ``latoms[ibulk]`` and written to ``y_post_planar_relaxed.<job>.pdf``.

    Parameters
    ----------
    jobn : sequence of job names; ``'ssf'``, ``'usf'`` and ``'surf'``
        select a dedicated energy label, anything else a generic one.
    latoms : sequence of atoms objects; only ``latoms[ibulk]`` is used.
    dpos_all : array indexed as ``dpos_all[i, :]`` and stacked column-wise
        with the coordinates -- presumably shape (njobs, natoms, 3);
        TODO confirm against the caller.
    gamma : per-job energies used in the annotation (halved for 'surf').
    Asf : area printed in the annotation.
    ibulk : index of the reference structure in *latoms*.
    """
    from ase.formula import Formula

    njobs = len(jobn)
    print('njobs:', njobs)

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    fig_wh = [3.15, 3]
    fig_subp = [1, 1]

    xi = latoms[ibulk].positions[:, 2].copy()

    # The formula label does not depend on the job: compute it once.
    str2 = Formula(latoms[ibulk].get_chemical_formula()).format('latex')

    for i in range(njobs):
        # `not (... > ...)` keeps the original treatment of NaN norms
        # (they are skipped).
        if not (np.linalg.norm(dpos_all[i, :]) > 1e-10):
            continue

        fig1, ax1 = vf.my_plot(fig_wh, fig_subp)

        # Append the coordinate as last column and sort rows by it.
        temp = np.hstack([dpos_all[i, :], xi[:, np.newaxis]])
        ind = np.argsort(temp[:, -1])
        temp = temp[ind, :]

        ax1.plot(temp[:, -1], temp[:, 0], '-s', label='$u_1$')
        ax1.plot(temp[:, -1], temp[:, 1], '-o', label='$u_2$')
        ax1.plot(temp[:, -1], temp[:, 2], '-^', label='$u_3$')

        ax1.legend(loc='lower center', ncol=3, framealpha=0.4)
        ax1.set_xlabel('Atom positions in $x_3$ ($\\mathrm{\\AA}$)')
        ax1.set_ylabel('Displacements $u_i$ ($\\mathrm{\\AA}$)')
        ax1.set_position([0.25, 0.16, 0.7, 0.76])

        if jobn[i] == 'ssf':
            str1 = '$\\gamma_\\mathrm{ssf} =$ %.0f mJ/m$^2$' % (gamma[i])
        elif jobn[i] == 'usf':
            str1 = '$\\gamma_\\mathrm{usf} =$ %.0f mJ/m$^2$' % (gamma[i])
        elif jobn[i] == 'surf':
            # The 'surf' value is halved before printing.
            str1 = '$\\gamma_\\mathrm{surf} =$ %.0f mJ/m$^2$' % (gamma[i] / 2)
        else:
            str1 = '$\\Delta E / A =$ %.0f mJ/m$^2$' % (gamma[i])

        str_all = '%s\n$A =$%.4f $\\mathrm{\\AA}^2$\n%s' \
            % (str2, Asf, str1)

        ax1.text(xi.max() * 0.2, dpos_all[i, :].max() * 0.6, str_all)

        filename = 'y_post_planar_relaxed.%s.pdf' % (jobn[i])
        plt.savefig(filename)
        # Release the figure; otherwise one open figure per job piles up.
        # NOTE(review): assumes vf.my_plot returns a matplotlib Figure
        # as its first value -- confirm.
        plt.close(fig1)
def get_chemical_formula(
    self,
    mode: str = 'hill',
    empirical: bool = False,
) -> str:
    """Return the chemical formula as a string.

    See documentation of ase.atoms.Atoms.get_chemical_formula().

    ``mode`` is one of ``'all'``, ``'reduce'``, ``'hill'`` or ``'metal'``.
    ``empirical`` is only honoured for ``'hill'`` and ``'metal'``; for the
    other two modes a warning is issued.  An empty object yields ``''``
    regardless of *mode*.  Any other mode raises ``ValueError``.
    """
    # XXX Delegate the work to the Formula object!
    if empirical and mode in ('reduce', 'all'):
        warnings.warn("Empirical chemical formula not available "
                      "for mode '{}'".format(mode))

    if len(self) == 0:
        return ''

    numbers = self.numbers

    if mode == 'all':
        # One symbol per atom, in storage order.
        return ''.join([chemical_symbols[Z] for Z in numbers])

    if mode == 'reduce':
        natoms = len(numbers)
        # Start index of every run of identical consecutive numbers.
        starts = np.concatenate(
            ([0], np.arange(1, natoms)[numbers[1:] != numbers[:-1]]))
        run_symbols = [chemical_symbols[Z] for Z in numbers[starts]]
        run_lengths = np.append(starts[1:], natoms) - starts

        pieces = []
        for symbol, length in zip(run_symbols, run_lengths):
            pieces.append(symbol)
            if length > 1:
                pieces.append(str(length))
        return ''.join(pieces)

    all_symbols = [chemical_symbols[Z] for Z in numbers]
    f = Formula('', _tree=[(all_symbols, 1)])
    if empirical:
        f, _ = f.reduce()

    if mode not in {'hill', 'metal'}:
        raise ValueError(
            "Use mode = 'all', 'reduce', 'hill' or 'metal'.")
    return f.format(mode)
def parse_formula(formula):
    """Split a species string into ``(count, charge, aq)``.

    A trailing ``'(aq)'`` sets *aq* and is removed.  The charge is the
    number of ``'+'`` minus the number of ``'-'`` characters; when it is
    non-zero, trailing sign characters are stripped before the rest is
    handed to ``Formula`` for counting.
    """
    aq = formula.endswith('(aq)')
    bare = formula[:-4] if aq else formula

    charge = bare.count('+') - bare.count('-')
    if charge != 0:
        bare = bare.rstrip('+-')

    return Formula(bare).count(), charge, aq
def to_formula(self, bag: 'BagType') -> Formula:
    """Convert a bag back into a Formula.

    Raises ``ValueError`` when the bag length differs from the size of
    ``self.symbol_table``.  Otherwise slot ``index`` of the bag becomes
    the count of ``symbol_table.get_symbol(index)``.
    """
    if len(bag) != self.symbol_table.count():
        raise ValueError(f'Bag {bag} does not fit symbol table')

    table = self.symbol_table
    counts = {}
    for index, count in enumerate(bag):
        counts[table.get_symbol(index)] = count
    return Formula.from_dict(counts)
def row2dct(
        row,
        key_descriptions: Dict[str, Tuple[str, str, str]] = {}) -> Dict[str, Any]:
    """Convert row to dict of things for printing or a web-page.

    The result contains formatted strings for ``size``, ``cell``,
    ``lengths``, ``angles``, ``formula`` and -- when present on the row --
    ``stress``, ``dipole``, ``data`` and ``constraints``.  ``table`` is a
    list of ``(key, description, value)`` tuples for a fixed set of keys
    plus the keys of *key_descriptions* and of ``row.key_value_pairs``.

    The table rows are emitted in sorted key order.  They used to follow
    set iteration order, which changes between interpreter runs.

    *key_descriptions* is only read, never modified, so the shared default
    dict is harmless here.
    """
    from ase.db.core import float_to_time_string, now

    dct = {}

    atoms = Atoms(cell=row.cell, pbc=row.pbc)
    dct['size'] = kptdensity2monkhorstpack(atoms,
                                           kptdensity=1.8,
                                           even=False)

    dct['cell'] = [['{:.3f}'.format(a) for a in axis] for axis in row.cell]
    par = ['{:.3f}'.format(x) for x in cell_to_cellpar(row.cell)]
    dct['lengths'] = par[:3]
    dct['angles'] = par[3:]

    stress = row.get('stress')
    if stress is not None:
        dct['stress'] = ', '.join('{0:.3f}'.format(s) for s in stress)

    dct['formula'] = Formula(row.formula).format('abc')

    dipole = row.get('dipole')
    if dipole is not None:
        dct['dipole'] = ', '.join('{0:.3f}'.format(d) for d in dipole)

    data = row.get('data')
    if data:
        dct['data'] = ', '.join(data.keys())

    constraints = row.get('constraints')
    if constraints:
        dct['constraints'] = ', '.join(c.__class__.__name__
                                       for c in constraints)

    keys = ({'id', 'energy', 'fmax', 'smax', 'mass', 'age'} |
            set(key_descriptions) |
            set(row.key_value_pairs))
    dct['table'] = []
    for key in sorted(keys):
        if key == 'age':
            # 'age' is derived from the creation time and is listed
            # under the key 'ctime'.
            age = float_to_time_string(now() - row.ctime, True)
            dct['table'].append(('ctime', 'Age', age))
            continue
        value = row.get(key)
        if value is not None:
            if isinstance(value, float):
                value = '{:.3f}'.format(value)
            elif not isinstance(value, str):
                value = str(value)
            # Entries 1 and 2 of a description are used as the
            # description text and the unit.
            desc, unit = key_descriptions.get(key, ['', '', ''])[1:]
            if unit:
                value += ' ' + unit
            dct['table'].append((key, desc, value))
    return dct
def __add__(self, other) -> "DOSContribution":
    """Return the sum of two DOS contributions.

    Both operands must hold ``values`` arrays of identical shape
    (checked with an assertion).  The values are added, the element
    symbols of both operands are merged into one ``"reduce"``-formatted
    formula string and the ``l`` labels are joined.

    The labels are de-duplicated in first-seen order (``self`` before
    ``other``).  The previous ``"".join(set(...))`` relied on set iteration
    order, which for strings varies between interpreter runs.
    """
    assert (self.values.shape == other.values.shape
            ), "DOS contributions shape does not match for addition."
    total = self.values + other.values
    label = "".join(dict.fromkeys([self.l, other.l]))
    symbols = string2symbols(self.symbol) + string2symbols(other.symbol)
    # NOTE: a trailing `.format("metal")` used to follow here.  It was
    # applied to the string returned by Formula.format, i.e. it was
    # str.format without replacement fields and changed nothing, so it
    # was dropped; the resulting symbol is unchanged.
    symbol = Formula.from_list(symbols).format("reduce")
    return DOSContribution(symbol, total, label)
def extract_descriptor(rows_input): global extra_adding # print('extract descriptor start') bocs, targets = [], [] row_ids, syms = [], [] id_sym_dict = {} for row in rows_input: #### check if molecular formula in ir.db is the same with qm9.db ##### sym1 = 'C' sym2 = 'B' while (Formula(str(sym1)) != Formula(str(sym2))): if str(sym1) != 'C' and str(sym2) != 'B': # print(row.id, sym1, sym2, 'An mismatch occur, extra id:', extra_adding) extra_adding += 1 sym1 = row.toatoms().symbols sym2 = rows_qm9[row.id - 1 + extra_adding].toatoms().symbols if (str(sym1) == 'CH4'): for i, j in zip(row.data.ir_spectrum[0], row.data.ir_spectrum[1]): if j > 0.001: pass # print(i, j) row_ids.append(row.id) syms.append(str(sym1)) #### read boc from pre_calculated boc.lst ######### bocs.append(c[row.id - 1 + extra_adding][1]) #### read ir targets from ir.db ############## s = np.array(row.data.ir_spectrum[1]) targets.append(s / np.amax(s)) #### pca #### global pca pca_targets = pca.fit_transform(targets) #### shuffle together #### shfl = list(zip(bocs, pca_targets, targets, row_ids, syms)) random.shuffle(shfl) bocs, pca_targets, targets, row_ids, syms = zip(*shfl) for i in range(len(row_ids)): id_sym_dict[row_ids[i]] = syms[i] np.save('data_id.npy', id_sym_dict) return bocs, pca_targets, targets
def test_invalid_action(self):
    """Stepping with a He atom in an 'H2CO' environment raises KeyError."""
    env = MolecularEnvironment(
        reward=self.reward,
        observation_space=self.observation_space,
        action_space=self.action_space,
        formulas=[Formula('H2CO')],
    )

    helium = Atom(symbol='He', position=(0, 1, 0))
    action = self.action_space.from_atom(helium)

    # Only the step itself is expected to raise.
    self.assertRaises(KeyError, env.step, action)
def test_h0c1():
    """Zero counts are dropped; invalid dict entries raise ValueError."""
    carbon_only = Formula.from_dict({'H': 0, 'C': 1})
    assert carbon_only.format('hill') == 'C'

    # Negative count, non-integer count, non-string symbol.
    invalid_inputs = ({'H': -1}, {'H': 1.5}, {7: 1})
    for bad in invalid_inputs:
        with pytest.raises(ValueError):
            Formula.from_dict(bad)
def set_symbol(self, symbol):
    """Validate *symbol* and store it in ``"reduce"`` format.

    *symbol* must be a string that ``string2symbols`` can split into
    entries of ``chemical_symbols``.  The normalised string is stored in
    ``self._symbol``.

    Raises
    ------
    AssertionError
        If *symbol* is not a string or contains an unknown element.
    Exception
        If *symbol* cannot be parsed; the parser's error is chained as
        the cause.
    """
    # isinstance instead of an exact type comparison, so str subclasses
    # are accepted as well.
    assert isinstance(symbol, str), "Symbol must be a string."
    try:
        s = string2symbols(symbol)
    except Exception as expt:
        # Keep the original parser error attached for debugging.
        raise Exception(
            "String could not be interpreted as atomic symbols.") from expt
    assert all(k in chemical_symbols
               for k in s), "Symbol is not an element from the PSE."
    # NOTE: a trailing `.format("metal")` used to follow here.  It was
    # applied to the string returned by Formula.format, i.e. it was
    # str.format without replacement fields and changed nothing, so it
    # was dropped; the stored value is unchanged.
    self._symbol = Formula.from_list(s).format("reduce")
def substitute_atoms(self, atoms, new_symbols):
    """Return copies of *atoms* with its elements replaced by new ones.

    The unique old symbols are ordered by their count (divided by the
    factor returned by ``Formula.reduce()``), the unique new symbols by
    their count in *new_symbols*.  Every ordering of the new symbols in
    which each position is filled by a symbol sharing that position's
    count, and no symbol is used twice, yields one substituted copy of
    *atoms*.
    """
    reduction = Formula(atoms.get_chemical_formula()).reduce()[1]
    symbols = np.array(atoms.get_chemical_symbols(), dtype='U2')

    # Old species, ordered by (reduced) count.
    species_old, counts_old = np.unique(symbols, return_counts=True)
    order = np.argsort(counts_old / reduction)
    species_old = species_old[order]

    # New species, ordered by count.
    species_new, counts_new = np.unique(new_symbols, return_counts=True)
    order = np.argsort(counts_new)
    counts_new = counts_new[order]
    species_new = species_new[order]

    # Grow the assignments one position at a time.
    partial = [[]]
    for count in counts_new:
        candidates = np.where(counts_new == count)[0]
        assignments = []
        for prefix in partial:
            for j in candidates:
                symbol = species_new[j]
                if symbol not in prefix:
                    assignments.append(prefix + [symbol])
        partial = list(assignments)

    # Sites are always located in the untouched original symbols, while
    # `symbols` itself is overwritten for each assignment.
    reference = symbols.copy()
    atoms_list = []
    for assignment in assignments:
        substituted = atoms.copy()
        for position, old_symbol in enumerate(species_old):
            sites = [k for k, s in enumerate(reference) if s == old_symbol]
            symbols[sites] = np.repeat([assignment[position]], len(sites))
        substituted.set_chemical_symbols(symbols)
        atoms_list.append(substituted.copy())
    return atoms_list
def main():
    """Post-process a series of jobs into energies per area and plot them.

    Aborts via ``sys.exit`` when fewer than two jobs are found, when the
    first job is not named ``'0.00'`` or when the job names (read as
    floats) disagree with the values returned by ``check_constraints``.
    Otherwise the results are passed to ``write_output`` and
    ``plot_output``.
    """
    qe = vf.phy_const('qe')
    jobn, Etot, Eent, pres = vf.vasp_read_post_data()

    # At least two jobs are required.
    if len(jobn) < 2:
        sys.exit('==> ABORT! more structures needed. ')

    if jobn[0] != '0.00':
        sys.exit('==> ABORT! no reference state. ')

    latoms = vf.get_list_of_atoms()
    reference = latoms[0]

    # Norm of the cross product of the first two cell vectors.
    Asf = np.linalg.norm(
        np.cross(reference.cell[0, :], reference.cell[1, :]))
    a11 = reference.cell[0, 0]
    a22 = reference.cell[1, 1]

    natoms = reference.get_positions().shape[0]
    E0bulk = Etot[0] / natoms

    dE, da33 = check_constraints(Etot, latoms)

    # check jobname
    k = np.array([float(name) for name in jobn])
    if np.linalg.norm(k - da33) > 1e-10:
        sys.exit('==> ABORT. wrong jobname. ')

    gamma = dE / Asf * qe * 1e23   # [mJ/m^2]

    from ase.formula import Formula
    formula_latex = Formula(reference.get_chemical_formula()).format('latex')
    str_all = '%s\n$A =$%.4f $\\mathrm{\\AA}^2$' % (formula_latex, Asf)

    #=========================
    write_output(Asf, a11, a22, E0bulk, jobn, dE, gamma, da33)
    plot_output(gamma, da33, str_all)
def test_h_distance(self):
    """With max_h_distance=1.0, an H placed 1.5 away ends the episode."""
    env = MolecularEnvironment(
        reward=self.reward,
        observation_space=self.observation_space,
        action_space=self.action_space,
        formulas=[Formula('H2CO')],
        max_h_distance=1.0,
    )

    # First H can be on its own
    first_h = Atom(symbol='H', position=(0, 0, 0))
    _obs, _reward, done, _info = env.step(
        action=self.action_space.from_atom(atom=first_h))
    self.assertFalse(done)

    # Second H cannot
    second_h = Atom(symbol='H', position=(0, 1.5, 0))
    _obs, _reward, done, _info = env.step(
        action=self.action_space.from_atom(atom=second_h))
    self.assertTrue(done)
def bondAnalysis(data, focusElement = "C", bondelems = ["C", "F", "H", "Si", "N"], verbose = False):
    """
    Count, for every atom of `focusElement`, its bonds to each element in
    `bondelems`.

    `data` should be a pd Series consisting of (structure id: Atoms object)
    pairs. It is iterated with `.items()`, because `Series.iteritems()`
    no longer exists in pandas >= 2.0.

    Returns `(cbonds, combos, combolists)`:
      * `cbonds`: DataFrame indexed by (structure id, atom index) with one
        column per bond element holding the number of bonded neighbours
        of that element.
      * `combos`: Series mapping each neighbour combination (Hill-formatted
        formula string) to how often it occurs.
      * `combolists`: Series mapping each combination to the list of
        (structure id, atom index) keys showing it.

    Length of returned values reflects only # of atoms of focusElement that
    have at least one bond to an atom in bondelems.

    `bondelems` is only iterated, never modified, so the shared default
    list is harmless here.
    """
    analyses = {key: Analysis(value) for key, value in data.items()}
    # A dict of `get_bonds` results used to be built here and was then
    # overwritten unread by `cbonds = {}` below; it was removed.
    cIdxs = {key: [atom.index for atom in value if atom.symbol == focusElement]
             for key, value in data.items()}

    # construct cbonds
    cbonds = {}
    for key, lst in cIdxs.items():
        _bonds = analyses[key].all_bonds[0]
        _struct = data[key]
        for idx in lst:
            mybonds = _bonds[idx]
            mybondDict = {
                bondelem: sum(_struct[i].symbol == bondelem for i in mybonds)
                for bondelem in bondelems
            }
            if sum(mybondDict.values()) == 0:
                if verbose:
                    print("no bonds between focusElement and bondelems detected")
            else:
                cbonds[(key, idx)] = mybondDict
    cbonds = pd.DataFrame(cbonds).T

    # construct combos
    combos = {}
    combolists = {}
    for atom_key, neighbour_counts in cbonds.iterrows():
        # Repeat every element symbol by its count, e.g. "CCH".
        newkey = "".join([elem * n for elem, n in neighbour_counts.items()
                          if n > 0])
        newkey = Formula(newkey).format('hill')
        combos[newkey] = combos.get(newkey, 0) + 1
        combolists[newkey] = combolists.get(newkey, []) + [atom_key]
    combos = pd.Series(combos)
    combolists = pd.Series(combolists)

    return cbonds, combos, combolists
def solvated(symbols):
    """Extract solvation energies from database.

    symbols: str
        Extract only those molecules that contain the chemical elements
        given by the symbols string (plus water and H+).

    Data from:

        Johnson JW, Oelkers EH, Helgeson HC (1992)
        Comput Geosci 18(7):899.
        doi:10.1016/0098-3004(92)90029-Q

    and:

        Pourbaix M (1966)
        Atlas of electrochemical equilibria in aqueous solutions.
        No. v. 1 in Atlas of Electrochemical Equilibria in Aqueous Solutions.
        Pergamon Press, New York.

    Returns list of (name, energy) tuples.
    """
    if isinstance(symbols, str):
        symbols = Formula(symbols).count().keys()

    # The module-level table is filled on first use and reused afterwards.
    if not _solvated:
        for line in _aqueous.splitlines():
            raw_energy, formula = line.split(',')
            name = formula + '(aq)'
            count, charge, aq = parse_formula(name)
            energy = float(raw_energy) * 0.001 * units.kcal / units.mol
            _solvated.append((name, count, charge, aq, energy))

    # Keep a species only if each of its elements is H, O or requested.
    return [
        (name, energy)
        for name, count, _charge, _aq, energy in _solvated
        if all(symbol in 'HO' or symbol in symbols for symbol in count)
    ]
def test_addition(self):
    """Placing one H from 'H2CO' updates atoms and remaining formula."""
    env = MolecularEnvironment(
        reward=self.reward,
        observation_space=self.observation_space,
        action_space=self.action_space,
        formulas=[Formula('H2CO')],
    )

    hydrogen = Atom(symbol='H', position=(0.0, 1.0, 0.0))
    obs, reward, done, info = env.step(
        action=self.action_space.from_atom(hydrogen))

    placed_atoms, remaining = self.observation_space.parse(obs)
    self.assertEqual(placed_atoms[0].symbol, 'H')

    # One H has been taken out of H2CO.
    expected_counts = {'H': 1, 'C': 1, 'O': 1, 'N': 0, 'X': 0}
    self.assertDictEqual(remaining.count(), expected_counts)

    self.assertEqual(reward, 0.0)
    self.assertFalse(done)
def get_struct_id(self, universal=False):
    """
    Get the id for the structure. If a struct_id has already been stored,
    this will be returned. Otherwise, a universal struct_id will be
    constructed. If universal argument is True, then the current struct_id
    will be discarded and a universal struct_id will be constructed.

    The universal id has the form
    ``<Structure|Molecule>_<hill formula>_<year><month><day>_<random>``
    and is stored in ``self.struct_id`` before it is returned.
    """
    # `== False` is kept on purpose so that exactly the same argument
    # values as before return the stored id.
    if len(self.struct_id) > 0 and universal == False:
        return self.struct_id

    ## Get type
    if len(self.get_lattice_vectors_better()) > 0:
        name = "Structure"
    else:
        name = "Molecule"

    ## Add formula in Hill notation (Formula.format returns a string)
    formula = Formula.from_list(self.geometry["element"]).format("hill")
    name += "_{}".format(formula)

    ## Add date. The last field used to repeat the year instead of the
    ## day of the month.
    today = datetime.date.today()
    name += "_{}{}{}".format(today.year, today.month, today.day)

    ## Add random string
    name += "_{}".format(rand_str(10))

    self.struct_id = name
    return self.struct_id
def string2symbols(s):
    """Return the chemical symbols obtained by iterating ``Formula(s)``."""
    parsed = Formula(s)
    return [symbol for symbol in parsed]
def formula(self):
    """Return a Formula built from the symbols of ``self.numbers``."""
    symbols = [chemical_symbols[Z] for Z in self.numbers]
    return Formula.from_list(symbols)
def read_vasp(filename='CONTCAR'):
    """Import POSCAR/CONTCAR type file.

    Reads the unit cell, the atomic positions and the selective-dynamics
    constraints.  Atom types are taken from the symbol line (VASP 5.x
    layout) or guessed from the first comment line (VASP 4.x layout);
    when that fails they are looked up through ``atomtypes_outpot``.

    NOTE(review): *filename* is used directly as an open file object
    (``readline()`` / ``.name``) -- presumably a wrapper outside this
    block opens the path; confirm.
    """
    from ase.constraints import FixAtoms, FixScaled
    from ase.data import chemical_symbols

    fd = filename

    # The first line is formally a comment, but in VASP 4.x files it
    # conventionally lists the atom symbols (e.g. "Ag Ge") in file
    # order.  In the 5.x format the symbols have their own line further
    # down.  Keep the first line in case it is needed later.
    line1 = fd.readline()

    lattice_constant = float(fd.readline().split()[0])

    # Three lattice vectors, scaled by the lattice constant.
    lattice_rows = []
    for _ in range(3):
        s = fd.readline().split()
        lattice_rows.append((float(s[0]), float(s[1]), float(s[2])))
    basis_vectors = np.array(lattice_rows) * lattice_constant

    # Number of atoms per species, in the same order as the symbols.
    atom_symbols = []
    numofatoms = fd.readline().split()

    # If this line does not start with an integer it is the symbol line
    # of a 5.x file and the counts follow on the next line.
    try:
        int(numofatoms[0])
    except ValueError:
        vasp5 = True
        atomtypes = numofatoms
        numofatoms = fd.readline().split()
    else:
        vasp5 = False

    # Drop a trailing comment: cut at the first token containing '!'.
    for position, token in enumerate(numofatoms):
        if '!' in token:
            numofatoms = numofatoms[:position]
            break

    if not vasp5:
        # Split the comment line (first in the file) into words and
        # try to compose a list of chemical symbols
        from ase.formula import Formula
        import re
        atomtypes = []
        for word in line1.split():
            word_without_delims = re.sub(r"-|_|,|\.|=|[0-9]|^", "", word)
            if not word_without_delims:
                continue
            try:
                atomtypes.extend(list(Formula(word_without_delims)))
            except ValueError:
                # The word is not a formula; treat it as plain comment.
                pass
        # Now the list of chemical symbols atomtypes must be formed.
        # For example: atomtypes = ['Pd', 'C', 'O']

        numsyms = len(numofatoms)
        if len(atomtypes) >= numsyms:
            # Enough symbols found; make sure they are all real elements.
            if any(atype not in chemical_symbols
                   for atype in atomtypes[:numsyms]):
                atomtypes = atomtypes_outpot(fd.name, numsyms)
        elif len(atomtypes) == 1 and '_' in atomtypes[0]:
            # First line in POSCAR/CONTCAR didn't contain enough symbols.
            # Sometimes the first line is of the form "CoP3_In-3.pos".
            atomtypes = get_atomtypes_from_formula(atomtypes[0])
        else:
            atomtypes = atomtypes_outpot(fd.name, numsyms)

    for i, num in enumerate(numofatoms):
        numofatoms[i] = int(num)
        atom_symbols.extend(atomtypes[i] for _ in range(numofatoms[i]))

    # Check if Selective dynamics is switched on
    sdyn = fd.readline()
    selective_dynamics = sdyn[0].lower() == 's'

    # Check if atom coordinates are cartesian or direct
    ac_type = fd.readline() if selective_dynamics else sdyn
    cartesian = ac_type[0].lower() in ('c', 'k')

    tot_natoms = sum(numofatoms)
    atoms_pos = np.empty((tot_natoms, 3))
    if selective_dynamics:
        selective_flags = np.empty((tot_natoms, 3), dtype=bool)

    for index in range(tot_natoms):
        ac = fd.readline().split()
        atoms_pos[index] = (float(ac[0]), float(ac[1]), float(ac[2]))
        if selective_dynamics:
            # A flag of 'F' marks the corresponding coordinate as fixed.
            selective_flags[index] = [flag == 'F' for flag in ac[3:6]]

    if cartesian:
        atoms_pos *= lattice_constant

    atoms = Atoms(symbols=atom_symbols, cell=basis_vectors, pbc=True)
    if cartesian:
        atoms.set_positions(atoms_pos)
    else:
        atoms.set_scaled_positions(atoms_pos)

    if selective_dynamics:
        constraints = []
        indices = []
        for ind, sflags in enumerate(selective_flags):
            if sflags.all():
                # Fully fixed atoms are collected into one FixAtoms.
                indices.append(ind)
            elif sflags.any():
                constraints.append(FixScaled(atoms.get_cell(), ind, sflags))
        if indices:
            constraints.append(FixAtoms(indices))
        if constraints:
            atoms.set_constraint(constraints)

    return atoms
def formula(self):
    """Return ``self.symbols`` as a formula string in 'metal' format."""
    tree = [(self.symbols, 1)]
    return Formula('', _tree=tree).format('metal')
def get_latex_symbol(self):
    """Return ``self.symbol`` formatted by ``Formula`` in 'latex' mode."""
    return Formula(self.symbol).format("latex")
def __sub__(self, other):
    """Return the difference of two Mulliken contributions.

    The contribution values are subtracted, the element symbols of both
    operands are merged into one ``"reduce"``-formatted formula string
    prefixed with ``$\\Delta$`` and the ``l`` labels are joined.

    Changes with respect to the previous implementation:

    * The labels are de-duplicated in first-seen order.  The former
      ``"".join(set(...))`` relied on set iteration order, which for
      strings varies between interpreter runs.
    * Both symbols are split with ``string2symbols`` before merging, as
      in ``__add__``.  The symbol strings used to be passed unsplit to
      ``Formula.from_list``; for single-element symbols the result is
      the same.
    * The prefix is a raw string; ``"\\D"`` is an invalid escape sequence
      in a normal string literal.  The resulting text is unchanged.
    """
    label = "".join(dict.fromkeys([self.l, other.l]))
    difference = self.contribution - other.contribution
    symbols = string2symbols(self.symbol) + string2symbols(other.symbol)
    symbol = r"$\Delta$" + Formula.from_list(symbols).format("reduce")
    return MullikenContribution(symbol, difference, label)