def read_nwchem(filename):
    """Read the geometry from an NWChem output.

    Every ``XYZ format geometry`` section in the output is parsed in turn;
    the result of the last one is returned.

    filename: str or file-like object
        Path of the output file, or an already opened object providing
        ``readlines()``.  A file opened here is always closed again, even
        if parsing fails.

    Returns the object produced by ``read_xyz`` for the last section.
    Raises ``ValueError`` if the output contains no such section.
    """
    opened_here = isinstance(filename, str)
    fileobj = open(filename) if opened_here else filename
    try:
        lines = fileobj.readlines()
    finally:
        # Only close what we opened; a caller-supplied object stays open.
        if opened_here:
            fileobj.close()

    atoms = None
    found = False
    i = 0
    while i < len(lines):
        if 'XYZ format geometry' not in lines[i]:
            i += 1
            continue
        # Two lines below the banner is the atom count, followed by the
        # comment line and then one line per atom.
        natoms = int(lines[i + 2].split()[0])
        block = []
        for j in range(2, natoms + 4):
            xyzstring = lines[i + j]
            symbol = xyzstring.split()[0].strip()
            # replace bq ghost with X: MDTMP can we do better?
            if symbol.startswith('bq'):
                xyzstring = xyzstring.replace(symbol, 'X')
            block.append(xyzstring)
        atoms = read_xyz(StringIO(''.join(block)))
        found = True
        i += natoms + 4

    if not found:
        raise ValueError(
            'No "XYZ format geometry" section found in NWChem output')
    return atoms
def read_nwchem(filename):
    """Read the geometry from an NWChem output.

    Every ``XYZ format geometry`` section in the output is parsed in turn;
    the result of the last one is returned.

    filename: str or file-like object
        Path of the output file, or an already opened object providing
        ``readlines()``.  Previously only a path worked, because the file
        handle was never bound for file-like input.  A file opened here is
        always closed again, even if parsing fails.

    Returns the object produced by ``read_xyz`` for the last section.
    Raises ``ValueError`` if the output contains no such section.
    """
    opened_here = isinstance(filename, str)
    fileobj = open(filename) if opened_here else filename
    try:
        lines = fileobj.readlines()
    finally:
        # Only close what we opened; a caller-supplied object stays open.
        if opened_here:
            fileobj.close()

    atoms = None
    found = False
    i = 0
    while i < len(lines):
        if 'XYZ format geometry' not in lines[i]:
            i += 1
            continue
        # Two lines below the banner is the atom count, followed by the
        # comment line and then one line per atom.
        natoms = int(lines[i + 2].split()[0])
        block = []
        for j in range(2, natoms + 4):
            xyzstring = lines[i + j]
            symbol = xyzstring.split()[0].strip()
            # replace bq ghost with X: MDTMP can we do better?
            if symbol.startswith('bq'):
                xyzstring = xyzstring.replace(symbol, 'X')
            block.append(xyzstring)
        atoms = read_xyz(StringIO(''.join(block)))
        found = True
        i += natoms + 4

    if not found:
        raise ValueError(
            'No "XYZ format geometry" section found in NWChem output')
    return atoms
def read_nwchem_input(filename):
    """Read the geometry from an NWChem input file.

    The lines between a line starting with ``geometry`` and the next line
    starting with ``end`` are wrapped in an XYZ header and handed to
    ``read_xyz``.  If several ``geometry`` blocks are present the last
    one is used.

    filename: str or file-like object
        Path of the input file, or an already opened object providing
        ``readlines()``.  A file opened here is always closed again.

    Returns the object produced by ``read_xyz`` with its cell set to zero.
    Raises ``ValueError`` if no ``geometry`` block is found or if the block
    is not terminated by ``end``.
    """
    opened_here = isinstance(filename, str)
    fileobj = open(filename) if opened_here else filename
    try:
        lines = fileobj.readlines()
    finally:
        if opened_here:
            fileobj.close()

    # Find geometry region of input file.  stopline == -1 marks "inside a
    # geometry block, still waiting for its end line".
    startline = None
    stopline = 0
    for index, line in enumerate(lines):
        if line.startswith('geometry'):
            startline = index + 1
            stopline = -1
        elif line.startswith('end') and stopline == -1:
            stopline = index

    if startline is None:
        raise ValueError('No geometry block found in NWChem input')
    if stopline == -1:
        raise ValueError('Geometry block in NWChem input is not terminated')

    # Format and send to read_xyz.
    body = lines[startline:stopline]
    xyz_text = '%i\n' % len(body) + ' geometry\n' + ''.join(body)
    atoms = read_xyz(StringIO(xyz_text))
    atoms.set_cell((0., 0., 0.))  # no unit cell defined
    return atoms
def read_orca_input(filename):
    """Read the geometry from an ORCA input file.

    The coordinate block starts after the first line containing both
    ``xyz`` and ``*`` and ends at the next line containing ``*``.  Blank
    lines inside the block are ignored.

    filename: str or file-like object
        Path of the input file, or an already opened object providing
        ``readlines()``.  A file opened here is always closed again.

    Returns the object produced by ``read_xyz``.
    Raises ``ValueError`` if the coordinate block is missing or is not
    terminated.
    """
    opened_here = isinstance(filename, str)
    fileobj = open(filename) if opened_here else filename
    try:
        lines = fileobj.readlines()
    finally:
        if opened_here:
            fileobj.close()

    # Find geometry region of input file.
    header = None
    for number, line in enumerate(lines):
        if 'xyz' in line and '*' in line:
            header = number
            break
    if header is None:
        raise ValueError('No "* xyz" coordinate block found in ORCA input')

    coords = []
    for line in lines[header + 1:]:
        if '*' in line:
            break
        if line.strip():
            coords.append(line)
    else:
        # Ran off the end of the file without seeing the closing "*".
        raise ValueError('Coordinate block in ORCA input is not terminated')

    xyzstring = str(len(coords)) + '\n\n' + ''.join(coords)
    return read_xyz(StringIO(xyzstring))
def evaluate_molecules(molecules: List[str], b3lyp_energies: List[float]) -> List[float]:
    """Compute the atomization energy of molecules

    Args:
        molecules ([str]): XYZ-format molecular structures. Assumed to be fully-relaxed
        b3lyp_energies ([float]): B3LYP total energies of structures
    Returns:
        ([float]): Estimated G4MP2 atomization energies of molecules
    """
    # Parse the first frame of every XYZ string
    structures = []
    for xyz_text in molecules:
        structures.append(next(read_xyz(StringIO(xyz_text))))

    # Turn each structure into a model input using the module-level environment
    converter = AtomsConverter(environment)
    model_inputs = [converter.convert_atoms(structure) for structure in structures]

    # Attach the B3LYP energy to each input under the 'u0' key
    for entry, energy in zip(model_inputs, b3lyp_energies):
        entry['u0'] = torch.Tensor(np.expand_dims(energy, 0))

    # Evaluate the model one structure at a time
    predictions = []
    for entry in model_inputs:
        prediction = model(entry)
        predictions.append(np.squeeze(prediction['y'].cpu().data.numpy()))

    # Convert each prediction to an atomization energy
    energies = []
    for structure, predicted in zip(structures, predictions):
        energies.append(compute_atomization_energy(structure, predicted, 'g4mp2'))
    return energies
def read_orca(filename):
    """Read the geometry from an ORCA output.

    Only the first ``CARTESIAN COORDINATES (ANGSTROEM)`` section is used.
    The line directly below the banner is skipped and coordinate lines are
    collected until the first blank line.

    filename: str or file-like object
        Path of the output file, or an already opened object providing
        ``readlines()``.  A file opened here is always closed again.

    Returns the object produced by ``read_xyz``.
    Raises ``ValueError`` if the section is missing or is not terminated
    by a blank line.
    """
    opened_here = isinstance(filename, str)
    fileobj = open(filename) if opened_here else filename
    try:
        lines = fileobj.readlines()
    finally:
        if opened_here:
            fileobj.close()

    header = None
    for number, line in enumerate(lines):
        if 'CARTESIAN COORDINATES (ANGSTROEM)' in line:
            header = number
            break
    if header is None:
        raise ValueError(
            'No "CARTESIAN COORDINATES (ANGSTROEM)" section in ORCA output')

    coords = []
    # header + 1 is the line under the banner; atoms start at header + 2.
    for line in lines[header + 2:]:
        if not line.strip():
            break
        coords.append(line)
    else:
        raise ValueError('Coordinate section in ORCA output is not terminated')

    xyzstring = str(len(coords)) + '\n\n' + ''.join(coords)
    return read_xyz(StringIO(xyzstring))
def evaluate_molecules(molecules: List[str], b3lyp_energies: List[float]) -> List[float]:
    """Compute the atomization energy of molecules

    Args:
        molecules ([str]): XYZ-format molecular structures. Assumed to be fully-relaxed
        b3lyp_energies ([float]): B3LYP total energies of structures
    Returns:
        ([float]): Estimated G4MP2 atomization energies of molecules
    """
    # Build a qml Compound for every molecule
    compounds = [Compound(StringIO(xyz_text)) for xyz_text in molecules]

    # B3LYP atomization energy of every molecule
    b3lyp_atom = []
    for xyz_text, total_energy in zip(molecules, b3lyp_energies):
        structure = next(read_xyz(StringIO(xyz_text)))
        b3lyp_atom.append(compute_atomization_energy(structure, total_energy, 'b3lyp'))

    # Generate the FCHL representation of each compound and stack them
    representations = []
    for compound in compounds:
        compound.generate_fchl_representation(max_size)
        representations.append(compound.representation)
    reps = np.array(representations)

    # Predict the difference between B3LYP and G4MP2
    delta = model.predict(reps)

    # The estimate is the B3LYP value plus the predicted difference
    return np.add(b3lyp_atom, delta)
def _load_molecule(self, molecule_path):
    """
    Load molecule from file (can handle all ase formats).

    The result of ``read(molecule_path)`` is stored in ``self.molecule``.

    Args:
        molecule_path (str): Path to molecular geometry
    """
    # The previous implementation compared os.path.splitext(...)[-1] with
    # 'xyz'.  splitext keeps the leading dot ('.xyz'), so that comparison
    # could never be true and every file already went through ``read``.
    # The unreachable ``read_xyz`` branch is dropped; behaviour is unchanged.
    self.molecule = read(molecule_path)
def acetaldehyde():
    """Return the first structure parsed from an embedded acetaldehyde XYZ."""
    xyz_text = """7
H4 C2 O1
C -0.002945 1.509914 0.008673
C 0.026083 0.003276 -0.037459
O 0.942288 -0.655070 -0.456826
H 0.922788 1.926342 -0.391466
H -0.862015 1.878525 -0.564795
H -0.150506 1.843934 1.042891
H -0.894430 -0.486434 0.357749"""
    frames = read_xyz(StringIO(xyz_text))
    return next(frames)
def main(filename, source, x=1, y=1, z=1):
    """Write the first frame of an XYZ file to ``../POSCAR_ec`` with a cell.

    Args:
        filename: XYZ input passed to ``xyz.read_xyz``.
        source: ``"NA"`` to build a diagonal cell from ``x``, ``y``, ``z``;
            anything else is passed to ``vasp.read_vasp`` and that
            structure's cell is copied.
        x, y, z: Diagonal entries of the cell when ``source`` is ``"NA"``.

    Raises:
        Exception: If ``source`` is not ``"NA"`` and cannot be read by
            ``vasp.read_vasp``.  The underlying error is attached as the
            cause.
    """
    temp = list(xyz.read_xyz(filename))[0]
    if source == "NA":
        temp.cell = Cell([[x, 0, 0], [0, y, 0], [0, 0, z]])
    else:
        try:
            temp.cell = vasp.read_vasp(source).cell
        except Exception as err:
            # A bare ``except:`` would also trap KeyboardInterrupt and
            # SystemExit; only genuine read failures are translated here.
            raise Exception("source is neither valid POSCAR nor NA") from err
    vasp.write_vasp("../POSCAR_ec", temp, sort=True, vasp5=True)
def evaluate_schnet(models: List[Union[TorchMessage, torch.nn.Module, Path]],
                    molecules: List[str],
                    property_name: str,
                    batch_size: int = 64,
                    device: str = 'cpu') -> np.ndarray:
    """Run inference for a machine learning model

    Args:
        models: List of models to evaluate. Either a SchNet model or
            the bytes corresponding to a serialized model
        molecules: XYZ-format structures of molecules to be evaluate
        property_name: Name of the property being predicted
        batch_size: Number of molecules to evaluate per batch
        device: Device on which to run the computation
    Returns:
        Stacked predictions, transposed so that each column belongs to one model
    """
    # Unpack messages or load from disk, keyed on the type of the first entry
    first_model = models[0]
    if isinstance(first_model, TorchMessage):
        models = [message.get_model(device) for message in models]
    elif isinstance(first_model, (Path, str)):
        # Load to main memory first
        models = [torch.load(path, map_location='cpu') for path in models]

    with TemporaryDirectory() as td:
        # Parse the first frame of each XYZ string
        atoms = [next(read_xyz(StringIO(xyz_text), slice(None))) for xyz_text in molecules]

        # Store the structures, without properties, in a temporary database
        run_file = os.path.join(td, 'run_data.db')
        db = AtomsData(run_file, available_properties=[])
        db.add_systems(atoms, [{} for _ in atoms])

        loader = AtomsLoader(db, batch_size=batch_size)

        # Evaluate each model over all batches
        y_preds = []
        for model in models:
            model.to(device)
            batch_outputs = []
            for batch in loader:
                on_device = {key: value.to(device) for key, value in batch.items()}
                pred = model(on_device)
                batch_outputs.append(pred[property_name].detach().cpu().numpy())
            y_preds.append(np.squeeze(np.concatenate(batch_outputs)))
        return np.vstack(y_preds).T
def read_centers(filename=None):
    """Split the first frame of an XYZ file into 'X' sites and real atoms.

    Returns a tuple ``(wannier_centers, atoms)``: the positions of all
    entries whose symbol is 'X', and an ``Atoms`` object built from the
    remaining entries with the cell of the frame that was read.
    """
    frame = list(read_xyz(filename))[0]
    symbols = frame.get_chemical_symbols()
    positions = frame.get_positions()

    # Single pass: 'X' entries are Wannier centers, everything else is an atom
    wannier_centers = []
    asymbols = []
    aposes = []
    for symbol, position in zip(symbols, positions):
        if symbol == 'X':
            wannier_centers.append(position)
        else:
            asymbols.append(symbol)
            aposes.append(position)

    cell = frame.get_cell()
    atoms = Atoms(symbols=asymbols, positions=aposes, cell=cell)
    return wannier_centers, atoms
def make_schnetpack_data(dataset, dbpath, properties, xyz_col='xyz', conformers=None, overwrite=True):
    """Convert a Pandas DataFrame to a SchNet database

    Args:
        dataset (pd.DataFrame): Dataset to convert
        dbpath (string): Path to database to be saved
        properties ([string]): List of properties to include in the dataset
        xyz_col (string): Name of the column with the XYZ data
        conformers (str): Name of column with conformers as xyz
        overwrite (True): Whether to overwrite the database
    Returns:
        The ``AtomsData`` object after all systems were added
    """

    def _first_frame(xyz_text):
        """Parse an XYZ string and return its first structure."""
        return next(read_xyz(StringIO(xyz_text)))

    # Remove an existing database when overwriting was requested
    if os.path.exists(dbpath) and overwrite:
        os.unlink(dbpath)

    # One parsed structure per row
    atoms = dataset[xyz_col].apply(_first_frame)

    # Every requested property except the XYZ column itself
    prop_cols = set(properties).difference([xyz_col])
    property_list = []
    for _, row in dataset.iterrows():
        property_list.append({name: np.atleast_1d(row[name]) for name in prop_cols})

    # Conformers are stored alongside, but are not a required property
    if conformers is not None:
        for entry, conformer in zip(property_list, dataset[conformers]):
            entry['conformers'] = np.atleast_1d(conformer)

    db = AtomsData(dbpath, required_properties=properties,
                   conformers=conformers is not None)
    db.add_systems(atoms, property_list)
    return db
def get_initial_structure(smiles: str) -> Tuple[Atoms, pybel.Molecule]:
    """Generate an initial guess for a molecular structure

    Args:
        smiles: SMILES string
    Returns:
        The structure as an Atoms object, and the pybel molecule it was made from
    """
    # Build the molecule and generate 3D coordinates
    mol = pybel.readstring("smi", smiles)
    mol.make3D()

    # Round-trip through XYZ text to get an ASE object
    xyz_text = mol.write('xyz')
    atoms = next(read_xyz(StringIO(xyz_text), slice(None)))

    # Copy the total charge and the per-atom formal charges
    atoms.charge = mol.charge
    formal_charges = [atom.formalcharge for atom in mol.atoms]
    atoms.set_initial_charges(formal_charges)
    return atoms, mol
def run_model(model, data, xyz_col, additional_cols=None, progbar=True):
    """Runs a SchNetPack model on the column of a dataframe containing XYZ files

    Args:
        model (AtomisticModel): Model to be evaluated
        data (DataFrame): Data to be evaluated
        xyz_col (string): Column containing the XYZ data
        additional_cols ([string]): Any other columns to add to the input (e.g., B3LYP results)
        progbar (boolean): Whether to display a progress bar
    Returns:
        (ndarray) Predictions from the model
    """
    if additional_cols is None:
        additional_cols = []

    converter = AtomsConverter()

    # Pair each XYZ string with the row of extra values that belongs to it
    rows = list(zip(data[xyz_col], data[additional_cols].values))

    results = []
    for xyz_text, extras in tqdm(rows, disable=not progbar, leave=False):
        # First structure in the XYZ text, converted to model inputs
        structure = next(read_xyz(StringIO(xyz_text)))
        inputs = converter.convert_atoms(structure)

        # Add each extra column as a one-element tensor
        for position, column in enumerate(additional_cols):
            inputs[column] = torch.Tensor(np.expand_dims(extras[position], 0))

        outputs = model(inputs)
        results.append(np.squeeze(outputs['y'].cpu().data.numpy()))
    return np.array(results)
def read_gamess_us(filename):
    """Read the geometry from a GAMESS-US output.

    Only the first coordinate table is used.  The table is located by its
    ``CHARGE X Y Z`` header, matched irrespective of the amount of
    whitespace between the column titles, and ends at the first blank line.
    On each row the first field is the symbol and fields three to five are
    the coordinates, which are multiplied by ``Bohr``.

    filename: str or file-like object
        Path of the output file, or an already opened object providing
        ``readlines()``.  A file opened here is always closed again.

    Returns the object produced by ``read_xyz``.
    Raises ``ValueError`` if the table is missing or is not terminated by a
    blank line.
    """
    opened_here = isinstance(filename, str)
    fileobj = open(filename) if opened_here else filename
    try:
        lines = fileobj.readlines()
    finally:
        if opened_here:
            fileobj.close()

    header = None
    for number, line in enumerate(lines):
        if 'CHARGE X Y Z' in ' '.join(line.split()):
            header = number
            break
    if header is None:
        raise ValueError('No coordinate table found in GAMESS-US output')

    atom_lines = []
    for line in lines[header + 1:]:
        fields = line.split()
        if not fields:
            break
        # A list is required: a ``map`` object cannot be indexed on Python 3.
        pos = [float(value) * Bohr for value in fields[2:5]]
        atom_lines.append(fields[0] + ' ' + str(pos[0]) + ' ' + str(pos[1])
                          + ' ' + str(pos[2]) + '\n')
    else:
        raise ValueError('Coordinate table in GAMESS-US output is not terminated')

    xyzstring = str(len(atom_lines)) + '\n\n' + ''.join(atom_lines)
    return read_xyz(StringIO(xyzstring))
def read_gamess_us_input(filename):
    """Read the geometry from a GAMESS-US input file.

    The first line containing `` $data`` starts the group.  The two lines
    after it are skipped and atom rows are read until a line containing
    ``$end``.  Blank lines are ignored.  On each row the first field is
    the symbol and fields three to five are the coordinates, which are
    passed on unscaled.

    filename: str or file-like object
        Path of the input file, or an already opened object providing
        ``readlines()``.  A file opened here is always closed again.

    Returns the object produced by ``read_xyz``.
    Raises ``ValueError`` if the group is missing or is not terminated.
    """
    opened_here = isinstance(filename, str)
    fileobj = open(filename) if opened_here else filename
    try:
        lines = fileobj.readlines()
    finally:
        if opened_here:
            fileobj.close()

    # Find geometry region of input file.
    header = None
    for number, line in enumerate(lines):
        if ' $data' in line:
            header = number
            break
    if header is None:
        raise ValueError('No $data group found in GAMESS-US input')

    atom_lines = []
    # header + 1 and header + 2 are skipped; atoms start at header + 3.
    for line in lines[header + 3:]:
        if '$end' in line:
            break
        fields = line.split()
        if not fields:
            continue
        # A list is required: a ``map`` object cannot be indexed on Python 3.
        pos = [float(value) for value in fields[2:5]]
        atom_lines.append(fields[0] + ' ' + str(pos[0]) + ' ' + str(pos[1])
                          + ' ' + str(pos[2]) + '\n')
    else:
        raise ValueError('$data group in GAMESS-US input is not terminated')

    xyzstring = str(len(atom_lines)) + '\n\n' + ''.join(atom_lines)
    return read_xyz(StringIO(xyzstring))
def get_initial_structure(smiles: str) -> Tuple[Atoms, Dict[int, Set[int]]]:
    """Generate an initial guess for a molecular structure

    Args:
        smiles: SMILES string
    Returns:
        An ASE atoms object, bond graph
    """
    # Build the molecule and generate 3D coordinates
    mol = pybel.readstring("smi", smiles)
    mol.make3D()

    # Round-trip through XYZ text to get an ASE object
    xyz_text = mol.write('xyz')
    atoms = next(read_xyz(StringIO(xyz_text), slice(None)))

    # One node per atom; one edge per bond, tagged with its rotor flag
    graph = nx.Graph()
    atom_count = len(mol.atoms)
    graph.add_nodes_from(range(atom_count))
    for bond in OBMolBondIter(mol.OBMol):
        # The bond indices are shifted down by one to match the node numbering
        begin = bond.GetBeginAtomIdx() - 1
        end = bond.GetEndAtomIdx() - 1
        graph.add_edge(begin, end, data={"rotor": bond.IsRotor()})
    return atoms, graph
def read_gamess_us(filename):
    """Read the geometry from a GAMESS-US output.

    Only the first coordinate table is used.  The table is located by its
    ``CHARGE X Y Z`` header, matched irrespective of the amount of
    whitespace between the column titles, and ends at the first blank line.
    On each row the first field is the symbol and fields three to five are
    the coordinates, which are multiplied by ``Bohr``.

    filename: str or file-like object
        Path of the output file, or an already opened object providing
        ``readlines()``.  A file opened here is always closed again.

    Returns the object produced by ``read_xyz``.
    Raises ``ValueError`` if the table is missing or is not terminated by a
    blank line.
    """
    opened_here = isinstance(filename, str)
    fileobj = open(filename) if opened_here else filename
    try:
        lines = fileobj.readlines()
    finally:
        if opened_here:
            fileobj.close()

    header = None
    for number, line in enumerate(lines):
        if 'CHARGE X Y Z' in ' '.join(line.split()):
            header = number
            break
    if header is None:
        raise ValueError('No coordinate table found in GAMESS-US output')

    atom_lines = []
    for line in lines[header + 1:]:
        fields = line.split()
        if not fields:
            break
        # A list is required: a ``map`` object cannot be indexed on Python 3.
        pos = [float(value) * Bohr for value in fields[2:5]]
        atom_lines.append(fields[0] + ' ' + str(pos[0]) + ' ' + str(pos[1])
                          + ' ' + str(pos[2]) + '\n')
    else:
        raise ValueError('Coordinate table in GAMESS-US output is not terminated')

    xyzstring = str(len(atom_lines)) + '\n\n' + ''.join(atom_lines)
    return read_xyz(StringIO(xyzstring))
def read_gamess_us_input(filename):
    """Read the geometry from a GAMESS-US input file.

    The first line containing `` $data`` starts the group.  The two lines
    after it are skipped and atom rows are read until a line containing
    ``$end``.  Blank lines are ignored.  On each row the first field is
    the symbol and fields three to five are the coordinates, which are
    passed on unscaled.

    filename: str or file-like object
        Path of the input file, or an already opened object providing
        ``readlines()``.  A file opened here is always closed again.

    Returns the object produced by ``read_xyz``.
    Raises ``ValueError`` if the group is missing or is not terminated.
    """
    opened_here = isinstance(filename, str)
    fileobj = open(filename) if opened_here else filename
    try:
        lines = fileobj.readlines()
    finally:
        if opened_here:
            fileobj.close()

    # Find geometry region of input file.
    header = None
    for number, line in enumerate(lines):
        if ' $data' in line:
            header = number
            break
    if header is None:
        raise ValueError('No $data group found in GAMESS-US input')

    atom_lines = []
    # header + 1 and header + 2 are skipped; atoms start at header + 3.
    for line in lines[header + 3:]:
        if '$end' in line:
            break
        fields = line.split()
        if not fields:
            continue
        # A list is required: a ``map`` object cannot be indexed on Python 3.
        pos = [float(value) for value in fields[2:5]]
        atom_lines.append(fields[0] + ' ' + str(pos[0]) + ' ' + str(pos[1])
                          + ' ' + str(pos[2]) + '\n')
    else:
        raise ValueError('$data group in GAMESS-US input is not terminated')

    xyzstring = str(len(atom_lines)) + '\n\n' + ''.join(atom_lines)
    return read_xyz(StringIO(xyzstring))
def read(filename, index=-1, format=None):
    """Read Atoms object(s) from file.

    filename: str
        Name of the file to read from.  If the name ends in ``@<index>``
        and the suffix can be parsed by ``string2index``, the suffix is
        removed and used as *index*.  Objects that are not strings are
        passed to the format reader untouched.
    index: int or slice
        If the file contains several configurations, the last configuration
        will be returned by default.  Use index=n to get configuration
        number n (counting from zero).  Only forwarded to the readers that
        accept it.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function. If
        it's 'babel', will try to use the OpenBabel library.

    Known formats:

    =========================  ===========
    format                     short name
    =========================  ===========
    GPAW restart-file          gpw
    Dacapo netCDF output file  dacapo
    Old ASE netCDF trajectory  nc
    Virtual Nano Lab file      vnl
    ASE pickle trajectory      traj
    GPAW text output           gpaw-text
    CUBE file                  cube
    XCrySDen Structure File    xsf
    Dacapo text output         dacapo-text
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    Protein Data Bank          pdb
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    =========================  ===========

    Raises ``RuntimeError`` if *format* is not handled by this function.
    """
    from importlib import import_module

    # Only strings can carry an "@index" suffix; file objects have no rfind.
    if isinstance(filename, str):
        p = filename.rfind('@')
        if p != -1:
            try:
                index = string2index(filename[p + 1:])
            except ValueError:
                pass
            else:
                filename = filename[:p]

    if format is None:
        format = filetype(filename)

    if format.startswith('gpw'):
        import gpaw
        r = gpaw.io.open(filename, 'r')
        positions = r.get('CartesianPositions') * Bohr
        numbers = r.get('AtomicNumbers')
        cell = r.get('UnitCell') * Bohr
        pbc = r.get('BoundaryConditions')
        tags = r.get('Tags')
        magmoms = r.get('MagneticMoments')

        atoms = Atoms(positions=positions,
                      numbers=numbers,
                      cell=cell,
                      pbc=pbc)
        if tags.any():
            atoms.set_tags(tags)
        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)
        return atoms

    # format -> (module, function, how the index is passed).  The modules
    # are imported lazily so that only the selected reader is loaded.
    no_index, positional, keyword = 0, 1, 2
    readers = {
        'xyz': ('ase.io.xyz', 'read_xyz', positional),
        'traj': ('ase.io.trajectory', 'read_trajectory', positional),
        'cube': ('ase.io.cube', 'read_cube', positional),
        'nc': ('ase.io.netcdf', 'read_netcdf', positional),
        'gpaw-text': ('ase.io.gpawtext', 'read_gpaw_text', positional),
        'dacapo-text': ('ase.io.dacapo', 'read_dacapo_text', no_index),
        'dacapo': ('ase.io.dacapo', 'read_dacapo', no_index),
        'xsf': ('ase.io.xsf', 'read_xsf', positional),
        'vasp': ('ase.io.vasp', 'read_vasp', no_index),
        'mol': ('ase.io.mol', 'read_mol', no_index),
        'pdb': ('ase.io.pdb', 'read_pdb', no_index),
        'cif': ('ase.io.cif', 'read_cif', no_index),
        'babel': ('ase.io.babel', 'read_babel', keyword),
        'vti': ('ase.io.vtkxml', 'read_vti', no_index),
        'vts': ('ase.io.vtkxml', 'read_vts', no_index),
        'vtu': ('ase.io.vtkxml', 'read_vtu', no_index),
        'iwm': ('ase.io.iwm', 'read_iwm', no_index),
        'Cmdft': ('ase.io.cmdft', 'read_I_info', no_index),
    }

    entry = readers.get(format)
    if entry is None:
        # Reached for any unknown format, so say which one it was.
        raise RuntimeError('File format descriptor ' + format +
                           ' not recognized!')

    module_name, function_name, index_mode = entry
    reader = getattr(import_module(module_name), function_name)
    if index_mode == positional:
        return reader(filename, index)
    if index_mode == keyword:
        return reader(filename, index=index)
    return reader(filename)
# Get the system information host_info = get_platform_info() # Set the random seed np.random.seed(args.random) rng = np.random.RandomState(args.random) # Download the QM9 dataset and get the molecule of interest qm9_path = get_qm9_path() with gzip.open(qm9_path, 'rt') as fp: for _, d in zip(range(args.mol), fp): pass mol_info = json.loads(d) # Parse the molecule coordinates into an ASE object atoms = next(read_xyz(StringIO(mol_info['xyz']))) # Open an experiment directory start_time = datetime.utcnow() out_dir = os.path.join('runs', f'{start_time.strftime("%d%b%y-%H%M%S")}') os.makedirs(out_dir) # Save the parameters and host information with open(os.path.join(out_dir, 'run_params.json'), 'w') as fp: json.dump(run_params, fp, indent=2) with open(os.path.join(out_dir, 'host_info.json'), 'w') as fp: json.dump(host_info, fp, indent=2) # Initialize the ASE calculator calc = Psi4(memory='500MB', **_fidelity[args.fidelity])
"-f", "--format", action="store", type="string", default="xyz", help="format of the output file: POSCAR, (xyz in preparation)") (options, args) = parser.parse_args() print_debug = False if (num < 2): parser.print_help() else: file_xyz = read_xyz(input_file, slice(0, None, 1)) natoms = file_xyz[0].get_number_of_atoms() # if possible read unit-cell if (os.path.isfile(options.cell)): cell = [[], [], []] f = open(options.cell, "r") ls = f.read().splitlines() for i in range(3): l = ls[i].split() cell[i] = [float(l[0]), float(l[1]), float(l[2])] for step in file_xyz: step.set_cell(cell) step.set_pbc([True, True, True]) # --------------- Atoms -----------------
# XYZ text for butadiene: atom count, empty comment line, then one atom per line.
butadiene = """10

C 3.649801161546418 5.442281389577507 3.863313703750026
C 5.051651240044169 5.368220758269772 4.162165876906096
C 5.750174626862403 4.162261915959347 4.240449977068684
C 7.150130182125531 4.155384186721486 4.537328602062397
H 3.218154657585170 4.565210696328925 3.522601038049320
H 3.077656122062729 6.375092902842770 3.826039498180272
H 5.478464901706067 6.370680001794822 4.422235395756437
H 5.320549047980879 3.220584852467720 3.974551561510350
H 7.723359150977955 3.224855971783890 4.574146712279462
H 7.580803493981530 5.034479218283977 4.877211530909463
"""

# The same value h is passed to minimal_box and to the GPAW calculator.
h = 0.3
atoms = Cluster(read_xyz(StringIO.StringIO(butadiene)))
atoms.minimal_box(3., h)
atoms.set_calculator(GPAW(h=h))

# Disabled relaxation step; switch the condition on to run FIRE and write the result.
if 0:
    dyn = FIRE(atoms)
    dyn.run(fmax=0.05)
    atoms.write('butadiene.xyz')

# Vibrations are stored under this name and looked up again by FranckCondon below.
vibname = 'fcvib'
vib = Vibrations(atoms, name=vibname)
vib.run()

# Modul
a = FranckCondon(atoms, vibname, minfreq=250)

# excited state forces
from ase.io.xyz import read_xyz
from gpaw import GPAW
from gpaw.mixer import Mixer
from gpaw import ConvergenceError
from gpaw.mpi import rank
from gpaw.eigensolvers.rmm_diis import RMM_DIIS
from gpaw import setup_paths

# Use setups from the $PWD and $PWD/.. first
# Each insert goes to position 0, so '../' ends up ahead of '.' in the list.
setup_paths.insert(0, '.')
setup_paths.insert(0, '../')

atoms = read_xyz('../Au102_revised.xyz')

prefix = 'Au_cluster'
# Cubic cell of edge L; atom positions are not rescaled, then centered in the cell.
L = 32.0
atoms.set_cell((L,L,L),scale_atoms=False)
atoms.center()
atoms.set_pbc(1)
# Repetition count along each cell axis.
r = [1, 1, 1]
atoms = atoms.repeat(r)
# One value per axis, scaled with the repetition count.
# NOTE(review): presumably the grid-point count per axis -- confirm where n is used.
n = [240 * ri for ri in r]
# nbands (>=1683) is the number of bands per cluster
nbands = 3*6*6*16 # 1728
# Multiply by every repetition factor so the total grows with the number of copies.
for ri in r: nbands = nbands*ri
mixer = Mixer(beta=0.1, nmaxold=5, weight=100.0)
# the next three lines decrease memory usage
es = RMM_DIIS(keep_htpsit=False)
# XYZ text for butadiene: atom count, empty comment line, then one atom per line.
butadiene = """10

C 3.649801161546418 5.442281389577507 3.863313703750026
C 5.051651240044169 5.368220758269772 4.162165876906096
C 5.750174626862403 4.162261915959347 4.240449977068684
C 7.150130182125531 4.155384186721486 4.537328602062397
H 3.218154657585170 4.565210696328925 3.522601038049320
H 3.077656122062729 6.375092902842770 3.826039498180272
H 5.478464901706067 6.370680001794822 4.422235395756437
H 5.320549047980879 3.220584852467720 3.974551561510350
H 7.723359150977955 3.224855971783890 4.574146712279462
H 7.580803493981530 5.034479218283977 4.877211530909463
"""

# The same value h is passed to minimal_box and to the GPAW calculator.
h = 0.3
atoms = Cluster(read_xyz(StringIO.StringIO(butadiene)))
atoms.minimal_box(3.0, h)
atoms.set_calculator(GPAW(h=h))

# Disabled relaxation step; switch the condition on to run FIRE and write the result.
if 0:
    dyn = FIRE(atoms)
    dyn.run(fmax=0.05)
    atoms.write("butadiene.xyz")

# Vibrations are stored under this name and looked up again by FranckCondon below.
vibname = "fcvib"
vib = Vibrations(atoms, name=vibname)
vib.run()

# Modul
a = FranckCondon(atoms, vibname, minfreq=250)

# excited state forces
def train_schnet(
        model: Union[TorchMessage, torch.nn.Module, Path],
        database: Dict[str, float],
        num_epochs: int,
        reset_weights: bool = True,
        property_name: str = 'output',
        test_set: Optional[List[str]] = None,
        device: str = 'cpu',
        batch_size: int = 32,
        validation_split: float = 0.1,
        bootstrap: bool = False,
        random_state: int = 1,
        learning_rate: float = 1e-3,
        patience: int = None,
        timeout: float = None
) -> Union[Tuple[TorchMessage, pd.DataFrame],
           Tuple[TorchMessage, pd.DataFrame, List[float]]]:
    """Train a SchNet model

    Args:
        model: Model to be retrained
        database: Mapping of XYZ format structure to property
        num_epochs: Number of training epochs
        property_name: Name of the property being predicted. Used as the key
            in the training databases, in the loss and metric, and when
            evaluating ``test_set``
        reset_weights: Whether to re-initialize weights before training, or start training from previous
        test_set: Hold-out set. If provided, function will return the performance of the model on those weights
        device: Device (e.g., 'cuda', 'cpu') used for training
        batch_size: Batch size during training
        validation_split: Fraction to training set to use for the validation loss
        bootstrap: Whether to take a bootstrap sample of the training set before training
        random_state: Random seed used for generating validation set and bootstrap sampling
        learning_rate: Initial learning rate for optimizer
        patience: Patience until learning rate is lowered. Default: epochs / 8
        timeout: Maximum training time in seconds
    Returns:
        - model: Retrained model
        - history: Training history
        - test_pred: Predictions on ``test_set``, if provided
    """
    # Make sure the models are converted to Torch models
    if isinstance(model, TorchMessage):
        model = model.get_model(device)
    elif isinstance(model, (Path, str)):
        model = torch.load(model, map_location='cpu')  # Load to main memory first

    # If desired, re-initialize weights
    if reset_weights:
        for module in model.modules():
            if hasattr(module, 'reset_parameters'):
                module.reset_parameters()

    # Separate the database into molecules and properties
    xyz, y = zip(*database.items())
    xyz = np.array(xyz)
    y = np.array(y)

    # Convert the xyz files to ase Atoms.
    # The array is filled element by element: Atoms objects are sequences,
    # so np.array(list_of_atoms) may expand them into a 2-D array of
    # individual atoms instead of a 1-D array of structures.
    atoms = np.empty(len(xyz), dtype=object)
    for position, xyz_text in enumerate(xyz):
        atoms[position] = next(read_xyz(StringIO(xyz_text), slice(None)))

    # Make the training and validation splits
    rng = np.random.RandomState(random_state)
    train_split = rng.rand(len(xyz)) > validation_split
    train_X = atoms[train_split]
    train_y = y[train_split]
    valid_X = atoms[~train_split]
    valid_y = y[~train_split]

    # Perform a bootstrap sample of the training data
    if bootstrap:
        sample = rng.choice(len(train_X), size=(len(train_X),), replace=True)
        train_X = train_X[sample]
        train_y = train_y[sample]

    # Start the training process
    with TemporaryDirectory() as td:
        # Save the data to an ASE Atoms database
        train_file = os.path.join(td, 'train_data.db')
        db = AtomsData(train_file, available_properties=[property_name])
        db.add_systems(train_X, [{property_name: i} for i in train_y])
        train_loader = AtomsLoader(db, batch_size=batch_size, shuffle=True)

        valid_file = os.path.join(td, 'valid_data.db')
        db = AtomsData(valid_file, available_properties=[property_name])
        db.add_systems(valid_X, [{property_name: i} for i in valid_y])
        valid_loader = AtomsLoader(db, batch_size=batch_size)

        # Make the trainer
        opt = optim.Adam(model.parameters(), lr=learning_rate)

        # The loss and metric must use the same key the databases were
        # written with; a fixed 'delta' only worked when property_name
        # happened to be 'delta'.
        loss = trn.build_mse_loss([property_name])
        metrics = [spk.metrics.MeanSquaredError(property_name)]
        if patience is None:
            patience = num_epochs // 8
        hooks = [
            trn.CSVHook(log_path=td, metrics=metrics),
            trn.ReduceLROnPlateauHook(
                opt,
                patience=patience, factor=0.8, min_lr=1e-6,
                stop_after_min=True
            )
        ]
        if timeout is not None:
            hooks.append(TimeoutHook(timeout))

        trainer = trn.Trainer(
            model_path=td,
            model=model,
            hooks=hooks,
            loss_fn=loss,
            optimizer=opt,
            train_loader=train_loader,
            validation_loader=valid_loader,
            checkpoint_interval=num_epochs + 1  # Turns off checkpointing
        )

        trainer.train(device, n_epochs=num_epochs)

        # Load in the best model
        model = torch.load(os.path.join(td, 'best_model'))

        # If desired, report the performance on a test set
        test_pred = None
        if test_set is not None:
            test_pred = evaluate_schnet([model], test_set, property_name=property_name,
                                        batch_size=batch_size, device=device)

        # Move the model off of the GPU to save memory
        if 'cuda' in device:
            model.to('cpu')

        # Load in the training results (the log lives in the temporary directory)
        train_results = pd.read_csv(os.path.join(td, 'log.csv'))

        # Return the results
        if test_pred is None:
            return TorchMessage(model), train_results
        else:
            return TorchMessage(model), train_results, test_pred[:, 0].tolist()
def read(filename, index=-1, format=None):
    """Read Atoms object(s) from file.

    filename: str
        Name of the file to read from.
    index: int or slice
        If the file contains several configurations, the last configuration
        will be returned by default.  Use index=n to get configuration
        number n (counting from zero).
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.

    Known formats:

    =========================  =============
    format                     short name
    =========================  =============
    GPAW restart-file          gpw
    Dacapo netCDF output file  dacapo
    Old ASE netCDF trajectory  nc
    Virtual Nano Lab file      vnl
    ASE pickle trajectory      traj
    ASE bundle trajectory      bundle
    GPAW text output           gpaw-text
    CUBE file                  cube
    XCrySDen Structure File    xsf
    Dacapo text output         dacapo-text
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    VASP OUTCAR file           vasp_out
    SIESTA STRUCT file         struct_out
    ABINIT input file          abinit
    V_Sim ascii file           v_sim
    Protein Data Bank          pdb
    CIF-file                   cif
    FHI-aims geometry file     aims
    FHI-aims output file       aims_out
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    TURBOMOLE coord file       tmol
    TURBOMOLE gradient file    tmol-gradient
    exciting input             exi
    AtomEye configuration      cfg
    WIEN2k structure file      struct
    DftbPlus input file        dftb
    CASTEP geom file           cell
    CASTEP output file         castep
    CASTEP trajectory file     geom
    ETSF format                etsf.nc
    DFTBPlus GEN format        gen
    CMR db/cmr-file            db
    CMR db/cmr-file            cmr
    LAMMPS dump file           lammps
    =========================  =============

    """
    # A string name may carry an "@index" suffix that overrides *index*
    if isinstance(filename, str):
        at = filename.rfind('@')
        if at != -1:
            try:
                index = string2index(filename[at + 1:])
            except ValueError:
                pass
            else:
                filename = filename[:at]

    if isinstance(index, str):
        index = string2index(index)

    if format is None:
        format = filetype(filename)

    # GPAW restart files are decoded here rather than by a reader module
    if format.startswith('gpw'):
        import gpaw
        restart = gpaw.io.open(filename, 'r')
        positions = restart.get('CartesianPositions') * Bohr
        numbers = restart.get('AtomicNumbers')
        cell = restart.get('UnitCell') * Bohr
        pbc = restart.get('BoundaryConditions')
        tags = restart.get('Tags')
        magmoms = restart.get('MagneticMoments')
        energy = restart.get('PotentialEnergy') * Hartree

        forces = None
        if restart.has_array('CartesianForces'):
            forces = restart.get('CartesianForces') * Hartree / Bohr

        atoms = Atoms(positions=positions,
                      numbers=numbers,
                      cell=cell,
                      pbc=pbc)
        if tags.any():
            atoms.set_tags(tags)

        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)
        else:
            magmoms = None

        atoms.calc = SinglePointCalculator(energy, forces, None, magmoms,
                                           atoms)
        return atoms

    # The ETSF reader is a class, so it does not fit the table below
    if format == 'etsf':
        from ase.io.etsf import ETSFReader
        return ETSFReader(filename).read_atoms()

    # format -> (module, function, whether *index* is passed as 2nd argument)
    dispatch = {
        'castep': ('ase.io.castep', 'read_castep', True),
        'castep_cell': ('ase.io.castep', 'read_cell', True),
        'castep_geom': ('ase.io.castep', 'read_geom', True),
        'exi': ('ase.io.exciting', 'read_exciting', True),
        'xyz': ('ase.io.xyz', 'read_xyz', True),
        'traj': ('ase.io.trajectory', 'read_trajectory', True),
        'bundle': ('ase.io.bundletrajectory', 'read_bundletrajectory', True),
        'cube': ('ase.io.cube', 'read_cube', True),
        'nc': ('ase.io.netcdf', 'read_netcdf', True),
        'gpaw-text': ('ase.io.gpawtext', 'read_gpaw_text', True),
        'dacapo-text': ('ase.io.dacapo', 'read_dacapo_text', False),
        'dacapo': ('ase.io.dacapo', 'read_dacapo', False),
        'xsf': ('ase.io.xsf', 'read_xsf', True),
        'vasp': ('ase.io.vasp', 'read_vasp', False),
        'vasp_out': ('ase.io.vasp', 'read_vasp_out', True),
        'abinit': ('ase.io.abinit', 'read_abinit', False),
        'v_sim': ('ase.io.v_sim', 'read_v_sim', False),
        'mol': ('ase.io.mol', 'read_mol', False),
        'pdb': ('ase.io.pdb', 'read_pdb', True),
        'cif': ('ase.io.cif', 'read_cif', True),
        'struct': ('ase.io.wien2k', 'read_struct', False),
        'struct_out': ('ase.io.siesta', 'read_struct', False),
        'vti': ('ase.io.vtkxml', 'read_vti', False),
        'vts': ('ase.io.vtkxml', 'read_vts', False),
        'vtu': ('ase.io.vtkxml', 'read_vtu', False),
        'aims': ('ase.io.aims', 'read_aims', False),
        'aims_out': ('ase.io.aims', 'read_aims_output', True),
        'iwm': ('ase.io.iwm', 'read_iwm', False),
        'Cmdft': ('ase.io.cmdft', 'read_I_info', False),
        'tmol': ('ase.io.turbomole', 'read_turbomole', False),
        'tmol-gradient': ('ase.io.turbomole', 'read_turbomole_gradient',
                          False),
        'cfg': ('ase.io.cfg', 'read_cfg', False),
        'dftb': ('ase.io.dftb', 'read_dftb', False),
        'sdf': ('ase.io.sdf', 'read_sdf', False),
        'gen': ('ase.io.gen', 'read_gen', False),
        'db': ('ase.io.cmr_io', 'read_db', True),
        'lammps': ('ase.io.lammps', 'read_lammps_dump', True),
    }

    if format not in dispatch:
        raise RuntimeError('File format descriptor ' + format +
                           ' not recognized!')

    module_name, function_name, takes_index = dispatch[format]
    # Same effect as "from <module> import <function>", done lazily so only
    # the selected reader module is imported.
    module = __import__(module_name, globals(), locals(), [function_name], 0)
    reader = getattr(module, function_name)
    if takes_index:
        return reader(filename, index)
    return reader(filename)
# Format name -> (module, reader function name, reader takes *index*).
# Modules are imported lazily by read() so that optional dependencies are
# only required for the format actually requested.
_FORMAT_READERS = {
    'castep': ('ase.io.castep', 'read_castep', True),
    'castep_cell': ('ase.io.castep', 'read_cell', True),
    'castep_geom': ('ase.io.castep', 'read_geom', True),
    'exi': ('ase.io.exciting', 'read_exciting', True),
    'xyz': ('ase.io.xyz', 'read_xyz', True),
    'traj': ('ase.io.trajectory', 'read_trajectory', True),
    'bundle': ('ase.io.bundletrajectory', 'read_bundletrajectory', True),
    'cube': ('ase.io.cube', 'read_cube', True),
    'nc': ('ase.io.netcdf', 'read_netcdf', True),
    'gpaw-text': ('ase.io.gpawtext', 'read_gpaw_text', True),
    'dacapo-text': ('ase.io.dacapo', 'read_dacapo_text', False),
    'dacapo': ('ase.io.dacapo', 'read_dacapo', False),
    'xsf': ('ase.io.xsf', 'read_xsf', True),
    'vasp': ('ase.io.vasp', 'read_vasp', False),
    'vasp_out': ('ase.io.vasp', 'read_vasp_out', True),
    'abinit': ('ase.io.abinit', 'read_abinit', False),
    'v_sim': ('ase.io.v_sim', 'read_v_sim', False),
    'mol': ('ase.io.mol', 'read_mol', False),
    'pdb': ('ase.io.pdb', 'read_pdb', True),
    'cif': ('ase.io.cif', 'read_cif', True),
    'struct': ('ase.io.wien2k', 'read_struct', False),
    'struct_out': ('ase.io.siesta', 'read_struct', False),
    'vti': ('ase.io.vtkxml', 'read_vti', False),
    'vts': ('ase.io.vtkxml', 'read_vts', False),
    'vtu': ('ase.io.vtkxml', 'read_vtu', False),
    'aims': ('ase.io.aims', 'read_aims', False),
    'aims_out': ('ase.io.aims', 'read_aims_output', True),
    'iwm': ('ase.io.iwm', 'read_iwm', False),
    'Cmdft': ('ase.io.cmdft', 'read_I_info', False),
    'tmol': ('ase.io.turbomole', 'read_turbomole', False),
    'tmol-gradient': ('ase.io.turbomole', 'read_turbomole_gradient', False),
    'cfg': ('ase.io.cfg', 'read_cfg', False),
    'dftb': ('ase.io.dftb', 'read_dftb', False),
    'sdf': ('ase.io.sdf', 'read_sdf', False),
    'gen': ('ase.io.gen', 'read_gen', False),
    'db': ('ase.io.cmr_io', 'read_db', True),
    # The docstring has always advertised 'cmr' next to 'db'; serve it with
    # the same reader instead of rejecting it as unrecognized.
    'cmr': ('ase.io.cmr_io', 'read_db', True),
    'lammps': ('ase.io.lammps', 'read_lammps_dump', True),
}


def read(filename, index=-1, format=None):
    """Read Atoms object(s) from file.

    filename: str
        Name of the file to read from.  A trailing ``@<index>`` is split
        off and used as *index* when it parses as an index; otherwise the
        ``@`` is kept as part of the file name.
    index: int or slice
        If the file contains several configurations, the last configuration
        will be returned by default.  Use index=n to get configuration
        number n (counting from zero).  A string is converted with
        *string2index*.  Readers that do not take an index ignore it.
    format: str
        Used to specify the file-format.  If not given, the file-format
        will be guessed by the *filetype* function.

    Returns whatever the selected reader returns.  Raises RuntimeError if
    *format* is not one of the names below.

    Known formats:

    =========================  =============
    format                     short name
    =========================  =============
    GPAW restart-file          gpw
    Dacapo netCDF output file  dacapo
    Old ASE netCDF trajectory  nc
    ASE pickle trajectory      traj
    ASE bundle trajectory      bundle
    GPAW text output           gpaw-text
    CUBE file                  cube
    XCrySDen Structure File    xsf
    Dacapo text output         dacapo-text
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    VASP OUTCAR file           vasp_out
    SIESTA STRUCT file         struct_out
    ABINIT input file          abinit
    V_Sim ascii file           v_sim
    Protein Data Bank          pdb
    CIF-file                   cif
    FHI-aims geometry file     aims
    FHI-aims output file       aims_out
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    TURBOMOLE coord file       tmol
    TURBOMOLE gradient file    tmol-gradient
    exciting input             exi
    AtomEye configuration      cfg
    WIEN2k structure file      struct
    DftbPlus input file        dftb
    CASTEP geom file           castep_cell
    CASTEP output file         castep
    CASTEP trajectory file     castep_geom
    ETSF format                etsf
    DFTBPlus GEN format        gen
    CMR db/cmr-file            db
    CMR db/cmr-file            cmr
    LAMMPS dump file           lammps
    =========================  =============

    The names ``mol``, ``sdf``, ``iwm`` and ``Cmdft`` are accepted as well.
    Any format name starting with ``gpw`` selects the GPAW restart reader.
    ``vnl`` (Virtual Nano Lab) has no reader here and raises RuntimeError.
    """
    from importlib import import_module

    if isinstance(filename, str):
        p = filename.rfind('@')
        if p != -1:
            try:
                index = string2index(filename[p + 1:])
            except ValueError:
                # The text after '@' is not an index: leave the name alone.
                pass
            else:
                filename = filename[:p]

    if isinstance(index, str):
        index = string2index(index)

    if format is None:
        format = filetype(filename)

    if format.startswith('gpw'):
        import gpaw
        r = gpaw.io.open(filename, 'r')
        # Lengths are scaled by Bohr and energies by Hartree (presumably
        # converting from atomic units to ASE units -- confirm).
        positions = r.get('CartesianPositions') * Bohr
        numbers = r.get('AtomicNumbers')
        cell = r.get('UnitCell') * Bohr
        pbc = r.get('BoundaryConditions')
        tags = r.get('Tags')
        magmoms = r.get('MagneticMoments')
        energy = r.get('PotentialEnergy') * Hartree

        if r.has_array('CartesianForces'):
            forces = r.get('CartesianForces') * Hartree / Bohr
        else:
            forces = None

        atoms = Atoms(positions=positions, numbers=numbers, cell=cell,
                      pbc=pbc)
        # Only attach tags / magnetic moments when at least one is non-zero.
        if tags.any():
            atoms.set_tags(tags)
        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)
        else:
            magmoms = None

        atoms.calc = SinglePointCalculator(energy, forces, None, magmoms,
                                           atoms)
        return atoms

    if format == 'etsf':
        # The only reader exposed as a class rather than a plain function.
        from ase.io.etsf import ETSFReader
        return ETSFReader(filename).read_atoms()

    entry = _FORMAT_READERS.get(format)
    if entry is None:
        raise RuntimeError('File format descriptor ' + format +
                           ' not recognized!')

    module_name, reader_name, takes_index = entry
    reader = getattr(import_module(module_name), reader_name)
    if takes_index:
        return reader(filename, index)
    return reader(filename)
# Set-up portion of a GPAW script: reads the geometry from
# '../Au102_revised.xyz', places it in a periodic cubic cell and prepares
# the band count, density mixer and eigensolver objects.
from ase.io.xyz import read_xyz
from gpaw import GPAW
from gpaw.mixer import Mixer
from gpaw import ConvergenceError
from gpaw.mpi import rank
from gpaw.eigensolvers.rmm_diis_old import RMM_DIIS
from gpaw import setup_paths
# Use setups from the $PWD and $PWD/.. first
# (both are inserted at position 0, so '../' ends up ahead of '.').
setup_paths.insert(0, '.')
setup_paths.insert(0, '../')

atoms = read_xyz('../Au102_revised.xyz')
prefix = 'Au_cluster'
# Edge length of the cubic cell (presumably Angstrom -- confirm).
L = 32.0
# scale_atoms=False is passed so the atoms are not rescaled with the cell;
# they are then centered in the new cell.
atoms.set_cell((L,L,L),scale_atoms=False)
atoms.center()
atoms.set_pbc(1)
# Repetition counts handed to atoms.repeat(); [1, 1, 1] keeps a single copy.
r = [1, 1, 1]
atoms = atoms.repeat(r)
# 240 per repetition along each axis (presumably grid points -- confirm).
n = [240 * ri for ri in r]
# nbands (>=1683) is the number of bands per cluster
nbands = 3*6*6*16 # 1728
# Multiply the per-cluster band count by every repetition factor.
for ri in r: nbands = nbands*ri
mixer = Mixer(beta=0.1, nmaxold=5, weight=100.0)
# the next three lines decrease memory usage
es = RMM_DIIS(keep_htpsit=False)
natoms = 0 if(num < 2): parser.print_help() else: # >>>>>>>>>>>>>>>>>>>>> READ GEOMETRY <<<<<<<<<<<<<<<<<<<< if(iformat == "geometry.in"): atoms = read_aims(ifile) # elif(iformat == "cube"): # atoms = read_cube(sys.argv[num-1]) # elif(iformat == "xsf"): # atoms = read_xsf(sys.argv[num-1],read_data=True) elif(iformat == "POSCAR"): atoms = read_vasp(ifile) elif(iformat == "xyz"): atoms = read_xyz(ifile) cfile = options.xyzcell ThereIsCellFile = os.path.isfile(cfile) if(ThereIsCellFile): cell = [[],[],[]] f = open(cfile, "r") ls = f.read().splitlines() for i in range(3): l = ls[i].split() cell[i] = [float(l[0]), float(l[1]), float(l[2])] atoms.set_cell(cell) atoms.set_pbc([True,True,True]) if(options.vector == True): a = options.atoms