def determine_unique_configurations(configurations):
    r"""
    Group configurations whose SchNet representations compare as equal.

    A SchNet module is constructed inside this function (no weights are
    loaded here) and used to compute one representation per
    configuration. Representations are then compared pairwise with
    ``compare_reps``; each configuration is either counted as a
    duplicate of the first matching one or kept as a new unique entry.

    Parameters
    ----------
    configurations : list
        Configurations to compare. The cutoff is derived from the cell
        of the first one only (half of its largest cell entry, plus 1).

    Returns
    -------
    unique_config : list
        First-seen configuration of every group, in order of appearance.
    count_configs : list of int
        Number of configurations falling in each group, aligned with
        ``unique_config``.
    """
    # Nothing to compare; this also protects the ``configurations[0]``
    # lookup below from raising IndexError on empty input.
    if len(configurations) == 0:
        return [], []

    cutoff = float(np.max(configurations[0].cell.array) / 2 + 1)
    schnet = SchNet(
        n_atom_basis=32,
        n_filters=32,
        n_interactions=1,
        cutoff=cutoff,
        cutoff_network=CosineCutoff,
    )
    env = AseEnvironmentProvider(cutoff=cutoff)
    data = [posinp_to_ase_atoms(pos) for pos in configurations]
    data = SchnetPackData(
        data=data, environment_provider=env, collect_triples=False
    )
    data_loader = AtomsLoader(data, batch_size=1)

    # The representations are only compared, never differentiated, so
    # there is no reason to keep an autograd graph alive for each of them.
    with torch.no_grad():
        reps = [torch.squeeze(schnet(batch)) for batch in data_loader]

    unique_reps, unique_config, count_configs = [], [], []
    for config, rep in zip(configurations, reps):
        for j, uni in enumerate(unique_reps):
            if compare_reps(rep, uni):
                count_configs[j] += 1
                break
        else:
            # No known representation matched: start a new group.
            unique_reps.append(rep)
            unique_config.append(config)
            count_configs.append(1)
    return unique_config, count_configs
json.dumps(run_params).encode()).hexdigest()[:6] # Determine the output directory test_dir = os.path.join( 'networks', f'T{args.num_messages}_b{args.batch_size}_n{args.num_epochs}_{params_hash}' ) os.makedirs(test_dir) with open(os.path.join(test_dir, 'config.json'), 'w') as fp: json.dump(run_params, fp) # Making the data loaders train_data = AtomsData('datasets/train.db') train_loader = AtomsLoader(train_data, args.batch_size, shuffle=True, pin_memory=True, num_workers=2) test_data = AtomsData('datasets/test.db') test_loader = AtomsLoader(test_data, args.batch_size) valid_data = AtomsData('datasets/valid.db') valid_loader = AtomsLoader(valid_data, args.batch_size, pin_memory=True, num_workers=2) # Make the model mean, std = train_loader.get_statistics('ip', divide_by_atoms=args.atomwise) model = build_fn(atom_features=args.atom_features, message_steps=args.num_messages,
# Snapshot of the parsed arguments; a short digest of its JSON form is
# used to tag the output directory of this run.
run_params = vars(args)
serialized_params = json.dumps(run_params).encode()
params_hash = hashlib.sha256(serialized_params).hexdigest()[:6]

# Determine the output directory
run_dirname = f'T{args.num_messages}_b{args.batch_size}_n{args.num_epochs}_{params_hash}'
test_dir = os.path.join('networks', run_dirname)
os.makedirs(test_dir)  # raises if the directory already exists
config_path = os.path.join(test_dir, 'config.json')
with open(config_path, 'w') as fp:
    json.dump(run_params, fp)

# Open the three datasets, then wrap each one in a loader.
# Only the training loader shuffles.
train_data = AtomsData('datasets/train.db')
test_data = AtomsData('datasets/test.db')
valid_data = AtomsData('datasets/valid.db')
train_loader = AtomsLoader(train_data, args.batch_size, shuffle=True)
test_loader = AtomsLoader(test_data, args.batch_size)
valid_loader = AtomsLoader(valid_data, args.batch_size)

# Make the model, passing it the 'delta' statistics of the training set
mean, std = train_loader.get_statistics('delta', divide_by_atoms=args.atomwise)
model_options = dict(
    atom_features=args.atom_features,
    message_steps=args.num_messages,
    output_layers=args.output_layers,
    reduce_fn=args.readout_fn,
    atomwise=args.atomwise,
    mean=mean['delta'],
    std=std['delta'],
)
model = build_fn(**model_options)
def run(
    self,
    property,
    posinp=None,
    batch_size=128,
):
    r"""
    Evaluate the model on a set of configurations and return the
    requested property.

    Parameters
    ----------
    property : str
        Name of the property to predict. It is resolved by
        ``get_derivative_names`` against ``self.available_properties``
        into a base model output, an output name, a derivative order
        and the inputs to differentiate with respect to.
    posinp : list of Posinp
        Configurations passed to the model. Despite the ``None``
        default, a sized iterable is required.
    batch_size : int
        Number of configurations per batch. Default is 128. Forced to 1
        when a derivative is requested for more than one configuration.

    Returns
    -------
    predictions : dict
        Maps ``property`` to a :class:`numpy.ndarray` concatenated over
        all batches. When ``self.md`` is truthy the keys are
        ``"energy"`` and ``"forces"`` instead.
    """
    init_property, out_name, derivative, wrt = get_derivative_names(
        property, self.available_properties
    )
    order = abs(derivative)
    if order >= 1:
        self.model.output_modules[0].create_graph = True
    if len(posinp) > 1 and derivative:
        batch_size = 1

    structures = [posinp_to_ase_atoms(pos) for pos in posinp]
    periodic = any(atoms.pbc.any() for atoms in structures)
    if periodic:
        environment_provider = AseEnvironmentProvider(cutoff=self.cutoff)
    else:
        environment_provider = SimpleEnvironmentProvider()
    dataset = SchnetPackData(
        data=structures,
        environment_provider=environment_provider,
        collect_triples=self.model_type == "wacsf",
    )
    data_loader = AtomsLoader(dataset, batch_size=batch_size)

    def on_device(raw_batch):
        # Move every tensor of the batch to the calculator's device.
        return {key: value.to(self.device) for key, value in raw_batch.items()}

    outputs = []
    if order == 0:
        if self.model.output_modules[0].derivative is not None:
            # The output module declares a derivative, so the forward pass
            # is run with the ambient autograd mode.
            for raw_batch in data_loader:
                outputs.append(self.model(on_device(raw_batch)))
        else:
            with torch.no_grad():
                for raw_batch in data_loader:
                    outputs.append(self.model(on_device(raw_batch)))
    elif order == 1:
        for raw_batch in data_loader:
            batch = on_device(raw_batch)
            batch[wrt[0]].requires_grad_()
            results = self.model(batch)
            first = torch_derivative(results[init_property], batch[wrt[0]])
            deriv1 = torch.unsqueeze(first, 0)
            # A negative order asks for the negated derivative.
            if derivative < 0:
                deriv1 = -1.0 * deriv1
            outputs.append({out_name: deriv1})
    elif order == 2:
        for raw_batch in data_loader:
            batch = on_device(raw_batch)
            for inp in set(wrt):
                batch[inp].requires_grad_()
            results = self.model(batch)
            # NOTE(review): both differentiations use wrt[0]; wrt[1] is
            # never read even though every entry of wrt gets
            # requires_grad_(). Confirm this is intended for mixed
            # second derivatives.
            first = torch_derivative(
                results[init_property],
                batch[wrt[0]],
                create_graph=True,
            )
            second = torch_derivative(first, batch[wrt[0]])
            deriv2 = torch.unsqueeze(second, 0)
            if derivative < 0:
                deriv2 = -1.0 * deriv2
            outputs.append({out_name: deriv2})

    def gathered(key):
        # Concatenate one output entry over all batches as a numpy array.
        return np.concatenate(
            [out[key].cpu().detach().numpy() for out in outputs]
        )

    if self.md:
        return {name: gathered(name) for name in ["energy", "forces"]}
    result_key = out_name if derivative else init_property
    return {property: gathered(result_key)}
def run(
    self,
    property,
    posinp=None,
    batch_size=1,
):
    r"""
    Central method to use when making a calculation with the calculator.

    The single input configuration is split into subcells with
    ``AtomsToPatches``, each subcell is passed through the model on its
    own, and the per-subcell results are assembled back into arrays
    indexed like the original configuration.

    Parameters
    ----------
    property : str
        Property to be predicted by the calculator. Only ``"energy"``,
        ``"forces"`` and ``"hessian"`` can be assembled.
    posinp : list of Posinp
        Must contain exactly one atomic configuration.
    batch_size : int
        Not used by this method; every subcell is evaluated with a
        batch size of 1.

    Returns
    -------
    predictions : dict
        Maps ``property`` to the assembled prediction (a scalar sum for
        the energy, a :class:`numpy.ndarray` otherwise).

    Raises
    ------
    AssertionError
        If ``posinp`` does not contain exactly one configuration.
    NotImplementedError
        If a second-order derivative has to be computed, or if
        ``property`` is not one of the supported names.

    Warns
    -----
    UserWarning
        If the hessian is requested while ``self.subgrid`` contains a 2.
    """
    # Initial setup
    assert (
        len(posinp) == 1
    ), "Use the PatchSPCalculator for one configuration at a time."
    atoms = posinp_to_ase_atoms(posinp[0])

    if property == "hessian" and any(self.subgrid == 2):
        # ``warnings.warn`` returns None, so the previous
        # ``raise warnings.warn(...)`` ended in "TypeError: exceptions must
        # derive from BaseException" instead of merely warning.
        warnings.warn(
            """
        The hessian matrix can have some bad values with a
        grid of size 2 because the same atom can be copied
        multiple times in the buffers of the same subcell.
        Use a larger grid.
        """,
            stacklevel=2,
        )

    init_property, out_name, derivative, wrt = get_derivative_names(
        property, self.available_properties
    )
    if abs(derivative) >= 1:
        self.model.output_modules[0].create_graph = True

    pbc = bool(atoms.pbc.any())
    environment_provider = (
        AseEnvironmentProvider(cutoff=self.cutoff)
        if pbc
        else SimpleEnvironmentProvider()
    )

    # Split the configuration according to the subgrid
    at_to_patches = AtomsToPatches(
        cutoff=self.cutoff, n_interaction=self.n_interaction, grid=self.subgrid
    )
    (
        subcells,
        subcells_main_idx,
        original_cell_idx,
        complete_subcell_copy_idx,
    ) = at_to_patches.split_atoms(atoms)

    # Pass each subcell independently
    results = []
    for subcell in subcells:
        data = SchnetPackData(
            data=[subcell],
            environment_provider=environment_provider,
            collect_triples=self.model_type == "wacsf",
        )
        data_loader = AtomsLoader(data, batch_size=1)

        if derivative == 0:
            if self.model.output_modules[0].derivative is not None:
                # The output module declares a derivative, so the forward
                # pass is run with the ambient autograd mode.
                for batch in data_loader:
                    batch = {k: v.to(self.device) for k, v in batch.items()}
                    results.append(self.model(batch))
            else:
                with torch.no_grad():
                    for batch in data_loader:
                        batch = {k: v.to(self.device) for k, v in batch.items()}
                        results.append(self.model(batch))
        if abs(derivative) == 1:
            for batch in data_loader:
                batch = {k: v.to(self.device) for k, v in batch.items()}
                batch[wrt[0]].requires_grad_()
                forward_results = self.model(batch)
                deriv1 = torch_derivative(
                    forward_results[init_property], batch[wrt[0]]
                )
                # A negative order asks for the negated derivative.
                if derivative < 0:
                    deriv1 = -1.0 * deriv1
                results.append({out_name: deriv1})
        if abs(derivative) == 2:
            raise NotImplementedError()

    predictions = {}
    if property == "energy":
        # Only the entries selected by the main indices of each subcell
        # contribute to the total.
        predictions["energy"] = np.sum(
            [
                patch["individual_energy"][subcells_main_idx[i]]
                .detach()
                .cpu()
                .numpy()
                for i, patch in enumerate(results)
            ]
        )
    elif property == "forces":
        forces = np.zeros((len(atoms), 3))
        for i, patch in enumerate(results):
            forces[original_cell_idx[i]] = (
                patch["forces"]
                .detach()
                .squeeze()
                .cpu()
                .numpy()[subcells_main_idx[i]]
            )
        predictions["forces"] = forces
    elif property == "hessian":
        hessian = np.zeros((3 * len(atoms), 3 * len(atoms)))
        for i, patch in enumerate(results):
            # Destination indices in the full matrix ...
            (
                hessian_original_cell_idx_0,
                hessian_original_cell_idx_1,
            ) = prepare_hessian_indices(
                original_cell_idx[i], complete_subcell_copy_idx[i]
            )
            # ... and matching source indices in the subcell matrix.
            (
                hessian_subcells_main_idx_0,
                hessian_subcells_main_idx_1,
            ) = prepare_hessian_indices(
                subcells_main_idx[i],
                np.arange(0, len(complete_subcell_copy_idx[i])),
            )
            hessian[hessian_original_cell_idx_0, hessian_original_cell_idx_1] = (
                patch["hessian"]
                .detach()
                .squeeze()
                .cpu()
                .numpy()[hessian_subcells_main_idx_0, hessian_subcells_main_idx_1]
            )
        predictions["hessian"] = hessian
    else:
        raise NotImplementedError()
    return predictions
def predict(
    modelpath,
    posinp,
    name=None,
    device="cpu",
    disk_out=True,
    batch_size=128,
    overwrite=False,
    return_values=False,
):
    """
    Run a saved model on a set of structures and collect its outputs.

    Parameters
    ----------
    modelpath : str
        Path handed to ``load_model``.
    posinp : str or list of mybigdft.Posinp
        Either the path to a ``.xyz`` file or a ``.db`` database, or a
        list made exclusively of ``mybigdft.Posinp`` instances.
    name : str, optional
        Base name of the output file. Only used for list input, where
        it defaults to ``"structures"``; for a path it is replaced by
        the file name without its extension.
    device : str
        Device on which the model is loaded and the batches evaluated.
    disk_out : bool
        If True, write the predictions as CSV to ``<name>.out``.
    batch_size : int
        Batch size given to the data loader.
    overwrite : bool
        If True, every file ending in ``.db`` in the current working
        directory is removed before anything else is done.
    return_values : bool
        If True, return the predictions; otherwise return ``None``.

    Returns
    -------
    dict or None
        ``{"idx": ..., <property>: ...}`` with one numpy array per model
        output, when ``return_values`` is True.

    Raises
    ------
    NotImplementedError
        If the model type or the file format is not recognized.
    TypeError
        If ``posinp`` is neither a string nor a list of
        ``mybigdft.Posinp`` instances.
    """
    if overwrite:
        # NOTE: this clears *all* ``.db`` files of the working directory,
        # including a ``posinp`` database that would live there.
        for stale in [dat for dat in os.listdir() if dat.endswith(".db")]:
            os.remove(stale)

    model = load_model(modelpath, device=device)
    # Build the state dict once instead of once per lookup.
    state = model.state_dict()
    if "representation.cutoff.cutoff" in state:
        model_type = "wacsf"
        cutoff = float(state["representation.cutoff.cutoff"])
    elif any(
        key in state
        for key in (
            "module.representation.embedding.weight",
            "representation.embedding.weight",
        )
    ):
        model_type = "schnet"
        # The key is prefixed with "module." for some saved models.
        try:
            cutoff = float(
                state["module.representation.interactions.0.cutoff_network.cutoff"]
            )
        except KeyError:
            cutoff = float(
                state["representation.interactions.0.cutoff_network.cutoff"]
            )
    else:
        raise NotImplementedError("Model type is not recognized.")

    if isinstance(posinp, str):
        # ``str.strip(".xyz")`` removes any of the characters ".", "x", "y",
        # "z" from both ends (e.g. "zinc.xyz" -> "inc"); slice the known
        # suffix off instead.
        filename = posinp.split("/")[-1]
        if posinp.endswith(".xyz"):
            name = filename[: -len(".xyz")]
            pos = mybigdft.Posinp.from_file(posinp)
            pbc = pos.boundary_conditions != "free"
            data = [pos]
        elif posinp.endswith(".db"):
            name = filename[: -len(".db")]
            data = connect(posinp)
            pbc = any(row["pbc"].any() for row in data.select())
        else:
            raise NotImplementedError("File format not supported.")
    elif isinstance(posinp, list):
        if name is None or name == "":
            name = "structures"
        if all(isinstance(pos, mybigdft.Posinp) for pos in posinp):
            data = [sim.mb_posinp_to_ase_atoms(pos) for pos in posinp]
        else:
            raise TypeError(
                "Posinp should be a list of exclusively mybigdft.Posinp instances."
            )
        pbc = any(pos.pbc.any() for pos in data)
    else:
        raise TypeError("""
            Positions should be given either as a path to a
            file or database, or as a list of mybigdft.Posinp instances.
            """)

    environment_provider = (
        AseEnvironmentProvider(cutoff=cutoff)
        if pbc
        else SimpleEnvironmentProvider()
    )
    data = BigdftAtomsData(
        data=data,
        environment_provider=environment_provider,
        collect_triples=model_type == "wacsf",
    )
    data_loader = AtomsLoader(data, batch_size=batch_size)

    pred = []
    with torch.no_grad():
        for batch in data_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            pred.append(model(batch))

    # 1-based index column followed by one column per model output.
    predictions = {"idx": np.arange(1, len(data) + 1)}
    for prop in list(pred[0].keys()):
        predictions[prop] = np.concatenate([p[prop].cpu().numpy() for p in pred])

    if disk_out:
        outfile = name + ".out"
        # ``newline=""`` lets the csv module control line endings itself.
        with open(outfile, "w", newline="") as out:
            wr = csv.writer(out)
            wr.writerow(list(predictions.keys()))
            wr.writerows(zip(*predictions.values()))

    if return_values:
        return predictions
def qm9_test_loader(qm9_splits, batch_size, shuffle):
    """Return an ``AtomsLoader`` over ``qm9_splits[2]``.

    ``batch_size`` and ``shuffle`` are forwarded unchanged to the loader.
    The entry at index 2 is presumably the test split (going by the
    function name) -- confirm against whatever builds ``qm9_splits``.
    """
    test_split = qm9_splits[2]
    loader_options = {"batch_size": batch_size, "shuffle": shuffle}
    return AtomsLoader(test_split, **loader_options)
def qm9_val_loader(qm9_splits, batch_size, shuffle):
    """Return an ``AtomsLoader`` over ``qm9_splits[1]``.

    ``batch_size`` and ``shuffle`` are forwarded unchanged to the loader.
    The entry at index 1 is presumably the validation split (going by the
    function name) -- confirm against whatever builds ``qm9_splits``.
    """
    val_split = qm9_splits[1]
    loader_options = {"batch_size": batch_size, "shuffle": shuffle}
    return AtomsLoader(val_split, **loader_options)
def qm9_train_loader(qm9_splits, batch_size, shuffle):
    """Return an ``AtomsLoader`` over ``qm9_splits[0]``.

    ``batch_size`` and ``shuffle`` are forwarded unchanged to the loader.
    The entry at index 0 is presumably the training split (going by the
    function name) -- confirm against whatever builds ``qm9_splits``.
    """
    train_split = qm9_splits[0]
    loader_options = {"batch_size": batch_size, "shuffle": shuffle}
    return AtomsLoader(train_split, **loader_options)