def _refine_structures(structures, atomnos, calculator, method, procs, loadstring=''):
    '''
    Optimize every structure of a set, one at a time.

    structures: indexable collection of coordinates. It is updated in place:
        entries whose optimization succeeds are overwritten with the
        optimized coordinates, the others are left untouched.
    atomnos, calculator, method, procs: forwarded to optimize().
    loadstring: text shown before the progress counter of the loading bar.

    return: the (updated) structures and a list of energies, with np.inf
        standing in for every structure whose optimization did not succeed.
    '''
    total = len(structures)
    energies = []

    # iterate over a copy, since the original container is modified along the way
    for index, start_coords in enumerate(deepcopy(structures)):

        loadbar(index, total, f'{loadstring} {index+1}/{total} ')

        refined, energy, converged = optimize(start_coords,
                                              atomnos,
                                              calculator,
                                              method=method,
                                              procs=procs)

        if not converged:
            energies.append(np.inf)
            continue

        structures[index] = refined
        energies.append(energy)

    # complete the loading bar
    loadbar(total, total, f'{loadstring} {total}/{total} ')

    return structures, energies
def csearch_operator(filename, embedder):
    '''
    Run a conformational search starting from each structure contained in
    a file and write all the resulting conformers to a new .xyz file.

    filename: name of the input structure file, read through read_xyz().
        The output name is obtained by dropping its last four characters
        and appending "_confs.xyz".
    embedder: object providing log(), options.optimization and embed, also
        used by _get_lowest_calc() to choose calculator, method and procs.

    return: name of the file the conformers were written to.
    '''

    embedder.log(f'--> Performing conformational search on {filename}')

    t_start = time.perf_counter()

    data = read_xyz(filename)

    if len(data.atomcoords) > 1:
        embedder.log('Requested conformational search on multimolecular file - will do\n' +
                     'an individual search from each conformer (might be time-consuming).')

    calc, method, procs = _get_lowest_calc(embedder)
    n_atoms = data.atomnos.shape[0]
    batches = []

    for i, coords in enumerate(data.atomcoords):

        # optimize starting structure before running csearch, if requested
        if embedder.options.optimization:
            start_coords = optimize(coords,
                                    data.atomnos,
                                    calculator=calc,
                                    method=method,
                                    procs=procs)[0]
        else:
            start_coords = coords

        # generate the most diverse conformers starting from that geometry
        conf_batch = clustered_csearch(start_coords,
                                       data.atomnos,
                                       title=f'{filename}, conformer {i+1}',
                                       logfunction=embedder.log)

        # every batch is stored as a (n_structures, n_atoms, 3) array
        batches.append(np.asarray(conf_batch).reshape(-1, n_atoms, 3))

    # Different runs can return a different number of structures, so the
    # batches cannot be stacked in a regular 4D array: the reference batch
    # size is the largest one, and structures are merged by concatenation.
    # When all batches have the same size this is equivalent to stacking
    # them and reshaping to (-1, n_atoms, 3).
    batch_size = max(len(batch) for batch in batches)
    conformers = np.concatenate(batches)

    if embedder.embed is not None:
        embedder.log(f'\nSelected the most diverse {batch_size} out of {conformers.shape[0]} conformers for {filename} ({time_to_string(time.perf_counter()-t_start)})')
        conformers = most_diverse_conformers(batch_size, conformers, data.atomnos)

    confname = filename[:-4] + '_confs.xyz'
    with open(confname, 'w') as f:
        for i, conformer in enumerate(conformers):
            write_xyz(conformer, data.atomnos, f, title=f'Generated conformer {i}')

    embedder.log('\n')

    return confname
def clustered_csearch(coords,
                      atomnos,
                      constrained_indexes=None,
                      keep_hb=False,
                      ff_opt=False,
                      n=10,
                      mode=1,
                      calc=None,
                      method=None,
                      title='test',
                      logfunction=print):
    '''
    Conformational search by rotation of groups ("clusters") of torsions.
    Torsions are divided in groups: for each group, all combinations of
    rotation angles are applied to every starting point, compenetrated
    structures are removed and (if it is not the last group) at most n
    structures are kept as starting points for the next group.

    coords: starting coordinates
    atomnos: atomic numbers, forwarded to the helper functions
    constrained_indexes: pairs of indexes that are added as edges to the
        molecular graph and passed to optimize(). Default is no pairs.
    keep_hb: whether to preserve the presence of current hydrogen bonds or not
        (only considered if the graph has more than one connected component)
    ff_opt: whether to optimize each generated structure with calc/method
    n: number of structures to keep from each torsion cluster (None: keep all)
    mode: 0 - keep the n lowest energy conformers (requires ff_opt)
          1 - keep the n most diverse conformers
    calc, method: calculator and level used by ff_opt. They default to
        FF_CALC and DEFAULT_FF_LEVELS[calc].
    title: name used in the log if no rotable bond is found
    logfunction: function used to print log messages

    return: array of generated structures. If no torsion is found, an
        array containing only the input coordinates.
    '''

    assert mode == 1 or ff_opt, 'Either leave mode=1 or turn on force field optimization'
    assert mode in (0, 1), 'The mode keyword can only be 0 or 1'

    calc = FF_CALC if calc is None else calc
    method = DEFAULT_FF_LEVELS[calc] if method is None else method
    # Set default calculator attributes if user did not specify them

    constrained_indexes = np.array([]) if constrained_indexes is None else constrained_indexes
    t_start_run = time.perf_counter()

    graph = graphize(coords, atomnos)
    for i1, i2 in constrained_indexes:
        graph.add_edge(i1, i2)
    # build a molecular graph of the TS
    # that includes constrained indexes pairs

    if keep_hb and len(list(nx.connected_components(graph))) > 1:
        hydrogen_bonds = _get_hydrogen_bonds(coords, atomnos, graph)
        for hb in hydrogen_bonds:
            graph.add_edge(*hb)
    else:
        hydrogen_bonds = ()
    # get informations on the intra/intermolecular hydrogen
    # bonds that we should avoid disrupting

    double_bonds = get_double_bonds_indexes(coords, atomnos)
    # get all double bonds - do not rotate these

    torsions = _get_torsions(graph, hydrogen_bonds, double_bonds)
    # get all torsions that we should explore

    for t in torsions:
        t.sort_torsion(graph, constrained_indexes)
    # sort torsion indexes so that first index of each torsion
    # is the half that will move and is external to the TS

    if not torsions:
        logfunction(f'No rotable bonds found for {title}.')
        return np.array([coords])

    grouped_torsions = _group_torsions(coords,
                                       torsions,
                                       max_size=3 if ff_opt else 8)

    ############################################## LOG TORSIONS

    logfunction('Torsion list: (indexes : n-fold)')
    for t in torsions:
        logfunction(' - {:21s} : {}-fold'.format(str(t.torsion), t.n_fold))

    _write_torsion_vmd(coords, atomnos, constrained_indexes, grouped_torsions)

    ############################################## LOG TORSIONS

    logfunction(f'\n--> Clustered CSearch - mode {mode} ({"stability" if mode == 0 else "diversity"}) - ' +
                f'{len(torsions)} torsions in {len(grouped_torsions)} group{"s" if len(grouped_torsions) != 1 else ""} - ' +
                f'{[len(t) for t in grouped_torsions]}')

    # the tag only depends on the mode: defining it here ensures
    # it is always bound (and never stale) when it is logged
    tag = 'stable' if mode == 0 else 'diverse'

    output_structures = []
    output_energies = []
    # energies of output_structures, in the same order (only tracked in mode 0)

    starting_points = [coords]
    for tg, torsions_group in enumerate(grouped_torsions):

        logfunction()

        angles = cartesian_product(*[t.get_angles() for t in torsions_group])
        candidates = len(angles) * len(starting_points)

        new_structures = []
        for sp in starting_points:
            for angle_set in angles:

                new_coords = deepcopy(sp)

                for torsion, angle in zip(torsions_group, angle_set):
                    if angle != 0:
                        rotation_mask = _get_rotation_mask(graph, torsion.torsion)
                        new_coords = rotate_dihedral(new_coords,
                                                     torsion.torsion,
                                                     angle,
                                                     mask=rotation_mask)

                new_structures.append(new_coords)

        new_structures = np.array(new_structures)

        keep = np.zeros(len(new_structures), dtype=bool)
        for s, structure in enumerate(new_structures):
            keep[s] = compenetration_check(structure)
        # only keep structures that pass the compenetration check

        new_structures = new_structures[keep]
        for_comp = np.count_nonzero(~keep)

        logfunction(f'> Group {tg+1}/{len(grouped_torsions)} - {len(torsions_group)} bonds, ' +
                    f'{[t.n_fold for t in torsions_group]} n-folds, {len(starting_points)} ' +
                    f'starting point{"s" if len(starting_points) > 1 else ""} = {candidates} conformers')
        logfunction(f' {candidates} generated, {for_comp} removed for compenetration ({len(new_structures)} left)')

        energies = None
        if ff_opt:

            t_start = time.perf_counter()

            energies = np.zeros(new_structures.shape[0])
            for c, new_coords in enumerate(deepcopy(new_structures)):

                opt_coords, energy, success = optimize(new_coords,
                                                       atomnos,
                                                       calc,
                                                       method=method,
                                                       constrained_indexes=constrained_indexes)

                if success:
                    new_structures[c] = opt_coords
                    energies[c] = energy

                else:
                    energies[c] = np.inf
                    # failed optimizations keep the starting geometry
                    # and are ranked last in energy

            logfunction(f'Optimized {len(new_structures)} structures at {method} level ({time_to_string(time.perf_counter()-t_start)})')

        if tg + 1 != len(grouped_torsions):
            # not the last group: reduce the number of starting points

            if n is not None and len(new_structures) > n:

                if mode == 0:
                    order = np.argsort(energies, kind='stable')[:n]
                    new_structures = new_structures[order]
                    energies = energies[order]
                    # structures and energies are reduced together,
                    # so that they stay aligned

                else:
                    new_structures = most_diverse_conformers(n, new_structures, atomnos, energies)

            logfunction(f' Kept the most {tag} {len(new_structures)} starting points for next rotation cluster')

        output_structures.extend(new_structures)
        if mode == 0:
            output_energies.extend(energies)

        starting_points = new_structures

    output_structures = np.array(output_structures)

    n_out = sum([t.n_fold for t in torsions])

    # NOTE(review): this check looks at the structures of the last group
    # only (new_structures), not at the accumulated output_structures.
    # Kept as it was - confirm that this is intended.
    if len(new_structures) > n_out:

        if mode == 0:
            # rank all the accumulated structures by their own energy
            order = np.argsort(output_energies, kind='stable')[:n_out]
            output_structures = output_structures[order]

        else:
            # NOTE(review): energies (if any) refer to the last torsion group
            # only, while output_structures spans all groups - confirm that
            # most_diverse_conformers can deal with that.
            output_structures = most_diverse_conformers(n_out, output_structures, atomnos, energies)

    logfunction(f' Selected the {"best" if mode == 0 else "most diverse"} {len(output_structures)} new structures ({time_to_string(time.perf_counter()-t_start_run)})')

    return output_structures
def dihedral_embed(embedder):
    '''
    Scan the dihedral angle defined by the reactive indexes of the first
    embedder object, for each of its conformers, and collect the structures
    returned by ase_torsion_TSs as TS guesses.

    Each conformer is pre-optimized first: conformers for which this step
    fails are skipped. Energies are stored relative to the energy of the
    pre-optimized conformer they originate from.

    Raises ZeroCandidatesError if no structure was collected, otherwise
    refines the structures by similarity, writes the outputs and calls
    embedder.normal_termination().
    '''
    from tscode.atropisomer_module import ase_torsion_TSs

    mol = embedder.objects[0]
    n_conformers = len(mol.atomcoords)
    plural = "s" if n_conformers > 1 else ""

    embedder.structures, embedder.energies = [], []

    embedder.log(f'\n--> {mol.name} - performing a scan of dihedral angle with indices {mol.reactive_indexes}\n')

    for conf_number, coords in enumerate(mol.atomcoords, start=1):

        embedder.log(f'\n--> Pre-optimizing input structure{plural} '
                     f'({embedder.options.theory_level} via {embedder.options.calculator})')

        embedder.log(f'--> Performing relaxed scans (conformer {conf_number}/{n_conformers})')

        new_coords, ground_energy, success = optimize(
            coords,
            mol.atomnos,
            embedder.options.calculator,
            method=embedder.options.theory_level,
            procs=embedder.options.procs,
            solvent=embedder.options.solvent,
        )

        if not success:
            embedder.log(f'Pre-optimization failed - Skipped conformer {conf_number}', p=False)
            continue

        # the trajectory name is only provided when debugging
        bernytraj = mol.rootname + '_berny' if embedder.options.debug else None

        structures, energies = ase_torsion_TSs(
            embedder,
            new_coords,
            mol.atomnos,
            mol.reactive_indexes,
            threshold_kcal=embedder.options.kcal_thresh,
            title=mol.rootname + f'_conf_{conf_number}',
            optimization=embedder.options.optimization,
            logfile=embedder.logfile,
            bernytraj=bernytraj,
            plot=True,
        )

        # store energies relative to the pre-optimized conformer
        for ts_structure, ts_energy in zip(structures, energies):
            embedder.structures.append(ts_structure)
            embedder.energies.append(ts_energy - ground_energy)

    embedder.structures = np.array(embedder.structures)
    embedder.energies = np.array(embedder.energies)

    if len(embedder.structures) == 0:
        message = ('\n--> Dihedral embed did not find any suitable maxima above the set threshold\n'
                   f' ({embedder.options.kcal_thresh} kcal/mol) during the scan procedure. Observe the\n'
                   ' generated energy plot and try lowering the threshold value (KCAL keyword).')
        embedder.log(message)
        raise ZeroCandidatesError()

    embedder.atomnos = mol.atomnos

    embedder.similarity_refining()

    embedder.write_structures('TS_guesses',
                              indexes=embedder.objects[0].reactive_indexes,
                              relative=False,
                              extra='(barrier height)')

    embedder.write_vmd(indexes=embedder.objects[0].reactive_indexes)

    embedder.normal_termination()
def optimization_refining(self):
    '''
    Refines structures by constrained optimizations with the active calculator,
    discarding similar ones and scrambled ones.

    Steps, in order:
    - optimization of each structure at the self.options.theory_level level,
      followed by similarity pruning;
    - unless self.embed is 'prune': refinement of the bonding distances via
      ase_adjust_spacings, optional removal of unrefined structures
      (self.options.only_refined), similarity and fitness pruning;
    - sorting by energy and, if self.options.kcal_thresh is set, removal
      of the structures above that relative energy threshold;
    - writing of the structures to the TSCoDe_TS_guesses_{stamp}.xyz file.

    The structures, energies, exit_status and constrained_indexes
    attributes are updated and kept aligned to each other.
    '''

    t_start = time.perf_counter()

    self.log(f'--> Structure optimization ({self.options.theory_level} level via {self.options.calculator})')

    if self.options.calculator == 'MOPAC':
        method = f'{self.options.theory_level} GEO-OK CYCLES=500'
    else:
        method = f'{self.options.theory_level}'

    # iterate over a copy, since self.structures is updated along the way
    for i, structure in enumerate(deepcopy(self.structures)):
        loadbar(i,
                len(self.structures),
                prefix=f'Optimizing structure {i+1}/{len(self.structures)} ')

        t_start_opt = time.perf_counter()
        try:
            new_structure, self.energies[i], self.exit_status[i] = optimize(
                structure,
                self.atomnos,
                self.options.calculator,
                method=method,
                constrained_indexes=self.constrained_indexes[i],
                mols_graphs=self.graphs,
                procs=self.options.procs,
                max_newbonds=self.options.max_newbonds,
                check=(self.embed != 'prune'))

            if self.exit_status[i]:
                self.structures[i] = new_structure

            exit_str = 'CONVERGED' if self.exit_status[i] else 'SCRAMBLED'

        except MopacReadError:
            # The output of the calculation could not be read, that is the calculation did not end well.
            # The easiest solution is to reject the structure and go on.
            self.energies[i] = np.inf
            self.exit_status[i] = False
            exit_str = 'FAILED TO READ FILE'

        self.log((f' - {self.options.calculator} {self.options.theory_level} optimization: Structure {i+1} {exit_str} - '
                  f'took {time_to_string(time.perf_counter()-t_start_opt)}'), p=False)

    loadbar(1, 1, prefix=f'Optimizing structure {len(self.structures)}/{len(self.structures)} ')

    self.log(f'Successfully optimized {len([b for b in self.exit_status if b])}/{len(self.structures)} structures. Non-optimized ones will not be discarded.')

    self.log((f'{self.options.calculator} {self.options.theory_level} optimization took '
              f'{time_to_string(time.perf_counter()-t_start)} (~{time_to_string((time.perf_counter()-t_start)/len(self.structures))} per structure)'))

    ################################################# PRUNING: SIMILARITY (POST SEMIEMPIRICAL OPT)

    self.zero_candidates_check()
    self.similarity_refining()

    ################################################# REFINING: BONDING DISTANCES

    if self.embed != 'prune':

        self.write_structures('TS_guesses_unrefined', energies=False, p=False)
        self.log(f'--> Checkpoint output - Updated {len(self.structures)} TS structures before distance refinement.\n')

        self.log(f'--> Refining bonding distances for TSs ({self.options.theory_level} level)')

        if self.options.ff_opt:
            try:
                os.remove(f'TSCoDe_checkpoint_{self.stamp}.xyz')
                # We don't need the pre-optimized structures anymore
            except FileNotFoundError:
                pass

        self._set_target_distances()
        t_start = time.perf_counter()

        for i, structure in enumerate(deepcopy(self.structures)):
            loadbar(i,
                    len(self.structures),
                    prefix=f'Refining structure {i+1}/{len(self.structures)} ')
            try:
                traj = f'refine_{i}.traj' if self.options.debug else None

                new_structure, new_energy, self.exit_status[i] = ase_adjust_spacings(
                    self,
                    structure,
                    self.atomnos,
                    self.constrained_indexes[i],
                    title=i,
                    traj=traj)

                if self.exit_status[i]:
                    self.structures[i] = new_structure
                    self.energies[i] = new_energy

            except ValueError as e:
                # ase will throw a ValueError if the output lacks a space in the "FINAL POINTS AND DERIVATIVES" table.
                # This occurs when one or more of them is not defined, that is when the calculation did not end well.
                # The easiest solution is to reject the structure and go on.
                self.log(repr(e))
                self.log(f'Failed to read MOPAC file for Structure {i+1}, skipping distance refinement', p=False)

        loadbar(1, 1, prefix=f'Refining structure {i+1}/{len(self.structures)} ')
        t_end = time.perf_counter()
        self.log(f'{self.options.calculator} {self.options.theory_level} refinement took {time_to_string(t_end-t_start)} (~{time_to_string((t_end-t_start)/len(self.structures))} per structure)')

        before = len(self.structures)
        if self.options.only_refined:
            mask = self.exit_status
            self.apply_mask(('structures', 'energies', 'exit_status', 'constrained_indexes'), mask)
            s = f'Discarded {len([i for i in mask if not i])} unrefined structures.'

        else:
            s = 'Non-refined ones will not be discarded.'

        self.log(f'Successfully refined {len([i for i in self.exit_status if i])}/{before} structures. {s}')

        ################################################# PRUNING: SIMILARITY (POST REFINEMENT)

        self.zero_candidates_check()
        self.similarity_refining()

        ################################################# PRUNING: FITNESS

        self.fitness_refining()

    ################################################# PRUNING: ENERGY

    self.energies = self.energies - np.min(self.energies)

    # indexes of the structures, from the lowest to the highest energy
    # (sorted() is stable, so ties keep their current relative order)
    sequence = tuple(sorted(range(len(self.energies)), key=lambda index: self.energies[index]))

    self.energies = self.scramble(self.energies, sequence)
    self.structures = self.scramble(self.structures, sequence)
    self.constrained_indexes = self.scramble(self.constrained_indexes, sequence)
    self.exit_status = self.scramble(self.exit_status, sequence)
    # sorting structures based on energy. The exit status has to follow
    # the same order, or the REFINED/NOT REFINED labels written below
    # (and the mask applied to it) would refer to the wrong structures

    if self.options.kcal_thresh is not None:

        mask = (self.energies - np.min(self.energies)) < self.options.kcal_thresh

        # all the per-structure attributes are masked together
        self.apply_mask(('structures', 'energies', 'exit_status', 'constrained_indexes'), mask)

        if False in mask:
            self.log(f'Discarded {len([b for b in mask if not b])} candidates for energy (Threshold set to {self.options.kcal_thresh} kcal/mol)')

    ################################################# XYZ GUESSES OUTPUT

    self.outname = f'TSCoDe_TS_guesses_{self.stamp}.xyz'
    with open(self.outname, 'w') as f:
        for i, structure in enumerate(align_structures(self.structures, self.constrained_indexes[0])):

            kind = 'REFINED - ' if self.exit_status[i] else 'NOT REFINED - '

            write_xyz(structure,
                      self.atomnos,
                      f,
                      title=f'Structure {i+1} - {kind}Rel. E. = {round(self.energies[i], 3)} kcal/mol')

    try:
        os.remove(f'TSCoDe_TS_guesses_unrefined_{self.stamp}.xyz')
        # since we have the refined structures, we can get rid of the unrefined ones
    except FileNotFoundError:
        pass

    self.log(f'Wrote {len(self.structures)} rough TS structures to {self.outname} file.\n')
def force_field_refining(self):
    '''
    Performs structural optimizations with the embedder force field calculator.
    Only structures that do not scramble during FF optimization are updated,
    while the rest are kept as they are.

    A checkpoint file (TSCoDe_checkpoint_{stamp}.xyz) is written before the
    optimization and overwritten after it, once similar structures have
    been pruned.
    '''

    def _write_checkpoint(get_title):
        # write the aligned structures to self.outname, using
        # get_title(i) as the title of the i-th structure
        with open(self.outname, 'w') as f:
            aligned = align_structures(self.structures, self.constrained_indexes[0])
            for i, structure in enumerate(aligned):
                write_xyz(structure, self.atomnos, f, title=get_title(i))

    def _refined_title(i):
        # title for the second checkpoint, stating if the structure was refined
        exit_str = f'{self.options.ff_level} REFINED' if self.exit_status[i] else 'RAW'
        return f'TS candidate {i+1} - {exit_str} - Checkpoint before {self.options.calculator} optimization'

    ################################################# CHECKPOINT BEFORE FF OPTIMIZATION

    self.outname = f'TSCoDe_checkpoint_{self.stamp}.xyz'
    _write_checkpoint(lambda i: f'TS candidate {i+1} - Checkpoint before FF optimization')

    self.log(f'\n--> Checkpoint output - Wrote {len(self.structures)} TS structures to {self.outname} file before FF optimization.\n')

    ################################################# GEOMETRY OPTIMIZATION - FORCE FIELD

    self.log(f'--> Structure optimization ({self.options.ff_level} level via {self.options.ff_calc})')

    t_start = time.perf_counter()

    # iterate over a copy, since self.structures is updated along the way
    for i, structure in enumerate(deepcopy(self.structures)):

        loadbar(i,
                len(self.structures),
                prefix=f'Optimizing structure {i+1}/{len(self.structures)} ')

        new_structure, _, self.exit_status[i] = optimize(
            structure,
            self.atomnos,
            self.options.ff_calc,
            method=self.options.ff_level,
            constrained_indexes=self.constrained_indexes[i],
            mols_graphs=self.graphs,
            check=(self.embed != 'prune'),
        )

        if self.exit_status[i]:
            self.structures[i] = new_structure

    loadbar(1, 1, prefix=f'Optimizing structure {len(self.structures)}/{len(self.structures)} ')

    elapsed = time.perf_counter() - t_start
    self.log(f'Force Field {self.options.ff_level} optimization took {time_to_string(elapsed)} (~{time_to_string(elapsed/len(self.structures))} per structure)')

    ################################################# EXIT STATUS

    n_refined = sum(1 for status in self.exit_status if status)
    self.log(f'Successfully pre-refined {n_refined}/{len(self.structures)} candidates at {self.options.ff_level} level.')

    ################################################# PRUNING: SIMILARITY (POST FORCE FIELD OPT)

    self.zero_candidates_check()
    self.similarity_refining()

    ################################################# CHECKPOINT BEFORE OPTIMIZATION

    _write_checkpoint(_refined_title)

    self.log(f'--> Checkpoint output - Updated {len(self.structures)} TS structures to {self.outname} file before {self.options.calculator} optimization.\n')