Пример #1
0
    def write_structures(self,
                         tag,
                         indexes=None,
                         energies=True,
                         relative=True,
                         extra='',
                         p=True):
        '''
        Write all the current structures to a multi-frame .xyz file.

        The file is named "TSCoDe_{tag}_{self.stamp}.xyz" and its name is
        stored in self.outname. Structures are aligned through
        align_structures before being written.

        tag: label used in the file name and in the title of each frame
        indexes: indexes handed to align_structures. If None, uses
            self.constrained_indexes[0]
        energies: if True, the energy of each structure is added to its title
        relative: if True, the written energies are relative to the lowest
            one. self.energies itself is left untouched.
        extra: string appended to the title of each frame
        p: if True, a summary line is sent to self.log
        '''

        if indexes is None:
            indexes = self.constrained_indexes[0]

        if energies:
            # Work on a copy: an in-place subtraction on an alias of
            # self.energies would overwrite the stored energies.
            rel_e = np.array(self.energies)

            if relative:
                rel_e = rel_e - np.min(rel_e)

        self.outname = f'TSCoDe_{tag}_{self.stamp}.xyz'
        with open(self.outname, 'w') as f:
            aligned = align_structures(self.structures, indexes)
            for i, structure in enumerate(aligned):
                title = f'TS candidate {i+1} - {tag}'

                if energies:
                    title += f' - Rel. E. = {round(rel_e[i], 3)} kcal/mol '

                title += extra

                write_xyz(structure, self.atomnos, f, title=title)

        if p:
            self.log(
                f'Wrote {len(self.structures)} {tag} TS structures to {self.outname} file.\n'
            )
Пример #2
0
def ase_torsion_TSs(embedder,
                    coords,
                    atomnos,
                    indexes,
                    threshold_kcal=5,
                    title='temp',
                    optimization=True,
                    logfile=None,
                    bernytraj=None,
                    plot=False):
    '''
    Scan a dihedral angle in both directions and collect the candidate
    transition state structures found at the energy maxima.

    For each direction (+10 and -10 degrees increments, 36 steps each):
    - a preliminary scan is run with ase_scan and written to
      "{title}{_relaxed|_rigid}{direction}_scan.xyz"
    - peaks are located with atropisomer_peaks, between
      min_e + threshold_kcal and min_e + 75 (min_e is the scan minimum)
    - starting from the structure preceding each peak, an accurate
      ad libitum scan is run with a tenth of the step size
    - each sub-peak of the accurate scan is either saddle-optimized
      (embedder.options.saddle), refined via NEB (embedder.options.neb)
      or stored as it is.

    embedder: object providing .options (let, saddle, neb) and .log,
        also handed to the scan/saddle/NEB helper functions
    coords: starting coordinates
    atomnos: atomic numbers
    indexes: the four atom indexes defining the dihedral angle
    threshold_kcal: minimum height of a peak over the scan minimum
    title: prefix for the names of the output files
    optimization: if True scans are relaxed, otherwise they are rigid
    logfile: optional writable file object
    bernytraj: optional prefix for saddle optimization trajectory names
    plot: if True, save a pickled figure and a .svg plot for each direction

    return: array of TS structures and list of their energies, in the
        same order. Saddle/NEB results are only kept if they pass
        molecule_check against the starting coords.

    Raises SystemExit if the four indexes are not consecutively bonded
    and embedder.options.let is not set.
    '''

    assert len(indexes) == 4

    # Kept separate from the scan energies: sharing one name made the
    # returned energies a mix of scan points and TS energies.
    ts_structures, ts_energies = [], []

    graph = graphize(coords, atomnos)
    i1, i2, i3, i4 = indexes

    if all([len(shortest_path(graph, start, end)) == 2 for start, end in zip(indexes[0:-1], indexes[1:])]):
        graph.remove_edge(i2, i3)
        subgraphs = connected_components(graph)

        for subgraph in subgraphs:
            if i3 in subgraph:
                indexes_to_be_moved = subgraph - {i3}
                break

        if i1 in indexes_to_be_moved:

            # if molecule is cyclical, just move the fourth atom and
            # let the rest of the structure relax
            indexes_to_be_moved = [i4]

            s = 'The specified dihedral angle is comprised within a cycle. Switching to safe dihedral scan (moving only last index).'
            print(s)
            if logfile is not None:
                logfile.write(s+'\n')

    else:

        if not embedder.options.let:
            raise SystemExit('The specified dihedral angle is made up of non-contiguous atoms. To prevent errors, the\n' +
                             'run has been stopped. Override this behavior with the LET keyword.')

        # if user did not provide four contiguous indexes,
        # and did that on purpose, just move the fourth atom and
        # let the rest of the structure relax
        indexes_to_be_moved = [i4]

        s = 'The specified dihedral angle is made up of non-contiguous atoms.\nThis might cause some unexpected results.'
        print(s)
        if logfile is not None:
            logfile.write(s+'\n')

    # (degrees per step, number of steps, file name suffix)
    routine = ((10, 36, '_clockwise'), (-10, 36, '_counterclockwise'))

    for degrees, steps, direction in routine:

        print()
        if logfile is not None:
            logfile.write('\n')

        structures, energies = ase_scan(embedder,
                                        coords,
                                        atomnos,
                                        indexes=indexes,
                                        degrees=degrees,
                                        steps=steps,
                                        relaxed=optimization,
                                        indexes_to_be_moved=indexes_to_be_moved,
                                        title='Preliminary scan' + ((' (clockwise)' if direction == '_clockwise' \
                                              else ' (counterclockwise)') if direction != '' else ''),
                                        logfile=logfile)

        min_e = min(energies)
        rel_energies = [e-min_e for e in energies]

        tag = '_relaxed' if optimization else '_rigid'

        with open(title + tag + direction + '_scan.xyz', 'w') as outfile:
            for point, structure in enumerate(align_structures(np.array(structures), indexes[:-1])):
                write_xyz(structure, atomnos, outfile, title=f'Scan point {point+1}/{len(structures)} - Rel. E = {round(rel_energies[point], 3)} kcal/mol')

        if plot:
            import pickle

            import matplotlib.pyplot as plt

            fig = plt.figure()

            x1 = [dihedral(structure[indexes]) for structure in structures]
            y1 = rel_energies

            for i, (x_, y_) in enumerate(get_plot_segments(x1, y1, max_step=abs(degrees)+1)):

                plt.plot(x_,
                        y_,
                        '-',
                        color='tab:blue',
                        label=('Preliminary SCAN'+direction) if i == 0 else None,
                        linewidth=3,
                        alpha=0.50)

        peaks_indexes = atropisomer_peaks(energies, min_thr=min_e+threshold_kcal, max_thr=min_e+75)

        if peaks_indexes:

            s = 's' if len(peaks_indexes) > 1 else ''
            print(f'Found {len(peaks_indexes)} peak{s}. Performing accurate scan{s}.\n')
            if logfile is not None:
                logfile.write(f'Found {len(peaks_indexes)} peak{s}. Performing accurate scan{s}.\n\n')

            for p, peak in enumerate(peaks_indexes):

                # start from the scan point right before the peak
                sub_structures, sub_energies = ase_scan(embedder,
                                                        structures[peak-1],
                                                        atomnos,
                                                        indexes=indexes,
                                                        degrees=degrees/10, #1° or -1°
                                                        steps=20,
                                                        relaxed=optimization,
                                                        ad_libitum=True, # goes on until the hill is crossed
                                                        indexes_to_be_moved=indexes_to_be_moved,
                                                        title=f'Accurate scan {p+1}/{len(peaks_indexes)}',
                                                        logfile=logfile)

                if logfile is not None:
                    logfile.write('\n')

                if plot:
                    x2 = [dihedral(structure[indexes]) for structure in sub_structures]
                    y2 = [e-min_e for e in sub_energies]

                    for i, (x_, y_) in enumerate(get_plot_segments(x2, y2, max_step=abs(degrees/10)+1)):

                        plt.plot(x_,
                                y_,
                                '-o',
                                color='tab:red',
                                label='Accurate SCAN' if (p == 0 and i == 0) else None,
                                markersize=1,
                                linewidth=2,
                                alpha=0.5)

                sub_peaks_indexes = atropisomer_peaks(sub_energies, min_thr=threshold_kcal+min_e, max_thr=min_e+75)

                if sub_peaks_indexes:

                    s = 's' if len(sub_peaks_indexes) > 1 else ''
                    msg = f'Found {len(sub_peaks_indexes)} sub-peak{s}.'

                    if embedder.options.saddle or embedder.options.neb:
                        opt_name = 'saddle' if embedder.options.saddle else 'NEB TS'
                        msg += f' Performing {opt_name} optimization{s}.'

                    print(msg)

                    if logfile is not None:
                        # log the whole message, not just the plural suffix
                        logfile.write(msg+'\n')

                    for sp, sub_peak in enumerate(sub_peaks_indexes):

                        if plot:
                            x = dihedral(sub_structures[sub_peak][indexes])
                            y = sub_energies[sub_peak]-min_e
                            plt.plot(x, y, color='gold', marker='o', label='Maxima' if p == 0 else None, markersize=3)

                        if embedder.options.saddle:

                            loadbar_title = f'  > Saddle opt on sub-peak {sp+1}/{len(sub_peaks_indexes)}'
                            print(loadbar_title)

                            optimized_geom, energy, _ = ase_saddle(embedder,
                                                                    sub_structures[sub_peak],
                                                                    atomnos,
                                                                    title=f'Saddle opt - peak {p+1}, sub-peak {sp+1}',
                                                                    logfile=logfile,
                                                                    traj=bernytraj+f'_{p+1}_{sp+1}.traj' if bernytraj is not None else None)

                            if molecule_check(coords, optimized_geom, atomnos):
                                ts_structures.append(optimized_geom)
                                ts_energies.append(energy)

                        elif embedder.options.neb:

                            loadbar_title = f'  > NEB TS opt on sub-peak {sp+1}/{len(sub_peaks_indexes)}, {direction[1:]}'
                            drctn = 'clkws' if direction == '_clockwise' else 'ccws'

                            print(loadbar_title)

                            # NEB endpoints are taken on the two sides of the sub-peak
                            optimized_geom, energy, success = ase_neb(embedder,
                                                                        sub_structures[sub_peak-2],
                                                                        sub_structures[(sub_peak+1)%len(sub_structures)],
                                                                        atomnos,
                                                                        n_images=5,
                                                                        title=f'{title}_NEB_peak_{p+1}_sub-peak_{sp+1}_{drctn}',
                                                                        logfunction=embedder.log)

                            if success and molecule_check(coords, optimized_geom, atomnos):
                                ts_structures.append(optimized_geom)
                                ts_energies.append(energy)

                        else:
                            ts_structures.append(sub_structures[sub_peak])
                            ts_energies.append(sub_energies[sub_peak])

                        print()

                else:
                    print('No suitable sub-peaks found.\n')
                    if logfile is not None:
                        logfile.write('No suitable sub-peaks found.\n\n')
        else:
            print('No suitable peaks found.\n')
            if logfile is not None:
                logfile.write('No suitable peaks found.\n\n')

        if plot:
            plt.legend()
            plt.xlabel(f'Dihedral Angle {tuple(indexes)}')
            plt.ylabel('Energy (kcal/mol)')
            with open(f'{title}{direction}_plt.pickle', 'wb') as pickle_file:
                pickle.dump(fig, pickle_file)
            plt.savefig(f'{title}{direction}_plt.svg')

    ts_structures = np.array(ts_structures)

    clean_directory()

    return ts_structures, ts_energies
Пример #3
0
def ase_scan(embedder,
            coords,
            atomnos,
            indexes,
            degrees=10,
            steps=36,
            relaxed=True,
            ad_libitum=False,
            indexes_to_be_moved=None,
            title='temp scan',
            logfile=None):
    '''
    Scan a dihedral angle, rotating it by a fixed increment at each step.

    At every step the structure is optimized with the dihedral constrained
    to its current value (relaxed scan) or left as it is (rigid scan), its
    energy and coordinates are stored and the dihedral is then rotated by
    "degrees" to set up the next step. At most 1000 steps are performed.

    embedder: object handed to get_ase_calc to build the ASE calculator
    coords: starting coordinates
    atomnos: atomic numbers
    indexes: the four atom indexes defining the dihedral angle
    degrees: rotation applied at each step
    steps: number of steps. If ad libitum, steps is the minimum number
        of performed steps
    relaxed: if True, run a constrained optimization at each step
    ad_libitum: keep scanning past "steps" until the energy maximum has
        been crossed (see the stop conditions in the loop). Only
        available for relaxed scans.
    indexes_to_be_moved: atoms moved by the dihedral rotation. If None,
        all atoms are used.
    title: label used for the progress output and the log
    logfile: optional writable file object

    return: the scan structures, aligned via align_structures on indexes,
        and the list of their energies in kcal/mol
    '''
    assert len(indexes) == 4

    if ad_libitum and not relaxed:
        raise Exception('The ad_libitum keyword is only available for relaxed scans.')

    ev_to_kcal = 23.06054194532933

    atoms = Atoms(atomnos, positions=coords)
    structures, energies = [], []

    atoms.calc = get_ase_calc(embedder)

    if indexes_to_be_moved is None:
        indexes_to_be_moved = range(len(atomnos))

    # set membership keeps building the mask linear in the number of atoms
    moved = set(indexes_to_be_moved)
    mask = np.array([i in moved for i in range(len(atomnos))], dtype=bool)

    t_start = time()

    if logfile is not None:
        logfile.write(f'  > {title}\n')

    for scan_step in range(1000):

        loadbar_title = f'{title} - step {scan_step+1}'
        if ad_libitum:
            print(loadbar_title, end='\r')
        else:
            loadbar_title += '/'+str(steps)
            loadbar(scan_step+1, steps, loadbar_title+' '*(29-len(loadbar_title)))

        if logfile is not None:
            t_start_step = time()

        # Defaults for rigid scans, where no optimizer is run. Without
        # these, a rigid scan failed with a NameError further down.
        exit_str = 'single point'
        iterations = 0

        if relaxed:
            atoms.set_constraint(FixInternals(dihedrals_deg=[[atoms.get_dihedral(*indexes), indexes]]))

            with LBFGS(atoms, maxstep=0.2, logfile=None, trajectory=None) as opt:

                try:
                    opt.run(fmax=0.05, steps=500)
                    exit_str = 'converged'

                except ValueError: # Shake did not converge
                    exit_str = 'crashed'

                iterations = opt.nsteps

        # Energies are stored for rigid scans too, so that they always
        # match the returned structures one to one.
        energies.append(atoms.get_total_energy() * ev_to_kcal) # eV to kcal/mol

        if logfile is not None:
            elapsed = time() - t_start_step
            s = '/' + str(steps) if not ad_libitum else ''
            logfile.write(f'        Step {scan_step+1}{s} - {exit_str} - {iterations} iterations ({time_to_string(elapsed)})\n')

        structures.append(atoms.get_positions())

        atoms.rotate_dihedral(*indexes, angle=degrees, mask=mask)

        if exit_str == 'crashed':
            break

        if scan_step+1 >= steps:

            if not ad_libitum:
                break

            # ad_libitum stops when one of these conditions is met:
            # - we surpassed and are below the maximum of at least 1 kcal/mol
            # - we surpassed maximum and are below starting point
            # - current step energy is more than 50 kcal/mol above the scan minimum
            highest = max(energies)
            if any((
                (highest - energies[-1]) > 1,
                (highest - energies[-1]) > highest-energies[0],
                (energies[-1] - min(energies)) > 50
            )):
                print(loadbar_title)
                break

    structures = np.array(structures)

    clean_directory()

    if logfile is not None:
        elapsed = time() - t_start
        logfile.write(f'{title} - completed ({time_to_string(elapsed)})\n')

    return align_structures(structures, indexes), energies
Пример #4
0
def most_diverse_conformers(n,
                            structures,
                            atomnos,
                            energies=None,
                            force_enantiomer_pruning=False):
    '''
    Return the n most diverse structures from the set.
    First removes enantiomers (see below), then divides the structures
    in n clusters with KMeans and:
    - If the energy list is given, chooses the
      one with the lowest energy from each.
    - If it is not, picks from each cluster the structure that is
      the most distant from the centers of the other clusters.

    n: number of structures to return
    structures: array of structures
    atomnos: atomic numbers, handed to prune_enantiomers
    energies: optional energies, one per structure
    force_enantiomer_pruning: enantiomer pruning is only performed on
        less than 3000 structures, unless this flag is set

    return: the selected structures. If no more than n structures are
        left after enantiomer pruning, all of them are returned.
        Fewer than n structures may be returned if KMeans leaves
        some cluster empty.
    '''

    if len(structures) < 3000 or force_enantiomer_pruning:
        print(f'Removing enantiomers...{" "*10}', end='\r')
        structures, mask = prune_enantiomers(structures, atomnos)
        if energies is not None:
            # asarray lets plain lists of energies be masked as well
            energies = np.asarray(energies)[mask]
        # Remove enantiomers or structures similar under reflections
        # Skip if structures are too many (avoids stumping)

    if len(structures) <= n:
        return structures
    # if we already pruned enough structures to meet the requirement, return them

    print(f'Aligning structures...{" "*10}', end='\r')
    structures = align_structures(structures)
    features = structures.reshape(
        (structures.shape[0], structures.shape[1] * structures.shape[2]))
    # reduce the dimensionality of the rest of the structure array to cluster them with KMeans

    kmeans = KMeans(n_clusters=n)
    kmeans.fit(features)
    # Generate and train the model

    if energies is not None:
        clusters = [[] for _ in range(n)]
        for coords, energy, label in zip(structures, energies, kmeans.labels_):
            clusters[label].append((coords, energy))

        # if energies are given, pick the lowest energy structure from
        # each cluster (empty clusters are skipped instead of crashing)
        output = [
            min(group, key=lambda pair: pair[1])[0]
            for group in clusters if group
        ]

    else:
        centers = kmeans.cluster_centers_.reshape((n, *structures.shape[1:3]))

        clusters = [[] for _ in range(n)]
        for coords, label in zip(structures, kmeans.labels_):
            clusters[label].append(coords)

        cluster_ids = np.arange(n)
        output = []
        for k, cluster in enumerate(clusters):
            if not cluster:
                continue

            # Centers of all the clusters but this one: the exclusion has
            # to be based on the cluster index, not on the position of
            # the structure inside its own cluster.
            other_centers = centers[cluster_ids != k]

            cumdists = [
                np.sum(np.linalg.norm(other_centers - ref, axis=2))
                for ref in cluster
            ]
            furthest = cluster[cumdists.index(max(cumdists))]
            output.append(furthest)
        # if not, from each cluster yield the structure that is more distant from the other clusters

    return np.array(output)
Пример #5
0
    def saddle_refining(self):
        '''
        Performs a first order saddle optimization for each structure.

        Each structure in self.structures is passed to ase_saddle, which
        updates the structure, its energy and its exit status. Structures
        for which the optimization raises ValueError or returns a false
        exit status are discarded, similar structures are pruned with
        prune_conformers and the survivors are sorted by relative energy
        and written to "TSCoDe_SADDLE_TSs_{self.stamp}.xyz".

        self.constrained_indexes is masked together with the other
        per-structure attributes so that it stays aligned with
        self.structures.
        '''
        self.log(
            f'--> Saddle optimization ({self.options.theory_level} level)')
        t_start = time.perf_counter()

        for i, structure in enumerate(self.structures):

            loadbar(
                i,
                len(self.structures),
                prefix=f'Performing saddle opt {i+1}/{len(self.structures)} ')

            try:

                self.structures[i], self.energies[i], self.exit_status[
                    i] = ase_saddle(self,
                                    structure,
                                    self.atomnos,
                                    self.constrained_indexes[i],
                                    mols_graphs=self.graphs
                                    if self.embed != 'monomolecular' else None,
                                    title=f'Saddle opt - Structure {i+1}',
                                    logfile=self.logfile,
                                    traj=f'Saddle_opt_{i+1}.traj',
                                    maxiterations=200)

            except ValueError:
                # Thrown when an ASE read fails (during saddle opt)
                self.exit_status[i] = False

        n_structures = len(self.structures)

        loadbar(
            1,
            1,
            prefix=f'Performing saddle opt {n_structures}/{n_structures} ')
        elapsed = time.perf_counter() - t_start

        # max(..., 1) avoids a ZeroDivisionError when there are no structures
        self.log(
            f'{self.options.calculator} {self.options.theory_level} saddle optimization took '
            f'{time_to_string(elapsed)} ({time_to_string(elapsed/max(n_structures, 1))} per structure)'
        )
        self.log(
            f'Saddle opt completed for {len([i for i in self.exit_status if i])}/{n_structures} structures'
        )

        mask = self.exit_status

        # constrained_indexes has one entry per structure and is reordered
        # below: it has to be masked along with everything else
        self.apply_mask(('structures', 'energies', 'exit_status',
                         'constrained_indexes'), mask)

        ################################################# PRUNING: SIMILARITY (POST SADDLE OPT)

        if len(self.structures) != 0:

            t_start = time.perf_counter()
            self.structures, mask = prune_conformers(
                self.structures,
                self.atomnos,
                max_rmsd=self.options.pruning_thresh)
            self.apply_mask(('energies', 'exit_status',
                             'constrained_indexes'), mask)
            t_end = time.perf_counter()

            if False in mask:
                self.log(
                    f'Discarded {len([b for b in mask if not b])} candidates for similarity ({len([b for b in mask if b])} left, {time_to_string(t_end-t_start)})'
                )
            self.log()

            ################################################# SADDLE OPT EXTRA XYZ OUTPUT

            self.energies -= np.min(self.energies)
            _, sequence = zip(
                *sorted(zip(self.energies, range(len(self.energies))),
                        key=lambda x: x[0]))
            self.energies = scramble(self.energies, sequence)
            self.structures = scramble(self.structures, sequence)
            self.constrained_indexes = scramble(self.constrained_indexes,
                                                sequence)
            # sorting structures based on energy

            self.outname = f'TSCoDe_SADDLE_TSs_{self.stamp}.xyz'
            with open(self.outname, 'w') as f:
                for i, structure in enumerate(
                        align_structures(self.structures,
                                         self.constrained_indexes[0])):
                    write_xyz(
                        structure,
                        self.atomnos,
                        f,
                        title=
                        f'Structure {i+1} - TS - Rel. E. = {round(self.energies[i], 3)} kcal/mol'
                    )

            self.log(
                f'Wrote {len(self.structures)} saddle-optimized structures to {self.outname} file\n'
            )

        else:
            self.log()
Пример #6
0
    def hyperneb_refining(self):
        '''
        Performs a climbing-image NEB calculation inferring reagents and products for each structure.

        Each structure in self.structures is passed to hyperNEB, which
        updates the structure, its energy and its exit status. Structures
        for which the calculation raises MopacReadError/ValueError or
        returns a false exit status are discarded, similar structures are
        pruned with prune_conformers and the survivors are sorted by
        relative energy and written to "TSCoDe_NEB_TSs_{self.stamp}.xyz".

        self.constrained_indexes and self.exit_status are masked together
        with the other per-structure attributes so that they stay aligned
        with self.structures.
        '''
        self.log(
            f'--> HyperNEB optimization ({self.options.theory_level} level)')
        t_start = time.perf_counter()

        for i, structure in enumerate(self.structures):

            loadbar(i,
                    len(self.structures),
                    prefix=f'Performing NEB {i+1}/{len(self.structures)} ')

            t_start_opt = time.perf_counter()

            try:

                self.structures[i], self.energies[i], self.exit_status[
                    i] = hyperNEB(self,
                                  structure,
                                  self.atomnos,
                                  self.ids,
                                  self.constrained_indexes[i],
                                  title=f'structure_{i+1}')

                exit_str = 'COMPLETED' if self.exit_status[i] else 'CRASHED'

            except (MopacReadError, ValueError):
                # Both are thrown if a MOPAC file read fails, but the former occurs when an internal (TSCoDe)
                # read fails (getting reagent or product), the latter when an ASE read fails (during NEB)
                exit_str = 'CRASHED'
                self.exit_status[i] = False

            t_end_opt = time.perf_counter()

            self.log(
                f'    - {self.options.calculator} {self.options.theory_level} NEB optimization: Structure {i+1} - {exit_str} - ({time_to_string(t_end_opt-t_start_opt)})',
                p=False)

        n_structures = len(self.structures)

        loadbar(
            1,
            1,
            prefix=f'Performing NEB {n_structures}/{n_structures} ')
        elapsed = time.perf_counter() - t_start

        # max(..., 1) avoids a ZeroDivisionError when there are no structures
        self.log(
            f'{self.options.calculator} {self.options.theory_level} NEB optimization took '
            f'{time_to_string(elapsed)} ({time_to_string(elapsed/max(n_structures, 1))} per structure)'
        )
        self.log(
            f'NEB converged for {len([i for i in self.exit_status if i])}/{n_structures} structures\n'
        )

        mask = self.exit_status

        # constrained_indexes has one entry per structure and is reordered
        # below: it has to be masked along with everything else
        self.apply_mask(('structures', 'energies', 'exit_status',
                         'constrained_indexes'), mask)

        ################################################# PRUNING: SIMILARITY (POST NEB)

        if len(self.structures) != 0:

            t_start = time.perf_counter()
            self.structures, mask = prune_conformers(
                self.structures,
                self.atomnos,
                max_rmsd=self.options.pruning_thresh)
            # keep every per-structure attribute in sync, not only energies
            self.apply_mask(('energies', 'exit_status',
                             'constrained_indexes'), mask)
            t_end = time.perf_counter()

            if False in mask:
                self.log(
                    f'Discarded {len([b for b in mask if not b])} candidates for similarity ({len([b for b in mask if b])} left, {time_to_string(t_end-t_start)})'
                )
            self.log()

            ################################################# NEB XYZ OUTPUT

            self.energies -= np.min(self.energies)
            _, sequence = zip(
                *sorted(zip(self.energies, range(len(self.energies))),
                        key=lambda x: x[0]))
            self.energies = scramble(self.energies, sequence)
            self.structures = scramble(self.structures, sequence)
            self.constrained_indexes = scramble(self.constrained_indexes,
                                                sequence)
            # sorting structures based on energy

            self.outname = f'TSCoDe_NEB_TSs_{self.stamp}.xyz'
            with open(self.outname, 'w') as f:
                for i, structure in enumerate(
                        align_structures(self.structures,
                                         self.constrained_indexes[0])):
                    write_xyz(
                        structure,
                        self.atomnos,
                        f,
                        title=
                        f'Structure {i+1} - TS - Rel. E. = {round(self.energies[i], 3)} kcal/mol'
                    )

            self.log(
                f'Wrote {len(self.structures)} final TS structures to {self.outname} file\n'
            )
Пример #7
0
    def optimization_refining(self):
        '''
        Refines structures by constrained optimizations with the active calculator,
        discarding similar ones and scrambled ones.
        '''

        t_start = time.perf_counter()

        self.log(
            f'--> Structure optimization ({self.options.theory_level} level via {self.options.calculator})'
        )

        if self.options.calculator == 'MOPAC':
            method = f'{self.options.theory_level} GEO-OK CYCLES=500'

        else:
            method = f'{self.options.theory_level}'

        for i, structure in enumerate(deepcopy(self.structures)):
            loadbar(
                i,
                len(self.structures),
                prefix=f'Optimizing structure {i+1}/{len(self.structures)} ')
            try:
                t_start_opt = time.perf_counter()
                new_structure, self.energies[i], self.exit_status[
                    i] = optimize(
                        structure,
                        self.atomnos,
                        self.options.calculator,
                        method=method,
                        constrained_indexes=self.constrained_indexes[i],
                        mols_graphs=self.graphs,
                        procs=self.options.procs,
                        max_newbonds=self.options.max_newbonds,
                        check=(self.embed != 'prune'))

                if self.exit_status[i]:
                    self.structures[i] = new_structure

                exit_str = 'CONVERGED' if self.exit_status[i] else 'SCRAMBLED'

            except MopacReadError:
                # ase will throw a ValueError if the output lacks a space in the "FINAL POINTS AND DERIVATIVES" table.
                # This occurs when one or more of them is not defined, that is when the calculation did not end well.
                # The easiest solution is to reject the structure and go on.
                self.energies[i] = np.inf
                self.exit_status[i] = False
                exit_str = 'FAILED TO READ FILE'

            except Exception as e:
                raise e

            self.log((
                f'    - {self.options.calculator} {self.options.theory_level} optimization: Structure {i+1} {exit_str} - '
                f'took {time_to_string(time.perf_counter()-t_start_opt)}'),
                     p=False)

        loadbar(
            1,
            1,
            prefix=
            f'Optimizing structure {len(self.structures)}/{len(self.structures)} '
        )

        self.log(
            f'Successfully optimized {len([b for b in self.exit_status if b])}/{len(self.structures)} structures. Non-optimized ones will not be discarded.'
        )

        self.log((
            f'{self.options.calculator} {self.options.theory_level} optimization took '
            f'{time_to_string(time.perf_counter()-t_start)} (~{time_to_string((time.perf_counter()-t_start)/len(self.structures))} per structure)'
        ))

        ################################################# PRUNING: SIMILARITY (POST SEMIEMPIRICAL OPT)

        self.zero_candidates_check()
        self.similarity_refining()

        ################################################# REFINING: BONDING DISTANCES

        if self.embed != 'prune':

            self.write_structures('TS_guesses_unrefined',
                                  energies=False,
                                  p=False)
            self.log(
                f'--> Checkpoint output - Updated {len(self.structures)} TS structures before distance refinement.\n'
            )

            self.log(
                f'--> Refining bonding distances for TSs ({self.options.theory_level} level)'
            )

            if self.options.ff_opt:
                try:
                    os.remove(f'TSCoDe_checkpoint_{self.stamp}.xyz')
                    # We don't need the pre-optimized structures anymore
                except FileNotFoundError:
                    pass

            self._set_target_distances()
            t_start = time.perf_counter()

            for i, structure in enumerate(deepcopy(self.structures)):
                loadbar(
                    i,
                    len(self.structures),
                    prefix=f'Refining structure {i+1}/{len(self.structures)} ')
                try:

                    traj = f'refine_{i}.traj' if self.options.debug else None

                    new_structure, new_energy, self.exit_status[
                        i] = ase_adjust_spacings(self,
                                                 structure,
                                                 self.atomnos,
                                                 self.constrained_indexes[i],
                                                 title=i,
                                                 traj=traj)

                    if self.exit_status[i]:
                        self.structures[i] = new_structure
                        self.energies[i] = new_energy

                except ValueError as e:
                    # ase will throw a ValueError if the output lacks a space in the "FINAL POINTS AND DERIVATIVES" table.
                    # This occurs when one or more of them is not defined, that is when the calculation did not end well.
                    # The easiest solution is to reject the structure and go on.
                    self.log(repr(e))
                    self.log(
                        f'Failed to read MOPAC file for Structure {i+1}, skipping distance refinement',
                        p=False)

            loadbar(1,
                    1,
                    prefix=f'Refining structure {i+1}/{len(self.structures)} ')
            t_end = time.perf_counter()
            self.log(
                f'{self.options.calculator} {self.options.theory_level} refinement took {time_to_string(t_end-t_start)} (~{time_to_string((t_end-t_start)/len(self.structures))} per structure)'
            )

            before = len(self.structures)
            if self.options.only_refined:

                mask = self.exit_status
                self.apply_mask(('structures', 'energies', 'exit_status',
                                 'constrained_indexes'), mask)

                s = f'Discarded {len([i for i in mask if not i])} unrefined structures.'

            else:
                s = 'Non-refined ones will not be discarded.'

            self.log(
                f'Successfully refined {len([i for i in self.exit_status if i])}/{before} structures. {s}'
            )

            ################################################# PRUNING: SIMILARITY (POST REFINEMENT)

            self.zero_candidates_check()
            self.similarity_refining()

            ################################################# PRUNING: FITNESS

            self.fitness_refining()

        ################################################# PRUNING: ENERGY

        self.energies = self.energies - np.min(self.energies)
        _, sequence = zip(*sorted(
            zip(self.energies, range(len(self.energies))), key=lambda x: x[0]))
        self.energies = self.scramble(self.energies, sequence)
        self.structures = self.scramble(self.structures, sequence)
        self.constrained_indexes = self.scramble(self.constrained_indexes,
                                                 sequence)
        # sorting structures based on energy

        if self.options.kcal_thresh is not None:

            mask = (self.energies -
                    np.min(self.energies)) < self.options.kcal_thresh

            self.apply_mask(('structures', 'energies', 'exit_status'), mask)

            if False in mask:
                self.log(
                    f'Discarded {len([b for b in mask if not b])} candidates for energy (Threshold set to {self.options.kcal_thresh} kcal/mol)'
                )

        ################################################# XYZ GUESSES OUTPUT

        self.outname = f'TSCoDe_TS_guesses_{self.stamp}.xyz'
        with open(self.outname, 'w') as f:
            for i, structure in enumerate(
                    align_structures(self.structures,
                                     self.constrained_indexes[0])):

                kind = 'REFINED - ' if self.exit_status[i] else 'NOT REFINED - '

                write_xyz(
                    structure,
                    self.atomnos,
                    f,
                    title=
                    f'Structure {i+1} - {kind}Rel. E. = {round(self.energies[i], 3)} kcal/mol'
                )

        try:
            os.remove(f'TSCoDe_TS_guesses_unrefined_{self.stamp}.xyz')
            # since we have the refined structures, we can get rid of the unrefined ones
        except FileNotFoundError:
            pass

        self.log(
            f'Wrote {len(self.structures)} rough TS structures to {self.outname} file.\n'
        )
Пример #8
0
    def force_field_refining(self):
        '''
        Performs structural optimizations with the embedder force field calculator.
        A structure is replaced by its optimized geometry only when `optimize`
        returns a truthy exit status for it (i.e. it did not scramble during the
        FF optimization); the rest are kept as they are.

        Side effects:
        - sets self.outname to the checkpoint file name and writes that file
          twice (once before the FF optimization, once after the similarity
          pruning, overwriting the first version);
        - updates self.exit_status[i] and, on success, self.structures[i];
        - calls self.zero_candidates_check() and self.similarity_refining();
        - logs progress and timings through self.log.

        Any exception raised by `optimize` propagates to the caller unchanged.
        '''

        ################################################# CHECKPOINT BEFORE FF OPTIMIZATION

        self.outname = f'TSCoDe_checkpoint_{self.stamp}.xyz'
        with open(self.outname, 'w') as f:
            # all structures are aligned using the constrained indexes of the first one
            aligned = align_structures(self.structures,
                                       self.constrained_indexes[0])
            for i, structure in enumerate(aligned):
                write_xyz(
                    structure,
                    self.atomnos,
                    f,
                    title=
                    f'TS candidate {i+1} - Checkpoint before FF optimization')
        self.log(
            f'\n--> Checkpoint output - Wrote {len(self.structures)} TS structures to {self.outname} file before FF optimization.\n'
        )

        ################################################# GEOMETRY OPTIMIZATION - FORCE FIELD

        self.log(
            f'--> Structure optimization ({self.options.ff_level} level via {self.options.ff_calc})'
        )

        # the number of structures does not change inside the loop:
        # elements are only replaced in place, never added or removed
        n_structures = len(self.structures)

        t_start = time.perf_counter()

        # iterate over a copy, since self.structures is updated while looping
        for i, structure in enumerate(deepcopy(self.structures)):
            loadbar(i,
                    n_structures,
                    prefix=f'Optimizing structure {i+1}/{n_structures} ')

            # no try/except here: the previous handler only re-raised the
            # exception as it was, so errors simply propagate to the caller
            new_structure, _, self.exit_status[i] = optimize(
                structure,
                self.atomnos,
                self.options.ff_calc,
                method=self.options.ff_level,
                constrained_indexes=self.constrained_indexes[i],
                mols_graphs=self.graphs,
                check=(self.embed != 'prune'))

            if self.exit_status[i]:
                self.structures[i] = new_structure

        # draw the completed load bar
        loadbar(
            1,
            1,
            prefix=
            f'Optimizing structure {len(self.structures)}/{len(self.structures)} '
        )
        t_end = time.perf_counter()
        self.log(
            f'Force Field {self.options.ff_level} optimization took {time_to_string(t_end-t_start)} (~{time_to_string((t_end-t_start)/len(self.structures))} per structure)'
        )

        ################################################# EXIT STATUS

        self.log(
            f'Successfully pre-refined {sum(1 for b in self.exit_status if b)}/{len(self.structures)} candidates at {self.options.ff_level} level.'
        )

        ################################################# PRUNING: SIMILARITY (POST FORCE FIELD OPT)

        self.zero_candidates_check()
        self.similarity_refining()

        ################################################# CHECKPOINT BEFORE OPTIMIZATION

        # same file as the first checkpoint: its content is overwritten
        with open(self.outname, 'w') as f:
            aligned = align_structures(self.structures,
                                       self.constrained_indexes[0])
            for i, structure in enumerate(aligned):
                # title tells apart FF-optimized structures from untouched ones
                exit_str = f'{self.options.ff_level} REFINED' if self.exit_status[
                    i] else 'RAW'
                write_xyz(
                    structure,
                    self.atomnos,
                    f,
                    title=
                    f'TS candidate {i+1} - {exit_str} - Checkpoint before {self.options.calculator} optimization'
                )
        self.log(
            f'--> Checkpoint output - Updated {len(self.structures)} TS structures to {self.outname} file before {self.options.calculator} optimization.\n'
        )