示例#1
0
def _refine_structures(structures,
                       atomnos,
                       calculator,
                       method,
                       procs,
                       loadstring=''):
    '''
    Optimize each structure of a set and collect the resulting energies.

    structures: indexable collection of coordinates. Entries whose
        optimization succeeds are overwritten in place with the
        optimized coordinates, the others are left untouched.
    atomnos, calculator, method, procs: forwarded unchanged to optimize().
    loadstring: text placed in front of the "i/total" progress counter
        that is handed to loadbar().

    return: (structures, energies). energies holds one value per
        structure, np.inf where the optimization was not successful.
    '''
    energies = []

    # loop over a copy, so that the geometries read here are not
    # affected by the in-place updates of structures done below
    for i, geometry in enumerate(deepcopy(structures)):

        loadbar(i, len(structures), f'{loadstring} {i+1}/{len(structures)} ')

        new_geometry, new_energy, converged = optimize(geometry,
                                                       atomnos,
                                                       calculator,
                                                       method=method,
                                                       procs=procs)

        if not converged:
            # keep the input geometry and flag it with an infinite energy
            energies.append(np.inf)
            continue

        structures[i] = new_geometry
        energies.append(new_energy)

    # last progress update, issued with iteration == total
    loadbar(len(structures), len(structures),
            f'{loadstring} {len(structures)}/{len(structures)} ')

    return structures, energies
示例#2
0
def csearch_operator(filename, embedder):
    '''
    Run a conformational search on every structure read from a file and
    write all the resulting conformers to a new .xyz file.

    filename: name of the structure file, read with read_xyz(). The output
        name is obtained by replacing its last four characters (assumed to
        be the extension) with '_confs.xyz'.
    embedder: object providing the log() function, the run options
        (options.optimization) and the embed attribute used in here. It is
        also passed to _get_lowest_calc() to pick calculator, method and procs.

    return: name of the file the conformers were written to.
    '''

    embedder.log(f'--> Performing conformational search on {filename}')

    t_start = time.perf_counter()

    data = read_xyz(filename)

    if len(data.atomcoords) > 1:
        embedder.log(
            'Requested conformational search on multimolecular file - will do\n'
            +
            'an individual search from each conformer (might be time-consuming).'
        )

    calc, method, procs = _get_lowest_calc(embedder)
    conformers = []

    for i, coords in enumerate(data.atomcoords):

        opt_coords = optimize(
            coords, data.atomnos, calculator=calc, method=method,
            procs=procs)[0] if embedder.options.optimization else coords
        # optimize starting structure before running csearch

        conf_batch = clustered_csearch(opt_coords,
                                       data.atomnos,
                                       title=f'{filename}, conformer {i+1}',
                                       logfunction=embedder.log)
        # generate the most diverse conformers starting from optimized geometry

        conformers.append(conf_batch)

    batch_size = max(len(batch) for batch in conformers)
    # size of the largest batch: identical to the common
    # batch size whenever all the runs return the same
    # number of conformers

    conformers = np.concatenate(conformers)
    # merging structures from each run in a single array. Batches are
    # joined along the first axis rather than stacked, since individual
    # runs can return a different number of conformers and a ragged
    # list cannot be turned into a regular array

    if embedder.embed is not None:
        embedder.log(
            f'\nSelected the most diverse {batch_size} out of {conformers.shape[0]} conformers for {filename} ({time_to_string(time.perf_counter()-t_start)})'
        )
        conformers = most_diverse_conformers(batch_size, conformers,
                                             data.atomnos)

    confname = filename[:-4] + '_confs.xyz'
    with open(confname, 'w') as f:
        for i, conformer in enumerate(conformers):
            write_xyz(conformer,
                      data.atomnos,
                      f,
                      title=f'Generated conformer {i}')

    embedder.log('\n')

    return confname
示例#3
0
def clustered_csearch(coords,
                      atomnos,
                      constrained_indexes=None,
                      keep_hb=False,
                      ff_opt=False,
                      n=10,
                      mode=1,
                      calc=None,
                      method=None,
                      title='test',
                      logfunction=print):
    '''
    Conformational search carried out by rotating torsions in groups
    ("clusters"): the structures kept after rotating one group are used
    as starting points for the rotations of the next one.

    coords, atomnos: starting geometry and atomic numbers.
    constrained_indexes: pairs of atomic indexes. Each pair is added as an
        edge to the molecular graph and the whole array is passed on to
        optimize() when ff_opt is on. None means no constraints.
    keep_hb: whether to preserve the presence of current hydrogen bonds or not.
        Only acted upon when the molecular graph has more than one
        connected component.
    ff_opt: if True, every generated structure is optimized with optimize()
        at the calc/method level. It also lowers the maximum size of the
        torsion groups from 8 to 3.
    n: number of structures to keep from each torsion cluster
        (None disables this intermediate selection)
    mode: 0 - keep the n lowest energy conformers (requires ff_opt)
          1 - keep the n most diverse conformers
    calc, method: calculator and level used if ff_opt is on. Default to
        FF_CALC and to the matching entry of DEFAULT_FF_LEVELS.
    title: name used in the message logged when no rotable bond is found.
    logfunction: callable used for logging.

    return: array of structures. If no torsion is found, an array with
        the input coordinates as its only element.
    '''

    assert mode == 1 or ff_opt, 'Either leave mode=1 or turn on force field optimization'
    assert mode in (0, 1), 'The mode keyword can only be 0 or 1'

    calc = FF_CALC if calc is None else calc
    method = DEFAULT_FF_LEVELS[calc] if method is None else method
    # Set default calculator attributes if user did not specify them

    constrained_indexes = np.array(
        []) if constrained_indexes is None else constrained_indexes
    t_start_run = time.perf_counter()

    graph = graphize(coords, atomnos)
    for i1, i2 in constrained_indexes:
        graph.add_edge(i1, i2)
    # build a molecular graph of the TS
    # that includes constrained indexes pairs

    if keep_hb and len(list(nx.connected_components(graph))) > 1:
        hydrogen_bonds = _get_hydrogen_bonds(coords, atomnos, graph)
        for hb in hydrogen_bonds:
            graph.add_edge(*hb)
    else:
        hydrogen_bonds = ()
    # get informations on the intra/intermolecular hydrogen
    # bonds that we should avoid disrupting

    double_bonds = get_double_bonds_indexes(coords, atomnos)
    # get all double bonds - do not rotate these

    torsions = _get_torsions(graph, hydrogen_bonds, double_bonds)
    # get all torsions that we should explore

    for t in torsions:
        t.sort_torsion(graph, constrained_indexes)
    # sort torsion indexes so that first index of each torsion
    # is the half that will move and is external to the TS

    if not torsions:
        logfunction(f'No rotable bonds found for {title}.')
        return np.array([coords])

    grouped_torsions = _group_torsions(coords,
                                       torsions,
                                       max_size=3 if ff_opt else 8)

    ############################################## LOG TORSIONS

    logfunction('Torsion list: (indexes : n-fold)')
    for t in torsions:
        logfunction('  - {:21s} : {}-fold'.format(str(t.torsion), t.n_fold))

    _write_torsion_vmd(coords, atomnos, constrained_indexes, grouped_torsions)

    ############################################## LOG TORSIONS

    logfunction(
        f'\n--> Clustered CSearch - mode {mode} ({"stability" if mode == 0 else "diversity"}) - '
        +
        f'{len(torsions)} torsions in {len(grouped_torsions)} group{"s" if len(grouped_torsions) != 1 else ""} - '
        + f'{[len(t) for t in grouped_torsions]}')

    tag = 'stable' if mode == 0 else 'diverse'
    # word used in the per-group log line. Defined up front so that it is
    # available also when a group is small enough to skip the selection

    output_structures = []
    output_energies = []
    # energies matching output_structures one to one (filled in mode 0 only)

    starting_points = [coords]
    for tg, torsions_group in enumerate(grouped_torsions):

        logfunction()

        angles = cartesian_product(*[t.get_angles() for t in torsions_group])
        candidates = len(angles) * len(starting_points)

        new_structures = []
        for sp in starting_points:
            for angle_set in angles:

                new_coords = deepcopy(sp)

                for torsion, angle in zip(torsions_group, angle_set):
                    if angle != 0:
                        mask = _get_rotation_mask(graph, torsion.torsion)
                        new_coords = rotate_dihedral(new_coords,
                                                     torsion.torsion,
                                                     angle,
                                                     mask=mask)

                new_structures.append(new_coords)
        # every combination of angles applied to every starting point

        new_structures = np.array(new_structures)
        mask = np.zeros(len(new_structures), dtype=bool)
        for s, structure in enumerate(new_structures):
            mask[s] = compenetration_check(structure)
        # True for the structures that are kept

        new_structures = new_structures[mask]
        for_comp = np.count_nonzero(~mask)

        logfunction(
            f'> Group {tg+1}/{len(grouped_torsions)} - {len(torsions_group)} bonds, '
            +
            f'{[t.n_fold for t in torsions_group]} n-folds, {len(starting_points)} '
            +
            f'starting point{"s" if len(starting_points) > 1 else ""} = {candidates} conformers'
        )
        logfunction(
            f'  {candidates} generated, {for_comp} removed for compenetration ({len(new_structures)} left)'
        )

        energies = None
        if ff_opt:

            t_start = time.perf_counter()

            energies = np.zeros(new_structures.shape[0])
            for c, new_coords in enumerate(deepcopy(new_structures)):

                opt_coords, energy, success = optimize(
                    new_coords,
                    atomnos,
                    calc,
                    method=method,
                    constrained_indexes=constrained_indexes)

                if success:
                    new_structures[c] = opt_coords
                    energies[c] = energy

                else:
                    energies[c] = np.inf
                    # failed optimizations keep the starting geometry
                    # and end up last in any energy-based sorting

            logfunction(
                f'Optimized {len(new_structures)} structures at {method} level ({time_to_string(time.perf_counter()-t_start)})'
            )

        if tg + 1 != len(grouped_torsions):
            if n is not None and len(new_structures) > n:
                if mode == 0:
                    lowest = np.argsort(energies, kind='stable')[:n]
                    new_structures = new_structures[lowest]
                    energies = energies[lowest]
                    # energies are trimmed together with the structures
                    # so that the two stay aligned
                if mode == 1:
                    new_structures = most_diverse_conformers(
                        n, new_structures, atomnos, energies)
            logfunction(
                f'  Kept the most {tag} {len(new_structures)} starting points for next rotation cluster'
            )

        output_structures.extend(new_structures)
        if mode == 0:
            output_energies.extend(energies)
        starting_points = new_structures

    output_structures = np.array(output_structures)

    n_out = sum([t.n_fold for t in torsions])

    if len(new_structures) > n_out:
        if mode == 0:
            lowest = np.argsort(output_energies, kind='stable')[:n_out]
            output_structures = output_structures[lowest]
            # ranking is done on the energies collected from all groups,
            # which match output_structures one to one
        if mode == 1:
            # NOTE(review): energies only refers to the structures of the last
            # group, while output_structures collects all of them - the two
            # differ in length when ff_opt is on and more than one group is
            # present. Confirm how most_diverse_conformers uses this argument.
            output_structures = most_diverse_conformers(
                n_out, output_structures, atomnos, energies)
    logfunction(
        f'  Selected the {"best" if mode == 0 else "most diverse"} {len(output_structures)} new structures ({time_to_string(time.perf_counter()-t_start_run)})'
    )

    return output_structures
示例#4
0
def dihedral_embed(embedder):
    '''
    Scan the dihedral angle defined by the reactive indexes of the first
    embedder object and collect the structures found by the scan.

    Each conformer of the molecule is first optimized with optimize();
    conformers failing this step are skipped. The scan itself is carried
    out by ase_torsion_TSs(), and the energies it returns are stored
    relative to the energy of the pre-optimized conformer.

    Results are stored in embedder.structures and embedder.energies,
    then passed through embedder.similarity_refining() and written out,
    before calling embedder.normal_termination().

    Raises ZeroCandidatesError if no structure is collected.
    '''
    from tscode.atropisomer_module import ase_torsion_TSs

    mol = embedder.objects[0]
    embedder.structures, embedder.energies = [], []

    embedder.log(
        f'\n--> {mol.name} - performing a scan of dihedral angle with indices {mol.reactive_indexes}\n'
    )

    for c, start_coords in enumerate(mol.atomcoords):

        embedder.log(
            f'\n--> Pre-optimizing input structure{"s" if len(mol.atomcoords) > 1 else ""} '
            f'({embedder.options.theory_level} via {embedder.options.calculator})'
        )

        embedder.log(
            f'--> Performing relaxed scans (conformer {c+1}/{len(mol.atomcoords)})'
        )

        relaxed_coords, ground_energy, success = optimize(
            start_coords,
            mol.atomnos,
            embedder.options.calculator,
            method=embedder.options.theory_level,
            procs=embedder.options.procs,
            solvent=embedder.options.solvent)

        if success:

            # trajectory name is only provided in debug mode
            if embedder.options.debug:
                bernytraj = mol.rootname + '_berny'
            else:
                bernytraj = None

            scan_structures, scan_energies = ase_torsion_TSs(
                embedder,
                relaxed_coords,
                mol.atomnos,
                mol.reactive_indexes,
                threshold_kcal=embedder.options.kcal_thresh,
                title=mol.rootname + f'_conf_{c+1}',
                optimization=embedder.options.optimization,
                logfile=embedder.logfile,
                bernytraj=bernytraj,
                plot=True)

            # energies are stored relative to the pre-optimized conformer
            for scan_structure, scan_energy in zip(scan_structures,
                                                   scan_energies):
                embedder.structures.append(scan_structure)
                embedder.energies.append(scan_energy - ground_energy)

        else:
            embedder.log(f'Pre-optimization failed - Skipped conformer {c+1}',
                         p=False)

    embedder.structures = np.array(embedder.structures)
    embedder.energies = np.array(embedder.energies)

    if not len(embedder.structures):
        # nothing was collected: tell the user how to proceed, then stop
        embedder.log(
            '\n--> Dihedral embed did not find any suitable maxima above the set threshold\n'
            f'    ({embedder.options.kcal_thresh} kcal/mol) during the scan procedure. Observe the\n'
            '    generated energy plot and try lowering the threshold value (KCAL keyword).'
        )
        raise ZeroCandidatesError()

    embedder.atomnos = mol.atomnos
    embedder.similarity_refining()

    embedder.write_structures('TS_guesses',
                              indexes=embedder.objects[0].reactive_indexes,
                              relative=False,
                              extra='(barrier height)')
    embedder.write_vmd(indexes=embedder.objects[0].reactive_indexes)

    embedder.normal_termination()
示例#5
0
文件: run.py 项目: ntampellini/TSCoDe
    def optimization_refining(self):
        '''
        Refines structures by constrained optimizations with the active calculator,
        discarding similar ones and scrambled ones.

        The method works in place on self.structures, self.energies,
        self.exit_status and self.constrained_indexes, in this order:

        1. optimization of every structure via optimize(): a structure is
           replaced only if its optimization reports success
        2. similarity pruning (self.similarity_refining)
        3. unless self.embed is 'prune': checkpoint output, refinement of
           the bonding distances via ase_adjust_spacings(), removal of the
           unrefined structures if options.only_refined is set, similarity
           and fitness pruning
        4. sorting by relative energy and, if options.kcal_thresh is set,
           removal of the structures lying above that threshold
        5. writing of the remaining structures to self.outname
        '''

        t_start = time.perf_counter()

        self.log(
            f'--> Structure optimization ({self.options.theory_level} level via {self.options.calculator})'
        )

        if self.options.calculator == 'MOPAC':
            method = f'{self.options.theory_level} GEO-OK CYCLES=500'

        else:
            method = f'{self.options.theory_level}'

        # iterate over a copy, since self.structures is updated along the way
        for i, structure in enumerate(deepcopy(self.structures)):
            loadbar(
                i,
                len(self.structures),
                prefix=f'Optimizing structure {i+1}/{len(self.structures)} ')
            try:
                t_start_opt = time.perf_counter()
                new_structure, self.energies[i], self.exit_status[
                    i] = optimize(
                        structure,
                        self.atomnos,
                        self.options.calculator,
                        method=method,
                        constrained_indexes=self.constrained_indexes[i],
                        mols_graphs=self.graphs,
                        procs=self.options.procs,
                        max_newbonds=self.options.max_newbonds,
                        check=(self.embed != 'prune'))

                if self.exit_status[i]:
                    self.structures[i] = new_structure

                exit_str = 'CONVERGED' if self.exit_status[i] else 'SCRAMBLED'

            except MopacReadError:
                # ase will throw a ValueError if the output lacks a space in the "FINAL POINTS AND DERIVATIVES" table.
                # This occurs when one or more of them is not defined, that is when the calculation did not end well.
                # The easiest solution is to reject the structure and go on.
                self.energies[i] = np.inf
                self.exit_status[i] = False
                exit_str = 'FAILED TO READ FILE'

            # any other exception is left free to propagate to the caller

            self.log((
                f'    - {self.options.calculator} {self.options.theory_level} optimization: Structure {i+1} {exit_str} - '
                f'took {time_to_string(time.perf_counter()-t_start_opt)}'),
                     p=False)

        loadbar(
            1,
            1,
            prefix=
            f'Optimizing structure {len(self.structures)}/{len(self.structures)} '
        )

        self.log(
            f'Successfully optimized {len([b for b in self.exit_status if b])}/{len(self.structures)} structures. Non-optimized ones will not be discarded.'
        )

        self.log((
            f'{self.options.calculator} {self.options.theory_level} optimization took '
            f'{time_to_string(time.perf_counter()-t_start)} (~{time_to_string((time.perf_counter()-t_start)/len(self.structures))} per structure)'
        ))

        ################################################# PRUNING: SIMILARITY (POST SEMIEMPIRICAL OPT)

        self.zero_candidates_check()
        self.similarity_refining()

        ################################################# REFINING: BONDING DISTANCES

        if self.embed != 'prune':

            self.write_structures('TS_guesses_unrefined',
                                  energies=False,
                                  p=False)
            self.log(
                f'--> Checkpoint output - Updated {len(self.structures)} TS structures before distance refinement.\n'
            )

            self.log(
                f'--> Refining bonding distances for TSs ({self.options.theory_level} level)'
            )

            if self.options.ff_opt:
                try:
                    os.remove(f'TSCoDe_checkpoint_{self.stamp}.xyz')
                    # We don't need the pre-optimized structures anymore
                except FileNotFoundError:
                    pass

            self._set_target_distances()
            t_start = time.perf_counter()

            for i, structure in enumerate(deepcopy(self.structures)):
                loadbar(
                    i,
                    len(self.structures),
                    prefix=f'Refining structure {i+1}/{len(self.structures)} ')
                try:

                    traj = f'refine_{i}.traj' if self.options.debug else None

                    new_structure, new_energy, self.exit_status[
                        i] = ase_adjust_spacings(self,
                                                 structure,
                                                 self.atomnos,
                                                 self.constrained_indexes[i],
                                                 title=i,
                                                 traj=traj)

                    if self.exit_status[i]:
                        self.structures[i] = new_structure
                        self.energies[i] = new_energy

                except ValueError as e:
                    # ase will throw a ValueError if the output lacks a space in the "FINAL POINTS AND DERIVATIVES" table.
                    # This occurs when one or more of them is not defined, that is when the calculation did not end well.
                    # The easiest solution is to reject the structure and go on.
                    self.log(repr(e))
                    self.log(
                        f'Failed to read MOPAC file for Structure {i+1}, skipping distance refinement',
                        p=False)

            # the total is used here instead of the loop
            # variable, that only exists if the loop ran
            loadbar(
                1,
                1,
                prefix=
                f'Refining structure {len(self.structures)}/{len(self.structures)} '
            )
            t_end = time.perf_counter()
            self.log(
                f'{self.options.calculator} {self.options.theory_level} refinement took {time_to_string(t_end-t_start)} (~{time_to_string((t_end-t_start)/len(self.structures))} per structure)'
            )

            before = len(self.structures)
            if self.options.only_refined:

                mask = self.exit_status
                self.apply_mask(('structures', 'energies', 'exit_status',
                                 'constrained_indexes'), mask)

                s = f'Discarded {len([i for i in mask if not i])} unrefined structures.'

            else:
                s = 'Non-refined ones will not be discarded.'

            self.log(
                f'Successfully refined {len([i for i in self.exit_status if i])}/{before} structures. {s}'
            )

            ################################################# PRUNING: SIMILARITY (POST REFINEMENT)

            self.zero_candidates_check()
            self.similarity_refining()

            ################################################# PRUNING: FITNESS

            self.fitness_refining()

        ################################################# PRUNING: ENERGY

        self.energies = self.energies - np.min(self.energies)
        _, sequence = zip(*sorted(
            zip(self.energies, range(len(self.energies))), key=lambda x: x[0]))
        self.energies = self.scramble(self.energies, sequence)
        self.structures = self.scramble(self.structures, sequence)
        self.constrained_indexes = self.scramble(self.constrained_indexes,
                                                 sequence)
        self.exit_status = self.scramble(self.exit_status, sequence)
        # sorting structures based on energy. All the per-structure
        # attributes are reordered together: exit_status is read again
        # below, and has to match the new order of the structures

        if self.options.kcal_thresh is not None:

            mask = (self.energies -
                    np.min(self.energies)) < self.options.kcal_thresh

            self.apply_mask(('structures', 'energies', 'exit_status',
                             'constrained_indexes'), mask)
            # same set of attributes masked for the only_refined option,
            # so that they all keep the same length

            if False in mask:
                self.log(
                    f'Discarded {len([b for b in mask if not b])} candidates for energy (Threshold set to {self.options.kcal_thresh} kcal/mol)'
                )

        ################################################# XYZ GUESSES OUTPUT

        self.outname = f'TSCoDe_TS_guesses_{self.stamp}.xyz'
        with open(self.outname, 'w') as f:
            for i, structure in enumerate(
                    align_structures(self.structures,
                                     self.constrained_indexes[0])):

                kind = 'REFINED - ' if self.exit_status[i] else 'NOT REFINED - '

                write_xyz(
                    structure,
                    self.atomnos,
                    f,
                    title=
                    f'Structure {i+1} - {kind}Rel. E. = {round(self.energies[i], 3)} kcal/mol'
                )

        try:
            os.remove(f'TSCoDe_TS_guesses_unrefined_{self.stamp}.xyz')
            # since we have the refined structures, we can get rid of the unrefined ones
        except FileNotFoundError:
            pass

        self.log(
            f'Wrote {len(self.structures)} rough TS structures to {self.outname} file.\n'
        )
示例#6
0
文件: run.py 项目: ntampellini/TSCoDe
    def force_field_refining(self):
        '''
        Performs structural optimizations with the embedder force field calculator.
        Only structures that do not scramble during FF optimization are updated,
        while the rest are kept as they are.

        A checkpoint file (self.outname) is written before the optimization
        and rewritten after the similarity pruning that follows it.
        '''

        ################################################# CHECKPOINT BEFORE FF OPTIMIZATION

        self.outname = f'TSCoDe_checkpoint_{self.stamp}.xyz'
        with open(self.outname, 'w') as f:
            aligned = align_structures(self.structures,
                                       self.constrained_indexes[0])
            for i, aligned_structure in enumerate(aligned):
                write_xyz(
                    aligned_structure,
                    self.atomnos,
                    f,
                    title=
                    f'TS candidate {i+1} - Checkpoint before FF optimization')

        self.log(
            f'\n--> Checkpoint output - Wrote {len(self.structures)} TS structures to {self.outname} file before FF optimization.\n'
        )

        ################################################# GEOMETRY OPTIMIZATION - FORCE FIELD

        self.log(
            f'--> Structure optimization ({self.options.ff_level} level via {self.options.ff_calc})'
        )

        t_start = time.perf_counter()

        # a copy is iterated, as self.structures is updated inside the loop
        for i, candidate in enumerate(deepcopy(self.structures)):
            loadbar(
                i,
                len(self.structures),
                prefix=f'Optimizing structure {i+1}/{len(self.structures)} ')
            try:
                outcome = optimize(
                    candidate,
                    self.atomnos,
                    self.options.ff_calc,
                    method=self.options.ff_level,
                    constrained_indexes=self.constrained_indexes[i],
                    mols_graphs=self.graphs,
                    check=(self.embed != 'prune'))

                # the energy (second item) is not used in here
                new_structure, _, self.exit_status[i] = outcome

                if self.exit_status[i]:
                    self.structures[i] = new_structure

            except Exception as err:
                # nothing is handled at this level: the error is passed on
                raise err

        loadbar(
            1,
            1,
            prefix=
            f'Optimizing structure {len(self.structures)}/{len(self.structures)} '
        )
        t_end = time.perf_counter()
        self.log(
            f'Force Field {self.options.ff_level} optimization took {time_to_string(t_end-t_start)} (~{time_to_string((t_end-t_start)/len(self.structures))} per structure)'
        )

        ################################################# EXIT STATUS

        self.log(
            f'Successfully pre-refined {len([b for b in self.exit_status if b])}/{len(self.structures)} candidates at {self.options.ff_level} level.'
        )

        ################################################# PRUNING: SIMILARITY (POST FORCE FIELD OPT)

        self.zero_candidates_check()
        self.similarity_refining()

        ################################################# CHECKPOINT BEFORE OPTIMIZATION

        with open(self.outname, 'w') as f:
            aligned = align_structures(self.structures,
                                       self.constrained_indexes[0])
            for i, aligned_structure in enumerate(aligned):

                # label each structure according to its FF exit status
                if self.exit_status[i]:
                    exit_str = f'{self.options.ff_level} REFINED'
                else:
                    exit_str = 'RAW'

                write_xyz(
                    aligned_structure,
                    self.atomnos,
                    f,
                    title=
                    f'TS candidate {i+1} - {exit_str} - Checkpoint before {self.options.calculator} optimization'
                )

        self.log(
            f'--> Checkpoint output - Updated {len(self.structures)} TS structures to {self.outname} file before {self.options.calculator} optimization.\n'
        )