def filter_structures(mol_list, mark_unreactive=True, allow_expanded_octet=True, features=None):
    """
    We often get too many resonance structures from the combination of all rules, particularly for species
    containing lone pairs. This function filters them out by minimizing the number of C/N/O/S atoms without
    a full octet.

    The structures are passed, in order, through octet filtration, charge filtration and, for species
    flagged as aromatic, aromaticity filtration.

    Args:
        mol_list                list of resonance structures; all must share the same multiplicity
        mark_unreactive         if True (default), call ``mark_unreactive_structures`` on the filtered list
        allow_expanded_octet    forwarded to ``get_octet_deviation_list``
        features                optional dict of molecule features. Aromaticity filtration only runs when
                                it is given and its aromaticity flag is truthy. The flag is read from
                                ``'is_aromatic'`` when that key is present, otherwise from ``'isAromatic'``

    Returns:
        the filtered list of structures

    Raises:
        ValueError          if the structures do not all have the same multiplicity
        ResonanceError      if no structure survives the filtration (``check_reactive`` raises it as well
                            when none of the remaining structures is reactive)
    """
    multiplicities = [mol.multiplicity for mol in mol_list]
    if any([multiplicity != multiplicities[0] for multiplicity in multiplicities]):
        raise ValueError("Cannot filter structures with different multiplicities!")

    # Get an octet deviation list
    octet_deviation_list = get_octet_deviation_list(mol_list, allow_expanded_octet=allow_expanded_octet)

    # Filter mol_list using the octet rule and the respective octet deviation list
    filtered_list, charge_span_list = octet_filtration(mol_list, octet_deviation_list)

    # Filter by charge
    filtered_list = charge_filtration(filtered_list, charge_span_list)

    # Filter aromatic structures.
    # The resonance generator in this file maintains the flag under 'is_aromatic' (and rewrites it after
    # its false-positive check), so that spelling takes precedence; 'isAromatic' is kept as a fallback
    # for callers that still use the older key. A dict with neither key raises KeyError, as before.
    if features is not None:
        aromatic_key = 'is_aromatic' if 'is_aromatic' in features else 'isAromatic'
        if features[aromatic_key]:
            filtered_list = aromaticity_filtration(filtered_list, features)

    if not filtered_list:
        # NOTE(review): other code in this file calls to_smiles(); confirm toSMILES() exists on these objects
        raise ResonanceError('Could not determine representative localized structures for species {0}'.format(
            mol_list[0].toSMILES()))

    if mark_unreactive:
        # Mark selected unreactive structures if OS and/or adjacent birad unidirectional transitions were used
        mark_unreactive_structures(filtered_list, mol_list)

    # Check that there's at least one reactive structure in the list
    check_reactive(filtered_list)

    return filtered_list
def check_reactive(filtered_list):
    """
    Verify that the given list holds at least one structure whose ``reactive`` attribute is truthy.

    Nothing is returned. When no structure is reactive, the species and every structure in the list are
    written to the log and a ``ResonanceError`` is raised.
    """
    for structure in filtered_list:
        if structure.reactive:
            # A single reactive structure is enough
            return

    # No reactive structure: log the details of every structure, then fail
    # NOTE(review): other code in this file calls to_smiles() / to_adjacency_list(); confirm that the
    # camelCase methods used below exist on these objects
    logging.info('\n\n')
    logging.error('No reactive structures were attributed to species {0}'.format(filtered_list[0].toSMILES()))
    for structure in filtered_list:
        details = 'Structure: {0}\n{1}Reactive: {2}'.format(
            structure.toSMILES(), structure.toAdjacencyList(), structure.reactive)
        logging.info(details)
    logging.info('\n')

    message = ('Each species must have at least one reactive structure. Something probably went wrong'
               ' when exploring resonance structures for species {0}'.format(filtered_list[0].toSMILES()))
    raise ResonanceError(message)
def _generate_resonance_structures(mol_list, method_list, keep_isomorphic=False, copy=False, filter_structures=True):
    """
    Iteratively expand a list of starting molecules into resonance structures using the given methods.

    Every structure in the list is visited once, including the ones appended while iterating. The methods
    are only applied to a structure that passes the on-the-fly filtration described in the body; their
    results are appended to the list when no equivalent structure is already present.

    Args:
        mol_list             starting list of molecules
        method_list          list of resonance structure algorithms; each one is called with a single
                             molecule and its result is passed to ``list.extend``
        keep_isomorphic      if False (default), a new structure is dropped when an existing one gives
                             is_isomorphic=True
                             if True, it is only dropped when an existing one gives is_identical=True
        copy                 if False (default), new resonance structures are appended to the input list
                             if True, a new list is made to hold all of the resonance structures
        filter_structures    not read by this function; the on-the-fly filtration always runs

    Returns:
        the list of starting molecules followed by the structures that were added

    Raises:
        ResonanceError       if any structure in the final list has a non-zero net charge
    """
    # The local names below are declared for Cython and are therefore kept as they are
    cython.declare(index=cython.int, molecule=Molecule, new_mol_list=list, new_mol=Molecule, mol=Molecule)

    if copy:
        # Work on a shallow copy so that the caller's list is not modified
        mol_list = mol_list[:]

    # Reference values for the on-the-fly filtration, taken from the starting structures
    min_octet_deviation = min(filtration.get_octet_deviation_list(mol_list))
    min_charge_span = min(filtration.get_charge_span_list(mol_list))

    # Walk the list by index, since it grows while it is being traversed
    index = 0
    while index < len(mol_list):
        molecule = mol_list[index]
        index += 1

        # On-the-fly filtration: Extend methods only for molecule that don't deviate too much from the octet rule
        # (a +2 distance from the minimal deviation is used, octet deviations per species are in increments of 2)
        # Sometimes rearranging the structure requires an additional higher charge span structure, so allow
        # structures with a +1 higher charge span compared to the minimum, e.g., [O-]S#S[N+]#N
        # This is run by default even if filter_structures=False.
        octet_deviation = filtration.get_octet_deviation(molecule)
        charge_span = molecule.get_charge_span()
        if not (octet_deviation <= min_octet_deviation + 2 and charge_span <= min_charge_span + 1):
            # Too far from the best structure seen so far: do not expand it
            continue

        new_mol_list = []
        for method in method_list:
            new_mol_list.extend(method(molecule))

        # Tighten both criteria whenever the expanded structure improves on the current minimum
        min_octet_deviation = min(min_octet_deviation, octet_deviation)
        min_charge_span = min(min_charge_span, charge_span)

        for new_mol in new_mol_list:
            # Append to structure list if unique
            already_present = False
            for mol in mol_list:
                if keep_isomorphic:
                    already_present = mol.is_identical(new_mol)
                else:
                    already_present = mol.is_isomorphic(new_mol)
                if already_present:
                    break
            if not already_present:
                mol_list.append(new_mol)

    # Check net charge: every generated structure must be neutral
    for mol in mol_list:
        if mol.get_net_charge() == 0:
            continue
        raise ResonanceError('Resonance generation gave a net charged molecule:\n{0}'
                             'Ions are not yet supported in RMG.'.format(mol.to_adjacency_list()))

    return mol_list
def generate_resonance_structures(mol, clar_structures=True, keep_isomorphic=False, filter_structures=True):
    """
    Generate and return all of the resonance structures for the input molecule.

    Most of the complexity of this method goes into handling aromatic species, particularly to generate an
    accurate set of resonance structures that is consistent regardless of the input structure. The
    following considerations are made:

    1. False positives from RDKit aromaticity detection can occur if a molecule has exocyclic double bonds
    2. False negatives from RDKit aromaticity detection can occur if a radical is delocalized into an
       aromatic ring
    3. sp2 hybridized radicals in the plane of an aromatic ring do not participate in hyperconjugation
    4. Non-aromatic resonance structures of PAHs are not important resonance contributors (assumption)

    Aromatic species are broken into the following categories for resonance treatment:

    - Radical polycyclic aromatic species: Kekule structures are generated in order to generate adjacent
      resonance structures. The resulting structures are then used for Clar structure generation. After
      all three steps, any non-aromatic structures are removed, under the assumption that they are not
      important resonance contributors.
    - Radical monocyclic aromatic species: Kekule structures are generated along with adjacent resonance
      structures. All are kept regardless of aromaticity because the radical is more likely to delocalize
      into the ring.
    - Stable polycyclic aromatic species: Clar structures are generated
    - Stable monocyclic aromatic species: Kekule structures are generated

    Args:
        mol                  the molecule to expand; it must be reactive and have zero net charge
        clar_structures      if True (default), species flagged as polycyclic aromatic are expanded with
                             ``generate_clar_structures``; otherwise
                             ``generate_aromatic_resonance_structure`` is used for them as well
        keep_isomorphic      if False (default), structures isomorphic to ``mol`` are discarded
                             if True, only structures identical to ``mol`` are discarded
        filter_structures    if True (default), the result is passed through
                             ``filtration.filter_structures`` before it is returned

    Returns:
        the list of generated structures, or the output of ``filtration.filter_structures`` for that list
        when ``filter_structures`` is True

    Raises:
        AtomTypeError        re-raised from ``mol.update()`` after the problem is logged
        ValueError           if ``mol`` has a non-zero net charge
        ResonanceError       if ``mol`` is not reactive
    """
    cython.declare(mol_list=list, new_mol_list=list, features=dict, method_list=list)

    # Check that mol is a valid structure in terms of atomTypes and net charge. Since SMILES with hypervalance
    # heteroatoms are not always read correctly, print a suggestion to input the structure using an adjList.
    try:
        mol.update()
    except AtomTypeError:
        logging.error("The following molecule has at least one atom with an undefined atomtype:\n{0}"
                      "\nIf this structure was entered in SMILES, try using the adjacencyList format for an unambiguous"
                      " definition.".format(mol.to_adjacency_list()))
        raise

    if mol.get_net_charge() != 0:
        charge_message = ("Got the following structure:\nSMILES: {0}\nAdjacencyList:\n{1}\nNet charge: {2}\n\n"
                          "Currently RMG cannot process charged species correctly."
                          "\nIf this structure was entered in SMILES, try using the adjacencyList format for an"
                          " unambiguous definition.")
        raise ValueError(charge_message.format(mol.to_smiles(), mol.to_adjacency_list(), mol.get_net_charge()))

    if not mol.reactive:
        raise ResonanceError('Can only generate resonance structures for reactive molecules! Got the following '
                             'unreactive structure:\n{0}Reactive = {1}'.format(mol.to_adjacency_list(), mol.reactive))

    mol_list = [mol]

    # Analyze molecule
    features = analyze_molecule(mol)

    # Use generate_optimal_aromatic_resonance_structures to check for false positives and negatives
    # NOTE(review): 'isPolycyclicAromatic' is the only camelCase key used here; it is left untouched
    # because the code that fills and reads the features dict is not visible from this function.
    needs_aromatic_check = features['is_aromatic'] or (
        features['is_cyclic'] and features['is_radical'] and not features['is_aryl_radical'])
    if needs_aromatic_check:
        new_mol_list = generate_optimal_aromatic_resonance_structures(mol, features)
        if new_mol_list:
            features['is_aromatic'] = True
            if len(new_mol_list[0].get_aromatic_rings()[0]) > 1:
                features['isPolycyclicAromatic'] = True
            for new_mol in new_mol_list:
                # Append to structure list if unique (compared against the input molecule only)
                if keep_isomorphic:
                    duplicate = mol.is_identical(new_mol)
                else:
                    duplicate = mol.is_isomorphic(new_mol)
                if not duplicate:
                    mol_list.append(new_mol)
        else:
            # Encountered false positive, ie. the molecule is not actually aromatic
            features['is_aromatic'] = False
            features['isPolycyclicAromatic'] = False

    # Special handling for aromatic species: collect the applicable steps, then run them one at a time
    if features['is_aromatic']:
        aromatic_steps = []
        if features['is_radical'] and not features['is_aryl_radical']:
            aromatic_steps.append(generate_kekule_structure)
            aromatic_steps.append(generate_allyl_delocalization_resonance_structures)
        if features['isPolycyclicAromatic'] and clar_structures:
            aromatic_steps.append(generate_clar_structures)
        else:
            aromatic_steps.append(generate_aromatic_resonance_structure)
        for step in aromatic_steps:
            _generate_resonance_structures(mol_list, [step],
                                           keep_isomorphic=keep_isomorphic, filter_structures=filter_structures)

    # Generate remaining resonance structures
    method_list = populate_resonance_algorithms(features)
    _generate_resonance_structures(mol_list, method_list,
                                   keep_isomorphic=keep_isomorphic, filter_structures=filter_structures)

    if not filter_structures:
        return mol_list

    return filtration.filter_structures(mol_list, features=features)