def get_smiles(self, mol):
    """Return the SMILES string for the connected molecule `mol`.

    The molecule is processed in place (no copy is made). The call

    * resets the per-run bookkeeping attributes on `self` (`molecule`,
      `ring_joins`, `_processed_atoms`, `branches` and the three
      `_stereo_*` dictionaries),
    * sets `properties_['aromatic'] = 1` on both vertices of every
      aromatic edge,
    * sorts the entries of `mol.stereochemistry` into cis/trans bond pairs
      and tetrahedral centers (any other kind is ignored),
    * joins the fragments yielded by `self._get_smiles(mol)`,
    * reconnects temporarily disconnected edges and clears the
      `disconnected` flag on every edge,
    * replaces each `{{stereo<N>}}` placeholder in the text (N is the
      index of the center in `mol.vertices`) by "@" or "@@".

    Raises:
        oasa_exceptions.oasa_not_implemented_error: when
            `mol.is_connected()` is false.
    """
    if not mol.is_connected():
        raise oasa_exceptions.oasa_not_implemented_error(
            "SMILES",
            "Cannot encode disconnected compounds, such as salts etc. HINT - use molecule.get_disconnected_subgraphs() to divide the molecule to individual parts.")

    self.molecule = mol
    self.ring_joins = []
    self._processed_atoms = []
    self.branches = {}
    self._stereo_bonds_to_code = {}  # for bond it will contain character it uses
    self._stereo_bonds_to_others = {}  # for bond it will contain the other bonds
    self._stereo_centers = {}  # center atom -> its tetrahedral stereochemistry

    # At first we mark all the atoms with aromatic bonds; it is much simpler
    # to do it now when all the edges are present. The properties_ attribute
    # of the vertex is used to carry the mark.
    for e in mol.edges:
        if e.aromatic:
            for v in e.vertices:
                v.properties_['aromatic'] = 1

    # stereochemistry information preparation
    for st in mol.stereochemistry:
        if isinstance(st, stereochemistry.cis_trans_stereochemistry):
            end1, inside1, inside2, end2 = st.references
            e1 = end1.get_edge_leading_to(inside1)
            e2 = end2.get_edge_leading_to(inside2)
            # each of the two bonds remembers its partner and the shared record
            self._stereo_bonds_to_others.setdefault(e1, []).append((e2, st))
            self._stereo_bonds_to_others.setdefault(e2, []).append((e1, st))
        elif isinstance(st, stereochemistry.tetrahedral_stereochemistry):
            self._stereo_centers[st.center] = st
        # any other kind of stereochemistry cannot be handled and is skipped

    ret = ''.join(self._get_smiles(mol))
    mol.reconnect_temporarily_disconnected_edges()
    # This is needed because the way temporarily disconnected edges are
    # handled is not compatible with the way smiles generation works - it
    # splits the molecule while reusing the same atoms and bonds and thus
    # disconnected bonds accounting fails.
    for e in mol.edges:
        e.disconnected = False

    # Here tetrahedral stereochemistry is added.
    # .items() (not the Python 2 only .iteritems()) keeps this loop working
    # on both Python 2 and Python 3.
    for v, st in self._stereo_centers.items():
        # snapshot once instead of re-reading the attribute per processed atom
        neighbors = list(v.neighbors)
        processed_neighbors = []
        for n in self._processed_atoms:
            if n in neighbors:
                processed_neighbors.append(n)
            elif v.explicit_hydrogens and n is v:
                # the center itself stands in for its explicit hydrogen
                processed_neighbors.append(stereochemistry.explicit_hydrogen())
        count = match_atom_lists(st.references, processed_neighbors)
        clockwise = st.value == st.CLOCKWISE
        # an odd result from match_atom_lists inverts the stored sense
        if count % 2 == 1:
            clockwise = not clockwise
        ch_symbol = "@@" if clockwise else "@"
        ret = ret.replace("{{stereo%d}}" % mol.vertices.index(v), ch_symbol)
    return ret
def _process_stereochemistry(self, mol):
    """Turn the raw 'stereo' marks found in `properties_` into stereochemistry records.

    Three passes are made over `mol`:

    1. Double bonds - every pair of edges carrying a 'stereo' mark is joined
       by `mol.get_path_between_edges`; when the edges strictly between them
       are an odd number of order-2 edges that all pass
       `mol.is_edge_a_bridge_fast_and_dangerous`, a
       `cis_trans_stereochemistry` is added to the molecule.
    2. Tetrahedral centers - every vertex carrying a 'stereo' mark of "@" or
       "@@" with four neighbors, or with three neighbors plus explicit
       hydrogens, gets a `tetrahedral_stereochemistry`. Depending on
       `self.explicit_hydrogens_to_real_atoms` the hydrogen reference is
       either taken from `mol.explicit_hydrogens_to_real_atoms(v)` or a
       fresh `stereochemistry.explicit_hydrogen()`.
    3. The 'stereo' marks are deleted from all edges and vertices.

    Nothing is returned; the molecule is modified in place.
    """

    def side_of_bond(end_atom, inside_atom, bond, init):
        # "up" or "down", derived from the order of the two atoms in
        # mol.vertices, the stereo character of the bond and the sign `init`
        offset = mol.vertices.index(end_atom) - mol.vertices.index(inside_atom)
        sign = 1 if bond.properties_['stereo'] == "\\" else -1
        return "up" if offset * sign * init < 0 else "down"

    def split_end_and_inside(edge, path):
        # the end atom is the vertex touching exactly one edge of the path
        first, second = edge.vertices
        touching = [e for e in first.neighbor_edges if e in path]
        if len(touching) == 1:
            return first, second
        return second, first

    def forget_marks(items):
        # delete the data after processing
        for item in items:
            if 'stereo' in item.properties_:
                del item.properties_['stereo']

    ## double bonds
    marked_edges = [e for e in mol.edges if "stereo" in e.properties_]
    stereo_paths = []
    for pos, start in enumerate(marked_edges):
        for stop in marked_edges[pos + 1:]:
            path = mol.get_path_between_edges(start, stop)
            inner = path[1:-1]
            # only odd number of double bonds, double bonds only
            if len(inner) % 2 == 0:
                continue
            if any(b.order != 2 for b in inner):
                continue
            # only stereo related to non-cyclic bonds
            if all(mol.is_edge_a_bridge_fast_and_dangerous(b) for b in inner):
                stereo_paths.append(path)

    for path in stereo_paths:
        first_bond = path[0]
        last_bond = path[-1]
        end1, inside1 = split_end_and_inside(first_bond, path)
        end2, inside2 = split_end_and_inside(last_bond, path)
        side1 = side_of_bond(end1, inside1, first_bond, -1)
        side2 = side_of_bond(end2, inside2, last_bond, -1)
        cis_trans = stereochemistry.cis_trans_stereochemistry
        value = cis_trans.SAME_SIDE if side1 == side2 else cis_trans.OPPOSITE_SIDE
        # a path of three edges has a single inner edge, used as the center
        center = path[1] if len(path) == 3 else None
        mol.add_stereochemistry(
            cis_trans(center=center,
                      value=value,
                      references=[end1, inside1, inside2, end2]))

    ## tetrahedral stereochemistry
    for v in mol.vertices:
        if 'stereo' not in v.properties_:
            continue
        # neighbor positions are taken before any hydrogen is materialized
        order = sorted(mol.vertices.index(n) for n in v.neighbors)
        neighbor_count = len(order)
        if neighbor_count == 3:
            if v.explicit_hydrogens == 0:
                continue  # no stereochemistry without adding hydrogen here
            if self.explicit_hydrogens_to_real_atoms:
                hydrogen = mol.explicit_hydrogens_to_real_atoms(v).pop()
            else:
                hydrogen = stereochemistry.explicit_hydrogen()
            # the hydrogen is placed where the center itself sits in the order
            own = mol.vertices.index(v)
            before = [mol.vertices[i] for i in order if i < own]
            after = [mol.vertices[i] for i in order if i > own]
            refs = before + [hydrogen] + after
        elif neighbor_count == 4:
            refs = [mol.vertices[i] for i in order]
        else:
            # fewer than 3 neighbors carry no stereochemistry,
            # more than 4 is unhandled
            continue

        mark = v.properties_["stereo"]
        tetrahedral = stereochemistry.tetrahedral_stereochemistry
        if mark == "@":
            direction = tetrahedral.ANTICLOCKWISE
        elif mark == "@@":
            direction = tetrahedral.CLOCKWISE
        else:
            continue  # no meaning
        mol.add_stereochemistry(
            tetrahedral(center=v, value=direction, references=refs))

    forget_marks(mol.edges)
    forget_marks(mol.vertices)