Exemplo n.º 1
0
 def get_smiles( self, mol):
   """Return the SMILES string built for the connected molecule `mol`.

   The per-run bookkeeping attributes on `self` are reset, atoms lying on
   aromatic bonds get `properties_['aromatic'] = 1`, the stereochemistry
   records of `mol` are indexed, and the text is assembled from the pieces
   yielded by `self._get_smiles( mol)`.  Finally every "{{stereo<index>}}"
   placeholder found in the text is replaced by "@" or "@@" for the matching
   tetrahedral center (presumably the placeholders are emitted by
   `_get_smiles` - verify there).

   Raises `oasa_exceptions.oasa_not_implemented_error` when `mol` is not
   connected.
   """
   if not mol.is_connected():
     raise oasa_exceptions.oasa_not_implemented_error( "SMILES", "Cannot encode disconnected compounds, such as salts etc. HINT - use molecule.get_disconnected_subgraphs() to divide the molecule to individual parts.")
   self.molecule = mol
   self.ring_joins = []
   self._processed_atoms = []
   self.branches = {}
   self._stereo_bonds_to_code = {} # for bond it will contain character it uses
   self._stereo_bonds_to_others = {} # for bond it will contain the other bonds
   self._stereo_centers = {}
   # at first we mark all the atoms with aromatic bonds
   # it is much simpler to do it now when all the edges are present
   # we can make use of the properties attribute of the vertex
   for e in mol.edges:
     if e.aromatic:
       for v in e.vertices:
         v.properties_[ 'aromatic'] = 1
   # stereochemistry information preparation
   for st in mol.stereochemistry:
     if isinstance( st, stereochemistry.cis_trans_stereochemistry):
       end1, inside1, inside2, end2 = st.references
       e1 = end1.get_edge_leading_to( inside1)
       e2 = end2.get_edge_leading_to( inside2)
       # each of the two bonds remembers its partner and the shared record
       self._stereo_bonds_to_others.setdefault( e1, []).append( (e2, st))
       self._stereo_bonds_to_others.setdefault( e2, []).append( (e1, st))
     elif isinstance( st, stereochemistry.tetrahedral_stereochemistry):
       self._stereo_centers[st.center] = st
     # any other kind of stereochemistry cannot be handled and is ignored

   try:
     ret = ''.join( self._get_smiles( mol))
   finally:
     # restore the molecule even when the generation fails, otherwise the
     # caller would be left with temporarily disconnected edges
     mol.reconnect_temporarily_disconnected_edges()
     # this is needed because the way temporarily_disconnected edges are handled is not compatible with the way smiles
     # generation works - it splits the molecule while reusing the same atoms and bonds and thus disconnected bonds accounting fails
     for e in mol.edges:
       e.disconnected = False
   # here tetrahedral stereochemistry is added
   # (.items() instead of .iteritems() so that this runs on Python 2 and 3)
   for v, st in self._stereo_centers.items():
     # neighbors of the center in the order in which the atoms were processed;
     # the center itself stands for its explicit hydrogen
     processed_neighbors = []
     for n in self._processed_atoms:
       if n in v.neighbors:
         processed_neighbors.append( n)
       elif v.explicit_hydrogens and n is v:
         processed_neighbors.append( stereochemistry.explicit_hydrogen())
     count = match_atom_lists( st.references, processed_neighbors)
     clockwise = st.value == st.CLOCKWISE
     # an odd result of match_atom_lists flips the sense of rotation
     if count % 2 == 1:
       clockwise = not clockwise
     ch_symbol = "@@" if clockwise else "@"
     ret = ret.replace( "{{stereo%d}}" % mol.vertices.index(v), ch_symbol)
   return ret
Exemplo n.º 2
0
  def _process_stereochemistry( self, mol):
    ## process stereochemistry
    ## double bonds
    def get_stereobond_direction( end_atom, inside_atom, bond, init):
      position = mol.vertices.index( end_atom) - mol.vertices.index( inside_atom)
      char = bond.properties_['stereo'] == "\\" and 1 or -1
      direction = (position * char * init) < 0 and "up" or "down"
      return direction
    def get_end_and_inside_vertex_from_edge_path( edge, path):
      a1,a2 = edge.vertices
      if len( [e for e in a1.neighbor_edges if e in path]) == 1:
        return a1, a2
      return a2, a1
    
    stereo_edges = [e for e in mol.edges if "stereo" in e.properties_]
    paths = []
    for i,e1 in enumerate( stereo_edges):
      for e2 in stereo_edges[i+1:]:
        path = mol.get_path_between_edges( e1, e2)
        path2 = path[1:-1]
        if len( path2)%2 and not [_e for _e in path2 if _e.order != 2]:
          # only odd number of double bonds, double bonds only
          for _e in path[1:-1]:
            if not mol.is_edge_a_bridge_fast_and_dangerous( _e):
              break
          else:
            # only stereo related to non-cyclic bonds
            paths.append( path)
    
    for path in paths:
      bond1 = path[0]
      end_atom1,inside_atom1 = get_end_and_inside_vertex_from_edge_path( bond1, path)
      bond2 = path[-1]
      end_atom2,inside_atom2 = get_end_and_inside_vertex_from_edge_path( bond2, path)
      d1 = get_stereobond_direction( end_atom1, inside_atom1, bond1, -1)
      d2 = get_stereobond_direction( end_atom2, inside_atom2, bond2, -1)
      if d1 == d2:
        value = stereochemistry.cis_trans_stereochemistry.SAME_SIDE
      else:
        value = stereochemistry.cis_trans_stereochemistry.OPPOSITE_SIDE
      if len( path) == 3:
        center = path[1]
      else:
        center = None
      refs = [end_atom1,inside_atom1,inside_atom2,end_atom2]
      st = stereochemistry.cis_trans_stereochemistry( center=center, value=value, references=refs)
      mol.add_stereochemistry( st)

    # tetrahedral stereochemistry
    for v in mol.vertices:
      refs = None
      if 'stereo' in v.properties_:
        idx = [mol.vertices.index( n) for n in v.neighbors]
        idx.sort()
        if len( idx) < 3:
          pass # no stereochemistry with less then 3 neighbors
        elif len( idx) == 3:
          if v.explicit_hydrogens == 0:
            pass # no stereochemistry without adding hydrogen here
          else:
            if self.explicit_hydrogens_to_real_atoms:
              hs = mol.explicit_hydrogens_to_real_atoms( v)
              h = hs.pop()
            else:
              h = stereochemistry.explicit_hydrogen()
            v_idx = mol.vertices.index( v)
            idx1 = [i for i in idx if i < v_idx]
            idx2 = [i for i in idx if i > v_idx]
            refs = [mol.vertices[i] for i in idx1] + [h] + [mol.vertices[i] for i in idx2]
        elif len( idx) == 4:
          refs = [mol.vertices[i] for i in idx]
        else:
          pass # unhandled stereochemistry
      if refs:
        if v.properties_["stereo"] == "@":
          direction = stereochemistry.tetrahedral_stereochemistry.ANTICLOCKWISE
        elif v.properties_['stereo'] == "@@":
          direction = stereochemistry.tetrahedral_stereochemistry.CLOCKWISE
        else:
          continue # no meaning
        st = stereochemistry.tetrahedral_stereochemistry( center=v, value=direction, references=refs)
        mol.add_stereochemistry( st)

    # delete the data after processing
    for e in mol.edges:
      if 'stereo' in e.properties_:
        del e.properties_['stereo']
    for v in mol.vertices:
      if 'stereo' in v.properties_:
        del v.properties_['stereo']