Exemplo n.º 1
0
def adjlist_from_smiles(smiles: str) -> Union[str, None]:
    """
    Get an RMG adjacency list from SMILES.
    Uses RMG for the conversion.

    Args:
        smiles (str): The SMILES descriptor.

    Returns:
        Union[str, None]: The respective adjacency list, or ``None`` if the
        conversion raised an exception or did not produce a molecule.
    """
    try:
        mol = Molecule().from_smiles(smilesstr=smiles,
                                     raise_atomtype_exception=False)
    except Exception:
        # Best-effort conversion: any parsing failure maps to ``None``.
        # ``Exception`` (rather than a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate instead of being silently swallowed.
        return None
    if mol is None:
        return None
    return mol.to_adjacency_list()
Exemplo n.º 2
0
def update_dictionary_entries(old_entries, need_to_add):
    """
    Create group entries for new species and add them to existing entries.

    Args:
        old_entries (dict): Maps entry label -> entry object. Each entry is
            read through its ``label`` and ``item`` attributes. The dict is
            modified in place.
        need_to_add (iterable of str): Species (as SMILES) that need to be
            added. Repeated SMILES are processed only once.

    Returns:
        dict: ``old_entries`` itself, now holding both old and new entries.

    Raises:
        AssertionError: If no label could be built for a species, if a newly
            built label already exists, or if the final entry labels are not
            unique.
    """
    # De-duplicate while keeping first-seen order. (A bare
    # ``list(set(need_to_add))`` expression would discard its result.)
    for species in dict.fromkeys(need_to_add):

        molecule = RMGMolecule(SMILES=species)
        adjlist = molecule.to_adjacency_list()

        # Rewrite "multiplicity N" as "multiplicity [N]"; the bracketed form
        # is what gets handed to Group.from_adjacency_list below.
        multiplicity_match = re.search('(?<=multiplicity ).*', adjlist)
        if multiplicity_match:
            multiplicity = int(multiplicity_match.group(0))
            adjlist = re.sub(r'multiplicity .*',
                             'multiplicity [{}]'.format(multiplicity), adjlist)

        group = rmgpy.molecule.group.Group()
        group.from_adjacency_list(adjlist)

        # Build a formula-like label such as "C2O1".
        # NOTE(review): this counts characters of the SMILES text, not atoms
        # of the molecule, so e.g. "Cl" adds to the C count and hydrogens not
        # written out in the SMILES are not counted -- confirm this is
        # intended.
        rel_label = ''
        for atom in ('C', 'H', 'O'):
            count = species.count(atom)
            if count > 0:
                rel_label += atom + str(count)

        assert rel_label != ''

        # 3 scenarios:
        #   No old            -> no need for ID number:      max_ID = -1
        #   Only one old      -> needs to have ID of 1:      max_ID = 0
        #   Multiple old      -> needs to have a unique ID:  max_ID > 0
        max_ID = -1
        duplicate = False
        for old_label, old_entry in old_entries.items():

            if group.is_isomorphic(old_entry.item):
                duplicate = True
                print('{} found to be duplicate'.format(old_entry))
                continue

            if rel_label not in old_label:
                continue

            if rel_label == old_label and max_ID == -1:
                # At least one with same label
                max_ID = 0

            # Labels of the form "<base>-<ID>": split on the LAST dash so a
            # base label that itself contains dashes does not break parsing.
            base_label, dash, ID_str = old_label.rpartition('-')
            if not dash or base_label != rel_label:
                continue
            try:
                ID = int(ID_str)
            except ValueError:
                # Suffix is not a number, so it cannot clash with a new ID.
                continue
            if ID > max_ID:
                # Multiple existing labels
                max_ID = ID

        if duplicate:
            # An isomorphic group is already present; nothing to add.
            continue

        if max_ID > -1:
            # Existing label: take the next free ID.
            rel_label = rel_label + '-' + str(max_ID + 1)

        entry = Entry()
        entry.label = rel_label
        entry.item = group
        assert rel_label not in old_entries
        old_entries[rel_label] = entry

    entry_labels = [entry.label for entry in old_entries.values()]
    assert len(entry_labels) == len(
        set(entry_labels)), 'Non-unique labels in dictionary'

    return old_entries
Exemplo n.º 3
0
def getAdjacencyList(request, identifier):
    """
    Returns an adjacency list of the species corresponding to `identifier`.

    `identifier` should be something recognized by NCI resolver, eg.
    SMILES, InChI, CACTVS, chemical name, etc.

    The NCI resolver has some bugs regarding reading SMILES of radicals.
    E.g. it thinks CC[CH] is CCC, so we first try to use the identifier
    directly as a SMILES string, and only pass it through the resolver
    if that does not work.

    For specific problematic cases, the NCI resolver is bypassed and the SMILES
    is returned from a dictionary of values. For O2, the resolver returns the singlet
    form which is inert in RMG. For oxygen, the resolver returns 'O' as the SMILES, which
    is the SMILES for water.

    Args:
        request: The incoming request object (not read by this function).
        identifier (str): The species identifier to convert.

    Returns:
        HttpResponse: The adjacency list as plain text on success, an empty
        plain-text response for empty input, or an error response
        (404 when the resolver request fails, 504 when it times out,
        500 when the resolved SMILES cannot be parsed, or whatever
        ``analyze_atomtype_error`` / ``analyze_element_error`` return).
    """
    from rmgpy.molecule import Molecule
    from rmgpy.exceptions import AtomTypeError
    from socket import timeout as SocketTimeout
    from ssl import SSLError

    known_names = {
        'o2': '[O][O]',
        'oxygen': '[O][O]',
        'benzyl': '[CH2]c1ccccc1',
        'phenyl': '[c]1ccccc1',
        'carbon monoxide': '[C-]#[O+]',
        'co': '[C-]#[O+]',
    }

    # Drop surrounding whitespace before any matching is done
    identifier = identifier.strip()

    # Return empty string for empty input
    if identifier == "":
        return HttpResponse("", content_type="text/plain", charset='utf-8')

    molecule = Molecule()

    # Check if identifier is an InChI string
    if identifier.startswith('InChI=1'):
        try:
            molecule.from_inchi(identifier)
        except AtomTypeError as e:
            return analyze_atomtype_error(f'{e}')
        except KeyError as e:
            return analyze_element_error(f'{e}')
    elif identifier.lower() in known_names:
        # Hard-coded SMILES for names the resolver is known to get wrong
        molecule.from_smiles(known_names[identifier.lower()])
    else:
        try:
            # Try parsing as a SMILES string
            molecule.from_smiles(identifier)
        except AtomTypeError as e:
            return analyze_atomtype_error(f'{e}')
        except KeyError as e:
            return analyze_element_error(f'{e}')
        except (IOError, ValueError):
            # Try converting it to a SMILES using the NCI chemical resolver
            url = "https://cactus.nci.nih.gov/chemical/structure/{0}/smiles".format(
                urllib.parse.quote(identifier))
            try:
                # ``with`` closes the HTTP response even if reading fails;
                # the read is inside the ``try`` so that a timeout while
                # reading the body is reported like a handshake timeout.
                with urllib.request.urlopen(url, timeout=5) as f:
                    smiles = f.read().decode('utf-8')
            except urllib.error.URLError as e:
                return HttpResponse(
                    f'Could not identify {identifier}. NCI resolver responded with {e}.',
                    status=404)
            except (SSLError, SocketTimeout):
                return HttpResponse(
                    'NCI resolver timed out, please try again.', status=504)
            try:
                molecule.from_smiles(smiles)
            except AtomTypeError as e:
                return analyze_atomtype_error(f'{e}', cactus_result=smiles)
            except KeyError as e:
                return analyze_element_error(f'{e}', cactus_result=smiles)
            except ValueError:
                return HttpResponse(
                    f'Identifier was resolved by NCI resolver (https://cactus.nci.nih.gov). '
                    f'The resolved SMILES is {smiles}, but RMG has trouble parsing this smiles. '
                    f'This can be due to a bad resolution or a bad identifier input.',
                    status=500)

    adjlist = molecule.to_adjacency_list(remove_h=False)
    return HttpResponse(adjlist, content_type="text/plain")