Пример #1
0
def desalt(mol):
    """Keep the biggest component of a (possibly multi-fragment) molecule.

    The input is round-tripped through InChI and sanitized. If the result has
    exactly one fragment it is returned directly; otherwise its SMILES is
    split on '.' and each piece is rebuilt through InChI, keeping the piece
    with the highest atom count.

    Known limitation: this molecule escaped the patterns: InChI
    InChI=1S/2C6H11NO5.O.V/c2*1-3(5(8)9)7(12)4(2)6(10)11;;/h2*3-4,12H,1-2H3,(H,8,9)(H,10,11);;/q;;;+2/p-2/t2*3-,4-;;/m00../s1
    gave the error "Molecule must be fully connected by covalent bonds."

    Args:
        mol: An RDKit mol.

    Returns:
        tuple: ``(mol, cleaned)`` where ``mol`` is an RDKit mol and
        ``cleaned`` is a boolean indicating whether a component was selected.
        When there is only one fragment, or when no piece could be rebuilt,
        the InChI-round-tripped input is returned with ``cleaned=False``.
    """
    mol = MolFromInchi(MolToInchi(mol))
    SanitizeMol(mol)
    fragments = Chem.rdmolops.GetMolFrags(mol)  # these are atom indices
    # If there are fragments or multiple molecules this will be greater than 1
    if len(fragments) == 1:
        return mol, False

    # Start from the unsplit molecule so that a result is always defined, even
    # if none of the pieces below can be rebuilt.
    parent_mol = mol
    parent_atom_count = 0
    status = False
    for piece in Chem.MolToSmiles(mol, True).split('.'):
        # Sanitize=True will fail for choline sulfate. Can't sanitize the radical.
        fragment = Chem.MolFromSmiles(piece, sanitize=False)
        if fragment is None:
            continue
        little_mol = Chem.MolFromInchi(Chem.MolToInchi(fragment))
        if little_mol is None:
            continue
        count = little_mol.GetNumAtoms()
        if count > parent_atom_count:
            parent_atom_count = count
            parent_mol = little_mol
            status = True
    return parent_mol, status
Пример #2
0
def parallel_wrapper(mol, rest_inchis, n_total):
    """Compute one row of similarities between `mol` and `rest_inchis`.

    The row has `n_total` float32 entries. The last ``len(rest_inchis)``
    positions receive ``tanimoto_sim(mol, MolFromInchi(inchi))`` in order;
    the leading positions stay zero.
    """
    row = np.zeros(n_total, dtype=np.float32)
    first_slot = n_total - len(rest_inchis)

    for slot, other_inchi in enumerate(rest_inchis, start=first_slot):
        row[slot] = tanimoto_sim(mol, MolFromInchi(other_inchi))
    return row
Пример #3
0
    def render_structure(self):
        """Render the structure from InChI or SMILES as SVG and load it.

        RDKit is used when available, otherwise OpenBabel. In both cases the
        InChI is tried first and the SMILES is the fallback. When no molecule
        can be built, an empty byte array is loaded instead. The
        ``btShowStructure`` button is hidden afterwards in every case.
        """
        if RDKIT_AVAILABLE:
            # InChI takes precedence; SMILES is only consulted otherwise
            if RDKIT_INCHI and self._inchi:
                rd_mol = MolFromInchi(self._inchi)
            elif self._smiles is not None:
                rd_mol = MolFromSmiles(self._smiles)
            else:
                rd_mol = None

            if rd_mol is None:
                self.load(QByteArray(b''))
            else:
                if not rd_mol.GetNumConformers():
                    rdDepictor.Compute2DCoords(rd_mol)
                canvas = rdMolDraw2D.MolDraw2DSVG(self.size().width(),
                                                  self.size().height())
                canvas.DrawMolecule(rd_mol)
                canvas.FinishDrawing()
                svg_text = canvas.GetDrawingText().replace('svg:', '')
                self.load(QByteArray(svg_text.encode()))
        elif OPENBABEL_AVAILABLE:  # RDKit is missing, try OpenBabel instead
            ob_mol = None
            try:
                if OPENBABEL_INCHI and self._inchi:
                    ob_mol = pybel.readstring('inchi', self._inchi)
                elif self._smiles:
                    ob_mol = pybel.readstring('smiles', self._smiles)
            except OSError:
                # Reading failed: ob_mol is still None, so the empty drawing
                # is loaded below.
                ob_mol = None

            if ob_mol is None:
                self.load(QByteArray(b''))
            else:
                # Convert to svg, code loosely based on _repr_svg_ from pybel's Molecule
                namespace = "http://www.w3.org/2000/svg"
                root = etree.fromstring(ob_mol.write("svg"))
                inner_svg = root.find(f"{{{namespace}}}g/{{{namespace}}}svg")
                self.load(QByteArray(etree.tostring(inner_svg)))
        self.btShowStructure.setVisible(False)
Пример #4
0
def smi_to_inchi_with_val(smiles, ovalues):
    """Convert SMILES to InChI, keeping only molecules RDKit can round-trip.

    Args:
        smiles: Iterable of SMILES strings.
        ovalues: Iterable of values paired element-wise with `smiles`.

    Returns:
        tuple: ``(inchis, values)`` — two parallel lists holding the InChI and
        the original value of every entry whose SMILES could be parsed and
        whose generated InChI could be read back by RDKit.
    """
    inchis = []
    values = []

    for smi, val in zip(smiles, ovalues):
        mol = MolFromSmiles(smi)
        if mol is None:
            continue
        try:
            inchi = MolToInchi(mol)
            # ensure rdkit can read an inchi it just wrote...
            readable = MolFromInchi(inchi) is not None
        except Exception:
            # Best effort: skip molecules whose conversion fails, but let
            # KeyboardInterrupt / SystemExit propagate.
            continue
        if readable:
            inchis.append(inchi)
            values.append(val)
    return inchis, values
Пример #5
0
def process_cyp(neutralize=False):
    """Build the CYP3A4 dataset and pickle it as ``[inchis, values]``.

    Reads the semicolon-separated ``CYP3A4.csv`` from the ``cyp`` folder under
    ``DATA_PATH``, labels rows 1 when ``Class`` equals ``"Active"`` and 0
    otherwise, converts the SMILES to InChI, reduces the pairs with
    ``mean_by_key`` (presumably averaging values per InChI — confirm against
    its definition) and writes the result to ``data_cyp.pt``.

    Args:
        neutralize: When true, each InChI is rebuilt after passing its
            molecule through ``neutralize_atoms`` with ``PATTERN``.
    """
    source_csv = os.path.join(DATA_PATH, "cyp", "CYP3A4.csv")
    df = pd.read_csv(source_csv, header=0, sep=";")
    df["Value"] = [1 if label == "Active" else 0 for label in df["Class"]]

    raw_inchis, raw_values = smi_to_inchi_with_val(df["SMILES"], df["Value"])
    df = pd.DataFrame({"inchi": raw_inchis, "values": raw_values})
    inchis, values = mean_by_key(df, "inchi", "values")

    if neutralize:
        neutral_inchis = []
        for inchi in inchis:
            neutral_mol = neutralize_atoms(MolFromInchi(inchi), PATTERN)
            neutral_inchis.append(MolToInchi(neutral_mol))
        inchis = neutral_inchis

    target = os.path.join(DATA_PATH, "cyp", "data_cyp.pt")
    with open(target, "wb") as handle:
        pickle.dump([inchis, values], handle)
Пример #6
0
def smiles_from_inchi(inchi: str) -> Union[str, None]:
    """
    Get a SMILES descriptor from an InChI descriptor.
    Uses RDKit for the conversion.

    Args:
        inchi (str): The InChI descriptor.

    Returns:
        Union[str, None]: The canonical isomeric SMILES descriptor, or None
        when the InChI cannot be parsed or the conversion fails.
    """
    try:
        rd_mol = MolFromInchi(inchi)
        if rd_mol is None:
            # RDKit could not build a molecule from this InChI.
            return None
        return MolToSmiles(rd_mol, isomericSmiles=True, canonical=True, allBondsExplicit=False, allHsExplicit=False)
    except Exception:
        # Conversion is best effort; unlike a bare ``except`` this lets
        # KeyboardInterrupt / SystemExit propagate.
        return None
Пример #7
0
def sim_matrix(inchis):
    """Build the full pairwise similarity matrix for the compounds in `inchis`.

    Each compound is compared against the compounds that follow it in the
    list (one `parallel_wrapper` job per compound, run through joblib with the
    multiprocessing backend). The stacked rows are then mirrored onto the
    other triangle and the diagonal is set by adding the identity.

    Parameters
    ----------
    inchis : list
        A list of inchi strings
    Returns
    -------
    np.ndarray
    """
    n_total = len(inchis)
    runner = Parallel(n_jobs=-1, verbose=100, backend="multiprocessing")
    jobs = (
        delayed(parallel_wrapper)(MolFromInchi(inchi), inchis[(idx + 1):],
                                  n_total)
        for idx, inchi in enumerate(inchis))
    matrix = np.stack(runner(jobs))
    # Rows only hold entries to the right of the diagonal: mirror them, then
    # add ones on the diagonal.
    matrix += matrix.copy().T
    matrix += np.eye(n_total)
    return matrix
Пример #8
0
def process_herg(list_csvs, keep_operators=False, neutralize=False):
    """Build the hERG pIC50 dataset from tab-separated files and pickle it.

    The files are concatenated, filtered to IC50 measurements in nM (and, by
    default, to rows whose relation is "="), converted to pIC50, reduced with
    ``mean_by_key``, converted from SMILES to InChI and written to
    ``data_herg.pt`` under the ``herg`` folder of ``DATA_PATH``.

    Args:
        list_csvs: Sequence of paths to tab-separated input files.
        keep_operators: When true, rows with relations other than "=" are kept.
        neutralize: When true, each InChI is rebuilt after passing its
            molecule through ``neutralize_atoms`` with ``PATTERN``.
    """
    df = pd.read_csv(list_csvs[0], sep="\t")
    for extra_csv in list_csvs[1:]:
        df = pd.concat([df, pd.read_csv(extra_csv, sep="\t")])

    # filter only IC50, nM, = data.
    keep = (df.Value_type == "IC50") & (df.Unit == "nM")
    if not keep_operators:
        keep = keep & (df.Relation == "=")

    df = df.loc[keep, ["Canonical_smiles", "Value"]]

    df.Value = -np.log10(df.Value * 1e-9)  # pIC50 conversion
    per_dup, stds = duplicate_analysis(df, "Canonical_smiles", "Value")
    report = (
        "Percentage of duplicates for hERG dataset: {:.3f}, with average std.: {:.3f}, and median std.:{:.3f}"
        .format(per_dup, np.mean(stds), np.median(stds)))
    print(report)

    df.drop_duplicates(inplace=True)

    # reduce repeated measurements of the same SMILES
    uq_smiles, uq_values = mean_by_key(df, "Canonical_smiles", "Value")

    # drop faulty molecules
    print("Dropping faulty molecules...")
    inchis, values = smi_to_inchi_with_val(uq_smiles, uq_values)

    if neutralize:
        neutral_inchis = []
        for inchi in inchis:
            neutral_mol = neutralize_atoms(MolFromInchi(inchi), PATTERN)
            neutral_inchis.append(MolToInchi(neutral_mol))
        inchis = neutral_inchis

    target = os.path.join(DATA_PATH, "herg", "data_herg.pt")
    with open(target, "wb") as handle:
        pickle.dump([inchis, values], handle)
Пример #9
0
def is_valid_inchi(inchi: str) -> Tuple[bool, str]:
    """
    Tell whether a string is an InChI descriptor that RDKit can parse.

    Args:
        inchi (str): The candidate string.

    Returns:
        Tuple[bool, str]:
            - True when RDKit builds a molecule from the string.
            - An explanation when it does not, otherwise an empty string.
    """
    # The type check is required, not just a shortcut: per the original
    # author, a try/except block does not capture the
    # Boost.Python.ArgumentError raised when the argument does not match the
    # C++ signature.
    if not isinstance(inchi, str):
        reason = f'An InChI descriptor must be a string, got "{inchi}" which is a {type(inchi)}.'
        return False, reason

    try:
        parsed = MolFromInchi(inchi)
    except Exception as err:
        return False, str(err)
    else:
        if parsed is not None:
            return True, ''
        return False, f'Could not generate an RDKit Molecule from InChI "{inchi}"'
Пример #10
0
def draw_inchi(inchi, imgfile):
    """Draw the molecule described by an InChI string to an image file.

    Hydrogens are added, a conformer is embedded and, when embedding
    succeeds, optimized with MMFF before drawing.

    Args:
        inchi: The InChI string of the molecule.
        imgfile: Path of the image file to write.

    Raises:
        ValueError: If RDKit cannot build a molecule from `inchi`.
    """
    parsed = MolFromInchi(inchi)
    if parsed is None:
        raise ValueError(f"Could not generate an RDKit molecule from InChI {inchi!r}")
    molecule = Chem.AddHs(parsed)
    # EmbedMolecule returns the new conformer id, or -1 when no conformer
    # could be generated; optimizing without a conformer would raise.
    if AllChem.EmbedMolecule(molecule) >= 0:
        AllChem.MMFFOptimizeMolecule(molecule)
    Draw.MolToFile(molecule, imgfile)
Пример #11
0
def process_caco2(neutralize=False):
    """Build the Caco-2 permeability dataset and pickle it.

    Two sources under the ``caco2`` folder of ``DATA_PATH`` are merged: a
    spreadsheet that already provides InChI strings, and a CSV that provides
    compound names which are resolved to InChI through the ``IUPAC_REST``
    URL template. Values are stored as ``-log10`` of the permeability, the
    mean is taken per unique InChI, and ``[inchis, values]`` is written to
    ``data_caco2.pt``.

    Args:
        neutralize: When true, each InChI is rebuilt after passing its
            molecule through ``neutralize_atoms`` with ``PATTERN``.
    """
    # peerJ data
    df1 = pd.read_excel(
        os.path.join(DATA_PATH, "caco2", "peerj-03-1405-s001.xls"))
    df1 = df1.loc[:, ["InChI", "Caco-2 Papp * 10^6 cm/s"]]
    df1.dropna(inplace=True)
    df1["Value"] = -np.log10(df1["Caco-2 Papp * 10^6 cm/s"] * 1e-6)

    peerj_inchis = []
    peerj_values = []
    for raw_inchi, val in zip(df1["InChI"], df1["Value"]):
        mol = MolFromInchi(raw_inchi)
        if mol is None:
            continue
        # ensure same inchi specification
        peerj_inchis.append(MolToInchi(mol))
        peerj_values.append(val)

    df1 = pd.DataFrame({"InChI": peerj_inchis, "Value": peerj_values})

    # plos one data
    df2 = pd.read_csv(os.path.join(DATA_PATH, "caco2", "caco2perm_pone.csv"))
    df2["Value"] = -np.log10(df2["Papp (Caco-2) [cm/s]"])
    df2 = df2.loc[:, ["name", "Value"]]
    df2.dropna(inplace=True)

    print("Querying InchI strings from IUPAC names...")
    inchis = []
    values = []

    for mol_name, val in tqdm(zip(df2["name"], df2["Value"]), total=len(df2)):
        ans = requests.get(IUPAC_REST.format(mol_name))
        if ans.status_code != 200:
            continue
        # ensure same inchi specification
        new_mol = MolFromInchi(ans.content.decode("utf8"))
        if new_mol is None:
            continue
        inchis.append(MolToInchi(new_mol))
        values.append(val)

    inchis.extend(df1["InChI"].tolist())
    values.extend(df1["Value"].tolist())

    df = pd.DataFrame({"inchi": inchis, "values": values})
    per_dup, stds = duplicate_analysis(df, "inchi", "values")

    report = (
        "Percentage of duplicates for CaCO2 dataset: {:.5f}, with average std.: {:.3f}, and median std.:{:.3f}"
        .format(per_dup, np.mean(stds), np.median(stds)))
    print(report)

    uq_inchi = pd.unique(df["inchi"]).tolist()

    print("Averaging values and ensuring rdkit readability...")
    inchis = []
    values = []

    # Average values and make sure rdkit can read all inchis
    for inchi in tqdm(uq_inchi):
        if MolFromInchi(inchi) is None:
            continue
        same_compound = df.loc[df["inchi"] == inchi]
        inchis.append(inchi)
        values.append(same_compound["values"].mean())

    if neutralize:
        neutral_inchis = []
        for inchi in inchis:
            neutral_mol = neutralize_atoms(MolFromInchi(inchi), PATTERN)
            neutral_inchis.append(MolToInchi(neutral_mol))
        inchis = neutral_inchis

    target = os.path.join(DATA_PATH, "caco2", "data_caco2.pt")
    with open(target, "wb") as handle:
        pickle.dump([inchis, values], handle)
Пример #12
0
def _ppb_inchis_from_names(names, raw_values):
    """Resolve compound names to RDKit-generated InChIs via ``IUPAC_REST``.

    Names whose request does not return status 200, whose returned InChI
    cannot be parsed by RDKit, or whose regenerated InChI cannot be read back
    are skipped.

    Returns:
        tuple: ``(inchis, values)`` — parallel lists for the resolved names.
    """
    resolved_inchis = []
    resolved_values = []
    for mol_name, val in tqdm(zip(names, raw_values), total=len(names)):
        ans = requests.get(IUPAC_REST.format(mol_name))
        if ans.status_code != 200:
            continue
        # maybe not the same standard as rdkit...
        mol = MolFromInchi(ans.content.decode("utf8"))
        if mol is None:
            continue
        # Use same inchi specification as rdkit...
        new_inchi = MolToInchi(mol)
        if MolFromInchi(new_inchi) is not None:
            resolved_inchis.append(new_inchi)
            resolved_values.append(val)
    return resolved_inchis, resolved_values


def process_ppb(neutralize=False):
    """Build the plasma protein binding (PPB) dataset and pickle it.

    Six sources under the ``ppb`` folder of ``DATA_PATH`` are merged. Sources
    that provide SMILES go through ``smi_to_inchi_with_val``; sources that
    only provide compound names are resolved through the ``IUPAC_REST`` URL
    template. Values are expressed as a percentage bound (unbound fractions
    are converted with ``100 * (1 - f)``). The merged pairs are reduced with
    ``mean_by_key``, filtered with ``ensure_readability`` and written as
    ``[inchis, values]`` to ``data_ppb.pt``.

    Args:
        neutralize: When true, each InChI is rebuilt after passing its
            molecule through ``neutralize_atoms`` with ``PATTERN``.
    """
    inchis = []
    values = []

    # first dataset
    xlsxs = glob(
        os.path.join(DATA_PATH, "ppb", "11095_2013_1023_MOESM[2-4]_ESM.xlsx"))
    for idx, xlsx in enumerate(xlsxs):
        # NOTE(review): the pattern matches at most three files, so idx < 3
        # always holds and the second column name is never selected; glob
        # also does not promise a particular order — confirm the intent.
        ppb_col = "Experimental_%PPB" if idx < 3 else "Experimental PPB_[%]"
        df1 = pd.read_excel(xlsx)
        df1 = df1.loc[:, ["SMILES", ppb_col]]
        inchis_1, values_1 = smi_to_inchi_with_val(df1["SMILES"], df1[ppb_col])
        inchis.extend(inchis_1)
        values.extend(values_1)

    # second dataset
    df2 = pd.read_excel(os.path.join(DATA_PATH, "ppb",
                                     "ci6b00291_si_001.xlsx"))
    df2 = df2.loc[:, ["SMILES", "Fub"]]
    df2["Value"] = 100 * (1 - df2["Fub"])
    inchis_2, values_2 = smi_to_inchi_with_val(df2["SMILES"], df2["Value"])
    inchis.extend(inchis_2)
    values.extend(values_2)

    # third dataset
    df3 = pd.read_excel(
        os.path.join(DATA_PATH, "ppb",
                     "cmdc201700582-sup-0001-misc_information.xlsx"),
        sheet_name=4,
    )
    df3 = df3.loc[:,
                  ["SMILES", "PPB_Traditional_assay(serve as the true value)"]]
    df3["Value"] = 100 * df3["PPB_Traditional_assay(serve as the true value)"]
    inchis_3, values_3 = smi_to_inchi_with_val(df3["SMILES"], df3["Value"])
    inchis.extend(inchis_3)
    values.extend(values_3)

    # fourth dataset (names only). Unparsable InChIs returned by the service
    # are now skipped instead of crashing in MolToInchi.
    df4 = pd.read_excel(
        os.path.join(DATA_PATH, "ppb", "jm051245vsi20061025_033631.xls"))
    df4 = df4.loc[:, ["NAME (Drug or chemical  name)", "PBexp(%)"]]
    inchis_4, values_4 = _ppb_inchis_from_names(
        df4["NAME (Drug or chemical  name)"], df4["PBexp(%)"])
    inchis.extend(inchis_4)
    values.extend(values_4)

    # fifth dataset
    df5 = pd.read_excel(os.path.join(DATA_PATH, "ppb",
                                     "mp8b00785_si_002.xlsx"))
    df5 = df5.loc[:, ["canonical_smiles", "fup"]]
    df5["Value"] = 100 * (1 - df5["fup"])
    inchis_5, values_5 = smi_to_inchi_with_val(df5["canonical_smiles"],
                                               df5["Value"])
    inchis.extend(inchis_5)
    values.extend(values_5)

    # sixth dataset (names only)
    df6 = pd.read_html(os.path.join(DATA_PATH, "ppb", "kratochwil2002.html"),
                       header=0)[0]
    df6 = df6.loc[:, ["Compound", "fb (%)b"]].dropna()
    inchis_6, values_6 = _ppb_inchis_from_names(df6["Compound"],
                                                df6["fb (%)b"])
    inchis.extend(inchis_6)
    values.extend(values_6)

    # join them all together
    df = pd.DataFrame({"inchi": inchis, "values": values})

    # checking duplicates
    per_dup, stds = duplicate_analysis(df, "inchi", "values")
    print(
        "Percentage of duplicates for PPB dataset: {:.5f}, with average std.: {}, and median std.:{}"
        .format(per_dup, np.mean(stds), np.median(stds)))

    # average values w. equal inchi and check readability
    print("Averaging values and ensuring rdkit readability...")
    inchis, values = mean_by_key(df, "inchi", "values")

    inchis, values = ensure_readability(inchis, values, MolFromInchi)

    if neutralize:
        inchis = [
            MolToInchi(neutralize_atoms(MolFromInchi(inchi), PATTERN))
            for inchi in inchis
        ]

    with open(os.path.join(DATA_PATH, "ppb", "data_ppb.pt"), "wb") as handle:
        pickle.dump([inchis, values], handle)
Пример #13
0
# Script entry point: gather per-molecule descriptors for every dataset into
# one DataFrame and create a 1x3 figure.
if __name__ == "__main__":
    # Parallel lists: one entry per molecule across all datasets.
    mws = []
    logps = []
    nhdonors = []
    values = []
    dataset = []

    # Every key of LABEL_GUIDE, plus "cyp".
    for data in list(LABEL_GUIDE.keys()) + ["cyp"]:
        # Each pickle holds an (inchis, values) pair.
        with open(os.path.join(DATA_PATH, data, f"data_{data}.pt"),
                  "rb") as handle:
            inchis, v = pickle.load(handle)

        values.extend(v)

        for inchi in tqdm(inchis):
            mol = MolFromInchi(inchi)
            mws.append(MolWt(mol))
            logps.append(MolLogP(mol))
            nhdonors.append(NumHDonors(mol))
            # Tag the molecule with the dataset's entry in DATASET_GUIDE.
            dataset.append(DATASET_GUIDE[data])

    df = pd.DataFrame({
        "Molecular weight (gr./mol)": mws,
        r"aLog$P$": logps,
        "No. hydrogen donors": nhdonors,
        "values": values,
        "dataset": dataset,
    })

    # One row of three axes.
    f, axs = plt.subplots(1, 3, figsize=(18, 6))
Пример #14
0
def InchiToPixmap(inchi: str, size: QSize):
    """Return a pixmap of the molecule described by `inchi` at `size`.

    An empty ``QPixmap`` is returned when `size` is null or `inchi` is empty.
    """
    if not size.isNull() and inchi:
        return MolToPixmap(MolFromInchi(inchi), size)

    return QPixmap()
Пример #15
0
        elif task == "binary":
            base_model = RandomForestClassifier

        else:
            raise ValueError("Task not supported")

        with open(
            os.path.join(DATA_PATH, f"{data}", f"data_{data}.pt"), "rb"
        ) as handle:
            inchis, values = pickle.load(handle)

        inchis = np.array(inchis)
        values = np.array(values)[:, np.newaxis]
        kf = KFold(n_splits=N_FOLDS, shuffle=True, random_state=1337)

        fps = np.vstack([featurize_ecfp4(MolFromInchi(inchi)) for inchi in inchis])

        for idx_split, (idx_train, idx_test) in enumerate(kf.split(inchis)):
            print(f"Fold {idx_split + 1}/{N_FOLDS}...")
            fps_train, fps_test = fps[idx_train, :], fps[idx_test, :]
            values_train, values_test = values[idx_train, :], values[idx_test, :]

            rf = base_model(n_estimators=N_ESTIMATORS, n_jobs=-1)
            rf.fit(fps_train, values_train.squeeze())

            if task == "regression":
                yhat_test = rf.predict(fps_test)
            elif task == "binary":
                yhat_test = rf.predict_proba(fps_test)

            np.save(
Пример #16
0
        elif task == "binary":
            base_model = RandomForestClassifier

        else:
            raise ValueError("Task not supported")

        with open(os.path.join(DATA_PATH, f"{data}", f"data_{data}.pt"),
                  "rb") as handle:
            inchis, values = pickle.load(handle)

        inchis = np.array(inchis)
        values = np.array(values)[:, np.newaxis]
        kf = KFold(n_splits=N_FOLDS, shuffle=True, random_state=1337)

        fps = np.vstack(
            [featurize_ecfp4(MolFromInchi(inchi)) for inchi in inchis])

        for idx_split, (idx_train, idx_test) in enumerate(kf.split(inchis)):
            print(f"Fold {idx_split + 1}/{N_FOLDS}...")
            fps_train, fps_test = fps[idx_train, :], fps[idx_test, :]
            values_train, values_test = values[idx_train, :], values[
                idx_test, :]

            rf = base_model(n_estimators=N_ESTIMATORS, n_jobs=-1)
            rf.fit(fps_train, values_train.squeeze())

            if task == "regression":
                yhat_test = rf.predict(fps_test)
            elif task == "binary":
                yhat_test = rf.predict_proba(fps_test)
Пример #17
0
def _mol_from_inchi_with_prefix(inchi):
    """Parse an InChI with RDKit, prepending ``InChI=1S/`` when it is missing."""
    if not inchi.startswith("InChI=1S/"):
        inchi = "InChI=1S/" + inchi
    return MolFromInchi(inchi)


def process(init_data, use_cache=True):
    '''Assemble identifiers and metadata for one compound.

    User-supplied entries in `init_data` are combined with a Common Chemistry
    lookup (when a 'CAS' key is given) and a PubChem lookup (when any
    identifier is available). The structure is taken from the first source
    that yields a molecule, in this order: user mol file, user SMILES, user
    InChI, Common Chemistry SMILES/InChI, PubChem SMILES/InChI. User-supplied
    'formula', 'MW', 'smiles', 'inchi', 'inchikey', 'pubchem' and 'name'
    override the derived values in the output.

    Parameters
    ----------
    init_data : dict
        User-supplied data. Keys read here: 'CAS', 'CASRN', 'mol' (path to a
        mol file), 'pubchem', 'smiles', 'inchi', 'inchikey', 'formula', 'MW',
        'name' and 'synonyms'. The dict is copied, not modified.
    use_cache : bool
        Forwarded to `find_pubchem_from_ids`.

    Returns
    -------
    dict
        With keys 'cid', 'CAS', 'formula', 'MW', 'smiles', 'inchi',
        'inchikey', 'name' and 'synonyms'. 'cid' is -1 when unknown.

    Raises
    ------
    ValueError
        If no structure, CAS or name can be determined, or if any of the
        final smiles/inchi/inchikey equals '*'.

    Examples
    --------

    >>> res = process({'CAS': '10170-69-1', 'synonyms': ['14267-36-8', 'NSC 22319'], 'name': 'Manganese, decacarbonyldi-, (Mn-Mn)'})
    >>> res['inchi'], res['smiles'], res['cid'], res['CAS']
    ('InChI=1S/10CO.2Mn/c10*1-2;;', '[C-]#[O+].[C-]#[O+].[C-]#[O+].[C-]#[O+].[C-]#[O+].[C-]#[O+].[C-]#[O+].[C-]#[O+].[C-]#[O+].[C-]#[O+].[Mn].[Mn]', 517769, '10170-69-1')
    '''
    init_data = init_data.copy()
    cc_CAS = cc_name = cc_inchi = cc_inchikey = cc_smiles = cc_synonyms = cc_deprecated_CASs = None
    if 'CAS' in init_data:
        try:
            (cc_CAS, cc_name, cc_inchi, cc_inchikey, cc_smiles, cc_synonyms,
             cc_deprecated_CASs) = common_chemistry_data(init_data['CAS'])
        except ValueError:
            # Compound is not in common chemistry; this is OK
            pass

    cid = iupac_name = p_inchi = p_smiles = p_synonyms = None

    # The mol file is parsed once and reused for the structure below. A 'mol'
    # key whose value is None is treated like a missing key.
    mol_from_file = None
    if init_data.get('mol') is not None:
        mol_from_file = Chem.MolFromMolFile(init_data['mol'])
        if mol_from_file is not None:
            # If we have a mol file, get the inchi and inchikey for the
            # pubchem lookup
            init_data['inchi'] = MolToInchi(mol_from_file)
            init_data['inchikey'] = InchiToInchiKey(init_data['inchi'])

    # NOTE(review): the lookup reads a 'CASRN' key while the rest of this
    # function reads 'CAS' — confirm which key callers are expected to set.
    lookup_ids = {
        'pubchem': init_data.get('pubchem'),
        'CASRN': init_data.get('CASRN', cc_CAS),
        'inchi': init_data.get('inchi', cc_inchi),
        'inchikey': init_data.get('inchikey', cc_inchikey),
        'smiles': init_data.get('smiles', cc_smiles),
    }

    if any(value is not None for value in lookup_ids.values()):
        try:
            p = find_pubchem_from_ids(use_cache=use_cache, **lookup_ids)
        except Exception as e:
            # Best effort: a failed lookup is reported and otherwise ignored.
            p = None
            print(e, 'exception')
        if p is not None:
            cid, iupac_name, p_MW, p_inchi, p_inchikey, p_smiles, p_formula, p_synonyms = p

    # Be aware some smiles descriptions are wrong
    # Start with user overriding
    mol = mol_from_file
    if mol is None and init_data.get('smiles') is not None:
        mol = Chem.MolFromSmiles(init_data['smiles'])
    if mol is None and init_data.get('inchi') is not None:
        mol = _mol_from_inchi_with_prefix(init_data['inchi'])
    # Trust common chemistry next
    if mol is None and cc_smiles is not None:
        mol = Chem.MolFromSmiles(cc_smiles)
    if mol is None and cc_inchi is not None:
        mol = _mol_from_inchi_with_prefix(cc_inchi)
    # Did we pull up the structure from pubchem??
    if mol is None and p_smiles is not None:
        mol = Chem.MolFromSmiles(p_smiles)
    if mol is None and p_inchi is not None:
        mol = _mol_from_inchi_with_prefix(p_inchi)
    if mol is None:
        raise ValueError("No structure found")

    smiles = Chem.MolToSmiles(mol, True)
    inchi = MolToInchi(mol)
    inchikey = InchiToInchiKey(inchi)
    formula = CalcMolFormula(mol, True, True)
    formula = serialize_formula(formula)
    MW = molecular_weight(nested_formula_parser(formula))

    # output values
    if 'pubchem' in init_data:
        cid = init_data['pubchem']
    elif cid is None:
        cid = -1

    if cc_CAS is not None:
        CAS = cc_CAS
    elif 'CAS' in init_data:
        CAS = init_data['CAS']
    else:
        raise ValueError("CAS culd not be found")

    # User-supplied values override what rdkit derived
    if 'formula' in init_data:
        formula = init_data['formula']
    if 'MW' in init_data:
        MW = init_data['MW']
    if 'smiles' in init_data:
        smiles = init_data['smiles']
    if 'inchi' in init_data:
        inchi = init_data['inchi']
    if 'inchikey' in init_data:
        inchikey = init_data['inchikey']

    if inchikey == '*' or smiles == '*' or inchi == '*':
        raise ValueError("Failure in rdkit")

    # Do we have a name specified in the settings?
    if 'name' in init_data:
        name = init_data['name']
    elif cc_name is not None:
        name = cc_name
    elif iupac_name is not None:
        name = iupac_name
    else:
        raise ValueError("There is no name for this compound")

    synonyms = []
    if cc_synonyms is not None:
        synonyms += cc_synonyms
    if cc_deprecated_CASs is not None:
        synonyms += cc_deprecated_CASs
    if p_synonyms is not None:
        synonyms += p_synonyms
    if 'synonyms' in init_data:
        synonyms += init_data['synonyms']
    # De-duplicate, drop the primary name, then order by length and
    # case-insensitive text so the output is deterministic.
    synonyms = list(set(synonyms))
    if name in synonyms:
        synonyms.remove(name)
    synonyms = sorted(synonyms, key=lambda s: (len(s), s.lower()))

    return {
        'cid': cid,
        'CAS': CAS,
        'formula': formula,
        'MW': MW,
        'smiles': smiles,
        'inchi': inchi,
        'inchikey': inchikey,
        'name': name,
        'synonyms': synonyms
    }
def smiles_from_inchi(inchi):
    """Convert an InChI string to an isomeric SMILES string using RDKit."""
    return MolToSmiles(MolFromInchi(inchi), isomericSmiles=True)