示例#1
0
def check_ligand(file_path):
    bool = False
    if os.path.isfile(file_path):
        suppl = Chem.SDMolSupplier(file_path)
        for mol in suppl:
            if mol is not None:
                # components of rule
                hydrogen_bond_doner = True if Lipinski.NumHDonors(
                    mol) <= 5 else False
                hydrogen_bond_acceptors = True if Lipinski.NumHAcceptors(
                    mol) <= 10 else False
                molecular_mass = True if Descriptors.ExactMolWt(
                    mol) <= 500 else False
                octanol_water_partition_coefficient_logP = True if Crippen.MolLogP(
                    mol) <= 5 else False
                components_rank = hydrogen_bond_doner + hydrogen_bond_acceptors + molecular_mass + octanol_water_partition_coefficient_logP

                # variants
                partition_coefficient_logP = True if -0.4 <= Crippen.MolLogP(
                    mol) <= 5.6 else False
                molar_refractivity = True if 40 <= Crippen.MolMR(
                    mol) <= 130 else False
                molecular_weight = True if 180 <= Descriptors.ExactMolWt(
                    mol) <= 500 else False
                number_of_atoms = True if 20 <= Lipinski.HeavyAtomCount(
                    mol) <= 70 else False
                polar_surface_area = True if MolSurf.TPSA(
                    mol) <= 140 else False
                variants_rank = partition_coefficient_logP + molar_refractivity + molecular_weight + number_of_atoms + polar_surface_area

                if (components_rank == 4) and (variants_rank == 4
                                               or variants_rank == 5):
                    bool = True
    return bool
示例#2
0
def getAromaticProportion(m):
    aromatic_list = [m.GetAtomWithIdx(i).GetIsAromatic() for i in range(m.GetNumAtoms())]
    aromatic = 0
    for i in aromatic_list:
        if i:
            aromatic += 1
    heavy_atom = Lipinski.HeavyAtomCount(m)
    return aromatic / heavy_atom
示例#3
0
def desalt_compound(smiles):
    """Function to desalt compound a given smiles string
    Takes a smiles string
    Returns a desalted smiles string."""
    # Chose the biggest fragment, after splitting into fragments
    return sorted([(x, Lipinski.HeavyAtomCount(Chem.MolFromSmiles(x)))
                   for x in smiles.split(".")],
                  key=lambda x: x[1],
                  reverse=True)[0][0]
示例#4
0
    def getDiscriptor(self):
        from rdkit.Chem import Crippen
        from rdkit import Chem
        import pandas as pd
        from rdkit.Chem import Descriptors, Lipinski
        import os

        os.chdir(r"G:\マイドライブ\Data\Meram Chronic Data")
        df = pd.read_csv('extChronicStrcture.csv', engine='python')
        df = df[['CAS', 'canonical_smiles']]
        df = df.dropna(how='any')

        #df = pd.read_csv('extractInchi.csv',header=None)
        columns = [
            'CAS', 'weight', 'logP', 'RotatableBonds', 'HeavyAtomCounts',
            'AromProp', 'TPSA', 'HDonor', 'HAcceptors', 'FractionCSP3',
            'AromaticCarbocycles', 'AromaticHeterocycles'
        ]
        CAS = df['CAS']
        SMILES = df['canonical_smiles']

        resultDf = pd.DataFrame(columns=columns)
        for cas, smiles in zip(CAS, SMILES):
            mol = Chem.MolFromSmiles(smiles)
            wt = Descriptors.MolWt(mol)
            rot = Lipinski.NumRotatableBonds(mol)
            heavy = Lipinski.HeavyAtomCount(mol)
            logp = Crippen.MolLogP(mol)
            aromaticHeavyatoms = len(
                mol.GetSubstructMatches(Chem.MolFromSmarts('[a]')))
            numAtoms = mol.GetNumAtoms()
            aromprop = float(aromaticHeavyatoms / numAtoms)
            TPSA = Descriptors.TPSA(mol)
            HDonors = Descriptors.NumHDonors(mol)
            HAcceptors = Descriptors.NumHAcceptors(mol)

            FractionCSP3 = Descriptors.FractionCSP3(mol)
            AromaticCarbocycles = Descriptors.NumAromaticCarbocycles(mol)
            AromaticHeterocycles = Descriptors.NumAromaticHeterocycles(mol)

            (print(HDonors, HAcceptors))
            tempDf = pd.DataFrame([[
                cas, wt, logp, rot, heavy, aromprop, TPSA, HDonors, HAcceptors,
                FractionCSP3, AromaticCarbocycles, AromaticHeterocycles
            ]],
                                  columns=columns)
            resultDf = pd.concat([resultDf, tempDf])
        resultDf.to_csv('Descriptors.csv', index=False)
示例#5
0
def PhyChem(smiles):
    """ Calculating the 19D physicochemical descriptors for each molecules,
    the value has been normalized with Gaussian distribution.

    Arguments:
        smiles (list): list of SMILES strings.
    Returns:
        props (ndarray): m X 19 matrix as normalized PhysChem descriptors.
            m is the No. of samples
    """
    props = []
    for smile in smiles:
        mol = Chem.MolFromSmiles(smile)
        try:
            MW = desc.MolWt(mol)
            LOGP = Crippen.MolLogP(mol)
            HBA = Lipinski.NumHAcceptors(mol)
            HBD = Lipinski.NumHDonors(mol)
            rotable = Lipinski.NumRotatableBonds(mol)
            amide = AllChem.CalcNumAmideBonds(mol)
            bridge = AllChem.CalcNumBridgeheadAtoms(mol)
            heteroA = Lipinski.NumHeteroatoms(mol)
            heavy = Lipinski.HeavyAtomCount(mol)
            spiro = AllChem.CalcNumSpiroAtoms(mol)
            FCSP3 = AllChem.CalcFractionCSP3(mol)
            ring = Lipinski.RingCount(mol)
            Aliphatic = AllChem.CalcNumAliphaticRings(mol)
            aromatic = AllChem.CalcNumAromaticRings(mol)
            saturated = AllChem.CalcNumSaturatedRings(mol)
            heteroR = AllChem.CalcNumHeterocycles(mol)
            TPSA = MolSurf.TPSA(mol)
            valence = desc.NumValenceElectrons(mol)
            mr = Crippen.MolMR(mol)
            # charge = AllChem.ComputeGasteigerCharges(mol)
            prop = [
                MW, LOGP, HBA, HBD, rotable, amide, bridge, heteroA, heavy,
                spiro, FCSP3, ring, Aliphatic, aromatic, saturated, heteroR,
                TPSA, valence, mr
            ]
        except Exception:
            print(smile)
            prop = [0] * 19
        props.append(prop)
    props = np.array(props)
    props = Scaler().fit_transform(props)
    return props
示例#6
0
文件: models.py 项目: vorsilam/MI_ADM
    def __init__(self, *args, **kwargs):
        if len(args) > 2:
            super(Compound, self).__init__(*args, **kwargs)
            return
        mol_as_RDmol = args[0] if len(args) > 0 else None
        if not mol_as_RDmol:
            mol_as_RDmol = kwargs['mol_as_RDmol'] if 'mol_as_RDmol' in kwargs else None
        if not mol_as_RDmol:
            raise RuntimeError("No RDMol specified")
        description = args[1] if len(args) > 1 else None
        if not description:
            description = kwargs['description'] if 'description' in kwargs else ''
        new_kwargs = dict()
        new_kwargs['unique_id'] = self._generate_id()
        new_kwargs['smiles'] = Chem.MolToSmiles(mol_as_RDmol, isomericSmiles=True, canonical=True)
        new_kwargs['inchi'] = Chem.MolToInchi(mol_as_RDmol)
        new_kwargs['inchi_key'] = Chem.InchiToInchiKey(new_kwargs['inchi'])
        new_kwargs['mol_weight_exact'] = Descriptors.ExactMolWt(mol_as_RDmol)
        new_kwargs['heavy_atoms_count'] = Lipinski.HeavyAtomCount(mol_as_RDmol)
        new_kwargs['ring_count'] = Lipinski.RingCount(mol_as_RDmol)
        new_kwargs['mol'] = mol_as_RDmol

        super(Compound, self).__init__(description=description, **new_kwargs)
示例#7
0
    def extract(x, from_smiles):
        if from_smiles:
            mol = Chem.MolFromSmiles(x)
        else:
            mol = x

        if (mol is None) or (len(mol.GetAtoms()) == 0):
            if include_3D:
                return [0] * 29
            else:
                return [0] * 24
        else:
            logP = Crippen.MolLogP(mol)
            refractivity = Crippen.MolMR(mol)

            weight = Descriptors.MolWt(mol)
            exact_weight = Descriptors.ExactMolWt(mol)
            heavy_weight = Descriptors.HeavyAtomMolWt(mol)
            heavy_count = Lipinski.HeavyAtomCount(mol)
            nhoh_count = Lipinski.NHOHCount(mol)
            no_count = Lipinski.NOCount(mol)
            hacceptor_count = Lipinski.NumHAcceptors(mol)
            hdonor_count = Lipinski.NumHDonors(mol)
            hetero_count = Lipinski.NumHeteroatoms(mol)
            rotatable_bond_count = Lipinski.NumRotatableBonds(mol)
            valance_electron_count = Descriptors.NumValenceElectrons(mol)
            amide_bond_count = rdMolDescriptors.CalcNumAmideBonds(mol)
            aliphatic_ring_count = Lipinski.NumAliphaticRings(mol)
            aromatic_ring_count = Lipinski.NumAromaticRings(mol)
            saturated_ring_count = Lipinski.NumSaturatedRings(mol)
            aliphatic_cycle_count = Lipinski.NumAliphaticCarbocycles(mol)
            aliphaticHetero_cycle_count = Lipinski.NumAliphaticHeterocycles(
                mol)
            aromatic_cycle_count = Lipinski.NumAromaticCarbocycles(mol)
            aromaticHetero_cycle_count = Lipinski.NumAromaticHeterocycles(mol)
            saturated_cycle_count = Lipinski.NumSaturatedCarbocycles(mol)
            saturatedHetero_cycle_count = Lipinski.NumSaturatedHeterocycles(
                mol)

            tpsa = rdMolDescriptors.CalcTPSA(mol)

            if include_3D:
                mol_3D = Chem.AddHs(mol)
                AllChem.EmbedMolecule(mol_3D)
                AllChem.MMFFOptimizeMolecule(mol_3D)
                eccentricity = rdMolDescriptors.CalcEccentricity(mol_3D)
                asphericity = rdMolDescriptors.CalcAsphericity(mol_3D)
                spherocity = rdMolDescriptors.CalcSpherocityIndex(mol_3D)
                inertial = rdMolDescriptors.CalcInertialShapeFactor(mol_3D)
                gyration = rdMolDescriptors.CalcRadiusOfGyration(mol_3D)

                return [
                    logP, refractivity, weight, exact_weight, heavy_weight,
                    heavy_count, nhoh_count, no_count, hacceptor_count,
                    hdonor_count, hetero_count, rotatable_bond_count,
                    valance_electron_count, amide_bond_count,
                    aliphatic_ring_count, aromatic_ring_count,
                    saturated_ring_count, aliphatic_cycle_count,
                    aliphaticHetero_cycle_count, aromatic_cycle_count,
                    aromaticHetero_cycle_count, saturated_cycle_count,
                    saturatedHetero_cycle_count, tpsa, eccentricity,
                    asphericity, spherocity, inertial, gyration
                ]
            else:
                return [
                    logP, refractivity, weight, exact_weight, heavy_weight,
                    heavy_count, nhoh_count, no_count, hacceptor_count,
                    hdonor_count, hetero_count, rotatable_bond_count,
                    valance_electron_count, amide_bond_count,
                    aliphatic_ring_count, aromatic_ring_count,
                    saturated_ring_count, aliphatic_cycle_count,
                    aliphaticHetero_cycle_count, aromatic_cycle_count,
                    aromaticHetero_cycle_count, saturated_cycle_count,
                    saturatedHetero_cycle_count, tpsa
                ]
示例#8
0
def calc_rdkit(mol):
    descriptors = pd.Series(
        np.array([
            Crippen.MolLogP(mol),
            Crippen.MolMR(mol),
            Descriptors.FpDensityMorgan1(mol),
            Descriptors.FpDensityMorgan2(mol),
            Descriptors.FpDensityMorgan3(mol),
            Descriptors.FractionCSP3(mol),
            Descriptors.HeavyAtomMolWt(mol),
            Descriptors.MaxAbsPartialCharge(mol),
            Descriptors.MaxPartialCharge(mol),
            Descriptors.MinAbsPartialCharge(mol),
            Descriptors.MinPartialCharge(mol),
            Descriptors.MolWt(mol),
            Descriptors.NumRadicalElectrons(mol),
            Descriptors.NumValenceElectrons(mol),
            EState.EState.MaxAbsEStateIndex(mol),
            EState.EState.MaxEStateIndex(mol),
            EState.EState.MinAbsEStateIndex(mol),
            EState.EState.MinEStateIndex(mol),
            EState.EState_VSA.EState_VSA1(mol),
            EState.EState_VSA.EState_VSA10(mol),
            EState.EState_VSA.EState_VSA11(mol),
            EState.EState_VSA.EState_VSA2(mol),
            EState.EState_VSA.EState_VSA3(mol),
            EState.EState_VSA.EState_VSA4(mol),
            EState.EState_VSA.EState_VSA5(mol),
            EState.EState_VSA.EState_VSA6(mol),
            EState.EState_VSA.EState_VSA7(mol),
            EState.EState_VSA.EState_VSA8(mol),
            EState.EState_VSA.EState_VSA9(mol),
            Fragments.fr_Al_COO(mol),
            Fragments.fr_Al_OH(mol),
            Fragments.fr_Al_OH_noTert(mol),
            Fragments.fr_aldehyde(mol),
            Fragments.fr_alkyl_carbamate(mol),
            Fragments.fr_alkyl_halide(mol),
            Fragments.fr_allylic_oxid(mol),
            Fragments.fr_amide(mol),
            Fragments.fr_amidine(mol),
            Fragments.fr_aniline(mol),
            Fragments.fr_Ar_COO(mol),
            Fragments.fr_Ar_N(mol),
            Fragments.fr_Ar_NH(mol),
            Fragments.fr_Ar_OH(mol),
            Fragments.fr_ArN(mol),
            Fragments.fr_aryl_methyl(mol),
            Fragments.fr_azide(mol),
            Fragments.fr_azo(mol),
            Fragments.fr_barbitur(mol),
            Fragments.fr_benzene(mol),
            Fragments.fr_benzodiazepine(mol),
            Fragments.fr_bicyclic(mol),
            Fragments.fr_C_O(mol),
            Fragments.fr_C_O_noCOO(mol),
            Fragments.fr_C_S(mol),
            Fragments.fr_COO(mol),
            Fragments.fr_COO2(mol),
            Fragments.fr_diazo(mol),
            Fragments.fr_dihydropyridine(mol),
            Fragments.fr_epoxide(mol),
            Fragments.fr_ester(mol),
            Fragments.fr_ether(mol),
            Fragments.fr_furan(mol),
            Fragments.fr_guanido(mol),
            Fragments.fr_halogen(mol),
            Fragments.fr_hdrzine(mol),
            Fragments.fr_hdrzone(mol),
            Fragments.fr_HOCCN(mol),
            Fragments.fr_imidazole(mol),
            Fragments.fr_imide(mol),
            Fragments.fr_Imine(mol),
            Fragments.fr_isocyan(mol),
            Fragments.fr_isothiocyan(mol),
            Fragments.fr_ketone(mol),
            Fragments.fr_ketone_Topliss(mol),
            Fragments.fr_lactam(mol),
            Fragments.fr_lactone(mol),
            Fragments.fr_methoxy(mol),
            Fragments.fr_morpholine(mol),
            Fragments.fr_N_O(mol),
            Fragments.fr_Ndealkylation1(mol),
            Fragments.fr_Ndealkylation2(mol),
            Fragments.fr_NH0(mol),
            Fragments.fr_NH1(mol),
            Fragments.fr_NH2(mol),
            Fragments.fr_Nhpyrrole(mol),
            Fragments.fr_nitrile(mol),
            Fragments.fr_nitro(mol),
            Fragments.fr_nitro_arom(mol),
            Fragments.fr_nitro_arom_nonortho(mol),
            Fragments.fr_nitroso(mol),
            Fragments.fr_oxazole(mol),
            Fragments.fr_oxime(mol),
            Fragments.fr_para_hydroxylation(mol),
            Fragments.fr_phenol(mol),
            Fragments.fr_phenol_noOrthoHbond(mol),
            Fragments.fr_phos_acid(mol),
            Fragments.fr_phos_ester(mol),
            Fragments.fr_piperdine(mol),
            Fragments.fr_piperzine(mol),
            Fragments.fr_priamide(mol),
            Fragments.fr_prisulfonamd(mol),
            Fragments.fr_pyridine(mol),
            Fragments.fr_quatN(mol),
            Fragments.fr_SH(mol),
            Fragments.fr_sulfide(mol),
            Fragments.fr_sulfonamd(mol),
            Fragments.fr_sulfone(mol),
            Fragments.fr_term_acetylene(mol),
            Fragments.fr_tetrazole(mol),
            Fragments.fr_thiazole(mol),
            Fragments.fr_thiocyan(mol),
            Fragments.fr_thiophene(mol),
            Fragments.fr_unbrch_alkane(mol),
            Fragments.fr_urea(mol),
            GraphDescriptors.BalabanJ(mol),
            GraphDescriptors.BertzCT(mol),
            GraphDescriptors.Chi0(mol),
            GraphDescriptors.Chi0n(mol),
            GraphDescriptors.Chi0v(mol),
            GraphDescriptors.Chi1(mol),
            GraphDescriptors.Chi1n(mol),
            GraphDescriptors.Chi1v(mol),
            GraphDescriptors.Chi2n(mol),
            GraphDescriptors.Chi2v(mol),
            GraphDescriptors.Chi3n(mol),
            GraphDescriptors.Chi3v(mol),
            GraphDescriptors.Chi4n(mol),
            GraphDescriptors.Chi4v(mol),
            GraphDescriptors.HallKierAlpha(mol),
            GraphDescriptors.Ipc(mol),
            GraphDescriptors.Kappa1(mol),
            GraphDescriptors.Kappa2(mol),
            GraphDescriptors.Kappa3(mol),
            Lipinski.HeavyAtomCount(mol),
            Lipinski.NHOHCount(mol),
            Lipinski.NOCount(mol),
            Lipinski.NumAliphaticCarbocycles(mol),
            Lipinski.NumAliphaticHeterocycles(mol),
            Lipinski.NumAliphaticRings(mol),
            Lipinski.NumAromaticCarbocycles(mol),
            Lipinski.NumAromaticHeterocycles(mol),
            Lipinski.NumAromaticRings(mol),
            Lipinski.NumHAcceptors(mol),
            Lipinski.NumHDonors(mol),
            Lipinski.NumHeteroatoms(mol),
            Lipinski.NumRotatableBonds(mol),
            Lipinski.NumSaturatedCarbocycles(mol),
            Lipinski.NumSaturatedHeterocycles(mol),
            Lipinski.NumSaturatedRings(mol),
            Lipinski.RingCount(mol),
            MolSurf.LabuteASA(mol),
            MolSurf.PEOE_VSA1(mol),
            MolSurf.PEOE_VSA10(mol),
            MolSurf.PEOE_VSA11(mol),
            MolSurf.PEOE_VSA12(mol),
            MolSurf.PEOE_VSA13(mol),
            MolSurf.PEOE_VSA14(mol),
            MolSurf.PEOE_VSA2(mol),
            MolSurf.PEOE_VSA3(mol),
            MolSurf.PEOE_VSA4(mol),
            MolSurf.PEOE_VSA5(mol),
            MolSurf.PEOE_VSA6(mol),
            MolSurf.PEOE_VSA7(mol),
            MolSurf.PEOE_VSA8(mol),
            MolSurf.PEOE_VSA9(mol),
            MolSurf.SlogP_VSA1(mol),
            MolSurf.SlogP_VSA10(mol),
            MolSurf.SlogP_VSA11(mol),
            MolSurf.SlogP_VSA12(mol),
            MolSurf.SlogP_VSA2(mol),
            MolSurf.SlogP_VSA3(mol),
            MolSurf.SlogP_VSA4(mol),
            MolSurf.SlogP_VSA5(mol),
            MolSurf.SlogP_VSA6(mol),
            MolSurf.SlogP_VSA7(mol),
            MolSurf.SlogP_VSA8(mol),
            MolSurf.SlogP_VSA9(mol),
            MolSurf.SMR_VSA1(mol),
            MolSurf.SMR_VSA10(mol),
            MolSurf.SMR_VSA2(mol),
            MolSurf.SMR_VSA3(mol),
            MolSurf.SMR_VSA4(mol),
            MolSurf.SMR_VSA5(mol),
            MolSurf.SMR_VSA6(mol),
            MolSurf.SMR_VSA7(mol),
            MolSurf.SMR_VSA8(mol),
            MolSurf.SMR_VSA9(mol),
            MolSurf.TPSA(mol)
        ]))
    return descriptors
示例#9
0
def get_num_heavy_atoms_(mol: Mol) -> int:
    """Returns the number of heavy atoms in the molecule"""
    return Lipinski.HeavyAtomCount(mol)
示例#10
0
def smiles_to_all_labels(df):

    smilesList = df['SMILES']
    feature_df = df.copy()

    # get all functions of modules
    all_lipinski = inspect.getmembers(l, inspect.isfunction)
    all_fragments = inspect.getmembers(f, inspect.isfunction)

    # bad features have the same value for all our compounds
    bad_features = []
    for (columnName, columnData) in df.iteritems():
        if (len(set(columnData.values)) == 1):
            bad_features.append(columnName)

    # add fragment features
    for i in range(len(all_fragments)):
        new_col = []

        # exclude attributes which start with _ and exclude bad features
        if all_fragments[i][0].startswith(
                '_') == False and all_fragments[i][0] not in bad_features:

            for smiles in smilesList:
                molecule = chem.MolFromSmiles(smiles)
                mol_method = all_fragments[i][1](molecule)
                new_col.append(mol_method)

            # add new col with feature name to our df
            feature_df[all_fragments[i][0]] = new_col

    print('fragments over')

    # add lipinski features
    for i in range(len(all_lipinski)):

        new_col = []
        if all_lipinski[i][0].startswith(
                '_') == False and all_fragments[i][0] not in bad_features:

            for smiles in smilesList:

                molecule = chem.MolFromSmiles(smiles)
                mol_method = all_lipinski[i][1](molecule)
                new_col.append(mol_method)

            feature_df[all_lipinski[i][0]] = new_col

    print('lipinski over')

    new_col = []
    for smiles in smilesList:

        molecule = chem.MolFromSmiles(smiles)
        new_col.append(f.fr_Al_COO(molecule))

    feature_df["fr_Al_COO"] = new_col

    # new_col = []
    for smiles in smilesList:

        molecule = chem.MolFromSmiles(smiles)
        new_col.append(l.HeavyAtomCount(molecule))

    feature_df["HeavyAtomCount"] = new_col

    # add getnumatoms as feature
    new_col = []
    for smiles in smilesList:

        molecule = chem.MolFromSmiles(smiles)
        new_col.append(molecule.GetNumAtoms())

    feature_df["GetNumAtoms"] = new_col

    # add CalcExactMolWt as feature
    new_col = []
    for smiles in smilesList:

        molecule = chem.MolFromSmiles(smiles)
        new_col.append(molDesc.CalcExactMolWt(molecule))

    feature_df["CalcExactMolWt"] = new_col

    # print('other over')

    return feature_df
示例#11
0
def main():
    # CLI options parsing
    parser = argparse.ArgumentParser(
        description = "Project molecules read from a SMILES file into an 8D \
        space whose dimensions are molecular descriptors: \
        (MolW, HA, cLogP, MR, TPSA, RotB, HBA, HBD, FC)")
    parser.add_argument("-i", metavar = "input_smi", dest = "input_smi",
                        help = "input SMILES file")
    parser.add_argument("-o", metavar = "output_csv", dest = "output_csv",
                        help = "output CSV file")
    parser.add_argument('--no-header', dest='no_header',
                        action='store_true', default=False,
                        help = "no CSV header in output file")
    # just warn about aliens by default
    parser.add_argument('--remove-aliens', dest='rm_aliens',
                        action='store_true', default=False,
                        help = "don't allow aliens in output file")
    # parse CLI
    if len(sys.argv) == 1:
        # show help in case user has no clue of what to do
        parser.print_help(sys.stderr)
        sys.exit(1)
    args = parser.parse_args()
    input_smi = args.input_smi
    output_csv = args.output_csv
    rm_aliens = args.rm_aliens
    no_header = args.no_header
    out_count = 0
    alien_count = 0
    error_count = 0
    with open(output_csv, 'w') as out_file:
        if not no_header:
            print("#name,MolW,HA,cLogP,AR,MR,TPSA,RotB,HBA,HBD,FC", file=out_file)
        for i, mol, name in RobustSmilesMolSupplier(input_smi):
            if mol is None:
                error_count += 1
            else:
                MolW = Descriptors.MolWt(mol)
                HA = Lipinski.HeavyAtomCount(mol)
                cLogP = Descriptors.MolLogP(mol)
                AR = Lipinski.NumAromaticRings(mol)
                MR = Descriptors.MolMR(mol)
                TPSA = Descriptors.TPSA(mol)
                RotB = Descriptors.NumRotatableBonds(mol)
                HBA = Descriptors.NumHAcceptors(mol)
                HBD = Descriptors.NumHDonors(mol)
                FC = Chem.rdmolops.GetFormalCharge(mol)
                alien = is_alien(MolW, cLogP, TPSA, RotB, HBA, HBD, FC)
                if alien:
                    alien_str = alien_diagnose(i, name, MolW, cLogP, TPSA,
                                               RotB, HBA, HBD, FC)
                    print("WARN: %s" % alien_str, file=sys.stderr)
                    alien_count += 1
                if (not alien) or (not rm_aliens):
                    csv_line = "%s,%g,%d,%g,%d,%g,%g,%d,%d,%d,%d" % \
                               (name, MolW, HA, cLogP, AR, MR, TPSA, RotB,
                                HBA, HBD, FC)
                    print(csv_line, file=out_file)
                    out_count += 1
    total_count = out_count + error_count
    if rm_aliens:
        total_count += alien_count
    print("encoded: %d aliens: %d errors: %d total: %d" % \
          (out_count, alien_count, error_count, total_count),
          file=sys.stderr)