Exemplo n.º 1
0
def write_smiles_id_file(meas, target, activity_type, max_heavy):
    """
    Format dataset for MMP analysis and write it to SMILES files.

    Two files are created in the current working directory:

    * ``<target>_<activity_type>_ligands.smi`` -- one ``"<smiles> <id>"`` line
      per accepted compound (desalted, canonical SMILES).
    * ``<target>_<activity_type>_problem_smiles.smi`` -- the input SMILES and
      id of every compound that could not be processed.

    Parameters
    ----------
    meas : dict
        Maps a compound id (str) to a sequence whose first element is a dict
        holding the input SMILES under the key ``'smiles'``.
    target, activity_type : str
        Used only to build the two output file names.
    max_heavy : int
        Compounds whose desalted structure reports more atoms than this
        (``GetNumAtoms()``) are silently skipped.

    Returns
    -------
    None
    """

    from rdkit import Chem
    from rdkit.Chem import SaltRemover
    remover = SaltRemover.SaltRemover()

    prefix = target + "_" + activity_type
    smifile = prefix + "_ligands.smi"
    error_files = prefix + "_problem_smiles.smi"

    # Context managers guarantee both files are closed even if the loop raises.
    with open(smifile, 'w') as ligands_out, open(error_files, 'w') as errors_out:
        for mol in meas:
            raw_smiles = meas[mol][0]['smiles']
            try:
                cpd = Chem.MolFromSmiles(raw_smiles)
                if cpd is None:
                    # Unparsable SMILES: record it explicitly instead of
                    # relying on StripMol(None) to blow up.
                    errors_out.write(raw_smiles + " " + mol + "\n")
                    continue
                res = remover.StripMol(cpd)  # Remove Salts
                if res.GetNumAtoms() > max_heavy:
                    continue
                smiles = Chem.MolToSmiles(res, True)  # Canonicalize smiles
                if "." in smiles:
                    # Single-argument print() produces the same text under
                    # Python 2 and Python 3.
                    print("Found unknown salt in  {} :  {}".format(mol, smiles))
                    print("This compound will be ignored in all further calculations.")
                    continue

                ligands_out.write(smiles + " " + mol + '\n')
            except Exception:
                # Deliberately broad (any toolkit failure goes to the problem
                # file) but no longer swallows KeyboardInterrupt/SystemExit.
                errors_out.write(raw_smiles + " " + mol + "\n")

    return
Exemplo n.º 2
0
def smiles_to_mol(smiles: list,
                  max_attempts: int = 10,
                  use_random_coords: bool = False,
                  deisomerize=False) -> dict:
    """
    Build desalted, hydrogen-added, 3D-embedded molecules from SMILES strings.

    Parameters
    ----------
    smiles : list
        Input SMILES strings.
    max_attempts : int
        Forwarded to ``AllChem.EmbedMolecule`` as ``maxAttempts``.
    use_random_coords : bool
        Forwarded to ``AllChem.EmbedMolecule`` as ``useRandomCoords``.
    deisomerize : bool
        When true, every SMILES is passed through ``deisomerize_smiles``
        before parsing.

    Returns
    -------
    dict
        Maps each *input* SMILES string to its processed molecule. Entries
        whose processing raised (including failed embedding) are logged with
        a warning and omitted. Duplicate input strings share one key.
    """
    mols_raw = [
        Chem.MolFromSmiles(deisomerize_smiles(smi) if deisomerize else smi)
        for smi in smiles
    ]
    logger.info("Computing 3D coordinates...")
    remover = SaltRemover.SaltRemover()
    mols = {}
    # The context manager closes the progress bar even if something unexpected
    # escapes the loop, so later console output is not garbled.
    with tqdm(total=len(mols_raw)) as pbar:
        for smi, mol in zip(smiles, mols_raw):
            pbar.update()
            logger.debug("Embedding %s", smi)
            try:
                # An unparsable SMILES yields mol=None; StripMol then raises
                # and the entry is reported by the handler below.
                mol = remover.StripMol(mol, dontRemoveEverything=True)
                mol = Chem.AddHs(mol)
                # NOTE(review): EmbedMolecule replaces existing conformers by
                # default, so this 2D layout is presumably redundant --
                # confirm before removing.
                AllChem.Compute2DCoords(mol)
                conf_id = AllChem.EmbedMolecule(
                    mol,
                    maxAttempts=max_attempts,
                    useRandomCoords=use_random_coords)
                if conf_id < 0:
                    # EmbedMolecule signals failure with -1 instead of raising.
                    raise ValueError("3D embedding failed")
                AllChem.UFFOptimizeMolecule(mol)  # Is this deterministic?
            except Exception as e:
                logger.warning("Exception for %s: %s", smi, e)
            else:
                mols[smi] = mol
    logger.info("Finished embedding all molecules")
    return mols
Exemplo n.º 3
0
def RemoveSaltsFromFrame(frame, molCol='ROMol'):
  '''
  Strip salts from every molecule stored in column ``molCol`` of a pandas
  DataFrame.

  The column is overwritten on the passed frame; nothing is returned.
  A single module-level SaltRemover is created on first use and reused by
  later calls.
  '''
  global _saltRemover
  if _saltRemover is None:
    # Imported lazily so the remover is only loaded when actually needed.
    from rdkit.Chem import SaltRemover
    _saltRemover = SaltRemover.SaltRemover()

  def _strip_row(row):
    return _saltRemover.StripMol(row[molCol])

  frame[molCol] = frame.apply(_strip_row, axis=1)
Exemplo n.º 4
0
def process_mol(mol):
    """
    Standardize a molecule and return its InChI string.

    Steps, in order: keep the largest (preferably organic) fragment, reject
    non-organic molecules, strip salts, normalize functional groups, select
    the canonical tautomer, disconnect metals, and run ``check_valid_atoms``.

    Parameters
    ----------
    mol : RDKit molecule to standardize.

    Returns
    -------
    str
        InChI of the standardized molecule.

    Raises
    ------
    ManualReviewException
        If the molecule is not organic or any step yields ``None``.
        ``check_valid_atoms`` may raise as well (defined elsewhere).
    """

    # removal of mixtures
    fragmenter_object = molvs.fragment.LargestFragmentChooser(
        prefer_organic=True)
    mol = fragmenter_object.choose(mol)
    if mol is None:
        logging.info("Mixture removal failed for molecule")
        # Stop here like every other step; continuing would only crash
        # inside is_organic(None).
        raise ManualReviewException("Mixture removal failed for molecule")

    # removal of inorganics
    if not molvs.fragment.is_organic(mol):
        raise ManualReviewException("Molecule is not organic")

    # removal of salts
    remover = SaltRemover.SaltRemover()
    mol = remover.StripMol(
        mol,
        dontRemoveEverything=True)  #tartrate is listed as a salt? what do?
    if mol is None:
        raise ManualReviewException("Salt removal failed for molecule")

    # structure normalization
    normalizer = molvs.normalize.Normalizer(
        normalizations=molvs.normalize.NORMALIZATIONS,
        max_restarts=molvs.normalize.MAX_RESTARTS)
    mol = normalizer.normalize(mol)
    if mol is None:
        raise ManualReviewException("Normalization failed for molecule")

    # tautomer selection
    tautomerizer = molvs.tautomer.TautomerCanonicalizer(
        transforms=molvs.tautomer.TAUTOMER_TRANSFORMS,
        scores=molvs.tautomer.TAUTOMER_SCORES,
        max_tautomers=molvs.tautomer.MAX_TAUTOMERS)
    # The canonicalizer used to be constructed but never applied, which made
    # the None check below dead code.
    mol = tautomerizer.canonicalize(mol)
    if mol is None:
        raise ManualReviewException("Tautomerization failed for molecule")

    # disconnect metals
    metal_remover = molvs.metal.MetalDisconnector()
    mol = metal_remover.disconnect(mol)
    if mol is None:
        raise ManualReviewException("Metal removal failed for molecule")

    # final check for only valid atoms
    check_valid_atoms(mol)

    inchi = Chem.MolToInchi(mol)

    return inchi
def initialize_ChEMBL_PDB_conversion():
    """
    Return ChEMBL to PDB conversion and vice versa via uniprot IDs

    Reads three files from the current working directory:

    * ``cc-to-pdb.txt`` -- per line: a ligand id followed by the PDB ids that
      contain it.
    * ``pdbtosp.txt`` -- PDB to UniProt listing; the first 24 and last 6 lines
      are discarded and only entries whose second field is ``X-ray`` are used.
    * ``Components-smiles-stereo-oe.smi`` -- per line: SMILES, then ligand id.

    Returns
    -------
    tuple
        ``(PDB_LIG_ID_to_PDB_PROT, PDB_PROT_to_PDB_LIG_ID, PDB_RES,
        PDB_to_UNIPROT, UNIPROT_to_PDB, pdb_ligands)`` where

        * PDB_LIG_ID_to_PDB_PROT: dict, ligand id -> list of PDB ids
        * PDB_PROT_to_PDB_LIG_ID: defaultdict(list), PDB id -> ligand ids
        * PDB_RES: dict, lower-cased PDB id -> third field of the entry line
        * PDB_to_UNIPROT: dict, lower-cased PDB id -> list of UniProt ids
        * UNIPROT_to_PDB: defaultdict(list), UniProt id -> lower-cased PDB ids
        * pdb_ligands: dict, ligand id -> ``{"smiles": <str>, "fp": None}``
    """

    # --- ligand id <-> PDB id ------------------------------------------
    with open("cc-to-pdb.txt", "r") as f:
        cc_lines = f.readlines()

    PDB_LIG_ID_to_PDB_PROT = {}
    PDB_PROT_to_PDB_LIG_ID = defaultdict(list)
    for line in cc_lines:
        fields = line.split()
        lig_id, pdb_ids = fields[0], fields[1:]
        PDB_LIG_ID_to_PDB_PROT[lig_id] = pdb_ids
        for pdb_id in pdb_ids:
            PDB_PROT_to_PDB_LIG_ID[pdb_id].append(lig_id)

    # --- PDB id <-> UniProt id -----------------------------------------
    with open("pdbtosp.txt", "r") as f:
        text = f.read()

    # Join continuation lines onto the entry they belong to, then drop the
    # fixed-size header and footer of the listing.
    text = text.replace("\n                           ", "")
    entries = text.split("\n")[24:-6]

    PDB_RES = {}
    PDB_to_UNIPROT = {}
    UNIPROT_to_PDB = defaultdict(list)
    for line in entries:
        fields = line.split()
        if fields[1] != "X-ray":
            continue
        # str.lower() replaces the Python-2-only string.lower().
        pdb_id = line[:4].lower()
        # Each "(" in the tail of the line is followed by a 6-character id.
        uniprot_ids = [chunk[:6] for chunk in line[41:].split("(")]
        PDB_RES[fields[0].lower()] = fields[2]
        PDB_to_UNIPROT[pdb_id] = uniprot_ids
        for uniprot_id in uniprot_ids:
            UNIPROT_to_PDB[uniprot_id].append(pdb_id)

    # --- PDB ligand SMILES (fingerprints are left unset here) ----------
    pdb_ligands = {}
    with open("Components-smiles-stereo-oe.smi", "r") as f:
        for line in f:
            fields = line.split()
            if len(fields) >= 2:
                pdb_ligands[fields[1]] = {"smiles": fields[0], "fp": None}

    return PDB_LIG_ID_to_PDB_PROT, PDB_PROT_to_PDB_LIG_ID, PDB_RES, PDB_to_UNIPROT, UNIPROT_to_PDB, pdb_ligands
Exemplo n.º 6
0
def screen_organic(smiles):
    """
    Heuristic check that a SMILES string describes a single, salt-free,
    purely organic molecule.

    The result is False when the input is None or unparsable, when the salt
    remover strips any atoms, when the molecule has more than one fragment,
    or when any atom is outside H, B, C, N, O, F, P, S, Cl, Br, I.

    Parameters
    -----------
    smiles : str

    Returns
    ------------
    is_organic : bool
    """
    if smiles is None:
        return False

    salt_stripper = SaltRemover.SaltRemover()

    # Matches any atom that is NOT one of the organic elements
    # H, B, C, N, O, F, P, S, Cl, Br, I.
    non_organic_atom = Chem.MolFromSmarts(
        '[!$([#1,#5,#6,#7,#8,#9,#15,#16,#17,#35,#53])]')

    parsed = Chem.MolFromSmiles(smiles, sanitize=True)
    if parsed is None:
        return False

    # Anything the salt remover strips disqualifies the input.
    desalted = salt_stripper.StripMol(parsed)
    if desalted is not None and desalted.GetNumAtoms() < parsed.GetNumAtoms():
        return False

    # Multi-fragment inputs are rejected outright (no largest-fragment pick).
    fragments = AllChem.GetMolFrags(parsed, asMols=True)
    if len(fragments) > 1:
        return False

    # Organic only if no atom matches the "non-organic" pattern.
    return not parsed.HasSubstructMatch(non_organic_atom)
Exemplo n.º 7
0
def filter_ions(df):
    """
    Strip common counter-ions from the molecules in ``df['mol_send']`` and
    drop rows that become duplicates afterwards.

    Adds the columns ``'mol_strip'`` (stripped molecules) and ``'smilesf'``
    (longest dot-separated fragment of the stripped molecule's SMILES) to
    ``df``, removes duplicate ``'smilesf'`` rows in place, prints progress
    messages, and returns the same frame.
    """
    print('You provided {} molecules to the salt remover'.format(df.shape[0]))

    def _longest_fragment(smi):
        # Keep the fragment with the longest SMILES text.
        return max(smi.split('.'), key=len)

    # Strip common ions out of molecule objects
    ion_stripper = SaltRemover.SaltRemover(
        defnData="[Li,Na,K,Rb,Cs,Mg,Ca,Sr,Ba,Zn,Cl,Br,F,I]")
    df['mol_strip'] = df['mol_send'].map(ion_stripper.StripMol)
    df['smilesf'] = df['mol_strip'].map(Chem.MolToSmiles).map(_longest_fragment)
    print("""CAUTION. You are removing ions and other fragments. However, the
            fingerprints used to calculate diversity were determined before removal.
            Consider recalculating fingerprints.""")

    print('Filter will try to remove duplicates after de-salting...')

    # remove duplicates remaining after removing counterions
    df.drop_duplicates(inplace=True, subset='smilesf')

    print('After duplicate removal, there are {} molecules'.format(df.shape[0]))
    print('...')
    return df
def get_pdbs_with_similar_ligands(circles,target,tc,pdbids, PDB_PROT_to_PDB_LIG_ID, pdb_ligands, meas):
    """
    Find similar ligands within pdb files

    For every compound of every circle, the desalted compound is
    fingerprinted and compared with the ligands of each PDB id in ``pdbids``.

    Parameters
    ----------
    circles : iterable of iterables of compound ids (keys of ``meas``).
    target : unused by this function.
    tc : similarity threshold; hits with ``sim >= tc`` are reported.
    pdbids : iterable of PDB ids to search.
    PDB_PROT_to_PDB_LIG_ID : mapping PDB id -> iterable of ligand ids.
    pdb_ligands : dict ligand id -> ``{"smiles": ..., "fp": ...}``. Updated in
        place: fingerprints are cached on first use and ligands that fail to
        process are replaced by ``{"fp": "skip"}``.
    meas : mapping compound id -> sequence whose first element holds the
        compound SMILES under ``'smiles'``.

    Returns
    -------
    list
        One list per circle, holding one list per compound of
        ``(pdbid, similarity)`` tuples.
    """

    remover = SaltRemover.SaltRemover()
    similar_pdbs = []
    for circle in circles:
        circle_pdbs = []
        for cpd in circle:
            t_smi = meas[cpd][0]['smiles']
            t_cpd = Chem.MolFromSmiles(t_smi)
            t_res = remover.StripMol(t_cpd)        # Remove Salts
            t_fp = FingerprintMols.FingerprintMol(t_res)
            pdbs = []
            for pdbid in pdbids:
                for lig in PDB_PROT_to_PDB_LIG_ID[pdbid]:
                    try:
                        if pdb_ligands[lig]["fp"] == "skip":
                            continue
                        if pdb_ligands[lig]["fp"] is None:
                            # Separate name: the original reused ``cpd`` and
                            # clobbered the outer loop variable.
                            lig_mol = Chem.MolFromSmiles(pdb_ligands[lig]["smiles"])
                            lig_res = remover.StripMol(lig_mol)        # Remove Salts
                            # NOTE(review): ``max_heavy`` is not defined in this
                            # function; presumably a module-level constant. If
                            # it is missing, the NameError is swallowed below
                            # and every ligand is marked "skip" -- confirm.
                            if lig_res.GetNumAtoms() > max_heavy:
                                continue  # if the ligand is too large
                            smiles = Chem.MolToSmiles(lig_res)     # Canonicalize smiles
                            fp = FingerprintMols.FingerprintMol(lig_res)
                            pdb_ligands[lig] = {"smiles": smiles, "fp": fp, "mol": lig_mol}
                    except Exception:
                        # Best effort: never retry a ligand that failed once.
                        # ``Exception`` (not a bare except) lets
                        # KeyboardInterrupt/SystemExit propagate.
                        pdb_ligands[lig] = {"fp": "skip"}
                        continue
                    sim = DataStructs.FingerprintSimilarity(t_fp, pdb_ligands[lig]["fp"])
                    if sim >= tc:
                        pdbs.append((pdbid, sim))
            circle_pdbs.append(pdbs)
        similar_pdbs.append(circle_pdbs)

    return similar_pdbs
def main(prm_file):
    """
    Clean an SDF library, shuffle it, and write it out split by molecular weight.

    The molecules are desalted, reduced to their largest (preferably organic)
    fragment and converted to SMILES; ``ID`` becomes ``'CB_' + CB_ID``. After
    shuffling, MW is taken from ``QED.properties`` and the table is written as
    ``<prefix>.<bin>.csv.bz2`` and ``<prefix>.<bin>.smi`` for four MW bins:
    ``small`` (<= 150), ``frag`` (150-300], ``lead`` (300-400], ``drug``
    (> 400). ``<prefix>`` is ``prm_file`` up to the first ``'.sdf'``.

    Parameters
    ----------
    prm_file : str
        Path of the input SDF file; it must provide a ``CB_ID`` property.
    """

    pref = prm_file.split('.sdf')[0]

    print('## Reading file...')
    prm_df = PandasTools.LoadSDF(prm_file,
                                 smilesName='SMILES',
                                 molColName='MOL',
                                 includeFingerprints=False)
    print(prm_df[:10])

    ## remove salts and rename the smiles
    print('## Cleaning moleucles...')
    remover = SaltRemover.SaltRemover()
    chooser = rdMolStandardize.LargestFragmentChooser(preferOrganic=True)

    prm_df['molx'] = prm_df.MOL.apply(remover.StripMol)
    prm_df['mol'] = prm_df.molx.apply(chooser.choose)
    prm_df['smiles'] = prm_df.mol.apply(Chem.MolToSmiles)

    def add_cb(inp):
        return 'CB_' + str(inp)

    prm_df['ID'] = prm_df.CB_ID.apply(add_cb)

    ## shuffle
    print('## Shuffling molecules...')
    # The shuffled frame used to be bound to an unused name, so the files
    # were written in input order; rebind prm_df so the shuffle takes effect.
    prm_df = prm_df.sample(frac=1).reset_index(drop=True)

    print(prm_df[:10])

    ## recalculate molecular properties
    print('## Calculating properties...')
    prm_df['qed'] = prm_df.mol.apply(QED.properties)
    prm_df['MW'] = prm_df.qed.apply(lambda x: x.MW)
    # Only MW is derived here. NOTE(review): the remaining CSV columns
    # (HA, logP, LogS, HBA, HBD, PSA, ROTB, AROM, SaltType) are presumably
    # properties already present in the input SDF -- confirm.
    print(prm_df[:10])
    print(' > number of molecules... ', len(prm_df))

    ## print out molecule properties and smiles (shuffled)
    print('## Writing results...')
    Cols_csv = [
        'ID', 'MW', 'HA', 'logP', 'LogS', 'HBA', 'HBD', 'PSA', 'ROTB', 'AROM',
        'SaltType', 'smiles'
    ]
    Cols_smi = ['smiles', 'ID']

    # (file tag, row mask) per molecular-weight bin, in output order.
    mw_bins = (
        ('.frag', (prm_df.MW > 150.) & (prm_df.MW <= 300.)),
        ('.lead', (prm_df.MW > 300.) & (prm_df.MW <= 400.)),
        ('.drug', prm_df.MW > 400.),
        ('.small', prm_df.MW <= 150.),
    )
    for tag, mask in mw_bins:
        subset = prm_df.loc[mask]
        subset.to_csv(pref + tag + '.csv.bz2',
                      sep=',',
                      float_format='%.2f',
                      columns=Cols_csv,
                      index=False)
        subset.to_csv(pref + tag + '.smi',
                      sep='\t',
                      columns=Cols_smi,
                      index=False)
Exemplo n.º 10
0
    def process_mol(self):
        """
        Run the standardization pipeline on ``self.mol``.

        Each step (largest fragment, salt stripping, normalization, tautomer
        canonicalization, metal disconnection) is applied in turn. A step's
        result is adopted, and a note added to ``self.history``, only when
        its InChI differs from the current molecule's. A step returning
        ``None`` or a non-organic molecule marks the record as rejected.

        Returns True and stores the processed molecule in ``self.mol`` on
        success; returns False otherwise (``self.mol`` is left untouched).
        """
        mol = self.mol

        def reject(reason):
            # Log the reason, flag the record, and hand back the False result.
            self.history.add_modification(text=reason)
            self.rejected = True
            return False

        def inchi_differs(candidate, reference):
            return Chem.MolToInchi(candidate) != Chem.MolToInchi(reference)

        # removal of mixtures
        chooser = molvs.fragment.LargestFragmentChooser(prefer_organic=True)
        candidate = chooser.choose(mol)
        if candidate is None:
            return reject("REJECT: Fragment chooser failed")
        if inchi_differs(candidate, mol):
            self.history.add_modification(
                text="Detected mixture, chose largest fragment")
            mol = candidate

        # removal of inorganics
        if not molvs.fragment.is_organic(mol):
            return reject("REJECT: Molecule is not organic")

        def strip_salts(current):
            #tartrate is listed as a salt? what do?
            return SaltRemover.SaltRemover().StripMol(
                current, dontRemoveEverything=True)

        def normalize(current):
            normalizer = molvs.normalize.Normalizer(
                normalizations=molvs.normalize.NORMALIZATIONS,
                max_restarts=molvs.normalize.MAX_RESTARTS)
            return normalizer.normalize(current)

        def canonicalize_tautomer(current):
            tautomerizer = molvs.tautomer.TautomerCanonicalizer(
                transforms=molvs.tautomer.TAUTOMER_TRANSFORMS,
                scores=molvs.tautomer.TAUTOMER_SCORES,
                max_tautomers=molvs.tautomer.MAX_TAUTOMERS)
            return tautomerizer(current)

        def disconnect_metals(current):
            return molvs.metal.MetalDisconnector().disconnect(current)

        # (transform, text logged on failure, text logged on change)
        remaining_steps = (
            (strip_salts, "REJECT: Salt removal failed",
             "Detected salts, removed"),
            (normalize, "REJECT: Normalization failed",
             "Normalization(s) applied"),
            (canonicalize_tautomer, "REJECT: Tautomerization failed",
             "Tautomer(s) canonicalized"),
            (disconnect_metals, "REJECT: Metal removal failed",
             "Metal(s) disconnected"),
        )
        for transform, failure_text, change_text in remaining_steps:
            candidate = transform(mol)
            if candidate is None:
                return reject(failure_text)
            if inchi_differs(candidate, mol):
                self.history.add_modification(text=change_text)
                mol = candidate

        # final check for only valid atoms
        # NOTE(review): this runs before self.mol is replaced below, so it
        # presumably inspects the unprocessed molecule -- confirm intended.
        if not self.check_valid_atoms():
            return False

        self.history.add_modification(text="Passed validation")
        self.mol = mol

        return True
def remove_salts(mol):
    """Return the result of stripping ``mol`` with a freshly built SaltRemover."""
    return SaltRemover.SaltRemover().StripMol(mol)
Exemplo n.º 12
0
def _remove_salts(mol):
    """Strip salts from ``mol``, passing ``dontRemoveEverything=True`` to the remover."""
    stripper = SaltRemover.SaltRemover()
    desalted = stripper.StripMol(mol, dontRemoveEverything=True)
    return desalted
Exemplo n.º 13
0
def build_ligand_dictionary_from_infile(
    infile: str,
    error_path: str,
    props,
    units,
    *,
    delimiter=None,
    series_column=None,
):
    """
    Read input file and assemble dictionaries

    Parameters
    ----------
    infile : str
        Delimited text file with a header line. The identifier column is the
        first header containing "SRN" or "ID" (default: column 0); the SMILES
        column is the first header containing "smiles", case-insensitively
        (default: column 1).
    error_path : str
        File that receives every SMILES that could not be processed.
    props : sequence of str
        Header names of the activity columns. When empty, column 2 is used.
    units : sequence of str
        One entry per activity column, or empty. ``"noconv"`` stores values
        unchanged, ``"log10"`` stores ``log10(value)``; any other entry is
        looked up in ``unit_conv`` and the value stored as
        ``-log10(value * factor)``. Columns whose name contains one of the
        log flags (pIC50, pKi, ...) are never converted.
    delimiter : str, optional
        ``"comma"``, ``"tab"``, ``"space"``, ``"semicolon"``, or a literal
        separator. Defaults to tab.
    series_column : str, optional
        Header name of a column copied into each record's ``"series"``.

    Returns
    -------
    tuple
        ``(meas, props, units)``. ``meas`` maps compound id to a dict with
        keys ``smiles``, ``Act``, ``pAct``, ``qualifiers``, ``mwt`` and
        ``series``; ``units`` is filled with ``"noconv"`` when it was empty.

    Raises
    ------
    RuntimeError
        On ambiguous columns, duplicate identifiers, short rows, unparsable
        activity values, or non-positive values that need a logarithm.
    """
    # Symbolic names are translated; any other value is used literally.
    delimiter_aliases = {
        "comma": ",",
        "tab": "\t",
        "space": " ",
        "semicolon": ";",
    }
    if delimiter is None:
        delimiter = "\t"
    else:
        delimiter = delimiter_aliases.get(delimiter, delimiter)

    with open(infile, "r") as f, open(error_path, "w") as g:
        ########
        # Process header
        header = [
            i.strip('"') for i in f.readline().rstrip("\n").split(delimiter)
        ]

        ########
        # Figure out Column ID of SMILES and ID column
        id_candidates = [
            i for i, name in enumerate(header) if "SRN" in name or "ID" in name
        ]
        id_col = id_candidates[0] if id_candidates else 0

        smi_candidates = [
            i for i, name in enumerate(header) if "smiles" in name.lower()
        ]
        smi_col = smi_candidates[0] if smi_candidates else 1

        if series_column:
            ser_col = header.index(series_column)

        ########
        # Figure out target column ids
        if not props:
            act_col = [2]
            props = [header[2]]
        else:
            try:
                act_col = [header.index(i) for i in props]
            except Exception:
                print(
                    "Could not find all given Activity columns in file header."
                )
                raise

        ########
        # Figure out conversion of target columns
        # Valid Flags for not converting activity data to pActivity: pIC50, pEC50, pKi, pKd, noconv
        log_flags = [
            "pIC50", "pEC50", "pKi", "pKd", "pCC50", "pIC20", "pID50", "noconv"
        ]
        col_convert = [
            not any(log_flag.lower() in target.lower()
                    for log_flag in log_flags) for target in props
        ]
        log10 = [False for _ in props]

        #########
        # Write Identified Columns to STDOUT
        print("Identifier Column found: " + header[id_col])
        print("Smiles column found: " + header[smi_col])
        for i, prop in enumerate(props):
            if len(units) > 0:
                if units[i] == "noconv":
                    col_convert[i] = False
                elif units[i] == "log10":
                    col_convert[i] = False
                    log10[i] = True
            label = "Activity column #" + str(i + 1) + ": " + prop
            if col_convert[i]:
                print(label + " will be converted to -log10(" + prop + ")")
            elif log10[i]:
                print(label + " will be converted to log10(" + prop + ")")
            else:
                print(label)

        if series_column:
            print("Series Column found: " + header[ser_col])

        if id_col == smi_col or id_col in act_col or smi_col in act_col:
            print(
                "Was not able to cleanly distinguish ID, SMILES, and activity columns."
            )
            print(
                "Please assign unambiguous names (no overlap in 'SMILES', 'ID', 'SRN'."
            )
            raise RuntimeError

        ########
        # Assemble data
        salt_defns = os.path.join(
            RDConfig.RDDataDir,
            "Salts.txt")  # replace if you have more specific definitions
        remover = SaltRemover.SaltRemover(defnFilename=salt_defns)
        meas = dict()
        smiles_registered = dict()

        for raw_line in f:
            # Blank lines (e.g. a trailing newline at EOF) used to crash on
            # line[0][0]; they carry no data, so skip them.
            if not raw_line.strip():
                continue
            line = [
                i.strip('"') for i in raw_line.rstrip("\n").split(delimiter)
            ]
            if line[0].startswith("#"):  # skip commented-out compounds
                continue
            # Validate the row length before indexing into it.
            if len(line) < len(props) + 2:
                print("Could not properly read line:")
                print(line)
                raise RuntimeError
            compound_id = line[id_col]
            if compound_id in meas:
                print("Two or more entries for the same identifier: " +
                      compound_id)
                print("Please fix.")
                raise RuntimeError
            smiles = line[smi_col].replace("\\\\", "\\")
            try:
                mol = Chem.MolFromSmiles(smiles)
                res = remover.StripMol(mol)  # Remove Salts
                smiles = Chem.MolToSmiles(res, True)  # Canonicalize smiles
                # NOTE(review): the weight is taken from the molecule as
                # given (before salt stripping) -- confirm this is intended.
                mwt = Descriptors.MolWt(mol)
            except Exception:
                # Broad on purpose (any toolkit failure), but no longer
                # swallows KeyboardInterrupt/SystemExit.
                print("Could not properly read SMILES " + smiles +
                      "(see error SMILES file)")
                print("This compound will be ignored.")
                g.write(smiles + "\n")
                continue
            if "." in smiles:
                print("Found unknown salt in " + line[id_col] + ": " + smiles)
                print("This compound will be ignored.")
                continue

            if smiles in smiles_registered:
                print("Two entries with the same structure: " +
                      smiles_registered[smiles] + " and " + compound_id)
                print(
                    "Nonadd will use the first compound and discard the second.\n"
                )
                continue
            smiles_registered[smiles] = compound_id

            record = dict(smiles=smiles,
                          Act=[],
                          pAct=[],
                          qualifiers=[],
                          mwt=mwt,
                          series=None)
            meas[compound_id] = record
            if series_column:
                record["series"] = line[ser_col]

            for i in range(len(props)):
                cell = line[act_col[i]]

                u_conv = None
                if col_convert[i]:
                    u_conv = unit_conv["uM"]
                    if len(units) > 0:
                        try:
                            u_conv = unit_conv[units[i]]
                        except Exception:
                            # Falls back to uM after warning, as before.
                            print("Given unit " + units[i] +
                                  " has not been recognized.")
                            print(
                                "Please give one out of [M, mM, uM, nM, pM, noconv]"
                            )

                parsed = _parse_activity_cell(cell, line)
                if parsed is None:
                    record["qualifiers"].append("")
                    record["Act"].append("NA")
                    record["pAct"].append("NA")
                    continue
                qualifier, value = parsed

                if col_convert[i] or log10[i]:
                    # A logarithm is taken below; unqualified values must
                    # therefore be strictly positive.
                    if qualifier == "" and value <= 0.0:
                        # units may be empty (implicit uM); indexing it here
                        # used to raise IndexError instead of this message.
                        unit_label = units[i] if len(units) > 0 else "uM"
                        print("Cannot interpret measured activity of " +
                              cell + unit_label + " for compound " +
                              compound_id)
                        print("Please fix.")
                        raise RuntimeError
                    act = value
                    if col_convert[i]:
                        p_act = (-1) * math.log10(value * u_conv)
                    else:
                        # log10 mode: qualified values used to be negated,
                        # unlike unqualified values of the same column.
                        p_act = math.log10(value)
                else:
                    # Value is already on a p-scale: keep it and back-compute
                    # the raw activity unless conversion is disabled.
                    if len(units) > 0 and units[i] == "noconv":
                        act = ""
                    else:
                        act = 1e6 * 10**((-1) * value)
                    p_act = value

                record["qualifiers"].append(qualifier)
                record["Act"].append(act)
                record["pAct"].append(p_act)

    if len(units) == 0:
        units = ["noconv" for _ in props]

    return meas, props, units


def _parse_activity_cell(cell, line):
    """Split one activity cell into ``(qualifier, value)``; None if missing.

    ``qualifier`` is ``""`` for a plain number or one of ``>``, ``<``, ``*``
    when the number carries that prefix. ``line`` is only used for the error
    message. Raises RuntimeError for anything else.
    """
    if cell in ["NA", "", "No Value"]:
        return None
    if is_number(cell):
        return "", float(cell)
    if cell[0] in (">", "<", "*") and is_number(cell[1:]):
        return cell[0], float(cell[1:])
    print("Did not recognize number " + str(cell))
    print(" in line: " + " ".join(line))
    print("Please fix.")
    raise RuntimeError