Example #1
def main():

    ### command line args definitions #########################################
    parser = argparse.ArgumentParser(
        description='Calculate plane of best fit for molecules')
    parameter_utils.add_default_io_args(parser)
    args = parser.parse_args()
    utils.log("PBFEV args: ", args)
    input, output, suppl, writer, output_base = rdkit_utils.default_open_input_output(
        args.input, args.informat, args.output, 'PBFEV', args.outformat)
    i = 0
    count = 0
    errors = 0
    out_results = []
    for mol in suppl:
        i += 1
        if mol is None: continue
        # embed a 3D conformer before computing the plane of best fit
        AllChem.EmbedMolecule(mol)
        out_vector = PBFev(mol)
        if out_vector is None: continue
        rd = PBFRD(mol)
        mol.SetDoubleProp("distance", rd)
        for j, angle in enumerate(out_vector):
            mol.SetDoubleProp("angle" + "_" + str(j), angle)
        out_results.append(mol)
    count = write_out(out_results, count, writer, args.outformat)
    utils.log("Handled " + str(i) + " molecules, resulting in " + str(count) +
              " outputs")
    writer.flush()
    writer.close()
    input.close()
    output.close()
Example #2
def doO3Dalign(i,
               mol,
               qmol,
               threshold,
               perfect_score,
               writer,
               conformerProps=None,
               minEnergy=None):
    pyO3As = rdMolAlign.GetO3AForProbeConfs(mol, qmol)
    if len(pyO3As) == 0:
        return 0
    best_score = 0
    j = 0
    conf_id = -1
    for pyO3A in pyO3As:
        align = pyO3A.Align()
        score = pyO3A.Score()
        if score > best_score:
            best_score = score
            conf_id = j
        j += 1

    #utils.log("Best score = ",best_score)
    if not threshold or perfect_score - best_score < threshold:
        utils.log(i, align, best_score, Chem.MolToSmiles(mol, isomericSmiles=True))
        # record the best score, not the score of the last conformer tried
        mol.SetDoubleProp(field_O3DAScore, best_score)
        if conformerProps and minEnergy:
            eAbs = conformerProps[conf_id][(conformers.field_EnergyAbs)]
            eDelta = eAbs - minEnergy
            if eAbs:
                mol.SetDoubleProp(conformers.field_EnergyAbs, eAbs)
            if eDelta:
                mol.SetDoubleProp(conformers.field_EnergyDelta, eDelta)
        writer.write(mol, confId=conf_id)
        return 1
    return 0
Example #3
def main():
    parser = argparse.ArgumentParser(description='RDKit constrained conformer generator')
    parameter_utils.add_default_io_args(parser)
    parser.add_argument('-n', '--num', type=int, default=10, help='number of conformers to generate')
    parser.add_argument('-r', '--refmol', help="Reference molecule file")
    parser.add_argument('--refmolidx', help="Reference molecule index in file", type=int, default=1)
    parser.add_argument('-c', '--core_smi', help='Core substructure. If not specified - guessed using MCS', default='')

    args = parser.parse_args()
    # Get the reference molecule (format guessed from the file name)
    ref_mol_input, ref_mol_suppl = rdkit_utils.default_open_input(args.refmol, None)
    counter = 0
    # Get the specified reference molecule. Default is the first
    for mol in ref_mol_suppl:
        counter += 1
        if counter == args.refmolidx:
            ref_mol = mol
            break
    ref_mol_input.close()

    if counter < args.refmolidx:
        raise ValueError("Invalid refmolidx. " + str(args.refmolidx) + " was specified but only " + str(counter) + " molecules were present in refmol.")


    # handle metadata
    source = "constrained_conf_gen.py"
    datasetMetaProps = {"source":source, "description": "Constrained conformer generation using RDKit " + rdBase.rdkitVersion}
    clsMappings = {"EmbedRMS": "java.lang.Float"}
    fieldMetaProps = [{"fieldName":"EmbedRMS", "values": {"source":source, "description":"Embedding RMS value"}}]

    # Get the molecules
    input, suppl = rdkit_utils.default_open_input(args.input, args.informat)
    output, WRITER, output_base = rdkit_utils.\
        default_open_output(args.output, "const_conf_gen", args.outformat,
                            valueClassMappings=clsMappings,
                            datasetMetaProps=datasetMetaProps,
                            fieldMetaProps=fieldMetaProps)

    inputs = 0
    totalCount = 0
    totalErrors = 0
    for mol in suppl:
        inputs += 1
        if mol:
            count, errors = generate_conformers(inputs, mol, args.num, ref_mol, WRITER, args.core_smi)
            totalCount += count
            totalErrors += errors

    input.close()
    WRITER.close()

    if totalErrors > 0:
        utils.log("WARNING:", totalErrors, "conformers failed to generate")

    # write metrics
    if args.meta:
        metrics = {'__InputCount__':inputs, '__OutputCount__':totalCount, 'RDKitConstrainedConformer':totalCount}
        if totalErrors > 0:
            metrics['__ErrorCount__'] = totalErrors
        utils.write_metrics(output_base, metrics)
Example #4
def main():
    """
    Example usage:
    python -m pipelines.xchem.split-fragnet-candidates -i ../../data/mpro/expanded-17.json

    :return:
    """

    parser = argparse.ArgumentParser(
        description=
        'Split fragnet candidates - Split fragment network expansion into individual sets'
    )

    parser.add_argument('-i',
                        '--input',
                        help='JSON containing the expanded candidates')
    parser.add_argument(
        '-g',
        '--generate-filenames',
        action='store_true',
        help=
        'Use automatically generated file names instead of the title field')

    args = parser.parse_args()
    utils.log("Split fragnet candidates args: ", args)

    infile = args.input

    execute(infile, args.generate_filenames)
Example #5
def process(inputs, writer):
    total = 0
    success = 0
    errors = 0
    for mol in inputs:
        total += 1
        if mol is None:
            errors += 1
            continue
        try:
            quantScore, qualScore = get_fmap_scores(mol)
            # utils.log('Score:', score)
            if total % 1000 == 0:
                utils.log('Processed molecule', total, '...')
            mol.SetDoubleProp(field_FeatureSteinQualityScore, qualScore)
            mol.SetDoubleProp(field_FeatureSteinQuantityScore, quantScore)
            mol.SetProp('Name', mol.GetProp('_Name'))
            writer.write(mol)
            success += 1
        except Exception:
            utils.log("Error scoring molecule", sys.exc_info()[0])
            traceback.print_exc()
            errors += 1

    return total, success, errors
Example #6
def parse_mol_simple(my_type, txt):
    """Function to parse individual mols given a type.

    :param my_type: A type definition (i.e. "mol" or "smiles")
    :param txt: The textual definition of the molecule (i.e. a SMILES string)
    :return: A mol instance or None if the molecule could not be compiled
    """
    # Ignore unexpected parameter values...
    if my_type is None or not my_type or txt is None or not txt:
        return None

    if my_type == "mol":
        # Try this way
        mol = Chem.MolFromMolBlock(txt.strip())
        if mol is None:
            mol = Chem.MolFromMolBlock(txt)
        if mol is None:
            mol = Chem.MolFromMolBlock("\n".join(txt.split("\n")[1:]))
        # Now try to do sanifix
        if mol is None:
            mol = fix_mol(Chem.MolFromMolBlock(txt, False))
        # And again
        if mol is None:
            mol = fix_mol(Chem.MolFromMolBlock(txt.strip(), False))
    elif my_type == "smiles":
        # Assumes that smiles is the first column -> and splits on chemaxon
        mol = Chem.MolFromSmiles(txt.split()[0].split(":")[0])
    if mol is None:
        utils.log('Failed to parse mol "%s" for my_type %s' % (txt, my_type))
    return mol
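
A minimal usage sketch for parse_mol_simple, with hypothetical inputs (the RDKit Chem and utils imports of the surrounding module are assumed):

# extra tokens after the SMILES are ignored
mol1 = parse_mol_simple("smiles", "c1ccccc1 benzene")
# an unparsable SMILES returns None and the failure is logged
mol2 = parse_mol_simple("smiles", "not_a_smiles")
# unexpected parameter values return None without logging
mol3 = parse_mol_simple(None, "c1ccccc1")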
Example #7
def main():

    ### command line args definitions #########################################

    parser = argparse.ArgumentParser(description='RDKit Input Splitter')
    parameter_utils.add_default_input_args(parser)
    parser.add_argument('-o',
                        '--output',
                        required=True,
                        help="Directory name for output files (no extension).")
    parser.add_argument(
        '-f',
        '--field',
        required=True,
        help=
        "field to use to split input. Output files will have the name of this field's value"
    )
    parser.add_argument('--meta',
                        action='store_true',
                        help='Write metadata and metrics files')

    args = parser.parse_args()
    utils.log("Splitter Args: ", args)

    filenames = split(args.input, args.informat, args.field, args.output,
                      args.meta)
    utils.log("Files generated:", " ".join(filenames))
Example #8
def patch_scores_sdf(sdf_in, outfile, scores):

    global work_dir

    counter = 0
    sdf_path = "{0}{1}{2}.sdf".format(work_dir, os.path.sep, outfile)
    tsv_path = "{0}{1}{2}.tsv".format(work_dir, os.path.sep, outfile)
    utils.log("Writing results to {0} and {1}".format(sdf_path, tsv_path))
    with open(tsv_path, 'w') as tsv_file:
        sdf_file = pybel.Outputfile("sdf", sdf_path)
        for mol in pybel.readfile("sdf", sdf_in):
            if counter in scores:
                score = scores[counter]
                # utils.log("Score for record {0} is {1}".format(counter, score))

                mol.data['dls_deep_score'] = score
                if 'SCORE' in mol.data:
                    rdock_score = mol.data['SCORE']
                else:
                    rdock_score = ''

                if 'SCORE.norm' in mol.data:
                    rdock_nscore = mol.data['SCORE.norm']
                else:
                    rdock_nscore = ''

                sdf_file.write(mol)
                tsv_file.write("{0}\t{1}\t{2}\t{3}\n".format(
                    counter, rdock_score, rdock_nscore, score))

            else:
                utils.log("No score found for record", counter)
            counter += 1
        sdf_file.close()
Example #9
def filter_value(value, min, max, key, quiet=False):
    if value is not None and value < min:
        if not quiet:
            utils.log(key, value, "<", min)
        return False
    if value is not None and value > max:
        if not quiet:
            utils.log(key, value, ">", max)
        return False
    return True
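
A sketch of how filter_value might be wired into a property filter; the molecular-weight bounds here are illustrative, not taken from the pipeline:

from rdkit.Chem import Descriptors

def passes_mw_filter(mol, quiet=False):
    # keep molecules whose molecular weight lies within an illustrative 100-500 range
    return filter_value(Descriptors.MolWt(mol), 100.0, 500.0, 'MolWt', quiet=quiet)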
Example #10
def run_predictions():
    global types_file_name
    global predict_file_name
    global work_dir
    # python3 scripts/predict.py -m resources/dense.prototxt -w resources/weights.caffemodel -i work_0/test_set.types >> work_0/caffe_output/predictions.txt
    cmd1 = "python3 /root/train/fragalysis_test_files/scripts/predict.py -m /root/train/fragalysis_test_files/resources/dense.prototxt" +\
           " -w /root/train/fragalysis_test_files/resources/weights.caffemodel" +\
            " -i {0}/{1} -o {0}/{2}".format(work_dir, types_file_name, predict_file_name)
    utils.log("CMD:", cmd1)
    os.system(cmd1)
Example #11
def determine_protein_format(protein_file, protein_format):
    if protein_format:
        return protein_format
    elif protein_file.endswith('.pdb') or protein_file.endswith('.pdb.gz'):
        return 'pdb'
    elif protein_file.endswith('.mol2') or protein_file.endswith('.mol2.gz'):
        return 'mol2'
    else:
        utils.log(
            "Can't determine file format, so assuming pdb. Please use the --protein-format parameter"
        )
        return 'pdb'
Example #12
def read_next_protein(proteins, format, previous, index, keep_hs=False):
    if previous and index >= len(proteins):
        return previous
    protein = next(
        toolkit.readfile(format, proteins[index], removeHs=not keep_hs))
    if not protein:
        raise ValueError('Unable to read protein')
    else:
        utils.log('Read protein', index + 1)
        protein.protein = True
        protein.removeh()
        return protein
Example #13
def create_featuremap(fragments):

    mols = [m for m in fragments if m]
    fmaps, scores = build_feat_data(mols)
    merged_fmaps = fmaps.copy()
    utils.log('Processing', len(fmaps), 'molecules')
    while len(merged_fmaps) > 1:
        merge_feat_maps(merged_fmaps, scores)
    merged_fmap = merged_fmaps[0]
    utils.log('Created merged feature map with', merged_fmap.GetNumFeatures(),
              'features')

    return merged_fmap
Example #14
def process(inputs, fname):

    mols = [m for m in inputs if m]
    fmaps, scores = build_feat_data(mols)
    merged_fmaps = fmaps.copy()
    utils.log('Processing', len(fmaps), 'molecules')
    while len(merged_fmaps) > 1:
        merge_feat_maps(merged_fmaps, scores)
    merged_fmap = merged_fmaps[0]
    pickle.dump(merged_fmap, open(fname, "wb"))
    utils.log('Wrote merged feature map with', merged_fmap.GetNumFeatures(),
              'features as pickle to', fname)

    return len(mols), merged_fmap.GetNumFeatures()
Example #15
def split(input, informat, fieldName, outputBase, writeMetrics):
    """Splits the input into separate files. The name of each output file, and the
    file each record is written to, are determined by the fieldName parameter.
    """

    input, suppl = rdkit_utils.default_open_input(input, informat)

    i = 0
    written = 0
    writers = {}
    outputs = []
    filenames = []
    for mol in suppl:
        i += 1
        if mol is None: continue
        if not mol.HasProp(fieldName):
            utils.log("Skipping molecule", i, "- did not contain field",
                      fieldName)
            continue
        value = mol.GetProp(fieldName)
        if value:
            s = str(value)
            if s in writers:
                writer = writers[s]
            else:
                name = outputBase + s
                output, writer = rdkit_utils.default_open_output_sdf(
                    name, outputBase, False, False)
                filenames.append(name + '.sdf')
                outputs.append(output)
                writers[s] = writer
            writer.write(mol)
            written += 1

    utils.log("Generated", len(writers), "outputs from", i, "records")

    input.close()
    for k in writers:
        writers[k].close()
    for o in outputs:
        o.close()

    if writeMetrics:
        utils.write_metrics(outputBase, {
            '__InputCount__': i,
            '__OutputCount__': written,
            'Splitter': i
        })

    return filenames
Example #16
def generate_mols_from_json(input):
    """Create a supplier of RDKit Mol objects from the json

    :param input: file like object containing the json representation of the molecules
    """
    j = 0
    for item in input:
        j += 1
        mol = create_mol_from_props(item)
        if not mol:
            # TODO - get a count of the errors and report
            utils.log("Failed to create molecule - skipping. Data was ", item)
            continue
        yield mol
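
A short sketch of consuming the generator; the file name is hypothetical, and the items are assumed to be the JSON records that create_mol_from_props (from the surrounding module) expects:

import json

with open('molecules.json') as f:
    items = json.load(f)
for mol in generate_mols_from_json(items):
    utils.log('Read molecule with', mol.GetNumAtoms(), 'atoms')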
Example #17
def main():
    global PDB_PATH,WRITER,THRESHOLD
    parser = argparse.ArgumentParser(description='Open babel PDB prepare')
    parser.add_argument('--no-gzip', action='store_true', help='Do not compress the output')
    parser.add_argument('-i', '--input', help="PDB file for converting")
    parser.add_argument('-o', '--output', help="Base name for output files (no extension).")
    parser.add_argument('-mol2', '--mol2', action='store_true', help='Output as Mol2 format.')
    parser.add_argument('-pdbqt', '--pdbqt', action='store_true', help='Output as pdbqt format.')
    parser.add_argument('--meta', action='store_true', help='Write metrics files')
    parser.add_argument('-prot', '--protonate', type=float, help="protonate at this pH (optional)")

    args = parser.parse_args()

    utils.log("Prepare Args: ", args)

    if not (args.mol2 or args.pdbqt):
        raise ValueError("Must specify at least one output fromat: mol2 and/or pdbqt")


    if args.pdbqt:
        utils.log("Preparing as pdbqt")
        execute(args.input, args.output, "pdbqt", "-opdbqt", args.protonate, args.no_gzip)

    if args.mol2:
        utils.log("Preparing as mol2")
        execute(args.input, args.output, "mol2", "-omol2", args.protonate, args.no_gzip)

    utils.log("Preparation complete")
Example #18
def SelectDiverseSubset(mols, clusters, distances, count, field, maximise,
                        score, quiet):
    total = len(mols)
    num_clusters = len(clusters)
    pickedList = []
    clustersList = []
    for i in range(0, num_clusters):
        pickedList.append([])
        if field:
            filteredByValue = [
                x for x in clusters[i] if mols[x].HasProp(field)
            ]
            sortedByValue = sorted(
                filteredByValue,
                key=lambda idx: FetchScore(idx, mols, field, maximise))
            clustersList.append(sortedByValue)
        else:
            allRecords = [x for x in clusters[i]]
            clustersList.append(allRecords)

    totalIter = 0
    clusterIter = 0
    pickedCount = 0

    while totalIter < total and pickedCount < count:
        clusterNum = totalIter % num_clusters
        clus = clustersList[clusterNum]
        pick = pickedList[clusterNum]
        #utils.log("iter",totalIter,"cluster",clusterNum,"length",len(clus))
        if len(clus) > 0:
            # remove that item from the cluster so that it's not tried again
            molIndex = clus.pop(0)
            if len(pick) == 0:  # first time for this cluster
                pick.append(molIndex)
                pickedCount += 1
                clusterIter += 1
                if not quiet:
                    utils.log("Cluster", clusterNum, "initialised with",
                              molIndex)
            else:
                closestDist = GetClosestDistance(distances, molIndex, pick)
                #utils.log("Closest score",closestDist)
                if closestDist < score:
                    pick.append(molIndex)
                    pickedCount += 1
                    clusterIter += 1
                    if not quiet:
                        utils.log("Cluster", clusterNum, "added", molIndex,
                                  "with score", closestDist)
                elif not quiet:
                    utils.log("Cluster", clusterNum, "discarded", molIndex,
                              "with score", closestDist)
        else:  # cluster has been exhausted
            #utils.log("Cluster",clusterNum,"exhasted")
            clusterIter += 1

        totalIter += 1

    utils.log("Picked", pickedCount, "using", totalIter, "iterations")
    return pickedList
Example #19
def main():

    # Example usage
    # python -m pipelines.xchem.featurestein_score -i ../../data/mpro/poses.sdf.gz -f mpro-fstein.p -o fstein

    global fmaps

    parser = argparse.ArgumentParser(description='FeatureStein scoring with RDKit')
    parameter_utils.add_default_io_args(parser)
    parser.add_argument('-f', '--feat-map', help='Feature Map pickle to score with')
    parser.add_argument('--no-gzip', action='store_true', help='Do not compress the output (STDOUT is never compressed)')
    parser.add_argument('--metrics', action='store_true', help='Write metrics')


    args = parser.parse_args()
    utils.log("FeatureStein Args: ", args)

    source = "featurestein_score.py"
    datasetMetaProps = {"source":source, "description": "FeatureStein scoring using RDKit " + rdBase.rdkitVersion}

    clsMappings = {}
    fieldMetaProps = []
    clsMappings[field_FeatureSteinQualityScore] = "java.lang.Float"
    clsMappings[field_FeatureSteinQuantityScore] = "java.lang.Float"
    fieldMetaProps.append({"fieldName":field_FeatureSteinQualityScore,   "values": {"source":source, "description":"FeatureStein quality score"},
                           "fieldName":field_FeatureSteinQuantityScore,   "values": {"source":source, "description":"FeatureStein quantity score"}})

    with open(args.feat_map, 'rb') as pkl_file:
        fmaps = pickle.load(pkl_file)
    utils.log('FeatureMap has', fmaps.GetNumFeatures(), "features")

    inputs_file, inputs_supplr = rdkit_utils.default_open_input(args.input, args.informat)
    output, writer, output_base = rdkit_utils.default_open_output(args.output,
                        'featurestein', args.outformat,
                        valueClassMappings=clsMappings,
                        datasetMetaProps=datasetMetaProps,
                        fieldMetaProps=fieldMetaProps,
                        compress=not args.no_gzip)

    # this does the processing
    total, success, errors = process(inputs_supplr, writer)

    inputs_file.close()
    writer.flush()
    writer.close()
    output.close()

    if args.metrics:
        utils.write_metrics(output_base, {'__InputCount__':total, '__OutputCount__':success, '__ErrorCount__':errors, 'RDKitFeatureMap':success})
Example #20
def mock_predictions():
    global work_dir
    global predict_file_name
    global inputs_protein
    global inputs_ligands
    utils.log("WARNING: generating mock results instead of running on GPU")
    outfile = generate_predictions_filename(work_dir, predict_file_name)
    with open(outfile, 'w') as predictions:
        for protein in inputs_protein:
            for ligand in inputs_ligands:
                score = random.random()
                line = "{0} | 0 {1}{4}proteins{4}{2} {1}{4}ligands{4}{3}\n".format(
                    score, work_dir, protein, ligand, os.path.sep)
                # utils.log("Writing", line)
                predictions.write(line)
Example #21
def main():
    ### command line args definitions #########################################

    parser = argparse.ArgumentParser(description='RDKit cluster 3D')
    parameter_utils.add_default_io_args(parser)

    args = parser.parse_args()

    utils.log("Cluster_3d Args: ", args)

    source = "cluster_3d.py"
    datasetMetaProps = {
        "source": source,
        "description": "Cluster 3D using RDKit " + rdBase.rdkitVersion
    }
    clsMappings = {
        # "RMSToCentroid": "java.lang.Float",
        # "EnergyDelta": "java.lang.Float",
        # "EnergyAbs": "java.lang.Float",
        # "ConformerNum": "java.lang.Integer",
        # "ClusterCentroid": "java.lang.Integer",
        # "ClusterNum": "java.lang.Integer",
        # "StructureNum": "java.lang.Integer"
    }
    fieldMetaProps = [
        # {"fieldName":"RMSToCentroid",   "values": {"source":source, "description":"RMS distance to the cluster centroid"}},
        # {"fieldName":"EnergyDelta",     "values": {"source":source, "description":"Energy difference to lowest energy structure"}},
        # {"fieldName":"EnergyAbs",       "values": {"source":source, "description":"Absolute energy"}},
        # {"fieldName":"ConformerNum",    "values": {"source":source, "description":"Conformer number"}},
        # {"fieldName":"ClusterCentroid", "values": {"source":source, "description":"Conformer number of the cluster centroid"}},
        # {"fieldName":"ClusterNum",      "values": {"source":source, "description":"Cluster number"}},
        # {"fieldName":"StructureNum",    "values": {"source":source, "description":"Structure number this conformer was generated from"}}
    ]

    input, output, suppl, writer, output_base = rdkit_utils. \
        default_open_input_output(args.input, args.informat, args.output,
                                  'conformers', args.outformat,
                                  valueClassMappings=clsMappings,
                                  datasetMetaProps=datasetMetaProps,
                                  fieldMetaProps=fieldMetaProps)

    basemol = combine_conformers(suppl)

    if input:
        input.close()
    writer.flush()
    writer.close()
    output.close()
Example #22
File: sucos.py  Project: kinow/pipelines
def get_FeatureMapScore(small_feats,
                        large_feats,
                        tani=False,
                        score_mode=FeatMaps.FeatMapScoreMode.All):
    """
    Generate the feature map score.

    :param small_feats: features of the smaller molecule
    :param large_feats: features of the larger molecule
    :param tani: whether to compute a Tanimoto-style score
    :param score_mode: the FeatMaps scoring mode to use
    :return: the feature map score
    """

    featLists = []
    for rawFeats in [small_feats, large_feats]:
        # the feature lists are used as-is (no additional filtering applied here)
        featLists.append(rawFeats)
    fms = [
        FeatMaps.FeatMap(feats=x, weights=[1] * len(x), params=fmParams)
        for x in featLists
    ]
    # set the score mode
    fms[0].scoreMode = score_mode

    try:
        if tani:
            c = fms[0].ScoreFeats(featLists[1])
            A = fms[0].GetNumFeatures()
            B = len(featLists[1])
            if B != fms[1].GetNumFeatures():
                utils.log("Why isn't B equal to number of features...?!")
            tani_score = float(c) / (A + B - c)
            return tani_score
        else:
            fm_score = fms[0].ScoreFeats(featLists[1]) / min(
                fms[0].GetNumFeatures(), len(featLists[1]))
            return fm_score
    except ZeroDivisionError:
        utils.log("ZeroDivisionError")
        return 0

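
A sketch of calling get_FeatureMapScore directly, assuming two 3D RDKit molecules are in scope and that getRawFeatures (used the same way by get_SucosScore below) produces the feature lists:

small_feats = getRawFeatures(small_mol)
large_feats = getRawFeatures(large_mol)
# default mode: cross score normalised by the smaller feature count
fm_score = get_FeatureMapScore(small_feats, large_feats)
# Tanimoto-style mode: c / (A + B - c)
tani_score = get_FeatureMapScore(small_feats, large_feats, tani=True)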
Example #23
def process(molecules, fragments, writer, threshold=0.4):

    frag_mol_list = []
    errors = 0
    for m in fragments:
        if m is None:
            errors += 1
        else:
            frag_mol_list.append(m)
    if errors:
        utils.log(errors, 'molecules failed to load. Using', len(frag_mol_list), 'fragments.')
    else:
        utils.log('Using', len(frag_mol_list), 'fragments. No errors')

    #mols, frags, score_threshold, writer
    getReverseScores(molecules, frag_mol_list, threshold, writer)
Example #24
def execute(input, output, extension, format, ph, noGzip):

    # TODO - convert this to use the Python API rather than an external process

    filename = output + "." + extension
    base_args = ["obabel", "-ipdb", input, format, "-O", filename]
    if ph:
        base_args.append("-p")
        base_args.append(str(ph))
    utils.log("Command: " + " ".join(base_args))

    subprocess.check_call(base_args, stdout=sys.stderr, stderr=sys.stderr)

    # NOTE the -z argument does not seem to work correctly with obabel (truncated files generated) so we
    # fall back to good old gzip to handle the compression once the uncompressed file is created
    if not noGzip:
        subprocess.check_call(['gzip', filename], stdout=sys.stderr, stderr=sys.stderr)
Example #25
File: sucos.py  Project: kinow/pipelines
def get_SucosScore(ref_mol,
                   query_mol,
                   tani=False,
                   ref_features=None,
                   query_features=None,
                   score_mode=FeatMaps.FeatMapScoreMode.All):
    """
    This is the key function that calculates the SuCOS scores and is expected to be called from other modules.
    To improve performance you can pre-calculate the features and pass them in as optional parameters to avoid having
    to recalculate them. Use the getRawFeatures function to pre-calculate the features.

    :param ref_mol: The reference molecule to compare to
    :param query_mol: The molecule to compare to the reference
    :param tani: Whether to calculate Tanimoto distances
    :param ref_features: An optional feature map for the reference molecule, avoiding the need to re-calculate it.
    :param query_features: An optional feature map for the query molecule, avoiding the need to re-calculate it.
    :return: A tuple of 3 values. 1 the sucos score, 2 the feature map score,
        3 the Tanimoto distance or 1 minus the protrude distance
    """

    if not ref_features:
        ref_features = getRawFeatures(ref_mol)
    if not query_features:
        query_features = getRawFeatures(query_mol)

    fm_score = get_FeatureMapScore(ref_features, query_features, tani,
                                   score_mode)
    fm_score = np.clip(fm_score, 0, 1)

    try:
        if tani:
            tani_sim = 1 - float(
                rdShapeHelpers.ShapeTanimotoDist(ref_mol, query_mol))
            tani_sim = np.clip(tani_sim, 0, 1)
            SuCOS_score = 0.5 * fm_score + 0.5 * tani_sim
            return SuCOS_score, fm_score, tani_sim
        else:
            protrude_dist = rdShapeHelpers.ShapeProtrudeDist(
                ref_mol, query_mol, allowReordering=False)
            protrude_dist = np.clip(protrude_dist, 0, 1)
            protrude_val = 1.0 - protrude_dist
            SuCOS_score = 0.5 * fm_score + 0.5 * protrude_val
            return SuCOS_score, fm_score, protrude_val
    except Exception:
        utils.log("Failed to calculate SuCOS scores. Returning 0,0,0")
        return 0, 0, 0
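
As the docstring suggests, the reference features can be pre-calculated once and reused when scoring many queries; a sketch, assuming ref_mol and query_mols are 3D RDKit molecules already in scope:

ref_feats = getRawFeatures(ref_mol)
for query in query_mols:
    sucos, fm_score, shape_score = get_SucosScore(ref_mol, query, tani=False,
                                                  ref_features=ref_feats)
    query.SetDoubleProp('SuCOS_Score', sucos)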
Example #26
def read_predictions():
    global predict_file_name
    global work_dir
    scores = {}
    with open("{0}{1}{2}".format(work_dir, os.path.sep, predict_file_name),
              'r') as input:
        for line in input:
            #utils.log(line)
            tokens = line.split()
            if len(tokens) == 5 and tokens[1] == '|':
                # utils.log(len(tokens), tokens[0], tokens[3], tokens[4])
                record_no = match_ligand(tokens[4])
                if record_no is not None:
                    # utils.log(record_no, tokens[0])
                    scores[record_no] = tokens[0]
    utils.log("Found", len(scores), "scores")
    return scores
Example #27
def main():

    # Example usage:
    # python -m pipelines.xchem.xcos -f ../../data/mpro/hits-17.sdf.gz -i ../../data/mpro/poses.sdf.gz  -o xcos

    parser = argparse.ArgumentParser(description='XCos scoring with RDKit')
    parameter_utils.add_default_io_args(parser)
    parser.add_argument('-f', '--fragments', required=True, help='Fragments to compare')
    parser.add_argument('-ff', '--fragments-format', help='Fragments format')
    parser.add_argument('-t', '--score-threshold', type=float, default=0.4,
                        help='Minimum shape overlay and feature map score required for scoring a bit to a fragment')
    parser.add_argument('--no-gzip', action='store_true', help='Do not compress the output (STDOUT is never compressed)')
    parser.add_argument('--metrics', action='store_true', help='Write metrics')

    args = parser.parse_args()
    utils.log("XCos Args: ", args)

    source = "xcos.py"
    datasetMetaProps = {"source":source, "description": "XCos scoring using RDKit " + rdBase.rdkitVersion}

    clsMappings = {}
    fieldMetaProps = []

    clsMappings[field_XCosRefMols] = "java.lang.String"
    clsMappings[field_XCosNumHits] = "java.lang.Integer"
    clsMappings[field_XCosScore1] = "java.lang.Float"

    fieldMetaProps.append({"fieldName":field_XCosRefMols,   "values": {"source":source, "description":"XCos reference fragments"}})
    fieldMetaProps.append({"fieldName":field_XCosNumHits,   "values": {"source":source, "description":"XCos number of hits"}})
    fieldMetaProps.append({"fieldName":field_XCosScore1,   "values": {"source":source, "description":"XCos score 1"}})
    
    frags_input, frags_suppl = rdkit_utils.default_open_input(args.fragments, args.fragments_format)

    inputs_file, inputs_supplr = rdkit_utils.default_open_input(args.input, args.informat)
    output, writer, output_base = rdkit_utils.default_open_output(args.output,
                                                                  'xcos', args.outformat,
                                                                  valueClassMappings=clsMappings,
                                                                  datasetMetaProps=datasetMetaProps,
                                                                  fieldMetaProps=fieldMetaProps,
                                                                  compress=not args.no_gzip)

    # this does the processing
    process(inputs_supplr, frags_suppl, writer, threshold=args.score_threshold)

    writer.close()
Example #28
def doO3Dalign(i,
               mol,
               qmol,
               use_crippen,
               threshold,
               perfect_score,
               writer,
               conformerProps=None,
               minEnergy=None):

    if use_crippen:
        pyO3As = rdMolAlign.GetCrippenO3AForProbeConfs(mol, qmol)
    else:
        pyO3As = rdMolAlign.GetO3AForProbeConfs(mol, qmol)

    if len(pyO3As) == 0:
        return 0

    best_score = 0
    j = 0
    conf_id = -1

    for pyO3A in pyO3As:
        align = pyO3A.Align()
        score = pyO3A.Score()
        if score > best_score:
            best_score = score
            conf_id = j
        j += 1

    #utils.log("Best score = ",best_score)
    if not threshold or perfect_score - best_score < threshold:
        utils.log("Molecule", i, align, best_score)
        # record the best score, not the score of the last conformer tried
        mol.SetDoubleProp(field_O3DAScore, best_score)
        if conformerProps and minEnergy:
            eAbs = conformerProps[conf_id][(conformers.field_EnergyAbs)]
            eDelta = eAbs - minEnergy
            if eAbs:
                mol.SetDoubleProp(conformers.field_EnergyAbs, eAbs)
            if eDelta:
                mol.SetDoubleProp(conformers.field_EnergyDelta, eDelta)
        writer.write(mol, confId=conf_id)
        return 1
    return 0
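
A hedged sketch of driving doO3Dalign: embed several conformers of a probe, then align them against an embedded reference. The SMILES are illustrative, and the module-level names the function uses (field_O3DAScore, utils, conformers) are assumed to be in scope:

from rdkit import Chem
from rdkit.Chem import AllChem

probe = Chem.AddHs(Chem.MolFromSmiles('CCOc1ccccc1'))
AllChem.EmbedMultipleConfs(probe, numConfs=10)
ref = Chem.AddHs(Chem.MolFromSmiles('COc1ccccc1'))
AllChem.EmbedMolecule(ref)

writer = Chem.SDWriter('aligned.sdf')
# no threshold, so the best-scoring conformer is always written
written = doO3Dalign(1, probe, ref, False, None, 0.0, writer)
writer.close()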
Example #29
def fragment(mol, mode, quiet=False):
    frags = Chem.GetMolFrags(mol, asMols=True)

    if len(frags) == 1:
        return mol
    else:
        # TODO - handle ties
        biggest_index = -1
        i = 0
        if mode == 'hac':
            biggest_count = 0
            for frag in frags:
                hac = frag.GetNumHeavyAtoms()
                if hac > biggest_count:
                    biggest_count = hac
                    biggest_mol = frag
                    biggest_index = i
                i+=1
            if not quiet:
                utils.log("Chose fragment", biggest_index, "from", len(frags), "based on HAC")
        elif mode == 'mw':
            biggest_mw = 0
            for frag in frags:
                mw = Descriptors.MolWt(frag)
                if mw > biggest_mw:
                    biggest_mw = mw
                    biggest_mol = frag
                    biggest_index = i
                i+=1
            if not quiet:
                utils.log("Chose fragment", biggest_index, "from", len(frags), "based on MW")
        else:
            raise ValueError('Invalid fragment mode:',mode)

        # copy the properties across
        for name in mol.GetPropNames():
            biggest_mol.SetProp(name, mol.GetProp(name))

        # _Name is a magical property that is not in the ones returned by GetPropNames
        if mol.HasProp('_Name'):
            biggest_mol.SetProp("_Name", mol.GetProp("_Name"))

        return biggest_mol
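
A short usage sketch: reduce a salt (illustrative SMILES) to its largest component by heavy atom count:

from rdkit import Chem

salt = Chem.MolFromSmiles('CCO.[Na+].[Cl-]')
largest = fragment(salt, 'hac')
# largest is now the ethanol fragment, with any properties copied across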
Example #30
def run_dock(mol):
    global WRITER, COUNTER, SUCCESS, THRESHOLD
    answer_dict = run_and_get_ans(mol, PDB_PATH)
    COUNTER += 1
    if not answer_dict:
        utils.log("FAILED MOL", Chem.MolToSmiles(mol))
        return
    if THRESHOLD is not None:
        if answer_dict["system"]["pliff_score"] > THRESHOLD:
            return
    for ans in answer_dict["system"]:
        if ans.startswith(u"pliff"):
            mol.SetDoubleProp(str(ans), answer_dict["system"][ans])
    utils.log("SCORED MOL:", Chem.MolToSmiles(mol), answer_dict)
    with lock:
        WRITER.write(mol)
        SUCCESS += 1
        WRITER.flush()
    lock.release()