def main():
    """Command-line entry point: annotate molecules with plane-of-best-fit values.

    Reads molecules through the default I/O arguments, embeds each readable
    molecule, stores the ``PBFRD`` result as the ``distance`` property and
    every element of the ``PBFev`` vector as ``angle_<n>``, then passes the
    collected molecules to ``write_out``.
    """
    ### command line args definitions #########################################
    parser = argparse.ArgumentParser(
        description='Calculate plane of best fit for molecules')
    parameter_utils.add_default_io_args(parser)
    args = parser.parse_args()
    utils.log("PBFEV args: ", args)

    input, output, suppl, writer, output_base = rdkit_utils.default_open_input_output(
        args.input, args.informat, args.output, 'PBFEV', args.outformat)

    i = 0
    count = 0
    out_results = []
    for mol in suppl:
        i += 1
        # Skip unreadable records *before* touching them: the None check used
        # to come after the embedding call, which cannot accept None.
        if mol is None:
            continue
        AllChem.EmbedMolecule(mol)
        out_vector = PBFev(mol)
        if out_vector is None:
            continue
        rd = PBFRD(mol)
        mol.SetDoubleProp("distance", rd)
        for j, angle in enumerate(out_vector):
            mol.SetDoubleProp("angle" + "_" + str(j), angle)
        out_results.append(mol)

    count = write_out(out_results, count, writer, args.outformat)
    utils.log("Handled " + str(i) + " molecules, resulting in " + str(count) + " outputs")

    writer.flush()
    writer.close()
    input.close()
    output.close()
def doO3Dalign(i, mol, qmol, threshold, perfect_score, writer, conformerProps=None, minEnergy=None):
    """Align every conformer of ``mol`` onto ``qmol`` and write the best-scoring one.

    :param i: Record identifier, only used in the log message
    :param mol: The probe molecule whose conformers are aligned
    :param qmol: The reference (query) molecule
    :param threshold: If falsy every molecule is written, otherwise only those
        where ``perfect_score - best_score`` is below this value
    :param perfect_score: The score used as the baseline for the threshold test
    :param writer: Writer whose ``write(mol, confId=...)`` receives the result
    :param conformerProps: Optional per-conformer properties indexed by conformer number
    :param minEnergy: Optional minimum energy used to compute the energy delta
    :return: 1 if the molecule was written, otherwise 0
    """
    pyO3As = rdMolAlign.GetO3AForProbeConfs(mol, qmol)
    # Nothing to align: without this guard the code below would reference
    # values that are only assigned inside the loop.
    if len(pyO3As) == 0:
        return 0

    best_score = 0
    best_align = None
    conf_id = -1
    for j, pyO3A in enumerate(pyO3As):
        align = pyO3A.Align()
        score = pyO3A.Score()
        if score > best_score:
            best_score = score
            best_align = align
            conf_id = j
    #utils.log("Best score = ",best_score)

    if not threshold or perfect_score - best_score < threshold:
        # Report and store the values of the conformer that is actually written
        # (conf_id), not those of whichever conformer happened to be last.
        utils.log(i, best_align, best_score, Chem.MolToSmiles(mol, isomericSmiles=True))
        mol.SetDoubleProp(field_O3DAScore, best_score)
        if conformerProps and minEnergy:
            eAbs = conformerProps[conf_id][(conformers.field_EnergyAbs)]
            eDelta = eAbs - minEnergy
            # NOTE(review): a value of exactly 0 is not written because of the
            # truthiness tests below - confirm that this is intended.
            if eAbs:
                mol.SetDoubleProp(conformers.field_EnergyAbs, eAbs)
            if eDelta:
                mol.SetDoubleProp(conformers.field_EnergyDelta, eDelta)
        writer.write(mol, confId=conf_id)
        return 1
    return 0
def main():
    """Command-line entry point for the RDKit constrained conformer generator.

    Picks the reference molecule at position ``--refmolidx`` (1-based) from
    ``--refmol``, then calls ``generate_conformers`` for every readable input
    molecule and optionally writes a metrics file.

    :raises ValueError: if ``--refmolidx`` is below 1, exceeds the number of
        records in the reference file, or points at an unreadable record
    """
    parser = argparse.ArgumentParser(description='RDKit constrained conformer generator')
    parameter_utils.add_default_io_args(parser)
    parser.add_argument('-n', '--num', type=int, default=10, help='number of conformers to generate')
    parser.add_argument('-r', '--refmol', help="Reference molecule file")
    parser.add_argument('--refmolidx', help="Reference molecule index in file", type=int, default=1)
    parser.add_argument('-c', '--core_smi', help='Core substructure. If not specified - guessed using MCS', default='')

    args = parser.parse_args()

    # An index below 1 can never match the 1-based counter, which previously
    # left ref_mol unassigned and failed much later with a NameError.
    if args.refmolidx < 1:
        raise ValueError("Invalid refmolidx. " + str(args.refmolidx) +
                         " was specified but the index must be 1 or greater.")

    # Get the reference molecule
    # NOTE(review): the file name is passed as the format argument too - confirm
    # that default_open_input is meant to be called this way.
    ref_mol_input, ref_mol_suppl = rdkit_utils.default_open_input(args.refmol, args.refmol)
    counter = 0
    ref_mol = None
    # Get the specified reference molecule. Default is the first
    for mol in ref_mol_suppl:
        counter += 1
        if counter == args.refmolidx:
            ref_mol = mol
            break
    ref_mol_input.close()

    if counter < args.refmolidx:
        raise ValueError("Invalid refmolidx. " + str(args.refmolidx) + " was specified but only " + str(counter) + " molecules were present in refmol.")
    if ref_mol is None:
        raise ValueError("Reference molecule " + str(args.refmolidx) + " in refmol could not be read.")

    # handle metadata
    source = "constrained_conf_gen.py"
    datasetMetaProps = {"source": source, "description": "Constrained conformer generation using RDKit " + rdBase.rdkitVersion}
    clsMappings = {"EmbedRMS": "java.lang.Float"}
    fieldMetaProps = [{"fieldName": "EmbedRMS", "values": {"source": source, "description": "Embedding RMS value"}}]

    # Get the molecules
    input, suppl = rdkit_utils.default_open_input(args.input, args.informat)
    output, WRITER, output_base = rdkit_utils.default_open_output(
        args.output, "const_conf_gen", args.outformat,
        valueClassMappings=clsMappings,
        datasetMetaProps=datasetMetaProps,
        fieldMetaProps=fieldMetaProps)

    inputs = 0
    totalCount = 0
    totalErrors = 0
    for mol in suppl:
        inputs += 1
        if mol:
            count, errors = generate_conformers(inputs, mol, args.num, ref_mol, WRITER, args.core_smi)
            totalCount += count
            totalErrors += errors

    input.close()
    WRITER.close()

    if totalErrors > 0:
        utils.log("WARNING:", totalErrors, "conformers failed to generate")

    # write metrics
    if args.meta:
        metrics = {'__InputCount__': inputs, '__OutputCount__': totalCount, 'RDKitConstrainedConformer': totalCount}
        if totalErrors > 0:
            metrics['__ErrorCount__'] = totalErrors
        utils.write_metrics(output_base, metrics)
def main():
    """Command-line entry point for splitting fragnet candidates.

    Example usage:

    python -m pipelines.xchem.split-fragnet-candidates -i ../../data/mpro/expanded-17.json

    Parses ``--input`` and ``--generate-filenames``, logs the parsed
    arguments and passes both values to ``execute``.

    :return: None
    """
    cli = argparse.ArgumentParser(
        description='Split fragnet candidates - Split fragment network expansion into individual sets')
    cli.add_argument(
        '-i', '--input',
        help='JSON containing the expanded candidates)')
    cli.add_argument(
        '-g', '--generate-filenames',
        action='store_true',
        help='Use automatically generated file names instead of the title field)')

    options = cli.parse_args()
    utils.log("Split fragnet candidates args: ", options)

    execute(options.input, options.generate_filenames)
def process(inputs, writer):
    """Score each molecule with ``get_fmap_scores`` and write the scored ones.

    :param inputs: Iterable of molecules; ``None`` entries are counted as errors
    :param writer: Writer whose ``write(mol)`` receives each scored molecule
    :return: Tuple of (total records seen, molecules written, errors)
    """
    total = 0
    success = 0
    errors = 0
    for mol in inputs:
        total += 1
        if mol is None:
            errors += 1
            continue
        try:
            quantScore, qualScore = get_fmap_scores(mol)
            # utils.log('Score:', score)
            if total % 1000 == 0:
                utils.log('Processed molecule', total, '...')
            mol.SetDoubleProp(field_FeatureSteinQualityScore, qualScore)
            mol.SetDoubleProp(field_FeatureSteinQuantityScore, quantScore)
            mol.SetProp('Name', mol.GetProp('_Name'))
            writer.write(mol)
            success += 1
        except Exception:
            # Best effort per molecule: record the failure and carry on.
            # Catching Exception rather than everything lets Ctrl-C and
            # SystemExit still stop the run.
            utils.log("Error scoring molecule", sys.exc_info()[0])
            traceback.print_exc()
            errors += 1
    return total, success, errors
def parse_mol_simple(my_type, txt):
    """Function to parse individual mols given a type.

    :param my_type: A type definition (i.e. "mol" or "smiles")
    :param txt: The textual definition of the molecule (i.e. a SMILES string)
    :return: A mol instance or None if the molecule could not be compiled.
        None is also returned (and logged) for an unrecognised type.
    """
    # Ignore unexpected parameter values...
    if not my_type or not txt:
        return None

    # Start from "not parsed" so that an unrecognised type falls through to the
    # failure log below instead of hitting an unassigned variable.
    mol = None
    if my_type == "mol":
        # Try this way
        mol = Chem.MolFromMolBlock(txt.strip())
        if mol is None:
            mol = Chem.MolFromMolBlock(txt)
        if mol is None:
            # Retry without the first line of the text
            mol = Chem.MolFromMolBlock("\n".join(txt.split("\n")[1:]))
        # Now try to do sanifix
        if mol is None:
            mol = fix_mol(Chem.MolFromMolBlock(txt, False))
        # And again
        if mol is None:
            mol = fix_mol(Chem.MolFromMolBlock(txt.strip(), False))
    elif my_type == "smiles":
        # Assumes that smiles is the first column -> and splits on chemaxon
        columns = txt.split()
        # Whitespace-only text has no first column; treat it as a parse failure
        if columns:
            mol = Chem.MolFromSmiles(columns[0].split(":")[0])

    if mol is None:
        utils.log('Failed to parse mol "%s" for my_type %s' % (txt, my_type))
    return mol
def main():
    """Command-line entry point for the RDKit input splitter.

    Parses the default input arguments plus ``--output``, ``--field`` and
    ``--meta``, hands them to ``split`` and logs the generated file names.
    """
    ### command line args definitions #########################################
    cli = argparse.ArgumentParser(description='RDKit Input Splitter')
    parameter_utils.add_default_input_args(cli)
    cli.add_argument(
        '-o', '--output',
        required=True,
        help="Directory name for output files (no extension).")
    cli.add_argument(
        '-f', '--field',
        required=True,
        help="field to use to split input. Output files will have the name of this field's value")
    cli.add_argument(
        '--meta',
        action='store_true',
        help='Write metadata and metrics files')

    options = cli.parse_args()
    utils.log("Splitter Args: ", options)

    generated = split(options.input, options.informat, options.field,
                      options.output, options.meta)
    utils.log("Files generated:", " ".join(generated))
def patch_scores_sdf(sdf_in, outfile, scores):
    """Write the scored records of an SDF to ``<outfile>.sdf`` and ``<outfile>.tsv``.

    Records are numbered from 0 in file order. A record whose number is a key
    of ``scores`` gets the score stored as ``dls_deep_score`` and is written
    to both outputs; any other record is logged and skipped.

    :param sdf_in: Path of the SDF file to read
    :param outfile: Base name (no extension) of the outputs, created in ``work_dir``
    :param scores: Mapping of record number to score
    """
    global work_dir

    sdf_path = "{0}{1}{2}.sdf".format(work_dir, os.path.sep, outfile)
    tsv_path = "{0}{1}{2}.tsv".format(work_dir, os.path.sep, outfile)
    utils.log("Writing results to {0} and {1}".format(sdf_path, tsv_path))

    with open(tsv_path, 'w') as tsv_file:
        sdf_file = pybel.Outputfile("sdf", sdf_path)
        # Close the SDF writer even if reading or writing a record fails
        try:
            for counter, mol in enumerate(pybel.readfile("sdf", sdf_in)):
                if counter not in scores:
                    utils.log("No score found for record", counter)
                    continue
                score = scores[counter]
                # utils.log("Score for record {0} is {1}".format(counter, score))
                mol.data['dls_deep_score'] = score
                # Missing values are written as empty TSV columns
                rdock_score = mol.data['SCORE'] if 'SCORE' in mol.data else ''
                rdock_nscore = mol.data['SCORE.norm'] if 'SCORE.norm' in mol.data else ''
                sdf_file.write(mol)
                tsv_file.write("{0}\t{1}\t{2}\t{3}\n".format(
                    counter, rdock_score, rdock_nscore, score))
        finally:
            sdf_file.close()
def filter_value(value, min, max, key, quiet=False):
    """Tell whether ``value`` lies within the inclusive range ``min``..``max``.

    :param value: The value to test; ``None`` always passes
    :param min: Lower bound (values below it fail)
    :param max: Upper bound (values above it fail)
    :param key: Label used in the log message when the value fails
    :param quiet: If true, failures are not logged
    :return: False if the value is out of range, otherwise True
    """
    # A missing value is never filtered out
    if value is None:
        return True

    if value < min:
        if not quiet:
            utils.log(key, value, "<", min)
        return False

    if value > max:
        if not quiet:
            utils.log(key, value, ">", max)
        return False

    return True
def run_predictions():
    """Run the external ``predict.py`` script on the generated types file.

    The command reads ``<work_dir>/<types_file_name>`` and writes to
    ``<work_dir>/<predict_file_name>``. It is executed through the shell with
    ``os.system``; a non-zero exit status is logged as a warning.
    """
    global types_file_name
    global predict_file_name
    global work_dir
    # python3 scripts/predict.py -m resources/dense.prototxt -w resources/weights.caffemodel -i work_0/test_set.types >> work_0/caffe_output/predictions.txt
    cmd1 = "python3 /root/train/fragalysis_test_files/scripts/predict.py -m /root/train/fragalysis_test_files/resources/dense.prototxt" +\
           " -w /root/train/fragalysis_test_files/resources/weights.caffemodel" +\
           " -i {0}/{1} -o {0}/{2}".format(work_dir, types_file_name, predict_file_name)
    utils.log("CMD:", cmd1)
    status = os.system(cmd1)
    # The exit status used to be discarded, so a failed prediction run was
    # indistinguishable from a successful one in the log.
    if status != 0:
        utils.log("WARNING: prediction command exited with status", status)
def determine_protein_format(protein_file, protein_format):
    """Work out the format of a protein file.

    :param protein_file: The protein file name
    :param protein_format: An explicitly specified format; returned as-is when truthy
    :return: The explicit format, otherwise 'pdb' or 'mol2' based on the file
        extension (optionally gzipped), falling back to 'pdb' with a log message
    """
    if protein_format:
        return protein_format

    if protein_file.endswith(('.pdb', '.pdb.gz')):
        return 'pdb'

    if protein_file.endswith(('.mol2', '.mol2.gz')):
        return 'mol2'

    utils.log(
        "Can't determine file format, so assuming pdb. Please use the --protein-format parameter"
    )
    return 'pdb'
def read_next_protein(proteins, format, previous, index, keep_hs=False):
    """Read the protein at position ``index``, or reuse the previous one.

    :param proteins: List of protein file names
    :param format: The format passed to ``toolkit.readfile``
    :param previous: The previously read protein; returned unchanged when
        ``index`` is past the end of ``proteins``
    :param index: Zero-based position in ``proteins`` of the file to read
    :param keep_hs: Passed (inverted) as ``removeHs`` to ``toolkit.readfile``
    :return: The protein, flagged with ``protein = True``
    :raises ValueError: if no protein could be read from the file
    """
    if previous and index >= len(proteins):
        return previous

    # Supplying a default makes a file with no records reach the ValueError
    # below instead of escaping as a bare StopIteration.
    protein = next(
        toolkit.readfile(format, proteins[index], removeHs=not keep_hs), None)
    if not protein:
        raise ValueError('Unable to read protein')

    utils.log('Read protein', index + 1)
    protein.protein = True
    # NOTE(review): removeh() is called regardless of keep_hs - confirm intended
    protein.removeh()
    return protein
def create_featuremap(fragments):
    """Build a single merged feature map from the given fragments.

    Falsy entries of ``fragments`` are dropped, feature data is built with
    ``build_feat_data`` and ``merge_feat_maps`` is applied repeatedly until
    one feature map remains.

    :param fragments: Iterable of fragment molecules
    :return: The merged feature map
    """
    usable = [frag for frag in fragments if frag]
    fmaps, scores = build_feat_data(usable)

    # Merge on a copy so that the list returned by build_feat_data is untouched
    remaining = fmaps.copy()
    utils.log('Processing', len(fmaps), 'molecules')
    while len(remaining) > 1:
        merge_feat_maps(remaining, scores)

    result = remaining[0]
    utils.log('Created merged feature map with', result.GetNumFeatures(), 'features')
    return result
def process(inputs, fname):
    """Build a merged feature map from the inputs and pickle it to ``fname``.

    Falsy entries of ``inputs`` are dropped, feature data is built with
    ``build_feat_data`` and ``merge_feat_maps`` is applied repeatedly until
    one feature map remains.

    :param inputs: Iterable of molecules
    :param fname: Path of the pickle file to write
    :return: Tuple of (number of molecules used, number of features in the merged map)
    """
    mols = [m for m in inputs if m]
    fmaps, scores = build_feat_data(mols)
    merged_fmaps = fmaps.copy()
    utils.log('Processing', len(fmaps), 'molecules')
    while len(merged_fmaps) > 1:
        merge_feat_maps(merged_fmaps, scores)
    merged_fmap = merged_fmaps[0]

    # Use a context manager so the pickle is flushed and the handle closed
    # even if serialisation fails part-way through.
    with open(fname, "wb") as pkl_file:
        pickle.dump(merged_fmap, pkl_file)

    utils.log('Wrote merged feature map with', merged_fmap.GetNumFeatures(),
              'features as pickle to', fname)
    return len(mols), merged_fmap.GetNumFeatures()
def split(input, informat, fieldName, outputBase, writeMetrics):
    """Splits the input into separate files.

    The file each record is written to, and the name of that file, is
    determined by the record's value for the fieldName property. Records
    that could not be read, lack the property or have an empty value are
    not written.

    :param input: The input definition passed to rdkit_utils.default_open_input
    :param informat: The input format passed to rdkit_utils.default_open_input
    :param fieldName: Name of the property whose value selects the output file
    :param outputBase: Prefix for the output file names
    :param writeMetrics: Whether to write a metrics file
    :return: List of the generated file names
    """
    input, suppl = rdkit_utils.default_open_input(input, informat)

    i = 0
    written = 0
    writers = {}
    outputs = []
    filenames = []
    for mol in suppl:
        i += 1
        if mol is None:
            continue
        if not mol.HasProp(fieldName):
            utils.log("Skipping molecule", i, "- did not contain field", fieldName)
            continue
        value = mol.GetProp(fieldName)
        if value:
            s = str(value)
            # 'in' works on every Python version; dict.has_key() no longer
            # exists in Python 3.
            if s in writers:
                writer = writers[s]
            else:
                # First record with this value: open a new output for it
                name = outputBase + s
                output, writer = rdkit_utils.default_open_output_sdf(
                    name, outputBase, False, False)
                filenames.append(name + '.sdf')
                outputs.append(output)
                writers[s] = writer
            writer.write(mol)
            written += 1

    utils.log("Generated", len(writers), "outputs from", i, "records")

    input.close()
    for k in writers:
        writers[k].close()
    for o in outputs:
        o.close()

    if writeMetrics:
        utils.write_metrics(outputBase, {
            '__InputCount__': i,
            '__OutputCount__': written,
            'Splitter': i
        })

    return filenames
def generate_mols_from_json(input):
    """Lazily yield the molecules that can be created from the json items.

    Each item is passed to ``create_mol_from_props``; items for which no
    molecule is produced are logged and skipped.

    :param input: file like object containing the json representation of the molecules
    """
    for record in input:
        candidate = create_mol_from_props(record)
        if candidate:
            yield candidate
        else:
            # TODO - get a count of the errors and report
            utils.log("Failed to create molecule - skipping. Data was ", record)
def main():
    """Command-line entry point for the Open Babel PDB preparation.

    Parses the arguments and calls ``execute`` once per requested output
    format (pdbqt first, then mol2).

    :raises ValueError: if neither ``--mol2`` nor ``--pdbqt`` was specified
    """
    global PDB_PATH,WRITER,THRESHOLD

    cli = argparse.ArgumentParser(description='Open babel PDB prepare')
    cli.add_argument('--no-gzip', action='store_true',
                     help='Do not compress the output')
    cli.add_argument('-i', '--input',
                     help="PDB file for converting")
    cli.add_argument('-o', '--output',
                     help="Base name for output files (no extension).")
    cli.add_argument('-mol2', '--mol2', action='store_true',
                     help='Output as Mol2 format.')
    cli.add_argument('-pdbqt', '--pdbqt', action='store_true',
                     help='Output as pdbqt format.')
    cli.add_argument('--meta', action='store_true',
                     help='Write metrics files')
    cli.add_argument('-prot', '--protonate', type=float,
                     help="protonate at this pH (optional)")

    opts = cli.parse_args()
    utils.log("Prepare Args: ", opts)

    # At least one of the two output formats has to be requested
    if not opts.mol2 and not opts.pdbqt:
        raise ValueError("Must specify at least one output fromat: mol2 and/or pdbqt")

    if opts.pdbqt:
        utils.log("Preparing as pdbqt")
        execute(opts.input, opts.output, "pdbqt", "-opdbqt",
                opts.protonate, opts.no_gzip)

    if opts.mol2:
        utils.log("Preparing as mol2")
        execute(opts.input, opts.output, "mol2", "-omol2",
                opts.protonate, opts.no_gzip)

    utils.log("Preparation complete")
def SelectDiverseSubset(mols, clusters, distances, count, field, maximise, score, quiet):
    """Pick up to ``count`` molecules by visiting the clusters in round-robin order.

    On each iteration the next candidate of the current cluster is taken. The
    first candidate of a cluster is always picked; later ones are only picked
    when ``GetClosestDistance`` to the cluster's existing picks is below
    ``score``. At most ``len(mols)`` iterations are performed.

    :param mols: The molecules; the clusters hold indexes into this sequence
    :param clusters: Sequence of clusters, each a sequence of molecule indexes
    :param distances: Distance data handed to ``GetClosestDistance``
    :param count: Maximum number of molecules to pick
    :param field: Optional property name. When given, only members having the
        property are candidates and they are ordered by ``FetchScore``
    :param maximise: Passed to ``FetchScore`` when ordering by ``field``
    :param score: A candidate is accepted when its closest distance is below this
    :param quiet: If true, per-candidate log messages are suppressed
    :return: One list of picked molecule indexes per cluster
    """
    total = len(mols)
    num_clusters = len(clusters)
    pickedList = []
    clustersList = []
    for i in range(0, num_clusters):
        pickedList.append([])
        if field:
            filteredByValue = [
                x for x in clusters[i] if mols[x].HasProp(field)
            ]
            sortedByValue = sorted(
                filteredByValue,
                key=lambda idx: FetchScore(idx, mols, field, maximise))
            clustersList.append(sortedByValue)
        else:
            clustersList.append(list(clusters[i]))

    totalIter = 0
    pickedCount = 0
    # The num_clusters test avoids a modulo-by-zero when there are no clusters
    while num_clusters and totalIter < total and pickedCount < count:
        clusterNum = totalIter % num_clusters
        clus = clustersList[clusterNum]
        pick = pickedList[clusterNum]
        #utils.log("iter",totalIter,"cluster",clusterNum,"length",len(clus))
        # An empty candidate list means the cluster is exhausted; the
        # iteration is still counted.
        if clus:
            # remove that item from the cluster so that it's not tried again
            molIndex = clus.pop(0)
            if not pick:  # first time for this cluster
                pick.append(molIndex)
                pickedCount += 1
                if not quiet:
                    utils.log("Cluster", clusterNum, "initialised with", molIndex)
            else:
                closestDist = GetClosestDistance(distances, molIndex, pick)
                #utils.log("Closest score",closestDist)
                if closestDist < score:
                    pick.append(molIndex)
                    pickedCount += 1
                    if not quiet:
                        utils.log("Cluster", clusterNum, "added", molIndex,
                                  "with score", closestDist)
                elif not quiet:
                    utils.log("Cluster", clusterNum, "discarded", molIndex,
                              "with score", closestDist)
        totalIter += 1

    utils.log("Picked", pickedCount, "using", totalIter, "iterations")
    return pickedList
def main():
    """Command-line entry point for FeatureStein scoring.

    Example usage:

    python -m pipelines.xchem.featurestein_score -i ../../data/mpro/poses.sdf.gz -f mpro-fstein.p -o fstein

    Loads the pickled feature map into the module-level ``fmaps``, scores the
    input molecules with ``process`` and optionally writes a metrics file.
    """
    global fmaps

    parser = argparse.ArgumentParser(description='FeatureStein scoring with RDKit')
    parameter_utils.add_default_io_args(parser)
    parser.add_argument('-f', '--feat-map', help='Feature Map pickle to score with')
    parser.add_argument('--no-gzip', action='store_true', help='Do not compress the output (STDOUT is never compressed')
    parser.add_argument('--metrics', action='store_true', help='Write metrics')

    args = parser.parse_args()
    utils.log("FeatureStein Args: ", args)

    source = "featurestein_score.py"
    datasetMetaProps = {"source": source, "description": "FeatureStein scoring using RDKit " + rdBase.rdkitVersion}

    clsMappings = {}
    fieldMetaProps = []
    clsMappings[field_FeatureSteinQualityScore] = "java.lang.Float"
    clsMappings[field_FeatureSteinQuantityScore] = "java.lang.Float"
    # One entry per field. Both used to be written as a single dict literal
    # with repeated keys, which silently kept only the quantity score entry.
    fieldMetaProps.append({"fieldName": field_FeatureSteinQualityScore,
                           "values": {"source": source, "description": "FeatureStein quality score"}})
    fieldMetaProps.append({"fieldName": field_FeatureSteinQuantityScore,
                           "values": {"source": source, "description": "FeatureStein quantity score"}})

    # NOTE: pickle can execute arbitrary code while loading - only use feature
    # map files from a trusted source.
    with open(args.feat_map, 'rb') as pkl_file:
        fmaps = pickle.load(pkl_file)
    utils.log('FeatureMap has', fmaps.GetNumFeatures(), "features")

    inputs_file, inputs_supplr = rdkit_utils.default_open_input(args.input, args.informat)
    output, writer, output_base = rdkit_utils.default_open_output(
        args.output, 'featurestein', args.outformat,
        valueClassMappings=clsMappings,
        datasetMetaProps=datasetMetaProps,
        fieldMetaProps=fieldMetaProps,
        compress=not args.no_gzip)

    # this does the processing
    total, success, errors = process(inputs_supplr, writer)

    inputs_file.close()
    writer.flush()
    writer.close()
    output.close()

    if args.metrics:
        utils.write_metrics(output_base, {'__InputCount__': total, '__OutputCount__': success, '__ErrorCount__': errors, 'RDKitFeatureMap': success})
def mock_predictions():
    """Write a predictions file filled with random scores.

    One line is written for every protein/ligand combination, in the
    ``<score> | 0 <protein path> <ligand path>`` layout, to the file named by
    ``generate_predictions_filename(work_dir, predict_file_name)``.
    """
    global work_dir, predict_file_name, inputs_protein, inputs_ligands

    utils.log("WARNING: generating mock results instead of running on GPU")
    target = generate_predictions_filename(work_dir, predict_file_name)
    sep = os.path.sep
    with open(target, 'w') as out:
        for prot in inputs_protein:
            for lig in inputs_ligands:
                # A fresh random score for every pair
                mock_score = random.random()
                # utils.log("Writing", line)
                out.write(
                    "{0} | 0 {1}{4}proteins{4}{2} {1}{4}ligands{4}{3}\n".format(
                        mock_score, work_dir, prot, lig, sep))
def main():
    """Command-line entry point for the RDKit 3D clustering.

    Parses the default I/O arguments, opens the input and output and passes
    the molecule supplier to ``combine_conformers``.
    """
    ### command line args definitions #########################################
    cli = argparse.ArgumentParser(description='RDKit cluster 3D')
    parameter_utils.add_default_io_args(cli)
    args = cli.parse_args()
    utils.log("Cluster_3d Args: ", args)

    source = "cluster_3d.py"
    datasetMetaProps = {
        "source": source,
        "description": "Cluster 3D using RDKit " + rdBase.rdkitVersion,
    }

    # No value classes or field descriptions are declared at present. The
    # entries that are currently disabled are:
    #   RMSToCentroid (java.lang.Float)     - RMS distance to the cluster centroid
    #   EnergyDelta (java.lang.Float)       - Energy difference to lowest energy structure
    #   EnergyAbs (java.lang.Float)         - Absolute energy
    #   ConformerNum (java.lang.Integer)    - Conformer number
    #   ClusterCentroid (java.lang.Integer) - Conformer number of the cluster centroid
    #   ClusterNum (java.lang.Integer)      - Cluster number
    #   StructureNum (java.lang.Integer)    - Structure number this conformer was generated from
    clsMappings = {}
    fieldMetaProps = []

    input, output, suppl, writer, output_base = rdkit_utils.default_open_input_output(
        args.input, args.informat, args.output, 'conformers', args.outformat,
        valueClassMappings=clsMappings,
        datasetMetaProps=datasetMetaProps,
        fieldMetaProps=fieldMetaProps)

    # The return value is not used here
    combine_conformers(suppl)

    if input:
        input.close()
    writer.flush()
    writer.close()
    output.close()
def get_FeatureMapScore(small_feats, large_feats, tani=False, score_mode=FeatMaps.FeatMapScoreMode.All):
    """Generate the feature map score.

    A feature map is built from each feature list (all weights 1, using the
    module-level ``fmParams``) and the second list is scored against the
    first map.

    :param small_feats: Features used to build the map that does the scoring
    :param large_feats: Features that are scored against that map
    :param tani: If true return ``c / (A + B - c)`` where ``c`` is the raw
        score and ``A``/``B`` are the two feature counts, otherwise return the
        raw score divided by the smaller feature count
    :param score_mode: The score mode set on the scoring feature map
    :return: The score, or 0 if the calculation divides by zero
    """
    # NOTE: both lists are used as given - no filtering of features is done here
    featLists = [small_feats, large_feats]
    fms = [
        FeatMaps.FeatMap(feats=x, weights=[1] * len(x), params=fmParams)
        for x in featLists
    ]
    # set the score mode
    fms[0].scoreMode = score_mode

    try:
        if tani:
            c = fms[0].ScoreFeats(featLists[1])
            A = fms[0].GetNumFeatures()
            B = len(featLists[1])
            if B != fms[1].GetNumFeatures():
                utils.log("Why isn't B equal to number of features...?!")
            return float(c) / (A + B - c)
        return fms[0].ScoreFeats(featLists[1]) / min(
            fms[0].GetNumFeatures(), len(featLists[1]))
    except ZeroDivisionError:
        utils.log("ZeroDivisionError")
        return 0
def process(molecules, fragments, writer, threshold=0.4):
    """Score the molecules against the fragments that loaded successfully.

    ``None`` entries of ``fragments`` are counted as load errors and dropped;
    the remaining fragments are passed to ``getReverseScores``.

    :param molecules: The molecules to score
    :param fragments: Iterable of fragment molecules, possibly containing None
    :param writer: Writer handed on to ``getReverseScores``
    :param threshold: Score threshold handed on to ``getReverseScores``
    """
    loaded = list(fragments)
    frag_mol_list = [frag for frag in loaded if frag is not None]
    errors = len(loaded) - len(frag_mol_list)

    if not errors:
        utils.log('Using', len(frag_mol_list), 'fragments. No errors')
    else:
        utils.log(errors, 'molecules failed to load. Using', len(frag_mol_list), 'fragments.')

    #mols, frags, score_threshold, writer
    getReverseScores(molecules, frag_mol_list, threshold, writer)
def execute(input, output, extension, format, ph, noGzip):
    """Convert a PDB file with the ``obabel`` command-line tool.

    :param input: Path of the PDB file to convert
    :param output: Base name of the output file (the extension is appended)
    :param extension: Extension of the output file
    :param format: The obabel output format option (for instance "-omol2")
    :param ph: If truthy, passed to obabel through its ``-p`` option
    :param noGzip: If false the generated file is compressed with ``gzip``
    """
    # TODO - convert this to use the Python API rather than an external process
    target = output + "." + extension
    protonation = ["-p", str(ph)] if ph else []
    command = ["obabel", "-ipdb", input, format, "-O", target] + protonation

    utils.log("Command: " + " ".join(command))
    subprocess.check_call(command, stdout=sys.stderr, stderr=sys.stderr)

    if noGzip:
        return

    # NOTE the -z argument does not seem to work correctly with obabel (truncated files generated) so we
    # fall back to good old gzip to handle the compression once the uncompressed file is created
    subprocess.check_call(['gzip', target], stdout=sys.stderr, stderr=sys.stderr)
def get_SucosScore(ref_mol, query_mol, tani=False, ref_features=None, query_features=None, score_mode=FeatMaps.FeatMapScoreMode.All):
    """
    This is the key function that calculates the SuCOS scores and is expected to be called from other modules.
    To improve performance you can pre-calculate the features and pass them in as optional parameters to avoid having
    to recalculate them. Use the getRawFeatures function to pre-calculate the features.

    :param ref_mol: The reference molecule to compare to
    :param query_mol: The molecule to compare to the reference
    :param tani: Whether to calculate Tanimoto distances
    :param ref_features: An optional feature map for the reference molecule, avoiding the need to re-calculate it.
    :param query_features: An optional feature map for the query molecule, avoiding the need to re-calculate it.
    :param score_mode: The feature map score mode passed on to get_FeatureMapScore
    :return: A tuple of 3 values. 1 the sucos score, 2 the feature map score,
        3 the Tanimoto distance or 1 minus the protrude distance.
        0, 0, 0 is returned if the shape calculation fails.
    """
    if not ref_features:
        ref_features = getRawFeatures(ref_mol)
    if not query_features:
        query_features = getRawFeatures(query_mol)

    fm_score = get_FeatureMapScore(ref_features, query_features, tani, score_mode)
    fm_score = np.clip(fm_score, 0, 1)

    try:
        if tani:
            tani_sim = 1 - float(
                rdShapeHelpers.ShapeTanimotoDist(ref_mol, query_mol))
            tani_sim = np.clip(tani_sim, 0, 1)
            SuCOS_score = 0.5 * fm_score + 0.5 * tani_sim
            return SuCOS_score, fm_score, tani_sim
        else:
            protrude_dist = rdShapeHelpers.ShapeProtrudeDist(
                ref_mol, query_mol, allowReordering=False)
            protrude_dist = np.clip(protrude_dist, 0, 1)
            protrude_val = 1.0 - protrude_dist
            SuCOS_score = 0.5 * fm_score + 0.5 * protrude_val
            return SuCOS_score, fm_score, protrude_val
    except Exception:
        # Deliberately best effort, but Exception (rather than a bare except)
        # keeps Ctrl-C and SystemExit working.
        utils.log("Failed to calculate SuCOS scores. Returning 0,0,0")
        return 0, 0, 0
def read_predictions():
    """Read the scores from the predictions file in ``work_dir``.

    Only lines having exactly five whitespace separated tokens, the second
    being ``|``, are considered. The fifth token is resolved to a record
    number with ``match_ligand`` and the first token is kept as its score.

    :return: Dict of record number to score (the score is kept as text)
    """
    global predict_file_name, work_dir

    predictions_path = "{0}{1}{2}".format(work_dir, os.path.sep, predict_file_name)
    scores = {}
    with open(predictions_path, 'r') as handle:
        for line in handle:
            #utils.log(line)
            tokens = line.split()
            if len(tokens) != 5 or tokens[1] != '|':
                continue
            # utils.log(len(tokens), tokens[0], tokens[3], tokens[4])
            record_no = match_ligand(tokens[4])
            if record_no is None:
                continue
            # utils.log(record_no, tokens[0])
            scores[record_no] = tokens[0]

    utils.log("Found", len(scores), "scores")
    return scores
def main():
    """Command-line entry point for XCos scoring.

    Example usage:

    python -m pipelines.xchem.xcos -f ../../data/mpro/hits-17.sdf.gz -i ../../data/mpro/poses.sdf.gz -o xcos

    Opens the fragments, the input molecules and the output, then runs
    ``process`` on them.
    """
    parser = argparse.ArgumentParser(description='XCos scoring with RDKit')
    parameter_utils.add_default_io_args(parser)
    parser.add_argument('-f', '--fragments', required=True, help='Fragments to compare')
    parser.add_argument('-ff', '--fragments-format', help='Fragments format')
    parser.add_argument('-t', '--score-threshold', type=float, default=0.4,
                        help='Minimum shape overlay and feature map score required for scoring a bit to a fragment')
    parser.add_argument('--no-gzip', action='store_true', help='Do not compress the output (STDOUT is never compressed')
    # NOTE(review): --metrics is accepted but no metrics are written by this function
    parser.add_argument('--metrics', action='store_true', help='Write metrics')

    args = parser.parse_args()
    utils.log("XCos Args: ", args)

    source = "xcos.py"
    datasetMetaProps = {"source": source, "description": "XCos scoring using RDKit " + rdBase.rdkitVersion}

    clsMappings = {}
    fieldMetaProps = []
    clsMappings[field_XCosRefMols] = "java.lang.String"
    clsMappings[field_XCosNumHits] = "java.lang.Integer"
    clsMappings[field_XCosScore1] = "java.lang.Float"
    fieldMetaProps.append({"fieldName": field_XCosRefMols, "values": {"source": source, "description": "XCos reference fragments"}})
    fieldMetaProps.append({"fieldName": field_XCosNumHits, "values": {"source": source, "description": "XCos number of hits"}})
    fieldMetaProps.append({"fieldName": field_XCosScore1, "values": {"source": source, "description": "XCos score 1"}})

    frags_input, frags_suppl = rdkit_utils.default_open_input(args.fragments, args.fragments_format)
    inputs_file, inputs_supplr = rdkit_utils.default_open_input(args.input, args.informat)
    output, writer, output_base = rdkit_utils.default_open_output(
        args.output, 'xcos', args.outformat,
        valueClassMappings=clsMappings,
        datasetMetaProps=datasetMetaProps,
        fieldMetaProps=fieldMetaProps,
        compress=not args.no_gzip)

    # this does the processing
    process(inputs_supplr, frags_suppl, writer, threshold=args.score_threshold)

    # Release every handle that was opened above, not just the writer
    if frags_input:
        frags_input.close()
    if inputs_file:
        inputs_file.close()
    writer.close()
    if output:
        output.close()
def doO3Dalign(i, mol, qmol, use_crippen, threshold, perfect_score, writer, conformerProps=None, minEnergy=None):
    """Align every conformer of ``mol`` onto ``qmol`` and write the best-scoring one.

    :param i: Record identifier, only used in the log message
    :param mol: The probe molecule whose conformers are aligned
    :param qmol: The reference (query) molecule
    :param use_crippen: If true use ``GetCrippenO3AForProbeConfs``, otherwise
        ``GetO3AForProbeConfs``
    :param threshold: If falsy every molecule is written, otherwise only those
        where ``perfect_score - best_score`` is below this value
    :param perfect_score: The score used as the baseline for the threshold test
    :param writer: Writer whose ``write(mol, confId=...)`` receives the result
    :param conformerProps: Optional per-conformer properties indexed by conformer number
    :param minEnergy: Optional minimum energy used to compute the energy delta
    :return: 1 if the molecule was written, otherwise 0
    """
    if use_crippen:
        pyO3As = rdMolAlign.GetCrippenO3AForProbeConfs(mol, qmol)
    else:
        pyO3As = rdMolAlign.GetO3AForProbeConfs(mol, qmol)

    if len(pyO3As) == 0:
        return 0

    best_score = 0
    best_align = None
    conf_id = -1
    for j, pyO3A in enumerate(pyO3As):
        align = pyO3A.Align()
        score = pyO3A.Score()
        if score > best_score:
            best_score = score
            best_align = align
            conf_id = j
    #utils.log("Best score = ",best_score)

    if not threshold or perfect_score - best_score < threshold:
        # Report and store the values of the conformer that is actually written
        # (conf_id), not those of whichever conformer happened to be last.
        utils.log("Molecule", i, best_align, best_score)
        mol.SetDoubleProp(field_O3DAScore, best_score)
        if conformerProps and minEnergy:
            eAbs = conformerProps[conf_id][(conformers.field_EnergyAbs)]
            eDelta = eAbs - minEnergy
            # NOTE(review): a value of exactly 0 is not written because of the
            # truthiness tests below - confirm that this is intended.
            if eAbs:
                mol.SetDoubleProp(conformers.field_EnergyAbs, eAbs)
            if eDelta:
                mol.SetDoubleProp(conformers.field_EnergyDelta, eDelta)
        writer.write(mol, confId=conf_id)
        return 1
    return 0
def fragment(mol, mode, quiet=False):
    """Return the biggest fragment of a molecule.

    A molecule that does not consist of several fragments is returned
    unchanged. Otherwise the biggest fragment is chosen (the first one wins a
    tie) and the properties of ``mol``, including its name, are copied to it.

    :param mol: The molecule to fragment
    :param mode: 'hac' to compare heavy atom counts, 'mw' to compare molecular weights
    :param quiet: If true the choice is not logged
    :return: ``mol`` itself or its biggest fragment
    :raises ValueError: if there are several fragments and the mode is not recognised
    """
    frags = Chem.GetMolFrags(mol, asMols=True)
    # Nothing to choose between (this also covers a molecule with no fragments)
    if len(frags) <= 1:
        return mol

    # TODO - handle ties
    if mode == 'hac':
        size_of = lambda frag: frag.GetNumHeavyAtoms()
        basis = "based on HAC"
    elif mode == 'mw':
        size_of = Descriptors.MolWt
        basis = "based on MW"
    else:
        raise ValueError('Invalid fragment mode:', mode)

    # max() keeps the first of equally sized fragments and always selects one,
    # even when every fragment has a size of zero.
    biggest_index, biggest_mol = max(
        enumerate(frags), key=lambda item: size_of(item[1]))
    if not quiet:
        utils.log("Chose fragment", biggest_index, "from", len(frags), basis)

    # copy the properties across
    for name in mol.GetPropNames():
        biggest_mol.SetProp(name, mol.GetProp(name))

    # _Name is a magical property that is not in the ones returned by GetPropNames,
    # so its presence has to be tested with HasProp.
    if mol.HasProp('_Name'):
        biggest_mol.SetProp("_Name", mol.GetProp("_Name"))

    return biggest_mol
def run_dock(mol):
    """Score one molecule with ``run_and_get_ans`` and write it if it passes.

    Every ``pliff*`` entry of the ``system`` section of the answer is stored
    on the molecule as a double property. When ``THRESHOLD`` is set,
    molecules whose ``pliff_score`` is above it are not written. The
    module-level ``COUNTER`` and ``SUCCESS`` counters are updated.

    :param mol: The molecule to score
    """
    global WRITER, COUNTER, SUCCESS, THRESHOLD

    answer_dict = run_and_get_ans(mol, PDB_PATH)
    COUNTER += 1
    if not answer_dict:
        utils.log("FAILED MOL", Chem.MolToSmiles(mol))
        return

    if THRESHOLD is not None:
        if answer_dict["system"]["pliff_score"] > THRESHOLD:
            return

    for ans in answer_dict["system"]:
        if ans.startswith(u"pliff"):
            mol.SetDoubleProp(str(ans), answer_dict["system"][ans])
    utils.log("SCORED MOL:", Chem.MolToSmiles(mol), answer_dict)

    # Always release the lock, even if the writer raises; otherwise every
    # other holder of the lock would block forever.
    lock.acquire()
    try:
        WRITER.write(mol)
        SUCCESS += 1
        WRITER.flush()
    finally:
        lock.release()