def main():
    ### command line args definitions #########################################

    parser = argparse.ArgumentParser(description='RDKit Input Splitter')
    parameter_utils.add_default_input_args(parser)
    parser.add_argument('-o', '--output', required=True,
                        help="Directory name for output files (no extension).")
    parser.add_argument('-f', '--field', required=True,
                        help="Field to use to split the input. Output files will have the name of this field's value")
    parser.add_argument('--meta', action='store_true',
                        help='Write metadata and metrics files')

    args = parser.parse_args()
    utils.log("Splitter Args: ", args)

    filenames = split(args.input, args.informat, args.field, args.output, args.meta)
    utils.log("Files generated:", " ".join(filenames))
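# A minimal, hypothetical sketch of the behaviour the --field option describes
# (the real split() helper lives elsewhere in this pipeline and also handles
# other input formats). It illustrates one SD output file per distinct field
# value, named after that value; the function name and paths are assumptions.
import gzip
import os

from rdkit import Chem


def split_sdf_sketch(input_path, field, output_dir):
    """Split an SD file into one SD file per distinct value of 'field'."""
    handle = gzip.open(input_path, 'rb') if input_path.endswith('.gz') else open(input_path, 'rb')
    supplier = Chem.ForwardSDMolSupplier(handle)
    writers = {}  # field value -> SDWriter
    try:
        for mol in supplier:
            if mol is None or not mol.HasProp(field):
                continue
            value = mol.GetProp(field)
            if value not in writers:
                writers[value] = Chem.SDWriter(os.path.join(output_dir, value + '.sdf'))
            writers[value].write(mol)
    finally:
        for writer in writers.values():
            writer.close()
        handle.close()
    return [os.path.join(output_dir, value + '.sdf') for value in writers]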
def test_add_default_input_args_with_long_options(self):
    """Checks ArgParse manipulation."""

    parser = argparse.ArgumentParser()
    parameter_utils.add_default_input_args(parser)
    result = parser.parse_args('--input inputfile --informat sdf'.split())
    self.assertEqual('inputfile', result.input)
    self.assertEqual('sdf', result.informat)
def main():
    # Example usage:
    # python -m pipelines.xchem.featurestein_generate -i ../../data/mpro/hits-17.sdf.gz -f mpro-fstein.p

    global fmaps

    parser = argparse.ArgumentParser(description='FeatureStein generation with RDKit')
    parameter_utils.add_default_input_args(parser)
    parser.add_argument('-f', '--feat-map', default='featurestein.p',
                        help='Name of pickle to generate')
    parser.add_argument('--metrics', action='store_true', help='Write metrics')

    args = parser.parse_args()
    utils.log("FeatureStein Args: ", args)

    inputs_file, inputs_supplr = rdkit_utils.default_open_input(args.input, args.informat)

    # this does the processing
    num_mols, num_feats = process(inputs_supplr, args.feat_map)

    inputs_file.close()

    if args.metrics:
        # use the feature map file name (minus its extension) as the base name for the metrics files
        output_base = os.path.splitext(args.feat_map)[0]
        utils.write_metrics(output_base, {
            '__StatusMessage__': 'Generated ' + str(num_feats) + ' features from ' + str(num_mols) + ' molecules',
            '__InputCount__': num_mols,
            'RDKitFeatureMap': num_mols
        })
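# A hedged usage note: assuming process() writes the feature map with
# pickle.dump() to the --feat-map path (default 'featurestein.p'), the result
# can be loaded back with the standard library. Illustrative only.
import pickle


def load_feature_map_example(path='featurestein.p'):
    # Hypothetical loader for the generated pickle.
    with open(path, 'rb') as f:
        return pickle.load(f)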
def main():
    # Example usage:
    # 1. Create keycloak token:
    #    export KEYCLOAK_TOKEN=$(curl -d "grant_type=password" -d "client_id=fragnet-search" -d "username=<username>" -d "password=<password>" \
    #      https://squonk.it/auth/realms/squonk/protocol/openid-connect/token 2> /dev/null | jq -r '.access_token')
    #
    # 2. Run the module:
    #    python -m pipelines.xchem.fragnet_expand -i ../../data/mpro/hits-17.sdf.gz --token $KEYCLOAK_TOKEN

    parser = argparse.ArgumentParser(description='Fragnet expand scoring with RDKit')
    parameter_utils.add_default_input_args(parser)
    parser.add_argument('--hac-min', type=int, default=3,
                        help='The min change in heavy atom count')
    parser.add_argument('--hac-max', type=int, default=3,
                        help='The max change in heavy atom count')
    parser.add_argument('--rac-min', type=int, default=1,
                        help='The min change in ring atom count')
    parser.add_argument('--rac-max', type=int, default=1,
                        help='The max change in ring atom count')
    parser.add_argument('--hops', type=int, default=1,
                        help='The number of graph traversals (hops)')
    parser.add_argument('-s', '--server',
                        default='https://fragnet-external.xchem-dev.diamond.ac.uk',
                        help='The fragnet search server')
    parser.add_argument('--token',
                        help='Keycloak auth token (or specify as KEYCLOAK_TOKEN env variable)')
    parser.add_argument('--index-as-filename', action='store_true',
                        help='Use the index as the file name instead of the molecule name')

    args = parser.parse_args()
    utils.log("FragnetExpand Args: ", args)

    inputs_file, inputs_supplr = rdkit_utils.default_open_input(args.input, args.informat)

    if args.token:
        auth_token = args.token
    else:
        auth_token = os.getenv('KEYCLOAK_TOKEN')
    if not auth_token:
        utils.log('WARNING: no authentication token found in environment variable KEYCLOAK_TOKEN')

    # this does the processing
    process(inputs_supplr,
            hac_min=args.hac_min, hac_max=args.hac_max,
            rac_min=args.rac_min, rac_max=args.rac_max,
            hops=args.hops, server=args.server, token=auth_token,
            index_as_filename=args.index_as_filename)

    inputs_file.close()
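# A hedged Python equivalent of the curl command in the usage notes above,
# using the 'requests' library (an assumption; the pipeline itself only reads
# the token from --token or the KEYCLOAK_TOKEN environment variable).
import requests


def fetch_keycloak_token_example(username, password):
    resp = requests.post(
        'https://squonk.it/auth/realms/squonk/protocol/openid-connect/token',
        data={'grant_type': 'password',
              'client_id': 'fragnet-search',
              'username': username,
              'password': password})
    resp.raise_for_status()
    return resp.json()['access_token']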
def main():
    global work_dir

    parser = argparse.ArgumentParser(description='DLS Deep - pose scoring')
    parameter_utils.add_default_input_args(parser)
    parser.add_argument('--no-gzip', action='store_true',
                        help='Do not compress the output (STDOUT is never compressed)')
    parser.add_argument('-r', '--receptor',
                        help="Receptor file for scoring (PDB or Mol2 format)")
    parser.add_argument('-o', '--outfile', default='scored_ligands',
                        help="Base file name for results")
    parser.add_argument('-of', '--outformat', choices=['sdf', 'json'], default='sdf',
                        help="Output format. Defaults to 'sdf'.")
    parser.add_argument('-w', '--work-dir', default=".", help="Working directory")
    parser.add_argument('--mock', action='store_true',
                        help='Generate mock scores rather than run on GPU')
    parser.add_argument('--thin', action='store_true', help='Thin output mode')

    args = parser.parse_args()
    utils.log("DLS deep args: ", args)

    work_dir = args.work_dir
    informat = args.informat
    protein = args.receptor
    ligands = args.input
    outfile = args.outfile

    if informat == 'json' or ligands.lower().endswith('.data') or ligands.lower().endswith('.data.gz'):
        # we need to write to SDF
        utils.log("Converting ligands from JSON to SDF")
        ligands_sdf = "{0}{1}ligands.sdf".format(work_dir, os.path.sep)
        write_json_as_sdf(ligands, ligands_sdf)
    elif informat == 'sdf' or ligands.lower().endswith('.sdf') or ligands.lower().endswith('.sdf.gz'):
        ligands_sdf = ligands
    else:
        raise ValueError("Unexpected input format for ligands")

    # # Open the output file
    # s_now = datetime.datetime.utcnow().strftime("%d-%b-%Y %H:%M:%S UTC")
    # source = 'pipelines/gnina/dls-deep-score.py'
    # output, WRITER, output_base = \
    #     rdkit_utils.default_open_output(args.output, "dls-deep-score", args.outformat,
    #                                     compress=not args.no_gzip,
    #                                     thinOutput=args.thin,
    #                                     valueClassMappings={'dls-deep-score': 'java.lang.Float'},
    #                                     datasetMetaProps={'created': s_now,
    #                                                       'source': source,
    #                                                       'description': 'DLS Deep - pose scoring'}
    #                                     )
    #
    # PDB_PATH = args.pdb_file
    #
    # # Close the file
    # WRITER.close()

    write_inputs(protein, ligands_sdf)

    if args.mock:
        mock_predictions()
    else:
        run_predictions()

    scores = read_predictions()

    if args.outformat == 'sdf':
        patch_scores_sdf(ligands_sdf, outfile, scores)
    elif args.outformat == 'json':
        patch_scores_json(ligands_sdf, outfile, scores)

    if args.outformat == 'sdf' and not args.no_gzip:
        os.system("gzip {0}{1}{2}.sdf".format(work_dir, os.path.sep, outfile))
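# A hedged alternative to the os.system("gzip ...") call above: compressing the
# SDF with the standard library avoids depending on a gzip binary being on the
# path. Illustrative only; the pipeline currently shells out.
import gzip
import os
import shutil


def gzip_file_example(path):
    with open(path, 'rb') as src, gzip.open(path + '.gz', 'wb') as dst:
        shutil.copyfileobj(src, dst)
    os.remove(path)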
def main():
    ### command line args definitions #########################################

    parser = argparse.ArgumentParser(description='RDKit Butina Cluster Matrix')
    parameter_utils.add_default_input_args(parser)
    parser.add_argument('-o', '--output',
                        help="Base name for output file (no extension). If not defined then STDOUT is used for the structures and 'output' is used as the base name of the other files.")
    parser.add_argument('-of', '--outformat', choices=['tsv', 'json'], default='tsv',
                        help="Output format. Defaults to 'tsv'.")
    parser.add_argument('--meta', action='store_true',
                        help='Write metadata and metrics files')
    parser.add_argument('-t', '--threshold', type=float, default=0.7,
                        help='Similarity clustering threshold (1.0 means identical)')
    parser.add_argument('-mt', '--matrixThreshold', type=float, default=0.5,
                        help='Threshold for outputting values (1.0 means identical)')
    parser.add_argument('-d', '--descriptor', type=str.lower,
                        choices=list(cluster_butina.descriptors.keys()), default='rdkit',
                        help='descriptor or fingerprint type (default rdkit)')
    parser.add_argument('-m', '--metric', type=str.lower,
                        choices=list(cluster_butina.metrics.keys()), default='tanimoto',
                        help='similarity metric (default tanimoto)')
    parser.add_argument('-q', '--quiet', action='store_true', help='Quiet mode')

    args = parser.parse_args()
    utils.log("Cluster Matrix Args: ", args)

    descriptor = cluster_butina.descriptors[args.descriptor]
    if descriptor is None:
        raise ValueError('Invalid descriptor name ' + args.descriptor)

    input, suppl = rdkit_utils.default_open_input(args.input, args.informat)

    # handle metadata
    source = "cluster_butina_matrix.py"
    datasetMetaProps = {
        "source": source,
        "description": "Butina clustering using RDKit " + rdBase.rdkitVersion
    }
    clsMappings = {
        "Cluster1": "java.lang.Integer",
        "Cluster2": "java.lang.Integer",
        "ID1": "java.lang.String",
        "ID2": "java.lang.String",
        "M1": "java.lang.String",
        "M2": "java.lang.String",
        "Similarity": "java.lang.Float"
    }
    fieldMetaProps = [{
        "fieldName": "Cluster",
        "values": {
            "source": source,
            "description": "Cluster number"
        }
    }]
    fieldNames = collections.OrderedDict()
    fieldNames['ID1'] = 'ID1'
    fieldNames['ID2'] = 'ID2'
    fieldNames['Cluster1'] = 'Cluster1'
    fieldNames['Cluster2'] = 'Cluster2'
    fieldNames['Similarity'] = 'Similarity'
    fieldNames['M1'] = 'M1'
    fieldNames['M2'] = 'M2'

    writer, output_base = utils.create_simple_writer(args.output, 'cluster_butina_matrix',
                                                     args.outformat, fieldNames,
                                                     valueClassMappings=clsMappings,
                                                     datasetMetaProps=datasetMetaProps,
                                                     fieldMetaProps=fieldMetaProps)

    ### generate fingerprints
    mols = [x for x in suppl if x is not None]
    fps = [descriptor(x) for x in mols]
    input.close()

    ### do clustering
    utils.log("Clustering with descriptor", args.descriptor, "metric", args.metric,
              "and threshold", args.threshold)
    clusters, dists, matrix = cluster_butina.ClusterFps(fps, args.metric, 1.0 - args.threshold)
    utils.log("Found", len(clusters), "clusters")
    MapClusterToMols(clusters, mols)

    if not args.quiet:
        utils.log("Clusters:", clusters)

    writer.writeHeader()

    size = len(matrix)
    #utils.log("len(matrix):", size)
    count = 0
    for i in range(size):
        #utils.log("element", i, "has length", len(matrix[i]))
        # write the diagonal entry for molecule i
        writer.write(create_values(mols, i, i, 1.0))
        count += 1
        for j in range(len(matrix[i])):
            #utils.log("writing", i, j)
            dist = matrix[i][j]
            if dist > args.matrixThreshold:
                # the matrix is the lower left segment without the diagonal
                x = j
                y = i + 1
                writer.write(create_values(mols, x, y, dist))
                writer.write(create_values(mols, y, x, dist))
                count += 2
    # the matrix has no row for the last molecule, so write its diagonal entry here
    writer.write(create_values(mols, size, size, 1.0))

    writer.writeFooter()
    writer.close()

    if args.meta:
        utils.write_metrics(output_base, {
            '__InputCount__': len(mols),
            '__OutputCount__': count,
            'RDKitCluster': len(clusters)
        })
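# A speculative sketch of what the create_values() helper used above might
# return, assuming it builds one row keyed by the fieldNames declared in main()
# and that MapClusterToMols() stores the cluster number in a 'Cluster' property.
# The real helper is defined elsewhere in this module; this is illustrative only.
from rdkit import Chem


def create_values_sketch(mols, i, j, similarity):
    mol_i, mol_j = mols[i], mols[j]
    return {
        'ID1': mol_i.GetProp('_Name') if mol_i.HasProp('_Name') else str(i),
        'ID2': mol_j.GetProp('_Name') if mol_j.HasProp('_Name') else str(j),
        'Cluster1': mol_i.GetIntProp('Cluster') if mol_i.HasProp('Cluster') else None,
        'Cluster2': mol_j.GetIntProp('Cluster') if mol_j.HasProp('Cluster') else None,
        'Similarity': similarity,
        'M1': Chem.MolToSmiles(mol_i),
        'M2': Chem.MolToSmiles(mol_j),
    }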