import argparse
import collections
import os
import unittest

# NOTE: the helper modules below (parameter_utils, utils, rdkit_utils,
# cluster_butina) come from the InformaticsMatters pipelines projects; these
# exact import paths are assumptions and may need adjusting for your layout.
from pipelines_utils import parameter_utils, utils
from pipelines_utils_rdkit import rdkit_utils
from pipelines.rdkit import cluster_butina
from rdkit import rdBase


def main():

    ### command line args definitions #########################################

    parser = argparse.ArgumentParser(description='RDKit Input Splitter')
    parameter_utils.add_default_input_args(parser)
    parser.add_argument('-o',
                        '--output',
                        required=True,
                        help="Directory name for output files (no extension).")
    parser.add_argument(
        '-f',
        '--field',
        required=True,
        help="Field used to split the input. Output files are named after "
             "this field's value.")
    parser.add_argument('--meta',
                        action='store_true',
                        help='Write metadata and metrics files')

    args = parser.parse_args()
    utils.log("Splitter Args: ", args)

    filenames = split(args.input, args.informat, args.field, args.output,
                      args.meta)
    utils.log("Files generated:", " ".join(filenames))


class ParameterUtilsTest(unittest.TestCase):

    def test_add_default_input_args_with_long_options(self):
        """Checks that the default input arguments are added to the parser."""
        parser = argparse.ArgumentParser()

        parameter_utils.add_default_input_args(parser)

        result = parser.parse_args('--input inputfile --informat sdf'.split())
        self.assertEqual('inputfile', result.input)
        self.assertEqual('sdf', result.informat)
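
# For reference, a minimal sketch of what parameter_utils.add_default_input_args
# plausibly registers, inferred from how the mains and the test above use it;
# the real pipelines-utils implementation may differ or add more options.
def add_default_input_args_sketch(parser):
    parser.add_argument('-i', '--input',
                        help="Input file; if not specified STDIN is assumed")
    parser.add_argument('-if', '--informat',
                        choices=['sdf', 'json'],
                        help="Input format; if not specified it is inferred "
                             "from the file name")
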
def main():

    # Example usage:
    # python -m pipelines.xchem.featurestein_generate -i ../../data/mpro/hits-17.sdf.gz -f mpro-fstein.p

    global fmaps

    parser = argparse.ArgumentParser(
        description='FeatureStein generation with RDKit')
    parameter_utils.add_default_input_args(parser)
    parser.add_argument('-f',
                        '--feat-map',
                        default='featurestein.p',
                        help='Name of pickle to generate')
    parser.add_argument('--metrics', action='store_true', help='Write metrics')

    args = parser.parse_args()
    utils.log("FeatureStein Args: ", args)

    inputs_file, inputs_supplr = rdkit_utils.default_open_input(
        args.input, args.informat)

    # this does the processing
    num_mols, num_feats = process(inputs_supplr, args.feat_map)

    inputs_file.close()

    if args.metrics:
        # NOTE: 'output_base' was referenced but never defined in the original
        # snippet; deriving it from the pickle name is an assumption.
        output_base = os.path.splitext(args.feat_map)[0]
        utils.write_metrics(output_base, {
            '__StatusMessage__': 'Generated ' + str(num_feats) + ' features from ' +
                                 str(num_mols) + ' molecules',
            '__InputCount__': num_mols,
            'RDKitFeatureMap': num_mols
        })
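
# process() isn't shown in this snippet. The sketch below illustrates the
# general idea under stated assumptions: collect pharmacophore features from
# every input molecule and pickle them as plain tuples. The representation and
# return values are guesses, not the actual FeatureStein implementation.
def process_sketch(supplier, pickle_file):
    import pickle

    from rdkit import RDConfig
    from rdkit.Chem import ChemicalFeatures

    factory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    num_mols = 0
    features = []
    for mol in supplier:
        if mol is None:
            continue
        num_mols += 1
        for feat in factory.GetFeaturesForMol(mol):
            pos = feat.GetPos()
            # plain tuples keep the pickle free of RDKit C++ objects
            features.append((feat.GetFamily(), (pos.x, pos.y, pos.z)))
    with open(pickle_file, 'wb') as f:
        pickle.dump(features, f)
    return num_mols, len(features)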

# Example 4

def main():
    # Example usage:
    # 1. Create keycloak token:
    # export KEYCLOAK_TOKEN=$(curl -d "grant_type=password" -d "client_id=fragnet-search" -d "username=<username>" -d "password=<password>" \
    #   https://squonk.it/auth/realms/squonk/protocol/openid-connect/token 2> /dev/null | jq -r '.access_token')
    #
    # 2. Run the module:
    #  python -m pipelines.xchem.fragnet_expand -i ../../data/mpro/hits-17.sdf.gz --token $KEYCLOAK_TOKEN

    parser = argparse.ArgumentParser(
        description='Fragnet expand scoring with RDKit')
    parameter_utils.add_default_input_args(parser)
    parser.add_argument('--hac-min',
                        type=int,
                        default=3,
                        help='The min change in heavy atom count')
    parser.add_argument('--hac-max',
                        type=int,
                        default=3,
                        help='The max change in heavy atom count')
    parser.add_argument('--rac-min',
                        type=int,
                        default=1,
                        help='The min change in ring atom count')
    parser.add_argument('--rac-max',
                        type=int,
                        default=1,
                        help='The max change in ring atom count')
    parser.add_argument('--hops',
                        type=int,
                        default=1,
                        help='The number of graph traversals (hops)')
    parser.add_argument(
        '-s',
        '--server',
        default='https://fragnet-external.xchem-dev.diamond.ac.uk',
        help='The fragnet search server')
    parser.add_argument(
        '--token',
        help='Keycloak auth token (or specify as KEYCLOAK_TOKEN env variable)')
    parser.add_argument(
        '--index-as-filename',
        action='store_true',
        help='Use the index as the file name instead of the molecule name')

    args = parser.parse_args()
    utils.log("FragnetExpand Args: ", args)

    inputs_file, inputs_supplr = rdkit_utils.default_open_input(
        args.input, args.informat)

    if args.token:
        auth_token = args.token
    else:
        auth_token = os.getenv('KEYCLOAK_TOKEN')
    if not auth_token:
        utils.log('WARNING: no authentication token found in the '
                  'KEYCLOAK_TOKEN environment variable')

    # this does the processing
    process(inputs_supplr,
            hac_min=args.hac_min,
            hac_max=args.hac_max,
            rac_min=args.rac_min,
            rac_max=args.rac_max,
            hops=args.hops,
            server=args.server,
            token=auth_token,
            index_as_filename=args.index_as_filename)

    inputs_file.close()
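
# process() performs the fragnet-search queries; a hedged sketch of a single
# expansion request is shown below. The endpoint path and query parameter
# names are illustrative assumptions, not the documented fragnet-search API.
def expand_molecule_sketch(smiles, server, token,
                           hac_min=3, hac_max=3, rac_min=1, rac_max=1, hops=1):
    from urllib.parse import quote

    import requests

    url = server + '/fragnet-search/rest/v2/search/expand/' + quote(smiles, safe='')
    params = {'hacMin': hac_min, 'hacMax': hac_max,
              'racMin': rac_min, 'racMax': rac_max, 'hops': hops}
    headers = {'Authorization': 'Bearer ' + token} if token else {}
    response = requests.get(url, params=params, headers=headers)
    response.raise_for_status()
    return response.json()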

# Example 5

def main():

    global work_dir

    parser = argparse.ArgumentParser(description='DLS Deep - pose scoring')
    parameter_utils.add_default_input_args(parser)
    parser.add_argument(
        '--no-gzip',
        action='store_true',
        help='Do not compress the output (STDOUT is never compressed)')
    parser.add_argument('-r',
                        '--receptor',
                        help="Receptor file for scoring (PDB or Mol2 format)")
    parser.add_argument('-o',
                        '--outfile',
                        default='scored_ligands',
                        help="Base file name for results")
    parser.add_argument('-of',
                        '--outformat',
                        choices=['sdf', 'json'],
                        default='sdf',
                        help="Output format. Defaults to 'sdf'.")
    parser.add_argument('-w',
                        '--work-dir',
                        default=".",
                        help="Working directory")
    parser.add_argument('--mock',
                        action='store_true',
                        help='Generate mock scores rather than run on GPU')
    parser.add_argument('--thin', action='store_true', help='Thin output mode')

    args = parser.parse_args()
    utils.log("DLS deep args: ", args)

    work_dir = args.work_dir

    informat = args.informat
    protein = args.receptor
    ligands = args.input
    outfile = args.outfile

    if informat == 'json' or ligands.lower().endswith(
            '.data') or ligands.lower().endswith('.data.gz'):
        # we need to write to SDF
        utils.log("Converting ligands from JSON to SDF")
        ligands_sdf = "{0}{1}ligands.sdf".format(work_dir, os.path.sep)
        write_json_as_sdf(ligands, ligands_sdf)

    elif informat == 'sdf' or ligands.lower().endswith(
            '.sdf') or ligands.lower().endswith('.sdf.gz'):
        ligands_sdf = ligands

    else:
        raise ValueError("Unexpected input format for ligands")

    # # Open the output file
    # s_now = datetime.datetime.utcnow().strftime("%d-%b-%Y %H:%M:%S UTC")
    # source = 'pipelines/gnina/dls-deep-score.py'
    # output, WRITER, output_base = \
    #     rdkit_utils.default_open_output(args.output, "dls-deep-score", args.outformat,
    #                                     compress=not args.no_gzip,
    #                                     thinOutput=args.thin,
    #                                     valueClassMappings={'dls-deep-score': 'java.lang.Float'},
    #                                     datasetMetaProps={'created': s_now,
    #                                                       'source': source,
    #                                                       'description': 'DLS Deep - pose scoring'}
    #                                     )
    #
    # PDB_PATH = args.pdb_file
    #    # Close the file
    # WRITER.close()

    write_inputs(protein, ligands_sdf)
    if args.mock:
        mock_predictions()
    else:
        run_predictions()
    scores = read_predictions()

    if args.outformat == 'sdf':
        patch_scores_sdf(ligands_sdf, outfile, scores)
        if not args.no_gzip:
            os.system("gzip {0}{1}{2}.sdf".format(work_dir, os.path.sep,
                                                  outfile))
    elif args.outformat == 'json':
        patch_scores_json(ligands_sdf, outfile, scores)
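

# patch_scores_sdf() is not shown in this snippet. A minimal sketch of that
# step follows: re-read the ligands and attach each prediction as an SD
# property. The property name 'dls-deep-score' and the assumption that the
# scores list lines up with the input order are illustrative guesses.
def patch_scores_sdf_sketch(ligands_sdf, outfile, scores):
    import gzip

    from rdkit import Chem

    if ligands_sdf.lower().endswith('.gz'):
        ligands_in = gzip.open(ligands_sdf, 'rb')
    else:
        ligands_in = open(ligands_sdf, 'rb')
    suppl = Chem.ForwardSDMolSupplier(ligands_in)
    writer = Chem.SDWriter("{0}{1}{2}.sdf".format(work_dir, os.path.sep, outfile))
    for mol, score in zip(suppl, scores):
        if mol is None:
            continue
        mol.SetDoubleProp('dls-deep-score', float(score))
        writer.write(mol)
    writer.close()
    ligands_in.close()
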
def main():

    ### command line args definitions #########################################

    parser = argparse.ArgumentParser(description='RDKit Butina Cluster Matrix')
    parameter_utils.add_default_input_args(parser)
    parser.add_argument(
        '-o',
        '--output',
        help="Base name for the output file (no extension). If not defined "
             "then STDOUT is used for the structures and 'output' is used as "
             "the base name for the other files.")
    parser.add_argument('-of',
                        '--outformat',
                        choices=['tsv', 'json'],
                        default='tsv',
                        help="Output format. Defaults to 'tsv'.")
    parser.add_argument('--meta',
                        action='store_true',
                        help='Write metadata and metrics files')
    parser.add_argument(
        '-t',
        '--threshold',
        type=float,
        default=0.7,
        help='Similarity clustering threshold (1.0 means identical)')
    parser.add_argument(
        '-mt',
        '--matrixThreshold',
        type=float,
        default=0.5,
        help='Threshold for outputting values (1.0 means identical)')
    parser.add_argument('-d',
                        '--descriptor',
                        type=str.lower,
                        choices=list(cluster_butina.descriptors.keys()),
                        default='rdkit',
                        help='descriptor or fingerprint type (default rdkit)')
    parser.add_argument('-m',
                        '--metric',
                        type=str.lower,
                        choices=list(cluster_butina.metrics.keys()),
                        default='tanimoto',
                        help='similarity metric (default tanimoto)')
    parser.add_argument('-q',
                        '--quiet',
                        action='store_true',
                        help='Quiet mode')

    args = parser.parse_args()
    utils.log("Cluster Matrix Args: ", args)

    descriptor = cluster_butina.descriptors[args.descriptor]
    if descriptor is None:
        raise ValueError('Invalid descriptor name ' + args.descriptor)

    input, suppl = rdkit_utils.default_open_input(args.input, args.informat)

    # handle metadata
    source = "cluster_butina_matrix.py"
    datasetMetaProps = {
        "source": source,
        "description": "Butina clustering using RDKit " + rdBase.rdkitVersion
    }
    clsMappings = {
        "Cluster1": "java.lang.Integer",
        "Cluster2": "java.lang.Integer",
        "ID1": "java.lang.String",
        "ID2": "java.lang.String",
        "M1": "java.lang.String",
        "M2": "java.lang.String",
        "Similarity": "java.lang.Float"
    }
    fieldMetaProps = [{
        "fieldName": "Cluster",
        "values": {
            "source": source,
            "description": "Cluster number"
        }
    }]

    fieldNames = collections.OrderedDict()
    fieldNames['ID1'] = 'ID1'
    fieldNames['ID2'] = 'ID2'
    fieldNames['Cluster1'] = 'Cluster1'
    fieldNames['Cluster2'] = 'Cluster2'
    fieldNames['Similarity'] = 'Similarity'
    fieldNames['M1'] = 'M1'
    fieldNames['M2'] = 'M2'

    writer, output_base = utils.create_simple_writer(
        args.output, 'cluster_butina_matrix', args.outformat, fieldNames,
        valueClassMappings=clsMappings,
        datasetMetaProps=datasetMetaProps,
        fieldMetaProps=fieldMetaProps)

    ### generate fingerprints
    mols = [x for x in suppl if x is not None]
    fps = [descriptor(x) for x in mols]
    input.close()

    ### do clustering
    utils.log("Clustering with descriptor", args.descriptor, "metric",
              args.metric, "and threshold", args.threshold)
    clusters, dists, matrix = cluster_butina.ClusterFps(
        fps, args.metric, 1.0 - args.threshold)
    utils.log("Found", len(clusters), "clusters")

    MapClusterToMols(clusters, mols)

    if not args.quiet:
        utils.log("Clusters:", clusters)

    writer.writeHeader()

    size = len(matrix)
    #utils.log("len(matrix):", size)
    count = 0
    for i in range(size):
        #utils.log("element",i, "has length", len(matrix[i]))
        writer.write(create_values(mols, i, i, 1.0))
        count += 1
        for j in range(len(matrix[i])):
            #utils.log("writing",i,j)
            dist = matrix[i][j]
            if dist > args.matrixThreshold:
                # the matrix is the lower left segment without the diagonal
                x = j
                y = i + 1
                writer.write(create_values(mols, x, y, dist))
                writer.write(create_values(mols, y, x, dist))
                count += 2
    writer.write(create_values(mols, size, size, 1.0))

    writer.writeFooter()
    writer.close()

    if args.meta:
        # Report the molecule count rather than the leaked loop variable 'i',
        # which would under-count the inputs by one.
        utils.write_metrics(output_base, {
            '__InputCount__': len(mols),
            '__OutputCount__': count,
            'RDKitCluster': len(mols)
        })
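

# MapClusterToMols() and create_values() are referenced above but not shown.
# The sketches below are plausible implementations under stated assumptions:
# the cluster index is stored in a 'Cluster' property and molecule names carry
# the IDs. The real pipelines code may differ.
def MapClusterToMols_sketch(clusters, mols):
    for cluster_num, cluster in enumerate(clusters):
        for mol_idx in cluster:
            mols[mol_idx].SetIntProp('Cluster', cluster_num)


def create_values_sketch(mols, i, j, similarity):
    from rdkit import Chem

    mol1, mol2 = mols[i], mols[j]
    return {
        'ID1': mol1.GetProp('_Name'),
        'ID2': mol2.GetProp('_Name'),
        'Cluster1': mol1.GetIntProp('Cluster'),
        'Cluster2': mol2.GetIntProp('Cluster'),
        'Similarity': similarity,
        'M1': Chem.MolToSmiles(mol1),
        'M2': Chem.MolToSmiles(mol2)
    }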