# optional arguments
    # fall back to the defaults when an option was not given
    do_append = options.do_append or False
    simil_metric = options.simil or 'Dice'
    outpath_set = bool(options.outpath)
    # a user-supplied output path is concatenated onto the base `path`
    outpath = path + options.outpath if outpath_set else path

    # check for sensible input
    fp_names = scor.checkFPFile(fp_file)
    if not fp_names:
        # build a single message string: passing fp_file as a second
        # exception argument makes the error print as a tuple
        raise ValueError('No fingerprints given in ' + str(fp_file))
    if outpath_set:
        scor.checkPath(outpath, 'output')
    scor.checkSimil(simil_metric)
    scor.checkQueryMols(num_query_mols, conf.list_num_query_mols)

    # loop over data-set sources
    for dataset in conf.set_data.keys():
        print dataset
        # loop over targets
        for target in conf.set_data[dataset]['ids']:
            print target

            # read in actives and calculate fps
            actives = []
            for line in gzip.open(inpath_cmp+dataset+'/cmp_list_'+dataset+'_'+str(target)+'_actives.dat.gz', 'r'):
                if line[0] != '#': 
                    # structure of line: [external ID, internal ID, SMILES]]
        do_append = options.do_append
    # similarity metric defaults to Dice unless overridden on the command line
    simil_metric = options.simil or "Dice"
    outpath = path
    outpath_set = bool(options.outpath)
    if outpath_set:
        # the output path is taken as given instead of being joined onto
        # `path` (changed to absolute, by @Matt)
        outpath = options.outpath
        # create the directory tree directly rather than interpolating the
        # user-supplied path into a shell command
        if not os.path.isdir(outpath):
            os.makedirs(outpath)

    # check for sensible input
    if outpath_set:
        scor.checkPath(outpath, "output")
    scor.checkSimil(simil_metric)
    scor.checkQueryMols(num_query_mols, conf.list_num_query_mols)

    # machine-learning settings used when no settings file is supplied
    ml_dict = {
        "criterion": "gini",
        "max_features": "auto",
        "n_jobs": 1,
        "max_depth": 10,
        "min_samples_split": 2,
        "min_samples_leaf": 1,
        "num_estimators": 100,
    }
    # a settings file given via the ml option replaces the defaults with
    # whatever ml_func.readMLFile returns
    if options.ml:
        ml_settings_file = path + options.ml
        ml_dict = ml_func.readMLFile(ml_dict, read_dict, ml_settings_file)
    # optional arguments
    # use the command-line value where one was given, else the default
    do_append = options.do_append or False
    simil_metric = options.simil or 'Dice'
    outpath_set = bool(options.outpath)
    if outpath_set:
        # the output path option is concatenated onto the base `path`
        outpath = path + options.outpath
    else:
        outpath = path

    # check for sensible input
    fp_names = scor.checkFPFile(fp_file)
    if not fp_names:
        # one formatted message instead of a two-element exception argument
        # tuple, so the file name reads naturally in the traceback
        raise ValueError('No fingerprints given in ' + str(fp_file))
    if outpath_set:
        scor.checkPath(outpath, 'output')
    scor.checkSimil(simil_metric)
    scor.checkQueryMols(num_query_mols, conf.list_num_query_mols)

    # loop over data-set sources
    for dataset in conf.set_data.keys():
        print dataset
        # loop over targets
        for target in conf.set_data[dataset]['ids']:
            print target

            # read in actives and calculate fps
            actives = []
            for line in gzip.open(
                    inpath_cmp + dataset + '/cmp_list_' + dataset + '_' +
                    str(target) + '_actives.dat.gz', 'r'):
    dest="do_append",
    action="store_true",
    help="append to the output file (default: False)",
)


######################## MAIN PART ###########################
if __name__ == "__main__":

    # parse the command line
    options, args = parser.parse_args()

    # required arguments: stop early when no input paths were given
    if not options.inpath:
        raise RuntimeError(
            "one or more of the required options was not given!"
        )
    # every input path is given relative to `path` and must pass the check
    inpath = [path + i for i in options.inpath]
    for inp in inpath:
        scor.checkPath(inp, "input")

    # optional arguments
    # data-fusion method: "max" unless the user picked a supported one
    method = options.method or "max"
    if method not in ("max", "ave"):
        raise ValueError(
            "method is unkown. supported methods are: max and ave"
        )
    remove_fps = []
# Example #5
# 0
# -a/--append: boolean flag stored on options.do_append
parser.add_option(
    "-a",
    "--append",
    dest="do_append",
    action="store_true",
    help="append to the output file (default: False)",
)

######################## MAIN PART ###########################
if __name__ == '__main__':

    # read in command line options
    options, args = parser.parse_args()

    # required arguments
    if not options.inpath:
        # nothing to work on without at least one input path
        raise RuntimeError(
            'one or more of the required options was not given!')
    inpath = [path + i for i in options.inpath]
    for inp in inpath:
        scor.checkPath(inp, 'input')

    # optional arguments
    # keep the default fusion method unless a supported one was requested
    method = 'max'
    requested_method = options.method
    if requested_method:
        if requested_method in ['max', 'ave']:
            method = requested_method
        else:
            raise ValueError(
                'method is unkown. supported methods are: max and ave')
    remove_fps = []
    if options.rm_file:
        remove_fps = scor.readFPs(path + options.rm_file)
        do_append = options.do_append
    # similarity metric defaults to Dice unless given on the command line
    simil_metric = options.simil or "Dice"
    outpath_set = bool(options.outpath)
    # a user-supplied output path is concatenated onto the base `path`
    outpath = path + options.outpath if outpath_set else path

    # check for sensible input
    fp_names = scor.checkFPFile(fp_file)
    if not fp_names:
        # format one message string; a second positional exception argument
        # would make the error display as a tuple
        raise ValueError("No fingerprints given in " + str(fp_file))
    if outpath_set:
        scor.checkPath(outpath, "output")
    scor.checkSimil(simil_metric)

    # loop over targets
    for target in conf.set_data:
        print target

        # read in training actives and calculate fps
        # NOTE(review): unpickling executes arbitrary code; only load
        # pickle files from a trusted source.
        actives_file = inpath_cmp + "ChEMBL_II/Target_no_" + str(target) + ".pkl"
        # the context manager closes the file handle once loading is done
        with open(actives_file, "r") as pkl_handle:
            actives = cPickle.load(pkl_handle)
        for k in actives:
            for i, m in enumerate(actives[k]):
                # replace each entry in place by [generated name, fp dict];
                # the name encodes target, key and 1-based position
                fp_dict = scor.getFPDict(fp_names, m[1])
                actives[k][i] = [str(target) + "_" + str(k) + "_A_" + str(i + 1), fp_dict]

        # read in test actives and calculate fps
        div_actives = []
            method = options.method
            MODE = "SIM"
        else:
            raise RuntimeError("the method option was not recognized")
    else:
        method = "Tanimoto"
        MODE = "SIM"

    # results go to ./benchmark_results/ unless an output dir was requested
    DEFAULT_OUTDIR = os.path.join(os.getcwd(), "benchmark_results/")
    outdir = options.outdir or DEFAULT_OUTDIR
    if not os.path.exists(outdir):
        # create the directory without a shell, so odd characters in the
        # path cannot be interpreted as shell syntax and a failure raises
        # OSError instead of being ignored; unlike plain `mkdir`, missing
        # parent directories are created as well
        os.makedirs(outdir)
    scor.checkPath(outdir, "outdir")

    ### run the required benchmark scripts ###

    # first score
    os.chdir(scoring_path)
    print("cwd: ", os.getcwd())
    if MODE == "ML":
        # need to change dir
        os.system(
            "python calculate_scored_lists_{ml_method}.py -n {ml_num} -f {ml_fp} -s {ml_sim} -o {ml_outpath}".format(
                ml_method=method,
                ml_num=num_query_mols,
                ml_fp=fp,
                ml_sim="Tanimoto",
                ml_outpath=os.path.join(
# Example #8
# 0
# -m/--method: data-fusion method, stored on options.method
parser.add_option(
    "-m",
    "--method",
    dest="method",
    help="method for data fusion (max or ave, default: max)",
)
# -r/--remove: file listing fingerprints to leave out, stored on options.rm_file
parser.add_option(
    "-r",
    "--remove",
    dest="rm_file",
    metavar="FILE",
    help="FILE containing the fingerprints to be left out (default: all fingerprints are read)",
)
# -o/--outpath: output path, stored on options.outpath
parser.add_option(
    "-o",
    "--outpath",
    dest="outpath",
    metavar="PATH",
    help="relative output PATH (default: pwd)",
)
# -a/--append: boolean flag stored on options.do_append
parser.add_option(
    "-a",
    "--append",
    dest="do_append",
    action="store_true",
    help="append to the output file (default: False)",
)


######################## MAIN PART ###########################
if __name__ == '__main__':

    # read in command line options
    options, args = parser.parse_args()

    # required arguments: input paths are mandatory
    if not options.inpath:
        raise RuntimeError('one or more of the required options was not given!')
    # each input path is concatenated onto `path` and checked
    inpath = [path + i for i in options.inpath]
    for inp in inpath:
        scor.checkPath(inp, 'input')

    # optional arguments
    # fusion method defaults to 'max'; anything else must be supported
    method = options.method or 'max'
    if method not in ('max', 'ave'):
        raise ValueError('method is unkown. supported methods are: max and ave')
    # fingerprints to leave out are read only when a removal file was given
    remove_fps = scor.readFPs(path + options.rm_file) if options.rm_file else []
    outpath = path
    if options.outpath: