# NOTE(review): fragment of a larger Python 2 script (the enclosing function
# header, the imports, and the definitions of `options`, `path`, `scor`,
# `ml_func`, `read_dict`, `conf`, `inpath_cmp` and `fp_build` are all outside
# this view; L1 has also lost its leading indent in the scrape).
# It configures the similarity metric and output path, builds a Bernoulli
# Naive Bayes classifier, then loads pickled training actives per target and
# replaces each record with an [ID, fingerprint] pair.
simil_metric = 'Dice'
    if options.simil: simil_metric = options.simil
    outpath = path
    outpath_set = False
    if options.outpath:
        outpath_set = True
        outpath = path+options.outpath

    # check for sensible input
    if outpath_set: scor.checkPath(outpath, 'output')
    scor.checkSimil(simil_metric)

    # default machine-learning method variables
    ml_dict = dict(alpha=1.0, binarize=None, fit_prior=True)
    if options.ml:
        # optional user-supplied parameter file overrides the defaults above
        ml_dict = ml_func.readMLFile(ml_dict, read_dict, path+options.ml)

    # initialize machine-learning method
    ml = BernoulliNB(alpha=ml_dict['alpha'], binarize=ml_dict['binarize'], fit_prior=ml_dict['fit_prior'])

    # loop over targets
    for target in conf.set_data:
        print target

        # read in training actives and calculate fps
        # NOTE(review): pickle file opened in text mode 'r'; 'rb' would be
        # safer for binary pickle protocols — confirm how these were written.
        actives = cPickle.load(open(inpath_cmp+'ChEMBL_II/Target_no_'+str(target)+'.pkl', 'r'))
        for k in actives.keys():
            for i,m in enumerate(actives[k]):
                # m[1] presumably holds the molecule object passed to the
                # fingerprint builder — TODO confirm against the pickle layout.
                fp_dict = scor.getFP(fp_build, m[1])
                # replace raw record with [unique ID "<target>_<k>_A_<i+1>", fp]
                actives[k][i] = [str(target)+'_'+str(k)+'_A_'+str(i+1), fp_dict]
# --- Example #2 (scrape artifact: original separator text "Пример #2" and a vote count "0") ---
    # NOTE(review): second scraped fragment — the earlier lines defining
    # `outpath_set`, `outpath`, `simil_metric`, `num_query_mols`, `options`,
    # `path`, `scor`, `ml_func`, `read_dict` and `conf` are not visible here,
    # and the target loop at the bottom is cut off mid-body.
    # It validates the inputs and builds a Random Forest classifier from a
    # default (optionally file-overridden) parameter dict.
    # check for sensible input
    if outpath_set: scor.checkPath(outpath, 'output')
    scor.checkSimil(simil_metric)
    scor.checkQueryMols(num_query_mols, conf.list_num_query_mols)

    # default machine-learning method variables
    # (keys mirror RandomForestClassifier kwargs, except `num_estimators`,
    # which is mapped to `n_estimators` in the constructor call below)
    ml_dict = dict(criterion='gini',
                   max_features='auto',
                   n_jobs=1,
                   max_depth=10,
                   min_samples_split=2,
                   min_samples_leaf=1,
                   num_estimators=100)
    if options.ml:
        # optional user-supplied parameter file overrides the defaults above
        ml_dict = ml_func.readMLFile(ml_dict, read_dict, path + options.ml)

    # initialize machine-learning method
    # NOTE(review): max_features='auto' was deprecated in scikit-learn 1.1 and
    # removed in 1.3; valid for the Python 2 era sklearn this code targets.
    ml = RandomForestClassifier(criterion=ml_dict['criterion'],
                                max_features=ml_dict['max_features'],
                                min_samples_split=ml_dict['min_samples_split'],
                                max_depth=ml_dict['max_depth'],
                                min_samples_leaf=ml_dict['min_samples_leaf'],
                                n_estimators=ml_dict['num_estimators'],
                                n_jobs=ml_dict['n_jobs'])

    # loop over data-set sources
    for dataset in conf.set_data.keys():
        print dataset
        # loop over targets (body continues beyond this fragment)
        for target in conf.set_data[dataset]['ids']: