Exemplo n.º 1
0
def single_run(dkey, train_size, param, seed, profile=False):
    """Train and evaluate one WoodClassifier configuration and dump the results.

    Loads the requested data set, fits a ``WoodClassifier`` configured from
    ``param``, times the fit and the test-set prediction, hands the
    predictions to ``evaluate`` and finally writes the ``results`` dictionary
    as JSON below ``params.odir``.

    Args:
        dkey: Data set key; one of "covtype", "higgs" or "susy".
        train_size: Forwarded to the data set loader; also used as a
            directory component of the output path.
        param: Mapping providing the keys 'n_estimators', 'max_features',
            'n_jobs', 'bootstrap' and 'tree_type'. It is stored in the
            results as-is, so it has to be JSON serializable.
        seed: Forwarded to the data set loader and to the model; also part
            of the output file name.
        profile: If truthy, the fit is profiled with ``yep`` and written to
            "train.prof". Requires ``param['n_jobs'] == 1``.

    Raises:
        ValueError: If ``dkey`` is not one of the known data sets.
        AssertionError: If profiling is requested with ``n_jobs`` != 1.
    """
    print(
        "Processing data set %s with train_size %s, seed %s, and parameters %s ..."
        % (str(dkey), str(train_size), str(seed), str(param)))

    # Deliberately an if/elif chain: only the loader that is actually
    # selected gets looked up and called.
    if dkey == "covtype":
        Xtrain, ytrain, Xtest, ytest = covtype(train_size=train_size,
                                               seed=seed)
    elif dkey == "higgs":
        Xtrain, ytrain, Xtest, ytest = higgs(train_size=train_size, seed=seed)
    elif dkey == "susy":
        Xtrain, ytrain, Xtest, ytest = susy(train_size=train_size, seed=seed)
    else:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError("Unknown data set!")

    print("")
    print("Number of training patterns:\t%i" % Xtrain.shape[0])
    print("Number of test patterns:\t%i" % Xtest.shape[0])
    print("Dimensionality of the data:\t%i\n" % Xtrain.shape[1])

    model = WoodClassifier(n_estimators=param['n_estimators'],
                           criterion="gini",
                           max_features=param['max_features'],
                           min_samples_split=2,
                           n_jobs=param['n_jobs'],
                           seed=seed,
                           bootstrap=param['bootstrap'],
                           tree_traversal_mode="dfs",
                           tree_type=param['tree_type'],
                           min_samples_leaf=1,
                           float_type="double",
                           max_depth=None,
                           verbose=0)

    if profile:
        # Imported lazily so that yep is only required when profiling.
        import yep
        assert param['n_jobs'] == 1
        yep.start("train.prof")

    # training
    fit_start_time = time.time()
    try:
        model.fit(Xtrain, ytrain)
        # Taken before the profiler is stopped so that the measured time
        # covers the fit only.
        fit_end_time = time.time()
    finally:
        # Stop the profiler even if the fit fails; otherwise it would keep
        # running for the rest of the process.
        if profile:
            yep.stop()
    ypreds_train = model.predict(Xtrain)

    # testing
    test_start_time = time.time()
    ypred_test = model.predict(Xtest)
    test_end_time = time.time()

    results = {
        'dataset': dkey,
        'param': param,
        'training_time': fit_end_time - fit_start_time,
        'testing_time': test_end_time - test_start_time,
    }
    print("Training time:     %f" % results['training_time'])
    print("Testing time:      %f" % results['testing_time'])

    # evaluate() receives the results dict; presumably it adds the quality
    # measures for the given phase to it -- confirm against its definition.
    evaluate(ypreds_train, ytrain, results, "training")
    evaluate(ypred_test, ytest, results, "testing")

    # One file per parameter combination and seed:
    # <odir>/<dkey>/<train_size>/wood/<n_estimators>_..._<seed>.json
    name_parts = (
        param['n_estimators'],
        param['max_features'],
        param['n_jobs'],
        param['bootstrap'],
        param['tree_type'],
        seed,
    )
    fname = "_".join(str(part) for part in name_parts) + ".json"
    fname = os.path.join(params.odir, str(dkey), str(train_size), "wood",
                         fname)
    ensure_dir_for_file(fname)
    with open(fname, 'w') as fp:
        json.dump(results, fp)
Exemplo n.º 2
0
    Xtest = Xtest.astype(np.float32)
    ytest = ytest.astype(np.float32)

# Train a WoodClassifier on the previously loaded data and save the fitted
# model to ./models/.
print("")
print("Number of training patterns:\t%i" % Xtrain.shape[0])
print("Number of test patterns:\t%i" % Xtest.shape[0])
print("Dimensionality of the data:\t%i" % Xtrain.shape[1])

# Resolve the output path before training: an unsupported f_type then fails
# immediately instead of surfacing as a NameError at model.save() after the
# whole training run has finished.
# NOTE(review): f_type == 0 appears to denote the SUSY data set (judging by
# the file name) -- confirm against the code that sets f_type.
if f_type == 0:
    file_name = "./models/model_susy" + str(nr_tree) + "tree_4jobs.data"
elif f_type == 1:
    file_name = "./models/model_" + str(nr_tree) + "tree_4jobs.data"
else:
    raise ValueError("Unsupported f_type: %r (expected 0 or 1)" % (f_type,))

# n_jobs is fixed to 4 here, matching the "4jobs" part of the file name.
model = WoodClassifier(n_estimators=nr_tree,
                       criterion="gini",
                       max_features=None,
                       min_samples_split=2,
                       n_jobs=4,
                       seed=seed,
                       bootstrap=True,
                       tree_traversal_mode="dfs",
                       tree_type="standard",
                       min_samples_leaf=1,
                       float_type="double",
                       max_depth=None,
                       verbose=1)

# Wall-clock timestamps around the fit; not used within this section.
fit_start_time = time.time()
model.fit(Xtrain, ytrain)
fit_end_time = time.time()

model.save(file_name)