Example #1
import numpy as np
from sklearn.ensemble import RandomForestClassifier

def deserialize_random_forest(model_dict):
    # deserialize_decision_tree is the per-tree counterpart, defined elsewhere in the source module
    model = RandomForestClassifier(**model_dict['params'])
    estimators = [deserialize_decision_tree(decision_tree) for decision_tree in model_dict['estimators_']]
    model.estimators_ = np.array(estimators)

    model.classes_ = np.array(model_dict['classes_'])
    model.n_features_ = model_dict['n_features_']
    model.n_outputs_ = model_dict['n_outputs_']
    model.max_depth = model_dict['max_depth']
    model.min_samples_split = model_dict['min_samples_split']
    model.min_samples_leaf = model_dict['min_samples_leaf']
    model.min_weight_fraction_leaf = model_dict['min_weight_fraction_leaf']
    model.max_features = model_dict['max_features']
    model.max_leaf_nodes = model_dict['max_leaf_nodes']
    model.min_impurity_decrease = model_dict['min_impurity_decrease']
    model.min_impurity_split = model_dict['min_impurity_split']

    if 'oob_score_' in model_dict:
        model.oob_score_ = model_dict['oob_score_']
    if 'oob_decision_function_' in model_dict:
        model.oob_decision_function_ = model_dict['oob_decision_function_']

    if isinstance(model_dict['n_classes_'], list):
        model.n_classes_ = np.array(model_dict['n_classes_'])
    else:
        model.n_classes_ = model_dict['n_classes_']

    return model
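For context, a serializer that produces the dictionary consumed above might look like the sketch below. It assumes a serialize_decision_tree helper mirroring deserialize_decision_tree; this is an illustrative reconstruction, not part of the original source.

def serialize_random_forest(model):
    # Hypothetical counterpart (assumption): gather the constructor params
    # plus every fitted attribute the deserializer above reads back.
    model_dict = {
        'params': model.get_params(),
        'estimators_': [serialize_decision_tree(t) for t in model.estimators_],
        'classes_': model.classes_.tolist(),
        'n_features_': model.n_features_,
        'n_outputs_': model.n_outputs_,
        'n_classes_': (model.n_classes_.tolist()
                       if hasattr(model.n_classes_, 'tolist') else model.n_classes_),
    }
    # hyperparameters restored individually by the deserializer
    for key in ('max_depth', 'min_samples_split', 'min_samples_leaf',
                'min_weight_fraction_leaf', 'max_features', 'max_leaf_nodes',
                'min_impurity_decrease', 'min_impurity_split'):
        model_dict[key] = getattr(model, key)
    if hasattr(model, 'oob_score_'):
        model_dict['oob_score_'] = model.oob_score_
        model_dict['oob_decision_function_'] = model.oob_decision_function_.tolist()
    return model_dict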
Example #2
def main():
    _n_classes = 2
    sigmas = [0.3, 0.7, 1., 3.5]
    args = parse_args()
    path = args.path
    _train_split = args.train_split
    if args.test is None:
        whole = args.whole
        if whole:
            rfc = RandomForestClassifier(n_estimators=200, n_jobs=-1)
            rfc.n_classes_ = _n_classes
            rfc = train_forest_whole(rfc, path)
        else:
            batch_size = args.batch_size
            n_estimators = 100
            shape = Brats15NumpyDataset(path, True, 1., -1)[0][0].shape[1:]
            #np_transform = [crop, resize]
            #np_params = [{'size': np.multiply(shape, 0.8).astype(int)},
            #             {'output_shape': np.multiply(shape, 0.4).astype(int),
            #              'mode': 'constant'}]
            #np_transform = None
            #np_params = None
            np_transform = [crop]
            np_params = [{'size': np.multiply(shape, 0.8).astype(int)}]

            dset_train = Brats15NumpyDataset(path,
                                             True,
                                             _train_split,
                                             _random_state,
                                             transform=None,
                                             np_transform=np_transform,
                                             np_transform_params=np_params,
                                             tensor_conversion=False)

            rfc = RandomForestClassifier(n_estimators=0,
                                         warm_start=True,
                                         n_jobs=-1)
            rfc.n_classes_ = _n_classes
            rfc = train_forest_sequential(rfc,
                                          dset_train,
                                          sigmas,
                                          batch_size=batch_size,
                                          n_estimators=n_estimators)
        joblib.dump(rfc, 'rfc_test_{}.pkl'.format(int(whole)))
    else:
        dset_test = Brats15NumpyDataset(path,
                                        False,
                                        _train_split,
                                        _random_state,
                                        transform=None,
                                        np_transform=None,
                                        np_transform_params=None,
                                        tensor_conversion=False)
        test(dset_test,
             sigmas,
             load_path=args.test,
             sample_imgs=args.sample_imgs)
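train_forest_sequential is not shown here, but the n_estimators=0 / warm_start=True combination is the standard scikit-learn idiom for growing a forest batch by batch: raise n_estimators before each fit() call and only the new trees are trained. A minimal self-contained sketch of that pattern (the batching itself is an assumption, not the author's code):

import numpy as np
from sklearn.ensemble import RandomForestClassifier

def train_warm_start(batches, trees_per_batch=10):
    rfc = RandomForestClassifier(n_estimators=0, warm_start=True, n_jobs=-1)
    for X_batch, y_batch in batches:
        rfc.n_estimators += trees_per_batch  # enlarge the target ensemble size
        rfc.fit(X_batch, y_batch)            # fits only the newly added trees
    return rfc

rng = np.random.RandomState(0)
batches = [(rng.rand(100, 5), rng.randint(0, 2, 100)) for _ in range(5)]
model = train_warm_start(batches)
print(len(model.estimators_))  # 50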
Example #3
def ReprodIndividualsFromRF(list_indiv, max_id, options):

    list_indiv = list(list_indiv)
    rf = RandomForestClassifier(n_estimators=len(list_indiv))
    trees = list()
    for indiv in list_indiv:
        trees.append(indiv.clf)

    rf.estimators_ = trees
    rf.n_classes_ = trees[0].n_classes_
    rf.classes_ = trees[0].classes_

    new_dt = eqtree_rec_rf(rf,
                           0,
                           max_depth=options['max_depth'],
                           smallest_tree=False)

    new_id = max_id + 1

    indiv3 = genetic.individual(new_dt,
                                new_id,
                                type_rf=False,
                                alpha=options['alpha'],
                                evaluate_on_data=options['on_data'],
                                X=options['X'],
                                y=options['y'])

    return indiv3
Example #4
def train_random_forest(X, Y, estimators, classes, features):
    train_x = X[0:3600, :]
    train_y = Y[0:3600]
    validate_x = X[3600:, :]
    validate_y = Y[3600:]
    clf = RandomForestClassifier(n_estimators=estimators)
    clf.n_classes_ = classes
    clf.n_features_ = features
    clf.fit(train_x, train_y)
    predictions = clf.predict(validate_x).reshape(400, 1)
    result = sum([1 if predictions[i] == validate_y[i] else 0 for i in range(validate_y.shape[0])]) / \
             validate_y.shape[0] * 100
    return result
Example #5
def random_forest(X, Y, n_samples):
    clf = RandomForestClassifier(n_estimators=500,
                                 max_depth=10,
                                 random_state=0)
    clf.n_classes_ = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    clf.fit(X[:n_samples // 2], Y[:n_samples // 2])
    start_time = time.time()
    predicted = clf.predict(X[n_samples // 2:])
    elapsed_time = time.time() - start_time

    expected = Y[n_samples // 2:]

    return [elapsed_time, sum(expected == predicted) * 1. / len(expected)]
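Note that the clf.n_classes_ assignments in these examples have no lasting effect: scikit-learn derives classes_ and n_classes_ from y inside fit() and overwrites any manually set value, as this quick check shows:

import numpy as np
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=10, random_state=0)
clf.n_classes_ = [0, 1, 2, 3]  # manual assignment, as in the examples above
clf.fit(np.random.rand(30, 4), np.repeat([0, 1], 15))
print(clf.n_classes_)  # 2 -- recomputed from y during fit()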
Example #6
def learn(train_set):
    clf = RandomForestClassifier(n_estimators=150)
    clf.n_classes_ = len(object_types)

    # x -- 2d array, x[i] -- list of feature values for segment i
    # y -- 1d array, y[i] -- true class of segment i
    x = []
    y = []

    for num in train_set:
        x += get_features_value(num)
        y += get_segments_classes(num)

    clf.fit(x, y)
    cPickle.dump(clf, open("classifier", "wb+"))  # Python 2 cPickle; on Python 3 use pickle
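The pickled classifier can later be restored for prediction; a minimal sketch matching the dump above (Python 2, as in the example):

import cPickle

clf = cPickle.load(open("classifier", "rb"))
# e.g. predictions = clf.predict(get_features_value(num))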
Example #7
def build_classifier(trees):
    def build_decision_tree(t):
        dt = DecisionTreeClassifier(random_state=0)
        dt.n_features_ = t.n_features
        dt.n_outputs_ = t.n_outputs
        dt.n_classes_ = t.n_classes[0]
        dt.classes_ = np.array([x for x in range(dt.n_classes_)])
        dt.tree_ = t
        return dt

    if len(trees) > 1:
        clf = RandomForestClassifier(random_state=0, n_estimators=len(trees))
        clf.estimators_ = [build_decision_tree(t) for t in trees]
        clf.n_features_ = trees[0].n_features
        clf.n_outputs_ = trees[0].n_outputs
        clf.n_classes_ = trees[0].n_classes[0]
        clf.classes_ = np.array([x for x in range(clf.n_classes_)])
    else:
        clf = build_decision_tree(trees[0])
    return clf
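One way to exercise build_classifier is to harvest the low-level tree_ objects from an already fitted forest; they expose the n_features, n_outputs and n_classes attributes the function reads. An illustrative round trip (an assumption, not the original usage; newer scikit-learn versions may additionally require attributes such as n_features_in_ before predict works):

import numpy as np
from sklearn.ensemble import RandomForestClassifier

X = np.random.rand(100, 4)
y = np.random.randint(0, 3, 100)
fitted = RandomForestClassifier(n_estimators=5, random_state=0).fit(X, y)

trees = [est.tree_ for est in fitted.estimators_]  # raw Tree objects
clf = build_classifier(trees)
print(clf.predict(X[:3]))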
Example #8
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
import sys
sys.path.append('..')
import data_loader
from data_loader import *

classifier = RandomForestClassifier(
    n_estimators=20,
    min_samples_leaf=200
)
classifier.n_classes_ = 2

classifier.fit(train_data, label_data.ravel())

prediction = classifier.predict(test)

frame = pd.DataFrame(data=prediction)
frame.index += 1

frame.to_csv(
    "random_forest.csv", index=True, header=['Solution'], index_label='Id')

if __name__ == "__main__":
    pass
Example #9
	X[i,:]=image
	#images[i,:,:] = image

#print(X)
#print(y)

clf = RandomForestClassifier(n_estimators=500, max_depth=10, random_state=0)
clf.n_classes_ = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

clf.fit(X[:n_samples // 2], Y[:n_samples // 2])

predicted = clf.predict(X[n_samples // 2:])
expected = Y[n_samples // 2:]
print("Classification report for classifier %s:\n%s\n"
      % (clf, metrics.classification_report(expected, predicted)))
print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))
images_and_predictions = list(zip(images[n_samples // 2:], predicted))
for index, (image, prediction) in enumerate(images_and_predictions[:25]):
    plt.subplot(5, 5, index+1)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
Example #10
index = 0
for tweet in tweets:
    words = tweet.split(' ')
    for word in words:
        tweetX[index][bagOfWords[word]] += 1
    index += 1

#75-25 Train-Test split results
X_train, X_test, y_train, y_test = train_test_split(tweetX,
                                                    labels,
                                                    test_size=0.25,
                                                    random_state=42)

rf = RandomForestClassifier(n_estimators=50)
rf.classes_ = [0, 1, -1]
rf.n_classes_ = 3
rf.fit(X_train, y_train)

a = rf.predict(X_test)

print 'Train-Test Split Results'
print np.sum(a == y_test) * 100.0 / len(y_test)

#Cross Validation Results
'''
rf = RandomForestClassifier(n_estimators=50)
rf.classes_ = [0,1,-1]
rf.n_classes_  = 3
rf.fit(tweetX,labels)
'''
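The commented-out block above refits manually per fold; scikit-learn's model_selection utilities express the same cross-validation experiment more compactly (a minimal sketch reusing tweetX and labels from above):

from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=50)
scores = cross_val_score(rf, tweetX, labels, cv=5, scoring='accuracy')
print(scores.mean() * 100.0)  # mean cross-validated accuracy, in percent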
Example #11
    RF = RandomForestClassifier(n_estimators=rfSize)
    RF.fit(train_x, train_y)
    RF_path = model_path + '/RF.m'
    joblib.dump(RF, RF_path)

    # BRAF
    rf3 = RandomForestClassifier(n_estimators=rf2_size)
    rf3.fit(training_c_x, training_c_y)
    rf3_path = model_path + '/rf3.m'
    joblib.dump(rf3, rf3_path)

    RF1 = RandomForestClassifier(n_estimators=rfSize)
    Gobaltree = rf1.estimators_ + rf3.estimators_  # rf1 (and rf2 below) are fitted earlier in this truncated excerpt
    RF1.estimators_ = Gobaltree
    RF1.classes_ = rf1.classes_
    RF1.n_classes_ = rf1.n_classes_
    RF1.n_outputs_ = rf1.n_outputs_
    RF1_path = model_path + '/braf.m'
    joblib.dump(RF1, RF1_path)

    # DBRF
    RF2 = RandomForestClassifier(n_estimators=rfSize)
    mod_Gobaltree = rf1.estimators_ + rf2.estimators_
    RF2.estimators_ = mod_Gobaltree
    RF2.classes_ = rf2.classes_
    RF2.n_classes_ = rf2.n_classes_
    RF2.n_outputs_ = rf2.n_outputs_
    RF2_path = model_path + '/borderlindbscan.m'
    joblib.dump(RF2, RF2_path)
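Note that RF1 and RF2 are constructed with n_estimators=rfSize but end up holding more trees once the estimators_ lists are concatenated. A helper that keeps the merged forest self-consistent might look like this (an illustrative sketch, not part of the original; it assumes both forests were fitted on the same label set):

from sklearn.ensemble import RandomForestClassifier

def merge_forests(rf_a, rf_b):
    merged = RandomForestClassifier()
    merged.estimators_ = list(rf_a.estimators_) + list(rf_b.estimators_)
    merged.n_estimators = len(merged.estimators_)  # keep the declared count consistent
    merged.classes_ = rf_a.classes_
    merged.n_classes_ = rf_a.n_classes_
    merged.n_outputs_ = rf_a.n_outputs_
    return merged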

    from sklearn import metrics
def Train1(X, y):
	rfc = RandomForestClassifier(n_estimators=10, oob_score=True)
	rfc.n_classes_ = 3
	model = rfc.fit(X, y)
	return model
def Train_Kfold(X, y, K):
	#y = np.array(y)
	kf = KFold(X.shape[0], n_folds = K)  # pre-0.18 scikit-learn API (sklearn.cross_validation.KFold)

	record = {}

	k = 0
	for train_index, test_index in kf:
		k = k + 1
		rfc = RandomForestClassifier(n_estimators=5, oob_score=True)
		rfc.n_classes_ = 3
		model = rfc.fit(X[train_index], y[train_index])
		pred = model.predict(X[test_index])
		count = 0
		#AB: predicted as "1" while really is "2"
		AA, AB, AC, BA, BB, BC, CA, CB, CC = 0,0,0,0,0,0,0,0,0
		TA, TB, TC, PA, PB, PC = 0,0,0,0,0,0
		for i in range(len(pred)):
			if pred[i] == '1' and y[test_index][i] == '1':
				AA = AA + 1
				PA = PA + 1
				TA = TA + 1
			if pred[i] == '1' and y[test_index][i] == '2':
				AB = AB + 1
				PA = PA + 1
				TB = TB + 1
			if pred[i] == '1' and y[test_index][i] == '3':
				AC = AC + 1
				PA = PA + 1
				TC = TC + 1
			if pred[i] == '2' and y[test_index][i] == '1':
				BA = BA + 1	
				PB = PB + 1
				TA = TA + 1
			if pred[i] == '2' and y[test_index][i] == '2':
				BB = BB + 1
				PB = PB + 1
				TB = TB + 1
			if pred[i] == '2' and y[test_index][i] == '3':
				BC = BC + 1
				PB = PB + 1
				TC = TC + 1
			if pred[i] == '3' and y[test_index][i] == '1':
				CA = CA + 1
				PC = PC + 1
				TA = TA + 1	
			if pred[i] == '3' and y[test_index][i] == '2':
				CB = CB + 1
				PC = PC + 1
				TB = TB + 1
			if pred[i] == '3' and y[test_index][i] == '3':
				CC = CC + 1
				PC = PC + 1
				TC = TC + 1		
			if pred[i] != y[test_index][i]:
				count = count + 1
		record[str(k)] = [count, AA, AB, AC, BA, BB, BC, CA, CB, CC, TA, TB, TC, PA, PB, PC, len(pred)]
	err, Aerr, Berr, Cerr = 0, 0, 0, 0
	for key in record:
		Aerr = Aerr + (record[key][2]+record[key][3])/float(record[key][13])
		Berr = Berr + (record[key][4]+record[key][5])/float(record[key][14])
		#Cerr = Cerr + (record[key][7]+record[key][8])/float(record[key][15])
		err = err + record[key][0]/float(record[key][16])
	err = err/float(K)
	Aerr = Aerr/float(K)
	Berr = Berr/float(K)
	#Cerr = err/float(K)
	#err = float(count)/K
	#AA, AB, AC, BA, BB, BC, CA, CB, CC = float(AA)/K, float(AB)/K, float(AC)/K, float(BA)/K, float(BB)/K, float(BC)/K, float(CA)/K, float(CB)/K, float(CC)/K
	return (err, Aerr, Berr)
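The AA..CC counters above amount to a hand-rolled 3x3 confusion matrix, and Aerr/Berr are one minus the per-class precision. sklearn.metrics.confusion_matrix yields the same numbers directly; a minimal equivalent sketch:

import numpy as np
from sklearn.metrics import confusion_matrix

def fold_errors(y_true, y_pred, labels=('1', '2', '3')):
    # cm[i, j] counts samples with true label labels[i] predicted as labels[j];
    # e.g. the AB counter above (predicted '1', really '2') is cm[1, 0].
    cm = confusion_matrix(y_true, y_pred, labels=list(labels))
    err = 1.0 - np.trace(cm) / float(cm.sum())                       # overall error rate
    per_pred_err = 1.0 - np.diag(cm) / cm.sum(axis=0).astype(float)  # 1 - precision per class
    return err, per_pred_err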
    local_dict = sorted(local_dict)

    ############################# Part 3: Linearity ##########################

    # random forest 1
    rf1 = RandomForestClassifier(random_state=10)
    rf1.fit(train_vectors, y_train)

    # random forest 2
    rf2 = RandomForestClassifier(random_state=15)
    rf2.fit(train_vectors, y_train)

    # random forest 3
    rf3 = RandomForestClassifier(random_state=22)
    rf3.estimators_ = rf1.estimators_ + rf2.estimators_
    rf3.n_classes_ = rf1.n_classes_

    # model 1
    def model_rf1(data):
        n_data = len(data)
        res = np.zeros((n_data, 2))
        tfidf = vectorizer.transform(data)
        p = rf1.predict_proba(tfidf)
        res[:, 0] = p[:, 0]
        res[:, 1] = p[:, 1]
        return res

    # model 2
    def model_rf2(data):
        n_data = len(data)
        res = np.zeros((n_data, 2))