def train(self, data, labels):
    """Fit ``self.num`` trees, each on its own consecutive slice of the rows.

    The samples are cut into ``self.num`` chunks of
    ``floor(len(labels) / self.num)`` rows; chunk ``i`` is used to train
    tree ``i``, and each trained tree is appended to ``self.trees``.

    NOTE(review): a random permutation of the feature indices is drawn on
    every round but never applied -- ``np.delete`` is given an empty index
    list, so every tree sees all columns. Presumably feature bagging was
    switched off on purpose; confirm before enabling it.
    """
    feature_count = len(data[0])
    chunk = math.floor(len(labels) / float(self.num))
    keep = math.ceil(np.sqrt(feature_count))

    for round_no in range(self.num):
        start = int(round_no * chunk)
        stop = int((round_no + 1) * chunk)

        # Drawn but unused; kept so the global NumPy RNG advances exactly as
        # it did before.
        _unused_columns = np.random.permutation(feature_count)[keep:]

        tree = DTree(self.depth, self.impurity, self.segmentor)
        rows = np.delete(data[start:stop], [], 1)  # empty list: drops nothing
        tree.train(rows, labels[start:stop])

        # Placeholder branch: nothing is done for trees with a non-leaf root.
        if not isinstance(tree.root, LeafNode):
            pass

        self.trees.append(tree)
def train(self, data, labels):
    """Grow ``self.nTrees`` trees, each trained on a random row subsample.

    For every tree, rows are drawn without replacement from ``data`` (and
    the matching entries from ``labels``). The subsample size is
    ``self.samples`` when that is truthy, otherwise half of the rows
    (rounded down). Each trained tree is appended to ``self.forest``.

    Args:
        data: array exposing ``.shape[0]`` and integer-array indexing.
        labels: array indexable with the same integer index array.
    """
    row_count = data.shape[0]
    # Floor division keeps the default size an integer: ``row_count / 2`` is
    # a float under true division, which np.random.choice rejects as a size.
    sample_size = self.samples if self.samples else row_count // 2

    for index in range(self.nTrees):
        tree = DTree(self.impurity, self.segmentor, self.depth,
                     self.randomness, name=index)
        picked = np.random.choice(row_count, sample_size, replace=False)
        tree.train(data[picked], labels[picked])
        self.forest.append(tree)
def create_trees(bags, data, label):
    """Build one tree per bag via ``DTree.get_tree`` and return them in order.

    ``bags`` is accessed by position (``bags[0]`` .. ``bags[len(bags) - 1]``).
    ``data`` is accepted but not used by this function.
    """
    return [
        DTree.get_tree(bags[position], label)
        for position in range(len(bags))
    ]
print('\t1. Drvo odlucivanja') print('\t2. K najblizih suseda') print('\t3. Gausova raspodela') print('\t4. Neuronske mreze') while True: metoda = int(input('')) if metoda in (1, 2, 3, 4): break else: print('Neispravna opcija') print('Odabrali ste metodu pod rednim brojem ' + str(metoda)) print('--------------------------------------------------------') if metoda == 1: print('Odabrali ste analizu metodom drveta odlučivanja') DTree.DTree(data) elif metoda == 2: print('Odabrali ste analizu metodom k najbližih suseda') KNeighbors.KNeighboors(data) elif metoda == 3: print('Odabrali ste analizu Gausovom metodom') Gaussian.Gaussian(data) elif metoda == 4: print('Odabrali ste analizu metodom neuronskih mreža') MLP.MLP(data) else: print('Neispravna opcija') metoda = input('Zelite li da isprobate drugu metodu? (y/n)') if metoda == 'n': break
X = data['training_data']
Y = data['training_labels'].T.ravel()

# Random ordering of the TRAIN_SIZE sample indices (no repeats).
randomIndex = np.random.choice(TRAIN_SIZE, TRAIN_SIZE, replace=False)

# Split data: the last VALIDATION_SIZE shuffled indices are held out.
# NOTE: VALIDATION_SIZE must be > 0 -- with 0, `[:-0]` is an empty slice and
# the training set would be empty.
trainIndex = randomIndex[:-VALIDATION_SIZE]
validateIndex = randomIndex[-VALIDATION_SIZE:]
xTrain = X[trainIndex]
yTrain = Y[trainIndex]
xValidate = X[validateIndex]
yValidate = Y[validateIndex]
xTest = data['test_data']

segmentor = Segmentor()

print("============= Decision Tree ==========")
tree = DTree(Impurity.impurity, segmentor, depth=20)
tree.train(xTrain, yTrain)
labels = tree.predict(xValidate)

# Error rate = 1 - fraction of matching predictions. Taking the mean of the
# boolean match array avoids indexing a bincount with True/False, which is
# not a plain integer index in current NumPy and fails when no prediction
# is correct (the bincount then has a single entry).
error = 1.0 - np.mean(tree.predict(xTrain) == yTrain)
print("Training Error: %f" % (error))

error = 1.0 - np.mean(labels == yValidate)
print("Validation Error: %f" % (error))

print("========== Random Forest ==========")
forest = RForest(Impurity.impurity, segmentor, nTrees=30, randomness=10)