def train(data, attributes, labels, k, *, sample_size=100, n_features=50):
    """Fit ``k`` pruned trees on bootstrap samples and train the SVM on their votes.

    Each round draws ``sample_size`` rows of ``data.raw_data`` with
    replacement, keeps only the attributes whose integer key is among
    ``n_features`` randomly drawn indices, builds a tree with ``dt.id3`` and
    prunes it with ``dt.pruning_tree``. Afterwards every row of the full
    ``data.raw_data`` is run through every tree with ``dt.predict``; the
    predictions, prefixed by the row's first column, form the matrix handed to
    ``svm.train``.

    The caller's ``data`` object is not modified: each round works on a
    shallow copy, so every bootstrap sample is drawn from the full data set
    and the final transform covers all rows.

    Args:
        data: Data set object exposing ``raw_data`` (2-D NumPy array; column 0
            is used as the target passed to the SVM) and ``attributes``.
        attributes: Mapping of attribute keys (convertible with ``int``) to
            attribute descriptions; it is deep-copied, never mutated.
        labels: Forwarded unchanged to ``dt.id3``.
        k: Number of trees to grow; also forwarded to ``svm.train``.
        sample_size: Rows drawn (with replacement) per bootstrap sample.
        n_features: Number of attribute indices drawn per tree. Indices are
            drawn with repetition, so fewer distinct attributes may be kept.

    Returns:
        The second and third values returned by ``svm.train``.
    """
    full_rows = data.raw_data
    n_rows = full_rows.shape[0]
    n_cols = full_rows.shape[1]

    trees = []
    for _ in range(k):
        # Shallow copy: rebinding raw_data/attributes below must not leak into
        # the caller's object, otherwise later rounds resample a sample.
        bag = copy.copy(data)
        bag.raw_data = full_rows[
            np.random.choice(n_rows, sample_size, replace=True)
        ]

        # NOTE(review): randint's upper bound is exclusive, so index
        # n_cols - 1 is never drawn -- confirm that this is intended.
        drawn = set(np.random.randint(1, n_cols - 1, size=n_features).tolist())
        features = copy.deepcopy(attributes)
        for attr in attributes:
            if int(attr) not in drawn:
                del features[attr]
        bag.attributes = features

        tree = dt.id3(bag, features, labels)
        pruned = dt.pruning_tree(tree, 1)
        trees.append(pruned)
        # Result is unused here; the call is kept in case report_error has
        # side effects (e.g. printing) that callers rely on.
        dt.report_error(bag, pruned)

    # Column 0 carries each row's first value; columns 1..k the tree votes.
    transformed_data = np.zeros((n_rows, k + 1))
    for row, example in enumerate(full_rows):
        transformed_data[row, 0] = example[0]
        for col, tree in enumerate(trees, 1):
            transformed_data[row, col] = int(dt.predict(data, example, tree))

    _w, a, lab = svm.train(transformed_data, transformed_data[:, 0], k)
    return a, lab
def train(data, attributes, labels, k, *, sample_size=100, n_features=50):
    """Return the mean of ``100 - err`` over ``k`` bagged, pruned trees.

    Each round draws ``sample_size`` rows of ``data.raw_data`` with
    replacement, keeps only the attributes whose integer key is among
    ``n_features`` randomly drawn indices, builds a tree with ``dt.id3``,
    prunes it with ``dt.pruning_tree`` and scores it on that same sample with
    ``dt.report_error``.

    The caller's ``data`` object is not modified: each round works on a
    shallow copy, so every bootstrap sample is drawn from the full data set.

    Args:
        data: Data set object exposing ``raw_data`` (2-D NumPy array) and
            ``attributes``.
        attributes: Mapping of attribute keys (convertible with ``int``) to
            attribute descriptions; it is deep-copied, never mutated.
        labels: Forwarded unchanged to ``dt.id3``.
        k: Number of trees to grow.
        sample_size: Rows drawn (with replacement) per bootstrap sample.
        n_features: Number of attribute indices drawn per tree. Indices are
            drawn with repetition, so fewer distinct attributes may be kept.

    Returns:
        ``sum(100 - err) / k`` over all rounds, where ``err`` is the first
        value returned by ``dt.report_error`` (presumably a percentage --
        TODO confirm).

    Raises:
        ZeroDivisionError: If ``k`` is 0.
    """
    full_rows = data.raw_data
    n_rows = full_rows.shape[0]
    n_cols = full_rows.shape[1]

    scores = []
    for _ in range(k):
        # Shallow copy: rebinding raw_data/attributes below must not leak into
        # the caller's object, otherwise later rounds resample a sample.
        bag = copy.copy(data)
        bag.raw_data = full_rows[
            np.random.choice(n_rows, sample_size, replace=True)
        ]

        # NOTE(review): randint's upper bound is exclusive, so index
        # n_cols - 1 is never drawn -- confirm that this is intended.
        drawn = set(np.random.randint(1, n_cols - 1, size=n_features).tolist())
        features = copy.deepcopy(attributes)
        for attr in attributes:
            if int(attr) not in drawn:
                del features[attr]
        bag.attributes = features

        tree = dt.id3(bag, features, labels)
        pruned = dt.pruning_tree(tree, 1)
        err, _depth = dt.report_error(bag, pruned)
        scores.append(100 - err)

    return sum(scores) / k
def train(data, attributes, labels, k):
    """Build a single tree with ``dt.id3`` limited to ``maxDepth=2``.

    ``k`` is accepted but not used by this function.
    """
    return dt.id3(data, attributes, labels, maxDepth=2)