def run(self, X_train, X_test, y_train, classes):
    """Train one WiSARD on composite label strings and predict label sets.

    Each training sample's labels are sorted and joined with "-" into a
    single class string; predictions are split on "-" to recover a list of
    labels. Labels that themselves contain "-" would not round-trip.

    Args:
        X_train: Position-indexable training inputs; each item is wrapped
            in ``wp.BinInput``.
        X_test: Iterable of test inputs; each item is wrapped in
            ``wp.BinInput``.
        y_train: Iterable of per-sample label collections (strings). It is
            no longer reordered in place.
        classes: Unused by this method.

    Returns:
        A list with one list of label strings per test sample.

    Side effects:
        Appends one elapsed time per training sample to
        ``self.training_time`` and one per test sample to
        ``self.classification_time``.
    """
    wsd = wp.Wisard(self.addr_size)

    for i, y in enumerate(y_train):
        start_time = time.time()
        # sorted() builds a new list, so the caller's labels keep their
        # original order (the previous y.sort() reordered them in place).
        y_ps = "-".join(sorted(y))
        ds = wp.DataSet()
        ds.add(wp.BinInput(X_train[i]), y_ps)
        wsd.train(ds)
        self.training_time.append(time.time() - start_time)

    y_pred_ps = []
    for x in X_test:
        start_time = time.time()
        ds_test = wp.DataSet()
        ds_test.add(wp.BinInput(x))
        # The dataset holds a single sample, so take the first prediction.
        y_pred_ps.append(wsd.classify(ds_test)[0])
        self.classification_time.append(time.time() - start_time)

    return [y.split("-") for y in y_pred_ps]
def run(self, X_train, X_test, y_train, classes):
    """Train one ClusWisard on composite label strings and predict label sets.

    Each training sample's labels are sorted and joined with "-" into a
    single class string; predictions are split on "-" to recover a list of
    labels. Labels that themselves contain "-" would not round-trip.

    Args:
        X_train: Position-indexable training inputs; each item is wrapped
            in ``wp.BinInput``.
        X_test: Iterable of test inputs, added to the dataset as-is.
        y_train: Iterable of per-sample label collections (strings). It is
            no longer reordered in place.
        classes: Unused by this method.

    Returns:
        A list with one list of label strings per test sample.

    Side effects:
        Appends one elapsed time per training sample to
        ``self.training_time`` and one per test sample to
        ``self.classification_time``.
    """
    clus = wp.ClusWisard(
        self.addr_size, self.minScore, self.threshold, self.discriminatorLimit
    )

    for i, y in enumerate(y_train):
        start_time = time.time()
        # sorted() builds a new list, so the caller's labels keep their
        # original order (the previous y.sort() reordered them in place).
        y_ps = "-".join(sorted(y))
        ds = wp.DataSet()
        ds.add(wp.BinInput(X_train[i]), y_ps)
        clus.train(ds)
        self.training_time.append(time.time() - start_time)

    y_pred_ps = []
    for x in X_test:
        start_time = time.time()
        ds_test = wp.DataSet()
        # NOTE(review): training wraps inputs in wp.BinInput but the test
        # input is added raw; kept as-is — confirm both forms are equivalent.
        ds_test.add(x)
        # The dataset holds a single sample, so take the first prediction.
        y_pred_ps.append(clus.classify(ds_test)[0])
        self.classification_time.append(time.time() - start_time)

    return [y.split("-") for y in y_pred_ps]
def run(self, X_train, X_test, y_train, classes):
    """Train one "true"/"false" ClusWisard per label and predict label sets.

    For every label in ``classes`` a separate ClusWisard is trained on all
    training samples, tagged "true" when the label is in that sample's
    ``y_train`` entry and "false" otherwise. A test sample receives every
    label whose classifier answers "true".

    Args:
        X_train: Position-indexable training inputs; each item is wrapped
            in ``wp.BinInput``.
        X_test: Position-indexable test inputs, added to the dataset as-is.
        y_train: Position-indexable per-sample label collections.
        classes: Iterable of labels; one classifier is built per label.

    Returns:
        A list with one (possibly empty) list of labels per test sample.

    Side effects:
        Appends one elapsed time per label to ``self.training_time`` and
        one per-sample average per label to ``self.classification_time``.
    """
    wsds = {}
    for label in classes:
        wsds[label] = wp.ClusWisard(
            self.addr_size, self.minScore, self.threshold,
            self.discriminatorLimit
        )
        start_time = time.time()
        for i in range(len(X_train)):
            ds = wp.DataSet()
            target = "true" if label in y_train[i] else "false"
            ds.add(wp.BinInput(X_train[i]), target)
            wsds[label].train(ds)
        self.training_time.append(time.time() - start_time)

    # One independent list per test sample. The previous `[[]] * n` repeated
    # a single shared list, so every sample ended up with every label.
    y_pred = [[] for _ in range(len(X_test))]

    # NOTE(review): training wraps inputs in wp.BinInput but test inputs are
    # added raw; kept as-is — confirm both forms are equivalent.
    ds_test = wp.DataSet()
    for i in range(len(X_test)):
        ds_test.add(X_test[i])

    for label in classes:
        start_time = time.time()
        outputs = wsds[label].classify(ds_test)
        # The whole test set is classified in one call; store the average
        # time per sample.
        self.classification_time.append(
            (time.time() - start_time) / len(X_test))
        for i, output in enumerate(outputs):
            if output == "true":
                y_pred[i].append(label)

    return y_pred
def binarize(
    train_images,
    train_labels,
    test_images,
    test_labels,
    dataset_name,
    binarization_name,
    num_classes=0,
    entry_size=0,
    shape=None,
):
    """Binarize train/test images and wrap them in a ``Dataset``.

    Args:
        train_images: Training samples passed to the preprocessing method's
            ``transform``.
        train_labels: Training labels. If the first label is a built-in
            ``str``, all labels are encoded with a ``LabelEncoder`` fitted
            on the training labels.
        test_images: Test samples passed to the same ``transform``.
        test_labels: Test labels, encoded with the same encoder when
            applicable.
        dataset_name: Forwarded to ``Dataset``.
        binarization_name: Name given to ``get_preprocessing``; "sv"
            additionally receives ``shape`` as an attribute.
        num_classes: Forwarded to ``Dataset``.
        entry_size: Forwarded to ``Dataset``.
        shape: Sample shape. When omitted and the first training image is
            two-dimensional, its shape is used.

    Returns:
        A ``Dataset`` whose ``train`` and ``test`` are ``wp.DataSet``
        objects built from the binarized images and string-cast labels.
    """
    # `is None` instead of `== None`: an ndarray shape would turn the
    # equality into an element-wise comparison and fail inside `and`.
    if shape is None and np.array(train_images[0]).ndim == 2:
        shape = np.shape(train_images[0])

    method = get_preprocessing(binarization_name)
    if binarization_name == "sv":
        method.shape = shape

    binary_train_images = method.transform(train_images)
    binary_test_images = method.transform(test_images)

    # Exact-type check kept on purpose: widening it to isinstance() would
    # start encoding numpy string labels that are currently passed through.
    if type(train_labels[0]) is str:
        le = LabelEncoder()
        le.fit(train_labels)
        binary_train_labels = le.transform(train_labels)
        # NOTE(review): presumably fails for test labels unseen in training
        # — confirm against the LabelEncoder in use.
        binary_test_labels = le.transform(test_labels)
    else:
        binary_train_labels = train_labels
        binary_test_labels = test_labels

    return Dataset(
        train=wp.DataSet(
            binary_train_images, np.array(binary_train_labels).astype(str)
        ),
        test=wp.DataSet(
            binary_test_images, np.array(binary_test_labels).astype(str)
        ),
        dataset_name=dataset_name,
        binarization_name=binarization_name,
        num_classes=num_classes,
        entry_size=entry_size,
        shape=shape,
    )
def generate_dataset(self):
    """Draw one learner's share of the training data without replacement.

    Samples ``int(len(self.train_dataset) / self.learners)`` entries from
    ``self.data_positions``, removes them from that pool, and copies the
    matching inputs and labels into a new ``wp.DataSet``.

    Returns:
        The newly built ``wp.DataSet``.
    """
    share = int(len(self.train_dataset) / self.learners)
    picked = random.sample(self.data_positions, share)

    subset = wp.DataSet()
    for position in picked:
        # Consume the position so later calls cannot draw it again.
        self.data_positions.remove(position)
        subset.add(
            self.train_dataset.get(position),
            self.train_dataset.getLabel(position),
        )
    return subset
def load(dataset_name, binarization_name, folder):
    """Load a previously saved dataset from ``<folder>/<dataset_name>/``.

    Reads ``<binarization_name>_info.ds`` from that directory and uses its
    "train_filename", "test_filename", "num_classes", "entry_size" and
    "shape" entries to rebuild a ``Dataset``.

    Args:
        dataset_name: Name of the dataset sub-directory.
        binarization_name: Prefix of the ``_info.ds`` metadata file.
        folder: Base directory. A trailing "/" is optional; an empty string
            means paths are relative to the current directory.

    Returns:
        A ``Dataset`` whose ``train`` and ``test`` are ``wp.DataSet``
        objects constructed from the stored file paths.
    """
    # `folder and ...` guards the empty string, which used to raise
    # IndexError on folder[-1].
    if folder and not folder.endswith("/"):
        folder = "{}/".format(folder)
    folder = "{}{}/".format(folder, dataset_name)
    filename = "{}{}".format(folder, binarization_name)

    # NOTE(review): `pk` is presumably the pickle module; unpickling can
    # execute arbitrary code, so only load metadata files you trust.
    with open("{}_info.ds".format(filename), "rb") as input_file:
        data = pk.load(input_file)

    return Dataset(
        train=wp.DataSet("{}{}".format(folder, data["train_filename"])),
        test=wp.DataSet("{}{}".format(folder, data["test_filename"])),
        dataset_name=dataset_name,
        binarization_name=binarization_name,
        num_classes=data["num_classes"],
        entry_size=data["entry_size"],
        shape=data["shape"],
    )
def generate_dataset(self):
    """Build one resampled ``wp.DataSet`` per row returned by ``bootstrap``.

    ``bootstrap`` is called on the training indices with ``self.learners``
    and ``int(len(self.train_dataset) * self.partitions)`` as arguments,
    inside ``NumpyRNGContext(1)``.

    Returns:
        A list of ``wp.DataSet`` objects, one per row of the bootstrap
        result.
    """
    total = len(self.train_dataset)
    indices = np.array(list(range(total)))

    # NOTE(review): the constant argument 1 presumably fixes the RNG seed so
    # the draws are reproducible — confirm against NumpyRNGContext.
    with NumpyRNGContext(1):
        draws = bootstrap(indices, self.learners, int(total * self.partitions))

    datasets = []
    for draw in draws:
        bag = wp.DataSet()
        for raw_index in draw:
            # Cast each drawn value to int before using it as a position.
            position = int(raw_index)
            bag.add(
                self.train_dataset.get(position),
                self.train_dataset.getLabel(position),
            )
        datasets.append(bag)
    return datasets
def classify(self, test_dataset):
    """Classify each sample by a weighted vote over ``self.nets``.

    Every net predicts a label for the sample; the label's tally grows by
    that net's entry in ``self.ensemble_weights``. The label with the
    highest tally wins (on ties, the label that was voted for first).

    Args:
        test_dataset: Object supporting ``len()``, ``get(i)`` and
            ``getLabel(i)``.

    Returns:
        A list with the winning label for each sample, in order.
    """
    results = []
    for i in range(len(test_dataset)):
        # The single-sample dataset is the same for every net, so build it
        # once per sample instead of once per net.
        test = wp.DataSet()
        test.add(wp.BinInput(test_dataset.get(i)), test_dataset.getLabel(i))

        tally = {}
        for j, net in enumerate(self.nets):
            label = net.classify(test)[0]
            # Count the weight on the first vote as well; previously the
            # first vote for a label was stored as 0 and its weight lost.
            tally[label] = tally.get(label, 0) + self.ensemble_weights[j]

        results.append(max(tally, key=tally.get))
    return results
def classify(self, test_dataset):
    """Classify each sample with the voting rule named by ``self.voting``.

    For every sample, the top prediction of each net is collected, and the
    label list is derived from the first net's ``getAllScores`` output via
    ``self.get_labels``. "borda0" and "borda1" select the matching borda
    count method; any other value falls back to ``self.dowdall``.

    Args:
        test_dataset: Object supporting ``len()``, ``get(i)`` and
            ``getLabel(i)``.

    Returns:
        A list with one voting result per sample, in order.
    """
    results = []
    for idx in range(len(test_dataset)):
        sample = wp.DataSet()
        sample.add(
            wp.BinInput(test_dataset.get(idx)), test_dataset.getLabel(idx)
        )

        # First prediction of every net for this single-sample dataset.
        scores = [net.classify(sample)[0] for net in self.nets]
        labels = self.get_labels(self.nets[0].getAllScores(sample))

        if self.voting == "borda0":
            winner = self.borda_count_0(scores, labels)
        elif self.voting == "borda1":
            winner = self.borda_count_1(scores, labels)
        else:
            winner = self.dowdall(scores, labels)
        results.append(winner)
    return results
def classify(self, test_dataset):
    """Classify each sample with the plurality rule named by ``self.voting``.

    For every sample, each net's ``getAllScores`` output is converted with
    ``self.get_scores``. For each entry of the first net's converted list,
    its position in every net's list is recorded; when a net's list lacks
    the entry, the position of that list's maximum is used instead.
    "plurality1", "plurality2" and "plurality3" select the matching method;
    any other value falls back to ``self.plurality4`` with 0.3.

    Args:
        test_dataset: Object supporting ``len()``, ``get(i)`` and
            ``getLabel(i)``.

    Returns:
        A list with one voting result per sample, in order.
    """
    results = []
    for sample_idx in range(len(test_dataset)):
        sample = wp.DataSet()
        sample.add(
            wp.BinInput(test_dataset.get(sample_idx)),
            test_dataset.getLabel(sample_idx),
        )

        votes = [self.get_scores(net.getAllScores(sample)) for net in self.nets]
        labels = votes[0]

        scores = []
        for label in labels:
            positions = []
            for ranking in votes:
                if label in ranking:
                    positions.append(ranking.index(label))
                else:
                    # Entry missing from this net: use the position of the
                    # net's own maximum.
                    positions.append(ranking.index(max(ranking)))
            scores.append(positions)

        if self.voting == "plurality1":
            winner = self.plurality1(scores, labels)
        elif self.voting == "plurality2":
            winner = self.plurality2(scores, labels)
        elif self.voting == "plurality3":
            winner = self.plurality3(scores, labels)
        else:
            winner = self.plurality4(scores, labels, 0.3)
        results.append(winner)
    return results
# Cross-validated sweep of ClusWisard address sizes over thermometer-encoded
# inputs. `densemtx`, `dbmatrix`, `labels` and `skf` are defined outside this
# fragment.
# NOTE(review): the nesting below (everything after the inner `for` belongs
# to its body) is the most plausible reading of this fragment — confirm.
therm = genThermometer(dbmatrix)
# Per-column minimum and maximum of the dense matrix, as flat lists.
mins = np.min(densemtx, axis=0).squeeze().tolist()[0]
maxs = np.max(densemtx, axis=0).squeeze().tolist()[0]
dtherm = wsd.DynamicThermometer(therm, mins, maxs)
# One encoded row per row of dbmatrix.
binX = [
    dtherm.transform(densemtx[i].tolist()[0])
    for i in range(dbmatrix.shape[0])
]
for train_index, test_index in skf.split(dbmatrix, labels):
    for n in range(1, 11):
        # First ClusWisard argument under test: 3, 6, ..., 30.
        win = n * 3
        print("TRAIN - " + str(len(train_index)))
        print("TEST - " + str(len(test_index)))
        ds_train = wsd.DataSet([binX[ix] for ix in train_index], [labels[ix] for ix in train_index])
        ds_test = wsd.DataSet([binX[ix] for ix in test_index], [labels[ix] for ix in test_index])
        wisard = wsd.ClusWisard(win, 0.7, 20, 10)
        wisard.train(ds_train)
        outTrain = np.array(wisard.classify(ds_train))
        outTest = np.array(wisard.classify(ds_test))
        print('Train accuracy:', accuracy_score(outTrain, [labels[ix] for ix in train_index]))
        print('Test accuracy:', accuracy_score(outTest, [labels[ix] for ix in test_index]))
        # Second run with the roles swapped: a fresh model is trained on the
        # test split and then classifies the train split.
        wisard = wsd.ClusWisard(win, 0.7, 20, 10)
        wisard.train(ds_test)
        outTrain = np.array(wisard.classify(ds_train))
# Fixed train/validation/test split followed by a sweep of ClusWisard
# address sizes. `dbmatrix`, `labels`, `train_index`, `valid_index`,
# `rtest_index` and `test_index` are defined outside this fragment.
# NOTE(review): the nesting below (everything after the `for` belongs to its
# body) is the most plausible reading of this fragment — confirm.
densemtx = dbmatrix.todense()
therm = genThermometer(dbmatrix)
# Per-column minimum and maximum of the dense matrix, as flat lists.
mins = np.min(densemtx, axis=0).squeeze().tolist()[0]
maxs = np.max(densemtx, axis=0).squeeze().tolist()[0]
dtherm = wsd.DynamicThermometer(therm, mins, maxs)
# One encoded row per row of dbmatrix.
binX = [dtherm.transform(densemtx[i].tolist()[0])
        for i in range(dbmatrix.shape[0])]

# binX is a plain Python list, so only one-dimensional slices are valid; the
# previous `binX[:10240, :]` form raises TypeError on a list.
train = binX[:10240]
validation = binX[10240:11524]
test = binX[11524:]

# NOTE(review): the label lists come from index arrays, while the inputs come
# from positional slices — confirm that the two line up.
ds_train = wsd.DataSet(train, [labels[ix] for ix in train_index])
ds_valid = wsd.DataSet(validation, [labels[ix] for ix in valid_index])
# NOTE(review): ds_test is labelled with rtest_index but scored against
# test_index below — confirm which one is intended.
ds_test = wsd.DataSet(test, [labels[ix] for ix in rtest_index])

for n in range(1, 11):
    # First ClusWisard argument under test: 3, 6, ..., 30.
    win = n * 3
    wisard = wsd.ClusWisard(win, 0.7, 20, 10)
    wisard.train(ds_train)
    outValid = np.array(wisard.classify(ds_valid))
    outTest = np.array(wisard.classify(ds_test))
    # NOTE(review): this line prints the validation accuracy under the
    # 'Train accuracy:' label; the text is left unchanged.
    print('Train accuracy:', accuracy_score(outValid, [labels[ix] for ix in valid_index]))
    print('Test accuracy:', accuracy_score(outTest, [labels[ix] for ix in test_index]))
    # Second run: a fresh model is trained on the test split and then
    # classifies the validation split.
    wisard = wsd.ClusWisard(win, 0.7, 20, 10)
    wisard.train(ds_test)
    outValid = np.array(wisard.classify(ds_valid))
def predict(self, X):
    """Classify ``X`` with the wrapped model and return integer labels.

    Args:
        X: Samples passed directly to ``wp.DataSet``.

    Returns:
        A NumPy array of the model's predictions converted to ``int``.
    """
    dataset = wp.DataSet(X)
    predictions = self.ptr.classify(dataset)
    return np.array(predictions, dtype=int)
def fit(self, X, y):
    """Train the wrapped model on ``X`` with labels ``y``.

    Args:
        X: Samples passed directly to ``wp.DataSet``.
        y: Labels; converted to a NumPy array of strings before training.

    Returns:
        ``self``, so calls can be chained.
    """
    string_labels = np.array(y, dtype=str)
    dataset = wp.DataSet(X, string_labels)
    self.ptr.train(dataset)
    return self