def spnClassificationSPNFit(X, Y, alpha=0.001, min_slices=80):
    """Learn one Gaussian SPN per class and return them with class fractions.

    For every distinct value in ``Y`` a structure is learned with
    ``LearnSPN(...).fit_structure`` on the rows of ``X`` carrying that label.
    The weight of a class is the fraction of rows of ``X`` that belong to it.

    Args:
        X: 2-D array of training rows (indexed as ``X[mask, :]``).
        Y: label array; compared element-wise against each class value to
            build the row mask.
        alpha: forwarded to ``LearnSPN`` as ``alpha``.
        min_slices: forwarded to ``LearnSPN`` as ``min_instances_slice``.

    Returns:
        dict with the keys ``'classes'`` (``numpy.unique(Y)``), ``'spns'``
        (the object returned by ``fit_structure`` for each class, in class
        order) and ``'weights'`` (list of per-class row fractions, in class
        order).
    """
    classes = numpy.unique(Y)
    n_rows = X.shape[0]

    spns = []
    ws = []
    for label in classes:
        idx = Y == label
        ws.append(float(numpy.sum(idx)) / n_rows)
        learner = LearnSPN(cache=memory,
                           alpha=alpha,
                           min_instances_slice=min_slices,
                           cluster_prep_method=None,
                           families="gaussian")
        spns.append(learner.fit_structure(X[idx, :]))

    # An earlier revision also evaluated every SPN on its own training rows
    # and solved a convex program for mixture weights, but the line reading
    # the solution was commented out, so the result was never used. That dead
    # (and potentially failing) solver call is dropped; the returned values
    # are exactly the ones computed above.
    print(ws)
    return {'classes': classes, 'spns': spns, 'weights': ws}
def fit(self, X, y):
    """Fit one SPN per class and optimise the class mixing weights.

    Steps, as performed below:
      1. validate ``X``/``y`` with ``check_X_y`` and record the labels seen
         (``unique_labels``) in ``self.classes_``;
      2. learn an SPN on the rows of each class with ``LearnSPN`` using
         ``self.alpha``, ``self.min_instances_slice`` and ``self.families``;
      3. solve a convex program for a weight vector that sums to one and
         maximises ``sum(log(A * x))``, where ``A = exp(trainll)``.

    Sets ``classes_``, ``X_``, ``y_``, ``spns_`` (one model per class, in the
    order of ``classes_``) and ``ws_`` (flat list of optimised weights).

    Args:
        X: training samples; validated by ``check_X_y``.
        y: training labels; validated by ``check_X_y``.

    Returns:
        ``self``.

    Raises:
        ValueError: if the solver leaves the weight variable without a value
            (previously this surfaced as an ``AttributeError`` on ``None``).
    """
    # Check that X and y have correct shape
    X, y = check_X_y(X, y)

    # Store the classes seen during fit
    self.classes_ = unique_labels(y)
    # Diagnostic output kept from the original implementation.
    print(y.shape, numpy.unique(y))
    print(self.classes_)

    self.X_ = X
    self.y_ = y

    n_classes = self.classes_.shape[0]
    self.spns_ = []
    self.ws_ = []

    # trainll[i, j] holds the value of spn_j.eval for row i, but only for rows
    # that belong to class j.
    # NOTE(review): every other entry stays 0 and therefore becomes
    # exp(0) = 1 in A below -- confirm this is intended rather than
    # evaluating each SPN on all rows.
    trainll = numpy.zeros((X.shape[0], n_classes))
    for j, label in enumerate(self.classes_):
        idx = y == label
        data_train_class = X[idx, :]
        spn = LearnSPN(alpha=self.alpha,
                       min_instances_slice=self.min_instances_slice,
                       cluster_prep_method="sqrt",
                       families=self.families,
                       cache=memory).fit_structure(data_train_class)
        self.spns_.append(spn)
        trainll[idx, j] = spn.eval(data_train_class, individual=True)

    # Weights constrained to sum to one, chosen to maximise sum(log(A x)).
    x = Variable(n_classes)
    constraints = [sum_entries(x) == 1, x > 0]
    A = numpy.exp(trainll)
    objective = Maximize(sum_entries(log(A * x)))
    prob = Problem(objective, constraints)
    prob.solve()

    # When the solver does not find a solution the variable has no value;
    # fail with a clear message instead of an AttributeError on None.
    if x.value is None:
        raise ValueError(
            "class weight optimisation failed (solver status: %s)" % prob.status)

    # x.value is a column; flatten the nested lists into one flat list.
    self.ws_ = sum(x.value.tolist(), [])
    return self
def evalspnComplete(labels, data, dsname, writer, alpha, min_instances_slice=50):
    """Cross-validate a linear SVM and an ``SPNClassifier`` and write a report.

    For each of the folds produced by ``StratifiedKFold`` (10 folds, seed 123)
    two models are passed to ``evalModel`` together with the fold's train/test
    split and the shared ``evalresults`` mapping:

      * a ``GridSearchCV`` over ``C`` for a linear-kernel ``svm.SVC``
        (reported as ``"SVM raw"``), and
      * an ``SPNClassifier`` built from ``alpha`` and ``min_instances_slice``
        (reported as ``"SPN NB raw"``).

    The wall-clock duration of each ``evalModel`` call is appended to
    ``evalresults`` as well. After all folds, ``evalresults`` is written to
    ``writer`` as one JSON line, followed by one ``mean (+/- 2 * stdev)`` line
    per key.

    Args:
        labels: label array; indexed with the fold index arrays.
        data: sample array; rows are selected with the fold index arrays.
        dsname: not used by this function; kept for interface compatibility.
        writer: object with a ``write(str)`` method that receives the report.
        alpha: forwarded to ``SPNClassifier``.
        min_instances_slice: forwarded to ``SPNClassifier``.

    Returns:
        None.
    """
    # NOTE(review): StratifiedKFold(labels, n_folds=...) is the call form of
    # the legacy scikit-learn cross_validation API -- confirm the pinned
    # version before upgrading.
    cvfolds = StratifiedKFold(labels, n_folds=10, random_state=123)
    evalresults = OrderedDict()

    for train_index, test_index in cvfolds:
        train_data = data[train_index, ]
        train_labels = labels[train_index]
        test_data = data[test_index, ]
        test_labels = labels[test_index]

        clfsvc = GridSearchCV(
            estimator=svm.SVC(kernel='linear', probability=True),
            param_grid={'C': [10 ** 3, 10 ** 2, 10 ** 1, 10 ** 0,
                              10 ** -1, 10 ** -2, 10 ** -3]},
            n_jobs=50,
            cv=5)
        start = time.time()
        evalModel(clfsvc, test_data, test_labels, train_data, train_labels,
                  "SVM raw", evalresults)
        evalresults.setdefault("SVM time in secs \t\t", []).append(time.time() - start)

        clspn = SPNClassifier(alpha=alpha, min_instances_slice=min_instances_slice)
        start = time.time()
        evalModel(clspn, test_data, test_labels, train_data, train_labels,
                  "SPN NB raw", evalresults)
        evalresults.setdefault("SPN time in secs \t\t", []).append(time.time() - start)

        # The per-class "SPN log-likelihood / gradients / activations ->
        # classifier" experiment that used to follow here sat behind an
        # unconditional `continue`, so it never ran; it also referenced
        # `clflr`, whose definition was commented out. That unreachable code
        # has been removed.

    writer.write(json.dumps(evalresults))
    writer.write("\n")
    for key, value in evalresults.items():
        writer.write("%s: %0.6f (+/- %0.6f) \n" % (key, mean(value), stdev(value) * 2))
    writer.write("\n")