示例#1
0
文件: pred.py 项目: willis-hu/spyn
def spnClassificationSPNFit(X, Y, alpha=0.001, min_slices=80):
    """Learn one SPN per class label and pair them with class-frequency weights.

    For each distinct value in ``Y`` a structure is learned with ``LearnSPN``
    (``families="gaussian"``, no cluster preprocessing) on the rows of ``X``
    that carry that label.

    A convex program over a weight vector (entries constrained to sum to one)
    is built from the exponentiated per-class evaluations and solved, but its
    solution is not read back: the weights that are returned are simply the
    fraction of rows belonging to each class.

    Args:
        X: 2-D array of samples, indexed as ``X[mask, :]``.
        Y: labels aligned with the rows of ``X`` (compared element-wise
            against each unique label).
        alpha: forwarded to ``LearnSPN`` as ``alpha``.
        min_slices: forwarded to ``LearnSPN`` as ``min_instances_slice``.

    Returns:
        dict with keys ``'classes'`` (unique labels), ``'spns'`` (one learned
        SPN per label, same order) and ``'weights'`` (per-class row fraction).

    Side effects:
        Prints the weight list to stdout.
    """
    classes = numpy.unique(Y)
    n_rows = X.shape[0]
    n_classes = classes.shape[0]

    spns = []
    ws = []
    # Entry (i, j) holds the j-th SPN's evaluation of row i when row i belongs
    # to class j (presumably a log-likelihood, given the later exp -- confirm);
    # every other entry stays 0.
    trainll = numpy.zeros((n_rows, n_classes))

    for col, label in enumerate(classes):
        mask = Y == label
        # Empirical share of the training rows that carry this label.
        ws.append(float(numpy.sum(mask)) / n_rows)

        class_rows = X[mask, :]
        learner = LearnSPN(cache=memory,
                           alpha=alpha,
                           min_instances_slice=min_slices,
                           cluster_prep_method=None,
                           families="gaussian")
        spn = learner.fit_structure(class_rows)
        spns.append(spn)

        trainll[mask, col] = spn.eval(class_rows, individual=True)

    # Mixture-weight program: maximise sum(log(A x)) subject to sum(x) == 1
    # and x > 0.  NOTE(review): the solution in `x` is never used below; the
    # returned weights are the class frequencies collected in the loop.
    x = Variable(n_classes)
    A = numpy.exp(trainll)
    constraints = [sum_entries(x) == 1, x > 0]
    objective = Maximize(sum_entries(log(A * x)))
    Problem(objective, constraints).solve()

    print(ws)

    return {'classes': classes, 'spns': spns, 'weights': ws}
示例#2
0
    def fit(self, X, y):
        """Fit one SPN per class and solve for the class mixture weights.

        The inputs are validated with ``check_X_y``; the distinct labels are
        stored in ``self.classes_`` and the validated data in ``self.X_`` /
        ``self.y_``.  For every label a structure is learned with ``LearnSPN``
        on that label's rows, using ``self.alpha``,
        ``self.min_instances_slice`` and ``self.families``, and appended to
        ``self.spns_``.

        A convex program (maximise ``sum(log(A x))`` with ``sum(x) == 1`` and
        ``x > 0``) is then solved on the exponentiated per-class evaluations,
        and its solution is stored, flattened, in ``self.ws_``.

        Args:
            X: training samples.
            y: training labels, one per sample.

        Returns:
            ``self``.

        Side effects:
            Prints the label shape, the unique labels and ``self.classes_``
            to stdout.
        """
        # Validate the inputs and remember which classes were seen.
        X, y = check_X_y(X, y)
        self.classes_ = unique_labels(y)
        print(y.shape, numpy.unique(y))
        print(self.classes_)

        self.X_ = X
        self.y_ = y

        self.spns_ = []
        self.ws_ = []

        n_classes = self.classes_.shape[0]
        # Entry (i, j) is the j-th SPN's evaluation of row i when row i has
        # label j; all other entries stay 0.
        trainll = numpy.zeros((X.shape[0], n_classes))

        for col, label in enumerate(self.classes_):
            mask = y == label
            class_rows = X[mask, :]

            learner = LearnSPN(alpha=self.alpha,
                               min_instances_slice=self.min_instances_slice,
                               cluster_prep_method="sqrt",
                               families=self.families,
                               cache=memory)
            spn = learner.fit_structure(class_rows)
            self.spns_.append(spn)

            trainll[mask, col] = spn.eval(class_rows, individual=True)

        # Solve for the mixture weights over the per-class SPNs.
        x = Variable(n_classes)
        A = numpy.exp(trainll)
        constraints = [sum_entries(x) == 1, x > 0]
        objective = Maximize(sum_entries(log(A * x)))
        Problem(objective, constraints).solve()

        # Concatenate the rows of the solution into one flat list; assumes
        # x.value.tolist() yields a list of lists -- TODO confirm.
        self.ws_ = sum(x.value.tolist(), [])

        return self
示例#3
0
def evalspnComplete(labels, data, dsname, writer, alpha, min_instances_slice=50):
    """Cross-validate a linear SVM against an SPN classifier and report results.

    The data is split with a 10-fold ``StratifiedKFold`` (``random_state=123``).
    On every fold two models are passed to ``evalModel``:

    * a grid-searched linear ``svm.SVC`` (``C`` in 1e3 .. 1e-3, 5 inner folds),
      reported under the name ``"SVM raw"``;
    * an ``SPNClassifier`` built from ``alpha`` and ``min_instances_slice``,
      reported under the name ``"SPN NB raw"``.

    The wall-clock time of each ``evalModel`` call is appended to
    ``evalresults`` under a per-model timing key.  ``evalModel`` is presumably
    what records the actual scores in ``evalresults`` -- confirm against its
    definition.

    After all folds, ``evalresults`` is written to ``writer`` as one JSON line,
    followed by one ``key: mean (+/- 2 * stdev)`` line per entry.

    Args:
        labels: class labels, indexable by the fold index arrays.
        data: sample matrix, indexable by the fold index arrays.
        dsname: not used by this function; kept for interface compatibility.
        writer: object with a ``write(str)`` method receiving the report.
        alpha: forwarded to ``SPNClassifier``.
        min_instances_slice: forwarded to ``SPNClassifier``.

    Returns:
        None.
    """
    evalresults = OrderedDict()

    # Powers of ten from 10**3 down to 10**-3 for the SVM regularisation grid.
    svm_param_grid = {'C': [10 ** exponent for exponent in range(3, -4, -1)]}

    cvfolds = StratifiedKFold(labels, n_folds=10, random_state=123)

    for train_index, test_index in cvfolds:
        train_data = data[train_index, ]
        train_labels = labels[train_index]

        test_data = data[test_index, ]
        test_labels = labels[test_index]

        # A fresh grid search per fold so no state leaks between folds.
        clfsvc = GridSearchCV(estimator=svm.SVC(kernel='linear', probability=True),
                              param_grid=svm_param_grid,
                              n_jobs=50,
                              cv=5)
        start = time.time()
        evalModel(clfsvc, test_data, test_labels, train_data, train_labels, "SVM raw", evalresults)
        evalresults.setdefault("SVM time in secs \t\t", []).append((time.time() - start))

        clspn = SPNClassifier(alpha=alpha, min_instances_slice=min_instances_slice)
        start = time.time()
        evalModel(clspn, test_data, test_labels, train_data, train_labels, "SPN NB raw", evalresults)
        evalresults.setdefault("SPN time in secs \t\t", []).append((time.time() - start))

    # Raw per-fold values first, then a human-readable summary per key.
    writer.write(json.dumps(evalresults))
    writer.write("\n")

    for key, value in evalresults.items():
        writer.write("%s: %0.6f (+/- %0.6f) \n" % (key, mean(value), stdev(value) * 2))

    writer.write("\n")