Exemplo n.º 1
0
    def crossvalidate_linearsvm(self, folds=None, accuracy_weight=None):
        self.setup_crossvalidation(folds=folds)

        self.fold_accuracies = []

        for i in range(self.folds):
            train_indices = self.cv.train_dict[i]
            test_indices = self.cv.test_dict[i]

            trainX, trainY, testX, testY = self.assign_groups(train_indices, test_indices)

            print "Normalizing training data..."
            trainX = simple_normalize(trainX)
            print "Normalizing testing data..."
            testX = simple_normalize(testX)

            curr_clf = self.fit_linearsvc(trainX, trainY)
            curr_acc = self.test_accuracy(curr_clf, testX, testY)

            if not accuracy_weight:
                self.fold_accuracies.append(curr_acc)
            elif accuracy_weight == "group_trials":
                for trial in testY:
                    self.fold_accuracies.append(curr_acc)

                current_weighted_accuracy = sum(self.fold_accuracies) / len(self.fold_accuracies)
                print "Current weighted accuracy: ", current_weighted_accuracy

        self.average_accuracy = sum(self.fold_accuracies) / len(self.fold_accuracies)
        print "Average accuracy: ", self.average_accuracy
Exemplo n.º 2
0
    def output_maps(self, nifti_filepath, threshold=0.01, two_tail=True, threshold_type="pvalue"):
        print "Normalizing X matrix..."
        Xnorm = simple_normalize(self.data.X)
        print "Classifying with linear svm..."
        clf = self.fit_linearsvc(Xnorm, self.data.Y)
        print "Thresholding and dumping coefficients to file..."
        self.coefs = clf.coef_[0]

        if threshold_type == "pvalue":
            thresholded_coefs = threshold_by_pvalue(self.coefs, threshold, two_tail=two_tail)
        elif threshold_type == "raw_percentage":
            thresholded_coefs = threshold_by_rawrange(self.coefs, threshold, two_tail=two_tail)

        self.nifti.output_nifti_thrumask(
            thresholded_coefs,
            self.data.trial_mask,
            self.data.mask_shape,
            len(self.data.selected_trs),
            self.data.raw_affine,
            nifti_filepath,
        )

        self.nifti.convert_to_afni(nifti_filepath, nifti_filepath[:-4])
        self.nifti.adwarp_to_template_talairach(
            nifti_filepath[:-4] + "+orig", None, self.data.talairach_template_path, self.data.dxyz, overwrite=True
        )
Exemplo n.º 3
0
 def test_graphnet(self, X, Y, coefs):
     
     X = simple_normalize(X)
     accuracies = []
     
     for i, coefset in enumerate(coefs):
         
         correct = []
         print 'Checking accuracy for test group'
         
         if self.problemkey == 'RobustGraphNet':
             coefset = coefset[:-self.trainX_shape[0]]
         
         for trial, outcome in zip(X, Y):
             predict = trial*coefset
             #print np.sum(predict)
             Ypredsign = np.sign(np.sum(predict))
             if Ypredsign < 0.:
                 Ypredsign = 0.
             else:
                 Ypredsign = 1.
             #print Ypredsign, outcome, (Ypredsign == outcome)
             correct.append(Ypredsign == outcome)
             
         fold_accuracy = np.sum(correct) * 1. / len(correct)
         
         print 'coef number:', i
         print 'fold accuracy: ', fold_accuracy
         accuracies.append(fold_accuracy)
         
         
     return accuracies
Exemplo n.º 4
0
 def pls_train(self, X, Y, verbose=True):
     
     Xn = simple_normalize(X)
     
     pls = PLSRegression()
     
     if verbose:
         print 'fitting canonical pls...'
         
     pls.fit(Xn, Y)
     
     return pls
Exemplo n.º 5
0
 def pls_test(self, X, Y, pls):
     
     Xn = simple_normalize(X)
     
     predicted_Y = [x[0] for x in pls.predict(Xn)]
     
     pred_Y_sign = np.sign(predicted_Y)
     Y_sign = np.sign(Y)
     
     accuracy = (Y_sign == pred_Y_sign).sum()*1. /Y.shape[0]
     
     print 'accuracy in fold: ', accuracy
     return accuracy
Exemplo n.º 6
0
    def test_svm(self, X, Y, clf):

        X = simple_normalize(X)
        correct = []
        print 'Checking accuracy of next test group...'
        
        for trial, outcome in zip(X, Y):
            prediction = clf.predict(trial)
            correct.append((prediction[0] == outcome))
            
        accuracy = float(sum(correct))/float(len(correct))
        print 'Test group accuracy: ', accuracy
        return accuracy
Exemplo n.º 7
0
 def normalize_x(self, Xset):
     """Return ``Xset`` after passing it through ``simple_normalize``."""
     return simple_normalize(Xset)
Exemplo n.º 8
0
 def normalize_xset(self, Xset):
     if self.verbose:
         print 'normalizing X...'
     Xnormed = simple_normalize(Xset)
     return Xnormed
Exemplo n.º 9
0
 def train_graphnet(self, X, Y, trial_mask=None, G=None, l1=None, l2=None, l3=None, delta=None,
                   svmdelta=None, initial=None, adaptive=False, svm=False,
                   scipy_compare=False, tol=1e-5, greymatter_mask=None, initial_l1weights=None,
                   use_adj_time=True):
             
     if not type(l1) in [list, tuple]:
         l1 = [l1]
             
     X = simple_normalize(X)
     
     tic = time.clock()
     
     #problemkey = self.regression_type_selector(*[bool(x) for x in [l1, l2, l3, delta, svmdelta]])
     
     problemkey = self.regression_type_selector(l1, l2, l3, delta, svmdelta)
     
     self.problemkey = problemkey
     self.trainX_shape = X.shape
     
     if problemkey in ('HuberSVMGraphNet', 'RobustGraphNet', 'NaiveGraphNet'):
         if G is None:
             #nx = 60
             #ny = 60
             #A, Afull = construct_adjacency_list(nx, ny, 1, return_full=True)
             #A, Afull = self.gen_adj(X.shape[1])
             #if greymatter_mask is not None:
             #    A, GMA = prepare_adj(trial_mask, numt=1, gm_mask=greymatter_mask)
             #else:
             #    A = prepare_adj(trial_mask, numt=1)
             #    GMA = None
             if use_adj_time:
                 A = prepare_adj(trial_mask, numt=1, gm_mask=greymatter_mask)
             else:
                 A = prepare_adj(trial_mask, numt=0, gm_mask=greymatter_mask)
             
         else:
             A = G.copy()
             
     if initial_l1weights is not None:
         newl1 = l1
     else:
         newl1 = None
     
     if problemkey is 'RobustGraphNet':
         problemtype = graphnet.RobustGraphNet
         print 'Robust GraphNet with penalties (l1, l2, l3, delta): ', l1, l2, l3, delta
         l = cwpath.CoordWise((X, Y, A), problemtype, initial_coefs=initial)#, gma=GMA)
         l.problem.assign_penalty(path_key='l1', l1=l1, l2=l2, l3=l3, delta=delta, l1weights=initial_l1weights,
                                  newl1=newl1)
     
     elif problemkey is 'HuberSVMGraphNet':
         problemtype = graphnet.GraphSVM
         print 'HuberSVM GraphNet with penalties (l1, l2, l3, delta): ', l1, l2, l3, delta
         Y = 2*np.round(np.random.uniform(0, 1, len(Y)))-1
         l = cwpath.CoordWise((X, Y, A), problemtype)#, gma=GMA)
         l.problem.assign_penalty(path_key='l1', l1=l1, l2=l2, l3=l3, delta=delta, l1weights=initial_l1weights,
                                  newl1=newl1)
         
     elif problemkey is 'NaiveGraphNet':
         problemtype = graphnet.NaiveGraphNet
         print 'Testing GraphNet with penalties (l1, l2, l3): ', l1, l2, l3
         l = cwpath.CoordWise((X, Y, A), problemtype, initial_coefs=initial)#, gma=GMA)
         l.problem.assign_penalty(path_key='l1', l1=l1, l2=l2, l3=l3, l1weights=initial_l1weights,
                                  newl1=newl1)
         
     elif problemkey is 'NaiveENet':
         problemtype = graphnet.NaiveENet
         print 'Testing ENET with penalties (l1, l2): ', l1, l2
         l = cwpath.CoordWise((X, Y), problemtype, initial_coefs=initial)
         l.problem.assign_penalty(path_key='l1', l1=l1, l2=l2, l1weights=initial_l1weights,
                                  newl1=newl1)
         
     elif problemkey is 'Lasso':
         problemtype = graphnet.Lasso
         print 'Testing LASSO with penalty (l1): ', l1
         l = cwpath.CoordWise((X, Y), problemtype, initial_coefs=initial)
         l.problem.assign_penalty(path_key='l1', l1=l1, l1weights=initial_l1weights, newl1=newl1)
         
     else:
         print 'Incorrect parameters set (no problem key).'
         return False
     
     # Solve the problem:
     print 'Solving the problem...'
     
     coefficients, residuals = l.fit(tol=tol, initial=initial)
     
     self.coefficients = coefficients
     self.residuals = residuals
     
     print '\t---> Fitting problem with coordinate decesnt took: ', time.clock()-tic, 'seconds.'
     
     if adaptive:
         tic = time.clock()
         safety = 1e-5
         l1weights = 1./(self.coefficients[-1]+safety)
         l = cwpath.CoordWise((X, Y, A), problemtype, initial_coefs=initial)
         l.problem.assign_penalty(path_key='l1', l1=l1, l2=l2, l3=l3, delta=delta, l1weights=l1weights, newl1=l1)
         adaptive_coefficients, adaptive_residuals = l.fit(tol=tol, initial=initial)
         print '\t---> Fitting Adaptive GraphNet problem with coordinate descent took: ', time.clock()-tic, 'seconds.'
         
         self.firstpass_coefficients = self.coefficients
         self.firstpass_residuals = self.residuals
         self.coefficients = adaptive_coefficients
         self.residuals = adaptive_residuals
     
     
     if scipy_compare:
         
         l1 = l1[-1]
         beta = self.coefficients[-1]
     
         print '\t---> Fitting with scipy for comparison...'
         
         tic = time.clock()
         
         if problemkey is 'RobustGraphNet':
             def f(beta):
                 huber_sum = self.huber(Y - np.dot(X, beta), delta).sum()/2
                 beta_l1 = l1*np.dot(np.fabs(beta), l1weights)
                 beta_l2 = l2*np.linalg.norm(beta)**2/2
                 beta_l3 = l3*np.dot(beta, np.dot(Afull, beta))/2
                 return huber_sum + beta_l1 + beta_l2 + beta_l3
             
         elif problemkey is 'HuberSVMGraphNet':
             Xp2 = np.hstack([np.ones(X.shape[0])[:,np.newaxis], X])
             def f(beta):
                 ind = range(1, len(beta))
                 huber_err_sum = self.huber_svm_error(beta, Y, Xp2, delta).sum()
                 beta_l1 = np.fabs(beta[ind]).sum()*l1
                 beta_l2 = l2*(np.linalg.norm(beta[ind])**2/2)
                 beta_l3 = l3*(np.dot(beta[ind], np.dot(Afull, beta[ind])))/2
                 return huber_error_sum + beta_l1 + beta_l2 + beta_l3
             
         elif problemkey is 'NaiveGraphNet':
             def f(beta):
                 beta_XY = np.linalg.norm(Y - np.dot(X, beta))**2/2
                 beta_l1 = l1*np.fabs(beta).sum()
                 beta_l2 = l2*np.linalg.norm(beta)**2/2
                 beta_l3 = l3*np.dot(beta, np.dot(Afull, beta))/2
                 return beta_XY + beta_l1 + beta_l2 + beta_l3
             
         elif problemkey is 'NaiveENet':
             def f(beta):
                 beta_XY = np.linalg.norm(Y - np.dot(X, beta))**2/2
                 beta_l1 = l1*np.fabs(beta).sum()
                 beta_l2 = np.linalg.norm(beta)**2/2
                 
         elif problemkey is 'Lasso':
             def f(beta):
                 beta_XY = np.linalg.norm(Y - np.dot(X, beta))**2/2
                 beta_l1 = l1*np.fabs(beta).sum()
                 
         if problemkey is 'HuberSVMGraphNet':
             v = scipy.optimize.fmin_powell(f, np.zeros(Xp2.shape[1]), ftol=1.0e-14, xtol=1.0e-14, maxfun=100000)
         else:
             v = scipy.optimize.fmin_powell(f, np.zeros(X.shape[1]), ftol=1.0e-10, xtol=1.0e-10, maxfun=100000)
             
         v = np.asarray(v)
         
         print '\t---> Fitting GraphNet with scipy took: ', time.clock()-tic, 'seconds.'
         
         assert_true(np.fabs(f(v) - f(beta)) / np.fabs(f(v) + f(beta)) < tol)
         if np.linalg.norm(v) > 1e-8:
             assert_true(np.linalg.norm(v - beta) / np.linalg.norm(v) < tol)
         else:
             assert_true(np.linalg.norm(beta) < 1e-8)
             
         print '\t---> Coordinate-wise and Scipy optimization agree.'
         
     return self.coefficients
Exemplo n.º 10
0
 def fit_linearsvc(self, X, Y, class_weight='auto'):
     print 'fitting linearsvm'
     X = simple_normalize(X)
     clf = svm.LinearSVC(class_weight=class_weight)
     clf.fit(X, Y)
     return clf
Exemplo n.º 11
0
 def fit_svc(self, X, Y, cache_size=5000, class_weight='auto'):
     """Fit ``svm.SVC`` on normalized X against Y and return it.

     ``cache_size`` and ``class_weight`` are forwarded to the ``SVC``
     constructor.
     """
     normalized = simple_normalize(X)
     classifier = svm.SVC(cache_size=cache_size, class_weight=class_weight)
     classifier.fit(normalized, Y)
     return classifier