Example #1
0
def grad_check(x, beta, delta):
    """
    Numerically sanity-check the analytic gradient for one example.

    Takes a formatted classifier-trait pair example (x) and the current
    parameter vector (beta), along with a perturbation scale (delta).
    Computes the analytic gradient of the log conditional likelihood (LCL)
    for this example, draws a random perturbation vector whose entries are
    normally distributed with standard deviation delta, and compares the
    first-order predicted change dot(delta_vec, grad) against the actual
    change LCL(beta + delta_vec, x) - LCL(beta, x).  If the gradient is
    correct, the two quantities should roughly agree (ratio near 1).

    Returns:
        (predicted_change, actual_change, predicted_change/actual_change)

    NOTE(review): relies on module-level `np`, `sgd`, and `randn`
    (presumably numpy.random.randn) being in scope.
    """
    y = x[0]                                   # classifier value (label)
    traits = x[1:]                             # trait (feature) vector
    p = sgd.logistic(beta, traits)             # model probability for this example
    x_send = x.reshape(1, np.size(x))          # LCL expects a 2-D (1, D+1) array
    grad = (y - p) * traits                    # analytic gradient of LCL for one example
    LCL_prior = sgd.LCL(beta, x_send)          # LCL at the current beta
    delta_vec = delta * randn(np.size(beta))   # random perturbation, sd = delta
    beta_prime = beta + delta_vec              # perturbed parameter vector
    LCL_prime = sgd.LCL(beta_prime, x_send)    # LCL at the perturbed beta
    diff = np.dot(delta_vec, grad)             # first-order predicted change
    del_LCL = LCL_prime - LCL_prior            # actual change in LCL
    # ratio should be close to 1 when the analytic gradient is correct
    return diff, del_LCL, diff / del_LCL
    
    
Example #2
0
 def test_logistic(self):
     """Check logistic() at a few representative scalar inputs."""
     cases = [
         (1, 0.7310585786300049),
         (2, 0.8807970779778823),
         (-1, 0.2689414213699951),
     ]
     for arg, expected in cases:
         self.assertAlmostEqual(logistic(arg), expected)
Example #3
0
 def test_logistic(self):
     """Spot-check the logistic function against known sigmoid values."""
     inputs = (1, 2, -1)
     expected = (0.7310585786300049, 0.8807970779778823, 0.2689414213699951)
     for z, sigma_z in zip(inputs, expected):
         self.assertAlmostEqual(logistic(z), sigma_z)
Example #4
0
 def test_logistic(self):
     """logistic() applied to a matrix should act elementwise."""
     # Renamed from 'input' to avoid shadowing the builtin.
     actual = logistic(np.matrix([1, 2, -1]))
     expected = np.matrix([0.7310585786300049, 0.8807970779778823, 0.2689414213699951])
     # len() of a (1, 3) matrix is the row count (1), so the original loop
     # compared only the first element; iterate over columns instead so
     # every entry is checked.
     for i in range(expected.shape[1]):
         self.assertAlmostEqual(actual[0, i], expected[0, i])
Example #5
0
def run_sgd():
    """Train a logistic-regression model with SGD and report error rates.

    Loads fixed train/test files, shuffles and splits the training file
    into train/validation halves, standardizes the validation and test
    features with the training split's mean/std, grid-searches mu via
    sgd.mu_gridsearch, and prints error rates (Python 2 print statements).

    Returns:
        (mu_max, lr_max, LCL_max, error_rate_valid, LCL_test, error_rate_test)

    NOTE(review): relies on module-level `np`, `dr`, and `sgd`; the
    data-file column layout (label in column 0, features after) is assumed
    from the indexing below — confirm against the dataset format.
    """
    # load the data and preprocess it
    train_data = np.loadtxt('../dataset/1571/train_npcomp.dat', dtype='float')
    test_data = np.loadtxt('../dataset/1571/test_npcomp.dat', dtype='float')
    
    # save a copy of the original data set, before shuffling and dividing
    train_data_orig = np.copy(train_data)
    
    # save first half as training data, second half as validation data
    D = train_data.shape[1] - 1          # feature count (first column is the label)
    N_trainex = train_data_orig.shape[0]
    # shuffle the training set before splitting it
    # NOTE(review): randstate1 is captured but never restored/used below.
    randstate1 = np.random.get_state()
    np.random.shuffle(train_data_orig)   # in-place shuffle of the copy
    
    # split point: odd counts give the extra example to the training half
    if N_trainex%2 == 0:
        N_trainex = int(N_trainex/2)
    else:
        N_trainex = int(N_trainex/2)+1
    valid_data = train_data_orig[N_trainex:]
    train_data = train_data_orig[:N_trainex]
    
    N_trainex = train_data.shape[0]
    N_validex = valid_data.shape[0]
    N_testex = test_data.shape[0]
    
    # preprocess the data
    # full_output=True also returns the training mean/std for reuse below
    train_data, tdmean, tdstd = dr.preprocess(train_data, full_output=True)
    valid_data = dr.preprocess(valid_data, rescale=False)
    test_data = dr.preprocess(test_data, rescale=False)
    
    # rescale validation and test sets same as training data
    # (columns 1:-1 are the feature columns after preprocessing)
    valid_data[:,1:-1] -= np.resize(tdmean, (N_validex,D))
    valid_data[:,1:-1] /= tdstd
    test_data[:,1:-1] -= np.resize(tdmean, (N_testex,D))
    test_data[:,1:-1] /= tdstd
    
    # initiate the grid search over mu
    trained_pars, mu_max, lr_max, LCL_max = sgd.mu_gridsearch(train_data, valid_data)
    
    # calculate error rate on validation data set
    # predict 1 when p >= 0.5; accumulate 1 for each mismatch with column 0
    errors = 0
    for ind_ex in range(N_validex):
        if sgd.logistic(trained_pars, valid_data[ind_ex,1:]) >= 0.5:
            errors += 1 - valid_data[ind_ex,0]
        else:
            errors += valid_data[ind_ex,0]
    error_rate_valid = errors/N_validex
    print 'Error rate on validation data = ' + str(error_rate_valid*100) + '%\n'
    
    # calculate LCL on test data
    LCL_test = sgd.LCL(trained_pars, test_data)
    
    # calculate error rate on test data set
    errors = 0
    for ind_ex in range(N_testex):
        if sgd.logistic(trained_pars, test_data[ind_ex,1:]) >= 0.5:
            errors += 1 - test_data[ind_ex,0]
        else:
            errors += test_data[ind_ex,0]
    error_rate_test = errors/N_testex
    print 'Error rate on test data = ' + str(error_rate_test*100) + '%\n'
    
    return mu_max, lr_max, LCL_max, error_rate_valid, LCL_test, error_rate_test
Example #6
0
 def test_logistic(self):
     """Tests the logistic function.
     """
     expected_by_input = {1: 0.7310585786300049,
                          2: 0.8807970779778823,
                          -1: 0.2689414213699951}
     for z in (1, 2, -1):
         self.assertAlmostEqual(logistic(z), expected_by_input[z])
Example #7
0
 def test_logistic(self):
     """logistic() applied to a matrix should act elementwise."""
     # Renamed from 'input' to avoid shadowing the builtin.
     actual = logistic(np.matrix([1, 2, -1]))
     expected = np.matrix([0.7310585786300049, 0.8807970779778823, 0.2689414213699951])
     # len() of a (1, 3) matrix is the row count (1), so the original loop
     # compared only the first element; iterate over columns instead so
     # every entry is checked.
     for i in range(expected.shape[1]):
         self.assertAlmostEqual(actual[0, i], expected[0, i])