We compare our in-sample and out-of-sample error with and without regularization.
'''

import numpy as np
import h2  # local helper module used below (exact import form assumed)


def nonlinear_transform(x):
    # Map each 2-D point (x1, x2) to the 7-feature nonlinear basis used below.
    f = lambda x: [
        x[0], x[1], x[0]**2, x[1]**2, x[0] * x[1],
        abs(x[0] - x[1]),
        abs(x[0] + x[1])
    ]
    return np.array([f(x_i) for x_i in x])
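

# For illustration (not in the original script): the transform maps each 2-D
# point to seven features, e.g.
#   nonlinear_transform(np.array([[1., 2.]])) -> [[1., 2., 1., 4., 2., 1., 3.]]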


if __name__ == '__main__':
    x_train, y_train = get_train()
    x_test, y_test = get_test()
    x_train = nonlinear_transform(x_train)
    x_test = nonlinear_transform(x_test)

    lrc = h2.LinearRegressionClassifier(7, x_train, y_train)
    in_sample_err = h2.calc_error_rate(y_train, lrc.classify(x_train))
    out_sample_err = h2.calc_error_rate(y_test, lrc.classify(x_test))
    h2.boldprint("Without Regularization:")
    print("In sample err", in_sample_err)
    print("Out of sample err", out_sample_err)

    _lambda = .5
    lrc = h2.LinearRegressionClassifier(7,
                                        x_train,
                                        y_train,
                                        reg=True,
                                        _lambda=_lambda)  # keyword name assumed; the source is truncated here
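    # The source presumably repeats the error comparison for the regularized
    # fit (sketch, mirroring the block above):
    in_sample_err = h2.calc_error_rate(y_train, lrc.classify(x_train))
    out_sample_err = h2.calc_error_rate(y_test, lrc.classify(x_test))
    h2.boldprint("With Regularization:")
    print("In sample err", in_sample_err)
    print("Out of sample err", out_sample_err)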

Example #2

import load_data
import numpy
import representation
from sklearn import svm

from scipy.spatial.distance import correlation
from scipy.spatial.distance import cosine
from scipy.spatial.distance import euclidean

#### input paths
trainFilePath = '../Data/train-data.txt'
devFilePath = '../Data/dev-data.txt'
testFilePath = '../Data/task_2_test_set_to_release.txt'

#### loading data
trainClassID, trainTweetTxt = load_data.get_train(trainFilePath)
devClassID, devTweetTxt = load_data.get_dev(devFilePath)
testTweetID, testTweetTxt = load_data.get_test(testFilePath)

#### merging dev labels into the training labels
trainClassID = trainClassID + devClassID
#### representing as a matrix
mini_df = 1
for k in range(1, 2):
    trainDTMatrix, devDTMatrix, testDTMatrix = representation.get_dtm(trainTweetTxt, devTweetTxt, testTweetTxt, k)
##    trainDTMatrix, devDTMatrix, testDTMatrix = representation.get_tdidf(trainTweetTxt, devTweetTxt, testTweetTxt, k)

    trainDTMatrix = trainDTMatrix.todense()
    devDTMatrix = devDTMatrix.todense()
    testDTMatrix = testDTMatrix.todense()

    # fold the dev matrix into the training matrix (labels were merged above)
    trainDTMatrix = numpy.concatenate((trainDTMatrix, devDTMatrix))
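
    # The original snippet is truncated here. A minimal sketch (an assumption,
    # not the source code) of feeding the merged matrix to sklearn's SVM:
    clf = svm.LinearSVC()
    clf.fit(trainDTMatrix, trainClassID)
    testPred = clf.predict(testDTMatrix)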
    
Example #3

Adam's parameters are adjusted in an inverted triangle over the first 45 epochs (the first 45,000 batches in the schedule below), then level off for the rest of training.
We only adjust the learning rate in this code.
The learning rate ramps between .0008 and .01 over that window, then descends to 1e-4 for the remaining batches.
'''


import numpy as np


def get_learning_rate_and_momentum(batch_number, final_number=200000):
    # One-cycle-style schedule: ramp the rate from .0008 up to .01 and back down
    # over the first 45,000 batches, then decay it linearly to 1e-4 at the end.
    if batch_number < 45000:
        return .01 - (.01 - .0008) * np.abs(batch_number - 22500) / 22500
    else:
        return .0008 - .0007 * (batch_number - 45000) / (final_number - 45000)
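
# Quick shape check of the schedule (added for illustration, not part of the
# original script): prints 0.0008, 0.01, 0.0008, 0.0001 for these batch numbers.
for b in (0, 22500, 45000, 100000):
    print(b, round(get_learning_rate_and_momentum(b, 100000), 4))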


from load_data import get_batch, get_test, get_validation
for i in range(100000):
    if USE_ONE_CYCLE:
        r = get_learning_rate_and_momentum(i, 100000)
    else:
        r = 1e-4

    x_batch, y_batch = get_batch()
    x_test = get_test(5)
    step.run(feed_dict={x: x_batch, x_test_reg: x_test, y: y_batch, rate: r})

    if i % 100 == 0:
        # Report accuracy on a held-out validation batch, not the training batch.
        x_val, y_val = get_validation(50)
        print(i)
        print(accuracy.eval(feed_dict={x: x_val, y: y_val}))

saver.save(sess, 'models/fully_conv')
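
# To reuse the checkpoint later (sketch; assumes the same graph and a matching
# tf.train.Saver are constructed first):
# saver.restore(sess, 'models/fully_conv')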