Example #1
else:
    # load previously trained weight vectors for the 25%, 50%, and 75%
    # training fractions (the matching if-branch is not shown in this excerpt)
    weights_0p25 = np.load('training_output/collins_w_0p25.npy')
    weights_0p5 = np.load('training_output/collins_w_0p5.npy')
    weights_0p75 = np.load('training_output/collins_w_0p75.npy')
weights = [weights_0p25, weights_0p5, weights_0p75]

# load the test data
test_labels = dp.labels_as_ints('dataset/testLabels.txt')
test_sentences = dp.import_sentences('dataset/testSentences.txt')

# calculate the scores
score = []

for w in weights:
    if scoretype == 'word':
        score.append(sr.score_by_word(w,test_labels,test_sentences,dummy))
    elif scoretype == 'sentence':
        score.append(sr.score_by_sentence(w,test_labels,test_sentences,dummy))
    elif scoretype == 'mark':
        score.append(sr.score_by_mark(w,test_labels,test_sentences,dummy))
    else:
        print 'Not a valid scoretype!\n'
        exit(1)  # exit with a non-zero status to signal the error

# save to file
f = open('scores/'+method+scoretype+'.txt', 'w')
f.write('Method: ' + method + ', Scoretype: ' + scoretype + '\n')

if method != 'mark':
    f.write('25%: ' + str(score[0]) + '\n')
    f.write('50%: ' + str(score[1]) + '\n')
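The sr scoring helpers called above are defined elsewhere in the project and are not part of this listing. As a rough illustration of what scoring "by word" usually means, here is a minimal, generic token-accuracy sketch; the decode argument is a hypothetical stand-in for the project's decoder, and the real sr.score_by_word signature (including the dummy argument passed above) may differ.

def word_accuracy(w, labels, sentences, decode):
    """Fraction of words whose predicted tag matches the gold tag (sketch).

    decode(w, sentence) -> predicted label sequence for one sentence; this is
    a stand-in for the project's Viterbi-style decoder, not its real API.
    """
    correct = 0
    total = 0
    for gold, sentence in zip(labels, sentences):
        pred = decode(w, sentence)
        correct += sum(1 for p, g in zip(pred, gold) if p == g)
        total += len(gold)
    return correct / float(total)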
Example #2
import time

import numpy as np

# ffs (feature functions), sr (scoring/decoding), and collins_epoch are
# defined elsewhere in this project and are assumed to be importable here.


def collins(train_labels, train_sentences, validation_labels,
            validation_sentences, pct_train=0.5, Nex=None):
    """
    Runs the Collins perceptron training on the input training data.

    train_labels, train_sentences - Training labels and sentences.
    validation_labels, validation_sentences - Validation labels and sentences.
    pct_train - Fraction of the examples used as training data; the rest are
                used as validation data.
    """
    
    # get J, the total number of feature functions
    J = ffs.calcJ()
    print 'J = ',J
    
    # now run it
    scores = []
    w0 = np.zeros(J)
    print 'Calculating initial score...'
    scores.append(sr.score_by_word(w0,validation_labels,validation_sentences))
    print 'Done!\n'
    # run until converged, according to score on validation set
    nep = 1
    epoch_time = []
    
    print 'Initiating Collins perceptron training.'
    while True:
        print 'Epoch #',nep,'...'
        t0 = time.time()
        # get the new weights & score
        print 'Training...'
        w1 = collins_epoch(train_labels, train_sentences, w0)
        print 'Done.\n'
        epoch_time.append([time.time() - t0])
        
        t0 = time.time()
        print 'Calculating new score...'
        scores.append(sr.general_score(w1,validation_labels,validation_sentences,'word',0))
        print 'Done.\n'
        epoch_time[nep-1].append(time.time() - t0)
        
        # decide if converged
        if scores[nep] < scores[nep-1]:
            break
        else:
            w0 = w1
        nep += 1
        
    print 'Training complete!\n'
    
    """
    # make a prediction on a dummy sentence
    #dummy = ['FIRSTWORD','I','like','cheese','but','I','also','like','bread','LASTWORD']
    dummy = ['FIRSTWORD','Do','you','like','cheese','LASTWORD']
    g_dummy = sr.g(w,dummy)
    U_dummy = sr.U(g_dummy)
    y_best = sr.bestlabel(U_dummy,g_dummy)
    """
    
    # now return final weights, score time series, and epoch timing
    return w0, scores, epoch_time
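The collins_epoch function that performs the actual weight updates is not shown in this listing. For orientation, here is a minimal sketch of one epoch of the Collins (structured) perceptron; decode and features are hypothetical stand-ins for the project's sr and ffs routines, not their real names or signatures.

def collins_epoch_sketch(train_labels, train_sentences, w, decode, features):
    """One pass of the Collins perceptron over the training set (sketch).

    decode(w, sentence)        -> best label sequence under the current weights
    features(labels, sentence) -> global feature-count vector of length J
    Both callables are assumptions standing in for the project's sr / ffs code.
    """
    w = w.copy()
    for gold, sentence in zip(train_labels, train_sentences):
        pred = decode(w, sentence)
        if list(pred) != list(gold):
            # additive update: reward the gold features, penalize the predicted ones
            w += features(gold, sentence) - features(pred, sentence)
    return w

Note that the loop in collins stops as soon as the validation score drops and returns w0 rather than w1, i.e. the last weight vector that did not hurt the validation score, which amounts to a simple form of early stopping.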