def execute_demo(language):
    """Train the baseline for ``language`` and score it on dev, then test.

    A ``Dataset`` is built for ``language`` and the sizes of its training
    and dev splits are printed. A ``Baseline`` is trained on the training
    split together with the dataset's ``bigram_dic``. Predictions and gold
    labels for the dev split, and afterwards for the test split, are handed
    to ``report_score``.
    """
    corpus = Dataset(language)

    summary = "{}: {} training - {} dev".format(
        language, len(corpus.trainset), len(corpus.devset))
    print(summary)

    model = Baseline(language)
    model.train(corpus.trainset, corpus.bigram_dic)

    def score_split(split):
        # Predict first, then collect the reference labels for the same split.
        predicted = model.test(split, corpus.bigram_dic)
        expected = [item['gold_label'] for item in split]
        report_score(expected, predicted)

    score_split(corpus.devset)

    print("{} test".format(language))
    score_split(corpus.testset)
def execute_demo(language):
    """Train one model for ``language`` and score it on a single split.

    Two names that this function reads but does not define steer the run:

    * ``test`` -- when it compares equal to ``True`` the test split is
      evaluated, otherwise the dev split.
    * ``Base`` -- when it compares equal to ``True`` a ``Baseline`` is
      trained, otherwise a ``MyLine``.

    The header line now names the split whose size it prints; previously it
    always said "dev", even when the test split was being reported.
    """
    data = Dataset(language)

    # Equality (not truthiness) is kept on purpose so that exactly the same
    # flag values select the test split as before.
    use_test = test == True  # noqa: E712
    if use_test:
        split_name, eval_set = "test", data.testset
    else:
        split_name, eval_set = "dev", data.devset

    print("{}: {} training - {} {}".format(language, len(data.trainset),
                                           len(eval_set), split_name))

    if Base == True:  # noqa: E712 -- same equality semantics as above
        baseline = Baseline(language)
    else:
        baseline = MyLine(language)

    baseline.train(data.trainset)

    predictions = baseline.test(eval_set)
    gold_labels = [sent['gold_label'] for sent in eval_set]

    report_score(gold_labels, predictions)
예제 #3
0
파일: example.py 프로젝트: yhSmiling/class
def execute_demo(language, flag):
    """Train the baseline for ``language`` and score it on one split.

    Args:
        language: Passed to both ``Dataset`` and ``Baseline``.
        flag: ``0`` evaluates on the dev split, ``1`` on the test split.
            Any other value still builds the dataset and trains the model
            but prints and reports nothing, as before.

    Only the selected split is run through ``baseline.test``; previously
    both splits were predicted even though a single one was reported.

    Notes carried over from the original (translated) comments: the
    dataset's ``trainset`` is in the form returned by the dataset loader,
    ``devset`` is used for testing, and the gold labels are binary values
    (0 1 0 1 ...).
    """
    data = Dataset(language)

    if flag == 0:
        split_name, eval_set = "dev", data.devset
    elif flag == 1:
        split_name, eval_set = "test", data.testset
    else:
        split_name, eval_set = None, None

    if split_name is not None:
        print("{}: {} training - {} {}".format(language, len(data.trainset),
                                               len(eval_set), split_name))

    baseline = Baseline(language)

    baseline.train(data.trainset)

    if split_name is None:
        # Unknown flag: nothing to evaluate.
        return

    predictions = baseline.test(eval_set)
    gold_labels = [sent['gold_label'] for sent in eval_set]

    print("Test by using {} set:".format(split_name))
    report_score(gold_labels, predictions)
def execute_demo(language):
    """Evaluate the baseline on the dev split in both of its modes.

    First a ``Baseline`` created with ``type='classify'`` is trained and its
    dev predictions are passed to ``report_score`` (with ``detailed=True``)
    together with the ``gold_label`` values. Then a second ``Baseline``
    created with ``type='regression'`` is trained and its dev predictions
    are compared with the ``gold_prob`` values (converted to ``float``)
    using ``mean_squared_error``.
    """
    corpus = Dataset(language)

    # NOTE(review): the header says "Test" but the size printed is that of
    # the dev split -- confirm which wording is intended.
    banner = "{}: {} training - {} Test\n".format(
        language.upper(), len(corpus.trainset), len(corpus.devset))
    print(banner)

    # ---- Classification ----
    classifier = Baseline(language, type='classify')
    classifier.train(corpus.trainset)
    label_predictions = classifier.test(corpus.devset)
    label_gold = [item['gold_label'] for item in corpus.devset]
    report_score(label_gold, label_predictions, detailed=True)

    # ---- Regression ----
    regressor = Baseline(language, type='regression')
    regressor.train(corpus.trainset)
    prob_predictions = regressor.test(corpus.devset)

    prob_gold = []
    for item in corpus.devset:
        prob_gold.append(float(item['gold_prob']))

    mse = mean_squared_error(prob_gold, prob_predictions)
    print("Probabilistic classification task:\nMSE:", mse, "\n\n")
def execute_demo(language):
    """Train the baseline with the dataset's feature tables; score on test.

    The training split and seven further ``Dataset`` attributes (unigram,
    suffix, char_trigram, pos, dep, shape, frequency) are passed
    positionally, in that order, to ``Baseline.train``. Test-split
    predictions and gold labels are then given to ``report_score``.
    """
    corpus = Dataset(language)
    model = Baseline(language)

    # Positional order matters: it is the order ``train`` receives them in.
    train_args = (
        corpus.trainset,
        corpus.unigram,
        corpus.suffix,
        corpus.char_trigram,
        corpus.pos,
        corpus.dep,
        corpus.shape,
        corpus.frequency,
    )
    model.train(*train_args)

    predicted = model.test(corpus.testset)
    expected = [item['gold_label'] for item in corpus.testset]

    report_score(expected, predicted)
예제 #6
0
def execute_demo(language, amountdata=100):
    """Compare ``Baseline`` and ``Improved`` on the dev and test splits.

    Args:
        language: Passed to ``Dataset``, ``Baseline`` and ``Improved``.
        amountdata: Passed through as the second argument of ``Dataset``.

    Returns:
        A pair ``(results, results2)`` of lists of 4-tuples
        ``(baseline prediction, improved prediction, gold label,
        target word)``:

        * ``results`` covers the dev split and keeps only the items that
          both models got wrong.
        * ``results2`` covers the test split and is not filtered.
    """
    data = Dataset(language, amountdata)

    print("{}: {} training - {} dev".format(language, len(data.trainset),
                                            len(data.devset)))

    print('\nInitialising')
    baseline = Baseline(language)
    improved = Improved(language)

    print('Training')
    baseline.train(data.trainset)
    improved.train(data.trainset)

    print('Predicting')
    dev_base = baseline.test(data.devset)
    dev_improved = improved.test(data.devset)
    dev_gold = [sent['gold_label'] for sent in data.devset]
    dev_targets = [sent['target_word'] for sent in data.devset]

    print("\nScore for baseline:")
    report_score(dev_gold, dev_base)
    print("Score for improved model:")
    report_score(dev_gold, dev_improved)

    print('Predicting on testset')
    test_base = baseline.test(data.testset)
    test_improved = improved.test(data.testset)
    test_gold = [sent['gold_label'] for sent in data.testset]
    test_targets = [sent['target_word'] for sent in data.testset]

    print("\nScore for baseline:")
    report_score(test_gold, test_base)
    print("Score for improved model:")
    report_score(test_gold, test_improved)

    # Walk the parallel sequences in lockstep instead of indexing by
    # position; keep only the dev items where BOTH models disagree with gold.
    results = [
        (base, imp, gold, word)
        for base, imp, gold, word in zip(dev_base, dev_improved, dev_gold,
                                         dev_targets)
        if base != gold and imp != gold
    ]

    results2 = list(zip(test_base, test_improved, test_gold, test_targets))
    return results, results2
예제 #7
0
def execute_demo(language, algor):
    """Train the baseline with frequency/POS dictionaries; score on test.

    Args:
        language: Passed to ``Dataset`` and ``Baseline``.
        algor: Passed through as the second argument of ``Baseline``.

    The header line now says "test", matching the split whose size it
    prints and on which the model is evaluated (it used to say "dev").
    """
    data = Dataset(language)

    print("{}: {} training - {} test".format(language, len(data.trainset),
                                             len(data.testset)))

    baseline = Baseline(language, algor)

    # Both dictionaries are built from the same train + test concatenation,
    # so build that list once.
    # NOTE(review): test sentences contribute to these dictionaries --
    # confirm that this is intended.
    all_sentences = data.trainset + data.testset
    freqdict1 = baseline.freqdict(all_sentences)
    posindex1 = baseline.posdict(all_sentences)

    baseline.train(data.trainset, freqdict1, posindex1)

    predictions = baseline.test(data.testset, freqdict1, posindex1)

    gold_labels = [sent['gold_label'] for sent in data.testset]

    report_score(gold_labels, predictions)
예제 #8
0
def execute_demo(language):
    """Train the baseline and print detailed scores for dev, then test.

    The sizes of all three splits are printed first. After training on the
    training split, each evaluation split is predicted, a title line is
    printed, and ``report_score`` is called with ``detailed=True``.
    """
    corpus = Dataset(language)

    sizes = (len(corpus.trainset), len(corpus.devset), len(corpus.testset))
    print("{}: {} training - {} dev - {} test".format(language, *sizes))

    model = Baseline(language)
    model.train(corpus.trainset)

    stages = (
        ("Fine-tuned Score - Dev Set", corpus.devset),
        ("Final Score - Test Set", corpus.testset),
    )
    for title, split in stages:
        predicted = model.test(split)
        expected = [item['gold_label'] for item in split]

        print(title)
        report_score(expected, predicted, detailed=True)
예제 #9
0
def execute_demo(language):
    """Score ``Baseline`` and then ``SVM`` on the test split.

    Each model is trained on the training split and used to predict the
    test split; the same list of test gold labels is passed to
    ``report_score`` (third positional argument ``True``) for both models.
    """
    corpus = Dataset(language)

    header = "{}: {} training - {} test".format(
        language, len(corpus.trainset), len(corpus.testset))
    print(header)

    def fit_and_predict(model):
        # Train on the training split, then predict the test split.
        model.train(corpus.trainset)
        return model.test(corpus.testset)

    baseline_output = fit_and_predict(Baseline(language))
    expected = [item['gold_label'] for item in corpus.testset]
    report_score(expected, baseline_output, True)

    svm_output = fit_and_predict(SVM(language))
    report_score(expected, svm_output, True)
예제 #10
0
def _labels_as_ints(named_predictions):
    """Return one list of ``int`` labels per item of ``named_predictions``.

    Each item is indexed as ``item[1]`` to obtain its label sequence.
    """
    return [[int(label) for label in item[1]] for item in named_predictions]


def _hard_vote(int_rows):
    """Average ``int_rows`` column-wise, round, and return string labels.

    ``round`` sends an exact ``.5`` to the nearest even integer, so a mean
    of exactly 0.5 becomes ``'0'``.
    """
    column_means = np.mean(np.array(int_rows), axis=0).tolist()
    return [str(round(mean)) for mean in column_means]


def _report_each_model(gold_labels, named_predictions):
    """Print a title and call ``report_score`` for every prediction item."""
    for pred in named_predictions:
        print('Scores for', pred[0])
        report_score(gold_labels, pred[1])


def execute_demo(language):
    """Score feature-based and word2vec-based models on the dev split.

    ``Baseline.test`` and ``Word2vec.test`` are each expected to return a
    sequence of items where ``item[0]`` is printed as the model's title and
    ``item[1]`` holds its labels (values accepted by ``int``). For each
    family every model is scored on its own, then a hard vote over the
    family is scored; finally a hard vote over both families together is
    scored. All scoring goes through ``report_score`` against the dev
    split's ``gold_label`` values.
    """
    data = Dataset(language)

    print("{}: {} training - {} dev".format(language, len(data.trainset),
                                            len(data.devset)))

    # ---- Feature based models ----
    print('Feature based models')
    baseline = Baseline(language)

    print('Training models')
    baseline.train(data.trainset)

    print('Predicting labels')
    predictions = baseline.test(data.devset)
    predictions_int = _labels_as_ints(predictions)

    gold_labels = [sent['gold_label'] for sent in data.devset]

    print('Calculating scores')
    _report_each_model(gold_labels, predictions)

    print('Scores for hard voting with all models')
    report_score(gold_labels, _hard_vote(predictions_int))

    # ---- Word2vec based models ----
    print('Word2vec based models')
    print('Loading w2v')
    w2v = Word2vec(language)

    print('Training models')
    w2v.train(data.trainset)

    print('Predicting labels')
    predictions_w2v = w2v.test(data.devset)
    predictions_w2v_int = _labels_as_ints(predictions_w2v)

    print('Calculating scores')
    _report_each_model(gold_labels, predictions_w2v)

    print('Scores for hard voting with all models')
    report_score(gold_labels, _hard_vote(predictions_w2v_int))

    # ---- Both families together ----
    # Word2vec rows first, then the feature-based rows, as before.
    print('Scores for hard voting with both types of models')
    report_score(gold_labels,
                 _hard_vote(predictions_w2v_int + predictions_int))