Exemplo n.º 1
0
def main(_):
    """Run the ontology reasoner on every cross-validation fold and store the results.

    With ``useOntology`` enabled, ``OntReasoner.run`` is called once per fold
    on ``cross_val_<i>.txt``. The per-fold accuracies and remaining sizes are
    printed and written to ``cross_results_<year>/ONTOLOGY_<year>.txt`` inside
    the cross-validation directory, after which the path prefix of the
    remainder files is selected as ``test``.

    Args:
        _: Unused positional argument.
    """
    loadData = False
    useOntology = True
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = True
    runSVM = False

    # `backup` is handed to the ontology reasoner; it is True as soon as at
    # least one of the models is enabled.
    backup = runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE or runSVM

    # All fold files live below one year-specific directory.
    cv_dir = "C:/Users/Maria/Desktop/data/programGeneratedData/crossValidation" + str(FLAGS.year)
    BASE_train = cv_dir + '/cross_train_'
    BASE_val = cv_dir + '/cross_val_'
    BASE_svm_train = cv_dir + '/svm/cross_train_svm_'
    BASE_svm_val = cv_dir + '/svm/cross_val_svm_'

    REMAIN_val = cv_dir + '/cross_val_remainder_'
    REMAIN_svm_val = cv_dir + '/svm/cross_val_remainder_'

    # Number of k-fold cross validations
    split_size = 10

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = loadCrossValidation(
        FLAGS, split_size, loadData)
    # Defaults that are overwritten by the ontology reasoner when it runs.
    remaining_size = 248
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        acc = []
        remaining_size_vec = []
        # k-fold cross validation
        for i in range(split_size):
            Ontology = OntReasoner()
            accuracyOnt, remaining_size = Ontology.run(
                backup, BASE_val + str(i) + '.txt', runSVM, True, i)
            acc.append(accuracyOnt)
            remaining_size_vec.append(remaining_size)

        summary = 'Accuracy: {}, St Dev:{}'.format(
            np.mean(np.asarray(acc)), np.std(np.asarray(acc)))
        # Report on the console first so the numbers are not lost when the
        # result file cannot be opened.
        print(str(split_size) + '-fold cross validation results')
        print(summary)
        print(acc)

        # The original literal mixed quote styles, which left the text
        # `"+str(FLAGS.year)+"` inside the file name instead of the year.
        result_path = (cv_dir + '/cross_results_' + str(FLAGS.year)
                       + '/ONTOLOGY_' + str(FLAGS.year) + '.txt')
        with open(result_path, 'w') as result:
            result.write('size:' + str(test_size) + '\n')
            result.write('accuracy: ' + str(acc) + '\n')
            result.write('remaining size: ' + str(remaining_size_vec) + '\n')
            result.write(summary + ' \n')

        # Select the remainder files matching the enabled model type.
        test = REMAIN_svm_val if runSVM else REMAIN_val
Exemplo n.º 2
0
def main(_):
    """Train and evaluate LCR-Rot-hop, optionally preceded by the ontology reasoner.

    Data and word embeddings are prepared by ``loadDataAndEmbeddings``. When
    ``useOntology`` is enabled the ontology reasoner runs on
    ``FLAGS.test_path_ont`` and its accuracy and remaining size are forwarded
    to the neural model; otherwise the default values set below are used.

    Args:
        _: Unused positional argument.
    """
    loadData = True  # only for non-contextualised word embeddings.
    augment_data = True  # Load data must be true to augment
    #   Use prepareBERT for BERT (and BERT_Large) and prepareELMo for ELMo
    useOntology = False  # When run together with runLCRROTALT, the two-step method is used
    runLCRROTALT = True
    # This driver has no SVM stage, but the ontology reasoner is still called
    # with the flag; it used to be unbound, raising NameError once
    # useOntology was switched on.
    runSVM = False

    # determine if backupmethod is used
    backup = runLCRROTALT

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector, ct = loadDataAndEmbeddings(
        FLAGS, loadData, augment_data)
    # Defaults that are overwritten by the ontology reasoner when it runs.
    remaining_size = 250
    accuracyOnt = 0.87 if FLAGS.year == 2016 else 0.8277

    # Both configurations evaluate the neural model on the same file.
    test = FLAGS.remaining_test_path

    if useOntology:
        print('Starting Ontology Reasoner')
        # accuracy on the test data
        Ontology = OntReasoner()
        accuracyOnt, remaining_size = Ontology.run(backup, FLAGS.test_path_ont,
                                                   runSVM)
        print(test[0])
        # Only the test run is performed here, so only its accuracy is
        # reported (the old message printed it a second time as "train acc").
        print('test acc={:.4f}, remaining size={}'.format(
            accuracyOnt, remaining_size))

    # LCR-Rot-hop model
    if runLCRROTALT:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size,
            augment_data, FLAGS.augmentation_file_path, ct)
        tf.reset_default_graph()
Exemplo n.º 3
0
def main(_):
    """Run the ontology reasoner and, when enabled, the hierarchical or LCR-Rot-hop model.

    The ontology reasoner is evaluated on the test file and on the training
    file. Its test accuracy and test remaining size are forwarded to the
    models that are switched on through the flags below.

    Args:
        _: Unused positional argument.
    """
    loadData = False
    useOntology = True  # Ontology
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = False  # Olaf model
    runSVM = False
    runLCRROTALT_v4 = False  # Maria Model
    weightanalysis = False

    # determine if backupmethod is used; the v4 model is run on `test` exactly
    # like the other models, so it counts as one of them.
    backup = (runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE
              or runSVM or runLCRROTALT_v4)

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = loadDataAndEmbeddings(
        FLAGS, loadData)
    print(test_size)
    # Defaults that are overwritten by the ontology reasoner when it runs.
    remaining_size = 250
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        # out of sample accuracy
        Ontology = OntReasoner()
        accuracyOnt, remaining_size = Ontology.run(backup, FLAGS.test_path,
                                                   runSVM)
        # in sample accuracy; its remaining size is kept in a separate
        # variable so that the test-set value handed to the models below is
        # not overwritten by the training-set value.
        Ontology = OntReasoner()
        accuracyInSampleOnt, remaining_size_in_sample = Ontology.run(
            backup, FLAGS.train_path, runSVM)
        test = FLAGS.remaining_svm_test_path if runSVM else FLAGS.remaining_test_path
        print('train acc = {:.4f}, test acc={:.4f}, remaining size={}'.format(
            accuracyInSampleOnt, accuracyOnt, remaining_size))
    else:
        test = FLAGS.test_svm_path if runSVM else FLAGS.test_path

    if runLCRROTALT_v4:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v4.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    # LCR-Rot-hop model
    if runLCRROTALT:
        tf.reset_default_graph()
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()
        print([_, pred2, fw2, bw2, tl2, tr2])

    print('Finished program succesfully')
Exemplo n.º 4
0
def main(_):
    """Run LCR-Rot-hop, optionally as the second step after the ontology reasoner.

    When ``useOntology`` is enabled the reasoner is evaluated on
    ``FLAGS.test_path_ont`` and ``FLAGS.train_path_ont``; its test accuracy
    and test remaining size are forwarded to the neural model. Otherwise the
    default values set below are forwarded.

    Args:
        _: Unused positional argument.
    """
    loadData = True  # only for non-contextualised word embeddings.
    #   Use prepareBERT for BERT (and BERT_Large) and prepareELMo for ELMo
    useOntology = False  # When run together with runLCRROTALT, the two-step method is used
    runLCRROTALT = True

    runSVM = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    weightanalysis = False

    # determine if backupmethod is used
    backup = runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE or runSVM

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = loadDataAndEmbeddings(
        FLAGS, loadData)
    print(test_size)
    # Defaults that are overwritten by the ontology reasoner when it runs.
    remaining_size = 250
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        # out of sample accuracy
        Ontology = OntReasoner()
        accuracyOnt, remaining_size = Ontology.run(backup, FLAGS.test_path_ont,
                                                   runSVM)
        # in sample accuracy; stored separately so the test-set remaining size
        # passed to the model below is not replaced by the training-set value.
        Ontology = OntReasoner()
        accuracyInSampleOnt, remaining_size_in_sample = Ontology.run(
            backup, FLAGS.train_path_ont, runSVM)
        if runSVM:
            test = FLAGS.remaining_svm_test_path
        else:
            test = FLAGS.remaining_test_path
            print(test[0])
        # Report the in-sample accuracy as "train acc"; the previous call
        # printed the test accuracy twice.
        print('train acc = {:.4f}, test acc={:.4f}, remaining size={}'.format(
            accuracyInSampleOnt, accuracyOnt, remaining_size))
    else:
        test = FLAGS.test_svm_path if runSVM else FLAGS.test_path

    # LCR-Rot-hop model
    if runLCRROTALT:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()
Exemplo n.º 5
0
def _report_cross_validation(result_path, acc, split_size,
                             summary_format='Accuracy: {}, St Dev:{}'):
    """Print the cross-validation accuracies and write them to ``result_path``.

    Args:
        result_path: File that receives the list of accuracies and the summary.
        acc: Accuracies collected over the folds.
        split_size: Number of folds; only used in the console header.
        summary_format: ``str.format`` template that receives the mean and the
            standard deviation of ``acc``.
    """
    scores = np.asarray(acc)
    summary = summary_format.format(np.mean(scores), np.std(scores))
    # Print before touching the file system so the outcome of a long run is
    # still visible when the result file cannot be opened.
    print(str(split_size) + '-fold cross validation results')
    print(summary)
    with open(result_path, 'w') as result:
        result.write(str(acc) + '\n')
        result.write(summary + '\n')


def main(_):
    """Run k-fold cross validation for every enabled model and store the accuracies.

    Each enabled stage trains and evaluates its model once per fold on
    ``cross_train_<i>.txt`` and the selected test file, collects the returned
    accuracy and writes the list together with its mean and standard
    deviation to a result file.

    Args:
        _: Unused positional argument.
    """
    loadData = False
    useOntology = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = False
    runSVM = False
    runLCRModelAlt_hierarchical_v4 = True
    runAdversarial = True

    # determine if backupmethod is used; the v4 and adversarial models are
    # evaluated on `test` like the other models, so they count as well.
    backup = (runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE
              or runSVM or runLCRModelAlt_hierarchical_v4 or runAdversarial)

    year = str(FLAGS.year)
    cv_dir = FLAGS.hardcoded_path + "/data/programGeneratedData/crossValidation" + year
    BASE_train = cv_dir + '/cross_train_'
    BASE_val = cv_dir + '/cross_val_'
    BASE_svm_train = cv_dir + '/svm/cross_train_svm_'
    BASE_svm_val = cv_dir + '/svm/cross_val_svm_'

    REMAIN_val = cv_dir + '/cross_val_remainder_'
    REMAIN_svm_val = cv_dir + '/svm/cross_val_remainder_'

    # Result directory below the data directory. The original literals either
    # kept `"+str(FLAGS.year)+"` as text inside the file name or missed the
    # "/" in front of "cross_results_".
    results_dir = cv_dir + '/cross_results_' + year
    # Some stages write relative to the working directory instead.
    # NOTE(review): confirm whether these should also use `results_dir`.
    local_results_dir = "cross_results_" + year

    # Number of k-fold cross validations
    split_size = 10

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = loadCrossValidation(
        FLAGS, split_size, loadData)
    # Defaults that are overwritten by the ontology reasoner when it runs.
    remaining_size = 248
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        acc = []
        remaining_size_vec = []
        # k-fold cross validation
        for i in range(split_size):
            Ontology = OntReasoner()
            accuracyOnt, remaining_size = Ontology.run(
                backup, BASE_val + str(i) + '.txt', runSVM, True, i)
            acc.append(accuracyOnt)
            remaining_size_vec.append(remaining_size)

        summary = 'Accuracy: {}, St Dev:{}'.format(
            np.mean(np.asarray(acc)), np.std(np.asarray(acc)))
        print(str(split_size) + '-fold cross validation results')
        print(summary)
        print(acc)
        with open(results_dir + '/ONTOLOGY_' + year + '.txt', 'w') as result:
            result.write('size:' + str(test_size) + '\n')
            result.write('accuracy: ' + str(acc) + '\n')
            result.write('remaining size: ' + str(remaining_size_vec) + '\n')
            result.write(summary + ' \n')
        test = REMAIN_svm_val if runSVM else REMAIN_val
    else:
        test = BASE_svm_val if runSVM else BASE_val

    if runLCRROT:
        acc = []
        # NOTE(review): only fold 8 is evaluated here although the report is
        # labelled with `split_size` folds - confirm this is intended.
        for i in [8]:
            acc1, _, _, _, _, _, _, _, _ = lcrModel.main(
                BASE_train + str(i) + '.txt', test + str(i) + '.txt',
                accuracyOnt, test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        _report_cross_validation(
            local_results_dir + "/LCRROT_" + year + '.txt', acc, split_size)

    if runLCRROTINVERSE:
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            acc1, _, _, _, _, _ = lcrModelInverse.main(
                BASE_train + str(i) + '.txt', test + str(i) + '.txt',
                accuracyOnt, test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        _report_cross_validation(
            local_results_dir + "/LCRROT_INVERSE_" + year + '.txt', acc,
            split_size)

    if runLCRROTALT:
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            acc1, _, _, _, _, _ = lcrModelAlt_hierarchical_v3.main(
                BASE_train + str(i) + '.txt', test + str(i) + '.txt',
                accuracyOnt, test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        _report_cross_validation(
            results_dir + '/LCRROT_ALT_' + year + '.txt', acc, split_size)

    if runCABASC:
        acc = []
        # k-fold cross validation
        # NOTE(review): this stage always reads the remainder files instead of
        # `test` - confirm this is intended when the ontology is disabled.
        for i in range(split_size):
            acc1, _, _ = cabascModel.main(BASE_train + str(i) + '.txt',
                                          REMAIN_val + str(i) + '.txt',
                                          accuracyOnt, test_size[i],
                                          remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        _report_cross_validation(
            local_results_dir + "/CABASC_" + year + '.txt', acc, split_size)

    if runSVM:
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            acc1 = svmModel.main(BASE_svm_train + str(i) + '.txt',
                                 test + str(i) + '.txt', accuracyOnt,
                                 test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
        _report_cross_validation(
            local_results_dir + "/SVM_" + year + '.txt', acc, split_size,
            summary_format='Accuracy: {:.5f}, St Dev:{:.4f}')

    if runLCRModelAlt_hierarchical_v4:
        print('Running CrossVal V4, year = ' + year)
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            # NOTE(review): the return value is used as the fold accuracy
            # as-is; confirm that this model returns a single number.
            acc1 = lcrModelAlt_hierarchical_v4.main(
                BASE_train + str(i) + '.txt', test + str(i) + '.txt',
                accuracyOnt, test_size[i], remaining_size)
            # Collect every fold and reset the graph between folds; both
            # statements used to sit after the loop, so only the last fold
            # was recorded.
            acc.append(acc1)
            tf.reset_default_graph()
        # The results used to be written after the `with` block had already
        # closed the file, which raises ValueError.
        _report_cross_validation(
            results_dir + "/v4_" + year + '.txt', acc, split_size,
            summary_format='Accuracy: {:.5f}, St Dev:{:.4f}')

    if runAdversarial:
        print('Running CrossVal adversarial, year = ' + year)
        # Hyperparameters differ between the 2015 data and every other year.
        if FLAGS.year == 2015:
            hyperparameters = dict(learning_rate_dis=0.02,
                                   learning_rate_gen=0.002,
                                   keep_prob=0.3,
                                   momentum_dis=0.9,
                                   momentum_gen=0.36,
                                   l2=0.00001,
                                   k=3)
        else:
            hyperparameters = dict(learning_rate_dis=0.03,
                                   learning_rate_gen=0.0045,
                                   keep_prob=0.3,
                                   momentum_dis=0.7,
                                   momentum_gen=0.42,
                                   l2=0.00001,
                                   k=3)
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            acc1, pred2, fw2, bw2, tl2, tr2 = adversarial.main(
                BASE_train + str(i) + '.txt',
                test + str(i) + '.txt',
                accuracyOnt,
                test_size[i],
                remaining_size,
                WriteFile=False,
                **hyperparameters)
            # As for v4: record every fold, not just the last one.
            acc.append(acc1)
            tf.reset_default_graph()
        _report_cross_validation(
            results_dir + "/Adv_" + year + '.txt', acc, split_size,
            summary_format='Accuracy: {:.5f}, St Dev:{:.4f}')

    # Printed once, after every enabled stage has actually finished.
    print('Finished program succesfully')
Exemplo n.º 6
0
def main(_):
    """Run k-fold cross validation for the ontology reasoner and LCR-Rot-hop.

    With ``useOntology`` enabled the reasoner is evaluated on every fold and
    its results are written to ``cross_results_<year>/ONTOLOGY_<year>.txt``.
    With ``runLCRROTALT`` enabled ``lcrModelAlt.main`` is run per fold and the
    accuracies are written to ``cross_results_<year>/LCRROT_ALT_<year>.txt``.

    Args:
        _: Unused positional argument.
    """
    loadData = True  # only for non-contextualised word embeddings.
    augment_data = False  # Load data must be true to augment
    useOntology = False
    runLCRROTALT = True
    # This driver has no SVM stage, but the ontology reasoner is still called
    # with the flag; it used to be unbound, raising NameError once
    # useOntology was switched on.
    runSVM = False

    # determine if backupmethod is used
    backup = runLCRROTALT

    year = str(FLAGS.year)
    cv_dir = "data/programGeneratedData/crossValidation" + year
    BASE_train = cv_dir + '/cross_train_'
    BASE_val = cv_dir + '/cross_val_'
    BASE_svm_train = cv_dir + '/svm/cross_train_svm_'
    BASE_svm_val = cv_dir + '/svm/cross_val_svm_'

    REMAIN_val = cv_dir + '/cross_val_remainder_'
    REMAIN_svm_val = cv_dir + '/svm/cross_val_remainder_'

    results_dir = "cross_results_" + year

    # Number of k-fold cross validations
    split_size = 10

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = loadCrossValidation(
        FLAGS, split_size, augment_data, loadData)
    # Defaults that are overwritten by the ontology reasoner when it runs.
    remaining_size = 248
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        acc = []
        remaining_size_vec = []
        # k-fold cross validation
        for i in range(split_size):
            Ontology = OntReasoner()
            accuracyOnt, remaining_size = Ontology.run(
                backup, BASE_val + str(i) + '.txt', runSVM, True, i)
            acc.append(accuracyOnt)
            remaining_size_vec.append(remaining_size)

        summary = 'Accuracy: {}, St Dev:{}'.format(
            np.mean(np.asarray(acc)), np.std(np.asarray(acc)))
        # Report on the console first so the numbers are not lost when the
        # result file cannot be opened.
        print(str(split_size) + '-fold cross validation results')
        print(summary)
        print(acc)
        with open(results_dir + "/ONTOLOGY_" + year + '.txt', 'w') as result:
            result.write('size:' + str(test_size) + '\n')
            result.write('accuracy: ' + str(acc) + '\n')
            result.write('remaining size: ' + str(remaining_size_vec) + '\n')
            result.write(summary + ' \n')
        test = REMAIN_val
    else:
        test = BASE_val

    if runLCRROTALT:
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            acc1, _, _, _, _, _ = lcrModelAlt.main(
                BASE_train + str(i) + '.txt',
                test + str(i) + '.txt',
                accuracyOnt,
                test_size[i],
                remaining_size,
                augment_data,
                FLAGS.augmentation_file_path,
                ct={})
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))

        summary = 'Accuracy: {}, St Dev:{}'.format(
            np.mean(np.asarray(acc)), np.std(np.asarray(acc)))
        print(str(split_size) + '-fold cross validation results')
        print(summary)
        with open(results_dir + "/LCRROT_ALT_" + year + '.txt', 'w') as result:
            # One line per item; the summary used to end in the literal
            # characters "/n" instead of a newline.
            result.write(str(acc) + '\n')
            result.write(summary + '\n')

    print('Finished program succesfully')
Exemplo n.º 7
0
def _split_at_target(words):
    """Return the words before and after the first ``$t$`` marker.

    Every ``$t$`` token is dropped; all words after the first marker go to
    the right-hand list.
    """
    left, right = [], []
    seen_target = False
    for word in words:
        if word == '$t$':
            seen_target = True
            continue
        if seen_target:
            right.append(word)
        else:
            left.append(word)
    return left, right


def _join_values(values):
    """Format a sequence of values as one ``;``-separated line."""
    return ";".join(str(x) for x in values)


def _write_sentence_analysis(path, sent, target, true, pred1, pred2, pred3,
                             lcr_outputs, alt_outputs, weights):
    """Write the instances where CABASC predicts 1 and LCR-Rot-hop predicts 0.

    Args:
        path: Output file, overwritten on every call.
        sent: Tokenised sentences; ``$t$`` marks the target position.
        target: Target tokens per instance.
        true: True labels per instance.
        pred1: Predictions returned by ``lcrModel.main``.
        pred2: Predictions returned by ``lcrModelAlt.main``.
        pred3: Predictions returned by ``cabascModel.main``.
        lcr_outputs: ``(fw1, bw1, tl1, tr1)`` as returned by ``lcrModel.main``.
        alt_outputs: ``(fw2, bw2, tl2, tr2)`` as returned by ``lcrModelAlt.main``.
        weights: Third value returned by ``cabascModel.main``.
    """
    fw1, bw1, tl1, tr1 = lcr_outputs
    fw2, bw2, tl2, tr2 = alt_outputs
    # The context manager closes the file even when formatting a row fails.
    with open(path, "w") as out:
        for i, value in enumerate(pred3):
            if value == 1 and pred2[i] == 0:
                sentleft, sentright = _split_at_target(sent[i])
                print(i)
                rows = [
                    str(i),
                    'lcr pred: {}; CABASC pred: {}; lcralt pred: {}; true: {}'
                    .format(pred1[i], pred3[i], pred2[i], true[i]),
                    # values returned by the LCR-Rot model
                    ";".join(sentleft),
                    _join_values(fw1[i][0]),
                    ";".join(sentright),
                    _join_values(bw1[i][0]),
                    ";".join(target[i]),
                    _join_values(tl1[i][0]),
                    _join_values(tr1[i][0]),
                    # values returned by the LCR-Rot-hop model
                    ";".join(sentleft),
                    _join_values(fw2[i][0]),
                    ";".join(sentright),
                    _join_values(bw2[i][0]),
                    ";".join(target[i]),
                    _join_values(tl2[i][0]),
                    _join_values(tr2[i][0]),
                    # values returned by the CABASC model
                    ";".join(sent[i]),
                    _join_values(weights[i][0]),
                ]
                out.write("\n".join(rows) + "\n")


def main(_):
    """Run the ontology reasoner and the enabled models on the train/test data.

    When ``useOntology`` is enabled the reasoner is evaluated on the test and
    the training file, and the models are evaluated on the remaining test
    file. With ``weightanalysis`` enabled together with the LCR-Rot, LCR-Rot-hop
    and CABASC models, a per-sentence comparison is written to
    ``sentence_analysis.txt``.

    Args:
        _: Unused positional argument.
    """
    loadData = False
    # loadData = True
    # useOntology = True
    useOntology = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = True
    runSVM = False

    weightanalysis = False

    # determine if backupmethod is used
    backup = runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE or runSVM

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = loadDataAndEmbeddings(
        FLAGS, loadData)
    # print(test_size)
    # Defaults that are overwritten by the ontology reasoner when it runs.
    remaining_size = 250
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        # out of sample accuracy
        Ontology = OntReasoner()
        accuracyOnt, remaining_size = Ontology.run(backup, FLAGS.test_path,
                                                   runSVM)
        # in sample accuracy; stored separately so the test-set remaining size
        # passed to the models below is not replaced by the training-set value.
        Ontology = OntReasoner()
        accuracyInSampleOnt, remaining_size_in_sample = Ontology.run(
            backup, FLAGS.train_path, runSVM)
        test = FLAGS.remaining_svm_test_path if runSVM else FLAGS.remaining_test_path
        # Report the in-sample accuracy as "train acc"; the previous call
        # printed the test accuracy twice.
        print('train acc = {:.4f}, test acc={:.4f}, remaining size={}'.format(
            accuracyInSampleOnt, accuracyOnt, remaining_size))
    else:
        test = FLAGS.test_svm_path if runSVM else FLAGS.test_path

    # LCR-Rot model
    if runLCRROT:
        _, pred1, fw1, bw1, tl1, tr1, sent, target, true = lcrModel.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    # LCR-Rot-inv model
    if runLCRROTINVERSE:
        lcrModelInverse.main(FLAGS.train_path, test, accuracyOnt, test_size,
                             remaining_size)
        tf.reset_default_graph()

    # LCR-Rot-hop model
    if runLCRROTALT:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    # CABASC model
    if runCABASC:
        _, pred3, weights = cabascModel.main(FLAGS.train_path, test,
                                             accuracyOnt, test_size,
                                             remaining_size)
        # The comparison needs the outputs of all three models.
        if weightanalysis and runLCRROT and runLCRROTALT:
            _write_sentence_analysis(
                'sentence_analysis.txt', sent, target, true,
                pred1, pred2, pred3,
                (fw1, bw1, tl1, tr1), (fw2, bw2, tl2, tr2), weights)

    # BoW model
    if runSVM:
        svmModel.main(FLAGS.train_svm_path, test, accuracyOnt, test_size,
                      remaining_size)

    print('Finished program succesfully')
Exemplo n.º 8
0
def main(_):
    """Run the selected LCR-Rot-hop variant, optionally with curriculum learning.

    When one of the curriculum strategies (one-pass or baby-steps) is enabled,
    the sorted instance indices are loaded from ``FLAGS.sorted_indices`` or,
    if that file cannot be loaded, recomputed with ``sentiWordNet`` and
    ``auxModel`` and stored there. The selected model is then run on the
    training file and the chosen test file.

    Args:
        _: Unused positional argument.
    """
    loadData = False  # only for non-contextualised word embeddings.
    #   Use prepareBERT for BERT (and BERT_Large) and prepareELMo for ELMo
    useOntology = False  # When run together with runLCRROTALT, the two-step method is used
    runLCRROTALT = False

    runSVM = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    weightanalysis = False

    runLCRROTALT_v1 = False
    runLCRROTALT_v2 = False
    runLCRROTALT_v3 = False
    runLCRROTALT_v4 = True

    # Choose at most one curriculum strategy; either of them needs the sorted
    # indices. Deriving the flag directly keeps it defined when both are
    # False (it used to be unbound in that case, raising NameError below).
    runOne_Pass = False
    runBaby_Steps = True
    curriculum_learning = runOne_Pass or runBaby_Steps

    # determine if backupmethod is used
    backup = (runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE
              or runSVM or runLCRROTALT_v1 or runLCRROTALT_v2
              or runLCRROTALT_v3 or runLCRROTALT_v4)

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = loadDataAndEmbeddings(FLAGS, loadData)
    print(test_size)
    # Defaults that are overwritten by the ontology reasoner when it runs.
    remaining_size = 250
    accuracyOnt = 0.87
    tf.reset_default_graph()

    if useOntology:
        print('Starting Ontology Reasoner')
        # accuracy on the test data
        Ontology = OntReasoner()
        accuracyOnt, remaining_size = Ontology.run(backup, FLAGS.test_path_ont, runSVM)
        if runSVM:
            test = FLAGS.remaining_svm_test_path
        else:
            test = FLAGS.remaining_test_path
            print(test[0])
        # Only the test run is performed here, so only its accuracy is
        # reported (the old message printed it a second time as "train acc").
        print('test acc={:.4f}, remaining size={}'.format(accuracyOnt, remaining_size))
    else:
        test = FLAGS.test_svm_path if runSVM else FLAGS.test_path

    # Get curriculum learning scores, either the ones already saved, or new ones
    # Make sure that the instances in FLAGS.train_path_ont and FLAGS.train_path (and the two test sets) have the same order of their instances!
    if curriculum_learning:
        try:
            # NOTE: unpickling executes code embedded in the file; only point
            # FLAGS.sorted_indices at a cache file written by this program.
            with open(FLAGS.sorted_indices, "rb") as cache:
                sort_ind = pickle.load(cache)
        except (OSError, EOFError, pickle.UnpicklingError, AttributeError,
                ImportError, IndexError):
            # No usable cache: compute the scores and store the resulting
            # order. Only the documented load failures are caught, so
            # KeyboardInterrupt and programming errors are no longer swallowed.
            tr_features, tr_sent = sentiWordNet.main(FLAGS.train_path_ont, FLAGS.train_aspect_categories)
            te_features, te_sent = sentiWordNet.main(FLAGS.test_path_ont, FLAGS.test_aspect_categories)
            tr_sent = np.asarray(utils.change_y_to_onehot(tr_sent))
            te_sent = np.asarray(utils.change_y_to_onehot(te_sent))
            print(tr_features.shape)
            print(tr_sent.shape)
            print(te_features.shape)
            print(te_sent.shape)

            curr_scores = auxModel.main(tr_features, te_features, tr_sent, te_sent)
            tf.reset_default_graph()
            # Instance indices ordered by ascending score.
            inds1 = np.arange(0, len(curr_scores))
            sort_ind = [x for _, x in sorted(zip(curr_scores, inds1))]
            with open(FLAGS.sorted_indices, "wb") as cache:
                pickle.dump(sort_ind, cache)

    # LCR-Rot-hop model
    if runLCRROTALT:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runLCRROTALT_v1:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v1.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runLCRROTALT_v2:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v2.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runLCRROTALT_v3:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v3.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runLCRROTALT_v4:
        if runOne_Pass:
            acc = lcrModelAlt_hierarchical_v4_one_pass.main(
                FLAGS.train_path, test, accuracyOnt, test_size,
                remaining_size, sort_ind, FLAGS.num_buckets)
            tf.reset_default_graph()
        elif runBaby_Steps:
            tf.reset_default_graph()
            acc = lcrModelAlt_hierarchical_v4_baby_steps.main(
                FLAGS.train_path, test, accuracyOnt, test_size,
                remaining_size, sort_ind, FLAGS.num_buckets)
            tf.reset_default_graph()
        else:
            # No curriculum strategy selected: train/eval/test variant.
            acc = lcrModelAlt_hierarchical_v4_trainevaltest.main(
                FLAGS.hyper_train_path, FLAGS.hyper_eval_path, test,
                FLAGS.train_path, accuracyOnt, test_size, remaining_size)
            tf.reset_default_graph()
Exemplo n.º 9
0
def main(_):
    """Run the enabled sentiment models once on the fixed train/test split.

    The boolean switches at the top of the function select which models are
    executed.  When ``useOntology`` is set, the ontology reasoner is run on
    the test and training files first and the enabled models are then run on
    the "remaining" test file (``FLAGS.remaining_test_path`` or
    ``FLAGS.remaining_svm_test_path``); otherwise they are run on the full
    test file (``FLAGS.test_path`` or ``FLAGS.test_svm_path``).

    Args:
        _: Ignored.  Presumably the argv list handed over by the app runner
            -- TODO confirm against the caller.
    """
    loadData = False
    useOntology = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = False
    runSVM = False
    runlcrDoubleRAA = True
    runINVMULTIHOP1 = False
    runlcrDoubleRAAtype2 = False

    # Determine if a backup method is used.  Every model switch takes part,
    # including runlcrDoubleRAAtype2, which the original condition left out
    # although that model is run below like the others.
    backup = any((runCABASC, runLCRROT, runLCRROTALT, runLCRROTINVERSE,
                  runSVM, runlcrDoubleRAA, runINVMULTIHOP1,
                  runlcrDoubleRAAtype2))

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = loadDataAndEmbeddings(
        FLAGS, loadData)
    print(test_size)

    # Defaults handed to the models when the ontology reasoner is not run.
    remaining_size = 250
    accuracyOnt = 0.87

    if useOntology:
        print('Starting Ontology Reasoner')
        # out of sample accuracy; this remaining_size is the one the models
        # below are given together with the remaining test file
        accuracyOnt, remaining_size = OntReasoner().run(
            backup, FLAGS.test_path, runSVM)
        # in sample accuracy; its remaining size is discarded on purpose so
        # that it cannot overwrite the out-of-sample value obtained above
        accuracyInSampleOnt, _ = OntReasoner().run(
            backup, FLAGS.train_path, runSVM)
        test = (FLAGS.remaining_svm_test_path
                if runSVM else FLAGS.remaining_test_path)
        print('train acc = {:.4f}, test acc={:.4f}, remaining size={}'.format(
            accuracyInSampleOnt, accuracyOnt, remaining_size))
    else:
        test = FLAGS.test_svm_path if runSVM else FLAGS.test_path

    # The values returned by the models are not used in this function, so
    # they are no longer bound to local names.

    # LCR-Rot model
    if runLCRROT:
        lcrModel.main(FLAGS.train_path, test, accuracyOnt, test_size,
                      remaining_size)
        tf.reset_default_graph()

    # LCR-Rot-inv model
    if runLCRROTINVERSE:
        lcrModelInverse.main(FLAGS.train_path, test, accuracyOnt, test_size,
                             remaining_size)
        tf.reset_default_graph()

    # LCR-Rot-hop model
    if runLCRROTALT:
        lcrModelAlt.main(FLAGS.train_path, test, accuracyOnt, test_size,
                         remaining_size)
        tf.reset_default_graph()

    if runlcrDoubleRAA:
        lcrDoubleRAA.main(FLAGS.train_path, test, accuracyOnt, test_size,
                          remaining_size)
        tf.reset_default_graph()

    if runINVMULTIHOP1:
        lcrinvmodel2.main(FLAGS.train_path, test, accuracyOnt, test_size,
                          remaining_size)
        tf.reset_default_graph()

    if runlcrDoubleRAAtype2:
        lcrDoubleRAAtype2.main(FLAGS.train_path, test, accuracyOnt, test_size,
                               remaining_size)
        tf.reset_default_graph()

    # BoW model
    if runSVM:
        svmModel.main(FLAGS.train_svm_path, test, accuracyOnt, test_size,
                      remaining_size)

    print('Finished program succesfully')
Exemplo n.º 10
0
def main(_):
    """Run k-fold cross validation for the enabled models and store results.

    The boolean switches at the top of the function select which models are
    evaluated.  For every enabled model each of the ``split_size`` folds is
    run, the per-fold accuracies are collected, and their mean and standard
    deviation are printed and written to
    ``cross_results_<year>/<MODEL>_<year>.txt``.

    When ``useOntology`` is set, the ontology reasoner is run on every
    validation fold first and the models are evaluated on the per-fold
    "remainder" files; otherwise they are evaluated on the plain validation
    folds.

    Args:
        _: Ignored.  Presumably the argv list handed over by the app runner
            -- TODO confirm against the caller.
    """
    loadData = False
    useOntology = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = True
    runSVM = False

    # determine if backupmethod is used
    backup = (runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE
              or runSVM)

    cross_dir = "data/programGeneratedData/crossValidation" + str(FLAGS.year)
    BASE_train = cross_dir + '/cross_train_'
    BASE_val = cross_dir + '/cross_val_'
    BASE_svm_train = cross_dir + '/svm/cross_train_svm_'
    BASE_svm_val = cross_dir + '/svm/cross_val_svm_'

    REMAIN_val = cross_dir + '/cross_val_remainder_'
    REMAIN_svm_val = cross_dir + '/svm/cross_val_remainder_'

    # Number of k-fold cross validations
    split_size = 10

    def _result_path(model_tag):
        # Location of the results file belonging to one model.
        return ("cross_results_" + str(FLAGS.year) + "/" + model_tag + "_" +
                str(FLAGS.year) + '.txt')

    def _report(model_tag, fold_acc,
                summary_fmt='Accuracy: {}, St Dev:{}'):
        # Print the cross-validation summary of one model and write it to
        # that model's results file, one record per line.
        summary = summary_fmt.format(np.mean(np.asarray(fold_acc)),
                                     np.std(np.asarray(fold_acc)))
        with open(_result_path(model_tag), 'w') as result:
            result.write(str(fold_acc) + '\n')
            # The original wrote the two characters ' /n' here instead of a
            # line break.
            result.write(summary + ' \n')
        print(str(split_size) + '-fold cross validation results')
        print(summary)

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = loadCrossValidation(
        FLAGS, split_size, loadData)

    # Per-fold ontology accuracy and remaining size handed to the models.
    # Without the ontology step every fold gets the same default; with it,
    # every fold gets its own values (the original passed the values of the
    # last fold to all folds, although test_size was indexed per fold).
    fold_accuracy_ont = [0.87] * split_size
    fold_remaining_size = [248] * split_size

    if useOntology:
        print('Starting Ontology Reasoner')
        fold_accuracy_ont = []
        fold_remaining_size = []
        # k-fold cross validation
        for i in range(split_size):
            accuracy_i, remaining_i = OntReasoner().run(
                backup, BASE_val + str(i) + '.txt', runSVM, True, i)
            fold_accuracy_ont.append(accuracy_i)
            fold_remaining_size.append(remaining_i)
        ont_summary = 'Accuracy: {}, St Dev:{}'.format(
            np.mean(np.asarray(fold_accuracy_ont)),
            np.std(np.asarray(fold_accuracy_ont)))
        with open(_result_path("ONTOLOGY"), 'w') as result:
            print(str(split_size) + '-fold cross validation results')
            print(ont_summary)
            print(fold_accuracy_ont)
            # newline added so that the size and accuracy records do not
            # run together on one line
            result.write('size:' + str(test_size) + '\n')
            result.write('accuracy: ' + str(fold_accuracy_ont) + '\n')
            result.write('remaining size: ' + str(fold_remaining_size) + '\n')
            result.write(ont_summary + ' \n')
        test = REMAIN_svm_val if runSVM else REMAIN_val
    else:
        test = BASE_svm_val if runSVM else BASE_val

    if runLCRROT:
        acc = []
        # k-fold cross validation over all folds (the original iterated over
        # the single fold [8] while still reporting k-fold results)
        for i in range(split_size):
            acc1, _, _, _, _, _, _, _, _ = lcrModel.main(
                BASE_train + str(i) + '.txt', test + str(i) + '.txt',
                fold_accuracy_ont[i], test_size[i], fold_remaining_size[i])
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        _report("LCRROT", acc)

    if runLCRROTINVERSE:
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            acc1, _, _, _, _, _ = lcrModelInverse.main(
                BASE_train + str(i) + '.txt', test + str(i) + '.txt',
                fold_accuracy_ont[i], test_size[i], fold_remaining_size[i])
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        _report("LCRROT_INVERSE", acc)

    if runLCRROTALT:
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            acc1, _, _, _, _, _ = lcrModelAlt.main(
                BASE_train + str(i) + '.txt', test + str(i) + '.txt',
                fold_accuracy_ont[i], test_size[i], fold_remaining_size[i])
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        _report("LCRROT_ALT", acc)

    if runCABASC:
        acc = []
        # k-fold cross validation
        # NOTE(review): this branch always reads the remainder files, also
        # when useOntology is False, whereas the other models use `test` --
        # confirm whether that is intended.
        for i in range(split_size):
            acc1, _, _ = cabascModel.main(BASE_train + str(i) + '.txt',
                                          REMAIN_val + str(i) + '.txt',
                                          fold_accuracy_ont[i], test_size[i],
                                          fold_remaining_size[i])
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        _report("CABASC", acc)

    if runSVM:
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            acc1 = svmModel.main(BASE_svm_train + str(i) + '.txt',
                                 test + str(i) + '.txt', fold_accuracy_ont[i],
                                 test_size[i], fold_remaining_size[i])
            acc.append(acc1)
            tf.reset_default_graph()
        _report("SVM", acc, 'Accuracy: {:.5f}, St Dev:{:.4f}')

    print('Finished program succesfully')
Exemplo n.º 11
0
def main(_):
    loadData = False  # only for non-contextualised word embeddings.
    # Use prepareBERT for BERT (and BERT_Large) and prepareELMo for ELMo
    useOntology = False  # When run together with runLCRROTALT, the two-step method is used
    runLCRROTALT = False

    runSVM = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    weightanalysis = False

    runLCRROTALT_v1 = False
    runLCRROTALT_v2 = False
    runLCRROTALT_v3 = False
    runLCRROTALT_v4 = True

    runAdversarial = False

    #Save and Restore if desired
    Save = False
    RestoreSave = False
    restore_path = '/Users/ronhochstenbach/Desktop/Ectrie Thesis/Venv_Thesis/Saved_Models/2020-05-29 23:08:39.394204_BERT_2016/Iter_0-470'  #do not add .meta!

    # determine if backupmethod is used
    if runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE or runSVM or runLCRROTALT_v1 or runLCRROTALT_v2 or runLCRROTALT_v3 or runLCRROTALT_v4 or runAdversarial:
        backup = True
    else:
        backup = False

    # retrieve data and wordembeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = loadDataAndEmbeddings(
        FLAGS, loadData)
    print(train_size)
    print(test_size)
    remaining_size = 250
    accuracyOnt = 0.87

    if useOntology == True:
        print('Starting Ontology Reasoner')
        # in sample accuracy
        Ontology = OntReasoner()
        accuracyOnt, remaining_size = Ontology.run(backup, FLAGS.test_path_ont,
                                                   runSVM)
        # out of sample accuracy
        # Ontology = OntReasoner()
        # accuracyInSampleOnt, remainingInSample_size = Ontology.run(backup,FLAGS.train_path_ont, runSVM)
        if runSVM == True:
            test = FLAGS.remaining_svm_test_path
        else:
            test = FLAGS.remaining_test_path
            print(test[0])
        print('train acc = {:.4f}, test acc={:.4f}, remaining size={}'.format(
            accuracyOnt, accuracyOnt, remaining_size))
    else:
        if runSVM == True:
            test = FLAGS.test_svm_path
        else:
            test = FLAGS.test_path

    # LCR-Rot-hop model
    if runLCRROTALT == True:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runLCRROTALT_v1 == True:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v1.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runLCRROTALT_v2 == True:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v2.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runLCRROTALT_v3 == True:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v3.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runLCRROTALT_v4 == True:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt_hierarchical_v4.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runAdversarial == True:
        print('Running Adversarial')
        _, pred2, fw2, bw2, tl2, tr2 = adversarial.main(
            FLAGS.train_path,
            test,
            accuracyOnt,
            test_size,
            remaining_size,
            learning_rate_dis=0.02,
            learning_rate_gen=0.002,
            keep_prob=0.3,
            momentum_dis=0.9,
            momentum_gen=0.36,
            l2=0.00001,
            k=3,
            WriteFile=True)
        tf.reset_default_graph()