Example #1
def svm_objective(hyperparams):
    global eval_num
    global best_loss
    global best_hyperparams

    eval_num += 1
    (c, gamma) = hyperparams
    print(hyperparams)

    l = svmModel.main(FLAGS.hyper_svm_train_path, FLAGS.hyper_svm_eval_path,
                      accuracyOnt, test_size, remaining_size, c, gamma)
    tf.reset_default_graph()

    # Save training results to disk with unique filenames

    print(eval_num, l, hyperparams)

    if best_loss is None or -l < best_loss:
        best_loss = -l
        best_hyperparams = hyperparams

    result = {
        'loss': -l,
        'status': STATUS_OK,
        'space': hyperparams,
    }

    save_json_result(str(l), result)

    return result
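This objective follows the hyperopt convention (note STATUS_OK and the result dict with 'loss' and 'status'): the accuracy returned by svmModel.main is negated so that minimizing the loss maximizes accuracy. Below is a minimal sketch of how such an objective is typically driven with hyperopt's fmin; the search space and its bounds are illustrative assumptions, not taken from the original code.

import numpy as np
from hyperopt import fmin, tpe, hp, Trials

# Assumed log-uniform search space for the two SVM hyperparameters.
space = [
    hp.loguniform('c', np.log(1e-3), np.log(1e3)),
    hp.loguniform('gamma', np.log(1e-4), np.log(1e1)),
]

trials = Trials()
# fmin minimizes the 'loss' key returned by svm_objective, i.e. it searches
# for the (c, gamma) pair with the highest accuracy. svm_objective and its
# globals (eval_num, best_loss, best_hyperparams) are assumed to be defined
# as in the example above.
best = fmin(fn=svm_objective, space=space, algo=tpe.suggest,
            max_evals=50, trials=trials)
print('best hyperparameters:', best)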
Example #2
def main(_):
    loadData = False
    useOntology = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = False
    runSVM = False
    runLCRModelAlt_hierarchical_v4 = True
    runAdversarial = True

    # determine if backup method is used
    if runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE or runSVM:
        backup = True
    else:
        backup = False

    BASE_train = FLAGS.hardcoded_path + "/data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/cross_train_'
    BASE_val = FLAGS.hardcoded_path + "/data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/cross_val_'
    BASE_svm_train = FLAGS.hardcoded_path + "/data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/svm/cross_train_svm_'
    BASE_svm_val = FLAGS.hardcoded_path + "/data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/svm/cross_val_svm_'

    REMAIN_val = FLAGS.hardcoded_path + "/data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/cross_val_remainder_'
    REMAIN_svm_val = FLAGS.hardcoded_path + "/data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/svm/cross_val_remainder_'

    # Number of k-fold cross validations
    split_size = 10

    # retrieve data and word embeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = loadCrossValidation(
        FLAGS, split_size, loadData)
    remaining_size = 248
    accuracyOnt = 0.87

    if useOntology == True:
        print('Starting Ontology Reasoner')
        acc = []
        remaining_size_vec = []
        #k-fold cross validation
        for i in range(split_size):
            Ontology = OntReasoner()
            accuracyOnt, remaining_size = Ontology.run(
                backup, BASE_val + str(i) + '.txt', runSVM, True, i)
            acc.append(accuracyOnt)
            remaining_size_vec.append(remaining_size)
        with open(
                FLAGS.hardcoded_path +
                "/data/programGeneratedData/crossValidation" +
                str(FLAGS.year) +
                "/cross_results_" + str(FLAGS.year) + "/ONTOLOGY_" +
                str(FLAGS.year) + '.txt', 'w') as result:
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)),
                                                   np.std(np.asarray(acc))))
            print(acc)
            result.write('size:' + str(test_size))
            result.write('accuracy: ' + str(acc) + '\n')
            result.write('remaining size: ' + str(remaining_size_vec) + '\n')
            result.write('Accuracy: {}, St Dev:{} \n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
        if runSVM == True:
            test = REMAIN_svm_val
        else:
            test = REMAIN_val
    else:
        if runSVM == True:
            test = BASE_svm_val
        else:
            test = BASE_val
            #test = REMAIN_val

    if runLCRROT == True:
        acc = []
        #k-fold cross validation
        for i in [8]:
            acc1, _, _, _, _, _, _, _, _ = lcrModel.main(
                BASE_train + str(i) + '.txt', test + str(i) + '.txt',
                accuracyOnt, test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        with open(
                "cross_results_" + str(FLAGS.year) + "/LCRROT_" +
                str(FLAGS.year) + '.txt', 'w') as result:
            result.write(str(acc) + '\n')
            result.write('Accuracy: {}, St Dev:{}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)),
                                                   np.std(np.asarray(acc))))

    if runLCRROTINVERSE == True:
        acc = []
        #k-fold cross validation
        for i in range(split_size):
            acc1, _, _, _, _, _ = lcrModelInverse.main(
                BASE_train + str(i) + '.txt', test + str(i) + '.txt',
                accuracyOnt, test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        with open(
                "cross_results_" + str(FLAGS.year) + "/LCRROT_INVERSE_" +
                str(FLAGS.year) + '.txt', 'w') as result:
            result.write(str(acc))
            result.write('Accuracy: {}, St Dev:{}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)),
                                                   np.std(np.asarray(acc))))

    if runLCRROTALT == True:
        acc = []
        #k-fold cross validation
        for i in range(split_size):
            acc1, _, _, _, _, _ = lcrModelAlt_hierarchical_v3.main(
                BASE_train + str(i) + '.txt', test + str(i) + '.txt',
                accuracyOnt, test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        with open(
                FLAGS.hardcoded_path +
                "/data/programGeneratedData/crossValidation" +
                str(FLAGS.year) +
                "/cross_results_" + str(FLAGS.year) + "/LCRROT_ALT_" +
                str(FLAGS.year) + '.txt', 'w') as result:
            result.write(str(acc))
            result.write('Accuracy: {}, St Dev:{}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)),
                                                   np.std(np.asarray(acc))))

    if runCABASC == True:
        acc = []
        #k-fold cross validation
        for i in range(split_size):
            acc1, _, _ = cabascModel.main(BASE_train + str(i) + '.txt',
                                          REMAIN_val + str(i) + '.txt',
                                          accuracyOnt, test_size[i],
                                          remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        with open(
                "cross_results_" + str(FLAGS.year) + "/CABASC_" +
                str(FLAGS.year) + '.txt', 'w') as result:
            result.write(str(acc))
            result.write('Accuracy: {}, St Dev:{}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)),
                                                   np.std(np.asarray(acc))))

    if runSVM == True:
        acc = []
        #k-fold cross validation
        for i in range(split_size):
            acc1 = svmModel.main(BASE_svm_train + str(i) + '.txt',
                                 test + str(i) + '.txt', accuracyOnt,
                                 test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
        with open(
                "cross_results_" + str(FLAGS.year) + "/SVM_" +
                str(FLAGS.year) + '.txt', 'w') as result:
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {:.5f}, St Dev:{:.4f}'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            result.write(str(acc))
            result.write('Accuracy: {:.5f}, St Dev:{:.4f}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))

    print('Finished program successfully')

    if runLCRModelAlt_hierarchical_v4 == True:
        print('Running CrossVal V4, year = ' + str(FLAGS.year))
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            acc1 = lcrModelAlt_hierarchical_v4.main(
                BASE_train + str(i) + '.txt', test + str(i) + '.txt',
                accuracyOnt, test_size[i], remaining_size)

            acc.append(acc1)
            tf.reset_default_graph()
        with open(
                FLAGS.hardcoded_path +
                "/data/programGeneratedData/crossValidation" +
                str(FLAGS.year) + "/cross_results_" + str(FLAGS.year) +
                "/v4_" + str(FLAGS.year) + '.txt', 'w') as result:
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {:.5f}, St Dev:{:.4f}'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            result.write(str(acc))
            result.write('Accuracy: {:.5f}, St Dev:{:.4f}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))

        print('Finished program successfully')

    if runAdversarial == True:
        print('Running CrossVal adversarial, year = ' + str(FLAGS.year))
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            if FLAGS.year == 2015:
                acc1, pred2, fw2, bw2, tl2, tr2 = adversarial.main(
                    BASE_train + str(i) + '.txt',
                    test + str(i) + '.txt',
                    accuracyOnt,
                    test_size[i],
                    remaining_size,
                    learning_rate_dis=0.02,
                    learning_rate_gen=0.002,
                    keep_prob=0.3,
                    momentum_dis=0.9,
                    momentum_gen=0.36,
                    l2=0.00001,
                    k=3,
                    WriteFile=False)
            else:
                acc1, pred2, fw2, bw2, tl2, tr2 = adversarial.main(
                    BASE_train + str(i) + '.txt',
                    test + str(i) + '.txt',
                    accuracyOnt,
                    test_size[i],
                    remaining_size,
                    learning_rate_dis=0.03,
                    learning_rate_gen=0.0045,
                    keep_prob=0.3,
                    momentum_dis=0.7,
                    momentum_gen=0.42,
                    l2=0.00001,
                    k=3,
                    WriteFile=False)

            acc.append(acc1)
            tf.reset_default_graph()
        with open(
                FLAGS.hardcoded_path +
                "/data/programGeneratedData/crossValidation" +
                str(FLAGS.year) + "/cross_results_" + str(FLAGS.year) +
                "/Adv_" + str(FLAGS.year) + '.txt', 'w') as result:
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {:.5f}, St Dev:{:.4f}'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            result.write(str(acc))
            result.write('Accuracy: {:.5f}, St Dev:{:.4f}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))

        print('Finished program successfully')
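Each model block above repeats the same reporting step: collect per-fold accuracies, then write them together with their mean and standard deviation to a results file. A hypothetical helper capturing that pattern (not part of the original code) could look like this:

import numpy as np

def write_cv_results(path, acc, split_size):
    # acc: list of per-fold accuracies; path: results file to create.
    acc = np.asarray(acc)
    with open(path, 'w') as result:
        print(str(split_size) + '-fold cross validation results')
        print('Accuracy: {:.5f}, St Dev: {:.4f}'.format(acc.mean(), acc.std()))
        result.write(str(acc.tolist()) + '\n')
        result.write('Accuracy: {:.5f}, St Dev: {:.4f}\n'.format(acc.mean(), acc.std()))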
Example #3
def main(_):
    loadData = False
    # loadData = True
    # useOntology = True
    useOntology = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = True
    runSVM = False

    weightanalysis = False

    # determine if backup method is used
    if runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE or runSVM:
        backup = True
    else:
        backup = False

    # retrieve data and word embeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = loadDataAndEmbeddings(
        FLAGS, loadData)
    # print(test_size)
    remaining_size = 250
    accuracyOnt = 0.87

    if useOntology == True:
        print('Starting Ontology Reasoner')
        Ontology = OntReasoner()
        #out of sample accuracy
        accuracyOnt, remaining_size = Ontology.run(backup, FLAGS.test_path,
                                                   runSVM)
        #in sample accuracy
        Ontology = OntReasoner()
        accuracyInSampleOnt, remaining_size = Ontology.run(
            backup, FLAGS.train_path, runSVM)
        if runSVM == True:
            test = FLAGS.remaining_svm_test_path
        else:
            test = FLAGS.remaining_test_path
        print('train acc = {:.4f}, test acc={:.4f}, remaining size={}'.format(
            accuracyInSampleOnt, accuracyOnt, remaining_size))
    else:
        if runSVM == True:
            test = FLAGS.test_svm_path
        else:
            test = FLAGS.test_path

    # LCR-Rot model
    if runLCRROT == True:
        _, pred1, fw1, bw1, tl1, tr1, sent, target, true = lcrModel.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    # LCR-Rot-inv model
    if runLCRROTINVERSE == True:
        lcrModelInverse.main(FLAGS.train_path, test, accuracyOnt, test_size,
                             remaining_size)
        tf.reset_default_graph()

    # LCR-Rot-hop model
    if runLCRROTALT == True:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    # CABASC model
    if runCABASC == True:
        _, pred3, weights = cabascModel.main(FLAGS.train_path, test,
                                             accuracyOnt, test_size,
                                             remaining_size)
        if weightanalysis and runLCRROT and runLCRROTALT:
            outF = open('sentence_analysis.txt', "w")
            dif = np.subtract(pred3, pred1)
            for i, value in enumerate(pred3):
                if value == 1 and pred2[i] == 0:
                    sentleft, sentright = [], []
                    flag = True
                    for word in sent[i]:
                        if word == '$t$':
                            flag = False
                            continue
                        if flag:
                            sentleft.append(word)
                        else:
                            sentright.append(word)
                    print(i)
                    outF.write(str(i))
                    outF.write("\n")
                    outF.write(
                        'lcr pred: {}; CABASC pred: {}; lcralt pred: {}; true: {}'
                        .format(pred1[i], pred3[i], pred2[i], true[i]))
                    outF.write("\n")
                    outF.write(";".join(sentleft))
                    outF.write("\n")
                    outF.write(";".join(str(x) for x in fw1[i][0]))
                    outF.write("\n")
                    outF.write(";".join(sentright))
                    outF.write("\n")
                    outF.write(";".join(str(x) for x in bw1[i][0]))
                    outF.write("\n")
                    outF.write(";".join(target[i]))
                    outF.write("\n")
                    outF.write(";".join(str(x) for x in tl1[i][0]))
                    outF.write("\n")
                    outF.write(";".join(str(x) for x in tr1[i][0]))
                    outF.write("\n")
                    outF.write(";".join(sentleft))
                    outF.write("\n")
                    outF.write(";".join(str(x) for x in fw2[i][0]))
                    outF.write("\n")
                    outF.write(";".join(sentright))
                    outF.write("\n")
                    outF.write(";".join(str(x) for x in bw2[i][0]))
                    outF.write("\n")
                    outF.write(";".join(target[i]))
                    outF.write("\n")
                    outF.write(";".join(str(x) for x in tl2[i][0]))
                    outF.write("\n")
                    outF.write(";".join(str(x) for x in tr2[i][0]))
                    outF.write("\n")
                    outF.write(";".join(sent[i]))
                    outF.write("\n")
                    outF.write(";".join(str(x) for x in weights[i][0]))
                    outF.write("\n")
            outF.close()

    # BoW model
    if runSVM == True:
        svmModel.main(FLAGS.train_svm_path, test, accuracyOnt, test_size,
                      remaining_size)

    print('Finished program successfully')
Example #4
def main(_):
    loadData = False
    useOntology = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = False
    runSVM = False
    runlcrDoubleRAA = True
    runINVMULTIHOP1 = False
    runlcrDoubleRAAtype2 = False
    weightanalysis = False

    # determine if backup method is used
    if runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE or runSVM or runlcrDoubleRAA or runINVMULTIHOP1:
        backup = True
    else:
        backup = False

    # retrieve data and word embeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = loadDataAndEmbeddings(
        FLAGS, loadData)
    print(test_size)
    remaining_size = 250
    accuracyOnt = 0.87

    if useOntology == True:
        print('Starting Ontology Reasoner')
        Ontology = OntReasoner()
        #out of sample accuracy
        accuracyOnt, remaining_size = Ontology.run(backup, FLAGS.test_path,
                                                   runSVM)
        #in sample accuracy
        Ontology = OntReasoner()
        accuracyInSampleOnt, remaining_size = Ontology.run(
            backup, FLAGS.train_path, runSVM)
        if runSVM == True:
            test = FLAGS.remaining_svm_test_path
        else:
            test = FLAGS.remaining_test_path
        print('train acc = {:.4f}, test acc={:.4f}, remaining size={}'.format(
            accuracyInSampleOnt, accuracyOnt, remaining_size))
    else:
        if runSVM == True:
            test = FLAGS.test_svm_path
        else:
            test = FLAGS.test_path

    # LCR-Rot model
    if runLCRROT == True:
        _, pred1, fw1, bw1, tl1, tr1, sent, target, true = lcrModel.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    # LCR-Rot-inv model
    if runLCRROTINVERSE == True:
        lcrModelInverse.main(FLAGS.train_path, test, accuracyOnt, test_size,
                             remaining_size)
        tf.reset_default_graph()

    # LCR-Rot-hop model
    if runLCRROTALT == True:
        _, pred2, fw2, bw2, tl2, tr2 = lcrModelAlt.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runlcrDoubleRAA == True:
        _, pred2, fw2, bw2, tl2, tr2 = lcrDoubleRAA.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runINVMULTIHOP1 == True:
        _, pred2, fw2, bw2, tl2, tr2 = lcrinvmodel2.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    if runlcrDoubleRAAtype2 == True:
        _, pred2, fw2, bw2, tl2, tr2 = lcrDoubleRAAtype2.main(
            FLAGS.train_path, test, accuracyOnt, test_size, remaining_size)
        tf.reset_default_graph()

    # BoW model
    if runSVM == True:
        svmModel.main(FLAGS.train_svm_path, test, accuracyOnt, test_size,
                      remaining_size)

    print('Finished program successfully')
Example #5
def main(_):
    loadData = False
    useOntology = False
    runCABASC = False
    runLCRROT = False
    runLCRROTINVERSE = False
    runLCRROTALT = True
    runSVM = False

    # determine if backup method is used
    if runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE or runSVM:
        backup = True
    else:
        backup = False

    BASE_train = "data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/cross_train_'
    BASE_val = "data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/cross_val_'
    BASE_svm_train = "data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/svm/cross_train_svm_'
    BASE_svm_val = "data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/svm/cross_val_svm_'

    REMAIN_val = "data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/cross_val_remainder_'
    REMAIN_svm_val = "data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/svm/cross_val_remainder_'

    # Number of k-fold cross validations
    split_size = 10

    # retrieve data and word embeddings
    train_size, test_size, train_polarity_vector, test_polarity_vector = loadCrossValidation(
        FLAGS, split_size, loadData)
    remaining_size = 248
    accuracyOnt = 0.87

    if useOntology == True:
        print('Starting Ontology Reasoner')
        acc = []
        remaining_size_vec = []
        #k-fold cross validation
        for i in range(split_size):
            Ontology = OntReasoner()
            accuracyOnt, remaining_size = Ontology.run(
                backup, BASE_val + str(i) + '.txt', runSVM, True, i)
            acc.append(accuracyOnt)
            remaining_size_vec.append(remaining_size)
        with open(
                "cross_results_" + str(FLAGS.year) + "/ONTOLOGY_" +
                str(FLAGS.year) + '.txt', 'w') as result:
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)),
                                                   np.std(np.asarray(acc))))
            print(acc)
            result.write('size:' + str(test_size))
            result.write('accuracy: ' + str(acc) + '\n')
            result.write('remaining size: ' + str(remaining_size_vec) + '\n')
            result.write('Accuracy: {}, St Dev:{} \n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
        if runSVM == True:
            test = REMAIN_svm_val
        else:
            test = REMAIN_val
    else:
        if runSVM == True:
            test = BASE_svm_val
        else:
            test = BASE_val

    if runLCRROT == True:
        acc = []
        #k-fold cross validation
        for i in [8]:
            acc1, _, _, _, _, _, _, _, _ = lcrModel.main(
                BASE_train + str(i) + '.txt', test + str(i) + '.txt',
                accuracyOnt, test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        with open(
                "cross_results_" + str(FLAGS.year) + "/LCRROT_" +
                str(FLAGS.year) + '.txt', 'w') as result:
            result.write(str(acc) + '\n')
            result.write('Accuracy: {}, St Dev:{}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)),
                                                   np.std(np.asarray(acc))))

    if runLCRROTINVERSE == True:
        acc = []
        #k-fold cross validation
        for i in range(split_size):
            acc1, _, _, _, _, _ = lcrModelInverse.main(
                BASE_train + str(i) + '.txt', test + str(i) + '.txt',
                accuracyOnt, test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        with open(
                "cross_results_" + str(FLAGS.year) + "/LCRROT_INVERSE_" +
                str(FLAGS.year) + '.txt', 'w') as result:
            result.write(str(acc))
            result.write('Accuracy: {}, St Dev:{}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)),
                                                   np.std(np.asarray(acc))))

    if runLCRROTALT == True:
        acc = []
        #k-fold cross validation
        for i in range(split_size):
            acc1, _, _, _, _, _ = lcrModelAlt.main(
                BASE_train + str(i) + '.txt', test + str(i) + '.txt',
                accuracyOnt, test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        with open(
                "cross_results_" + str(FLAGS.year) + "/LCRROT_ALT_" +
                str(FLAGS.year) + '.txt', 'w') as result:
            result.write(str(acc))
            result.write('Accuracy: {}, St Dev:{}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)),
                                                   np.std(np.asarray(acc))))

    if runCABASC == True:
        acc = []
        #k-fold cross validation
        for i in range(split_size):
            acc1, _, _ = cabascModel.main(BASE_train + str(i) + '.txt',
                                          REMAIN_val + str(i) + '.txt',
                                          accuracyOnt, test_size[i],
                                          remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        with open(
                "cross_results_" + str(FLAGS.year) + "/CABASC_" +
                str(FLAGS.year) + '.txt', 'w') as result:
            result.write(str(acc))
            result.write('Accuracy: {}, St Dev:{}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)),
                                                   np.std(np.asarray(acc))))

    if runSVM == True:
        acc = []
        #k-fold cross validation
        for i in range(split_size):
            acc1 = svmModel.main(BASE_svm_train + str(i) + '.txt',
                                 test + str(i) + '.txt', accuracyOnt,
                                 test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
        with open(
                "cross_results_" + str(FLAGS.year) + "/SVM_" +
                str(FLAGS.year) + '.txt', 'w') as result:
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {:.5f}, St Dev:{:.4f}'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            result.write(str(acc))
            result.write('Accuracy: {:.5f}, St Dev:{:.4f}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))

    print('Finished program successfully')
Example #6
        #k-fold cross validation
        for i in range(split_size):
            acc1, _, _ = cabascModel.main(BASE_train+str(i)+'.txt',REMAIN_val+str(i)+'.txt', accuracyOnt, test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: '+ str(i))
        with open("cross_results_"+str(FLAGS.year)+"/CABASC_"+str(FLAGS.year)+'.txt', 'w') as result:
            result.write(str(acc))
            result.write('Accuracy: {}, St Dev:{}\n'.format(np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            print(str(split_size)+'-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)), np.std(np.asarray(acc))))

    if runSVM == True:
        acc = []
        #k-fold cross validation
        for i in range(split_size):
            acc1 = svmModel.main(BASE_svm_train+str(i)+'.txt',test+str(i)+'.txt', accuracyOnt, test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
        with open("cross_results_"+str(FLAGS.year)+"/SVM_"+str(FLAGS.year)+'.txt', 'w') as result:
            print(str(split_size)+'-fold cross validation results')
            print('Accuracy: {:.5f}, St Dev:{:.4f}'.format(np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            result.write(str(acc))
            result.write('Accuracy: {:.5f}, St Dev:{:.4f}\n'.format(np.mean(np.asarray(acc)), np.std(np.asarray(acc))))

    print('Finished program successfully')

if __name__ == '__main__':
    # wrapper that handles flag parsing and then dispatches the main
    tf.app.run()
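tf.app.run() is the TensorFlow 1.x entry point: it parses the command-line flags and then calls main(argv). The FLAGS object used throughout these examples (for instance FLAGS.year) is normally defined with tf.app.flags; the sketch below shows that setup, where only the 'year' flag is taken from the snippets and the other name and default are assumptions.

import tensorflow as tf

# 'year' appears in the examples above; the train_path flag and both default
# values are assumptions for illustration only.
tf.app.flags.DEFINE_integer('year', 2016, 'dataset year used to build file paths')
tf.app.flags.DEFINE_string('train_path', 'data/programGeneratedData/train.txt',
                           'path to the training data')
FLAGS = tf.app.flags.FLAGS

def main(_):
    print('running with year =', FLAGS.year)

if __name__ == '__main__':
    tf.app.run()  # parses flags, then dispatches to main(argv)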
Example #7
def main(_):
    loadData = False
    useOntology = False
    runCABASC = False
    runLCRROT = True
    runLCRROTINVERSE = False
    runLCRROTALT = False
    runSVM = False

    # determine if backup method is used
    if runCABASC or runLCRROT or runLCRROTALT or runLCRROTINVERSE or runSVM:
        backup = True
    else:
        backup = False

    BASE_train = "data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/cross_train_'
    BASE_val = "data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/cross_val_'
    BASE_svm_train = "data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/svm/cross_train_svm_'
    BASE_svm_val = "data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/svm/cross_val_svm_'

    REMAIN_val = "data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/cross_val_remainder_'
    REMAIN_svm_val = "data/programGeneratedData/crossValidation" + str(
        FLAGS.year) + '/svm/cross_val_remainder_'

    # Number of k-fold cross validations
    split_size = 10

    # retrieve data and word embeddings
    # train_size, test_size, train_polarity_vector, test_polarity_vector = loadCrossValidation(FLAGS, split_size,
    #                                                                                        loadData)
    remaining_size = 248
    accuracyOnt = 0.87
    test = BASE_val

    # if useOntology == True:
    #   print('Starting Ontology Reasoner')
    #  acc = []
    # remaining_size_vec = []
    # k-fold cross validation
    # for i in range(split_size):
    #    Ontology = OntReasoner()
    #   accuracyOnt, remaining_size = Ontology.run(backup, BASE_val + str(i) + 'cross_train_10.txt', runSVM, True, i)
    #  acc.append(accuracyOnt)
    # remaining_size_vec.append(remaining_size)
    # with open("C:/Users/Maria/Desktop/data/programGeneratedData/crossValidation" + str(
    #        FLAGS.year) + "/cross_results_" + str(FLAGS.year) + "/ONTOLOGY_" + str(FLAGS.year) + 'cross_train_10.txt',
    #         'w') as result:
    #  print(str(split_size) + '-fold cross validation results')
    # print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
    # print(acc)
    # result.write('size:' + str(test_size))
    # result.write('accuracy: ' + str(acc) + '\n')
    # result.write('remaining size: ' + str(remaining_size_vec) + '\n')
    # result.write('Accuracy: {}, St Dev:{} \n'.format(np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
    # if runSVM == True:
    #   test = REMAIN_svm_val
    # else:
    #   test = REMAIN_val
    # else:
    #   if runSVM == True:
    #      test = BASE_svm_val
    # else:
    #    test = BASE_val
    # test = REMAIN_val

    if runLCRROT == True:
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            # input is the complete training data and the indices make sure that only the correct 9/10th of the data is selected. The other 1/10th can be found in test (BASE_val)
            acc1 = lcrModelAlt_hierarchical_v4_baby_steps.main(
                FLAGS.train_path, testdata, accuracyOnt, test_size,
                remaining_size, sort_ind, FLAGS.num_buckets)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        with open(
                "cross_results_" + str(FLAGS.year) + "/LCRROT_" +
                str(FLAGS.year) + 'cross_train_10.txt', 'w') as result:
            result.write(str(acc) + '\n')
            result.write('Accuracy: {}, St Dev:{}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)),
                                                   np.std(np.asarray(acc))))

    if runLCRROTINVERSE == True:
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            acc1, _, _, _, _, _ = lcrModelInverse.main(
                BASE_train + str(i) + 'cross_train_10.txt',
                test + str(i) + 'cross_train_10.txt', accuracyOnt,
                test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        with open(
                "cross_results_" + str(FLAGS.year) + "/LCRROT_INVERSE_" +
                str(FLAGS.year) + 'cross_train_10.txt', 'w') as result:
            result.write(str(acc))
            result.write('Accuracy: {}, St Dev:{}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)),
                                                   np.std(np.asarray(acc))))

    if runLCRROTALT == True:
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            acc1, _, _, _, _, _ = lcrModelAlt_hierarchical_v3.main(
                BASE_train + str(i) + 'cross_train_10.txt',
                test + str(i) + 'cross_train_10.txt', accuracyOnt,
                test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        with open(
                "C:/Users/Maria/Desktop/data/programGeneratedData/crossValidation"
                + str(FLAGS.year) + "/cross_results_" + str(FLAGS.year) +
                "/LCRROT_ALT_" + str(FLAGS.year) + 'cross_train_10.txt',
                'w') as result:
            result.write(str(acc))
            result.write('Accuracy: {}, St Dev:{}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)),
                                                   np.std(np.asarray(acc))))

    if runCABASC == True:
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            acc1, _, _ = cabascModel.main(
                BASE_train + str(i) + 'cross_train_10.txt',
                REMAIN_val + str(i) + 'cross_train_10.txt', accuracyOnt,
                test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
            print('iteration: ' + str(i))
        with open(
                "cross_results_" + str(FLAGS.year) + "/CABASC_" +
                str(FLAGS.year) + 'cross_train_10.txt', 'w') as result:
            result.write(str(acc))
            result.write('Accuracy: {}, St Dev:{}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {}, St Dev:{}'.format(np.mean(np.asarray(acc)),
                                                   np.std(np.asarray(acc))))

    if runSVM == True:
        acc = []
        # k-fold cross validation
        for i in range(split_size):
            acc1 = svmModel.main(
                BASE_svm_train + str(i) + 'cross_train_10.txt',
                test + str(i) + 'cross_train_10.txt', accuracyOnt,
                test_size[i], remaining_size)
            acc.append(acc1)
            tf.reset_default_graph()
        with open(
                "cross_results_" + str(FLAGS.year) + "/SVM_" +
                str(FLAGS.year) + 'cross_train_10.txt', 'w') as result:
            print(str(split_size) + '-fold cross validation results')
            print('Accuracy: {:.5f}, St Dev:{:.4f}'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))
            result.write(str(acc))
            result.write('Accuracy: {:.5f}, St Dev:{:.4f}\n'.format(
                np.mean(np.asarray(acc)), np.std(np.asarray(acc))))

    print('Finished program successfully')