Exemplo n.º 1
0
# --- Output locations ------------------------------------------------------
# Models go under models/<modeldir>_feat/. The path is handed to checkdir()
# and whatever checkdir() returns is what the rest of the script uses.
modeldir = args.modeldir + "_feat"
modeldirpath = checkdir("models/" + modeldir + "/")

# The results file is named after the training file with its directory and
# extension stripped.
base = os.path.basename(trainFile)
tr_file_name, _ = os.path.splitext(base)

evaluation = checkdir("results_baseline/")
# NOTE(review): this handle is not closed anywhere in the visible code --
# confirm it is closed once the results have been written.
resultsFile = open(evaluation + tr_file_name + "_result.txt", 'w')

# --- Settings --------------------------------------------------------------
MAX_SEQUENCE_LENGTH = 20  # passed to getTokenizer and every getDevData2 call
MAX_NB_WORDS = 20000  # passed to getTokenizer
EMBEDDING_DIM = 300  # not referenced in this section
batch_size = 128
delim = "\t"  # separator argument given to every data_process reader below


def _encode_split(path):
    """Call data_process.getDevData2 on *path* with the shared settings.

    The tokenizer, MAX_SEQUENCE_LENGTH and delim are the same for every
    split, so only the file path varies between calls.
    """
    return data_process.getDevData2(path, tokenizer, MAX_SEQUENCE_LENGTH,
                                    delim)


# --- Data ------------------------------------------------------------------
# The tokenizer is built from the contents of allxFile and then reused for
# every split below.
data, _, _ = data_process.getData(allxFile, delim)
word_index, tokenizer = data_process.getTokenizer(
    data, MAX_NB_WORDS, MAX_SEQUENCE_LENGTH)

# getDevData2 yields five values per file; the last one is discarded here.
train_x, train_y, train_le, train_labels, _ = _encode_split(trainFile)
dev_x, dev_y, dev_le, dev_labels, _ = _encode_split(devFile)
test_x, test_y, test_le, test_labels, _ = _encode_split(testFile)
# Only the first returned value is kept for allxFile.
allx, _, _, _, _ = _encode_split(allxFile)

graph = getGraph(graphFile)
        str("{0:.2f}".format(auc)) + "\t" + str("{0:.2f}".format(wauc)) +
        "\t" + str("{0:.2f}".format(precision)) + "\t" +
        str("{0:.2f}".format(recall)) + "\t" +
        str("{0:.2f}".format(f1_score)) + "\n")
    print(report)

    probability = model.predict_proba(ublabelled_X,
                                      batch_size=batch_size,
                                      verbose=1)
    ul_pred_val = model.predict_classes([ublabelled_X],
                                        batch_size=batch_size,
                                        verbose=1)
    ul_pred = le.inverse_transform(ul_pred_val)
    #    fout = open("ul_pred1.txt", 'w')
    delim = "\t"
    train_xdata, lab_tr_x, ids = data_process.getData(train_file, delim)
    uData, uLab, uIds = data_process.getData(unlabaled_data, delim)
    ul_pred_data = []
    ul_pred_labels = []
    ul_pred_ids = []
    for pred, prob, val, udata, id in zip(ul_pred, probability, ul_pred_val,
                                          uData, uIds):
        class_prob = prob[val]
        if (class_prob >= 0.75):
            ul_pred_data.append(udata)
            ul_pred_labels.append(pred)
            ul_pred_ids.append(id)

    base = os.path.basename(train_file)
    basename = os.path.splitext(base)[0]
    dirname = os.path.dirname(train_file)