Example #1
def sentence_prediction(sentence):
    sentence = preprocess(sentence)
    model_path = config.MODEL_PATH

    test_dataset = dataset.BERTDataset(
        review=[sentence],
        target=[0]
    )

    test_data_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=3
    )

    device = config.device

    model = BERTBaseUncased()
    model.load_state_dict(torch.load(
        model_path, map_location=torch.device(device)))
    model.to(device)

    # predict_fn returns (predictions, extracted_features); features are not needed here
    outputs, _ = engine.predict_fn(test_data_loader, model, device)
    print(outputs)
    return outputs[0]
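A minimal, hypothetical driver for the function above, assuming the project modules it uses (config, dataset, engine, BERTBaseUncased) and the preprocess helper are importable; the sample sentence and printout are illustrative only:

if __name__ == "__main__":
    # outputs[0] is whatever engine.predict_fn yields for the single input sentence
    score = sentence_prediction("this movie was great")
    print(f"prediction: {score}")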
Example #2
def main(_):
    input_file = config.EVAL_PROC
    output = 'predictions.csv'
    model_path = config.MODEL_PATH
    if FLAGS.input:
        input_file = FLAGS.input
    if FLAGS.output:
        output = FLAGS.output
    if FLAGS.model_path:
        model_path = FLAGS.model_path
    df_test = pd.read_fwf(input_file)

    logger.info(f"Bert Model: {config.BERT_PATH}")
    logger.info(
        f"Current date and time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
    )
    logger.info(f"Test file: {input_file}")
    logger.info(f"Test size: {len(df_test)}")

    # dummy targets: labels are unknown at prediction time
    trg = [0] * len(df_test)

    test_dataset = dataset.BERTDataset(text=df_test.values, target=trg)

    test_data_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=3)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = BERTBaseUncased(config.DROPOUT)
    model.load_state_dict(
        torch.load(model_path, map_location=torch.device(device)))
    model.to(device)

    outputs, extracted_features = engine.predict_fn(
        test_data_loader, model, device, extract_features=FLAGS.features)
    df_test["predicted"] = outputs
    # save predictions without header or index
    df_test.to_csv(output, header=False, index=False)
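Example #2 reads several FLAGS values and defines main(_), which matches the absl-py flags pattern; below is a hedged sketch of the flag definitions it appears to assume. The flag names are taken from the code above, while the defaults and help strings are guesses:

from absl import app, flags

FLAGS = flags.FLAGS
flags.DEFINE_string("input", None, "Evaluation file; overrides config.EVAL_PROC when set.")
flags.DEFINE_string("output", None, "Destination CSV; the code above falls back to predictions.csv.")
flags.DEFINE_string("model_path", None, "Saved state_dict; overrides config.MODEL_PATH when set.")
flags.DEFINE_bool("features", False, "Also return extracted features from engine.predict_fn.")

if __name__ == "__main__":
    app.run(main)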
Example #3
def main(_):
    test_file = config.EVAL_PROC
    model_path = config.MODEL_PATH
    if FLAGS.test_file:
        test_file = FLAGS.test_file
    if FLAGS.model_path:
        model_path = FLAGS.model_path
    df_test = pd.read_csv(test_file).fillna("none")

    logger.info(f"Bert Model: {config.BERT_PATH}")
    logger.info(
        f"Current date and time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
    )
    logger.info(f"Test file: {test_file}")
    logger.info(f"Test size: {len(df_test)}")

    test_dataset = dataset.BERTDataset(review=df_test.text.values,
                                       target=df_test.label.values)

    test_data_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=config.VALID_BATCH_SIZE, num_workers=3)

    device = config.device

    model = BERTBaseUncased()
    model.load_state_dict(
        torch.load(model_path, map_location=torch.device(device)))
    model.to(device)

    outputs, extracted_features = engine.predict_fn(
        test_data_loader, model, device, extract_features=FLAGS.features)
    df_test["predicted"] = outputs
    # save predictions (named after the model directory) without header or index
    df_test.to_csv(model_path.split("/")[-2] + '.csv',
                   header=False,
                   index=False)

    if FLAGS.features:
        pca = PCA(n_components=50, random_state=7)
        X1 = pca.fit_transform(extracted_features)
        tsne = TSNE(n_components=2,
                    perplexity=10,
                    random_state=6,
                    learning_rate=1000,
                    n_iter=1500)
        X1 = tsne.fit_transform(X1)
        # if row == 0: print("Shape after t-SNE: ", X1.shape)

        X = pd.DataFrame(X1, columns=["x1", "y1"])
        X = X.astype({"x1": float, "y1": float})

        # Plot for layer -1
        plt.figure(figsize=(20, 15))
        p1 = sns.scatterplot(x=X["x1"], y=X["y1"], palette="coolwarm")
        # p1.set_title("development-"+str(row+1)+", layer -1")
        x_texts = []
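        # correct predictions get an "@<first letter of label>" prefix so the plot loop
        # below can colour-code them; mismatches are written as "<true>-<predicted>"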
        for output, value in zip(outputs, df_test.label.values):
            if output == value:
                x_texts.append("@" + label_decoder(output)[0] +
                               label_decoder(output))
            else:
                x_texts.append(
                    label_decoder(value) + "-" + label_decoder(output))

        X["texts"] = x_texts
        # X["texts"] = ["@G" + label_decoder(output) if output == value else "@R-" + label_decoder(value) + "-" + label_decoder(output)
        #               for output, value in zip(outputs, df_test.label.values)]

        # df_test.label.astype(str)
        #([str(output)+"-" + str(value)] for output, value in zip(outputs, df_test.label.values))
        # Label each datapoint with the word it corresponds to
        colours = {"@U": "blue", "@P": "green", "@N": "red"}
        for line in X.index:
            text = X.loc[line, "texts"] + "-" + str(line)
            colour = next((c for tag, c in colours.items() if tag in text), "black")
            p1.text(X.loc[line, "x1"] + 0.2,
                    X.loc[line, "y1"],
                    text[2:] if colour != "black" else text,
                    horizontalalignment='left',
                    size='medium',
                    color=colour,
                    weight='semibold')
        # save before calling show(), which can leave an empty canvas behind
        plt.savefig(model_path.split("/")[-2] + '-figure.svg', format="svg")
        plt.show()
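label_decoder is project code not shown in these examples; judging by the "@U"/"@P"/"@N" checks above, it maps an integer prediction back to a string label whose first letter is U, P, or N. A purely hypothetical stand-in, with made-up label names, just to make the snippet self-contained:

# Hypothetical stand-in; the real label names live in the surrounding project.
LABELS = {0: "Negative", 1: "Positive", 2: "Unknown"}

def label_decoder(label_id):
    return LABELS[int(label_id)]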