Пример #1
0
def draw_scatterplot(data_frame, real_column, prediction_column, path, topic):
    """Plot real vs. predicted values as a scatter plot and save it to disk.

    Rows are sorted by ``real_column`` and numbered 0..n-1; that number is
    used as the x coordinate for both series. Error and correlation metrics
    computed between the two columns are written onto the plot as text.

    Args:
        data_frame: pandas DataFrame holding both columns. The caller's
            frame is not modified; ``sort_values`` returns a new frame and
            all later changes are made to that copy.
        real_column: Name of the column with the reference values.
        prediction_column: Name of the column with the predicted values.
        path: Destination handed to ``Figure.savefig``.
        topic: Title shown on the plot.

    Raises:
        ValueError: If the frame is empty (``min`` of an empty list).
    """
    data_frame = data_frame.sort_values(real_column)
    # Positional rank after sorting; serves as the shared x axis.
    data_frame['id'] = list(range(len(data_frame.index)))

    # NOTE(review): `fit` is defined elsewhere; presumably it rescales the
    # given column -- confirm against its definition.
    data_frame = fit(data_frame, real_column)
    data_frame = fit(data_frame, prediction_column)

    # Materialise each column once instead of re-converting for every metric.
    real_values = data_frame[real_column].tolist()
    predicted_values = data_frame[prediction_column].tolist()

    pearson = pearson_corr(real_values, predicted_values)
    spearman = spearman_corr(real_values, predicted_values)
    rmse_value = rmse(real_values, predicted_values)
    mae = mean_absolute_error(real_values, predicted_values)

    textstr = 'RMSE=%.4f\nMAE=%.4f\nPearson Correlation=%.4f\nSpearman Correlation=%.4f' % (
        rmse_value, mae, pearson, spearman)

    # Create exactly one figure and draw both series on it. Previously an
    # extra empty figure was opened via `plt.figure()` and none of the
    # figures were ever closed, so repeated calls leaked open figures.
    fig, ax = plt.subplots()
    try:
        data_frame.plot(kind='scatter',
                        x='id',
                        y=real_column,
                        color='DarkBlue',
                        label='z_mean',
                        title=topic,
                        ax=ax)
        data_frame.plot(kind='scatter',
                        x='id',
                        y=prediction_column,
                        color='DarkGreen',
                        label='predicted z_mean',
                        ax=ax)
        # Anchor the metrics text at the horizontal midpoint, level with the
        # lowest plotted value of either series.
        ax.text(0.5 * data_frame.shape[0],
                min(min(real_values), min(predicted_values)),
                textstr,
                fontsize=10)

        fig.savefig(path)
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)
Пример #2
0
    }).dropna()
    # NOTE(review): this is a fragment -- the enclosing block header
    # (presumably a loop over datasets/language pairs) and the preceding
    # statement are outside the visible range.

    # Rename the dev split's columns to text_a / text_b / labels and drop
    # every row that contains a missing value.
    dev_temp = dev_temp.rename(columns={
        'original': 'text_a',
        'translation': 'text_b',
        'hter': 'labels'
    }).dropna()
    # The test split only has its two text columns renamed; no 'hter'
    # column is mapped to 'labels' here.
    test_temp = test_temp.rename(columns={
        'original': 'text_a',
        'translation': 'text_b'
    }).dropna()

    # One [text_a, text_b] two-element list per remaining test row.
    test_sentence_pairs_temp = list(
        map(list,
            zip(test_temp['text_a'].to_list(), test_temp['text_b'].to_list())))

    # `fit` is defined elsewhere; presumably it rescales the 'labels'
    # column -- TODO confirm. Only train and dev are passed through it.
    train_temp = fit(train_temp, 'labels')
    dev_temp = fit(dev_temp, 'labels')

    # Collect this iteration's results in the accumulators
    # (`index_temp` is produced outside the visible range).
    train_list.append(train_temp)
    dev_list.append(dev_temp)
    test_list.append(test_temp)
    index_list.append(index_temp)
    test_sentence_pairs_list.append(test_sentence_pairs_temp)

# Stack all collected training frames into a single DataFrame.
train = pd.concat(train_list)

if transformer_config["evaluate_during_training"]:
    if transformer_config["n_fold"] > 1:
        dev_preds_list = []
        test_preds_list = []
Пример #3
0
# Column mapping applied to every split: the two text columns.
_text_columns = {'original': 'text_a', 'translation': 'text_b'}
# Train and dev additionally expose the 'hter' column as 'labels'.
_labelled_columns = {**_text_columns, 'hter': 'labels'}

# Rename the columns of each split, then drop rows with missing values.
train = train.rename(columns=_labelled_columns).dropna()
dev = dev.rename(columns=_labelled_columns).dropna()
test = test.rename(columns=_text_columns).dropna()

# `fit` is defined elsewhere; it is applied to the 'labels' column of the
# train and dev splits only.
train = fit(train, 'labels')
dev = fit(dev, 'labels')

if siamese_transformer_config["evaluate_during_training"]:
    if siamese_transformer_config["n_fold"] > 0:
        dev_preds = np.zeros((len(dev), siamese_transformer_config["n_fold"]))
        test_preds = np.zeros(
            (len(test), siamese_transformer_config["n_fold"]))
        for i in range(siamese_transformer_config["n_fold"]):

            if os.path.exists(
                    siamese_transformer_config['best_model_dir']
            ) and os.path.isdir(siamese_transformer_config['best_model_dir']):
                shutil.rmtree(siamese_transformer_config['best_model_dir'])

            if os.path.exists(