def draw_scatterplot(data_frame, real_column, prediction_column, path, topic):
    """Save a scatter plot comparing real and predicted values.

    The frame is sorted by ``real_column`` and given a sequential ``id``
    column that serves as the x axis. Both value columns are then passed
    through ``fit`` before RMSE, MAE, Pearson and Spearman scores are computed
    and written onto the plot.

    Args:
        data_frame: pandas DataFrame containing both value columns. The
            caller's frame is not re-sorted; ``sort_values`` returns a new
            frame that this function works on.
        real_column: Name of the column holding the reference values.
        prediction_column: Name of the column holding the predictions.
        path: Destination handed to ``Figure.savefig``.
        topic: Title shown on the plot.

    Returns:
        None. The plot is written to ``path``.
    """
    data_frame = data_frame.sort_values(real_column)
    # Rank of each row after sorting; plotted on the x axis.
    data_frame['id'] = list(range(len(data_frame.index)))

    # NOTE(review): `fit` is defined elsewhere; presumably it rescales the
    # named column -- confirm against its definition.
    data_frame = fit(data_frame, real_column)
    data_frame = fit(data_frame, prediction_column)

    # Convert once and reuse for every metric and for the text placement.
    real_values = data_frame[real_column].tolist()
    predicted_values = data_frame[prediction_column].tolist()

    pearson = pearson_corr(real_values, predicted_values)
    spearman = spearman_corr(real_values, predicted_values)
    rmse_value = rmse(real_values, predicted_values)
    mae = mean_absolute_error(real_values, predicted_values)

    textstr = 'RMSE=%.4f\nMAE=%.4f\nPearson Correlation=%.4f\nSpearman Correlation=%.4f' % (
        rmse_value, mae, pearson, spearman)

    # Create the figure explicitly and hand its axes to pandas. A bare
    # `plt.figure()` followed by `DataFrame.plot()` without `ax=` leaves an
    # extra, empty figure open because pandas creates its own.
    fig, ax = plt.subplots()
    try:
        data_frame.plot(kind='scatter', x='id', y=real_column, color='DarkBlue',
                        label='z_mean', title=topic, ax=ax)
        data_frame.plot(kind='scatter', x='id', y=prediction_column, color='DarkGreen',
                        label='predicted z_mean', ax=ax)
        # Anchor the metrics text at the horizontal middle and at the lowest
        # plotted value.
        ax.text(0.5 * data_frame.shape[0],
                min(min(real_values), min(predicted_values)),
                textstr, fontsize=10)
        fig.savefig(path)
    finally:
        # Release the figure so repeated calls do not accumulate open
        # pyplot figures.
        plt.close(fig)
dev = dev.rename(columns={ 'original': 'text_a', 'translation': 'text_b', 'z_mean': 'labels' }).dropna() test = test.rename(columns={ 'original': 'text_a', 'translation': 'text_b' }).dropna() dev_sentence_pairs = list( map(list, zip(dev['text_a'].to_list(), dev['text_b'].to_list()))) test_sentence_pairs = list( map(list, zip(test['text_a'].to_list(), test['text_b'].to_list()))) train = fit(train, 'labels') dev = fit(dev, 'labels') assert (len(index) == 1000) if siamesetransquest_config["evaluate_during_training"]: if siamesetransquest_config["n_fold"] > 0: dev_preds = np.zeros((len(dev), siamesetransquest_config["n_fold"])) test_preds = np.zeros((len(test), siamesetransquest_config["n_fold"])) for i in range(siamesetransquest_config["n_fold"]): if os.path.exists(siamesetransquest_config['best_model_dir'] ) and os.path.isdir( siamesetransquest_config['best_model_dir']): shutil.rmtree(siamesetransquest_config['best_model_dir']) if os.path.exists(
}).dropna() dev_temp = dev_temp.rename(columns={ 'original': 'text_a', 'translation': 'text_b', 'z_mean': 'labels' }).dropna() test_temp = test_temp.rename(columns={ 'original': 'text_a', 'translation': 'text_b' }).dropna() test_sentence_pairs_temp = list( map(list, zip(test_temp['text_a'].to_list(), test_temp['text_b'].to_list()))) train_temp = fit(train_temp, 'labels') dev_temp = fit(dev_temp, 'labels') train_list.append(train_temp) dev_list.append(dev_temp) test_list.append(test_temp) index_list.append(index_temp) test_sentence_pairs_list.append(test_sentence_pairs_temp) train = pd.concat(train_list) if monotransquest_config["evaluate_during_training"]: if monotransquest_config["n_fold"] > 1: dev_preds_list = [] test_preds_list = []