def quanteval_plot_ind(model_folder,
                       prefix_dir='',
                       lower=False,
                       verbose=False,
                       debug=False):
    """Plot individual evaluation scores for every checkpoint of a model.

    For each checkpoint returned by ``find_list_ckpts`` three groups of rows
    are collected, in this order:

    1. the ``model_folder + '/max'`` column of ``quantitative_eval``,
       labelled with ``eval_datasets_names`` (a name defined outside this
       function);
    2. the ``'spearman'`` column of ``quantitative_scws_df``, labelled with
       its ``'method'`` column;
    3. one ``'AVERAGE'`` row holding the mean of group 1 only -- the SCWS
       scores of group 2 are not part of that mean.

    Args:
        model_folder: Model directory; also used as the model's name when
            calling ``quantitative_eval``.
        prefix_dir: Forwarded to the checkpoint lookup and both evaluators.
        lower: Forwarded to ``quantitative_eval``.
        verbose: Forwarded to both evaluators.
        debug: When true, only the first three checkpoints are evaluated.

    Returns:
        A ``(plot, df)`` tuple. ``df`` has the columns ``x`` (checkpoint
        number), ``scores`` and ``dataset``; ``plot`` is a point-and-line
        chart of ``scores`` against ``x`` coloured by ``dataset``.
    """
    ckpt_nums, ckpt_names = find_list_ckpts(model_folder,
                                            prefix_dir=prefix_dir)
    if debug:
        # Keep debug runs short: look at the first three checkpoints only.
        ckpt_nums, ckpt_names = ckpt_nums[:3], ckpt_names[:3]

    max_column = model_folder + '/max'
    steps = []
    values = []
    labels = []
    for step, ckpt_file in zip(ckpt_nums, ckpt_names):
        # The metric is fixed to 'max' here; it is not configurable yet.
        corr_table = quantitative_eval(
            [(model_folder, model_folder)],
            ckpt_files=[ckpt_file],
            prefix_dir=prefix_dir,
            metric_funcs=['max'],
            lower=lower,
            verbose=verbose)

        dataset_scores = corr_table[max_column].tolist()
        values.extend(dataset_scores)
        steps.extend([step] * len(eval_datasets_names))
        # Plain concatenation is kept on purpose: eval_datasets_names comes
        # from outside this function.
        labels = labels + eval_datasets_names

        # SCWS results for the same checkpoint.
        scws_frame = quantitative_scws_df(model_folder,
                                          prefix_dir,
                                          ckpt_file=ckpt_file,
                                          verbose=verbose)
        labels.extend(scws_frame['method'].tolist())
        steps.extend([step] * len(scws_frame))
        values.extend(scws_frame['spearman'].tolist())

        # Summary row: mean over dataset_scores only (SCWS not included).
        labels.append('AVERAGE')
        steps.append(step)
        values.append(np.mean(np.array(dataset_scores)))

    df = pd.DataFrame()
    df['x'] = steps
    df['scores'] = values
    df['dataset'] = labels

    plot = ggplot(aes(x='x', y='scores', color='dataset'), data=df)
    plot = plot + geom_point(size=5)
    plot = plot + geom_line()
    plot = plot + ggtitle("Scores as time progress")
    return plot, df


# Example #2
def quantitative_eval_over_time(model_folder, prefix_dir='', lower=False):
    """Plot the summed 'max' evaluation score of each checkpoint.

    Every checkpoint found by ``find_list_ckpts`` is evaluated with
    ``quantitative_eval`` and the values in its ``model_folder + '/max'``
    column are summed into a single score. Each checkpoint name is printed
    as it is processed.

    Args:
        model_folder: Model directory; also used as the model's name when
            calling ``quantitative_eval``.
        prefix_dir: Forwarded to the checkpoint lookup and the evaluator.
        lower: Forwarded to ``quantitative_eval``.

    Returns:
        A ``(plot, df)`` tuple. ``df`` has the columns ``x`` (checkpoint
        number) and ``scores`` (summed score); ``plot`` is a point-and-line
        chart of ``scores`` against ``x``.
    """
    ckpt_nums, ckpt_names = find_list_ckpts(model_folder,
                                            prefix_dir=prefix_dir)
    max_column = model_folder + '/max'

    totals = []
    # zip is kept so iteration stops at the shorter of the two sequences.
    for _, ckpt_file in zip(ckpt_nums, ckpt_names):
        print(ckpt_file)
        corr_table = quantitative_eval(
            [(model_folder, model_folder)],
            [ckpt_file],
            prefix_dir=prefix_dir,
            lower=lower)
        totals.append(sum(corr_table[max_column]))

    df = pd.DataFrame()
    df['x'] = ckpt_nums
    df['scores'] = totals

    plot = ggplot(aes(x='x', y='scores'), data=df)
    plot = plot + geom_point(size=5)
    plot = plot + geom_line()
    plot = plot + ggtitle("Scores as time progress")
    return plot, df