def cmd(analyzed_csv, algo_feed):
    """Produce the smart-broadcasting plots after reading values from ANALYZED_CSV."""
    chpt_regex = re.compile(r'-([\d]*)$')
    # e.g. 'results-algo/top_k-q_0.33-s-fix-adjust_0.csv'
    df = pd.read_csv(analyzed_csv).dropna()

    # Derive the epoch up to which this instance was trained.
    df['chpt'] = [int(chpt_regex.search(x)[1]) for x in df.chpt_file]

    other_key = 'RQ_algo_num_tweets_mean' if algo_feed else 'RQ_num_tweets_mean'

    # Determine the users for which the number of tweets is close enough.
    index = (np.abs(df['RL_num_tweets_mean'] - df[other_key]) < 2)
    print(_now(), '{} users are valid.'.format(np.sum(index)))

    # Set up the output directory.
    plot_base = './output-plots'
    os.makedirs(plot_base, exist_ok=True)

    # Calculate the top-k metric.
    if algo_feed:
        baseline_key = 'poisson_top_k_algo_mean'
        RL_key = 'RL_top_k_algo_mean'
        RQ_key = 'RQ_algo_top_k_algo_mean'
        karimi_key = 'karimi_top_k_algo_mean'
    else:
        baseline_key = 'poisson_top_k_mean'
        RL_key = 'RL_top_k_mean'
        RQ_key = 'RQ_top_k_mean'
        karimi_key = 'karimi_top_k_mean'

    baseline = df[baseline_key][index]
    Y = {}
    Y['RL'] = df[RL_key][index] / baseline
    Y['RQ'] = df[RQ_key][index] / baseline
    Y['karimi'] = df[karimi_key][index] / baseline

    # Plot the top-k metric.
    plt.figure()
    colors = sns.color_palette(n_colors=3)
    latexify(fig_width=2.25, largeFonts=False)
    box = plt.boxplot([Y['RL'], Y['RQ'], Y['karimi']],
                      whis=0, showmeans=True, showfliers=False, showcaps=False,
                      patch_artist=True,
                      medianprops={'linewidth': 1.0},
                      boxprops={'linewidth': 1.0, 'edgecolor': colors[0],
                                'facecolor': colors[1], 'alpha': 0.3},
                      whiskerprops={'linewidth': 0})
    for idx in range(len(colors)):
        box['boxes'][idx].set_facecolor(colors[idx])
        box['boxes'][idx].set_edgecolor(colors[idx])
        box['means'][idx].set_markersize(5)
        box['means'][idx].set_markerfacecolor(colors[idx])
        box['medians'][idx].set_color(colors[idx])

    plt.xticks([1, 2, 3], [r'TPPRL', r'\textsc{RedQueen}', 'Karimi'])
    plt.tight_layout()
    format_axes(plt.gca())
    plt.savefig(os.path.join(plot_base, 'algo-top-1.pdf'),
                bbox_inches='tight', pad_inches=0)

    # Calculate the avg-rank metric.
    if algo_feed:
        baseline_key = 'poisson_avg_rank_algo_mean'
        RL_key = 'RL_avg_rank_algo_mean'
        RQ_key = 'RQ_algo_avg_rank_algo_mean'
        karimi_key = 'karimi_avg_rank_algo_mean'
    else:
        baseline_key = 'poisson_avg_rank_mean'
        RL_key = 'RL_avg_rank_mean'
        RQ_key = 'RQ_avg_rank_mean'
        karimi_key = 'karimi_avg_rank_mean'

    baseline = df[baseline_key][index]
    Y = {}
    Y['RL'] = df[RL_key][index] / baseline
    Y['RQ'] = df[RQ_key][index] / baseline
    Y['karimi'] = df[karimi_key][index] / baseline

    # Plot the avg-rank metric.
    plt.figure()
    colors = sns.color_palette(n_colors=3)
    latexify(fig_width=2.25, largeFonts=False)
    box = plt.boxplot([Y['RL'], Y['RQ'], Y['karimi']],
                      whis=0, showmeans=True, showfliers=False, showcaps=False,
                      patch_artist=True,
                      medianprops={'linewidth': 1.0},
                      boxprops={'linewidth': 1.0, 'edgecolor': colors[0],
                                'facecolor': colors[1], 'alpha': 0.3},
                      whiskerprops={'linewidth': 0})
    for idx in range(len(colors)):
        box['boxes'][idx].set_facecolor(colors[idx])
        box['boxes'][idx].set_edgecolor(colors[idx])
        box['means'][idx].set_markersize(5)
        box['means'][idx].set_markerfacecolor(colors[idx])
        box['medians'][idx].set_color(colors[idx])

    plt.xticks([1, 2, 3], [r'TPPRL', r'\textsc{RedQueen}', 'Karimi'])
    plt.tight_layout()
    format_axes(plt.gca())
    plt.savefig(os.path.join(plot_base, 'algo-avg-rank.pdf'),
                bbox_inches='tight', pad_inches=0)
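# The mean-marker boxplot styling above is repeated verbatim for every metric
# (and again in the teaching plots further below). As an illustration only, a
# small helper along the following lines could factor it out; the name
# `styled_boxplot` and its signature are made up for this sketch and are not
# part of the code above.
import matplotlib.pyplot as plt
import seaborn as sns


def styled_boxplot(series, xtick_labels):
    """Draw the shared boxplot (means shown, no whiskers/fliers) and color each box."""
    colors = sns.color_palette(n_colors=len(series))
    box = plt.boxplot(series, whis=0, showmeans=True, showfliers=False,
                      showcaps=False, patch_artist=True,
                      medianprops={'linewidth': 1.0},
                      boxprops={'linewidth': 1.0, 'alpha': 0.3},
                      whiskerprops={'linewidth': 0})
    for idx, color in enumerate(colors):
        box['boxes'][idx].set_facecolor(color)
        box['boxes'][idx].set_edgecolor(color)
        box['means'][idx].set_markersize(5)
        box['means'][idx].set_markerfacecolor(color)
        box['medians'][idx].set_color(color)
    plt.xticks(range(1, len(series) + 1), xtick_labels)
    return box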
def run(all_user_data_file, user_idx, output_dir, q, N, gpu, reward_kind, K,
        should_restore, algo_lifetime_frac, hidden_dims, only_cpu,
        with_summaries, epochs, num_iters, save_every, until,
        log_device_placement, allow_growth, algo_feed, algo_c,
        with_approx_rewards, merge_sinks, with_zero_wt):
    """Read data from ALL_USER_DATA_FILE, extract the user at USER_IDX from the
    array, and run the training/evaluation for it."""
    assert reward_kind in [EB.R_2_REWARD, EB.TOP_K_REWARD], \
        '"{}" is not recognized as a reward_kind.'.format(reward_kind)

    save_dir = os.path.join(output_dir, EB.SAVE_DIR_TMPL.format(user_idx))
    if not os.path.exists(save_dir) and should_restore:
        warnings.warn('{} does not exist, will NOT RESTORE.'.format(save_dir))

    with open(all_user_data_file, 'rb') as f:
        all_user_data = dill.load(f)

    one_user_data = all_user_data[user_idx]
    if merge_sinks:
        print(_now(), 'Merging the sinks!')
        one_user_data = RDU.merge_sinks(one_user_data)

    print(_now(), 'Making the trainer ...')
    sim_opts = one_user_data['sim_opts'].update({'q': q})
    num_other_broadcasters = len(sim_opts.other_sources)
    num_followers = len(sim_opts.sink_ids)

    # These parameters can also be made arguments, if needed.
    max_events = 50000
    reward_time_steps = 1000
    decay_steps = 1
    with_baseline = True
    batch_size = 16

    trainer_opts_seed = 42
    trainer_opts = EB.mk_def_exp_recurrent_trainer_opts(
        seed=trainer_opts_seed,
        device_gpu=gpu,
        hidden_dims=hidden_dims,
        num_other_broadcasters=num_other_broadcasters,
        only_cpu=only_cpu,
        max_events=max_events,
        reward_top_k=K,
        reward_kind=reward_kind,
        batch_size=batch_size,
        decay_steps=decay_steps,
        num_followers=num_followers,
        with_baseline=with_baseline,
        summary_dir=os.path.join(output_dir,
                                 'train-summary-user_idx-{}/train'.format(user_idx)),
        save_dir=save_dir,
        set_wt_zero=with_zero_wt,
    )

    config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=log_device_placement
    )
    config.gpu_options.allow_growth = allow_growth
    sess = tf.Session(config=config)

    trainer = EB.ExpRecurrentTrainer(
        sim_opts=sim_opts,
        _opts=trainer_opts,
        sess=sess
    )
    print(_now(), 'trainer made.')

    sink_ids = one_user_data['sim_opts'].sink_ids
    window_len = (one_user_data['duration'] / one_user_data['num_other_posts']) * N
    lifetimes = defaultdict(lambda: algo_lifetime_frac * window_len)
    algo_feed_seed = 42 + 1
    algo_feed_args = ES.make_freq_prefs(
        one_user_data=one_user_data,
        sink_ids=sink_ids,
        src_lifetime_dict=lifetimes
    )

    user_opt_dict = {}
    user_opt_dict['trainer_opts_dict'] = trainer_opts._get_dict()
    user_opt_dict['num_other_broadcasters'] = len(trainer.sim_opts.other_sources)
    user_opt_dict['hidden_dims'] = trainer.num_hidden_states
    user_opt_dict['num_followers'] = len(trainer.sim_opts.sink_ids)
    user_opt_dict['seed'] = trainer_opts_seed
    user_opt_dict['algo_feed'] = algo_feed
    user_opt_dict['algo_feed_seed'] = algo_feed_seed
    user_opt_dict['algo_feed_args'] = algo_feed_args
    user_opt_dict['algo_c'] = algo_c
    user_opt_dict['algo_with_approx_rewards'] = with_approx_rewards
    user_opt_dict['algo_reward_time_steps'] = reward_time_steps

    # Needed for experiments later.
    user_opt_dict['N'] = N
    user_opt_dict['q'] = q

    os.makedirs(trainer.save_dir, exist_ok=True)
    with open(os.path.join(trainer.save_dir, 'user_opt_dict.dill'), 'wb') as f:
        dill.dump(user_opt_dict, f)

    trainer.initialize(finalize=True)

    if should_restore and os.path.exists(save_dir):
        try:
            trainer.restore()
        except (FileNotFoundError, AttributeError):
            warnings.warn('"{}" exists, but no save files were found. Not restoring.'
                          .format(save_dir))
    global_steps = trainer.sess.run(trainer.global_step)
    if global_steps > until:
        print(
            _now(),
            'Have already run {} > {} iterations, not going further.'
            .format(global_steps, until)
        )

    op_dir = os.path.join(output_dir, 'u_data-user_idx-{}/'.format(user_idx))
    os.makedirs(op_dir, exist_ok=True)

    # start_time, end_time = one_user_data['user_event_times'][0], one_user_data['user_event_times'][-1]

    if algo_feed:
        u_datas = [EB.get_real_data_eval_algo(
            trainer=trainer,
            one_user_data=one_user_data,
            N=N,
            batch_c=algo_c,
            algo_feed_args=algo_feed_args,
            reward_time_steps=reward_time_steps,
            with_approx_rewards=with_approx_rewards
        )]
    else:
        u_datas = [EB.get_real_data_eval(trainer, one_user_data, N=N,
                                         with_red_queen=True)]

    log_eval(u_datas[-1])

    for epoch in range(epochs):
        # Ensure that the output is pushed to the SLURM file.
        sys.stdout.flush()

        step = trainer.sess.run(trainer.global_step)
        with_df = (epoch == epochs - 1) or (step > until)

        if algo_feed:
            EB.train_real_data_algo(
                trainer=trainer,
                N=N,
                one_user_data=one_user_data,
                num_iters=num_iters,
                init_seed=42 + user_idx,
                algo_feed_args=algo_feed_args,
                with_summaries=with_summaries,
                with_approx_rewards=with_approx_rewards,
                batch_c=algo_c,
                reward_time_steps=reward_time_steps,
            )
            u_datas.append(
                EB.get_real_data_eval_algo(
                    trainer=trainer,
                    one_user_data=one_user_data,
                    N=N,
                    with_df=with_df,
                    algo_feed_args=algo_feed_args,
                    reward_time_steps=reward_time_steps,
                    with_approx_rewards=with_approx_rewards,
                    batch_c=algo_c,
                )
            )
        else:
            EB.train_real_data(
                trainer,
                N=N,
                one_user_data=one_user_data,
                num_iters=num_iters,
                init_seed=42 + user_idx,
                with_summaries=with_summaries
            )
            u_datas.append(
                EB.get_real_data_eval(
                    trainer,
                    one_user_data,
                    N=N,
                    with_red_queen=True,
                    with_df=with_df
                )
            )

        log_eval(u_datas[-1])

        if (epoch + 1) % save_every == 0 or with_df:
            file_name = 'u_data-{}.dill' if not with_df else 'u_data-{}-final.dill'
            op_file_name = os.path.join(op_dir, file_name.format(step))
            with open(op_file_name, 'wb') as f:
                dill.dump(u_datas, f)
            print(_now(), 'Saved: {}'.format(op_file_name))

        if step > until:
            print(
                _now(),
                'Have already run {} > {} iterations, not going further.'
                .format(step, until)
            )
            break
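# Illustrative only: a minimal sketch of how the artefacts saved by `run` above
# could be loaded back for later analysis. The file-name patterns match the ones
# written in `run` ('u_data-{step}.dill' / 'u_data-{step}-final.dill' and
# 'user_opt_dict.dill'); the helper names are made up, and nothing is assumed
# about the structure of the saved entries beyond their being dill-picklable.
import glob
import os

import dill


def load_user_opts(save_dir):
    """Load the options dict that `run` dumps next to the checkpoints."""
    with open(os.path.join(save_dir, 'user_opt_dict.dill'), 'rb') as f:
        return dill.load(f)


def load_latest_u_data(op_dir):
    """Return the most recently written list of evaluation results, if any."""
    files = sorted(glob.glob(os.path.join(op_dir, 'u_data-*.dill')),
                   key=os.path.getmtime)
    if not files:
        return None
    with open(files[-1], 'rb') as f:
        return dill.load(f)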
def cmd(initial_difficulty_csv, alpha, beta, save_dir, T, tau, only_cpu,
        batches, verbose):
    """Read the initial difficulty of items from INITIAL_DIFFICULTY_CSV, use the
    ALPHA and BETA specified, restore the teacher model from the given SAVE_DIR,
    and compare the performance of the method against various baselines."""
    with open(initial_difficulty_csv, 'r') as f:
        n_0s = [float(x.strip()) for x in f.readline().split(',')]

    num_items = len(n_0s)
    init_seed = 1337

    scenario_opts = {
        'T': T,
        'tau': tau,
        'n_0s': n_0s,
        'alphas': np.ones(num_items) * alpha,
        'betas': np.ones(num_items) * beta,
    }

    summary_dir = None
    teacher_opts = ET.mk_def_teacher_opts(
        num_items=num_items,
        hidden_dims=8,
        save_dir=save_dir,
        only_cpu=only_cpu,
        T=T,
        tau=tau,
        scenario_opts=scenario_opts,
        # The values below do not matter because we will not be training
        # the NN here.
        summary_dir=summary_dir,
        learning_rate=0.02,
        decay_rate=0.02,
        batch_size=32,
        q=0.0001,
        q_entropy=0.002,
        learning_bump=1.0,
        decay_steps=10,
    )

    config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False
    )
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    teacher = ET.ExpRecurrentTeacher(
        _opts=teacher_opts,
        sess=sess,
        num_items=num_items
    )
    teacher.initialize(finalize=True)

    # Restore to the latest checkpoint.
    teacher.restore()
    global_steps = teacher.sess.run(teacher.global_step)
    if verbose:
        print(_now(), "Restored successfully to step {}.".format(global_steps))

    # Evaluate the performance of RL.
    _f_d, RL_test_scens = ET.get_test_feed_dicts(teacher,
                                                 range(init_seed, init_seed + batches))
    RL_rewards = [s.reward() for s in RL_test_scens]
    num_test_reviews = np.mean([x.get_num_events() for x in RL_test_scens])

    # Performance when using the uniform baseline.
    rets_unif = [
        ET.uniform_random_baseline(
            scenario_opts,
            target_reviews=num_test_reviews,
            seed=seed + 8,
            verbose=False
        )
        for seed in range(init_seed, init_seed + batches)
    ]

    # Performance when using Memorize.
    q_MEM = ET.sweep_memorize_q(scenario_opts, num_test_reviews,
                                q_init=1.0, verbose=verbose)
    rets_mem = [
        ET.memorize_baseline(
            scenario_opts,
            q_max=q_MEM,
            seed=seed + 8,
            verbose=False)
        for seed in range(init_seed, init_seed + batches)
    ]

    # Plot the reward (i.e. recall at T + tau).
    plt.figure()
    latexify(fig_width=2.25, largeFonts=False)
    colors = sns.color_palette(n_colors=3)
    Y = {
        'RL': RL_rewards,
        'MEM': [x['reward'] / (-100) for x in rets_mem],
        'Uniform': [x['reward'] / (-100) for x in rets_unif],
    }
    box = plt.boxplot([Y['RL'], Y['MEM'], Y['Uniform']],
                      whis=0, showmeans=True, showfliers=False, showcaps=False,
                      patch_artist=True,
                      medianprops={'linewidth': 1.0},
                      boxprops={'linewidth': 1.0, 'edgecolor': colors[0],
                                'facecolor': colors[1], 'alpha': 0.3},
                      whiskerprops={'linewidth': 0})
    for idx in range(len(colors)):
        box['boxes'][idx].set_facecolor(colors[idx])
        box['boxes'][idx].set_edgecolor(colors[idx])
        box['means'][idx].set_markersize(5)
        box['means'][idx].set_markerfacecolor(colors[idx])
        box['medians'][idx].set_color(colors[idx])

    plt.yticks([0.0, 0.25, 0.50], [r'0\%', r'25\%', r'50\%'])
    plt.xticks([1, 2, 3], [r'\textsc{TPPRL}', r'\textsc{Memorize}', 'Uniform'])
    plt.tight_layout()
    format_axes(plt.gca())

    plot_base = './output-plots/'
    os.makedirs(plot_base, exist_ok=True)
    plt.savefig(os.path.join(plot_base, 'recall-results-{}-{}.pdf'.format(T, tau)),
                bbox_inches='tight', pad_inches=0)

    # Plot the item difficulty.
    plt.figure()
    latexify(fig_width=2.25, largeFonts=False)
    colors = sns.color_palette(n_colors=3)
    Y = {
        'RL': [scenario_opts['n_0s'][item]
               for x in RL_test_scens
               for item in x.items],
        'MEM': [scenario_opts['n_0s'][item]
                for x in rets_mem
                for item, _ in x['review_timings']],
        'Uniform': [scenario_opts['n_0s'][item]
                    for x in rets_unif
                    for item, _ in x['review_timings']]
    }
    box = plt.boxplot([Y['RL'], Y['MEM'], Y['Uniform']],
                      whis=0, showmeans=True, showfliers=False, showcaps=False,
                      patch_artist=True,
                      medianprops={'linewidth': 1.0},
                      boxprops={'linewidth': 1.0, 'edgecolor': colors[0],
                                'facecolor': colors[1], 'alpha': 0.3},
                      whiskerprops={'linewidth': 0})
    for idx in range(len(colors)):
        box['boxes'][idx].set_facecolor(colors[idx])
        box['boxes'][idx].set_edgecolor(colors[idx])
        box['means'][idx].set_markersize(5)
        box['means'][idx].set_markerfacecolor(colors[idx])
        box['medians'][idx].set_color(colors[idx])

    plt.xticks([1, 2, 3], [r'\textsc{TPPRL}', r'\textsc{Memorize}', 'Uniform'])
    plt.tight_layout()
    format_axes(plt.gca())
    plt.savefig(os.path.join(plot_base, 'item-difficulty.pdf'),
                bbox_inches='tight', pad_inches=0)

    # Plot the reviews per day.
    RL_times = [np.floor(t) for s in RL_test_scens for t in np.cumsum(s.time_deltas)]
    MEM_times = [np.floor(t) for x in rets_mem for _, t in x['review_timings']]

    plt.figure()
    latexify(fig_width=2.25, largeFonts=False)
    c1, c2 = sns.color_palette(n_colors=2)
    f, (a1, a2) = plt.subplots(2, 1)

    a1.hist(RL_times, bins=np.arange(T + 1), density=True,
            color=c1, alpha=0.5, label='RL')
    a1.set_yticks([.04, .08])
    a1.set_yticklabels([r'4\%', r'8\%'])
    a1.set_ylabel('TPPRL')
    a1.set_ylim([0.04, 0.08])
    a1.set_xticks([0.5, 3.5, 6.5, 9.5, 13.5])
    a1.set_xticklabels([1, 4, 7, 10, 14])
    format_axes(a1)

    a2.hist(MEM_times, bins=np.arange(T + 1), density=True,
            color=c2, alpha=0.5, label=r'\textsc{Mem}')
    a2.set_xticks([0.5, 3.5, 6.5, 9.5, 13.5])
    a2.set_xticklabels([1, 4, 7, 10, 14])
    a2.set_ylabel(r'\textsc{Memorize}')
    a2.set_ylim([0.04, 0.08])
    a2.set_yticks([.04, .08])
    a2.set_yticklabels([r'4\%', r'8\%'])
    format_axes(a2)

    # plt.legend(ncol=2, bbox_to_anchor=(0, 0, 1, 1.1))
    plt.tight_layout()
    plt.savefig(os.path.join(plot_base, 'reviews-every-day.pdf'),
                bbox_inches='tight', pad_inches=0)

    print(_now(), 'Done.')
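# Illustrative only: the teacher commands in this section read
# INITIAL_DIFFICULTY_CSV as a single line of comma-separated floats (one initial
# difficulty n_0 per item). This sketch writes such a file; the function name,
# the default number of items and the value range are made up for the example.
import numpy as np


def write_example_difficulty_csv(path, num_items=100, seed=42):
    """Write one line of comma-separated n_0 values, as the readers above expect."""
    n_0s = np.random.RandomState(seed).uniform(low=0.01, high=0.1, size=num_items)
    with open(path, 'w') as f:
        f.write(','.join(str(x) for x in n_0s))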
def cmd(initial_difficulty_csv, alpha, beta, output_dir, should_restore, T, tau,
        with_summaries, summary_suffix, only_cpu, q, q_entropy, epochs, num_iters,
        save_every, until, with_MP, with_recall_probs, with_zero_wt):
    """Read the initial difficulty of items from INITIAL_DIFFICULTY_CSV, use the
    given ALPHA and BETA, train an optimal teacher, and save the results to
    OUTPUT_DIR."""
    with open(initial_difficulty_csv, 'r') as f:
        n_0s = [float(x.strip()) for x in f.readline().split(',')]

    num_items = len(n_0s)

    scenario_opts = {
        'T': T,
        'tau': tau,
        'n_0s': n_0s,
        'alphas': np.ones(num_items) * alpha,
        'betas': np.ones(num_items) * beta,
    }

    summary_dir = os.path.join(output_dir, 'summary/train-{}'.format(summary_suffix))
    save_dir = os.path.join(output_dir, 'save/')
    os.makedirs(summary_dir, exist_ok=True)
    os.makedirs(save_dir, exist_ok=True)

    teacher_opts = ET.mk_def_teacher_opts(
        num_items=num_items,
        hidden_dims=8,
        learning_rate=0.02,
        decay_rate=0.02,
        summary_dir=summary_dir,
        save_dir=save_dir,
        batch_size=32,
        only_cpu=only_cpu,
        T=T,
        tau=tau,
        q=q,
        q_entropy=q_entropy,
        learning_bump=1.0,
        decay_steps=10,
        scenario_opts=scenario_opts,
        set_wt_zero=with_zero_wt,
    )

    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    teacher = ET.ExpRecurrentTeacher(_opts=teacher_opts, sess=sess,
                                     num_items=num_items)
    teacher.initialize(finalize=True)

    if should_restore and os.path.exists(save_dir):
        try:
            teacher.restore()
            global_steps = teacher.sess.run(teacher.global_step)
            print(_now(), "Restored successfully to step {}.".format(global_steps))
        except (FileNotFoundError, AttributeError):
            warnings.warn(
                '"{}" exists, but no save files were found. Not restoring.'
                .format(save_dir))

    global_steps = teacher.sess.run(teacher.global_step)
    if global_steps > until:
        print(
            _now(),
            'Have already run {} > {} iterations, not going further.'
            .format(global_steps, until))

    for epoch in range(epochs):
        # Ensure that the output is pushed to the log file.
        sys.stdout.flush()

        teacher.train_many(
            num_iters=num_iters,
            init_seed=42,
            with_summaries=with_summaries,
            with_MP=with_MP,
            with_memorize_loss=False,
            save_every=save_every,
            with_recall_probs=with_recall_probs,
        )

        step = teacher.sess.run(teacher.global_step)
        if step > until:
            print(
                _now(),
                'Have already run {} > {} iterations, not going further.'
                .format(step, until))
            break
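# Illustrative only: the TensorFlow session setup (soft placement plus optional
# GPU memory growth) is repeated in `run` and in both teacher commands above. A
# helper like this could remove the duplication; the name `make_session` is made
# up, and the API used is the TF 1.x API that the code above already relies on.
import tensorflow as tf


def make_session(log_device_placement=False, allow_growth=True):
    """Create a tf.Session with soft placement and the given GPU options."""
    config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=log_device_placement
    )
    config.gpu_options.allow_growth = allow_growth
    return tf.Session(config=config)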