def eps_greedy_sampling(td_ct, eval_td_ct, args, conf, summary_writer, epoch_id):
    """Greedy (eps=0) sampling over the test set; log the mean simulated reward.

    Args:
        td_ct: generator compute task exposing `eps_greedy_sampling`.
        eval_td_ct: evaluator compute task exposing `inference`; its
            `click_prob` output provides the reward signal.
        args: runtime arguments; only `args.eval_exp` is read (summary tag).
        conf: config with `test_npz_list`, `npz_config_path`,
            `requested_npz_names`, `batch_size`.
        summary_writer: destination for `add_scalar_summary`.
        epoch_id: step value used for the summary.
    """
    dataset = NpzDataset(conf.test_npz_list,
                         conf.npz_config_path,
                         conf.requested_npz_names,
                         if_random_shuffle=False)
    data_gen = dataset.get_data_generator(conf.batch_size)

    list_reward = []
    # Prime with one batch so expand_candidates always has a previous batch.
    # next() builtin instead of .next(): works on Python 2 and 3.
    last_batch_data = BatchData(conf, next(data_gen))
    batch_id = 0
    for tensor_dict in data_gen:
        ### eps_greedy_sampling
        batch_data = BatchData(conf, tensor_dict)
        batch_data.set_decode_len(batch_data.seq_lens())
        batch_data.expand_candidates(last_batch_data, batch_data.seq_lens())
        # eps=0 => purely greedy action selection.
        fetch_dict = td_ct.eps_greedy_sampling(
            GenRLFeedConvertor.eps_greedy_sampling(batch_data, eps=0))
        sampled_id = np.array(fetch_dict['sampled_id']).reshape([-1])
        order = sequence_unconcat(sampled_id, batch_data.decode_len())

        ### get reward
        reordered_batch_data = batch_data.get_reordered(order)
        fetch_dict = eval_td_ct.inference(
            EvalFeedConvertor.inference(reordered_batch_data))
        # click_prob column 1 = probability of a click, used as reward.
        reward = np.array(fetch_dict['click_prob'])[:, 1]

        ### logging
        list_reward.append(np.mean(reward))
        if batch_id == 100:  # evaluate at most 101 batches
            break
        last_batch_data = BatchData(conf, tensor_dict)
        batch_id += 1

    add_scalar_summary(summary_writer, epoch_id,
                       'eps_greedy_sampling/reward-%s' % args.eval_exp,
                       np.mean(list_reward))
def test(td_ct, args, conf, summary_writer, epoch_id):
    """eval auc on the full test dataset"""
    dataset = NpzDataset(conf.test_npz_list,
                         conf.npz_config_path,
                         conf.requested_npz_names,
                         if_random_shuffle=False)
    data_gen = dataset.get_data_generator(conf.batch_size)

    auc_metric = AUCMetrics()
    seq_rmse_metric = SequenceRMSEMetrics()
    seq_correlation_metric = SequenceCorrelationMetrics()

    for batch_id, tensor_dict in enumerate(data_gen):
        batch_data = BatchData(conf, tensor_dict)
        fetch_dict = td_ct.test(EvalFeedConvertor.train_test(batch_data))

        # Flatten labels; take the click-probability column of the 2-class output.
        click_id = np.array(fetch_dict['click_id']).flatten()
        click_prob = np.array(fetch_dict['click_prob'])[:, 1]

        # Split the concatenated predictions back into per-sequence pieces.
        click_id_unconcat = sequence_unconcat(click_id, batch_data.seq_lens())
        click_prob_unconcat = sequence_unconcat(click_prob, batch_data.seq_lens())

        # AUC is accumulated globally; RMSE/correlation per sequence.
        auc_metric.add(labels=click_id, y_scores=click_prob)
        for seq_labels, seq_preds in zip(click_id_unconcat, click_prob_unconcat):
            seq_rmse_metric.add(labels=seq_labels, preds=seq_preds)
            seq_correlation_metric.add(labels=seq_labels, preds=seq_preds)

    add_scalar_summary(summary_writer, epoch_id, 'test/auc',
                       auc_metric.overall_auc())
    add_scalar_summary(summary_writer, epoch_id, 'test/seq_rmse',
                       seq_rmse_metric.overall_rmse())
    add_scalar_summary(summary_writer, epoch_id, 'test/seq_correlation',
                       seq_correlation_metric.overall_correlation())
def test(ct, args, conf, summary_writer, epoch_id, item_shuffle=False):
    """eval auc on the full test dataset"""
    dataset = NpzDataset(args.test_npz_list,
                         conf.npz_config_path,
                         conf.requested_names,
                         if_random_shuffle=True)
    data_gen = dataset.get_data_generator(conf.batch_size)

    click_rmse_metric = RMSEMetrics()
    click_accu_metric = AccuracyMetrics()

    # NOTE(review): this calls ct.train(...) inside a test routine — presumably
    # the train program is reused to fetch loss/outputs; confirm it does not
    # update parameters during evaluation.
    buffered_gen = threaded_generator(data_gen, capacity=100)
    for batch_id, tensor_dict in enumerate(buffered_gen):
        batch_data = BatchData(conf, tensor_dict)
        fetch_dict = ct.train(SimFeedConvertor.train_test(batch_data))

        click_id = np.array(fetch_dict['click_id']).flatten()
        probs = np.array(fetch_dict['click_prob'])
        click_score = click_prob_2_score(probs).flatten()

        click_rmse_metric.add(labels=click_id, preds=click_score)
        click_accu_metric.add(labels=click_id, probs=probs)

    add_scalar_summary(summary_writer, epoch_id, 'test/click_rmse',
                       click_rmse_metric.overall_rmse())
    add_scalar_summary(summary_writer, epoch_id, 'test/click_accuracy',
                       click_accu_metric.overall_accuracy())
    for key, value in click_accu_metric.overall_metrics().items():
        add_scalar_summary(summary_writer, epoch_id, 'test/%s' % key, value)
def online_inference_for_test(args, epoch_id, max_steps, ct_sim, dict_gen_ct,
                              summary_writer):
    """Do inference on the test set and log the mean total simulated response.

    Args:
        args: runtime arguments; reads `args.test_npz_list` and `args.gen_type`.
        epoch_id: step value used for the summary.
        max_steps: stop after this many batches (note: `batch_id > max_steps`
            admits max_steps + 1 batches — kept for behavioral compatibility).
        ct_sim: simulator compute task; its model conf drives data loading.
        dict_gen_ct: dict of generator compute tasks, consumed by
            `inference_one_batch`.
        summary_writer: destination for `add_scalar_summary`.
    """
    sim_conf = ct_sim.alg.model.conf
    dataset = NpzDataset(args.test_npz_list,
                         sim_conf.npz_config_path,
                         sim_conf.requested_names,
                         if_random_shuffle=False,
                         one_pass=True)
    data_gen = dataset.get_data_generator(sim_conf.batch_size)
    thread_data_gen = threaded_generator(data_gen, capacity=100)

    list_sim_responses = []

    ### online inference
    # next() builtin instead of .next(): works on Python 2 and 3.
    last_batch_data = BatchData(sim_conf, next(thread_data_gen))
    for batch_id, tensor_dict in enumerate(thread_data_gen):
        if batch_id > max_steps:
            break
        # Seed by batch_id so candidate expansion is reproducible per batch,
        # then restore nondeterministic seeding afterwards.
        np.random.seed(batch_id)
        batch_data = BatchData(sim_conf, tensor_dict)
        batch_data.set_decode_len(batch_data.seq_lens())
        batch_data.expand_candidates(last_batch_data, batch_data.seq_lens())
        np.random.seed(None)

        # Ground-truth clicks must not leak into inference.
        del batch_data.tensor_dict['click_id']
        orders, sim_responses = inference_one_batch(
            args.gen_type, ct_sim, dict_gen_ct, batch_data,
            eps=0)  # , (b, decode_len)
        # save to replay memory
        # NOTE(review): sim_batch_data is never used afterwards here;
        # kept in case get_reordered has side effects — confirm and remove.
        sim_batch_data = batch_data.get_reordered(orders, sim_responses)
        list_sim_responses.append(sim_responses)
        last_batch_data = BatchData(sim_conf, tensor_dict)
        if batch_id % 100 == 0:
            logging.info('inference test batch %d' % batch_id)

    # Sum responses over the decode dimension, then average over samples.
    list_sum_response = np.sum(np.concatenate(list_sim_responses, 0), 1)  # (b,)
    add_scalar_summary(summary_writer, epoch_id, 'inference/test_sim_responses',
                       np.mean(list_sum_response))
def train(ct, args, conf, summary_writer, epoch_id):
    """train for conf.train_interval steps"""
    dataset = NpzDataset(args.train_npz_list,
                         conf.npz_config_path,
                         conf.requested_names,
                         if_random_shuffle=True)
    data_gen = dataset.get_data_generator(conf.batch_size)

    # Window-averaged loss (reset every prt_interval) and epoch-wide loss.
    list_loss = []
    list_epoch_loss = []
    buffered_gen = threaded_generator(data_gen, capacity=100)
    for batch_id, tensor_dict in enumerate(buffered_gen):
        batch_data = BatchData(conf, tensor_dict)
        fetch_dict = ct.train(SimFeedConvertor.train_test(batch_data))
        batch_loss = np.array(fetch_dict['loss'])
        list_loss.append(batch_loss)
        list_epoch_loss.append(np.mean(batch_loss))
        if batch_id % conf.prt_interval == 0:
            logging.info('batch_id:%d loss:%f' % (batch_id, np.mean(list_loss)))
            list_loss = []

    add_scalar_summary(summary_writer, epoch_id, 'train/loss',
                       np.mean(list_epoch_loss))
def online_inference(args, epoch_id, max_steps, data_gen, ct_sim, dict_gen_ct,
                     summary_writer, if_print=True):
    """Do inference for `max_steps` batches and build a replay memory.

    Args:
        args: runtime arguments; reads `args.gen_type` and `args.infer_eps`.
        epoch_id: used both for seeding and for the summary step.
        max_steps: number of batches to draw from `data_gen`.
        data_gen: batch-dict generator (consumed, not restarted here).
        ct_sim: simulator compute task; its model conf drives BatchData.
        dict_gen_ct: dict of generator compute tasks for `inference_one_batch`.
        summary_writer: destination for `add_scalar_summary` (only if if_print).
        if_print: when True, log progress and write the response summary.

    Returns:
        list of reordered BatchData objects (the replay memory).
    """
    sim_conf = ct_sim.alg.model.conf
    replay_memory = []
    list_sim_responses = []

    ### online inference
    # next() builtin instead of .next(): works on Python 2 and 3.
    last_batch_data = BatchData(sim_conf, next(data_gen))
    for batch_id in range(max_steps):
        # Seed uniquely per (epoch, batch) so candidate expansion is
        # reproducible, then restore nondeterministic seeding afterwards.
        np.random.seed(epoch_id * max_steps + batch_id)
        tensor_dict = next(data_gen)
        batch_data = BatchData(sim_conf, tensor_dict)
        batch_data.set_decode_len(batch_data.seq_lens())
        batch_data.expand_candidates(last_batch_data, batch_data.seq_lens())
        np.random.seed(None)

        # Ground-truth clicks must not leak into inference.
        del batch_data.tensor_dict['click_id']
        if batch_data.batch_size() == 1:  # otherwise, rl will crash
            continue
        orders, sim_responses = inference_one_batch(
            args.gen_type, ct_sim, dict_gen_ct, batch_data,
            eps=args.infer_eps)  # , (b, decode_len)
        # save to replay memory
        sim_batch_data = batch_data.get_reordered(orders, sim_responses)
        replay_memory.append(sim_batch_data)
        list_sim_responses.append(sim_responses)
        last_batch_data = BatchData(sim_conf, tensor_dict)
        if batch_id % 100 == 0 and if_print:
            logging.info('inference epoch %d batch %d' % (epoch_id, batch_id))

    if if_print:
        # Sum responses over the decode dimension, average over samples.
        list_sum_response = np.sum(np.concatenate(list_sim_responses, 0), 1)  # (b,)
        add_scalar_summary(summary_writer, epoch_id, 'inference/sim_responses',
                           np.mean(list_sum_response))
    return replay_memory
def train(td_ct, args, conf, summary_writer, epoch_id):
    """train for conf.train_interval steps"""
    dataset = NpzDataset(conf.train_npz_list,
                         conf.npz_config_path,
                         conf.requested_npz_names,
                         if_random_shuffle=True)
    data_gen = dataset.get_data_generator(conf.batch_size)

    # Window-averaged loss (reset every prt_interval) and epoch-wide loss.
    list_epoch_loss = []
    list_loss = []
    for batch_id, tensor_dict in enumerate(data_gen):
        batch_data = BatchData(conf, tensor_dict)
        fetch_dict = td_ct.train(GenSLFeedConvertor.train_test(batch_data))
        batch_loss = np.array(fetch_dict['loss'])
        list_loss.append(batch_loss)
        list_epoch_loss.append(np.mean(batch_loss))
        if batch_id % conf.prt_interval == 0:
            logging.info('batch_id:%d loss:%f' % (batch_id, np.mean(list_loss)))
            list_loss = []

    add_scalar_summary(summary_writer, epoch_id, 'train/loss',
                       np.mean(list_epoch_loss))
def log_train(td_ct, args, conf, summary_writer, replay_memory, epoch_id):
    """Train on ground-truth order (no resampling), with replay-memory updates.

    Each incoming batch is stored in `replay_memory` in its original order
    with the logged `click_id` as the reward, then one random memory entry
    is used for a training step.

    Args:
        td_ct: generator compute task exposing `train`.
        args: runtime arguments (unused here, kept for interface parity).
        conf: config with train data paths and `batch_size`, `prt_interval`.
        summary_writer: destination for `add_scalar_summary`.
        replay_memory: mutable list of (batch_data, reward); appended to.
        epoch_id: used to derive a global batch id for summaries.
    """
    dataset = NpzDataset(conf.train_npz_list,
                         conf.npz_config_path,
                         conf.requested_npz_names,
                         if_random_shuffle=True)
    data_gen = dataset.get_data_generator(conf.batch_size)

    list_reward = []
    list_loss = []
    list_first_Q = []
    # Rough batches-per-epoch used only to spread summary x-coordinates.
    guessed_batch_num = 11500
    batch_id = 0
    # next() builtin instead of .next(): works on Python 2 and 3.
    # NOTE(review): last_batch_data is never read in this function (no
    # candidate expansion here) — presumably kept to mirror the RL train
    # loop and to skip the first batch consistently; confirm.
    last_batch_data = BatchData(conf, next(data_gen))
    for tensor_dict in data_gen:
        ### eps_greedy_sampling
        batch_data = BatchData(conf, tensor_dict)
        batch_data.set_decode_len(batch_data.seq_lens())
        # Identity order: keep the logged item order.
        order = [np.arange(d) for d in batch_data.decode_len()]

        ### get reward
        reordered_batch_data = batch_data.get_reordered(order)
        reordered_batch_data.set_decode_len(batch_data.decode_len())
        reward = batch_data.tensor_dict['click_id'].values

        ### save to replay_memory
        replay_memory.append((reordered_batch_data, reward))

        ### train
        memory_batch_data, reward = replay_memory[np.random.randint(
            len(replay_memory))]
        feed_dict = GenRLFeedConvertor.train_test(memory_batch_data, reward)
        fetch_dict = td_ct.train(feed_dict)

        ### logging
        list_reward.append(np.mean(reward))
        list_loss.append(np.array(fetch_dict['loss']))
        # Q-value of the first decode step.
        list_first_Q.append(np.mean(np.array(fetch_dict['c_Q'])[0]))
        if batch_id % 10 == 0:
            global_batch_id = epoch_id * guessed_batch_num + batch_id
            add_scalar_summary(summary_writer, global_batch_id,
                               'train/rl_reward', np.mean(list_reward))
            add_scalar_summary(summary_writer, global_batch_id,
                               'train/rl_loss', np.mean(list_loss))
            add_scalar_summary(summary_writer, global_batch_id,
                               'train/rl_1st_Q', np.mean(list_first_Q))
            list_reward = []
            list_loss = []
            list_first_Q = []
        last_batch_data = BatchData(conf, tensor_dict)
        batch_id += 1
def train(td_ct, eval_td_ct, args, conf, summary_writer, replay_memory,
          epoch_id):
    """RL training loop: eps-greedy sampling, evaluator reward, replay train.

    Each batch is resampled with eps-greedy (eps=0.2), scored by the
    evaluator's click probability, stored into `replay_memory`, and then one
    random memory entry is used for a training step.

    Args:
        td_ct: generator compute task exposing `eps_greedy_sampling`/`train`.
        eval_td_ct: evaluator compute task exposing `inference`.
        args: runtime arguments (unused here, kept for interface parity).
        conf: config with train data paths and `batch_size`.
        summary_writer: destination for `add_scalar_summary`.
        replay_memory: mutable list of (batch_data, reward); appended to.
        epoch_id: used to derive a global batch id for summaries.
    """
    dataset = NpzDataset(conf.train_npz_list,
                         conf.npz_config_path,
                         conf.requested_npz_names,
                         if_random_shuffle=True)
    data_gen = dataset.get_data_generator(conf.batch_size)

    list_reward = []
    list_loss = []
    list_first_Q = []
    # Rough batches-per-epoch used only to spread summary x-coordinates.
    guessed_batch_num = 11500
    batch_id = 0
    # next() builtin instead of .next(): works on Python 2 and 3.
    last_batch_data = BatchData(conf, next(data_gen))
    for tensor_dict in data_gen:
        ### eps_greedy_sampling
        batch_data = BatchData(conf, tensor_dict)
        batch_data.set_decode_len(batch_data.seq_lens())
        batch_data.expand_candidates(last_batch_data, batch_data.seq_lens())
        fetch_dict = td_ct.eps_greedy_sampling(
            GenRLFeedConvertor.eps_greedy_sampling(batch_data, eps=0.2))
        sampled_id = np.array(fetch_dict['sampled_id']).reshape([-1])
        order = sequence_unconcat(sampled_id, batch_data.decode_len())

        ### get reward
        reordered_batch_data = batch_data.get_reordered(order)
        fetch_dict = eval_td_ct.inference(
            EvalFeedConvertor.inference(reordered_batch_data))
        # click_prob column 1 = probability of a click, used as reward.
        reward = np.array(fetch_dict['click_prob'])[:, 1]

        ### save to replay_memory
        # Keep candidates so the stored sample can be re-fed to the RL model.
        reordered_batch_data2 = batch_data.get_reordered_keep_candidate(order)
        reordered_batch_data2.set_decode_len(batch_data.decode_len())
        replay_memory.append((reordered_batch_data2, reward))

        ### train
        memory_batch_data, reward = replay_memory[np.random.randint(
            len(replay_memory))]
        feed_dict = GenRLFeedConvertor.train_test(memory_batch_data, reward)
        fetch_dict = td_ct.train(feed_dict)

        ### logging
        list_reward.append(np.mean(reward))
        list_loss.append(np.array(fetch_dict['loss']))
        # Q-value of the first decode step.
        list_first_Q.append(np.mean(np.array(fetch_dict['c_Q'])[0]))
        if batch_id % 10 == 0:
            global_batch_id = epoch_id * guessed_batch_num + batch_id
            add_scalar_summary(summary_writer, global_batch_id,
                               'train/rl_reward', np.mean(list_reward))
            add_scalar_summary(summary_writer, global_batch_id,
                               'train/rl_loss', np.mean(list_loss))
            add_scalar_summary(summary_writer, global_batch_id,
                               'train/rl_1st_Q', np.mean(list_first_Q))
            list_reward = []
            list_loss = []
            list_first_Q = []
        last_batch_data = BatchData(conf, tensor_dict)
        batch_id += 1
def offline_training(args, epoch_id, replay_memory, dict_gen_ct, summary_writer, if_save=True, env_rl_data_gen=None):
    """ Do offline train on the replay_memory.

    Dispatches on args.gen_type; several branches may run for the same call
    (e.g. 'env_credit' trains both the env model and the credit model).
    When if_save is True, each trained model logs its mean loss and
    checkpoints via save_model(epoch_id).
    env_rl_data_gen is only used by the 'env_rl' branch.
    """
    ### offline train env model
    if args.gen_type in ['env', 'env_credit', 'env_rl']:
        list_loss = []
        for sim_batch_data in replay_memory:
            ct_env = dict_gen_ct['env']
            fetch_dict = ct_env.train(EnvFeedConvertor.train_test(sim_batch_data, args.env_item_dropout_rate, ct_env.alg.model.conf))
            list_loss.append(np.array(fetch_dict['loss']))
        if if_save:
            add_scalar_summary(summary_writer, epoch_id, 'train/env_loss', np.mean(list_loss))
            ct_env.save_model(epoch_id)
    ### offline train credit
    if args.gen_type == 'env_credit':
        # Optional global baseline subtracted from ground-truth credit.
        if args.credit_type == 'gt_globbase':
            globbase = get_globbase(replay_memory)  # (seq_len,)
            print('globbase', globbase.tolist())
        else:
            globbase = None
        list_loss = []
        for sim_batch_data in replay_memory:
            ct_env = dict_gen_ct['env']
            ct_credit = dict_gen_ct['credit']
            # Credit targets are produced by the (just-trained) env model.
            credit = generate_credit_one_batch(ct_env, sim_batch_data, credit_type=args.credit_type, credit_gamma=args.credit_gamma, globbase=globbase)
            fetch_dict = ct_credit.train(CreditFeedConvertor.train_test(sim_batch_data, credit, ct_credit.alg.model.conf))
            list_loss.append(np.array(fetch_dict['loss']))
        if if_save:
            add_scalar_summary(summary_writer, epoch_id, 'train/credit_loss', np.mean(list_loss))
            ct_credit.save_model(epoch_id)
    ### offline train mc_credit
    if args.gen_type == 'mc_credit':
        list_loss = []
        for sim_batch_data in replay_memory:
            ct_credit = dict_gen_ct['credit']
            # Monte-Carlo credit needs no env model (first arg None).
            credit = generate_credit_one_batch(None, sim_batch_data, credit_type='mc', credit_gamma=args.credit_gamma, globbase=None)
            fetch_dict = ct_credit.train(CreditFeedConvertor.train_test(sim_batch_data, credit, ct_credit.alg.model.conf))
            list_loss.append(np.array(fetch_dict['loss']))
        if if_save:
            add_scalar_summary(summary_writer, epoch_id, 'train/credit_loss', np.mean(list_loss))
            ct_credit.save_model(epoch_id)
    ### offline train rl
    if args.gen_type == 'rl':
        list_loss = []
        for sim_batch_data in replay_memory:
            ct_rl = dict_gen_ct['rl']
            fetch_dict = ct_rl.train(RLFeedConvertor.train_test(sim_batch_data, ct_rl.alg.model.conf))
            list_loss.append(np.array(fetch_dict['loss']))
        if if_save:
            add_scalar_summary(summary_writer, epoch_id, 'train/rl_loss', np.mean(list_loss))
            ct_rl.save_model(epoch_id)
    ### offline train ddpg
    if args.gen_type == 'ddpg':
        list_actor_loss = []
        list_critic_loss = []
        for batch_id, sim_batch_data in enumerate(replay_memory):
            ct_ddpg = dict_gen_ct['ddpg']
            fetch_dict = ct_ddpg.train(DDPGFeedConvertor.train_test(sim_batch_data, ct_ddpg.alg.model.conf))
            list_actor_loss.append(np.array(fetch_dict['actor_loss']))
            list_critic_loss.append(np.array(fetch_dict['critic_loss']))
            if batch_id % 100 == 0:
                print(epoch_id, batch_id, 'train/ddpg_actor_loss', np.mean(np.array(fetch_dict['actor_loss'])))
                print(epoch_id, batch_id, 'train/ddpg_critic_loss', np.mean(np.array(fetch_dict['critic_loss'])))
        if if_save:
            add_scalar_summary(summary_writer, epoch_id, 'train/ddpg_total_loss', np.mean(list_actor_loss) + np.mean(list_critic_loss))
            add_scalar_summary(summary_writer, epoch_id, 'train/ddpg_actor_loss', np.mean(list_actor_loss))
            add_scalar_summary(summary_writer, epoch_id, 'train/ddpg_critic_loss', np.mean(list_critic_loss))
            ct_ddpg.save_model(epoch_id)
    ### offline train cf model
    if args.gen_type == 'cf':
        list_loss = []
        for sim_batch_data in replay_memory:
            ct_cf = dict_gen_ct['cf']
            fetch_dict = ct_cf.train(CFFeedConvertor.train_test(sim_batch_data, ct_cf.alg.model.conf))
            list_loss.append(np.array(fetch_dict['loss']))
        if if_save:
            add_scalar_summary(summary_writer, epoch_id, 'train/cf_loss', np.mean(list_loss))
            ct_cf.save_model(epoch_id)
    ### offline train rl with additional data from env
    if args.gen_type == 'env_rl':
        ct_env = dict_gen_ct['env']
        max_env_train_steps = len(replay_memory)
        # Generate extra trajectories from the env model (epoch 0 seed,
        # no printing), then train RL on real + generated data.
        env_replay_memory = online_inference(args, 0, max_env_train_steps, env_rl_data_gen, ct_env, dict_gen_ct, None, if_print=False)
        list_loss = []
        for sim_batch_data in replay_memory + env_replay_memory:
            ct_rl = dict_gen_ct['rl']
            fetch_dict = ct_rl.train(RLFeedConvertor.train_test(sim_batch_data, ct_rl.alg.model.conf))
            list_loss.append(np.array(fetch_dict['loss']))
        if if_save:
            add_scalar_summary(summary_writer, epoch_id, 'train/rl_loss', np.mean(list_loss))
            ct_rl.save_model(epoch_id)
val_loss = compute_loss_fn(val_network_output, val_item["bboxes_preprocessed"], val_item["labels_preprocessed"]) val_loss_sum += val_loss train_loss = train_loss_sum / config["train"]["val_iter"] val_loss = val_loss_sum / config["train"]["val_batch"] utils.add_item_summary(train_item, train_network_output, train_summary_writer, anchors, config["dataset"]["num_classes"]) utils.add_item_summary(val_item, val_network_output, val_summary_writer, anchors, config["dataset"]["num_classes"]) utils.add_scalar_summary([train_loss], ["train_loss"], train_summary_writer) utils.add_scalar_summary([val_loss], ["val_loss"], val_summary_writer) print("Loss at step {:04d}: train loss: {:.3f}, val loss: {:3f}". format(train_index, train_loss, val_loss)) train_loss_sum = 0 if train_index != 0 and train_index % config["test"]["test_iter"] == 0: for test_index, test_item in enumerate(test_ds): if (config["test"]["test_batch"] is not None and test_index % config["test"]["test_batch"]): break test_network_output = od_model(test_item["image"], training=False)
train_c_loss = train_c_loss_sum / config["train"]["val_iter"] train_r_loss = train_r_loss_sum / config["train"]["val_iter"] val_loss = val_loss_sum / config["train"]["val_batch"] val_c_loss = val_c_loss_sum / config["train"]["val_batch"] val_r_loss = val_r_loss_sum / config["train"]["val_batch"] utils.add_item_summary(train_item, train_network_output, train_summary_writer, anchors, config["dataset"]["num_classes"]) utils.add_item_summary(val_item, val_network_output, val_summary_writer, anchors, config["dataset"]["num_classes"]) utils.add_scalar_summary( [train_loss, train_c_loss, train_r_loss], ["loss", "classificaition_loss", "regression_loss"], train_summary_writer) utils.add_scalar_summary( [val_loss, val_c_loss, val_r_loss], ["loss", "classificaition_loss", "regression_loss"], val_summary_writer) print("Loss at step {:04d}: train loss: {:.3f}, val loss: {:3f}". format(train_index, train_loss, val_loss)) train_loss_sum = 0 train_c_loss_sum = 0 train_r_loss_sum = 0 if train_index != 0 and train_index % config["test"]["test_iter"] == 0: