def evaluate_df():
    # Generalized zero-shot (1006 seen+unseen tags) and zero-shot (81 unseen tags) evaluation.
    ap_tst_1006, _, _ = evaluate(iterator_tst,
                                 [img_ids_tst, features_tst, labels_1006_tst],
                                 model.features, model.gzs_logits, sess, model)
    ap_tst_81, _, _ = evaluate(iterator_tst,
                               [img_ids_tst, features_tst, labels_81_tst],
                               model.features, model.zs_logits, sess, model)
    print('mAP 1006', np.mean(ap_tst_1006))
    print('mAP 81', np.mean(ap_tst_81))

    g_F1_3_tst, g_P_3_tst, g_R_3_tst = evaluate_k(
        3, iterator_tst, [img_ids_tst, features_tst, labels_1006_tst],
        model.features, model.gzs_logits, sess, model)
    g_F1_5_tst, g_P_5_tst, g_R_5_tst = evaluate_k(
        5, iterator_tst, [img_ids_tst, features_tst, labels_1006_tst],
        model.features, model.gzs_logits, sess, model)
    F1_3_tst, P_3_tst, R_3_tst = evaluate_k(
        3, iterator_tst, [img_ids_tst, features_tst, labels_81_tst],
        model.features, model.zs_logits, sess, model)
    F1_5_tst, P_5_tst, R_5_tst = evaluate_k(
        5, iterator_tst, [img_ids_tst, features_tst, labels_81_tst],
        model.features, model.zs_logits, sess, model)

    print('k=3', np.mean(F1_3_tst), np.mean(P_3_tst), np.mean(R_3_tst))
    print('k=5', np.mean(F1_5_tst), np.mean(P_5_tst), np.mean(R_5_tst))
    print('g_k=3', np.mean(g_F1_3_tst), np.mean(g_P_3_tst), np.mean(g_R_3_tst))
    print('g_k=5', np.mean(g_F1_5_tst), np.mean(g_P_5_tst), np.mean(g_R_5_tst))

    df_81 = pd.DataFrame()
    df_81['classes'] = tag81
    df_81['F1_3'] = F1_3_tst
    df_81['P_3'] = P_3_tst
    df_81['R_3'] = R_3_tst
    df_81['F1_5'] = F1_5_tst
    df_81['P_5'] = P_5_tst
    df_81['R_5'] = R_5_tst
    df_81['ap'] = ap_tst_81

    df_1k = pd.DataFrame()
    df_1k['g_F1_3'] = g_F1_3_tst
    df_1k['g_P_3'] = g_P_3_tst
    df_1k['g_R_3'] = g_R_3_tst
    df_1k['g_F1_5'] = g_F1_5_tst
    df_1k['g_P_5'] = g_P_5_tst
    df_1k['g_R_5'] = g_R_5_tst
    df_1k['aP'] = ap_tst_1006
    return df_81, df_1k
def evaluate_df():
    ap_tst, predictions_tst_v, labels_tst_v = evaluate(
        iterator_test, [img_ids_test, features_test, seen_labels_test],
        model.features, model.logits, sess, model)
    print('mAP', np.mean(ap_tst))

    norm_b = np.linalg.norm(predictions_tst_v)
    F1_3_tst, P_3_tst, R_3_tst = evaluate_k(
        3, iterator_test, [img_ids_test, features_test, seen_labels_test],
        model.features, model.logits, sess, model,
        predictions_tst_v, labels_tst_v)
    F1_5_tst, P_5_tst, R_5_tst = evaluate_k(
        5, iterator_test, [img_ids_test, features_test, seen_labels_test],
        model.features, model.logits, sess, model,
        predictions_tst_v, labels_tst_v)
    # Sanity check: evaluate_k should not modify the cached predictions.
    print('sanity check {}'.format(np.linalg.norm(predictions_tst_v) - norm_b))
    ## reload best model
    print(np.mean(F1_3_tst), np.mean(P_3_tst), np.mean(R_3_tst))
    print(np.mean(F1_5_tst), np.mean(P_5_tst), np.mean(R_5_tst))

    df = pd.DataFrame()
    df['classes'] = seen_classes
    df['F1_3'] = F1_3_tst
    df['P_3'] = P_3_tst
    df['R_3'] = R_3_tst
    df['F1_5'] = F1_5_tst
    df['P_5'] = P_5_tst
    df['R_5'] = R_5_tst
    df['ap'] = ap_tst
    return df
def evaluate_df():
    F1_3_tst, P_3_tst, R_3_tst = evaluate_k(
        3, iterator_tst, [img_ids_tst, features_tst, labels_81_tst],
        model.features, model.logits, sess, model)
    F1_5_tst, P_5_tst, R_5_tst = evaluate_k(
        5, iterator_tst, [img_ids_tst, features_tst, labels_81_tst],
        model.features, model.logits, sess, model)
    ap_tst, _, _ = evaluate(iterator_tst,
                            [img_ids_tst, features_tst, labels_81_tst],
                            model.features, model.logits, sess, model)
    ## reload best model
    print('mAP', np.mean(ap_tst))
    print('k=3', np.mean(F1_3_tst), np.mean(P_3_tst), np.mean(R_3_tst))
    print('k=5', np.mean(F1_5_tst), np.mean(P_5_tst), np.mean(R_5_tst))

    df = pd.DataFrame()
    df['classes'] = tag81
    df['F1_3'] = F1_3_tst
    df['P_3'] = P_3_tst
    df['R_3'] = R_3_tst
    df['F1_5'] = F1_5_tst
    df['P_5'] = P_5_tst
    df['R_5'] = R_5_tst
    df['ap'] = ap_tst
    return df
def evaluate_df():
    ap_tst, predictions_mll, labels_mll = evaluate(
        iterator_test, [img_ids_test, features_test, seen_labels_test],
        model.features, model.logits, sess, model)
    F1_3_tst, P_3_tst, R_3_tst = evaluate_k(
        3, iterator_test, [img_ids_test, features_test, seen_labels_test],
        model.features, model.logits, sess, model,
        predictions_mll, labels_mll)
    F1_5_tst, P_5_tst, R_5_tst = evaluate_k(
        5, iterator_test, [img_ids_test, features_test, seen_labels_test],
        model.features, model.logits, sess, model,
        predictions_mll, labels_mll)
    ## reload best model
    print('mAP', np.mean(ap_tst))
    print('k=3', np.mean(F1_3_tst), np.mean(P_3_tst), np.mean(R_3_tst))
    print('k=5', np.mean(F1_5_tst), np.mean(P_5_tst), np.mean(R_5_tst))

    df = pd.DataFrame()
    df['classes'] = seen_classes
    df['F1_3'] = F1_3_tst
    df['P_3'] = P_3_tst
    df['R_3'] = R_3_tst
    df['F1_5'] = F1_5_tst
    df['P_5'] = P_5_tst
    df['R_5'] = R_5_tst
    df['ap'] = ap_tst
    return df
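# The evaluate_df() variants above all lean on two helpers: evaluate() (per-class
# average precision) and evaluate_k() (per-class precision/recall/F1 over the top-k
# scored labels per image). Their real signatures live elsewhere in the repo; the
# function below is only an illustrative sketch of the top-k metric they are assumed
# to compute, written against plain NumPy arrays. The name topk_per_class_prf and
# its argument layout are hypothetical, not the repo's actual API.
import numpy as np

def topk_per_class_prf(scores, labels, k):
    """Per-class precision/recall/F1 when each image keeps its k highest-scored tags.

    scores: float array [n_images, n_classes]; labels: binary array of the same shape.
    Returns (f1, precision, recall), each of length n_classes. Details such as tie
    handling may differ from the evaluate_k() used above.
    """
    n_images, n_classes = scores.shape
    topk = np.argsort(-scores, axis=1)[:, :k]          # indices of the k best tags per image
    predicted = np.zeros_like(labels)
    predicted[np.arange(n_images)[:, None], topk] = 1  # one-hot top-k predictions
    tp = (predicted * labels).sum(axis=0).astype(float)
    fp = (predicted * (1 - labels)).sum(axis=0).astype(float)
    fn = ((1 - predicted) * labels).sum(axis=0).astype(float)
    precision = tp / np.maximum(tp + fp, 1e-12)
    recall = tp / np.maximum(tp + fn, 1e-12)
    f1 = 2 * precision * recall / np.maximum(precision + recall, 1e-12)
    return f1, precision, recall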
t1 = time() - t0

elbo = np.mean(elbo)
cat_mean = np.mean(cat_mean)
kl = np.mean(kls)
accuracy = np.mean(accuracy)
logger.add(epoch, kl=kl, tr_elbo=elbo, tr_acc=accuracy, tr_ll=cat_mean,
           tr_time=t1)

model.eval()

t_dvi = time()
test_acc_dvi = evaluate(model, test_loader, mode='dvi', args=args)
t_dvi = time() - t_dvi

if not args.no_mc:
    t_mc = time()
    test_acc_mcvi = evaluate(model, test_loader, mode='mcvi', args=args)
    t_mc = time() - t_mc
    logger.add(epoch, te_acc_mcvi=test_acc_mcvi, te_time_mcvi=t_mc)

test_acc_samples = evaluate(model, test_loader, mode='samples_dvi', args=args)
# `async=True` is no longer valid (async is a reserved word since Python 3.7);
# non_blocking=True is the equivalent argument.
inputs = Variable(inputs.cuda(non_blocking=True))
labels = Variable(labels.cuda(non_blocking=True))
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()

accs.append(metrics.logit2acc(outputs.data, labels))  # probably a bad way to calculate accuracy
training_loss += loss.item()

logger.add(epoch, tr_loss=training_loss / steps, tr_acc=np.mean(accs))

# Deterministic test
net.eval()
acc, nll = utils.evaluate(net, testloader, num_ens=1)
logger.add(epoch, te_nll_det=nll, te_acc_det=acc)

# Stochastic test
net.train()
acc, nll = utils.evaluate(net, testloader, num_ens=1)
logger.add(epoch, te_nll_stoch=nll, te_acc_stoch=acc)

# Test-time averaging
net.train()
acc, nll = utils.evaluate(net, testloader, num_ens=20)
logger.add(epoch, te_nll_ens=nll, te_acc_ens=acc)

logger.add(epoch, time=time() - t0)
logger.iter_info()
logger.save(silent=True)
def do_testing(env, model, target_model=None, dpaths=None, render=False,
               num_episodes=100):
    # print("Is on test mode ?", not env.is_training)
    tf.reset_default_graph()

    # Create placeholders
    states_pl = tf.placeholder(
        tf.float32,
        shape=(None, FRAME_WIDTH, FRAME_HEIGHT, FRAME_BUFFER_SIZE),
        name='states')
    actions_pl = tf.placeholder(tf.int32, shape=(None), name='actions')
    targets_pl = tf.placeholder(tf.float32, shape=(None), name='targets')

    # Value function approximator network
    q_output = model.graph(states_pl)

    # Build target network
    q_target_net = target_model.graph(states_pl)

    # Compute Q from current q_output and one-hot actions
    Q = tf.reduce_sum(
        tf.multiply(q_output,
                    tf.one_hot(actions_pl, env.action_space.n,
                               dtype=tf.float32)),
        axis=1)

    # Loss operation
    loss_op = tf.reduce_mean(tf.square(targets_pl - Q) / 2)

    # Prediction op
    prediction = tf.argmax(q_output, 1)
    # prediction = q_output

    # Model saver
    saver = tf.train.Saver()

    # Init all variables
    init_op = tf.global_variables_initializer()

    # Limit memory usage when several runs share the same GPU
    config = tf.ConfigProto(allow_soft_placement=True)
    # config.gpu_options.per_process_gpu_memory_fraction = 0.33

    # Start session
    with tf.Session(config=config) as sess:
        if dpaths is not None:
            new_saver = tf.train.import_meta_graph(dpaths[1])
            new_saver.restore(sess, tf.train.latest_checkpoint(dpaths[0]))

        data_stats = None
        if env.usdqn_sim.data_testing:
            for i in range(len(env.usdqn_sim.wheel_data)):
                print('i:%s/%s' % (i, len(env.usdqn_sim.wheel_data)))
                means, stds, vmin, vmax = evaluate(
                    env, sess, prediction, states_pl,
                    env.usdqn_sim.data_testing, GAMMA, False, render)
                stats = np.concatenate(
                    [np.array([i]), means, stds, vmin, vmax],
                    axis=-1).reshape([1, -1])
                if data_stats is None:
                    data_stats = stats
                else:
                    data_stats = np.concatenate([data_stats, stats], axis=0)
                np.savetxt(os.path.join(dpaths[0], 'testing.csv'),
                           data_stats, delimiter=',')
        else:
            means, stds = evaluate(env, sess, prediction, states_pl,
                                   num_episodes, GAMMA, False, render)
            # env.usdqn_sim.save_history()
            # Save means
            print(means)
def train(args): # Verify algorithm and config global env_options, trainer_options algo = args.algo if algo == "PPO": config = ppo_config else: raise ValueError("args.algo must in [PPO]") config.num_envs = args.num_envs config.activation = nn.ReLU if args.trainopt is not None: f = open(args.trainopt) trainer_options = json.load(f) if args.opt is not None: opt = json.load(open(args.opt)) env_options = opt['env'] trainer_options = opt['trainer'] # Seed the environments and setup torch seed = args.seed torch.manual_seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed(seed) torch.set_num_threads(1) # Clean log directory log_dir = verify_log_dir('work_dirs', args.log_dir) # Create vectorized environments num_envs = args.num_envs env_id = args.env_id main_envs = make_envs( env_id='Humanoid-v3', seed=seed, log_dir=log_dir, num_envs=num_envs, asynchronous=True, ) aux_envs = make_envs( env_id='Walker2d-v3', seed=seed, log_dir=log_dir, num_envs=num_envs, asynchronous=True, ) envs = [main_envs, aux_envs] # eval_env is main_env healthy_z_range = (1.0, 2.0) eval_env = gym.make(env_id, healthy_z_range=healthy_z_range, healthy_reward=0) main_obs_dim = 376 main_act_dim = 17 main_reduce_obs_dim = 46 main_reduce_act_dim = 11 aux_obs_dim = 17 aux_act_dim = 6 obs_dims = [main_reduce_obs_dim, aux_obs_dim] act_dims = [main_act_dim, aux_act_dim] dim_dict = dict(obs_a=main_reduce_obs_dim, act_a=main_reduce_act_dim, obs_b=aux_obs_dim, act_b=aux_act_dim, coeff_a=0.4, coeff_b=1) dim_dict['act_dim'] = 17 dim_dict['real_obs_dim'] = 46 # Setup trainer if algo == "PPO": trainer = PPOTrainerMTMT(config, dim_dict) else: raise NotImplementedError frame_stack_tensors = [ FrameStackTensor(num_envs, main_envs.observation_space.shape, config.device), FrameStackTensor(num_envs, aux_envs.observation_space.shape, config.device) ] # Setup some stats helpers episode_rewards = [ np.zeros([num_envs, 1], dtype=np.float), np.zeros([num_envs, 1], dtype=np.float) ] total_episodes = total_steps = iteration = 0 reward_recorders = [deque(maxlen=100), deque(maxlen=100)] episode_length_recorders = [deque(maxlen=100), deque(maxlen=100)] sample_timer = Timer() process_timer = Timer() update_timer = Timer() total_timer = Timer() progress = [] evaluate_stat = {} # Start training print("Start training!") obs = [envs[i].reset() for i in range(2)] _ = [frame_stack_tensors[i].update(obs[i]) for i in range(2)] # first update for i in range(2): trainer.rollouts[i].observations[0].copy_( reduce_shape(frame_stack_tensors[i].get(), obs_dims[i])) branch_names = ['a', 'b'] while True: # Break when total_steps exceeds maximum value with sample_timer: # prepare rollout a for ind in range(2): for index in range(config.num_steps): trainer.model.eval() values, actions, action_log_prob = trainer.model.step( reduce_shape(frame_stack_tensors[ind].get(), obs_dims[ind]), deterministic=False, branch=branch_names[ind]) cpu_actions = actions.cpu().numpy() cpu_actions = enlarge_shape(cpu_actions, act_dims[ind]) # obs, done, info not needed, we have masks & obs in frame_stack_tensors _, reward, _, _, masks, new_total_episodes, new_total_steps, episode_rewards[ind] = \ step_envs(cpu_actions, envs[ind], episode_rewards[ind], frame_stack_tensors[ind], reward_recorders[ind], episode_length_recorders[ind], total_steps, total_episodes, config.device) if ind == 0: total_episodes = new_total_episodes total_steps = new_total_steps rewards = torch.from_numpy(reward.astype(np.float32)).view( -1, 1).to(config.device) trainer.rollouts[ind].insert( 
reduce_shape(frame_stack_tensors[ind].get(), obs_dims[ind]), actions, action_log_prob, values, rewards, masks) # ===== Process Samples ===== with process_timer: with torch.no_grad(): for i in range(2): next_value = trainer.compute_values( trainer.rollouts[i].observations[-1], branch_names[i]) trainer.rollouts[i].compute_returns( next_value, config.GAMMA) trainer.model.train() # ===== Update Policy ===== with update_timer: losses = trainer.update(trainer.rollouts[0], trainer.rollouts[1]) policy_loss, value_loss, total_loss = list(zip(*losses)) trainer.rollouts[0].after_update() trainer.rollouts[1].after_update() # ===== Evaluate Current Policy ===== if iteration % config.eval_freq == 0: eval_timer = Timer() # seems ok, by default model is dealing with task1 rewards, eplens = evaluate(trainer, eval_env, 1, dim_dict=dim_dict) evaluate_stat = summary(rewards, "episode_reward") evaluate_stat.update(summary(eplens, "episode_length")) evaluate_stat.update( dict(evaluate_time=eval_timer.now, evaluate_iteration=iteration)) # ===== Log information ===== if iteration % config.log_freq == 0: stats = dict( log_dir=log_dir, frame_per_second=int(total_steps / total_timer.now), training_episode_reward_a=summary(reward_recorders[0], "episode_reward"), training_episode_length_a=summary(episode_length_recorders[0], "episode_length"), training_episode_reward_b=summary(reward_recorders[1], "episode_reward"), training_episode_length_b=summary(episode_length_recorders[1], "episode_length"), evaluate_stats=evaluate_stat, learning_stats_a=dict(policy_loss=policy_loss[0], value_loss=value_loss[0], total_loss=total_loss[0]), learning_stats_b=dict(policy_loss=policy_loss[1], value_loss=value_loss[1], total_loss=total_loss[1]), total_steps=total_steps, total_episodes=total_episodes, time_stats=dict(sample_time=sample_timer.avg, process_time=process_timer.avg, update_time=update_timer.avg, total_time=total_timer.now, episode_time=sample_timer.avg + process_timer.avg + update_timer.avg), iteration=iteration) progress.append(stats) pretty_print({ "===== {} Training Iteration {} =====".format(algo, iteration): stats }) if iteration % config.save_freq == 0: trainer_path = trainer.save_w(log_dir, "iter{}".format(iteration)) progress_path = save_progress(log_dir, progress) print( "Saved trainer state at <{}>. Saved progress at <{}>.".format( trainer_path, progress_path)) # [TODO] Stop training when total_steps is greater than args.max_steps if total_steps > args.max_steps: break pass iteration += 1 trainer.save_w(log_dir, "final") envs.close()
dataset = dataset.map(utils.parser(anchors, num_classes).parser_example,
                      num_parallel_calls=10)
dataset = dataset.repeat().shuffle(SHUFFLE_SIZE).batch(BATCH_SIZE).prefetch(BATCH_SIZE)

iterator = dataset.make_one_shot_iterator()
example = iterator.get_next()
images, *y_true = example

model = yolov3.yolov3(num_classes)
with tf.variable_scope('yolov3'):
    y_pred = model.forward(images, is_training=False)
    loss = model.compute_loss(y_pred, y_true)
    y_pred = model.predict(y_pred)

load_ops = utils.load_weights(tf.global_variables(scope='yolov3'), weights_path)
sess.run(load_ops)

for epoch in range(EPOCHS):
    run_items = sess.run([y_pred, y_true] + loss)
    rec, prec, mAP = utils.evaluate(run_items[0], run_items[1], num_classes,
                                    score_thresh=0.3, iou_thresh=0.5)
    print("=> EPOCH: %2d\ttotal_loss:%7.4f\tloss_coord:%7.4f\tloss_sizes:%7.4f\tloss_confs:%7.4f\tloss_class:%7.4f"
          "\trec:%7.4f\tprec:%7.4f\tmAP:%7.4f"
          % (epoch, run_items[2], run_items[3], run_items[4], run_items[5],
             run_items[6], rec, prec, mAP))
def train(args): # Verify algorithm and config global env_options, trainer_options algo = args.algo if algo == "PPO": config = ppo_config else: raise ValueError("args.algo must in [PPO]") config.num_envs = args.num_envs if args.envopt is not None: f = open(args.envopt) env_options = json.load(f) if args.trainopt is not None: f = open(args.trainopt) trainer_options = json.load(f) if args.opt is not None: opt = json.load(open(args.opt)) env_options = opt['env'] trainer_options = opt['trainer'] # Seed the environments and setup torch seed = args.seed torch.manual_seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed(seed) torch.set_num_threads(1) # Clean log directory log_dir = verify_log_dir('work_dirs', args.log_dir) # Create vectorized environments num_envs = args.num_envs env_id = args.env_id envs = make_envs( env_id=env_id, seed=seed, log_dir=log_dir, num_envs=num_envs, asynchronous=True, options=env_options, ) if env_id == "Walker2d-v3": healthy_z_range = (0.8, 2.0) elif env_id == 'Humanoid-v3': healthy_z_range = (1.0, 2.0) if 'healthy_z_range' in env_options: healthy_z_range = env_options['healthy_z_range'] eval_env = gym.make(env_id, healthy_z_range=healthy_z_range, healthy_reward=0) if env_id == "Walker2d-v3": eval_env = Walker2d_wrapper(eval_env, env_options) obs_dim = envs.observation_space.shape[0] act_dim = envs.action_space.shape[0] real_obs_dim = obs_dim real_act_dim = act_dim if 'real_obs_dim' in trainer_options: real_obs_dim = trainer_options['real_obs_dim'] if 'real_act_dim' in trainer_options: real_act_dim = trainer_options['real_act_dim'] dim_dict = dict(obs_dim=obs_dim, act_dim=act_dim, real_obs_dim=real_obs_dim, real_act_dim=real_act_dim) # Setup trainer if algo == "PPO": trainer = PPOTrainer(envs, config, trainer_options) else: raise NotImplementedError # Create a placeholder tensor to help stack frames in 2nd dimension # That is turn the observation from shape [num_envs, 1, 84, 84] to # [num_envs, 4, 84, 84]. 
frame_stack_tensor = FrameStackTensor(num_envs, envs.observation_space.shape, config.device) # Setup some stats helpers episode_rewards = np.zeros([num_envs, 1], dtype=np.float) total_episodes = total_steps = iteration = 0 reward_recorder = deque(maxlen=100) episode_length_recorder = deque(maxlen=100) sample_timer = Timer() process_timer = Timer() update_timer = Timer() total_timer = Timer() progress = [] evaluate_stat = {} # Start training print("Start training!") obs = envs.reset() frame_stack_tensor.update(obs) trainer.rollouts.observations[0].copy_( reduce_shape(frame_stack_tensor.get(), real_obs_dim)) while True: # Break when total_steps exceeds maximum value with sample_timer: for index in range(config.num_steps): trainer.model.eval() values, actions, action_log_prob = trainer.model.step( reduce_shape(frame_stack_tensor.get(), real_obs_dim)) cpu_actions = actions.cpu().numpy() cpu_actions = enlarge_shape(cpu_actions, act_dim) obs, reward, done, info, masks, total_episodes, \ total_steps, episode_rewards = step_envs( cpu_actions, envs, episode_rewards, frame_stack_tensor, reward_recorder, episode_length_recorder, total_steps, total_episodes, config.device) rewards = torch.from_numpy(reward.astype(np.float32)).view( -1, 1).to(config.device) # Store samples trainer.rollouts.insert( reduce_shape(frame_stack_tensor.get(), real_obs_dim), actions, action_log_prob, values, rewards, masks) # ===== Process Samples ===== with process_timer: with torch.no_grad(): next_value = trainer.compute_values( trainer.rollouts.observations[-1]) trainer.rollouts.compute_returns(next_value, config.GAMMA) trainer.model.train() # ===== Update Policy ===== with update_timer: policy_loss, value_loss, total_loss = trainer.update( trainer.rollouts) trainer.rollouts.after_update() # ===== Evaluate Current Policy ===== if iteration % config.eval_freq == 0: eval_timer = Timer() rewards, eplens = evaluate(trainer, eval_env, 1, dim_dict=dim_dict) evaluate_stat = summary(rewards, "episode_reward") evaluate_stat.update(summary(eplens, "episode_length")) evaluate_stat.update( dict(evaluate_time=eval_timer.now, evaluate_iteration=iteration)) # ===== Log information ===== if iteration % config.log_freq == 0: stats = dict( log_dir=log_dir, frame_per_second=int(total_steps / total_timer.now), training_episode_reward=summary(reward_recorder, "episode_reward"), training_episode_length=summary(episode_length_recorder, "episode_length"), evaluate_stats=evaluate_stat, learning_stats=dict(policy_loss=policy_loss, value_loss=value_loss, total_loss=total_loss), total_steps=total_steps, total_episodes=total_episodes, time_stats=dict(sample_time=sample_timer.avg, process_time=process_timer.avg, update_time=update_timer.avg, total_time=total_timer.now, episode_time=sample_timer.avg + process_timer.avg + update_timer.avg), iteration=iteration) progress.append(stats) pretty_print({ "===== {} Training Iteration {} =====".format(algo, iteration): stats }) if iteration % config.save_freq == 0: trainer_path = trainer.save_w(log_dir, "iter{}".format(iteration)) progress_path = save_progress(log_dir, progress) print( "Saved trainer state at <{}>. Saved progress at <{}>.".format( trainer_path, progress_path)) # [TODO] Stop training when total_steps is greater than args.max_steps if total_steps > args.max_steps: break pass iteration += 1 trainer.save_w(log_dir, "final") envs.close()
t1 = time() - t0

elbo = np.mean(elbo)
cat_mean = np.mean(cat_mean)
kl = np.mean(kls)
accuracy = np.mean(accuracy)
logger.add(epoch, kl=kl, tr_elbo=elbo, tr_acc=accuracy, tr_ll=cat_mean,
           tr_time=t1)

model.eval()

t_dvi = time()
test_acc_dvi = evaluate(model, test_loader, mode='dvi', args=args)
t_dvi = time() - t_dvi

if not args.no_mc:
    t_mc = time()
    test_acc_mcvi = evaluate(model, test_loader, mode='mcvi', args=args)
    t_mc = time() - t_mc
    logger.add(epoch, te_acc_mcvi=test_acc_mcvi, te_time_mcvi=t_mc)

logger.add(epoch, te_acc_dvi=test_acc_dvi, te_time_dvi=t_dvi)
logger.iter_info()
logger.save(silent=True)
], feed_dict)

accum_l = l * (1 - alpha) + alpha * accum_l
if np.isnan(l):
    pdb.set_trace()

# if i % 1000 == 0 and is_save:
#     summary = sess.run(summary_op, feed_dict)
#     summary_writer.add_summary(summary, i)

if i % eval_interval == 0 or i == NUS_WIDE_zs_n_iters - 1:
    print('Time elapse: ', time.clock() - tic)
    tic = time.clock()
    F1_val, P_val, R_val = evaluate_k(
        k, iterator_val, [img_ids_val, features_val, labels_925_val],
        model.features, model.logits, sess, model)
    ap_val, _, _ = evaluate(iterator_val,
                            [img_ids_val, features_val, labels_925_val],
                            model.features, model.logits, sess, model)
    F1_u_val, P_u_val, R_u_val = evaluate_k(
        k, iterator_val, [img_ids_val, features_val, labels_81_val],
        model.features, model.zs_logits, sess, model)
    mF1_val, mP_val, mR_val, mAP_val = [
        np.mean(F1_val), np.mean(P_val), np.mean(R_val), np.mean(ap_val)
    ]
    mF1_u_val, mP_u_val, mR_u_val = [
        np.mean(F1_u_val), np.mean(P_u_val), np.mean(R_u_val)
def train(args): # Verify algorithm and config algo = args.algo if algo == "PPO": config = ppo_config elif algo == "A2C": config = a2c_config else: raise ValueError("args.algo must in [PPO, A2C]") config.num_envs = args.num_envs assert args.env_id in ["cPong-v0", "CartPole-v0", "cPongTournament-v0"] # Seed the environments and setup torch seed = args.seed torch.manual_seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed(seed) torch.set_num_threads(1) # Clean log directory log_dir = verify_log_dir(args.log_dir, algo) # Create vectorized environments num_envs = args.num_envs env_id = args.env_id envs = make_envs( env_id=env_id, seed=seed, log_dir=log_dir, num_envs=num_envs, asynchronous=True, resized_dim=config.resized_dim ) eval_envs = make_envs( env_id=env_id, seed=seed, log_dir=log_dir, num_envs=num_envs, asynchronous=False, resized_dim=config.resized_dim ) test = env_id == "CartPole-v0" tournament = env_id == "cPongTournament-v0" frame_stack = 4 if not test else 1 if tournament: assert algo == "PPO", "Using PPO in tournament is a good idea, " \ "because of its efficiency compared to A2C." # Setup trainer if algo == "PPO": trainer = PPOTrainer(envs, config, frame_stack, _test=test) else: trainer = A2CTrainer(envs, config, frame_stack, _test=test) # Create a placeholder tensor to help stack frames in 2nd dimension # That is turn the observation from shape [num_envs, 1, 84, 84] to # [num_envs, 4, 84, 84]. frame_stack_tensor = FrameStackTensor( num_envs, envs.observation_space.shape, frame_stack, config.device) # Setup some stats helpers episode_rewards = np.zeros([num_envs, 1], dtype=np.float) total_episodes = total_steps = iteration = 0 reward_recorder = deque(maxlen=100) episode_length_recorder = deque(maxlen=100) sample_timer = Timer() process_timer = Timer() update_timer = Timer() total_timer = Timer() progress = [] evaluate_stat = {} # Start training print("Start training!") obs = envs.reset() frame_stack_tensor.update(obs) trainer.rollouts.observations[0].copy_(frame_stack_tensor.get()) while True: # Break when total_steps exceeds maximum value # ===== Sample Data ===== with sample_timer: for index in range(config.num_steps): # Get action # [TODO] Get the action # Hint: # 1. Remember to disable gradient computing # 2. trainer.rollouts is a storage containing all data # 3. What observation is needed for trainer.compute_action? 
with torch.no_grad(): values, actions, action_log_prob = trainer.compute_action(trainer.rollouts.observations[index]) cpu_actions = actions.view(-1).cpu().numpy() # Step the environment # (Check step_envs function, you need to implement it) obs, reward, done, info, masks, total_episodes, \ total_steps, episode_rewards = step_envs( cpu_actions, envs, episode_rewards, frame_stack_tensor, reward_recorder, episode_length_recorder, total_steps, total_episodes, config.device, test) rewards = torch.from_numpy( reward.astype(np.float32)).view(-1, 1).to(config.device) # Store samples trainer.rollouts.insert( frame_stack_tensor.get(), actions.view(-1, 1), action_log_prob, values, rewards, masks) # ===== Process Samples ===== with process_timer: with torch.no_grad(): next_value = trainer.compute_values( trainer.rollouts.observations[-1]) trainer.rollouts.compute_returns(next_value, config.GAMMA) # ===== Update Policy ===== with update_timer: policy_loss, value_loss, dist_entropy, total_loss = \ trainer.update(trainer.rollouts) trainer.rollouts.after_update() # ===== Reset opponent if in tournament mode ===== if tournament and iteration % config.num_steps == 0: # Randomly choose one agent in each iteration envs.reset_opponent() # ===== Evaluate Current Policy ===== if iteration % config.eval_freq == 0: eval_timer = Timer() evaluate_rewards, evaluate_lengths = evaluate( trainer, eval_envs, frame_stack, 20) evaluate_stat = summary(evaluate_rewards, "episode_reward") if evaluate_lengths: evaluate_stat.update( summary(evaluate_lengths, "episode_length")) evaluate_stat.update(dict( win_rate=float( sum(np.array(evaluate_rewards) >= 0) / len( evaluate_rewards)), evaluate_time=eval_timer.now, evaluate_iteration=iteration )) # ===== Log information ===== if iteration % config.log_freq == 0: stats = dict( log_dir=log_dir, frame_per_second=int(total_steps / total_timer.now), training_episode_reward=summary(reward_recorder, "episode_reward"), training_episode_length=summary(episode_length_recorder, "episode_length"), evaluate_stats=evaluate_stat, learning_stats=dict( policy_loss=policy_loss, entropy=dist_entropy, value_loss=value_loss, total_loss=total_loss ), total_steps=total_steps, total_episodes=total_episodes, time_stats=dict( sample_time=sample_timer.avg, process_time=process_timer.avg, update_time=update_timer.avg, total_time=total_timer.now, episode_time=sample_timer.avg + process_timer.avg + update_timer.avg ), iteration=iteration ) if tournament: stats["opponent"] = envs.current_agent_name progress.append(stats) pretty_print({ "===== {} Training Iteration {} =====".format( algo, iteration): stats }) if iteration % config.save_freq == 0: trainer_path = trainer.save_w(log_dir, "iter{}".format(iteration)) progress_path = save_progress(log_dir, progress) print("Saved trainer state at <{}>. Saved progress at <{}>.".format( trainer_path, progress_path )) # [TODO] Stop training when total_steps is greater than args.max_steps if total_steps > args.max_steps: break iteration += 1 trainer.save_w(log_dir, "final") envs.close()
def _train(trainer, envs, eval_envs, config, num_envs, algo, log_dir, tournament, test): # Setup some stats helpers episode_rewards = np.zeros([num_envs, 1], dtype=np.float) total_episodes = total_steps = iteration = 0 reward_recorder = deque(maxlen=100) episode_length_recorder = deque(maxlen=100) sample_timer = Timer() process_timer = Timer() update_timer = Timer() total_timer = Timer() progress = [] evaluate_stat = {} while True: # Break when total_steps exceeds maximum value # ===== Sample Data ===== with sample_timer: for index in range(config.num_steps): # Get action if hasattr(trainer.model, 'reset_state'): trainer.model.reset_state() with torch.no_grad(): values, actions, action_log_prob = trainer.compute_action( trainer.rollouts.processed_observations[index]) trainer.model.update_hidden(actions) if trainer.discrete: cpu_actions = actions.view(-1).cpu().numpy() else: cpu_actions = actions.cpu().numpy() # Step the environment # (Check step_envs function, you need to implement it) obs, reward, done, info, masks, total_episodes, total_steps, episode_rewards = step_envs( cpu_actions, envs, episode_rewards, reward_recorder, episode_length_recorder, total_steps, total_episodes, config.device) rewards = torch.from_numpy(reward.astype(np.float32)).view( -1, 1).to(config.device) # Store samples if trainer.discrete: actions = actions.view(-1, 1) with torch.no_grad(): raw_obs = trainer.process_obs(obs) processed_obs = trainer.model.world_model(raw_obs).detach() trainer.rollouts.insert(obs, actions, action_log_prob, values, rewards, masks, processed_obs) # trainer.rollouts.insert(obs, actions, action_log_prob, values, rewards, masks) # ===== Process Samples ===== with process_timer: with torch.no_grad(): next_value = trainer.compute_values( trainer.rollouts.processed_observations[-1]) trainer.rollouts.compute_returns(next_value, config.gamma) # ===== Update Policy ===== with update_timer: policy_loss, value_loss, dist_entropy, total_loss = trainer.update( trainer.rollouts) # vae_loss, mdrnn_loss\ # = trainer.update(trainer.rollouts) trainer.model.reset_state() trainer.rollouts.after_update() # ===== Reset opponent if in tournament mode ===== if tournament and iteration % config.num_steps == 0: # Randomly choose one agent in each iteration envs.reset_opponent() # ===== Evaluate Current Policy ===== if eval_envs is not None and iteration % config.eval_freq == 0: eval_timer = Timer() evaluate_rewards, evaluate_lengths = evaluate( trainer, eval_envs, 20) evaluate_stat = summary(evaluate_rewards, "episode_reward") if evaluate_lengths: evaluate_stat.update( summary(evaluate_lengths, "episode_length")) evaluate_stat.update( dict(win_rate=float( sum(np.array(evaluate_rewards) >= 0) / len(evaluate_rewards)), evaluate_time=eval_timer.now, evaluate_iteration=iteration)) # ===== Log information ===== if iteration % config.log_freq == 0: stats = dict( log_dir=log_dir, frame_per_second=int(total_steps / total_timer.now), training_episode_reward=summary(reward_recorder, "episode_reward"), training_episode_length=summary(episode_length_recorder, "episode_length"), evaluate_stats=evaluate_stat, learning_stats=dict( policy_loss=policy_loss, entropy=dist_entropy, value_loss=value_loss, # vae_loss= vae_loss, # mdrnn_loss=mdrnn_loss, total_loss=total_loss), total_steps=total_steps, total_episodes=total_episodes, time_stats=dict(sample_time=sample_timer.avg, process_time=process_timer.avg, update_time=update_timer.avg, total_time=total_timer.now, episode_time=sample_timer.avg + process_timer.avg + update_timer.avg), 
iteration=iteration) if tournament: stats["opponent"] = envs.current_agent_name progress.append(stats) from IPython.display import clear_output clear_output() pretty_print({ "===== {} Training Iteration {} =====".format(algo, iteration): stats }) progress_path = save_progress(log_dir, progress) if iteration % config.save_freq == 0: trainer_path = trainer.save_w(log_dir, "iter{}".format(iteration)) progress_path = save_progress(log_dir, progress) print( "Saved trainer state at <{}>. Saved progress at <{}>.".format( trainer_path, progress_path)) if total_steps > int(args.max_steps): break iteration += 1
def train(self):
    ANCHORS = utils.get_anchors(self.anchors_path, self.img_h, self.img_w)

    parser = Parser(image_h=self.img_h, image_w=self.img_w,
                    anchors=ANCHORS, num_classes=self.num_classes)
    trainset = dataset(parser, self.train_records, self.batch_size,
                       shuffle=self.shuffle_size)
    testset = dataset(parser, self.test_records, self.batch_size, shuffle=None)

    is_training = tf.placeholder(tf.bool)
    example = tf.cond(is_training,
                      lambda: trainset.get_next(),
                      lambda: testset.get_next())
    images, y_true = example

    model = yolov3.yolov3(self.num_classes, ANCHORS)
    with tf.variable_scope('yolov3'):
        # Give the images to the network, and receive a prediction feature map
        pred_feature_map = model.forward(images,
                                         is_training=is_training,
                                         n_filters_dn=self.n_filters_dn,
                                         n_strides_dn=self.n_strides_dn,
                                         n_ksizes_dn=self.n_ksizes_dn)
        loss = model.compute_loss(pred_feature_map, y_true, self.iou_threshold)
        y_pred = model.predict(pred_feature_map)

    tf.summary.scalar("loss/coord_loss", loss[1])
    tf.summary.scalar("loss/sizes_loss", loss[2])
    tf.summary.scalar("loss/confs_loss", loss[3])
    tf.summary.scalar("loss/class_loss", loss[4])

    # global_step is advanced by the optimizer, not trained directly.
    global_step = tf.Variable(0, trainable=False,
                              collections=[tf.GraphKeys.LOCAL_VARIABLES])

    write_op = tf.summary.merge_all()
    writer_train = tf.summary.FileWriter("../../data/train_summary", sess.graph)
    writer_test = tf.summary.FileWriter("../../data/test_summary")

    update_vars = tf.contrib.framework.get_variables_to_restore(
        include=["yolov3/yolo-v3"])

    lr = tf.train.exponential_decay(self.learning_rate, global_step,
                                    decay_steps=self.decay_steps,
                                    decay_rate=self.decay_rate,
                                    staircase=True)
    optimizer = tf.train.AdamOptimizer(lr)

    # Set dependencies for BN ops
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss[0], var_list=update_vars,
                                      global_step=global_step)

    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])
    saver = tf.train.Saver(max_to_keep=2)

    for step in range(self.steps):
        run_items = sess.run([train_op, write_op, y_pred, y_true] + loss,
                             feed_dict={is_training: True})

        if (step + 1) % self.eval_internal == 0:
            train_rec_value, train_prec_value = utils.evaluate(run_items[2],
                                                               run_items[3])
            writer_train.add_summary(run_items[1], global_step=step)
            writer_train.flush()  # Flushes the event file to disk

        if (step + 1) % self.save_internal == 0:
            saver.save(sess, save_path=self.checkpoint_path,
                       global_step=step + 1)

        if (step + 1) % self.print_every_n == 0:
            print(f"=> STEP {step+1} [TRAIN]:\tloss_xy: " +
                  f"{run_items[5]:.4f} \tloss_wh:{run_items[6]:.4f} \t" +
                  f"loss_conf:{run_items[7]:.4f} \tloss_class:" +
                  f"{run_items[8]:.4f}")

        run_items = sess.run([write_op, y_pred, y_true] + loss,
                             feed_dict={is_training: False})

        if (step + 1) % self.eval_internal == 0:
            test_rec_value, test_prec_value = utils.evaluate(run_items[1],
                                                             run_items[2])
            print(f"\n{20*'='}> evaluation result <{20*'='}\n")
            print(f"=> STEP {step+1} [TRAIN]:\trecall:" +
                  f"{train_rec_value:.2f} \tprecision:" +
                  f"{train_prec_value:.4f}")
            print(f"=> STEP {step+1} [VALID]:\trecall:" +
                  f"{test_rec_value:.2f} \tprecision:" +
                  f"{test_prec_value:.4f}")
            print(f"\n{20*'='}> evaluation result <{20*'='}\n")

        writer_test.add_summary(run_items[0], global_step=step)
        writer_test.flush()  # Flushes the event file to disk
    df_f_zs, df_f_gzs = evaluate_zs_df_OpenImage(
        iterator_tst=iterator_test,
        tensors_zs=tensors_zs,
        tensors_gzs=tensors_gzs,
        unseen_classes=unseen_classes,
        classes=classes,
        sess=sess,
        model=model,
        k_zs=k_zs,
        k_gzs=k_gzs)
    print('-' * 30)

if i % eval_interval == 0 or i == n_iters - 1:
    print('Time elapse: ', time.clock() - tic)
    tic = time.clock()
    ap_val, predictions_mll, labels_mll = evaluate(
        iterator_val, [img_ids_val, features_val, labels_val],
        model.features, model.logits, sess, model)
    F1_val, P_val, R_val = evaluate_k(
        k, iterator_val, [img_ids_val, features_val, labels_val],
        model.features, model.logits, sess, model,
        predictions_mll, labels_mll)
    mF1_val, mP_val, mR_val, mAP_val = [
        np.mean(F1_val), np.mean(P_val), np.mean(R_val), np.mean(ap_val)
    ]
    learning_rate = lr.adapt(mAP_val)
    values = [
        i, l, l_rank, l_att_span, l_att_global, l_att_dist, mF1_val,
def main(): fmt = { 'tr_loss': '3.1e', 'tr_acc': '.4f', 'te_acc_det': '.4f', 'te_acc_stoch': '.4f', 'te_acc_ens': '.4f', 'te_acc_perm_sigma': '.4f', 'te_acc_zero_mean': '.4f', 'te_acc_perm_sigma_ens': '.4f', 'te_acc_zero_mean_ens': '.4f', 'te_nll_det': '.4f', 'te_nll_stoch': '.4f', 'te_nll_ens': '.4f', 'te_nll_perm_sigma': '.4f', 'te_nll_zero_mean': '.4f', 'te_nll_perm_sigma_ens': '.4f', 'te_nll_zero_mean_ens': '.4f', 'time': '.3f' } fmt = {**fmt, **{'la%d' % i: '.4f' for i in range(4)}} args = get_args() logger = Logger("lenet5-VDO", fmt=fmt) trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor()) train_sampler = torch.utils.data.BatchSampler( torch.utils.data.RandomSampler(trainset), batch_size=args.batch_size, drop_last=False) trainloader = torch.utils.data.DataLoader(trainset, batch_sampler=train_sampler, num_workers=args.workers, pin_memory=True) testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor()) test_sampler = torch.utils.data.BatchSampler( torch.utils.data.SequentialSampler(testset), batch_size=args.batch_size, drop_last=False) testloader = torch.utils.data.DataLoader(testset, batch_sampler=test_sampler, num_workers=args.workers, pin_memory=True) net = LeNet5() net = net.to(device=args.device, dtype=args.dtype) if args.print_model: logger.print(net) criterion = metrics.SGVLB(net, len(trainset)).to(device=args.device, dtype=args.dtype) optimizer = optim.Adam(net.parameters(), lr=args.learning_rate) epochs = args.epochs lr_start = args.learning_rate for epoch in trange(epochs): # loop over the dataset multiple times t0 = time() utils.adjust_learning_rate( optimizer, metrics.lr_linear(epoch, 0, epochs, lr_start)) net.train() training_loss = 0 accs = [] steps = 0 for i, (inputs, labels) in enumerate(tqdm(trainloader), 0): steps += 1 inputs, labels = inputs.to( device=args.device, dtype=args.dtype), labels.to(device=args.device) optimizer.zero_grad() outputs = net(inputs) loss = criterion(outputs, labels) loss.backward() optimizer.step() accs.append(metrics.logit2acc( outputs.data, labels)) # probably a bad way to calculate accuracy training_loss += loss.item() logger.add(epoch, tr_loss=training_loss / steps, tr_acc=np.mean(accs)) # Deterministic test net.eval() acc, nll = utils.evaluate(net, testloader, device=args.device, num_ens=1) logger.add(epoch, te_nll_det=nll, te_acc_det=acc) # Stochastic test net.train() acc, nll = utils.evaluate(net, testloader, device=args.device, num_ens=1) logger.add(epoch, te_nll_stoch=nll, te_acc_stoch=acc) # Test-time averaging net.train() acc, nll = utils.evaluate(net, testloader, device=args.device, num_ens=20) logger.add(epoch, te_nll_ens=nll, te_acc_ens=acc) # Zero-mean net.train() net.dense1.set_flag('zero_mean', True) acc, nll = utils.evaluate(net, testloader, device=args.device, num_ens=1) net.dense1.set_flag('zero_mean', False) logger.add(epoch, te_nll_zero_mean=nll, te_acc_zero_mean=acc) # Permuted sigmas net.train() net.dense1.set_flag('permute_sigma', True) acc, nll = utils.evaluate(net, testloader, device=args.device, num_ens=1) net.dense1.set_flag('permute_sigma', False) logger.add(epoch, te_nll_perm_sigma=nll, te_acc_perm_sigma=acc) # Zero-mean test-time averaging net.train() net.dense1.set_flag('zero_mean', True) acc, nll = utils.evaluate(net, testloader, device=args.device, num_ens=20) net.dense1.set_flag('zero_mean', False) logger.add(epoch, te_nll_zero_mean_ens=nll, te_acc_zero_mean_ens=acc) # Permuted sigmas test-time 
averaging net.train() net.dense1.set_flag('permute_sigma', True) acc, nll = utils.evaluate(net, testloader, device=args.device, num_ens=20) net.dense1.set_flag('permute_sigma', False) logger.add(epoch, te_nll_perm_sigma_ens=nll, te_acc_perm_sigma_ens=acc) logger.add(epoch, time=time() - t0) las = [ np.mean(net.conv1.log_alpha.data.cpu().numpy()), np.mean(net.conv2.log_alpha.data.cpu().numpy()), np.mean(net.dense1.log_alpha.data.cpu().numpy()), np.mean(net.dense2.log_alpha.data.cpu().numpy()) ] logger.add(epoch, **{'la%d' % i: las[i] for i in range(4)}) logger.iter_info() logger.save(silent=True) torch.save(net.state_dict(), logger.checkpoint) logger.save()
tf.summary.scalar("yolov3/recall", rec_tensor) tf.summary.scalar("yolov3/precision", prec_tensor) tf.summary.scalar("yolov3/mAP", mAP_tensor) tf.summary.scalar("yolov3/total_loss", loss[0]) tf.summary.scalar("loss/coord_loss", loss[1]) tf.summary.scalar("loss/sizes_loss", loss[2]) tf.summary.scalar("loss/confs_loss", loss[3]) tf.summary.scalar("loss/class_loss", loss[4]) write_op = tf.summary.merge_all() writer_train = tf.summary.FileWriter("./data/log/train", graph=sess.graph) sess.run(tf.global_variables_initializer()) for epoch in range(EPOCHS): run_items = sess.run([train_op, y_pred, y_true] + loss, feed_dict={is_training:True}) rec, prec, mAP = utils.evaluate(run_items[1], run_items[2], num_classes) _, _, _, summary = sess.run([tf.assign(rec_tensor, rec), tf.assign(prec_tensor, prec), tf.assign(mAP_tensor, mAP), write_op], feed_dict={is_training:True}) writer_train.add_summary(summary, global_step=epoch) writer_train.flush() # Flushes the event file to disk if epoch%1000 == 0: saver.save(sess, save_path="./checkpoint/yolov3.ckpt", global_step=epoch) print("=> EPOCH:%10d\ttotal_loss:%7.4f\tloss_coord:%7.4f\tloss_sizes:%7.4f\tloss_confs:%7.4f\tloss_class:%7.4f" "\trec:%.2f\tprec:%.2f\tmAP:%.2f" %(epoch, run_items[3], run_items[4], run_items[5], run_items[6], run_items[7], rec, prec, mAP))
for j in range(flags.train_ens):
    outputs[:, :, j] = F.log_softmax(net(inputs), dim=1)

log_outputs = utils.logmeanexp(outputs, dim=2)
loss = criterion(log_outputs, labels)
loss.backward()
optimizer.step()

accs.append(metrics.logit2acc(log_outputs.data, labels))
training_loss += loss.item()

logger.add(epoch, tr_loss=training_loss / steps, tr_acc=np.mean(accs))

# Ens 100 test
net.train()
acc, nll = utils.evaluate(net, testloader, num_ens=100)
logger.add(epoch, te_nll_ens100=nll, te_acc_ens100=acc)

# Stochastic test
net.train()
acc, nll = utils.evaluate(net, testloader, num_ens=1)
logger.add(epoch, te_nll_stoch=nll, te_acc_stoch=acc)

# Test-time averaging
net.train()
acc, nll = utils.evaluate(net, testloader, num_ens=10)
logger.add(epoch, te_nll_ens10=nll, te_acc_ens10=acc)

logger.add(epoch, time=time() - t0)
logger.iter_info()
logger.save(silent=True)
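# Several snippets above call utils.evaluate(net, testloader, num_ens=N) to get
# (accuracy, NLL) via test-time averaging over N stochastic forward passes. The
# helper itself is not shown in this collection; below is a minimal sketch of that
# pattern, assuming the network returns logits and the loader yields (inputs, labels).
# The name evaluate_ensemble is hypothetical. The caller decides net.train() vs
# net.eval() beforehand, as the snippets above do.
import torch
import torch.nn.functional as F

def evaluate_ensemble(net, loader, num_ens=1, device='cpu'):
    correct, nll_sum, count = 0, 0.0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            # Average predictive probabilities over num_ens stochastic passes.
            probs = torch.stack([F.softmax(net(inputs), dim=1)
                                 for _ in range(num_ens)]).mean(dim=0)
            nll_sum += F.nll_loss(torch.log(probs + 1e-12), labels,
                                  reduction='sum').item()
            correct += (probs.argmax(dim=1) == labels).sum().item()
            count += labels.size(0)
    return correct / count, nll_sum / count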
def train(args): # Verify algorithm and config algo = args.algo if algo == "PPO": config = ppo_config elif algo == "A2C": config = a2c_config else: raise ValueError("args.algo must in [PPO, A2C]") config.num_envs = args.num_envs # Seed the environments and setup torch seed = args.seed torch.manual_seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed(seed) torch.set_num_threads(1) # Clean log directory log_dir = verify_log_dir(args.log_dir, algo) # Create vectorized environments num_envs = args.num_envs env_name = args.env_name # Prepare tensorboard file args.save_log = 'Pairtrding-{}'.format(time.strftime("%Y%m%d-%H%M%S")) generate_date = str(datetime.now().date()) writer = SummaryWriter(args.log_dir + '/runs/' + generate_date + '/' + args.save_log) # download stock price data from yahoo finance stocklist = [ '0700.hk', '2318.hk', '3988.hk', '0998.hk', '1398.hk', '3968.hk', '0981.hk', '0005.hk' ] # 腾讯,平安,中银,中信,工商,招商,中芯国际,汇丰 stocktickers = ' '.join(stocklist) data = yf.download(tickers=stocktickers, start="2010-01-01", end="2019-12-31") data = data['Close'] columnchange = [] for stock in data.columns: name = stock + 'change' columnchange.append(name) data[name] = data[stock] - data[stock].shift(1) CorrDict = {} for i in columnchange: for j in columnchange: if i != j and (i, j) not in CorrDict: CorrDict[(i, j)] = data[i].corr(data[j]) pair = list(max(CorrDict)) pair.append(pair[0][:7]) pair.append(pair[1][:7]) dataremain = data[pair] from sklearn import linear_model import numpy as np model = linear_model.LinearRegression() model.fit(dataremain[pair[0]][1:-250].to_numpy().reshape(-1, 1), y=dataremain[pair[1]][1:-250]) beta = model.coef_[0] dataremain['Spread'] = beta * data[pair[0]] - data[pair[1]] Spreadmean = dataremain['Spread'].mean() Spreadstd = dataremain['Spread'].std() dataremain['Z-score'] = (dataremain['Spread'] - Spreadmean) / Spreadstd envs = PairtradingEnv(stock1=dataremain[pair[2]][:-250], stock2=dataremain[pair[3]][:-250]) eval_envs = PairtradingEnv(stock1=dataremain[pair[2]][-250:], stock2=dataremain[pair[3]][-250:]) baseline_config = baselineConfig(mean=Spreadmean, std=Spreadstd, beta=beta) baseline_trainer = baseline(env=envs, config=baseline_config) baseline_eval = baseline(env=eval_envs, config=baseline_config) test = env_name == "CartPole-v0" frame_stack = args.input_length if not test else 1 # Setup trainer if algo == "PPO": trainer = PPOTrainer(envs, config, frame_stack, _test=test) else: trainer = A2CTrainer(envs, config, frame_stack, _test=test) # Create a placeholder tensor to help stack frames in 2nd dimension # That is turn the observation from shape [num_envs, 1, 84, 84] to # [num_envs, 4, 84, 84]. 
frame_stack_tensor = FrameStackTensor( num_envs, envs.observation_space.shape, frame_stack, config.device) # envs.observation_space.shape: 1,42,42 # Setup some stats helpers episode_rewards = np.zeros([num_envs, 1], dtype=np.float) total_episodes = total_steps = iteration = 0 reward_recorder = deque(maxlen=100) episode_length_recorder = deque(maxlen=100) episode_values = deque(maxlen=100) sample_timer = Timer() process_timer = Timer() update_timer = Timer() total_timer = Timer() progress = [] evaluate_stat = {} # Start training print("Start training!") while True: # Break when total_steps exceeds maximum value # ===== Sample Data ===== # episode_values = [] episode_rewards = np.zeros([num_envs, 1], dtype=np.float) for env_id in range(num_envs): obs = envs.reset() # obs.shape: 15,1,42,42 frame_stack_tensor.update(obs, env_id) trainer.rollouts.observations[0, env_id].copy_( frame_stack_tensor.get(env_id) ) #trainer.rollouts.observations.shape: torch.Size([201, 15, 4, 42, 42]) with sample_timer: for index in range(config.num_steps): # Get action # [TODO] Get the action # Hint: # 1. Remember to disable gradient computing # 2. trainer.rollouts is a storage containing all data # 3. What observation is needed for trainer.compute_action? with torch.no_grad(): values, actions_cash, action_log_prob_cash, actions_beta, action_log_prob_beta = trainer.compute_action( trainer.rollouts.observations[index, env_id]) act = baseline_trainer.compute_action( actions_cash.view(-1), actions_beta.view(-1)) cpu_actions = act # Step the environment # (Check step_envs function, you need to implement it) obs, reward, done, masks, total_episodes, \ total_steps, episode_rewards, episode_values = step_envs( cpu_actions, envs, env_id, episode_rewards, episode_values, frame_stack_tensor, reward_recorder, episode_length_recorder, total_steps, total_episodes, config.device, test) rewards = torch.from_numpy( np.array(reward).astype(np.float32)).view(-1).to( config.device) # Store samples trainer.rollouts.insert(frame_stack_tensor.get(env_id), actions_cash.view(-1), action_log_prob_cash.view(-1), actions_beta.view(-1), action_log_prob_beta.view(-1), values.view(-1), rewards, masks.view(-1), env_id) # ===== Process Samples ===== with process_timer: with torch.no_grad(): next_value = trainer.compute_values( trainer.rollouts.observations[-1]) trainer.rollouts.compute_returns(next_value, config.GAMMA) # ===== Update Policy ===== with update_timer: policy_loss, value_loss, dist_entropy, total_loss = \ trainer.update(trainer.rollouts) trainer.rollouts.after_update() # Add training statistics to tensorboard log file writer.add_scalar('train_policy_loss', policy_loss, iteration) writer.add_scalar('train_value_loss', value_loss, iteration) writer.add_scalar('train_dist_entropy', dist_entropy, iteration) writer.add_scalar('train_total_loss', total_loss, iteration) writer.add_scalar('train_episode_rewards', np.mean(episode_rewards), iteration) writer.add_scalar('train_episode_values', np.array(episode_values).mean(), iteration) # ===== Evaluate Current Policy ===== if iteration % config.eval_freq == 0: eval_timer = Timer() evaluate_rewards, evaluate_lengths, evaluate_values = evaluate( trainer, eval_envs, baseline_eval, frame_stack, 5) evaluate_stat = summary(evaluate_rewards, "episode_reward") if evaluate_lengths: evaluate_stat.update( summary(evaluate_lengths, "episode_length")) evaluate_stat.update( dict(win_rate=float( sum(np.array(evaluate_rewards) >= 0) / len(evaluate_rewards)), evaluate_time=eval_timer.now, 
evaluate_iteration=iteration, evaluate_values=float(np.array(evaluate_values).mean()))) # Add evaluation statistics to tensorboard log file writer.add_scalar('eval_episode_rewards', np.array(evaluate_rewards).mean(), iteration // config.eval_freq) writer.add_scalar('eval_episode_values', np.array(evaluate_values).mean(), iteration // config.eval_freq) # ===== Log information ===== if iteration % config.log_freq == 0: stats = dict( log_dir=log_dir, frame_per_second=int(total_steps / total_timer.now), training_episode_reward=summary(reward_recorder, "episode_reward"), training_episode_values=summary(episode_values, "episode_value"), training_episode_length=summary(episode_length_recorder, "episode_length"), evaluate_stats=evaluate_stat, learning_stats=dict(policy_loss=policy_loss, entropy=dist_entropy, value_loss=value_loss, total_loss=total_loss), total_steps=total_steps, total_episodes=total_episodes, time_stats=dict(sample_time=sample_timer.avg, process_time=process_timer.avg, update_time=update_timer.avg, total_time=total_timer.now, episode_time=sample_timer.avg + process_timer.avg + update_timer.avg), iteration=iteration) progress.append(stats) pretty_print({ "===== {} Training Iteration {} =====".format(algo, iteration): stats }) if iteration % config.save_freq == 0: trainer_path = trainer.save_w(log_dir, "iter{}".format(iteration)) progress_path = save_progress(log_dir, progress) print( "Saved trainer state at <{}>. Saved progress at <{}>.".format( trainer_path, progress_path)) if iteration >= args.max_steps: break iteration += 1 trainer.save_w(log_dir, "final") envs.close()
# Set dependencies for BN ops
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = optimizer.minimize(loss[0], var_list=update_vars,
                                  global_step=global_step)

sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
saver_to_restore.restore(sess, "./checkpoint/yolov3.ckpt")
saver = tf.train.Saver(max_to_keep=2)

for step in range(STEPS):
    run_items = sess.run([train_op, write_op, y_pred, y_true] + loss,
                         feed_dict={is_training: True})

    if (step + 1) % EVAL_INTERNAL == 0:
        train_rec_value, train_prec_value = utils.evaluate(run_items[2],
                                                           run_items[3])
        writer_train.add_summary(run_items[1], global_step=step)
        writer_train.flush()  # Flushes the event file to disk

    if (step + 1) % SAVE_INTERNAL == 0:
        saver.save(sess, save_path="./checkpoint/yolov3_" + dataname + ".ckpt",
                   global_step=step + 1)

    print("=> STEP %10d [TRAIN]:\tloss_xy:%7.4f \tloss_wh:%7.4f \tloss_conf:%7.4f \tloss_class:%7.4f"
          % (step + 1, run_items[5], run_items[6], run_items[7], run_items[8]))

    run_items = sess.run([write_op, y_pred, y_true] + loss,
                         feed_dict={is_training: False})

    if (step + 1) % EVAL_INTERNAL == 0:
        test_rec_value, test_prec_value = utils.evaluate(run_items[1],
                                                         run_items[2])
        print("\n=======================> evaluation result <================================\n")
        print("=> STEP %10d [TRAIN]:\trecall:%7.4f \tprecision:%7.4f"
              % (step + 1, train_rec_value, train_prec_value))
def do_online_qlearning(env, test_env, model, params, learning_rate, epsilon_s, gpu_device, target_model=None, replay_buffer=None, dpaths=None, training=True): tf.reset_default_graph() with tf.device(gpu_device): # Create placeholders states_pl = tf.placeholder(tf.float32, shape=(None, FRAME_WIDTH, FRAME_HEIGHT, FRAME_BUFFER_SIZE), name='states') actions_pl = tf.placeholder(tf.int32, shape=(None), name='actions') targets_pl = tf.placeholder(tf.float32, shape=(None), name='targets') # Value function approximator network q_output = model.graph(states_pl) # Build target network q_target_net = target_model.graph(states_pl) tf_train = tf.trainable_variables() num_tf_train = len(tf_train) target_net_vars = [] for i, var in enumerate(tf_train[0:num_tf_train // 2]): target_net_vars.append(tf_train[i + num_tf_train // 2].assign( var.value())) # Compute Q from current q_output and one hot actions # Q = tf.reduce_sum( # tf.multiply(q_output, actions_pl), axis=1) Q = tf.reduce_sum(tf.multiply( q_output, tf.one_hot(actions_pl, env.action_space.n, dtype=tf.float32)), axis=1) # Loss operation loss_op = tf.reduce_mean(tf.square(targets_pl - Q) / 2) # Optimizer Op #optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) # Training Op train_op = optimizer.minimize(loss_op) # Prediction Op prediction = tf.argmax(q_output, 1) # Model Saver saver = tf.train.Saver() # init all variables init_op = tf.global_variables_initializer() # Limit memory usage for multiple training at same time config = tf.ConfigProto() config.gpu_options.per_process_gpu_memory_fraction = 0.49 #config.gpu_options.allow_growth = True # Start Session with tf.Session(config=config) as sess: sess.run(init_op) sess.graph.finalize() # Performance from untrained Q-learning if not training: return evaluate(test_env, sess, prediction, states_pl, params['EVAL_EPISODES'], GAMMA, False, False) start_time = time.time() # Load env and get observations observation = env.reset() # Observation Buffer observation_buffer = list() # Save results losses = list() means = list() stds = list() #init epsilon epsilon = epsilon_s['start'] for step in range(params['TRAINING_STEPS']): loss = 0 # Stack observations in buffer of 4 if len(observation_buffer) < FRAME_BUFFER_SIZE: observation_buffer.append(observation) # Collect next observation with uniformly random action a_rnd = env.action_space.sample() observation, _, done, _ = env.step(a_rnd) # Observations buffer is ready else: # Stack observation buffer state = np.stack(observation_buffer, axis=-1) # Epsilon greedy policy if epsilon > np.random.rand(1): # Exploration # Use uniformly sampled action from env action = np.array(env.action_space.sample(), dtype=np.float32).reshape((-1)) else: # Exploitation # Use model predicted action action = sess.run(prediction, feed_dict={ states_pl: state.reshape([ -1, FRAME_WIDTH, FRAME_HEIGHT, FRAME_BUFFER_SIZE ]).astype('float32') }) # action for next observation action = action.reshape([-1]).astype('int32') observation, reward, done, info = env.step(action[0]) # Clip reward r = reward_clip(reward) # Update observation buffer observation_buffer.append(observation) observation_buffer[0:1] = [] next_state = np.stack(observation_buffer, axis=-1) # Add transition to replay buffer # Store state as uint8 for memory optim replay_buffer.add(((state * 255).astype('uint8'), action, r, (next_state * 255).astype('uint8'), done)) # If replay buffer is ready to be sampled if replay_buffer.ready: # Train model on 
replay buffer b_states, b_actions, b_reward, b_next_state, b_term_state = replay_buffer.next_transitions( ) q_out, q_out_target = sess.run( [q_output, q_target_net], feed_dict={states_pl: b_next_state}) #q_out_max = np.amax(q_out, axis=1) #q_target = b_reward + GAMMA * (1 - b_term_state) * q_out_max q_out_argmax = np.unravel_index(np.argmax(q_out, axis=1), q_out.shape) q_target = b_reward + GAMMA * ( 1 - b_term_state) * q_out_target[q_out_argmax] # Run training Op on batch of replay experience loss, _ = sess.run( [loss_op, train_op], feed_dict={ states_pl: b_states, actions_pl: b_actions, targets_pl: q_target.astype('float32') }) if done: observation = env.reset() # Update epsilon greedy policy if epsilon > epsilon_s['end']: epsilon -= (epsilon_s['start'] - epsilon_s['end']) / epsilon_s['decay'] # Copy variables target network if (step + 1) % params['TARGET_UPDATE'] == 0: for var in target_net_vars: sess.run(var) if step % params['LOSS_STEPS'] == 0: # Save loss losses.append(loss) if step % params['LOG_STEPS'] == 0: print('\n', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())) print('Episodes %d -> %d Done (%.3fs) ... ' % (max(1, step + 1 - params['LOG_STEPS']), step + 1, time.time() - start_time)) print('- Training loss: %.4f' % loss) start_time = time.time() # Force flush for nohup sys.stdout.flush() if 'EVAL_STEPS_START' in params: if (step % params['EVAL_STEPS_START'] == 0 and step <= params['EVAL_STEPS_STOP'] and step != 0): silent = (step % params['LOG_STEPS'] != 0) cur_means, cur_stds = evaluate( test_env, sess, prediction, #states_pl, 1, GAMMA, silent) states_pl, params['EVAL_EPISODES'], GAMMA, silent) means.append(cur_means) stds.append(cur_stds) if step % params['EVAL_STEPS'] == 0: silent = (step % params['LOG_STEPS'] != 0) cur_means, cur_stds = evaluate( test_env, sess, prediction, #states_pl, 1, GAMMA, silent) states_pl, params['EVAL_EPISODES'], GAMMA, silent) # Save means means.append(cur_means) stds.append(cur_stds) # Save models if dpaths is not None and step % params['SAVE_STEPS'] == 0: saver.save(sess, dpaths, global_step=step) # Save models if dpaths is not None: saver.save(sess, dpaths) # Return Q-learning Experience results return losses, means, stds
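# The replay update above is building a Double-DQN style target: greedy actions are
# selected with the online network (q_out) and evaluated with the target network
# (q_out_target). The exact indexing used above (via np.unravel_index) may behave
# differently; the function below is only a sketch of the intended computation, under
# the same variable roles (b_reward, b_term_state, GAMMA). The name double_dqn_targets
# is hypothetical.
import numpy as np

def double_dqn_targets(q_out, q_out_target, b_reward, b_term_state, gamma):
    """q_out, q_out_target: [batch, n_actions]; b_reward, b_term_state: [batch]."""
    greedy_actions = np.argmax(q_out, axis=1)                       # argmax under online net
    next_values = q_out_target[np.arange(len(greedy_actions)),      # evaluate with target net
                               greedy_actions]
    return b_reward + gamma * (1.0 - b_term_state) * next_values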