# Imports assumed by these excerpts; `c` (settings), `g` (globals), `n`
# (run-name helper), and `seed`/`seed_json` come from this repo's config and
# test_env modules, per the usage below.
import itertools
import json
import os
import shutil
import time
import traceback

import gym
import numpy as np
import ray
import tensorflow as tf
import yagmail
from gym.envs.registration import register

import utils


def main():
    from config import n

    # NOTE: Plot Config
    if not os.path.exists('results'):
        os.mkdir('results')
    if os.path.exists(os.path.join('results', n(c, seed))):
        print("DELETING...")
        shutil.rmtree(os.path.join('results', n(c, seed)), ignore_errors=True)
    # Prune stale run directories that never produced TensorBoard event files.
    for name in os.listdir('results'):
        name_dir = os.path.join('results', name)
        if os.path.isdir(name_dir):
            events_exist = False
            for number in os.listdir(name_dir):
                if os.path.isdir(os.path.join(name_dir, number)):
                    for f in os.listdir(os.path.join(name_dir, number)):
                        if 'events' in f:
                            events_exist = True
            if not events_exist:
                shutil.rmtree(name_dir)
    os.mkdir(os.path.join('results', n(c, seed)))
    # Freeze a copy of the config next to the results for reproducibility.
    shutil.copyfile('./config.py',
                    os.path.join('results', n(c, seed), 'cfg.py'))

    # NOTE: ALGORITHM
    result, msg = run(np.random.randint(100))
    return 'Job Succeeded!', msg
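# A minimal sketch of the results layout main() maintains, assuming n(c, seed)
# returns a unique run-name string (its implementation lives in config.py and
# is not shown in this excerpt):
#
#   results/
#     <n(c, seed)>/    # recreated from scratch on every launch
#       cfg.py         # frozen copy of config.py for reproducibility
#       <subdir>/      # TensorBoard 'events.*' files land here
#
# Any results/<name>/ tree whose subdirectories contain no 'events' file is
# treated as a dead run and pruned before the new directory is created.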
def __init__(self, id=0, eval=False, dirname=None, real=False):
    self.pi_replay_buffer = None
    self.id = id
    self.tensorboard_keys = []
    self.dir_name = n() if self.is_master() else dirname
    print('self.dir_name', self.dir_name)
    self.fd = {}
    self.t = 0
    self.env = None
    self.actors = None
    self.effective_timesteps_so_far = 0
    self.real_timesteps_so_far = 0
    self.eval = eval
    self.real = real
    self.grasp_scale_obv_2d = None
    self.max_train_success_perc = 0
    if self.is_master() and not self.eval:
        self.delete_prev_directory()
        self.custom_init_op()
        self.summary_phs = {}
        for tb_key in self.tensorboard_keys:
            key = tb_key.split('/')[-1]
            if tb_key.endswith('_tr'):
                tf.summary.scalar(tb_key[:-3], getattr(self, key))
            else:
                self.summary_phs[tb_key] = tf.placeholder(
                    tf.float64, shape=(), name=tb_key)
                tf.summary.scalar(tb_key, self.summary_phs[tb_key])
        self.summary_op = tf.summary.merge_all()
        self.file_writer = tf.summary.FileWriter(
            logdir=os.path.join('results', self.dir_name),
            graph=tf.get_default_graph())
    else:
        self.custom_init_op(host=g.hosts[self.id - 1],
                            port=g.ports[self.id - 1])
    self.sess = tf.Session()
    self.init_op = tf.global_variables_initializer()
    if len(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                             scope=g.grasp_cnn)) > 0:
        if self.eval:
            self.grasp_saver = tf.train.Saver(var_list=[
                var for var in tf.get_collection(
                    tf.GraphKeys.GLOBAL_VARIABLES, scope=g.grasp_cnn)
                if 'finger_4' not in var.name and 'log_std' not in var.name
            ])
        else:
            self.grasp_saver = tf.train.Saver(var_list=tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES, scope=g.grasp_cnn))
    self.saver = tf.train.Saver()
    self.sess.graph.finalize()
    if self.is_master():
        self.eval_op() if self.eval else self.master()
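# Hypothetical write-side usage of the placeholder-backed summaries built in
# __init__ above; the key name and step counter are illustrative, real keys
# come from self.tensorboard_keys:
#
#   fd = {self.summary_phs['eval/avg_reward']: avg_r}
#   summary = self.sess.run(self.summary_op, feed_dict=fd)
#   self.file_writer.add_summary(summary, global_step=self.t)
#
# Keys ending in '_tr' skip the placeholder path because they already exist
# as tensors on the instance and are summarized directly from the graph.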
def run(run_id):
    starting_iter = 0
    task_id = c.task_id
    utils.modify_xml(task_id, seed)
    algo = algo_class(run_id, True, c.restore_model, task_id, seed)
    ray.init(num_cpus=c.num_cores, redirect_output=True)
    algo_threads = ray.remote(algo_class)
    algo.actors = [
        algo_threads.remote(_ + np.random.randint(100) + run_id, False, False,
                            task_id, seed) for _ in range(c.num_cores)
    ]
    algo.save_model_op(first=True)
    print('initial eval reward', algo.evaluate_avg_r())
    for iter in range(c.num_batches * c.num_tasks):
        start = time.time()
        # Build the env for the task currently being learned; task_id advances
        # below as tasks are solved, so it must be used here, not c.task_id.
        algo.env = gym.make('%s%s' % (task_id, c.env_name))

        # NOTE: GET EXPERIENCE
        algo.save_model_op()
        ray.get(
            [ac.restore_model_op.remote(algo.run_id) for ac in algo.actors])
        ret = list(
            zip(*ray.get([
                actor.gen_experiences_op.remote()
                for actor in algo.actors[:c.num_cpus]
            ])))
        experience = list(itertools.chain(*ret[0]))
        num_traj = sum(ret[1])
        cum_r = sum(ret[2])
        num_succ = sum(ret[3])
        # NOTE: experience is the list of tuples in one rollout
        algo.avg_train_rewards = cum_r / (num_traj + c.e)
        algo.num_traj_per_cpu = num_traj / c.num_cpus
        algo.suc_train = num_succ / (num_traj + c.e)
        algo.num_timesteps = len(experience)
        algo.update_learning_rates_op(iter)
        algo.suc_test = algo.suc_train
        algo.eval_rewards = algo.avg_train_rewards
        algo.suc_test_std = np.std(ret[3])
        if c.gpu:
            with tf.device('/gpu:0'):
                algo.learn(experience)
        else:
            algo.learn(experience)

        # NOTE: LIST
        algo.suc_perc_train_list.append(algo.suc_train)
        algo.eval_r_list.append(algo.eval_rewards)
        algo.suc_perc_test_list.append(algo.suc_test)

        # NOTE: MOV AVG
        algo.suc_perc_train_list = algo.suc_perc_train_list[-c.mov_avg:]
        algo.eval_r_list = algo.eval_r_list[-c.mov_avg:]
        algo.suc_perc_test_list = algo.suc_perc_test_list[-c.mov_avg:]
        algo.mov_suc_train = np.mean(algo.suc_perc_train_list)
        algo.moving_eval_rewards = np.mean(algo.eval_r_list)
        algo.mov_suc_test = np.mean(algo.suc_perc_test_list)
        algo.max_eval_suc_mov = max(algo.mov_suc_test, algo.max_eval_suc_mov)
        algo.seconds = time.time() - start
        print((iter, '%s S' % algo.seconds, algo.eval_rewards,
               algo.avg_train_rewards, task_id))
        print(n(c, seed))
        if algo.mov_suc_test > algo.max_reward + 0.03:
            algo.save_best_model_op()
            algo.max_reward = algo.mov_suc_test

        # NOTE: LIFELONG LEARNING
        if (algo.mov_suc_test >= c.solved_threshold
                or algo.timesteps_so_far > c.total_ts):
            task_id += 1
            algo.save_best_model_op()
            if task_id == c.num_tasks:
                algo.timesteps_used_list.append(algo.timesteps_so_far / 1e6)
                algo.timesteps_used_list.append(sum(algo.timesteps_used_list))
                algo.fd[algo.timesteps_used_text_ph] = str(
                    algo.timesteps_used_list)
                algo.fd[algo.ts_string_ph] = ' & '.join(
                    [str(round(num, 1)) for num in algo.timesteps_used_list])
                algo.summary()
                break
            algo.suc_perc_test_list = []
            algo.suc_perc_train_list = []
            algo.eval_r_list = []
            utils.modify_xml(task_id, seed)
            algo.reinit(task_id=task_id)
            ray.get([actor.reinit.remote(task_id) for actor in algo.actors])
            algo.iteration_solved = iter - starting_iter
            starting_iter = iter
            algo.memory = 0
        algo.summary()
    # fd only holds the timestep string once every task was solved, so fall
    # back to 'None' instead of raising a KeyError on early termination.
    return utils.pickle_compatible(algo), (algo.fd.get(algo.ts_string_ph)
                                           or 'None')
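# Sketch of the Ray actor fan-out used by run(), reduced to essentials; the
# names here are illustrative, not this repo's API:
#
#   RemoteAlgo = ray.remote(AlgoClass)                 # wrap the class once
#   actors = [RemoteAlgo.remote(i) for i in range(c.num_cores)]
#   futures = [a.gen_experiences_op.remote() for a in actors]
#   per_actor = ray.get(futures)                       # blocks on all actors
#   experience, n_traj, cum_r, n_succ = zip(*per_actor)
#
# The driver owns the canonical weights: it calls save_model_op() and every
# actor calls restore_model_op() before the next batch, so all rollouts in a
# batch are drawn from the same policy snapshot.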
if __name__ == "__main__":
    from test_env import seed
    print('FINAL SEED', seed)
    tf.set_random_seed(seed=seed)
    np.random.seed(seed=seed)
    module = __import__("algorithms.%s" % c.algo, fromlist=[c.algo])
    algo_class = getattr(module, c.algo)
    try:
        subject, msg = main()
        content = n(c, seed) + '\n' + msg
    except Exception:
        subject = 'Job Failed!'
        content = '%s\n%s' % (n(c, seed), traceback.format_exc())
    print(content)
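# The dynamic import above expects a module algorithms/<c.algo>.py defining a
# class of the same name; e.g. with c.algo == 'ppo' (illustrative name, not
# verified against this repo):
#
#   module = __import__("algorithms.ppo", fromlist=["ppo"])
#   algo_class = getattr(module, "ppo")   # == algorithms.ppo.ppo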
try:
    algo_class = getattr(module, d.config.algo)
    algo_class(eval=True)
    with open('results.json', 'r') as f:
        j = json.load(f)
    subject = '%s: Job Succeeded! %s %s %s %s' % (
        g.ip, g.finger_closing_only, g.regrasp_only, g.pos_adjustment_only,
        g.ori_adjustment_only)
    content = str([
        'test_single_seen_ret', 'test_single_novel_ret',
        'test_multi_seen_ret', 'test_multi_novel_ret'
    ]) + '\n'
    content += str(j['mean']) + '\n'
    content += str(j['std']) + '\n' + g.path
except Exception:
    subject = '%s: Job Failed!' % g.ip
    content = '%s\n%s' % (n(), traceback.format_exc())
print(content)
with open('credential.json', mode='r') as f:
    login = json.load(f)
success = False
while not success:
    time.sleep(1)
    try:
        if g.statistics:
            print('Trying to send email')
            yag = yagmail.SMTP(login['from'], login['password'])
            yag.send(login["to"], subject=subject, contents=content)
            print("Email sent")
            success = True
        else:
            # No email requested; leave the retry loop instead of spinning.
            success = True
    except Exception:
        # Assumed handler (the excerpt was truncated here): a transient SMTP
        # failure just loops around and retries after the pause.
        continue
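# The email step assumes a credential.json of the form below (field names as
# read above; values illustrative):
#
#   {"from": "sender@example.com", "password": "...", "to": "you@example.com"}
#
# yagmail.SMTP(user, password) and .send(to, subject=..., contents=...) are
# standard yagmail calls; the once-per-second while-loop retries until a
# send succeeds, since transient SMTP failures are common on cluster nodes.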
else:
    seed = 10

print('NOISE: %s %s' % (c.Tnoise, c.Fnoise))
print('SOLVED: %s' % c.solved_threshold)
print('ABOVE TARGET: %s' % c.above_target)
print('RUN: %s' % c.xml_name)
print('pg_sub_lr %s' % c.pg_sub_lr)
print('pg_master_ce_lr_source %s' % c.pg_master_ce_lr_source)
print('REPEAT %s' % c.repeat)
print('update_master_interval %s' % c.update_master_interval)
print('reset %s' % c.reset)

# NOTE: MASTER
with open(seed_json, 'w') as f:
    json.dump({'seed': seed}, f)
print('MASTER SEED', seed, 'can leave now...')
print(n(c, seed))

env_id = c.env_name.replace('-', '_')
for i in range(c.num_tasks):
    # One gym id per task: '0<env_name>', '1<env_name>', ...
    id = '%s%s' % (i, c.env_name)
    if c.env_type == 'mujoco':
        register(id=id,
                 entry_point='test_env.envs.%s:%s' % (env_id, env_id),
                 max_episode_steps=c.max_num_timesteps,
                 reward_threshold=60000.0,
                 kwargs={'paths': c.paths[i], 'seed': seed})
    else:
        domain, task, _ = c.xml_name.split('-')