def main(unused_argv): # Configure which GPU to use. cfg = tf.config.experimental gpus = cfg.list_physical_devices('GPU') if not gpus: print('No GPUs detected. Running with CPU.') else: cfg.set_visible_devices(gpus[FLAGS.gpu], 'GPU') # Configure how much GPU to use (in Gigabytes). if FLAGS.gpu_limit is not None: mem_limit = 1024 * FLAGS.gpu_limit dev_cfg = [cfg.VirtualDeviceConfiguration(memory_limit=mem_limit)] cfg.set_virtual_device_configuration(gpus[0], dev_cfg) # Load train and test datasets. train_dataset = Dataset(os.path.join(FLAGS.data_dir, f'{FLAGS.task}-train')) test_dataset = Dataset(os.path.join(FLAGS.data_dir, f'{FLAGS.task}-test')) # Run training from scratch multiple times. for train_run in range(FLAGS.n_runs): name = f'{FLAGS.task}-{FLAGS.agent}-{FLAGS.n_demos}-{train_run}' # Set up tensorboard logger. curr_time = datetime.datetime.now().strftime('%Y%m%d-%H%M%S') log_dir = os.path.join(FLAGS.train_dir, 'logs', FLAGS.agent, FLAGS.task, curr_time, 'train') writer = tf.summary.create_file_writer(log_dir) # Initialize agent. np.random.seed(train_run) tf.random.set_seed(train_run) agent = agents.names[FLAGS.agent](name, FLAGS.task, FLAGS.train_dir) # Limit random sampling during training to a fixed dataset. max_demos = train_dataset.n_episodes episodes = np.random.choice(range(max_demos), FLAGS.n_demos, False) train_dataset.set(episodes) if FLAGS.continue_logging: agent.load(FLAGS.log_ckpt) agent.total_steps = FLAGS.log_ckpt + 1 # Train agent and save snapshots. while agent.total_steps < FLAGS.n_steps: for _ in range(FLAGS.interval): agent.train(train_dataset, writer) agent.validate(test_dataset, writer) agent.save()
def main(): train_dataset = Dataset(os.path.join(dataset_root, 'ravens_demo-1623923819265029807'), use_aux=True) max_demos = train_dataset.n_episodes episodes = np.random.choice(range(max_demos), max_demos, False) print(episodes) train_dataset.set(episodes) agent = BC_Agent() if hasattr(torch.cuda, 'empty_cache'): torch.cuda.empty_cache() #agent.train_model(train_dataset) agent.load_whole_dataset(train_dataset, n_max_demos=1000) agent.aux_pretraining(train_dataset, load_model='pretrain_model_600_epoch', batch_size=100) #agent.train_model_with_pretrained_cnn(train_dataset) #agent.train_full_model(train_dataset) #cmap, _, _, _ = agent.get_sample(train_dataset) plt.ion() img_draw = plt.imshow(cmap, interpolation='nearest') while True: cmap, hmap, pose, grasp = agent.get_sample(train_dataset) img_draw.set_data(cmap) if grasp == 1: plt.pause(5) else: plt.pause(0.0001) print('pose:{}, grasp:{}'.format(pose, grasp)) time.sleep(1)
def main(): rospy.init_node('raven_vive_teleop') rate = rospy.Rate(60.0) train_dataset = Dataset(os.path.join(dataset_root, 'ravens_demo-1623923819265029807'), use_aux=True) max_demos = train_dataset.n_episodes episodes = np.random.choice(range(max_demos), max_demos, False) print(episodes) train_dataset.set(episodes) env = Environment(assets_root, disp=True, hz=60) task = tasks.names[task_name]() task.mode = mode env.set_task(task) obs = env.reset() info = None seed = 0 agent = bc_softmax_aux.BC_Agent() #agent = bc_softmax2d_aux.BC_Agent() agent.load_pretrained_model(model_save_rootdir + "pretrain_model_4400_epoch") agent.act(obs) episode_steps = 0 n_episode = 1 aux = { 'ee_pose': ((0, 0, 0), (0, 0, 0, 1)), 'obj_pose': ((0, 0, 0), (0, 0, 0, 1)) } plt.ion() im_color = obs['color'][0] img_draw = plt.imshow(im_color, interpolation='nearest', cmap='gray') while not rospy.is_shutdown(): (obs, act, _, _, aux_dataset), _ = train_dataset.sample() im_color = obs['color'][0] img_draw.set_data(im_color) plt.pause(0.00000001) action, predicted_aux = agent.act(obs, ret_aux=True) #if action != None: # print(action) #obj_pose = predicted_aux['predicted_obj_pose'] obj_pose = predicted_aux['predicted_ee_position'] #obj_pose = aux['ee_pose'] marker_head_point = [obj_pose[0], obj_pose[1], obj_pose[2]] marker_tail_point = [obj_pose[0], obj_pose[1], obj_pose[2] + 0.01] p.addUserDebugLine(marker_head_point, marker_tail_point, lineColorRGB=[0, 1, 0], lifeTime=3, lineWidth=5) obs, reward, done, info, aux = env.step_simple(action, use_aux=True, stand_still=True) #obs, reward, done, info, aux = env.step_simple(action,use_aux = True) plt.pause(3) print(aux_dataset) print(predicted_aux) auxg = np.array(aux_dataset['ee_pose'][0], dtype=float) auxp = np.array(predicted_aux['predicted_ee_position'], dtype=float) d = np.sum(np.square(auxg - auxp)) / 3 print('mse:%f\n' % d) # im_depth = obs['depth'][0] # img_draw.set_data(im_depth) # plt.pause(0.00000001) # plt.show() # print( im_color.shape) # print(obs['color']) s = "Episode:%d, steps:%d" % (n_episode, episode_steps) print(s) if (done): n_episode += 1 episode_steps = 0 seed += 1 episode = [] else: episode_steps += 1 if episode_steps > 100: episode = [] episode_steps = 0 n_episode += 1 obs = env.reset() rate.sleep()