def training_pipeline(cls, **kwargs):
    total_train_steps = cls.TOTAL_IL_TRAIN_STEPS
    ppo_info = cls.rl_loss_default("ppo", steps=-1)
    num_mini_batch = ppo_info["num_mini_batch"]
    update_repeats = ppo_info["update_repeats"]

    # fmt: off
    return cls._training_pipeline(
        named_losses={
            "offpolicy_expert_ce_loss": MiniGridOffPolicyExpertCELoss(
                total_episodes_in_epoch=int(1e6)
            ),
        },
        pipeline_stages=[
            # Single stage, only with off-policy training
            PipelineStage(
                loss_names=[],  # no on-policy losses
                max_stage_steps=total_train_steps,  # keep sampling episodes in the stage
                # Enable off-policy training:
                offpolicy_component=OffPolicyPipelineComponent(
                    # Pass a method to instantiate data iterators
                    data_iterator_builder=lambda **extra_kwargs: create_minigrid_offpolicy_data_iterator(
                        path=os.path.join(
                            BABYAI_EXPERT_TRAJECTORIES_DIR,
                            "BabyAI-GoToLocal-v0{}.pkl".format(
                                "" if torch.cuda.is_available() else "-small"
                            ),
                        ),
                        nrollouts=cls.NUM_TRAIN_SAMPLERS // num_mini_batch,  # per trainer batch size
                        rollout_len=cls.ROLLOUT_STEPS,
                        instr_len=cls.INSTR_LEN,
                        **extra_kwargs,
                    ),
                    loss_names=["offpolicy_expert_ce_loss"],  # off-policy losses
                    updates=num_mini_batch * update_repeats,  # number of batches per rollout
                ),
            ),
        ],
        # As we don't have any on-policy losses, we set the next
        # two values to zero to ensure we don't attempt to
        # compute gradients for on-policy rollouts:
        num_mini_batch=0,
        update_repeats=0,
        total_train_steps=total_train_steps,
    )


# Variant for distributed training: the expert episodes per epoch are divided
# across the training GPUs, and cls.expert_ce_loss_kwargs_generator supplies
# per-worker kwargs to the data iterator builder.
def training_pipeline(cls, **kwargs):
    total_train_steps = cls.TOTAL_IL_TRAIN_STEPS
    ppo_info = cls.rl_loss_default("ppo", steps=-1)
    num_mini_batch = ppo_info["num_mini_batch"]
    update_repeats = ppo_info["update_repeats"]

    return cls._training_pipeline(
        named_losses={
            "offpolicy_expert_ce_loss": MiniGridOffPolicyExpertCELoss(
                total_episodes_in_epoch=int(1e6)
                // len(cls.machine_params("train")["gpu_ids"])
            ),
        },
        pipeline_stages=[
            PipelineStage(
                loss_names=[],
                max_stage_steps=total_train_steps,
                offpolicy_component=OffPolicyPipelineComponent(
                    data_iterator_builder=lambda **kwargs: create_minigrid_offpolicy_data_iterator(
                        path=os.path.join(
                            BABYAI_EXPERT_TRAJECTORIES_DIR,
                            "BabyAI-GoToLocal-v0{}.pkl".format(
                                "" if torch.cuda.is_available() else "-small"
                            ),
                        ),
                        nrollouts=cls.NUM_TRAIN_SAMPLERS // num_mini_batch,
                        rollout_len=cls.ROLLOUT_STEPS,
                        instr_len=cls.INSTR_LEN,
                        **kwargs,
                    ),
                    data_iterator_kwargs_generator=cls.expert_ce_loss_kwargs_generator,
                    loss_names=["offpolicy_expert_ce_loss"],
                    updates=num_mini_batch * update_repeats,
                ),
            ),
        ],
        num_mini_batch=0,
        update_repeats=0,
        total_train_steps=total_train_steps,
    )
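
# For reference, the two pipelines above rely on helpers defined elsewhere in
# the project. A minimal sketch of the imports they need is given below; the
# module paths are assumptions (they may differ between AllenAct versions) and
# should be checked against the version in use.
import os

import torch

from allenact.utils.experiment_utils import (  # assumed location
    OffPolicyPipelineComponent,
    PipelineStage,
)
from allenact_plugins.babyai_plugin.babyai_constants import (  # assumed location
    BABYAI_EXPERT_TRAJECTORIES_DIR,
)
from allenact_plugins.minigrid_plugin.minigrid_offpolicy import (  # assumed location
    MiniGridOffPolicyExpertCELoss,
    create_minigrid_offpolicy_data_iterator,
)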