c.LR: 1e-3, }, }, c.ALPHA: { c.OPTIMIZER: torch.optim.Adam, c.KWARGS: { c.LR: 1e-3, }, }, }, # SAC c.ACCUM_NUM_GRAD: 1, c.BATCH_SIZE: 256, c.BUFFER_WARMUP: 1000, c.EVALUATION_PREPROCESSING: gt.Identity(), c.GAMMA: 0.99, c.LEARN_ALPHA: True, c.MAX_GRAD_NORM: 10, c.NUM_GRADIENT_UPDATES: 1, c.NUM_PREFETCH: 1, c.REWARD_SCALING: 1., c.STEPS_BETWEEN_UPDATE: 1, c.TARGET_ENTROPY: -3., c.TARGET_UPDATE_INTERVAL: 1, c.TAU: 0.005, c.TRAIN_PREPROCESSING: gt.Identity(), c.UPDATE_NUM: 0, # Progress Tracking c.CUM_EPISODE_LENGTHS: [0],
c.MODEL_ARCHITECTURE: FixedScheduler, c.KWARGS: { c.INTENTION_I: 0, c.NUM_TASKS: num_tasks, }, c.SCHEDULER_PERIOD: c.MAX_INT, }, }, # DrQ c.K: 2, c.M: 2, c.EVALUATION_PREPROCESSING: gt.Identity(), c.TRAIN_PREPROCESSING: gt.Identity(), # SAC c.ACCUM_NUM_GRAD: 1, c.BATCH_SIZE: 256, c.BUFFER_WARMUP: 1000, c.GAMMA: 0.99, c.INITIAL_ALPHA: 1., c.LEARN_ALPHA: