Exemplo n.º 1
0
                c.LR: 1e-3,
            },
        },
        c.ALPHA: {
            c.OPTIMIZER: torch.optim.Adam,
            c.KWARGS: {
                c.LR: 1e-3,
            },
        },
    },

    # SAC
    c.ACCUM_NUM_GRAD: 1,
    c.BATCH_SIZE: 256,
    c.BUFFER_WARMUP: 1000,
    c.EVALUATION_PREPROCESSING: gt.Identity(),
    c.GAMMA: 0.99,
    c.LEARN_ALPHA: True,
    c.MAX_GRAD_NORM: 10,
    c.NUM_GRADIENT_UPDATES: 1,
    c.NUM_PREFETCH: 1,
    c.REWARD_SCALING: 1.,
    c.STEPS_BETWEEN_UPDATE: 1,
    c.TARGET_ENTROPY: -3.,
    c.TARGET_UPDATE_INTERVAL: 1,
    c.TAU: 0.005,
    c.TRAIN_PREPROCESSING: gt.Identity(),
    c.UPDATE_NUM: 0,

    # Progress Tracking
    c.CUM_EPISODE_LENGTHS: [0],
Exemplo n.º 2
0
            c.MODEL_ARCHITECTURE: FixedScheduler,
            c.KWARGS: {
                c.INTENTION_I: 0,
                c.NUM_TASKS: num_tasks,
            },
            c.SCHEDULER_PERIOD: c.MAX_INT,
        },
    },

    # DrQ
    c.K:
    2,
    c.M:
    2,
    c.EVALUATION_PREPROCESSING:
    gt.Identity(),
    c.TRAIN_PREPROCESSING:
    gt.Identity(),

    # SAC
    c.ACCUM_NUM_GRAD:
    1,
    c.BATCH_SIZE:
    256,
    c.BUFFER_WARMUP:
    1000,
    c.GAMMA:
    0.99,
    c.INITIAL_ALPHA:
    1.,
    c.LEARN_ALPHA: