target_net.load_state_dict(policy_net.state_dict())
    # create optimizer
    #opt = optim.RMSprop(policy_net.parameters(),
    #                    lr=info["RMS_LEARNING_RATE"],
    #                    momentum=info["RMS_MOMENTUM"],
    #                    eps=info["RMS_EPSILON"],
    #                    centered=info["RMS_CENTERED"],
    #                    alpha=info["RMS_DECAY"])
    opt = optim.Adam(policy_net.parameters(), lr=info['ADAM_LEARNING_RATE'])

    if args.model_loadpath is not '':
        # what about random states - they will be wrong now???
        # TODO - what about target net update cnt
        target_net.load_state_dict(model_dict['target_net_state_dict'])
        policy_net.load_state_dict(model_dict['policy_net_state_dict'])
        opt.load_state_dict(model_dict['optimizer'])
        print("loaded model state_dicts")
        if args.buffer_loadpath == '':
            args.buffer_loadpath = args.model_loadpath.replace(
                '.pkl', '_train_buffer.npz')
            print("auto loading buffer from:%s" % args.buffer_loadpath)
            try:
                replay_memory.load_buffer(args.buffer_loadpath)
            except Exception as e:
                print(e)
                print(
                    'not able to load from buffer: %s. exit() to continue with empty buffer'
                    % args.buffer_loadpath)

    train_sim(start_step_number, start_last_save)
# ---- 示例#2 (Example #2, scraped example-site residue; vote count: 0) ----
    # NOTE(review): these lines appear to belong inside an `if info['PRIOR']:`
    # branch whose header is outside this chunk -- confirm in the full file.
    print("using randomized prior")
    # Wrap both networks so their outputs combine the trainable net with a
    # separate prior net scaled by PRIOR_SCALE -- presumably the randomized
    # prior functions technique for exploration; confirm against NetWithPrior.
    policy_net = NetWithPrior(policy_net, prior_net, info['PRIOR_SCALE'])
    target_net = NetWithPrior(target_net, prior_net, info['PRIOR_SCALE'])
# Initialize the target network as an exact copy of the policy network,
# then build the optimizer over the policy network's parameters.
target_net.load_state_dict(policy_net.state_dict())
opt = optim.Adam(policy_net.parameters(), lr=info['ADAM_LEARNING_RATE'])

if load_model:
    # Restore both networks and the optimizer from the checkpoint dict.
    # (Open questions from the original author: RNG states and the
    # target-net update counter are not restored.)
    for net, key in ((target_net, 'target_net_state_dict'),
                     (policy_net, 'policy_net_state_dict')):
        net.load_state_dict(model_dict[key])
    opt.load_state_dict(model_dict['optimizer'])
    print("loaded model state_dicts")

    # Derive the replay-buffer path from the checkpoint path and try a
    # best-effort load; on failure, report and continue with an empty buffer.
    buffer_loadpath = info['model_loadpath'].replace('.pkl', '_train_buffer.npz')
    print("auto loading buffer from:%s" % buffer_loadpath)
    try:
        replay_memory.load_buffer(buffer_loadpath)
    except Exception as err:
        print(err)
        print('not able to load from buffer: %s. exit() to continue with empty buffer' % buffer_loadpath)
# Optional "advice" network: only constructed when the advice flag is set.
advice_net=None
if info['advice_flg']:
    print('loading advice model from: %s' %info['advicemodel_loadpath'])
    # NOTE(review): this overwrites the `model_dict` name used for the main
    # checkpoint above, and the loaded dict is never applied to `advice_net`
    # within this chunk (no load_state_dict call) -- confirm the advice
    # weights are actually loaded somewhere after this point.
    model_dict = torch.load(info['advicemodel_loadpath'])
    # NOTE(review): network_output_size is taken from NETWORK_INPUT_SIZE[0];
    # looks suspicious but may be intentional -- verify against EnsembleNet.
    advice_net = EnsembleNet(n_ensemble=info['N_ENSEMBLE'],
                             n_actions=env.num_actions,
                             network_output_size=info['NETWORK_INPUT_SIZE'][0],
                             num_channels=info['HISTORY_SIZE'], dueling=info['DUELING']).to(info['DEVICE'])
    if info['PRIOR']:
        # Mirror the prior-wrapping applied to the policy/target networks.
        advice_net = NetWithPrior(advice_net, prior_net, info['PRIOR_SCALE'])
action_getter = ActionGetter(n_actions=env.num_actions,
                             policy_net=policy_net,