l1, b1, l2, b2 = model.get_gen_weights()
        weight_size = np.sum(
            list(l1.flatten().shape) + list(b1.flatten().shape) +
            list(l2.flatten().shape) + list(b2.flatten().shape))

    # Build the discriminator and move it to the selected device.
    dis = models.DiscriminatorZ(args, obs_size, n_actions)
    dis = dis.to(device)

    # ms-CEM policy (refers to the guiding CEM policy)
    # Length of the flat parameter vector sampled below.
    # NOTE(review): (obs_size + 1) * n_actions looks like one weight per
    # observation feature plus a bias, per action -- confirm against the model.
    dim_theta = (obs_size + 1) * n_actions
    # Initialize mean and standard deviation
    theta_mean = np.zeros(dim_theta)
    theta_std = np.ones(dim_theta)
    # Draw a single sample from N(theta_mean, diag(theta_std**2)); with the zero
    # mean and unit std set above this is a standard-normal vector. np.diag
    # builds a dense dim_theta x dim_theta covariance matrix for the call.
    # `theta` is not read again in the lines visible here.
    theta = np.random.multivariate_normal(theta_mean, np.diag(theta_std**2))
    # Select the guiding network by depth.
    # NOTE(review): these are two independent `if`s with no fallback. If LAYER
    # is neither 2 nor 3, msCEM_model is not assigned in the lines shown and the
    # `.to(device)` call below fails with a NameError (unless the name was bound
    # earlier, outside this view).
    if LAYER == 2:  # 2-layer network
        msCEM_model = models.NetW_2Layer(args, obs_size, n_actions)

    if LAYER == 3:  # 3-layer network
        msCEM_model = models.NetW_3Layer(args, obs_size, n_actions)
    msCEM_model = msCEM_model.to(device)

    # Loss objects: two separate binary cross-entropy instances and one
    # mean-squared-error instance. Where each is applied is outside this view.
    criterionGen = nn.BCELoss()
    criterionDis = nn.BCELoss()
    criterionMSE = nn.MSELoss()
    # Print both modules for inspection.
    print(model)
    print(dis)

    # Report problem dimensions and the counts returned by
    # utils.count_parameters for the full model and for its `encoder` attribute.
    print("Observation Size: {} \t Action Size: {}".format(
        obs_size, n_actions))
    print("Model_param: {}".format(utils.count_parameters(model)))
    print("Encoder_param: {}".format(utils.count_parameters(model.encoder)))
# Example #2
# 0
    # Pick the compute device. The conditional expression binds loosest, so this
    # reads as ("cuda:" + DEVICE) if CUDA is available, else "cpu".
    # NOTE(review): the concatenation assumes DEVICE is a string such as "0" --
    # confirm where DEVICE is defined.
    device = torch.device("cuda:" + DEVICE if torch.cuda.is_available() else "cpu")
    # An explicit opt-out forces CPU even when CUDA is available.
    if not use_cuda:
        device = torch.device("cpu")
    print(device)

    # Length of the flat parameter vector sampled below.
    # NOTE(review): (obs_size + 1) * n_actions looks like one weight per
    # observation feature plus a bias, per action -- confirm against the model.
    dim_theta = (obs_size + 1) * n_actions

    # Initialize mean and standard deviation
    theta_mean = np.zeros(dim_theta)
    theta_std = np.ones(dim_theta)

    # main policy network
    # Draw a single sample from N(theta_mean, diag(theta_std**2)); with the zero
    # mean and unit std set above this is a standard-normal vector. np.diag
    # builds a dense dim_theta x dim_theta covariance matrix for the call.
    # `theta` is not read again in the lines visible here.
    theta = np.random.multivariate_normal(theta_mean, np.diag(theta_std ** 2))
    # NOTE(review): two independent `if`s with no fallback. If LAYER is neither
    # 2 nor 3, `model` is not assigned in the lines shown and `.to(device)` below
    # fails with a NameError (unless the name was bound earlier, outside this
    # view).
    if LAYER == 2:
        model = models.NetW_2Layer(args, obs_size, n_actions)  # 2-layer
    if LAYER == 3:
        model = models.NetW_3Layer(args, obs_size, n_actions)  # 3-layer
    model = model.to(device)

    print("Observation Size: {} \t Action Size: {}".format(obs_size, n_actions))

    # Wall-clock timestamp taken before the work that follows this setup.
    start_time = time.time()

    # define agent start state
    # One (INI_JOING_ANGLES, [369, 430]) pair. NOTE(review): the meaning of the
    # two integers is not shown here (possibly a 2-D position) -- confirm.
    agent_start_list = [(INI_JOING_ANGLES, [369,430])]

    # Elite-batch buffer constructed with argument 25. NOTE(review): the
    # trailing "# 10" may be a previously used value; the argument's meaning
    # (e.g. capacity) is defined in utils, not here.
    e_batch_buffer = utils.elite_batch_bufferW_msCEM(25)  # 10
    global_eList = []

    # Running accumulator, starting at zero.
    total_test_reward_accum = 0