def worker(input_worker):
    """Explanations"""
    #Global variables:
    global numInput, numOutput, numHidden
    global dim_input_hidden, dim_hidden_output
    global sigma
    global env

    #Local:

    seed = int(input_worker[0])
    p = input_worker[1]

    env.seed(seed)

    initial_positions = [[
        0.99944425, -0.03333454, 0.99606243, -0.08865463, -0.0176822,
        0.05332311
    ], [
        0.99990637, 0.01368383, 0.99981907, 0.01902162, 0.02523626, -0.02548034
    ],
                         [
                             9.99192425e-01, 4.01808159e-02, 9.98170503e-01,
                             6.04619401e-02, 2.84129862e-02, 2.68182438e-04
                         ],
                         [
                             0.99940723, 0.0344266, 0.99632547, 0.08564789,
                             -0.04982971, 0.07554994
                         ],
                         [
                             9.99516909e-01, -3.10797289e-02, 9.99999914e-01,
                             4.15201486e-04, -3.08654909e-02, -4.65136501e-02
                         ]]
    np.random.shuffle(initial_positions)
    initial_observation = initial_positions[0]

    #Neural Networks:
    NN = NeuralNetwork(numInput, numHidden, numOutput)

    NN.wi = p[0]
    NN.wo = p[1]

    #distortions
    epsilon_wo = np.random.multivariate_normal(
        [0 for x in range(dim_hidden_output)],
        np.identity(dim_hidden_output)).reshape((numHidden, numOutput))
    epsilon_wi = np.random.multivariate_normal(
        [0 for x in range(dim_input_hidden)],
        np.identity(dim_input_hidden)).reshape((numInput, numHidden))
    #parameters update
    NN.wo = NN.wo + epsilon_wo * sigma  #remark:we should merge the two, and reshape the matrix
    NN.wi = NN.wi + epsilon_wi * sigma

    #initial_observation=env.reset()

    reward_worker = episodeRoute(NN, env, initial_observation, steps=250)

    return (reward_worker, epsilon_wi, epsilon_wo)
def worker(input_worker):
    """Explanations"""
    #Global variables:
    global numInput, numOutput, numHidden
    global dim_hidden_output, dim_hidden_output
    global sigma
    global env

    #Local:

    seed = int(input_worker[0])
    p = input_worker[1]

    env.seed(seed)
    np.random.seed(seed)

    #Neural Networks:
    NN = NeuralNetwork(numInput, numHidden, numOutput)

    NN.wi = p[0]
    NN.wo = p[1]

    #distortions

    epsilon = np.random.multivariate_normal(
        np.zeros(dim_hidden_output + dim_input_hidden),
        np.identity(dim_hidden_output + dim_input_hidden))
    epsilon_wo = epsilon[0:dim_hidden_output].reshape((numHidden, numOutput))
    epsilon_wi = epsilon[dim_hidden_output:dim_hidden_output +
                         dim_input_hidden + 1].reshape((numInput, numHidden))

    #epsilon_wo = np.random.multivariate_normal([0 for x in range(dim_hidden_output)],np.identity(dim_hidden_output)).reshape((numHidden,numOutput))
    #epsilon_wi = np.random.multivariate_normal([0 for x in range(dim_input_hidden)],np.identity(dim_input_hidden)).reshape((numInput,numHidden))

    #parameters update
    NN.wo = NN.wo + epsilon_wo * sigma  #remark:we should merge the two, and reshape the matrix
    NN.wi = NN.wi + epsilon_wi * sigma

    #initial_observation=env.reset()

    reward_worker = episodeRoute(NN, env, initial_observation, steps=250)

    return (reward_worker, epsilon_wi, epsilon_wo)
params_GS_1=load_obj('params_GS_semi')
params_GS_2=load_obj('params_GS_semi_robust(2)')
params_GS_3=load_obj('params_GS_semi_robust2(2)')

params_wo_GS_1=load_obj('params_woGS_semi(2)')
params_wo_GS_2=load_obj('params_woGS_semi_robust(2)')
params_wo_GS_3=load_obj('params_woGS_semi_robust2(2)')

env, obs_dim , act_dim=initGym()
NN_GS_1=NeuralNetwork(obs_dim,8,act_dim)
NN_GS_2=NeuralNetwork(obs_dim,8,act_dim)
NN_GS_3=NeuralNetwork(obs_dim,8,act_dim)

simulation_reward_GS=[]

NN_GS_3.wi=params_GS_3[0]
NN_GS_3.wo=params_GS_3[1]

NN_GS_1.wi=params_GS_1[0]
NN_GS_1.wo=params_GS_1[1]

NN_GS_2.wi=params_GS_2[0]
NN_GS_2.wo=params_GS_2[1]


reward_episode_GS_1=[]
reward_episode_GS_2=[]
reward_episode_GS_3=[]

reward_episode_wo_GS_1=[]
reward_episode_wo_GS_2=[]
Exemplo n.º 4
0
            [np.mean(reward_workers),
             np.median(reward_workers)])

        index_sort = np.argsort(reward_workers)
        reward_workers = np.sort(reward_workers)
        #fitness = fitness_shaping_paper(reward_workers)

        print(reward_workers)
        epsilon_wi = [epsilon_wi[i] for i in index_sort]
        epsilon_wo = [epsilon_wo[i] for i in index_sort]
        """
        #grad1:
        params[0] = params[0] - alpha*(1/(num_workers*sigma))*sum([eps*F*w for eps,F,w in zip(epsilon_wi,reward_workers,fitness)])
        params[1] = params[1] - alpha*(1/(num_workers*sigma))*sum([eps*F*w for eps,F,w in zip(epsilon_wo,reward_workers,fitness)])
        """
        #grad1:
        params[0] = params[0] + alpha * (1 / (num_workers * sigma)) * sum(
            [eps * F for eps, F in zip(epsilon_wi, reward_workers)])
        params[1] = params[1] + alpha * (1 / (num_workers * sigma)) * sum(
            [eps * F for eps, F in zip(epsilon_wo, reward_workers)])

        print(reward_episode[-1][0])
    print(reward_episode)
    #%%
    plt.plot([x[0] for x in reward_episode])
    save_obj(params, 'params-v2')
    ### Test:
    NN = NeuralNetwork(numInput, numHidden, numOutput)
    NN.wi = params[0]
    NN.wo = params[1]
    runNN(NN, env)