Example No. 1
def playGame(DDPG_config,
             train_indicator=1):  #1 means Train, 0 means simply Run
    # SETUP STARTS HERE
    if train_indicator > 0:
        folder = setup_run(DDPG_config)
    elif train_indicator == 0:
        folder = DDPG_config['EXPERIMENT']

    if DDPG_config['RSEED'] == 0:
        DDPG_config['RSEED'] = None
    np.random.seed(DDPG_config['RSEED'])

    ACTIVE_NODES = DDPG_config['ACTIVE_NODES']

    # Generate an environment
    if DDPG_config['ENV'] == 'balancing':
        env = OmnetBalancerEnv(DDPG_config, folder)
    elif DDPG_config['ENV'] == 'label':
        env = OmnetLinkweightEnv(DDPG_config, folder)

    action_dim, state_dim = env.a_dim, env.s_dim

    MU = DDPG_config['MU']
    THETA = DDPG_config['THETA']
    SIGMA = DDPG_config['SIGMA']

    ou = OU(action_dim, MU, THETA, SIGMA)  #Ornstein-Uhlenbeck Process
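    # The OU process produces temporally correlated noise, the usual exploration
    # strategy for DDPG's continuous action space.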

    BUFFER_SIZE = DDPG_config['BUFFER_SIZE']
    BATCH_SIZE = DDPG_config['BATCH_SIZE']
    GAMMA = DDPG_config['GAMMA']
    EXPLORE = DDPG_config['EXPLORE']
    EPISODE_COUNT = DDPG_config['EPISODE_COUNT']
    MAX_STEPS = DDPG_config['MAX_STEPS']
    if EXPLORE <= 1:
        # interpret EXPLORE as a fraction of the total number of training steps
        EXPLORE = EPISODE_COUNT * MAX_STEPS * EXPLORE
    # SETUP ENDS HERE

    reward = 0
    done = False
    wise = False
    step = 0
    epsilon = 1
    indicator = 0

    #TensorFlow GPU optimization: allocate GPU memory on demand (allow_growth)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    actor = ActorNetwork(sess, state_dim, action_dim, DDPG_config)
    critic = CriticNetwork(sess, state_dim, action_dim, DDPG_config)
    buff = ReplayBuffer(BUFFER_SIZE)  #Create replay buffer

    # Track, per step, the L2 norm of the weight change for selected layers
    ltm = ['a_h0', 'a_h1', 'a_V', 'c_w1', 'c_a1', 'c_h1', 'c_h3', 'c_V']
    layers_to_mind = {}
    L2 = {}

    for k in ltm:
        layers_to_mind[k] = 0
        L2[k] = 0

    vector_to_file(ltm, folder + 'weightsL2' + 'Log.csv', 'w')

    #Now load the weights, if a previous checkpoint exists in the run folder
    try:
        actor.model.load_weights(folder + "actormodel.h5")
        critic.model.load_weights(folder + "criticmodel.h5")
        actor.target_model.load_weights(folder + "actormodel.h5")
        critic.target_model.load_weights(folder + "criticmodel.h5")
        print("Weights loaded successfully")
    except Exception:
        print("Cannot find the weights")

    print("OMNeT++ Experiment Start.")
    # initial state of simulator
    s_t = env.reset()
    loss = 0
    for i in range(EPISODE_COUNT):

        print("Episode : " + str(i) + " Replay Buffer " + str(buff.count()))

        total_reward = 0
        for j in range(MAX_STEPS):
            print('step ', j)
            epsilon -= 1.0 / EXPLORE
            a_t = np.zeros([1, action_dim])
            noise_t = np.zeros([1, action_dim])

            a_t_original = actor.model.predict(s_t.reshape(1, s_t.shape[0]))

            if train_indicator and epsilon > 0 and (step % 1000) // 100 != 9:
                noise_t[0] = epsilon * ou.evolve()

            a = a_t_original[0]
            n = noise_t[0]
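            # Exploration: keep a + n while it stays strictly inside (0, 1);
            # otherwise reflect the noise (use a - n), then clip to [0, 1].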
            a_t[0] = np.where((a + n > 0) & (a + n < 1), a + n,
                              a - n).clip(min=0, max=1)

            # execute action
            s_t1, r_t, done = env.step(a_t[0], j)
            # print(s_t1)
            print('reward ', r_t)

            buff.add(s_t, a_t[0], r_t, s_t1, done)  #Add the transition to the replay buffer

            scale = lambda x: x  # identity; placeholder for optional scaling of the sampled batch
            #Do the batch update
            batch = buff.getBatch(BATCH_SIZE)
            states = scale(np.asarray([e[0] for e in batch]))
            actions = scale(np.asarray([e[1] for e in batch]))
            rewards = scale(np.asarray([e[2] for e in batch]))
            new_states = scale(np.asarray([e[3] for e in batch]))
            dones = np.asarray([e[4] for e in batch])

            y_t = np.zeros([len(batch), action_dim])
            target_q_values = critic.target_model.predict(
                [new_states,
                 actor.target_model.predict(new_states)])

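            # Bellman targets: y = r for terminal transitions,
            # otherwise y = r + GAMMA * Q'(s', mu'(s')).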
            for k in range(len(batch)):
                if dones[k]:
                    y_t[k] = rewards[k]
                else:
                    y_t[k] = rewards[k] + GAMMA * target_q_values[k]

            if train_indicator and len(batch) >= BATCH_SIZE:
                loss = critic.model.train_on_batch([states, actions], y_t)
                a_for_grad = actor.model.predict(states)
                grads = critic.gradients(states, a_for_grad)
                # note: unlike train_on_batch above, actor.train does not return a loss value
                actor.train(states, grads)
                actor.target_train()
                critic.target_train()
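                # target_train() presumably applies the soft (Polyak) target update
                # used in DDPG: theta_target <- tau * theta + (1 - tau) * theta_target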
                with open(folder + 'lossLog.csv', 'a') as file:
                    file.write(pretty(loss) + '\n')

            total_reward += r_t
            s_t = s_t1

            for layer in actor.model.layers + critic.model.layers:
                if layer.name in layers_to_mind.keys():
                    L2[layer.name] = np.linalg.norm(
                        np.ravel(layer.get_weights()[0]) -
                        layers_to_mind[layer.name])
                    #                     vector_to_file(np.ravel(layer.get_weights()[0]), folder + 'weights_' + layer.name + 'Log.csv', 'a')
                    layers_to_mind[layer.name] = np.ravel(
                        layer.get_weights()[0])


            # Optional early-stopping check (disabled): stop once the weights barely change
            # if max(L2.values()) <= 0.02:
            #     wise = True

            if train_indicator and len(batch) >= BATCH_SIZE:
                vector_to_file([L2[x] for x in ltm],
                               folder + 'weightsL2' + 'Log.csv', 'a')

            vector_to_file(a_t_original[0], folder + 'actionLog.csv', 'a')
            vector_to_file(noise_t[0], folder + 'noiseLog.csv', 'a')

            if 'PRINT' in DDPG_config.keys() and DDPG_config['PRINT']:
                print("Episode", "%5d" % i, "Step", "%5d" % step, "Reward",
                      "%.6f" % r_t)
                print("Epsilon", "%.6f" % max(epsilon, 0))

                att_ = np.split(a_t[0], ACTIVE_NODES)
                for _ in range(ACTIVE_NODES):
                    att_[_] = np.insert(att_[_], _, -1)
                att_ = np.concatenate(att_)
                print("Action\n", att_.reshape(ACTIVE_NODES, ACTIVE_NODES))
                print(max(L2, key=L2.get), pretty(max(L2.values())))

            step += 1
            if done or wise:
                break

        if step % 1000 == 0:  # checkpoint roughly every 1000 steps (checked once per episode)
            if train_indicator:
                actor.model.save_weights(folder + "actormodel.h5",
                                         overwrite=True)
                actor.model.save_weights(folder + "actormodel" + str(step) +
                                         ".h5")
                with open(folder + "actormodel.json", "w") as outfile:
                    outfile.write(actor.model.to_json(indent=4) + '\n')

                critic.model.save_weights(folder + "criticmodel.h5",
                                          overwrite=True)
                critic.model.save_weights(folder + "criticmodel" + str(step) +
                                          ".h5")
                with open(folder + "criticmodel.json", "w") as outfile:
                    outfile.write(critic.model.to_json(indent=4) + '\n')

        print("TOTAL REWARD @ " + str(i) + "-th Episode  : Reward " +
              str(total_reward))
        print("Total Step: " + str(step))
        print("")

    env.end()  # This is for shutting down
    print("Finish.")


def vector_to_file(vector, file_name, action):
    # Write ('w') or append ('a') one comma-separated line of pretty-printed values
    string = ','.join(pretty(_) for _ in vector)
    with open(file_name, action) as file:
        return file.write(string + '\n')
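A minimal sketch of how playGame might be invoked (the JSON file name and the __main__ block are assumptions; all that is actually required is a dict containing the keys read inside playGame):

if __name__ == '__main__':
    import json

    # hypothetical config file providing EXPERIMENT, RSEED, ACTIVE_NODES, ENV,
    # MU, THETA, SIGMA, BUFFER_SIZE, BATCH_SIZE, GAMMA, EXPLORE,
    # EPISODE_COUNT, MAX_STEPS, ...
    with open('ddpg_config.json') as f:
        DDPG_config = json.load(f)

    playGame(DDPG_config, train_indicator=1)  # 1 = train, 0 = just run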
Example No. 3
 def _pretty(self, *args, **kwargs):
     return pretty(*args, **kwargs)
Example No. 5
 def _pretty(self, *args, **kwargs):
     # Python 2 variant: basestring covers both str and unicode
     if all(isinstance(a, basestring) for a in args):
         return pretty(*args, **kwargs)
     return args[0] if len(args) == 1 else args
 def _pretty(self, *args, **kwargs):
     if all(is_string(a) for a in args):
         return pretty(*args, **kwargs)
     return args[0] if len(args) == 1 else args
def lib_mandatory_named_and_kwargs(a, b=2, **kwargs):
    return pretty(a, b, **kwargs)
def var_args(*varargs):
    return pretty(*varargs)
Example No. 10
def lib_kwargs(**kwargs):
    return pretty(**kwargs)
Example No. 11
def lib_mandatory_named_varargs_and_kwargs(a, b='default', *args, **kwargs):
    return pretty(a, b, *args, **kwargs)
Example No. 12
def lib_mandatory_and_named_2(a, b='default', c='default'):
    return pretty(a, b, c)
Example No. 13
def lib_mandatory_and_named(a, b='default'):
    return pretty(a, b)
Example No. 14
def var_args(*varargs):
    return pretty(*varargs)
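Each of these wrappers simply forwards its arguments to pretty, so the following calls (hypothetical, assuming pretty is importable from the same library) are equivalent to calling pretty directly:

var_args(1, 2, 3)                  # same as pretty(1, 2, 3)
lib_mandatory_and_named('a')       # same as pretty('a', 'default')
lib_kwargs(key='value')            # same as pretty(key='value')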