import numpy as np
import tensorflow as tf

# Project-local classes and helpers (ActorNetwork, CriticNetwork, ReplayBuffer,
# OU, OmnetBalancerEnv, OmnetLinkweightEnv, setup_run, pretty) are assumed to
# be imported from elsewhere in this repository.


def playGame(DDPG_config, train_indicator=1):    # 1 means Train, 0 means simply Run
    # SETUP STARTS HERE
    if train_indicator > 0:
        folder = setup_run(DDPG_config)
    elif train_indicator == 0:
        folder = DDPG_config['EXPERIMENT']

    if DDPG_config['RSEED'] == 0:
        DDPG_config['RSEED'] = None
    np.random.seed(DDPG_config['RSEED'])

    ACTIVE_NODES = DDPG_config['ACTIVE_NODES']

    # Generate an environment
    if DDPG_config['ENV'] == 'balancing':
        env = OmnetBalancerEnv(DDPG_config, folder)
    elif DDPG_config['ENV'] == 'label':
        env = OmnetLinkweightEnv(DDPG_config, folder)

    action_dim, state_dim = env.a_dim, env.s_dim

    MU = DDPG_config['MU']
    THETA = DDPG_config['THETA']
    SIGMA = DDPG_config['SIGMA']

    ou = OU(action_dim, MU, THETA, SIGMA)    # Ornstein-Uhlenbeck process

    BUFFER_SIZE = DDPG_config['BUFFER_SIZE']
    BATCH_SIZE = DDPG_config['BATCH_SIZE']
    GAMMA = DDPG_config['GAMMA']
    EXPLORE = DDPG_config['EXPLORE']
    EPISODE_COUNT = DDPG_config['EPISODE_COUNT']
    MAX_STEPS = DDPG_config['MAX_STEPS']

    # an EXPLORE value <= 1 is read as a fraction of the total number of steps
    if EXPLORE <= 1:
        EXPLORE = EPISODE_COUNT * MAX_STEPS * EXPLORE
    # SETUP ENDS HERE

    reward = 0
    done = False
    wise = False
    step = 0
    epsilon = 1
    indicator = 0

    # TensorFlow GPU optimization
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    actor = ActorNetwork(sess, state_dim, action_dim, DDPG_config)
    critic = CriticNetwork(sess, state_dim, action_dim, DDPG_config)
    buff = ReplayBuffer(BUFFER_SIZE)    # create replay buffer

    # track the L2 distance each of these layers moves between steps
    ltm = ['a_h0', 'a_h1', 'a_V', 'c_w1', 'c_a1', 'c_h1', 'c_h3', 'c_V']
    layers_to_mind = {}
    L2 = {}
    for k in ltm:
        layers_to_mind[k] = 0
        L2[k] = 0
    vector_to_file(ltm, folder + 'weightsL2' + 'Log.csv', 'w')

    # Now load the weights
    try:
        actor.model.load_weights(folder + "actormodel.h5")
        critic.model.load_weights(folder + "criticmodel.h5")
        actor.target_model.load_weights(folder + "actormodel.h5")
        critic.target_model.load_weights(folder + "criticmodel.h5")
        print("Weights loaded successfully")
    except Exception:
        print("Cannot find the weights")

    print("OMNeT++ Experiment Start.")

    # initial state of the simulator
    s_t = env.reset()

    loss = 0
    for i in range(EPISODE_COUNT):
        print("Episode : " + str(i) + " Replay Buffer " + str(buff.count()))

        total_reward = 0

        for j in range(MAX_STEPS):
            print('step ', j)
            epsilon -= 1.0 / EXPLORE
            a_t = np.zeros([1, action_dim])
            noise_t = np.zeros([1, action_dim])

            a_t_original = actor.model.predict(s_t.reshape(1, s_t.shape[0]))

            # add exploration noise while training, except on steps 900-999 of
            # every 1000, where the raw policy is observed
            if train_indicator and epsilon > 0 and (step % 1000) // 100 != 9:
                noise_t[0] = epsilon * ou.evolve()

            a = a_t_original[0]
            n = noise_t[0]
            # add the noise where it keeps the action in (0, 1), else subtract it
            a_t[0] = np.where((a + n > 0) & (a + n < 1), a + n, a - n).clip(min=0, max=1)

            # execute action
            s_t1, r_t, done = env.step(a_t[0], j)
            # print(s_t1)
            print('reward ', r_t)

            buff.add(s_t, a_t[0], r_t, s_t1, done)    # add to replay buffer

            scale = lambda x: x    # placeholder for optional input scaling (identity)

            # Do the batch update
            batch = buff.getBatch(BATCH_SIZE)
            states = scale(np.asarray([e[0] for e in batch]))
            actions = scale(np.asarray([e[1] for e in batch]))
            rewards = scale(np.asarray([e[2] for e in batch]))
            new_states = scale(np.asarray([e[3] for e in batch]))
            dones = np.asarray([e[4] for e in batch])
            y_t = np.zeros([len(batch), action_dim])

            target_q_values = critic.target_model.predict(
                [new_states, actor.target_model.predict(new_states)])

            for k in range(len(batch)):
                if dones[k]:
                    y_t[k] = rewards[k]
                else:
                    y_t[k] = rewards[k] + GAMMA * target_q_values[k]

            if train_indicator and len(batch) >= BATCH_SIZE:
                loss = critic.model.train_on_batch([states, actions], y_t)
                a_for_grad = actor.model.predict(states)
                grads = critic.gradients(states, a_for_grad)
                # unlike train_on_batch above, actor.train applies the policy
                # gradients directly and returns no loss value
                actor.train(states, grads)
                actor.target_train()
                critic.target_train()
                with open(folder + 'lossLog.csv', 'a') as file:
                    file.write(pretty(loss) + '\n')

            total_reward += r_t
            s_t = s_t1

            for layer in actor.model.layers + critic.model.layers:
                if layer.name in layers_to_mind.keys():
                    L2[layer.name] = np.linalg.norm(
                        np.ravel(layer.get_weights()[0]) - layers_to_mind[layer.name])
                    # vector_to_file(np.ravel(layer.get_weights()[0]), folder + 'weights_' + layer.name + 'Log.csv', 'a')
                    layers_to_mind[layer.name] = np.ravel(layer.get_weights()[0])

            # if max(L2.values()) <= 0.02:
            #     wise = True

            if train_indicator and len(batch) >= BATCH_SIZE:
                vector_to_file([L2[x] for x in ltm], folder + 'weightsL2' + 'Log.csv', 'a')

            vector_to_file(a_t_original[0], folder + 'actionLog.csv', 'a')
            vector_to_file(noise_t[0], folder + 'noiseLog.csv', 'a')

            if 'PRINT' in DDPG_config.keys() and DDPG_config['PRINT']:
                print("Episode", "%5d" % i, "Step", "%5d" % step, "Reward", "%.6f" % r_t)
                print("Epsilon", "%.6f" % max(epsilon, 0))
                # show the action as an ACTIVE_NODES x ACTIVE_NODES matrix,
                # inserting -1 on the diagonal (a node has no link to itself)
                att_ = np.split(a_t[0], ACTIVE_NODES)
                for _ in range(ACTIVE_NODES):
                    att_[_] = np.insert(att_[_], _, -1)
                att_ = np.concatenate(att_)
                print("Action\n", att_.reshape(ACTIVE_NODES, ACTIVE_NODES))
                print(max(L2, key=L2.get), pretty(max(L2.values())))

            step += 1
            if done or wise:
                break

            if step % 1000 == 0:    # writes at every 1000th step
                if train_indicator:
                    actor.model.save_weights(folder + "actormodel.h5", overwrite=True)
                    actor.model.save_weights(folder + "actormodel" + str(step) + ".h5")
                    with open(folder + "actormodel.json", "w") as outfile:
                        outfile.write(actor.model.to_json(indent=4) + '\n')

                    critic.model.save_weights(folder + "criticmodel.h5", overwrite=True)
                    critic.model.save_weights(folder + "criticmodel" + str(step) + ".h5")
                    with open(folder + "criticmodel.json", "w") as outfile:
                        outfile.write(critic.model.to_json(indent=4) + '\n')

        print("TOTAL REWARD @ " + str(i) + "-th Episode : Reward " + str(total_reward))
        print("Total Step: " + str(step))
        print("")

    env.end()    # this is for shutting down the simulator
    print("Finish.")
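# The OU noise helper and the ReplayBuffer used by playGame() are defined
# elsewhere in the project. Two minimal sketches follow, assuming only the
# interfaces visible above (OU(action_dim, mu, theta, sigma) with an evolve()
# method; ReplayBuffer(size) with add/getBatch/count); the real classes may
# differ.

from collections import deque
import random


class OU(object):
    """Ornstein-Uhlenbeck process: dx = theta * (mu - x) + sigma * dW."""

    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.3):
        self.action_dim = action_dim
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.state = np.ones(action_dim) * mu

    def evolve(self):
        # mean-reverting drift plus Gaussian diffusion, one step with dt = 1
        dx = self.theta * (self.mu - self.state) \
            + self.sigma * np.random.randn(self.action_dim)
        self.state = self.state + dx
        return self.state


class ReplayBuffer(object):
    """FIFO experience buffer with uniform random sampling."""

    def __init__(self, buffer_size):
        self.buffer = deque(maxlen=buffer_size)

    def add(self, state, action, reward, new_state, done):
        # store one transition as the 5-tuple unpacked in the training loop
        self.buffer.append((state, action, reward, new_state, done))

    def getBatch(self, batch_size):
        # sample up to batch_size transitions uniformly at random
        return random.sample(list(self.buffer), min(len(self.buffer), batch_size))

    def count(self):
        return len(self.buffer)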
def vector_to_file(vector, file_name, action):
    # write one comma-separated row; action is the open() mode ('w' or 'a')
    string = ','.join(pretty(_) for _ in vector)
    with open(file_name, action) as file:
        return file.write(string + '\n')
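# vector_to_file() above, and several functions below, rely on a pretty()
# formatter defined elsewhere in the project. A minimal stand-in, assuming it
# joins all of its arguments into one comma-separated string and gives floats
# a fixed precision; the real helper may format differently:
def pretty(*args, **kwargs):
    def fmt(v):
        # fixed precision for floats, plain str() for everything else
        return '%.6f' % v if isinstance(v, float) else str(v)
    items = [fmt(a) for a in args]
    items += ['%s=%s' % (k, fmt(v)) for k, v in sorted(kwargs.items())]
    return ', '.join(items)

# Example: append one row of three action values to a CSV log.
# vector_to_file(np.array([0.1, 0.5, 0.9]), folder + 'actionLog.csv', 'a')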
# Three successive variants of a _pretty method that forwards to pretty().

def _pretty(self, *args, **kwargs):
    return pretty(*args, **kwargs)

# Variant that only formats when every positional argument is a string
# (Python 2 only: basestring does not exist in Python 3).
def _pretty(self, *args, **kwargs):
    if all(isinstance(a, basestring) for a in args):
        return pretty(*args, **kwargs)
    return args[0] if len(args) == 1 else args

# Python 2/3 compatible variant using an is_string() helper instead.
def _pretty(self, *args, **kwargs):
    if all(is_string(a) for a in args):
        return pretty(*args, **kwargs)
    return args[0] if len(args) == 1 else args
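# is_string() used by the last variant above is not defined in this section.
# A minimal Python 2/3 compatible sketch (hypothetical; the project may ship
# its own version):
import sys

if sys.version_info[0] >= 3:
    def is_string(item):
        # Python 3: all text is str
        return isinstance(item, str)
else:
    def is_string(item):
        # Python 2: accept both str and unicode
        return isinstance(item, basestring)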
# Thin wrappers exercising the different argument shapes pretty() accepts.

def lib_mandatory_named_and_kwargs(a, b=2, **kwargs):
    return pretty(a, b, **kwargs)

def var_args(*varargs):
    return pretty(*varargs)

def lib_kwargs(**kwargs):
    return pretty(**kwargs)

def lib_mandatory_named_varargs_and_kwargs(a, b='default', *args, **kwargs):
    return pretty(a, b, *args, **kwargs)

def lib_mandatory_and_named_2(a, b='default', c='default'):
    return pretty(a, b, c)

def lib_mandatory_and_named(a, b='default'):
    return pretty(a, b)
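# A quick usage sketch of the wrappers above. The expected outputs assume the
# pretty() stand-in defined earlier; the project's own pretty() may differ.
if __name__ == '__main__':
    print(lib_mandatory_and_named('a'))          # a, default
    print(var_args('a', 'b', 'c'))               # a, b, c
    print(lib_kwargs(x=1, y=2))                  # x=1, y=2
    print(lib_mandatory_named_varargs_and_kwargs('a', 'b', 'c', d=4))  # a, b, c, d=4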