def load_model(self, model_name):
    if self.actor_var:
        self.actor_var.set_weights_with_npz(model_name)
    else:
        with self.graph.as_default():
            K.set_session(self.sess)
            self.model.load_weights(model_name, by_name=True)

def __init__(self, model_info):
    """Initialize XingTian model.

    To avoid compatibility problems across TensorFlow versions, each
    model class holds its own graph and session. Models are created
    through the Keras API.

    :param model_info: model configuration dict
    """
    self.graph = tf.Graph()

    # Users could assign this within create_model.
    self.actor_var = None

    with self.graph.as_default():
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.compat.v1.Session(config=config)
        self.sess = sess
        K.set_session(self.sess)

        self.model_format = model_info.get('model_format')
        self.max_to_keep = model_info.get("max_to_keep", 100)
        self.model = self.create_model(model_info)

        if 'init_weights' in model_info:
            model_name = model_info['init_weights']
            try:
                self.load_model(model_name)
                print("load weight: {} success.".format(model_name))
            except BaseException:
                print("load weight: {} failed!".format(model_name))

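# Example (sketch): the shape of the `model_info` dict consumed above.
# Only the keys ('model_format', 'max_to_keep', 'init_weights') come from
# the constructor; the values and the class name are illustrative.
#
#     model_info = {
#         'model_format': 'pb',             # also export a frozen .pb on save
#         'max_to_keep': 100,               # checkpoint files to retain
#         'init_weights': 'init_model.h5',  # optional warm-start weights
#     }
#     model = XTModel(model_info)           # hypothetical class name
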
def train(self, state, label):
    with self.graph.as_default():
        K.set_session(self.sess)

        nbatch_train = BATCH_SIZE
        nbatch = state[0].shape[0]
        inds = np.arange(nbatch)
        loss_val = []
        start_time = time.time()
        # Run 4 epochs over the batch with shuffled minibatches.
        for _ in range(4):
            # Randomize the indexes
            np.random.shuffle(inds)
            # 0 to nbatch with nbatch_train step
            for start in range(0, nbatch, nbatch_train):
                end = start + nbatch_train
                mbinds = inds[start:end]
                feed_dict = {self.state: state[0][mbinds],
                             self.adv: state[1][mbinds],
                             self.old_p: state[2][mbinds],
                             self.old_v: state[3][mbinds],
                             self.target_p: label[0][mbinds],
                             self.target_v: label[1][mbinds]}
                ret_value = self.sess.run([self.train_op, self.loss],
                                          feed_dict)
                loss_val.append(np.mean(ret_value[1]))
        return np.mean(loss_val)

def load_model(self, model_name):
    if self.actor_var and str(model_name).endswith(".npz"):
        self.actor_var.set_weights_with_npz(model_name)
    else:
        with self.graph.as_default():
            K.set_session(self.sess)
            self.model.load_weights(model_name)

def __init__(self, model_info):
    """Initialize XingTian model.

    To avoid compatibility problems across TensorFlow versions, each
    model class holds its own graph and session. Models are created
    through the Keras API.

    :param model_info: model configuration dict
    """
    sess, self.graph = get_sess_graph()

    # Users could assign this within create_model.
    self.actor_var = None
    self._summary = model_info.get("summary", False)

    with self.graph.as_default():
        # Initialize the session within the graph context without
        # assigning the graph to the session explicitly.
        self.sess = sess
        K.set_session(self.sess)

        self.model_format = model_info.get('model_format')
        self.max_to_keep = model_info.get("max_to_keep", 100)
        self.model = self.create_model(model_info)

        if 'init_weights' in model_info:
            model_name = model_info['init_weights']
            try:
                self.load_model(model_name)
                print("load weight: {} success.".format(model_name))
            except BaseException:
                print("load weight: {} failed!".format(model_name))

def train(self, state, label, batch_size, verbose=False):
    """Train the model."""
    with self.graph.as_default():
        K.set_session(self.sess)
        history = self.model.fit(state, label,
                                 batch_size=batch_size,
                                 verbose=verbose)
        return history.history["loss"][0]

def predict(self, state):
    """Predict with the latest model."""
    with self.graph.as_default():
        K.set_session(self.sess)
        feed_dict = {self.infer_state: state[0], self.adv: state[1]}
        return self.sess.run([self.infer_p, self.infer_v], feed_dict)

def set_weights(self, weights):
    """Set weights from in-memory tensors."""
    # Distinguish XingTian npz weights (dict) from Keras weights (list).
    with self.graph.as_default():
        K.set_session(self.sess)
        if isinstance(weights, dict) and self.actor_var:
            self.actor_var.set_weights(weights)
        else:  # keras
            self.model.set_weights(weights)

def initial_inference(self, input_data):
    with self.graph.as_default():
        K.set_session(self.sess)
        feed_dict = {self.obs: input_data}
        policy, value, hidden = self.sess.run(self.init_infer, feed_dict)

        value = self.value_transform(value[0], self.value_support_size,
                                     self.value_min, self.value_max)
        return NetworkOutput(value, 0, policy[0], hidden[0])

def predict(self, state):
    """Predict with the latest model."""
    with self.graph.as_default():
        K.set_session(self.sess)
        return self.model.predict(state)

def predict(self, state):
    """Predict with the latest model."""
    with self.graph.as_default():
        K.set_session(self.sess)
        feed_dict = {self.infer_state: state}
        return self.sess.run(self.infer_v, feed_dict)

def save_model(self, file_name):
    """Save weights into a .h5 file."""
    # Enforce the max number of model files to keep.
    check_keep_model(os.path.dirname(file_name), self.max_to_keep)

    with self.graph.as_default():
        K.set_session(self.sess)
        self.model.save_weights(file_name + ".h5", overwrite=True)
    if self.model_format == 'pb':
        pb_model(self.model, file_name)
    return file_name + ".h5"

def train(self, state, label):
    with self.graph.as_default():
        K.set_session(self.sess)
        history = self.model.fit(x={'state_input': state[0],
                                    'adv': state[1]},
                                 y={'output_actions': label[0],
                                    'output_value': label[1]},
                                 batch_size=128,
                                 verbose=0)
        # Keras fit() returns a History object; report the scalar loss.
        return history.history['loss'][0]

def value_inference(self, input_data):
    with self.graph.as_default():
        K.set_session(self.sess)
        feed_dict = {self.obs: input_data}
        policy, value, hidden = self.sess.run(self.init_infer, feed_dict)

        value_list = []
        for value_data in value:
            value_list.append(self.value_transform(value_data,
                                                   self.value_support_size,
                                                   self.value_min,
                                                   self.value_max))
        return np.asarray(value_list)

def recurrent_inference(self, hidden_state, action):
    with self.graph.as_default():
        K.set_session(self.sess)
        # Condition the hidden state on a one-hot encoding of the action.
        action = np.expand_dims(np.eye(self.action_dim)[action], 0)
        hidden_state = np.expand_dims(hidden_state, 0)
        conditioned_hidden = np.hstack((hidden_state, action))

        feed_dict = {self.conditioned_hidden: conditioned_hidden}
        hidden, reward, policy, value = self.sess.run(self.rec_infer,
                                                      feed_dict)

        value = self.value_transform(value[0], self.value_support_size,
                                     self.value_min, self.value_max)
        reward = self.value_transform(reward[0], self.reward_support_size,
                                      self.reward_min, self.reward_max)
        return NetworkOutput(value, reward, policy[0], hidden[0])

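# Sanity-check sketch of the action-conditioning step in recurrent_inference,
# using assumed sizes (hidden size 8, four discrete actions); NumPy only.
import numpy as np

_action_dim = 4                       # assumed number of discrete actions
_hidden = np.zeros(8)                 # assumed hidden-state vector
_action = 2                           # sample action index

_one_hot = np.expand_dims(np.eye(_action_dim)[_action], 0)  # shape (1, 4)
_hidden_b = np.expand_dims(_hidden, 0)                      # shape (1, 8)
_conditioned = np.hstack((_hidden_b, _one_hot))             # shape (1, 12)
assert _conditioned.shape == (1, _hidden.size + _action_dim)
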
def train(self, state, label):
    with self.graph.as_default():
        K.set_session(self.sess)
        target_value = self.conver_value(label[0], self.value_support_size,
                                         self.value_min, self.value_max)
        target_reward = self.conver_value(label[1], self.reward_support_size,
                                          self.reward_min, self.reward_max)
        feed_dict = {self.obs: state[0],
                     self.action: state[1],
                     self.loss_weights: state[2],
                     self.target_value: target_value,
                     self.target_reward: target_reward,
                     self.target_policy: label[2]}
        _, loss = self.sess.run([self.train_op, self.loss], feed_dict)
        return np.mean(loss)

def create_model(self, model_info):
    """Create Deep-Q CNN network."""
    state = Input(shape=self.state_dim, dtype="uint8")
    state1 = Lambda(lambda x: K.cast(x, dtype='float32') / 255.)(state)
    convlayer = Conv2D(32, (8, 8), strides=(4, 4), activation='relu',
                       padding='valid')(state1)
    convlayer = Conv2D(64, (4, 4), strides=(2, 2), activation='relu',
                       padding='valid')(convlayer)
    convlayer = Conv2D(64, (3, 3), strides=(1, 1), activation='relu',
                       padding='valid')(convlayer)
    flattenlayer = Flatten()(convlayer)
    denselayer = Dense(256, activation='relu')(flattenlayer)
    value = Dense(self.action_dim, activation='linear')(denselayer)
    model = Model(inputs=state, outputs=value)

    adam = Adam(lr=self.learning_rate, clipnorm=10.)
    model.compile(loss='mse', optimizer=adam)
    if model_info.get("summary"):
        model.summary()

    self.infer_state = tf.placeholder(tf.uint8, name="infer_input",
                                      shape=(None,) + tuple(self.state_dim))
    self.infer_v = model(self.infer_state)
    self.actor_var = TFVariables([self.infer_v], self.sess)

    # tf.initialize_all_variables() is deprecated; use the global
    # variables initializer instead.
    self.sess.run(tf.global_variables_initializer())
    return model

def save_model(self, file_name):
    """Save weights into a .npz (XingTian) or Keras weights file."""
    # Enforce the max number of model files to keep.
    if self.max_to_keep > -1:
        check_keep_model(os.path.dirname(file_name), self.max_to_keep)

    if self.actor_var:
        self.actor_var.save_weights(file_name + ".npz")
    else:
        with self.graph.as_default():  # keras
            K.set_session(self.sess)
            self.model.save_weights(file_name)

    if self.model_format == 'pb':
        pb_model(self.model, file_name)
    return file_name + ".npz"

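# Usage sketch for the save/load pair above (paths are illustrative):
#
#     ckpt = model.save_model("ckpt/actor_00100")  # -> "ckpt/actor_00100.npz"
#     model.load_model(ckpt)                       # .npz -> actor_var branch
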
def pb_model(h5_model, file_name, out_prefix="output_"):
    """Export the Keras model as a frozen .pb file.

    :param h5_model: compiled Keras model to export
    :param file_name: checkpoint path; the .pb is written next to it
    :param out_prefix: name prefix for the exported output nodes
    """
    output_dir = os.path.dirname(file_name) + "/" + "pb_model"
    model_name = os.path.basename(file_name) + ".pb"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    out_nodes = []
    for i in range(len(h5_model.outputs)):
        out_nodes.append(out_prefix + str(i + 1))
        tf.identity(h5_model.outputs[i], out_prefix + str(i + 1))

    sess = K.get_session()
    init_graph = sess.graph.as_graph_def()
    main_graph = graph_util.convert_variables_to_constants(
        sess, init_graph, out_nodes)
    graph_io.write_graph(main_graph, output_dir,
                         name=model_name, as_text=False)

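# Consuming the exported file follows the standard TF1 frozen-graph pattern.
# A minimal sketch, assuming pb_model ran on "ckpt/actor_00100" with a single
# output node (out_prefix + "1" == "output_1"); the path is illustrative.
import tensorflow as tf

graph_def = tf.compat.v1.GraphDef()
with tf.io.gfile.GFile("ckpt/pb_model/actor_00100.pb", "rb") as f:
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as frozen_graph:
    tf.import_graph_def(graph_def, name="")
    # The node name comes from pb_model's out_prefix.
    infer_out = frozen_graph.get_tensor_by_name("output_1:0")
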
def layer_function(x):
    """Normalize uint8 image data into [0, 1]."""
    return K.cast(x, dtype='float32') / 255.

def set_weights(self, weights):
    """Set the new weights."""
    with self.graph.as_default():
        K.set_session(self.sess)
        self.model.set_weights(weights)

def get_grad(self, data):
    with self.graph.as_default():
        K.set_session(self.sess)
        # Return the gradients instead of discarding them.
        return self.model.get_grad(data)

def train(self, state, label):
    """Train the model."""
    with self.graph.as_default():
        K.set_session(self.sess)
        loss = self.model.train_on_batch(state, label)
        return loss

def load_model(self, model_name, by_name=False):
    with self.graph.as_default():
        K.set_session(self.sess)
        self.model.load_weights(model_name, by_name)

def get_weights(self):
    """Return the weights of the model."""
    with self.graph.as_default():
        K.set_session(self.sess)
        return self.model.get_weights()

def loss(y_true, y_pred):
    """Policy-gradient loss with an entropy bonus.

    `advantage` and `ENTROPY_LOSS` are captured from the enclosing scope.
    """
    policy = y_pred
    log_policy = K.log(policy + 1e-10)      # epsilon avoids log(0)
    entropy = (-policy * log_policy)        # exploration bonus term
    cross_entropy = (-y_true * log_policy)  # policy-gradient term
    return K.mean(advantage * cross_entropy - ENTROPY_LOSS * entropy)

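# NumPy sketch of the same loss for a single sample, handy for sanity checks;
# the advantage and entropy-coefficient values are illustrative.
import numpy as np

_policy = np.array([0.7, 0.2, 0.1])   # predicted action probabilities
_y_true = np.array([1.0, 0.0, 0.0])   # one-hot encoding of the taken action
_advantage = 1.5                      # illustrative advantage estimate
_entropy_beta = 0.01                  # illustrative ENTROPY_LOSS value

_log_policy = np.log(_policy + 1e-10)
_entropy = -_policy * _log_policy           # exploration bonus term
_cross_entropy = -_y_true * _log_policy     # policy-gradient term
_loss = np.mean(_advantage * _cross_entropy - _entropy_beta * _entropy)
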