Example #1
 def load_model(self, model_name):
     if self.actor_var:
         self.actor_var.set_weights_with_npz(model_name)
     else:
         with self.graph.as_default():
             K.set_session(self.sess)
             self.model.load_weights(model_name, by_name=True)
Example #2
    def __init__(self, model_info):
        """
        Initialize the XingTian model.

        To avoid compatibility problems across TensorFlow versions, the
        model class holds its own graph and session. Models are created
        through the Keras API.
        :param model_info: model configuration dictionary
        """
        self.graph = tf.Graph()

        # The user can assign this within create_model.
        self.actor_var = None

        with self.graph.as_default():
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            sess = tf.compat.v1.Session(config=config)
            self.sess = sess
            K.set_session(self.sess)
            self.model_format = model_info.get('model_format')
            self.max_to_keep = model_info.get("max_to_keep", 100)
            self.model = self.create_model(model_info)
            if 'init_weights' in model_info:
                model_name = model_info['init_weights']
                try:
                    self.load_model(model_name)
                    print("load weight: {} success.".format(model_name))
                except BaseException:
                    print("load weight: {} failed!".format(model_name))
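For context, here is a minimal sketch of the model_info dictionary this constructor expects; the key names come from the snippets in this listing, while the concrete values are only illustrative assumptions.

    # Hypothetical configuration; key names appear in the examples in this
    # listing, the values are illustrative only.
    model_info = {
        "model_format": "h5",    # set to "pb" to also export a frozen graph
        "max_to_keep": 100,      # how many checkpoint files to retain
        # "init_weights": "./models/actor_init.h5",  # optional warm start
    }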
Example #3
    def train(self, state, label):
        with self.graph.as_default():
            K.set_session(self.sess)
            nbatch_train = BATCH_SIZE
            nbatch = state[0].shape[0]

            inds = np.arange(nbatch)
            loss_val = []
            start_time = time.time()
            for _ in range(4):  # number of passes over the collected batch
                # Shuffle the indices
                np.random.shuffle(inds)
                # Iterate from 0 to nbatch in steps of nbatch_train
                for start in range(0, nbatch, nbatch_train):
                    end = start + nbatch_train
                    mbinds = inds[start:end]

                    feed_dict = {self.state: state[0][mbinds],
                                 self.adv: state[1][mbinds],
                                 self.old_p: state[2][mbinds],
                                 self.old_v: state[3][mbinds],
                                 self.target_p: label[0][mbinds],
                                 self.target_v: label[1][mbinds],}
                    ret_value = self.sess.run([self.train_op, self.loss], feed_dict)

                    loss_val.append(np.mean(ret_value[1]))

            return np.mean(loss_val)
Example #4
 def load_model(self, model_name):
     if self.actor_var and str(model_name).endswith(".npz"):
         self.actor_var.set_weights_with_npz(model_name)
     else:
         with self.graph.as_default():
             K.set_session(self.sess)
             self.model.load_weights(model_name)
Example #5
    def __init__(self, model_info):
        """
        Initialize the XingTian model.

        To avoid compatibility problems across TensorFlow versions, the
        model class holds its own graph and session. Models are created
        through the Keras API.
        :param model_info: model configuration dictionary
        """
        sess, self.graph = get_sess_graph()
        # The user can assign this within create_model.
        self.actor_var = None
        self._summary = model_info.get("summary", False)

        with self.graph.as_default():
            # Initialize the session inside the graph without explicitly binding the graph to it.
            self.sess = sess
            K.set_session(self.sess)
            self.model_format = model_info.get('model_format')
            self.max_to_keep = model_info.get("max_to_keep", 100)
            self.model = self.create_model(model_info)
            if 'init_weights' in model_info:
                model_name = model_info['init_weights']
                try:
                    self.load_model(model_name)
                    print("load weight: {} success.".format(model_name))
                except BaseException:
                    print("load weight: {} failed!".format(model_name))
Example #6
 def train(self, state, label, batch_size, verbose=False):
     """Train the model."""
     with self.graph.as_default():
         K.set_session(self.sess)
         history = self.model.fit(
             state, label, batch_size=batch_size, verbose=verbose
         )
         return history.history["loss"][0]
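A hedged usage sketch for this train method, assuming `dqn` is a hypothetical instance whose create_model builds the single-output DQN shown later in this listing; the shapes, action dimension, and batch size are illustrative assumptions.

    import numpy as np

    # Illustrative call; shapes assume Atari-style (84, 84, 4) uint8 frames
    # and one Q-value target per action (assumed action_dim == 4).
    state = np.random.randint(0, 256, size=(32, 84, 84, 4), dtype=np.uint8)
    label = np.random.rand(32, 4).astype(np.float32)
    loss = dqn.train(state, label, batch_size=32)   # `dqn` is a hypothetical instance
    print("batch loss:", loss)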
Example #7
 def predict(self, state):
     """
     Predict using the latest model.
     """
     with self.graph.as_default():
         K.set_session(self.sess)
         feed_dict = {self.infer_state: state[0], self.adv: state[1]}
         return self.sess.run([self.infer_p, self.infer_v], feed_dict)
Example #8
 def set_weights(self, weights):
     """Set weights from in-memory values."""
     # Distinguish XingTian TFVariables (dict) from plain Keras weights (list of arrays).
     with self.graph.as_default():
         K.set_session(self.sess)
         if isinstance(weights, dict) and self.actor_var:
             self.actor_var.set_weights(weights)
         else:  # keras
             self.model.set_weights(weights)
Example #9
    def initial_inference(self, input_data):
        with self.graph.as_default():
            K.set_session(self.sess)

            feed_dict = {self.obs: input_data}
            policy, value, hidden = self.sess.run(self.init_infer, feed_dict)
            value = self.value_transform(value[0], self.value_support_size, self.value_min, self.value_max)

        return NetworkOutput(value, 0, policy[0], hidden[0])
Example #10
 def predict(self, state):
     """
     Predict using the latest model.
     :param state: input state batch
     :return: model prediction
     """
     with self.graph.as_default():
         K.set_session(self.sess)
         return self.model.predict(state)
Example #11
 def predict(self, state):
     """
     Predict using the latest model.
     :param state: input state batch
     :return: inferred state value
     """
     with self.graph.as_default():
         K.set_session(self.sess)
         feed_dict = {self.infer_state: state}
         return self.sess.run(self.infer_v, feed_dict)
Example #12
    def save_model(self, file_name):
        """Save weights into a .h5 file."""
        # Limit the number of model files kept on disk.
        check_keep_model(os.path.dirname(file_name), self.max_to_keep)

        with self.graph.as_default():
            K.set_session(self.sess)
            self.model.save_weights(file_name + ".h5", overwrite=True)
        if self.model_format == 'pb':
            pb_model(self.model, file_name)
        return file_name + ".h5"
Example #13
 def train(self, state, label):
     with self.graph.as_default():
         K.set_session(self.sess)
         history = self.model.fit(
             x={'state_input': state[0], 'adv': state[1]},
             y={"output_actions": label[0], "output_value": label[1]},
             batch_size=128,
             verbose=0)
         # model.fit returns a History object; return this epoch's loss value.
         return history.history["loss"][0]
Example #14
    def value_inference(self, input_data):
        with self.graph.as_default():
            K.set_session(self.sess)

            feed_dict = {self.obs: input_data}
            policy, value, hidden = self.sess.run(self.init_infer, feed_dict)

            value_list = []
            for value_data in value:
                value_list.append(self.value_transform(value_data, self.value_support_size, self.value_min, self.value_max))

        return np.asarray(value_list)
Example #15
    def recurrent_inference(self, hidden_state, action):
        with self.graph.as_default():
            K.set_session(self.sess)
            action = np.expand_dims(np.eye(self.action_dim)[action], 0)
            hidden_state = np.expand_dims(hidden_state, 0)
            conditioned_hidden = np.hstack((hidden_state, action))
            feed_dict = {self.conditioned_hidden: conditioned_hidden}
            hidden, reward, policy, value = self.sess.run(self.rec_infer, feed_dict)

            value = self.value_transform(value[0], self.value_support_size, self.value_min, self.value_max)
            reward = self.value_transform(reward[0], self.reward_support_size, self.reward_min, self.reward_max)

        return NetworkOutput(value, reward, policy[0], hidden[0])
Example #16
    def train(self, state, label):
        with self.graph.as_default():
            K.set_session(self.sess)

            target_value = self.conver_value(label[0], self.value_support_size, self.value_min, self.value_max)
            target_reward = self.conver_value(label[1], self.reward_support_size, self.reward_min, self.reward_max)

            feed_dict = {self.obs: state[0],
                         self.action: state[1],
                         self.loss_weights: state[2],
                         self.target_value: target_value,
                         self.target_reward: target_reward,
                         self.target_policy: label[2]}
            _, loss = self.sess.run([self.train_op, self.loss], feed_dict)

            return np.mean(loss)
Example #17
    def create_model(self, model_info):
        """Create Deep-Q CNN network."""
        state = Input(shape=self.state_dim, dtype="uint8")
        state1 = Lambda(lambda x: K.cast(x, dtype='float32') / 255.)(state)
        convlayer = Conv2D(32, (8, 8),
                           strides=(4, 4),
                           activation='relu',
                           padding='valid')(state1)
        convlayer = Conv2D(64, (4, 4),
                           strides=(2, 2),
                           activation='relu',
                           padding='valid')(convlayer)
        convlayer = Conv2D(64, (3, 3),
                           strides=(1, 1),
                           activation='relu',
                           padding='valid')(convlayer)
        flattenlayer = Flatten()(convlayer)
        denselayer = Dense(256, activation='relu')(flattenlayer)
        value = Dense(self.action_dim, activation='linear')(denselayer)
        model = Model(inputs=state, outputs=value)
        adam = Adam(lr=self.learning_rate, clipnorm=10.)
        model.compile(loss='mse', optimizer=adam)
        if model_info.get("summary"):
            model.summary()

        self.infer_state = tf.placeholder(tf.uint8,
                                          name="infer_input",
                                          shape=(None, ) +
                                          tuple(self.state_dim))
        self.infer_v = model(self.infer_state)
        self.actor_var = TFVariables([self.infer_v], self.sess)

        self.sess.run(tf.initialize_all_variables())
        return model
Example #18
    def save_model(self, file_name):
        """Save weights into an .npz file (or via Keras when actor_var is unset)."""
        # Limit the number of model files kept on disk.
        if self.max_to_keep > -1:
            check_keep_model(os.path.dirname(file_name), self.max_to_keep)

        if self.actor_var:
            self.actor_var.save_weights(file_name + ".npz")
        else:
            with self.graph.as_default():  # keras
                K.set_session(self.sess)
                self.model.save_weights(file_name)

        if self.model_format == 'pb':
            pb_model(self.model, file_name)
        return file_name + ".npz"
Example #19
def pb_model(h5_model, file_name, out_prefix="output_"):
    """
    Export the Keras model as a frozen .pb graph file.

    :param h5_model: Keras model to export
    :param file_name: base path of the output file
    :param out_prefix: prefix used for the output node names
    """
    output_dir = os.path.dirname(file_name) + "/" + "pb_model"
    model_name = os.path.basename(file_name) + ".pb"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    out_nodes = []
    for i in range(len(h5_model.outputs)):
        out_nodes.append(out_prefix + str(i + 1))
        tf.identity(h5_model.outputs[i], out_prefix + str(i + 1))
    sess = K.get_session()
    init_graph = sess.graph.as_graph_def()
    main_graph = graph_util.convert_variables_to_constants(
        sess, init_graph, out_nodes)
    graph_io.write_graph(main_graph,
                         output_dir,
                         name=model_name,
                         as_text=False)
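A usage sketch for pb_model, assuming a TensorFlow 1.x style Keras backend (so that K.get_session() inside the function returns the active session); the model architecture and paths are illustrative assumptions.

    import os
    from tensorflow.keras.layers import Dense, Input
    from tensorflow.keras.models import Model

    inputs = Input(shape=(4,))
    outputs = Dense(2, activation="softmax")(inputs)
    model = Model(inputs=inputs, outputs=outputs)

    # pb_model() writes <dirname>/pb_model/<basename>.pb next to the given path.
    os.makedirs("./checkpoints", exist_ok=True)
    pb_model(model, "./checkpoints/actor_00001")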
Example #20
def layer_function(x):
    """Cast uint8 input to float32 and scale it to [0, 1]."""
    return K.cast(x, dtype='float32') / 255.
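For illustration, layer_function can be wrapped in a Keras Lambda layer to normalize raw uint8 observations inside the graph, mirroring the inline Lambda in the DQN create_model above; the input shape is an assumption.

    from tensorflow.keras.layers import Input, Lambda

    state = Input(shape=(84, 84, 4), dtype="uint8")   # assumed Atari-style frames
    state_norm = Lambda(layer_function)(state)        # cast to float32 and divide by 255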
Example #21
 def set_weights(self, weights):
     """Set the new weights on the Keras model."""
     with self.graph.as_default():
         K.set_session(self.sess)
         self.model.set_weights(weights)
Example #22
 def get_grad(self, data):
     """Delegate gradient computation to the underlying model."""
     with self.graph.as_default():
         K.set_session(self.sess)
         self.model.get_grad(data)
Example #23
 def train(self, state, label):
     """Train the model."""
     with self.graph.as_default():
         K.set_session(self.sess)
         loss = self.model.train_on_batch(state, label)
         return loss
Example #24
 def load_model(self, model_name, by_name=False):
     with self.graph.as_default():
         K.set_session(self.sess)
         self.model.load_weights(model_name, by_name=by_name)
Example #25
 def get_weights(self):
     """Return the weights of the model."""
     with self.graph.as_default():
         K.set_session(self.sess)
         return self.model.get_weights()
Example #26
 def loss(y_true, y_pred):
     policy = y_pred
     log_policy = K.log(policy + 1e-10)
     entropy = (-policy * log_policy)
     cross_entropy = (-y_true * log_policy)
     return K.mean(advantage * cross_entropy - ENTROPY_LOSS * entropy)
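This loss closes over an advantage tensor and an ENTROPY_LOSS coefficient from an enclosing scope. Below is a hedged sketch of one common way such a closure is wired into model.compile for a policy head, with the advantage fed as an extra Input; the layer sizes and the ENTROPY_LOSS value are assumptions, and the pattern presumes graph-mode (TF1-style) Keras as used throughout this listing.

    from tensorflow.keras import backend as K
    from tensorflow.keras.layers import Dense, Input
    from tensorflow.keras.models import Model
    from tensorflow.keras.optimizers import Adam

    ENTROPY_LOSS = 0.01                                   # assumed entropy bonus weight

    state_input = Input(shape=(4,), name="state_input")
    advantage = Input(shape=(1,), name="adv")             # advantage fed as an extra input
    hidden = Dense(64, activation="relu")(state_input)
    output_actions = Dense(2, activation="softmax", name="output_actions")(hidden)

    def loss(y_true, y_pred):
        policy = y_pred
        log_policy = K.log(policy + 1e-10)
        entropy = -policy * log_policy
        cross_entropy = -y_true * log_policy
        return K.mean(advantage * cross_entropy - ENTROPY_LOSS * entropy)

    model = Model(inputs=[state_input, advantage], outputs=output_actions)
    model.compile(optimizer=Adam(lr=1e-3), loss=loss)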