def create_model(self, model_info):
    """Build the convolutional actor-critic model and its inference tensors.

    The model takes a uint8 state and a one-element advantage input, and
    outputs a softmax over ``self.action_dim`` actions plus a single value.
    As side effects this sets ``self.infer_state``, ``self.adv``,
    ``self.infer_p`` and ``self.infer_v`` and runs variable initialization
    on ``self.sess``.

    Args:
        model_info: Not read by this method.

    Returns:
        The compiled Keras ``Model``.
    """
    obs_in = Input(shape=self.state_dim, name='state_input', dtype='uint8')
    # layer_function is defined outside this block.
    # NOTE(review): presumably converts the raw uint8 input to floats -- confirm.
    features = Lambda(layer_function)(obs_in)
    adv_in = Input(shape=(1, ), name='adv')

    # (filters, kernel size, strides) for each convolution, applied in order.
    conv_specs = (
        (32, (8, 8), (4, 4)),
        (64, (4, 4), (2, 2)),
        (64, (3, 3), (1, 1)),
    )
    for n_filters, kernel, stride in conv_specs:
        features = Conv2D(n_filters, kernel, strides=stride,
                          activation='relu', padding='valid')(features)
    features = Flatten()(features)
    features = Dense(256, activation='relu')(features)

    # Both heads branch from the same 256-unit layer.
    policy_head = Dense(self.action_dim, activation='softmax',
                        name='output_actions')(features)
    value_head = Dense(1, name='output_value')(features)
    net = Model(inputs=[obs_in, adv_in], outputs=[policy_head, value_head])

    decay_value = 0.00000000512
    optimizer = Adam(lr=LR, clipnorm=40., decay=decay_value)
    net.compile(
        optimizer=optimizer,
        loss={"output_actions": impala_loss(adv_in), "output_value": 'mse'},
        loss_weights={"output_actions": 1.0, "output_value": .5},
    )

    # Placeholders used to call the model directly through the session.
    infer_shape = (None,) + tuple(self.state_dim)
    self.infer_state = tf.placeholder(tf.uint8, name="infer_state", shape=infer_shape)
    self.adv = tf.placeholder(tf.float32, name="adv", shape=(None, 1))
    self.infer_p, self.infer_v = net([self.infer_state, self.adv])

    self.sess.run(tf.initialize_all_variables())
    return net
def create_model(self, model_info):
    """Build the fully-connected actor-critic model and its inference tensors.

    A stack of ``HIDDEN_SIZE``-wide ReLU layers feeds a softmax policy head
    (``self.action_dim`` units) and a one-unit value head. As side effects
    this sets ``self.infer_state``, ``self.adv``, ``self.infer_p``,
    ``self.infer_v`` and ``self.actor_var`` and runs variable initialization
    on ``self.sess``.

    Args:
        model_info: Not read by this method.

    Returns:
        The compiled Keras ``Model``.
    """
    obs_in = Input(shape=self.state_dim, name='state_input')
    adv_in = Input(shape=(1, ), name='adv')

    # The first hidden layer is always created; NUM_LAYERS - 1 more follow.
    trunk = Dense(HIDDEN_SIZE, activation='relu')(obs_in)
    for _ in range(NUM_LAYERS - 1):
        trunk = Dense(HIDDEN_SIZE, activation='relu')(trunk)

    policy_head = Dense(self.action_dim, activation='softmax',
                        name='output_actions')(trunk)
    value_head = Dense(1, name='output_value')(trunk)
    net = Model(inputs=[obs_in, adv_in], outputs=[policy_head, value_head])

    net.compile(
        optimizer=Adam(lr=LR),
        loss={"output_actions": impala_loss(adv_in), "output_value": 'mse'},
        loss_weights={"output_actions": 1.0, "output_value": .5},
    )

    # Placeholders used to call the model directly through the session.
    infer_shape = (None, ) + tuple(self.state_dim)
    self.infer_state = tf.placeholder(tf.float32, name="infer_state", shape=infer_shape)
    self.adv = tf.placeholder(tf.float32, name="adv", shape=(None, 1))
    self.infer_p, self.infer_v = net([self.infer_state, self.adv])
    self.actor_var = TFVariables([self.infer_p, self.infer_v], self.sess)

    self.sess.run(tf.initialize_all_variables())
    return net
def create_dyn_network(self):
    """Build the dynamics network.

    The input is a flat vector of width ``HIDDEN_OUT + self.action_dim``.
    It passes through 256- and 128-unit ReLU layers, and both outputs branch
    from the 128-unit layer.

    Returns:
        A Keras ``Model`` with outputs ``[out_h, out_r]``: a
        ``HIDDEN_OUT``-wide ReLU output and a softmax over
        ``self.reward_support_size`` entries.
    """
    # Keras documents ``shape`` as a tuple; a bare int is not guaranteed to be
    # accepted by every Keras/TF version, so pass an explicit 1-tuple.
    conditioned_hidden = Input(shape=(HIDDEN_OUT + self.action_dim, ))
    hidden = Dense(256, activation='relu')(conditioned_hidden)
    hidden = Dense(128, activation='relu')(hidden)
    out_h = Dense(HIDDEN_OUT, activation='relu')(hidden)
    out_r = Dense(self.reward_support_size, activation='softmax')(hidden)
    return Model(inputs=conditioned_hidden, outputs=[out_h, out_r])
def get_mlp_backbone(state_dim, act_dim, hidden_sizes, activation,
                     vf_share_layers=False, summary=False):
    """Assemble an MLP model with a policy-latent output and a value output.

    Args:
        state_dim: Shape passed to the ``obs`` input layer.
        act_dim: Number of units of the ``pi_latent`` output.
        hidden_sizes: Forwarded to ``bulid_mlp_layers``.
        activation: Forwarded to ``bulid_mlp_layers``.
        vf_share_layers: When true, both heads sit on one ``'shared'`` stack;
            otherwise separate ``'pi'`` and ``'v'`` stacks are built.
        summary: When true, print the model summary.

    Returns:
        A Keras ``Model`` mapping ``obs`` to ``[pi_latent, output_value]``.
    """
    obs = Input(shape=state_dim, name='obs')

    if vf_share_layers:
        trunk = bulid_mlp_layers(obs, hidden_sizes, activation, 'shared')
        pi_latent = Dense(act_dim, activation=None, name='pi_latent')(trunk)
        out_value = Dense(1, activation=None, name='output_value')(trunk)
    else:
        # Independent stacks: policy stack and head first, then the value side.
        pi_trunk = bulid_mlp_layers(obs, hidden_sizes, activation, 'pi')
        pi_latent = Dense(act_dim, activation=None, name='pi_latent')(pi_trunk)
        v_trunk = bulid_mlp_layers(obs, hidden_sizes, activation, 'v')
        out_value = Dense(1, activation=None, name='output_value')(v_trunk)

    backbone = Model(inputs=[obs], outputs=[pi_latent, out_value])
    if summary:
        backbone.summary()
    return backbone
def create_model(self, model_info):
    """Build the fully-connected Q-network, optionally with a dueling head.

    When ``self.dueling`` is set, an extra one-unit linear head is combined
    with the ``self.action_dim``-wide head through the externally defined
    ``layer_normalize`` and ``layer_add`` helpers. As side effects this sets
    ``self.infer_state``, ``self.infer_v`` and ``self.actor_var`` and runs
    variable initialization on ``self.sess``.

    Args:
        model_info: Not read by this method.

    Returns:
        The compiled Keras ``Model``.
    """
    obs = Input(shape=self.state_dim)

    # The first hidden layer is always created; NUM_LAYERS - 1 more follow.
    trunk = Dense(HIDDEN_SIZE, activation='relu')(obs)
    for _ in range(NUM_LAYERS - 1):
        trunk = Dense(HIDDEN_SIZE, activation='relu')(trunk)

    q_out = Dense(self.action_dim, activation='linear')(trunk)
    if self.dueling:
        scalar_head = Dense(1, activation='linear')(trunk)
        # NOTE(review): layer_normalize presumably centers the per-action
        # outputs and layer_add sums the two streams -- confirm in the helpers.
        centered = Lambda(layer_normalize)(q_out)
        q_out = Lambda(layer_add)([scalar_head, centered])

    net = Model(inputs=obs, outputs=q_out)
    net.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))

    # Placeholder used to call the model directly through the session.
    infer_shape = (None, ) + tuple(self.state_dim)
    self.infer_state = tf.placeholder(tf.float32, name="infer_input", shape=infer_shape)
    self.infer_v = net(self.infer_state)
    self.actor_var = TFVariables([self.infer_v], self.sess)

    self.sess.run(tf.initialize_all_variables())
    return net
def create_dyn_network(self):
    """Build the dynamics network.

    The input is a flat vector of width ``HIDDEN2_UNITS + self.action_dim``.
    One ``HIDDEN1_UNITS``-wide ReLU layer feeds both outputs.

    Returns:
        A Keras ``Model`` with outputs ``[out_h, out_r]``: a
        ``HIDDEN2_UNITS``-wide ReLU output and a softmax over
        ``self.reward_support_size`` entries.
    """
    # Keras documents ``shape`` as a tuple; a bare int is not guaranteed to be
    # accepted by every Keras/TF version, so pass an explicit 1-tuple.
    conditioned_hidden = Input(shape=(HIDDEN2_UNITS + self.action_dim, ))
    hidden = Dense(HIDDEN1_UNITS, activation='relu')(conditioned_hidden)
    # Normalizing out_h via hidden_normlize is currently disabled.
    out_h = Dense(HIDDEN2_UNITS, activation='relu')(hidden)
    # The reward head branches from ``hidden``, not from ``out_h``.
    out_r = Dense(self.reward_support_size, activation='softmax')(hidden)
    return Model(inputs=conditioned_hidden, outputs=[out_h, out_r])
def create_model(self, model_info):
    """Build the convolutional Q-network and its inference tensors.

    The uint8 input is cast to float32 and divided by 255 before three
    convolutions, a 256-unit dense layer and a linear output of
    ``self.action_dim`` units. As side effects this sets
    ``self.infer_state``, ``self.infer_v`` and ``self.actor_var`` and runs
    variable initialization on ``self.sess``.

    Args:
        model_info: Mapping; the model summary is printed when its
            ``"summary"`` entry is truthy.

    Returns:
        The compiled Keras ``Model``.
    """
    obs = Input(shape=self.state_dim, dtype="uint8")
    features = Lambda(lambda x: K.cast(x, dtype='float32') / 255.)(obs)

    # (filters, kernel size, strides) for each convolution, applied in order.
    conv_specs = (
        (32, (8, 8), (4, 4)),
        (64, (4, 4), (2, 2)),
        (64, (3, 3), (1, 1)),
    )
    for n_filters, kernel, stride in conv_specs:
        features = Conv2D(n_filters, kernel, strides=stride,
                          activation='relu', padding='valid')(features)
    features = Flatten()(features)
    features = Dense(256, activation='relu')(features)
    q_out = Dense(self.action_dim, activation='linear')(features)

    net = Model(inputs=obs, outputs=q_out)
    net.compile(loss='mse', optimizer=Adam(lr=self.learning_rate, clipnorm=10.))
    if model_info.get("summary"):
        net.summary()

    # Placeholder used to call the model directly through the session.
    infer_shape = (None, ) + tuple(self.state_dim)
    self.infer_state = tf.placeholder(tf.uint8, name="infer_input", shape=infer_shape)
    self.infer_v = net(self.infer_state)
    self.actor_var = TFVariables([self.infer_v], self.sess)

    self.sess.run(tf.initialize_all_variables())
    return net
def create_model(self, model_info):
    """Build the fully-connected policy/value model and hand it to ``build_graph``.

    Args:
        model_info: Mapping; the model summary is printed when its
            ``"summary"`` entry is truthy.

    Returns:
        A Keras ``Model`` mapping the state to ``[output_actions, output_value]``.
    """
    obs_in = Input(shape=self.state_dim, name='state_input')
    # These three inputs are created but not wired into the returned model;
    # they are kept so the set of layers created here stays unchanged.
    advantage = Input(shape=(1, ), name='adv')
    old_prediction = Input(shape=(self.action_dim, ), name='old_p')
    old_value = Input(shape=(1, ), name='old_v')

    # The first hidden layer is always created; NUM_LAYERS - 1 more follow.
    trunk = Dense(HIDDEN_SIZE, activation='relu')(obs_in)
    for _ in range(NUM_LAYERS - 1):
        trunk = Dense(HIDDEN_SIZE, activation='relu')(trunk)

    policy_head = Dense(self.action_dim, activation='softmax',
                        name='output_actions')(trunk)
    value_head = Dense(1, name='output_value')(trunk)

    net = Model(inputs=[obs_in], outputs=[policy_head, value_head])
    if model_info.get("summary"):
        net.summary()

    self.build_graph(tf.float32, net)
    return net
def create_rep_network(self):
    """Build the convolutional representation network.

    The input is cast to float32 and divided by 255, then passed through
    three convolutions and a ``HIDDEN_OUT``-wide ReLU dense layer.

    Returns:
        A Keras ``Model`` mapping ``rep_input`` to the dense layer's output.
    """
    rep_in = Input(shape=self.state_dim, name='rep_input')
    features = Lambda(lambda x: tf.cast(x, dtype='float32') / 255.)(rep_in)

    # (filters, kernel size, strides) for each convolution, applied in order.
    conv_specs = (
        (32, (8, 8), (4, 4)),
        (32, (4, 4), (2, 2)),
        (64, (3, 3), (1, 1)),
    )
    for n_filters, kernel, stride in conv_specs:
        features = Conv2D(n_filters, kernel, strides=stride,
                          activation='relu', padding='valid')(features)
    features = Flatten()(features)

    # Normalizing the output via hidden_normlize is currently disabled;
    # the dense output is returned as-is.
    hidden = Dense(HIDDEN_OUT, activation='relu')(features)
    return Model(inputs=rep_in, outputs=hidden)
def create_model(self, model_info):
    """Build and compile the fully-connected Q-network.

    Args:
        model_info: Not read by this method.

    Returns:
        A Keras ``Model`` with a linear output of ``self.action_dim`` units,
        compiled with MSE loss and Adam at ``self.learning_rate``.
    """
    obs = Input(shape=self.state_dim)

    # The first hidden layer is always created; NUM_LAYERS - 1 more follow.
    trunk = Dense(HIDDEN_SIZE, activation='relu')(obs)
    for _ in range(NUM_LAYERS - 1):
        trunk = Dense(HIDDEN_SIZE, activation='relu')(trunk)

    q_out = Dense(self.action_dim, activation='linear')(trunk)
    net = Model(inputs=obs, outputs=q_out)
    net.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
    return net
def get_cnn_backbone(state_dim, act_dim, hidden_sizes, activation, filter_arches,
                     vf_share_layers=True, summary=False, dtype='uint8'):
    """Assemble a CNN model with a policy-latent output and a value output.

    Args:
        state_dim: Shape passed to the ``obs`` input layer.
        act_dim: Number of units of the ``pi_latent`` output.
        hidden_sizes: Forwarded to ``bulid_mlp_layers``.
        activation: Forwarded to both layer-building helpers.
        filter_arches: Forwarded to ``build_conv_layers``.
        vf_share_layers: When true, both heads sit on one ``'shared'`` stack;
            otherwise separate ``'pi'`` and ``'v'`` stacks are built.
        summary: When true, print the model summary.
        dtype: ``'uint8'`` routes the input through ``layer_function``;
            ``'float32'`` uses the input unchanged.

    Returns:
        A Keras ``Model`` mapping ``obs`` to ``[pi_latent, output_value]``.

    Raises:
        ValueError: If ``dtype`` is neither ``'uint8'`` nor ``'float32'``.
    """
    obs_raw = Input(shape=state_dim, name='obs')
    if dtype == 'uint8':
        # layer_function is defined outside this block.
        obs = Lambda(layer_function)(obs_raw)
    elif dtype == 'float32':
        obs = obs_raw
    else:
        raise ValueError(
            'dtype: {} not supported automatically, please implement it yourself'
            .format(dtype))

    if vf_share_layers:
        conv_out = build_conv_layers(obs, filter_arches, activation, 'shared')
        flat = Flatten()(conv_out)
        trunk = bulid_mlp_layers(flat, hidden_sizes, activation, 'shared')
        pi_trunk = v_trunk = trunk
    else:
        # Each stage is built for the policy side first, then the value side.
        pi_conv = build_conv_layers(obs, filter_arches, activation, 'pi')
        v_conv = build_conv_layers(obs, filter_arches, activation, 'v')
        pi_flat = Flatten()(pi_conv)
        v_flat = Flatten()(v_conv)
        pi_trunk = bulid_mlp_layers(pi_flat, hidden_sizes, activation, 'pi')
        v_trunk = bulid_mlp_layers(v_flat, hidden_sizes, activation, 'v')

    pi_latent = Dense(act_dim, activation=None, name='pi_latent')(pi_trunk)
    out_value = Dense(1, activation=None, name='output_value')(v_trunk)

    backbone = Model(inputs=[obs_raw], outputs=[pi_latent, out_value])
    if summary:
        backbone.summary()
    return backbone
def create_model(self, model_info):
    """Build the convolutional policy/value model and hand it to ``build_graph``.

    Args:
        model_info: Not read by this method.

    Returns:
        A Keras ``Model`` mapping the uint8 state to
        ``[output_actions_raw, output_value]``.
    """
    obs_in = Input(shape=self.state_dim, name='state_input', dtype='uint8')
    # layer_function is defined outside this block.
    # NOTE(review): presumably converts the raw uint8 input to floats -- confirm.
    features = Lambda(layer_function)(obs_in)

    # (filters, kernel size, strides) for each convolution, applied in order.
    conv_specs = (
        (32, (8, 8), (4, 4)),
        (32, (4, 4), (2, 2)),
        (64, (3, 3), (1, 1)),
    )
    for n_filters, kernel, stride in conv_specs:
        features = Conv2D(n_filters, kernel, strides=stride,
                          activation='relu', padding='valid')(features)
    features = Flatten()(features)
    features = Dense(256, activation='relu', name='dense_1')(features)

    # Both heads branch from the same 256-unit layer.
    policy_head = Dense(self.action_dim, activation='softmax',
                        name='output_actions_raw')(features)
    value_head = Dense(1, name='output_value')(features)

    net = Model(inputs=[obs_in], outputs=[policy_head, value_head])
    self.build_graph(np.uint8, net)
    return net
def get_cnn_backbone(state_dim, act_dim, hidden_sizes, activation, filter_arches,
                     vf_share_layers=True, summary=False):
    """Assemble a CNN model with a policy-latent output and a value output.

    The raw input always goes through ``layer_function`` before the
    convolutional layers.

    Args:
        state_dim: Shape passed to the ``obs`` input layer.
        act_dim: Number of units of the ``pi_latent`` output.
        hidden_sizes: Forwarded to ``bulid_mlp_layers``.
        activation: Forwarded to both layer-building helpers.
        filter_arches: Forwarded to ``build_conv_layers``.
        vf_share_layers: When true, both heads sit on one ``'shared'`` stack;
            otherwise separate ``'pi'`` and ``'v'`` stacks are built.
        summary: When true, print the model summary.

    Returns:
        A Keras ``Model`` mapping ``obs`` to ``[pi_latent, output_value]``.
    """
    obs_raw = Input(shape=state_dim, name='obs')
    # layer_function is defined outside this block.
    obs = Lambda(layer_function)(obs_raw)

    if vf_share_layers:
        conv_out = build_conv_layers(obs, filter_arches, activation, 'shared')
        flat = Flatten()(conv_out)
        trunk = bulid_mlp_layers(flat, hidden_sizes, activation, 'shared')
        pi_trunk = v_trunk = trunk
    else:
        # Each stage is built for the policy side first, then the value side.
        pi_conv = build_conv_layers(obs, filter_arches, activation, 'pi')
        v_conv = build_conv_layers(obs, filter_arches, activation, 'v')
        pi_flat = Flatten()(pi_conv)
        v_flat = Flatten()(v_conv)
        pi_trunk = bulid_mlp_layers(pi_flat, hidden_sizes, activation, 'pi')
        v_trunk = bulid_mlp_layers(v_flat, hidden_sizes, activation, 'v')

    pi_latent = Dense(act_dim, activation=None, name='pi_latent')(pi_trunk)
    out_value = Dense(1, activation=None, name='output_value')(v_trunk)

    backbone = Model(inputs=[obs_raw], outputs=[pi_latent, out_value])
    if summary:
        backbone.summary()
    return backbone
def get_mlp_backbone(state_dim, act_dim, hidden_sizes, activation,
                     vf_share_layers=False, summary=False, dtype='float32'):
    """Assemble an MLP model with a policy-latent output and a value output.

    Args:
        state_dim: Shape passed to the ``obs`` input layer.
        act_dim: Number of units of the ``pi_latent`` output.
        hidden_sizes: Forwarded to ``bulid_mlp_layers``.
        activation: Forwarded to ``bulid_mlp_layers``.
        vf_share_layers: When true, both heads sit on one ``'shared'`` stack;
            otherwise separate ``'pi'`` and ``'v'`` stacks are built.
        summary: When true, print the model summary.
        dtype: Only ``'float32'`` is accepted.

    Returns:
        A Keras ``Model`` mapping ``obs`` to ``[pi_latent, output_value]``.

    Raises:
        ValueError: If ``dtype`` is not ``'float32'``.
    """
    obs_raw = Input(shape=state_dim, name='obs')
    if dtype == 'float32':
        obs = obs_raw
    else:
        raise ValueError(
            'dtype: {} not supported automatically, please implement it yourself'
            .format(dtype))

    if vf_share_layers:
        trunk = bulid_mlp_layers(obs, hidden_sizes, activation, 'shared')
        pi_latent = Dense(act_dim, activation=None, name='pi_latent')(trunk)
        out_value = Dense(1, activation=None, name='output_value')(trunk)
    else:
        # Independent stacks: policy stack and head first, then the value side.
        pi_trunk = bulid_mlp_layers(obs, hidden_sizes, activation, 'pi')
        pi_latent = Dense(act_dim, activation=None, name='pi_latent')(pi_trunk)
        v_trunk = bulid_mlp_layers(obs, hidden_sizes, activation, 'v')
        out_value = Dense(1, activation=None, name='output_value')(v_trunk)

    backbone = Model(inputs=[obs], outputs=[pi_latent, out_value])
    if summary:
        backbone.summary()
    return backbone
def create_policy_network(self):
    """Build the policy network.

    The input is a flat vector of width ``HIDDEN2_UNITS``. One
    ``HIDDEN1_UNITS``-wide ReLU layer feeds both softmax heads.

    Returns:
        A Keras ``Model`` with outputs ``[out_p, out_v]``: a softmax over
        ``self.action_dim`` entries and a softmax over
        ``self.value_support_size`` entries.
    """
    # Keras documents ``shape`` as a tuple; a bare int is not guaranteed to be
    # accepted by every Keras/TF version, so pass an explicit 1-tuple.
    hidden_input = Input(shape=(HIDDEN2_UNITS, ), name='hidden_input')
    hidden = Dense(HIDDEN1_UNITS, activation='relu')(hidden_input)
    # The value head is created before the policy head, although the model
    # lists the policy output first.
    out_v = Dense(self.value_support_size, activation='softmax')(hidden)
    out_p = Dense(self.action_dim, activation='softmax')(hidden)
    return Model(inputs=hidden_input, outputs=[out_p, out_v])
def create_rep_network(self):
    """Build the fully-connected representation network.

    Returns:
        A Keras ``Model`` mapping ``rep_input`` through a
        ``HIDDEN1_UNITS``-wide and then a ``HIDDEN2_UNITS``-wide ReLU layer.
    """
    rep_in = Input(shape=self.state_dim, name='rep_input')
    first = Dense(HIDDEN1_UNITS, activation='relu')(rep_in)
    # Normalizing the output via hidden_normlize is currently disabled.
    representation = Dense(HIDDEN2_UNITS, activation='relu')(first)
    return Model(inputs=rep_in, outputs=representation)
def create_model(self, model_info):
    """Create the Deep-Q network over user, click-history and item inputs.

    All four inputs go through one shared, non-trainable embedding. The two
    history inputs are reshaped to one row per history entry and each is
    summarized by its own GRU. The four features are concatenated and passed
    through two 128-unit ReLU layers to a one-unit ``q_value`` output whose
    activation is ``self.last_act``.

    As side effects this sets the placeholders ``self.user_input``,
    ``self.history_click_input``, ``self.history_no_click_input`` and
    ``self.item_input``, plus ``self.ctr_predict`` and ``self.actor_var``,
    and runs variable initialization on ``self.sess``.

    Args:
        model_info: Not read by this method.

    Returns:
        The compiled Keras ``Model``.
    """
    # Flattened widths of the two history inputs.
    click_width = self.n_history_click * self.item_dim
    no_click_width = self.n_history_no_click * self.item_dim
    seq_feature_dim = self.item_dim * self.emb_dim

    user_input = Input(
        shape=(self.user_dim,), name="user_input", dtype=self.input_type
    )
    # The history shapes were written as ``(a * b)``, which is a plain int and
    # not a tuple; Keras documents ``shape`` as a tuple, so use 1-tuples.
    history_click_input = Input(
        shape=(click_width,), name="history_click", dtype=self.input_type
    )
    history_no_click_input = Input(
        shape=(no_click_width,), name="history_no_click", dtype=self.input_type
    )
    item_input = Input(
        shape=(self.item_dim,), name="item_input", dtype=self.input_type
    )

    # The embedding table is shared by every input and is not trained.
    shared_embedding = Embedding(
        self.vocab_size,
        self.emb_dim,
        name="Emb",
        mask_zero=True,
        embeddings_initializer=self.embedding_initializer,
        trainable=False,
    )
    gru_click = GRU(seq_feature_dim)
    gru_no_click = GRU(seq_feature_dim)

    user_feature = Flatten()(shared_embedding(user_input))
    item_feature = Flatten()(shared_embedding(item_input))

    # One row per history entry, then one GRU per history type.
    history_click_feature = Reshape(
        (self.n_history_click, seq_feature_dim)
    )(shared_embedding(history_click_input))
    history_click_feature = gru_click(history_click_feature)

    history_no_click_feature = Reshape(
        (self.n_history_no_click, seq_feature_dim)
    )(shared_embedding(history_no_click_input))
    history_no_click_feature = gru_no_click(history_no_click_feature)

    x = concatenate(
        [
            user_feature,
            history_click_feature,
            history_no_click_feature,
            item_feature,
        ]
    )
    x_dense1 = Dense(128, activation="relu")(x)
    x_dense2 = Dense(128, activation="relu")(x_dense1)
    ctr_pred = Dense(1, activation=self.last_act, name="q_value")(x_dense2)

    model = Model(
        inputs=[
            user_input,
            history_click_input,
            history_no_click_input,
            item_input,
        ],
        outputs=ctr_pred,
    )
    model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
    if self._summary:
        model.summary()

    # Placeholders used to call the model directly through the session.
    self.user_input = tf.placeholder(
        dtype=self.input_type, name="user_input", shape=(None, self.user_dim)
    )
    self.history_click_input = tf.placeholder(
        dtype=self.input_type,
        name="history_click_input",
        shape=(None, click_width),
    )
    self.history_no_click_input = tf.placeholder(
        dtype=self.input_type,
        name="history_no_click_input",
        shape=(None, no_click_width),
    )
    self.item_input = tf.placeholder(
        dtype=self.input_type, name="item_input", shape=(None, self.item_dim)
    )
    self.ctr_predict = model(
        [
            self.user_input,
            self.history_click_input,
            self.history_no_click_input,
            self.item_input,
        ]
    )

    self.actor_var = TFVariables([self.ctr_predict], self.sess)
    self.sess.run(tf.initialize_all_variables())
    return model