def save_weights(d: Model, filename: str, input_shape: list):
    """Pickle the weights of ``d`` to ``filename`` with the input shape appended.

    The parent directory of ``filename`` is created when it does not exist.
    The pickled object is the list returned by ``d.get_weights()`` with
    ``input_shape`` added as its final element, so a reader has to pop the
    last item to separate the shape from the weights.

    Args:
        d: model exposing ``get_weights()``.
        filename: destination path of the pickle file.
        input_shape: input shape stored after the weights.
    """
    print(f'Saving weights to: {filename}.')
    target = Path(filename)
    target.parent.mkdir(parents=True, exist_ok=True)
    # Convention of this file format: the last entry is the input shape.
    payload = d.get_weights()
    payload.append(input_shape)
    with target.open('wb') as handle:
        pickle.dump(payload, handle)
def take_one_gradient_step(model: Model, cloned_model: Model, grads: list, alpha: float = 1) -> Model:
    """Write ``model``'s weights after one gradient step into ``cloned_model``.

    The step is applied twice: once numerically (through ``set_weights``) and
    once symbolically, by replacing the layer attributes of ``cloned_model``
    with ``tf.subtract(original_weight, alpha * grad)`` so the result stays
    connected to the computational graph of ``model``.

    Args:
        model (Model): the model on which gradients were computed and whose
            weights are the starting point of the step.
        cloned_model (Model): the model that receives the updated weights; it
            is modified in place.
        grads (list): gradients in the same order that ``model.get_weights``
            provides; an entry may be ``None`` for a weight without gradient.
        alpha (float): the magnitude of the gradient step.

    Returns:
        Model: ``cloned_model``, as promised by the return annotation (the
        function previously fell off the end and returned ``None``).
    """
    # Numerical update: plain arrays pushed into the clone.
    updated_weights = model.get_weights()
    for i in range(len(updated_weights)):
        if grads[i] is not None:
            updated_weights[i] -= alpha * grads[i]
    cloned_model.set_weights(updated_weights)

    # Symbolic update: rebind each layer attribute to a tensor expression.
    # ``k`` walks ``grads`` in step with every variable of every layer.
    k = 0
    for j, cloned_layer in enumerate(cloned_model.layers):
        for var in cloned_layer.variables:
            weight_name = _extract_var_name(var)
            # A None gradient marks a variable that is not connected/trainable.
            if grads[k] is not None:
                cloned_layer.__dict__[weight_name] = tf.subtract(
                    model.layers[j].__dict__[weight_name], alpha * grads[k])
            k += 1

    return cloned_model
class NeuralAgent(object):
    """Agent that picks a binary action from a small dense Keras network."""

    def __init__(self):
        # 4 inputs -> two ReLU layers of width 4 -> a single sigmoid unit.
        inp = Input(shape=(4, ), name='i')
        net = inp
        for layer_name in ('h1', 'h2'):
            net = Dense(4, activation='relu', name=layer_name)(net)
        output = Dense(1, activation='sigmoid', name='o')(net)
        self.model = Model(inp, output, name='m')

    def get_flattened_weights_of_model(self):
        """Return all model weights concatenated into one 1-D array."""
        flat_parts = [w.flatten() for w in self.model.get_weights()]
        return np.concatenate(flat_parts)

    def set_weights_of_model(self, flattened_weights):
        """Load the model from a 1-D array laid out like the flattened weights.

        Raises:
            Exception: if ``flattened_weights`` does not have exactly as many
                entries as the model has weights.
        """
        shapes = u.weight_shapes_of(self.model)
        expected_total = u.total_weights_of_shapes(shapes)
        if flattened_weights.shape != (expected_total, ):
            raise Exception(
                "expected weights shaped (%d,) not %s" %
                (expected_total, flattened_weights.shape))

        # Cut consecutive slices off the flat vector, one per weight tensor.
        weights_to_set = []
        start = 0
        for shape in shapes:
            stop = start + u.num_weights(shape)
            chunk = flattened_weights[start:stop]
            weights_to_set.append(
                np.array(chunk, dtype=np.float32).reshape(shape))
            start = stop
        self.model.set_weights(weights_to_set)

    def decide_action(self, observation):
        """Return 1 when the predicted probability exceeds 0.5, otherwise 0."""
        batch = np.expand_dims(observation, 0)
        action_prob = self.model.predict(batch)[0][0]
        if action_prob > 0.5:
            return 1
        return 0
def model_to_dict(model: Model):
    """Serialize a Keras model into a plain Python dictionary.

    :param model: Keras model instance
    :return: dictionary holding the JSON architecture under ``"model"`` and
        the list returned by ``get_weights()`` under ``"weights"``
    """
    architecture = model.to_json()
    weights = model.get_weights()
    return {"model": architecture, "weights": weights}
class Critic_Model:
    """Critic network trained with a clipped (PPO2-style) value loss.

    The Keras model has two inputs: the state and the previous value
    estimate. The second input is only consumed by the loss function.
    """

    def __init__(self, input_shape, action_space, lr, optimizer):
        """Build and compile the critic.

        Args:
            input_shape: shape of one state, forwarded to ``Input``.
            action_space: accepted but not used by this class.
            lr: learning rate handed to ``optimizer``.
            optimizer: callable invoked as ``optimizer(lr=lr)``.
        """
        X_input = Input(input_shape)
        old_values = Input(shape=(1,))

        V = Dense(512, activation="relu", kernel_initializer='he_uniform')(X_input)
        V = Dense(256, activation="relu", kernel_initializer='he_uniform')(V)
        V = Dense(64, activation="relu", kernel_initializer='he_uniform')(V)
        value = Dense(1, activation=None)(V)

        self.Critic = Model(inputs=[X_input, old_values], outputs=value)
        self.Critic.compile(loss=[self.critic_PPO2_loss(old_values)], optimizer=optimizer(lr=lr))

    def critic_PPO2_loss(self, values):
        """Return a Keras loss that clips the value update around ``values``."""
        def loss(y_true, y_pred):
            LOSS_CLIPPING = 0.2
            # Prediction limited to +/- LOSS_CLIPPING around the old value.
            clipped_value_loss = values + K.clip(y_pred - values, -LOSS_CLIPPING, LOSS_CLIPPING)
            v_loss1 = (y_true - clipped_value_loss) ** 2
            v_loss2 = (y_true - y_pred) ** 2

            # Keep the larger of the clipped and unclipped errors.
            value_loss = 0.5 * K.mean(K.maximum(v_loss1, v_loss2))
            return value_loss
        return loss

    def predict(self, state):
        """Predict values for ``state``, feeding zeros as the old-value input."""
        return self.Critic.predict([state, np.zeros((state.shape[0], 1))])

    def save_weights(self):
        """Save the weights to ``critic_weights.dmp.npy`` in the working directory.

        The weight arrays have different shapes, so they are stored in an
        explicit 1-D object array. Handing the ragged list straight to
        ``np.save`` raises ``ValueError`` on NumPy >= 1.24.
        """
        weights = self.Critic.get_weights()
        packed = np.empty(len(weights), dtype=object)
        # Element-wise fill avoids NumPy trying to broadcast the arrays.
        for index, weight in enumerate(weights):
            packed[index] = weight
        np.save("critic_weights.dmp", packed)

    def load_weights(self):
        """Load the weights written by ``save_weights``.

        NOTE: the file is unpickled (``allow_pickle=True``); only load files
        from a trusted source.
        """
        stored = np.load("critic_weights.dmp.npy", allow_pickle=True)
        self.Critic.set_weights(list(stored))
class DeepQ(RLQvalue):
    """Constructs the desired deep q learning network"""

    def __init__(self, action_size, observation_size, lr=1e-5, training_param=None):
        """Initialise the base class and build the online and target networks.

        Args:
            action_size: number of outputs of the Q network.
            observation_size: size of one observation.
            lr: learning rate forwarded to the base class.
            training_param: training parameters; a fresh ``TrainingParam()``
                is created when omitted, so instances no longer share one
                default object created at import time.
        """
        if training_param is None:
            training_param = TrainingParam()
        RLQvalue.__init__(self, action_size, observation_size, lr, training_param)
        self.construct_q_network()

    def construct_q_network(self):
        """Build ``self.model`` and an independent ``self.target_model``.

        Dense layers replace the convolution layers of the DeepMind
        architecture, and the input/output sizes are adapted. Each network
        gets its own layers: two ``Model`` objects wrapped around the same
        layer graph share their variables, which would keep the target
        network permanently identical to the trained one.
        """
        def build_network():
            input_size = self.observation_size * self.training_param.NUM_FRAMES
            input_layer = Input(shape=(input_size, ))
            hidden = input_layer
            hidden_sizes = (input_size,
                            self.observation_size,
                            self.observation_size,
                            2 * self.action_size)
            for units in hidden_sizes:
                hidden = Dense(units)(hidden)
                hidden = Activation('relu')(hidden)
            output = Dense(self.action_size)(hidden)

            network = Model(inputs=[input_layer], outputs=[output])
            network.compile(loss='mse', optimizer=Adam(lr=self.lr_))
            return network

        self.model = build_network()
        self.target_model = build_network()
        # Start the target network from the same weights as the online one.
        self.target_model.set_weights(self.model.get_weights())
class DDPGActor:
    """Actor network whose parameters are updated from action gradients."""

    def __init__(self, in_state, in_actions, layers=(128, 128), reg=0.01, activation=ELU, range_high=1, range_low=-1):
        """Build the actor network.

        Args:
            in_state: shape of the state input, forwarded to ``Input``.
            in_actions: number of units of the output layer.
            layers: sizes of the hidden layers (an immutable default replaces
                the former shared list default).
            reg: L2 factor applied to the hidden kernels.
            activation: either an activation name (string) passed to
                ``Dense``, or a layer class instantiated after each ``Dense``.
            range_high: factor applied to the tanh output.
            range_low: accepted but not used by this class.
        """
        self.obs = in_state
        self.act = in_actions
        self.init = VarianceScaling()
        self.reg = l2(reg)
        self.state_input = Input(shape=self.obs)

        named_activation = isinstance(activation, str)
        x = BatchNormalization()(self.state_input)
        for units in layers:
            if named_activation:
                x = Dense(units, activation=activation,
                          kernel_initializer=self.init,
                          kernel_regularizer=self.reg)(x)
            else:
                # Layer-class activations are applied as separate layers.
                x = Dense(units, activation=None,
                          kernel_initializer=self.init,
                          kernel_regularizer=self.reg)(x)
                x = activation()(x)
        x = Dense(self.act, activation='tanh', use_bias=False)(x)
        # move to action range
        out = Lambda(lambda i: range_high * i)(x)
        self.model = Model(inputs=[self.state_input], outputs=[out])

    def initialize(self, opt):
        """Compile the model and return the backend function that trains it.

        The returned function takes ``[states, action_gradients]``, applies
        the gradients of the model output w.r.t. the trainable weights
        (weighted by the negated action gradients) through ``opt`` and
        outputs the mean of the negated action gradients.
        """
        self.opt = opt
        self.model.compile(
            optimizer=self.opt,
            loss='mse'
        )
        act_grads = K.placeholder(shape=(None, self.act))
        mean_grad = K.mean(-act_grads, axis=0)
        # policy loss
        update_params = tf.gradients(self.model.output, self.model.trainable_weights, -act_grads)
        grads = zip(update_params, self.model.trainable_weights)
        return K.function(
            inputs=[self.model.input, act_grads],
            outputs=[mean_grad],
            updates=[self.opt.apply_gradients(grads)]
        )

    def predict(self, obs):
        """Predict the action for a single observation."""
        return self.model.predict(np.expand_dims(obs, axis=0))[0]

    def batch_predict(self, obs):
        """Predict actions for a batch of observations."""
        return self.model.predict(obs)

    def transfer_weights(self, behavior_model, tau):
        """Blend weights in place: ``(1 - tau) * own + tau * behavior_model``.

        ``behavior_model`` must expose ``get_weights()`` directly.
        """
        self.model.set_weights(
            [((1 - tau) * l1) + (tau * l2)
             for l1, l2 in zip(self.model.get_weights(), behavior_model.get_weights())])
class Agent:
    """Epsilon-greedy agent backed by a small dense Keras Q-network."""

    def __init__(self, env):
        """Read the observation/action sizes from ``env`` and build the network."""
        self.env = env
        self.input_dim = env.observation_space.shape[0]
        self.output_dim = env.action_space.n
        self._build_network()

    def _build_network(self):
        """Create, compile (Adam, MSE) and summarise ``self.model``."""
        inputs = Input(shape=(self.input_dim, ))
        hidden1 = Dense(10, )(inputs)
        hidden2 = Dense(10, activation='relu')(hidden1)
        output = Dense(self.output_dim, )(hidden2)
        self.model = Model(inputs=inputs, outputs=output)
        adam = tf.keras.optimizers.Adam(lr=0.1)
        self.model.compile(optimizer=adam, loss='mse')
        self.model.summary()

    def create_model(self, hidden_dims=(64, 64)):
        """Wire up an alternative convolutional network.

        The resulting tensor is not stored and ``self.model`` is left
        untouched. ``hidden_dims`` now defaults to a tuple instead of a
        shared mutable list.
        """
        X = Input(shape=(self.input_dim, ))
        net = RepeatVector(self.input_dim)(X)
        net = Reshape([self.input_dim, self.input_dim, 1])(net)
        for h_dim in hidden_dims:
            net = Conv2D(h_dim, [3, 3], padding='SAME')(net)
            net = Activation('relu')(net)
        net = Flatten()(net)
        net = Dense(self.output_dim)(net)

    def act(self, X, eps):
        """Return a random action with probability ``eps``, else the greedy one."""
        if np.random.rand() < eps:
            return self.env.action_space.sample()
        X = X.reshape(-1, self.input_dim)
        Q = self.model.predict_on_batch(X)
        return np.argmax(Q, 1)[0]

    def train(self, X_batch, y_batch):
        """Run one training step and return what ``train_on_batch`` returns."""
        return self.model.train_on_batch(X_batch, y_batch)

    def predict(self, X_batch):
        """Return the network output for ``X_batch``."""
        return self.model.predict_on_batch(X_batch)

    def get_weights(self):
        """Return the weights of the underlying model."""
        return self.model.get_weights()

    def set_weights(self, weights):
        """Set the weights of the underlying model."""
        return self.model.set_weights(weights)
class DetectorNeuralNetwork:
    """Auto-associative network that scores samples by reconstruction distance."""

    def __init__(self):
        self.neural_network = None
        # Data sets are assigned by the caller before the methods are used.
        self.x_train = None
        self.x_test = None
        self.xi_test = None
        self.cov = None
        self.weights = None
        self.user_score = None
        self.impostor_score = None
        self.step = 0.01

    def create_detector(self):
        """Build and compile a one-hidden-layer network as wide as ``x_train``."""
        dim = self.x_train.shape[1]
        inputs = Input(shape=(dim,))
        hidden = Dense(dim, activation="sigmoid",
                       kernel_initializer='random_normal',
                       bias_initializer='zeros')(inputs)
        output = Dense(dim, activation="linear",
                       kernel_initializer='random_normal',
                       bias_initializer='zeros')(hidden)
        self.neural_network = Model(inputs=inputs, outputs=output)
        self.neural_network.summary()
        opt = tf.keras.optimizers.SGD(learning_rate=0.0001, momentum=0.0003)
        self.neural_network.compile(optimizer=opt, loss='mse')

    def fit(self):
        """Train the network to reproduce ``x_train`` and keep its weights."""
        self.neural_network.fit(
            self.x_train, self.x_train, epochs=500, verbose=0)
        self.weights = self.neural_network.get_weights()

    def distance_user(self):
        """Return the Euclidean distance between each ``x_test`` row and its prediction."""
        predictions = self.neural_network.predict(self.x_test)
        return [
            distance.euclidean(self.x_test.iloc[row], predictions[row])
            for row in range(self.x_test.shape[0])
        ]

    def distance_impostor(self):
        """Return the Euclidean distance between each ``xi_test`` row and its prediction."""
        predictions = self.neural_network.predict(self.xi_test)
        return [
            distance.euclidean(self.xi_test.iloc[row], predictions[row])
            for row in range(self.xi_test.shape[0])
        ]
def clone_model(model: Model) -> Model:
    """Return a compiled copy of ``model`` that carries the same weights.

    :param model: Keras model to duplicate
    :return: cloned Keras model, compiled with the defaults of ``compile()``
    """
    source_weights = model.get_weights()
    duplicate = tf.keras.models.clone_model(model)
    duplicate.set_weights(source_weights)
    duplicate.compile()
    return duplicate
class AlphaNNet:
    """Value network: a stack of unpadded 3x3 convolutions with cropped shortcuts."""

    def __init__(self, model = None, ins = None):
        """Load a saved network from ``model`` or build a new one for input shape ``ins``.

        When neither argument is given no network is created; ``copy`` relies
        on this to fill in ``v_net`` afterwards.
        """
        if model:
            self.v_net = load_model(model)
        elif ins:
            def conv_bn(tensor):
                # 64-filter 3x3 convolution without bias, then batch norm.
                return BatchNormalization(axis=3)(Conv2D(64, (3, 3), use_bias=False)(tensor))

            X = Input(ins)
            H = Activation('relu')(conv_bn(X))

            # Four residual stages. Each stage runs two unpadded convolutions,
            # so the shortcut is cropped by 2 on every side to match.
            for _ in range(4):
                H_shortcut = Cropping2D(cropping=2)(H)
                H = Activation('relu')(conv_bn(H))
                H = conv_bn(H)
                H = Activation('relu')(Add()([H, H_shortcut]))

            Y = Activation('tanh')(Dense(3)(Flatten()(H)))
            self.v_net = Model(inputs = X, outputs = Y)

    def train(self, X, Y, ep = None, bs = None):
        """Fit the network on ``X``/``Y`` for ``ep`` epochs with batch size ``bs``."""
        self.v_net.fit(X, Y, epochs = ep, batch_size = bs)

    def v(self, X):
        """Return the network prediction for ``X``."""
        return self.v_net.predict(X)

    def copy(self, lr = 0.001):
        """Return a new ``AlphaNNet`` with cloned weights, compiled with Adam and MSE."""
        duplicate = AlphaNNet()
        # value
        duplicate.v_net = clone_model(self.v_net)
        duplicate.v_net.build(self.v_net.layers[0].input_shape)
        duplicate.v_net.set_weights(self.v_net.get_weights())
        duplicate.v_net.compile(
            optimizer = Adam(learning_rate = lr),
            loss = 'mean_squared_error'
        )
        return duplicate

    def save(self, name):
        """Save the network to ``models/<name>.h5``."""
        self.v_net.save('models/' + name + '.h5')
class QNet:
    """Convolutional Q-network over 7x7 single-channel inputs with 7 outputs."""

    def __init__(self):
        inputs = Input(shape=(7, 7, 1))
        x = res_block(inputs, (32, 32, 32), 4, activation="linear")
        # Each stage: an unpadded 2x2 convolution, then a residual block
        # with the same number of filters.
        for filters in (64, 128, 256, 256, 256):
            x = Conv2D(filters, (2, 2), padding="valid")(x)
            x = res_block(x, (filters, filters, filters), 2)

        x = Flatten()(x)
        x = Dense(256, activation='relu')(x)
        x = Dropout(0.1)(x)
        x = Dense(256, activation='relu')(x)
        x = Dense(7, activation='linear')(x)

        self._model = Model(inputs=inputs, outputs=x)
        self._model.compile('rmsprop', tf.losses.huber_loss)
        self._model.summary()

    def getModel(self):
        """Return the underlying Keras model."""
        return self._model

    def getWeights(self):
        """Return the model weights."""
        return self._model.get_weights()

    def setWeights(self, w):
        """Replace the model weights with ``w``."""
        self._model.set_weights(w)

    def fit(self, data, data_y, epochs=1):
        """Train on ``data`` after appending a channel axis (batch size 1024)."""
        shape = data.shape
        batch = data.reshape((shape[0], shape[1], shape[2], 1))
        self._model.fit(batch, data_y, epochs=epochs, batch_size=1024)

    def predict(self, data):
        """Predict for ``data`` after appending a channel axis."""
        shape = data.shape
        batch = data.reshape((shape[0], shape[1], shape[2], 1))
        return self._model.predict(batch)

    def save_weights(self, name):
        """Save only the weights to ``name``."""
        self._model.save_weights(name)

    def load_weights(self, name):
        """Load weights from ``name``."""
        self._model.load_weights(name)

    def save(self, name):
        """Save the whole model to ``name``."""
        self._model.save(name)
def yolo_body(inputs, q_input, num_anchors, config):
    """Create the multi-modal YOLO_V3 model CNN body in Keras.

    :param inputs: image input tensor
    :param q_input: word-embedding input (replaced by the BERT input when
        ``config['use_bert']`` is set)
    :param num_anchors: number of anchors (default used by callers: 3)
    :param config: dict of options; this function reads ``backbone``,
        ``pretrained_weights``, ``use_bert``, ``rnn_hidden_size``,
        ``bidirectional``, ``rnn_drop_out`` and ``lang_att``
    :return: Keras model mapping ``[inputs, q_input]`` to
        ``[regression, attention map]``
    """
    backbone = config['backbone']
    assert backbone in ["darknet", "vgg"]

    if backbone == "darknet":
        darknet = Model(inputs, darknet_body(inputs))
        pretrained_path = config['pretrained_weights']
        if os.path.exists(pretrained_path):
            tmp_weights = load_model(pretrained_path).get_weights()
            dark_weights = darknet.get_weights()
            min_len = len(dark_weights)
            # Keep the freshly initialised weight wherever the pretrained
            # tensor has a different shape.
            for idx, own_weight in enumerate(dark_weights):
                if tmp_weights[idx].shape != own_weight.shape:
                    tmp_weights[idx] = own_weight
            darknet.set_weights(tmp_weights[0:min_len])
        # Visual features: the backbone output plus two intermediate layers.
        Fv = [
            darknet.output,
            darknet.layers[47].output,
            darknet.layers[29].output,
        ]
    else:
        Fv = vgg16(inputs)

    if config['use_bert']:
        q_input, fq = build_bert(poolings=['POOL_NSP'], output_layer_num=4)
    else:
        # build nlp model for fusion
        fq = build_nlp_model(q_input,
                             config['rnn_hidden_size'],
                             config['bidirectional'],
                             config['rnn_drop_out'],
                             config['lang_att'])

    y, E = make_multitask_braches(Fv, fq, num_anchors * 5, config)
    return Model([inputs, q_input], [y, E])
class Network(object):
    """Residual convolutional network with a value head and a policy head."""

    def __init__(self, config: AlphaZeroConfig):
        """Build the Keras model described by ``config``.

        Reads ``goban_size``, ``num_filters``, ``num_residuals`` and
        ``weight_decay`` from ``config``.

        Fixes relative to the previous version:
        * the undefined names ``nfilters`` / ``nresiduals`` are replaced by
          the locals actually read from ``config``;
        * the second convolution of each residual block is applied to the
          output of the first one instead of restarting from the block input;
        * the model is built from the ``Input`` tensor rather than from an
          intermediate activation.
        """
        goban_size = config.goban_size
        num_filters = config.num_filters
        num_residuals = config.num_residuals
        w_d = config.weight_decay

        board_input = Input((goban_size, goban_size, 17))

        # First block
        x = Conv2D(num_filters, 3, 1, 'same', kernel_regularizer=l2(w_d))(board_input)
        x = BatchNormalization()(x)
        x = LeakyReLU()(x)

        # Residual blocks
        for _ in range(num_residuals):
            tmp = Conv2D(num_filters, 3, 1, 'same', kernel_regularizer=l2(w_d))(x)
            tmp = BatchNormalization()(tmp)
            tmp = LeakyReLU()(tmp)
            tmp = Conv2D(num_filters, 3, 1, 'same', kernel_regularizer=l2(w_d))(tmp)
            tmp = BatchNormalization()(tmp)
            x = add([x, tmp])
            x = LeakyReLU()(x)

        # Policy head, outputs logits
        p = Conv2D(2, 1, 1, 'same', kernel_regularizer=l2(w_d))(x)
        p = BatchNormalization()(p)
        p = LeakyReLU()(p)
        p = Flatten()(p)
        p = Dense(goban_size * goban_size + 1)(p)

        # Value head
        v = Conv2D(1, 1, 1, 'same', kernel_regularizer=l2(w_d))(x)
        v = BatchNormalization()(v)
        v = LeakyReLU()(v)
        v = Flatten()(v)
        v = Dense(num_filters)(v)
        v = LeakyReLU()(v)
        v = Dense(1, activation='tanh')(v)

        self.model = Model(inputs=board_input, outputs=[v, p])

    def inference(self, image):
        """Run the model on a single image and return ``[value, policy]``."""
        return self.model.predict([[image]])  # Value, Policy

    def get_weights(self):
        """Return the weights of this network."""
        return self.model.get_weights()
def critic_net(input_shape, epsilon, summary=False):
    """Build the critic: ``base_net`` features, a tanh layer and one linear output.

    The output kernel is zero-initialised and the last weight tensor of the
    model is multiplied by zero before the model is returned.

    Args:
        input_shape: shape of a single state.
        epsilon: accepted but not used by this function.
        summary: print the model summary when true.

    Returns:
        The (uncompiled) Keras model.
    """
    state_input = Input(shape=input_shape)
    features = base_net(input_shape)(state_input)
    hidden = Dense(512, activation='tanh', dtype='float64')(features)
    zero_init = tf.keras.initializers.Zeros()
    critic_output = Dense(1, activation='linear', kernel_initializer=zero_init)(hidden)
    model = Model(inputs=state_input, outputs=critic_output)

    # Copy the weights, blank the last tensor and write everything back.
    weights = [np.array(w) for w in model.get_weights()]
    weights[-1] = weights[-1] * 0
    model.set_weights(weights)

    if summary:
        model.summary()
    return model
def test_forward (self):
    """Set up a Keras LSTM and a NumPyNet ``LSTM_layer`` with identical weights.

    The test checks that each of the four kernel slices of the Keras layer
    matches the corresponding NumPyNet sub-layer weights, then runs the
    forward pass of both implementations.

    NOTE(review): ``forward_out_keras`` and ``forward_out_numpynet`` are
    computed but never compared in the visible code, and ``keras_bias`` is
    never used - confirm whether a final assertion is missing.
    """
    batch = 11
    timesteps = 5
    features = 3
    outputs = 5
    # Fixed seed so the random data and weights are reproducible.
    np.random.seed(123)
    data = np.random.uniform(size=(batch, features))
    inpt_keras, _ = data_to_timesteps(data, timesteps)
    assert inpt_keras.shape == (batch - timesteps, timesteps, features)
    # weights[0] multiplies the input, weights[1] is square (outputs x outputs).
    weights = [np.random.uniform(size=(features, outputs)), np.random.uniform(size=(outputs,outputs))]
    bias = [np.zeros(shape=(outputs,), dtype=float), np.zeros(shape=outputs, dtype=float)]
    # assign same weights to all the kernel in keras as for NumPyNet
    # (four copies placed side by side along the column axis)
    keras_weights1 = np.concatenate([weights[0] for i in range(4)], axis=1)
    keras_weights2 = np.concatenate([weights[1] for i in range(4)], axis=1)
    keras_bias = np.concatenate([bias[0] for i in range(4)])
    # Sanity check: every column block of the concatenation equals the source matrix.
    for i in range(4):
        np.testing.assert_allclose(keras_weights1[:,outputs*i:outputs*(i+1)], weights[0], rtol=1e-5, atol=1e-8)
    for i in range(4):
        np.testing.assert_allclose(keras_weights2[:,outputs*i:outputs*(i+1)], weights[1], rtol=1e-5, atol=1e-8)
    # Keras model: a single bias-free LSTM layer loaded with the two kernels.
    inp = Input(shape=(inpt_keras.shape[1:]))
    lstm = LSTM(units=outputs, implementation=1, use_bias=False)(inp)
    model = Model(inputs=[inp], outputs=[lstm])
    model.set_weights([keras_weights1, keras_weights2])
    # NumPyNet layer built from the un-concatenated weights.
    inpt_numpynet = data.reshape(batch, 1, 1, features)
    layer = LSTM_layer(outputs=outputs, steps=timesteps, weights=weights, bias=bias, input_shape=inpt_numpynet.shape)
    # First Keras kernel, column blocks 0..3 against layer.uf / ui / ug / uo.
    np.testing.assert_allclose(layer.uf.weights, model.get_weights()[0][:, :outputs], rtol=1e-5, atol=1e-8)
    np.testing.assert_allclose(layer.ui.weights, model.get_weights()[0][:, outputs:2*outputs], rtol=1e-5, atol=1e-8)
    np.testing.assert_allclose(layer.ug.weights, model.get_weights()[0][:, 2*outputs:3*outputs], rtol=1e-5, atol=1e-8)
    np.testing.assert_allclose(layer.uo.weights, model.get_weights()[0][:, 3*outputs:4*outputs], rtol=1e-5, atol=1e-8)
    # Second Keras kernel, column blocks 0..3 against layer.wf / wi / wg / wo.
    np.testing.assert_allclose(layer.wf.weights, model.get_weights()[1][:, :outputs], rtol=1e-5, atol=1e-8)
    np.testing.assert_allclose(layer.wi.weights, model.get_weights()[1][:, outputs:2*outputs], rtol=1e-5, atol=1e-8)
    np.testing.assert_allclose(layer.wg.weights, model.get_weights()[1][:, 2*outputs:3*outputs], rtol=1e-5, atol=1e-8)
    np.testing.assert_allclose(layer.wo.weights, model.get_weights()[1][:, 3*outputs:4*outputs], rtol=1e-5, atol=1e-8)
    # Forward pass of both implementations.
    forward_out_keras = model.predict(inpt_keras)
    layer.forward(inpt=inpt_numpynet)
    forward_out_numpynet = layer.output.reshape(batch, outputs)
class Actor_Model:
    """Softmax policy network trained with the PPO clipped surrogate loss."""

    def __init__(self, input_shape, action_space, lr, optimizer):
        """Build and compile the actor.

        Args:
            input_shape: shape of one state, forwarded to ``Input``.
            action_space: number of discrete actions (size of the softmax).
            lr: learning rate handed to ``optimizer``.
            optimizer: callable invoked as ``optimizer(lr=lr)``.
        """
        X_input = Input(input_shape)
        self.action_space = action_space

        X = Dense(512, activation="relu", kernel_initializer=tf.random_normal_initializer(stddev=0.01))(X_input)
        X = Dense(256, activation="relu", kernel_initializer=tf.random_normal_initializer(stddev=0.01))(X)
        X = Dense(64, activation="relu", kernel_initializer=tf.random_normal_initializer(stddev=0.01))(X)
        output = Dense(self.action_space, activation="softmax")(X)

        self.Actor = Model(inputs=X_input, outputs=output)
        self.Actor.compile(loss=self.ppo_loss, optimizer=optimizer(lr=lr), run_eagerly=True)

    def ppo_loss(self, y_true, y_pred):
        """PPO loss as defined in https://arxiv.org/abs/1707.06347.

        ``y_true`` packs three column groups: the advantage (1 column), the
        old prediction (``action_space`` columns) and the action mask
        (remaining columns).
        """
        advantages, prediction_picks, actions = y_true[:, :1], y_true[:, 1:1+self.action_space], y_true[:, 1+self.action_space:]
        LOSS_CLIPPING = 0.2
        ENTROPY_LOSS = 0.001

        prob = actions * y_pred
        old_prob = actions * prediction_picks

        # Clip away exact zeros before taking logarithms.
        prob = K.clip(prob, 1e-10, 1.0)
        old_prob = K.clip(old_prob, 1e-10, 1.0)

        ratio = K.exp(K.log(prob) - K.log(old_prob))

        p1 = ratio * advantages
        p2 = K.clip(ratio, min_value=1 - LOSS_CLIPPING, max_value=1 + LOSS_CLIPPING) * advantages

        actor_loss = -K.mean(K.minimum(p1, p2))

        entropy = -(y_pred * K.log(y_pred + 1e-10))
        entropy = ENTROPY_LOSS * K.mean(entropy)

        total_loss = actor_loss - entropy
        return total_loss

    def predict(self, state):
        """Return the action probabilities for ``state``."""
        return self.Actor.predict(state)

    def save_weights(self):
        """Save the weights to ``actor_weights.dmp.npy`` in the working directory.

        The weight arrays have different shapes, so they are stored in an
        explicit 1-D object array. Handing the ragged list straight to
        ``np.save`` raises ``ValueError`` on NumPy >= 1.24.
        """
        weights = self.Actor.get_weights()
        packed = np.empty(len(weights), dtype=object)
        # Element-wise fill avoids NumPy trying to broadcast the arrays.
        for index, weight in enumerate(weights):
            packed[index] = weight
        np.save("actor_weights.dmp", packed)

    def load_weights(self):
        """Load the weights written by ``save_weights``.

        NOTE: the file is unpickled (``allow_pickle=True``); only load files
        from a trusted source.
        """
        stored = np.load("actor_weights.dmp.npy", allow_pickle=True)
        self.Actor.set_weights(list(stored))
class DeepQ_NN(BaseDeepQ):
    """
    Constructs the desired deep q learning network

    Attributes
    ----------
    schedule_lr_model:
        The schedule for the learning rate.
    """

    def __init__(self, nn_params, training_param=None):
        if training_param is None:
            training_param = TrainingParam()
        BaseDeepQ.__init__(self, nn_params, training_param)
        self.schedule_lr_model = None
        self.construct_q_network()

    def construct_q_network(self):
        """
        The network architecture can be changed with the
        :attr:`l2rpn_baselines.BaseDeepQ.nn_archi`

        This function will make 2 identical models, one will serve as a
        target model, the other one will be trained regularly.

        Each model is built from its own layers. Two ``Model`` objects wrapped
        around the same layer graph share their variables, which would keep
        the target network permanently identical to the trained one.
        """
        def build_network():
            input_layer = Input(shape=(self._nn_archi.observation_size,),
                                name="state")
            lay = input_layer
            for lay_num, (size, act) in enumerate(zip(self._nn_archi.sizes,
                                                      self._nn_archi.activs)):
                lay = Dense(size, name="layer_{}".format(lay_num))(lay)
                lay = Activation(act)(lay)
            output = Dense(self._action_size, name="output")(lay)
            return Model(inputs=[input_layer], outputs=[output])

        self._model = build_network()
        self._schedule_lr_model, self._optimizer_model = self.make_optimiser()
        self._model.compile(loss='mse', optimizer=self._optimizer_model)

        self._target_model = build_network()
        # Start the target network from the same weights as the online one.
        self._target_model.set_weights(self._model.get_weights())
def log_keras_model(self, model: Model, key: str = None):
    """Log a Keras model: architecture plot, text summary, weight plots and an .h5 copy.

    All artefacts go under ``model`` or, when ``key`` is given, under
    ``model/<key>``. Weight plots are skipped for a model without weights.
    """
    self.print('Logging model')
    if key is None:
        log_base_path = 'model'
    else:
        log_base_path = f'model/{key}'

    # Collect the summary line by line instead of printing it.
    summary_lines = []
    model.summary(print_fn=summary_lines.append)

    plot_model(model,
               show_shapes=True,
               expand_nested=True,
               to_file=self._out_path(f'{log_base_path}/model.png'))
    self.log(f'{log_base_path}/summary', '\n'.join(summary_lines))

    if model.get_weights():
        weights_dir = self._out_path(f'{log_base_path}/weights', is_directory=True)
        plot_model_layer_weights(model, out_path=weights_dir)

    model.save(self._out_path(f'{log_base_path}/model.h5'))
class DQNPolicy:
    """Dense network with a softmax output used to pick discrete actions."""

    def __init__(self, in_state, in_actions, layers, activation='relu', reg=0.01):
        """Build the network.

        ``activation`` is either an activation name handed to ``Dense`` or a
        layer class that is instantiated after every hidden ``Dense``.
        """
        self.obs = in_state
        self.act = in_actions
        self.init = VarianceScaling(scale=2.0)
        self.reg = l2(reg)
        self.state_input = Input(shape = self.obs)

        # At least one hidden layer is required (indexing fails otherwise).
        hidden_sizes = [layers[0], *layers[1:]]
        named_activation = isinstance(activation, (str,))

        x = self.state_input
        for units in hidden_sizes:
            if named_activation:
                x = Dense(units, activation=activation,
                          kernel_initializer=self.init,
                          kernel_regularizer=self.reg)(x)
            else:
                x = Dense(units, activation=None,
                          kernel_initializer=self.init,
                          kernel_regularizer=self.reg)(x)
                x = activation()(x)

        out = Dense(self.act, activation='softmax')(x)
        self.model = Model(inputs = [self.state_input], outputs=[out])

    def initialize(self, opt):
        """Compile the model with ``opt``, MSE loss and the accuracy metric."""
        self.model.compile(
            optimizer=opt,
            loss = 'mse',
            metrics = ['accuracy'],
        )

    def transfer_weights(self, behavior_model, tau):
        """Blend weights in place: ``(1 - tau) * own + tau * behavior_model.model``."""
        own_weights = self.model.get_weights()
        other_weights = behavior_model.model.get_weights()
        blended = [
            ((1 - tau)*l1) + (tau*l2)
            for l1, l2 in zip(own_weights, other_weights)
        ]
        self.model.set_weights(blended)

    def predict(self, st):
        """Return the index of the highest output for a single state."""
        batch = np.expand_dims(st, axis=0)
        return np.argmax(self.model.predict(batch))

    def batch_predict(self, st):
        """Return the index of the highest output for every state of a batch."""
        outputs = self.model.predict(st)
        return np.argmax(outputs, axis=1)

    def fit(self, states, y):
        """Fit the model silently on ``states``/``y`` and return the Keras result."""
        return self.model.fit([states], y, verbose=0)
class Machine():
    """One-hidden-layer ReLU regression network with input-gradient evaluation."""

    def __init__(self, ins=None, model=None, nodes=1024, lr=0.01):
        """Load ``<model>.h5`` or build a new network for input shape ``ins``.

        Raises:
            ValueError: if neither ``ins`` nor ``model`` is provided.
        """
        if model:
            self.nnet = load_model(model + ".h5")
        elif ins:
            X = Input(ins)
            H = Activation('relu')(Dense(nodes)(X))
            Y = Dense(1)(H)
            self.nnet = Model(inputs=X, outputs=Y)
            self.nnet.compile(optimizer=Adam(learning_rate=lr), loss='mean_squared_error')
        else:
            raise ValueError("Provide either input shape or model name")

    # train
    def learn(self, X, Y, ep=32, bs=256):
        """Fit the network on ``X``/``Y``."""
        self.nnet.fit(X, Y, epochs=ep, batch_size=bs)

    # compute output
    def v(self, data):
        """Return the network output for ``data``."""
        return self.nnet.predict(data)

    # compute gradients at given input data points
    # only works for one hidden layer for now
    def d(self, data):
        """Return the gradient of the output w.r.t. every input feature.

        Args:
            data: 2-D array-like of shape ``(samples, features)``.

        Returns:
            Array of shape ``(features, samples)``; entry ``[i, j]`` is the
            sum over the hidden units that are active for sample ``j`` of
            ``W1[i, k] * W2[k, 0]``.

        This is the vectorized form of the former triple Python loop.
        """
        W = self.nnet.get_weights()
        W1, B, W2 = W[0], W[1], W[2]
        H = array(data) @ W1
        # A ReLU unit passes gradient only where its pre-activation H + B > 0.
        active = (H > -B).astype(W1.dtype)
        return (W1 * W2[:, 0]) @ active.T

    # save the model as .h5 file
    def save(self, name):
        """Save the model to ``<name>.h5``."""
        self.nnet.save(name + ".h5")
class DeepQ_NN(BaseDeepQ):
    """Constructs the desired deep q learning network"""

    def __init__(self,
                 action_size,
                 observation_size,
                 lr=1e-5,
                 learning_rate_decay_steps=1000,
                 learning_rate_decay_rate=0.95,
                 training_param=None):
        """Initialise the base class and build the online and target networks.

        ``training_param`` defaults to a fresh ``TrainingParam()`` per
        instance instead of one object created at import time and shared by
        every instance.
        """
        if training_param is None:
            training_param = TrainingParam()
        BaseDeepQ.__init__(self, action_size, observation_size,
                           lr, learning_rate_decay_steps, learning_rate_decay_rate,
                           training_param)
        self.construct_q_network()

    def construct_q_network(self):
        """Build ``self.model`` and an independent ``self.target_model``.

        Dense layers replace the convolution layers of the DeepMind
        architecture, and the input/output sizes are adapted. Each network
        gets its own layers: two ``Model`` objects wrapped around the same
        layer graph share their variables, which would keep the target
        network permanently identical to the trained one.
        """
        def build_network():
            input_size = self.observation_size * self.training_param.NUM_FRAMES
            input_layer = Input(shape=(input_size, ), name="state")
            hidden = input_layer
            hidden_sizes = (input_size,
                            self.observation_size,
                            self.observation_size,
                            2 * self.action_size)
            for units in hidden_sizes:
                hidden = Dense(units)(hidden)
                hidden = Activation('relu')(hidden)
            output = Dense(self.action_size, name="output")(hidden)
            return Model(inputs=[input_layer], outputs=[output])

        self.model = build_network()
        self.schedule_lr_model, self.optimizer_model = self.make_optimiser()
        self.model.compile(loss='mse', optimizer=self.optimizer_model)

        self.target_model = build_network()
        # Start the target network from the same weights as the online one.
        self.target_model.set_weights(self.model.get_weights())
def _build_model(self, front_model=None):
    """Stack a "front" and a "back" model into one compiled Q-learner.

    Args:
        front_model: optional existing Keras model used as the first stage.
            When omitted, a default front model is created that flattens a
            ``(6, 10, 1)`` input into a 20-unit dense layer.

    Returns:
        The compiled (MSE loss, Adam) model named ``full_qlearner``.

    The back model's input is sized from the front model's output. It used
    to be fixed at 15 features, which does not match the 20 features emitted
    by the default front model; a front model with 15 outputs gets exactly
    the same back model as before.
    """
    if front_model is None:
        input_front = Input(shape=(6, 10, 1))
        flat_front = Flatten()(input_front)
        x1 = Dense(20)(flat_front)
        front_model = Model(input_front, x1, name="front")
    else:
        input_front = front_model.input

    # output_shape includes the batch axis; drop it for Input(shape=...).
    input_back = Input(shape=tuple(front_model.output_shape[1:]))
    x2 = Dense(8)(input_back)
    x2 = Dense(3)(x2)
    back_model = Model(input_back, x2, name="back")

    qlearner = Model(input_front, back_model(front_model(input_front)), name="full_qlearner")
    qlearner.compile(loss="mean_squared_error", optimizer="adam")
    return qlearner
class autoencoder:
    """Dense auto-encoder that also exposes its bottleneck as a second model."""

    def __init__(self, x):
        """Store ``x`` as float32 and remember its ``(rows, cols)`` shape."""
        print(tf.executing_eagerly())
        self.x = x.astype(np.float32)  # value of inputs
        self.m, self.n = x.shape  # m is rows (number of samples) and n is cols (features)

    def build_model(self, layers, dim_red):
        """Build ``self.autoencode`` and the encoder-only ``self.reduce_dims``.

        Layer widths shrink from ``n`` to ``dim_red`` in ``layers`` roughly
        equal steps and then grow back. Both models are built on the same
        layer objects, so the encoder always uses the auto-encoder's weights.

        Args:
            layers: number of width steps between ``n`` and ``dim_red``.
            dim_red: width of the bottleneck layer.
        """
        inputs = Input(shape=(self.n, ))
        f = Dense(self.n, activation='relu')(inputs)
        steps = (self.n - dim_red) / layers
        for width in np.arange(self.n - steps, dim_red, -steps).round().astype(int):
            f = Dense(int(width), activation='relu')(f)
        middle = Dense(dim_red, activation='relu')(f)
        # ``steps`` is a float: round to an integer unit count like the loops do.
        f = Dense(int(round(dim_red + steps)), activation='relu')(middle)
        for width in np.arange(dim_red + 2 * steps, self.n, steps).round().astype(int):
            f = Dense(int(width), activation='relu')(f)
        output = Dense(self.n, activation='relu')(f)

        self.autoencode = Model(inputs=inputs, outputs=[output])
        self.autoencode.compile(optimizer="Adam", loss="mse")
        self.reduce_dims = Model(inputs=inputs, outputs=[middle])
        self.reduce_dims.compile(optimizer="Adam", loss="mse")
        self.autoencode.summary()

    def train(self, epoch):
        """Fit the auto-encoder to reproduce its input for ``epoch`` epochs.

        No weight copy into ``reduce_dims`` is needed because it shares its
        layers with ``autoencode``. The former ``set_weights`` call passed
        the full auto-encoder weight list to the smaller encoder model.
        """
        self.autoencode.fit(x=self.x, y=self.x, epochs=epoch)

    def predict(self, x, reduce=None):
        """Return the bottleneck output when ``reduce`` is given, else the reconstruction."""
        if reduce is not None:
            return self.reduce_dims.predict(x.astype(np.float32))
        return self.autoencode.predict(x.astype(np.float32))
def get_trained_model(x_train, y_train, layer_name, n_recurrent_units, bidi):
    """Train a small recurrent regression model and return it with its weights.

    Args:
        x_train: training inputs; everything after the first axis is the
            model input shape.
        y_train: training targets.
        layer_name: ``'LSTM'`` selects an LSTM layer, anything else a GRU.
        n_recurrent_units: number of units of the recurrent layer.
        bidi: wrap the recurrent layer in ``Bidirectional`` when true.

    Returns:
        Tuple ``(training_model, trained_weights)``.
    """
    recurrent_cls = LSTM if layer_name == 'LSTM' else GRU

    # Define/Build/Train Training Model
    training_in = Input(shape=x_train.shape[1:])
    recurrent = recurrent_cls(n_recurrent_units,
                              return_sequences=True,
                              stateful=False)
    if bidi:
        recurrent = Bidirectional(recurrent)
    recurrent_out = recurrent(training_in)
    training_pred = Dense(1)(recurrent_out)

    training_model = Model(inputs=training_in, outputs=training_pred)
    training_model.compile(loss='mean_squared_error', optimizer='adam')
    if PRINT_SUMMARIES:
        training_model.summary()

    training_model.fit(x_train, y_train, batch_size=2, epochs=10, verbose=0)
    return training_model, training_model.get_weights()
class Node():
    """Tree node that owns a tiny convolutional binary classifier."""

    def __init__(self, input_shape, ksize=3, name="Root", printer=0):
        """Create the node and build its model.

        Args:
            input_shape: shape of one input image.
            ksize: kernel size of the single convolution.
            name: node name; also the stem of the file written by ``save_img``.
            printer: when truthy, print the model summary.
        """
        self.name = name
        self.left = None
        self.right = None
        self.model = None
        self.history = None
        self.loss = 0
        self.ksize = ksize
        self.input_shape = input_shape
        self.initialize_model(input_shape, ksize, printer)
        print("Node Created: " + str(name))

    def initialize_model(self, input_shape, ksize, printer):
        """Build and compile the model: conv -> flatten -> one sigmoid unit."""
        img = Input(shape=input_shape)
        conv_1 = Conv2D(1, kernel_size=ksize, activation="relu")(img)
        flat = Flatten()(conv_1)
        out = Dense(1, activation="sigmoid")(flat)
        self.model = Model(img, out)
        self.model.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"])
        if printer:
            # summary() prints by itself and returns None, so wrapping it in
            # print() only added a stray "None" line.
            self.model.summary()

    def train(self, X, y, num_epochs=25):
        """Fit the model on ``X`` and the labels produced by ``make_labels(y)``."""
        y_new = make_labels(y)
        self.history = self.model.fit(X, y_new, epochs=num_epochs)

    def predict(self, X):
        """Return the model prediction for ``X``."""
        return self.model.predict(X)

    def save_img(self):
        """Save the model weights to ``<name>.npy``.

        The weight arrays have different shapes, so they are stored in an
        explicit 1-D object array; ``np.array`` on the ragged list raises
        ``ValueError`` on NumPy >= 1.24.
        """
        weights = self.model.get_weights()
        packed = np.empty(len(weights), dtype=object)
        # Element-wise fill avoids NumPy trying to broadcast the arrays.
        for index, weight in enumerate(weights):
            packed[index] = weight
        np.save(self.name + ".npy", packed)
class DeepSpeakerModel:
    """Residual CNN ('ResCNN') that maps a batch of inputs to 512-d vectors.

    The network is four strided conv + residual stages, a temporal average,
    and a 512-unit affine layer. With ``include_softmax`` a dropout and a
    softmax classification layer are added; otherwise the affine output is
    L2-normalised.
    """

    # I thought it was 3 but maybe energy is added at a 4th dimension.
    # would be better to have 4 dimensions:
    # MFCC, DIFF(MFCC), DIFF(DIFF(MFCC)), ENERGIES (probably tiled across the frequency domain).
    # this seems to help match the parameter counts.
    def __init__(self, batch_input_shape=(None, NUM_FRAMES, NUM_FBANKS, 1), include_softmax=False,
                 num_speakers_softmax=None):
        """Build the Keras model and store it in ``self.m``.

        Args:
            batch_input_shape: full input shape including the batch axis.
            include_softmax: add dropout and a softmax output layer.
            num_speakers_softmax: size of the softmax layer; must be a
                positive number when ``include_softmax`` is set.

        Raises:
            ValueError: if ``include_softmax`` is set without a positive
                ``num_speakers_softmax``.
        """
        self.include_softmax = include_softmax
        # Explicit check instead of `assert`: asserts vanish under `python -O`
        # and a None value used to surface as an unrelated TypeError.
        if self.include_softmax and (num_speakers_softmax is None or num_speakers_softmax <= 0):
            raise ValueError(
                'num_speakers_softmax must be a positive integer when include_softmax is set, '
                f'got {num_speakers_softmax!r}')
        # Counter used to give every clipped-ReLU Lambda layer a unique name.
        self.clipped_relu_count = 0

        # http://cs231n.github.io/convolutional-networks/
        # conv weights
        # #params = ks * ks * nb_filters * num_channels_input

        # Conv128-s
        # 5*5*128*128/2+128
        # ks*ks*nb_filters*channels/strides+bias(=nb_filters)

        # take 100 ms -> 4 frames.
        # if signal is 3 seconds, then take 100ms per 100ms and average out this network.
        # 8*8 = 64 features.

        # used to share all the layers across the inputs

        # num_frames = K.shape() - do it dynamically after.
        inputs = Input(batch_shape=batch_input_shape, name='input')
        x = self.cnn_component(inputs)

        # NOTE(review): 2048 is hard-coded; it presumably equals
        # (frequency bins after four stride-2 stages) * 512 channels -- confirm
        # before changing NUM_FBANKS.
        x = Reshape((-1, 2048))(x)
        # Temporal average layer. axis=1 is time.
        x = Lambda(lambda y: K.mean(y, axis=1), name='average')(x)
        if include_softmax:
            logger.info('Including a Dropout layer to reduce overfitting.')
            # used for softmax because the dataset we pre-train on might be too small. easy to overfit.
            x = Dropout(0.5)(x)
        x = Dense(512, name='affine')(x)
        if include_softmax:
            # Those weights are just when we train on softmax.
            x = Dense(num_speakers_softmax, activation='softmax')(x)
        else:
            # Does not contain any weights.
            x = Lambda(lambda y: K.l2_normalize(y, axis=1), name='ln')(x)
        self.m = Model(inputs, x, name='ResCNN')

    def keras_model(self):
        """Return the underlying Keras model."""
        return self.m

    def get_weights(self):
        """Return the model weights as a flat list, without the softmax layer.

        When the model was built with ``include_softmax`` the last two
        entries (softmax kernel and bias) are dropped.
        """
        w = self.m.get_weights()
        if self.include_softmax:
            # last 2 are the W_softmax and b_softmax.
            return w[:-2]
        return w

    def clipped_relu(self, inputs):
        """Apply ``min(max(x, 0), 20)`` through a uniquely named Lambda layer."""
        relu = Lambda(lambda y: K.minimum(K.maximum(y, 0), 20),
                      name=f'clipped_relu_{self.clipped_relu_count}')(inputs)
        self.clipped_relu_count += 1
        return relu

    @staticmethod
    def _conv2d(filters, kernel_size, strides, name):
        """Create the Conv2D layer configuration shared by every convolution."""
        # The L2 factor is passed positionally: the keyword was renamed from
        # `l` to `l2` across Keras releases, positional works in both.
        return Conv2D(filters,
                      kernel_size=kernel_size,
                      strides=strides,
                      activation=None,
                      padding='same',
                      kernel_initializer='glorot_uniform',
                      kernel_regularizer=regularizers.l2(0.0001),
                      name=name)

    def identity_block(self, input_tensor, kernel_size, filters, stage, block):
        """Residual block: two conv/BN/clipped-ReLU steps plus a skip connection."""
        conv_name_base = f'res{stage}_{block}_branch'

        x = self._conv2d(filters, kernel_size, 1, conv_name_base + '_2a')(input_tensor)
        x = BatchNormalization(name=conv_name_base + '_2a_bn')(x)
        x = self.clipped_relu(x)

        x = self._conv2d(filters, kernel_size, 1, conv_name_base + '_2b')(x)
        x = BatchNormalization(name=conv_name_base + '_2b_bn')(x)
        x = self.clipped_relu(x)

        x = layers.add([x, input_tensor])
        x = self.clipped_relu(x)
        return x

    def conv_and_res_block(self, inp, filters, stage):
        """Stride-2 5x5 convolution followed by three identity blocks."""
        conv_name = 'conv{}-s'.format(filters)
        # TODO: why kernel_regularizer?
        o = self._conv2d(filters, 5, 2, conv_name)(inp)
        o = BatchNormalization(name=conv_name + '_bn')(o)
        o = self.clipped_relu(o)
        for i in range(3):
            o = self.identity_block(o, kernel_size=3, filters=filters, stage=stage, block=i)
        return o

    def cnn_component(self, inp):
        """Stack the four conv/residual stages (64, 128, 256, 512 filters)."""
        x = self.conv_and_res_block(inp, 64, stage=1)
        x = self.conv_and_res_block(x, 128, stage=2)
        x = self.conv_and_res_block(x, 256, stage=3)
        x = self.conv_and_res_block(x, 512, stage=4)
        return x

    def set_weights(self, w):
        """Set weights layer by layer.

        Args:
            w: iterable with one entry per model layer, each entry being the
                weight list accepted by that layer's ``set_weights``. Pairing
                stops at the shorter of ``self.m.layers`` and ``w``.
                NOTE(review): this is a per-layer structure, not the flat
                list returned by ``get_weights`` -- confirm against callers.
        """
        for layer, layer_w in zip(self.m.layers, w):
            layer.set_weights(layer_w)
            logger.info(f'Setting weights for [{layer.name}]...')
Y = Dense(units=vocab_size)(encoder_output) # Compile model generator = Model(inputs=X, outputs=Y) generator.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True), optimizer=optimizer) print(generator.summary()) """## Loading weights""" # Import trained weights from RNN to generator load_file = False if load_file: generator.load_weights("best_model.h5") else: generator.set_weights(model.get_weights()) """## Generating methods""" def generate_text(start_string, model, num_generate = 1000, temperature = 1.0): # Vectorize input string input_eval = [char2idx[s] for s in start_string] input_eval = tf.expand_dims(input_eval, 0) text_generated = [] # List to append predicted chars predicted_ids = [] idx2char = { v: k for k, v in char2idx.items() } # invert char-index mapping model.reset_states()
class CriticNetwork(Network):
    """LSTM critic with an online model and a softly-updated target model.

    ``self.model`` and ``self.target_model`` have the same architecture;
    ``transfer_weights`` moves the target towards the online model by a
    factor ``self.tau``.
    """

    def __init__(self, *args, num_steps=1, **kwargs):
        """Build and compile both models.

        Args:
            *args, **kwargs: forwarded to the ``Network`` base class.
            num_steps: length of the time axis of the model input.
        """
        super().__init__(*args, **kwargs)
        self.tau = 0.001
        with graph.as_default():
            if sess is not None:
                set_session(sess)
            self.num_steps = num_steps
            if self.shared_network is None:
                inp = Input((self.num_steps, self.input_dim))
                output = self.get_network_head(inp).output
                output2 = self.get_network_head2(inp).output
            else:
                inp = self.shared_network.input
                output = self.shared_network.output
                # Previously left as None, which made the Dense call below
                # fail. NOTE(review): with a shared network both models now
                # sit on the same trunk and only their output layers are
                # independent -- confirm this is the intended design.
                output2 = self.shared_network.output
            output = Dense(self.output_dim,
                           activation=self.activation,
                           kernel_initializer='random_normal')(output)
            output2 = Dense(self.output_dim,
                            activation=self.activation,
                            kernel_initializer='random_normal')(output2)
            self.model = Model(inp, output)
            self.target_model = Model(inp, output2)
            self.model.compile(optimizer=SGD(lr=self.lr), loss=self.loss)
            self.target_model.compile(optimizer=SGD(lr=self.lr), loss=self.loss)

    @staticmethod
    def _build_lstm_head(inp):
        """Create a fresh 256-128-64-32 LSTM stack (new layers on every call)."""
        out = LSTM(256, dropout=0.1, return_sequences=True, stateful=False,
                   kernel_initializer='random_normal')(inp)
        out = BatchNormalization()(out)
        out = LSTM(128, dropout=0.1, return_sequences=True, stateful=False,
                   kernel_initializer='random_normal')(out)
        out = BatchNormalization()(out)
        out = LSTM(64, dropout=0.1, return_sequences=True, stateful=False,
                   kernel_initializer='random_normal')(out)
        out = LSTM(32, dropout=0.1, stateful=False,
                   kernel_initializer='random_normal')(out)
        out = BatchNormalization()(out)
        return Model(inp, out)

    @staticmethod
    def get_network_head(inp):
        """Return a new LSTM head model for the online network."""
        return CriticNetwork._build_lstm_head(inp)

    @staticmethod
    def get_network_head2(inp):
        """Return a new, independently weighted LSTM head for the target network."""
        return CriticNetwork._build_lstm_head(inp)

    def predict(self, sample):
        """Reshape one sample to (1, num_steps, input_dim) and predict."""
        sample = np.array(sample).reshape((1, self.num_steps, self.input_dim))
        return super().predict(sample)

    def train_on_batch(self, x, y):
        """Reshape ``x`` to (-1, num_steps, input_dim) and train on the batch."""
        x = np.array(x).reshape((-1, self.num_steps, self.input_dim))
        return super().train_on_batch(x, y)

    def target_predict(self, sample):
        """Reshape one sample to (1, num_steps, input_dim) and query the target."""
        sample = np.array(sample).reshape((1, self.num_steps, self.input_dim))
        return super().target_predict(sample)

    def transfer_weights(self):
        """ Transfer model weights to target model with a factor of Tau
        """
        W, target_W = self.model.get_weights(), self.target_model.get_weights()
        for i, online_w in enumerate(W):
            target_W[i] = self.tau * online_w + (1 - self.tau) * target_W[i]
        self.target_model.set_weights(target_W)
class LSTMSeq2Seq(BaseModel):
    """Encoder/decoder LSTM for multi-step sequence forecasting.

    Training uses teacher forcing (the decoder is fed the lagged targets);
    inference decodes one step at a time, feeding each prediction back in.
    """

    def __init__(self, check_optional_config=False, future_seq_len=2):
        """
        Constructor of LSTM Seq2Seq model
        """
        self.model = None
        self.past_seq_len = None
        self.future_seq_len = future_seq_len
        self.feature_num = None
        self.target_col_num = None
        self.metric = None
        self.latent_dim = None
        self.batch_size = None
        self.check_optional_config = check_optional_config

    def _build_train(self, mc=False, **config):
        """
        build LSTM Seq2Seq model
        :param mc: when truthy the LSTM layers are called with training=True
            (dropout stays active), otherwise training=None
        :param config: hyper parameters; reads 'metric', 'latent_dim',
            'dropout', 'lr' and 'batch_size' with defaults
        :return: the compiled training model
        """
        super()._check_config(**config)
        self.metric = config.get('metric', 'mean_squared_error')
        self.latent_dim = config.get('latent_dim', 128)
        self.dropout = config.get('dropout', 0.2)
        self.lr = config.get('lr', 0.001)
        # for restore in continuous training
        self.batch_size = config.get('batch_size', 64)
        training = True if mc else None

        # Define an input sequence and process it.
        self.encoder_inputs = Input(shape=(None, self.feature_num), name="encoder_inputs")
        encoder = LSTM(units=self.latent_dim,
                       dropout=self.dropout,
                       return_state=True,
                       name="encoder_lstm")
        encoder_outputs, state_h, state_c = encoder(self.encoder_inputs, training=training)
        # We discard `encoder_outputs` and only keep the states.
        self.encoder_states = [state_h, state_c]

        # Set up the decoder, using `encoder_states` as initial state.
        self.decoder_inputs = Input(shape=(None, self.target_col_num), name="decoder_inputs")
        # We set up our decoder to return full output sequences,
        # and to return internal states as well. We don't use the
        # return states in the training model, but we will use them in inference.
        self.decoder_lstm = LSTM(self.latent_dim,
                                 dropout=self.dropout,
                                 return_sequences=True,
                                 return_state=True,
                                 name="decoder_lstm")
        decoder_outputs, _, _ = self.decoder_lstm(self.decoder_inputs,
                                                  training=training,
                                                  initial_state=self.encoder_states)

        self.decoder_dense = Dense(self.target_col_num, name="decoder_dense")
        decoder_outputs = self.decoder_dense(decoder_outputs)

        # Define the model that will turn
        # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
        self.model = Model([self.encoder_inputs, self.decoder_inputs], decoder_outputs)
        self.model.compile(loss='mse',
                           metrics=[self.metric],
                           optimizer=keras.optimizers.RMSprop(lr=self.lr))
        return self.model

    def _restore_model(self):
        """Re-derive the encoder/decoder handles from ``self.model``'s layers."""
        self.encoder_inputs = self.model.input[0]  # input_1
        encoder_outputs, state_h_enc, state_c_enc = self.model.layers[2].output  # lstm_1
        self.encoder_states = [state_h_enc, state_c_enc]

        self.decoder_inputs = self.model.input[1]  # input_2
        self.decoder_lstm = self.model.layers[3]
        self.decoder_dense = self.model.layers[4]

    def _build_inference(self, mc=False):
        """Build the step-wise encoder and decoder models used for prediction.

        :param mc: when truthy the decoder LSTM is called with training=True
        :return: tuple (encoder_model, decoder_model)
        """
        training = True if mc else None
        # from our previous model - mapping encoder sequence to state vectors
        encoder_model = Model(self.encoder_inputs, self.encoder_states)

        # A modified version of the decoding stage that takes in predicted target inputs
        # and encoded state vectors, returning predicted target outputs and decoder state vectors.
        # We need to hang onto these state vectors to run the next step of the inference loop.
        decoder_state_input_h = Input(shape=(self.latent_dim,))
        decoder_state_input_c = Input(shape=(self.latent_dim,))
        decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

        decoder_outputs, state_h, state_c = self.decoder_lstm(self.decoder_inputs,
                                                              training=training,
                                                              initial_state=decoder_states_inputs)
        decoder_states = [state_h, state_c]

        decoder_outputs = self.decoder_dense(decoder_outputs)
        decoder_model = Model([self.decoder_inputs] + decoder_states_inputs,
                              [decoder_outputs] + decoder_states)
        return encoder_model, decoder_model

    def _decode_sequence(self, input_seq, mc=False):
        """Predict ``future_seq_len`` steps for every sequence in ``input_seq``.

        :param input_seq: 3-d array (sample_num, past_sequence_len, feature_num);
            the first ``target_col_num`` features of the last step seed the decoder
        :param mc: forwarded to ``_build_inference``
        :return: array of shape (sample_num, future_seq_len, target_col_num)
        """
        encoder_model, decoder_model = self._build_inference(mc=mc)
        # Encode the input as state vectors.
        states_value = encoder_model.predict(input_seq)

        # Generate empty target sequence of length 1.
        target_seq = np.zeros((len(input_seq), 1, self.target_col_num))

        # Populate the first target sequence with end of encoding series value
        target_seq[:, 0] = input_seq[:, -1, :self.target_col_num]

        # Sampling loop for a batch of sequences; decoded_seq collects the predictions.
        decoded_seq = np.zeros((len(input_seq), self.future_seq_len, self.target_col_num))

        for i in range(self.future_seq_len):
            output, h, c = decoder_model.predict([target_seq] + states_value)

            decoded_seq[:, i] = output[:, 0]

            # Update the target sequence (of length 1).
            target_seq = np.zeros((len(input_seq), 1, self.target_col_num))
            target_seq[:, 0] = output[:, 0]

            # Update states
            states_value = [h, c]

        return decoded_seq

    def _get_decoder_inputs(self, x, y):
        """
        lagged target series for teacher forcing
        decoder_input data is y delayed by one timestamp
        :param x: 3-d array in format of (sample_num, past_sequence_len, feature_num)
        :param y: 3-d array in format of (sample_num, future_sequence_len, target_col_num)
                  Need to expand dimension if y is a 2-d array with one target col
        :return: 3-d array of decoder inputs
        """
        decoder_input_data = np.zeros(y.shape)
        # Every sample is seeded from ITS OWN last past step, mirroring
        # _decode_sequence. The previous code copied y from the preceding
        # sample (valid only for consecutive rolling windows) and seeded
        # sample 0 from the last sample's final step.
        decoder_input_data[:, 0] = x[:, -1, :self.target_col_num]
        decoder_input_data[:, 1:] = y[:, :-1]
        return decoder_input_data

    def _get_len(self, x, y):
        """Record sequence/feature sizes from the shapes of ``x`` and ``y``."""
        self.past_seq_len = x.shape[1]
        self.feature_num = x.shape[2]
        # future_seq_len comes from the constructor, not from y.shape[1]
        self.target_col_num = y.shape[2]

    def _expand_y(self, y):
        """
        expand dims for y.
        :param y: 1-d, 2-d or 3-d array
        :return: y with trailing axes of size 1 appended until it is 3-d
        """
        # axis=-1 (not axis=2) so a 1-d y can be expanded as well: axis=2 is
        # out of range while y has fewer than two dimensions.
        while len(y.shape) < 3:
            y = np.expand_dims(y, axis=-1)
        return y

    def _pre_processing(self, x, y, validation_data):
        """
        pre_process input data.
        1. expand dims for y and val_y
        2. get decoder inputs for train data
        3. get decoder inputs for validation data
        :param x: train_x
        :param y: train_y
        :param validation_data: None or tuple (val_x, val_y)
        :return: network input
        """
        y = self._expand_y(y)
        self._get_len(x, y)
        decoder_input_data = self._get_decoder_inputs(x, y)
        if validation_data is not None:
            val_x, val_y = validation_data
            val_y = self._expand_y(val_y)
            val_decoder_input = self._get_decoder_inputs(val_x, val_y)
            validation_data = ([val_x, val_decoder_input], val_y)
        return x, y, decoder_input_data, validation_data

    def fit_eval(self, data, validation_data=None, mc=False, verbose=0, **config):
        """
        fit for one iteration
        :param data: tuple (x, y) of numpy arrays.
            x: 3-d array in format (no. of samples, past sequence length, feature length);
            the first target_col_num features are used as the target values.
            y: array in format (no. of samples, future sequence length[, target_col_num]);
            missing trailing axes are added.
        :param validation_data: tuple in format (x_test,y_test), data used for validation.
            If this is specified, validation result will be the optimization target for automl.
            Otherwise, train metric will be the optimization target.
        :param mc: forwarded to _build_train when the model is built here
        :param verbose: forwarded to keras fit
        :param config: optimization hyper parameters
        :return: dict mapping the configured metric to its last-epoch value
        """
        x, y = data[0], data[1]
        x, y, decoder_input_data, validation_data = self._pre_processing(x, y, validation_data)

        # if model is not initialized, __build the model
        if self.model is None:
            self._build_train(mc=mc, **config)

        hist = self.model.fit([x, decoder_input_data], y,
                              validation_data=validation_data,
                              batch_size=self.batch_size,
                              epochs=config.get("epochs", 10),
                              verbose=verbose,
                              )
        # check input metric value
        hist_metric_name = keras.metrics.get(self.metric).__name__
        # model.metrics_names are available only after a keras model has been trained/evaluated
        compiled_metric_names = self.model.metrics_names.copy()
        print(compiled_metric_names)
        compiled_metric_names.remove("loss")
        if hist_metric_name in compiled_metric_names:
            metric_name = hist_metric_name
        elif self.metric in compiled_metric_names:
            metric_name = self.metric
        else:
            raise ValueError(f"Input metric in fit_eval should be one of the metrics that are used "
                             f"to compile the model. Got metric value of {self.metric} and "
                             f"the metrics in compile are {compiled_metric_names}")
        if validation_data is None:
            result = hist.history.get(metric_name)[-1]
        else:
            result = hist.history.get('val_' + metric_name)[-1]
        return {self.metric: result}

    def evaluate(self, x, y, metric=('mse',)):
        """
        Evaluate on x, y
        :param x: input
        :param y: target
        :param metric: a sequence of metrics in string format
        :return: a list of metric evaluation results
        """
        y_pred = self.predict(x)
        if self.target_col_num == 1:
            return [Evaluator.evaluate(m, y, y_pred) for m in metric]
        return [np.array([Evaluator.evaluate(m, y[:, i, :], y_pred[:, i, :])
                          for i in range(self.future_seq_len)])
                for m in metric]

    def predict(self, x, mc=False):
        """
        Prediction on x.
        :param x: input
        :param mc: forwarded to the decoding step
        :return: predicted y (the last axis is squeezed when there is one target column)
        """
        y_pred = self._decode_sequence(x, mc=mc)
        if self.target_col_num == 1:
            y_pred = np.squeeze(y_pred, axis=2)
        return y_pred

    def predict_with_uncertainty(self, x, n_iter=100):
        """Run ``predict(x, mc=True)`` ``n_iter`` times.

        :return: tuple (mean, variance) taken over the repeated predictions
        """
        result = np.array([self.predict(x, mc=True) for _ in range(n_iter)])
        prediction = result.mean(axis=0)
        uncertainty = result.var(axis=0)
        return prediction, uncertainty

    def state_dict(self):
        """Return the model weights together with the structural config."""
        state = {
            "weights": self.model.get_weights(),
            "config": {"past_seq_len": self.past_seq_len,
                       "feature_num": self.feature_num,
                       "future_seq_len": self.future_seq_len,
                       "target_col_num": self.target_col_num,
                       "metric": self.metric,
                       "latent_dim": self.latent_dim,
                       "batch_size": self.batch_size}
        }
        return state

    def save(self, checkpoint_file, config_path=None):
        """
        save model to file.
        :param checkpoint_file: the model file path to be saved to.
        :param config_path: the config file path to be saved to (not used here).
        :return:
        """
        state_dict = self.state_dict()
        with open(checkpoint_file, "wb") as f:
            pickle.dump(state_dict, f)

    def restore(self, checkpoint_file, **config):
        """
        restore model from file
        :param checkpoint_file: the model file
        :param config: the trial config; values stored in the checkpoint take
            precedence, the remaining ones (e.g. lr, dropout) are honoured
        :return: the restored model
        """
        with open(checkpoint_file, "rb") as f:
            # NOTE: pickle can execute arbitrary code -- only load trusted checkpoints.
            state_dict = pickle.load(f)
        saved_config = state_dict["config"]

        self.past_seq_len = saved_config["past_seq_len"]
        self.feature_num = saved_config["feature_num"]
        self.future_seq_len = saved_config["future_seq_len"]
        self.target_col_num = saved_config["target_col_num"]
        self.metric = saved_config["metric"]
        self.latent_dim = saved_config["latent_dim"]
        self.batch_size = saved_config["batch_size"]

        # Merge instead of overwriting: the caller's trial config used to be
        # discarded, so lr/dropout silently fell back to their defaults.
        build_config = {**config, **saved_config}
        self._build_train(**build_config)
        self.model.set_weights(state_dict["weights"])
        self._restore_model()

    def _get_required_parameters(self):
        """Return the set of required config keys (currently none)."""
        return {
            # 'input_shape_x',
            # 'input_shape_y',
            # 'out_units'
        }

    def _get_optional_parameters(self):
        """Return the set of optional config keys."""
        return {
            'past_seq_len',
            'latent_dim',
            'dropout',
            'metric',
            'lr',
            'epochs',
            'batch_size'
        }