def test_single_cost_and_last_layer(cost_func, last_layer):
    model = NeuralNetwork(
        optimizer=StaticGradientDescent(default_parameters['learning_rate']),
        loss=cost_func,
        layers=[
            Dense(layer_size=50, activation_func=ReLu(),
                  weight_initializer=XavierInitializer()),
            Dense(layer_size=10, activation_func=last_layer,
                  weight_initializer=XavierInitializer())
        ],
        callbacks=[
            LoggerCallback(),
            PlotCallback(
                f'./lab_3/cost/func={cost_func.get_name()}&last_layer={last_layer.get_name()}'
            )
        ])

    model.fit(x_train=X_train, y_train=y_train, x_val=X_val, y_val=y_val,
              epochs=default_parameters['epochs'],
              batch_size=default_parameters['batch_size'])
    model.test(X_test, y_test)
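# Hypothetical driver for the test above. CrossEntropy and Softmax are used
# elsewhere in this codebase; MeanSquaredError and Sigmoid are assumed names
# used here purely for illustration and may differ in the actual implementation.
for cost_func in (CrossEntropy(), MeanSquaredError()):
    for last_layer in (Softmax(), Sigmoid()):
        test_single_cost_and_last_layer(cost_func, last_layer)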
def get_variables(cls,
                  all_units,
                  vf_share_layers=False,
                  name=None):
    name = name or "keras_mlp_policy"
    if "policy" in name:
        last_kernel_init_value = 0.01
    elif "vf" in name:
        last_kernel_init_value = 1.0
    else:
        raise NotImplementedError

    variables = {}
    with tf.name_scope(name):
        for i, (size_in, size_out) in \
                enumerate(zip(all_units, all_units[1:])):
            name = f"dense_{i}"
            variables[name] = \
                Dense.get_variables(
                    size_in, size_out,
                    name=name,
                    kernel_initializer=(
                        normc_initializer(1.0)
                        if i < len(all_units) - 2
                        else normc_initializer(last_kernel_init_value)))
        if vf_share_layers:
            name = "dense_vf"
            variables[name] = \
                Dense.get_variables(
                    size_in, 1,
                    name=name,
                    kernel_initializer=normc_initializer(1.0))

    # tricky to remove the name count of the dummy instance
    # since the default name is used here
    # graph = tf.get_default_graph()
    # K.PER_GRAPH_LAYER_NAME_UIDS[graph][
    #     ("", dummy_instance.name)] -= 1
    return variables
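# Hypothetical call to the classmethod above (assumed to belong to the KerasMLP
# class whose constructor appears later in this file). obs_dim and act_dim are
# illustrative placeholders for the observation and action dimensions.
obs_dim, act_dim = 17, 6
mlp_variables = KerasMLP.get_variables(
    all_units=[obs_dim, 64, 64, act_dim],
    vf_share_layers=True,
    name="keras_mlp_policy")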
def test_single_initializer(initializer):
    model = NeuralNetwork(
        optimizer=AdamOptimizer(
            learning_rate=default_parameters['learning_rate']),
        loss=CrossEntropy(),
        layers=[
            Flatten(),
            Dense(layer_size=50, activation_func=ReLu(),
                  weight_initializer=initializer),
            Dense(layer_size=10, activation_func=Softmax(),
                  weight_initializer=initializer)
        ],
        callbacks=[
            LoggerCallback(),
            PlotCallback(f'./lab_3/initializers/{initializer.get_name()}')
        ])

    model.fit(x_train=X_train, y_train=y_train, x_val=X_val, y_val=y_val,
              epochs=default_parameters['epochs'],
              batch_size=default_parameters['batch_size'])
    model.test(X_test, y_test)
def switch(layer, t):
    layer_type = layer[0]
    # makeshift switch statement dispatching on the layer type string
    if layer_type == "conv":
        c = Conv2D(t, int(layer[1]), (int(layer[2]), int(layer[3])))
        return c.conv
    elif layer_type == "activation":
        a = Activation(t)
        return a.activated_tensor
    elif layer_type == "pool":
        p = MaxPool(t)
        return p.pooled
    elif layer_type == "dense":
        d = Dense(int(layer[1]), t)
        return d.fpass
    elif layer_type == "output":
        d = Dense(int(layer[1]), t)
        return d.predict()
    else:
        raise ValueError(f"unknown layer type: {layer_type}")
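# Hypothetical usage of switch(): fold a parsed layer specification into the
# graph tensor by tensor. The spec format mirrors the branches above; the
# input placeholder and the layer sizes are assumptions for illustration only.
spec = [
    ["conv", "32", "3", "3"],
    ["activation"],
    ["pool"],
    ["dense", "128"],
    ["output", "10"],
]
t = input_placeholder  # assumed: e.g. a batch of images of shape (None, 28, 28, 1)
for layer in spec:
    t = switch(layer, t)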
def test_single_initializer_with_convo(initializer):
    model = NeuralNetwork(
        optimizer=AdamOptimizer(
            learning_rate=default_parameters['learning_rate'] * 10),
        loss=CrossEntropy(),
        layers=[
            Convolution2D(num_of_filters=8, kernel=(3, 3),
                          activation_func=ReLu()),
            MaxPooling2D(pool_size=(2, 2), stride=(2, 2)),
            Flatten(),
            Dense(layer_size=50, activation_func=ReLu(),
                  weight_initializer=initializer),
            Dense(layer_size=10, activation_func=Softmax(),
                  weight_initializer=initializer)
        ],
        callbacks=[
            LoggerCallback(),
            PlotCallback(f'./lab_3/initializers/{initializer.get_name()}')
        ])

    model.fit(x_train=X_train, y_train=y_train, x_val=X_val, y_val=y_val,
              epochs=default_parameters['epochs'],
              batch_size=default_parameters['batch_size'])
    model.test(X_test, y_test)
def test_single_activation_function(activation):
    model = NeuralNetwork(
        optimizer=StaticGradientDescent(
            learning_rate=default_parameters['learning_rate']),
        loss=CrossEntropy(),
        layers=[
            Dense(layer_size=50, activation_func=activation,
                  weight_initializer=XavierInitializer()),
            Dense(layer_size=10, activation_func=Softmax(),
                  weight_initializer=XavierInitializer())
        ],
        callbacks=[
            LoggerCallback(),
            PlotCallback(f'./results/activations/{activation.get_name()}')
        ])

    model.fit(x_train=X_train, y_train=y_train, x_val=X_val, y_val=y_val,
              epochs=default_parameters['epochs'],
              batch_size=default_parameters['batch_size'])
    model.test(X_test, y_test)
def __init__(self,
             rnn_units,
             rnn_output_units,
             rnn_output_activation,
             mlp_units_exclude_first,
             mlp_activation,
             custom_params=None,
             vf_share_layers=False,
             use_linear_baseline=False):
    keras_models.Model.__init__(self, name="keras_tesp_policy")
    custom_params = custom_params or {}
    self.cell = GRUCell(
        units=rnn_units,
        custom_params=custom_params.get("cell"))
    self.dense_projection = Dense(
        units=rnn_output_units,
        custom_params=custom_params.get("projection"))
    self.mlp_policy = KerasMLP(
        layer_units_exclude_first=mlp_units_exclude_first,
        activation=mlp_activation,
        vf_share_layers=vf_share_layers,
        name="keras_mlp_policy")
    if not vf_share_layers and not use_linear_baseline:
        self.mlp_vf = KerasMLP(
            layer_units_exclude_first=mlp_units_exclude_first[:-1] + [1],
            activation=mlp_activation,
            vf_share_layers=False,
            name="keras_mlp_vf")
def get_variables(cls,
                  rnn_input_units,
                  rnn_units,
                  rnn_output_units,
                  projection_kernel_init_value,
                  # mlp_input_units,
                  # mlp_units,
                  # vf_share_layers=False
                  name=None):
    name = name or "keras_tesp_policy"
    variables = {}
    with tf.name_scope(name):
        variables["cell"] = GRUCell.get_variables(
            rnn_input_units, rnn_units)
        variables["projection"] = Dense.get_variables(
            rnn_units, rnn_output_units,
            name="dense_projection",
            kernel_initializer=normc_initializer(
                projection_kernel_init_value))
        # variables["mlp"] = KerasMLP.get_dummy_variables(
        #     [mlp_input_units + rnn_output_units] + mlp_units,
        #     vf_share_layers=vf_share_layers, **mlp_kwargs)

    # graph = tf.get_default_graph()
    # K.PER_GRAPH_LAYER_NAME_UIDS[graph][
    #     ("", dummy_instance.name)] -= 1
    return variables
def __init__(self,
             layer_units_exclude_first,
             activation,
             custom_params=None,
             vf_share_layers=False,
             name=None):
    """
    layer_units: list, a list of the number of units of
        all layers except the input layer
    """
    name = name or "keras_mlp_policy"
    if "policy" in name:
        last_kernel_init_value = 0.01
    elif "vf" in name:
        last_kernel_init_value = 0.01
    else:
        raise NotImplementedError

    keras_models.Model.__init__(self, name=name)
    custom_params = custom_params or {}
    for i, size in enumerate(layer_units_exclude_first):
        name = f"dense_{i}"
        layer = Dense(
            size,
            custom_params=custom_params.get(name),
            activation=(activation
                        if i < len(layer_units_exclude_first) - 1
                        else None),
            kernel_initializer=(normc_initializer(1.0)
                                if i < len(layer_units_exclude_first) - 1
                                else normc_initializer(last_kernel_init_value)),
            name=name)
        setattr(self, name, layer)
    if vf_share_layers:
        name = "dense_vf"
        layer = Dense(1,
                      custom_params=custom_params.get(name),
                      activation=None,
                      kernel_initializer=normc_initializer(1.0),
                      name=name)
        setattr(self, name, layer)
    self._vf_share_layers = vf_share_layers
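# Hypothetical construction of the MLP defined above: two hidden layers of 64
# units followed by an output layer of size act_dim, with a shared value head.
# act_dim and the "tanh" activation are illustrative assumptions.
act_dim = 6
policy = KerasMLP(
    layer_units_exclude_first=[64, 64, act_dim],
    activation="tanh",
    vf_share_layers=True,
    name="keras_mlp_policy")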
def make_cnn(input_dim, num_of_classes):
    conv1 = Convolution(input_dim=input_dim, pad=2, stride=2,
                        num_filters=10, filter_size=3, seed=1)
    relu1 = Relu()
    maxpool1 = Maxpool(input_dim=conv1.output_dim, filter_size=2, stride=1)
    flatten = Flatten(seed=1)
    dense1 = Dense(input_dim=np.prod(maxpool1.output_dim),
                   output_dim=num_of_classes, seed=1)
    layers = [conv1, relu1, maxpool1, flatten, dense1]
    return layers
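# Hypothetical usage of make_cnn(): build the layer stack for 28x28 single
# channel inputs and 10 classes. The (channels, height, width) layout of
# input_dim is an assumption about the Convolution layer's convention.
layers = make_cnn(input_dim=(1, 28, 28), num_of_classes=10)
for layer in layers:
    print(type(layer).__name__)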
import numpy as np

from core.model import Model
from layers.input import Input
from layers.dense import Dense
from util.cost_functions import L2

if __name__ == '__main__':
    # demo MLP
    data_x = np.array([1, 2])
    data_y = np.array([0.2, 0.4])
    train_x = np.reshape(data_x, (len(data_x), 1, 1))
    train_y = np.reshape(data_y, (len(data_y), 1, 1))

    model = Model()
    model.add(Input(1))
    model.add(Dense(3))
    model.add(Dense(1))
    model.compile(cost=L2(), optimizer='sgd', num_epochs=30000,
                  batch_size=1, lr=0.1)
    model.train(train_x, train_y)

    test_data_x = np.array([1])
    test_x = np.reshape(test_data_x, (len(test_data_x), 1))
    print(model.predict(test_x))
#         Flatten(),
#         Dense(layer_size=50, activation_func=ReLu(), weight_initializer=HeInitializer()),
#         Dense(layer_size=10, activation_func=Softmax(), weight_initializer=HeInitializer())
#     ]
# },
{
    'test_name': 'normal_C3x3-F2_MP2x2_F_D50_D10',
    'layers': [
        Convolution2D(num_of_filters=2, kernel=(3, 3), activation_func=ReLu()),
        MaxPooling2D(pool_size=(2, 2), stride=(2, 2)),
        Flatten(),
        Dense(layer_size=50, activation_func=ReLu(), weight_initializer=HeInitializer()),
        Dense(layer_size=10, activation_func=Softmax(), weight_initializer=HeInitializer())
    ]
},
{
    'test_name': 'normal_C3x3-F4_MP2x2_F_D50_D10',
    'layers': [
        Convolution2D(num_of_filters=4, kernel=(3, 3), activation_func=ReLu()),
        MaxPooling2D(pool_size=(2, 2), stride=(2, 2)),
        Flatten(),
import os
import sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import numpy as np

from layers.activation import Activation
from layers.dense import Dense
from model.model import Model

X_train = np.array([[[0, 0]], [[0, 1]], [[1, 0]], [[1, 1]]])
y_train = np.array([
    [[0]],  # F
    [[1]],  # M
    [[1]],  # M
    [[0]]   # F
])

model = Model()
model.add(Dense(2, 2))
model.add(Activation('tanh'))
model.add(Dense(2, 2))
model.add(Activation('tanh'))
model.add(Activation('softmax'))

model.fit(X_train, y_train, epochs=1000, learning_rate=0.1)

frank = np.array(X_train)  # predict on the training inputs
pred = model.predict(frank)
print(np.array(pred))
def build_model(self, in_dim, h_dim, out_dim, model=None,
                weight_decay=0.01, optimizer='adadelta'):
    x_v = T.tensor3('x_v', dtype=data_type)  # batch_size x dim
    x_s = T.tensor3('x_s', dtype=data_type)  # Nt-1 x batch_size x dim
    y = T.tensor3('y', dtype=data_type)      # Nt x batch_size x dim
    mask_v = T.matrix('mask_v', dtype=data_type)  # Nt x batch_size, video mask
    mask_s = T.matrix('mask_s', dtype=data_type)  # Nt x batch_size, sentence mask
    deterministic = T.scalar('deterministic', dtype=data_type)
    one = T.constant(1, dtype=data_type)
    zero = T.constant(0, dtype=data_type)
    lr = T.scalar('lr', dtype=data_type)
    mask_gen = T.matrix('mask_gen', dtype=data_type)
    mb_BOS = T.vector('mb_BOS', dtype=data_type)
    # maxlen = T.scalar('ml', dtype='int64')

    # layers
    l_lstm_f = LSTM(4096, h_dim)
    l_lstm_b = LSTM(4096, h_dim)
    l_lstm_v = LSTM_SA(h_dim * 2, h_dim, 4096, h_dim * 2)
    l_word_em = Embedding(out_dim, in_dim)
    l_lstm_t = LSTM_SA(in_dim, h_dim, h_dim, h_dim)
    l_map = Dense(h_dim, out_dim)
    layers = [l_lstm_f, l_lstm_b, l_lstm_v, l_word_em, l_lstm_t, l_map]

    # dropout layers
    l_drop_xv = drop(0.2, deterministic=deterministic)
    l_drop_em = drop(0.2, deterministic=deterministic)
    l_drop_t = drop(0.5, deterministic=deterministic)
    l_drop_f = drop(0.5, deterministic=deterministic)
    l_drop_b = drop(0.5, deterministic=deterministic)

    x_v = l_drop_xv.get_outputs(x_v)

    # forward pass
    out_lstm_f, _ = l_lstm_f.get_outputs(x_v, mask_v)
    out_lstm_f = l_drop_f.get_outputs(out_lstm_f)
    out_lstm_b, _ = l_lstm_b.get_outputs(x_v[::-1], mask_v[::-1])
    out_lstm_b = l_drop_b.get_outputs(out_lstm_b)
    in_lstm_v = T.concatenate([out_lstm_f, out_lstm_b[::-1]], axis=2)
    out_lstm_v, c_v = l_lstm_v.get_outputs(in_lstm_v, context=x_v,
                                           mask_x=mask_v, mask_c=mask_v)
    out_word_em = l_word_em.get_outputs(x_s)
    out_word_em = l_drop_em.get_outputs(out_word_em)
    out_lstm_t, _ = l_lstm_t.get_outputs(out_word_em, mask_x=mask_s,
                                         context=out_lstm_v, mask_c=mask_v,
                                         h0=out_lstm_v[-1], c0=c_v[-1])
    out_lstm_t = l_drop_t.get_outputs(out_lstm_t)
    out_map = l_map.get_outputs(out_lstm_t)
    pred, _ = theano.scan(NN.softmax, sequences=out_map)

    # cost calculation
    cost_o, _ = theano.scan(NN.categorical_crossentropy, sequences=[pred, y])
    cost_o = cost_o * mask_s
    cost_o = cost_o.sum() / mask_s.sum()
    params_re = []
    for l in layers:
        params_re += l.regulariable
    cost_w = 0.5 * weight_decay * l2(params_re) / mask_s.sum()
    cost = cost_o + cost_w

    self.params = (l_lstm_f.params + l_lstm_b.params + l_lstm_v.params +
                   l_word_em.params + l_lstm_t.params + l_map.params)

    p_ctx = T.dot(out_lstm_v, l_lstm_t.Wpc) + l_lstm_t.bc

    def _step(x_t, h_tm1, c_tm1, p_ctx, context):
        m_t = T.ones_like(x_t).astype(data_type)
        o_em_t = l_word_em.W[x_t.astype('int64')]
        h_t_t, c_t_t = l_lstm_t._step(m_t, o_em_t, h_tm1, c_tm1,
                                      p_ctx, context, mask_v)
        o_map_t = l_map._step(h_t_t)
        prob_p = NN.softmax(o_map_t)
        # index of the most probable word, cast back to data_type
        word_t = T.argmax(prob_p, axis=1).astype(data_type)
        return word_t, h_t_t, c_t_t

    [words_idx, out_val_t, c_t], _ = theano.scan(
        _step,
        outputs_info=[
            dict(initial=mb_BOS),
            dict(initial=out_lstm_v[-1]),
            dict(initial=c_v[-1])
        ],
        non_sequences=[p_ctx, out_lstm_v],
        n_steps=self.t_maxlen)

    val_model = theano.function(inputs=[x_v, mask_v, mb_BOS],
                                givens={deterministic: one},
                                outputs=words_idx)

    self.optimizer = optimizers.get(optimizer)
    grads = self.optimizer.get_gradients(cost, self.params)
    updates = self.optimizer.get_updates(cost, self.params)
    train_model = theano.function(inputs=[x_v, x_s, y, mask_v, mask_s],
                                  outputs=[cost, cost_o],
                                  updates=updates,
                                  givens={deterministic: zero})
    return train_model, val_model