Example #1
def test_single_cost_and_last_layer(cost_func, last_layer):
    model = NeuralNetwork(
        optimizer=StaticGradientDescent(default_parameters['learning_rate']),
        loss=cost_func,
        layers=[
            Dense(layer_size=50,
                  activation_func=ReLu(),
                  weight_initializer=XavierInitializer()),
            Dense(layer_size=10,
                  activation_func=last_layer,
                  weight_initializer=XavierInitializer())
        ],
        callbacks=[
            LoggerCallback(),
            PlotCallback(
                f'./lab_3/cost/func={cost_func.get_name()}&last_layer={last_layer.get_name()}'
            )
        ])

    model.fit(x_train=X_train,
              y_train=y_train,
              x_val=X_val,
              y_val=y_val,
              epochs=default_parameters['epochs'],
              batch_size=default_parameters['batch_size'])

    model.test(X_test, y_test)
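
For context, a sweep over loss/output-activation pairs could drive this test. A minimal sketch, assuming the CrossEntropy and Softmax classes that appear elsewhere in this listing; MSE and Sigmoid are illustrative guesses, not confirmed API:

# Hypothetical driver; CrossEntropy and Softmax appear in the other
# examples here, while MSE and Sigmoid are illustrative guesses.
for cost_func, last_layer in [(CrossEntropy(), Softmax()),
                              (MSE(), Sigmoid())]:
    test_single_cost_and_last_layer(cost_func, last_layer)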
Example #2
def get_variables(cls, all_units, vf_share_layers=False, name=None):
    name = name or "keras_mlp_policy"
    if "policy" in name:
        last_kernel_init_value = 0.01
    elif "vf" in name:
        last_kernel_init_value = 1.0
    else:
        raise NotImplementedError
    variables = {}
    with tf.name_scope(name):
        for i, (size_in, size_out) in \
                enumerate(zip(all_units, all_units[1:])):
            name = f"dense_{i}"
            variables[name] = \
                Dense.get_variables(
                    size_in,
                    size_out,
                    name=name,
                    kernel_initializer=(
                        normc_initializer(1.0) if i < len(all_units) - 2
                        else normc_initializer(last_kernel_init_value)))
        if vf_share_layers:
            name = "dense_vf"
            variables[name] = \
                Dense.get_variables(
                    size_in,
                    1,
                    name=name,
                    kernel_initializer=normc_initializer(1.0))
    # tricky to remove the name count of the dummy instance
    # since the default name is used here
    # graph = tf.get_default_graph()
    # K.PER_GRAPH_LAYER_NAME_UIDS[graph][
    #     ("", dummy_instance.name)] -= 1
    return variables
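
A call along the following lines would build the per-layer variable dicts. This is a sketch assuming the classmethod belongs to the KerasMLP class seen in Example #7; the unit sizes are illustrative (all_units lists the input width followed by each layer's output width):

# Hypothetical usage; the class name and sizes are assumptions.
policy_vars = KerasMLP.get_variables([17, 64, 64, 6],
                                     vf_share_layers=True,
                                     name="keras_mlp_policy")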
Example #3
def test_single_initializer(initializer):
    model = NeuralNetwork(
        optimizer=AdamOptimizer(
            learning_rate=default_parameters['learning_rate']),
        loss=CrossEntropy(),
        layers=[
            Flatten(),
            Dense(layer_size=50,
                  activation_func=ReLu(),
                  weight_initializer=initializer),
            Dense(layer_size=10,
                  activation_func=Softmax(),
                  weight_initializer=initializer)
        ],
        callbacks=[
            LoggerCallback(),
            PlotCallback(f'./lab_3/initializers/{initializer.get_name()}')
        ])

    model.fit(x_train=X_train,
              y_train=y_train,
              x_val=X_val,
              y_val=y_val,
              epochs=default_parameters['epochs'],
              batch_size=default_parameters['batch_size'])

    model.test(X_test, y_test)
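
A sketch of how this test might be swept over initializers; XavierInitializer and HeInitializer both appear elsewhere in this listing, while the loop itself is assumed:

# Hypothetical sweep; both initializer classes appear in this listing.
for initializer in [XavierInitializer(), HeInitializer()]:
    test_single_initializer(initializer)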
Example #4
        def switch(layer, t):
            layer_type = layer[0]

            # makeshift switch statement
            if layer_type == "conv":
                c = Conv2D(t, int(layer[1]), (int(layer[2]), int(layer[3])))
                return c.conv

            elif layer_type == "activation":
                a = Activation(t)
                return a.activated_tensor

            elif layer_type == "pool":
                p = MaxPool(t)
                return p.pooled

            elif layer_type == "dense":
                d = Dense(int(layer[1]), t)
                return d.fpass

            elif layer_type == "output":
                d = Dense(int(layer[1]), t)
                return d.predict()
            else:
                # fail loudly instead of printing and returning None
                raise ValueError(f"unknown layer type: {layer_type}")
Example #5
def test_single_initializer_with_convo(initializer):
    model = NeuralNetwork(
        optimizer=AdamOptimizer(
            learning_rate=default_parameters['learning_rate'] * 10),
        loss=CrossEntropy(),
        layers=[
            Convolution2D(num_of_filters=8,
                          kernel=(3, 3),
                          activation_func=ReLu()),
            MaxPooling2D(pool_size=(2, 2), stride=(2, 2)),
            Flatten(),
            Dense(layer_size=50,
                  activation_func=ReLu(),
                  weight_initializer=initializer),
            Dense(layer_size=10,
                  activation_func=Softmax(),
                  weight_initializer=initializer)
        ],
        callbacks=[
            LoggerCallback(),
            PlotCallback(f'./lab_3/initializers/{initializer.get_name()}')
        ])

    model.fit(x_train=X_train,
              y_train=y_train,
              x_val=X_val,
              y_val=y_val,
              epochs=default_parameters['epochs'],
              batch_size=default_parameters['batch_size'])

    model.test(X_test, y_test)
Example #6
def test_single_activation_function(activation):
    model = NeuralNetwork(
        optimizer=StaticGradientDescent(
            learning_rate=default_parameters['learning_rate']),
        loss=CrossEntropy(),
        layers=[
            Dense(layer_size=50,
                  activation_func=activation,
                  weight_initializer=XavierInitializer()),
            Dense(layer_size=10,
                  activation_func=Softmax(),
                  weight_initializer=XavierInitializer())
        ],
        callbacks=[
            LoggerCallback(),
            PlotCallback(f'./results/activations/{activation.get_name()}')
        ])

    model.fit(x_train=X_train,
              y_train=y_train,
              x_val=X_val,
              y_val=y_val,
              epochs=default_parameters['epochs'],
              batch_size=default_parameters['batch_size'])

    model.test(X_test, y_test)
Example #7
    def __init__(self,
                 rnn_units,
                 rnn_output_units,
                 rnn_output_activation,
                 mlp_units_exclude_first,
                 mlp_activation,
                 custom_params=None,
                 vf_share_layers=False,
                 use_linear_baseline=False):
        keras_models.Model.__init__(self, name="keras_tesp_policy")

        custom_params = custom_params or {}
        self.cell = GRUCell(
            units=rnn_units,
            custom_params=custom_params.get("cell"))
        self.dense_projection = Dense(
            units=rnn_output_units,
            custom_params=custom_params.get("projection"))
        self.mlp_policy = KerasMLP(
            layer_units_exclude_first=mlp_units_exclude_first,
            activation=mlp_activation,
            vf_share_layers=vf_share_layers,
            name="keras_mlp_policy")
        if not vf_share_layers and not use_linear_baseline:
            self.mlp_vf = KerasMLP(
                layer_units_exclude_first=mlp_units_exclude_first[:-1] + [1],
                activation=mlp_activation,
                vf_share_layers=False,
                name="keras_mlp_vf")
Example #8
    def get_variables(cls,
                      rnn_input_units,
                      rnn_units,
                      rnn_output_units,
                      projection_kernel_init_value,
                      # mlp_input_units,
                      # mlp_units,
                      # vf_share_layers=False
                      name=None):
        name = name or "keras_tesp_policy"
        variables = {}
        with tf.name_scope(name):
            variables["cell"] = GRUCell.get_variables(
                rnn_input_units, rnn_units)
            variables["projection"] = Dense.get_variables(
                rnn_units,
                rnn_output_units,
                name="dense_projection",
                kernel_initializer=normc_initializer(projection_kernel_init_value))
            # variables["mlp"] = KerasMLP.get_dummy_variables(
            #     [mlp_input_units + rnn_output_units] + mlp_units,
            #     vf_share_layers=vf_share_layers, **mlp_kwargs)

        # graph = tf.get_default_graph()
        # K.PER_GRAPH_LAYER_NAME_UIDS[graph][
        #     ("", dummy_instance.name)] -= 1
        return variables
Example #9
    def __init__(self,
                 layer_units_exclude_first,
                 activation,
                 custom_params=None,
                 vf_share_layers=False,
                 name=None):
        """
            layer_units: list, a list of the number of units of all layers
                except the input layer
        """
        name = name or "keras_mlp_policy"
        if "policy" in name:
            last_kernel_init_value = 0.01
        elif "vf" in name:
            last_kernel_init_value = 0.01
        else:
            raise NotImplementedError
        keras_models.Model.__init__(self, name=name)

        custom_params = custom_params or {}
        for i, size in enumerate(layer_units_exclude_first):
            name = f"dense_{i}"
            layer = Dense(
                size,
                custom_params=custom_params.get(name),
                activation=(activation if
                            i < len(layer_units_exclude_first) - 1 else None),
                kernel_initializer=(normc_initializer(1.0) if
                                    i < len(layer_units_exclude_first) - 1 else
                                    normc_initializer(last_kernel_init_value)),
                name=name)
            setattr(self, name, layer)
        if vf_share_layers:
            name = f"dense_vf"
            layer = Dense(1,
                          custom_params=custom_params.get(name),
                          activation=None,
                          kernel_initializer=normc_initializer(1.0),
                          name=name)
            setattr(self, name, layer)
        self._vf_share_layers = vf_share_layers
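
Constructing the policy this way registers one Dense attribute per entry. A minimal sketch with illustrative sizes; the "tanh" string is a guess about what this custom Dense accepts as an activation:

# Hypothetical construction; sizes and the activation value are guesses.
mlp = KerasMLP(layer_units_exclude_first=[64, 64, 4],
               activation="tanh",
               vf_share_layers=True,
               name="keras_mlp_policy")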
Example #10
def make_cnn(input_dim, num_of_classes):
    conv1 = Convolution(input_dim=input_dim,
                        pad=2,
                        stride=2,
                        num_filters=10,
                        filter_size=3,
                        seed=1)
    relu1 = Relu()
    maxpool1 = Maxpool(input_dim=conv1.output_dim, filter_size=2, stride=1)
    flatten = Flatten(seed=1)
    dense1 = Dense(input_dim=np.prod(maxpool1.output_dim),
                   output_dim=num_of_classes,
                   seed=1)

    layers = [conv1, relu1, maxpool1, flatten, dense1]
    return layers
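
A sketch of a call site, assuming input_dim follows a (channels, height, width) convention for this custom Convolution layer:

# Hypothetical usage; the (1, 28, 28) MNIST-style shape is a guess
# about this layer's input_dim convention.
layers = make_cnn(input_dim=(1, 28, 28), num_of_classes=10)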
Example #11
import numpy as np
from core.model import Model
from layers.input import Input
from layers.dense import Dense
from util.cost_functions import L2

if __name__ == '__main__':
    # demo MLP
    data_x = np.array([1, 2])
    data_y = np.array([0.2, 0.4])

    train_x = np.reshape(data_x, (len(data_x), 1, 1))
    train_y = np.reshape(data_y, (len(data_y), 1, 1))

    model = Model()
    model.add(Input(1))
    model.add(Dense(3))
    model.add(Dense(1))
    model.compile(cost=L2(),
                  optimizer='sgd',
                  num_epochs=30000,
                  batch_size=1,
                  lr=0.1)
    model.train(train_x, train_y)

    test_data_x = np.array([1])
    test_x = np.reshape(test_data_x, (len(test_data_x), 1))
    print(model.predict(test_x))
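
Note that training reshapes to (N, 1, 1) while the test input is reshaped to (N, 1). If Model.predict expects the training shape (an assumption; the custom Model API is not shown), the reshape would instead be:

# Assumed fix, mirroring the (N, 1, 1) shape used for train_x above.
test_x = np.reshape(test_data_x, (len(test_data_x), 1, 1))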
Example #12
#         Flatten(),
#         Dense(layer_size=50, activation_func=ReLu(), weight_initializer=HeInitializer()),
#         Dense(layer_size=10, activation_func=Softmax(), weight_initializer=HeInitializer())
#     ]
# },
{
    'test_name':
    'normal_C3x3-F2_MP2x2_F_D50_D10',
    'layers': [
        Convolution2D(num_of_filters=2,
                      kernel=(3, 3),
                      activation_func=ReLu()),
        MaxPooling2D(pool_size=(2, 2), stride=(2, 2)),
        Flatten(),
        Dense(layer_size=50,
              activation_func=ReLu(),
              weight_initializer=HeInitializer()),
        Dense(layer_size=10,
              activation_func=Softmax(),
              weight_initializer=HeInitializer())
    ]
},
{
    'test_name':
    'normal_C3x3-F4_MP2x2_F_D50_D10',
    'layers': [
        Convolution2D(num_of_filters=4,
                      kernel=(3, 3),
                      activation_func=ReLu()),
        MaxPooling2D(pool_size=(2, 2), stride=(2, 2)),
        Flatten(),
Example #13
import os
import sys

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import numpy as np
from layers.activation import Activation
from layers.dense import Dense
from model.model import Model

X_train = np.array([[[0, 0]], [[0, 1]], [[1, 0]], [[1, 1]]])
y_train = np.array([
    [[0]],  # 0 XOR 0
    [[1]],  # 0 XOR 1
    [[1]],  # 1 XOR 0
    [[0]]   # 1 XOR 1
])

model = Model()

model.add(Dense(2, 2))
model.add(Activation('tanh'))
model.add(Dense(2, 2))
model.add(Activation('tanh'))
model.add(Activation('softmax'))

model.fit(X_train, y_train, epochs=1000, learning_rate=0.1)

inputs = np.array(X_train)  # reuse the training inputs as a sanity check

pred = model.predict(inputs)

print(np.array(pred))
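
A single-sample prediction sketch; the (1, 1, 2) shape mirrors one row of X_train above:

# Hypothetical single-sample call.
sample = np.array([[[1, 0]]])
print(model.predict(sample))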
Example #14
    def build_model(self,
                    in_dim,
                    h_dim,
                    out_dim,
                    model=None,
                    weight_decay=0.01,
                    optimizer='adadelta'):

        x_v = T.tensor3('x_v', dtype=data_type)  # batch_size x dim
        x_s = T.tensor3('x_s', dtype=data_type)  # Nt-1 x batch_size x dim
        y = T.tensor3('y', dtype=data_type)  # Nt x batch_size x dim
        mask_v = T.matrix(
            'mask_v', dtype=data_type)  # Nt x batch_size mask_sent mask_video
        mask_s = T.matrix('mask_s', dtype=data_type)
        deterministic = T.scalar('deterministic', dtype=data_type)
        one = T.constant(1, dtype=data_type)
        zero = T.constant(0, dtype=data_type)
        lr = T.scalar('lr', dtype=data_type)
        mask_gen = T.matrix('mask_gen', dtype=data_type)
        mb_BOS = T.vector('mb_BOS', dtype=data_type)
        #maxlen = T.scalar('ml',dtype='int64')
        # layers
        l_lstm_f = LSTM(4096, h_dim)
        l_lstm_b = LSTM(4096, h_dim)
        l_lstm_v = LSTM_SA(h_dim * 2, h_dim, 4096, h_dim * 2)
        l_word_em = Embedding(out_dim, in_dim)
        l_lstm_t = LSTM_SA(in_dim, h_dim, h_dim, h_dim)
        l_map = Dense(h_dim, out_dim)
        layers = [l_lstm_f, l_lstm_b, l_lstm_v, l_word_em, l_lstm_t, l_map]

        # drop_layers
        l_drop_xv = drop(0.2, deterministic=deterministic)
        l_drop_em = drop(0.2, deterministic=deterministic)
        l_drop_t = drop(0.5, deterministic=deterministic)
        l_drop_f = drop(0.5, deterministic=deterministic)
        l_drop_b = drop(0.5, deterministic=deterministic)
        x_v = l_drop_xv.get_outputs(x_v)

        # forward pass
        out_lstm_f, _ = l_lstm_f.get_outputs(x_v, mask_v)
        out_lstm_f = l_drop_f.get_outputs(out_lstm_f)
        out_lstm_b, _ = l_lstm_b.get_outputs(x_v[::-1], mask_v[::-1])
        out_lstm_b = l_drop_b.get_outputs(out_lstm_b)
        in_lstm_v = T.concatenate([out_lstm_f, out_lstm_b[::-1]], axis=2)
        out_lstm_v, c_v = l_lstm_v.get_outputs(in_lstm_v,
                                               context=x_v,
                                               mask_x=mask_v,
                                               mask_c=mask_v)
        out_word_em = l_word_em.get_outputs(x_s)
        out_word_em = l_drop_em.get_outputs(out_word_em)
        out_lstm_t, _ = l_lstm_t.get_outputs(out_word_em,
                                             mask_x=mask_s,
                                             context=out_lstm_v,
                                             mask_c=mask_v,
                                             h0=out_lstm_v[-1],
                                             c0=c_v[-1])
        out_lstm_t = l_drop_t.get_outputs(out_lstm_t)
        out_map = l_map.get_outputs(out_lstm_t)

        pred, _ = theano.scan(NN.softmax, sequences=out_map)

        # cost calculation
        cost_o, _ = theano.scan(NN.categorical_crossentropy,
                                sequences=[pred, y])
        cost_o = cost_o * mask_s
        cost_o = cost_o.sum() / mask_s.sum()
        params_re = []
        for l in layers:
            params_re += l.regulariable
        cost_w = 0.5 * weight_decay * l2(params_re) / mask_s.sum()
        cost = cost_o + cost_w

        self.params = l_lstm_f.params + l_lstm_b.params + l_lstm_v.params + l_word_em.params + l_lstm_t.params + l_map.params

        p_ctx = T.dot(out_lstm_v, l_lstm_t.Wpc) + l_lstm_t.bc

        def _step(x_t, h_tm1, c_tm1, p_ctx, context):
            m_t = T.ones_like(x_t).astype(data_type)
            o_em_t = l_word_em.W[x_t.astype('int64')]
            h_t_t, c_t_t = l_lstm_t._step(m_t, o_em_t, h_tm1, c_tm1, p_ctx,
                                          context, mask_v)
            o_map_t = l_map._step(h_t_t)
            prob_p = NN.softmax(o_map_t)
            word_t = T.argmax(prob_p, axis=1).astype(
                data_type)  # return an integer, the index of max value
            return word_t, h_t_t, c_t_t

        [words_idx, out_val_t,
         c_t], _ = theano.scan(_step,
                               outputs_info=[
                                   dict(initial=mb_BOS),
                                   dict(initial=out_lstm_v[-1]),
                                   dict(initial=c_v[-1])
                               ],
                               non_sequences=[p_ctx, out_lstm_v],
                               n_steps=self.t_maxlen)

        val_model = theano.function(inputs=[x_v, mask_v, mb_BOS],
                                    givens={deterministic: one},
                                    outputs=words_idx)

        self.optimizer = optimizers.get(optimizer)
        grads = self.optimizer.get_gradients(cost, self.params)
        updates = self.optimizer.get_updates(cost, self.params)

        train_model = theano.function(inputs=[x_v, x_s, y, mask_v, mask_s],
                                      outputs=[cost, cost_o],
                                      updates=updates,
                                      givens={deterministic: zero})

        return train_model, val_model
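
A sketch of a call site from within the owning class; in_dim, h_dim, and the vocab_size name are illustrative (only the 4096-dim video features are fixed by the code above):

# Hypothetical call; dimensions and vocab_size are illustrative.
train_fn, val_fn = self.build_model(in_dim=512,
                                    h_dim=1000,
                                    out_dim=vocab_size)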