def test_gradient_forked_architecture(self):
    check_errors = []
    in_layer = InputLayer(self.input_size)
    out_layer = ForwardLayer(self.output_size)
    # Two parallel branches from the input that merge again in out_layer.
    in_layer >> ForwardLayer(3, name='A') >> out_layer
    in_layer >> ForwardLayer(2, name='B') >> out_layer
    net = build_net(out_layer)
    net.initialize(Gaussian(0.1))
    e, grad_calc, grad_approx = check_gradient(
        net, n_batches=5, n_timesteps=7, rnd=rnd)
    check_errors.append(e)
    if e > 1e-4:
        # On failure, wrap the difference in a parameter view so the
        # mismatch can be reported per named weight matrix.
        layer = list(net.layers.values())[1]  # inspect one hidden layer
        b = Matrix(grad_approx - grad_calc)
        diff = layer.create_param_view(b)
        for n, q in diff.items():
            print("====== %s ======" % n)
            print(q)
    print("Checking gradient of forked architecture = %0.4f" % e)
    self.assertTrue(np.all(np.array(check_errors) < 1e-4))
def build_network(self, layer_type, activation_function, layers=1):
    prev_layer = InputLayer(self.input_size)
    for l in range(layers):
        prev_layer = prev_layer >> layer_type(
            self.output_size, act_func=activation_function)
    net = build_net(prev_layer)
    net.initialize(Gaussian(std=0.1))
    return net
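# A hypothetical example (not from the original suite) of how build_network
# feeds the gradient checks: build a single-layer net for a given layer type
# and activation, then compare analytic and numeric gradients. RnnLayer and
# the 1e-4 tolerance are assumptions mirroring the forked-architecture test.
def test_gradient_rnn_tanh(self):
    net = self.build_network(RnnLayer, 'tanh')
    e, grad_calc, grad_approx = check_gradient(
        net, n_batches=5, n_timesteps=7, rnd=rnd)
    print("Checking gradient of RnnLayer(tanh) = %0.4f" % e)
    self.assertLess(e, 1e-4)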
def build_lwta_network(self, input_size, activation_function,
                       block_sizes=(1, 2, 4, 8)):
    prev_layer = InputLayer(input_size)
    # Alternate fully connected layers with LWTA layers of growing block size.
    for block_size in block_sizes:
        prev_layer = prev_layer >> ForwardLayer(
            input_size, act_func=activation_function)
        prev_layer = prev_layer >> LWTALayer(block_size=block_size)
    net = build_net(prev_layer)
    net.initialize(Gaussian(std=0.1))
    return net
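# Illustration of the LWTA (local winner-take-all) behaviour exercised above
# (an assumption based on the layer's name, not taken from the original file):
# an LWTALayer with block_size=2 splits each activation vector into blocks of
# two and keeps only the maximum entry of each block, zeroing the rest, e.g.
# [0.3, 0.9, -0.1, 0.2] -> [0.0, 0.9, 0.0, 0.2].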
def build_gatedlayer_network(self, input_size, activation_function):
    input_layer = InputLayer(input_size)
    hidden_layer = GatedLayer(input_size, act_func=activation_function)
    out_layer = GatedLayer(input_size, act_func=activation_function)
    input_layer >> hidden_layer >> out_layer
    net = build_net(input_layer)
    net.initialize(Gaussian(std=0.1))
    return net
def build_network(self, layer_type, activation_function, layers=1):
    prev_layer = InputLayer(self.input_size)
    # Light dropout on the inputs, heavier dropout after each hidden layer.
    prev_layer = prev_layer >> DropoutLayer(dropout_prob=0.2)
    for l in range(layers):
        prev_layer = prev_layer >> layer_type(
            self.output_size, act_func=activation_function)
        prev_layer = prev_layer >> DropoutLayer(dropout_prob=0.5)
    prev_layer = prev_layer >> ForwardLayer(self.output_size,
                                            act_func="softmax")
    net = build_net(prev_layer)
    net.initialize(Gaussian(std=0.1))
    return net
def build_staticlstm_network(self, input_size, activation_function):
    input_layer = InputLayer(input_size)
    hidden_layer = StaticLstmLayer(2, act_func=activation_function)
    out_layer = StaticLstmLayer(2, act_func=activation_function)
    # Side branch that feeds a constant-zero input (via ZeroLayer) into
    # the hidden layer alongside the regular input connection.
    input_layer >> NoOpLayer() >> ZeroLayer(1, name='0') >> hidden_layer
    input_layer >> hidden_layer >> out_layer
    net = build_net(input_layer)
    net.initialize(Gaussian(std=0.1))
    return net
def build_network(self, lstm_configuration):
    # Default Lstm97Layer configuration; individual tests override
    # single flags via `lstm_configuration`.
    lstm_config = {
        'act_func': 'linear',
        'input_gate': True,
        'output_gate': True,
        'forget_gate': True,
        'peephole_connections': True,
        'gate_recurrence': False,
        'use_bias': True,
        'full_gradient': True,
        'in_act_func': 'tanh',
        'coupled_if_gate': False,
    }
    lstm_config.update(lstm_configuration)
    net = build_net(InputLayer(self.input_size) >>
                    Lstm97Layer(self.output_size, **lstm_config))
    net.initialize(Gaussian(std=0.1))
    return net
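# Hypothetical usage sketch (names mirror the tests above, but this test is
# an assumption, not part of the original file): gradient-check one Lstm97
# variant by overriding a single configuration flag.
def test_gradient_lstm97_without_forget_gate(self):
    net = self.build_network({'forget_gate': False})
    e, grad_calc, grad_approx = check_gradient(
        net, n_batches=5, n_timesteps=7, rnd=rnd)
    self.assertLess(e, 1e-4)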
def test_dropout_mask_application(self):
    net = build_net(InputLayer(self.input_size) >> DropoutLayer())
    output = net.forward_pass(self.X, training_pass=True)
    mask = net.get_fwd_state_for('DropoutLayer')['Mask'][1:, :, :]
    # Every unit the mask dropped must be exactly zero in the output.
    self.assertTrue(np.all(output[mask == 0] == 0))
def setUp(self):
    self.size = 3
    self.batches = 1
    self.time_slices = 5
    self.net = build_net(InputLayer(self.size) >> ReverseLayer())
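# A sketch of the property this fixture suggests (assumed, not from the
# original file): with the (time, batches, features) layout used by the
# other tests, a ReverseLayer should flip the time axis, so the output is
# the input read backwards in time.
def test_reverse_layer_flips_time_axis(self):
    X = np.random.randn(self.time_slices, self.batches, self.size)
    out = self.net.forward_pass(X)
    self.assertTrue(np.allclose(out, X[::-1, :, :]))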