def __init__(self, input_shape, n_output_classes, conv1_channels, conv2_channels, reg):
    """
    Initializes the neural network

    Architecture: Conv(3x3, pad 1) -> ReLU -> MaxPool(4, stride 4) ->
    Conv(3x3, pad 1) -> ReLU -> MaxPool(4, stride 4) -> Flatten -> FC

    Arguments:
    input_shape, tuple of 3 ints - image_width, image_height, n_channels
    n_output_classes, int - number of classes to predict
    conv1_channels, int - number of filters in the 1st conv layer
    conv2_channels, int - number of filters in the 2nd conv layer
    reg - stored unchanged on self.reg (presumably the regularization
        strength consumed by the loss code - confirm against the caller)
    """
    filter_size = 3
    padding = 1
    pool_size = 4
    stride = 4

    self.reg = reg

    # The FC input size is derived from input_shape instead of the former
    # hard-coded 4 * conv2_channels, which was only valid for 32x32 images.
    # Assumes each layer emits floor((size - window) / stride) + 1 per
    # spatial dimension - TODO confirm against layers.py.
    width, height = input_shape[0], input_shape[1]
    for _ in range(2):
        width = (width + 2 * padding - filter_size + 1 - pool_size) // stride + 1
        height = (height + 2 * padding - filter_size + 1 - pool_size) // stride + 1

    self.conv1 = ConvolutionalLayer(in_channels=input_shape[-1],
                                    out_channels=conv1_channels,
                                    filter_size=filter_size,
                                    padding=padding)
    self.relu1 = ReLULayer()
    self.maxpool1 = MaxPoolingLayer(pool_size=pool_size, stride=stride)
    self.conv2 = ConvolutionalLayer(in_channels=conv1_channels,
                                    out_channels=conv2_channels,
                                    filter_size=filter_size,
                                    padding=padding)
    self.relu2 = ReLULayer()
    self.maxpool2 = MaxPoolingLayer(pool_size=pool_size, stride=stride)
    self.flattener = Flattener()
    self.fullyconlayer = FullyConnectedLayer(
        n_input=width * height * conv2_channels,
        n_output=n_output_classes)

    # Placeholders; nothing in this method populates them.
    self.W_fc_layer = None
    self.B_fc_layer = None
    self.W_con1_layer = None
    self.B_con1_layer = None
    self.W_con2_layer = None
    self.B_con2_layer = None
def __init__(self, input_shape, n_output_classes, conv1_channels, conv2_channels):
    """
    Builds the layers of the network:
    Conv -> ReLU -> MaxPool -> Conv -> ReLU -> MaxPool -> Flatten -> FC

    Arguments:
    input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                     Will be equal to (32, 32, 3)
    n_output_classes, int - number of classes to predict
    conv1_channels, int - number of filters in the 1st conv layer
    conv2_channels, int - number of filters in the 2nd conv layer

    Raises AssertionError when a convolution output is not an exact
    multiple of the pooling window.
    """
    filter_size, padding = 3, 1
    pool_size, stride = 4, 4

    def after_conv_and_pool(size):
        # Size of one spatial dimension after a conv + pooling stage.
        conv_out = size + 2 * padding - filter_size + 1
        assert (conv_out % pool_size == 0)
        return conv_out // pool_size

    width, height, n_channels = input_shape
    # Two identical conv + pool stages; height is validated before width.
    for _ in range(2):
        height = after_conv_and_pool(height)
        width = after_conv_and_pool(width)

    self.Conv_1 = ConvolutionalLayer(n_channels, conv1_channels,
                                     filter_size, padding)
    self.Relu_1 = ReLULayer()
    self.Maxpool_1 = MaxPoolingLayer(pool_size, stride)
    self.Conv_2 = ConvolutionalLayer(conv1_channels, conv2_channels,
                                     filter_size, padding)
    self.Relu_2 = ReLULayer()
    self.Maxpool_2 = MaxPoolingLayer(pool_size, stride)
    self.Flattener = Flattener()
    self.FC = FullyConnectedLayer(height * width * conv2_channels,
                                  n_output_classes)
def __init__(self, input_shape, n_output_classes, conv1_channels, conv2_channels):
    """
    Initializes the neural network

    Arguments:
    input_shape, tuple of 3 ints - image_width, image_height, n_channels
    n_output_classes, int - number of classes to predict
    conv1_channels, int - number of filters in the 1st conv layer
    conv2_channels, int - number of filters in the 2nd conv layer
    """
    width, height, n_input_channels = input_shape
    kernel_size = 3
    padding = 1
    conv_stride = 1
    pooling_stride = 4
    filter_size = 4  # pooling window size

    def spatial_output(size):
        # One spatial dimension after both conv + pooling stages.
        # Integer arithmetic keeps the result exact; the previous float
        # version produced fractional sizes for non-divisible inputs.
        for _ in range(2):
            size = (size - kernel_size + 2 * padding) // conv_stride + 1
            size = (size - filter_size) // pooling_stride + 1
        return size

    # Width and height are handled separately so that non-square images
    # get the right number of FC inputs (the old code squared the width).
    fc_input = spatial_output(width) * spatial_output(height) * conv2_channels

    self.Sequential = [
        ConvolutionalLayer(n_input_channels, conv1_channels, kernel_size, padding),
        ReLULayer(),
        MaxPoolingLayer(filter_size, pooling_stride),
        ConvolutionalLayer(conv1_channels, conv2_channels, kernel_size, padding),
        ReLULayer(),
        MaxPoolingLayer(filter_size, pooling_stride),
        Flattener(),
        FullyConnectedLayer(fc_input, n_output_classes),
    ]
def __init__(self, input_shape, n_output_classes, conv1_channels, conv2_channels):
    """
    Builds self.layers, the ordered list of layers of the network:
    Conv -> ReLU -> MaxPool -> Conv -> ReLU -> MaxPool -> Flatten -> FC

    Arguments:
    input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                     Will be equal to (32, 32, 3)
    n_output_classes, int - number of classes to predict
    conv1_channels, int - number of filters in the 1st conv layer
    conv2_channels, int - number of filters in the 2nd conv layer
    """
    image_width, image_height, n_channels = input_shape
    filter_size, padding = 3, 1
    pool_size = 4
    stride = pool_size

    # Two pooling stages shrink each side by pool_size ** 2 in total.
    shrink = pool_size**2
    flat_height = image_height // shrink
    flat_width = image_width // shrink
    fc_input = flat_height * flat_width * conv2_channels

    self.layers = [
        ConvolutionalLayer(n_channels, conv1_channels, filter_size, padding),
        ReLULayer(),
        MaxPoolingLayer(pool_size, stride),
        ConvolutionalLayer(conv1_channels, conv2_channels, filter_size, padding),
        ReLULayer(),
        MaxPoolingLayer(pool_size, stride),
        Flattener(),
        FullyConnectedLayer(fc_input, n_output_classes),
    ]
def __init__(self, input_shape, n_output_classes, conv1_channels, conv2_channels):
    """
    Initializes the neural network

    Architecture: Conv(3x3, no padding) -> ReLU -> MaxPool(4, stride 1) ->
    Conv(3x3, no padding) -> ReLU -> MaxPool(4, stride 1) -> Flatten -> FC

    Arguments:
    input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                     Will be equal to (32, 32, 3)
    n_output_classes, int - number of classes to predict
    conv1_channels, int - number of filters in the 1st conv layer
    conv2_channels, int - number of filters in the 2nd conv layer
    """
    image_width, image_height, n_channels = input_shape
    conv_padding = 0
    conv_filter_size = 3
    max_pool_size = 4
    max_pool_stride = 1

    def spatial_output(size):
        # One spatial dimension after both conv + pooling stages.
        for _ in range(2):
            size = size + 2 * conv_padding - conv_filter_size + 1
            size = (size - max_pool_size) // max_pool_stride + 1
        return size

    # Width and height are tracked independently; the previous version
    # squared the width-derived size and was only right for square images.
    fc_input_size = (spatial_output(image_width)
                     * spatial_output(image_height)
                     * conv2_channels)

    self.conv1 = ConvolutionalLayer(n_channels, conv1_channels,
                                    conv_filter_size, conv_padding)
    self.relu1 = ReLULayer()
    self.maxpool1 = MaxPoolingLayer(max_pool_size, max_pool_stride)
    self.conv2 = ConvolutionalLayer(conv1_channels, conv2_channels,
                                    conv_filter_size, conv_padding)
    self.relu2 = ReLULayer()
    self.maxpool2 = MaxPoolingLayer(max_pool_size, max_pool_stride)
    self.flattener = Flattener()
    self.fc = FullyConnectedLayer(fc_input_size, n_output_classes)
def __init__(self, input_shape, n_output_classes, conv1_channels, conv2_channels, reg=0):
    """
    Initializes the neural network

    Arguments:
    input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                     Will be equal to (32, 32, 3)
    n_output_classes, int - number of classes to predict
    conv1_channels, int - number of filters in the 1st conv layer
    conv2_channels, int - number of filters in the 2nd conv layer
    reg - stored unchanged on self.reg (defaults to 0)

    Raises AssertionError when width or height is not a multiple of 4.
    """
    self.reg = reg

    # `and`, not `&`: the bitwise operator binds tighter than `==`, so the
    # old expression reduced to a check of input_shape[0] only and the
    # height was never validated.
    assert input_shape[0] % 4 == 0 and input_shape[1] % 4 == 0, "Invalid input_shape value"

    # NOTE(review): the FC layer is sized as 4 * conv2_channels while both
    # convolutions receive 0 as their last argument (apparently padding);
    # confirm this matches the flattened size actually produced.
    self.layers = [
        ConvolutionalLayer(input_shape[2], conv1_channels, 3, 0),
        ReLULayer(),
        MaxPoolingLayer(4, 4),
        ConvolutionalLayer(conv1_channels, conv2_channels, 3, 0),
        ReLULayer(),
        MaxPoolingLayer(4, 4),
        Flattener(),
        FullyConnectedLayer(4 * conv2_channels, n_output_classes),
    ]
def __init__(self, input_shape, n_output_classes, conv1_channels, conv2_channels):
    """
    Initializes the neural network

    Architecture: Conv(3x3, pad 1) -> ReLU -> MaxPool(4, stride 4) ->
    Conv(3x3, pad 1) -> ReLU -> MaxPool(4, stride 4) -> Flatten -> FC

    Arguments:
    input_shape, tuple of 3 ints - image_width, image_height, n_channels
    n_output_classes, int - number of classes to predict
    conv1_channels, int - number of filters in the 1st conv layer
    conv2_channels, int - number of filters in the 2nd conv layer
    """
    filter_size = 3
    padding = 1
    pool_size = 4
    stride = 4

    self.out_classes = n_output_classes
    image_width, image_height, in_channels = input_shape

    # Derive the flattened size from the image dimensions; the former
    # constant 4 * conv2_channels was only valid for 32x32 inputs.
    # Assumes each layer emits floor((size - window) / stride) + 1 per
    # spatial dimension - TODO confirm against layers.py.
    out_width, out_height = image_width, image_height
    for _ in range(2):
        out_width = (out_width + 2 * padding - filter_size + 1 - pool_size) // stride + 1
        out_height = (out_height + 2 * padding - filter_size + 1 - pool_size) // stride + 1

    self.Conv1 = ConvolutionalLayer(in_channels, conv1_channels, filter_size, padding)
    self.ReLU1 = ReLULayer()
    self.MaxPool1 = MaxPoolingLayer(pool_size, stride)
    self.Conv2 = ConvolutionalLayer(conv1_channels, conv2_channels, filter_size, padding)
    self.ReLU2 = ReLULayer()
    self.MaxPool2 = MaxPoolingLayer(pool_size, stride)
    self.Flatten = Flattener()
    self.FC = FullyConnectedLayer(out_width * out_height * conv2_channels,
                                  n_output_classes)
def __init__(self, input_shape, n_output_classes, conv1_channels, conv2_channels):
    """
    Initializes the neural network

    Arguments:
    :param input_shape, tuple of 3 ints - image_width, image_height, n_channels,
        will be equal to (32, 32, 3)
    :param n_output_classes, int - number of classes to predict
    :param conv1_channels, int - number of filters in the 1st conv layer
    :param conv2_channels, int - number of filters in the 2nd conv layer
    """
    image_width, image_height, image_channels = input_shape
    maxpool1_size = 4
    maxpool2_size = 4

    # Both pooling layers use stride == window, so each side shrinks by the
    # product of the two window sizes (the padded 3x3 convs keep the size).
    downscale = maxpool1_size * maxpool2_size
    flattener_width = image_width // downscale
    # Fixed: the height was previously derived from image_width.
    flattener_height = image_height // downscale

    self.layers = [
        ConvolutionalLayer(in_channels=image_channels,
                           out_channels=conv1_channels,
                           filter_size=3,
                           padding=1),
        ReLULayer(),
        MaxPoolingLayer(maxpool1_size, maxpool1_size),
        ConvolutionalLayer(in_channels=conv1_channels,
                           out_channels=conv2_channels,
                           filter_size=3,
                           padding=1),
        ReLULayer(),
        MaxPoolingLayer(maxpool2_size, maxpool2_size),
        Flattener(),
        FullyConnectedLayer(
            flattener_width * flattener_height * conv2_channels,
            n_output_classes),
    ]
def __init__(self, num_input, num_cells=50, num_output=1, lr=0.01, rho=0.95):
    """
    Builds the symbolic graph and compiles the Theano functions.

    The network is InputLayer -> LSTMLayer -> FullyConnectedLayer; the
    prediction is the mean of the FC output over axis 2. The cost blends
    the mean squared error over all rows of Y with the squared error of
    the last row only, weighted by the symbolic scalar `alpha`.

    Arguments:
    num_input - input size passed to the LSTM layer
    num_cells - number of LSTM cells (also the FC input size)
    num_output - FC output size
    lr, rho - forwarded to create_optimization_updates (method "adadelta")

    Sets self.train, self.costfn (both take X, Y, alpha and return
    [cost, last_cost]) and self.predict (takes X, returns [Y_hat]).
    """
    self.num_input = num_input
    self.num_output = num_output
    self.num_cells = num_cells

    # Symbolic inputs.
    x_sym = T.matrix('x')
    y_sym = T.matrix('y')
    self.eta = T.scalar('eta')
    blend = T.scalar('alpha')

    # Network definition.
    input_layer = InputLayer(x_sym, name="inputs")
    lstm_layer = LSTMLayer(num_input, num_cells,
                           input_layer=input_layer, name="lstm")
    output_layer = FullyConnectedLayer(num_cells, num_output,
                                       input_layer=lstm_layer)
    prediction = T.mean(output_layer.output(), axis=2)

    network = (input_layer, lstm_layer, output_layer)
    self.params = get_params(network)
    self.caches = make_caches(self.params)
    self.layers = network

    # Cost: alpha * (error over all rows) + (1 - alpha) * (error of last row).
    overall_error = T.mean((y_sym - prediction)**2)
    final_step_error = T.mean((y_sym[-1] - prediction[-1])**2)
    self.cost = blend * overall_error + (1 - blend) * final_step_error

    optimizer_result = create_optimization_updates(
        self.cost, self.params, method="adadelta", lr=lr, rho=rho)
    self.updates = optimizer_result[0]
    # The original unpacked exactly five values; keep that arity check.
    _, _, _, _, _ = optimizer_result

    fn_inputs = [x_sym, y_sym, blend]
    fn_outputs = [self.cost, final_step_error]
    self.train = theano.function(fn_inputs, fn_outputs,
                                 updates=self.updates,
                                 allow_input_downcast=True)
    self.costfn = theano.function([x_sym, y_sym, blend],
                                  [self.cost, final_step_error],
                                  allow_input_downcast=True)
    self.predict = theano.function([x_sym], [prediction],
                                   allow_input_downcast=True)
def __init__(self, input_shape, n_output_classes, conv1_channels, conv2_channels):
    """
    Initializes the neural network

    Architecture: Conv(3x3, pad 1) -> ReLU -> MaxPool(4, stride 4) ->
    Conv(3x3, pad 1) -> ReLU -> MaxPool(4, stride 4) -> Flatten -> FC

    Arguments:
    input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                     Will be equal to (32, 32, 3)
    n_output_classes, int - number of classes to predict
    conv1_channels, int - number of filters in the 1st conv layer
    conv2_channels, int - number of filters in the 2nd conv layer
    """
    pool_size = 4
    # Each side shrinks by pool_size twice. Floor per dimension: the old
    # (width * height) // 4 ** 4 over-counts when a side is not a multiple
    # of 16 (e.g. 40x40 gave 6 instead of 2 * 2 = 4).
    downscale = pool_size ** 2
    flat_width = input_shape[0] // downscale
    flat_height = input_shape[1] // downscale

    self.layers = [
        ConvolutionalLayer(in_channels=input_shape[2],
                           out_channels=conv1_channels,
                           filter_size=3,
                           padding=1),
        ReLULayer(),
        MaxPoolingLayer(pool_size=pool_size, stride=pool_size),
        ConvolutionalLayer(in_channels=conv1_channels,
                           out_channels=conv2_channels,
                           filter_size=3,
                           padding=1),
        ReLULayer(),
        MaxPoolingLayer(pool_size=pool_size, stride=pool_size),
        Flattener(),
        FullyConnectedLayer(
            n_input=flat_width * flat_height * conv2_channels,
            n_output=n_output_classes,
        ),
    ]
def __init__(self, layers, decay=0.001, learning_rate=0.01):
    """
    Builds the network from a list of layer specification dicts.

    Arguments:
    layers - iterable of dicts; each needs a "type" key (one of "input",
        "fc", "convolution", "pool", "squaredloss", "softmax", "relu") and
        may carry "input_shape". When "input_shape" is missing or falsy,
        the previous layer's output_shape is used.
    decay - stored on self.decay and injected into every spec as "decay"
    learning_rate - stored on self.learning_rate

    Raises:
    ValueError - the first layer has no "input_shape"
    KeyError - a spec has no "type" or an unknown one
    """
    # Lambdas keep the class lookup lazy: a layer class is only resolved
    # when a spec of that type is actually built.
    mapping = {
        "input": lambda x: InputLayer(x),
        "fc": lambda x: FullyConnectedLayer(x),
        "convolution": lambda x: ConvolutionLayer(x),
        "pool": lambda x: PoolingLayer(x),
        "squaredloss": lambda x: SquaredLossLayer(x),
        "softmax": lambda x: SoftmaxLossLayer(x),
        "relu": lambda x: ReLuLayer(x),
    }
    self.layers = []
    self.decay = decay
    self.learning_rate = learning_rate

    prev_layer = None
    for spec in layers:
        # Work on a copy so the caller's dicts are not modified.
        spec = dict(spec)
        input_shape = spec.get("input_shape", None)
        if not input_shape:
            if prev_layer is None:
                # Previously surfaced as an AttributeError on None.
                raise ValueError(
                    "the first layer must specify 'input_shape'")
            input_shape = prev_layer.output_shape
        spec["input_shape"] = input_shape
        spec["decay"] = self.decay

        built = mapping[spec["type"]](spec)
        self.layers.append(built)
        prev_layer = built
def __init__(self, n_input, n_output, conv1_size, conv2_size, reg):
    """
    Builds self.layers:
    Conv -> ReLU -> MaxPool -> Conv -> ReLU -> MaxPool -> Flatten -> FC

    Arguments:
    n_input, int - dimension of the model input
    n_output, int - number of classes to predict
    conv1_size, int - number of filters in the 1st conv layer
    conv2_size, int - number of filters in the 2nd conv layer
    reg, float - L2 regularization strength
    """
    self.reg = reg

    conv_stack = [
        # The first convolution is fixed to 3 input channels.
        ConvolutionalLayer(3, conv1_size, 3, 1),
        ReLULayer(),
        MaxPoolingLayer(4, 4),
        ConvolutionalLayer(conv1_size, conv2_size, 3, 1),
        ReLULayer(),
        MaxPoolingLayer(4, 4),
    ]

    # n_input / 16 floored, squared, times the channel count.
    side = math.floor(n_input / 16)
    fc_inputs = side**2 * conv2_size
    classifier = [Flattener(), FullyConnectedLayer(fc_inputs, n_output)]

    self.layers = conv_stack + classifier
def __init__(self, input_shape, n_output_classes, conv1_channels, conv2_channels):
    """
    Builds three layer groups, each stored as a tuple:
    self.conv1_layer and self.conv2_layer are (Conv, ReLU, MaxPool) and
    self.output_layer is (Flattener, FullyConnectedLayer).

    Arguments:
    input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                     Will be equal to (32, 32, 3)
    n_output_classes, int - number of classes to predict
    conv1_channels, int - number of filters in the 1st conv layer
    conv2_channels, int - number of filters in the 2nd conv layer
    """
    def conv_block(channels_in, channels_out):
        # 3x3 convolution without padding, ReLU, 4-wide pooling with stride 1.
        return (ConvolutionalLayer(channels_in, channels_out, 3, 0),
                ReLULayer(),
                MaxPoolingLayer(4, 1))

    def spatial_after_blocks(size):
        # Per block: the conv removes 3 - 1 and the pooling 4 - 1.
        for _ in range(2):
            size = size - 3 + 1 - 4 + 1
        return size

    self.conv1_layer = conv_block(input_shape[2], conv1_channels)
    self.conv2_layer = conv_block(conv1_channels, conv2_channels)

    final_shape = (spatial_after_blocks(input_shape[0]),
                   spatial_after_blocks(input_shape[1]),
                   conv2_channels)
    n_features = np.prod(final_shape)
    self.output_layer = (Flattener(),
                         FullyConnectedLayer(n_features, n_output_classes))
#test_x, test_y = test_data i = T.lscalar() # mini-batch index self.test_mb_predictions = theano.function([i], self.layers[-1].y_out, givens={self.x: observation}, on_unused_input='warn') return self.test_mb_predictions(0) #Initialize network layers = [ FullyConnectedLayer(n_in=4, n_out=10), FullyConnectedLayer(n_in=10, n_out=10), SoftmaxLayer(n_in=10, n_out=2) ] params = [param for layer in layers for param in layer.params] iterations = mini_batch_size x = T.vector("x") y = T.ivector("y") init_layer = layers[0] init_layer.set_inpt(x, 1) for j in xrange(1, len(layers)): prev_layer, layer = layers[j - 1], layers[j] layer.set_inpt(prev_layer.output, 1)
def __init__(self, input_shape, n_output_classes, conv1_channels, conv2_channels, reg=0):
    """
    Builds the layers of the network:
    Conv -> ReLU -> MaxPool -> Conv -> ReLU -> MaxPool -> Flatten -> FC

    Arguments:
    input_shape, tuple of 3 ints - image_width, image_height, n_channels
                                     Will be equal to (32, 32, 3)
    n_output_classes, int - number of classes to predict
    conv1_channels, int - number of filters in the 1st conv layer
    conv2_channels, int - number of filters in the 2nd conv layer
    reg - stored unchanged on self.reg (defaults to 0)

    Raises AssertionError when a pooling window does not tile its input
    exactly.
    """
    def conv_pool_output(height, width, padding, filter_size, pooling_size, stride):
        # Spatial size after one convolution followed by one pooling layer.
        conv_height = height + 2 * padding - filter_size + 1
        conv_width = width + 2 * padding - filter_size + 1
        assert (conv_height - pooling_size) % stride == 0
        assert (conv_width - pooling_size) % stride == 0
        pooled_height = int((conv_height - pooling_size) / stride + 1)
        pooled_width = int((conv_width - pooling_size) / stride + 1)
        return pooled_height, pooled_width

    image_width, image_height, n_channels = input_shape

    # Hyper-parameters of the two conv + pool stages.
    padding_1, filter_size_1, pooling_size_1, stride_1 = 1, 3, 4, 4
    padding_2, filter_size_2, pooling_size_2, stride_2 = 1, 3, 4, 4

    out_height, out_width = conv_pool_output(
        image_height, image_width,
        padding_1, filter_size_1, pooling_size_1, stride_1)
    out_height, out_width = conv_pool_output(
        out_height, out_width,
        padding_2, filter_size_2, pooling_size_2, stride_2)

    self.Conv_first = ConvolutionalLayer(n_channels, conv1_channels,
                                         filter_size_1, padding_1)
    self.Relu_first = ReLULayer()
    self.Maxpool_first = MaxPoolingLayer(pooling_size_1, stride_1)
    self.Conv_second = ConvolutionalLayer(conv1_channels, conv2_channels,
                                          filter_size_2, padding_2)
    self.Relu_second = ReLULayer()
    self.Maxpool_second = MaxPoolingLayer(pooling_size_2, stride_2)
    self.Flattener = Flattener()
    self.FC = FullyConnectedLayer(out_height * out_width * conv2_channels,
                                  n_output_classes)
    self.n_output = n_output_classes
    self.reg = reg
from layers import ReLULayer X = np.array([[1, -2, 3], [-1, 2, 0.1]]) assert check_layer_gradient(ReLULayer(), X) #%% [markdown] # А теперь реализуем полносвязный слой (fully connected layer), у которого будет два массива параметров: W (weights) и B (bias). # # Для всех параметров наши слои будут использовать специальный класс `Param`, в котором будут храниться значения параметров и градиенты этих параметров, вычисляемые во время обратного прохода. # # Это даст возможность аккумулировать (суммировать) градиенты из разных частей функции потерь, например, из cross-entropy loss и regularization loss. #%% from layers import FullyConnectedLayer assert check_layer_gradient(FullyConnectedLayer(3, 4), X) assert check_layer_param_gradient(FullyConnectedLayer(3, 4), X, 'W') #%% [markdown] # ## Создаем нейронную сеть # # Теперь мы реализуем простейшую нейронную сеть с двумя полносвязными слоями и нелинейностью ReLU. Реализуйте функцию `compute_loss_and_gradients`, она должна запустить прямой и обратный проход через оба слоя для вычисления градиентов. # # Не забудьте реализовать очистку градиентов в начале функции. #%% from model import TwoLayerNet model = TwoLayerNet(n_input=train_X.shape[1], n_output=10, hidden_layer_size=3,
from network import Network
from layers import FullyConnectedLayer
import numpy as np
import pandas as pd

# Load the tab-separated data set: columns 8 onwards are the targets,
# the remaining columns (after dropping 8 and 9) are the features.
data = pd.read_csv("SoccerData.txt", header=None, sep='\t')
ground_truth = data.iloc[:, 8:]
data = data.drop(data.columns[[8, 9]], axis=1)

# Hidden stack 8 -> 16 -> 32 -> 64 -> 32 -> 16 -> 8 with "relu",
# followed by an 8 -> 2 "sigmoid" output layer.
widths = [8, 16, 32, 64, 32, 16, 8]
layers = [
    FullyConnectedLayer("relu", n_in, n_out)
    for n_in, n_out in zip(widths, widths[1:])
]
layers.append(FullyConnectedLayer("sigmoid", 8, 2))

network = Network("Test1", layers, 0.01)
network.load()

# Report the loss before training, then train.
predictions = network.feed_forward(data.to_numpy())
print(network.loss(predictions, ground_truth.to_numpy()))
network.train(data.to_numpy(), ground_truth.to_numpy(), 5000)
def __init__(self, rng, batchsize=100, activation=tanh):
    """
    Loads the tweet data set, builds the word-level CNN graph and compiles
    the Theano training and test functions.

    Graph: EmbedIDLayer -> ConvolutionalLayer -> MaxPoolingLayer ->
    FullyConnectedLayer -> FullyConnectedLayer (2 outputs), with dropout
    applied to the input of both fully connected layers.

    Arguments:
    rng - random generator forwarded to the layers and to dropout
    batchsize - mini-batch size; it is baked into the reshape calls
    activation - activation for the convolution and the hidden FC layer

    Sets self.n_train_batches, self.n_test_batches, self.train_model and
    self.test_model; both functions take a batch index and return
    [loss, accuracy].
    """
    import load
    (_num_sent, word_cnt, max_sen_len, k_wrd, x_wrd, y) \
        = load.read("tweets_clean.txt")

    dim_word = 100
    cl_word = 300
    k_wrd = 5  # overrides the value returned by load.read
    vocab_size = word_cnt
    n_hidden = 300

    data_train, \
        data_test, \
        target_train, \
        target_test \
        = train_test_split(x_wrd, y, random_state=1234, test_size=0.1)

    x_train = theano.shared(np.asarray(data_train, dtype='int16'), borrow=True)
    y_train = theano.shared(np.asarray(target_train, dtype='int32'), borrow=True)
    x_test = theano.shared(np.asarray(data_test, dtype='int16'), borrow=True)
    y_test = theano.shared(np.asarray(target_test, dtype='int32'), borrow=True)

    # Floor division keeps the batch counts integral on Python 3 as well
    # (plain `/` yields a float there). Only full batches are counted,
    # which matches the fixed batchsize used in the reshapes below.
    self.n_train_batches = x_train.get_value(
        borrow=True).shape[0] // batchsize
    self.n_test_batches = x_test.get_value(
        borrow=True).shape[0] // batchsize

    # symbol definition
    index = T.iscalar()
    x = T.wmatrix('x')
    y = T.ivector('y')
    train = T.iscalar('train')  # 1 in train_model, 0 in test_model

    layer_embed_input = x  # .reshape((batchsize, max_sen_len))
    layer_embed = EmbedIDLayer(
        rng,
        layer_embed_input,
        n_input=vocab_size,
        n_output=dim_word,
    )

    layer1_input = layer_embed.output.reshape(
        (batchsize, 1, max_sen_len, dim_word))
    layer1 = ConvolutionalLayer(
        rng,
        layer1_input,
        filter_shape=(cl_word, 1, k_wrd, dim_word),  # 1 is the number of input channels
        image_shape=(batchsize, 1, max_sen_len, dim_word),
        activation=activation)

    # Pool over the full convolution output along the sentence axis.
    layer2 = MaxPoolingLayer(layer1.output,
                             poolsize=(max_sen_len - k_wrd + 1, 1))

    layer3_input = layer2.output.reshape((batchsize, cl_word))
    layer3 = FullyConnectedLayer(rng,
                                 dropout(rng, layer3_input, train),
                                 n_input=cl_word,
                                 n_output=n_hidden,
                                 activation=activation)
    layer4 = FullyConnectedLayer(rng,
                                 dropout(rng, layer3.output, train),
                                 n_input=n_hidden,
                                 n_output=2,
                                 activation=None)

    result = Result(layer4.output, y)
    loss = result.cross_entropy()
    accuracy = result.accuracy()
    params = layer4.params + layer3.params + layer1.params + layer_embed.params
    updates = RMSprop(learning_rate=0.001, params=params).updates(loss)

    self.train_model = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        updates=updates,
        givens={
            x: x_train[index * batchsize:(index + 1) * batchsize],
            y: y_train[index * batchsize:(index + 1) * batchsize],
            train: np.cast['int32'](1)
        })
    self.test_model = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        givens={
            x: x_test[index * batchsize:(index + 1) * batchsize],
            y: y_test[index * batchsize:(index + 1) * batchsize],
            train: np.cast['int32'](0)
        })
import skimage.measure
import pickle
from readlabel import read_image
from network import Network
from layers import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer, ReLU, Sigmoid

# Load the evaluation images and standardise them with their own
# per-position mean and standard deviation (computed along axis 0).
whole_data = read_image(path1='test_images/',
                        path2='./test_annotation',
                        data_size=1050)
whole_x = whole_data[0]
mean = whole_x.mean(axis=0)
std = whole_x.std(axis=0)
whole_x = (whole_x - mean) / std
whole_y = whole_data[1]

test_x, test_y = whole_x, whole_y
test_data = [test_x, test_y]

mini_batch_size = 1

# final
# (filter_shape, image_shape) for the three convolution + pooling stages.
conv_specs = [
    ((5, 5, 3, 9), (mini_batch_size, 64, 64, 3)),
    ((5, 5, 9, 18), (mini_batch_size, 30, 30, 9)),
    ((4, 4, 18, 36), (mini_batch_size, 13, 13, 18)),
]
net_layers = [
    ConvPoolLayer(filter_shape=filter_shape,
                  image_shape=image_shape,
                  poolsize=2,
                  activation_fn=ReLU)
    for filter_shape, image_shape in conv_specs
]
net_layers.append(FullyConnectedLayer(n_in=900, n_out=225, activation_fn=ReLU))
net_layers.append(FullyConnectedLayer(n_in=225, n_out=50, activation_fn=ReLU))
net_layers.append(SoftmaxLayer(n_in=50, n_out=20, activation_fn=None))

net = Network(net_layers, mini_batch_size)

print('start')
net.load_test(mini_batch_size, test_data, path='./finalparams_noact.pickle')
def __init__(
    self,
    rng,
    batchsize=100,
    activation=relu
):
    """
    Loads the tweet data set, builds the character + word CNN graph and
    compiles the Theano training and test functions.

    Graph: characters are embedded, convolved and max-pooled per word;
    the result is concatenated with the word embeddings along the last
    axis, convolved and max-pooled per sentence, then passed through two
    fully connected layers (50 units, then 2 outputs) with dropout on
    their inputs.

    Arguments:
    rng - random generator forwarded to the layers and to dropout
    batchsize - mini-batch size; it is baked into the reshape calls
    activation - activation for the word-level convolution and the
        hidden FC layer

    Sets self.n_train_batches, self.n_test_batches, self.train_model and
    self.test_model; both functions take a batch index and return
    [loss, accuracy].
    """
    import char_load
    (_num_sent, char_cnt, word_cnt, max_word_len, max_sen_len,
     k_chr, k_wrd, x_chr, x_wrd, y) = char_load.read("tweets_clean.txt")

    dim_word = 30
    dim_char = 5
    cl_word = 300
    cl_char = 50
    k_word = k_wrd
    k_char = k_chr

    data_train_word, \
        data_test_word, \
        data_train_char, \
        data_test_char, \
        target_train, \
        target_test \
        = train_test_split(x_wrd, x_chr, y, random_state=1234, test_size=0.1)

    x_train_word = theano.shared(np.asarray(data_train_word, dtype='int16'), borrow=True)
    x_train_char = theano.shared(np.asarray(data_train_char, dtype='int16'), borrow=True)
    y_train = theano.shared(np.asarray(target_train, dtype='int8'), borrow=True)
    x_test_word = theano.shared(np.asarray(data_test_word, dtype='int16'), borrow=True)
    x_test_char = theano.shared(np.asarray(data_test_char, dtype='int16'), borrow=True)
    y_test = theano.shared(np.asarray(target_test, dtype='int8'), borrow=True)

    # Floor division keeps the batch counts integral on Python 3 as well
    # (plain `/` yields a float there). Only full batches are counted,
    # which matches the fixed batchsize used in the reshapes below.
    self.n_train_batches = x_train_word.get_value(borrow=True).shape[0] // batchsize
    self.n_test_batches = x_test_word.get_value(borrow=True).shape[0] // batchsize

    # symbol definition
    index = T.iscalar()
    x_wrd = T.wmatrix('x_wrd')
    x_chr = T.wtensor3('x_chr')
    y = T.bvector('y')
    train = T.iscalar('train')  # 1 in train_model, 0 in test_model

    # network definition
    layer_char_embed_input = x_chr  # .reshape((batchsize, max_sen_len, max_word_len))
    layer_char_embed = EmbedIDLayer(
        rng,
        layer_char_embed_input,
        n_input=char_cnt,
        n_output=dim_char
    )

    layer1_input = layer_char_embed.output.reshape(
        (batchsize * max_sen_len, 1, max_word_len, dim_char)
    )
    layer1 = ConvolutionalLayer(
        rng,
        layer1_input,
        filter_shape=(cl_char, 1, k_char, dim_char),  # cl_char is the number of filters
        image_shape=(batchsize * max_sen_len, 1, max_word_len, dim_char)
    )

    # Pool over the full convolution output along the word axis.
    layer2 = MaxPoolingLayer(
        layer1.output,
        poolsize=(max_word_len - k_char + 1, 1)
    )

    layer_word_embed_input = x_wrd  # .reshape((batchsize, max_sen_len))
    layer_word_embed = EmbedIDLayer(
        rng,
        layer_word_embed_input,
        n_input=word_cnt,
        n_output=dim_word
    )

    layer3_word_input = layer_word_embed.output.reshape((batchsize, 1, max_sen_len, dim_word))
    layer3_char_input = layer2.output.reshape((batchsize, 1, max_sen_len, cl_char))

    # Join word embeddings and pooled character features on the last axis.
    layer3_input = T.concatenate(
        [layer3_word_input, layer3_char_input],
        axis=3
    )  # .reshape((batchsize, 1, max_sen_len, dim_word+cl_char))

    layer3 = ConvolutionalLayer(
        rng,
        layer3_input,
        filter_shape=(cl_word, 1, k_word, dim_word + cl_char),  # 1 is the number of input channels
        image_shape=(batchsize, 1, max_sen_len, dim_word + cl_char),
        activation=activation
    )

    # Pool over the full convolution output along the sentence axis.
    layer4 = MaxPoolingLayer(
        layer3.output,
        poolsize=(max_sen_len - k_word + 1, 1)
    )

    layer5_input = layer4.output.reshape((batchsize, cl_word))
    layer5 = FullyConnectedLayer(
        rng,
        dropout(rng, layer5_input, train),
        n_input=cl_word,
        n_output=50,
        activation=activation
    )

    layer6_input = layer5.output
    layer6 = FullyConnectedLayer(
        rng,
        dropout(rng, layer6_input, train, p=0.1),
        n_input=50,
        n_output=2,
        activation=None
    )

    result = Result(layer6.output, y)
    loss = result.negative_log_likelihood()
    accuracy = result.accuracy()
    params = layer6.params \
        + layer5.params \
        + layer3.params \
        + layer_word_embed.params \
        + layer1.params \
        + layer_char_embed.params
    updates = RMSprop(learning_rate=0.001, params=params).updates(loss)

    self.train_model = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        updates=updates,
        givens={
            x_wrd: x_train_word[index * batchsize: (index + 1) * batchsize],
            x_chr: x_train_char[index * batchsize: (index + 1) * batchsize],
            y: y_train[index * batchsize: (index + 1) * batchsize],
            train: np.cast['int32'](1)
        }
    )
    self.test_model = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        givens={
            x_wrd: x_test_word[index * batchsize: (index + 1) * batchsize],
            x_chr: x_test_char[index * batchsize: (index + 1) * batchsize],
            y: y_test[index * batchsize: (index + 1) * batchsize],
            train: np.cast['int32'](0)
        }
    )
def accuracy(net, test_data): correct = 0 for x_, y_ in test_data: if np.array_equal(net.predict(x_), y_): correct += 1 return correct / len(test_data) if __name__ == '__main__': (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() network1 = NeuralNetwork( [FullyConnectedLayer(784, 100), FullyConnectedLayer(100, 10)]) pred_arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) training_data = [(np.reshape(x_, np.size(x_)).astype(np.float32) / 255, (pred_arr == y_) * 1) for x_, y_ in zip(x_train[:400], y_train[:400])] validation_data = [(np.reshape(x_, np.size(x_)).astype(np.float32) / 255, (pred_arr == y_) * 1) for x_, y_ in zip(x_train[59800:], y_train[59800:])] test_data_1 = [(np.reshape(x_, np.size(x_)).astype(np.float32) / 255, (pred_arr == y_) * 1) for x_, y_ in zip(x_test[:900], y_test[:900])] network1.sgd(training_data,