class MnistNetMiniBatch:
    def __init__(self):
        self.d1_layer = Dense(784, 100)
        self.a1_layer = ReLu()
        self.drop1_layer = Dropout(0.5)
        self.d2_layer = Dense(100, 50)
        self.a2_layer = ReLu()
        self.drop2_layer = Dropout(0.25)
        self.d3_layer = Dense(50, 10)
        self.a3_layer = Softmax()

    def forward(self, x, train=True):
        net = self.d1_layer.forward(x)
        net = self.a1_layer.forward(net)
        net = self.drop1_layer.forward(net, train)
        net = self.d2_layer.forward(net)
        net = self.a2_layer.forward(net)
        net = self.drop2_layer.forward(net, train)
        net = self.d3_layer.forward(net)
        net = self.a3_layer.forward(net)
        return net

    def backward(self, dz, learning_rate=0.01, mini_batch=True, update=False, len_mini_batch=None):
        dz = self.a3_layer.backward(dz)
        dz = self.d3_layer.backward(dz, learning_rate=learning_rate, mini_batch=mini_batch,
                                    update=update, len_mini_batch=len_mini_batch)
        dz = self.drop2_layer.backward(dz)
        dz = self.a2_layer.backward(dz)
        dz = self.d2_layer.backward(dz, learning_rate=learning_rate, mini_batch=mini_batch,
                                    update=update, len_mini_batch=len_mini_batch)
        dz = self.drop1_layer.backward(dz)
        dz = self.a1_layer.backward(dz)
        dz = self.d1_layer.backward(dz, learning_rate=learning_rate, mini_batch=mini_batch,
                                    update=update, len_mini_batch=len_mini_batch)
        return dz
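The Dropout layer used above is assumed to expose forward(x, train) and backward(dz). A minimal inverted-dropout sketch consistent with that interface (the mask handling and attribute names here are assumptions, not the original implementation):

```python
import numpy as np


class Dropout:
    """Inverted dropout: rescale at train time so inference needs no change."""

    def __init__(self, p):
        self.p = p          # probability of dropping a unit
        self.mask = None

    def forward(self, x, train=True):
        if not train:
            return x
        # Keep each unit with probability (1 - p) and rescale the survivors.
        self.mask = (np.random.rand(*x.shape) >= self.p) / (1.0 - self.p)
        return x * self.mask

    def backward(self, dz):
        # Gradient flows only through the units that were kept.
        return dz * self.mask
```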
def vgg_bn():
    return [
        Conv2D([3, 3], 32, [1, 1, 1, 1], padding='SAME'),
        Conv2DBatchNorm(32),
        Activation(tf.nn.relu),
        Conv2D([3, 3], 32, [1, 1, 1, 1], padding='SAME'),
        Conv2DBatchNorm(32),
        Activation(tf.nn.relu),
        Conv2D([3, 3], 64, [1, 2, 2, 1]),
        Conv2DBatchNorm(64),
        Activation(tf.nn.relu),
        Conv2D([3, 3], 64, [1, 1, 1, 1], padding='SAME'),
        Conv2DBatchNorm(64),
        Activation(tf.nn.relu),
        Conv2D([3, 3], 128, [1, 2, 2, 1]),
        Conv2DBatchNorm(128),
        Activation(tf.nn.relu),
        Conv2D([3, 3], 128, [1, 1, 1, 1], padding='SAME'),
        Conv2DBatchNorm(128),
        Activation(tf.nn.relu),
        Flatten(),
        Dense(128),
        Activation(tf.sigmoid),
        Dropout(0.5),
        Dense(10),
        Activation(tf.nn.softmax),
    ]
import numpy as np
import theano
import theano.tensor as T

# Shapes used by the test; any small values work.
batch_size, time_steps, input_size = 2, 5, 3


def test_dropout():
    from layers import Dropout
    dropout = Dropout(0.5)
    x = T.tensor3()
    f = theano.function([x], dropout(x))
    X = np.ones((batch_size, time_steps, input_size), dtype=theano.config.floatX)
    assert f(X).shape == (batch_size, time_steps, input_size)
def convModule(flow, filters, dropout, strides=(1, 1), s=(3, 3)):  # {{{
    if mirroring:
        flow = CReLU()(flow)
    flow = Convolution(filters, s=s, initialisation=init, initKWArgs=initKWArgs,
                       strides=strides, regFunction=regF, reg=regP)(flow)
    if observing and not resNet:
        flow = Observation()(flow)
    if not mirroring:
        flow = Activation('relu')(flow)
    if doDropout:
        flow = Dropout(dropout)(flow)
    return flow  # }}}
def fcModule(flow, w, dropout, regf=regF, reg=regP):  # {{{
    if mirroring:
        flow = CReLU()(flow)
    flow = FC(w, initialisation=init, initKWArgs=initKWArgs, reg=regP, regFunction=regF)(flow)
    if observing and not resNet:
        flow = Observation()(flow)
    if not mirroring:
        flow = Activation('relu')(flow)
    if doDropout:
        flow = Dropout(dropout)(flow)
    return flow  # }}}
def __init__(self, input_size, hidden_size_list, output_size,
             activation='relu', weight_init_std='relu', weight_decay_lambda=0,
             use_dropout=False, dropout_ratio=0.5, use_batchnorm=False):
    self.input_size = input_size
    self.output_size = output_size
    self.hidden_size_list = hidden_size_list
    self.hidden_layer_num = len(hidden_size_list)
    self.weight_decay_lambda = weight_decay_lambda
    self.use_dropout = use_dropout
    self.use_batchnorm = use_batchnorm
    self.params = {}

    # Weight initialization
    self.__init_weight(weight_init_std)

    # Build the layers
    activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
    self.layers = OrderedDict()
    for idx in range(1, self.hidden_layer_num + 1):
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                                  self.params['b' + str(idx)])
        if self.use_batchnorm:
            self.params['gamma' + str(idx)] = np.ones(hidden_size_list[idx - 1])
            self.params['beta' + str(idx)] = np.zeros(hidden_size_list[idx - 1])
            self.layers['BatchNorm' + str(idx)] = BatchNormalization(
                self.params['gamma' + str(idx)], self.params['beta' + str(idx)])

        self.layers['Activation_function' + str(idx)] = activation_layer[activation]()

        if self.use_dropout:
            self.layers['Dropout' + str(idx)] = Dropout(dropout_ratio)

    idx = self.hidden_layer_num + 1
    self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                              self.params['b' + str(idx)])

    self.last_layer = SoftmaxWithLoss()
def __init_layer(self):
    activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
    self.layers = OrderedDict()
    for idx in range(1, self.hidden_layer_num + 1):
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                                  self.params['b' + str(idx)])
        if self.use_batchnorm:
            self.params['gamma' + str(idx)] = np.ones(self.hidden_size_list[idx - 1])
            self.params['beta' + str(idx)] = np.zeros(self.hidden_size_list[idx - 1])
            self.layers['BatchNorm' + str(idx)] = BatchNormalization(
                self.params['gamma' + str(idx)], self.params['beta' + str(idx)])

        self.layers['activation_function' + str(idx)] = activation_layer[self.activation]()

        if self.use_dropout:
            self.layers['Dropout' + str(idx)] = Dropout(self.dropout_ration)

    idx += 1
    self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                              self.params['b' + str(idx)])
    self.last_layer = SoftmaxWithLoss()
def _deserialize(self, params):
    layers_attrs = ['layers']
    if params['best_layers_']:
        layers_attrs.append('best_layers_')
    for layers_attr in layers_attrs:
        for i, layer_dict in enumerate(params[layers_attr]):
            if layer_dict['layer'] == 'activation':
                params[layers_attr][i] = Activation(**layer_dict)
            if layer_dict['layer'] == 'dropout':
                params[layers_attr][i] = Dropout(**layer_dict)
            if layer_dict['layer'] == 'fully_connected':
                fc = FullyConnected(**layer_dict)
                fc.W = np.asarray(layer_dict['W'])
                fc.b = np.asarray(layer_dict['b'])
                fc.dW = np.asarray(layer_dict['dW'])
                fc.db = np.asarray(layer_dict['db'])
                params[layers_attr][i] = fc
    return params
def main():
    c = color_codes()
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    try:
        net = load_model('/home/mariano/Desktop/test.tf')
    except IOError:
        x = Input([784])
        x_image = Reshape([28, 28, 1])(x)
        x_conv1 = Conv(filters=32, kernel_size=(5, 5), activation='relu', padding='same')(x_image)
        h_pool1 = MaxPool((2, 2), padding='same')(x_conv1)
        h_conv2 = Conv(filters=64, kernel_size=(5, 5), activation='relu', padding='same')(h_pool1)
        h_pool2 = MaxPool((2, 2), padding='same')(h_conv2)
        h_fc1 = Dense(1024, activation='relu')(h_pool2)
        h_drop = Dropout(0.5)(h_fc1)
        y_conv = Dense(10)(h_drop)
        net = Model(x, y_conv, optimizer='adam', loss='categorical_cross_entropy',
                    metrics='accuracy')

    print(c['c'] + '[' + strftime("%H:%M:%S") + '] ' + c['g'] + c['b'] + 'Original (MNIST)' +
          c['nc'] + c['g'] + ' net ' + c['nc'] + c['b'] +
          '(%d parameters)' % net.count_trainable_parameters() + c['nc'])

    net.fit(mnist.train.images, mnist.train.labels,
            val_data=mnist.test.images, val_labels=mnist.test.labels,
            patience=10, epochs=200, batch_size=1024)
    save_model(net, '/home/mariano/Desktop/test.tf')
def vgg_bn():
    return [
        # 1
        Conv2D([7, 7], 64, [1, 3, 3, 1]),
        Conv2DBatchNorm(64),
        Activation(tf.nn.relu),
        MaxPool([1, 4, 4, 1], [1, 1, 1, 1]),
        # 2
        Convolutional_block(f=3, filters=[64, 64, 256], s=1),
        MaxPool([1, 5, 5, 1], [1, 1, 1, 1]),
        Dropout(0.5),
        Identity_block(f=3, filters=[64, 64, 256]),
        Dropout(0.5),
        Identity_block(f=3, filters=[64, 64, 256]),
        Dropout(0.5),
        MaxPool([1, 2, 2, 1], [1, 1, 1, 1]),
        # 3
        Convolutional_block(f=3, filters=[128, 128, 512], s=2),
        Dropout(0.5),
        Identity_block(f=3, filters=[128, 128, 512]),
        Dropout(0.5),
        Identity_block(f=3, filters=[128, 128, 512]),
        Dropout(0.5),
        MaxPool([1, 2, 2, 1], [1, 1, 1, 1]),
        # 4
        Convolutional_block(f=3, filters=[256, 256, 1024], s=2),
        Identity_block(f=3, filters=[256, 256, 1024]),
        Identity_block(f=3, filters=[256, 256, 1024]),
        Identity_block(f=3, filters=[256, 256, 1024]),
        Identity_block(f=3, filters=[256, 256, 1024]),
        Identity_block(f=3, filters=[256, 256, 1024]),
        Flatten(),
        Dense(128),
        Activation(tf.sigmoid),
        Dropout(0.5),
        Dense(10),
        # Fully_connected(),
        Activation(tf.nn.softmax),
    ]
# Adding dropout layers to prevent overfitting
batch_size = 64
epochs = 20
num_classes = nClasses

tumor_model = Sequential()
tumor_model.add(Conv2D(32, kernel_size=(3, 3), activation='linear',
                       padding='same', input_shape=(28, 28, 1)))
tumor_model.add(LeakyReLU(alpha=0.1))
tumor_model.add(MaxPooling2D((2, 2), padding='same'))
tumor_model.add(Dropout(0.25))
tumor_model.add(Conv2D(64, (3, 3), activation='linear', padding='same'))
tumor_model.add(LeakyReLU(alpha=0.1))
tumor_model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
tumor_model.add(Dropout(0.25))
tumor_model.add(Conv2D(128, (3, 3), activation='linear', padding='same'))
tumor_model.add(LeakyReLU(alpha=0.1))
tumor_model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
tumor_model.add(Dropout(0.4))
tumor_model.add(Flatten())
tumor_model.add(Dense(128, activation='linear'))
tumor_model.add(LeakyReLU(alpha=0.1))
tumor_model.add(Dropout(0.3))
tumor_model.add(Dense(4, activation='softmax'))
tumor_model.summary()
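The model above is only defined and summarized; a typical next step compiles and trains it with the standard Keras API. In this sketch, train_X, train_label, valid_X, and valid_label are placeholder names for the preprocessed data splits and are not part of the original snippet:

```python
from keras.optimizers import Adam
from keras.losses import categorical_crossentropy

# Compile using the hyperparameters defined above (batch_size=64, epochs=20).
tumor_model.compile(loss=categorical_crossentropy,
                    optimizer=Adam(),
                    metrics=['accuracy'])

# train_X, train_label, valid_X, valid_label are hypothetical placeholders
# for the preprocessed training/validation splits.
history = tumor_model.fit(train_X, train_label,
                          batch_size=batch_size,
                          epochs=epochs,
                          verbose=1,
                          validation_data=(valid_X, valid_label))
```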
# Use a small subset so the example runs quickly; truncate X_train as well so it
# matches the truncated y_train (otherwise fit() would see mismatched lengths).
X_train = X_train[:256]
y_train = y_train[:256]
X_train = X_train.reshape((-1, 1, 8, 8))
X_test = X_test.reshape((-1, 1, 8, 8))
X_test = X_train[:256]
y_test = y_train[:256]

# Model
model = NeuralNetwork(SquareLoss(), (X_test, y_test))
model.add(Conv2D(16, filter_shape=(3, 3), stride=1, input_shape=(1, 8, 8), padding='same'))
model.add(Activation('relu'))
model.add(Dropout(p=0.2))
model.add(Conv2D(n_filters=32, filter_shape=(3, 3), stride=1, padding='same'))
model.add(Activation('relu'))
model.add(Dropout(p=0.2))
model.add(Flatten())
model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(0.4))
model.add(Dense(10))
model.add(Activation('softmax'))

train_err = model.fit(X_train, y_train, n_epochs=5, batch_size=256)
# print(model.layers[-1])
n = len(train_err)
NN.SGD(train, eta=1e-3)

#%% RNN: WORKS
NN = FF([RecurrentFullCon(Tanh, Softmax, (4, 50, 4))], RNNCrossEntropy)
NN.SGD(train, eta=1e-4)

#%% Networks: WORKS
NNS = [
    FF([FullCon(Sigmoid, (28 * 28, 30)), FullCon(Sigmoid, (30, 10))], CrossEntropy)
    for i in range(3)
]
NN = FF([Networks(Sigmoid, NNS)], CrossEntropy)
NN.SGD(part_train)

#%% Conv+Pool+Dropout: WORKS
NN = FF([
    Conv(Identity, (25, 3)),
    Pool('mean', (3, -1), (2, 2)),
    Dropout('binomial', 0.9),
    FullCon(Sigmoid, (3 * 13 * 13, 10))
], CrossEntropy)
NN.SGD(part_train)

#%% FullCon Swish: WORKS
b = 10.
NN = FF([FullCon(Swish(b), (28 * 28, 30)), FullCon(Swish(b), (30, 10))], CrossEntropy)
NN.SGD(part_train)

#%% FullCon: WORKS
NN = FF([FullCon(Sigmoid, (28 * 28, 30)), FullCon(Sigmoid, (30, 10))], CrossEntropy)
NN.SGD(part_train)

#%% TO DO
# Expand dataset
from layers import Dropout
import numpy as np
from utils.check_grads import check_grads_layer

batch = 10
ratio = 0.1
height = 10
width = 20
channel = 10

np.random.seed(1234)
inputs = np.random.uniform(size=(batch, channel, height, width))
in_grads = np.random.uniform(size=(batch, channel, height, width))

dropout = Dropout(ratio, seed=1234)
dropout.set_mode(True)
check_grads_layer(dropout, inputs, in_grads)
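check_grads_layer itself is not shown here; the usual idea behind such a helper is to compare the layer's analytic gradient against central finite differences of a scalar surrogate loss like sum(forward(x) * in_grads), with the dropout mask held fixed (hence the seed). A self-contained sketch of that idea on a plain function, independent of the project's actual utility:

```python
import numpy as np


def numerical_grad(f, x, eps=1e-6):
    """Central-difference gradient of a scalar-valued function f at x."""
    grad = np.zeros_like(x)
    for idx in np.ndindex(x.shape):
        orig = x[idx]
        x[idx] = orig + eps
        plus = f(x)
        x[idx] = orig - eps
        minus = f(x)
        x[idx] = orig
        grad[idx] = (plus - minus) / (2 * eps)
    return grad


# A dropout forward pass with a *fixed* mask, so the check is deterministic.
rng = np.random.RandomState(1234)
x = rng.uniform(size=(4, 3))
upstream = rng.uniform(size=(4, 3))
keep = (rng.uniform(size=x.shape) >= 0.1) / (1 - 0.1)

loss = lambda inp: np.sum(inp * keep * upstream)   # surrogate scalar loss
analytic = keep * upstream                          # d(loss)/d(input)
numeric = numerical_grad(loss, x.copy())
print(np.max(np.abs(analytic - numeric)))           # should be ~1e-9 or smaller
```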
def main():
    test_id = 1

    if test_id == 1:
        # ----------
        # Conv Net
        # ----------
        optimizer = Adam()

        data = datasets.load_digits()
        X = data.data      # (1797, 64)
        y = data.target

        # Convert to one-hot encoding
        y = to_categorical(y.astype("int"))  # (n_sample, n_class)

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

        # Reshape X to (n_samples, channels, height, width)
        X_train = X_train.reshape((-1, 1, 8, 8))
        X_test = X_test.reshape((-1, 1, 8, 8))

        clf = NeuralNetwork(optimizer=optimizer, loss=CrossEntropy,
                            validation_data=(X_test, y_test))
        clf.add(Conv2D(n_filters=16, filter_shape=(3, 3), stride=1,
                       input_shape=(1, 8, 8), padding='same'))
        clf.add(Activation('relu'))
        clf.add(Dropout(0.25))
        clf.add(BatchNormalization())
        clf.add(Conv2D(n_filters=32, filter_shape=(3, 3), stride=1, padding='same'))
        clf.add(Activation('relu'))
        clf.add(Dropout(0.25))
        clf.add(BatchNormalization())
        clf.add(Flatten())        # flatten layer
        clf.add(Dense(256))       # fully connected layer
        clf.add(Activation('relu'))
        clf.add(Dropout(0.4))
        clf.add(BatchNormalization())
        clf.add(Dense(10))
        clf.add(Activation('softmax'))

        print()
        clf.summary(name="ConvNet")

        train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=64)

        # Training and validation error plot
        n = len(train_err)
        training, = plt.plot(range(n), train_err, label="Training Error")
        validation, = plt.plot(range(n), val_err, label="Validation Error")
        plt.legend(handles=[training, validation])
        plt.title("Error Plot")
        plt.ylabel('Error')
        plt.xlabel('Iterations')
        plt.show()

        _, accuracy = clf.test_on_batch(X_test, y_test)
        print("Accuracy:", accuracy)

        y_pred = np.argmax(clf.predict(X_test), axis=1)
        X_test = X_test.reshape(-1, 8 * 8)
        # Reduce dimension to 2D using PCA and plot the results
        Plot().plot_in_2d(X_test, y_pred, title="Convolutional Neural Network",
                          accuracy=accuracy, legend_labels=range(10))

    if test_id == 2:
        dataset = MultiClassDataset(n_samples=300, centers=3, n_features=2,
                                    center_box=(-10.0, 10.0), cluster_std=1.0,
                                    norm=True, one_hot=True)
        X = dataset.datas
        y = dataset.labels
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, seed=1)

        clf = NeuralNetwork(optimizer=optimizer, loss=CrossEntropy,
                            validation_data=(X_test, y_test))
        clf.add(Dense(3))
        clf.add(Activation('softmax'))
        clf.summary(name="SoftmaxReg")

        train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=256)
def convert(keras_model, class_map, description="Neural Network Model"):
    """
    Convert a keras model to PMML
    @model. The keras model object
    @class_map. A map in the form {class_id: class_name}
    @description. A short description of the model
    Returns a DeepNeuralNetwork object which can be exported to PMML
    """
    pmml = DeepNetwork(description=description, class_map=class_map)
    pmml.keras_model = keras_model
    pmml.model_name = keras_model.name
    config = keras_model.get_config()

    for layer in config['layers']:
        layer_class = layer['class_name']
        layer_config = layer['config']
        layer_inbound_nodes = layer['inbound_nodes']
        # Compare class names with "==": identity checks ("is") on strings are unreliable.
        # Input
        if layer_class == "InputLayer":
            pmml._append_layer(InputLayer(
                name=layer_config['name'],
                input_size=layer_config['batch_input_shape'][1:]))
        # Conv2D
        elif layer_class == "Conv2D":
            pmml._append_layer(Conv2D(
                name=layer_config['name'], channels=layer_config['filters'],
                kernel_size=layer_config['kernel_size'],
                dilation_rate=layer_config['dilation_rate'],
                use_bias=layer_config['use_bias'], activation=layer_config['activation'],
                strides=layer_config['strides'], padding=layer_config['padding'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        # DepthwiseConv2D
        elif layer_class == "DepthwiseConv2D":
            pmml._append_layer(DepthwiseConv2D(
                name=layer_config['name'], kernel_size=layer_config['kernel_size'],
                depth_multiplier=layer_config['depth_multiplier'],
                use_bias=layer_config['use_bias'], activation=layer_config['activation'],
                strides=layer_config['strides'], padding=layer_config['padding'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        # MaxPooling
        elif layer_class == "MaxPooling2D":
            pmml._append_layer(MaxPooling2D(
                name=layer_config['name'], pool_size=layer_config['pool_size'],
                strides=layer_config['strides'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        elif layer_class == "AveragePooling2D":
            pmml._append_layer(AveragePooling2D(
                name=layer_config['name'], pool_size=layer_config['pool_size'],
                strides=layer_config['strides'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        elif layer_class == "GlobalAveragePooling2D":
            pmml._append_layer(GlobalAveragePooling2D(
                name=layer_config['name'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        # Flatten
        elif layer_class == "Flatten":
            pmml._append_layer(Flatten(
                name=layer_config['name'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        # Dense
        elif layer_class == "Dense":
            pmml._append_layer(Dense(
                name=layer_config['name'], channels=layer_config['units'],
                use_bias=layer_config['use_bias'], activation=layer_config['activation'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        # Zero padding layer
        elif layer_class == "ZeroPadding2D":
            pmml._append_layer(ZeroPadding2D(
                name=layer_config['name'], padding=layer_config['padding'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        # Reshape layer
        elif layer_class == "Reshape":
            pmml._append_layer(Reshape(
                name=layer_config['name'], target_shape=layer_config['target_shape'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        elif layer_class == "Dropout":
            pmml._append_layer(Dropout(
                name=layer_config['name'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        # Batch Normalization
        elif layer_class == "BatchNormalization":
            pmml._append_layer(BatchNormalization(
                name=layer_config['name'], axis=layer_config['axis'],
                momentum=layer_config['momentum'], epsilon=layer_config['epsilon'],
                center=layer_config['center'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        elif layer_class == "Add":
            pmml._append_layer(Merge(
                name=layer_config['name'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        elif layer_class == "Subtract":
            pmml._append_layer(Merge(
                name=layer_config['name'], operator='subtract',
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        elif layer_class == "Dot":
            pmml._append_layer(Merge(
                name=layer_config['name'], operator='dot',
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        elif layer_class == "Concatenate":
            pmml._append_layer(Merge(
                name=layer_config['name'], axis=layer_config['axis'],
                operator='concatenate',
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        elif layer_class == "Activation":
            pmml._append_layer(Activation(
                name=layer_config['name'], activation=layer_config['activation'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        elif layer_class == "ReLU":
            pmml._append_layer(Activation(
                name=layer_config['name'], activation='relu',
                threshold=layer_config['threshold'],
                max_value=layer_config['max_value'],
                negative_slope=layer_config['negative_slope'],
                inbound_nodes=get_inbound_nodes(layer_inbound_nodes)))
        # Unknown layer
        else:
            raise ValueError("Unknown layer type:", layer_class)
    return pmml
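A hedged usage sketch for convert(): it assumes the DeepNetwork wrapper and PMML layer classes above are importable from the same package, and that a functional-API Keras model is used so each layer's config carries inbound_nodes. The model, file names, and class map here are illustrative only:

```python
from keras.layers import Input, Dense, Dropout
from keras.models import Model

# Small functional-API model, so get_config() includes inbound_nodes per layer.
inputs = Input(shape=(64,), name='input')
hidden = Dense(32, activation='relu', name='hidden')(inputs)
hidden = Dropout(0.5, name='drop')(hidden)
outputs = Dense(3, activation='softmax', name='output')(hidden)
keras_model = Model(inputs, outputs, name='small_classifier')

class_map = {0: 'class_a', 1: 'class_b', 2: 'class_c'}  # illustrative labels
pmml_model = convert(keras_model, class_map, description="Toy classifier")
# The returned DeepNetwork can then be serialized to PMML; the exact export
# method name depends on the DeepNetwork implementation.
```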
threshold = 1000
x_train = x_train[:threshold]
y_train = y_train[:threshold]
x_test = x_test[:threshold]
y_test = y_test[:threshold]

seed = 15
model = Sequential(seed=seed)
model.add(Conv2D(32, (5, 5), activation="relu", inputs_shape=x_train.shape[1:]))
model.add(Pooling((2, 2)))
model.add(Conv2D(16, (3, 3), activation="relu"))
model.add(Pooling((2, 2)))
model.add(Dense(10, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(10, activation="softmax"))
model.compile(loss="categorical_crossentropy", optimizer=Adam(), metric="accuracy")
model.fit(x_train=x_train, t_train=y_train, x_test=x_test, t_test=y_test,
          batch_size=128, epochs=10, output_num=1)

# Plot the training error
plt.plot(model.history_train[0])
def __init__(self, name, numpy_rng, theano_rng, batchsize=128): # CALL PARENT CONSTRUCTOR TO SETUP CONVENIENCE FUNCTIONS # (SAVE/LOAD, ...) super(EmotionConvNet, self).__init__(name=name) self.numpy_rng = numpy_rng self.batchsize = batchsize self.theano_rng = theano_rng self.mode = theano.shared(np.int8(0), name='mode') self.inputs = T.ftensor4('inputs') self.inputs.tag.test_value = numpy_rng.randn(self.batchsize, 1, 48, 48).astype(np.float32) self.targets = T.ivector('targets') self.targets.tag.test_value = numpy_rng.randint( 7, size=self.batchsize).astype(np.int32) self.layers = OrderedDict() self.layers['randcropandflip'] = RandCropAndFlip( inputs=self.inputs, image_shape=(self.batchsize, 1, 48, 48), patch_size=(44, 44), name='randcropandflip', theano_rng=self.theano_rng, mode_var=self.mode) self.layers['conv0'] = ConvLayer( rng=self.numpy_rng, inputs=self.layers['randcropandflip'], filter_shape=(32, 1, 9, 9), #image_shape=(self.batchsize, 1, 48, 48), name='conv0', pad=4) self.layers['maxpool0'] = MaxPoolLayer(inputs=self.layers['conv0'], pool_size=(2, 2), stride=(2, 2), name='maxpool0') self.layers['bias0'] = ConvBiasLayer(inputs=self.layers['maxpool0'], name='bias0') self.layers['relu0'] = Relu(inputs=self.layers['bias0'], name='relu0') self.layers['dropout0'] = Dropout(inputs=self.layers['relu0'], dropout_rate=.25, name='dropout0', theano_rng=self.theano_rng, mode_var=self.mode) self.layers['conv1'] = ConvLayer(rng=self.numpy_rng, inputs=self.layers['dropout0'], filter_shape=(32, 32, 5, 5), name='conv1', pad=2) self.layers['maxpool1'] = MaxPoolLayer(inputs=self.layers['conv1'], pool_size=(2, 2), stride=(2, 2), name='maxpool1') self.layers['bias1'] = ConvBiasLayer(inputs=self.layers['maxpool1'], name='bias1') self.layers['relu1'] = Relu(inputs=self.layers['bias1'], name='relu1') self.layers['dropout1'] = Dropout(inputs=self.layers['relu1'], dropout_rate=.25, name='dropout1', theano_rng=self.theano_rng, mode_var=self.mode) self.layers['conv2'] = ConvLayer(rng=self.numpy_rng, inputs=self.layers['dropout1'], filter_shape=(64, 32, 5, 5), name='conv2', pad=2) self.layers['maxpool2'] = MaxPoolLayer(inputs=self.layers['conv2'], pool_size=(2, 2), stride=(2, 2), name='maxpool2') self.layers['bias2'] = ConvBiasLayer(inputs=self.layers['maxpool2'], name='bias2') self.layers['relu2'] = Relu(inputs=self.layers['bias2'], name='relu2') self.layers['dropout2'] = Dropout(inputs=self.layers['relu2'], dropout_rate=.25, name='dropout2', theano_rng=self.theano_rng, mode_var=self.mode) self.layers['reshape2'] = Reshape( inputs=self.layers['dropout2'], shape=(self.layers['dropout2'].outputs_shape[0], np.prod(self.layers['dropout2'].outputs_shape[1:])), name='reshape2') self.layers['fc3'] = AffineLayer(rng=self.numpy_rng, inputs=self.layers['reshape2'], nouts=7, name='fc3') self.layers['softmax3'] = Softmax(inputs=self.layers['fc3'], name='softmax3') self.probabilities = self.layers['softmax3'].outputs self.probabilities = T.clip(self.probabilities, 1e-6, 1 - 1e-6) self._cost = T.nnet.categorical_crossentropy(self.probabilities, self.targets).mean() self.classification = T.argmax(self.probabilities, axis=1) self.params = [] for l in self.layers.values(): self.params.extend(l.params) self._grads = T.grad(self._cost, self.params) self.classify = theano.function( [self.inputs], self.classification, #givens={self.mode: np.int8(1)}) )
def main(): train_x, train_y, valid_x, valid_y, test_x, test_y = get_mnist() num_epochs = args.epochs eta = args.lr batch_size = args.batch_size # input x = T.matrix("x") y = T.ivector("y") drop_switch = T.scalar("drop_switch") #x.tag.test_value = np.random.randn(3, 784).astype("float32") #y.tag.test_value = np.array([1,2,3]) #drop_switch.tag.test_value = 0 #import ipdb; ipdb.set_trace() hidden_1 = Dense(input=x, n_in=784, n_out=2048, name="hidden_1") act_1 = Activation(input=hidden_1.output, activation="relu", name="act_1") drop_1 = Dropout(input=act_1.output, p=0.5, drop_switch=drop_switch) hidden_2 = Dense(input=drop_1.output, n_in=2048, n_out=2048, name="hidden_2") act_2 = Activation(input=hidden_2.output, activation="relu", name="act_2") drop_2 = Dropout(input=act_2.output, p=0.5, drop_switch=drop_switch) hidden_3 = Dense(input=drop_2.output, n_in=2048, n_out=2048, name="hidden_3") act_3 = Activation(input=hidden_3.output, activation="relu", name="act_3") drop_3 = Dropout(input=act_3.output, p=0.5, drop_switch=drop_switch) output = Dense(input=drop_3.output, n_in=2048, n_out=10, name="output") softmax = Activation(input=output.output, activation="softmax", name="softmax") # loss xent = T.nnet.nnet.categorical_crossentropy(softmax.output, y) cost = xent.mean() # errors y_pred = T.argmax(softmax.output, axis=1) errors = T.mean(T.neq(y, y_pred)) # updates params = hidden_1.params + hidden_2.params + hidden_3.params grads = [T.grad(cost, param) for param in params] updates = [] for p, g in zip(params, grads): updates.append((p, p - eta * g) #sgd ) # compiling train, predict and test fxns train = theano.function(inputs=[x, y, drop_switch], outputs=cost, updates=updates) predict = theano.function(inputs=[x, drop_switch], outputs=y_pred) test = theano.function(inputs=[x, y, drop_switch], outputs=errors) # train checkpoint = ModelCheckpoint(folder="snapshots") logger = Logger("logs/{}".format(time())) for epoch in range(num_epochs): print "Epoch: ", epoch print "LR: ", eta epoch_hist = {"loss": []} t = tqdm(range(0, len(train_x), batch_size)) for lower in t: upper = min(len(train_x), lower + batch_size) loss = train(train_x[lower:upper], train_y[lower:upper].astype(np.int32), 1.0) # drop t.set_postfix(loss="{:.2f}".format(float(loss))) epoch_hist["loss"].append(loss.astype(np.float32)) # epoch loss average_loss = sum(epoch_hist["loss"]) / len(epoch_hist["loss"]) t.set_postfix(loss="{:.2f}".format(float(average_loss))) logger.log_scalar(tag="Training Loss", value=average_loss, step=epoch) # validation accuracy val_acc = 1.0 - test(valid_x, valid_y.astype(np.int32), 0.0) # nodrop print "Validation Accuracy: ", val_acc logger.log_scalar(tag="Validation Accuracy", value=val_acc, step=epoch) checkpoint.check(val_acc, params) # Report Results on test set (w/ best val acc file) best_val_acc_filename = checkpoint.best_val_acc_filename print "Using ", best_val_acc_filename, " to calculate best test acc." load_model(path=best_val_acc_filename, params=params) test_acc = 1.0 - test(test_x, test_y.astype(np.int32), 0.0) # dropout disabled print "Test accuracy: ", test_acc
Convolution(input_shape=(32, 32), input_depth=3, n_filters=32,
            filter_dim=(3, 3), stride=(1, 1), padding=((1, 1), (1, 1))),
ReLU(),
BatchNorm(),
Convolution(input_shape=(32, 32), input_depth=32, n_filters=32,
            filter_dim=(3, 3), stride=(1, 1), padding=((1, 1), (1, 1))),
ReLU(),
BatchNorm(),
MaxPooling(input_shape=(32, 32), input_depth=32, filter_dim=(2, 2), stride=(2, 2)),
Dropout(rate=0.2),
Convolution(input_shape=(16, 16), input_depth=32, n_filters=64,
            filter_dim=(3, 3), stride=(1, 1), padding=((1, 1), (1, 1))),
ReLU(),
BatchNorm(),
Convolution(input_shape=(16, 16), input_depth=64, n_filters=64,
            filter_dim=(3, 3), stride=(1, 1), padding=((1, 1), (1, 1))),
ReLU(),
BatchNorm(),
MaxPooling(input_shape=(16, 16), input_depth=64, filter_dim=(2, 2),
def main(): train_x, train_y, valid_x, valid_y, test_x, test_y = get_cifar10( './cifar-10-batches-py/') labels = unpickle('./cifar-10-batches-py/batches.meta')['label_names'] train_x = train_x.astype(np.float32) / 255.0 valid_x = valid_x.astype(np.float32) / 255.0 test_x = test_x.astype(np.float32) / 255.0 num_epochs = args.epochs eta = args.lr batch_size = args.batch_size # input x = T.tensor4("x") y = T.ivector("y") drop_switch = T.scalar("drop_switch") # test values # x.tag.test_value = np.random.randn(6, 3, 32, 32).astype(np.float32) # y.tag.test_value = np.array([1,2,1,4,5]).astype(np.int32) # x.tag.test_value = x.tag.test_value / x.tag.test_value.max() # drop_switch.tag.test_value = 1.0 # import ipdb; ipdb.set_trace() # network definition conv1 = Conv2D(input=x, num_filters=50, input_channels=3, size=3, strides=(1, 1), padding=1, name="conv1") act1 = Activation(input=conv1.output, activation="relu", name="act1") pool1 = Pool2D(input=act1.output, stride=(2, 2), name="pool1") conv2 = Conv2D(input=pool1.output, num_filters=100, input_channels=50, size=3, strides=(1, 1), padding=1, name="conv2") act2 = Activation(input=conv2.output, activation="relu", name="act2") pool2 = Pool2D(input=act2.output, stride=(2, 2), name="pool2") conv3 = Conv2D(input=pool2.output, num_filters=200, input_channels=100, size=3, strides=(1, 1), padding=1, name="conv3") act3 = Activation(input=conv3.output, activation="relu", name="act3") pool3 = Pool2D(input=act3.output, stride=(2, 2), name="pool3") flat = Flatten(input=pool3.output) drop4 = Dropout(input=flat.output, p=0.5, drop_switch=drop_switch) fc1 = Dense(input=drop4.output, n_in=200 * 4 * 4, n_out=500, name="fc1") act4 = Activation(input=fc1.output, activation="relu", name="act4") drop5 = Dropout(input=act4.output, p=0.5, drop_switch=drop_switch) fc2 = Dense(input=drop5.output, n_in=500, n_out=10, name="fc2") softmax = Activation(input=fc2.output, activation="softmax", name="softmax") # loss xent = T.nnet.nnet.categorical_crossentropy(softmax.output, y) cost = xent.mean() # errors y_pred = T.argmax(softmax.output, axis=1) errors = T.mean(T.neq(y, y_pred)) # updates params = conv1.params + conv2.params + conv3.params + fc1.params + fc2.params grads = [T.grad(cost, param) for param in params] updates = [] for p, g in zip(params, grads): updates.append((p, p - eta * g) #sgd ) # compiling train, predict and test fxns train = theano.function(inputs=[x, y, drop_switch], outputs=cost, updates=updates) predict = theano.function(inputs=[x, drop_switch], outputs=y_pred) test = theano.function(inputs=[x, y, drop_switch], outputs=errors) # train checkpoint = ModelCheckpoint(folder="snapshots") logger = Logger("logs/{}".format(time())) for epoch in range(num_epochs): print "Epoch: ", epoch print "LR: ", eta epoch_hist = {"loss": []} t = tqdm(range(0, len(train_x), batch_size)) for lower in t: upper = min(len(train_x), lower + batch_size) loss = train(train_x[lower:upper], train_y[lower:upper].astype(np.int32), 1.0) # drop t.set_postfix(loss="{:.2f}".format(float(loss))) epoch_hist["loss"].append(loss.astype(np.float32)) # epoch loss average_loss = sum(epoch_hist["loss"]) / len(epoch_hist["loss"]) t.set_postfix(loss="{:.2f}".format(float(average_loss))) logger.log_scalar(tag="Training Loss", value=average_loss, step=epoch) # validation accuracy val_acc = 1.0 - test(valid_x, valid_y.astype(np.int32), 0.0) # nodrop print "Validation Accuracy: ", val_acc logger.log_scalar(tag="Validation Accuracy", value=val_acc, step=epoch) checkpoint.check(val_acc, params) # Report 
# Results on test set (w/ best val acc file)
best_val_acc_filename = checkpoint.best_val_acc_filename
print "Using ", best_val_acc_filename, " to calculate best test acc."
load_model(path=best_val_acc_filename, params=params)
test_acc = 1.0 - test(test_x, test_y.astype(np.int32), 0.0)  # no drop
print "Test accuracy: ", test_acc
'''
Model and its output activation and loss function
Currently the output gate assumes Softmax + CrossEntropy
'''
softmax_crossentropy = SoftmaxCrossEntropy()

# CURRENTLY THE BEST MODEL
he_and_relu = [
    FullyConnected(config.INPUT_DIM, 192, he_uniform_init, use_weight_norm=True),
    BatchNorm(input_dim=192),
    ReLU(),
    Dropout(0.3),
    FullyConnected(192, 96, he_uniform_init, use_weight_norm=True),
    BatchNorm(input_dim=96),
    ReLU(),
    Dropout(0.3),
    FullyConnected(96, 48, he_uniform_init, use_weight_norm=True),
    BatchNorm(input_dim=48),
    ReLU(),
    Dropout(0.3),
    FullyConnected(48, config.NUM_CLASSES, he_uniform_init, use_weight_norm=True),
]

xavier_and_lrelu = [
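Fusing Softmax with CrossEntropy, as the SoftmaxCrossEntropy head above does, is the standard trick because the combined gradient with respect to the logits collapses to probabilities minus targets. A small numpy illustration of that identity, independent of the classes above:

```python
import numpy as np


def softmax(z):
    e = np.exp(z - z.max(axis=1, keepdims=True))   # shift for numerical stability
    return e / e.sum(axis=1, keepdims=True)


logits = np.random.randn(4, 5)
targets = np.eye(5)[np.random.randint(0, 5, size=4)]   # one-hot labels

probs = softmax(logits)
loss = -np.sum(targets * np.log(probs)) / len(logits)

# Gradient of the fused Softmax + CrossEntropy w.r.t. the logits:
grad_logits = (probs - targets) / len(logits)
```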
def __init__(self, sizes, batch_size, epoch_num, use_trained_params=False, filename=None,
             img_dim=(3, 32, 32),
             conv_param={'filter_num': 32, 'filter_size': 3, 'padding': 1, 'stride': 1},
             optimizer='Adam', activation='ReLU',
             use_dropout=True, dropout_p=0.2, use_bn=True):
    self.num_layers = len(sizes)
    self.sizes = sizes
    self.batch_size = batch_size
    self.epoch_num = epoch_num
    # self.learning_rate = learning_rate
    self.activation = activation
    self.use_dropout = use_dropout
    self.dropout_p = dropout_p
    self.use_bn = use_bn

    self.filter_num = conv_param['filter_num']
    self.filter_size = conv_param['filter_size']
    self.filter_padding = conv_param['padding']
    self.filter_stride = conv_param['stride']
    self.img_c = img_dim[0]
    self.img_wh = img_dim[1]
    self.conv_output_size = int(
        (img_dim[1] - self.filter_size + 2 * self.filter_padding) / self.filter_stride) + 1
    self.pool_output_size = int(self.filter_num * (self.conv_output_size / 2) *
                                (self.conv_output_size / 2))

    self.opt = optimizer
    optimizers = {'SGD': SGD, 'Momentum_SGD': Momentum_SGD, 'AdaGrad': AdaGrad,
                  'RMSProp': RMSProp, 'AdaDelta': AdaDelta, 'Adam': Adam}
    self.optimizer = optimizers[self.opt]()

    if use_trained_params:
        path = os.path.dirname(os.path.abspath(__file__))
        loaded_params = np.load(os.path.join(path, filename))
        self.W1 = loaded_params['W1']
        self.b1 = loaded_params['b1']
        self.W2 = loaded_params['W2']
        self.b2 = loaded_params['b2']
        self.W3 = loaded_params['W3']
        self.b3 = loaded_params['b3']
        self.gamma = loaded_params['gamma']
        self.beta = loaded_params['beta']
        if use_bn:
            self.running_mean = loaded_params['running_mean']
            self.running_var = loaded_params['running_var']
    else:
        np.random.seed(12)
        # Conv layer weights
        self.W1 = np.sqrt(1 / sizes[0]) * np.random.randn(
            self.filter_num, img_dim[0], self.filter_size, self.filter_size)
        self.b1 = np.sqrt(1 / sizes[0]) * np.random.randn(self.filter_num)
        # BatchNorm layer
        self.gamma = np.ones(self.filter_num * self.conv_output_size * self.conv_output_size)
        self.beta = np.zeros(self.filter_num * self.conv_output_size * self.conv_output_size)
        # FullyConnected layer weights (hidden layer)
        self.W2 = np.sqrt(1 / self.pool_output_size) * np.random.randn(
            self.pool_output_size, self.sizes[1])  # (pool, 100)
        self.b2 = np.sqrt(1 / self.pool_output_size) * np.random.randn(self.sizes[1])
        # FullyConnected layer weights (output layer)
        self.W3 = np.sqrt(1 / sizes[1]) * np.random.randn(self.sizes[1], self.sizes[2])
        self.b3 = np.sqrt(1 / sizes[1]) * np.random.randn(self.sizes[2])

    # layers of network
    activation_function = {'Sigmoid': Sigmoid, 'ReLU': ReLU}
    self.layers = {}
    self.layers['Conv'] = Conv2D(self.W1, self.b1, self.filter_stride, self.filter_padding)
    if self.use_bn:
        if use_trained_params:
            self.layers['BatchNorm'] = BatchNorm(self.gamma, self.beta,
                                                 running_mean=self.running_mean,
                                                 running_var=self.running_var)
        else:
            self.layers['BatchNorm'] = BatchNorm(self.gamma, self.beta)
    self.layers['Activation'] = activation_function[self.activation]()
    if self.use_dropout:
        self.layers['Dropout'] = Dropout(self.dropout_p)
    self.layers['Pool'] = MaxPool(pool_h=2, pool_w=2, stride=2)
    self.layers['FullyConnected1'] = FullyConnected(self.W2, self.b2)
    self.layers['Activation2'] = activation_function[self.activation]()
    self.layers['FullyConnected2'] = FullyConnected(self.W3, self.b3)
    self.lastLayer = SoftmaxLoss()
def __init__(self, sizes, batch_size, epoch_num, use_trained_params=False, filename=None,
             optimizer='SGD', activation='ReLU', use_dropout=True, dropout_p=0.2, use_bn=True):
    self.num_layers = len(sizes)
    self.sizes = sizes
    self.batch_size = batch_size
    self.epoch_num = epoch_num
    self.activation = activation
    self.use_dropout = use_dropout
    self.dropout_p = dropout_p
    self.use_bn = use_bn

    self.opt = optimizer
    optimizers = {'SGD': SGD, 'Momentum_SGD': Momentum_SGD, 'AdaGrad': AdaGrad,
                  'RMSProp': RMSProp, 'AdaDelta': AdaDelta, 'Adam': Adam}
    self.optimizer = optimizers[self.opt]()

    if use_trained_params:
        path = os.path.dirname(os.path.abspath(__file__))
        loaded_params = np.load(os.path.join(path, filename))
        self.W1 = loaded_params['W1']
        self.b1 = loaded_params['b1']
        self.W2 = loaded_params['W2']
        self.b2 = loaded_params['b2']
        self.gamma = loaded_params['gamma']
        self.beta = loaded_params['beta']
        # When Batch Normalization is used
        if self.use_bn:
            self.running_mean = loaded_params['running_mean']
            self.running_var = loaded_params['running_var']
    else:
        np.random.seed(12)
        self.W1 = np.sqrt(1 / sizes[0]) * np.random.randn(sizes[0], sizes[1])  # (784, 50)
        self.b1 = np.sqrt(1 / sizes[0]) * np.random.randn(sizes[1])
        self.W2 = np.sqrt(1 / sizes[1]) * np.random.randn(sizes[1], sizes[2])  # (50, 10)
        self.b2 = np.sqrt(1 / sizes[1]) * np.random.randn(sizes[2])
        self.gamma = np.ones(self.W1.shape[1])
        self.beta = np.zeros(self.W1.shape[1])

    # layers of network
    activation_function = {'Sigmoid': Sigmoid, 'ReLU': ReLU}
    self.layers = {}
    self.layers['FullyConnected1'] = FullyConnected(self.W1, self.b1)
    if self.use_bn:
        if use_trained_params:
            self.layers['BatchNorm'] = BatchNorm(self.gamma, self.beta,
                                                 running_mean=self.running_mean,
                                                 running_var=self.running_var)
        else:
            self.layers['BatchNorm'] = BatchNorm(self.gamma, self.beta)
    self.layers['Activation'] = activation_function[self.activation]()
    if self.use_dropout:
        self.layers['Dropout'] = Dropout(self.dropout_p)
    self.layers['FullyConnected2'] = FullyConnected(self.W2, self.b2)
    self.lastLayer = SoftmaxLoss()
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

x = x_train.reshape(-1, 28 * 28)
x = (x - x.mean(axis=1).reshape(-1, 1)) / x.std(axis=1).reshape(-1, 1)
x = x.reshape(-1, 28, 28, 1)
y = pd.get_dummies(y_train).to_numpy()

xt = x_test.reshape(-1, 28 * 28)
xt = (xt - xt.mean(axis=1).reshape(-1, 1)) / xt.std(axis=1).reshape(-1, 1)
xt = xt.reshape(-1, 28, 28, 1)
yt = pd.get_dummies(y_test).to_numpy()

m = Sequential()
m.add(Conv2d(input_shape=(28, 28, 1), filters=4, padding=None,
             kernel_size=(3, 3), activation="relu"))
m.add(Conv2d(filters=8, kernel_size=(3, 3), padding=None, activation="relu"))
m.add(Pool2d(kernel_size=(2, 2)))
m.add(Flatten())
m.add(FFL(neurons=64, activation="relu"))
m.add(Dropout(0.1))
m.add(FFL(neurons=10, activation='softmax'))

m.compile_model(lr=0.01, opt="adam", loss="cse")
m.summary()
m.train(x[:30], y[:30], epochs=2, batch_size=30, val_x=xt[:10], val_y=yt[:10])
m.visualize()
m.save_model()
load_model()
m.summary()
print(m.predict(x[10]))
import numpy as np

iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
y_train_one_hot = to_categorical(y_train).astype(int)
y_test_one_hot = to_categorical(y_test).astype(int)

net = NetWork([
    Linear(input_dim=4, output_dim=5),
    Tanh(),
    Linear(input_dim=5, output_dim=3),
    Dropout(p=0.3),
    Softmax(input_dim=3)
])

svc = SVC()
lr = LogisticRegression()
# Train the SVM classifier
svc.fit(X_train, y_train)
# Train the logistic regression classifier
lr.fit(X_train, y_train)
# Train the neural network
train(net, X_train,
def encode(self, data): args = self.args dropout = args['dropout'] ids, p_char_ids, p_xl_ids, q_xl_ids, pos_ids, ner_ids, match_origin, match_lower, match_lemma, tf, p_mask, q_len, q_mask, mask = unpack_data( data) ### Transformer-XL if self.use_xl: with torch.no_grad(): q_xl, mems_1 = self.xl(q_xl_ids) p_xl, _ = self.xl(p_xl_ids, mems=mems_1) ### dropout q_xl = Dropout(q_xl, dropout, self.training) p_xl = Dropout(p_xl, dropout, self.training) xl = torch.cat([q_xl, p_xl], 1) ### obtain embeddings #concatenated emb size: [bsz x len x model_size] emb = self.word_embeddings(ids) char_emb = self.char_embeddings(p_char_ids) emb = self.emb_char(char_emb, emb) pos_emb = self.pos_embeddings(pos_ids) ner_emb = self.ner_embeddings(ner_ids) emb = Dropout(emb, dropout, self.training) #dropout ## Break down into passage and question p_emb = emb[:, q_len:] q_emb = emb[:, :q_len] p_ner_emb = ner_emb[:, q_len:] q_ner_emb = ner_emb[:, :q_len] p_pos_emb = pos_emb[:, q_len:] q_pos_emb = pos_emb[:, :q_len] p_tf = tf[:, q_len:] q_tf = tf[:, :q_len] p_match_origin = match_origin[:, q_len:] q_match_origin = match_origin[:, :q_len] p_match_lemma = match_lemma[:, q_len:] q_match_lemma = match_lemma[:, :q_len] p_match_lower = match_lower[:, q_len:] q_match_lower = match_lower[:, :q_len] ### Attention word_attention_outputs = self.word_attention_layer( p_emb, p_mask, emb[:, :q_len + 1], q_mask, self.training) q_word_attention_outputs = self.word_attention_layer( emb[:, :q_len + 1], q_mask, p_emb, p_mask, self.training) word_attention_outputs[:, 0] += q_word_attention_outputs[:, -1] q_word_attention_outputs = q_word_attention_outputs[:, :-1] p_word_inp = torch.cat([ p_emb, p_pos_emb, p_ner_emb, word_attention_outputs, p_match_origin, p_match_lower, p_match_lemma, p_tf ], dim=2) q_word_inp = torch.cat([ q_emb, q_pos_emb, q_ner_emb, q_word_attention_outputs, q_match_origin, q_match_lower, q_match_lemma, q_tf ], dim=2) if self.use_xl: p_word_inp = torch.cat([p_word_inp, p_xl], dim=2) q_word_inp = torch.cat([q_word_inp, q_xl], dim=2) ### Encoding into low, high and full word_inp = torch.cat([q_word_inp, p_word_inp], 1) low_states = self.low_rnn(word_inp, mask) high_states = self.high_rnn(low_states, mask) full_inp = torch.cat([low_states, high_states], dim=2) full_states = self.full_rnn(full_inp, mask) ### Attention HoW = torch.cat([emb, low_states, high_states], dim=2) if self.use_xl: HoW = torch.cat([HoW, xl], dim=2) p_HoW = HoW[:, q_len:] low_p_states = low_states[:, q_len:] high_p_states = high_states[:, q_len:] full_p_states = full_states[:, q_len:] q_HoW = HoW[:, :q_len + 1] low_q_states = low_states[:, :q_len + 1] high_q_states = high_states[:, :q_len + 1] full_q_states = full_states[:, :q_len + 1] low_attention_outputs, low_attention_q = self.low_attention_layer( p_HoW, p_mask, q_HoW, q_mask, low_q_states, low_p_states, self.training) high_attention_outputs, high_attention_q = self.high_attention_layer( p_HoW, p_mask, q_HoW, q_mask, high_q_states, high_p_states, self.training) full_attention_outputs, full_attention_q = self.full_attention_layer( p_HoW, p_mask, q_HoW, q_mask, full_q_states, full_p_states, self.training) low_attention_outputs[:, 0] += low_attention_q[:, -1] high_attention_outputs[:, 0] += high_attention_q[:, -1] full_attention_outputs[:, 0] += full_attention_q[:, -1] fuse_inp = torch.cat([ low_p_states, high_p_states, low_attention_outputs, high_attention_outputs, full_attention_outputs ], dim=2) fuse_q = torch.cat([ low_q_states, high_q_states, low_attention_q, high_attention_q, full_attention_q 
], dim=2) fuse_inp[:, 0] += fuse_q[:, -1] fuse_concat = torch.cat([fuse_q[:, :-1], fuse_inp], 1) fused_states = self.fuse_rnn(fuse_concat, mask) ### Self Attention HoW = torch.cat([emb, pos_emb, ner_emb, tf, fuse_concat, fused_states], dim=2) if self.use_xl: HoW = torch.cat([HoW, xl], dim=2) self_attention_outputs, _ = self.self_attention_layer( HoW, mask, HoW, mask, fused_states, None, self.training) self_inp = torch.cat([fused_states, self_attention_outputs], dim=2) full_states = self.self_rnn(self_inp, mask) full_p_states = full_states[:, q_len:] full_q_states = full_states[:, :q_len] return full_p_states, p_mask, full_q_states, q_mask
def main(): x = tensor.imatrix() t = tensor.imatrix() data_dir = r'D:\datasets\ptb' if config.model.upper() == 'RNN': opt_name = '%s.drop_%.02f.per_step_%d' % ( config.model.upper(), args.hid_dropout_rate, args.per_step) else: opt_name = '%s.drop_%.02f.drop_cand_%d.per_step_%d' % ( config.model.upper(), args.hid_dropout_rate, args.drop_candidates, args.per_step) sample_size = 15 if config.model.upper( ) == 'RNN' else 35 # sample size is the sequence length overlap = 5 if config.model.upper() == 'RNN' else -1 train_db = LMDatabase(data_dir, "train", sample_size=sample_size, overlap_size=overlap, batch_size=config.batch_size) valid_db = LMDatabase(data_dir, "valid", sample_size=sample_size, batch_size=train_db.batch_size) rnns = {'LSTM': LSTM, 'GRU': GRU, 'RNN': RNN} model = Model( x, t, sample_size, [ Embed(train_db.vocab_size, config.layer_size, weight_init=Uniform(config.scale)), Dropout(config.in_dropout_rate), # dropout input of RNN rnns[config.model.upper()](config.layer_size, config.layer_size, train_db.batch_size, args.hid_dropout_rate, args.drop_candidates, args.per_step, weight_init=Uniform(config.scale)), Dropout(config.out_dropout_rate), # dropout output of RNN Linear(config.layer_size, train_db.vocab_size, weight_init=Uniform(config.scale)) ]) clip = {'LSTM': 10, 'GRU': 20, 'RNN': 30} opt = SGDOptimizer(model, x, t, train_db, test_db=valid_db, name=opt_name, clip_gradients=True, clip_threshold=clip[config.model.upper()], print_norms=True) lr = {'LSTM': 1, 'GRU': 0.1, 'RNN': 0.05} if not args.test_only: opt.train(train_db, test_db=valid_db, learning_rate=lr[config.model.upper()], lr_decay=1.5, epochs=1) model.load("exp/%s/opt.pkl" % opt_name) test_datasets = pickle.load(open("%s/test.pkl" % data_dir, 'rb')) for d in test_datasets: valid_db.dataset = test_datasets[d] valid_db.bounds, ins, outs = valid_db.create_dataset() valid_db.ins.set_value(ins) valid_db.outs.set_value(outs) costs = opt.get_test_costs(valid_db) print(d, costs)
def vgg_bn(): return [ #1 Conv2D([3, 3], 64, [1, 1, 1, 1], padding='SAME'), Conv2DBatchNorm(64), Activation(tf.nn.relu), #2 Conv2D([3, 3], 64, [1, 1, 1, 1], padding='SAME'), Conv2DBatchNorm(64), Activation(tf.nn.relu), #3 MaxPool([1,2,2,1],[1,2,2,1],padding='SAME'), #4 Conv2D([3, 3], 128, [1, 1, 1, 1],padding='SAME'), Conv2DBatchNorm(128), Activation(tf.nn.relu), #5 Conv2D([3, 3], 128, [1, 1, 1, 1],padding='SAME'), Conv2DBatchNorm(128), Activation(tf.nn.relu), #6 MaxPool([1,2,2,1],[1,2,2,1],padding='SAME'), #7 Conv2D([3, 3], 256, [1, 1, 1, 1],padding='SAME'), Conv2DBatchNorm(256), Activation(tf.nn.relu), #8 Conv2D([3, 3], 256, [1, 1, 1, 1],padding='SAME'), Conv2DBatchNorm(256), Activation(tf.nn.relu), #9 Conv2D([3, 3], 256, [1, 1, 1, 1],padding='SAME'), Conv2DBatchNorm(256), Activation(tf.nn.relu), #10 MaxPool([1,2,2,1],[1,2,2,1],padding='SAME'), #11 Conv2D([3, 3], 512, [1, 1, 1, 1],padding='SAME'), Conv2DBatchNorm(512), Activation(tf.nn.relu), #12 Conv2D([3, 3], 512, [1, 1, 1, 1],padding='SAME'), Conv2DBatchNorm(512), Activation(tf.nn.relu), #13 Conv2D([3, 3], 512, [1, 1, 1, 1],padding='SAME'), Conv2DBatchNorm(512), Activation(tf.nn.relu), #14 MaxPool([1,2,2,1],[1,2,2,1],padding='SAME'), #15 Conv2D([3, 3], 512, [1, 1, 1, 1],padding='SAME'), Conv2DBatchNorm(512), Activation(tf.nn.relu), #16 Conv2D([3, 3], 512, [1, 1, 1, 1],padding='SAME'), Conv2DBatchNorm(512), Activation(tf.nn.relu), #17 Conv2D([3, 3], 512, [1, 1, 1, 1],padding='SAME'), Conv2DBatchNorm(512), Activation(tf.nn.relu), #18 MaxPool([1,2,2,1],[1,2,2,1],padding='SAME'), Flatten(), Dense(4096), Activation(tf.nn.relu), Dropout(0.5), Dense(4096), Activation(tf.nn.relu), Dense(1000), Activation(tf.nn.relu), Dense(10), Activation(tf.nn.softmax), ]