def cnn_model_default():
    N_fm = 150
    kernel_size = 8
    model = Sequential()
    model.add(Embedding(input_dim=W.shape[0], output_dim=W.shape[1],
                        weights=[W], W_constraint=unitnorm()))
    model.add(Reshape(1, conv_input_height, conv_input_width))
    model.add(Convolution2D(N_fm, 1, kernel_size, conv_input_width,
                            border_mode='valid', W_regularizer=l2(0.0001)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(poolsize=(conv_input_height - kernel_size + 1, 1),
                           ignore_border=True))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(N_fm, 1))
    # Linear output unit; the model is trained as a regressor with the MSE loss below
    model.add(Activation('linear'))
    # Custom optimizers could be used, though right now standard adagrad is employed
    model.compile(loss='mean_squared_error', optimizer='adagrad')
    return model
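# For reference, the same unit-norm embedding constraint expressed against the Keras 2 API.
# This is a minimal sketch, not part of the original model: it assumes Keras 2.x, and
# vocab_size / embed_dim / seq_len are hypothetical placeholders for W.shape and the input length.
from keras.models import Sequential
from keras.layers import Embedding
from keras.constraints import UnitNorm

vocab_size, embed_dim, seq_len = 5000, 100, 56  # placeholder sizes
sketch = Sequential()
# Keras 2 renames W_constraint to embeddings_constraint and unitnorm to UnitNorm;
# axis=1 keeps each word vector (row of the embedding matrix) at unit L2 norm.
sketch.add(Embedding(input_dim=vocab_size, output_dim=embed_dim,
                     input_length=seq_len,
                     embeddings_constraint=UnitNorm(axis=1)))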
def cnn_model_default_improve():
    N_fm = 300
    kernel_size = 5
    model = Sequential()
    model.add(Embedding(input_dim=W.shape[0], output_dim=W.shape[1],
                        weights=[W], W_constraint=unitnorm()))
    model.add(Reshape(1, conv_input_height, conv_input_width))
    model.add(Convolution2D(N_fm, 1, kernel_size, conv_input_width,
                            border_mode='valid', W_regularizer=l2(0.0001)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(poolsize=(conv_input_height - kernel_size + 1, 1),
                           ignore_border=True))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(N_fm, 1))
    model.add(Activation('linear'))
    # SGD optimizer is built here but never used; compile() below sticks with adagrad
    sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='mean_squared_error', optimizer='adagrad')
    return model
def build_model(X_train_stream0, X_train_stream1, embed_size, normed):
    n_grade_categories = len(np.unique(pup.flatten(X_train_stream0)))

    grade_input = Input(shape=X_train_stream0.shape[1:], dtype='int32', name='grade_input')
    track_input = Input(shape=X_train_stream1.shape[1:], name='track_input')
    track_masked = Masking(mask_value=-999)(track_input)

    # -- grade embedding
    if normed:
        embedded_grade = Embedding(input_dim=n_grade_categories, output_dim=embed_size,
                                   mask_zero=True, input_length=X_train_stream0.shape[1],
                                   W_constraint=unitnorm(axis=1))(Flatten()(grade_input))
    else:
        embedded_grade = Embedding(input_dim=n_grade_categories, output_dim=embed_size,
                                   mask_zero=True,
                                   input_length=X_train_stream0.shape[1])(Flatten()(grade_input))

    # x = Concatenate([embedded_grade, track_input])
    x = merge([embedded_grade, track_masked], mode='concat')
    x = LSTM(25, return_sequences=False)(x)
    x = Dropout(0.2)(x)
    y = Dense(4, activation='softmax')(x)

    return Model(inputs=[grade_input, track_input], outputs=y)
def build(self):
    subject = self.subject
    relation = self.relation
    object_ = self.get_object()
    embedding_size = self.model_params.get('n_embed_dims', 100)

    # add embedding layers
    embedding_rel = Embedding(input_dim=self.config['n_words'],
                              output_dim=self.model_params.get('n_embed_dims', 100),
                              init='he_uniform', mask_zero=False)
    embedding_ent = Embedding(input_dim=self.config['n_words'],
                              output_dim=self.model_params.get('n_embed_dims', 100),
                              init='he_uniform', W_constraint=unitnorm(axis=1),
                              mask_zero=False)

    subject_embedding = embedding_ent(subject)
    relation_embedding = embedding_rel(relation)
    object_embedding = embedding_ent(object_)

    subject_output = Reshape((embedding_size,))(subject_embedding)
    relation_output = Reshape((embedding_size,))(relation_embedding)
    object_output = Reshape((embedding_size,))(object_embedding)

    return subject_output, relation_output, object_output
def test_unitnorm():
    unitnorm_instance = constraints.unitnorm()
    normalized = unitnorm_instance(K.variable(example_array))
    norm_of_normalized = np.sqrt(np.sum(K.eval(normalized) ** 2, axis=1))
    # in the unit norm constraint, it should be equal to 1.
    difference = norm_of_normalized - 1.
    largest_difference = np.max(np.abs(difference))
    assert np.abs(largest_difference) < 10e-5
def test_unitnorm(self):
    from keras.constraints import unitnorm
    unitnorm_instance = unitnorm()
    normalized = unitnorm_instance(self.example_array)
    norm_of_normalized = np.sqrt(np.sum(normalized.eval() ** 2, axis=1))
    # in the unit norm constraint, it should be equal to 1.
    difference = norm_of_normalized - 1.
    largest_difference = np.max(np.abs(difference))
    self.assertAlmostEqual(largest_difference, 0.)
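# The property the two tests above check can be reproduced directly with NumPy.
# Minimal sketch (not from the original test suite): example_array here is a stand-in
# for the fixture used above, and rows are normalized to unit L2 norm along axis=1,
# which is exactly what the assertions above verify.
import numpy as np

example_array = np.random.random((3, 5)) * 10 - 5  # hypothetical weights
norms = np.sqrt(np.sum(example_array ** 2, axis=1, keepdims=True))
normalized = example_array / np.maximum(norms, 1e-7)  # divide each row by its L2 norm
assert np.max(np.abs(np.linalg.norm(normalized, axis=1) - 1.0)) < 1e-5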
def test_unitnorm_constraint(self):
    lookup = Sequential()
    lookup.add(Embedding(3, 2, weights=[self.W1], W_constraint=unitnorm()))
    lookup.add(Flatten())
    lookup.add(Dense(2, 1))
    lookup.add(Activation('sigmoid'))
    lookup.compile(loss='binary_crossentropy', optimizer='sgd', class_mode='binary')
    lookup.train(self.X1, np.array([[1], [0]], dtype='int32'))
    norm = np.linalg.norm(lookup.params[0].get_value(), axis=1)
    self.assertTrue(np.allclose(norm, np.ones_like(norm).astype('float32')))
def masked_simplified_lstm(nb_sentence, nb_words, dict_size,
                           word_embedding_weights, word_embedding_dim,
                           sentence_embedding_dim, document_embedding_dim, nb_tags):
    word_lstm_model = Sequential()
    word_lstm_model.add(Masking(input_shape=(nb_words, word_embedding_dim)))
    word_lstm = LSTM(output_dim=sentence_embedding_dim,
                     input_shape=(None, word_embedding_dim),
                     activation=u'tanh', inner_activation=u'hard_sigmoid')
    word_lstm_model.add(word_lstm)

    sentence_lstm_model = Sequential()
    sentence_lstm_model.add(Masking(input_shape=(nb_sentence, sentence_embedding_dim)))
    sentence_lstm = LSTM(output_dim=document_embedding_dim,
                         input_shape=(None, sentence_embedding_dim),
                         activation=u'tanh', inner_activation=u'hard_sigmoid')
    sentence_lstm_model.add(sentence_lstm)

    relation_layer = Dense(output_dim=nb_tags, input_shape=(nb_tags,), name=u'relation',
                           bias=False, W_regularizer=l2(0.01), W_constraint=unitnorm(axis=0))

    total_words = nb_words * nb_sentence
    input_layer = Input(shape=(total_words,))
    embedding_layer = Embedding(dict_size, word_embedding_dim,
                                weights=word_embedding_weights, trainable=True)(input_layer)
    first_reshape = Reshape((nb_sentence, nb_words, word_embedding_dim))(embedding_layer)
    sentence_embeddings = TimeDistributed(word_lstm_model)(first_reshape)
    document_embedding = sentence_lstm_model(sentence_embeddings)
    dense_layer = Dense(output_dim=nb_tags, input_shape=(document_embedding_dim,),
                        activation=u'tanh', W_regularizer=l2(0.01))(document_embedding)
    adjusted_score_layer = relation_layer(dense_layer)
    output_layer = Activation(activation=u'softmax')(adjusted_score_layer)

    def masked_simplified_lstm_loss(y_true, y_pred):
        return K.categorical_crossentropy(y_pred, y_true) - K.sum(
            y_true * relation_layer.call(y_true), axis=-1)

    def masked_simplified_lstm_loss_cross_entropy(y_true, y_pred):
        return K.categorical_crossentropy(y_pred, y_true) + \
            K.categorical_crossentropy(y_true, relation_layer.call(y_true))

    def masked_simplified_lstm_loss_without_relation(y_true, y_pred):
        return K.categorical_crossentropy(y_pred, y_true)

    model = Model(input=input_layer, output=output_layer)
    model.compile(loss=masked_simplified_lstm_loss, optimizer='rmsprop')
    return model
def test_unitnorm_constraint():
    lookup = Sequential()
    lookup.add(Embedding(3, 2, weights=[W1], W_constraint=unitnorm(), input_length=1))
    lookup.add(Flatten())
    lookup.add(Dense(1))
    lookup.add(Activation('sigmoid'))
    lookup.compile(loss='binary_crossentropy', optimizer='sgd', class_mode='binary')
    lookup.train_on_batch(X1, np.array([[1], [0]], dtype='int32'))
    norm = np.linalg.norm(K.get_value(lookup.params[0]), axis=1)
    assert_allclose(norm, np.ones_like(norm).astype('float32'), rtol=1e-05)
def test_unitnorm_constraint():
    lookup = Sequential()
    lookup.add(Embedding(3, 2, weights=[W1], W_constraint=unitnorm(), input_length=1))
    lookup.add(Flatten())
    lookup.add(Dense(1))
    lookup.add(Activation('sigmoid'))
    lookup.compile(loss='binary_crossentropy', optimizer='sgd', class_mode='binary')
    lookup.train_on_batch(X1, np.array([[1], [0]], dtype='int32'))
    norm = np.linalg.norm(K.get_value(lookup.params[0]), axis=0)
    assert_allclose(norm, np.ones_like(norm).astype('float32'), rtol=1e-05)
def test_unitnorm_constraint(self):
    lookup = Sequential()
    lookup.add(Embedding(3, 2, weights=[self.W1], W_constraint=unitnorm()))
    lookup.add(Flatten())
    lookup.add(Dense(2, 1))
    lookup.add(Activation('sigmoid'))
    lookup.compile(loss='binary_crossentropy', optimizer='sgd', class_mode='binary')
    lookup.train_on_batch(self.X1, np.array([[1], [0]], dtype='int32'))
    norm = np.linalg.norm(lookup.params[0].get_value(), axis=1)
    self.assertTrue(np.allclose(norm, np.ones_like(norm).astype('float32')))
def cnn_1():
    N_fm = 50
    model = Sequential()
    model.add(Embedding(input_dim=W.shape[0], output_dim=W.shape[1],
                        weights=[W], W_constraint=unitnorm()))
    model.add(Reshape(1, conv_input_height, conv_input_width))

    output_size = (conv_input_height, conv_input_width)
    kernel_height, kernel_width = 8, output_size[1]
    model.add(Convolution2D(N_fm, 1, kernel_height, kernel_width,
                            border_mode='valid', W_regularizer=l2(0.0001)))
    model.add(Activation('relu'))
    output_size = (output_size[0] - kernel_height + 1, output_size[1] - kernel_width + 1)
    model.add(Dropout(0.25))

    kernel_height, kernel_width = 5, 1
    model.add(Convolution2D(N_fm, N_fm, kernel_height, kernel_width,
                            border_mode='valid', W_regularizer=l2(0.0001)))
    model.add(Activation('relu'))
    output_size = (output_size[0] - kernel_height + 1, output_size[1] - kernel_width + 1)

    poolsize = (output_size[0], 1)
    model.add(MaxPooling2D(poolsize=poolsize))
    h = output_size[0] / poolsize[0]
    w = output_size[1] / poolsize[1]

    model.add(Flatten())
    # model.add(Dense(N_fm, N_fm))
    # model.add(Activation('relu'))
    model.add(Dense(N_fm * h * w, 1))
    model.add(Activation('linear'))
    model.add(Dropout(0.25))
    model.compile(loss='mean_squared_error', optimizer='adagrad')
    return model
def cnn(W=None):
    # Number of feature maps (outputs of convolutional layer)
    N_fm = 100
    dense_nb = 20
    # kernel size of convolutional layer
    kernel_size = 5
    conv_input_width = W.shape[1]  # dims=300
    global maxlen
    conv_input_height = maxlen  # maxlen of sentence

    model = Sequential()
    # Embedding layer (lookup table of trainable word vectors)
    model.add(Embedding(input_dim=W.shape[0], output_dim=W.shape[1],
                        weights=[W], W_constraint=unitnorm()))
    # Reshape word vectors from Embedding to tensor format suitable for Convolutional layer
    model.add(Reshape(dims=(1, conv_input_height, conv_input_width)))
    # first convolutional layer with ReLU activation
    model.add(Convolution2D(nb_filter=N_fm, nb_row=kernel_size, nb_col=conv_input_width,
                            border_mode='valid', W_regularizer=l2(0.0001),
                            activation='relu'))
    model.add(Dropout(0.5))
    # aggregate data in every feature map to scalar using MAX operation
    # model.add(MaxPooling2D(pool_size=(conv_input_height-kernel_size+1, 1), border_mode='valid'))
    model.add(MaxPooling2D(pool_size=(conv_input_height - kernel_size + 1, 1),
                           border_mode='valid'))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(output_dim=dense_nb, activation='relu'))
    model.add(Dropout(0.5))
    # Inner Product layer with a linear activation (single regression output)
    model.add(Dense(output_dim=1, activation='linear'))
    return model
def parallel_cnn(W):
    (nb_vocab, dims) = W.shape
    N_filter = 20
    filter_shapes = [[2, 300], [3, 300], [4, 300], [5, 300]]
    pool_shapes = [[25, 1], [24, 1], [23, 1], [22, 1]]

    # Four Parallel Convolutional Layers with Four Pooling Layers
    model = Sequential()
    sub_models = []
    for i in range(len(pool_shapes)):
        pool_shape = pool_shapes[i]
        filter_shape = filter_shapes[i]
        sub_model = Sequential()
        sub_model.add(Embedding(input_dim=nb_vocab, output_dim=dims,
                                weights=[W], W_constraint=unitnorm()))
        # Reshape word vectors from Embedding to tensor format suitable for Convolutional layer
        sub_model.add(Reshape(dims=(1, 200, dims)))
        sub_model.add(Convolution2D(nb_filter=N_filter, nb_row=filter_shape[0],
                                    nb_col=filter_shape[1], border_mode='valid',
                                    activation='relu'))
        sub_model.add(MaxPooling2D(pool_size=(pool_shape[0], pool_shape[1]),
                                   border_mode='valid'))
        sub_model.add(Flatten())
        sub_models.append(sub_model)
    model.add(Merge(sub_models, mode='concat'))

    # Fully Connected Layer with dropout
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    # Fully Connected Layer as output layer
    model.add(Dense(2, activation='softmax'))
    return model
def assemble_model(input_dim, output_dim, conv_input_height, conv_input_width,
                   weigths, number_of_classes=2):
    # Number of feature maps (outputs of convolutional layer)
    N_fm = 300
    # kernel size of convolutional layer
    kernel_size = 8

    model = Sequential()
    # Embedding layer (lookup table of trainable word vectors)
    model.add(Embedding(input_dim=input_dim, output_dim=output_dim,
                        input_length=conv_input_height,
                        weights=weigths, W_constraint=unitnorm()))
    # Reshape word vectors from Embedding to tensor format suitable for Convolutional layer
    model.add(Reshape((1, conv_input_height, conv_input_width)))
    # first convolutional layer
    model.add(Convolution2D(N_fm, kernel_size, conv_input_width,
                            border_mode='valid', W_regularizer=l2(0.0001)))
    # ReLU activation
    model.add(Activation('relu'))
    # aggregate data in every feature map to scalar using MAX operation
    model.add(MaxPooling2D(pool_size=(conv_input_height - kernel_size + 1, 1)))
    model.add(Flatten())
    model.add(Dropout(0.5))
    # Inner Product layer (as in regular neural network, but without non-linear activation function)
    model.add(Dense(number_of_classes))
    # SoftMax activation; actually, Dense+SoftMax works as Multinomial Logistic Regression
    model.add(Activation('softmax'))
    # Custom optimizers could be used, though right now standard adadelta is employed
    opt = Adadelta(lr=1.0, rho=0.95, epsilon=1e-6)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model
def cnn_model_3():
    N_fm = 100
    model = Sequential()
    model.add(Embedding(input_dim=W.shape[0], output_dim=W.shape[1],
                        weights=[W], W_constraint=unitnorm()))
    model.add(Reshape(1, conv_input_height, conv_input_width))
    model.add(Convolution2D(N_fm, 1, 7, 7, border_mode='valid', W_regularizer=l2(0.0001)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(poolsize=(3, 3), ignore_border=True))
    model.add(Convolution2D(N_fm, N_fm, 5, 5, border_mode='valid', W_regularizer=l2(0.0001)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(poolsize=(2, 2), ignore_border=True))
    model.add(Flatten())
    model.add(Dropout(0.5))
    h = math.floor((math.floor((conv_input_height - 6) / 3) - 4) / 2)
    w = math.floor((math.floor((conv_input_width - 6) / 3) - 4) / 2)
    model.add(Dense(N_fm * h * w, 1))
    model.add(Activation('linear'))
    # SGD optimizer is built here but never used; compile() below sticks with adagrad
    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='mean_squared_error', optimizer='adagrad')
    return model
print(X_train.shape)

# Number of feature maps (outputs of convolutional layer)
N_fm = 400
batch_size = 128
nb_epoch = 20

###################################### model #######################################
model = Sequential()
# Embedding layer (lookup table of trainable word vectors)
model.add(Embedding(input_dim=W.shape[0], output_dim=W.shape[1],
                    weights=[W], W_constraint=unitnorm()))
# Reshape word vectors from Embedding to tensor format suitable for Convolutional layer
model.add(Reshape(dims=(1, conv_input_height, conv_input_width)))
# first convolutional layer
model.add(Convolution2D(N_fm, kernel_size, conv_input_width,
                        border_mode='valid', W_regularizer=l2(0.0001)))
# ReLU activation
model.add(Activation('relu'))
# aggregate data in every feature map to scalar using MAX operation
model.add(MaxPooling2D(pool_size=(conv_input_height - kernel_size + 1, 1),
                       ignore_border=True))
print(len(Y_test))

maxlen = max_len
size = vec_dim
print(X_train.shape)

# Number of feature maps (outputs of convolutional layer)
N_fm = 400
batch_size = 128
nb_epoch = 100

###################################### model #######################################
model = Sequential()
# Embedding layer (lookup table of trainable word vectors)
model.add(Embedding(input_dim=W.shape[0], output_dim=W.shape[1],
                    weights=[W], W_constraint=unitnorm()))
# Reshape word vectors from Embedding to tensor format suitable for Convolutional layer
model.add(Reshape(1, conv_input_height, conv_input_width))
# first convolutional layer
model.add(Convolution2D(N_fm, 1, kernel_size, conv_input_width,
                        border_mode='valid', W_regularizer=l2(0.0001)))
# ReLU activation
model.add(Activation('relu'))
# aggregate data in every feature map to scalar using MAX operation
model.add(MaxPooling2D(poolsize=(conv_input_height - kernel_size + 1, 1),
                       ignore_border=True))
model.add(Flatten())
model.add(Dropout(0.5))
# Inner Product layer (as in regular neural network, but without non-linear activation function)
def learning():
    ''' learning with CNN '''
    print "loading data..."
    x = cPickle.load(
        open("../data/processed/stackexchange/train-val-test-%d.pickle" % classNum, "rb"))
    revs, W, word_index_map, vocab = x[0], x[1], x[2], x[3]
    print "data loaded!"

    datasets = make_index_data(revs, word_index_map, max_l=embeddingSize, kernel_size=5)

    # Train data preparation
    N = datasets[0].shape[0]
    conv_input_width = W.shape[1]
    conv_input_height = int(datasets[0].shape[1] - 1)
    sizeY = classNum

    # For each word write a word index (not vector) to X tensor
    train_X = np.zeros((N, conv_input_height), dtype=np.int)
    train_Y = np.zeros((N, sizeY), dtype=np.int)
    for i in xrange(N):
        for j in xrange(conv_input_height):
            train_X[i, j] = datasets[0][i, j]
        train_Y[i, datasets[0][i, -1]] = 1
    print 'train_X.shape = {}'.format(train_X.shape)
    print 'train_Y.shape = {}'.format(train_Y.shape)

    # Validation data preparation
    Nv = datasets[1].shape[0]
    # For each word write a word index (not vector) to X tensor
    val_X = np.zeros((Nv, conv_input_height), dtype=np.int)
    val_Y = np.zeros((Nv, sizeY), dtype=np.int)
    for i in xrange(Nv):
        for j in xrange(conv_input_height):
            val_X[i, j] = datasets[1][i, j]
        val_Y[i, datasets[1][i, -1]] = 1

    # Number of feature maps (outputs of convolutional layer)
    N_fm = 300
    # kernel size of convolutional layer
    kernel_size = 5

    sampleSize = datasets[0].shape[0]
    featureSize = datasets[0].shape[1]
    embeddingInputSize = W.shape[0]
    embeddingOutputSize = W.shape[1]
    print 'sample size: {}'.format(sampleSize)
    print 'feature size: {}'.format(featureSize)
    print 'embedding input size: {}'.format(embeddingInputSize)
    print 'embedding output size: {}'.format(embeddingOutputSize)

    model = Sequential()
    # Embedding layer (lookup table of trainable word vectors)
    model.add(Embedding(input_dim=W.shape[0], output_dim=W.shape[1],
                        input_length=conv_input_height,
                        weights=[W], W_constraint=unitnorm()))
    # Reshape word vectors from Embedding to tensor format suitable for Convolutional layer
    model.add(Reshape((1, conv_input_height, conv_input_width)))
    # first convolutional layer
    model.add(Convolution2D(N_fm, kernel_size, conv_input_width,
                            border_mode='valid', W_regularizer=l2(0.0001)))
    # ReLU activation
    model.add(Activation('relu'))
    # aggregate data in every feature map to scalar using MAX operation
    model.add(MaxPooling2D(pool_size=(conv_input_height - kernel_size + 1, 1)))
    model.add(Flatten())
    # NOTE: Dropout(1) zeroes all activations at training time
    model.add(Dropout(1))
    # Inner Product layer (as in regular neural network, but without non-linear activation function)
    model.add(Dense(classNum))
    # SoftMax activation; actually, Dense+SoftMax works as Multinomial Logistic Regression
    model.add(Activation('softmax'))
    # Custom optimizers could be used, though right now standard adadelta is employed
    opt = Adadelta(lr=1.0, rho=0.95, epsilon=1e-6)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

    epoch = 0
    val_acc = []
    val_auc = []
    N_epoch = 3
    for i in xrange(N_epoch):
        model.fit(train_X, train_Y, batch_size=50, nb_epoch=1, verbose=1)
        output = model.predict_proba(val_X, batch_size=10, verbose=1)
        # find validation accuracy using the best threshold value t
        vacc = np.max([np.sum((output[:, 1] > t) == (val_Y[:, 1] > 0.5)) * 1.0 / len(output)
                       for t in np.arange(0.0, 1.0, 0.01)])
        # find validation AUC
        vauc = roc_auc_score(val_Y, output)
        val_acc.append(vacc)
        val_auc.append(vauc)
        print 'Epoch {}: validation accuracy = {:.3%}, validation AUC = {:.3%}'.format(epoch, vacc, vauc)
        epoch += 1

    print '{} epochs passed'.format(epoch)
    print 'Accuracy on validation dataset:'
    print val_acc
print 'AUC on validation dataset:'
print val_auc

# save model and weights
# save model architecture
model_json = model.to_json()
with open("../data/model/stackexchange/model_cnn_intent-%d.json" % classNum, "w") as json_file:
    json_file.write(model_json)
# save model weights
model.save_weights('../data/model/stackexchange/model_cnn_intent-%d.h5' % classNum)
print("Saved model to disk")
def cnn_model():
    N_fm = 400  # number of filters
    kernel_size = 8
    conv_input_height, conv_input_width = max_len, len(W[1])

    model = Sequential()
    model.add(Embedding(input_dim=W.shape[0], output_dim=W.shape[1],
                        weights=[W], W_constraint=unitnorm()))
    model.add(Reshape(dims=(1, conv_input_height, conv_input_width)))
    model.add(Convolution2D(nb_filter=N_fm, nb_row=kernel_size, nb_col=conv_input_width,
                            border_mode='valid', W_regularizer=l2(0.0001)))
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(conv_input_height - kernel_size + 1, 1),
                           ignore_border=True))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('linear'))
    model.compile(loss='mse', optimizer='adagrad')
    return model
X_test_doc_index.extend([doc_index[doc[0]]])
y_test_label.extend([doc[3]])

X_test_data_arr = pad_sequences(X_test_data, maxlen=MAX_SEQUENCE_LENGTH, value=0)
X_test_doc_idx_arr = np.stack(X_test_doc_index)

# change label to categorical variable
label_train = to_categorical(np.asarray(y_train_label))
label_test = to_categorical(np.asarray(y_test_label))
print(label_train.shape, label_test.shape)
print(len(X_train_data), len(y_train_label), len(X_test_data), len(y_test_label))

# w2v model
# w2v with multiple layers
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedding_layer_word = Embedding(input_dim=VOCAB_SIZE, output_dim=EMBEDDING_DIM,
                                 input_length=MAX_SEQUENCE_LENGTH,
                                 embeddings_constraint=unitnorm(axis=1))
embedded_sequences = embedding_layer_word(sequence_input)
x = Conv1D(128, 5, activation='relu')(embedded_sequences)
x = MaxPooling1D(5)(x)
# NOTE: the bare Dropout/BatchNormalization calls below create layers but never connect them to x
Dropout(0.2)
BatchNormalization()
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(5)(x)
Dropout(0.2)
BatchNormalization()
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(27)(x)  # global max pooling
Dropout(0.2)
BatchNormalization()
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
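# As flagged above, the bare Dropout(0.2) / BatchNormalization() statements only construct layer
# objects; in the functional API a layer takes effect only when it is called on a tensor.
# Minimal sketch of the intended wiring (an assumption, not the original author's code);
# x_fixed is a hypothetical name so the illustration does not clash with the graph built above.
x_fixed = Conv1D(128, 5, activation='relu')(embedded_sequences)
x_fixed = MaxPooling1D(5)(x_fixed)
x_fixed = Dropout(0.2)(x_fixed)            # applied to the tensor, not just instantiated
x_fixed = BatchNormalization()(x_fixed)    # applied to the tensor, not just instantiated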
def _createLayers(self, args):
    # optional norm constraint
    if args.max_norm:
        W_constraint = maxnorm(args.max_norm)
    elif args.unit_norm:
        W_constraint = unitnorm()
    else:
        W_constraint = None

    # optional regularizer
    if args.l2_reg:
        W_regularizer = l2(args.l2_reg)
    elif args.l1_reg:
        W_regularizer = l1(args.l1_reg)
    else:
        W_regularizer = None

    # helper functions to use with layers
    if self.num_actuators == 1:
        # simpler versions for single actuator case
        def _L(x):
            return K.exp(x)

        def _P(x):
            return x ** 2

        def _A(t):
            m, p, u = t
            return -(u - m) ** 2 * p

        def _Q(t):
            v, a = t
            return v + a
    else:
        # use Theano advanced operators for multiple actuator case
        def _L(x):
            # initialize with zeros
            batch_size = x.shape[0]
            a = T.zeros((batch_size, self.num_actuators, self.num_actuators))
            # set diagonal elements
            batch_idx = T.extra_ops.repeat(T.arange(batch_size), self.num_actuators)
            diag_idx = T.tile(T.arange(self.num_actuators), batch_size)
            b = T.set_subtensor(a[batch_idx, diag_idx, diag_idx],
                                T.flatten(T.exp(x[:, :self.num_actuators])))
            # set lower triangle
            cols = np.concatenate([np.array(range(i), dtype=np.uint)
                                   for i in xrange(self.num_actuators)])
            rows = np.concatenate([np.array([i] * i, dtype=np.uint)
                                   for i in xrange(self.num_actuators)])
            cols_idx = T.tile(T.as_tensor_variable(cols), batch_size)
            rows_idx = T.tile(T.as_tensor_variable(rows), batch_size)
            batch_idx = T.extra_ops.repeat(T.arange(batch_size), len(cols))
            c = T.set_subtensor(b[batch_idx, rows_idx, cols_idx],
                                T.flatten(x[:, self.num_actuators:]))
            return c

        def _P(x):
            return K.batch_dot(x, K.permute_dimensions(x, (0, 2, 1)))

        def _A(t):
            m, p, u = t
            d = K.expand_dims(u - m, -1)
            return -K.batch_dot(K.batch_dot(K.permute_dimensions(d, (0, 2, 1)), p), d)

        def _Q(t):
            v, a = t
            return v + a

    x = Input(shape=(self.state_size,), name='x')
    u = Input(shape=(self.num_actuators,), name='u')
    if args.batch_norm:
        h = BatchNormalization()(x)
    else:
        h = x
    for i in xrange(args.hidden_layers):
        h = Dense(args.hidden_nodes, activation=args.activation, name='h' + str(i + 1),
                  W_constraint=W_constraint, W_regularizer=W_regularizer)(h)
        if args.batch_norm and i != args.hidden_layers - 1:
            h = BatchNormalization()(h)
    v = Dense(1, name='v', W_constraint=W_constraint, W_regularizer=W_regularizer)(h)
    m = Dense(self.num_actuators, name='m',
              W_constraint=W_constraint, W_regularizer=W_regularizer)(h)
    l0 = Dense(self.num_actuators * (self.num_actuators + 1) / 2, name='l0',
               W_constraint=W_constraint, W_regularizer=W_regularizer)(h)
    l = Lambda(_L, output_shape=(self.num_actuators, self.num_actuators), name='l')(l0)
    p = Lambda(_P, output_shape=(self.num_actuators, self.num_actuators), name='p')(l)
    a = merge([m, p, u], mode=_A, output_shape=(None, self.num_actuators,), name="a")
    q = merge([v, a], mode=_Q, output_shape=(None, self.num_actuators,), name="q")

    return x, u, m, v, q, p, a
def gru_with_attention(nb_sentence, nb_words, dict_size, word_embedding_weights,
                       word_embedding_dim, sentence_embedding_dim,
                       document_embedding_dim, nb_tags):
    word_lstm_input = Input(shape=(preprocessing.MAX_WORDS_IN_SENTENCE, word_embedding_dim))
    word_lstm_output = GRU(output_dim=sentence_embedding_dim, return_sequences=True,
                           input_shape=(preprocessing.MAX_WORDS_IN_SENTENCE, word_embedding_dim),
                           activation=u'tanh', inner_activation=u'hard_sigmoid')(word_lstm_input)

    def get_last(word_lstm_output_seq):
        return K.permute_dimensions(word_lstm_output_seq, (1, 0, 2))[-1]

    for_get_last = Lambda(get_last, output_shape=(sentence_embedding_dim,))(word_lstm_output)
    sentence_context = Dense(output_dim=preprocessing.MAX_WORDS_IN_SENTENCE,
                             activation=u'tanh')(for_get_last)
    weights = Dense(output_dim=preprocessing.MAX_WORDS_IN_SENTENCE,
                    activation=u'softmax')(sentence_context)
    final_output = merge([weights, word_lstm_output], mode=u'dot', dot_axes=1)
    word_lstm_model = Model(input=[word_lstm_input], output=[final_output])

    sentence_lstm_model = Sequential()
    sentence_lstm = GRU(output_dim=document_embedding_dim,
                        input_shape=(preprocessing.MAX_SENTENCES_IN_DOCUMENT,
                                     sentence_embedding_dim),
                        activation=u'tanh', inner_activation=u'hard_sigmoid')
    sentence_lstm_model.add(sentence_lstm)

    relation_layer = Dense(output_dim=nb_tags, input_shape=(nb_tags,), name=u'relation',
                           bias=False, W_regularizer=l2(0.01), W_constraint=unitnorm(axis=0))

    total_words = nb_words * nb_sentence
    input_layer = Input(shape=(total_words,))
    embedding_layer = Embedding(dict_size, word_embedding_dim,
                                weights=word_embedding_weights, trainable=False)(input_layer)
    first_reshape = Reshape((nb_sentence, nb_words, word_embedding_dim))(embedding_layer)
    sentence_embeddings = TimeDistributed(word_lstm_model)(first_reshape)
    document_embedding = sentence_lstm_model(sentence_embeddings)
    dense_layer = Dense(output_dim=nb_tags, input_shape=(document_embedding_dim,),
                        activation=u'tanh', W_regularizer=l2(0.01))(document_embedding)
    adjusted_score_layer = relation_layer(dense_layer)
    output_layer = Activation(activation=u'softmax')(adjusted_score_layer)

    def masked_simplified_lstm_loss(y_true, y_pred):
        return K.categorical_crossentropy(y_pred, y_true) - K.sum(
            y_true * relation_layer.call(y_true), axis=-1)

    def masked_simplified_lstm_loss_cross_entropy(y_true, y_pred):
        return K.categorical_crossentropy(y_pred, y_true) + \
            K.categorical_crossentropy(y_true, relation_layer.call(y_true))

    def masked_simplified_lstm_loss_without_relation(y_true, y_pred):
        return K.categorical_crossentropy(y_pred, y_true)

    model = Model(input=input_layer, output=output_layer)
    model.compile(loss=masked_simplified_lstm_loss, optimizer='rmsprop')
    return model
env = gym.make(args.environment)
assert isinstance(env.observation_space, Box), "observation space must be continuous"
assert isinstance(env.action_space, Box), "action space must be continuous"
assert len(env.action_space.shape) == 1
num_actuators = env.action_space.shape[0]
print "num_actuators:", num_actuators

# start monitor for OpenAI Gym
if args.gym_record:
    env.monitor.start(args.gym_record, force=True)

# optional norm constraint
if args.max_norm:
    W_constraint = maxnorm(args.max_norm)
elif args.unit_norm:
    W_constraint = unitnorm()
else:
    W_constraint = None

# optional regularizer
if args.l2_reg:
    W_regularizer = l2(args.l2_reg)
elif args.l1_reg:
    W_regularizer = l1(args.l1_reg)
else:
    W_regularizer = None

# helper functions to use with layers
if num_actuators == 1:
    # simpler versions for single actuator case
    def _L(x):
print(len(Y_test))

maxlen = max_len
size = vec_dim
print(X_train.shape)

# Number of feature maps (outputs of convolutional layer)
N_fm = 400
batch_size = 128
nb_epoch = 20

###################################### model #######################################
model = Sequential()
# Embedding layer (lookup table of trainable word vectors)
model.add(Embedding(input_dim=W.shape[0], output_dim=W.shape[1],
                    weights=[W], W_constraint=unitnorm()))
# Reshape word vectors from Embedding to tensor format suitable for Convolutional layer
model.add(Reshape(dims=(1, conv_input_height, conv_input_width)))
# first convolutional layer
model.add(Convolution2D(N_fm, kernel_size, conv_input_width,
                        border_mode='valid', W_regularizer=l2(0.0001)))
# ReLU activation
model.add(Activation('relu'))
# aggregate data in every feature map to scalar using MAX operation
model.add(MaxPooling2D(pool_size=(conv_input_height - kernel_size + 1, 1),
                       ignore_border=True))
model.add(Flatten())
model.add(Dropout(0.5))
# Inner Product layer (as in regular neural network, but without non-linear activation function)
b = 1
m = 1 / ((4 * (-0.09)) / np.power(l, 2))

weights_duplicate = createWeightsDuplicate(number_of_complex_units)
# [np.array([[1, 1, 0, 0], [0, 0, 1, 1]])]
weights_square = createWeightsSquare(number_of_complex_units, l, b)
# [np.array([[l, -l, -l, l, 0, 0, 0, 0], [l, -l, l, -l, 0, 0, 0, 0],
#            [0, 0, 0, 0, l, -l, -l, l], [0, 0, 0, 0, l, -l, l, -l]]),
#  np.array([b, b, b, b, b, b, b, b])]
weights_square_result = createWeightsSquareResult(number_of_complex_units, m)
# [np.array([[m, 0], [m, 0], [-m, 0], [-m, 0], [0, m], [0, m], [0, -m], [0, -m]])]
weights_abs = createWeightAbs(number_of_complex_units)
# [np.array([[1], [1]])]

doTrain = False
# initial_weights_last = [np.array([[3.4], [4.2]]), np.array([-6.5])]
initial_weights_last = [np.array([[0.3], [0.3]]), np.array([-9])]

model = Sequential()
FTRNNlayer = FTRNN(units=number_of_complex_units, input_shape=(timesteps, 1),
                   return_sequences=False, trainable=False,
                   recurrent_constraint=unitnorm(), weights=weights)
# FTRNNlayer = SimpleRNN(units=total_number_of_units, input_shape=(timesteps, 1),
#                        use_bias=False, return_sequences=False, trainable=False,
#                        weights=weights_simpleRNN)  # recurrent_constraint=unitnorm(),
FTRNNlayer.states = initial_states
model.add(FTRNNlayer)
# model.add(Dot(axes=1)([FTRNNlayer, FTRNNlayer]))

duplicateLayer = Dense(total_number_of_units * 2, activation=None, use_bias=False,
                       trainable=doTrain, weights=weights_duplicate)
squareLayer = Dense(total_number_of_units * 4, activation='sigmoid', use_bias=True,
                    trainable=doTrain, weights=weights_square)
squareResultLayer = Dense(total_number_of_units, activation=None, use_bias=False,
                          trainable=doTrain, weights=weights_square_result)
absLayer = Dense(number_of_complex_units, activation=None, use_bias=False,
                 trainable=doTrain, weights=weights_abs)
lastLayer = Dense(1, activation='sigmoid', use_bias=True, trainable=True,
                  weights=initial_weights_last)

model.add(duplicateLayer)
model.add(squareLayer)
model.add(squareResultLayer)
model.add(absLayer)
def cnn_optimise(W):
    # Number of feature maps (outputs of convolutional layer)
    N_fm = 300
    # kernel size of convolutional layer
    kernel_size = 8
    conv_input_width = W.shape[1]
    conv_input_height = 200  # maxlen of sentence

    model = Sequential()
    # Embedding layer (lookup table of trainable word vectors)
    model.add(Embedding(input_dim=W.shape[0], output_dim=W.shape[1],
                        weights=[W], W_constraint=unitnorm(), init='uniform'))
    # Reshape word vectors from Embedding to tensor format suitable for Convolutional layer
    model.add(Reshape(dims=(1, conv_input_height, conv_input_width)))
    # first convolutional layer
    model.add(Convolution2D(N_fm, kernel_size, conv_input_width,
                            border_mode='valid', W_regularizer=l2(0.0001)))
    # ReLU activation
    model.add(Dropout(0.5))
    model.add(Activation('relu'))
    # aggregate data in every feature map to scalar using MAX operation
    model.add(MaxPooling2D(pool_size=(conv_input_height - kernel_size + 1, 1),
                           border_mode='valid'))
    model.add(Dropout(0.5))
    model.add(Flatten())
    # Inner Product layer (as in regular neural network, but without non-linear activation function)
    model.add(Dense(input_dim=N_fm, output_dim=1))
    # Sigmoid activation on a single output unit (binary logistic regression)
    model.add(Activation('sigmoid'))
    plot(model, to_file='./images/model.png')
    return model
assert isinstance(env.observation_space, Box), "observation space must be continuous"
assert isinstance(env.action_space, Box), "action space must be continuous"
assert len(env.action_space.shape) == 1
num_actuators = env.action_space.shape[0]
print("num_actuators:", num_actuators)

# start monitor for OpenAI Gym
if args.gym_record:
    env.monitor.start(args.gym_record)

# optional norm constraint
if args.max_norm:
    kernel_constraint = maxnorm(args.max_norm)
elif args.unit_norm:
    kernel_constraint = unitnorm()
else:
    kernel_constraint = None

# optional regularizer
def regularizer():
    if args.l2_reg:
        return l2(args.l2_reg)
    elif args.l1_reg:
        return l1(args.l1_reg)
    else:
        return None

# helper functions to use with layers
def build_model(p):
    """ build a Keras model using the parameters in p """
    max_posts = p['max_posts']
    max_length = p['max_length']
    filters = p['filters']
    filtlen = p['filtlen']
    poollen = p['poollen']
    densed = p['densed']
    embed_size = p['embed_size']
    batch = p['batch']

    random.seed(p['seed'])
    np.random.seed(p['seed'])

    # https://github.com/fchollet/keras/issues/2280
    tf.reset_default_graph()
    if len(tf.get_default_graph()._nodes_by_id.keys()) > 0:
        raise RuntimeError("Seeding is not supported after building part of the graph. "
                           "Please move set_seed to the beginning of your code.")
    tf.set_random_seed(p['seed'])
    sess = tf.Session()
    K.set_session(sess)

    nb_words, genf, tok = datagen(max_posts, max_length, stype='training',
                                  batch_size=batch, randposts=p['randposts'],
                                  mintf=p['mintf'], mindf=p['mindf'],
                                  noempty=p['noempty'], prep=p['prep'],
                                  returntok=True)

    n_classes = 2
    inp = Input(shape=(max_posts, max_length), dtype='int32')
    nextin = inp

    if p['cosine']:
        from keras.constraints import unitnorm
        wconstrain = unitnorm()
    else:
        wconstrain = None

    if p['w2v']:
        embeddings_index = {}
        fn = 'data/w2v_50_sg_export.txt'
        with open(fn) as f:
            for line in f:
                values = line.strip().split()
                word = values[0]
                if word in tok.word_index:
                    coefs = np.asarray(values[1:], dtype='float32')
                    embeddings_index[word] = coefs
        print('Found %s word vectors.' % len(embeddings_index))

        embedding_matrix = np.zeros((nb_words, embed_size))
        for word, i in tok.word_index.items():
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector
            else:
                embedding_matrix[i] = np.random.uniform(-0.2, 0.2, size=embed_size)

        emb = Embedding(nb_words, embed_size, mask_zero=True, input_length=max_length,
                        W_constraint=wconstrain, weights=[embedding_matrix],
                        trainable=p['etrain'])
        if not p['etrain']:
            print("making not trainable")
            emb.trainable = False
    else:
        assert p['etrain'], "must have etrain=True with w2v=False"
        emb = Embedding(nb_words, embed_size, mask_zero=True, W_constraint=wconstrain)

    embedded = TimeDistributed(emb)(nextin)
    if not p['etrain']:
        emb.trainable = False
        embedded.trainable = False

    conv = Sequential()
    conv.add(Convolution1D(nb_filter=filters, filter_length=filtlen, border_mode='valid',
                           W_constraint=wconstrain, activation='linear',
                           subsample_length=1, input_shape=(max_length, embed_size)))
    conv.add(Activation(p['af']))
    conv.add(GlobalAveragePooling1D())
    posts = TimeDistributed(conv)(embedded)

    combined = Convolution1D(nb_filter=filters, filter_length=p['acl'], border_mode='valid',
                             activation=p['af'], subsample_length=p['acl'])(posts)
    combined = Flatten()(combined)
    if densed != 0:
        combined = Dense(densed, activation=p['af'])(combined)
    outlayer = Dense(2, activation='softmax')(combined)

    model = Model(inp, outlayer)
    return model, genf
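# Why the p['cosine'] option pairs with unitnorm() above: once every embedding row has unit
# L2 norm, a plain dot product between two embeddings equals their cosine similarity.
# Small NumPy check (illustrative only; the two vectors here are made up):
import numpy as np

a = np.array([3.0, 4.0])
b = np.array([1.0, 2.0])
a_hat = a / np.linalg.norm(a)
b_hat = b / np.linalg.norm(b)
cosine = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
assert np.isclose(np.dot(a_hat, b_hat), cosine)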
def cnn(W=None):
    # Number of feature maps (outputs of convolutional layer)
    N_fm = 20
    dense_nb = 20
    # kernel size of convolutional layer
    kernel_size = 5
    conv_input_width = W.shape[1]  # dims=300
    conv_input_height = 200  # maxlen of sentence

    model = Sequential()
    # Embedding layer (lookup table of trainable word vectors)
    model.add(Embedding(input_dim=W.shape[0], output_dim=W.shape[1],
                        weights=[W], W_constraint=unitnorm()))
    # Reshape word vectors from Embedding to tensor format suitable for Convolutional layer
    model.add(Reshape(dims=(1, conv_input_height, conv_input_width)))
    # first convolutional layer with ReLU activation
    model.add(Convolution2D(nb_filter=N_fm, nb_row=kernel_size, nb_col=conv_input_width,
                            border_mode='valid', W_regularizer=l2(0.0001),
                            activation='relu'))
    model.add(Dropout(0.5))
    # aggregate data in every feature map to scalar using MAX operation
    # model.add(MaxPooling2D(pool_size=(conv_input_height-kernel_size+1, 1), border_mode='valid'))
    model.add(MaxPooling2D(pool_size=(kernel_size * 5, 1), border_mode='valid'))
    model.add(Dropout(0.4))
    model.add(Flatten())
    model.add(Dense(output_dim=dense_nb, activation='relu'))
    model.add(Dropout(0.2))
    # Inner Product layer; Dense+SoftMax works as Multinomial Logistic Regression
    model.add(Dense(output_dim=2, activation='softmax'))
    return model
def cnn_model_simple():
    N_fm = 10
    model = Sequential()
    model.add(Embedding(input_dim=W.shape[0], output_dim=W.shape[1],
                        weights=[W], W_constraint=unitnorm()))
    model.add(Reshape(1, conv_input_height, conv_input_width))
    model.add(Convolution2D(N_fm, 1, 5, conv_input_width,
                            border_mode='valid', W_regularizer=l2(0.0001)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(poolsize=(conv_input_height - 5 + 1, 1), ignore_border=True))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(N_fm, 1))
    model.add(Activation('linear'))
    # SGD optimizer is built here but never used; compile() below sticks with adagrad
    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='mean_squared_error', optimizer='adagrad')
    return model
env = gym.make(args.environment)
assert isinstance(env.observation_space, Box), "observation space must be continuous"
assert isinstance(env.action_space, Box), "action space must be continuous"
assert len(env.action_space.shape) == 1
num_actuators = env.action_space.shape[0]
print("num_actuators:", num_actuators)

# start monitor for OpenAI Gym
if args.gym_record:
    env.monitor.start(args.gym_record)

# optional norm constraint
if args.max_norm:
    kernel_constraint = maxnorm(args.max_norm)
elif args.unit_norm:
    kernel_constraint = unitnorm()
else:
    kernel_constraint = None

# optional regularizer
def regularizer():
    if args.l2_reg:
        return l2(args.l2_reg)
    elif args.l1_reg:
        return l1(args.l1_reg)
    else:
        return None

# helper functions to use with layers
if num_actuators == 1:
    # simpler versions for single actuator case
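# In the Keras 2 style used by the two fragments above, the selected constraint and regularizer
# would typically be attached to Dense layers via kernel_constraint / kernel_regularizer.
# Minimal sketch under that assumption (hidden_nodes is a hypothetical placeholder):
from keras.layers import Dense
from keras.constraints import max_norm, unit_norm
from keras.regularizers import l1, l2

hidden_nodes = 64                  # placeholder
kernel_constraint = unit_norm()    # or max_norm(2.0), or None
kernel_regularizer = l2(1e-4)      # or l1(...), or None
hidden = Dense(hidden_nodes, activation='relu',
               kernel_constraint=kernel_constraint,
               kernel_regularizer=kernel_regularizer)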
def cnn_model_default_improve_2():
    N_fm = 300  # number of filters
    kernel_size = 5
    model = Sequential()
    model.add(Embedding(input_dim=W.shape[0], output_dim=W.shape[1],
                        weights=[W], W_constraint=unitnorm()))
    model.add(Reshape(dims=(1, conv_input_height, conv_input_width)))
    model.add(Convolution2D(nb_filter=N_fm, nb_row=kernel_size, nb_col=conv_input_width,
                            border_mode='valid', W_regularizer=l2(0.0001)))
    model.add(Activation("sigmoid"))
    model.add(MaxPooling2D(pool_size=(conv_input_height - kernel_size + 1, 1),
                           ignore_border=True))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('linear'))
    # SGD optimizer is built here but never used; compile() below sticks with adagrad
    sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='mse', optimizer='adagrad')
    return model
for iFold in range(1):
    print(f"iFold {iFold}")

    iLoop, (train_index, test_index) = lKF[iFold]
    anDataConstTrain = anDataConst[train_index]
    anDataConstValid = anDataConst[test_index]

    encoder_inputs = Input(shape=(sentenceLength,), name="Encoder_input")
    target_inputs = Input(shape=(sentenceLength,), name="target_input")

    emb_obj = Embedding(output_dim=num_emb_dim, input_dim=vocab_size,
                        name="Embedding", embeddings_constraint=unitnorm(axis=1))
    x = emb_obj(encoder_inputs)
    x = Flatten()(x)
    x = Dense(4)(x)
    x = Dense(sentenceLength * num_emb_dim)(x)

    # 45 mins. on high noise. (shuffle 8)
    # Valid set MSE = 0.0335
    x = LSTM(128, return_sequences=True)(x)
    x = LSTM(128, return_sequences=True)(x)
    x = TimeDistributed(Dense(vocab_size))(x)
    x = Flatten()(x)
    x = Dense(1)(x)