def pretrained_embedding_layer(word_to_vec_map, source_vocab_to_int):
    """
    Build an Embedding layer and load the pretrained word vectors (100-dimensional vectors are used here).
    @param word_to_vec_map: mapping from words to vectors
    @param source_vocab_to_int: mapping from words to integer indices
    """
    vocab_len = len(source_vocab_to_int) + 1  # the Keras Embedding API needs vocab size + 1 (index 0 is reserved)
    emb_dim = word_to_vec_map["the"].shape[0]
    # initialize the embedding matrix
    emb_matrix = np.zeros((vocab_len, emb_dim))
    # fill the embedding matrix with the word vectors
    for word, index in source_vocab_to_int.items():
        word_vector = word_to_vec_map.get(word, np.zeros(emb_dim))
        emb_matrix[index, :] = word_vector
    # define the Embedding layer and freeze its weights
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)
    # build
    embedding_layer.build((None,))
    # set weights
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer
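# A minimal usage sketch (not from the original source): the toy vocabulary and
# 4-dimensional vectors below are made up for illustration; assumes the Keras 2
# functional API and that this runs in the same module as the function above.
import numpy as np
from keras.layers import Embedding, Input, LSTM, Dense
from keras.models import Model

word_to_vec_map = {"the": np.ones(4), "cat": np.arange(4, dtype=float)}
source_vocab_to_int = {"the": 1, "cat": 2}

emb_layer = pretrained_embedding_layer(word_to_vec_map, source_vocab_to_int)

seq_in = Input(shape=(10,), dtype='int32')      # sequences of 10 token ids
x = emb_layer(seq_in)                           # frozen pretrained embeddings
x = LSTM(8)(x)
out = Dense(1, activation='sigmoid')(x)
toy_model = Model(seq_in, out)
toy_model.compile(loss='binary_crossentropy', optimizer='adam')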
def build(self): assert self.config['question_len'] == self.config['answer_len'] question = self.question answer = self.get_answer() # add embedding layers embedding = Embedding(self.config['n_words'], self.model_params.get('n_embed_dims', 100)) question_embedding = embedding(question) answer_embedding = embedding(answer) # turn off layer updating embedding.params = [] embedding.updates = [] # dropout dropout = Dropout(0.25) question_dropout = dropout(question_embedding) answer_dropout = dropout(answer_embedding) # dense dense = TimeDistributed(Dense(self.model_params.get('n_hidden', 200), activation='tanh')) question_dense = dense(question_dropout) answer_dense = dense(answer_dropout) # regularization question_dense = ActivityRegularization(l2=0.0001)(question_dense) answer_dense = ActivityRegularization(l2=0.0001)(answer_dense) # dropout question_dropout = dropout(question_dense) answer_dropout = dropout(answer_dense) # cnn cnns = [Convolution1D(filter_length=filter_length, nb_filter=self.model_params.get('nb_filters', 1000), activation=self.model_params.get('conv_activation', 'relu'), border_mode='same') for filter_length in [2, 3, 5, 7]] question_cnn = merge([cnn(question_dropout) for cnn in cnns], mode='concat') answer_cnn = merge([cnn(answer_dropout) for cnn in cnns], mode='concat') # regularization question_cnn = ActivityRegularization(l2=0.0001)(question_cnn) answer_cnn = ActivityRegularization(l2=0.0001)(answer_cnn) # dropout question_dropout = dropout(question_cnn) answer_dropout = dropout(answer_cnn) # maxpooling maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2])) question_pool = maxpool(question_dropout) answer_pool = maxpool(answer_dropout) # activation activation = Activation('tanh') question_output = activation(question_pool) answer_output = activation(answer_pool) return question_output, answer_output
def _generate_model(self, lembedding, num_classes=2, unit='gru', rnn_size=128, train_vectors=True):
    input = Input(shape=(lembedding.size,), dtype='int32')
    if lembedding.vector_box.W is None:
        emb_layer = Embedding(lembedding.vector_box.size,
                              lembedding.vector_box.vector_dim,
                              W_constraint=None)
    else:
        emb_layer = Embedding(lembedding.vector_box.size,
                              lembedding.vector_box.vector_dim,
                              weights=[lembedding.vector_box.W],
                              W_constraint=None)
    # trainable must be set on the layer itself; setting it on the output tensor has no effect
    emb_layer.trainable = train_vectors
    emb = emb_layer(input)

    if unit == 'gru':
        forward = GRU(rnn_size)(emb)
        backward = GRU(rnn_size, go_backwards=True)(emb)
    else:
        forward = LSTM(rnn_size)(emb)
        backward = LSTM(rnn_size, go_backwards=True)(emb)

    merged_rnn = merge([forward, backward], mode='concat')
    dropped = Dropout(0.5)(merged_rnn)

    if num_classes == 2:
        out = Dense(1, activation='sigmoid')(dropped)
        model = Model(input=input, output=out)
        if self.optimizer is None:
            self.optimizer = 'rmsprop'
        model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
    else:
        out = Dense(num_classes, activation='softmax')(dropped)
        model = Model(input=input, output=out)
        if self.optimizer is None:
            self.optimizer = 'adam'
        model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

    return model
def _generate_model(self, lembedding, num_classes=2, unit='gru', rnn_size=128, train_vectors=True): model = Sequential() if lembedding.vector_box.W is None: emb = Embedding(lembedding.vector_box.size, lembedding.vector_box.vector_dim, W_constraint=None) else: emb = Embedding(lembedding.vector_box.size, lembedding.vector_box.vector_dim, weights=[lembedding.vector_box.W], W_constraint=None) emb.trainable = train_vectors model.add(emb) if unit == 'gru': model.add(GRU(rnn_size)) else: model.add(LSTM(rnn_size)) model.add(Dropout(0.2)) if num_classes == 2: model.add(Dense(1, activation='sigmoid')) if self.optimizer is None: self.optimizer = 'rmsprop' model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"]) else: if self.optimizer is None: self.optimizer = 'adam' model.add(Dense(num_classes, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"]) return model
def build(self):
    question, answer = self._get_inputs()

    # add embedding layer (a single Embedding shared by question and answer, so the weights are tied)
    embedding = Embedding(self.config['n_words'], self.model_params.get('n_embed_dims', 141))
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)

    # dropout
    dropout = Dropout(0.5)
    question_dropout = dropout(question_embedding)
    answer_dropout = dropout(answer_embedding)

    # rnn
    forward_lstm = LSTM(self.config.get('n_lstm_dims', 141), consume_less='mem', return_sequences=True)
    backward_lstm = LSTM(self.config.get('n_lstm_dims', 141), consume_less='mem', return_sequences=True)
    question_lstm = merge([forward_lstm(question_dropout), backward_lstm(question_dropout)],
                          mode='concat', concat_axis=-1)

    # dropout
    question_dropout = dropout(question_lstm)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
    question_pool = maxpool(question_dropout)

    # activation
    activation = Activation('tanh')
    question_output = activation(question_pool)

    question_model = Model(input=[question], output=[question_output])

    # attentional rnn
    forward_lstm = AttentionLSTM(self.config.get('n_lstm_dims', 141), question_output, consume_less='mem', return_sequences=True)
    backward_lstm = AttentionLSTM(self.config.get('n_lstm_dims', 141), question_output, consume_less='mem', return_sequences=True)
    answer_lstm = merge([forward_lstm(answer_dropout), backward_lstm(answer_dropout)],
                        mode='concat', concat_axis=-1)

    # dropout
    answer_dropout = dropout(answer_lstm)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
    answer_pool = maxpool(answer_dropout)

    # activation
    activation = Activation('tanh')
    answer_output = activation(answer_pool)

    answer_model = Model(input=[question, answer], output=[answer_output])

    return question_model, answer_model
def build(self): question = self.question answer = self.get_answer() # add embedding layers embedding = Embedding(self.config['n_words'], self.model_params.get('n_embed_dims', 100), mask_zero=False) question_embedding = embedding(question) answer_embedding = embedding(answer) # turn off layer updating embedding.params = [] embedding.updates = [] # dropout dropout = Dropout(0.25) question_dropout = dropout(question_embedding) answer_dropout = dropout(answer_embedding) # question rnn part f_rnn = LSTM(self.model_params.get('n_lstm_dims', 141), return_sequences=True) b_rnn = LSTM(self.model_params.get('n_lstm_dims', 141), return_sequences=True, go_backwards=True) question_f_rnn = f_rnn(question_dropout) question_b_rnn = b_rnn(question_dropout) question_f_dropout = dropout(question_f_rnn) question_b_dropout = dropout(question_b_rnn) # maxpooling maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2])) question_pool = merge([maxpool(question_f_dropout), maxpool(question_b_dropout)], mode='concat', concat_axis=-1) # answer rnn part f_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141), question_pool, single_attn=True, return_sequences=True) b_rnn = AttentionLSTM(self.model_params.get('n_lstm_dims', 141), question_pool, single_attn=True, return_sequences=True, go_backwards=True) answer_f_rnn = f_rnn(answer_dropout) answer_b_rnn = b_rnn(answer_dropout) answer_f_dropout = dropout(answer_f_rnn) answer_b_dropout = dropout(answer_b_rnn) answer_pool = merge([maxpool(answer_f_dropout), maxpool(answer_b_dropout)], mode='concat', concat_axis=-1) # activation activation = Activation('tanh') question_output = activation(question_pool) answer_output = activation(answer_pool) return question_output, answer_output
def _generate_model(self, lembedding, num_classes=2, ngrams=[1, 2, 3, 4, 5], nfilters=64, train_vectors=True):

    def sub_ngram(n):
        return Sequential([
            Convolution1D(nfilters, n, activation='relu',
                          input_shape=(lembedding.size, lembedding.vector_box.vector_dim)),
            Lambda(lambda x: K.max(x, axis=1), output_shape=(nfilters,))
        ])

    doc = Input(shape=(lembedding.size,), dtype='int32')
    embedding_layer = Embedding(input_dim=lembedding.vector_box.size,
                                output_dim=lembedding.vector_box.vector_dim,
                                weights=[lembedding.vector_box.W])
    # trainable must be set on the layer; setting it on the output tensor has no effect
    embedding_layer.trainable = train_vectors
    embedded = embedding_layer(doc)

    rep = Dropout(0.5)(
        merge([sub_ngram(n)(embedded) for n in ngrams], mode='concat', concat_axis=-1)
    )

    if num_classes == 2:
        out = Dense(1, activation='sigmoid')(rep)
        model = Model(input=doc, output=out)
        if self.optimizer is None:
            self.optimizer = 'rmsprop'
        model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
    else:
        out = Dense(num_classes, activation='softmax')(rep)
        model = Model(input=doc, output=out)
        if self.optimizer is None:
            self.optimizer = 'adam'
        model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

    return model
def _generate_model(self, lembedding, num_classes=2, num_features=128, train_vectors=True): model = Sequential() if lembedding.vector_box.W is None: emb = Embedding(lembedding.vector_box.size, lembedding.vector_box.vector_dim, W_constraint=None, input_length=lembedding.size) else: emb = Embedding(lembedding.vector_box.size, lembedding.vector_box.vector_dim, weights=[lembedding.vector_box.W], W_constraint=None, input_length=lembedding.size) emb.trainable = train_vectors model.add(emb) model.add(Convolution1D(num_features, 3, init='uniform')) model.add(Activation('relu')) model.add(MaxPooling1D(2)) model.add(Dropout(0.25)) model.add(Convolution1D(num_features, 3, init='uniform')) model.add(Activation('relu')) model.add(MaxPooling1D(2)) model.add(Dropout(0.25)) model.add(Flatten()) if num_classes == 2: model.add(Dense(1, activation='sigmoid')) if self.optimizer is None: self.optimizer = 'rmsprop' model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"]) else: if self.optimizer is None: self.optimizer = 'adam' model.add(Dense(num_classes, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"]) return model
def char_emb_cnn_func(n_characters: int, char_embedding_dim: int, emb_mat: np.array = None, filter_widths=(3, 4, 5, 7), highway_on_top=False): emb_layer = Embedding(n_characters, char_embedding_dim) if emb_mat is not None: emb_layer.set_weights([emb_mat]) conv2d_layers = [] for filter_width in filter_widths: conv2d_layers.append(Conv2D(char_embedding_dim, (1, filter_width), padding='same')) if highway_on_top: dense1 = Dense(char_embedding_dim * len(filter_widths)) dense2 = Dense(char_embedding_dim * len(filter_widths)) def result(input): emb_c = emb_layer(input) conv_results_list = [] for cl in conv2d_layers: conv_results_list.append(cl(emb_c)) emb_c = Lambda(lambda x: K.concatenate(x, axis=3))(conv_results_list) emb_c = Lambda(lambda x: K.max(x, axis=2))(emb_c) if highway_on_top: sigmoid_gate = dense1(emb_c) sigmoid_gate = Activation('sigmoid')(sigmoid_gate) deeper_units = dense2(emb_c) emb_c = Add()([Multiply()([sigmoid_gate, deeper_units]), Multiply()([Lambda(lambda x: K.constant(1., shape=K.shape(x)) - x)(sigmoid_gate), emb_c])]) emb_c = Activation('relu')(emb_c) return emb_c return result
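# A small usage sketch (an assumption, not from the source): apply the returned
# closure to a (tokens x chars) integer input; the sizes below are made up for
# illustration and the default filter widths are kept.
from keras.layers import Input
from keras.models import Model

char_ids = Input(shape=(20, 15), dtype='int32')   # 20 tokens, 15 characters per token
char_features = char_emb_cnn_func(n_characters=100, char_embedding_dim=25)(char_ids)
char_model = Model(char_ids, char_features)
char_model.summary()  # last dimension is char_embedding_dim * len(filter_widths) = 25 * 4 = 100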
def __init__(self, config):
    """
    Convolutional neural network model for sentence classification (Sentence CNN by Y. Kim).

    Parameters
    ----------
    EMBEDDING_DIM: Dimension of the embedding space.
    MAX_SEQUENCE_LENGTH: Maximum length of the sentence.
    MAX_NB_WORDS: Maximum number of words in the vocabulary.
    embeddings_index: A dict containing words and their embeddings.
    word_index: A dict containing words and their indices.
    labels_index: A dict containing the labels and their indices.

    Returns
    -------
    None. The compiled Keras model is stored in self.model.
    """
    self.batch_size = config.batch_size
    self.num_epoch = config.num_epoch
    EMBEDDING_DIM = 300
    MAX_SEQUENCE_LENGTH = config.max_slen[config.dataset_name]
    # embedding_matrix = np.zeros((config.vocab_size, EMBEDDING_DIM))
    embedding_layer = Embedding(config.vocab_size,
                                EMBEDDING_DIM,
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=True)
    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedded_sequences = embedding_layer(sequence_input)
    # add first conv filter
    embedded_sequences = Reshape((MAX_SEQUENCE_LENGTH, EMBEDDING_DIM, 1))(embedded_sequences)
    x = Conv2D(100, (5, EMBEDDING_DIM), activation='relu')(embedded_sequences)
    x = MaxPooling2D((MAX_SEQUENCE_LENGTH - 5 + 1, 1))(x)
    # add second conv filter
    y = Conv2D(100, (4, EMBEDDING_DIM), activation='relu')(embedded_sequences)
    y = MaxPooling2D((MAX_SEQUENCE_LENGTH - 4 + 1, 1))(y)
    # add third conv filter
    z = Conv2D(100, (3, EMBEDDING_DIM), activation='relu')(embedded_sequences)
    z = MaxPooling2D((MAX_SEQUENCE_LENGTH - 3 + 1, 1))(z)
    # concatenate the conv layers
    alpha = concatenate([x, y, z])
    # flatten the pooled features
    alpha = Flatten()(alpha)
    # dropout
    alpha = Dropout(0.5)(alpha)
    # predictions
    preds = Dense(1, activation='sigmoid')(alpha)
    # build model
    model = Model(sequence_input, preds)
    opt = optimizers.Adam(lr=0.0001)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['acc'])
    self.model = model
    return
def TEC_basic(config, f_prev=None, m_prev=None): ''' config: parameter settings of the model f_prev: feature output from the model trained on the previous time domain m_prev: model params from the previous model ''' wt_matrix = np.load('./wt/' + config['dname'] + '.npy') # some model compile parameters # opt = keras.optimizers.SGD(.0001) # opt = keras.optimizers.RMSprop(.0001) opt = keras.optimizers.Adam(.0001) if config['pred_num'] == 3: pred_func = 'softmax' model_loss = {'pred': 'categorical_crossentropy'} else: config['pred_num'] = 1 pred_func = 'sigmoid' model_loss = {'pred': 'binary_crossentropy'} # design inputs input_doc = Input( shape=(int(config['seq_max_len']), ), dtype='int32', name='input_doc', ) input_left = Input( shape=(int(config['seq_max_len']), ), dtype='int32', name='input_left', ) input_right = Input( shape=(int(config['seq_max_len']), ), dtype='int32', name='input_right', ) # define inputs inputs = [input_doc, input_left, input_right] if f_prev: input_prev = Input( shape=(2 * int(config['rnn_size']), ), # output the same shape dtype='int32', name='input_prev') # build embedding embed = Embedding( wt_matrix.shape[0], wt_matrix.shape[1], weights=[wt_matrix], input_length=int(config['seq_max_len']), trainable=False, # according to author open source codes name='embed') embed_doc = embed(input_doc) embed_left = embed(input_left) embed_right = embed(input_right) # left and right are the contexts, connect with LSTM, reverse the right left_lstm = LSTM(wt_matrix.shape[1], name='left_lstm')(embed_left) left_lstm = RepeatVector(int(config['seq_max_len']))(left_lstm) right_lstm = LSTM(wt_matrix.shape[1], go_backwards=True, name='right_lstm')(embed_right) right_lstm = RepeatVector(int(config['seq_max_len']))(right_lstm) # concatenated concat = keras.layers.concatenate([left_lstm, embed_doc, right_lstm], axis=-1) # convolution conv = Conv1D(300, 3, strides=1, padding='valid', activation='relu', use_bias=False, name='conv')(concat) pool = MaxPooling1D(name='pool', strides=None, padding='valid')(conv) flatten = Flatten(name='flatten')(pool) # add f_prev if it is not None if f_prev: concat_f = keras.layers.concatenate([input_prev, flatten], axis=-1) # a dense layer with dropout concat_f = Dense(2 * int(config['rnn_size']), activation='relu')(concat_f) concat_f = Dropout(0.5)(concat_f) # prediction pred = Dense(config['pred_num'], activation=pred_func, name='pred')(concat_f) # define inputs inputs.append(input_prev) else: # add a dropout f_dp = Dropout(0.5)(flatten) # prediction pred = Dense(config['pred_num'], activation=pred_func, name='pred')(f_dp) #'linear' # compile model my_model = Model(inputs=inputs, outputs=pred) my_model.compile(loss=model_loss, optimizer=opt, metrics=['accuracy']) print(my_model.summary()) return my_model
version = keras.__version__
major_version = int(version[0])

n_in = 4
n_out = 6
output_dim = 5
input_length = 10
mb = 42
kernel = 3

embedding_dim = 50
max_words = 200

model = Sequential()
model.add(Embedding(max_words, embedding_dim, input_length=input_length))
model.add(Convolution1D(128, kernel_size=3, activation='relu'))  # 10 - 3 + 1 = 8
model.add(Convolution1D(64, kernel_size=3, activation='relu'))   # 8 - 3 + 1 = 6
model.add(Convolution1D(32, kernel_size=3, activation='relu'))   # 6 - 3 + 1 = 4
model.add(Flatten())                                             # 128 = 32 * 4
model.add(Dropout(0.2))
model.add(Dense(128, activation='sigmoid'))                      # W = 128 x 128
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(loss='mse', optimizer='adam')
# # Since we're training a language model, there should also be: # * An embedding layer that converts character id x_t to a vector. # * An output layer that predicts probabilities of next phoneme # In[10]: import keras from keras.layers import concatenate,Dense,Embedding rnn_num_units = 64 embedding_size = 16 #Let's create layers for our recurrent network #Note: we create layers but we don't "apply" them yet embed_x = Embedding(n_tokens,embedding_size) # an embedding layer that converts character ids into embeddings #a dense layer that maps input and previous state to new hidden state, [x_t,h_t]->h_t+1 get_h_next = Dense(rnn_num_units, activation = 'tanh')###YOUR CODE HERE #a dense layer that maps current hidden state to probabilities of characters [h_t+1]->P(x_t+1|h_t+1) get_probas = Dense(n_tokens, activation = 'softmax')###YOUR CODE HERE #Note: please either set the correct activation to Dense or write it manually in rnn_one_step # In[11]: def rnn_one_step(x_t, h_t): """
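# A plausible one-step RNN sketch (an assumption, not the notebook's own solution):
# embed x_t, concatenate it with the previous hidden state h_t, and apply the two
# dense layers defined above. Assumes x_t holds integer token ids of shape (batch,)
# and h_t has shape (batch, rnn_num_units), with the TensorFlow backend.
import tensorflow as tf

def rnn_one_step_sketch(x_t, h_t):
    # embed the current token ids: (batch,) -> (batch, 1, emb) -> (batch, emb)
    x_t_emb = embed_x(tf.reshape(x_t, [-1, 1]))[:, 0]
    # new hidden state from [x_t, h_t]
    h_next = get_h_next(concatenate([x_t_emb, h_t]))
    # probabilities of the next token
    output_probas = get_probas(h_next)
    return output_probas, h_next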
def compileModel(classes, embedding_matrix, EMBEDDING_DIM=200, chunk_size=1000, CONVOLUTION_FEATURE=256, BORDER_MODE='valid', LSTM_FEATURE=256, DENSE_FEATURE=256, DROP_OUT=0.5, LEARNING_RATE=0.01, MOMENTUM=0.9): global sgd ngram_filters = [3, 4] # Define ngrams list, 3-gram, 4-gram, 5-gram convs = [] graph_in = Input(shape=(chunk_size, EMBEDDING_DIM)) for n_gram in ngram_filters: conv = Convolution1D( # Layer X, Features: 256, Kernel Size: ngram nb_filter= CONVOLUTION_FEATURE, # Number of kernels or number of filters to generate filter_length=n_gram, # Size of kernels, ngram activation='relu')(graph_in) # Activation function to use pool = MaxPooling1D( # Layer X a, Max Pooling: 3 pool_length=3)(conv) # Size of kernels lstm = LSTM( # Layer X b, Output Size: 256 output_dim=LSTM_FEATURE)(pool) # Features: 256 convs.append(lstm) model = Sequential() model.add( Embedding( # Layer 0, Start input_dim=nb_words + 1, # Size to dictionary, has to be input + 1 output_dim=EMBEDDING_DIM, # Dimensions to generate weights=[embedding_matrix], # Initialize word weights input_length= chunk_size, # Define length to input sequences in the first layer trainable=False)) # Disable weight changes during training model.add(Dropout(0.25)) # Dropout 25% out = Merge(mode='concat')( convs) # Layer 1, Output Size: Concatted ngrams feature maps graph = Model(input=graph_in, output=out) # Concat the ngram convolutions model.add(graph) # Concat the ngram convolutions model.add(Dropout(DROP_OUT)) # Dropout 50% model.add( Dense( # Layer 3, Output Size: 256 output_dim=DENSE_FEATURE, # Output dimension activation='relu')) # Activation function to use model.add( Dense( # Layer 4, Output Size: Size Unique Labels, Final output_dim=classes, # Output dimension activation='softmax')) # Activation function to use sgd = SGD(lr=LEARNING_RATE, momentum=MOMENTUM, nesterov=True) model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy']) print("Done compiling.") return model
input_context = Input(shape=(maxlen_input, ), dtype='int32', name='the context text') input_answer = Input(shape=(maxlen_input, ), dtype='int32', name='the answer text up to the current token') LSTM_encoder = LSTM(sentence_embedding_size, init='lecun_uniform', name='Encode context') LSTM_decoder = LSTM(sentence_embedding_size, init='lecun_uniform', name='Encode answer up to the current token') Shared_Embedding = Embedding(output_dim=word_embedding_size, input_dim=dictionary_size, input_length=maxlen_input, name='Shared') word_embedding_context = Shared_Embedding(input_context) context_embedding = LSTM_encoder(word_embedding_context) word_embedding_answer = Shared_Embedding(input_answer) answer_embedding = LSTM_decoder(word_embedding_answer) merge_layer = merge( [context_embedding, answer_embedding], mode='concat', concat_axis=1, name= 'concatenate the embeddings of the context and the answer up to current token' ) out = Dense(dictionary_size / 2, activation="relu",
image = preprocess(image) # fea_vec = model_inception_notop.predict(image) # fea_vec = np.reshape(fea_vec, fea_vec.shape[1]) return image # Define image caption model # inputs1 = Input(shape=(2048,)) inputs1 = model_inception_complete.input # fe1 = Dropout(0.5)(inputs1) fe1 = Dropout(0.5)(model_inception_complete.layers[-2].output) fe2 = Dense(256, activation='relu')(fe1) inputs2 = Input(shape=(max_length,)) se1 = Embedding(vocab_size, embedding_dim, mask_zero=True)(inputs2) se2 = Dropout(0.5)(se1) se3 = LSTM(256)(se2) decoder1 = add([fe2, se3]) decoder2 = Dense(256, activation='relu')(decoder1) outputs = Dense(vocab_size, activation='softmax')(decoder2) model = Model(inputs=[inputs1, inputs2], outputs=outputs) model.load_weights(model_weights, by_name=True) def greedySearch(photo): in_text = 'startseq' for i in range(max_length): sequence = [wordtoix[w] for w in in_text.split() if w in wordtoix]
indices = np.arange(data.shape[0]) np.random.shuffle(indices) data = data[indices] labels = labels[indices] nb_validation_samples = int(VALIDATION_SPLIT * data.shape[0]) x_train = data[:-nb_validation_samples] y_train = labels[:-nb_validation_samples] x_val = data[-nb_validation_samples:] y_val = labels[-nb_validation_samples:] from keras.layers import Embedding embedding_layer = Embedding(NUM_WORDS, EMBEDDING_DIM, weights=[embedding_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=False) from keras.layers import Dense, Input, GlobalMaxPooling1D, GlobalAveragePooling1D from keras.layers import Conv1D, MaxPooling1D, Embedding from keras.models import Model sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32') embedded_sequences = embedding_layer(sequence_input) x = Conv1D(128, 5, activation='relu')(embedded_sequences) x = MaxPooling1D(5)(x) x = Conv1D(128, 5, activation='relu')(x) x = MaxPooling1D(5)(x) x = Conv1D(128, 5, activation='relu')(x) x = GlobalMaxPooling1D()(x)
import numpy as np # nums = np.arange(1, 101) # n_samples = 1000 # samples = np.array([np.random.randint(0, n_items, adj_size) for i in range(n_samples)]) # labels = np.array([(np.argsort(line) == 4).astype('int') for line in samples]) n_items = 50 adj_size = 10 epoches = 100 nn = np.arange(50) samples = np.array([nn[i:i + 10] for i in range(len(nn) - 10)]) samples = np.tile(samples, epoches).reshape(-1, 10) np.random.shuffle(samples) labels = np.array([(np.argsort(line) == 4).astype('int') for line in samples]) Y = np.array([line[np.argsort(line) == 4] for line in samples]) model = Sequential() model.add(Embedding(input_dim=n_items, output_dim=8, input_length=adj_size)) # print(samples[0]) model.add(GlobalAvgPool1D()) model.add(Dense(10, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy']) model.fit(samples, labels, epochs=1000, batch_size=50, validation_split=0.3) # cc=model.predict(np.array([1,2,3,4,5,6,7,8,9,10]).reshape(-1,10)) # print(cc)
def build_model(self, config, weights): bgrnn_model = Sequential() bgrnn_model.add( Embedding( config['max_features'], config['embedding_dims'], input_length=config['input_length'], weights=[weights['Wemb']] if 'Wemb' in weights else None)) bgrnn_model.add( Bidirectional( GRU(config['rnn_output_dims'], dropout_W=config['dropout_W'], dropout_U=config['dropout_U']))) blstm_model = Sequential() blstm_model.add( Embedding( config['max_features'], config['embedding_dims'], input_length=config['input_length'], weights=[weights['Wemb']] if 'Wemb' in weights else None)) blstm_model.add( Bidirectional( LSTM(config['rnn_output_dims'], dropout_W=config['dropout_W'], dropout_U=config['dropout_U']))) cnn_model = Sequential() cnn_model.add( Embedding( config['max_features'], config['embedding_dims'], input_length=config['input_length'], weights=[weights['Wemb']] if 'Wemb' in weights else None)) #dropout = 0.2)) # cnn_model.add(ZeroPadding1D(int(config['filter_length_1'] / 2))) cnn_model.add( Convolution1D(nb_filter=config['nb_filter_1'], filter_length=config['filter_length_1'], border_mode='valid', activation='relu', subsample_length=1)) cnn_model.add(GlobalMaxPooling1D()) # cnn_model.add(Dense(config['hidden_dims'])) # cnn_model.add(Activation('sigmoid')) # merged model merged_model = Sequential() merged_model.add( Merge([bgrnn_model, blstm_model, cnn_model], mode='concat', concat_axis=1)) merged_model.add(Dropout(self.config['dropout'])) if config['nb_classes'] > 2: merged_model.add( Dense(config['nb_classes'], activation='softmax', name="dense_e")) loss_type = 'categorical_crossentropy' else: merged_model.add(Dense(1, activation='sigmoid', name="dense_d")) loss_type = 'binary_crossentropy' merged_model.compile(loss=loss_type, optimizer=self.get_optimizer(config['optimizer']), metrics=['accuracy']) return merged_model
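# A hedged sketch of the config/weights dictionaries build_model() reads. The key
# names come from the accesses in the code above; all values are illustrative
# placeholders only, not the project's actual settings.
example_config = {
    'max_features': 20000,      # vocabulary size for the Embedding layers
    'embedding_dims': 100,
    'input_length': 80,
    'rnn_output_dims': 64,
    'dropout_W': 0.2,
    'dropout_U': 0.2,
    'nb_filter_1': 128,
    'filter_length_1': 3,
    'hidden_dims': 128,
    'nb_classes': 2,
    'dropout': 0.5,             # note: read via self.config['dropout'] above
    'optimizer': 'adam',
}
example_weights = {}            # optionally {'Wemb': pretrained_embedding_matrix}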
def create_model(X_vocab_len, X_max_len, n_phonetic_features, n1, n2, n3, n4, n5, n6, HIDDEN_DIM, LAYER_NUM): def smart_merge(vectors, **kwargs): return vectors[0] if len(vectors) == 1 else merge(vectors, **kwargs) current_word = Input(shape=(X_max_len, ), dtype='float32', name='input1') # for encoder (shared) decoder_input = Input(shape=(X_max_len, ), dtype='float32', name='input3') # for decoder -- attention right_word1 = Input(shape=(X_max_len, ), dtype='float32', name='input4') right_word2 = Input(shape=(X_max_len, ), dtype='float32', name='input5') right_word3 = Input(shape=(X_max_len, ), dtype='float32', name='input6') right_word4 = Input(shape=(X_max_len, ), dtype='float32', name='input7') left_word1 = Input(shape=(X_max_len, ), dtype='float32', name='input8') left_word2 = Input(shape=(X_max_len, ), dtype='float32', name='input9') left_word3 = Input(shape=(X_max_len, ), dtype='float32', name='input10') left_word4 = Input(shape=(X_max_len, ), dtype='float32', name='input11') phonetic_input = Input(shape=(n_phonetic_features, ), dtype='float32', name='input12') emb_layer1 = Embedding(X_vocab_len, EMBEDDING_DIM, input_length=X_max_len, mask_zero=False, name='Embedding') list_of_inputs = [ current_word, right_word1, right_word2, right_word3, right_word4, left_word1, left_word2, left_word3, left_word4 ] current_word_embedding, right_word_embedding1, right_word_embedding2,right_word_embedding3, right_word_embedding4, \ left_word_embedding1, left_word_embedding2, left_word_embedding3, left_word_embedding4 = [emb_layer1(i) for i in list_of_inputs] print("Type:: ", type(current_word_embedding)) list_of_embeddings1 = [current_word_embedding, right_word_embedding1, right_word_embedding2,right_word_embedding3, right_word_embedding4, \ left_word_embedding1, left_word_embedding2, left_word_embedding3, left_word_embedding4] list_of_embeddings = [ Dropout(0.50, name='drop1_' + str(j))(i) for i, j in zip(list_of_embeddings1, range(len(list_of_embeddings1))) ] list_of_embeddings = [ GaussianNoise(0.05, name='noise1_' + str(j))(i) for i, j in zip(list_of_embeddings, range(len(list_of_embeddings))) ] conv4_curr, conv4_right1, conv4_right2, conv4_right3, conv4_right4, conv4_left1, conv4_left2, conv4_left3, conv4_left4 =\ [Conv1D(filters=no_filters, kernel_size=4, padding='valid',activation='relu', strides=1, name='conv4_'+str(j))(i) for i,j in zip(list_of_embeddings, range(len(list_of_embeddings)))] conv4s = [ conv4_curr, conv4_right1, conv4_right2, conv4_right3, conv4_right4, conv4_left1, conv4_left2, conv4_left3, conv4_left4 ] maxPool4 = [ MaxPooling1D(name='max4_' + str(j))(i) for i, j in zip(conv4s, range(len(conv4s))) ] avgPool4 = [ AveragePooling1D(name='avg4_' + str(j))(i) for i, j in zip(conv4s, range(len(conv4s))) ] pool4_curr, pool4_right1, pool4_right2, pool4_right3, pool4_right4, pool4_left1, pool4_left2, pool4_left3, pool4_left4 = \ [merge([i,j], name='merge_conv4_'+str(k)) for i,j,k in zip(maxPool4, avgPool4, range(len(maxPool4)))] conv5_curr, conv5_right1, conv5_right2, conv5_right3, conv5_right4, conv5_left1, conv5_left2, conv5_left3, conv5_left4 = \ [Conv1D(filters=no_filters, kernel_size=5, padding='valid', activation='relu', strides=1, name='conv5_'+str(j))(i) for i,j in zip(list_of_embeddings, range(len(list_of_embeddings)))] conv5s = [ conv5_curr, conv5_right1, conv5_right2, conv5_right3, conv5_right4, conv5_left1, conv5_left2, conv5_left3, conv5_left4 ] maxPool5 = [ MaxPooling1D(name='max5_' + str(j))(i) for i, j in zip(conv5s, range(len(conv5s))) ] avgPool5 = [ 
AveragePooling1D(name='avg5_' + str(j))(i) for i, j in zip(conv5s, range(len(conv5s))) ] pool5_curr, pool5_right1, pool5_right2, pool5_right3, pool5_right4, pool5_left1, pool5_left2, pool5_left3, pool5_left4 = \ [merge([i,j], name='merge_conv5_'+str(k)) for i,j,k in zip(maxPool5, avgPool5, range(len(maxPool5)))] maxPools = [pool4_curr, pool4_right1, pool4_right2, pool4_right3, pool4_right4, \ pool4_left1, pool4_left2, pool4_left3, pool4_left4, \ pool5_curr, pool5_right1, pool5_right2, pool5_right3, pool5_right4, \ pool5_left1, pool5_left2, pool5_left3, pool5_left4] concat = merge(maxPools, mode='concat', name='main_merge') x = Dropout(0.15, name='drop_single1')(concat) x = Bidirectional(RNN(rnn_output_size), name='bidirec1')(x) total_features = [x, phonetic_input] concat2 = merge(total_features, mode='concat', name='phonetic_merging') x = Dense(HIDDEN_DIM, activation='relu', kernel_initializer='he_normal', kernel_constraint=maxnorm(3), bias_constraint=maxnorm(3), name='dense1')(concat2) x = Dropout(0.15, name='drop_single2')(x) x = Dense(HIDDEN_DIM, kernel_initializer='he_normal', activation='tanh', kernel_constraint=maxnorm(3), bias_constraint=maxnorm(3), name='dense2')(x) x = Dropout(0.15, name='drop_single3')(x) out1 = Dense(n1, kernel_initializer='he_normal', activation='softmax', name='output1')(x) out2 = Dense(n2, kernel_initializer='he_normal', activation='softmax', name='output2')(x) out3 = Dense(n3, kernel_initializer='he_normal', activation='softmax', name='output3')(x) out4 = Dense(n4, kernel_initializer='he_normal', activation='softmax', name='output4')(x) out5 = Dense(n5, kernel_initializer='he_normal', activation='softmax', name='output5')(x) out6 = Dense(n6, kernel_initializer='he_normal', activation='softmax', name='output6')(x) # Luong et al. 2015 attention model emb_layer = Embedding(X_vocab_len, EMBEDDING_DIM, input_length=X_max_len, mask_zero=True, name='Embedding_for_seq2seq') current_word_embedding, right_word_embedding1, right_word_embedding2,right_word_embedding3, right_word_embedding4, \ left_word_embedding1, left_word_embedding2, left_word_embedding3, left_word_embedding4 = [emb_layer(i) for i in list_of_inputs] # current_word_embedding = smart_merge([ current_word_embedding, right_word_embedding1, left_word_embedding1]) encoder, state = GRU(rnn_output_size, return_sequences=True, unroll=True, return_state=True, name='encoder')(current_word_embedding) encoder_last = encoder[:, -1, :] decoder = emb_layer(decoder_input) decoder = GRU(rnn_output_size, return_sequences=True, unroll=True, name='decoder')(decoder, initial_state=[encoder_last]) attention = dot([decoder, encoder], axes=[2, 2], name='dot') attention = Activation('softmax', name='attention')(attention) context = dot([attention, encoder], axes=[2, 1], name='dot2') decoder_combined_context = concatenate([context, decoder], name='concatenate') outputs = TimeDistributed(Dense(64, activation='tanh'), name='td1')(decoder_combined_context) outputs = TimeDistributed(Dense(X_vocab_len, activation='softmax'), name='td2')(outputs) all_inputs = [current_word, decoder_input, right_word1, right_word2, right_word3, right_word4, left_word1, left_word2, left_word3,\ left_word4, phonetic_input] all_outputs = [outputs, out1, out2, out3, out4, out5, out6] model = Model(input=all_inputs, output=all_outputs) opt = Adam() return model
print('X_test shape:', X_test.shape) Y_train = np_utils.to_categorical(Y_train) Y_test = np_utils.to_categorical(Y_test) print('Y_train shape:', Y_train.shape) print('Y_test shape:', Y_test.shape) #sys.exit(1) print('Build model...') model = Sequential() # we start off with an efficient embedding layer which maps # our vocab indices into embedding_dims dimensions model.add( Embedding(len(train_chars), embedding_dims, input_length=maxlen, dropout=0.2)) # we add a Convolution1D, which will learn nb_filter # word group filters of size filter_length: model.add( Convolution1D(nb_filter=nb_filter, filter_length=filter_length, activation='relu', subsample_length=1)) # we use max pooling: model.add(MaxPooling1D(pool_length=3)) # We flatten the output of the conv layer, # so that we can add a vanilla dense layer: model.add(Flatten())
def main(argv): print '*' * 20 print 'Loading dataset...' sys.stdout.flush() #dataset of activities DATASET = DATASET_CSV df_dataset = pd.read_csv(DATASET, parse_dates=[[0, 1]], header=None, index_col=0, sep=' ') df_dataset.columns = ['sensor', 'action', 'event', 'activity'] df_dataset.index.names = ["timestamp"] # we only need the actions without the period to calculate the onehot vector for y, because we are only predicting the actions unique_actions = json.load(open(UNIQUE_ACTIONS, 'r')) total_actions = len(unique_actions) print '*' * 20 print 'Preparing dataset...' sys.stdout.flush() # Prepare sequences using action indices # Each action will be an index which will point to an action vector # in the weights matrix of the Embedding layer of the network input X_actions, X_times, y, tokenizer = prepare_x_y(df_dataset, unique_actions) # Create the embedding matrix for the embedding layer initialization embedding_matrix = create_embedding_matrix(tokenizer) #divide the examples in training and validation total_examples = len(X_actions) test_per = 0.2 limit = int(test_per * total_examples) X_actions_train = X_actions[limit:] X_times_train = X_times[limit:] X_actions_test = X_actions[:limit] X_times_test = X_times[:limit] y_train = y[limit:] y_test = y[:limit] print 'Different actions:', total_actions print 'Total examples:', total_examples print 'Train examples:', len(X_actions_train), len(y_train) print 'Test examples:', len(X_actions_test), len(y_test) sys.stdout.flush() X_actions_train = np.array(X_actions_train) X_times_train = np.array(X_times_train) y_train = np.array(y_train) X_actions_test = np.array(X_actions_test) X_times_test = np.array(X_times_test) y_test = np.array(y_test) print 'Shape (X,y):' print X_actions_train.shape print X_times_train.shape print y_train.shape print '*' * 20 print 'Building model...' sys.stdout.flush() # Actions embeddings branch input_actions = Input(shape=(INPUT_ACTIONS, ), dtype='int32', name='input_actions') embedding_actions = Embedding(input_dim=embedding_matrix.shape[0], output_dim=embedding_matrix.shape[1], weights=[embedding_matrix], input_length=INPUT_ACTIONS, trainable=True, name='embedding_actions')(input_actions) # Actions times branch input_time = Input(shape=(INPUT_ACTIONS, 2), dtype='float32', name='input_time') #reshape_1 = Reshape((INPUT_ACTIONS, 2))(input_time) #merge embeddings (5 x 50) and times (5 x 1), to have 5 x 51 concat = merge([embedding_actions, input_time], mode='concat', concat_axis=-1) # Everything continues in a single branch lstm_1 = LSTM(512, return_sequences=False, input_shape=(INPUT_ACTIONS, ACTION_EMBEDDING_LENGTH + 2), name='lstm_1')(concat) dense_1 = Dense(1024, activation='relu', name='dense_1')(lstm_1) drop_1 = Dropout(0.8, name='drop_1')(dense_1) dense_2 = Dense(1024, activation='relu', name='dense_2')(drop_1) drop_2 = Dropout(0.8, name='drop_2')(dense_2) output_actions = Dense(total_actions, activation='softmax', name='main_output')(drop_2) model = Model(input=[input_actions, input_time], output=[output_actions]) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', 'mse', 'mae']) print(model.summary()) sys.stdout.flush() print '*' * 20 print 'Training model...' 
sys.stdout.flush() BATCH_SIZE = 128 checkpoint = ModelCheckpoint(BEST_MODEL, monitor='val_acc', verbose=0, save_best_only=True, save_weights_only=False, mode='auto') history = model.fit([X_actions_train, X_times_train], y_train, batch_size=BATCH_SIZE, nb_epoch=1000, validation_data=([X_actions_test, X_times_test], y_test), shuffle=False, callbacks=[checkpoint]) print '*' * 20 print 'Plotting history...' sys.stdout.flush() plot_training_info(['accuracy', 'loss'], True, history.history) print '*' * 20 print 'Evaluating best model...' sys.stdout.flush() model = load_model(BEST_MODEL) metrics = model.evaluate([X_actions_test, X_times_test], y_test, batch_size=BATCH_SIZE) print metrics predictions = model.predict([X_actions_test, X_times_test], BATCH_SIZE) correct = [0] * 5 prediction_range = 5 for i, prediction in enumerate(predictions): correct_answer = y_test[i].tolist().index(1) best_n = np.sort(prediction)[::-1][:prediction_range] for j in range(prediction_range): if prediction.tolist().index(best_n[j]) == correct_answer: for k in range(j, prediction_range): correct[k] += 1 accuracies = [] for i in range(prediction_range): print '%s prediction accuracy: %s' % (i + 1, (correct[i] * 1.0) / len(y_test)) accuracies.append((correct[i] * 1.0) / len(y_test)) print accuracies print '************ FIN ************\n' * 3
# Testing the model
tt = Tokenizer()
tt.fit_on_texts(input_test)
tvocab_size = len(tt.word_index) + 1
# integer encode the documents
tencoded_docs = tt.texts_to_sequences(input_test)
# print(tencoded_docs)
# pad documents to max_length words
tpadded_docs = pad_sequences(tencoded_docs, maxlen=max_length, padding='post')
# print(tpadded_docs)
# define model
model = Sequential()
e = Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=max_length, trainable=False)
model.add(e)
model.add(GRU(gru_output_size, return_sequences=True, dropout=0.2, recurrent_dropout=0.2))
model.add(Flatten())
model.add(Dense(nclass, activation='softmax'))
# compile the model (categorical_crossentropy matches the multi-class softmax output)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
# summarize the model
print(model.summary())
# fit the model
model.fit(padded_docs, y_train, epochs=1, verbose=0, validation_data=(vpadded_docs, y_valid))
# evaluate the model
loss, accuracy = model.evaluate(tpadded_docs, y_test, verbose=0)
print(token.word_index)
# {'너무': 1, '참': 2, '재밌어요': 3, '최고에요': 4, '잘': 5, '만든': 6, '영화에요': 7, '추천하고': 8, '싶은': 9, '영화': 10, '입니다': 11,
#  '한번': 12, '더': 13, '보고': 14, '싶네요': 15, '글쎄요': 16, '별로에요': 17,
#  '생각보다': 18, '지루해요': 19, '연기가': 20, '어색해요': 21, '재미없어요': 22, '재미없다': 23, '재밌네요': 24}
# More frequent words are assigned lower (earlier) indices.

x = token.texts_to_sequences(docs)
print(x)
# [[1, 3], [4], [2, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15], [16], [17], [18, 19], [20, 21], [22], [1, 23], [2, 24]]

pad_x = pad_sequences(x, padding='pre', value=0)
print(pad_x)

word_size = len(token.word_index) + 1
print(word_size)

model = Sequential()
model.add(Embedding(25, 10, input_length=4))
model.add(Conv1D(10, 2))
model.add(Conv1D(10, 2))
model.add(MaxPool1D())
# model.add(Embedding(word_size, 10, input_length=4))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
model.fit(pad_x, labels, epochs=30)

acc = model.evaluate(pad_x, labels)[1]
print(acc)
X_train = sequence.pad_sequences(X1, maxlen=maxlen) X_test = sequence.pad_sequences(T1, maxlen=maxlen) y_train = np.array(trainlabel) y_test = np.array(testlabel) hidden_dims = 128 nb_filter = 128 filter_length = 2 embedding_vecor_length = 128 pool_length = 2 lstm_output_size = 70 model = Sequential() model.add(Embedding(max_features, embedding_vecor_length, input_length=maxlen)) model.add( Convolution1D(nb_filter=nb_filter, filter_length=filter_length, border_mode='valid', activation='relu', subsample_length=1)) model.add(MaxPooling1D(pool_length=pool_length)) model.add(LSTM(lstm_output_size)) model.add(Dense(1)) model.add(Activation('sigmoid')) model.load_weights("logs/cnnlstm/checkpoint-00.hdf5") y_pred = model.predict_classes(X_test) accuracy = accuracy_score(y_test, y_pred)
def cnnlstm_fit(): start_time = time.time() global X_val, X_train, X_test, y_train, y_val, y_test print('X_train shape:', X_train.shape) print('X_test shape:', X_test.shape) y_train = np.array(y_train) model = Sequential() model.add( Embedding(num_words, EMBEDDING_DIM, embeddings_initializer=Constant(embedding_matrix), input_length=MAX_SEQUENCE_LENGTH, trainable=False)) # model.add(Embedding(num_words, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH)) # sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32') # embedded_sequences = embedding_layer(sequence_input) model.add(Conv1D(128, 5, activation='relu')) #(embedded_sequences) model.add(MaxPooling1D(pool_size=4)) model.add(Bidirectional(LSTM(64))) model.add(Dropout(0.5)) model.add(Dense(1, activation='sigmoid')) # try using different optimizers and different optimizer configs model.compile('adam', 'binary_crossentropy', metrics=['accuracy']) print('Train...') history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=[X_val, y_val]) plot_epoch_loss(history) # plot loss curve prob_val = model.predict( X_val, verbose=0) # make prob predictions on val data prob_val = [i for i in prob_val] # get prob relevance tr_val = [float(i) for i in y_val] # true label for val data as float fpr, tpr, thresholds = roc_curve(tr_val, prob_val) optco = thresholds[np.argmax(tpr > 0.95)] # optimal prob cutoff pred_val = [ 1. if i > optco else 0. for i in prob_val ] # pred is 1 if prob>optimal cutoff as determined from val data pf1 = metrics.f1_score(tr_val, pred_val) # predicted f1 ppr = metrics.precision_score(tr_val, pred_val) # predicted precision prec = metrics.recall_score(tr_val, pred_val) # predicted recall proc = metrics.roc_auc_score( tr_val, prob_val) # predicted roc auc measured on val data precision, recall, pr_thresholds = precision_recall_curve( tr_val, prob_val) # pr curve #p_prre_auc= metrics.auc(recall, precision,reorder=True) # pr auc p_prre_auc = metrics.average_precision_score(tr_val, prob_val) # pr auc prob_test = model.predict( X_test, verbose=0) # make prob predictions on unclassified (test) data prob_test = [i for i in prob_test] # get prob relevance pred_test = [ 1. if i > optco else 0. for i in prob_test ] # pred is 1 if prob on the test data > optimal cutoff as determined from val data tr_test = [float(i) for i in y_test] # true label for test data as float af1 = metrics.f1_score(tr_test, pred_test) #actual f1 apr = metrics.precision_score(tr_test, pred_test) #actual precision arec = metrics.recall_score(tr_test, pred_test) #actual recall aroc = metrics.roc_auc_score(tr_test, prob_test) #actual roc auc precision, recall, a_thresholds = precision_recall_curve( tr_test, prob_test) #a_prre_auc= metrics.auc(recall,precision,reorder=True) a_prre_auc = metrics.average_precision_score(tr_test, prob_test) # pr auc ndata = X_train.shape[0] t = (time.time() - start_time) / 3600. # time taken in seconds r = [ ndata, t, pf1, af1, ppr, apr, prec, arec, proc, aroc, p_prre_auc, a_prre_auc ] # list of results for output print("Time to run CNN_LSTM classification model = --- %s hours ---" % ((time.time() - start_time) / 3600.)) print(r) return (r)
def make_predictions(X, Y, val_X, val_Y, test_X, test_Y, s, test_ids): cl_w = compute_class_weight('balanced', np.unique(Y), Y) earlystop = EarlyStopping(monitor='val_loss', min_delta=0.01, patience=patience, \ verbose=1, mode='auto') print('Build model CNN model') in_txt = Input(name='in_norm', batch_shape=tuple([None, maxlen]), dtype='int32') # init with pre-trained embeddings emb_char = Embedding(len(word2index), embedding_dims, embeddings_initializer=Constant(embedding_matrix), trainable=True, input_length=maxlen, name='emb_char') emb_seq = emb_char(in_txt) z = Dropout(dropout_prob[0])(emb_seq) # convolutional block conv_blocks = [] for sz in filter_sizes: conv = Convolution1D(filters=num_filters, kernel_size=sz, padding="valid", activation="relu", strides=1, kernel_regularizer=regularizers.l2(0.01), kernel_initializer=initializer_func)(z) conv = MaxPooling1D(pool_size=2)(conv) conv = Flatten()(conv) conv_blocks.append(conv) z = Concatenate()(conv_blocks) if len(conv_blocks) > 1 else conv_blocks[0] z = Dropout(dropout_prob[1])(z) z = Dense(hidden_dims, activation="relu", kernel_regularizer=regularizers.l2(0.01), kernel_initializer=initializer_func)(z) out_soft = Dense(1, activation='sigmoid', name='out_soft', kernel_initializer=initializer_func, kernel_regularizer=regularizers.l2(0.01))(z) model = Model(inputs=in_txt, outputs=out_soft) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) model.fit(X, Y, batch_size=batch_size, epochs=epochs, validation_data=(val_X, val_Y), class_weight={ 0: cl_w[0], 1: cl_w[1] }, callbacks=[earlystop], verbose=0) y_hat = model.predict(val_X, batch_size=batch_size) y_hat = y_hat.flatten() res_list = {} thresholds = np.arange(0, 1, 0.1) f1_prod_list = [] for p in thresholds: y_pred = [] for y in y_hat: if y >= p: y_pred.append(1) else: y_pred.append(0) y_pred = np.array(y_pred) from sklearn.metrics import precision_recall_fscore_support precision, recall, f1, _ = precision_recall_fscore_support( val_Y, y_pred, average=None) f1_prod_list.append(np.prod(f1)) res_list[p] = y_pred f1_prod_list = np.array(f1_prod_list) max_f1 = np.argmax(f1_prod_list) print('Positive class probability threshold %.4f' % thresholds[max_f1]) p = thresholds[max_f1] y_hat = model.predict(test_X, batch_size=batch_size) y_hat = y_hat.flatten() y_pred = [] for y in y_hat: if y >= p: y_pred.append(1) else: y_pred.append(0) y_pred = np.array(y_pred) test_y_hat = y_pred prec, recall, fm, support = precision_recall_fscore_support( test_Y, test_y_hat) print('F-measure') print(fm) print('Precision') print(prec) print('Recall') print(recall) print('Stat') print(support) accuracy_score = sklearn.metrics.accuracy_score(test_Y, test_y_hat) print('accuracy_score: {0}'.format(accuracy_score)) roc_auc_score = sklearn.metrics.roc_auc_score(test_Y, test_y_hat) print('roc_auc_score: {0}'.format(roc_auc_score)) false_positive_rate, true_positive_rate, thresholds_roc = sklearn.metrics.roc_curve( test_Y, test_y_hat) false_pos = [] false_neg = [] false_pos_probs = [] false_neg_probs = [] pos_class_probs = [] pos_class_probs_low = [] pos_class_probs_high = [] y_hat_proba = y_hat pos_class_probs = np.array(y_hat_proba) threshold = thresholds[max_f1] for proba in pos_class_probs: if proba >= threshold: pos_class_probs_high.append(proba) else: pos_class_probs_low.append(proba) pos_class_probs_low = np.array(pos_class_probs_low) pos_class_probs_high = np.array(pos_class_probs_high) threshold_low = np.median(pos_class_probs_low) threshold_high = np.median(pos_class_probs_high) for 
n, proba in enumerate(pos_class_probs): if proba <= threshold_low and test_Y[n] == 1: false_neg.append(test_ids[n]) false_neg_probs.append(proba) elif proba >= threshold_high and test_Y[n] == 0: false_pos.append(test_ids[n]) false_pos_probs.append(proba) print('Positive class proba distribution') #print(pos_class_probs) pos_class_probs = np.array(pos_class_probs) print('stat') print(st.describe(pos_class_probs)) print('median') print(np.median(pos_class_probs)) print('\n') print('False negatives with p of positive class <= %.3f' % threshold_low) print('admission ids') print(false_neg) if len(false_neg_probs) > 0: print('p distribution') # print(false_neg_probs) false_neg_probs = np.array(false_neg_probs) print('stat') print(st.describe(false_neg_probs)) print('median') print(np.median(false_neg_probs)) print('\n') print('False positives with p of positive class >= %.3f' % threshold_high) print('admission ids') print(false_pos) if len(false_pos_probs) > 0: print('p distribution') # print(false_pos_probs) false_pos_probs = np.array(false_pos_probs) print('stat') print(st.describe(false_pos_probs)) print('median') print(np.median(false_pos_probs)) print('\n') return fm[1], prec[1], recall[1], roc_auc_score
vocabulary = utils.getVocabulary(trainWindows,winSize,vocabSize) trainFeatures = utils.vectorizeWindows(trainWindows,vocabulary) devFeatures = utils.vectorizeWindows(devWindows,vocabulary) testFeatures = utils.vectorizeWindows(testWindows,vocabulary) trainTargets = np.asarray(trainTargets) devTargets = np.asarray(devTargets) testTargets = np.asarray(testTargets) print "Finished processing" model = Sequential() # Number of embedding vectors = vocabSize + UNK + <s> + <e> model.add(Embedding(vocabSize + 3, VSIZE, input_length=winSize, input_dtype='int32')) model.add(Flatten()) model.add(Dense(512, activation='relu')) model.add(Dense(trainTargets.shape[1], activation='softmax')) model.summary() model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) history = model.fit(trainFeatures, trainTargets, batch_size=batch_size, nb_epoch=nb_epoch, verbose=1, validation_data=(devFeatures, devTargets)) score = model.evaluate(testFeatures, testTargets, verbose=0) print('Test score:', score[0]) print('Test accuracy:', score[1])
def elsa_architecture(nb_classes, nb_tokens, maxlen, feature_output=False, embed_dropout_rate=0, final_dropout_rate=0,
                      embed_dim=300, embed_l2=1E-6, return_attention=False, load_embedding=False, pre_embedding=None,
                      high=False, LSTM_hidden=512, LSTM_drop=0.5):
    """
    Returns the DeepMoji architecture uninitialized and without using the pretrained model weights.

    # Arguments:
        nb_classes: Number of classes in the dataset.
        nb_tokens: Number of tokens in the dataset (i.e. vocabulary size).
        maxlen: Maximum length of an input sequence, in tokens.
        feature_output: If True the model returns the penultimate feature vector rather than Softmax probabilities (defaults to False).
        embed_dropout_rate: Dropout rate for the embedding layer.
        final_dropout_rate: Dropout rate for the final Softmax layer.
        embed_l2: L2 regularization for the embedding layer.
        high: whether or not to use the highway network.

    # Returns:
        Model with the given parameters.
    """
    class NonMasking(Layer):
        def __init__(self, **kwargs):
            self.supports_masking = True
            super(NonMasking, self).__init__(**kwargs)

        def build(self, input_shape):
            input_shape = input_shape

        def compute_mask(self, input, input_mask=None):
            # do not pass the mask to the next layers
            return None

        def call(self, x, mask=None):
            return x

        def get_output_shape_for(self, input_shape):
            return input_shape

    # define embedding layer that turns word tokens into vectors
    # an activation function is used to bound the values of the embedding
    model_input = Input(shape=(maxlen,), dtype='int32')
    embed_reg = L1L2(l2=embed_l2) if embed_l2 != 0 else None
    if not load_embedding and pre_embedding is None:
        embed = Embedding(input_dim=nb_tokens, output_dim=embed_dim, mask_zero=True, input_length=maxlen,
                          embeddings_regularizer=embed_reg, name='embedding')
    else:
        embed = Embedding(input_dim=nb_tokens, output_dim=embed_dim, mask_zero=True, input_length=maxlen,
                          weights=[pre_embedding], embeddings_regularizer=embed_reg, trainable=True, name='embedding')
    if high:
        x = NonMasking()(embed(model_input))
    else:
        x = embed(model_input)
    x = Activation('tanh')(x)

    # entire embedding channels are dropped out instead of the
    # normal Keras embedding dropout, which drops all channels for entire words
    # many of the datasets contain so few words that losing one or more words can alter the emotions completely
    if embed_dropout_rate != 0:
        embed_drop = SpatialDropout1D(embed_dropout_rate, name='embed_drop')
        x = embed_drop(x)

    # skip-connection from embedding to output eases gradient-flow and allows access to lower-level features
    # ordering of the way the merge is done is important for consistency with the pretrained model
    lstm_0_output = Bidirectional(LSTM(LSTM_hidden, return_sequences=True, dropout=LSTM_drop), name="bi_lstm_0")(x)
    lstm_1_output = Bidirectional(LSTM(LSTM_hidden, return_sequences=True, dropout=LSTM_drop), name="bi_lstm_1")(lstm_0_output)
    x = concatenate([lstm_1_output, lstm_0_output, x])
    if high:
        x = TimeDistributed(Highway(activation='tanh', name="high"))(x)

    # if return_attention is True in AttentionWeightedAverage, an additional tensor
    # representing the weight at each timestep is returned
    weights = None
    x = AttentionWeightedAverage(name='attlayer', return_attention=return_attention)(x)
    # x = MaskAverage(name='attlayer', return_attention=return_attention)(x)
    if return_attention:
        x, weights = x

    if not feature_output:
        # output class probabilities
        if final_dropout_rate != 0:
            x = Dropout(final_dropout_rate)(x)
        if nb_classes > 2:
            outputs = [Dense(nb_classes, activation='softmax', name='softmax')(x)]
        else:
            outputs = [Dense(1, activation='sigmoid', name='softmax')(x)]
    else:
        # output penultimate feature vector
        outputs = [x]

    if return_attention:
        # add the attention weights to the outputs if required
        outputs.append(weights)

    return Model(inputs=[model_input], outputs=outputs)
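# A hedged usage sketch (not from the source): a binary classifier over a 20k-token
# vocabulary with 50-token inputs and randomly initialized embeddings. Assumes the
# custom AttentionWeightedAverage layer used above is importable in this context.
elsa_model = elsa_architecture(nb_classes=2, nb_tokens=20000, maxlen=50)
elsa_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
elsa_model.summary()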
def build(self): """Construct lstm cvae model""" # Load embedding in Embedding layer embedding_matrix = self.load_embedding() embedding_layer = Embedding( self.num_words + 1, self.config.embedding_dim, weights=[embedding_matrix], input_length=self.config.max_sequence_length, trainable=False) # Q(z|X,y) -- encoder # embedded sequence input sequence_inputs = Input(batch_shape=(self.config.batch_size, self.config.max_sequence_length), dtype='int32') embedded_inputs = embedding_layer(sequence_inputs) x = LSTM(self.config.lstm_size_encoder, return_sequences=False)(embedded_inputs) score_inputs = Input(batch_shape=(self.config.batch_size, 1)) x_joint = concatenate([x, score_inputs], axis=1) x_encoded = Dense(self.config.intermediate_size, activation='tanh')(x_joint) z_mean = Dense(self.config.latent_size)(x_encoded) z_log_sigma = Dense(self.config.latent_size)(x_encoded) # Sample z ~ Q(z|X,y) def sampling(args): z_mean, z_log_sigma = args epsilon = K.random_normal(shape=(self.config.batch_size, self.config.latent_size), mean=0., stddev=1.) return z_mean + K.exp(z_log_sigma / 2.) * epsilon z = Lambda(sampling)([z_mean, z_log_sigma]) z_cond = concatenate([z, score_inputs], axis=1) # P(X|z,y) -- decoder z_repeated = RepeatVector(self.config.max_sequence_length)(z_cond) decoder_h = LSTM(self.config.lstm_size_decoder, return_sequences=True) decoder_out = Dense(self.num_words + 1) h_decoded = decoder_h(z_repeated) x_decoded = decoder_out(h_decoded) # Construct three models # vae vae = Model([sequence_inputs, score_inputs], x_decoded) # encoder encoder = Model([sequence_inputs, score_inputs], z_mean) # generator generator_z_inputs = Input(batch_shape=(self.config.batch_size, self.config.latent_size)) generator_z_cond = concatenate([generator_z_inputs, score_inputs], axis=1) generator_z_repeated = RepeatVector( self.config.max_sequence_length)(generator_z_cond) generator_h_decoded = decoder_h(generator_z_repeated) generator_x_decoded = decoder_out(generator_h_decoded) generator = Model([generator_z_inputs, score_inputs], generator_x_decoded) kl_weight = self.config.kl_weight def recon_loss(y_true, y_pred): """E[log P(X|z,y)]""" recon = K.mean(K.sparse_categorical_crossentropy(output=y_pred, target=y_true, from_logits=True), axis=1) return recon def kl_loss(y_true, y_pred): """D_KL(Q(z|X,y) || P(z|X)); calculate in closed form as both dist. are Gaussian""" kl = 0.5 * K.mean( K.exp(z_log_sigma) + K.square(z_mean) - 1. - z_log_sigma, axis=1) kl = kl * kl_weight return kl def vae_loss(y_true, y_pred): """ Calculate loss = reconstruction loss + KL loss for each data in minibatch """ recon = recon_loss(y_true, y_pred) kl = kl_loss(y_true, y_pred) return recon + kl vae.compile(loss=vae_loss, optimizer=self.config.optimizer, metrics=[recon_loss, kl_loss]) self.vae = vae self.encoder = encoder self.generator = generator
# padding also speeds up batch processing: every sequence is padded (or truncated) to the same
# length, so a batch can be stacked into a single tensor
print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)  # returns a numpy array
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
y_train = np.array(y_train)  # keras only accepts numpy arrays, not python lists
y_test = np.array(y_test)
x_train = np.array(x_train)  # already arrays after pad_sequences; kept for safety
x_test = np.array(x_test)
# instantiate sequential model
model = Sequential()
# add layers to the model (in order, because this is a Sequential model)
model.add(Embedding(max_features, 128))  # embeddings are 128-dim vectors
model.add(Bidirectional(LSTM(64)))  # LSTM layer has 64 units
model.add(Dropout(0.5))  # proportion of inputs to set to 0
model.add(Dense(1, activation='sigmoid'))  # single sigmoid output: negative or positive sentiment
# compile the model
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
# train the model
print('Train...')
hist = model.fit(x_train, y_train, batch_size=batch_size, epochs=1,
def train_lstm_for_visualization(): checkpoints = glob(MODEL_PATH + "*.h5") if len(checkpoints) > 0: checkpoints = natsorted(checkpoints) assert len(checkpoints) != 0, "No checkpoints for visualization found." checkpoint_file = checkpoints[-1] print("Loading [{}]".format(checkpoint_file)) model = load_model(checkpoint_file) model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy", utils.f1_score]) print(model.summary()) # Load the data x_train, y_train, x_test, y_test, vocab_size, tokenizer, max_tweet_length = prepare_data( SHUFFLE) # Get the word to index and the index to word mappings word_index = tokenizer.word_index index_to_word = {index: word for word, index in word_index.items()} # Evaluate the previously trained model on test data test_loss, test_acc, test_fscore = model.evaluate(x_test, y_test, verbose=1, batch_size=256) print("Loss: %.3f\nF-score: %.3f\n" % (test_loss, test_fscore)) return model, index_to_word, x_test else: # Load the data x_train, y_train, x_test, y_test, vocab_size, tokenizer, max_tweet_length = prepare_data( SHUFFLE) # Get the word to index and the index to word mappings word_index = tokenizer.word_index index_to_word = {index: word for word, index in word_index.items()} # Build, evaluate and save the model model = Sequential() model.add( Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM, input_length=max_tweet_length, embeddings_initializer="glorot_normal", name="embedding_layer")) model.add( LSTM(output_dim=HIDDEN_UNITS, name="recurrent_layer", activation="tanh", return_sequences=True)) model.add(Flatten()) model.add(Dense(DENSE_UNITS, activation="relu", name="dense_layer")) model.add(Dense(NO_OF_CLASSES, activation="softmax")) model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta(), metrics=["accuracy", utils.f1_score]) model.summary() checkpoint = ModelCheckpoint(monitor="val_acc", filepath=MODEL_PATH + "model_{epoch:02d}_{val_acc:.3f}.h5", save_best_only=True, mode="max") model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=(x_test, y_test), callbacks=[checkpoint]) score = model.evaluate(x_test, y_test) print("Loss: %.3f\nF-score: %.3f\n" % (score[0], score[1])) return model, index_to_word, x_test
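# Hedged companion sketch (not from the source): the index_to_word map returned above can be
# used to turn a padded test tweet back into readable tokens when inspecting the model,
# assuming index 0 is the padding value produced by the tokenizer pipeline.
def decode_sample(sample_ids, index_to_word):
    return " ".join(index_to_word[i] for i in sample_ids if i != 0 and i in index_to_word)

# model, index_to_word, x_test = train_lstm_for_visualization()
# print(decode_sample(x_test[0], index_to_word))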
value=hindi2index[" "]) #one hot encoding of hindi sequence y = [ to_categorical(seq, num_classes=len(hindi2index)) for seq in hindi_padded_seq ] eng_train, eng_test, y_train, y_test = train_test_split(eng_padded_seq, y, test_size=0.05) #defining architecture of network model = Sequential() model.add( Embedding(input_dim=len(eng2index), output_dim=22, input_length=max_len)) model.add(Dropout(0.20)) #model.add(Conv1D(64,4,activation="relu",padding="same")) model.add( Bidirectional( LSTM(units=128, return_sequences=True, recurrent_dropout=0.25))) model.add(TimeDistributed(Dense(len(hindi2index), activation="softmax"))) model.summary() model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"]) history = model.fit(eng_train, np.array(y_train), validation_data=(eng_test, np.array(y_test)), batch_size=32,
LOG_FILE = './outputs/log-model-vggcnn-1' # Read back data train_reviews = np.load(path_join(ROOT_PATH, "IMDB_train_fulltext_glove_X.npy")) train_labels = np.load(path_join(ROOT_PATH, "IMDB_train_fulltext_glove_y.npy")) test_reviews = np.load(path_join(ROOT_PATH, "IMDB_test_fulltext_glove_X.npy")) test_labels = np.load(path_join(ROOT_PATH, "IMDB_test_fulltext_glove_y.npy")) WV_FILE_GLOBAL = path_join(ROOT_PATH, './embeddings/wv/glove.42B.300d.120000-glovebox.pkl') gb_global = pickle.load(open(WV_FILE_GLOBAL, 'rb')) wv_size = gb_global.W.shape[1] model = Sequential() emb = Embedding(gb_global.W.shape[0], wv_size, weights=[gb_global.W], input_length=train_reviews.shape[1]) emb.trainable = False model.add(emb) #model.add(Permute((2,1))) model.add(Convolution1D(128, 3, subsample_length=2, init='he_uniform')) model.add(Activation('relu')) model.add(Dropout(0.5)) model.add(Convolution1D(128, 3, subsample_length=2, init='he_uniform')) model.add(Activation('relu')) model.add(Dropout(0.5)) model.add(Convolution1D(128, 3, subsample_length=2, init='he_uniform')) model.add(Activation('relu')) model.add(Dropout(0.5))
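# Side note (an assumption about library versions, not from the source): `subsample_length` and
# `init` are Keras 1 arguments. Under the Keras 2 API the same convolution would be written
# roughly as follows, with the behaviour intended to be equivalent.
from keras.layers import Conv1D

conv = Conv1D(filters=128, kernel_size=3, strides=2, kernel_initializer='he_uniform')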
def Keras0_helper(_X_tr, _X_va, _X_te, predictors, cat_feats, params, seed=2018): os.environ['PYTHONHASHSEED'] = '0' np.random.seed(seed) rn.seed(seed) X_tr = _X_tr[predictors] X_va = _X_va[predictors] X_te = _X_te[predictors] y_tr = _X_tr['is_attributed'] y_va = _X_va['is_attributed'] y_te = _X_te['is_attributed'] print('*************params**************') for f in sorted(params): print(f + ":", params[f]) batch_size = int(params['batch_size']) epochs_for_lr = float(params['epochs_for_lr']) max_epochs = int(params['max_epochs']) emb_cate = int(params['emb_cate']) dense_cate = int(params['dense_cate']) dense_nume_n_layers = int(params['dense_nume_n_layers']) drop = float(params['drop']) lr = float(params['lr']) lr_init = float(params['lr_init']) lr_fin = float(params['lr_fin']) n_layers = int(params['n_layers']) patience = int(params['patience']) train_dict = {} valid_dict = {} test_dict = {} input_list = [] emb_list = [] numerical_feats = [] tot_emb_n = 0 for col in X_tr: if col not in cat_feats: numerical_feats.append(col) if len(cat_feats) > 0: for col in cat_feats: train_dict[col] = np.array(X_tr[col]) valid_dict[col] = np.array(X_va[col]) test_dict[col] = np.array(X_te[col]) inpt = Input(shape=[1], name=col) input_list.append(inpt) max_val = np.max( [X_tr[col].max(), X_va[col].max(), X_te[col].max()]) + 1 emb_n = np.min([emb_cate, max_val]) if get_opt('fixEmb', 'on') == 'on': emb_n = emb_cate tot_emb_n += emb_n if emb_n == 1: print( "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!Warinig!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! emb_1 = 1" ) return 0 print('Embedding size:', max_val, emb_cate, X_tr[col].max(), X_va[col].max(), X_te[col].max(), emb_n, col) embd = Embedding(max_val, emb_n)(inpt) emb_list.append(embd) if len(emb_list) == 1: print( "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!Warinig!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
emb_list = 1" ) return 0 fe = concatenate(emb_list) s_dout = SpatialDropout1D(drop)(fe) x1 = Flatten()(s_dout) if get_opt('sameNDenseAsEmb', '-') == 'on': dense_cate = tot_emb_n if len(numerical_feats) > 0: train_dict['numerical'] = X_tr[numerical_feats].values valid_dict['numerical'] = X_va[numerical_feats].values test_dict['numerical'] = X_te[numerical_feats].values inpt = Input((len(numerical_feats), ), name='numerical') input_list.append(inpt) x2 = inpt for n in range(dense_nume_n_layers): x2 = Dense(dense_cate, activation='relu', kernel_initializer=RandomUniform(seed=seed))(x2) if get_opt('numeDropout', 'on') != 'off': x2 = Dropout(drop)(x2) if get_opt('NumeBatchNormalization', 'on') != 'off': x2 = BatchNormalization()(x2) if len(numerical_feats) > 0 and len(cat_feats) > 0: x = concatenate([x1, x2]) elif len(numerical_feats) > 0: x = x2 elif len(cat_feats) > 0: x = x1 else: return 0 # for small data test for n in range(n_layers): x = Dense(dense_cate, activation='relu', kernel_initializer=RandomUniform(seed=seed))(x) if get_opt('lastDropout', 'on') != 'off': x = Dropout(drop)(x) if get_opt('BatchNormalization', 'off') == 'on' or get_opt( 'LastBatchNormalization', 'off') == 'on': x = BatchNormalization()(x) outp = Dense(1, activation='sigmoid', kernel_initializer=RandomUniform(seed=seed))(x) model = Model(inputs=input_list, outputs=outp) if get_opt('optimizer', 'expo') == 'adam': optimizer = Adam(lr=lr) elif get_opt('optimizer', 'expo') == 'nadam': optimizer = Nadam(lr=lr) else: exp_decay = lambda init, fin, steps: (init / fin)**(1 / (steps - 1)) - 1 steps = int(len(X_tr) / batch_size) * epochs_for_lr lr_init, lr_fin = 0.001, 0.0001 lr_decay = exp_decay(lr_init, lr_fin, steps) optimizer = Adam(lr=lr, decay=lr_decay) model.compile(loss='binary_crossentropy', optimizer=optimizer) model.summary() #from keras.utils import plot_model #plot_model(model, to_file='model.png') model_file = '../work/weights.' + str(os.getpid()) + '.hdf5' if get_opt('trainCheck', '-') == 'on': training_data = (train_dict, y_tr) else: training_data = False if get_opt('testCheck', '-') == 'on': testing_data = (test_dict, y_te) else: testing_data = False aucEarlyStopping = EarlyStopping(training_data=training_data, validation_data=(valid_dict, y_va), testing_data=testing_data, patience=patience, model_file=model_file, verbose=1) model.fit(train_dict, y_tr, validation_data=[valid_dict, y_va], batch_size=batch_size, epochs=max_epochs, shuffle=True, verbose=2, callbacks=[aucEarlyStopping]) best_epoch = aucEarlyStopping.best_epoch print('loading', model_file + '.' + str(best_epoch)) model.load_weights(model_file + '.' + str(best_epoch)) _X_te['pred'] = model.predict(test_dict, batch_size=batch_size, verbose=2)[:, 0] _X_va['pred'] = model.predict(valid_dict, batch_size=batch_size, verbose=2)[:, 0] if get_opt('avgEpoch', 0) > 0: added = 1 for i in range(min(get_opt('avgEpoch', 0), patience)): best_epoch = aucEarlyStopping.best_epoch + (i + 1) if best_epoch >= max_epochs: continue print('loading', model_file + '.' + str(best_epoch)) model.load_weights(model_file + '.' + str(best_epoch)) _X_te['pred'] += model.predict( test_dict, batch_size=batch_size, verbose=2)[:, 0] * 0.5 _X_va['pred'] += model.predict( valid_dict, batch_size=batch_size, verbose=2)[:, 0] * 0.5 added += 0.5 best_epoch = aucEarlyStopping.best_epoch - (i + 1) if best_epoch < 0: continue print('loading', model_file + '.' + str(best_epoch)) model.load_weights(model_file + '.' 
+ str(best_epoch)) _X_te['pred'] += model.predict( test_dict, batch_size=batch_size, verbose=2)[:, 0] * 0.5 _X_va['pred'] += model.predict( valid_dict, batch_size=batch_size, verbose=2)[:, 0] * 0.5 added += 0.5 _X_te['pred'] /= added _X_va['pred'] /= added os.system('rm -f ' + model_file + '.*') auc = roc_auc_score(y_va, _X_va.pred) return auc
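# Hedged aside (not from the source): the exp_decay lambda above solves
# lr_fin = lr_init / (1 + decay)**(steps - 1) for `decay`, i.e. it assumes a geometric per-batch
# decay. Keras' legacy `decay` argument actually applies lr_t = lr / (1 + decay * iterations),
# so the schedule only approximately reaches lr_fin after the planned number of updates.
def exponential_decay_rate(lr_init, lr_fin, steps):
    return (lr_init / lr_fin) ** (1.0 / (steps - 1)) - 1

# Example: aim to move from 1e-3 to 1e-4 over roughly 10 epochs of 1000 batches each.
decay = exponential_decay_rate(1e-3, 1e-4, steps=10 * 1000)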
EMBEDDING_DIM = 100 tokenizer = Tokenizer(num_words=MAX_NB_WORDS, filters='!"#$%&()*+,-./:;<=>?@[\]^_`{|}~', lower=True) tokenizer.fit_on_texts(df['description'].values) word_index = tokenizer.word_index X = tokenizer.texts_to_sequences(df['description'].values) X = pad_sequences(X, maxlen=MAX_SEQUENCE_LENGTH) Y = pd.get_dummies(df['category']).values X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=42) model = Sequential() model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=X.shape[1])) model.add(SpatialDropout1D(0.2)) model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2)) model.add(Dense(6, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) epochs = 5 batch_size = 64 #history = model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size,validation_split=0.1,callbacks=[EarlyStopping(monitor='val_loss', patience=3, min_delta=0.0001)]) #model.save_weights("lstmweightsnew.h5") model.load_weights("lstmweightsnew.h5") accr = model.evaluate(X_test, Y_test)
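# Hedged usage sketch (an assumption, not from the source): with the weights loaded, a new
# description could be classified by reusing the fitted tokenizer and the column order that
# pd.get_dummies(df['category']) produced for the targets above. The example text is made up.
import numpy as np

labels = pd.get_dummies(df['category']).columns  # same order as the one-hot targets Y

new_text = ["compact washing machine with quick wash cycle"]  # illustrative input
seq = tokenizer.texts_to_sequences(new_text)
padded = pad_sequences(seq, maxlen=MAX_SEQUENCE_LENGTH)
pred = model.predict(padded)
print(labels[np.argmax(pred)])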
def main(argv): """" Main function This is the flow of actions of this main 0: Initial steps 1: Load data (X and y_emb) and needed dictionaries (activity-to-int, etc.) 2: Generate K partitions of the dataset (KFold cross-validation) 3: For each partition (train, test): 3.1: Build the LSTM model 3.2: Manage imbalanced data in the training set (SMOTE?) 3.3: Train the model with the imbalance-corrected training set and use the test set to validate 3.4: Store the generated learning curves and metrics with the best model (ModelCheckpoint? If results get worse with epochs, use EarlyStopping) 4: Calculate the mean and std for the metrics obtained for each partition and store """ # 0: Initial steps print_configuration_info() # fix random seed for reproducibility np.random.seed(7) # Make an instance of the class Utils utils = Utils() # Obtain the file number maxnumber = utils.find_file_maxnumber(RESULTS + DATASET + '/') filenumber = maxnumber + 1 print('file number: ', filenumber) # 1: Load data (X and y_emb) print('Loading data') # Load activity_dict where every activity name has its associated word embedding with open(ACTIVITY_EMBEDDINGS) as f: activity_dict = json.load(f) # Load the activity indices with open(ACTIVITY_TO_INT) as f: activity_to_int_dict = json.load(f) # Load the index to activity relations with open(INT_TO_ACTIVITY) as f: int_to_activity = json.load(f) # Load embedding matrix, X and y sequences (for y, load both, the embedding and index version) embedding_matrix = np.load(EMBEDDING_WEIGHTS) X = np.load(X_FILE) y_emb = np.load(Y_EMB_FILE) # We need the following two lines for StratifiedKFold y_index_one_hot = np.load(Y_INDEX_FILE) y_index = np.argmax(y_index_one_hot, axis=1) # To use oversampling methods in imbalance-learn, we need an activity_index:embedding relation # Build it using INT_TO_ACTIVITY and ACTIVITY_EMBEDDINGS files activity_index_to_embedding = {} for key in int_to_activity: activity_index_to_embedding[key] = activity_dict[int_to_activity[key]] max_sequence_length = X.shape[ 1] # TODO: change this to fit the maximum sequence length of all the datasets #total_activities = y_train.shape[1] ACTION_MAX_LENGTH = embedding_matrix.shape[1] print('X shape:', X.shape) print('y shape:', y_emb.shape) print('y index shape:', y_index.shape) print('max sequence length:', max_sequence_length) print('features per action:', embedding_matrix.shape[0]) print('Action max length:', ACTION_MAX_LENGTH) # 2: Generate K partitions of the dataset (KFold cross-validation) # TODO: Decide between KFold or StratifiedKFold # if StratifiedKFold skf = StratifiedKFold(n_splits=FOLDS) # if KFold #kf = KFold(n_splits = FOLDS) fold = 0 # 4: For each partition (train, test): metrics_per_fold = utils.init_metrics_per_fold() best_epochs = [] #for train, test in kf.split(X): for train, test in skf.split(X, y_index): print("%d Train: %s, test: %s" % (fold, len(train), len(test))) X_train = X[train] y_train = y_emb[train] y_train_index = y_index[train] X_val = X[test] y_val = y_emb[test] y_val_index = y_index_one_hot[test] print('Activity distribution %s' % Counter(y_index)) # 3.1: Build the LSTM model print('Building model...') sys.stdout.flush() model = Sequential() model.add( Embedding(input_dim=embedding_matrix.shape[0], output_dim=embedding_matrix.shape[1], weights=[embedding_matrix], input_length=max_sequence_length, trainable=EMB_TRAINABLE)) # Change input shape when using embeddings model.add( LSTM(512, return_sequences=False, recurrent_dropout=DROPOUT, dropout=DROPOUT, 
input_shape=(max_sequence_length, embedding_matrix.shape[1]))) # For regression use a linear dense layer with embedding_matrix.shape[1] size (300 in this case) # TODO: consider the need of normalization before calculating the loss (we may use a Lambda layer with L2 norm) model.add(Dense(embedding_matrix.shape[1])) # TODO: check different regression losses; cosine_proximity could be the best one for us? #model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse', 'mae']) model.compile(loss=LOSS, optimizer=OPTIMIZER, metrics=['cosine_proximity', 'mse', 'mae']) print('Model built') print(model.summary()) sys.stdout.flush() # 3.2: Manage imbalanced data in the training set (SMOTE?) -> Conf option TREAT_IMBALANCE # NOTE: We may have a problem with SMOTE, since there are some classes with only 1-3 samples and SMOTE needs n_samples < k_neighbors (~5) # NOTE: RandomOverSampler could do the trick, however it generates just copies of current samples # TODO: Think about a combination between RandomOverSampler for n_samples < 5 and SMOTE? # TODO: First attempt without imbalance management if (TREAT_IMBALANCE == True): ros = RandomOverSampler( random_state=42 ) # sampling_strategy={4:10, 12:10, 14:10, 8:10, 13:10} print('Original dataset samples for training %s' % len(y_train_index)) print('Original dataset shape for training %s' % Counter(y_train_index)) X_train_res, y_train_index_res = ros.fit_resample( X_train, y_train_index) print('Resampled dataset samples for training %s' % len(y_train_index_res)) print('Resampled dataset shape for training %s' % Counter(y_train_index_res)) y_train_res = [] for j in y_train_index_res: y_train_res.append(activity_index_to_embedding[str( y_train_index_res[j])]) y_train_res = np.array(y_train_res) print("y_train_res shape: ", y_train_res.shape) else: X_train_res = X_train y_train_res = y_train # 3.3: Train the model with the imbalance-corrected training set and use the test set to validate print('Training...') sys.stdout.flush() # Define the callbacks to be used (EarlyStopping and ModelCheckpoint) # TODO: Do we need EarlyStopping here? #earlystopping = EarlyStopping(monitor='val_loss', patience=100, verbose=0) # TODO: improve file naming for multiple architectures weights_file = WEIGHTS + DATASET + '/' + str(filenumber).zfill( 2) + '-' + EXPERIMENT_ID + '-fold' + str(fold) + WEIGHTS_FILE_ROOT modelcheckpoint = ModelCheckpoint(weights_file, monitor='val_loss', save_best_only=True, verbose=0) callbacks = [modelcheckpoint] history = model.fit(X_train_res, y_train_res, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=(X_val, y_val), shuffle=True, callbacks=callbacks) # 3.4: Store the generated learning curves and metrics with the best model (ModelCheckpoint?) 
-> Conf option SAVE plot_filename = PLOTS + DATASET + '/' + str(filenumber).zfill( 2) + '-' + EXPERIMENT_ID + '-fold' + str(fold) #plot_training_info(['loss'], True, history.history, plot_filename) if SAVE == True: utils.plot_training_info(['loss'], True, history.history, plot_filename) print("Plots saved in " + PLOTS + DATASET + '/') print("Training finished") # Print the best val_loss min_val_loss = min(history.history['val_loss']) min_val_loss_index = history.history['val_loss'].index(min_val_loss) print("Validation loss: " + str(min_val_loss) + " (epoch " + str(history.history['val_loss'].index(min_val_loss)) + ")") best_epochs.append(min_val_loss_index) model.load_weights(weights_file) yp = model.predict(X_val, batch_size=BATCH_SIZE, verbose=1) # yp has the embedding predictions of the regressor network # Obtain activity labels from embedding predictions ypreds = obtain_class_predictions(yp, activity_dict, activity_to_int_dict, int_to_activity) # Calculate the metrics ytrue = np.argmax(y_val_index, axis=1) print("ytrue shape: ", ytrue.shape) print("ypreds shape: ", ypreds.shape) # Use scikit-learn metrics to calculate confusion matrix, accuracy, precision, recall and F-Measure """ cm = confusion_matrix(ytrue, ypreds) # Normalize the confusion matrix by row (i.e by the number of samples # in each class) cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] np.set_printoptions(precision=3, linewidth=1000, suppress=True) # Save also the cm to a txt file results_file_root = RESULTS + DATASET + '/' + str(filenumber).zfill(2) + '-' + EXPERIMENT_ID + '-fold' + str(fold) np.savetxt(results_file_root + '-cm.txt', cm, fmt='%.0f') np.savetxt(results_file_root+'-cm-normalized.txt', cm_normalized, fmt='%.3f') print("Confusion matrices saved in " + RESULTS + DATASET + '/') """ # Plot non-normalized confusion matrix -> Conf option SAVE if SAVE == True: results_file_root = RESULTS + DATASET + '/' + str( filenumber).zfill(2) + '-' + EXPERIMENT_ID + '-fold' + str( fold) utils.plot_heatmap( ytrue, ypreds, classes=activity_to_int_dict.keys(), title='Confusion matrix, without normalization, fold ' + str(fold), path=results_file_root + '-cm.png') # Plot normalized confusion matrix utils.plot_heatmap(ytrue, ypreds, classes=activity_to_int_dict.keys(), normalize=True, title='Normalized confusion matrix, fold ' + str(fold), path=results_file_root + '-cm-normalized.png') #Dictionary with the values for the metrics (precision, recall and f1) metrics = utils.calculate_evaluation_metrics(ytrue, ypreds) metrics_per_fold = utils.update_metrics_per_fold( metrics_per_fold, metrics) # Update fold counter fold += 1 # 5: Calculate the mean and std for the metrics obtained for each partition and store (always) metrics_per_fold = utils.calculate_aggregate_metrics_per_fold( metrics_per_fold) metrics_filename = RESULTS + DATASET + '/' + str(filenumber).zfill( 2) + '-' + EXPERIMENT_ID + '-complete-metrics.json' with open(metrics_filename, 'w') as fp: json.dump(metrics_per_fold, fp, indent=4) print("Metrics saved in " + metrics_filename) print("Avg best epoch: " + str(np.mean(best_epochs)) + ", min: " + str(min(best_epochs)) + ", max: " + str(max(best_epochs)))
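# Hedged sketch (an assumption, not the project's actual implementation): main() above relies on
# obtain_class_predictions to map the regressor's predicted embeddings back to activity labels.
# One plausible realisation is a nearest-neighbour lookup over the activity embeddings using
# cosine similarity, returning the integer class index of the closest activity for each row.
import numpy as np

def obtain_class_predictions_sketch(yp, activity_dict, activity_to_int_dict):
    names = list(activity_dict.keys())
    vectors = np.array([activity_dict[name] for name in names], dtype=float)
    vectors = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
    yp_norm = yp / np.linalg.norm(yp, axis=1, keepdims=True)
    nearest = np.argmax(yp_norm.dot(vectors.T), axis=1)  # most similar activity per prediction
    return np.array([activity_to_int_dict[names[j]] for j in nearest])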
print(len(y_test), 'testing samples') from keras.preprocessing import sequence from keras.models import Sequential from keras.layers import Dense, Embedding, LSTM, Bidirectional from keras import optimizers maxlen = 500 x_train = sequence.pad_sequences(x_train, maxlen=maxlen) x_test = sequence.pad_sequences(x_test, maxlen=maxlen) print('x_train shape:', x_train.shape) print('x_test shape:', x_test.shape) model = Sequential() model.add(Embedding(max_words, 128, input_length=maxlen)) model.add(Bidirectional(LSTM(128, dropout=0.2, recurrent_dropout=0.2))) model.add(Dense(1, activation='sigmoid')) optimizer = optimizers.RMSprop(0.001) model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy']) from keras.callbacks import EarlyStopping early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='min')
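# Hedged continuation sketch (assumed call, not from the source): the early_stop callback defined
# above would typically be passed to fit() together with validation data so training halts once
# val_loss stops improving. Batch size and epoch count here are illustrative.
history = model.fit(x_train, y_train,
                    batch_size=64,
                    epochs=20,               # upper bound; early stopping usually ends sooner
                    validation_split=0.2,
                    callbacks=[early_stop])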
def cmodel(path): l2_n = 0.000025 learning_rate = 0.001 batch_s = 128 epoch = 20 input_dime = 38 #train_x += test_x #train_y += test_y x = [] y = [] pathdir = os.listdir(path) i = 0 label = [] for d in pathdir: print("in %s / %s" % (path, d)) xt = notetotrain(path + d) label.append(str(d)) print(len(xt)) x += xt y = y + [i for j in range(len(xt))] #y += label i += 1 #print (len(x)) #print (len(y)) #print (y) print(label) tray = [] train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.1, random_state=1) for ia in range(i): tray.append(test_y.count(ia)) print(tray) train_x = np.array(train_x) / 37 train_y = np.array(train_y) test_x = np.array(test_x) / 37 test_y = np.array(test_y) #train_xr = [train_x,train_x,train_x] #test_xr = [test_x,test_x,test_x] ''' datapath = "./edata/" train_x = np.load(datapath+"trainx.npy")#,train_x) train_y = np.load(datapath+"trainy.npy")#,train_y) test_x = np.load(datapath+"testx.npy")#,test_x) test_y = np.load(datapath+"testy.npy")#),test_y) #np.load( ''' train_y = to_categorical(train_y, i) test_y = to_categorical(test_y, i) #print (train_y[0]) #print (train_x[0]) num_class = i #print (i) #print (num_class) #print (len(train_y)) #print (len(train_y[0])) #print (train_y[0]) seed = 7 np.random.seed(seed) model = Sequential() o_d = 38 * 120 ''' model.add(Reshape((128, 1), input_shape=((128),))) model.add(Conv1D(64, 1, activation='relu' )) model.add(Conv1D(64, 1, activation='relu')) model.add(MaxPooling1D(3)) model.add(Conv1D(128, 1, activation='relu')) model.add(Conv1D(128, 1, activation='relu')) model.add(GlobalAveragePooling1D()) model.add(Dropout(0.5)) ''' model.add(Embedding(input_dim=38, output_dim=o_d, input_length=128)) model.add(Dense(128, activation="relu")) #model.add(Embedding(input_dim=38,output_dim=o_d,input_length=128)) #model.add(Bidirectional(LSTM(1024,return_sequences=True),input_shape=(52,1))) model.add( LSTM( 128, return_sequences=True, activation="relu", #use_bias = True, recurrent_initializer="ones", kernel_initializer=glorot_normal(seed=1))) #model.add(Dropout(0.1)) model.add(Dense(128, activation="relu")) model.add( LSTM( 64, return_sequences=True, activation="relu", #use_bias = True, recurrent_initializer="ones", kernel_initializer=glorot_normal(seed=2))) #model.add(Dropout(0.1)) model.add(Dense(64, activation="relu")) #model.add(Dropout(0.1)) model.add( #Bidirectional (LSTM(64, activation="tanh", use_bias=True, recurrent_initializer="orthogonal", kernel_initializer=glorot_normal(seed=3)))) model.add( Dense(i, kernel_initializer=keras.initializers.random_normal(stddev=1, seed=3), kernel_regularizer=l2(l2_n) #activation="softmax" )) #model.add(BatchNormalization()) model.add(Activation("softmax")) print(model.summary()) #adam = Adam(learning_rate) checkpath = "../RNNcheckpoint/esaved-model-{epoch:02d}-{val_acc:.2f}.hdf5" model.compile( loss='categorical_crossentropy', optimizer=Adam(lr=learning_rate, decay=0.01), #SGD(lr=learning_rate,decay = 1e-5, #momentum=0.9,nesterov=True),#'adam', metrics=['acc']) #model = keras.models.load_model("./emodel.h5") checkpoint = ModelCheckpoint(checkpath, monitor='val_acc', verbose=1, save_best_only=False, mode='max') callbacks_list = [checkpoint] model.fit(train_x, train_y, batch_size=batch_s, callbacks=callbacks_list, epochs=epoch, verbose=1, validation_data=(test_x, test_y), shuffle=True) #Ki.clear_session() mp = "./emodel.h5" model.save(mp)
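# Hedged follow-up sketch (not from the source): the model saved to ./emodel.h5 above could be
# reloaded later for evaluation or further training. Note that its first layer is an Embedding
# with input_dim=38, which expects integer indices in [0, 38); if the /37-scaled floats used for
# training above are fed in, they would be truncated towards index 0, so the scaling may be
# worth revisiting.
from keras.models import load_model

restored = load_model("./emodel.h5")
restored.summary()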
def _generate_model(self, lembedding, num_classes=2, first_kernel_size=3, num_features=1024, conv_dropout=False, train_vectors=True): model = Sequential() if lembedding.vector_box.W is None: emb = Embedding(lembedding.vector_box.size, lembedding.vector_box.vector_dim, W_constraint=None, input_length=lembedding.size) else: emb = Embedding(lembedding.vector_box.size, lembedding.vector_box.vector_dim, weights=[lembedding.vector_box.W], W_constraint=None, input_length=lembedding.size) emb.trainable = train_vectors model.add(emb) # Two conv layers with original kernel size, maxpooling is 2 model.add(Convolution1D(num_features, first_kernel_size, init='uniform')) model.add(Activation('relu')) model.add(MaxPooling1D(2)) if conv_dropout: model.add(Dropout(0.25)) model.add(Convolution1D(num_features, first_kernel_size, init='uniform')) model.add(Activation('relu')) model.add(MaxPooling1D(2)) if conv_dropout: model.add(Dropout(0.25)) # Three conv layers with kernel size = 3, no maxpooling model.add(Convolution1D(num_features, 3, init='uniform')) model.add(Activation('relu')) if conv_dropout: model.add(Dropout(0.25)) model.add(Convolution1D(num_features, 3, init='uniform')) model.add(Activation('relu')) if conv_dropout: model.add(Dropout(0.25)) model.add(Convolution1D(num_features, 3, init='uniform')) model.add(Activation('relu')) if conv_dropout: model.add(Dropout(0.25)) # One final conv layer with maxpooling model.add(Convolution1D(num_features, 3, init='uniform')) model.add(Activation('relu')) model.add(MaxPooling1D(2)) model.add(Dropout(0.25)) model.add(Flatten()) # Two dense layers with heavy dropout model.add(Dense(2048)) model.add(Dropout(0.5)) model.add(Dense(2048)) model.add(Dropout(0.5)) if num_classes == 2: model.add(Dense(1, activation='sigmoid')) if self.optimizer is None: self.optimizer = 'rmsprop' model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"]) else: if self.optimizer is None: self.optimizer = 'rmsprop' model.add(Dense(num_classes, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"]) return model
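# Hedged observation (not from the source): the two 2048-unit Dense layers near the end of the
# model are added without an activation, so together they still compute a single affine map.
# If a non-linearity is intended between them, a common variant (an assumption, sketched here)
# inserts ReLUs:
from keras.layers import Dense, Dropout

def dense_head(model, units=2048, dropout=0.5):
    model.add(Dense(units, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(units, activation='relu'))
    model.add(Dropout(dropout))
    return model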
embeddings_index[word] = coefs
f.close()
print('Found %s word vectors.' % len(embeddings_index))

# use pre-trained GloVe word embeddings to initialize the embedding layer
embedding_matrix = np.random.random((MAX_NUM_WORDS + 1, EMBEDDING_DIM))
for word, i in vocab.items():
    if i < MAX_NUM_WORDS:
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in the embedding index stay randomly initialized
            embedding_matrix[i] = embedding_vector

embedding_layer = Embedding(MAX_NUM_WORDS + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=int(MAX_LEN / 64),  # cast to int to match the Input shape below
                            trainable=True)

# build model
print("Build Model")
input1 = Input(shape=(int(MAX_LEN / 64), ), dtype='int32')
embed = embedding_layer(input1)
gru1 = GRU(NUM_FILTERS,
           recurrent_activation='sigmoid',
           activation=None,
           return_sequences=False)(embed)
Encoder1 = Model(input1, gru1)
input2 = Input(shape=( 8,