def build(self, input_shape):
    # Create the IndRNN cell's weights. The recurrent kernel is clipped so the
    # per-unit recurrence stays stable across `timesteps` steps.
    input_dim = input_shape[-1]
    # -1 is the "auto" sentinel for the clip range: derive max |u| = 2^(1/T)
    # from the timestep count when available, otherwise fall back to 1.0.
    if self.recurrent_clip_min == -1 or self.recurrent_clip_max == -1:
        self.recurrent_clip_min = 0.0
        if hasattr(self, 'timesteps') and self.timesteps is not None:
            self.recurrent_clip_max = pow(2.0, 1. / self.timesteps)
        else:
            warnings.warn("IndRNNCell: Number of timesteps could not be determined. \n"
                          "Defaulting to max clipping range of 1.0. \n"
                          "If this model was trained using a specific timestep during training, "
                          "inference may be wrong due to this default setting.\n"
                          "Please ensure that you use the same number of timesteps during training "
                          "and evaluation")
            self.recurrent_clip_max = 1.0
    self.kernel = self.add_weight(shape=(input_dim, self.units),
                                  name='input_kernel',
                                  initializer=self.kernel_initializer,
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)
    # Default recurrent init: uniform inside the clip range (capped at 1.0).
    if self.recurrent_initializer is None:
        if self.recurrent_clip_min is not None and self.recurrent_clip_max is not None:
            initialization_value = min(self.recurrent_clip_max, 1.0)
            self.recurrent_initializer = initializers.uniform(-initialization_value,
                                                              initialization_value)
        else:
            self.recurrent_initializer = initializers.uniform(-1.0, 1.0)
    # IndRNN: one independent recurrent weight per unit, hence a vector shape.
    self.recurrent_kernel = self.add_weight(shape=(self.units,),
                                            name='recurrent_kernel',
                                            initializer=self.recurrent_initializer,
                                            regularizer=self.recurrent_regularizer,
                                            constraint=self.recurrent_constraint)
    if self.recurrent_clip_min is not None and self.recurrent_clip_max is not None:
        # With a nonzero lower bound, first raise magnitudes to the minimum
        # while preserving sign, then clip the result into [min, max].
        if abs(self.recurrent_clip_min):
            abs_recurrent_kernel = K.abs(self.recurrent_kernel)
            min_recurrent_kernel = K.maximum(abs_recurrent_kernel,
                                             abs(self.recurrent_clip_min))
            self.recurrent_kernel = K.sign(self.recurrent_kernel) * min_recurrent_kernel
        self.recurrent_kernel = K.clip(self.recurrent_kernel,
                                       self.recurrent_clip_min,
                                       self.recurrent_clip_max)
    if self.use_bias:
        bias_initializer = self.bias_initializer
        self.bias = self.add_weight(shape=(self.units,),
                                    name='bias',
                                    initializer=bias_initializer,
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
    else:
        self.bias = None
    self.built = True
def get_model(self):
    """Build the three networks used by the agent.

    Returns a tuple (model, pi_model, q_model):
      * model    — shared feature net: two relu Dense layers of h_dim units.
      * pi_model — actor: state -> tanh action vector of action_dim units.
      * q_model  — critic: (action, features) -> scalar value.
    The final actor/critic layers start near zero via a small uniform init.
    """
    init_small = uniform(-3e-3, 3e-3)

    model = Sequential([
        Dense(self.h_dim, input_dim=self.in_dim),
        Activation('relu'),
        Dense(self.h_dim),
        Activation('relu'),
    ])

    q_model = Sequential([
        Dense(self.h_dim, input_dim=self.action_dim + self.h_dim),
        Activation('relu'),
        Dense(self.h_dim),
        Activation('relu'),
        Dense(self.h_dim),
        Activation('relu'),
        Dense(1, kernel_initializer=init_small, bias_initializer=init_small),
    ])

    pi_model = Sequential([
        Dense(self.h_dim, input_dim=self.in_dim),
        Activation('relu'),
        Dense(self.h_dim),
        Activation('relu'),
        Dense(self.action_dim, activation='tanh',
              kernel_initializer=init_small, bias_initializer=init_small),
    ])

    return model, pi_model, q_model
def build(self, input_shape):
    """Create the positional (W_pos) and channel (W_chan) weight matrices.

    When `symmetric` is set, only the first half of the positions (rounded
    up) carries weights; both matrices share one uniform initializer whose
    bound is a fourth-root fan-based heuristic.
    """
    import numpy as np
    self.original_length = input_shape[1]
    if self.symmetric == False:
        self.length = input_shape[1]
    else:
        self.odd_input_length = input_shape[1] % 2.0 == 1
        # +0.5 turns the floor of the integer division into a ceil
        self.length = int(input_shape[1] / 2.0 + 0.5)
    self.num_channels = input_shape[2]

    bound = np.sqrt(
        np.sqrt(2.0 / (self.length * self.num_channels + self.output_dim)))
    self.init = uniform(-bound, bound)

    pos_constraint = None
    if self.curvature_constraint is not None:
        pos_constraint = CurvatureConstraint(self.curvature_constraint)
    pos_regularizer = None
    if self.smoothness_penalty is not None:
        pos_regularizer = SepFCSmoothnessRegularizer(
            self.smoothness_penalty,
            self.smoothness_l1,
            self.smoothness_second_diff)

    self.W_pos = self.add_weight(
        name='{}_W_pos'.format(self.name),
        shape=(self.output_dim, self.length),
        initializer=self.init,
        constraint=pos_constraint,
        regularizer=pos_regularizer)
    self.W_chan = self.add_weight(
        shape=(self.output_dim, self.num_channels),
        name='{}_W_chan'.format(self.name),
        initializer=self.init,
        trainable=True)
    self.built = True
def create_model(self):
    """Assemble and compile the embedding + LSTM binary classifier.

    Pipeline: Embedding -> BatchNorm -> Masking -> LSTM -> BatchNorm ->
    sigmoid Dense, compiled with binary crossentropy / RMSprop.
    All weight inits use a fixed seed for reproducibility.
    """
    seed = 20170719
    net = Sequential()
    print('#3')
    net.add(Embedding(self.input_dim,
                      self.vec_dim,
                      input_length=self.maxlen,
                      trainable=True,
                      embeddings_initializer=uniform(seed=seed)))
    net.add(BatchNormalization(axis=-1))
    print('#4')
    net.add(Masking(mask_value=0, input_shape=(self.maxlen, self.vec_dim)))
    net.add(LSTM(self.n_hidden,
                 batch_input_shape=(None, self.maxlen, self.vec_dim),
                 kernel_initializer=glorot_uniform(seed=seed),
                 recurrent_initializer=orthogonal(gain=1.0, seed=seed)))
    print('#5')
    net.add(BatchNormalization(axis=-1))
    print('#6')
    net.add(Dense(self.output_dim,
                  activation='sigmoid',
                  use_bias=True,
                  kernel_initializer=glorot_uniform(seed=seed)))
    net.compile(loss="binary_crossentropy",
                optimizer="RMSprop",
                metrics=['binary_accuracy'])
    return net
def single_image_transform_model(cimage, simage):
    # Build a style-transfer optimization model: a trainable raw image
    # (RawWeights) is pushed through a frozen feature extractor and compared
    # against fixed content/style targets. `cimage` is the content image and
    # `simage` the style image; both are HWC numpy arrays.
    sdim, cdim = simage.shape, cimage.shape
    fcmodel = extract_features(cdim)
    fcmodel.trainable = False
    # Reuse the same feature model when both images have the same shape.
    if cdim == sdim:
        fsmodel = fcmodel
    else:
        fsmodel = extract_features(sdim)
        fsmodel.trainable = False
    cimage = cimage.reshape(
        (1, cimage.shape[0], cimage.shape[1], cimage.shape[2]))
    # Target activations: output 0 appears to be content features and output 1
    # style features (presumably — TODO confirm against extract_features).
    cvals = fcmodel.predict(cimage)[0][0]
    simage = simage.reshape(
        (1, simage.shape[0], simage.shape[1], simage.shape[2]))
    svals = fsmodel.predict(simage)[1][0]
    # The dummy input only gives RawWeights a graph entry point; the trainable
    # pixels live inside the RawWeights layer, initialized uniformly in [0, 255].
    dummy = Input(shape=cdim, dtype="float32", name="dummy_img")
    from keras import initializers
    timg = RawWeights(name='result', activation=None,
                      initializer=initializers.uniform(0, 255))(dummy)
    tfeats = fcmodel(timg)
    # Error outputs are the raw differences from the targets; a downstream
    # loss presumably reduces them.
    cerr = Lambda(lambda x: x - cvals, name="ContentError")(tfeats[0])
    serr = Lambda(lambda x: x - svals, name="StyleError")(tfeats[1])
    model = Model(inputs=[dummy], outputs=[timg, cerr, serr])
    return model
def reset(model):
    '''Given a Keras model consisting only of GraphFP, Dense, and Dropout layers,
    this function will reset the trainable weights to save time for CV tests.'''
    # NOTE(review): relies on the Theano backend (T.tile / .eval()) and the
    # Keras-1 style initializer API where initializers.uniform(shape) returns
    # a tensor — confirm the pinned Keras/Theano versions before reuse.
    for layer in model.layers:
        # Note: these are custom depending on the layer type
        if '.GraphFP' in str(layer):
            # Fresh inner weights, tiled across depth+1 levels with small
            # uniform noise added so the tiles are not all identical.
            W_inner = layer.init_inner((layer.inner_dim, layer.inner_dim))
            b_inner = np.zeros((1, layer.inner_dim))
            # Inner weights
            layer.W_inner.set_value(
                (T.tile(W_inner, (layer.depth + 1, 1, 1)).eval() +
                 initializers.uniform(
                     (layer.depth + 1, layer.inner_dim, layer.inner_dim)).eval()).astype(np.float32))
            layer.b_inner.set_value(
                (T.tile(b_inner, (layer.depth + 1, 1, 1)).eval() +
                 initializers.uniform(
                     (layer.depth + 1, 1, layer.inner_dim)).eval()).astype(
                np.float32))
            # Outer weights
            W_output = layer.init_output((layer.inner_dim, layer.output_dim),
                                         scale=layer.scale_output)
            b_output = np.zeros((1, layer.output_dim))
            # Initialize weights tensor
            layer.W_output.set_value(
                (T.tile(W_output, (layer.depth + 1, 1, 1)).eval()).astype(np.float32))
            layer.b_output.set_value(
                (T.tile(b_output, (layer.depth + 1, 1, 1)).eval()).astype(np.float32))
            print('graphFP layer reset')
        elif '.Dense' in str(layer):
            # Re-run the layer's own initializer for W; zero the bias.
            layer.W.set_value(
                (layer.init(layer.W.shape.eval()).eval()).astype(np.float32))
            layer.b.set_value(np.zeros(layer.b.shape.eval(), dtype=np.float32))
            print('dense layer reset')
        elif '.Dropout' in str(layer):
            # Dropout has no weights; nothing to reset.
            print('dropout unchanged')
        else:
            print('Not reseting weights for {}'.format(str(layer)))
    print('Reset model weights')
    return model
def baseline_model():
    """Feed-forward classifier: 83 inputs -> 30/10/5/5 relu stack -> 2-way softmax.

    Noise (GaussianNoise + GaussianDropout) follows the first layer; each later
    hidden layer is followed by plain Dropout(0.3). All kernels use a seeded
    uniform init so runs are reproducible.
    """
    global num  # NOTE(review): `num` is never used inside this function — confirm before removing
    model = Sequential()
    model.add(Dense(input_dim=83,
                    kernel_initializer=initializers.uniform(seed=0),
                    bias_initializer=initializers.zeros(),
                    activation='relu',
                    units=30))
    model.add(GaussianNoise(1))
    model.add(GaussianDropout(0.3))
    for width in (10, 5, 5):
        model.add(Dense(width,
                        kernel_initializer=initializers.uniform(seed=0),
                        bias_initializer='zeros',
                        activation='relu'))
        model.add(Dropout(0.3))
    model.add(Dense(2,
                    kernel_initializer=initializers.uniform(seed=0),
                    bias_initializer='zeros',
                    activation='softmax'))
    # NOTE(review): binary_crossentropy is paired with a 2-unit softmax here;
    # categorical_crossentropy is the conventional pairing — confirm intent.
    model.compile(loss=losses.binary_crossentropy,
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def create_actor_network(self, state_size, action_dim):
    """Actor network: state -> 2x512 relu -> [sigmoid linear-vel, tanh angle].

    Returns (model, model.trainable_weights, state_input). `action_dim` is
    kept for interface compatibility; the output is always the 2-dim
    concatenation of the two heads. Output layers start near zero via a
    small uniform init (standard DDPG practice).

    Bug fix: the original built a throwaway `Sequential()` that was
    immediately shadowed, and a third 512-unit Dense (h2) that was never
    connected to the returned graph; both pieces of dead code are removed.
    """
    S = Input(shape=[state_size])
    h0 = Dense(512, activation="relu", kernel_initializer="he_uniform")(S)
    h1 = Dense(512, activation="relu", kernel_initializer="he_uniform")(h0)
    small_init = uniform(minval=-3e-3, maxval=3e-3, seed=None)
    LinearV = Dense(1, activation='sigmoid', kernel_initializer=small_init)(h1)
    AngleV = Dense(1, activation='tanh', kernel_initializer=small_init)(h1)
    F = concatenate([LinearV, AngleV])
    model = Model(input=S, output=F)
    return model, model.trainable_weights, S
def build(self, input_shape):
    # Build the wrapped RNN cell, then attach concrete-dropout style
    # regularization where the dropout probabilities can themselves be learned.
    super().build(input_shape)
    # Regularization strengths; 14681 is presumably the training-set size
    # (penalties scaled by 1/N) — TODO confirm.
    reg = 1/14681
    dropout_reg = 2/14681

    def dropout_constraint(p):
        """Constraint probability between 0.0 and 1.0"""
        return K.clip(p, K.epsilon(), 1. - K.epsilon())

    # dropout == 1.0 is the sentinel meaning "learn the input-dropout rate".
    if self.dropout == 1.0:
        self.p = self.cell.add_weight(name='p',
                                      shape=(),
                                      initializer=initializers.uniform(minval=0.3, maxval=0.7),
                                      constraint=dropout_constraint,
                                      trainable=True)
        # Entropy-style penalty on the learned probability, scaled by the
        # input feature count.
        self.add_loss(dropout_reg*input_shape[-1] *
                      (self.p * K.log(self.p) + (1-self.p) * K.log(1-self.p)))
    else:
        self.p = self.dropout
    # Same sentinel scheme for the recurrent-dropout rate.
    if self.recurrent_dropout == 1.0:
        self.p_r = self.cell.add_weight(name='p_recurrent',
                                        shape=(),
                                        initializer=initializers.uniform(minval=0.3, maxval=0.7),
                                        constraint=dropout_constraint,
                                        trainable=True)
        self.add_loss(dropout_reg*self.units *
                      (self.p_r * K.log(self.p_r) + (1-self.p_r) * K.log(1-self.p_r)))
    else:
        self.p_r = self.recurrent_dropout
    # weight loss: L2 penalties rescaled by the corresponding keep probability.
    self.add_loss(reg / (1.-self.p) * K.sum(K.square(self.cell.kernel)))
    self.add_loss(reg / (1.-self.p_r) * K.sum(K.square(self.cell.recurrent_kernel)))
    self.add_loss(reg * K.sum(K.square(self.cell.bias)))
    self.built = True
def _build_average_response_model(self):
    """Compile a softmax policy net over 3 actions for the average response."""
    # Kept for the commented initializer experiment below.
    initializer = initializers.uniform(minval=0.5, maxval=1)
    state = Input(shape=self.s_dim, name='input')
    # todo potential but needs more understanding
    x = Dense(self.n_hidden, activation='relu')(state)
    # x = Dense(self.n_hidden, activation='relu', kernel_initializer=initializer)(state)
    probs = Dense(3, activation='softmax')(x)
    net = Model(inputs=state, outputs=probs, name="br-model")
    net.compile(loss='categorical_crossentropy',
                optimizer=Adam(),
                metrics=['accuracy', 'mse'])
    return net
def EmgLstmNet(input_shape, classes, n_dropout=0., n_l2=0.0005,
               n_init='glorot_normal', lstm_units=[256]):
    """Stacked-LSTM EMG classifier: Masking -> LSTM stack -> softmax Dense.

    Args:
        input_shape: (timesteps, features) shape of one sequence.
        classes: number of output classes.
        n_dropout: dropout and recurrent dropout rate for every LSTM.
        n_l2: L2 strength applied to kernel and recurrent weights.
        n_init: name of the seeded initializer to use everywhere.
        lstm_units: units per stacked LSTM layer; all but the last return
            sequences. (Mutable default is kept for interface compatibility;
            it is never mutated here.)

    Raises:
        ValueError: if `n_init` is not a recognized initializer name.
            (Bug fix: previously an unknown name fell through every elif and
            crashed later with NameError on `kernel_init`.)
    """
    init_factories = {
        'glorot_normal': initializers.glorot_normal,
        'glorot_uniform': initializers.glorot_uniform,
        'he_normal': initializers.he_normal,
        'he_uniform': initializers.he_uniform,
        'normal': initializers.normal,
        'uniform': initializers.uniform,
    }
    if n_init not in init_factories:
        raise ValueError('unknown n_init: {!r}'.format(n_init))
    kernel_init = init_factories[n_init](seed=0)
    kernel_regl = regularizers.l2(n_l2)

    x_input = Input(input_shape)
    # -10.0 marks padded timesteps to be skipped.
    x = Masking(-10.0)(x_input)
    for i in range(len(lstm_units) - 1):
        x = LSTM(lstm_units[i],
                 dropout=n_dropout,
                 recurrent_dropout=n_dropout,
                 kernel_regularizer=kernel_regl,
                 kernel_initializer=kernel_init,
                 recurrent_regularizer=kernel_regl,
                 recurrent_initializer=kernel_init,
                 return_sequences=True,
                 input_shape=input_shape)(x)
    x = LSTM(lstm_units[-1],
             dropout=n_dropout,
             recurrent_dropout=n_dropout,
             kernel_regularizer=kernel_regl,
             kernel_initializer=kernel_init,
             recurrent_regularizer=kernel_regl,
             recurrent_initializer=kernel_init,
             return_sequences=False)(x)
    y = Dense(classes,
              activation='softmax',
              kernel_regularizer=kernel_regl,
              kernel_initializer=kernel_init)(x)
    model = Model(x_input, y)
    return model
def glorot_uniform_sigm(shape):
    """Glorot-style uniform initializer scaled for sigmoid activations.

    Samples in +/- 4*sqrt(6 / (fan_in + fan_out)). Keras'
    glorot_uniform() uses the plain sqrt(6 / (fan_in + fan_out)) range
    recommended for tanh; the Deeplearning.net MLP tutorial
    (http://deeplearning.net/tutorial/mlp.html#going-from-logistic-regression-to-mlp)
    recommends multiplying that range by 4 when sigmoid units are used,
    which is what this helper does.
    """
    fan_in, fan_out = _compute_fans(shape)
    scale = 4. * np.sqrt(6. / (fan_in + fan_out))
    return uniform(shape, scale)
def build(self, input_shape):
    """Create separable positional/channel weights with a shared uniform init.

    When `symmetric` is set, only the first ceil(len/2) positions carry
    weights. Both matrices are initialized uniformly within a fourth-root
    fan-based bound.
    """
    import numpy as np
    self.original_length = input_shape[1]
    if self.symmetric is False:
        self.length = input_shape[1]
    else:
        self.odd_input_length = input_shape[1] % 2.0 == 1
        # +0.5 turns the floor into a ceil for odd lengths
        self.length = int(input_shape[1] / 2.0 + 0.5)
    self.num_channels = input_shape[2]

    fan = self.length * self.num_channels + self.output_dim
    limit = np.sqrt(np.sqrt(2.0 / fan))
    uniform_init = initializers.uniform(-1 * limit, limit)

    self.W_pos = self.add_weight(shape=(self.output_dim, self.length),
                                 name='{}_W_pos'.format(self.name),
                                 initializer=uniform_init,
                                 constraint=self.positional_constraint,
                                 regularizer=self.smoothness_regularizer,
                                 trainable=True)
    self.W_chan = self.add_weight(shape=(self.output_dim, self.num_channels),
                                  name='{}_W_chan'.format(self.name),
                                  initializer=uniform_init,
                                  trainable=True)
    self.built = True
def create_critic_network(self, state_size, action_dim):
    """Critic Q(s, a): state features merged with the action, 3x512 relu, scalar value.

    Returns (model, action_input, state_input); the model is compiled with
    MSE / Adam. The final layer starts near zero via a small uniform init.
    """
    print("Now we build the model")
    S = Input(shape=[state_size])
    A = Input(shape=[action_dim], name='action2')
    s_feat = Dense(512, kernel_initializer='he_uniform', activation='relu')(S)
    merged = concatenate([s_feat, A])
    x = Dense(512, kernel_initializer='he_uniform', activation='relu')(merged)
    x = Dense(512, kernel_initializer='he_uniform', activation='relu')(x)
    V = Dense(1,
              kernel_initializer=uniform(minval=-3e-3, maxval=3e-3, seed=None),
              activation='linear')(x)
    model = Model(input=[S, A], output=V)
    model.compile(loss='mse', optimizer=Adam(lr=self.LEARNING_RATE))
    return model, A, S
def create_actor_network(self, state_size, action_dim):
    """Actor network: state -> 2x100 relu -> sigmoid actions rescaled to [0, 20].

    Returns (model, model.trainable_weights, state_input). The output layer
    starts near zero via a small uniform init (standard DDPG practice).

    Bug fix: the original created a throwaway `Sequential()` that was
    immediately shadowed by the functional Model below; that dead code (and
    stale commented-out initializer experiments) is removed.
    """
    S = Input(shape=[state_size])
    h0 = Dense(100, activation="relu", kernel_initializer="he_uniform")(S)
    h1 = Dense(100, activation="relu", kernel_initializer="he_uniform")(h0)
    V = Dense(action_dim,
              activation='sigmoid',
              kernel_initializer=uniform(minval=-3e-3, maxval=3e-3,
                                         seed=None))(h1)
    # Rescale the sigmoid output from [0, 1] to [0, 20].
    F = Lambda(lambda x: x * 20.0)(V)
    model = Model(input=S, output=F)
    return model, model.trainable_weights, S
def build_model(data):
    """Char-CNN + word + POS embeddings -> Bi-LSTM -> CRF sequence tagger.

    Bug fix: the Dropout layers after the concatenation and after the
    Bi-LSTM were instantiated as `Dropout()` with no rate argument, which
    raises a TypeError at build time; they now use `config.dropout`, the
    same rate already applied to the char embeddings.
    """
    # inputs
    chars_input = Input([data.max_sen_len, data.max_word_len], dtype='int32')
    words_input = Input([data.max_sen_len, ], dtype='int32')
    pos_input = Input([data.max_sen_len, ], dtype='int32')
    # embeddings
    scale = np.sqrt(3.0 / config.char_embed_dim)
    chars = Embedding(data.char_alphabet_size+2, config.char_embed_dim,
                      embeddings_initializer=RandomUniform(-scale, scale),
                      mask_zero=True)(chars_input)
    words = Embedding(*data.word_embeddings.shape,
                      weights=[data.word_embeddings],
                      trainable=False)(words_input)
    pos = Embedding(data.pos_alphabet_size+2, data.pos_alphabet_size,
                    embeddings_initializer='identity',
                    mask_zero=True)(pos_input)
    if config.dropout is not False:
        chars = Dropout(config.dropout)(chars)
    # char-level word feature
    cnn = Conv1D(config.num_filters, config.conv_window,
                 padding='same', activation='tanh')(chars)
    pool = GlobalMaxPool1D()(cnn)
    # word representation
    incoming = Concatenate()([words, pos, pool])
    if config.dropout is not False:
        incoming = Dropout(config.dropout)(incoming)  # fix: rate was missing
    # Bi-LSTM
    bi_lstm = Bidirectional(LSTM(
        config.num_units,
        kernel_initializer=glorot_uniform(),
        recurrent_initializer=uniform(-0.1, 0.1),
        bias_initializer=Constant(1.),
        recurrent_activation='tanh'
    ))(incoming)
    if config.dropout is not False:
        bi_lstm = Dropout(config.dropout)(bi_lstm)  # fix: rate was missing
    # CRF
    crf = CRF(data.num_labels)(bi_lstm)
    model = Model(inputs=[chars_input, words_input, pos_input], outputs=[crf])
    optimizer = SGD(lr=config.learning_rate, momentum=config.momentum)
    model.compile(loss=crf.loss_function, metrics=[crf.accuracy],
                  optimizer=optimizer)
    model.summary()
    return model
def create_model(self):
    """Embedding -> Flatten -> tied-free Dense softmax (word2vec-style) model.

    Takes a single token index and predicts a distribution over the full
    vocabulary; compiled with categorical crossentropy / RMSprop. Seeded
    inits keep runs reproducible.
    """
    seed = 20170719
    net = Sequential()
    net.add(Embedding(self.input_dim,
                      self.output_dim,
                      input_length=1,
                      embeddings_initializer=uniform(seed=seed)))
    net.add(Flatten())
    net.add(Dense(self.input_dim,
                  use_bias=False,
                  kernel_initializer=glorot_uniform(seed=seed)))
    net.add(Activation("softmax"))
    net.compile(loss="categorical_crossentropy",
                optimizer="RMSprop",
                metrics=['categorical_accuracy'])
    print('#2')
    return net
def __init__(self,
             layer_sizes: List[int],
             layer_activations: List[Any],
             state_shape: tuple,
             action_shape: tuple,
             layer_and_batch_norm: bool,
             l2_param_penalty: float = 0.00,
             **kwargs):
    """Build the Q-network: (state, action) concat -> layer-normed hiddens -> scalar q."""
    super().__init__(layer_sizes, layer_activations, state_shape, action_shape,
                     0, layer_and_batch_norm, l2_param_penalty)
    # Final layer starts near zero; hidden layers use fan-in uniform scaling.
    final_init = initializers.uniform(minval=-3e-3, maxval=3e-3)
    hidden_init = initializers.VarianceScaling(scale=1 / 3,
                                               mode='fan_in',
                                               distribution='uniform',
                                               seed=None)
    state = tf.keras.Input(shape=state_shape, name='state_input')
    action = tf.keras.Input(shape=action_shape, name='action_input')
    x = layers.Concatenate()([state, action])
    for idx in range(len(layer_sizes)):
        x = self.layer_with_layer_norm(x, idx, 'Q', ln_bias=0.,
                                       initializers=hidden_init)
    # keras bug workaround: simply delete the partition bit from the tf code
    q = layers.Dense(units=1,
                     bias_initializer=final_init,
                     kernel_initializer=final_init)(x)
    self.model = tf.keras.Model(inputs=[state, action], outputs=[q])
def create_model(self):
    """Embedding -> masked LSTM (0.5 dropout) -> softmax classifier, RMSprop.

    Same backbone as the binary variant but with explicit tanh/hard_sigmoid
    LSTM activations, heavy dropout, and a categorical softmax head. Seeded
    inits keep runs reproducible.
    """
    seed = 20170719
    net = Sequential()
    print('#3')
    net.add(Embedding(self.input_dim,
                      self.vec_dim,
                      input_length=self.maxlen,
                      embeddings_initializer=uniform(seed=seed)))
    net.add(BatchNormalization(axis=-1))
    print('#4')
    net.add(Masking(mask_value=0, input_shape=(self.maxlen, self.vec_dim)))
    net.add(LSTM(self.n_hidden,
                 batch_input_shape=(None, self.maxlen, self.vec_dim),
                 activation='tanh',
                 recurrent_activation='hard_sigmoid',
                 kernel_initializer=glorot_uniform(seed=seed),
                 recurrent_initializer=orthogonal(gain=1.0, seed=seed),
                 dropout=0.5,
                 recurrent_dropout=0.5))
    print('#5')
    net.add(BatchNormalization(axis=-1))
    net.add(Dropout(0.5, noise_shape=None, seed=None))
    print('#6')
    net.add(Dense(self.output_dim,
                  activation=None,
                  use_bias=True,
                  kernel_initializer=glorot_uniform(seed=seed)))
    net.add(Activation("softmax"))
    net.compile(loss="categorical_crossentropy",
                optimizer="RMSprop",
                metrics=['categorical_accuracy'])
    return net
def optimize_style_model(tmodel, cimage, simage):
    # Wrap a pre-trained transform model `tmodel` so that only its style
    # vector (a RawWeights layer) is optimized against fixed content/style
    # targets computed from `cimage` and `simage` (HWC numpy arrays).
    sdim, cdim = simage.shape, cimage.shape
    fcmodel = extract_features(cdim)
    fcmodel.trainable = False
    # Reuse the same feature model when both images have the same shape.
    if cdim == sdim:
        fsmodel = fcmodel
    else:
        fsmodel = extract_features(sdim)
        fsmodel.trainable = False
    cimage = cimage.reshape((1, cimage.shape[0], cimage.shape[1], cimage.shape[2]))
    # Target activations: output 0 appears to be content features and output 1
    # style features (presumably — TODO confirm against extract_features).
    cvals = fcmodel.predict(cimage)[0][0]
    simage = simage.reshape((1, simage.shape[0], simage.shape[1], simage.shape[2]))
    svals = fsmodel.predict(simage)[1][0]
    cinp = Input(shape=cimage.shape[1:], dtype="float32", name="content_img")
    # The style-vector shape is taken from the transform model's second input.
    style_shape = tmodel.input_shape[1][1:]
    dummy = Input(shape=style_shape, dtype="float32", name="dummy_style")
    from keras import initializers
    style = RawWeights(name='style', activation=None,
                       initializer=initializers.uniform(-0.01, 0.01))(dummy)
    timg = tmodel([cinp, style])
    # Identity Lambda only renames the transformed image output to "result".
    res = Lambda(lambda x: x, name="result")(timg)
    tfeats = fcmodel(timg)
    cerr = Lambda(lambda x: x - cvals, name="ContentError")(tfeats[0])
    serr = Lambda(lambda x: x - svals, name="StyleError")(tfeats[1])
    model = Model(inputs=[cinp, dummy], outputs=[res, cerr, serr])
    return model
def build(self, input_shape):
    # Create the single weight matrix W for this layer, optionally folding
    # symmetric positions and reverse-complement channel pairs.
    #input_shape[0] is the batch index
    #input_shape[1] is length of input
    #input_shape[2] is number of filters
    #Equivalent to 'fanintimesfanouttimestwo' from the paper
    limit = np.sqrt(6.0 / (input_shape[1] * input_shape[2] * 2))
    self.init = initializers.uniform(-1 * limit, limit)
    if (self.symmetric == False):
        W_length = input_shape[1]
    else:
        self.odd_input_length = input_shape[1] % 2.0 == 1
        #+0.5 below turns floor into ceil
        W_length = int(input_shape[1] / 2.0 + 0.5)
    if (self.input_is_revcomp_conv == False):
        W_chan = input_shape[2]
    else:
        assert input_shape[2]%2==0,\
            "if input is revcomp conv, # incoming channels would be even"
        W_chan = int(input_shape[2] / 2)
    self.W_shape = (W_length, W_chan)
    # b_shape is declared but no bias weight is ever created (see assert below).
    self.b_shape = (W_chan, )
    # NOTE(review): the shape is passed positionally to add_weight; on newer
    # Keras 2 signatures (name first) this would clash with the name kwarg —
    # confirm the pinned Keras version.
    self.W = self.add_weight(
        self.W_shape,
        initializer=self.init,
        name='{}_W'.format(self.name),
        regularizer=(None if self.smoothness_penalty is None
                     else regularizers.SmoothnessRegularizer(
                         self.smoothness_penalty)))
    if (self.bias):
        # Bias is deliberately unsupported in these experiments.
        assert False, "No bias was specified in original experiments"
    self.built = True
def create_model(self):
    # Seq2seq with additive-attention decoder. Builds three models sharing
    # weights: the full training model, the encoder-only model, and the
    # step-wise decoder model for inference.
    # Returns (model, encoder_model, decoder_model).

    # Encoder
    encoder_input = Input(shape=(self.maxlen_e, ), dtype='int32',
                          name='encorder_input')
    e_i = Embedding(
        output_dim=self.vec_dim,
        input_dim=self.input_dim,
        #input_length=self.maxlen_e,
        mask_zero=True,
        embeddings_initializer=uniform(seed=20170719))(encoder_input)
    e_i = BatchNormalization(axis=-1)(e_i)
    e_i = Masking(mask_value=0.0)(e_i)
    e_i_fw1, state_h_fw1, state_c_fw1 = LSTM(
        self.n_hidden, name='encoder_LSTM_fw1',  # forward, 1st layer
        return_sequences=True,
        return_state=True,
        kernel_initializer=glorot_uniform(seed=20170719),
        recurrent_initializer=orthogonal(gain=1.0, seed=20170719),
        #dropout=0.5, recurrent_dropout=0.5
    )(e_i)
    encoder_LSTM_fw2 = LSTM(
        self.n_hidden, name='encoder_LSTM_fw2',  # forward, 2nd layer
        return_sequences=True,
        return_state=True,
        kernel_initializer=glorot_uniform(seed=20170719),
        recurrent_initializer=orthogonal(gain=1.0, seed=20170719),
        dropout=0.5, recurrent_dropout=0.5)
    e_i_fw2, state_h_fw2, state_c_fw2 = encoder_LSTM_fw2(e_i_fw1)
    e_i_bw0 = e_i
    e_i_bw1, state_h_bw1, state_c_bw1 = LSTM(
        self.n_hidden, name='encoder_LSTM_bw1',  # backward, 1st layer
        return_sequences=True,
        return_state=True,
        go_backwards=True,
        kernel_initializer=glorot_uniform(seed=20170719),
        recurrent_initializer=orthogonal(gain=1.0, seed=20170719),
        #dropout=0.5, recurrent_dropout=0.5
    )(e_i_bw0)
    e_i_bw2, state_h_bw2, state_c_bw2 = LSTM(
        self.n_hidden, name='encoder_LSTM_bw2',  # backward, 2nd layer
        return_sequences=True,
        return_state=True,
        go_backwards=True,
        kernel_initializer=glorot_uniform(seed=20170719),
        recurrent_initializer=orthogonal(gain=1.0, seed=20170719),
        dropout=0.5, recurrent_dropout=0.5)(e_i_bw1)
    # Bidirectional fusion by element-wise addition (not concatenation).
    encoder_outputs = keras.layers.add([e_i_fw2, e_i_bw2],
                                       name='encoder_outputs')
    state_h_1 = keras.layers.add([state_h_fw1, state_h_bw1], name='state_h_1')
    state_c_1 = keras.layers.add([state_c_fw1, state_c_bw1], name='state_c_1')
    state_h_2 = keras.layers.add([state_h_fw2, state_h_bw2], name='state_h_2')
    state_c_2 = keras.layers.add([state_c_fw2, state_c_bw2], name='state_c_2')
    encoder_states1 = [state_h_1, state_c_1]
    encoder_states2 = [state_h_2, state_c_2]
    encoder_model = Model(inputs=encoder_input,
                          outputs=[encoder_outputs, state_h_1, state_c_1,
                                   state_h_2, state_c_2])

    # Decoder (for training)
    # Set up the decoder to return full output sequences and also its
    # internal states. return_sequences is not used in the training model,
    # but it is used for inference.
    a_states1 = encoder_states1
    a_states2 = encoder_states2
    # layer definitions (shared between training and inference decoders)
    decode_LSTM1 = LSTM(
        self.n_hidden,
        name='decode_LSTM1',
        return_sequences=True,
        return_state=True,
        kernel_initializer=glorot_uniform(seed=20170719),
        recurrent_initializer=orthogonal(gain=1.0, seed=20170719),
    )
    decode_LSTM2 = LSTM(self.n_hidden,
                        name='decode_LSTM2',
                        return_sequences=True,
                        return_state=True,
                        kernel_initializer=glorot_uniform(seed=20170719),
                        recurrent_initializer=orthogonal(gain=1.0, seed=20170719),
                        dropout=0.5,
                        recurrent_dropout=0.5)
    Dense1 = Dense(self.n_hidden, name='Dense1',
                   kernel_initializer=glorot_uniform(seed=20170719))
    Dense2 = Dense(
        self.n_hidden,
        name='Dense2',  # reduce dimensionality
        kernel_initializer=glorot_uniform(seed=20170719))
    a_Concat1 = keras.layers.Concatenate(axis=-1)
    # Slices pick the current timestep (slice1) and the remaining steps (slice2).
    a_decode_input_slice1 = Lambda(lambda x: x[:, 0, :],
                                   output_shape=(
                                       1,
                                       self.vec_dim,
                                   ),
                                   name='slice1')
    a_decode_input_slice2 = Lambda(lambda x: x[:, 1:, :], name='slice2')
    a_Reshape1 = keras.layers.Reshape((1, self.vec_dim))
    a_Dot1 = keras.layers.Dot(-1, name='a_Dot1')
    a_Softmax = keras.layers.Softmax(axis=-1, name='a_Softmax')
    a_transpose = keras.layers.Reshape((self.maxlen_e, 1), name='Transpose')
    a_Dot2 = keras.layers.Dot(1, name='a_Dot2')
    a_Concat2 = keras.layers.Concatenate(-1, name='a_Concat2')
    a_tanh = Lambda(lambda x: K.tanh(x), name='tanh')
    a_Concat3 = keras.layers.Concatenate(axis=-1, name='a_Concat3')
    decoder_Dense = Dense(self.output_dim,
                          activation='softmax',
                          name='decoder_Dense',
                          kernel_initializer=glorot_uniform(seed=20170719))
    # Initial attention context: a zero vector shaped like one encoder step.
    a_output = Lambda(lambda x: K.zeros_like(x[:, -1, :]),
                      output_shape=(
                          1,
                          self.n_hidden,
                      ))(encoder_outputs)
    a_output = keras.layers.Reshape((1, self.n_hidden))(a_output)
    decoder_inputs = Input(shape=(self.maxlen_d, ),
                           dtype='int32',
                           name='decorder_inputs')
    d_i = Embedding(
        output_dim=self.vec_dim,
        input_dim=self.input_dim,
        #input_length=self.maxlen_d,
        mask_zero=True,
        embeddings_initializer=uniform(seed=20170719))(decoder_inputs)
    d_i = BatchNormalization(axis=-1)(d_i)
    d_i = Masking(mask_value=0.0)(d_i)
    # d_i = Lambda(lambda x: 0.01*x)(d_i)
    d_input = d_i
    # Unrolled decoding loop: one graph step per target position.
    for i in range(self.maxlen_d):
        d_i_timeslice = a_decode_input_slice1(d_i)
        if i <= self.maxlen_d - 2:
            d_i = a_decode_input_slice2(d_i)
        d_i_timeslice = a_Reshape1(d_i_timeslice)
        # concat previous-step attention output with the decoder input
        lstm_input = a_Concat1([a_output, d_i_timeslice])
        d_i_1, h1, c1 = decode_LSTM1(lstm_input, initial_state=a_states1)
        h_output, h2, c2 = decode_LSTM2(d_i_1, initial_state=a_states2)
        a_states1 = [h1, c1]
        a_states2 = [h2, c2]
        #attention
        a_o = h_output
        a_o = Dense1(a_o)
        a_o = a_Dot1([a_o, encoder_outputs])  # multiply by transpose of encoder output
        a_o = a_Softmax(a_o)  # softmax
        a_o = a_transpose(a_o)
        a_o = a_Dot2([a_o, encoder_outputs])  # multiply by encoder output matrix
        a_o = a_Concat2([a_o, h_output])  # concat result so far with LSTM output
        a_o = Dense2(a_o)
        a_o = a_tanh(a_o)  # tanh
        a_output = a_o  # output fed to next step's attention
        if i == 0:
            # decoder output
            d_output = a_o
        else:
            d_output = a_Concat3([d_output, a_o])
    d_output = keras.layers.Reshape(
        (self.maxlen_d, self.n_hidden))(d_output)
    decoder_outputs = decoder_Dense(d_output)
    model = Model(inputs=[encoder_input, decoder_inputs],
                  outputs=decoder_outputs)
    model.compile(loss="categorical_crossentropy",
                  optimizer="Adam",
                  metrics=['categorical_accuracy'])

    # Decoder (response generation / inference)
    decoder_state_input_h_1 = Input(shape=(self.n_hidden, ), name='input_h_1')
    decoder_state_input_c_1 = Input(shape=(self.n_hidden, ), name='input_c_1')
    decoder_state_input_h_2 = Input(shape=(self.n_hidden, ), name='input_h_2')
    decoder_state_input_c_2 = Input(shape=(self.n_hidden, ), name='input_c_2')
    decoder_states_inputs_1 = [
        decoder_state_input_h_1, decoder_state_input_c_1
    ]
    decoder_states_inputs_2 = [
        decoder_state_input_h_2, decoder_state_input_c_2
    ]
    decoder_states_inputs = [
        decoder_state_input_h_1, decoder_state_input_c_1,
        decoder_state_input_h_2, decoder_state_input_c_2
    ]
    decoder_input_c = Input(shape=(1, self.n_hidden), name='decoder_input_c')
    decoder_input_encoded = Input(shape=(self.maxlen_e, self.n_hidden),
                                  name='decoder_input_encoded')
    # LSTM stage 1
    decoder_i_timeslice = a_Reshape1(a_decode_input_slice1(d_input))
    # concat previous-step attention output with the decoder input
    l_input = a_Concat1([decoder_input_c, decoder_i_timeslice])
    # initial_state differs from training time
    decoder_lstm_1, state_h_1, state_c_1 = decode_LSTM1(
        l_input, initial_state=decoder_states_inputs_1)
    # LSTM stage 2
    decoder_lstm_2, state_h_2, state_c_2 = decode_LSTM2(
        decoder_lstm_1, initial_state=decoder_states_inputs_2)
    decoder_states = [state_h_1, state_c_1, state_h_2, state_c_2]
    #attention
    attention_o = Dense1(decoder_lstm_2)
    attention_o = a_Dot1([attention_o, decoder_input_encoded])  # multiply by transpose of encoder output
    attention_o = a_Softmax(attention_o)  # softmax
    attention_o = a_transpose(attention_o)
    attention_o = a_Dot2([attention_o, decoder_input_encoded])  # multiply by encoder output matrix
    attention_o = a_Concat2([attention_o, decoder_lstm_2])  # concat result so far with LSTM output
    attention_o = Dense2(attention_o)
    decoder_o = a_tanh(attention_o)  # tanh
    decoder_res = decoder_Dense(decoder_o)
    decoder_model = Model(
        [decoder_inputs, decoder_input_c, decoder_input_encoded] +
        decoder_states_inputs, [decoder_res, decoder_o] + decoder_states)
    return model, encoder_model, decoder_model
Y_valid = Y_valid.drop(index=Y_valid.index) X_train = X_train.values X_valid = X_valid.values X_test = X_test.values Y_train = Y_train.values Y_valid = Y_valid.values optimizer = optimizers.adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-24, decay=0, amsgrad=False) kernel_initializer = initializers.uniform(minval=-0.05, maxval=0.05, seed=None) activation = 'relu' class RestoreBestWeightsFinal(keras.callbacks.Callback): def __init__(self, min_delta=0, mode='auto', baseline=None): super(RestoreBestWeightsFinal, self).__init__() self.min_delta = min_delta self.best_weights = None if mode not in ['auto', 'min', 'max']: mode = 'auto' if mode == 'min': self.monitor_op = np.less elif mode == 'max':
def model(self,
          upos_embed_dim=12,
          drel_embed_dim=16,
          use_morphology=True,
          hidden_units=256,
          hidden_layers=2,
          activation='relu',
          init='he_uniform',
          embed_init_max=0.5,
          embed_const='unit_norm',
          embed_dropout=0.25,
          hidden_const=None,
          hidden_dropout=0.25,
          output_const=None,
          optimizer='adamax'):
    """-> keras.models.Model

    Build and compile the transition classifier: embedded upos/drel (plus
    optional pretrained form/lemma embeddings and morphology features) are
    concatenated and fed through `hidden_layers` dense layers to a softmax
    over transitions. Requires `self.x` (feature arrays) to exist.
    """
    assert hasattr(self, 'x')
    # Coerce and sanity-check the hyperparameters.
    upos_embed_dim = int(upos_embed_dim)
    assert 0 <= upos_embed_dim
    drel_embed_dim = int(drel_embed_dim)
    assert 0 <= drel_embed_dim
    hidden_units = int(hidden_units)
    assert 0 < hidden_units or 0 == hidden_layers
    hidden_layers = int(hidden_layers)
    assert 0 <= hidden_layers
    embed_init_max = float(embed_init_max)
    assert 0 <= embed_init_max
    embed_dropout = float(embed_dropout)
    assert 0 <= embed_dropout < 1
    hidden_dropout = float(hidden_dropout)
    assert 0 <= hidden_dropout < 1

    # for coercing constraint
    def const(x):
        # Accept a float (-> max_norm), the string "none" (-> None), or pass
        # anything else (e.g. 'unit_norm') through to Keras unchanged.
        try:
            x = float(x)
            assert 0 < x
            x = max_norm(x)
        except (TypeError, ValueError):
            if isinstance(x, str) and "none" == x.lower():
                x = None
        return x

    # conversion
    output_const = const(output_const)
    hidden_const = const(hidden_const)
    embed_const = const(embed_const)
    embed_init = uniform(minval=-embed_init_max, maxval=embed_init_max)
    # all possible inputs
    form = Input(name="form", shape=self.x["form"].shape[1:], dtype=np.uint16)
    lemm = Input(name="lemm", shape=self.x["lemm"].shape[1:], dtype=np.uint16)
    upos = Input(name="upos", shape=self.x["upos"].shape[1:], dtype=np.uint8)
    drel = Input(name="drel", shape=self.x["drel"].shape[1:], dtype=np.uint8)
    feat = Input(name="feat", shape=self.x["feat"].shape[1:], dtype=np.float32)
    # cons layers
    i = [upos, drel]
    upos = Flatten(name="upos_flat")(Embedding(
        input_dim=len(self.upos2idx),
        output_dim=upos_embed_dim,
        embeddings_initializer=embed_init,
        embeddings_constraint=embed_const,
        name="upos_embed")(upos))
    drel = Flatten(name="drel_flat")(Embedding(
        input_dim=len(self.drel2idx),
        output_dim=drel_embed_dim,
        embeddings_initializer=embed_init,
        embeddings_constraint=embed_const,
        name="drel_embed")(drel))
    o = [upos, drel]
    # Pretrained form/lemma embeddings are only wired in when available.
    if self.form_emb is not None:
        i.append(form)
        form = Flatten(name="form_flat")(Embedding(
            input_dim=len(self.form2idx),
            output_dim=self.form_emb.shape[-1],
            weights=[self.form_emb],
            embeddings_constraint=embed_const,
            name="form_embed")(form))
        o.append(form)
    if self.lemm_emb is not None:
        i.append(lemm)
        lemm = Flatten(name="lemm_flat")(Embedding(
            input_dim=len(self.lemm2idx),
            output_dim=self.lemm_emb.shape[-1],
            weights=[self.lemm_emb],
            embeddings_constraint=embed_const,
            name="lemm_embed")(lemm))
        o.append(lemm)
    if embed_dropout:
        o = [
            Dropout(name="{}_dropout".format(x.name.split("_")[0]),
                    rate=embed_dropout)(x) for x in o
        ]
    if use_morphology:
        i.append(feat)
        o.append(feat)
    o = Concatenate(name="concat")(o)
    for hid in range(hidden_layers):
        o = Dense(units=hidden_units,
                  activation=activation,
                  kernel_initializer=init,
                  kernel_constraint=hidden_const,
                  name="hidden{}".format(1 + hid))(o)
        if hidden_dropout:
            o = Dropout(name="hidden{}_dropout".format(1 + hid),
                        rate=hidden_dropout)(o)
    o = Dense(units=len(self.idx2tran),
              activation='softmax',
              kernel_initializer=init,
              kernel_constraint=output_const,
              name="output")(o)
    m = Model(i, o, name="darc")
    m.compile(optimizer, 'sparse_categorical_crossentropy')
    return m
def GengNet(input_shape, classes, n_dropout=0.5, n_l2=0.0005, n_init='glorot_normal'):
    """Build the GengNet classification model.

    Arguments:
        input_shape -- tuple, (height, width, channels) of the input
        classes -- int, number of output classes for the final softmax
        n_dropout -- float, dropout rate before each Dense block
        n_l2 -- float, L2 weight-decay factor applied to all kernels
        n_init -- str, kernel initializer name, one of
            {'glorot_normal', 'glorot_uniform', 'he_normal',
             'he_uniform', 'normal', 'uniform'}

    Returns:
        keras.models.Model

    Raises:
        ValueError -- if n_init is not a recognized initializer name.
            (The original if/elif chain had no else branch, leaving
            kernel_init undefined and crashing later with NameError.)
    """
    # Map initializer names to factories; seed fixed for reproducibility.
    init_factories = {
        'glorot_normal': initializers.glorot_normal,
        'glorot_uniform': initializers.glorot_uniform,
        'he_normal': initializers.he_normal,
        'he_uniform': initializers.he_uniform,
        'normal': initializers.normal,
        'uniform': initializers.uniform,
    }
    try:
        kernel_init = init_factories[n_init](seed=0)
    except KeyError:
        raise ValueError(
            "GengNet: unknown n_init {!r}; expected one of {}".format(
                n_init, sorted(init_factories)))
    kernel_regl = regularizers.l2(n_l2)

    # Shared BatchNorm + ReLU tail used after every conv/local/dense layer.
    def _bn_relu(t):
        return Activation('relu')(BatchNormalization()(t))

    input_img = Input(input_shape)
    x = BatchNormalization()(input_img)

    # Convolutional stage: two 3x3 same-padded convolutions.
    x = _bn_relu(Conv2D(64, (3, 3), kernel_regularizer=kernel_regl,
                        kernel_initializer=kernel_init, padding='same')(x))
    x = _bn_relu(Conv2D(64, (3, 3), kernel_regularizer=kernel_regl,
                        kernel_initializer=kernel_init, padding='same')(x))

    # Locally-connected stage: two 1x1 layers with unshared per-position weights.
    x = _bn_relu(LocallyConnected2D(64, (1, 1), kernel_regularizer=kernel_regl,
                                    kernel_initializer=kernel_init,
                                    padding='valid')(x))
    x = _bn_relu(LocallyConnected2D(64, (1, 1), kernel_regularizer=kernel_regl,
                                    kernel_initializer=kernel_init,
                                    padding='valid')(x))

    # Dense stage. NOTE(review): Dense is applied to the still-4D tensor
    # (position-wise) and only flattened before the classifier head —
    # preserved from the original architecture.
    x = Dropout(n_dropout)(x)
    x = _bn_relu(Dense(512, kernel_regularizer=kernel_regl,
                       kernel_initializer=kernel_init)(x))
    x = Dropout(n_dropout)(x)
    x = _bn_relu(Dense(512, kernel_regularizer=kernel_regl,
                       kernel_initializer=kernel_init)(x))
    x = Dropout(n_dropout)(x)
    x = _bn_relu(Dense(128, kernel_regularizer=kernel_regl,
                       kernel_initializer=kernel_init)(x))

    # Classifier head.
    x = Flatten()(x)
    x = Dense(classes, kernel_regularizer=kernel_regl,
              kernel_initializer=kernel_init)(x)
    x = Activation('softmax')(x)

    model = Model(input_img, x, name='GengNet')
    return model
def uniform(scale):
    """Return a keras uniform initializer symmetric about zero.

    Values are drawn from [-scale, scale].
    """
    bound = scale
    return initializers.uniform(minval=-bound, maxval=bound)
def AtzoriNet(input_shape, classes, n_pool='average', n_dropout=0., n_l2=0.0005,
              n_init='glorot_normal', batch_norm=False):
    """ Creates the Deep Neural Network architecture described in the paper of Manfredo Atzori:
        Deep Learning with Convolutional Neural Networks Applied to Electromyography Data:
        A Resource for the Classification of Movements for Prosthetic Hands
        https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5013051/

        Arguments:
            input_shape -- tuple, dimensions of the input in the form (height, width, channels)
            classes -- integer, number of classes to be classified, defines the dimension of the softmax unit
            n_pool -- string, pool method to be used {'max', 'average'}
            n_dropout -- float, rate of dropping units
            n_l2 -- float, amount of weight decay regularization
            n_init -- string, type of kernel initializer {'glorot_normal', 'glorot_uniform', 'he_normal',
                      'he_uniform', 'normal', 'uniform'}
            batch_norm -- boolean, whether BatchNormalization is applied to the input

        Returns:
            model -- keras.models.Model (https://keras.io)

        Raises:
            ValueError -- if n_init is not a recognized initializer name.
                (The original if/elif chain had no else branch, leaving
                kernel_init undefined and crashing later with NameError.)
    """
    # Map initializer names to factories; seed fixed for reproducibility.
    init_factories = {
        'glorot_normal': initializers.glorot_normal,
        'glorot_uniform': initializers.glorot_uniform,
        'he_normal': initializers.he_normal,
        'he_uniform': initializers.he_uniform,
        'normal': initializers.normal,
        'uniform': initializers.uniform,
    }
    try:
        kernel_init = init_factories[n_init](seed=0)
    except KeyError:
        raise ValueError(
            "AtzoriNet: unknown n_init {!r}; expected one of {}".format(
                n_init, sorted(init_factories)))
    kernel_regl = regularizers.l2(n_l2)

    ## Block 0 [Input]
    X_input = Input(input_shape, name='b0_input')
    X = X_input
    if batch_norm:
        X = BatchNormalization()(X)

    ## Block 1 [Pad -> Conv -> ReLU -> Dropout]
    X = ZeroPadding2D((0, 4))(X)
    X = Conv2D(32, (1, 10), padding='valid', kernel_regularizer=kernel_regl,
               kernel_initializer=kernel_init, name='b1_conv2d_32_1x10')(X)
    X = Activation('relu', name='b1_relu')(X)
    X = Dropout(n_dropout, name='b1_dropout')(X)

    ## Block 2 [Pad -> Conv -> ReLU -> Dropout -> Pool]
    X = ZeroPadding2D((1, 1))(X)
    X = Conv2D(32, (3, 3), padding='valid', kernel_regularizer=kernel_regl,
               kernel_initializer=kernel_init, name='b2_conv2d_32_3x3')(X)
    X = Activation('relu', name='b2_relu')(X)
    X = Dropout(n_dropout, name='b2_dropout')(X)
    if n_pool == 'max':
        X = MaxPooling2D((3, 3), strides=(3, 3), name='b2_pool')(X)
    else:
        X = AveragePooling2D((3, 3), strides=(3, 3), name='b2_pool')(X)

    ## Block 3 [Pad -> Conv -> ReLU -> Dropout -> Pool]
    X = ZeroPadding2D((2, 2))(X)
    X = Conv2D(64, (5, 5), padding='valid', kernel_regularizer=kernel_regl,
               kernel_initializer=kernel_init, name='b3_conv2d_64_5x5')(X)
    X = Activation('relu', name='b3_relu')(X)
    X = Dropout(n_dropout, name='b3_dropout')(X)
    if n_pool == 'max':
        X = MaxPooling2D((3, 3), strides=(3, 3), name='b3_pool')(X)
    else:
        X = AveragePooling2D((3, 3), strides=(3, 3), name='b3_pool')(X)

    ## Block 4 [Pad -> Conv -> ReLU -> Dropout]
    X = ZeroPadding2D((2, 0))(X)
    X = Conv2D(64, (5, 1), padding='valid', kernel_regularizer=kernel_regl,
               kernel_initializer=kernel_init, name='b4_conv2d_64_5x1')(X)
    X = Activation('relu', name='b4_relu')(X)
    X = Dropout(n_dropout, name='b4_dropout')(X)

    ## Block 5 [Conv -> Softmax -> Reshape]: 1x1 conv acts as the classifier head.
    X = Conv2D(classes, (1, 1), padding='same', kernel_regularizer=kernel_regl,
               kernel_initializer=kernel_init,
               name='b5_conv2d_{}_1x1'.format(classes))(X)
    X = Activation('softmax', name='b5_soft')(X)
    X = Reshape((-1, ), name='b5_reshape')(X)

    model = Model(inputs=X_input, outputs=X, name='AtzoriNet')
    return model
# NOTE(review): adam / ModelCheckpoint / tf are imported here but not used in
# this visible fragment — presumably used further down the script; confirm.
from keras.optimizers import adam
from keras.callbacks import ModelCheckpoint
import tensorflow as tf
# GPU and/or CPU identification
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
# Initialising the CNN as a sequential stack of layers.
classifier = Sequential()
# Step 1 - Convolution: 64 filters of 4x4 over 128x128 RGB input,
# seeded uniform kernel init for reproducibility.
classifier.add(Conv2D(64, (4, 4), input_shape = (128, 128, 3), activation = 'relu', kernel_initializer=initializers.uniform(seed=42)))
# Step 2 - Pooling (first Batch normalization)
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(pool_size = (4, 4)))
# Adding a second convolutional layer (same filter count, smaller pool).
classifier.add(Conv2D(64, (4, 4), activation = 'relu', kernel_initializer=initializers.uniform(seed=42)))
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(pool_size = (2, 2)))
# Adding a third convolutional layer
from keras.layers import BatchNormalization
from keras import initializers
# NOTE(review): adam is imported but unused in this visible fragment —
# presumably used later for compilation; confirm.
from keras.optimizers import adam
# List available GPU/CPU devices for diagnostics.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
# Initialising the CNN: three Conv -> BatchNorm -> MaxPool blocks.
classifier = Sequential()
# Block 1: 64 filters of 4x4 over 128x128 RGB input; seeded uniform init.
classifier.add(
    Conv2D(64, (4, 4),
           input_shape=(128, 128, 3),
           activation='relu',
           kernel_initializer=initializers.uniform(seed=42)))
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(pool_size=(4, 4)))
# Block 2: same filter count, smaller pooling window.
classifier.add(
    Conv2D(64, (4, 4),
           activation='relu',
           kernel_initializer=initializers.uniform(seed=42)))
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(pool_size=(2, 2)))
# Block 3: channel count halves to 32.
classifier.add(
    Conv2D(32, (4, 4),
           activation='relu',
           kernel_initializer=initializers.uniform(seed=42)))
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(pool_size=(2, 2)))
def create_model(self):
    """Build the seq2seq (encoder/decoder LSTM) chatbot models.

    Returns three models sharing the same layers/weights:
        model         -- training model: [encoder_input, decoder_inputs]
                         -> decoder softmax outputs, compiled with a
                         masked cross-entropy loss and perplexity/accuracy
                         metrics.
        encoder_model -- inference encoder: encoder_input ->
                         [outputs, state_h, state_c].
        decoder_model -- inference decoder: [decoder_inputs, state_h,
                         state_c] -> [outputs, state_h, state_c].
    """
    len_norm = 2  # max norm length for weight constraints
    r_lambda = 0.00005  # regularizer lambda

    #***********************************************************************
    #                                                                      *
    # Layer-factory object construction                                    *
    #                                                                      *
    #***********************************************************************
    class_Dense = Layer_Dense(reg_lambda=r_lambda)
    class_LSTM = Layer_LSTM(reg_lambda=r_lambda)
    class_BatchNorm = Layer_BatchNorm(max_value=len_norm, reg_lambda=r_lambda)
    print('#3')

    #***********************************************************************
    #                                                                      *
    # Encoder (shared by training and response generation)                 *
    #                                                                      *
    #***********************************************************************
    #---------------------------------------------------------
    # Layer definitions
    #---------------------------------------------------------
    # Shared token embedding (also reused by the decoder below);
    # mask_zero so padding index 0 is masked through the LSTMs.
    embedding = Embedding(
        output_dim=self.vec_dim,
        input_dim=self.input_dim,
        mask_zero=True,
        name='Embedding',
        embeddings_initializer=uniform(seed=20170719),
        #embeddings_regularizer=regularizers.l2(r_lambda),
    )
    input_mask = Masking(mask_value=0, name="input_Mask")
    encoder_BatchNorm \
        = class_BatchNorm.create_BatchNorm(bn_name='encoder_BatchNorm')
    encoder_LSTM = class_LSTM.create_LSTM(self.n_hidden,
                                          lstm_return_state=True,
                                          lstm_name='encoder_LSTM')
    #---------------------------------------------------------
    # Input definition
    #---------------------------------------------------------
    # NOTE(review): 'encorder_input' is a (preserved) typo in the layer name.
    encoder_input = Input(shape=(self.maxlen_e, ),
                          dtype='int32',
                          name='encorder_input')
    e_input = input_mask(encoder_input)
    e_input = embedding(e_input)
    e_input = encoder_BatchNorm(e_input)
    #---------------------------------------------------------
    # Main processing: run the encoder LSTM, keeping final h/c states
    # to initialize the decoder.
    #---------------------------------------------------------
    encoder_outputs, \
    encoder_state_h, \
    encoder_state_c = encoder_LSTM(e_input)
    #---------------------------------------------------------
    # Encoder model definition
    #---------------------------------------------------------
    encoder_model = Model(
        inputs=encoder_input,
        outputs=[encoder_outputs, encoder_state_h, encoder_state_c])
    print('#4')

    #***********************************************************************
    # Decoder (for training)                                               *
    # The decoder is set up to return full output sequences and also       *
    # its internal states. return_sequences is not needed for training     *
    # per se, but it is used at inference time.                            *
    #***********************************************************************
    #=======================================================================
    # Layer definitions
    #=======================================================================
    #---------------------------------------------------------
    # Decoder-input Batch Normalization
    #---------------------------------------------------------
    decoder_BatchNorm \
        = class_BatchNorm.create_BatchNorm(bn_name='decoder_BatchNorm')
    #---------------------------------------------------------
    # Decoder LSTM
    #---------------------------------------------------------
    decoder_LSTM = class_LSTM.create_LSTM(self.n_hidden,
                                          lstm_return_state=True,
                                          lstm_return_sequences=True,
                                          lstm_name='decode_LSTM')
    #---------------------------------------------------------
    # Fully-connected softmax over the output vocabulary
    #---------------------------------------------------------
    decoder_Dense = class_Dense.create_Dense(self.output_dim,
                                             dense_activation='softmax',
                                             dense_name='decoder_Dense')
    #=======================================================================
    # Function definitions
    #=======================================================================
    #--------------------------------------------------------
    # Decoder main processing: LSTM seeded with the given states,
    # followed by the dense softmax. Reused for both training and
    # inference graphs.
    #--------------------------------------------------------
    def decoder_main(d_i, encoder_states):
        # LSTM
        d_outputs, \
        decoder_state_h, \
        decoder_state_c = decoder_LSTM(d_i, initial_state=encoder_states)
        # Fully connected
        decoder_outputs = decoder_Dense(d_outputs)
        return decoder_outputs, decoder_state_h, decoder_state_c
    #=======================================================================
    # Procedure
    #=======================================================================
    #---------------------------------------------------------
    # Input definition
    #---------------------------------------------------------
    decoder_inputs = Input(shape=(self.maxlen_d, ),
                           dtype='int32',
                           name='decoder_inputs')
    d_i = Masking(mask_value=0)(decoder_inputs)
    d_i = embedding(d_i)
    d_i = decoder_BatchNorm(d_i)
    d_input = d_i  # kept for reuse in the response-generation decoder
    #-----------------------------------------------------
    # Run the decoder for training, initialized with encoder states
    #-----------------------------------------------------
    encoder_states = [encoder_state_h, encoder_state_c]
    decoder_outputs, _, _ = decoder_main(d_i, encoder_states)
    #=======================================================================
    # Loss function, metrics and model definition
    #=======================================================================
    #---------------------------------------------------------
    # Loss function
    #---------------------------------------------------------
    # 0/1 mask over decoder timesteps: 1 where the input token id is
    # non-zero (non-padding). Captured by the closures below.
    mask = Lambda(lambda x: K.sign(x))(decoder_inputs)

    def cross_loss(y_true, y_pred):
        # Masked categorical cross-entropy averaged over non-pad steps.
        perp_mask = K.cast(mask, dtype='float32')
        sum_mask = K.sum(perp_mask, axis=-1, keepdims=True)
        #print('perp_mask1',K.int_shape(perp_mask))
        # Floor predictions at half their value + epsilon before log
        # to avoid log(0).
        epsilons = 1 / 2 * y_pred + K.epsilon()
        cliped = K.maximum(y_pred, epsilons)
        log_pred = -K.log(cliped)
        cross_e = y_true * log_pred
        cross_e = K.sum(cross_e, axis=-1)
        masked_entropy = perp_mask * cross_e
        sum_entropy = K.sum(masked_entropy, axis=-1, keepdims=True)
        celoss = sum_entropy / sum_mask
        # Repeat per timestep so the loss shape matches the output.
        celoss = K.repeat(celoss, self.maxlen_d)
        return celoss
    #---------------------------------------------------------
    # Perplexity metric: exp of the same masked cross-entropy.
    #---------------------------------------------------------
    def get_perplexity(y_true, y_pred):
        perp_mask = K.cast(mask, dtype='float32')
        sum_mask = K.sum(perp_mask, axis=-1, keepdims=True)
        epsilons = 1 / 2 * y_pred + K.epsilon()
        cliped = K.maximum(y_pred, epsilons)
        log_pred = -K.log(cliped)
        cross_e = y_true * log_pred
        cross_e = K.sum(cross_e, axis=-1)
        masked_entropy = perp_mask * cross_e
        sum_entropy = K.sum(masked_entropy, axis=-1, keepdims=True)
        perplexity = sum_entropy / sum_mask
        perplexity = K.exp(perplexity)
        perplexity = K.repeat(perplexity, self.maxlen_d)
        return perplexity
    #---------------------------------------------------------
    # Accuracy metric: fraction of non-pad timesteps whose argmax
    # prediction matches the argmax target.
    #---------------------------------------------------------
    def get_accuracy(y_true, y_pred):
        y_pred_argmax = K.argmax(y_pred, axis=-1)
        y_true_argmax = K.argmax(y_true, axis=-1)
        # sign(|true - pred|) is 0 on match, 1 on mismatch; invert it.
        n_correct = K.abs(y_true_argmax - y_pred_argmax)
        n_correct = K.sign(n_correct)
        n_correct = K.ones_like(n_correct, dtype='int64') - n_correct
        n_correct = K.cast(n_correct, dtype='int32')
        n_correct = n_correct * mask
        n_correct = K.cast(K.sum(n_correct, axis=-1, keepdims=True),
                           dtype='float32')
        n_total = K.cast(K.sum(mask, axis=-1, keepdims=True),
                         dtype='float32')
        accuracy = n_correct / n_total
        accuracy = K.repeat(accuracy, self.maxlen_d)
        #print('accuracy',K.int_shape(accuracy))
        return accuracy
    #---------------------------------------------------------
    # Training model definition and compilation
    #---------------------------------------------------------
    model = Model(inputs=[encoder_input, decoder_inputs],
                  outputs=decoder_outputs)
    model.compile(loss=cross_loss,
                  optimizer="Adam",
                  metrics=[get_perplexity, get_accuracy])

    #***********************************************************************
    #                                                                      *
    # Decoder (for response generation / inference)                        *
    #                                                                      *
    #***********************************************************************
    print('#6')
    #---------------------------------------------------------
    # Input definition: externally supplied h/c states
    #---------------------------------------------------------
    decoder_input_state_h = Input(shape=(self.n_hidden, ),
                                  name='decoder_input_state_h')
    decoder_input_state_c = Input(shape=(self.n_hidden, ),
                                  name='decoder_input_state_c')
    #---------------------------------------------------------
    # Run the decoder seeded with the supplied states
    #---------------------------------------------------------
    decoder_input_state = [decoder_input_state_h, decoder_input_state_c]
    res_decoder_outputs, \
    res_decoder_state_h, \
    res_decoder_state_c = decoder_main(d_input, decoder_input_state)
    print('#7')
    #---------------------------------------------------------
    # Inference decoder model definition
    #---------------------------------------------------------
    decoder_model = Model(inputs=[
        decoder_inputs, decoder_input_state_h, decoder_input_state_c
    ],
                          outputs=[
                              res_decoder_outputs, res_decoder_state_h,
                              res_decoder_state_c
                          ])

    return model, encoder_model, decoder_model