model.add(BatchNormalization())
model.add(LeakyReLU())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())

model.add(Dense(128))
model.add(BatchNormalization())
model.add(LeakyReLU())

model.add(Dense(num_classes))
model.add(BatchNormalization())
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.Nadam(),
              metrics=['accuracy'])

history = model.fit(training_data, training_target,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=2,
                    validation_data=(testing_data, testing_target))

score = model.evaluate(testing_data, testing_target, verbose=0)
print('Test loss: ', score[0])
print('Test accuracy: ', score[1])

model.save('model_1')
print(history.history.keys())
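# The history keys printed above (typically 'loss', 'acc', 'val_loss',
# 'val_acc' in Keras 2.x; 'accuracy'/'val_accuracy' in newer releases) can be
# plotted to inspect training. A minimal sketch, assuming matplotlib is
# available; adjust the key names to whatever the print statement reports.
import matplotlib.pyplot as plt

plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.xlabel('epoch')
plt.ylabel('categorical crossentropy')
plt.legend()
plt.show()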
def own_model(train_forward_data, train_backward_data, train_sense_embedding,
              test_f, test_b, test_i,
              val_forward_data=None, val_backward_data=None,
              val_sense_embedding=None,
              n_units=100, dense_units=256, is_training=True,
              EMBEDDING_DIM=100, epochs=100, batch_size=2048,
              init_word_vecs=None):
    model = get_model(n_units=n_units,
                      dense_unints=dense_units,
                      is_training=is_training,
                      emb_dim=EMBEDDING_DIM,
                      init_word_vecs=init_word_vecs,
                      max_sequence_length=40,
                      word_to_id=word_to_id)

    # Switchable optimizers
    opti = optimizers.Nadam(clipnorm=1.)  # , clipvalue=0.5
    # opti = optimizers.SGD(lr=0.00001, momentum=0.1)
    # opti = optimizers.Adam(lr=0.00001)
    model.compile(loss='mse', optimizer=opti, metrics=[cos_distance, get_f1])
    model.summary()

    early_stopping = EarlyStopping(monitor='val_loss', patience=10)
    bst_model_path = "weights.best.hdf5"
    model_checkpoint = ModelCheckpoint(bst_model_path,
                                       save_best_only=True,
                                       save_weights_only=True,
                                       verbose=1)

    hist = model.fit([train_forward_data, train_backward_data],
                     train_sense_embedding,
                     validation_data=([val_forward_data, val_backward_data],
                                      val_sense_embedding),
                     epochs=epochs,
                     batch_size=batch_size,
                     shuffle=True,
                     callbacks=[early_stopping, model_checkpoint])
    model.save('1_project_2_TT.h5')

    def get_embedded_sense(goal_key):
        for elem in train_data_:
            key = elem['target_sense']
            if key in goal_key:
                return elem['id']
        return -1

    '''
    Modified testing Code
    '''
    # Uses the testing target sense id to get the actual embedding from
    # target_sense_to_context_embedding. That actual embedding is then used to
    # calculate the cosine distance between it and the predicted vector.
    pred_a = model.predict([test_f, test_b])
    cos_sim_total = 0
    counter = 0
    not_testable = 0
    test_answers = get_test_ansers(23)
    for i in range(len(pred_a)):
        pred = pred_a[i]
        goal_id = test_i[i]
        idx = test_answers.index[test_answers['Targets'] == goal_id]
        # This is for the entries where the sense was either just 'U' or 'P' or both
        if len(idx) == 0:
            continue
        goal_key = test_answers.iloc[idx]['Senses'].to_numpy()[0]
        train_id_key = get_embedded_sense(goal_key)
        # This is in case the testing target sense is not in the training corpus
        if train_id_key == -1:
            not_testable += 1
            continue
        goal_embedding = target_sense_to_context_embedding.get(train_id_key)
        cos_sim = 1 - spatial.distance.cosine(goal_embedding, pred)
        cos_sim_total += cos_sim
        counter += 1
    print("Average Testing Cos Sim:", cos_sim_total / counter)
    print("Number of untestable due to the lack of a comparable embedding:", not_testable)
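# The compile() call above passes two custom metrics, cos_distance and get_f1,
# that are defined elsewhere in the project. A minimal sketch of what such a
# cosine-distance metric could look like in Keras backend terms; this is an
# assumption, not the project's actual code (get_f1 would follow the same
# backend-function pattern).
from keras import backend as K

def cos_distance(y_true, y_pred):
    # 1 - cosine similarity, averaged over the batch
    y_true = K.l2_normalize(y_true, axis=-1)
    y_pred = K.l2_normalize(y_pred, axis=-1)
    return K.mean(1.0 - K.sum(y_true * y_pred, axis=-1))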
        return loss * 1000
    return focal_loss

model = Sequential()
input_dim = xtrain.shape[1]
# nb_classes = y_train.shape[1]
model.add(Dense(input_dim=input_dim, units=1))
model.add(Activation('sigmoid'))

from keras import optimizers
opt = optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999,
                       epsilon=1e-08, schedule_decay=0.004)
model.compile(loss=binary_focal_loss(gamma=2, alpha=0.9),
              optimizer=opt, metrics=[f1])

history = model.fit(xtrain, ytrain, epochs=15, batch_size=500, verbose=1)

# Training
print("------------------Training performance--------------------------------------")
from sklearn.metrics import accuracy_score

y_predprob = model.predict(xtrain)            # predicted probabilities
prediction = (y_predprob > 0.5).astype(int)   # hard labels for the sklearn metrics
print("AUC Score (train): %f" % roc_auc_score(ytrain, y_predprob))
print("Precision is", precision_score(ytrain, prediction, average='binary'))
print("Recall is", recall_score(ytrain, prediction, average='binary'))
print("F1 score is", f1_score(ytrain, prediction, average='binary'))
print("Accuracy is", accuracy_score(ytrain, prediction))
print(confusion_matrix(ytrain, prediction))
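# The f1 metric passed to compile() above is a custom function defined
# elsewhere in the project; Keras 2.x has no built-in F1 metric. A common
# backend-based sketch, shown here as an assumption about what it may look
# like, not as the project's actual definition:
from keras import backend as K

def f1(y_true, y_pred):
    y_pred_bin = K.round(K.clip(y_pred, 0, 1))
    tp = K.sum(y_true * y_pred_bin)
    precision = tp / (K.sum(y_pred_bin) + K.epsilon())
    recall = tp / (K.sum(y_true) + K.epsilon())
    return 2 * precision * recall / (precision + recall + K.epsilon())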
model.add(LSTM(units=256, return_sequences=True))
model.add(Dropout(rate=0.5))
model.add(TimeDistributed(Dense(1)))

# set optimizers & callbacks
adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                       epsilon=None, decay=1e-6, amsgrad=False)
nadam = optimizers.Nadam(lr=0.0001, beta_1=0.9, beta_2=0.999,
                         epsilon=None, schedule_decay=0.001)

model.compile(loss="mean_squared_logarithmic_error",
              optimizer=nadam,
              metrics=["mean_squared_logarithmic_error"])

checkpoint = ModelCheckpoint(filepath=model_saved,
                             monitor="val_loss",
                             verbose=0,
                             save_best_only=True,
                             mode="min")
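# The checkpoint above only takes effect once it is passed to fit(). A minimal
# continuation sketch; x_train, y_train, and the hyperparameter values are
# placeholders, not names from the original snippet. The validation_split is
# needed so the monitored "val_loss" actually exists.
history = model.fit(x_train, y_train,
                    epochs=100,
                    batch_size=64,
                    validation_split=0.2,
                    callbacks=[checkpoint])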
def create_network(self):
    state_input = Input(shape=(self.state_size, ), name='state_input')

    # define the network
    h = state_input
    for _ in range(self.params['num_layers']):
        h = Dense(self.params['layer_size'], activation='relu')(h)
    q_output = Dense(self.params['num_points'], name='latent_q')(h)

    action_input = Input(shape=(self.action_size, ), name='action_input')
    temp_li = []
    a_li = []
    for a_index in range(self.params['num_points']):
        h = state_input
        temp = Dense(self.action_size,
                     activation='tanh',
                     kernel_initializer=RandomUniform(minval=-.1, maxval=+.1, seed=None),
                     bias_initializer=RandomUniform(minval=-1, maxval=+1, seed=None))(h)
        temp = Lambda(lambda x: x * self.env.action_space.high[0],
                      name="action" + str(a_index))(temp)
        a_li.append(temp)
        layer = Lambda(func_L2)
        temp = layer([temp, action_input])
        temp_li.append(temp)

    merged = Concatenate(axis=-1)(temp_li)
    merged = Lambda(lambda x: x * self.params['temperature'])(merged)
    softmax = Activation('softmax')(merged)
    final_q = dot([q_output, softmax], axes=1, normalize=False)

    model = Model(inputs=[state_input, action_input], outputs=final_q)

    if self.params['opt'] == 'adam':
        opt = optimizers.Adam(lr=self.params['learning_rate'])
    elif self.params['opt'] == 'nadam':
        opt = optimizers.Nadam(lr=self.params['learning_rate'])
    elif self.params['opt'] == 'rmsprop':
        opt = optimizers.RMSprop(lr=self.params['learning_rate'])
    model.compile(loss='mse', optimizer=opt)

    qRef_li = []
    for j in range(self.params['num_points']):
        each_qRef = []
        for i in range(self.params['num_points']):
            layer = Lambda(func_L2)
            each_qRef.append(layer([a_li[i], a_li[j]]))
        each_qRef = Concatenate(axis=-1)(each_qRef)
        each_qRef = Lambda(lambda x: x * self.params['temperature'])(each_qRef)
        each_qRef = Activation('softmax')(each_qRef)
        test_final_q = dot([q_output, each_qRef], axes=1, normalize=False)
        qRef_li.append(test_final_q)

    qRef_li = Model(inputs=state_input,
                    outputs=[Concatenate(axis=1)(a_li),
                             Concatenate(axis=-1)(qRef_li)])
    return model, qRef_li
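# func_L2 is referenced above but defined elsewhere in the project. Given that
# its outputs are temperature-scaled and softmaxed to weight the q_output
# values, it plausibly scores each anchor action by (negative) squared L2
# distance to the queried action. A sketch under that assumption only:
from keras import backend as K

def func_L2(tensors):
    a, b = tensors
    # negative squared L2 distance, so closer action pairs get larger
    # softmax weights downstream
    return -K.sum(K.square(a - b), axis=-1, keepdims=True)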
metrics = ['mean_absolute_error', 'mean_absolute_percentage_error']

lr = args.lrearning_rate
epsilon = args.epsilon

optimizer_selection = {
    'Adadelta': optimizers.Adadelta(lr=lr, rho=0.95, epsilon=epsilon, decay=0.0),
    'Adagrad': optimizers.Adagrad(lr=lr, epsilon=epsilon, decay=0.0),
    'Adam': optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999,
                            epsilon=epsilon, decay=0.0, amsgrad=False),
    'Adamax': optimizers.Adamax(lr=lr, beta_1=0.9, beta_2=0.999,
                                epsilon=epsilon, decay=0.0),
    'Nadam': optimizers.Nadam(lr=lr, beta_1=0.9, beta_2=0.999,
                              epsilon=epsilon, schedule_decay=0.004),
    'RMSprop': optimizers.RMSprop(lr=lr, rho=0.9, epsilon=epsilon, decay=0.0),
    'SGD': optimizers.SGD(lr=lr, momentum=0.0, decay=0.0, nesterov=False)
}

optimizer = optimizer_selection[args.optimizer]

model.compile(optimizer=optimizer,
              loss=loss_function,
              metrics=metrics)

# %%
# Save trained models for every epoch
def cnn_dropout_mnist(args):
    """
    Main function
    """
    # %%
    # IMPORTS

    # code repository sub-package imports
    from artificial_neural_networks.utils.download_mnist import download_mnist
    from artificial_neural_networks.utils.generic_utils import save_classif_model
    from artificial_neural_networks.utils.vis_utils import plot_confusion_matrix, epoch_plot

    # %%

    if args.verbose > 0:
        print(args)

    # For reproducibility
    if args.reproducible:
        os.environ['PYTHONHASHSEED'] = '0'
        np.random.seed(args.seed)
        rn.seed(args.seed)
        tf.set_random_seed(args.seed)
        sess = tf.Session(graph=tf.get_default_graph())
        K.set_session(sess)
        # print(hash("keras"))

    # %%
    # Load the MNIST dataset
    mnist_path = download_mnist()
    mnist = np.load(mnist_path)
    train_x = mnist['x_train'].astype(np.float32)
    train_y = mnist['y_train'].astype(np.int32)
    test_x = mnist['x_test'].astype(np.float32)
    test_y = mnist['y_test'].astype(np.int32)
    mnist.close()

    # %%
    # PREPROCESSING STEP
    scaling_factor = args.scaling_factor
    translation = args.translation

    img_width = train_x.shape[1]
    img_height = train_x.shape[2]

    n_train = train_x.shape[0]  # number of training examples/samples
    n_test = test_x.shape[0]  # number of test examples/samples

    n_in = img_width * img_height  # number of features / dimensions
    n_out = np.unique(train_y).shape[0]  # number of classes/labels

    # Reshape training and test sets
    train_x = train_x.reshape(n_train, img_width, img_height, 1)
    test_x = test_x.reshape(n_test, img_width, img_height, 1)

    # Apply preprocessing
    train_x = scaling_factor * (train_x - translation)
    test_x = scaling_factor * (test_x - translation)

    one_hot = False  # It works exactly the same for both True and False

    # Convert class vectors to binary class matrices (i.e. One hot encoding)
    if one_hot:
        train_y = to_categorical(train_y, n_out)
        test_y = to_categorical(test_y, n_out)

    # %%
    # Model hyperparameters and ANN Architecture
    N = []
    N.append(n_in)  # input layer
    if args.same_size:
        n_layers = args.n_layers
        for i in range(n_layers):
            N.append(args.layer_size)  # hidden layer i
    else:
        n_layers = len(args.explicit_layer_sizes)
        for i in range(n_layers):
            N.append(args.explicit_layer_sizes[i])  # hidden layer i
    N.append(n_out)  # output layer

    # ANN Architecture
    L = len(N) - 1

    x = Input(shape=(img_width, img_height, 1))  # input layer
    h = Dropout(rate=args.dropout_rate_input)(x)
    h = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(h)
    h = MaxPooling2D(pool_size=(2, 2))(h)
    h = Dropout(rate=args.dropout_rate_conv)(h)
    h = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(h)
    h = MaxPooling2D(pool_size=(2, 2))(h)
    h = Dropout(rate=args.dropout_rate_conv)(h)
    h = Flatten()(h)
    for i in range(1, L):
        h = Dense(units=N[i], activation='relu')(h)  # hidden layer i
        h = Dropout(rate=args.dropout_rate_hidden)(h)
    out = Dense(units=n_out, activation='softmax')(h)  # output layer

    model = Model(inputs=x, outputs=out)

    if args.verbose > 0:
        model.summary()

    if one_hot:
        loss_function = 'categorical_crossentropy'
    else:
        loss_function = 'sparse_categorical_crossentropy'

    metrics = ['accuracy']

    lr = args.lrearning_rate
    epsilon = args.epsilon
    optimizer_selection = {
        'Adadelta': optimizers.Adadelta(lr=lr, rho=0.95, epsilon=epsilon, decay=0.0),
        'Adagrad': optimizers.Adagrad(lr=lr, epsilon=epsilon, decay=0.0),
        'Adam': optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999,
                                epsilon=epsilon, decay=0.0, amsgrad=False),
        'Adamax': optimizers.Adamax(lr=lr, beta_1=0.9, beta_2=0.999,
                                    epsilon=epsilon, decay=0.0),
        'Nadam': optimizers.Nadam(lr=lr, beta_1=0.9, beta_2=0.999,
                                  epsilon=epsilon, schedule_decay=0.004),
        'RMSprop': optimizers.RMSprop(lr=lr, rho=0.9, epsilon=epsilon, decay=0.0),
        'SGD': optimizers.SGD(lr=lr, momentum=0.0, decay=0.0, nesterov=False)
    }

    optimizer = optimizer_selection[args.optimizer]

    model.compile(optimizer=optimizer, loss=loss_function, metrics=metrics)

    # %%
    # Save trained models for every epoch
    models_path = r'artificial_neural_networks/trained_models/'
    model_name = 'mnist_cnn_dropout'
    weights_path = models_path + model_name + '_weights'
    model_path = models_path + model_name + '_model'
    file_suffix = '_{epoch:04d}_{val_acc:.4f}_{val_loss:.4f}'

    if args.save_weights_only:
        file_path = weights_path
    else:
        file_path = model_path

    file_path += file_suffix

    # monitor = 'val_loss'
    monitor = 'val_acc'

    if args.save_models:
        checkpoint = ModelCheckpoint(file_path + '.h5',
                                     monitor=monitor,
                                     verbose=args.verbose,
                                     save_best_only=args.save_best_only,
                                     mode='auto',
                                     save_weights_only=args.save_weights_only)
        callbacks = [checkpoint]
    else:
        callbacks = []

    # %%
    # TRAINING PHASE
    if args.time_training:
        start = timer()

    model_history = model.fit(x=train_x,
                              y=train_y,
                              validation_data=(test_x, test_y),
                              batch_size=args.batch_size,
                              epochs=args.n_epochs,
                              verbose=args.verbose,
                              callbacks=callbacks)

    if args.time_training:
        end = timer()
        duration = end - start
        print('Total time for training (in seconds):')
        print(duration)

    # %%
    # TESTING PHASE
    train_y_pred = np.argmax(model.predict(train_x), axis=1)
    test_y_pred = np.argmax(model.predict(test_x), axis=1)

    train_score = model.evaluate(x=train_x, y=train_y, verbose=args.verbose)
    train_dict = {'loss': train_score[0], 'acc': train_score[1]}

    test_score = model.evaluate(x=test_x, y=test_y, verbose=args.verbose)
    test_dict = {'val_loss': test_score[0], 'val_acc': test_score[1]}

    if args.verbose > 0:
        print('Train loss:', train_dict['loss'])
        print('Train accuracy:', train_dict['acc'])
        print('Test loss:', test_dict['val_loss'])
        print('Test accuracy:', test_dict['val_acc'])

    # %%
    # Data Visualization
    if args.plot:
        # Confusion matrices
        classes = list(range(n_out))

        train_cm = confusion_matrix(train_y, train_y_pred)
        plot_confusion_matrix(train_cm, classes=classes,
                              title='Confusion matrix for training set')

        test_cm = confusion_matrix(test_y, test_y_pred)
        plot_confusion_matrix(test_cm, classes=classes,
                              title='Confusion matrix for test set')

        # Loss vs epoch
        epoch_axis = range(1, args.n_epochs + 1)
        train_loss = model_history.history['loss']
        test_loss = model_history.history['val_loss']
        epoch_plot(epoch_axis, train_loss, test_loss, 'Loss')

        # Accuracy vs epoch
        train_acc = model_history.history['acc']
        test_acc = model_history.history['val_acc']
        epoch_plot(epoch_axis, train_acc, test_acc, 'Accuracy')

    # %%
    # Save the architecture and the lastly trained model
    save_classif_model(model, models_path, model_name, weights_path,
                       model_path, file_suffix, test_dict, args)

    # %%

    return model
decoder_gru_output, _ = decoder_gru(dec_bn, initial_state=seq2seq_encoder_out)
x = BatchNormalization(name='Decoder-Batchnorm-2')(decoder_gru_output)

# Dense layer for prediction
decoder_dense = Dense(num_decoder_tokens, activation='softmax',
                      name='Final-Output-Dense')
decoder_outputs = decoder_dense(x)

########################
#### Seq2Seq Model ####

# seq2seq_decoder_out = decoder_model([decoder_inputs, seq2seq_encoder_out])
seq2seq_Model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

seq2seq_Model.compile(optimizer=optimizers.Nadam(lr=0.001),
                      loss='sparse_categorical_crossentropy')

seq2seq_Model.summary()

########################
#### Train the Model ####
from keras.callbacks import CSVLogger, ModelCheckpoint

# setup callbacks for model logging
script_name_base = 'keras_seq2seq'
csv_logger = CSVLogger('{:}.log'.format(script_name_base))
model_checkpoint = ModelCheckpoint(
    '{:}.epoch{{epoch:02d}}-val{{val_loss:.5f}}.hdf5'.format(script_name_base),
    save_best_only=True)
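# The logger and checkpoint above are only wired in once training starts. A
# minimal continuation sketch; encoder_input_data, decoder_input_data,
# decoder_target_data, and the batch size are placeholders, not names from
# the original snippet.
history = seq2seq_Model.fit([encoder_input_data, decoder_input_data],
                            decoder_target_data,
                            batch_size=1200,
                            epochs=7,
                            validation_split=0.12,
                            callbacks=[csv_logger, model_checkpoint])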
model.add(Dense(units=512, activation="relu"))
model.add(Dense(units=2, activation="softmax"))

# compilation with the Nadam optimizer
# and cross entropy loss

# visualize the model
model.summary()

model_final = model

# COMPILE
opt = optimizers.Nadam(learning_rate=0.00020, beta_1=0.9, beta_2=0.899)
model_final.compile(optimizer=opt,
                    loss=keras.losses.categorical_crossentropy,
                    metrics=['accuracy'])
model_final.summary()

from keras.callbacks import ModelCheckpoint, EarlyStopping

# checkpoint the model after each epoch
# (save_best_only=False keeps every epoch, not just the best one)
checkpoint = ModelCheckpoint("weights.h5",
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=False,
                             save_weights_only=False,
                  input_profile=input_profile)
    answer = TimeDistributed(Dense(dim_wordvec))(answer)

    # the original paper uses a matrix multiplication for this reduction step.
    # we choose to use a RNN instead.
    # TODO: provide options for this prediction step [lstm, cnn, dense]
    answer = LSTM(dim_lstm)(answer)  # (samples, 32)

    # one regularization layer -- more would probably be needed.
    answer = Dropout(dropout)(answer)
    answer = Dense(dim_output)(answer)  # (samples, vocab_size)
    # we output a probability distribution over the vocabulary
    answer = Activation('softmax')(answer)

    # build the final model
    nadam = optimizers.Nadam(lr=lr)
    if profile:
        model = Model([input_sequence, question, input_profile], answer)
    else:
        model = Model([input_sequence, question], answer)
    model.compile(optimizer=nadam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # model.summary()
    logger.debug('...Compile done.')
    return model

    # # train
    # model.fit([inputs_train, queries_train], answers_train,
    #           batch_size=32,
    #           epochs=120,
X, Y, ds = getnewdata(iids[2], config)
Xt, Yt, ds = getnewdata(
    iids[randint(config.getTestIndex()[0], config.getTestIndex()[1])], config)

model = Sequential()
s2s = SimpleSeq2Seq(batch_input_shape=(1, X.shape[1], X.shape[2]),
                    hidden_dim=1,
                    output_length=config.getWindows()[1],
                    output_dim=1)
model.add(s2s)
model.add(Dense(40, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(2999, activation='softmax'))

opt = optimizers.Nadam()
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

for rounds in range(0, config.getEpochs()):
    for index in range(config.getTrainIndex()[0], config.getTrainIndex()[1]):
        print("Index ", index, " Round ", rounds)
        X, Y, ds = getnewdata(iids[index], config)
        r = randint(config.getTestIndex()[0], config.getTestIndex()[1])
        print("random", r)
        Xt, Yt, ds = getnewdata(iids[r], config)
        model.fit(X, Y,
                  initial_epoch=rounds,
                  epochs=rounds + 1,
# create a Keras model for a multilayer perceptron with two hidden layers
# (multiclass classification, so the last layer uses softmax activation)
model = Sequential()
model.add(Dense(hidden_nodes, activation='relu',
                input_shape=(input_nodes, ), use_bias=False))
model.add(Dense(hidden_nodes, activation='relu',
                input_shape=(hidden_nodes, ), use_bias=False))
model.add(Dense(output_nodes, activation='softmax', use_bias=False))
model.summary()

opt = optimizers.Nadam(lr=learning_rate)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# setup callbacks
callbacks = [
    EarlyStopping(monitor='val_loss', patience=patience, verbose=1),
    ModelCheckpoint(model_name, monitor='val_loss',
                    save_best_only=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=lr_update_factor,
                      patience=lr_patience, verbose=1,
#### Encoder Model ####
encoder_inputs = Input(shape=(doc_length, ), name='Encoder-Input')
enc_out = encoder_model(encoder_inputs)

# first dense layer with batch norm
x = Dense(500, activation='relu')(enc_out)
x = BatchNormalization(name='bn-1')(x)
out = Dense(500)(x)
code2emb_model = Model([encoder_inputs], out)
code2emb_model.summary()

print("Starting the training")
from keras.callbacks import CSVLogger, ModelCheckpoint
from keras import optimizers

code2emb_model.compile(optimizer=optimizers.Nadam(lr=0.002),
                       loss='cosine_proximity')

script_name_base = 'code2emb_model_'
csv_logger = CSVLogger('{:}.log'.format(script_name_base))
model_checkpoint = ModelCheckpoint(
    '{:}.epoch{{epoch:02d}}-val{{val_loss:.5f}}.hdf5'.format(script_name_base),
    save_best_only=True)

batch_size = 20000
epochs = 15
history = code2emb_model.fit([encoder_input_data], fastailm_emb,
                             batch_size=batch_size,
                             epochs=epochs,
                             validation_split=0.12,
                             callbacks=[csv_logger, model_checkpoint])
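# Once trained, the model maps tokenized code to the shared 500-dimensional
# embedding space it was fit against. A minimal inference sketch;
# new_code_data is a placeholder for input preprocessed the same way as
# encoder_input_data, not a name from the original snippet.
code_embeddings = code2emb_model.predict(new_code_data, batch_size=20000)
print(code_embeddings.shape)  # expected: (n_samples, 500)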
    return model

try:
    # inputs may be scipy sparse matrices; densify and add a channel axis
    train_bow = np.array(train_bow.toarray())
    train_bow = train_bow.reshape(train_bow.shape[0], train_bow.shape[1], 1)
    test_bow = np.array(test_bow.toarray())
    test_bow = test_bow.reshape(test_bow.shape[0], test_bow.shape[1], 1)
except AttributeError:
    # already dense NumPy arrays
    pass

train_bow.shape

adm = optimizers.Adam(lr=1e-3, decay=1e-4)
sgd = optimizers.SGD(lr=1e-3, nesterov=True, momentum=0.7, decay=1e-4)
Nadam = optimizers.Nadam(lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

model = baseline_cnn_model(train_bow, 5, 'cla', adm)

y_train_final = to_categorical(Y_train)
y_test_final = to_categorical(Y_test)

num_epochs = 25
# train one epoch per pass; note the History object is overwritten each time
for epoch in range(num_epochs):
    print(epoch + 1, '/', num_epochs)
    model1 = model.fit(train_bow, Y_train,
                       batch_size=128,
                       epochs=1,
                       verbose=1,
                       validation_split=0.3)
def slm(self, data):
    """
    Returns Sequence Labeling Model.
    """
    seq = Input(shape=(None, ), name='INPUT')
    emb = Embedding(VOCAB_SIZE,
                    EXTVEC_DIM,
                    weights=[data.embedding],
                    mask_zero=True,
                    trainable=False,
                    name='WE')(seq)
    input_node = [seq]

    if args.use_flair:
        flair = Input(shape=(None, FLAIR_DIM), name='FLAIR')
        emb = concatenate([emb, flair], axis=-1, name='EMB_FLAIR')
        input_node.append(flair)

    if args.char_emb is not None:
        char_embedding = []
        for _ in range(CHAR_SIZE):
            scale = math.sqrt(3.0 / CHAR_DIM)
            char_embedding.append(np.random.uniform(-scale, scale, CHAR_DIM))
        char_embedding[0] = np.zeros(CHAR_DIM)
        char_embedding = np.asarray(char_embedding)

        char_seq = Input(shape=(None, None), name='CHAR_INPUT')
        char_emb = TimeDistributed(Embedding(CHAR_SIZE,
                                             CHAR_DIM,
                                             weights=[char_embedding],
                                             mask_zero=True,
                                             trainable=True),
                                   name='CHAR_EMB')(char_seq)
        if args.char_emb == 'lstm':
            char_emb = TimeDistributed(Bidirectional(
                LSTM(CHAR_LSTM_SIZE,
                     kernel_initializer=self.kernel_initializer,
                     recurrent_initializer=self.recurrent_initializer,
                     implementation=2,
                     return_sequences=False)),
                name="CHAR_BiLSTM")(char_emb)
        if args.char_emb == 'cnn':
            char_emb = TimeDistributed(MaskConv1D(
                filters=NUM_CHAR_CNN_FILTER,
                kernel_size=CHAR_CNN_KERNEL_SIZE,
                padding='same',
                kernel_initializer=self.kernel_initializer),
                name="CHAR_CNN")(char_emb)
            # max-over-time pooling collapses the char axis of the CNN output
            char_emb = TimeDistributed(Lambda(lambda x: K.max(x, axis=1)),
                                       name="MAX_POOLING")(char_emb)
        input_node.append(char_seq)
        emb = concatenate([emb, char_emb], axis=-1, name='EMB_CHAR')

    if args.backbone == 'lstm':
        dec = Bidirectional(LSTM(args.lstm_size,
                                 kernel_initializer=self.kernel_initializer,
                                 recurrent_initializer=self.recurrent_initializer,
                                 dropout=args.dropout_rate,
                                 recurrent_dropout=args.dropout_rate,
                                 implementation=2,
                                 return_sequences=True),
                            merge_mode='concat',
                            name='BiLSTM-1')(emb)
        '''
        enc_bilstm = Bidirectional(LSTM(args.lstm_size,
                                        kernel_initializer=self.kernel_initializer,
                                        recurrent_initializer=self.recurrent_initializer,
                                        dropout=args.dropout_rate,
                                        recurrent_dropout=args.dropout_rate,
                                        implementation=2,
                                        return_sequences=True),
                                   merge_mode='concat',
                                   name='BiLSTM-1')(emb)
        dec = Bidirectional(LSTM(args.lstm_size,
                                 kernel_initializer=self.kernel_initializer,
                                 recurrent_initializer=self.recurrent_initializer,
                                 dropout=args.dropout_rate,
                                 recurrent_dropout=args.dropout_rate,
                                 implementation=2,
                                 return_sequences=True),
                            merge_mode='concat',
                            name='BiLSTM-2')(enc_bilstm)
        '''
        if args.use_att:
            mhsa = MultiHeadSelfAttention(head_num=args.nb_head,
                                          size_per_head=args.size_per_head,
                                          kernel_initializer=self.kernel_initializer,
                                          name='MHSA')(dec)
            dec = concatenate([dec, mhsa], axis=-1, name='CONTEXT')

    if args.backbone == 'cnn':
        conv_1 = self.conv_block(emb, dilation_rate=DILATION_RATE[0], name='1')
        conv_2 = self.conv_block(conv_1, dilation_rate=DILATION_RATE[1], name='2')
        conv_3 = self.conv_block(conv_2, dilation_rate=DILATION_RATE[2], name='3')
        dec = self.conv_block(conv_3, dilation_rate=DILATION_RATE[-1],
                              use_dropout=False, name='4')

    if args.classifier == 'softmax':
        output = TimeDistributed(Dense(NUM_CLASS,
                                       activation='softmax',
                                       kernel_initializer=self.kernel_initializer),
                                 name='DENSE')(dec)
        loss_func = 'sparse_categorical_crossentropy'
    if args.classifier == 'crf':
        dense = TimeDistributed(Dense(NUM_CLASS,
                                      activation=None,
                                      kernel_initializer=self.kernel_initializer),
                                name='DENSE')(dec)
        crf = ChainCRF(init=self.kernel_initializer, name='CRF')
        output = crf(dense)
        loss_func = crf.sparse_loss

    optimizer = optimizers.Nadam(lr=self.lr, clipnorm=args.clip_norm)
    model = Model(inputs=input_node, outputs=output)
    model.compile(loss=loss_func, optimizer=optimizer)
    return model
merge_train_data = merge_train_data.values
merge_val_data = merge_val_data.values

# Model Definition
model = models.Sequential()
model.add(layers.Dense(64, kernel_regularizer=regularizers.l2(0.001),
                       activation='relu', input_shape=(14, )))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(64, kernel_regularizer=regularizers.l2(0.001),
                       activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))

# Compiling the model and configuring optimizer, loss and metrics
model.compile(optimizer=optimizers.Nadam(),
              loss='binary_crossentropy',
              metrics=[metrics.binary_accuracy])

# Training the model
history = model.fit(merge_train_data, merge_train_labels,
                    epochs=30, batch_size=512,
                    validation_data=(merge_val_data, merge_val_labels))
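# With the History object in hand, the epoch with the best validation
# accuracy can be read off directly. A short sketch; the key
# 'val_binary_accuracy' is the name Keras 2.x derives from the
# metrics.binary_accuracy function and may differ in other versions.
import numpy as np

val_acc = history.history['val_binary_accuracy']
best_epoch = int(np.argmax(val_acc)) + 1
print('Best epoch: %d (val accuracy %.4f)' % (best_epoch, max(val_acc)))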