def __init__(self,
             input_shape,
             number_of_classes,
             filtres=16,
             tailleBlock={'A': 10, 'B': 3, 'C': 3},
             optimiseur='Nadam',
             activation='elu',
             beta=1.1,
             initializer='he_normal',
             metrics=['accuracy'],
             learningR=None,  # e.g. 0.0005
             nb_gpu=2):
    get_custom_objects()['swish'] = swish
    get_custom_objects()['e_swish'] = e_swish
    self.input_shape = input_shape
    self.number_of_classes = number_of_classes
    self.filtres = filtres
    self.tailleBlock = tailleBlock
    # Instantiate the chosen optimizer, with an explicit learning rate if one
    # was provided, otherwise with the Keras defaults.
    optimizer_classes = {
        'SGD': SGD,
        'RMSprop': RMSprop,
        'Adagrad': Adagrad,
        'Adadelta': Adadelta,
        'Adam': Adam,
        'Adamax': Adamax,
        'Nadam': Nadam,
    }
    if learningR is not None:
        self.optimiseur = optimizer_classes[optimiseur](learning_rate=learningR)
    else:
        self.optimiseur = optimizer_classes[optimiseur]()
    self.activation = activation
    self.initializer = initializer
    self.nb_gpu = nb_gpu
    self.metrics = metrics
    # The value 3 means the color channels come last (channels_last);
    # -1 would also work, but I do not use that syntax.
    self.channel_axis = 3
def init_model(self):
    self.model = Sequential()
    self.model.add(Dense(self.hidden_units,
                         input_dim=self.input_units,
                         activation=self.activation))
    self.model.add(Dropout(self.dropout,
                           noise_shape=self.noise_shape,
                           seed=self.seed))
    self.model.add(Dense(self.output_units, activation=self.activation_last))
    if self.optimizer == 'RMSprop':
        opt = RMSprop(learning_rate=self.learning_rate)
    elif self.optimizer == 'Adadelta':
        opt = Adadelta(learning_rate=self.learning_rate)
    elif self.optimizer == 'SGD':
        opt = SGD(learning_rate=self.learning_rate)
    elif self.optimizer == 'Adagrad':
        opt = Adagrad(learning_rate=self.learning_rate)
    elif self.optimizer == 'Adamax':
        opt = Adamax(learning_rate=self.learning_rate)
    elif self.optimizer == 'Nadam':
        opt = Nadam(learning_rate=self.learning_rate)
    else:
        opt = Adam(learning_rate=self.learning_rate)
    self.model.compile(optimizer=opt, loss=self.loss, metrics=[self.metrics])
def set_optimizer(self, optimizer_name, lr):
    """Select the optimizer.

    Parameters
    ----------
    optimizer_name : str
        Name of the optimizer: 'adam', 'sgd', 'rmsprop', 'adagrad'
        or 'adadelta'.
    lr : float
        Learning rate.

    Raises
    ------
    Exception
        If the optimizer name is unknown.
    """
    if optimizer_name == 'adam':
        optimizer = Adam(lr=lr, beta_1=0.9, beta_2=0.999,
                         epsilon=None, decay=0.0, amsgrad=False)
    elif optimizer_name == 'sgd':
        optimizer = SGD(lr=lr, momentum=0.0, decay=0.0, nesterov=False)
    elif optimizer_name == 'rmsprop':
        optimizer = RMSprop(lr=lr, rho=0.9, epsilon=None, decay=0.0)
    elif optimizer_name == 'adagrad':
        optimizer = Adagrad(lr=lr, epsilon=None, decay=0.0)
    elif optimizer_name == 'adadelta':
        optimizer = Adadelta(lr=lr, rho=0.95, epsilon=None, decay=0.0)
    else:
        raise Exception('Optimizer unknown')
    return optimizer
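# A minimal, hedged usage sketch for set_optimizer above. `Trainer` is a
# hypothetical owner class, not part of the original code; the point is only
# to show the returned optimizer being wired into compile().
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

trainer = Trainer()  # hypothetical class exposing set_optimizer
opt = trainer.set_optimizer('adam', lr=1e-3)
clf = Sequential([Dense(8, input_dim=4, activation='relu'),
                  Dense(2, activation='softmax')])
clf.compile(optimizer=opt, loss='categorical_crossentropy',
            metrics=['accuracy'])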
def create_conv_NN(self):
    self.model = Sequential()
    nn_struct = self.nn_struct
    self.model.add(Dense(nn_struct[0], input_dim=self.input_dim))
    self.model.add(Reshape((int(nn_struct[0] / 2), 2),
                           input_shape=(nn_struct[0],)))
    self.model.add(Conv1D(filters=25,
                          kernel_size=10,
                          strides=1,
                          input_shape=(int(self.input_dim / 4), 4),
                          activation=self.activation_function))
    self.model.add(Flatten())
    self.model.add(Dense(nn_struct[1], activation=self.activation_function))
    self.model.add(Dense(self.output_dim, activation='softmax'))
    if self.optimizer == 'sgd':
        optimizer = SGD(learning_rate=self.lr, momentum=0.0, nesterov=False)
    elif self.optimizer == 'Adam':
        optimizer = Adam(learning_rate=self.lr)
    elif self.optimizer == 'RMSprop':
        optimizer = RMSprop(learning_rate=self.lr)
    elif self.optimizer == 'Adagrad':
        optimizer = Adagrad(learning_rate=self.lr)
    # Pass the optimizer instance built above, not the name string.
    self.model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    print(self.model.summary())
    return self.model
def compiled_model(INPUT_SHAPE: list, QNT_CLASS: int) -> tf.keras.Model:
    """Return a compiled model."""
    INPUT_SHAPE = tuple(INPUT_SHAPE)
    base_model = MobileNetV2(include_top=False,
                             weights='imagenet',
                             input_tensor=Input(shape=INPUT_SHAPE,
                                                name='inputs'))
    # Freeze the pretrained backbone; only the new head is trained.
    for layer in base_model.layers:
        layer.trainable = False
    mod = base_model.output
    mod = AveragePooling2D()(mod)
    mod = Flatten()(mod)
    mod = Dropout(0.5)(mod)
    mod = Dense(QNT_CLASS, activation='softmax')(mod)
    mod_retorno = Model(inputs=base_model.input, outputs=mod)
    mod_retorno.compile(loss=CategoricalCrossentropy(),
                        optimizer=Adagrad(),
                        metrics=[Accuracy(), Precision(), AUC(),
                                 FalseNegatives()])
    return mod_retorno
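# A hedged smoke test for compiled_model above; the 224x224 input shape and
# the 4-class head are assumptions, and the batch is placeholder data.
import numpy as np

clf = compiled_model([224, 224, 3], QNT_CLASS=4)
dummy_images = np.zeros((2, 224, 224, 3), dtype='float32')
print(clf.predict(dummy_images).shape)  # (2, 4): one softmax vector per image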
def sfd_model(optimizer, learning_rate):
    """NVIDIA self-driving-car-inspired architecture."""
    if optimizer == 'adagrad':
        optimizer = Adagrad(lr=learning_rate)
    elif optimizer == 'sgd':
        optimizer = SGD(lr=learning_rate)
    elif optimizer == 'rmsprop':
        optimizer = RMSprop(lr=learning_rate)
    else:
        optimizer = Adam(lr=learning_rate)
    model = Sequential()
    model.add(Conv2D(24, 5, 2, input_shape=(66, 200, 3), activation='elu'))
    model.add(Conv2D(36, 5, 2, activation='elu'))
    model.add(Conv2D(48, 5, 2, activation='elu'))
    model.add(Conv2D(64, 3, activation='elu'))
    model.add(Conv2D(64, 3, activation='elu'))
    model.add(Flatten())
    model.add(Dense(128, activation='elu'))
    model.add(Dense(64, activation='elu'))
    model.add(Dropout(0.5))
    model.add(Dense(16, activation='elu'))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    # metrics= expects a list; note that Accuracy is of limited use for a
    # regression (MSE) output.
    model.compile(loss='mse',
                  optimizer=optimizer,
                  metrics=[metrics.Accuracy(name='Accuracy')])
    return model
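# A hedged smoke test for sfd_model: build the network and push a placeholder
# batch through it. The 66x200x3 frame shape comes from the model definition.
import numpy as np

sdc = sfd_model('adam', 1e-3)
frames = np.zeros((4, 66, 200, 3), dtype='float32')
print(sdc.predict(frames).shape)  # (4, 1): one steering value per frame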
def create_NN(self):
    self.model = Sequential()
    nn_struct = self.nn_struct
    num_layers = len(nn_struct)
    self.model.add(Dense(nn_struct[0], input_dim=self.input_dim))
    for i in range(1, num_layers):
        self.model.add(Dense(nn_struct[i],
                             activation=self.activation_function))
    self.model.add(Dense(self.output_dim, activation='softmax'))
    if self.optimizer == 'sgd':
        optimizer = SGD(learning_rate=self.lr, momentum=0.0, nesterov=False)
    elif self.optimizer == 'Adam':
        optimizer = Adam(learning_rate=self.lr)
    elif self.optimizer == 'RMSprop':
        optimizer = RMSprop(learning_rate=self.lr)
    elif self.optimizer == 'Adagrad':
        optimizer = Adagrad(learning_rate=self.lr)
    # Pass the optimizer instance built above, not the name string.
    self.model.compile(loss='mse', optimizer=optimizer)
    self.init_weights()
    print(self.model.summary())
    return self.model
def main():
    args = parse_args()
    with open(args.config_file) as fp:
        config = yaml.safe_load(fp)
    class_weight = config["class_weight"]
    batch_size = config["batch_size"]
    n_epochs = config["n_epochs"]
    seq_length = config["seq_length"]
    audio_path = args.audio_path
    csv_path = args.csv_path
    x_train, x_val = make_datasets(audio_path, csv_path, seq_length, 0)
    t_train, t_val = make_datasets(audio_path, csv_path, seq_length, 1)
    model = build_model(seq_length)
    model.compile(loss='binary_crossentropy',
                  optimizer=Adagrad(),
                  metrics=['accuracy', precision, recall])
    history = model.fit(x_train,
                        t_train,
                        batch_size=batch_size,
                        epochs=n_epochs,
                        verbose=1,
                        validation_data=(x_val, t_val),
                        class_weight=class_weight)
    print(model.summary())
def __init__(self):
    self.midi_notes = 78
    self.midi_ticks = 881
    self.midi_shape = (self.midi_ticks, self.midi_notes, 2)
    self.latent_dim = 100
    optimizer = Adagrad()
    # Build and compile the discriminator
    self.discriminator = self.build_discriminator()
    self.discriminator.compile(loss='binary_crossentropy',
                               optimizer=optimizer,
                               metrics=['accuracy'])
    # Build the generator
    self.generator = self.build_generator()
    # The generator takes noise as input and generates MIDIs
    z = Input(shape=(self.latent_dim,))
    result = self.generator(z)
    # For the combined model we only train the generator
    self.discriminator.trainable = False
    # The discriminator takes generated MIDIs as input and scores validity
    validity = self.discriminator(result)
    # The combined model (stacked generator and discriminator)
    # trains the generator to fool the discriminator
    self.combined = Model(z, validity)
    self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)
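# A hedged sketch (not from the original code) of the standard alternating
# GAN update this constructor sets up: fit the discriminator on real vs.
# generated batches, then fit the generator through the combined model.
import numpy as np

def train_step(self, real_midis, batch_size=32):
    valid = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))
    noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
    generated = self.generator.predict(noise)
    d_loss_real = self.discriminator.train_on_batch(real_midis, valid)
    d_loss_fake = self.discriminator.train_on_batch(generated, fake)
    # The generator is updated to make the frozen discriminator say "valid"
    g_loss = self.combined.train_on_batch(noise, valid)
    return d_loss_real, d_loss_fake, g_loss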
def get_optimizer(name, lr):
    if name == 'sgd':
        return SGD(lr=lr)
    elif name == 'adagrad':
        return Adagrad(lr=lr)
    elif name == 'rmsprop':
        return RMSprop(lr=lr)
    else:
        raise NotImplementedError
def create_facenet_nn2(image_size, channels, alpha, lr):
    inp = Input((*image_size, channels), name='input')
    out = conv(64, (7, 7), 2, name='conv1')(inp)
    out = max_pooling((3, 3), 2, name='pool1')(out)
    out = LRN(name='lrn1')(out)
    out = conv(64, (1, 1), 1, name='inception2/3x3_reduce')(out)
    out = conv(192, (3, 3), 1, name='inception2/3x3')(out)
    out = LRN(name='lrn2')(out)
    out = max_pooling((3, 3), 2, name='pool2')(out)
    out = inception_module(out, (64, 96, 128, 16, 32, 32),
                           pooling='max', name='inception_3a')
    out = inception_module(out, (64, 96, 128, 32, 64, 64),
                           pooling='l2', name='inception_3b')
    out = inception_module_partial(out, (128, 256, 32, 64),
                                   name='inception_3c')
    out = inception_module(out, (256, 96, 192, 32, 64, 128),
                           pooling='l2', name='inception_4a')
    out = inception_module(out, (224, 112, 224, 32, 64, 128),
                           pooling='l2', name='inception_4b')
    out = inception_module(out, (192, 128, 256, 32, 64, 128),
                           pooling='l2', name='inception_4c')
    out = inception_module(out, (160, 144, 288, 32, 64, 128),
                           pooling='l2', name='inception_4d')
    out = inception_module_partial(out, (160, 256, 64, 128),
                                   name='inception_4e')
    out = inception_module(out, (384, 192, 384, 48, 128, 128),
                           pooling='max', name='inception_5a')
    out = inception_module(out, (384, 192, 384, 48, 128, 128),
                           pooling='l2', name='inception_5b')
    out = GlobalAveragePooling2D(name='avg_pool')(out)
    out = Dropout(0.4)(out)
    out = dense(128, name='fc')(out)
    out = L2Normalize(name='embeddings')(out)
    facenet = Model(inp, out, name='FaceNet_NN2')
    facenet.summary()
    triplet_loss = TripletSemiHardLoss(alpha)
    opt = Adagrad(lr)  # renamed from sgd_opt: the optimizer is Adagrad, not SGD
    facenet.compile(opt, triplet_loss)
    return facenet
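# A hedged usage sketch for create_facenet_nn2, assuming the custom helpers
# (conv, LRN, inception_module, L2Normalize, ...) are importable and that
# 224x224 RGB inputs are used. Note that TripletSemiHardLoss expects integer
# identity labels, not one-hot vectors.
import numpy as np

facenet = create_facenet_nn2(image_size=(224, 224), channels=3,
                             alpha=0.2, lr=0.05)
images = np.zeros((8, 224, 224, 3), dtype='float32')  # placeholder faces
identities = np.array([0, 0, 1, 1, 2, 2, 3, 3])       # placeholder labels
facenet.fit(images, identities, epochs=1, batch_size=8)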
def setup_to_fine_tune(model, base_model):
    GAP_LAYER = 17  # max_pooling_2d_2
    for layer in base_model.layers[:GAP_LAYER + 1]:
        layer.trainable = False
    for layer in base_model.layers[GAP_LAYER + 1:]:
        layer.trainable = True
    model.compile(optimizer=Adagrad(lr=0.0001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
def get_optimizer(name, lr, epochs):
    """Return an optimizer instance based on its name."""
    if name == 'Adam':
        return Adam(lr=lr, decay=lr / epochs)
    elif name == 'Adagrad':
        return Adagrad(lr=lr, decay=lr / epochs)
    elif name == 'SGD':
        return SGD(lr=lr, momentum=0.9, decay=lr / epochs)
    raise ValueError(f'Unknown optimizer: {name}')
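# Context for the decay=lr/epochs choice above: legacy Keras applies
#   lr_t = lr / (1 + decay * iterations),
# with `iterations` counted per batch, so the end-of-training rate also
# depends on steps_per_epoch. A hedged sketch (step counts are assumptions):
lr, epochs, steps_per_epoch = 0.01, 10, 100
decay = lr / epochs
for epoch in (0, 5, 10):
    iterations = epoch * steps_per_epoch
    print(epoch, lr / (1 + decay * iterations))
# With these numbers the rate falls from 0.01 to 0.005 by the last epoch.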
def build_model():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(128, (3, 3), padding="same",
                               input_shape=(48, 48, 1),
                               kernel_regularizer=l2(0.01)),  # 48x48
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.Conv2D(64, (3, 3), padding="same",
                               kernel_regularizer=l2(0.01)),  # 48x48
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling2D(2, 2),  # 24x24
        tf.keras.layers.Conv2D(64, (3, 3), padding="same",
                               kernel_regularizer=l2(0.01)),  # 24x24
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling2D(2, 2),  # 12x12
        tf.keras.layers.Conv2D(128, (3, 3), padding="same",
                               kernel_regularizer=l2(0.01)),  # 12x12
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.Conv2D(128, (3, 3), kernel_regularizer=l2(0.01)),  # 10x10
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.Conv2D(128, (3, 3), kernel_regularizer=l2(0.01)),  # 8x8
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling2D(2, 2),  # 4x4
        tf.keras.layers.Conv2D(512, (3, 3), padding="same",
                               kernel_regularizer=l2(0.01)),  # 4x4
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.Conv2D(512, (3, 3), kernel_regularizer=l2(0.01)),  # 2x2
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.Conv2D(1024, (1, 1), kernel_regularizer=l2(0.01)),  # 2x2
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation('relu'),
        tf.keras.layers.MaxPooling2D(2, 2),  # 1x1
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(3, activation=tf.nn.softmax)
    ])
    model.compile(optimizer=Adagrad(lr=0.01),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
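# A hedged smoke test for build_model above: the 48x48x1 input suggests
# grayscale crops (e.g. an emotion-style dataset); the batch is placeholder
# data.
import numpy as np

cnn = build_model()
batch = np.zeros((2, 48, 48, 1), dtype='float32')
print(cnn.predict(batch).shape)  # (2, 3): one 3-way softmax per image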
def create_implExModel(num_nodes,
                       num_edges,
                       embed_size=50,
                       n3_reg=1e-3,
                       learning_rate=5e-1,
                       num_negs=50,
                       alpha=1.,
                       beta=1.):
    # Build the ComplEx model
    sub_inputs = Input(shape=(), name='subject')
    obj_inputs = Input(shape=(), name='object')
    rel_inputs = Input(shape=(), name='relation')
    cnt_inputs = Input(shape=(), name='count')
    y_true_inputs = Input(shape=(), name='label')
    inputs = {
        "subject": sub_inputs,
        "object": obj_inputs,
        "relation": rel_inputs,
        "count": cnt_inputs,
        "label": y_true_inputs
    }
    node_layer = Embedding(input_dim=num_nodes,
                           output_dim=embed_size,
                           embeddings_initializer=GlorotUniform(),
                           name='node_embedding')
    edge_layer = Embedding(input_dim=num_edges,
                           output_dim=embed_size,
                           embeddings_initializer=GlorotUniform(),
                           name='edge_embedding')
    sub_embed = node_layer(sub_inputs)
    rel_embed = edge_layer(rel_inputs)
    obj_embed = node_layer(obj_inputs)
    outputs = ComplExDotScore(n3_reg)([sub_embed, rel_embed, obj_embed])
    model = Model(inputs, outputs, name='implEx')
    # Compile the implEx model with a confidence-weighted cross-entropy loss
    wbce_loss = tf.nn.weighted_cross_entropy_with_logits(
        y_true_inputs, outputs, num_negs) / num_negs
    confidence = 1 + alpha * tf.math.log(1 + cnt_inputs / beta)
    loss = K.sum(confidence * wbce_loss)
    model.add_loss(loss)
    model.add_metric(K.mean(wbce_loss), 'weighted_binarycrossentropy')
    model.compile(optimizer=Adagrad(learning_rate))
    return model
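# A hedged usage sketch for create_implExModel: because the loss is attached
# via add_loss, fit() takes only the feature dict (keys must match the Input
# names above). The toy triples, counts and labels are placeholder data.
import numpy as np

kge = create_implExModel(num_nodes=10, num_edges=3, embed_size=8)
features = {
    "subject": np.array([0, 1, 2]),
    "object": np.array([3, 4, 5]),
    "relation": np.array([0, 1, 2]),
    "count": np.array([1., 2., 3.], dtype='float32'),
    "label": np.array([1., 0., 1.], dtype='float32'),
}
kge.fit(features, epochs=1, batch_size=3)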
def test_kwargs():
    n = 100
    d = 2
    x, y, theta = glmdisc.Glmdisc.generate_data(n, d)
    model = glmdisc.Glmdisc(algorithm="NN", validation=False, test=False)
    # `optim` is not a recognized keyword (the real one is `optimizer`), so
    # the model falls back to its default Adam optimizer.
    model.fit(predictors_cont=x,
              predictors_qual=None,
              labels=y,
              plot=True,
              optim=Adagrad(),
              callbacks=EarlyStopping())
    assert isinstance(model.model_nn.optimizer,
                      tensorflow.python.keras.optimizer_v2.adam.Adam)
    assert isinstance(model.callbacks[-1],
                      tensorflow.python.keras.callbacks.EarlyStopping)
def get_optimizer(optim="adam", learning_rate=1e-3):
    if optim == "adam":
        return Adam(learning_rate=learning_rate)
    elif optim == "adagrad":
        return Adagrad(learning_rate=learning_rate)
    elif optim == "sgd":
        return SGD(learning_rate=learning_rate)
    elif optim == "rmsprop":
        return RMSprop(learning_rate=learning_rate)
    elif optim == "adadelta":
        return Adadelta(learning_rate=learning_rate)
    else:
        logger.error(f"Invalid optim {optim}")
        os._exit(0)
def compile_model(self, model_params):
    self.model_params = model_params
    if model_params['optimizer'] == 'adam':
        optimizer = Adam(learning_rate=model_params['lr'])
    elif model_params['optimizer'] == 'adagrad':
        optimizer = Adagrad(learning_rate=model_params['lr'])
    elif model_params['optimizer'] == 'rmsprop':
        optimizer = RMSprop(learning_rate=model_params['lr'])
    elif model_params['optimizer'] == 'sgd':
        optimizer = SGD(learning_rate=model_params['lr'])
    else:
        # Without this branch, an unknown name raised UnboundLocalError below.
        raise ValueError(f"Unknown optimizer: {model_params['optimizer']}")
    self.model.compile(optimizer=optimizer,
                       loss='categorical_crossentropy',
                       metrics=['categorical_accuracy'])
def test_kwargs(caplog):
    n = 100
    d = 2
    x, y, theta = glmdisc.Glmdisc.generate_data(n, d, plot=True)
    model = glmdisc.Glmdisc(algorithm="NN", validation=False, test=False)
    model.fit(predictors_cont=x,
              predictors_qual=None,
              labels=y,
              plot=True,
              optimizer=Adagrad(),
              callbacks=EarlyStopping())
    assert isinstance(model.model_nn['tensorflow_model'].optimizer,
                      tensorflow.python.keras.optimizer_v2.adagrad.Adagrad)
    assert isinstance(model.model_nn['callbacks'][-1],
                      tensorflow.python.keras.callbacks.EarlyStopping)
    with pytest.raises(ValueError):
        model.fit(predictors_cont=x,
                  predictors_qual=None,
                  labels=y,
                  plot="toto",
                  optimizer=Adagrad(),
                  callbacks=EarlyStopping())
    assert "plot parameter provided but not boolean" in caplog.records[-1].message
def get_optimizer(optimizer):
    if optimizer == "sgd":  # fixed typo: was "sdg"
        return SGD(learning_rate=0.01, decay=1e-6, momentum=0.9,
                   nesterov=True, clipnorm=5)
    if optimizer == "rmsprop":
        return RMSprop(learning_rate=0.01)
    if optimizer == "adam":
        return Adam(learning_rate=0.01)
    if optimizer == "adagrad":
        return Adagrad(learning_rate=0.01)
    if optimizer == "adadelta":
        return Adadelta(learning_rate=1.0)
    raise ValueError(f"Unknown optimizer: {optimizer}")
def test_initialize(self):
    self._compare_initialize_values(Adam(), 4, "m",
                                    init_ops.constant_initializer(0.0))
    self._compare_initialize_values(
        Ftrl(initial_accumulator_value=0.5),
        4,
        "accumulator",
        init_ops.constant_initializer(0.5),
    )
    self._compare_initialize_values(
        Adagrad(initial_accumulator_value=0.5),
        4,
        "accumulator",
        init_ops.constant_initializer(0.5),
    )
def __init__(self,
             embedding_id,
             train_students,
             valid_students,
             optimizer=Adagrad(learning_rate=0.02879),
             embed_dim=218,
             simple=True):
    self.optimizer = optimizer
    self.embed_dim = embed_dim
    self.simple = simple
    self.embedding_id = embedding_id
    self.train_students = train_students
    self.valid_students = valid_students
    self.model = self.__build_model()
def create_transEModel(num_nodes,
                       num_edges,
                       embed_size=50,
                       ord='l1',
                       margin=1,
                       learning_rate=2e-1):
    # Build the TransE model
    pos_sub_inputs = Input(shape=(), name='pos_subject')
    neg_sub_inputs = Input(shape=(), name='neg_subject')
    pos_obj_inputs = Input(shape=(), name='pos_object')
    neg_obj_inputs = Input(shape=(), name='neg_object')
    rel_inputs = Input(shape=(), name='relation')
    inputs = {
        "pos_subject": pos_sub_inputs,
        "neg_subject": neg_sub_inputs,
        "pos_object": pos_obj_inputs,
        "neg_object": neg_obj_inputs,
        "relation": rel_inputs
    }
    # Embeddings are initialized as described in the TransE paper
    init_range = 6 / np.sqrt(embed_size)
    init_op = RandomUniform(-init_range, init_range)
    node_layer = Embedding(input_dim=num_nodes,
                           output_dim=embed_size,
                           embeddings_initializer=init_op,
                           name='node_embedding')
    edge_layer = Embedding(input_dim=num_edges,
                           output_dim=embed_size,
                           embeddings_initializer=init_op,
                           name='edge_embedding')
    pos_sub = node_layer(pos_sub_inputs)
    neg_sub = node_layer(neg_sub_inputs)
    pos_obj = node_layer(pos_obj_inputs)
    neg_obj = node_layer(neg_obj_inputs)
    rel = edge_layer(rel_inputs)
    score = TransEScore(ord, margin)([pos_sub, neg_sub, pos_obj, neg_obj, rel])
    model = Model(inputs, score)
    # Compile the TransE model; the margin-based score itself is the loss
    model.add_loss(score)
    model.compile(optimizer=Adagrad(learning_rate))
    return model
def select_optimizer(self, opt_type, learning_rate, clipnorm=0.5):
    if opt_type == 'adam':
        return Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999,
                    epsilon=1e-08, decay=0.0)
    elif opt_type == 'rmsprop':
        return RMSprop(lr=learning_rate, rho=0.9, epsilon=None, decay=0.0)
    elif opt_type == 'adagrad':
        return Adagrad(lr=learning_rate, epsilon=None, decay=0.0)
    elif opt_type == 'adadelta':
        return Adadelta(lr=learning_rate, rho=0.95, epsilon=None, decay=0.0)
    elif opt_type == 'nadam':
        return Nadam(lr=learning_rate, beta_1=0.9, beta_2=0.999,
                     epsilon=None, schedule_decay=0.004)
    elif opt_type == 'sgd':
        return SGD(lr=learning_rate, momentum=0.0, decay=0.0,
                   nesterov=False, clipnorm=clipnorm)
    else:
        print('No optimizer')
        quit()
def create_model(self):
    self.model = Sequential()
    # Conv layer 1: captures coarse, overall features. More filters generally
    # means a richer network; larger kernels capture larger input regions.
    self.model.add(Conv2D(self.mik, (3, 3),
                          activation=self.activ_func,
                          input_shape=self.input_shape))
    self.model.add(MaxPooling2D(pool_size=(2, 2)))
    self.model.add(Dropout(self.dropout_rate))
    # Conv layer 2: captures finer details
    self.model.add(Conv2D(self.mik * 2, (3, 3), activation=self.activ_func))
    self.model.add(MaxPooling2D(pool_size=(2, 2)))
    # Flattening carries no weights; it only reshapes for the dense head
    self.model.add(Flatten())
    # First hidden layer of the fully connected head
    self.model.add(Dense(self.mik * 8, activation=self.activ_func))
    self.model.add(Dropout(self.dropout_rate))
    # Softmax output to match the categorical cross-entropy loss below
    # (the original used self.activ_func here, which breaks that loss)
    self.model.add(Dense(self.num_classes, activation='softmax'))
    self.sgd = SGD(lr=self.lr, decay=self.decay,
                   momentum=self.momentum, nesterov=True)
    # Note: self.decay is reused as epsilon below, which looks questionable
    self.rmsp = RMSprop(learning_rate=self.lr, rho=0.9, momentum=0.0,
                        epsilon=self.decay, centered=False)
    self.adag = Adagrad(learning_rate=self.lr,
                        initial_accumulator_value=0.1,
                        epsilon=self.decay)
    if self.optimizer == "SGD":
        optim = self.sgd
    elif self.optimizer == "RMSProp":
        optim = self.rmsp
    elif self.optimizer == "AdaGrad":
        optim = self.adag
    else:
        raise ValueError(f"Unknown optimizer: {self.optimizer}")
    # Compile the model
    self.model.compile(loss='categorical_crossentropy', optimizer=optim)
def set_optimizer(self, optimizer="SGD", learning_rate=0.01, momentum=0.0):
    """Set the optimizer.

    :param optimizer: One of "SGD", "RMSprop" or "Adagrad"
    :param learning_rate: Learning rate
    :param momentum: Momentum (only used by SGD)
    :return: None
    """
    if optimizer == "SGD":
        self.optimizer = SGD(learning_rate=learning_rate, momentum=momentum)
    elif optimizer == "RMSprop":
        self.optimizer = RMSprop(learning_rate=learning_rate)
    else:
        self.optimizer = Adagrad(learning_rate=learning_rate)
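# A minimal hedged usage sketch; `Net` is a hypothetical class exposing
# set_optimizer above together with a `model` attribute to compile.
net = Net()
net.set_optimizer(optimizer="RMSprop", learning_rate=0.001)
net.model.compile(optimizer=net.optimizer, loss='mse')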
def test_allowed_slot_names(self):
    opt_and_slots_pairs = [
        (SGD(), []),
        (SGD(momentum=0.2), ["momentum"]),
        (Adam(), ["m", "v"]),
        (Adam(amsgrad=True), ["m", "v", "vhat"]),
        (Adamax(), ["m", "v"]),
        (Nadam(), ["m", "v"]),
        (Adadelta(), ["accum_grad", "accum_var"]),
        (Adagrad(), ["accumulator"]),
        (Ftrl(), ["accumulator", "linear"]),
        (RMSprop(), ["rms"]),
        (RMSprop(momentum=0.2), ["rms", "momentum"]),
        (RMSprop(centered=True), ["rms", "mg"]),
        (RMSprop(momentum=0.2, centered=True), ["rms", "momentum", "mg"]),
    ]
    for opt, expected_slots in opt_and_slots_pairs:
        self._compare_slot_names(opt, expected_slots)
def compile_model(self, optimizer, losses, metrics):
    self._maybe_load_checkpoint()
    if optimizer.lower() == 'radam':
        optimizer = RAdamOptimizer(total_steps=1000,
                                   warmup_proportion=0.1,
                                   learning_rate=self._learning_rate,
                                   min_lr=1e-8)
    elif optimizer.lower() == 'adam':
        optimizer = Adam(lr=self._learning_rate)
    elif optimizer.lower() == 'adagrad':
        optimizer = Adagrad(lr=self._learning_rate)
    elif optimizer.lower() == 'sgd':
        optimizer = SGD(lr=self._learning_rate, momentum=0.9)
    elif optimizer.lower() == 'rmsprop':
        optimizer = RMSprop(lr=self._learning_rate)
    self._optimizer = optimizer
    return self._model.compile(optimizer=self._optimizer,
                               loss=losses,
                               metrics=metrics)
def main():
    # Parameters for model training and prediction
    mp = model_params()
    # Parameters for building BERT
    bp = bert_params(with_pool=True)
    datagen = DataGenerator(bp,
                            batch_size=mp.batch_size,
                            num_neg=mp.num_neg,
                            shuffle=mp.shuffle)
    # Other optimizers can be tried later
    optimizer = Adagrad(learning_rate=mp.learning_rate)
    my_model = Bert4QA(bp)
    # Main training class
    t = TrainOrPredict(mp)
    # final_model is the trained model
    final_model = t.train(my_model, optimizer, datagen)
    data = datagen.data_faq
    tokenizer = datagen.tokenizer
    # Inspect the results after training
    real_query_text = "月球和地球是什么关系?"
    question_score = {}
    for query_name in data.query_dict.keys():
        query_text = data.query_dict[query_name]
        token_ids, segment_ids = tokenizer.encode(real_query_text, query_text)
        question_score[query_name] = final_model.predict(
            [token_ids, segment_ids])
    question_score = {k: v.numpy() for k, v in question_score.items()}
    qs = dict(sorted(question_score.items(), key=lambda x: x[1], reverse=True))
    c = 0
    for k, v in qs.items():
        c += 1
        print(k, data.query_dict[k], v)
        if c == 10:
            break
    return final_model
def train(self, inputs, labels, mlp_dir=None, gmf_dir=None, split_ratio=0.1):
    # Compile and train the model
    print('fit the model')
    if self.optimizer.lower() == 'adagrad':
        self.model.compile(optimizer=Adagrad(learning_rate=self.learning_rate),
                           loss=self.loss)
    elif self.optimizer.lower() == 'adam':
        self.model.compile(optimizer=Adam(learning_rate=self.learning_rate),
                           loss=self.loss)
    elif self.optimizer.lower() == 'rmsprop':
        self.model.compile(optimizer=RMSprop(self.learning_rate),
                           loss=self.loss)
    else:
        self.model.compile(optimizer=SGD(self.learning_rate), loss=self.loss)
    if self.load_pretrain:
        if mlp_dir is not None and gmf_dir is not None:
            self.load_pretrain_model(mlp_dir, gmf_dir)
        else:
            print('the pretrained model path is not correct, '
                  'please check the path')
    start_time = time.time()
    self.model.fit(inputs,
                   labels,
                   batch_size=self.batch_size,
                   epochs=self.epochs,
                   verbose=self.verbose,
                   validation_split=split_ratio,
                   shuffle=True)
    end_time = time.time()
    print(end_time - start_time)