def build(self, hp):
    maxnorm = hp.Choice('maxnorm', values=self.hyperparam['maxnorm'])
    resnet_model = Resnet(input_shape=(self.seqlen, self.channels), norm_max=maxnorm)
    if self.pretrained_weights is not None:
        resnet_model.set_weights(self.pretrained_weights)
    inp = Input(shape=(self.seqlen, self.channels))
    enc_inp = resnet_model(inp)
    dense_units = hp.Int('preclassification',
                         min_value=self.hyperparam['dense_units']['min'],
                         max_value=self.hyperparam['dense_units']['max'],
                         step=self.hyperparam['dense_units']['step'])
    dense_out = Dense(units=dense_units, activation='relu',
                      kernel_constraint=MaxNorm(maxnorm, axis=[0, 1]),
                      bias_constraint=MaxNorm(maxnorm, axis=0),
                      kernel_initializer=glorot_uniform(seed=0))(enc_inp)
    dense_out = Dropout(rate=hp.Choice('dropout', values=self.hyperparam['dropout']))(dense_out)
    output = Dense(self.num_classes, activation='softmax',
                   kernel_constraint=MaxNorm(maxnorm, axis=[0, 1]),
                   bias_constraint=MaxNorm(maxnorm, axis=0),
                   kernel_initializer=glorot_uniform(seed=0))(dense_out)
    model = Model(inputs=inp, outputs=output)
    model.compile(optimizer=Adam(lr=hp.Choice('lr', values=self.hyperparam['lr'])),
                  loss=focal_loss(),
                  metrics=['accuracy', macro_f1])
    return model
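
# Note: the build() method above (and several snippets below) compiles with a
# project-local focal_loss() factory and a macro_f1 metric that are not defined in
# this file. A minimal sketch of such a categorical focal-loss factory is given
# here for reference; the default gamma/alpha values and the use of Keras backend
# ops are assumptions, not the original implementation.
from tensorflow.keras import backend as K

def focal_loss(gamma=2.0, alpha=0.25):
    """Return a Keras loss computing -alpha * (1 - p_t)**gamma * log(p_t) for one-hot targets."""
    def loss_fn(y_true, y_pred):
        # Clip softmax outputs away from 0/1 to keep the log numerically stable
        y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
        cross_entropy = -y_true * K.log(y_pred)
        weight = alpha * K.pow(1.0 - y_pred, gamma)
        return K.sum(weight * cross_entropy, axis=-1)
    return loss_fn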
def loss(self):
    ######### -*- Softmax Loss -*- #########
    self.softmax_b1, self.ce1 = pw_softmaxwithloss_2d(self.Y, self.b1)
    self.softmax_b2, self.ce2 = pw_softmaxwithloss_2d(self.Y, self.b2)
    self.softmax_b3, self.ce3 = pw_softmaxwithloss_2d(self.Y, self.b3)
    self.softmax_b4, self.ce4 = pw_softmaxwithloss_2d(self.Y, self.b4)
    self.softmax_fuse, self.cefuse = pw_softmaxwithloss_2d(self.Y, self.fuse)
    self.total_ce = self.ce1 + self.ce2 + self.ce3 + self.ce4 + self.cefuse

    ######### -*- Focal Loss -*- #########
    self.fl = focal_loss(self.Y, self.o, alpha=self.alpha, gamma=self.gamma)

    ######### -*- Total Loss -*- #########
    self.total_loss = self.total_ce + self.fl_weight * self.fl
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(784,)))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(10, activation='softmax'))
model.summary()

model.compile(loss=lambda y, y_hat: focal_loss(y, y_hat, gamma),
              optimizer=RMSprop(),
              metrics=['accuracy'])

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
def compile_model(model, num_classes, metrics, loss, lr):
    from keras.losses import binary_crossentropy
    from keras.losses import categorical_crossentropy
    from keras.metrics import binary_accuracy
    from keras.metrics import categorical_accuracy
    from keras.optimizers import Adam

    from metrics import dice_coeff
    from metrics import jaccard_index
    from metrics import class_jaccard_index
    from metrics import pixelwise_precision
    from metrics import pixelwise_sensitivity
    from metrics import pixelwise_specificity
    from metrics import pixelwise_recall
    from losses import focal_loss

    if isinstance(loss, str):
        if loss in {'ce', 'crossentropy'}:
            if num_classes == 1:
                loss = binary_crossentropy
            else:
                loss = categorical_crossentropy
        elif loss in {'focal', 'focal_loss'}:
            loss = focal_loss(num_classes)
        else:
            raise ValueError('unknown loss %s' % loss)

    if isinstance(metrics, str):
        metrics = [metrics, ]
    for i, metric in enumerate(metrics):
        if not isinstance(metric, str):
            continue
        elif metric == 'acc':
            metrics[i] = binary_accuracy if num_classes == 1 else categorical_accuracy
        elif metric == 'jaccard_index':
            metrics[i] = jaccard_index(num_classes)
        elif metric == 'jaccard_index0':
            metrics[i] = class_jaccard_index(0)
        elif metric == 'jaccard_index1':
            metrics[i] = class_jaccard_index(1)
        elif metric == 'jaccard_index2':
            metrics[i] = class_jaccard_index(2)
        elif metric == 'jaccard_index3':
            metrics[i] = class_jaccard_index(3)
        elif metric == 'jaccard_index4':
            metrics[i] = class_jaccard_index(4)
        elif metric == 'jaccard_index5':
            metrics[i] = class_jaccard_index(5)
        elif metric == 'dice_coeff':
            metrics[i] = dice_coeff(num_classes)
        elif metric == 'pixelwise_precision':
            metrics[i] = pixelwise_precision(num_classes)
        elif metric == 'pixelwise_sensitivity':
            metrics[i] = pixelwise_sensitivity(num_classes)
        elif metric == 'pixelwise_specificity':
            metrics[i] = pixelwise_specificity(num_classes)
        elif metric == 'pixelwise_recall':
            metrics[i] = pixelwise_recall(num_classes)
        else:
            raise ValueError('metric %s not recognized' % metric)

    model.compile(optimizer=Adam(lr=lr),
                  loss=loss,
                  metrics=metrics)
def build_model(lr, l2, img_shape, activation='sigmoid'):
    ##############
    # BRANCH MODEL
    ##############
    regul = regularizers.l2(l2)
    optim = Adam(lr=lr)
    kwargs = {'padding': 'same', 'kernel_regularizer': regul}

    inp = Input(shape=img_shape)  # 384x384x1
    x = Conv2D(64, (9, 9), strides=2, activation='relu', **kwargs)(inp)  # 192x192x64
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 96x96x64
    for _ in range(2):
        x = BatchNormalization()(x)
        x = Conv2D(64, (3, 3), activation='relu', **kwargs)(x)

    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 48x48x64
    x = BatchNormalization()(x)
    x = Conv2D(128, (1, 1), activation='relu', **kwargs)(x)  # 48x48x128
    for _ in range(4):
        x = subblock(x, 64, **kwargs)

    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 24x24x128
    x = BatchNormalization()(x)
    x = Conv2D(256, (1, 1), activation='relu', **kwargs)(x)  # 24x24x256
    for _ in range(4):
        x = subblock(x, 64, **kwargs)

    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 12x12x256
    x = BatchNormalization()(x)
    x = Conv2D(384, (1, 1), activation='relu', **kwargs)(x)  # 12x12x384
    for _ in range(4):
        x = subblock(x, 96, **kwargs)

    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 6x6x384
    x = BatchNormalization()(x)
    x = Conv2D(512, (1, 1), activation='relu', **kwargs)(x)  # 6x6x512
    for _ in range(4):
        x = subblock(x, 128, **kwargs)

    x = GlobalMaxPooling2D()(x)  # 512
    branch_model = Model(inp, x)

    ############
    # HEAD MODEL
    ############
    mid = 32
    xa_inp = Input(shape=branch_model.output_shape[1:])
    xb_inp = Input(shape=branch_model.output_shape[1:])
    x1 = Lambda(lambda x: x[0] * x[1])([xa_inp, xb_inp])
    x2 = Lambda(lambda x: x[0] + x[1])([xa_inp, xb_inp])
    x3 = Lambda(lambda x: K.abs(x[0] - x[1]))([xa_inp, xb_inp])
    x4 = Lambda(lambda x: K.square(x))(x3)
    x = Concatenate()([x1, x2, x3, x4])  # ?x2048
    x = Reshape((4, branch_model.output_shape[1], 1), name='reshape1')(x)  # ?x4x512x1

    # Per feature NN with shared weight is implemented using CONV2D with appropriate stride.
    x = Conv2D(mid, (4, 1), activation='relu', padding='valid')(x)  # ?x1x512xmid
    x = Reshape((branch_model.output_shape[1], mid, 1))(x)  # ?x512xmidx1
    x = Conv2D(1, (1, mid), activation='linear', padding='valid')(x)  # ?x512x1x1
    x = Flatten(name='flatten')(x)  # ?x512

    # Weighted sum implemented as a Dense layer.
    x = Dense(1, use_bias=True, activation=activation, name='weighted-average')(x)  # ?x1
    head_model = Model([xa_inp, xb_inp], x, name='head')

    ########################
    # SIAMESE NEURAL NETWORK
    ########################
    # Complete model is constructed by calling the branch model on each input image,
    # and then the head model on the resulting 512-vectors.
    img_a = Input(shape=img_shape)
    img_b = Input(shape=img_shape)
    xa = branch_model(img_a)
    xb = branch_model(img_b)
    x = head_model([xa, xb])
    model = Model([img_a, img_b], x)
    model.compile(optim, loss=focal_loss(gamma=2., alpha=.5),
                  metrics=['binary_crossentropy', 'acc'])
    # model.compile(optim, loss='binary_crossentropy', metrics=['binary_crossentropy', 'acc'])
    print(f'loss_functions is : {model.loss_functions}')
    return model, branch_model, head_model
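
# build_model() above uses focal_loss(gamma=2., alpha=.5) as a binary loss on the
# sigmoid similarity score. A minimal sketch of a binary focal-loss factory with
# that signature follows; it is an illustrative assumption, not the repository's
# original definition.
from tensorflow.keras import backend as K

def focal_loss(gamma=2.0, alpha=0.5):
    def loss_fn(y_true, y_pred):
        y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
        # p_t: predicted probability assigned to the true class
        p_t = y_true * y_pred + (1.0 - y_true) * (1.0 - y_pred)
        alpha_t = y_true * alpha + (1.0 - y_true) * (1.0 - alpha)
        return -K.mean(alpha_t * K.pow(1.0 - p_t, gamma) * K.log(p_t))
    return loss_fn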
def main(argv):
    indir = args.indir
    mode = args.mode  # binary or multiclass or nonwear
    outdir = args.outdir

    if mode == 'multiclass':
        states = ['Wake', 'NREM 1', 'NREM 2', 'NREM 3', 'REM', 'Wake_ext']
    elif mode == 'binary':
        states = ['Wake', 'Sleep', 'Wake_ext']
        collate_states = ['NREM 1', 'NREM 2', 'NREM 3', 'REM']
    elif mode == 'nonwear':
        states = ['Wear', 'Nonwear']
        collate_states = ['Wake', 'NREM 1', 'NREM 2', 'NREM 3', 'REM']

    valid_states = [state for state in states if state != 'Wake_ext']
    num_classes = len(valid_states)

    if not os.path.exists(outdir):
        os.makedirs(outdir)
    resultdir = os.path.join(outdir, mode, 'models')
    if not os.path.exists(resultdir):
        os.makedirs(resultdir)

    # Read data from disk
    data = pd.read_csv(os.path.join(indir, 'features_30.0s.csv'))
    labels = data['label'].values
    users = data['user'].values
    if mode == 'binary':
        labels = np.array(['Sleep' if lbl in collate_states else lbl for lbl in labels])
    elif mode == 'nonwear':
        labels = np.array(['Wear' if lbl in collate_states else lbl for lbl in labels])

    # Read raw data
    shape_df = pd.read_csv(os.path.join(indir, 'datashape_30.0s.csv'))
    num_samples = shape_df['num_samples'].values[0]
    seqlen = shape_df['num_timesteps'].values[0]
    n_channels = shape_df['num_channels'].values[0]
    raw_data = np.memmap(os.path.join(indir, 'rawdata_30.0s.npz'), dtype='float32', mode='r',
                         shape=(num_samples, seqlen, n_channels))

    # Hyperparameters
    lr = args.lr  # learning rate
    num_epochs = args.num_epochs
    batch_size = args.batchsize
    max_seqlen = 1504
    num_channels = args.num_channels  # number of raw data channels
    feat_channels = args.feat_channels  # Add ENMO, z-angle and LIDS as additional channels

    # Use nested cross-validation based on users
    # Outer CV
    unique_users = list(set(users))
    random.shuffle(unique_users)
    cv_splits = 5
    user_cnt = Counter(users[np.isin(labels, valid_states)]).most_common()
    samp_per_fold = len(users) // cv_splits

    # Get users to be used in test for each fold such that each fold has a similar
    # number of samples
    fold_users = [[] for i in range(cv_splits)]
    fold_cnt = [[] for i in range(cv_splits)]
    for user, cnt in user_cnt:
        idx = -1
        maxdiff = 0
        for j in range(cv_splits):
            if (samp_per_fold - sum(fold_cnt[j])) > maxdiff:
                maxdiff = samp_per_fold - sum(fold_cnt[j])
                idx = j
        fold_users[idx].append(user)
        fold_cnt[idx].append(cnt)

    predictions = []
    if mode != 'nonwear':
        wake_idx = states.index('Wake')
        wake_ext_idx = states.index('Wake_ext')
    for fold in range(cv_splits):
        print('Evaluating fold %d' % (fold + 1))
        test_users = fold_users[fold]

        trainval_users = [(key, val) for key, val in user_cnt if key not in test_users]
        random.shuffle(trainval_users)
        # validation data is approximately 10% of total samples
        val_samp = 0.1 * sum([tup[1] for tup in user_cnt])
        nval = 0
        val_sum = 0
        while (val_sum < val_samp):
            val_sum += trainval_users[nval][1]
            nval += 1
        val_users = [key for key, val in trainval_users[:nval]]
        train_users = [key for key, val in trainval_users[nval:]]
        print('#users: Train = {:d}, Val = {:d}, Test = {:d}'.format(
            len(train_users), len(val_users), len(test_users)))

        # Create partitions
        # make a copy to change wake_ext for this fold
        fold_labels = np.array([states.index(lbl) if lbl in states else -1 for lbl in labels])
        train_indices = get_partition(raw_data, fold_labels, users, train_users,
                                      states, mode, is_train=True)
        val_indices = get_partition(raw_data, fold_labels, users, val_users, states, mode)
        test_indices = get_partition(raw_data, fold_labels, users, test_users, states, mode)
        nsamples = len(train_indices) + len(val_indices) + len(test_indices)
        print('Train: {:0.2f}%, Val: {:0.2f}%, Test: {:0.2f}%'
              .format(len(train_indices) * 100.0 / nsamples,
                      len(val_indices) * 100.0 / nsamples,
                      len(test_indices) * 100.0 / nsamples))

        if mode != 'nonwear':
            chosen_indices = train_indices[fold_labels[train_indices] != wake_ext_idx]
        else:
            chosen_indices = train_indices
        class_wts = class_weight.compute_class_weight(
            class_weight='balanced',
            classes=np.unique(fold_labels[chosen_indices]),
            y=fold_labels[chosen_indices])

        # Rename wake_ext as wake for training samples
        if mode != 'nonwear':
            rename_indices = train_indices[fold_labels[train_indices] == wake_ext_idx]
            fold_labels[rename_indices] = wake_idx

        print('Train', Counter(np.array(fold_labels)[train_indices]))
        print('Val', Counter(np.array(fold_labels)[val_indices]))
        print('Test', Counter(np.array(fold_labels)[test_indices]))

        # Data generators for computing statistics
        stat_gen = DataGenerator(train_indices, raw_data, fold_labels, valid_states,
                                 partition='stat', batch_size=batch_size, seqlen=seqlen,
                                 n_channels=num_channels, feat_channels=feat_channels,
                                 n_classes=num_classes, shuffle=True)
        mean, std = stat_gen.fit()
        np.savez(os.path.join(resultdir, 'Fold' + str(fold + 1) + '_stats'),
                 mean=mean, std=std)

        # Data generators for train/val/test
        train_gen = DataGenerator(train_indices, raw_data, fold_labels, valid_states,
                                  partition='train', batch_size=batch_size, seqlen=seqlen,
                                  n_channels=num_channels, feat_channels=feat_channels,
                                  n_classes=num_classes, shuffle=True, augment=True,
                                  aug_factor=0.75, balance=True, mean=mean, std=std)
        val_gen = DataGenerator(val_indices, raw_data, fold_labels, valid_states,
                                partition='val', batch_size=batch_size, seqlen=seqlen,
                                n_channels=num_channels, feat_channels=feat_channels,
                                n_classes=num_classes, mean=mean, std=std)
        test_gen = DataGenerator(test_indices, raw_data, fold_labels, valid_states,
                                 partition='test', batch_size=batch_size, seqlen=seqlen,
                                 n_channels=num_channels, feat_channels=feat_channels,
                                 n_classes=num_classes, mean=mean, std=std)

        # Create model
        # Use batchnorm as first step since computing mean and std
        # across the entire dataset is time-consuming
        model = FCN(input_shape=(seqlen, num_channels + feat_channels),
                    max_seqlen=max_seqlen, num_classes=len(valid_states),
                    norm_max=args.maxnorm)
        # print(model.summary())
        model.compile(optimizer=Adam(lr=lr),
                      loss=focal_loss(),
                      metrics=['accuracy', macro_f1])

        # Train model
        # Use callback to compute F-scores over entire validation data
        metrics_cb = Metrics(val_data=val_gen, batch_size=batch_size)
        # Use early stopping and model checkpoints to handle overfitting and save best model
        model_checkpt = ModelCheckpoint(
            os.path.join(resultdir, 'fold' + str(fold + 1) + '_' + mode + '-{epoch:02d}-{val_f1:.4f}.h5'),
            monitor='val_f1', mode='max', save_best_only=True)
        batch_renorm_cb = BatchRenormScheduler(len(train_gen))
        history = model.fit(train_gen, epochs=num_epochs, validation_data=val_gen,
                            verbose=1, shuffle=False,
                            callbacks=[batch_renorm_cb, metrics_cb, model_checkpt],
                            workers=2, max_queue_size=20, use_multiprocessing=False)

        # Plot training history
        plot_results(fold + 1, history.history['loss'], history.history['val_loss'],
                     os.path.join(resultdir, 'Fold' + str(fold + 1) + '_' + mode + '_loss.jpg'),
                     metric='Loss')
        plot_results(fold + 1, history.history['accuracy'], history.history['val_accuracy'],
                     os.path.join(resultdir, 'Fold' + str(fold + 1) + '_' + mode + '_accuracy.jpg'),
                     metric='Accuracy')
        plot_results(fold + 1, history.history['macro_f1'], metrics_cb.val_f1,
                     os.path.join(resultdir, 'Fold' + str(fold + 1) + '_' + mode + '_macro_f1.jpg'),
                     metric='Macro F1')

        # Predict probability on validation data using best model
        best_model_file, epoch, val_f1 = get_best_model(resultdir, fold + 1)
        print('Predicting with model saved at Epoch={:d} with val_f1={:0.4f}'.format(epoch, val_f1))
        model.load_weights(os.path.join(resultdir, best_model_file))
        probs = model.predict(test_gen)
        y_pred = probs.argmax(axis=1)
        y_true = fold_labels[test_indices]
        predictions.append((users[test_indices], data.iloc[test_indices]['timestamp'],
                            data.iloc[test_indices]['filename'], test_indices, y_true, probs))

        # Save user report
        cv_save_classification_result(
            predictions, valid_states,
            os.path.join(resultdir, 'fold' + str(fold + 1) + '_deeplearning_' + mode + '_results.csv'),
            method='dl')
        cv_get_classification_report(predictions, mode, valid_states, method='dl')

    cv_get_classification_report(predictions, mode, valid_states, method='dl')

    # Save user report
    cv_save_classification_result(
        predictions, valid_states,
        os.path.join(resultdir, 'deeplearning_' + mode + '_results.csv'),
        method='dl')
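
# main() above tracks a macro_f1 metric whose definition is not part of this file.
# A minimal batch-wise sketch for one-hot labels is shown here; the original may
# average differently (e.g. over the full validation set via the Metrics callback),
# so treat this only as an assumption.
from tensorflow.keras import backend as K

def macro_f1(y_true, y_pred):
    # Hard predictions as one-hot vectors
    y_pred_hot = K.one_hot(K.argmax(y_pred, axis=-1), K.int_shape(y_pred)[-1])
    tp = K.sum(y_true * y_pred_hot, axis=0)
    fp = K.sum((1.0 - y_true) * y_pred_hot, axis=0)
    fn = K.sum(y_true * (1.0 - y_pred_hot), axis=0)
    f1 = 2.0 * tp / (2.0 * tp + fp + fn + K.epsilon())
    # Macro average: mean of per-class F1 scores
    return K.mean(f1)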
    x = Dropout(dr)(x)
    x = Dense(d, activation=Mish())(x)
    x = LayerNormalization()(x)
    outputs = Dense(n_class, activation="softmax")(x)
    model = Model(inputs, outputs)
    return model


model = dense_model(**model_pars)
cosine = cb.CosineAnnealingScheduler(T_max=50, eta_max=1e-3, eta_min=1e-5,
                                     verbose=1, epoch_start=5)
loss = l.focal_loss(gamma=3., alpha=6.)
model.compile(Ranger(learning_rate=1e-3), loss=loss, metrics=["accuracy"])
print(model.summary())

model.fit(
    train,
    epochs=55,
    validation_data=validation,
    callbacks=[
        ModelCheckpoint(
            "main.h5",
            monitor="val_loss",
            save_best_only=True,
            save_weights_only=False,
        ),
def build(self):
    """
    feature_size: N
    field_size: F
    embedding_size: K
    batch_size: None
    """
    self.feat_index = tf.placeholder(tf.int32, shape=[None, None], name='feature_index')
    self.feat_value = tf.placeholder(tf.float32, shape=[None, None], name='feature_value')
    self.label = tf.placeholder(tf.float32, shape=[None, 1], name='label')
    self.keep_prob = tf.placeholder(tf.float32, shape=[], name='keep_prob')  # scalar
    self.is_training = tf.placeholder(tf.bool, shape=[], name='is_training')

    # 1. ------------------------- Define weights -------------------------
    # Weights of the first-order (linear) terms in the FM part
    self.weight['first_order'] = tf.Variable(
        tf.random_normal([self.feature_size, 1], 0.0, 0.05),  # N * 1
        name='first_order')

    # Weights between the one-hot encoded input layer and the dense embeddings layer,
    # i.e. the embedding input of the DNN part.
    self.weight['embedding_weight'] = tf.Variable(
        tf.random_normal([self.feature_size, self.embedding_size], 0.0, 0.05),  # N * K
        name='embedding_weight')

    # Weights and biases of the deep network; its initial input dimension is input_size = F * K
    num_layer = len(self.deep_layers)
    input_size = self.field_size * self.embedding_size
    # glorot_normal = np.sqrt(2.0 / (input_size + self.deep_layers[0]))  # for sigmoid
    he_normal = np.sqrt(2.0 / input_size)  # for relu

    self.weight['layer_0'] = tf.Variable(np.random.normal(
        loc=0, scale=he_normal, size=(input_size, self.deep_layers[0])), dtype=np.float32)
    self.weight['bias_0'] = tf.Variable(np.random.normal(
        loc=0, scale=he_normal, size=(1, self.deep_layers[0])), dtype=np.float32)

    # Generate the weight and bias of every layer in the deep network
    for i in range(1, num_layer):
        he_normal = np.sqrt(2.0 / (self.deep_layers[i - 1]))
        self.weight['layer_' + str(i)] = tf.Variable(np.random.normal(
            loc=0, scale=he_normal, size=(self.deep_layers[i - 1], self.deep_layers[i])),
            dtype=np.float32)
        self.weight['bias_' + str(i)] = tf.Variable(np.random.normal(
            loc=0, scale=he_normal, size=(1, self.deep_layers[i])), dtype=np.float32)

    # Output size of the deep part + first-order output size + second-order output size
    last_layer_size = self.deep_layers[-1] + self.field_size + self.embedding_size
    glorot_normal = np.sqrt(2.0 / (last_layer_size + 1))
    # Weight and bias of the final layer
    self.weight['last_layer'] = tf.Variable(np.random.normal(
        loc=0, scale=glorot_normal, size=(last_layer_size, 1)), dtype=np.float32)
    self.weight['last_bias'] = tf.Variable(tf.constant(0.0), dtype=np.float32)

    # 2. ---------------------- Forward pass ----------------------
    # None * F * K
    self.embedding_index = tf.nn.embedding_lookup(self.weight['embedding_weight'], self.feat_index)
    # [None*F*K] .* [None*F*1] = None*F*K
    self.embedding_part = tf.multiply(
        self.embedding_index, tf.reshape(self.feat_value, [-1, self.field_size, 1]))

    # First-order features of the FM part
    # None * F * 1
    self.embedding_first = tf.nn.embedding_lookup(self.weight['first_order'], self.feat_index)
    # [None*F*1] .* [None*F*1] = None*F*1
    self.embedding_first = tf.multiply(
        self.embedding_first, tf.reshape(self.feat_value, [-1, self.field_size, 1]))
    # None * F
    self.first_order = tf.reduce_sum(self.embedding_first, 2)

    # Second-order features: None * K
    self.sum_second_order = tf.reduce_sum(self.embedding_part, 1)
    self.sum_second_order_square = tf.square(self.sum_second_order)
    self.square_second_order = tf.square(self.embedding_part)
    self.square_second_order_sum = tf.reduce_sum(self.square_second_order, 1)
    # 1/2 * ((a+b)^2 - a^2 - b^2) = ab
    # None * K
    self.second_order = 0.5 * tf.subtract(self.sum_second_order_square,
                                          self.square_second_order_sum)

    # Output of the FM part: None * (F+K)
    self.fm_part = tf.concat([self.first_order, self.second_order], axis=1)

    # DNN part
    # None * (F*K)
    self.deep_embedding = tf.reshape(self.embedding_part,
                                     [-1, self.field_size * self.embedding_size])
    # Fully connected layers
    for i in range(0, len(self.deep_layers)):
        self.deep_embedding = tf.add(
            tf.matmul(self.deep_embedding, self.weight["layer_%d" % i]),
            self.weight["bias_%d" % i])
        # self.deep_embedding = tf.matmul(self.deep_embedding, self.weight["layer_%d" % i])
        self.bn_out = tf.layers.batch_normalization(self.deep_embedding, training=self.is_training)
        # self.bn_out = tf.layers.dropout(self.deep_embedding, rate=self.keep_prob, training=self.is_training)
        self.deep_embedding = self.activate(self.bn_out)
        self.deep_embedding = tf.layers.dropout(self.deep_embedding,
                                                rate=1.0 - self.keep_prob,
                                                training=self.is_training)

    # Concatenate the FM output with the DNN output: None * (F + K + layer[-1])
    din_all = tf.concat([self.fm_part, self.deep_embedding], axis=1)
    # None * 1
    self.out = tf.add(tf.matmul(din_all, self.weight['last_layer']), self.weight['last_bias'])

    # 3. ------------------ Define the loss ------------------
    # loss part: None * 1
    self.prob = tf.nn.sigmoid(self.out)
    # self.entropy_loss = tf.reduce_mean(
    #     tf.nn.sigmoid_cross_entropy_with_logits(labels=self.label, logits=self.out))
    # self.entropy_loss = -tf.reduce_mean(
    #     self.label * tf.log(tf.clip_by_value(self.prob, 1e-10, 1.0))
    #     + (1 - self.label) * tf.log(tf.clip_by_value(1 - self.prob, 1e-10, 1.0)))
    self.entropy_loss = focal_loss(self.prob, self.label, alpha=0.5, gamma=2)
    # self.entropy_loss = weighted_binary_crossentropy(self.prob, self.label, pos_ratio=self.pos_ratio)

    # Regularization: sum(w^2) / 2 * l2_reg_coef
    self.reg_loss = tf.contrib.layers.l2_regularizer(self.l2_reg_coef)(self.weight["last_layer"])
    for i in range(len(self.deep_layers)):
        self.reg_loss += tf.contrib.layers.l2_regularizer(self.l2_reg_coef)(self.weight["layer_%d" % i])
    # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(self.l2_reg_coef)(self.weight['layer_1']))
    # print(self.entropy_loss.shape.as_list(), self.reg_loss.shape.as_list())
    self.loss = self.entropy_loss + self.reg_loss

    self.global_step = tf.Variable(0, trainable=False, name='global_step')
    self.learning_rate = tf.train.exponential_decay(self.learning_rate, self.global_step,
                                                    3000, 0.99, staircase=False)
    opt = tf.train.AdamOptimizer(self.learning_rate)
    # opt = tf.train.GradientDescentOptimizer(self.learning_rate)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    clip_gradients, _ = tf.clip_by_global_norm(gradients, 5)
    with tf.control_dependencies(update_ops):
        # self.train_op = opt.minimize(self.loss, global_step=self.global_step)
        self.train_op = opt.apply_gradients(zip(clip_gradients, trainable_params),
                                            global_step=self.global_step)
    self.saver = tf.train.Saver(max_to_keep=3)
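
# The DeepFM graph above calls focal_loss(prob, label, alpha, gamma) on probabilities
# that have already passed through a sigmoid. A minimal TF1-style sketch with that
# calling convention follows; the argument order and the mean reduction are inferred
# from the call site and are assumptions, not the original helper.
import tensorflow as tf

def focal_loss(prob, label, alpha=0.5, gamma=2.0):
    # Clip probabilities to avoid log(0)
    prob = tf.clip_by_value(prob, 1e-10, 1.0 - 1e-10)
    pos_term = -alpha * tf.pow(1.0 - prob, gamma) * tf.log(prob) * label
    neg_term = -(1.0 - alpha) * tf.pow(prob, gamma) * tf.log(1.0 - prob) * (1.0 - label)
    return tf.reduce_mean(pos_term + neg_term)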
loss0 = dice_coef_loss(roi_masks_pos0[:, :, :, 1], mask_logits0[:, :, :, 1]) + \
        dice_coef_loss(roi_masks_pos1[:, :, :, 1], mask_logits1[:, :, :, 1])

with tf.name_scope("loss_Xent"):
    tf_mask0 = tf.cast(mask_logits0, tf.float32)
    tf_mask1 = tf.cast(mask_logits1, tf.float32)
    tf0 = pixel_wise_loss(tf_mask0, roi_masks_pos0, pixel_weights=None)
    tf1 = pixel_wise_loss(tf_mask1, roi_masks_pos1, pixel_weights=None)
    loss1 = (tf0 + tf1) / 3.0

with tf.name_scope("loss_focal"):
    # Focal loss on the first channel of each mask head, paired with its own target
    focal = focal_loss(mask_logits0[:, :, :, 0], roi_masks_pos0[:, :, :, 0]) + \
            focal_loss(mask_logits1[:, :, :, 0], roi_masks_pos1[:, :, :, 0])

with tf.name_scope("loss"):
    loss = loss0 + sce_weight * loss1

with tf.name_scope("train"):
    solver = tf.train.AdamOptimizer(learning_rate, epsilon=1e-8)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = solver.minimize(loss1)
    loss_op = [loss0, loss1]

tf.summary.scalar('Dice_p', -dice1)
tf.summary.scalar('Dice_pz', -dice2)