def perf_test_RASTAweights():
    """
    Test the performance of the RASTA weights provided by Lecoutre et al.

    Rebuilds the network (``resnet_trained(20)`` plus a 25-unit softmax
    layer), loads the released weights from ``path_to_model``, predicts on
    the rows of the RASTA label table whose ``set`` column equals ``'test'``
    and prints the top-1, top-3 and top-5 accuracies returned by
    ``evaluationScoreRASTA``.
    """
    dataset = 'RASTA'
    sess = tf.Session()
    # The Keras session must be set before any model is created.
    set_session(sess)
    tf.keras.backend.set_image_data_format('channels_last')

    base_model = resnet_trained(20)
    softmax_head = Dense(25, activation='softmax')(base_model.output)
    net_finetuned = Model(inputs=base_model.input, outputs=softmax_head)
    # net_finetuned = custom_resnet()  # this model has 87 layers
    path_to_model = os.path.join(os.sep, 'media', 'gonthier', 'HDD2',
                                 'output_exp', 'rasta_models',
                                 'resnet_2017_7_31-19_9_45',
                                 'model.h5')  # this model has 107 layers
    constrNet = 'LResNet50'  # For Lecoutre ResNet50 version

    net_finetuned.load_weights(path_to_model)  # ,by_name=True
    net_finetuned.build((224, 224, 3))
    # summary() prints by itself and returns None, so it is not wrapped in
    # print() (that only added a stray "None" line to the output).
    net_finetuned.summary()
    # Smoke test on random data: checks that the loaded graph is runnable.
    print(net_finetuned.predict(np.random.rand(1, 224, 224, 3)))

    # get_database returns a 10-tuple; only four entries are needed here.
    (item_name, path_to_img, _, classes, _, _, _, df_label,
     _, _) = get_database(dataset)

    classes_vectors = df_label[classes].values
    is_test = df_label['set'] == 'test'
    df_label_test = df_label[is_test]
    y_test = classes_vectors[is_test, :]

    cropCenter = False
    randomCrop = False
    imSize = 224
    test_predictions = predictionFT_net(net_finetuned,
                                        df_test=df_label_test,
                                        x_col=item_name,
                                        y_col=classes,
                                        path_im=path_to_img,
                                        Net=constrNet,
                                        cropCenter=cropCenter,
                                        randomCrop=randomCrop,
                                        imSize=imSize)

    with sess.as_default():
        metrics = evaluationScoreRASTA(y_test, test_predictions)

    # metrics = (top_k_accs, AP, P, R, P@20, F1, acc) per class; only the
    # top-k accuracies are reported here.
    top_k_accs = metrics[0]
    for k, top_k_acc in zip([1, 3, 5], top_k_accs):
        print('Top-{0} accuracy : {1:.2f}%'.format(k, top_k_acc * 100))
class MobileNetModel:
    """MobileNet backbone with frozen weights and a new classification head.

    The backbone weights are read from
    ``./weights/mobilenet_1_0_224_tf_no_top.h5``; only the layers added on
    top of it (pooling, dense, dropout, softmax) are trainable.
    """

    def __init__(self, data_X, data_y):
        """Build and compile the model.

        Args:
            data_X: array whose last three dimensions give the input shape
                passed to MobileNet.
            data_y: label array; ``data_y.shape[1]`` sets the number of
                output units.
        """
        # NOTE(review): n_class is taken from shape[0] while the output layer
        # below is sized from shape[1] - confirm which axis holds the classes.
        self.n_class = int(data_y.shape[0])
        self.model = None
        self._create_architecture(data_X, data_y)

    def _create_architecture(self, data_X, data_y):
        """Create the frozen backbone, attach the head and compile."""
        input_shape = [int(dim) for dim in data_X.shape[-3:]]
        self.model = MobileNet(include_top=False,
                               weights=None,
                               input_tensor=None,
                               input_shape=input_shape,
                               pooling=None)
        self.model.load_weights('./weights/mobilenet_1_0_224_tf_no_top.h5')

        # Freeze the previous layers.
        for layer in self.model.layers:
            layer.trainable = False

        # include_top=False drops the classifier, so add our own head.
        x = GlobalAveragePooling2D()(self.model.output)
        # Fully-connected layer.
        x = Dense(1024, activation='relu')(x)
        # Dropout is a layer: configure it first, then call it on the tensor.
        # (The former `Dropout(x, rate=0.5)` passed the tensor as `rate`.)
        x = Dropout(rate=0.5)(x)
        # Softmax layer with one unit per column of data_y.
        x = Dense(int(data_y.shape[1]),
                  activation='softmax',
                  name='predictions')(x)

        # Create the graph of the new model.
        self.model = Model(inputs=self.model.inputs,
                           outputs=x,
                           name='MobileNet')
        self.model.compile(optimizer=tf.train.AdamOptimizer(),
                           loss='categorical_crossentropy',
                           metrics=['accuracy', 'mean_squared_error'])

    def train(self, train_generator, validation_generator):
        """Fit the model for 20 epochs of one step each on the generators."""
        print('Training Model')
        # Fits the model on batches produced by the generators.
        self.model.fit_generator(train_generator,
                                 steps_per_epoch=1,
                                 epochs=20,
                                 validation_steps=1,
                                 validation_data=validation_generator,
                                 verbose=1)
def get_densenet121_model(classes=2):
    """Build a DenseNet121 classifier with a frozen backbone.

    Args:
        classes: number of units of the final softmax layer.

    Returns:
        Tuple ``(model, checkpoint, tensorboard, preprocess_input,
        decode_img)``: the compiled model, a ``ModelCheckpoint`` callback
        writing to ``./ckpt/densenet121.h5``, a ``TensorBoard`` callback
        logging to ``./log/densenet121``, and the image normalisation
        function together with its inverse.
    """
    # Per-channel offsets and common scale of the normalisation
    # (presumably ImageNet channel means in BGR order - TODO confirm).
    channel_offsets = (103.94, 116.78, 123.68)
    scale = 0.017

    def preprocess_input(img):
        """Return a normalised float32 copy of ``img`` (channels on axis 2)."""
        # Work on a float copy: writing floats in place into an integer
        # image truncates/wraps the values and also mutates the caller's
        # array.
        out = np.array(img, dtype=np.float64)
        for channel, offset in enumerate(channel_offsets):
            out[:, :, channel] = (out[:, :, channel] - offset) * scale
        return out.astype(np.float32)

    def decode_img(img):
        """Invert ``preprocess_input`` and return a uint8 image copy."""
        out = np.array(img, dtype=np.float64)
        for channel, offset in enumerate(channel_offsets):
            out[:, :, channel] = (out[:, :, channel] / scale) + offset
        # Round and clip before the cast so that float error does not turn
        # e.g. 127.9999 into 127 and out-of-range values do not wrap around.
        return np.clip(np.rint(out), 0, 255).astype(np.uint8)

    base_model = tf.keras.applications.DenseNet121(include_top=False,
                                                   classes=2)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    pre = Dense(classes, activation='softmax', name='fc1000')(x)
    model = Model(inputs=base_model.input, outputs=pre)
    model.summary()

    # Only the new head is trained; the backbone stays frozen.
    for layer in base_model.layers:
        layer.trainable = False

    ckpt = './ckpt/densenet121.h5'
    checkpoint = ModelCheckpoint(filepath=ckpt)
    tensorboard = TensorBoard(log_dir='./log/densenet121')

    if os.path.exists(ckpt):
        # Resume from an earlier run.
        model.load_weights(ckpt, by_name=True)
        print("load done")
    else:
        # First run: export a diagram of the architecture.
        plot_model(model, to_file='densenet121.png')

    model.compile(optimizer=tf.train.AdamOptimizer(0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model, checkpoint, tensorboard, preprocess_input, decode_img
def get_mobilev2_model(classes=2):
    """Build a MobileNetV2 classifier with a frozen backbone.

    Args:
        classes: number of units of the final softmax layer.

    Returns:
        Tuple ``(model, checkpoint, tensorboard, preprocess_input,
        decode_img)``: the compiled model, a ``ModelCheckpoint`` callback,
        a ``TensorBoard`` callback, and the image scaling function together
        with its inverse.
    """

    def preprocess_input(img):
        """Scale ``img`` by 1/128, shift by -1 and cast to float32."""
        return (img / 128. - 1.).astype(np.float32)

    def decode_img(img):
        """Undo ``preprocess_input`` and cast to uint8."""
        return ((img + 1.) * 128.).astype(np.uint8)

    backbone = MobileNetV2(include_top=False, input_shape=(224, 224, 3))
    pooled = GlobalAveragePooling2D()(backbone.output)
    probabilities = Dense(classes, activation='softmax')(pooled)
    model = Model(inputs=backbone.input, outputs=probabilities)
    model.summary()

    # Freezing these layers makes them untrainable.
    # Transfer learning: reuse the pretrained weights, replace the
    # fully-connected layer and train only that.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    weights_path = './ckpt/mobilev2.h5'
    checkpoint = ModelCheckpoint(filepath=weights_path)
    log_dir = './log/mobilev2'
    tensorboard = TensorBoard(log_dir=log_dir)

    if not os.path.exists(weights_path):
        plot_model(model, to_file='mobilev2.png')
    else:
        model.load_weights(weights_path)
        print('load done')

    optimizer = SGD(lr=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model, checkpoint, tensorboard, preprocess_input, decode_img
def _build_nufft_net(num_points):
    """Build the uncompiled network mapping (num_points, 4) inputs to a (512, 256) output."""
    input_points = Input(shape=(num_points, 4))

    # Input transformation net: predicts a 4x4 matrix applied to every point.
    x = input_points
    x = Convolution1D(64, 1, activation='relu',
                      input_shape=(num_points, 4))(x)
    x = BatchNormalization()(x)
    x = Convolution1D(128, 1, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Convolution1D(512, 1, activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling1D(pool_size=num_points)(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    # Zero kernel + identity bias: the transform starts as the identity,
    # consistent with the 64x64 feature transform below. The previous
    # hand-written 16-element bias had ones at positions 0, 4, 9 and 15,
    # which is not the flattened 4x4 identity (0, 5, 10, 15).
    x = Dense(16,
              weights=[
                  np.zeros([256, 16]),
                  np.eye(4).flatten().astype(np.float32)
              ])(x)
    input_T = Reshape((4, 4))(x)

    # forward net
    g = Lambda(mat_mul, arguments={'B': input_T})(input_points)
    g = Convolution1D(64, 1, input_shape=(num_points, 3),
                      activation='relu')(g)
    g = BatchNormalization()(g)
    g = Convolution1D(64, 1, input_shape=(num_points, 3),
                      activation='relu')(g)
    g = BatchNormalization()(g)

    # feature transformation net
    f = Convolution1D(64, 1, activation='relu')(g)
    f = BatchNormalization()(f)
    f = Convolution1D(128, 1, activation='relu')(f)
    f = BatchNormalization()(f)
    f = Convolution1D(128, 1, activation='relu')(f)
    f = BatchNormalization()(f)
    f = MaxPooling1D(pool_size=num_points)(f)
    f = Dense(512, activation='relu')(f)
    f = BatchNormalization()(f)
    f = Dense(256, activation='relu')(f)
    f = BatchNormalization()(f)
    f = Dense(64 * 64,
              weights=[
                  np.zeros([256, 64 * 64]),
                  np.eye(64).flatten().astype(np.float32)
              ])(f)
    feature_T = Reshape((64, 64))(f)

    # forward net
    g = Lambda(mat_mul, arguments={'B': feature_T})(g)
    seg_part1 = g
    g = Convolution1D(64, 1, activation='relu')(g)
    g = BatchNormalization()(g)
    g = Convolution1D(32, 1, activation='relu')(g)
    g = BatchNormalization()(g)
    g = Convolution1D(32, 1, activation='relu')(g)
    g = BatchNormalization()(g)

    # global_feature
    global_feature = MaxPooling1D(pool_size=num_points)(g)
    global_feature = Lambda(exp_dim,
                            arguments={'num_points': num_points})(global_feature)

    # point_net_seg: per-point features concatenated with the global feature,
    # then reduced along the point axis with strided convolutions.
    c = concatenate([seg_part1, global_feature])
    c = Convolution1D(256, 1, activation='relu')(c)
    c = BatchNormalization()(c)
    for filters in (128, 128, 128, 64, 64):
        c = Convolution1D(filters, 4, activation='relu', strides=4)(c)
        c = BatchNormalization()(c)
    c = Convolution1D(32, 1, activation='relu')(c)
    c = BatchNormalization()(c)

    # Sequence -> 64-vector -> 8x8 single-channel map for the decoder.
    c = CuDNNLSTM(64, return_sequences=False)(c)
    c = Reshape([8, 8, 1])(c)

    # Decoder: stack of transposed convolutions.
    c = Conv2DTranspose(8, (3, 3), padding="same", activation="relu",
                        strides=(2, 2))(c)
    c = Conv2DTranspose(8, (3, 3), padding="valid", activation="relu")(c)
    c = tf.keras.layers.BatchNormalization()(c)
    for filters in (16, 32, 32, 32, 64, 64):
        c = Conv2DTranspose(filters, (3, 3), padding="valid",
                            activation="relu")(c)
        c = tf.keras.layers.BatchNormalization()(c)
    for _ in range(2):
        c = Conv2DTranspose(128, (3, 3), padding="same", activation="relu",
                            strides=(2, 2))(c)
        c = tf.keras.layers.BatchNormalization()(c)
        c = Conv2DTranspose(128, (3, 3), padding="valid",
                            activation="relu")(c)
        c = tf.keras.layers.BatchNormalization()(c)
    # No activation on this layer, as in the original definition.
    c = Conv2DTranspose(64, (3, 3), padding="same", strides=(4, 2))(c)
    c = tf.keras.layers.BatchNormalization()(c)
    c = Conv2DTranspose(32, (3, 3), padding="valid", activation="relu")(c)
    c = tf.keras.layers.BatchNormalization()(c)
    c = Conv2DTranspose(32, (3, 3), padding="valid", activation="relu")(c)
    c = tf.keras.layers.BatchNormalization()(c)
    c = Conv2DTranspose(32, (3, 3), padding="same", activation="relu",
                        strides=(1, 1))(c)
    c = tf.keras.layers.BatchNormalization()(c)
    c = Conv2DTranspose(32, (3, 1), padding="valid", activation="relu")(c)
    c = tf.keras.layers.BatchNormalization()(c)
    c = Conv2DTranspose(32, (3, 1), padding="valid", activation="relu")(c)
    c = tf.keras.layers.BatchNormalization()(c)
    c = Conv2DTranspose(16, (1, 1), padding="valid", activation="relu")(c)
    c = tf.keras.layers.BatchNormalization()(c)
    c = Conv2DTranspose(8, (1, 1), padding="valid", activation="relu")(c)
    c = tf.keras.layers.BatchNormalization()(c)
    c = Conv2DTranspose(1, (1, 1), padding="valid")(c)

    prediction = tf.keras.layers.Reshape([512, 256])(c)
    return Model(inputs=input_points, outputs=prediction)


def _save_random_predictions(model, inputs, targets, num_available,
                             num_to_save, output_shape, file_pattern,
                             inputs_path=None):
    """Predict on randomly drawn samples and save each prediction stacked on its ground truth."""
    # Nothing to do when no prediction is requested or more are requested
    # than there are samples (also avoids predict(batch_size=0)).
    if num_to_save <= 0 or num_to_save > num_available:
        return
    # randint's upper bound is exclusive, so pass num_available itself;
    # `num_available - 1` could never pick the last sample and raised for a
    # single-sample set.
    rand_ix = np.random.randint(0, num_available, num_to_save)
    kspace = inputs[rand_ix]
    if inputs_path is not None:
        np.save(inputs_path, kspace)
    chosen_targets = targets[rand_ix]
    preds = model.predict(kspace, batch_size=num_to_save)
    for i in range(num_to_save):
        output = np.reshape(preds[i], output_shape) * 255
        stacked = np.concatenate([output, chosen_targets[i]], axis=0)
        np.save(file_pattern % (i + 1), stacked)


def main(arg):
    """Build, optionally train/evaluate the network and save predictions.

    Creates the output folders, builds and compiles the model, optionally
    restores weights, trains on the data returned by ``load_data`` and saves
    the model, then saves randomly chosen train and test predictions under
    ``./saved_predictions/``.

    NOTE(review): the body reads its settings from the module-level ``args``
    namespace, not from the ``arg`` parameter - confirm that callers pass the
    same object (or rename the parameter).
    """
    for folder in ('./saved_predictions/', './saved_models/',
                   './training_checkpoints/'):
        Path(folder).mkdir(exist_ok=True)

    input_yx_size = tuple(args.input_yx_size)
    batch_size = args.batch_size
    epochs = args.epochs
    learning_rate = args.learning_rate
    num_test_samples = args.num_test_samples
    save_weights = args.save_weights
    every = args.every
    num_samples = args.num_samples
    save_train_prediction = args.save_train_prediction
    save_test_prediction = args.save_test_prediction
    verbose = args.verbose
    validation_ratio = args.validation_ratio
    decay = args.decay
    load_weights = args.load_weights
    is_flat_channel_in = args.is_flat_channel_in
    y_axis_len, x_axis_len = input_yx_size
    num_points = y_axis_len * x_axis_len

    # define model
    model = _build_nufft_net(num_points)
    opt = tf.keras.optimizers.Adam(lr=learning_rate, decay=decay)
    loss = tf.keras.losses.MeanSquaredError()
    metrics = ['mse']
    # `==`, not `is`: identity comparison against a string literal is
    # implementation-dependent, so a command-line "MAE" was never matched.
    if args.loss == "MAE":
        loss = tf.keras.losses.MeanAbsoluteError()
        metrics = ['mae']
    model.compile(
        loss=loss,
        optimizer=opt,
        metrics=metrics,
    )
    model.summary()

    if load_weights:
        model.load_weights('./training_checkpoints/cp-best_loss.ckpt')

    # edit data_loader.py if you want to play with data
    input_ks, ground_truth = load_data(num_samples,
                                       is_flat_channel_in=is_flat_channel_in)
    input_ks = input_ks / np.max(input_ks)

    checkpoint_path = "./training_checkpoints/cp-{epoch:04d}.ckpt"
    checkpoint_dir = os.path.dirname(checkpoint_path)

    # Callbacks: TensorBoard always, weight checkpoints only on request.
    cp_callback = []
    NAME = "NUFFT_NET"
    tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))
    cp_callback.append(tensorboard)
    if save_weights:
        # NOTE(review): the callback is given checkpoint_dir rather than the
        # per-epoch checkpoint_path pattern, and nothing here writes the
        # 'cp-best_loss.ckpt' file loaded above - confirm the intended path.
        cp_callback.append(
            tf.keras.callbacks.ModelCheckpoint(checkpoint_dir,
                                               save_weights_only=True,
                                               verbose=verbose,
                                               period=every))

    if args.is_train:
        model.fit(input_ks,
                  ground_truth,
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_split=validation_ratio,
                  callbacks=cp_callback)

    if args.name_model:
        # Save into the folder created above (the path used to be
        # misspelled './saved_mdoels/').
        model.save('./saved_models/' + args.name_model)

    dict_name = './saved_predictions/'
    # return to image size
    x_axis_len = int(x_axis_len / 4)
    output_shape = (y_axis_len * 2, x_axis_len)
    np.random.seed(int(time()))

    _save_random_predictions(
        model, input_ks, ground_truth, num_samples, save_train_prediction,
        output_shape, dict_name + 'prediction%d.npy',
        inputs_path=("./saved_predictions/inputs.npy"
                     if args.save_input else None))

    input_ks, ground_truth = load_data(num_test_samples,
                                       'test',
                                       is_flat_channel_in=is_flat_channel_in)
    input_ks = input_ks / np.max(input_ks)

    if args.is_eval:
        model.evaluate(input_ks,
                       ground_truth,
                       batch_size,
                       verbose,
                       callbacks=cp_callback)

    _save_random_predictions(
        model, input_ks, ground_truth, num_test_samples, save_test_prediction,
        output_shape, dict_name + 'test_prediction%d.npy',
        inputs_path=("./saved_predictions/test_inputs.npy"
                     if args.save_input else None))
def testDiversVaries():
    """
    Export lucid feature visualisations for a fine-tuned and an original model.

    Steps, in order:
      1. create a TF session and register it with Keras before any model is
         built;
      2. build the original model and a second one with a 25-unit softmax
         layer, then load the weights stored at ``path_to_model`` into the
         latter (by layer name);
      3. call ``CompNet_FT_lucidIm.get_gap_between_weights`` and keep, for
         every layer that is trainable in both models, the first ``num_top``
         entries of its ``dict_layers_argsort`` value (presumably the
         channels whose weights changed the most - TODO confirm against
         CompNet_FT_lucidIm);
      4. freeze each model's graph to a ``.pb`` file and call
         ``lucid_utils.print_images`` on the selected (layer, index) pairs.

    Statement order matters here: the session has to be set before models
    are created, and ``predict`` is called before each graph is frozen.
    """
    #tf.keras.backend.clear_session()
    #tf.reset_default_graph()
    #K.set_learning_phase(0)
    sess = tf.Session()
    #graph = tf.get_default_graph()
    #keras.backend.set_session(sess)
    # IMPORTANT: models have to be loaded AFTER SETTING THE SESSION for keras!
    # Otherwise, their weights will be unavailable in the threads after the session there has been set
    set_session(sess)
    original_model = resnet_trained(n_retrain_layers=20) # This will load a tf.keras model
    base_model = resnet_trained(20)
    predictions = Dense(25, activation='softmax')(base_model.output)
    net_finetuned = Model(inputs=base_model.input, outputs=predictions)
    # Forward pass on random data before the weights are loaded.
    net_finetuned.predict(np.random.rand(1,224,224,3))
    # Names of the layers that are trainable in the original model.
    trainable_layers_name = []
    for original_layer in original_model.layers:
        if original_layer.trainable:
            trainable_layers_name += [original_layer.name]
    #C:\media\gonthier\HDD2\output_exp\rasta_models\resnet_2017_7_31-19_9_45
    path_to_model = os.path.join(os.sep,'media','gonthier','HDD2','output_exp','rasta_models','resnet_2017_7_31-19_9_45','model.h5')
    constrNet = 'LResNet50' # For Lecoutre ResNet50 version
    model_name = 'Lecoutre2017'
    input_name_lucid = 'input_1'
    tf.keras.backend.set_image_data_format('channels_last')
    net_finetuned.load_weights(path_to_model,by_name=True)
    net_finetuned.build((224,224,3))
    net_finetuned.summary()
    net_finetuned.predict(np.random.rand(1,224,224,3))
    #net_finetuned = keras.models.load_model(path_to_model,compile=True)
    #net_finetuned = load_model(path_to_model,compile=True)
    number_of_trainable_layer = 20  # NOTE(review): not used below
    #
    #list_layer_index_to_print = []
    #for layer in model.layers:
    #    trainable_l = layer.trainable
    #    name_l = layer.name
    #    if trainable_l and 'res' in name_l:
    #        print(name_l,trainable_l)
    #        num_features = tf.shape(layer.bias).eval(session=sess)[0]
    #        list_layer_index_to_print += [name_l,np.arange(0,num_features)]
    #
    #for layer in original_model.layers:
    #    print(layer)
    #    trainable_l = layer.trainable
    #    name_l = layer.name
    #    if trainable_l and 'res' in name_l:
    #        print(name_l,trainable_l)
    #        num_features = tf.shape(layer.bias).eval(session=sess)[0]
    #        list_layer_index_to_print += [name_l,np.arange(0,num_features)]
    #list_weights,list_name_layers = get_weights_and_name_layers_forPurekerasModel(original_model)
    list_weights,list_name_layers = CompNet_FT_lucidIm.get_weights_and_name_layers(original_model)
    dict_layers_relative_diff,dict_layers_argsort = CompNet_FT_lucidIm.get_gap_between_weights(list_name_layers,\
        list_weights,net_finetuned)
    # Layers that are trainable in the fine-tuned model and were also
    # trainable in the original one.
    layer_considered_for_print_im = []
    for layer in net_finetuned.layers:
        trainable_l = layer.trainable
        name_l = layer.name
        print(name_l,trainable_l)
        if trainable_l and (name_l in trainable_layers_name):
            layer_considered_for_print_im += [name_l]
    num_top = 3
    list_layer_index_to_print_base_model = []  # NOTE(review): filled but never read
    list_layer_index_to_print = []
    #print(layer_considered_for_print_im)
    # Keep the first num_top indices of every retained layer.
    for key in dict_layers_argsort.keys():
        #print(key)
        if not(key in layer_considered_for_print_im):
            continue
        for k in range(num_top):
            topk = dict_layers_argsort[key][k]
            list_layer_index_to_print += [[key,topk]]
            list_layer_index_to_print_base_model += [[key,topk]]
    print('list_layer_index_to_print',list_layer_index_to_print)
    #dict_list_layer_index_to_print_base_model[model_name+suffix] = list_layer_index_to_print_base_model
    #dict_layers_relative_diff,dict_layers_argsort = CompNet_FT_lucidIm.get_gap_between_weights(list_name_layers,\
    #                                                                        list_weights,model)
    # For the fine-tuned model !!!
    path_lucid_model = os.path.join(os.sep,'media','gonthier','HDD2','output_exp','Covdata','Lucid_model')
    path = path_lucid_model
    # The '' branch cannot be taken with the path built just above.
    if path=='':
        os.makedirs('./model', exist_ok=True)
        path ='model'
    else:
        os.makedirs(path, exist_ok=True)
    frozen_graph = lucid_utils.freeze_session(sess,
                              output_names=[out.op.name for out in net_finetuned.outputs])
    name_pb = 'tf_graph_'+constrNet+model_name+'.pb'
    #nodes_tab = [n.name for n in tf.get_default_graph().as_graph_def().node]
    #print(nodes_tab)
    tf.io.write_graph(frozen_graph,logdir= path,name= name_pb, as_text=False)
    # Output folder for the images: relative on Windows, absolute otherwise.
    if platform.system()=='Windows':
        output_path = os.path.join('CompModifModel',constrNet)
    else:
        output_path = os.path.join(os.sep,'media','gonthier','HDD2','output_exp','Covdata','CompModifModel',constrNet)
    pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
    # Non-interactive matplotlib backend.
    matplotlib.use('Agg')
    output_path_with_model = os.path.join(output_path,model_name)
    pathlib.Path(output_path_with_model).mkdir(parents=True, exist_ok=True)
    # global sess
    # global graph
    # with graph.as_default():
    #     set_session(sess)
    #     net_finetuned.predict(np.random.rand(1,224,224,3))
    net_finetuned.predict(np.random.rand(1,224,224,3))
    lucid_utils.print_images(model_path=path_lucid_model+'/'+name_pb,list_layer_index_to_print=list_layer_index_to_print\
         ,path_output=output_path_with_model,prexif_name=model_name,input_name=input_name_lucid,Net=constrNet)
    # For the original one !!!
    original_model.predict(np.random.rand(1,224,224,3))
    #sess = keras.backend.get_session()
    #sess.run()
    frozen_graph = lucid_utils.freeze_session(sess,
                              output_names=[out.op.name for out in original_model.outputs])
    name_pb = 'tf_graph_'+constrNet+'PretrainedImageNet.pb'
    tf.io.write_graph(frozen_graph,logdir= path,name= name_pb, as_text=False)
    # NOTE(review): prexif_name is still model_name here, so these images go
    # to the same folder with the same prefix as the fine-tuned ones - confirm
    # they do not overwrite each other.
    lucid_utils.print_images(model_path=path_lucid_model+'/'+name_pb,list_layer_index_to_print=list_layer_index_to_print\
         ,path_output=output_path_with_model,prexif_name=model_name,input_name=input_name_lucid,Net=constrNet)
class GenericModel:
    """Base wrapper around a Keras model.

    Subclasses implement ``model_structure`` and ``get_input_shape``;
    ``build_model`` then assembles the Keras ``Model``. The remaining
    methods forward to the wrapped model and raise ``ValueError`` while no
    model is loaded.
    """

    @staticmethod
    def load_from(path):
        """Return a ``GenericModel`` wrapping the model stored at ``path``."""
        model = GenericModel()
        model.model = load_model(path)
        return model

    def __init__(self):
        self.model = None  # wrapped Keras model, set by build_model/load_from
        self.registered_callbacks = []  # callbacks passed to fit()
        self.id = 'generic_model'  # first path component of log/checkpoint folders
        self.time = round(time())  # creation timestamp (seconds)
        self.desc = None  # optional suffix for the run folder name

    def build_model(self):
        """Create ``self.model`` from the subclass' structure and print it."""
        img_input = Input(self.get_input_shape())
        last_layer = self.model_structure(img_input)
        self.model = Model(img_input, last_layer)
        self.model.summary()

    def compile(self, loss_function, metric_functions=None, optimizer=None):
        """Compile the wrapped model.

        Args:
            loss_function: loss passed to ``Model.compile``.
            metric_functions: optional list of metrics.
            optimizer: optimizer to use; when ``None`` a fresh
                ``Adam(1e-3, epsilon=1e-6)`` is created.

        Raises:
            ValueError: if no model is loaded.
        """
        self.require_model_loaded()
        if optimizer is None:
            # Created per call: an optimizer instance in the signature would
            # be built once at definition time and shared by every model
            # compiled with the default.
            optimizer = Adam(1e-3, epsilon=1e-6)
        return self.model.compile(loss=loss_function,
                                  optimizer=optimizer,
                                  metrics=metric_functions)

    def model_structure(self, input_img):
        """Return the output tensor for ``input_img`` (subclass hook)."""
        raise NotImplementedError

    def get_input_shape(self):
        """Return the input shape of the model (subclass hook)."""
        raise NotImplementedError

    def register_std_callbacks(self,
                               tensorboard_logs_folder=None,
                               checkpoint_path=None):
        """Register TensorBoard and/or checkpoint callbacks for this run.

        Each run gets its own ``<id>/<timestamp>[_<desc>]`` sub-folder below
        the given base folders. A callback is only added when its base
        folder is not ``None``.

        Raises:
            ValueError: if no model is loaded.
        """
        self.require_model_loaded()
        run_id = str(time())
        if self.desc is not None:
            run_id += "_" + self.desc
        folder_id = os.path.join(self.id, run_id)

        if tensorboard_logs_folder is not None:
            self.registered_callbacks.append(
                TensorBoard(log_dir=os.path.join(tensorboard_logs_folder,
                                                 folder_id),
                            histogram_freq=0,
                            write_graph=True,
                            write_images=True))

        if checkpoint_path is not None:
            store_path = os.path.join(checkpoint_path, folder_id)
            # exist_ok avoids the check-then-create race of exists()+makedirs.
            os.makedirs(store_path, exist_ok=True)
            store_path = os.path.join(
                store_path, 'e{epoch:02d}-l{loss:.4f}-v{val_loss:.4f}.ckpt')
            print("Storing to %s" % store_path)
            self.registered_callbacks.append(
                ModelCheckpoint(store_path,
                                monitor='val_loss',
                                verbose=1,
                                period=1,
                                save_best_only=False,
                                mode='min'))

    def train_with_generator(self,
                             training_data_generator,
                             epochs,
                             steps_per_epoch,
                             validation_data=None):
        """Fit the model on a generator using the registered callbacks.

        ``validation_data`` is only forwarded to ``fit`` when it is given.

        Raises:
            ValueError: if no model is loaded.
        """
        # Same guard as the other model-dependent methods.
        self.require_model_loaded()
        optional_args = {}
        if validation_data is not None:
            optional_args["validation_data"] = validation_data
        self.model.fit(training_data_generator,
                       use_multiprocessing=True,
                       workers=4,
                       steps_per_epoch=steps_per_epoch,
                       callbacks=self.registered_callbacks,
                       epochs=epochs,
                       verbose=1,
                       **optional_args)

    def require_model_loaded(self):
        """Raise ``ValueError`` when ``self.model`` is still ``None``."""
        if self.model is None:
            raise ValueError("Model is not build yet")

    def load_weights(self, path):
        """Load weights from ``path`` into the wrapped model."""
        self.require_model_loaded()
        return self.model.load_weights(path)

    def predict(self, batch):
        """Return the wrapped model's predictions for ``batch``."""
        self.require_model_loaded()
        return self.model.predict(batch)
optimizer=optimizer) printf("train epoch %d loss %.4f ci %.4f \n" % (epoch, train_loss, train_ci)) printf("validating epoch %s..." % (epoch, )) val_loss, val_ci = loop_dataset(val_inds, optimizer=None) printf("validating epoch %d loss %.4f ci %.4f \n" % (epoch, val_loss, val_ci)) if val_loss <= best_metric: best_metric = val_loss DTAModel.save_weights(os.path.join(chkpt_subdir, "DTA"), ) wait = 0 else: wait += 1 if wait > args.patience: break DTAModel.load_weights(os.path.join(chkpt_subdir, "DTA")) printf("start testing...") test_loss, test_ci = loop_dataset(test_inds, optimizer=None) if test_ci > best_ci: best_ci = test_ci best_loss = test_loss best_it = it printf("CV %d test loss: %.4f ci: %.4f \n" % (it, test_loss, test_ci)) printf("Best iteration in fold-5 CV: %d, Best loss: %.4f, Best CI: %.4f." % (best_it, best_loss, best_ci))
class jyHEDModelV1(jyModelBase):
    """Multi-output convolutional model with five side branches and a fused output.

    Thirteen 3x3 convolutions are arranged in five stages separated by 2x2
    max-pooling. Each stage feeds a ``sideBranch``; the five branch outputs
    are concatenated and fused by a 1x1 convolution. Only the fused output
    (``output6``) carries a loss.
    """

    def __init__(self):
        super(jyHEDModelV1, self).__init__()
        self.__listLayerName = []
        self.__pVisualModel = None

    def structureModel(self):
        """Create and compile ``self._pModel``."""
        # (filters, conv layer names, second argument of sideBranch) per stage
        stage_specs = (
            (64, ('Con1', 'Con2'), 1),
            (128, ('Con3', 'Con4'), 2),
            (256, ('Con5', 'Con6', 'Con7'), 4),
            (512, ('Con8', 'Con9', 'Con10'), 8),
            (512, ('Con11', 'Con12', 'Con13'), 16),
        )

        net_input = layers.Input(shape=self._inputShape,
                                 batch_size=self._iBatchSize)
        tensor = net_input
        side_outputs = []
        for stage_no, (filters, conv_names, factor) in enumerate(stage_specs,
                                                                 start=1):
            # Every stage but the first starts by pooling the previous one.
            if stage_no > 1:
                tensor = layers.MaxPooling2D((2, 2),
                                             name='MaxPooling%d' % (stage_no - 1),
                                             strides=2,
                                             padding='SAME')(tensor)
            for conv_name in conv_names:
                # Only the very first convolution is given input_shape.
                extra = ({'input_shape': self._inputShape}
                         if conv_name == 'Con1' else {})
                tensor = layers.Conv2D(filters, (3, 3),
                                       name=conv_name,
                                       activation='relu',
                                       padding='SAME',
                                       strides=1,
                                       **extra)(tensor)
            side_outputs.append(sideBranch(tensor, factor))

        fused = layers.Concatenate(axis=-1)(side_outputs)
        # learn fusion weight
        fused = layers.Conv2D(1, (1, 1),
                              name='Fuse',
                              padding='SAME',
                              use_bias=False,
                              activation=None)(fused)

        # output1..output5 come from the side branches, output6 from the fusion.
        outputs = [
            layers.Activation('sigmoid', name='output%d' % idx)(branch)
            for idx, branch in enumerate(side_outputs + [fused], start=1)
        ]

        self._pModel = Model(inputs=net_input, outputs=outputs)
        pAdam = optimizers.adam(lr=0.0001)
        self._pModel.compile(
            loss={'output6': classBalancedSigmoidCrossEntropy},
            optimizer=pAdam)
        # self._pModel.summary()

    def startTrain(self, listDS, iMaxLen, iBatchSize):
        """Fit the model on ``listDS[0]`` and validate on ``listDS[1]``.

        Steps per epoch are ``iMaxLen[k] / iBatchSize[k]`` truncated to an
        integer, with k = 0 for training and k = 1 for validation.
        """
        train_iter = tf.compat.v1.data.make_one_shot_iterator(listDS[0])
        valid_iter = tf.compat.v1.data.make_one_shot_iterator(listDS[1])
        train_steps = int(iMaxLen[0] / iBatchSize[0])
        valid_steps = int(iMaxLen[1] / iBatchSize[1])
        self._pModel.fit(train_iter,
                         validation_data=valid_iter,
                         epochs=self._iEpochs,
                         callbacks=[self._pSaveModel, self._pTensorboard],
                         steps_per_epoch=train_steps,
                         validation_steps=valid_steps)

    def loadWeights(self, strPath):
        """Load weights from ``strPath`` and return the model used by ``predict``."""
        # last = tf.train.latest_checkpoint(strPath)
        # checkPoint = tf.train.load_checkpoint(strPath)
        self._pModel.load_weights(strPath)
        # visual model: record every layer name (and touch its output tensor)
        layer_outputs = []
        for current_layer in self._pModel.layers:
            self.__listLayerName.append(current_layer.name)
            layer_outputs.append(current_layer.output)
        # self.__pVisualModel = Model(self.__pModel.inputs, outputs=outputs)
        self.__pVisualModel = Model(self._pModel.inputs,
                                    outputs=self._pModel.outputs)
        return self.__pVisualModel

    def predict(self, IMG):
        """Run the model on one image and return its outputs as a list.

        The image is divided by 255, shown in a matplotlib figure named
        'Input' and passed through the model; the elapsed prediction time is
        printed. Every output is reshaped by dropping its first axis, and in
        the last output all values above 0.5 are set to 1.
        """
        pil_image = image.array_to_img(IMG)  # .resize((256, 144))
        scaled = np.array(image.img_to_array(pil_image) / 255.0)

        # show the input image
        plt.figure(num='Input')
        plt.imshow(scaled)

        batch = np.expand_dims(scaled, axis=0)
        started = datetime.datetime.now()
        raw_outputs = self.__pVisualModel.predict(batch)
        finished = datetime.datetime.now()
        print(finished - started)

        listOutput = [
            out.reshape(out.shape[1], out.shape[2], out.shape[3])
            for out in raw_outputs
        ]
        last_map = listOutput[-1]
        last_map[last_map > 0.5] = 1
        listOutput[-1] = last_map
        return listOutput

    def getModelConfig(self):
        """Return the configured batch size."""
        return self._iBatchSize
class SSD(BaseSSD):
    """SSD detector; the model built here is named 'ssd_pvanet'.

    ``build`` wires the backbone, five extra feature-map layers and the
    per-feature-map head layers (``conf_layers``, ``loc_layers``,
    ``anchor_layers``) into a single Keras model with three outputs:
    'scores', 'offsets' and 'default_boxes'.
    """

    def __init__(self, aspect_ratios=None, image_size=None):
        """Create the head layers and build the model.

        Args:
            aspect_ratios: list of box aspect ratios; defaults to ``[1.]``.
            image_size: side length of the square input image, or ``None``
                for a variable spatial size.
        """
        self.model = None
        if aspect_ratios is None:
            aspect_ratios = [1.]
        self.aspect_ratios = aspect_ratios
        # One box per aspect ratio, plus one extra box when ratio 1 is
        # present. The parentheses matter: without them the expression
        # parsed as `(len(...) + 1) if ... else 0` and gave 0 boxes whenever
        # 1.0 was missing from the list.
        self.num_boxes = len(aspect_ratios) + (1 if 1. in aspect_ratios else 0)
        self.create_head_layers()
        self.build(input_shape=(image_size, image_size, 3))

    def build(self, input_shape):
        """Assemble ``self.model`` for inputs of shape ``input_shape``."""
        input_tensor = Input(shape=input_shape)

        conv11 = CRelu(kernel_size=7, filters=16, strides=2,
                       name='conv1_1')(input_tensor)
        pool11 = MaxPooling2D(pool_size=3, strides=2, padding='same',
                              name='pool1_1')(conv11)

        conv21 = ResidualCRelu(params="3 1 PJ 32-24-128 NO",
                               name='conv2_1')(pool11)
        conv22 = ResidualCRelu(params="3 1 NO 32-24-128 BN",
                               name='conv2_2')(conv21)
        conv23 = ResidualCRelu(params="3 1 NO 32-24-128 BN",
                               name='conv2_3')(conv22)

        conv31 = ResidualCRelu(params="3 2 PJ 64-48-128 BN",
                               name='conv3_1')(conv23)
        conv32 = ResidualCRelu(params="3 1 NO 64-48-128 BN",
                               name='conv3_2')(conv31)
        conv33 = ResidualCRelu(params="3 1 PJ 64-48-192 BN",
                               name='conv3_3')(conv32)
        conv34 = ResidualCRelu(params="3 1 NO 64-48-192 BN",
                               name='conv3_4')(conv33)

        conv41 = Inception(params="2 PJ 64 64-128 32-48-48 256",
                           name='conv4_1')(conv34)
        conv42 = Inception(params="1 NO 64 64-128 32-48-48 256",
                           name='conv4_2')(conv41)
        conv43 = Inception(params="1 NO 64 64-128 32-48-48 256",
                           name='conv4_3')(conv42)
        conv44 = Inception(params="1 NO 64 64-128 32-48-48 256",
                           name='conv4_4')(conv43)

        conv51 = Inception(params="2 PJ 64 96-192 32-64-64 384",
                           name='conv5_1')(conv44)
        conv52 = Inception(params="1 NO 64 96-192 32-64-64 384",
                           name='conv5_2')(conv51)
        conv53 = Inception(params="1 NO 64 96-192 32-64-64 384",
                           name='conv5_3')(conv52)
        conv54 = Inception(params="1 NO 64 96-192 32-64-64 384",
                           name='conv5_4_pre')(conv53)
        conv54 = BatchNormalization(scale=False, name='conv5_4_bn')(conv54)
        conv54 = ReLU(name='conv5_4')(conv54)

        # Pool conv3_4 and upsample conv5_4 before concatenating them with
        # conv4_4.
        downscale = MaxPooling2D(pool_size=3, strides=2, padding='same',
                                 name='downscale')(conv34)
        upscale = tf.keras.layers.UpSampling2D(interpolation='bilinear',
                                               name='upscale')(conv54)
        concat = Concatenate(name='concat')([downscale, conv44, upscale])
        final = conv(filters=768, strides=1, kernel_size=1,
                     activation='relu', name='pva_final')(concat)

        # extra feature map layers
        extra1 = ConvBn(256, 1, name='extra1_shrink')(final)
        extra1 = ConvBn(512, 3, strides=2, padding='same',
                        name='extra1')(extra1)
        extra2 = ConvBn(128, 1, name='extra2_shrink')(extra1)
        extra2 = ConvBn(256, 3, strides=2, padding='same',
                        name='extra2')(extra2)
        extra3 = ConvBn(128, 1, name='extra3_shrink')(extra2)
        extra3 = ConvBn(256, 3, name='extra3')(extra3)
        extra4 = ConvBn(128, 1, name='extra4_shrink')(extra3)
        extra4 = ConvBn(256, 3, name='extra4')(extra4)
        extra5 = ConvBn(128, 1, name='extra5_shrink')(extra4)
        extra5 = ConvBn(256, 4, name='extra5')(extra5)

        feature_maps = [conv34, final, extra1, extra2, extra3, extra4, extra5]

        # Apply the i-th head layers to the i-th feature map. Indexing (not
        # zip) keeps the IndexError if fewer head layers than feature maps
        # were created.
        confs, locs, anchors = [], [], []
        for i, feature_map in enumerate(feature_maps):
            confs.append(self.conf_layers[i](feature_map))
            locs.append(self.loc_layers[i](feature_map))
            anchors.append(self.anchor_layers[i](feature_map))

        # Flatten every map to (boxes, k) so they can be joined on axis 1.
        confs_reshaped = [Reshape((-1, 1))(conf) for conf in confs]
        locs_reshaped = [Reshape((-1, 4))(loc) for loc in locs]
        anchors_reshaped = [Reshape((-1, 4))(db) for db in anchors]

        conf_concat = Concatenate(axis=1, name='scores')(confs_reshaped)
        loc_concat = Concatenate(axis=1, name='offsets')(locs_reshaped)
        anchor_concat = Concatenate(axis=1,
                                    name='default_boxes')(anchors_reshaped)

        self.model = Model(input_tensor,
                           [conf_concat, loc_concat, anchor_concat],
                           name='ssd_pvanet')

    def init_pvanet(self, path):
        """Load weights from ``path`` into the layers with matching names."""
        self.model.load_weights(path, by_name=True)
merged_layers = Dense(1024, activation='selu')(merged_layers) output = Dense(12, kernel_initializer='normal', activation='linear')(merged_layers) model = Model(inputs=[image_a, image_b, image_fov], outputs=output) model.compile(optimizer=tf.keras.optimizers.Adam(0.00005, decay=0.00001), loss=custom_objective, metrics=[ loss_in_cm, loss_in_radian, loss_in_cm_x, loss_in_cm_y, loss_in_cm_z ]) model.summary() if os.path.isfile(SAVED_MODEL_W): model.load_weights(SAVED_MODEL_W) print('weights are loaded') # ============================================================================ # --- Train and print accuracy ----------------------------------------------- # ---------------------------------------------------------------------------- callback = TensorBoard('./logs') callback.set_model(model) train_names = [ 'train_loss', 'train_loss_in_cm', 'train_loss_in_radian', 'train_loss_in_cm_x', 'train_loss_in_cm_y', 'train_loss_in_cm_z' ] val_names = [ 'val_loss', 'val_loss_in_cm', 'val_loss_in_radian', 'val_loss_in_cm_x', 'val_loss_in_cm_y', 'val_loss_in_cm_z'
class FusionModel:
    """Adversarial colorization model that fuses instance-level and full-image features.

    The class wires three networks together:

    * ``foreground_generator`` - the instance network, run on per-instance
      luminance crops;
    * ``fusion_generator`` - predicts the ``ab`` channels and a class vector
      from the full-image luminance plus the instance features;
    * ``fusion_discriminator`` - critic trained with a Wasserstein loss and a
      gradient-penalty term.

    Two trainable graphs are compiled from them: ``discriminator_model``
    (critic update) and ``combined`` (generator update).
    """

    def __init__(self, config, load_weight_path=None, ab_loss='mse'):
        """Build and compile all networks.

        :param config: object providing ``IMAGE_SIZE``, ``BATCH_SIZE`` and ``LOG_DIR``.
        :param load_weight_path: optional path of a saved ChromaGAN model whose
            layer weights are copied into the instance and fusion networks.
        :param ab_loss: loss used for the predicted ``ab`` channels.
        """
        img_shape = (config.IMAGE_SIZE, config.IMAGE_SIZE)

        # Creating generator and discriminator
        optimizer = Adam(0.00002, 0.5)
        self.foreground_generator = instance_network(img_shape)
        self.fusion_discriminator = discriminator_network(img_shape)
        self.fusion_discriminator.compile(loss=wasserstein_loss_dummy,
                                          optimizer=optimizer)
        self.fusion_generator = fusion_network(img_shape, config.BATCH_SIZE)
        self.fusion_generator.compile(loss=[ab_loss, 'kld'],
                                      optimizer=optimizer)

        if load_weight_path:
            chroma_gan = load_model(load_weight_path)
            print('Loading chroma GAN parameter to instance network...')
            # Instance-network layers carry an 'fg_' prefix in front of the
            # ChromaGAN layer name they correspond to.
            self._transfer_weights(self.foreground_generator, chroma_gan,
                                   prefix='fg_')
            print('Loading chroma GAN parameter to fusion network...')
            self._transfer_weights(self.fusion_generator, chroma_gan)

        # Fg=instance prediction
        fg_img_l = Input(shape=(*img_shape, 1, MAX_INSTANCES))
        # self.foreground_generator.trainable = False
        (fg_model_3, fg_conv2d_11, fg_conv2d_13, fg_conv2d_15,
         fg_conv2d_17) = self.foreground_generator(fg_img_l)

        # Fusion prediction
        fusion_img_l = Input(shape=(*img_shape, 1))
        fusion_img_real_ab = Input(shape=(*img_shape, 2))
        fg_bbox = Input(shape=(4, MAX_INSTANCES))
        fg_mask = Input(shape=(*img_shape, MAX_INSTANCES))

        # The generator is frozen while the discriminator graph is compiled.
        self.fusion_generator.trainable = False
        fusion_img_pred_ab, fusion_class_vec = self.fusion_generator([
            fusion_img_l, fg_model_3, fg_conv2d_11, fg_conv2d_13,
            fg_conv2d_15, fg_conv2d_17, fg_bbox, fg_mask
        ])

        dis_pred_ab = self.fusion_discriminator(
            [fusion_img_pred_ab, fusion_img_l])
        dis_real_ab = self.fusion_discriminator(
            [fusion_img_real_ab, fusion_img_l])

        # Sample the gradient penalty
        img_ab_interp_samples = RandomWeightedAverage()(
            [fusion_img_pred_ab, fusion_img_real_ab])
        dis_interp_ab = self.fusion_discriminator(
            [img_ab_interp_samples, fusion_img_l])
        partial_gp_loss = partial(
            gradient_penalty_loss,
            averaged_samples=img_ab_interp_samples,
            gradient_penalty_weight=GRADIENT_PENALTY_WEIGHT)
        # A functools.partial has no __name__; one is assigned explicitly
        # before the object is handed to compile() as a loss.
        partial_gp_loss.__name__ = 'gradient_penalty'

        # Compile D and G as well as combined
        self.discriminator_model = Model(
            inputs=[
                fusion_img_l, fusion_img_real_ab, fg_img_l, fg_bbox, fg_mask
            ],
            outputs=[dis_real_ab, dis_pred_ab, dis_interp_ab])
        self.discriminator_model.compile(
            optimizer=optimizer,
            loss=[
                wasserstein_loss_dummy, wasserstein_loss_dummy,
                partial_gp_loss
            ],
            loss_weights=[-1.0, 1.0, 1.0])

        # Roles are swapped for the generator update graph.
        self.fusion_generator.trainable = True
        self.fusion_discriminator.trainable = False
        self.combined = Model(
            inputs=[fusion_img_l, fg_img_l, fg_bbox, fg_mask],
            outputs=[fusion_img_pred_ab, fusion_class_vec, dis_pred_ab])
        self.combined.compile(loss=[ab_loss, 'kld', wasserstein_loss_dummy],
                              loss_weights=[1.0, 0.003, -0.1],
                              optimizer=optimizer)

        # Monitor stuff
        self.callback = TensorBoard(config.LOG_DIR)
        self.callback.set_model(self.combined)
        self.train_names = [
            'loss', 'mse_loss', 'kullback_loss', 'wasserstein_loss'
        ]
        self.disc_names = ['disc_loss', 'disc_valid', 'disc_fake', 'disc_gp']

        self.test_loss_array = []
        self.g_loss_array = []

    @staticmethod
    def _transfer_weights(target_model, source_model, prefix=''):
        """Copy weights from ``source_model`` into matching layers of ``target_model``.

        A target layer named ``prefix + X`` receives the weights of the source
        layer named ``X``. Target layers without the prefix are ignored, and
        the layer ``prefix + 'model_3'`` is always skipped. Failures are
        reported on stdout and never abort the transfer (best effort).
        """
        source_names = [layer.name for layer in source_model.layers]
        for target_layer in target_model.layers:
            name = target_layer.name
            if name == prefix + 'model_3':
                print('model 3 skip')
                continue
            if not name.startswith(prefix):
                continue
            # Only the name lookup may report "not found"; a ValueError raised
            # by set_weights (e.g. a shape mismatch) is a different failure
            # and is printed as such below.
            try:
                j = source_names.index(name[len(prefix):])
            except ValueError:
                print(f'Layer {name} not found in chroma gan.')
                continue
            try:
                target_layer.set_weights(source_model.layers[j].get_weights())
            except Exception as e:
                print(e)
            else:
                print(f'Successfully set weights for layer {name}')

    def train(self,
              data: Data,
              test_data,
              log,
              config,
              skip_to_after_epoch=None):
        """Alternate generator and discriminator updates for ``config.NUM_EPOCHS`` epochs.

        :param data: training data source; must provide ``batch_size``,
            ``size`` and ``generate_batch()``.
        :param test_data: data source sampled after every epoch.
        :param log: not used by this method.
        :param config: object providing ``MODEL_DIR`` and ``NUM_EPOCHS``.
        :param skip_to_after_epoch: zero-based index of an already finished
            epoch. When given (``0`` included), that epoch's checkpoints are
            loaded and training resumes with the following epoch.
        """
        # Load VGG network
        VGG_modelF = applications.vgg16.VGG16(weights='imagenet',
                                              include_top=True)

        # Real, Fake and Dummy for Discriminator
        positive_y = np.ones((data.batch_size, 1), dtype=np.float32)
        negative_y = -positive_y
        dummy_y = np.zeros((data.batch_size, 1), dtype=np.float32)

        # total number of batches in one epoch
        total_batch = int(data.size / data.batch_size)
        print(f'batch_size={data.batch_size} * total_batch={total_batch}')

        def save_path(kind, epoch):
            return os.path.join(config.MODEL_DIR,
                                f"fusion_{kind}Epoch{epoch}.h5")

        # Epochs are numbered from 0, so 0 is a legitimate value here and must
        # not be confused with "no resume" (None).
        if skip_to_after_epoch is not None:
            start_epoch = skip_to_after_epoch + 1
            print(f"Loading weights from epoch {skip_to_after_epoch}")
            self.combined.load_weights(
                save_path("combined", skip_to_after_epoch))
            self.fusion_discriminator.load_weights(
                save_path("discriminator", skip_to_after_epoch))
        else:
            start_epoch = 0

        for epoch in range(start_epoch, config.NUM_EPOCHS):
            for batch in tqdm(range(total_batch)):
                train_batch = data.generate_batch()
                resized_l = train_batch.resized_images.l
                resized_ab = train_batch.resized_images.ab

                # GT vgg: the single luminance channel is tiled to three
                # channels before being fed to VGG16.
                predictVGG = VGG_modelF.predict(
                    np.tile(resized_l, [1, 1, 1, 3]))

                # train generator
                g_loss = self.combined.train_on_batch([
                    resized_l, train_batch.instances.l,
                    train_batch.instances.bbox, train_batch.instances.mask
                ], [resized_ab, predictVGG, positive_y])

                # train discriminator
                d_loss = self.discriminator_model.train_on_batch([
                    resized_l, resized_ab, train_batch.instances.l,
                    train_batch.instances.bbox, train_batch.instances.mask
                ], [positive_y, negative_y, dummy_y])

                # update log files
                step = epoch * total_batch + batch + 1
                write_log(self.callback, self.train_names, g_loss, step)
                write_log(self.callback, self.disc_names, d_loss, step)

                if batch % 10 == 0:
                    print(
                        f"[Epoch {epoch}] [Batch {batch}/{total_batch}] [generator loss: {g_loss[0]:08f}] [discriminator loss: {d_loss[0]:08f}]"
                    )

            print('Saving models...')
            self.combined.save(save_path("combined", epoch))
            self.fusion_discriminator.save(save_path("discriminator", epoch))
            print('Models saved.')

            print('Sampling test images...')
            # sample images after each epoch
            self.sample_images(test_data, epoch, config)

    def sample_images(self, test_data: Data, epoch, config):
        """Colorize every batch of ``test_data`` and save the reconstructions.

        :param test_data: data source providing ``size``, ``batch_size`` and
            ``generate_batch()``.
        :param epoch: epoch index, used as a prefix of the saved file names.
        :param config: forwarded unchanged to ``reconstruct_and_save``.
        """
        total_batch = int(ceil(test_data.size / test_data.batch_size))
        for _ in range(total_batch):
            # load test data
            test_batch = test_data.generate_batch()

            # predict AB channels
            (fg_model_3, fg_conv2d_11, fg_conv2d_13, fg_conv2d_15,
             fg_conv2d_17) = self.foreground_generator.predict(
                 test_batch.instances.l)
            fusion_img_pred_ab, _ = self.fusion_generator.predict([
                test_batch.resized_images.l, fg_model_3, fg_conv2d_11,
                fg_conv2d_13, fg_conv2d_15, fg_conv2d_17,
                test_batch.instances.bbox, test_batch.instances.mask
            ])

            # print results
            for i in range(test_data.batch_size):
                original_full_img = test_batch.images.full[i]
                height, width, _ = original_full_img.shape
                # Bring the prediction back to the original resolution;
                # cv2.resize takes the target size as (width, height).
                pred_ab = cv2.resize(
                    deprocess_float2int(fusion_img_pred_ab[i]),
                    (width, height))
                reconstruct_and_save(
                    test_batch.images.l[i], pred_ab,
                    f'epoch{epoch}_{test_batch.file_names[i]}', config)
def knowledge_transfer(current_student: Model, method: Method, loss: Union[LossType, List[LossType]]) -> \
        Tuple[Model, History]:
    """
    Performs KT.

    The student is first adapted to the selected method (and to selective learning, if enabled),
    compiled and trained, and finally rewound to its normal state. Apart from the three parameters,
    every setting used here (labels, optimizer hyper-parameters, callbacks configuration, ...) is read
    from module-level names.

    :param current_student: the student to be used for the current KT method.
    :param method: the method to be used for the KT.
    :param loss: the KT loss to be used.
    :return: Tuple containing a student Keras model and its training History object.
    """
    kt_logging.debug('Configuring student...')
    weights = None
    y_train_adapted = y_train_concat
    y_val_adapted = y_val_concat
    metrics = {}

    if method == Method.DISTILLATION:
        # Adapt student
        current_student = kd_student_adaptation(current_student, temperature)
        # Create KT metrics.
        metrics = generate_supervised_metrics(method)
        monitoring_metric = 'val_accuracy'
    elif method == Method.PKT_PLUS_DISTILLATION:
        # Adapt student
        current_student = pkt_plus_kd_student_adaptation(current_student, temperature)
        # Create importance weights for the different losses.
        weights = [kd_importance_weight, pkt_importance_weight]

        if selective_learning:
            # One (KD, PKT) weight pair per submodel.
            weights = weights * n_submodels
            # Adapt the labels: duplicate the label list into NEW lists. Extending in place would
            # mutate the shared module-level lists through the aliases above, doubling them again
            # on every call.
            y_train_adapted = list(y_train_concat) * 2
            y_val_adapted = list(y_val_concat) * 2
        else:
            # Adapt the labels.
            y_train_adapted = [y_train_concat, y_train_concat]
            y_val_adapted = [y_val_concat, y_val_concat]

        # Create KT metrics.
        metrics = generate_supervised_metrics(method)
        monitoring_metric = 'val_concatenate_accuracy'
    else:
        # PKT performs KT, but also rotates the space, thus evaluating results has no meaning,
        # since the neurons representing the classes are not the same anymore.
        monitoring_metric = 'val_loss'

    if selective_learning:
        current_student = selective_learning_student_adaptation(current_student, n_submodels)
        monitoring_metric = 'val_loss'

    # Create optimizer.
    optimizer = initialize_optimizer(optimizer_name, learning_rate, decay, beta1, beta2, rho, momentum,
                                     clip_norm, clip_value)

    # Compile student.
    current_student.compile(optimizer, loss, metrics, weights)

    # Initialize callbacks list.
    kt_logging.debug('Initializing Callbacks...')
    # Create a temp file, in order to save the model, if needed.
    tmp_weights_path = None
    if use_best_model:
        # NOTE(review): `mktemp` must return an iterator of candidate names for `next()` to work;
        # confirm which callable is imported under that name.
        tmp_weights_path = join(gettempdir(), next(mktemp()) + '.h5')

    callbacks_list = init_callbacks(monitoring_metric, lr_patience, lr_decay, lr_min, early_stopping_patience,
                                    verbosity, tmp_weights_path, selective_learning)

    # Train student.
    history = current_student.fit(x_train, y_train_adapted, batch_size=batch_size, callbacks=callbacks_list,
                                  epochs=epochs, validation_data=(x_val, y_val_adapted), verbose=verbosity)

    # The path is None when the best model is not being tracked; it must not be passed to exists().
    if tmp_weights_path is not None and exists(tmp_weights_path):
        # Load best weights and delete the temp file.
        current_student.load_weights(tmp_weights_path)
        remove(tmp_weights_path)

    # Rewind student to its normal state, if necessary.
    if selective_learning:
        current_student = selective_learning_student_rewind(current_student, optimizer=optimizer,
                                                            loss=loss[0], metrics=metrics)
    if method == Method.DISTILLATION:
        current_student = kd_student_rewind(current_student)
    elif method == Method.PKT_PLUS_DISTILLATION:
        current_student = pkt_plus_kd_rewind(current_student)

    return current_student, history
class FaceEncoder:
    """Convolutional encoder mapping a face image to six parameter vectors.

    ``build()`` must be called before the instance is called, summarised or
    loaded with weights, because it is the method that creates ``self.model``.
    """

    def __init__(self, image_size, gf_dim, gfc_dim, sh_dim, tx_dim, co_dim,
                 m_dim, il_dim, ep_dim):
        """Store the architecture hyper-parameters.

        :param image_size: side length of the square RGB input image.
        :param gf_dim: base number of convolution filters.
        :param gfc_dim: base width of the pose/illumination/colour heads
            (each uses ``gfc_dim / 8`` filters).
        :param sh_dim: size of the shape output.
        :param tx_dim: size of the texture output.
        :param co_dim: size of the colour output.
        :param m_dim: size of the pose output.
        :param il_dim: size of the illumination output.
        :param ep_dim: size of the expression output.
        """
        self.image_size = image_size
        self.bn_axis = 3
        self.gf_dim = gf_dim
        self.gfc_dim = gfc_dim
        self.m_dim = m_dim
        self.il_dim = il_dim
        self.sh_dim = sh_dim
        self.tx_dim = tx_dim
        self.co_dim = co_dim
        self.ep_dim = ep_dim

    def build(self):
        """Create the Keras model and store it in ``self.model``."""
        inputs = Input(shape=[self.image_size, self.image_size, 3],
                       name='image_input')
        x = self.get_encoder(inputs=inputs, is_reuse=False, is_training=True)
        self.model = Model(inputs=inputs, outputs=x, name='FaceEncoder')

    def __call__(self, inputs, training=False):
        """Run the built model on ``inputs``."""
        return self.model(inputs=inputs, training=training)

    def summary(self):
        """Print the Keras model summary."""
        # Model.summary() prints by itself and returns None; wrapping it in
        # print() only appended a stray "None" line to the output.
        self.model.summary()

    def load_pretrained(self, weights_path):
        """Load model weights from ``weights_path``."""
        self.model.load_weights(filepath=weights_path)

    def get_encoder(self, inputs, is_reuse=False, is_training=True):
        """Build the encoder graph on top of ``inputs``.

        :param inputs: input image tensor.
        :param is_reuse: when false, the batch-normalisation layers are
            (re)created as ``self.g_bn*`` attributes; when true, the layers
            from a previous call are reused.
        :param is_training: forwarded as ``training`` to every
            batch-normalisation call.
        :return: tuple ``(shape, texture, expression, pose, illumination, colour)``.
        """
        if not is_reuse:

            def batch_norm(layer_name):
                return BatchNormalization(axis=self.bn_axis,
                                          name=layer_name,
                                          scale=True,
                                          fused=True)

            # Layer names are kept exactly as they were (including the
            # irregular 'g_h_bn4_c' and 'g_k_bn5_exo') so that previously
            # saved weights keep matching.
            self.g_bn0_0 = batch_norm('g_k_bn0_0')
            self.g_bn0_1 = batch_norm('g_k_bn0_1')
            self.g_bn0_2 = batch_norm('g_k_bn0_2')
            self.g_bn0_3 = batch_norm('g_k_bn0_3')
            self.g_bn1_0 = batch_norm('g_k_bn1_0')
            self.g_bn1_1 = batch_norm('g_k_bn1_1')
            self.g_bn1_2 = batch_norm('g_k_bn1_2')
            self.g_bn1_3 = batch_norm('g_k_bn1_3')
            self.g_bn2_0 = batch_norm('g_k_bn2_0')
            self.g_bn2_1 = batch_norm('g_k_bn2_1')
            self.g_bn2_2 = batch_norm('g_k_bn2_2')
            self.g_bn2_3 = batch_norm('g_k_bn2_3')
            self.g_bn3_0 = batch_norm('g_k_bn3_0')
            self.g_bn3_1 = batch_norm('g_k_bn3_1')
            self.g_bn3_2 = batch_norm('g_k_bn3_2')
            self.g_bn3_3 = batch_norm('g_k_bn3_3')
            self.g_bn4_0 = batch_norm('g_k_bn4_0')
            self.g_bn4_1 = batch_norm('g_k_bn4_1')
            self.g_bn4_2 = batch_norm('g_k_bn4_2')
            self.g_bn4_c = batch_norm('g_h_bn4_c')
            self.g_bn5 = batch_norm('g_k_bn5')
            self.g_bn5_m = batch_norm('g_k_bn5_m')
            self.g_bn5_ill = batch_norm('g_k_bn5_ill')
            self.g_bn5_shape = batch_norm('g_k_bn5_shape')
            self.g_bn5_col = batch_norm('g_k_bn5_col')
            self.g_bn5_exp = batch_norm('g_k_bn5_exo')
            self.g_bn5_tex = batch_norm('g_k_bn5_tex')

        def conv_bn_elu(tensor, bn, filters, kernel, stride, conv_name):
            """Bias-free convolution followed by batch norm and ELU."""
            conv = Conv2D(filters, (kernel, kernel), (stride, stride),
                          padding='SAME',
                          use_bias=False,
                          name=conv_name)(tensor)
            return elu(bn(conv, training=is_training))

        gf = self.gf_dim

        # inputs are of size 224 x 224 x 3
        k0_1 = conv_bn_elu(inputs, self.g_bn0_1, gf * 1, 7, 2, 'g_k01_conv')
        k0_2 = conv_bn_elu(k0_1, self.g_bn0_2, gf * 2, 3, 1, 'g_k02_conv')

        k1_0 = conv_bn_elu(k0_2, self.g_bn1_0, gf * 2, 3, 2, 'g_k10_conv')
        k1_1 = conv_bn_elu(k1_0, self.g_bn1_1, gf * 2, 3, 1, 'g_k11_conv')
        k1_2 = conv_bn_elu(k1_1, self.g_bn1_2, gf * 4, 3, 1, 'g_k12_conv')

        k2_0 = conv_bn_elu(k1_2, self.g_bn2_0, gf * 4, 3, 2, 'g_k20_conv')
        k2_1 = conv_bn_elu(k2_0, self.g_bn2_1, gf * 3, 3, 1, 'g_k21_conv')
        k2_2 = conv_bn_elu(k2_1, self.g_bn2_2, gf * 6, 3, 1, 'g_k22_conv')

        k3_0 = conv_bn_elu(k2_2, self.g_bn3_0, gf * 6, 3, 2, 'g_k30_conv')
        k3_1 = conv_bn_elu(k3_0, self.g_bn3_1, gf * 4, 3, 1, 'g_k31_conv')
        k3_2 = conv_bn_elu(k3_1, self.g_bn3_2, gf * 8, 3, 1, 'g_k32_conv')

        k4_0 = conv_bn_elu(k3_2, self.g_bn4_0, gf * 8, 3, 2, 'g_k40_conv')
        k4_1 = conv_bn_elu(k4_0, self.g_bn4_1, gf * 5, 3, 1, 'g_k41_conv')

        def head_features(bn, filters, conv_name, **conv_kwargs):
            """3x3 convolution + batch norm on the shared trunk output."""
            conv = Conv2D(filters, (3, 3), (1, 1),
                          padding='SAME',
                          name=conv_name,
                          **conv_kwargs)(k4_1)
            return bn(conv, training=is_training)

        def pool_and_flatten(features, channels):
            """Average over the whole feature map and flatten to (-1, channels)."""
            pooled = AveragePooling2D(pool_size=pool_size,
                                      strides=[1, 1],
                                      padding='VALID')(features)
            return tf.reshape(pooled, [-1, channels])

        fc_channels = int(self.gfc_dim / 8)

        # Pose (the only head whose convolution has no bias)
        k51_m = head_features(self.g_bn5_m, fc_channels, 'g_k5_m_conv',
                              use_bias=False)
        # Every head pools over the full spatial extent of the pose features.
        k51_shape_ = k51_m.shape
        pool_size = [k51_shape_[1], k51_shape_[2]]
        k52_m = pool_and_flatten(k51_m, fc_channels)
        k6_m = Dense(self.m_dim, name='g_k6_m_lin')(k52_m)

        # Illumination
        k51_ill = head_features(self.g_bn5_ill, fc_channels, 'g_k5_il_conv')
        k52_ill = pool_and_flatten(k51_ill, fc_channels)
        k6_ill = Dense(self.il_dim, name='g_k6_ill_lin')(k52_ill)

        # Shape
        k51_shape = head_features(self.g_bn5_shape, self.sh_dim,
                                  'g_k5_shape_conv')
        k52_shape = pool_and_flatten(k51_shape, self.sh_dim)

        # Texture
        k51_tex = head_features(self.g_bn5_tex, self.tx_dim, 'g_k5_tex_conv')
        k52_tex = pool_and_flatten(k51_tex, self.tx_dim)

        # Expression
        k51_exp = head_features(self.g_bn5_exp, self.ep_dim, 'g_k5_exp_conv')
        k52_exp = pool_and_flatten(k51_exp, self.ep_dim)

        # Color
        k51_col = head_features(self.g_bn5_col, fc_channels, 'g_k5_col_conv')
        k52_col = pool_and_flatten(k51_col, fc_channels)
        k6_col = Dense(self.co_dim, name='g_k6_col_lin')(k52_col)

        return k52_shape, k52_tex, k52_exp, k6_m, k6_ill, k6_col
def csp_darknet53(input_shape=None,
                  input_tensor=None,
                  include_top=True,
                  weights='imagenet',
                  pooling=None,
                  classes=1000,
                  **kwargs):
    """Build a CSPDarknet53 Keras model, optionally with its classifier head.

    :param input_shape: optional input shape, validated by ``_obtain_input_shape``.
    :param input_tensor: optional tensor to use as the model input.
    :param include_top: whether to append the classification head.
    :param weights: ``None``, ``'imagenet'`` or a path to a weights file.
    :param pooling: ``'avg'`` or ``'max'`` to pool the headless output;
        anything else leaves the feature map untouched.
    :param classes: number of output classes of the classification head.
    :param kwargs: accepted but not used by this function.
    :return: the constructed model, with weights loaded when requested.
    :raises ValueError: for an unknown ``weights`` value, or when ImageNet
        weights are requested with a head of other than 1000 classes.
    """
    is_builtin_choice = weights in {'imagenet', None}
    if not is_builtin_choice and not os.path.exists(weights):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    wants_imagenet = weights == 'imagenet'
    if wants_imagenet and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top`'
            ' as true, `classes` should be 1000')

    # Resolve the input shape for the current image data format.
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=28,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    img_input = Input(shape=input_shape) if input_tensor is None else input_tensor

    features = csp_darknet53_body(img_input)

    if not include_top:
        model_name = 'cspdarknet53_headless'
        if pooling == 'avg':
            features = GlobalAveragePooling2D(name='avg_pool')(features)
        elif pooling == 'max':
            features = GlobalMaxPooling2D(name='max_pool')(features)
    else:
        model_name = 'cspdarknet53'
        features = GlobalAveragePooling2D(name='avg_pool')(features)
        features = Reshape((1, 1, 1024))(features)
        features = DarknetConv2D(classes, (1, 1))(features)
        features = Flatten()(features)
        features = Softmax(name='Predictions/Softmax')(features)

    # When a tensor was supplied, trace it back to its source inputs so the
    # model accounts for any predecessors of `input_tensor`.
    model_inputs = img_input if input_tensor is None else get_source_inputs(input_tensor)

    model = Model(model_inputs, features, name=model_name)

    # Nothing to load for randomly initialised models.
    if weights is None:
        return model

    if wants_imagenet:
        file_name = ('cspdarknet53_weights_tf_dim_ordering_tf_kernels_224.h5'
                     if include_top else
                     'cspdarknet53_weights_tf_dim_ordering_tf_kernels_224_no_top.h5')
        download_url = BASE_WEIGHT_PATH + file_name
        local_file = get_file(file_name, download_url, cache_subdir='models')
        model.load_weights(local_file)
    else:
        model.load_weights(weights)

    return model
class jyHEDModelV2_2_SGD_GradientTape_L1(jyModelBase):
    """Side-branch fusion network trained with an explicit ``tf.GradientTape`` loop.

    The trunk follows a VGG-16-like layout (13 3x3 convolutions in five
    stages). One side branch is taken per stage, the five branches are
    concatenated and fused by a 1x1 convolution with an L1 kernel
    regulariser, and the fused map goes through a sigmoid (``output6``).
    """

    def __init__(self):
        super(jyHEDModelV2_2_SGD_GradientTape_L1, self).__init__()
        self.__listLayerName = []
        self.__pVisualModel = None
        self.__bLoadModel = False
        # Separate TensorBoard writers for training losses, validation
        # losses and gradient statistics.
        self.__pTrainFW = tf.summary.create_file_writer(self._strLogPath + '/train')
        self.__pValidFW = tf.summary.create_file_writer(self._strLogPath + '/valid')
        self.__pMetricsFW = tf.summary.create_file_writer(self._strLogPath + '/metrics')

    def structureModel(self):
        """Build the network, store it in ``self._pModel`` and compile it."""
        weightDecay = 0.00001

        def conv3x3(tensor, filters, name, **extra):
            """3x3 ReLU convolution with L2 weight decay."""
            return layers.Conv2D(filters, (3, 3),
                                 name=name,
                                 activation='relu',
                                 padding='SAME',
                                 strides=1,
                                 kernel_regularizer=l2(weightDecay),
                                 **extra)(tensor)

        def maxpool(tensor, name):
            return layers.MaxPooling2D((2, 2), name=name, strides=2, padding='SAME')(tensor)

        Inputs = layers.Input(shape=self._inputShape, batch_size=self._iBatchSize)

        # The second sideBranch argument doubles at every stage (1, 2, 4, 8,
        # 16) - presumably the up-sampling factor back to input resolution;
        # verify against sideBranch.
        Con1 = conv3x3(Inputs, 64, 'Con1', input_shape=self._inputShape)
        Con2 = conv3x3(Con1, 64, 'Con2')
        Side1 = sideBranch(Con2, 1)
        MaxPooling1 = maxpool(Con2, 'MaxPooling1')

        Con3 = conv3x3(MaxPooling1, 128, 'Con3')
        Con4 = conv3x3(Con3, 128, 'Con4')
        Side2 = sideBranch(Con4, 2)
        MaxPooling2 = maxpool(Con4, 'MaxPooling2')

        Con5 = conv3x3(MaxPooling2, 256, 'Con5')
        Con6 = conv3x3(Con5, 256, 'Con6')
        Con7 = conv3x3(Con6, 256, 'Con7')
        Side3 = sideBranch(Con7, 4)
        MaxPooling3 = maxpool(Con7, 'MaxPooling3')

        Con8 = conv3x3(MaxPooling3, 512, 'Con8')
        Con9 = conv3x3(Con8, 512, 'Con9')
        Con10 = conv3x3(Con9, 512, 'Con10')
        Side4 = sideBranch(Con10, 8)
        MaxPooling4 = maxpool(Con10, 'MaxPooling4')

        Con11 = conv3x3(MaxPooling4, 512, 'Con11')
        Con12 = conv3x3(Con11, 512, 'Con12')
        Con13 = conv3x3(Con12, 512, 'Con13')
        Side5 = sideBranch(Con13, 16)

        Fuse = layers.Concatenate(axis=-1)([Side1, Side2, Side3, Side4, Side5])

        # learn fusion weight
        fuseInitWeight = initializers.constant(0.2)
        Fuse = layers.Conv2D(1, (1, 1),
                             name='Fuse',
                             padding='SAME',
                             use_bias=False,
                             activation=None,
                             kernel_initializer=fuseInitWeight,
                             kernel_regularizer=l1(weightDecay))(Fuse)

        # Only the fused map is a model output; the five side branches are
        # not supervised individually in this variant.
        output6 = layers.Activation('sigmoid', name='output6')(Fuse)
        outputs = [output6]
        self._pModel = Model(inputs=Inputs, outputs=outputs)

        # Only this optimizer was ever used: the Adam and SGD instances that
        # used to be created before it were overwritten immediately.
        pOptimizer = tf.optimizers.SGD(lr=0.5, decay=0., momentum=0.9)
        self._pModel.compile(
            loss={'output6': classBalancedSigmoidCrossEntropy},
            optimizer=pOptimizer)

    def startTrain(self, listDS, iMaxLen, iBatchSize):
        """Run the custom training loop for ``self._iEpochs`` epochs.

        :param listDS: ``[train_dataset, valid_dataset]``.
        :param iMaxLen: ``[train_sample_count, valid_sample_count]``.
        :param iBatchSize: ``[train_batch_size, valid_batch_size]``.
        """
        itrTrain = tf.compat.v1.data.make_one_shot_iterator(listDS[0])
        itrValid = tf.compat.v1.data.make_one_shot_iterator(listDS[1])
        iStepsPerEpochTrain = int(iMaxLen[0] / iBatchSize[0])
        iStepsPerEpochValid = int(iMaxLen[1] / iBatchSize[1])

        # Mapping "output name -> loss function" as passed to compile().
        dictLossGroup = self._pModel.loss
        strSeparator = '\n-----------------------------------------------------------------------\n'

        iTick = 0
        # epoch
        for epoch in range(self._iEpochs):
            # save model once more than self._iPeriod epochs have elapsed
            # since the previous save
            if iTick > self._iPeriod:
                strModelFileName = self._strModelFileName.format(epoch=epoch + 1)
                filepath = self._strSavePath + strModelFileName
                print(self._strFormat %
                      ('Epoch: %s/%s, SaveModel: %s' %
                       (str(epoch), str(self._iEpochs), strModelFileName)))
                self._pModel.save_weights(filepath, overwrite=True)
                iTick = 0
            iTick += 1

            # stepsPerEpoch
            for stepsPerEpoch in range(iStepsPerEpochTrain):
                with tf.GradientTape() as tape:
                    itr = itrTrain.next()
                    # output define as [out1, out2, ....., out6]
                    listPredict = [self._pModel(itr[0])]
                    listLabel = [itr[1]]
                    listLoss = []
                    fAllLoss = 0.
                    template = 'Per: {}/{}, TrainLoss: {} -- '
                    # multiple output, calculate loss
                    for i, key in enumerate(dictLossGroup):
                        # loss function
                        pLoss = dictLossGroup[key]
                        # add regularize: the layers' kernel-regulariser
                        # penalties are added to every output loss
                        regularization_loss = tf.math.add_n(self._pModel.losses)
                        # loss value
                        outputLoss = pLoss(listLabel[i], listPredict[i]) + regularization_loss
                        listLoss.append(outputLoss)
                        # sum of loss
                        fAllLoss += outputLoss
                        # print format
                        template += 'train_loss_%s: {} -- ' % key

                # calculate gradient
                gradient = tape.gradient(fAllLoss, self._pModel.trainable_weights)
                template += '\n'
                print(template.format(stepsPerEpoch + 1, iStepsPerEpochTrain,
                                      fAllLoss, listLoss[0]))
                # backprop
                self._pModel.optimizer.apply_gradients(
                    zip(gradient, self._pModel.trainable_weights))

            # Validation runs once each training epoch has finished: it
            # cannot be interleaved with the training steps, so it is
            # computed here, after the epoch's last step.
            fValidAllLoss = 0.
            listValidLoss = [0 for _ in range(len(dictLossGroup))]
            for stepsPerEpochValid in range(iStepsPerEpochValid):
                itr2 = itrValid.next()
                listPreValid = [self._pModel(itr2[0])]
                listValidLabel = [itr2[1]]
                for i, key in enumerate(dictLossGroup):
                    # loss function
                    pLoss = dictLossGroup[key]
                    # loss value (no regularisation term on validation)
                    outputValidLoss = pLoss(listValidLabel[i], listPreValid[i])
                    listValidLoss[i] += outputValidLoss
                    # sum of loss
                    fValidAllLoss += outputValidLoss

            # mean of val_loss
            fValidAllLoss /= iStepsPerEpochValid
            validTemplate = 'Epoch {}, val_loss: {} -- '.format(epoch + 1, fValidAllLoss)
            for k in range(len(listValidLoss)):
                listValidLoss[k] /= iStepsPerEpochValid
                validTemplate += 'val_loss_{}: {} -- '.format(k + 1, listValidLoss[k])
            print(strSeparator)
            print(validTemplate)
            print(strSeparator)

            # per epoch output: the training values logged here are those of
            # the LAST training step of the epoch
            with self.__pTrainFW.as_default():
                tf.summary.scalar('loss: ', fAllLoss, step=epoch)
                for i, key in enumerate(dictLossGroup):
                    tf.summary.scalar('loss_' + key, listLoss[i], step=epoch)

            with self.__pMetricsFW.as_default():
                # save gradient each layer
                pLayerWeight = self._pModel.trainable_weights
                for pWeight, pGrad in zip(pLayerWeight, gradient):
                    strName = pWeight.name + '/Grad'
                    tf.summary.histogram(strName, pGrad, step=epoch)
                    # mean grad
                    meanGrad = tf.reduce_mean(pGrad)
                    tf.summary.scalar(strName + '/Mean', meanGrad, step=epoch)
                    # model grad
                    tensorNorm = tf.norm(pGrad)
                    tf.summary.scalar(strName + '/Norm', tensorNorm, step=epoch)

            with self.__pValidFW.as_default():
                tf.summary.scalar('loss: ', fValidAllLoss, step=epoch)
                for i, key in enumerate(dictLossGroup):
                    tf.summary.scalar('loss_' + key, listValidLoss[i], step=epoch)

    def loadWeights(self, strPath):
        """Load model weights from ``strPath`` and remember that weights were loaded."""
        self._pModel.load_weights(strPath)
        self.__bLoadModel = True

    def generateVisualModel(self):
        """Create, store and return the model used by ``predict``."""
        # Rebuilt from scratch so that repeated calls do not accumulate
        # duplicate layer names.
        self.__listLayerName = [myLayer.name for myLayer in self._pModel.layers]
        self.__pVisualModel = Model(self._pModel.inputs, outputs=self._pModel.outputs)
        return self.__pVisualModel

    def predict(self, IMG):
        """Run the model on a single image.

        :param IMG: image array accepted by ``image.array_to_img``.
        :return: list with one array per element of the prediction result;
            values above 0.5 in the last array are set to 1.
        """
        # The visual model is created on demand, so predict() also works when
        # generateVisualModel() has not been called beforehand.
        if self.__pVisualModel is None:
            self.generateVisualModel()

        pIMG = image.array_to_img(IMG)
        tensorIMG = image.img_to_array(pIMG)
        x = np.array(tensorIMG / 255.0)

        # show image: generate window
        plt.figure(num='Input')
        plt.imshow(x)

        # add the batch dimension
        x = np.expand_dims(x, axis=0)

        time1 = datetime.datetime.now()
        outputs = self.__pVisualModel.predict(x)
        time2 = datetime.datetime.now()
        print(time2 - time1)

        listOutput = []
        for i in range(len(outputs)):
            outputShape = outputs[i].shape
            singleOut = outputs[i].reshape(outputShape[0], outputShape[1], outputShape[2])
            listOutput.append(singleOut)

        # Saturate confident responses of the last output map.
        singleOut = listOutput[-1]
        singleOut[singleOut > 0.5] = 1
        listOutput[-1] = singleOut
        return listOutput

    def getModelConfig(self):
        """Return the configured batch size."""
        return self._iBatchSize
class RetroCycleGAN:
    """CycleGAN-style model mapping plain word vectors to retrofitted ones.

    Domain A is the "plain" (unfit) word-vector space and domain B the
    "retrofitted" (fit) space, as reflected by the Keras input names used
    below. The object owns:

    * ``g_AB`` / ``g_BA``   - generators A->B and B->A,
    * ``d_A`` / ``d_B``     - per-domain discriminators,
    * ``d_ABBA`` / ``d_BAAB`` - conditional cycle discriminators fed with the
      concatenation of two word vectors,
    * ``combined``          - the model wiring both generators and the two
      domain discriminators together for the generator update.
    """

    def __init__(self, save_index="0", save_folder="./", generator_size=32, discriminator_size=64,
                 word_vector_dimensions=300, discriminator_lr=0.0001, generator_lr=0.0001,
                 lambda_cycle=1, lambda_id_weight=0.01, one_way_mm=True, cycle_mm=True,
                 cycle_dis=True, id_loss=True, cycle_mm_w=2, cycle_loss=True):
        """Build every sub-model and the combined generator-training model.

        Args:
            save_index: Stored on the instance as ``save_index``.
            save_folder: Default folder used by ``save_model``/``load_weights``.
            generator_size: Stored as ``gf`` (not used by the builders here).
            discriminator_size: Stored as ``df`` (not used by the builders here).
            word_vector_dimensions: Dimensionality of one word vector.
            discriminator_lr: Learning rate for all four discriminators.
            generator_lr: Learning rate for the generators and combined model.
            lambda_cycle: Weight of the cycle MAE loss (0 if ``cycle_loss`` is false).
            lambda_id_weight: Weight of the identity loss (0 if ``id_loss`` is false).
            one_way_mm: Train each generator directly with the max-margin loss.
            cycle_mm: Enable the max-margin loss on the cycle reconstructions.
            cycle_dis: Enable the conditional cycle-discriminator updates.
            id_loss: Enable the identity loss.
            cycle_mm_w: Weight of the cycle max-margin loss (0 if ``cycle_mm`` is false).
            cycle_loss: Enable the cycle MAE loss.
        """
        self.cycle_mm = cycle_mm
        self.cycle_dis = cycle_dis
        self.cycle_mae = cycle_loss
        self.id_loss = id_loss
        self.one_way_mm = one_way_mm
        self.cycle_mm_w = cycle_mm_w if self.cycle_mm else 0
        self.save_folder = save_folder

        # Input shape of a single word vector.
        self.word_vector_dimensions = word_vector_dimensions
        self.embeddings_dimensionality = (self.word_vector_dimensions,)
        self.save_index = save_index

        self.gf = generator_size
        self.df = discriminator_size

        # Loss weights; a disabled loss gets weight 0 instead of being removed,
        # so the combined model always has the same eight outputs.
        self.lambda_cycle = lambda_cycle if self.cycle_mae else 0
        self.lambda_id = lambda_id_weight if self.id_loss else 0

        self.d_lr = discriminator_lr
        self.g_lr = generator_lr

        # Sub-models are built in this fixed order (discriminators, cycle
        # discriminators, generators) so that auto-generated layer names stay
        # stable between runs.
        self.d_A = self.build_discriminator(name="word_vector_discriminator")
        self.d_B = self.build_discriminator(name="retrofitted_word_vector_discriminator")
        self.d_ABBA = self.build_c_discriminator(name="cycle_cond_discriminator_unfit")
        self.d_BAAB = self.build_c_discriminator(name="cycle_cond_discriminator_fit")
        self.g_AB = self.build_generator(name="to_retro_generator")
        self.g_BA = self.build_generator(name="from_retro_generator")

        # Symbolic inputs from both domains.
        unfit_wv = Input(shape=self.embeddings_dimensionality, name="plain_word_vector")
        fit_wv = Input(shape=self.embeddings_dimensionality, name="retrofitted_word_vector")

        # Translate to the other domain ...
        fake_B = self.g_AB(unfit_wv)
        fake_A = self.g_BA(fit_wv)
        # ... and back again (A->B->A and B->A->B).
        reconstr_A = self.g_BA(fake_B)
        reconstr_B = self.g_AB(fake_A)
        print("Building recon model")
        print("Done")

        # Identity mapping: each generator applied to a vector that is
        # already in its target domain.
        unfit_wv_id = self.g_BA(unfit_wv)
        fit_wv_id = self.g_AB(fit_wv)

        # Discriminator verdicts on the translated vectors.
        valid_A = self.d_A(fake_A)
        valid_B = self.d_B(fake_B)

        # The combined model is meant to update the generators only.
        # NOTE(review): d_A/d_B are never switched back to trainable before
        # compile_all() compiles them - confirm that their own
        # train_on_batch() calls still update weights on the TF version in use.
        self.d_A.trainable = False
        self.d_B.trainable = False

        self.combined = Model(
            inputs=[unfit_wv, fit_wv],
            outputs=[
                valid_A, valid_B,        # adversarial (binary cross-entropy)
                reconstr_A, reconstr_B,  # cycle MAE
                reconstr_A, reconstr_B,  # cycle max-margin
                unfit_wv_id, fit_wv_id,  # identity MAE
            ],
            name="combinedmodel")

        log_path = './logs'
        callback = keras.callbacks.TensorBoard(log_dir=log_path)
        callback.set_model(self.combined)
        self.combined_callback = callback

    def compile_all(self, optimizer="sgd"):
        """Compile all sub-models and print summaries of three of them.

        Args:
            optimizer: Optimizer family; only ``"adam"`` is implemented.

        Raises:
            KeyError: If ``optimizer`` is anything other than ``"adam"``
                (this includes the default value).
        """

        def max_margin_loss(y_true, y_pred):
            """Hinge on similarity(true, pred) vs. similarity(true, shuffled true)."""
            sim_neg = 25
            sim_margin = 1
            # These do not depend on the negative sample, so compute them once.
            normalize_a = tf.nn.l2_normalize(y_true)
            normalize_b = tf.nn.l2_normalize(y_pred)
            minimize = tf.reduce_sum(tf.multiply(normalize_a, normalize_b))
            cost = 0
            for _ in range(sim_neg):
                # A shuffled copy of the targets serves as the negative sample.
                normalize_c = tf.nn.l2_normalize(tf.random.shuffle(y_true))
                maximize = tf.reduce_sum(tf.multiply(normalize_a, normalize_c))
                mg = sim_margin - minimize + maximize
                cost += tf.keras.backend.clip(mg, 0, 1000)
            return cost / (sim_neg * 1.0)

        def create_opt(lr=0.1):
            if optimizer == "adam":
                # `learning_rate` is the supported keyword; the legacy `lr`
                # alias is deprecated and not honoured by newer releases.
                return tf.optimizers.Adam(learning_rate=lr, epsilon=1e-10)
            raise KeyError("coULD NOT FIND THE OPTIMIZER")

        self.d_A.compile(loss='binary_crossentropy', optimizer=create_opt(self.d_lr),
                         metrics=['accuracy'])
        self.d_ABBA.compile(loss='binary_crossentropy', optimizer=create_opt(self.d_lr),
                            metrics=['accuracy'])
        self.d_BAAB.compile(loss='binary_crossentropy', optimizer=create_opt(self.d_lr),
                            metrics=['accuracy'])
        self.d_B.compile(loss='binary_crossentropy', optimizer=create_opt(self.d_lr),
                         metrics=['accuracy'])
        self.g_AB.compile(loss=max_margin_loss, optimizer=create_opt(self.g_lr))
        self.g_BA.compile(loss=max_margin_loss, optimizer=create_opt(self.g_lr))
        # One loss / weight per output of `combined`, in output order.
        self.combined.compile(
            loss=['binary_crossentropy', 'binary_crossentropy',
                  'mae', 'mae',
                  max_margin_loss, max_margin_loss,
                  'mae', 'mae'],
            loss_weights=[1, 1,
                          self.lambda_cycle * 1, self.lambda_cycle * 1,
                          self.cycle_mm_w, self.cycle_mm_w,
                          self.lambda_id, self.lambda_id],
            optimizer=create_opt(self.g_lr))
        self.g_AB.summary()
        self.d_A.summary()
        self.combined.summary()

    @staticmethod
    def _dense_block(layer_input, hidden_dim, normalization=True, dropout=True,
                     dropout_percentage=0.3):
        """Apply Dense(ReLU), then optional BatchNormalization and Dropout."""
        d = Dense(hidden_dim, activation="relu")(layer_input)
        if normalization:
            d = BatchNormalization()(d)
        if dropout:
            d = Dropout(dropout_percentage)(d)
        return d

    def _build_binary_discriminator(self, input_shape, name, hidden_dim):
        """Build the two-hidden-layer sigmoid classifier shared by all discriminators."""
        inpt = Input(shape=input_shape)
        d1 = self._dense_block(inpt, hidden_dim, normalization=False, dropout=True,
                               dropout_percentage=0.3)
        d1 = self._dense_block(d1, hidden_dim, normalization=True, dropout=True,
                               dropout_percentage=0.3)
        validity = Dense(1, activation="sigmoid", dtype='float32')(d1)
        return Model(inpt, validity, name=name)

    def build_generator(self, name, hidden_dim=2048):
        """Build a generator: two ReLU+Dropout hidden layers and a linear output.

        Args:
            name: Keras model name.
            hidden_dim: Width of both hidden layers.

        Returns:
            A Keras ``Model`` mapping one word vector to one word vector.
        """
        inpt = Input(shape=self.embeddings_dimensionality)
        encoder = self._dense_block(inpt, hidden_dim, normalization=False, dropout=True,
                                    dropout_percentage=0.2)
        decoder = self._dense_block(encoder, hidden_dim, normalization=False, dropout=True,
                                    dropout_percentage=0.2)
        output = Dense(self.word_vector_dimensions)(decoder)
        return Model(inpt, output, name=name)

    def build_discriminator(self, name, hidden_dim=2048):
        """Build a discriminator scoring a single word vector in (0, 1)."""
        return self._build_binary_discriminator(self.embeddings_dimensionality, name, hidden_dim)

    def build_c_discriminator(self, name, hidden_dim=2048):
        """Build a conditional discriminator scoring two concatenated word vectors.

        The input width is twice ``word_vector_dimensions`` (600 for the
        default of 300) because ``train`` feeds it ``tf.concat([a, b], 1)``.
        """
        return self._build_binary_discriminator((2 * self.word_vector_dimensions,), name,
                                                hidden_dim)

    def load_weights(self, preface="", folder=None):
        """Load weights for all models saved by ``save_model``.

        Args:
            preface: File-name prefix that was passed as ``name`` to ``save_model``.
            folder: Directory to read from; defaults to ``self.save_folder``.

        Loading is best-effort: any exception is printed and swallowed, so
        models loaded before the failure keep their new weights.
        """
        if folder is None:
            folder = self.save_folder
        try:
            self.g_AB.reset_states()
            self.g_BA.reset_states()
            self.combined.reset_states()
            self.d_B.reset_states()
            self.d_A.reset_states()
            self.d_A.load_weights(os.path.join(folder, preface + "fromretrodis.h5"))
            self.d_B.load_weights(os.path.join(folder, preface + "toretrodis.h5"))
            self.g_AB.load_weights(os.path.join(folder, preface + "toretrogen.h5"))
            self.g_BA.load_weights(os.path.join(folder, preface + "fromretrogen.h5"))
            self.combined.load_weights(os.path.join(folder, preface + "combined_model.h5"))
        except Exception as e:
            print(e)

    def train(self, epochs, dataset, save_folder, name, batch_size=1, cache=False,
              epochs_per_checkpoint=4, dis_train_amount=3):
        """Train all models and return the per-epoch evaluation history.

        Args:
            epochs: Number of passes over the training data.
            dataset: Mapping with at least the keys ``"original"`` and
                ``"retrofitted"``; it is also forwarded to ``test``.
            save_folder: Passed to the data loader and used as the W&B directory.
            name: W&B run name; also stored as ``self.name``.
            batch_size: Number of word pairs per batch.
            cache: Forwarded to ``tools.load_all_words_dataset_final``.
            epochs_per_checkpoint: A checkpoint is saved whenever
                ``epoch % epochs_per_checkpoint == 0`` (epoch 0 excluded).
            dis_train_amount: Discriminator updates per batch.

        Returns:
            A list of ``(simlex, simverb, card)`` tuples: one per epoch plus a
            final entry computed after training.
        """
        wandb.init(project="retrogan", dir=save_folder)
        wandb.run.name = name
        wandb.run.save()
        self.name = name
        start_time = datetime.datetime.now()
        res = []
        X_train, Y_train = tools.load_all_words_dataset_final(
            dataset["original"], dataset["retrofitted"], save_folder=save_folder, cache=cache)
        print("Shapes of training data:", X_train.shape, Y_train.shape)
        print(X_train)
        print(Y_train)
        print("*" * 100)

        def load_batch(batch_size=32, always_random=False):
            """Yield ``(A, B)`` float32 array pairs covering one shuffled epoch.

            With ``always_random`` every batch is an independent random sample
            of ``batch_size`` indices instead of a slice of the shuffled index.
            Batches containing NaNs, or that fail to assemble, are dropped.
            """

            def _int_load():
                iterable = list(Y_train.index)
                shuffle(iterable)
                batches = []
                print("Prefetching batches")
                for ndx in tqdm(range(0, len(iterable), batch_size)):
                    try:
                        ixs = iterable[ndx:min(ndx + batch_size, len(iterable))]
                        if always_random:
                            ixs = list(np.array(iterable)[
                                random.sample(range(0, len(iterable)), batch_size)])
                        imgs_A = X_train.loc[ixs]
                        imgs_B = Y_train.loc[ixs]
                        if np.isnan(imgs_A).any().any() or np.isnan(imgs_B).any().any():
                            continue
                        batches.append((imgs_A, imgs_B))
                    except Exception:
                        # Best-effort: a failing batch is dropped, training goes on.
                        print("Skipping batch")
                return batches

            batches = _int_load()
            print("Beginning iteration")
            for i in tqdm(range(0, len(batches)), ncols=30):
                imgs_A, imgs_B = batches[i]
                yield (np.array(imgs_A.values, dtype=np.float32),
                       np.array(imgs_B.values, dtype=np.float32))

        self.compile_all("adam")

        def cycle_cond_loss(d_ground, d_approx):
            """Negative mean of log(d_ground) + log(1 - d_approx)."""
            return -1 * tf.reduce_mean(tf.math.log(d_ground) + tf.math.log(1 - d_approx))

        def train_(training_epochs, always_random=False):
            global_step = 0
            for epoch in range(training_epochs):
                for batch_i, (imgs_A, imgs_B) in enumerate(
                        load_batch(batch_size, always_random=always_random)):
                    global_step += 1

                    # Translations and cycle reconstructions used as the
                    # "fake" samples for the discriminator updates.
                    fake_B = self.g_AB.predict(imgs_A)
                    fake_A = self.g_BA.predict(imgs_B)
                    fake_ABBA = self.g_BA.predict(fake_B)
                    fake_BAAB = self.g_AB.predict(fake_A)

                    # Domain discriminators: original = real, translated = fake.
                    valid = np.ones((imgs_A.shape[0],))
                    fake = np.zeros((imgs_A.shape[0],))
                    dA_loss = None
                    dB_loss = None
                    for _ in range(int(dis_train_amount)):
                        dA_loss_real = self.d_A.train_on_batch(imgs_A, valid)
                        dA_loss_fake = self.d_A.train_on_batch(fake_A, fake)
                        dA_step = 0.5 * np.add(dA_loss_real, dA_loss_fake)
                        dA_loss = dA_step if dA_loss is None else dA_loss + dA_step

                        dB_loss_real = self.d_B.train_on_batch(imgs_B, valid)
                        dB_loss_fake = self.d_B.train_on_batch(fake_B, fake)
                        dB_step = 0.5 * np.add(dB_loss_real, dB_loss_fake)
                        dB_loss = dB_step if dB_loss is None else dB_loss + dB_step
                    # [loss, accuracy] averaged over both discriminators and
                    # over the repeated updates.
                    d_loss = (1.0 / dis_train_amount) * 0.5 * np.add(dA_loss, dB_loss)

                    # Conditional cycle discriminators, then the same loss
                    # applied through the combined model's variables.
                    d_cycle_dis = 0
                    g_cycle_dis = 0
                    if self.cycle_dis:
                        with tf.GradientTape() as tape:
                            dA = self.d_ABBA(tf.concat([fake_B, imgs_A], 1))
                            dA_r = self.d_ABBA(tf.concat([fake_B, fake_ABBA], 1))
                            la = cycle_cond_loss(dA, dA_r)
                        tga = tape.gradient(la, self.d_ABBA.trainable_variables)
                        self.d_ABBA.optimizer.apply_gradients(
                            zip(tga, self.d_ABBA.trainable_variables))
                        d_cycle_dis += la

                        with tf.GradientTape() as tape:
                            dB = self.d_BAAB(tf.concat([fake_A, imgs_B], 1))
                            dB_r = self.d_BAAB(tf.concat([fake_A, fake_BAAB], 1))
                            lb = cycle_cond_loss(dB, dB_r)
                        tgb = tape.gradient(lb, self.d_BAAB.trainable_variables)
                        self.d_BAAB.optimizer.apply_gradients(
                            zip(tgb, self.d_BAAB.trainable_variables))
                        d_cycle_dis += lb

                        with tf.GradientTape() as tape:
                            # Re-run the generators inside the tape so the
                            # loss is differentiable w.r.t. their weights.
                            fake_B = self.g_AB(imgs_A)
                            fake_A = self.g_BA(imgs_B)
                            fake_ABBA = self.g_BA(fake_B)
                            fake_BAAB = self.g_AB(fake_A)
                            dB = self.d_BAAB(tf.concat([fake_A, imgs_B], 1))
                            dB_r = self.d_BAAB(tf.concat([fake_A, fake_BAAB], 1))
                            dA = self.d_ABBA(tf.concat([fake_B, imgs_A], 1))
                            dA_r = self.d_ABBA(tf.concat([fake_B, fake_ABBA], 1))
                            la = cycle_cond_loss(dA, dA_r)
                            lb = cycle_cond_loss(dB, dB_r)
                        tga = tape.gradient((la + lb) / 2.0, self.combined.trainable_variables)
                        self.combined.optimizer.apply_gradients(
                            zip(tga, self.combined.trainable_variables))
                        g_cycle_dis += (la + lb) / 2.0

                    # Direct max-margin training of each generator.
                    mm_b_loss = 0
                    mm_a_loss = 0
                    if self.one_way_mm:
                        mm_a_loss = self.g_AB.train_on_batch(imgs_A, imgs_B)
                        mm_b_loss = self.g_BA.train_on_batch(imgs_B, imgs_A)

                    # Generator update through the combined model. Targets
                    # follow the output order: adversarial, cycle MAE,
                    # cycle max-margin, identity.
                    g_loss = self.combined.train_on_batch(
                        [imgs_A, imgs_B],
                        [valid, valid,
                         imgs_A, imgs_B,
                         imgs_A, imgs_B,
                         imgs_A, imgs_B])

                    elapsed_time = datetime.datetime.now() - start_time
                    if batch_i % 50 == 0 and batch_i != 0:
                        # g_loss layout: [total, adv x2, mae x2, mm x2, id x2].
                        print(
                            "\n[Epoch %d/%d] [Batch %d] [D loss: %f, acc: %3d%%] "
                            "[G loss: %05f, adv: %05f, recon: %05f, recon_mm: %05f,id: %05f][mma:%05f,mmb:%05f]time: %s "
                            % (epoch, training_epochs, batch_i,
                               d_loss[0], 100 * d_loss[1],
                               g_loss[0],
                               np.mean(g_loss[1:3]),
                               np.mean(g_loss[3:5]),
                               np.mean(g_loss[5:7]),
                               np.mean(g_loss[7:9]),
                               mm_a_loss, mm_b_loss,
                               elapsed_time))
                        scalars = {
                            "epoch": epoch,
                            "global_step": global_step,
                            "discriminator_loss": d_loss[0],
                            "discriminator_acc": d_loss[1],
                            "combined_loss": g_loss[0] + g_cycle_dis + d_cycle_dis,
                            "loss": g_loss[0] + d_loss[0],
                            "cycle_da": g_loss[1],
                            "cycle_db": g_loss[2],
                            "cycle_dis": d_cycle_dis,
                            "cycle_gen_condis": g_cycle_dis,
                            "MM_ABBA_CYCLE": g_loss[5],
                            "MM_BAAB_CYCLE": g_loss[6],
                            "abba_mae": g_loss[3],
                            "baab_mae": g_loss[4],
                            "idloss_ab": g_loss[7],
                            "idloss_ba": g_loss[8],
                            "mm_ab_loss": mm_a_loss,
                            "mm_ba_loss": mm_b_loss,
                        }
                        wandb.log(scalars, step=global_step)

                # End of epoch: evaluate, optionally checkpoint, record.
                print("\n")
                sl, sv, c = self.test(dataset)
                if epoch % epochs_per_checkpoint == 0 and epoch != 0:
                    self.save_model(name="checkpoint")
                res.append((sl, sv, c))
                wandb.log({"simlex": sl, "simverb": sv, "card": c, "epoch": epoch})
                print(res)
                print("\n")

        print("Actual training")
        train_(epochs)
        print("Final performance")
        sl, sv, c = self.test(dataset)
        res.append((sl, sv, c))
        self.save_model(name="final")
        return res

    def test(self, dataset, simlex="testing/SimLex-999.txt", simverb="testing/SimVerb-3500.txt",
             card="testing/card660.tsv", fasttext="fasttext_model/cc.en.300.bin", prefix="en_"):
        """Evaluate ``g_AB`` on three benchmark files via ``tools.test_sem``.

        Returns:
            ``(simlex, simverb, card)`` - the first element of each
            ``tools.test_sem`` result, in that order.
        """

        def score(location):
            return tools.test_sem(self.g_AB, dataset, dataset_location=location,
                                  fast_text_location=fasttext, prefix=prefix, pt=False)[0]

        sl = score(simlex)
        sv = score(simverb)
        c = score(card)
        return sl, sv, c

    def save_model(self, name=""):
        """Save the five persistent models (without optimizer state).

        Args:
            name: Prefix prepended to every file name inside ``self.save_folder``.
        """
        targets = (
            (self.d_A, "fromretrodis.h5"),
            (self.d_B, "toretrodis.h5"),
            (self.g_AB, "toretrogen.h5"),
            (self.g_BA, "fromretrogen.h5"),
            (self.combined, "combined_model.h5"),
        )
        for model, suffix in targets:
            model.save(os.path.join(self.save_folder, name + suffix), include_optimizer=False)
class BaseKerasModel(BaseModel):
    """Keras-backed model: Dropout/Dense stack with a ``tanh`` output layer.

    The network is assembled in three steps (``create_input_layer``,
    ``create_hidden_layers``, ``create_output_layer``), compiled by
    ``finalize_model`` and then driven one batch at a time through ``fit``.
    """

    model = None
    tensorboard = None
    train_names = ['train_loss', 'train_mse', 'train_mae']
    val_names = ['val_loss', 'val_mse', 'val_mae']
    counter = 0  # number of fit() calls so far
    inputs = None
    hidden_layer = None
    outputs = None

    def __init__(self, use_default_dense=True, activation='relu',
                 kernel_regularizer=tf.keras.regularizers.l1(0.001)):
        """Store the Dense-layer settings.

        Args:
            use_default_dense: When true, ``activation`` and
                ``kernel_regularizer`` are stored on the instance. When false
                neither attribute is set here, so ``create_hidden_layers``
                only works if something else provides them.
            activation: Activation for the hidden Dense layers.
            kernel_regularizer: Kernel regularizer for the hidden Dense layers.
        """
        super().__init__()
        if use_default_dense:
            self.activation = activation
            self.kernel_regularizer = kernel_regularizer

    def create_input_layer(self, input_placeholder: BaseInputFormatter):
        """Create and return the input layer sized by the input formatter."""
        # NOTE(review): this stores an InputLayer *object*, which is later
        # called on by Dropout and passed as Model(inputs=...). Those usually
        # expect a tensor (tf.keras.layers.Input) - confirm this is intended.
        self.inputs = tf.keras.layers.InputLayer(
            input_shape=input_placeholder.get_input_state_dimension())
        return self.inputs

    def create_hidden_layers(self, input_layer=None):
        """Stack Dropout/Dense(128, 64, 32) on the input and return the result.

        Args:
            input_layer: Layer input to build on; defaults to ``self.inputs``.
        """
        if input_layer is None:
            input_layer = self.inputs
        hidden_layer = tf.keras.layers.Dropout(0.3)(input_layer)
        # Each (units, dropout) pair adds one Dense layer followed by Dropout.
        for units, drop_rate in ((128, 0.4), (64, 0.3), (32, 0.1)):
            hidden_layer = tf.keras.layers.Dense(
                units,
                kernel_regularizer=self.kernel_regularizer,
                activation=self.activation)(hidden_layer)
            hidden_layer = tf.keras.layers.Dropout(drop_rate)(hidden_layer)
        self.hidden_layer = hidden_layer
        return self.hidden_layer

    def create_output_layer(self, output_formatter: BaseOutputFormatter, hidden_layer=None):
        """Add the ``tanh`` output layer and build ``self.model``.

        Args:
            output_formatter: Provides the output width via
                ``get_model_output_dimension()[0]``.
            hidden_layer: Layer output to build on; defaults to ``self.hidden_layer``.

        Returns:
            The output tensor, also stored as ``self.outputs``.
        """
        if hidden_layer is None:
            hidden_layer = self.hidden_layer
        self.outputs = tf.keras.layers.Dense(
            output_formatter.get_model_output_dimension()[0],
            activation='tanh')(hidden_layer)
        self.model = Model(inputs=self.inputs, outputs=self.outputs)
        return self.outputs

    def write_log(self, callback, names, logs, batch_no, eval=False):
        """Write one scalar summary per ``(name, value)`` pair.

        Args:
            callback: Object exposing a ``writer`` with ``add_summary``/``flush``.
            names: Metric names.
            logs: Metric values, parallel to ``names``.
            batch_no: Step recorded with every summary.
            eval: When true, tags are prefixed with ``'eval_'``.
        """
        for name, value in zip(names, logs):
            summary = tf.Summary()
            summary_value = summary.value.add()
            summary_value.simple_value = value
            summary_value.tag = 'eval_' + name if eval else name
            callback.writer.add_summary(summary, batch_no)
        # One flush after the batch of summaries is enough to persist them.
        callback.writer.flush()

    def finalize_model(self, logname=None):
        """Compile the model and attach a TensorBoard callback.

        Args:
            logname: Sub-directory of ``./logs/`` to write to. When omitted a
                random number in ``[0, 1000)`` is drawn on each call, so
                separately finalized models do not share a log directory.
        """
        if logname is None:
            logname = str(int(random() * 1000))
        loss, loss_weights = self.create_loss()
        self.model.compile(
            tf.keras.optimizers.Nadam(lr=0.001),
            loss=loss,
            loss_weights=loss_weights,
            metrics=[
                tf.keras.metrics.mean_absolute_error,
                tf.keras.metrics.binary_accuracy,
            ])
        log_name = './logs/' + logname
        self.logger.info("log_name: " + log_name)
        self.tensorboard = tf.keras.callbacks.TensorBoard(
            log_dir=log_name,
            histogram_freq=1,
            write_images=False,
            batch_size=1000,
        )
        self.tensorboard.set_model(self.model)
        self.logger.info("Model has been finalized")

    def fit(self, x, y, batch_size=1):
        """Train on one batch, or evaluate instead on every 200th call.

        On calls where ``counter`` is a multiple of 200 the batch is only
        evaluated (no weight update) and logged with the ``eval_`` prefix.
        """
        if self.counter % 200 == 0:
            logs = self.model.evaluate(x, y, batch_size=batch_size, verbose=1)
            self.write_log(self.tensorboard, self.model.metrics_names, logs,
                           self.counter, eval=True)
            print('step:', self.counter)
        else:
            logs = self.model.train_on_batch(x, y)
            self.write_log(self.tensorboard, self.model.metrics_names, logs, self.counter)
        self.counter += 1

    def predict(self, arr):
        """Return the model's prediction for ``arr``."""
        return self.model.predict(arr)

    def save(self, file_path):
        """Save the model weights to ``file_path``, overwriting any existing file."""
        self.model.save_weights(filepath=file_path, overwrite=True)

    def load(self, file_path):
        """Load model weights from ``file_path`` (resolved to an absolute path)."""
        path = os.path.abspath(file_path)
        self.model.load_weights(filepath=path)

    def create_loss(self):
        """Return the ``(loss, loss_weights)`` pair used by ``finalize_model``."""
        return 'mean_absolute_error', None
reconstruction_loss = mse(inputs, outputs) else: reconstruction_loss = binary_crossentropy(inputs, outputs) reconstruction_loss *= original_dim kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var) kl_loss = K.sum(kl_loss, axis=-1) kl_loss *= -0.5 vae_loss = K.mean(reconstruction_loss + kl_loss) vae.add_loss(vae_loss) vae.compile(optimizer='adam') vae.summary() plot_model(vae, to_file='vae_mlp.png', show_shapes=True) if args.weights: vae.load_weights(args.weights) else: # train the autoencoder vae.fit(x_train, epochs=epochs, batch_size=batch_size, validation_data=(x_test, None)) vae.save_weights('vae_mlp_mnist.h5') plot_results(models, data, batch_size=batch_size, model_name="vae_mlp") # # plot the latent space of the encoded digits ######################################### # x_test_encoded = encoder.predict(x_test, batch_size=batch_size) # plt.figure(figsize=(6, 6)) # plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test) # plt.colorbar()