def fit_one_epoch(model_rpn, model_all, epoch, epoch_size, epoch_size_val, gen, genval, Epoch, callback):
    total_loss = 0
    rpn_loc_loss = 0
    rpn_cls_loss = 0
    roi_loc_loss = 0
    roi_cls_loss = 0
    val_total_loss = 0
    with tqdm(total=epoch_size, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_size:
                break
            X, Y, boxes = batch[0], batch[1], batch[2]
            P_rpn = model_rpn.predict_on_batch(X)
            height, width, _ = np.shape(X[0])
            base_feature_width, base_feature_height = get_img_output_length(width, height)
            anchors = get_anchors([base_feature_width, base_feature_height], width, height)
            results = bbox_util.detection_out_rpn(P_rpn, anchors)
            roi_inputs = []
            out_classes = []
            out_regrs = []
            for i in range(len(X)):
                R = results[i][:, 1:]
                X2, Y1, Y2 = calc_iou(R, config, boxes[i], NUM_CLASSES)
                roi_inputs.append(X2)
                out_classes.append(Y1)
                out_regrs.append(Y2)
            loss_class = model_all.train_on_batch(
                [X, np.array(roi_inputs)],
                [Y[0], Y[1], np.array(out_classes), np.array(out_regrs)])
            write_log(callback,
                      ['total_loss', 'rpn_cls_loss', 'rpn_reg_loss', 'detection_cls_loss', 'detection_reg_loss'],
                      loss_class, iteration)
            rpn_cls_loss += loss_class[1]
            rpn_loc_loss += loss_class[2]
            roi_cls_loss += loss_class[3]
            roi_loc_loss += loss_class[4]
            total_loss = rpn_loc_loss + rpn_cls_loss + roi_loc_loss + roi_cls_loss
            pbar.set_postfix(**{
                'total': total_loss / (iteration + 1),
                'rpn_cls': rpn_cls_loss / (iteration + 1),
                'rpn_loc': rpn_loc_loss / (iteration + 1),
                'roi_cls': roi_cls_loss / (iteration + 1),
                'roi_loc': roi_loc_loss / (iteration + 1),
                'lr': K.get_value(model_rpn.optimizer.lr)
            })
            pbar.update(1)

    print('Start Validation')
    with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(genval):
            if iteration >= epoch_size_val:
                break
            X, Y, boxes = batch[0], batch[1], batch[2]
            P_rpn = model_rpn.predict_on_batch(X)
            height, width, _ = np.shape(X[0])
            base_feature_width, base_feature_height = get_img_output_length(width, height)
            anchors = get_anchors([base_feature_width, base_feature_height], width, height)
            results = bbox_util.detection_out_rpn(P_rpn, anchors)
            roi_inputs = []
            out_classes = []
            out_regrs = []
            for i in range(len(X)):
                R = results[i][:, 1:]
                X2, Y1, Y2 = calc_iou(R, config, boxes[i], NUM_CLASSES)
                roi_inputs.append(X2)
                out_classes.append(Y1)
                out_regrs.append(Y2)
            loss_class = model_all.test_on_batch(
                [X, np.array(roi_inputs)],
                [Y[0], Y[1], np.array(out_classes), np.array(out_regrs)])
            val_total_loss += loss_class[0]
            pbar.set_postfix(**{'total': val_total_loss / (iteration + 1)})
            pbar.update(1)

    print('Finish Validation')
    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss / (epoch_size + 1), val_total_loss / (epoch_size_val + 1)))
    print('Saving state, iter:', str(epoch + 1))
    model_all.save_weights('logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.h5' %
                           ((epoch + 1), total_loss / (epoch_size + 1), val_total_loss / (epoch_size_val + 1)))
    return
def on_epoch_end(self, epoch, logs=None):
    logs = logs or {}
    logs['lr'] = K.get_value(self.model.optimizer.lr)
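# A minimal, self-contained sketch of how a callback like the one above surfaces the
# learning rate in the training history. The LRLogger class name and the toy model are
# illustrative assumptions, not taken from the source; it assumes a TF 2.x legacy
# tf.keras optimizer that exposes `optimizer.lr`, as the surrounding snippets do.
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K


class LRLogger(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        logs['lr'] = K.get_value(self.model.optimizer.lr)


model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01), loss='mse')
history = model.fit(np.random.rand(32, 4), np.random.rand(32, 1),
                    epochs=2, verbose=0, callbacks=[LRLogger()])
# With typical TF 2.x versions the mutated logs propagate into the History callback,
# so one 'lr' value per epoch should show up here.
print(history.history.get('lr'))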
def _test(): import numpy as np import tensorflow.keras.backend as K data_format = "channels_last" # data_format = "channels_first" pretrained = False models = [ (pyramidnet110_a48_cifar10, 10), (pyramidnet110_a48_cifar100, 100), (pyramidnet110_a48_svhn, 10), (pyramidnet110_a84_cifar10, 10), (pyramidnet110_a84_cifar100, 100), (pyramidnet110_a84_svhn, 10), (pyramidnet110_a270_cifar10, 10), (pyramidnet110_a270_cifar100, 100), (pyramidnet110_a270_svhn, 10), (pyramidnet164_a270_bn_cifar10, 10), (pyramidnet164_a270_bn_cifar100, 100), (pyramidnet164_a270_bn_svhn, 10), (pyramidnet200_a240_bn_cifar10, 10), (pyramidnet200_a240_bn_cifar100, 100), (pyramidnet200_a240_bn_svhn, 10), (pyramidnet236_a220_bn_cifar10, 10), (pyramidnet236_a220_bn_cifar100, 100), (pyramidnet236_a220_bn_svhn, 10), (pyramidnet272_a200_bn_cifar10, 10), (pyramidnet272_a200_bn_cifar100, 100), (pyramidnet272_a200_bn_svhn, 10), ] for model, classes in models: net = model(pretrained=pretrained, data_format=data_format) batch_saze = 14 x = tf.random.normal((batch_saze, 3, 32, 32) if is_channels_first(data_format) else (batch_saze, 32, 32, 3)) y = net(x) assert (tuple(y.shape.as_list()) == (batch_saze, classes)) weight_count = sum([np.prod(K.get_value(w).shape) for w in net.trainable_weights]) print("m={}, {}".format(model.__name__, weight_count)) assert (model != pyramidnet110_a48_cifar10 or weight_count == 1772706) assert (model != pyramidnet110_a48_cifar100 or weight_count == 1778556) assert (model != pyramidnet110_a48_svhn or weight_count == 1772706) assert (model != pyramidnet110_a84_cifar10 or weight_count == 3904446) assert (model != pyramidnet110_a84_cifar100 or weight_count == 3913536) assert (model != pyramidnet110_a84_svhn or weight_count == 3904446) assert (model != pyramidnet110_a270_cifar10 or weight_count == 28485477) assert (model != pyramidnet110_a270_cifar100 or weight_count == 28511307) assert (model != pyramidnet110_a270_svhn or weight_count == 28485477) assert (model != pyramidnet164_a270_bn_cifar10 or weight_count == 27216021) assert (model != pyramidnet164_a270_bn_cifar100 or weight_count == 27319071) assert (model != pyramidnet164_a270_bn_svhn or weight_count == 27216021) assert (model != pyramidnet200_a240_bn_cifar10 or weight_count == 26752702) assert (model != pyramidnet200_a240_bn_cifar100 or weight_count == 26844952) assert (model != pyramidnet200_a240_bn_svhn or weight_count == 26752702) assert (model != pyramidnet236_a220_bn_cifar10 or weight_count == 26969046) assert (model != pyramidnet236_a220_bn_cifar100 or weight_count == 27054096) assert (model != pyramidnet236_a220_bn_svhn or weight_count == 26969046) assert (model != pyramidnet272_a200_bn_cifar10 or weight_count == 26210842) assert (model != pyramidnet272_a200_bn_cifar100 or weight_count == 26288692) assert (model != pyramidnet272_a200_bn_svhn or weight_count == 26210842)
def _test():
    import numpy as np
    import tensorflow.keras.backend as K
    import tensorflow as tf

    data_format = "channels_last"
    # data_format = "channels_first"
    pretrained = False
    from_audio = True
    audio_features = 64

    models = [
        quartznet5x5_en_ls,
        quartznet15x5_en,
        quartznet15x5_en_nr,
        quartznet15x5_fr,
        quartznet15x5_de,
        quartznet15x5_it,
        quartznet15x5_es,
        quartznet15x5_ca,
        quartznet15x5_pl,
        quartznet15x5_ru,
        quartznet15x5_ru34,
    ]

    for model in models:
        net = model(in_channels=audio_features, from_audio=from_audio, pretrained=pretrained,
                    data_format=data_format)

        batch = 3
        aud_scale = 640 if from_audio else 1
        seq_len = np.random.randint(150, 250, batch) * aud_scale
        seq_len_max = seq_len.max() + 2
        x_shape = (batch, seq_len_max) if from_audio else (
            (batch, audio_features, seq_len_max) if is_channels_first(data_format) else
            (batch, seq_len_max, audio_features))
        x = tf.random.normal(shape=x_shape)
        x_len = tf.convert_to_tensor(seq_len.astype(np.int64))

        y, y_len = net(x, x_len)
        assert (y.shape.as_list()[0] == batch)
        classes_id = 1 if is_channels_first(data_format) else 2
        seq_id = 2 if is_channels_first(data_format) else 1
        assert (y.shape.as_list()[classes_id] == net.classes)
        if from_audio:
            assert (y.shape.as_list()[seq_id] in range(seq_len_max // aud_scale * 2,
                                                       seq_len_max // aud_scale * 2 + 9))
        else:
            assert (y.shape.as_list()[seq_id] in [seq_len_max // 2, seq_len_max // 2 + 1])

        weight_count = sum([np.prod(K.get_value(w).shape) for w in net.trainable_weights])
        print("m={}, {}".format(model.__name__, weight_count))
        assert (model != quartznet5x5_en_ls or weight_count == 6713181)
        assert (model != quartznet15x5_en or weight_count == 18924381)
        assert (model != quartznet15x5_en_nr or weight_count == 18924381)
        assert (model != quartznet15x5_fr or weight_count == 18938731)
        assert (model != quartznet15x5_de or weight_count == 18927456)
        assert (model != quartznet15x5_it or weight_count == 18934631)
        assert (model != quartznet15x5_es or weight_count == 18931556)
        assert (model != quartznet15x5_ca or weight_count == 18934631)
        assert (model != quartznet15x5_pl or weight_count == 18929506)
        assert (model != quartznet15x5_ru or weight_count == 18930531)
        assert (model != quartznet15x5_ru34 or weight_count == 18929506)
def on_epoch_end(self, epochs, logs=None):
    self.global_epoch = self.global_epoch + 1
    lr = K.get_value(self.model.optimizer.lr)
    self.learning_rates.append(lr)
def loss_real_x(self): return K.get_value(self.loss_real_x_var)
dense_activation=None) negative_log_likelihood = lambda x, rv_x: -rv_x.log_prob(x) # Full probabilistic model if model_choice == 'full_prob': net = model_ipu.create_model_full_prob(input_shape, latent_dim, hidden_dim, filters, kernels, final_dim, conv_activation=None, dense_activation=None) kl = sum(net.losses) alpha = K.variable(0.5) negative_log_likelihood = lambda x, rv_x: -rv_x.log_prob(x) + kl * ( K.get_value(alpha) - 1) net.summary() #### Compile network #net.load_weights('test') # Custom metrics def kl_metric(y_true, y_pred): return K.sum(net.losses) net.compile(optimizer=tf.optimizers.Adam(learning_rate=1e-4), loss=negative_log_likelihood, metrics=['acc', kl_metric ]) #negative_log_likelihood)#"mean_squared_error" ds_train, steps_per_epoch = create_dataset(batch_size)
def _test(): import numpy as np import tensorflow.keras.backend as K data_format = "channels_last" # data_format = "channels_first" in_size = (480, 480) aux = False pretrained = False models = [ (deeplabv3_resnetd50b_voc, 21), (deeplabv3_resnetd101b_voc, 21), (deeplabv3_resnetd152b_voc, 21), (deeplabv3_resnetd50b_coco, 21), (deeplabv3_resnetd101b_coco, 21), (deeplabv3_resnetd152b_coco, 21), (deeplabv3_resnetd50b_ade20k, 150), (deeplabv3_resnetd101b_ade20k, 150), (deeplabv3_resnetd50b_cityscapes, 19), (deeplabv3_resnetd101b_cityscapes, 19), ] for model, classes in models: net = model(pretrained=pretrained, in_size=in_size, aux=aux, data_format=data_format) batch_saze = 14 x = tf.random.normal((batch_saze, 3, in_size[0], in_size[1]) if is_channels_first(data_format) else (batch_saze, in_size[0], in_size[1], 3)) ys = net(x) y = ys[0] if aux else ys assert (y.shape[0] == x.shape[0]) if is_channels_first(data_format): assert ((y.shape[1] == classes) and (y.shape[2] == x.shape[2]) and (y.shape[3] == x.shape[3])) else: assert ((y.shape[3] == classes) and (y.shape[1] == x.shape[1]) and (y.shape[2] == x.shape[2])) weight_count = sum([np.prod(K.get_value(w).shape) for w in net.trainable_weights]) print("m={}, {}".format(model.__name__, weight_count)) if aux: assert (model != deeplabv3_resnetd50b_voc or weight_count == 42127850) assert (model != deeplabv3_resnetd101b_voc or weight_count == 61119978) assert (model != deeplabv3_resnetd152b_voc or weight_count == 76763626) assert (model != deeplabv3_resnetd50b_coco or weight_count == 42127850) assert (model != deeplabv3_resnetd101b_coco or weight_count == 61119978) assert (model != deeplabv3_resnetd152b_coco or weight_count == 76763626) assert (model != deeplabv3_resnetd50b_ade20k or weight_count == 42194156) assert (model != deeplabv3_resnetd101b_ade20k or weight_count == 61186284) assert (model != deeplabv3_resnetd50b_cityscapes or weight_count == 42126822) assert (model != deeplabv3_resnetd101b_cityscapes or weight_count == 61118950) else: assert (model != deeplabv3_resnetd50b_voc or weight_count == 39762645) assert (model != deeplabv3_resnetd101b_voc or weight_count == 58754773) assert (model != deeplabv3_resnetd152b_voc or weight_count == 74398421) assert (model != deeplabv3_resnetd50b_coco or weight_count == 39762645) assert (model != deeplabv3_resnetd101b_coco or weight_count == 58754773) assert (model != deeplabv3_resnetd152b_coco or weight_count == 74398421) assert (model != deeplabv3_resnetd50b_ade20k or weight_count == 39795798) assert (model != deeplabv3_resnetd101b_ade20k or weight_count == 58787926) assert (model != deeplabv3_resnetd50b_cityscapes or weight_count == 39762131) assert (model != deeplabv3_resnetd101b_cityscapes or weight_count == 58754259)
def run(params): args = Struct(**params) set_seed(args.rng_seed) ext = extension_from_parameters(args) verify_path(args.save) prefix = args.save + ext logfile = args.logfile if args.logfile else prefix + '.log' set_up_logger(logfile, args.verbose) logger.info('Params: {}'.format(params)) loader = ComboDataLoader(seed=args.rng_seed, val_split=args.validation_split, cell_features=args.cell_features, drug_features=args.drug_features, response_url=args.response_url, use_landmark_genes=args.use_landmark_genes, preprocess_rnaseq=args.preprocess_rnaseq, exclude_cells=args.exclude_cells, exclude_drugs=args.exclude_drugs, use_combo_score=args.use_combo_score, cv_partition=args.cv_partition, cv=args.cv) # test_loader(loader) # test_generator(loader) train_gen = ComboDataGenerator(loader, batch_size=args.batch_size).flow() val_gen = ComboDataGenerator(loader, partition='val', batch_size=args.batch_size).flow() train_steps = int(loader.n_train / args.batch_size) val_steps = int(loader.n_val / args.batch_size) model = build_model(loader, args, verbose=True) print('Creating model PNG') from keras.utils import plot_model plot_model(model, 'model_global_combo.png', show_shapes=True) print('Model PNG has been created successfuly!') model.summary() # plot_model(model, to_file=prefix+'.model.png', show_shapes=True) if args.cp: model_json = model.to_json() with open(prefix + '.model.json', 'w') as f: print(model_json, file=f) def warmup_scheduler(epoch): lr = args.learning_rate or base_lr * args.batch_size / 100 if epoch <= 5: K.set_value(model.optimizer.lr, (base_lr * (5 - epoch) + lr * epoch) / 5) logger.debug('Epoch {}: lr={}'.format(epoch, K.get_value(model.optimizer.lr))) return K.get_value(model.optimizer.lr) df_pred_list = [] cv_ext = '' cv = args.cv if args.cv > 1 else 1 fold = 0 while fold < cv: if args.cv > 1: logger.info('Cross validation fold {}/{}:'.format(fold + 1, cv)) cv_ext = '.cv{}'.format(fold + 1) model = build_model(loader, args) optimizer = optimizers.deserialize({ 'class_name': args.optimizer, 'config': {} }) base_lr = args.base_lr or K.get_value(optimizer.lr) if args.learning_rate: K.set_value(optimizer.lr, args.learning_rate) model.compile(loss=args.loss, optimizer=optimizer, metrics=[mae, r2]) # calculate trainable and non-trainable params params.update(candle.compute_trainable_params(model)) candle_monitor = candle.CandleRemoteMonitor(params=params) timeout_monitor = candle.TerminateOnTimeOut(params['timeout']) reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001) warmup_lr = LearningRateScheduler(warmup_scheduler) checkpointer = ModelCheckpoint(prefix + cv_ext + '.weights.h5', save_best_only=True, save_weights_only=True) tensorboard = TensorBoard(log_dir="tb/tb{}{}".format(ext, cv_ext)) history_logger = LoggingCallback(logger.debug) model_recorder = ModelRecorder() # callbacks = [history_logger, model_recorder] callbacks = [ candle_monitor, timeout_monitor, history_logger, model_recorder ] if args.reduce_lr: callbacks.append(reduce_lr) if args.warmup_lr: callbacks.append(warmup_lr) if args.cp: callbacks.append(checkpointer) if args.tb: callbacks.append(tensorboard) if args.gen: history = model.fit_generator(train_gen, train_steps, epochs=args.epochs, callbacks=callbacks, validation_data=val_gen, validation_steps=val_steps) fold += 1 else: if args.cv > 1: x_train_list, y_train, x_val_list, y_val, df_train, df_val = loader.load_data_cv( fold) else: x_train_list, y_train, x_val_list, y_val, df_train, df_val = loader.load_data( ) y_shuf = 
np.random.permutation(y_val) log_evaluation(evaluate_prediction(y_val, y_shuf), description='Between random pairs in y_val:') history = model.fit(x_train_list, y_train, batch_size=args.batch_size, shuffle=args.shuffle, epochs=args.epochs, callbacks=callbacks, validation_data=(x_val_list, y_val)) if args.cp: model.load_weights(prefix + cv_ext + '.weights.h5') if not args.gen: y_val_pred = model.predict(x_val_list, batch_size=args.batch_size).flatten() scores = evaluate_prediction(y_val, y_val_pred) if args.cv > 1 and scores[args.loss] > args.max_val_loss: logger.warn( 'Best val_loss {} is greater than {}; retrain the model...' .format(scores[args.loss], args.max_val_loss)) continue else: fold += 1 log_evaluation(scores) df_val.is_copy = False df_val['GROWTH_PRED'] = y_val_pred df_val['GROWTH_ERROR'] = y_val_pred - y_val df_pred_list.append(df_val) if args.cp: # model.save(prefix+'.model.h5') model_recorder.best_model.save(prefix + '.model.h5') # test reloadded model prediction # new_model = keras.models.load_model(prefix+'.model.h5') # new_model.load_weights(prefix+cv_ext+'.weights.h5') # new_pred = new_model.predict(x_val_list, batch_size=args.batch_size).flatten() # print('y_val:', y_val[:10]) # print('old_pred:', y_val_pred[:10]) # print('new_pred:', new_pred[:10]) plot_history(prefix, history, 'loss') plot_history(prefix, history, 'r2') if K.backend() == 'tensorflow': K.clear_session() if not args.gen: pred_fname = prefix + '.predicted.growth.tsv' if args.use_combo_score: pred_fname = prefix + '.predicted.score.tsv' df_pred = pd.concat(df_pred_list) df_pred.to_csv(pred_fname, sep='\t', index=False, float_format='%.4g') logger.handlers = [] return history
def get_config(self): return { 'l1': float(K.get_value(self.l1)), 'l2': float(K.get_value(self.l2)) }
def _test(): import numpy as np import tensorflow.keras.backend as K data_format = "channels_last" # data_format = "channels_first" in_size = (480, 480) aux = False pretrained = False models = [ (fcn8sd_resnetd50b_voc, 21), (fcn8sd_resnetd101b_voc, 21), (fcn8sd_resnetd50b_coco, 21), (fcn8sd_resnetd101b_coco, 21), (fcn8sd_resnetd50b_ade20k, 150), (fcn8sd_resnetd101b_ade20k, 150), (fcn8sd_resnetd50b_cityscapes, 19), (fcn8sd_resnetd101b_cityscapes, 19), ] for model, classes in models: net = model(pretrained=pretrained, in_size=in_size, aux=aux, data_format=data_format) batch_saze = 14 x = tf.random.normal(( batch_saze, 3, in_size[0], in_size[1]) if is_channels_first(data_format) else (batch_saze, in_size[0], in_size[1], 3)) ys = net(x) y = ys[0] if aux else ys assert (y.shape[0] == x.shape[0]) if is_channels_first(data_format): assert ((y.shape[1] == classes) and (y.shape[2] == x.shape[2]) and (y.shape[3] == x.shape[3])) else: assert ((y.shape[3] == classes) and (y.shape[1] == x.shape[1]) and (y.shape[2] == x.shape[2])) weight_count = sum( [np.prod(K.get_value(w).shape) for w in net.trainable_weights]) print("m={}, {}".format(model.__name__, weight_count)) if aux: assert (model != fcn8sd_resnetd50b_voc or weight_count == 35445994) assert (model != fcn8sd_resnetd101b_voc or weight_count == 54438122) assert (model != fcn8sd_resnetd50b_coco or weight_count == 35445994) assert (model != fcn8sd_resnetd101b_coco or weight_count == 54438122) assert (model != fcn8sd_resnetd50b_ade20k or weight_count == 35545324) assert (model != fcn8sd_resnetd101b_ade20k or weight_count == 54537452) assert (model != fcn8sd_resnetd50b_cityscapes or weight_count == 35444454) assert (model != fcn8sd_resnetd101b_cityscapes or weight_count == 54436582) else: assert (model != fcn8sd_resnetd50b_voc or weight_count == 33080789) assert (model != fcn8sd_resnetd101b_voc or weight_count == 52072917) assert (model != fcn8sd_resnetd50b_coco or weight_count == 33080789) assert (model != fcn8sd_resnetd101b_coco or weight_count == 52072917) assert (model != fcn8sd_resnetd50b_ade20k or weight_count == 33146966) assert (model != fcn8sd_resnetd101b_ade20k or weight_count == 52139094) assert (model != fcn8sd_resnetd50b_cityscapes or weight_count == 33079763) assert (model != fcn8sd_resnetd101b_cityscapes or weight_count == 52071891)
def perturb(self, factors=None):
    if not factors:
        factors = [0.8, 1.2]
    K.set_value(self.l1, K.get_value(self.l1) * np.random.choice(factors))
    K.set_value(self.l2, K.get_value(self.l2) * np.random.choice(factors))
def get_config(self): return {self.name: float(K.get_value(self.variable))}
def perturb(self, factors=None):
    if not factors:
        factors = [0.8, 1.2]
    K.set_value(self.variable, K.get_value(self.variable) * np.random.choice(factors))
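# A minimal, self-contained sketch of the pattern used by the get_config()/perturb()
# methods above: a hyperparameter held in a Keras backend variable that can be read back
# with K.get_value and randomly rescaled in place with K.set_value. The MutableHyperparam
# class name and the 1e-4 starting value are illustrative assumptions, not from the source.
import numpy as np
import tensorflow.keras.backend as K


class MutableHyperparam:
    def __init__(self, name, value):
        self.name = name
        self.variable = K.variable(value, name=name)

    def get_config(self):
        return {self.name: float(K.get_value(self.variable))}

    def perturb(self, factors=None):
        if not factors:
            factors = [0.8, 1.2]
        K.set_value(self.variable, K.get_value(self.variable) * np.random.choice(factors))


l1 = MutableHyperparam("l1", 1e-4)
l1.perturb()
print(l1.get_config())  # e.g. {'l1': 8e-05} or {'l1': 0.00012}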
def k(self): return K.get_value(self.k_var)
def run_model(config): t1 = time.time() num_epochs = config['hyperparameters']['num_epochs'] config['create_structure']['func'] = util.load_attr_from( config['create_structure']['func']) input_shape = [(942, ), (3820, ), (3820, )] output_shape = (1, ) cs_kwargs = config['create_structure'].get('kwargs') if cs_kwargs is None: structure = config['create_structure']['func'](input_shape, output_shape) else: structure = config['create_structure']['func'](input_shape, output_shape, **cs_kwargs) arch_seq = config['arch_seq'] print(f'actions list: {arch_seq}') structure.set_ops(arch_seq) # structure.draw_graphviz('model_global_combo.dot') model = structure.create_model() # from keras.utils import plot_model # plot_model(model, 'model_global_combo.png', show_shapes=True) model.summary() t2 = time.time() t_model_create = t2 - t1 print('Time model creation: ', t_model_create) import sys t1 = time.time() params = initialize_parameters() args = Struct(**params) set_seed(args.rng_seed) optimizer = optimizers.deserialize({ 'class_name': args.optimizer, 'config': {} }) base_lr = args.base_lr or K.get_value(optimizer.lr) if args.learning_rate: K.set_value(optimizer.lr, args.learning_rate) model.compile(loss=args.loss, optimizer=optimizer, metrics=[mae, r2]) if config.get('load_data') is None: data = combo_ld_numpy(args) else: if not (config['load_data'].get('prop') is None): print('Data prop: ', config['load_data']['prop']) data = combo_ld_numpy(args, prop=config['load_data']['prop']) else: data = combo_ld_numpy(args) x_train_list = [data['x_train_0'], data['x_train_1'], data['x_train_2']] y_train = data['y_train'] x_val_list = [data['x_val_0'], data['x_val_1'], data['x_val_2']] y_val = data['y_val'] print('y_val shape: ', np.shape(y_val)) t2 = time.time() t_data_loading = t2 - t1 print('Time data loading: ', t_data_loading) stop_if_unfeasible = StopIfUnfeasible(time_limit=1200) t1 = time.time() history = model.fit(x_train_list, y_train, batch_size=args.batch_size, shuffle=args.shuffle, epochs=num_epochs, callbacks=[stop_if_unfeasible], validation_data=(x_val_list, y_val)) t2 = time.time() t_training = t2 - t1 print('Time training: ', t_training) print('avr_batch_timing :', stop_if_unfeasible.avr_batch_time) print('avr_timing: ', stop_if_unfeasible.estimate_training_time) print('stopped: ', stop_if_unfeasible.stopped) print(history.history) try: return history.history['val_r2'][0] except: return -1.0
def m_global(self): return K.get_value(self.m_global_var)
def updateLearningRate(model, lr=1e-4):
    print('Original lr:', K.get_value(model.optimizer.lr))
    # To set learning rate
    K.set_value(model.optimizer.lr, lr)
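# Hedged usage sketch for the helper above. The toy model is an illustrative assumption;
# any compiled tf.keras model whose (legacy-style) optimizer exposes `optimizer.lr` would do.
import tensorflow as tf
import tensorflow.keras.backend as K

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(2,))])
model.compile(optimizer='adam', loss='mse')

updateLearningRate(model, lr=5e-5)
print(K.get_value(model.optimizer.lr))  # ~5e-05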
def loss_gen_x(self): return K.get_value(self.loss_gen_x_var)
def _test(): import numpy as np import tensorflow.keras.backend as K data_format = "channels_last" # data_format = "channels_first" in_size = (480, 480) aux = False pretrained = False models = [ (pspnet_resnetd50b_voc, 21), (pspnet_resnetd101b_voc, 21), (pspnet_resnetd50b_coco, 21), (pspnet_resnetd101b_coco, 21), (pspnet_resnetd50b_ade20k, 150), (pspnet_resnetd101b_ade20k, 150), (pspnet_resnetd50b_cityscapes, 19), (pspnet_resnetd101b_cityscapes, 19), ] for model, classes in models: net = model(pretrained=pretrained, in_size=in_size, aux=aux, data_format=data_format) batch_saze = 14 x = tf.random.normal(( batch_saze, 3, in_size[0], in_size[1]) if is_channels_first(data_format) else (batch_saze, in_size[0], in_size[1], 3)) ys = net(x) y = ys[0] if aux else ys assert (y.shape[0] == x.shape[0]) if is_channels_first(data_format): assert ((y.shape[1] == classes) and (y.shape[2] == x.shape[2]) and (y.shape[3] == x.shape[3])) else: assert ((y.shape[3] == classes) and (y.shape[1] == x.shape[1]) and (y.shape[2] == x.shape[2])) weight_count = sum( [np.prod(K.get_value(w).shape) for w in net.trainable_weights]) print("m={}, {}".format(model.__name__, weight_count)) if aux: assert (model != pspnet_resnetd50b_voc or weight_count == 49081578) assert (model != pspnet_resnetd101b_voc or weight_count == 68073706) assert (model != pspnet_resnetd50b_coco or weight_count == 49081578) assert (model != pspnet_resnetd101b_coco or weight_count == 68073706) assert (model != pspnet_resnetd50b_ade20k or weight_count == 49180908) assert (model != pspnet_resnetd101b_ade20k or weight_count == 68173036) assert (model != pspnet_resnetd50b_cityscapes or weight_count == 49080038) assert (model != pspnet_resnetd101b_cityscapes or weight_count == 68072166) else: assert (model != pspnet_resnetd50b_voc or weight_count == 46716373) assert (model != pspnet_resnetd101b_voc or weight_count == 65708501) assert (model != pspnet_resnetd50b_coco or weight_count == 46716373) assert (model != pspnet_resnetd101b_coco or weight_count == 65708501) assert (model != pspnet_resnetd50b_ade20k or weight_count == 46782550) assert (model != pspnet_resnetd101b_ade20k or weight_count == 65774678) assert (model != pspnet_resnetd50b_cityscapes or weight_count == 46715347) assert (model != pspnet_resnetd101b_cityscapes or weight_count == 65707475)
def train_fn(model_bytes): # Make sure pyarrow is referenced before anything else to avoid segfault due to conflict # with TensorFlow libraries. Use `pa` package reference to ensure it's loaded before # functions like `deserialize_model` which are implemented at the top level. # See https://jira.apache.org/jira/browse/ARROW-3346 pa import atexit import horovod.tensorflow.keras as hvd from horovod.spark.task import get_available_devices import os from petastorm import make_batch_reader from petastorm.tf_utils import make_petastorm_dataset import tempfile import tensorflow as tf import tensorflow.keras.backend as K import shutil # Horovod: initialize Horovod inside the trainer. hvd.init() # Horovod: pin GPU to be used to process local rank (one GPU per process), if GPUs are available. config = tf.ConfigProto() config.gpu_options.allow_growth = True config.gpu_options.visible_device_list = get_available_devices()[0] K.set_session(tf.Session(config=config)) # Horovod: restore from checkpoint, use hvd.load_model under the hood. model = deserialize_model(model_bytes, hvd.load_model) # Horovod: adjust learning rate based on number of processes. K.set_value(model.optimizer.lr, K.get_value(model.optimizer.lr) * hvd.size()) # Horovod: print summary logs on the first worker. verbose = 2 if hvd.rank() == 0 else 0 callbacks = [ # Horovod: broadcast initial variable states from rank 0 to all other processes. # This is necessary to ensure consistent initialization of all workers when # training is started with random weights or restored from a checkpoint. hvd.callbacks.BroadcastGlobalVariablesCallback(root_rank=0), # Horovod: average metrics among workers at the end of every epoch. # # Note: This callback must be in the list before the ReduceLROnPlateau, # TensorBoard, or other metrics-based callbacks. hvd.callbacks.MetricAverageCallback(), # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final # accuracy. Scale the learning rate `lr = 1.0` ---> `lr = 1.0 * hvd.size()` during # the first five epochs. See https://arxiv.org/abs/1706.02677 for details. hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=verbose), # Reduce LR if the metric is not improved for 10 epochs, and stop training # if it has not improved for 20 epochs. tf.keras.callbacks.ReduceLROnPlateau(monitor='val_exp_rmspe', patience=10, verbose=verbose), tf.keras.callbacks.EarlyStopping(monitor='val_exp_rmspe', mode='min', patience=20, verbose=verbose), tf.keras.callbacks.TerminateOnNaN() ] # Model checkpoint location. ckpt_dir = tempfile.mkdtemp() ckpt_file = os.path.join(ckpt_dir, 'checkpoint.h5') atexit.register(lambda: shutil.rmtree(ckpt_dir)) # Horovod: save checkpoints only on the first worker to prevent other workers from corrupting them. if hvd.rank() == 0: callbacks.append(tf.keras.callbacks.ModelCheckpoint(ckpt_file, monitor='val_exp_rmspe', mode='min', save_best_only=True)) # Make Petastorm readers. with make_batch_reader('%s/train_df.parquet' % args.data_dir, num_epochs=None, cur_shard=hvd.rank(), shard_count=hvd.size(), hdfs_driver=PETASTORM_HDFS_DRIVER) as train_reader: with make_batch_reader('%s/val_df.parquet' % args.data_dir, num_epochs=None, cur_shard=hvd.rank(), shard_count=hvd.size(), hdfs_driver=PETASTORM_HDFS_DRIVER) as val_reader: # Convert readers to tf.data.Dataset. 
train_ds = make_petastorm_dataset(train_reader) \ .apply(tf.data.experimental.unbatch()) \ .shuffle(int(train_rows / hvd.size())) \ .batch(args.batch_size) \ .map(lambda x: (tuple(getattr(x, col) for col in all_cols), tf.log(x.Sales))) val_ds = make_petastorm_dataset(val_reader) \ .apply(tf.data.experimental.unbatch()) \ .batch(args.batch_size) \ .map(lambda x: (tuple(getattr(x, col) for col in all_cols), tf.log(x.Sales))) history = model.fit(train_ds, validation_data=val_ds, steps_per_epoch=int(train_rows / args.batch_size / hvd.size()), validation_steps=int(val_rows / args.batch_size / hvd.size()), callbacks=callbacks, verbose=verbose, epochs=args.epochs) # Dataset API usage currently displays a wall of errors upon termination. # This global model registration ensures clean termination. # Tracked in https://github.com/tensorflow/tensorflow/issues/24570 globals()['_DATASET_FINALIZATION_HACK'] = model if hvd.rank() == 0: with open(ckpt_file, 'rb') as f: return history.history, f.read()
def train(self):
    self.optimizer = tf.keras.optimizers.Adam(beta_1=0.9, beta_2=0.98, epsilon=1e-9)
    if self.use_label_smoothing:
        self.loss_function = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    else:
        self.loss_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    self.loss_metric = tf.keras.metrics.Mean(name="train_loss")
    self.acc_metric = tf.keras.metrics.SparseCategoricalAccuracy(name="train_acc")

    ckpt = tf.train.Checkpoint(model=self.transformer, opt=self.optimizer)
    if self.ckpt_path is not None:
        fname, self.initial_epoch = load_checkpoint(Path(self.ckpt_path).resolve(), self.ckpt_epoch)
        print("\nCheckpoint File : {}\n".format(fname))
        ckpt.mapped = {"model": self.transformer, "opt": self.optimizer}
        ckpt.restore(fname)

    progbar = tf.keras.utils.Progbar(target=self.num_train)
    self.count = 0
    for epoch in range(self.initial_epoch, self.initial_epoch + self.epochs):
        K.set_value(self.optimizer.lr, self._get_lr(epoch))
        progbar.update(0)
        self.loss_metric.reset_states()
        self.acc_metric.reset_states()
        start_time = korea_time(None)

        for train_src, train_tar in self.train_dataset:
            num_data = K.int_shape(train_src)[0]
            logits = self.forward(train_src, train_tar)
            progbar.add(num_data)

        end_time = korea_time(None)
        epoch_loss = self.loss_metric.result()
        epoch_acc = self.acc_metric.result()
        ckpt_prefix = self.ckpt_folder / "Epoch-{}_Loss-{:.5f}_Acc-{:5f}".format(epoch, epoch_loss, epoch_acc)
        ckpt.save(file_prefix=ckpt_prefix)
        print("Epoch = [{:5d}] Loss = [{:8.6f}] Acc = [{:8.6f}] LR = [{:.10f}]\n"
              .format(epoch, epoch_loss, epoch_acc, K.get_value(self.optimizer.lr)))

        # Save the model results
        msg = "Epoch = [{:5d}] - End Time [ {} ]\n".format(epoch, end_time.strftime("%Y/%m/%d %H:%M:%S"))
        msg += "Elapsed Time = {}\n".format(end_time - start_time)
        msg += "Learning Rate = [{:.10f}]\n".format(K.get_value(self.optimizer.lr))
        msg += "Loss : [{:8.6f}] - Acc : [{:8.6f}]\n".format(epoch_loss, epoch_acc)
        msg += " - " * 15 + "\n\n"
        with self.training_result_file.open("a+", encoding="utf-8") as fp:
            fp.write(msg)

        if self.test_result_file is not None:
            self.translate(epoch)
def on_batch_end(self, batch, logs=None):
    self.global_step = self.global_step + 1
    self.global_step_for_interval = self.global_step_for_interval + 1
    lr = K.get_value(self.model.optimizer.lr)
    self.learning_rates.append(lr)
def _test(): import numpy as np import tensorflow.keras.backend as K data_format = "channels_last" # data_format = "channels_first" pretrained = False models = [ regnetx002, regnetx004, regnetx006, regnetx008, regnetx016, regnetx032, regnetx040, regnetx064, regnetx080, regnetx120, regnetx160, regnetx320, regnety002, regnety004, regnety006, regnety008, regnety016, regnety032, regnety040, regnety064, regnety080, regnety120, regnety160, regnety320, ] for model in models: net = model(pretrained=pretrained, data_format=data_format) batch = 14 size = 224 x = tf.random.normal((batch, 3, size, size) if is_channels_first(data_format) else ( batch, size, size, 3)) y = net(x) assert (tuple(y.shape.as_list()) == (batch, 1000)) weight_count = sum( [np.prod(K.get_value(w).shape) for w in net.trainable_weights]) print("m={}, {}".format(model.__name__, weight_count)) assert (model != regnetx002 or weight_count == 2684792) assert (model != regnetx004 or weight_count == 5157512) assert (model != regnetx006 or weight_count == 6196040) assert (model != regnetx008 or weight_count == 7259656) assert (model != regnetx016 or weight_count == 9190136) assert (model != regnetx032 or weight_count == 15296552) assert (model != regnetx040 or weight_count == 22118248) assert (model != regnetx064 or weight_count == 26209256) assert (model != regnetx080 or weight_count == 39572648) assert (model != regnetx120 or weight_count == 46106056) assert (model != regnetx160 or weight_count == 54278536) assert (model != regnetx320 or weight_count == 107811560) assert (model != regnety002 or weight_count == 3162996) assert (model != regnety004 or weight_count == 4344144) assert (model != regnety006 or weight_count == 6055160) assert (model != regnety008 or weight_count == 6263168) assert (model != regnety016 or weight_count == 11202430) assert (model != regnety032 or weight_count == 19436338) assert (model != regnety040 or weight_count == 20646656) assert (model != regnety064 or weight_count == 30583252) assert (model != regnety080 or weight_count == 39180068) assert (model != regnety120 or weight_count == 51822544) assert (model != regnety160 or weight_count == 83590140) assert (model != regnety320 or weight_count == 145046770)
def on_epoch_begin(self, epoch, logs=None):
    if self.verbose:
        lrate = K.get_value(self.model.optimizer.lr)
        print(f"epoch {epoch} lr: {lrate}")
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # no gpu.

import sys
import tensorflow as tf

conf = tf.compat.v1.ConfigProto()
conf.gpu_options.per_process_gpu_memory_fraction = 0.6
session = tf.compat.v1.Session(config=conf)
tf.compat.v1.disable_eager_execution()

learning_rate = float(sys.argv[1])
model_num = int(sys.argv[2])

temp_manager = TrainingManager()
model = temp_manager.net.net.model

import tensorflow.keras.backend as K

print("Current learning rate: " + str(K.get_value(model.optimizer.lr)))
print("Setting learning rate to " + str(learning_rate))
K.set_value(model.optimizer.lr, learning_rate)
print("Done")
print("Overwriting " + "models/best_" + str(model_num) + ".h5")
model.save("models/best_" + str(model_num) + ".h5")
def on_batch_end(self, batch, logs=None): if self.current_epoch_ > 1: return if self.use_validation_set: X, Y = self.validation_data[0], self.validation_data[1] # use 5 random batches from test set for fast approximate of loss num_samples = self.batch_size * self.validation_sample_rate if num_samples > X.shape[0]: num_samples = X.shape[0] idx = np.random.choice(X.shape[0], num_samples, replace=False) x = X[idx] y = Y[idx] values = self.model.evaluate(x, y, batch_size=self.batch_size, verbose=False) loss = values[0] else: loss = logs['loss'] # smooth the loss value and bias correct running_loss = self.loss_smoothing_beta * loss + ( 1. - self.loss_smoothing_beta) * loss running_loss = running_loss / ( 1. - self.loss_smoothing_beta**self.current_batch_) # stop logging if loss is too large if self.current_batch_ > 1 and self.stopping_criterion_factor is not None and ( running_loss > self.stopping_criterion_factor * self.best_loss_): if self.verbose: print( " - LRFinder: Skipping iteration since loss is %d times as large as best loss (%0.4f)" % (self.stopping_criterion_factor, self.best_loss_)) return if running_loss < self.best_loss_ or self.current_batch_ == 1: self.best_loss_ = running_loss current_lr = K.get_value(self.model.optimizer.lr) self.history.setdefault('running_loss_', []).append(running_loss) if self.lr_scale == 'exp': self.history.setdefault('log_lrs', []).append(np.log10(current_lr)) else: self.history.setdefault('log_lrs', []).append(current_lr) # compute the lr for the next batch and update the optimizer lr if self.lr_scale == 'exp': current_lr *= self.lr_multiplier_ else: current_lr = self.lr_multiplier_[self.current_batch_ - 1] K.set_value(self.model.optimizer.lr, current_lr) # save the other metrics as well for k, v in logs.items(): self.history.setdefault(k, []).append(v) if self.verbose: if self.use_validation_set: print(" - LRFinder: val_loss: %1.4f - lr = %1.8f " % (values[0], current_lr)) else: print(" - LRFinder: lr = %1.8f " % current_lr)
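# Hedged usage sketch for the LR-finder callback above: `finder` is assumed to be an
# instance of that callback that was passed to model.fit, and the exponential lr_scale
# is assumed; the 'log_lrs' and 'running_loss_' keys are the ones populated in
# on_batch_end above.
import matplotlib.pyplot as plt

plt.plot(finder.history['log_lrs'], finder.history['running_loss_'])
plt.xlabel('log10(learning rate)')  # with lr_scale == 'exp'; raw lr otherwise
plt.ylabel('smoothed loss')
plt.show()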
def objective_function2(y_true, y_pred, lamb1=0.00008, lamb2=0.00008):
    # Custom objective function
    y_true = K.flatten(y_true)
    y_pred = K.flatten(y_pred)
    n_seg = 32  # Because we have 32 segments per video.
    nvid = K.get_value(y_true.get_shape()[0])
    n_exp = nvid / 2
    Num_d = 32 * nvid

    sub_max = K.ones_like(y_pred)  # sub_max represents the highest scoring instants in bags (videos).
    sub_sum_labels = K.ones_like(y_true)  # It is used to sum the labels in order to distinguish between normal and abnormal videos.
    sub_sum_l1 = K.ones_like(y_true)  # For holding the concatenation of summation of scores in the bag.
    sub_l2 = K.ones_like(y_true)  # For holding the concatenation of L2 of score in the bag.

    for i in range(nvid):
        # For labels
        vid_seg = y_true[i * n_seg: i * n_seg + n_seg]
        # Put the sum of the segment scores of video i into sub_sum_labels
        sub_sum_labels = K.concatenate([sub_sum_labels, K.stack(K.sum(vid_seg))])  # Just to keep track of abnormal and normal videos

        # For feature scores
        Feat_Score = y_pred[i * n_seg: i * n_seg + n_seg]
        # The first call puts the score of the most anomalous cuboid at position i of sub_max,
        # while the second puts the sum of the segment scores into sub_sum_l1
        sub_max = K.concatenate([sub_max, K.stack(K.max(Feat_Score))])  # Keep the maximum score of all instances in a bag (video)
        sub_sum_l1 = K.concatenate([sub_sum_l1, K.stack(K.sum(Feat_Score))])  # Keep the sum of scores of all instances in a bag (video)

        # Compute the temporal smoothness term
        z1 = T.ones_like(Feat_Score)  # length = n_seg
        z2 = T.concatenate([z1, Feat_Score])  # length = 2*n_seg
        z3 = T.concatenate([Feat_Score, z1])  # length = 2*n_seg
        z_22 = z2[31:]  # This extracts the second part of z2 (Feat_Score with a leading 1)
        z_44 = z3[:33]  # This extracts the first part of z3 (Feat_Score with a trailing 1)
        z = z_22 - z_44  # Here the value at position i+1 is subtracted from each value of Feat_Score
        z = z[1:32]
        z = T.sum(T.sqr(z))
        # Save the temporal smoothness term at the i-th position of sub_l2
        sub_l2 = T.concatenate([sub_l2, T.stack(z)])

    # sub_max[Num_d:] means include all elements after Num_d.
    # AllLabels = [2, 4, 3, 9, 6, 12, 7, 18, 9, 14]
    # z = x[4:]  # [ 6. 12. 7. 18. 9. 14.]
    sub_score = sub_max[Num_d:]  # We need this step since we have used T.ones_like
    F_labels = sub_sum_labels[Num_d:]  # We need this step since we have used T.ones_like
    # F_labels contains integer 32 for normal videos and 0 for abnormal videos.
    # This is because of the labeling done at the end of "load_dataset_Train_batch".

    # AllLabels = [2, 4, 3, 9, 6, 12, 7, 18, 9, 14]
    # z = x[:4]  # [2 4 3 9] ... this shows elements 0 to 3
    sub_sum_l1 = sub_sum_l1[Num_d:]  # We need this step since we have used T.ones_like
    sub_sum_l1 = sub_sum_l1[:n_exp]
    sub_l2 = sub_l2[Num_d:]  # We need this step since we have used T.ones_like
    sub_l2 = sub_l2[:n_exp]

    # F_labels holds the sum of the ground-truth labels per video,
    # sub_score the maximum predicted score of each video,
    # sub_sum_l1 the sum of the predicted scores over the abnormal patterns,
    # sub_l2 the temporal smoothness term over the abnormal patterns

    # Indices of the normal videos
    indx_nor = K.equal(F_labels, 32).nonzero()[0]  # Since we labeled 1 for each of the 32 segments of a normal video, F_labels = 32 for normal videos
    # Indices of the abnormal videos
    indx_abn = K.equal(F_labels, 0).nonzero()[0]

    n_Nor = n_exp
    Sub_Nor = sub_score[indx_nor]  # Maximum score for each normal video
    Sub_Abn = sub_score[indx_abn]  # Maximum score for each abnormal video

    # Compute the hinge loss (I do not understand why the for loop is used)
    z = K.ones_like(y_true)
    for i in range(n_Nor):
        sub_z = K.maximum(1 - Sub_Abn + Sub_Nor[i], 0)
        z = K.concatenate([z, K.stack(K.sum(sub_z))])
    z = z[Num_d:]  # We need this step since we have used T.ones_like
    z = K.mean(z, axis=-1) + lamb1 * K.sum(sub_sum_l1) + lamb2 * K.sum(sub_l2)  # Final loss f

    return z
    # print('number of null word embeddings: %d' % np.sum(np.sum(embedding_matrix, axis=1) == 0))
    # print("sample words not found: ", np.random.choice(words_not_found, 10))
    return embedding_matrix, nb_words


model = load_model('./model_no_symbol_rnn_2.h5',
                   custom_objects={
                       'f1_m': f1_m,
                       'precision_m': precision_m,
                       'recall_m': recall_m
                   })

X_train, X_test, y_train, y_test = load_chosen_dataset_input('./fa_no_symbol.csv')
train_data_processed, test_data_processed = preprocess_dataset(X_train, X_test)
tokenizer = create_word_index_dict(train_data_processed + test_data_processed)
# word_index = tokenizer.word_index
# embedding_index = load_indonesian_word_embeddings()
# embedding_matrix, nb_words = plot_embedding_matrix(word_index, embedding_index)
# model = get_model_bilstm(metrics)
train_data_sequenced, test_data_sequenced = tokenize_input(train_data_processed, test_data_processed, tokenizer)
train_data, test_data = pad_sequences(train_data_sequenced, test_data_sequenced, 20)

input = tokenizer.texts_to_sequences(['menkum ham sudah kaji draf revisi uu kpk'])
print(True if K.get_value(model(np.array(input)))[0, 0] >= 0.5 else False)
# model_train = model.fit(train_data, y_train, batch_size=256, epochs=7, verbose=2)
# model.save('./model_no_symbol_rnn_2.h5')
def on_epoch_begin(self, epoch, logs=None):
    lr = K.get_value(self.model.optimizer.lr)
    print('\nEpoch %05d: LearningRateScheduler reducing learning '
          'rate to %s.' % (epoch + 1, lr))
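# A minimal, self-contained sketch of a scheduler callback that both sets and reports the
# learning rate at the start of each epoch, mirroring the verbose message above. The
# StepDecay name and the step-decay schedule are illustrative assumptions, not from the
# source; it assumes a legacy-style tf.keras optimizer exposing `optimizer.lr`.
import tensorflow as tf
import tensorflow.keras.backend as K


class StepDecay(tf.keras.callbacks.Callback):
    def __init__(self, base_lr=1e-3, drop=0.5, every=10):
        super().__init__()
        self.base_lr = base_lr
        self.drop = drop
        self.every = every

    def on_epoch_begin(self, epoch, logs=None):
        lr = self.base_lr * (self.drop ** (epoch // self.every))
        K.set_value(self.model.optimizer.lr, lr)
        print('\nEpoch %05d: LearningRateScheduler reducing learning '
              'rate to %s.' % (epoch + 1, lr))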