def exp_outlier_merge(sub_dataset=['_train', '_valid', '_test'],
                      outlier_dataset_name='outliers'):
    datasets = [cfg.dataset + sd for sd in sub_dataset]
    for feat_net in cfg.nets:
        outliers_dataset = common.feat_dataset(outlier_dataset_name, feat_net)
        for dataset in datasets:
            feat_dataset = common.feat_dataset(dataset, feat_net)
            out_dataset_path = common.feat_path(dataset + "_outl", feat_net)
            print("")
            print("Features net: {}".format(feat_net))
            print("Input dataset: {}".format(common.feat_fname(dataset, feat_net)))
            print("Merging with: {}".format(common.feat_fname(outlier_dataset_name, feat_net)))
            print("Out feat dataset: {}".format(common.feat_fname(dataset + "_outl", feat_net)))
            out = ImageDataset.merge_datasets(feat_dataset, outliers_dataset, label_mode='new')
            out.save_hdf5(out_dataset_path)
            print("Done")
            print("")
    print("")
    print("All done.")

def merge_features_train_valid(feat_net='resnet50'):
    """Merge the train and validation feature datasets into a single feature dataset."""
    dataset_name = cfg.dataset
    trainset_name = dataset_name + '_train'
    validset_name = dataset_name + '_valid'
    print("Merging training and validation data-features (feature net: " + feat_net + ")")
    train_feat_set = common.feat_dataset(trainset_name, feat_net)
    valid_feat_set = common.feat_dataset(validset_name, feat_net)
    merged_feat_set = ImageDataset.merge_datasets(train_feat_set, valid_feat_set)
    merged_dataset_path = common.feat_path(dataset_name, feat_net)
    merged_feat_set.save_hdf5(merged_dataset_path)

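# Usage sketch (hypothetical driver; assumes cfg has been initialized and the
# per-split feature files already exist -- names follow the defaults above):
#
#   cfg.init(include_nets=['resnet50'])
#   merge_features_train_valid(feat_net='resnet50')  # writes <dataset>.h5 = train + valid
#   exp_outlier_merge()  # writes a '<split>_outl' variant for every net in cfg.nets
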
def __init__(self, feat_net_name, trainset_name, validset_name=None, validsplit=0,
             shuffle_trainset=False, batch_size=32, loss='categorical_crossentropy',
             metric=['ACCURACY'], checkpoint_monitor=None):
    self.trainset_name = trainset_name
    self.trainset = common.feat_dataset(trainset_name, feat_net_name)
    if shuffle_trainset:
        self.trainset.shuffle()
    if validset_name is None:
        self.valdata = None
    else:
        validset = common.feat_dataset(validset_name, feat_net_name)
        self.valdata = [validset.data, validset.getLabelsVec()]
    self.validsplit = validsplit
    self.batch_size = batch_size
    self.loss = loss
    self.metric = metric
    self.feat_net_name = feat_net_name
    self.chk_mon = checkpoint_monitor
    if self.chk_mon is None:
        # Default: monitor the validation loss when validation data is
        # available, the training loss otherwise. An explicitly passed
        # checkpoint_monitor is kept as-is.
        if self.valdata is not None or self.validsplit > 0:
            self.chk_mon = 'val_loss'
        else:
            self.chk_mon = 'loss'

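# Construction sketch (the enclosing class is not shown in this excerpt;
# 'ShallowTrainer' below is a hypothetical stand-in for it). With a validation
# set, or validsplit > 0, and no explicit checkpoint_monitor, checkpoints
# monitor 'val_loss'; otherwise they monitor 'loss':
#
#   trainer = ShallowTrainer('resnet50', cfg.dataset + '_train',
#                            validset_name=cfg.dataset + '_valid')
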
def __init__(self, feat_net_name, trainset_name, testset_name, verbose=True,
             csv_global_stats=True, csv_class_stats=True, single_class_verbose=False,
             batch_size=32, save_csv_dir='weights'):
    '''
    :param save_csv_dir:
        'weights': save the csv files in the directory of the weights of the shallow network.
        'current': save the csv files in the current working directory.
        Any other string: save the csv files in the specified directory.
    '''
    self.trainset_name = trainset_name
    self.feat_net_name = feat_net_name
    self.testset = common.feat_dataset(testset_name, feat_net_name)
    self.testset_name = testset_name
    self.verbose = verbose
    self.single_class_verbose = single_class_verbose
    self.csv_global_stats = csv_global_stats
    self.csv_class_stats = csv_class_stats
    self.batch_size = batch_size
    self.save_csv_mode = save_csv_dir

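# The three save_csv_dir modes, sketched. ShallowTester is apparently the class
# this __init__ belongs to, judging from its use in
# prune_feat_dataset_with_shallow_classifier below; '/tmp/stats' is only an
# example path:
#
#   ST = ShallowTester('resnet50', train_name, test_name)                             # csv next to the shallow weights
#   ST = ShallowTester('resnet50', train_name, test_name, save_csv_dir='current')     # csv in the working directory
#   ST = ShallowTester('resnet50', train_name, test_name, save_csv_dir='/tmp/stats')  # csv in the given directory
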
def extract_shallow_features(batch_size=32):
    feat_net = 'resnet50'
    cfg.init(include_nets=[feat_net])
    old_trainset_name = cfg.dataset + '_train_ds'
    # old_testset_name = cfg.dataset + '_test'
    # dataset_name = cfg.dataset + '_train_ds'
    dataset_name = cfg.dataset + '_test'
    # crop, size = cfg.crop_size(net=feat_net)

    print("\nloading dataset: " + dataset_name)
    try:
        dataset = common.feat_dataset(dataset_name, feat_net)
    except IOError:
        print("Can't open dataset.")
        return
    print("dataset loaded.")

    in_shape = cfg.feat_shape_dict[feat_net]
    out_shape = feat_dataset_n_classes(dataset_name, feat_net)
    B = ShallowNetBuilder(in_shape, out_shape)
    SL = ShallowLoader(old_trainset_name, feat_net)

    pretrain_weight_epoch = '10'
    labelflip_finetune_epoch = '00'
    out_layer = 'additional_hidden_0'
    extr_n = '_ft@' + pretrain_weight_epoch
    model = B.H8K(extr_n, lf_decay=0.01).init(lf=False).load(SL, labelflip_finetune_epoch).model()
    # model.summary()
    feature_vectors = net_utils.extract_features(model, dataset, out_layer, batch_size, True)
    feature_vectors.save_hdf5("shallow_extracted_features/shallow_feat_" + dataset_name + ".h5")

def main(args):
    config.init()
    feat_net = 'resnet50'
    print("")
    print("")
    print("Running experiment on net: " + feat_net)

    testset_name = "dbp3120_test"  # + '_verrocchio77'
    testset = common.feat_dataset(testset_name, feat_net)
    in_shape = config.feat_shape_dict[feat_net]
    out_shape = testset.labelsize

    def for_resnet50():
        LF = new_model(in_shape, out_shape)
        weight_path = config.SHALLOW_PATH + \
            "shallow_{}__feat_dbp3120_noflk_train_ds__resnet50__avg_pool.weights.{}.h5"
        # (model tag, weight tag, score label) for each checkpoint to evaluate:
        weights_to_test = [
            # ("AB", "best", "AB best"),
            # ("AB", "last", "AB last"),
            ("A", "best", "A best (5ep)"),
            ("A", "last", "A last (5ep)"),
            ("LF_FT_A", "best", "LF A best"),
            ("LF_FT_A", "00", "LF A 0"),
            ("LF_FT_A", "01", "LF A 1"),
            ("LF_FT_A", "02", "LF A 2"),
            ("LF_FT_A", "03", "LF A 3"),
            ("LF_FT_A", "04", "LF A 4"),
        ]
        for model_tag, weight_tag, label in weights_to_test:
            LF.load_weights(weight_path.format(model_tag, weight_tag), by_name=True)
            score = test_net(LF, testset)
            write_net_score(score, label, testset_name, "test_results.csv", detailed_csv=True)

    if feat_net == 'resnet50':
        for_resnet50()

def main(args):
    config.init()
    feat_net = 'resnet50'
    print("")
    print("")
    print("Running experiment on net: " + feat_net)

    # if config.USE_TOY_DATASET:
    #     trainset_name = config.DATASET + '_train'
    # else:
    #     trainset_name = config.DATASET + '_train_ds'
    testset_name = config.DATASET + '_test'
    testset = common.feat_dataset(testset_name, feat_net)
    in_shape = config.feat_shape_dict[feat_net]
    out_shape = testset.labelsize

    def for_resnet50():
        LF = new_model(in_shape, out_shape)
        weight_path = config.SHALLOW_PATH + \
            "shallow_{}__feat_dbp3120_train_ds__resnet50__avg_pool.weights.{}.h5"
        # (model tag, weight tag, score label) for each checkpoint to evaluate:
        weights_to_test = [
            ("AB", "best", "AB best"),
            ("AB", "last", "AB last"),
            ("A", "best", "A best (5ep)"),
            ("A", "last", "A last (5ep)"),
            ("LF_FT_A", "best", "LF A best"),
            ("LF_FT_A", "00", "LF A 0"),
            ("LF_FT_A", "17", "LF A 17"),
            ("LF_FT_A", "41", "LF A 41"),
            ("LF_FT_A", "60", "LF A 60"),
        ]
        for model_tag, weight_tag, label in weights_to_test:
            LF.load_weights(weight_path.format(model_tag, weight_tag), by_name=True)
            score = test_net(LF, testset)
            write_net_score(score, label, testset_name, "test_results.csv", detailed_csv=True)

    if feat_net == 'resnet50':
        for_resnet50()

def main(args):
    config.init()
    feat_net = 'resnet50'
    print("")
    print("")
    print("Running experiment on net: " + feat_net)

    trainset_name = config.DATASET + '_so'
    trainset = common.feat_dataset(trainset_name, feat_net)
    validset = common.feat_dataset(config.DATASET + '_so_test', feat_net)
    valid_data = validset.data, validset.getLabelsVec()
    valid_split = 0
    in_shape = config.feat_shape_dict[feat_net]
    out_shape = trainset.labelsize

    def addestra(model, name, optimizer, epochs, callbacks, chk_period=-1, loss_in_name=False):
        shallow_path = common.shallow_path(name, trainset_name, feat_net, ext=False)
        if chk_period > 0:
            # Periodic checkpoints, optionally embedding the validation loss in the file name.
            chk_name = shallow_path + '.weights.{epoch:02d}' + \
                ('-{val_loss:.2f}.h5' if loss_in_name else '.h5')
            checkpoint = ModelCheckpoint(chk_name, monitor='val_acc', save_weights_only=True,
                                         period=chk_period)
            callbacks.append(checkpoint)
        # Always keep the weights scoring the best validation loss.
        bestpoint = ModelCheckpoint(shallow_path + '.weights.best.h5', monitor='val_loss',
                                    save_best_only=True, save_weights_only=True)
        callbacks.append(bestpoint)
        model.compile(optimizer=optimizer, loss=LOSS, metrics=METRIC)
        model.fit(trainset.data, trainset.getLabelsVec(), nb_epoch=epochs, batch_size=BATCH,
                  callbacks=callbacks, shuffle=True,
                  validation_data=valid_data, validation_split=valid_split)
        save_model_json(model, shallow_path + '.json')
        model.save_weights(shallow_path + '.weights.last.h5')

    def for_resnet50():
        # Stage 1: train the plain shallow net A.
        early_stopping = EarlyStopping('val_loss', min_delta=0.01, patience=7, verbose=1)
        reduceLR = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1,
                                     epsilon=0.01, cooldown=0, min_lr=0)
        callbacks = [early_stopping, reduceLR]
        A = new_model(in_shape, out_shape)
        optimizer = SGD(lr=0.01, momentum=0.9, decay=1e-6, nesterov=True)
        addestra(A, "A_5ep", optimizer, 100, callbacks, chk_period=1, loss_in_name=True)

        # Stage 2: finetune from the best A weights with a LabelFlip noise layer.
        shallow_path = common.shallow_path("A_5ep", trainset_name, feat_net, ext=False)
        early_stopping = EarlyStopping('val_loss', min_delta=0.001, patience=10, verbose=1)
        reduceLR = ReduceLROnPlateau('val_loss', factor=0.1, patience=4, verbose=1, epsilon=0.0001)
        callbacks = [early_stopping, reduceLR]
        LF = new_model(in_shape, out_shape, lf=True, lf_decay=0.03)
        LF.load_weights(shallow_path + '.weights.best.h5', by_name=True)
        optimizer = SGD(lr=0.001, momentum=0.9, decay=1e-6, nesterov=True)
        addestra(LF, "LF_FT_A", optimizer, epochs=100, callbacks=callbacks, chk_period=1)

    def for_vgg():
        pass
        # m = new_model(in_shape, out_shape, hiddens=[Hidden(4096, 0.5), Hidden(4096, 0.5)])
        # addestra(m, "H4K_H4K", SGD(lr=0.0001, momentum=0.9, decay=1e-6, nesterov=True),
        #          epochs=100, callbacks=callbacks)
        #
        # m = new_model(in_shape, out_shape, hiddens=[Hidden(4096, 0.5)])
        # addestra(m, "H4K", SGD(lr=0.0001, momentum=0.9, decay=1e-6, nesterov=True),
        #          epochs=100, callbacks=callbacks)

    if feat_net == 'resnet50':
        for_resnet50()
    if feat_net.startswith('vgg'):
        for_vgg()

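# File layout produced by addestra for a given name (sketch; the actual prefix
# comes from common.shallow_path(name, trainset_name, feat_net, ext=False)):
#
#   <prefix>.json                        - model architecture
#   <prefix>.weights.best.h5             - weights with the best val_loss
#   <prefix>.weights.last.h5             - weights after the final epoch
#   <prefix>.weights.NN[-<val_loss>].h5  - per-epoch checkpoints when chk_period > 0
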
def prune_feat_dataset_with_shallow_classifier(feat_net=cfg_emb.FEAT_NET,
                                               double_seeds=True,
                                               n_top_classes=cfg_emb.PRUNING_KEEP_N_CLASSES,
                                               labelflip=cfg_emb.USE_LABELFLIP):
    dataset_name = cfg_emb.FEAT_DATASET
    trainset_name = cfg.dataset + '_train' + ('_ds' if double_seeds else '')
    # validset_name = cfg.dataset + '_valid'
    testset_name = cfg.dataset
    # testset_name = cfg.dataset + '_test'

    print("Shallow Test")
    print("Features from CNN: " + feat_net)
    print("Trained on: " + trainset_name)
    print("Testing on: " + testset_name)

    in_shape = cfg.feat_shape_dict[feat_net]
    out_shape = feat_dataset_n_classes(testset_name, feat_net)
    SNB = ShallowNetBuilder(in_shape, out_shape)
    SL = ShallowLoader(trainset_name, feat_net)
    ST = ShallowTester(feat_net, trainset_name, testset_name,
                       csv_class_stats=False, csv_global_stats=False)

    # Nets to test
    # shallow_nets = [SNB.H8K]
    shallow_nets = [SNB.A]
    # Weights to load on the nets to test
    shallow_weights_to_loads = ['best']
    # Weights to load on labelflip-finetuned nets (finetuned loading the
    # weights in the shallow_weights_to_loads list)
    shallow_ft_lf_weights_to_load = ['00']

    dataset_to_prune = common.feat_dataset(dataset_name, feat_net)

    def prune_and_save(shallow_net):
        # Keep only the classes on which the shallow classifier scores best,
        # then save the corresponding sub-dataset.
        keep, prune = test_for_top_classes(shallow_net, ST,
                                           nb_selected_classes=n_top_classes,
                                           out_on_csv="class_pruning.csv",
                                           out_classname_txt="class_names_keep_from_pruning.txt",
                                           out_classindex_txt="class_keep_from_pruning.txt")
        pruned = dataset_to_prune.sub_dataset_with_labels(keep)
        pruned_out_feature_dataset = pruned_feat_dataset_path(dataset_name, testset_name,
                                                              n_top_classes, feat_net, shallow_net)
        print("Saving pruned feature dataset in: " + pruned_out_feature_dataset)
        pruned.save_hdf5(pruned_out_feature_dataset)

    for sn in shallow_nets:
        for sh_i in shallow_weights_to_loads:
            if labelflip:
                # Test some of the finetuned models that use LabelFlip noise labels:
                extr_n = '_ft@' + str(sh_i)
                for lf_i in shallow_ft_lf_weights_to_load:
                    prune_and_save(sn(extr_n, lf_decay=cfg_emb.LF_DECAY).init(lf=False).load(SL, lf_i))
            else:
                # Test without LabelFlip finetuning:
                prune_and_save(sn().init(lf=False).load(SL, sh_i))

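# Invocation sketch (arguments mirror the signature above; the values shown are
# its config defaults, not new settings):
#
#   prune_feat_dataset_with_shallow_classifier(feat_net=cfg_emb.FEAT_NET,
#                                              double_seeds=True,
#                                              n_top_classes=cfg_emb.PRUNING_KEEP_N_CLASSES,
#                                              labelflip=cfg_emb.USE_LABELFLIP)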