def compute_similarities_for_each_topK(test_codes_folder, old_results_file_topK, topK=topK):
    """For every test image, pick the best candidate among its top-K predictions.

    For each image, combines every candidate category's classifier probability
    with a code-based similarity (via ensFunc) and keeps the per-image argmax.

    Args:
        test_codes_folder: folder of pickled batches shaped {'names', 'codes'}.
        old_results_file_topK: csv of previous top-K predictions per image.
        topK: number of candidates to read per image (defaults to module topK).

    Returns:
        dict mapping image name (without '.jpg') -> [best_category, best_similarity],
        also pickled via save_obj.
    """
    suf = '.jpg'
    test_img_similarity_dict = dict()
    names_results_ids_dict = get_results_topK(old_results_file_topK, k=topK)
    counter = 1
    for codes_dict in get_codes_from_files(test_codes_folder):
        for name, code in zip(codes_dict['names'], codes_dict['codes']):
            name = name.replace(suf, '')
            catProbDict = names_results_ids_dict[name]
            # BUG FIX: `idd` was only assigned when a candidate beat the initial
            # similarity of 0, so a round where every combined score is <= 0
            # raised NameError. Seed with the first candidate, then keep the
            # running maximum.
            idd = None
            similarity = 0
            for cat, prob in catProbDict.items():
                tmpSim = ensFunc(prob, calc_for_each_sim(code, cat))
                if idd is None or tmpSim > similarity:
                    idd = cat
                    similarity = tmpSim
            test_img_similarity_dict[name] = [idd, similarity]
            print(counter)
            counter += 1
    save_obj(test_img_similarity_dict,
             'test_sim_dict' + '_top' + str(topK) + '_' +
             old_results_file_topK.split('/')[-1])
    return test_img_similarity_dict
def second_phase():
    """Fine-tune the top third of the phase-1 ResNet on the class generators.

    Loads '1st_phase_resnet_model.h5', freezes the bottom 2/3 of the layers,
    trains for second_phase_train_reps small epochs with TensorBoard logging,
    checkpoints periodically, and finally saves '2nd_phase_resnet_model.h5'.
    Relies on module-level generators/config (train_img_class_gen, etc.).
    """
    global resnet_model
    tensorboard = TensorBoard(log_dir=second_phase_folder + 'tb_logs', batch_size=batch_size)
    resnet_model = load_model(data_folder + '1st_phase_resnet_model.h5')

    # Freeze the bottom 2/3 of the network; fine-tune only the top 1/3.
    trainable_layers_ratio = 1 / 3.0
    trainable_layers_index = int(len(resnet_model.layers) * (1 - trainable_layers_ratio))
    for layer in resnet_model.layers[:trainable_layers_index]:
        layer.trainable = False
    for layer in resnet_model.layers[trainable_layers_index:]:
        layer.trainable = True

    # Low learning rate so fine-tuning does not wreck pre-trained features.
    resnet_model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy',
                         metrics=['acc'])

    if not os.path.exists(second_phase_folder):
        os.makedirs(second_phase_folder)

    # train the model on the new data for a few epochs
    for i in range(second_phase_train_reps):
        history = resnet_model.fit_generator(train_img_class_gen,
                                             steps_per_epoch=steps_per_small_epoch,
                                             epochs=small_epochs, verbose=2,
                                             validation_data=val_img_class_gen,
                                             validation_steps=val_steps_per_small_epoch,
                                             workers=4, callbacks=[tensorboard])
        print('itr', i)
        if i % saves_per_epoch == 0:
            print('{} epoch completed'.format(int(i / saves_per_epoch)))
            ts = calendar.timegm(time.gmtime())
            resnet_model.save(second_phase_folder + str(ts) + '_resnet_model.h5')
            # BUG FIX: history was previously pickled as '<ts>_xcpetion_history.h5'
            # (copy-paste from the Xception script). Name it after this model,
            # matching the '_<model>_history' convention used elsewhere.
            save_obj(history.history, str(ts) + '_resnet_history', folder=second_phase_folder)
    resnet_model.save(data_folder + '2nd_phase_resnet_model.h5')
def first_phase(trained=True, printGap=True, first_phase_train_reps=first_phase_train_reps,
                data_folder=data_folder):
    """Train (or resume) the siamese model, checkpointing as it goes.

    When trained is True, resumes from '1st_phase_siamese_model.h5'; otherwise
    builds a fresh SiameseModel seeded from the 3rd-phase resnet weights.
    Optionally evaluates GAP on ~1/10 of the validation generator at the end,
    then saves the final model back to data_folder.
    """
    global siamese_model
    tensorboard = TensorBoard(log_dir=first_phase_folder + 'tb_logs', batch_size=batch_size)

    if trained:
        # Resume from the checkpoint written by a previous run.
        siamese_model = load_model(data_folder + '1st_phase_siamese_model.h5')
    else:
        # Fresh siamese net on top of the 3rd-phase resnet.
        siamese_model = SiameseModel('../resnet/' + '3rd_phase_resnet_model.h5')
        siamese_model.compile(optimizer=Adam(0.0005), loss='binary_crossentropy',
                              metrics=['acc'])

    if not os.path.exists(first_phase_folder):
        os.makedirs(first_phase_folder)

    for rep in range(first_phase_train_reps):
        history = siamese_model.fit_generator(
            train_img_class_gen,
            steps_per_epoch=steps_per_small_epoch,
            epochs=small_epochs, verbose=2,
            validation_data=val_img_class_gen,
            validation_steps=val_steps_per_small_epoch,
            workers=4, callbacks=[tensorboard])
        print('itr', rep)
        if rep % saves_per_epoch == 0:
            print('{} epoch completed'.format(int(rep / saves_per_epoch)))
            # Skip the earliest reps; only checkpoint once training has settled.
            if rep >= 5:
                stamp = calendar.timegm(time.gmtime())
                siamese_model.save(first_phase_folder + str(stamp) + '_siamese_model.h5')
                save_obj(history.history, str(stamp) + '_siamese_history',
                         folder=first_phase_folder)

    if printGap:
        steps = len(val_names_list) / batch_size
        predicts = siamese_model.predict_generator(val_img_class_gen,
                                                   steps=steps / 10, verbose=2)  ##########
        predProb = np.max(predicts, axis=-1)
        predId = np.argmax(predicts, axis=-1)
        # Ground-truth ids keyed by the bare filename ('<class>/<name>.<ext>').
        trueId = [val_name_id_dict[str(fn).split('.')[0].split('/')[1]]
                  for fn in val_img_class_gen.filenames]
        gap = GAP_vector(predId, predProb, trueId)
        print('gap: ', gap)

    siamese_model.save(data_folder + '1st_phase_siamese_model.h5')
def first_phase(trained=True, printGap=True, first_phase_train_reps=first_phase_train_reps):
    """Phase-1 training of the NASNetMobile classifier.

    When trained is False, builds NASNetMobile on ImageNet weights with a
    GAP + Dense(1024) + softmax head; otherwise reloads the saved phase-1
    checkpoint. Trains for first_phase_train_reps small epochs with
    TensorBoard logging, checkpoints periodically, optionally reports GAP
    on part of the validation generator, then saves the final model.
    Relies on module-level config/generators (input_shape, classes_num,
    train_img_class_gen, val_img_class_gen, ...).
    """
    global nasnet_model
    tensorboard = TensorBoard(log_dir=first_phase_folder + 'tb_logs', batch_size=batch_size)
    if not trained:
        # create the base pre-trained model
        input_tensor = Input(shape=input_shape)
        base_model = NASNetMobile(input_tensor=input_tensor, weights='imagenet', include_top=False)
        # add a global spatial average pooling layer
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        # add a fully-connected layer
        x = Dense(1024, activation='relu')(x)
        # add a logistic layer
        predictions = Dense(classes_num, activation='softmax')(x)
        # this is the model we will train
        nasnet_model = Model(inputs=base_model.input, outputs=predictions)
        nasnet_model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['acc'])
    else:
        nasnet_model = load_model(data_folder + '1st_phase_nasnet_model.h5')
    if not os.path.exists(first_phase_folder):
        os.makedirs(first_phase_folder)
    for i in range(first_phase_train_reps):
        history = nasnet_model.fit_generator(train_img_class_gen,
                                             steps_per_epoch=steps_per_small_epoch,
                                             epochs=small_epochs, verbose=2,
                                             validation_data=val_img_class_gen,
                                             validation_steps=val_steps_per_small_epoch,
                                             workers=4, callbacks=[tensorboard])
        print('itr', i)
        if i % saves_per_epoch == 0:
            print('{} epoch completed'.format(int(i / saves_per_epoch)))
            # NOTE(review): checkpoints only start at iteration 50 here, while
            # the siamese variant of this function uses 5 — presumably
            # deliberate for this longer run, but confirm.
            if i>=50:
                ts = calendar.timegm(time.gmtime())
                nasnet_model.save(first_phase_folder + str(ts) + '_nasnet_model.h5')
                save_obj(history.history, str(ts) + '_nasnet_history', folder=first_phase_folder)
    if printGap:
        # Evaluate on roughly 1/10 of the validation set.
        # NOTE(review): steps/10 is a float — confirm this Keras version
        # accepts fractional steps in predict_generator.
        steps = len(val_names_list)/batch_size
        predicts = nasnet_model.predict_generator(val_img_class_gen, steps=steps/10, verbose=2)##########
        predProb = np.max(predicts, axis=-1)
        predId = np.argmax(predicts, axis=-1)
        # Ground-truth ids keyed by the bare filename ('<class>/<name>.<ext>').
        trueId = list(map(lambda x: val_name_id_dict[str(x).split('.')[0].split('/')[1]], [name for name in val_img_class_gen.filenames]))
        gap = GAP_vector(predId, predProb, trueId)
        print('gap: ', gap)
    nasnet_model.save(data_folder + '1st_phase_nasnet_model.h5')
def get_results(results_file):
    """Parse a submission-style csv into {image name (no suffix): predicted id}.

    The header row is skipped. Each data row is "<name>,<id> <prob> ...";
    only the first token of the second field is kept as the id. A row whose
    id field is shorter than 3 characters marks the end of the useful
    predictions: the mapping built so far is pickled and returned early.
    """
    mapping = dict()
    with open(results_file) as fh:
        fh.readline()  # discard header
        for row in fh:
            img_name, raw_pred = row.strip().split(',')
            if len(raw_pred) < 3:
                # Truncated/empty prediction: persist what we have and stop.
                save_obj(mapping, 'names_results_ids_dict')
                return mapping
            mapping[img_name] = raw_pred.split(' ')[0]
    return mapping
def compute_similarities_for_each(test_codes_folder, old_results_file):
    """Compute each test image's similarity to its previously predicted class.

    Reads the old predictions, then for every pickled code batch computes
    calc_for_each_sim(code, predicted_class) per image. The resulting
    {name: similarity} dict is pickled (name suffixed with the results
    filename) and returned.
    """
    suffix = '.jpg'
    predicted_ids = get_results(old_results_file)
    similarities = dict()
    processed = 0
    for batch in get_codes_from_files(test_codes_folder):
        for img_name, img_code in zip(batch['names'], batch['codes']):
            key = img_name.replace(suffix, '')
            similarities[key] = calc_for_each_sim(img_code, predicted_ids[key])
            processed += 1
            print(processed)  # progress indicator
    save_obj(similarities,
             'test_img_similarity_dict' + old_results_file.split('/')[-1])
    return similarities
def get_results_topK(old_results_file_topK, k=topK):
    """Parse a top-K submission csv into {name: {category_id: probability}}.

    The header row is skipped. Each data row is
    "<name>,<id1> <p1> <id2> <p2> ...". The k (id, prob) pairs are read from
    the last pair backwards. A row whose prediction field is shorter than 3
    characters marks the end of the useful rows: the dict built so far is
    pickled and returned early.

    Args:
        old_results_file_topK: path to the top-K results csv.
        k: number of (id, prob) pairs to read per row.
    """
    names_results_ids_dict = dict()
    with open(old_results_file_topK) as f:
        f.readline()  # discard header
        for line in f:
            name, idd = line.strip().split(',')
            if len(idd) < 3:
                # Truncated row: persist what we have and stop.
                save_obj(names_results_ids_dict, 'names_results_ids_dict_topK')
                return names_results_ids_dict
            names_results_ids_dict[name] = {}
            iddList = idd.split(' ')
            # BUG FIX: the indices previously used the module-level `topK`
            # instead of the `k` parameter, so any call with k != topK
            # indexed the wrong (or out-of-range) pairs.
            for i in range(k):
                names_results_ids_dict[name][iddList[2 * (k - 1 - i)]] = float(
                    iddList[2 * (k - 1 - i) + 1])
    return names_results_ids_dict
def compute_codes(imgs_path, codes_folder, siamese_model_path):
    """Run every image in imgs_path through the code network and pickle codes.

    Images are processed in chunks of 2**14 names; each chunk is predicted
    with the code net (via code_generator) and saved to codes_folder as
    'batch_<n>' with keys 'names' and 'codes'.
    """
    batch_size = 64
    code_net = get_code_net(siamese_model_path)
    if not os.path.exists(codes_folder):
        os.makedirs(codes_folder)

    codes_batches_size = 2**14  # images per pickle file
    print('make images names list')
    imgs_names = listdir(imgs_path)
    imgs_num = len(imgs_names)
    batches_num = imgs_num // codes_batches_size + (1 if imgs_num % codes_batches_size else 0)

    batch_no = 1
    pending = []
    print('start loop')
    for idx, img_name in enumerate(imgs_names):
        pending.append(img_name)
        # Flush when the chunk is full, or at the very last image.
        if len(pending) < codes_batches_size and idx != len(imgs_names) - 1:
            continue
        code_dict = {'names': pending}
        names_copy = pending[:]
        steps = len(pending) // batch_size + (1 if len(names_copy) % batch_size else 0)
        code_dict['codes'] = code_net.predict_generator(
            code_generator(names_copy, imgs_path, batch_size=batch_size),
            steps=steps, verbose=2, workers=4)
        save_obj(code_dict, 'batch_{}'.format(batch_no), folder=codes_folder)
        pending = []
        print('done {} out of {}'.format(batch_no, batches_num))
        batch_no += 1
    print('codes computed and saved at: ' + codes_folder)
def make_codes_by_category(train_codes_folder, codes_by_category_folder, train_name_id_dict):
    """Regroup per-batch code pickles into per-category code pickles.

    Each batch pickle holds {'names', 'codes'}. For every batch, codes are
    grouped by the category of their image (looked up in train_name_id_dict),
    then appended into one pickle per category in codes_by_category_folder,
    creating an empty {'names': [], 'codes': []} file on first sight.
    """
    if not os.path.exists(codes_by_category_folder):
        os.makedirs(codes_by_category_folder)
    counter = 1
    for batch_name in listdir(train_codes_folder):
        batch_key = batch_name.replace('.pkl', '')
        print(batch_key)
        counter += 1
        batch = load_obj(batch_key, folder=train_codes_folder)

        # Group this batch's codes in memory before touching the per-category files.
        grouped = dict()
        for img_name, img_code in zip(batch['names'], batch['codes']):
            cat = train_name_id_dict[img_name.replace('.jpg', '')]
            bucket = grouped.setdefault(cat, {'names': [], 'codes': []})
            bucket['names'].append(img_name)
            bucket['codes'].append(img_code)

        # Merge each group into its on-disk category file.
        for cat, bucket in grouped.items():
            if not Path(codes_by_category_folder + str(cat)).is_file():
                save_obj(obj={'names': [], 'codes': []}, name=str(cat),
                         folder=codes_by_category_folder)
            cat_file = load_obj(str(cat), folder=codes_by_category_folder)
            cat_file['names'].extend(bucket['names'])
            cat_file['codes'].extend(bucket['codes'])
            save_obj(obj=cat_file, name=str(cat), folder=codes_by_category_folder)
def third_phase(trained=False, third_phase_train_reps=third_phase_train_reps):
    """Phase-3 fine-tuning of the Inception model with regularization and dropout.

    Loads the phase-1 checkpoint (or the phase-3 one when trained=True),
    assigns L2/L1 regularizers to conv layers and the penultimate Dense
    layer, inserts Dropout before the classifier, then trains with a
    linearly decaying learning rate, checkpointing periodically.

    NOTE(review): assigning `kernel_regularizer` on already-built layers
    may not take effect unless the model is rebuilt/recompiled from its
    config — confirm the penalties actually apply in this Keras version.
    """
    global inception_model, new_inception_model, optimizer
    tensorboard = TensorBoard(log_dir=third_phase_folder + 'tb_logs', batch_size=batch_size)
    # trained=False resumes from the 1st-phase checkpoint; trained=True from the 3rd-phase one.
    if not trained:
        inception_model = load_model(data_folder + '1st_phase_inception_model.h5')
    else:
        inception_model = load_model(data_folder + '3rd_phase_inception_model.h5')

    # # add regularizers to the convolutional layers
    # trainable_layers_ratio = 1 / 2.0
    # trainable_layers_index = int(len(inception_model.layers) * (1 - trainable_layers_ratio))
    # for layer in inception_model.layers[:trainable_layers_index]:
    #     layer.trainable = False
    # for layer in inception_model.layers[trainable_layers_index:]:
    #     layer.trainable = True

    # All layers trainable; regularize every Conv2D.
    for layer in inception_model.layers:
        layer.trainable = True
        if isinstance(layer, keras.layers.convolutional.Conv2D):
            layer.kernel_regularizer = regularizers.l2(0.001)
            layer.activity_regularizer = regularizers.l1(0.001)

    # add dropout and regularizer to the penultimate Dense layer
    predictions = inception_model.layers[-1]
    dropout = Dropout(0.2)
    fc = inception_model.layers[-2]
    fc.kernel_regularizer = regularizers.l2(0.001)
    fc.activity_regularizer = regularizers.l1(0.001)
    x = dropout(fc.output)
    predictors = predictions(x)
    new_inception_model = Model(inputs=inception_model.input, outputs=predictors)

    # lr=0.1234 is a placeholder; it is overwritten via K.set_value every rep.
    optimizer = Adam(lr=0.1234)
    start_lr = 0.00015
    end_lr = 0.00002
    step_lr = (end_lr - start_lr) / (third_phase_train_reps - 1)
    new_inception_model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['acc'])
    if not os.path.exists(third_phase_folder):
        os.makedirs(third_phase_folder)
    for i in range(third_phase_train_reps):
        # Linear decay from start_lr to end_lr across the reps.
        lr = start_lr + step_lr * i
        K.set_value(new_inception_model.optimizer.lr, lr)
        print(i, 'out of ', third_phase_train_reps, '\nlearning rate ', K.eval(new_inception_model.optimizer.lr))
        history = new_inception_model.fit_generator(
            train_img_class_gen, steps_per_epoch=steps_per_small_epoch, epochs=small_epochs,
            verbose=2, validation_data=val_img_class_gen, validation_steps=val_steps_per_small_epoch,
            workers=4, callbacks=[tensorboard])
        # history = new_inception_model.fit_generator(train_img_class_gen,
        #                                             steps_per_epoch=steps_per_small_epoch,
        #                                             epochs=small_epochs, verbose=2,
        #                                             validation_data=val_img_class_gen, validation_steps=val_steps_per_small_epoch,
        #                                             workers=4, callbacks=[LosswiseKerasCallback(tag='keras inception model')])
        print("iteration", i)
        if i % saves_per_epoch == 0:
            print('{} epoch completed'.format(int(i / saves_per_epoch)))
            # Only checkpoint after the first few reps.
            if i >= 5:
                ts = calendar.timegm(time.gmtime())
                new_inception_model.save(third_phase_folder + str(ts) + '_inception_model.h5')
                save_obj(history.history, str(ts) + '_inception_history.h5', folder=third_phase_folder)
    new_inception_model.save(data_folder + '3rd_phase_inception_model.h5')
def inception(trained=False, third_phase_train_reps=third_phase_train_reps):
    """Build (or reload) the InceptionResNetV2 classifier and fine-tune it.

    When trained is False: builds InceptionResNetV2 on ImageNet weights with
    a GAP + Dense(1024) + softmax head, assigns L2/L1 regularizers to conv
    layers and the penultimate Dense layer, and inserts Dropout before the
    classifier. When trained is True: reloads the saved phase-3 model.
    Either way, trains with a linearly decaying learning rate and periodic
    checkpoints, then saves '3rd_phase_inception_model.h5'.

    NOTE(review): assigning regularizers on already-built layers may not
    take effect unless the model is rebuilt from its config — confirm.
    """
    global inception_model, new_inception_model, optimizer
    tensorboard = TensorBoard(log_dir=third_phase_folder + 'tb_logs', batch_size=batch_size)
    # Linear LR schedule endpoints; applied per-rep via K.set_value below.
    start_lr = 0.00015
    end_lr = 0.00001
    step_lr = (end_lr - start_lr) / (third_phase_train_reps - 1)
    if not trained:
        # create the base pre-trained model
        input_tensor = Input(shape=input_shape)
        base_model = InceptionResNetV2(input_tensor=input_tensor, weights='imagenet', include_top=False)
        # add a global spatial average pooling layer
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        # add a fully-connected layer
        x = Dense(1024, activation='relu')(x)
        # add a logistic layer
        predictions = Dense(classes_num, activation='softmax')(x)
        # this is the model we will train
        inception_model = Model(inputs=base_model.input, outputs=predictions)
        inception_model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['acc'])
        # All layers trainable; regularize every Conv2D.
        for layer in inception_model.layers:
            layer.trainable = True
            if isinstance(layer, keras.layers.convolutional.Conv2D):
                layer.kernel_regularizer = regularizers.l2(0.001)
                layer.activity_regularizer = regularizers.l1(0.001)
        # add dropout and regularizer to the penultimate Dense layer
        predictions = inception_model.layers[-1]
        dropout = Dropout(0.2)
        fc = inception_model.layers[-2]
        fc.kernel_regularizer = regularizers.l2(0.001)
        fc.activity_regularizer = regularizers.l1(0.001)
        x = dropout(fc.output)
        predictors = predictions(x)
        new_inception_model = Model(inputs=inception_model.input, outputs=predictors)
        # lr=0.1234 is a placeholder; overwritten via K.set_value every rep.
        optimizer = Adam(lr=0.1234)
        new_inception_model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['acc'])
    else:
        new_inception_model = load_model(data_folder + '3rd_phase_inception_model.h5')
        optimizer = Adam(lr=0.1234)
        new_inception_model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['acc'])
    if not os.path.exists(third_phase_folder):
        os.makedirs(third_phase_folder)
    for i in range(third_phase_train_reps):
        # Linear decay from start_lr to end_lr across the reps.
        lr = start_lr + step_lr * i
        K.set_value(new_inception_model.optimizer.lr, lr)
        print(i, 'out of ', third_phase_train_reps, '\nlearning rate ', K.eval(new_inception_model.optimizer.lr))
        history = new_inception_model.fit_generator(
            train_img_class_gen, steps_per_epoch=steps_per_small_epoch, epochs=small_epochs,
            verbose=2, validation_data=val_img_class_gen, validation_steps=val_steps_per_small_epoch,
            workers=4, callbacks=[tensorboard])
        # history = new_inception_model.fit_generator(train_img_class_gen,
        #                                             steps_per_epoch=steps_per_small_epoch,
        #                                             epochs=small_epochs, verbose=2,
        #                                             validation_data=val_img_class_gen, validation_steps=val_steps_per_small_epoch,
        #                                             workers=4, callbacks=[LosswiseKerasCallback(tag='keras inception model')])
        print("iteration", i)
        if i % saves_per_epoch == 0:
            print('{} epoch completed'.format(int(i / saves_per_epoch)))
            # Only checkpoint after the first few reps.
            if i >= 5:
                ts = calendar.timegm(time.gmtime())
                new_inception_model.save(third_phase_folder + str(ts) + '_inception_model.h5')
                save_obj(history.history, str(ts) + '_inception_history.h5', folder=third_phase_folder)
    new_inception_model.save(data_folder + '3rd_phase_inception_model.h5')
def make_files():
    """Parse the train csv into name/id lookup structures and pickle them.

    Populates the module-level lists/dicts/sets (name->id, id->names, ...)
    from csv_csv_path, pickles each of them via save_obj, then builds and
    pickles the train/val folder indexes via make_folder_lists_dicts.
    Malformed rows (not exactly 3 fields) are printed and skipped.
    """
    global csv_name_id_tuples_list, csv_ids_list, csv_ids_set, csv_names_set, \
        csv_id_name_dict, csv_name_id_dict, classes_num, \
        train_names_list, train_name_id_dict, val_names_list, val_name_id_dict
    print("parsing train.csv")
    with open(csv_csv_path) as f:
        f.readline()  # discard header
        for line in f:
            l = line.replace('"', '').strip().split(',')
            if len(l) != 3:
                # Malformed row — log it and move on.
                print(l)
                continue
            name, idd = l[0], int(l[2])
            csv_name_id_tuples_list.append((name, idd))
            csv_names_list.append(name)
            csv_name_id_dict[name] = idd
            csv_ids_list.append(idd)
            # BUG FIX: this previously indexed with the *builtin* `id`
            # function instead of the local `idd`, so every name was stored
            # under one bogus key and the membership test never matched.
            if idd in csv_id_name_dict:
                csv_id_name_dict[idd].add(name)
            else:
                csv_id_name_dict[idd] = {name}
    print("start saving lists")
    csv_names_set = set(csv_names_list)
    csv_ids_set = set(csv_ids_list)
    save_obj(csv_name_id_tuples_list, 'csv_name_id_tuples_list')
    save_obj(csv_names_list, 'csv_names_list')
    save_obj(csv_ids_list, 'csv_ids_list')
    save_obj(csv_name_id_dict, 'csv_name_id_dict')
    save_obj(csv_id_name_dict, 'csv_id_name_dict')
    save_obj(csv_ids_set, 'csv_ids_set')
    save_obj(csv_names_set, 'csv_names_set')
    train_names_list, train_name_id_dict = make_folder_lists_dicts(
        train_images_folder)
    val_names_list, val_name_id_dict = make_folder_lists_dicts(
        val_images_folder)
    save_obj(train_names_list, 'train_names_list')
    save_obj(train_name_id_dict, 'train_name_id_dict')
    save_obj(val_names_list, 'val_names_list')
    save_obj(val_name_id_dict, 'val_name_id_dict')
import os, sys

# Make the project root importable before pulling in project-local modules.
module_path = os.path.abspath(os.path.join('..'))
sys.path.append(module_path)

from conf.configure import *
from utils.data_util import save_obj, load_obj
from preprocess.generator import make_generators
import time

# Prediction-time settings (model/weight paths come from conf.configure's data_folder).
pred_batch_size = 32  # 32
pred_lr = 0.0002
pred_model = data_folder + 'xcpetion_model_.json'
pred_weights = data_folder + 'continue_second_phase_logs/older/1521093898_xcpetion_model.h5'
# pred_model_path = data_folder + '2nd_2nd_phase_xcpetion_model.h5'
pred_model_path = data_folder + 'continue_second_phase_xcpetion_model.h5'

# Cache the generator's class->index mapping on first run; subsequent runs
# just load the pickle instead of rebuilding the generators.
if not os.path.exists(working_folder + 'class_indices_dict' + '.pkl'):
    # classList = [f for f in os.listdir(train_class_images_path) if not os.path.isfile(os.path.join(train_class_images_path, f))]
    # classDict = dict(zip(classList, list(range(len(classList)))))
    train_img_class_gen, val_img_class_gen = make_generators(isPlain=True)
    # print(train_img_class_gen.class_indices)
    classDict = train_img_class_gen.class_indices
    save_obj(classDict, 'class_indices_dict')

class_indices_dict = load_obj('class_indices_dict')
# Invert to index -> class name for decoding model predictions.
inverted_class_indices_dict = dict((v, k) for k, v in class_indices_dict.items())
# print(type(inverted_class_indices_dict[10]))
# print(inverted_class_indices_dict[10])