def FT_other_model(model_name, constrNet, target_dataset='IconArt_v1'):
    """
    @param : model_name : name of the model fine-tuned on another dataset beforehand
    """
    ft_model = get_fine_tuned_model(model_name, constrNet)
    # TODO: finish this!
    # We may need to pass the fine-tuned models through the `weights` argument
    # (instead of 'imagenet' or another keyword) so that the other models get
    # loaded directly; this remains to be tested.

    # Load info about the dataset
    item_name, path_to_img, default_path_imdb, classes, ext, num_classes, str_val, df_label, \
        path_data, Not_on_NicolasPC = get_database(target_dataset)

    # NB: epochs, plotConv, batch_size, cropCenter, etc. are assumed to be
    # defined in the enclosing scope (module-level training defaults).
    model_trained_on_target_set = FineTuneModel(ft_model, dataset=target_dataset, df=df_label,
                                                x_col=item_name, y_col=classes, path_im=path_to_img,
                                                str_val=str_val, num_classes=len(classes), epochs=epochs,
                                                Net=constrNet, plotConv=plotConv, batch_size=batch_size,
                                                cropCenter=cropCenter, return_best_model=return_best_model,
                                                NoValidationSetUsed=NoValidationSetUsed,
                                                RandomValdiationSet=RandomValdiationSet,
                                                deepSupervision=deepSupervision, dataAug=dataAug,
                                                last_epochs_model_path=last_epochs_model_path,
                                                history_path=local_history_path, randomCrop=randomCrop,
                                                LR_scheduling=LR_scheduling,
                                                imSize=imSize)
def perf_test_RASTAweights():
    """
    Test the performance of the RASTA weights provided by Lecoutre et al.
    """
    dataset = 'RASTA'
    sess = tf.Session()
    set_session(sess)
    tf.keras.backend.set_image_data_format('channels_last')

    base_model = resnet_trained(20)
    predictions = Dense(25, activation='softmax')(base_model.output)
    net_finetuned = Model(inputs=base_model.input, outputs=predictions)
    # net_finetuned = custom_resnet()  # This model has 87 layers
    path_to_model = os.path.join(os.sep, 'media', 'gonthier', 'HDD2', 'output_exp',
                                 'rasta_models', 'resnet_2017_7_31-19_9_45', 'model.h5')
    # This model has 107 layers
    constrNet = 'LResNet50'  # For the Lecoutre ResNet50 version
    model_name = 'Lecoutre2017'
    input_name_lucid = 'input_1'
    net_finetuned.load_weights(path_to_model)  # ,by_name=True
    net_finetuned.build((224, 224, 3))
    print(net_finetuned.summary())
    print(net_finetuned.predict(np.random.rand(1, 224, 224, 3)))

    item_name, path_to_img, default_path_imdb, classes, ext, num_classes, str_val, df_label, \
        path_data, Not_on_NicolasPC = get_database(dataset)

    sLength = len(df_label[item_name])
    classes_vectors = df_label[classes].values
    df_label_test = df_label[df_label['set'] == 'test']
    y_test = classes_vectors[df_label['set'] == 'test', :]

    cropCenter = False
    randomCrop = False
    imSize = 224
    predictions = predictionFT_net(net_finetuned, df_test=df_label_test, x_col=item_name,
                                   y_col=classes, path_im=path_to_img, Net=constrNet,
                                   cropCenter=cropCenter, randomCrop=randomCrop,
                                   imSize=imSize)
    with sess.as_default():
        metrics = evaluationScoreRASTA(y_test, predictions)
    top_k_accs, AP_per_class, P_per_class, R_per_class, P20_per_class, F1_per_class, acc_per_class = metrics

    for k, top_k_acc in zip([1, 3, 5], top_k_accs):
        print('Top-{0} accuracy : {1:.2f}%'.format(k, top_k_acc * 100))
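# A minimal sketch (hypothetical helper, not part of the original evaluation code)
# of how a top-k accuracy such as the one printed above can be computed from raw
# scores with numpy: an example counts as correct if the single ground-truth class
# is among the k highest-scoring classes.
def _topk_accuracy_sketch(y_true_onehot, y_scores, k=3):
    topk = np.argsort(y_scores, axis=1)[:, -k:]   # indices of the k largest scores per row
    gt = np.argmax(y_true_onehot, axis=1)         # single ground-truth class per row
    hits = np.array([gt[i] in topk[i] for i in range(len(gt))])
    return hits.mean()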
def minimal_sizeOfRASTAImages():
    """
    Print the minimal, mean and median width/height of the RASTA images.
    """
    source_dataset = 'RASTA'
    item_name, path_to_img, default_path_imdb, classes, ext, num_classes, str_val, df_label, \
        path_data, Not_on_NicolasPC = get_database(source_dataset)

    list_h = []
    list_w = []
    for i, row in enumerate(df_label.iterrows()):
        row_value = row[1]
        image_path = path_to_img + '/' + row_value[item_name] + '.jpg'
        im = Image.open(image_path)
        width, height = im.size
        list_h += [height]
        list_w += [width]
    print('Size of the images from the RASTA dataset')
    print('Minimal width', np.min(list_w))
    print('Minimal height', np.min(list_h))
    print('Mean width', np.mean(list_w))
    print('Mean height', np.mean(list_h))
    print('Median width', np.median(list_w))
    print('Median height', np.median(list_h))
def load_dataset(dataset_nm='IconArt_v1', set_='train', classe=0, k_per_bag=300,
                 metamodel='FasterRCNN', demonet='res152_COCO'):
    """Load data from file, do pre-processing, and group it into bags.

    Parameters
    -----------------
    dataset_nm : string
        Name of the dataset.

    Returns
    -----------------
    datasets : dict
        Bag-level features; only the 'train' entry is filled here.
    """
    # load data from file
    datasets = {}
    item_name, path_to_img, default_path_imdb, classes, ext, num_classes, str_val, df_label, path_data, Not_on_NicolasPC = get_database(dataset_nm)
    dict_name_file = getDictFeaturesPrecomputed(dataset_nm, k_per_bag=k_per_bag,
                                                metamodel=metamodel, demonet=demonet)
    config = tf.ConfigProto()
    config.intra_op_parallelism_threads = 16
    config.inter_op_parallelism_threads = 16
    config.gpu_options.allow_growth = True

    ins_fea = None
    bags_full_label = None
    bags_nm = None
    if set_ == 'train':
        name_file = dict_name_file[set_]
        if metamodel == 'EdgeBoxes':
            dim_rois = 4
        else:
            dim_rois = 5
        print('num_classes', num_classes)
        next_element = getTFRecordDataset(name_file, k_per_bag=k_per_bag,
                                          dim_rois=dim_rois, num_classes=num_classes)
        sess = tf.Session(config=config)
        while True:
            try:
                fc7s, roiss, rois_scores, labels, name_imgs = sess.run(next_element)
                if ins_fea is None:
                    ins_fea = fc7s.astype(np.float32)
                    bags_full_label = labels.astype(np.float32)
                    bags_nm = name_imgs
                else:
                    ins_fea = np.vstack((ins_fea, fc7s)).astype(np.float32)
                    bags_full_label = np.vstack((bags_full_label, labels)).astype(np.float32)
                    bags_nm = np.concatenate((bags_nm, name_imgs))
            except tf.errors.OutOfRangeError:
                break
        sess.close()

    bags_full_label = np.array(bags_full_label)
    bags_label = bags_full_label[:, classe]

    # Feature standardisation (this was an L2 normalisation for the musk1/musk2
    # datasets in the MIL code this function was adapted from)
    if not dataset_nm.startswith('newsgroups'):
        mean_fea = np.mean(ins_fea, axis=(0, 1), keepdims=True) + 1e-6
        std_fea = np.std(ins_fea, axis=(0, 1), keepdims=True) + 1e-6
        ins_fea = np.divide(ins_fea - mean_fea, std_fea)
    else:
        mean_fea = np.ones((1, 1, ins_fea.shape[2]))
        std_fea = np.ones((1, 1, ins_fea.shape[2]))

    # Store the data at bag level: one (instances, labels) pair per bag
    bags_fea = []
    for id, bag_nm in enumerate(bags_nm):
        bag_fea = ([], [])
        for ins_idx in range(k_per_bag):
            bag_fea[0].append(ins_fea[id, ins_idx, :])
            bag_fea[1].append(bags_label[id])
        bags_fea.append(bag_fea)
    datasets['train'] = bags_fea
    return datasets, bags_full_label, mean_fea, std_fea
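# A minimal usage sketch (illustration only; assumes the 'train' TFRecord file
# exists): each element of datasets['train'] is an (instances, labels) pair where
# `instances` is a list of k_per_bag feature vectors and `labels` repeats the
# bag-level label once per instance.
if False:  # illustration only, not executed
    datasets, bags_full_label, mean_fea, std_fea = load_dataset('IconArt_v1', set_='train', classe=0)
    first_bag_feats, first_bag_labels = datasets['train'][0]
    X = np.stack(first_bag_feats)   # shape (k_per_bag, num_features)
    y = first_bag_labels[0]         # one label shared by all instances of the bag
    print(X.shape, y)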
def Compute_EdgeBoxesAndCNN_features(demonet='res152', nms_thresh=0.7,
                                     database='IconArt_v1', augmentation=False,
                                     L2=False, saved='all', verbose=True,
                                     filesave='tfrecords', k_regions=300,
                                     testMode=False, plotProposedBoxes=False):
    """
    The goal of this function is to compute CNN features on EdgeBoxes region proposals.
    @param : demonet : the kind of backbone network used; it can be 'vgg16_VOC07',
        'vgg16_VOC12', 'vgg16_COCO', 'res101_VOC12', 'res101_COCO', 'res152_COCO'
    @param : nms_thresh : the NMS threshold on the Region Proposal Network
    Warning: for now, the EdgeBoxes version shipped in opencv-contrib does not
    return box scores.
    """
    path_data = '/media/gonthier/HDD/output_exp/ClassifPaintings/'
    path_imgs = path_data + 'EdgeBoxesIllust/' + database + '/'
    if plotProposedBoxes:
        print("We will only plot the regions of the EdgeBoxes with k_regions = ",
              k_regions, path_imgs)
        pathlib.Path(path_imgs).mkdir(parents=True, exist_ok=True)
    item_name, path_to_img, default_path_imdb, classes, ext, num_classes, str_val, df_label, \
        path_data, Not_on_NicolasPC = get_database(database)

    if augmentation:
        raise NotImplementedError
        N = 50
    else:
        N = 1
    if L2:
        raise NotImplementedError
        extL2 = '_L2'
    else:
        extL2 = ''
    if saved == 'all':
        savedstr = '_all'
    elif saved == 'fc7':
        savedstr = ''
    elif saved == 'pool5':
        savedstr = '_pool5'

    tf.reset_default_graph()  # Needed to use different nets one after the other
    if verbose:
        print('=== EdgeBoxes net', demonet, 'database', database, ' ===')
    if demonet == 'res152':
        weights_path = '/media/gonthier/HDD/models/resnet152_weights_tf.h5'
        model = resnet_152_keras.resnet152_model_2048output(weights_path)
        num_features = 2048
    else:
        raise NotImplementedError

    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    tfconfig.gpu_options.allow_growth = True
    # init session
    # sess = tf.Session(config=tfconfig)
    features_resnet_dict = {}

    sets = ['train', 'val', 'trainval', 'test']

    if filesave == 'pkl':
        name_pkl_all_features = path_data + 'EdgeBoxes_' + demonet + '_' + database \
            + '_N' + str(N) + extL2 + '_TLforMIL_nms_' + str(nms_thresh) + savedstr + '.pkl'
        pkl = open(name_pkl_all_features, 'wb')
    elif filesave == 'tfrecords':
        if k_regions == 300:
            k_per_bag_str = ''
        else:
            k_per_bag_str = '_k' + str(k_regions)
        dict_writers = {}
        for set_str in sets:
            name_pkl_all_features = path_data
            if testMode:
                name_pkl_all_features += 'TestMode_'
            name_pkl_all_features += 'EdgeBoxes_' + demonet + '_' + database + '_N' + str(N) \
                + extL2 + '_TLforMIL_nms_' + str(nms_thresh) + savedstr + k_per_bag_str \
                + '_' + set_str + '.tfrecords'
            dict_writers[set_str] = tf.python_io.TFRecordWriter(name_pkl_all_features)

    model_edgeboxes = 'model/model.yml'
    print('This requires: pip install opencv-contrib-python')
    edge_detection = cv2.ximgproc.createStructuredEdgeDetection(model_edgeboxes)

    number_of_regions = []
    Itera = 1000
    if testMode:
        Itera = 1
    for i, name_img in enumerate(df_label[item_name]):
        if testMode and i > 1:
            break
        if filesave == 'pkl':
            if not k_regions == 300:
                raise NotImplementedError
            if i % Itera == 0:
                if verbose:
                    print(i, name_img)
                if not i == 0:
                    pickle.dump(features_resnet_dict, pkl)  # Save the data
                    features_resnet_dict = {}
            if database in ['IconArt_v1', 'VOC2007', 'clipart', 'comic', 'Paintings',
                            'watercolor', 'WikiTenLabels', 'MiniTrain_WikiTenLabels',
                            'WikiLabels1000training', 'CASPApaintings']:
                complet_name = path_to_img + name_img + '.jpg'
            elif database == 'PeopleArt':
                complet_name = path_to_img + name_img
                name_sans_ext = os.path.splitext(name_img)[0]
            elif (database == 'Wikidata_Paintings') or (database == 'Wikidata_Paintings_miniset_verif'):
                name_sans_ext = os.path.splitext(name_img)[0]
                complet_name = path_to_img + name_sans_ext + '.jpg'
            if plotProposedBoxes:
                plot_im_withBoxes(complet_name, edge_detection, k_regions, path_imgs)
            list_im, rois = get_crops(complet_name, edge_detection, k_regions,
                                      demonet, augmentation=False)
            number_of_regions += [len(list_im)]
            # The model needs a BGR image in [0, 255] minus the per-channel mean
            fc7 = model.predict(list_im)
            roi_scores = np.ones((len(list_im),))
            if saved == 'fc7':
                features_resnet_dict[name_img] = fc7
            elif saved == 'all':
                features_resnet_dict[name_img] = rois, roi_scores, fc7
        elif filesave == 'tfrecords':
            if i % Itera == 0:
                if verbose:
                    print(i, name_img)
            if database in ['IconArt_v1', 'VOC2007', 'clipart', 'comic', 'Paintings',
                            'watercolor', 'CASPApaintings', 'WikiTenLabels',
                            'MiniTrain_WikiTenLabels', 'WikiLabels1000training']:
                complet_name = path_to_img + name_img + '.jpg'
                name_sans_ext = name_img
            elif database == 'PeopleArt':
                complet_name = path_to_img + name_img
                name_sans_ext = os.path.splitext(name_img)[0]
            elif (database == 'Wikidata_Paintings') or (database == 'Wikidata_Paintings_miniset_verif'):
                name_sans_ext = os.path.splitext(name_img)[0]
                complet_name = path_to_img + name_sans_ext + '.jpg'
            im = cv2.imread(complet_name)
            height = im.shape[0]
            width = im.shape[1]
            if plotProposedBoxes:
                plot_im_withBoxes(complet_name, edge_detection, k_regions, path_imgs)
            list_im, rois = get_crops(complet_name, edge_detection, k_regions,
                                      demonet, augmentation=False)
            # Boxes are x, y, w, h
            number_of_regions += [len(list_im)]
            fc7 = model.predict(list_im)
            roi_scores = np.ones((len(list_im),))
            if testMode:
                print('Image :', height, width)
                print('Normally ROI (x1,x2,y1,y2) :')
                print(rois)
            if len(fc7) >= k_regions:
                rois = rois[0:k_regions, :]
                roi_scores = roi_scores[0:k_regions, ]
                fc7 = fc7[0:k_regions, :]
            else:
                # Fewer proposals than k_regions: pad by repeating the available ones
                number_repeat = k_regions // len(fc7) + 1
                f_repeat = np.repeat(fc7, number_repeat, axis=0)
                roi_scores_repeat = np.repeat(roi_scores, number_repeat, axis=0)
                rois_repeat = np.repeat(rois, number_repeat, axis=0)
                rois = rois_repeat[0:k_regions, :]
                roi_scores = roi_scores_repeat[0:k_regions, ]
                fc7 = f_repeat[0:k_regions, :]
            num_regions = fc7.shape[0]
            num_features = fc7.shape[1]
            dim1_rois = rois.shape[1]
            classes_vectors = np.zeros((num_classes, 1), dtype=np.float32)
            if database == 'Paintings':
                for j in range(num_classes):
                    if classes[j] in df_label['classe'][i]:
                        classes_vectors[j] = 1
            if database in ['VOC2007', 'clipart', 'watercolor', 'comic',
                            'PeopleArt', 'CASPApaintings']:
                for j in range(num_classes):
                    # What happens here is a crude rescaling to {0, 1} of labels
                    # that may live in {0, 1} but also in {-1, 1}; this is dubious.
                    value = int((int(df_label[classes[j]][i]) + 1.) / 2.)
                    classes_vectors[j] = value
            if database in ['WikiTenLabels', 'MiniTrain_WikiTenLabels',
                            'WikiLabels1000training', 'IconArt_v1']:
                for j in range(num_classes):
                    value = int(df_label[classes[j]][i])
                    classes_vectors[j] = value
            if saved == 'fc7':
                print('It is possible that you need to replace _bytes_feature by '
                      '_floats_feature in this function')
                raise NotImplementedError
                # TODO: change this!
                features = tf.train.Features(feature={
                    'height': _int64_feature(height),
                    'width': _int64_feature(width),
                    'num_regions': _int64_feature(num_regions),
                    'num_features': _int64_feature(num_features),
                    'fc7': _bytes_feature(tf.compat.as_bytes(fc7.tostring())),
                    'label': _bytes_feature(tf.compat.as_bytes(classes_vectors.tostring())),
                    'name_img': _bytes_feature(str.encode(name_sans_ext))})
            elif saved == 'pool5':
                raise NotImplementedError
            elif saved == 'all':
                feature = {
                    'height': _int64_feature(height),
                    'width': _int64_feature(width),
                    'num_regions': _int64_feature(num_regions),
                    'num_features': _int64_feature(num_features),
                    'dim1_rois': _int64_feature(dim1_rois),
                    'rois': _floats_feature(rois),
                    'roi_scores': _floats_feature(roi_scores),
                    'fc7': _floats_feature(fc7),
                    'label': _floats_feature(classes_vectors),
                    'name_img': _bytes_feature(str.encode(name_sans_ext))}
                features = tf.train.Features(feature=feature)
            example = tf.train.Example(features=features)
            if database == 'VOC2007' or database == 'PeopleArt':
                if (df_label.loc[df_label[item_name] == name_img]['set'] == 'train').any():
                    dict_writers['train'].write(example.SerializeToString())
                    dict_writers['trainval'].write(example.SerializeToString())
                elif (df_label.loc[df_label[item_name] == name_img]['set'] == 'val').any():
                    dict_writers['val'].write(example.SerializeToString())
                    dict_writers['trainval'].write(example.SerializeToString())
                elif (df_label.loc[df_label[item_name] == name_img]['set'] == 'test').any():
                    dict_writers['test'].write(example.SerializeToString())
            if (database == 'Wikidata_Paintings_miniset') or database == 'Paintings':
                if (df_label.loc[df_label[item_name] == name_img]['set'] == 'train').any():
                    dict_writers['train'].write(example.SerializeToString())
                    dict_writers['trainval'].write(example.SerializeToString())
                elif (df_label.loc[df_label[item_name] == name_img]['set'] == 'validation').any():
                    dict_writers['val'].write(example.SerializeToString())
                    dict_writers['trainval'].write(example.SerializeToString())
                elif (df_label.loc[df_label[item_name] == name_img]['set'] == 'test').any():
                    dict_writers['test'].write(example.SerializeToString())
            if database in ['IconArt_v1', 'watercolor', 'clipart', 'comic', 'WikiTenLabels',
                            'MiniTrain_WikiTenLabels', 'WikiLabels1000training', 'CASPApaintings']:
                if (df_label.loc[df_label[item_name] == name_img]['set'] == 'train').any():
                    dict_writers['train'].write(example.SerializeToString())
                    dict_writers['trainval'].write(example.SerializeToString())
                elif (df_label.loc[df_label[item_name] == name_img]['set'] == 'test').any():
                    dict_writers['test'].write(example.SerializeToString())

    if filesave == 'pkl':
        pickle.dump(features_resnet_dict, pkl)
        pkl.close()
    elif filesave == 'tfrecords':
        for set_str in sets:
            dict_writers[set_str].close()
    print('Mean number of regions per image :', np.mean(number_of_regions),
          'with k max = ', k_regions)

    tf.reset_default_graph()
    if testMode:
        sets = ['train', 'test', 'trainval', 'val']
        dim_rois = 4
        for set_str in sets:
            name_pkl_all_features = path_data
            if testMode:
                name_pkl_all_features += 'TestMode_'
            name_pkl_all_features += 'EdgeBoxes_' + demonet + '_' + database + '_N' + str(N) \
                + extL2 + '_TLforMIL_nms_' + str(nms_thresh) + savedstr + k_per_bag_str \
                + '_' + set_str + '.tfrecords'
            print(name_pkl_all_features)
            if set_str == 'train':
                train_dataset = tf.data.TFRecordDataset(name_pkl_all_features)
                sess = tf.Session()
                train_dataset = train_dataset.map(
                    lambda r: parser_w_rois_all_class(r, num_classes=num_classes,
                                                      with_rois_scores=True,
                                                      num_features=num_features,
                                                      num_rois=k_regions,
                                                      dim_rois=dim_rois))
                mini_batch_size = 1
                dataset_batch = train_dataset.batch(mini_batch_size)
                dataset_batch.cache()
                iterator = dataset_batch.make_one_shot_iterator()
                next_element = iterator.get_next()
                print(next_element)
                nx = sess.run(next_element)
                print(nx)
                name_img = nx[-1][0].decode('utf8')
                if database in ['IconArt_v1', 'VOC2007', 'clipart', 'comic', 'Paintings',
                                'watercolor', 'WikiTenLabels', 'MiniTrain_WikiTenLabels',
                                'WikiLabels1000training', 'CASPApaintings']:
                    complet_name = path_to_img + name_img + '.jpg'
                    name_sans_ext = name_img
                elif database == 'PeopleArt':
                    complet_name = path_to_img + name_img
                    name_sans_ext = os.path.splitext(name_img)[0]
                elif (database == 'Wikidata_Paintings') or (database == 'Wikidata_Paintings_miniset_verif'):
                    name_sans_ext = os.path.splitext(name_img)[0]
                    complet_name = path_to_img + name_sans_ext + '.jpg'
                im = cv2.imread(complet_name)
                blobs, im_scales = get_blobs(im)
                dd = nx[1] / im_scales[0]
                score = nx[2]
                roi = np.hstack((dd[0], score[0].reshape((-1, 1))))
                class_name = ['']
                vis_detections_list(im, class_name, [roi])
            os.remove(name_pkl_all_features)
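# A small self-contained illustration (not from the original code) of the padding
# scheme used above when EdgeBoxes returns fewer than k_regions proposals: the
# available rows are repeated enough times, then truncated back to exactly k rows.
def _pad_regions_by_repetition_demo():
    k_regions = 5
    fc7 = np.arange(6).reshape(3, 2).astype(np.float32)  # only 3 proposals of dim 2
    number_repeat = k_regions // len(fc7) + 1
    fc7_padded = np.repeat(fc7, number_repeat, axis=0)[0:k_regions, :]
    assert fc7_padded.shape == (k_regions, 2)
    return fc7_padded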
def simpleRASTAclassification_withGramMatrices():
    """
    In this function we simply test whether a distance between the Gram matrices
    at one given layer can be a good proxy for the classification task.
    """
    Net = 'VGG'
    source_dataset = 'RASTA'
    cropCenter = True
    # Get dataset information
    set_ = 'trainval'
    getBeforeReLU = True
    whatToload = 'covmean'
    number_im_considered = None
    style_layers_all = ['block1_conv1', 'block2_conv1', 'block3_conv1',
                        'block4_conv1', 'block5_conv1']
    style_layers_all = ['block3_conv1']  # Override: only this layer is considered for now
    layer_used = style_layers_all[0]
    sizeIm_tab = [224, 800]
    sizeIm = sizeIm_tab[1]

    item_name, path_to_img, default_path_imdb, classes, ext, num_classes, str_val, df_label, \
        path_data, Not_on_NicolasPC = get_database(source_dataset)
    df_train = df_label[df_label['set'] == 'train']
    df_test = df_label[df_label['set'] == 'test']
    df_val = df_label[df_label['set'] == str_val]
    df_trainval = df_train.append(df_val)

    output_path = os.path.join(os.sep, 'media', 'gonthier', 'HDD2', 'output_exp',
                               'Covdata', 'RASTA')
    pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)

    for layer_used in style_layers_all:
        style_layers = [layer_used]
        for sizeIm in sizeIm_tab:
            # This will need to change at some point: compute the minimal image
            # size of the RASTA dataset instead of hard-coding the sizes.
            name = 'Triu_cov_of_VGG_' + layer_used
            name_feat = name + '_ImSize' + str(sizeIm)
            outfile_test = os.path.join(output_path, name_feat + '_test.npy')
            outfile_trainval = os.path.join(output_path, name_feat + '_trainval.npy')
            outfile_test_y_get = os.path.join(output_path, name + '_test_labels.npy')
            outfile_trainval_y_gt = os.path.join(output_path, name + '_trainval_labels.npy')
            if os.path.isfile(outfile_test) and os.path.isfile(outfile_trainval):
                Xtest = np.load(outfile_test)
                Xtrain = np.load(outfile_trainval)
                Ytest_gt = np.load(outfile_test_y_get)
                Ytrain_gt = np.load(outfile_trainval_y_gt)
                Ytest_pred_minOfDist = np.empty(Ytest_gt.shape, dtype=np.float32)
                Ytest_pred_meanOfDist = np.empty(Ytest_gt.shape, dtype=np.float32)
                Ytest_pred_meanOf_kNN_Dist = np.empty(Ytest_gt.shape, dtype=np.float32)
            else:
                net_get_cov = get_VGGmodel_gram_mean_features(style_layers,
                                                              getBeforeReLU=getBeforeReLU)
                itera = 1000
                Xtrain = None
                Xtest = None
                Xtrain_dict = {}
                Xtest_dict = {}
                for l, layer in enumerate(style_layers):
                    Xtrain_dict[layer] = None
                    Xtest_dict[layer] = None
                Ytrain_gt = np.empty((len(df_trainval), num_classes), dtype=np.float32)
                Ytrain_pred = np.empty((len(df_trainval), num_classes), dtype=np.float32)
                for i, row in enumerate(df_trainval.iterrows()):
                    row_value = row[1]
                    image_path = path_to_img + '/' + row_value[item_name] + '.jpg'
                    labels = row_value[classes].values
                    Ytrain_gt[i, :] = labels
                    X_i = None
                    if number_im_considered is None or i < number_im_considered:
                        if i % itera == 0:
                            print(i, image_path)
                        head, tail = os.path.split(image_path)
                        short_name = '.'.join(tail.split('.')[0:-1])
                        # Get the covariance matrices and the means
                        try:
                            if cropCenter:
                                # For VGG or ResNet with classification head, size == 224
                                image_array = load_and_crop_img(path=image_path, Net=Net,
                                                                target_size=sizeIm,
                                                                crop_size=sizeIm,
                                                                interpolation='lanczos:center')
                            else:
                                image_array = load_resize_and_process_img(image_path,
                                                                          Net=Net, sizeIm=sizeIm)
                            net_cov_mean = net_get_cov.predict(image_array, batch_size=1)
                        except IndexError as e:
                            print(e)
                            print(i, image_path)
                            raise e
                        for l, layer in enumerate(style_layers):
                            cov = net_cov_mean[2 * l][0, :, :]
                            # Only the upper triangular part, because the Gram matrix is symmetric
                            iu1 = np.triu_indices(cov.shape[0])
                            cov_vectorized = cov[iu1]
                            mean = net_cov_mean[2 * l + 1][0, :]  # because batch size == 1
                            if X_i is None:
                                X_i = cov_vectorized
                            else:
                                X_i = np.concatenate((X_i, cov_vectorized))
                            if Xtrain_dict[layer] is None:
                                Xtrain_dict[layer] = {}
                        if Xtrain is None:
                            Xtrain = np.empty((len(df_trainval), len(X_i)), dtype=np.float32)
                        Xtrain[i, :] = X_i

                Ytest_gt = np.empty((len(df_test), num_classes), dtype=np.float32)
                Ytest_pred_minOfDist = np.empty((len(df_test), num_classes), dtype=np.float32)
                Ytest_pred_meanOfDist = np.empty((len(df_test), num_classes), dtype=np.float32)
                Ytest_pred_meanOf_kNN_Dist = np.empty((len(df_test), num_classes), dtype=np.float32)
                for i, row in enumerate(df_test.iterrows()):
                    row_value = row[1]
                    image_path = path_to_img + '/' + row_value[item_name] + '.jpg'
                    labels = row_value[classes].values
                    Ytest_gt[i, :] = labels
                    X_i = None
                    if number_im_considered is None or i < number_im_considered:
                        if i % itera == 0:
                            print(i, image_path)
                        head, tail = os.path.split(image_path)
                        short_name = '.'.join(tail.split('.')[0:-1])
                        # Get the covariance matrices and the means
                        try:
                            if cropCenter:
                                image_array = load_and_crop_img(path=image_path, Net=Net,
                                                                target_size=sizeIm,
                                                                crop_size=sizeIm,
                                                                interpolation='lanczos:center')
                            else:
                                image_array = load_resize_and_process_img(image_path,
                                                                          Net=Net, sizeIm=sizeIm)
                            net_cov_mean = net_get_cov.predict(image_array, batch_size=1)
                        except IndexError as e:
                            print(e)
                            print(i, image_path)
                            raise e
                        for l, layer in enumerate(style_layers):
                            cov = net_cov_mean[2 * l][0, :, :]
                            iu1 = np.triu_indices(cov.shape[0])
                            cov_vectorized = cov[iu1]
                            mean = net_cov_mean[2 * l + 1][0, :]  # because batch size == 1
                            if X_i is None:
                                X_i = cov_vectorized
                            else:
                                X_i = np.concatenate((X_i, cov_vectorized))
                            if Xtest_dict[layer] is None:
                                Xtest_dict[layer] = {}
                        if Xtest is None:
                            Xtest = np.empty((len(df_test), len(X_i)), dtype=np.float32)
                        Xtest[i, :] = X_i
                np.save(outfile_test, Xtest)
                np.save(outfile_trainval, Xtrain)
                np.save(outfile_test_y_get, Ytest_gt)
                np.save(outfile_trainval_y_gt, Ytrain_gt)
                del net_get_cov

            # An earlier nearest-neighbour baseline computed the pairwise distances
            # between the Gram features and scored each test image per class with
            # exp(-d), using the min / mean / mean-of-k-NN distance to the training
            # images of that class; it ran into memory errors for block3_conv1
            # (arrays of shape (4268, 131328)) and is replaced by the MLP below.
            # A linear SVM baseline was also considered.
            top_k = [1, 3, 5]

            print(Xtrain.shape)
            num_classes = 25
            optimizer = 'SGD'
            regulOnNewLayer = None
            regulOnNewLayerParam = []
            dropout = None
            verbose = True
            decay = 10 ** (-4)
            SGDmomentum = 0.9
            lr = 0.001
            nesterov = False
            model = MLP_model(num_of_classes=num_classes, optimizer=optimizer, lr=lr,
                              regulOnNewLayer=regulOnNewLayer,
                              regulOnNewLayerParam=regulOnNewLayerParam, dropout=dropout,
                              nesterov=nesterov, SGDmomentum=SGDmomentum, decay=decay,
                              verbose=verbose, final_activation='softmax',
                              metrics='top_k_categorical_accuracy',
                              loss='categorical_crossentropy')
            scaler = StandardScaler(copy=False)
            Xtrain = scaler.fit_transform(Xtrain)
            Xtest = scaler.transform(Xtest)
            if layer_used == 'block1_conv1':
                batch_size = 2048
            elif layer_used == 'block3_conv1':
                # In this case the feature vector has size 32896
                batch_size = 64
            else:
                batch_size = 1024 * 2048 // Xtrain.shape[1]
            print('batch_size :', batch_size)
            model = TrainMLP(model, Xtrain, Ytrain_gt, None, None, batch_size=batch_size,
                             epochs=20, verbose=True, plotConv=False,
                             return_best_model=True, NoValidationSetUsed=False,
                             RandomValdiationSet=True)
            Y_clf_prediction = model.predict(Xtest)
            print('\nFor layer :', layer_used, 'and size :', sizeIm)
            scores = get_top_scores(Ytest_gt, Y_clf_prediction, top_k=top_k)
            for val, pred in zip(top_k, scores):
                print('Top-{} accuracy : {}%'.format(val, pred * 100))
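# A minimal numpy illustration (independent of the function above) of the
# Gram-matrix vectorisation used in simpleRASTAclassification_withGramMatrices:
# since a Gram matrix is symmetric, keeping its upper triangle (diagonal included)
# preserves all the information while reducing an n x n matrix to n*(n+1)/2 values
# (e.g. 256 channels for block3_conv1 give 256*257/2 = 32896 features).
def _triu_vectorisation_demo():
    n = 4
    A = np.random.rand(n, n).astype(np.float32)
    gram = A @ A.T                       # symmetric by construction
    iu = np.triu_indices(n)
    vec = gram[iu]                       # length n*(n+1)/2 == 10 here
    # the full matrix can be rebuilt from the vector
    rebuilt = np.zeros_like(gram)
    rebuilt[iu] = vec
    rebuilt = rebuilt + rebuilt.T - np.diag(np.diag(rebuilt))
    assert np.allclose(rebuilt, gram)
    return vec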
# Fragment (apparently the body of a loop over per-class AP result files):
# builds one column of a LaTeX AP table from a CSV file; `file` and `list_lines`
# are defined in the enclosing scope.
with_scores = False
case_s = ''
if 'OIV5_small_3135' in file:
    database = 'OIV5_small_3135'
    short_name = '3k'
    num = 3135
elif 'OIV5_small_30001' in file:
    database = 'OIV5_small_30001'
    short_name = '30k'
    num = 30001
if 'MaxOfMax' in file:
    case_m = ' MoM'
else:
    case_m = ''
item_name, path_to_img, default_path_imdb, classes, ext, num_classes, str_val, df_label, path_data, Not_on_NicolasPC = \
    get_database(database)
APs = pd.read_csv(file, sep=',', header=None, dtype={0: str, 1: float})
name_columns_AP = 'AP ' + short_name + case_s + case_m
list_lines['Classe'] += ' & ' + short_name + case_s + case_m
APs.columns = ['ID', name_columns_AP]
APs[name_columns_AP] = APs[name_columns_AP].apply(lambda x: x * 100)
APs['ID'] = APs['ID'].apply(lambda x: x.replace('OpenImagesDetectionChallenge_Precision/', ''))
APs['ID'] = APs['ID'].apply(lambda x: x.replace("OpenImagesDetectionChallenge_PerformanceByCategory/AP@0.5IOU/b", ''))
APs['ID'] = APs['ID'].apply(lambda x: x.replace("'", ''))
for row in APs.iterrows():
    ap_c = row[1][1]
    c = row[1][0]
    # Bold the AP values above 5 points in the LaTeX table
    if float(ap_c) > 5.0:
        list_lines[c] += "& \\textbf{{ {0:.1f} }}".format(ap_c)
    else:
        list_lines[c] += "& {0:.1f}".format(ap_c)
def saliencyMap_ImageSize():
    """
    Function to test the saliency maps on a single image.
    """
    target_dataset = 'IconArt_v1'
    style_layers = getBNlayersResNet50()
    features = 'activation_48'
    normalisation = False
    final_clf = 'LinearSVC'  # Doesn't matter
    source_dataset = 'ImageNet'
    transformOnFinalLayer = 'GlobalAveragePooling2D'
    final_clf = 'MLP2'
    epochs = 20
    optimizer = 'SGD'
    return_best_model = True
    batch_size = 16
    dropout = None
    regulOnNewLayer = None
    nesterov = False
    SGDmomentum = 0.9
    decay = 1e-4
    cropCenter = False
    # Load ResNet50 normalisation statistics
    opt_option = [0.1, 0.01]
    pretrainingModif = True
    kind_method = 'FT'
    computeGlobalVariance = False
    constrNet = 'ResNet50'

    Model_dict = {}
    list_markers = ['o', 's', 'X', '*', 'v', '^', '<', '>', 'd', '1', '2', '3',
                    '4', '8', 'h', 'H', 'p', 'd', '$f$', 'P']
    alpha = 0.7
    sizeIm = 224
    Net = constrNet

    print('loading :', constrNet, computeGlobalVariance, kind_method, pretrainingModif, opt_option)
    model = learn_and_eval(target_dataset, source_dataset, final_clf, features,
                           constrNet, kind_method, style_layers=style_layers,
                           normalisation=normalisation, transformOnFinalLayer=transformOnFinalLayer,
                           batch_size_RF=16, epochs_RF=20, momentum=0.9, ReDo=False,
                           returnStatistics=True, cropCenter=cropCenter,
                           computeGlobalVariance=computeGlobalVariance,
                           epochs=epochs, optimizer=optimizer, opt_option=opt_option,
                           return_best_model=return_best_model,
                           batch_size=batch_size, gridSearch=False, verbose=True)
    # Performance : & 54.4 & 76.3 & 60.7 & 82.1 & 74.3 & 70.6 & 11.0 & 61.3 \\

    item_name, path_to_img, default_path_imdb, classes, ext, num_classes, str_val, df_label, \
        path_data, Not_on_NicolasPC = get_database(target_dataset)
    images_in_set = df_label[df_label['set'] == 'test'][item_name].values
    images_in_set = [images_in_set[0]]
    for image in images_in_set:
        image_path = os.path.join(path_to_img, image + '.jpg')
        if cropCenter:
            # For VGG or ResNet with classification head, size == 224
            image_array = load_and_crop_img(path=image_path, Net=Net, target_size=sizeIm,
                                            crop_size=sizeIm, interpolation='lanczos:center')
        else:
            image_array = load_resize_and_process_img(image_path, Net=Net, max_dim=sizeIm)
        predictions = model.predict(image_array)
        df_label[df_label[item_name] == image][classes].values  # ground-truth labels, for inspection
        c_i = 0
        SmoothGradsaliencyMap = SmoothedMask(model, c_i, stdev_spread=.15,
                                             nsamples=25, magnitude=False)
        smooth_grad_of_image = SmoothGradsaliencyMap.GetMask(image_array)
        smooth_grad_of_image_scaled = take_abs_and_rescale(smooth_grad_of_image)
        smooth_grad_of_image_scaled = to_01(smooth_grad_of_image)  # NB: overwrites the line above
        ShowGrayscaleImage(smooth_grad_of_image_scaled[0, :, :, :])
        integrated_grad_of_image = GetMask_IntegratedGradients(image_array, model, c_i,
                                                               x_steps=50)
        integrated_grad_of_image_scaled = to_01(integrated_grad_of_image)
        ShowGrayscaleImage(integrated_grad_of_image_scaled[0, :, :, :])
        # In this case the gradient has one component per colour channel
        integrated_grad_randBaseline_of_image = GetMask_RandomBaseline_IntegratedGradients(
            image_array, model, c_i, x_steps=50, num_random_trials=10)
        integrated_grad_randBaseline_of_image_scaled = to_01(integrated_grad_randBaseline_of_image)
        ShowGrayscaleImage(integrated_grad_randBaseline_of_image_scaled[0, :, :, :])
        integrated_grad_noisy_image = GetMask_IntegratedGradients_noisyImage(
            image_array, model, c_i, x_steps=50, num_random_trials=10, stdev_spread=.15)
        integrated_grad_noisy_image_scaled = to_01(integrated_grad_noisy_image)
        ShowGrayscaleImage(integrated_grad_noisy_image_scaled[0, :, :, :])
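# A minimal sketch of min-max rescaling to [0, 1] for display (an assumption about
# what the to_01 helper used above does; the epsilon guards against constant maps):
def _to_01_sketch(x, eps=1e-8):
    return (x - np.min(x)) / (np.max(x) - np.min(x) + eps)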
def eval_MAP_SaliencyMethods(database='IconArt_v1', metamodel='FasterRCNN',
                             demonet='res152_COCO', k_per_bag=300,
                             SaliencyMethod='SmoothGrad'):
    """
    The goal of this function is to compute the mAP of a saliency method applied
    to a classification ResNet.
    @param : SaliencyMethod : IntegratedGrad or SmoothGrad for the moment
    """
    matplotlib.use('Agg')
    save_data = False
    ReDo = True
    plot = False
    TEST_NMS = 0.01
    thresh_classif = 0.1

    # Parameters for the classification network
    target_dataset = 'IconArt_v1'
    style_layers = []
    features = 'activation_48'
    normalisation = False
    final_clf = 'LinearSVC'  # Doesn't matter
    source_dataset = 'ImageNet'
    transformOnFinalLayer = 'GlobalAveragePooling2D'
    final_clf = 'MLP2'
    epochs = 20
    optimizer = 'SGD'
    return_best_model = True
    batch_size = 16
    dropout = None
    regulOnNewLayer = None
    nesterov = False
    SGDmomentum = 0.9
    decay = 1e-4
    cropCenter = False
    # Load ResNet50 normalisation statistics
    opt_option = [0.1, 0.01]
    pretrainingModif = True
    kind_method = 'FT'
    computeGlobalVariance = False
    constrNet = 'ResNet50'
    sizeIm = 224
    Net = constrNet

    # Load the box proposals
    item_name, path_to_img, default_path_imdb, classes, ext, num_classes, str_val, df_label, path_data, Not_on_NicolasPC = get_database(database)
    imdb, list_im_withanno = get_imdb_and_listImagesInTestSet(database)
    num_images_detect = len(list_im_withanno)
    dict_rois = getDictBoxesProposals(database=target_dataset, k_per_bag=k_per_bag,
                                      metamodel=metamodel, demonet=demonet)

    for_data_output = os.path.join(path_data, 'dataSaliencyMap', SaliencyMethod)
    im_with_boxes_output = os.path.join(path_data, 'SaliencyMapImagesBoxes', SaliencyMethod)
    print('===', im_with_boxes_output)
    pathlib.Path(for_data_output).mkdir(parents=True, exist_ok=True)
    pathlib.Path(im_with_boxes_output).mkdir(parents=True, exist_ok=True)

    # Load the classification model
    print('loading :', constrNet, computeGlobalVariance, kind_method, pretrainingModif, opt_option)
    model = learn_and_eval(target_dataset, source_dataset, final_clf, features,
                           constrNet, kind_method, style_layers=style_layers,
                           normalisation=normalisation, transformOnFinalLayer=transformOnFinalLayer,
                           batch_size_RF=16, epochs_RF=20, momentum=0.9, ReDo=False,
                           returnStatistics=True, cropCenter=cropCenter,
                           computeGlobalVariance=computeGlobalVariance,
                           epochs=epochs, optimizer=optimizer, opt_option=opt_option,
                           return_best_model=return_best_model,
                           batch_size=batch_size, gridSearch=False, verbose=True)

    SaliencyMapClass_tab = []
    stdev_spread = 0.1
    nsamples = 50
    x_steps = 50
    for j in range(num_classes):
        SaliencyMapClass = getSaliencyMapClass(model, c_i=j, method=SaliencyMethod,
                                               stdev_spread=stdev_spread,
                                               nsamples=nsamples, x_steps=x_steps)
        SaliencyMapClass_tab += [SaliencyMapClass]

    candidate_boxes = [[] for _ in range(imdb.num_images)]
    all_boxes_order = [[[] for _ in range(num_images_detect)]
                       for _ in range(imdb.num_classes)]
    itera = 20
    norm = True
    t0 = time.time()
    # A bit more than 1440 images
    for i in range(imdb.num_images):
        im_path = imdb.image_path_at(i)
        name_im = im_path.split('/')[-1]
        if i % itera == 0:
            t1 = time.time()
            print(i, name_im, 'duration for ', itera, 'iterations = ', str(t1 - t0), 's')
            t0 = time.time()
        im = cv2.imread(im_path)
        hauteur, largeur, _ = im.shape
        blobs, im_scales = get_blobs(im)
        if database == 'PeopleArt':
            name_im = '/'.join(im_path.split('/')[-2:])
            name_im = '.'.join(name_im.split('.')[0:-1])
        else:
            name_im = name_im.split('.')[0]
        proposals_boxes = dict_rois[name_im]
        if cropCenter:
            # For VGG or ResNet with classification head, size == 224
            image_array = load_and_crop_img(path=im_path, Net=Net, target_size=sizeIm,
                                            crop_size=sizeIm, interpolation='lanczos:center')
        else:
            image_array = load_resize_and_process_img(im_path, Net=Net, max_dim=sizeIm)

        dict_sensitivity = {}
        dict_sensitivity_path = os.path.join(
            for_data_output,
            name_im + '_dict_SaliencyMap' + SaliencyMethod + '_std' + str(stdev_spread)
            + '_n' + str(nsamples) + '_steps' + str(x_steps) + '.pkl')
        if not os.path.exists(dict_sensitivity_path) or ReDo:
            predictions = model.predict(image_array)[0]
            dict_sensitivity['predictions'] = predictions
            inds = np.where(predictions > thresh_classif)[0]
            for ind in inds:
                prediction = predictions[ind]
                if np.isnan(prediction):
                    print('Prediction of ', name_im, 'is nan !!!')
                    input('wait')
                candidate_boxes = []
                j = ind + 1  # the class index for the evaluation part
                Smap = SaliencyMapClass_tab[ind].GetMask(image_array)
                if save_data:
                    dict_sensitivity[j] = Smap
                if SaliencyMethod == 'SmoothGrad':
                    Smap_grey = np.mean(np.abs(Smap), axis=-1, keepdims=True)
                    if norm:
                        Smap_grey = to_01(Smap_grey)
                    Smap_grey_time_score = prediction * Smap_grey
                else:  # In the case of Integrated Gradients
                    # Following Antoine Pirovano's advice
                    ptile = 99
                    # Sum of the absolute values for the grayscale map
                    pixel_attrs = np.sum(np.abs(Smap), axis=-1, keepdims=True)
                    pixel_attrs = np.clip(pixel_attrs / np.percentile(pixel_attrs, ptile), 0, 1)
                    Smap_grey_time_score = prediction * pixel_attrs
                # Beware, counter-intuitive: cv2.resize expects the target size as
                # (width, height), whereas numpy's shape is (height, width).
                Smap_grey_time_score = Smap_grey_time_score[0]
                Smap_grey_time_score_resized = cv2.resize(Smap_grey_time_score,
                                                          (largeur, hauteur),
                                                          interpolation=cv2.INTER_NEAREST)
                if plot:
                    name_output = name_im + '_' + SaliencyMethod + '_std' + str(stdev_spread) \
                        + '_n' + str(nsamples) + '_steps' + str(x_steps) + '_' + str(j) + '.jpg'
                    name_output_path = os.path.join(im_with_boxes_output, name_output)
                    Smap_grey_time_score_resized_01 = to_01(Smap_grey_time_score_resized)
                    plt.imshow(Smap_grey_time_score_resized_01, cmap=cm.gray)
                    plt.title(classes[j - 1] + ' : ' + str(prediction))
                    plt.savefig(name_output_path)
                    plt.close()
                for k in range(len(proposals_boxes)):
                    box = proposals_boxes[k]
                    x1, y1, x2, y2 = box  # x is along the width, y along the height
                    x1_int = int(np.round(x1))
                    x2_int = int(np.round(x2))
                    y1_int = int(np.round(y1))
                    y2_int = int(np.round(y2))
                    assert (x2_int <= largeur)
                    assert (y2_int <= hauteur)
                    Smap_grey_time_score_resized_crop = \
                        Smap_grey_time_score_resized[y1_int:y2_int, x1_int:x2_int]
                    # because bbox = dets[i, :4]; boxes are score, x1, y1, x2, y2
                    Smap_grey_time_score_resized_crop_score = np.mean(Smap_grey_time_score_resized_crop)
                    if not np.isnan(Smap_grey_time_score_resized_crop_score):
                        box_with_scores = np.append(box, [Smap_grey_time_score_resized_crop_score])
                    else:
                        box_with_scores = np.append(box, [0.0])
                    candidate_boxes += [box_with_scores]
                if len(candidate_boxes) > 0:
                    candidate_boxes_NP = np.array(candidate_boxes)
                    # Shift the scores so that the maximum equals the class prediction
                    candidate_boxes_NP[:, -1] = candidate_boxes_NP[:, -1] \
                        - np.max(candidate_boxes_NP[:, -1]) + prediction
                    keep = nms(candidate_boxes_NP, TEST_NMS)
                    cls_dets = candidate_boxes_NP[keep, :]
                    all_boxes_order[j][i] = cls_dets
            if plot:
                roi_boxes_and_score = []
                local_cls = []
                for j in range(num_classes):
                    cls_dets = all_boxes_order[j + 1][i]
                    if len(cls_dets) > 0:
                        local_cls += [classes[j]]
                        roi_boxes_score = cls_dets
                        if roi_boxes_and_score is None:
                            roi_boxes_and_score = [roi_boxes_score]
                        else:
                            roi_boxes_and_score += [roi_boxes_score]
                if roi_boxes_and_score is None:
                    roi_boxes_and_score = [[]]
                vis_detections_list(im, local_cls, roi_boxes_and_score, thresh=-np.inf)
                name_output = name_im + '_' + SaliencyMethod + '_std' + str(stdev_spread) \
                    + '_n' + str(nsamples) + '_steps' + str(x_steps) + '_Regions.jpg'
                name_output_path = os.path.join(im_with_boxes_output, name_output)
                plt.savefig(name_output_path)
                plt.close()
            if save_data:
                with open(dict_sensitivity_path, 'wb') as f:
                    pickle.dump(dict_sensitivity, f, pickle.HIGHEST_PROTOCOL)

    imdb.set_force_dont_use_07_metric(True)
    det_file = os.path.join(path_data,
                            'detectionsSaliencyMap' + SaliencyMethod + '_std' + str(stdev_spread)
                            + '_n' + str(nsamples) + '_steps' + str(x_steps) + '.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes_order, f, pickle.HIGHEST_PROTOCOL)
    output_dir = path_data + 'tmp/' + database + '_' + SaliencyMethod + '_std' \
        + str(stdev_spread) + '_n' + str(nsamples) + '_steps' + str(x_steps) + '_mAP.txt'
    aps = imdb.evaluate_detections(all_boxes_order, output_dir)  # AP at 0.5
    print("===> Detection score (thres = 0.5): ", database, 'with Saliency map from',
          SaliencyMethod, 'with std =', stdev_spread, 'nsamples = ', nsamples,
          'x_steps =', x_steps)
    print(arrayToLatex(aps, per=True))
    ovthresh_tab = [0.3, 0.1, 0.]
    for ovthresh in ovthresh_tab:
        aps = imdb.evaluate_localisation_ovthresh(all_boxes_order, output_dir, ovthresh)
        print("Detection score with thres at ", ovthresh)
        print(arrayToLatex(aps, per=True))
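# A small numpy illustration (not from the original code) of the score
# re-calibration applied above before NMS: the per-box saliency scores are shifted
# so that the best box gets exactly the image-level class prediction, while the
# ranking of the boxes is preserved.
def _shift_box_scores_demo():
    prediction = 0.8
    scores = np.array([0.10, 0.25, 0.18])
    shifted = scores - np.max(scores) + prediction   # the max becomes 0.8
    assert np.isclose(np.max(shifted), prediction)
    return shifted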
def Compute_Faster_RCNN_features(demonet='res152_COCO', nms_thresh=0.7,
                                 database='IconArt_v1', verbose=True,
                                 k_regions=300, path_data='data',
                                 path_output='output', path_to_model='models'):
    """
    @param : demonet : the backbone net used; it can be 'vgg16_VOC07',
        'vgg16_VOC12', 'vgg16_COCO', 'res101_VOC12', 'res101_COCO', 'res152_COCO'
    @param : nms_thresh : the NMS threshold on the Region Proposal Network
    @param : database : name of the dataset
    @param : k_regions : number of regions per image
    @param : path_data : path to the dataset
    @param : path_output : path where the output TFRecords are written
    @param : path_to_model : path to the pretrained model
    """
    item_name, path_to_img, classes, ext, num_classes, str_val, df_label = get_database(
        database, default_path_imdb=path_data)

    N = 1
    extL2 = ''
    savedstr = '_all'
    layer = 'fc7'

    tf.reset_default_graph()  # Needed to use different nets one after the other
    if verbose:
        print(demonet)
    if 'VOC' in demonet:
        CLASSES = CLASSES_SET['VOC']
        anchor_scales = [8, 16, 32]  # Needed for the right net architecture!
    elif 'COCO' in demonet:
        CLASSES = CLASSES_SET['COCO']
        anchor_scales = [4, 8, 16, 32]  # we use 3 aspect ratios and 4 scales (adding 64**2)
    nbClassesDemoNet = len(CLASSES)
    pathlib.Path(path_to_model).mkdir(parents=True, exist_ok=True)
    tfmodel = os.path.join(path_to_model, NETS_Pretrained[demonet])
    if not os.path.exists(tfmodel):
        print("You have to download the Faster RCNN pretrained model, see README")
        return 0
    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    tfconfig.gpu_options.allow_growth = True
    # init session
    sess = tf.Session(config=tfconfig)
    # load network
    if 'vgg16' in demonet:
        net = vgg16()
        size_output = 4096
    elif 'res101' in demonet:
        net = resnetv1(num_layers=101)
        size_output = 2048
    elif 'res152' in demonet:
        net = resnetv1(num_layers=152)
        size_output = 2048
    else:
        raise NotImplementedError
    net.create_architecture("TEST", nbClassesDemoNet, tag='default',
                            anchor_scales=anchor_scales, modeTL=True,
                            nms_thresh=nms_thresh)
    saver = tf.train.Saver()
    saver.restore(sess, tfmodel)

    features_resnet_dict = {}
    sets = ['trainval', 'test']
    if k_regions == 300:
        k_per_bag_str = ''
    else:
        k_per_bag_str = '_k' + str(k_regions)
    dict_writers = {}
    for set_str in sets:
        name_pkl_all_features = os.path.join(
            path_output,
            'FasterRCNN_' + demonet + '_' + database + '_N' + str(N) + extL2
            + '_TLforMIL_nms_' + str(nms_thresh) + savedstr + k_per_bag_str
            + '_' + set_str + '.tfrecords')
        dict_writers[set_str] = tf.python_io.TFRecordWriter(name_pkl_all_features)

    Itera = 1000
    for i, name_img in enumerate(df_label[item_name]):
        if i % Itera == 0:
            if verbose:
                print(i, name_img)
        if database in ['IconArt_v1', 'watercolor']:
            complet_name = path_to_img + name_img + '.jpg'
            name_sans_ext = name_img
        elif database == 'PeopleArt':
            complet_name = path_to_img + name_img
            name_sans_ext = os.path.splitext(name_img)[0]
        try:
            im = cv2.imread(complet_name)
            height = im.shape[0]
            width = im.shape[1]
        except AttributeError:
            print(complet_name, 'is missing')
            continue
        cls_score, cls_prob, bbox_pred, rois, roi_scores, fc7, pool5 = TL_im_detect(
            sess, net, im)  # Arguments: im (ndarray): a color image in BGR order
        if k_regions == 300:
            num_regions = fc7.shape[0]
            num_features = fc7.shape[1]
            dim1_rois = rois.shape[1]
            classes_vectors = np.zeros((num_classes, 1))
            # Zero-pad up to k_regions rows
            rois_tmp = np.zeros((k_regions, 5))
            roi_scores_tmp = np.zeros((k_regions, 1))
            fc7_tmp = np.zeros((k_regions, size_output))
            rois_tmp[0:rois.shape[0], 0:rois.shape[1]] = rois
            roi_scores_tmp[0:roi_scores.shape[0], 0:roi_scores.shape[1]] = roi_scores
            fc7_tmp[0:fc7.shape[0], 0:fc7.shape[1]] = fc7
            rois = rois_tmp
            roi_scores = roi_scores_tmp
            fc7 = fc7_tmp
        else:
            # We will keep only k_regions regions
            new_nms_thresh = 0.0
            score_threshold = 0.1
            minimal_surface = 36 * 36
            num_regions = k_regions
            num_features = fc7.shape[1]
            dim1_rois = rois.shape[1]
            classes_vectors = np.zeros((num_classes, 1))
            rois_reduce, roi_scores_reduce, fc7_reduce = reduce_to_k_regions(
                k_regions, rois, roi_scores, fc7, new_nms_thresh,
                score_threshold, minimal_surface)
            if len(fc7_reduce) >= k_regions:
                rois = rois_reduce[0:k_regions, :]
                roi_scores = roi_scores_reduce[0:k_regions, ]
                fc7 = fc7_reduce[0:k_regions, :]
            else:
                # Fewer regions than k_regions: pad by repeating them
                number_repeat = k_regions // len(fc7_reduce) + 1
                f_repeat = np.repeat(fc7_reduce, number_repeat, axis=0)
                roi_scores_repeat = np.repeat(roi_scores_reduce, number_repeat, axis=0)
                rois_reduce_repeat = np.repeat(rois_reduce, number_repeat, axis=0)
                rois = rois_reduce_repeat[0:k_regions, :]
                roi_scores = roi_scores_repeat[0:k_regions, ]
                fc7 = f_repeat[0:k_regions, :]
        if database in ['watercolor', 'PeopleArt']:
            for j in range(num_classes):
                # Labels stored in {-1, 1}: map them to {0, 1}
                value = int((int(df_label[classes[j]][i]) + 1.) / 2.)
                classes_vectors[j] = value
        if database in ['IconArt_v1']:
            for j in range(num_classes):
                value = int(df_label[classes[j]][i])
                classes_vectors[j] = value
        features = tf.train.Features(feature={
            'height': _int64_feature(height),
            'width': _int64_feature(width),
            'num_regions': _int64_feature(num_regions),
            'num_features': _int64_feature(num_features),
            'dim1_rois': _int64_feature(dim1_rois),
            'rois': _floats_feature(rois),
            'roi_scores': _floats_feature(roi_scores),
            'fc7': _floats_feature(fc7),
            'label': _floats_feature(classes_vectors),
            'name_img': _bytes_feature(str.encode(name_sans_ext))})
        example = tf.train.Example(features=features)
        if database == 'PeopleArt':
            if (df_label.loc[df_label[item_name] == name_img]['set'] == 'train').any():
                dict_writers['trainval'].write(example.SerializeToString())
            elif (df_label.loc[df_label[item_name] == name_img]['set'] == 'val').any():
                dict_writers['trainval'].write(example.SerializeToString())
            elif (df_label.loc[df_label[item_name] == name_img]['set'] == 'test').any():
                dict_writers['test'].write(example.SerializeToString())
        if database in ['watercolor', 'IconArt_v1'] or 'IconArt_v1' in database:
            if (df_label.loc[df_label[item_name] == name_img]['set'] == 'train').any():
                dict_writers['trainval'].write(example.SerializeToString())
            elif (df_label.loc[df_label[item_name] == name_img]['set'] == 'test').any():
                dict_writers['test'].write(example.SerializeToString())
    for set_str in sets:
        dict_writers[set_str].close()
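# A minimal parsing sketch for the records written above (an assumption about the
# reading side, mirroring what a parser such as parser_w_rois_all_class presumably
# does with TF1's API; the shapes must match k_regions and the feature dimension
# used at writing time, and num_classes is 7 for IconArt_v1):
def _parse_faster_rcnn_example_sketch(serialized, k_regions=300, num_features=2048,
                                      dim_rois=5, num_classes=7):
    feature_spec = {
        'rois': tf.FixedLenFeature([k_regions * dim_rois], tf.float32),
        'roi_scores': tf.FixedLenFeature([k_regions], tf.float32),
        'fc7': tf.FixedLenFeature([k_regions * num_features], tf.float32),
        'label': tf.FixedLenFeature([num_classes], tf.float32),
        'name_img': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized, feature_spec)
    fc7 = tf.reshape(parsed['fc7'], [k_regions, num_features])
    rois = tf.reshape(parsed['rois'], [k_regions, dim_rois])
    return fc7, rois, parsed['roi_scores'], parsed['label'], parsed['name_img']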
def run_and_eval_MImax(demonet='res152_COCO', database='IconArt_v1', ReDo=True, PlotRegions=False, verbose=True, k_per_bag=300, CV_Mode=None, num_split=2, restarts=11, max_iters_all_base=300, LR=0.01, C=1.0, Optimizer='GradientDescent', with_scores=False, epsilon=0.0, C_Searching=False, thresh_evaluation=0.05, TEST_NMS=0.3, mini_batch_size=None, loss_type='', path_data='data', path_output='output', path_to_model='models'): """ This function used TFrecords file Classifier based on CNN features with Transfer Learning on Faster RCNN output for weakly supervised object detection Note : with a features maps of 2048, k_bag =300 and a batchsize of 1000 we can train up to 1200 W vectors in parallel at the same time on a NVIDIA 1080 Ti @param : demonet : the kind of inside network used it can be 'vgg16_VOC07', 'vgg16_VOC12','vgg16_COCO','res101_VOC12','res101_COCO','res152_COCO' @param : database : the database used for the weakly supervised detection task @param : verbose : Verbose option classical @param : ReDo = False : Erase the former computation, if True and the model already exists : only do the evaluation @param : PlotRegions : plot the regions used for learn and the regions in the positive output response @param : k_per_bag : number of element per batch in the slection phase [defaut : 300] @param : CV_Mode : cross validation mode in the MI_max : possibility ; None, CV in k split @param : num_split : Number of split for the CV @param : restarts : number of restarts / reinitialisation in the MI_max [default=11] @param : max_iters_all_base : number of maximum iteration on the going on the full database @param : LR : Learning rate for the optimizer in the MI_max @param : C : Regularisation term for the optimizer in the MI_max @param : Optimizer : Optimizer for the MI_max GradientDescent or Adam @param : thresh_evaluation : 0.05 : seuillage avant de fournir les boites a l evaluation de detections @param : TEST_NMS : 0.3 : recouvrement autorise avant le NMS avant l evaluation de detections @param : mini_batch_size if None or 0 an automatic adhoc mini batch size is set This function output AP for different dataset for the weakly supervised task """ item_name, path_to_img, classes, ext, num_classes, str_val, df_label = get_database( database) num_trainval_im = len( df_label[df_label['set'] == 'train'][item_name]) + len( df_label[df_label['set'] == str_val][item_name]) print('Training on ', database, 'with ', num_trainval_im, ' images in the trainval set') N = 1 extL2 = '' nms_thresh = 0.7 savedstr = '_all' metamodel = 'FasterRCNN' sets = ['trainval', 'test'] dict_name_file = {} data_precomputeed = True if k_per_bag == 300: k_per_bag_str = '' else: k_per_bag_str = '_k' + str(k_per_bag) for set_str in sets: name_pkl_all_features = os.path.join( path_output, metamodel + '_' + demonet + '_' + database + '_N' + str(N) + extL2 + '_TLforMIL_nms_' + str(nms_thresh) + savedstr + k_per_bag_str + '_' + set_str + '.tfrecords') dict_name_file[set_str] = name_pkl_all_features if set_str in ['trainval', 'test' ] and not (os.path.isfile(name_pkl_all_features)): data_precomputeed = False if demonet in ['vgg16_COCO', 'vgg16_VOC07', 'vgg16_VOC12']: num_features = 4096 elif demonet in ['res101_COCO', 'res152_COCO', 'res101_VOC07', 'res152']: num_features = 2048 if not (data_precomputeed): # Compute the features if verbose: print( "We will use a Faster RCNN as feature extractor and region proposals" ) if metamodel == 'FasterRCNN': Compute_Faster_RCNN_features(demonet=demonet, nms_thresh=nms_thresh, database=database, 
augmentation=False, L2=False, saved='all', verbose=verbose, filesave='tfrecords', k_regions=k_per_bag) else: raise (NotImplementedError) # Config param for TF session config = tf.ConfigProto() config.gpu_options.allow_growth = True # Data for the MI_max Latent SVM # All those parameter are design for a GPU 1080 Ti memory size ie 11GB performance = False sizeMax = 30 * 200000 // (k_per_bag * 20) if not (CV_Mode == 'CV' and num_split == 2): sizeMax //= 2 if num_features > 2048: sizeMax //= (num_features // 2048) model_str = 'MI_max' if k_per_bag == 300: buffer_size = 10000 else: buffer_size = 5000 * 300 // k_per_bag if (k_per_bag > 300 or num_trainval_im > 5000): usecache = False else: usecache = True if mini_batch_size is None or mini_batch_size == 0: mini_batch_size = min(sizeMax, num_trainval_im) max_iters = ((num_trainval_im // mini_batch_size)+ \ np.sign(num_trainval_im % mini_batch_size))*max_iters_all_base AP_per_class = [] P_per_class = [] R_per_class = [] P20_per_class = [] AP_per_classbS = [] final_clf = None if C == 1.0: C_str = '' else: C_str = '_C' + str(C) # regularisation term if with_scores: with_scores_str = '_WRC' + str(epsilon) else: with_scores_str = '' extPar = '_p' if CV_Mode == 'CV': max_iters = ( max_iters * (num_split - 1) // num_split ) # Modification d iteration max par rapport au nombre de split extCV = '_cv' + str(num_split) elif CV_Mode is None or CV_Mode == '': extCV = '' else: raise (NotImplementedError) extCV += '_wr' if Optimizer == 'Adam': opti_str = '' elif Optimizer == 'GradientDescent': opti_str = '_gd' elif Optimizer == 'lbfgs': opti_str = '_lbfgs' else: raise (NotImplementedError) if loss_type is None or loss_type == '': loss_type_str = '' elif loss_type == 'hinge': loss_type_str = 'Losshinge' if LR == 0.01: LR_str = '' else: LR_str = '_LR' + str(LR) optimArg = None if optimArg == None or Optimizer == 'GradientDescent': optimArg_str = '' else: if Optimizer == 'Adam' and str(optimArg).replace( ' ', '_' ) == "{'learning_rate':_0.01,_'beta1':_0.9,_'beta2':_0.999,_'epsilon':_1e-08}": optimArg_str = '' else: optimArg_str = str(optimArg).replace(' ', '_') verboseMI_max = verbose shuffle = True if num_trainval_im == mini_batch_size: shuffle = False number_zone = k_per_bag dont_use_07_metric = True dim_rois = 5 cachefilefolder = os.path.join(path_data, 'cachefile') cachefile_model_base='WLS_'+ database+ '_'+demonet+'_r'+str(restarts)+'_s' \ +str(mini_batch_size)+'_k'+str(k_per_bag)+'_m'+str(max_iters)+extPar+\ extCV+opti_str+LR_str+C_str+with_scores_str+ loss_type_str pathlib.Path(cachefilefolder).mkdir(parents=True, exist_ok=True) cachefile_model = os.path.join( cachefilefolder, cachefile_model_base + '_' + model_str + '.pkl') if verbose: print("cachefile name", cachefile_model) if not os.path.isfile(cachefile_model) or ReDo: name_milsvm = {} if verbose: print("The cachefile doesn t exist or we will erase it.") else: with open(cachefile_model, 'rb') as f: name_milsvm = pickle.load(f) if verbose: print("The cachefile exists") usecache_eval = True if database == 'watercolor': imdb = get_imdb('watercolor_test') imdb.set_force_dont_use_07_metric(dont_use_07_metric) num_images = len(imdb.image_index) elif database == 'PeopleArt': imdb = get_imdb('PeopleArt_test') imdb.set_force_dont_use_07_metric(dont_use_07_metric) num_images = len(imdb.image_index) elif database == 'clipart': imdb = get_imdb('clipart_test') imdb.set_force_dont_use_07_metric(dont_use_07_metric) num_images = len(imdb.image_index) elif database == 'IconArt_v1': imdb = get_imdb('IconArt_v1_test') 
    AP_per_class = []
    P_per_class = []
    R_per_class = []
    P20_per_class = []
    AP_per_classbS = []
    final_clf = None
    if C == 1.0:
        C_str = ''
    else:
        C_str = '_C' + str(C)  # regularisation term
    if with_scores:
        with_scores_str = '_WRC' + str(epsilon)
    else:
        with_scores_str = ''
    extPar = '_p'
    if CV_Mode == 'CV':
        # Adjust the maximum number of iterations to the number of splits
        max_iters = (max_iters * (num_split - 1) // num_split)
        extCV = '_cv' + str(num_split)
    elif CV_Mode is None or CV_Mode == '':
        extCV = ''
    else:
        raise NotImplementedError
    extCV += '_wr'
    if Optimizer == 'Adam':
        opti_str = ''
    elif Optimizer == 'GradientDescent':
        opti_str = '_gd'
    elif Optimizer == 'lbfgs':
        opti_str = '_lbfgs'
    else:
        raise NotImplementedError
    if loss_type is None or loss_type == '':
        loss_type_str = ''
    elif loss_type == 'hinge':
        loss_type_str = 'Losshinge'
    else:
        # Unknown loss_type: loss_type_str would otherwise stay undefined
        raise NotImplementedError
    if LR == 0.01:
        LR_str = ''
    else:
        LR_str = '_LR' + str(LR)
    optimArg = None
    if optimArg is None or Optimizer == 'GradientDescent':
        optimArg_str = ''
    else:
        if Optimizer == 'Adam' and str(optimArg).replace(
                ' ', '_'
        ) == "{'learning_rate':_0.01,_'beta1':_0.9,_'beta2':_0.999,_'epsilon':_1e-08}":
            optimArg_str = ''
        else:
            optimArg_str = str(optimArg).replace(' ', '_')
    verboseMI_max = verbose
    shuffle = True
    if num_trainval_im == mini_batch_size:
        shuffle = False
    number_zone = k_per_bag
    dont_use_07_metric = True
    dim_rois = 5
    cachefilefolder = os.path.join(path_data, 'cachefile')
    cachefile_model_base = 'WLS_' + database + '_' + demonet + '_r' + str(restarts) + '_s' \
        + str(mini_batch_size) + '_k' + str(k_per_bag) + '_m' + str(max_iters) + extPar + \
        extCV + opti_str + LR_str + C_str + with_scores_str + loss_type_str
    pathlib.Path(cachefilefolder).mkdir(parents=True, exist_ok=True)
    cachefile_model = os.path.join(
        cachefilefolder, cachefile_model_base + '_' + model_str + '.pkl')
    if verbose:
        print("cachefile name", cachefile_model)
    if not os.path.isfile(cachefile_model) or ReDo:
        name_milsvm = {}
        if verbose:
            print("The cachefile doesn't exist or we will erase it.")
    else:
        with open(cachefile_model, 'rb') as f:
            name_milsvm = pickle.load(f)
        if verbose:
            print("The cachefile exists")

    usecache_eval = True
    if database == 'watercolor':
        imdb = get_imdb('watercolor_test')
        imdb.set_force_dont_use_07_metric(dont_use_07_metric)
        num_images = len(imdb.image_index)
    elif database == 'PeopleArt':
        imdb = get_imdb('PeopleArt_test')
        imdb.set_force_dont_use_07_metric(dont_use_07_metric)
        num_images = len(imdb.image_index)
    elif database == 'clipart':
        imdb = get_imdb('clipart_test')
        imdb.set_force_dont_use_07_metric(dont_use_07_metric)
        num_images = len(imdb.image_index)
    elif database == 'IconArt_v1':
        imdb = get_imdb('IconArt_v1_test')
        imdb.set_force_dont_use_07_metric(dont_use_07_metric)
        num_images = len(df_label[df_label['set'] == 'test'][item_name])
    else:
        num_images = len(df_label[df_label['set'] == 'test'][item_name])
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    data_path_train = dict_name_file['trainval']
    if not os.path.isfile(cachefile_model) or ReDo:
        if verbose:
            t0 = time.time()
        classifierMI_max = tf_MI_max(LR=LR, C=C, restarts=restarts,
                                     num_rois=k_per_bag, max_iters=max_iters,
                                     buffer_size=buffer_size,
                                     verbose=verboseMI_max,
                                     Optimizer=Optimizer,
                                     mini_batch_size=mini_batch_size,
                                     num_features=num_features,
                                     num_classes=num_classes,
                                     num_split=num_split, CV_Mode=CV_Mode,
                                     with_scores=with_scores, epsilon=epsilon,
                                     loss_type=loss_type, usecache=usecache)
        export_dir = classifierMI_max.fit_MI_max_tfrecords(
            data_path=data_path_train, shuffle=shuffle,
            C_Searching=C_Searching)
        if verbose:
            t1 = time.time()
            print('Total duration of the training part :', str(t1 - t0))
        np_pos_value, np_neg_value = classifierMI_max.get_porportions()
        name_milsvm = export_dir, np_pos_value, np_neg_value
        with open(cachefile_model, 'wb') as f:
            pickle.dump(name_milsvm, f)
    else:
        export_dir, np_pos_value, np_neg_value = name_milsvm

    true_label_all_test, predict_label_all_test, name_all_test, \
        labels_test_predicted, all_boxes = tfR_evaluation_parall(
            database=database, num_classes=num_classes, export_dir=export_dir,
            dict_name_file=dict_name_file, mini_batch_size=mini_batch_size,
            config=config, scoreInMI_max=with_scores, path_to_img=path_to_img,
            path_data=path_data, classes=classes, verbose=verbose,
            thresh_evaluation=thresh_evaluation, TEST_NMS=TEST_NMS,
            all_boxes=all_boxes, PlotRegions=PlotRegions,
            cachefile_model_base=cachefile_model_base, number_im=np.inf,
            dim_rois=dim_rois, usecache=usecache_eval, k_per_bag=k_per_bag,
            num_features=num_features)

    for j, classe in enumerate(classes):
        AP = average_precision_score(true_label_all_test[:, j],
                                     predict_label_all_test[:, j],
                                     average=None)
        print("MI_Max version Average Precision for", classes[j], " = ", AP)
        test_precision = precision_score(true_label_all_test[:, j],
                                         labels_test_predicted[:, j])
        test_recall = recall_score(true_label_all_test[:, j],
                                   labels_test_predicted[:, j])
        F1 = f1_score(true_label_all_test[:, j], labels_test_predicted[:, j])
        print("Test on all the data precision = {0:.2f}, recall = {1:.2f}, F1 = {2:.2f}"
              .format(test_precision, test_recall, F1))
        precision_at_k = ranking_precision_score(
            np.array(true_label_all_test[:, j]),
            predict_label_all_test[:, j], 20)
        P20_per_class += [precision_at_k]
        AP_per_class += [AP]
        R_per_class += [test_recall]
        P_per_class += [test_precision]
    with open(cachefile_model, 'wb') as f:
        pickle.dump(name_milsvm, f)
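    # Note on indexing for the detection evaluation below: all_boxes is indexed
    # [class][image] with 0-based classes, in the order of name_all_test,
    # whereas the imdb evaluator expects imdb.image_index order with class 0
    # reserved for the background; hence the reordering and the j - 1 shift.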
    # Detection evaluation
    if database in ['watercolor', 'clipart', 'PeopleArt', 'IconArt_v1']:
        det_file = os.path.join(path_data, 'cachefile', 'detections_aux.pkl')
        with open(det_file, 'wb') as f:
            pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
        max_per_image = 100
        # We do not have the same number of images in the WikiTenLabels or
        # IconArt_v1 case
        num_images_detect = len(imdb.image_index)
        all_boxes_order = [[[] for _ in range(num_images_detect)]
                           for _ in range(imdb.num_classes)]
        number_im = 0
        name_all_test = name_all_test.astype(str)
        for i in range(num_images_detect):
            name_img = imdb.image_path_at(i)
            if database == 'PeopleArt':
                name_img_wt_ext = name_img.split('/')[-2] + '/' + name_img.split('/')[-1]
                name_img_wt_ext_tab = name_img_wt_ext.split('.')
                name_img_wt_ext = '.'.join(name_img_wt_ext_tab[0:-1])
            else:
                name_img_wt_ext = name_img.split('/')[-1]
                name_img_wt_ext = name_img_wt_ext.split('.')[0]
            name_img_ind = np.where(np.array(name_all_test) == name_img_wt_ext)[0]
            if len(name_img_ind) == 0:
                print('Image not found in all_boxes :', name_img_wt_ext)
                raise Exception
            else:
                number_im += 1
            for j in range(1, imdb.num_classes):
                j_minus_1 = j - 1
                all_boxes_order[j][i] = all_boxes[j_minus_1][name_img_ind[0]]
            if max_per_image > 0:
                image_scores = np.hstack([
                    all_boxes_order[j][i][:, -1]
                    for j in range(1, imdb.num_classes)
                ])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(
                            all_boxes_order[j][i][:, -1] >= image_thresh)[0]
                        all_boxes_order[j][i] = all_boxes_order[j][i][keep, :]
        # Check that all the images are present in the detection predictions
        assert number_im == num_images_detect
        det_file = os.path.join(path_data, 'detections.pkl')
        with open(det_file, 'wb') as f:
            pickle.dump(all_boxes_order, f, pickle.HIGHEST_PROTOCOL)
        output_dir = path_data + 'tmp/' + database + '_mAP.txt'
        aps = imdb.evaluate_detections(all_boxes_order, output_dir)
        apsAt05 = aps
        print("Detection score (threshold = 0.5) :", database,
              'with MI_Max with score =', with_scores)
        print(arrayToLatex(aps, per=True))
        ovthresh_tab = [0.3, 0.1, 0.]
        for ovthresh in ovthresh_tab:
            aps = imdb.evaluate_localisation_ovthresh(all_boxes_order,
                                                      output_dir, ovthresh)
            if ovthresh == 0.1:
                apsAt01 = aps
            print("Detection score with threshold at", ovthresh,
                  'with MI_Max with score =', with_scores)
            print(arrayToLatex(aps, per=True))
    print('~~~~~~~~')
    print("mean Average Precision Classification for all the data = {0:.3f}".
          format(np.mean(AP_per_class)))
    print("mean Precision Classification for all the data = {0:.3f}".format(
        np.mean(P_per_class)))
    print("mean Recall Classification for all the data = {0:.3f}".format(
        np.mean(R_per_class)))
    #print("mean Precision Classification @ 20 for all the data = {0:.3f}".format(np.mean(P20_per_class)))
    print('Mean Average Precision Classification with MI_Max with score =',
          with_scores, ' : ')
    print(AP_per_class)
    print(arrayToLatex(AP_per_class, per=True))
    return (apsAt05, apsAt01, AP_per_class)
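# Usage sketch (illustrative, not part of the original pipeline): run the
# MI_max weakly supervised pipeline on IconArt_v1 with the region scores
# enabled. The data/ and output/ folders with the precomputed TFRecords are
# assumed to exist; the hyper-parameter values are only an example.
def example_run_and_eval_MImax():
    apsAt05, apsAt01, AP_per_class = run_and_eval_MImax(
        demonet='res152_COCO', database='IconArt_v1', ReDo=False,
        k_per_bag=300, with_scores=True, epsilon=0.01,
        path_data='data', path_output='output')
    print('Detection mAP@0.5 :', np.mean(apsAt05))
    print('Classification mAP :', np.mean(AP_per_class))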
def mainEval(dataset_nm='IconArt_v1', classe=0, k_per_bag=300,
             metamodel='FasterRCNN', demonet='res152_COCO', test=False,
             MILmodel='MI_Net', max_epoch=20, verbose=True):
    """
    Train and evaluate a MIL model on the precomputed region features.

    @param : dataset_nm : name of the dataset
    @param : classe : index of the single class used when test is True
    @param : k_per_bag : number of regions per bag
    @param : metamodel : region proposals used, 'FasterRCNN' or 'EdgeBoxes'
    @param : demonet : backbone used to extract the features
    @param : test : if True, quick run on one class only (classe forced to 0)
    @param : MILmodel : one of 'MI_Net', 'mi_Net', 'MI_Net_with_DS',
        'MI_Net_with_RC', 'MI_Max_AddOneLayer_Keras'
    @param : max_epoch : number of training epochs
    """
    t0 = time.time()
    if test:
        classe = 0
    if MILmodel == 'MI_Net':
        MILmodel_fct = MI_Net_WSOD
    elif MILmodel == 'MI_Max_AddOneLayer_Keras':
        MILmodel_fct = MI_Max_AddOneLayer_Keras
    elif MILmodel == 'mi_Net':
        MILmodel_fct = mi_Net_WSOD
    elif MILmodel == 'MI_Net_with_DS':
        MILmodel_fct = MI_Net_with_DS_WSOD
    elif MILmodel == 'MI_Net_with_RC':
        MILmodel_fct = MI_Net_with_RC_WSOD
    else:
        print(MILmodel, 'is unknown')
        return 0
    print('MILmodel', MILmodel, max_epoch)
    item_name,path_to_img,default_path_imdb,classes,ext,num_classes,str_val,df_label,\
        path_data,Not_on_NicolasPC = get_database(dataset_nm)
    # The features are loaded once (for class 0); the instance labels are
    # overwritten below for each class before training its model
    dataset, bags_full_label, mean_fea, std_fea = load_dataset(
        dataset_nm, classe=0, k_per_bag=k_per_bag, metamodel=metamodel,
        demonet=demonet)
    model_dict = {}
    for j in range(num_classes):
        if test and not j == classe:
            continue
        else:
            for k in range(len(dataset['train'])):
                # Replace the instance labels of bag k by the image-level
                # label of class j replicated over the k_per_bag regions
                a = list(dataset['train'][k])
                a[1] = [bags_full_label[k, j]] * k_per_bag
                a = tuple(a)
                dataset['train'][k] = a
            print('start training for class', j)
            model = MILmodel_fct(dataset, max_epoch=max_epoch, verbose=verbose)
            model_dict[j] = model
    t1 = time.time()
    print("--- Training duration :", str(t1 - t0), ' s')
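    # Shape conventions assumed below (inferred from the reshape calls): the
    # test iterator yields fc7s of shape (n_im, k_per_bag, num_features) and
    # roiss of shape (n_im, k_per_bag, dim_rois); each per-class model scores
    # the k_per_bag regions of every image independently.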
    dict_name_file = getDictFeaturesPrecomputed(dataset_nm,
                                                k_per_bag=k_per_bag,
                                                metamodel=metamodel,
                                                demonet=demonet)
    name_file = dict_name_file['test']
    if metamodel == 'EdgeBoxes':
        dim_rois = 4
    else:
        dim_rois = 5
    next_element = getTFRecordDataset(name_file, k_per_bag=k_per_bag,
                                      dim_rois=dim_rois,
                                      num_classes=num_classes)

    dont_use_07_metric = False
    imdb = None  # only defined for the datasets with detection annotations
    if dataset_nm == 'VOC2007':
        imdb = get_imdb('voc_2007_test', data_path=default_path_imdb)
        num_images = len(imdb.image_index)
    elif dataset_nm == 'watercolor':
        imdb = get_imdb('watercolor_test', data_path=default_path_imdb)
        num_images = len(imdb.image_index)
    elif dataset_nm == 'PeopleArt':
        imdb = get_imdb('PeopleArt_test', data_path=default_path_imdb)
        num_images = len(imdb.image_index)
    elif dataset_nm == 'clipart':
        imdb = get_imdb('clipart_test', data_path=default_path_imdb)
        num_images = len(imdb.image_index)
    elif dataset_nm == 'comic':
        imdb = get_imdb('comic_test', data_path=default_path_imdb)
        num_images = len(imdb.image_index)
    elif dataset_nm == 'CASPApaintings':
        imdb = get_imdb('CASPApaintings_test', data_path=default_path_imdb)
        num_images = len(imdb.image_index)
    elif dataset_nm == 'IconArt_v1' or dataset_nm == 'RMN':
        imdb = get_imdb('IconArt_v1_test', data_path=default_path_imdb)
        num_images = len(df_label[df_label['set'] == 'test'][item_name])
    elif 'IconArt_v1' in dataset_nm and not 'IconArt_v1' == dataset_nm:
        imdb = get_imdb('IconArt_v1_test', ext=dataset_nm.split('_')[-1],
                        data_path=default_path_imdb)
        num_images = len(df_label[df_label['set'] == 'test'][item_name])
    elif dataset_nm in ['WikiTenLabels', 'MiniTrain_WikiTenLabels',
                        'WikiLabels1000training']:
        imdb = get_imdb('WikiTenLabels_test', data_path=default_path_imdb)
        num_images = len(df_label[df_label['set'] == 'test'][item_name])
    elif 'OIV5' in dataset_nm:  # For OIV5 for instance!
        num_images = len(df_label[df_label['set'] == 'test'][item_name])
    else:
        num_images = len(df_label[df_label['set'] == 'test'][item_name])
    if imdb is not None:
        # Guard: the OIV5 and unknown-dataset branches do not define an imdb,
        # so calling set_force_dont_use_07_metric unconditionally would fail
        imdb.set_force_dont_use_07_metric(dont_use_07_metric)
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    TEST_NMS = 0.3
    thresh = 0.0
    true_label_all_test = []
    predict_label_all_test = []
    name_all_test = []
    config = tf.ConfigProto()
    config.intra_op_parallelism_threads = 16
    config.inter_op_parallelism_threads = 16
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    i = 0
    num_features = 2048
    while True:
        try:
            fc7s, roiss, rois_scores, labels, name_imgs = sess.run(next_element)
            # Normalise the features with the training statistics
            fc7s = np.divide(fc7s - mean_fea, std_fea).astype(np.float32)
            true_label_all_test += [labels]
            score_all = None
            for j in range(num_classes):
                if not test:
                    model = model_dict[j]
                    predictions = model.predict(
                        fc7s.reshape((-1, num_features)), batch_size=1)
                    if MILmodel == 'MI_Net_with_DS':
                        predictions = predictions[3]
                    scores_all_j_k = predictions.reshape(
                        (fc7s.shape[0], 1, fc7s.shape[1]))
                else:
                    # In test mode only the model of `classe` (forced to 0) is
                    # trained; its scores are reused for the other classes so
                    # that score_all keeps its full shape
                    if j == classe:
                        model = model_dict[j]
                        predictions = model.predict(
                            fc7s.reshape((-1, num_features)), batch_size=1)
                        if MILmodel == 'MI_Net_with_DS':
                            predictions = predictions[3]
                        scores_all_j_k = predictions.reshape(
                            (fc7s.shape[0], 1, fc7s.shape[1]))
                if score_all is None:
                    score_all = scores_all_j_k
                else:
                    score_all = np.concatenate((score_all, scores_all_j_k),
                                               axis=1)
            predict_label_all_test += [np.max(score_all, axis=2)]

            for k in range(len(labels)):
                name_im = name_imgs[k].decode("utf-8")
                complete_name = path_to_img + str(name_im) + '.jpg'
                im = cv2.imread(complete_name)
                blobs, im_scales = get_blobs(im)
                roi = roiss[k, :]
                if metamodel == 'EdgeBoxes':
                    roi_boxes = roi / im_scales[0]
                else:
                    roi_boxes = roi[:, 1:5] / im_scales[0]
                for j in range(num_classes):
                    scores = score_all[k, j, :]
                    inds = np.where(scores > thresh)[0]
                    cls_scores = scores[inds]
                    cls_boxes = roi_boxes[inds, :]
                    cls_dets = np.hstack(
                        (cls_boxes, cls_scores[:, np.newaxis])).astype(
                            np.float32, copy=False)
                    keep = nms(cls_dets, TEST_NMS)
                    cls_dets = cls_dets[keep, :]
                    all_boxes[j][i] = cls_dets
                i += 1
            for l in range(len(name_imgs)):
                if dataset_nm in ['IconArt_v1', 'VOC2007', 'watercolor',
                                  'clipart', 'comic', 'CASPApaintings',
                                  'WikiTenLabels', 'PeopleArt',
                                  'MiniTrain_WikiTenLabels',
                                  'WikiLabels1000training']:
                    name_all_test += [[str(name_imgs[l].decode("utf-8"))]]
                else:
                    name_all_test += [[name_imgs[l]]]
        except tf.errors.OutOfRangeError:
            # next_element comes from a one-shot iterator: the loop simply
            # drains the test TFRecord once
            break
    sess.close()

    true_label_all_test = np.concatenate(true_label_all_test)
    predict_label_all_test = np.concatenate(predict_label_all_test, axis=0)
    name_all_test = np.concatenate(name_all_test)
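    # At this point (illustrative shapes): true_label_all_test and
    # predict_label_all_test are (n_test, num_classes) arrays and name_all_test
    # the matching (n_test,) array of image names; each image-level score is
    # the max over the k_per_bag region scores (standard MIL pooling: a bag is
    # positive as soon as one of its instances is).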
    AP_per_class = []
    for j, classe in enumerate(classes):
        AP = average_precision_score(true_label_all_test[:, j],
                                     predict_label_all_test[:, j],
                                     average=None)
        AP_per_class += [AP]
    print('Average Precision classification task :')
    print(arrayToLatex(AP_per_class, per=True))

    max_per_image = 100
    # We do not have the same number of images in the WikiTenLabels or
    # IconArt_v1 case
    num_images_detect = len(imdb.image_index)
    all_boxes_order = [[[] for _ in range(num_images_detect)]
                       for _ in range(imdb.num_classes)]
    number_im = 0
    name_all_test = name_all_test.astype(str)
    for i in range(num_images_detect):
        name_img = imdb.image_path_at(i)
        if dataset_nm == 'PeopleArt':
            name_img_wt_ext = name_img.split('/')[-2] + '/' + name_img.split('/')[-1]
            name_img_wt_ext_tab = name_img_wt_ext.split('.')
            name_img_wt_ext = '.'.join(name_img_wt_ext_tab[0:-1])
        else:
            name_img_wt_ext = name_img.split('/')[-1]
            name_img_wt_ext = name_img_wt_ext.split('.')[0]
        name_img_ind = np.where(np.array(name_all_test) == name_img_wt_ext)[0]
        if len(name_img_ind) == 0:
            print('Image not found in all_boxes :', name_img_wt_ext)
            raise Exception
        else:
            number_im += 1
        for j in range(1, imdb.num_classes):
            j_minus_1 = j - 1
            if len(all_boxes[j_minus_1][name_img_ind[0]]) > 0:
                all_boxes_order[j][i] = all_boxes[j_minus_1][name_img_ind[0]]
        if max_per_image > 0 and len(all_boxes_order[j][i]) > 0:
            image_scores = np.hstack([
                all_boxes_order[j][i][:, -1]
                for j in range(1, imdb.num_classes)
            ])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(
                        all_boxes_order[j][i][:, -1] >= image_thresh)[0]
                    all_boxes_order[j][i] = all_boxes_order[j][i][keep, :]
    # Check that all the images are present in the detection predictions
    assert number_im == num_images_detect
    det_file = os.path.join(path_data, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes_order, f, pickle.HIGHEST_PROTOCOL)
    output_dir = path_data + 'tmp/' + dataset_nm + '_mAP.txt'
    aps = imdb.evaluate_detections(all_boxes_order, output_dir)
    apsAt05 = aps
    print("Detection score (threshold = 0.5) :", dataset_nm)
    print(arrayToLatex(aps, per=True))
    ovthresh_tab = [0.3, 0.1, 0.]
    for ovthresh in ovthresh_tab:
        aps = imdb.evaluate_localisation_ovthresh(all_boxes_order, output_dir,
                                                  ovthresh)
        if ovthresh == 0.1:
            apsAt01 = aps
        print("Detection score with threshold at", ovthresh, 'with', MILmodel)
        print(arrayToLatex(aps, per=True))
    t2 = time.time()
    print("--- Testing duration :", str(t2 - t1), ' s')
    return (apsAt05, apsAt01, AP_per_class)
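# Usage sketch (illustrative, not part of the original pipeline): evaluate the
# MI_Net MIL baseline on IconArt_v1 with the precomputed res152_COCO features.
# The default paths resolved by get_database / load_dataset are assumed to
# exist on this machine.
def example_mainEval():
    apsAt05, apsAt01, AP_per_class = mainEval(dataset_nm='IconArt_v1',
                                              k_per_bag=300,
                                              metamodel='FasterRCNN',
                                              demonet='res152_COCO',
                                              MILmodel='MI_Net',
                                              max_epoch=20, verbose=False)
    print('Detection mAP@0.5 :', np.mean(apsAt05))
    print('Classification mAP :', np.mean(AP_per_class))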