def make_submission_xception(name, name_ext, dropout_p):
    """Predict the test set with a fine-tuned Xception and write a submission.

    The network is an ImageNet Xception backbone (average pooled, no top)
    followed by the project's top classifier; its weights are loaded from
    ``MODELS_DIR`` using ``MODEL_FILE.format(name, name_ext)``.

    Args:
        name: Dataset/model name. Used to look up the organized data, the
            embeddings (for the test image names), the weights file and the
            submission file name.
        name_ext: Extra tag that only takes part in the weights file name.
        dropout_p: Dropout probability handed to ``_top_classifier``.
    """
    batch_size = 32

    data_info = load_organized_data_info(imgs_dim=HEIGHT, name=name)
    # Only the last of the six returned values (test image names) is needed.
    _, _, _, _, _, te_names = create_embeddings(name)

    # Unshuffled, label-free stream over the test directory.
    preprocessor = ImageDataGenerator(preprocessing_function=preprocess_input)
    test_flow = preprocessor.flow_from_directory(
        directory=data_info['dir_te'],
        target_size=(HEIGHT, WIDTH),
        class_mode=None,
        batch_size=batch_size,
        shuffle=False,
    )

    weights_path = join(MODELS_DIR, MODEL_FILE.format(name, name_ext))

    # Backbone + classifier head, assembled into one end-to-end model.
    backbone = Xception(weights='imagenet', include_top=False, pooling='avg')
    head = _top_classifier(
        l2_reg=0, dropout_p=dropout_p, input_shape=(2048, ))
    full_model = Model(inputs=backbone.input, outputs=head(backbone.output))
    full_model.load_weights(weights_path)

    n_steps = ceil(data_info['num_te'] / batch_size)
    probs_pred = full_model.predict_generator(
        generator=test_flow, steps=n_steps)

    csv_name = 'xception_fine_tuned_{:s}.csv'.format(name)
    create_submission_file(
        image_names=te_names,
        probs=probs_pred,
        file_name=join(SUBMISSIONS_DIR, csv_name),
    )
# Restore the trained weights and collect every test clip.
model.load_weights(root_dir + 'weights/'+ weight_name)
test_paths = glob(os.path.join(root_dir , 'test/audio/*wav'))


def test_generator(test_batch_size):
    """Yield batches of processed test clips forever, in ``test_paths`` order.

    Each batch is a numpy array built from up to ``test_batch_size``
    consecutive paths; the final batch of a pass may be shorter. After the
    last path the generator starts over from the first one.
    """
    n_paths = len(test_paths)
    while True:
        for offset in range(0, n_paths, test_batch_size):
            # Slicing clamps at the end of the list, so no explicit min().
            chunk = test_paths[offset:offset + test_batch_size]
            yield np.array(
                [process_wav_file(wav, phase='TEST') for wav in chunk])


n_steps = int(np.ceil(len(test_paths)/batch_size))
predictions = model.predict_generator(test_generator(batch_size), n_steps)
classes = np.argmax(predictions, axis=1)

# Keyed by file name, so a repeated name keeps only its last prediction.
submission = dict()
for idx, wav_path in enumerate(test_paths):
    submission[os.path.basename(wav_path)] = id2name[classes[idx]]

with open(root_dir + weight_name + '.csv', 'w') as fout:
    fout.write('fname,label\n')
    fout.writelines(
        '{},{}\n'.format(fname, label) for fname, label in submission.items())
class TestSetAnalysis(object):
    """Run a Keras classifier on a test set and summarise/plot the results.

    After ``predict_gen`` or ``predict_array`` the instance exposes
    ``predictions``, ``predict_labels``, ``true_labels``,
    ``confusion_matrix`` and the index lists ``tp``/``tn``/``fp``/``fn``.
    ``FPR``, ``TPR`` and ``roc_auc`` are filled in only when the model has
    exactly two output classes; otherwise they are ``None``.
    """

    # Supported built-in architectures: name -> (module path, class name).
    _KERAS_APPLICATIONS = {
        'vgg19': ('keras.applications.vgg19', 'VGG19'),
        'vgg16': ('keras.applications.vgg16', 'VGG16'),
        'inceptionv3': ('keras.applications.inception_v3', 'InceptionV3'),
        'resnet50': ('keras.applications.resnet50', 'ResNet50'),
        'xception': ('keras.applications.xception', 'Xception'),
    }

    def __init__(self, model='vgg19', show=True):
        """Load the model to analyse.

        Args:
            model: One of ``vgg19``, ``vgg16``, ``inceptionv3``,
                ``resnet50``, ``xception`` (loaded with ImageNet weights and
                the classification top), or a path ending in ``.hdf5`` that
                is passed to ``load_model``.
            show: When true, print the model summary.

        Raises:
            ValueError: If ``model`` is neither a known name nor a ``.hdf5``
                path.
        """
        # Imported lazily, like the Keras application modules themselves.
        from importlib import import_module

        firstlayer_index = 0
        if model in self._KERAS_APPLICATIONS:
            module_name, class_name = self._KERAS_APPLICATIONS[model]
            factory = getattr(import_module(module_name), class_name)
            self.model = factory(weights='imagenet', include_top=True)
        elif model.endswith('.hdf5'):
            self.model = load_model(model)
            # For saved models the input shape is read from the second layer.
            firstlayer_index = 1
        else:
            raise ValueError(
                "Valid models are: "
                "vgg19, vgg16, inceptionv3, resnet50, xception "
                "(xception/inceptionv3 model is only available in tf "
                "backend). Or provide path to a saved model in .hdf5 format")

        if show:
            # summary() prints by itself and returns None; wrapping it in
            # print() would emit a stray "None".
            self.model.summary()

        self.inputshape = \
            self.model.layers[firstlayer_index].output_shape[1:]

        # Defaults so the reporting/plotting helpers can tell what has (not)
        # been computed yet instead of failing with AttributeError.
        self.data_dir = None
        self.generator = None
        self.nb_class = None
        self.FPR = None
        self.TPR = None
        self.roc_auc = None

    #--------------------------------------------------------------------------
    def predict_gen(self, data_dir, batchsize=32, rescale=1.0/255):
        """Predict every image under ``data_dir`` and compute the metrics.

        Args:
            data_dir: Directory with one sub-folder per class, as expected by
                ``ImageDataGenerator.flow_from_directory``.
            batchsize: Number of images per prediction batch.
            rescale: Factor applied to the pixel values by the generator.
        """
        self.data_dir = data_dir
        datagen = ImageDataGenerator(rescale=rescale)
        self.generator = datagen.flow_from_directory(
            self.data_dir,
            target_size=self.inputshape[:2],
            batch_size=batchsize,
            class_mode='categorical',
            shuffle=False)

        samples = self.generator.samples
        # Newer Keras calls the attribute ``num_classes``; older releases
        # used ``num_class``.
        nb_class = getattr(self.generator, 'num_classes', None)
        if nb_class is None:
            nb_class = self.generator.num_class
        self.nb_class = nb_class

        # The step count must be an integer; round up so the last, possibly
        # partial, batch is included.
        steps = int(np.ceil(samples / float(batchsize)))
        self.predictions = self.model.predict_generator(self.generator, steps)
        self.predictions = self.predictions[:samples, :]
        self.predict_labels = np.argmax(self.predictions, axis=1)

        # Take the labels from the generator itself: with shuffle=False they
        # are in the same order as the predictions, which a separate glob()
        # over the class folders does not guarantee.
        self.true_labels = list(self.generator.classes)
        self._evaluate_predictions()

    #--------------------------------------------------------------------------
    def predict_array(self, xdata, ydata, batchsize=32, rescale=1.0/255):
        """Predict in-memory data and compute the metrics.

        Args:
            xdata: Input array; multiplied by ``rescale`` before prediction.
            ydata: One-hot labels; the true class is the argmax along axis 1.
            batchsize: Number of samples per prediction batch.
            rescale: Factor applied to ``xdata``.
        """
        self.predictions = self.model.predict(xdata*rescale,
                                              batch_size=batchsize)
        self.nb_class = self.predictions.shape[1]
        self.predict_labels = np.argmax(self.predictions, axis=1)
        self.true_labels = np.argmax(ydata, axis=1)
        self._evaluate_predictions()

    #--------------------------------------------------------------------------
    def _evaluate_predictions(self):
        """Derive confusion matrix, ROC data (binary only) and index lists."""
        self.confusion_matrix = confusion_matrix(self.true_labels,
                                                 self.predict_labels)
        if self.nb_class == 2:
            positive_scores = self.predictions[:, 1]
            self.FPR, self.TPR, _ = roc_curve(self.true_labels,
                                              positive_scores)
            self.roc_auc = roc_auc_score(self.true_labels, positive_scores)
        else:
            # ROC is only computed for the two-class case.
            self.FPR = self.TPR = self.roc_auc = None
        self.get_cm_index()

    #--------------------------------------------------------------------------
    def get_information_dictionary(self):
        """Return the computed results collected in a single dictionary."""
        return {
            "FPR": self.FPR,
            "TPR": self.TPR,
            "predictions": self.predictions,
            "true_labels": self.true_labels,
            "predict_labels": self.predict_labels,
            "roc_auc": self.roc_auc,
            "confusion_matrix": self.confusion_matrix,
        }

    #--------------------------------------------------------------------------
    def plot_confusion_matrix(self, cmap='Blues',
                              save=False, savename='cm.png'):
        """Draw the row-normalised confusion matrix and return it.

        Args:
            cmap: Matplotlib colour map name.
            save: Save the figure to ``savename`` instead of showing it.
            savename: Output file used when ``save`` is true.

        Returns:
            The confusion matrix with every row divided by its sum. Rows
            that sum to zero are left as zeros.
        """
        plt.figure(figsize=(8, 8))
        counts = np.asarray(self.confusion_matrix, dtype=float)
        row_totals = counts.sum(axis=1, keepdims=True)
        # Skip empty rows rather than dividing by zero.
        matrix = np.divide(counts, row_totals,
                           out=np.zeros_like(counts),
                           where=row_totals != 0)
        plt.imshow(matrix, cmap=cmap)
        plt.xticks([], [])
        plt.yticks([], [])
        plt.clim(0, 1)
        if save:
            print("Now saving confusion matrix figure")
            plt.savefig(savename)
        else:
            plt.show()
        return matrix

    #--------------------------------------------------------------------------
    def plot_roc_curve(self, save=False, savename='roc.png'):
        """Draw the ROC curve together with the diagonal reference line.

        Args:
            save: Save the figure to ``savename`` instead of showing it.
            savename: Output file used when ``save`` is true.

        Raises:
            ValueError: If no ROC data has been computed (it is only
                computed for two-class predictions).
        """
        if getattr(self, 'FPR', None) is None:
            raise ValueError(
                "ROC data is only available after predicting with a "
                "two-class model")
        plt.figure(figsize=(8, 8))
        plt.plot(self.FPR, self.TPR, 'k', lw=2)
        plt.plot(self.FPR, self.FPR, 'k', lw=0.5)
        plt.axhline(y=1, color='k', ls=':', lw=0.5)
        plt.axvline(x=0, color='k', ls=':', lw=0.5)
        plt.xlim(-0.01, 1)
        plt.ylim(0, 1.01)
        # Raw strings: "\m" is not a valid escape sequence.
        plt.xlabel(r'$\mathtt{FalsePositiveRate}$', fontsize=22)
        plt.ylabel(r'$\mathtt{TruePositiveRate}$', fontsize=22)
        if save:
            plt.savefig(savename)
        else:
            plt.show()

    #--------------------------------------------------------------------------
    def plot_samples(self, ind_arr, title, N=100, ncol=15,
                     save=False, savefigname='samples.eps'):
        """Show a grid of randomly chosen images from ``ind_arr``.

        Args:
            ind_arr: Indices into the generator's file list.
            title: Figure title.
            N: Maximum number of images; reduced to what is available and
                then to a multiple of the column count.
            ncol: Number of grid columns (reduced if fewer images exist).
            save: Save the figure to ``savefigname`` instead of showing it.
            savefigname: Output file used when ``save`` is true.
        """
        import os

        # Never ask for more images than there are indices.
        N = min(N, len(ind_arr))
        if N == 0:
            print("No samples to plot for %s" % title)
            return
        ncol = min(ncol, N)
        N -= N % ncol
        nrow = N // ncol

        chosen = np.random.choice(ind_arr, size=N, replace=False)
        names = np.array(self.generator.filenames)[chosen]

        # squeeze=False keeps a 2-D axes array even for a single row/column.
        f, axarr = plt.subplots(nrow, ncol, sharex=True, sharey=True,
                                figsize=(ncol, nrow), squeeze=False)
        f.subplots_adjust(wspace=0.0, hspace=0)
        f.suptitle(r"$\mathtt{%s}$" % title, fontsize=22)
        for ax, name in zip(axarr.ravel(), names):
            img = cv2.imread(os.path.join(self.data_dir, name))
            if img is not None:
                # OpenCV loads BGR; matplotlib expects RGB.
                ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            ax.set_xticks([])
            ax.set_yticks([])
        if save:
            f.savefig(savefigname)
        else:
            plt.show()

    #--------------------------------------------------------------------------
    def get_cm_index(self):
        """Collect sample indices per outcome for labels 0 and 1.

        Fills ``tp``, ``tn``, ``fp`` and ``fn`` with the positions where the
        (true, predicted) pair is (1, 1), (0, 0), (0, 1) and (1, 0)
        respectively. Samples with any other label are ignored.
        """
        self.tp = []
        self.tn = []
        self.fp = []
        self.fn = []
        buckets = {
            (1, 1): self.tp,
            (0, 0): self.tn,
            (0, 1): self.fp,
            (1, 0): self.fn,
        }
        pairs = zip(self.true_labels, self.predict_labels)
        for i, (truth, guess) in enumerate(pairs):
            bucket = buckets.get((int(truth), int(guess)))
            if bucket is not None:
                bucket.append(i)

    #--------------------------------------------------------------------------
    def plot_all(self):
        """Plot the confusion matrix and, for binary models, ROC and samples.

        The sample grids need the directory generator, so they are skipped
        when predictions did not come from ``predict_gen``.
        """
        self.plot_confusion_matrix()
        if self.nb_class == 2:
            self.plot_roc_curve()
            if getattr(self, 'generator', None) is None:
                return
            self.plot_samples(self.tp, 'TruePositive')
            self.plot_samples(self.fp, 'FalsePositive')
            self.plot_samples(self.tn, 'TrueNegative')
            self.plot_samples(self.fn, 'FalseNegative')
# Pull the item ids (the index), image ids and target column out of x_train
# and run the model over the training images to get one feature row each.
train_item_ids = x_train.index
train_image_ids = x_train.image
train_labels = x_train.deal_probability
# print(val_labels)
# NOTE(review): shuffle=False presumably keeps the predicted rows in the same
# order as train_image_ids (reused as `ids` below) - confirm in ImageGenerator.
train_gen = ImageGenerator(image_dir, train_item_ids, train_image_ids, train_labels, dim=(224, 224), shuffle=False)
features = model.predict_generator(train_gen, verbose=1)
ids = train_image_ids
print('getting img names')
# In[5]:
# Earlier per-file extraction loop, kept commented out.
# img_names = [i for i in os.walk(image_dir)][0][2]
# print(f'size of images: {len(img_names)}')
# features = []
# ids = []
# for img_name in tqdm(img_names):
#     img_path = image_dir + img_name
#     try:
#         img = image.load_img(img_path, target_size=(224, 224))
#     except OSError: