def test_preprocess_input():
    """Preprocessing must keep the shape and agree across data formats."""
    batch = np.random.uniform(0, 255, (2, 3, 2, 3))
    assert utils.preprocess_input(batch).shape == batch.shape

    result_last = utils.preprocess_input(batch, 'channels_last')
    # Same data with the channel axis moved to position 1.
    batch_first = np.transpose(batch, (0, 3, 1, 2))
    result_first = utils.preprocess_input(batch_first, 'channels_first')

    # Moving the channel axis back must reproduce the channels_last result.
    assert_allclose(result_last, result_first.transpose(0, 2, 3, 1))
def add_channels(self):
    """Give ``self.X`` an explicit channel axis and record ``self.input_shape``.

    When ``self.n_channels`` is 1 the parent implementation is used.
    Otherwise a single-channel array is concatenated with itself three
    times along the channel axis, ``preprocess_input`` is applied when
    ``self.preprocessing_flag`` is set, and the result is stored back on
    ``self.X`` together with ``self.input_shape``.
    """
    n_channels = self.n_channels
    if n_channels == 1:
        super().add_channels()
        return

    data = self.X
    if data.ndim < 4:
        # No channel axis yet: insert one, then replicate it.
        _, img_rows, img_cols = data.shape
        channels_first = K.image_dim_ordering() == 'th'
        if channels_first:
            data = data.reshape(data.shape[0], 1, img_rows, img_cols)
            data = np.concatenate([data, data, data], axis=1)
        else:
            data = data.reshape(data.shape[0], img_rows, img_cols, 1)
            data = np.concatenate([data, data, data], axis=3)
    else:
        # Channel axis already present: replicate only when it has size 1.
        channels_first = K.image_dim_ordering() == 'th'
        if channels_first:
            _, depth, img_rows, img_cols = data.shape
            if depth == 1:
                data = np.concatenate([data, data, data], axis=1)
        else:
            _, img_rows, img_cols, depth = data.shape
            if depth == 1:
                data = np.concatenate([data, data, data], axis=3)

    if channels_first:
        input_shape = (n_channels, img_rows, img_cols)
    else:
        input_shape = (img_rows, img_cols, n_channels)

    if self.preprocessing_flag:
        data = preprocess_input(data)

    self.X = data
    self.input_shape = input_shape
def loadImages():
    """Load every image under ``<cwd>/data/<category>/`` as one batch.

    Each image is loaded at 224x224, converted to an array, given a
    leading batch axis and passed through ``preprocess_input``.

    Returns:
        Array of shape (<observations>, 224, 224, 3), the layout the
        ResNet50 model expects.

    Raises:
        ValueError: if no image was found (nothing to concatenate).
    """
    folder = 'data'  # bats dataset
    dataroot = os.path.join(os.getcwd(), folder)

    imageList = []
    # Walk every category sub-directory and collect its images.
    for categoryDir in os.listdir(dataroot):
        categoryPath = os.path.join(dataroot, categoryDir)
        # Stray files in the data root (e.g. .DS_Store) are not categories
        # and would make os.listdir() raise.
        if not os.path.isdir(categoryPath):
            continue
        print('Loading images for ' + '{}\n'.format(categoryDir))
        for imageName in os.listdir(categoryPath):
            imagePath = os.path.join(categoryPath, imageName)
            loaded = image.load_img(imagePath, target_size=(224, 224))
            x = image.img_to_array(loaded)
            x = np.expand_dims(x, axis=0)
            x = preprocess_input(x)
            print('Image shape:', x.shape)
            imageList.append(x)

    # Every entry is (1, 224, 224, 3); joining along the batch axis gives
    # (<observations>, 224, 224, 3) directly.
    imageData = np.concatenate(imageList, axis=0)
    print(imageData.shape)
    return imageData
def preprocess_input(x):
    """Preprocess a batch of images with the 'tf' mode of imagenet_utils.

    # Arguments
        x: a 4D numpy array of RGB values within [0, 255].

    # Returns
        The input array scaled to [-1., 1.].
    """
    scaled = imagenet_utils.preprocess_input(x, mode='tf')
    return scaled
def preprocess_image(im, width, height, train=True):
    """Crop/resize one image to ``height`` x ``width`` and preprocess it.

    Args:
        im: image array; its first two axes are the spatial dimensions and
            the training crop assumes 3 channels.
        width: target width in pixels.
        height: target height in pixels.
        train: when True, take a random square crop (side = shorter image
            side) and resize it; otherwise crop or pad around the centre.

    Returns:
        The result of ``preprocess_input`` on the evaluated image.
    """
    size = min(im.shape[:2])
    im = tf.constant(im)
    if train:
        im = tf.random_crop(im, (size, size, 3))
        # resize_images takes size as (new_height, new_width); the previous
        # (width, height) order transposed non-square targets.
        im = tf.image.resize_images(im, (height, width))
    else:
        im = tf.image.resize_image_with_crop_or_pad(im, height, width)
    # NOTE(review): every call adds new ops to the default graph before
    # evaluating them in the Keras session.
    im = K.get_session().run(im)
    return preprocess_input(im)
def feature_flow():
    """Compare plain SSD detection with detection on a flow-shifted feature map.

    Runs the full SSD network on the loaded frames, dumps an intermediate
    activation for the second frame, shifts it with the optical flow computed
    between the two frames, feeds the shifted feature into a truncated SSD
    network and plots/compares the results. Relies on module-level names
    (``image_files``, ``input_shape``, ``NUM_CLASSES``, ``sess``) defined
    outside this function.
    """
    bbox_util = BBoxUtility(NUM_CLASSES)
    raw_inputs, images = load_inputs(image_files)
    inputs = preprocess_input(np.array(raw_inputs))

    # Layer whose activation is dumped, and layer used for the final comparison.
    dump_activation_layer = 'conv4_2'
    compare_layer_name = 'conv6_2'
    print('dump_activation_layer', dump_activation_layer)
    print('target_layer_name', compare_layer_name)

    # normal SSD network
    model1 = SSD300v2(input_shape, num_classes=NUM_CLASSES)
    model1.load_weights('weights_SSD300.hdf5', by_name=True)
    predictions = run_network(model1, inputs)
    results = bbox_util.detection_out(predictions)
    plot_detections(images, results)

    # get dump layer's output (as input for flow network)
    # The 1:2 slice keeps the batch axis while selecting only the second input.
    input_img2 = inputs[1:2, :, :, :]
    layer_dump = get_layer_output(model=model1, inputs=input_img2, output_layer_name=dump_activation_layer)
    print('layer_dump.shape = ', layer_dump.shape)

    # flow (raw rgb)
    flow_rgb = compute_flow(image_files[1], image_files[0])
    print('flow.shape', flow_rgb.shape)
    imshow_fig(cv2.cvtColor(draw_hsv(flow_rgb), cv2.COLOR_BGR2RGB), title='flow_rgb')

    # flow (re-sized for feature map)
    flow_feature = get_flow_for_filter(flow_rgb)
    # imshow_fig(flow_feature[:, :, 0], title='flow_feature_y', cmap='gray')
    # imshow_fig(flow_feature[:, :, 1], title='flow_feature_x', cmap='gray')

    # warp image by flow_rgb
    iimg1 = cv2.imread(image_files[0])
    img_warp = warp_flow(iimg1, flow_rgb)
    imshow_fig(cv2.cvtColor(img_warp, cv2.COLOR_BGR2RGB), title='frame_2_warp')

    # shift feature
    shifted_feature = shift_filter(layer_dump, flow_feature)

    # flow net
    # Truncated network fed with the shifted activation instead of an image;
    # it loads the same weight file by layer name.
    model2 = SSD300_conv4_3((128, 128, 512), num_classes=NUM_CLASSES)
    model2.load_weights('weights_SSD300.hdf5', by_name=True)
    predictions = run_network(model2, shifted_feature)
    results = bbox_util.detection_out(predictions)
    plot_detections(images[1:2], results)

    # get specific layer's output and compare them (for debugging)
    compare_model_layer(model1, input_img2, compare_layer_name, model2, shifted_feature, compare_layer_name, True)
    sess.close()
    plt.show()
def get_features_pretrained(X, PretrainedModel=VGG19, preprocess_input=preprocess_input):
    """Extract features from X with a pre-trained network.

    :param X: input batch; ``X.shape[1:]`` is used as the model input shape
    :param PretrainedModel: model constructor, VGG19 by default
    :param preprocess_input: callable applied to X first, or None to skip
    :return: the model's predictions on the (preprocessed) batch
    """
    batch = X if preprocess_input is None else preprocess_input(X)
    extractor = PretrainedModel(
        weights='imagenet',
        include_top=False,
        input_shape=batch.shape[1:],
    )
    return extractor.predict(batch)
def read_images(filepath, filenames):
    """Load ``<filepath>/<name>.jpg`` for every name and stack the results.

    Each image is loaded at 224x224, given a leading batch axis and
    preprocessed; the per-image arrays are concatenated along that axis.
    """
    def _load_one(name):
        # Load, batch and preprocess a single image.
        loaded = image.load_img(os.path.join(filepath, name + '.jpg'),
                                target_size=(224, 224))
        pixels = np.expand_dims(image.img_to_array(loaded), axis=0)
        return preprocess_input(pixels)

    return np.concatenate([_load_one(name) for name in filenames])
def prepare_image(image, target):
    """Convert an image to a preprocessed single-item batch.

    The image is converted to RGB when its mode differs, resized to
    ``target``, turned into an array, given a leading batch axis and passed
    through ``imagenet_utils.preprocess_input``.
    """
    rgb = image if image.mode == "RGB" else image.convert("RGB")
    pixels = img_to_array(rgb.resize(target))
    batch = np.expand_dims(pixels, axis=0)
    return imagenet_utils.preprocess_input(batch)
def test_preprocess_input_symbolic():
    """Preprocessing inside Lambda layers must agree across data formats."""
    # --- image batch ---
    batch = np.random.uniform(0, 255, (2, 10, 10, 3))
    sample_shape = batch.shape[1:]
    tensor_in = Input(shape=sample_shape)
    default_out = Lambda(utils.preprocess_input,
                         output_shape=sample_shape)(tensor_in)
    default_model = Model(tensor_in, default_out)
    assert default_model.predict(batch).shape == batch.shape

    last_out = Lambda(lambda t: utils.preprocess_input(t, 'channels_last'),
                      output_shape=sample_shape)(tensor_in)
    last_model = Model(tensor_in, last_out)
    result_last = last_model.predict(batch)

    batch_first = np.transpose(batch, (0, 3, 1, 2))
    first_shape = batch_first.shape[1:]
    tensor_in_first = Input(shape=first_shape)
    first_out = Lambda(lambda t: utils.preprocess_input(t, 'channels_first'),
                       output_shape=first_shape)(tensor_in_first)
    first_model = Model(tensor_in_first, first_out)
    result_first = first_model.predict(batch_first)
    assert_allclose(result_last, result_first.transpose(0, 2, 3, 1))

    # --- single image (fed through the models with a batch axis added) ---
    single = np.random.uniform(0, 255, (10, 10, 3))
    tensor_in = Input(shape=single.shape)
    default_out = Lambda(utils.preprocess_input,
                         output_shape=single.shape)(tensor_in)
    default_model = Model(tensor_in, default_out)
    assert default_model.predict(single[np.newaxis])[0].shape == single.shape

    last_out = Lambda(lambda t: utils.preprocess_input(t, 'channels_last'),
                      output_shape=single.shape)(tensor_in)
    last_model = Model(tensor_in, last_out)
    result_last = last_model.predict(single[np.newaxis])[0]

    single_first = np.transpose(single, (2, 0, 1))
    tensor_in_first = Input(shape=single_first.shape)
    first_out = Lambda(lambda t: utils.preprocess_input(t, 'channels_first'),
                       output_shape=single_first.shape)(tensor_in_first)
    first_model = Model(tensor_in_first, first_out)
    result_first = first_model.predict(single_first[np.newaxis])[0]
    assert_allclose(result_last, result_first.transpose(1, 2, 0))
def model_predict(img_path, model):
    """Load the image at ``img_path`` (224x224) and return ``model``'s prediction."""
    loaded = image.load_img(img_path, target_size=(224, 224))

    # Turn the image into a single-item batch.
    batch = np.expand_dims(image.img_to_array(loaded), axis=0)

    # The preprocessing mode has to match what the trained model expects,
    # otherwise the predictions will not be correct.
    batch = preprocess_input(batch, mode='caffe')

    return model.predict(batch)
def generate(self, train=True):
    """Yield ``(preprocessed images, targets)`` batches forever.

    Args:
        train: when True, iterate the (shuffled) training keys and apply
            cropping and augmentation; otherwise iterate the (shuffled)
            validation keys without augmentation.

    Yields:
        Tuple of ``preprocess_input(images)`` and the matching target array,
        each holding ``self.batch_size`` samples. Images that cannot be
        converted or resized are reported and skipped.
    """
    while True:
        if train:
            shuffle(self.train_keys)
            keys = self.train_keys
        else:
            shuffle(self.val_keys)
            keys = self.val_keys
        inputs = []
        targets = []
        for key in keys:
            # Keys without a known extension are assumed to be PNG files.
            if '.png' in key or '.jpg' in key:
                img_path = self.path_prefix + key
            else:
                img_path = self.path_prefix + key + '.png'
            img = imread(img_path, mode='L').astype('float32')
            y = self.gt[key].copy()
            # `except Exception` (not a bare except) so Ctrl-C and
            # SystemExit are not mistaken for a bad image.
            try:
                img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
            except Exception:
                print('failed gray to rgb image %s' % (img_path,))
                continue
            if train and self.do_crop:
                img, y = self.random_sized_crop(img, y)
            try:
                img = imresize(img, self.image_size).astype('float32')
            except Exception:
                print('failed resizing image %s' % (img_path,))
                continue
            if train:
                shuffle(self.color_jitter)
                for jitter in self.color_jitter:
                    img = jitter(img)
                if self.lighting_std:
                    img = self.lighting(img)
                if self.hflip_prob > 0:
                    img, y = self.horizontal_flip(img, y)
                if self.vflip_prob > 0:
                    img, y = self.vertical_flip(img, y)
            y = self.bbox_util.assign_boxes(y)
            inputs.append(img)
            targets.append(y)
            if len(targets) == self.batch_size:
                tmp_inp = np.array(inputs)
                tmp_targets = np.array(targets)
                inputs = []
                targets = []
                yield preprocess_input(tmp_inp), tmp_targets
def preprocess(self, raw_inputs):
    """Resize, convert and preprocess a list of images.

    Args:
        raw_inputs (list of Images): a list of PIL Image objects

    Returns:
        array (float32): num images * height * width * num channels
    """
    def _to_array(raw_im):
        # Resize to the first two entries of VGG16_DIM, force RGB, cast.
        resized = raw_im.resize(VGG16_DIM[:2], Image.ANTIALIAS)
        return np.array(resized.convert('RGB')).astype('float32')

    stacked = np.array([_to_array(raw_im) for raw_im in raw_inputs])
    return imagenet_utils.preprocess_input(stacked)
def imagenet_imagegraph(imagefile):
    """Classify an image with ResNet50 and build a graph from the labels.

    The image is loaded at 224x224, preprocessed and classified; element 1
    of every decoded prediction for the image is joined into one
    space-prefixed string that is passed to ``RecursiveGlossOverlapGraph``.

    Args:
        imagefile: path of the image to classify.

    Returns:
        The value returned by ``RecursiveGlossOverlapGraph``.
    """
    im1 = image.load_img(imagefile, target_size=(224, 224))
    im1array = image.img_to_array(im1)
    im1array = np.expand_dims(im1array, axis=0)
    im1array = preprocess_input(im1array)
    model = ResNet50(weights="imagenet")
    preds = model.predict(im1array)
    decodepreds = decode_predictions(preds)
    # print() calls: the Python 2 print statement is a syntax error on
    # Python 3.
    print("Predictions:", decodepreds)
    # Same text as appending " " + label per prediction, built in one pass.
    image_to_text = "".join(" " + pred[1] for pred in decodepreds[0])
    imagegraph = RecursiveGlossOverlapGraph(image_to_text)
    print("ImageGraph:", imagegraph)
    return imagegraph
def detect(self, img_path):
    """Run the detector on one image and return the confident detections.

    Args:
        img_path: path of the image to analyse; it is loaded at 512x512.

    Returns:
        dict with a single key ``"pis"`` holding one dict per detection
        whose confidence is at least 0.6, with keys ``name``, ``xmin``,
        ``ymin``, ``xmax``, ``ymax`` and ``conf``. The list is empty when
        nothing was detected.
    """
    # TODO: only a single image ever arrives here
    img = image.load_img(img_path, target_size=(512, 512))
    img = image.img_to_array(img)
    inputs = preprocess_input(np.array([img.copy()]))

    # Predict
    preds = self.model.predict(inputs, batch_size=1, verbose=1)

    # Decode the raw predictions
    results = self.bbox_util.detection_out(preds)

    # Build the JSON-style result to return
    result = {"pis": []}
    detections = results[0]
    # Without this guard an empty detection list fails on the column slices.
    if len(detections) == 0:
        return result

    # Split the detection columns
    det_label = detections[:, 0]
    det_conf = detections[:, 1]
    det_xmin = detections[:, 2]
    det_ymin = detections[:, 3]
    det_xmax = detections[:, 4]
    det_ymax = detections[:, 5]

    # Get detections with confidence higher than 0.6.
    # TODO: limit to 14 tiles
    top_indices = [i for i, conf in enumerate(det_conf) if conf >= 0.6]

    # Information for the selected tiles
    top_conf = det_conf[top_indices]
    top_label_indices = det_label[top_indices].tolist()
    top_xmin = det_xmin[top_indices]
    top_ymin = det_ymin[top_indices]
    top_xmax = det_xmax[top_indices]
    top_ymax = det_ymax[top_indices]

    for i, label_num in enumerate(top_label_indices):
        result['pis'].append({
            # Labels are shifted by one relative to self.voc_classes.
            "name": self.voc_classes[int(label_num) - 1],
            "xmin": top_xmin[i],
            "ymin": top_ymin[i],
            "xmax": top_xmax[i],
            "ymax": top_ymax[i],
            "conf": top_conf[i],
        })
    return result
def predict_batch(model, img_batch_path, img_size=None):
    """Load a list of images, stack them and return the model's predictions.

    Args:
        model: object exposing ``predict``.
        img_batch_path: iterable of image paths.
        img_size: optional size passed to ``imresize`` for every image.

    Returns:
        ``model.predict`` applied to the preprocessed image batch.

    Raises:
        ValueError: if the images cannot be stacked into one array
            (different shapes, or no image at all).
    """
    img_list = []
    for im_path in img_batch_path:
        img = imread(im_path)
        if img_size:
            img = imresize(img, img_size)
        img = img.astype('float32')
        img_list.append(img)
    try:
        img_batch = np.stack(img_list, axis=0)
    except ValueError as err:
        # Only stacking failures are translated; anything else propagates.
        raise ValueError(
            'when img_size is None, all images in img_batch_path '
            'must have the same shapes.') from err
    return model.predict(preprocess_input(img_batch))
def ext_img_feat(image_folder, batch_size):
    """Run every image in ``image_folder`` through ResNet50's 'res5c' layer.

    Images are processed ``batch_size`` at a time; the per-batch outputs
    are collected in a list and a progress message is printed per batch.
    """
    # NOTE(review): the collected features are never returned from this
    # function as written - confirm whether a return statement is missing.
    base_model = ResNet50(weights='imagenet')
    img_model = Model(input=base_model.input,
                      output=base_model.get_layer('res5c').output)

    def _load_one(file_name):
        # Load one image at 224x224 as a preprocessed single-item batch.
        loaded = image.load_img(os.path.join(image_folder, file_name),
                                target_size=(224, 224))
        pixels = np.expand_dims(image.img_to_array(loaded), axis=0)
        return preprocess_input(pixels)

    img_list = os.listdir(image_folder)
    all_img_feats = list()
    si = 0
    while si < len(img_list):
        batch_names = img_list[si:si + batch_size]
        si += batch_size
        batch_tensor = np.concatenate(
            [_load_one(name) for name in batch_names], axis=0)
        all_img_feats.append(img_model.predict(batch_tensor))
        print('%d images extracted\r' % si),
def test_preprocess_input():
    """Preprocessing keeps shapes and agrees across data formats."""
    # Image batch
    batch = np.random.uniform(0, 255, (2, 10, 10, 3))
    assert utils.preprocess_input(batch).shape == batch.shape
    result_last = utils.preprocess_input(batch, 'channels_last')
    result_first = utils.preprocess_input(
        np.transpose(batch, (0, 3, 1, 2)), 'channels_first')
    assert_allclose(result_last, result_first.transpose(0, 2, 3, 1))

    # Single image (no batch axis)
    single = np.random.uniform(0, 255, (10, 10, 3))
    assert utils.preprocess_input(single).shape == single.shape
    result_last = utils.preprocess_input(single, 'channels_last')
    result_first = utils.preprocess_input(
        np.transpose(single, (2, 0, 1)), 'channels_first')
    assert_allclose(result_last, result_first.transpose(1, 2, 0))
def process_frame_bgr_with_SSD(frame_bgr, ssd_model, bbox_helper, allow_classes=None, min_confidence=0.2):
    """
    Detect objects in a single BGR frame.

    Parameters
    ----------
    frame_bgr : ndarray
        Frame to process, in BGR channel order.
    ssd_model : Keras Model
        Pretrained SSD network.
    bbox_helper : BBoxUtility
        Helper used to decode the network output.
    allow_classes : list, default
        When given (and non-empty), only detections of these classes are kept.
    min_confidence : float, default
        Passed to the decoder as its confidence threshold.

    Returns
    -------
    results : list
        Detections as [class, confidence, x_min, y_min, x_max, y_max].
    """
    # The network input is built from the RGB version of the frame.
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    resized = image.img_to_array(cv2.resize(frame_rgb, (300, 300)))
    batch = preprocess_input(np.array([resized.copy()]))

    preds = ssd_model.predict(batch, batch_size=1, verbose=1)
    decoded = bbox_helper.detection_out(preds, confidence_threshold=min_confidence)

    # Only one frame was submitted, so drop the batch level.
    detections = decoded[0]

    if not allow_classes:
        return detections
    # Keep only the detections whose class is allowed.
    return [det for det in detections if int(det[0]) in allow_classes]
def main(img_paths):
    """
    Detect objects in images and write one overlay per image.

    Parameters
    ----------
    img_paths : list of strings
        Paths of the images to process; the overlay for each image is
        created under the name "<path>-det.png".
    """
    # Build the network and load its weights
    voc_classes = ['Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
                   'Bus', 'Car', 'Cat', 'Chair', 'Cow', 'Diningtable',
                   'Dog', 'Horse', 'Motorbike', 'Person', 'Pottedplant',
                   'Sheep', 'Sofa', 'Train', 'Tvmonitor']
    # One more class than the names listed above.
    NUM_CLASSES = len(voc_classes) + 1
    network = SSD300((300, 300, 3), num_classes=NUM_CLASSES)
    network.load_weights('weights_SSD300.hdf5', by_name=True)
    bbox_util = BBoxUtility(NUM_CLASSES)

    # Read each image twice: resized for the network, original for drawing
    network_inputs = []
    originals = []
    for img_path in img_paths:
        resized = image.img_to_array(
            image.load_img(img_path, target_size=(300, 300)))
        originals.append(imread(img_path))
        network_inputs.append(resized.copy())
    batch = preprocess_input(np.array(network_inputs))

    # Predict and decode
    preds = network.predict(batch, batch_size=1, verbose=1)
    results = bbox_util.detection_out(preds)

    # Visualize
    for position, original in enumerate(originals):
        create_overlay(original, results[position], voc_classes,
                       "{}-det.png".format(img_paths[position]))
def generate(self, train=True):
    """Yield ``(preprocessed images, targets)`` batches forever.

    With ``train`` set, the training keys are used and random crop, colour
    jitter, lighting and flips are applied as configured on the instance;
    otherwise the validation keys are used without augmentation. Keys are
    reshuffled on every pass.
    """
    while True:
        keys = self.train_keys if train else self.val_keys
        shuffle(keys)

        batch_images, batch_targets = [], []
        for key in keys:
            # one image
            img = imread(self.path_prefix + key).astype('float32')
            y = self.gt[key].copy()

            if train and self.do_crop:
                img, y = self.random_sized_crop(img, y)
            img = imresize(img, self.image_size).astype('float32')

            if train:
                # Colour jitters are applied in a fresh random order.
                shuffle(self.color_jitter)
                for jitter in self.color_jitter:
                    img = jitter(img)
                if self.lighting_std:
                    img = self.lighting(img)
                if self.hflip_prob > 0:
                    img, y = self.horizontal_flip(img, y)
                if self.vflip_prob > 0:
                    img, y = self.vertical_flip(img, y)

            y = self.bbox_util.assign_boxes(y)
            batch_images.append(img)
            batch_targets.append(y)

            if len(batch_targets) != self.batch_size:
                continue

            # A full batch is ready: emit it and start a new one.
            tmp_inp = np.array(batch_images)
            tmp_targets = np.array(batch_targets)
            batch_images, batch_targets = [], []
            yield preprocess_input(tmp_inp), tmp_targets
img_path = './pics/boys.jpg' img = image.load_img(img_path, target_size=(300, 300)) img = image.img_to_array(img) images.append(imread(img_path)) inputs.append(img.copy()) img_path = './pics/car_cat.jpg' img = image.load_img(img_path, target_size=(300, 300)) img = image.img_to_array(img) images.append(imread(img_path)) inputs.append(img.copy()) img_path = './pics/car_cat2.jpg' img = image.load_img(img_path, target_size=(300, 300)) img = image.img_to_array(img) images.append(imread(img_path)) inputs.append(img.copy()) inputs = preprocess_input(np.array(inputs)) # In[5]: preds = model.predict(inputs, batch_size=1, verbose=1) # In[6]: results = bbox_util.detection_out(preds) # In[8]: for i, img in enumerate(images):
validation_data=gen.generate(False), nb_val_samples=gen.val_batches, nb_worker=1) model.save_weights('params_SSD_epoch_{0:03d}.hdf5'.format(j), True) # 学習履歴を保存 save_history(history, os.path.join("./checkpoints/", 'history_SSD.txt'), j) inputs = [] images = [] img_path = path_prefix + sorted(val_keys)[0] img = image.load_img(img_path, target_size=(300, 300)) img = image.img_to_array(img) images.append(imread(img_path)) inputs.append(img.copy()) inputs = preprocess_input(np.array(inputs)) preds = model.predict(inputs, batch_size=1, verbose=1) results = bbox_util.detection_out(preds) for i, img in enumerate(images): # Parse the outputs. det_label = results[i][:, 0] det_conf = results[i][:, 1] det_xmin = results[i][:, 2] det_ymin = results[i][:, 3] det_xmax = results[i][:, 4] det_ymax = results[i][:, 5] # Get detections with confidence higher than 0.6. top_indices = [i for i, conf in enumerate(det_conf) if conf >= 0.6]
def detect_image(self, image):
    """Detect objects, classify each detected region and annotate the image.

    Every detected box is cropped from the untouched image, resized to
    224x224 and classified with the model loaded from
    ``self.classi_model_path``. Regions predicted as class 0 are shown and
    saved under ``static/pics/``. Boxes and labels are drawn on ``image``.

    Args:
        image: PIL image; it is drawn on in place.

    Returns:
        Tuple ``(image, percent)`` where ``percent`` is the share of
        detected boxes classified as class 0 (0.0 when no box was found).
    """
    start = timer()

    if self.model_image_size != (None, None):
        assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
        assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
        boxed_image = letterbox_image(
            image, tuple(reversed(self.model_image_size)))
    else:
        new_image_size = (image.width - (image.width % 32),
                          image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')

    print(image_data.shape)
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

    out_boxes, out_scores, out_classes = self.sess.run(
        [self.boxes, self.scores, self.classes],
        feed_dict={
            self.yolo_model.input: image_data,
            self.input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        })

    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
    total = len(out_boxes)

    font = ImageFont.truetype(
        font='font/FiraMono-Medium.otf',
        size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 300

    # NOTE(review): the classifier is reloaded on every call; consider
    # loading it once if this method is called repeatedly.
    classi_model_path = os.path.expanduser(self.classi_model_path)
    self.classi_model = load_model(classi_model_path)
    print('Classi model loaded.')

    # Crops are taken from a pristine copy so earlier drawings never end
    # up inside a later region of interest.
    image_copy = image.copy()
    idx = 0  # number of regions classified as class 0

    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = self.class_names[c]
        box = out_boxes[i]
        score = out_scores[i]

        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)

        # Round the box to integer pixels and clamp it to the image.
        top, left, bottom, right = box
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        print(label, (left, top), (right, bottom))

        box = (left, top, right, bottom)
        roi_img = image_copy.crop(box)
        roi_img = roi_img.resize((224, 224))
        numpy_img = img_to_array(roi_img)
        x = np.expand_dims(numpy_img, axis=0)
        x = preprocess_input(x, mode='caffe')
        pred_value = self.classi_model.predict(x)
        pred_class = pred_value.argmax(axis=-1)
        if (pred_class == 0):
            roi_img.show()
            roi_img.save('static/pics/' + str(idx) + '.jpg')
            idx += 1

        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # My kingdom for a good redistributable image drawing library.
        # A separate name keeps the outer loop index `i` untouched.
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset, right - offset, bottom - offset],
                outline=self.colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[c])
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw

    # No detections at all would otherwise divide by zero.
    percent = float(idx) / total if total else 0.0
    end = timer()
    print(end - start)
    return image, percent
def test_RotNet(model, input_path):
    """
    Rotate each image by a random multiple of 90 degrees and check whether
    the model predicts that rotation.

    Args:
        model: object exposing ``predict``; the argmax of its output times
            90 is taken as the predicted angle.
        input_path: a single image file, or a directory whose .jpg, .jpeg,
            .bmp and .png files are tested.

    Returns:
        Tuple ``(accuracy, number of image paths, number of unreadable
        files)``. Accuracy is computed over readable images only and is
        0.0 when none could be read.
    """
    # Admitted input file extensions
    extensions = ['.jpg', '.jpeg', '.bmp', '.png']

    # Check if input is a single image or a directory
    if os.path.isfile(input_path):
        image_paths = [input_path]
    else:
        image_paths = [
            os.path.join(input_path, f)
            for f in os.listdir(input_path)
            if os.path.splitext(f)[1].lower() in extensions
        ]

    # Parameters
    accuracy = 0.0
    count = 0
    corr = 0
    rotation_choice = [0, 90, 180, 270]
    input_shape = (224, 224, 3)

    for idx, image_path in enumerate(image_paths):
        print('no {}, path {}'.format(idx, secure_filename(image_path)))
        image = cv2.imread(image_path, 1)
        if image is None:
            corr += 1
            print('Pic {} corrupted'.format(image_path))
            continue
        count += 1
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Per-image locals instead of lists indexed by idx: a skipped image
        # used to shift the list positions away from the loop index.
        rotation_angle = np.random.choice(rotation_choice)

        # generate the rotated image
        image = generate_rotated_image(image, rotation_angle,
                                       size=input_shape[:2],
                                       crop_center=True,
                                       crop_largest_rect=True)

        # add dimension to account for the channels if the image is greyscale
        if image.ndim == 2:
            image = np.expand_dims(image, axis=2)

        # preprocess input images
        image = preprocess_input(
            np.expand_dims(image.astype('float32'), axis=0))
        predictions = model.predict(image)
        predicted_angle = np.argmax(predictions, axis=1) * 90

        if rotation_angle == predicted_angle:
            accuracy += 1
            print('original {}, detected {} --> GOOD'.format(
                rotation_angle, predicted_angle))
        else:
            print('original {}, detected {} --> BAD'.format(
                rotation_angle, predicted_angle))

    # Nothing readable: report zero accuracy instead of dividing by zero.
    if count == 0:
        return 0.0, len(image_paths), corr
    return accuracy / count, len(image_paths), corr
def prepare_img_224(img_path):
    """Load an image at 224x224 and return it as a preprocessed 1-item batch."""
    pixels = img_to_array(load_img(img_path, target_size=(224, 224)))
    batch = np.expand_dims(pixels, axis=0)
    return preprocess_input(batch)
def next(self):
    """Build and return the next batch of images (and labels).

    Returns:
        ``(batch_x, batch_y)`` when ``self.class_mode == 'sparse'``,
        otherwise ``batch_x`` alone. ``batch_x`` has been passed through
        ``preprocess_input``.
    """
    with self.lock:
        index_array, current_index, current_batch_size = next(
            self.index_generator)

    # The transformation of images is not under thread lock so it can be
    # done in parallel
    if self.target_size:
        # TODO(ahundt) make dtype properly configurable
        batch_x = np.zeros((current_batch_size,) + self.image_shape)
        # String comparison by value: `is` on a literal tests identity.
        if self.loss_shape is None and self.label_file_format == 'img':
            batch_y = np.zeros((current_batch_size,) + self.label_shape,
                               dtype=int)
        elif self.loss_shape is None:
            batch_y = np.zeros((current_batch_size,) + self.label_shape)
        else:
            batch_y = np.zeros((current_batch_size,) + self.loss_shape,
                               dtype=np.uint8)
    grayscale = self.color_mode == 'grayscale'

    # build batch of image data and labels
    for i, j in enumerate(index_array):
        data_file = self.data_files[j]
        label_file = self.label_files[j]
        img = load_img(os.path.join(self.data_dir, data_file),
                       grayscale=grayscale, target_size=None)
        label_filepath = os.path.join(self.label_dir, label_file)

        if self.label_file_format == 'npy':
            y = np.load(label_filepath)
        else:
            label = Image.open(label_filepath)
            if self.save_to_dir and self.palette is None:
                self.palette = label.palette

        # do padding
        # NOTE(review): x is only assigned inside this branch, so a falsy
        # target_size looks unsupported further down - confirm.
        if self.target_size:
            if self.crop_mode != 'none':
                x = img_to_array(img, data_format=self.data_format)
                if self.label_file_format != 'npy':
                    y = img_to_array(
                        label, data_format=self.data_format).astype(int)
                img_w, img_h = img.size
                if self.pad_size:
                    pad_w = max(self.pad_size[1] - img_w, 0)
                    pad_h = max(self.pad_size[0] - img_h, 0)
                else:
                    pad_w = max(self.target_size[1] - img_w, 0)
                    pad_h = max(self.target_size[0] - img_h, 0)
                # Floor division keeps the pad widths integral; true
                # division yields floats under Python 3.
                pad_top, pad_left = pad_h // 2, pad_w // 2
                pad_bottom, pad_right = pad_h - pad_top, pad_w - pad_left
                if self.data_format == 'channels_first':
                    x = np.lib.pad(x, ((0, 0),
                                       (pad_top, pad_bottom),
                                       (pad_left, pad_right)),
                                   'constant', constant_values=0.)
                    y = np.lib.pad(y, ((0, 0),
                                       (pad_top, pad_bottom),
                                       (pad_left, pad_right)),
                                   'constant',
                                   constant_values=self.label_cval)
                elif self.data_format == 'channels_last':
                    x = np.lib.pad(x, ((pad_top, pad_bottom),
                                       (pad_left, pad_right),
                                       (0, 0)),
                                   'constant', constant_values=0.)
                    y = np.lib.pad(y, ((pad_top, pad_bottom),
                                       (pad_left, pad_right),
                                       (0, 0)),
                                   'constant',
                                   constant_values=self.label_cval)
            else:
                x = img_to_array(
                    img.resize((self.target_size[1], self.target_size[0]),
                               Image.BILINEAR),
                    data_format=self.data_format)
                if self.label_file_format != 'npy':
                    # Nearest-neighbour keeps label values unblended.
                    y = img_to_array(
                        label.resize((self.target_size[1],
                                      self.target_size[0]),
                                     Image.NEAREST),
                        data_format=self.data_format).astype(int)
                else:
                    print('ERROR: resize not implemented for label npy file')

        if self.target_size is None:
            batch_x = np.zeros((current_batch_size,) + x.shape)
            if self.loss_shape is not None:
                batch_y = np.zeros((current_batch_size,) + self.loss_shape)
            else:
                batch_y = np.zeros((current_batch_size,) + y.shape)

        x, y = self.seg_data_generator.random_transform(x, y)
        x = self.seg_data_generator.standardize(x)

        # Pixels carrying the ignore label are remapped to self.classes.
        if self.ignore_label:
            y[np.where(y == self.ignore_label)] = self.classes

        if self.loss_shape is not None:
            y = np.reshape(y, self.loss_shape)

        batch_x[i] = x
        batch_y[i] = y

    # optionally save augmented images to disk for debugging purposes
    if self.save_to_dir:
        for i in range(current_batch_size):
            img = array_to_img(batch_x[i], self.data_format, scale=True)
            label = batch_y[i][:, :, 0].astype('uint8')
            # Undo the remapping so saved labels show the ignore label.
            label[np.where(label == self.classes)] = self.ignore_label
            label = Image.fromarray(label, mode='P')
            label.palette = self.palette
            fname = '{prefix}_{index}_{hash}'.format(
                prefix=self.save_prefix,
                index=current_index + i,
                hash=np.random.randint(1e4))
            img.save(os.path.join(
                self.save_to_dir,
                'img_' + fname + '.{format}'.format(format=self.save_format)))
            label.save(os.path.join(self.save_to_dir,
                                    'label_' + fname + '.png'))

    # return
    batch_x = preprocess_input(batch_x)
    if self.class_mode == 'sparse':
        return batch_x, batch_y
    else:
        return batch_x
def extract_features(input_dir, output_dir, model_type='inceptionv3',
                     batch_size=32):
    """
    Extracts features from a CNN trained on ImageNet classification from
    all videos in a directory.

    Frames are sampled at roughly one per second, centre-cropped, resized
    to the model's input size and run through the network truncated at its
    second-to-last layer. One ``<video name>.npy`` file is written per
    video under ``<output_dir>/visual``; videos that already have a
    feature file there are skipped.

    Args:
        input_dir (str): Input directory of videos to extract from.
        output_dir (str): Directory where features should be stored.
        model_type (str): Model type to use.
        batch_size (int): Batch size to use when processing.
    """
    input_dir = os.path.expanduser(input_dir)
    output_dir = os.path.expanduser(output_dir)

    if not os.path.isdir(input_dir):
        sys.stderr.write("Input directory '%s' does not exist!\n" % input_dir)
        sys.exit(1)

    # Load desired ImageNet model

    # Note: import Keras only when needed so we don't waste time revving up
    #       Theano/TensorFlow needlessly in case of an error
    model_key = model_type.lower()
    model = None

    if model_key == 'inceptionv3':
        from keras.applications import InceptionV3
        model = InceptionV3(include_top=True, weights='imagenet')
    elif model_key == 'xception':
        from keras.applications import Xception
        model = Xception(include_top=True, weights='imagenet')
    elif model_key == 'resnet50':
        from keras.applications import ResNet50
        model = ResNet50(include_top=True, weights='imagenet')
    elif model_key == 'vgg16':
        from keras.applications import VGG16
        model = VGG16(include_top=True, weights='imagenet')
    elif model_key == 'vgg19':
        from keras.applications import VGG19
        model = VGG19(include_top=True, weights='imagenet')
    else:
        sys.stderr.write("'%s' is not a valid ImageNet model.\n" % model_type)
        sys.exit(1)

    # One variable for the frame size: previously the 224x224 default was
    # stored under a different name, so `shape` was undefined for the
    # ResNet/VGG models.
    if model_key in ('inceptionv3', 'xception'):
        shape = (299, 299)
    else:
        shape = (224, 224)

    # Get outputs of model from layer just before softmax predictions
    from keras.models import Model
    model = Model(model.inputs, output=model.layers[-2].output)

    # Create output directories
    visual_dir = os.path.join(output_dir, 'visual')  # RGB features
    for directory in [visual_dir]:
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Find all videos that need to have features extracted
    def is_video(x):
        return x.endswith('.mp4') or x.endswith('.avi') or x.endswith('.mov')

    # splitext on both sides so names containing extra dots still match.
    vis_existing = {os.path.splitext(x)[0] for x in os.listdir(visual_dir)}
    video_filenames = [x for x in sorted(os.listdir(input_dir))
                       if is_video(x)
                       and os.path.splitext(x)[0] not in vis_existing]

    # Go through each video and extract features
    # NOTE(review): this generic preprocess_input is applied to every model
    # type - confirm it matches what InceptionV3/Xception expect.
    from keras.applications.imagenet_utils import preprocess_input

    for video_filename in tqdm(video_filenames):

        # Open video clip for reading
        try:
            clip = VideoFileClip(os.path.join(input_dir, video_filename))
        except Exception as e:
            sys.stderr.write("Unable to read '%s'. Skipping...\n"
                             % video_filename)
            sys.stderr.write("Exception: {}\n".format(e))
            continue

        # Sample frames at 1fps
        # Clamp to 1 so an fps that rounds to 0 cannot cause modulo by zero.
        fps = max(1, int(np.round(clip.fps)))
        frames = [scipy.misc.imresize(crop_center(x.astype(np.float32)), shape)
                  for idx, x in enumerate(clip.iter_frames())
                  if idx % fps == fps // 2]

        n_frames = len(frames)

        frames_arr = np.empty((n_frames,) + shape + (3,), dtype=np.float32)
        for idx, frame in enumerate(frames):
            frames_arr[idx, :, :, :] = frame

        frames_arr = preprocess_input(frames_arr)

        features = model.predict(frames_arr, batch_size=batch_size)

        name, _ = os.path.splitext(video_filename)
        feat_filepath = os.path.join(visual_dir, name + '.npy')

        with open(feat_filepath, 'wb') as f:
            np.save(f, features)
if include_top: maxpool = model.get_layer(name='avg_pool') shape = maxpool.output_shape[1:] dense = model.get_layer(name='fc1000') layer_utils.convert_dense_weights_data_format(dense, shape, 'channels_first') if K.backend() == 'tensorflow': warnings.warn('You are using the TensorFlow backend, yet you ' 'are using the Theano ' 'image data format convention ' '(`image_data_format="channels_first"`). ' 'For best performance, set ' '`image_data_format="channels_last"` in ' 'your Keras config ' 'at ~/.keras/keras.json.') return model if __name__ == '__main__': model = ResNet50(include_top=True, weights='imagenet') img_path = 'elephant.jpg' img = image.load_img(img_path, target_size=(224, 224)) x = image.img_to_array(img) x = np.expand_dims(x, axis=0) x = preprocess_input(x) print('Input image shape:', x.shape) preds = model.predict(x) print('Predicted:', decode_predictions(preds))
def caltech_preprocessing(x):
    """Apply ``imagenet_utils.preprocess_input`` to ``x`` in 'tf' mode."""
    preprocessed = imagenet_utils.preprocess_input(x, mode='tf')
    return preprocessed
# Classify the frames of a video with a custom model and display the
# annotated frames; press 'q' in the window to stop early.
print("[INFO] loading network...")
custom_model = 'rbc_custom_model.h5'
model = load_model(custom_model)

cap = cv2.VideoCapture(file)
time.sleep(2)

video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1

frames = 1
while frames < video_length:
    ret, original = cap.read()
    # read() reports failure through `ret`; without this check a failed
    # read would pass an invalid frame to cv2.resize.
    if not ret:
        break

    # Resize the frame and turn it into a preprocessed single-item batch
    print("[INFO] loading and preprocessing image...")
    frame = cv2.resize(original, (224, 224))
    frame = image_utils.img_to_array(frame)
    frame = np.expand_dims(frame, axis=0)
    frame = preprocess_input(frame)

    preds = model.predict(frame)
    (inID, label, prob) = decode_predictions_custom(preds)[0][0]

    # Display the predictions
    print("RBC ID: {}, Label: {}, Prob: {}".format(inID, label, prob))
    cv2.putText(original, "Label: {}, Prob: {}".format(label, prob),
                (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
    cv2.imshow("Classification", original)
    cv2.waitKey(1)
    frames += 1

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture before closing the windows.
cap.release()
cv2.destroyAllWindows()
sys.exit()
def preprocess_inputs(self, X):
    """Run `X` through `imagenet_utils.preprocess_input` with its defaults.

    # Arguments
        X: Image array (or batch of images) to preprocess.

    # Returns
        The value returned by `imagenet_utils.preprocess_input(X)`.
    """
    prepared = imagenet_utils.preprocess_input(X)
    return prepared
def test_preprocess_input():
    """Check `utils.preprocess_input` across layouts, ranks, dtypes and modes.

    The order of the `preprocess_input` calls is kept exactly as written
    because float inputs are modified in place (see the last two sections).
    """
    # Each case: (input shape,
    #             axes taking channels_last -> channels_first,
    #             axes taking channels_first -> channels_last).
    layout_cases = [
        ((2, 10, 10, 3), (0, 3, 1, 2), (0, 2, 3, 1)),  # image batch
        ((10, 10, 3), (2, 0, 1), (1, 2, 0)),           # single image
    ]
    for shape, to_first, to_last in layout_cases:
        floats = np.random.uniform(0, 255, shape)
        ints = floats.astype('int32')

        # The output shape must equal the input shape for float and int input.
        assert utils.preprocess_input(floats).shape == floats.shape
        assert utils.preprocess_input(ints).shape == ints.shape

        # Both data formats must agree once transposed back.
        last_float = utils.preprocess_input(floats, 'channels_last')
        last_int = utils.preprocess_input(ints, 'channels_last')
        first_float = utils.preprocess_input(np.transpose(floats, to_first),
                                             'channels_first')
        first_int = utils.preprocess_input(np.transpose(ints, to_first),
                                           'channels_first')
        assert_allclose(last_float, first_float.transpose(to_last))
        assert_allclose(last_int, first_int.transpose(to_last))

    # Writing over the input data must work predictably: the float input is
    # expected to equal the returned array afterwards, while the int input
    # must differ from its output.
    for mode in ['torch', 'tf']:
        floats = np.random.uniform(0, 255, (2, 10, 10, 3))
        ints = floats.astype('int')
        floats_out = utils.preprocess_input(floats, mode=mode)
        ints_out = utils.preprocess_input(ints)
        assert_allclose(floats, floats_out)
        assert ints.astype('float').max() != ints_out.max()

    # Caffe mode works differently from the others: the input matches the
    # output only after reversing the channel axis.
    floats = np.random.uniform(0, 255, (2, 10, 10, 3))
    ints = floats.astype('int')
    floats_out = utils.preprocess_input(floats,
                                        data_format='channels_last',
                                        mode='caffe')
    ints_out = utils.preprocess_input(ints)
    assert_allclose(floats, floats_out[..., ::-1])
    assert ints.astype('float').max() != ints_out.max()
maxpool = model.get_layer(name='block5_pool') shape = maxpool.output_shape[1:] dense = model.get_layer(name='fc1') layer_utils.convert_dense_weights_data_format( dense, shape, 'channels_first') if K.backend() == 'tensorflow': warnings.warn('You are using the TensorFlow backend, yet you ' 'are using the Theano ' 'image data format convention ' '(`image_data_format="channels_first"`). ' 'For best performance, set ' '`image_data_format="channels_last"` in ' 'your Keras config ' 'at ~/.keras/keras.json.') return model if __name__ == '__main__': model = VGG16(include_top=True, weights='imagenet') img_path = 'D:\\TF\\animal.jpg' img = image.load_img(img_path, target_size=(224, 224)) x = image.img_to_array(img) x = np.expand_dims(x, axis=0) x = preprocess_input(x) print('Input image shape:', x.shape) preds = model.predict(x) print('Predicted:', decode_predictions(preds))
def preprocess_image(image, target, mode='tf'):
    """Resize an image and turn it into a preprocessed single-image batch.

    # Arguments
        image: Object exposing `resize(target)` whose result is accepted by
            `img_to_array` (presumably a PIL image; confirm against callers).
        target: Size passed straight to `image.resize`.
        mode: Preprocessing mode forwarded to
            `imagenet_utils.preprocess_input`.

    # Returns
        The preprocessed array with a new leading axis of length 1.
    """
    resized = image.resize(target)
    # Add the leading batch axis before handing the array to the model helper.
    batch = np.expand_dims(img_to_array(resized), axis=0)
    return imagenet_utils.preprocess_input(batch, mode=mode)
img = image.load_img(img_path, target_size=(300, 300)) img = image.img_to_array(img) images.append(imread(img_path)) inputs.append(img.copy()) img_path = './pics/15979756904=Taipei 101 @ ______=______101=25.027366=121.576141.jpg' img = image.load_img(img_path, target_size=(300, 300)) img = image.img_to_array(img) images.append(imread(img_path)) inputs.append(img.copy()) img_path = './pics/3310110280=中山碑林=25.040071=121.559332.jpg' img = image.load_img(img_path, target_size=(300, 300)) img = image.img_to_array(img) images.append(imread(img_path)) inputs.append(img.copy()) ''' inputs = preprocess_input(np.array(inputs))#只是rgb各扣掉1個常數(Zero-center by mean pixel) #=============================== preds = model.predict(inputs, batch_size=1, verbose=2) results = bbox_util.detection_out(preds) #np.shape(preds) #len(results) #=============================== # test ''' a = model.predict(inputs, batch_size=1) b = bbox_util.detection_out(preds) np.shape(a) ''' #===============================
# Build a feature extractor that stops at the second-to-last layer of `model`
# and use it to embed every image found in images_resize/.
input = model.layers[0].input
output = model.layers[-2].output
base_model = Model(input, output)
del model

paths = ["images_resize/" + path for path in sorted(os.listdir("images_resize/"))]
batch_size = 32
out_tensors = np.zeros((len(paths), 2048), dtype="float32")
print(out_tensors.shape)

# Step through the paths in strides of `batch_size`.  Counting batches with
# `len(paths) // batch_size + 1` produced a trailing *empty* batch whenever
# the number of images was a multiple of the batch size (including zero
# images), and np.vstack raises ValueError on an empty list.
for idx, batch_bgn in enumerate(range(0, len(paths), batch_size)):
    batch_end = min(batch_bgn + batch_size, len(paths))
    # Each image becomes a preprocessed (1, 224, 224, C) float32 array.
    imgs = [
        preprocess_input(
            imresize(imread(path), (224, 224)).astype("float32")[np.newaxis])
        for path in paths[batch_bgn:batch_end]
    ]
    batch_tensor = np.vstack(imgs)
    print("tensor", idx, "with shape", batch_tensor.shape)
    out_tensor = base_model.predict(batch_tensor, batch_size=batch_size)
    print("output shape:", out_tensor.shape)
    out_tensors[batch_bgn:batch_end, :] = out_tensor
print("shape of representation", out_tensors.shape)

# Serialize representations; the context manager closes the file even if
# writing the dataset fails.
with h5py.File('img_emb.h5', 'w') as h5f:
    h5f.create_dataset('img_emb', data=out_tensors)
def preprocess_image(image_path):
    """Load an image file as a preprocessed single-image batch.

    # Arguments
        image_path: Path of the image file; it is loaded at 224x224.

    # Returns
        The result of `preprocess_input` applied to the image array after a
        leading axis of length 1 has been added.
    """
    loaded = load_img(image_path, target_size=(224, 224))
    # Insert the batch axis in front of the image dimensions.
    batch = np.expand_dims(img_to_array(loaded), axis=0)
    return preprocess_input(batch)
def run(self, video_path=0, start_frame=0, conf_thresh=0.6):
    """ Runs the test on a video (or webcam)

    # Arguments
    video_path: A file path to a video to be tested on. Can also be a number,
                in which case the webcam with the same number (i.e. 0) is
                used instead

    start_frame: The number of the first frame of the video to be processed
                 by the network.

    conf_thresh: Threshold of confidence. Any boxes with lower confidence
                 are not visualized.

    # Raises
    IOError: If the video file or webcam cannot be opened.
    """
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError(("Couldn't open video file or webcam. If you're "
        "trying to open a webcam, make sure you video_path is an integer!"))

    # OpenCV 2.x exposes the capture properties as cv2.cv.CV_CAP_PROP_*,
    # OpenCV 3+ removed the cv2.cv module in favour of cv2.CAP_PROP_*.
    legacy_cv = getattr(cv2, 'cv', None)
    if legacy_cv is not None:
        prop_width = legacy_cv.CV_CAP_PROP_FRAME_WIDTH
        prop_height = legacy_cv.CV_CAP_PROP_FRAME_HEIGHT
        prop_pos_frames = legacy_cv.CV_CAP_PROP_POS_FRAMES
    else:
        prop_width = cv2.CAP_PROP_FRAME_WIDTH
        prop_height = cv2.CAP_PROP_FRAME_HEIGHT
        prop_pos_frames = cv2.CAP_PROP_POS_FRAMES

    try:
        # Compute aspect ratio of video
        vidw = vid.get(prop_width)
        vidh = vid.get(prop_height)
        vidar = vidw/vidh

        # Skip frames until reaching start_frame.  start_frame is a frame
        # index, so it must be written to the POS_FRAMES property; the
        # POS_MSEC property would interpret it as milliseconds.
        if start_frame > 0:
            vid.set(prop_pos_frames, start_frame)

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()

        while True:
            retval, orig_image = vid.read()
            if not retval:
                print("Done!")
                return

            im_size = (self.input_shape[0], self.input_shape[1])
            resized = cv2.resize(orig_image, im_size)
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

            # Reshape to original aspect ratio for later visualization
            # The resized version is used, to visualize what kind of resolution
            # the network has to work with.
            to_draw = cv2.resize(resized, (int(self.input_shape[0]*vidar), self.input_shape[1]))

            # Use model to predict
            inputs = [image.img_to_array(rgb)]
            tmp_inp = np.array(inputs)
            x = preprocess_input(tmp_inp)

            y = self.model.predict(x)

            # This line creates a new TensorFlow device every time. Is there a
            # way to avoid that?
            results = self.bbox_util.detection_out(y)

            if len(results) > 0 and len(results[0]) > 0:
                # Interpret output, only one frame is used.  Columns are read
                # as: label, confidence, xmin, ymin, xmax, ymax.
                det_label = results[0][:, 0]
                det_conf = results[0][:, 1]
                det_xmin = results[0][:, 2]
                det_ymin = results[0][:, 3]
                det_xmax = results[0][:, 4]
                det_ymax = results[0][:, 5]

                top_indices = [i for i, conf in enumerate(det_conf) if conf >= conf_thresh]

                top_conf = det_conf[top_indices]
                top_label_indices = det_label[top_indices].tolist()
                top_xmin = det_xmin[top_indices]
                top_ymin = det_ymin[top_indices]
                top_xmax = det_xmax[top_indices]
                top_ymax = det_ymax[top_indices]

                for i in range(top_conf.shape[0]):
                    # Box coordinates are scaled by the size of to_draw
                    xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                    ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                    xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                    ymax = int(round(top_ymax[i] * to_draw.shape[0]))

                    # Draw the box on top of the to_draw image
                    class_num = int(top_label_indices[i])
                    cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax),
                                  self.class_colors[class_num], 2)
                    text = self.class_names[class_num] + " " + ('%.2f' % top_conf[i])

                    text_top = (xmin, ymin-10)
                    text_bot = (xmin + 80, ymin + 5)
                    text_pos = (xmin + 5, ymin)
                    cv2.rectangle(to_draw, text_top, text_bot, self.class_colors[class_num], -1)
                    cv2.putText(to_draw, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1)

            # Calculate FPS
            # This computes FPS for everything, not just the model's execution
            # which may or may not be what you want
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0

            # Draw FPS in top left corner
            cv2.rectangle(to_draw, (0,0), (50, 17), (255,255,255), -1)
            cv2.putText(to_draw, fps, (3,10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1)

            cv2.imshow("SSD result", to_draw)
            cv2.waitKey(10)
    finally:
        # Give the video file / webcam back however the loop ends.
        vid.release()
def preprocess_input(x, data_format=None):
    """Preprocess `x` with the ImageNet helper, pinned to ``mode='caffe'``.

    # Arguments
        x: Image array (or batch of images) to preprocess.
        data_format: Image data format forwarded to
            `imagenet_utils.preprocess_input`; `None` leaves the choice to
            that helper.

    # Returns
        Whatever `imagenet_utils.preprocess_input` returns for `x`.
    """
    return imagenet_utils.preprocess_input(x,
                                           mode='caffe',
                                           data_format=data_format)
# Predict and correct the orientation of every image
for imagePath in imagePaths:
    # Keep an OpenCV copy of the untouched image so it can be rotated and
    # displayed once its orientation has been classified
    orig = cv2.imread(imagePath)

    # Load a 224x224 copy through the Keras helpers, add the batch axis and
    # apply the ImageNet preprocessing
    image = np.expand_dims(
        img_to_array(load_img(imagePath, target_size=(224, 224))), axis=0)
    image = imagenet_utils.preprocess_input(image)

    # Extract the CNN features and flatten each sample to 512 * 7 * 7 values
    features = vgg.predict(image)
    features = np.reshape(features, (features.shape[0], 512 * 7 * 7))

    # Feed the features to the classifier and map the predicted class back
    # to its angle via labelNames
    angle = labelNames[model.predict(features)[0]]

    # Undo the predicted rotation
    rotated = imutils.rotate_bound(orig, 360 - angle)

    # Show the uncorrected image
    cv2.imshow("Original", orig)
labels = []

# Load every image, preprocess it and record its class label
for imagePath in imagePaths:
    # The class label is the name of the directory that holds the image
    label = imagePath.split(os.path.sep)[-2]

    # Read the file as a 3-channel colour image with OpenCV, resize it to
    # 224x224 (aspect ratio is ignored) and apply the model preprocessing.
    # NOTE(review): cv2.imread returns channels in BGR order and no RGB
    # conversion is applied here -- confirm that is what preprocess_input
    # expects.
    image = preprocess_input(
        cv2.resize(cv2.imread(imagePath, cv2.IMREAD_COLOR), (224, 224)))

    # Keep the labels and the samples aligned by index
    labels.append(label)
    data.append(image)

# Stack the samples into one float array; no further rescaling is applied
data = np.array(data, dtype="float")
print('loaded data')