def segment(img):
    """Segment ``img`` with the FCN and write the result images.

    The function relies on a ``net`` object that must already exist in the
    enclosing scope; it does not load the network itself.

    Two files are written below ``./static/image``: ``<stem>.output.png``
    (colour-coded label map) and ``<stem>.vis.jpg`` (overlay on the input),
    where ``<stem>`` is the part of ``img`` before its first ``'.'``.

    Args:
        img: Path of the image to segment.

    Returns:
        Wall-clock duration of the whole call, in seconds.
    """
    time_start = time.time()
    caffe.set_device(0)
    caffe.set_mode_gpu()

    im = modifySize(Image.open(img))
    # The channel flip and per-channel mean subtraction below need an
    # H x W x 3 array, so every non-RGB mode (RGBA, L, P, CMYK, ...) has to be
    # converted, not only RGBA.
    if im.mode != "RGB":
        im = im.convert("RGB")

    # Switch to BGR, subtract the mean, and make dims C x H x W for Caffe.
    in_ = np.array(im, dtype=np.float32)
    in_ = in_[:, :, ::-1]
    in_ -= np.array((104.00698793, 116.66876762, 122.67891434))
    in_ = in_.transpose((2, 0, 1))

    # Shape for input (data blob is N x C x H x W), set data.
    net.blobs['data'].reshape(1, *in_.shape)
    net.blobs['data'].data[...] = in_

    # Run net and take argmax for prediction.
    net.forward()
    out = net.blobs['score'].data[0].argmax(axis=0)

    # Visualize segmentation in PASCAL VOC colors.
    voc_palette = vis.make_palette(21)
    out_im = Image.fromarray(vis.color_seg(out, voc_palette))
    imgNames = img.split('.')
    output = os.path.join("./static/image", imgNames[0] + ".output.png")
    out_im.save(output)

    masked_im = Image.fromarray(vis.vis_seg(im, out, voc_palette))
    visualization = os.path.join("./static/image", imgNames[0] + ".vis.jpg")
    masked_im.save(visualization)

    time_end = time.time()
    print('totally cost', time_end - time_start)
    return time_end - time_start
def predict_image(id):
    """Segment every image of the configured test set and save the results.

    Image ids are read from the segmentation image-set file selected by
    ``VOCopts``. For each id the function runs ``net`` (expected to exist in
    the enclosing scope), prints timing information, and writes:

    * a paletted class-index PNG under
      ``<resdir>/Segmentation/<id>_<testset>_cls/``;
    * a colour label map and an overlay image under ``./valresult/``.

    Args:
        id: Identifier embedded in the name of the result directory.
    """
    VOCopts_seg_imgsetpath = os.path.join(
        VOCopts['datadir'], VOCopts['dataset'],
        'ImageSets/Segmentation/{}.txt'.format(VOCopts['testset']))
    with open(VOCopts_seg_imgsetpath, 'r') as rf:
        gtids = [line.replace('\n', '') for line in rf]

    # The result directory does not depend on the image, so build it once.
    save_path = os.path.join(
        VOCopts['resdir'],
        'Segmentation/{}_{}_cls/'.format(id, VOCopts['testset']))

    num = 1
    total_time = 0
    for imname in gtids:
        if not imname:
            continue
        start_time = datetime.datetime.now()
        imgpath = os.path.join(VOCopts['datadir'], VOCopts['dataset'],
                               'JPEGImages/{}.jpg'.format(imname))
        print('读取图片: {}'.format(imgpath))

        # Load image, switch to BGR, subtract mean, and make dims C x H x W
        # for Caffe. Converting to RGB first keeps grayscale/CMYK JPEGs from
        # breaking the three-channel preprocessing.
        im = Image.open(imgpath).convert('RGB').resize((1008, 1100))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:, :, ::-1]
        in_ -= np.array((104.00698793, 116.66876762, 122.67891434))
        in_ = in_.transpose((2, 0, 1))

        # Shape for input (data blob is N x C x H x W), set data.
        net.blobs['data'].reshape(1, *in_.shape)
        net.blobs['data'].data[...] = in_

        # Run net and take argmax for prediction.
        net.forward()
        out = net.blobs['score'].data[0].argmax(axis=0)
        print(out.max())
        out_im = Image.fromarray(out.astype('uint8'))
        print(out_im.mode)

        end_time = datetime.datetime.now()
        process_time = end_time - start_time
        total_time += process_time.total_seconds()
        print('处理图片第{}张图片{} 耗时{}'.format(num, imname, process_time))
        print('总耗时{}, 平均耗时{}'.format(total_time, total_time / num))
        num += 1

        check_dir(save_path)
        resfile = os.path.join(save_path, '{}.png'.format(imname))
        out_im.putpalette(np.array(colormap).reshape(-1))
        out_im.save(resfile)

        # Visualize segmentation in PASCAL VOC colors.
        voc_palette = vis.make_palette(21)
        out_palette_im = Image.fromarray(vis.color_seg(out, voc_palette))
        check_dir('./valresult/')
        out_palette_im.save('./valresult/out_palette_{}.png'.format(imname))
        masked_im = Image.fromarray(vis.vis_seg(im, out, voc_palette))
        masked_im.save(
            './valresult/out_palette_{}_visualization.png'.format(imname))
        print('存储结果图片: {}'.format(resfile))
def compute_hist(net, save_dir, dataset, layer='score', gt='label'):
    """Accumulate a confusion histogram and the mean loss over ``dataset``.

    ``net.forward()`` is called once per entry of ``dataset``; the entries
    themselves are only used to name the visualisation files.

    Args:
        net: Network exposing ``blobs`` and ``forward()``.
        save_dir: Directory for overlay images; falsy to skip visualisation.
            It is created (including parents) when it does not exist yet.
        dataset: Sized iterable of string image ids.
        layer: Name of the blob holding per-class scores.
        gt: Name of the blob holding the ground-truth labels.

    Returns:
        Tuple ``(hist, mean_loss)`` where ``hist`` is an ``n_cl x n_cl`` array
        and ``mean_loss`` is the summed ``'loss'`` blob value divided by
        ``len(dataset)``.
    """
    n_cl = net.blobs[layer].channels
    # os.mkdir raised when the directory already existed (re-runs) or when a
    # parent was missing.
    if save_dir and not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    # The palette does not change between images.
    palette = vis.make_palette(4) if save_dir else None

    hist = np.zeros((n_cl, n_cl))
    loss = 0
    for idx in dataset:
        net.forward()
        # One argmax serves both the histogram and the overlay, so the
        # visualisation always shows the blob selected by `layer`.
        pred = net.blobs[layer].data[0].argmax(0)
        hist += fast_hist(net.blobs[gt].data[0, 0].flatten(),
                          pred.flatten(),
                          n_cl)

        if save_dir:
            sat_path = 'data/roads/ROADS/CroppedImages/{}.png'.format(idx)
            sat = Image.open(sat_path)
            vis_img = Image.fromarray(vis.vis_seg(sat, pred, palette))
            vis_img.save(os.path.join(save_dir, idx + '.png'))

        # Compute the loss as well.
        loss += net.blobs['loss'].data.flat[0]
    return hist, loss / len(dataset)
def segment(img):
    """Load the VOC FCN-8s model, segment ``img`` and save two result images.

    Writes ``<stem>output.png`` (colour label map) and
    ``<stem>visualization.jpg`` (overlay) below ``./static/image``, where
    ``<stem>`` is the part of ``img`` before its first ``'.'``. The network is
    loaded from disk on every call.

    Args:
        img: Path of the image to segment.
    """
    print("start")
    im = modifySize(Image.open(img))
    # The preprocessing below needs an H x W x 3 array; convert grayscale,
    # palette, RGBA, ... inputs up front.
    if im.mode != "RGB":
        im = im.convert("RGB")

    # Switch to BGR, subtract mean, and make dims C x H x W for Caffe.
    in_ = np.array(im, dtype=np.float32)
    in_ = in_[:, :, ::-1]
    in_ -= np.array((104.00698793, 116.66876762, 122.67891434))
    in_ = in_.transpose((2, 0, 1))

    net = caffe.Net('Server/FCN/voc-fcn8s/deploy.prototxt',
                    'Server/FCN/voc-fcn8s/fcn8s-heavy-pascal.caffemodel',
                    caffe.TEST)

    # Timing starts after the model load and stops before the files are saved.
    time_start = time.time()
    # Shape for input (data blob is N x C x H x W), set data.
    net.blobs['data'].reshape(1, *in_.shape)
    net.blobs['data'].data[...] = in_

    # Run net and take argmax for prediction.
    print("forward")
    net.forward()
    out = net.blobs['score'].data[0].argmax(axis=0)
    print("end forward")

    # Visualize segmentation in PASCAL VOC colors.
    voc_palette = vis.make_palette(21)
    out_im = Image.fromarray(vis.color_seg(out, voc_palette))
    time_end = time.time()
    print('totally cost', time_end - time_start)

    imgNames = img.split('.')
    output = os.path.join("./static/image", imgNames[0] + "output.png")
    out_im.save(output)

    masked_im = Image.fromarray(vis.vis_seg(im, out, voc_palette))
    visualization = os.path.join("./static/image",
                                 imgNames[0] + "visualization.jpg")
    masked_im.save(visualization)
def infer(input_image, input_mask, input_mask_thr, output_mask, output_overlay):
    """Run the two-class network on one image and save mask and overlay.

    The input image is resized to 128 x 128, channel-reversed and fed to
    ``net`` (expected to exist in the enclosing scope) without mean
    subtraction. The reference mask is resized, thresholded at 200 and, when
    ``input_mask_thr`` is truthy, written there. Two 6 x 6 nearest-neighbour
    previews (first score channel, then the argmax map) are printed.

    Args:
        input_image: Path of the image to segment.
        input_mask: Path of the reference mask image.
        input_mask_thr: Output path for the thresholded mask, or falsy to skip.
        output_mask: Output path for the colour-coded prediction.
        output_overlay: Output path for the prediction overlaid on the image.
    """
    image = Image.open(input_image).resize([128, 128], Image.ANTIALIAS)
    # Reverse the channel axis, then reorder H x W x C -> C x H x W for Caffe.
    in_ = np.array(image, dtype=np.float32)[:, :, ::-1].transpose((2, 0, 1))

    mask_img = Image.open(input_mask).resize([128, 128], Image.NEAREST)
    label = (np.array(mask_img, np.uint8) > 200) * 1
    from scipy.misc import imsave
    if input_mask_thr:
        imsave(input_mask_thr, label)

    # Data blob is N x C x H x W.
    data_blob = net.blobs['data']
    data_blob.reshape(1, *in_.shape)
    data_blob.data[...] = in_

    net.forward()
    out = net.blobs['output_sep'].data[0]
    Dtype = out.dtype

    def preview(plane):
        # Downsample a 2-D array to 6 x 6 so it can be printed compactly.
        small = Image.fromarray(plane).resize([6, 6], Image.NEAREST)
        return np.array(small, Dtype)

    print(preview(out[0, :, :]))
    out = out.argmax(axis=0)
    print(preview(out.astype(Dtype)))

    # Colour the two classes and save both result images.
    voc_palette = vis.make_palette(2)
    Image.fromarray(vis.color_seg(out, voc_palette)).save(output_mask)
    Image.fromarray(vis.vis_seg(image, out, voc_palette)).save(output_overlay)
def callback(imgmsg):
    """Segment one ROS image message on the NCS and display the result.

    The message is converted to a ``bgr8`` array, resized to ``IMAGE_DIM``,
    channel-reversed, mean-subtracted and queued for inference through the
    module-level ``graph`` / ``fifo_in`` / ``fifo_out`` objects. The two-class
    output is turned into a label map and shown next to the input frame.

    Args:
        imgmsg: Image message accepted by ``CvBridge.imgmsg_to_cv2``.
    """
    bridge = CvBridge()
    img_ori = bridge.imgmsg_to_cv2(imgmsg, "bgr8")

    # Resize image [image size is defined during training].
    img_resize = skimage.transform.resize(img_ori, IMAGE_DIM) * 255
    # Reverse the channel order. The frame was requested as "bgr8", so this
    # flip presumably hands the network RGB data - TODO confirm against the
    # training pipeline.
    image_t = img_resize[:, :, ::-1]
    # Mean subtraction [a common technique used to center the data].
    image_t = image_t.astype(numpy.float16)
    image_t = image_t - numpy.float16(IMAGE_MEAN)

    graph.queue_inference_with_fifo_elem(fifo_in, fifo_out, image_t,
                                         'user object')
    # Get the results from NCS; the user object is not needed.
    out, _ = fifo_out.read_elem()

    # Flat output -> 2 x 331 x W score planes -> label map, borders cropped.
    out = out.reshape(-1, 2).T.reshape(2, 331, -1)
    out = out.argmax(axis=0)
    out = out[6:-5, 6:-5]

    voc_palette = vis.make_palette(2)
    # Keep the overlay as an ndarray: it is sliced and handed to cv2 below,
    # and a PIL image supports neither.
    # NOTE(review): the label map comes from the resized frame while the
    # overlay uses the original frame; confirm both have the same H x W.
    img_masked = vis.vis_seg(img_ori, out, voc_palette)

    # Visualization.
    img_ori = img_ori[:, :, ::-1]
    img_masked = img_masked[:, :, ::-1]
    cv2.imshow("in", img_ori)
    cv2.imshow("out", img_masked)
    cv2.waitKey(1)
def segmentation(path, current_painting):
    """Segment the image at ``path`` with VOC FCN-8s and save the results.

    Writes ``demo/output/output_<stem>.png`` (colour label map, ``<stem>`` is
    ``current_painting`` up to its first ``'.'``) and
    ``demo/output/output_<current_painting>.jpg`` (overlay). The dominant
    colours of the original image are reported through ``vis.extractColors``.

    Args:
        path: Path of the image to segment.
        current_painting: Name used in the progress message and output names.
    """
    im = Image.open(path)

    # Adapt the input layer to the H x W of this image.
    reshapeInputLayer(im)
    delayPrint("Starting to segment the image: {}".format(current_painting),
               PRINT_SECONDS)

    # Switch to BGR, subtract the mean in place, reorder to C x H x W.
    in_ = np.array(im, dtype=np.float32)[:, :, ::-1]
    in_ -= np.array((104.00698793, 116.66876762, 122.67891434))
    in_ = in_.transpose((2, 0, 1))

    # Inference runs on the CPU.
    caffe.set_mode_cpu()
    net = caffe.Net('voc-fcn8s/deploy.prototxt',
                    'voc-fcn8s/fcn8s-heavy-pascal.caffemodel',
                    caffe.TEST)

    # Data blob is N x C x H x W.
    data_blob = net.blobs['data']
    data_blob.reshape(1, *in_.shape)
    data_blob.data[...] = in_

    # Forward pass, then per-pixel argmax over the class scores.
    net.forward()
    out = net.blobs['score'].data[0].argmax(axis=0)

    # Colour the label map with the PASCAL VOC palette.
    voc_palette = vis.make_palette(21)
    label_map = Image.fromarray(vis.color_seg(out, voc_palette))
    label_map.save('demo/output/output_%s.png' % (current_painting.split(".")[0]))

    overlay = Image.fromarray(vis.vis_seg(im, out, voc_palette))
    # Print extracted colors of the original image.
    vis.extractColors(path)
    overlay.save('demo/output/output_%s.jpg' % (current_painting))
def callback(imgmsg):
    """Segment one ROS image message via ``graph.LoadTensor`` and display it.

    The frame is converted to ``bgr8``, resized to ``IMAGE_DIM``,
    channel-reversed and mean-subtracted before being loaded into the
    module-level ``graph``. The two-class result becomes a label map that is
    overlaid on the resized frame and shown next to the input.

    Args:
        imgmsg: Image message accepted by ``CvBridge.imgmsg_to_cv2``.
    """
    frame = CvBridge().imgmsg_to_cv2(imgmsg, "bgr8")

    # Resize to the network input size and scale by 255.
    resized = skimage.transform.resize(frame, IMAGE_DIM) * 255

    # Reverse channels, cast to half precision, subtract the mean.
    net_input = resized[:, :, ::-1].astype(numpy.float16)
    net_input = net_input - numpy.float16(IMAGE_MEAN)

    graph.LoadTensor(net_input, 'user object')
    scores = graph.GetResult()[0]

    # Flat output -> 2 x 331 x W score planes -> label map, trailing 11
    # rows/columns dropped.
    planes = scores.reshape(-1, 2).T.reshape(2, 331, -1)
    labels = planes.argmax(axis=0)[:-11, :-11]

    overlay = vis.vis_seg(resized, labels, vis.make_palette(2))

    # Both windows show channel-reversed copies of the arrays.
    cv2.imshow("in", frame[:, :, ::-1])
    cv2.imshow("out", overlay[:, :, ::-1])
    cv2.waitKey(1)
# Visualise network predictions for every image listed in --imgs_txt.
parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, required=True)
parser.add_argument('--weights', type=str, required=True)
parser.add_argument('--imgs_txt', type=str, required=True)
parser.add_argument('--save_dir', type=str, required=True)
# Parsed as int so an invalid value is rejected at the command line instead of
# failing later inside the loop.
parser.add_argument('--num_classes', type=int, required=True)
args = parser.parse_args()

net = caffe.Net(args.model, args.weights, caffe.TEST)

# ndmin=2 keeps a list file with a single row two-dimensional, so the column
# slice below works for one image as well as for many.
test_txt = np.loadtxt(args.imgs_txt, dtype=str, ndmin=2)
# The second column of the list file provides the output file names.
test_names = [os.path.basename(img_path) for img_path in test_txt[:, 1]]

# The palette is identical for every image.
palette = vis.make_palette(int(args.num_classes))

for img_name in test_names:
    # Each forward pass is expected to advance the net's own data layer by
    # one sample; the names are only used for the output files.
    net.forward()
    image = net.blobs['data'].data
    predicted = net.blobs['prob'].data

    image = np.squeeze(image[0, :, :, :])
    output = np.squeeze(predicted[0, :, :, :])
    ind = np.argmax(output, axis=0)

    # C x H x W -> H x W x C, then reverse the channel order.
    image = np.transpose(image, (1, 2, 0))
    image = image[:, :, (2, 1, 0)]

    vis_img = Image.fromarray(vis.vis_seg(image, ind, palette))
    vis_img.save(os.path.join(args.save_dir, img_name))
def predict():
    """Flask view: store an uploaded image, run segmentation, return JSON.

    On a POST request carrying an ``image`` file, the upload is saved under
    ``./images/`` with a timestamp name and its absolute path is written to
    ``time.txt``. The module-level ``model`` then predicts over the files
    listed in ``FLAGS.infer_data_list`` and an overlay PNG per file is written
    to ``FLAGS.output_dir``.

    Returns:
        JSON response with ``success`` and, on success, a ``predictions`` list
        (currently a fixed placeholder entry).

    Raises:
        TypeError: If creating the predictions fails.
    """
    global model

    # Initialize the data dictionary that will be returned from the view.
    data = {"success": False}

    # Ensure an image was properly uploaded to our endpoint.
    if flask.request.method == "POST":
        if flask.request.files.get("image"):
            # Read the image in PIL format.
            image = flask.request.files["image"].read()
            image = Image.open(io.BytesIO(image))

            # Write the path of the image received from the client to
            # "time.txt". The context manager closes the file even when
            # saving the image raises.
            with open("time.txt", "w+") as file_time:
                fileName = datetime.datetime.now().strftime(
                    '%Y-%m-%d-%H:%M:%S')
                image.save('./images/' + fileName + '.jpg')
                file_time.write(absPath + '/images/' + fileName + '.jpg')

            pred_hooks = None
            if FLAGS.debug:
                debug_hook = tf_debug.LocalCLIDebugHook()
                pred_hooks = [debug_hook]

            examples = dataset_util.read_examples_list(FLAGS.infer_data_list)
            image_files = [os.path.join(FLAGS.data_dir, filename)
                           for filename in examples]

            with graph.as_default():
                try:
                    predictions = model.predict(
                        input_fn=lambda: preprocessing.eval_input_fn(
                            image_files),
                        hooks=pred_hooks)
                except Exception as e:
                    raise TypeError("bad input") from e

                output_dir = FLAGS.output_dir
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)

                # Lookup table binarising the grey mask: < threshold -> 0,
                # otherwise 1. Identical for every image, so built once.
                threshold = 10
                table = [0 if i < threshold else 1 for i in range(256)]
                voc_palette = vis.make_palette(2)

                for pred_dict, image_path in zip(predictions, image_files):
                    image_basename = os.path.splitext(
                        os.path.basename(image_path))[0]
                    output_filename = image_basename + '.png'
                    path_to_output = os.path.join(output_dir, output_filename)

                    orginalImage = np.array(Image.open(image_path))
                    img = Image.fromarray(orginalImage)

                    mask = Image.fromarray(pred_dict['decoded_labels'])
                    mask = mask.convert('L')
                    mask = mask.point(table, '1')
                    mask = np.matrix(mask, dtype=np.int32)

                    masked_im = Image.fromarray(
                        vis.vis_seg(img, mask, voc_palette))
                    print("generating:", path_to_output)
                    masked_im.save(path_to_output)

            data["predictions"] = []
            # Placeholder result; real per-class results are not produced yet.
            r = {"label": "apple", "probability": float(0.999)}
            data["predictions"].append(r)

            # Indicate that the request was a success.
            data["success"] = True

    # Return the data dictionary as a JSON response.
    return flask.jsonify(data)
def load_image_label(self, img_path, label_path):
    """Load an image/label pair and apply random augmentation.

    Steps: flip both with a random OpenCV flip code from {-1, 0, 1}; then,
    with equal probability, either take a random crop or rotate by an angle
    drawn from ``self.sigma * randn()``; finally re-light the image with a
    sampled illuminant, subtract ``self.mean`` and reorder to C x H x W.
    When ``self.testing`` is set, each stage is shown with matplotlib.

    Args:
        img_path: Path of the image file.
        label_path: Path of the label image; only its first channel is used.

    Returns:
        Tuple ``(img, label)``: the float C x H x W image and the label map
        with a leading singleton axis.
    """
    def show(bgr, title):
        # The arrays come from OpenCV; the channel flip is for matplotlib.
        plt.figure()
        plt.imshow(bgr[..., ::-1])
        plt.title(title)

    def show_overlay(bgr, seg, title):
        overlay = vis.vis_seg(bgr.astype(np.uint8), seg, vis.make_palette(20))
        show(overlay, title)

    white_img = cv2.imread(img_path)
    label = cv2.imread(label_path)[..., 0]
    if self.testing:
        show(white_img, "White Image")

    # Same random flip code for image and label.
    flip_op = np.random.choice([-1, 0, 1])
    white_img = cv2.flip(white_img, flip_op)
    label = cv2.flip(label, flip_op)
    if self.testing:
        show_overlay(white_img, label, "Flipped {} Image".format(flip_op))

    take_crop = np.random.rand() < 0.5
    if take_crop:
        white_img, top_border = trans.random_crop(white_img, self.crop_size)
        label = trans.crop(label, top_border, self.crop_size)
        if self.testing:
            show_overlay(white_img, label, "Cropped Image")
    else:
        angle = self.sigma * np.random.randn()
        white_img = trans.rotate_img(white_img, angle, self.crop_size)
        label = trans.rotate_img(label, angle, self.crop_size, is_mask=True)
        if self.testing:
            # Remap label 255 to 5 on a copy, for display only.
            label_vis = label.copy()
            label_vis[label_vis == 255] = 5
            show_overlay(white_img, label_vis,
                         "Rotated Image_{}".format(int(angle)))

    illuminant = self.sample_illuminant()
    img = illu.apply_illuminant(white_img, illuminant)
    if self.testing:
        show(img, "New Image")
        plt.show()
        print

    img = img.astype(float)
    img -= self.mean
    img = img.transpose((2, 0, 1))
    return img, label[np.newaxis, ...]
def main(unused_argv):
    """Run DeepLab v3+ inference over the configured image list.

    For every file named in ``FLAGS.infer_data_list`` the predicted labels are
    binarised (grey value < 10 -> 0, otherwise 1), coloured with a two-class
    palette, and written to ``FLAGS.output_dir``; an overlay is saved next to
    the input as ``<input-stem>vis.jpg`` and shown with matplotlib.

    Args:
        unused_argv: Ignored.
    """
    # Using the Winograd non-fused algorithms provides a small performance
    # boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    pred_hooks = None
    if FLAGS.debug:
        debug_hook = tf_debug.LocalCLIDebugHook()
        pred_hooks = [debug_hook]

    model = tf.estimator.Estimator(
        model_fn=deeplab_model.deeplabv3_plus_model_fn,
        model_dir=FLAGS.model_dir,
        params={
            'output_stride': FLAGS.output_stride,
            # Batch size must be 1 because the images' size may differ.
            'batch_size': 1,
            'base_architecture': FLAGS.base_architecture,
            'pre_trained_model': None,
            'batch_norm_decay': None,
            'num_classes': _NUM_CLASSES,
        })

    examples = dataset_util.read_examples_list(FLAGS.infer_data_list)
    image_files = [os.path.join(FLAGS.data_dir, filename)
                   for filename in examples]

    predictions = model.predict(
        input_fn=lambda: preprocessing.eval_input_fn(image_files),
        hooks=pred_hooks)

    output_dir = FLAGS.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Lookup table binarising the grey mask; identical for every image.
    threshold = 10
    table = [0 if i < threshold else 1 for i in range(256)]
    voc_palette = vis.make_palette(2)

    for pred_dict, image_path in zip(predictions, image_files):
        image_basename = os.path.splitext(os.path.basename(image_path))[0]
        output_filename = image_basename + '.png'
        path_to_output = os.path.join(output_dir, output_filename)

        orginalImage = np.array(Image.open(image_path))
        img = Image.fromarray(orginalImage)

        print("generating:", path_to_output)
        # time.clock() was removed in Python 3.8; time.time() exists
        # everywhere.
        # NOTE(review): this interval only spans the dictionary lookup below,
        # not the network inference itself.
        start = time.time()
        mask = pred_dict['decoded_labels']
        end = time.time()
        print('running time %s' % (end - start))
        print(mask.size)

        mask = Image.fromarray(mask)
        mask = mask.convert('L')
        mask = mask.point(table, '1')
        mask = np.matrix(mask, dtype=np.int32)
        print(mask)

        out_im = Image.fromarray(vis.color_seg(mask, voc_palette))
        shotname = os.path.splitext(image_path)[0]
        out_im.save(path_to_output)

        masked_im = Image.fromarray(vis.vis_seg(img, mask, voc_palette))
        masked_im.save(shotname + 'vis.jpg')

        plt.axis('off')
        plt.imshow(masked_im)
        # NOTE(review): this overwrites the colour mask saved to the same path
        # a few lines above; confirm that is intended.
        plt.savefig(path_to_output, bbox_inches='tight')
        plt.show()
import numpy as np
from PIL import Image

import caffe
import vis

# Demo: segment demo/image.jpg with the fcn-544 snapshot and save a colour
# label map plus an overlay.

# Preprocess for Caffe: RGB -> BGR, subtract the mean in place (keeps
# float32), then H x W x C -> C x H x W.
im = Image.open('demo/image.jpg')
in_ = np.array(im, dtype=np.float32)[:, :, ::-1]
in_ -= np.array((104.00698793, 116.66876762, 122.67891434))
in_ = in_.transpose((2, 0, 1))

# Load the network in test mode.
net = caffe.Net('fcn-544/deploy.prototxt',
                'fcn-544/snapshot/train_iter_100000.caffemodel',
                caffe.TEST)

# The data blob is N x C x H x W; resize it to this image and copy the pixels.
data_blob = net.blobs['data']
data_blob.reshape(1, *in_.shape)
data_blob.data[...] = in_

# Forward pass, then per-pixel argmax over the class scores.
net.forward()
out = net.blobs['score'].data[0].argmax(axis=0)

# Render with the PASCAL VOC palette.
voc_palette = vis.make_palette(21)
out_im = Image.fromarray(vis.color_seg(out, voc_palette))
out_im.save('demo/test7_output.png')
masked_im = Image.fromarray(vis.vis_seg(im, out, voc_palette))
masked_im.save('demo/test7_visualization.jpg')
def main(args):
    '''Evaluate the union of detection boxes and segmentation masks.

    For every image id listed in ``args.image_file`` the detector is run, the
    detected boxes are rasterised into a class mask, and that mask is OR-ed
    class by class with the predicted segmentation. The union is scored
    against the ground-truth mask and an overlay is written to the result
    directory. Mean mIoU / recall / precision are printed at the end.

    Args:
        args: Parsed command-line arguments providing ``gpu_id``,
            ``model_def``, ``model_weights``, ``image_resize``,
            ``labelmap_file`` and ``image_file``.
    '''
    detection = CaffeDetection(args.gpu_id,
                               args.model_def, args.model_weights,
                               args.image_resize, args.labelmap_file)
    img_dir = args.image_file
    img_path = '/media/data/seg_dataset/fbox/JPEGImages'
    dst_path = '/home/yaok/software/caffe_ssd/result/box/mask_96/union'
    mask_path = '/media/data/seg_dataset/fbox/SemanticLabels'
    n_cls = 8
    voc_palette = vis.make_palette(n_cls)

    # Per-image metrics.
    seg_miou, seg_recall, seg_precision = [], [], []

    with open(img_dir) as f:
        img_list = f.readlines()

    for img_name in img_list:
        stem = img_name.strip()
        if not stem:
            # Blank lines (e.g. a trailing newline) name no image.
            continue
        img_name = stem + '.png'
        mask_name = stem + '_seg.png'
        gt_mask_name = stem + '.bmp'
        print(img_name)
        full_name = os.path.join(img_path, img_name)
        gt_mask_path = os.path.join(mask_path, gt_mask_name)

        result, seg_mask = detection.detect(full_name)
        mask_h, mask_w = seg_mask.shape
        det_mask = np.zeros((mask_h, mask_w, 3))

        gt_mask = cv2.imread(gt_mask_path, 0)
        # cv2.resize takes dsize as (width, height); nearest-neighbour keeps
        # the class ids intact instead of blending them.
        gt_mask = cv2.resize(gt_mask, (mask_w, mask_h),
                             interpolation=cv2.INTER_NEAREST)

        # Rasterise every detected box as a filled rectangle of its class id.
        for item in result:
            xmin = int(round(item[0] * mask_w))
            ymin = int(round(item[1] * mask_h))
            xmax = int(round(item[2] * mask_w))
            ymax = int(round(item[3] * mask_h))
            cls_ind = categories[item[-1]]
            det_mask = cv2.rectangle(det_mask, (xmin, ymin), (xmax, ymax),
                                     (cls_ind, cls_ind, cls_ind), -1)

        img = cv2.imread(full_name)
        h, w, _ = img.shape

        det_mask_gray = det_mask[:, :, 0].astype(np.uint8)
        seg_mask_onehot = detection.one_hot_transform(seg_mask)
        det_mask_onehot = detection.one_hot_transform(det_mask_gray)
        assert seg_mask_onehot.shape == det_mask_onehot.shape

        # Union of both predictions, one foreground class at a time.
        union = np.zeros(seg_mask.shape).astype(np.uint8)
        for i in range(1, n_cls):
            union_tmp = cv2.bitwise_or(det_mask_onehot[i-1],
                                       seg_mask_onehot[i-1])
            union[np.where(union_tmp == 1)] = i

        miou_seg, rec_seg, prec_seg = seg_scores(gt_mask, union, n_cls)
        seg_miou.append(miou_seg)
        seg_recall.append(rec_seg)
        seg_precision.append(prec_seg)
        print("segmentation metrics: miou: ", miou_seg, " recall: ",
              rec_seg, " precision: ", prec_seg)

        # dsize is (width, height) so the mask matches the image's (h, w).
        resize_mask = cv2.resize(union, (w, h),
                                 interpolation=cv2.INTER_NEAREST)
        out_im = vis.vis_seg(img, resize_mask, voc_palette)
        dst_mask_name = os.path.join(dst_path, mask_name)
        cv2.imwrite(dst_mask_name, out_im)

    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
    print("Final Segmentaiton: miou: ", np.array(seg_miou).sum()/len(seg_miou),
          " recall: ", np.array(seg_recall).sum()/len(seg_recall),
          " precision: ", np.array(seg_precision).sum()/len(seg_precision))
    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
# Clear previous models from memory before building a new one.
K.clear_session()
model = fcn8s_resnet(height=cfg.height, width=cfg.width)
weights_path = os.path.abspath(args.weights)
model.load_weights(weights_path, by_name=True)

# First ten image names of the minival split.
with open('../data/VOC2007/ImageSets/Segmentation/minival.txt') as f:
    ims = [line.strip() + '.jpg' for line in f.readlines()][:10]

image_dir = '../data/VOC2007/JPEGImages/'

# Time the predictions for all selected images.
t = timer.now()
segs = [pred(model, image_dir + name) for name in ims]
delta = timer.now() - t
print(len(ims), 'images cost: ', delta)
print('average cost: ', delta / len(ims))

# Keep the raw predictions for later inspection.
with open('segs.pkl', 'wb') as f:
    pickle.dump(segs, f)

# Render each prediction in PASCAL VOC colors: a colour label map and an
# overlay on the resized input image.
voc_palette = make_palette(21)
for name, seg in zip(ims, segs):
    im = np.array(Image.open(image_dir + name), np.uint8)
    im = cv2.resize(im, (cfg.width, cfg.height))

    out_im = Image.fromarray(color_seg(seg, voc_palette))
    out_im.save('results/{}_output.png'.format(name[:-4]))

    masked_im = Image.fromarray(vis_seg(im, seg, voc_palette))
    masked_im.save('results/' + name)
def main(args):
    '''Evaluate thresholded detection/segmentation intersection masks.

    For every image id listed in ``args.image_file`` the detector is run and
    ``seg_inter_threshold`` combines its boxes with the predicted
    segmentation. The result is resized to the image size, scored against the
    ground-truth mask, and an overlay is written to the result directory.
    Per-image timing and the mean mIoU / recall / precision are printed.

    Args:
        args: Parsed command-line arguments providing ``gpu_id``,
            ``model_def``, ``model_weights``, ``image_resize``,
            ``labelmap_file`` and ``image_file``.
    '''
    detection = CaffeDetection(args.gpu_id,
                               args.model_def, args.model_weights,
                               args.image_resize, args.labelmap_file)
    img_dir = args.image_file
    img_path = '/media/data/seg_dataset/fbox/JPEGImages'
    dst_path = '/home/yaok/software/caffe_ssd/result/box/mask_96/threshold_8e3'
    mask_path = '/media/data/seg_dataset/fbox/SemanticLabels'
    n_cls = 8
    voc_palette = vis.make_palette(n_cls)
    gamma = 0.008
    # Override the colour of the last class.
    voc_palette[-1] = np.array([255, 128, 255])

    # Per-image metrics.
    seg_miou, seg_recall, seg_precision = [], [], []

    with open(img_dir) as f:
        img_list = f.readlines()

    for img_name in img_list:
        stem = img_name.strip()
        if not stem:
            # Blank lines (e.g. a trailing newline) name no image.
            continue
        startTm = datetime.now()
        img_name = stem + '.png'
        mask_name = stem + '_seg.png'
        gt_mask_name = stem + '.bmp'
        print(img_name)
        full_name = os.path.join(img_path, img_name)
        gt_mask_path = os.path.join(mask_path, gt_mask_name)

        result, seg_mask = detection.detect(full_name)
        gt_mask = cv2.imread(gt_mask_path, 0)

        img = cv2.imread(full_name)
        h, w, _ = img.shape

        thresh_inter_tmp = seg_inter_threshold(detection, result, seg_mask,
                                               n_cls, gamma)
        thresh_inter_pil = Image.fromarray(thresh_inter_tmp)
        # PIL sizes are (width, height); nearest-neighbour keeps the class
        # ids intact instead of interpolating between them.
        thresh_inter_pil = thresh_inter_pil.resize((w, h), Image.NEAREST)
        thresh_inter = np.array(thresh_inter_pil)

        miou_seg, rec_seg, prec_seg = seg_scores(gt_mask, thresh_inter, n_cls)
        seg_miou.append(miou_seg)
        seg_recall.append(rec_seg)
        seg_precision.append(prec_seg)
        print("segmentation metrics: miou: ", miou_seg, " recall: ",
              rec_seg, " precision: ", prec_seg)

        # cv2.resize also takes dsize as (width, height).
        resize_mask = cv2.resize(thresh_inter, (w, h),
                                 interpolation=cv2.INTER_NEAREST)
        out_im = vis.vis_seg(img, resize_mask, voc_palette)
        dst_mask_name = os.path.join(dst_path, mask_name)
        cv2.imwrite(dst_mask_name, out_im)

        endTm = datetime.now()
        tm = endTm - startTm
        print("processing one image needs to take ", tm.seconds, "s ",
              tm.microseconds/1000, "ms")

    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
    print("Final Segmentaiton: miou: ", np.array(seg_miou).sum()/len(seg_miou),
          " recall: ", np.array(seg_recall).sum()/len(seg_recall),
          " precision: ", np.array(seg_precision).sum()/len(seg_precision))
    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
from PIL import Image
import sys
import numpy as np
import os
from pathlib import Path
import shutil
import vis
import pdb

# Overlay every label PNG on the image of the same name.
# Usage: <script> PNG_IMAGES_DIR SEGMENTATION_CLASS_RAW_DIR SAVE_DIR
png_images_dir = sys.argv[1]
segmentation_class_raw_dir = sys.argv[2]
save_dir = sys.argv[3]

# Start from an empty output directory.
if os.path.exists(save_dir):
    shutil.rmtree(save_dir, ignore_errors=True)
os.makedirs(save_dir)

# The palette is identical for every image.
palette = vis.make_palette(3)

for label_path in Path(segmentation_class_raw_dir).glob("*.png"):
    img_name = label_path.name
    # os.path.join works whether or not the directory arguments end with a
    # path separator; plain string concatenation required one.
    sat = np.array(Image.open(os.path.join(png_images_dir, img_name)))
    label = np.array(
        Image.open(os.path.join(segmentation_class_raw_dir, img_name)))
    print(img_name)
    vis_img = Image.fromarray(vis.vis_seg(sat, label, palette))
    vis_img.save(os.path.join(save_dir, img_name))
import numpy as np
from PIL import Image

import caffe
import vis

# Demo: segment demo/image.jpg ("2007_000129" from PASCAL VOC) with FCN-8s and
# save a colour label map plus an overlay.

# Caffe preprocessing: RGB -> BGR, in-place mean subtraction (keeps float32),
# then H x W x C -> C x H x W.
im = Image.open('demo/image.jpg')
in_ = np.array(im, dtype=np.float32)[:, :, ::-1]
in_ -= np.array((104.00698793, 116.66876762, 122.67891434))
in_ = in_.transpose((2, 0, 1))

# Load the network in test mode.
net = caffe.Net('voc-fcn8s/deploy.prototxt',
                'voc-fcn8s/fcn8s-heavy-pascal.caffemodel',
                caffe.TEST)

# The data blob is N x C x H x W; fit it to this image and copy the pixels.
input_blob = net.blobs['data']
input_blob.reshape(1, *in_.shape)
input_blob.data[...] = in_

# Forward pass, then per-pixel argmax over the class scores.
net.forward()
out = net.blobs['score'].data[0].argmax(axis=0)

# Render with the PASCAL VOC palette.
voc_palette = vis.make_palette(21)
out_im = Image.fromarray(vis.color_seg(out, voc_palette))
out_im.save('demo/output.png')
masked_im = Image.fromarray(vis.vis_seg(im, out, voc_palette))
masked_im.save('demo/visualization.jpg')
label = cv2.resize(label, (self.width, self.height), interpolation=cv2.INTER_NEAREST) # label = np.expand_dims(label, 0) return label def data_generation(self, inds): inputs, targets = [], [] for idx in inds: inputs.append(self.load_image(idx)) targets.append(self.load_label(idx)) inputs = np.array(inputs) targets = np.array(targets) return inputs, targets if __name__ == '__main__': import vis palette = vis.make_palette(21) g = DataGenerator(320, 320, split='train', batch_size=1, shuffle=False) for i in range(20): image, label = g[i] image = np.array(image[0] + cfg.mean, 'uint8') label = label[0] label[np.where(label==255)] = 0 out_im = Image.fromarray(vis.color_seg(label, palette)) out_im.save('check_data/{}.png'.format(i)) masked_im = vis.vis_seg(image, label, palette) cv2.imwrite('check_data/{}.jpg'.format(i), masked_im)
def main(args):
    '''Run detection + segmentation on the corrosion set and save overlays.

    For every image id listed in ``args.image_file`` two files are written to
    the result directory: ``<id>_seg.png`` (segmentation overlay, resized back
    to the image size) and ``<id>.jpg`` (the image with detection boxes,
    labels, and those segmentation-derived boxes that are larger than 300
    pixels and do not overlap any detection).

    Args:
        args: Parsed command-line arguments providing ``gpu_id``,
            ``model_def``, ``model_weights``, ``image_resize``,
            ``labelmap_file`` and ``image_file``.
    '''
    detection = CaffeDetection(args.gpu_id,
                               args.model_def, args.model_weights,
                               args.image_resize, args.labelmap_file)
    img_dir = args.image_file
    img_path = '/media/data/seg_dataset/corrosion/JPEGImages'
    dst_path = '/home/yaok/software/caffe_ssd/result/corrosion/'
    font = cv2.FONT_HERSHEY_SIMPLEX
    fontScale = 1
    fontColor = (0, 0, 255)
    lineType = 2
    voc_palette = vis.make_palette(8)

    with open(img_dir) as f:
        img_list = f.readlines()

    for img_name in img_list:
        stem = img_name.strip()
        if not stem:
            # Blank lines (e.g. a trailing newline) name no image.
            continue
        img_name = stem + '.jpg'
        mask_name = stem + '_seg.png'
        print(img_name)
        full_name = os.path.join(img_path, img_name)

        result, mask = detection.detect(full_name)
        mask_rect = detection.seg2box(mask)
        mask_h, mask_w = mask.shape
        print(result)

        img = cv2.imread(full_name)
        h, w, _ = img.shape
        mask_name = os.path.join(dst_path, mask_name)
        print(mask.shape, h, w)

        # cv2.resize takes dsize as (width, height): shrink the image to the
        # mask's shape for the overlay, then scale the overlay back up.
        img_mask = cv2.resize(img, (mask_w, mask_h),
                              interpolation=cv2.INTER_CUBIC)
        out_im = vis.vis_seg(img_mask, mask, voc_palette)
        out_im = cv2.resize(out_im, (w, h), interpolation=cv2.INTER_CUBIC)
        cv2.imwrite(mask_name, out_im)

        # Draw the detections and collect their pixel boxes.
        det_bbox = []
        for item in result:
            xmin = int(round(item[0] * w))
            ymin = int(round(item[1] * h))
            xmax = int(round(item[2] * w))
            ymax = int(round(item[3] * h))
            det_bbox.append([xmin, ymin, xmax, ymax])
            img = cv2.rectangle(img, (xmin, ymin), (xmax, ymax),
                                (0, 255, 0), 2)
            pt_x = int((xmax + xmin) / 2)
            pt_y = int((ymax + ymin) / 2)
            cv2.putText(img, item[-1] + str(item[-2]), (pt_x, pt_y),
                        font, fontScale, fontColor, lineType)
        det_bbox = np.array(det_bbox)
        print("det_bbox: ", det_bbox.shape, len(det_bbox))

        # Add segmentation boxes that no detection covers.
        for box in mask_rect:
            xmin = int(round(box[0] * w))
            ymin = int(round(box[1] * h))
            xmax = int(round(box[2] * w))
            ymax = int(round(box[3] * h))
            box_w = xmax - xmin
            box_h = ymax - ymin
            area = box_w * box_h

            if len(det_bbox) > 0:
                # Intersection-over-union of this box with every detection.
                rect = np.array([xmin, ymin, xmax, ymax])
                ixmin = np.maximum(det_bbox[:, 0], rect[0])
                iymin = np.maximum(det_bbox[:, 1], rect[1])
                ixmax = np.minimum(det_bbox[:, 2], rect[2])
                iymax = np.minimum(det_bbox[:, 3], rect[3])
                iw = np.maximum(ixmax - ixmin + 1., 0.)
                ih = np.maximum(iymax - iymin + 1., 0.)
                inters = iw * ih
                uni = ((rect[2] - rect[0] + 1.) * (rect[3] - rect[1] + 1.) +
                       (det_bbox[:, 2] - det_bbox[:, 0] + 1.) *
                       (det_bbox[:, 3] - det_bbox[:, 1] + 1.) - inters)
                overlaps = inters / uni
                ovmax = np.max(overlaps)
                if ovmax == 0 and area > 300:
                    img = cv2.rectangle(img, (xmin, ymin), (xmax, ymax),
                                        (0, 255, 0), 2)
            elif area > 300:
                # No detections at all: keep every sufficiently large box.
                img = cv2.rectangle(img, (xmin, ymin), (xmax, ymax),
                                    (0, 255, 0), 2)

        dst_name = os.path.join(dst_path, img_name)
        cv2.imwrite(dst_name, img)
def main(args):
    '''Evaluate detection and segmentation on every listed image.

    ``args`` must provide ``gpu_id``, ``model_def``, ``model_weights``,
    ``image_resize`` and ``labelmap_file`` (all forwarded to
    ``CaffeDetection``) and ``image_file``, the path of a text file holding
    one image name (without extension) per line.

    For each image the function

    * scores the predicted mask against ``<name>.bmp`` with ``seg_scores``;
    * scores the detections against ``<name>.xml`` with ``voc_eval_per_img``
      (an image without detections counts as 0 for all three metrics);
    * writes ``<name>_seg.png`` (mask blended onto the image) and
      ``<name>.png`` (image with detector boxes) below ``dst_path``.

    The per-image metrics are printed as they are computed and their
    averages are printed at the end.

    Raises:
        IOError: if an image or its ground-truth mask cannot be read.
            Skipping it silently would bias the averaged metrics.
    '''
    detection = CaffeDetection(args.gpu_id, args.model_def, args.model_weights,
                               args.image_resize, args.labelmap_file)
    img_dir = args.image_file
    img_path = '/media/data/seg_dataset/fbox/JPEGImages'
    dst_path = '/home/yaok/software/caffe_ssd/result/box/mask_96'
    mask_path = '/media/data/seg_dataset/fbox/SemanticLabels'
    xml_path = '/media/data/seg_dataset/fbox/Annotations'
    font = cv2.FONT_HERSHEY_SIMPLEX
    fontScale = 1
    fontColor = (0, 0, 255)
    lineType = 2
    n_cls = 8
    voc_palette = vis.make_palette(n_cls)

    # Per-image metrics, averaged after the loop.
    seg_miou, seg_recall, seg_precision = [], [], []
    det_miou, det_recall, det_precision = [], [], []

    with open(img_dir) as f:
        img_list = f.readlines()

    for line in img_list:
        stem = line.strip()
        if not stem:
            # A trailing blank line would otherwise become the file '.png'.
            continue
        img_name = stem + '.png'
        print(img_name)

        full_name = os.path.join(img_path, img_name)
        gt_mask_path = os.path.join(mask_path, stem + '.bmp')
        xml_full_path = os.path.join(xml_path, stem + '.xml')
        mask_name = os.path.join(dst_path, stem + '_seg.png')
        dst_name = os.path.join(dst_path, img_name)

        result, mask = detection.detect(full_name)
        # NOTE(review): mask_rect is not consumed below (the code drawing the
        # segmentation-derived boxes was disabled); the call is kept in case
        # seg2box_multiclass has side effects -- confirm and drop if not.
        mask_rect = detection.seg2box_multiclass(mask)
        mask_h, mask_w = mask.shape

        # cv2.imread returns None on failure instead of raising.
        img = cv2.imread(full_name)
        if img is None:
            raise IOError("cannot read image: {}".format(full_name))
        h, w, _ = img.shape

        gt_mask = cv2.imread(gt_mask_path, 0)
        if gt_mask is None:
            raise IOError(
                "cannot read ground-truth mask: {}".format(gt_mask_path))
        # cv2.resize expects dsize as (width, height).  Nearest-neighbour
        # interpolation is required for label maps: the default bilinear
        # filter would blend neighbouring class ids into invalid labels.
        gt_mask = cv2.resize(gt_mask, (mask_w, mask_h),
                             interpolation=cv2.INTER_NEAREST)

        print("seg result: ", gt_mask.shape, mask.shape)
        miou_seg, rec_seg, prec_seg = seg_scores(gt_mask, mask, n_cls)
        seg_miou.append(miou_seg)
        seg_recall.append(rec_seg)
        seg_precision.append(prec_seg)
        print("segmentation metrics: miou: ", miou_seg, " recall: ",
              rec_seg, " precision: ", prec_seg)

        # Bring the predicted mask to the image size, again as (width, height),
        # so that it lines up with the image it is blended onto.
        resize_mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
        out_im = vis.vis_seg(img.copy(), resize_mask, voc_palette)
        if not cv2.imwrite(mask_name, out_im):
            print("failed to write: {}".format(mask_name))

        # Draw every detection and annotate its centre.
        # NOTE(review): assumes item[0:4] are coordinates normalised to [0, 1]
        # and item[-1] / item[-2] are label text and score -- confirm against
        # CaffeDetection.detect.
        for item in result:
            xmin = int(round(item[0] * w))
            ymin = int(round(item[1] * h))
            xmax = int(round(item[2] * w))
            ymax = int(round(item[3] * h))
            img = cv2.rectangle(img, (xmin, ymin), (xmax, ymax),
                                (0, 255, 0), 2)
            pt_x = int((xmax + xmin) / 2)
            pt_y = int((ymax + ymin) / 2)
            cv2.putText(img, item[-1] + str(item[-2]), (pt_x, pt_y),
                        font, fontScale, fontColor, lineType)

        result = np.array(result)
        if len(result) == 0:
            # No detections on this image: it scores zero on every metric.
            miou_per_img, rec_per_img, prec_per_img = 0, 0, 0
        else:
            miou_per_img, rec_per_img, prec_per_img = voc_eval_per_img(
                result, xml_full_path, h, w)
        print("detection metrics: miou: ", miou_per_img, " recall: ",
              rec_per_img, " precision: ", prec_per_img)
        det_miou.append(miou_per_img)
        det_recall.append(rec_per_img)
        det_precision.append(prec_per_img)

        if not cv2.imwrite(dst_name, img):
            print("failed to write: {}".format(dst_name))

    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
    print("Final Segmentaiton: miou: ",
          np.array(seg_miou).sum()/len(seg_miou),
          " recall: ", np.array(seg_recall).sum()/len(seg_recall),
          " precision: ", np.array(seg_precision).sum()/len(seg_precision))
    print("Final deteciton: miou: ",
          np.array(det_miou).sum()/len(det_miou),
          " recall: ", np.array(det_recall).sum()/len(det_recall),
          " precision: ", np.array(det_precision).sum()/len(det_precision))
    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
cropped_img = img.crop((0, int(t), 256, int(b))) new_name = '{}:{}.png'.format(os.path.splitext(img_name)[0], index) with open(tmp_dir + 'test.txt', 'a') as txt: txt.write(tmp_dir + new_name + ' ' + tmp_dir + new_name + '\n') cropped_img.save(tmp_dir + new_name) num_crops += 1 net = caffe.Net(args.model, args.weights, caffe.TEST) masks = [] for i in range(0, num_crops): net.forward() predicted = net.blobs['prob'].data output = np.squeeze(predicted[0, :, :, :]) ind = np.argmax(output, axis=0) masks.append(ind) #pdb.set_trace() total_mask = np.vstack(tuple(masks)) img_cropped = np.array(img.crop((0, top[0], 256, bottom[-1]))) vis_img = Image.fromarray( vis.vis_seg(img_cropped, total_mask, vis.make_palette(4))) vis_img.save(os.path.join(blended_dir + img_name)) shutil.rmtree(tmp_dir, ignore_errors=True) os.makedirs(tmp_dir) os.mknod(tmp_dir + 'test.txt')