def augument(data_path, label, image_name, save_path, size=224, training=True):
    image_path = os.path.join(data_path, image_name)
    (name, extension) = splitfilename(image_name)
    extension = extension.lower()
    if extension not in IMG_EXTS:
        print('filtered image: %s' % image_name)
        return
    try:
        img = image.imdecode(open(image_path, 'rb').read()).astype('float32')
    except Exception as ex:
        print("error: ", ex)
        return
    if label is not None:
        label_path = os.path.join(save_path, label)
    else:
        label_path = save_path
    mkdir(label_path)
    if training:
        aug1 = image.HorizontalFlipAug(0.5)
        aug2 = image.HorizontalFlipAug(0.5)
        img = image.resize_short(img, size=384, interp=2)

        center_crop, _ = image.center_crop(img, size=(size, size))
        new_name = "%s_%s%s" % (name, "0", extension)
        cv.imwrite(os.path.join(label_path, new_name), center_crop.asnumpy())

        random_crop, _ = image.random_crop(img, size=(size, size))
        new_name = "%s_%s%s" % (name, "1", extension)
        cv.imwrite(os.path.join(label_path, new_name), random_crop.asnumpy())

        random_crop, _ = image.random_crop(img, size=(size, size))
        new_name = "%s_%s%s" % (name, "2", extension)
        cv.imwrite(os.path.join(label_path, new_name), random_crop.asnumpy())

        random_crop, _ = image.random_crop(img, size=(size, size))
        new_name = "%s_%s%s" % (name, "3", extension)
        cv.imwrite(os.path.join(label_path, new_name), random_crop.asnumpy())

        img_aug1 = aug1(random_crop).clip(0, 255)
        new_name = "%s_%s%s" % (name, "4", extension)
        cv.imwrite(os.path.join(label_path, new_name), img_aug1.asnumpy())

        img_aug2 = aug2(center_crop).clip(0, 255)
        new_name = "%s_%s%s" % (name, "5", extension)
        cv.imwrite(os.path.join(label_path, new_name), img_aug2.asnumpy())

        img_resize = image.imresize(img, w=size, h=size, interp=2)
        new_name = "%s_%s%s" % (name, "6", extension)
        cv.imwrite(os.path.join(label_path, new_name), img_resize.asnumpy())
    else:
        img = image.resize_short(img, size=size)
        img, _ = image.center_crop(img, size=(size, size))
        new_name = "%s%s" % (name, extension)
        cv.imwrite(os.path.join(label_path, new_name), img.asnumpy())
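# A minimal driver sketch for augument(), assuming an ImageFolder-style layout
# data/train/<label>/<image>. The paths and loop below are illustrative assumptions,
# not part of the original snippet; splitfilename, mkdir and IMG_EXTS are helpers
# expected to exist elsewhere in the project.
import os

data_root = 'data/train'      # assumed source directory
out_root = 'data/train_aug'   # assumed output directory

for label in os.listdir(data_root):
    label_dir = os.path.join(data_root, label)
    if not os.path.isdir(label_dir):
        continue
    for image_name in os.listdir(label_dir):
        # Writes the seven augmented crops per image under out_root/<label>/
        augument(label_dir, label, image_name, out_root, size=224, training=True)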
def predict_with_models_from_gluon_model_zoo_example():
    # Gluon model zoo provides multiple pre-trained powerful models.
    # We can download and load a pre-trained ResNet-50 V2 model that was trained on the ImageNet dataset.
    net = models.resnet50_v2(pretrained=True)

    # Download and load the text labels for each class.
    url = 'http://data.mxnet.io/models/imagenet/synset.txt'
    fname = download(url)
    with open(fname, 'r') as f:
        text_labels = [' '.join(l.split()[1:]) for l in f]

    # Randomly pick a dog image from Wikipedia as a test image, download and read it.
    url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/b/b5/Golden_Retriever_medium-to-light-coat.jpg/365px-Golden_Retriever_medium-to-light-coat.jpg'
    fname = download(url)
    x = image.imread(fname)

    # Use the image processing functions provided in the MXNet image module.
    x = image.resize_short(x, 256)
    x, _ = image.center_crop(x, (224, 224))
    plt.imshow(x.asnumpy())
    plt.show()

    def transform(data):
        data = data.transpose((2, 0, 1)).expand_dims(axis=0)
        rgb_mean = nd.array([0.485, 0.456, 0.406]).reshape((1, 3, 1, 1))
        rgb_std = nd.array([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1))
        return (data.astype('float32') / 255 - rgb_mean) / rgb_std

    prob = net(transform(x)).softmax()
    idx = prob.topk(k=5)[0]
    for i in idx:
        i = int(i.asscalar())
        print('With prob = %.5f, it contains %s' % (prob[0, i].asscalar(), text_labels[i]))
def transform_predict(im):
    im = im.astype('float32') / 255
    im = image.resize_short(im, 324)
    im = nd.transpose(im, (2, 0, 1))
    im = mx.nd.image.normalize(im, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    im = ten_crop(im, (299, 299))
    return im
def transform_cropped_img(im):
    im = im.astype('float32') / 255
    im = image.resize_short(im, 256)
    im = nd.transpose(im, (2, 0, 1))
    im = mx.nd.image.normalize(im, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    im = ten_crop(im, (224, 224))
    return im
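# The ten_crop helper used by several of these transform functions is not included
# in the snippets. Below is a minimal sketch of the classic ten-crop augmentation
# (center + four corner crops, each plus its horizontal flip), assuming a CHW
# NDArray input and a (crop_h, crop_w) size tuple. The project's real helper may
# differ in argument order or crop layout.
from mxnet import nd

def ten_crop(img, size):
    # img: CHW float NDArray; size: (crop_h, crop_w) -- assumption for this sketch
    c, h, w = img.shape
    ch, cw = size
    assert h >= ch and w >= cw, 'image is smaller than the crop size'

    def crop(y, x):
        return img[:, y:y + ch, x:x + cw]

    center = crop((h - ch) // 2, (w - cw) // 2)
    tl = crop(0, 0)              # top-left
    tr = crop(0, w - cw)         # top-right
    bl = crop(h - ch, 0)         # bottom-left
    br = crop(h - ch, w - cw)    # bottom-right
    crops = [center, tl, tr, bl, br]
    # add horizontal flips (flip along the width axis of a CHW tensor)
    crops += [nd.flip(c_, axis=2) for c_ in crops]
    return nd.stack(*crops, axis=0)  # shape: (10, C, crop_h, crop_w)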
def transform_val(data, label):
    im = data.astype('float32') / 255
    im = image.resize_short(im, 256)
    im, _ = image.center_crop(im, (224, 224))
    im = nd.transpose(im, (2, 0, 1))
    im = mx.nd.image.normalize(im, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    return im, nd.array([label]).asscalar()
def transform_val(data, label):
    im = data.astype('float32') / 255
    im = image.resize_short(im, 256)           # resize so the short edge is 256
    im, _ = image.center_crop(im, (224, 224))  # center-crop to 224x224
    im = nd.transpose(im, (2, 0, 1))
    # im = mx.nd.image.normalize(im, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))  # normalization
    return im, nd.array([label]).asscalar()    # return the image and its label
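# Hypothetical wiring of transform_val into a Gluon validation pipeline; 'data/val'
# is an assumed ImageFolder-style directory (data/val/<class_name>/<image>.jpg),
# not taken from the original code.
from mxnet.gluon.data import DataLoader
from mxnet.gluon.data.vision import ImageFolderDataset

val_data = DataLoader(
    ImageFolderDataset('data/val').transform(transform_val),
    batch_size=32, shuffle=False)

for batch_imgs, batch_labels in val_data:
    pass  # feed batch_imgs to the network for validation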
def main():
    net = models.resnet50_v2(pretrained=True)

    url = 'http://data.mxnet.io/models/imagenet/synset.txt'
    fname = download(url)
    with open(fname, 'r') as f:
        text_labels = [' '.join(l.split()[1:]) for l in f]

    url2 = ('https://upload.wikimedia.org/wikipedia/commons/thumb/b/b5/'
            'Golden_Retriever_medium-to-light-coat.jpg/'
            '365px-Golden_Retriever_medium-to-light-coat.jpg')
    fname2 = download(url2)
    x = image.imread(fname2)
    x = image.resize_short(x, 256)
    x, _ = image.center_crop(x, (224, 224))
    plt.imshow(x.asnumpy())
    plt.show()

    prob = net(transform(x)).softmax()
    idx = prob.topk(k=5)[0]
    for i in idx:
        i = int(i.asscalar())
        print('With prob = %.5f, it contains %s' % (prob[0, i].asscalar(), text_labels[i]))
def transform_image(img_path):
    img = image.imread(img_path)
    data = image.resize_short(img, 256)
    data, _ = image.center_crop(data, (224, 224))
    data = data.transpose((2, 0, 1)).expand_dims(axis=0)
    rgb_mean = nd.array([0.485, 0.456, 0.406]).reshape((1, 3, 1, 1))
    rgb_std = nd.array([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1))
    data = (data.astype("float32") / 255 - rgb_mean) / rgb_std
    return data
def transform_predict(im):
    im = im.astype('float32') / 255
    im = image.resize_short(im, 256)
    im = nd.transpose(im, (2, 0, 1))
    # im = mx.nd.image.normalize(im, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    # im = mx.image.color_normalize(im, mean=nd.array([[[0.485]], [[0.456]], [[0.406]]]), std=nd.array([[[0.229]], [[0.224]], [[0.225]]]))
    im = mx.image.color_normalize(im, mean=0.485, std=0.229)
    im = ten_crop(im, (224, 224))
    return im
def transform_predict(im, size):
    im = im.astype('float32') / 255
    im = image.resize_short(im, size, interp=1)
    # im = image.resize_short(im, 331)
    im = nd.transpose(im, (2, 0, 1))
    im = mx.nd.image.normalize(im, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    # im = forty_crop(im, (352, 352))
    im = ten_crop(im, (448, 448))
    return im
def __getitem__(self, idx):
    raw_line = self.raw_label[idx]
    img_path, bbox = raw_line['img_path'], raw_line['bbox']
    raw_image = image.imread(img_path)
    raw_image = image.resize_short(raw_image, BASE_SHAPE)
    if self.dataset_type == 'train':
        raw_image = image.HorizontalFlipAug(0.5)(raw_image)
    raw_image = normalize_image(raw_image)
    data = raw_image.transpose((2, 0, 1))
    return data, nd.array([raw_line['argmax_index_label']]), nd.array(raw_line['hinge_label'])
def get_img(self):
    # Note: raw_img_path, bbox, width and height are expected to come from the
    # enclosing scope / instance; they are not defined inside this snippet.
    img_path = raw_img_path.as_posix()
    raw_image = image.imread(img_path)
    raw_mask = nd.zeros((raw_image.shape[0], raw_image.shape[1], 1)).astype(np.uint8)
    concated_data = nd.zeros((raw_image.shape[0], raw_image.shape[1], 4))
    raw_mask[bbox[0]:width, bbox[1]:height] = 255
    mask_raw_img = nd.concat(raw_image, raw_mask, dim=2)
    norm_mask_raw_img = normalize_image(mask_raw_img)
    resized_norm_mask_raw_img = image.resize_short(norm_mask_raw_img, BASE_SHAPE)
    data = resized_norm_mask_raw_img.transpose((2, 0, 1))
    return data
def load_image(img_path, long_side_length):
    x = image.imread(img_path)
    x = image.resize_short(x, long_side_length)  # note: resize_short resizes the *short* edge
    x, _ = image.center_crop(x, (448, 448))
    x = x.astype('float32')
    x = x / 255
    x = image.color_normalize(x, mean=nd.array([0.485, 0.456, 0.406]), std=nd.array([0.229, 0.224, 0.225]))
    x = x.transpose((2, 0, 1)).expand_dims(axis=0)  # HWC -> NCHW; a plain reshape would scramble the channels
    return x
def classify(image_file_path):
    x = image.imread(image_file_path)
    x = image.resize_short(x, 256)
    x, _ = image.center_crop(x, (224, 224))
    plt.imshow(x.asnumpy())
    plt.show()

    prob = net(transform(x)).softmax()
    idx = prob.topk(k=5)[0]
    print(' prob  | name')
    print(' ------------------')
    for i in idx:
        i = int(i.asscalar())
        print(' %.3f | %s' % (prob[0, i].asscalar(), text_labels[i]))
def transformPredict(img):
    '''
    Input an image, transform and random-crop it, and return the stacked processed crops.
    :param img: image data, mx.ndarray, h*w*c
    :return img: image data, mx.ndarray, b*c*h*w
    '''
    img = img.astype('float32') / 255            # 0-255 to 0-1
    img = image.resize_short(img, 120)           # resize the short edge
    img = nd.transpose(img, (2, 0, 1))           # HWC -> CHW; sixCrop stacks crops into the batch dimension
    img = mx.nd.image.normalize(img, mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2))  # normalize
    img = sixCrop(img, (112, 112))               # random crop
    return img
def transform_filter(data, label):
    """
    Transform function for image/label pre-processing.
    :param data: image data, mx.ndarray
    :param label: image label, mx.ndarray
    """
    im = filter_image(data, filter_level)  # compress the image to a certain bit depth
    im = im.astype("float32") / 255
    im = image.resize_short(im, RESIZE_SIZE)
    im, _ = image.center_crop(im, (INPUT_SIZE, INPUT_SIZE))
    im = nd.transpose(im, (2, 0, 1))
    im = mx.nd.image.normalize(im, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    return im, nd.array([label]).asscalar()
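# filter_image and the RESIZE_SIZE / INPUT_SIZE / filter_level constants are defined
# elsewhere in the original project. Based on the comment above, a plausible sketch
# of the bit-depth compression step might look like this; it is an assumption, not
# the project's actual implementation.
from mxnet import nd

RESIZE_SIZE = 256   # assumed value
INPUT_SIZE = 224    # assumed value
filter_level = 4    # assumed: number of bits to keep per channel

def filter_image(data, bits):
    # Quantize an HWC uint8 image down to `bits` bits per channel by flooring
    # each value to the nearest multiple of the quantization step.
    step = 2 ** (8 - bits)
    return (nd.floor(data.astype('float32') / step) * step).astype('uint8')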
def transform(data, label):
    '''
    Function that converts "data" into the input image tensor for a CNN.
    The label is converted into a float tensor.
    '''
    image = mx.nd.array(data).expand_dims(axis=2)
    image = resize_short(image, int(800 / 3))
    image = image.transpose([2, 0, 1]) / 255.
    label = label[0].astype(np.float32)

    bb = label.copy()
    new_w = (1 + expand_bb_scale) * bb[2]
    new_h = (1 + expand_bb_scale) * bb[3]
    bb[0] = bb[0] - (new_w - bb[2]) / 2
    bb[1] = bb[1] - (new_h - bb[3]) / 2
    bb[2] = new_w
    bb[3] = new_h
    return image, mx.nd.array(bb)
def transform(image, label):
    '''
    Function that resizes the image into the input image tensor for a CNN.
    The labels (bounding boxes) are expanded, converted into (x, y, x+w, y+h),
    and zero-padded to the maximum number of labels. Finally, they are converted
    into a float tensor.
    '''
    max_label_n = 128 if detection_box == "word" else 13

    # Resize the image
    image = np.expand_dims(image, axis=2)
    image = mx.nd.array(image)
    image = resize_short(image, image_size)
    image = image.transpose([2, 0, 1]) / 255.

    # Expand the bounding box by expand_bb_scale
    bb = label.copy()
    new_w = (1 + expand_bb_scale) * bb[:, 2]
    new_h = (1 + expand_bb_scale) * bb[:, 3]
    bb[:, 0] = bb[:, 0] - (new_w - bb[:, 2]) / 2
    bb[:, 1] = bb[:, 1] - (new_h - bb[:, 3]) / 2
    bb[:, 2] = new_w
    bb[:, 3] = new_h
    label = bb

    # Convert the bounding boxes from (x, y, w, h) to (x, y, x + w, y + h)
    label = label.astype(np.float32)
    label[:, 2] = label[:, 0] + label[:, 2]
    label[:, 3] = label[:, 1] + label[:, 3]

    # Zero pad the data
    label_n = label.shape[0]
    label_padded = np.zeros(shape=(max_label_n, 5))
    label_padded[:label_n, 1:] = label
    label_padded[:label_n, 0] = np.ones(shape=(1, label_n))
    label_padded = mx.nd.array(label_padded)
    return image, label_padded
def transform(image, bbox, text):
    '''
    Function that resizes the image into the input image tensor for a CNN.
    The bounding boxes are expanded and zero-padded to the maximum number of
    labels. Finally, they are converted into a float tensor.
    '''
    max_label_n = 128

    # Resize the image
    image = np.expand_dims(image, axis=2)
    image = mx.nd.array(image)
    image = resize_short(image, image_size)
    image = image.transpose([2, 0, 1]) / 255.

    # Expand the bounding box by expand_bb_scale
    bb = bbox.copy()
    new_w = (1 + expand_bb_scale) * bb[:, 2]
    new_h = (1 + expand_bb_scale) * bb[:, 3]
    bb[:, 0] = bb[:, 0] - (new_w - bb[:, 2]) / 2
    bb[:, 1] = bb[:, 1] - (new_h - bb[:, 3]) / 2
    bb[:, 2] = new_w
    bb[:, 3] = new_h
    bbox = bb
    bbox = bbox.astype(np.float32)

    # Zero pad the data
    label_n = bbox.shape[0]
    label_padded = np.zeros(shape=(max_label_n, 5))
    label_padded[:label_n, 1:] = bbox
    label_padded[:label_n, 0] = np.ones(shape=(1, label_n))
    label_padded = mx.nd.array(label_padded)
    return image, label_padded
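# To make the box-expansion arithmetic above concrete, here is a small worked example
# with an assumed expand_bb_scale of 0.1 and a single (x, y, w, h) box; the numbers
# are illustrative only.
import numpy as np

expand_bb_scale = 0.1                        # assumed value for the example
bb = np.array([[10.0, 20.0, 100.0, 50.0]])   # one box as (x, y, w, h)

new_w = (1 + expand_bb_scale) * bb[:, 2]     # 110.0
new_h = (1 + expand_bb_scale) * bb[:, 3]     # 55.0
bb[:, 0] -= (new_w - bb[:, 2]) / 2           # x shifts left by 5   -> 5.0
bb[:, 1] -= (new_h - bb[:, 3]) / 2           # y shifts up by 2.5   -> 17.5
bb[:, 2] = new_w
bb[:, 3] = new_h
print(bb)  # [[  5.   17.5 110.   55. ]]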
def load_vgg16_image(img_path, image_width=224, image_height=224):
    x = image.imread(img_path)
    x = image.resize_short(x, 256)
    x, _ = image.center_crop(x, (image_width, image_height))
    return x
import matplotlib.pyplot as plt
from mxnet import image, nd
from mxnet.gluon.model_zoo import vision as models
from mxnet.gluon.utils import download

net = models.resnet50_v2(pretrained=True)

url = 'http://data.mxnet.io/models/imagenet/synset.txt'
fname = download(url)
with open(fname, 'r') as f:
    text_labels = [' '.join(l.split()[1:]) for l in f]

url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/b/b5/Golden_Retriever_medium-to-light-coat.jpg/365px-Golden_Retriever_medium-to-light-coat.jpg'
fname = download(url)
x = image.imread(fname)
x = image.resize_short(x, 256)
x, _ = image.center_crop(x, (224, 224))
plt.imshow(x.asnumpy())
plt.show()

def transform(data):
    data = data.transpose((2, 0, 1)).expand_dims(axis=0)
    rgb_mean = nd.array([0.485, 0.456, 0.406]).reshape((1, 3, 1, 1))
    rgb_std = nd.array([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1))
    return (data.astype('float32') / 255 - rgb_mean) / rgb_std

prob = net(transform(x)).softmax()
idx = prob.topk(k=5)[0]
for i in idx:
    i = int(i.asscalar())
    print('With prob = %.5f, it contains %s' % (prob[0, i].asscalar(), text_labels[i]))
def load_vgg16_image(img_path):
    x = image.imread(img_path)
    x = image.resize_short(x, 256)
    x, _ = image.center_crop(x, (224, 224))
    return x
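# A hypothetical way to feed the output of load_vgg16_image to a pre-trained Gluon
# VGG-16. The normalization step and the 'dog.jpg' path are assumptions; the original
# snippets do not show how this helper's output is consumed.
import mxnet as mx
from mxnet.gluon.model_zoo import vision as models

net = models.vgg16(pretrained=True)

x = load_vgg16_image('dog.jpg')               # placeholder path
x = x.astype('float32') / 255
x = mx.nd.image.normalize(x.transpose((2, 0, 1)),  # HWC -> CHW, ImageNet stats
                          mean=(0.485, 0.456, 0.406),
                          std=(0.229, 0.224, 0.225))
prob = net(x.expand_dims(axis=0)).softmax()
print(prob.topk(k=5))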
def transform_fully_img(im):
    im = image.resize_short(im, BASE_SHAPE)
    im = normalize_image(im)
    im = im.transpose((2, 0, 1))
    im = im.expand_dims(axis=0)
    return im
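# normalize_image and BASE_SHAPE are project helpers not included in these snippets.
# A plausible sketch, assuming ImageNet-style mean/std normalization of an HWC image,
# is shown below; the original implementation may differ.
from mxnet import nd

BASE_SHAPE = 224  # assumed short-edge length

def normalize_image(img):
    # Assumed implementation: scale an HWC uint8 image to [0, 1] and apply
    # per-channel ImageNet mean/std normalization, keeping the HWC layout so
    # callers can transpose to CHW afterwards.
    img = img.astype('float32') / 255
    mean = nd.array([0.485, 0.456, 0.406]).reshape((1, 1, 3))
    std = nd.array([0.229, 0.224, 0.225]).reshape((1, 1, 3))
    return (img - mean) / std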
print(Essex_test_files)
saved_path = r'op/'
# Essex_test_files = glob.glob(r'I:\t\depthmap\*.jpg')
Essex_test_files = glob.glob(r'photos/*.jpg')
print("len of Essex_test_files:", len(Essex_test_files))

import random
print("before sampling:", len(Essex_test_files))
Essex_test_files = random.sample(Essex_test_files, len(Essex_test_files))
print("after sampling:", len(Essex_test_files))

for idx, filename in enumerate(Essex_test_files):
    try:
        img = image.imread(filename)
        img = image.resize_short(img, 1024)
        # img = image.resize_short(img, 100)
        print("filename: ", filename)
        # ctx = mx.gpu(0)
        img = test_transform(img, ctx)
        # print("img: ", img)
        output = model.predict(img)
        # print("output: ", output)
        predict = mx.nd.squeeze(mx.nd.argmax(output, 1)).asnumpy()
        # print("predict: ", predict)
    except Exception as e:
        print("Error in :", ' ' + filename, e)
        continue
def main():
    start = timer()
    print('Processing Start time: %.1f' % (start))
    print("current time", datetime.now())

    gauth = GoogleAuth()
    gauth.LocalWebserverAuth()
    drive = GoogleDrive(gauth)

    # Auto-iterate through all files that matches this query
    file_list = drive.ListFile({'q': "'root' in parents"}).GetList()
    for file in file_list:
        # print('title: {}, id: {}'.format(file1['title'], file1['id']))
        file_id = None
        if file['title'] == "semanticsegmentation":
            print('Folder Found')
            file_id = file['id']
            break

    if file_id is not None:
        classes = ["wall","building;edifice","sky","floor;flooring","tree","ceiling","road;route","bed","windowpane;window","grass","cabinet","sidewalk;pavement","person;individual;someone;somebody;mortal;soul","earth;ground","door;double;door","table","mountain;mount","plant;flora;plant;life","curtain;drape;drapery;mantle;pall","chair","car;auto;automobile;machine;motorcar","water","painting;picture","sofa;couch;lounge","shelf","house","sea","mirror","rug;carpet;carpeting","field","armchair","seat","fence;fencing","desk","rock;stone","wardrobe;closet;press","lamp","bathtub;bathing;tub;bath;tub","railing;rail","cushion","base;pedestal;stand","box","column;pillar","signboard;sign","chest;of;drawers;chest;bureau;dresser","counter","sand","sink","skyscraper","fireplace;hearth;open;fireplace","refrigerator;icebox","grandstand;covered;stand","path","stairs;steps","runway","case;display;case;showcase;vitrine","pool;table;billiard;table;snooker;table","pillow","screen;door;screen","stairway;staircase","river","bridge;span","bookcase","blind;screen","coffee;table;cocktail;table","toilet;can;commode;crapper;pot;potty;stool;throne","flower","book","hill","bench","countertop","stove;kitchen;stove;range;kitchen;range;cooking;stove","palm;palm;tree","kitchen;island","computer;computing;machine;computing;device;data;processor;electronic;computer;information;processing;system","swivel;chair","boat","bar","arcade;machine","hovel;hut;hutch;shack;shanty","bus;autobus;coach;charabanc;double-decker;jitney;motorbus;motorcoach;omnibus;passenger;vehicle","towel","light;light;source","truck;motortruck","tower","chandelier;pendant;pendent","awning;sunshade;sunblind","streetlight;street;lamp","booth;cubicle;stall;kiosk","television;television;receiver;television;set;tv;tv;set;idiot;box;boob;tube;telly;goggle;box","airplane;aeroplane;plane","dirt;track","apparel;wearing;apparel;dress;clothes","pole","land;ground;soil","bannister;banister;balustrade;balusters;handrail","escalator;moving;staircase;moving;stairway","ottoman;pouf;pouffe;puff;hassock","bottle","buffet;counter;sideboard","poster;posting;placard;notice;bill;card","stage","van","ship","fountain","conveyer;belt;conveyor;belt;conveyer;conveyor;transporter","canopy","washer;automatic;washer;washing;machine","plaything;toy","swimming;pool;swimming;bath;natatorium","stool","barrel;cask","basket;handbasket","waterfall;falls","tent;collapsible;shelter","bag","minibike;motorbike","cradle","oven","ball","food;solid;food","step;stair","tank;storage;tank","trade;name;brand;name;brand;marque","microwave;microwave;oven","pot;flowerpot","animal;animate;being;beast;brute;creature;fauna","bicycle;bike;wheel;cycle","lake","dishwasher;dish;washer;dishwashing;machine","screen;silver;screen;projection;screen","blanket;cover","sculpture","hood;exhaust;hood","sconce","vase","traffic;light;traffic;signal;stoplight","tray","ashcan;trash;can;garbage;can;wastebin;ash;bin;ash-bin;ashbin;dustbin;trash;barrel;trash;bin","fan","pier;wharf;wharfage;dock","crt;screen","plate","monitor;monitoring;device","bulletin;board;notice;board","shower","radiator","glass;drinking;glass","clock","flag"]

        files = glob.glob(r'/Users/divyachandana/Documents/NJIT/work/summertasks/jun1-jun5/atlanta/*.jpg')
        print("Total Files", len(files))
        columns = ['filename', 'class', 'total_pixel', 'individual_pixel', 'ratio', 'timestamp']
        # ---------- drive code -----
        with open('semantic_results_atlanta.csv', 'a') as csvfile:
            csvwriter = csv.writer(csvfile, lineterminator='\n')
            # csvwriter.writerow(columns)
            # i = 0
            for f in files:
                file_check_query = "select count(*) from {} where filename like '%{}%'".format('semantic_results_atlanta', os.path.basename(f))
                # print(file_check_query)
                # i += 1
                # print(i)
                count = dbms.get_count_result(file_check_query)
                # print(count)
                if count > 0:
                    continue
                # print('resuming', f)
                try:
                    img = image.imread(f)
                    img = image.resize_short(img, 1024)
                    # img = image.resize_short(img, 100)
                    # print("filename: ", f)
                    # ctx = mx.gpu(0)
                    img = test_transform(img, ctx)
                    # print("img: ", img)
                    output = model.predict(img)
                    # print("output: ", output)
                    predict = mx.nd.squeeze(mx.nd.argmax(output, 1)).asnumpy()
                    # print("predict: ", predict)
                    mask = get_color_pallete(predict, 'ade20k')
                    # predict.save('predict.png')
                    # mmask = mpimg.imread('output.png')
                    predict = predict.astype(numpy.uint8)
                    convert_single_array = numpy.array(predict)
                    unique_numbers = numpy.unique(convert_single_array)
                    # print(unique_numbers)
                    new_basename = os.path.basename(f).replace(".jpg", ".png")
                    new_name = os.path.join('output/', new_basename)
                    mask.save(new_name)
                    # color_img = image.imread(new_name)
                    # colors, counts = numpy.unique(color_img.reshape(-1, 3), return_counts=True, axis=0)
                    total_pixel = numpy.sum(predict)
                    d_file = drive.CreateFile({'parents': [{'id': file_id}], 'title': os.path.basename(new_name)})
                    d_file.SetContentFile(new_name)
                    d_file.Upload()
                    # print('Created file %s with mimeType %s' % (d_file['title'], d_file['mimeType']))
                    combile_all_csv_data = []
                    combine_sql_srting_format = []
                    for i in unique_numbers:
                        individual_count = numpy.sum(predict == i)
                        # print(individual_count)
                        csv_data = []
                        csv_data.append(os.path.basename(f))
                        csv_data.append(classes[i])
                        csv_data.append(total_pixel)
                        csv_data.append(individual_count)
                        csv_data.append(round((individual_count / total_pixel), 6))
                        time_stamp = datetime.now()
                        csv_data.append(time_stamp)
                        # csv_data = [filename, predict, colors, counts, total_pixel]
                        # print(csv_data)
                        combile_all_csv_data.append(csv_data)
                        sql_srting = ["NULL" if val == None else "'" + str(val) + "'" for val in csv_data]
                        sql_srting_format = ",".join([str(val) for val in sql_srting])
                        combine_sql_srting_format.append(sql_srting_format)
                    csvwriter.writerows(combile_all_csv_data)
                    dbms.insertmany_sqlite3('semantic_results_atlanta', ','.join(columns), combine_sql_srting_format)
                    os.remove(new_name)
                    # if idx % 10 == 0:
                    #     print("Processed: ", idx)
                except Exception as e:
                    print("Error in :", ' ' + f, e)
                    continue

    print('Finished')
    end = timer()
    print('Processing time: %.1f' % (end - start))