def main():
    """Rebuild the train/val and test image directories as trees of symlinks.

    Reads the data list from ``settings.DATA_LIST``, indexes every file found
    under ``settings.IMAGE_SOURCES_ROOTS`` by the third element yielded by
    ``common_tools.each_file_tuple``, then recreates each destination
    directory from scratch and fills it with relative symlinks.
    """
    with open(settings.DATA_LIST) as list_file:
        data_list = json.load(list_file)

    # Maps the third tuple element (used as the file name) to the first (used
    # as the symlink source). A later root overwrites an earlier entry that
    # has the same key.
    source_of = {}
    for source_root in settings.IMAGE_SOURCES_ROOTS:
        for source_path, _, name in common_tools.each_file_tuple(source_root):
            source_of[name] = source_path

    def populate(dest_dir, first_split, second_split):
        # Start from an empty directory so stale links never survive a rerun.
        if os.path.isdir(dest_dir):
            shutil.rmtree(dest_dir)
        os.makedirs(dest_dir)

        def link(file_name):
            source = source_of[file_name]
            target = os.path.join(dest_dir, file_name)
            os.symlink(os.path.relpath(source, os.path.dirname(target)), target)

        # A set removes file names that appear in both splits.
        metas = data_list[first_split] + data_list[second_split]
        unique_names = list({meta['file_name'] for meta in metas})
        common_tools.multithreaded(link, unique_names, num_thread=8)

    populate(settings.TRAINVAL_IMAGE_DIR, 'train', 'val')
    populate(settings.TEST_IMAGE_DIR, 'test_cls', 'test_det')
def get_feed_dict_sync(self):
    """Build one feed dict of randomly sampled images and one-hot labels.

    For each of ``self.FLAGS.batch_size`` slots a class index is drawn
    uniformly from ``range(self.num_classes)``, then one entry is drawn
    uniformly from ``self.labels[class_index]``. Every drawn entry is passed
    through ``self.cv_preprocess_image`` on a pool of 8 threads.

    Returns:
        dict mapping ``self.image_holder`` to the preprocessed images and
        ``self.label_holder`` to the one-hot labels, both as float64 arrays.
    """
    batch_size = self.FLAGS.batch_size
    image_size = self.FLAGS.train_image_size

    # All class draws happen before any image draw, so the sequence of calls
    # into ``random`` is the same as before for a given seed.
    class_ids = [random.randrange(self.num_classes) for _ in range(batch_size)]

    labels_feed = []
    for class_id in class_ids:
        one_hot = [0.] * self.num_classes
        one_hot[class_id] = 1.
        labels_feed.append(one_hot)

    images_feed = [
        self.labels[class_id][random.randrange(0, len(self.labels[class_id]))]
        for class_id in class_ids
    ]

    def job(i):
        # Each worker replaces its own slot in place; slots never overlap.
        images_feed[i] = self.cv_preprocess_image(
            images_feed[i], image_size, image_size, True)

    common_tools.multithreaded(job, range(len(images_feed)), num_thread=8)

    # ``np.float`` was only an alias of the builtin ``float`` and was removed
    # in NumPy 1.24; ``np.float64`` is the dtype it always resolved to.
    return {
        self.image_holder: np.array(images_feed, dtype=np.float64),
        self.label_holder: np.array(labels_feed, dtype=np.float64),
    }
def main():
    """Load the images listed in the tags file and pickle them with labels.

    Each line of ``../data/icpr_data_char_1/train_15962.tags`` is split on
    whitespace; the first field is read as an image path and the second must
    be a single-character label. The ``[image, label]`` pairs are written to
    ``self_synth.pkl``.
    """
    assert six.PY3

    with open('../data/icpr_data_char_1/train_15962.tags') as tags_file:
        tag_lines = tags_file.read().splitlines()
    total = len(tag_lines)

    # One private bucket per input line so worker threads never share a list.
    buckets = [[] for _ in tag_lines]

    def load_train(index):
        if index % 100 == 0:
            print('trainval', index, '/', total)
        fields = tag_lines[index].strip().split()
        image = misc.imread(fields[0])
        assert len(fields[1]) == 1
        buckets[index].append([image, fields[1]])

    common_tools.multithreaded(load_train, range(total), num_thread=8)
    samples = common_tools.reduce_sum(buckets)

    with open('self_synth.pkl', 'wb') as pickle_file:
        cPickle.dump(samples, pickle_file)
    samples = None  # release memory
def main():
    """Crop character images for train/val and test and pickle each set.

    Train/val samples are ``[crop, text]`` for every character flagged
    ``is_chinese``; test samples are ``[crop, None]`` for every proposal.
    Each pickle is serialised in memory and written to disk in slices of at
    most ``2**31 - 1`` bytes.
    """
    assert six.PY3

    def write_in_slices(payload, path):
        # No single write call receives more than 2**31 - 1 bytes.
        max_bytes = 2**31 - 1
        with open(path, 'wb') as out:
            for start in range(0, len(payload), max_bytes):
                out.write(payload[start:start + max_bytes])

    # ---- train + val -----------------------------------------------------
    with open(settings.TRAIN) as train_file:
        trainval_lines = train_file.read().splitlines()
    with open(settings.VAL) as val_file:
        trainval_lines += val_file.read().splitlines()

    # One private bucket per annotation line, filled by the worker threads.
    train = [[] for _ in trainval_lines]

    def load_train(i):
        if i % 100 == 0:
            print('trainval', i, '/', len(trainval_lines))
        anno = json.loads(trainval_lines[i].strip())
        image = misc.imread(
            os.path.join(settings.TRAINVAL_IMAGE_DIR, anno['file_name']))
        assert image.shape == (anno['height'], anno['width'], 3)
        for char in anno_tools.each_char(anno):
            if char['is_chinese']:
                train[i].append(
                    [crop(image, char['adjusted_bbox']), char['text']])

    common_tools.multithreaded(
        load_train, range(len(trainval_lines)), num_thread=8)
    train = common_tools.reduce_sum(train)
    train_out = pickle.dumps(train, protocol=pickle.HIGHEST_PROTOCOL)
    write_in_slices(train_out, settings.TRAINVAL_PICKLE)
    train = None  # release memory
    train_out = None

    # ---- test ------------------------------------------------------------
    with open(settings.TEST_CLASSIFICATION) as test_file:
        test_lines = test_file.read().splitlines()
    test = [[] for _ in test_lines]

    def load_test(i):
        if i % 100 == 0:
            print('test', i, '/', len(test_lines))
        anno = json.loads(test_lines[i].strip())
        image = misc.imread(
            os.path.join(settings.TEST_IMAGE_DIR, anno['file_name']))
        for proposal in anno['proposals']:
            test[i].append([crop(image, proposal['adjusted_bbox']), None])

    common_tools.multithreaded(load_test, range(len(test_lines)), num_thread=8)
    test = common_tools.reduce_sum(test)
    test_out = pickle.dumps(test, protocol=pickle.HIGHEST_PROTOCOL)
    write_in_slices(test_out, settings.TEST_PICKLE)
    test = None  # release memory
    test_out = None
def crop_test_images(list_file_name):
    """Tile every ``*.jpg``/``*.png`` test image and list the crops written.

    Each image in ``settings.TEST_IMAGE_DIR`` is cut into square tiles at
    every level in ``settings.TEST_CROP_LEVELS``; tiles are saved under
    ``settings.TEST_CROPPED_DIR`` with the source extension, and their paths
    are written one per line to ``list_file_name``.
    """
    with open(settings.CATES) as cates_file:
        cates = json.load(cates_file)
    # Built as in the original even though nothing below consumes it.
    text2cate = {cate['text']: cate['cate_id'] for cate in cates}

    if not os.path.isdir(settings.TEST_CROPPED_DIR):
        os.makedirs(settings.TEST_CROPPED_DIR)

    image_paths = glob.glob(os.path.join(settings.TEST_IMAGE_DIR, '*.jpg'))
    image_paths += glob.glob(os.path.join(settings.TEST_IMAGE_DIR, '*.png'))
    testset_size = len(image_paths)

    def crop_once(image_path, collected):
        image = cv2.imread(image_path)
        image_shape = image.shape
        image_id, image_type = os.path.splitext(os.path.basename(image_path))
        written = []
        for level_id, (cropratio, cropoverlap) in enumerate(
                settings.TEST_CROP_LEVELS):
            side = int(round(settings.TEST_IMAGE_SIZE // cropratio))
            boxes = darknet_tools.get_crop_bboxes(
                image_shape, (side, side), (cropoverlap, cropoverlap))
            for box in boxes:
                left = box['xlo']
                top = box['ylo']
                basename = '{}_{}_{}'.format(image_id, level_id, box['name'])
                out_path = os.path.join(settings.TEST_CROPPED_DIR,
                                        basename + image_type)
                written.append(out_path)
                cv2.imwrite(out_path,
                            image[top:top + side, left:left + side])
        # In-place extend so the caller's shared list receives the paths.
        collected.extend(written)

    # A one-slot queue serves as a progress counter shared by the workers:
    # whoever holds the value prints (every 100th) and puts back value + 1.
    counter = queue.Queue()
    counter.put(0)

    def foo(*args):
        done = counter.get()
        if done % 100 == 0:
            print('crop test', done, '/', testset_size)
        counter.put(done + 1)
        crop_once(*args)

    all_crops = []
    jobs = [(image_path, all_crops) for image_path in image_paths]
    common_tools.multithreaded(foo, jobs, num_thread=4)

    with open(list_file_name, 'w') as list_file:
        for crop_path in all_crops:
            list_file.write(crop_path)
            list_file.write('\n')
def crop_test_images(list_file_name):
    """Tile the ``test_det`` images and write the list of crop file names.

    Every image is asserted to have shape ``(2048, 2048, 3)``. A threaded
    pass writes the crops to ``settings.TEST_CROPPED_DIR``; a second,
    sequential pass recomputes the same names without touching images and
    writes them one per line to ``list_file_name``.
    """
    imshape = (2048, 2048, 3)

    with open(settings.CATES) as cates_file:
        cates = json.load(cates_file)
    # Built as in the original even though nothing below consumes it.
    text2cate = {cate['text']: cate['cate_id'] for cate in cates}

    if not os.path.isdir(settings.TEST_CROPPED_DIR):
        os.makedirs(settings.TEST_CROPPED_DIR)

    with open(settings.DATA_LIST) as data_file:
        data_list = json.load(data_file)
    test_det = data_list['test_det']

    def crop_once(anno, write_images):
        image_id = anno['image_id']
        image = None
        if write_images:
            image = cv2.imread(
                os.path.join(settings.TEST_IMAGE_DIR, anno['file_name']))
            assert image.shape == imshape
        names = []
        for level_id, (cropratio, cropoverlap) in enumerate(
                settings.TEST_CROP_LEVELS):
            side = int(round(settings.TEST_IMAGE_SIZE // cropratio))
            boxes = darknet_tools.get_crop_bboxes(
                imshape, (side, side), (cropoverlap, cropoverlap))
            for box in boxes:
                left = box['xlo']
                top = box['ylo']
                basename = '{}_{}_{}'.format(image_id, level_id, box['name'])
                out_path = os.path.join(settings.TEST_CROPPED_DIR,
                                        '{}.jpg'.format(basename))
                names.append(out_path)
                if write_images:
                    cv2.imwrite(out_path,
                                image[top:top + side, left:left + side])
        return names

    # A one-slot queue serves as a progress counter shared by the workers.
    counter = queue.Queue()
    counter.put(0)

    def foo(*args):
        done = counter.get()
        if done % 100 == 0:
            print('crop test', done, '/', len(test_det))
        counter.put(done + 1)
        crop_once(*args)

    common_tools.multithreaded(
        foo, [(anno, True) for anno in test_det], num_thread=4)

    # Second pass: names only, in data-list order.
    testset = []
    for index, anno in enumerate(test_det):
        if index % 1000 == 0:
            print('list test', index, '/', len(test_det))
        testset.extend(crop_once(anno, False))

    with open(list_file_name, 'w') as list_file:
        for crop_path in testset:
            list_file.write(crop_path)
            list_file.write('\n')
def crop_test_images(list_file_name):
    """Tile the ``test_det`` images (any size) and write the crop list.

    Unlike a fixed-shape pipeline, each image's own shape drives the tiling.
    When a tile reaches past the right or bottom edge of the image, the image
    is stretched (bicubic) so the tile can be cut out in full. A first pass
    writes the crops under ``settings.TEST_CROPPED_DIR``; a second pass
    recomputes the same file names and writes them, one per line, to
    ``list_file_name``.

    Args:
        list_file_name: path of the text file that receives the crop paths.
    """
    with open(settings.CATES) as f:
        cates = json.load(f)
    # Not consumed below; kept so a malformed categories file still fails here.
    text2cate = {c['text']: c['cate_id'] for c in cates}

    if not os.path.isdir(settings.TEST_CROPPED_DIR):
        os.makedirs(settings.TEST_CROPPED_DIR)

    with open(settings.DATA_LIST) as f:
        data_list = json.load(f)
    test_det = data_list['test_det']
    print(test_det)

    def crop_once(anno, write_images):
        image_id = anno['image_id']
        print(image_id)
        # The image is read in both passes because its shape drives the tiling.
        image = cv2.imread(
            os.path.join(settings.TEST_IMAGE_DIR, anno['file_name']))
        # Tiling always uses the ORIGINAL shape (height, width, channel).
        # Previously the writing pass overwrote this after a resize while the
        # listing pass did not, so the two passes could tile later levels
        # differently and the list could disagree with the files on disk.
        tile_shape = image.shape
        cropped_list = []
        # Per the original author's note, the levels were ((1, 32), (0.5, 96),
        # (.25, 96)) with a 128-pixel test size, i.e. 128/256/512-pixel crops
        # -- TODO confirm against settings.
        for level_id, (cropratio,
                       cropoverlap) in enumerate(settings.TEST_CROP_LEVELS):
            print(cropratio, cropoverlap)
            side = int(round(settings.TEST_IMAGE_SIZE // cropratio))
            cropshape = (side, side)
            for o in darknet_tools.get_crop_bboxes(tile_shape, cropshape,
                                                   (cropoverlap, cropoverlap)):
                xlo = o['xlo']
                xhi = xlo + cropshape[1]
                ylo = o['ylo']
                yhi = ylo + cropshape[0]
                basename = '{}_{}_{}'.format(image_id, level_id, o['name'])
                cropped_file_name = os.path.join(settings.TEST_CROPPED_DIR,
                                                 '{}.jpg'.format(basename))
                cropped_list.append(cropped_file_name)
                if write_images:
                    height, width = image.shape[:2]
                    # ``interpolation`` must be passed by keyword: the third
                    # positional parameter of cv2.resize is ``dst``, so the
                    # flag was previously not applied as interpolation.
                    if xhi > width:
                        image = cv2.resize(image, (xhi, height),
                                           interpolation=cv2.INTER_CUBIC)
                        width = xhi
                    if yhi > height:
                        image = cv2.resize(image, (width, yhi),
                                           interpolation=cv2.INTER_CUBIC)
                    cropped = image[ylo:yhi, xlo:xhi]
                    cv2.imwrite(cropped_file_name, cropped)
        return cropped_list

    # A one-slot queue serves as a progress counter shared by the workers.
    q_i = queue.Queue()
    q_i.put(0)
    print('crop')

    def foo(*args):
        i = q_i.get()
        print('crop test', i, '/', len(test_det))
        q_i.put(i + 1)
        crop_once(*args)

    # First pass: write the crops.
    common_tools.multithreaded(
        foo, [(anno, True) for anno in test_det], num_thread=1)

    # Second pass: collect the crop names in data-list order.
    testset = []
    for i, anno in enumerate(test_det):
        print('list test', i, '/', len(test_det))
        testset += crop_once(anno, False)

    with open(list_file_name, 'w') as f:
        for file_name in testset:
            f.write(file_name)
            print(file_name)
            f.write('\n')
def crop_train_images():
    """Tile the train/val images into square crops with per-crop labels.

    For every annotation line in ``settings.TRAIN`` and ``settings.VAL`` the
    image is tiled with ``darknet_tools.get_crop_bboxes``. A tile is kept only
    if at least one character box lies more than half inside it. Kept tiles
    are written as ``.jpg`` plus an ``.xml`` produced by ``write_xml``.
    Afterwards three list files are written: ``settings.TRAINVAL_LIST``,
    ``settings.VAL_LIST`` and ``settings.VAL_NAME_SIZE``.
    """
    # Every source image is asserted to have exactly this shape.
    imshape = (2048, 2048, 3)
    cropshape = (settings.TRAIN_IMAGE_SIZE, settings.TRAIN_IMAGE_SIZE)
    cropoverlap = (16, 16)

    with open(settings.CATES) as f:
        cates = json.load(f)
    # character text -> category id
    text2cate = {c['text']: c['cate_id'] for c in cates}

    def in_image_ratio(bbox):  # bbox is in darknet bbox representation
        # Returns the fraction of the box area that falls inside the unit
        # square, i.e. inside the crop once coordinates are normalised.
        xmid, ymid, w, h = bbox

        def cutto01(x):
            return max(0, min(1, x))

        Acut = (cutto01(xmid + w / 2) - cutto01(xmid - w / 2)) * (
            cutto01(ymid + h / 2) - cutto01(ymid - h / 2))
        return Acut / (w * h)

    def crop_once(line, write_images):
        # Returns the "<jpg path> <xml path>" entries for one annotation line.
        # Files are only written when ``write_images`` is true, so the same
        # function doubles as a cheap name generator.
        anno = json.loads(line.strip())
        image_id = anno['image_id']
        all = []
        for char in anno_tools.each_char(anno):
            if not char['is_chinese']:
                continue
            cate_id = text2cate[char['text']]
            # Every category id at or above the limit collapses into one
            # shared id equal to the limit itself.
            if cate_id >= settings.NUM_CHAR_CATES:
                cate_id = settings.NUM_CHAR_CATES
            all.append((char['adjusted_bbox'], cate_id))
        if write_images:
            image = cv2.imread(
                os.path.join(settings.TRAINVAL_IMAGE_DIR, anno['file_name']))
            assert image.shape == imshape
            # Paint the ignore polygons flat grey before cropping.
            for o in anno['ignore']:
                poly = (np.array(o['polygon'])).astype(np.int32)
                cv2.fillConvexPoly(image, poly, (128, 128, 128))
        cropped_list = list()
        for o in darknet_tools.get_crop_bboxes(imshape, cropshape, cropoverlap):
            xlo = o['xlo']
            xhi = xlo + cropshape[1]
            ylo = o['ylo']
            yhi = ylo + cropshape[0]
            labels = []
            for bbox, cate_id in all:
                x, y, w, h = bbox
                # Skip boxes that do not touch this tile at all.
                if x > xhi or x + w < xlo or y > yhi or y + h < ylo:
                    continue
                # Convert (x, y, w, h) to a centre/size box relative to the
                # tile and normalised by the tile size.
                bbox = ((x + w / 2 - xlo) / cropshape[1],
                        (y + h / 2 - ylo) / cropshape[0], w / cropshape[1],
                        h / cropshape[0])
                # Keep the label only if more than half of it is in the tile.
                if 0.5 < in_image_ratio(bbox):
                    labels.append((bbox, cate_id))
            if 0 < len(labels):
                basename = '{}_{}'.format(image_id, o['name'])
                cropped_file_name = os.path.join(settings.TRAINVAL_CROPPED_DIR,
                                                 '{}.jpg'.format(basename))
                cropped_xml_name = os.path.join(settings.TRAINVAL_CROPPED_DIR,
                                                '{}.xml'.format(basename))
                cropped_list.append('{} {}'.format(cropped_file_name,
                                                   cropped_xml_name))
                # NOTE(review): nesting reconstructed from collapsed source --
                # the XML is taken to be written only in the image-writing
                # pass; confirm against the original file.
                if write_images:
                    cropped = image[ylo:yhi, xlo:xhi]
                    cv2.imwrite(cropped_file_name, cropped)
                    with open(
                            os.path.join(settings.TRAINVAL_CROPPED_DIR,
                                         '{}.xml'.format(basename)),
                            'w') as f:
                        write_xml(labels, cropshape, f)
        return cropped_list

    if not os.path.isdir(settings.TRAINVAL_CROPPED_DIR):
        os.makedirs(settings.TRAINVAL_CROPPED_DIR)
    lines = []
    with open(settings.TRAIN) as f:
        lines += f.read().splitlines()
    with open(settings.VAL) as f:
        lines += f.read().splitlines()

    # A one-slot queue serves as a progress counter shared by the workers.
    q_i = queue.Queue()
    q_i.put(0)

    def foo(*args):
        i = q_i.get()
        if i % 100 == 0:
            print('crop trainval', i, '/', len(lines))
        q_i.put(i + 1)
        crop_once(*args)

    # Pass 1 (threaded): write crops and XML for train + val.
    common_tools.multithreaded(foo, [(line, True) for line in lines], num_thread=4)

    # Pass 2 (sequential): regenerate the names only and write them shuffled.
    trainset = []
    for i, line in enumerate(lines):
        if i % 1000 == 0:
            print('list trainval', i, '/', len(lines))
        trainset += crop_once(line, False)
    random.shuffle(trainset)
    with open(settings.TRAINVAL_LIST, 'w') as f:
        for file_name in trainset:
            f.write(file_name)
            f.write('\n')

    # The val list is derived again from the val annotations alone.
    with open(settings.VAL) as f:
        lines = f.read().splitlines()
    valset = []
    for i, line in enumerate(lines):
        if i % 1000 == 0:
            print('list val', i, '/', len(lines))
        valset += crop_once(line, False)
    random.shuffle(valset)
    with open(settings.VAL_LIST, 'w') as f:
        for file_name in valset:
            f.write(file_name)
            f.write('\n')
    # One "<jpg path> <size> <size>" line per val crop.
    with open(settings.VAL_NAME_SIZE, 'w') as f:
        for line in valset:
            cropped_file_name, cropped_xml_name = line.split()
            f.write('{} {} {}\n'.format(cropped_file_name,
                                        settings.TRAIN_IMAGE_SIZE,
                                        settings.TRAIN_IMAGE_SIZE))
def work(selected, ext):
    """Render detection results for selected test images with ``print_text``.

    Args:
        selected: iterable of ``(image_id, x, y, w, h)`` crops to draw, or
            ``None`` to draw the full 2048x2048 area of every 200th image in
            ``data_list['test_det']``.
        ext: file extension used for the output drawings.

    Ground truth comes from ``settings.TEST_DETECTION_GT`` and detections
    from ``../detection/products/detections.jsonl``; the two files must have
    the same number of lines. Output goes to
    ``settings.PRINTTEXT_DRAWING_DIR``.
    """
    with open(settings.DATA_LIST) as f:
        data_list = json.load(f)
    with open(settings.TEST_DETECTION_GT) as f:
        gts = f.read().splitlines()
    with open('../detection/products/detections.jsonl') as f:
        dts = f.read().splitlines()
    assert len(gts) == len(dts)

    def gt2array(gt, draw_ignore):
        # Boxes to draw for the ground truth: Chinese characters in red and,
        # optionally, ignore regions in yellow with no text.
        color = '#f00'
        color_ignore = '#ff0'
        a = list()
        for char in anno_tools.each_char(gt):
            if char['is_chinese']:
                a.append({
                    'bbox': char['adjusted_bbox'],
                    'text': char['text'],
                    'color': color
                })
        if draw_ignore:
            for char in gt['ignore']:
                a.append({
                    'bbox': char['bbox'],
                    'text': '',
                    'color': color_ignore
                })
        return a

    def dt2array(dtobj, gtobj, draw_ignore, draw_proposal):
        # Boxes to draw for the detections, coloured by match state:
        # 0 = unmatched, 1 = matched to a ground-truth box, 2 = ignored.
        iou_thresh = settings.IOU_THRESH
        charset = set()
        proposal = False

        def in_size(_):
            # No size filtering here: every box counts as in range.
            return True

        dt = dtobj['detections']
        dt.sort(
            key=lambda o: -o['score']
        )  # sort must be stable, otherwise mAP will be slightly different
        dt = [(o['bbox'], o.get('text'), o['score']) for o in dt]
        ig = [(o['bbox'], None) for o in gtobj['ignore']]
        gt = []
        for char in anno_tools.each_char(gtobj):
            if char['is_chinese']:
                charset.add(char['text'])
                gt.append(
                    (char['adjusted_bbox'], char['text'], char['attributes']))

        # For each detection: candidate ground-truth matches (same text and
        # IoU above threshold) and whether it falls into an ignore region.
        dt_matches = [[] for i in range(len(dt))]
        dt_ig = [False] * len(dt)
        for i_dt, dtchar in enumerate(dt):
            for i_gt, gtchar in enumerate(gt):
                if proposal or dtchar[1] == gtchar[1]:
                    miou = eval_tools.iou(dtchar[0], gtchar[0])
                    if miou > iou_thresh:
                        dt_matches[i_dt].append((i_gt, miou))
            for igchar in ig:
                miou = eval_tools.a_in_b(dtchar[0], igchar[0])
                if miou > iou_thresh:
                    dt_ig[i_dt] = True
        for matches in dt_matches:
            matches.sort(
                key=lambda t: -t[1]
            )  # sort must be stable, otherwise you should use key=lambda t: (-t[1], t[0])

        dt_matched = [
            0 if in_size(o[0]) and False == b else 2 for o, b in zip(dt, dt_ig)
        ]
        gt_taken = [(0, None) if in_size(o[0]) else (2, None) for o in gt]
        # Greedy assignment in score order: a detection takes the best still
        # free ground-truth box among its candidates.
        for i_dt, matches in enumerate(dt_matches):
            for i_gt, _ in matches:
                if 1 != dt_matched[i_dt] and 1 != gt_taken[i_gt][0]:
                    if 0 == gt_taken[i_gt][0]:
                        dt_matched[i_dt] = 1
                        gt_taken[i_gt] = (1, i_dt)
                    else:
                        dt_matched[i_dt] = 2

        a = list()
        minscore = 1.
        colormap = {0: '#ff0', 1: '#0f0', 2: '#0ff'}
        for i in range(len(dt)):
            # Draw at most as many detections as there are ground-truth boxes.
            if len(a) >= len(gt):
                break
            bbox, text, score = dt[i]
            taken = dt_matched[i]
            if 2 != taken or draw_ignore:
                # Skip a box that heavily overlaps one already selected.
                flag = True
                for o in a:
                    if settings.IOU_THRESH < eval_tools.a_in_b(
                            bbox, o['bbox']
                    ) or settings.IOU_THRESH < eval_tools.a_in_b(
                            o['bbox'], bbox):
                        flag = False
                if flag:
                    a.append({
                        'bbox': bbox,
                        'text': text or '■',
                        'color': colormap[taken]
                    })
                    # NOTE(review): nesting reconstructed from collapsed
                    # source -- assumed to track the score of the last box
                    # actually drawn; confirm against the original file.
                    minscore = score
        if draw_proposal:
            for o in sorted(dtobj['proposals'], key=lambda o: -o['score']):
                bbox, score = o['bbox'], o['score']
                if score >= minscore:
                    # Accumulated overlap with ignore regions and with the
                    # boxes selected so far.
                    s = 0
                    for igbbox, _ in ig:
                        s += eval_tools.a_in_b(bbox, igbbox)
                    for o in a:
                        s += max(eval_tools.a_in_b(o['bbox'], bbox),
                                 eval_tools.a_in_b(bbox, o['bbox']))
                    if s <= settings.IOU_THRESH:
                        a.append({'bbox': bbox, 'text': '', 'color': '#00f'})
        # Reversed so the highest-scoring boxes come last in the list.
        return list(reversed(a))

    if selected is None:
        selected = [(o['image_id'], 0, 0, 2048, 2048)
                    for i, o in enumerate(data_list['test_det'])
                    if i % 200 == 0]
    # Ground-truth drawings are switched off; only detections are rendered.
    draw_gt = False
    if not os.path.isdir(settings.PRINTTEXT_DRAWING_DIR):
        os.makedirs(settings.PRINTTEXT_DRAWING_DIR)
    tasks = []
    for image_id, x, y, w, h in sorted(selected):
        # Line i of the gt/dt files corresponds to entry i of test_det.
        i = [o['image_id'] for o in data_list['test_det']].index(image_id)
        gt = json.loads(gts[i])
        dt = json.loads(dts[i])
        crop = (x, y, w, h)
        file_name = os.path.join(settings.TEST_IMAGE_DIR, gt['file_name'])
        if draw_gt:
            tasks.append(
                (file_name,
                 os.path.join(
                     settings.PRINTTEXT_DRAWING_DIR,
                     '{}_{}_{}_{}_{}_gt.{}'.format(image_id, crop[0], crop[1],
                                                   crop[2], crop[3], ext)), {
                                                       'boxes':
                                                       gt2array(
                                                           gt,
                                                           draw_ignore=True),
                                                       'crop':
                                                       crop,
                                                       'place':
                                                       'force',
                                                   }))
        tasks.append(
            (file_name,
             os.path.join(
                 settings.PRINTTEXT_DRAWING_DIR,
                 '{}_{}_{}_{}_{}_dt.{}'.format(image_id, crop[0], crop[1],
                                               crop[2], crop[3], ext)), {
                                                   'boxes':
                                                   dt2array(
                                                       dt,
                                                       gt,
                                                       draw_ignore=False,
                                                       draw_proposal=False),
                                                   'crop':
                                                   crop,
                                                   'place':
                                                   'force',
                                               }))
    # The renderer advertises through ``concurrent`` whether it may be run
    # from several threads.
    if print_text.concurrent:
        common_tools.multithreaded(print_text, tasks, num_thread=cpu_count())
    else:
        for task in tasks:
            print_text(*task)
def main():
    """Build one contact sheet of character crops per attribute.

    Crops of every Chinese character (train, val, test_cls and test_det) are
    cached in ``settings.DATASET_CROPPED``. For each attribute in
    ``settings.ATTRIBUTES`` and its ``not-`` counterpart, up to 25 x 20 crops
    are sampled with a fixed seed, filtered, resized to 64 x 64 and tiled
    into ``<attr>.png``; the chosen indices are saved to ``<attr>.json``.
    Both files go to ``settings.ATTR_SAMPLE_DIR``.
    """
    assert six.PY3
    # Fixed seed: the hand-picked ``forbidden`` indices below refer to
    # positions in the cropped dataset, and the sample must be reproducible.
    random.seed(0)
    polygons = get_polygons()
    print('polygons loaded')

    # Build the crop cache only when it does not exist yet.
    if not os.path.isfile(settings.DATASET_CROPPED):
        lines = []
        with open(settings.TRAIN) as f:
            lines += [('train', s) for s in f.read().splitlines()]
        with open(settings.VAL) as f:
            lines += [('val', s) for s in f.read().splitlines()]
        # test_cls entries carry two JSON strings: proposals and ground truth.
        with open(settings.TEST_CLASSIFICATION) as f1, open(
                settings.TEST_CLASSIFICATION_GT) as f2:
            prs = f1.read().splitlines()
            gts = f2.read().splitlines()
            lines += [('test_cls', pr, gt) for pr, gt in zip(prs, gts)]
        with open(settings.TEST_DETECTION_GT) as f:
            lines += [('test_det', s) for s in f.read().splitlines()]
        # One private bucket per line, filled by the worker threads.
        all = [[] for _ in lines]

        def load_train(i):
            if i % 100 == 0:
                print('dataset', i, '/', len(lines))
            line = lines[i]
            if line[0] == 'test_cls':
                prs, gts = line[1:]
                prs, gts = json.loads(prs), json.loads(gts)
                image = cv2.imread(
                    os.path.join(settings.TEST_IMAGE_DIR, prs['file_name']))
                assert image.shape == (prs['height'], prs['width'], 3)
                for pr, gt in zip(prs['proposals'], gts['ground_truth']):
                    cropped = predictions2html.crop(image,
                                                    pr['adjusted_bbox'], 64)
                    all[i].append([cropped, gt['attributes']])
            else:
                anno = json.loads(line[1].strip())
                image = cv2.imread(
                    os.path.join(
                        settings.TRAINVAL_IMAGE_DIR
                        if line[0] in {'train', 'val'} else
                        settings.TEST_IMAGE_DIR, anno['file_name']))
                assert image.shape == (anno['height'], anno['width'], 3)
                for char in anno_tools.each_char(anno):
                    if not char['is_chinese']:
                        continue
                    cropped = predictions2html.crop(image,
                                                    char['adjusted_bbox'], 64)
                    all[i].append([cropped, char['attributes']])

        common_tools.multithreaded(load_train, range(len(lines)), num_thread=8)
        all = common_tools.reduce_sum(all)
        with open(settings.DATASET_CROPPED, 'wb') as f:
            cPickle.dump(all, f)
    # Always reload from the cache so both paths use identical data.
    with open(settings.DATASET_CROPPED, 'rb') as f:
        all = cPickle.load(f)
    print('cropped loaded')

    # attribute name (or 'not-<attribute>') -> indices into ``all``
    belongs = defaultdict(list)
    for i, (_, attrs) in enumerate(all):
        for attr in settings.ATTRIBUTES:
            if attr in attrs:
                belongs[attr].append(i)
            else:
                belongs['not-{}'.format(attr)].append(i)

    # Sheet layout: x columns by y rows of cropsize-pixel squares.
    x, y, cropsize = 25, 20, 64
    # Indices into ``all`` that are excluded from each attribute's sheet.
    # NOTE(review): presumably hand-picked bad or mislabelled samples --
    # the selection criterion is not visible here.
    forbidden = {
        'bgcomplex': {
            29784, 30793, 54066, 60905, 80018, 85910, 92040, 93009, 108277,
            117829, 145939, 159277, 166330, 166891, 168897, 174142, 181156,
            181461, 185076, 197249, 197278, 197390, 197483, 197736, 233084,
            241839, 267090, 278862, 282057, 304974, 305250, 309420, 309505,
            311269, 312195, 317930, 318505, 366441, 366568, 366798, 367485,
            367889, 369698, 371721, 372093, 372993, 373129, 378209, 438060,
            438222, 451115, 451181, 452219, 476071, 494493, 511841, 537930,
            568693, 591312, 593805, 604174, 604482, 607029, 613924, 620551,
            624535, 630988, 644721, 662645, 685127, 692149, 697909, 713403,
            718308, 722478, 731465, 744738, 752377, 756651, 756983, 757500,
            838142, 840193, 853782, 868967, 870828, 895909, 897855, 914324,
            924949, 926112, 948187, 949547, 951922, 966718, 990845
        },
        'distorted': {
            4272, 11545, 31628, 66623, 77833, 80068, 82815, 101418, 101429,
            101461, 101475, 102750, 103990, 106226, 122330, 122629, 122864,
            127239, 137058, 138722, 165570, 215740, 215956, 241371, 244889,
            300272, 311629, 351641, 354914, 355965, 407871, 493919, 516472,
            520137, 531795, 545332, 560774, 564422, 568087, 568530, 580687,
            580940, 584408, 584961, 587857, 595353, 605587, 635609, 646782,
            659324, 674405, 676416, 677631, 763798, 776019, 794110, 799670,
            799845, 817479, 827616, 891523, 903911, 909307, 925630, 938309,
            942272, 951543, 971224, 974709, 993244, 999040, 1000422
        },
        'handwritten': {
            329601, 408868, 512096, 821406, 978196, 982861, 982872, 982875,
            1008997
        },
        'not-bgcomplex': {
            9246, 15606, 16655, 36299, 61621, 68809, 262313, 270993, 272949,
            282797, 310460, 314072, 352096, 399965, 403986, 405162, 606926,
            677085, 703288, 779237, 822430, 827693, 850763, 875473, 915644,
            922101, 933638, 938877, 990631, 996811, 1010739
        },
        'not-distorted': {
            89019, 288330, 289995, 303808, 316933, 420413, 489284, 534030,
            585589, 590783, 639562, 652671, 687953, 774776, 845746, 886599,
            887318, 955774, 957490
        },
        'not-handwritten':
        set(),
        'not-occluded': {
            4608, 5314, 209656, 242426, 288072, 323822, 434571, 500784,
            567581, 569271, 666036, 693361, 854716
        },
        'not-raised': {
            32737, 196852, 301792, 320325, 476295, 534636, 652281, 704042,
            910982, 915965, 950785
        },
        'not-wordart': {
            3645, 50260, 50296, 57502, 82294, 96621, 109453, 204976, 262595,
            269395, 284668, 350879, 382009, 414268, 509196, 513846, 516881,
            524924, 557900, 567024, 582488, 644754, 647845, 670733, 672294,
            683110, 685146, 697891, 704277, 711269, 718931, 731280, 734266,
            757216, 792544, 805942, 806092, 814184, 821115, 826304, 836793,
            881432, 882749, 882764, 887005, 890080, 898900, 918417, 941640,
            944208
        },
        'occluded': {
            873, 5730, 8228, 13395, 15541, 41004, 47143, 51186, 61060, 74182,
            123631, 124450, 135852, 147417, 157442, 172524, 184918, 185294,
            190257, 190412, 190643, 192596, 197677, 224041, 226001, 227348,
            227590, 230286, 232497, 235413, 245273, 246929, 248790, 252125,
            257097, 272903, 272904, 277566, 284177, 284181, 306556, 309870,
            310208, 310823, 312420, 313796, 315943, 320599, 324504, 325094,
            345861, 348978, 350766, 355099, 355197, 361102, 363122, 364891,
            371829, 375936, 378402, 383816, 385305, 385587, 406896, 429101,
            441511, 457760, 460850, 464921, 532963, 532972, 537419, 537840,
            566316, 567464, 570925, 575854, 576324, 580475, 580786, 582479,
            587189, 601068, 612825, 627320, 629511, 645262, 648763, 660725,
            670146, 671016, 676628, 702305, 728385, 734681, 735671, 745464,
            747664, 767248, 778261, 779184, 779315, 781154, 786504, 786792,
            789390, 797219, 799166, 810608, 836407, 837725, 843622, 843863,
            851364, 864668, 868260, 870504, 872464, 888636, 892626, 939872,
            940036, 941901, 956912, 976644, 979918, 992558, 1000251, 1006844
        },
        'raised': {
            6962, 58512, 60516, 61103, 80469, 85491, 94437, 94524, 116556,
            125124, 165233, 185106, 222793, 231913, 234829, 238321, 244816,
            253130, 264975, 275946, 275958, 282919, 293993, 294043, 302357,
            305192, 308649, 315404, 316111, 320636, 392341, 429254, 431867,
            431870, 432207, 444736, 447251, 486993, 488383, 510305, 511770,
            515521, 537062, 537275, 543490, 566084, 568212, 570926, 574094,
            575914, 576594, 580506, 583107, 586520, 701097, 703253, 735409,
            748760, 754485, 757556, 757604, 768041, 776067, 791019, 831144,
            853508, 884373, 888685, 899910, 903512, 903602, 939832, 952561,
            956100, 965107, 968079, 974788, 975073, 983868, 1010585, 1011033
        },
        'wordart': {
            31715, 39716, 44876, 74919, 75133, 104696, 108556, 110006, 113592,
            117784, 140866, 143122, 143125, 145951, 149959, 150049, 150213,
            150279, 150281, 150428, 150490, 151687, 154129, 156874, 159778,
            159895, 160120, 160247, 160276, 160283, 162846, 163105, 163216,
            164079, 164761, 166267, 168230, 171234, 171790, 172286, 172298,
            172308, 172335, 175266, 175334, 175831, 176590, 176806, 177410,
            177554, 179221, 179305, 180561, 181293, 181310, 182794, 183106,
            183209, 183290, 184192, 185667, 186031, 186403, 186514, 187030,
            187343, 190446, 194951, 197172, 198183, 198237, 200187, 201238,
            201769, 202088, 202370, 202382, 203726, 207168, 207507, 208267,
            208991, 212503, 212525, 213482, 223470, 226107, 227287, 232970,
            233894, 233940, 234845, 236981, 237720, 241485, 242131, 244615,
            244620, 244624, 245553, 246792, 247779, 247807, 251942, 254875,
            255095, 258723, 259914, 259966, 262937, 263127, 263146, 263239,
            267506, 268151, 268235, 276946, 278369, 283787, 290607, 292262,
            292299, 292365, 293734, 294791, 296802, 297370, 298140, 298342,
            305806, 306567, 306734, 307309, 310559, 310642, 312263, 312802,
            327995, 329859, 335910, 337662, 338453, 343304, 353405, 353413,
            353893, 354529, 355161, 355494, 355623, 355691, 355774, 355950,
            355954, 356051, 356179, 356242, 356497, 356706, 357194, 357264,
            373665, 373717, 376825, 404495, 419551, 422019, 422612, 427949,
            433690, 435115, 435506, 436128, 436294, 436315, 436376, 436654,
            437268, 437576, 437642, 438235, 438558, 439054, 439080, 439472,
            439674, 441572, 455623, 476376, 481154, 485066, 527590, 529010,
            531947, 538763, 539110, 540596, 542057, 542098, 545024, 546352,
            551044, 556749, 560492, 560528, 565064, 567446, 585918, 611633,
            675438, 676105, 678361, 678431, 682537, 683559, 683671, 684947,
            714507, 747645, 748163, 749913, 764697, 770430, 772118, 772585,
            774869, 774876, 776310, 777338, 796911, 798872, 817377, 826692,
            833591, 838845, 840866, 876796, 878192, 883355, 887083, 887166,
            887344, 894624, 903599, 912879, 915091, 922486, 935850, 945271,
            945286, 946310, 947397, 951044, 951139, 951591, 952854, 952928,
            953157, 954346, 959683, 960200, 962644, 968523, 977304, 981412,
            988359, 990304, 998700, 1000188, 1003561
        },
    }

    # Sorted iteration keeps the sequence of shuffles, and therefore every
    # sheet, reproducible under the fixed seed.
    for attr, imgset in sorted(belongs.items()):
        random.shuffle(imgset)
        for id in forbidden[attr]:
            imgset.remove(id)
        # Drop crops whose shorter side is 16 pixels or less.
        imgset = list(filter(lambda id: min(all[id][0].shape[:2]) > 16, imgset))
        # The second argument of polygon_in_center is 512 for the
        # 'distorted' / 'not-distorted' sheets and 10 for all others.
        if 'distorted' in attr:
            imgset = list(
                filter(lambda id: polygon_in_center(polygons[id], 512),
                       imgset))
        else:
            imgset = list(
                filter(lambda id: polygon_in_center(polygons[id], 10), imgset))
        # Keep only as many crops as fit on one sheet.
        imgset = imgset[:x * y]
        if not os.path.isdir(settings.ATTR_SAMPLE_DIR):
            os.makedirs(settings.ATTR_SAMPLE_DIR)
        file_path = os.path.join(settings.ATTR_SAMPLE_DIR,
                                 '{}.png'.format(attr))
        print(file_path)
        canvas = np.zeros((y * cropsize, x * cropsize, 3), dtype=np.uint8)
        # Fill the sheet row by row; unused cells stay black.
        for i, j in enumerate(imgset):
            cropped = all[j][0]
            resized = cv2.resize(cropped, (cropsize, cropsize))
            canvas[(i // x) * cropsize:(i // x + 1) * cropsize, (i % x) *
                   cropsize:(i % x + 1) * cropsize] = resized
        cv2.imwrite(file_path, canvas)
        with open(
                os.path.join(settings.ATTR_SAMPLE_DIR,
                             '{}.json'.format(attr)), 'w') as f:
            json.dump(imgset, f)
def encode(s1,
           s2,
           mode,
           version,
           level1,
           level2,
           image=None,
           timeout=100,
           debug=False,
           *,
           different_mask=0,
           do_reduce=1,
           merge_bound_thresh=300,
           lamb=.1,
           seed=0):
    """Encode two strings by running the external ``main.exe`` solver.

    The solver is launched once per mask-id combination (8 runs, or 64 when
    ``different_mask`` is truthy); the best result according to the ``cost``
    ranking below is parsed and returned.

    Args:
        s1, s2: the two strings to encode.
        mode: ``'art'`` feeds a binarised image to the solver; any other
            value does not.
        version: minimum version; the larger of this and the minimal versions
            required by ``s1`` and ``s2`` is used.
        level1, level2: error-correction level strings for ``s1`` and ``s2``.
        image: raw image bytes for art mode; ``qrmerge/logo.png`` is used when
            falsy.
        timeout: seconds after which a solver process is killed.
        debug: when true, solver stderr is inherited and per-mask results are
            printed.
        different_mask: when truthy, try every pair of mask ids instead of
            only equal pairs.
        do_reduce, merge_bound_thresh, lamb, seed: passed through to the
            solver on its first input line.

    Returns:
        On failure ``{'error': 1, 'msg': 'solver timeout', 'time_cost': ...}``.
        On success a dict with ``'error': 0``, the chosen parameters and the
        six ``n`` x ``n`` integer matrices parsed from the solver output.
    """
    mode1, encode1 = qrhelper.mode_and_encode(s1)
    mode2, encode2 = qrhelper.mode_and_encode(s2)
    v1 = qrhelper.minimal_version(mode1, level1, encode1)
    v2 = qrhelper.minimal_version(mode2, level2, encode2)
    ver = max(v1, v2, int(version))
    code1 = qrhelper.indicators(ver, mode1, len(s1)) + encode1
    code2 = qrhelper.indicators(ver, mode2, len(s2)) + encode2
    n = 17 + ver * 4  # side length, in modules, for this version
    groups1, ecc_num1 = qrhelper.group_and_ecc(ver, level1)
    groups2, ecc_num2 = qrhelper.group_and_ecc(ver, level2)
    _make_exe()
    is_art = mode == 'art'

    if is_art:
        if image:
            img = Image.open(io.BytesIO(image))
        else:
            img = Image.open('qrmerge/logo.png')
        img = img.convert('L')
        img = img.resize((n, n), resample=Image.BILINEAR)
        img = np.array(img.getdata()).reshape((n, n))
        # Binarise: 1 where the pixel is dark (<= 127). ``np.int`` was only
        # an alias of the builtin ``int`` and was removed in NumPy 1.24.
        img = np.array(img <= 127, dtype=int).tolist()

    start_time = time.time()
    exe_dir = os.path.dirname(__file__)
    exe_path = os.path.join(os.path.abspath(exe_dir), 'main.exe')

    class Result():
        pass

    def encode_once(maskid1, maskid2):
        # Runs the solver for one mask-id pair and parses its output.
        solver_input = [
            '{:d} {:s} {:s} {:d} {:d} {:d} {:d} {:d} {:f} {:d}'.format(
                ver, level1, level2, maskid1, maskid2, int(is_art), do_reduce,
                merge_bound_thresh, lamb, seed),
            code1,
            code2,
        ]
        if is_art:
            for row in img:
                solver_input.append(' '.join([str(b) for b in row]))

        res = Result()
        res.maskid1 = maskid1
        res.maskid2 = maskid2
        timed_out = [False]

        def timeout_target(p):
            timed_out[0] = True
            p.kill()

        # subprocess.DEVNULL replaces an explicitly opened os.devnull handle,
        # which leaked on the timeout return and on a failed exit status.
        p = subprocess.Popen([exe_path],
                             cwd=exe_dir,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=None if debug else subprocess.DEVNULL)
        timer = threading.Timer(timeout, timeout_target, [p])
        timer.start()
        try:
            output, _ = p.communicate('\n'.join(solver_input).encode())
        finally:
            # Never leave a pending timer behind, even if communicate fails.
            timer.cancel()

        if timed_out[0]:
            res.error = 1
            res.msg = 'time out'
            return res
        assert 0 == p.wait(), p.returncode

        # Output layout: header line, two error-count lines, then matrices.
        output = output.decode().split('\n', 3)
        header = output[0].split()
        retcode = int(header[0])
        if retcode != 0:
            res.error = 1
            res.msg = 'mask id not match'
            return res
        res.error = 0
        (error_format_bits_1, error_format_bits_2, max_merge_bound,
         cpp_time_cost) = header[1:]
        # NOTE(review): these stay strings, so max() in ``cost`` compares
        # them lexicographically -- confirm whether integers were intended.
        res.error_format_bits = [error_format_bits_1, error_format_bits_2]
        res.max_merge_bound = int(max_merge_bound)
        res.cpp_time_cost = float(cpp_time_cost)
        res.err1 = [int(x) for x in output[1].split()]
        res.err2 = [int(x) for x in output[2].split()]
        res.matrixes = output[3]
        return res

    q = queue.Queue()

    def target(maskid1, maskid2):
        q.put(encode_once(maskid1, maskid2))

    if different_mask:
        tasks = [(maskid1, maskid2) for maskid1 in range(8)
                 for maskid2 in range(8)]
    else:
        tasks = [(maskid, maskid) for maskid in range(8)]
    common_tools.multithreaded(target,
                               tasks,
                               num_thread=min(8, len(tasks),
                                              multiprocessing.cpu_count()))

    results = []
    while not q.empty():
        results.append(q.get())
    assert len(results) == len(tasks)
    # Workers finish in arbitrary order; sort for a deterministic ranking.
    results.sort(key=lambda res: (res.maskid1, res.maskid2))

    max_merge_bounds = []
    cpp_time_costs = []
    for res in results:
        if not res.error:
            max_merge_bounds.append(res.max_merge_bound)
            cpp_time_costs.append(res.cpp_time_cost)
        if debug:
            if res.error:
                print('maskid', res.maskid1, res.maskid2, res.msg)
            else:
                print('maskid', res.maskid1, res.maskid2, '#misencoded',
                      res.err1, res.err2)

    rank = list(range(len(results)))

    def cost(result_id):
        # Sort key: failed runs last; otherwise prefer the largest remaining
        # correction margin, then fewer format-bit errors, then equal mask
        # ids, then smaller error counts.
        res = results[result_id]
        if res.error:
            return (1, )

        def recovery_ratio(groups, ecc_num, err):
            # Smallest remaining margin over all blocks, as an exact fraction
            # of the block length.
            x = fractions.Fraction(1, 1)
            for a in err[:groups[0]]:
                x = min(
                    x,
                    fractions.Fraction(ecc_num // 2 - a, groups[1] + ecc_num))
            for a in err[groups[0]:]:
                x = min(
                    x,
                    fractions.Fraction(ecc_num // 2 - a, groups[3] + ecc_num))
            return x

        return (0, -min(recovery_ratio(groups1, ecc_num1, res.err1),
                        recovery_ratio(groups2, ecc_num2, res.err2)),
                max(res.error_format_bits),
                0 if res.maskid1 == res.maskid2 else 1,
                sorted(res.err1 + res.err2, reverse=True))

    rank.sort(key=cost)
    res = results[rank[0]]
    time_cost = time.time() - start_time
    # If even the best-ranked run failed, every run failed.
    if res.error:
        return {
            'error': 1,
            'msg': 'solver timeout',
            'time_cost': time_cost,
        }

    block_length1 = [
        x + ecc_num1
        for x in [groups1[1]] * groups1[0] + [groups1[3]] * groups1[2]
    ]
    block_length2 = [
        x + ecc_num2
        for x in [groups2[1]] * groups2[0] + [groups2[3]] * groups2[2]
    ]

    # The solver prints six n-row matrices back to back.
    matrixes = res.matrixes.split('\n')

    def parse_matrix(index):
        rows = matrixes[index * n:(index + 1) * n]
        return [[int(c) for c in row.split()] for row in rows]

    m1, m2, l1, l2, o1, o2 = (parse_matrix(k) for k in range(6))

    return {
        'error': 0,
        'version': ver,
        'level': [level1, level2],
        'mask_id': [res.maskid1, res.maskid2],
        'ecc_num': [ecc_num1, ecc_num2],
        'block_length': [block_length1, block_length2],
        'timeout': timeout,
        'is_art': is_art,
        'different_mask': different_mask,
        'do_reduce': do_reduce,
        'max_merge_bounds': max_merge_bounds,
        'seed': seed,
        'cpp_time_costs': cpp_time_costs,
        'time_cost': time_cost,
        'string_left': s1,
        'string_right': s2,
        'reference_left': m1,
        'reference_right': m2,
        'layer_down': l1,
        'layer_up': l2,
        'qrcode_left': o1,
        'qrcode_right': o2,
        'error_left': res.err1,
        'error_right': res.err2,
        'error_format_bits': res.error_format_bits,
    }