# NOTE: the imports below are assumed from the SynthText code-base this module
# derives from; adjust the module names to this repository's actual layout.
# Other names used below (TextRegions, get_text_placement_mask, time_limit,
# TimeoutException, viz_textbb, viz_masks, WordRenderer) are expected to be
# defined elsewhere in this file / package.
import copy
import itertools
import os.path as osp
import pdb
import random
import traceback

import cv2
import numpy as np

import synth_utils as su        # su.DepthCamera.depth2xyz
import text_utils as tu         # tu.RenderFont
from colorize3_poisson import Colorize
from common import *            # colorize, Color
class RendererV3(object):

    def __init__(self, data_dir, max_time=None):
        self.text_renderer = tu.RenderFont(data_dir)
        self.colorizer = Colorize(data_dir)
        #self.colorizerV2 = colorV2.Colorize(data_dir)

        self.min_char_height = 8  # px
        self.min_asp_ratio = 0.4

        self.max_text_regions = 7

        self.max_time = max_time

    def filter_regions(self, regions, filt):
        """
        filt : boolean list of regions to keep.
        """
        idx = np.arange(len(filt))[filt]
        for k in list(regions.keys()):
            regions[k] = [regions[k][i] for i in idx]
        return regions

    def filter_for_placement(self, xyz, seg, regions):
        filt = np.zeros(len(regions['label'])).astype('bool')
        masks, Hs, Hinvs = [], [], []
        for idx, l in enumerate(regions['label']):
            res = get_text_placement_mask(xyz, seg == l, regions['coeff'][idx], pad=2)
            if res is not None:
                mask, H, Hinv = res
                masks.append(mask)
                Hs.append(H)
                Hinvs.append(Hinv)
                filt[idx] = True
        regions = self.filter_regions(regions, filt)
        regions['place_mask'] = masks
        regions['homography'] = Hs
        regions['homography_inv'] = Hinvs
        return regions

    def warpHomography(self, src_mat, H, dst_size):
        dst_mat = cv2.warpPerspective(src_mat, H, dst_size,
                                      flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR)
        return dst_mat

    def homographyBB(self, bbs, H, offset=None):
        """
        Apply homography transform to bounding-boxes.
        BBS: 2 x 4 x n matrix (2 coordinates, 4 points, n bbs).
        Returns the transformed 2x4xn bb-array.

        offset : a 2-tuple (dx, dy), added to points before transformation.
        """
        eps = 1e-16
        # check the shape of the BB array:
        t, f, n = bbs.shape
        assert (t == 2) and (f == 4)

        # append 1 for homogeneous coordinates:
        bbs_h = np.reshape(np.r_[bbs, np.ones((1, 4, n))], (3, 4 * n), order='F')
        if offset is not None:
            bbs_h[:2, :] += np.array(offset)[:, None]

        # perspective transform:
        bbs_h = H.dot(bbs_h)
        bbs_h /= (bbs_h[2, :] + eps)

        bbs_h = np.reshape(bbs_h, (3, 4, n), order='F')
        return bbs_h[:2, :, :]

    def bb_filter(self, bb0, bb, text):
        """
        Ensure that bounding-boxes are not too distorted after perspective distortion.

        bb0 : 2x4xn matrix of BB coordinates before perspective
        bb  : 2x4xn matrix of BB coordinates after perspective
        text: string of text -- for excluding symbols/punctuation.
        """
        h0 = np.linalg.norm(bb0[:, 3, :] - bb0[:, 0, :], axis=0)
        w0 = np.linalg.norm(bb0[:, 1, :] - bb0[:, 0, :], axis=0)
        hw0 = np.c_[h0, w0]

        h = np.linalg.norm(bb[:, 3, :] - bb[:, 0, :], axis=0)
        w = np.linalg.norm(bb[:, 1, :] - bb[:, 0, :], axis=0)
        hw = np.c_[h, w]

        # remove newlines and spaces:
        text = ''.join(text.split())
        assert len(text) == bb.shape[-1]

        alnum = np.array([ch.isalnum() for ch in text])
        hw0 = hw0[alnum, :]
        hw = hw[alnum, :]

        min_h0, min_h = np.min(hw0[:, 0]), np.min(hw[:, 0])
        asp0, asp = hw0[:, 0] / hw0[:, 1], hw[:, 0] / hw[:, 1]
        asp0, asp = np.median(asp0), np.median(asp)

        asp_ratio = asp / asp0
        is_good = (min_h > self.min_char_height
                   and asp_ratio > self.min_asp_ratio
                   and asp_ratio < 1.0 / self.min_asp_ratio)
        return is_good

    def get_min_h(self, bb, text):
        # find min-height:
        h = np.linalg.norm(bb[:, 3, :] - bb[:, 0, :], axis=0)

        # remove newlines and spaces:
        text = ''.join(text.split())
        assert len(text) == bb.shape[-1]

        alnum = np.array([ch.isalnum() for ch in text])
        h = h[alnum]
        return np.min(h)

    def feather(self, text_mask, min_h):
        # determine the gaussian-blur std:
        if min_h <= 15:
            bsz = 0.25
            ksz = 1
        elif 15 < min_h < 30:
            bsz = max(0.30, 0.5 + 0.1 * np.random.randn())
            ksz = 3
        else:
            bsz = max(0.5, 1.5 + 0.5 * np.random.randn())
            ksz = 5
        return cv2.GaussianBlur(text_mask, (ksz, ksz), bsz)

    def place_text(self, rgb, collision_mask, H, Hinv):
        font = self.text_renderer.font_state.sample()
        font = self.text_renderer.font_state.init_font(font)

        render_res = self.text_renderer.render_sample(font, collision_mask)
        if render_res is None:  # rendering not successful
            return  # None
        else:
            text_mask, loc, bb, text = render_res
            #text = text.decode('utf-8')

        # update the collision mask with text:
        collision_mask += (255 * (text_mask > 0)).astype('uint8')

        # warp the object mask back onto the image:
        text_mask_orig = text_mask.copy()
        bb_orig = bb.copy()
        text_mask = self.warpHomography(text_mask, H, rgb.shape[:2][::-1])
        bb = self.homographyBB(bb, Hinv)

        if not self.bb_filter(bb_orig, bb, text):
            print(colorize(Color.RED, 'bad charBB statistics'))
            #warn("bad charBB statistics")
            return  # None

        # get the minimum height of the character-BB:
        min_h = self.get_min_h(bb, text)

        # feathering:
        text_mask = self.feather(text_mask, min_h)

        im_final = self.colorizer.color(rgb, [text_mask], np.array([min_h]))
        print(colorize(Color.GREEN, 'text in synthgen.py/place_text to return ' + text))

        return im_final, text, bb, collision_mask

    def get_num_text_regions(self, nregions):
        #return nregions
        nmax = min(self.max_text_regions, nregions)
        if np.random.rand() < 0.10:
            rnd = np.random.rand()
        else:
            rnd = np.random.beta(5.0, 1.0)
        return int(np.ceil(nmax * rnd))

    def char2wordBB(self, charBB, text):
        """
        Converts character bounding-boxes to word-level bounding-boxes.

        charBB : 2x4xn matrix of BB coordinates
        text   : the text string

        output : 2x4xm matrix of BB coordinates, where m == number of words.
        """
        wrds = text.split()
        bb_idx = np.r_[0, np.cumsum([len(w) for w in wrds])]
        wordBB = np.zeros((2, 4, len(wrds)), 'float32')

        for i in range(len(wrds)):
            cc = charBB[:, :, bb_idx[i]:bb_idx[i + 1]]

            # fit a rotated-rectangle:
            # change shape from 2x4xn_i -> (4*n_i)x2
            cc = np.squeeze(np.concatenate(np.dsplit(cc, cc.shape[-1]), axis=1)).T.astype('float32')
            rect = cv2.minAreaRect(cc.copy())
            box = np.array(cv2.boxPoints(rect))

            # find the permutation of box-coordinates which
            # are "aligned" appropriately with the character-bb
            # (exhaustive search over all possible assignments):
            cc_tblr = np.c_[cc[0, :], cc[-3, :], cc[-2, :], cc[3, :]].T
            perm4 = np.array(list(itertools.permutations(np.arange(4))))
            dists = []
            for pidx in range(perm4.shape[0]):
                d = np.sum(np.linalg.norm(box[perm4[pidx], :] - cc_tblr, axis=1))
                dists.append(d)
            wordBB[:, :, i] = box[perm4[np.argmin(dists)], :].T

        return wordBB

    def render_text(self, rgb, depth, seg, area, label, ninstance=1, viz=False):
        """
        rgb   : HxWx3 image rgb values (uint8)
        depth : HxW depth values (float)
        seg   : HxW segmentation region masks
        area  : number of pixels in each region
        label : region labels == unique(seg) / {0}
                i.e., indices of pixels in SEG which constitute a region mask
        ninstance : no of times image should be used to place text.

        @return:
            res : a list of dictionaries, one for each of the image instances.
                  Each dictionary has the following structure:
                      'img'    : rgb-image with text on it.
                      'charBB' : 2x4xn matrix of bounding-boxes for each character in the image.
                      'wordBB' : 2x4xm matrix of bounding-boxes for each word.
                      'txt'    : a list of strings. The correspondence b/w charBB and txt
                                 is that the i-th non-whitespace character in txt is at
                                 charBB[:,:,i].

            If there's an error in pre-text placement, e.g. if there's no suitable
            region for text placement, an empty list is returned.
        """
        try:
            # depth -> xyz
            xyz = su.DepthCamera.depth2xyz(depth)

            # find text-regions:
            regions = TextRegions.get_regions(xyz, seg, area, label)

            # find the placement mask and homographies:
            regions = self.filter_for_placement(xyz, seg, regions)

            # finally place some text:
            nregions = len(regions['place_mask'])
            if nregions < 1:  # no good region to place text on
                return []
        except:
            # failure in pre-text placement
            #import traceback
            traceback.print_exc()
            return []

        res = []
        for i in range(ninstance):
            place_masks = copy.deepcopy(regions['place_mask'])

            print(colorize(Color.CYAN, " ** instance # : %d" % i))

            idict = {'img': [], 'charBB': None, 'wordBB': None, 'txt': None}

            m = self.get_num_text_regions(nregions)  #np.arange(nregions) #min(nregions, 5*ninstance*self.max_text_regions)
            reg_idx = np.arange(min(2 * m, nregions))
            #np.random.shuffle(reg_idx)
            reg_idx = reg_idx[:m]

            placed = False
            img = rgb.copy()
            itext = []
            ibb = []

            # process regions:
            num_txt_regions = len(reg_idx)
            NUM_REP = 5  # re-use each region up to NUM_REP times:
            reg_range = np.arange(NUM_REP * num_txt_regions) % num_txt_regions
            for idx in reg_range:
                ireg = reg_idx[idx]
                try:
                    if self.max_time is None:
                        txt_render_res = self.place_text(img, place_masks[ireg],
                                                         regions['homography'][ireg],
                                                         regions['homography_inv'][ireg])
                    else:
                        with time_limit(self.max_time):
                            txt_render_res = self.place_text(img, place_masks[ireg],
                                                             regions['homography'][ireg],
                                                             regions['homography_inv'][ireg])
                except TimeoutException as msg:
                    print(msg)
                    continue
                except:
                    traceback.print_exc()
                    # some error in placing text on the region
                    continue

                if txt_render_res is not None:
                    placed = True
                    img, text, bb, collision_mask = txt_render_res
                    # update the region collision mask:
                    place_masks[ireg] = collision_mask
                    # store the result:
                    itext.append(text)
                    ibb.append(bb)
                    print(colorize(Color.GREEN,
                                   'text in synthgen.py/render_text append into itext ' + text))

            if placed:
                # at least 1 word was placed in this instance:
                idict['img'] = img
                idict['txt'] = itext
                idict['charBB'] = np.concatenate(ibb, axis=2)
                idict['wordBB'] = self.char2wordBB(idict['charBB'].copy(), ' '.join(itext))
                print(colorize(Color.GREEN, itext))

                res.append(idict.copy())
                if viz:
                    viz_textbb(1, img, [idict['wordBB']], alpha=1.0)
                    viz_masks(2, img, seg, depth, regions['label'])
                    #viz_regions(rgb.copy(), xyz, seg, regions['coeff'], regions['label'])
                    if i < ninstance - 1:
                        input(colorize(Color.BLUE, 'continue?', True))
        return res
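# ---------------------------------------------------------------------------
# Usage sketch for RendererV3 (illustration only; this helper is not part of
# the original code and is never called by the renderer). It assumes
# SynthText-style inputs: an RGB image with a per-pixel depth map and a region
# segmentation. The file names and the 'data' resource directory below are
# placeholder assumptions.
def _example_render_text():
    import cv2
    import numpy as np

    rgb = cv2.imread('example.jpg')[:, :, ::-1]          # HxWx3 uint8 (RGB)
    depth = np.load('example_depth.npy')                 # HxW float depth
    seg = np.load('example_seg.npy')                     # HxW int region ids
    label = np.unique(seg)
    label = label[label != 0]                            # region labels (0 = background)
    area = np.array([(seg == l).sum() for l in label])   # pixels per region

    renderer = RendererV3('data', max_time=5)
    results = renderer.render_text(rgb, depth, seg, area, label, ninstance=1)
    if results:
        out = results[0]
        # out['img']    : image with rendered text
        # out['txt']    : list of placed strings
        # out['charBB'] : 2x4xn character boxes, out['wordBB'] : 2x4xm word boxes
        return out
    return None
# ---------------------------------------------------------------------------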
# NOTE: method of the word-renderer class (see WordRenderer in __main__ below);
# the class definition and the helpers it calls (getFontSize, _RandomLoadFont,
# RenderStraightTextLine) live elsewhere in this module.
def RenderWordImageA(self, Height, Width, LightMap, mask,
                     WordColor=(255, 255, 255, 200), SaveID=None):
    """
    Render straight text lines one by one and stack them vertically until the
    target height is filled, then colorize them against the background crop:

        while True:
            img0 = render single text line
            imgs.append(img0)
            if accumulated height > target height: break
            if random.random() < 0.2: break
        pad output

    :param Height:    target crop height (px)
    :param Width:     target crop width (px)
    :param LightMap:  background image used for colorization
    :param mask:      (UL_x, UL_y, BR_x, BR_y) crop of LightMap used as background
    :param WordColor: RGBA text colour
    :param SaveID:    if given, write the rendered crop to disk and return its path
    :return: Img[HxWx4], Texts (List[str]), CBOX[nx4x2], BBOX[nx4x2]
             (N_box, corner, x/y)
    """
    #LightMap_float = LightMap.astype(np.float)
    UL_x, UL_y, BR_x, BR_y = mask
    bg_arr = LightMap[UL_y:BR_y, UL_x:BR_x, :]  #cv2.bitwise_and(LightMap, mask)
    #bg_img = np.zeros((bg_arr.shape[0], bg_arr.shape[1], 4))
    #bg_img[:, :, :3] = LightMap
    #bg_img[:, :, 3] = (bg_arr[:, :, 0] > 0).astype(np.float32)

    FontSize = self.getFontSize(Height, Width)
    if FontSize > min(Height, Width):
        FontSize //= 2
    self._RandomLoadFont(FontSize)

    # render text lines until the target height is (roughly) filled:
    TextCrops = []
    CurrentHeight = 0
    MaxWidth = 0
    while True:
        CharImg, texts, text_joint, cbox, bbox = self.RenderStraightTextLine(
            Width, SpacingFactor=np.random.uniform(0, 0.4))
        if CharImg is None:
            FontSize = self.getFontSize(Height, Width)
            self._RandomLoadFont(FontSize)
            continue
        TextCrops.append([CharImg, texts, text_joint, cbox, bbox])
        try:
            CurrentHeight += CharImg.shape[0]
        except:
            TextCrops = TextCrops[:-1]
            continue
        MaxWidth = max(MaxWidth, CharImg.shape[1])
        if CurrentHeight > Height * 1.2 and len(TextCrops) > 1:
            CurrentHeight -= TextCrops[-1][0].shape[0]
            TextCrops = TextCrops[:-1]
            break
        if CurrentHeight > Height:
            break
        if len(TextCrops) > 5:
            break

    Color = np.array([[[WordColor[0], WordColor[1], WordColor[2], WordColor[3]]]],
                     dtype=np.uint8)
    Img = np.zeros(shape=[max(CurrentHeight, int(random.uniform(1.0, 1.05) * Height)),
                          max(MaxWidth, Width), 4],
                   dtype=np.uint8)
    Texts = []
    BBOX = []
    CBOX = []
    currentheight = 0
    try:
        # paste the rendered lines into the output canvas with a random
        # left-pad and random vertical spacing:
        H, W, _ = Img.shape
        for CharImg, texts, text_joint, cbox, bbox in TextCrops:
            h, w, _ = CharImg.shape
            left_pad = random.randint(0, W - w)
            if currentheight + h > H:
                break
            Img[currentheight:currentheight + h, left_pad:w + left_pad] = CharImg
            Texts += texts
            bbox[:, ::2] += left_pad
            cbox[:, ::2] += left_pad
            BBOX.append(bbox)
            CBOX.append(cbox)
            BBOX[-1][:, 1::2] += currentheight
            CBOX[-1][:, 1::2] += currentheight
            if random.random() < 0.5:
                currentheight += int(h * random.uniform(1.0, 1.5))
            else:
                currentheight += h
        BBOX = np.concatenate(BBOX, axis=0)
        CBOX = np.concatenate(CBOX, axis=0)
    except:
        pdb.set_trace()

    if random.random() < 0.5:
        Color = Color * np.random.uniform(0.85, 1.0, size=Img.shape)
    Img = (Img / 255 * Color).astype(np.uint8)
    #text_mask = Img[:, :, 3]
    #print(text_mask)
    #plt.imshow(text_mask, cmap='gray')
    #plt.show()

    # expand the (x_min, y_min, x_max, y_max) boxes into 4 corners (UL, UR, BR, BL):
    CBOX = np.reshape(np.stack([CBOX[:, 0], CBOX[:, 1],
                                CBOX[:, 2], CBOX[:, 1],
                                CBOX[:, 2], CBOX[:, 3],
                                CBOX[:, 0], CBOX[:, 3]], axis=-1),
                      newshape=[-1, 4, 2])
    BBOX = np.reshape(np.stack([BBOX[:, 0], BBOX[:, 1],
                                BBOX[:, 2], BBOX[:, 1],
                                BBOX[:, 2], BBOX[:, 3],
                                BBOX[:, 0], BBOX[:, 3]], axis=-1),
                      newshape=[-1, 4, 2])

    text_mask = Img[:, :, 3]
    print(bg_arr.shape, text_mask.shape)
    bg_img_ = cv2.resize(bg_arr[:, :, :3], (text_mask.shape[1], text_mask.shape[0]))
    #plt.imshow(cv2.cvtColor(bg_arr, cv2.COLOR_BGR2RGB))
    #plt.show()
    Colorizer = Colorize()
    Img = Colorizer.process(text_mask, bg_img_, LightMap, FontSize)
    #Img = Colorizer.color(bg_img, text_mask, np.array([FontSize]))

    if SaveID is not None:
        path = osp.join(self.ContentPath, f'word-{SaveID}.png')
        cv2.imwrite(path, Img)
        H, W, _ = Img.shape
        CBOX = CBOX.astype(np.float64)
        BBOX = BBOX.astype(np.float64)
        # normalize box coordinates to [0, 1]:
        CBOX[:, :, 0] /= W
        CBOX[:, :, 1] /= H
        BBOX[:, :, 0] /= W
        BBOX[:, :, 1] /= H
        return path, Texts, CBOX, BBOX, W, H
    return Img, Texts, CBOX, BBOX
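# ---------------------------------------------------------------------------
# Illustration only (this helper is not part of the original code): the CBOX /
# BBOX reshaping in RenderWordImageA expands axis-aligned
# (x_min, y_min, x_max, y_max) boxes into 4 corner points ordered UL, UR, BR, BL,
# using the same stack/reshape as above.
def _boxes_to_corners(boxes):
    """boxes: (n, 4) array of (x_min, y_min, x_max, y_max) -> (n, 4, 2) corners."""
    corners = np.reshape(np.stack([boxes[:, 0], boxes[:, 1],
                                   boxes[:, 2], boxes[:, 1],
                                   boxes[:, 2], boxes[:, 3],
                                   boxes[:, 0], boxes[:, 3]], axis=-1),
                         newshape=[-1, 4, 2])
    return corners

# e.g. _boxes_to_corners(np.array([[10, 20, 50, 40]]))[0]
#      -> [[10, 20], [50, 20], [50, 40], [10, 40]]
# ---------------------------------------------------------------------------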
class RendererV3(object):

    def __init__(self, data_dir, max_time=None):
        self.text_renderer = tu.RenderFont(data_dir)
        self.colorizer = Colorize(data_dir)
        #self.colorizerV2 = colorV2.Colorize(data_dir)

        self.min_char_height = 8  # px
        self.min_asp_ratio = 0.4

        self.max_text_regions = 7

        self.max_time = max_time

    def filter_regions(self, regions, filt):
        """
        filt : boolean list of regions to keep.
        """
        idx = np.arange(len(filt))[filt]
        for k in regions.keys():
            regions[k] = [regions[k][i] for i in idx]
        return regions

    def filter_for_placement(self, xyz, seg, regions):
        filt = np.zeros(len(regions['label'])).astype('bool')
        masks, Hs, Hinvs = [], [], []
        for idx, l in enumerate(regions['label']):
            res = get_text_placement_mask(xyz, seg == l, regions['coeff'][idx], pad=2)
            if res is not None:
                mask, H, Hinv = res
                masks.append(mask)
                Hs.append(H)
                Hinvs.append(Hinv)
                filt[idx] = True
        regions = self.filter_regions(regions, filt)
        regions['place_mask'] = masks
        regions['homography'] = Hs
        regions['homography_inv'] = Hinvs
        return regions

    def warpHomography(self, src_mat, H, dst_size):
        dst_mat = cv2.warpPerspective(src_mat, H, dst_size,
                                      flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR)
        return dst_mat

    def homographyBB(self, bbs, H, offset=None):
        """
        Apply homography transform to bounding-boxes.
        BBS: 2 x 4 x n matrix (2 coordinates, 4 points, n bbs).
        Returns the transformed 2x4xn bb-array.

        offset : a 2-tuple (dx, dy), added to points before transformation.
        """
        eps = 1e-16
        # check the shape of the BB array:
        t, f, n = bbs.shape
        assert (t == 2) and (f == 4)

        # append 1 for homogeneous coordinates:
        bbs_h = np.reshape(np.r_[bbs, np.ones((1, 4, n))], (3, 4 * n), order='F')
        if offset is not None:
            bbs_h[:2, :] += np.array(offset)[:, None]

        # perspective transform:
        bbs_h = H.dot(bbs_h)
        bbs_h /= (bbs_h[2, :] + eps)

        bbs_h = np.reshape(bbs_h, (3, 4, n), order='F')
        return bbs_h[:2, :, :]

    def bb_filter(self, bb0, bb, text):
        """
        Ensure that bounding-boxes are not too distorted after perspective distortion.

        bb0 : 2x4xn matrix of BB coordinates before perspective
        bb  : 2x4xn matrix of BB coordinates after perspective
        text: string of text -- for excluding symbols/punctuation.
        """
        h0 = np.linalg.norm(bb0[:, 3, :] - bb0[:, 0, :], axis=0)
        w0 = np.linalg.norm(bb0[:, 1, :] - bb0[:, 0, :], axis=0)
        hw0 = np.c_[h0, w0]

        h = np.linalg.norm(bb[:, 3, :] - bb[:, 0, :], axis=0)
        w = np.linalg.norm(bb[:, 1, :] - bb[:, 0, :], axis=0)
        hw = np.c_[h, w]

        # remove newlines and spaces:
        text = ''.join(text.split())
        assert len(text) == bb.shape[-1]

        alnum = np.array([ch.isalnum() for ch in text])
        hw0 = hw0[alnum, :]
        hw = hw[alnum, :]

        min_h0, min_h = np.min(hw0[:, 0]), np.min(hw[:, 0])
        asp0, asp = hw0[:, 0] / hw0[:, 1], hw[:, 0] / hw[:, 1]
        asp0, asp = np.median(asp0), np.median(asp)

        asp_ratio = asp / asp0
        is_good = (min_h > self.min_char_height
                   and asp_ratio > self.min_asp_ratio
                   and asp_ratio < 1.0 / self.min_asp_ratio)
        return is_good

    def get_min_h(self, bb, text):
        # find min-height:
        h = np.linalg.norm(bb[:, 3, :] - bb[:, 0, :], axis=0)

        # remove newlines and spaces:
        text = ''.join(text.split())
        assert len(text) == bb.shape[-1]

        alnum = np.array([ch.isalnum() for ch in text])
        h = h[alnum]
        return np.min(h)

    def feather(self, text_mask, min_h):
        # determine the gaussian-blur std:
        if min_h <= 15:
            bsz = 0.25
            ksz = 1
        elif 15 < min_h < 30:
            bsz = max(0.30, 0.5 + 0.1 * np.random.randn())
            ksz = 3
        else:
            bsz = max(0.5, 1.5 + 0.5 * np.random.randn())
            ksz = 5
        return cv2.GaussianBlur(text_mask, (ksz, ksz), bsz)

    def place_text(self, rgb, collision_mask, H, Hinv):
        font = self.text_renderer.font_state.sample()
        font = self.text_renderer.font_state.init_font(font)

        render_res = self.text_renderer.render_sample(font, collision_mask)
        if render_res is None:  # rendering not successful
            return  # None
        else:
            text_mask, loc, bb, text = render_res

        # update the collision mask with text:
        collision_mask += (255 * (text_mask > 0)).astype('uint8')

        # warp the object mask back onto the image:
        text_mask_orig = text_mask.copy()
        bb_orig = bb.copy()
        text_mask = self.warpHomography(text_mask, H, rgb.shape[:2][::-1])
        bb = self.homographyBB(bb, Hinv)

        if not self.bb_filter(bb_orig, bb, text):
            #warn("bad charBB statistics")
            return  # None

        # get the minimum height of the character-BB:
        min_h = self.get_min_h(bb, text)

        # feathering:
        text_mask = self.feather(text_mask, min_h)

        im_final = self.colorizer.color(rgb, [text_mask], np.array([min_h]))

        return im_final, text, bb, collision_mask

    def get_num_text_regions(self, nregions):
        #return nregions
        nmax = min(self.max_text_regions, nregions)
        if np.random.rand() < 0.10:
            rnd = np.random.rand()
        else:
            rnd = np.random.beta(5.0, 1.0)
        return int(np.ceil(nmax * rnd))

    def char2wordBB(self, charBB, text):
        """
        Converts character bounding-boxes to word-level bounding-boxes.

        charBB : 2x4xn matrix of BB coordinates
        text   : the text string

        output : 2x4xm matrix of BB coordinates, where m == number of words.
        """
        wrds = text.split()

        # save character- and word-level text to files:
        # print(len(wrds))
        with open('char_text.txt', 'a') as f:
            with open('word_text.txt', 'a') as f1:
                for i in range(len(wrds)):
                    # f1.write(wrds[i].encode('utf-8'))
                    # f.write('\n')
                    for j in range(len(wrds[i])):
                        # note: the word is appended once per character it contains
                        f1.write(wrds[i])
                        f1.write('\n')
                        f.write(wrds[i][j])
                        f.write('\n')

        bb_idx = np.r_[0, np.cumsum([len(w) for w in wrds])]
        wordBB = np.zeros((2, 4, len(wrds)), 'float32')

        for i in range(len(wrds)):
            cc = charBB[:, :, bb_idx[i]:bb_idx[i + 1]]

            # fit a rotated-rectangle:
            # change shape from 2x4xn_i -> (4*n_i)x2
            cc = np.squeeze(np.concatenate(np.dsplit(cc, cc.shape[-1]), axis=1)).T.astype('float32')
            rect = cv2.minAreaRect(cc.copy())
            box = np.array(cv2.boxPoints(rect))

            # find the permutation of box-coordinates which
            # are "aligned" appropriately with the character-bb
            # (exhaustive search over all possible assignments):
            cc_tblr = np.c_[cc[0, :], cc[-3, :], cc[-2, :], cc[3, :]].T
            perm4 = np.array(list(itertools.permutations(np.arange(4))))
            dists = []
            for pidx in range(perm4.shape[0]):
                d = np.sum(np.linalg.norm(box[perm4[pidx], :] - cc_tblr, axis=1))
                dists.append(d)
            wordBB[:, :, i] = box[perm4[np.argmin(dists)], :].T

        return wordBB

    def render_text(self, rgb, depth, seg, area, label, ninstance=1, viz=False):
        """
        rgb   : HxWx3 image rgb values (uint8)
        depth : HxW depth values (float)
        seg   : HxW segmentation region masks
        area  : number of pixels in each region
        label : region labels == unique(seg) / {0}
                i.e., indices of pixels in SEG which constitute a region mask
        ninstance : no of times image should be used to place text.

        @return:
            res : a list of dictionaries, one for each of the image instances.
                  Each dictionary has the following structure:
                      'img'    : rgb-image with text on it.
                      'charBB' : 2x4xn matrix of bounding-boxes for each character in the image.
                      'wordBB' : 2x4xm matrix of bounding-boxes for each word.
                      'txt'    : a list of strings. The correspondence b/w charBB and txt
                                 is that the i-th non-whitespace character in txt is at
                                 charBB[:,:,i].

            If there's an error in pre-text placement, e.g. if there's no suitable
            region for text placement, an empty list is returned.
        """
        try:
            # depth -> xyz
            xyz = su.DepthCamera.depth2xyz(depth)

            # find text-regions:
            regions = TextRegions.get_regions(xyz, seg, area, label)

            # find the placement mask and homographies:
            regions = self.filter_for_placement(xyz, seg, regions)

            # finally place some text:
            nregions = len(regions['place_mask'])
            if nregions < 1:  # no good region to place text on
                return []
        except:
            # failure in pre-text placement
            #import traceback
            traceback.print_exc()
            return []

        res = []
        for i in range(ninstance):
            place_masks = copy.deepcopy(regions['place_mask'])

            print(colorize(Color.CYAN, " ** instance # : %d" % i))

            idict = {'img': [], 'charBB': None, 'wordBB': None, 'txt': None}

            m = self.get_num_text_regions(nregions)  #np.arange(nregions) #min(nregions, 5*ninstance*self.max_text_regions)
            reg_idx = np.arange(min(2 * m, nregions))
            np.random.shuffle(reg_idx)
            reg_idx = reg_idx[:m]

            placed = False
            img = rgb.copy()
            itext = []
            ibb = []

            # process regions:
            num_txt_regions = len(reg_idx)
            NUM_REP = 5  # re-use each region up to NUM_REP times:
            reg_range = np.arange(NUM_REP * num_txt_regions) % num_txt_regions
            for idx in reg_range:
                ireg = reg_idx[idx]
                try:
                    if self.max_time is None:
                        txt_render_res = self.place_text(img, place_masks[ireg],
                                                         regions['homography'][ireg],
                                                         regions['homography_inv'][ireg])
                    else:
                        with time_limit(self.max_time):
                            txt_render_res = self.place_text(img, place_masks[ireg],
                                                             regions['homography'][ireg],
                                                             regions['homography_inv'][ireg])
                except TimeoutException as msg:
                    print(msg)
                    continue
                except:
                    traceback.print_exc()
                    # some error in placing text on the region
                    placed = False
                    continue

                if txt_render_res is not None:
                    placed = True
                    img, text, bb, collision_mask = txt_render_res
                    # update the region collision mask:
                    place_masks[ireg] = collision_mask
                    # store the result:
                    itext.append(text)
                    ibb.append(bb)

            if placed:
                try:
                    # at least 1 word was placed in this instance:
                    idict['img'] = img
                    idict['txt'] = itext
                    idict['charBB'] = np.concatenate(ibb, axis=2)
                    idict['wordBB'] = self.char2wordBB(idict['charBB'].copy(), ' '.join(itext))

                    res.append(idict.copy())
                    if viz:
                        viz_textbb(1, img, [idict['wordBB']], alpha=1.0)
                        viz_masks(2, img, seg, depth, regions['label'])
                        #viz_regions(rgb.copy(), xyz, seg, regions['coeff'], regions['label'])
                        if i < ninstance - 1:
                            input(colorize(Color.BLUE, 'continue?', True))
                except:
                    print("THIS IS THE REASON WHY!!!!!")
        return res
if __name__ == '__main__':
    # testing:
    Renderer = WordRenderer(is_debug=True, Language="Vietnamese")
    for i in range(40):
        print(f"{i}--------")
        # Img[HxWx4], Texts(List[str]), CBOX[nx4x2], BBOX[nx4x2] (N_box, corner, x/y)
        bg_img = cv2.imread("img_2.jpg")
        # NOTE: this call appears to predate the current RenderWordImageA
        # signature (Height, Width, LightMap, mask, WordColor=..., SaveID=...).
        img, texts, cbox, bbox = Renderer.RenderWordImageA(660, 1000, (30, 30, 30, 240))
        text_mask = img[:, :, 3]
        print(text_mask.shape)
        print(bg_img.shape)
        bg_img = cv2.resize(bg_img, (text_mask.shape[1], text_mask.shape[0]))
        Colorizer = Colorize()
        img = Colorizer.color(bg_img, np.array([text_mask]), np.array([30]))
        print(texts)
        # print(img.shape)

        # visualize the boxes by painting their four edges:
        img_b = img.copy()
        for box in bbox:
            # top edge:
            img_b[box[0, 1]:box[1, 1] + 2, box[0, 0]:box[1, 0] + 2] = [255, 0, 0, 255]
            # bottom edge:
            img_b[box[2, 1]:box[3, 1] + 2, box[3, 0]:box[2, 0] + 2] = [0, 255, 0, 255]
            # left edge:
            img_b[box[0, 1]:box[3, 1] + 2, box[0, 0]:box[3, 0] + 2] = [255, 0, 255, 255]
            # right edge:
            img_b[box[1, 1]:box[2, 1] + 2, box[1, 0]:box[2, 0] + 2] = [0, 255, 255, 255]