def progress(frames):
    """Overlay the averaged detected lines on the most recent frame.

    Lines are detected in every frame, combined by ``last_lines_averages``,
    drawn on a blank 3-channel canvas sized like the first frame, restricted
    by ``region_of_interest`` and blended onto ``frames[-1]``.

    Args:
        frames: Non-empty sequence of image arrays (``frames[0].shape`` and
            ``frames[-1]`` are both read).

    Returns:
        The value returned by ``weighted_img`` for the masked line image and
        the last frame.
    """
    # Run line detection on each frame, keeping one result per frame.
    per_frame_lines = [
        get_lines(img=frame, rho=2, theta=np.pi / 180, threshold=1,
                  min_line_len=15, max_line_gap=5)
        for frame in frames
    ]

    # Blank colour canvas with the height/width of the first frame.
    first_shape = frames[0].shape
    line_img = np.zeros((first_shape[0], first_shape[1], 3), dtype=np.uint8)

    # Average the per-frame detections and draw every resulting lane.
    for lane in last_lines_averages(per_frame_lines):
        draw_lines(line_img, lane)

    # Keep only the region of interest, then blend onto the newest frame.
    return weighted_img(region_of_interest(line_img), frames[-1])
def _vis_train(self, epoch, i, len_loader, img, heatmap, adj_mtx, junctions_gt, adj_mtx_gt):
    """Write training visualisations and mean scores to ``self.writer``.

    Two image grids are logged (ground-truth lines and predicted lines) plus
    two scalars (mean heatmap value at ground-truth junctions and mean
    predicted line score). Predicted lines are built from the ground-truth
    junctions together with the predicted adjacency matrix.

    Args:
        epoch: Current epoch index.
        i: Batch index inside the epoch.
        len_loader: Batches per epoch; ``epoch * len_loader + i`` is the
            global step used for every record.
        img: Images handed to ``draw_lines``.
        heatmap: Per-sample junction heatmaps.
        adj_mtx: Predicted adjacency matrices.
        junctions_gt: Ground-truth junction coordinates (cast to int32 here).
        adj_mtx_gt: Ground-truth adjacency matrices.
    """
    junctions_gt = np.int32(junctions_gt)

    # Ground-truth line drawing.
    gt_lines, gt_scores = graph2line(junctions_gt, adj_mtx_gt)
    gt_grid = vutils.make_grid(draw_lines(img, gt_lines, gt_scores))

    # Predicted connectivity evaluated on the ground-truth junctions.
    pred_lines, pred_scores = graph2line(junctions_gt, adj_mtx, threshold=self.vis_line_th)
    pred_grid = vutils.make_grid(draw_lines(img, pred_lines, pred_scores))

    # Heatmap response at each ground-truth junction; rows whose coordinate
    # sum is not positive are dropped (presumably padding - TODO confirm).
    junc_score = []
    for score_map, juncs in zip(heatmap, junctions_gt):
        kept = juncs[juncs.sum(axis=1) > 0]
        junc_score.extend(score_map[kept[:, 1], kept[:, 0]].tolist())

    # Flatten the per-sample predicted line scores.
    line_score = [value for scores in pred_scores for value in scores.tolist()]

    step = epoch * len_loader + i
    tag_root = self.exp_name + "/" + "train/"
    self.writer.add_image(tag_root + "lines_gt", gt_grid, step)
    self.writer.add_image(tag_root + "lines_pred", pred_grid, step)
    self.writer.add_scalar(tag_root + "mean_junc_score", np.mean(junc_score), step)
    self.writer.add_scalar(tag_root + "mean_line_score", np.mean(line_score), step)
def _vis_eval(self, epoch, i, len_loader, img, heatmap, adj_mtx, junctions_pred, junctions_gt, adj_mtx_gt):
    """Write evaluation visualisations and mean scores to ``self.writer``.

    Logs three image grids (junctions drawn on the heatmap, ground-truth
    lines, predicted lines drawn over the junction overlay) and two scalars
    (mean heatmap value at ground-truth junctions, mean predicted line
    score).

    Args:
        epoch: Current epoch index.
        i: Batch index inside the epoch.
        len_loader: Batches per epoch; ``epoch * len_loader + i`` is the
            global step used for every record.
        img: Images handed to the drawing helpers.
        heatmap: Per-sample junction heatmaps.
        adj_mtx: Predicted adjacency matrices.
        junctions_pred: Predicted junction coordinates.
        junctions_gt: Ground-truth junction coordinates (cast to int32 here).
        adj_mtx_gt: Ground-truth adjacency matrices.
    """
    junctions_gt = np.int32(junctions_gt)

    # Ground-truth line drawing.
    gt_lines, gt_scores = graph2line(junctions_gt, adj_mtx_gt, threshold=self.vis_junc_th)
    gt_grid = vutils.make_grid(draw_lines(img, gt_lines, gt_scores))

    # Predicted junctions drawn on the images, stacked into one array with
    # axis 1 reversed before the predicted lines are drawn on top.
    junc_overlay = draw_jucntions(img, junctions_pred)
    junc_overlay = torch.stack(junc_overlay, dim=0).numpy()[:, ::-1, :, :]
    pred_lines, pred_scores = graph2line(junctions_pred, adj_mtx)
    pred_grid = vutils.make_grid(draw_lines(junc_overlay, pred_lines, pred_scores))

    # Heatmap response at each ground-truth junction; rows whose coordinate
    # sum is not positive are dropped (presumably padding - TODO confirm).
    junc_score = []
    for score_map, juncs in zip(heatmap, junctions_gt):
        kept = juncs[juncs.sum(axis=1) > 0]
        junc_score.extend(score_map[kept[:, 1], kept[:, 0]].tolist())

    # Flatten the per-sample predicted line scores.
    line_score = [value for scores in pred_scores for value in scores.tolist()]

    pooling_grid = vutils.make_grid(draw_jucntions(heatmap, junctions_pred))

    step = epoch * len_loader + i
    tag_root = self.exp_name + "/" + "eval/"
    self.writer.add_image(tag_root + "junction_pooling", pooling_grid, step)
    self.writer.add_image(tag_root + "lines_gt", gt_grid, step)
    self.writer.add_image(tag_root + "lines_pred", pred_grid, step)
    self.writer.add_scalar(tag_root + "mean_junc_score", np.mean(junc_score), step)
    self.writer.add_scalar(tag_root + "mean_line_score", np.mean(line_score), step)
def draw(self, app, cr, mouse_pos):
    """Draw the wire currently being routed from the selected input node.

    Nothing is drawn when no input is selected (``self._input is None``).
    Otherwise a black polyline runs from the input's node position through
    every stored wire position to the snapped mouse position, and a small
    filled circle is drawn at each stored wire position.

    Args:
        app: Object providing ``snap_position``.
        cr: Drawing context forwarded to the ``utils`` drawing helpers.
        mouse_pos: Current mouse position, snapped before use.
    """
    source = self._input
    if source is None:
        return

    black = (0, 0, 0)
    start = source.component.display.node_pos(input=True, idx=source.index)
    snapped_end = app.snap_position(mouse_pos)

    # Polyline: input node -> intermediate wire points -> snapped cursor.
    path = [start] + self._wire_positions + [snapped_end]
    utils.draw_lines(cr, path, black)

    # Mark every intermediate wire point.
    for corner in self._wire_positions:
        utils.draw_circle(cr, corner, 2, black, black)
def land_detection(image):
    """Detect line segments in ``image`` and blend them onto it.

    Pipeline: colour mask (``cf.lower_yellow_white`` .. 255) -> grayscale ->
    Gaussian blur -> Canny edges -> polygonal region of interest -> Hough
    line segments -> lines drawn on a blank canvas -> weighted blend with the
    input image.

    All tunable values are read from ``./config.py`` (relative to the current
    working directory), which is re-executed on every call.

    Args:
        image: Colour image array; it is converted with ``COLOR_BGR2GRAY``.

    Returns:
        ``image * 0.8 + line_img * 1`` as computed by ``cv2.addWeighted``.

    Raises:
        FileNotFoundError: If ``./config.py`` does not exist.
    """
    # Local imports keep this block self-contained.
    import importlib.util
    import sys

    # Load configuration. ``Loader.load_module()`` is deprecated, so the
    # module is built from a spec and executed explicitly instead.
    config_spec = importlib.util.spec_from_file_location('cf', './config.py')
    cf = importlib.util.module_from_spec(config_spec)
    config_spec.loader.exec_module(cf)
    # load_module() used to register the module under this name; keep that.
    sys.modules['cf'] = cf

    # Colour selection: keep pixels between the configured lower bound and
    # pure white.
    color_mask = cv2.inRange(image, np.array(cf.lower_yellow_white),
                             np.array([255, 255, 255]))
    color_select = cv2.bitwise_and(image, image, mask=color_mask)
    grey_image = cv2.cvtColor(color_select, cv2.COLOR_BGR2GRAY)

    # Gaussian smoothing with a square kernel.
    blur_size = (cf.guassin_blur_kernel_size, cf.guassin_blur_kernel_size)
    image_blurred = cv2.GaussianBlur(grey_image, blur_size, 0)

    # Canny edge detection.
    edges = cv2.Canny(image_blurred, cf.canny_low_threshold,
                      cf.canny_high_threshold)

    # Region selection: ratios in the config are scaled by width / height.
    height, width = image.shape[0], image.shape[1]
    vertices = np.array([[x * width, y * height]
                         for x, y in cf.vertices_ratio], dtype=np.int32)
    masked_image = utils.region_of_interest(edges,
                                            np.expand_dims(vertices, axis=0))

    # Probabilistic Hough transform on the masked edge image.
    # NOTE(review): HoughLinesP yields None when nothing is found; whether
    # utils.draw_lines copes with that is not visible here - confirm.
    lines = cv2.HoughLinesP(masked_image, cf.hough_rho,
                            np.pi / cf.hough_theta_scale, cf.hough_threshold,
                            np.array([]),
                            minLineLength=cf.hough_min_line_length,
                            maxLineGap=cf.hough_max_line_gap)

    # Draw the detected segments on a blank colour canvas.
    line_img = np.zeros((edges.shape[0], edges.shape[1], 3), dtype=np.uint8)
    utils.draw_lines(cf, line_img, lines)

    # initial_img * alpha + img * beta + gamma
    lines_edges = cv2.addWeighted(src1=image, alpha=0.8, src2=line_img,
                                  beta=1, gamma=0)
    return lines_edges
def test(self, path_to_image):
    """Run the model on a single image and show the predicted lines.

    The image is read with OpenCV, resized to ``self.img_size`` squared, its
    channel axis reversed, and fed to ``self.model`` as a 1-image batch.
    Predicted junctions and lines are drawn and passed to ``cv2.imshow``.

    Args:
        path_to_image: Path of the image file to process.

    Returns:
        ``self``, so calls can be chained.
    """
    # NOTE: this switches autograd off globally and does not restore it.
    torch.set_grad_enabled(False)
    print(f"test for image: {path_to_image}", flush=True)

    model = self.model.cuda().eval() if self.is_cuda else self.model.eval()

    img = cv2.imread(path_to_image)
    img = cv2.resize(img, (self.img_size, self.img_size))
    # Reverse the channel axis. The reversed view has a negative stride,
    # which torch.from_numpy rejects, so materialise it with copy() first.
    img = img[:, :, ::-1].copy()
    img = torch.from_numpy(img).float().permute(2, 0, 1).unsqueeze(0)
    if self.is_cuda:
        img = img.cuda()

    # Forward pass; the heatmap output is not used for this visualisation.
    junc_pred, _, adj_mtx_pred = model(img)

    # Move everything back to host memory for drawing.
    img = img.cpu().numpy()
    junctions_pred = junc_pred.cpu().numpy()
    adj_mtx = adj_mtx_pred.cpu().numpy()

    img_with_junc = draw_jucntions(img, junctions_pred)
    # NOTE(review): four indices are applied to the first drawn image here,
    # while _vis_eval stacks the whole list before the same indexing -
    # confirm the rank returned by draw_jucntions.
    img_with_junc = img_with_junc[0].numpy()[:, ::-1, :, :]
    lines_pred, score_pred = graph2line(junctions_pred, adj_mtx)
    vis_line_pred = draw_lines(img_with_junc, lines_pred, score_pred)[0]

    # NOTE(review): no cv2.waitKey follows; the caller presumably pumps the
    # GUI event loop - confirm.
    cv2.imshow("result", vis_line_pred)
    return self
def draw_input_wires(self, app, cr):
    """Draw a wire for every connected input of this display's component.

    For each input that has a connected output, a polyline is drawn from the
    input's node position through the input's stored wire positions to the
    output's node position, coloured by ``_wire_color`` of the input's
    ``new_value``. A small filled circle marks each stored wire position.
    Unconnected inputs are skipped.

    Args:
        app: Unused here; kept for the drawing-callback signature.
        cr: Drawing context forwarded to the ``utils`` drawing helpers.
    """
    for slot, pin in enumerate(self._component.inputs):
        source = pin.connected_output
        if source is None:
            # Nothing is wired to this input.
            continue

        start = self.node_pos(True, slot)
        end = source.component.display.node_pos(False, source.index)
        path = [start] + pin.wire_positions + [end]

        wire_colour = _wire_color(pin.new_value)
        utils.draw_lines(cr, path, wire_colour)
        for corner in pin.wire_positions:
            utils.draw_circle(cr, corner, 2, wire_colour, wire_colour)
def golden_test(img_name):
    """Mark landmark triples whose distance ratio is close to ``PHI``.

    The image ``IMG_PATH + img_name`` is loaded and its landmarks extracted.
    For every ratio entry whose fourth element is within 0.005 of ``PHI`` and
    whose associated angle is a number of at least 90, the three landmarks
    are joined with ``draw_lines``. The annotated image is shown, written to
    ``golden-test-angles.jpg`` and the function then waits via ``cvwait``.

    Args:
        img_name: File name appended to ``IMG_PATH``.
    """
    img = cv2.imread(IMG_PATH + img_name)
    lands = lands_from_img(img)
    rs = ratios(all_distances(lands, allow_repeats=True))
    angs = angles(lands)

    for entry in rs:
        # entry = (landmark index, landmark index, landmark index, ratio, ...)
        if not abs(entry[3] - PHI) < 0.005:
            continue

        # Angles are keyed by the sorted triple of landmark indices.
        ang = angs[tuple(sorted((entry[0], entry[1], entry[2])))]
        if not is_number(ang) or ang < 90:
            continue

        corners = [tuple(lands[entry[k]]) for k in range(3)]
        draw_lines(img, corners[0], corners[1], corners[2])

    cv2.imshow('golden-test', img)
    cv2.imwrite('golden-test-angles.jpg', img)
    cvwait()
def get_cells(self, ori_img, table_coords) -> List[np.ndarray]:
    """Extract cell boxes for every table region of ``ori_img``.

    For each table box the crop is passed (with its last axis reversed) to
    ``table_line``; the returned row and column lines are rasterised on a
    blank mask, the connected regions between the lines are labelled, and
    their rectangles become the cell boxes. Boxes are shifted by the table's
    top-left corner so they are expressed in ``ori_img`` coordinates.

    Args:
        ori_img: Full page image array.
        table_coords: Iterable of ``(xmin, ymin, xmax, ymax)`` table boxes.

    Returns:
        One ``[N, 8]`` array of cell boxes per table that produced cells;
        tables without cells are logged and omitted.
    """
    x_columns = [0, 2, 4, 6]
    y_columns = [1, 3, 5, 7]
    cells = []

    for coord in table_coords:  # one bordered table per iteration
        # Integer corners, used for both cropping and shifting.
        xmin, ymin, xmax, ymax = map(int, coord)
        table_img = ori_img[ymin:ymax, xmin:xmax]

        with Timer("lines extraction(in cells extraction)"):
            row_boxes, col_boxes = table_line(self.model,
                                              table_img[..., ::-1],
                                              size=self.shape,
                                              hprob=self.hprob,
                                              vprob=self.vprob)

        # Rasterise the detected lines on a page-sized blank mask.
        line_mask = np.zeros(ori_img.shape[:2], dtype=np.uint8)
        line_mask = draw_lines(line_mask, row_boxes + col_boxes, color=255, lineW=2)

        # Label the 8-connected regions left between the lines.
        labels = measure.label(line_mask < 255, connectivity=2)
        regions = measure.regionprops(labels)
        cell_boxes = np.array(
            minAreaRectbox(regions,
                           flag=False,
                           W=line_mask.shape[1],
                           H=line_mask.shape[0],
                           filtersmall=True,
                           adjustBox=True))

        if cell_boxes.ndim == 1:
            # No cell could be built for this table.
            # TODO: Add Prompt
            RemoteLogger.info("在此表中未构建出cell!")
            continue

        # Shift from crop coordinates back to the original image.
        cell_boxes[:, x_columns] += xmin
        cell_boxes[:, y_columns] += ymin
        # sort boxes to avoid displacement
        # cell_boxes = np.array(utils.sorted_boxes(cell_boxes.reshape(-1, 4, 2))).reshape(-1, 8)
        cells.append(cell_boxes)

    return cells
def table_ceil(self):
    """Extract table cell boxes for every table region in ``self.adBoxes``.

    For each ``(xmin, ymin, xmax, ymax)`` box the crop of ``self.img`` is
    passed (with its last axis reversed) to ``table_line``; the detected row
    and column lines are rasterised on a blank mask, the regions between them
    are labelled, and their rectangles become the cell boxes. Boxes are
    shifted by the table's top-left corner into ``self.img`` coordinates.

    Results are stored on the instance:
        ``self.tableCeilBoxes``: flat list of all cell boxes of all tables.
        ``self.childImgs``: one cropped table image per entry of
            ``self.adBoxes`` (appended even when no cell was found).
    """
    self.tableCeilBoxes = []
    self.childImgs = []

    for box in self.adBoxes:
        xmin, ymin, xmax, ymax = [int(x) for x in box]
        childImg = self.img[ymin:ymax, xmin:xmax]
        rowboxes, colboxes = table_line(childImg[..., ::-1],
                                        size=self.tableLineSize,
                                        hprob=0.5,
                                        vprob=0.5)

        # Rasterise the detected lines on an image-sized blank mask.
        tmp = np.zeros(self.img.shape[:2], dtype='uint8')
        tmp = draw_lines(tmp, rowboxes + colboxes, color=255, lineW=2)

        # Label the 8-connected regions left between the lines.
        labels = measure.label(tmp < 255, connectivity=2)
        regions = measure.regionprops(labels)
        ceilboxes = minAreaRectbox(regions, False, tmp.shape[1], tmp.shape[0],
                                   True, True)
        ceilboxes = np.array(ceilboxes)

        # An empty result becomes a 1-D array, for which the 2-D column
        # indexing below would raise IndexError; only shift real boxes.
        if ceilboxes.ndim == 2:
            ceilboxes[:, [0, 2, 4, 6]] += xmin
            ceilboxes[:, [1, 3, 5, 7]] += ymin
            self.tableCeilBoxes.extend(ceilboxes)

        self.childImgs.append(childImg)
def create_calendar(year, font_file, country, lang, draw_line, white_text):
    """Render one RGBA calendar image per month of ``year`` and save it.

    For each month a blank canvas is created, optionally ruled with
    ``draw_lines``, titled with the localised month name from ``config.yml``
    and filled with day numbers laid out in Sunday-first columns. Sundays and
    holidays are written in red. An alpha channel from
    ``make_transparency_mask`` is appended and the page is saved as
    ``saved_calendars/{m}_{month}_{year}_{country}_{lang}_{white|black}.png``.

    Args:
        year: Calendar year.
        font_file: Path of the TrueType font used for titles and day numbers.
        country: Country passed to ``DateUtil`` and used in the file name.
        lang: Language key into ``config.yml`` for the month names.
        draw_line: When truthy, grid lines are drawn on the canvas.
        white_text: When truthy, text is near-white and the file name ends in
            ``_white``; otherwise text is black and it ends in ``_black``.

    Returns:
        None. The output directory is not created here.
    """
    config_dict = get_dict_from_yml("config.yml")
    cell_dim = 300
    date_util = DateUtil(country=country)

    TITLE_FONT_SIZE = 120
    CELL_FONT_SIZE = 80
    title_font = ImageFont.truetype(font_file, TITLE_FONT_SIZE)
    cell_font = ImageFont.truetype(font_file, CELL_FONT_SIZE)

    text_fill = (250, 250, 250) if white_text else (0, 0, 0)
    colour_suffix = "white" if white_text else "black"
    highlight_fill = (255, 0, 0)

    # Column order, Sunday first: datetime.weekday() 6 (Sunday) -> column 0.
    weekdays = [6, 0, 1, 2, 3, 4, 5]

    for m in range(1, 13):
        month_name = config_dict['language'][lang]['months'][m]

        canvas_np = draw_blank_canvas(cell_dim=cell_dim)
        canvas_img = Image.fromarray(canvas_np.astype('uint8'), 'RGB')
        if draw_line:
            canvas_img = draw_lines(canvas_img)
        canvas_img = write_month_title(title=month_name,
                                       canvas_img=canvas_img,
                                       img_font=title_font,
                                       fill=text_fill)
        draw = ImageDraw.Draw(canvas_img)

        n_days = config_dict['days'][m]
        if m == 2 and date_util.is_leap_year(year):
            n_days += 1

        # Row 0 is left free of day cells; days start in row 1.
        week = 1
        for d in range(1, n_days + 1):
            date = datetime.datetime(year, m, d)
            weekday = date.weekday()

            # Sundays and holidays are red. The holiday lookup result is used
            # for its truth value; comparing it with the datetime object
            # (as before) never matched, assuming is_holiday returns a bool.
            if weekday == 6 or date_util.is_holiday(date=(year, m, d)):
                fill = highlight_fill
            else:
                fill = text_fill

            date_text = f"{d}"
            text_width, text_height = cell_font.getsize(date_text)

            # Centre the number inside its cell.
            cell_corner_x = weekdays.index(weekday) * cell_dim
            cell_corner_y = week * cell_dim
            x = cell_corner_x + cell_dim // 2 - text_width // 2
            y = cell_corner_y + cell_dim // 2 - text_height // 2
            draw.text(xy=(x, y), text=date_text, font=cell_font, fill=fill)

            if weekday == 5:
                # Saturday is the last column: continue on the next row.
                week += 1

        # Build the alpha channel from the grayscale rendering and append it.
        calendar_done_np = np.array(canvas_img)
        calendar_done_grayscale = np.array(canvas_img.convert('L'))
        transparency_mask = make_transparency_mask(
            calendar_done_grayscale[..., np.newaxis].astype('uint8'))
        calendar_png_np = np.concatenate([calendar_done_np, transparency_mask],
                                         axis=-1)
        calendar_png_img = Image.fromarray(calendar_png_np, mode="RGBA")

        calendar_png_img.save(
            f"saved_calendars/{m}_{month_name}_{year}_{country}_{lang}_{colour_suffix}.png"
        )
def process_image(path_to_image, empty_output, output_dir):
    """Split a scanned page into per-title articles and write OCR + XML.

    The page is binarised, lines and closed polygons are masked out, and the
    remaining text contours are split by height into titles and body
    content. Both groups are merged with run-length smoothing, ordered with
    ``determine_precedence``, and consecutive title pairs are then walked to
    collect the content blocks belonging to each article. For every finished
    article the OCR text is appended to
    ``{output_dir}/{image}-article-NN.txt`` and a pretty-printed XML layout
    description is written to ``{output_dir}/{image}-article-NN.xml``.
    Intermediate masks are written to ``output_dir`` as PNG files when the
    root logger level is DEBUG.

    Args:
        path_to_image: Path of the page image to process.
        empty_output: When truthy, every file in ``output_dir`` is deleted
            before processing.
        output_dir: Directory receiving all output files.

    Returns:
        None. Returns early when ``{output_dir}/{image}-*.xml`` already
        exists. Calls ``sys.exit(0)`` when the image file cannot be opened.
    """
    output_path = os.path.dirname(path_to_image)
    last_folder_name = os.path.basename(output_path)
    image_name = os.path.basename(path_to_image)
    image_sans_ext = os.path.splitext(image_name)[0]

    # check if the file exists here and exit if not
    # NOTE(review): exits with status 0 although this is an error path.
    try:
        f = open(path_to_image)
        f.close()
    except FileNotFoundError:
        logging.critical('Given image does not exist')
        sys.exit(0)

    logging.info(f"Processing {image_name}")

    # Skip pages that already produced XML output.
    founds = glob.glob(f'{output_dir}/{image_sans_ext}-*.xml')
    if len(founds) > 0:
        logging.info(f"FILE EXISTS: {founds}")
        return

    # Optionally wipe the output directory first.
    if empty_output:
        files = glob.glob('{}/*'.format(output_dir))
        for f in files:
            os.remove(f)

    # standardize size of the images maintaining aspect ratio
    image = cv2.imread(path_to_image)  # reading the image
    # NOTE(review): height/width are captured before the resize below and are
    # later written to the XML PrintSpace, while all contour coordinates come
    # from the resized image - confirm this is intended.
    image_height = image.shape[0]
    image_width = image.shape[1]
    if image_width != 2048:
        image = imutils.resize(image, width=2048)
    gray = cv2.cvtColor(image,
                        cv2.COLOR_BGR2GRAY)  # converting to grayscale image

    # applying thresholding technique on the grayscale image
    # all pixels value above 0 will be set to 255 but because we are using THRESH_OTSU
    # we avoid having to set a threshold (i.e. 0 = just a placeholder) since otsu's method does it automatically
    (thresh, im_bw) = cv2.threshold(
        gray, 0, 255,
        cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)  # converting to binary image
    # invert image data using unary tilde operator
    # im_bw = ~im_bw

    # Noise removal step - Perform opening on the thresholded image (erosion followed by dilation)
    kernel = np.ones((2, 2), np.uint8)  # kernel noise size (2,2)
    im_bw = cv2.morphologyEx(
        im_bw, cv2.MORPH_OPEN,
        kernel)  # cleans up random lines that appear on the page
    if logging.getLogger().level == logging.DEBUG:
        cv2.imwrite(
            os.path.join(output_dir, f'{image_sans_ext}-im-negative.png'),
            im_bw)
    if logging.getLogger().level == logging.DEBUG:
        cv2.imwrite(os.path.join(output_dir, f'{image_sans_ext}-im-bw.png'),
                    ~im_bw)

    # extract and draw any lines from the image
    lines_mask = draw_lines(image, gray)
    if logging.getLogger().level == logging.DEBUG:
        cv2.imwrite(
            os.path.join(output_dir, f'{image_sans_ext}-lines-mask.png'),
            lines_mask)  # debug remove

    # extract complete shapes likes boxes of ads and banners
    found_polygons_mask = extract_polygons(im_bw, lines_mask)
    if logging.getLogger().level == logging.DEBUG:
        cv2.imwrite(
            os.path.join(output_dir,
                         f'{image_sans_ext}-found-polygons-mask.png'),
            found_polygons_mask)  # debug remove

    # nullifying the mask of unwanted polygons over binary (toss images)
    # this should now only have texts, without images
    text_im_bw = cv2.bitwise_and(im_bw, im_bw, mask=found_polygons_mask)
    if logging.getLogger().level == logging.DEBUG:
        cv2.imwrite(
            os.path.join(output_dir,
                         f'{image_sans_ext}-text-im-bw-negative.png'),
            ~text_im_bw)

    # initialize blank (white) images for extracted titles and contents
    titles_mask = np.ones(image.shape[:2], dtype="uint8") * 255
    contents_mask = np.ones(image.shape[:2], dtype="uint8") * 255

    (contours, _) = cv2.findContours(text_im_bw, cv2.RETR_EXTERNAL,
                                     cv2.CHAIN_APPROX_SIMPLE)
    heights = [cv2.boundingRect(contour)[3] for contour in contours]
    # NOTE(review): raises ZeroDivisionError when no contour was found.
    avgheight = sum(heights) / len(heights)

    title_widths = []
    content_widths = []
    if logging.getLogger().level == logging.DEBUG:
        debug_contents_mask = np.ones(
            image.shape,
            dtype="uint8") * 255  # blank 3 layer image for debug colour

    # finding the larger text
    for c in contours:
        [x, y, w, h] = cv2.boundingRect(c)
        cv2.rectangle(contents_mask, (x, y), (x + w, y + h), (255, 0, 0), 1)
        if h > 2 * avgheight:
            # More than twice the average height: treat as title text.
            cv2.drawContours(titles_mask, [c], -1, 0, -1)
            title_widths.append(w)
        elif h * w > 20:  # remove specks on dots
            # get the biggest chunks of texts... articles!
            cv2.drawContours(contents_mask, [c], -1, 0, -1)
            content_widths.append(w)
        if logging.getLogger().level == logging.DEBUG:
            cv2.drawContours(debug_contents_mask, [c], -1, 0, -1)
            cv2.rectangle(debug_contents_mask, (x, y), (x + w, y + h),
                          (0, 255, 0), 1)
    if logging.getLogger().level == logging.DEBUG:
        cv2.imwrite(
            os.path.join(output_dir,
                         f'{image_sans_ext}-debug_drawn_contours.png'),
            debug_contents_mask)

    # helps further detach titles if necessary. This step can be removed
    # titles_mask = cv2.erode(titles_mask, kernel, iterations = 1)
    m_height, m_width = titles_mask.shape  # get image dimensions, height and width

    # make 2D Image mask of proto-original image for cutting contents
    image_mask = np.ones(image.shape,
                         dtype="uint8") * 255  # blank 3 layer image
    image_mask[0:m_height, 0:m_width] = image[0:m_height, 0:m_width]

    # run length smoothing algorithms for vertical and lateral conjoining of pixels
    # NOTE(review): raises ZeroDivisionError when no title contour was found.
    value = math.ceil(sum(title_widths) / len(title_widths)) * 2
    logging.info(f'RLSA Title Value {value}')
    rlsa_titles_mask = rlsa.rlsa(titles_mask, True, False,
                                 value)  # rlsa application
    rlsa_titles_mask_for_final = rlsa_titles_mask
    if logging.getLogger().level == logging.DEBUG:
        cv2.imwrite(
            os.path.join(output_dir, f'{image_sans_ext}-rlsa-titles-mask.png'),
            rlsa_titles_mask)  # debug remove

    # NOTE(review): raises ZeroDivisionError when no content contour was found.
    value = math.ceil(sum(content_widths) / len(content_widths)) * 3
    logging.info(f'RLSA Content Value {value}')
    rlsa_contents_mask = rlsa.rlsa(contents_mask, False, True,
                                   value)  # rlsa application
    rlsa_contents_mask_for_avg_width = rlsa_contents_mask
    if logging.getLogger().level == logging.DEBUG:
        cv2.imwrite(
            os.path.join(output_dir,
                         f'{image_sans_ext}-rlsa-contents-mask.png'),
            rlsa_contents_mask)  # debug remove

    # get avg properties of columns
    contents_sum_list, contents_x_list, for_avgs_contours_mask = column_summaries(
        image, rlsa_contents_mask_for_avg_width)
    if logging.getLogger().level == logging.DEBUG:
        cv2.imwrite(
            os.path.join(output_dir,
                         f'{image_sans_ext}-for-avgs-contours-mask.png'),
            for_avgs_contours_mask)  # debug remove
    trimmed_mean = int(stats.trim_mean(contents_sum_list, 0.1))  # trimmed mean
    leftmost_x = min(contents_x_list)

    threshold = 2500  # remove tiny contours that dirtify the image

    ### titles work
    (contours, _) = cv2.findContours(~rlsa_titles_mask, cv2.RETR_EXTERNAL,
                                     cv2.CHAIN_APPROX_SIMPLE)
    # apply some heuristic to differentiate other stranger things masquerading as titles
    nt_contours = [
        contour for contour in contours
        if cv2.boundingRect(contour)[2] * cv2.boundingRect(contour)[3] > threshold
    ]
    total_columns = int(image.shape[1] / trimmed_mean)
    contours = sorted(
        nt_contours,
        key=lambda contour: determine_precedence(
            contour, total_columns, trimmed_mean, leftmost_x, m_height))
    clear_titles_mask = redraw_titles(image, contours)
    # draw_columns(leftmost_x, trimmed_mean, total_columns, clear_titles_mask)
    if logging.getLogger().level == logging.DEBUG:
        cv2.imwrite(
            os.path.join(output_dir,
                         f'{image_sans_ext}-clear-titles-mask.png'),
            clear_titles_mask)  # debug remove

    ### contents work
    (contours, _) = cv2.findContours(~rlsa_contents_mask, cv2.RETR_EXTERNAL,
                                     cv2.CHAIN_APPROX_SIMPLE)
    # apply some heuristic to differentiate other stranger things masquerading as titles
    nt_contours = [
        contour for contour in contours
        if cv2.boundingRect(contour)[2] * cv2.boundingRect(contour)[3] > threshold
    ]
    contents_contours = sorted(
        nt_contours,
        key=lambda contour: determine_precedence(
            contour, total_columns, trimmed_mean, leftmost_x, m_height))
    clear_contents_mask = redraw_contents(image_mask, contents_contours)
    # draw_columns(leftmost_x, trimmed_mean, total_columns, clear_contents_mask)
    if logging.getLogger().level == logging.DEBUG:
        cv2.imwrite(
            os.path.join(output_dir,
                         f'{image_sans_ext}-sorted-clear-contents-mask.png'),
            clear_contents_mask)

    # start printing individual letters based on titles! The final act
    (contours, _) = cv2.findContours(~rlsa_titles_mask_for_final,
                                     cv2.RETR_EXTERNAL,
                                     cv2.CHAIN_APPROX_SIMPLE)
    # apply some heuristic to differentiate other stranger things masquerading as titles
    nt_contours = [
        contour for contour in contours
        if cv2.boundingRect(contour)[2] * cv2.boundingRect(contour)[3] > threshold
    ]
    contours = sorted(
        nt_contours,
        key=lambda contour: determine_precedence(
            contour, total_columns, trimmed_mean, leftmost_x, m_height))

    article_complete = False
    title_came_up = True
    title_count = len(contours)
    ct_widths = []
    article_mask = np.ones(
        image.shape,
        dtype="uint8") * 255  # blank layer image for one article

    # XML skeleton for the first article.
    letter_root = ET.Element("letter")
    desc = ET.SubElement(letter_root, "description")
    ET.SubElement(desc, "MeasurementUnit").text = "pixel"
    ocv_proc = ET.SubElement(desc, "OPenCVProcessing",
                             pageImage=image_sans_ext)
    ET.SubElement(ocv_proc, "ProcessingDateTime").text = str(datetime.today())
    ET.SubElement(ocv_proc, "Script").text = 'Lettersiterate'
    layout = ET.SubElement(letter_root, "Layout")
    page = ET.SubElement(layout, "Page")
    print_space = ET.SubElement(page,
                                "PrintSpace",
                                height=str(image_height),
                                width=str(image_width),
                                xpos=str(0),
                                ypos=str(0))
    # ET.Element(print_space, attrib={'height':image_height, 'width':image_width, 'xpos':0, 'ypos':0})

    # Walk consecutive (current, next) title pairs; with fewer than two
    # titles the loop body never runs and nothing is written.
    # for idx, contour in enumerate(contours):
    for idx, (_curr, _next) in enumerate(zip(contours[::], contours[1::])):
        # https://www.quora.com/How-do-I-iterate-through-a-list-in-python-while-comparing-the-values-at-adjacent-indices/answer/Jignasha-Patel-14
        if article_complete:
            # The previous article was written out: start a fresh canvas and
            # a fresh XML tree (this one has no pageImage attribute).
            article_mask = np.ones(
                image.shape, dtype="uint8"
            ) * 255  # blank layer image for another separate letter
            # xml file
            letter_root = ET.Element("letter")
            desc = ET.SubElement(letter_root, "description")
            ET.SubElement(desc, "MeasurementUnit").text = "pixel"
            ocv_proc = ET.SubElement(desc, "OPenCVProcessing")
            ET.SubElement(ocv_proc,
                          "ProcessingDateTime").text = str(datetime.today())
            ET.SubElement(ocv_proc, "Script").text = 'Lettersiterate'
            layout = ET.SubElement(letter_root, "Layout")
            page = ET.SubElement(layout, "Page")
            print_space = ET.SubElement(page,
                                        "PrintSpace",
                                        height=str(image_height),
                                        width=str(image_width),
                                        xpos=str(0),
                                        ypos=str(0))

        [cx, cy, cw, ch] = cv2.boundingRect(_curr)
        [nx, ny, nw, nh] = cv2.boundingRect(_next)
        ct_height = cy + ch  # title height in this column
        ct_widths.append(cx + cw)
        ct_width = max(
            ct_widths
        )  # adjust to get longest title width if multiple line title :)

        # dont proceed any further if the next title is right below it on same column
        # continue to next title
        # current and next have to be within the same column
        # detect last article in the columns
        if (idx + 2) == title_count:
            title_came_up = False
        elif cy < ny and ny - (nh * 3) < cy and nx < ct_width:
            # 1) current title is above next
            # 2) next title is directly above current
            # 3) next title is within the length of the current title. Cannot be in another column
            # and considered directly below current. Phew!, it happened
            title_came_up = True
        else:
            title_came_up = False

        if not title_came_up:
            title_encounters = 0
            # loop through contents within these boundaries and insert them to the canvas
            for content_idx, content_contour in enumerate(contents_contours):
                [x, y, w, h] = cv2.boundingRect(content_contour)
                content_width = x + w
                # length -50 is to be safe sometimes the content cut maybe infringe onto the next title
                # get any content that starts within the title (take out -50) and within the end of the title width
                # and give (+50), it is still below the title
                logging.debug(
                    f"{x} >= {cx-50} and {x} <= {ct_width} and {y+50} > {ct_height}"
                )
                if x >= cx - 50 and x <= ct_width and y + 50 > ct_height:
                    # now that we have limited the content to be within the width and below the title of interest
                    # make sure it does not transgress into other titles. The bane of my existence begins, sigh!
                    for tidx, tcontour in enumerate(contours):
                        [tx, ty, tw, th] = cv2.boundingRect(tcontour)
                        # validating titles that are directly below
                        # 1) it has to be greater than the current title
                        # 2) it starts within the width of the current title
                        # 3) it starts within the width of the current content
                        # 4) it does not start left of the content even if we take out 50 pixels to the left (-50)
                        if tidx > idx and tx < ct_width and tx < content_width and tx > x - 50 and title_encounters < 1:
                            # print(f"TITLE BELOW---> ###{content_idx} ##{tidx} > #{idx} and {tx} < {content_width} and {cx} >= {x-50}")
                            article_mask = cutouts(article_mask,
                                                   clear_contents_mask,
                                                   content_contour)
                            ET.SubElement(print_space,
                                          "BodyText",
                                          height=str(h),
                                          width=str(w),
                                          xpos=str(x),
                                          ypos=str(y),
                                          contourId=str(idx),
                                          bodyTextContourId=str(content_idx))
                            # cv2.putText(article_mask, "###{content_idx},{x},{y}.{w},{h}", cv2.boundingRect(content_contour)[:2], cv2.FONT_HERSHEY_PLAIN, 1.50, [255, 0, 0], 2)
                            title_encounters += 1
                            # hitting a title in this case means we don't need to go any further for current content
                            break
                        # validating titles that are on a different column
                        # 1) it has to be greater than the current title
                        # 2) it starts within the width of the current title
                        # 3) it starts below this content but within the contents limits (meaning it is multicolumn extension)
                        if tidx > idx and tx < ct_width and (
                                ty > y
                                and tx > x - 50) and title_encounters < 1:
                            article_mask = cutouts(article_mask,
                                                   clear_contents_mask,
                                                   content_contour)
                            ET.SubElement(print_space,
                                          "BodyText",
                                          height=str(h),
                                          width=str(w),
                                          xpos=str(x),
                                          ypos=str(y),
                                          contourId=str(idx),
                                          bodyTextContourId=str(content_idx))
                    # validating titles that are at the end of the column
                    # 1) there is no title directly below it
                    if all(x < cv2.boundingRect(tcontour)[0]
                           for tidx, tcontour in enumerate(contours)
                           if tidx > idx and cv2.boundingRect(tcontour)[0] >
                           content_width) and title_encounters < 1:
                        article_mask = cutouts(article_mask,
                                               clear_contents_mask,
                                               content_contour)
                        ET.SubElement(print_space,
                                      "BodyText",
                                      height=str(h),
                                      width=str(w),
                                      xpos=str(x),
                                      ypos=str(y),
                                      contourId=str(idx),
                                      bodyTextContourId=str(content_idx))

        if title_came_up:
            # The next title continues this one (multi-line title): copy the
            # current title and keep accumulating into the same article.
            ct_widths.append(cx + cw)
            article_title_p = clear_titles_mask[cy:cy + ch, cx:cx + cw]
            article_mask[
                cy:cy + ch, cx:cx +
                cw] = article_title_p  # copied title contour onto the blank image
            ET.SubElement(print_space,
                          "Title",
                          height=str(ch),
                          width=str(cw),
                          xpos=str(cx),
                          ypos=str(cy),
                          contourId=str(idx))
            article_complete = False
        else:
            # The article ends here: copy the title, then write it out.
            ct_widths = []  # reset widths
            article_title_p = clear_titles_mask[cy:cy + ch, cx:cx + cw]
            article_mask[
                cy:cy + ch, cx:cx +
                cw] = article_title_p  # copied title contour onto the blank image
            ET.SubElement(print_space,
                          "Title",
                          height=str(ch),
                          width=str(cw),
                          xpos=str(cx),
                          ypos=str(cy),
                          contourId=str(idx))
            if (idx + 2) == title_count:  # we are at the end
                article_title_p = clear_titles_mask[ny:ny + nh, nx:nx + nw]
                article_mask[
                    ny:ny + nh, nx:nx +
                    nw] = article_title_p  # copied title contour onto the blank image

            file_name = f"article-{str(idx).zfill(2)}"
            if logging.getLogger().level == logging.DEBUG:
                cv2.imwrite(
                    os.path.join(output_dir,
                                 f"{image_sans_ext}-{file_name}.png"),
                    article_mask)
            article_complete = True

            # OCR the assembled article; the text file is opened in append
            # mode, so repeated runs add to an existing file.
            content = pytesseract.image_to_string(
                Image.fromarray(article_mask))
            with open(
                    os.path.join(output_dir,
                                 f'{image_sans_ext}-{file_name}.txt'),
                    'a') as the_file:
                the_file.write(content)

            ET.SubElement(page,
                          "TextBlock",
                          articleNo=str(file_name),
                          contourId=str(idx)).text = content
            tree = ET.ElementTree(letter_root)
            xml_output_file = os.path.join(
                output_dir, f'{image_sans_ext}-{file_name}.xml')
            # this method may cause 'OSError: [Errno 24] Too many open files' and does not prettyprint
            # tree.write(xml_output_file, encoding='utf8')
            # OR
            xmlstr = ET.tostring(letter_root).decode()
            xmlstr = minidom.parseString(xmlstr).toprettyxml(indent="\t",
                                                             newl="\n")
            with open(xml_output_file, 'w+') as outfile:
                outfile.write(xmlstr)
colboxes[j] = line_to_line(colboxes[j], rowboxes[i], 10) return rowboxes, colboxes if __name__ == '__main__': DEBUG = False p = 'merged.jpg' img = cv2.imread(p) t = time.time() rowboxes, colboxes = table_line(img[..., ::-1], size=(512, 512), hprob=0.5, vprob=0.5) img = draw_lines(img, rowboxes + colboxes, color=(255, 0, 0), lineW=2) if DEBUG: blank = np.zeros(img.shape[:2], dtype=np.uint8) blank = draw_lines(blank, rowboxes + colboxes, lineW=2) cv2.namedWindow('hello', cv2.WINDOW_AUTOSIZE) cv2.imshow('hello', blank) cv2.waitKey(0) # 合并rowboxes中相近的直线 clusters = [LineCluster(len(rowboxes)) for _ in range(len(rowboxes))] for i in range(len(rowboxes), -1, -1): pass # fld = cv2.ximgproc.createFastLineDetector() # lines = fld.detect(blank)
class Vision: # Get parameters, initialize subscribers and publishers, etc. def __init__(self): self.camera_topic = rospy.get_param("~camera_topic") self.vision_output_topic = rospy.get_param("~vision_output_topic") self.labeled_image_topic = rospy.get_param("~labeled_image_topic") self.obj_detect_results_topic = rospy.get_param("~obj_detect_results_topic") self.classes_path = rospy.get_param("~classes_path") self.class_names = _get_class(self.classes_path) self.camera_sub = rospy.Subscriber(self.camera_topic, Image, self.camera_callback) # self.camera_sub = rospy.Subscriber(self.camera_topic + "/compressed", # CompressedImage, self.camera_callback, queue_size=1) self.obj_detect_results_sub = rospy.Subscriber(self.obj_detect_results_topic, DetectionResults, self.obj_detect_results_callback, queue_size=1) self.labels_visualizer = utils.LabelsVisualizer(self.class_names) self.vision_output_pub = rospy.Publisher(self.vision_output_topic, VisionOutput, queue_size=1) # self.labeled_image_pub = rospy.Publisher(self.labeled_image_topic, Image, queue_size=1) self.labeled_image_pub = rospy.Publisher(self.labeled_image_topic + "/compressed", CompressedImage, queue_size=1) # bridge is no longer necessary for CompressedImage, consider removing the line below self.bridge = CvBridge() # Instantiate bridge between cv2 and ROS self.clear_processing_output() self.last_obj_detect_results_time = rospy.get_time() #Threshold parameters for determining whether or not to call controller routines self.stop_sign_thr = rospy.get_param("~stop_sign_thr") #Minimum bbox area for stop sign self.yield_sign_thr = rospy.get_param("~yield_sign_thr") #Minimum bbox are for yield sign self.traffic_light_thr = rospy.get_param("~traffic_light_thr") #Minimum bbox are for traffic light self.pedestrian_sign_thr = rospy.get_param("~pedestrian_sign_thr") #Minimum bbox for pedestrian sign self.bbox_low_midpoint = None #State variable attributes self.centerlines = [] self.are_cones = False 
self.is_disabled_parking = False self.is_pedestrian_sign = False self.is_pedestrian = False self.is_stop_sign = False self.is_traffic_light = False self.is_green = False self.obstacle_distance = -1 #TODO: Initialize to large or small num? self.TAcar_distance = -1 #TODO: Initialize to large or small num? self.TAcar_loc = [0,0] # I am the TA jkjk self.pedestrian_distance = 0 #TODO: Initialize to large or small num? self.start_parking_time = 0 self.is_parking = False def camera_callback(self, msg): try: # Convert your ROS Image message to OpenCV2 image = self.bridge.imgmsg_to_cv2(msg, "bgr8") print "new image at time %.2f" % rospy.get_time() except CvBridgeError, e: print("error converting imgmsg to cv2: %s" % e) # image = utils.compressed_imgmsg_to_cv2(msg) centerlines_world = lane_segmentation.centerlines(image) # for visualizing line in rqt_image_view new_lines_world = np.reshape(centerlines_world, (-1, 2)) # reshape to array of endpoints new_lines_warped = homography.world_to_orig_image_fn(new_lines_world) centerlines_warped = np.reshape(new_lines_warped, (-1, 4)) line_types = ["other" for _ in centerlines_warped] #print('CENTERLINES_WORLD', centerlines_world) #print('CENTERLINES_WARPED', centerlines_warped) # self.detect_objects(image) # TODO: add other processing steps ''' try: # publish annotated image for real-time visualization labeled_image = image # TODO: CHANGE self.labeled_image_pub.publish(self.bridge.cv2_to_imgmsg(labeled_image, "bgr8")) except CvBridgeError as e: print("error converting cv2 to imgmsg: %s" % e) ''' # CONES pcones = cone_segmentation.check_cones(image) self.mid_cone_u, self.mid_cone_v = -1, -1 self.mid_cone_x, self.mid_cone_y = -1, -1 if pcones is not None: size_smallest_cone = pcones["size_smallest_cone"] if size_smallest_cone > 30000: print("size_smallest_cone too large %.2f" % size_smallest_cone) elif self.is_parking and 6 < (rospy.get_time() - self.start_parking_time) < 8 and pcones["goal_point_smallest"] is not None: mid_cone_u, 
mid_cone_v = pcones["goal_point_smallest"] mid_cone_x, mid_cone_y = homography.apply_homography(mid_cone_u, mid_cone_v) self.mid_cone_u, self.mid_cone_v = mid_cone_u, mid_cone_v self.mid_cone_x, self.mid_cone_y = mid_cone_x, mid_cone_y print "use smallest two cones' midpoint instead; x: %.2f, y: %.2f" % (self.mid_cone_x, self.mid_cone_y) elif self.is_parking and (rospy.get_time() - self.start_parking_time) >= 8: self.is_parking = False elif pcones["goal_point_largest_3"] is not None: mid_cone_u, mid_cone_v = pcones["goal_point_largest_3"] mid_cone_x, mid_cone_y = homography.apply_homography(mid_cone_u, mid_cone_v) if mid_cone_x < 2: self.mid_cone_u, self.mid_cone_v = mid_cone_u, mid_cone_v self.mid_cone_x, self.mid_cone_y = mid_cone_x, mid_cone_y print "found 3 cones - midpoint is x: %.2f, y: %.2f" % (self.mid_cone_x, self.mid_cone_y) if not self.is_parking: self.start_parking_time = rospy.get_time() self.is_parking = True print "start parking at time %.2f" % rospy.get_time() if self.labeled_image_pub.get_num_connections() > 0: # draw road lines labeled_image = utils.draw_lines(image, centerlines_warped, line_types) # draw bboxes labeled_image = self.labels_visualizer.draw_bboxes(labeled_image, self.out_boxes, self.out_scores, self.out_classes) # draw obstacle low midpoint if self.bbox_low_midpoint is not None: u, v = self.bbox_low_midpoint u, v = int(u), int(v) print("obstacle low midpoint at %d, %d" % (u, v) ) labeled_image = cv2.circle(labeled_image, (u, v), 10, (0, 0, 255), -1) # draw cone midpoint if self.mid_cone_u != -1: u, v = self.mid_cone_u, self.mid_cone_v print("cone midpoint at %d, %d" % (u, v)) labeled_image = cv2.circle(labeled_image, (u, v), 10, (255, 0, 0), -1) msg = utils.cv2_to_compressed_imgmsg(labeled_image) self.labeled_image_pub.publish(msg) if self.vision_output_pub.get_num_connections() > 0: self.centerlines = centerlines_world self.publish_vision_message() '''
def process_image(path_to_image, empty_output, out_dir_name):
    """Extract the text block near the top-right of a page image and log it.

    The image is binarised, lines and boxed regions are masked out, text
    contours in the top-right area (``x > 1000`` and ``y < 100`` after the
    page is scaled to 2048 px wide) are merged with RLSA, the resulting
    cut-out is deskewed and passed to Tesseract. The first line of the OCR
    output is appended, together with the image stem, as a tab-separated
    row to ``dates/<out_dir_name>.csv`` under the current directory.

    Args:
        path_to_image: path of the image file to process.
        empty_output: when truthy, every file in the ``dates`` output
            directory is removed before processing.
        out_dir_name: stem of the CSV file the result row is appended to.

    Returns:
        None. Returns early (without OCR) when a ``<stem>-*.xml`` file
        already exists in the output directory.

    Raises:
        SystemExit: with status 1 when the image is missing or cannot be
            decoded.
    """
    image_name = os.path.basename(path_to_image)
    img_sans_ext = os.path.splitext(image_name)[0]

    # Bail out early on a missing input; a non-zero status lets callers and
    # shell pipelines see the failure.
    if not os.path.isfile(path_to_image):
        log.critical('Given image does not exist')
        sys.exit(1)

    log.info(f"Processing {image_name}")

    # create out dir
    final_dir = os.path.join(os.getcwd(), r'dates')
    os.makedirs(final_dir, exist_ok=True)

    # Escape the stem so glob metacharacters in a file name ("[", "*", "?")
    # are matched literally.
    founds = glob.glob(f'{final_dir}/{glob.escape(img_sans_ext)}-*.xml')
    if founds:
        log.info(f"FILE EXISTS: {founds}")
        return

    if empty_output:
        for stale in glob.glob('{}/*'.format(final_dir)):
            os.remove(stale)

    # Evaluate the debug switch once; it gates every intermediate dump below.
    debug = log.getLogger().level == log.DEBUG

    image = cv2.imread(path_to_image)  # reading the image
    if image is None:
        # cv2.imread signals an unreadable/unsupported file by returning None.
        log.critical('Given image could not be read')
        sys.exit(1)

    # standardize size of the images maintaining aspect ratio
    if image.shape[1] != 2048:
        image = imutils.resize(image, width=2048)

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # converting to grayscale image

    # With THRESH_OTSU the threshold argument (0) is only a placeholder:
    # Otsu's method picks the threshold automatically.
    _, im_bw = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Noise removal step - opening (erosion followed by dilation) with a
    # 2x2 kernel cleans up random lines that appear on the page.
    kernel = np.ones((2, 2), np.uint8)
    im_bw = cv2.morphologyEx(im_bw, cv2.MORPH_OPEN, kernel)
    if debug:
        cv2.imwrite(os.path.join(final_dir, f'{img_sans_ext}-im-negative.png'), im_bw)
        cv2.imwrite(os.path.join(final_dir, f'{img_sans_ext}-im-bw.png'), ~im_bw)

    # extract and draw any lines from the image
    lines_mask = draw_lines(image, gray)
    if debug:
        cv2.imwrite(os.path.join(final_dir, f'{img_sans_ext}-lines-mask.png'), lines_mask)

    # extract complete shapes likes boxes of ads and banners
    found_polygons_mask = extract_polygons(im_bw, lines_mask)
    if debug:
        cv2.imwrite(
            os.path.join(final_dir, f'{img_sans_ext}-found-polygons-mask.png'),
            found_polygons_mask)

    # nullifying the mask of unwanted polygons over binary (toss images);
    # what is left should be text only
    text_im_bw = cv2.bitwise_and(im_bw, im_bw, mask=found_polygons_mask)
    if debug:
        cv2.imwrite(
            os.path.join(final_dir, f'{img_sans_ext}-text-im-bw-negative.png'),
            ~text_im_bw)

    # initialize blank image for extracted contents
    contents_mask = np.ones(image.shape[:2], dtype="uint8") * 255
    (contours, _) = cv2.findContours(text_im_bw, cv2.RETR_EXTERNAL,
                                     cv2.CHAIN_APPROX_SIMPLE)

    heights = [cv2.boundingRect(contour)[3] for contour in contours]
    # No contours means the loop below never runs, so any value works here;
    # the guard only avoids dividing by zero on a blank page.
    avgheight = sum(heights) / len(heights) if heights else 0

    content_widths = []
    if debug:
        # blank 3 layer image for debug colour
        debug_mask = np.ones(image.shape, dtype="uint8") * 255

    for c in contours:
        [x, y, w, h] = cv2.boundingRect(c)
        cv2.rectangle(contents_mask, (x, y), (x+w, y+h), (255, 0, 0), 1)
        if h > 2*avgheight:
            # avoid titles altogether
            continue
        if h*w > 20 and x > 1000 and y < 100:
            # skip specks or dots; keep text chunks in the top-right area
            cv2.drawContours(contents_mask, [c], -1, 0, -1)
            content_widths.append(w)
            if debug:
                cv2.drawContours(debug_mask, [c], -1, 0, -1)
                cv2.rectangle(debug_mask, (x, y), (x+w, y+h), (0, 255, 0), 1)

    if debug:
        cv2.imwrite(os.path.join(
            final_dir, f'{img_sans_ext}-debug_drawn_contours.png'), debug_mask)

    # get image dimensions, height and width
    m_height, m_width = contents_mask.shape

    # copy of the (resized) original used for cutting contents out of
    image_mask = np.ones(image.shape, dtype="uint8") * 255
    image_mask[0: m_height, 0: m_width] = image[0: m_height, 0: m_width]

    # RLSA run length: five times the mean kept-contour width, or 140 when
    # nothing was kept.
    if content_widths:
        value = math.ceil(sum(content_widths)/len(content_widths))*5
    else:
        value = 140
    log.info(f'RLSA Content Value {value}')

    # rlsa application
    rlsa_contents_mask = rlsa.rlsa(contents_mask, True, False, value)
    if debug:
        cv2.imwrite(os.path.join(
            final_dir, f'{img_sans_ext}-rlsa-contents-mask.png'),
            rlsa_contents_mask)  # debug remove

    threshold = 1500  # remove tiny contours that dirtify the image

    # contents work
    (contours, _) = cv2.findContours(~rlsa_contents_mask, cv2.RETR_EXTERNAL,
                                     cv2.CHAIN_APPROX_SIMPLE)

    # blank layer image for one article
    article_mask = np.ones(image.shape, dtype="uint8") * 255

    # loop through and insert it to the canvas
    for content_contour in contours:
        [x, y, w, h] = cv2.boundingRect(content_contour)
        if w * h <= threshold:
            # too small to be real content
            continue
        if x > 1000 and y < 100:
            log.debug(f"{x} >= {x-50} and {x} {y+50}")
            article_mask = cutouts(article_mask, image_mask, content_contour)

    angle, rotated_article_mask = correct_skew(article_mask)
    log.info(f'Rotation Angle: {angle}')

    # Dilating the output improved overall readability by tesseract,
    # especially in cases where the resulting output was empty.
    # https://stackoverflow.com/a/54582118/754432
    # NOTE(review): (5, 5) is passed as the *kernel* argument, not as a
    # kernel size; kept as-is because changing it would alter OCR input.
    cv2.dilate(rotated_article_mask, (5, 5), rotated_article_mask)
    if debug:
        cv2.imwrite(os.path.join(final_dir, f"{img_sans_ext}.png"),
                    rotated_article_mask)

    # --psm 3: fully automatic page segmentation, but no OSD.
    # https://medium.com/better-programming/beginners-guide-to-tesseract-ocr-using-python-10ecbb426c3d
    content = pytesseract.image_to_string(
        Image.fromarray(rotated_article_mask), config='--psm 3')

    # newline='' is what the csv module expects of files it writes to.
    with open(os.path.join(final_dir, f'{out_dir_name}.csv'), 'a+',
              newline='') as f_out:
        writer = csv.writer(f_out, delimiter='\t')
        # Only the first OCR line is recorded.
        writer.writerow([img_sans_ext, content.partition('\n')[0]])
from PIL import Image, ImageDraw
import utils
import argparse

# --- command line -----------------------------------------------------------
parser = argparse.ArgumentParser(description="Rao's and Chinese algorithms")
parser.add_argument(
    "image",
    nargs=1,
    help = "Path to image",
)
parser.add_argument(
    "block_size",
    nargs=1,
    help = "Block size",
)
parser.add_argument(
    '--smooth', "-s",
    action='store_true',
    help = "Use Gauss for smoothing",
)
parser.add_argument(
    '--chinese', "-c",
    action='store_true',
    help = "Use Chinese alg. instead of Rao's",
)
args = parser.parse_args()

# --- input ------------------------------------------------------------------
# Load the image as 8-bit grayscale ("L" mode).
im = Image.open(args.image[0]).convert("L")
W = int(args.block_size[0])

# --- per-pixel terms handed to utils.calculate_angles -----------------------
if args.chinese:
    normalizator = 255.0

    def f(x, y):
        return 2 * x * y / (normalizator ** 2)

    def g(x, y):
        return ((x ** 2) * (y ** 2)) / (normalizator ** 4)
else:
    def f(x, y):
        return 2 * x * y

    def g(x, y):
        return x ** 2 - y ** 2

# --- compute and display ----------------------------------------------------
angles = utils.calculate_angles(im, W, f, g)
utils.draw_lines(im, angles, W).show()

if args.smooth:
    # Optional second window with the smoothed angle field.
    smoothed_angles = utils.smooth_angles(angles)
    utils.draw_lines(im, smoothed_angles, W).show()
def main():
    """Run the lane-line pipeline on one test image and show each stage.

    Loads parameters from ``./config.py``, then on
    ``test_images/solidWhiteRight.jpg``: masks a region of interest, blurs,
    selects colours above ``cf.rgb_threshold``, runs Canny and a
    probabilistic Hough transform, and overlays the detected lines on the
    input. Intermediate images are shown in matplotlib figures; the function
    blocks on ``plt.waitforbuttonpress()`` before printing ``Done``.
    """
    # Function-scope imports keep this block self-contained.
    import importlib.util
    import sys

    # Load configuration. spec/exec_module replaces the deprecated
    # SourceFileLoader.load_module(); registering in sys.modules mirrors
    # what load_module() used to do.
    spec = importlib.util.spec_from_file_location('cf', './config.py')
    cf = importlib.util.module_from_spec(spec)
    sys.modules['cf'] = cf
    spec.loader.exec_module(cf)

    # reading in an image
    image = mpimg.imread('test_images/solidWhiteRight.jpg')
    print('This image is:', type(image), 'with dimensions:', image.shape)

    # region selection: cf.vertices_ratio holds (x, y) fractions that are
    # scaled by the image width and height respectively
    vertices = np.array([[x * image.shape[1], y * image.shape[0]]
                         for [x, y] in cf.vertices_ratio],
                        dtype=np.int32)
    masked_image = utils.region_of_interest(image,
                                            np.expand_dims(vertices, axis=0))

    # Display the image with the region outlined
    polygon = patches.Polygon(vertices, linewidth=2, edgecolor='r',
                              facecolor='none')
    _, ax = plt.subplots(1)
    ax.imshow(image)
    ax.add_patch(polygon)

    # Gaussian smoothing
    image_blurred = cv2.GaussianBlur(
        masked_image,
        (cf.guassin_blur_kernel_size, cf.guassin_blur_kernel_size), 0)

    # color selection: keep pixels between cf.rgb_threshold and pure white
    color_mask = cv2.inRange(image_blurred, np.array(cf.rgb_threshold),
                             np.array([255, 255, 255]))
    color_select = cv2.bitwise_and(image, image, mask=color_mask)
    plt.figure()
    plt.imshow(color_select, cmap='gray')

    # Define our parameters for Canny and apply
    edges = cv2.Canny(color_select, cf.canny_low_threshold,
                      cf.canny_high_threshold)
    plt.figure()
    plt.imshow(edges, cmap='gray')

    # Hough transform
    lines = cv2.HoughLinesP(edges, cf.hough_rho,
                            np.pi / cf.hough_theta_scale,
                            cf.hough_threshold, np.array([]),
                            minLineLength=cf.hough_min_line_length,
                            maxLineGap=cf.hough_max_line_gap)

    # draw lines on an image given endpoints
    line_img = np.zeros((edges.shape[0], edges.shape[1], 3), dtype=np.uint8)
    if lines is not None:
        # HoughLinesP yields None when nothing is detected; in that case the
        # overlay simply stays empty.
        utils.draw_lines(cf, line_img, lines)

    # Draw the lines on the input image:
    # initial_img * alpha + img * beta + gamma
    lines_edges = cv2.addWeighted(src1=image, alpha=0.8, src2=line_img,
                                  beta=1, gamma=0)
    plt.figure()
    plt.imshow(lines_edges)
    plt.waitforbuttonpress()
    print('Done')
# Fragment of a command-line script: `parser`, `Image` and `utils` are
# created/imported before this chunk.
# Remaining CLI arguments: a positional block size and two optional switches.
parser.add_argument("block_size", nargs=1, help="Block size")
parser.add_argument('--smooth', "-s", action='store_true', help="Use Gauss for smoothing")
parser.add_argument('--chinese', "-c", action='store_true', help="Use Chinese alg. instead of Rao's")
args = parser.parse_args()

# nargs=1 makes each positional a one-element list, hence the [0] indexing.
im = Image.open(args.image[0])
im = im.convert("L") # convert to grayscale
W = int(args.block_size[0])

# Default pair of two-argument functions handed to utils.calculate_angles.
f = lambda x, y: 2 * x * y
g = lambda x, y: x**2 - y**2

if args.chinese:
    # Alternative pair selected by --chinese; both are scaled by powers
    # of 255.
    normalizator = 255.0
    f = lambda x, y: 2 * x * y / (normalizator**2)
    g = lambda x, y: ((x**2) * (y**2)) / (normalizator**4)

# Compute the angles for block size W and show them drawn via utils.draw_lines.
angles = utils.calculate_angles(im, W, f, g)
utils.draw_lines(im, angles, W).show()

if args.smooth:
    # With --smooth, additionally show the result for the smoothed angles.
    smoothed_angles = utils.smooth_angles(angles)
    utils.draw_lines(im, smoothed_angles, W).show()
# Fragment of a loop body (note the `break` below); the loop header and the
# definitions of `frame`, `cnt`, `t`, `l` and `ut` are outside this chunk.

# Masks for the blue and the green line in the current frame.
blue_bin = t.get_blue_line(frame)
green_bin = t.get_green_line(frame)
# Cast to uint8 and scale by 255
# (presumably boolean masks turned into 0/255 images - TODO confirm).
blue_bin = blue_bin.astype(np.uint8) * 255
green_bin = green_bin.astype(np.uint8) * 255

# Pick the longest detected line in each mask and print it.
blue_coords = l.get_line_coords(blue_bin)
b = l.longest_line(blue_coords)
print b
green_coords = l.get_line_coords(green_bin)
g = l.longest_line(green_coords)
print g

# Draw both lines on the frame.
lin = ut.draw_lines(frame, [b, g])

# print blue_coords
# image_bin = ut.img_to_bin(frame)
# lines = l.get_line_coords(image_bin)
# classes = l.classify_lines(lines)
# l1, l2 = l.get_final_lines(classes)
# ret = ut.draw_lines(frame, [l1, l2])

lin2bin = ut.img_to_bin(lin)
# NOTE(review): `ret` is not used anywhere in the visible fragment.
ret = ut.select_roi(lin, lin2bin)

# Leave the enclosing loop once the counter reaches 40.
if cnt == 40:
    break
cv2.imshow('frame', lin)
# Fragment of an evaluation script; `preds`, `models`, `val_list`, `val_x`,
# `val_y`, `CLF_SIZE`, `parent_path` and the helper functions come from
# outside this chunk.
# NOTE(review): the accumulation below is presumably the last statement of a
# loop over `models` whose header is not visible here - confirm nesting.
avg_preds+=preds
# Average over the number of models, then binarise at 0.2.
avg_preds/=len(models)
avg_preds=avg_preds>0.2

# One CLF_SIZE x CLF_SIZE x 1 output mask per validation sample.
remove_avg_preds=np.zeros((len(val_list),CLF_SIZE,CLF_SIZE,1))
for i,pred in enumerate(avg_preds):
    # Remove small objects (size argument 800), then label what remains.
    pred_tmp=remove_small_objects(pred,800,connectivity=2)
    pred_tmp=np.squeeze(pred_tmp)
    pred_tmp=label(pred_tmp)
    new_pred=np.zeros(pred_tmp.shape)
    # Replace every labelled region by its filled bounding box.
    for region in regionprops(pred_tmp):
        min_y,min_x,max_y,max_x=region.bbox
        new_pred[min_y:max_y,min_x:max_x]=1
    # Restore the trailing channel axis before storing.
    remove_avg_preds[i]=np.expand_dims(new_pred,2)

# Score the box-filled predictions against the ground truth.
iou_coeffient=IOU_numpy(val_y,remove_avg_preds,CLF_SIZE)

# Pass image, ground truth and prediction of each sample, together with the
# output path seg_out/<index>.png, to draw_lines.
for i,(pred,true,img) in enumerate(zip(remove_avg_preds,val_y,val_x)):
    path=os.path.join(parent_path,"seg_out/"+str(i)+".png")
    img=np.squeeze(img)
    true=np.squeeze(true)
    pred=np.squeeze(pred)
    '''plt.imshow(pred) plt.show()'''
    draw_lines(img,true,pred,path)

print("iou : "+str(iou_coeffient))
kf = KFold(len(data_list), n_folds=5, shuffle=True) for i, (train_idxs, val_idxs) in enumerate(kf): # if i<4:continue model_path = os.path.join(model_dir, "model_" + str(i) + ".h5") model = MODEL(SIZE) model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["acc", IOU]) if i == 3: model.load_weights(model_path) print("KFold Model Loaded...") print(model_path) pred_y = model.predict(test_x) draw_lines(np.squeeze(test_x), test_y, np.squeeze(pred_y)) train_list = data_list[train_idxs] val_list = data_list[val_idxs] model.fit_generator( generator_all_data(BATCH_SIZE, train_list, SIZE), steps_per_epoch=2 * get_generator_steps(BATCH_SIZE, train_list), epochs=EPOCHS, validation_data=generator_all_data(BATCH_SIZE, val_list, SIZE), validation_steps=get_generator_steps(BATCH_SIZE, val_list), callbacks=[ ModelCheckpoint(model_path, monitor="val_loss", mode="min", verbose=1, save_best_only=True),