def text_detect(text_detector, im):
    im_small, f, im_height, im_width = resize_im(im, Config.SCALE, Config.MAX_SCALE)
    timer = Timer()
    timer.tic()
    text_lines = text_detector.detect(im_small)
    text_lines = draw_boxes(im_small, text_lines, f, im_height, im_width)
    print "Number of the detected text lines: %s" % len(text_lines)
    print "Detection Time: %f" % timer.toc()
    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    return text_lines
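# A minimal usage sketch for text_detect above (not part of the original code).
# Detector construction mirrors the demo script further below; "demo.jpg" is a
# placeholder path, and NET_DEF_FILE / MODEL_FILE are assumed to be defined.
# text_proposals_detector = TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE))
# text_detector = TextDetector(text_proposals_detector)
# im = cv2.imread("demo.jpg")
# text_lines = text_detect(text_detector, im)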
def train_epoch(self, scaler, epoch, model, dataset, dataloader, optimizer, prefix="train"):
    model.train()

    _timer = Timer()
    lossLogger = LossLogger()
    performanceLogger = build_evaluator(self.cfg, dataset)

    num_iters = len(dataloader)
    for i, sample in enumerate(dataloader):
        self.n_iters_elapsed += 1
        _timer.tic()
        self.run_step(scaler, model, sample, optimizer, lossLogger, performanceLogger, prefix)
        torch.cuda.synchronize()
        _timer.toc()

        if (i + 1) % self.cfg.N_ITERS_TO_DISPLAY_STATUS == 0:
            if self.cfg.local_rank == 0:
                template = ("[epoch {}/{}, iter {}/{}, lr {}] Total train loss: {:.4f} "
                            "(ips = {:.2f})\n"
                            "{}")
                logger.info(
                    template.format(
                        epoch,
                        self.cfg.N_MAX_EPOCHS - 1,
                        i,
                        num_iters - 1,
                        round(get_current_lr(optimizer), 6),
                        lossLogger.meters["loss"].value,
                        self.batch_size * self.cfg.N_ITERS_TO_DISPLAY_STATUS / _timer.diff,
                        "\n".join(
                            ["{}: {:.4f}".format(n, l.value) for n, l in lossLogger.meters.items() if n != "loss"]),
                    )
                )

    if self.cfg.TENSORBOARD and self.cfg.local_rank == 0:
        # Logging train losses
        [self.tb_writer.add_scalar(f"loss/{prefix}_{n}", l.global_avg, epoch)
         for n, l in lossLogger.meters.items()]
        performances = performanceLogger.evaluate()
        if performances is not None and len(performances):
            [self.tb_writer.add_scalar(f"performance/{prefix}_{k}", v, epoch)
             for k, v in performances.items()]

    if self.cfg.TENSORBOARD_WEIGHT and False:
        for name, param in model.named_parameters():
            layer, attr = os.path.splitext(name)
            attr = attr[1:]
            self.tb_writer.add_histogram("{}/{}".format(layer, attr), param, epoch)
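# run_step is called above but not shown in this section. A minimal sketch of
# what it presumably does, based on the torch.cuda.amp pattern used in the
# train_epoch variant further below; the body is an assumption, only the
# signature is taken from the call site above.
def run_step(self, scaler, model, sample, optimizer, lossLogger, performanceLogger, prefix):
    imgs, targets = sample["image"], sample["target"]
    imgs = imgs.cuda()
    optimizer.zero_grad()
    with torch.cuda.amp.autocast(enabled=True):
        losses = model(imgs, targets, prefix)
    scaler.scale(losses["loss"]).backward()  # backward on the scaled loss
    scaler.step(optimizer)                   # unscale grads, then optimizer.step()
    scaler.update()                          # adjust the loss scale for the next iteration
    lossLogger.update(**losses)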
def text_detect(text_detector, im, img_type):
    if img_type == "others":
        return [], 0

    im_small, f = resize_im(im, Config.SCALE, Config.MAX_SCALE)
    timer = Timer()
    timer.tic()
    text_lines = text_detector.detect(im_small)
    text_lines = text_lines / f  # project back to size of original image
    text_lines = refine_boxes(im, text_lines,
                              expand_pixel_len=Config.DILATE_PIXEL,
                              pixel_blank=Config.BREATH_PIXEL,
                              binary_thresh=Config.BINARY_THRESH)
    text_area_ratio = calc_area_ratio(text_lines, im.shape)
    print "Number of the detected text lines: %s" % len(text_lines)
    print "Detection Time: %f" % timer.toc()
    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

    if Config.DEBUG_SAVE_BOX_IMG:
        # image_path is expected to be defined at module scope
        im_with_text_lines = draw_boxes(im, text_lines, is_display=False, caption=image_path, wait=False)
        if im_with_text_lines is not None:
            cv2.imwrite(image_path + '_boxes.jpg', im_with_text_lines)

    return text_lines, text_area_ratio
def train_epoch(self, scaler, epoch, model, dataloader, optimizer, prefix="train"):
    model.train()

    _timer = Timer()
    lossLogger = LossLogger()
    performanceLogger = MetricLogger(self.dictionary, self.cfg)

    for i, sample in enumerate(dataloader):
        imgs, targets = sample['image'], sample['target']
        _timer.tic()
        # zero the parameter gradients
        optimizer.zero_grad()

        imgs = list(img.cuda() for img in imgs) if isinstance(imgs, list) else imgs.cuda()
        if isinstance(targets, list):
            if isinstance(targets[0], torch.Tensor):
                targets = [t.cuda() for t in targets]
            else:
                targets = [{k: v.cuda() for k, v in t.items()} for t in targets]
        else:
            targets = targets.cuda()

        # Autocast
        with amp.autocast(enabled=True):
            out = model(imgs, targets, prefix)
            if not isinstance(out, tuple):
                losses, predicts = out, None
            else:
                losses, predicts = out

        self.n_iters_elapsed += 1

        # Scales loss. Calls backward() on scaled loss to create scaled gradients.
        # Backward passes under autocast are not recommended.
        # Backward ops run in the same dtype autocast chose for corresponding forward ops.
        scaler.scale(losses["loss"]).backward()

        # scaler.step() first unscales the gradients of the optimizer's assigned params.
        # If these gradients do not contain infs or NaNs, optimizer.step() is then called,
        # otherwise, optimizer.step() is skipped.
        scaler.step(optimizer)

        # Updates the scale for next iteration.
        scaler.update()

        # torch.cuda.synchronize()
        _timer.toc()

        if (i + 1) % self.cfg.N_ITERS_TO_DISPLAY_STATUS == 0:
            if self.cfg.distributed:
                # reduce losses over all GPUs for logging purposes
                loss_dict_reduced = reduce_dict(losses)
                lossLogger.update(**loss_dict_reduced)
                del loss_dict_reduced
            else:
                lossLogger.update(**losses)

            if predicts is not None:
                if self.cfg.distributed:
                    # reduce performances over all GPUs for logging purposes
                    predicts_dict_reduced = reduce_dict(predicts)
                    performanceLogger.update(targets, predicts_dict_reduced)
                    del predicts_dict_reduced
                else:
                    performanceLogger.update(**predicts)
                del predicts

            if self.cfg.local_rank == 0:
                template = ("[epoch {}/{}, iter {}, lr {}] Total train loss: {:.4f} "
                            "(ips = {:.2f})\n"
                            "{}")
                logger.info(
                    template.format(
                        epoch,
                        self.cfg.N_MAX_EPOCHS,
                        i,
                        round(get_current_lr(optimizer), 6),
                        lossLogger.meters["loss"].value,
                        self.batch_size * self.cfg.N_ITERS_TO_DISPLAY_STATUS / _timer.diff,
                        "\n".join([
                            "{}: {:.4f}".format(n, l.value)
                            for n, l in lossLogger.meters.items() if n != "loss"
                        ]),
                    ))

        del imgs, targets, losses

    if self.cfg.TENSORBOARD and self.cfg.local_rank == 0:
        # Logging train losses
        [
            self.tb_writer.add_scalar(f"loss/{prefix}_{n}", l.global_avg, epoch)
            for n, l in lossLogger.meters.items()
        ]
        performances = performanceLogger.compute()
        if len(performances):
            [
                self.tb_writer.add_scalar(f"performance/{prefix}_{k}", v, epoch)
                for k, v in performances.items()
            ]

    if self.cfg.TENSORBOARD_WEIGHT and False:
        for name, param in model.named_parameters():
            layer, attr = os.path.splitext(name)
            attr = attr[1:]
            self.tb_writer.add_histogram("{}/{}".format(layer, attr), param, epoch)
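# A sketch of how this AMP variant might be driven from an outer fit loop (not
# from the original source): torch.cuda.amp.GradScaler is a real PyTorch API,
# but the trainer object and loop structure here are assumptions.
# scaler = torch.cuda.amp.GradScaler(enabled=True)
# for epoch in range(cfg.N_MAX_EPOCHS):
#     trainer.train_epoch(scaler, epoch, model, dataset, dataloader, optimizer, prefix="train")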
def train_epoch(self, epoch, model, dataloader, optimizer, lr_scheduler, grad_normalizer=None, prefix="train"):
    model.train()

    _timer = Timer()
    lossMeter = LossMeter()
    perfMeter = PerfMeter()

    for i, (imgs, labels) in enumerate(dataloader):
        _timer.tic()
        # zero the parameter gradients
        optimizer.zero_grad()

        if self.cfg.HALF:
            imgs = imgs.half()

        if len(self.device) > 1:
            out = data_parallel(model, (imgs, labels, prefix), device_ids=self.device, output_device=self.device[0])
        else:
            imgs = imgs.cuda()
            labels = [label.cuda() for label in labels] if isinstance(labels, list) else labels.cuda()
            out = model(imgs, labels, prefix)

        if not isinstance(out, tuple):
            losses, performances = out, None
        else:
            losses, performances = out

        if losses["all_loss"].sum().requires_grad:
            if self.cfg.GRADNORM is not None:
                grad_normalizer.adjust_losses(losses)
                grad_normalizer.adjust_grad(model, losses)
            else:
                losses["all_loss"].sum().backward()

        optimizer.step()

        self.n_iters_elapsed += 1
        _timer.toc()

        lossMeter.__add__(losses)

        if performances is not None and all(performances):
            perfMeter.put(performances)

        if (i + 1) % self.cfg.N_ITERS_TO_DISPLAY_STATUS == 0:
            avg_losses = lossMeter.average()
            template = ("[epoch {}/{}, iter {}, lr {}] Total train loss: {:.4f} "
                        "(ips = {:.2f} )\n"
                        "{}")
            self.logger.info(
                template.format(
                    epoch,
                    self.cfg.N_MAX_EPOCHS,
                    i,
                    round(get_current_lr(optimizer), 6),
                    avg_losses["all_loss"],
                    self.batch_size * self.cfg.N_ITERS_TO_DISPLAY_STATUS / _timer.total_time,
                    "\n".join([
                        "{}: {:.4f}".format(n, l) for n, l in avg_losses.items() if n != "all_loss"
                    ]),
                ))

            if self.cfg.TENSORBOARD:
                tb_step = int((epoch * self.n_steps_per_epoch + i) / self.cfg.N_ITERS_TO_DISPLAY_STATUS)
                # Logging train losses
                [
                    self.tb_writer.add_scalar(f"loss/{prefix}_{n}", l, tb_step)
                    for n, l in avg_losses.items()
                ]

            lossMeter.clear()

        del imgs, labels, losses, performances

    lr_scheduler.step()

    if self.cfg.TENSORBOARD and len(perfMeter):
        avg_perf = perfMeter.average()
        [
            self.tb_writer.add_scalar(f"performance/{prefix}_{k}", v, epoch)
            for k, v in avg_perf.items()
        ]

    if self.cfg.TENSORBOARD_WEIGHT and False:
        for name, param in model.named_parameters():
            layer, attr = os.path.splitext(name)
            attr = attr[1:]
            self.tb_writer.add_histogram("{}/{}".format(layer, attr), param, epoch)
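# get_current_lr is referenced in every train_epoch variant here but not defined
# in this section. A minimal sketch of a plausible implementation (an assumption,
# not the original helper):
def get_current_lr(optimizer):
    # Report the learning rate of the first parameter group.
    return optimizer.param_groups[0]["lr"]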
# initialize the detectors
text_proposals_detector = TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE))
text_detector = TextDetector(text_proposals_detector)

demo_imnames = os.listdir(DEMO_IMAGE_DIR)
timer = Timer()

for im_name in demo_imnames:
    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    print "Image: %s" % im_name

    im_file = osp.join(DEMO_IMAGE_DIR, im_name)
    im = cv2.imread(im_file)

    timer.tic()

    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)

    print "Number of the detected text lines: %s" % len(text_lines)
    print "Time: %f" % timer.toc()

    im_with_text_lines = draw_boxes(im, text_lines, caption=im_name, wait=False)

print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
print "Thank you for trying our demo. Press any key to exit..."
cv2.waitKey(0)
def train_epoch(self, epoch, model, dataloader, optimizer, lr_scheduler, grad_normalizer=None, prefix="train"):
    model.train()

    _timer = Timer()
    lossLogger = MetricLogger(delimiter=" ")
    performanceLogger = MetricLogger(delimiter=" ")

    for i, (imgs, targets) in enumerate(dataloader):
        _timer.tic()
        # zero the parameter gradients
        optimizer.zero_grad()

        # imgs = imgs.cuda()
        imgs = list(img.cuda() for img in imgs) if isinstance(imgs, list) else imgs.cuda()
        # labels = [label.cuda() for label in labels] if isinstance(labels, list) else labels.cuda()
        # labels = [{k: v.cuda() for k, v in t.items()} for t in labels] if isinstance(labels, list) else labels.cuda()
        if isinstance(targets, list):
            if isinstance(targets[0], torch.Tensor):
                targets = [t.cuda() for t in targets]
            else:
                targets = [{k: v.cuda() for k, v in t.items()} for t in targets]
        else:
            targets = targets.cuda()

        out = model(imgs, targets, prefix)
        if not isinstance(out, tuple):
            losses, performances = out, None
        else:
            losses, performances = out

        self.n_iters_elapsed += 1

        with amp.scale_loss(losses["loss"], optimizer) as scaled_loss:
            scaled_loss.backward()

        optimizer.step()

        torch.cuda.synchronize()
        _timer.toc()

        if (i + 1) % self.cfg.N_ITERS_TO_DISPLAY_STATUS == 0:
            if self.cfg.distributed:
                # reduce losses over all GPUs for logging purposes
                loss_dict_reduced = reduce_dict(losses)
                lossLogger.update(**loss_dict_reduced)
            else:
                lossLogger.update(**losses)

            if performances is not None and all(performances):
                if self.cfg.distributed:
                    # reduce performances over all GPUs for logging purposes
                    performance_dict_reduced = reduce_dict(performances)
                    performanceLogger.update(**performance_dict_reduced)
                else:
                    performanceLogger.update(**performances)

            if self.cfg.local_rank == 0:
                template = ("[epoch {}/{}, iter {}, lr {}] Total train loss: {:.4f} "
                            "(ips = {:.2f})\n"
                            "{}")
                logger.info(
                    template.format(
                        epoch,
                        self.cfg.N_MAX_EPOCHS,
                        i,
                        round(get_current_lr(optimizer), 6),
                        lossLogger.meters["loss"].value,
                        self.batch_size * self.cfg.N_ITERS_TO_DISPLAY_STATUS / _timer.total_time,
                        "\n".join([
                            "{}: {:.4f}".format(n, l.value)
                            for n, l in lossLogger.meters.items() if n != "loss"
                        ]),
                    ))

        del imgs, targets

    if self.cfg.TENSORBOARD and self.cfg.local_rank == 0:
        # Logging train losses
        [
            self.tb_writer.add_scalar(f"loss/{prefix}_{n}", l.global_avg, epoch)
            for n, l in lossLogger.meters.items()
        ]
        if len(performanceLogger.meters):
            [
                self.tb_writer.add_scalar(f"performance/{prefix}_{k}", v.global_avg, epoch)
                for k, v in performanceLogger.meters.items()
            ]

    if self.cfg.TENSORBOARD_WEIGHT and False:
        for name, param in model.named_parameters():
            layer, attr = os.path.splitext(name)
            attr = attr[1:]
            self.tb_writer.add_histogram("{}/{}".format(layer, attr), param, epoch)
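# This variant uses the NVIDIA apex-style amp.scale_loss, which requires the model
# and optimizer to have been wrapped beforehand. A minimal setup sketch, assuming
# apex is installed; the opt_level choice here is an assumption, not taken from
# the original code:
# from apex import amp
# model, optimizer = amp.initialize(model, optimizer, opt_level="O1")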