import glob
import json
import math
import os
import random
import shutil
import time
import warnings
from pathlib import Path

import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from torch.utils.data import DataLoader
from tqdm import tqdm

try:
    # Mixed precision training via NVIDIA Apex (https://github.com/NVIDIA/apex);
    # fall back to FP32 when Apex is unavailable.
    from apex import amp
    mixed_precision = True
except ImportError:
    mixed_precision = False

# NOTE: `args`, `device`, `parameters` (training hyperparameters), `tb_writer`, and
# `ONNX_EXPORT` are module-level globals defined elsewhere in this script, and the
# model/dataset/metric helpers (Darknet, LoadImagesAndLabels, compute_loss,
# non_max_suppression, ...) come from this repository's model and utility modules.


def evaluate(cfg,
             data,
             weights=None,
             batch_size=16,
             workers=4,
             image_size=416,
             confidence_threshold=0.001,
             iou_threshold=0.6,  # for NMS
             save_json=True,
             single_cls=False,
             augment=False,
             model=None,
             dataloader=None):
    # Initialize/load model and set device
    if model is None:
        device = select_device(args.device, batch_size=batch_size)
        verbose = args.task == "eval"

        # Initialize model
        model = Darknet(cfg, image_size).to(device)

        # Load weights
        if weights.endswith(".pth"):
            model.load_state_dict(torch.load(weights, map_location=device)["state_dict"])
        else:
            load_darknet_weights(model, weights)

        if device.type != "cpu" and torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device  # get model device
        verbose = False

    # Configure run
    data = parse_data_config(data)
    classes_num = 1 if single_cls else int(data["classes"])
    path = data["valid"]  # path to validation images
    names = load_classes(data["names"])  # class names
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # IoU vector for mAP@0.5:0.95
    iouv = iouv[0].view(1)  # comment out this line for mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if dataloader is None:
        dataset = LoadImagesAndLabels(path, image_size, batch_size, rect=True)
        batch_size = min(batch_size, len(dataset))
        dataloader = DataLoader(dataset,
                                batch_size=batch_size,
                                num_workers=workers,
                                pin_memory=True,
                                collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    s = ("%20s" + "%10s" * 6) % ("Class", "Images", "Targets", "P", "R", "mAP@0.5", "F1")
    p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    json_dict, stats, ap, ap_class = [], [], [], []
    for batch_i, (images, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        images = images.to(device).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
        targets = targets.to(device)
        batch_size, _, height, width = images.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Test the effect of test-time image augmentation
            if augment:
                fs_image = scale_image(images.flip(3), 0.9)  # flip-lr and scale
                s_image = scale_image(images, 0.7)  # scale
                images = torch.cat((images, fs_image, s_image), 0)

            # Run model
            start_time = time_synchronized()
            inference_outputs, training_outputs = model(images)
            t0 += time_synchronized() - start_time

            # De-augment the extra predictions back to the original frame
            if augment:
                x = torch.split(inference_outputs, batch_size, dim=0)
                x[1][..., :4] /= 0.9  # scale
                x[1][..., 0] = width - x[1][..., 0]  # flip lr
                x[2][..., :4] /= 0.7  # scale
                inference_outputs = torch.cat(x, 1)

            # Compute loss
            if hasattr(model, "hyp"):  # if model has loss hyperparameters
                # GIoU, obj, cls
                loss += compute_loss(training_outputs, targets, model)[1][:3].cpu()

            # Run NMS
            start_time = time_synchronized()
            output = non_max_suppression(inference_outputs,
                                         confidence_threshold=confidence_threshold,
                                         iou_threshold=iou_threshold)
            t1 += time_synchronized() - start_time

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            label_num = len(labels)
            target_class = labels[:, 0].tolist() if label_num else []
            seen += 1

            if pred is None:
                if label_num:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(),
                                  torch.Tensor(),
                                  target_class))
                continue

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...]
                image_id = int(Path(paths[si]).stem.split("_")[-1])
                box = pred[:, :4].clone()  # xyxy
                # to original shape
                scale_coords(images[si].shape[1:], box, shapes[si][0], shapes[si][1])
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    json_dict.append({"image_id": image_id,
                                      "category_id": coco91class[int(p[5])],
                                      "bbox": [round(x, 3) for x in b],
                                      "score": round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(len(pred), niou, dtype=torch.bool, device=device)
            if label_num:
                detected = []  # detected target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                target_boxes = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    # target indices
                    ti = (cls == tcls_tensor).nonzero().view(-1)
                    # prediction indices
                    pi = (cls == pred[:, 5]).nonzero().view(-1)

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction-to-target IoUs (best ious, indices)
                        ious, i = box_iou(pred[pi, :4], target_boxes[ti]).max(1)

                        # Append detections
                        for j in (ious > iouv[0]).nonzero():
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                # all targets already located in image
                                if len(detected) == label_num:
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), target_class))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        if niou > 1:
            # [P, R, mAP@0.5:0.95, mAP@0.5]
            p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean(1), ap[:, 0]
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        # number of targets per class
        nt = np.bincount(stats[3].astype(np.int64), minlength=classes_num)
    else:
        nt = torch.zeros(1)

    # Print results
    context = "%20s" + "%10.3g" * 6  # print format
    print(context % ("all", seen, nt.sum(), mp, mr, map, mf1))

    # Print results per class
    if verbose and classes_num > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(context % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Print speeds
    if verbose:
        memory = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
        print(f"Inference memory: {memory:.1f} GB.")
        print(f"Speed:\n"
              f"Image size: ({image_size}x{image_size}) at batch_size: {batch_size}\n"
              f"\t- Inference {t0 / seen * 1E3:.1f}ms.\n"
              f"\t- NMS {t1 / seen * 1E3:.1f}ms.\n"
              f"\t- Total {(t0 + t1) / seen * 1E3:.1f}ms.\n")

    # Save JSON
    if save_json and map and len(json_dict):
        print("\nCOCO mAP with pycocotools...")
        imgIds = [int(Path(x).stem.split("_")[-1]) for x in dataloader.dataset.image_files]
        with open("results.json", "w") as file:
            json.dump(json_dict, file)

        # initialize COCO ground truth api
        cocoGt = COCO(glob.glob("data/coco2014/annotations/instances_val*.json")[0])
        # initialize COCO prediction api
        cocoDt = cocoGt.loadRes("results.json")

        cocoEval = COCOeval(cocoGt, cocoDt, "bbox")
        cocoEval.params.imgIds = imgIds  # [:32]  # only evaluate these images
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        # update to pycocotools results (mAP@0.5:0.95, mAP@0.5)
        mf1, map = cocoEval.stats[:2]

    # Return results
    maps = np.zeros(classes_num) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1, *(loss.cpu() / len(dataloader)).tolist()), maps
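
# Usage sketch (not part of the original script): running evaluate() standalone on a
# trained checkpoint. The cfg/data paths are illustrative placeholders; when `model`
# is None, evaluate() also reads the module-level `args` (args.device, args.task).
#
#   results, maps = evaluate(cfg="cfgs/yolov3.cfg",
#                            data="data/coco2014.data",
#                            weights="weights/checkpoint.pth",
#                            batch_size=16,
#                            image_size=416,
#                            save_json=False)
#   mp, mr, map50, mf1 = results[:4]  # precision, recall, mAP@0.5, F1
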
def train():
    cfg = args.cfg
    data = args.data
    if len(args.image_size) == 2:
        image_size, image_size_val = args.image_size[0], args.image_size[1]
    else:
        image_size, image_size_val = args.image_size[0], args.image_size[0]

    epochs = args.epochs
    batch_size = args.batch_size
    accumulate = args.accumulate
    weights = args.weights

    # Initialize
    gs = 32  # (pixels) grid size
    assert math.fmod(image_size, gs) == 0, f"--image-size must be a {gs}-multiple"
    init_seeds()
    image_size_min = 6.6  # 320 / 32 / 1.5
    image_size_max = 28.5  # 608 / 32 * 1.5
    if args.multi_scale:
        image_size_min = round(image_size / gs / 1.5) + 1
        image_size_max = round(image_size / gs * 1.5)
        image_size = image_size_max * gs  # initiate with maximum multi-scale size
        print(f"Using multi-scale {image_size_min * gs} - {image_size}")

    # Configure run
    dataset_dict = parse_data_config(data)
    train_path = dataset_dict["train"]
    valid_path = dataset_dict["valid"]
    num_classes = 1 if args.single_cls else int(dataset_dict["classes"])

    # Remove previous results
    for files in glob.glob("results.txt"):
        os.remove(files)

    # Initialize model
    model = Darknet(cfg).to(device)

    # Optimizer
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for model_key, model_value in dict(model.named_parameters()).items():
        if ".bias" in model_key:
            pg2 += [model_value]  # biases
        elif "Conv2d.weight" in model_key:
            pg1 += [model_value]  # apply weight_decay
        else:
            pg0 += [model_value]  # all else

    optimizer = torch.optim.SGD(pg0,
                                lr=parameters["lr0"],
                                momentum=parameters["momentum"],
                                nesterov=True)
    optimizer.add_param_group({
        "params": pg1,  # add pg1 with weight_decay
        "weight_decay": parameters["weight_decay"]
    })
    optimizer.add_param_group({"params": pg2})  # add pg2 with biases
    del pg0, pg1, pg2

    epoch = 0
    start_epoch = 0
    best_fitness = 0.0
    context = None
    if weights.endswith(".pth"):
        state = torch.load(weights, map_location=device)

        # load model
        try:
            state["state_dict"] = {
                k: v
                for k, v in state["state_dict"].items()
                if model.state_dict()[k].numel() == v.numel()
            }
            model.load_state_dict(state["state_dict"], strict=False)
        except KeyError as e:
            error_msg = f"{args.weights} is not compatible with {args.cfg}. "
            error_msg += f"Specify --weights `` or specify a --cfg "
            error_msg += f"compatible with {args.weights}. "
            raise KeyError(error_msg) from e

        # load optimizer
        if state["optimizer"] is not None:
            optimizer.load_state_dict(state["optimizer"])
            best_fitness = state["best_fitness"]

        # load results
        if state.get("training_results") is not None:
            with open("results.txt", "w") as file:
                file.write(state["training_results"])  # write results.txt

        start_epoch = state["epoch"] + 1
        del state
    elif len(weights) > 0:
        # possible weights are "*.weights", "yolov3-tiny.conv.15", "darknet53.conv.74" etc.
        load_darknet_weights(model, weights)
    else:
        print("Pre-trained model weights not loaded.")

    # Mixed precision training https://github.com/NVIDIA/apex
    if mixed_precision:
        # skip printing amp info
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)

    # Cosine learning rate decay, source https://arxiv.org/pdf/1812.01187.pdf
    lr_lambda = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.95 + 0.05
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                  lr_lambda=lr_lambda,
                                                  last_epoch=start_epoch - 1)

    # Initialize distributed training
    if device.type != "cpu" and torch.cuda.device_count() > 1 and torch.distributed.is_available():
        dist.init_process_group(
            backend="nccl",  # distributed backend
            init_method="tcp://127.0.0.1:8888",  # distributed training init method
            world_size=1,  # number of nodes for distributed training
            rank=0)  # distributed training node rank
        model = torch.nn.parallel.DistributedDataParallel(model)
        model.yolo_layers = model.module.yolo_layers

    # Dataset
    # Training set: apply augmentation hyperparameters (option: rectangular training)
    train_dataset = LoadImagesAndLabels(train_path,
                                        image_size,
                                        batch_size,
                                        augment=True,
                                        hyp=parameters,
                                        rect=args.rect,
                                        cache_images=args.cache_images,
                                        single_cls=args.single_cls)
    # Validation set: no augmentation, rectangular inference
    valid_dataset = LoadImagesAndLabels(valid_path,
                                        image_size_val,
                                        batch_size,
                                        augment=False,
                                        hyp=parameters,
                                        rect=True,
                                        cache_images=args.cache_images,
                                        single_cls=args.single_cls)
    collate_fn = train_dataset.collate_fn

    # Dataloader
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   num_workers=args.workers,
                                                   shuffle=not args.rect,
                                                   pin_memory=True,
                                                   collate_fn=collate_fn)
    valid_dataloader = torch.utils.data.DataLoader(valid_dataset,
                                                   batch_size=batch_size,
                                                   num_workers=args.workers,
                                                   shuffle=False,
                                                   pin_memory=True,
                                                   collate_fn=collate_fn)

    # Model parameters
    model.nc = num_classes  # attach number of classes to model
    model.hyp = parameters  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
    # attach class weights
    model.class_weights = labels_to_class_weights(train_dataset.labels, num_classes).to(device)

    # Model EMA
    ema = ModelEMA(model, decay=0.9998)

    # Start training
    batches_num = len(train_dataloader)  # number of batches
    burns = max(3 * batches_num, 500)  # burn-in iterations, max(3 epochs, 500 iterations)
    maps = np.zeros(num_classes)  # mAP per class
    # "P", "R", "mAP", "F1", "val GIoU", "val Objectness", "val Classification"
    results = (0, 0, 0, 0, 0, 0, 0)
    print(f"Using {args.workers} dataloader workers.")
    print(f"Starting training for {args.epochs} epochs...")

    start_time = time.time()
    for epoch in range(start_epoch, args.epochs):
        model.train()

        # Update image weights (optional)
        if train_dataset.image_weights:
            # class weights
            class_weights = model.class_weights.cpu().numpy() * (1 - maps) ** 2
            image_weights = labels_to_image_weights(train_dataset.labels,
                                                    num_classes=num_classes,
                                                    class_weights=class_weights)
            # random weighted index
            train_dataset.indices = random.choices(range(train_dataset.image_files_num),
                                                   weights=image_weights,
                                                   k=train_dataset.image_files_num)

        mean_losses = torch.zeros(4).to(device)
        print("\n")
        print(("%10s" * 8) % ("Epoch", "memory", "GIoU", "obj", "cls", "total",
                              "targets", " image_size"))
        progress_bar = tqdm(enumerate(train_dataloader), total=batches_num)
        for index, (images, targets, paths, _) in progress_bar:
            # number of integrated batches (since train start)
            ni = index + batches_num * epoch
            # uint8 to float32, 0 - 255 to 0.0 - 1.0
            images = images.to(device).float() / 255.0
            targets = targets.to(device)

            # Hyperparameter burn-in
            if ni <= burns * 2:
                # giou loss ratio (obj_loss = 1.0 or giou)
                model.gr = np.interp(ni, [0, burns * 2], [0.0, 1.0])

                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x["lr"] = np.interp(ni, [0, burns],
                                        [0.1 if j == 2 else 0.0,
                                         x["initial_lr"] * lr_lambda(epoch)])
                    if "momentum" in x:
                        x["momentum"] = np.interp(ni, [0, burns], [0.9, parameters["momentum"]])

            # Multi-scale training
            if args.multi_scale:
                # adjust image_size (67% - 150%) every batch
                if ni / accumulate % 1 == 0:
                    image_size = random.randrange(image_size_min, image_size_max + 1) * gs
                scale_ratio = image_size / max(images.shape[2:])
                if scale_ratio != 1:
                    # new shape (stretched to 32-multiple)
                    new_size = [math.ceil(size * scale_ratio / gs) * gs
                                for size in images.shape[2:]]
                    images = F.interpolate(images, size=new_size, mode="bilinear",
                                           align_corners=False)

            # Run model
            output = model(images)

            # Compute loss
            loss, loss_items = compute_loss(output, targets, model)
            if not torch.isfinite(loss):
                warnings.warn(f"WARNING: Non-finite loss, ending training {loss_items}")
                return results

            # Scale loss by the number of accumulation steps
            # (nominal batch of batch_size * accumulate, e.g. 16 * 4 = 64)
            loss *= batch_size / (batch_size * accumulate)

            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Optimize accumulated gradient
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()
                ema.update(model)

            # Print batch results
            # update mean losses
            mean_losses = (mean_losses * index + loss_items) / (index + 1)
            memory = f"{torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0:.2f}G"
            context = ("%10s" * 2 + "%10.3g" * 6) % ("%g/%g" % (epoch, args.epochs - 1),
                                                     memory, *mean_losses,
                                                     len(targets), image_size)
            progress_bar.set_description(context)

        # Update scheduler
        scheduler.step()

        # Process epoch results
        ema.update_attr(model)
        final_epoch = epoch + 1 == epochs
        if not args.notest or final_epoch:
            # Calculate mAP
            coco = any([coco_name in data for coco_name in
                        ["coco.data", "coco2014.data", "coco2017.data"]]) and model.nc == 80
            results, maps = evaluate(cfg,
                                     data,
                                     batch_size=batch_size,
                                     image_size=image_size_val,
                                     model=ema.ema,
                                     save_json=final_epoch and coco,
                                     single_cls=args.single_cls,
                                     dataloader=valid_dataloader)

        # Write epoch results
        with open("results.txt", "a") as f:
            # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
            f.write(context + "%10.3g" * 7 % results)
            f.write("\n")

        # Write Tensorboard results
        if tb_writer:
            tags = ["train/giou_loss", "train/obj_loss", "train/cls_loss",
                    "metrics/precision", "metrics/recall", "metrics/mAP_0.5",
                    "metrics/F1", "val/giou_loss", "val/obj_loss", "val/cls_loss"]
            for x, tag in zip(list(mean_losses[:-1]) + list(results), tags):
                tb_writer.add_scalar(tag, x, epoch)

        # Update best mAP
        # fitness_i = weighted combination of [P, R, mAP, F1]
        fitness_i = fitness(np.array(results).reshape(1, -1))
        if fitness_i > best_fitness:
            best_fitness = fitness_i

        # Save training results
        save = (not args.nosave) or (final_epoch and not args.evolve)
        if save:
            with open("results.txt", "r") as f:
                # Create checkpoint
                state = {
                    "epoch": epoch,
                    "best_fitness": best_fitness,
                    "training_results": f.read(),
                    "state_dict": ema.ema.module.state_dict()
                    if hasattr(model, "module") else ema.ema.state_dict(),
                    "optimizer": None if final_epoch else optimizer.state_dict()
                }

            # Save last checkpoint
            torch.save(state, "weights/checkpoint.pth")

            # Save best checkpoint
            if (best_fitness == fitness_i) and not final_epoch:
                state = {
                    "epoch": -1,
                    "best_fitness": None,
                    "training_results": None,
                    "state_dict": model.state_dict(),
                    "optimizer": None
                }
                torch.save(state, "weights/model_best.pth")

            # Delete checkpoint
            del state

    if not args.evolve:
        plot_results()  # save as results.png

    print(f"{epoch - start_epoch} epochs completed "
          f"in {(time.time() - start_time) / 3600:.3f} hours.\n")
    dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
    torch.cuda.empty_cache()
    return results
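
# A minimal sketch of the argparse wiring train() assumes. Flag names are inferred
# from the `args.*` attributes referenced above, and defaults are illustrative, not
# the project's actual values:
#
#   parser = argparse.ArgumentParser()
#   parser.add_argument("--cfg", type=str, default="cfgs/yolov3.cfg")
#   parser.add_argument("--data", type=str, default="data/coco2014.data")
#   parser.add_argument("--image-size", nargs="+", type=int, default=[416])
#   parser.add_argument("--epochs", type=int, default=300)
#   parser.add_argument("--batch-size", type=int, default=16)
#   parser.add_argument("--accumulate", type=int, default=4)
#   parser.add_argument("--weights", type=str, default="")
#   parser.add_argument("--workers", type=int, default=4)
#   parser.add_argument("--device", type=str, default="")
#   parser.add_argument("--multi-scale", action="store_true")
#   parser.add_argument("--rect", action="store_true")
#   parser.add_argument("--cache-images", action="store_true")
#   parser.add_argument("--single-cls", action="store_true")
#   parser.add_argument("--notest", action="store_true")
#   parser.add_argument("--nosave", action="store_true")
#   parser.add_argument("--evolve", action="store_true")
#   args = parser.parse_args()
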
def detect(save_image=False):
    # (320, 192) or (416, 256) or (608, 352) for (height, width)
    image_size = (608, 352) if ONNX_EXPORT else args.image_size
    output = args.output
    source = args.source
    weights = args.weights
    view_image = args.view_image
    save_txt = args.save_txt

    camera = False
    if source == "0" or source.startswith("http") or source.endswith(".txt"):
        camera = True

    # Initialize
    device = select_device(device="cpu" if ONNX_EXPORT else args.device)
    if os.path.exists(output):
        shutil.rmtree(output)  # delete output folder
    os.makedirs(output)  # make new output folder

    # Initialize model
    model = Darknet(args.cfg, image_size)

    # Load weights (checkpoints written by train() store weights under "state_dict")
    if weights.endswith(".pth"):
        model.load_state_dict(torch.load(weights, map_location=device)["state_dict"])
    else:
        load_darknet_weights(model, weights)

    # Second-stage classifier
    classify = False
    if classify:
        # init model
        model_classifier = load_classifier(name="resnet101", classes=2)
        # load model
        model_classifier.load_state_dict(
            torch.load("weights/resnet101.pth", map_location=device)["model"])
        model_classifier.to(device)
        model_classifier.eval()
    else:
        model_classifier = None

    # Migrate the model to the specified device
    model.to(device)
    # set eval model mode
    model.eval()

    # Export mode
    if ONNX_EXPORT:
        model.fuse()
        image = torch.zeros((1, 3) + image_size)  # (1, 3, 608, 352)
        # *.onnx filename
        filename = args.weights.replace(args.weights.split(".")[-1], "onnx")
        torch.onnx.export(model, image, filename, verbose=False, opset_version=11)

        # Validate exported model
        import onnx
        model = onnx.load(filename)  # Load the ONNX model
        onnx.checker.check_model(model)  # Check that the IR is well formed
        # Print a human readable representation of the graph
        print(onnx.helper.printable_graph(model.graph))
        return

    # Set Dataloader
    video_path, video_writer = None, None
    if camera:
        view_image = True
        cudnn.benchmark = True
        dataset = LoadStreams(source, image_size=image_size)
    else:
        save_image = True
        dataset = LoadImages(source, image_size=image_size)

    # Get names and colors
    names = load_classes(args.names)
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    start_time = time.time()
    # run once to warm up
    _ = model(torch.zeros((1, 3, image_size, image_size),
                          device=device)) if device.type != "cpu" else None
    for image_path, image, im0s, video_capture in dataset:
        image = torch.from_numpy(image).to(device)
        image = image.float()  # uint8 to fp16/32
        image /= 255.0  # 0 - 255 to 0.0 - 1.0
        if image.ndimension() == 3:
            image = image.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        predict = model(image, augment=args.augment)[0]
        t2 = time_synchronized()

        # Apply NMS
        predict = non_max_suppression(predict,
                                      args.confidence_threshold,
                                      args.iou_threshold,
                                      multi_label=False,
                                      classes=args.classes,
                                      agnostic=args.agnostic_nms)

        # Apply Classifier
        if classify:
            predict = apply_classifier(predict, model_classifier, image, im0s)

        # Process detections
        for i, detections in enumerate(predict):  # detections per image
            if camera:  # batch_size >= 1
                p, context, im0 = image_path[i], f"{i:g}: ", im0s[i]
            else:
                p, context, im0 = image_path, "", im0s

            save_path = str(Path(output) / Path(p).name)
            context += f"{image.shape[2]}*{image.shape[3]} "  # get image size
            if detections is not None and len(detections):
                # Rescale boxes from image_size to im0 size
                detections[:, :4] = scale_coords(image.shape[2:], detections[:, :4],
                                                 im0.shape).round()

                # Print results
                for classes in detections[:, -1].unique():
                    # detections per class
                    number = (detections[:, -1] == classes).sum()
                    context += f"{number} {names[int(classes)]}s, "

                # Write results
                for *xyxy, confidence, classes in detections:
                    if save_txt:
                        # Write to file
                        with open(save_path + ".txt", "a") as files:
                            files.write(("%e " * 6 + "\n") % (*xyxy, classes, confidence))

                    if save_image or view_image:
                        # Add bbox to image
                        label = f"{names[int(classes)]} {confidence * 100:.2f}%"
                        plot_one_box(xyxy, im0, label=label, color=colors[int(classes)])

            # Stream results
            if view_image:
                cv2.imshow("camera", im0)
                if cv2.waitKey(1) == ord("q"):  # q to quit
                    raise StopIteration

            # Print time (inference + NMS)
            print(f"{context}Done. {t2 - t1:.3f}s")

            # Save results (image with detections)
            if save_image:
                if dataset.mode == "images":
                    cv2.imwrite(save_path, im0)
                else:
                    if video_path != save_path:  # new video
                        video_path = save_path
                        if isinstance(video_writer, cv2.VideoWriter):
                            video_writer.release()  # release previous video writer

                        fps = video_capture.get(cv2.CAP_PROP_FPS)
                        w = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        video_writer = cv2.VideoWriter(save_path,
                                                       cv2.VideoWriter_fourcc(*args.fourcc),
                                                       fps, (w, h))
                    video_writer.write(im0)

    print(f"Done. ({time.time() - start_time:.3f}s)")
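
# Typical entry point (a sketch; the original script's main block is not shown in
# this excerpt). Inference needs no gradients, so detect() is usually driven under
# torch.no_grad():
#
#   if __name__ == "__main__":
#       with torch.no_grad():
#           detect()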