def test_image_accepts_bounding_boxes(mocked_run):
    """A wandb.Image constructed with box annotations serializes its boxes
    to a media file under the run directory when bound to a run."""
    boxes = {"predictions": {"box_data": [full_box]}}
    wb_image = wandb.Image(image, boxes=boxes)
    wb_image.bind_to_run(mocked_run, "images", 0)
    serialized = wb_image.to_json(mocked_run)
    box_path = serialized["boxes"]["predictions"]["path"]
    assert os.path.exists(os.path.join(mocked_run.dir, box_path))
sample_text != 0).tolist() decoded_text = tokenizer.decode(token_list) if not avoid_model_calls: # CUDA index errors when we don't guard this image = dalle.generate_images( text[:1], filter_thres=0.9) # topk sampling at 0.9 save_model(f'./dalle.pt') wandb.save(f'./dalle.pt') log = { **log, } if not avoid_model_calls: log['image'] = wandb.Image(image, caption=decoded_text) wandb.log(log) if LR_DECAY: distr_scheduler.step(loss) if distr_backend.is_root_worker(): # save trained model to wandb as an artifact every epoch's end model_artifact = wandb.Artifact('trained-dalle', type='model', metadata=dict(model_config)) model_artifact.add_file('dalle.pt') run.log_artifact(model_artifact)
if i % 100 == 0: sample_text = text[:1] token_list = sample_text.masked_select(sample_text != 0).tolist() decoded_text = tokenizer.decode(token_list) image = dalle.generate_images( text[:1], mask=mask[:1], filter_thres=0.9 # topk sampling at 0.9 ) save_model(f'./dalle.pt') wandb.save(f'./dalle.pt') log = {**log, 'image': wandb.Image(image, caption=decoded_text)} wandb.log(log) # save trained model to wandb as an artifact every epoch's end model_artifact = wandb.Artifact('trained-dalle', type='model', metadata=dict(model_config)) model_artifact.add_file('dalle.pt') run.log_artifact(model_artifact) save_model(f'./dalle-final.pt') wandb.save('./dalle-final.pt') model_artifact = wandb.Artifact('trained-dalle', type='model',
def test(data,
         weights=None,
         batch_size=16,
         imgsz=640,
         conf_thres=0.001,
         iou_thres=0.6,  # for NMS
         save_json=False,
         single_cls=False,
         augment=False,
         verbose=False,
         model=None,
         dataloader=None,
         save_dir=Path(''),  # for saving images
         save_txt=False,  # for auto-labelling
         save_conf=False,
         plots=True,
         log_imgs=0):  # number of logged images
    """Evaluate a YOLOv5 model on a val/test dataset.

    Runs inference + NMS over the dataloader, accumulates per-image statistics,
    computes P/R/mAP, optionally writes *.txt labels, a pycocotools JSON, and
    W&B image logs.

    Args:
        data: path to a dataset YAML (or already-parsed dict path when called standalone).
        weights: checkpoint path(s); only used when `model` is None (standalone mode).
        model/dataloader: supplied by train.py when called during training.
        log_imgs: number of images to log to W&B (capped at 100).

    Returns:
        ((mp, mr, map50, map, *per-loss means), per-class mAP array, timing tuple).

    NOTE(review): standalone mode reads the module-level `opt` (argparse result) —
    confirm `opt` is defined at module scope before calling this directly.
    """
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device
    else:  # called directly
        set_logging()
        device = select_device(opt.device, batch_size=batch_size)
        save_txt = opt.save_txt  # save *.txt labels

        # Directories
        if save_dir == Path('runs/test'):  # if default
            save_dir.mkdir(parents=True, exist_ok=True)  # make base
            save_dir = Path(increment_dir(save_dir / 'exp', opt.name))  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make new dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half = device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Logging
    log_imgs = min(log_imgs, 100)  # cap W&B image logging
    try:
        import wandb  # Weights & Biases
    except ImportError:
        log_imgs = 0

    # Dataloader
    if not training:
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
        dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, hyp=None,
                                       augment=False, cache=False, pad=0.5, rect=True)[0]

    seen = 0
    names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
    coco91class = coco80_to_coco91_class()
    # FIX: header strings were scraper-corrupted ('[email protected]'); restored to mAP labels
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:  # if model has loss hyperparameters
                loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # box, obj, cls

            # Run NMS
            t = time_synchronized()
            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                # no detections: still record an empty stats entry when targets exist
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # gain whwh
                x = pred.clone()
                x[:, :4] = scale_coords(img[si].shape[1:], x[:, :4], shapes[si][0], shapes[si][1])  # to original
                for *xyxy, conf, cls in x:
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                    with open(str(save_dir / 'labels' / Path(paths[si]).stem) + '.txt', 'a') as f:
                        f.write(('%g ' * len(line) + '\n') % line)

            # W&B logging
            if len(wandb_images) < log_imgs:
                box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
                             "class_id": int(cls),
                             "box_caption": "%s %.3f" % (names[cls], conf),
                             "scores": {"class_score": conf},
                             "domain": "pixel"} for *xyxy, conf, cls in pred.clone().tolist()]
                boxes = {"predictions": {"box_data": box_data, "class_labels": names}}
                wandb_images.append(wandb.Image(img[si], boxes=boxes))

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = Path(paths[si]).stem
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({'image_id': int(image_id) if image_id.isnumeric() else image_id,
                                  'category_id': coco91class[int(p[5])],
                                  'bbox': [round(x, 3) for x in b],
                                  'score': round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices 1xn
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections; each target may be matched at most once
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and batch_i < 1:
            f = save_dir / f'test_batch{batch_i}_labels.jpg'  # filename
            plot_images(img, targets, paths, str(f), names)  # labels
            f = save_dir / f'test_batch{batch_i}_pred.jpg'
            plot_images(img, output_to_target(output, width, height), paths, str(f), names)  # predictions

    # W&B logging
    if wandb_images:
        wandb.log({"outputs": wandb_images})

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, fname=save_dir / 'precision-recall_curve.png')
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, mAP@0.5, mAP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        file = save_dir / f"detections_val2017_{w}_results.json"  # predicted annotations file
        print('\nCOCO mAP with pycocotools... saving %s...' % file)
        with open(file, 'w') as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]
            cocoAnno = COCO(glob.glob('../coco/annotations/instances_val*.json')[0])  # initialize COCO annotations api
            cocoPred = cocoAnno.loadRes(str(file))  # initialize COCO pred api
            cocoEval = COCOeval(cocoAnno, cocoPred, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print('ERROR: pycocotools unable to run: %s' % e)

    # Return results
    if not training:
        print('Results saved to %s' % save_dir)
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
def main(
    loss_config="multiperceptual", mode="standard", visualize=False,
    fast=False, batch_size=None,
    subset_size=None, max_epochs=5000, dataaug=False, **kwargs,
):
    """Train a consistency TaskGraph on merged (distorted + undistorted) data.

    Builds train/val/ood realities, compiles the graph with Adam, then runs an
    initial eval pass followed by `max_epochs` train/eval epochs, logging every
    step to W&B and checkpointing periodically.

    NOTE(review): `visualize` is accepted but never read in this body — confirm
    whether it is consumed via **kwargs elsewhere or is dead.
    """
    # CONFIG — record hyperparameters in the W&B run config
    # (lr/n_gauss/distribution are recorded as constants here; lr must match graph.compile below)
    wandb.config.update({"loss_config":loss_config,"batch_size":batch_size,"data_aug":dataaug,"lr":"3e-5", "n_gauss":1,"distribution":"laplace"})

    batch_size = batch_size or (4 if fast else 64)  # small batch in fast/debug mode
    energy_loss = get_energy_loss(config=loss_config, mode=mode, **kwargs)

    # DATA LOADING
    train_dataset, val_dataset, val_noaug_dataset, train_step, val_step = load_train_val_merging(
        energy_loss.get_tasks("train_c"),
        batch_size=batch_size, fast=fast,
        subset_size=subset_size,
    )
    test_set = load_test(energy_loss.get_tasks("test"))
    ood_set = load_ood(energy_loss.get_tasks("ood"), ood_path='./assets/ood_natural/')
    ood_syn_aug_set = load_ood(energy_loss.get_tasks("ood_syn_aug"), ood_path='./assets/st_syn_distortions/')
    ood_syn_set = load_ood(energy_loss.get_tasks("ood_syn"), ood_path='./assets/ood_syn_distortions/', sample=35)

    train = RealityTask("train_c", train_dataset, batch_size=batch_size, shuffle=True)  # distorted and undistorted
    val = RealityTask("val_c", val_dataset, batch_size=batch_size, shuffle=True)  # distorted and undistorted
    val_noaug = RealityTask("val", val_noaug_dataset, batch_size=batch_size, shuffle=True)  # no augmentation
    test = RealityTask.from_static("test", test_set, energy_loss.get_tasks("test"))
    ood = RealityTask.from_static("ood", ood_set, [tasks.rgb,])  ## standard ood set - natural
    ood_syn_aug = RealityTask.from_static("ood_syn_aug", ood_syn_aug_set, [tasks.rgb,])  ## synthetic distortion images used for sig training
    ood_syn = RealityTask.from_static("ood_syn", ood_syn_set, [tasks.rgb,])  ## unseen syn distortions

    # GRAPH
    realities = [train, val, val_noaug, test, ood, ood_syn_aug, ood_syn]
    graph = TaskGraph(tasks=energy_loss.tasks + realities, pretrained=True, finetuned=False,
        freeze_list=energy_loss.freeze_list,
    )
    graph.compile(torch.optim.Adam, lr=3e-5, weight_decay=2e-6, amsgrad=True)

    # LOGGING
    logger = VisdomLogger("train", env=JOB)  # fake visdom logger
    logger.add_hook(lambda logger, data: logger.step(), feature="loss", freq=20)
    energy_loss.logger_hooks(logger)

    # Initial (epoch-0) evaluation pass before any weight update
    graph.eval()
    path_values = energy_loss.plot_paths(graph, logger, realities, prefix="")
    for reality_paths, reality_images in path_values.items():
        wandb.log({reality_paths: [wandb.Image(reality_images)]}, step=0)

    with torch.no_grad():
        for reality in [val,val_noaug]:
            for _ in range(0, val_step):
                val_loss = energy_loss(graph, realities=[reality])
                val_loss = sum([val_loss[loss_name] for loss_name in val_loss])
                reality.step()
                logger.update("loss", val_loss)
        # train-set loss is also measured without gradients here (no graph.step)
        for _ in range(0, train_step):
            train_loss = energy_loss(graph, realities=[train], compute_grad_ratio=True)
            train_loss = sum([train_loss[loss_name] for loss_name in train_loss])
            train.step()
            logger.update("loss", train_loss)
    energy_loss.logger_update(logger)

    # flush logger buffers into a {metric: [value]} dict and push to W&B at step 0
    data=logger.step()
    del data['loss']
    data = {k:v[0] for k,v in data.items()}
    wandb.log(data, step=0)

    # TRAINING
    for epochs in range(0, max_epochs):
        logger.update("epoch", epochs)

        graph.train()
        for _ in range(0, train_step):
            train_loss = energy_loss(graph, realities=[train], compute_grad_ratio=True)
            train_loss = sum([train_loss[loss_name] for loss_name in train_loss])
            graph.step(train_loss)  # backward + optimizer step
            train.step()
            logger.update("loss", train_loss)

        graph.eval()
        for reality in [val,val_noaug]:
            for _ in range(0, val_step):
                with torch.no_grad():
                    val_loss = energy_loss(graph, realities=[reality])
                    val_loss = sum([val_loss[loss_name] for loss_name in val_loss])
                reality.step()
                logger.update("loss", val_loss)

        energy_loss.logger_update(logger)

        data=logger.step()
        del data['loss']
        del data['epoch']
        data = {k:v[0] for k,v in data.items()}
        wandb.log(data, step=epochs+1)

        # periodic checkpoints (graph weights + optimizer state)
        if epochs % 10 == 0:
            graph.save(f"{RESULTS_DIR}/graph.pth")
            torch.save(graph.optimizer.state_dict(),f"{RESULTS_DIR}/opt.pth")

        # periodic qualitative path visualizations
        if epochs % 25 == 0:
            path_values = energy_loss.plot_paths(graph, logger, realities, prefix="")
            for reality_paths, reality_images in path_values.items():
                wandb.log({reality_paths: [wandb.Image(reality_images)]}, step=epochs+1)

    # final checkpoint after the last epoch
    graph.save(f"{RESULTS_DIR}/graph.pth")
    torch.save(graph.optimizer.state_dict(),f"{RESULTS_DIR}/opt.pth")
def on_epoch_end(self, epoch, smooth_loss, last_metrics, **kwargs):
    "Logs training loss, validation loss and custom metrics & log prediction samples & save model"

    if self.save_model:
        # Adapted from fast.ai "SaveModelCallback": keep the best checkpoint
        # according to the monitored metric and comparison operator.
        current = self.get_monitor_value()
        if current is not None and self.operator(current, self.best):
            print(
                'Better model found at epoch {} with {} value: {}.'.format(
                    epoch, self.monitor, current))
            self.best = current

            # Save within wandb folder so the checkpoint is synced with the run
            with self.model_path.open('wb') as model_file:
                self.learn.save(model_file)

    # Log sample predictions
    if self.validation_data:
        pred_log = []

        for x, y in self.validation_data:
            pred = self.learn.predict(x)

            # scalar -> likely to be a category (classification output)
            if not pred[1].shape:
                pred_log.append(
                    wandb.Image(
                        x.data,
                        caption='Ground Truth: {}\nPrediction: {}'.format(
                            y, pred[0])))

            # most vision datasets have a "show" function we can use
            elif hasattr(x, "show"):
                # log input data
                pred_log.append(
                    wandb.Image(x.data, caption='Input data', grouping=3))

                # log label and prediction
                for im, capt in ((pred[0], "Prediction"),
                                 (y, "Ground Truth")):
                    # Resize plot to image resolution
                    # from https://stackoverflow.com/a/13714915
                    my_dpi = 100
                    fig = plt.figure(frameon=False, dpi=my_dpi)
                    h, w = x.size
                    fig.set_size_inches(w / my_dpi, h / my_dpi)
                    ax = plt.Axes(fig, [0., 0., 1., 1.])
                    ax.set_axis_off()
                    fig.add_axes(ax)

                    # Superpose label or prediction to input image
                    x.show(ax=ax, y=im)
                    pred_log.append(wandb.Image(fig, caption=capt))
                    plt.close(fig)  # release the figure to avoid leaking memory

            # likely to be an image (2D, or 3D with 1/3/4 channels first)
            elif hasattr(y, "shape") and (
                    (len(y.shape) == 2) or
                    (len(y.shape) == 3 and y.shape[0] in [1, 3, 4])):
                pred_log.extend([
                    wandb.Image(x.data, caption='Input data', grouping=3),
                    wandb.Image(pred[0].data, caption='Prediction'),
                    wandb.Image(y.data, caption='Ground Truth')
                ])

            # we just log input data
            else:
                pred_log.append(wandb.Image(x.data, caption='Input data'))

        # commit=False so samples land in the same step as the metrics below
        wandb.log({"Prediction Samples": pred_log}, commit=False)

    # Log losses & metrics
    # Adapted from fast.ai "CSVLogger"
    logs = {
        name: stat
        for name, stat in list(
            zip(self.learn.recorder.names, [epoch, smooth_loss] + last_metrics))
    }
    wandb.log(logs)
"KLanneal", use_KLannealing, 'Rss: ', rss ) # Init Optimizer Adam optimizer = optim.Adam(vae_model.parameters(), lr=lr_rate) # Start training print('Start training:') for epoch in range(1, epochs + 1): print(epoch) # Use Kl annealing train(epoch, vae_model, data_loader, img_size, batch_size, optimizer, wandb, beta, device) if epoch % 10 == 0: test(epoch, vae_model, data_loader_valid, img_size, batch_size, optimizer, wandb, device) mu_sampler, sigma_sample = vae_model.sample(64) mu_sampler = transforms.ToPILImage()(normalize_tensor(make_grid(mu_sampler.view(64,1,img_size,img_size).detach().cpu()))) sigma_sample = transforms.ToPILImage()(normalize_tensor(make_grid(sigma_sample.view(64,1,img_size,img_size).detach().cpu()))) wandb.log({"Sampled Mu": [wandb.Image(mu_sampler, caption="")]}) wandb.log({"Sampled Prec": [wandb.Image(sigma_sample, caption="")]}) if epoch % 50 == 0: path = log_dir + '/' + str(epoch) + '.pth' torch.save(vae_model, path)
**log, 'epoch': epoch, 'iter': i, 'loss': loss.item() } if i % 10 == 0: sample_text = text[:1] token_list = sample_text.masked_select(sample_text != 0).tolist() decoded_text = tokenizer.decode(token_list) image = dalle.generate_images( text[:1], mask = mask[:1], filter_thres = 0.5 # topk sampling at 0.9 ) save_model(f'./dalle.pt') wandb.save(f'./dalle.pt') log = { **log, 'image': wandb.Image(image, caption = decoded_text) } wandb.log(log) save_model(f'./dalle-final.pt') wandb.save('./dalle-final.pt') wandb.finish()
def __generate_image_with_grad_cam(self, df, trainer: 'pl.Trainer', pl_module, text):
    """Generate Grad-CAM visualizations for the first 5 rows of `df`, save
    intermediate images to disk, and log the overlay to the W&B experiment.

    NOTE(review): assumes `df` has columns id_image/target/results (and
    optionally labels) — confirm against the caller.
    """

    def revert_normalization(img, mean, std):
        # undo a torchvision Normalize transform: img * std + mean
        return (img * std + mean)

    target_layer = list(
        pl_module.model.children())[-4]  # works with -4 (translated: "con menos 4 funciona")
    cam = gradCAMRegressorOneChannel(model=pl_module,
                                     target_layer=target_layer,
                                     use_cuda=True)
    df = df.head(5)  # only visualize the first 5 samples
    # recover the Normalize transform's statistics from the training pipeline
    normalize = trainer.datamodule.data_train.dataset.transform.transforms[
        0]  # normalize=
    mean = normalize.mean
    std = normalize.std
    if "labels" in df.columns:
        iterator = zip(df.id_image, df.labels, df.target, df.results)
    else:
        iterator = zip(df.id_image, df.target, df.results)
    for batch in iterator:
        if len(batch) == 3:  # no labels column present
            idx, target, results = batch
            label = None
        else:
            idx, label, target, results = batch
        image = torch.unsqueeze(
            self.dataloader.dataset.dataset._create_image_from_dataframe(
                idx), dim=0).to(device=pl_module.device)
        grayscale_cam = cam(
            input_tensor=image,
            eigen_smooth=False)  # if it doesn't work, set to True (translated)
        # In this example grayscale_cam has only one image in the batch:
        grayscale_cam = grayscale_cam[0, :]
        if np.isnan(grayscale_cam).any():
            print("hola, alguno es nan")
        image = image.cpu().numpy()
        gray = image[0, :, :, :]
        gray = np.moveaxis(gray, 0, -1)  # CHW -> HWC
        if gray.shape[-1] != 3:
            # single-channel input: replicate to 3 channels; presumably in [-1, 1] — TODO confirm
            img_bw_with_3_channels = cv2.merge((gray, gray, gray))
            img_to_save = np.uint8((img_bw_with_3_channels + 1) * 127.5)
        else:
            img_bw_with_3_channels = gray
            img_bw_with_3_channels = revert_normalization(
                img_bw_with_3_channels, mean, std)
            img_to_save = np.uint8(img_bw_with_3_channels * 255)
        #
        img = Image.fromarray(img_to_save)
        img.save(
            os.path.join(self.folder_images,
                         f"{text} {idx} image_3_channel.png"))
        heatmap = cv2.applyColorMap(np.uint8(255 * grayscale_cam),
                                    cv2.COLORMAP_PLASMA)
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
        heatmap = np.float32(heatmap)
        cv2.imwrite(
            os.path.join(self.folder_images,
                         f'{text} {idx} heatmap_cam.jpg'), heatmap)
        # blend heatmap and input 50/50 for the "mixture" image
        alpha = 0.5
        beta = (1.0 - alpha)
        dst = np.uint8(alpha * (heatmap) + beta * (img_to_save))
        cv2.imwrite(
            os.path.join(self.folder_images, f"{text} {idx} mixture.jpg"),
            dst)
        visualization = pytorch_grad_cam.utils.image.show_cam_on_image(
            img_bw_with_3_channels,
            grayscale_cam,
            use_rgb=True,
            colormap=cv2.COLORMAP_PLASMA)
        img = Image.fromarray(visualization)
        img.save(
            os.path.join(self.folder_images, f"{text} {idx} probando.png"))
        trainer.logger.experiment.log({
            "graficas gradcam " + self.prefix:
            wandb.Image(
                img,
                caption=
                f" {idx} grad cam, Label {label}, Target: {target}, Pred: {results} "
            ),
        })
def tf_summary_to_dict(tf_summary_str_or_pb, namespace=""):
    """Convert a Tensorboard Summary to a dictionary

    Accepts either a tensorflow.summary.Summary or one encoded as a string.
    """
    values = {}
    if hasattr(tf_summary_str_or_pb, "summary"):
        # Event proto: unwrap the inner Summary and record step/wall time
        summary_pb = tf_summary_str_or_pb.summary
        values[namespaced_tag("global_step", namespace)
               ] = tf_summary_str_or_pb.step
        values["_timestamp"] = tf_summary_str_or_pb.wall_time
    elif isinstance(tf_summary_str_or_pb, (str, bytes, bytearray)):
        # serialized Summary: parse it
        summary_pb = Summary()
        summary_pb.ParseFromString(tf_summary_str_or_pb)
    else:
        if not hasattr(tf_summary_str_or_pb, "value"):
            # NOTE(review): the "%s" placeholder in this message is never
            # formatted — likely a missing `% type(...)` argument; confirm
            # against upstream before changing the string.
            raise ValueError(
                "Can't log %s, only Event, Summary, or Summary proto buffer strings are accepted")
        else:
            summary_pb = tf_summary_str_or_pb

    for value in summary_pb.value:
        kind = value.WhichOneof("value")
        if kind == "simple_value":
            values[namespaced_tag(value.tag, namespace)] = value.simple_value
        elif kind == "image":
            from PIL import Image
            image = wandb.Image(Image.open(
                six.BytesIO(value.image.encoded_image_string)))
            tag_idx = value.tag.rsplit('/', 1)
            if len(tag_idx) > 1 and tag_idx[1].isdigit():
                # tag ends in "/<digit>": group images under the base tag
                tag, idx = tag_idx
                values.setdefault(history_image_key(
                    tag, namespace), []).append(image)
            else:
                values[history_image_key(value.tag, namespace)] = image
        # Coming soon...
        # elif kind == "audio":
        #     audio = wandb.Audio(six.BytesIO(value.audio.encoded_audio_string),
        #                         sample_rate=value.audio.sample_rate, content_type=value.audio.content_type)
        elif kind == "histo":
            # extrapolate the open-ended first/last bucket edges from the
            # neighboring bucket limits
            first = value.histo.bucket_limit[0] + \
                value.histo.bucket_limit[0] - value.histo.bucket_limit[1]
            last = value.histo.bucket_limit[-2] + \
                value.histo.bucket_limit[-2] - value.histo.bucket_limit[-3]
            np_histogram = (list(value.histo.bucket), [
                first] + value.histo.bucket_limit[:-1] + [last])
            values[namespaced_tag(value.tag, namespace)] = wandb.Histogram(
                np_histogram=np_histogram)
        elif value.tag == "_hparams_/session_start_info":
            # hparams plugin summary: copy hyperparameters into run config
            if wandb.util.get_module("tensorboard.plugins.hparams"):
                from tensorboard.plugins.hparams import plugin_data_pb2
                plugin_data = plugin_data_pb2.HParamsPluginData()
                plugin_data.ParseFromString(
                    value.metadata.plugin_data.content)
                for key, param in six.iteritems(plugin_data.session_start_info.hparams):
                    if not wandb.run.config.get(key):
                        wandb.run.config[key] = param.number_value or param.string_value or param.bool_value
            else:
                wandb.termerror(
                    "Received hparams tf.summary, but could not import the hparams plugin from tensorboard")
    return values
def train_gan(net, tr_loader, va_loader=None, epochs=100, device="cuda:0",
              disc_lr=0.001, gene_lr=0.001, disc_iter=1, gene_iter=1, writer=None):
    """Adversarially train `net` (a GAN wrapper exposing .discriminator/.generator,
    generate/discriminate/z_sample and the two criteria).

    Returns the trained net and a history dict of per-epoch losses.
    NOTE(review): `va_loader` is accepted but never used in this body.
    """
    device = torch.device(device)
    net = net.to(device)
    disc_optimizer = optim.Adam(net.discriminator.parameters(), lr=disc_lr, betas=(0.5, 0.999))
    gene_optimizer = optim.Adam(net.generator.parameters(), lr=gene_lr, betas=(0.5, 0.999))
    hist = {"epoch": [], "phase": []}
    for e in tqdm(range(epochs), "Epoch: "):
        losses_cache = LossesMetric()
        for X, _ in tqdm(tr_loader, "Batch Train: ", leave=False):
            X = X.to(device).float()
            # discriminator training
            net.discriminator.train()
            net.generator.eval()
            for _ in range(disc_iter):
                Z = net.z_sample(
                    (X.size(0), net.latent_size)).to(device).float()
                disc_optimizer.zero_grad()
                # generator runs without grad: only the discriminator updates here
                with torch.no_grad():
                    X_gen = net.generate(Z)
                with torch.enable_grad():
                    pred_gen = net.discriminate(X_gen)
                    pred_tru = net.discriminate(X)
                    disc_loss = net.disc_criterion(pred_gen, pred_tru)
                disc_loss.backward()
                disc_optimizer.step()
            # generator training
            net.discriminator.eval()
            net.generator.train()
            for _ in range(gene_iter):
                Z = net.z_sample(
                    (X.size(0), net.latent_size)).to(device).float()
                gene_optimizer.zero_grad()
                with torch.enable_grad():
                    X_gen = net.generate(Z)
                    pred_gen = net.discriminate(X_gen)
                    gene_loss = net.gene_criterion(pred_gen)
                gene_loss.backward()
                gene_optimizer.step()
            # record the last disc/gene losses of this batch, weighted by batch size
            losses_cache.add({"disc": disc_loss, "gene": gene_loss}, X.size(0))
        losses = losses_cache.value()
        hist["epoch"].append(e)
        hist["phase"].append("train")
        for k, v in losses.items():
            hist.setdefault(k, []).append(v)
        if writer is not None:
            losses["epoch"] = e
            writer.log(losses)
        # Generate a number of sample images and display them
        # (translated from Chinese: 生成一定数量的示例图像,并进行展示)
        if (e + 1) % 5 == 0:
            Z = net.z_sample((16, net.latent_size)).to(device).float()
            imgs_tensor = net.generate(Z)
            imgs_tensor = make_grid(imgs_tensor, nrow=8, normalize=True)
            img = to_pil_image(imgs_tensor)
            wandb_imgs = wandb.Image(img)
            # NOTE(review): this writer.log is not guarded by `writer is not None`
            # like the one above — confirm writer is always provided.
            writer.log({"generated": wandb_imgs, "epoch": e})
    return net, hist
def train_recon(self, args):
    # no classification version
    """Train the cycle-consistent generator/discriminator pair (Gst/Gts, Ds/Dt)
    with reconstruction + adversarial (+ optional identity) losses, logging to
    W&B and saving generator weights every epoch."""
    #wandb codes
    wandb.init(project="acal_office-home", name=args.log_name)
    wandb.watch(self.Gst)
    wandb.watch(self.Gts)

    #dataload
    if args.data == "mnist":
        # MNIST (grayscale, source) <-> SVHN (RGB, target)
        transform_m = transforms.Compose(
            [transforms.Resize((args.resolution,args.resolution)),
             transforms.ToTensor(),
             transforms.Normalize(mean=[0.5], std=[0.5])
             ])
        transform_s = transforms.Compose(
            [transforms.Resize((args.resolution,args.resolution)),
             transforms.ToTensor(),
             transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
             ])
        s_trainset = MNIST("datasets/mnist", train=True, download=True, transform=transform_m)
        s_testset = MNIST("datasets/mnist", train=False, download=True, transform=transform_m)
        if args.few_shot:
            t_trainset = dataset_sampler(SVHN("datasets/svhn", split="train", download=True, transform=transform_s))
            t_testset = dataset_sampler(SVHN("datasets/svhn", split="test", download=True, transform=transform_s))
        else:
            t_trainset = SVHN("datasets/svhn", split="train", download=True, transform=transform_s)
            t_testset = SVHN("datasets/svhn", split="test", download=True, transform=transform_s)
        trainset = concat_dataset(s_trainset, t_trainset)
        testset = concat_dataset(s_testset, t_testset)
        trainloader = DataLoader(trainset, batch_size = args.pre_batch_size, shuffle = True, num_workers=args.num_workers, pin_memory=True, drop_last=True)
        testloader = DataLoader(testset, batch_size = args.pre_batch_size, shuffle = True, num_workers=args.num_workers, pin_memory=True, drop_last=True)
    elif args.data == "original":
        # custom CSV-listed image datasets
        transform = transforms.Compose(
            [transforms.RandomHorizontalFlip(),
             transforms.Resize((args.resolution,args.resolution)),
             transforms.RandomResizedCrop(size=(args.resolution,args.resolution)),
             transforms.ToTensor()
             ])
        s_trainset = load_pict(os.path.join(args.csv_path, args.source_name +"_train.csv"), transform=transform)
        s_testset = load_pict(os.path.join(args.csv_path, args.source_name +"_test.csv"), transform=transform)
        # NOTE(review): t_testset loads source_name's test CSV, not target_name's — possible copy-paste bug; confirm.
        t_trainset = load_pict(os.path.join(args.csv_path, args.target_name + "_train.csv"), transform=transform)
        t_testset = load_pict(os.path.join(args.csv_path, args.source_name +"_test.csv"), transform=transform)
        trainset = concat_dataset(s_trainset, t_trainset)
        testset = concat_dataset(s_testset, t_testset)
        trainloader = DataLoader(trainset, batch_size = args.batch_size, shuffle = True, num_workers=args.num_workers, pin_memory=True, drop_last=True)
        testloader = DataLoader(testset, batch_size = args.batch_size, shuffle = True, num_workers=args.num_workers, pin_memory=True, drop_last=True)

    # s_trainloader = DataLoader(s_trainset, batch_size = args.batch_size, shuffle = True, num_workers=args.num_workers, pin_memory=True, drop_last=True)
    # s_testloader = DataLoader(s_testset, batch_size = args.batch_size, shuffle = True, num_workers=args.num_workers, pin_memory=True, drop_last=True)
    # t_trainloader = DataLoader(t_trainset, batch_size = args.batch_size, shuffle = True, num_workers=args.num_workers, pin_memory=True, drop_last=True)
    # t_testloader = DataLoader(t_testset, batch_size = args.batch_size, shuffle = True, num_workers=args.num_workers, pin_memory=True, drop_last=True)

    #log
    best_acc_t = 0
    log = pd.DataFrame(
        columns=[
            "epoch",
            "s_loss", "t_loss",
            "s_acc", "t_acc"
        ]
    )

    for epoch in range(args.num_epoch):
        end = time.time()
        # average meter
        batch_time = AverageMeter('Time', ':6.3f')
        data_time = AverageMeter('Data', ':6.3f')
        losses_d = AverageMeter('Loss_d', ':.4e')
        losses_g = AverageMeter('Loss_g', ':.4e')
        losses_g_idt = AverageMeter('Loss_g_idt', ':.4e')
        losses_g_recon = AverageMeter('Loss_g_recon', ':.4e')
        losses_g_adv = AverageMeter('Loss_g_adv', ':.4e')

        # progress meter
        progress = ProgressMeter(
            len(trainloader),
            [batch_time, data_time, losses_d, losses_g],
            prefix="Epoch: [{}]".format(epoch)
        )

        for i, sample in enumerate(trainloader):
            self.reset_grad()
            s_data, s_label, t_data, t_label = sample["s_data"], sample["s_label"], sample["t_data"], sample["t_label"]
            s_data, s_label, t_data, t_label = s_data.to(self.device), s_label.to(self.device), t_data.to(self.device), t_label.to(self.device)
            data_time.update(time.time() - end)

            # forward cycle: source->target->source and target->source->target
            fake_t_data = self.Gst(s_data)
            fake_s_data = self.Gts(t_data)
            cyc_s_data = self.Gts(fake_t_data)
            cyc_t_data = self.Gst(fake_s_data)

            ######################
            #train discriminator #
            ######################
            tmp_batchsize = s_data.shape[0]
            label_shape = self.Ds(s_data).detach().shape
            ones_label = torch.ones(label_shape).to(self.device)
            zeros_label = torch.zeros(label_shape).to(self.device)

            #discriminate fake data (LSGAN-style MSE objective)
            loss_d_s = self.MSE(self.Ds(s_data), ones_label) + self.MSE(self.Ds(fake_s_data.detach()), zeros_label)
            loss_d_t = self.MSE(self.Dt(t_data), ones_label) + self.MSE(self.Dt(fake_t_data.detach()), zeros_label)
            loss_d = loss_d_s + loss_d_t
            loss_d.backward()
            self.d_optimizer.step()
            losses_d.update(loss_d.item(), n=tmp_batchsize)

            # NOTE(review): this inner loop reuses `i`, shadowing the enumerate
            # index used by the progress display below — confirm intended.
            for i in range(args.num_k):
                ###################
                #train generator #
                ###################
                #reconstruction loss
                loss_g_recon = self.MSE(s_data, cyc_s_data) + self.MSE(t_data, cyc_t_data)
                #decieve discriminator
                loss_g_adv = self.MSE(self.Ds(fake_s_data), ones_label) + self.MSE(self.Dt(fake_t_data), ones_label)
                loss_g = loss_g_recon + loss_g_adv
                #identity construction loss
                if args.idt_loss:
                    if args.data == "mnist":
                        loss_idt_s = self.MSE(self.Gts(self.gs2rgb(s_data)), s_data)
                        loss_idt_t = self.MSE(self.Gst(self.rgb2gs(t_data)), t_data)
                    elif args.data == "original":
                        loss_idt_s = self.MSE(self.Gts(s_data), s_data)
                        loss_idt_t = self.MSE(self.Gst(t_data), t_data)
                    loss_g += loss_idt_s
                    loss_g += loss_idt_t
                loss_g.backward()
                self.g_optimizer.step()
                losses_g.update(loss_g.item(), n=tmp_batchsize)
                losses_g_adv.update(loss_g_adv.item(), n=tmp_batchsize)
                losses_g_recon.update(loss_g_recon.item(), n=tmp_batchsize)
                if args.idt_loss:
                    losses_g_idt.update(loss_idt_s.item() + loss_idt_t.item(), n=tmp_batchsize)

            if i != 0 and i % 100 == 0:
                progress.display(1)
            end = time.time()

        #----------------wandb visualisation------------------------------------------------
        # log the last batch's first sample for each direction of the cycle
        s_example_images = [wandb.Image(s_data[0], caption="source original data"),
                            wandb.Image(fake_t_data[0], caption="source converted data"),
                            wandb.Image(cyc_s_data[0], caption="source cycled data")]
        t_example_images = [wandb.Image(t_data[0], caption="target original data"),
                            wandb.Image(fake_s_data[0], caption="target converted data"),
                            wandb.Image(cyc_t_data[0], caption="target cycled data")]
        wandb.log({"Gst&Gts_loss" : losses_g.avg})
        wandb.log({"G idt loss" : losses_g_idt.avg})
        wandb.log({"G reconstruct loss" : losses_g_recon.avg})
        wandb.log({"G adv loss" : losses_g_adv.avg})
        wandb.log({"D_loss" : losses_d.avg})
        if epoch % 20 == 0:
            wandb.log({"epoch{} Source Images".format(epoch) : s_example_images})
            wandb.log({"epoch{} Target Images".format(epoch) : t_example_images})
        #-----------------------------------------------------------------------------------

        #save models (overwritten every epoch)
        torch.save(
            self.Gst.state_dict(),
            os.path.join(args.result_path, 'best_acc1_model_gst.prm')
        )
        torch.save(
            self.Gts.state_dict(),
            os.path.join(args.result_path, 'best_acc1_model_gts.prm')
        )
        print(
            'epoch: {}\tg loss: {:.4f}\td loss: {:.4f}'
            .format(epoch, losses_g.avg, losses_d.avg)
        )
def train(self, args):
    """Jointly train the cycle generators (Gst/Gts), discriminators (Ds/Dt)
    and per-domain classifiers (Cs/Ct).

    Each epoch: one discriminator update per batch (real and fake losses
    stepped separately), ``args.num_k`` generator/classifier updates per
    batch, wandb loss/image logging, a no-grad validation pass, and
    checkpointing of all six networks whenever target accuracy improves.

    Parameters
    ----------
    args : argparse.Namespace
        Experiment configuration (data selection, loss switches, loop
        sizes, paths, wandb names).

    Returns
    -------
    None
    """
    # ---- wandb setup ----
    wandb.init(project=args.project_name, name=args.log_name, config=args)
    wandb.watch(self.Gst)
    wandb.watch(self.Gts)
    wandb.watch(self.Cs)
    wandb.watch(self.Ct)

    # ---- data loading ----
    if args.data == "digit":
        transform_m = transforms.Compose(
            [transforms.Resize((args.resolution, args.resolution)),
             transforms.ToTensor()
             # transforms.Normalize(mean=[0.5], std=[0.5])
             ])
        transform_s = transforms.Compose(
            [transforms.Resize((args.resolution, args.resolution)),
             transforms.ToTensor()
             # transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
             ])
        s_trainset = SVHN("datasets/svhn", split="train", download=True, transform=transform_s)
        s_testset = SVHN("datasets/svhn", split="test", download=True, transform=transform_s)
        # s_trainset = MNIST("datasets/mnist", train=True, download=True, transform=transform_m)
        # s_testset = MNIST("datasets/mnist", train=False, download=True, transform=transform_m)
        if args.few_shot:
            # few-shot: subsample the target training set to args.dpc per class
            t_trainset = dataset_sampler(
                MNIST("datasets/mnist", train=True, download=True, transform=transform_m),
                data_per_class=args.dpc)
        else:
            t_trainset = MNIST("datasets/mnist", train=True, download=True, transform=transform_m)
        t_testset = MNIST("datasets/mnist", train=False, download=True, transform=transform_m)
        trainset = concat_dataset(s_trainset, t_trainset)
        testset = concat_dataset(s_testset, t_testset)
        trainloader = DataLoader(trainset, batch_size=args.pre_batch_size, shuffle=True,
                                 num_workers=args.num_workers, pin_memory=True)
        testloader = DataLoader(testset, batch_size=args.pre_batch_size, shuffle=True,
                                num_workers=args.num_workers, pin_memory=True, drop_last=True)
    elif args.data == "original":
        transform = transforms.Compose(
            [transforms.RandomHorizontalFlip(),
             transforms.Resize((args.resolution, args.resolution)),
             transforms.RandomResizedCrop(size=(args.resolution, args.resolution)),
             transforms.ToTensor()
             ])
        s_trainset = load_pict(os.path.join(args.csv_path, args.source_name + "_train.csv"), transform=transform)
        s_testset = load_pict(os.path.join(args.csv_path, args.source_name + "_test.csv"), transform=transform)
        if args.few_shot:
            t_trainset = dataset_sampler(
                load_pict(os.path.join(args.csv_path, args.target_name + "_train.csv"), transform=transform),
                class_num=args.n_class, data_per_class=args.dpc)
        else:
            t_trainset = load_pict(os.path.join(args.csv_path, args.target_name + "_train.csv"), transform=transform)
        # BUGFIX: target test set previously loaded the SOURCE csv
        # (args.source_name + "_test.csv") — load the target csv instead.
        t_testset = load_pict(os.path.join(args.csv_path, args.target_name + "_test.csv"), transform=transform)
        trainset = concat_dataset(s_trainset, t_trainset)
        testset = concat_dataset(s_testset, t_testset)
        trainloader = DataLoader(trainset, batch_size=args.batch_size, shuffle=True,
                                 num_workers=args.num_workers, pin_memory=True, drop_last=True)
        testloader = DataLoader(testset, batch_size=args.batch_size, shuffle=True,
                                num_workers=args.num_workers, pin_memory=True, drop_last=True)
        # s_trainloader = DataLoader(s_trainset, batch_size = args.batch_size, shuffle = True, num_workers=args.num_workers, pin_memory=True, drop_last=True)
        # s_testloader = DataLoader(s_testset, batch_size = args.batch_size, shuffle = True, num_workers=args.num_workers, pin_memory=True, drop_last=True)
        # t_trainloader = DataLoader(t_trainset, batch_size = args.batch_size, shuffle = True, num_workers=args.num_workers, pin_memory=True, drop_last=True)
        # t_testloader = DataLoader(t_testset, batch_size = args.batch_size, shuffle = True, num_workers=args.num_workers, pin_memory=True, drop_last=True)

    # ---- bookkeeping ----
    best_acc_t = 0
    log = pd.DataFrame(
        columns=["epoch", "s_loss", "t_loss", "s_acc", "t_acc"]
    )

    for epoch in range(args.num_epoch):
        end = time.time()
        # average meters (reset every epoch)
        batch_time = AverageMeter('Time', ':6.3f')
        data_time = AverageMeter('Data', ':6.3f')
        losses_d = AverageMeter('Loss_d', ':.4e')
        losses_g = AverageMeter('Loss_g', ':.4e')
        losses_g_augcls = AverageMeter('Loss_g_augcls', ':.4e')
        losses_g_idt = AverageMeter('Loss_g_idt', ':.4e')
        losses_g_recon = AverageMeter('Loss_g_recon', ':.4e')
        losses_g_adv = AverageMeter('Loss_g_adv', ':.4e')
        losses_cs = AverageMeter('Loss_cs', ':.4e')
        losses_ct = AverageMeter('Loss_ct', ':.4e')
        # progress meter
        progress = ProgressMeter(
            len(trainloader),
            [batch_time, data_time, losses_d, losses_g, losses_cs, losses_ct],
            prefix="Epoch: [{}]".format(epoch)
        )

        for i, sample in enumerate(trainloader):
            self.reset_grad()
            s_data, s_label, t_data, t_label = sample["s_data"], sample["s_label"], sample["t_data"], sample["t_label"]
            s_data, s_label, t_data, t_label = s_data.to(self.device), s_label.to(self.device), t_data.to(self.device), t_label.to(self.device)
            data_time.update(time.time() - end)

            # forward cycle: s -> t -> s and t -> s -> t
            fake_t_data = self.Gst(s_data)
            fake_s_data = self.Gts(t_data)
            cyc_s_data = self.Gts(fake_t_data)
            cyc_t_data = self.Gst(fake_s_data)

            ######################
            # train discriminator #
            ######################
            tmp_batchsize = s_data.shape[0]
            label_shape = self.Ds(s_data).detach().shape
            ones_label = torch.ones(label_shape).to(self.device)
            zeros_label = torch.zeros(label_shape).to(self.device)
            # real/fake losses (LSGAN-style MSE); with use_labels the real
            # targets are the class labels instead of all-ones
            if args.use_labels:
                loss_d_real = self.MSE(self.Ds(s_data), s_label) + self.MSE(self.Dt(t_data), t_label)
                loss_d_fake = self.MSE(self.Ds(fake_s_data), zeros_label) + self.MSE(self.Dt(fake_t_data), zeros_label)
            else:
                loss_d_real = self.MSE(self.Ds(s_data), ones_label) + self.MSE(self.Dt(t_data), ones_label)
                loss_d_fake = self.MSE(self.Ds(fake_s_data), zeros_label) + self.MSE(self.Dt(fake_t_data), zeros_label)
            # two separate optimizer steps, graph retained for the generator pass
            loss_d_real.backward(retain_graph=True)
            self.d_optimizer.step()
            self.d_optimizer.zero_grad()
            loss_d_fake.backward(retain_graph=True)
            self.d_optimizer.step()
            losses_d.update(loss_d_real.item() + loss_d_fake.item(), n=tmp_batchsize)

            ###################
            # train generator #
            ###################
            for j in range(args.num_k):
                if j > 0:
                    # re-generate after the previous generator step so the
                    # next update sees fresh outputs
                    self.reset_grad()
                    fake_t_data = self.Gst(s_data)
                    fake_s_data = self.Gts(t_data)
                    cyc_s_data = self.Gts(fake_t_data)
                    cyc_t_data = self.Gst(fake_s_data)
                # classification loss on original data
                loss_cs = self.CE(self.Cs(s_data), s_label)
                loss_ct = self.CE(self.Ct(t_data), t_label)
                # augmented classification loss (translated + cycled data)
                loss_g_aug_s = self.CE(self.Ct(fake_t_data), s_label) + self.CE(self.Cs(cyc_s_data), s_label)
                loss_g_aug_t = self.CE(self.Cs(fake_s_data), t_label) + self.CE(self.Ct(cyc_t_data), t_label)
                # deceive discriminator
                if args.adversarial == "reverse":
                    loss_g_adv = self.MSE(self.Ds(fake_s_data), ones_label) + self.MSE(self.Dt(fake_t_data), ones_label)
                elif args.adversarial == "minus":
                    loss_g_adv = -self.MSE(self.Ds(fake_s_data), zeros_label) - self.MSE(self.Dt(fake_t_data), zeros_label)  # max(logD) instead of min(1-logD)
                loss_g = loss_cs + loss_ct + args.alpha * (loss_g_aug_s + loss_g_aug_t) + args.beta * (loss_g_adv)
                # identity construction loss
                if args.idt_loss:
                    loss_idt_s = self.MSE(self.Gts(self.gs2rgb(s_data)), s_data)
                    loss_idt_t = self.MSE(self.Gst(self.rgb2gs(t_data)), t_data)
                    loss_g += loss_idt_s
                    loss_g += loss_idt_t
                # cycle-reconstruction loss
                if args.recon_loss:
                    loss_g_recon = self.MSE(s_data, cyc_s_data) + self.MSE(t_data, cyc_t_data)
                    loss_g += loss_g_recon
                loss_g.backward()
                self.g_optimizer.step()
                losses_g.update(loss_g.item(), n=tmp_batchsize)
                losses_g_augcls.update(loss_g_aug_s.item() + loss_g_aug_t.item(), n=tmp_batchsize)
                losses_g_adv.update(loss_g_adv.item(), n=tmp_batchsize)
                losses_cs.update(loss_cs.item(), n=tmp_batchsize)
                losses_ct.update(loss_ct.item(), n=tmp_batchsize)
                if args.recon_loss:
                    losses_g_recon.update(loss_g_recon.item(), n=tmp_batchsize)
                if args.idt_loss:
                    losses_g_idt.update(loss_idt_s.item() + loss_idt_t.item(), n=tmp_batchsize)

            if i != 0 and i % 100 == 0:
                progress.display(1)
            end = time.time()

        # ----------------wandb visualisation------------------------------------------------
        s_example_images = [wandb.Image(s_data[0], caption="source original data"),
                            wandb.Image(fake_t_data[0], caption="source converted data"),
                            wandb.Image(cyc_s_data[0], caption="source cycled data")]
        t_example_images = [wandb.Image(t_data[0], caption="target original data"),
                            wandb.Image(fake_s_data[0], caption="target converted data"),
                            wandb.Image(cyc_t_data[0], caption="target cycled data")]
        wandb.log({"Gst&Gts_loss": losses_g.avg})
        wandb.log({"G augmented classification loss": losses_g_augcls.avg})
        wandb.log({"G idt loss": losses_g_idt.avg})
        wandb.log({"G reconstruct loss": losses_g_recon.avg})
        wandb.log({"G adv loss": losses_g_adv.avg})
        wandb.log({"D_loss": losses_d.avg})
        if epoch % 20 == 0:
            wandb.log({"epoch{} Source Images".format(epoch): s_example_images})
            wandb.log({"epoch{} Target Images".format(epoch): t_example_images})
        # -----------------------------------------------------------------------------------

        # evaluate
        with torch.no_grad():
            val_losses_s = AverageMeter('Loss_s', ':.4e')
            val_losses_t = AverageMeter('Loss_t', ':.4e')
            acc_s = AverageMeter('acc_s', ':6.3f')
            acc_t = AverageMeter('acc_t', ':6.3f')
            for sample in testloader:
                s_data, s_label, t_data, t_label = sample["s_data"], sample["s_label"], sample["t_data"], sample["t_label"]
                s_data, s_label, t_data, t_label = s_data.to(self.device), s_label.to(self.device), t_data.to(self.device), t_label.to(self.device)
                tmp_batchsize = s_data.shape[0]
                s_pred = self.Cs(s_data)
                t_pred = self.Ct(t_data)
                val_losses_s.update(self.CE(s_pred, s_label).item(), n=tmp_batchsize)
                val_losses_t.update(self.CE(t_pred, t_label).item(), n=tmp_batchsize)
                acc_s.update(accuracy(s_pred, s_label)[0].item(), n=tmp_batchsize)
                acc_t.update(accuracy(t_pred, t_label)[0].item(), n=tmp_batchsize)
            s_val_loss = val_losses_s.avg
            t_val_loss = val_losses_t.avg
            s_val_acc = acc_s.avg
            t_val_acc = acc_t.avg

        # wandb logging
        wandb.log({"Cs_val_acc": s_val_acc})
        wandb.log({"Ct_val_acc": t_val_acc})

        # save models whenever target-domain accuracy improves
        if best_acc_t < t_val_acc:
            best_acc_t = t_val_acc
            torch.save(
                self.Ct.state_dict(),
                os.path.join(args.result_path, 'best_acc1_model_ct.prm')
            )
            # BUGFIX: previously saved self.Ct again under the "cs" filename.
            torch.save(
                self.Cs.state_dict(),
                os.path.join(args.result_path, 'best_acc1_model_cs.prm')
            )
            torch.save(
                self.Gst.state_dict(),
                os.path.join(args.result_path, 'best_acc1_model_gst.prm')
            )
            torch.save(
                self.Gts.state_dict(),
                os.path.join(args.result_path, 'best_acc1_model_gts.prm')
            )
            torch.save(
                self.Ds.state_dict(),
                os.path.join(args.result_path, 'best_acc1_model_ds.prm')
            )
            torch.save(
                self.Dt.state_dict(),
                os.path.join(args.result_path, 'best_acc1_model_dt.prm')
            )

        # record per-epoch CSV log
        tmp = pd.Series([
            epoch,
            s_val_loss,
            t_val_loss,
            s_val_acc,
            t_val_acc
        ], index=log.columns
        )
        log = log.append(tmp, ignore_index=True)
        log.to_csv(os.path.join(args.result_path, 'log_step1.csv'), index=False)
        print(
            'epoch: {}\ts_val loss: {:.4f}\tt_val loss: {:.4f}\ts_val_acc: {:.5f}\tt_val_acc: {:.5f}'
            .format(epoch, s_val_loss, t_val_loss, s_val_acc, t_val_acc)
        )
image_root = dataset_path + dataset + '/RGB/' gt_root = dataset_path + dataset + '/GT/' depth_root = dataset_path + dataset + '/depth/' test_loader = test_dataset(image_root, gt_root, depth_root, opt.testsize) for i in range(test_loader.size): image, gt, depth, name, image_for_post = test_loader.load_data() gt = np.asarray(gt, np.float32) gt /= (gt.max() + 1e-8) image = image.cuda() depth = depth.cuda() _, res = model(image, depth) res = F.upsample(res, size=gt.shape, mode='bilinear', align_corners=False) res = res.sigmoid().data.cpu().numpy().squeeze() res = (res - res.min()) / (res.max() - res.min() + 1e-8) mae = metrics.mae(res, gt) f1 = metrics.f_beta_measure(res, gt) wandb.log({f'mae_{dataset}': mae, f'f1{dataset}': f1}) wandb.log({ 'res': [wandb.Image(torch.tensor(res))], 'gt': [wandb.Image(torch.tensor(gt))] }) total_mae += mae total_f1 += f1 print(f'Average f1 is {total_f1 / test_loader.size}') print(f'Average mae is {total_mae / test_loader.size}') print('Test Done!')
hard_recons = vae.decode(codes) images, recons = map(lambda t: t[:k], (images, recons)) images, recons, hard_recons, codes = map( lambda t: t.detach().cpu(), (images, recons, hard_recons, codes)) images, recons, hard_recons = map( lambda t: make_grid(t.float(), nrow=int(sqrt(k)), normalize=True, range=(-1, 1)), (images, recons, hard_recons)) logs = { **logs, 'sample images': wandb.Image(images, caption='original images'), 'reconstructions': wandb.Image(recons, caption='reconstructions'), 'hard reconstructions': wandb.Image(hard_recons, caption='hard reconstructions'), 'codebook_indices': wandb.Histogram(codes), 'temperature': temp } save_model(f'./vae.pt') wandb.save('./vae.pt') # temperature anneal
LGN_layer = Conv3dLGN_layer(in_channels=3, kernel_size=kernel_size) LGN_layer = LGN_layer.to(cuda) t1 = time.time() # x = torch.Tensor(data[::1,:,:]).unsqueeze_(0).unsqueeze_(0).repeat([1,3,1,1,1]).to(cuda) # x = torch.rand((1, 3, 20, 64, 64)).to('cuda') # x = torch.Tensor(data[::30,:,:]).view((1,1,*data.shape)).to('cuda') # out = LGN_layer(x).detach().numpy() t2 = time.time() print(LGN_layer.cell_types) print(LGN_layer.Convs) for cell in LGN_layer.cell_types: for i in range(LGN_layer.Convs[f"{cell}_dom"].weight.shape[2]): I = LGN_layer.Convs[f"{cell}_dom"].weight[ 0, 0, i, :, :].detach().cpu().numpy() wandb.log({f"spatial kernel {cell}": [wandb.Image(I)]}) T = LGN_layer.Convs[f"{cell}_dom"].weight[0, 0, :, kernel_size[0] // 2, kernel_size[0] // 2].detach().cpu().numpy() data = [[x, y] for (x, y) in zip(np.arange(len(T)), T)] table = wandb.Table(data=data, columns=["x", "y"]) wandb.log({ f"temporal kernel {cell}": wandb.plot.line(table, "x", "y", title=cell) }) # wandb.alert() # with open('LGN_allen_movie_one.npy','wb') as f: # np.save(f,out)
def main():
    """Train an adversarial autoencoder (AAE) on CelebA and track generative
    quality metrics (FID, precision/recall, IS, density/coverage) in wandb.

    Real-image inception statistics are computed once and cached to disk,
    keyed by an md5 hash of the dataset description; the inception model is
    shuttled between CPU/GPU so it never coexists with the GAN on the GPU.
    """
    # load real images info or generate real images info
    inception_model_score = generative_model_score.GenerativeModelScore()
    inception_model_score.lazy_mode(True)

    import torchvision
    from torch.autograd import Variable
    from torchvision import transforms
    import tqdm
    import os

    # hyperparameters
    batch_size = 64
    epochs = 1000
    img_size = 32
    save_image_interval = 5
    loss_calculation_interval = 10
    latent_dim = 10
    n_iter = 3

    wandb.login()
    wandb.init(project="AAE",
               config={
                   "batch_size": batch_size,
                   "epochs": epochs,
                   "img_size": img_size,
                   "save_image_interval": save_image_interval,
                   "loss_calculation_interval": loss_calculation_interval,
                   "latent_dim": latent_dim,
                   "n_iter": n_iter,
               })
    config = wandb.config

    train_loader, validation_loader, test_loader = get_celebA_dataset(
        batch_size, img_size)
    # train_loader = get_cifar1_dataset(batch_size)
    image_shape = [3, img_size, img_size]

    import hashlib
    # cache key: md5 of the dataset's string representation
    real_images_info_file_name = hashlib.md5(
        str(train_loader.dataset).encode()).hexdigest() + '.pickle'
    if os.path.exists('./inception_model_info/' + real_images_info_file_name):
        print("Using generated real image info.")
        print(train_loader.dataset)
        inception_model_score.load_real_images_info('./inception_model_info/' +
                                                    real_images_info_file_name)
    else:
        inception_model_score.model_to('cuda')
        #put real image
        for each_batch in train_loader:
            X_train_batch = each_batch[0]
            inception_model_score.put_real(X_train_batch)
        #generate real images info
        inception_model_score.lazy_forward(batch_size=64, device='cuda',
                                           real_forward=True)
        inception_model_score.calculate_real_image_statistics()
        #save real images info for next experiments
        inception_model_score.save_real_images_info('./inception_model_info/' +
                                                    real_images_info_file_name)
        #offload inception_model
        inception_model_score.model_to('cpu')

    encoder = Encoder(latent_dim, image_shape).cuda()
    decoder = Decoder(latent_dim, image_shape).cuda()
    discriminator = Discriminator(latent_dim).cuda()
    # autoencoder optimizer covers encoder + decoder jointly
    ae_optimizer = torch.optim.Adam(itertools.chain(encoder.parameters(),
                                                    decoder.parameters()), lr=1e-4)
    d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=1e-4)
    g_optimizer = torch.optim.Adam(encoder.parameters(), lr=1e-4)

    r_losses = []
    d_losses = []
    g_losses = []
    precisions = []
    recalls = []
    fids = []
    inception_scores_real = []
    inception_scores_fake = []

    for i in range(0, epochs):
        batch_count = 0
        for each_batch in tqdm.tqdm(train_loader):
            batch_count += 1
            X_train_batch = Variable(each_batch[0]).cuda()
            # reconstruction step
            r_loss = update_autoencoder(ae_optimizer, X_train_batch, encoder,
                                        decoder)
            # adversarial steps, repeated n_iter times per batch
            # NOTE(review): nesting reconstructed from collapsed source —
            # confirm whether update_generator belongs inside this loop.
            for iter_ in range(n_iter):
                d_loss = update_discriminator(d_optimizer, X_train_batch,
                                              encoder, discriminator,
                                              latent_dim)
                g_loss = update_generator(g_optimizer, X_train_batch, encoder,
                                          discriminator)
            sampled_images = sample_image(encoder, decoder,
                                          X_train_batch).detach().cpu()
            if i % loss_calculation_interval == 0:
                inception_model_score.put_fake(sampled_images)

        if i % save_image_interval == 0:
            image = save_images(n_row=10, epoch=i, latent_dim=latent_dim,
                                model=decoder)
            wandb.log({'image': wandb.Image(image, caption='%s_epochs' % i)},
                      step=i)

        if i % loss_calculation_interval == 0:
            #offload all GAN model to cpu and onload inception model to gpu
            encoder = encoder.to('cpu')
            decoder = decoder.to('cpu')
            discriminator = discriminator.to('cpu')
            inception_model_score.model_to('cuda')

            #generate fake images info
            inception_model_score.lazy_forward(batch_size=64, device='cuda',
                                               fake_forward=True)
            inception_model_score.calculate_fake_image_statistics()
            metrics = inception_model_score.calculate_generative_score()

            #onload all GAN model to gpu and offload inception model to cpu
            inception_model_score.model_to('cpu')
            encoder = encoder.to('cuda')
            decoder = decoder.to('cuda')
            discriminator = discriminator.to('cuda')

            precision, recall, fid, inception_score_real, inception_score_fake, density, coverage = \
                metrics['precision'], metrics['recall'], metrics['fid'], metrics['real_is'], metrics['fake_is'], metrics['density'], metrics['coverage']
            wandb.log(
                {
                    "precision": precision,
                    "recall": recall,
                    "fid": fid,
                    "inception_score_real": inception_score_real,
                    "inception_score_fake": inception_score_fake,
                    "density": density,
                    "coverage": coverage
                },
                step=i)

            r_losses.append(r_loss)
            d_losses.append(d_loss)
            g_losses.append(g_loss)
            precisions.append(precision)
            recalls.append(recall)
            fids.append(fid)
            inception_scores_real.append(inception_score_real)
            inception_scores_fake.append(inception_score_fake)
            save_scores_and_print(i + 1, epochs, r_loss, d_loss, g_loss,
                                  precision, recall, fid,
                                  inception_score_real, inception_score_fake)
            inception_model_score.clear_fake()

    save_losses(epochs, loss_calculation_interval, r_losses, d_losses,
                g_losses)
    wandb.finish()
# ============================== # %% PREPROCESSING # ============================== # Demo # create key rng, prng_key = jax.random.split(KEY, num=2) # prep the data demo_data_prepped = prepare_data(init_batch, prng_key=prng_key) # plot image grid fig, ax = plot_image_grid(demo_data_prepped, image_shape) wandb.log({"initial_images": wandb.Image(plt)}) plt.close(fig) # ============================== # %% MODEL # ============================== from rbig_jax.transforms.multiscale import MultiScaleBijector from rbig_jax.transforms.reshape import init_scale_function from rbig_jax.transforms.base import BijectorChain from rbig_jax.models import GaussianizationFlow from distrax._src.distributions.normal import Normal from rbig_jax.models.gaussflow import add_gf_model_args, init_gf_spline_model from rbig_jax.models.gaussflow import init_default_gf_model # initialization data init_ds = load_dataset(tfds.Split.TRAIN, wandb_logger.config.n_init_samples)
def train(model, criterion, optimizer, train_loader, val_loader, args):
    """Train a multi-view camera-placement network with early stopping.

    Runs up to 1000 epochs; every epoch it validates, logs statistics and
    camera/view renderings to wandb, checkpoints on best top-1 validation
    accuracy, and stops after 20 epochs without improvement.

    Parameters
    ----------
    model : network exposing camera_position (and, for net_version != 1,
        device / distance_range / angle_range / vertices / nviews)
    criterion : loss function passed through to execute_batch/validate
    optimizer : optimizer stepped once per batch
    train_loader, val_loader : data loaders
    args : config namespace (device, net_version, nviews, fname_best)
    """
    best_prec1 = 0
    epoch_no_improve = 0
    for epoch in range(1000):
        statistics = Statistics()
        model.train()
        start_time = time.time()
        for i, (input, target) in enumerate(train_loader):
            loss, (prec1, prec5), y_pred, y_true = execute_batch(
                model, criterion, input, target, args.device)
            statistics.update(loss.detach().cpu().numpy(), prec1, prec5,
                              y_pred, y_true)
            # compute gradient and do optimizer step
            optimizer.zero_grad()
            # loss.backward()
            # NOTE(review): loss.backward() is commented out, so this step
            # applies no gradients — confirm this is intentional.
            optimizer.step()
            # if args.net_version == 2:
            #     model.camera_position = model.camera_position.clamp(0, 1)
            del loss
            torch.cuda.empty_cache()
        elapsed_time = time.time() - start_time
        # Evaluate on validation set
        val_statistics = validate(val_loader, model, criterion, args.device)
        log_data(statistics, "train", val_loader.dataset.dataset.classes,
                 epoch)
        log_data(val_statistics, "internal_val",
                 val_loader.dataset.dataset.classes, epoch)
        wandb.log({"Epoch elapsed time": elapsed_time}, step=epoch)
        # print(model.camera_position)
        if epoch % 1 == 0:
            # Render camera placement and model views for logging.
            vertices = []
            if args.net_version == 1:
                # camera_position is used directly as camera location
                R = look_at_rotation(model.camera_position, device=args.device)
                T = -torch.bmm(R.transpose(1, 2),
                               model.camera_position[:, :, None])[:, :, 0]
            else:
                # camera_position parameterizes a scale + XYZ rotation
                # applied to template vertices
                t = Transform3d(device=model.device).scale(
                    model.camera_position[3] *
                    model.distance_range).rotate_axis_angle(
                        model.camera_position[0] * model.angle_range,
                        axis="X", degrees=False).rotate_axis_angle(
                            model.camera_position[1] * model.angle_range,
                            axis="Y", degrees=False).rotate_axis_angle(
                                model.camera_position[2] * model.angle_range,
                                axis="Z", degrees=False)
                vertices = t.transform_points(model.vertices)
                R = look_at_rotation(vertices[:model.nviews],
                                     device=model.device)
                T = -torch.bmm(R.transpose(1, 2),
                               vertices[:model.nviews, :, None])[:, :, 0]
            cameras = OpenGLPerspectiveCameras(R=R, T=T, device=args.device)
            wandb.log(
                {
                    "Cameras":
                    [wandb.Image(plot_camera_scene(cameras, args.device))]
                },
                step=epoch)
            plt.close()
            images = render_shape(model, R, T, args, vertices)
            wandb.log(
                {
                    "Views": [
                        wandb.Image(
                            image_grid(images,
                                       rows=int(np.ceil(args.nviews / 2)),
                                       cols=2))
                    ]
                },
                step=epoch)
            plt.close()
        # Save best model and best prediction
        if val_statistics.top1.avg > best_prec1:
            best_prec1 = val_statistics.top1.avg
            save_model("views_net", model, optimizer, args.fname_best)
            epoch_no_improve = 0
        else:
            # Early stopping
            epoch_no_improve += 1
            if epoch_no_improve == 20:
                wandb.run.summary[
                    "best_internal_val_top1_accuracy"] = best_prec1
                wandb.run.summary[
                    "best_internal_val_top1_accuracy_epoch"] = epoch - 20
                return
def do_back_prop(
        self, X, Y, X_cv, Y_cv, optimiser="sgd", gamma=0.1, numepochs=5, learning_rate=0.001,\
        batch_size=32, beta=0.99, epsilon=0.0000001, beta1=0.9, beta2=0.999, l2_reg_param=0,do_wandb=False):
    """
    Performs back propagation with the given parameters/hyperparameters

    Parameters
    ----------
    self : NeuralNet Class Instance
    X : numpy.ndarray
        Data on which gradient descent is performed. Shape ( m_features, nsamples)
    Y : numpy.ndarray
        Labels of datapoints in X. Shape ( nsamples, )
    X_cv : numpy.ndarray
        Data on which cross validation is performed. Shape ( m_features, ksamples)
    Y_cv : numpy.ndarray
        Labels of datapoints in X_cv. Shape ( ksamples, )
    optimiser : String, optional, default="sgd"
        Optimisation algorithm to be used
        Currently supported optimisers : "sgd", "momentum", "nesterov", "rmsprop", "adam", "nadam"
    gamma : float, optional, default=0.1
        Used in momentum, nesterov optimiser
    numepochs : int, optional, default=5
        Number of passes on data during training
    learning_rate : float, optional, default=0.001
        Learning Rate at which gradient descent is performed
    batch_size : int, optional, default=32
        Size of batch used in each step of batch gradient descent
    beta : float, optional, default=0.99
        Used in rmsprop optimiser
    epsilon : float, optional, default=0.0000001
        Used in rmsprop, adam, nadam optimisers
    beta1 : float, optional, default=0.9
        Used in adam, nadam optimisers
    beta2 : float, optional, default=0.999
        Used in adam, nadam optimisers
    l2_reg_param : float, optional, default=0
        L2 Regularisation Parameter / Weight Decay
    do_wandb : bool, optional, default=False
        When True, log sample images and per-epoch train/validation
        accuracy and loss to wandb

    Returns
    -------
    None
    """
    layers = len(self.structure) - 1
    update = {}
    step_count = 0
    for i in range(numepochs):
        if do_wandb == True:
            # log 20 sample images (28x28, MNIST-style) per epoch; commit
            # deferred until the metrics log below
            wandb.log({"Sample Data":[wandb.Image(X[:,jj].reshape(28,28),caption=dataset_labels[Y[jj]])\
                for jj in range(20*i,20*i+20)]},commit = False)
        # mini-batch pass over the columns of X
        for j in tqdm(range(math.ceil(X.shape[1] / batch_size))):
            X_pass = X[:, j * batch_size:min(X.shape[1], (j + 1) * batch_size)]
            Y_pass = Y[j * batch_size:min(X.shape[1], (j + 1) * batch_size)]
            step_count += 1
            # dispatch to the selected optimiser; `update` carries its
            # running state (momenta etc.) across steps
            update = (self.optimisers[optimiser])(
                X_pass, Y_pass, update, learning_rate, gamma = gamma, beta = beta,\
                beta1 = beta1, beta2 = beta2, epsilon = epsilon, l2_reg_param = l2_reg_param, step_num = step_count)
        # end-of-epoch metrics on full train and validation data
        Y_pred = self.predict(X)
        self.accuracies.append(np.mean(np.argmax(Y_pred, axis=0) == Y))
        self.cvaccuracies.append(
            np.mean(self.predict(X_cv, returnclass=1) == Y_cv))
        self.losses.append(self.get_loss(None, Y, l2_reg_param, Y_pred))
        self.cvlosses.append(self.get_loss(X_cv, Y_cv, l2_reg_param))
        if do_wandb == True:
            wandb.log({"train_acc":self.accuracies[-1],"train_loss":self.losses[-1],"val_acc":self.cvaccuracies[-1],\
                "val_loss":self.cvlosses[-1],"step_count":step_count})
def evaluate_against_attacks(model, vae, attacks, val_loader, parallel=1,
                             wandb=None, iteration=None, num_batches=None):
    """
    Evaluates a model against the given attacks, printing the output and
    optionally writing it to a tensorboardX summary writer.

    Parameters
    ----------
    model : classifier under evaluation; inputs are denoised by subtracting
        the VAE reconstruction (``vae(x)[2]``) before classification
    vae : autoencoder whose third output is used as the reconstruction
    attacks : iterable of callables mapping (inputs, labels) -> adversarial inputs
    val_loader : validation data loader
    parallel : number of GPUs for DataParallel distance computation
    wandb : optional wandb module/run for logging (shadows any global wandb)
    iteration : step value passed to wandb.log
    num_batches : optional cap on validation batches per attack

    Also verifies at the end that the model's weights were not mutated by
    any attack.
    """
    l2_distance = L2Distance()
    linf_distance = LinfDistance()
    if torch.cuda.is_available():
        l2_distance.cuda()
        linf_distance.cuda()
        device_ids = list(range(parallel))
        l2_distance = nn.DataParallel(l2_distance, device_ids)
        linf_distance = nn.DataParallel(linf_distance, device_ids)
    # snapshot weights to verify attacks do not modify the model
    model_state_dict = copy.deepcopy(model.state_dict())
    for attack in attacks:
        if isinstance(attack, nn.DataParallel):
            attack_name = attack.module.__class__.__name__
        else:
            attack_name = attack.__class__.__name__

        batches_correct = []
        successful_attacks = []
        successful_l2_distance = []
        successful_linf_distance = []
        for batch_index, (inputs, labels) in enumerate(val_loader):
            if num_batches is not None and batch_index >= num_batches:
                break
            if torch.cuda.is_available():
                inputs = inputs.cuda()
                labels = labels.cuda()
            adv_inputs = attack(inputs, labels)
            with torch.no_grad():
                # classify VAE-denoised clean and adversarial inputs
                logits = model(inputs - vae(inputs)[2])
                adv_logits = model(adv_inputs - vae(adv_inputs)[2])
            batches_correct.append((adv_logits.argmax(1) == labels).detach())
            success = ((logits.argmax(1) == labels) &  # was classified correctly
                       (adv_logits.argmax(1) != labels)  # and now is not
                       )
            inputs_success = inputs[success]
            adv_inputs_success = adv_inputs[success]
            # keep at most one example per batch for visual logging
            num_samples = min(len(inputs_success), 1)
            adv_indices = random.sample(range(len(inputs_success)),
                                        num_samples)
            for adv_index in adv_indices:
                # stack original | adversarial | amplified diff vertically
                successful_attacks.append(
                    torch.cat([
                        inputs_success[adv_index],
                        adv_inputs_success[adv_index],
                        torch.clamp(
                            (adv_inputs_success[adv_index] -
                             inputs_success[adv_index]) * 3 + 0.5, 0, 1),
                    ], dim=1).detach())
            if success.sum() > 0:
                successful_l2_distance.extend(
                    l2_distance(
                        inputs_success,
                        adv_inputs_success,
                    ).detach())
                successful_linf_distance.extend(
                    linf_distance(
                        inputs_success,
                        adv_inputs_success,
                    ).detach())

        print_cols = [f'ATTACK {attack_name}']
        correct = torch.cat(batches_correct)
        accuracy = correct.float().mean()
        if wandb is not None:
            wandb.log({f'val-{attack_name}-accuracy': accuracy.item()},
                      step=iteration)
        print_cols.append(f'accuracy: {accuracy.item() * 100:.1f}%')
        print(*print_cols, sep='\t')

        # distance histograms over successful attacks
        for lpips_name, successful_lpips in [('l2', successful_l2_distance),
                                             ('linf', successful_linf_distance)
                                             ]:
            if len(successful_lpips) > 0 and wandb is not None:
                wandb.log(
                    {
                        f'val-{attack_name}-distance/{lpips_name}':
                        wandb.Histogram(
                            torch.stack(
                                successful_lpips).cpu().detach().numpy())
                    },
                    step=iteration)
        if len(successful_attacks) > 0 and wandb is not None:
            wandb.log(
                {
                    f'val-{attack_name}-images':
                    [wandb.Image(torch.cat(successful_attacks, dim=2))]
                },
                step=iteration)

    # sanity check: warn if any attack mutated the model weights
    new_model_state_dict = copy.deepcopy(model.state_dict())
    for key in model_state_dict:
        old_tensor = model_state_dict[key]
        new_tensor = new_model_state_dict[key]
        max_diff = (old_tensor - new_tensor).abs().max().item()
        if max_diff > 1e-8:
            print(f'max difference for {key} = {max_diff}')
def run_one_epoch(self, training): tic = time.time() batch_time = AverageMeter() losses = AverageMeter() accs = AverageMeter() if training: amnt = self.num_train dataset = self.train_loader else: dataset = self.val_loader amnt = self.num_valid with tqdm(total=amnt) as pbar: for i, data in enumerate(dataset): x, y = data # segmentation task if self.classification: # assuming one-hot y = y.view(1, -1).expand(self.model.num_heads, -1) else: y = y.view(1, -1, 1, x.shape[-2], x.shape[-1]).expand(self.model.num_heads, -1, -1, -1, -1) if self.config.use_gpu: x, y = x.cuda(), y.cuda() output = self.model(x) if training: self.optimizer.zero_grad() loss = None for head in range(self.model.num_heads): if loss is None: loss = self.criterion(output[head], y[head]) else: loss = loss + self.criterion(output[head], y[head]) loss = loss / self.model.num_heads if training: loss.backward() self.optimizer.step() try: loss_data = loss.data[0] except IndexError: loss_data = loss.data.item() losses.update(loss_data) # measure elapsed time toc = time.time() batch_time.update(toc - tic) if self.classification: _, predicted = torch.max(output.data, -1) total = self.batch_size*self.model.num_heads correct = (predicted == y).sum().item() acc = correct/total accs.update(acc) pbar.set_description(f"{(toc - tic):.1f}s - loss: {loss_data:.3f} acc {accs.avg:.3f}") else: pbar.set_description(f"{(toc - tic):.1f}s - loss: {loss_data:.3f}") pbar.update(self.batch_size) if training and i % 2 == 0: self.model.log_illumination(self.curr_epoch, i) if not training and i == 0 and not self.classification: y_sample = y[0, 0].view(256, 256).detach().cpu().numpy() p_sample = output[0, 0].view(256, 256).detach().cpu().numpy() wandb.log({f"images_epoch{self.curr_epoch}": [ wandb.Image(np.round(p_sample * 255), caption="prediction"), wandb.Image(np.round(y_sample * 255), caption="label")]}, step=self.curr_epoch) return losses.avg, accs.avg
def wandb_process(x:TensorImage, y:(TensorCategory,TensorMultiCategory), samples, outs): return [wandb.Image(s[0].permute(1,2,0), caption=f'Ground Truth: {s[1]}\nPrediction: {o[0]}') for s,o in zip(samples,outs)]
train_loss_interp_clever, test_loss_interp_clever) interp_acc_plot = plot_interp_acc(epoch, lambdas, train_acc_interp_naive, test_acc_interp_naive, train_acc_interp_clever, test_acc_interp_clever) wandb.log({ "epoch": epoch, "train_loss1": train_loss1, "train_loss2": train_loss2, "train_acc1": train_acc1, "train_acc2": train_acc2, "test_loss1": test_loss1, "test_loss2": test_loss2, "test_acc1": test_acc1, "test_acc2": test_acc2, # This doesn't really change, but it's more convenient to store it here # when we go to make videos/plots later. "lambdas": lambdas, "train_loss_interp_naive": train_loss_interp_naive, "test_loss_interp_naive": test_loss_interp_naive, "train_acc_interp_naive": train_acc_interp_naive, "test_acc_interp_naive": test_acc_interp_naive, "train_loss_interp_clever": train_loss_interp_clever, "test_loss_interp_clever": test_loss_interp_clever, "train_acc_interp_clever": train_acc_interp_clever, "test_acc_interp_clever": test_acc_interp_clever, "interp_loss_plot": wandb.Image(interp_loss_plot), "interp_acc_plot": wandb.Image(interp_acc_plot), })
def test(
        data,
        weights=None,
        batch_size=32,
        imgsz=640,
        conf_thres=0.001,
        iou_thres=0.6,  # for NMS
        save_json=False,
        single_cls=False,
        augment=False,
        verbose=False,
        model=None,
        dataloader=None,
        save_dir=Path(''),  # for saving images
        pred_result_dir='',
        save_txt=False,  # for auto-labelling
        save_hybrid=False,  # for hybrid auto-labelling
        save_conf=False,  # save auto-label confidences
        plots=True,
        log_imgs=0):  # number of logged images
    """Evaluate a YOLOv5 model on a dataset.

    Runs inference + NMS over the val/test split, accumulates per-class
    detection statistics (P, R, mAP), optionally writes per-image label .txt
    files, logs example images to W&B, and can export predictions to a
    pycocotools JSON for COCO evaluation.

    Returns:
        ((mp, mr, map50, map, box_loss, obj_loss, cls_loss), maps, t) where
        maps is the per-class mAP array and t the speed tuple.
    """
    # Per-image prediction .txt files are written under <pred_result_dir>/results
    pred_result_dir = os.path.join(pred_result_dir, 'results')
    os.makedirs(pred_result_dir, exist_ok=True)

    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device
    else:  # called directly — load weights and build run directory
        set_logging()
        device = select_device(opt.device, batch_size=batch_size)

        # Directories
        save_dir = Path(
            increment_path(Path(opt.project) / opt.name,
                           exist_ok=opt.exist_ok))  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(
            parents=True, exist_ok=True)  # make dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half precision only supported on CUDA
    half = device.type != 'cpu'
    if half:
        model.half()

    # Configure
    model.eval()
    is_coco = data.endswith('coco.yaml')  # is COCO dataset
    with open(data, 'r', encoding='utf-8') as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    # IoU thresholds 0.5..0.95 used for [email protected]:0.95
    iouv = torch.linspace(0.5, 0.95, 10).to(device)
    niou = iouv.numel()

    # Logging — wandb is optional; shadow the name with None until imported
    log_imgs, wandb = min(log_imgs, 100), None  # cap logged images at 100
    try:
        import wandb  # Weights & Biases
    except ImportError:
        log_imgs = 0

    # Dataloader (standalone mode only; train.py passes its own)
    if not training:
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        # warm-up pass so CUDA kernels are compiled before timing
        _ = model(img.half() if half else img
                  ) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data[
            'val']  # path to val/test images
        print(path)
        print(f'imgsz:{imgsz}')
        print(f'batch_size:{batch_size}')
        dataloader = create_dataloader(path, imgsz, batch_size,
                                       model.stride.max(), opt, pad=0.5,
                                       rect=True)[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = {
        k: v
        for k, v in enumerate(
            model.names if hasattr(model, 'names') else model.module.names)
    }
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 '[email protected]', '[email protected]:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)  # box, obj, cls
    jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
    for batch_i, (img, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width

        with torch.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(
                img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:
                loss += compute_loss([x.float() for x in train_out], targets,
                                     model)[1][:3]  # box, obj, cls

            # Run NMS
            targets[:, 2:] *= torch.Tensor([width, height, width,
                                            height]).to(device)  # to pixels
            lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)
                  ] if save_hybrid else []  # for autolabelling
            t = time_synchronized()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres,
                                         labels=lb)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            # NOTE(review): leftover debug prints — very noisy on real datasets
            print(f'si:{si}')
            print(f'pred:{pred}')
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path = Path(paths[si])
            seen += 1

            if len(pred) == 0:
                # no detections: record empty stats if this image had targets
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                  torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Predictions scaled back to original (native) image space
            predn = pred.clone()
            scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0],
                         shapes[si][1])  # native-space pred

            # Append to text file (normalized xywh, one line per detection)
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0
                                                  ]]  # normalization gain whwh
                for *xyxy, conf, cls in predn.tolist():
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                            gn).view(-1).tolist()  # normalized xywh
                    # line = (cls, conf, *xywh) if save_conf else (cls, *xywh)  # label format
                    line = (cls, conf, *xywh)
                    txt_save_path = os.path.join(pred_result_dir,
                                                 path.stem + '.txt')
                    with open(txt_save_path, 'a') as f:
                        f.write(('%g ' * len(line)).rstrip() % line + '\n')

            # W&B logging of example images with predicted boxes
            if plots and len(wandb_images) < log_imgs:
                box_data = [{
                    "position": {
                        "minX": xyxy[0],
                        "minY": xyxy[1],
                        "maxX": xyxy[2],
                        "maxY": xyxy[3]
                    },
                    "class_id": int(cls),
                    "box_caption": "%s %.3f" % (names[cls], conf),
                    "scores": {
                        "class_score": conf
                    },
                    "domain": "pixel"
                } for *xyxy, conf, cls in pred.tolist()]
                boxes = {
                    "predictions": {
                        "box_data": box_data,
                        "class_labels": names
                    }
                }  # inference-space
                wandb_images.append(
                    wandb.Image(img[si], boxes=boxes, caption=path.name))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(
                    path.stem) if path.stem.isnumeric() else path.stem
                box = xyxy2xywh(predn[:, :4])  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({
                        'image_id':
                        image_id,
                        'category_id':
                        coco91class[int(p[5])] if is_coco else int(p[5]),
                        'bbox': [round(x, 3) for x in b],
                        'score':
                        round(p[4], 5)
                    })

            # Assign all predictions as incorrect, then flip matches below
            correct = torch.zeros(pred.shape[0],
                                  niou,
                                  dtype=torch.bool,
                                  device=device)
            if nl:
                detected = []  # target indices matched so far
                tcls_tensor = labels[:, 0]

                # target boxes in native space
                tbox = xywh2xyxy(labels[:, 1:5])
                scale_coords(img[si].shape[1:], tbox, shapes[si][0],
                             shapes[si][1])  # native-space labels
                if plots:
                    confusion_matrix.process_batch(
                        pred, torch.cat((labels[:, 0:1], tbox), 1))

                # Per target class: greedy IoU matching of predictions to targets
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(
                        -1)  # target indices for this class
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(
                        -1)  # prediction indices for this class

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(predn[pi, :4], tbox[ti]).max(
                            1)  # best ious, indices

                        # Append detections (each target counted at most once)
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[
                                    pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(
                                        detected
                                ) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images (first 3 batches, in background threads)
        if plots and batch_i < 3:
            f = save_dir / f'test_batch{batch_i}_labels.jpg'  # labels
            Thread(target=plot_images,
                   args=(img, targets, paths, f, names),
                   daemon=True).start()
            f = save_dir / f'test_batch{batch_i}_pred.jpg'  # predictions
            Thread(target=plot_images,
                   args=(img, output_to_target(output), paths, f, names),
                   daemon=True).start()

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats,
                                              plot=plots,
                                              save_dir=save_dir,
                                              names=names)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(
            1)  # [P, R, [email protected], [email protected]:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class; also export a per-class CSV summary
    if verbose and nc > 1 and len(stats):
        cls_list = []
        img_list = []
        target_list = []
        mAP50_list = []
        for i, c in enumerate(ap_class):
            cls_list.append(names[c])
            img_list.append(seen)
            target_list.append(nt[c])
            mAP50_list.append(ap50[i])
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
        stat_df = pd.DataFrame(
            {
                'Class': cls_list,
                'The number of images': img_list,
                'Targets': target_list,
                'AP': mAP50_list
            },
            columns=['Class', 'The number of images', 'Targets', 'AP'])
        df_save_path = os.path.join(save_dir, 'TEST_RESULT.csv')
        # euc-kr encoding: presumably for Korean-language class names — verify
        stat_df.to_csv(df_save_path, index=False, encoding='euc-kr')

    # Print speeds (per-image ms for inference / NMS / total)
    t = tuple(x / seen * 1E3
              for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print(
            'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g'
            % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        if wandb and wandb.run:
            wandb.log({"Images": wandb_images})
            wandb.log({
                "Validation": [
                    wandb.Image(str(f), caption=f.name)
                    for f in sorted(save_dir.glob('test*.jpg'))
                ]
            })

    # Save JSON and run official COCO evaluation if requested
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights
                 ).stem if weights is not None else ''  # weights
        anno_json = '../coco/annotations/instances_val2017.json'  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
        with open(pred_json, 'w') as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            eval = COCOeval(anno, pred, 'bbox')
            if is_coco:
                eval.params.imgIds = [
                    int(Path(x).stem) for x in dataloader.dataset.img_files
                ]  # image IDs to evaluate
            eval.evaluate()
            eval.accumulate()
            eval.summarize()
            map, map50 = eval.stats[:
                                    2]  # update results ([email protected]:0.95, [email protected])
        except Exception as e:
            print(f'pycocotools unable to run: {e}')

    # Return results
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map,
            *(loss.cpu() / len(dataloader)).tolist()), maps, t
def send_images(images, step: np.array, name: str, captions=None,
                masks_prediction=None, masks_target=None):
    """Log a batch of images (optionally with captions and segmentation masks) to wandb.

    For some reason, autograph is trying to understand what I'm doing here. With some
    failure. Thus, @tf.autograph.experimental.do_not_convert() is used to prevent
    autograph to scan this method.

    Args:
        images: (8, h, w, c) Images to log in wandb
        step: The global training step as eager tensor
        name: Image names (the wandb log key)
        captions: Optional per-image caption strings
        masks_prediction: Optional per-image predicted masks; resized to each image's
            spatial size with nearest-neighbor interpolation (keeps labels integral)
        masks_target: Optional per-image ground-truth masks (assumed already image-sized)

    Returns:
        A zero int64 numpy scalar (dummy value for use inside a tf wrapper op).
    """
    # wandb mask-value -> display-label mapping: 0 is background, 1..10 are digits 0..9
    class_labels = {
        0: "background",
        1: "0", 2: "1", 3: "2", 4: "3", 5: "4",
        6: "5", 7: "6", 8: "7", 9: "8", 10: "9"
    }
    step = int(step)
    images_list = []
    for i, img in enumerate(images):
        img_params = {}
        if captions is not None:
            img_params["caption"] = captions[i]
        if masks_prediction is not None:
            # INTER_NEAREST avoids blending class ids at resize boundaries
            mask_pred = cv2.resize(masks_prediction[i],
                                   (img.shape[1], img.shape[0]),
                                   interpolation=cv2.INTER_NEAREST)
            mask_pred = mask_pred.astype(np.int32)
            # setdefault replaces the repeated "if 'masks' not in img_params" dance
            img_params.setdefault("masks", {})["predictions"] = {
                "mask_data": mask_pred,
                "class_labels": class_labels
            }
        if masks_target is not None:
            mask_target = masks_target[i].astype(np.int32)
            # Fixed: key was misspelled "groud_truth"; also removed a leftover
            # debug print of the mask shape.
            img_params.setdefault("masks", {})["ground_truth"] = {
                "mask_data": mask_target,
                "class_labels": class_labels
            }
        images_list.append(wandb.Image(img, **img_params))
    wandb.log({name: images_list}, step=step)
    return np.array(0, dtype=np.int64)
def visualize_task():
    # Render the current model embedding as a figure and log it to wandb.
    # NOTE(review): takes no parameters yet reads `self` — presumably defined as a
    # closure inside a method, capturing `self` from the enclosing scope; confirm.
    fig = plot(cast(GraphObjectIDFeaturizerEmbedder, self.model))
    wandb.log({"visualization": wandb.Image(fig)}, step=self.iteration_num)
    # Close the figure to avoid matplotlib accumulating open figures across calls.
    plt.close(fig)
def train(hyp, opt, device, tb_writer=None, wandb=None):
    """Train a YOLOv5 model.

    Args:
        hyp: Hyperparameter dict (lr, momentum, loss gains, warmup, etc.).
        opt: Parsed command-line options (paths, epochs, DDP rank, flags).
        device: torch device to train on.
        tb_writer: Optional TensorBoard SummaryWriter.
        wandb: Optional wandb module (None disables W&B logging).

    Returns:
        The final `results` tuple (P, R, mAP metrics and validation losses).
    """
    logger.info(f'Hyperparameters {hyp}')
    save_dir, epochs, batch_size, total_batch_size, weights, rank = \
        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank

    # Directories
    wdir = save_dir / 'weights'
    wdir.mkdir(parents=True, exist_ok=True)  # make dir
    last = wdir / 'last.pt'
    best = wdir / 'best.pt'
    results_file = save_dir / 'results.txt'

    # Save run settings so the run is reproducible
    with open(save_dir / 'hyp.yaml', 'w') as f:
        yaml.dump(hyp, f, sort_keys=False)
    with open(save_dir / 'opt.yaml', 'w') as f:
        yaml.dump(vars(opt), f, sort_keys=False)

    # Configure
    plots = not opt.evolve  # create plots
    cuda = device.type != 'cpu'
    init_seeds(2 + rank)
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)  # data dict
    with torch_distributed_zero_first(rank):
        check_dataset(data_dict)  # check
    train_path = data_dict['train']
    test_path = data_dict['val']
    nc = 1 if opt.single_cls else int(data_dict['nc'])  # number of classes
    names = ['item'] if opt.single_cls and len(
        data_dict['names']) != 1 else data_dict['names']  # class names
    assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (
        len(names), nc, opt.data)  # check

    # Model
    pretrained = weights.endswith('.pt')
    if pretrained:
        # download weights only on rank 0, others wait at the barrier
        with torch_distributed_zero_first(rank):
            attempt_download(weights)  # download if not found locally
        ckpt = torch.load(weights, map_location=device)  # load checkpoint
        if hyp.get('anchors'):
            ckpt['model'].yaml['anchors'] = round(
                hyp['anchors'])  # force autoanchor
        model = Model(opt.cfg or ckpt['model'].yaml, ch=3,
                      nc=nc).to(device)  # create
        exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else [
        ]  # exclude keys
        state_dict = ckpt['model'].float().state_dict()  # to FP32
        state_dict = intersect_dicts(state_dict,
                                     model.state_dict(),
                                     exclude=exclude)  # intersect
        model.load_state_dict(state_dict, strict=False)  # load
        logger.info(
            'Transferred %g/%g items from %s' %
            (len(state_dict), len(model.state_dict()), weights))  # report
    else:
        model = Model(opt.cfg, ch=3, nc=nc).to(device)  # create

    # Freeze
    freeze = []  # parameter names to freeze (full or partial)
    for k, v in model.named_parameters():
        v.requires_grad = True  # train all layers
        if any(x in k for x in freeze):
            print('freezing %s' % k)
            v.requires_grad = False

    # Optimizer — gradient accumulation keeps the effective batch near nbs
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / total_batch_size),
                     1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= total_batch_size * accumulate / nbs  # scale weight_decay
    logger.info(f"Scaled weight_decay = {hyp['weight_decay']}")

    # Three parameter groups: BN weights (no decay), conv weights (decay), biases
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in model.named_modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
            pg2.append(v.bias)  # biases
        if isinstance(v, nn.BatchNorm2d):
            pg0.append(v.weight)  # no decay
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
            pg1.append(v.weight)  # apply decay

    if opt.adam:
        optimizer = optim.Adam(pg0,
                               lr=hyp['lr0'],
                               betas=(hyp['momentum'],
                                      0.999))  # adjust beta1 to momentum
    else:
        optimizer = optim.SGD(pg0,
                              lr=hyp['lr0'],
                              momentum=hyp['momentum'],
                              nesterov=True)

    optimizer.add_param_group({
        'params': pg1,
        'weight_decay': hyp['weight_decay']
    })  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' %
                (len(pg2), len(pg1), len(pg0)))
    del pg0, pg1, pg2

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
    lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # plot_lr_scheduler(optimizer, scheduler, epochs)

    # Logging (only the rank-0 process initializes a W&B run)
    if rank in [-1, 0] and wandb and wandb.run is None:
        opt.hyp = hyp  # add hyperparameters
        wandb_run = wandb.init(
            config=opt,
            resume="allow",
            project='YOLOv5'
            if opt.project == 'runs/train' else Path(opt.project).stem,
            name=save_dir.stem,
            id=ckpt.get('wandb_id') if 'ckpt' in locals() else None)
    loggers = {'wandb': wandb}  # loggers dict

    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if ckpt['optimizer'] is not None:
            optimizer.load_state_dict(ckpt['optimizer'])
            best_fitness = ckpt['best_fitness']

        # Results
        if ckpt.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(ckpt['training_results'])  # write results.txt

        # Epochs
        start_epoch = ckpt['epoch'] + 1
        if opt.resume:
            assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (
                weights, epochs)
        if epochs < start_epoch:
            logger.info(
                '%s has been trained for %g epochs. Fine-tuning for %g additional epochs.'
                % (weights, ckpt['epoch'], epochs))
            epochs += ckpt['epoch']  # finetune additional epochs

        del ckpt, state_dict

    # Image sizes
    gs = int(model.stride.max())  # grid size (max stride)
    nl = model.model[
        -1].nl  # number of detection layers (used for scaling hyp['obj'])
    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size
                         ]  # verify imgsz are gs-multiples

    # DP mode
    if cuda and rank == -1 and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        logger.info('Using SyncBatchNorm()')

    # EMA (exponential moving average of weights, rank 0 only)
    ema = ModelEMA(model) if rank in [-1, 0] else None

    # DDP mode
    if cuda and rank != -1:
        model = DDP(model,
                    device_ids=[opt.local_rank],
                    output_device=opt.local_rank)

    # Trainloader
    dataloader, dataset = create_dataloader(train_path,
                                            imgsz,
                                            batch_size,
                                            gs,
                                            opt,
                                            hyp=hyp,
                                            augment=True,
                                            cache=opt.cache_images,
                                            rect=opt.rect,
                                            rank=rank,
                                            world_size=opt.world_size,
                                            workers=opt.workers,
                                            image_weights=opt.image_weights,
                                            quad=opt.quad)
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    nb = len(dataloader)  # number of batches
    assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (
        mlc, nc, opt.data, nc - 1)

    # Process 0 — validation loader, label plots, anchor check
    if rank in [-1, 0]:
        ema.updates = start_epoch * nb // accumulate  # set EMA updates
        testloader = create_dataloader(
            test_path,
            imgsz_test,
            total_batch_size,
            gs,
            opt,  # testloader
            hyp=hyp,
            cache=opt.cache_images and not opt.notest,
            rect=True,
            rank=-1,
            world_size=opt.world_size,
            workers=opt.workers,
            pad=0.5)[0]

        if not opt.resume:
            labels = np.concatenate(dataset.labels, 0)
            c = torch.tensor(labels[:, 0])  # classes
            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
            # model._initialize_biases(cf.to(device))
            if plots:
                plot_labels(labels, save_dir, loggers)
                if tb_writer:
                    tb_writer.add_histogram('classes', c, 0)

            # Anchors
            if not opt.noautoanchor:
                check_anchors(dataset,
                              model=model,
                              thr=hyp['anchor_t'],
                              imgsz=imgsz)

    # Model parameters
    hyp['cls'] *= nc / 80.  # scale hyp['cls'] to class count
    hyp['obj'] *= imgsz**2 / 640.**2 * 3. / nl  # scale hyp['obj'] to image size and output layers
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # iou loss ratio (obj_loss = 1.0 or iou)
    model.class_weights = labels_to_class_weights(
        dataset.labels, nc).to(device) * nc  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nw = max(round(hyp['warmup_epochs'] * nb),
             1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0
               )  # P, R, [email protected], [email protected], val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    logger.info('Image sizes %g train, %g test\n'
                'Using %g dataloader workers\nLogging results to %s\n'
                'Starting training for %g epochs...' %
                (imgsz, imgsz_test, dataloader.num_workers, save_dir, epochs))
    for epoch in range(
            start_epoch, epochs
    ):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional): resample images by class difficulty
        if opt.image_weights:
            # Generate indices
            if rank in [-1, 0]:
                cw = model.class_weights.cpu().numpy() * (
                    1 - maps)**2 / nc  # class weights
                iw = labels_to_image_weights(dataset.labels,
                                             nc=nc,
                                             class_weights=cw)  # image weights
                dataset.indices = random.choices(
                    range(dataset.n), weights=iw,
                    k=dataset.n)  # rand weighted idx
            # Broadcast if DDP
            if rank != -1:
                indices = (torch.tensor(dataset.indices) if rank == 0 else
                           torch.zeros(dataset.n)).int()
                dist.broadcast(indices, 0)
                if rank != 0:
                    dataset.indices = indices.cpu().numpy()

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(4, device=device)  # mean losses
        if rank != -1:
            dataloader.sampler.set_epoch(epoch)
        pbar = enumerate(dataloader)
        logger.info(
            ('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls',
                                   'total', 'targets', 'img_size'))
        if rank in [-1, 0]:
            pbar = tqdm(pbar, total=nb)  # progress bar
        optimizer.zero_grad()
        for i, (
                imgs, targets, paths, _
        ) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float(
            ) / 255.0  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup: ramp lr/momentum over the first nw batches
            if ni <= nw:
                xi = [0, nw]  # x interp
                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                accumulate = max(
                    1,
                    np.interp(ni, xi, [1, nbs / total_batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [
                        hyp['warmup_bias_lr'] if j == 2 else 0.0,
                        x['initial_lr'] * lf(epoch)
                    ])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(
                            ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                sz = random.randrange(imgsz * 0.5,
                                      imgsz * 1.5 + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]
                          ]  # new shape (stretched to gs-multiple)
                    imgs = F.interpolate(imgs,
                                         size=ns,
                                         mode='bilinear',
                                         align_corners=False)

            # Forward
            with amp.autocast(enabled=cuda):
                pred = model(imgs)  # forward
                loss, loss_items = compute_loss(
                    pred, targets.to(device),
                    model)  # loss scaled by batch_size
                if rank != -1:
                    loss *= opt.world_size  # gradient averaged between devices in DDP mode
                if opt.quad:
                    loss *= 4.

            # Backward
            scaler.scale(loss).backward()

            # Optimize (every `accumulate` batches)
            if ni % accumulate == 0:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)

            # Print
            if rank in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1
                                                    )  # update mean losses
                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if
                                 torch.cuda.is_available() else 0)  # (GB)
                s = ('%10s' * 2 + '%10.4g' * 6) % ('%g/%g' %
                                                   (epoch, epochs - 1), mem,
                                                   *mloss, targets.shape[0],
                                                   imgs.shape[-1])
                pbar.set_description(s)

                # Plot
                if plots and ni < 3:
                    f = save_dir / f'train_batch{ni}.jpg'  # filename
                    Thread(target=plot_images,
                           args=(imgs, targets, paths, f),
                           daemon=True).start()
                    # if tb_writer:
                    #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                    #     tb_writer.add_graph(model, imgs)  # add model to tensorboard
                elif plots and ni == 3 and wandb:
                    wandb.log({
                        "Mosaics": [
                            wandb.Image(str(x), caption=x.name)
                            for x in save_dir.glob('train*.jpg')
                        ]
                    })

            # end batch ------------------------------------------------------------------------------------------------
        # end epoch ----------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
        scheduler.step()

        # DDP process 0 or single-GPU
        if rank in [-1, 0]:
            # mAP: evaluate the EMA weights
            if ema:
                ema.update_attr(model,
                                include=[
                                    'yaml', 'nc', 'hyp', 'gr', 'names',
                                    'stride', 'class_weights'
                                ])
            final_epoch = epoch + 1 == epochs
            if not opt.notest or final_epoch:  # Calculate mAP
                results, maps, times = test.test(
                    opt.data,
                    batch_size=total_batch_size,
                    imgsz=imgsz_test,
                    model=ema.ema,
                    single_cls=opt.single_cls,
                    dataloader=testloader,
                    save_dir=save_dir,
                    plots=plots and final_epoch,
                    log_imgs=opt.log_imgs if wandb else 0)

            # Write
            with open(results_file, 'a') as f:
                f.write(
                    s + '%10.4g' * 7 % results +
                    '\n')  # P, R, [email protected], [email protected], val_loss(box, obj, cls)
            if len(opt.name) and opt.bucket:
                os.system('gsutil cp %s gs://%s/results/results%s.txt' %
                          (results_file, opt.bucket, opt.name))

            # Log
            tags = [
                'train/box_loss',
                'train/obj_loss',
                'train/cls_loss',  # train loss
                'metrics/precision',
                'metrics/recall',
                'metrics/mAP_0.5',
                'metrics/mAP_0.5:0.95',
                'val/box_loss',
                'val/obj_loss',
                'val/cls_loss',  # val loss
                'x/lr0',
                'x/lr1',
                'x/lr2'
            ]  # params
            for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
                if tb_writer:
                    tb_writer.add_scalar(tag, x, epoch)  # tensorboard
                if wandb:
                    wandb.log({tag: x})  # W&B

            # Update best mAP
            fi = fitness(np.array(results).reshape(
                1, -1))  # weighted combination of [P, R, [email protected], [email protected]]
            if fi > best_fitness:
                best_fitness = fi

            # Save model
            save = (not opt.nosave) or (final_epoch and not opt.evolve)
            if save:
                with open(results_file, 'r') as f:  # create checkpoint
                    ckpt = {
                        'epoch':
                        epoch,
                        'best_fitness':
                        best_fitness,
                        'training_results':
                        f.read(),
                        'model':
                        ema.ema,
                        'optimizer':
                        None if final_epoch else optimizer.state_dict(),
                        'wandb_id':
                        wandb_run.id if wandb else None
                    }

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                del ckpt
        # end epoch ----------------------------------------------------------------------------------------------------
    # end training

    if rank in [-1, 0]:
        # Strip optimizers
        final = best if best.exists() else last  # final model
        for f in [last, best]:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
        if opt.bucket:
            os.system(f'gsutil cp {final} gs://{opt.bucket}/weights')  # upload

        # Plots
        if plots:
            plot_results(save_dir=save_dir)  # save as results.png
            if wandb:
                files = [
                    'results.png', 'precision_recall_curve.png',
                    'confusion_matrix.png'
                ]
                wandb.log({
                    "Results": [
                        wandb.Image(str(save_dir / f), caption=f)
                        for f in files if (save_dir / f).exists()
                    ]
                })
                if opt.log_artifacts:
                    wandb.log_artifact(artifact_or_path=str(final),
                                       type='model',
                                       name=save_dir.stem)

        # Test best.pt
        logger.info('%g epochs completed in %.3f hours.\n' %
                    (epoch - start_epoch + 1, (time.time() - t0) / 3600))
        if opt.data.endswith('coco.yaml') and nc == 80:  # if COCO
            for conf, iou, save_json in ([0.25, 0.45,
                                          False], [0.001, 0.65,
                                                   True]):  # speed, mAP tests
                results, _, _ = test.test(opt.data,
                                          batch_size=total_batch_size,
                                          imgsz=imgsz_test,
                                          conf_thres=conf,
                                          iou_thres=iou,
                                          model=attempt_load(final,
                                                             device).half(),
                                          single_cls=opt.single_cls,
                                          dataloader=testloader,
                                          save_dir=save_dir,
                                          save_json=save_json,
                                          plots=False)

    else:
        dist.destroy_process_group()

    wandb.run.finish() if wandb and wandb.run else None
    torch.cuda.empty_cache()
    return results
def _log_images(self, num_images=36):
    """Build a list of wandb.Image objects from validation data and model predictions.

    Samples up to `num_images` validation examples (randomly if there are more
    than that), runs the model on them, and returns images interleaved so that
    related images (input / prediction / reference) appear next to each other
    in the W&B media panel. Returns None when neither input_type nor
    output_type is image-like.

    Args:
        num_images: Maximum number of validation examples to log.
    """
    validation_X = self.validation_data[0]
    validation_y = self.validation_data[1]
    validation_length = len(validation_X)
    if validation_length > num_images:
        # pick some data at random
        indices = np.random.choice(validation_length,
                                   num_images,
                                   replace=False)
    else:
        indices = range(validation_length)
    test_data = []
    test_output = []
    for i in indices:
        test_example = validation_X[i]
        test_data.append(test_example)
        test_output.append(validation_y[i])
    predictions = self.model.predict(np.stack(test_data))
    if self.input_type == 'label':
        # label -> image: caption each output with its input label
        if self.output_type in ('image', 'images', 'segmentation_mask'):
            captions = self._logits_to_captions(test_data)
            output_image_data = self._masks_to_pixels(
                predictions
            ) if self.output_type == 'segmentation_mask' else predictions
            reference_image_data = self._masks_to_pixels(
                test_output
            ) if self.output_type == 'segmentation_mask' else test_output
            # grouping=2 pairs each prediction with the following reference
            output_images = [
                wandb.Image(data, caption=captions[i], grouping=2)
                for i, data in enumerate(output_image_data)
            ]
            reference_images = [
                wandb.Image(data, caption=captions[i])
                for i, data in enumerate(reference_image_data)
            ]
            return list(
                chain.from_iterable(zip(output_images, reference_images)))
    elif self.input_type in ('image', 'images', 'segmentation_mask'):
        input_image_data = self._masks_to_pixels(
            test_data
        ) if self.input_type == 'segmentation_mask' else test_data
        if self.output_type == 'label':
            # we just use the predicted label as the caption for now
            captions = self._logits_to_captions(predictions)
            return [
                wandb.Image(data, caption=captions[i])
                for i, data in enumerate(test_data)
            ]
        elif self.output_type in ('image', 'images', 'segmentation_mask'):
            # image -> image: log input / prediction / reference triplets
            output_image_data = self._masks_to_pixels(
                predictions
            ) if self.output_type == 'segmentation_mask' else predictions
            reference_image_data = self._masks_to_pixels(
                test_output
            ) if self.output_type == 'segmentation_mask' else test_output
            input_images = [
                wandb.Image(data, grouping=3) for i, data in
                enumerate(input_image_data)
            ]
            output_images = [
                wandb.Image(data) for i, data in enumerate(output_image_data)
            ]
            reference_images = [
                wandb.Image(data)
                for i, data in enumerate(reference_image_data)
            ]
            return list(
                chain.from_iterable(
                    zip(input_images, output_images, reference_images)))
        else:
            # unknown output, just log the input images
            return [wandb.Image(img) for img in test_data]
    elif self.output_type in ('image', 'images', 'segmentation_mask'):
        # unknown input, just log the predicted and reference outputs without captions
        output_image_data = self._masks_to_pixels(
            predictions
        ) if self.output_type == 'segmentation_mask' else predictions
        reference_image_data = self._masks_to_pixels(
            test_output
        ) if self.output_type == 'segmentation_mask' else test_output
        output_images = [
            wandb.Image(data, grouping=2)
            for i, data in enumerate(output_image_data)
        ]
        reference_images = [
            wandb.Image(data) for i, data in enumerate(reference_image_data)
        ]
        return list(
            chain.from_iterable(zip(output_images, reference_images)))
def test_image_accepts_other_images(mocked_run):
    """wandb.Image should accept an existing Image and compare equal to it."""
    source = wandb.Image(np.random.random((300, 300, 3)))
    wrapped = wandb.Image(source)
    assert source == wrapped