def run_epoch(self, epoch, iteration_loss=False):
    """Runs an epoch of validation.

    Keyword arguments:
    - iteration_loss (``bool``, optional): Prints loss at every step.

    Returns:
    - The epoch loss (float), and the values of the specified metrics

    """
    self.model.eval()
    epoch_loss = 0.0
    self.metric.reset()
    for step, batch_data in enumerate(self.data_loader):
        if step > 0:
            break  # debug shortcut: only the first batch is evaluated

        # Get the inputs and labels
        inputs = batch_data[0].to(self.device)
        labels = batch_data[1].to(self.device)

        with torch.no_grad():
            # Forward propagation
            outputs = self.model(inputs)
            _, preds = torch.max(outputs.data, 1)

            # Loss computation
            loss = self.criterion(outputs, labels)

        # Keep track of loss for current epoch
        epoch_loss += loss.item()

        # Keep track of the evaluation metric
        self.metric.add(outputs.detach(), labels.detach())

        if iteration_loss:
            print("[Step: %d] Iteration loss: %.4f" % (step, loss.item()))

        if epoch in (59, 99, 159, 199) and step % 500 == 0:
            print('Visualization of Val:')
            label_to_rgb = transforms.Compose([
                ext_transforms.LongTensorToRGBPIL(self.color_encoding),
                transforms.ToTensor()
            ])
            color_labels = utils.batch_transform(labels.cpu(), label_to_rgb)
            color_outputs = utils.batch_transform(preds.cpu(), label_to_rgb)
            utils.imshow_batch(color_outputs, color_labels)

    return epoch_loss / len(self.data_loader), self.metric.value()
def predict(model, images, class_encoding):
    # Variable is a legacy (pre-0.4) wrapper; plain tensors work in current PyTorch
    images = Variable(images)
    if use_cuda:
        images = images.cuda()

    # Make predictions!
    predictions = model(images)

    # Predictions is one-hot encoded with "num_classes" channels. Convert it
    # to a single int per pixel using the indices of the maxima: torch.max
    # with dim=1 returns the argmax along the channel dimension.
    _, predictions = torch.max(predictions.data, 1)

    # Convert the label tensor to an RGB image for display
    label_to_rgb = transforms.Compose([
        ext_transforms.LongTensorToRGBPIL(class_encoding),
        transforms.ToTensor()
    ])
    color_predictions = utils.batch_transform(predictions.cpu(), label_to_rgb)
    utils.imshow_batch(images.data.cpu(), color_predictions)
def predict(model, images, class_encoding, device):
    images = images.to(device)

    model.eval()
    with torch.no_grad():  # was "while torch.no_grad():", which is a bug
        # Make predictions!
        predictions, _ = model(images)

    # Predictions is one-hot encoded with "num_classes" channels.
    # Convert it to a single int using the indices where the maximum (1) occurs
    _, predictions = torch.max(predictions.data, 1)

    label_to_rgb = transforms.Compose([
        ext_transforms.LongTensorToRGBPIL(class_encoding),
        transforms.ToTensor()
    ])
    color_predictions = utils.batch_transform(predictions.cpu(), label_to_rgb)
    utils.imshow_batch(images.data.cpu(), color_predictions)
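For context, a minimal call site for this variant could look like the sketch below; the names (`model`, `test_loader`, `class_encoding`) are assumptions for illustration, and note this variant expects the model's forward pass to return a (logits, aux) tuple.

# Hypothetical call site for the predict() variant above
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
images, _ = next(iter(test_loader))  # one batch of RGB image tensors
predict(model, images, class_encoding, device)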
def single():
    print('Mode: Single')
    img = Image.open('test_content/example_01.png').convert('RGB')
    class_encoding = color_encoding = OrderedDict([
        ('unlabeled', (0, 0, 0)),
        ('road', (128, 64, 128)),
        ('sidewalk', (244, 35, 232)),
        ('building', (70, 70, 70)),
        ('wall', (102, 102, 156)),
        ('fence', (190, 153, 153)),
        ('pole', (153, 153, 153)),
        ('traffic_light', (250, 170, 30)),
        ('traffic_sign', (220, 220, 0)),
        ('vegetation', (107, 142, 35)),
        ('terrain', (152, 251, 152)),
        ('sky', (70, 130, 180)),
        ('person', (220, 20, 60)),
        ('rider', (255, 0, 0)),
        ('car', (0, 0, 142)),
        ('truck', (0, 0, 70)),
        ('bus', (0, 60, 100)),
        ('train', (0, 80, 100)),
        ('motorcycle', (0, 0, 230)),
        ('bicycle', (119, 11, 32))
    ])

    num_classes = len(class_encoding)
    model = ERFNet(num_classes)
    model_path = os.path.join(args.save_dir, args.name)
    print('Loading model at:', model_path)
    checkpoint = torch.load(model_path)
    # model = ENet(num_classes)
    model = model.cuda()
    model.load_state_dict(checkpoint['state_dict'])

    img = img.resize((args.width, args.height), Image.BILINEAR)
    start = time.time()
    images = transforms.ToTensor()(img)
    # unsqueeze adds the batch dimension, giving (1, 3, height, width); the
    # original torch.reshape call here was dead code (its result was
    # discarded) and listed (width, height) in the wrong order, so it is gone
    images = images.unsqueeze(0)
    with torch.no_grad():
        images = images.cuda()
        predictions = model(images)
    end = time.time()
    print('model speed:', int(1 / (end - start)), "FPS")

    _, predictions = torch.max(predictions.data, 1)

    label_to_rgb = transforms.Compose(
        [utils.LongTensorToRGBPIL(class_encoding), transforms.ToTensor()])
    color_predictions = utils.batch_transform(predictions.cpu(), label_to_rgb)
    end = time.time()
    print('model+transform:', int(1 / (end - start)), "FPS")
    utils.imshow_batch(images.data.cpu(), color_predictions)
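One caveat on the FPS figures printed above: CUDA kernels launch asynchronously, so a host-side time.time() delta can close before the GPU has finished. A sketch of a more truthful measurement of the same forward pass (same names as in single()):

torch.cuda.synchronize()          # wait for any pending GPU work
start = time.time()
with torch.no_grad():
    predictions = model(images)
torch.cuda.synchronize()          # make sure the forward pass has finished
print('model speed:', int(1 / (time.time() - start)), "FPS")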
def predict(model, images, class_encoding):
    StartTime = time.time()
    images = images.to(device)

    # Make predictions!
    model.eval()
    with torch.no_grad():
        predictions = model(images)

    # Predictions is one-hot encoded with "num_classes" channels.
    # Convert it to a single int using the indices where the maximum (1) occurs
    _, predictions1 = torch.max(predictions.data, 1)

    label_to_rgb = transforms.Compose([
        ext_transforms.LongTensorToRGBPIL(class_encoding),
        transforms.ToTensor()
    ])
    color_predictions = utils.batch_transform(predictions1.cpu(), label_to_rgb)
    utils.imshow_batch(images.data.cpu(), color_predictions)

    EndTime = time.time()
    RunTime = EndTime - StartTime
    print("For each figure, the running time is %.4f s." % (RunTime))

    if args.generate_images is True:
        # Note: this block appears to be lifted from a class method; it
        # assumes `image`, `fileName`, `self.color_palette`, and
        # `self.generate_image_dir` are available in the enclosing scope
        cur_rgb = image
        cur_output = torch.clone(predictions)
        _, cur_output = cur_output.max(0)
        cur_output = cur_output.detach().cpu().numpy()
        pred_label_image = create_label_image(cur_output, self.color_palette)
        rgb_image = image
        height = cur_output.shape[0]
        width = cur_output.shape[1]

        # Stack the RGB input on top of the colorized prediction
        composite_image = np.zeros((2 * height, width, 3), dtype=np.uint8)
        composite_image[0:height, :, :] = rgb_image
        composite_image[height:2 * height, :, :] = pred_label_image
        imageio.imwrite(
            os.path.join(self.generate_image_dir, str(fileName) + '.png'),
            composite_image)
def predict(model, images, class_encoding):
    images = images.to(device)

    # Make predictions!
    model.eval()
    with torch.no_grad():
        predictions = model(images)

    # Predictions is one-hot encoded with "num_classes" channels.
    # Convert it to a single int using the indices where the maximum (1) occurs
    _, predictions = torch.max(predictions.data, 1)
    print(predictions.size())
    print(images.size())
    # print(image_pil.size)

    label_to_rgb = transforms.Compose([
        ext_transforms.LongTensorToRGBPIL(class_encoding),
        transforms.ToTensor()
    ])
    color_predictions = utils.batch_transform(predictions.cpu(), label_to_rgb)
    # color_predictions.convert('1')

    label_to_pil = transforms.Compose(
        [ext_transforms.LongTensorToRGBPIL(class_encoding)])
    color_predictions_pil = [
        label_to_pil(tensor) for tensor in torch.unbind(predictions.cpu())
    ]
    color_images_pil = [
        transforms.ToPILImage()(tensor)
        for tensor in torch.unbind(images.cpu())
    ]

    import numpy as np  # better placed at module level

    # print(len(color_predictions_pil))
    count = 0
    print(type(images))
    for (pil, img) in zip(color_predictions_pil, color_images_pil):
        # pil = pil.convert('L')
        # pil = pil.convert('1')
        # pil = pil.filter(ImageFilter.GaussianBlur)

        # Threshold the colorized prediction into a binary mask, then blur
        # it so the blended composite has soft edges
        pil_cv = ext_transforms.RGBPILToCvMat(pil)
        kernel = np.ones((11, 11), np.uint8)
        # pil_open = cv2.morphologyEx(pil_cv, cv2.MORPH_OPEN, kernel)
        _, pil_open = cv2.threshold(pil_cv, 200, 255, cv2.THRESH_BINARY)
        pil_open = cv2.blur(pil_open, (11, 11))

        img_cv = ext_transforms.RGBPILToCvMat(img)
        img_filter = cv2.GaussianBlur(img_cv, (5, 5), 0)
        img_filter = cv2.GaussianBlur(img_filter, (5, 5), 0)

        ########## plan 1: per-pixel Python loops (too slow) #############
        # for row in range(pil_cv.shape[0]):
        #     for col in range(pil_cv.shape[1]):
        #         for chan in range(pil_cv.shape[2]):
        #             img_filter[row][col][chan] = float(img_filter[row][col][chan]) * float(pil_open[row][col][chan]) / 255 + float(img_cv[row][col][chan]) * float(255 - pil_open[row][col][chan]) / 255
        # img_out = ext_transforms.CvMatToRGBPIL(img_filter)

        ########## plan 2: vectorized alpha blend with the mask ##########
        img_filter_np = np.asarray(img_filter).astype(np.float32)
        img_cv_np = np.asarray(img_cv).astype(np.float32)
        pil_open_np = np.asarray(pil_open)
        img_filter2 = (img_filter_np * pil_open_np / 255
                       + img_cv_np * (255 - pil_open_np) / 255)
        img_out = Image.fromarray(
            cv2.cvtColor(img_filter2.astype(np.uint8), cv2.COLOR_BGR2RGB))

        img.save(
            '/media/nv/7174c323-375e-4334-b15e-019bd2c8af08/PyTorch-ENet-master/icome/icome_test_images/'
            + str(count + 300) + '.png')
        img_out.save(
            '/media/nv/7174c323-375e-4334-b15e-019bd2c8af08/PyTorch-ENet-master/icome/icome_test_images/'
            + str(count + 400) + '.png')
        Image.fromarray(
            cv2.cvtColor(pil_open.astype(np.uint8), cv2.COLOR_BGR2RGB)
        ).save(
            '/media/nv/7174c323-375e-4334-b15e-019bd2c8af08/PyTorch-ENet-master/icome/icome_test_images/'
            + str(count + 500) + '.png')

        ########## plan 3: PIL paste/filter with a binary mask ##########
        # img_inv = img.copy()
        # img.paste(pil, fliterLabeltoBinary(pil.convert('L'), 1))
        # img_inv.paste(pil, fliterLabeltoBinary(pil.convert('L'), 0))
        # img_inv = img_inv.filter(ImageFilter.GaussianBlur)
        # # img_inv = img_inv.filter(ImageFilter.BLUR)
        # img_inv = img_inv.filter(ImageFilter.GaussianBlur)
        # img.save('/media/nv/7174c323-375e-4334-b15e-019bd2c8af08/PyTorch-ENet-master/icome/icome_test_images/' + str(count) + '.png')
        # img_inv.save('/media/nv/7174c323-375e-4334-b15e-019bd2c8af08/PyTorch-ENet-master/icome/icome_test_images/' + str(count + 100) + '.png')
        # # img.convert('RGBA')
        # # img_inv.convert('RGBA')
        # img_inv.paste(img, fliterLabeltoBinary(pil.convert('L'), 0))
        # img_inv.save('/media/nv/7174c323-375e-4334-b15e-019bd2c8af08/PyTorch-ENet-master/icome/icome_test_images/' + str(count + 200) + '.png')
        # (fliterLabeltoBinary(pil.convert('L'))).save('/media/nv/7174c323-375e-4334-b15e-019bd2c8af08/PyTorch-ENet-master/icome/icome_test_images/' + str(count + 100) + '.png')

        count = count + 1

    utils.imshow_batch(images.data.cpu(), color_predictions)
def load_dataset(dataset):
    print("\nLoading dataset...\n")
    print("Selected dataset:", args.dataset)
    print("Dataset directory:", args.dataset_dir)
    print('Train file:', args.trainFile)
    print('Val file:', args.valFile)
    print('Test file:', args.testFile)
    print("Save directory:", args.save_dir)

    image_transform = transforms.Compose(
        [transforms.Resize((args.height, args.width)),
         transforms.ToTensor()])

    label_transform = transforms.Compose([
        transforms.Resize((args.height, args.width)),
        ext_transforms.PILToLongTensor()
    ])

    # Get selected dataset
    # Load the training set as tensors
    train_set = dataset(args.dataset_dir, args.trainFile, mode='train',
                        transform=image_transform,
                        label_transform=label_transform,
                        color_mean=color_mean, color_std=color_std)
    train_loader = data.DataLoader(train_set,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.workers)

    # Load the validation set as tensors
    val_set = dataset(args.dataset_dir, args.valFile, mode='val',
                      transform=image_transform,
                      label_transform=label_transform,
                      color_mean=color_mean, color_std=color_std)
    val_loader = data.DataLoader(val_set,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Load the test set as tensors
    test_set = dataset(args.dataset_dir, args.testFile, mode='inference',
                       transform=image_transform,
                       label_transform=label_transform,
                       color_mean=color_mean, color_std=color_std)
    test_loader = data.DataLoader(test_set,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.workers)

    # Get encoding between pixel values in label images and RGB colors
    class_encoding = train_set.color_encoding

    # Get number of classes to predict
    num_classes = len(class_encoding)

    # Print information for debugging
    print("Number of classes to predict:", num_classes)
    print("Train dataset size:", len(train_set))
    print("Validation dataset size:", len(val_set))

    # Get a batch of samples to display
    if args.mode.lower() == 'test':
        images, labels = next(iter(test_loader))
    else:
        images, labels = next(iter(train_loader))
    print("Image size:", images.size())
    print("Label size:", labels.size())
    print("Class-color encoding:", class_encoding)

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("Close the figure window to continue...")
        label_to_rgb = transforms.Compose([
            ext_transforms.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_labels = utils.batch_transform(labels, label_to_rgb)
        utils.imshow_batch(images, color_labels)

    # Get class weights from the selected weighing technique
    print("Weighing technique:", args.weighing)

    # If a class weight file is provided, try to load the weights from there
    class_weights = None
    if args.class_weights_file:
        print('Trying to load class weights from file...')
        try:
            class_weights = np.loadtxt(args.class_weights_file)
        except Exception as e:
            raise e

    if class_weights is None:
        print("Computing class weights...")
        print("(this can take a while depending on the dataset size)")
        if args.weighing.lower() == 'enet':
            class_weights = enet_weighing(train_loader, num_classes)
        elif args.weighing.lower() == 'mfb':
            class_weights = median_freq_balancing(train_loader, num_classes)
        else:
            class_weights = None

    if class_weights is not None:
        class_weights = torch.from_numpy(class_weights).float().to(device)
        # Set the weight of the unlabeled class to 0
        print("Ignoring unlabeled class: ", args.ignore_unlabeled)
        if args.ignore_unlabeled:
            ignore_index = list(class_encoding).index('unlabeled')
            class_weights[ignore_index] = 0

    print("Class weights:", class_weights)

    return (train_loader, val_loader,
            test_loader), class_weights, class_encoding
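Several of these loaders call enet_weighing. Its core is the custom class-weighing scheme from the ENet paper, w_class = 1 / ln(c + p_class) with c = 1.02; a minimal sketch of that final computation, assuming per-class pixel counts have already been accumulated from the loader (the function name is illustrative, not this repo's API):

import numpy as np

def enet_weights_from_counts(class_pixel_counts, c=1.02):
    """class_pixel_counts: 1D array with the number of pixels per class."""
    propensity = class_pixel_counts / class_pixel_counts.sum()
    return 1.0 / np.log(c + propensity)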
image_id_list = []
encoded_pixels_list = []

# Get the requested device and move the model to that device in evaluation
# mode, then make predictions batch by batch
device = torch.device(config["device"])
net = net.to(device).eval()
for step, (img_batch, target_batch, path_batch) in enumerate(tqdm(dataloader)):
    img_batch = img_batch.to(device)
    pred_batch = predict_batch(net, img_batch)

    # Show the images and predictions if requested
    if config["show_predictions"]:
        utils.imshow_batch(img_batch,
                           torch.from_numpy(pred_batch),
                           pad_value=1,
                           padding=4)

    # Iterate over each prediction in the batch and split the segmented
    # ships into individual masks
    for (pred, path) in zip(pred_batch, path_batch):
        image_id = os.path.basename(path)
        pred = pred.squeeze(0).astype("uint8")

        # Post processing
        if pred.shape != output_dim:
            pred = pp.resize(pred, output_dim)
        if config["imfill"]:
            pred = pp.imfill(pred)
        if config["oriented_bbox"]:
            pred = pp.fill_oriented_bbox(pred, config["oriented_bbox_th"])
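The image_id_list/encoded_pixels_list pair suggests this loop feeds a Kaggle-style submission file. The encoder itself is not shown; a common run-length encoding for that format (column-major, 1-indexed, alternating start/length pairs) would be a sketch like the following, offered as an assumption rather than this project's actual helper:

import numpy as np

def rle_encode(mask):
    """mask: 2D binary array; returns 'start length start length ...'."""
    pixels = np.concatenate([[0], mask.flatten(order='F'), [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1  # change points, 1-indexed
    runs[1::2] -= runs[::2]                            # convert run ends to lengths
    return ' '.join(str(x) for x in runs)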
model.eval()

output_img_dir = args.output_img_dir
if not os.path.exists(output_img_dir):
    os.makedirs(output_img_dir)

for data_path in test_data:
    try:
        image_name = data_path.split('/')[-1].split('_')[:3]
        new_image_name = '_'.join(image_name) + '*.png'
        data = Image.open(data_path)
        h, w, _ = np.array(data).shape
        image = image_transform(data)
        prediction = predict(model, image, device=device)
        # Resize expects a (height, width) size tuple; the original
        # Resize(w, h) passed h as the interpolation argument by mistake
        save_png = transforms.Resize((h, w))(torch.ByteTensor(prediction))
        save_png = torchvision.utils.make_grid(save_png).numpy()
        save_png = remap(save_png, full_classes, new_classes)
        Image.fromarray(save_png).save(
            os.path.join(output_img_dir, 'submission', new_image_name))
        if args.visual:
            color_prediction = utils.batch_transform(prediction.cpu(),
                                                     label_to_rgb)
            utils.imshow_batch(
                image.data.cpu(), color_prediction,
                os.path.join(output_img_dir, 'visual', new_image_name))
    except Exception as e:
        traceback.print_exc()
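remap is not defined in this snippet; a plausible lookup-table implementation is sketched below. It is hypothetical (the real helper may differ) and assumes small non-negative integer label ids no larger than max(old_ids):

import numpy as np

def remap(label_img, old_ids, new_ids):
    # Build a lookup table so every pixel is translated in one indexing op;
    # ids not listed in old_ids map to themselves
    lut = np.arange(max(old_ids) + 1, dtype=np.uint8)
    for old, new in zip(old_ids, new_ids):
        lut[old] = new
    return lut[label_img]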
if not args.test:
    model.eval()
    sample_image = torch.unsqueeze(sample_image, 0)
    with torch.no_grad():
        output = model(sample_image)
    print("Model output dimension:", output.shape)

    # Convert it to a single int using the indices where the maximum (1) occurs
    _, predictions = torch.max(output.data, 1)

    label_to_rgb = transforms.Compose([
        ext_transforms.LongTensorToRGBPIL(color_encoding),
        transforms.ToTensor()
    ])
    color_predictions = utils.batch_transform(predictions.cpu(), label_to_rgb)
    utils.imshow_batch(sample_image.data.cpu(), color_predictions)
# Run several iterations for each batch size to determine the timing statistics
else:
    model.eval()
    with torch.no_grad():
        if args.iter_batch:
            batch_size = [
                int(2**i) for i in range(int(math.log2(args.batch_size) + 1))
            ]
        else:
            batch_size = [args.batch_size]
        means = []
        stds = []
        percentile_90 = []
        percentile_99 = []
        fps = []
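A minimal sketch of how a loop continuing inside that no_grad block could fill the statistics lists (the iteration count and dummy input are assumptions; the torch.cuda.synchronize() calls keep the host-side timer honest about asynchronous GPU execution):

import time
import numpy as np

for bs in batch_size:
    dummy = torch.randn(bs, 3, args.height, args.width, device=device)
    times = []
    for _ in range(100):
        torch.cuda.synchronize()
        start = time.time()
        model(dummy)
        torch.cuda.synchronize()
        times.append(time.time() - start)
    times = np.asarray(times)
    means.append(times.mean())
    stds.append(times.std())
    percentile_90.append(np.percentile(times, 90))
    percentile_99.append(np.percentile(times, 99))
    fps.append(bs / times.mean())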
def load_dataset(dataset):
    print("\nLoading dataset...\n")
    print("Selected dataset:", args.dataset)
    print("Dataset directory:", args.dataset_dir)
    print('Test file:', args.testFile)
    print("Save directory:", args.save_dir)

    image_transform = transforms.Compose(
        [transforms.Resize((args.height, args.width)),
         transforms.ToTensor()])

    label_transform = transforms.Compose([
        transforms.Resize((args.height, args.width)),
        ext_transforms.PILToLongTensor()
    ])

    # Load the test set as tensors
    test_set = dataset(args.dataset_dir, args.testFile, mode='inference',
                       transform=image_transform,
                       label_transform=label_transform,
                       color_mean=color_mean, color_std=color_std,
                       load_depth=(args.arch == 'rgbd'),
                       seg_classes=args.seg_classes)
    test_loader = data.DataLoader(test_set,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.workers)

    # Get encoding between pixel values in label images and RGB colors
    class_encoding = test_set.color_encoding

    # Get number of classes to predict
    num_classes = len(class_encoding)

    # Print information for debugging
    print("Number of classes to predict:", num_classes)
    print("Test dataset size:", len(test_set))

    # Get a batch of samples to display
    if args.arch == 'rgbd':
        images, labels, data_path, depth_path, label_path = next(
            iter(test_loader))
    else:
        images, labels, data_path, label_path = next(iter(test_loader))
    print("Image size:", images.size())
    print("Label size:", labels.size())
    print("Class-color encoding:", class_encoding)

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("Close the figure window to continue...")
        label_to_rgb = transforms.Compose([
            ext_transforms.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_labels = utils.batch_transform(labels, label_to_rgb)
        utils.imshow_batch(images, color_labels)

    # Get class weights
    # If a class weight file is provided, try to load the weights from there
    class_weights = None
    if args.class_weights_file:
        print('Trying to load class weights from file...')
        try:
            class_weights = np.loadtxt(args.class_weights_file)
        except Exception as e:
            raise e
    else:
        print('No class weights found...')

    if class_weights is not None:
        class_weights = torch.from_numpy(class_weights).float().to(device)
        # Set the weight of the unlabeled class to 0
        if args.ignore_unlabeled:
            ignore_index = list(class_encoding).index('unlabeled')
            class_weights[ignore_index] = 0

    print("Class weights:", class_weights)

    return test_loader, class_weights, class_encoding
val_dataset = torchvision.datasets.FashionMNIST(root=data_dir,
                                                train=False,
                                                transform=tranform)

train_dataloader = DataLoader(dataset=train_dataset,
                              batch_size=4,
                              shuffle=True,
                              num_workers=4)
val_dataloader = DataLoader(dataset=val_dataset,
                            batch_size=4,
                            num_workers=4,
                            shuffle=False)

# Show a random batch
plt.figure()
utils.imshow_batch(next(iter(train_dataloader)))
plt.show()

# ----------------- Define the network and set the hyperparameters -----------------
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net = net.Net()
print(net)
net = net.to(device)

loss_fc = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
scheduler = lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

# --------------------------------- Training ----------------------------------------
file_runing_loss = open('./log/running_loss.txt', 'w')
file_test_accuarcy = open('./log/test_accuracy.txt', 'w')
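The log files above feed a training loop that is not shown here; a minimal sketch of one, using only the names defined in this snippet (the epoch count and the 100-step logging window are illustrative assumptions):

for epoch in range(60):
    running_loss = 0.0
    for i, (inputs, targets) in enumerate(train_dataloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        loss = loss_fc(net(inputs), targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 100 == 99:  # log the windowed average loss
            file_runing_loss.write('%f\n' % (running_loss / 100))
            running_loss = 0.0
    scheduler.step()  # step the LR schedule once per epoch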
def load_dataset(dataset):
    print("\nLoading dataset...\n")
    print("Selected dataset:", args.dataset)
    print("Dataset directory:", args.dataset_dir)
    print("Save directory:", args.save_dir)

    # Data transforms and normalization
    image_transform = transforms.Compose(
        [transforms.Resize((args.height, args.width)),
         transforms.ToTensor()])

    # PILToLongTensor: these are labels, so they must not be normalized
    label_transform = transforms.Compose([
        transforms.Resize((args.height, args.width)),
        ext_transforms.PILToLongTensor()  # (H x W x C) to (C x H x W)
    ])

    # Get selected dataset
    # Load the training set as tensors
    train_set = dataset(args.dataset_dir,
                        transform=image_transform,
                        label_transform=label_transform)
    train_loader = data.DataLoader(train_set,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.workers)

    # Load the validation set as tensors
    val_set = dataset(args.dataset_dir,
                      mode='val',
                      transform=image_transform,
                      label_transform=label_transform)
    val_loader = data.DataLoader(val_set,
                                 batch_size=3,
                                 shuffle=True,
                                 num_workers=args.workers)

    # Load the test set as tensors
    test_set = dataset(args.dataset_dir,
                       mode='test',
                       transform=image_transform,
                       label_transform=label_transform)
    test_loader = data.DataLoader(test_set,
                                  batch_size=3,
                                  shuffle=True,
                                  num_workers=args.workers)

    # Get the encoding between pixel values in label images and RGB colors
    class_encoding = train_set.color_encoding

    # Get the number of classes to predict
    num_classes = len(class_encoding)

    # Print information for debugging
    print("Number of classes to predict:", num_classes)
    print("Train dataset size:", len(train_set))
    print("Validation dataset size:", len(val_set))

    # Get a batch of samples to display
    if args.mode.lower() == 'test':
        images, labels = next(iter(test_loader))
    else:
        images, labels = next(iter(train_loader))
    print("Image size:", images.size())
    print("Label size:", labels.size())
    print("Class-color encoding:", class_encoding)

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("Close the figure window to continue...")
        label_to_rgb = transforms.Compose([
            ext_transforms.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_labels = utils.batch_transform(labels, label_to_rgb)
        utils.imshow_batch(images, color_labels)

    # Get the class weights
    print("\nWeighing technique:", args.weighing)
    print("Computing class weights...")
    print("(this can take a while depending on the dataset size)")
    if args.weighing.lower() == 'enet':
        # enet_weighing returns a list; its output is hard-coded here
        class_weights = np.array([
            1.44752114, 33.41317956, 43.89576605, 47.85765692, 48.3393951,
            47.18958997, 40.2809274, 46.61960781, 48.28854284
        ])
        # class_weights = enet_weighing(train_loader, num_classes)
    else:
        class_weights = None

    if class_weights is not None:
        class_weights = torch.Tensor(class_weights)
        # Set the weight of the unlabeled class to 0
        # if args.ignore_unlabeled:
        #     ignore_index = list(class_encoding).index('unlabeled')
        #     class_weights[ignore_index] = 0

    print("Class weights:", class_weights)

    return (train_loader, val_loader,
            test_loader), class_weights, class_encoding
def run_epoch(self, epoch, iteration_loss=False):
    """Runs an epoch of training.

    Keyword arguments:
    - iteration_loss (``bool``, optional): Prints loss at every step.

    Returns:
    - The epoch loss (float).

    """
    self.model.train()
    epoch_loss = 0.0
    self.metric.reset()
    for step, batch_data in enumerate(self.data_loader):
        if step > 0:
            break  # debug shortcut: only the first batch is trained on

        # Get the inputs and labels
        inputs = batch_data[0].to(self.device)
        labels = batch_data[1].to(self.device)
        # print('labels:', labels)

        # Forward propagation
        outputs = self.model(inputs)
        _, preds = torch.max(outputs.data, 1)

        # Loss computation
        loss = self.criterion(outputs, labels)

        # Backpropagation
        self.optim.zero_grad()
        loss.backward()
        self.optim.step()

        # Keep track of loss for current epoch
        epoch_loss += loss.item()

        # Keep track of the evaluation metric
        self.metric.add(outputs.detach(), labels.detach())

        if iteration_loss:
            print("[Step: %d] Iteration loss: %.4f" % (step, loss.item()))

        if epoch in (59, 99, 159, 199) and step % 500 == 0:
            print('Visualization of Train:')
            label_to_rgb = transforms.Compose([
                ext_transforms.LongTensorToRGBPIL(self.color_encoding),
                transforms.ToTensor()
            ])
            color_labels = utils.batch_transform(labels.cpu(), label_to_rgb)
            color_outputs = utils.batch_transform(preds.cpu(), label_to_rgb)
            utils.imshow_batch(color_outputs, color_labels)

    # Save intermediate checkpoints at fixed epochs
    checkpoints = {80: 'model_80', 120: 'model_120', 140: 'model_140',
                   160: 'model_160', 180: 'model_180', 199: 'model_200'}
    if epoch in checkpoints:
        torch.save(self.model.state_dict(),
                   '/home/wan/PyTorch-ENet/' + checkpoints[epoch])

    return epoch_loss / len(self.data_loader), self.metric.value()
def video():
    print('testing from video')
    cameraWidth = 1920
    cameraHeight = 1080
    cameraMatrix = np.matrix([[1.3878727764994030e+03, 0, cameraWidth / 2],
                              [0, 1.7987055172413220e+03, cameraHeight / 2],
                              [0, 0, 1]])
    distCoeffs = np.matrix([
        -5.8881725390917083e-01, 5.8472404395779809e-01,
        -2.8299599929891900e-01, 0
    ])

    vidcap = cv2.VideoCapture('test_content/massachusetts.mp4')
    success = True
    i = 0
    while success:
        success, img = vidcap.read()
        if not success:
            break  # guard: read() returns img=None at the end of the stream
        if i % 1000 == 0:
            print("frame: ", i)

        if args.rmdistort:
            P = cv2.fisheye.estimateNewCameraMatrixForUndistortRectify(
                cameraMatrix, distCoeffs, (cameraWidth, cameraHeight), None)
            map1, map2 = cv2.fisheye.initUndistortRectifyMap(
                cameraMatrix, distCoeffs, np.eye(3), P, (1920, 1080),
                cv2.CV_16SC2)
            img = cv2.remap(img, map1, map2, cv2.INTER_LINEAR)

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        # img = img.convert('RGB')
        # cv2.imshow('', img)
        # cv2.waitKey(0)
        # img2 = Image.open(filename).convert('RGB')

        class_encoding = color_encoding = OrderedDict([
            ('unlabeled', (0, 0, 0)),
            ('road', (128, 64, 128)),
            ('sidewalk', (244, 35, 232)),
            ('building', (70, 70, 70)),
            ('wall', (102, 102, 156)),
            ('fence', (190, 153, 153)),
            ('pole', (153, 153, 153)),
            ('traffic_light', (250, 170, 30)),
            ('traffic_sign', (220, 220, 0)),
            ('vegetation', (107, 142, 35)),
            ('terrain', (152, 251, 152)),
            ('sky', (70, 130, 180)),
            ('person', (220, 20, 60)),
            ('rider', (255, 0, 0)),
            ('car', (0, 0, 142)),
            ('truck', (0, 0, 70)),
            ('bus', (0, 60, 100)),
            ('train', (0, 80, 100)),
            ('motorcycle', (0, 0, 230)),
            ('bicycle', (119, 11, 32))
        ])
        num_classes = len(class_encoding)

        # Note: reloading the checkpoint and rebuilding the model on every
        # frame is wasteful; hoisting this block out of the loop would make
        # the per-frame FPS numbers far more representative
        model_path = os.path.join(args.save_dir, args.name)
        checkpoint = torch.load(model_path)
        model = ERFNet(num_classes)
        model = model.cuda()
        model.load_state_dict(checkpoint['state_dict'])

        img = img.resize((args.width, args.height), Image.BILINEAR)
        start = time.time()
        images = transforms.ToTensor()(img)
        # unsqueeze adds the batch dimension; the original torch.reshape
        # call here discarded its result and listed (width, height) in the
        # wrong order, so it has been dropped
        images = images.unsqueeze(0)
        with torch.no_grad():
            images = images.cuda()
            predictions = model(images)
        end = time.time()
        print('model speed:', int(1 / (end - start)), "FPS")

        _, predictions = torch.max(predictions.data, 1)

        label_to_rgb = transforms.Compose([
            utils.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_predictions = utils.batch_transform(predictions.cpu(),
                                                  label_to_rgb)
        end = time.time()
        print('model+transform:', int(1 / (end - start)), "FPS")
        utils.imshow_batch(images.data.cpu(), color_predictions)
        i += 1
def load_dataset(dataset):
    print("\nLoading dataset...\n")
    print("Selected dataset:", args.dataset)
    print("Dataset directory:", args.dataset_dir)
    print("Save directory:", args.save_dir)

    image_transform = transforms.Compose(
        [transforms.Resize((args.height, args.width)),
         transforms.ToTensor()])

    label_transform = transforms.Compose([
        transforms.Resize((args.height, args.width),
                          transforms.InterpolationMode.NEAREST),
        ext_transforms.PILToLongTensor()
    ])

    # Get selected dataset
    # Load the training set as tensors
    train_set = dataset(args.dataset_dir,
                        transform=image_transform,
                        label_transform=label_transform)
    train_loader = data.DataLoader(train_set,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.workers)

    # Load the validation set as tensors
    val_set = dataset(args.dataset_dir,
                      mode='val',
                      transform=image_transform,
                      label_transform=label_transform)
    val_loader = data.DataLoader(val_set,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Load the test set as tensors
    test_set = dataset(args.dataset_dir,
                       mode='test',
                       transform=image_transform,
                       label_transform=label_transform)
    test_loader = data.DataLoader(test_set,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.workers)

    # Get encoding between pixel values in label images and RGB colors
    class_encoding = train_set.color_encoding

    # Remove the road_marking class from the CamVid dataset as it's merged
    # with the road class
    if args.dataset.lower() == 'camvid':
        del class_encoding['road_marking']

    # Get number of classes to predict
    num_classes = len(class_encoding)

    # Print information for debugging
    print("Number of classes to predict:", num_classes)
    print("Train dataset size:", len(train_set))
    print("Validation dataset size:", len(val_set))

    # Get a batch of samples to display
    if args.mode.lower() == 'test':
        images, labels = next(iter(test_loader))
    else:
        images, labels = next(iter(train_loader))
    print("Image size:", images.size())
    print("Label size:", labels.size())
    print("Class-color encoding:", class_encoding)

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("Close the figure window to continue...")
        label_to_rgb = transforms.Compose([
            ext_transforms.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_labels = utils.batch_transform(labels, label_to_rgb)
        utils.imshow_batch(images, color_labels)

    # Get class weights from the selected weighing technique
    print("\nWeighing technique:", args.weighing)
    print("Computing class weights...")
    print("(this can take a while depending on the dataset size)")
    if args.weighing.lower() == 'enet':
        class_weights = enet_weighing(train_loader, num_classes)
    elif args.weighing.lower() == 'mfb':
        class_weights = median_freq_balancing(train_loader, num_classes)
    else:
        class_weights = None

    if class_weights is not None:
        class_weights = torch.from_numpy(class_weights).float().to(device)
        # Set the weight of the unlabeled class to 0
        if args.ignore_unlabeled:
            ignore_index = list(class_encoding).index('unlabeled')
            class_weights[ignore_index] = 0

    print("Class weights:", class_weights)

    return (train_loader, val_loader,
            test_loader), class_weights, class_encoding
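median_freq_balancing follows the class-balancing scheme of Eigen & Fergus: freq(c) is the pixel frequency of class c computed over the images where c appears, and the weight is median(freq) / freq(c). A minimal sketch of that final step, assuming the two count arrays have already been accumulated from the loader (the function and argument names are illustrative):

import numpy as np

def mfb_weights(class_pixel_counts, pixels_in_images_with_class):
    """Both arguments: 1D arrays indexed by class id."""
    freq = class_pixel_counts / pixels_in_images_with_class
    return np.median(freq) / freq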
def main():
    assert os.path.isdir(
        args.dataset_dir), "The directory \"{0}\" doesn't exist.".format(
            args.dataset_dir)

    # Fail fast if the saving directory doesn't exist
    assert os.path.isdir(
        args.save_dir), "The directory \"{0}\" doesn't exist.".format(
            args.save_dir)

    # Import the requested dataset
    if args.dataset.lower() == 'cityscapes':
        from data import Cityscapes as dataset
    else:
        # Should never happen...but just in case it does
        raise RuntimeError("\"{0}\" is not a supported dataset.".format(
            args.dataset))

    print("\nLoading dataset...\n")
    print("Selected dataset:", args.dataset)
    print("Dataset directory:", args.dataset_dir)
    print("Save directory:", args.save_dir)

    image_transform = transforms.Compose(
        [transforms.Resize((args.height, args.width)),
         transforms.ToTensor()])

    label_transform = transforms.Compose([
        transforms.Resize((args.height, args.width)),
        ext_transforms.PILToLongTensor()
    ])

    # Get selected dataset
    # Load the training set as tensors
    train_set = dataset(args.dataset_dir,
                        mode='train',
                        max_iters=args.max_iters,
                        transform=image_transform,
                        label_transform=label_transform)
    train_loader = data.DataLoader(train_set,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.workers)
    trainloader_iter = enumerate(train_loader)

    # Load the validation set as tensors
    val_set = dataset(args.dataset_dir,
                      mode='val',
                      max_iters=args.max_iters,
                      transform=image_transform,
                      label_transform=label_transform)
    val_loader = data.DataLoader(val_set,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Load the test set as tensors
    test_set = dataset(args.dataset_dir,
                       mode='test',
                       max_iters=args.max_iters,
                       transform=image_transform,
                       label_transform=label_transform)
    test_loader = data.DataLoader(test_set,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.workers)

    # Get encoding between pixel values in label images and RGB colors
    class_encoding = train_set.color_encoding

    # Get number of classes to predict
    num_classes = len(class_encoding)

    # Print information for debugging
    print("Number of classes to predict:", num_classes)
    print("Train dataset size:", len(train_set))
    print("Validation dataset size:", len(val_set))

    # Get a batch of samples to display
    if args.mode.lower() == 'test':
        images, labels = next(iter(test_loader))
    else:
        images, labels = next(iter(train_loader))
    print("Image size:", images.size())
    print("Label size:", labels.size())
    print("Class-color encoding:", class_encoding)

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("Close the figure window to continue...")
        label_to_rgb = transforms.Compose([
            ext_transforms.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_labels = utils.batch_transform(labels, label_to_rgb)
        utils.imshow_batch(images, color_labels)

    print("\nTraining...\n")

    # Define the model with the encoder and decoder from the deeplabv2
    input_encoder = Encoder().to(device)
    decoder_t = Decoder(num_classes).to(device)

    # Define the entropy loss for the segmentation task
    criterion = CrossEntropy2d()

    # Set the optimizer function for the model
    optimizer_g = optim.SGD(itertools.chain(input_encoder.parameters(),
                                            decoder_t.parameters()),
                            lr=args.learning_rate,
                            momentum=0.9,
                            weight_decay=1e-4)
    optimizer_g.zero_grad()

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Optionally resume from a checkpoint
    if args.resume:
        input_encoder, decoder_t, optimizer_g, start_epoch, best_miou = \
            utils.load_checkpoint(input_encoder, decoder_t, optimizer_g,
                                  args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start training
    print()
    metric.reset()
    val = Test(input_encoder, decoder_t, val_loader, criterion, metric,
               device)
    for i_iter in range(args.max_iters):
        optimizer_g.zero_grad()
        adjust_learning_rate(optimizer_g, i_iter)

        _, batch_data = next(trainloader_iter)
        inputs = batch_data[0].to(device)
        labels = batch_data[1].to(device)

        f_i = input_encoder(inputs)
        outputs_i = decoder_t(f_i)
        loss_seg = criterion(outputs_i, labels)
        loss_g = loss_seg
        loss_g.backward()
        optimizer_g.step()

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('iter = {0:8d}/{1:8d}, loss_seg = {2:.3f}'.format(
                i_iter, args.max_iters, loss_g))
            print(">>>> [iter: {0:d}] Validation".format(i_iter))

            # Validate the trained model after the weights are saved
            loss, (iou, miou) = val.run_epoch(args.print_step)
            print(">>>> [iter: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(i_iter, loss, miou))

            # Print per-class IoU and save the model if it's the best thus far
            # (the original checked `miou > best_miou` twice in a row, so the
            # two branches are merged here without changing behavior)
            if miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(input_encoder, decoder_t, optimizer_g,
                                      i_iter + 1, best_miou, args)
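The adjust_learning_rate helper called in the loop above is not shown. DeepLab-style training typically uses polynomial decay, lr = base_lr * (1 - iter/max_iter)^0.9, so a plausible sketch (an assumption, not the author's confirmed implementation) is:

def adjust_learning_rate(optimizer, i_iter, power=0.9):
    # Polynomial ("poly") learning-rate decay over the full iteration budget
    lr = args.learning_rate * ((1 - float(i_iter) / args.max_iters) ** power)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr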