Example #1
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(
            json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )

    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    logger.info(f"Training/evaluation parameters {training_args}")

    # Detecting last checkpoint.
    last_checkpoint = None
    if (os.path.isdir(training_args.output_dir) and training_args.do_train
            and not training_args.overwrite_output_dir):
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome.")
        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Initialize our dataset and prepare it for the 'image-classification' task.
    if data_args.dataset_name is not None:
        dataset = load_dataset(
            data_args.dataset_name,
            data_args.dataset_config_name,
            cache_dir=model_args.cache_dir,
            task="image-classification",
            use_auth_token=True if model_args.use_auth_token else None,
        )
    else:
        data_files = {}
        if data_args.train_dir is not None:
            data_files["train"] = os.path.join(data_args.train_dir, "**")
        if data_args.validation_dir is not None:
            data_files["validation"] = os.path.join(data_args.validation_dir,
                                                    "**")
        dataset = load_dataset(
            "imagefolder",
            data_files=data_files,
            cache_dir=model_args.cache_dir,
            task="image-classification",
        )

    # If we don't have a validation split, split off a percentage of train as validation.
    data_args.train_val_split = None if "validation" in dataset.keys(
    ) else data_args.train_val_split
    if isinstance(data_args.train_val_split,
                  float) and data_args.train_val_split > 0.0:
        split = dataset["train"].train_test_split(data_args.train_val_split)
        dataset["train"] = split["train"]
        dataset["validation"] = split["test"]

    # Prepare label mappings.
    # We'll include these in the model's config to get human readable labels in the Inference API.
    labels = dataset["train"].features["labels"].names
    label2id, id2label = dict(), dict()
    for i, label in enumerate(labels):
        label2id[label] = str(i)
        id2label[str(i)] = label
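    # string keys/values keep these mappings stable across the JSON round-trip of the saved config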

    # Load the accuracy metric from the datasets package
    metric = datasets.load_metric("accuracy")

    # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with
    # `predictions` and `label_ids` fields) and must return a dict mapping metric names (str) to floats.
    def compute_metrics(p):
        """Computes accuracy on a batch of predictions"""
        return metric.compute(predictions=np.argmax(p.predictions, axis=1),
                              references=p.label_ids)

    config = AutoConfig.from_pretrained(
        model_args.config_name or model_args.model_name_or_path,
        num_labels=len(labels),
        label2id=label2id,
        id2label=id2label,
        finetuning_task="image-classification",
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    model = AutoModelForImageClassification.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
        ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
    )
    feature_extractor = AutoFeatureExtractor.from_pretrained(
        model_args.feature_extractor_name or model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    # Define torchvision transforms to be applied to each image.
    normalize = Normalize(mean=feature_extractor.image_mean,
                          std=feature_extractor.image_std)
    _train_transforms = Compose([
        RandomResizedCrop(feature_extractor.size),
        RandomHorizontalFlip(),
        ToTensor(),
        normalize,
    ])
    _val_transforms = Compose([
        Resize(feature_extractor.size),
        CenterCrop(feature_extractor.size),
        ToTensor(),
        normalize,
    ])

    def train_transforms(example_batch):
        """Apply _train_transforms across a batch."""
        example_batch["pixel_values"] = [
            _train_transforms(pil_img.convert("RGB"))
            for pil_img in example_batch["image"]
        ]
        return example_batch

    def val_transforms(example_batch):
        """Apply _val_transforms across a batch."""
        example_batch["pixel_values"] = [
            _val_transforms(pil_img.convert("RGB"))
            for pil_img in example_batch["image"]
        ]
        return example_batch

    if training_args.do_train:
        if "train" not in dataset:
            raise ValueError("--do_train requires a train dataset")
        if data_args.max_train_samples is not None:
            dataset["train"] = (dataset["train"].shuffle(
                seed=training_args.seed).select(
                    range(data_args.max_train_samples)))
        # Set the training transforms
        dataset["train"].set_transform(train_transforms)

    if training_args.do_eval:
        if "validation" not in dataset:
            raise ValueError("--do_eval requires a validation dataset")
        if data_args.max_eval_samples is not None:
            dataset["validation"] = (dataset["validation"].shuffle(
                seed=training_args.seed).select(
                    range(data_args.max_eval_samples)))
        # Set the validation transforms
        dataset["validation"].set_transform(val_transforms)

    # Initialize our trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset["train"] if training_args.do_train else None,
        eval_dataset=dataset["validation"] if training_args.do_eval else None,
        compute_metrics=compute_metrics,
        tokenizer=feature_extractor,
        data_collator=collate_fn,
    )

    # Training
    if training_args.do_train:
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()
        trainer.log_metrics("train", train_result.metrics)
        trainer.save_metrics("train", train_result.metrics)
        trainer.save_state()

    # Evaluation
    if training_args.do_eval:
        metrics = trainer.evaluate()
        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Write model card and (optionally) push to hub
    kwargs = {
        "finetuned_from": model_args.model_name_or_path,
        "tasks": "image-classification",
        "dataset": data_args.dataset_name,
        "tags": ["image-classification", "vision"],
    }
    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)
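The Trainer above is handed a `collate_fn` that this excerpt never defines; in the upstream image-classification example it simply stacks the transformed tensors, roughly like this sketch:

def collate_fn(examples):
    # stack the per-image tensors produced by the torchvision transforms
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    labels = torch.tensor([example["labels"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}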
Example #2
def target_transform(crop_size):
    return Compose([
        CenterCrop(crop_size),
        ToTensor(),
    ])
            LeakyReLU(0.2, inplace=True),
            Conv2d(CONFIG["NDF"] * 8,
                   1,
                   kernel_size=4,
                   stride=1,
                   padding=0,
                   bias=False),
            Sigmoid())

    def forward(self, input):
        return self.mainNetwork(input).view(-1)


transforms = Compose([
    Resize(CONFIG["IMAGE_SIZE"]),
    CenterCrop(CONFIG["IMAGE_SIZE"]),
    ToTensor(),
    Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
dataset = ImageFolder('/input/AnimateFace', transform=transforms)
dataLoader = DataLoader(dataset=dataset,
                        batch_size=CONFIG["BATCH_SIZE"],
                        shuffle=True,
                        drop_last=True)
netG, netD = DataParallel(GeneratorNet()), DataParallel(DiscriminatorNet())
map_location = lambda storage, loc: storage

optimizer_generator = Adam(netG.parameters(), 2e-4, betas=(0.5, 0.999))
optimizer_discriminator = Adam(netD.parameters(), 2e-4, betas=(0.5, 0.999))

criterion = BCELoss()
Example #4
def get_generator(args):
    if args.dataset.startswith('mnist'):
        cluttered = args.dataset.endswith('cluttered')
        dataset_train = MNISTMulti('.',
                                   n_digits=args.n_digits,
                                   backrand=args.backrand,
                                   cluttered=cluttered,
                                   image_rows=args.row,
                                   image_cols=args.col,
                                   download=True,
                                   size_min=args.size_min,
                                   size_max=args.size_max)
        dataset_valid = MNISTMulti('.',
                                   n_digits=args.n_digits,
                                   backrand=args.backrand,
                                   cluttered=cluttered,
                                   image_rows=args.row,
                                   image_cols=args.col,
                                   download=False,
                                   mode='valid',
                                   size_min=args.size_min,
                                   size_max=args.size_max)
        dataset_test = MNISTMulti('.',
                                  n_digits=args.n_digits,
                                  backrand=args.backrand,
                                  cluttered=cluttered,
                                  image_rows=args.row,
                                  image_cols=args.col,
                                  download=False,
                                  mode='test',
                                  size_min=args.size_min,
                                  size_max=args.size_max)
        train_sampler = valid_sampler = test_sampler = None
        loader_train = data_generator_mnistmulti(dataset_train,
                                                 args.batch_size,
                                                 shuffle=True)
        loader_valid = data_generator_mnistmulti(dataset_valid,
                                                 args.v_batch_size,
                                                 shuffle=False)
        loader_test = data_generator_mnistmulti(dataset_test,
                                                args.v_batch_size,
                                                shuffle=False)
        preprocessor = preprocess_mnistmulti
    elif args.dataset == 'cifar10':
        # TODO: test set
        #transform_train = Compose([
        #    RandomCrop(32, padding=4),
        #    RandomHorizontalFlip(),
        #    ToTensor(),
        #])
        #transform_test = Compose([
        #    ToTensor(),
        #])
        dataset_train = torchvision.datasets.CIFAR10('.',
                                                     download=True,
                                                     transform=ToTensor())
        dataset_valid = torchvision.datasets.CIFAR10('.',
                                                     download=True,
                                                     transform=ToTensor())
        train_sampler = SubsetRandomSampler(range(0, 45000))
        valid_sampler = SubsetSampler(range(45000, 50000))
        loader_train = data_generator_cifar10(dataset_train,
                                              args.batch_size,
                                              sampler=train_sampler)
        loader_valid = data_generator_cifar10(dataset_valid,
                                              args.v_batch_size,
                                              sampler=valid_sampler)
        loader_test = None
        preprocessor = preprocess_cifar10
        args.row = args.col = 32
    elif args.dataset == 'bird':
        transform_train = Compose([
            ToPILImage(),
            RandomCrop(448),
            RandomHorizontalFlip(),
            ToTensor(),
        ])
        transform_test = Compose([
            ToPILImage(),
            CenterCrop(448),
            ToTensor(),
        ])
        dataset_train = BirdSingle('train', transform=transform_train)
        dataset_test = BirdSingle('test', transform=transform_test)
        train_sampler = SubsetRandomSampler(range(0, 3000))
        #valid_sampler = SubsetSampler(range(2700, 3000))
        test_sampler = SubsetSampler(range(0, 3033))
        loader_train = data_generator_bird(dataset_train,
                                           args.batch_size,
                                           sampler=train_sampler)
        #loader_valid = data_generator_bird(dataset_train, args.batch_size, sampler=valid_sampler)
        loader_test = data_generator_bird(dataset_test,
                                          args.v_batch_size,
                                          sampler=test_sampler)
        loader_valid = loader_test
        preprocessor = preprocess_bird
    elif args.dataset == 'flower':
        dataset_train = FlowerSingle('train')
        dataset_valid = FlowerSingle('valid')
        dataset_test = FlowerSingle('test')
        loader_train = data_generator_flower(dataset_train,
                                             args.batch_size,
                                             shuffle=True)
        loader_valid = data_generator_flower(dataset_valid,
                                             args.v_batch_size,
                                             shuffle=False)
        loader_test = data_generator_flower(dataset_test,
                                            args.v_batch_size,
                                            shuffle=False)
        preprocessor = preprocess_flower
    elif args.dataset in ['imagenet', 'dogs']:
        # TODO: test set
        dataset_train = ImageNetSingle(args.imagenet_root,
                                       args.imagenet_train_sel,
                                       args.batch_size)
        dataset_valid = ImageNetSingle(args.imagenet_root,
                                       args.imagenet_valid_sel,
                                       args.v_batch_size)
        train_sampler = ImageNetBatchSampler(dataset_train)
        valid_sampler = ImageNetBatchSampler(dataset_valid)
        loader_train = data_generator_imagenet(dataset_train,
                                               args.batch_size,
                                               num_workers=args.num_workers)
        loader_valid = data_generator_imagenet(dataset_valid,
                                               args.batch_size,
                                               num_workers=args.num_workers)
        loader_test = None
        preprocessor = preprocess_imagenet
    else:
        raise ValueError('unknown dataset: ' + args.dataset)

    return loader_train, loader_valid, loader_test, preprocessor
Example #5
def generator_loss(netsD, image_encoder, fake_imgs, real_labels, words_embs,
                   sent_emb, match_labels, cap_lens, class_ids, model,
                   sent_emb_damsm, sent_emb_clip):
    numDs = len(netsD)
    batch_size = real_labels.size(0)
    logs = ''
    # Forward
    errG_total = 0

    for i in range(numDs):
        features = netsD[i](fake_imgs[i])
        cond_logits = netsD[i].COND_DNET(features, sent_emb)
        cond_errG = nn.BCELoss()(cond_logits, real_labels)
        if netsD[i].UNCOND_DNET is not None:
            logits = netsD[i].UNCOND_DNET(features)
            errG = nn.BCELoss()(logits, real_labels)
            g_loss = errG + cond_errG
        else:
            g_loss = cond_errG
        errG_total += g_loss
        # err_img = errG_total.data[0]
        logs += 'g_loss%d: %.2f ' % (i, g_loss.item())

        # Ranking loss
        if i == (numDs - 1):
            # words_features: batch_size x nef x 17 x 17
            # sent_code: batch_size x nef
            # new: rename
            region_features_damsm, cnn_code_damsm = image_encoder(fake_imgs[i])

            #print("cnn_code before: ", cnn_code[0])
            #print("fake_imgs[i] shape: ", fake_imgs[i].shape) # torch.Size([10, 3, 256, 256])
            #print("cnn_code shape: ", cnn_code.shape)  # torch.Size([10, 512])
            #print("region_features shape: ", region_features.shape)    # torch.Size([10, 512, 17, 17])

            w_loss0, w_loss1, _ = words_loss(region_features_damsm, words_embs,
                                             match_labels, cap_lens, class_ids,
                                             batch_size)
            w_loss = (w_loss0 + w_loss1) * \
                     cfg.TRAIN.SMOOTH.LAMBDA
            # err_words = err_words + w_loss.data[0]

            # new: use CLIP ImageEncoder for global image features (cnn_code)
            if cfg.TRAIN.CLIP_LOSS:
                # model = torch.jit.load("model.pt").cuda().eval()
                input_resolution = model.input_resolution.item()  # 224

                preprocess = Compose([
                    Resize(input_resolution, interpolation=Image.BICUBIC),
                    CenterCrop(input_resolution),
                    ToTensor()
                ])

                images = []
                for j in range(fake_imgs[i].shape[0]):
                    image = fake_imgs[i][j].cpu().clone()
                    image = image.squeeze(0)
                    unloader = transforms.ToPILImage()
                    image = unloader(image)

                    image = preprocess(
                        image.convert("RGB"))  # 256*256 -> 224*224
                    images.append(image)

                image_mean = torch.tensor([0.48145466, 0.4578275,
                                           0.40821073]).cuda()
                image_std = torch.tensor([0.26862954, 0.26130258,
                                          0.27577711]).cuda()

                image_input = torch.stack(images).cuda()
                image_input -= image_mean[:, None, None]
                image_input /= image_std[:, None, None]

                #print("image_input shape: ", image_input.shape) # torch.Size([10, 3, 224, 224])

                with torch.no_grad():
                    cnn_code_clip = model.encode_image(image_input).float()

                    #print("cnn_code shape: ", cnn_code.shape)    # torch.Size([10, 512])
            #print("cnn_code after: ", cnn_code[0])

                # new: add an additional DAMSM sentence loss
                if cfg.TRAIN.EXTRA_LOSS:
                    weight = cfg.TRAIN.WEIGHT_DAMSM_LOSS
                    s_loss0_damsm, s_loss1_damsm = sent_loss(
                        cnn_code_damsm, sent_emb_damsm, match_labels,
                        class_ids, batch_size)
                    s_loss0_clip, s_loss1_clip = sent_loss(
                        cnn_code_clip, sent_emb_clip, match_labels, class_ids,
                        batch_size)
                    # blend the DAMSM and CLIP sentence losses by `weight`
                    s_loss0 = weight * s_loss0_damsm + (1 - weight) * s_loss0_clip
                    s_loss1 = weight * s_loss1_damsm + (1 - weight) * s_loss1_clip
                else:
                    s_loss0, s_loss1 = sent_loss(cnn_code_clip, sent_emb_clip,
                                                 match_labels, class_ids,
                                                 batch_size)
            else:
                if cfg.TRAIN.CLIP_SENTENCODER:  # sent_emb_clip
                    sys.exit("ERROR: Cannot use CLIP text encoder only")
                else:
                    s_loss0, s_loss1 = sent_loss(cnn_code_damsm,
                                                 sent_emb_damsm, match_labels,
                                                 class_ids, batch_size)
            s_loss = (s_loss0 + s_loss1) * \
                cfg.TRAIN.SMOOTH.LAMBDA
            # err_sent = err_sent + s_loss.data[0]

            errG_total += w_loss + s_loss
            logs += 'w_loss: %.2f s_loss: %.2f ' % (w_loss.item(),
                                                    s_loss.item())
    return errG_total, logs
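A side note on the CLIP preprocessing above: the manual per-channel mean/std arithmetic applied to `image_input` can equivalently be folded into the Compose as a `Normalize` step (the same constants appear in Example #23), e.g.:

preprocess = Compose([
    Resize(input_resolution, interpolation=Image.BICUBIC),
    CenterCrop(input_resolution),
    ToTensor(),
    # identical CLIP statistics to the tensor-level normalization above
    Normalize((0.48145466, 0.4578275, 0.40821073),
              (0.26862954, 0.26130258, 0.27577711)),
])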
Example #6
def get_transforms(transforms_list,
                   width,
                   height,
                   is_train):
    transforms = []
    for transform in transforms_list:
        if transform == 'random_resized_crop':
            scale = (0.8, 1.2) if is_train else (1.0, 1.0)
            ratio = (1.0, 1.0)  # aspect ratio is fixed for both train and eval
            transforms.append(
                RandomResizedCrop(
                    (width, height),  # note: torchvision treats size as (h, w)
                    scale=scale,
                    ratio=ratio,
                ))
        elif transform == 'center_crop':
            transforms.append(CenterCrop((700, 700)))
        elif transform == 'resize':
            transforms.append(Resize((width, height)))
        elif transform == 'crop_black':  # crop_black must be placed first in the list.
            p = 1.0  # applied in both train and eval
            transforms.append(CropBlack(p))
        elif transform == 'random_rotate':
            p = 0.5 if is_train else 0.25
            transforms.append(RandomRotate(p))
        elif transform == 'random_vertical_flip':
            p = 0.5 if is_train else 0.25
            transforms.append(RandomVerticalFlip(p))
        elif transform == 'random_horizontal_flip':
            p = 0.5 if is_train else 0.25
            transforms.append(RandomHorizontalFlip(p))
        elif transform == 'random_color_jitter':
            brightness = 0.1 if is_train else 0.0
            contrast = 0.1 if is_train else 0.0
            transforms.append(ColorJitter(
                brightness=brightness,
                contrast=contrast,
                saturation=0,
                hue=0,
            ))
        elif transform == 'random_grayscale':
            p = 0.5 if is_train else 0.25
            transforms.append(RandomGrayscale(p))
        elif transform == 'ben_graham':
            p = 1.0  # applied in both train and eval
            transforms.append(BenGrahamAug(p))
        elif transform == 'imagenet_policy':
            transforms.append(ImageNetPolicy())
        elif transform == 'cifar_policy':
            transforms.append(CIFAR10Policy())
        elif transform == 'svhn_policy':
            transforms.append(SVHNPolicy())
        else:
            raise NotImplementedError(f"Unknown transform: {transform}")
    return transforms
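get_transforms returns a plain Python list, so a caller presumably wraps it before handing it to a dataset; a minimal sketch under that assumption (the trailing ToTensor is illustrative, not part of the snippet):

from torchvision.transforms import Compose, ToTensor

# hypothetical usage of get_transforms defined above
train_tf = Compose(
    get_transforms(['resize', 'random_horizontal_flip', 'random_color_jitter'],
                   width=512, height=512, is_train=True) + [ToTensor()])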
Example #7
def train(args):
    writer = SummaryWriter(log_dir=args.logdir)

    # Datasets
    dataset_tr = CUBDataset(root=args.datapath,
                            train=True,
                            transforms=Compose([
                                Resize(256),
                                RandomCrop((224, 224), pad_if_needed=True),
                                RandomHorizontalFlip(),
                                ToTensor()
                            ]))
    data_loader_tr = DataLoader(dataset_tr,
                                batch_size=args.batch_size,
                                shuffle=True,
                                num_workers=args.number_workers)

    dataset_val = CUBDataset(root=args.datapath,
                             train=False,
                             transforms=Compose([CenterCrop(224),
                                                 ToTensor()]))
    data_loader_val = DataLoader(dataset_val,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=args.number_workers)

    # Model
    model = BirdNet(num_classes=dataset_tr.number_classes).to(args.device)

    # Optimizer
    optimizer = Adam(
        params=model.classifier.parameters(),  # optimize only the classifier layer
        lr=args.learning_rate,
        weight_decay=args.weight_decay)

    # Meters
    meter_loss = AverageMeter()
    meter_accuracy = AverageMeter()
    train_accuracy, train_loss, val_accuracy, val_loss = 0, 0, 0, 0

    epoch_bar = tqdm.trange(args.number_epochs, desc='Epoch')
    for epoch in epoch_bar:
        epoch_start_time = time()

        # Training
        model.train()
        torch.set_grad_enabled(True)
        batch_bar = tqdm.tqdm(data_loader_tr, desc='Batch')
        meter_loss.reset()
        meter_accuracy.reset()
        for batch in batch_bar:
            input_batch = batch[0].to(args.device)
            target = batch[1].to(args.device)
            logits = model(input_batch)

            number_samples = target.shape[0]
            predictions = logits.argmax(dim=1)
            accuracy = (predictions == target).float().sum() / number_samples
            loss = F.cross_entropy(logits, target)
            meter_accuracy.update(accuracy, number_samples)
            meter_loss.update(loss, number_samples)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # batch_bar.set_postfix({'loss': loss.item()})

        train_accuracy = meter_accuracy.get_average()
        train_loss = meter_loss.get_average()
        epoch_bar.set_postfix({"loss": train_loss, "accuracy": train_accuracy})
        writer.add_scalar("/train/loss", train_loss, epoch)
        writer.add_scalar("/train/accuracy", train_accuracy, epoch)

        # Validation
        model.eval()
        torch.set_grad_enabled(False)
        batch_bar = tqdm.tqdm(data_loader_val, desc='Batch')
        meter_loss.reset()
        meter_accuracy.reset()
        for batch in batch_bar:
            input_batch = batch[0].to(args.device)
            target = batch[1].to(args.device)
            logits = model(input_batch)

            number_samples = target.shape[0]
            predictions = logits.argmax(dim=1)
            accuracy = (predictions == target).float().sum() / number_samples
            loss = F.cross_entropy(logits, target)
            meter_accuracy.update(accuracy, number_samples)
            meter_loss.update(loss, number_samples)

        val_accuracy = meter_accuracy.get_average()
        val_loss = meter_loss.get_average()
        epoch_time = time() - epoch_start_time

        epoch_bar.set_postfix({"loss": val_loss, "accuracy": val_accuracy})
        writer.add_scalar("/validation/loss", val_loss, epoch)
        writer.add_scalar("/validation/accuracy", val_accuracy, epoch)
        writer.add_scalar("time_per_epoch", epoch_time, epoch)

    torch.save(model.classifier.state_dict(),
               str(args.logdir / "final_model.pt"))
    return {
        "train": {
            "accuracy": train_accuracy,
            "loss": train_loss
        },
        "validation": {
            "accuracy": val_accuracy,
            "loss": val_loss
        }
    }
Example #8
    cv2.imshow("test", cv_image)
    cv2.waitKey(-1)


def showTensorImage(tensor_image):
    pil_image = transforms.ToPILImage()(tensor_image).convert('RGB')
    showImage(pil_image)


if __name__ == "__main__":
    from torchvision.transforms import Compose, CenterCrop, Normalize
    from torchvision.transforms import ToTensor, ToPILImage
    from piwise.transform import Relabel, ToLabel, Colorize
    image_transform = ToPILImage()
    input_transform = Compose([
        CenterCrop(30),
        ToTensor(),
        #Normalize([.485, .456, .406], [.229, .224, .225]),
    ])
    target_transform = Compose([
        CenterCrop(30),
        ToLabel(),
        #Relabel(255, 21),
    ])

    dataset = VOC12("/data_1/data/VOC2012/VOCdevkit/VOC2012", input_transform,
                    target_transform)
    for image, label in dataset:
        print(label)
        #showTensorImage(image)
Example #9
def display_transform():
    return Compose([ToPILImage(), Resize(448), CenterCrop(448), ToTensor()])
Example #10
def input_transform(crop_size, upscale_factor):
    return Compose([
        CenterCrop(crop_size),
        Resize(crop_size // upscale_factor, interpolation=Image.BICUBIC)
    ])
Example #11
def target_transform(crop_size):
    return Compose([CenterCrop(crop_size)])
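Examples #10 and #11 are the usual super-resolution pair: the input is center-cropped and then downscaled by `upscale_factor`, while the target keeps the full-resolution crop. A hypothetical wiring (the DatasetFromFolder name and paths are assumptions, not from the snippets):

upscale_factor = 4
crop_size = 256 - (256 % upscale_factor)  # keep the crop divisible by the scale
train_set = DatasetFromFolder("data/train",  # hypothetical dataset class and path
                              input_transform=input_transform(crop_size, upscale_factor),
                              target_transform=target_transform(crop_size))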
Example #12
def test_train():
    # This function walks through how the train tool parses its configuration and runs.
    global weight
    from robosat.robosat.tools.train import train
    from robosat.robosat.tools.train import validate
    from robosat.robosat.config import load_config
    from robosat.robosat.unet import UNet

    from torch.nn import DataParallel
    from robosat.robosat.losses import CrossEntropyLoss2d, mIoULoss2d, FocalLoss2d, LovaszLoss2d
    import collections
    from robosat.robosat.log import Log
    args = parse_default()
    print(args)
    model = load_config(args.model)
    dataset = load_config(args.dataset)
    print(dataset)
    workers = args.workers
    print(model)

    device = torch.device("cuda" if model["common"]["cuda"] else "cpu")
    print("device", device)
    if model["common"]["cuda"] and not torch.cuda.is_available():
        sys.exit("Error: CUDA requested but not available")
    # Create the checkpoint folder (it lives under the project root).
    os.makedirs(model["common"]["checkpoint"], exist_ok=True)
    num_classes = len(dataset["common"]["classes"])
    print("num_classes", num_classes)
    #####################################################
    # Load the UNet; the ResNet backbone is downloaded by default (mine is cached at C:\Users\Administrator/.cache\torch\checkpoints\resnet50-19c8e357.pth)
    net = UNet(num_classes)
    net = DataParallel(net)
    net = net.to(device)
    print(net)
    if model["common"]["cuda"]:
        torch.backends.cudnn.benchmark = True
    ##################################################
    # Set up the training parameters.
    # The "CrossEntropy", "mIoU" and "Focal" loss functions require dataset weights.
    try:
        weight = torch.Tensor(dataset["weights"]["values"])
    except KeyError:
        if model["opt"]["loss"] in ("CrossEntropy", "mIoU", "Focal"):
            sys.exit(
                "Error: the selected loss function requires dataset weights values")
    optimizer = Adam(net.parameters(), lr=model["opt"]["lr"])
    resume = 0
    if args.checkpoint:
        # Not sure exactly what this does; with the default of False it is skipped.
        pass
    if model["opt"]["loss"] == "CrossEntropy":
        criterion = CrossEntropyLoss2d(weight=weight).to(device)
    elif model["opt"]["loss"] == "mIoU":
        criterion = mIoULoss2d(weight=weight).to(device)
    elif model["opt"]["loss"] == "Focal":
        criterion = FocalLoss2d(weight=weight).to(device)
    elif model["opt"]["loss"] == "Lovasz":
        criterion = LovaszLoss2d().to(device)
    else:
        sys.exit("Error: Unknown [opt][loss] value !")
    #####################################################################
    # Load the datasets.
    target_size = (model["common"]["image_size"], ) * 2
    print("target_size", target_size)
    batch_size = model["common"]["batch_size"]
    print("batch_size", batch_size)
    # Path to the dataset.
    path = dataset["common"]["dataset"]
    print("path", path)
    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

    from robosat.robosat.transforms import (
        JointCompose,
        JointTransform,
        JointRandomHorizontalFlip,
        JointRandomRotation,
        ConvertImageMode,
        ImageToTensor,
        MaskToTensor,
    )
    from torchvision.transforms import Resize, CenterCrop, Normalize
    transform = JointCompose([
        JointTransform(ConvertImageMode("RGB"), ConvertImageMode("P")),
        JointTransform(Resize(target_size, Image.BILINEAR),
                       Resize(target_size, Image.NEAREST)),
        JointTransform(CenterCrop(target_size), CenterCrop(target_size)),
        JointRandomHorizontalFlip(0.5),
        JointRandomRotation(0.5, 90),
        JointRandomRotation(0.5, 90),
        JointRandomRotation(0.5, 90),
        JointTransform(ImageToTensor(), MaskToTensor()),
        JointTransform(Normalize(mean=mean, std=std), None),
    ])
    from robosat.robosat.datasets import SlippyMapTilesConcatenation
    train_dataset = SlippyMapTilesConcatenation(
        [os.path.join(path, "training", "images")],
        os.path.join(path, "training", "labels"), transform)
    val_dataset = SlippyMapTilesConcatenation(
        [os.path.join(path, "validation", "images")],
        os.path.join(path, "validation", "labels"), transform)
    print("len train_dataset:", len(train_dataset))
    print("len val_dataset:", len(val_dataset))
    assert len(train_dataset) > 0, "at least one tile in training dataset"
    assert len(val_dataset) > 0, "at least one tile in validation dataset"
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              drop_last=True,
                              num_workers=workers)
    val_loader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            drop_last=True,
                            num_workers=workers)
    ############################################
    # Log the training hyperparameters.
    num_epochs = model["opt"]["epochs"]
    if resume >= num_epochs:
        sys.exit(
            "Error: Epoch {} set in {} already reached by the checkpoint provided"
            .format(num_epochs, args.model))
    history = collections.defaultdict(list)
    log = Log(os.path.join(model["common"]["checkpoint"], "log"))
    log.log("--- Hyper Parameters on Dataset: {} ---".format(
        dataset["common"]["dataset"]))
    log.log("Batch Size:\t {}".format(model["common"]["batch_size"]))
    log.log("Image Size:\t {}".format(model["common"]["image_size"]))
    log.log("Learning Rate:\t {}".format(model["opt"]["lr"]))
    log.log("Loss function:\t {}".format(model["opt"]["loss"]))
    if "weight" in locals():
        log.log("Weights :\t {}".format(dataset["weights"]["values"]))
    log.log("---")
    ##########################################################
    # Start training.
    for epoch in range(resume, num_epochs):
        log.log("Epoch: {}/{}".format(epoch + 1, num_epochs))

        train_hist = train(train_loader, num_classes, device, net, optimizer,
                           criterion)
        log.log(
            "Train    loss: {:.4f}, mIoU: {:.3f}, {} IoU: {:.3f}, MCC: {:.3f}".
            format(
                train_hist["loss"],
                train_hist["miou"],
                dataset["common"]["classes"][1],
                train_hist["fg_iou"],
                train_hist["mcc"],
            ))

        for k, v in train_hist.items():
            history["train " + k].append(v)
        val_hist = validate(val_loader, num_classes, device, net, criterion)
        log.log(
            "Validate loss: {:.4f}, mIoU: {:.3f}, {} IoU: {:.3f}, MCC: {:.3f}".
            format(val_hist["loss"], val_hist["miou"],
                   dataset["common"]["classes"][1], val_hist["fg_iou"],
                   val_hist["mcc"]))
        if (epoch + 1) % 10 == 0:
            for k, v in val_hist.items():
                history["val " + k].append(v)

            visual = "history-{:05d}-of-{:05d}.png".format(
                epoch + 1, num_epochs)
            plot(os.path.join(model["common"]["checkpoint"], visual), history)

            checkpoint = "checkpoint-{:05d}-of-{:05d}.pth".format(
                epoch + 1, num_epochs)

            states = {
                "epoch": epoch + 1,
                "state_dict": net.state_dict(),
                "optimizer": optimizer.state_dict()
            }

            torch.save(states,
                       os.path.join(model["common"]["checkpoint"], checkpoint))
Example #13
    def __init__(self,
                 Datasets_params,
                 mode,
                 transform=None,
                 input_transform=None,
                 target_transform=None,
                 randstep=5,
                 rand=None,
                 in_type=None):
        super(DAVIS2017_loader, self).__init__()
        self.iter_mode = mode
        self.randstep = randstep
        self.Datasets_params = Datasets_params
        self.reading_type = Datasets_params[0]['reading_type']
        self.num_objects = []
        datasets = []
        X_train = []
        y_train = []
        X_val = []
        y_val = []
        X_test = []
        y_test = []

        for DP in Datasets_params:
            X = []
            Y = []
            self.root = DP['root']
            if DP['reading_type'] in ['SVOS', 'SVOS-YTB']:
                self.years = DP['year']
                if DP['mode'] in ['test', '16val', '17val', 'YTB18']:
                    Set = '/val.txt'
                elif DP['mode'] in ['16all']:
                    Set = '/trainval.txt'
                elif DP['mode'] in ['test_dev', '17test_dev']:
                    Set = '/test-dev.txt'
                with open(self.root + 'ImageSets/' + self.years + Set) as f:
                    SetsTxts = f.readlines()
                    print("Reading folders ", SetsTxts)
                # if DP['mode'] in ['all', 'online_all']:
                #     with open(self.root + 'ImageSets/' + self.years + '/val.txt') as f:
                #         SetsTxts2 = f.readlines()
                #     SetsTxts = SetsTxts + SetsTxts2
                Dirs = [
                    self.root + 'JPEGImages/480p/' + name[0:-1]
                    for name in SetsTxts
                ]
                Dirs.sort()
                for dir in Dirs:
                    print("scanning DIR ", dir)
                    files = glob(dir + '/*.*')
                    files.sort()
                    if self.iter_mode == 'test':
                        X.append(files)
                        if DP['tar_mode'] == 'find':
                            Y_files = glob(
                                dir.replace('JPEGImages', 'Annotations') +
                                '/*.*')
                            if len(Y_files) == 0:
                                print(dir + ': annotations not found')
                        else:
                            Y_files = [
                                f.replace('.jpg', '.png').replace(
                                    'JPEGImages',
                                    'Annotations').replace('.bmp', '.png')
                                for f in files
                            ]
                        Y_files.sort()
                        Y.append(Y_files)
                    else:
                        raise AssertionError(
                            'unsupported iter_mode: ' + str(self.iter_mode))
                    if DP['reading_type'] != 'SVOS-YTB':
                        _mask = np.array(Image.open(Y_files[0]).convert("P"))
                        self.num_objects.append(np.max(_mask))
                if DP['mode'] == 'train':
                    X_train = X
                    y_train = Y
                elif DP['mode'] in [
                        'test', 'all', 'test_dev', '17test_dev', '16val',
                        '17val', '16all', 'YTB18'
                ]:
                    X_test = X
                    y_test = Y
                datasets.append(
                    dict(X_train=[X_train],
                         y_train=[y_train],
                         X_valid=[X_val],
                         y_valid=[y_val],
                         X_test=[X_test],
                         y_test=[y_test]))
        self.image_filenames = Data_combinePicNameList(datasets)
        self.transform = transform
        self.input_transform = input_transform
        self.target_transform = target_transform
        self.centerCrop = CenterCrop((480, 864))
        self.random_crop = RandomCrop((512, 960))
        self.rand = rand
        self.in_type = in_type
        self.idx_0 = 0
Example #14
        plot_data.append(samples[i].cpu())
        all_dists = torch.min(sample_cdist[i], flip_sample_cdist[i])
        indices = torch.topk(-all_dists, k=k)[1]
        for ind in indices:
            plot_data.append(data[ind])

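    # each grid row: a sample followed by its k nearest data points
    # under the flip-invariant distance computed above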
    plot_data = torch.stack(plot_data, dim=0)
    save_image(plot_data, '{}.png'.format(name), nrow=k + 1)


if __name__ == '__main__':
    args = parser.parse_args()
    if args.dataset == 'church':
        transforms = Compose([
            Resize(96),
            CenterCrop(96),
            ToTensor()
        ])
        dataset = LSUN('exp/datasets/lsun', ['church_outdoor_train'], transform=transforms)

    elif args.dataset == 'tower' or args.dataset == 'bedroom':
        transforms = Compose([
            Resize(128),
            CenterCrop(128),
            ToTensor()
        ])
        dataset = LSUN('exp/datasets/lsun', ['{}_train'.format(args.dataset)], transform=transforms)

    elif args.dataset == 'celeba':
        transforms = Compose([
            CenterCrop(140),
Example #15
def main(args):
    normalize = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform = Compose([Resize(256), CenterCrop(224), ToTensor(), normalize])

    dataset = ImageDataset(args.image_folder,
                           transform=transform,
                           return_paths=True)
    # n_images = len(dataset)
    dataloader = DataLoader(dataset,
                            shuffle=False,
                            batch_size=args.batch_size,
                            pin_memory=True,
                            num_workers=0)

    model = models.resnet50(pretrained=True).to(args.device)
    model.eval()

    config = tf.ConfigProto(intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1,
                            allow_soft_placement=True,
                            device_count={'CPU': 1})
    sess = tf.Session(config=config)
    x_op = tf.placeholder(tf.float32, shape=(
        None,
        3,
        224,
        224,
    ))

    tf_model = convert_pytorch_model_to_tf(model, args.device)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')

    # compute clip_min and clip_max using a full black and a full white image
    clip_min = normalize(torch.zeros(3, 1, 1)).min().item()
    clip_max = normalize(torch.ones(3, 1, 1)).max().item()

    eps = args.eps / 255.
    eps_iter = 20
    nb_iter = 10
    args.ord = np.inf if args.ord < 0 else args.ord
    grad_params = {'eps': eps, 'ord': args.ord}
    common_params = {'clip_min': clip_min, 'clip_max': clip_max}
    iter_params = {'eps_iter': eps_iter / 255., 'nb_iter': nb_iter}

    attack_name = ''
    if args.attack == 'fgsm':
        attack_name = '_L{}_eps{}'.format(args.ord, args.eps)
        attack_op = FastGradientMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params}
    elif args.attack == 'iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = BasicIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'm-iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = MomentumIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'pgd':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = MadryEtAl(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'jsma':
        attack_op = SaliencyMapMethod(cleverhans_model, sess=sess)
        attack_params = {'theta': eps, 'symbolic_impl': False, **common_params}
    elif args.attack == 'deepfool':
        attack_op = DeepFool(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'cw':
        attack_op = CarliniWagnerL2(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'lbfgs':
        attack_op = LBFGS(cleverhans_model, sess=sess)
        target = np.zeros((1, 1000))
        target[0, np.random.randint(1000)] = 1
        y = tf.placeholder(tf.float32, target.shape)
        attack_params = {'y_target': y, **common_params}

    attack_name = args.attack + attack_name

    print('Running [{}]. Params: {}'.format(args.attack.upper(),
                                            attack_params))

    adv_x_op = attack_op.generate(x_op, **attack_params)
    adv_preds_op = tf_model(adv_x_op)
    preds_op = tf_model(x_op)

    n_success = 0
    n_processed = 0
    progress = tqdm(dataloader)
    for paths, x in progress:

        progress.set_description('ATTACK')

        # `y`/`target` are only defined for the LBFGS attack; feed them conditionally
        feed_dict = {x_op: x}
        if args.attack == 'lbfgs':
            feed_dict[y] = target
        z, adv_x, adv_z = sess.run([preds_op, adv_x_op, adv_preds_op],
                                   feed_dict=feed_dict)

        src, dst = np.argmax(z, axis=1), np.argmax(adv_z, axis=1)
        success = src != dst
        success_paths = np.array(paths)[success]
        success_adv_x = adv_x[success]
        success_src = src[success]
        success_dst = dst[success]

        n_success += success_adv_x.shape[0]
        n_processed += x.shape[0]

        progress.set_postfix(
            {'Success': '{:3.2%}'.format(n_success / n_processed)})
        progress.set_description('SAVING')

        for p, a, s, d in zip(success_paths, success_adv_x, success_src,
                              success_dst):
            path = '{}_{}_src{}_dst{}.npz'.format(p, attack_name, s, d)
            path = os.path.join(args.out_folder, path)
            np.savez_compressed(path, img=a)
Example #16
from MA.transform import ToLabel, Relabel
from MA.dataset import MA, eval_ds

# from basic_net.dataset import dt_ma

torch.cuda.set_device(0)

NUM_CHANNELS = 3
NUM_CLASSES = 2

color_transform = Colorize()
image_transform = ToPILImage()

input_transform = Compose([
    Scale(256),
    CenterCrop(256),
    ToTensor(),
    Normalize([.485, .456, .406], [.229, .224, .225]),
])

eval_input_transform = Compose([
    Scale(256),
    ToTensor(),
    Normalize([.485, .456, .406], [.229, .224, .225]),
])

target_transform = Compose([
    Scale(256),
    CenterCrop(256),
    ToLabel(),
    Relabel(255, 1),
Example #17
                    help="Location of mapping file for gestures to commands")
args = parser.parse_args()

parser.print_help()
# sys.exit(1)

print('Using %s for inference' % ('GPU' if args.use_gpu else 'CPU'))

# initialise some variables
verbose = args.verbose
device = torch.device(
    "cuda" if args.use_gpu and torch.cuda.is_available() else "cpu")

transform = Compose([
    ToPILImage(),
    CenterCrop(84),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

model = ConvColumn(num_classes)

# read in configuration file for mapping of gestures to keyboard keys
mapping = configparser.ConfigParser()
action = {}
if os.path.isfile(args.mapping):
    mapping.read(args.mapping)

    for m in mapping['MAPPING']:
        val = mapping['MAPPING'][m].split(',')
        action[m] = {
Example #18

TRAIN_TRANSFORMS = [
    RandomApply(
        [RandomAffine(degrees=45, translate=(0.1, 0.1), scale=(0.7, 1.2), resample=2), ],
        p=0.5
    ),
    RandomCrop(size=350),
    RandomHorizontalFlip(p=0.5),
    RandomVerticalFlip(p=0.5),
    ColorJitter(hue=0.1, brightness=0.1),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
]
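# Note: the two Random*Flip transforms below also run at validation time, so
# evaluation is stochastic (effectively a light test-time augmentation).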
VAL_TRANSFORMS = [
    CenterCrop(size=350),
    RandomHorizontalFlip(p=0.5),
    RandomVerticalFlip(p=0.5),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
]


BATCH_SIZE = 32
NUM_WORKERS = 8

TRAIN_LOADER, VAL_LOADER = get_data_loaders(
    train_dataset_path=DATASET_PATH / "train_400x400",
    val_dataset_path=DATASET_PATH / "val_400x400",
    train_data_transform=TRAIN_TRANSFORMS,
    val_data_transform=VAL_TRANSFORMS,
Example #19
def init_dataloaders(src_path,
                     tgt_path,
                     src_num,
                     tgt_num,
                     sample_ratio,
                     resize_dim,
                     batch_size,
                     shuffle,
                     crop_size=224,
                     filter_num_cls=50):
    if not "domainnet" in src_path and not "domainnet" in tgt_path:
        transforms = Compose([
            ToPILImage(),
            Resize(resize_dim),
            ToTensor(),
            Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        train_transforms = transforms
        test_transforms = transforms
    else:
        train_transforms = Compose([
            ToPILImage(),
            RandomHorizontalFlip(),
            Resize(resize_dim),
            RandomCrop(resize_dim),
            ToTensor(),
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        test_transforms = Compose([
            ToPILImage(),
            Resize(resize_dim),
            CenterCrop(resize_dim),
            ToTensor(),
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    train_dataset = PairDataset(src_path,
                                tgt_path,
                                src_num,
                                tgt_num,
                                sample_ratio,
                                transform=train_transforms,
                                filter_num_cls=filter_num_cls)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=8)

    valid_dataset = SingleDataset(tgt_path,
                                  "te",
                                  transform=test_transforms,
                                  filter_num_cls=filter_num_cls)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=batch_size,
                                  shuffle=False,
                                  num_workers=4)

    test_dataset = SingleDataset(tgt_path,
                                 "te",
                                 transform=test_transforms,
                                 filter_num_cls=filter_num_cls)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=4)

    return train_dataloader, valid_dataloader, test_dataloader
Example #20
def input_transform(crop_size):
    return Compose([
        CenterCrop(crop_size),
        ToTensor()
    ])
Example #21
            Conv2d(config.DISCRIMINATOR_FEATURES_NUM * 8,
                   1,
                   kernel_size=4,
                   stride=1,
                   padding=0,
                   bias=False),
            Sigmoid())

    def forward(self, input):
        return self.mainNetwork(input).view(-1)


if PHRASE == "TRAIN":
    transforms = Compose([
        Resize(config.IMAGE_SIZE),
        CenterCrop(config.IMAGE_SIZE),
        ToTensor(),
        Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    dataset = ImageFolder(config.GAN_DATA_PATH, transform=transforms)
    dataLoader = DataLoader(dataset=dataset,
                            batch_size=config.BATCH_SIZE,
                            shuffle=True,
                            num_workers=config.NUM_WORKERS_LOAD_IMAGE,
                            drop_last=True)
    netG, netD = DataParallel(GeneratorNet()), DataParallel(DiscriminatorNet())
    map_location = lambda storage, loc: storage

    optimizer_generator = Adam(netG.parameters(),
                               config.LR_GENERATOR,
                               betas=(config.BETA1, 0.999))
Example #22
    def __init__(self, opt, val=False):
        super(CustomImageNet1K, self).__init__()
        dir_dataset = os.path.join(opt.path_ImageNet, "Val" if val else "Train")
       
        self.list_input = sorted(glob(os.path.join(dir_dataset, "*.JPEG")))
        assert len(self.list_input) > 0, "Please check the path of dataset. Current path is set as {}".format(dir_dataset)
        if val:
            # path_label = "/mnt/home/gishin/training_WNID2class.txt"
            path_label = opt.path_label_val
            dict_WNID2label = dict()
            # The validation ground-truth file has one integer label per line.
            with open(path_label, 'r') as txt_file:
                for i, row in enumerate(txt_file):
                    dict_WNID2label.update({i: int(row) - 1})
            self.label = dict_WNID2label

            self.transform = Compose([Resize(256),
                                      CenterCrop(224),
                                      ToTensor(),
                                      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

        else:
            path_label = opt.path_label_train
            dict_WNID2label = dict()
            with open(path_label, 'r') as txt_file:
                csv_file = reader(txt_file, delimiter=',')
                for i, row in enumerate(csv_file):
                    if i != 0:
                        if int(row[1]) - 1 == 1000:
                            break
                        dict_WNID2label.update({row[0]: int(row[1]) - 1})  # -1 is for making the label start from 0.
                        
                    else:
                        pass
            self.label = dict_WNID2label

            self.transform = Compose([RandomResizedCrop(224),
                                      RandomHorizontalFlip(),
                                      ToTensor(),
                                      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
        self.val = val
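A hypothetical usage sketch for the class above; the `opt` attribute names follow the constructor, and the paths are placeholders:

from types import SimpleNamespace

opt = SimpleNamespace(
    path_ImageNet="/path/to/ImageNet",              # placeholder
    path_label_train="/path/to/train_labels.txt",   # placeholder
    path_label_val="/path/to/val_labels.txt",       # placeholder
)
val_set = CustomImageNet1K(opt, val=True)  # expects Val/*.JPEG under path_ImageNet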
Example #23
def load(name: str,
         device: Union[str, torch.device] = "cuda"
         if torch.cuda.is_available() else "cpu"):
    if name not in _MODELS:
        raise RuntimeError(
            f"Model {name} not found; available models = {available_models()}")

    model_path = _download(_MODELS[name])
    model = torch.jit.load(model_path, map_location=device).eval()
    n_px = model.input_resolution.item()

    # patch the device names
    device_holder = torch.jit.trace(
        lambda: torch.ones([]).to(torch.device(device)), example_inputs=[])
    device_node = [
        n for n in device_holder.graph.findAllNodes("prim::Constant")
        if "Device" in repr(n)
    ][-1]

    def patch_device(module):
        graphs = [module.graph] if hasattr(module, "graph") else []
        if hasattr(module, "forward1"):
            graphs.append(module.forward1.graph)

        for graph in graphs:
            for node in graph.findAllNodes("prim::Constant"):
                if "value" in node.attributeNames() and str(
                        node["value"]).startswith("cuda"):
                    node.copyAttributes(device_node)

    model.apply(patch_device)
    patch_device(model.encode_image)
    patch_device(model.encode_text)

    # patch dtype to float32 on CPU
    if device == "cpu":
        float_holder = torch.jit.trace(lambda: torch.ones([]).float(),
                                       example_inputs=[])
        float_input = list(float_holder.graph.findNode("aten::to").inputs())[1]
        float_node = float_input.node()

        def patch_float(module):
            graphs = [module.graph] if hasattr(module, "graph") else []
            if hasattr(module, "forward1"):
                graphs.append(module.forward1.graph)

            for graph in graphs:
                for node in graph.findAllNodes("aten::to"):
                    inputs = list(node.inputs())
                    for i in [1, 2]:  # dtype can be the second or third argument to aten::to()
                        if inputs[i].node()["value"] == 5:
                            inputs[i].node().copyAttributes(float_node)

        model.apply(patch_float)
        patch_float(model.encode_image)
        patch_float(model.encode_text)

        model.float()

    transform = Compose([
        Resize(n_px, interpolation=Image.BICUBIC),
        CenterCrop(n_px),
        lambda image: image.convert("RGB"),
        ToTensor(),
        Normalize((0.48145466, 0.4578275, 0.40821073),
                  (0.26862954, 0.26130258, 0.27577711)),
    ])

    return model, transform
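Assuming `_MODELS` holds entries like "ViT-B/32" (as in OpenAI's CLIP release, which this loader closely resembles), usage might look like:

import torch
from PIL import Image

model, preprocess = load("ViT-B/32")  # model name is an assumption
image = preprocess(Image.open("example.jpg")).unsqueeze(0)  # placeholder image path
with torch.no_grad():
    image_features = model.encode_image(image)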
Example #24
    def __init__(self,
                 opts,
                 blur_root,
                 sharp_root,
                 sharp_start_root,
                 gt_root,
                 gt_pos_root,
                 test_mode=False):
        blur_datasets = np.load(blur_root)
        sharp_datasets = np.load(sharp_root)
        sharp_start_datasets = np.load(sharp_start_root)
        gt_datasets = np.load(gt_root)
        gt_pos_datasets = np.load(gt_pos_root)
        self.test_mode = test_mode

        category = blur_datasets.files[0]
        self.blur = blur_datasets[category]
        self.sharp = sharp_datasets[category]
        self.sharp_start = sharp_start_datasets[category]
        self.gt = gt_datasets[category]
        self.gt_pos = gt_pos_datasets[category]

        self.batch_size = self.blur.shape[0]
        # NOTE: test_set_ratio is not defined in this snippet; it is presumably
        # a module-level constant in the original source.
        self.train_set_num = int((1 - test_set_ratio) * self.batch_size)
        if not test_mode:
            self.blur = self.blur[:self.train_set_num]
            self.sharp = self.sharp[:self.train_set_num]
            self.sharp_start = self.sharp_start[:self.train_set_num]
            self.gt = self.gt[:self.train_set_num]
            self.gt_pos = self.gt_pos[:self.train_set_num]
        else:
            self.blur = self.blur[self.train_set_num:]
            self.sharp = self.sharp[self.train_set_num:]
            self.sharp_start = self.sharp_start[self.train_set_num:]
            self.gt = self.gt[self.train_set_num:]
            self.gt_pos = self.gt_pos[self.train_set_num:]
        # flatten
        self.blur = np.concatenate(self.blur, 0)
        self.sharp = np.concatenate(self.sharp, 0)
        self.sharp_start = np.concatenate(self.sharp_start, 0)
        print(self.gt.shape)

        self.gt = np.concatenate(self.gt, 0)
        self.gt = np.reshape(self.gt, [self.gt.shape[0], -1])
        self.gt = self.gt.astype(np.float32)

        self.gt_pos = np.concatenate(self.gt_pos, 0)
        self.gt_pos = np.reshape(self.gt_pos, [self.gt_pos.shape[0], -1])
        self.gt_pos = self.gt_pos.astype(np.float32)

        self.dataset_size = len(self.blur)

        self.input_dim_A = opts.input_dim_a
        self.input_dim_B = opts.input_dim_b
        self.resize_x = opts.resize_size_x
        self.resize_y = opts.resize_size_y

        if opts.phase == 'train':
            transforms = [RandomCrop(opts.crop_size)]
        else:
            transforms = [CenterCrop(opts.crop_size)]

        transforms.append(ToTensor())
        transforms.append(Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]))
        self.transforms = Compose(transforms)
        print('train A, B: %d images' % (self.dataset_size))
        return
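The snippet above starts at `__init__`, so the enclosing `Dataset` subclass (and its `__getitem__`) is not shown. A minimal hypothetical pairing, consistent with the fields it initializes:

from torch.utils.data import Dataset

class BlurSharpDataset(Dataset):
    """Hypothetical wrapper; the original class name and item format are not shown."""

    def __init__(self, blur, sharp, sharp_start, gt, gt_pos):
        self.blur, self.sharp, self.sharp_start = blur, sharp, sharp_start
        self.gt, self.gt_pos = gt, gt_pos
        self.dataset_size = len(blur)

    def __len__(self):
        return self.dataset_size

    def __getitem__(self, index):
        # The exact tuple the original code returns is an assumption.
        return (self.blur[index], self.sharp[index], self.sharp_start[index],
                self.gt[index], self.gt_pos[index])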
Example #25
def input_transform(crop_size, upscale_factor):
    return Compose([
        CenterCrop(crop_size),
        Resize(crop_size // upscale_factor),
        ToTensor(),
    ])
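For example, with `crop_size=256` and `upscale_factor=4` the pipeline center-crops to 256×256, then downsamples to a 64×64 low-resolution input (values chosen for illustration):

lr_transform = input_transform(crop_size=256, upscale_factor=4)
# lr_transform(img) -> 3x64x64 float tensor for a 256x256-or-larger RGB image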
Example #26
batch_size = args.batch_size
epoch = args.num_epoch
save_path = 'model_save/'

#normalize for ImageNet
normalize = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])

crop = 200
rng = np.random.RandomState(args.random_seed)
precrop = crop + 24  # 224: the center-crop size applied before the random crop
crop = rng.randint(crop, precrop)  # random crop size drawn from [200, 224)
transformations = Compose([
    Scale((256, 256)),  # `Scale` is the old torchvision name for `Resize`
    Pad((24, 24, 24, 24)),
    CenterCrop(precrop),
    RandomCrop(crop),
    Scale((256, 256)),
    ToTensor(), normalize
])


#define a batch-wise l2 loss
def criterion_l2(input_f, target_f):
    # return a per batch l2 loss
    res = (input_f - target_f)
    res = res * res
    return res.sum(dim=2)
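A quick sanity check of `criterion_l2`; the `[batch, n, features]` shape is an assumption implied by `sum(dim=2)`:

import torch

a = torch.randn(8, 10, 128)  # hypothetical [batch, n, features] tensors
b = torch.randn(8, 10, 128)
loss = criterion_l2(a, b)    # squared L2 over the feature dim -> shape [8, 10]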


def criterion_l2_2(input_f, target_f):
Example #27
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torchvision.datasets.mnist import MNIST
from torchvision.transforms import Compose, Resize, ToTensor, CenterCrop
from torch.optim import Adam

from einops import rearrange

import opt_einsum as oe

from libcrap.torch import set_random_seeds

# Note: Resize's size must be a single int or an (h, w) tuple; a bare second
# int would be taken as the interpolation flag, so (4, 4) is used here.
MNIST_TRANSFORM = Compose((CenterCrop((16, 16)), Resize((4, 4)), ToTensor()))

train_size = 50000
batch_size = 512
device = torch.device("cuda:1")
lr = 1e-2
num_iters = 30000
mov_avg_coeff = 0.99
seed = 0
save_where = (
    "/mnt/important/experiments/tiny_mnist_probabilistic_multilinear_classifier_adam.pth"
)

set_random_seeds(device, seed)
print(f"{seed=}")
Example #28
def target_transform(crop_size):
    return Compose([
        CenterCrop(crop_size),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
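A usage sketch; the crop size and image path are placeholders:

from PIL import Image

transform = target_transform(crop_size=224)  # example value
target = transform(Image.open("example.jpg").convert("RGB"))  # 3x224x224, ImageNet-normalized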
Example #29
def main():
    args = parse_args()

    # Initialize the accelerator. We will let the accelerator handle device placement for us in this example.
    # If we're using tracking, we also need to initialize it here and it will pick up all supported trackers in the environment
    accelerator = Accelerator(
        log_with="all",
        logging_dir=args.output_dir) if args.with_tracking else Accelerator()
    # Make one log on every process with the configuration for debugging.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )
    logger.info(accelerator.state)

    # Setup logging, we only want one process per machine to log things on the screen.
    # accelerator.is_local_main_process is only True for one process per machine.
    logger.setLevel(
        logging.INFO if accelerator.is_local_main_process else logging.ERROR)
    if accelerator.is_local_main_process:
        datasets.utils.logging.set_verbosity_warning()
        transformers.utils.logging.set_verbosity_info()
    else:
        datasets.utils.logging.set_verbosity_error()
        transformers.utils.logging.set_verbosity_error()

    # If passed along, set the training seed now.
    if args.seed is not None:
        set_seed(args.seed)

    # Handle the repository creation
    if accelerator.is_main_process:
        if args.push_to_hub:
            if args.hub_model_id is None:
                repo_name = get_full_repo_name(Path(args.output_dir).name,
                                               token=args.hub_token)
            else:
                repo_name = args.hub_model_id
            repo = Repository(args.output_dir, clone_from=repo_name)

            with open(os.path.join(args.output_dir, ".gitignore"),
                      "w+") as gitignore:
                if "step_*" not in gitignore:
                    gitignore.write("step_*\n")
                if "epoch_*" not in gitignore:
                    gitignore.write("epoch_*\n")
        elif args.output_dir is not None:
            os.makedirs(args.output_dir, exist_ok=True)
    accelerator.wait_for_everyone()

    # Get the datasets: you can either provide your own training and evaluation files (see below)
    # or specify a Dataset from the hub (the dataset will be downloaded automatically from the datasets Hub).

    # In distributed training, the load_dataset function guarantees that only one local process can concurrently
    # download the dataset.
    if args.dataset_name is not None:
        # Downloading and loading a dataset from the hub.
        dataset = load_dataset(args.dataset_name, task="image-classification")
    else:
        data_files = {}
        if args.train_dir is not None:
            data_files["train"] = os.path.join(args.train_dir, "**")
        if args.validation_dir is not None:
            data_files["validation"] = os.path.join(args.validation_dir, "**")
        dataset = load_dataset(
            "imagefolder",
            data_files=data_files,
            cache_dir=args.cache_dir,
            task="image-classification",
        )
        # See more about loading custom images at
        # https://huggingface.co/docs/datasets/v2.0.0/en/image_process#imagefolder.

    # If we don't have a validation split, split off a percentage of train as validation.
    args.train_val_split = None if "validation" in dataset.keys() else args.train_val_split
    if isinstance(args.train_val_split, float) and args.train_val_split > 0.0:
        split = dataset["train"].train_test_split(args.train_val_split)
        dataset["train"] = split["train"]
        dataset["validation"] = split["test"]

    # Prepare label mappings.
    # We'll include these in the model's config to get human readable labels in the Inference API.
    labels = dataset["train"].features["labels"].names
    label2id = {label: str(i) for i, label in enumerate(labels)}
    id2label = {str(i): label for i, label in enumerate(labels)}

    # Load pretrained model and feature extractor
    #
    # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    config = AutoConfig.from_pretrained(
        args.model_name_or_path,
        num_labels=len(labels),
        id2label=id2label,
        label2id=label2id,
        finetuning_task="image-classification",
    )
    feature_extractor = AutoFeatureExtractor.from_pretrained(
        args.model_name_or_path)
    model = AutoModelForImageClassification.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
    )

    # Preprocessing the datasets

    # Define torchvision transforms to be applied to each image.
    normalize = Normalize(mean=feature_extractor.image_mean,
                          std=feature_extractor.image_std)
    train_transforms = Compose([
        RandomResizedCrop(feature_extractor.size),
        RandomHorizontalFlip(),
        ToTensor(),
        normalize,
    ])
    val_transforms = Compose([
        Resize(feature_extractor.size),
        CenterCrop(feature_extractor.size),
        ToTensor(),
        normalize,
    ])

    def preprocess_train(example_batch):
        """Apply _train_transforms across a batch."""
        example_batch["pixel_values"] = [
            train_transforms(image.convert("RGB"))
            for image in example_batch["image"]
        ]
        return example_batch

    def preprocess_val(example_batch):
        """Apply _val_transforms across a batch."""
        example_batch["pixel_values"] = [
            val_transforms(image.convert("RGB"))
            for image in example_batch["image"]
        ]
        return example_batch

    with accelerator.main_process_first():
        if args.max_train_samples is not None:
            dataset["train"] = dataset["train"].shuffle(seed=args.seed).select(
                range(args.max_train_samples))
        # Set the training transforms
        train_dataset = dataset["train"].with_transform(preprocess_train)
        if args.max_eval_samples is not None:
            dataset["validation"] = dataset["validation"].shuffle(
                seed=args.seed).select(range(args.max_eval_samples))
        # Set the validation transforms
        eval_dataset = dataset["validation"].with_transform(preprocess_val)

    # DataLoaders creation:
    def collate_fn(examples):
        pixel_values = torch.stack(
            [example["pixel_values"] for example in examples])
        labels = torch.tensor([example["labels"] for example in examples])
        return {"pixel_values": pixel_values, "labels": labels}

    train_dataloader = DataLoader(train_dataset,
                                  shuffle=True,
                                  collate_fn=collate_fn,
                                  batch_size=args.per_device_train_batch_size)
    eval_dataloader = DataLoader(eval_dataset,
                                 collate_fn=collate_fn,
                                 batch_size=args.per_device_eval_batch_size)

    # Optimizer
    # Split weights in two groups, one with weight decay and the other not.
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0,
        },
    ]
    optimizer = torch.optim.AdamW(optimizer_grouped_parameters,
                                  lr=args.learning_rate)

    # Scheduler and math around the number of training steps.
    num_update_steps_per_epoch = math.ceil(
        len(train_dataloader) / args.gradient_accumulation_steps)
    if args.max_train_steps is None:
        args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch
    else:
        args.num_train_epochs = math.ceil(args.max_train_steps /
                                          num_update_steps_per_epoch)

    lr_scheduler = get_scheduler(
        name=args.lr_scheduler_type,
        optimizer=optimizer,
        num_warmup_steps=args.num_warmup_steps,
        num_training_steps=args.max_train_steps,
    )

    # Prepare everything with our `accelerator`.
    model, optimizer, train_dataloader, eval_dataloader, lr_scheduler = accelerator.prepare(
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler)

    # Figure out how often we should save the Accelerator states
    if hasattr(args.checkpointing_steps, "isdigit"):  # i.e. it was passed as a string
        checkpointing_steps = args.checkpointing_steps
        if args.checkpointing_steps.isdigit():
            checkpointing_steps = int(args.checkpointing_steps)
    else:
        checkpointing_steps = None

    # We need to initialize the trackers we use, and also store our configuration
    if args.with_tracking:
        experiment_config = vars(args)
        # TensorBoard cannot log Enums, need the raw value
        experiment_config["lr_scheduler_type"] = experiment_config[
            "lr_scheduler_type"].value
        accelerator.init_trackers("image_classification_no_trainer",
                                  experiment_config)

    # Get the metric function
    metric = load_metric("accuracy")

    # Train!
    total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps

    logger.info("***** Running training *****")
    logger.info(f"  Num examples = {len(train_dataset)}")
    logger.info(f"  Num Epochs = {args.num_train_epochs}")
    logger.info(
        f"  Instantaneous batch size per device = {args.per_device_train_batch_size}"
    )
    logger.info(
        f"  Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}"
    )
    logger.info(
        f"  Gradient Accumulation steps = {args.gradient_accumulation_steps}")
    logger.info(f"  Total optimization steps = {args.max_train_steps}")
    # Only show the progress bar once on each machine.
    progress_bar = tqdm(range(args.max_train_steps),
                        disable=not accelerator.is_local_main_process)
    completed_steps = 0
    # Potentially load in the weights and states from a previous save
    resume_step = None
    if args.resume_from_checkpoint:
        if args.resume_from_checkpoint is not None and args.resume_from_checkpoint != "":
            accelerator.print(
                f"Resumed from checkpoint: {args.resume_from_checkpoint}")
            accelerator.load_state(args.resume_from_checkpoint)
            path = args.resume_from_checkpoint
        else:
            # Get the most recent checkpoint
            dirs = [f.name for f in os.scandir(os.getcwd()) if f.is_dir()]
            dirs.sort(key=os.path.getctime)
            path = dirs[-1]  # sorted by creation time, so the most recent checkpoint is last
        if "epoch" in path:
            args.num_train_epochs -= int(path.replace("epoch_", ""))
        else:
            resume_step = int(path.replace("step_", ""))
            args.num_train_epochs -= resume_step // len(train_dataloader)
            resume_step = (args.num_train_epochs *
                           len(train_dataloader)) - resume_step

    for epoch in range(args.num_train_epochs):
        model.train()
        if args.with_tracking:
            total_loss = 0
        for step, batch in enumerate(train_dataloader):
            # We need to skip steps until we reach the resumed step
            if args.resume_from_checkpoint and epoch == 0 and resume_step is not None and step < resume_step:
                continue
            outputs = model(**batch)
            loss = outputs.loss
            # We keep track of the loss at each epoch
            if args.with_tracking:
                total_loss += loss.detach().float()
            loss = loss / args.gradient_accumulation_steps
            accelerator.backward(loss)
            if step % args.gradient_accumulation_steps == 0 or step == len(
                    train_dataloader) - 1:
                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()
                progress_bar.update(1)
                completed_steps += 1

            if isinstance(checkpointing_steps, int):
                if completed_steps % checkpointing_steps == 0:
                    output_dir = f"step_{completed_steps}"
                    if args.output_dir is not None:
                        output_dir = os.path.join(args.output_dir, output_dir)
                    accelerator.save_state(output_dir)

                    if args.push_to_hub and epoch < args.num_train_epochs - 1:
                        accelerator.wait_for_everyone()
                        unwrapped_model = accelerator.unwrap_model(model)
                        unwrapped_model.save_pretrained(
                            args.output_dir, save_function=accelerator.save)
                        if accelerator.is_main_process:
                            feature_extractor.save_pretrained(args.output_dir)
                            repo.push_to_hub(
                                commit_message=
                                f"Training in progress {completed_steps} steps",
                                blocking=False,
                                auto_lfs_prune=True,
                            )

            if completed_steps >= args.max_train_steps:
                break

        model.eval()
        samples_seen = 0
        for step, batch in enumerate(eval_dataloader):
            outputs = model(**batch)
            predictions = outputs.logits.argmax(dim=-1)
            predictions, references = accelerator.gather(
                (predictions, batch["labels"]))
            # If we are in a multiprocess environment, the last batch has duplicates
            if accelerator.num_processes > 1:
                if step == len(eval_dataloader) - 1:
                    predictions = predictions[:len(eval_dataloader.dataset) -
                                              samples_seen]
                    references = references[:len(eval_dataloader.dataset) -
                                            samples_seen]
                else:
                    samples_seen += references.shape[0]
            metric.add_batch(
                predictions=predictions,
                references=references,
            )

        eval_metric = metric.compute()
        logger.info(f"epoch {epoch}: {eval_metric}")

        if args.with_tracking:
            accelerator.log({
                "accuracy": eval_metric,
                "train_loss": total_loss,
                "epoch": epoch,
                "step": completed_steps,
            })

        if args.push_to_hub and epoch < args.num_train_epochs - 1:
            accelerator.wait_for_everyone()
            unwrapped_model = accelerator.unwrap_model(model)
            unwrapped_model.save_pretrained(args.output_dir,
                                            save_function=accelerator.save)
            if accelerator.is_main_process:
                feature_extractor.save_pretrained(args.output_dir)
                repo.push_to_hub(
                    commit_message=f"Training in progress epoch {epoch}",
                    blocking=False,
                    auto_lfs_prune=True)

        if args.checkpointing_steps == "epoch":
            output_dir = f"epoch_{epoch}"
            if args.output_dir is not None:
                output_dir = os.path.join(args.output_dir, output_dir)
            accelerator.save_state(output_dir)

    if args.output_dir is not None:
        accelerator.wait_for_everyone()
        unwrapped_model = accelerator.unwrap_model(model)
        unwrapped_model.save_pretrained(args.output_dir,
                                        save_function=accelerator.save)
        if accelerator.is_main_process:
            feature_extractor.save_pretrained(args.output_dir)
            if args.push_to_hub:
                repo.push_to_hub(commit_message="End of training",
                                 auto_lfs_prune=True)

    if args.output_dir is not None:
        with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
            json.dump({"eval_accuracy": eval_metric["accuracy"]}, f)
Example #30
from resnet.resnet_single_scale import *

import importlib

from iouEval import iouEval, getColorEntry

from shutil import copyfile

NUM_CHANNELS = 3
NUM_CLASSES = 28

color_transform = Colorize(NUM_CLASSES)
image_transform = ToPILImage()
input_transform = Compose([
    CenterCrop(240),
    ToTensor(),
    Normalize([.485, .456, .406], [.229, .224, .225]),
])
target_transform = Compose([
    CenterCrop(240),
    ToLabel(),
    Relabel(255, 27),  # remap the 255 "ignore" value to the last class index (27)
])
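A sketch of applying the paired transforms above; `ToLabel`/`Relabel` are assumed to come from the project's own transform utilities (their imports are not shown in the snippet), and the paths are placeholders:

from PIL import Image

img = Image.open("frame.png").convert("RGB")  # placeholder paths
lbl = Image.open("frame_labels.png")
img_tensor = input_transform(img)   # 3x240x240, ImageNet-normalized
lbl_tensor = target_transform(lbl)  # label tensor with 255 remapped to class 27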


#Augmentations - different function implemented to perform random augments on both image and target
class MyCoTransform(object):
    def __init__(self, enc, augment=True, height=512):
        self.enc = enc
        self.augment = augment