Example #1
    def __init__(
        self,
        log_path: Optional[str] = None,
        writer: Optional[SummaryWriter] = None,
        name: str = "tensorboard",
        enabled: bool = True,
    ):
        if tensorboard_import_error:
            raise tensorboard_import_error

        if writer and log_path:
            raise ValueError(
                (
                    "log_path given:{} and writer object passed in, "
                    "to create a writer at the log path set writer=None"
                ).format(log_path)
            )
        elif not writer and not log_path:
            log_path = os.path.join(".", "tensorboard")

        if log_path:
            create_dirs(log_path)

        self._writer = writer if writer is not None else SummaryWriter(log_path)
        super().__init__(lambda_func=self._log_lambda, name=name, enabled=enabled)
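A minimal usage sketch; the class name TensorBoardLogger is an assumption here, since the snippet only shows its __init__:

# hypothetical usage of the __init__ above; TensorBoardLogger is an assumed name
logger = TensorBoardLogger(log_path="./runs/exp1")  # creates a SummaryWriter at the path
# or reuse an existing writer instead of a path:
# logger = TensorBoardLogger(writer=SummaryWriter("./runs/exp1"))
# passing both log_path and writer raises the ValueError shown above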
Example #2
    def _download_and_extract(self):
        if self._dataset_size == ImagenetteSize.full:
            url = "https://s3.amazonaws.com/fast-ai-imageclas/imagewoof.tgz"
        elif self._dataset_size == ImagenetteSize.s320:
            url = "https://s3.amazonaws.com/fast-ai-imageclas/imagewoof-320.tgz"
        elif self._dataset_size == ImagenetteSize.s160:
            url = "https://s3.amazonaws.com/fast-ai-imageclas/imagewoof-160.tgz"
        else:
            raise ValueError("unknown imagenette size given of {}".format(
                self._dataset_size))

        create_dirs(self._extracted_root)
        file_path = "{}.tar".format(self._extracted_root)

        if os.path.exists(file_path):
            print("already downloaded imagewoof {}".format(self._dataset_size))

            return

        download_file(
            url,
            file_path,
            overwrite=False,
            progress_title="downloading imagewoof {}".format(
                self._dataset_size),
        )

        with tarfile.open(file_path, "r:gz") as tar:
            tar.extractall(path=self.download_root)
Example #3
def setup_save_and_log_dirs(args) -> Tuple[str, Optional[str]]:
    # Saving dir setup
    save_dir = os.path.abspath(os.path.expanduser(args.save_dir))
    if not args.model_tag:
        model_tag = "{}@{}".format(args.arch_key, args.dataset)
        model_id = model_tag
        model_inc = 0

        while os.path.exists(os.path.join(save_dir, model_id)):
            model_inc += 1
            model_id = "{}__{:02d}".format(model_tag, model_inc)
    else:
        model_id = args.model_tag

    save_dir = os.path.join(save_dir, model_id)
    create_dirs(save_dir)
    LOGGER.info("Model directory is set to {}".format(save_dir))

    # log dir setup; only the train command writes logs
    log_dir = None
    if args.command == TRAIN_COMMAND:
        log_dir = os.path.join(
            os.path.abspath(os.path.expanduser(args.log_dir)), model_id
        )
        create_dirs(log_dir)
        LOGGER.info("Logging directory is set to {}".format(log_dir))
    return save_dir, log_dir
Example #4
def _setup_save_dirs(args) -> Tuple[str, Optional[str]]:
    # logging and saving setup
    save_dir = os.path.abspath(os.path.expanduser(args.save_dir))
    logs_dir = (
        os.path.abspath(os.path.expanduser(args.logs_dir))
        if args.command == TRAIN_COMMAND
        else None
    )

    if not args.model_tag:
        model_tag = "{}_{}".format(args.arch_key.replace("/", "."),
                                   args.dataset)
        model_id = model_tag
        model_inc = 0
        # set location to check for models with same name
        model_main_dir = logs_dir or save_dir

        while os.path.exists(os.path.join(model_main_dir, model_id)):
            model_inc += 1
            model_id = "{}__{:02d}".format(model_tag, model_inc)
    else:
        model_id = args.model_tag

    save_dir = os.path.join(save_dir, model_id)
    create_dirs(save_dir)

    # logs dir setup
    if args.command == TRAIN_COMMAND:
        logs_dir = os.path.join(logs_dir, model_id)
        create_dirs(logs_dir)
    else:
        logs_dir = None
    LOGGER.info("Model id is set to {}".format(model_id))
    return save_dir, logs_dir
Example #5
def tensors_export(
    tensors: Union[Tensor, Iterable[Tensor]],
    export_dir: str,
    name_prefix: str,
    counter: int = 0,
    break_batch: bool = False,
) -> List[str]:
    """
    :param tensors: the tensors to export to a saved numpy array file
    :param export_dir: the directory to export the files in
    :param name_prefix: the prefix name for the tensors to save as, will append
        info about the position of the tensor in a list or dict in addition
        to the .npy file format
    :param counter: the current counter to save the tensor at
    :param break_batch: treat the tensor as a batch and break apart into
        multiple tensors
    :return: the exported paths
    """
    create_dirs(export_dir)
    exported_paths = []
    if break_batch:
        _tensors_export_batch(tensors, export_dir, name_prefix, counter,
                              exported_paths)
    else:
        _tensors_export_recursive(tensors, export_dir, name_prefix, counter,
                                  exported_paths)

    return exported_paths
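A short usage sketch, assuming PyTorch tensors and that tensors_export is importable from this module:

import torch

# export one tensor; returns the list of written file paths
paths = tensors_export(torch.randn(8, 3, 224, 224), "exports", "inp")
# break_batch=True splits along the batch dimension, one file per sample
sample_paths = tensors_export(
    torch.randn(8, 10), "exports", "out", counter=0, break_batch=True
)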
Example #6
    def __init__(
        self,
        root: str,
        train: bool = True,
        image_size: int = 32,
        pre_resize_transforms: Optional[SplitsTransforms] = SplitsTransforms(
            train=(preprocess_for_train,),
            val=(preprocess_for_eval,),
        ),
        post_resize_transforms: Optional[SplitsTransforms] = SplitsTransforms(
            train=None,
            val=None,
        ),
        download: bool = True,
    ):
        create_dirs(root)
        self._download_dir = os.path.join(root, "download")
        self._extract_dir = os.path.join(root, "extract")
        self._train_dir = os.path.join(root, "train")
        self._test_dir = os.path.join(root, "test")
        if download and not os.path.exists(self._download_dir):
            self._download_and_extract()
            self._create_image_folders()

        self._per_pixel_mean = None
        super().__init__(
            root,
            train,
            image_size=image_size,
            pre_resize_transforms=pre_resize_transforms,
            post_resize_transforms=post_resize_transforms,
        )
Example #7
def _save_checkpoint(args, sess, save_dir, checkpoint_name) -> str:
    checkpoint_path = os.path.join(save_dir, checkpoint_name, "model")
    create_dirs(checkpoint_path)
    saver = ModelRegistry.saver(args.arch_key)
    saved_name = saver.save(sess, checkpoint_path)
    checkpoint_path = os.path.join(checkpoint_path, saved_name)
    LOGGER.info("Checkpoint saved to {}".format(checkpoint_path))
    return checkpoint_path
Example #8
    def _download_and_extract(self):
        """
        Download and extract the dataset into root
        """
        create_dirs(self._download_dir)
        file_path = os.path.join(self._download_dir, "cifar-100-python.tar.gz")
        url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
        download_file(
            url,
            file_path,
            overwrite=False,
            progress_title="downloading CIFAR-100",
        )

        create_dirs(self._extract_dir)
        with tarfile.open(file_path, "r:gz") as tar:
            tar.extractall(path=self._extract_dir)
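Note that tarfile.extractall writes archive member paths as-is. On Python 3.12+ (with backports to recent 3.8-3.11 patch releases) an extraction filter can reject unsafe members; a minimal sketch under that assumption:

import tarfile

def safe_extract(archive_path: str, dest_dir: str):
    # the "data" filter rejects absolute paths, path traversal,
    # and special files; requires a Python with extraction filters
    with tarfile.open(archive_path, "r:gz") as tar:
        tar.extractall(path=dest_dir, filter="data")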
Example #9
    def export_named_samples(
        self,
        inp_dict: Dict[Union[str, tf_compat.Tensor], numpy.ndarray],
        out_dict: Dict[Union[str, tf_compat.Tensor], numpy.ndarray],
    ):
        """
        Export sample inputs and outputs for the model to the local system.

        :param inp_dict: the inputs to save
        :param out_dict: the outputs to save
        """
        inp_dict = OrderedDict(
            [
                (tens if isinstance(tens, str) else tens.name, val)
                for tens, val in inp_dict.items()
            ]
        )
        out_dict = OrderedDict(
            [
                (tens if isinstance(tens, str) else tens.name, val)
                for tens, val in out_dict.items()
            ]
        )
        create_dirs(self.sample_inputs_path)
        create_dirs(self.sample_outputs_path)
        exp_counter = path_file_count(self.sample_inputs_path, "inp*.npz")
        tensors_export(
            inp_dict,
            self.sample_inputs_path,
            name_prefix="inp",
            counter=exp_counter,
            break_batch=True,
        )
        tensors_export(
            out_dict,
            self.sample_outputs_path,
            name_prefix="out",
            counter=exp_counter,
            break_batch=True,
        )
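A hedged usage sketch; exporter is assumed to be an instance of the surrounding class, and the tensor names are illustrative:

import numpy

# export one hypothetical batch of sample inputs and outputs
exporter.export_named_samples(
    inp_dict={"input:0": numpy.random.randn(1, 3, 224, 224)},
    out_dict={"output:0": numpy.random.randn(1, 1000)},
)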
Example #10
def get_save_dir_and_loggers(
        args: Any,
        task: Optional[Tasks] = None) -> Tuple[Union[str, None], List]:
    if args.is_main_process:
        save_dir = os.path.abspath(os.path.expanduser(args.save_dir))
        logs_dir = (
            os.path.abspath(os.path.expanduser(args.logs_dir))
            if task == Tasks.TRAIN
            else None
        )

        if not args.model_tag:
            dataset_name = (f"{args.dataset}-{args.dataset_kwargs['year']}"
                            if "year" in args.dataset_kwargs else args.dataset)
            model_tag = f"{args.arch_key.replace('/', '.')}_{dataset_name}"
            model_id = model_tag
            model_inc = 0
            # set location to check for models with same name
            model_main_dir = logs_dir or save_dir

            while os.path.exists(os.path.join(model_main_dir, model_id)):
                model_inc += 1
                model_id = f"{model_tag}__{model_inc:02d}"
        else:
            model_id = args.model_tag

        save_dir = os.path.join(save_dir, model_id)
        create_dirs(save_dir)

        # loggers setup
        loggers = [PythonLogger()]
        if task == Tasks.TRAIN:
            logs_dir = os.path.join(logs_dir, model_id)
            create_dirs(logs_dir)
            loggers.append(TensorBoardLogger(log_path=logs_dir))
        print(f"Model id is set to {model_id}")
    else:
        # do not log for non main processes
        save_dir = None
        loggers = []
    return save_dir, loggers
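A hedged usage sketch; the args fields mirror what the function reads, and Tasks is assumed to come from the surrounding module:

from argparse import Namespace

args = Namespace(
    is_main_process=True,
    save_dir="~/runs",
    logs_dir="~/logs",
    model_tag=None,
    arch_key="resnet50",
    dataset="imagenette",
    dataset_kwargs={},
)
save_dir, loggers = get_save_dir_and_loggers(args, task=Tasks.TRAIN)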
Example #11
    def _create_image_folders(self):
        create_dirs(self._train_dir)
        create_dirs(self._test_dir)

        batches_dir = os.path.join(self._extract_dir, "cifar-10-batches-py")

        # Train
        image_tensors = []
        for label in range(10):
            create_dirs(os.path.join(self._train_dir, str(label)))
        batch_files = ["data_batch_{}".format(i) for i in range(1, 6)]
        for fname in batch_files:
            fpath = os.path.join(batches_dir, fname)
            print("Processing {}...".format(fpath))
            if not os.path.exists(fpath):
                raise ValueError("Train data batch {} not found".format(fpath))
            with open(fpath, "rb") as fo:
                batch_dict = pickle.load(fo, encoding="bytes")
            image_tensors.append(
                self._save_images(
                    batch_dict[b"labels"],
                    batch_dict[b"data"],
                    batch_dict[b"filenames"],
                    self._train_dir,
                ))
        image_tensors = np.concatenate(image_tensors)
        per_pixel_mean = np.mean(image_tensors, axis=0)
        np.save(
            os.path.join(self._train_dir, os.pardir,
                         "per_pixel_mean_image.npy"),
            per_pixel_mean,
        )
        del image_tensors

        # Test
        for label in range(10):
            create_dirs(os.path.join(self._test_dir, str(label)))
        fpath = os.path.join(batches_dir, "test_batch")
        print("Processing {}...".format(fpath))
        if not os.path.exists(fpath):
            raise ValueError("Test data batch {} not found".format(fpath))
        with open(fpath, "rb") as fo:
            batch_dict = pickle.load(fo, encoding="bytes")
        self._save_images(
            batch_dict[b"labels"],
            batch_dict[b"data"],
            batch_dict[b"filenames"],
            self._test_dir,
        )
Example #12
def main(args):
    ############################
    # logging and saving setup #
    ############################
    save_dir = os.path.abspath(os.path.expanduser(args.save_dir))

    # get unique model tag, defaults to '{model_name}'
    if not args.model_tag:
        model_tag = args.model.replace("/", ".")
        model_id = model_tag
        model_inc = 0

        while os.path.exists(os.path.join(save_dir, model_id)):
            model_inc += 1
            model_id = "{}__{:02d}".format(model_tag, model_inc)
    else:
        model_id = args.model_tag
    save_dir = os.path.join(save_dir, model_id)
    create_dirs(save_dir)
    print("Model id is set to {}".format(model_id))

    ###########################
    # standard training setup #
    ###########################

    # create data loaders
    train_loader, _, _ = _create_imagefolder_dataloader(args, train=True)
    val_loader, num_classes, image_shape = _create_imagefolder_dataloader(
        args, train=False
    )
    dataloaders = {"train": train_loader, "val": val_loader}

    # create model
    model = _get_torchvision_model(
        args.model,
        num_classes,
        args.pretrained,
        args.checkpoint_path,
    )
    print("created model: {}".format(model))
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    print("using device: {}".format(device))

    # create standard SGD optimizer and cross entropy loss function
    criterion = CrossEntropyLoss()
    optimizer = SGD(
        model.parameters(), lr=0.001, momentum=0.9
    )  # lr will be overridden by recipe

    ##########################
    # add sparseml modifiers #
    ##########################
    manager = ScheduledModifierManager.from_yaml(args.recipe_path)
    optimizer = ScheduledOptimizer(
        optimizer,
        model,
        manager,
        steps_per_epoch=len(train_loader),
        loggers=[PythonLogger()],
    )

    ########################
    # torchvision training #
    ########################
    model, val_acc_history = train_model(
        model,
        dataloaders,
        criterion,
        optimizer,
        device,
        num_epochs=manager.max_epochs,
        is_inception="inception" in args.model,
    )

    ########################
    # export trained model #
    ########################
    exporter = ModuleExporter(model, save_dir)
    sample_input = torch.randn(image_shape).unsqueeze(0)  # sample batch for ONNX export
    exporter.export_onnx(sample_input)
    exporter.export_pytorch()
    print("Model ONNX export and PyTorch weights saved to {}".format(save_dir))
Example #13
    def setup_filesystem(self):
        """
        Set up the local file system so that it can be used with the data
        """
        create_dirs(self.dir_path)
Example #14
    def __init__(self, save_dir: str):
        self._save_dir = save_dir
        self._idx = 0

        create_dirs(save_dir)