Example #1
def evaluate(model_path: Path, datasets: typing.List[Datasets], output_folder: Path, find_mistakes: bool = False, include_heading: bool = False) -> str:
    """Evaluate a model, returning the results as CSV.

    Args:
        model_path (Path): path to the model folder containing the YAML file and the saved weights
        datasets (typing.List[Datasets]): the datasets to evaluate on
        output_folder (Path): output folder for the mistake images (if applicable)
        find_mistakes (bool, optional): whether to output all mistakes as images to the output folder. Defaults to False.
        include_heading (bool, optional): whether to include a heading in the CSV output. Defaults to False.

    Raises:
        ValueError: if the YAML config file is missing

    Returns:
        str: the CSV string
    """
    model_name = model_path.stem
    config_file = model_path.parent / f"{model_name}.yaml"
    if not config_file.exists():
        raise ValueError("config file missing")
    cfg = CN.load_yaml_with_base(config_file)
    model = torch.load(model_path, map_location=DEVICE)
    model = device(model)
    model.eval()
    datasets = {mode: build_dataset(cfg, mode)
                for mode in datasets}
    classes = next(iter(datasets.values())).classes

    csv = []
    if include_heading:
        csv.append(_csv_heading(classes))
    for mode, dataset in datasets.items():
        # Load dataset
        loader = build_data_loader(cfg, dataset, mode)
        # Compute statistics over whole dataset
        agg = StatsAggregator(classes)
        for images, labels in device(loader):
            predictions = model(images)
            agg.add_batch(predictions, labels, **(dict(inputs=images)
                                                  if find_mistakes else dict()))

        csv.append(_csv(model, agg, model_name, mode))
        if find_mistakes:
            groundtruth, mistakes = zip(*sorted(agg.mistakes,
                                                key=lambda x: x[0]))
            imgs = torch.tensor(mistakes).permute((0, 2, 3, 1))
            imgs = unnormalize(imgs).permute((0, 3, 1, 2))
            img = torchvision.utils.make_grid(imgs, pad_value=1, nrow=4)
            img = img.numpy().transpose((1, 2, 0)) * 255
            img = Image.fromarray(img.astype(np.uint8))
            mistakes_file = output_folder / \
                f"{model_name}_{mode.value}_mistakes.png"
            logger.info(f"Writing mistakes to {mistakes_file}")
            img.save(mistakes_file)
            groundtruth_file = output_folder / \
                f"{model_name}_{mode.value}_groundtruth.csv"
            with groundtruth_file.open("w") as f:
                f.write(",".join(map(str, groundtruth)))
    return "\n".join(csv)
Example #2
    @classmethod  # implied by the cls parameter
    def _load_classifier(cls, path: Path):
        model_file = next(iter(path.glob("*.pt")))
        yaml_file = next(iter(path.glob("*.yaml")))
        cfg = CN.load_yaml_with_base(yaml_file)
        model = torch.load(model_file, map_location=DEVICE)
        model = device(model)
        model.eval()
        return cfg, model
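
As a @classmethod, it is called on the recognizer class itself. A hedged usage sketch (the class name is a placeholder; the folder must contain exactly one *.pt and one *.yaml file, otherwise next(iter(...)) raises StopIteration):

from pathlib import Path

cfg, model = SomeRecognizer._load_classifier(  # SomeRecognizer is hypothetical
    Path("models/occupancy_classifier"))
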
Example #3
def _train_model(model_type: str) -> typing.Tuple[torch.nn.Module, CN]:
    model_file = next((URI("models://") / model_type).glob("*.pt"))
    yaml_file = URI("config://transfer_learning") / \
        model_type / f"{model_file.stem}.yaml"
    cfg = CN.load_yaml_with_base(yaml_file)
    run_dir = URI("runs://transfer_learning") / model_type
    model = torch.load(model_file, map_location=DEVICE)
    model = device(model)
    is_inception = "inception" in model_file.stem.lower()
    model = train_model(cfg,
                        run_dir,
                        model,
                        is_inception,
                        model_file.stem,
                        eval_on_train=True)
    # Return the trained model together with its config, as the annotation promises
    return model, cfg
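
An illustrative invocation; the registered model types are defined elsewhere and are not shown in this excerpt:

# "ResNet" is a placeholder; model_type must name a folder under models://
# holding a *.pt checkpoint, with a matching YAML under
# config://transfer_learning/<model_type>/.
trained_model, cfg = _train_model("ResNet")
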
Example #4
def _add_corners_to_train_labels(input_dir: Path):
    corner_detection_cfg = CN.load_yaml_with_base(
        "config://corner_detection.yaml")
    for subset in (x.value for x in (Datasets.TRAIN, Datasets.TEST)):
        for img_file in (input_dir / subset).glob("*.png"):
            img = cv2.imread(str(img_file))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img, img_scale = resize_image(corner_detection_cfg, img)
            corners = find_corners(corner_detection_cfg, img)
            corners = corners / img_scale

            json_file = img_file.parent / f"{img_file.stem}.json"
            with json_file.open("r") as f:
                label = json.load(f)
            label["corners"] = corners.tolist()
            with json_file.open("w") as f:
                json.dump(label, f)
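
After this pass, every per-image JSON label gains a "corners" entry. A sketch of the resulting structure; only "corners" is guaranteed by the code above, the other field is a placeholder:

# Reconstructed label shape; the values are illustrative.
label = {
    "pieces": [...],  # pre-existing annotations (assumed)
    "corners": [[12.5, 30.1], [410.0, 28.7],
                [402.2, 380.9], [18.3, 377.0]],  # from corners.tolist()
}
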
Example #5
def _find_best_configs(n: int, results_file: Path, output_folder: Path):
    df = pd.read_csv(results_file)
    df = df.sort_values("mistakes")
    df = df.drop_duplicates([x for x in df.columns if x != "mistakes"])
    df = df.reset_index(drop=True)
    df = df.head(n)

    output_folder.mkdir(exist_ok=True, parents=True)

    for i, row in df.iterrows():
        configs = {
            k[len("config."):]: v if not hasattr(v, "item") else v.item()
            for k, v in row.items() if k.startswith("config.")
        }
        cfg = CN.load_yaml_with_base("config://corner_detection/_base.yaml")
        cfg.merge_with_dict(configs)
        with (output_folder / f"{i:04d}.yaml").open("w") as f:
            cfg.dump(stream=f)
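
A hedged usage sketch with illustrative paths:

from pathlib import Path

# Keep the 5 configurations with the fewest mistakes and write them out as
# 0000.yaml .. 0004.yaml.
_find_best_configs(5,
                   Path("results/corner_detection/evaluate.csv"),
                   Path("config/corner_detection/best"))
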
Example #6
    def __init__(self, classifiers_folder: Path = URI("models://")):
        """Constructor.

        Args:
            classifiers_folder (Path, optional): the path to the classifiers (supplying a different path is especially useful because the transfer learning classifiers are located at ``models://transfer_learning``). Defaults to ``models://``.
        """
        self._corner_detection_cfg = CN.load_yaml_with_base(
            "config://corner_detection.yaml")

        self._occupancy_cfg, self._occupancy_model = self._load_classifier(
            classifiers_folder / "occupancy_classifier")
        self._occupancy_transforms = build_transforms(
            self._occupancy_cfg, mode=Datasets.TEST)
        self._pieces_cfg, self._pieces_model = self._load_classifier(
            classifiers_folder / "piece_classifier")
        self._pieces_transforms = build_transforms(
            self._pieces_cfg, mode=Datasets.TEST)
        self._piece_classes = np.array(list(map(name_to_piece,
                                                self._pieces_cfg.DATASET.CLASSES)))
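
Assuming this constructor belongs to the recognizer class (called ChessRecognizer below purely for illustration), switching to the transfer-learning classifiers is a one-argument change:

recognizer = ChessRecognizer()  # loads from models:// by default
tl_recognizer = ChessRecognizer(URI("models://transfer_learning"))
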
Example #7
def create_configs(classifier: str, include_centercrop: bool = False):
    """Create the YAML configuration files for all registered models for a classifier.

    Args:
        classifier (str): the classifier (either `"occupancy_classifier"` or `"piece_classifier"`)
        include_centercrop (bool, optional): whether to create two configs per model, one including center crop and one not. Defaults to False.
    """
    config_dir = URI("config://") / classifier

    logger.info(f"Removing YAML files from {config_dir}.")
    for f in config_dir.glob("*.yaml"):
        if not f.name.startswith("_"):
            f.unlink()

    for name, model in MODELS_REGISTRY[classifier.upper()].items():
        for center_crop in ({True, False} if include_centercrop else {False}):
            config_file = config_dir / \
                (name + ("_centercrop" if center_crop else "") + ".yaml")
            logging.info(f"Writing configuration file {config_file}")

            size = model.input_size
            C = CN()
            override_base = f"config://{classifier}/_base_override_{name}.yaml"
            if URI(override_base).exists():
                C._BASE_ = override_base
            else:
                suffix = "_pretrained" if model.pretrained else ""
                C._BASE_ = f"config://{classifier}/_base{suffix}.yaml"
            C.DATASET = CN()
            C.DATASET.TRANSFORMS = CN()
            C.DATASET.TRANSFORMS.CENTER_CROP = (50, 50) \
                if center_crop else None
            C.DATASET.TRANSFORMS.RESIZE = size
            C.TRAINING = CN()
            C.TRAINING.MODEL = CN()
            C.TRAINING.MODEL.REGISTRY = classifier.upper()
            C.TRAINING.MODEL.NAME = name

            with config_file.open("w") as f:
                C.dump(stream=f)
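
For a hypothetical piece classifier model named cnn50 with input_size (50, 50), no pretraining, and center crop disabled, the loop above would emit roughly the following cnn50.yaml (reconstructed from the assignments, not captured from an actual run):

_BASE_: config://piece_classifier/_base.yaml
DATASET:
  TRANSFORMS:
    CENTER_CROP: null
    RESIZE: [50, 50]
TRAINING:
  MODEL:
    REGISTRY: PIECE_CLASSIFIER
    NAME: cnn50
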
Example #8
@listify
def _add_parameter(key: str, values: typing.Iterable[typing.Any], cfgs: typing.List[CN]) -> list:
    for value in values:
        for cfg in cfgs:
            cfg = cfg.clone()
            cfg_node = cfg
            *key_items, final_key = key.split(".")
            for k in key_items:
                cfg_node = cfg_node[k]
            cfg_node[final_key] = value
            yield cfg


def _is_valid_cfg(cfg: CN) -> bool:
    return cfg.EDGE_DETECTION.LOW_THRESHOLD <= cfg.EDGE_DETECTION.HIGH_THRESHOLD


if __name__ == "__main__":
    argparse.ArgumentParser(
        description="Create YAML config files for grid search.").parse_args()
    cfg_folder = URI("config://corner_detection")
    cfg = CN.load_yaml_with_base(cfg_folder / "_base.yaml")
    cfgs = [cfg]
    for k, v in parameters.items():
        cfgs = _add_parameter(k, v, cfgs)
    cfgs = filter(_is_valid_cfg, cfgs)
    for i, cfg in enumerate(cfgs, 1):
        with (cfg_folder / f"generated_{i}.yaml").open("w") as f:
            cfg.dump(stream=f)
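
parameters is defined outside this excerpt. For illustration, a mapping from dotted config keys to candidate values like the one below would make the repeated _add_parameter calls enumerate the full Cartesian product; the two keys shown are the ones _is_valid_cfg actually checks:

# Hypothetical search grid; the real one is not shown in this excerpt.
parameters = {
    "EDGE_DETECTION.LOW_THRESHOLD": [60, 120, 180],
    "EDGE_DETECTION.HIGH_THRESHOLD": [120, 150],
}

With these values, six configs are generated and _is_valid_cfg keeps the four where LOW_THRESHOLD <= HIGH_THRESHOLD.
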
Example #9
        bottom = get_nonmax_supressed(ymin - 1)

        if top.sum() > bottom.sum():
            ymax += 1
        else:
            ymin -= 1
    return ymin, ymax


if __name__ == "__main__":
    import matplotlib.pyplot as plt
    import argparse

    parser = argparse.ArgumentParser(description="Chessboard corner detector.")
    parser.add_argument("file", type=str, help="URI of the input image file")
    parser.add_argument("--config", type=str, help="path to the config file",
                        default="config://corner_detection.yaml")
    args = parser.parse_args()

    cfg = CN.load_yaml_with_base(args.config)
    filename = URI(args.file)
    img = cv2.imread(str(filename))
    corners = find_corners(cfg, img)

    fig = plt.figure()
    fig.canvas.manager.set_window_title("Corner detection output")
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.scatter(*corners.T, c="r")
    plt.axis("off")
    plt.show()
Example #10
                        type=str,
                        default=f"results://corner_detection")
    parser.set_defaults(find_mistakes=False)
    args = parser.parse_args()

    datasets = [Datasets.TRAIN, Datasets.VAL] \
        if args.dataset is None else [d for d in Datasets if d.value == args.dataset]
    config_path = URI(args.config)
    if config_path.is_dir():
        cfgs = URI(args.config).glob("*.yaml")
        cfgs = filter(lambda x: not x.name.startswith("_"), cfgs)
        cfgs = sorted(cfgs)
        cfgs = map(CN.load_yaml_with_base, cfgs)
        cfgs = list(cfgs)
    else:
        cfgs = [CN.load_yaml_with_base(config_path)]

    output_folder = URI(args.out)
    output_folder.mkdir(parents=True, exist_ok=True)
    with (output_folder / "evaluate.csv").open("w") as f:
        cfg_headers = None
        for i, cfg in enumerate(cfgs, 1):
            params = cfg.params_dict()
            if cfg_headers is None:
                cfg_headers = list(params.keys())
                values = ["dataset", "mistakes", "total"]
                values.extend(f"config.{x}" for x in cfg_headers)
                f.write(",".join(values) + "\n")
            for dataset in datasets:
                mistakes, total = _evaluate(cfg, dataset, args.out,
                                            args.find_mistakes)
Example #11
def train_model(cfg: CN,
                run_dir: Path,
                model: torch.nn.Module,
                is_inception: bool = False,
                model_name: typing.Optional[str] = None,
                eval_on_train: bool = False) -> nn.Module:
    """Train a model that has already been loaded.

    Args:
        cfg (CN): the configuration object describing the model, dataset, etc.
        run_dir (Path): where to write tensorboard files, the active YAML file, and the chosen weights
        model (torch.nn.Module): the loaded model
        is_inception (bool, optional): whether the model is InceptionV3. Defaults to False.
        model_name (str, optional): the name of the model (by default the last component of the run directory). Defaults to None.
        eval_on_train (bool, optional): whether to evaluate on the training set. Defaults to False.

    Returns:
        nn.Module: the trained model
    """
    logger.info(f"Starting training in {run_dir}")
    if not model_name:
        model_name = run_dir.name

    # Create folder
    if run_dir.exists():
        logger.warning(
            f"The folder {run_dir} already exists and will be overwritten by this run"
        )
        shutil.rmtree(run_dir, ignore_errors=True)
    run_dir.mkdir(parents=True, exist_ok=True)

    # Store config
    with (run_dir / f"{model_name}.yaml").open("w") as f:
        cfg.dump(stream=f)

    # Move model to device
    device(model)

    best_weights, best_accuracy, best_step = copy.deepcopy(
        model.state_dict()), 0., 0

    criterion = nn.CrossEntropyLoss()

    modes = {Datasets.TRAIN, Datasets.VAL}
    if eval_on_train:
        dataset = build_dataset(cfg, Datasets.TRAIN)
        datasets = {mode: dataset for mode in modes}
    else:
        datasets = {mode: build_dataset(cfg, mode) for mode in modes}
    classes = datasets[Datasets.TRAIN].classes
    loader = {
        mode: build_data_loader(cfg, datasets[mode], mode)
        for mode in modes
    }
    writer = {mode: SummaryWriter(run_dir / mode.value) for mode in modes}
    aggregator = {mode: StatsAggregator(classes) for mode in modes}

    def log(step: int, loss: float, mode: Datasets):
        if mode == Datasets.TRAIN:
            logger.info(f"Step {step:5d}: loss {loss:.3f}")

        w, agg = (x[mode] for x in (writer, aggregator))

        w.add_scalar("Loss", loss, step)
        w.add_scalar("Accuracy", agg.accuracy(), step)
        for c in classes:
            w.add_scalar(f"Precision/{c}", agg.precision(c), step)
            w.add_scalar(f"Recall/{c}", agg.recall(c), step)
            w.add_scalar(f"F1 score/{c}", agg.f1_score(c), step)

    def perform_iteration(data: typing.Tuple[torch.Tensor, torch.Tensor],
                          mode: Datasets):
        inputs, labels = map(device, data)
        with torch.set_grad_enabled(mode == Datasets.TRAIN):
            # Reset gradients
            optimizer.zero_grad()

            # Forward pass and compute loss
            if is_inception and mode == Datasets.TRAIN:
                # Special case for inception models
                outputs, auxiliary_outputs = model(inputs)
                loss1 = criterion(outputs, labels)
                loss2 = criterion(auxiliary_outputs, labels)
                loss = loss1 + 0.4 * loss2
            else:
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            if mode == Datasets.TRAIN:
                loss.backward()

        with torch.no_grad():
            aggregator[mode].add_batch(outputs, labels)

        # Perform optimisation
        if mode == Datasets.TRAIN:
            optimizer.step()

        # Return
        return loss.item()

    step = 0
    log_every_n = 100

    # Ensure we're in training mode
    model.train()

    # Loop over training phases
    for phase in cfg.TRAINING.PHASES:

        for p in model.parameters():
            p.requires_grad = False
        parameters = list(model.parameters()) if phase.PARAMS == "all" \
            else model.params[phase.PARAMS]
        for p in parameters:
            p.requires_grad = True
        optimizer = build_optimizer_from_config(phase.OPTIMIZER, parameters)

        # Loop over epochs (passes over the whole dataset)
        for epoch in range(phase.EPOCHS):
            aggregator[Datasets.TRAIN].reset()

            # Iterate the training set
            losses = []
            for i, data in enumerate(loader[Datasets.TRAIN]):

                # Perform training iteration
                losses.append(perform_iteration(data, mode=Datasets.TRAIN))

                if step % log_every_n == 0:
                    loss = np.mean(list(losses))
                    log(step, loss, Datasets.TRAIN)
                    aggregator[Datasets.TRAIN].reset()
                    losses = []

                    # Validate entire validation dataset
                    model.eval()
                    aggregator[Datasets.VAL].reset()

                    # Iterate entire val dataset
                    perform_val_iteration = functools.partial(
                        perform_iteration, mode=Datasets.VAL)
                    val_losses = map(perform_val_iteration,
                                     loader[Datasets.VAL])

                    # Gather losses and log
                    val_loss = np.mean(list(val_losses))
                    log(step, val_loss, Datasets.VAL)
                    model.train()

                # Save weights if we get a better performance
                accuracy = aggregator[Datasets.VAL].accuracy()
                if accuracy >= best_accuracy:
                    best_accuracy = accuracy
                    best_weights = copy.deepcopy(model.state_dict())
                    best_step = step

                # Get ready for next step
                step += 1

    # Clean up
    for w in writer.values():
        w.flush()
        w.close()

    logger.info("Finished training")

    logger.info(
        f"Restoring best weight state (step {best_step} with validation accuracy of {best_accuracy})"
    )
    model.load_state_dict(best_weights)
    torch.save(model, run_dir / f"{model_name}.pt")
    with (run_dir / f"{model_name}.txt").open("w") as f:
        f.write(f"exported at step: {best_step}")
    return model
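
A minimal sketch of a call site, assuming a checkpoint and config laid out as in the earlier examples (all paths are illustrative):

cfg = CN.load_yaml_with_base("config://piece_classifier/cnn50.yaml")
model = torch.load("models/piece_classifier/cnn50.pt", map_location=DEVICE)
trained = train_model(cfg, URI("runs://piece_classifier/cnn50"), model,
                      model_name="cnn50")
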
Example #12
def test_load_yaml():
    cfg = CN.load_yaml_with_base(RESOURCES / "inherit_base.yaml")
    assert cfg.TEST == 1
    assert cfg.XYZ == "abc"
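
A minimal pair of fixture files that would satisfy this test (reconstructed for illustration; the actual resources are not shown):

# _base.yaml (hypothetical fixture)
TEST: 1
XYZ: abc

# inherit_base.yaml (hypothetical fixture): pulls everything in from the base
_BASE_: _base.yaml
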
Example #13
def test_number_as_string():
    cfg = CN.load_yaml_with_base(RESOURCES / "number_as_string.yaml")
    assert isinstance(cfg.PROPERTY, str)
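
A reconstructed fixture illustrating the behaviour under test: quoting a numeric value in YAML should keep it a string.

# number_as_string.yaml (hypothetical fixture)
PROPERTY: "3"
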
Example #14
def test_data_type_override():
    with pytest.raises(ValueError):
        CN.load_yaml_with_base(RESOURCES / "data_type_override.yaml")
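
A reconstructed fixture that would trigger the ValueError: overriding a key from the base with a value of a different type.

# data_type_override.yaml (hypothetical fixture)
_BASE_: _base.yaml
TEST: "no longer an int"
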
Example #15
def test_inherit_yaml():
    cfg = CN.load_yaml_with_base(RESOURCES / "inherit_override.yaml")
    assert cfg.TEST == 2
    assert cfg.XYZ == "abc"
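
Correspondingly, a reconstructed inherit_override.yaml would reuse the same base but override TEST:

# inherit_override.yaml (hypothetical fixture)
_BASE_: _base.yaml
TEST: 2
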
Example #16
    def _train(config: str):
        cfg = CN.load_yaml_with_base(configs_dir / f"{config}.yaml")
        run_dir = URI("runs://") / name / config

        # Train the model and save it
        train(cfg, run_dir)