예제 #1
0
def check_unique_arch(meta_file):
    """Print how many architectures listed in a meta file are distinct.

    The architecture strings are read through ``API``, parsed into
    ``CellStructure`` objects, and then counted three times, once for each
    value (``None``, ``False``, ``True``) passed to ``to_unique_str``.
    The number of architectures passing ``check_valid`` is printed first.

    Args:
        meta_file: Location of the meta file; converted with ``str`` before
            being handed to ``API``.
    """
    benchmark = API(str(meta_file))
    structures = [
        CellStructure.str2structure(arch_str)
        for arch_str in deepcopy(benchmark.meta_archs)
    ]

    def get_unique_matrix(archs, consider_zero):
        """Group ``archs`` by their unique string.

        Returns a tuple ``(sm_matrix, unique_ids, unique_num)``: a boolean
        matrix marking pairs that share a unique string, the group id of
        each architecture, and the number of groups.
        """
        keys = [arch.to_unique_str(consider_zero) for arch in archs]
        print("{:} create unique-string ({:}/{:}) done".format(
            time_string(), len(set(keys)), len(keys)))

        # Bucket the positions of the architectures by unique string.
        buckets = {}
        for position, key in enumerate(keys):
            buckets.setdefault(key, []).append(position)

        # Mark every (row, col) pair whose architectures share a bucket.
        sm_matrix = torch.eye(len(archs)).bool()
        for members in buckets.values():
            for row in members:
                for col in members:
                    sm_matrix[row, col] = True

        # Walk the rows in order; each row without an id opens a new group
        # and labels everything its matrix row marks as equal.
        unique_ids = [-1] * len(archs)
        unique_num = 0
        for row in range(len(archs)):
            if unique_ids[row] != -1:
                continue
            for member in sm_matrix[row].nonzero().view(-1).tolist():
                assert unique_ids[member] == -1, "impossible"
                unique_ids[member] = unique_num
            unique_num += 1
        return sm_matrix, unique_ids, unique_num

    valid_count = sum(arch.check_valid() for arch in structures)
    print("There are {:} valid-archs".format(valid_count))

    # One report per ``consider_zero`` setting, in the original order.
    reports = (
        (None,
         "{:} There are {:} unique architectures (considering nothing)."),
        (False,
         "{:} There are {:} unique architectures (not considering zero)."),
        (True,
         "{:} There are {:} unique architectures (considering zero)."),
    )
    for consider_zero, template in reports:
        _, _, unique_num = get_unique_matrix(structures, consider_zero)
        print(template.format(time_string(), unique_num))
예제 #2
0
def train_single_model(save_dir, workers, datasets, xpaths, splits, use_less,
                       seeds, model_str, arch_config):
    """Train and evaluate one architecture for every seed, logging results.

    Results for each seed are cached as ``seed-XXXX.pth`` inside
    ``<save_dir>/specifics/<LESS|FULL>-<model_str>-<channel>-<num_cells>``.
    When that file already exists it is loaded instead of calling
    ``evaluate_all_datasets`` again.

    Args:
        save_dir: Root output directory.
        workers: Thread count given to ``torch.set_num_threads`` and
            forwarded to ``evaluate_all_datasets``.
        datasets: Forwarded unchanged to ``evaluate_all_datasets``.
        xpaths: Forwarded unchanged to ``evaluate_all_datasets``.
        splits: Forwarded unchanged to ``evaluate_all_datasets``.
        use_less: Chooses the "LESS"/"FULL" directory prefix and is
            forwarded to ``evaluate_all_datasets``.
        seeds: Sized collection of integer seeds.
        model_str: Either a key of ``CellArchitectures`` or a string that
            ``CellStructure.str2structure`` can parse.
        arch_config: Mapping providing at least ``"channel"`` and
            ``"num_cells"``.

    Raises:
        AssertionError: If CUDA is unavailable or the architecture fails
            ``check_valid_op`` for the ("cell", "full") search space.
        ValueError: If ``model_str`` is neither pre-defined nor parseable.
    """
    assert torch.cuda.is_available(), "CUDA is not available."
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = True
    torch.set_num_threads(workers)

    save_dir = (Path(save_dir) / "specifics" / "{:}-{:}-{:}-{:}".format(
        "LESS" if use_less else "FULL",
        model_str,
        arch_config["channel"],
        arch_config["num_cells"],
    ))
    logger = Logger(str(save_dir), 0, False)
    if model_str in CellArchitectures:
        arch = CellArchitectures[model_str]
        logger.log(
            "The model string is found in pre-defined architecture dict : {:}".
            format(model_str))
    else:
        try:
            arch = CellStructure.str2structure(model_str)
        except Exception as err:
            # Only real parsing failures are translated; KeyboardInterrupt and
            # SystemExit propagate untouched, and the cause stays attached.
            raise ValueError(
                "Invalid model string : {:}. It can not be found or parsed.".
                format(model_str)) from err
    assert arch.check_valid_op(get_search_spaces(
        "cell", "full")), "{:} has the invalid op.".format(arch)
    logger.log("Start train-evaluate {:}".format(arch.tostr()))
    logger.log("arch_config : {:}".format(arch_config))

    start_time, seed_time = time.time(), AverageMeter()
    for _is, seed in enumerate(seeds):
        logger.log(
            "\nThe {:02d}/{:02d}-th seed is {:} ----------------------<.>----------------------"
            .format(_is, len(seeds), seed))
        to_save_name = save_dir / "seed-{:04d}.pth".format(seed)
        if to_save_name.exists():
            logger.log("Find the existing file {:}, directly load!".format(
                to_save_name))
            # NOTE(review): torch.load unpickles the file; this path is
            # expected to hold checkpoints written by this function only.
            checkpoint = torch.load(to_save_name)
        else:
            logger.log(
                "Does not find the existing file {:}, train and evaluate!".
                format(to_save_name))
            checkpoint = evaluate_all_datasets(
                arch,
                datasets,
                xpaths,
                splits,
                use_less,
                seed,
                arch_config,
                workers,
                logger,
            )
            torch.save(checkpoint, to_save_name)
        # log information
        logger.log("{:}".format(checkpoint["info"]))
        all_dataset_keys = checkpoint["all_dataset_keys"]
        for dataset_key in all_dataset_keys:
            logger.log("\n{:} dataset : {:} {:}".format(
                "-" * 15, dataset_key, "-" * 15))
            dataset_info = checkpoint[dataset_key]
            # logger.log('Network ==>\n{:}'.format( dataset_info['net_string'] ))
            logger.log("Flops = {:} MB, Params = {:} MB".format(
                dataset_info["flop"], dataset_info["param"]))
            logger.log("config : {:}".format(dataset_info["config"]))
            logger.log("Training State (finish) = {:}".format(
                dataset_info["finish-train"]))
            # The accuracy containers are indexed by epoch; report the last.
            last_epoch = dataset_info["total_epoch"] - 1
            train_acc1es, train_acc5es = (
                dataset_info["train_acc1es"],
                dataset_info["train_acc5es"],
            )
            valid_acc1es, valid_acc5es = (
                dataset_info["valid_acc1es"],
                dataset_info["valid_acc5es"],
            )
            logger.log(
                "Last Info : Train = Acc@1 {:.2f}% Acc@5 {:.2f}% Error@1 {:.2f}%, Test = Acc@1 {:.2f}% Acc@5 {:.2f}% Error@1 {:.2f}%"
                .format(
                    train_acc1es[last_epoch],
                    train_acc5es[last_epoch],
                    100 - train_acc1es[last_epoch],
                    valid_acc1es[last_epoch],
                    valid_acc5es[last_epoch],
                    100 - valid_acc1es[last_epoch],
                ))
        # measure elapsed time; the estimate is average seed time multiplied
        # by the number of seeds still to run
        seed_time.update(time.time() - start_time)
        start_time = time.time()
        need_time = "Time Left: {:}".format(
            convert_secs2time(seed_time.avg * (len(seeds) - _is - 1), True))
        logger.log(
            "\n<<<***>>> The {:02d}/{:02d}-th seed is {:} <finish> other procedures need {:}"
            .format(_is, len(seeds), seed, need_time))
    logger.close()
예제 #3
0
def main(
    save_dir: Path,
    workers: int,
    datasets: List[Text],
    xpaths: List[Text],
    splits: List[int],
    seeds: List[int],
    nets: List[str],
    opt_config: Dict[Text, Any],
    to_evaluate_indexes: tuple,
    cover_mode: bool,
    arch_config: Dict[Text, Any],
):
    """Evaluate the selected architectures under every seed and save results.

    For each index in ``to_evaluate_indexes`` and each seed, the result of
    ``evaluate_all_datasets`` is stored as
    ``<save_dir>/arch-<index>-seed-<seed>.pth``. An already existing file
    is removed and recomputed when ``cover_mode`` is true, otherwise that
    seed is skipped. Progress is written to a logger under
    ``<save_dir>/logs``.

    Args:
        save_dir: Output directory for result files and the log folder.
        workers: Forwarded to ``evaluate_all_datasets``.
        datasets: Dataset names, zipped with ``xpaths`` and ``splits``.
        xpaths: Dataset paths.
        splits: Dataset split identifiers.
        seeds: Seeds to run for every architecture.
        nets: Architecture strings, indexed by ``to_evaluate_indexes``.
        opt_config: Optimization configuration forwarded to the evaluator.
        to_evaluate_indexes: Indexes into ``nets`` to evaluate.
        cover_mode: Whether existing result files are overwritten.
        arch_config: Architecture configuration forwarded to the evaluator.
    """
    log_dir = save_dir / "logs"
    log_dir.mkdir(parents=True, exist_ok=True)
    logger = Logger(str(log_dir), os.getpid(), False)

    logger.log("xargs : seeds      = {:}".format(seeds))
    logger.log("xargs : cover_mode = {:}".format(cover_mode))
    logger.log("-" * 100)

    range_part = "Start evaluating range =: {:06d} - {:06d}".format(
        min(to_evaluate_indexes), max(to_evaluate_indexes))
    num_to_eval = len(to_evaluate_indexes)
    num_nets = len(nets)
    count_part = "({:} in total) / {:06d} with cover-mode={:}".format(
        num_to_eval, num_nets, cover_mode)
    logger.log(range_part + count_part)

    for order, (dataset, xpath, split) in enumerate(
            zip(datasets, xpaths, splits)):
        logger.log(
            "--->>> Evaluate {:}/{:} : dataset={:9s}, path={:}, split={:}".
            format(order, len(datasets), dataset, xpath, split))
    logger.log("--->>> optimization config : {:}".format(opt_config))

    dashes = "-" * 15
    short_stars = "*" * 10
    long_stars = "*" * 100

    start_time, epoch_time = time.time(), AverageMeter()
    for position, index in enumerate(to_evaluate_indexes):
        arch = nets[index]
        logger.log(
            "\n{:} evaluate {:06d}/{:06d} ({:06d}/{:06d})-th arch [seeds={:}] {:}"
            .format(time_string(), position, num_to_eval, index, num_nets,
                    seeds, dashes))
        logger.log("{:} {:} {:}".format(dashes, arch, dashes))

        # Run this architecture once per seed; remember whether any seed
        # was skipped so the timing average is not polluted.
        has_continue = False
        for seed in seeds:
            to_save_name = save_dir / "arch-{:06d}-seed-{:04d}.pth".format(
                index, seed)
            if to_save_name.exists():
                if not cover_mode:
                    logger.log(
                        "Find existing file : {:}, skip this evaluation".
                        format(to_save_name))
                    has_continue = True
                    continue
                logger.log(
                    "Find existing file : {:}, remove it before evaluation"
                    .format(to_save_name))
                os.remove(str(to_save_name))
            results = evaluate_all_datasets(
                CellStructure.str2structure(arch),
                datasets,
                xpaths,
                splits,
                opt_config,
                seed,
                arch_config,
                workers,
                logger,
            )
            torch.save(results, to_save_name)
            logger.log(
                "\n{:} evaluate {:06d}/{:06d} ({:06d}/{:06d})-th arch [seeds={:}] ===>>> {:}"
                .format(time_string(), position, num_to_eval, index,
                        num_nets, seeds, to_save_name))

        # Timing bookkeeping: only fully evaluated architectures update
        # the running average used for the remaining-time estimate.
        if not has_continue:
            epoch_time.update(time.time() - start_time)
        start_time = time.time()
        remaining = num_to_eval - position - 1
        need_time = "Time Left: {:}".format(
            convert_secs2time(epoch_time.avg * remaining, True))
        logger.log("This arch costs : {:}".format(
            convert_secs2time(epoch_time.val, True)))
        logger.log("{:}".format(long_stars))
        progress = "{:06d}/{:06d} ({:06d}/{:06d})-th done, left {:}".format(
            position, num_to_eval, index, num_nets, need_time)
        logger.log("{:}   {:74s}   {:}".format(short_stars, progress,
                                               short_stars))
        logger.log("{:}".format(long_stars))

    logger.close()
예제 #4
0
def main(
    save_dir,
    workers,
    datasets,
    xpaths,
    splits,
    use_less,
    srange,
    arch_index,
    seeds,
    cover_mode,
    meta_info,
    arch_config,
):
    """Evaluate a range of architectures from ``meta_info`` for every seed.

    Results are written to a sub-directory of ``save_dir`` named after the
    range, channel count and cell count (suffixed with ``-LESS`` when
    ``use_less`` is true), one ``arch-<index>-seed-<seed>.pth`` file per
    architecture and seed. An existing file is removed and recomputed when
    ``cover_mode`` is true, otherwise that seed is skipped.

    Args:
        save_dir: Root output directory.
        workers: Thread count for ``torch.set_num_threads``; also forwarded
            to ``evaluate_all_datasets``.
        datasets: Dataset names, zipped with ``xpaths`` and ``splits``.
        xpaths: Dataset paths.
        splits: Dataset split identifiers.
        use_less: Selects the directory suffix and is forwarded to the
            evaluator.
        srange: Two-element inclusive index range ``(start, end)``.
        arch_index: ``-1`` to evaluate the whole range, or a single index
            inside the range.
        seeds: Seeds to run for every architecture.
        cover_mode: Whether existing result files are overwritten.
        meta_info: Mapping with ``"archs"`` (architecture strings) and
            ``"total"`` (their count).
        arch_config: Mapping with at least ``"channel"`` and
            ``"num_cells"``; forwarded to the evaluator.

    Raises:
        AssertionError: If CUDA is unavailable or the range/index arguments
            are inconsistent.
    """
    assert torch.cuda.is_available(), "CUDA is not available."
    torch.backends.cudnn.enabled = True
    # torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(workers)

    assert (len(srange) == 2 and
            0 <= srange[0] <= srange[1]), "invalid srange : {:}".format(srange)
    lower, upper = srange[0], srange[1]

    dir_template = ("{:06d}-{:06d}-C{:}-N{:}-LESS"
                    if use_less else "{:06d}-{:06d}-C{:}-N{:}")
    sub_dir = Path(save_dir) / dir_template.format(
        lower, upper, arch_config["channel"], arch_config["num_cells"])
    logger = Logger(str(sub_dir), 0, False)

    all_archs = meta_info["archs"]
    total_archs = meta_info["total"]
    assert upper < total_archs, "invalid range : {:}-{:} vs. {:}".format(
        lower, upper, total_archs)
    assert (arch_index == -1 or lower <= arch_index <= upper
            ), "invalid range : {:} vs. {:} vs. {:}".format(
                lower, arch_index, upper)
    # -1 means "everything in the inclusive range"; otherwise one index.
    to_evaluate_indexes = (list(range(lower, upper + 1))
                           if arch_index == -1 else [arch_index])
    num_to_eval = len(to_evaluate_indexes)

    logger.log("xargs : seeds      = {:}".format(seeds))
    logger.log("xargs : arch_index = {:}".format(arch_index))
    logger.log("xargs : cover_mode = {:}".format(cover_mode))
    logger.log("-" * 100)

    logger.log(
        "Start evaluating range =: {:06d} vs. {:06d} vs. {:06d} / {:06d} with cover-mode={:}"
        .format(lower, arch_index, upper, total_archs, cover_mode))
    for order, (dataset, xpath, split) in enumerate(
            zip(datasets, xpaths, splits)):
        logger.log(
            "--->>> Evaluate {:}/{:} : dataset={:9s}, path={:}, split={:}".
            format(order, len(datasets), dataset, xpath, split))
    logger.log("--->>> architecture config : {:}".format(arch_config))

    dashes = "-" * 15
    short_stars = "*" * 10
    long_stars = "*" * 100

    start_time, epoch_time = time.time(), AverageMeter()
    for position, index in enumerate(to_evaluate_indexes):
        arch = all_archs[index]
        logger.log(
            "\n{:} evaluate {:06d}/{:06d} ({:06d}/{:06d})-th architecture [seeds={:}] {:}"
            .format(dashes, position, num_to_eval, index, total_archs,
                    seeds, dashes))
        # logger.log('{:} {:} {:}'.format('-'*15, arch.tostr(), '-'*15))
        logger.log("{:} {:} {:}".format(dashes, arch, dashes))

        # Run this architecture once per seed; remember whether any seed
        # was skipped so the timing average is not polluted.
        has_continue = False
        for seed in seeds:
            to_save_name = sub_dir / "arch-{:06d}-seed-{:04d}.pth".format(
                index, seed)
            if to_save_name.exists():
                if not cover_mode:
                    logger.log(
                        "Find existing file : {:}, skip this evaluation".
                        format(to_save_name))
                    has_continue = True
                    continue
                logger.log(
                    "Find existing file : {:}, remove it before evaluation"
                    .format(to_save_name))
                os.remove(str(to_save_name))
            results = evaluate_all_datasets(
                CellStructure.str2structure(arch),
                datasets,
                xpaths,
                splits,
                use_less,
                seed,
                arch_config,
                workers,
                logger,
            )
            torch.save(results, to_save_name)
            logger.log(
                "{:} --evaluate-- {:06d}/{:06d} ({:06d}/{:06d})-th seed={:} done, save into {:}"
                .format(dashes, position, num_to_eval, index, total_archs,
                        seed, to_save_name))

        # Timing bookkeeping: only fully evaluated architectures update
        # the running average used for the remaining-time estimate.
        if not has_continue:
            epoch_time.update(time.time() - start_time)
        start_time = time.time()
        remaining = num_to_eval - position - 1
        need_time = "Time Left: {:}".format(
            convert_secs2time(epoch_time.avg * remaining, True))
        logger.log("This arch costs : {:}".format(
            convert_secs2time(epoch_time.val, True)))
        logger.log("{:}".format(long_stars))
        progress = "{:06d}/{:06d} ({:06d}/{:06d})-th done, left {:}".format(
            position, num_to_eval, index, total_archs, need_time)
        logger.log("{:}   {:74s}   {:}".format(short_stars, progress,
                                               short_stars))
        logger.log("{:}".format(long_stars))

    logger.close()
예제 #5
0
def create_result_count(used_seed, dataset, arch_config, results,
                        dataloader_dict):
    """Wrap one raw training-result dict into a ``ResultsCount`` object.

    A network is rebuilt from ``arch_config`` and the stored weights are
    loaded into it. Results containing a ``"train_times"`` entry are treated
    as the new format and copied over directly. Older results are completed
    by re-evaluating the network on the loaders in ``dataloader_dict``,
    which also provides the latency measurement.

    Args:
        used_seed: Seed recorded in the ``ResultsCount``.
        dataset: One of "cifar10-valid", "cifar10", "cifar100" or
            "ImageNet16-120" (only checked for old-format results).
        arch_config: Mapping with "channel", "num_cells", "arch_str" and
            "class_num".
        results: Raw result mapping produced by training.
        dataloader_dict: Loaders keyed by "<dataset>@<split>"; only read
            for old-format results.

    Returns:
        The populated ``ResultsCount`` instance.

    Raises:
        ValueError: If an old-format result names an unsupported dataset.
    """
    xresult = ResultsCount(
        dataset,
        results["net_state_dict"],
        results["train_acc1es"],
        results["train_losses"],
        results["param"],
        results["flop"],
        arch_config,
        used_seed,
        results["total_epoch"],
        None,
    )

    # Rebuild the network and load the stored weights into it.
    net_spec = {
        "name": "infer.tiny",
        "C": arch_config["channel"],
        "N": arch_config["num_cells"],
        "genotype": CellStructure.str2structure(arch_config["arch_str"]),
        "num_classes": arch_config["class_num"],
    }
    network = get_cell_based_tiny_net(dict2config(net_spec, None))
    network.load_state_dict(xresult.get_net_param())

    if "train_times" in results:  # new version
        xresult.update_train_info(
            results["train_acc1es"],
            results["train_acc5es"],
            results["train_losses"],
            results["train_times"],
        )
        xresult.update_eval(results["valid_acc1es"], results["valid_losses"],
                            results["valid_times"])
        return xresult

    def _evaluate(loader_dataset, split):
        """Run ``pure_evaluate`` on the loader stored for dataset/split."""
        loader = dataloader_dict["{:}@{:}".format(loader_dataset, split)]
        return pure_evaluate(loader, network.cuda())

    if dataset == "cifar10-valid":
        xresult.update_OLD_eval("x-valid", results["valid_acc1es"],
                                results["valid_losses"])
        loss, top1, _, latencies = _evaluate("cifar10", "test")
        final_epoch = results["total_epoch"] - 1
        xresult.update_OLD_eval("ori-test", {final_epoch: top1},
                                {final_epoch: loss})
        xresult.update_latency(latencies)
    elif dataset == "cifar10":
        xresult.update_OLD_eval("ori-test", results["valid_acc1es"],
                                results["valid_losses"])
        # Only the latency of this extra pass is recorded.
        _, _, _, latencies = _evaluate(dataset, "test")
        xresult.update_latency(latencies)
    elif dataset in ("cifar100", "ImageNet16-120"):
        xresult.update_OLD_eval("ori-test", results["valid_acc1es"],
                                results["valid_losses"])
        loss, top1, _, latencies = _evaluate(dataset, "valid")
        final_epoch = results["total_epoch"] - 1
        xresult.update_OLD_eval("x-valid", {final_epoch: top1},
                                {final_epoch: loss})
        loss, top1, _, latencies = _evaluate(dataset, "test")
        xresult.update_OLD_eval("x-test", {final_epoch: top1},
                                {final_epoch: loss})
        # Latency comes from the last ("test") evaluation.
        xresult.update_latency(latencies)
    else:
        raise ValueError("invalid dataset name : {:}".format(dataset))
    return xresult