Example #1
 def __init__(self, submodules: list, name: str, strategy_name: str):
     """
     :param submodules: list or nn.ModuleList of choices
     :param name: name of the architecture weight
     :param strategy_name: name of the architecture strategy to use
     """
     super().__init__(submodules)
     self._add_to_kwargs(name=name, strategy_name=strategy_name)
     self.sm = StrategyManager()
     self.ws = self.sm.make_weight(self.strategy_name, name, only_single_path=False, choices=self.submodules)
Example #2
def generate_from_name(name: str, save=True, verbose=True):
    genotype, compact = compact_from_name(name, verbose=verbose)
    run_configs = '{path_conf_tasks}/d1_dartsv1.run_config, {path_conf_net_search}darts.run_config'
    # create weight sharing cell model
    changes = {
        'cls_data': 'Cifar10Data',
        '{cls_data}.fake': True,
        '{cls_task}.save_del_old': False,
        '{cls_network_body}.cell_order': 'n, r',
        '{cls_network_body}.features_first_cell': 36 * 4,
        '{cls_network_stem}.features': 36 * 3,
        'cls_network_cells_primitives': "%s, %s" % (compact.get('primitives'), compact.get('primitives')),
    }
    task = Main.new_task(run_configs, args_changes=changes)
    net = task.get_method().get_network()
    args = task.args

    wss = StrategyManager().get_strategies()
    assert len(wss) == 1
    ws = wss[list(wss.keys())[0]]

    # fix arc, all block inputs use different weights
    # go through all weights in the search cell
    for n, w in ws.named_parameters_single():
        # figure out cell type ("normal", "reduce"), block index, and if it's the first, second, ... op of that block
        c_type, block_idx, num_inputs, num_idx = n.split('/')[-4:]
        block_idx = int(block_idx.split('-')[-1])
        num_idx = int(num_idx.split('-')[-1])
        # set all paths weights to zero
        w.data.zero_()
        # go through the cell description of the genotype, if input and op number match, set the weight to be higher
        for op_idx, from_idx in compact.get(c_type)[block_idx]:
            if num_idx == from_idx:
                w[op_idx] = 1
    ws.forward()

    # saving config now will only use the highest weighted connections, since we have a search network
    cfg = net.config(finalize=True, num_block_ops=2)
    if save:
        path = Builder.save_config(cfg, get_net_config_dir(genotype.source), name)
        print('Saved config: %s' % path)
    return net, cfg, args
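The weight-fixing loop above expects compact to map each cell type to a list of blocks, where every block lists (op_idx, from_idx) pairs. A hedged illustration of that structure (names and values are made up, not taken from compact_from_name):

# Illustrative only: a DARTS-like compact description as the loop above expects it.
# compact[cell_type] is a list of blocks; each block lists (op_idx, from_idx) pairs,
# i.e. which operation is applied to which input of that block.
compact = {
    'primitives': 'DARTSPrimitives',                  # assumed name, used for the cells' primitives
    'normal': [[(2, 0), (1, 1)], [(0, 0), (3, 2)]],   # two blocks with two inputs each
    'reduce': [[(4, 0), (2, 1)], [(1, 1), (0, 2)]],
}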
Example #3
 def __init__(self, submodules: list, name: str, strategy_name: str):
     """
     :param submodules: list or nn.ModuleList of choices
     :param name: name of the architecture weight
     :param strategy_name: name of the architecture strategy to use
     """
     assert None not in [self._depth, self._act_fun], "this class should not be initialized directly"
     super().__init__(submodules, name, strategy_name)
     # store previous names, get their number of choices, no need to store the own name
     sm = StrategyManager()
     self._all_prev_names = sm.ordered_names(unique=False)[-self._depth - 1:-1]
     self._all_prev_sizes = [sm.get_num_weight_choices(n) for n in self._all_prev_names]
     self._eye = np.eye(N=max(self._all_prev_sizes + [1]))
     self._attention_op = None
     self._expand_axis = []
Example #4
class MixedOp(SumParallelModules):
    """
    all op choices on one path in parallel,
    the weight strategy decides which results to compute and combine
    """

    def __init__(self, submodules: list, name: str, strategy_name: str):
        """
        :param submodules: list or nn.ModuleList of choices
        :param name: name of the architecture weight
        :param strategy_name: name of the architecture strategy to use
        """
        super().__init__(submodules)
        self._add_to_kwargs(name=name, strategy_name=strategy_name)
        self.sm = StrategyManager()
        self.ws = self.sm.make_weight(self.strategy_name, name, only_single_path=False, choices=self.submodules)

    def config(self, finalize=True, **_) -> dict:
        if finalize:
            indices = self.ws.get_finalized_indices(self.name)
            if len(indices) == 1:
                return self.submodules[indices[0]].config(finalize=finalize, **_)
            return SumParallelModules([self.submodules[i] for i in indices]).config(finalize=finalize, **_)
        else:
            return super().config(finalize=finalize, **_)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.ws.combine(self.name, x, self.submodules)
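A minimal usage sketch for MixedOp, assuming a RandomChoiceStrategy registered under the name 'default' and candidate layers as used in the shape tests further below (library imports are omitted, as in the examples themselves):

# Hedged sketch, not the library's canonical setup.
StrategyManager().add_strategy(RandomChoiceStrategy(max_epochs=1))
candidates = [ConvLayer(k_size=3), ConvLayer(k_size=5), PoolingLayer(k_size=3)]
op = MixedOp(candidates, name='cell0/op0', strategy_name='default')
# after building the op / surrounding network to the input shape:
# StrategyManager().build()
# StrategyManager().forward()   # sample or update the architecture weights
# y = op(x)                     # combines the candidate outputs according to the weights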
Example #5
    def __init__(self, args: Namespace, *args_, **kwargs):
        AbstractTask.__init__(self, args, *args_, **kwargs)

        # for architecture weights
        log_headline(self.logger, 'adding Strategy and Data')
        StrategyManager().add_strategy(RandomChoiceStrategy(max_epochs=1))

        # data
        data_set = self._parsed_meta_argument(Register.data_sets,
                                              'cls_data',
                                              args,
                                              index=None).from_args(args,
                                                                    index=None)
        self.batch_size = data_set.get_batch_size(train=False)

        # device handling
        self.devices_handler = self._parsed_meta_argument(Register.devices_managers, 'cls_device', args, index=None)\
            .from_args(self.seed, self.is_deterministic, args, index=None)
        self.mover = self.devices_handler.allocate_devices(num=-1)

        # network
        log_headline(self.logger, 'adding Network')
        self.net = self._parsed_meta_argument(Register.networks,
                                              'cls_network',
                                              args,
                                              index=None).from_args(args)
        self.net.build(s_in=data_set.get_data_shape(),
                       s_out=data_set.get_label_shape())
        self.net = self.mover.move_module(self.net)

        # profiler
        log_headline(self.logger, 'adding Profiler')
        self.profiler = self._parsed_meta_argument(Register.profilers, 'cls_profiler', args, index=None)\
            .from_args(args, index=None, is_test_run=self.is_test_run)
        assert isinstance(self.profiler, AbstractProfiler)
Example #6
    def __init__(self, args: Namespace, wildcards: dict, descriptions: dict = None):
        super().__init__()

        # args, seed
        self.args = args
        self.save_dir = self._parsed_argument('save_dir', args)
        self.is_test_run = self._parsed_argument('is_test_run', args)
        self.seed = self._parsed_argument('seed', args)
        self.is_deterministic = self._parsed_argument('is_deterministic', args)
        random.seed(self.seed)
        np.random.seed(self.seed)
        torch.manual_seed(self.seed)
        if self.is_deterministic:
            # see https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
            os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":4096:8")
            torch.set_deterministic(self.is_deterministic)

        # maybe delete old dir, note arguments, save run_config
        if self._parsed_argument('save_del_old', args):
            shutil.rmtree(self.save_dir, ignore_errors=True)
        os.makedirs(self.save_dir, exist_ok=True)
        save_as_json(args, get_task_config_path(self.save_dir), wildcards)
        dump_system_info(self.save_dir + 'sysinfo.txt')

        # logging
        self.log_file = '%slog_task.txt' % self.save_dir
        LoggerManager().set_logging(default_save_file=self.log_file)
        self.logger = self.new_logger(index=None)
        log_args(self.logger, None, self.args, add_git_hash=True, descriptions=descriptions)
        Register.log_all(self.logger)

        # reset weight strategies so that consecutive tasks do not conflict with each other
        StrategyManager().reset()

        self.methods = []
Example #7
 def setup_strategy(self) -> StrategyManager:
     """ set up the strategy for architecture weights """
     key, alpha, grace_epochs = self._parsed_arguments(
         ['key', 'alpha', 'grace_epochs'], self.hparams)
     return StrategyManager().add_strategy(
         MdlStrategy(self.max_epochs,
                     key=key,
                     alpha=alpha,
                     grace_epochs=grace_epochs))
Example #8
    def _run(self, save=False):
        # value spaces
        values = set()
        sm = StrategyManager()

        # add all evaluated architectures of the benchmarks
        for bs in self.benchmark_sets:
            assert isinstance(bs, MiniNASTabularBenchmark)
            l0, l1 = len(sm.ordered_names(unique=True)), bs.get_value_space().num_choices()
            assert l0 == l1, "Num choices of the network space (%d) and the bench space (%d) must match" % (l0, l1)
            for r in bs.get_all():
                values.add(r.arch_tuple)
        if len(values) > 0:
            self.logger.info(
                "Added %d architectures from given benchmark set(s) to the list"
                % len(values))

        # if the space is smaller than desired, add random architectures
        network = self.get_method().get_network()
        assert isinstance(network, SearchUninasNetwork)
        net_space = sm.get_value_space()
        if self.measure_min > len(values):
            self.logger.info("Adding random architectures, have %d/%d" %
                             (len(values), self.measure_min))
            while len(values) < self.measure_min:
                values.add(net_space.random_sample())

        # evaluate the given architectures
        self._architecture_space = SpecificValueSpace(list(values))
        algorithm, population = super()._run(save=save)

        # add info to the candidates, e.g. from profilers, such as loss/flops/latency/macs
        pass

        # create a new bench
        bench = MiniNASSearchTabularBenchmark.make_from_population(
            population, self.get_method())
        log_headline(self.logger, "Created bench file from super-network")
        bench.print_info(self.logger.info)
        bench.save_in_dir(self.save_dir)
        explore(bench, self.logger, n=10)
Example #9
    def _build(self, s_in: Shape, c_out: int) -> Shape:
        conv_kwargs = dict(dilation=self.dilation, padding=self.padding)
        c_in = s_in.num_features()
        self.has_skip = self.stride == 1 and c_in == c_out
        for e in range(len(self.expansions)):
            for k in range(len(self.k_sizes)):
                self._choices_by_idx.append((e, k))
        if self.has_skip and isinstance(self.skip_op, str):
            self.skip = Register.network_layers.get(self.skip_op)()
            self.skip.build(s_in, c_out)
            self._choices_by_idx.append(('skip', 'skip'))
        self.ws = StrategyManager().make_weight(self.strategy_name, self.name, only_single_path=True,
                                                num_choices=len(self._choices_by_idx))

        for e in self.expansions:
            c_mid = int(c_in * e)
            # pw in
            self.pw_in.append(nn.Sequential(
                get_conv2d(c_in, c_mid, k_size=self.k_size_in, groups=1, **conv_kwargs),
                nn.BatchNorm2d(c_mid, affine=self.bn_affine),
                Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
            ))
            # dw conv ops with different kernel sizes
            convs = nn.ModuleList([])
            for k in self.k_sizes:
                convs.append(nn.Sequential(
                    get_conv2d(c_mid, c_mid, k_size=k, stride=self.stride, groups=-1, **conv_kwargs),
                    nn.BatchNorm2d(c_mid, affine=self.bn_affine),
                    Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
                ))
            self.dw_conv.append(convs)
            # dw optional attention module
            if self.has_att:
                self.dw_att.append(AbstractAttentionModule.module_from_dict(c_mid, c_substitute=c_in,
                                                                            att_dict=self.att_dict))
            # pw out
            self.pw_out.append(nn.Sequential(
                get_conv2d(c_mid, c_out, k_size=self.k_size_out, groups=1, **conv_kwargs),
                nn.BatchNorm2d(c_out, affine=self.bn_affine),
            ))
        return self.probe_outputs(s_in)
Example #10
    def profile(self, network: SearchUninasNetwork, mover: AbstractDeviceMover, batch_size: int):
        """ profile the network """
        assert self.profile_fun is not None, "Can not measure if there is no profile function!"

        # unnecessary here, could check if this is a test and shorten everything
        # is_test_run = self.get('is_test_run')

        # set up nested structure if it does not exist
        self.data['measured'] = self.data.get('measured', {})

        # stem
        if self.data.get('measured').get('stem', None) is None:
            self.logger.info('Measuring the stem')
            stem = network.get_stem()
            self.data['measured']['stem'] =\
                self.profile_fun.profile(stem, stem.get_shape_in(), mover, batch_size)

        # cells
        self.data['measured']['cells'] = self.data.get('measured').get('cells', {})
        sm = StrategyManager()
        cells = network.get_cells()
        n_choices = sm.get_num_choices()
        if len(cells) != len(n_choices):
            raise ValueError("Number of cells (%d) must match number of arc choices (%d)" % (len(cells), len(n_choices)))
        network.set_forward_strategy(False)
        for i1, (cell, n) in enumerate(zip(cells, n_choices)):
            self.data['measured']['cells'][i1] = self.data['measured']['cells'].get(i1, {})
            for i2 in range(n):
                if self.data['measured']['cells'][i1].get(i2, None) is None:
                    self.logger.info('Measuring cell %d, option %d' % (i1, i2))
                    sm.forward_const(i2)
                    self.data['measured']['cells'][i1][i2] =\
                        self.profile_fun.profile(cell, cell.get_shape_in(), mover, batch_size)

        # final head
        if self.data.get('measured').get('head', None) is None:
            self.logger.info('Measuring the final head')
            head = network.get_heads()[-1]
            self.data['measured']['head'] =\
                self.profile_fun.profile(head, head.get_shape_in(), mover, batch_size)
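After profile() has completed, self.data['measured'] holds one entry per profiled module; a sketch of the resulting structure (each value is whatever profile_fun.profile returns, e.g. a latency):

# self.data['measured'] = {
#     'stem':  <value>,
#     'cells': {0: {0: <value>, 1: <value>, ...},   # cell index -> choice index -> value
#               1: {...},
#               ...},
#     'head':  <value>,
# }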
Example #11
    def make_from_population(cls, population: Population, method: AbstractMethod):
        """
        creating a mini bench dataset from an evaluated super-network
        """
        results = {}
        arch_to_idx = {}
        tuple_to_str = {}
        tuple_to_idx = {}

        space = StrategyManager().get_value_space(unique=True)
        data_set_name = method.get_data_set().__class__.__name__
        space_name = method.get_network().get_model_name()
        default_result_type = "test"

        for i, candidate in enumerate(population.get_candidates()):
            # first use all estimated metrics
            # if they contain e.g. "acc1/valid", create a sub dict
            metrics = {}
            for k, v in candidate.metrics.items():
                splits = k.split('/')
                if len(splits) == 1:
                    metrics[splits[0]] = {data_set_name: v}
                else:
                    metrics[splits[0]] = metrics.get(splits[0], {})
                    metrics[splits[0]][data_set_name] = metrics[splits[0]].get(data_set_name, {})
                    metrics[splits[0]][data_set_name][splits[1]] = v
                    default_result_type = splits[1]
            # now make sure all keys exist
            for k in MiniResult.get_metric_keys():
                metrics[k] = metrics.get(k, {data_set_name: -1})
            # result
            r = MiniResult(
                arch_index=i,
                arch_str="%s(%s)" % (space_name, ", ".join([str(v) for v in candidate.values])),
                arch_tuple=candidate.values,
                **metrics
            )

            assert tuple_to_str.get(r.arch_tuple) is None, "can not yet merge duplicate architecture results"
            results[i] = r
            arch_to_idx[r.arch_str] = i
            tuple_to_idx[r.arch_tuple] = i
            tuple_to_str[r.arch_tuple] = r.arch_str

        data_sets = list(results.get(0).params.keys())
        return MiniNASSearchTabularBenchmark(
            default_data_set=data_sets[0],
            default_result_type=default_result_type,
            bench_name="%s on %s" % (space_name, data_sets[0]),
            bench_description="super-network evaluation results",
            value_space=space, results=results, arch_to_idx=arch_to_idx,
            tuple_to_str=tuple_to_str, tuple_to_idx=tuple_to_idx)
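A worked example of the metric-splitting loop above, with hypothetical candidate metrics and data_set_name == 'Cifar10Data':

# candidate.metrics = {'acc1/valid': 0.93, 'loss': 0.31}   # hypothetical values
# after the loop:
# metrics = {
#     'acc1': {'Cifar10Data': {'valid': 0.93}},
#     'loss': {'Cifar10Data': 0.31},
# }
# default_result_type == 'valid'; any missing MiniResult metric keys are filled with {'Cifar10Data': -1}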
Example #12
    def log_detailed(self):
        # log some things
        log_headline(self.logger, 'Trainer, Method, Data, ...')
        rows = [('Trainer', '')]
        for i, trainer in enumerate(self.trainer):
            rows.append((' (%d)' % i, trainer.str()))
        log_in_columns(self.logger, rows)

        for i, method in enumerate(self.methods):
            log_headline(self.logger, "Method %d/%d" % (i+1, len(self.methods)), target_len=80)
            method.log_detailed(self.logger)

        StrategyManager().log_detailed(self.logger)
Example #13
 def __init__(self, submodules: list, name: str, strategy_name: str, depth=0):
     """
     :param submodules: list or nn.ModuleList of choices
     :param name: name of the architecture weight
     :param strategy_name: name of the architecture strategy to use
     :param depth: depth, how many previous architecture decisions to consider
     """
     super().__init__(submodules, name, strategy_name)
     # store previous names in case this mixed op will be deepened, no need to store the own name
     self._add_to_kwargs(depth=depth)
     self._all_prev_names = StrategyManager().ordered_names(unique=False)[-self.max_depth - 1:-1]
     self._state_dicts = {}
     self._last_state = 'w'
     self.change_depth(new_depth=self.depth)
Example #14
 def _on_epoch_start(self) -> dict:
     log_dict = super()._on_epoch_start()
     tau_0, tau_grace, beta = self._parsed_arguments(['tau_0', 'tau_grace', 'beta'], self.hparams)
     for strategy in StrategyManager().get_strategies_list():
         strategy.tau = tau_0 * beta**self.current_epoch
         log_dict = self._add_to_dict(log_dict, dict(tau=strategy.tau))
         self.update_architecture_weights = strategy.tau < tau_grace
         if self.update_architecture_weights:
             strategy.mask_all_weights_below(0.4, div_by_numel=True)
             log_dict.update(
                 strategy.get_masks_log_dict(prefix='asap/masks'))
             self.set_loader_multiples((1, 1))
         else:
             self.set_loader_multiples((1, 0))
     # return only after all strategies have been processed
     return log_dict
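The temperature follows the exponential schedule tau = tau_0 * beta**epoch; once it drops below tau_grace, weights are masked and the loader multiples switch to (1, 1). A small illustration with assumed hyperparameter values:

# Illustrative values only; tau_0, tau_grace and beta are parsed hyperparameters.
tau_0, beta, tau_grace = 1.6, 0.95, 0.4
taus = [tau_0 * beta ** epoch for epoch in range(30)]
# taus[0] = 1.6, taus[10] ≈ 0.96, taus[28] ≈ 0.38 < tau_grace -> masking starts around epoch 28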
Example #15
 def change_depth(self, new_depth=1):
     """
     called by a VariableDepthMixedOpCallback,
     increases the recursive depth of the op, copying the weights, using a copy depending on a previous layer choice
     """
     if new_depth > 0:
         assert new_depth >= self.depth, "Can not reduce the depth"
         assert new_depth <= self.max_depth, "Can not increase the depth beyond %d" % self.max_depth
         assert StrategyManager().is_only_single_path()
     while self.depth < min([new_depth, len(self._all_prev_names)]):
         if len(self._state_dicts) == 0:
             self._state_dicts[self._last_state] = self.submodules.state_dict()
         # enlarge dict of stored state dicts by one layer
         new_state_dicts = {'0.%s' % k: v for k, v in self._state_dicts.items()}
         self._state_dicts = new_state_dicts
         self._last_state = '0.%s' % self._last_state
         self.depth += 1
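Each depth increase prefixes every stored state-dict key with '0.', so the stored weights keep lining up with the recursively nested submodules; a worked example of the bookkeeping (state dict contents abbreviated as sd):

# depth 0: _state_dicts == {},              _last_state == 'w'
# depth 1: _state_dicts == {'0.w': sd},     _last_state == '0.w'
# depth 2: _state_dicts == {'0.0.w': sd},   _last_state == '0.0.w'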
Example #16
    def profile(self, network: SearchUninasNetwork, mover: AbstractDeviceMover,
                batch_size: int):
        """ profile the network """
        assert self.profile_fun is not None, "Can not measure if there is no profile function!"
        sm = StrategyManager()

        # step 1) generate a dataset
        # at some point, if other predictors are attempted (nearest neighbor, SVM, ...) step1 code could be moved
        # to a shared parent class

        # number of choices at every position
        max_choices = sm.get_num_choices()
        print("max choices", max_choices)

        # get the search space, we can sample random architectures from it
        space = sm.get_value_space(unique=True)
        for i in range(10):
            print("random arc %d: %s" % (i, space.random_sample()))

        # make sure that a forward pass will not change the network topology
        network.set_forward_strategy(False)

        # find out the size of the network inputs
        shape_in = network.get_shape_in()

        # fix the network architecture, profile it
        sm.forward(fixed_arc=space.random_sample())
        value = self.profile_fun.profile(module=network,
                                         shape_in=shape_in,
                                         mover=mover,
                                         batch_size=batch_size)
        print('value 1', value)

        # alternate way: instead of using one over-complete network that has unused modules,
        # - get the current network architecture (the last set fixed_arc indices will be used now)
        # - build it stand-alone (exactly as the "true" network would be used later), with the same input/output sizes
        # - place it on the profiled device
        # - profile that instead
        # this takes longer, but the mismatch between over-complete and stand-alone is very interesting to explore
        # can make this an option via Argument
        network_config = network.config(finalize=True)
        network_body = Builder().from_config(network_config)
        standalone = RetrainUninasNetwork(model_name='__tmp__',
                                          net=network_body,
                                          checkpoint_path='',
                                          assert_output_match=True)
        standalone.build(network.get_shape_in(), network.get_shape_out()[0])
        standalone = mover.move_module(standalone)
        value = self.profile_fun.profile(module=standalone,
                                         shape_in=shape_in,
                                         mover=mover,
                                         batch_size=batch_size)
        print('value 2', value)
Example #17
 def test_rebuild(self):
     """
     getting finalized configs from which we can build modules
     """
     builder = Builder()
     StrategyManager().delete_strategy('default')
     StrategyManager().add_strategy(RandomChoiceStrategy(max_epochs=1))
     n, c, h, w = 2, 8, 16, 16
     x = torch.empty(size=[n, c, h, w])
     shape = Shape([c, h, w])
     layers = [
         FusedMobileInvertedConvLayer(name='mmicl',
                                      k_sizes=(3, 5, 7),
                                      expansions=(3, 6)),
         SuperConvThresholdLayer(k_sizes=(3, 5, 7)),
         SuperSepConvThresholdLayer(k_sizes=(3, 5, 7)),
         SuperMobileInvertedConvThresholdLayer(k_sizes=(3, 5, 7),
                                               expansions=(3, 6),
                                               sse_dict=dict(c_muls=(0.0,
                                                                     0.25,
                                                                     0.5))),
         LinearTransformerLayer(),
         SuperConvLayer(k_sizes=(3, 5, 7), name='scl1'),
         SuperSepConvLayer(k_sizes=(3, 5, 7), name='scl2'),
         SuperMobileInvertedConvLayer(k_sizes=(3, 5, 7),
                                      name='scl3',
                                      expansions=(2, 3, 4, 6)),
     ]
     for layer in layers:
         assert layer.build(shape, c) == shape
     StrategyManager().build()
     StrategyManager().forward()
     for layer in layers:
         print('\n' * 2)
         print(layer.__class__.__name__)
         for i in range(3):
             StrategyManager().randomize_weights()
             StrategyManager().forward()
             for finalize in [False, True]:
                 cfg = layer.config(finalize=finalize)
                 print('\t', i, 'finalize', finalize)
                 print('\t\tconfig dct:', cfg)
                 cfg_layer = builder.from_config(cfg)
                 assert cfg_layer.build(shape, c) == shape
                 cfg_layer.forward(x)
                 print('\t\tmodule str:', cfg_layer.str()[1:])
                 del cfg, cfg_layer
Example #18
 def setup_strategy(self) -> StrategyManager:
     """ set up the strategy for architecture weights """
     tau_0 = self._parsed_argument('tau_0', self.hparams)
     return StrategyManager().add_strategy(
         DifferentiableStrategy(self.max_epochs, tau=tau_0, use_mask=True))
Example #19
 def setup_strategy(self) -> StrategyManager:
     """ set up the strategy for architecture weights """
     return StrategyManager().add_strategy(
         FairRandomChoiceStrategy(self.max_epochs, assert_same_length=True))
Example #20
 def setup_strategy(self) -> StrategyManager:
     """ set up the strategy for architecture weights """
     return StrategyManager().add_strategy(
         RandomChoiceStrategy(self.max_epochs))
Example #21
 def setup_strategy(self) -> StrategyManager:
     """ set up the strategy for architecture weights """
     return StrategyManager().add_strategy(DifferentiableStrategy(self.max_epochs, use_mask=False))
Example #22
class SearchUninasNetwork(AbstractUninasNetwork):

    def __init__(self, model_name: str, net: AbstractNetworkBody, do_forward_strategy=True, *args, **kwargs):
        super().__init__(model_name=model_name, net=net, *args, **kwargs)
        self.do_forward_strategy = do_forward_strategy  # unnecessary line to remove "error" highlighting
        self._add_to_kwargs(do_forward_strategy=self.do_forward_strategy)
        self.strategy_manager = StrategyManager()
        self.strategies = None

    @classmethod
    def from_args(cls, args: Namespace, index=None, weight_strategies: Union[dict, str] = None)\
            -> 'SearchUninasNetwork':
        """
        :param args: global argparse namespace
        :param index: argument index
        :param weight_strategies: {strategy name: [cell indices]}, or name used for all, or None for defaults
        """
        all_parsed = cls._all_parsed_arguments(args)
        cls_net = cls._parsed_meta_argument(Register.network_bodies, 'cls_network_body', args, index=index)
        net = cls_net.search_network_from_args(args, index=index, weight_strategies=weight_strategies)
        return cls(cls.__name__, net, **all_parsed)

    @classmethod
    def meta_args_to_add(cls) -> [MetaArgument]:
        """
        list meta arguments to add to argparse for when this class is chosen,
        classes specified in meta arguments may have their own respective arguments
        """
        return super().meta_args_to_add() + [
            MetaArgument('cls_network_body', Register.network_bodies, help_name='network', allowed_num=1),
        ]

    def _build2(self, s_in: Shape, s_out: Shape) -> ShapeList:
        """ build the network """
        s = self.net.build(s_in, s_out)
        self.strategies = self.strategy_manager.get_strategies_list()
        self.strategy_manager.build()
        return s

    def get_strategy_manager(self) -> StrategyManager:
        return self.strategy_manager

    def set_forward_strategy(self, forward_strategy: bool):
        self.do_forward_strategy = forward_strategy

    def get_forward_strategy(self) -> bool:
        return self.do_forward_strategy

    def forward(self, x: torch.Tensor, ws_kwargs: dict = None, **net_kwargs) -> [torch.Tensor]:
        """
        forward first the weight strategy, then the network
        """
        if self.do_forward_strategy:
            self.forward_strategy(**({} if ws_kwargs is None else ws_kwargs))
        return super().forward(x, **net_kwargs)

    def forward_net(self, x: torch.Tensor, **net_kwargs) -> [torch.Tensor]:
        """
        forward only the network
        """
        return self.net(x, **net_kwargs)

    def forward_strategy(self, **ws_kwargs):
        """
        forward only the weight strategy
        """
        self.strategy_manager.forward(**ws_kwargs)

    def str(self, depth=0, **_) -> str:
        r = '{d}{name}(\n{ws},{net}\n{d}])'.format(**{
            'd': '{d}',
            'd1': '{d1}',
            'name': self.__class__.__name__,
            'ws': '{d1}Strategies: [%s]' % ', '.join([ws.str() for ws in self.strategies]),
            'net': self.net.str(depth=depth+1, max_depth=self.log_detail, **_),
        })
        r = r.replace('{d}', '. '*depth).replace('{d1}', '. '*(depth+1))
        return r

    def config(self, finalize=True, **_) -> dict:
        if finalize:
            return self.net.config(finalize=finalize, **_)
        return super().config(finalize=finalize, **_)

    def named_net_arc_parameters(self) -> (list, list):
        # all named parameters
        net_params, arc_params, duplicate_idx = list(self.net.named_parameters()), [], []
        for ws in self.strategies:
            arc_params += list(ws.named_parameters())
        # remove arc parameters from the network
        for an, ap in arc_params:
            for idx, (n, p) in enumerate(net_params):
                if ap is p:
                    duplicate_idx.append(idx)
        for idx in sorted(duplicate_idx, reverse=True):
            net_params.pop(idx)
        return net_params, arc_params

    def track_used_params(self, x: torch.Tensor) -> Tracker:
        """
        track which weights are used for the current architecture,
        and in which cell
        """
        tracker = Tracker()
        is_train = self.training
        self.eval()
        handles = []
        ws_modules = []
        x = x.to(self.get_device())

        # find all modules that have a weight strategy, add hooks
        for name, module in self.named_modules():
            if hasattr(module, 'ws') and isinstance(module.ws, (AbstractWeightStrategy, StrategyManager)):
                ws_modules.append(module)
                for name2, m2 in module.named_modules():
                    if len(get_to_print(m2)) >= 1:
                        handles.append(m2.register_forward_hook(Hook(tracker, 'net.%s.%s' % (name, name2))))

        # forward pass with the current arc, all used weights are tracked
        self.forward_net(x)

        tracker.finalize()
        for h in handles:
            h.remove()
        self.train(is_train)
        return tracker

    @classmethod
    def get_space_tuple(cls, unique=True, flat=False) -> tuple:
        """ tuple of final topology """
        return tuple(StrategyManager().get_all_finalized_indices(unique=unique, flat=flat))
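Since named_net_arc_parameters() separates the architecture weights from the regular network weights, they can be driven by different optimizers; a hedged sketch of such a DARTS-style bi-level setup (the optimizer types and hyperparameters are assumptions, not the library's trainer code):

import torch

# net is a built SearchUninasNetwork
net_params, arc_params = net.named_net_arc_parameters()
opt_net = torch.optim.SGD([p for _, p in net_params], lr=0.025, momentum=0.9, weight_decay=3e-4)
opt_arc = torch.optim.Adam([p for _, p in arc_params], lr=3e-4, betas=(0.5, 0.999))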
Example #23
    def test_output_shapes(self):
        """
        expected output shapes of standard layers
        """
        Builder()
        StrategyManager().delete_strategy('default')
        StrategyManager().add_strategy(RandomChoiceStrategy(max_epochs=1))

        bs, c1, c2, hw1, hw2 = 4, 4, 8, 32, 16
        s_in = Shape([c1, hw1, hw1])
        x = torch.empty(size=[bs] + s_in.shape)

        case_s1_c1 = (c1, 1, Shape([c1, hw1, hw1]))
        case_s1_c2 = (c2, 1, Shape([c2, hw1, hw1]))
        case_s2_c1 = (c1, 2, Shape([c1, hw2, hw2]))
        case_s2_c2 = (c2, 2, Shape([c2, hw2, hw2]))

        for cls, cases, kwargs in [
            (SkipLayer, [case_s1_c1, case_s1_c2], dict()),
            (ZeroLayer, [case_s1_c1, case_s1_c2, case_s2_c1,
                         case_s2_c2], dict()),
            (FactorizedReductionLayer, [case_s2_c1, case_s2_c2], dict()),
            (PoolingLayer, [case_s1_c1, case_s1_c2, case_s2_c1,
                            case_s2_c2], dict(k_size=3)),
            (ConvLayer, [case_s1_c1, case_s1_c2, case_s2_c1,
                         case_s2_c2], dict(k_size=3)),
            (SepConvLayer, [case_s1_c1, case_s1_c2, case_s2_c1,
                            case_s2_c2], dict(k_size=3)),
            (MobileInvertedConvLayer,
             [case_s1_c1, case_s1_c2, case_s2_c1, case_s2_c2], dict(k_size=3)),
            (MobileInvertedConvLayer,
             [case_s1_c1, case_s1_c2, case_s2_c1,
              case_s2_c2], dict(k_size=(3, ))),
            (MobileInvertedConvLayer,
             [case_s1_c1, case_s1_c2, case_s2_c1, case_s2_c2],
             dict(k_size=(3, 5, 7), k_size_in=(1, 1), k_size_out=(1, 1))),
            (FusedMobileInvertedConvLayer,
             [case_s1_c1, case_s1_c2, case_s2_c1, case_s2_c2],
             dict(name='mmicl1',
                  k_sizes=(3, 5, 7),
                  k_size_in=(1, 1),
                  k_size_out=(1, 1))),
            (FusedMobileInvertedConvLayer,
             [case_s1_c1, case_s1_c2, case_s2_c1, case_s2_c2],
             dict(name='mmicl2',
                  k_sizes=((3, 5), (3, 5, 7)),
                  k_size_in=(1, 1),
                  k_size_out=(1, 1))),
            (ShuffleNetV2Layer, [case_s1_c1, case_s1_c2,
                                 case_s2_c2], dict(k_size=3)),
            (ShuffleNetV2XceptionLayer, [case_s1_c1, case_s1_c2,
                                         case_s2_c2], dict(k_size=3)),
            (LinearTransformerLayer, [case_s1_c1, case_s1_c2], dict()),
            (SuperConvThresholdLayer,
             [case_s1_c1, case_s1_c2, case_s2_c1,
              case_s2_c2], dict(k_sizes=(3, 5, 7))),
            (SuperSepConvThresholdLayer,
             [case_s1_c1, case_s1_c2, case_s2_c1,
              case_s2_c2], dict(k_sizes=(3, 5, 7))),
            (SuperMobileInvertedConvThresholdLayer,
             [case_s1_c1, case_s1_c2, case_s2_c1, case_s2_c2],
             dict(k_sizes=(3, 5, 7),
                  expansions=(3, 6),
                  sse_dict=dict(c_muls=(0.0, 0.25, 0.5)))),
            (SuperConvLayer, [case_s1_c1, case_s1_c2, case_s2_c1,
                              case_s2_c2], dict(k_sizes=(3, 5, 7),
                                                name='scl')),
            (SuperSepConvLayer,
             [case_s1_c1, case_s1_c2, case_s2_c1,
              case_s2_c2], dict(k_sizes=(3, 5, 7), name='sscl')),
            (SuperMobileInvertedConvLayer,
             [case_s1_c1, case_s1_c2, case_s2_c1, case_s2_c2],
             dict(k_sizes=(3, 5, 7), name='smicl', expansions=(3, 6))),
            (AttentionLayer, [case_s1_c1],
             dict(att_dict=dict(att_cls='EfficientChannelAttentionModule'))),
            (AttentionLayer, [case_s1_c1],
             dict(att_dict=dict(att_cls='SqueezeExcitationChannelModule'))),
        ]:
            for c, stride, shape_out in cases:
                m1 = cls(stride=stride, **kwargs)
                s_out = m1.build(s_in, c)
                assert s_out == shape_out, 'Expected output shape does not match, %s, build=%s / expected=%s' %\
                                           (cls.__name__, s_out, shape_out)
                assert_output_shape(m1, x, [bs] + shape_out.shape)
                print('%s(stride=%d, c_in=%d, c_out=%d)' %
                      (cls.__name__, stride, c1, c))
Example #24
 def setup_strategy(self) -> StrategyManager:
     """ set up the strategy for architecture weights """
     tau0 = self._parsed_argument('tau0', self.hparams)
     return StrategyManager().add_strategy(
         GDASStrategy(self.max_epochs, tau0=tau0, use_mask=False))
Example #25
    def _build2(self, s_in: Shape, s_out: Shape) -> ShapeList:
        """ build the network """

        # find the search config
        if not os.path.isfile(self.search_config_path):
            self.search_config_path = Builder.find_net_config_path(
                self.search_config_path, pattern='search')

        # create a temporary search strategy
        tmp_s = RandomChoiceStrategy(max_epochs=1, name='__tmp__')
        sm = StrategyManager()
        assert len(sm.get_strategies_list()) == 0, "can not load when there already is a search network"
        sm.add_strategy(tmp_s)
        sm.set_fixed_strategy_name('__tmp__')

        # create a search network
        search_net = Register.builder.load_from_config(self.search_config_path)
        assert isinstance(search_net, SearchUninasNetwork)
        search_net.build(s_in, s_out)
        search_net.set_forward_strategy(False)

        # set the architecture, get the config
        req_gene = ""
        if self.gene == 'random':
            search_net.forward_strategy()
            gene = sm.get_all_finalized_indices(unique=True, flat=True)
            self.model_name = "random(%s)" % str(gene)
            req_gene = " (%s)" % self.gene
        else:
            gene = split(self.gene, int)
        l0, l1 = len(sm.get_all_finalized_indices(unique=True)), len(gene)
        assert l0 == l1, "number of unique choices in the network (%d) must match length of the gene (%d)" % (l0, l1)
        search_net.forward_strategy(fixed_arc=gene)
        config = search_net.config(finalize=True)

        # clean up
        sm.delete_strategy('__tmp__')
        del sm
        del search_net

        # build the actually used finalized network
        LoggerManager().get_logger().info(
            "Extracting architecture %s%s from the super-network" %
            (gene, req_gene))
        self.net = Register.builder.from_config(config)
        return self.net.build(s_in, s_out)
Example #26
    def _initialize_weights(self, net: AbstractModule, logger: logging.Logger):
        assert isinstance(net, AbstractUninasNetwork), "This initializer will not work with external networks!"
        search_config = Builder.find_net_config_path(self.path,
                                                     pattern='search')

        checkpoint = CheckpointCallback.load_last_checkpoint(self.path)
        state_dict = checkpoint.get('state_dict')

        # figure out correct weights in super-network checkpoint
        if len(self.gene) > 0:
            log_headline(logger,
                         "tmp network to track used params",
                         target_len=80)
            sm = StrategyManager()
            tmp_s = RandomChoiceStrategy(max_epochs=1, name='__tmp__')
            assert len(sm.get_strategies_list()) == 0, "can not load when there already is a search network"
            sm.add_strategy(tmp_s)
            sm.set_fixed_strategy_name('__tmp__')

            search_net = Builder().load_from_config(search_config)
            assert isinstance(search_net, SearchUninasNetwork)
            s_in, s_out = net.get_shape_in(), net.get_shape_out()
            search_net.build(s_in, s_out[0])
            search_net.set_forward_strategy(False)
            search_net.forward_strategy(fixed_arc=self.gene)
            tracker = search_net.track_used_params(
                s_in.random_tensor(batch_size=2))
            # tracker.print()

            logger.info(' > loading weights of gene %s from checkpoint "%s"' %
                        (str(self.gene), self.path))
            target_dict = net.state_dict()
            target_names = list(target_dict.keys())
            new_dict = {}

            # add all stem and head weights, they are at the front of the dict and have pretty much the same name
            log_columns = [('shape in checkpoint', 'name in checkpoint',
                            'name in network', 'shape in network')]
            for k, v in state_dict.items():
                if '.stem.' in k or '.heads.' in k:
                    tn = target_names.pop(0)
                    ts = target_dict[tn].shape
                    log_columns.append(
                        (str(list(v.shape)), k, tn, str(list(ts))))
                    n = k.replace('net.', '', 1)
                    assert n == tn
                    new_dict[n] = v

            # add all cell weights, can generally not compare names, only shapes
            for i, tracker_cell_entry in enumerate(tracker.get_cells()):
                for entry in tracker_cell_entry.get_pareto_best():
                    tn = target_names.pop(0)
                    ts = target_dict[tn].shape
                    log_columns.append((str(list(entry.shape)), entry.name, tn,
                                        str(list(ts))))
                    assert entry.shape == ts,\
                        'Mismatching shapes for "%s" and "%s", is the gene correct?' % (entry.name, tn)
                    new_dict[tn] = state_dict[entry.name]

            # log matches, load
            log_in_columns(logger, log_columns, add_bullets=True)
            net.load_state_dict(new_dict, strict=self.strict)

            # clean up
            del search_net
            sm.delete_strategy('__tmp__')
            del sm

        # simply load
        else:
            logger.info(' > simply loading state_dict')
            net.load_state_dict(state_dict, strict=self.strict)
Example #27
    def __init__(self,
                 c_in: int,
                 c_out: int,
                 name: str,
                 strategy_name='default',
                 k_sizes=(3, 5),
                 c_multipliers=(0.5, 1.0),
                 dilation=1,
                 stride=1,
                 padding='same',
                 groups=-1,
                 bias=False):
        """
        A super-kernel that applies convolution with a masked weight, using architecture weights to learn the best
        masking, i.e. kernel size and number of output channels. Since the architecture weights are applied to the
        mask rather than to separately computed outputs, this module is efficient for differentiable weight strategies.

        :param c_in: num input channels
        :param c_out: num output channels
        :param name: name under which to register architecture weights
        :param strategy_name: name of the strategy for architecture weights
        :param k_sizes: kernel sizes
        :param c_multipliers: multipliers for the number of output channels (candidate channel counts)
        :param dilation: dilation for the kernel
        :param stride: stride for the kernel
        :param padding: 'same' or number
        :param bias: whether to add a learnable bias
        """
        super().__init__()
        self.name_c = '%s/c' % name
        self.name_k = '%s/k' % name
        self.k_sizes = k_sizes
        self.c_multipliers = c_multipliers
        assert max(c_multipliers) <= 1.0, "Can only reduce max channels, choose a higher c_in/c_out"

        self._stride = stride
        self._groups = get_number(groups, c_out)
        self._dilation = dilation
        assert c_in % self._groups == 0

        max_k = max(k_sizes)
        channels = [int(c_out * ci) for ci in sorted(c_multipliers)]
        masks_c, masks_k = [], []

        # arc weights
        self.ws = StrategyManager().make_weight(strategy_name,
                                                self.name_k,
                                                only_single_path=True,
                                                num_choices=len(k_sizes))
        self.ws = StrategyManager().make_weight(strategy_name,
                                                self.name_c,
                                                only_single_path=True,
                                                num_choices=len(channels))

        # conv weight
        self._padding = get_padding(padding, max_k, stride, 1)
        self.weight = nn.Parameter(torch.Tensor(c_out, c_in // self._groups,
                                                max_k, max_k),
                                   requires_grad=True)
        nn.init.kaiming_normal_(self.weight, mode='fan_out')

        # bias
        if bias:
            self.bias = nn.Parameter(torch.Tensor(c_out))
            nn.init.zeros_(self.bias)
        else:
            self.bias = None

        # mask c
        for cs in channels:
            mask = torch.ones(size=(c_out, 1, 1, 1), dtype=self.weight.dtype)
            mask[cs:c_out, :, :, :].zero_()
            masks_c.append(mask)
        self.register_buffer('masks_c', torch.stack(masks_c, dim=0))

        # mask k
        for k in sorted(k_sizes):
            mask = torch.zeros(size=(1, 1, max_k, max_k),
                               dtype=self.weight.dtype)
            dk = (max_k - k) // 2
            if dk == 0:
                mask += 1
            else:
                mask[:, :, dk:-dk, dk:-dk] += 1
            masks_k.append(mask)
        self.register_buffer('masks_k', torch.stack(masks_k, dim=0))
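Each entry of masks_k zeroes out the border of the shared max_k x max_k kernel; a worked example for k_sizes=(3, 5), so max_k=5 (1 = active weight, 0 = masked):

# k=3: dk = (5 - 3) // 2 = 1 -> only the central 3x3 window is active
# [[0, 0, 0, 0, 0],
#  [0, 1, 1, 1, 0],
#  [0, 1, 1, 1, 0],
#  [0, 1, 1, 1, 0],
#  [0, 0, 0, 0, 0]]
# k=5: dk = 0 -> the full 5x5 kernel is active (all ones)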
Example #28
class FusedMobileInvertedConvLayer(AbstractLayer, FusedOp):

    def __init__(self, name: str, strategy_name='default', skip_op: str = None, k_size_in=1, k_size_out=1,
                 k_sizes=(3, 5, 7), stride=1, padding='same', expansions=(3, 6),
                 dilation=1, bn_affine=True, act_fun='relu6', act_inplace=True, att_dict: dict = None):
        """
        A fused layer for several kernel sizes and expansion sizes, to share the 1x1 conv weights.
        Currently only designed for having a single kernel+expansion per forward pass and for the final config.

        :param name: name under which to register architecture weights
        :param strategy_name: name of the strategy for architecture weights
        :param skip_op: optional layer name, adds an op that enables skipping the entire block, e.g. "SkipLayer"
        :param k_size_in: kernel size(s) for the first conv kernel (expanding)
        :param k_size_out: kernel size(s) for the last conv kernel (projecting)
        :param k_sizes: kernel sizes for the spatial kernel
        :param stride: stride for the spatial kernel
        :param padding: 'same' or number
        :param expansions: multipliers for inner channels, based on input channels
        :param dilation: dilation for the spatial kernel
        :param bn_affine: affine batch norm
        :param act_fun: activation function
        :param act_inplace: whether to use the activation function in-place if possible (e.g. ReLU)
        :param att_dict: None to disable attention modules, otherwise a dict with respective kwargs
        """
        super().__init__()
        self._add_to_kwargs(name=name, strategy_name=strategy_name, skip_op=skip_op,
                            k_size_in=k_size_in, k_size_out=k_size_out,
                            k_sizes=k_sizes, stride=stride, expansions=expansions,
                            padding=padding, dilation=dilation, bn_affine=bn_affine,
                            act_fun=act_fun, act_inplace=act_inplace, att_dict=att_dict)
        self._add_to_print_kwargs(has_skip=False, has_att=isinstance(self.att_dict, dict))
        self.ws = None
        self.skip = None
        self.pw_in = nn.ModuleList([])
        self.dw_conv = nn.ModuleList([])
        self.dw_att = nn.ModuleList([])
        self.pw_out = nn.ModuleList([])
        self.drop_path = DropPathModule()

        self._choices_by_idx = []

    def _build(self, s_in: Shape, c_out: int) -> Shape:
        conv_kwargs = dict(dilation=self.dilation, padding=self.padding)
        c_in = s_in.num_features()
        self.has_skip = self.stride == 1 and c_in == c_out
        for e in range(len(self.expansions)):
            for k in range(len(self.k_sizes)):
                self._choices_by_idx.append((e, k))
        if self.has_skip and isinstance(self.skip_op, str):
            self.skip = Register.network_layers.get(self.skip_op)()
            self.skip.build(s_in, c_out)
            self._choices_by_idx.append(('skip', 'skip'))
        self.ws = StrategyManager().make_weight(self.strategy_name, self.name, only_single_path=True,
                                                num_choices=len(self._choices_by_idx))

        for e in self.expansions:
            c_mid = int(c_in * e)
            # pw in
            self.pw_in.append(nn.Sequential(
                get_conv2d(c_in, c_mid, k_size=self.k_size_in, groups=1, **conv_kwargs),
                nn.BatchNorm2d(c_mid, affine=self.bn_affine),
                Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
            ))
            # dw conv ops with different kernel sizes
            convs = nn.ModuleList([])
            for k in self.k_sizes:
                convs.append(nn.Sequential(
                    get_conv2d(c_mid, c_mid, k_size=k, stride=self.stride, groups=-1, **conv_kwargs),
                    nn.BatchNorm2d(c_mid, affine=self.bn_affine),
                    Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
                ))
            self.dw_conv.append(convs)
            # dw optional attention module
            if self.has_att:
                self.dw_att.append(AbstractAttentionModule.module_from_dict(c_mid, c_substitute=c_in,
                                                                            att_dict=self.att_dict))
            # pw out
            self.pw_out.append(nn.Sequential(
                get_conv2d(c_mid, c_out, k_size=self.k_size_out, groups=1, **conv_kwargs),
                nn.BatchNorm2d(c_out, affine=self.bn_affine),
            ))
        return self.probe_outputs(s_in)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        idx, _ = self.ws.combine_info(self.name)[0]
        idx_e, idx_k = self._choices_by_idx[idx]
        if idx_e == 'skip':
            return x + self.skip(x)
        x2 = self.pw_in[idx_e](x)
        x2 = self.dw_conv[idx_e][idx_k](x2)
        if self.has_att:
            x2 = self.dw_att[idx_e](x2)
        x2 = self.pw_out[idx_e](x2)
        if self.has_skip:
            return x + self.drop_path(x2)
        return x2

    def config(self, finalize=False, **__) -> dict:
        cfg = super().config(finalize=finalize, **__)
        if finalize:
            idxs = self.ws.get_finalized_indices(self.name)
            assert len(idxs) == 1
            idx_e, idx_k = self._choices_by_idx[idxs[0]]
            if idx_e == 'skip':
                return self.skip.config(finalize=finalize, **__)
            cfg['name'] = MobileInvertedConvLayer.__name__
            kwargs = cfg['kwargs']
            for s in ['name', 'strategy_name', 'skip_op']:
                kwargs.pop(s)
            kwargs['k_size'] = kwargs.pop('k_sizes')[idx_k]
            kwargs['expansion'] = kwargs.pop('expansions')[idx_e]
            cfg['kwargs'] = kwargs
        return cfg
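The flat architecture index chosen by the weight strategy is mapped back to an (expansion, kernel) pair via _choices_by_idx; a worked example for expansions=(3, 6) and k_sizes=(3, 5, 7):

# index 0 -> (e=0, k=0): expansion 3, kernel 3      index 3 -> (e=1, k=0): expansion 6, kernel 3
# index 1 -> (e=0, k=1): expansion 3, kernel 5      index 4 -> (e=1, k=1): expansion 6, kernel 5
# index 2 -> (e=0, k=2): expansion 3, kernel 7      index 5 -> (e=1, k=2): expansion 6, kernel 7
# plus index 6 -> ('skip', 'skip') if the block can be skipped and a skip_op is given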
Example #29
 def __init__(self, model_name: str, net: AbstractNetworkBody, do_forward_strategy=True, *args, **kwargs):
     super().__init__(model_name=model_name, net=net, *args, **kwargs)
     self.do_forward_strategy = do_forward_strategy  # unnecessary line to remove "error" highlighting
     self._add_to_kwargs(do_forward_strategy=self.do_forward_strategy)
     self.strategy_manager = StrategyManager()
     self.strategies = None
Example #30
 def get_space_tuple(cls, unique=True, flat=False) -> tuple:
     """ tuple of final topology """
     return tuple(StrategyManager().get_all_finalized_indices(unique=unique, flat=flat))