Example #1
class FeedfreePredictor(PredictorBase):
    """
    Create a predictor that takes inputs from an :class:`InputSource`, instead of from feeds.
    An instance `pred` of :class:`FeedfreePredictor` can only be called as `pred()`, which returns
    a list of output values as defined in ``config.output_names``.
    """

    def __init__(self, config, input_source):
        """
        Args:
            config (PredictConfig): the config to use.
            input_source (InputSource): the feedfree InputSource to use.
                Must match the inputs_desc in config.
        """
        self._config = config
        self._input_source = input_source
        assert config.return_input is False, \
            "return_input is not supported in FeedfreePredictor! " \
            "If you need to fetch inputs, add the names to the output_names!"

        self._hooks = []
        self.graph = config._maybe_create_graph()
        with self.graph.as_default():
            self._input_callbacks = Callbacks(
                self._input_source.setup(config.inputs_desc))
            with PredictTowerContext(''):
                self._input_tensors = self._input_source.get_input_tensors()
                config.tower_func(*self._input_tensors)
                self._tower_handle = config.tower_func.towers[-1]

            self._output_tensors = self._tower_handle.get_tensors(config.output_names)

            self._input_callbacks.setup_graph(None)

            for h in self._input_callbacks.get_hooks():
                self._register_hook(h)
            self._initialize_session()

    def _register_hook(self, hook):
        """
        Args:
            hook (tf.train.SessionRunHook): the hook to run along with every call.
        """
        self._hooks.append(hook)

    def _initialize_session(self):
        # init the session
        self._config.session_init._setup_graph()
        self._sess = self._config.session_creator.create_session()
        self._config.session_init._run_init(self._sess)

        with self._sess.as_default():
            self._input_callbacks.before_train()
            self._hooked_sess = HookedSession(self._sess, self._hooks)

    def __call__(self):
        return self._hooked_sess.run(self._output_tensors)

    def _do_call(self):
        raise NotImplementedError("You're calling the wrong function!")
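
A hedged usage sketch for the class above (PredictConfig, QueueInput and
get_model_loader are the usual tensorpack top-level names; MyModel, my_dataflow
and the checkpoint path are hypothetical placeholders, not defined here):

# Hypothetical usage sketch; MyModel (a ModelDesc) and my_dataflow
# (a DataFlow) are placeholders for your own model and data.
from tensorpack import (FeedfreePredictor, PredictConfig, QueueInput,
                        get_model_loader)

config = PredictConfig(
    model=MyModel(),
    session_init=get_model_loader('train_log/checkpoint'),
    output_names=['prob'])                  # what each pred() call returns
pred = FeedfreePredictor(config, QueueInput(my_dataflow))
outputs = pred()                            # list of values, one per output name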
Example #2
    def _initialize_session(self):
        # init the session
        self._config.session_init._setup_graph()
        self._sess = self._config.session_creator.create_session()
        self._config.session_init._run_init(self._sess)

        with self._sess.as_default():
            self._input_callbacks.before_train()
            self._hooked_sess = HookedSession(self._sess, self._hooks)
Example #3
class DataParallelInferenceRunner(InferenceRunnerBase):
    """
    Inference with data-parallel support on multiple GPUs.
    It will build one predict tower on each GPU, and run prediction
    with a large total batch in parallel on all GPUs.
    It will run the remainder sequentially (when the total size of the input
    is not a multiple of the number of GPUs).
    """
    def __init__(self,
                 input,
                 infs,
                 gpus,
                 tower_name='InferenceTower',
                 tower_func=None):
        """
        Args:
            input (DataFlow or QueueInput)
            gpus (int or list[int]): the number of GPUs, or a list of GPU ids.
            tower_name (str): the name scope of the tower to build. Must be set
                differently if more than one InferenceRunner is used.
            tower_func (tfutils.TowerFuncWrapper or None): the tower function used to build the graph.
                It will be called under a `training=False` TowerContext.
                The default is `trainer.tower_func`,
                but you can change it to a different tower function
                if you need to run inference with several different models.
        """
        if isinstance(gpus, int):
            gpus = list(range(gpus))
        self._devices = [_device_from_int(k) for k in gpus]
        self._tower_names = [
            '{}{}'.format(tower_name, k) for k in range(len(gpus))
        ]

        if isinstance(input, DataFlow):
            input = QueueInput(input)
        assert isinstance(input, QueueInput), input
        super(DataParallelInferenceRunner, self).__init__(input, infs)
        assert self._size > 0, "Input for DataParallelInferenceRunner must have a size!"

        self._hooks = []
        self._hooks_parallel = []
        self._tower_func = tower_func

    def _setup_graph(self):
        self._handles = []
        if self._tower_func is None:
            assert self.trainer.tower_func is not None, "You must set tower_func of the trainer to use InferenceRunner!"
            self._tower_func = self.trainer.tower_func

        input_callbacks = self._input_source.setup(
            self._tower_func.inputs_desc)
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            for idx, dev in enumerate(self._devices):
                vs_name = self.trainer._vs_name_for_predictor(idx)
                with tf.device(dev), PredictTowerContext(
                        self._tower_names[idx], vs_name=vs_name):
                    logger.info(
                        "[InferenceRunner] Building tower '{}' on device {} {}..."
                        .format(
                            self._tower_names[idx], dev,
                            "with variable scope '{}'".format(vs_name)
                            if vs_name else ''))
                    # TODO log for tower creation, here or in tower.py?
                    self._tower_func(*self._input_source.get_input_tensors())
                    self._handles.append(self._tower_func.towers[-1])

        # setup callbacks and hooks
        self._input_callbacks = Callbacks(input_callbacks)

        # TODO InputSource might have hooks which break us.
        # e.g. hooks from StagingInput will force the consumption
        # of nr_tower datapoints in every run.
        input_hooks = self._input_callbacks.get_hooks()
        self._hooks.extend([self._build_hook(inf)
                            for inf in self.infs] + input_hooks)
        self._hooks_parallel.extend(
            [self._build_hook_parallel(inf)
             for inf in self.infs] + input_hooks)

        for inf in self.infs:
            inf.setup_graph(self.trainer)
        self._input_callbacks.setup_graph(self.trainer)

    def register_hook(self, h):
        logger.info(
            "[DataParallelInferenceRunner] Registering hook {} on both parallel and sequential inference."
        )
        self._hooks.append(h)
        self._hooks_parallel.append(h)

    class _InferencerToHookDataParallel(InferencerToHook):
        def __init__(self, inf, fetches, size):
            """
            Args:
                size(int): number of tensors to fetch per tower
            """
            super(DataParallelInferenceRunner._InferencerToHookDataParallel,
                  self).__init__(inf, fetches)
            assert len(self._fetches) % size == 0
            self._sz = size

        def after_run(self, _, run_values):
            res = run_values.results
            for i in range(0, len(res), self._sz):
                vals = res[i:i + self._sz]
                self._inf.on_fetches(vals)

    def _build_hook_parallel(self, inf):
        out_names = inf.get_fetches()
        sz = len(out_names)
        fetches = list(
            itertools.chain(*[t.get_tensors(out_names)
                              for t in self._handles]))
        return self._InferencerToHookDataParallel(inf, fetches, sz)

    def _build_hook(self, inf):
        out_names = inf.get_fetches()
        fetches = self._handles[0].get_tensors(out_names)
        return InferencerToHook(inf, fetches)

    def _before_train(self):
        super(DataParallelInferenceRunner, self)._before_train()
        self._parallel_hooked_sess = HookedSession(self.trainer.sess,
                                                   self._hooks_parallel)

    def _trigger(self):
        for inf in self.infs:
            inf.before_epoch()

        total = self._size
        nr_tower = len(self._devices)
        self._input_source.reset_state()
        with _inference_context():
            with tqdm.tqdm(total=total, **get_tqdm_kwargs()) as pbar:
                while total >= nr_tower:
                    self._parallel_hooked_sess.run(fetches=[])
                    pbar.update(nr_tower)
                    total -= nr_tower
                # take care of the rest
                for _ in range(total):
                    self._hooked_sess.run(fetches=[])
                    pbar.update(1)
        for inf in self.infs:
            inf.trigger_epoch()
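
The _trigger loop above splits the dataset into full parallel steps plus a
sequential remainder. A standalone sketch of just that scheduling arithmetic
(pure Python, no TensorFlow required):

# Run parallel steps of size nr_tower while possible, then finish the
# leftover datapoints one at a time, exactly like _trigger above.
def schedule(total, nr_tower):
    steps = []
    while total >= nr_tower:
        steps.append(('parallel', nr_tower))    # one run over all towers
        total -= nr_tower
    steps.extend([('sequential', 1)] * total)   # remainder, one per run
    return steps

# 10 datapoints on 4 GPUs: two parallel steps, then two sequential runs.
assert schedule(10, 4) == [('parallel', 4), ('parallel', 4),
                           ('sequential', 1), ('sequential', 1)]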
Example #4
class DataParallelInferenceRunner(InferenceRunnerBase):
    """
    Inference with data-parallel support on multiple GPUs.
    It will build one predict tower on each GPU, and run prediction
    with a large total batch in parallel on all GPUs.
    It will run the remainder sequentially (when the total size of the input
    is not a multiple of the number of GPUs).
    """
    def __init__(self, input, infs, gpus):
        """
        Args:
            input (DataFlow or QueueInput)
            gpus (int or list[int]): the number of GPUs, or a list of GPU ids.
        """
        if isinstance(gpus, int):
            gpus = list(range(gpus))
        self._tower_names = ['InferenceTower{}'.format(k) for k in range(len(gpus))]
        if isinstance(input, DataFlow):
            input = QueueInput(input)
        assert isinstance(input, QueueInput), input
        super(DataParallelInferenceRunner, self).__init__(input, infs)
        assert self._size > 0, "Input for DataParallelInferenceRunner must have a size!"
        self._gpus = gpus

        self._hooks = []
        self._hooks_parallel = []

    def _setup_graph(self):
        self._handles = []

        assert self.trainer.tower_func is not None, "You must set tower_func of the trainer to use InferenceRunner!"
        input_callbacks = self._input_source.setup(self.trainer.inputs_desc)
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            for idx, t in enumerate(self._gpus):
                tower_name = self._tower_names[idx]
                SimplePredictBuilder(
                    ns_name=tower_name,
                    vs_name=self.trainer._main_tower_vs_name, device=t).build(
                        self._input_source, self.trainer.tower_func)
                self._handles.append(self.trainer.tower_func.towers[-1])

        # setup callbacks and hooks
        self._input_callbacks = Callbacks(input_callbacks)

        # TODO InputSource might have hooks which break us.
        # e.g. hooks from StagingInput will force the consumption
        # of nr_tower datapoints in every run.
        input_hooks = self._input_callbacks.get_hooks()
        self._hooks.extend([self._build_hook(inf) for inf in self.infs] + input_hooks)
        self._hooks_parallel.extend([self._build_hook_parallel(inf) for inf in self.infs] + input_hooks)

        for inf in self.infs:
            inf.setup_graph(self.trainer)
        self._input_callbacks.setup_graph(self.trainer)

    def register_hook(self, h):
        logger.info(
            "[DataParallelInferenceRunner] Registering hook {} on both parallel and sequential inference.")
        self._hooks.append(h)
        self._hooks_parallel.append(h)

    class InferencerToHookDataParallel(InferencerToHook):
        def __init__(self, inf, fetches, size):
            """
            Args:
                size(int): number of tensors to fetch per tower
            """
            super(DataParallelInferenceRunner.InferencerToHookDataParallel, self).__init__(inf, fetches)
            assert len(self._fetches) % size == 0
            self._sz = size

        def after_run(self, _, run_values):
            res = run_values.results
            for i in range(0, len(res), self._sz):
                vals = res[i:i + self._sz]
                self._inf.on_fetches(vals)

    def _build_hook_parallel(self, inf):
        out_names = inf.get_fetches()
        sz = len(out_names)
        fetches = list(itertools.chain(*[t.get_tensors(out_names) for t in self._handles]))
        return self.InferencerToHookDataParallel(inf, fetches, sz)

    def _build_hook(self, inf):
        out_names = inf.get_fetches()
        fetches = self._handles[0].get_tensors(out_names)
        return InferencerToHook(inf, fetches)

    def _before_train(self):
        super(DataParallelInferenceRunner, self)._before_train()
        self._parallel_hooked_sess = HookedSession(self.trainer.sess, self._hooks_parallel)

    def _trigger(self):
        for inf in self.infs:
            inf.before_epoch()

        total = self._size
        nr_tower = len(self._gpus)
        with _inference_context():
            with tqdm.tqdm(total=total, **get_tqdm_kwargs()) as pbar:
                while total >= nr_tower:
                    self._parallel_hooked_sess.run(fetches=[])
                    pbar.update(nr_tower)
                    total -= nr_tower
                # take care of the rest
                for _ in range(total):
                    self._hooked_sess.run(fetches=[])
                    pbar.update(1)
        for inf in self.infs:
            inf.trigger_epoch()
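
In InferencerToHookDataParallel.after_run above, the flat result list holds the
same sz fetches once per tower, so it is chopped back into per-tower chunks. A
minimal standalone illustration of that slicing:

# Fetches arrive tower-major: [tower0_f1, tower0_f2, tower1_f1, tower1_f2].
# after_run hands each tower's chunk to the inferencer separately.
res = ['t0_a', 't0_b', 't1_a', 't1_b']   # 2 towers, sz == 2
sz = 2
chunks = [res[i:i + sz] for i in range(0, len(res), sz)]
assert chunks == [['t0_a', 't0_b'], ['t1_a', 't1_b']]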
Example #5
 def _before_train(self):
     super(DataParallelInferenceRunner, self)._before_train()
     self._parallel_hooked_sess = HookedSession(self.trainer.sess, self._hooks_parallel)
Example #6
 def _before_train(self):
     self._hooked_sess = HookedSession(self.trainer.sess, self._hooks)
     self._parallel_hooked_sess = HookedSession(self.trainer.sess,
                                                self._hooks_parallel)
Example #7
class InferenceRunnerBase(Callback):
    """ Base methods for inference runner"""
    def __init__(self,
                 input,
                 infs,
                 input_names=None,
                 prefix='',
                 extra_hooks=None):
        """
        Args:
            input (InputSource): the input to use. Must have ``size()``.
            infs (list): list of :class:`Inferencer` to run.
            input_names (list): must be a subset of the names in InputDesc.
            prefix(str): a prefix used to build the tower. Must be set
                differently if more than one :class:`InferenceRunner` is used.
            extra_hooks (list): extra ``SessionRunHook`` to run with the evaluation.
        """
        self._input_source = input
        if not isinstance(infs, list):
            self.infs = [infs]
        else:
            self.infs = infs
        for v in self.infs:
            assert isinstance(v, Inferencer), v
        if input_names is not None:
            assert isinstance(input_names, list)
        self.input_names = input_names

        try:
            self._size = input.size()
        except NotImplementedError:
            raise ValueError("Input used in InferenceRunner must have a size!")
        self._prefix = prefix

        if extra_hooks is None:
            extra_hooks = []
        self._extra_hooks = extra_hooks

    def _setup_input_names(self):
        # just use all the placeholders, if input_names is None
        if self.input_names is None:
            inputs = self.trainer.model.get_reused_placehdrs()
            self.input_names = [x.name for x in inputs]

            # TODO sparse. even if it works here, sparse still is unavailable
            # because get_tensor_by_name doesn't work for sparse

            # def get_name(x):
            #     if isinstance(x, tf.SparseTensor):
            #         return x.op.name.split('/')[0]
            #     return x.name

    def _setup_graph(self):
        self._input_source.setup(self.trainer.model)
        self._setup_input_names()
        # Use the first predict_tower from the train config (either a GPU id or -1)
        self._predict_tower_id = self.trainer.config.predict_tower[0]
        in_tensors = self._find_input_tensors()
        assert isinstance(in_tensors, list), in_tensors

        with tf.variable_scope(tf.get_variable_scope(), reuse=True):

            def fn(_):
                self.trainer.model.build_graph(in_tensors)

            PredictorTowerBuilder(fn,
                                  self._prefix).build(self._predict_tower_id)

        self._feed_tensors = self._find_feed_tensors()
        self._hooks = [self._build_hook(inf) for inf in self.infs]

    def _before_train(self):
        self._hooks.extend(self._extra_hooks)
        self._hooked_sess = HookedSession(self.trainer.sess, self._hooks)

    def _get_tensors_maybe_in_tower(self, names):
        placeholder_names = set(
            [k.name for k in self.trainer.model.get_inputs_desc()])
        get_tensor_fn = PredictorTowerBuilder.get_tensors_maybe_in_tower
        return get_tensor_fn(placeholder_names,
                             names,
                             self._predict_tower_id,
                             prefix=self._prefix)

    def _find_input_tensors(self):
        pass

    @abstractmethod
    def _find_feed_tensors(self):
        pass

    @abstractmethod
    def _build_hook(self, inf):
        pass

    def _trigger(self):
        for inf in self.infs:
            inf.before_inference()

        # iterate over the data, and run the hooked session
        self._input_source.reset_state()
        for _ in tqdm.trange(self._input_source.size(), **get_tqdm_kwargs()):
            dp = self._input_source.next_feed()
            feed = dict(zip(self._feed_tensors, dp))
            self._hooked_sess.run(fetches=[], feed_dict=feed)
        summary_inferencer(self.trainer, self.infs)
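
The _trigger loop above builds its feed by zipping the feed tensors with one
datapoint, relying on both lists having the same order. A standalone sketch of
that pairing (strings stand in for real tf.Tensor handles):

# One datapoint from the InputSource is a list of values in the same
# order as the feed tensors, so zip() pairs them into a feed_dict.
feed_tensors = ['input:0', 'label:0']   # stand-ins for tf.Tensor objects
dp = [[0.1, 0.9], 1]                    # one datapoint: image values, label
feed = dict(zip(feed_tensors, dp))
assert feed == {'input:0': [0.1, 0.9], 'label:0': 1}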
Example #8
 def _before_train(self):
     self._hooks.extend(self._extra_hooks)
     self._hooked_sess = HookedSession(self.trainer.sess, self._hooks)
Example #9
class DataParallelInferenceRunner(InferenceRunner):
    def __init__(self, ds, infs, gpus, input_names=None):
        super(DataParallelInferenceRunner,
              self).__init__(ds, infs, input_names)
        self._gpus = gpus

    def _setup_graph(self):
        model = self.trainer.model
        self._input_source.setup(model)
        self._setup_input_names()

        # build graph
        def build_tower(k):
            towername = TowerContext.get_predict_tower_name(k)
            # inputs (placeholders) for this tower only
            input_tensors = model.build_placeholders(prefix=towername + '/')
            model.build_graph(input_tensors)

        builder = PredictorTowerBuilder(build_tower, prefix=self._prefix)
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            for t in self._gpus:
                builder.build(t)

        # setup feeds and hooks
        self._feed_tensors = self._find_feed_tensors()
        self._hooks_parallel = [
            self._build_hook_parallel(inf) for inf in self.infs
        ]
        self._hooks = [self._build_hook(inf) for inf in self.infs]

    def _duplicate_names_across_towers(self, names):
        ret = []
        for t in self._gpus:
            ret.extend([
                TowerContext.get_predict_tower_name(t, self._prefix) + '/' + n
                for n in names
            ])
        return ret

    def _find_feed_tensors(self):
        names = self._duplicate_names_across_towers(self.input_names)
        return get_tensors_by_names(names)

    class InferencerToHookDataParallel(InferencerToHook):
        def __init__(self, inf, fetches, size):
            super(DataParallelInferenceRunner.InferencerToHookDataParallel,
                  self).__init__(inf, fetches)
            assert len(self._fetches) % size == 0
            self._sz = size

        def after_run(self, _, run_values):
            res = run_values.results
            for i in range(0, len(res), self._sz):
                vals = res[i:i + self._sz]
                self._inf.datapoint(vals)

    def _build_hook_parallel(self, inf):
        out_names = inf.get_output_tensors()
        sz = len(out_names)
        out_names = self._duplicate_names_across_towers(out_names)
        fetches = get_tensors_by_names(out_names)
        return DataParallelInferenceRunner.InferencerToHookDataParallel(
            inf, fetches, sz)

    def _build_hook(self, inf):
        out_names = inf.get_output_tensors()
        names = [
            TowerContext.get_predict_tower_name(self._gpus[0], self._prefix) +
            '/' + n for n in out_names
        ]
        fetches = get_tensors_by_names(names)
        return InferencerToHook(inf, fetches)

    def _before_train(self):
        self._hooked_sess = HookedSession(self.trainer.sess, self._hooks)
        self._parallel_hooked_sess = HookedSession(self.trainer.sess,
                                                   self._hooks_parallel)

    def _trigger(self):
        for inf in self.infs:
            inf.before_inference()

        self._input_source.reset_state()
        total = self._input_source.size()
        nr_tower = len(self._gpus)
        with tqdm.tqdm(total=total, **get_tqdm_kwargs()) as pbar:
            while total >= nr_tower:
                dps = []
                for k in self._gpus:
                    dps.extend(self._input_source.next_feed())
                feed = dict(zip(self._feed_tensors, dps))
                self._parallel_hooked_sess.run(fetches=[], feed_dict=feed)
                pbar.update(nr_tower)
                total -= nr_tower
            # take care of the rest
            while total > 0:
                dp = self._input_source.next_feed()
                feed = dict(zip(self._feed_tensors[:len(dp)], dp))
                self._hooked_sess.run(fetches=[], feed_dict=feed)
                pbar.update(1)   # advance the bar and the counter,
                total -= 1       # otherwise this loop never terminates
        summary_inferencer(self.trainer, self.infs)
Example #10
class DataParallelInferenceRunner(InferenceRunnerBase):
    """
    Inference with data-parallel support on multiple GPUs.
    It will build one predict tower on each GPU, and run prediction
    with a larger batch.
    """
    def __init__(self, input, infs, gpus):
        """
        Args:
            input (DataFlow or QueueInput)
            gpus (list[int]): a list of GPU ids.
        """
        self._tower_names = [
            'InferenceTower{}'.format(k) for k in range(len(gpus))
        ]
        if isinstance(input, DataFlow):
            input = QueueInput(input)
        assert isinstance(input, QueueInput), input
        super(DataParallelInferenceRunner, self).__init__(input, infs)
        self._gpus = gpus

    def _setup_graph(self):
        self._handles = []

        assert self.trainer.tower_func is not None, "You must set tower_func of the trainer to use InferenceRunner!"
        input_callbacks = self._input_source.setup(self.trainer.inputs_desc)
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            for idx, t in enumerate(self._gpus):
                tower_name = self._tower_names[idx]
                SimplePredictBuilder(ns_name=tower_name,
                                     vs_name=self.trainer._main_tower_vs_name,
                                     device=t).build(self._input_source,
                                                     self.trainer.tower_func)
                self._handles.append(self.trainer.tower_func.towers[-1])

        # setup callbacks and hooks
        self._input_callbacks = Callbacks(input_callbacks)

        # TODO InputSource might have hooks which break us.
        # e.g. hooks from StagingInput will force the consumption
        # of nr_tower datapoints in every run.
        input_hooks = self._input_callbacks.get_hooks()
        self._hooks = [self._build_hook(inf)
                       for inf in self.infs] + input_hooks
        self._hooks_parallel = [
            self._build_hook_parallel(inf) for inf in self.infs
        ] + input_hooks

        for inf in self.infs:
            inf.setup_graph(self.trainer)
        self._input_callbacks.setup_graph(self.trainer)

    def register_hook(self, h):
        raise NotImplementedError(
            "DataParallelInferenceRunner doesn't accept extra hooks!")

    class InferencerToHookDataParallel(InferencerToHook):
        def __init__(self, inf, fetches, size):
            """
            Args:
                size(int): number of tensors to fetch per tower
            """
            super(DataParallelInferenceRunner.InferencerToHookDataParallel,
                  self).__init__(inf, fetches)
            assert len(self._fetches) % size == 0
            self._sz = size

        def after_run(self, _, run_values):
            res = run_values.results
            for i in range(0, len(res), self._sz):
                vals = res[i:i + self._sz]
                self._inf.on_fetches(vals)

    def _build_hook_parallel(self, inf):
        out_names = inf.get_fetches()
        sz = len(out_names)
        fetches = list(
            itertools.chain(*[t.get_tensors(out_names)
                              for t in self._handles]))
        return self.InferencerToHookDataParallel(inf, fetches, sz)

    def _build_hook(self, inf):
        out_names = inf.get_fetches()
        fetches = self._handles[0].get_tensors(out_names)
        return InferencerToHook(inf, fetches)

    def _before_train(self):
        super(DataParallelInferenceRunner, self)._before_train()
        self._parallel_hooked_sess = HookedSession(self.trainer.sess,
                                                   self._hooks_parallel)

    def _trigger(self):
        for inf in self.infs:
            inf.before_epoch()

        self._input_source.reset_state()
        total = self._size
        nr_tower = len(self._gpus)
        with _inference_context():
            with tqdm.tqdm(total=total, **get_tqdm_kwargs()) as pbar:
                while total >= nr_tower:
                    self._parallel_hooked_sess.run(fetches=[])
                    pbar.update(nr_tower)
                    total -= nr_tower
                # take care of the rest
                for _ in range(total):
                    self._hooked_sess.run(fetches=[])
                    pbar.update(1)
        for inf in self.infs:
            inf.trigger_epoch()
Example #11
class InferenceRunnerBase(Callback):
    """ Base methods for inference runner"""
    def __init__(self,
                 input,
                 infs,
                 tower_name='InferenceTower',
                 extra_hooks=None,
                 prefix=None):
        """
        Args:
            input (InputSource): the input to use. Must have ``size()``.
            infs (list[Inferencer]): list of :class:`Inferencer` to run.
            tower_name(str): name scope to build the tower. Must be set
                differently if more than one :class:`InferenceRunner` is used.
            extra_hooks (list[SessionRunHook]): extra :class:`SessionRunHook` to run with the evaluation.
        """
        self._input_source = input
        if not isinstance(infs, list):
            self.infs = [infs]
        else:
            self.infs = infs
        for v in self.infs:
            assert isinstance(v, Inferencer), v

        try:
            self._size = input.size()
        except NotImplementedError:
            raise ValueError("Input used in InferenceRunner must have a size!")
        self._tower_name = tower_name
        if prefix is not None:
            self._tower_name = 'InferenceTower' + prefix

        if extra_hooks is None:
            extra_hooks = []
        self._extra_hooks = extra_hooks

    def _setup_graph(self):
        # Use the first predict_tower from the train config (either a GPU id or -1)
        tower_id = self.trainer.config.predict_tower[0]
        device = '/gpu:{}'.format(tower_id) if tower_id >= 0 else '/cpu:0'

        self._input_source.setup(self.trainer.model.get_inputs_desc())
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            self._tower_handle = self.trainer.predictor_factory.build(
                self._tower_name, device, self._input_source)

        self._hooks = [self._build_hook(inf) for inf in self.infs]
        cbs = self._input_source.get_callbacks()
        self._hooks.extend([CallbackToHook(cb) for cb in cbs])

    def _before_train(self):
        self._hooks.extend(self._extra_hooks)
        self._hooked_sess = HookedSession(self.trainer.sess, self._hooks)

    @abstractmethod
    def _build_hook(self, inf):
        pass

    def _trigger(self):
        for inf in self.infs:
            inf.before_inference()

        # iterate over the data, and run the hooked session
        self._input_source.reset_state()
        for _ in tqdm.trange(self._size, **get_tqdm_kwargs()):
            self._hooked_sess.run(fetches=[])
        summary_inferencer(self.trainer, self.infs)
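
For context, subclasses of this base are normally registered as training
callbacks. A hedged sketch using the common tensorpack names (TrainConfig,
InferenceRunner and ScalarStats are assumptions here; dataset_val is a
hypothetical validation DataFlow):

from tensorpack import TrainConfig, InferenceRunner, ScalarStats

config = TrainConfig(
    # model=..., dataflow=... elided; dataset_val is a placeholder.
    callbacks=[InferenceRunner(dataset_val, [ScalarStats('cost')])],
)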
Example #12
class InferenceRunnerBase(Callback):
    """ Base methods for inference runner"""
    def __init__(self, input, infs, prefix='', extra_hooks=None):
        """
        Args:
            input (InputSource): the input to use. Must have ``size()``.
            infs (list[Inferencer]): list of :class:`Inferencer` to run.
            prefix(str): a prefix used to build the tower. Must be set
                differently if more than one :class:`InferenceRunner` is used.
            extra_hooks (list[SessionRunHook]): extra :class:`SessionRunHook` to run with the evaluation.
        """
        self._input_source = input
        if not isinstance(infs, list):
            self.infs = [infs]
        else:
            self.infs = infs
        for v in self.infs:
            assert isinstance(v, Inferencer), v

        try:
            self._size = input.size()
        except NotImplementedError:
            raise ValueError("Input used in InferenceRunner must have a size!")
        self._prefix = prefix

        if extra_hooks is None:
            extra_hooks = []
        self._extra_hooks = extra_hooks

    def _setup_graph(self):
        self._input_source.setup(self.trainer.model)
        # Use the first predict_tower from the train config (either a GPU id or -1)
        self._predict_tower_id = self.trainer.config.predict_tower[0]

        def fn(_):
            in_tensors = self._input_source.get_input_tensors()
            self.trainer.model.build_graph(in_tensors)
        with tf.variable_scope(self.trainer.vs_name_for_predictor, reuse=True):
            PredictorTowerBuilder(fn, self._prefix).build(self._predict_tower_id)

        self._hooks = [self._build_hook(inf) for inf in self.infs]

    def _before_train(self):
        self._hooks.extend(self._extra_hooks)
        self._hooked_sess = HookedSession(self.trainer.sess, self._hooks)

    def _get_tensors_maybe_in_tower(self, names):
        placeholder_names = set([k.name for k in self.trainer.model.get_inputs_desc()])
        get_tensor_fn = PredictorTowerBuilder.get_tensors_maybe_in_tower
        return get_tensor_fn(placeholder_names, names, self._predict_tower_id, prefix=self._prefix)

    @abstractmethod
    def _build_hook(self, inf):
        pass

    def _trigger(self):
        for inf in self.infs:
            inf.before_inference()

        # iterate over the data, and run the hooked session
        self._input_source.reset_state()
        for _ in tqdm.trange(self._input_source.size(), **get_tqdm_kwargs()):
            feed = self._input_source.next_feed()
            self._hooked_sess.run(fetches=[], feed_dict=feed)
        summary_inferencer(self.trainer, self.infs)
Example #13
 def _before_train(self):
     self._hooked_sess = HookedSession(self.trainer.sess, self._hooks)
Example #14
class InferenceRunnerBase(Callback):
    """ Base methods for inference runner"""
    def __init__(self, input, infs, input_names=None, prefix='', extra_hooks=None):
        """
        Args:
            input (InputData): the input to use. Must have ``size()``.
            infs (list): list of :class:`Inferencer` to run.
            input_names (list): must be a subset of the names in InputDesc.
            prefix(str): a prefix used to build the tower. Must be set
                differently if more than one :class:`InferenceRunner` is used.
            extra_hooks (list): extra ``SessionRunHook`` to run with the evaluation.
        """
        self._input_data = input
        if not isinstance(infs, list):
            self.infs = [infs]
        else:
            self.infs = infs
        for v in self.infs:
            assert isinstance(v, Inferencer), v
        if input_names is not None:
            assert isinstance(input_names, list)
        self.input_names = input_names

        try:
            self._size = input.size()
        except NotImplementedError:
            raise ValueError("Input used in InferenceRunner must have a size!")
        self._prefix = prefix

        if extra_hooks is None:
            extra_hooks = []
        self._extra_hooks = extra_hooks

    def _setup_input_names(self):
        # just use all the placeholders, if input_names is None
        if self.input_names is None:
            inputs = self.trainer.model.get_reused_placehdrs()
            self.input_names = [x.name for x in inputs]

            # TODO sparse. even if it works here, sparse still is unavailable
            # because get_tensor_by_name doesn't work for sparse

            # def get_name(x):
            #     if isinstance(x, tf.SparseTensor):
            #         return x.op.name.split('/')[0]
            #     return x.name

    def _setup_graph(self):
        self._input_data.setup(self.trainer.model)
        self._setup_input_names()
        in_tensors = self._find_input_tensors()
        assert isinstance(in_tensors, list), in_tensors

        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            def fn(_):
                self.trainer.model.build_graph(in_tensors)
            PredictorTowerBuilder(fn, self._prefix).build(0)

        self._feed_tensors = self._find_feed_tensors()
        self._hooks = [self._build_hook(inf) for inf in self.infs]

    def _before_train(self):
        self._hooks.extend(self._extra_hooks)
        self._hooked_sess = HookedSession(self.trainer.sess, self._hooks)

    def _get_tensors_maybe_in_tower(self, names):
        placeholder_names = set([k.name for k in self.trainer.model.get_inputs_desc()])
        get_tensor_fn = PredictorTowerBuilder.get_tensors_maybe_in_tower
        return get_tensor_fn(placeholder_names, names, 0, prefix=self._prefix)

    def _find_input_tensors(self):
        pass

    @abstractmethod
    def _find_feed_tensors(self):
        pass

    @abstractmethod
    def _build_hook(self, inf):
        pass

    def _trigger(self):
        for inf in self.infs:
            inf.before_inference()

        # iterate over the data, and run the hooked session
        self._input_data.reset_state()
        for _ in tqdm.trange(self._input_data.size(), **get_tqdm_kwargs()):
            dp = self._input_data.next_feed()
            feed = dict(zip(self._feed_tensors, dp))
            self._hooked_sess.run(fetches=[], feed_dict=feed)
        summary_inferencer(self.trainer, self.infs)
Example #15
 def _before_train(self):
     self._hooked_sess = HookedSession(self.trainer.sess, self._hooks)
     self._parallel_hooked_sess = HookedSession(self.trainer.sess, self._hooks_parallel)
Example #16
class DataParallelInferenceRunner(InferenceRunner):
    def __init__(self, ds, infs, gpus, input_names=None):
        super(DataParallelInferenceRunner, self).__init__(ds, infs, input_names)
        self._gpus = gpus

    def _setup_graph(self):
        model = self.trainer.model
        self._input_data.setup(model)
        self._setup_input_names()

        # build graph
        def build_tower(k):
            towername = TowerContext.get_predict_tower_name(k)
            # inputs (placeholders) for this tower only
            input_tensors = model.build_placeholders(
                prefix=towername + '/')
            model.build_graph(input_tensors)

        builder = PredictorTowerBuilder(build_tower, prefix=self._prefix)
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            for t in self._gpus:
                builder.build(t)

        # setup feeds and hooks
        self._feed_tensors = self._find_feed_tensors()
        self._hooks_parallel = [self._build_hook_parallel(inf) for inf in self.infs]
        self._hooks = [self._build_hook(inf) for inf in self.infs]

    def _duplicate_names_across_towers(self, names):
        ret = []
        for t in self._gpus:
            ret.extend([TowerContext.get_predict_tower_name(t, self._prefix) +
                       '/' + n for n in names])
        return ret

    def _find_feed_tensors(self):
        names = self._duplicate_names_across_towers(self.input_names)
        return get_tensors_by_names(names)

    class InferencerToHookDataParallel(InferencerToHook):
        def __init__(self, inf, fetches, size):
            super(DataParallelInferenceRunner.InferencerToHookDataParallel, self).__init__(inf, fetches)
            assert len(self._fetches) % size == 0
            self._sz = size

        def after_run(self, _, run_values):
            res = run_values.results
            for i in range(0, len(res), self._sz):
                vals = res[i:i + self._sz]
                self._inf.datapoint(vals)

    def _build_hook_parallel(self, inf):
        out_names = inf.get_output_tensors()
        sz = len(out_names)
        out_names = self._duplicate_names_across_towers(out_names)
        fetches = get_tensors_by_names(out_names)
        return DataParallelInferenceRunner.InferencerToHookDataParallel(
            inf, fetches, sz)

    def _build_hook(self, inf):
        out_names = inf.get_output_tensors()
        names = [TowerContext.get_predict_tower_name(
            self._gpus[0], self._prefix) + '/' + n for n in out_names]
        fetches = get_tensors_by_names(names)
        return InferencerToHook(inf, fetches)

    def _before_train(self):
        self._hooked_sess = HookedSession(self.trainer.sess, self._hooks)
        self._parallel_hooked_sess = HookedSession(self.trainer.sess, self._hooks_parallel)

    def _trigger(self):
        for inf in self.infs:
            inf.before_inference()

        self._input_data.reset_state()
        total = self._input_data.size()
        nr_tower = len(self._gpus)
        with tqdm.tqdm(total=total, **get_tqdm_kwargs()) as pbar:
            while total >= nr_tower:
                dps = []
                for k in self._gpus:
                    dps.extend(self._input_data.next_feed())
                feed = dict(zip(self._feed_tensors, dps))
                self._parallel_hooked_sess.run(fetches=[], feed_dict=feed)
                pbar.update(nr_tower)
                total -= nr_tower
            # take care of the rest
            while total > 0:
                dp = self._input_data.next_feed()
                feed = dict(zip(self._feed_tensors[:len(dp)], dp))
                self._hooked_sess.run(fetches=[], feed_dict=feed)
                pbar.update(1)   # advance the bar and the counter,
                total -= 1       # otherwise this loop never terminates
        summary_inferencer(self.trainer, self.infs)
Example #17
 def _before_train(self):
     self._hooks.extend(self._extra_hooks)
     self._hooked_sess = HookedSession(self.trainer.sess, self._hooks)
Example #18
class DataParallelInferenceRunner(InferenceRunnerBase):
    """
    Inference by feeding datapoints in a data-parallel way to multiple GPUs.

    Doesn't support remapped InputSource for now.
    """
    def __init__(self, input, infs, gpus):
        """
        Args:
            input (DataParallelFeedInput or DataFlow)
            gpus (list[int]): a list of GPU ids.
        """
        if isinstance(input, DataFlow):
            tower_names = [
                TowerContext.get_predict_tower_name(k)
                for k in range(len(gpus))
            ]
            input = DataParallelFeedInput(input, tower_names)
        assert isinstance(input, DataParallelFeedInput), input

        super(DataParallelInferenceRunner, self).__init__(input, infs)
        self._gpus = gpus

    def _setup_graph(self):
        model = self.trainer.model
        self._input_source.setup(model.get_inputs_desc())

        # build graph
        def build_tower(k):
            # build this tower's graph on the shared InputSource
            model.build_graph(self._input_source)

        builder = PredictorTowerBuilder(build_tower, prefix=self._prefix)
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            for t in self._gpus:
                builder.build(t)

        # setup feeds and hooks
        self._hooks_parallel = [
            self._build_hook_parallel(inf) for inf in self.infs
        ]
        self._hooks = [self._build_hook(inf) for inf in self.infs]
        cbs = self._input_source.get_callbacks()
        self._hooks_parallel.extend([CallbackToHook(cb) for cb in cbs])

    def _duplicate_names_across_towers(self, names):
        ret = []
        for t in self._gpus:
            ret.extend([
                TowerContext.get_predict_tower_name(t, self._prefix) + '/' + n
                for n in names
            ])
        return ret

    class InferencerToHookDataParallel(InferencerToHook):
        def __init__(self, inf, fetches, size):
            super(DataParallelInferenceRunner.InferencerToHookDataParallel,
                  self).__init__(inf, fetches)
            assert len(self._fetches) % size == 0
            self._sz = size

        def after_run(self, _, run_values):
            res = run_values.results
            for i in range(0, len(res), self._sz):
                vals = res[i:i + self._sz]
                self._inf.datapoint(vals)

    def _build_hook_parallel(self, inf):
        out_names = inf.get_output_tensors()
        sz = len(out_names)
        out_names = self._duplicate_names_across_towers(out_names)
        fetches = get_tensors_by_names(out_names)
        return DataParallelInferenceRunner.InferencerToHookDataParallel(
            inf, fetches, sz)

    def _build_hook(self, inf):
        out_names = inf.get_output_tensors()
        names = [
            TowerContext.get_predict_tower_name(self._gpus[0], self._prefix) +
            '/' + n for n in out_names
        ]
        fetches = get_tensors_by_names(names)
        return InferencerToHook(inf, fetches)

    def _before_train(self):
        self._hooked_sess = HookedSession(self.trainer.sess, self._hooks)
        self._parallel_hooked_sess = HookedSession(self.trainer.sess,
                                                   self._hooks_parallel)

    def _trigger(self):
        for inf in self.infs:
            inf.before_inference()

        self._input_source.reset_state()
        total = self._size
        nr_tower = len(self._gpus)
        with tqdm.tqdm(total=total, **get_tqdm_kwargs()) as pbar:
            while total >= nr_tower:
                self._parallel_hooked_sess.run(fetches=[])
                pbar.update(nr_tower)
                total -= nr_tower
            # take care of the rest
            try:
                while total > 0:
                    # TODO XXX doesn't support remap
                    feed = self._input_source.next_feed(cnt=1)
                    self._hooked_sess.run(fetches=[], feed_dict=feed)
                    pbar.update(1)
                    total -= 1
            except AttributeError:
                logger.error(
                    "[DataParallelInferenceRunner] doesn't support InputSource wrappers very well!"
                )
                logger.error(
                    "[DataParallelInferenceRunner] Skipping the rest of the datapoints ..."
                )
        summary_inferencer(self.trainer, self.infs)
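
_duplicate_names_across_towers above simply prefixes every output name with
each tower's name scope, so one session.run can fetch all towers at once. A
standalone sketch (the tower names are illustrative):

# Prefix each tensor name with every tower's name scope, tower-major,
# matching the per-tower chunking that after_run undoes later.
def duplicate_across_towers(tower_names, names):
    return ['{}/{}'.format(t, n) for t in tower_names for n in names]

assert duplicate_across_towers(['PredictTower0', 'PredictTower1'], ['prob']) \
    == ['PredictTower0/prob', 'PredictTower1/prob']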
Example #19
 def _before_train(self):
     super(DataParallelInferenceRunner, self)._before_train()
     self._parallel_hooked_sess = HookedSession(self.trainer.sess,
                                                self._hooks_parallel)
Example #20
class DataParallelInferenceRunner(InferenceRunnerBase):
    """
    Inference by feeding datapoints in a data-parallel way to multiple GPUs.

    Doesn't support remapped InputSource for now.
    """
    def __init__(self, input, infs, gpus):
        """
        Args:
            input (DataParallelFeedInput or DataFlow)
            gpus (list[int]): a list of GPU ids.
        """
        self._tower_names = [
            'InferenceTower{}'.format(k) for k in range(len(gpus))
        ]
        if isinstance(input, DataFlow):
            input = DataParallelFeedInput(input, self._tower_names)
        assert isinstance(input, DataParallelFeedInput), input

        super(DataParallelInferenceRunner, self).__init__(input, infs)
        self._gpus = gpus

    def _setup_graph(self):
        self._input_source.setup(self.trainer.model.get_inputs_desc())
        self._handles = []
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            for idx, t in enumerate(self._gpus):
                tower_name = self._tower_names[idx]
                device = '/gpu:{}'.format(t)
                self._handles.append(
                    self.trainer.predictor_factory.build(
                        tower_name, device, self._input_source))

        # setup feeds and hooks
        self._hooks_parallel = [
            self._build_hook_parallel(inf) for inf in self.infs
        ]
        self._hooks = [self._build_hook(inf) for inf in self.infs]
        cbs = self._input_source.get_callbacks()
        self._hooks_parallel.extend([CallbackToHook(cb) for cb in cbs])

    class InferencerToHookDataParallel(InferencerToHook):
        def __init__(self, inf, fetches, size):
            """
            Args:
                size(int): number of tensors to fetch per tower
            """
            super(DataParallelInferenceRunner.InferencerToHookDataParallel,
                  self).__init__(inf, fetches)
            assert len(self._fetches) % size == 0
            self._sz = size

        def after_run(self, _, run_values):
            res = run_values.results
            for i in range(0, len(res), self._sz):
                vals = res[i:i + self._sz]
                self._inf.datapoint(vals)

    def _build_hook_parallel(self, inf):
        out_names = inf.get_output_tensors()
        sz = len(out_names)
        fetches = list(
            itertools.chain(*[t.get_tensors(out_names)
                              for t in self._handles]))
        return self.InferencerToHookDataParallel(inf, fetches, sz)

    def _build_hook(self, inf):
        out_names = inf.get_output_tensors()
        fetches = self._handles[0].get_tensors(out_names)
        return InferencerToHook(inf, fetches)

    def _before_train(self):
        self._hooked_sess = HookedSession(self.trainer.sess, self._hooks)
        self._parallel_hooked_sess = HookedSession(self.trainer.sess,
                                                   self._hooks_parallel)

    def _trigger(self):
        for inf in self.infs:
            inf.before_inference()

        self._input_source.reset_state()
        total = self._size
        nr_tower = len(self._gpus)
        with tqdm.tqdm(total=total, **get_tqdm_kwargs()) as pbar:
            while total >= nr_tower:
                self._parallel_hooked_sess.run(fetches=[])
                pbar.update(nr_tower)
                total -= nr_tower
            # take care of the rest
            try:
                while total > 0:
                    # TODO XXX doesn't support remap
                    feed = self._input_source.next_feed(cnt=1)
                    self._hooked_sess.run(fetches=[], feed_dict=feed)
                    pbar.update(1)
                    total -= 1
            except AttributeError:
                logger.error(
                    "[DataParallelInferenceRunner] doesn't support InputSource wrappers very well!"
                )
                logger.error(
                    "[DataParallelInferenceRunner] Skipping the rest of the datapoints ..."
                )
        summary_inferencer(self.trainer, self.infs)