# Example no. 1
def test():
    """Regression-check FastinfModel marginals against recorded values.

    The expected numbers are copied from an earlier run (see the NOTE
    below); they are not derived analytically.
    """
    dataset = Dataset('full_pascal_trainval')
    fm = FastinfModel(dataset, 'perfect', 20)
    # NOTE: just took values from a run of the thing

    prior_correct = [
        float(x) for x in
        "0.050543  0.053053  0.073697  0.038331  0.050954  0.041879  0.16149\
    0.068721  0.10296   0.026837  0.043779  0.087683  0.063447  0.052205\
    0.41049   0.051664  0.014211  0.068361  0.056969  0.05046".split()
    ]
    np.testing.assert_almost_equal(fm.p_c, prior_correct, 4)

    # With nothing marked as taken, the marginals must stay at the prior.
    observations = np.zeros(20)
    taken = np.zeros(20)
    fm.update_with_observations(taken, observations)
    np.testing.assert_almost_equal(fm.p_c, prior_correct, 4)

    # Observe variable 5 with value 1.
    observations[5] = 1
    taken[5] = 1
    fm.update_with_observations(taken, observations)
    # Parenthesized single-argument form prints the same under Python 2 and 3.
    print(fm.p_c)
    correct = [
        float(x) for x in
        "0.027355   0.11855    0.027593   0.026851   0.012569   0.98999    0.52232\
    0.017783   0.010806   0.015199   0.0044641  0.02389    0.033602   0.089089\
    0.50297    0.0083272  0.0088274  0.0098522  0.034259   0.0086298".split()
    ]
    np.testing.assert_almost_equal(fm.p_c, correct, 4)

    # Additionally observe variable 15 with value 0.
    observations[15] = 0
    taken[15] = 1
    fm.update_with_observations(taken, observations)
    correct = [
        float(x) for x in
        "2.73590000e-02   1.19030000e-01   2.75500000e-02   2.68760000e-02 \
   1.23920000e-02   9.90200000e-01   5.25320000e-01   1.76120000e-02 \
   1.05030000e-02   1.52130000e-02   4.26410000e-03   2.38250000e-02 \
   3.36870000e-02   8.96450000e-02   5.04300000e-01   8.71880000e-05 \
   8.82630000e-03   9.55290000e-03   3.43240000e-02   8.44510000e-03".split()
    ]
    np.testing.assert_almost_equal(fm.p_c, correct)

    # reinit_marginals
    # NOTE(review): no call that re-initializes the model is visible before
    # this assertion; presumably one belongs here -- confirm against the
    # FastinfModel API. Without it fm.p_c still holds the posterior above.
    np.testing.assert_equal(fm.p_c, prior_correct)

def test():
  """Regression-check FastinfModel marginals against recorded values.

  The expected numbers are copied from an earlier run (see the NOTE
  below); they are not derived analytically.
  """
  dataset = Dataset('full_pascal_trainval')
  fm = FastinfModel(dataset, 'perfect', 20)
  # NOTE: just took values from a run of the thing
  prior_correct = [float(x) for x in "0.050543  0.053053  0.073697  0.038331  0.050954  0.041879  0.16149\
    0.068721  0.10296   0.026837  0.043779  0.087683  0.063447  0.052205\
    0.41049   0.051664  0.014211  0.068361  0.056969  0.05046".split()]
  np.testing.assert_almost_equal(fm.p_c, prior_correct, 4)

  # With nothing marked as taken, the marginals must stay at the prior.
  observations = np.zeros(20)
  taken = np.zeros(20)
  fm.update_with_observations(taken, observations)
  np.testing.assert_almost_equal(fm.p_c, prior_correct, 4)

  # Observe variable 5 with value 1. The model has to be updated before the
  # comparison, otherwise fm.p_c is still the prior.
  observations[5] = 1
  taken[5] = 1
  fm.update_with_observations(taken, observations)
  # Parenthesized single-argument form prints the same under Python 2 and 3.
  print(fm.p_c)
  correct = [float(x) for x in  "0.027355   0.11855    0.027593   0.026851   0.012569   0.98999    0.52232\
    0.017783   0.010806   0.015199   0.0044641  0.02389    0.033602   0.089089\
    0.50297    0.0083272  0.0088274  0.0098522  0.034259   0.0086298".split()]
  np.testing.assert_almost_equal(fm.p_c, correct, 4)

  # Additionally observe variable 15 with value 0.
  observations[15] = 0
  taken[15] = 1
  fm.update_with_observations(taken, observations)
  correct = [float(x) for x in "2.73590000e-02   1.19030000e-01   2.75500000e-02   2.68760000e-02 \
   1.23920000e-02   9.90200000e-01   5.25320000e-01   1.76120000e-02 \
   1.05030000e-02   1.52130000e-02   4.26410000e-03   2.38250000e-02 \
   3.36870000e-02   8.96450000e-02   5.04300000e-01   8.71880000e-05 \
   8.82630000e-03   9.55290000e-03   3.43240000e-02   8.44510000e-03".split()]
  np.testing.assert_almost_equal(fm.p_c, correct)

  # reinit_marginals
  # NOTE(review): no call that re-initializes the model is visible before
  # this assertion; presumably one belongs here -- confirm against the
  # FastinfModel API. Without it fm.p_c still holds the posterior above.
  np.testing.assert_equal(fm.p_c, prior_correct)

class BeliefState(object):
    """
    Encapsulates stuff that we keep track of during policy execution.
    Methods to initialize the model, update with an observed posterior,
    condition on observed values, and compute expected information gain.
    """

    ngram_modes = ['no_smooth', 'backoff']
    accepted_modes = ngram_modes + ['random', 'fixed_order', 'fastinf']

    def __init__(self, dataset, actions, mode='fixed_order', bounds=None,
                 model=None, fastinf_model_name='perfect'):
        """
        Store the configuration and pick (or build) the underlying model.

        dataset: object exposing a `classes` sequence.
        actions: sequence of objects with a `name` attribute.
        mode: one of `accepted_modes`; selects the model type.
        bounds: indexable pair read by compute_full_feature() as
            bounds[0] and bounds[1].
        model: optional pre-built model; it must be an instance of the
            model class that matches `mode`.
        fastinf_model_name: passed to FastinfModel when mode is 'fastinf'.

        Note that taken/observed/observations are only created by reset();
        this constructor does not create them.
        """
        assert (mode in self.accepted_modes)
        self.mode = mode
        self.dataset = dataset
        self.actions = actions
        self.bounds = bounds
        self.fastinf_model_name = fastinf_model_name

        # Is GIST in the actions? Need to behave differently if so.
        self.gist_mode = any(action.name == 'gist' for action in self.actions)
        self.num_obs_vars = len(self.actions)
        if self.gist_mode:
            assert (self.actions[0].name == 'gist')
            # GIST is a single action but contributes one observation per class.
            self.num_obs_vars = len(self.actions) - 1 + len(
                self.dataset.classes)

        # Use the supplied model when given, otherwise build the default
        # model for the requested mode.
        if mode == 'random':
            if model:
                assert (isinstance(model, RandomModel))
                self.model = model
            else:
                self.model = RandomModel(len(self.dataset.classes))
        elif mode == 'no_smooth' or mode == 'backoff':
            if model:
                assert (isinstance(model, NGramModel))
                self.model = model
            else:
                self.model = NGramModel(dataset, mode)
        elif mode == 'fixed_order':
            if model:
                assert (isinstance(model, FixedOrderModel))
                self.model = model
            else:
                self.model = FixedOrderModel(dataset)
        elif mode == 'fastinf':
            if model:
                assert (isinstance(model, FastinfModel))
                self.model = model
            else:
                self.model = FastinfModel(dataset, self.fastinf_model_name,
                                          self.num_obs_vars)
        else:
            raise RuntimeError("Unknown mode")
        self.orig_p_c = self.get_p_c()

    def __repr__(self):
        # list() keeps the pairs visible when zip() returns an iterator.
        return "BeliefState: \n%s\n%s" % (
            self.get_p_c(), list(zip(self.observed, self.observations)))

    def get_p_c(self):
        """Return the model's current p_c attribute."""
        return self.model.p_c

    def get_entropies(self):
        """
        Return -p*log2(p) - (1-p)*log2(1-p) element-wise over the model's
        p_c, with the logarithm taken by skutil.log2.
        """
        p_c = self.model.p_c
        p_not_c = 1 - p_c
        return -p_c * skutil.log2(p_c) + -p_not_c * skutil.log2(p_not_c)

    def reset(self):
        """Zero the time counter and the taken/observed/observations arrays."""
        # NOTE(review): the original one-line docstring also said "reset the
        # model", but no model call is visible here -- confirm whether one
        # is missing.
        self.t = 0
        self.taken = np.zeros(len(self.actions))
        self.observed = np.zeros(self.num_obs_vars)
        self.observations = np.zeros(self.num_obs_vars)

    def update_with_score(self, action_ind, score):
        """Update the taken and observations lists, the model, and get the new marginals."""
        self.taken[action_ind] = 1
        self.observed[action_ind] = 1
        self.observations[action_ind] = score

        if self.mode in ['random', 'fixed_order']:
            # No model update is issued for these modes.
            return
        num_classes = len(self.dataset.classes)
        # NOTE(review): the condition of this branch was not visible in the
        # source and is reconstructed as "n-gram modes" -- confirm.
        if self.mode in self.ngram_modes:
            # Only the leading per-class part of the observations is passed.
            self.model.update_with_observations(
                self.observed[:num_classes], self.observations[:num_classes])
        else:
            self.model.update_with_observations(
                self.observed, self.observations)
        # self.full_feature = self.compute_full_feature()

    def update_with_gist(self, action_ind, scores):
        """
        GIST returns multiple scores (for all the C_i), but is only one action.
        We deal with this by converting to multiple "actions" here, to maintain
        the interface to the FastInf model.
        """
        self.taken[action_ind] = 1
        num_classes = len(self.dataset.classes)
        # FastInf models expect GIST to be the second half of observations
        self.observed[num_classes:] = 1
        self.observations[num_classes:] = scores

        if self.mode in ('random', 'fixed_order'):
            # No model update is issued for these modes.
            return
        # NOTE(review): the condition of this branch was not visible in the
        # source and is reconstructed as "n-gram modes" -- confirm.
        if self.mode in self.ngram_modes:
            # Only the trailing GIST part of the observations is passed.
            self.model.update_with_observations(
                self.observed[num_classes:], self.observations[num_classes:])
        else:
            self.model.update_with_observations(
                self.observed, self.observations)
        # self.full_feature = self.compute_full_feature()

    num_time_blocks = 1
    num_features = num_time_blocks * 47

    # [P(C) [P(C_i|O) for all i] [H(C_i|O) for all i] mean_entropy max_entropy t/S (1-t/S) t/T (1-t/T)]

    def compute_full_feature(self):
        """
        Return featurized representation of the current belief state.
        The features are in action blocks, meaning that this method returns
        an array of size (len(self.actions), self.num_features).
        To get a usable vector, simply flatten() this array.
        This is useful for zeroing-out all actions but the relevant one.
        NOTE: Keep the class variable num_features synced with the behavior here.

        Requires self.bounds to be indexable (bounds[0], bounds[1]) and
        reset() to have been called so that self.t and self.taken exist.
        """
        orig_p_c = self.orig_p_c
        p_c = self.get_p_c()
        h_c = self.get_entropies()
        mean_entropy = np.mean(h_c)
        max_entropy = np.max(h_c)
        # -0 == 0, so this rewrites every zero entry (including -0.0) as 0.
        h_c[h_c == -0] = 0
        # float() avoids floor division when t and the bounds are integers.
        time_to_start_ratio = 0 if self.t >= self.bounds[0] else float(
            self.t) / self.bounds[0]
        time_ratio = 0 if self.t <= self.bounds[0] else float(
            self.t) / self.bounds[1]

        # If GIST is an action, it's our first action, and doesn't care about any
        # class feature, only the mean_entropy and time_ratio ones.
        if self.gist_mode:
            orig_p_c = np.concatenate(([0], orig_p_c))

        num_actions = len(self.actions)

        # Tile the dynamic probability features
        p_c = np.tile(np.atleast_2d(p_c), (num_actions, 1))
        h_c = np.tile(np.atleast_2d(h_c), (num_actions, 1))

        # One row per action, six scalar features repeated in every row.
        rest = np.vstack(
            (mean_entropy * np.ones(num_actions),
             max_entropy * np.ones(num_actions),
             time_to_start_ratio * np.ones(num_actions),
             1. - time_to_start_ratio * np.ones(num_actions),
             time_ratio * np.ones(num_actions),
             1. - time_ratio * np.ones(num_actions))).T

        feat = np.hstack((np.atleast_2d(orig_p_c).T, p_c, h_c, rest))

        # zero out those actions that have been taken
        # NOTE: this makes sense because it allows the policy to simply do argmax
        # all the time, without worrying about actions that have been taken:
        # the best it will be able to do for those is 0
        feat[np.flatnonzero(self.taken), :] = 0
        return feat

    def block_out_action(self, full_feature, action_ind=-1):
        """
        Take a full_feature matrix and zero out all the values except those
        in the relevant action block.
        If action_ind < 0, returns the flat feature with nothing zeroed out.
        """
        if action_ind < 0:
            return full_feature.flatten()
        assert (action_ind < len(self.actions))
        feature = np.zeros(np.prod(full_feature.shape))
        start_ind = action_ind * self.num_features
        feature[start_ind:start_ind +
                self.num_features] = full_feature[action_ind, :]
        return feature
class BeliefState(object):
    """
    Encapsulates stuff that we keep track of during policy execution.
    Methods to initialize the model, update with an observed posterior,
    condition on observed values, and compute expected information gain.
    """

    ngram_modes = ["no_smooth", "backoff"]
    accepted_modes = ngram_modes + ["random", "fixed_order", "fastinf"]

    def __init__(self, dataset, actions, mode="fixed_order", bounds=None, model=None, fastinf_model_name="perfect"):
        """
        Store the configuration and pick (or build) the underlying model.

        dataset: object exposing a `classes` sequence.
        actions: sequence of objects with a `name` attribute.
        mode: one of `accepted_modes`; selects the model type.
        bounds: indexable pair read by compute_full_feature() as
            bounds[0] and bounds[1].
        model: optional pre-built model; it must be an instance of the
            model class that matches `mode`.
        fastinf_model_name: passed to FastinfModel when mode is "fastinf".

        Note that taken/observed/observations are only created by reset();
        this constructor does not create them.
        """
        assert mode in self.accepted_modes
        self.mode = mode
        self.dataset = dataset
        self.actions = actions
        self.bounds = bounds
        self.fastinf_model_name = fastinf_model_name

        # Is GIST in the actions? Need to behave differently if so.
        self.gist_mode = any(action.name == "gist" for action in self.actions)
        self.num_obs_vars = len(self.actions)
        if self.gist_mode:
            assert self.actions[0].name == "gist"
            # GIST is a single action but contributes one observation per class.
            self.num_obs_vars = len(self.actions) - 1 + len(self.dataset.classes)

        # Use the supplied model when given, otherwise build the default
        # model for the requested mode.
        if mode == "random":
            if model:
                assert isinstance(model, RandomModel)
                self.model = model
            else:
                self.model = RandomModel(len(self.dataset.classes))
        elif mode == "no_smooth" or mode == "backoff":
            if model:
                assert isinstance(model, NGramModel)
                self.model = model
            else:
                self.model = NGramModel(dataset, mode)
        elif mode == "fixed_order":
            if model:
                assert isinstance(model, FixedOrderModel)
                self.model = model
            else:
                self.model = FixedOrderModel(dataset)
        elif mode == "fastinf":
            if model:
                assert isinstance(model, FastinfModel)
                self.model = model
            else:
                self.model = FastinfModel(dataset, self.fastinf_model_name, self.num_obs_vars)
        else:
            raise RuntimeError("Unknown mode")
        self.orig_p_c = self.get_p_c()

    def __repr__(self):
        # list() keeps the pairs visible when zip() returns an iterator.
        return "BeliefState: \n%s\n%s" % (self.get_p_c(), list(zip(self.observed, self.observations)))

    def get_p_c(self):
        """Return the model's current p_c attribute."""
        return self.model.p_c

    def get_entropies(self):
        """
        Return -p*log2(p) - (1-p)*log2(1-p) element-wise over the model's
        p_c, with the logarithm taken by skutil.log2.
        """
        p_c = self.model.p_c
        p_not_c = 1 - p_c
        return -p_c * skutil.log2(p_c) + -p_not_c * skutil.log2(p_not_c)

    def reset(self):
        """Zero the time counter and the taken/observed/observations arrays."""
        # NOTE(review): the original one-line docstring also said "reset the
        # model", but no model call is visible here -- confirm whether one
        # is missing.
        self.t = 0
        self.taken = np.zeros(len(self.actions))
        self.observed = np.zeros(self.num_obs_vars)
        self.observations = np.zeros(self.num_obs_vars)

    def update_with_score(self, action_ind, score):
        """Update the taken and observations lists, the model, and get the new marginals."""
        self.taken[action_ind] = 1
        self.observed[action_ind] = 1
        self.observations[action_ind] = score

        if self.mode in ["random", "fixed_order"]:
            # No model update is issued for these modes.
            return
        # NOTE(review): the condition of this branch was not visible in the
        # source and is reconstructed as "n-gram modes" -- confirm.
        if self.mode in self.ngram_modes:
            # Only the leading per-class part of the observations is passed.
            self.model.update_with_observations(
                self.observed[: len(self.dataset.classes)], self.observations[: len(self.dataset.classes)]
            )
        else:
            self.model.update_with_observations(self.observed, self.observations)
        # self.full_feature = self.compute_full_feature()

    def update_with_gist(self, action_ind, scores):
        """
        GIST returns multiple scores (for all the C_i), but is only one action.
        We deal with this by converting to multiple "actions" here, to maintain
        the interface to the FastInf model.
        """
        self.taken[action_ind] = 1
        # FastInf models expect GIST to be the second half of observations
        self.observed[len(self.dataset.classes) :] = 1
        self.observations[len(self.dataset.classes) :] = scores

        if self.mode in ("random", "fixed_order"):
            # No model update is issued for these modes.
            return
        # NOTE(review): the condition of this branch was not visible in the
        # source and is reconstructed as "n-gram modes" -- confirm.
        if self.mode in self.ngram_modes:
            # Only the trailing GIST part of the observations is passed.
            self.model.update_with_observations(
                self.observed[len(self.dataset.classes) :], self.observations[len(self.dataset.classes) :]
            )
        else:
            self.model.update_with_observations(self.observed, self.observations)
        # self.full_feature = self.compute_full_feature()

    num_time_blocks = 1
    num_features = num_time_blocks * 47
    # [P(C) [P(C_i|O) for all i] [H(C_i|O) for all i] mean_entropy max_entropy t/S (1-t/S) t/T (1-t/T)]

    def compute_full_feature(self):
        """
        Return featurized representation of the current belief state.
        The features are in action blocks, meaning that this method returns
        an array of size (len(self.actions), self.num_features).
        To get a usable vector, simply flatten() this array.
        This is useful for zeroing-out all actions but the relevant one.
        NOTE: Keep the class variable num_features synced with the behavior here.

        Requires self.bounds to be indexable (bounds[0], bounds[1]) and
        reset() to have been called so that self.t and self.taken exist.
        """
        orig_p_c = self.orig_p_c
        p_c = self.get_p_c()
        h_c = self.get_entropies()
        mean_entropy = np.mean(h_c)
        max_entropy = np.max(h_c)
        # -0 == 0, so this rewrites every zero entry (including -0.0) as 0.
        h_c[h_c == -0] = 0
        # float() avoids floor division when t and the bounds are integers.
        time_to_start_ratio = 0 if self.t >= self.bounds[0] else float(self.t) / (self.bounds[0])
        time_ratio = 0 if self.t <= self.bounds[0] else float(self.t) / self.bounds[1]

        # If GIST is an action, it's our first action, and doesn't care about any
        # class feature, only the mean_entropy and time_ratio ones.
        if self.gist_mode:
            orig_p_c = np.concatenate(([0], orig_p_c))

        # Tile the dynamic probability features
        p_c = np.tile(np.atleast_2d(p_c), (len(self.actions), 1))
        h_c = np.tile(np.atleast_2d(h_c), (len(self.actions), 1))

        # One row per action, six scalar features repeated in every row.
        rest = np.vstack(
            (
                mean_entropy * np.ones(len(self.actions)),
                max_entropy * np.ones(len(self.actions)),
                time_to_start_ratio * np.ones(len(self.actions)),
                1.0 - time_to_start_ratio * np.ones(len(self.actions)),
                time_ratio * np.ones(len(self.actions)),
                1.0 - time_ratio * np.ones(len(self.actions)),
            )
        ).T

        feat = np.hstack((np.atleast_2d(orig_p_c).T, p_c, h_c, rest))

        # zero out those actions that have been taken
        # NOTE: this makes sense because it allows the policy to simply do argmax
        # all the time, without worrying about actions that have been taken:
        # the best it will be able to do for those is 0
        feat[np.flatnonzero(self.taken), :] = 0
        return feat

    def block_out_action(self, full_feature, action_ind=-1):
        """
        Take a full_feature matrix and zero out all the values except those
        in the relevant action block.
        If action_ind < 0, returns the flat feature with nothing zeroed out.
        """
        if action_ind < 0:
            return full_feature.flatten()
        assert action_ind < len(self.actions)
        feature = np.zeros(np.prod(full_feature.shape))
        start_ind = action_ind * self.num_features
        feature[start_ind : start_ind + self.num_features] = full_feature[action_ind, :]
        return feature