def test_passes_additional_kwargs_to_regressor_transform(self):
        """Extra keyword arguments given to `transform_ar` reach `transform`."""
        extra = {"foo": 3, "bar": 5}
        regressor = mock.Mock(n_features=3)

        transform_ar(regressor, [], **extra)

        # call_args is an (args, kwargs) pair; only the keyword part matters here
        _, forwarded = regressor.transform.call_args
        for name, value in extra.items():
            self.assertIn(name, forwarded)
            self.assertEqual(value, forwarded[name])
    def test_size_of_predictor_variable_correct_when_bias_is_true(self):
        """With `bias=True`, the predictor matrix has `n_features` columns."""
        order = 5
        n_samples = 20
        regressor = mock.Mock(n_features=order + 1)

        transform_ar(regressor, [0] * n_samples, bias=True)

        # first positional argument of `transform` is the predictor matrix
        predictors = regressor.transform.call_args[0][0]
        n_columns = np.shape(predictors)[1]
        self.assertEqual(n_columns, order + 1)
    def test_args_sent_to_regressor_transform_have_the_correct_length(self):
        """Both positional arguments to `transform` have `n - n_features` entries."""
        order = 4
        n_samples = 52
        regressor = mock.Mock(n_features=order)

        transform_ar(regressor, [0] * n_samples)

        expected_len = n_samples - order
        positional = regressor.transform.call_args[0]
        self.assertEqual(len(positional[0]), expected_len)
        self.assertEqual(len(positional[1]), expected_len)
    def test_y_values_passed_to_regressor_infer_fit_correct_when_bias_is_false(self):
        """Without bias, the target passed to `transform` is `y` minus its first `p` samples."""
        order = 4
        n_samples = 57
        signal = np.random.default_rng(1).normal(size=n_samples)

        regressor = mock.Mock(n_features=order)
        transform_ar(regressor, signal, bias=False)

        # second positional argument of `transform` is the target vector
        positional = regressor.transform.call_args[0]
        np.testing.assert_allclose(positional[1], signal[order:])
    def test_bias_false_by_default(self):
        """Omitting `bias` yields a predictor matrix shaped like the `bias=False` one."""
        order = 4
        samples = [0] * 10
        explicit = mock.Mock(n_features=order)
        implicit = mock.Mock(n_features=order)

        transform_ar(explicit, samples, bias=False)
        transform_ar(implicit, samples)

        shape_explicit = np.shape(explicit.transform.call_args[0][0])
        shape_implicit = np.shape(implicit.transform.call_args[0][0])
        self.assertEqual(shape_explicit, shape_implicit)
    def test_returns_regressor_transform_output(self):
        """`transform_ar` hands back whatever `regressor.transform` returned."""
        expected = (1, {"a": 3})
        regressor = mock.Mock(n_features=3)
        regressor.transform.return_value = expected

        result = transform_ar(regressor, [])

        self.assertEqual(result, expected)
    def test_X_values_passed_to_regressor_infer_fit_correct_when_bias_is_false(self):
        """Without bias, row `i` of the predictors is `y[i:i + p]` in reverse order."""
        order = 4
        signal = np.random.default_rng(1).normal(size=57)

        regressor = mock.Mock(n_features=order)
        transform_ar(regressor, signal, bias=False)

        actual_X = regressor.transform.call_args[0][0]

        # entry (i, j) is signal[i + order - 1 - j], i.e. each row is a window of
        # `order` consecutive samples read backwards (most recent sample first)
        expected_X = np.array(
            [signal[row:row + order][::-1] for row in range(len(signal) - order)]
        )

        np.testing.assert_allclose(actual_X, expected_X)
    def test_X_values_augmented_by_constant_one_when_bias_is_true(self):
        """`bias=True` appends a column of ones to the `bias=False` predictors."""
        order = 5
        signal = np.random.default_rng(2).normal(size=23)

        plain = mock.Mock(n_features=order)
        biased = mock.Mock(n_features=order + 1)
        transform_ar(plain, signal, bias=False)
        transform_ar(biased, signal, bias=True)

        X_plain = plain.transform.call_args[0][0]
        X_biased = biased.transform.call_args[0][0]

        # expected: the unbiased predictors with one extra trailing column of ones
        ones_column = np.ones((len(X_plain), 1))
        np.testing.assert_allclose(X_biased, np.hstack((X_plain, ones_column)))
Exemplo n.º 9
0
def hyper_score_ar(
    regressor_class: Callable,
    dataset: Sequence,
    metric: Callable[[np.ndarray, np.ndarray], float],
    rng: Union[int, np.random.RandomState, np.random.Generator] = 0,
    fit_kws: Optional[dict] = None,
    initial_weights: str = "default",
    monitor: Optional[Sequence] = None,
    monitor_step: int = 1,
    progress: Optional[Callable] = None,
    progress_trial: Optional[Callable] = None,
    test_fraction: float = 0.2,
    test_samples: Optional[int] = None,
    **kwargs,
) -> Tuple[float, SimpleNamespace]:
    """ Score hyperparameter choice for AR clustering.

    This runs a time series clustering algorithm on a set of signals and assesses the
    accuracy of the output using a given metric. This is done using a given set of
    hyperparameter values for the algorithm. The function then returns a summary score.

    Parameters
    ----------
    regressor_class
        Class to use to create regressors. One regressor is created for each signal in
        the dataset, and `transform_ar` is used to fit the regressor on each signal.
        Additional keyword arguments passed to `hyper_score_ar` are transferred to the
        `regressor_class` constructor -- you'll probably need at least `n_models` and
        `n_features`. A random number generator is chosen automatically by
        `hyper_score_ar` and passed as the `rng` argument to the constructor -- note
        that this will not pass `hyper_score_ar`'s `rng` directly!
    dataset
        Sequence of signals to use for testing the regressors. Each entry in `dataset`
        should be an object containing at least fields `y` and `usage_seq`, which should
        be arrays of equal length. `y` is the signal, `usage_seq` gives the ground truth
        of which generating model was used at each time step.
    metric
        The function to use to assess the accuracy of the clustering. This should take a
        vector `labels_true` of ground-truth latent states at each time step and a
        vector `labels_pred` of predicted (inferred) latent states, and return a scalar
        accuracy score.
    rng
        Random number generator or seed used to draw one integer seed per trial; each
        regressor then gets its own generator built from that seed. If a seed is
        given, a random number generator is created using `np.random.default_rng`.
    fit_kws
        Additional keyword arguments to pass to `transform_ar`. The dictionary is
        copied, so the caller's object is not modified.
    initial_weights
        How to set the initial weights (for regressors that have that option). This can
        be
          "default":    do the default, i.e., no `weights` attribute passed to __init__
          "oracle_ar":  pass the `a` coefficients from the signal's `armas` member as
                        `weights`, unless `weights` was explicitly provided in the
                        additional keyword arguments.
    monitor
        Sequence of strings denoting values to monitor during the inference procedure
        for each regressor and signal pair. The special value "r" can be used to also
        store continuous latent-state assignments that are returned as the first output
        of `transform` / `transform_ar`.
    monitor_step
        How often to record the values from `monitor`. Values below 1 are treated as 1.
    progress
        Progress callable for monitoring how far the function is in running the
        regressor on each signal from the `dataset`. This is used to wrap an iterator,
        `progress(iterator) -> iterator`.
    progress_trial
        Callable for monitoring the progress during each trial. This is passed to
        `transform_ar` as `progress`, unless `fit_kws` already contains a `progress`
        entry.
    test_fraction
        Fraction of samples to use when estimating the clustering score.
    test_samples
        Number of samples to use when estimating the clustering score. If this is
        provided, it overrides `test_fraction`.
    Additional keyword arguments are directly passed to the `regressor_class`
    constructor.

    Returns
    -------
    A tuple `(summary_score, details)`, where `summary_score` is the median of
    the scores assigned for the regressor output on each signal in the `dataset`, and
    `details` is a `SimpleNamespace` containing more details from the run:
        trial_scores (np.ndarray)       -- The scores for each trial.
        regressor_seeds (np.ndarray)    -- Random seeds used to initialize regressors.
        regressors (Sequence)           -- Regressors used for each trial.
        history (Sequence)              -- Monitoring data for each trial.
    """
    # handle seed form of rng
    if not hasattr(rng, "normal"):
        rng = np.random.default_rng(rng)

    # handle optional fit_kws; copy so that the caller's dict is never mutated
    fit_kws = {} if fit_kws is None else copy.copy(fit_kws)
    fit_kws.setdefault("progress", progress_trial)

    # handle optional monitor
    if monitor is None:
        monitor = []

    # handle monitoring of transform output: "r" is stored from the return value of
    # `transform_ar` below, so it is stripped from the list handed to the regressor
    store_r = "r" in monitor
    if store_r:
        monitor = [name for name in monitor if name != "r"]

    # ensure monitor_step makes sense
    monitor_step = max(monitor_step, 1)

    # set up trial scores
    n_trials = len(dataset)
    trial_scores = np.zeros(n_trials)

    # set up regressor random seeds; legacy `RandomState` exposes `randint`, while
    # the newer `Generator` exposes `integers`
    if hasattr(rng, "randint"):
        gen_integers = rng.randint
    else:
        gen_integers = rng.integers
    regressor_seeds = gen_integers(0, sys.maxsize, size=n_trials)

    # run the simulations
    it = dataset
    if progress is not None:
        it = progress(it)
    history = []
    regressors = []
    for i, signal in enumerate(it):
        # create the regressor
        crt_args = copy.copy(kwargs)

        crt_seed = regressor_seeds[i]
        crt_rng = np.random.default_rng(crt_seed)
        crt_args["rng"] = crt_rng

        # `crt_args` is a dict, so an explicit `weights` must be detected by key
        # membership (an attribute check would never find it and the oracle values
        # would silently overwrite the caller's choice)
        if initial_weights == "oracle_ar" and "weights" not in crt_args:
            crt_args["weights"] = np.asarray([arma.a for arma in signal.armas])

        regressor = regressor_class(**crt_args)

        # run transform_ar with this regressor
        crt_monitor = monitor
        if len(crt_monitor) > 0 and monitor_step > 1:
            crt_monitor = AttributeMonitor(monitor, step=monitor_step)

        crt_r, crt_history = transform_ar(regressor,
                                          signal.y,
                                          monitor=crt_monitor,
                                          **fit_kws)
        if store_r:
            crt_history.r = crt_r[::monitor_step]
        history.append(crt_history)

        crt_inferred_usage = np.argmax(crt_r, axis=1)

        # score only the last test_fraction / test_samples samples
        # NOTE(review): if this count comes out as 0, the `[-0:]` slices below select
        # the *entire* sequence rather than nothing -- confirm this is acceptable
        if test_samples is None:
            crt_n = int(test_fraction * len(crt_r))
        else:
            crt_n = test_samples
        crt_score = metric(signal.usage_seq[-crt_n:],
                           crt_inferred_usage[-crt_n:])

        regressors.append(regressor)

        trial_scores[i] = crt_score

    # noinspection PyTypeChecker
    summary_score: float = np.median(trial_scores)
    details = SimpleNamespace(
        trial_scores=trial_scores,
        regressor_seeds=regressor_seeds,
        regressors=regressors,
        history=history,
    )

    return summary_score, details
    def test_calls_regressor_transform_once(self):
        """`transform_ar` invokes `regressor.transform` exactly one time."""
        fake_regressor = mock.Mock(n_features=3)

        transform_ar(fake_regressor, [])

        transform_mock = fake_regressor.transform
        transform_mock.assert_called_once()
    def test_calls_regressor_transform_with_two_positional_args(self):
        """`regressor.transform` receives exactly two positional arguments."""
        fake_regressor = mock.Mock(n_features=3)

        transform_ar(fake_regressor, [])

        positional = fake_regressor.transform.call_args[0]
        self.assertEqual(len(positional), 2)
    def test_does_not_pass_bias_to_regressor_transform(self):
        """The `bias` option is consumed by `transform_ar`, not forwarded."""
        fake_regressor = mock.Mock(n_features=3)

        transform_ar(fake_regressor, [], bias=True)

        # index 1 of call_args holds the keyword arguments of the last call
        forwarded_kwargs = fake_regressor.transform.call_args[1]
        self.assertNotIn("bias", forwarded_kwargs)