示例#1
0
def test_pca_destructor():
    """Check a PCA-based linear projector chained with an independent destructor.

    The two-step composite is handed to ``check_destructor`` with
    ``is_canonical=False`` and the result is asserted to be truthy.
    """
    pca_projection = LinearProjector(linear_estimator=PCA(), orthogonal=False)
    pipeline = CompositeDestructor(
        destructors=[pca_projection, IndependentDestructor()],
    )
    assert check_destructor(pipeline, is_canonical=False)
示例#2
0
def test_random_linear_householder_destructor():
    """Check a random orthogonal projector chained with an independent destructor.

    The two-step composite is handed to ``check_destructor`` with
    ``is_canonical=False`` and the result is asserted to be truthy.
    """
    # With n_components=1 the estimator uses a Householder matrix
    householder_estimator = RandomOrthogonalEstimator(n_components=1)
    random_projection = LinearProjector(
        linear_estimator=householder_estimator,
        orthogonal=False,
    )
    pipeline = CompositeDestructor(
        destructors=[random_projection, IndependentDestructor()],
    )
    assert check_destructor(pipeline, is_canonical=False)
    def _fit_and_score(data_name, destructor, destructor_name, n_train, random_state=0):
        """Fit a destructor on ``X_train`` and score it on the train and test data.

        ``X_train`` and ``X_test`` are not parameters of this function; they are
        read from the enclosing scope.

        The global numpy random generator is seeded from ``random_state`` for
        the duration of the call and is always restored afterwards, including
        when fitting or scoring raises.

        Parameters
        ----------
        data_name : object
            Only used in the debug log message.
        destructor : object
            Must provide ``fit(X)`` and ``score(X)``.
        destructor_name : object
            Label that is logged and echoed back in the result.
        n_train : object
            Accepted for interface compatibility; not used in the body.
        random_state : int, RandomState instance or None, default=0
            Passed to ``check_random_state`` to derive the global seed.

        Returns
        -------
        dict
            Keys ``fitted_destructor``, ``destructor_name``, ``train_score``
            and ``test_score``.  When fitting fails with an MLPACK error, the
            fitted destructor is a dummy composite holding a single
            ``IndependentDestructor`` and both scores are ``-inf``.

        Raises
        ------
        RuntimeError
            Re-raised from ``destructor.fit`` when the error message does not
            mention "mlpack".
        """
        # Fix random state of global generator so repeatable if destructors are random
        rng = check_random_state(random_state)
        old_random_state = np.random.get_state()
        np.random.seed(rng.randint(2 ** 32, dtype=np.uint32))

        try:
            try:
                # Fit destructor
                start_time = time.time()
                destructor.fit(X_train)
                train_time = time.time() - start_time
            except RuntimeError as e:
                # Only MLPACK failures are tolerated; anything else propagates
                if 'mlpack' not in str(e).lower():
                    raise
                warnings.warn('Skipping density tree destructors because of MLPACK error "%s". '
                              'Using dummy IndependentDestructor() instead.' % str(e))
                destructor = CompositeDestructor([IndependentDestructor()]).fit(X_train)
                train_time = 0
                train_score = -np.inf
                test_score = -np.inf
                score_time = 0
            else:
                # Get scores
                start_time = time.time()
                train_score = destructor.score(X_train)
                test_score = destructor.score(X_test)
                score_time = time.time() - start_time

            # Arguments are passed separately so formatting only happens when
            # the debug level is enabled
            logger.debug(
                'train=%.3f, test=%.3f, train_time=%.3f, score_time=%.3f, destructor=%s, data_name=%s',
                train_score, test_score, train_time, score_time, destructor_name, data_name)
        finally:
            # Reset random state even if fitting/scoring raised, so the global
            # generator is never left re-seeded for the caller
            np.random.set_state(old_random_state)

        return dict(fitted_destructor=destructor,
                    destructor_name=destructor_name,
                    train_score=train_score,
                    test_score=test_score)
def _get_model(data_name, model_name, model_kwargs):
    """Assemble the ``DeepDestructorCV`` model for a dataset / model-name pair.

    Parameters
    ----------
    data_name : str
        Forwarded to ``_get_inverse_logit_destructor`` and, for models other
        than ``'deep-copula'``, to ``_get_pair_estimators``.
    model_name : str
        ``'deep-copula'`` selects the copula canonical destructor; any other
        value selects a list of ``FeatureGroupsDestructor`` objects built via
        ``_get_pair_canonical_destructor(model_name)``.
    model_kwargs : dict
        Must contain ``'cv'`` and ``'refit'``, plus ``'n_jobs'`` for models
        other than ``'deep-copula'``.  ``'is_test'`` is optional; when absent
        it is inserted with the value ``False``, so the caller's dict is
        modified in place.

    Returns
    -------
    DeepDestructorCV
        Unfitted model; limited to one canonical destructor when
        ``model_kwargs['is_test']`` is truthy.
    """
    # Fill in the optional flag (written back into the caller's dict)
    model_kwargs.setdefault('is_test', False)

    # Every model starts from the same initial destructor
    inverse_logit = _get_inverse_logit_destructor(data_name)
    pixel_density = IndependentDensity(
        univariate_estimators=HistogramUnivariateDensity(
            bins=256, bounds=[0, 1], alpha=1))
    init_destructor = CompositeDestructor(
        destructors=[
            inverse_logit,
            IndependentDestructor(independent_density=pixel_density),
        ],
        random_state=0,
    )

    # Pick the canonical destructor(s) and stopping tolerance per model
    if model_name != 'deep-copula':
        deep_stop_tol = 0.0001
        n_jobs = model_kwargs['n_jobs']

        # Pair estimators (i.e. pairs of pixels in a spiral pattern)
        pair_estimators = _get_pair_estimators(data_name, n_uniq_dir=8)

        # Template for the local/pair destructor; cloned once per direction
        pair_template = _get_pair_canonical_destructor(model_name)

        # One canonical destructor per pixel direction
        canonical_destructor = []
        for pair_estimator in pair_estimators:
            canonical_destructor.append(
                FeatureGroupsDestructor(
                    groups_estimator=pair_estimator,
                    group_canonical_destructor=clone(pair_template),
                    n_jobs=n_jobs,
                )
            )
    else:
        deep_stop_tol = 0.001
        canonical_destructor = _get_copula_destructor()

    # Shared DeepDestructorCV
    return DeepDestructorCV(
        init_destructor=init_destructor,
        canonical_destructor=canonical_destructor,
        stop_tol=deep_stop_tol,
        # Either n_extend or max_canonical_destructors must be None
        n_extend=1,
        cv=model_kwargs['cv'],
        refit=model_kwargs['refit'],
        silent=False,
        log_prefix='',
        random_state=0,
        # Cap the number of layers at 1 in test mode (None for infinite)
        max_canonical_destructors=1 if model_kwargs['is_test'] else None,
    )
def _get_copula_destructor(hist_kwargs=None):
    """Build the three-step composite used as the copula canonical destructor.

    Parameters
    ----------
    hist_kwargs : dict or None, default=None
        Keyword arguments for ``HistogramUnivariateDensity``.  ``None`` selects
        ``bins=40, bounds=[0, 1], alpha=100``.

    Returns
    -------
    CompositeDestructor
        Histogram-based ``IndependentDestructor``, then
        ``IndependentInverseCdf``, then a ``BestLinearReconstructionDestructor``
        using PCA; created with ``random_state=0``.
    """
    histogram_settings = (
        {'bins': 40, 'bounds': [0, 1], 'alpha': 100}
        if hist_kwargs is None else hist_kwargs
    )

    histogram_step = IndependentDestructor(
        independent_density=IndependentDensity(
            univariate_estimators=HistogramUnivariateDensity(**histogram_settings),
        ),
    )
    inverse_cdf_step = IndependentInverseCdf()
    linear_step = BestLinearReconstructionDestructor(
        linear_estimator=PCA(),
        destructor=IndependentDestructor(),
    )

    return CompositeDestructor(
        destructors=[histogram_step, inverse_cdf_step, linear_step],
        random_state=0,
    )
def _get_inverse_logit_destructor(data_name):
    if data_name == 'mnist':
        alpha = MNIST_ALPHA
    elif data_name == 'cifar10':
        alpha = CIFAR10_ALPHA
    else:
        raise ValueError('dataset should either be mnist or cifar10')
    inverse_logit = CompositeDestructor(destructors=[
        IndependentDestructor(independent_density=IndependentDensity(
            univariate_estimators=ScipyUnivariateDensity(
                scipy_rv=scipy.stats.logistic,
                scipy_fit_kwargs=dict(floc=0, fscale=1)))),
        IndependentDestructor(independent_density=IndependentDensity(
            univariate_estimators=ScipyUnivariateDensity(
                scipy_rv=scipy.stats.uniform,
                scipy_fit_kwargs=dict(floc=alpha, fscale=1 - 2 * alpha))))
    ])
    return inverse_logit
示例#7
0
def get_rbig_model(bins="auto", bounds=0.1, alpha=1e-10):
    """Build the three-stage composite destructor for the RBIG model.

    Parameters
    ----------
    bins, bounds, alpha
        Forwarded unchanged to ``HistogramUnivariateDensity``.

    Returns
    -------
    CompositeDestructor
        Composite of clones of the three stages, in order: marginal
        uniformization, marginal Gaussianization, PCA rotation.
    """
    # Stage 1 - marginal uniformization: a histogram estimator applied to each
    # feature independently, wrapped as a destructor (maps X towards U(0, 1))
    histogram = HistogramUnivariateDensity(bounds=bounds, bins=bins, alpha=alpha)
    per_feature_density = IndependentDensity(univariate_estimators=histogram)
    uniformize = IndependentDestructor(per_feature_density)

    # Stage 2 - marginal Gaussianization
    gaussianize = IndependentInverseCdf()

    # Stage 3 - rotation of the features via a PCA linear projection
    rotate = LinearProjector(linear_estimator=PCA())

    # Chain fresh clones of the stages into a single composite destructor
    stages = [clone(stage) for stage in (uniformize, gaussianize, rotate)]
    return CompositeDestructor(stages)
def _get_toy_destructors_and_names():
    """Build the toy-experiment destructors together with their display names.

    Four shallow baselines are followed by deep (``DeepDestructorCV``) models
    built on random linear projections, Gaussian mixtures, random trees and
    mlpack density trees.

    ``cv`` is not defined inside this function; it is looked up from the
    surrounding scope when the CV parameter is set.

    Returns
    -------
    destructors : list
        Unfitted destructors.
    destructor_names : list of str
        Display names, in the same order as ``destructors``.
    """

    def _histogram_destructor(bins, alpha):
        """Independent destructor backed by a histogram density on [0, 1]."""
        return IndependentDestructor(
            independent_density=IndependentDensity(
                univariate_estimators=HistogramUnivariateDensity(
                    bins=bins, bounds=[0, 1], alpha=alpha,
                ),
            ),
        )

    def _deep(canonical, n_extend):
        """Wrap a canonical destructor in DeepDestructorCV with an independent init."""
        return DeepDestructorCV(
            init_destructor=IndependentDestructor(),
            canonical_destructor=canonical,
            n_extend=n_extend,
        )

    # ---- Baseline shallow destructors, as (name, destructor) pairs ----
    baselines = [
        ('Gaussian', CompositeDestructor(
            destructors=[
                LinearProjector(linear_estimator=PCA(), orthogonal=False),
                IndependentDestructor(),
            ],
        )),
        ('Mixture', AutoregressiveDestructor(
            density_estimator=GaussianMixtureDensity(
                covariance_type='spherical',
                n_components=20,
            ),
        )),
        ('SingleRandTree', CompositeDestructor(
            destructors=[
                IndependentDestructor(),
                TreeDestructor(
                    tree_density=TreeDensity(
                        tree_estimator=RandomTreeEstimator(
                            min_samples_leaf=20, max_leaf_nodes=50,
                        ),
                        node_destructor=_histogram_destructor(bins=10, alpha=10),
                    ),
                ),
            ],
        )),
        ('SingleDensityTree', CompositeDestructor(
            destructors=[
                IndependentDestructor(),
                TreeDestructor(
                    tree_density=TreeDensity(
                        tree_estimator=MlpackDensityTreeEstimator(min_samples_leaf=10),
                        uniform_weight=0.001,
                    ),
                ),
            ],
        )),
    ]
    baseline_destructors = [model for _, model in baselines]
    baseline_names = [label for label, _ in baselines]

    # ---- Deep destructors on random linear projections ----
    linear_alphas = [10]  # [1, 10, 100]
    # A single projector instance is shared by every linear model below
    shared_projector = LinearProjector(
        linear_estimator=RandomOrthogonalEstimator(), orthogonal=True,
    )
    histogram_layers = [
        _histogram_destructor(bins=20, alpha=a) for a in linear_alphas
    ]
    linear_destructors = []
    for histogram_layer in histogram_layers:
        linear_destructors.append(
            DeepDestructorCV(
                init_destructor=IndependentDestructor(),
                canonical_destructor=CompositeDestructor(destructors=[
                    IndependentInverseCdf(),  # to unbounded real space
                    shared_projector,  # random linear projection
                    IndependentDestructor(),  # back to canonical space
                    histogram_layer,  # histogram destructor in canonical space
                ]),
                n_extend=20,  # random projections need a longer extension
            )
        )
    linear_names = ['RandLin (%g)' % a for a in linear_alphas]

    # ---- Deep destructors on Gaussian mixtures ----
    mixture_weights = [0.5]  # [0.1, 0.5, 0.9]
    mixture_layers = []
    for w in mixture_weights:
        mixture_layers.append(
            CompositeDestructor(destructors=[
                IndependentInverseCdf(),
                AutoregressiveDestructor(
                    density_estimator=FirstFixedGaussianMixtureDensity(
                        covariance_type='spherical',
                        n_components=20,
                        fixed_weight=w,
                    ),
                ),
            ])
        )
    mixture_destructors = [_deep(layer, n_extend=5) for layer in mixture_layers]
    mixture_names = ['GausMix (%.2g)' % w for w in mixture_weights]

    # ---- Deep destructors on trees ----
    # Random trees
    random_tree_alphas = [10]  # [1, 10, 100]
    tree_layers = []
    for a in random_tree_alphas:
        tree_layers.append(
            TreeDestructor(
                tree_density=TreeDensity(
                    tree_estimator=RandomTreeEstimator(max_leaf_nodes=4),
                    node_destructor=_histogram_destructor(bins=10, alpha=a),
                ),
            )
        )
    tree_names = ['RandTree (%g)' % a for a in random_tree_alphas]

    # Density trees using mlpack
    density_tree_weights = [0.5]  # [0.1, 0.5, 0.9]
    for w in density_tree_weights:
        tree_layers.append(
            TreeDestructor(
                tree_density=TreeDensity(
                    tree_estimator=MlpackDensityTreeEstimator(min_samples_leaf=10),
                    uniform_weight=w,
                ),
            )
        )
    tree_names += ['DensityTree (%.2g)' % w for w in density_tree_weights]

    # Put a random rotation in front of every tree destructor
    rotated_tree_layers = []
    for tree_layer in tree_layers:
        rotated_tree_layers.append(
            CompositeDestructor(destructors=[
                IndependentInverseCdf(),
                LinearProjector(linear_estimator=RandomOrthogonalEstimator()),
                IndependentDestructor(),
                tree_layer,
            ])
        )

    # Density trees don't need to extend as much as random trees
    tree_destructors = [
        _deep(layer, n_extend=50 if 'Rand' in label else 5)
        for layer, label in zip(rotated_tree_layers, tree_names)
    ]

    # ---- Collect everything and set the CV-related parameters ----
    destructors = (
        baseline_destructors + linear_destructors
        + mixture_destructors + tree_destructors
    )
    destructor_names = baseline_names + linear_names + mixture_names + tree_names
    for model in destructors:
        if 'cv' in model.get_params():
            model.set_params(cv=cv)
        # **** Change from notebook to make faster ****
        if 'max_canonical_destructors' in model.get_params():
            model.set_params(max_canonical_destructors=1)

    return destructors, destructor_names