def test_histogram_univariate_destructor():
    """Check an independent destructor built on a single histogram estimator."""
    # Note only one univariate estimator
    histogram = HistogramUnivariateDensity(bins=4, alpha=10, bounds=[0, 1])
    density = IndependentDensity(univariate_estimators=[histogram])
    destructor = IndependentDestructor(independent_density=density)
    assert check_destructor(destructor)
def get_rbig_cvmodel(n_layers=50, bins="auto", bounds=0.1, alpha=1e-10,
                     random_state=123, init=None, **kwargs):
    """Build an (unfitted) cross-validated deep RBIG model.

    Parameters
    ----------
    n_layers, random_state
        Forwarded unchanged to ``get_deep_rbig_model``.
    bins, bounds, alpha
        Histogram settings. Forwarded to ``get_deep_rbig_model`` and also
        used for the default initial destructor when ``init`` is None.
    init : estimator or None, default None
        Initial destructor. When None, a histogram-based marginal
        uniformization destructor is built from ``bins``/``bounds``/``alpha``.
        A supplied estimator is used as given (a clone of it is passed on).
    **kwargs
        Extra keyword arguments passed straight to ``DeepDestructorCV``.

    Returns
    -------
    DeepDestructorCV
        The configured model; it is not fitted here.
    """
    # The guard used to read ``init is not None``, which discarded any
    # caller-supplied destructor and left the default ``None`` to reach
    # ``clone(None)`` below. Only build the default when nothing was supplied.
    if init is None:
        # Choose the Histogram estimator that converts the data X to
        # uniform U(0,1)
        univariate_estimator = HistogramUnivariateDensity(
            bounds=bounds, bins=bins, alpha=alpha)

        # Marginally uses histogram estimator
        marginal_uniformization = IndependentDensity(
            univariate_estimators=univariate_estimator)

        # Creates "Destructor" D_theta_1
        init = IndependentDestructor(marginal_uniformization)

    deep_rbig_model = get_deep_rbig_model(
        n_layers=n_layers,
        bins=bins,
        bounds=bounds,
        alpha=alpha,
        random_state=random_state,
    )

    # Initialize Deep RBIG CV Model
    deep_rbig_cvmodel = DeepDestructorCV(
        init_destructor=clone(init),
        canonical_destructor=deep_rbig_model,
        **kwargs
    )
    return deep_rbig_cvmodel
def test_autoregressive_mixture_density():
    """Test generic mixture density."""
    clusterer = KMeans(n_clusters=2, random_state=0)
    component = IndependentDensity()
    density = _MixtureDensity(
        cluster_estimator=clusterer,
        component_density_estimator=component,
    )
    assert check_density(density)
def test_histogram_multivariate_density():
    """Check an independent density that shares one histogram estimator."""
    histogram = HistogramUnivariateDensity(bins=10, alpha=10, bounds=[0, 1])
    density = IndependentDensity(univariate_estimators=histogram)
    assert check_density(density)
def _get_inverse_logit_destructor(data_name):
    """Return the two-stage composite destructor used as the inverse logit.

    The first stage is an independent destructor with a logistic density
    fixed at ``loc=0, scale=1``; the second uses a uniform density fixed at
    ``loc=alpha, scale=1 - 2 * alpha``, where ``alpha`` is the per-dataset
    constant.

    Parameters
    ----------
    data_name : str
        Either ``'mnist'`` or ``'cifar10'``.

    Raises
    ------
    ValueError
        If ``data_name`` is neither of the two supported datasets.
    """
    # Reject unknown datasets up front, then pick the matching constant.
    if data_name not in ('mnist', 'cifar10'):
        raise ValueError('dataset should either be mnist or cifar10')
    alpha = MNIST_ALPHA if data_name == 'mnist' else CIFAR10_ALPHA

    def _fixed_scipy_destructor(rv, loc, scale):
        # Independent destructor whose scipy density has loc/scale pinned.
        estimator = ScipyUnivariateDensity(
            scipy_rv=rv,
            scipy_fit_kwargs={'floc': loc, 'fscale': scale},
        )
        return IndependentDestructor(
            independent_density=IndependentDensity(
                univariate_estimators=estimator))

    logistic_stage = _fixed_scipy_destructor(scipy.stats.logistic, 0, 1)
    uniform_stage = _fixed_scipy_destructor(
        scipy.stats.uniform, alpha, 1 - 2 * alpha)

    inverse_logit = CompositeDestructor(
        destructors=[logistic_stage, uniform_stage])
    return inverse_logit
def test_tree_destructor_with_node_destructor():
    """Check a random-tree destructor with two kinds of node destructor."""
    histogram = HistogramUnivariateDensity(bins=10, alpha=100, bounds=[0, 1])
    node_tree_destructor = IndependentDestructor(
        independent_density=IndependentDensity(
            univariate_estimators=histogram))

    # Try both the identity and the histogram-based node destructor.
    for node_destructor in (IdentityDestructor(), node_tree_destructor):
        tree_density = TreeDensity(
            tree_estimator=RandomTreeEstimator(max_leaf_nodes=3,
                                               random_state=0),
            node_destructor=node_destructor,
            uniform_weight=0.9,
        )
        destructor = TreeDestructor(tree_density=tree_density)
        assert check_destructor(destructor)
def _get_model(data_name, model_name, model_kwargs):
    """Build the DeepDestructorCV model for the given dataset and model name.

    Parameters
    ----------
    data_name : str
        Dataset name, passed to the inverse-logit and pair-estimator helpers.
    model_name : str
        ``'deep-copula'`` selects the copula destructor; any other value
        selects the pairwise feature-group destructors.
    model_kwargs : dict
        Must contain ``'cv'`` and ``'refit'``, plus ``'n_jobs'`` for the
        non-copula models. ``'is_test'`` is optional and is set to False
        *in place* when absent.

    Returns
    -------
    DeepDestructorCV
    """
    # Default the test flag on the caller's dict (in-place, as before).
    model_kwargs.setdefault('is_test', False)

    # Init destructor is shared with all models
    pixel_histogram = IndependentDestructor(
        independent_density=IndependentDensity(
            univariate_estimators=HistogramUnivariateDensity(
                bins=256, bounds=[0, 1], alpha=1)))
    init_destructor = CompositeDestructor(
        destructors=[
            _get_inverse_logit_destructor(data_name),
            pixel_histogram,
        ],
        random_state=0,
    )

    # Setup canonical destructor for various models
    if model_name == 'deep-copula':
        deep_stop_tol = 0.001
        canonical_destructor = _get_copula_destructor()
    else:
        deep_stop_tol = 0.0001
        n_jobs = model_kwargs['n_jobs']

        # Get pair estimators (i.e. pairs of pixels in a spiral pattern)
        pair_estimators = _get_pair_estimators(data_name, n_uniq_dir=8)

        # Setup the local/pair destructor
        pair_canonical_destructor = _get_pair_canonical_destructor(model_name)

        # Setup a list of canonical destructors that destroy in each pixel
        # direction
        canonical_destructor = []
        for pair_estimator in pair_estimators:
            canonical_destructor.append(
                FeatureGroupsDestructor(
                    groups_estimator=pair_estimator,
                    group_canonical_destructor=clone(
                        pair_canonical_destructor),
                    n_jobs=n_jobs,
                )
            )

    cv = model_kwargs['cv']
    refit = model_kwargs['refit']
    # Set maximum number of layers (None for infinite)
    max_layers = 1 if model_kwargs['is_test'] else None

    # Shared DeepDestructorCV
    return DeepDestructorCV(
        init_destructor=init_destructor,
        canonical_destructor=canonical_destructor,
        stop_tol=deep_stop_tol,
        # Either n_extend or max_canonical_destructors must be None
        n_extend=1,
        cv=cv,
        refit=refit,
        silent=False,
        log_prefix='',
        random_state=0,
        max_canonical_destructors=max_layers,
    )
def _get_copula_destructor(hist_kwargs=None):
    """Return the composite copula destructor.

    The composite chains a histogram-based independent destructor, an
    independent inverse CDF, and a best-linear-reconstruction destructor
    that uses PCA.

    Parameters
    ----------
    hist_kwargs : dict or None
        Keyword arguments for ``HistogramUnivariateDensity``. When None,
        ``bins=40, bounds=[0, 1], alpha=100`` is used.
    """
    histogram_settings = (
        dict(bins=40, bounds=[0, 1], alpha=100)
        if hist_kwargs is None else hist_kwargs
    )

    histogram_stage = IndependentDestructor(
        independent_density=IndependentDensity(
            univariate_estimators=HistogramUnivariateDensity(
                **histogram_settings)))
    inverse_cdf_stage = IndependentInverseCdf()
    linear_stage = BestLinearReconstructionDestructor(
        linear_estimator=PCA(),
        destructor=IndependentDestructor(),
    )

    return CompositeDestructor(
        destructors=[histogram_stage, inverse_cdf_stage, linear_stage],
        random_state=0,
    )
def get_rbig_model(bins="auto", bounds=0.1, alpha=1e-10):
    """Build one (unfitted) RBIG layer as a composite destructor.

    The layer chains three steps, each passed to the composite as a clone:
    histogram-based marginal uniformization, marginal Gaussianization via
    the independent inverse CDF, and a PCA rotation.

    Parameters
    ----------
    bins, bounds, alpha
        Passed to ``HistogramUnivariateDensity`` for the uniformization step.

    Returns
    -------
    CompositeDestructor
    """
    # Step I - Marginal Uniformization:
    # histogram estimator that converts the data X to uniform U(0,1),
    # applied marginally, wrapped as "Destructor" D_theta_1.
    histogram = HistogramUnivariateDensity(
        bounds=bounds, bins=bins, alpha=alpha)
    uniformization = IndependentDestructor(
        IndependentDensity(univariate_estimators=histogram))

    # Step II - Marginal Gaussianization: destructor D_theta_2.
    gaussianization = IndependentInverseCdf()

    # Step III - Rotation: linear projection of the features (PCA),
    # "D_theta_3".
    pca_rotation = LinearProjector(linear_estimator=PCA())

    # Composite Destructor
    steps = [
        clone(uniformization),   # Marginal Uniformization
        clone(gaussianization),  # Marginal Gaussianization
        clone(pca_rotation),     # Rotation (PCA)
    ]
    rbig_model = CompositeDestructor(steps)
    return rbig_model
def test_inverse_canonical_destructor():
    """Check inverse canonical destructors for three fitted destructors."""
    rng = check_random_state(0)

    # Case 1: identity destructor.
    fitted_identity = IdentityDestructor().fit(rng.rand(10, 2))
    destructor = get_inverse_canonical_destructor(fitted_identity)
    assert check_destructor(destructor)

    # Case 2: tree destructor, inverted once and then inverted again.
    # Alpha must be high to pass the identity test
    fitted_tree = TreeDestructor(TreeDensity(uniform_weight=0.99)).fit(
        rng.rand(10, 2))
    inverted_tree = get_inverse_canonical_destructor(fitted_tree)
    destructor = get_inverse_canonical_destructor(inverted_tree)
    assert check_destructor(destructor)

    # Case 3: histogram-based independent destructor.
    # Alpha must be high to pass the identity test
    histogram = HistogramUnivariateDensity(bins=10, alpha=1000, bounds=[0, 1])
    fitted_histogram = IndependentDestructor(
        independent_density=IndependentDensity(
            univariate_estimators=histogram)
    ).fit(rng.rand(10, 2))
    destructor = get_inverse_canonical_destructor(fitted_histogram)
    assert check_destructor(destructor)
def _get_toy_destructors_and_names():
    """Assemble the toy-experiment destructors and their display names.

    Four groups are built, in this order: shallow baselines, deep
    random-linear models, deep Gaussian-mixture models and deep tree models.
    Afterwards every destructor exposing a ``cv`` parameter has it set, and
    every destructor exposing ``max_canonical_destructors`` is capped at 1.

    Returns
    -------
    (list, list of str)
        The destructors and a parallel list of names.
    """

    def _histogram_destructor(n_bins, alpha):
        # Independent histogram destructor on the unit interval.
        return IndependentDestructor(
            independent_density=IndependentDensity(
                univariate_estimators=HistogramUnivariateDensity(
                    bins=n_bins, bounds=[0, 1], alpha=alpha)))

    def _deep(canonical, n_extend):
        # Wrap a canonical destructor in a DeepDestructorCV.
        return DeepDestructorCV(
            init_destructor=IndependentDestructor(),
            canonical_destructor=canonical,
            n_extend=n_extend,
        )

    # BASELINE SHALLOW DESTRUCTORS
    gaussian_full = CompositeDestructor(
        destructors=[
            LinearProjector(linear_estimator=PCA(), orthogonal=False),
            IndependentDestructor(),
        ],
    )
    mixture_20 = AutoregressiveDestructor(
        density_estimator=GaussianMixtureDensity(
            covariance_type='spherical',
            n_components=20,
        )
    )
    random_tree_density = TreeDensity(
        tree_estimator=RandomTreeEstimator(min_samples_leaf=20,
                                           max_leaf_nodes=50),
        node_destructor=_histogram_destructor(10, 10),
    )
    random_tree = CompositeDestructor(
        destructors=[
            IndependentDestructor(),
            TreeDestructor(tree_density=random_tree_density),
        ]
    )
    mlpack_tree_density = TreeDensity(
        tree_estimator=MlpackDensityTreeEstimator(min_samples_leaf=10),
        uniform_weight=0.001,
    )
    density_tree = CompositeDestructor(
        destructors=[
            IndependentDestructor(),
            TreeDestructor(tree_density=mlpack_tree_density),
        ]
    )
    baseline_destructors = [gaussian_full, mixture_20, random_tree,
                            density_tree]
    baseline_names = ['Gaussian', 'Mixture', 'SingleRandTree',
                      'SingleDensityTree']

    # LINEAR DESTRUCTORS
    alpha_histogram = [10]  # [1, 10, 100]
    # One projector instance, reused in every linear composite below.
    random_linear_projector = LinearProjector(
        linear_estimator=RandomOrthogonalEstimator(),
        orthogonal=True,
    )
    canonical_histogram_destructors = [
        _histogram_destructor(20, a) for a in alpha_histogram
    ]
    linear_destructors = []
    for histogram_layer in canonical_histogram_destructors:
        linear_layer = CompositeDestructor(destructors=[
            IndependentInverseCdf(),  # Project to inf real space
            random_linear_projector,  # Random linear projector
            IndependentDestructor(),  # Project to canonical space
            histogram_layer,  # Histogram destructor in canonical space
        ])
        # Need to extend since random projections
        linear_destructors.append(_deep(linear_layer, 20))
    linear_names = ['RandLin (%g)' % a for a in alpha_histogram]

    # MIXTURE DESTRUCTORS
    fixed_weight = [0.5]  # [0.1, 0.5, 0.9]
    mixture_layers = [
        CompositeDestructor(destructors=[
            IndependentInverseCdf(),
            AutoregressiveDestructor(
                density_estimator=FirstFixedGaussianMixtureDensity(
                    covariance_type='spherical',
                    n_components=20,
                    fixed_weight=w,
                )
            ),
        ])
        for w in fixed_weight
    ]
    # Make deep destructors
    mixture_destructors = [_deep(layer, 5) for layer in mixture_layers]
    mixture_names = ['GausMix (%.2g)' % w for w in fixed_weight]

    # TREE DESTRUCTORS
    # Random trees
    histogram_alpha = [10]  # [1, 10, 100]
    tree_layers = [
        TreeDestructor(
            tree_density=TreeDensity(
                tree_estimator=RandomTreeEstimator(max_leaf_nodes=4),
                node_destructor=_histogram_destructor(10, a),
            )
        )
        for a in histogram_alpha
    ]
    tree_names = ['RandTree (%g)' % a for a in histogram_alpha]

    # Density trees using mlpack
    tree_uniform_weight = [0.5]  # [0.1, 0.5, 0.9]
    for w in tree_uniform_weight:
        tree_layers.append(
            TreeDestructor(
                tree_density=TreeDensity(
                    tree_estimator=MlpackDensityTreeEstimator(
                        min_samples_leaf=10),
                    uniform_weight=w,
                )
            )
        )
    tree_names += ['DensityTree (%.2g)' % w for w in tree_uniform_weight]

    # Add random rotation to tree destructors
    rotated_tree_layers = [
        CompositeDestructor(destructors=[
            IndependentInverseCdf(),
            LinearProjector(linear_estimator=RandomOrthogonalEstimator()),
            IndependentDestructor(),
            layer,
        ])
        for layer in tree_layers
    ]

    # Make deep destructors
    tree_destructors = []
    for layer, name in zip(rotated_tree_layers, tree_names):
        # Density trees don't need to extend as much as random trees
        n_extend = 50 if 'Rand' in name else 5
        tree_destructors.append(_deep(layer, n_extend))

    # Collect all destructors and set CV parameter
    destructors = (baseline_destructors + linear_destructors
                   + mixture_destructors + tree_destructors)
    destructor_names = (baseline_names + linear_names
                        + mixture_names + tree_names)
    for d in destructors:
        if 'cv' in d.get_params():
            # NOTE(review): `cv` is not assigned in this function, so it is
            # looked up in an outer scope - confirm it is defined there.
            d.set_params(cv=cv)
        # **** Change from notebook to make faster ****
        if 'max_canonical_destructors' in d.get_params():
            d.set_params(max_canonical_destructors=1)
    return destructors, destructor_names