def test_pca_destructor():
    """Check a PCA linear projection followed by an independent destructor."""
    pca_projector = LinearProjector(linear_estimator=PCA(), orthogonal=False)
    pipeline = CompositeDestructor(
        destructors=[pca_projector, IndependentDestructor()],
    )
    # The composite is validated as a non-canonical destructor
    assert check_destructor(pipeline, is_canonical=False)
def test_random_linear_householder_destructor():
    """Check a random one-component projection followed by an independent destructor."""
    # Since n_components=1, a Householder matrix will be used
    householder_estimator = RandomOrthogonalEstimator(n_components=1)
    householder_projector = LinearProjector(
        linear_estimator=householder_estimator,
        orthogonal=False,
    )
    pipeline = CompositeDestructor(
        destructors=[householder_projector, IndependentDestructor()],
    )
    # The composite is validated as a non-canonical destructor
    assert check_destructor(pipeline, is_canonical=False)
def _fit_and_score(data_name, destructor, destructor_name, n_train, random_state=0):
    """Fit a destructor and score it on the train and test data.

    The global NumPy random generator is temporarily re-seeded from
    ``random_state`` so that destructors relying on it are repeatable.  The
    previous global state is always restored before this function exits,
    including when fitting or scoring raises.

    Parameters
    ----------
    data_name : str
        Dataset label; only used in the debug log message.
    destructor : object
        Object exposing ``fit(X)`` and ``score(X)``.
    destructor_name : str
        Destructor label; used in the debug log message and copied into the
        returned dict.
    n_train : int
        Not used by this function body; kept so existing callers keep working.
    random_state : int, RandomState instance or None, default=0
        Passed to ``check_random_state`` to derive the seed for the global
        NumPy generator.

    Returns
    -------
    dict
        Keys ``fitted_destructor``, ``destructor_name``, ``train_score`` and
        ``test_score``.

    Raises
    ------
    RuntimeError
        Re-raised when ``fit`` fails with a ``RuntimeError`` whose message does
        not mention "mlpack".

    Notes
    -----
    ``X_train`` and ``X_test`` are not defined in this function; they are
    resolved from the enclosing (module) scope.
    """
    # Fix random state of global generator so repeatable if destructors are random
    rng = check_random_state(random_state)
    old_random_state = np.random.get_state()
    np.random.seed(rng.randint(2 ** 32, dtype=np.uint32))

    try:
        # Fit destructor
        start_time = time.time()
        destructor.fit(X_train)
        train_time = time.time() - start_time
    except RuntimeError as e:
        # Handle MLPACK error; anything else propagates with its original traceback
        if 'mlpack' not in str(e).lower():
            raise
        warnings.warn('Skipping density tree destructors because of MLPACK error "%s". '
                      'Using dummy IndependentDestructor() instead.' % str(e))
        # Fall back to a dummy destructor and mark the scores as unavailable
        destructor = CompositeDestructor([IndependentDestructor()]).fit(X_train)
        train_time = 0
        train_score = -np.inf
        test_score = -np.inf
        score_time = 0
    else:
        # Get scores
        start_time = time.time()
        train_score = destructor.score(X_train)
        test_score = destructor.score(X_test)
        score_time = time.time() - start_time
    finally:
        # Reset random state even if fitting or scoring raised, so a failure
        # here does not leave the global generator seeded for later code
        np.random.set_state(old_random_state)

    logger.debug(
        'train=%.3f, test=%.3f, train_time=%.3f, score_time=%.3f, destructor=%s, data_name=%s',
        train_score, test_score, train_time, score_time, destructor_name, data_name)
    return dict(fitted_destructor=destructor,
                destructor_name=destructor_name,
                train_score=train_score,
                test_score=test_score)
def _get_model(data_name, model_name, model_kwargs):
    """Build the ``DeepDestructorCV`` model for an experiment.

    Parameters
    ----------
    data_name : str
        Forwarded to ``_get_inverse_logit_destructor`` and, for non-copula
        models, to ``_get_pair_estimators``.
    model_name : str
        ``'deep-copula'`` selects the copula canonical destructor; any other
        value selects a list of pairwise ``FeatureGroupsDestructor`` objects
        built from ``_get_pair_canonical_destructor(model_name)``.
    model_kwargs : dict
        Must contain ``'cv'`` and ``'refit'``, and ``'n_jobs'`` for non-copula
        models.  ``'is_test'`` is optional; when missing it is written into
        this dict as ``False``.

    Returns
    -------
    DeepDestructorCV
        The configured (unfitted) deep destructor.
    """
    # Default to a non-test run; note that this writes into the caller's dict
    model_kwargs.setdefault('is_test', False)

    # Init destructor is shared with all models
    inverse_logit = _get_inverse_logit_destructor(data_name)
    pixel_histogram = HistogramUnivariateDensity(bins=256, bounds=[0, 1], alpha=1)
    histogram_destructor = IndependentDestructor(
        independent_density=IndependentDensity(univariate_estimators=pixel_histogram))
    init_destructor = CompositeDestructor(
        destructors=[inverse_logit, histogram_destructor],
        random_state=0,
    )

    # Setup canonical destructor for various models
    if model_name != 'deep-copula':
        deep_stop_tol = 0.0001
        n_jobs = model_kwargs['n_jobs']

        # Get pair estimators (i.e. pairs of pixels in a spiral pattern)
        pair_estimators = _get_pair_estimators(data_name, n_uniq_dir=8)

        # Setup the local/pair destructor
        pair_canonical_destructor = _get_pair_canonical_destructor(model_name)

        # Setup a list of canonical destructors that destroy in each pixel direction
        canonical_destructor = []
        for pair_estimator in pair_estimators:
            canonical_destructor.append(FeatureGroupsDestructor(
                groups_estimator=pair_estimator,
                group_canonical_destructor=clone(pair_canonical_destructor),
                n_jobs=n_jobs))
    else:
        deep_stop_tol = 0.001
        canonical_destructor = _get_copula_destructor()

    # Set maximum number of layers (None for infinite)
    max_layers = 1 if model_kwargs['is_test'] else None

    # Shared DeepDestructorCV
    return DeepDestructorCV(
        init_destructor=init_destructor,
        canonical_destructor=canonical_destructor,
        stop_tol=deep_stop_tol,
        # Either n_extend or max_canonical_destructors must be None
        n_extend=1,
        cv=model_kwargs['cv'],
        refit=model_kwargs['refit'],
        silent=False,
        log_prefix='',
        random_state=0,
        max_canonical_destructors=max_layers,
    )
def _get_copula_destructor(hist_kwargs=None):
    """Build the composite copula destructor.

    Parameters
    ----------
    hist_kwargs : dict or None, default=None
        Keyword arguments for ``HistogramUnivariateDensity``.  When ``None``,
        ``bins=40, bounds=[0, 1], alpha=100`` is used.

    Returns
    -------
    CompositeDestructor
        Histogram-based independent destructor, then ``IndependentInverseCdf``,
        then a ``BestLinearReconstructionDestructor`` using PCA, created with
        ``random_state=0``.
    """
    if hist_kwargs is None:
        hist_kwargs = {'bins': 40, 'bounds': [0, 1], 'alpha': 100}

    histogram_stage = IndependentDestructor(
        independent_density=IndependentDensity(
            univariate_estimators=HistogramUnivariateDensity(**hist_kwargs)))
    inverse_cdf_stage = IndependentInverseCdf()
    linear_stage = BestLinearReconstructionDestructor(
        linear_estimator=PCA(),
        destructor=IndependentDestructor(),
    )
    return CompositeDestructor(
        destructors=[histogram_stage, inverse_cdf_stage, linear_stage],
        random_state=0,
    )
def _get_inverse_logit_destructor(data_name): if data_name == 'mnist': alpha = MNIST_ALPHA elif data_name == 'cifar10': alpha = CIFAR10_ALPHA else: raise ValueError('dataset should either be mnist or cifar10') inverse_logit = CompositeDestructor(destructors=[ IndependentDestructor(independent_density=IndependentDensity( univariate_estimators=ScipyUnivariateDensity( scipy_rv=scipy.stats.logistic, scipy_fit_kwargs=dict(floc=0, fscale=1)))), IndependentDestructor(independent_density=IndependentDensity( univariate_estimators=ScipyUnivariateDensity( scipy_rv=scipy.stats.uniform, scipy_fit_kwargs=dict(floc=alpha, fscale=1 - 2 * alpha)))) ]) return inverse_logit
def get_rbig_model(bins="auto", bounds=0.1, alpha=1e-10):
    """Build a three-stage composite destructor (uniformize, Gaussianize, rotate).

    Parameters
    ----------
    bins : default="auto"
        Forwarded to ``HistogramUnivariateDensity``.
    bounds : default=0.1
        Forwarded to ``HistogramUnivariateDensity``.
    alpha : default=1e-10
        Forwarded to ``HistogramUnivariateDensity``.

    Returns
    -------
    CompositeDestructor
        Composite of clones of the three stages, in order: histogram-based
        ``IndependentDestructor``, ``IndependentInverseCdf``, and a PCA
        ``LinearProjector``.
    """
    # Step I - Marginal Uniformization:
    # histogram estimator applied marginally, wrapped as destructor "D_theta_1"
    histogram_estimator = HistogramUnivariateDensity(bounds=bounds, bins=bins, alpha=alpha)
    uniformization_stage = IndependentDestructor(
        IndependentDensity(univariate_estimators=histogram_estimator))

    # Step II - Marginal Gaussianization: destructor "D_theta_2"
    gaussianization_stage = IndependentInverseCdf()

    # Step III - Rotation: linear projection of the features (PCA), "D_theta_3"
    rotation_stage = LinearProjector(linear_estimator=PCA())

    # Composite Destructor: each stage is cloned before being chained
    stages = [uniformization_stage, gaussianization_stage, rotation_stage]
    return CompositeDestructor([clone(stage) for stage in stages])
def _get_toy_destructors_and_names():
    """Build the toy-experiment destructors together with their display names.

    Four groups are created, in this order: shallow baselines, deep random
    linear destructors, deep Gaussian-mixture destructors and deep tree
    destructors (random trees, then mlpack density trees).

    Returns
    -------
    destructors : list
        The destructor objects.  Every destructor exposing a ``cv`` parameter
        has it set, and every destructor exposing
        ``max_canonical_destructors`` has it set to 1.
    destructor_names : list of str
        Display names aligned with ``destructors``.

    Notes
    -----
    ``cv`` is not defined in this function; it is resolved from the enclosing
    (module) scope.
    """
    def _histogram_destructor(n_bins, smoothing):
        # Independent destructor with a histogram density on [0, 1]
        return IndependentDestructor(
            independent_density=IndependentDensity(
                univariate_estimators=HistogramUnivariateDensity(
                    bins=n_bins, bounds=[0, 1], alpha=smoothing
                )
            )
        )

    def _deep(canonical, n_extend):
        # Deep destructor with an independent initial destructor
        return DeepDestructorCV(
            init_destructor=IndependentDestructor(),
            canonical_destructor=canonical,
            n_extend=n_extend,
        )

    # BASELINE SHALLOW DESTRUCTORS
    gaussian_full = CompositeDestructor(
        destructors=[
            LinearProjector(linear_estimator=PCA(), orthogonal=False),
            IndependentDestructor(),
        ],
    )
    mixture_20 = AutoregressiveDestructor(
        density_estimator=GaussianMixtureDensity(
            covariance_type='spherical',
            n_components=20,
        )
    )
    random_tree = CompositeDestructor(
        destructors=[
            IndependentDestructor(),
            TreeDestructor(
                tree_density=TreeDensity(
                    tree_estimator=RandomTreeEstimator(min_samples_leaf=20, max_leaf_nodes=50),
                    node_destructor=_histogram_destructor(10, 10),
                )
            ),
        ]
    )
    density_tree = CompositeDestructor(
        destructors=[
            IndependentDestructor(),
            TreeDestructor(
                tree_density=TreeDensity(
                    tree_estimator=MlpackDensityTreeEstimator(min_samples_leaf=10),
                    uniform_weight=0.001,
                )
            ),
        ]
    )
    baseline_destructors = [gaussian_full, mixture_20, random_tree, density_tree]
    baseline_names = ['Gaussian', 'Mixture', 'SingleRandTree', 'SingleDensityTree']

    # LINEAR DESTRUCTORS
    alpha_histogram = [10]  # [1, 10, 100]
    # A single projector instance is shared by every linear destructor
    random_linear_projector = LinearProjector(
        linear_estimator=RandomOrthogonalEstimator(), orthogonal=True
    )
    canonical_histogram_destructors = [
        _histogram_destructor(20, a) for a in alpha_histogram
    ]
    linear_destructors = []
    for hist_destructor in canonical_histogram_destructors:
        canonical = CompositeDestructor(destructors=[
            IndependentInverseCdf(),  # Project to inf real space
            random_linear_projector,  # Random linear projector
            IndependentDestructor(),  # Project to canonical space
            hist_destructor,  # Histogram destructor in canonical space
        ])
        # Need to extend since random projections
        linear_destructors.append(_deep(canonical, 20))
    linear_names = ['RandLin (%g)' % a for a in alpha_histogram]

    # MIXTURE DESTRUCTORS
    fixed_weight = [0.5]  # [0.1, 0.5, 0.9]
    shallow_mixtures = []
    for w in fixed_weight:
        shallow_mixtures.append(CompositeDestructor(destructors=[
            IndependentInverseCdf(),
            AutoregressiveDestructor(
                density_estimator=FirstFixedGaussianMixtureDensity(
                    covariance_type='spherical',
                    n_components=20,
                    fixed_weight=w,
                )
            ),
        ]))
    # Make deep destructors
    mixture_destructors = [_deep(shallow, 5) for shallow in shallow_mixtures]
    mixture_names = ['GausMix (%.2g)' % w for w in fixed_weight]

    # TREE DESTRUCTORS
    # Random trees
    histogram_alpha = [10]  # [1, 10, 100]
    bare_trees = []
    tree_names = []
    for a in histogram_alpha:
        bare_trees.append(TreeDestructor(
            tree_density=TreeDensity(
                tree_estimator=RandomTreeEstimator(max_leaf_nodes=4),
                node_destructor=_histogram_destructor(10, a),
            )
        ))
    for a in histogram_alpha:
        tree_names.append('RandTree (%g)' % a)

    # Density trees using mlpack
    tree_uniform_weight = [0.5]  # [0.1, 0.5, 0.9]
    for w in tree_uniform_weight:
        bare_trees.append(TreeDestructor(
            tree_density=TreeDensity(
                tree_estimator=MlpackDensityTreeEstimator(min_samples_leaf=10),
                uniform_weight=w,
            )
        ))
    for w in tree_uniform_weight:
        tree_names.append('DensityTree (%.2g)' % w)

    # Add random rotation to tree destructors
    rotated_trees = []
    for tree in bare_trees:
        rotated_trees.append(CompositeDestructor(destructors=[
            IndependentInverseCdf(),
            LinearProjector(linear_estimator=RandomOrthogonalEstimator()),
            IndependentDestructor(),
            tree,
        ]))

    # Make deep destructors
    tree_destructors = []
    for rotated, name in zip(rotated_trees, tree_names):
        # Density trees don't need to extend as much as random trees
        extend_by = 50 if 'Rand' in name else 5
        tree_destructors.append(_deep(rotated, extend_by))

    # Collect all destructors and set CV parameter
    destructors = (baseline_destructors + linear_destructors
                   + mixture_destructors + tree_destructors)
    destructor_names = baseline_names + linear_names + mixture_names + tree_names
    for d in destructors:
        if 'cv' in d.get_params():
            d.set_params(cv=cv)
        # **** Change from notebook to make faster ****
        if 'max_canonical_destructors' in d.get_params():
            d.set_params(max_canonical_destructors=1)
    return destructors, destructor_names