Example No. 1
    def fit(self,
            X,
            y,
            cats,
            inference_type='advi',
            minibatch_size=None,
            inference_args=None):
        """
        Train the Hierarchical Logistic Regression model

        Parameters
        ----------
        X : numpy array, shape [n_samples, n_features]

        y : numpy array, shape [n_samples, ]

        cats : numpy array, shape [n_samples, ]

        inference_type : string, specifies which inference method to call. Defaults to 'advi'. Currently, only 'advi' and 'nuts' are supported.

        minibatch_size : int, number of samples to include in each minibatch for ADVI. Defaults to None, in which case minibatching is not used.

        inference_args : dict, arguments to be passed to the inference methods. Check the PyMC3 docs for permissible values. If no arguments are specified, default values will be used.
        """
        self.num_cats = len(np.unique(cats))
        self.num_training_samples, self.num_pred = X.shape

        self.inference_type = inference_type

        if y.ndim != 1:
            y = np.squeeze(y)

        if not inference_args:
            inference_args = self._set_default_inference_args()

        if self.cached_model is None:
            self.cached_model = self.create_model()

        if minibatch_size:
            with self.cached_model:
                minibatches = {
                    self.shared_vars['model_input']:
                    pm.Minibatch(X, batch_size=minibatch_size),
                    self.shared_vars['model_output']:
                    pm.Minibatch(y, batch_size=minibatch_size),
                    self.shared_vars['model_cats']:
                    pm.Minibatch(cats, batch_size=minibatch_size)
                }

                inference_args['more_replacements'] = minibatches
        else:
            self._set_shared_vars({
                'model_input': X,
                'model_output': y,
                'model_cats': cats
            })

        self._inference(inference_type, inference_args)

        return self
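A minimal usage sketch for the fit method above; the class name and constructor below are assumptions, only the fit signature is taken from the snippet:

import numpy as np

X = np.random.randn(1000, 5)                # [n_samples, n_features]
y = np.random.randint(0, 2, size=1000)      # binary outcome per sample
cats = np.random.randint(0, 3, size=1000)   # group index per sample

model = HierarchicalLogisticRegression()    # hypothetical constructor
model.fit(X, y, cats, inference_type='advi', minibatch_size=100)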
Example No. 2
    def fit(self,
            X,
            y,
            inference_type='advi',
            minibatch_size=None,
            inference_args=None):
        """
        Train the Naive Bayes model.

        Parameters
        ----------
        X : numpy array, shape [num_training_samples, num_pred].
           Contains the data points.

        y : numpy array, shape [num_training_samples,].
           Contains the category of the data points.

        inference_type : string, specifies which inference method to call.
            Default is 'advi'. Currently, only 'advi' and 'nuts'
            are implemented.

        minibatch_size : int, number of samples to include in each minibatch
            for ADVI. Defaults to None so minibatch is not run by default.

        inference_args : dict, arguments to be passed to the inference methods.
            Check the PyMC3 documentation.

        Returns
        -------
        The current instance of the GaussianNB class.
        """
        self.num_training_samples, self.num_pred = X.shape
        self.num_cats = len(np.unique(y))
        self.inference_type = inference_type

        if not inference_args:
            inference_args = self._set_default_inference_args()

        if not self.cached_model:
            self.cached_model = self.create_model()

        if minibatch_size:
            with self.cached_model:
                minibatches = {
                    self.shared_vars['model_input']:
                    pm.Minibatch(X, batch_size=minibatch_size),
                    self.shared_vars['model_output']:
                    pm.Minibatch(y, batch_size=minibatch_size),
                }

                inference_args['more_replacements'] = minibatches
        else:
            self._set_shared_vars({'model_input': X, 'model_output': y})

        self._inference(inference_type, inference_args)

        return self
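A minimal usage sketch for the Naive Bayes fit above; the class name and constructor are assumptions, only the fit signature comes from the snippet:

import numpy as np

X = np.random.randn(500, 4)
y = np.random.randint(0, 3, size=500)   # class labels

nb = GaussianNB()                       # hypothetical constructor
nb.fit(X, y, inference_type='advi', minibatch_size=50)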
Example No. 3
def main():
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print(
            'usage: python3 inference_dir.py [chain no] [optional output no]')
        sys.exit()
    elif len(sys.argv) == 2:
        c = int(sys.argv[1])
        d = int(sys.argv[1])
    if len(sys.argv) == 3:
        c = int(sys.argv[1])
        d = int(sys.argv[2])
    np.random.seed(c)
    np.random.shuffle(lang_ind)
    np.random.shuffle(sound_ind)
    lang_minibatch = pm.Minibatch(lang_ind, 500)
    sound_minibatch = pm.Minibatch(sound_ind, 500)
    model_ln = pm.Model()
    with model_ln:
        beta = pm.HalfFlat('beta')
        "theta = language-level prior over components"
        theta = tt.stack([
            pm.Dirichlet('theta_{}'.format(l), a=tt.ones(K) * beta, shape=K)
            for l in range(L)
        ])
        psi = [
            pm.MvNormal('psi_{}'.format(k), mu=[0] * S, cov=Sigma, shape=S)
            for k in range(K)
        ]
        "phi = component-level collection of distributions over sound change"
        phi = tt.stack([
            tt.concatenate([
                pm.Deterministic(
                    'phi_{}_{}'.format(k, x),
                    tt.nnet.softmax(psi[k][s_breaks[x][0]:s_breaks[x][1]])[0])
                for x in range(X)
            ]) for k in range(K)
        ])
        target = pm.DensityDist('target',
                                logprob(theta=theta, phi=phi),
                                observed=dict(lang_array=lang_minibatch,
                                              sound_array=sound_minibatch),
                                total_size=N)
        inference_ln = pm.ADVI()
        inference_ln.fit(50000,
                         obj_optimizer=pm.adam(learning_rate=.01,
                                               beta1=uniform(.7, .9)),
                         callbacks=[pm.callbacks.CheckParametersConvergence()])
        trace_ln = inference_ln.approx.sample()
        posterior = {
            k: trace_ln[k]
            for k in trace_ln.varnames if not k.endswith('__')
        }
        posterior['ELBO'] = inference_ln.hist
        with open('posterior_ln_shuffle_{}.pkl'.format(d), 'wb') as f:
            pkl.dump(posterior, f)
Example No. 4
def main():
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print(
            'usage: python3 inference_dir.py [chain no] [optional output no]')
        sys.exit()
    elif len(sys.argv) == 2:
        c = int(sys.argv[1])
        d = int(sys.argv[1])
    if len(sys.argv) == 3:
        c = int(sys.argv[1])
        d = int(sys.argv[2])
    np.random.seed(c)
    lang_minibatch = pm.Minibatch(lang_ind, 500)
    sound_minibatch = pm.Minibatch(sound_ind, 500)
    model_dir = pm.Model()
    with model_dir:
        beta = pm.HalfFlat('beta')
        "theta = language-level prior over components"
        theta = tt.stack([
            pm.Dirichlet('theta_{}'.format(l), a=tt.ones(K) * beta, shape=K)
            for l in range(L)
        ])
        phi = tt.stack([
            tt.concatenate([
                pm.Dirichlet('phi_{}_{}'.format(k, x),
                             a=tt.ones(R[x]) * alpha,
                             shape=R[x]) for x in range(X)
            ]) for k in range(K)
        ])
        target = pm.DensityDist('target',
                                logprob(theta=theta, phi=phi),
                                observed=dict(lang_array=lang_minibatch,
                                              sound_array=sound_minibatch),
                                total_size=N)
        inference_dir = pm.ADVI()
        inference_dir.fit(
            50000,
            obj_optimizer=pm.adam(learning_rate=.01, beta1=uniform(.7, .9)),
            callbacks=[pm.callbacks.CheckParametersConvergence()])
        trace_dir = inference_dir.approx.sample()
        posterior = {
            k: trace_dir[k]
            for k in trace_dir.varnames if not k.endswith('__')
        }
        posterior['ELBO'] = inference_dir.hist
        with open('posterior_dir_{}.pkl'.format(d), 'wb') as f:
            pkl.dump(posterior, f)
Example No. 5
 def test_cloning_available(self):
     gop = pm.Minibatch(np.arange(100), 1)
     res = gop ** 2
     shared = theano.shared(np.array([10]))
     res1 = theano.clone(res, {gop: shared})
     f = theano.function([], res1)
     assert f() == np.array([100])
Example No. 6
def construct_bayes_model(nems_model,
                          signals,
                          pred_name,
                          resp_name,
                          batches=None):
    '''
    Builds the Bayesian version of the NEMS model. This essentially converts the
    NEMS set of modules into a symbolic evaluation graph that is used for
    maximizing a Poisson likelihood.
    '''
    signals = signals.copy()
    nems_priors = nems_model.get_priors(signals)

    # Now, batch the signal if requested. The get_priors code typically doesn't
    # work with batched tensors, so we need to do this *after* getting the
    # priors.
    if batches is not None:
        for k, v in signals.items():
            signals[k] = mc.Minibatch(v, batch_size=batches)

    with mc.Model() as mc_model:
        mc_priors = construct_priors(nems_priors)
        tensors = nems_model.generate_tensor(signals, mc_priors)
        pred = tensors[pred_name]
        obs = tensors[resp_name]
        likelihood = mc.Poisson('likelihood', mu=pred, observed=obs)
    return mc_model
Example No. 7
def generate_groups_data_matrix_minibatch(groups, n_mi, s_mi):

    groups['train']['n_series_idx'] = pm.Minibatch(
        groups['train']['n_series_idx'], s_mi)

    for group in groups['train']['groups_idx'].keys():
        groups['train']['groups_idx'][group] = pm.Minibatch(
            groups['train']['groups_idx'][group], s_mi)

    groups['train']['data'] = pm.Minibatch(groups['train']['data'],
                                           (n_mi, s_mi))

    X = np.arange(groups['train']['n']).reshape(-1, 1)
    X_mi = pm.Minibatch(X.ravel(), n_mi).reshape((-1, 1))

    return groups, X_mi
Example No. 8
def test_vae():
    minibatch_size = 10
    data = pm.floatX(np.random.rand(100))
    x_mini = pm.Minibatch(data, minibatch_size)
    x_inp = tt.vector()
    x_inp.tag.test_value = data[:minibatch_size]

    ae = theano.shared(pm.floatX([.1, .1]))
    be = theano.shared(pm.floatX(1.))

    ad = theano.shared(pm.floatX(1.))
    bd = theano.shared(pm.floatX(1.))

    enc = x_inp.dimshuffle(0, 'x') * ae.dimshuffle('x', 0) + be
    mu,  rho = enc[:, 0], enc[:, 1]

    with pm.Model():
        # Hidden variables
        zs = pm.Normal('zs', mu=0, sigma=1, shape=minibatch_size)
        dec = zs * ad + bd
        # Observation model
        pm.Normal('xs_', mu=dec, sigma=0.1, observed=x_inp)

        pm.fit(1, local_rv={zs: dict(mu=mu, rho=rho)},
               more_replacements={x_inp: x_mini}, more_obj_params=[ae, be, ad, bd])
Example No. 9
File: pmlp.py Project: kqf/lish-moa
 def predict_proba(self, X):
     if self.sample is None:
         raise NotFittedError("Please call model.fit(X, y) first")
     minibatch_x = pm.Minibatch(X, batch_size=self.batch_size)
     samples = self.sample(minibatch_x, self.inf_samples)
     # Average over inf_samples dimension
     return samples.mean(0)
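A short usage sketch of the predict_proba method above, assuming the estimator was already constructed and fitted elsewhere (so self.sample, self.batch_size and self.inf_samples are set):

proba = model.predict_proba(X_test)   # posterior-averaged probabilities; note X_test is minibatched internally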
Example No. 10
 def test_align(self):
     m = pm.Minibatch(np.arange(1000), 1, random_seed=1)
     n = pm.Minibatch(np.arange(1000), 1, random_seed=1)
     f = theano.function([], [m, n])
     n.eval()  # not aligned
     a, b = zip(*(f() for _ in range(1000)))
     assert a != b
     pm.align_minibatches()
     a, b = zip(*(f() for _ in range(1000)))
     assert a == b
     n.eval()  # not aligned
     pm.align_minibatches([m])
     a, b = zip(*(f() for _ in range(1000)))
     assert a != b
     pm.align_minibatches([m, n])
     a, b = zip(*(f() for _ in range(1000)))
     assert a == b
Example No. 11
 def fit(self, X, y, n=200000, batch_size=10):
     """
     Train the Bayesian NN model.
     """
     num_samples, self.num_pred = X.shape
     
     if self.cached_model is None:
         self.cached_model = self.create_model()
         
     with self.cached_model:
         minibatches = {
             self.shared_vars['model_input']: pm.Minibatch(X, batch_size=batch_size),
             self.shared_vars['model_output']: pm.Minibatch(y, batch_size=batch_size),
         }
         self._inference(minibatches, n)
         
     return self
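A minimal usage sketch for the Bayesian NN fit above; the wrapper class name and constructor are assumptions, only the fit signature comes from the snippet:

import numpy as np

X = np.random.randn(2000, 10)
y = np.random.randint(0, 2, size=2000)

bnn = BayesianNeuralNetwork()           # hypothetical constructor
bnn.fit(X, y, n=200000, batch_size=10)  # ADVI on minibatches of 10 samples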
Example No. 12
    def fit(self,
            x,
            y,
            epochs=30000,
            method='advi',
            batch_size=128,
            n_models=1,
            **sample_kwargs):
        """

        :param x:
        :param y:
        :param epochs:
        :param method:
        :param batch_size: int or array. For hierarchical models, batch along the second dimension (e.g., [None, 128])
        :param sample_kwargs:
        :return:
        """
        self.train_x = x
        with self.model:
            if method == 'nuts':
                # self.x.set_value(x)
                # self.y.set_value(y)
                for _ in range(n_models):
                    self.trace.append(pm.sample(epochs, **sample_kwargs))
            else:
                mini_x = pm.Minibatch(x, batch_size=batch_size, dtype=floatX)
                mini_y = pm.Minibatch(y, batch_size=batch_size, dtype=floatX)

                if method == 'advi':
                    inference = pm.ADVI()
                elif method == 'svgd':
                    inference = pm.SVGD()
                for _ in range(n_models):
                    approx = pm.fit(n=epochs,
                                    method=inference,
                                    more_replacements={
                                        self.x: mini_x,
                                        self.y: mini_y
                                    },
                                    **sample_kwargs)
                    self.trace.append(approx.sample(draws=20000))
                    self.approx.append(approx)
Example No. 13
File: model.py Project: jancr/ppv
    def __init__(self, df, true_prior=None, mini_batch=0):
        self.df = df
        self.true_prior = true_prior
        self.predictors = df.ppv.predictors
        self.target = df.ppv.target
        if not isinstance(mini_batch, int) or mini_batch < 0:
            raise ValueError("mini_batch must be a non-negative integer, not {}".format(mini_batch))
        self.mini_batch = mini_batch

        # scale data
        self.meanx = self.predictors.mean()
        self.scalex = self.predictors.std()
        zX, y = self._prep_data()

        if self.mini_batch:
            zX = pm.Minibatch(zX, batch_size=self.mini_batch)
            y = pm.Minibatch(y, batch_size=self.mini_batch)
        self.model = self._create_model(zX, y)

        # inferred from trace
        self.trace = self.intercept = self.parameters = None
Example No. 14
    def fit(self, X, y, cats, n=200000, batch_size=100):
        """
        Train the HLR model

        Parameters
        ----------
        X : numpy array, shape [n_samples, n_features]

        y : numpy array, shape [n_samples, ]

        cats: numpy array, shape [n_samples, ]

        n: number of iterations for ADVI fit, defaults to 200000

        batch_size: number of samples to include in each minibatch for ADVI, defaults to 100
        """
        self.num_cats = len(np.unique(cats))
        num_samples, self.num_pred = X.shape

        if self.cached_model is None:
            self.cached_model = self.create_model()

        with self.cached_model:

            minibatches = {
                self.shared_vars['model_input']:
                pm.Minibatch(X, batch_size=batch_size),
                self.shared_vars['model_output']:
                pm.Minibatch(y, batch_size=batch_size),
                self.shared_vars['model_cats']:
                pm.Minibatch(cats, batch_size=batch_size)
            }

            self._inference(minibatches, n)

        return self
Example No. 15
def simple_model_data(using_minibatch):
    n = 1000
    sd0 = 2.
    mu0 = 4.
    sd = 3.
    mu = -5.

    data = sd * np.random.randn(n) + mu
    d = n / sd**2 + 1 / sd0**2  # posterior precision of the conjugate normal-normal update
    mu_post = (n * np.mean(data) / sd**2 + mu0 / sd0**2) / d  # analytic posterior mean
    if using_minibatch:
        data = pm.Minibatch(data)
    return dict(
        n=n,
        data=data,
        mu_post=mu_post,
        d=d,
        mu0=mu0,
        sd0=sd0,
        sd=sd,
    )
Example No. 16
def simple_model_data(use_minibatch):
    n = 1000
    sigma0 = 2.
    mu0 = 4.
    sigma = 3.
    mu = -5.

    data = sigma * np.random.randn(n) + mu
    d = n / sigma ** 2 + 1 / sigma0 ** 2
    mu_post = (n * np.mean(data) / sigma ** 2 + mu0 / sigma0 ** 2) / d
    if use_minibatch:
        data = pm.Minibatch(data)
    return dict(
        n=n,
        data=data,
        mu_post=mu_post,
        d=d,
        mu0=mu0,
        sigma0=sigma0,
        sigma=sigma,
    )
Example No. 17
 def test_1d(self):
     mb = pm.Minibatch(self.data, 20)
     assert mb.eval().shape == (20, 10, 40, 10, 50)
Example No. 18
sns.kdeplot(xloc, yloc, bw=lengthscale_, cmap="viridis", shade=True, ax=ax[0])
ax[0].scatter(xloc, yloc, color='r', alpha=.25)
ax[0].set_xlim(0, 100)
ax[0].set_ylim(0, 100)

ax[1].imshow(hist, cmap='viridis', origin='lower')
ax[1].axis('off')
#%%
#input/output
xv, yv = np.meshgrid(xcenters, ycenters)
x_data = np.vstack((yv.flatten(), xv.flatten())).T
y_data = hist.flatten()
#%% pymc3 minibatch setup
# Not suitable for 2D mapping problem, overestimated lengthscale
batchsize = 10
Xbatch = pm.Minibatch(x_data, batchsize**2)
Ybatch = pm.Minibatch(y_data, batchsize**2)
#%% set up minibatch
data = hist
batchsize = 10
z1, z2 = batchsize, batchsize
s1, s2 = np.shape(data)
yshared = theano.shared(data)
x1shared = theano.shared(ycenters[:, np.newaxis].repeat(64, axis=1))
x2shared = theano.shared(xcenters[:, np.newaxis].T.repeat(64, axis=0))

ixs1 = pm.tt_rng().uniform(size=(1, ), low=0,
                           high=s1 - z1 - 1e-10).astype('int64')
ixs2 = pm.tt_rng().uniform(size=(1, ), low=0,
                           high=s2 - z2 - 1e-10).astype('int64')
range1 = tt.arange(ixs1.squeeze(), (ixs1 + z1).squeeze())
Example No. 19
    def fit_advi_refine(self, n_iter=10000, learning_rate=None,
                        progressbar=True, reducing_lr=False):
        """Refine posterior using ADVI - continue training after `.fit_advi_iterative()`

        Parameters
        ----------
        n_iter :
            number of additional iterations (Default value = 10000)
        learning_rate :
            same as in `.fit_advi_iterative()` (Default value = None)
        progressbar :
            same as in `.fit_advi_iterative()` (Default value = True)
        reducing_lr :
            same as in `.fit_advi_iterative()` (Default value = False)

        Returns
        -------
        dict
            update the self.mean_field dictionary with MeanField pymc3 objects.

        """

        self.n_iter = self.n_iter + n_iter

        if learning_rate is None:
            learning_rate = self.learning_rate

        ### Initialise optimiser ###
        if reducing_lr:
            # initialise the function for adaptive learning rate
            s = theano.shared(np.array(learning_rate).astype(self.data_type))

            def reduce_rate(a, h, i):
                s.set_value(np.array(learning_rate / ((i / self.n_obs) + 1) ** .7).astype(self.data_type))

            optimiser = pm.adam(learning_rate=s)
            callbacks = [reduce_rate, CheckParametersConvergence()]
        else:
            optimiser = pm.adam(learning_rate=learning_rate)
            callbacks = [CheckParametersConvergence()]

        for i, name in enumerate(self.advi.keys()):

            # when type is molecular cross-validation or bootstrap,
            # replace self.x_data tensor with new data
            if np.isin(self.n_type, ['cv', 'bootstrap']):

                # defining minibatch
                if self.minibatch_size is not None:
                    # minibatch main data - expression matrix
                    self.x_data_minibatch = pm.Minibatch(self.X_data_sample[i].astype(self.data_type),
                                                         batch_size=[self.minibatch_size, None],
                                                         random_seed=self.minibatch_seed[i])
                    more_replacements = {self.x_data: self.x_data_minibatch}

                    # if any other data inputs should be minibatched add them too
                    if self.extra_data is not None:
                        # for each parameter in the dictionary add it to more_replacements
                        for k in self.extra_data.keys():
                            more_replacements[self.extra_data_tt[k]] = \
                                pm.Minibatch(self.extra_data[k].astype(self.data_type),
                                             batch_size=[self.minibatch_size, None],
                                             random_seed=self.minibatch_seed[i])

                # or using all data
                else:
                    more_replacements = {self.x_data: self.X_data_sample[i].astype(self.data_type)}
                    # if any other data inputs should be added
                    if self.extra_data is not None:
                        # for each parameter in the dictionary add it to more_replacements
                        for k in self.extra_data.keys():
                            more_replacements[self.extra_data_tt[k]] = \
                                self.extra_data[k].astype(self.data_type)

            else:
                # defining minibatch
                if self.minibatch_size is not None:
                    # minibatch main data - expression matrix
                    self.x_data_minibatch = pm.Minibatch(self.X_data.astype(self.data_type),
                                                         batch_size=[self.minibatch_size, None],
                                                         random_seed=self.minibatch_seed[i])
                    more_replacements = {self.x_data: self.x_data_minibatch}

                    # if any other data inputs should be minibatched add them too
                    if self.extra_data is not None:
                        # for each parameter in the dictionary add it to more_replacements
                        for k in self.extra_data.keys():
                            more_replacements[self.extra_data_tt[k]] = \
                                pm.Minibatch(self.extra_data[k].astype(self.data_type),
                                             batch_size=[self.minibatch_size, None],
                                             random_seed=self.minibatch_seed[i])

                else:
                    more_replacements = {}

            with self.model:
                # train for more iterations & export trained model by overwriting the initial mean field object
                self.mean_field[name] = self.advi[name].fit(n_iter, callbacks=callbacks,
                                                            obj_optimizer=optimiser,
                                                            total_grad_norm_constraint=self.total_grad_norm_constraint,
                                                            progressbar=progressbar,
                                                            more_replacements=more_replacements)

                if self.verbose:
                    print(plt.plot(np.log10(self.mean_field[name].hist[15000:])))
Example No. 20
    def fit_advi_iterative(self, n=3, method='advi', n_type='restart',
                           n_iter=None,
                           learning_rate=None, reducing_lr=False,
                           progressbar=True,
                           scale_cost_to_minibatch=True):
        """Find posterior using pm.ADVI() method directly (allows continuing training through `refine` method.
        (maximising likelihood of the data and minimising KL-divergence of posterior to prior - ELBO loss)

        Parameters
        ----------
        n :
            number of independent initialisations (Default value = 3)
        method :
            'advi', to allow for potential use of SVGD, MCMC, custom (currently only ADVI is implemented). (Default value = 'advi')
        n_type :
            type of repeated initialisation:
            
            * **'restart'** to pick different initial value,
            * **'cv'** for molecular cross-validation - splits counts into n datasets, for now, only n=2 is implemented
            * **'bootstrap'** for fitting the model to multiple downsampled datasets.
              Run `mod.bootstrap_data()` to generate variants of data (Default value = 'restart')

        n_iter :
            number of iterations, supersedes self.n_iter specified when creating model instance. (Default value = None)
        learning_rate :
            learning rate, supersedes self.learning_rate specified when creating model instance. (Default value = None)
        reducing_lr :
            boolean, use decaying learning rate? (Default value = False)
        progressbar :
            boolean, show progress bar? (Default value = True)
        scale_cost_to_minibatch :
            when using training in minibatches, scale cost function appropriately?
            See discussion https://discourse.pymc.io/t/effects-of-scale-cost-to-minibatch/1429 to understand the effects. (Default value = True)

        Returns
        -------
        None
            self.mean_field dictionary with MeanField pymc3 objects,
            and self.advi dictionary with ADVI objects for each initialisation.

        """

        self.n_type = n_type
        self.scale_cost_to_minibatch = scale_cost_to_minibatch

        if n_iter is None:
            n_iter = self.n_iter

        if learning_rate is None:
            learning_rate = self.learning_rate

        ### Initialise optimiser ###
        if reducing_lr:
            # initialise the function for adaptive learning rate
            s = theano.shared(np.array(learning_rate).astype(self.data_type))

            def reduce_rate(a, h, i):
                s.set_value(np.array(learning_rate / ((i / self.n_obs) + 1) ** .7).astype(self.data_type))

            optimiser = pm.adam(learning_rate=s)
            callbacks = [reduce_rate, CheckParametersConvergence()]
        else:
            optimiser = pm.adam(learning_rate=learning_rate)
            callbacks = [CheckParametersConvergence()]

        if np.isin(n_type, ['bootstrap']):
            if self.X_data_sample is None:
                self.bootstrap_data(n=n)
        elif np.isin(n_type, ['cv']):
            self.generate_cv_data()  # cv data added to self.X_data_sample

        init_names = ['init_' + str(i + 1) for i in np.arange(n)]

        for i, name in enumerate(init_names):

            with self.model:

                self.advi[name] = pm.ADVI()

            # when type is molecular cross-validation or bootstrap, 
            # replace self.x_data tensor with new data
            if np.isin(n_type, ['cv', 'bootstrap']):

                # defining minibatch
                if self.minibatch_size is not None:
                    # minibatch main data - expression matrix
                    self.x_data_minibatch = pm.Minibatch(self.X_data_sample[i].astype(self.data_type),
                                                         batch_size=[self.minibatch_size, None],
                                                         random_seed=self.minibatch_seed[i])
                    more_replacements = {self.x_data: self.x_data_minibatch}

                    # if any other data inputs should be minibatched add them too
                    if self.extra_data is not None:
                        # for each parameter in the dictionary add it to more_replacements
                        for k in self.extra_data.keys():
                            more_replacements[self.extra_data_tt[k]] = \
                                pm.Minibatch(self.extra_data[k].astype(self.data_type),
                                             batch_size=[self.minibatch_size, None],
                                             random_seed=self.minibatch_seed[i])

                # or using all data
                else:
                    more_replacements = {self.x_data: self.X_data_sample[i].astype(self.data_type)}
                    # if any other data inputs should be added
                    if self.extra_data is not None:
                        # for each parameter in the dictionary add it to more_replacements
                        for k in self.extra_data.keys():
                            more_replacements[self.extra_data_tt[k]] = \
                                self.extra_data[k].astype(self.data_type)

            else:

                # defining minibatch
                if self.minibatch_size is not None:
                    # minibatch main data - expression matrix
                    self.x_data_minibatch = pm.Minibatch(self.X_data.astype(self.data_type),
                                                         batch_size=[self.minibatch_size, None],
                                                         random_seed=self.minibatch_seed[i])
                    more_replacements = {self.x_data: self.x_data_minibatch}

                    # if any other data inputs should be minibatched add them too
                    if self.extra_data is not None:
                        # for each parameter in the dictionary add it to more_replacements
                        for k in self.extra_data.keys():
                            more_replacements[self.extra_data_tt[k]] = \
                                pm.Minibatch(self.extra_data[k].astype(self.data_type),
                                             batch_size=[self.minibatch_size, None],
                                             random_seed=self.minibatch_seed[i])

                else:
                    more_replacements = {}

            self.advi[name].scale_cost_to_minibatch = scale_cost_to_minibatch

            # train the model  
            self.mean_field[name] = self.advi[name].fit(n_iter, callbacks=callbacks,
                                                        obj_optimizer=optimiser,
                                                        total_grad_norm_constraint=self.total_grad_norm_constraint,
                                                        progressbar=progressbar, more_replacements=more_replacements)

            # plot training history
            if self.verbose:
                print(plt.plot(np.log10(self.mean_field[name].hist[15000:])));
Example No. 21
def sgd_optimization(NNInput):

    RandomSeed = 42
    set_tt_rng(MRG_RandomStreams(RandomSeed))

    NSigmaSamples = 1000
    SigmaIntCoeff = 2

    ##################################################################################################################################
    ### LOADING DATA
    ##################################################################################################################################
    print('\nLoading Data ... \n')

    if (NNInput.TryNNFlg):
        datasets, datasetsPlot, RDataOrig, yDataOrig, yDataDiatOrig = load_data(
            NNInput)
    else:
        datasets, RDataOrig, yDataOrig, yDataDiatOrig = load_data(NNInput)

    RSetTrain, ySetTrain, ySetTrainDiat, ySetTrainTriat = datasets[0]
    RSetPlot, ySetPlot, ySetPlotDiat, ySetPlotTriat = datasetsPlot[0]
    RSetPlotTemp = RSetPlot
    #NNInput.NIn  = xSetTrain.get_value(borrow=True).shape[1]
    NNInput.NOut = ySetTrain.get_value(borrow=True).shape[1]
    print(('    Nb of Input:  %i') % NNInput.NIn)
    print(('    Nb of Output: %i \n') % NNInput.NOut)
    NNInput.NLayers = NNInput.NHid
    NNInput.NLayers.insert(0, NNInput.NIn)
    NNInput.NLayers.append(NNInput.NOut)

    NNInput.NTrain = RSetTrain.get_value(borrow=True).shape[0]
    print(('    Nb of Training   Examples: %i') % NNInput.NTrain)

    # compute number of minibatches for training, validation and testing
    if (NNInput.NMiniBatch != 0):
        NNInput.NBatchTrain = NNInput.NTrain // NNInput.NMiniBatch
        print(('    Nb of Training   Batches: %i') % NNInput.NBatchTrain)
    else:
        print('    No-BATCH Version')

    ##############################################################################################
    ### TESTING REAL PARAMETERS ##################################################################
    if (NNInput.ReadIniParamsFlg):
        if (NNInput.Model == 'PIP'):
            LambdaVec = NNInput.LambdaVec
            reVec = NNInput.reVec
            WIni = [
                load_parameters(NNInput.PathToWeightFldr +
                                NNInput.LayersName[iLayer] + '/')[0]
                for iLayer in range(1, len(NNInput.LayersName))
            ]
            bIni = [
                load_parameters(NNInput.PathToWeightFldr +
                                NNInput.LayersName[iLayer] + '/')[1]
                for iLayer in range(1, len(NNInput.LayersName))
            ]
        if (NNInput.Model == 'ModPIP'):
            LambdaIni = load_parameters_PIP(NNInput.PathToWeightFldr +
                                            NNInput.LayersName[1] + '/')[0]
            reIni = load_parameters_PIP(NNInput.PathToWeightFldr +
                                        NNInput.LayersName[1] + '/')[1]
            LambdaVec = numpy.array([1.0, 1.0, 1.0]) * LambdaIni
            reVec = numpy.array([1.0, 1.0, 1.0]) * reIni
            #print('Lambda = ', LambdaVec)
            #print('re     = ', reVec)
            WIni = [
                load_parameters(NNInput.PathToWeightFldr +
                                NNInput.LayersName[iLayer] + '/')[0]
                for iLayer in range(3, len(NNInput.LayersName))
            ]
            bIni = [
                load_parameters(NNInput.PathToWeightFldr +
                                NNInput.LayersName[iLayer] + '/')[1]
                for iLayer in range(3, len(NNInput.LayersName))
            ]
        elif (NNInput.Model == 'LEPS'):
            DeVec = NNInput.DeVec
            betaVec = NNInput.betaVec
            reVec = NNInput.reVec
            k = NNInput.k
        i = -1
        for Ang in NNInput.AngVector:
            i = i + 1
            RSetPlot, ySetPlot, ySetPlotDiat, ySetPlotTriat = datasetsPlot[i]
            if (NNInput.Model == 'PIP') or (NNInput.Model == 'ModPIP'):
                yPredInitial = try_model_PIP(NNInput,
                                             RSetPlot.get_value(borrow=True),
                                             LambdaVec, reVec, WIni, bIni)
            elif (NNInput.Model == 'LEPS'):
                yPredInitial = try_model_LEPS(NNInput,
                                              RSetPlot.get_value(borrow=True),
                                              DeVec, betaVec, reVec, k)
            yPredInitial = InverseTransformation(NNInput, yPredInitial,
                                                 ySetPlotDiat.get_value())
            PathToPlotLabels = NNInput.PathToOutputFldr + '/REInitial.csv.' + str(
                int(numpy.floor(Ang)))
            ySetPlot = T.cast(ySetPlot, 'float64')
            ySetPlot = ySetPlot.eval()
            ySetPlot = InverseTransformation(NNInput, ySetPlot,
                                             ySetPlotDiat.get_value())
            save_to_plot(
                PathToPlotLabels, 'Initial',
                numpy.column_stack(
                    [RSetPlot.get_value(), ySetPlot, yPredInitial]))
            print('    Initial Evaluation Saved in File: ', PathToPlotLabels,
                  '\n')
        RSetPlotTemp = RSetPlot
    ##############################################################################################

    ##################################################################################################################################
    # BUILD ACTUAL MODEL #
    ##################################################################################################################################
    ### COMPUTING / UPDATING INFERENCE ######################################################################
    # print(RSetTrain.get_value())
    # print(ySetTrain.get_value())
    # time.sleep(5)
    if (NNInput.TrainFlg):
        if (NNInput.NMiniBatch > 0):
            RSetTrainTemp = pymc3.Minibatch(RSetTrain.get_value(),
                                            batch_size=NNInput.NMiniBatch,
                                            dtype='float64')
            ySetTrainTemp = pymc3.Minibatch(ySetTrain.get_value(),
                                            batch_size=NNInput.NMiniBatch,
                                            dtype='float64')
        else:
            RSetTrainTemp = RSetTrain
            ySetTrainTemp = ySetTrain
            NNInput.NMiniBatch = NNInput.NTrain
        #ADVIApprox, ADVIInference, ADVITracker, SVGDApprox, NUTSTrace, model, yPred, Sigma, Layers = construct_model(NNInput, RSetTrainTemp, ySetTrainTemp, GaussWeightsW, GaussWeightsb)
        ADVIApprox, ADVIInference, SVGDApprox, NUTSTrace, Params, yPred = construct_model(
            NNInput, RSetTrain, ySetTrain, RSetTrainTemp, ySetTrainTemp,
            GaussWeightsW, GaussWeightsb)
        #
        plot_ADVI_ELBO(NNInput, ADVIInference)
        #
        if (NNInput.SaveInference):
            PathToModTrace = NNInput.PathToOutputFldr + '/Approx&Preds.pkl'
            with open(PathToModTrace, 'wb') as buff:
                #pickle.dump({'model': model, 'trace': ADVITrace, 'tracker': ADVITracker, 'inference': ADVIInference, 'approx': ADVIApprox, 'yLike': yLike}, buff)
                pickle.dump(
                    {
                        'ADVIApprox': ADVIApprox,
                        'Params': Params,
                        'yPred': yPred
                    }, buff)
        #
    else:
        PathToWeightFldr = NNInput.PathToOutputFldr + '/Model&Trace.pkl'
        with open(PathToWeightFldr, 'rb') as buff:
            data = pickle.load(buff)
        #model, ADVITrace, ADVITracker, ADVIInference, ADVIApprox, yPred = data['model'], data['trace'], data['tracker'], data['inference'], data['approx'], data['yPred']
        ADVIApprox, Params, yPred = data['ADVIApprox'], data['Params'], data[
            'yPred']
        RSetPlot, ySetPlot, ySetPlotDiat, ySetPlotTriat = datasetsPlot[0]
        RSetPlotTemp = RSetPlot

    if (NNInput.NTraceADVI > 0):
        ADVITrace = ADVIApprox.sample(draws=NNInput.NTraceADVI)
        plot_ADVI_trace(NNInput, ADVITrace)
    else:
        ADVITrace = 1
    ##############################################################################################

    ### SAMPLING PARAMETERS POSTERIOR #######################################################################
    PathToADVI = NNInput.PathToOutputFldr + '/ParamsPosts/'
    if not os.path.exists(PathToADVI):
        os.makedirs(PathToADVI)

    if (NNInput.Model == 'PIP') or (NNInput.Model == 'ModPIP'):
        save_ADVI_reconstruction_PIP(NNInput, PathToADVI, ADVIApprox, Params)
        save_ADVI_sample_PIP(NNInput, PathToADVI, ADVIApprox, Params)
    elif (NNInput.Model == 'LEPS'):
        save_ADVI_reconstruction_LEPS(PathToADVI, ADVIApprox, Params)
    ##############################################################################################

    ### RECONSTRUCTING MOMENTS ###################################################################
    #means = ADVIApprox.bij.rmap(ADVIApprox.mean.eval())
    #sds   = ADVIApprox.bij.rmap(ADVIApprox.std.eval())
    #plot_ADVI_reconstruction(NNInput, means, sds)

    # PathToADVI = NNInput.PathToOutputFldr + '/ParamsPosts/'
    # if not os.path.exists(PathToADVI):
    #     os.makedirs(PathToADVI)
    # save_ADVI_reconstruction(PathToADVI, ADVITrace, model, 0.0, 0.0)
    ##############################################################################################

    ### RUNNING NUTS #############################################################################
    # xSetTrainTemp = xSetTrain
    # ySetTrainTemp = ySetTrain

    # fig = plt.figure()
    # pymc3.traceplot(NUTSTrace);
    # plt.show()
    # FigPath = NNInput.PathToOutputFldr + '/NUTSTrace.png'
    # fig.savefig(FigPath)
    # #plt.close()

    # varnames = means.keys()
    # fig, axs = plt.subplots(nrows=len(varnames), figsize=(12, 18))
    # for var, ax in zip(varnames, axs):
    #     mu_arr    = means[var]
    #     sigma_arr = sds[var]
    #     ax.set_title(var)
    #     for i, (mu, sigma) in enumerate(zip(mu_arr.flatten(), sigma_arr.flatten())):
    #         sd3 = (-4*sigma + mu, 4*sigma + mu)
    #         x = numpy.linspace(sd3[0], sd3[1], 300)
    #         y = stats.norm(mu, sigma).pdf(x)
    #         ax.plot(x, y)
    #         if hierarchical_trace[var].ndim > 1:
    #             t = NUTSTrace[var][i]
    #         else:
    #             t = NUTSTrace[var]
    #         sns.distplot(t, kde=False, norm_hist=True, ax=ax)
    # fig.tight_layout()
    # plt.show()
    # FigPath = NNInput.PathToOutputFldr + '/ADVIDistributionsReconstruction.png'
    # fig.savefig(FigPath)
    # #plt.close()
    ##############################################################################################

    # ## COMPUTING MAX POSTERIOR ##################################################################
    # map_estimate = pymc3.find_MAP(model=model)
    # if (NNInput.Model == 'ModPIP'):
    #    LambdaVec    = map_estimate.get('Lambda')
    #    reVec        = map_estimate.get('re')
    #    WNames       = ['W1', 'W2', 'W3']
    #    WIni         = [ map_estimate.get(WNames[iLayer]) for iLayer in range(len(NNInput.LayersName))]
    #    bNames       = ['b1', 'b2', 'b3']
    #    bIni         = [ map_estimate.get(bNames[iLayer]) for iLayer in range(len(NNInput.LayersName))]
    # elif (NNInput.Model == 'PIP'):
    #    LambdaVec    = NNInput.reVec
    #    reVec        = NNInput.reVec
    #    WNames       = ['W1', 'W2', 'W3']
    #    WIni         = [ map_estimate.get(WNames[iLayer]) for iLayer in range(len(NNInput.LayersName))]
    #    bNames       = ['b1', 'b2', 'b3']
    #    bIni         = [ map_estimate.get(bNames[iLayer]) for iLayer in range(len(NNInput.LayersName))]
    # elif (NNInput.Model == 'LEPS'):
    #    DeVec   = map_estimate.get('De')
    #    betaVec = map_estimate.get('beta')
    #    reVec   = map_estimate.get('re')
    #    k       = map_estimate.get('k')

    # i=-1
    # for Ang in NNInput.AngVector:
    #    i=i+1

    #    xSetTry,  ySetTry  = datasetsTry[i]

    #    PathToAbscissaToPlot = NNInput.PathToDataFldr + '/R.csv.' + str(Ang)
    #    xPlot = abscissa_to_plot(PathToAbscissaToPlot)
    #    if (NNInput.Model == 'PIP') or (NNInput.Model == 'ModPIP'):
    #        yPredMaxPosterior = try_model_PIP(NNInput, xSetTry.get_value(borrow=True), LambdaVec, reVec, WIni, bIni, IniMean, IniStD)
    #    elif (NNInput.Model == 'LEPS'):
    #        yPredMaxPosterior = try_model_LEPS(NNInput, xSetTry.get_value(borrow=True), DeVec, betaVec, reVec, k)
    #    #print(WIni, bIni)
    #    PathToTryLabels = NNInput.PathToOutputFldr + '/REMaxPosterior.' + str(Ang) + '.csv'
    #    save_to_plot(PathToTryLabels, 'Evaluated', numpy.column_stack([xPlot, yPredMaxPosterior]))
    # #############################################################################################

    ### SAMPLING OUTPUT POSTERIOR #######################################################################
    PathToADVI = NNInput.PathToOutputFldr + '/OutputPosts/'
    if not os.path.exists(PathToADVI):
        os.makedirs(PathToADVI)

    x = T.dmatrix('X')
    n = T.iscalar('n')
    x.tag.test_value = numpy.empty_like(RSetPlotTemp)
    x.tag.test_value = numpy.random.randint(100, size=(100, 3))
    n.tag.test_value = 100
    _sample_proba_yPred = ADVIApprox.sample_node(
        yPred, size=n, more_replacements={RSetTrainTemp: x})
    sample_proba_yPred = theano.function([x, n], _sample_proba_yPred)

    m = T.iscalar('m')
    _sample_proba_SigmaPred = ADVIApprox.sample_node(Params.get('Sigma'),
                                                     size=n * m)
    sample_proba_SigmaPred = theano.function([n, m], _sample_proba_SigmaPred)
    SigmaPred = sample_proba_SigmaPred(NNInput.NOutPostSamples, NSigmaSamples)
    SigmaPred = numpy.reshape(SigmaPred,
                              (NNInput.NOutPostSamples, NSigmaSamples))

    i = -1
    for Ang in NNInput.AngVector:
        numpy.random.seed(RandomSeed)
        pymc3.set_tt_rng(RandomSeed)
        i = i + 1
        RSetPlot, ySetPlot, ySetPlotDiat, ySetPlotTriat = datasetsPlot[i]
        ySetPlot = T.cast(ySetPlot, 'float64')
        ySetPlot = ySetPlot.eval()
        #ySetPlot     = InverseTransformation(NNInput, ySetPlot, ySetPlotDiat.get_value())
        yPredPlot = sample_proba_yPred(RSetPlot.get_value(borrow=True),
                                       NNInput.NOutPostSamples)
        yPredSum = ySetPlot * 0.0
        yPredSumSqr = ySetPlot * 0.0
        for j in range(NNInput.NOutPostSamples):
            yPredTemp = numpy.array(yPredPlot[j, :])
            yPredTemp = InverseTransformation(NNInput, yPredTemp,
                                              ySetPlotDiat.get_value())
            yPredSum = yPredSum + yPredTemp
            yPredSumSqr = yPredSumSqr + numpy.square(yPredTemp)
        #
        yMean = yPredSum / NNInput.NOutPostSamples
        yStD = numpy.sqrt(yPredSumSqr / NNInput.NOutPostSamples -
                          numpy.square(yMean))
        yPlus = yMean + SigmaIntCoeff * yStD
        yMinus = yMean - SigmaIntCoeff * yStD
        PathToPlotLabels = NNInput.PathToOutputFldr + '/OutputPosts/yPred' + str(
            int(numpy.floor(Ang))) + '.csv'
        save_moments(
            PathToPlotLabels, 'yPred',
            numpy.column_stack(
                [RSetPlot.get_value(), ySetPlot, yMean, yStD, yMinus, yPlus]))
        print('    Wrote Sampled yPred for Angle ', Ang, '\n')

    if (NNInput.AddNoiseToPredsFlg):
        i = -1
        for Ang in NNInput.AngVector:
            numpy.random.seed(RandomSeed)
            pymc3.set_tt_rng(RandomSeed)
            i = i + 1
            RSetPlot, ySetPlot, ySetPlotDiat, ySetPlotTriat = datasetsPlot[i]
            ySetPlot = T.cast(ySetPlot, 'float64')
            ySetPlot = ySetPlot.eval()
            #ySetPlot     = InverseTransformation(NNInput, ySetPlot, ySetPlotDiat.get_value())
            yPredPlot = sample_proba_yPred(RSetPlot.get_value(borrow=True),
                                           NNInput.NOutPostSamples)
            yPostSum = ySetPlot * 0.0
            yPostSumSqr = ySetPlot * 0.0
            for j in range(NNInput.NOutPostSamples):
                yPredTemp = numpy.array(yPredPlot[j, :])
                if (NNInput.AddNoiseToPredsFlg):
                    for k in range(NSigmaSamples):
                        yPostTemp = InverseTransformation(
                            NNInput, yPredTemp, ySetPlotDiat.get_value())
                        SigmaTemp = SigmaPred[j, k]
                        RandNum = numpy.random.normal(loc=0.0, scale=SigmaTemp)
                        yPostTemp = yPredTemp * RandNum
                        yPostSum = yPostSum + yPostTemp
                        yPostSumSqr = yPostSumSqr + numpy.square(yPostTemp)
            #
            yMean = yPostSum / NNInput.NOutPostSamples
            yStD = numpy.sqrt(yPostSumSqr / NNInput.NOutPostSamples -
                              numpy.square(yMean))
            yPlus = yMean + SigmaIntCoeff * yStD
            yMinus = yMean - SigmaIntCoeff * yStD
            PathToPlotLabels = NNInput.PathToOutputFldr + '/OutputPosts/yPost' + str(
                int(numpy.floor(Ang))) + '.csv'
            save_moments(
                PathToPlotLabels, 'yPost',
                numpy.column_stack([
                    RSetPlot.get_value(), ySetPlot, yMean, yStD, yMinus, yPlus
                ]))
            print('    Wrote Sampled yPost for Angle ', Ang, '\n')
Example No. 22
_ = ax.set(xlim=(-3, 3), ylim=(-3, 3), xlabel='X', ylabel='Y')
cbar.ax.set_ylabel('Uncertainty (posterior predictive standard deviation)')

# We can see that very close to the decision boundary, our uncertainty as to which label to predict is highest. You can imagine that associating predictions with uncertainty is a critical property for many applications like health care. To further maximize accuracy, we might want to train the model primarily on samples from that high-uncertainty region.

# It is also clear that the uncertainty is large in the region where there is no training data. That is what should be expected, and it is good that our network shows this explicitly. The normal neural network would not give any such signals.

# ## Mini-batch ADVI
#
# So far, we have trained our model on all data at once. Obviously this won't scale to something like ImageNet. Moreover, training on mini-batches of data (stochastic gradient descent) avoids local minima and can lead to faster convergence.
#
# Fortunately, ADVI can be run on mini-batches as well. It just requires some setting up:

# In[22]:

minibatch_x = pm.Minibatch(X_train, batch_size=50)
minibatch_y = pm.Minibatch(Y_train, batch_size=50)
neural_network_minibatch = construct_nn(minibatch_x, minibatch_y)
with neural_network_minibatch:
    inference = pm.ADVI()
    approx = pm.fit(40000, method=inference)

# In[23]:

fig, ax = plt.subplots(figsize=(8, 6))

ax.plot(-inference.hist)
ax.set_ylabel('ELBO')
ax.set_xlabel('iteration')

# As you can see, mini-batch ADVI's running time is much lower. It also seems to converge faster.
#
Example No. 23
    def fit(
        self,
        X,
        y,
        inference_type='advi',
        num_advi_sample_draws=10000,
        minibatch_size=None,
        inference_args=None,
    ):
        """
        Train the Linear Regression model

        Parameters
        ----------
        X : numpy array
            shape [num_training_samples, num_pred]

        y : numpy array
            shape [num_training_samples, ]

        inference_type : str (defaults to 'advi')
            specifies which inference method to call
            Currently, only 'advi' and 'nuts' are supported.

        num_advi_sample_draws : int (defaults to 10000)
            Number of samples to draw from ADVI approximation after it has been fit;
            not used if inference_type != 'advi'

        minibatch_size : int (defaults to None)
            number of samples to include in each minibatch for ADVI
            If None, minibatch is not run.

        inference_args : dict (defaults to None)
            arguments to be passed to the inference methods.
            Check the PyMC3 docs for permissible values.
            If None, default values will be set.
        """
        self.num_training_samples, self.num_pred = X.shape

        self.inference_type = inference_type

        if y.ndim != 1:
            y = np.squeeze(y)

        if not inference_args:
            inference_args = self._set_default_inference_args()

        if self.cached_model is None:
            self.cached_model = self.create_model()

        if minibatch_size:
            with self.cached_model:
                minibatches = {
                    self.shared_vars['model_input']:
                    pm.Minibatch(X, batch_size=minibatch_size),
                    self.shared_vars['model_output']:
                    pm.Minibatch(y, batch_size=minibatch_size),
                }

                inference_args['more_replacements'] = minibatches
        else:
            self._set_shared_vars({'model_input': X, 'model_output': y})

        self._inference(inference_type,
                        inference_args,
                        num_advi_sample_draws=num_advi_sample_draws)

        return self
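A minimal usage sketch for the linear-regression fit above; the class name and constructor are assumptions, only the fit signature comes from the snippet:

import numpy as np

X = np.random.randn(10000, 3)
y = X @ np.array([1.5, -2.0, 0.5]) + 0.1 * np.random.randn(10000)

lr = LinearRegression()   # hypothetical constructor
lr.fit(X, y, inference_type='advi', minibatch_size=100, num_advi_sample_draws=5000)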
Example No. 24
    def train_pymc3(docs_te, docs_tr, n_samples_te, n_samples_tr, n_words,
                    n_topics, n_tokens):
        """
        Return: 
            Pymc3 LDA results
        
        Parameters:
            docs_tr: training documents (processed)
            docs_te: testing documents (processed)
            n_samples_te: number of testing docs
            n_samples_tr: number of training docs
            n_words: size of vocabulary
            n_topics: number of topics to learn
            n_tokens: number of non-zero datapoints in processed training tf matrix
            
        """

        # Log-likelihood of documents for LDA
        def logp_lda_doc(beta, theta):
            """
            Returns the log-likelihood function for given documents.

            K : number of topics in the model
            V : number of words (size of vocabulary)
            D : number of documents (in a mini-batch)

            Parameters
            ----------
            beta : tensor (K x V)
              Word distribution.
            theta : tensor (D x K)
              Topic distributions for the documents.
            """
            def ll_docs_f(docs):
                dixs, vixs = docs.nonzero()
                vfreqs = docs[dixs, vixs]
                ll_docs = vfreqs * pmmath.logsumexp(
                    tt.log(theta[dixs]) + tt.log(beta.T[vixs]),
                    axis=1).ravel()

                # Per-word log-likelihood times no. of tokens in the whole dataset
                return tt.sum(ll_docs) / (tt.sum(vfreqs) + 1e-9) * n_tokens

            return ll_docs_f

        # fit the pymc3 LDA

        # We have a sparse dataset; it's better to have a dense batch so that all words occur in it
        minibatch_size = 128

        # defining minibatch
        doc_t_minibatch = pm.Minibatch(docs_tr.toarray(), minibatch_size)
        doc_t = shared(docs_tr.toarray()[:minibatch_size])

        with pm.Model() as model:
            theta = Dirichlet(
                'theta',
                a=pm.floatX((1.0 / n_topics) * np.ones(
                    (minibatch_size, n_topics))),
                shape=(minibatch_size, n_topics),
                transform=t_stick_breaking(1e-9),
                # do not forget scaling
                total_size=n_samples_tr)
            beta = Dirichlet('beta',
                             a=pm.floatX((1.0 / n_topics) * np.ones(
                                 (n_topics, n_words))),
                             shape=(n_topics, n_words),
                             transform=t_stick_breaking(1e-9))
            # Note that we defined the likelihood with scaling, so we need no additional `total_size` kwarg here
            doc = pm.DensityDist('doc',
                                 logp_lda_doc(beta, theta),
                                 observed=doc_t)

        # Encoder
        class LDAEncoder:
            """Encode (term-frequency) document vectors to variational means and (log-transformed) stds.
            """
            def __init__(self,
                         n_words,
                         n_hidden,
                         n_topics,
                         p_corruption=0,
                         random_seed=1):
                rng = np.random.RandomState(random_seed)
                self.n_words = n_words
                self.n_hidden = n_hidden
                self.n_topics = n_topics
                self.w0 = shared(0.01 * rng.randn(n_words, n_hidden).ravel(),
                                 name='w0')
                self.b0 = shared(0.01 * rng.randn(n_hidden), name='b0')
                self.w1 = shared(0.01 * rng.randn(n_hidden, 2 *
                                                  (n_topics - 1)).ravel(),
                                 name='w1')
                self.b1 = shared(0.01 * rng.randn(2 * (n_topics - 1)),
                                 name='b1')
                self.rng = MRG_RandomStreams(seed=random_seed)
                self.p_corruption = p_corruption

            def encode(self, xs):
                if 0 < self.p_corruption:
                    dixs, vixs = xs.nonzero()
                    mask = tt.set_subtensor(
                        tt.zeros_like(xs)[dixs, vixs],
                        self.rng.binomial(size=dixs.shape,
                                          n=1,
                                          p=1 - self.p_corruption))
                    xs_ = xs * mask
                else:
                    xs_ = xs

                w0 = self.w0.reshape((self.n_words, self.n_hidden))
                w1 = self.w1.reshape((self.n_hidden, 2 * (self.n_topics - 1)))
                hs = tt.tanh(xs_.dot(w0) + self.b0)
                zs = hs.dot(w1) + self.b1
                zs_mean = zs[:, :(self.n_topics - 1)]
                zs_rho = zs[:, (self.n_topics - 1):]
                return {'mu': zs_mean, 'rho': zs_rho}
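                # 'mu' and 'rho' are the per-document variational parameters for theta; in
                # PyMC3's mean-field approximation 'rho' is mapped to a standard deviation
                # via a softplus-style transform, so it is left unconstrained here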

            def get_params(self):
                return [self.w0, self.b0, self.w1, self.b1]

        # instantiate the encoder

        encoder = LDAEncoder(n_words=n_words,
                             n_hidden=100,
                             n_topics=n_topics,
                             p_corruption=0.0)
        local_RVs = OrderedDict([(theta, encoder.encode(doc_t))])
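        # amortized (auto-encoding) variational inference: theta's variational parameters are
        # produced per document by the encoder rather than being free parameters of the approximation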

        # get parameters
        encoder_params = encoder.get_params()

        # Train pymc3 Model
        η = .1
        s = shared(η)

        def reduce_rate(a, h, i):
            s.set_value(η / ((i / minibatch_size) + 1)**.7)
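        # a quick check of the decay schedule (with minibatch_size = 128): the learning rate
        # starts at 0.1, falls to roughly 0.019 after 1,280 iterations and to roughly 0.004
        # after 12,800 iterations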

        with model:
            approx = pm.MeanField(local_rv=local_RVs)
            approx.scale_cost_to_minibatch = False
            inference = pm.KLqp(approx)
        inference.fit(10000,
                      callbacks=[reduce_rate],
                      obj_optimizer=pm.sgd(learning_rate=s),
                      more_obj_params=encoder_params,
                      total_grad_norm_constraint=200,
                      more_replacements={doc_t: doc_t_minibatch})
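        # (optional, assuming the standard PyMC3 Inference API) the recorded ELBO trace is
        # available after fitting as `inference.hist`, e.g. plt.plot(inference.hist)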

        # Extracting characteristic words
        doc_t.set_value(docs_tr.toarray())
        samples = pm.sample_approx(approx, draws=100)
        beta_pymc3 = samples['beta'].mean(axis=0)

        # Predictive distribution
        def calc_pp(ws, thetas, beta, wix):
            """
            Parameters
            ----------
            ws: ndarray (N,)
                Number of times the held-out word appeared in N documents.
            thetas: ndarray, shape=(N, K)
                Topic distributions for N documents.
            beta: ndarray, shape=(K, V)
                Word distributions for K topics.
            wix: int
                Index of the held-out word

            Returns
            -------
            Log probability of held-out words.
            """
            return ws * np.log(thetas.dot(beta[:, wix]))
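        # worked example (illustrative values): with thetas[d] = [0.5, 0.5],
        # beta[:, wix] = [0.1, 0.3] and ws[d] = 2, document d contributes
        # 2 * log(0.5 * 0.1 + 0.5 * 0.3) = 2 * log(0.2) ≈ -3.22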

        def eval_lda(transform, beta, docs_te, wixs):
            """Evaluate LDA model by log predictive probability.

            Parameters
            ----------
            transform: Python function
                Transform document vectors to posterior mean of topic proportions.
            beta: ndarray, shape=(K, V)
                Word distributions for K topics.
            docs_te: ndarray, shape=(N, V)
                Term-frequency matrix of the held-out (test) documents.
            wixs: iterable of int
                Word indices to be held-out.
            """
            lpss = []
            docs_ = deepcopy(docs_te)
            thetass = []
            wss = []
            total_words = 0
            for wix in wixs:
                ws = docs_te[:, wix].ravel()
                if 0 < ws.sum():
                    # Hold-out
                    docs_[:, wix] = 0

                    # Topic distributions
                    thetas = transform(docs_)

                    # Predictive log probability
                    lpss.append(calc_pp(ws, thetas, beta, wix))

                    docs_[:, wix] = ws
                    thetass.append(thetas)
                    wss.append(ws)
                    total_words += ws.sum()
                else:
                    thetass.append(None)
                    wss.append(None)

            # Log-probability
            lp = np.sum(np.hstack(lpss)) / total_words

            return {'lp': lp, 'thetass': thetass, 'beta': beta, 'wss': wss}

        inp = tt.matrix(dtype='int64')
        sample_vi_theta = theano.function([inp],
                                          approx.sample_node(
                                              approx.model.theta,
                                              100,
                                              more_replacements={
                                                  doc_t: inp
                                              }).mean(0))
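        # sample_vi_theta draws 100 samples of theta from the fitted approximation, with doc_t
        # replaced by the input matrix, and returns their mean, i.e. the posterior-mean topic
        # proportions that eval_lda expects from `transform`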

        def transform_pymc3(docs):
            return sample_vi_theta(docs)

        result_pymc3 = eval_lda(transform_pymc3, beta_pymc3, docs_te.toarray(),
                                np.arange(100))
        print('Predictive log prob (pm3) = {}'.format(result_pymc3['lp']))

        return result_pymc3
Exemplo n.º 25
0
    def __init__(self,
                 time,
                 event,
                 x,
                 rs,
                 minibatch=1,
                 labels=None,
                 priors=None,
                 vars=None,
                 name='',
                 model=None):

        super(FrailtyIndependentComponent_Fix, self).__init__(name, model)
        if priors is None:
            priors = {}
        if vars is None:
            vars = {}

        ### First, determine whether we are working with theano shared tensors or numpy arrays
        ## Debugging output
        print(str(time))

        # if `time` is a shared tensor, we need get_value() to inspect the underlying array's shape

        if str(time) == '<TensorType(float64, matrix)>':
            data_tensor = True
            self.k = k = time.get_value().shape[1]  # outcome dimensionality
            self.n = n = time.get_value().shape[0]  # total number of observations
            self.p = p = x.get_value().shape[1]  # number of covariates

        else:
            data_tensor = False
            self.k = k = time.shape[1]  # outcome dimensionality
            self.n = n = time.shape[0]  # total number of observations
            self.p = p = x.shape[1]  # number of covariates

        x, labels = any_to_tensor_and_labels(
            x, labels)  # might need to do this for the other variables

        ## now for secondary delta for the gamma frac
        if data_tensor == True:

            # Create tensor variable for the gamma_frac component of the likelihood

            self.event_change = event_change = theano.shared(np.array([np.append(np.repeat(1, s), np.repeat(0, k-s)).tolist()\
                                                                       for s in np.sum(event.get_value(), axis = 1)]), borrow = True)
        else:
            self.event_change = event_change = np.array([np.append(np.repeat(1, s), np.repeat(0, k-s)).tolist()\
                                                         for s in np.sum(event, axis = 1)])
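        # worked example (illustrative): with k = 3 outcome dimensions, a row of `event` summing
        # to 2 yields the event_change row [1, 1, 0]; a row summing to 0 yields [0, 0, 0]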

        ## Keep track of total size of the dataset, for minibatching
        ## new 10.10.2018
        # If minibatch, then we need the x component to be a generator and not just a tensor
        # by this step in the computation, X is already in tensor form

        if minibatch >= 2:  # treat any batch size of at least 2 as a request to minibatch
            print("We're minibatching")
            # If we're using minibatches, we have to tell the inner workings to fix the MAP estimate
            minibatch = int(minibatch)  # guard against a float being passed as the batch size
            x_mini = pm.Minibatch(
                data=x.get_value(), batch_size=minibatch
            )  # make minibatch instance of the design matrix
            time_mini = pm.Minibatch(
                data=time.get_value(),
                batch_size=minibatch)  # minibatch instance of the time array
            event_mini = pm.Minibatch(
                data=event.get_value(),
                batch_size=minibatch)  # minibatch instance of the event array
            event_change_mini = pm.Minibatch(
                data=event_change.get_value(), batch_size=minibatch
            )  # minibatch instance of the transformed event array
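            # note: each pm.Minibatch above draws a fresh random slice of `minibatch` rows every
            # time the graph is evaluated; since all four use the same default random seed, the
            # sampled row indices should stay aligned across x, time, event and event_change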

            ## assign self. attributes to later parameterize the logp function

            self.x = x_mini
            self.time = time_mini
            self.event = event_mini
            self.event_change = event_change_mini

        else:
            # if not minibatching, just pass the tensors as they are
            self.x = x
            self.time = time
            self.event = event
            self.event_change = event_change

        # now we have x, shape and labels

        # init a list to store all of the parameters that go into our likelihood
        coeffs_all = list()
        lams = list()
        rhos = list()

        for level in range(
                k
        ):  # for each dimension, instantiate a covariate effect for each predictor

            labels_this = [s + "_" + str(level) for s in labels]

            coeffs_this = list()
            for name in labels_this:
                if name in vars:
                    v = Deterministic(name, vars[name])
                else:
                    v = self.Var(name=name,
                                 dist=priors.get(
                                     name,
                                     priors.get('Regressor',
                                                self.default_regressor_prior)))
                coeffs_this.append(v)
            coeffs_this = tt.stack(coeffs_this, axis=0)
            coeffs_all.append(coeffs_this)

            ### Now for the baseline hazard portions

            lam_name = 'lam_' + str(level)
            lam = self.Var(name=lam_name,
                           dist=priors.get(
                               lam_name,
                               priors.get('lam', self.default_lambda_prior))
                           )  # create labels for the lambdas
            lams.append(lam)
            # rhos
            rho_name = 'rho_' + str(level)
            rho = self.Var(name=rho_name,
                           dist=priors.get(
                               rho_name,
                               priors.get('rho', self.default_rho_prior)))
            rhos.append(rho)

            # finally, transformation parameters r
        # frailty parameter
        theta = self.Var(name='theta',
                         dist=priors.get(
                             'theta',
                             priors.get('Theta', self.default_theta_prior)))
        # make self attribute for the coefficients
        self.coeffs_all = coeffs_all

        # changing 10.18
        self.theta = theta
        self.lams = lams = tt.stack(lams, axis=0)
        self.rhos = rhos = tt.stack(rhos, axis=0)
Exemplo n.º 26
0
N, D = X.shape

# Out-path

out_path = "out/" + img_name + "_{0:d}".format(test_idx)
os.mkdir(out_path)

plt.imshow(img)
plt.grid(None)
plt.savefig(out_path + "/" + img_name + ".jpg")

print("defining model...")
# Define model
X_shared = theano.shared(X)
minibatch_size = 500
X_minibatch = pm.Minibatch(X, minibatch_size)
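# X_shared keeps the full data in a shared variable (presumably for later full-data evaluation),
# while X_minibatch yields a fresh random batch of 500 rows each time the graph is evaluated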

# set up model
with pm.Model() as model:
    pi = pm.Dirichlet('pi', np.ones(K))
    comp_dist = []
    mu = []
    packed_chol = []
    chol = []
    for i in range(K):
        temp_mean = np.random.randint(low=50, high=200, size=D)
        mu.append(pm.Normal('mu%i' % i, temp_mean, 20, shape=D))
        packed_chol.append(
            pm.LKJCholeskyCov('chol_cov_%i' % i,
                              eta=2,
                              n=D,
Exemplo n.º 27
0
 def test_special4(self):
     mb = pm.Minibatch(self.data, [10, None, Ellipsis, (4, 42)])
     assert mb.eval().shape == (10, 10, 40, 10, 4)
Exemplo n.º 28
0
 def test_special1(self):
     mb = pm.Minibatch(self.data, [(10, 42), None, (4, 42)])
     assert mb.eval().shape == (10, 10, 4, 10, 50)
Exemplo n.º 29
0
 def test_2d(self):
     mb = pm.Minibatch(self.data, [(10, 42), (4, 42)])
     assert mb.eval().shape == (10, 4, 40, 10, 50)
Exemplo n.º 30
0
 def test_mixed2(self):
     with pm.Model():
         data = np.random.rand(10, 20, 30, 40, 50)
         mb = pm.Minibatch(data, [2, None, 20])
         Normal('n', observed=mb, total_size=(10, None, 30))
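The `self.data` fixture is not shown in these test snippets; the sketch below (an illustrative assumption, using a fixture of shape (30, 10, 40, 10, 50) consistent with the asserted shapes above) summarizes the batch-size spec these tests exercise: a plain int draws a random slice of that size along an axis, a (size, seed) tuple does the same with an explicit seed, None keeps an axis whole, and Ellipsis keeps all remaining middle axes whole.

import numpy as np
import pymc3 as pm

data = np.random.rand(30, 10, 40, 10, 50)

# plain int: random slice of 16 rows along axis 0, all other axes untouched
mb0 = pm.Minibatch(data, 16)                              # shape (16, 10, 40, 10, 50)

# per-axis specs: (size, seed) tuples sample an axis, None keeps it whole,
# Ellipsis keeps every remaining middle axis whole
mb1 = pm.Minibatch(data, [(10, 42), None, (4, 42)])       # shape (10, 10, 4, 10, 50)
mb2 = pm.Minibatch(data, [10, None, Ellipsis, (4, 42)])   # shape (10, 10, 40, 10, 4)

print(mb0.eval().shape, mb1.eval().shape, mb2.eval().shape)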