Example No. 1
    def score_avg(self, enr_X, tst_X):
        if enr_X.ndim == 1:
            enr_X = enr_X.reshape(1, -1)
        if tst_X.ndim == 1:
            tst_X = tst_X.reshape(1, -1)

        enr_U = transform_X_to_U(enr_X, self.inv_A, self.m)
        tst_U = transform_X_to_U(tst_X, self.inv_A, self.m)

        # log-density under the no-class hypothesis; shared across all enrollment vectors
        logp_no_class = gaussian(np.zeros(tst_U.shape[1]),
                                 np.diag(self.Psi + 1)).logpdf(tst_U)

        llr_list = []
        cov_diag = np.diag(self.Psi / (self.Psi + 1))
        for i in range(len(enr_U)):
            logp_same_class = gaussian(enr_U[i] * cov_diag,
                                       cov_diag + 1).logpdf(tst_U)
            llr = logp_same_class - logp_no_class
            llr_list.append(llr)

        llr_arr = np.round(llr_list, 5)
        if llr_arr.ndim == 1:
            llr_arr = llr_arr.reshape(1, -1)

        return llr_arr
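# A standalone sketch of the log-likelihood ratio computed above, using plain
# NumPy/SciPy with made-up values. In the class, self.Psi, self.inv_A and
# self.m come from a trained PLDA model; here Psi is simply an assumed 1-D
# vector of between-class variances and the vectors are already in U-space.
import numpy as np
from scipy.stats import multivariate_normal as gaussian

Psi = np.array([2.0, 0.5, 1.5])            # assumed between-class variances
enr_u = np.array([0.3, -1.2, 0.7])         # one enrollment vector (U-space)
tst_u = np.array([[0.1, -0.9, 0.5]])       # one test vector (U-space)

logp_no_class = gaussian(np.zeros(3), np.diag(Psi + 1.0)).logpdf(tst_u)
post = Psi / (Psi + 1.0)                   # posterior variance given one enrollment sample
logp_same_class = gaussian(enr_u * post, np.diag(post + 1.0)).logpdf(tst_u)
print(logp_same_class - logp_no_class)     # LLR: higher favors the same-class hypothesis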
Example No. 2
class TestBleach(unittest.TestCase):
    pixel = np.arange(1, 100)
    pp_delays = np.linspace(-1, 10, 20)
    wavenumber = pixel[::-1]
    xx, yy = np.meshgrid(pixel, pp_delays, sparse=True)
    intensity = 100000 * gaussian(50, 30).pdf(xx) * gaussian(4, 4).pdf(yy) + 1
    baseline = np.ones_like(intensity)
    intensityE = (intensity - 0.9) * 0.1
    norm = 100000 * gaussian(55, 40).pdf(pixel)
    pp = pysfg.PumpProbe(intensity, baseline, norm, wavenumber, pp_delays,
                         2500, 80, 0.2, intensityE, pixel)
    intensity2 = 100000 * gaussian(52, 30).pdf(xx) * gaussian(4, 4).pdf(yy) + 1
    pp2 = pysfg.PumpProbe(intensity2, baseline, norm, wavenumber, pp_delays,
                          2500, 80, 0.2, intensityE, pixel)
    bleach = pp - pp2

    def test_shape(self):
        self.assertTrue(self.bleach.normalized.shape == (20, 99))

    def test_trace_shape(self):
        tr = self.bleach.get_trace(slice(20, 30))
        self.assertTrue(tr.bleach.shape == (20, ))

    def test_trace_result(self):
        tr = self.bleach.get_trace(slice(20, 30))
        self.assertEqual(tr.bleach.mean(), 0.005120605673482265)

    def test_to_and_from_json(self):
        os.chdir(dir_path)
        self.bleach.to_json(Path("bleach.json"))
        bleach = pysfg.spectrum.json_to_bleach(Path("results/bleach.json"))
        self.assertTrue(
            np.all(bleach.intensity - self.bleach.intensity < 0.0001))
Example No. 3
class TestSpectrum(unittest.TestCase):
    pixel = np.arange(1, 1600)
    wavenumber = pixel[::-1]
    intensity = gaussian(750, 100).pdf(pixel) * 100 + 1
    baseline = np.ones_like(intensity)
    intensityE = (intensity - 0.9) * 0.1
    norm = gaussian(800, 140).pdf(pixel) * 1000
    sp = pysfg.spectrum.Spectrum(intensity, baseline, norm, wavenumber,
                                 intensityE, pixel)

    def test_basesubed(self):
        self.assertEqual(np.all(self.sp.basesubed == self.intensity - 1), True)

    def test_normalized(self):
        self.assertEqual(
            np.all(self.sp.normalized == (self.intensity - 1) / self.norm),
            True)

    def test_to_and_from_json(self):
        os.chdir(dir_path)
        self.sp.to_json(Path("spectrum.json"))
        ssp = pysfg.json_to_spectrum(Path("results/spectrum.json"))
        # There is some numerical uncertainty
        self.assertEqual(np.all(self.sp.normalized - ssp.normalized < 0.0001),
                         True)
Example No. 4
    def test_PumpProbe_norms_3(self):
        norm = 100000 * gaussian(55, 40).pdf(self.xx) * gaussian(2, 2).pdf(
            self.yy) + 1
        pp = pysfg.PumpProbe(self.intensity, self.baseline, norm,
                             self.wavenumber, self.pp_delays, 2500, 80, 0.2,
                             self.intensityE, self.pixel)
        self.assertEqual(pp.normalized.mean(), 7.731632631662137)
Example No. 5
    def test_latent_pdf_class(self):
        """Tests the LatentPDF class."""
        # Testing no distribution estimation
        latent_pdf = glad.LatentPDF()
        cs_test = latent_pdf._init_cubic_spline()
        self.assertEqual(cs_test.number_of_samples, 3)

        # Check output
        dist_x_weights = latent_pdf(None, None)
        np.testing.assert_allclose(
            dist_x_weights,
            latent_pdf.weights *
            gaussian(0, 1).pdf(latent_pdf.quadrature_locations))

        ## Testing parameter estimation
        latent_pdf = glad.LatentPDF({
            "estimate_distribution": True,
            "number_of_samples": 7
        })
        cs_test = latent_pdf._init_cubic_spline()
        self.assertEqual(cs_test.number_of_samples, 7)

        # Check output at iteration 0
        dist_x_weights = latent_pdf(None, 0)
        np.testing.assert_allclose(
            dist_x_weights,
            latent_pdf.weights *
            gaussian(0, 1).pdf(latent_pdf.quadrature_locations))

        # Create a dummy distribution and integration
        np.random.seed(66871)
        coeffs = np.random.rand(9)
        cs_dist = glad.CubicSplinePDF({
            'number_of_samples': 9,
            'quadrature_bounds': [-4.5, 4.5]
        })
        cs_dist.update_coefficients(coeffs)
        cs_fixed = glad.resample(cs_dist, 7)
        result = glad._parameter_constraints(cs_fixed.coefficients,
                                             cs_fixed.sample_space)
        np.testing.assert_allclose(result,
                                   np.zeros((3, )),
                                   atol=1e-5,
                                   rtol=1e-5)

        # dummy integration method (smoke-test to make sure it runs)
        unweighted_integration = np.random.rand(
            1000, latent_pdf.quadrature_locations.size)

        dist_x_weights = latent_pdf(unweighted_integration, 1)
        np.testing.assert_allclose(
            latent_pdf.cubic_splines[0].coefficients[2:-2],
            [0., 0.057376, 0.264267, 0.313561, 0.350573, 0.014223, 0.],
            atol=1e-4)
        # test aic / bic call
        aic, bic = latent_pdf.compute_metrics(unweighted_integration,
                                              dist_x_weights, 4)
        self.assertAlmostEqual(bic - aic, 4 * (np.log(1000) - 2))
Example No. 6
    def calc_logp_posterior(self, v_model, category):
        assert v_model.shape[-1] == self.get_dimensionality('U_model')

        mean = self.posterior_params[category]['mean']
        cov_diag = self.posterior_params[category]['cov_diag']

        return gaussian(mean, np.diag(cov_diag)).logpdf(v_model)
Example No. 7
def default_options():
    """ Dictionary of options used in Girth.

    Args:
        max_iteration: [int] maximum number of iterations
            allowed during processing. (Default = 25)
        distribution: [callable] function that returns a pdf
            evaluated at quadrature points, p = f(theta).
            (Default = scipy.stats.norm(0, 1).pdf)
        quadrature_bounds: (lower, upper) bounds to limit
            numerical integration. Default = (-4.5, 4.5)
        quadrature_n: [int] number of quadrature points to use
                        Default = 41
        hyper_quadrature_n: [int] number of quadrature points to use to
                            estimate hyper prior integral (only mml_eap)
        use_LUT: [boolean] use a look up table in mml functions
        estimate_distribution: [boolean] estimate the latent distribution
                               using cubic splines
        number_of_samples: [int] number of samples to use when
                           estimating distribution, must be > 5
    """
    return {
        "max_iteration": 25,
        "distribution": gaussian(0, 1).pdf,
        "quadrature_bounds": (-4.5, 4.5),
        "quadrature_n": 41,
        "hyper_quadrature_n": 41,
        "use_LUT": True,
        "estimate_distribution": False,
        "number_of_samples": 9
    }
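# A hedged usage sketch: callers typically start from these defaults and
# override individual entries. The theta/weights names below are illustrative,
# not part of Girth's API.
import numpy as np

options = default_options()
options.update({"quadrature_n": 61, "estimate_distribution": True})
theta = np.linspace(*options["quadrature_bounds"], options["quadrature_n"])
weights = options["distribution"](theta)   # standard-normal pdf at the nodes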
Example No. 8
    def calc_error(truth_dict):
        model = plda.Model(truth_dict['data'], truth_dict['labels'])

        Phi_w = truth_dict['Phi_w']
        likelihood_means = truth_dict['means']

        dim = Phi_w.shape[0]
        test_data = np.random.randint(-100, 100, (10, dim))

        expected = []
        predicted = []
        for mean, label in zip(truth_dict['means'], truth_dict['labels']):
            true_logps = gaussian(mean, Phi_w).logpdf(test_data)
            true_logps -= logsumexp(true_logps)

            test_U = model.transform(test_data, 'D', 'U_model')
            predicted_logps = model.calc_logp_posterior_predictive(
                test_U, label)
            predicted_logps -= logsumexp(predicted_logps)

            expected.append(true_logps)
            predicted.append(predicted_logps)

        expected = np.asarray(expected)
        predicted = np.asarray(predicted)

        error = calc_mean_squared_error(expected, predicted, as_log=True)

        return error
Example No. 9
    def vector_avg(self, enr_X, tst_X):
        # average enrollment vectors before scoring
        enr_X = enr_X.mean(0, keepdims=True)
        enr_U = transform_X_to_U(enr_X, self.inv_A, self.m)
        tst_U = transform_X_to_U(tst_X, self.inv_A, self.m)

        logp_no_class = gaussian(np.zeros(tst_U.shape[1]),
                                 np.diag(self.Psi + 1)).logpdf(tst_U)

        cov_diag = np.diag(self.Psi / (self.Psi + 1))
        mean = enr_U[0] * cov_diag
        logp_same_class = gaussian(mean, cov_diag + 1).logpdf(tst_U)

        llr = logp_same_class - logp_no_class

        return np.round(llr.reshape(1, -1), 5)
Example No. 10
def plot_gaussian(mu, Sigma, N=100, z_offset=-0.2, val=2):
    gauss = gaussian(mu, Sigma)
    x = np.linspace(mu[0] - val, mu[0] + val, N)
    y = np.linspace(mu[1] - val, mu[1] + val, N)
    x, y = np.meshgrid(x, y)
    pos = np.empty(x.shape + (2, ))
    pos[:, :, 0] = x
    pos[:, :, 1] = y
    z = gauss.pdf(pos)
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')  # fig.gca(projection=...) was removed in Matplotlib 3.6

    ax.plot_surface(x,
                    y,
                    z,
                    rstride=3,
                    cstride=3,
                    linewidth=1,
                    antialiased=True,
                    cmap=cm.rainbow)

    ax.contourf(x, y, z, zdir='z', offset=z_offset, cmap=cm.rainbow)

    max_z = np.max(z)
    ax.set_zlim(z_offset, max_z + 0.2 * max_z)
    ax.set_zticks(np.linspace(0, max_z + 0.2 * max_z, 5))
    ax.view_init(15, -21)

    plt.show()
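# Example call with a made-up 2-D mean and covariance (assumes the module's
# numpy/matplotlib/scipy imports used by plot_gaussian above):
import numpy as np

mu = np.array([0.0, 0.0])
Sigma = np.array([[1.0, 0.3], [0.3, 0.5]])
plot_gaussian(mu, Sigma, N=80, val=3)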
Example No. 11
    def update(self, z):
        # TODO implement correction step

        Qt = self._Q
        standard_deviation = np.sqrt(Qt)

        observation = z[0]
        lm_id = z[1]

        expected_observation = np.array([
            get_observation(self.particles[i], lm_id)[0] for i in range(self.M)
        ])
        angle_deviations = np.array([
            wrap_angle(expected_observation[i] - observation)
            for i in range(self.M)
        ])

        weights = gaussian().pdf(angle_deviations / standard_deviation)
        weights = weights / np.sum(weights)  # normalization

        self.particles = self.particles[self.low_variance_sampling(weights)]
        self.X = self.particles

        gaussian_parameters = get_gaussian_statistics(self.particles)
        self._state.mu = gaussian_parameters.mu
        self._state.Sigma = gaussian_parameters.Sigma
Example No. 12
def gauss_prob(observed, mean, variance):
    """
    :param observed: an observed value
    :param mean: the mean of the distribution
    :param variance: the variance of the distribution
    :return: the probability density of the observation under a Gaussian with the given mean and variance
    """
    return gaussian(mean, math.sqrt(variance)).pdf(observed)
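# gauss_prob returns the normal density; a quick check against the closed-form
# expression, with arbitrary values:
import math

x, mean, variance = 1.0, 0.0, 4.0
p = gauss_prob(x, mean, variance)
p_manual = math.exp(-(x - mean) ** 2 / (2 * variance)) / math.sqrt(2 * math.pi * variance)
assert abs(p - p_manual) < 1e-12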
Example No. 13
    def draw(self, N):
        # one float field per dimension in a structured array
        dt = np.dtype([(d, float) for d in self.dimensions])
        draws = np.zeros(N, dtype=dt)
        for d in self.dimensions:
            mu = self['{}_mean'.format(d)]
            sigma = self['{}_sigma'.format(d)]
            rv = gaussian(loc=mu, scale=sigma)
            draws[d] = rv.rvs(size=N)
        return draws
Example No. 14
def test_calc_logp_likelihood(teacher, test_args):
    np.random.seed(1234)

    v = test_args['v']

    # Posterior for the multivariate conjugate Gaussian.
    set_size = 2
    sets = np.asarray(list(teacher.generate_teaching_sets(v, set_size)))

    prior_cov_diag = teacher.prior_cov_diag
    posterior_cov_diag = prior_cov_diag / (set_size * prior_cov_diag + 1)

    posterior_means = teacher.data[sets].mean(axis=-2)
    posterior_means = set_size * posterior_cov_diag * posterior_means
    posterior_cov = np.diag(posterior_cov_diag)

    actual = teacher.calc_logp_likelihood(v, sets)

    for i, mean in enumerate(posterior_means):
        expected = gaussian(mean, posterior_cov).logpdf(v)

        assert actual[i] == expected

    # Test that code doesn't break when teaching set size is 1.
    set_size = 1
    sets = np.asarray(list(teacher.generate_teaching_sets(v, set_size)))

    prior_cov_diag = teacher.prior_cov_diag
    posterior_cov_diag = prior_cov_diag / (set_size * prior_cov_diag + 1)

    posterior_means = np.squeeze(teacher.data[sets])  # since set_size = 1.
    posterior_means = set_size * posterior_cov_diag * posterior_means
    posterior_cov = np.diag(posterior_cov_diag)

    actual = teacher.calc_logp_likelihood(v, sets)

    for i, mean in enumerate(posterior_means):
        expected = gaussian(mean, posterior_cov).logpdf(v)

        assert actual[i] == expected
Example No. 15
    def multi_sess(self,
                   enr_X,
                   tst_X,
                   weights=None,
                   n_enr=None,
                   cov_scaling=False,
                   cov_adapt=False):
        if enr_X.ndim == 1:
            enr_X = enr_X.reshape(1, -1)
        if tst_X.ndim == 1:
            tst_X = tst_X.reshape(1, -1)

        enr_U = transform_X_to_U(enr_X, self.inv_A, self.m)
        tst_U = transform_X_to_U(tst_X, self.inv_A, self.m)

        logp_no_class = gaussian(np.zeros(tst_U.shape[1]),
                                 np.diag(self.Psi + 1)).logpdf(tst_U)

        if n_enr is None:
            n_enr = len(enr_U)
        cov_diag = np.diag(self.Psi / (n_enr * self.Psi + 1))

        if weights is None:
            multi_mean = n_enr * cov_diag * enr_U.mean(0)
        else:
            enr_mean = (weights.reshape(-1, 1) * enr_U).sum(0) / weights.sum()
            multi_mean = n_enr * cov_diag * enr_mean

        if cov_scaling:
            cov_diag = n_enr * cov_diag

        if cov_adapt:
            enr_var = np.mean(np.square(enr_U - multi_mean), axis=0)
            cov_diag = cov_diag + enr_var

        logp_same_class = gaussian(multi_mean, cov_diag + 1).logpdf(tst_U)
        llr = logp_same_class - logp_no_class

        return np.round(llr.reshape(1, -1), 5)
Example No. 16
def prob_L_given_N(N, lsamples, mean, variance, N_critical=10):
    '''
    Redirector to prob_L_given_N_empirical or prob_L_given_N_CLT depending on ``N``.

    '''

    assert N > 0

    if N >= N_critical:
        return gaussian(N * mean, math.sqrt(N * variance)).pdf
    else:
        kde = prob_L_given_N_empirical(N, lsamples)
        return lambda L: kde(L)[0] if np.isscalar(L) else kde(L)
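# For N >= N_critical the function returns a frozen Gaussian pdf for the sum
# of N i.i.d. samples (the CLT branch); lsamples is only needed by the
# empirical branch. The values here are made up.
pdf = prob_L_given_N(N=50, lsamples=None, mean=2.0, variance=0.5)
print(pdf(100.0))   # density of the total L at 100 under N(100, sqrt(25))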
Example No. 17
    def calc_error(truth_dict):
        model = plda.Model(truth_dict['data'], truth_dict['labels'])

        Phi_b = truth_dict['Phi_b']
        prior_mean = truth_dict['prior_mean']
        dim = prior_mean.shape[0]

        random_vectors = np.random.randint(-100, 100, (10, dim))
        expected = gaussian(prior_mean, Phi_b).logpdf(random_vectors)

        latent_vectors = model.transform(random_vectors, 'D', 'U_model')
        predicted = model.calc_logp_prior(latent_vectors)

        error = calc_mean_squared_error(expected, predicted, as_log=True)
        print(error)

        return error
Example No. 18
def estimate_and_plot(s, path='coding_3a.png'):

    # Reference
    # https://docs.scipy.org/doc/scipy/reference/tutorial/optimize.html#broyden-fletcher-goldfarb-shanno-algorithm-method-bfgs
    # If we do not supply a gradient, it is estimated by finite-difference methods
    # order [mu, sigma]
    x0 = np.array([0, 1.0])
    param_Gaussian = minimize(MLE_Gaussian,
                              x0,
                              method='BFGS',
                              options={'disp': True})
    print(param_Gaussian.x)

    # order [k, mu, sigma]
    x0 = np.array([10, 0.0, 1.0])
    param_Student_t = minimize(MLE_Student_t,
                               x0,
                               method='BFGS',
                               options={'disp': True})
    print(param_Student_t.x)

    x = np.linspace(-5, 5, num_pts)
    plt.figure(figsize=(8, 6), dpi=DPI)
    # Plot points
    plt.hist(s, bins=np.arange(-10, 10 + binwidth, binwidth), density=True)
    # Plot Gaussian
    dist = gaussian(loc=param_Gaussian.x[0], scale=param_Gaussian.x[1])
    plt.plot(x, dist.pdf(x), lw=lw, c='blue', label='Gaussian')
    # Plot Student t
    dist = student_t(df=param_Student_t.x[0],
                     loc=param_Student_t.x[1],
                     scale=param_Student_t.x[2])
    plt.plot(x, dist.pdf(x), lw=lw, c='red', label='Student_t')

    plt.ylim(0, 0.55)
    plt.xlim(-10, 10)
    plt.xlabel('$x$')
    plt.ylabel(r'$p(x)$')
    plt.title(r'Estimated Gaussian and Students $t$ Distribution')
    plt.legend(loc="upper right")
    plt.grid(True)
    print("Saving to " + path)
    plt.savefig(path)
    plt.close()
Example No. 19
def default_options():
    """ Dictionary of options used in Girth.

    Args:
        max_iteration: [int] maximum number of iterations
            allowed during processing. (Default = 25)
        distribution: [callable] function that returns a pdf
            evaluated at quadrature points, p = f(theta).
            (Default = scipy.stats.norm(0, 1).pdf)
        quadrature_bounds: (lower, upper) bounds to limit
            numerical integration. Default = (-5, 5)
        quadrature_n: [int] number of quadrature points to use
                        Default = 61
"""
    return {
        "max_iteration": 25,
        "distribution": gaussian(0, 1).pdf,
        "quadrature_bounds": (-5, 5),
        "quadrature_n": 61
    }
Example No. 20
    def calc_logp_likelihood(self, v, teaching_sets):
        """ Recall that the likelihood is the LEARNER's posterior. """
        if len(teaching_sets.shape) == 1:
            teaching_sets = teaching_sets[:, None]
            set_size = 1

        else:
            set_size = teaching_sets.shape[-1]

            assert len(teaching_sets.shape) > 1

        Psi_model = self.prior_cov_diag

        posterior_cov_diag = Psi_model / (set_size * Psi_model + 1)

        posterior_means = self.data[teaching_sets].sum(axis=-2)
        posterior_means = posterior_means * posterior_cov_diag

        # Since N(mean | posterior_mean, cov) = N(posterior_mean | mean, cov).
        logpdf = gaussian(v, np.diag(posterior_cov_diag)).logpdf

        return logpdf(posterior_means)
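# A quick numeric check of the symmetry the comment above relies on, with
# made-up values: for a fixed covariance C, N(v | m, C) == N(m | v, C).
import numpy as np
from scipy.stats import multivariate_normal as gaussian

v = np.array([0.2, -0.4])
m = np.array([1.0, 0.5])
C = np.diag([0.3, 0.8])
assert np.isclose(gaussian(m, C).logpdf(v), gaussian(v, C).logpdf(m))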
Example No. 21
def extract_logpdfs_array(posterior_predictive_params):
    means = []
    cov_diags = []
    labels = []

    for k, k_dict in posterior_predictive_params.items():
        means.append(k_dict['mean'])
        cov_diags.append(k_dict['cov_diag'])
        labels.append(k)

    all_logpdfs = []

    for mean, cov_diag in zip(means, cov_diags):  # For each category.
        category_logpdfs = []

        for mu, var in zip(mean, cov_diag):  # For each dimension.
            dimension_logpdf = gaussian(mu, var).logpdf
            category_logpdfs.append(dimension_logpdf)

        all_logpdfs.append(category_logpdfs)

    return all_logpdfs, labels
Example No. 22
def test_calc_logp_mariginal_likelihood():
    np.random.seed(1234)

    n_k = 100
    K = 5
    dim = 10

    data_dictionary = generate_data(n_k, K, dim)
    X = data_dictionary['data']
    Y = data_dictionary['labels']
    model = plda.Model(X, Y)

    prior_mean = model.prior_params['mean']
    prior_cov_diag = model.prior_params['cov_diag']

    logpdf = gaussian(prior_mean, np.diag(prior_cov_diag + 1)).logpdf

    data = np.random.random((n_k, prior_mean.shape[-1]))
    expected_logps = logpdf(data)
    actual_logps = model.calc_logp_marginal_likelihood(data[:, None])

    assert_allclose(actual_logps, expected_logps)
Example No. 23
class TestPumpProbe(unittest.TestCase):
    pixel = np.arange(1, 100)
    pp_delays = np.linspace(-1, 10, 20)
    wavenumber = pixel[::-1]
    xx, yy = np.meshgrid(pixel, pp_delays, sparse=True)
    intensity = 100000 * gaussian(50, 30).pdf(xx) * gaussian(4, 4).pdf(yy) + 1
    baseline = np.ones_like(intensity)
    intensityE = (intensity - 0.9) * 0.1
    norm = 100000 * gaussian(55, 40).pdf(pixel)
    pp = pysfg.PumpProbe(intensity, baseline, norm, wavenumber, pp_delays,
                         2500, 80, 0.2, intensityE, pixel)
    intensity2 = 100000 * gaussian(52, 30).pdf(xx) * gaussian(4, 4).pdf(yy) + 1
    pp2 = pysfg.PumpProbe(intensity2, baseline, norm, wavenumber, pp_delays,
                          2500, 80, 0.2, intensityE, pixel)

    def test_basesubed(self):
        self.assertTrue(np.all(self.pp.basesubed == self.intensity - 1))

    def test_normalized(self):
        self.assertTrue(
            np.all(self.pp.normalized == (self.intensity - 1) / self.norm))

    def test_to_and_from_json(self):
        os.chdir(dir_path)
        self.pp.to_json(Path("pumpprobe.json"))
        ppp = pysfg.json_to_pumpprobe(Path("results/pumpprobe.json"))
        self.assertTrue(np.all(self.pp.normalized - ppp.normalized < 0.0001))

    def test_PumpProbe_baseline_0(self):
        baseline = None
        pp = pysfg.PumpProbe(self.intensity, baseline, self.norm,
                             self.wavenumber, self.pp_delays, 2500, 80, 0.2,
                             self.intensityE, self.pixel)
        self.assertEqual(pp.basesubed.mean(), 67.76760137069422)

    def test_PumpProbe_baselines(self):
        for baseline in (1, np.ones_like(self.pixel),
                         np.ones_like(self.intensity)):
            pp = pysfg.PumpProbe(self.intensity, baseline, self.norm,
                                 self.wavenumber, self.pp_delays, 2500, 80,
                                 0.2, self.intensityE, self.pixel)
            self.assertEqual(pp.basesubed.mean(), 66.76760137069422)

    def test_PumpProbe_norms_0(self):
        norm = None
        pp = pysfg.PumpProbe(self.intensity, self.baseline, norm,
                             self.wavenumber, self.pp_delays, 2500, 80, 0.2,
                             self.intensityE, self.pixel)
        self.assertEqual(pp.normalized.mean(), 66.76760137069422)

    def test_PumpProbe_norms_1(self):
        pp = pysfg.PumpProbe(self.intensity, self.baseline, self.norm,
                             self.wavenumber, self.pp_delays, 2500, 80, 0.2,
                             self.intensityE, self.pixel)
        self.assertEqual(pp.normalized.mean(), 0.0823400053566616)

    def test_PumpProbe_norms_2(self):
        norm = 2
        pp = pysfg.PumpProbe(self.intensity, self.baseline, norm,
                             self.wavenumber, self.pp_delays, 2500, 80, 0.2,
                             self.intensityE, self.pixel)
        self.assertEqual(pp.normalized.mean(), 33.38380068534711)

    def test_PumpProbe_norms_3(self):
        norm = 100000 * gaussian(55, 40).pdf(self.xx) * gaussian(2, 2).pdf(
            self.yy) + 1
        pp = pysfg.PumpProbe(self.intensity, self.baseline, norm,
                             self.wavenumber, self.pp_delays, 2500, 80, 0.2,
                             self.intensityE, self.pixel)
        self.assertEqual(pp.normalized.mean(), 7.731632631662137)

    def test_substration(self):
        bleach = self.pp - self.pp2
        self.assertEqual(bleach.normalized.mean(), 0.0004376967887611049)

    def test_deviation(self):
        bleach = self.pp / self.pp2
        self.assertEqual(bleach.normalized.mean(), 1.0042468628341095)
Example No. 24
File: fast.py Project: belya/cyber
import numpy as np
from scipy.sparse import random
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize
import scipy.sparse as sps
from tqdm import tqdm_notebook

# scipy.stats has no `gaussian` distribution; `norm` is the Gaussian. The
# original call passed 25 positionally alongside loc=10, which only reads
# coherently as scale=25, loc=10.
normal_rvs = stats.norm(loc=10, scale=25).rvs

# # Uniformly distributed

test_matrix = random(100, 100, density=1)

plt.hist(test_matrix.todense().reshape(-1).tolist())

# # Normalized by rows

test_matrix = random(3, 4, density=1)

# Rows - senders
#
# Columns - receivers
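# A hedged continuation sketch: the sklearn import above suggests the matrix
# is normalized row-wise; norm='l1' makes each sender's outgoing weights sum
# to 1 (an assumption about intent, not the original file's code).
test_matrix = normalize(test_matrix, norm='l1', axis=1)
print(test_matrix.sum(axis=1))   # each row now sums to ~1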
Example No. 25
def run_em(x, w, phi, mu, sigma):
    """Problem 3(d): EM Algorithm (unsupervised).

    See inline comments for instructions.

    Args:
        x: Design matrix of shape (m, n).
        w: Initial weight matrix of shape (m, k).
        phi: Initial mixture prior, of shape (k,).
        mu: Initial cluster means, list of k arrays of shape (n,).
        sigma: Initial cluster covariances, list of k arrays of shape (n, n).

    Returns:
        Updated weight matrix of shape (m, k) resulting from EM algorithm.
        More specifically, w[i, j] should contain the probability of
        example x^(i) belonging to the j-th Gaussian in the mixture.
    """
    # No need to change any of these parameters
    eps = 1e-3  # Convergence threshold
    max_iter = 1000

    # Stop when the absolute change in log-likelihood is < eps
    # See below for explanation of the convergence criterion
    it = 0
    ll = prev_ll = None
    while it < max_iter and (prev_ll is None or np.abs(ll - prev_ll) >= eps):
        # Just a placeholder for the starter code
        # *** START CODE HERE ***
        # (1) E-step: Update your estimates in w
        # (2) M-step: Update the model parameters phi, mu, and sigma
        # (3) Compute the log-likelihood of the data to check for convergence.
        # By log-likelihood, we mean `ll = sum_x[log(sum_z[p(x|z) * p(z)])]`.
        # We define convergence by the first iteration where abs(ll - prev_ll) < eps.
        # Hint: For debugging, recall part (a). We showed that ll should be monotonically increasing.
        prev_ll = ll

        from scipy.stats import multivariate_normal as gaussian
        # x (m, n), mu (n), sigma (n, n)
        # loop over k to update w
        m, n = x.shape
        k = len(mu)
        for j, (mu_j, sigma_j) in enumerate(zip(mu, sigma)):
            w[:, j] = gaussian(mu_j, sigma_j).pdf(x) * phi[j]

        # normalize
        w = w / w.sum(axis=1, keepdims=True)

        # update mu and sigma
        for j in range(k):
            mu[j] = w[:, j].dot(x) / w[:, j].sum()
            # (n, n)
            sigma[j] = (x - mu[j]).T.dot(np.diag(
                w[:, j])).dot(x - mu[j]) / w[:, j].sum()
        phi = w.sum(axis=0) / m

        # compute log likelihood
        p_x = np.zeros(m)
        for j in range(k):
            # (m,)
            p_x_given_z = gaussian(mu[j], sigma[j]).pdf(x)
            p_x += p_x_given_z * phi[j]

        ll = np.sum(np.log(p_x))
        it += 1
        print('Iter {}, Likelihood {}'.format(it, ll))

        # *** END CODE HERE ***

    return w
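# A minimal smoke-test sketch for run_em on synthetic data; all values below
# are made up (the assignment's starter code supplies its own data and
# initialization).
import numpy as np

np.random.seed(0)
m, n, k = 200, 2, 2
x = np.vstack([np.random.randn(m // 2, n) - 2, np.random.randn(m // 2, n) + 2])
w0 = np.full((m, k), 1.0 / k)
phi0 = np.full(k, 1.0 / k)
mu0 = [x[np.random.choice(m)] for _ in range(k)]
sigma0 = [np.eye(n) for _ in range(k)]
w = run_em(x, w0, phi0, mu0, sigma0)
print(w.shape)   # (200, 2): per-example cluster responsibilities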
Example No. 26
def run_semi_supervised_em(x, x_tilde, z, w, phi, mu, sigma):
    """Problem 3(e): Semi-Supervised EM Algorithm.

    See inline comments for instructions.

    Args:
        x: Design matrix of unlabeled examples of shape (m, n).
        x_tilde: Design matrix of labeled examples of shape (m_tilde, n).
        z: Array of labels of shape (m_tilde, 1).
        w: Initial weight matrix of shape (m, k).
        phi: Initial mixture prior, of shape (k,).
        mu: Initial cluster means, list of k arrays of shape (n,).
        sigma: Initial cluster covariances, list of k arrays of shape (n, n).

    Returns:
        Updated weight matrix of shape (m, k) resulting from semi-supervised EM algorithm.
        More specifically, w[i, j] should contain the probability of
        example x^(i) belonging to the j-th Gaussian in the mixture.
    """
    # No need to change any of these parameters
    alpha = 20.  # Weight for the labeled examples
    eps = 1e-3  # Convergence threshold
    max_iter = 1000

    # Stop when the absolute change in log-likelihood is < eps
    # See below for explanation of the convergence criterion
    it = 0
    ll = prev_ll = None
    while it < max_iter and (prev_ll is None or np.abs(ll - prev_ll) >= eps):
        # Just a placeholder for the starter code
        # *** START CODE HERE ***
        # (1) E-step: Update your estimates in w
        # (2) M-step: Update the model parameters phi, mu, and sigma
        # (3) Compute the log-likelihood of the data to check for convergence.
        # Hint: Make sure to include alpha in your calculation of ll.
        # Hint: For debugging, recall part (a). We showed that ll should be monotonically increasing.
        prev_ll = ll

        from scipy.stats import multivariate_normal as gaussian
        # x (m, n), mu (n), sigma (n, n)
        # loop over k to update w
        m, n = x.shape
        m_tilde, n = x_tilde.shape
        k = len(mu)
        # w_tilde: (m_tilde, k) indicator
        w_tilde = np.zeros((m_tilde, k))
        for j, (mu_j, sigma_j) in enumerate(zip(mu, sigma)):
            w[:, j] = gaussian(mu_j, sigma_j).pdf(x) * phi[j]
            w_tilde[:, j] = (z == j).squeeze()
        # normalize
        w = w / w.sum(axis=1, keepdims=True)

        # update mu and sigma
        for j in range(k):
            mu[j] = (w[:, j].dot(x) + alpha * w_tilde[:, j].dot(x_tilde)) / (
                w[:, j].sum() + alpha * w_tilde[:, j].sum())
            # (n, n)
            sigma[j] = (
                ((x - mu[j]).T.dot(np.diag(w[:, j])).dot(x - mu[j]) + alpha *
                 (x_tilde - mu[j]).T.dot(np.diag(
                     w_tilde[:, j])).dot(x_tilde - mu[j])) /
                (w[:, j].sum() + alpha * w_tilde[:, j].sum()))
        phi = (w.sum(axis=0) + alpha * w_tilde.sum(axis=0))
        phi = phi / (m + alpha * m_tilde)

        # compute log likelihood
        p_x = np.zeros(m)
        for j in range(k):
            # (m,)
            p_x_given_z = gaussian(mu[j], sigma[j]).pdf(x)
            p_x += p_x_given_z * phi[j]
        p_x_z = np.zeros(m_tilde)
        for j in range(k):
            p_x_z += gaussian(mu[j], sigma[j]).pdf(x_tilde) * phi[j]

        ll = np.sum(np.log(p_x)) + alpha * np.sum(np.log(p_x_z))
        it += 1
        print('Iter {}, Likelihood {}'.format(it, ll))
        # *** END CODE HERE ***

    return w
Example No. 27
    def __init__(self, inarr, Nsamp, modeltype='gaussian'):
        super(clustermodel, self).__init__()
        self.inarr = inarr
        self.Nstars = len(self.inarr)
        self.starid = range(self.Nstars)
        self.Nsamp = Nsamp
        self.modeltype = modeltype

        # generate grid of samples for each star
        # self.starsamples = np.empty( (self.Nstars, self.Nsamp) )
        # for idx in self.starid:
        # 	self.starsamples[idx,:] = gaussian(
        # 		loc=self.inarr['Parallax'][idx],
        # 		scale=self.inarr['Parallax_Error'][idx]).rvs(size=self.Nsamp)
        """
		self.starsamples = np.array([{} for _ in range(self.Nstars)])
		for idx in self.starid:
			RAdist = gaussian(
				loc=self.inarr['RA'][idx], 
				scale=self.inarr['RA_Error'][idx]).rvs(size=self.Nsamp)
			Decdist = gaussian(
				loc=self.inarr['Dec'][idx], 
				scale=self.inarr['Dec_Error'][idx]).rvs(size=self.Nsamp)
			Distdist = 1000.0/gaussian(
				loc=self.inarr['Parallax'][idx], 
				scale=self.inarr['Parallax_Error'][idx]).rvs(size=self.Nsamp)

			Xarr = []
			Yarr = []
			Zarr = []
			for ra_i,dec_i,dist_i in zip(RAdist,Decdist,Distdist):
				c = SkyCoord(ra=ra_i*u.deg,dec=dec_i*u.deg,distance=dist_i*u.pc)
				Xarr.append(float(c.galactocentric.x.value))
				Yarr.append(float(c.galactocentric.y.value))
				Zarr.append(float(c.galactocentric.z.value))

			self.starsamples[idx] = ({
				'X':np.array(Xarr),
				'Y':np.array(Yarr),
				'Z':np.array(Zarr),
				})
		"""
        self.starsamples = np.empty((3, self.Nstars, self.Nsamp))
        for idx in range(self.Nstars):
            RAdist = gaussian(
                loc=self.inarr['RA'][idx],
                scale=self.inarr['RA_Error'][idx]).rvs(size=self.Nsamp)
            Decdist = gaussian(
                loc=self.inarr['Dec'][idx],
                scale=self.inarr['Dec_Error'][idx]).rvs(size=self.Nsamp)
            Distdist = 1000.0 / gaussian(
                loc=self.inarr['Parallax'][idx],
                scale=self.inarr['Parallax_Error'][idx]).rvs(size=self.Nsamp)

            for idd, dim in enumerate(['x', 'y', 'z']):
                c = SkyCoord(ra=RAdist * u.deg,
                             dec=Decdist * u.deg,
                             distance=Distdist * u.pc)
                if dim == 'x':
                    self.starsamples[idd, idx, :] = np.array(
                        c.galactocentric.x.value)
                elif dim == 'y':
                    self.starsamples[idd, idx, :] = np.array(
                        c.galactocentric.y.value)
                elif dim == 'z':
                    self.starsamples[idd, idx, :] = np.array(
                        c.galactocentric.z.value)
                else:
                    raise IOError
Example No. 28
# Define the distribution parameters to be plotted
k_values = [0.1, 1, 10, 100, 10E6]
color_values = ['r', 'magenta', 'k', 'g', 'c']
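# Hedged sketch: mu, scale, x, xmax, lw and DPI are defined earlier in the
# original script; plausible stand-in values (assumptions, not the original's):
mu, scale = 0.0, 1.0
xmax, lw, DPI = 5.0, 2, 100
x = np.linspace(-xmax, xmax, 1000)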

# Plot the student's t distributions
plt.figure(figsize=(8, 6), dpi=DPI)

for k, col in zip(k_values, color_values):
    # Reference
    # https://www.astroml.org/book_figures/chapter3/fig_student_t_distribution.html
    dist = student_t(df=k, loc=mu, scale=scale)
    label = r't ($\mathrm{\nu}=%.1f$)' % k
    plt.plot(x, dist.pdf(x), lw=lw, c=col, label=label)

# Finally plot Gaussian
dist = gaussian(loc=mu, scale=scale)
plt.plot(x, dist.pdf(x), lw=lw, c='blue', label='Gaussian')

plt.xlim(-xmax, xmax)
plt.ylim(0.0, 0.4)
plt.xlabel('$x$')
plt.ylabel(r'$p(x)$')
plt.title(r'Students $t$ and Gaussian Distribution')

plt.legend()
plt.grid(True)
path = 'coding_1.png'
print("Saving to " + path)
plt.savefig(path)

#==============================================================================
Example No. 29
    def _step(self, action):

        reward = 0.
        self.steps += 1

        # Move the robot
        error = self.np_random.normal(0, MOVERR)  # 0-mean error

        if action == 0:
            # Heading does not change
            self.loc += self.move(error)
        elif action == 1:
            # Rotate CCW and drive forward
            self.heading += (45. + error * 5)
            self.loc += self.move(error)
        elif action == 2:
            # Rotate CW and drive forward
            self.heading -= (20. + error * 5)
            self.loc += self.move(error)
        elif action == 3:
            # Rotate CCW
            self.heading += (20. + error * 5)
        elif action == 4:
            # Rotate CW
            self.heading -= (20. + error * 5)

        # Confine the robot to the world bounds
        self.loc = np.clip(self.loc, 0, self.bounds)

        # Get a radiation reading from the sources
        self.get_reading()

        # Update the particle filter
        self.PF.step(self.reading, np.atleast_2d(self.loc))
        # Return the heatmap of particles
        heatmap = self.PF.get_heatmap(subsampling_factor=self.map_sub)
        obs = self.append_map(heatmap)

        # Get reward
        reward += -0.3  # Cost of living

        # If there is a high probability of the source being at a location,
        # make a prediction, measure how far it is from the nearest source
        # (there can only be one prediction per source?), and score based on
        # that distance
        prediction_made = True
        means, covariances = self.PF.fit_gaussian(self.num_sources)
        if means is not None:
            for covar in covariances:
                v = np.linalg.eigvals(covar)
                v = 2 * np.sqrt(2 * v)

                if np.max(v) > self.cov_thresh:
                    prediction_made = False
                    break

            # Make a prediction if all covariances are small, or we reached max_steps
            if prediction_made or self.steps > self.max_steps:
                # match each prediction with nearest source, without replacement
                pred_nn, source_nn = util.greedy_nearest_neighbor(
                    means, self.sources)
                # compute distance
                dist = np.linalg.norm(pred_nn - source_nn, keepdims=True)
                # reward for each source decays with distance from source as a gaussian
                reward += 100 * gaussian(
                    0, self.cov_thresh).pdf(dist) / self.num_sources
                # Stop running
                self.done = True

        obs = np.atleast_3d(obs)

        if type(reward).__module__ == np.__name__:
            reward = reward.item()  # np.asscalar was removed in NumPy >= 1.23

        #obs = map? mean and xy location? (probably map since that will work best for A2C input)

        return obs, reward, self.done, {}
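# A standalone illustration of the reward shaping above: the per-source reward
# decays with prediction error as a Gaussian. cov_thresh = 1.0 is an assumed
# value, not the environment's.
from scipy.stats import norm as gaussian

cov_thresh = 1.0
for dist in (0.0, 0.5, 2.0):
    print(dist, 100 * gaussian(0, cov_thresh).pdf(dist))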