示例#1
0
    def testExp2d(self):
        """Learn an SGDE from the samples in ``exp_2d.csv`` and plot it in 3d.

        The training samples are scattered into the same axes at height zero
        and the axes title reports ``dist.vol``.
        """
        samples = np.loadtxt("exp_2d.csv").T

        # learner settings: fixed lambda (cross validation is disabled) and
        # ``sgde_makePositive`` switched on
        learnerConfig = {
            "grid_level": 7,
            "grid_type": "linear",
            "grid_maxDegree": 1,
            "refinement_numSteps": 0,
            "refinement_numPoints": 10,
            "solver_threshold": 1e-10,
            "solver_verbose": False,
            "regularization_type": "Laplace",
            "crossValidation_lambda": 0.000562341,
            "crossValidation_enable": False,
            "crossValidation_kfold": 5,
            "crossValidation_silent": False,
            "sgde_makePositive": True,
            "sgde_makePositive_candidateSearchAlgorithm": "joined",
            "sgde_makePositive_interpolationAlgorithm": "interpolateBoundaries1d",
            "sgde_unitIntegrand": True,
        }
        dist = SGDEdist.byLearnerSGDEConfig(samples, config=learnerConfig)

        fig, ax, _ = plotDensity3d(dist)
        xs = samples[:, 0]
        ys = samples[:, 1]
        zs = np.zeros(samples.shape[0])
        ax.scatter(xs, ys, zs)
        ax.set_title("vol=%.12f" % dist.vol)
        fig.show()
        plt.show()
示例#2
0
    def test1DNormalDist(self):
        """Learn a 1d SGDE from ``TNormal`` samples and compare it with the analytic density.

        Both densities are plotted into one figure; mean, variance, KL
        divergence, cross entropy and l2 error are printed.
        """
        # prepare data
        U = dists.TNormal(0.5, .2, -1, 2)
        np.random.seed(1234567)
        trainSamples = np.array([U.rvs(1000)]).T
        testSamples = np.array([U.rvs(1000)]).T

        # learner settings: cross validation is enabled, so no lambda is given
        learnerConfig = {
            "grid_level": 6,
            "grid_type": "modlinear",
            "grid_maxDegree": 3,
            "refinement_numSteps": 0,
            "refinement_numPoints": 10,
            "solver_threshold": 1e-10,
            "solver_verbose": True,
            "regularization_type": "Laplace",
            "crossValidation_enable": True,
            "crossValidation_kfold": 5,
            "crossValidation_silent": False,
            "sgde_makePositive": False,
            "sgde_makePositive_candidateSearchAlgorithm": "fullGrid",
            "sgde_makePositive_interpolationAlgorithm": "setToZero",
            "sgde_makePositive_verbose": True,
            "sgde_unitIntegrand": False,
        }
        dist = SGDEdist.byLearnerSGDEConfig(trainSamples,
                                            config=learnerConfig,
                                            bounds=np.array([U.getBounds()]))

        plt.figure()
        plotDensity1d(U, label="analytic")
        plotDensity1d(dist, label="sgde")
        plt.legend()

        def relativeError(reference, estimate):
            # deviation of the estimate, normalised by the reference value
            return np.abs(reference - estimate) / reference

        meanRow = (np.mean(trainSamples), dist.mean(),
                   relativeError(np.mean(trainSamples), dist.mean()))
        print("1d: mean = %g ~ %g (err=%g)" % meanRow)

        varRow = (np.var(trainSamples), dist.var(),
                  relativeError(np.var(trainSamples), dist.var()))
        print("1d: var = %g ~ %g (err=%g)" % varRow)

        klDivergence = U.klDivergence(dist, testSamples, testSamples)
        print("KL = %g" % klDivergence)
        crossEntropy = dist.crossEntropy(testSamples)
        print("CE = %g" % crossEntropy)
        l2error = dist.l2error(U, testSamples, testSamples)
        print("MSE = %g" % l2error)
        plt.show()
示例#3
0
    def test2DNormalDist(self):
        """Learn a 2d SGDE from samples of two independent normals and inspect it.

        Plots the analytic density and the learned sparse grid function, then
        prints mean and variance of both distributions followed by the KL
        divergence, cross entropy and l2 error evaluated on the test samples.
        """
        # prepare data
        # (a first joint distribution used to be built here and was
        # overwritten right away without ever being used; it has been removed)
        U = dists.J(
            [dists.Normal(0.5, .5, -1, 2),
             dists.Normal(0.5, .4, -1, 2)])

        np.random.seed(1234567)
        trainSamples = U.rvs(300)
        testSamples = U.rvs(1000)

        # build parameter set
        dist = SGDEdist.byLearnerSGDEConfig(
            trainSamples,
            config={
                "grid_level": 5,
                "grid_type": "modlinear",
                "refinement_numSteps": 0,
                "refinement_numPoints": 10,
                "regularization_type": "Laplace",
                "crossValidation_lambda": 0.000562341,
                "crossValidation_enable": False,
                "crossValidation_kfold": 5,
                "crossValidation_silent": False,
                "sgde_makePositive": False,
                "sgde_makePositive_candidateSearchAlgorithm": "joined",
                "sgde_makePositive_interpolationAlgorithm": "setToZero",
                "sgde_makePositive_generateConsistentGrid": False,
                "sgde_makePositive_verbose": True,
                "sgde_unitIntegrand": True
            },
            bounds=U.getBounds())

        # analytic density
        fig = plt.figure()
        plotDensity2d(U)
        fig.show()

        # learned sparse grid function, negative regions and grid points shown
        fig = plt.figure()
        plotSG2d(dist.grid,
                 dist.alpha,
                 addContour=True,
                 show_negative=True,
                 show_grid_points=True)
        fig.show()

        print("2d: mean = %g ~ %g" % (U.mean(), dist.mean()))
        print("2d: var = %g ~ %g" % (U.var(), dist.var()))
        # the error measures below are printed after the plot windows are closed
        plt.show()
        print("KL = %g" % U.klDivergence(dist, testSamples, testSamples))
        print("CE = %g" % dist.crossEntropy(testSamples))
        print("MSE = %g" % dist.l2error(U, testSamples, testSamples))
示例#4
0
    def test2DPPF(self):
        """Push uniform samples through ``ppf`` of a learned 2d SGDE and plot every stage.

        Four figures are shown: the true density, the estimated density, the
        uniformly drawn samples and the samples after the transformation.
        """
        # prepare data
        covariance = np.array([[0.1, 0.08], [0.08, 0.1]]) / 10.
        U = dists.MultivariateNormal([0.5, 0.5], covariance, 0, 1)
        trainingData = U.rvs(1000)

        def densityFigure(density, caption):
            # 2d density plot in a figure of its own
            figure = plt.figure()
            plotDensity2d(density)
            plt.title(caption)
            figure.show()

        def scatterFigure(points, caption):
            # scatter plot with both axes limited to [0, 1]
            figure = plt.figure()
            plt.plot(points[:, 0], points[:, 1], "o ")
            plt.title(caption)
            plt.xlim(0, 1)
            plt.ylim(0, 1)
            figure.show()

        densityFigure(U, 'true density')

        learnerConfig = {
            "grid_level": 5,
            "grid_type": "linear",
            "refinement_numSteps": 0,
            "refinement_numPoints": 10,
            "regularization_type": "Laplace",
            "crossValidation_lambda": 0.000562341,
            "crossValidation_enable": False,
            "crossValidation_kfold": 5,
            "crossValidation_silent": True,
        }
        dist = SGDEdist.byLearnerSGDEConfig(trainingData,
                                            config=learnerConfig,
                                            bounds=U.getBounds())
        densityFigure(dist, 'estimated SGDE density')

        unitSquare = dists.J([dists.Uniform(0, 1), dists.Uniform(0, 1)])
        uniformSamples = unitSquare.rvs(1000)
        scatterFigure(uniformSamples, 'uniformly drawn samples')

        mappedSamples = dist.ppf(uniformSamples)
        scatterFigure(mappedSamples, 'transformed samples')
        plt.show()
示例#5
0
    def test1DCDFandPPF(self):
        """Round-trip 1d samples through ``cdf`` and ``ppf`` of a learned SGDE.

        Three normalised histograms are drawn: the training samples, their
        image under ``dist.cdf`` and the result of mapping that image back
        with ``dist.ppf``. The first and last figure also show the analytic
        and the learned density.
        """
        # prepare data
        U = Normal(0.5, 0.1, 0, 1)
        train_samples = U.rvs(1000).reshape(1000, 1)

        dist = SGDEdist.byLearnerSGDEConfig(train_samples,
                                            config={
                                                "grid_level": 5,
                                                "grid_type": "poly",
                                                "refinement_numSteps": 0,
                                                "refinement_numPoints": 10,
                                                "regularization_type":
                                                "Laplace",
                                                "crossValidation_lambda":
                                                0.000562341,
                                                "crossValidation_enable":
                                                False,
                                                "crossValidation_kfold": 5,
                                                "crossValidation_silent": True
                                            },
                                            bounds=U.getBounds())

        # ``density=True`` replaces the ``normed`` keyword, which current
        # Matplotlib releases no longer accept
        fig = plt.figure()
        plt.hist(train_samples, bins=10, density=True)
        plotDensity1d(U)
        plotDensity1d(dist)
        plt.title("original space")
        fig.show()

        transformed_samples = dist.cdf(train_samples)

        fig = plt.figure()
        plt.hist(transformed_samples, bins=10, density=True)
        plt.title("uniform space")
        fig.show()

        # map the cdf values back with the inverse transformation
        transformed_samples = dist.ppf(transformed_samples)

        fig = plt.figure()
        plt.hist(transformed_samples, bins=10, density=True)
        plotDensity1d(U)
        plotDensity1d(dist)
        plt.title("original space")
        fig.show()
        plt.show()
示例#6
0
    def testExpPoly2d(self):
        """Plot an SGDE and a KDE built from the samples in ``exp_2d.csv``.

        Each density gets its own 3d figure; the SGDE title reports
        ``dist_sgde.vol``.
        """
        samples = np.loadtxt("exp_2d.csv").T

        # sparse grid density estimate with a fixed lambda
        sgdeConfig = {
            "grid_level": 4,
            "grid_type": "modpoly",
            "grid_maxDegree": 6,
            "refinement_numSteps": 0,
            "refinement_numPoints": 10,
            "solver_threshold": 1e-10,
            "solver_verbose": True,
            "regularization_type": "Laplace",
            "crossValidation_lambda": 0.000562341,
            "crossValidation_enable": False,
            "crossValidation_kfold": 5,
            "crossValidation_silent": True,
            "sgde_makePositive": False,
            "sgde_makePositive_candidateSearchAlgorithm": "joined",
            "sgde_makePositive_interpolationAlgorithm": "setToZero",
            "sgde_makePositive_verbose": True,
            "sgde_unitIntegrand": True,
        }
        dist_sgde = SGDEdist.byLearnerSGDEConfig(samples, config=sgdeConfig)

        # kernel density estimate built from the same samples
        kdeOptions = {
            "kernelType": KernelType_GAUSSIAN,
            "bandwidthOptimizationType": BandwidthOptimizationType_SILVERMANSRULE,
        }
        dist_kde = dists.KDEDist(samples, **kdeOptions)

        sgdeFigure, _, _ = plotDensity3d(dist_sgde)
        plt.title("SGDE: vol=%g" % dist_sgde.vol)

        kdeFigure, _, _ = plotDensity3d(dist_kde)
        plt.title("KDE: vol=1.0")
        plt.show()
示例#7
0
def estimateSGDEDensity(functionName,
                        trainSamples,
                        testSamples=None,
                        bounds=None,
                        iteration=0,
                        plot=False,
                        out=True,
                        label="sgde_zero",
                        candidates="intersections",
                        interpolation="setToZero"):
    """Search grid levels and refinement steps for the best positive SGDE.

    For every level in ``range(2, 7)`` and every number of refinement steps in
    ``range(0, 5)`` an SGDE is learned from ``trainSamples``, a positive copy
    is produced with ``createOperationMakePositive`` and that copy is scored
    by ``crossEntropy(testSamples)``. The positive density with the smallest
    score is kept and described in the returned statistics.

    :param functionName: stored as ``stats['config']['functionName']``
    :param trainSamples: 2d array of training samples (``shape[0]`` and
        ``shape[1]`` are read)
    :param testSamples: samples used for scoring.
        NOTE(review): defaults to None but is handed to ``crossEntropy`` and
        ``np.savetxt`` unconditionally -- callers presumably always pass it.
    :param bounds: forwarded to ``SGDEdist``
    :param iteration: integer tag that becomes part of all file names
    :param plot: if True, figures are produced for two-dimensional grids
    :param out: if True, statistics, grids, coefficients, configurations and
        samples are written below ``data/<label>``
    :param label: name of the result directory and prefix of the figure files
    :param candidates: name of the candidate search algorithm, resolved with
        ``strToCandidateSearchAlgorithm``
    :param interpolation: name of the interpolation algorithm, resolved with
        ``strToInterpolationAlgorithm``
    :return: tuple ``(bestDist, stats)``
    """
    print("train: %i x %i (mean=%g, var=%g)" %
          (trainSamples.shape[0], trainSamples.shape[1], np.mean(trainSamples),
           np.var(trainSamples)))
    if testSamples is not None:
        print("test : %i x %i (mean=%g, var=%g)" %
              (testSamples.shape[0], testSamples.shape[1],
               np.mean(testSamples), np.var(testSamples)))

    candidateSearchAlgorithm = strToCandidateSearchAlgorithm(candidates)
    interpolationAlgorithm = strToInterpolationAlgorithm(interpolation)

    results = {}
    crossEntropies = {}
    # grid_level and refinement_numSteps are overwritten in every iteration
    config = {
        "grid_level": 1,
        "grid_type": "linear",
        "grid_maxDegree": 1,
        "refinement_numSteps": 0,
        "refinement_numPoints": 3,
        "solver_threshold": 1e-10,
        "solver_verbose": False,
        "regularization_type": "Laplace",
        "crossValidation_enable": True,
        "crossValidation_kfold": 5,
        "crossValidation_silent": True,
        "sgde_makePositive": False
    }

    pathResults = os.path.join("data", label)
    key = 1
    bestCV = float("Inf")
    bestDist = None

    def saveFigure(stem):
        # every figure is stored twice: as jpg and as pdf
        for extension in ("jpg", "pdf"):
            plt.savefig(
                os.path.join(pathResults, "%s.%s" % (stem, extension)))

    # stats
    stats = {
        'config': {
            'functionName': functionName,
            'numDims': 2,
            'adaptive': True,
            'refnums': 0,
            'consistentGrid': True,
            'candidateSearchAlgorithm': candidates,
            'interpolationAlgorithm': interpolation,
            'maxNumGridPoints': 0,
            'iteration': iteration
        },
        'trainSamples': trainSamples,
        'testSamples': testSamples
    }

    for level in range(2, 7):
        print("-" * 60)
        print("l=%i" % level)
        for refinementSteps in range(0, 5):
            config["grid_level"] = level
            config["refinement_numSteps"] = refinementSteps
            sgdeDist = SGDEdist.byLearnerSGDEConfig(trainSamples,
                                                    config=config,
                                                    bounds=bounds)
            # -----------------------------------------------------------
            grid, alpha = sgdeDist.grid, sgdeDist.alpha
            cvSgde = sgdeDist.crossEntropy(testSamples)

            maxLevel = grid.getStorage().getMaxLevel()
            numDims = grid.getStorage().getDimension()

            print("  " + "-" * 30)
            print("  #ref = %i: gs=%i -> CV test = %g" %
                  (refinementSteps, sgdeDist.grid.getSize(), cvSgde))
            # -----------------------------------------------------------
            # make it positive; the operation works on copies of grid and
            # coefficients, so the learned density stays untouched
            positiveGrid = grid.clone()
            positiveAlpha_vec = DataVector(alpha)
            opPositive = createOperationMakePositive(candidateSearchAlgorithm,
                                                     interpolationAlgorithm,
                                                     True, False)
            opPositive.makePositive(positiveGrid, positiveAlpha_vec, True)

            positiveAlpha = positiveAlpha_vec.array()
            positiveSgdeDist = SGDEdist(positiveGrid,
                                        positiveAlpha,
                                        trainSamples,
                                        bounds=bounds)
            # -----------------------------------------------------------
            cvPositiveSgde = positiveSgdeDist.crossEntropy(testSamples)

            # shared part of all figure file names of this iteration
            figureTag = "i%i_l%i_r%i" % (iteration, level, refinementSteps)

            if plot and numDims == 2:
                fig = plt.figure()
                plotSG2d(grid,
                         alpha,
                         show_negative=True,
                         show_grid_points=True)
                plt.title("pos: N=%i: vol=%g, log=%g" %
                          (positiveGrid.getSize(),
                           doQuadrature(positiveGrid,
                                        positiveAlpha), cvPositiveSgde))
                plt.tight_layout()
                if out:
                    saveFigure("%s_density_pos_%s" % (label, figureTag))
                # close in both cases; previously the figure stayed open
                # when it had been saved, leaking one figure per iteration
                plt.close(fig)

            # -----------------------------------------------------------
            print("  positive: gs=%i -> CV test = %g" %
                  (positiveGrid.getSize(), cvPositiveSgde))
            # -----------------------------------------------------------
            # remember a snapshot of the configuration: ``config`` itself is
            # mutated in the next iteration, so storing the dict directly
            # would make all results report the last level / refinement step
            results[key] = {'config': dict(config), 'dist': positiveSgdeDist}
            crossEntropies[key] = cvPositiveSgde
            key += 1
            candidateSearch = opPositive.getCandidateSetAlgorithm()

            # select the best density available based on the cross entropy
            if cvPositiveSgde < bestCV:
                bestCV = cvPositiveSgde
                bestDist = positiveSgdeDist
                numComparisons = candidateSearch.costsComputingCandidates()

                # update the stats -> just for the current best one
                # C[d - 2] holds binom(M, d), where M is the number of
                # negative coefficients of the learned density
                C = np.ndarray(numDims - 1, dtype="int")
                M = np.sum([1 for i in range(len(alpha)) if alpha[i] < 0])
                for d in range(2, numDims + 1):
                    C[d - 2] = binom(M, d)

                stats['config']['refnums'] = refinementSteps
                stats['config']['adaptive'] = refinementSteps > 0
                stats['negSGDE_json'] = sgdeDist.toJson()
                stats['posSGDE_json'] = positiveSgdeDist.toJson()
                stats['level'] = level
                stats['maxLevel'] = maxLevel
                stats['fullGridSize'] = (2**maxLevel - 1)**numDims
                stats['sparseGridSize'] = grid.getSize()
                stats['discretizedGridSize'] = positiveGrid.getSize()
                stats['crossEntropyTrainZeroSGDE'] = sgdeDist.crossEntropy(
                    trainSamples)
                stats['crossEntropyTrainDiscretizedSGDE'] = \
                    positiveSgdeDist.crossEntropy(trainSamples)
                stats['crossEntropyTestZeroSGDE'] = cvSgde
                stats['crossEntropyTestDiscretizedSGDE'] = cvPositiveSgde
                stats['numCandidates'] = int(candidateSearch.numCandidates())
                stats['numCandidatesPerLevel'] = np.array(
                    candidateSearch.numCandidatesPerLevel().array(),
                    dtype="int")
                stats['numCandidatesPerIteration'] = np.array(
                    candidateSearch.numCandidatesPerIteration().array(),
                    dtype="int")
                stats['costsCandidateSearch'] = \
                    candidateSearch.costsComputingCandidates()
                stats['costsCandidateSearchBinomial'] = int(C.sum())
                stats['costsCandidateSearchPerIteration'] = np.array(
                    candidateSearch.costsComputingCandidatesPerIteration(
                    ).array(),
                    dtype="int")
                stats['costsCandidateSearchPerIterationBinomial'] = C

                if plot and numDims == 2:
                    fig = plt.figure()
                    plotSG2d(
                        positiveGrid,
                        positiveAlpha,
                        show_negative=True,
                        show_grid_points=False,
                        colorbarLabel=
                        r"$f_{\mathcal{I}^\text{SG} \cup \mathcal{I}^\text{ext}}$"
                    )
                    plt.title(r"positive: $N=%i/%i$; \# comparisons$=%i$" %
                              (positiveGrid.getSize(),
                               (2**maxLevel - 1)**numDims, numComparisons))
                    plt.xlabel(r"$\xi_1$")
                    plt.ylabel(r"$\xi_2$")
                    plt.tight_layout()
                    # these figures are saved regardless of ``out``
                    saveFigure("%s_pos_%s" % (label, figureTag))
                    if out:
                        plt.close(fig)

                    fig, ax, _ = plotSG3d(positiveGrid, positiveAlpha)
                    ax.set_zlabel(
                        r"$f_{\mathcal{I}^{\text{SG}} \cup \mathcal{I}^\text{ext}}(\xi_1, \xi_2)$",
                        fontsize=20)
                    ax.set_xlabel(r"$\xi_1$", fontsize=20)
                    ax.set_ylabel(r"$\xi_2$", fontsize=20)

                    plt.tight_layout()
                    saveFigure("%s_pos_%s_3d" % (label, figureTag))
                    if out:
                        plt.close(fig)

            if plot and numDims == 2 and not out:
                plt.show()

    if out:
        # save stats; pickle writes bytes, so the file has to be binary
        # (numDims and refinementSteps carry the values of the last iteration)
        filename = os.path.join(
            pathResults, "stats_d%i_a%i_r%i_i%i_%s_%s.pkl" %
            (numDims, 1, refinementSteps, iteration, candidates,
             interpolation))
        with open(filename, "wb") as fd:
            pkl.dump(stats, fd)
        print("stats saved to -> '%s'" % filename)

        # dictionary that stores the information on the estimated densities
        myjson = {
            "Grid": {
                "dimNames": ["phi", "log(K_A)"],
                "matrixEntries": ["phi", "log(K_A)"]
            },
            "Set": {
                "path": "",
                "grids": [],
                "alphas": [],
                "paramValues": [],
                "paramName": "grid_size"
            }
        }

        for resultKey, result in results.items():
            resultConfig = result['config']
            dist = result['dist']
            # serialize grid and coefficients
            stem = "sgde.i%i.k%i.N%i" % (iteration, resultKey,
                                         dist.grid.getSize())
            out_grid = os.path.join(pathResults, "%s.grid" % stem)
            out_alpha = os.path.join(pathResults, "%s.alpha.arff" % stem)
            writeGrid(out_grid, dist.grid)
            writeAlphaARFF(out_alpha, dist.alpha)

            # collect information for json
            myjson["Set"]["grids"].append(os.path.abspath(out_grid))
            myjson["Set"]["alphas"].append(os.path.abspath(out_alpha))
            myjson["Set"]["paramValues"].append(crossEntropies[resultKey])
            # -----------------------------------------------------------
            # serialize the config
            out_config = os.path.join(
                pathResults, "sgde.i%i.k%i.config" % (iteration, resultKey))
            with open(out_config, "w") as fd:
                json.dump(resultConfig, fd, ensure_ascii=True, indent=True)

            # turn the plain score into the row written to the csv file
            crossEntropies[resultKey] = (crossEntropies[resultKey], out_grid,
                                         out_alpha, out_config)

        # sort the results in myjson according to the cross entropy
        ixs = np.argsort(myjson["Set"]["paramValues"])
        for entry in ("grids", "alphas", "paramValues"):
            unsorted = myjson["Set"][entry]
            myjson["Set"][entry] = [unsorted[ix] for ix in ixs]

        # serialize myjson
        out_config = os.path.join(pathResults,
                                  "sgde_visualization.i%i.config" % iteration)
        with open(out_config, "w") as fd:
            json.dump(myjson, fd, ensure_ascii=True, indent=True)

        # serialize cross entropies; the csv module expects a text file
        # opened with newline=""
        out_crossEntropies = os.path.join(
            pathResults, "sgde_cross_entropies.i%i.csv" % iteration)
        with open(out_crossEntropies, "w", newline="") as fd:
            file_writer = csv.writer(fd)
            file_writer.writerow(
                ["crossEntropy", "grid", "alpha", "sgdeConfig"])
            for row in crossEntropies.values():
                file_writer.writerow(row)

        # serialize samples
        np.savetxt(
            os.path.join(pathResults,
                         "sgde_train_samples.i%i.csv" % iteration),
            trainSamples)
        np.savetxt(
            os.path.join(pathResults, "sgde_test_samples.i%i.csv" % iteration),
            testSamples)

        # serialize best configuration to json
        out_bestDist = os.path.join(pathResults,
                                    "sgde_best_config.i%i.json" % iteration)
        with open(out_bestDist, "w") as fd:
            fd.write(bestDist.toJson())

    return bestDist, stats
示例#8
0
    def test2DCovarianceMatrix(self):
        """Compare covariance estimates of a KDE and an SGDE for a 2d normal.

        The SGDE is marginalised to each dimension, sent through a JSON round
        trip and plotted next to the analytic density and the KDE. Only the
        KDE covariance and correlation coefficients are asserted.
        """
        # prepare data
        np.random.seed(1234567)
        C = np.array([[0.3, 0.09], [0.09, 0.3]]) / 10.

        U = dists.MultivariateNormal([0.5, 0.5], C, 0, 1)
        samples = U.rvs(2000)
        kde = KDEDist(samples)

        learnerConfig = {
            "grid_level": 5,
            "grid_type": "linear",
            "grid_maxDegree": 1,
            "refinement_numSteps": 0,
            "refinement_numPoints": 10,
            "solver_threshold": 1e-10,
            "solver_verbose": False,
            "regularization_type": "Laplace",
            "crossValidation_lambda": 3.16228e-06,
            "crossValidation_enable": False,
            "crossValidation_kfold": 5,
            "crossValidation_silent": False,
            "sgde_makePositive": True,
            "sgde_makePositive_candidateSearchAlgorithm": "joined",
            "sgde_makePositive_interpolationAlgorithm": "setToZero",
            "sgde_generateConsistentGrid": True,
            "sgde_unitIntegrand": True,
        }
        sgde = SGDEdist.byLearnerSGDEConfig(samples,
                                            bounds=U.getBounds(),
                                            config=learnerConfig)

        # one-dimensional marginals of the learned density
        sgde_x1 = sgde.marginalizeToDimX(0)
        sgde_x2 = sgde.marginalizeToDimX(1)

        plt.figure()
        plotDensity1d(sgde_x1, label="x1")
        plotDensity1d(sgde_x2, label="x2")
        moments = (sgde_x1.mean(), sgde_x2.mean(), sgde_x1.var(),
                   sgde_x2.var())
        plt.title("mean: x1=%g, x2=%g; var: x1=%g, x2=%g" % moments)
        plt.legend()

        # from here on the density restored from its JSON form is used
        sgde = Dist.fromJson(json.loads(sgde.toJson()))

        plt.figure()
        plotDensity2d(U, addContour=True)
        plt.title("analytic")

        plt.figure()
        plotDensity2d(kde, addContour=True)
        plt.title("kde")

        plt.figure()
        plotDensity2d(sgde, addContour=True)
        integral = doQuadrature(sgde.grid, sgde.alpha)
        plt.title("sgde (I(f) = %g)" % (integral, ))

        # print the results
        print("E(x) ~ %g ~ %g" % (kde.mean(), sgde.mean()))
        print("V(x) ~ %g ~ %g" % (kde.var(), sgde.var()))
        print("-" * 60)

        print(kde.cov())
        print(sgde.cov())

        covError = np.linalg.norm(C - kde.cov())
        self.assertTrue(covError < 1e-2, "KDE cov wrong")
        corrError = np.linalg.norm(np.corrcoef(samples.T) - kde.corrcoeff())
        self.assertTrue(corrError < 1e-1, "KDE corrcoef wrong")
        plt.show()
示例#9
0
    def test2DCDFandPPF(self, plot=True):
        """Send uniform samples through ``ppf`` and back through ``cdf`` of a 2d SGDE.

        :param plot: if True every stage is shown as a figure, otherwise the
            sample arrays are printed
        """
        # prepare data
        C = np.array([[0.1, 0.08], [0.08, 0.1]]) / 10.
        U = dists.MultivariateNormal([0.5, 0.5], C, 0, 1)
        train_samples = U.rvs(1000)

        def showDensity(density, caption):
            # density figures are only produced in plot mode
            if not plot:
                return
            figure = plt.figure()
            plotDensity2d(density)
            plt.title(caption)
            figure.show()

        def report(points, caption):
            # plot mode: scatter on [0, 1] x [0, 1]; otherwise dump the array
            if not plot:
                print("-" * 80)
                print(points)
                return
            figure = plt.figure()
            plt.plot(points[:, 0], points[:, 1], "o ")
            plt.title(caption)
            plt.xlim(0, 1)
            plt.ylim(0, 1)
            figure.show()

        showDensity(U, 'true density')

        learnerConfig = {
            "grid_level": 5,
            "grid_type": "polyClenshawCurtis",
            "refinement_numSteps": 0,
            "refinement_numPoints": 10,
            "regularization_type": "Laplace",
            "crossValidation_lambda": 0.000562341,
            "crossValidation_enable": False,
            "crossValidation_kfold": 5,
            "crossValidation_silent": True,
            "sgde_makePositive": False,
        }
        dist = SGDEdist.byLearnerSGDEConfig(train_samples,
                                            config=learnerConfig,
                                            bounds=U.getBounds())
        showDensity(dist, 'estimated SGDE density')

        unitSquare = dists.J([dists.Uniform(0, 1), dists.Uniform(0, 1)])
        samples = unitSquare.rvs(500)
        report(samples, 'u space')

        transformed_samples = dist.ppf(samples, shuffle=False)
        report(transformed_samples, 'x space (transformed)')

        samples = dist.cdf(transformed_samples, shuffle=False)
        report(samples, 'u space (transformed)')

        if plot:
            plt.show()
示例#10
0
    def test2DNormalMoments(self):
        """Check mean and variance of a 2d SGDE against the analytic distribution.

        A KDE built from the same samples serves as a second estimate in the
        printed output and in the figures; only the SGDE moments are asserted.
        """
        mean = 0
        var = 0.5

        marginals = [dists.Normal(mean, var, -2, 2),
                     dists.Normal(mean, var, -2, 2)]
        U = dists.J(marginals)

        np.random.seed(1234567)
        trainSamples = U.rvs(1000)

        learnerConfig = {
            "grid_level": 5,
            "grid_type": "linear",
            "refinement_numSteps": 0,
            "refinement_numPoints": 10,
            "regularization_type": "Laplace",
            "crossValidation_lambda": 0.000562341,
            "crossValidation_enable": False,
            "crossValidation_kfold": 5,
            "crossValidation_silent": True,
            "sgde_makePositive": True,
        }
        dist = SGDEdist.byLearnerSGDEConfig(trainSamples,
                                            config=learnerConfig,
                                            bounds=U.getBounds())
        samples_dist = dist.rvs(1000, shuffle=True)
        kde = KDEDist(trainSamples)
        samples_kde = kde.rvs(1000, shuffle=True)
        # -----------------------------------------------
        meanError = np.abs(U.mean() - dist.mean())
        self.assertTrue(meanError < 1e-2, "SGDE mean wrong")
        varError = np.abs(U.var() - dist.var())
        self.assertTrue(varError < 4e-2, "SGDE variance wrong")
        # -----------------------------------------------

        # print the results
        print("E(x) ~ %g ~ %g" % (kde.mean(), dist.mean()))
        print("V(x) ~ %g ~ %g" % (kde.var(), dist.var()))
        entropies = (kde.crossEntropy(trainSamples),
                     dist.crossEntropy(trainSamples))
        print("log  ~ %g ~ %g" % entropies)
        print("-" * 60)

        print(dist.cov())
        print(kde.cov())

        # marginals in the first dimension
        sgde_x1 = dist.marginalizeToDimX(0)
        kde_x1 = kde.marginalizeToDimX(0)

        plt.figure()
        plotDensity1d(U.getDistributions()[0], label="analytic")
        plotDensity1d(sgde_x1, label="sgde")
        plotDensity1d(kde_x1, label="kde")
        marginalMoments = (sgde_x1.mean(), kde_x1.mean(), sgde_x1.var(),
                           kde_x1.var())
        plt.title("mean: sgde=%g, kde=%g; var: sgde=%g, kde=%g" %
                  marginalMoments)
        plt.legend()

        plt.figure()
        plotDensity2d(U, addContour=True)
        plt.title("analytic")

        plt.figure()
        plotDensity2d(kde, addContour=True)
        plt.scatter(samples_kde[:, 0], samples_kde[:, 1])
        plt.title("kde")

        plt.figure()
        plotDensity2d(dist, addContour=True)
        plt.scatter(samples_dist[:, 0], samples_dist[:, 1])
        scaledIntegral = np.prod(U.getBounds()) * doQuadrature(
            dist.grid, dist.alpha)
        plt.title("sgde (I(f) = %g)" % (scaledIntegral, ))

        plt.show()
示例#11
0
# -------------------- prepare data
# covariance matrix and mean vector of the reference distribution
C = np.array([[0.1, 0.08],
              [0.08, 0.1]]) / 10.
m = np.array([0.5, 0.5])
U = MultivariateNormal(m, C, 0, 1)

# fixed seed, so both sample sets are reproducible
np.random.seed(12345)
samples = U.rvs(1000)
testSamples = U.rvs(1000)
# ---------- using SGDE from SG++ ------------------------
# fixed lambda; cross validation is disabled
dist = SGDEdist.byLearnerSGDEConfig(samples,
                                    config={"grid_level": 6,
                                            "grid_type": "Linear",
                                            "refinement_numSteps": 0,
                                            "refinement_numPoints": 3,
                                            "regularization_type": "Laplace",
                                            "crossValidation_lambda": 0.000562341,
                                            "crossValidation_enable": False,
                                            "crossValidation_kfold": 5,
                                            "crossValidation_silent": False},
                                    bounds=U.getBounds())

# Only figure and axes are needed here. Other call sites of plotDensity3d
# unpack three return values, so take the first two instead of unpacking
# exactly two.
# NOTE(review): confirm the return arity of plotDensity3d
fig, ax = plotDensity3d(U)[:2]
ax.set_title("true density")
fig.show()
fig, ax, _ = plotSG3d(dist.grid, dist.alpha)
ax.set_title("estimated density")
fig.show()

# compare moments of the learned density with reference values
print("mean = %g ~ %g" % (m.prod(), dist.mean()))
print("var = %g ~ %g" % (np.var(testSamples), dist.var()))
示例#12
0
def test_sgdeLaplace():
    """Plot the L2 error of SGDE and KDE estimates over the training-set size.

    For every training-set size in a fixed list, samples are drawn from ``U``
    (built with ``dists.J`` from two ``Lognormal`` distributions). One SGDE
    with Laplace regularization and cross validation is fitted per grid type
    in ``grids``, and one KDE (Gaussian kernel type, Silverman's-rule
    bandwidth option) is fitted per training set. The L2 error of each
    estimate with respect to ``U`` is collected and plotted over the
    training-set size on a logarithmic y-axis.

    The function takes no arguments, returns nothing, and blocks in
    ``plt.show()`` until the plot window is closed.
    """
    # Sample count handed to l2error() for every error evaluation.
    l2_samples = 10000
    sample_range = [10, 20, 50, 100, 200, 500]
    grids = ["linear",
             "modlinear",  # no OperationQuadrature (original author's note)
             "poly",
             "modpoly",
             "polyBoundary",
             "polyClenshawCurtis",
             "modPolyClenshawCurtis",
             "polyClenshawCurtisBoundary",
             "bsplineClenshawCurtis",
             "modBsplineClenshawCurtis",  # no OperationMultipleEval (original author's note)
             ]

    U = dists.J([dists.Lognormal.by_alpha(0.5, 0.1, 0.001),
                 dists.Lognormal.by_alpha(0.5, 0.1, 0.001)])

    # One result series per grid type; the KDE gets its own error series.
    # `points` records the grid sizes; it is collected but not plotted here.
    points = {grid_name: [] for grid_name in grids}
    l2_errors = {grid_name: [] for grid_name in grids}
    l2_errors["kde"] = []

    for samples in sample_range:
        trainSamples = U.rvs(samples)
        for grid_name in grids:
            # build parameter set
            print("--------------------Samples: {} Grid: {}--------------------".format(samples, grid_name))
            dist_sgde = SGDEdist.byLearnerSGDEConfig(
                trainSamples,
                bounds=U.getBounds(),
                unitIntegrand=True,
                config={"grid_level": 1,
                        "grid_type": grid_name,
                        "grid_maxDegree": 6,
                        "refinement_numSteps": 0,
                        "refinement_numPoints": 10,
                        "solver_threshold": 1e-10,
                        "solver_verbose": False,
                        "regularization_type": "Laplace",
                        "crossValidation_lambda": 1e-6,
                        "crossValidation_enable": True,
                        "crossValidation_kfold": 4,
                        "crossValidation_lambdaSteps": 10,
                        "crossValidation_silent": False})
            points[grid_name].append(dist_sgde.grid.getSize())
            l2_errors[grid_name].append(dist_sgde.l2error(U, n=l2_samples))

        dist_kde = dists.KDEDist(trainSamples,
                                 kernelType=KernelType_GAUSSIAN,
                                 bandwidthOptimizationType=BandwidthOptimizationType_SILVERMANSRULE)
        # Evaluate the KDE the same way as the SGDE above. The previous call
        # passed `testSamples`, a name that is never assigned in this function,
        # and therefore raised a NameError on the first iteration.
        # NOTE(review): assumes KDEDist.l2error accepts `n` like the SGDE
        # estimate does -- confirm against the distribution base class.
        l2_errors["kde"].append(dist_kde.l2error(U, n=l2_samples))

    for grid_name in grids:
        plt.plot(sample_range, l2_errors[grid_name], label=grid_name)

    plt.plot(sample_range, l2_errors["kde"], label="KDE")

    # NOTE(review): the x-axis shows the training-set sizes in `sample_range`,
    # while the label below reads "# grid points" in German; the label text is
    # left untouched here -- confirm which quantity the plot should be over.
    plt.xlabel("# Gitterpunkte")
    plt.ylabel("L2-Fehler")
    plt.yscale("log")
    plt.legend()
    plt.show()