def uniform_hyperrectangle_user(data, domain, center_pts_per_edge=1):
    r"""
    Build a simple function approximation of :math:`\rho_{\mathcal{D},M}`,
    a uniform probability density over the hyperrectangle given by
    ``domain``.

    The rectangle is reduced to its center and its per-dimension widths and
    the actual construction is delegated to
    :meth:`uniform_hyperrectangle_binsize`. Since :math:`\rho_\mathcal{D}`
    is uniform on a hyperrectangle it should be representable exactly with
    :math:`M=3^{m}` where m is the dimension of the data space.

    :param data: Array containing QoI data where the QoI is mdim
        dimensional
    :type data: :class:`~numpy.ndarray` of size (num_samples, mdim)
    :param domain: The domain over which :math:`\rho_\mathcal{D}` is
        uniform.
    :type domain: :class:`numpy.ndarray` of shape (2, mdim)
    :param list() center_pts_per_edge: number of center points per edge;
        it is forwarded unchanged to :meth:`uniform_hyperrectangle_binsize`

    :rtype: tuple
    :returns: the value returned by :meth:`uniform_hyperrectangle_binsize`,
        documented there as (rho_D_M, d_distr_samples, d_Tree)

    """
    # Bring both inputs to 2-D form; the domain is aligned to the number of
    # columns of the data.
    qoi = util.fix_dimensions_data(data)
    box = util.fix_dimensions_data(domain, qoi.shape[1])

    # Center and edge lengths of the rectangle, one entry per column.
    center = np.mean(box, 0)
    upper = np.max(box, 0)
    lower = np.min(box, 0)
    widths = upper - lower

    return uniform_hyperrectangle_binsize(qoi, center, widths,
                                          center_pts_per_edge)
def simple_fun_uniform(points, volumes, rect_domain):
    """
    Given a set of points, the volumes associated with these points, and
    ``rect_domain``, create a simple function approximation of a uniform
    distribution over the hyperrectangle defined by ``rect_domain``.

    Points lying inside the rectangle (boundaries included) receive a
    probability proportional to their volume; all other points receive 0.

    :param points: points used to define the voronoi tesselation (only the
        points that define regions of finite volumes)
    :type points: :class:`numpy.ndarray` of shape (num_points, mdim)
    :param volumes: finite volumes associated with ``points``
    :type volumes: :class:`numpy.ndarray` of shape (num_points,)
    :param rect_domain: minima and maxima of each dimension defining the
        hyperrectangle of uniform probability
    :type rect_domain: :class:`numpy.ndarray` of shape (mdim, 2)

    :rtype: tuple
    :returns: (rho_D_M, points, d_Tree) where ``rho_D_M`` has the shape of
        ``volumes``, ``points`` is the dimension-corrected point array and
        ``d_Tree`` is the :class:`~scipy.spatial.KDTree` for ``points``

    """
    # The normalized array must be kept: its result used to be discarded,
    # so 1-D input reached the ``axis=1`` reductions below unchanged.
    points = util.fix_dimensions_data(points)

    lower = rect_domain[:, 0]
    upper = rect_domain[:, 1]
    inside = np.logical_and(
        np.all(np.greater_equal(points, lower), axis=1),
        np.all(np.less_equal(points, upper), axis=1))

    rho_D_M = np.zeros(volumes.shape)
    # Normalize by the total volume of the cells inside the rectangle only,
    # so the probabilities of those cells sum to one.
    rho_D_M[inside] = volumes[inside] / np.sum(volumes[inside])

    d_Tree = spatial.KDTree(points)
    return (rho_D_M, points, d_Tree)
def simple_fun_uniform(points, volumes, rect_domain):
    """
    Given a set of points, the volumes associated with these points, and
    ``rect_domain``, create a simple function approximation of a uniform
    distribution over the hyperrectangle defined by ``rect_domain``.

    Points lying inside the rectangle (boundaries included) receive a
    probability proportional to their volume; all other points receive 0.

    :param points: points used to define the voronoi tesselation (only the
        points that define regions of finite volumes)
    :type points: :class:`numpy.ndarray` of shape (num_points, mdim)
    :param volumes: finite volumes associated with ``points``
    :type volumes: :class:`numpy.ndarray` of shape (num_points,)
    :param rect_domain: minima and maxima of each dimension defining the
        hyperrectangle of uniform probability
    :type rect_domain: :class:`numpy.ndarray` of shape (mdim, 2)

    :rtype: tuple
    :returns: (rho_D_M, points, d_Tree) where ``rho_D_M`` has the shape of
        ``volumes``, ``points`` is the dimension-corrected point array and
        ``d_Tree`` is the :class:`~scipy.spatial.KDTree` for ``points``

    """
    # The normalized array must be kept: its result used to be discarded,
    # so 1-D input reached the ``axis=1`` reductions below unchanged.
    points = util.fix_dimensions_data(points)

    lower = rect_domain[:, 0]
    upper = rect_domain[:, 1]
    inside = np.logical_and(
        np.all(np.greater_equal(points, lower), axis=1),
        np.all(np.less_equal(points, upper), axis=1))

    rho_D_M = np.zeros(volumes.shape)
    # Normalize by the total volume of the cells inside the rectangle only,
    # so the probabilities of those cells sum to one.
    rho_D_M[inside] = volumes[inside] / np.sum(volumes[inside])

    d_Tree = spatial.KDTree(points)
    return (rho_D_M, points, d_Tree)
def regular_partition_uniform_distribution_rectangle_domain(
        data_set, rect_domain, cells_per_dimension=1):
    r"""
    Build a simple function approximation of :math:`\rho_{\mathcal{D},M}`,
    a uniform probability density over the hyperrectangle given by
    ``rect_domain``.

    The rectangle is reduced to its center and per-dimension lengths and the
    work is delegated to
    :meth:`regular_partition_uniform_distribution_rectangle_size`.

    :param data_set: Sample set that the probability measure is defined for.
    :type data_set: :class:`~bet.sample.discretization` or
        :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray`
    :param rect_domain: The domain over which :math:`\rho_\mathcal{D}` is
        uniform.
    :type rect_domain: :class:`numpy.ndarray` of shape (2, mdim)
    :param list cells_per_dimension: number of cells per dimension

    :rtype: :class:`~bet.sample.rectangle_sample_set`
    :returns: sample_set object defining simple function approximation

    """
    # Only the values array is used here: its column count fixes the
    # dimension the rectangle is aligned to.
    _, _, values = check_inputs_no_reference(data_set)
    mdim = values.shape[1]

    bounds = util.fix_dimensions_data(rect_domain, mdim)
    center = np.mean(bounds, 0)
    lengths = np.max(bounds, 0) - np.min(bounds, 0)

    return regular_partition_uniform_distribution_rectangle_size(
        data_set, center, lengths, cells_per_dimension)
def uniform_data(data):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}`
    where :math:`\rho_{\mathcal{D},M}` is a uniform probability density over
    the entire ``data_domain``. Here the ``data_domain`` is the union of
    voronoi cells defined by ``data``. In other words we assign each sample
    the same probability, so ``M = len(data)`` or rather
    ``len(d_distr_samples) == len(data)``. The purpose of this method is to
    approximate uniform distributions over irregularly shaped domains.

    :param data: Array containing QoI data where the QoI is mdim
        dimensional
    :type data: :class:`~numpy.ndarray` of size (num_samples, mdim)

    :rtype: tuple
    :returns: (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,)
        with every entry equal to ``1/M``, ``d_distr_samples`` is the
        dimension-corrected ``data`` and ``d_Tree`` is the
        :class:`~scipy.spatial.KDTree` for d_distr_samples

    """
    data = util.fix_dimensions_data(data)
    num_samples = data.shape[0]
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; the builtin yields the same float64 dtype.
    d_distr_prob = np.ones((num_samples,), dtype=float) / num_samples
    d_Tree = spatial.KDTree(data)
    return (d_distr_prob, data, d_Tree)
def show_data_domain_2D(samples, data, Q_ref, ref_markers=None,
                        ref_colors=None, xlabel=r'$q_1$', ylabel=r'$q_2$',
                        triangles=None, save=True, interactive=False,
                        filenames=None):
    r"""
    Plot the data domain D using a triangulation based on the generating
    samples with a marker for various :math:`Q_{ref}`. Assumes that the
    first dimension of data is :math:`q_1`.

    :param samples: Samples to plot
    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim)
    :param data: Data associated with ``samples``
    :type data: :class:`numpy.ndarray`
    :param Q_ref: reference data value
    :type Q_ref: :class:`numpy.ndarray` of shape (M, 2)
    :param list ref_markers: list of marker types for :math:`Q_{ref}`;
        defaults to the module-level ``markers``
    :param list ref_colors: list of colors for :math:`Q_{ref}`; defaults to
        the module-level ``colors``
    :param string xlabel: x-axis label
    :param string ylabel: y-axis label
    :param triangles: triangulation defined by ``samples``; computed from
        the first two columns of ``samples`` when omitted
    :type triangles: :class:`tri.Triangulation.triangles`
    :param bool save: flag whether or not to save the marked figure
        (``filenames[1]``); the unmarked figure (``filenames[0]``) is
        always written
    :param bool interactive: flag whether or not to show the figure; the
        figure is closed when this is false
    :param list filenames: file names for the unmarked and marked domain
        plots

    """
    # Identity checks: ``== None`` is evaluated elementwise when an array is
    # passed, which makes the ``if`` ambiguous.
    if ref_markers is None:
        ref_markers = markers
    if ref_colors is None:
        ref_colors = colors
    if triangles is None:
        triangulation = tri.Triangulation(samples[:, 0], samples[:, 1])
        triangles = triangulation.triangles
    if filenames is None:
        filenames = ['domain_q1_q2_cs.eps', 'q1_q2_domain_Q_cs.eps']

    Q_ref = util.fix_dimensions_data(Q_ref, 2)

    # Create figure
    plt.tricontourf(data[:, 0], data[:, 1], np.zeros((data.shape[0],)),
                    triangles=triangles, colors='grey')
    plt.autoscale(tight=True)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # The unmarked domain plot is saved regardless of ``save``.
    plt.savefig(filenames[0], bbox_inches='tight', transparent=True,
                pad_inches=0)

    # Add truth markers (``range`` works on both Python 2 and 3, unlike
    # ``xrange``).
    for i in range(Q_ref.shape[0]):
        plt.scatter(Q_ref[i, 0], Q_ref[i, 1], s=60, c=ref_colors[i],
                    marker=ref_markers[i])
    if save:
        plt.savefig(filenames[1], bbox_inches='tight', transparent=True,
                    pad_inches=0)
    if interactive:
        plt.show()
    else:
        plt.close()
def test_fix_dimensions_data_nodim():
    """
    Tests :meth:`bet.util.fix_dimensions_data` when ``dim`` is not
    specified.
    """
    # ``list(range(2))`` keeps the input a plain list on Python 2 and 3.
    values = [1, [1], list(range(2)), np.empty((2,)), np.empty((2, 1)),
              np.empty((1, 2)), np.empty((5, 2)), np.empty((2, 5))]
    shapes = [(1, 1), (1, 1), (2, 1), (2, 1), (2, 1), (1, 2), (5, 2),
              (2, 5)]
    print(len(values), len(shapes))
    # ``zip`` silently truncates, so make a length mismatch a failure.
    assert len(values) == len(shapes)
    for value, shape in zip(values, shapes):
        vector = util.fix_dimensions_data(value)
        print(vector, value)
        print(vector.shape, shape)
        assert vector.shape == shape
def uniform_hyperrectangle_binsize(data, Q_ref, bin_size,
                                   center_pts_per_edge=1):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}`
    where :math:`\rho_{\mathcal{D},M}` is a uniform probability density
    centered at ``Q_ref`` with ``bin_size`` of the width of D.

    Since rho_D is a uniform distribution on a hyperrectangle we should be
    able to represent it exactly with ``M = 3^mdim`` or rather
    ``len(d_distr_samples) == 3^mdim``.

    :param data: Array containing QoI data where the QoI is mdim
        dimensional
    :type data: :class:`~numpy.ndarray` of size (num_samples, mdim)
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)
    :param bin_size: The size used to determine the width of the uniform
        distribution; a scalar is repeated for every dimension
    :type bin_size: double or list()
    :param list() center_pts_per_edge: number of center points per edge and
        additional two points will be added to create the bounding layer;
        a scalar is repeated for every dimension and a sequence of the
        wrong length is replaced by 1 in each dimension (with a warning)

    :rtype: tuple
    :returns: the value of ``vHist.simple_fun_uniform``, documented as
        (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,) and
        ``d_distr_samples`` are (M, mdim) :class:`~numpy.ndarray` and
        ``d_Tree`` is the :class:`~scipy.spatial.KDTree` for
        d_distr_samples

    """
    data = util.fix_dimensions_data(data)
    mdim = data.shape[1]

    # ``np.iterable`` replaces ``collections.Iterable``, which no longer
    # exists on Python >= 3.10.
    if not np.iterable(center_pts_per_edge):
        center_pts_per_edge = np.ones((mdim,)) * center_pts_per_edge
    elif len(center_pts_per_edge) != mdim:
        center_pts_per_edge = np.ones((mdim,))
        print('Warning: center_pts_per_edge dimension mismatch.')
        print('Using 1 in each dimension.')
    # NOTE(review): only negative values trigger the warnings below although
    # the messages say "greater than 0" -- confirm whether 0 is acceptable.
    if np.any(np.less(center_pts_per_edge, 0)):
        print('Warning: center_pts_per_edge must be greater than 0')

    if not np.iterable(bin_size):
        bin_size = bin_size * np.ones((mdim,))
    if np.any(np.less(bin_size, 0)):
        # This message used to name center_pts_per_edge by mistake.
        print('Warning: bin_size must be greater than 0')

    # Bounding box of the data, one (min, max) row per dimension.
    sur_domain = np.array([np.min(data, 0), np.max(data, 0)]).transpose()

    points, _, rect_domain = vHist.center_and_layer1_points_binsize(
        center_pts_per_edge, Q_ref, bin_size, sur_domain)
    edges = vHist.edges_regular(center_pts_per_edge, rect_domain,
                                sur_domain)
    _, volumes, _ = vHist.histogramdd_volumes(edges, points)
    return vHist.simple_fun_uniform(points, volumes, rect_domain)
def uniform_hyperrectangle_binsize(data, Q_ref, bin_size,
                                   center_pts_per_edge=1):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}`
    where :math:`\rho_{\mathcal{D},M}` is a uniform probability density
    centered at ``Q_ref`` with ``bin_size`` of the width of D.

    Since rho_D is a uniform distribution on a hyperrectangle we should be
    able to represent it exactly with ``M = 3^mdim`` or rather
    ``len(d_distr_samples) == 3^mdim``.

    :param data: Array containing QoI data where the QoI is mdim
        dimensional
    :type data: :class:`~numpy.ndarray` of size (num_samples, mdim)
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)
    :param bin_size: The size used to determine the width of the uniform
        distribution; a scalar is repeated for every dimension
    :type bin_size: double or list()
    :param list() center_pts_per_edge: number of center points per edge and
        additional two points will be added to create the bounding layer;
        a scalar is repeated for every dimension and a sequence of the
        wrong length is replaced by 1 in each dimension (with a warning)

    :rtype: tuple
    :returns: the value of ``vHist.simple_fun_uniform``, documented as
        (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,) and
        ``d_distr_samples`` are (M, mdim) :class:`~numpy.ndarray` and
        ``d_Tree`` is the :class:`~scipy.spatial.KDTree` for
        d_distr_samples

    """
    data = util.fix_dimensions_data(data)
    mdim = data.shape[1]

    # ``np.iterable`` replaces ``collections.Iterable``, which no longer
    # exists on Python >= 3.10.
    if not np.iterable(center_pts_per_edge):
        center_pts_per_edge = np.ones((mdim,)) * center_pts_per_edge
    elif len(center_pts_per_edge) != mdim:
        center_pts_per_edge = np.ones((mdim,))
        print('Warning: center_pts_per_edge dimension mismatch.')
        print('Using 1 in each dimension.')
    # NOTE(review): only negative values trigger the warnings below although
    # the messages say "greater than 0" -- confirm whether 0 is acceptable.
    if np.any(np.less(center_pts_per_edge, 0)):
        print('Warning: center_pts_per_edge must be greater than 0')

    if not np.iterable(bin_size):
        bin_size = bin_size * np.ones((mdim,))
    if np.any(np.less(bin_size, 0)):
        # This message used to name center_pts_per_edge by mistake.
        print('Warning: bin_size must be greater than 0')

    # Bounding box of the data, one (min, max) row per dimension.
    sur_domain = np.array([np.min(data, 0), np.max(data, 0)]).transpose()

    points, _, rect_domain = vHist.center_and_layer1_points_binsize(
        center_pts_per_edge, Q_ref, bin_size, sur_domain)
    edges = vHist.edges_regular(center_pts_per_edge, rect_domain,
                                sur_domain)
    _, volumes, _ = vHist.histogramdd_volumes(edges, points)
    return vHist.simple_fun_uniform(points, volumes, rect_domain)
def test_fix_dimensions_data_dim():
    """
    Tests :meth:`bet.util.fix_dimensions_data` when ``dim`` is specified.
    """
    # One row per case: (input value, requested dim, expected shape).
    cases = [
        (1, 1, (1, 1)),
        ([1], 1, (1, 1)),
        (np.arange(2), 2, (1, 2)),
        (np.empty((2,)), 2, (1, 2)),
        (np.empty((2, 1)), 2, (1, 2)),
        (np.empty((1, 2)), 2, (1, 2)),
        (np.empty((5, 2)), 2, (5, 2)),
        (np.empty((2, 5)), 2, (5, 2)),
        (np.empty((5, 2)), 5, (2, 5)),
        (np.empty((2, 5)), 5, (2, 5)),
    ]
    for value, dim, shape in cases:
        vector = util.fix_dimensions_data(value, dim)
        print(vector, value)
        print(vector.shape, shape, dim)
        assert vector.shape == shape
def uniform_partition_uniform_distribution_rectangle_domain(
        data_set, rect_domain, M=50, num_d_emulate=1E6):
    r"""
    Build a simple function approximation of :math:`\rho_{\mathcal{D}}`,
    a uniform probability density on the generalized rectangle given by
    ``rect_domain``.

    The approximation is defined on ``M`` Voronoi cells (i.e., "bins")
    partitioning :math:`\mathcal{D}`, which are only implicitly defined by
    ``M`` samples in :math:`\mathcal{D}`. The probability of each bin is
    computed by sampling from :math:`\rho_{\mathcal{D}}` and binning those
    samples with nearest neighbor searches. The result is denoted
    :math:`\rho_{\mathcal{D},M}`; all later computations in the
    measure-theoretic framework are for this fixed approximation.

    This function only converts ``rect_domain`` to a center and
    per-dimension lengths and forwards everything to
    :meth:`uniform_partition_uniform_distribution_rectangle_size`.

    :param data_set: Sample set that the probability measure is defined for.
    :type data_set: :class:`~bet.sample.discretization` or
        :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray`
    :param rect_domain: The support of the density; its extremes and mean
        are taken along axis 0 after dimension correction
    :type rect_domain: double or list
    :param int M: Defines number M samples in D used to define
        :math:`\rho_{\mathcal{D},M}`. The choice of M is something of an
        "art" - play around with it and you can get reasonable results with
        a relatively small number here like 50.
    :param int num_d_emulate: Number of samples used to emulate using an MC
        assumption

    :rtype: :class:`~bet.sample.voronoi_sample_set`
    :returns: sample_set object defining simple function approximation

    """
    # Only the values array is used: its column count is the data dimension.
    _, _, values = check_inputs_no_reference(data_set)
    mdim = values.shape[1]

    support = util.fix_dimensions_data(rect_domain, mdim)
    center = np.mean(support, 0)
    lengths = np.max(support, 0) - np.min(support, 0)

    return uniform_partition_uniform_distribution_rectangle_size(
        data_set, center, lengths, M, num_d_emulate)
def test_fix_dimensions_data_nodim():
    """
    Tests :meth:`bet.util.fix_dimensions_data` when ``dim`` is not
    specified.
    """
    # ``list(range(2))`` keeps the input a plain list on Python 2 and 3.
    values = [
        1, [1],
        list(range(2)),
        np.empty((2, )),
        np.empty((2, 1)),
        np.empty((1, 2)),
        np.empty((5, 2)),
        np.empty((2, 5))
    ]
    shapes = [(1, 1), (1, 1), (2, 1), (2, 1), (2, 1), (1, 2), (5, 2),
              (2, 5)]
    print(len(values), len(shapes))
    # ``zip`` silently truncates, so make a length mismatch a failure.
    assert len(values) == len(shapes)
    for value, shape in zip(values, shapes):
        vector = util.fix_dimensions_data(value)
        print(vector, value)
        print(vector.shape, shape)
        assert vector.shape == shape
def test_fix_dimensions_data_dim():
    """
    Tests :meth:`bet.util.fix_dimensions_data` when ``dim`` is specified.
    """
    # ``list(range(2))`` keeps the input a plain list on Python 2 and 3.
    values = [
        1, [1],
        list(range(2)),
        np.empty((2, )),
        np.empty((2, 1)),
        np.empty((1, 2)),
        np.empty((5, 2)),
        np.empty((2, 5)),
        np.empty((5, 2)),
        np.empty((2, 5))
    ]
    shapes = [(1, 1), (1, 1), (1, 2), (1, 2), (1, 2), (1, 2), (5, 2),
              (5, 2), (2, 5), (2, 5)]
    dims = [1, 1, 2, 2, 2, 2, 2, 2, 5, 5]
    # ``zip`` silently truncates, so make a length mismatch a failure.
    assert len(values) == len(shapes) == len(dims)
    for value, shape, dim in zip(values, shapes, dims):
        vector = util.fix_dimensions_data(value, dim)
        print(vector, value)
        print(vector.shape, shape, dim)
        assert vector.shape == shape
def regular_partition_uniform_distribution_rectangle_domain(
        data_set, rect_domain, cells_per_dimension=1):
    r"""
    Build a simple function approximation of :math:`\rho_{\mathcal{D},M}`,
    a uniform probability density over the hyperrectangle given by
    ``rect_domain``.

    The rectangle is reduced to its center and per-dimension lengths and the
    work is delegated to
    :meth:`regular_partition_uniform_distribution_rectangle_size`.

    :param data_set: Sample set that the probability measure is defined for.
    :type data_set: :class:`~bet.sample.discretization` or
        :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray`
    :param rect_domain: The domain over which :math:`\rho_\mathcal{D}` is
        uniform.
    :type rect_domain: :class:`numpy.ndarray` of shape (2, mdim)
    :param list cells_per_dimension: number of cells per dimension

    :rtype: :class:`~bet.sample.rectangle_sample_set`
    :returns: sample_set object defining simple function approximation

    """
    # Only the values array is used here: its column count fixes the
    # dimension the rectangle is aligned to.
    _, _, values = check_inputs_no_reference(data_set)
    mdim = values.shape[1]

    bounds = util.fix_dimensions_data(rect_domain, mdim)
    center = np.mean(bounds, 0)
    lengths = np.max(bounds, 0) - np.min(bounds, 0)

    return regular_partition_uniform_distribution_rectangle_size(
        data_set, center, lengths, cells_per_dimension)
def uniform_hyperrectangle(data, Q_ref, bin_ratio, center_pts_per_edge=1):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}`
    where :math:`\rho_{\mathcal{D},M}` is a uniform probability density
    centered at ``Q_ref`` with ``bin_ratio`` of the width of D.

    Since rho_D is a uniform distribution on a hyperrectangle we should be
    able to represent it exactly with ``M = 3^mdim`` or rather
    ``len(d_distr_samples) == 3^mdim``.

    :param data: Array containing QoI data where the QoI is mdim
        dimensional
    :type data: :class:`~numpy.ndarray` of size (num_samples, mdim)
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)
    :param bin_ratio: The ratio used to determine the width of the
        uniform distribution as ``bin_size = (data_max-data_min)*bin_ratio``;
        a scalar is repeated for every dimension
    :type bin_ratio: double or list()
    :param list() center_pts_per_edge: number of center points per edge;
        forwarded unchanged to :meth:`uniform_hyperrectangle_binsize`

    :rtype: tuple
    :returns: the value returned by :meth:`uniform_hyperrectangle_binsize`,
        documented there as (rho_D_M, d_distr_samples, d_Tree)

    """
    data = util.fix_dimensions_data(data)

    # ``np.iterable`` replaces ``collections.Iterable``, which no longer
    # exists on Python >= 3.10.
    if not np.iterable(bin_ratio):
        bin_ratio = bin_ratio * np.ones((data.shape[1],))

    # Width of the distribution as a fraction of the data range, taken per
    # dimension.
    bin_size = (np.max(data, 0) - np.min(data, 0)) * bin_ratio
    return uniform_hyperrectangle_binsize(data, Q_ref, bin_size,
                                          center_pts_per_edge)
def show_data_domain_multi(sample_disc, Q_ref=None, Q_nums=None,
                           img_folder='figs/', ref_markers=None,
                           ref_colors=None, showdim=None,
                           file_extension=".png", markersize=75):
    r"""
    Plot 2-D projections of the data domain D using a triangulation based
    on the first two coordinates (parameters) of the generating samples,
    with a marker for various :math:`Q_{ref}`.

    :param sample_disc: Object containing the samples to plot
    :type sample_disc: :class:`~bet.sample.discretization`
    :param Q_ref: reference data value; when omitted the reference value
        stored on the output sample set is used (which may itself be None,
        in which case no reference is passed on)
    :type Q_ref: :class:`numpy.ndarray` of shape (M, mdim)
    :param list Q_nums: dimensions of the QoI to plot; defaults to all of
        them
    :param string img_folder: folder to save the plots to; created if it
        does not exist
    :param list ref_markers: list of marker types for :math:`Q_{ref}`
    :param list ref_colors: list of colors for :math:`Q_{ref}`
    :param showdim: defaults to 0. If int, plot that QoI against every
        other entry of ``Q_nums``; if ``'all'`` or ``'ALL'``, plot every
        pair drawn from ``Q_nums``.
    :type showdim: int or string
    :param string file_extension: file extension
    :param markersize: forwarded to :meth:`show_data_domain_2D`

    :raises bad_object: if ``sample_disc`` is not a
        :class:`~bet.sample.discretization`

    """
    if not isinstance(sample_disc, sample.discretization):
        raise bad_object("Improper sample object")

    # Fall back to the module-level defaults for markers and colors.
    ref_markers = markers if ref_markers is None else ref_markers
    ref_colors = colors if ref_colors is None else ref_colors

    data_obj = sample_disc._output_sample_set
    sample_obj = sample_disc._input_sample_set

    if Q_ref is None:
        Q_ref = data_obj._reference_value
    # Without explicit coordinate numbers every data coordinate is used.
    if Q_nums is None:
        Q_nums = list(range(data_obj.get_dim()))
    # Without an explicit choice the first coordinate direction is used.
    if showdim is None:
        showdim = 0

    # Create a folder for these figures if it doesn't already exist
    if not os.path.isdir(img_folder):
        os.mkdir(img_folder)

    # Make sure the shape of Q_ref is correct
    if Q_ref is not None:
        Q_ref = util.fix_dimensions_data(Q_ref, data_obj.get_dim())

    # The topology of the samples in the data space comes from a
    # triangulation of the first two parameters in the parameter space.
    triangulation = tri.Triangulation(sample_obj.get_values()[:, 0],
                                      sample_obj.get_values()[:, 1])
    triangles = triangulation.triangles

    def plot_pair(first, second):
        """Plot QoI ``first`` against QoI ``second`` (0-based indices)."""
        xlabel = r'$q_{' + str(first + 1) + r'}$'
        ylabel = r'$q_{' + str(second + 1) + r'}$'
        filenames = [
            img_folder + 'domain_q' + str(first + 1) + '_q' +
            str(second + 1),
            img_folder + 'q' + str(first + 1) + '_q' + str(second + 1) +
            '_domain_Q_cs'
        ]
        columns = [first, second]
        data_obj_temp = sample.sample_set(2)
        data_obj_temp.set_values(data_obj.get_values()[:, columns])
        sample_disc_temp = sample.discretization(sample_obj, data_obj_temp)
        # Restrict the reference values to the same two columns, if any.
        pair_ref = None if Q_ref is None else Q_ref[:, columns]
        show_data_domain_2D(sample_disc_temp, pair_ref, ref_markers,
                            ref_colors, xlabel=xlabel, ylabel=ylabel,
                            triangles=triangles, save=True,
                            interactive=False, filenames=filenames,
                            file_extension=file_extension,
                            markersize=markersize)

    if isinstance(showdim, int):
        # showdim^th QoI against every other requested QoI
        for i in Q_nums:
            if i != showdim:
                plot_pair(showdim, i)
    elif showdim in ('all', 'ALL'):
        # all combinations of requested QoI in 2D
        for x, y in combinations(Q_nums, 2):
            plot_pair(x, y)
def show_data_domain_multi(samples, data, Q_ref, Q_nums=None,
                           img_folder='figs/', ref_markers=None,
                           ref_colors=None, showdim=None):
    r"""
    Plot the data domain D using a triangulation based on the generating
    samples where :math:`Q={q_1, q_i}` for ``i=Q_nums``, with a marker for
    various :math:`Q_{ref}`.

    :param samples: Samples to plot
    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim).
        Only uses the first two dimensions.
    :param data: Data associated with ``samples``
    :type data: :class:`numpy.ndarray`
    :param Q_ref: reference data value
    :type Q_ref: :class:`numpy.ndarray` of shape (M, mdim)
    :param list Q_nums: dimensions of the QoI to plot; defaults to every
        column of ``data``
    :param string img_folder: folder to save the plots to; created if it
        does not exist
    :param list ref_markers: list of marker types for :math:`Q_{ref}`
    :param list ref_colors: list of colors for :math:`Q_{ref}`
    :param showdim: defaults to 0. If int, plot that QoI against every
        other entry of ``Q_nums``; if ``'all'`` or ``'ALL'``, plot every
        pair drawn from ``Q_nums``.
    :type showdim: int or string

    """
    # Identity checks: ``== None`` is evaluated elementwise when an array is
    # passed, which makes the ``if`` ambiguous.
    if ref_markers is None:
        ref_markers = markers
    if ref_colors is None:
        ref_colors = colors
    if Q_nums is None:
        Q_nums = list(range(data.shape[1]))
    if showdim is None:
        showdim = 0

    if not os.path.isdir(img_folder):
        os.mkdir(img_folder)

    Q_ref = util.fix_dimensions_data(Q_ref, data.shape[1])

    triangulation = tri.Triangulation(samples[:, 0], samples[:, 1])
    triangles = triangulation.triangles

    if isinstance(showdim, int):
        for i in Q_nums:
            # Plotting a QoI against itself is degenerate; skip it.
            if i == showdim:
                continue
            xlabel = r'$q_{' + str(showdim + 1) + r'}$'
            ylabel = r'$q_{' + str(i + 1) + r'}$'
            filenames = [
                img_folder + 'domain_q' + str(showdim + 1) + '_q' +
                str(i + 1) + '.eps',
                img_folder + 'q' + str(showdim + 1) + '_q' + str(i + 1) +
                '_domain_Q_cs.eps'
            ]
            show_data_domain_2D(samples, data[:, [showdim, i]],
                                Q_ref[:, [showdim, i]], ref_markers,
                                ref_colors, xlabel=xlabel, ylabel=ylabel,
                                triangles=triangles, save=True,
                                interactive=False, filenames=filenames)
    elif showdim == 'all' or showdim == 'ALL':
        for x, y in combinations(Q_nums, 2):
            xlabel = r'$q_{' + str(x + 1) + r'}$'
            ylabel = r'$q_{' + str(y + 1) + r'}$'
            filenames = [
                img_folder + 'domain_q' + str(x + 1) + '_q' + str(y + 1) +
                '.eps',
                img_folder + 'q' + str(x + 1) + '_q' + str(y + 1) +
                '_domain_Q_cs.eps'
            ]
            show_data_domain_2D(samples, data[:, [x, y]], Q_ref[:, [x, y]],
                                ref_markers, ref_colors, xlabel=xlabel,
                                ylabel=ylabel, triangles=triangles,
                                save=True, interactive=False,
                                filenames=filenames)
def show_data_domain_2D(samples, data, Q_ref, ref_markers=None,
                        ref_colors=None, xlabel=r'$q_1$', ylabel=r'$q_2$',
                        triangles=None, save=True, interactive=False,
                        filenames=None):
    r"""
    Plot the data domain D using a triangulation based on the generating
    samples with a marker for various :math:`Q_{ref}`. Assumes that the
    first dimension of data is :math:`q_1`.

    :param samples: Samples to plot
    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim)
    :param data: Data associated with ``samples``
    :type data: :class:`numpy.ndarray`
    :param Q_ref: reference data value
    :type Q_ref: :class:`numpy.ndarray` of shape (M, 2)
    :param list ref_markers: list of marker types for :math:`Q_{ref}`;
        defaults to the module-level ``markers``
    :param list ref_colors: list of colors for :math:`Q_{ref}`; defaults to
        the module-level ``colors``
    :param string xlabel: x-axis label
    :param string ylabel: y-axis label
    :param triangles: triangulation defined by ``samples``; computed from
        the first two columns of ``samples`` when omitted
    :type triangles: :class:`tri.Triangulation.triangles`
    :param bool save: flag whether or not to save the marked figure
        (``filenames[1]``); the unmarked figure (``filenames[0]``) is
        always written
    :param bool interactive: flag whether or not to show the figure; the
        figure is closed when this is false
    :param list filenames: file names for the unmarked and marked domain
        plots

    """
    # Identity checks: ``== None`` is evaluated elementwise when an array is
    # passed, which makes the ``if`` ambiguous.
    if ref_markers is None:
        ref_markers = markers
    if ref_colors is None:
        ref_colors = colors
    if triangles is None:
        triangulation = tri.Triangulation(samples[:, 0], samples[:, 1])
        triangles = triangulation.triangles
    if filenames is None:
        filenames = ['domain_q1_q2_cs.eps', 'q1_q2_domain_Q_cs.eps']

    Q_ref = util.fix_dimensions_data(Q_ref, 2)

    # Create figure
    plt.tricontourf(data[:, 0], data[:, 1], np.zeros((data.shape[0], )),
                    triangles=triangles, colors='grey')
    plt.autoscale(tight=True)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # The unmarked domain plot is saved regardless of ``save``.
    plt.savefig(filenames[0], bbox_inches='tight', transparent=True,
                pad_inches=0)

    # Add truth markers (``range`` works on both Python 2 and 3, unlike
    # ``xrange``).
    for i in range(Q_ref.shape[0]):
        plt.scatter(Q_ref[i, 0], Q_ref[i, 1], s=60, c=ref_colors[i],
                    marker=ref_markers[i])
    if save:
        plt.savefig(filenames[1], bbox_inches='tight', transparent=True,
                    pad_inches=0)
    if interactive:
        plt.show()
    else:
        plt.close()
def show_data_domain_multi(samples, data, Q_ref, Q_nums=None,
                           img_folder='figs/', ref_markers=None,
                           ref_colors=None, showdim=None):
    r"""
    Plot the data domain D using a triangulation based on the generating
    samples where :math:`Q={q_1, q_i}` for ``i=Q_nums``, with a marker for
    various :math:`Q_{ref}`.

    :param samples: Samples to plot
    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim).
        Only uses the first two dimensions.
    :param data: Data associated with ``samples``
    :type data: :class:`numpy.ndarray`
    :param Q_ref: reference data value
    :type Q_ref: :class:`numpy.ndarray` of shape (M, mdim)
    :param list Q_nums: dimensions of the QoI to plot; defaults to every
        column of ``data``
    :param string img_folder: folder to save the plots to; created if it
        does not exist
    :param list ref_markers: list of marker types for :math:`Q_{ref}`
    :param list ref_colors: list of colors for :math:`Q_{ref}`
    :param showdim: defaults to 0. If int, plot that QoI against every
        other entry of ``Q_nums``; if ``'all'`` or ``'ALL'``, plot every
        pair drawn from ``Q_nums``.
    :type showdim: int or string

    """
    # Identity checks: ``== None`` is evaluated elementwise when an array is
    # passed, which makes the ``if`` ambiguous.
    if ref_markers is None:
        ref_markers = markers
    if ref_colors is None:
        ref_colors = colors
    if Q_nums is None:
        Q_nums = list(range(data.shape[1]))
    if showdim is None:
        showdim = 0

    if not os.path.isdir(img_folder):
        os.mkdir(img_folder)

    Q_ref = util.fix_dimensions_data(Q_ref, data.shape[1])

    triangulation = tri.Triangulation(samples[:, 0], samples[:, 1])
    triangles = triangulation.triangles

    if isinstance(showdim, int):
        for i in Q_nums:
            # Plotting a QoI against itself is degenerate; skip it.
            if i == showdim:
                continue
            xlabel = r'$q_{' + str(showdim + 1) + r'}$'
            ylabel = r'$q_{' + str(i + 1) + r'}$'
            filenames = [
                img_folder + 'domain_q' + str(showdim + 1) + '_q' +
                str(i + 1) + '.eps',
                img_folder + 'q' + str(showdim + 1) + '_q' + str(i + 1) +
                '_domain_Q_cs.eps'
            ]
            show_data_domain_2D(samples, data[:, [showdim, i]],
                                Q_ref[:, [showdim, i]], ref_markers,
                                ref_colors, xlabel=xlabel, ylabel=ylabel,
                                triangles=triangles, save=True,
                                interactive=False, filenames=filenames)
    elif showdim == 'all' or showdim == 'ALL':
        for x, y in combinations(Q_nums, 2):
            xlabel = r'$q_{' + str(x + 1) + r'}$'
            ylabel = r'$q_{' + str(y + 1) + r'}$'
            filenames = [
                img_folder + 'domain_q' + str(x + 1) + '_q' + str(y + 1) +
                '.eps',
                img_folder + 'q' + str(x + 1) + '_q' + str(y + 1) +
                '_domain_Q_cs.eps'
            ]
            show_data_domain_2D(samples, data[:, [x, y]], Q_ref[:, [x, y]],
                                ref_markers, ref_colors, xlabel=xlabel,
                                ylabel=ylabel, triangles=triangles,
                                save=True, interactive=False,
                                filenames=filenames)
def show_data_domain_multi(sample_disc, Q_ref=None, Q_nums=None,
                           img_folder='figs/', ref_markers=None,
                           ref_colors=None, showdim=None,
                           file_extension=".png", markersize=75):
    r"""
    Plots 2-D projections of the data domain D using a triangulation based
    on the first two coordinates (parameters) of the generating samples
    where :math:`Q={q_1, q_i}` for ``i=Q_nums``, with a marker for various
    :math:`Q_{ref}`.

    Each projection is handed to :meth:`show_data_domain_2D` as a temporary
    two-dimensional discretization, with ``save=True`` and
    ``interactive=False``.

    :param sample_disc: Object containing the samples to plot
    :type sample_disc: :class:`~bet.sample.discretization`
    :param Q_ref: reference data value. When ``None`` the reference value
        stored on the output sample set is used (which may itself be
        ``None``, in which case no markers are requested).
    :type Q_ref: :class:`numpy.ndarray` of shape (M, mdim)
    :param list Q_nums: dimensions of the QoI to plot. Defaults to all
        dimensions of the output sample set.
    :param string img_folder: folder to save the plots to (created if it
        does not exist)
    :param list ref_markers: list of marker types for :math:`Q_{ref}`
    :param list ref_colors: list of colors for :math:`Q_{ref}`
    :param showdim: defaults to 0, i.e. :math:`q_1`. If int, plot that QoI
        against every other entry of ``Q_nums``; if ``'all'`` or ``'ALL'``,
        plot all pairwise combinations. Any other value produces no plots.
    :type showdim: int or string
    :param string file_extension: file extension
    :param markersize: forwarded unchanged to :meth:`show_data_domain_2D`

    :raises bad_object: if ``sample_disc`` is not a
        :class:`~bet.sample.discretization`

    """
    if not isinstance(sample_disc, sample.discretization):
        raise bad_object("Improper sample object")

    # Fall back on the module-level marker and color lists
    ref_markers = markers if ref_markers is None else ref_markers
    ref_colors = colors if ref_colors is None else ref_colors

    data_obj = sample_disc._output_sample_set
    sample_obj = sample_disc._input_sample_set

    if Q_ref is None:
        Q_ref = data_obj._reference_value

    # Without an explicit selection every QoI index is a candidate
    if Q_nums is None:
        Q_nums = list(range(data_obj.get_dim()))

    # Without an explicit choice the first QoI is the fixed axis
    if showdim is None:
        showdim = 0

    # Create a folder for these figures if it doesn't already exist
    if not os.path.isdir(img_folder):
        os.mkdir(img_folder)

    # Make sure the shape of Q_ref is correct
    if Q_ref is not None:
        Q_ref = util.fix_dimensions_data(Q_ref, data_obj.get_dim())

    # One triangulation of the first two parameters defines the topology
    # used by every projection of the data space
    triangulation = tri.Triangulation(sample_obj.get_values()[:, 0],
                                      sample_obj.get_values()[:, 1])
    triangles = triangulation.triangles

    # Decide which (x-axis, y-axis) QoI pairs get a figure
    if isinstance(showdim, int):
        qoi_pairs = [(showdim, j) for j in Q_nums if j != showdim]
    elif showdim == 'all' or showdim == 'ALL':
        qoi_pairs = combinations(Q_nums, 2)
    else:
        qoi_pairs = ()

    for first, second in qoi_pairs:
        columns = [first, second]
        pair_tag = 'q' + str(first + 1) + '_q' + str(second + 1)
        x_text = r'$q_{' + str(first + 1) + r'}$'
        y_text = r'$q_{' + str(second + 1) + r'}$'
        out_names = [img_folder + 'domain_' + pair_tag,
                     img_folder + pair_tag + '_domain_Q_cs']

        # Wrap the two selected QoI columns in a throw-away discretization
        projected = sample.sample_set(2)
        projected.set_values(data_obj.get_values()[:, columns])
        pair_disc = sample.discretization(sample_obj, projected)

        pair_ref = None if Q_ref is None else Q_ref[:, columns]
        show_data_domain_2D(pair_disc, pair_ref, ref_markers, ref_colors,
                            xlabel=x_text, ylabel=y_text,
                            triangles=triangles, save=True,
                            interactive=False, filenames=out_names,
                            file_extension=file_extension,
                            markersize=markersize)
def show_data_domain_2D(sample_disc, Q_ref=None, ref_markers=None,
                        ref_colors=None, xlabel=r'$q_1$', ylabel=r'$q_2$',
                        triangles=None, save=True, interactive=False,
                        filenames=None, file_extension=".png", markersize=75):
    r"""
    Plots a single 2-D data domain D using a triangulation based on the
    first two coordinates (parameters) of the generating samples where
    :math:`Q={q_1, q_i}` for ``i=Q_nums``, with a marker for various
    :math:`Q_{ref}`. Assumes that the first dimension of data is
    :math:`q_1`.

    .. note::

        Do not specify the file extension in BOTH ``filenames`` and
        ``file_extension``. ``file_extension`` is appended only to entries
        of ``filenames`` whose base name has no extension.

    .. note::

        The unmarked domain plot (``filenames[0]``) is always written;
        ``save`` only controls the marked plot (``filenames[1]``).

    :param sample_disc: Object containing the samples to plot
    :type sample_disc: :class:`~bet.sample.discretization`
    :param Q_ref: reference data value. When ``None`` the reference value
        stored on the output sample set is used; if that is also ``None``
        no markers are drawn.
    :type Q_ref: :class:`numpy.ndarray` of shape (M, 2)
    :param list ref_markers: list of marker types for :math:`Q_{ref}`.
        Defaults to the module-level ``markers``.
    :param list ref_colors: list of colors for :math:`Q_{ref}`. Defaults to
        the module-level ``colors``.
    :param string xlabel: x-axis label
    :param string ylabel: y-axis label
    :param triangles: triangulation defined by ``samples``. Computed from
        the first two input coordinates when ``None``.
    :type triangles: :class:`tri.Triangulation.triangles`
    :param bool save: flag whether or not to save the marked figure
    :param bool interactive: flag whether or not to show the figure; the
        figure is closed when ``False``
    :param list filenames: file names for the unmarked and marked domain
        plots
    :param string file_extension: file extension
    :param markersize: size (``s`` argument of ``plt.scatter``) of the
        :math:`Q_{ref}` markers

    :raises bad_object: if ``sample_disc`` is not a
        :class:`~bet.sample.discretization`

    """
    # Function-scope import: this block did not previously reference ``os``.
    import os

    if not isinstance(sample_disc, sample.discretization):
        raise bad_object("Improper sample object")

    data_obj = sample_disc._output_sample_set
    sample_obj = sample_disc._input_sample_set
    if Q_ref is None:
        Q_ref = data_obj._reference_value

    # Set the default marker and colors
    if ref_markers is None:
        ref_markers = markers
    if ref_colors is None:
        ref_colors = colors

    # Build the topology from the first two parameters unless the caller
    # already supplies one.
    if triangles is None:
        triangulation = tri.Triangulation(sample_obj.get_values()[:, 0],
                                          sample_obj.get_values()[:, 1])
        triangles = triangulation.triangles

    # Set default file names
    if filenames is None:
        filenames = ['domain_q1_q2_cs', 'q1_q2_domain_Q_cs']

    # Make sure the shape of Q_ref is correct
    if Q_ref is not None:
        Q_ref = util.fix_dimensions_data(Q_ref, 2)

    # Create figure: a constant (zero) field over the triangulated data
    # simply shades the region covered by the data.
    plt.tricontourf(data_obj.get_values()[:, 0], data_obj.get_values()[:, 1],
                    np.zeros((data_obj.get_values().shape[0],)),
                    triangles=triangles, colors='grey')
    plt.autoscale(tight=True)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # Only the final path component decides whether an extension is
    # present; a dot in a directory name (e.g. './figs/') must not suppress
    # ``file_extension``.
    full_filenames0 = filenames[0]
    if not os.path.splitext(full_filenames0)[1]:
        full_filenames0 = full_filenames0 + file_extension
    full_filenames1 = filenames[1]
    if not os.path.splitext(full_filenames1)[1]:
        full_filenames1 = full_filenames1 + file_extension

    plt.savefig(full_filenames0, bbox_inches='tight', transparent=True,
                pad_inches=.2)

    # Add truth markers
    if Q_ref is not None:
        for i in range(Q_ref.shape[0]):
            plt.scatter(Q_ref[i, 0], Q_ref[i, 1], s=markersize,
                        c=ref_colors[i], marker=ref_markers[i])
    plt.tight_layout()
    if save:
        plt.savefig(full_filenames1, bbox_inches='tight', transparent=True,
                    pad_inches=.2)
    if interactive:
        plt.show()
    else:
        plt.close()
def show_data_domain_2D(samples, data, Q_ref=None, ref_markers=None,
                        ref_colors=None, xlabel=r'$q_1$', ylabel=r'$q_2$',
                        triangles=None, save=True, interactive=False,
                        filenames=None):
    r"""
    Plots a single 2-D data domain D using a triangulation based on the
    first two coordinates (parameters) of the generating samples where
    :math:`Q={q_1, q_i}` for ``i=Q_nums``, with a marker for various
    :math:`Q_{ref}`. Assumes that the first dimension of data is
    :math:`q_1`.

    .. note::

        The unmarked domain plot (``filenames[0]``) is always written;
        ``save`` only controls the marked plot (``filenames[1]``).

    :param samples: Samples to plot
    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim). Only
        uses the first two dimensions.
    :param data: Data associated with ``samples``; only the first two
        columns are plotted
    :type data: :class:`numpy.ndarray`
    :param Q_ref: reference data value; no markers are drawn when ``None``
    :type Q_ref: :class:`numpy.ndarray` of shape (M, 2)
    :param list ref_markers: list of marker types for :math:`Q_{ref}`.
        Defaults to the module-level ``markers``.
    :param list ref_colors: list of colors for :math:`Q_{ref}`. Defaults to
        the module-level ``colors``.
    :param string xlabel: x-axis label
    :param string ylabel: y-axis label
    :param triangles: triangulation defined by ``samples``. Computed from
        the first two columns of ``samples`` when ``None``.
    :type triangles: :class:`tri.Triangulation.triangles`
    :param bool save: flag whether or not to save the marked figure
    :param bool interactive: flag whether or not to show the figure; the
        figure is closed when ``False``
    :param list filenames: file names for the unmarked and marked domain
        plots

    """
    # Identity checks: ``== None`` is evaluated element-wise when an
    # ndarray is passed and then fails inside ``if``.
    if ref_markers is None:
        ref_markers = markers
    if ref_colors is None:
        ref_colors = colors

    # Build the topology from the first two parameters unless the caller
    # already supplies one.
    if triangles is None:
        triangulation = tri.Triangulation(samples[:, 0], samples[:, 1])
        triangles = triangulation.triangles

    # Set default file names
    if filenames is None:
        filenames = ['domain_q1_q2_cs.eps', 'q1_q2_domain_Q_cs.eps']

    # Make sure the shape of Q_ref is correct
    if Q_ref is not None:
        Q_ref = util.fix_dimensions_data(Q_ref, 2)

    # Create figure: a constant (zero) field over the triangulated data
    # simply shades the region covered by the data.
    plt.tricontourf(data[:, 0], data[:, 1], np.zeros((data.shape[0],)),
                    triangles=triangles, colors='grey')
    plt.autoscale(tight=True)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.savefig(filenames[0], bbox_inches='tight', transparent=True,
                pad_inches=.2)

    # Add truth markers (``range`` rather than ``xrange`` so the function
    # also runs on Python 3)
    if Q_ref is not None:
        for i in range(Q_ref.shape[0]):
            plt.scatter(Q_ref[i, 0], Q_ref[i, 1], s=60, c=ref_colors[i],
                        marker=ref_markers[i])
    if save:
        plt.savefig(filenames[1], bbox_inches='tight', transparent=True,
                    pad_inches=.2)
    if interactive:
        plt.show()
    else:
        plt.close()
def show_data_domain_multi(samples, data, Q_ref=None, Q_nums=None,
                           img_folder='figs/', ref_markers=None,
                           ref_colors=None, showdim=None):
    r"""
    Plots 2-D projections of the data domain D using a triangulation based
    on the first two coordinates (parameters) of the generating samples
    where :math:`Q={q_1, q_i}` for ``i=Q_nums``, with a marker for various
    :math:`Q_{ref}`.

    For every plotted pair of QoI, :meth:`show_data_domain_2D` is called
    with ``save=True`` and two ``.eps`` file names located in
    ``img_folder``.

    :param samples: Samples to plot
    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim). Only
        uses the first two dimensions.
    :param data: Data associated with ``samples``
    :type data: :class:`numpy.ndarray`
    :param Q_ref: reference data value; no markers are requested when
        ``None``
    :type Q_ref: :class:`numpy.ndarray` of shape (M, mdim)
    :param list Q_nums: dimensions of the QoI to plot. Defaults to every
        column index of ``data``.
    :param string img_folder: folder to save the plots to (created if it
        does not exist)
    :param list ref_markers: list of marker types for :math:`Q_{ref}`.
        Defaults to the module-level ``markers``.
    :param list ref_colors: list of colors for :math:`Q_{ref}`. Defaults to
        the module-level ``colors``.
    :param showdim: defaults to 0, i.e. :math:`q_1`. If int, plot that QoI
        (:math:`q_i`) against every *other* entry of ``Q_nums``; if
        ``'all'`` or ``'ALL'``, plot all pairwise combinations. Any other
        value produces no plots.
    :type showdim: int or string

    """
    # Identity checks: ``== None`` is evaluated element-wise when an
    # ndarray is passed and then fails inside ``if``.
    if ref_markers is None:
        ref_markers = markers
    if ref_colors is None:
        ref_colors = colors

    # If no specific coordinate numbers are given for the data coordinates
    # (e.g. i, where \q_i is a coordinate in the data space), then
    # set them to be the counting numbers.
    if Q_nums is None:
        Q_nums = range(data.shape[1])

    # If no specific coordinate number of choice is given set to be the
    # first coordinate direction.
    if showdim is None:
        showdim = 0

    # Create a folder for these figures if it doesn't already exist
    if not os.path.isdir(img_folder):
        os.mkdir(img_folder)

    # Make sure the shape of Q_ref is correct
    if Q_ref is not None:
        Q_ref = util.fix_dimensions_data(Q_ref, data.shape[1])

    # Create the triangulation used to define the topology of the samples
    # in the data space from the first two parameters in the parameter space
    triangulation = tri.Triangulation(samples[:, 0], samples[:, 1])
    triangles = triangulation.triangles

    if isinstance(showdim, int):
        # q_{showdim} against all *other* QoI; pairing it with itself only
        # yields a degenerate plot.
        pairs = [(showdim, i) for i in Q_nums if i != showdim]
    elif showdim == 'all' or showdim == 'ALL':
        # All combinations of QoI in 2D
        pairs = combinations(Q_nums, 2)
    else:
        pairs = []

    for x, y in pairs:
        xlabel = r'$q_{' + str(x + 1) + r'}$'
        ylabel = r'$q_{' + str(y + 1) + r'}$'
        pair_tag = 'q' + str(x + 1) + '_q' + str(y + 1)
        filenames = [img_folder + 'domain_' + pair_tag + '.eps',
                     img_folder + pair_tag + '_domain_Q_cs.eps']
        pair_ref = None if Q_ref is None else Q_ref[:, [x, y]]
        show_data_domain_2D(samples, data[:, [x, y]], pair_ref,
                            ref_markers, ref_colors, xlabel=xlabel,
                            ylabel=ylabel, triangles=triangles, save=True,
                            interactive=False, filenames=filenames)
def unif_unif(data, Q_ref, M=50, bin_ratio=0.2, num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D}}`
    where :math:`\rho_{\mathcal{D}}` is a uniform probability density on
    a generalized rectangle centered at Q_ref.
    The support of this density is defined by bin_ratio, which determines
    the size of the generalized rectangle by scaling the circumscribing
    generalized rectangle of :math:`\mathcal{D}`.
    The simple function approximation is then defined by determining M
    Voronoi cells (i.e., "bins") partitioning :math:`\mathcal{D}`. These
    bins are only implicitly defined by M samples in :math:`\mathcal{D}`.
    Finally, the probabilities of each of these bins is computed by
    sampling from :math:`\rho_{\mathcal{D}}` and using nearest neighbor
    searches to bin these samples in the M implicitly defined bins.
    The result is the simple function approximation denoted by
    :math:`\rho_{\mathcal{D},M}`.

    Note that all computations in the measure-theoretic framework that
    follow from this are for the fixed simple function approximation
    :math:`\rho_{\mathcal{D},M}`.

    None of this requires solving the model: the result can be computed
    "offline", stored, and used later with a completely different set of
    parameter samples and model solves.

    :param int M: Defines number M samples in D used to define
        :math:`\rho_{\mathcal{D},M}`. The choice of M is something of an
        "art" - play around with it and you can get reasonable results with
        a relatively small number here like 50.
    :param bin_ratio: The ratio used to determine the width of the
        uniform distribution as ``bin_size = (data_max-data_min)*bin_ratio``
    :type bin_ratio: double or list()
    :param int num_d_emulate: Number of samples used to emulate using an MC
        assumption. Each process draws ``int(num_d_emulate/comm.size)+1``
        samples, so the total may slightly exceed this number.
    :param data: Array containing QoI data where the QoI is mdim
        dimensional
    :type data: :class:`~numpy.ndarray` of size (num_samples, mdim)
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)

    :rtype: tuple
    :returns: (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,)
        and ``d_distr_samples`` are (M, mdim) :class:`~numpy.ndarray` and
        `d_Tree` is the :class:`~scipy.spatial.KDTree` for d_distr_samples

    """
    data = util.fix_dimensions_data(data)
    mdim = data.shape[1]
    bin_size = (np.max(data, 0) - np.min(data, 0)) * bin_ratio

    # Create M samples defining M Voronoi cells (i.e., "bins") in D used to
    # define the simple function approximation. They are called
    # "d_distr_samples" because they are samples on the data space chosen
    # to approximate the probability measure (distribution) on D. They need
    # not be random, but deterministic choices scale poorly with dimension.
    #
    # The samples are drawn from a box 1.5 times as wide as the support of
    # the density so that some cells receive zero probability. If every
    # sample lay inside the support, each of the M bins would have positive
    # probability and the support of rho_Lambda would be all of Lambda.
    if comm.rank == 0:
        d_distr_samples = 1.5 * bin_size * \
            (np.random.random((M, mdim)) - 0.5) + Q_ref
    else:
        d_distr_samples = np.empty((M, mdim))
    # Every process must use the bins generated on the root process.
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    # Compute probabilities in the M bins by a Monte Carlo approximation:
    # samples of rho_D are binned with nearest neighbor searches.
    # Generate this process's share of the samples from rho_D
    num_d_emulate = int(num_d_emulate/comm.size)+1
    d_distr_emulate = bin_size * \
        (np.random.random((num_d_emulate, mdim)) - 0.5) + Q_ref

    # Bin these samples using nearest neighbor searches
    d_Tree = spatial.KDTree(d_distr_samples)
    (_, nearest_bin) = d_Tree.query(d_distr_emulate)

    # One pass over the indices instead of M full scans. ``np.intc`` is the
    # C ``int`` type, matching the ``MPI.INT`` datatype used in the
    # reduction below (``np.int`` no longer exists in current NumPy).
    count_neighbors = np.bincount(nearest_bin, minlength=M).astype(np.intc)

    # Sum the per-process counts to define rho_{D,M}
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    rho_D_M = count_neighbors.astype(np.float64) / \
        float(num_d_emulate*comm.size)

    return (rho_D_M, d_distr_samples, d_Tree)
def show_data_domain_2D(sample_disc, Q_ref=None, ref_markers=None,
                        ref_colors=None, xlabel=r'$q_1$', ylabel=r'$q_2$',
                        triangles=None, save=True, interactive=False,
                        filenames=None, file_extension=".png", markersize=75):
    r"""
    Plots a single 2-D data domain D using a triangulation based on the
    first two coordinates (parameters) of the generating samples where
    :math:`Q={q_1, q_i}` for ``i=Q_nums``, with a marker for various
    :math:`Q_{ref}`. Assumes that the first dimension of data is
    :math:`q_1`.

    .. note::

        Do not specify the file extension in BOTH ``filenames`` and
        ``file_extension``. ``file_extension`` is appended only to entries
        of ``filenames`` whose base name has no extension.

    .. note::

        The unmarked domain plot (``filenames[0]``) is always written;
        ``save`` only controls the marked plot (``filenames[1]``).

    :param sample_disc: Object containing the samples to plot
    :type sample_disc: :class:`~bet.sample.discretization`
    :param Q_ref: reference data value. When ``None`` the reference value
        stored on the output sample set is used; if that is also ``None``
        no markers are drawn.
    :type Q_ref: :class:`numpy.ndarray` of shape (M, 2)
    :param list ref_markers: list of marker types for :math:`Q_{ref}`.
        Defaults to the module-level ``markers``.
    :param list ref_colors: list of colors for :math:`Q_{ref}`. Defaults to
        the module-level ``colors``.
    :param string xlabel: x-axis label
    :param string ylabel: y-axis label
    :param triangles: triangulation defined by ``samples``. Computed from
        the first two input coordinates when ``None``.
    :type triangles: :class:`tri.Triangulation.triangles`
    :param bool save: flag whether or not to save the marked figure
    :param bool interactive: flag whether or not to show the figure; the
        figure is closed when ``False``
    :param list filenames: file names for the unmarked and marked domain
        plots
    :param string file_extension: file extension
    :param markersize: size (``s`` argument of ``plt.scatter``) of the
        :math:`Q_{ref}` markers

    :raises bad_object: if ``sample_disc`` is not a
        :class:`~bet.sample.discretization`

    """
    # Function-scope import: this block did not previously reference ``os``.
    import os

    if not isinstance(sample_disc, sample.discretization):
        raise bad_object("Improper sample object")

    data_obj = sample_disc._output_sample_set
    sample_obj = sample_disc._input_sample_set
    if Q_ref is None:
        Q_ref = data_obj._reference_value

    # Set the default marker and colors
    if ref_markers is None:
        ref_markers = markers
    if ref_colors is None:
        ref_colors = colors

    # Build the topology from the first two parameters unless the caller
    # already supplies one.
    if triangles is None:
        triangulation = tri.Triangulation(sample_obj.get_values()[:, 0],
                                          sample_obj.get_values()[:, 1])
        triangles = triangulation.triangles

    # Set default file names
    if filenames is None:
        filenames = ['domain_q1_q2_cs', 'q1_q2_domain_Q_cs']

    # Make sure the shape of Q_ref is correct
    if Q_ref is not None:
        Q_ref = util.fix_dimensions_data(Q_ref, 2)

    # Create figure: a constant (zero) field over the triangulated data
    # simply shades the region covered by the data.
    plt.tricontourf(data_obj.get_values()[:, 0], data_obj.get_values()[:, 1],
                    np.zeros((data_obj.get_values().shape[0],)),
                    triangles=triangles, colors='grey')
    plt.autoscale(tight=True)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # Only the final path component decides whether an extension is
    # present; a dot in a directory name (e.g. './figs/') must not suppress
    # ``file_extension``.
    full_filenames0 = filenames[0]
    if not os.path.splitext(full_filenames0)[1]:
        full_filenames0 = full_filenames0 + file_extension
    full_filenames1 = filenames[1]
    if not os.path.splitext(full_filenames1)[1]:
        full_filenames1 = full_filenames1 + file_extension

    plt.savefig(full_filenames0, bbox_inches='tight', transparent=True,
                pad_inches=.2)

    # Add truth markers
    if Q_ref is not None:
        for i in range(Q_ref.shape[0]):
            plt.scatter(Q_ref[i, 0], Q_ref[i, 1], s=markersize,
                        c=ref_colors[i], marker=ref_markers[i])
    plt.tight_layout()
    if save:
        plt.savefig(full_filenames1, bbox_inches='tight', transparent=True,
                    pad_inches=.2)
    if interactive:
        plt.show()
    else:
        plt.close()
def unif_unif(data, Q_ref, M=50, bin_ratio=0.2, num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D}}`
    where :math:`\rho_{\mathcal{D}}` is a uniform probability density on
    a generalized rectangle centered at Q_ref.
    The support of this density is defined by bin_ratio, which determines
    the size of the generalized rectangle by scaling the circumscribing
    generalized rectangle of :math:`\mathcal{D}`.
    The simple function approximation is then defined by determining M
    Voronoi cells (i.e., "bins") partitioning :math:`\mathcal{D}`. These
    bins are only implicitly defined by M samples in :math:`\mathcal{D}`.
    Finally, the probabilities of each of these bins is computed by
    sampling from :math:`\rho_{\mathcal{D}}` and using nearest neighbor
    searches to bin these samples in the M implicitly defined bins.
    The result is the simple function approximation denoted by
    :math:`\rho_{\mathcal{D},M}`.

    Note that all computations in the measure-theoretic framework that
    follow from this are for the fixed simple function approximation
    :math:`\rho_{\mathcal{D},M}`.

    None of this requires solving the model: the result can be computed
    "offline", stored, and used later with a completely different set of
    parameter samples and model solves.

    :param int M: Defines number M samples in D used to define
        :math:`\rho_{\mathcal{D},M}`. The choice of M is something of an
        "art" - play around with it and you can get reasonable results with
        a relatively small number here like 50.
    :param bin_ratio: The ratio used to determine the width of the
        uniform distribution as ``bin_size = (data_max-data_min)*bin_ratio``
    :type bin_ratio: double or list()
    :param int num_d_emulate: Number of samples used to emulate using an MC
        assumption. Each process draws ``int(num_d_emulate/comm.size)+1``
        samples, so the total may slightly exceed this number.
    :param data: Array containing QoI data where the QoI is mdim
        dimensional
    :type data: :class:`~numpy.ndarray` of size (num_samples, mdim)
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)

    :rtype: tuple
    :returns: (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,)
        and ``d_distr_samples`` are (M, mdim) :class:`~numpy.ndarray` and
        `d_Tree` is the :class:`~scipy.spatial.KDTree` for d_distr_samples

    """
    data = util.fix_dimensions_data(data)
    mdim = data.shape[1]
    bin_size = (np.max(data, 0) - np.min(data, 0)) * bin_ratio

    # Create M samples defining M Voronoi cells (i.e., "bins") in D used to
    # define the simple function approximation. They are called
    # "d_distr_samples" because they are samples on the data space chosen
    # to approximate the probability measure (distribution) on D. They need
    # not be random, but deterministic choices scale poorly with dimension.
    #
    # The samples are drawn from a box 1.5 times as wide as the support of
    # the density so that some cells receive zero probability. If every
    # sample lay inside the support, each of the M bins would have positive
    # probability and the support of rho_Lambda would be all of Lambda.
    if comm.rank == 0:
        d_distr_samples = 1.5 * bin_size * \
            (np.random.random((M, mdim)) - 0.5) + Q_ref
    else:
        d_distr_samples = np.empty((M, mdim))
    # Every process must use the bins generated on the root process.
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    # Compute probabilities in the M bins by a Monte Carlo approximation:
    # samples of rho_D are binned with nearest neighbor searches.
    # Generate this process's share of the samples from rho_D
    num_d_emulate = int(num_d_emulate/comm.size)+1
    d_distr_emulate = bin_size * \
        (np.random.random((num_d_emulate, mdim)) - 0.5) + Q_ref

    # Bin these samples using nearest neighbor searches
    d_Tree = spatial.KDTree(d_distr_samples)
    (_, nearest_bin) = d_Tree.query(d_distr_emulate)

    # One pass over the indices instead of M full scans. ``np.intc`` is the
    # C ``int`` type, matching the ``MPI.INT`` datatype used in the
    # reduction below (``np.int`` no longer exists in current NumPy).
    count_neighbors = np.bincount(nearest_bin, minlength=M).astype(np.intc)

    # Sum the per-process counts to define rho_{D,M}
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    rho_D_M = count_neighbors.astype(np.float64) / \
        float(num_d_emulate*comm.size)

    return (rho_D_M, d_distr_samples, d_Tree)