Example #1
    def define_val_model(self, N, P, K):
        # Define new graph
        self.z_test = Gamma(2. * tf.ones([N, K]), 1. * tf.ones([N, K]))
        self.l_test = TransformedDistribution(
            distribution=Normal(self.mean_llib * tf.ones([N, 1]),
                                np.sqrt(self.std_llib) * tf.ones([N, 1])),
            bijector=tf.contrib.distributions.bijectors.Exp())

        rho_test = tf.matmul(self.z_test, self.W0)
        rho_test = rho_test / tf.reshape(tf.reduce_sum(rho_test, axis=1),
                                         (-1, 1))  # NxP

        self.lam_test = Gamma(self.r, self.r / (rho_test * self.l_test))

        if self.zero_inflation:
            logit_pi_test = tf.matmul(self.z_test, self.W1)

            pi_test = tf.minimum(
                tf.maximum(tf.nn.sigmoid(logit_pi_test), 1e-7), 1. - 1e-7)
            cat_test = Categorical(
                probs=tf.stack([pi_test, 1. - pi_test], axis=2))

            components_test = [
                Poisson(rate=1e-30 * tf.ones([N, P])),
                Poisson(rate=self.lam_test)
            ]
            self.likelihood_test = Mixture(cat=cat_test,
                                           components=components_test)
        else:
            self.likelihood_test = Poisson(rate=self.lam_test)
Example #2
 def __init__(self, datastore=None, USE_FEEDBACK=USE_FEEDBACK):
     """Set the variables and load model data."""
     self.datastore = datastore
     self.USE_FEEDBACK = convert_string2bool_env(USE_FEEDBACK)
     self.package_id_dict = OrderedDict()
     self.id_package_dict = OrderedDict()
     self.beta = None
     self.theta = None
     self.alpha = None
     self.manifest_id_dict = OrderedDict()
     self.feedback_id_dict = OrderedDict()
     self.manifests = 0
     self.packages = 0
     self.epsilon = Gamma(tf.constant(
         a_c), tf.constant(a_c) / tf.constant(b_c)).\
         prob(tf.constant(K, dtype=tf.float32)).eval(session=tf.Session())
     self.theta_dummy = Poisson(
         np.array([
             self.epsilon * Gamma(tf.constant(a), self.epsilon).prob(
                 tf.constant(K,
                             dtype=tf.float32)).eval(session=tf.Session())
         ] * K,
                  dtype=float))
     if isinstance(datastore, S3DataStore):  # pragma: no-cover
         self.load_s3()
     else:
         self.load_local()
     self.manifests = self.theta.shape[0]
     self.packages = self.beta.shape[0]
     self.dummy_result = self.theta_dummy.prob(
         self.beta).eval(session=tf.Session())
Example #3
def calc_prob(pi_samples, lam_samples, y, S, K):
    log_prob = tf.constant(0.0, dtype=tf.float64)
    prob = tf.constant(0.0, dtype=tf.float64)
    for s in range(S):
        p_y = tf.gather_nd(pi_samples, [s, 0]) * \
              Poisson(tf.gather_nd(lam_samples, [s, 0])).prob(y)
        for j in range(1, K):
            p_y += tf.gather_nd(pi_samples, [s, j]) * \
                   Poisson(tf.gather_nd(lam_samples, [s, j])).prob(y)
        log_prob += tf.log(tf.cast(p_y, tf.float64))
        prob += tf.cast(p_y, tf.float64)
    log_prob = log_prob / S
    prob = prob / S
    return log_prob.eval(), prob.eval()
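# --- Hedged usage sketch (not part of the original snippet) ---
# calc_prob expects S posterior draws of K mixture weights (pi_samples) and
# K Poisson rates (lam_samples). The synthetic draws below are placeholders
# just to show the call; a default session is needed because calc_prob
# calls .eval() internally.
import numpy as np
import tensorflow as tf

S, K = 100, 3
sess = tf.InteractiveSession()
pi_samples = tf.constant(np.random.dirichlet(np.ones(K), size=S), dtype=tf.float32)
lam_samples = tf.constant(np.random.gamma(2., 1., size=(S, K)), dtype=tf.float32)
y = tf.constant(4.0)
log_prob, prob = calc_prob(pi_samples, lam_samples, y, S, K)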
Example #4
def main(_):
    ed.set_seed(42)

    # DATA
    x_data = build_toy_dataset(FLAGS.N, FLAGS.V)

    # MODEL
    x_ph = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.V])

    # Form (N, V, V) covariance, one matrix per data point.
    K = tf.stack([
        rbf(tf.reshape(xn, [FLAGS.V, 1])) + tf.diag([1e-6, 1e-6])
        for xn in tf.unstack(x_ph)
    ])
    f = MultivariateNormalTriL(loc=tf.zeros([FLAGS.N, FLAGS.V]),
                               scale_tril=tf.cholesky(K))
    x = Poisson(rate=tf.exp(f))

    # INFERENCE
    qf = Normal(loc=tf.get_variable("qf/loc", [FLAGS.N, FLAGS.V]),
                scale=tf.nn.softplus(
                    tf.get_variable("qf/scale", [FLAGS.N, FLAGS.V])))

    inference = ed.KLqp({f: qf}, data={x: x_data, x_ph: x_data})
    inference.run(n_iter=5000)
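
    # Optional follow-up (a sketch, not part of the original): evaluate the
    # Poisson rate at the posterior mean of f as a simple point estimate.
    post_rate = tf.exp(qf.mean())
    print(ed.get_session().run(post_rate).shape)  # (N, V) estimated rates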
Example #5
def latent_space_model_example():
	x_train = celegans('~/data')

	#--------------------
	N = x_train.shape[0]  # Number of data points.
	K = 3  # Latent dimensionality.

	z = Normal(loc=tf.zeros([N, K]), scale=tf.ones([N, K]))

	# Calculate N x N distance matrix.
	# 1. Create a vector, [||z_1||^2, ||z_2||^2, ..., ||z_N||^2], and tile it to create N identical rows.
	xp = tf.tile(tf.reduce_sum(tf.pow(z, 2), 1, keep_dims=True), [1, N])
	# 2. Create a N x N matrix where entry (i, j) is ||z_i||^2 + ||z_j||^2 - 2 z_i^T z_j.
	xp = xp + tf.transpose(xp) - 2 * tf.matmul(z, z, transpose_b=True)
	# 3. Invert the pairwise distances and make rate along diagonals to be close to zero.
	xp = 1.0 / tf.sqrt(xp + tf.diag(tf.zeros(N) + 1e3))

	x = Poisson(rate=xp)

	#--------------------
	if True:
		# Maximum a posteriori (MAP) estimation is simple in Edward.
		inference = ed.MAP([z], data={x: x_train})
	else:
		# One could run variational inference.
		qz = Normal(loc=tf.get_variable('qz/loc', [N * K]), scale=tf.nn.softplus(tf.get_variable('qz/scale', [N * K])))
		inference = ed.KLqp({z: qz}, data={x: x_train})
	inference.run(n_iter=2500)

def main():
	latent_space_model_example()
Example #6
    def train(self, n_iter=1000):
        D = len(self.team_num_map.keys())
        N = self.xs.shape[0]
        with tf.name_scope('model'):
            self.X = tf.placeholder(tf.float32, [N, D])
            self.w1 = Normal(loc=tf.zeros(D), scale=tf.ones(D))
            # self.b1 = Normal(loc=tf.zeros(1), scale=tf.ones(1))
            self.y1 = Poisson(rate=tf.exp(ed.dot(self.X, self.w1)))

        with tf.name_scope('posterior'):
            if self.inf_type == 'Var':
                self.qw1 = Normal(loc=tf.get_variable("qw1_ll/loc", [D]),
                                  scale=tf.nn.softplus(
                                      tf.get_variable("qw1_ll/scale", [D])))
                # self.qb1 = Normal(loc=tf.get_variable("qb1/loc", [1]),
                #                  scale=tf.nn.softplus(tf.get_variable("qb1/scale",
                #                                                        [1])))
            elif self.inf_type == 'MAP':
                self.qw1 = PointMass(
                    Normal(loc=tf.get_variable("qw1_ll/loc", [D]),
                           scale=tf.nn.softplus(
                               tf.get_variable("qw1_ll/scale", [D]))))

        if self.inf_type == 'Var':
            inference = ed.ReparameterizationKLqp({self.w1: self.qw1},
                                                  data={
                                                      self.X: self.xs,
                                                      self.y1: self.ys
                                                  })
        elif self.inf_type == 'MAP':
            inference = ed.MAP({self.w1: self.qw1},
                               data={
                                   self.X: self.xs,
                                   self.y1: self.ys
                               })
        inference.initialize(optimizer=tf.train.AdamOptimizer(
            learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08),
                             n_iter=n_iter)
        tf.global_variables_initializer().run()

        self.loss = np.empty(n_iter, dtype=np.float32)
        for i in range(n_iter):
            info_dict = inference.update()
            self.loss[i] = info_dict["loss"]
            inference.print_progress(info_dict)

        self._trained = True

        graph = tf.get_default_graph()
        self.team_skill = graph.get_tensor_by_name("qw1_ll/loc:0").eval()
        self.perf_variance = graph.get_tensor_by_name("qw1_ll/scale:0").eval()
        # self.bias = (graph.get_tensor_by_name("qb1/loc:0").eval(),
        #              graph.get_tensor_by_name("qb2/loc:0").eval())

        self.y_post = ed.copy(self.y1, {self.w1: self.qw1})
        return
Example #7
    def define_stochastic_model(self, P, K):
        M = self.minibatch_size

        self.W0 = Gamma(0.1 * tf.ones([K, P]), 0.3 * tf.ones([K, P]))
        if self.zero_inflation:
            self.W1 = Normal(tf.zeros([K, P]), tf.ones([K, P]))

        self.z = Gamma(2. * tf.ones([M, K]), 1. * tf.ones([M, K]))

        self.r = Gamma(2. * tf.ones([P]), 1. * tf.ones([P]))

        self.l = TransformedDistribution(
            distribution=Normal(self.mean_llib * tf.ones([M, 1]),
                                self.std_llib * tf.ones([M, 1])),
            bijector=tf.contrib.distributions.bijectors.Exp())

        self.rho = tf.matmul(self.z, self.W0)
        self.rho = self.rho / tf.reshape(tf.reduce_sum(self.rho, axis=1),
                                         (-1, 1))  # NxP

        self.lam = Gamma(self.r, self.r / (self.rho * self.l))

        if self.zero_inflation:
            self.logit_pi = tf.matmul(self.z, self.W1)
            self.pi = tf.minimum(
                tf.maximum(tf.nn.sigmoid(self.logit_pi), 1e-7), 1. - 1e-7)

            self.cat = Categorical(
                probs=tf.stack([self.pi, 1. - self.pi], axis=2))

            self.components = [
                Poisson(rate=1e-30 * tf.ones([M, P])),
                Poisson(rate=self.lam)
            ]

            self.likelihood = Mixture(cat=self.cat, components=self.components)
        else:
            self.likelihood = Poisson(rate=self.lam)
Example #8
 def __init__(self, datastore=None,
              scoring_region=HPF_SCORING_REGION):
     """Set the variables and load model data."""
     self.datastore = datastore
     self.scoring_region = scoring_region
     self.package_id_dict = None
     self.id_package_dict = None
     self.rating_matrix = None
     self.beta = None
     self.manifest_id_dict = None
     self.manifests = 0
     self.packages = 0
     self.sess = tf.Session()
     self.epsilon = Gamma(tf.constant(
         a_c), tf.constant(a_c) / tf.constant(b_c)).eval(session=self.sess)
     self.theta = np.array([self.epsilon * Gamma(tf.constant(
         a), self.epsilon).eval(session=self.sess)] * K)
     self.loadS3()
     self.dummy_result = Poisson(
         np.dot(self.theta, np.transpose(self.beta))).eval(session=self.sess)
     self.normalize_result()
Example #9
    def folding_in(self, input_id_set):
        """Folding in logic for prediction.

        :param  input_id_set: A set containing package ids of user's input package list.
        :return: Filtered companion recommendations and their topics.
        """
        manifest_id = int(self.match_manifest(input_id_set))
        if manifest_id == -1:
            result = np.array(self.dummy_result)
        else:
            graph_new = tf.Graph()
            with graph_new.as_default():
                result = Poisson(self.theta[manifest_id])
                result = result.prob(self.beta)
            with tf.Session(graph=graph_new) as sess_new:
                result = sess_new.run(result)
        normalised_result = self.normalize_result(result, input_id_set)
        if self.USE_FEEDBACK:
            alpha_id = int(self.match_feedback_manifest(input_id_set))
            return self.filter_recommendation(normalised_result,
                                              alpha_id=alpha_id)
        return self.filter_recommendation(normalised_result)
Example #10
            cau = exp_to_imp(train_data)

            tf.reset_default_graph()
            sess = tf.InteractiveSession()

            idx_ph = tf.placeholder(tf.int32, M)
            cau_ph = tf.placeholder(tf.float32, [M, N])
            sd_ph = tf.placeholder(tf.float32, [M, N])
            reconstr_cau_ph = tf.placeholder(tf.float32, [M, N])

            U = Gamma(0.3*tf.ones([M, K]), 0.3*tf.ones([M, K]))
            V = Gamma(0.3*tf.ones([N, K]), 0.3*tf.ones([N, K]))
            gamma = Gamma(tf.ones([1, 1]), tf.ones([1, 1]))
            beta0 = Gamma(0.3*tf.ones([1, 1]), 0.3*tf.ones([1, 1]))

            x = Poisson(tf.add(tf.matmul(U, V, transpose_b=True),\
                gamma * reconstr_cau_ph) + beta0)


            qU_variables = [tf.Variable(tf.random_uniform([D, K])), \
                           tf.Variable(tf.random_uniform([D, K]))]

            qU = PointMass(params=tf.nn.softplus(tf.gather(qU_variables[0], idx_ph)))


            qV_variables = [tf.Variable(tf.random_uniform([N, K])), \
                           tf.Variable(tf.random_uniform([N, K]))]

            qV = PointMass(params=tf.nn.softplus(qV_variables[0]))


            qgamma_variables = [tf.Variable(tf.random_uniform([1, 1])), \
Example #11
class HPFScoring:
    """The HPF Model scoring class."""

    def __init__(self, datastore=None,
                 scoring_region=HPF_SCORING_REGION):
        """Set the variables and load model data."""
        self.datastore = datastore
        self.scoring_region = scoring_region
        self.package_id_dict = None
        self.id_package_dict = None
        self.rating_matrix = None
        self.beta = None
        self.manifest_id_dict = None
        self.manifests = 0
        self.packages = 0
        self.sess = tf.Session()
        self.epsilon = Gamma(tf.constant(
            a_c), tf.constant(a_c) / tf.constant(b_c)).eval(session=self.sess)
        self.theta = np.array([self.epsilon * Gamma(tf.constant(
            a), self.epsilon).eval(session=self.sess)] * K)
        self.loadS3()
        self.dummy_result = Poisson(
            np.dot(self.theta, np.transpose(self.beta))).eval(session=self.sess)
        self.normalize_result()

    @staticmethod
    def _getsizeof(attribute):
        """Return the size of attribute in MBs.

        :param attribute: The object's attribute.
        """
        return "{} MB".format(getsizeof(attribute) / 1024 / 1024)

    def model_details(self):
        """Return the model details size."""
        return(
            "The model will be scored against\
                {} Packages,\
                {} Manifests,\
                Rating matrix of size {}, and\
                Beta matrix of size {}.".format(
                len(self.package_id_dict),
                len(self.manifest_id_dict),
                HPFScoring._getsizeof(self.rating_matrix),
                HPFScoring._getsizeof(self.beta))
        )

    def loadS3(self):
        """Load the model data from AWS S3."""
        package_id_dict_filename = os.path.join(
            self.scoring_region, HPF_output_package_id_dict)
        self.package_id_dict = self.datastore.read_json_file(
            package_id_dict_filename)
        self.id_package_dict = {x: n for n, x in self.package_id_dict.items()}
        manifest_id_dict_filename = os.path.join(
            self.scoring_region, HPF_output_manifest_id_dict)
        self.manifest_id_dict = self.datastore.read_json_file(
            manifest_id_dict_filename)
        self.manifest_id_dict = {n: set(x)
                                 for n, x in self.manifest_id_dict.items()}
        rating_matrix_filename = os.path.join(
            self.scoring_region, HPF_output_rating_matrix)
        self.datastore.download_file(
            rating_matrix_filename, "/tmp/rating_matrix.npz")
        sparse_matrix = sparse.load_npz('/tmp/rating_matrix.npz')
        self.rating_matrix = sparse_matrix.toarray()
        del(sparse_matrix)
        beta_matrix_filename = os.path.join(
            self.scoring_region, HPF_output_item_matrix)
        self.datastore.download_file(
            beta_matrix_filename, "/tmp/item_matrix.npz")
        sparse_matrix = sparse.load_npz('/tmp/item_matrix.npz')
        self.beta = sparse_matrix.toarray()
        del(sparse_matrix)
        self.manifests, self.packages = self.rating_matrix.shape

    def predict(self, input_stack):
        """Prediction function.

        :param input_stack: The user's package list
        for which companion recommendations are to be generated.
        :return companion_recommendation: The list of recommended companion packages
        along with confidence scores.
        :return package_topic_dict: The topics associated with the packages
        in the input_stack+recommendation.
        :return missing_packages: The list of packages unknown to the HPF model.
        """
        input_stack = set(input_stack)
        input_id_set = set()
        missing_packages = set()
        package_topic_dict = {}
        companion_recommendation = []
        for package_name in input_stack:
            package_id = self.package_id_dict.get(package_name)
            if package_id:
                input_id_set.add(package_id)
                package_topic_dict[package_name] = []
            else:
                missing_packages.add(package_name)
        if len(missing_packages) / len(input_stack) < UNKNOWN_PACKAGES_THRESHOLD:
            companion_recommendation = self.folding_in(
                input_id_set)
        else:
            current_app.logger.error(
                "{} length of missing packages beyond unknow threshold value of {}".format(
                    len(missing_packages), UNKNOWN_PACKAGES_THRESHOLD))
        return companion_recommendation, package_topic_dict, list(missing_packages)

    def match_manifest(self, input_id_set):
        """Find a manifest list that matches user's input package list and return its index.

        :param input_id_set: A set containing package ids of user's input package list.
        :return manifest_id: The index of the matched manifest.
        """
        manifest_id = -1
        for manifest_id, dependency_set in self.manifest_id_dict.items():
            if dependency_set == input_id_set:
                break
        current_app.logger.debug(
            "input_id_set {} and manifest_id {}".format(input_id_set, manifest_id))
        return manifest_id

    def folding_in(self, input_id_set):
        """Folding in logic for prediction.

        :param  input_id_set: A set containing package ids of user's input package list.
        :return: Filtered companion recommendations and their topics.
        """
        manifest_id = int(self.match_manifest(input_id_set))
        if manifest_id == -1:
            result = np.array(self.dummy_result)
        else:
            result = self.rating_matrix[manifest_id]
        return self.filter_recommendation(result)

    def normalize_result(self):
        """Normalise the probability score of the resulting recommendation.

        This method takes no arguments; it rescales self.dummy_result in place.
        """
        maxn = self.dummy_result.max()
        min_max = maxn - self.dummy_result.min()
        for i in range(self.packages):
            value = 0
            try:
                value = (maxn - self.dummy_result[i]) / min_max
            except Exception as e:
                current_app.logger.error(
                    "Exception occured in normalization {}".format(e))
            finally:
                self.dummy_result[i] = value

    def filter_recommendation(self, result):
        """Filter companion recommendations based on sorted threshold score.

        :param result: The unfiltered companion recommendation result.
        :return companion_recommendation: The filtered list of recommended companion packages
        along with confidence scores.
        :return package_topic_dict: The topics associated with the packages
        in the input_stack+recommendation.
        """
        highest_indices = result.argsort()[-MAX_COMPANION_REC_COUNT:len(result)]
        companion_recommendation = []
        for package_id in highest_indices:
            recommendation = {
                "cooccurrence_probability": result[package_id] * 100,
                "package_name": self.id_package_dict[package_id],
                "topic_list": []
            }
            companion_recommendation.append(recommendation)
        return companion_recommendation
Example #12
            ips_weights = 1. / 0.25**np.array(4 - train_data.todense())

            # ips_weights = ips_weights / np.sum(ips_weights) * np.sum(cau.todense())

            # ips different end

            tf.reset_default_graph()
            sess = tf.InteractiveSession()

            idx_ph = tf.placeholder(tf.int32, M)
            cau_ph = tf.placeholder(tf.float32, [M, N])
            sd_ph = tf.placeholder(tf.float32, [M, N])

            U = Gamma(0.3 * tf.ones([M, K]), 0.3 * tf.ones([M, K]))
            V = Gamma(0.3 * tf.ones([N, K]), 0.3 * tf.ones([N, K]))
            x = Poisson(tf.matmul(U, V, transpose_b=True))

            qU_variables = [tf.Variable(tf.random_uniform([D, K])), \
                           tf.Variable(tf.random_uniform([D, K]))]

            qU = PointMass(
                params=tf.nn.softplus(tf.gather(qU_variables[0], idx_ph)))


            qV_variables = [tf.Variable(tf.random_uniform([N, K])), \
                           tf.Variable(tf.random_uniform([N, K]))]

            qV = PointMass(params=tf.nn.softplus(qV_variables[0]))

            x_ph = tf.placeholder(tf.float32, [M, N])
Example #13
    def evaluate_loglikelihood(self, X, batch_idx=None):
        """
		This is the ELBO, which is a lower bound on the marginal log-likelihood.
		We perform some local optimization on the new data points to obtain the ELBO of the new data.
		"""
        N = X.shape[0]
        P = X.shape[1]
        K = self.n_components

        # Define new graph conditioned on the posterior global factors
        z_test = Gamma(2. * tf.ones([N, K]), 1. * tf.ones([N, K]))
        l_test = TransformedDistribution(
            distribution=Normal(self.mean_llib * tf.ones([N, 1]),
                                np.sqrt(self.std_llib) * tf.ones([N, 1])),
            bijector=tf.contrib.distributions.bijectors.Exp())

        if batch_idx is not None and self.n_batches > 0:
            rho_test = tf.matmul(
                tf.concat([
                    z_test,
                    tf.cast(tf.one_hot(batch_idx[:, 0], self.n_batches),
                            tf.float32)
                ],
                          axis=1), self.W0)
        else:
            rho_test = tf.matmul(z_test, self.W0)
        rho_test = rho_test / tf.reshape(tf.reduce_sum(rho_test, axis=1),
                                         (-1, 1))  # NxP

        lam_test = Gamma(self.r, self.r / (rho_test * l_test))

        if self.zero_inflation:
            if batch_idx is not None and self.n_batches > 0:
                logit_pi_test = tf.matmul(
                    tf.concat([
                        z_test,
                        tf.cast(tf.one_hot(batch_idx[:, 0], self.n_batches),
                                tf.float32)
                    ],
                              axis=1), self.W1)
            else:
                logit_pi_test = tf.matmul(z_test, self.W1)

            pi_test = tf.minimum(
                tf.maximum(tf.nn.sigmoid(logit_pi_test), 1e-7), 1. - 1e-7)
            cat_test = Categorical(
                probs=tf.stack([pi_test, 1. - pi_test], axis=2))

            components_test = [
                Poisson(rate=1e-30 * tf.ones([N, P])),
                Poisson(rate=lam_test)
            ]
            likelihood_test = Mixture(cat=cat_test, components=components_test)
        else:
            likelihood_test = Poisson(rate=lam_test)

        qz_test = TransformedDistribution(
            distribution=Normal(
                tf.Variable(tf.ones(z_test.shape)),
                tf.nn.softplus(tf.Variable(1. * tf.ones(z_test.shape)))),
            bijector=tf.contrib.distributions.bijectors.Exp())
        qlam_test = TransformedDistribution(
            distribution=Normal(
                tf.Variable(tf.ones(lam_test.shape)),
                tf.nn.softplus(tf.Variable(0.01 * tf.ones(lam_test.shape)))),
            bijector=tf.contrib.distributions.bijectors.Exp())
        ql_test = TransformedDistribution(
            distribution=Normal(
                tf.Variable(self.mean_llib * tf.ones(l_test.shape)),
                tf.nn.softplus(
                    tf.Variable(
                        np.sqrt(self.std_llib) * tf.ones(l_test.shape)))),
            bijector=tf.contrib.distributions.bijectors.Exp())

        if self.zero_inflation:
            inference_local = ed.ReparameterizationKLqp(
                {
                    z_test: qz_test,
                    lam_test: qlam_test,
                    l_test: ql_test
                },
                data={
                    likelihood_test: tf.cast(X, tf.float32),
                    self.W0: self.est_qW0,
                    self.W1: self.est_qW1,
                    self.r: self.est_qr
                })
        else:
            inference_local = ed.ReparameterizationKLqp(
                {
                    z_test: qz_test,
                    lam_test: qlam_test,
                    l_test: ql_test
                },
                data={
                    likelihood_test: tf.cast(X, tf.float32),
                    self.W0: self.est_qW0,
                    self.r: self.est_qr
                })

        inference_local.run(n_iter=self.test_iterations,
                            n_samples=self.n_mc_samples)

        return -self.sess.run(inference_local.loss,
                              feed_dict={likelihood_test: X.astype('float32')
                                         }) / N
Example #14
#!/usr/bin/env python
"""Use analytic KL for Poisson distributions.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from edward.models import Poisson
import tensorflow as tf
import tensorflow.contrib.distributions as dt


@dt.RegisterKL(dt.Poisson, dt.Poisson)
def _kl_poisson(poisson1, poisson2, name=None):
    """KL divergence between two Poisson distributions."""
    with tf.name_scope(name, "KL_Poisson", [poisson1, poisson2]):
        return poisson1.rate * (tf.log(poisson1.rate) - tf.log(
            poisson2.rate)) + poisson2.rate - poisson1.rate


p1 = Poisson(rate=1.)
p2 = Poisson(rate=2.)
kl = dt.kl_divergence(p1, p2)
tf.Session().run(kl)
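
# --- Sanity check (a sketch, not part of the original script) ---
# Monte Carlo estimate of E_{x~p1}[log p1(x) - log p2(x)] with SciPy, which
# should closely match the analytic value
# rate1 * (log(rate1) - log(rate2)) + rate2 - rate1 used above.
import numpy as np
from scipy.stats import poisson as sp_poisson

rate1, rate2 = 1.0, 2.0
samples = sp_poisson.rvs(mu=rate1, size=200000, random_state=0)
mc_kl = np.mean(sp_poisson.logpmf(samples, rate1) - sp_poisson.logpmf(samples, rate2))
analytic_kl = rate1 * (np.log(rate1) - np.log(rate2)) + rate2 - rate1
print(mc_kl, analytic_kl)  # the two values should agree to about two decimals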
Example #15
N = x_train.shape[0]  # number of documents
D = x_train.shape[1]  # vocabulary size
K = [100, 30, 15]  # number of components per layer
q = 'lognormal'  # choice of q; 'lognormal' or 'gamma'
shape = 0.1  # gamma shape parameter
lr = 1e-4  # learning rate step-size

# MODEL
W2 = Gamma(0.1, 0.3, sample_shape=[K[2], K[1]])
W1 = Gamma(0.1, 0.3, sample_shape=[K[1], K[0]])
W0 = Gamma(0.1, 0.3, sample_shape=[K[0], D])

z3 = Gamma(0.1, 0.1, sample_shape=[N, K[2]])
z2 = Gamma(shape, shape / tf.matmul(z3, W2))
z1 = Gamma(shape, shape / tf.matmul(z2, W1))
x = Poisson(tf.matmul(z1, W0))


# INFERENCE
def pointmass_q(shape):
    min_mean = 1e-3
    mean_init = tf.random_normal(shape)
    rv = PointMass(tf.maximum(tf.nn.softplus(tf.Variable(mean_init)),
                              min_mean))
    return rv


def gamma_q(shape):
    # Parameterize Gamma q's via shape and scale, with softplus unconstraints.
    min_shape = 1e-3
    min_scale = 1e-5
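
# Note: the snippet above is cut off inside gamma_q. For reference, below is a
# minimal sketch of the lognormal_q variational family that Example #20 calls,
# assuming the usual Edward-style parameterization (softplus-constrained scale
# with a small floor); this is an assumption, not part of the original snippet.
import tensorflow as tf
from edward.models import Normal, TransformedDistribution


def lognormal_q(shape):
    min_scale = 1e-5
    loc_init = tf.random_normal(shape)
    scale_init = 0.1 * tf.random_normal(shape)
    rv = TransformedDistribution(
        distribution=Normal(
            tf.Variable(loc_init),
            tf.maximum(tf.nn.softplus(tf.Variable(scale_init)), min_scale)),
        bijector=tf.contrib.distributions.bijectors.Exp())
    return rv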
Example #16
    # init
    lambda_1 = Exponential(alpha, name="lambda1")
    lambda_2 = Exponential(alpha, name="lambda2")
    tau = Uniform(low=0.0, high=float(n_count_data - 1), name="tau")
    idx = np.arange(n_count_data)
    lambda_ = tf.where(
        tau >= idx,
        tf.ones(shape=[n_count_data], dtype=tf.float32) * lambda_1,
        tf.ones(shape=[n_count_data], dtype=tf.float32) * lambda_2)

    # error
    z = Poisson(lambda_, value=tf.Variable(tf.ones(n_count_data)), name="poi")

# model
T = 5000  # number of posterior samples
with tf.name_scope("posterior"):

    qlambda_1 = Empirical(params=tf.Variable(tf.zeros([T])), name="qlambda1")
    qlambda_2 = Empirical(params=tf.Variable(tf.zeros([T])), name="qlambda2")
    qtau = Empirical(params=tf.Variable(tf.zeros([T])), name="qtau")
"""
qlambda_1 =  Empirical(params=tf.Variable(tf.zeros([n_count_data])))
qlambda_2 =  Empirical(params=tf.Variable(tf.zeros([n_count_data])))
"""
# qlambda_  =  Empirical(params=tf.Variable(tf.zeros([T,n_count_data,1])))
# qz = Empirical(params=tf.Variable(tf.random_normal([n_count_data,1])))
Example #17
def val_loocv(X_input,
              y_input,
              param_in,
              sigma_sq_in,
              max_VI_iter,
              qf_in,
              mean_prior=0):
    f_pred_all = np.zeros(X_input.shape[0])
    loo = LeaveOneOut()
    temp_sess = tf.Session()
    N = int(X_input.shape[0])
    D = int(X_input.shape[1])
    for train_index, test_index in loo.split(X_input):
        X_star_input = X_input[test_index, :].reshape(1, -1)
        X_other_input = X_input[train_index, :]
        y_other_input = y_input[train_index].reshape(-1, 1)
        k_star = rbf_fun(X_other_input,
                         X_star_input,
                         lengthscale=param_in[0],
                         variance=param_in[1])[0]
        k_star_1 = matern_fun(X_other_input,
                              X_star_input,
                              lengthscale_in=param_in[2],
                              gamma_in=param_in[3])[0]
        k_star_2 = rat_quadratic_fun(X_other_input,
                                     X_star_input,
                                     magnitude=param_in[4],
                                     lengthscale=param_in[5],
                                     diffuseness=param_in[6])[0]
        k_star_all = tf.add(tf.add(k_star, k_star_1), k_star_2)
        x_only_part = rbf_fun(X_other_input,
                              lengthscale=param_in[0],
                              variance=param_in[1])[0]
        x_only_part = tf.add(
            x_only_part,
            matern_fun(X_other_input,
                       lengthscale_in=param_in[2],
                       gamma_in=param_in[3])[0])
        x_only_part = tf.add(
            x_only_part,
            rat_quadratic_fun(X_other_input,
                              magnitude=param_in[4],
                              lengthscale=param_in[5],
                              diffuseness=param_in[6])[0])
        x_only_part = tf.add(
            x_only_part, tf.multiply(sigma_sq_in,
                                     tf.eye(int(X_input.shape[0]))))
        x_only_part_inv = tf.linalg.inv(x_only_part)
        # Inference from Edward Part
        X = tf.placeholder(tf.float32, [N - 1, D])
        f = MultivariateNormalTriL(loc=tf.zeros(N - 1),
                                   scale_tril=tf.cholesky(x_only_part))
        y = Poisson(rate=tf.nn.softplus(f))
        w_mat = tf.matmul(x_only_part_inv, k_star_all)
        y_other_input = tf.reshape(y_other_input, [-1])
        y_other_input = tf.cast(y_other_input, dtype=tf.float32)
        inference_vi = ed.KLqp({f: qf_in},
                               data={
                                   X: X_other_input,
                                   y: y_other_input
                               })
        inference_vi.run(n_iter=max_VI_iter)
        y_post = ed.copy(y, {f: qf_in})
        m_mat = y_post.eval()
        f_star_each = mean_prior + \
            tf.matmul(tf.transpose(w_mat),
                      (tf.reshape(y_other_input, [-1, 1]) - m_mat))
        f_pred_all[test_index] = temp_sess.run(f_star_each)
    sum_sq_err = np.sum(np.square(y_input - f_pred_all))
    return f_pred_all, sum_sq_err
Example #18
# MODEL
N = x_train.shape[0]  # number of data points
K = 3  # latent dimensionality

z = Normal(mu=tf.zeros([N, K]), sigma=tf.ones([N, K]))

# Calculate N x N distance matrix.
# 1. Create a vector, [||z_1||^2, ||z_2||^2, ..., ||z_N||^2], and tile
# it to create N identical rows.
xp = tf.tile(tf.reduce_sum(tf.pow(z, 2), 1, keep_dims=True), [1, N])
# 2. Create a N x N matrix where entry (i, j) is ||z_i||^2 + ||z_j||^2
# - 2 z_i^T z_j.
xp = xp + tf.transpose(xp) - 2 * tf.matmul(z, z, transpose_b=True)
# 3. Invert the pairwise distances and make rate along diagonals to
# be close to zero.
xp = 1.0 / tf.sqrt(xp + tf.diag(tf.zeros(N) + 1e3))

# Note Edward doesn't currently support sampling for Poisson.
# Hard-code it to 0's for now; it isn't used during inference.
x = Poisson(lam=xp, value=tf.zeros_like(xp))

# INFERENCE
inference = ed.MAP([z], data={x: x_train})

# Alternatively, run
# qz = Normal(mu=tf.Variable(tf.random_normal([N * K])),
#             sigma=tf.nn.softplus(tf.Variable(tf.random_normal([N * K]))))
# inference = ed.KLqp({z: qz}, data={x: x_train})

inference.run(n_iter=2500)
Example #19
class HPFScoring:
    """The HPF Model scoring class."""
    def __init__(self, datastore=None, USE_FEEDBACK=USE_FEEDBACK):
        """Set the variables and load model data."""
        self.datastore = datastore
        self.USE_FEEDBACK = convert_string2bool_env(USE_FEEDBACK)
        self.package_id_dict = OrderedDict()
        self.id_package_dict = OrderedDict()
        self.beta = None
        self.theta = None
        self.alpha = None
        self.manifest_id_dict = OrderedDict()
        self.feedback_id_dict = OrderedDict()
        self.manifests = 0
        self.packages = 0
        self.epsilon = Gamma(tf.constant(
            a_c), tf.constant(a_c) / tf.constant(b_c)).\
            prob(tf.constant(K, dtype=tf.float32)).eval(session=tf.Session())
        self.theta_dummy = Poisson(
            np.array([
                self.epsilon * Gamma(tf.constant(a), self.epsilon).prob(
                    tf.constant(K,
                                dtype=tf.float32)).eval(session=tf.Session())
            ] * K,
                     dtype=float))
        if isinstance(datastore, S3DataStore):  # pragma: no-cover
            self.load_s3()
        else:
            self.load_local()
        self.manifests = self.theta.shape[0]
        self.packages = self.beta.shape[0]
        self.dummy_result = self.theta_dummy.prob(
            self.beta).eval(session=tf.Session())

    @staticmethod
    def _getsizeof(attribute):
        """Return the size of attribute in MBs.

        :param attribute: The object's attribute.
        """
        return "{} MB".format(getsizeof(attribute) / 1024 / 1024)

    def model_details(self):
        """Return the model details size."""
        details = """The model will be scored against
        {} Packages,
        {} Manifests,
        Theta matrix of size {}, and
        Beta matrix of size {}.""".\
            format(
                len(self.package_id_dict),
                len(self.manifest_id_dict),
                HPFScoring._getsizeof(self.theta),
                HPFScoring._getsizeof(self.beta))
        return details

    def load_s3(self):  # pragma: no cover
        """Load the model data from AWS S3."""
        theta_matrix_filename = os.path.join(HPF_SCORING_REGION,
                                             HPF_output_user_matrix)
        self.datastore.download_file(theta_matrix_filename,
                                     "/tmp/user_matrix.npz")
        sparse_matrix = sparse.load_npz('/tmp/user_matrix.npz')
        self.theta = sparse_matrix.toarray()
        del (sparse_matrix)
        os.remove("/tmp/user_matrix.npz")
        beta_matrix_filename = os.path.join(HPF_SCORING_REGION,
                                            HPF_output_item_matrix)
        self.datastore.download_file(beta_matrix_filename,
                                     "/tmp/item_matrix.npz")
        sparse_matrix = sparse.load_npz('/tmp/item_matrix.npz')
        self.beta = sparse_matrix.toarray()
        del (sparse_matrix)
        os.remove("/tmp/item_matrix.npz")
        if self.USE_FEEDBACK:
            alpha_matrix_filename = os.path.join(HPF_SCORING_REGION,
                                                 HPF_output_feedback_matrix)
            self.datastore.download_file(alpha_matrix_filename,
                                         "/tmp/alpha_matrix.npz")
            sparse_matrix = sparse.load_npz("/tmp/alpha_matrix.npz")
            self.alpha = sparse_matrix.toarray()
            del (sparse_matrix)
        self.load_jsons()

    def load_local(self):
        """Load the model data from AWS S3."""
        theta_matrix_filename = os.path.join(self.datastore.src_dir,
                                             HPF_SCORING_REGION,
                                             HPF_output_user_matrix)
        sparse_matrix = sparse.load_npz(theta_matrix_filename)
        self.theta = sparse_matrix.toarray()
        del (sparse_matrix)
        beta_matrix_filename = os.path.join(self.datastore.src_dir,
                                            HPF_SCORING_REGION,
                                            HPF_output_item_matrix)
        sparse_matrix = sparse.load_npz(beta_matrix_filename)
        self.beta = sparse_matrix.toarray()
        del (sparse_matrix)
        if self.USE_FEEDBACK:
            alpha_matrix_filename = os.path.join(self.datastore.src_dir,
                                                 HPF_SCORING_REGION,
                                                 HPF_output_feedback_matrix)
            sparse_matrix = sparse.load_npz(alpha_matrix_filename)
            self.alpha = sparse_matrix.toarray()
            del (sparse_matrix)
        self.load_jsons()

    def load_jsons(self):
        """Load Json files via common methods for S3 and local."""
        package_id_dict_filename = os.path.join(HPF_SCORING_REGION,
                                                HPF_output_package_id_dict)
        self.package_id_dict = self.datastore.read_json_file(
            package_id_dict_filename)
        self.id_package_dict = OrderedDict({
            x: n
            for n, x in self.package_id_dict[0].get("package_list",
                                                    {}).items()
        })
        self.package_id_dict = OrderedDict(self.package_id_dict[0].get(
            "package_list", {}))
        manifest_id_dict_filename = os.path.join(HPF_SCORING_REGION,
                                                 HPF_output_manifest_id_dict)
        self.manifest_id_dict = self.datastore.read_json_file(
            manifest_id_dict_filename)
        self.manifest_id_dict = OrderedDict({
            n: set(x)
            for n, x in self.manifest_id_dict[0].get("manifest_list",
                                                     {}).items()
        })
        if self.USE_FEEDBACK:
            feedback_id_dict_filename = os.path.join(
                HPF_SCORING_REGION, HPF_output_feedback_id_dict)
            self.feedback_id_dict = self.datastore.read_json_file(
                feedback_id_dict_filename)
            self.feedback_id_dict = OrderedDict({
                n: set(x)
                for n, x in self.feedback_id_dict[0].get("feedback_list",
                                                         {}).items()
            })

    def predict(self, input_stack):
        """Prediction function.

        :param input_stack: The user's package list
        for which companion recommendations are to be generated.
        :return companion_recommendation: The list of recommended companion packages
        along with confidence scores.
        :return package_topic_dict: The topics associated with the packages
        in the input_stack+recommendation.
        :return missing_packages: The list of packages unknown to the HPF model.
        """
        input_stack = set(input_stack)
        input_id_set = set()
        missing_packages = set()
        package_topic_dict = {}
        companion_recommendation = []
        if not input_stack:
            return companion_recommendation, package_topic_dict, list(
                missing_packages)
        for package_name in input_stack:
            package_id = self.package_id_dict.get(package_name)
            if package_id:
                input_id_set.add(package_id)
                package_topic_dict[package_name] = []
            else:
                missing_packages.add(package_name)
        if len(missing_packages) / len(
                input_stack) < UNKNOWN_PACKAGES_THRESHOLD:
            companion_recommendation = self.folding_in(input_id_set)
        else:
            _logger.error(
                "{} length of missing packages beyond unknow threshold value of {}"
                .format(len(missing_packages), UNKNOWN_PACKAGES_THRESHOLD))
        return companion_recommendation, package_topic_dict, list(
            missing_packages)

    def match_manifest(self, input_id_set):  # pragma: no cover
        """Find a manifest list that matches user's input package list and return its index.

        :param input_id_set: A set containing package ids of user's input package list.
        :return manifest_id: The index of the matched manifest.
        """
        closest_manifest_id = -1
        max_diff = maxsize
        for manifest_id, dependency_set in self.manifest_id_dict.items():
            curr_diff = len(dependency_set.difference(input_id_set))
            if dependency_set == input_id_set:
                closest_manifest_id = manifest_id
                break
            elif input_id_set.issubset(
                    dependency_set) and curr_diff < max_diff:
                closest_manifest_id = manifest_id
                max_diff = curr_diff
        _logger.debug("input_id_set {} and manifest_id {}".format(
            input_id_set, closest_manifest_id))
        return closest_manifest_id

    def match_feedback_manifest(self, input_id_set):
        """Find a feedback manifest that matches user's input package list and return its index.

        :param input_id_set: A set containing package ids of user's input package list.
        :return manifest_id: The index of the matched feedback manifest.
        """
        for manifest_id, dependency_set in self.feedback_id_dict.items():
            if dependency_set == input_id_set:
                break
        else:
            manifest_id = -1
        _logger.debug("input_id_set {} and feedback_manifest_id {}".format(
            input_id_set, manifest_id))
        return manifest_id

    def folding_in(self, input_id_set):
        """Folding in logic for prediction.

        :param  input_id_set: A set containing package ids of user's input package list.
        :return: Filtered companion recommendations and their topics.
        """
        manifest_id = int(self.match_manifest(input_id_set))
        if manifest_id == -1:
            result = np.array(self.dummy_result)
        else:
            graph_new = tf.Graph()
            with graph_new.as_default():
                result = Poisson(self.theta[manifest_id])
                result = result.prob(self.beta)
            with tf.Session(graph=graph_new) as sess_new:
                result = sess_new.run(result)
        normalised_result = self.normalize_result(result, input_id_set)
        if self.USE_FEEDBACK:
            alpha_id = int(self.match_feedback_manifest(input_id_set))
            return self.filter_recommendation(normalised_result,
                                              alpha_id=alpha_id)
        return self.filter_recommendation(normalised_result)

    def normalize_result(self, result, input_id_set, array_len=None):
        """Normalise the probability score of the resulting recommendation.

        :param result: The non-normalised recommendation result array.
        :param input_id_set: The user's input package ids.
        :param array_len: length of normalised result array.
        :return normalised_result: The normalised recommendation result array.
        """
        if array_len is None:
            array_len = self.packages
        normalised_result = np.array([
            -1.0 if i in input_id_set else result[i].mean()
            for i in range(array_len)
        ])
        return normalised_result

    def filter_recommendation(self,
                              result,
                              alpha_id=-1,
                              max_count=MAX_COMPANION_REC_COUNT):
        """Filter companion recommendations based on sorted threshold score.

        :param result: The unfiltered companion recommendation result.
        :param max_count: Maximum number of recommendations to return.
        :return companion_recommendation: The filtered list of recommended companion packages
        along with confidence scores.
        :return package_topic_dict: The topics associated with the packages
        in the input_stack+recommendation.
        """
        highest_indices = set(result.argsort()[-max_count:len(result)])
        companion_recommendation = []
        if self.USE_FEEDBACK and alpha_id != -1:
            alpha_set = set(
                np.where(self.alpha[alpha_id] >= feedback_threshold)[0])
            highest_indices = highest_indices.intersection(alpha_set)

        for package_id in highest_indices:
            recommendation = {
                "cooccurrence_probability": result[package_id] * 100,
                "package_name": self.id_package_dict[package_id],
                "topic_list": []
            }
            companion_recommendation.append(recommendation)
        return companion_recommendation
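
# --- Self-contained sketch of the folding-in scoring step above (not from the
# original class): synthetic theta/beta arrays stand in for the matrices that
# load_s3()/load_local() provide, and K = 13 is an arbitrary assumed latent size.
import numpy as np
import tensorflow as tf
from edward.models import Poisson

K = 13
theta_row = np.random.gamma(0.3, 1.0, size=K).astype(np.float32)       # rates for one matched manifest
beta = np.random.gamma(0.3, 1.0, size=(500, K)).astype(np.float32)     # package x latent item factors

graph_new = tf.Graph()
with graph_new.as_default():
    scores = Poisson(theta_row).prob(beta)  # Poisson pmf of each beta entry, broadcast over packages
with tf.Session(graph=graph_new) as sess_new:
    result = sess_new.run(scores)           # shape (500, K); normalize_result() then averages each row
print(result.shape)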
Example #20
def main(_):
    ed.set_seed(42)

    # DATA
    x_train, metadata = nips(FLAGS.data_dir)
    documents = metadata['columns']
    words = metadata['rows']

    # Subset to documents in 2011 and words appearing in at least two
    # documents and have a total word count of at least 10.
    doc_idx = [
        i for i, document in enumerate(documents)
        if document.startswith('2011')
    ]
    documents = [documents[doc] for doc in doc_idx]
    x_train = x_train[:, doc_idx]
    word_idx = np.logical_and(
        np.sum(x_train != 0, 1) >= 2,
        np.sum(x_train, 1) >= 10)
    words = [word for word, idx in zip(words, word_idx) if idx]
    x_train = x_train[word_idx, :]
    x_train = x_train.T

    N = x_train.shape[0]  # number of documents
    D = x_train.shape[1]  # vocabulary size

    # MODEL
    W2 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[2], FLAGS.K[1]])
    W1 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[1], FLAGS.K[0]])
    W0 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[0], D])

    z3 = Gamma(0.1, 0.1, sample_shape=[N, FLAGS.K[2]])
    z2 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z3, W2))
    z1 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z2, W1))
    x = Poisson(tf.matmul(z1, W0))

    # INFERENCE
    qW2 = pointmass_q(W2.shape)
    qW1 = pointmass_q(W1.shape)
    qW0 = pointmass_q(W0.shape)
    if FLAGS.q == 'gamma':
        qz3 = gamma_q(z3.shape)
        qz2 = gamma_q(z2.shape)
        qz1 = gamma_q(z1.shape)
    else:
        qz3 = lognormal_q(z3.shape)
        qz2 = lognormal_q(z2.shape)
        qz1 = lognormal_q(z1.shape)

    # We apply variational EM with E-step over local variables
    # and M-step to point estimate the global weight matrices.
    inference_e = ed.KLqp({
        z1: qz1,
        z2: qz2,
        z3: qz3
    },
                          data={
                              x: x_train,
                              W0: qW0,
                              W1: qW1,
                              W2: qW2
                          })
    inference_m = ed.MAP({
        W0: qW0,
        W1: qW1,
        W2: qW2
    },
                         data={
                             x: x_train,
                             z1: qz1,
                             z2: qz2,
                             z3: qz3
                         })

    optimizer_e = tf.train.RMSPropOptimizer(FLAGS.lr)
    optimizer_m = tf.train.RMSPropOptimizer(FLAGS.lr)
    kwargs = {
        'optimizer': optimizer_e,
        'n_print': 100,
        'logdir': FLAGS.logdir,
        'log_timestamp': False
    }
    if FLAGS.q == 'gamma':
        kwargs['n_samples'] = 30
    inference_e.initialize(**kwargs)
    inference_m.initialize(optimizer=optimizer_m)

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    n_epoch = 20
    n_iter_per_epoch = 10000
    for epoch in range(n_epoch):
        print("Epoch {}".format(epoch))
        nll = 0.0

        pbar = Progbar(n_iter_per_epoch)
        for t in range(1, n_iter_per_epoch + 1):
            pbar.update(t)
            info_dict_e = inference_e.update()
            info_dict_m = inference_m.update()
            nll += info_dict_e['loss']

        # Compute perplexity averaged over a number of training iterations.
        # The model's negative log-likelihood of data is upper bounded by
        # the variational objective.
        nll /= n_iter_per_epoch
        perplexity = np.exp(nll / np.sum(x_train))
        print("Negative log-likelihood <= {:0.3f}".format(nll))
        print("Perplexity <= {:0.3f}".format(perplexity))

        # Print top 10 words for first 10 topics.
        qW0_vals = sess.run(qW0)
        for k in range(10):
            top_words_idx = qW0_vals[k, :].argsort()[-10:][::-1]
            top_words = " ".join([words[i] for i in top_words_idx])
            print("Topic {}: {}".format(k, top_words))
Example #21
# In[21]:

K = 175
train_data = np.array(x_train, dtype=int)
N = train_data.shape[0]
D = train_data.shape[1]

tf.reset_default_graph()
sess = tf.InteractiveSession()

idx_ph = tf.placeholder(tf.int32, M)
x_ph = tf.placeholder(tf.float32, [M, D])

U = Gamma(0.1, 0.5, sample_shape=[M, K])
V = Gamma(0.1, 0.3, sample_shape=[D, K])
x = Poisson(tf.matmul(U, V, transpose_b=True))

min_scale = 1e-5

qV_variables = [
    tf.Variable(tf.random_uniform([D, K])),
    tf.Variable(tf.random_uniform([D, K]))
]

qV = TransformedDistribution(
    distribution=Normal(qV_variables[0],
                        tf.maximum(tf.nn.softplus(qV_variables[1]), min_scale)),
    bijector=tf.contrib.distributions.bijectors.Exp())

qU_variables = [
Example #22
  for n in range(N):
    f_n = multivariate_normal.rvs(cov=K, size=1)
    for v in range(V):
      x[n, v] = poisson.rvs(mu=np.exp(f_n[v]), size=1)

  return x

ed.set_seed(42)

N = 308  # number of NBA players
V = 2  # number of shot locations

# DATA
x_data = build_toy_dataset(N, V)

# MODEL
x_ph = tf.placeholder(tf.float32, [N, V])  # inputs to Gaussian Process

# Form (N, V, V) covariance, one matrix per data point.
K = tf.stack([rbf(tf.reshape(xn, [V, 1])) + tf.diag([1e-6, 1e-6])
              for xn in tf.unstack(x_ph)])
f = MultivariateNormalTriL(loc=tf.zeros([N, V]), scale_tril=tf.cholesky(K))
x = Poisson(rate=tf.exp(f))

# INFERENCE
qf = Normal(loc=tf.Variable(tf.random_normal([N, V])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([N, V]))))

inference = ed.KLqp({f: qf}, data={x: x_data, x_ph: x_data})
inference.run(n_iter=5000)
Example #23
        def _log_prob(self, value):
            raise NotImplementedError("log_prob is not implemented")

        def _sample_n(self, n, seed=None):
            # a Tensor with shape (n,)
            raise NotImplementedError("sample_n is not implemented")


import numpy as np
import tensorflow as tf
import edward as ed
from edward.models import Poisson
from scipy.stats import poisson


def _sample_n(self, n=1, seed=None):
    def np_sample(rate, n):
        return poisson.rvs(mu=rate, size=n, random_state=seed).astype(np.float32)
    val = tf.py_func(np_sample, [self.rate, n], [tf.float32])[0]
    batch_event_shape = self.batch_shape.concatenate(self.event_shape)
    shape = tf.concat([tf.expand_dims(n, 0), tf.convert_to_tensor(batch_event_shape)], 0)
    val = tf.reshape(val, shape)
    return val


Poisson._sample_n = _sample_n
sess = ed.get_session()
x = Poisson(rate=1.0)
sess.run(x)
sess.run(x)
Example #24
            tf.reset_default_graph()
            sess = tf.InteractiveSession()

            idx_ph = tf.placeholder(tf.int32, M)
            cau_ph = tf.placeholder(tf.float32, [M, N])
            sd_ph = tf.placeholder(tf.float32, [M, N])
            reconstr_cau_ph = tf.placeholder(tf.float32, [M, N])

            U = Gamma(0.3 * tf.ones([M, K]), 0.3 * tf.ones([M, K]))
            V = Gamma(0.3 * tf.ones([N, K]), 0.3 * tf.ones([N, K]))
            gamma = Gamma(tf.ones([M, 1]), tf.ones([M, 1]))
            beta0 = Gamma(0.3 * tf.ones([1, 1]), 0.3 * tf.ones([1, 1]))

            x = Poisson(tf.add(tf.matmul(U, V, transpose_b=True),\
                tf.multiply(tf.matmul(gamma, tf.ones([1, N])), \
                    reconstr_cau_ph)) + beta0)


            qU_variables = [tf.Variable(tf.random_uniform([D, K])), \
                           tf.Variable(tf.random_uniform([D, K]))]

            qU = PointMass(
                params=tf.nn.softplus(tf.gather(qU_variables[0], idx_ph)))


            qV_variables = [tf.Variable(tf.random_uniform([N, K])), \
                           tf.Variable(tf.random_uniform([N, K]))]

            qV = PointMass(params=tf.nn.softplus(qV_variables[0]))
Example #25
    def define_model(self, N, P, K, batch_idx=None):
        self.W0 = Gamma(.1 * tf.ones([K + self.n_batches, P]),
                        .3 * tf.ones([K + self.n_batches, P]))
        if self.zero_inflation:
            self.W1 = Normal(tf.zeros([K + self.n_batches, P]),
                             tf.ones([K + self.n_batches, P]))

        self.z = Gamma(2. * tf.ones([N, K]), 1. * tf.ones([N, K]))

        disp_size = 1
        if self.gene_dispersion:
            disp_size = P
        self.r = Gamma(2. * tf.ones([disp_size]), 1. * tf.ones([disp_size]))

        self.l = TransformedDistribution(
            distribution=Normal(self.mean_llib * tf.ones([N, 1]),
                                np.sqrt(self.std_llib) * tf.ones([N, 1])),
            bijector=tf.contrib.distributions.bijectors.Exp())

        if batch_idx is not None and self.n_batches > 0:
            self.rho = tf.matmul(
                tf.concat([
                    self.z,
                    tf.cast(tf.one_hot(batch_idx[:, 0], self.n_batches),
                            tf.float32)
                ],
                          axis=1), self.W0)
        else:
            self.rho = tf.matmul(self.z, self.W0)

        if self.scalings:
            self.rho = self.rho / tf.reshape(tf.reduce_sum(self.rho, axis=1),
                                             (-1, 1))  # NxP
            self.lam = Gamma(self.r, self.r / (self.rho * self.l))
        else:
            self.lam = Gamma(self.r, self.r / self.rho)

        if self.zero_inflation:
            if batch_idx is not None and self.n_batches > 0:
                self.logit_pi = tf.matmul(
                    tf.concat([
                        self.z,
                        tf.cast(tf.one_hot(batch_idx[:, 0], self.n_batches),
                                tf.float32)
                    ],
                              axis=1), self.W1)
            else:
                self.logit_pi = tf.matmul(self.z, self.W1)
            self.pi = tf.minimum(
                tf.maximum(tf.nn.sigmoid(self.logit_pi), 1e-7), 1. - 1e-7)

            self.cat = Categorical(
                probs=tf.stack([self.pi, 1. - self.pi], axis=2))

            self.components = [
                Poisson(rate=1e-30 * tf.ones([N, P])),
                Poisson(rate=self.lam)
            ]

            self.likelihood = Mixture(cat=self.cat, components=self.components)
        else:
            self.likelihood = Poisson(rate=self.lam)