Example #1
    def _free_energy(self, v):
        K = float(self.n_hidden)
        M = float(self.n_samples)
        with tf.name_scope('free_energy'):
            T1 = -tf.einsum('ij,j->i', v, self._vb)
            T2 = -tf.matmul(v, self._W)
            h_hat = Multinomial(total_count=M, logits=tf.ones([K])).sample()
            T3 = tf.einsum('ij,j->i', T2, h_hat)
            fe = tf.reduce_mean(T1 + T3, axis=0)
            fe += -tf.lgamma(M + K) + tf.lgamma(M + 1) + tf.lgamma(K)
        return fe
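
A minimal standalone sketch of the same free-energy computation (TensorFlow 1.x assumed, with Multinomial taken from tf.distributions; the shapes, weights, and the n_samples value are made-up placeholders rather than the class attributes used above):

import numpy as np
import tensorflow as tf

Multinomial = tf.distributions.Multinomial  # assumed source of the Multinomial name above

n_visible, n_hidden, n_samples = 6, 4, 10
v = tf.placeholder(tf.float32, [None, n_visible])         # visible counts
vb = tf.Variable(tf.zeros([n_visible]))                   # visible bias
W = tf.Variable(tf.random_normal([n_visible, n_hidden]))  # RBM weights

K, M = float(n_hidden), float(n_samples)
T1 = -tf.einsum('ij,j->i', v, vb)                          # -v . b, one value per sample
T2 = -tf.matmul(v, W)                                      # -v W, one row per sample
h_hat = Multinomial(total_count=M, logits=tf.ones([n_hidden])).sample()
T3 = tf.einsum('ij,j->i', T2, h_hat)                       # project onto the sampled hidden counts
fe = tf.reduce_mean(T1 + T3, axis=0)
fe += -tf.lgamma(M + K) + tf.lgamma(M + 1) + tf.lgamma(K)  # equals -log C(M+K-1, M)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(fe, feed_dict={v: np.random.rand(3, n_visible).astype(np.float32)}))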
Example #2
def multinomial(policy, game_state):

    ## identify the free positions:
    free_positions = tf.to_float(tf.equal(game_state, tf.zeros((1, 9))))

    fm_mapping = lambda x: tf.diag(tf.reshape(x, (9, )))

    free_matrices = tf.map_fn(fm_mapping, free_positions)

    ## calculate probability vector:
    pvec_mapping = lambda x: tf.transpose(tf.matmul(x, tf.transpose(policy)))

    prob_vec = tf.map_fn(pvec_mapping, free_matrices)
    prob = prob_vec / (tf.reduce_sum(prob_vec) + tf.constant(1e-5))

    return Multinomial(total_count=1., probs=prob)
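
A hedged usage sketch for the helper above, assuming TensorFlow 1.x and that the Multinomial it refers to is tf.distributions.Multinomial: a single 3x3 board is flattened to nine cells, nonzero cells count as occupied, and the returned distribution only puts mass on the free cells.

import tensorflow as tf

Multinomial = tf.distributions.Multinomial  # assumed binding for the name used above

policy = tf.constant([[1.] * 9])  # uniform move preferences, shape (1, 9)
game_state = tf.constant([[0., 1., 0., -1., 0., 0., 1., 0., 0.]])  # nonzero cells are occupied

move_dist = multinomial(policy, game_state)  # the helper defined above
one_hot_move = move_dist.sample()            # one-hot vector over the nine cells

with tf.Session() as sess:
    print(sess.run(one_hot_move))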
Example #3
    def __call__(self, session, trainX, trainY, testX, testY):
        """ Initialize the actual graph

        Parameters
        ----------
        session : tf.Session
            Tensorflow session
        trainX : sparse array in coo format
            Training input OTU table, where rows are samples and columns are
            observations
        trainY : np.array
            Training output metabolite table
        testX : sparse array in coo format
            Test input OTU table, where rows are samples and columns are
            observations.  This is mainly for cross validation.
        testY : np.array
            Test output metabolite table.  This is mainly for cross validation.
        """
        self.session = session
        self.nnz = len(trainX.data)
        self.d1 = trainX.shape[1]
        self.d2 = trainY.shape[1]
        self.cv_size = len(testX.data)

        # keep the multinomial sampling on the cpu
        # https://github.com/tensorflow/tensorflow/issues/18058
        with tf.device('/cpu:0'):
            X_ph = tf.SparseTensor(indices=np.array([trainX.row,
                                                     trainX.col]).T,
                                   values=trainX.data,
                                   dense_shape=trainX.shape)
            Y_ph = tf.constant(trainY, dtype=tf.float32)

            X_holdout = tf.SparseTensor(indices=np.array(
                [testX.row, testX.col]).T,
                                        values=testX.data,
                                        dense_shape=testX.shape)
            Y_holdout = tf.constant(testY, dtype=tf.float32)

            total_count = tf.reduce_sum(Y_ph, axis=1)
            batch_ids = tf.multinomial(
                tf.log(tf.reshape(X_ph.values, [1, -1])), self.batch_size)
            batch_ids = tf.squeeze(batch_ids)
            X_samples = tf.gather(X_ph.indices, 0, axis=1)
            X_obs = tf.gather(X_ph.indices, 1, axis=1)
            sample_ids = tf.gather(X_samples, batch_ids)

            Y_batch = tf.gather(Y_ph, sample_ids)
            X_batch = tf.gather(X_obs, batch_ids)

        with tf.device(self.device_name):
            self.qUmain = tf.Variable(tf.random_normal([self.d1, self.p]),
                                      name='qU')
            self.qUbias = tf.Variable(tf.random_normal([self.d1, 1]),
                                      name='qUbias')
            self.qVmain = tf.Variable(tf.random_normal([self.p, self.d2 - 1]),
                                      name='qV')
            self.qVbias = tf.Variable(tf.random_normal([1, self.d2 - 1]),
                                      name='qVbias')

            qU = tf.concat([tf.ones([self.d1, 1]), self.qUbias, self.qUmain],
                           axis=1)
            qV = tf.concat(
                [self.qVbias,
                 tf.ones([1, self.d2 - 1]), self.qVmain], axis=0)

            # regression coefficents distribution
            Umain = Normal(loc=tf.zeros([self.d1, self.p]) + self.u_mean,
                           scale=tf.ones([self.d1, self.p]) * self.u_scale,
                           name='U')
            Ubias = Normal(loc=tf.zeros([self.d1, 1]) + self.u_mean,
                           scale=tf.ones([self.d1, 1]) * self.u_scale,
                           name='biasU')

            Vmain = Normal(loc=tf.zeros([self.p, self.d2 - 1]) + self.v_mean,
                           scale=tf.ones([self.p, self.d2 - 1]) * self.v_scale,
                           name='V')
            Vbias = Normal(loc=tf.zeros([1, self.d2 - 1]) + self.v_mean,
                           scale=tf.ones([1, self.d2 - 1]) * self.v_scale,
                           name='biasV')

            du = tf.gather(qU, X_batch, axis=0, name='du')
            dv = tf.concat([tf.zeros([self.batch_size, 1]), du @ qV],
                           axis=1,
                           name='dv')

            tc = tf.gather(total_count, sample_ids)
            Y = Multinomial(total_count=tc, logits=dv, name='Y')
            num_samples = trainX.shape[0]
            norm = num_samples / self.batch_size
            logprob_vmain = tf.reduce_sum(Vmain.log_prob(self.qVmain),
                                          name='logprob_vmain')
            logprob_vbias = tf.reduce_sum(Vbias.log_prob(self.qVbias),
                                          name='logprob_vbias')
            logprob_umain = tf.reduce_sum(Umain.log_prob(self.qUmain),
                                          name='logprob_umain')
            logprob_ubias = tf.reduce_sum(Ubias.log_prob(self.qUbias),
                                          name='logprob_ubias')
            logprob_y = tf.reduce_sum(Y.log_prob(Y_batch), name='logprob_y')
            self.log_loss = -(logprob_y * norm + logprob_umain +
                              logprob_ubias + logprob_vmain + logprob_vbias)

        # keep the multinomial sampling on the cpu
        # https://github.com/tensorflow/tensorflow/issues/18058
        with tf.device('/cpu:0'):
            # cross validation
            with tf.name_scope('accuracy'):
                cv_batch_ids = tf.multinomial(
                    tf.log(tf.reshape(X_holdout.values, [1, -1])),
                    self.cv_size)
                cv_batch_ids = tf.squeeze(cv_batch_ids)
                X_cv_samples = tf.gather(X_holdout.indices, 0, axis=1)
                X_cv = tf.gather(X_holdout.indices, 1, axis=1)
                cv_sample_ids = tf.gather(X_cv_samples, cv_batch_ids)

                Y_cvbatch = tf.gather(Y_holdout, cv_sample_ids)
                X_cvbatch = tf.gather(X_cv, cv_batch_ids)
                holdout_count = tf.reduce_sum(Y_cvbatch, axis=1)
                cv_du = tf.gather(qU, X_cvbatch, axis=0, name='cv_du')
                pred = tf.reshape(holdout_count, [-1, 1]) * tf.nn.softmax(
                    tf.concat([tf.zeros([self.cv_size, 1]), cv_du @ qV],
                              axis=1,
                              name='pred'))

                self.cv = tf.reduce_mean(tf.squeeze(tf.abs(pred - Y_cvbatch)))

        # keep all summaries on the cpu
        with tf.device('/cpu:0'):
            tf.summary.scalar('logloss', self.log_loss)
            tf.summary.scalar('cv_mae', self.cv)
            tf.summary.histogram('qUmain', self.qUmain)
            tf.summary.histogram('qVmain', self.qVmain)
            tf.summary.histogram('qUbias', self.qUbias)
            tf.summary.histogram('qVbias', self.qVbias)
            self.merged = tf.summary.merge_all()

            self.writer = tf.summary.FileWriter(self.save_path,
                                                self.session.graph)

        with tf.device(self.device_name):
            with tf.name_scope('optimize'):
                optimizer = tf.train.AdamOptimizer(self.learning_rate,
                                                   beta1=self.beta_1,
                                                   beta2=self.beta_2)

                gradients, self.variables = zip(
                    *optimizer.compute_gradients(self.log_loss))
                self.gradients, _ = tf.clip_by_global_norm(
                    gradients, self.clipnorm)
                self.train = optimizer.apply_gradients(
                    zip(self.gradients, self.variables))

        tf.global_variables_initializer().run()
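
The minibatching trick above draws nonzero entries of the sparse OTU table with probability proportional to their counts, via tf.multinomial over the log of the sparse values, and then recovers the row (sample) and column (observation) index of every draw. A minimal isolated sketch of that step with a made-up toy matrix (TensorFlow 1.x and scipy assumed):

import numpy as np
import tensorflow as tf
from scipy.sparse import coo_matrix

# toy sparse count table: rows are samples, columns are observations
trainX = coo_matrix(np.array([[3., 0., 1.],
                              [0., 2., 0.],
                              [5., 0., 4.]]))
batch_size = 4

X_ph = tf.SparseTensor(indices=np.array([trainX.row, trainX.col]).T,
                       values=trainX.data,
                       dense_shape=trainX.shape)

# sample nonzero entries with probability proportional to their counts
batch_ids = tf.squeeze(tf.multinomial(
    tf.log(tf.reshape(X_ph.values, [1, -1])), batch_size))
sample_ids = tf.gather(tf.gather(X_ph.indices, 0, axis=1), batch_ids)  # row of each draw
obs_ids = tf.gather(tf.gather(X_ph.indices, 1, axis=1), batch_ids)     # column of each draw

with tf.Session() as sess:
    print(sess.run([sample_ids, obs_ids]))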
Example #4
    def _sample(self, means):
        probs = tf.to_float(means / tf.reduce_sum(means))
        return Multinomial(total_count=self.n_samples, probs=probs)
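
A hedged usage sketch of the same pattern outside the class, assuming TensorFlow 1.x and tf.distributions.Multinomial: unnormalized mean counts are rescaled into a probability vector and wrapped in a Multinomial with a fixed number of trials.

import tensorflow as tf

Multinomial = tf.distributions.Multinomial  # assumed

means = tf.constant([4., 1., 5.])                  # unnormalized mean counts
probs = tf.to_float(means / tf.reduce_sum(means))  # [0.4, 0.1, 0.5]
dist = Multinomial(total_count=10., probs=probs)

with tf.Session() as sess:
    print(sess.run(dist.sample()))  # counts over three categories that sum to 10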
Example #5
def main(_):

    opts = Options(save_path=FLAGS.save_path,
                   train_biom=FLAGS.train_biom,
                   test_biom=FLAGS.test_biom,
                   train_metadata=FLAGS.train_metadata,
                   test_metadata=FLAGS.test_metadata,
                   formula=FLAGS.formula,
                   learning_rate=FLAGS.learning_rate,
                   clipping_size=FLAGS.clipping_size,
                   beta_mean=FLAGS.beta_mean,
                   beta_scale=FLAGS.beta_scale,
                   gamma_mean=FLAGS.gamma_mean,
                   gamma_scale=FLAGS.gamma_scale,
                   epochs_to_train=FLAGS.epochs_to_train,
                   num_neg_samples=FLAGS.num_neg_samples,
                   batch_size=FLAGS.batch_size,
                   min_sample_count=FLAGS.min_sample_count,
                   min_feature_count=FLAGS.min_feature_count,
                   statistics_interval=FLAGS.statistics_interval,
                   summary_interval=FLAGS.summary_interval,
                   checkpoint_interval=FLAGS.checkpoint_interval)
    # preprocessing
    train_table, train_metadata = opts.train_table, opts.train_metadata
    train_metadata = train_metadata.loc[train_table.ids(axis='sample')]

    sample_filter = lambda val, id_, md: (
        (id_ in train_metadata.index) and np.sum(val) > opts.min_sample_count)
    read_filter = lambda val, id_, md: np.sum(val) > opts.min_feature_count
    metadata_filter = lambda val, id_, md: id_ in train_metadata.index

    train_table = train_table.filter(metadata_filter, axis='sample')
    train_table = train_table.filter(sample_filter, axis='sample')
    train_table = train_table.filter(read_filter, axis='observation')
    train_metadata = train_metadata.loc[train_table.ids(axis='sample')]
    sort_f = lambda xs: [xs[train_metadata.index.get_loc(x)] for x in xs]
    train_table = train_table.sort(sort_f=sort_f, axis='sample')
    train_metadata = dmatrix(opts.formula,
                             train_metadata,
                             return_type='dataframe')

    # hold out data preprocessing
    test_table, test_metadata = opts.test_table, opts.test_metadata
    metadata_filter = lambda val, id_, md: id_ in test_metadata.index
    obs_lookup = set(train_table.ids(axis='observation'))
    feat_filter = lambda val, id_, md: id_ in obs_lookup

    test_table = test_table.filter(metadata_filter, axis='sample')
    test_table = test_table.filter(feat_filter, axis='observation')
    test_metadata = test_metadata.loc[test_table.ids(axis='sample')]
    sort_f = lambda xs: [xs[test_metadata.index.get_loc(x)] for x in xs]
    test_table = test_table.sort(sort_f=sort_f, axis='sample')
    test_metadata = dmatrix(opts.formula,
                            test_metadata,
                            return_type='dataframe')

    p = train_metadata.shape[1]  # number of covariates
    G_data = train_metadata.values
    y_data = np.array(train_table.matrix_data.todense()).T
    y_test = np.array(test_table.matrix_data.todense()).T
    N, D = y_data.shape
    save_path = opts.save_path
    learning_rate = opts.learning_rate
    batch_size = opts.batch_size
    gamma_mean, gamma_scale = opts.gamma_mean, opts.gamma_scale
    beta_mean, beta_scale = opts.beta_mean, opts.beta_scale
    num_iter = (N // batch_size) * opts.epochs_to_train
    holdout_size = test_metadata.shape[0]
    checkpoint_interval = opts.checkpoint_interval

    # Model code
    with tf.Graph().as_default(), tf.Session() as session:
        with tf.device("/cpu:0"):
            # Place holder variables to accept input data
            G_ph = tf.placeholder(tf.float32, [batch_size, p], name='G_ph')
            Y_ph = tf.placeholder(tf.float32, [batch_size, D], name='Y_ph')
            G_holdout = tf.placeholder(tf.float32, [holdout_size, p],
                                       name='G_holdout')
            Y_holdout = tf.placeholder(tf.float32, [holdout_size, D],
                                       name='Y_holdout')
            total_count = tf.placeholder(tf.float32, [batch_size],
                                         name='total_count')

            # Define PointMass Variables first
            qgamma = tf.Variable(tf.random_normal([1, D]), name='qgamma')
            qbeta = tf.Variable(tf.random_normal([p, D]), name='qB')

            # Distributions
            # species bias
            gamma = Normal(loc=tf.zeros([1, D]) + gamma_mean,
                           scale=tf.ones([1, D]) * gamma_scale,
                           name='gamma')
            # regression coefficents distribution
            beta = Normal(loc=tf.zeros([p, D]) + beta_mean,
                          scale=tf.ones([p, D]) * beta_scale,
                          name='B')

            Bprime = tf.concat([qgamma, qbeta], axis=0)

            # add bias terms for samples
            Gprime = tf.concat([tf.ones([batch_size, 1]), G_ph], axis=1)

            eta = tf.matmul(Gprime, Bprime)
            phi = tf.nn.log_softmax(eta)
            Y = Multinomial(total_count=total_count, logits=phi, name='Y')

            loss = -(tf.reduce_mean(gamma.log_prob(qgamma)) + \
                     tf.reduce_mean(beta.log_prob(qbeta)) + \
                     tf.reduce_mean(Y.log_prob(Y_ph)) * (N / batch_size))
            loss = tf.Print(loss, [loss])
            optimizer = tf.train.AdamOptimizer(learning_rate)

            gradients, variables = zip(*optimizer.compute_gradients(loss))
            gradients, _ = tf.clip_by_global_norm(gradients,
                                                  opts.clipping_size)
            train = optimizer.apply_gradients(zip(gradients, variables))

            with tf.name_scope('accuracy'):
                holdout_count = tf.reduce_sum(Y_holdout, axis=1)
                pred = tf.reshape(holdout_count, [-1, 1]) * tf.nn.softmax(
                    tf.matmul(G_holdout, qbeta) + qgamma)
                mse = tf.reduce_mean(tf.squeeze(tf.abs(pred - Y_holdout)))
                tf.summary.scalar('mean_absolute_error', mse)

            tf.summary.scalar('loss', loss)
            tf.summary.histogram('qbeta', qbeta)
            tf.summary.histogram('qgamma', qgamma)
            merged = tf.summary.merge_all()

            tf.global_variables_initializer().run()

            writer = tf.summary.FileWriter(save_path, session.graph)

            losses = np.array([0.] * num_iter)
            idx = np.arange(train_metadata.shape[0])
            log_handle = open(os.path.join(save_path, 'run.log'), 'w')

            last_checkpoint_time = 0
            start_time = time.time()
            saver = tf.train.Saver()
            for i in range(num_iter):
                batch_idx = np.random.choice(idx, size=batch_size)
                feed_dict = {
                    Y_ph: y_data[batch_idx].astype(np.float32),
                    G_ph: train_metadata.values[batch_idx].astype(np.float32),
                    Y_holdout: y_test.astype(np.float32),
                    G_holdout: test_metadata.values.astype(np.float32),
                    total_count:
                    y_data[batch_idx].sum(axis=1).astype(np.float32)
                }

                if i % 1000 == 0:
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    _, summary, train_loss, grads = session.run(
                        [train, merged, loss, gradients],
                        feed_dict=feed_dict,
                        options=run_options,
                        run_metadata=run_metadata)
                    writer.add_run_metadata(run_metadata, 'step%d' % i)
                    writer.add_summary(summary, i)
                elif i % 5000 == 0:
                    _, err, summary, train_loss, grads = session.run(
                        [train, mse, merged, loss, gradients],
                        feed_dict=feed_dict)
                    writer.add_summary(summary, i)
                else:
                    _, summary, train_loss, grads = session.run(
                        [train, merged, loss, gradients], feed_dict=feed_dict)
                    writer.add_summary(summary, i)

                now = time.time()
                if now - last_checkpoint_time > checkpoint_interval:
                    saver.save(session,
                               os.path.join(opts.save_path, "model.ckpt"),
                               global_step=i)
                    last_checkpoint_time = now

                losses[i] = train_loss
            elapsed_time = time.time() - start_time
            print('Elapsed Time: %f seconds' % elapsed_time)

            # Cross validation
            pred_beta = qbeta.eval()
            pred_gamma = qgamma.eval()
            mse, mrc = cross_validation(test_metadata.values, pred_beta,
                                        pred_gamma, y_test)
            print("MSE: %f, MRC: %f" % (mse, mrc))
Example #6
    def __call__(self, session, trainX, trainY, testX, testY):
        """ Initialize the actual graph

        Parameters
        ----------
        session : tf.Session
            Tensorflow session
        trainX : np.array
            Input training design matrix.
        trainY : np.array
            Output training OTU table, where rows are samples and columns are
            observations.
        testX : np.array
            Input testing design matrix.
        testY : np.array
            Output testing OTU table, where rows are samples and columns are
            observations.
        """
        self.session = session
        self.N, self.p = trainX.shape
        self.D = trainY.shape[1]
        holdout_size = testX.shape[0]

        # Place holder variables to accept input data
        self.X_ph = tf.constant(trainX, dtype=tf.float32, name='G_ph')
        self.Y_ph = tf.constant(trainY, dtype=tf.float32, name='Y_ph')
        self.X_holdout = tf.constant(testX, dtype=tf.float32, name='G_holdout')
        self.Y_holdout = tf.constant(testY, dtype=tf.float32, name='Y_holdout')

        batch_ids = tf.multinomial(tf.ones([1, self.N]), self.batch_size)
        sample_ids = tf.squeeze(batch_ids)

        Y_batch = tf.gather(self.Y_ph, sample_ids, axis=0)
        X_batch = tf.gather(self.X_ph, sample_ids, axis=0)

        total_count = tf.reduce_sum(Y_batch, axis=1)
        holdout_count = tf.reduce_sum(self.Y_holdout, axis=1)

        # Define PointMass Variables first
        self.qbeta = tf.Variable(tf.random_normal([self.p, self.D - 1]),
                                 name='qB')

        # regression coefficents distribution
        beta = Normal(loc=tf.zeros([self.p, self.D - 1]) + self.beta_mean,
                      scale=tf.ones([self.p, self.D - 1]) * self.beta_scale,
                      name='B')

        eta = tf.matmul(X_batch, self.qbeta, name='eta')

        phi = tf.nn.log_softmax(tf.concat(
            [tf.zeros([self.batch_size, 1]), eta], axis=1),
                                name='phi')

        Y = Multinomial(total_count=total_count, logits=phi, name='Y')

        # cross validation
        with tf.name_scope('accuracy'):
            pred = tf.reshape(holdout_count, [-1, 1]) * tf.nn.softmax(
                tf.concat([
                    tf.zeros([holdout_size, 1]),
                    tf.matmul(self.X_holdout, self.qbeta)
                ],
                          axis=1),
                name='phi')

            self.cv = tf.reduce_mean(tf.squeeze(tf.abs(pred - self.Y_holdout)))
            tf.summary.scalar('mean_absolute_error', self.cv)

        self.loss = -(tf.reduce_sum(beta.log_prob(self.qbeta)) +
                      tf.reduce_sum(Y.log_prob(Y_batch)) *
                      (self.N / self.batch_size))

        optimizer = tf.train.AdamOptimizer(self.learning_rate,
                                           beta1=self.beta_1,
                                           beta2=self.beta_2)

        gradients, variables = zip(*optimizer.compute_gradients(self.loss))
        self.gradients, _ = tf.clip_by_global_norm(gradients, self.clipnorm)
        self.train = optimizer.apply_gradients(zip(gradients, variables))

        tf.summary.scalar('loss', self.loss)
        tf.summary.histogram('qbeta', self.qbeta)
        self.merged = tf.summary.merge_all()
        if self.save_path is not None:
            self.writer = tf.summary.FileWriter(self.save_path,
                                                self.session.graph)
        else:
            self.writer = None
        tf.global_variables_initializer().run()
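
Both __call__ implementations above pin the first category by concatenating a column of zeros onto the linear predictor before the softmax, so only D - 1 coefficient columns are free parameters. A minimal numpy sketch of that identifiability trick, with made-up shapes:

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

batch, p, D = 3, 2, 5
X_batch = np.random.randn(batch, p)
qbeta = np.random.randn(p, D - 1)              # only D - 1 free columns

eta = X_batch @ qbeta                          # shape (batch, D - 1)
phi = np.hstack([np.zeros((batch, 1)), eta])   # first category fixed at logit 0
probs = softmax(phi)                           # rows sum to 1 over all D categories
print(probs.sum(axis=1))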
Example #7
class GraphexNMF(object):

    def __init__(self, edge_idx, edge_vals, U, I,  K, hparams, ground_truth=None, simple_graph=False, GPU=False,
                 fix_item_params=False, comp_rem=True, edge_param_splits=1, seed=None, sess=None, device='/cpu:0',
                 ppm=False):
        """
        Model for Sparse Exchangeable bipartite graph
        
        """

        self.ppm = ppm
        # Launch the session
        if sess:
            self.sess = sess
            # config is still needed below for the degree-computation session
            config = tf.ConfigProto(allow_soft_placement=True)
        else:
            if GPU:
                # For GPU mode
                config = tf.ConfigProto(allow_soft_placement=True)
                config.gpu_options.allow_growth = True
                config.gpu_options.allocator_type = 'BFC'
                self.sess = tf.Session(config=config)
            else:
                config = tf.ConfigProto(allow_soft_placement=True)
                self.sess = tf.Session(config=config)

        self.device = device
        self.comp_rem = comp_rem
        self.seed = seed
        self.K = K
        self.ground_truth = ground_truth
        self.simple_graph = simple_graph
        self.U, self.I = U, I
        self.fix_item_params = fix_item_params
        self.hparams = hparams
        self.edge_param_splits = edge_param_splits  # split edge parameters into this many partitions so they fit in GPU memory
        self.GPU = GPU

        # store the data here:
        self.edge_idx_d = edge_idx

        if self.simple_graph:
            self.edge_vals_d = np.ones(edge_vals.shape[0], dtype=np.float32)
        else:
            self.edge_vals_d = edge_vals.astype(np.float32)

        # create placeholders for the computational graph
        with tf.name_scope("placeholders"):
            with tf.device(self.device):
                self.edge_idx = tf.placeholder(dtype=tf.int32, shape=(edge_idx.shape[0], edge_idx.shape[1]))
                self.edge_vals = tf.placeholder(dtype=tf.float32, shape=(edge_idx.shape[0]))

        if simple_graph:
            # Degree computation without tensorflow. Only works for simple graphs
            _, self.user_degree = np.unique(self.edge_idx_d[:, 0], return_counts=True)
            _, self.item_degree = np.unique(self.edge_idx_d[:, 1], return_counts=True)
            self.user_degree = self.user_degree.astype(np.float32)
            self.item_degree = self.item_degree.astype(np.float32)
        else:
            with tf.name_scope("init_deg_comp"):
                with tf.device(self.device):
                    user_degree, item_degree = compute_degrees2(tf.expand_dims(self.edge_vals, axis=1), self.edge_idx,
                                                                self.U, self.I)
                    user_degree = tf.squeeze(user_degree)
                    item_degree = tf.squeeze(item_degree)

            with tf.Session(config=config) as sess:
                self.user_degree, self.item_degree = sess.run([user_degree, item_degree],
                                                              feed_dict={self.edge_vals: self.edge_vals_d,
                                                                         self.edge_idx: self.edge_idx_d})

        
        print(repr(np.sum(self.user_degree)))
        print(repr(np.sum(self.item_degree)))

        self.occupied_pairs = edge_idx.shape[0] # oc_pa

        self._initialize_parameters(hparams, ppm)

        # random sample for diagnostics
        np.random.seed(self.seed)
        self.included_sample = self.edge_idx_d[np.random.choice(self.edge_idx_d.shape[0], 1000, replace=False)]
        user_sample = np.random.choice(self.U, 1000)
        item_sample = np.random.choice(self.I, 1000)
        self.pair_sample = np.vstack((user_sample, item_sample)).T

        # appx llhd for assessing convergence
        with tf.name_scope("appx_llhd"):
            self._build_appx_elbo()

        # computational graph for coordinate ascent
        with tf.name_scope("coordinate_ascent"):
            self._build_computation_graph()

        with tf.name_scope("evaluation"):
            with tf.device(self.device):
                self._build_predict_edges()
                self.edge_mean_summary = tf.reduce_mean(self.q_e_aux_vals.mean(), axis=0)

        with tf.name_scope("recommendation"), tf.device(self.device):
            self._build_rec_uncensored_edge_pops()

            self._censored_edge_pops = tf.placeholder(dtype=tf.float32)
            self._num_rec = tf.placeholder(dtype=tf.int32, shape=())
            self._top_k = tf.nn.top_k(self._censored_edge_pops, self._num_rec)

        # logging
        self.summary_writer = tf.summary.FileWriter('../logs', graph=self.sess.graph)

        # Initializing the tensor flow variables
        with tf.device(self.device):
            init = tf.global_variables_initializer()
        self.sess.run(init)

        # qm_du and qm_di were initialized arbitrarily, so they are inconsistent with the
        # initial values of the edge parameters; this run fixes that
        if not(ppm):
            self.sess.run(self.deg_update, feed_dict={self.edge_vals: self.edge_vals_d, self.edge_idx: self.edge_idx_d})


    def _initialize_parameters(self, hparams, ppm):

        K = np.float32(self.K)

        su, tu, a, b, self.size_u = (hparams['su'], hparams['tu'], hparams['a'], hparams['b'], hparams['size_u'])
        si, ti, c, d, self.size_i = (hparams['si'], hparams['ti'], hparams['c'], hparams['d'], hparams['size_i'])

        with tf.name_scope("hparams"), tf.device(self.device):
            ## Hyperparameters
            self.lsu = tf.Variable(softplus_inverse(-hparams['su'] + 1.), dtype=tf.float32, name="lsu")
            self.su = -tf.nn.softplus(self.lsu) + 1.

            self.tu = tf.Variable(hparams['tu'], dtype=tf.float32, name="tu")

            self.a = tf.Variable(hparams['a'], dtype=tf.float32, name="a")
            self.b = tf.Variable(hparams['b'], dtype=tf.float32, name="b")

            self.lsi = tf.Variable(softplus_inverse(-hparams['si'] + 1.), dtype=tf.float32, name="lsi")
            self.si = -tf.nn.softplus(self.lsi) + 1.

            self.ti = tf.Variable(hparams['ti'], dtype=tf.float32, name="ti")

            self.c = tf.Variable(hparams['c'], dtype=tf.float32, name="c")
            self.d = tf.Variable(hparams['d'], dtype=tf.float32, name="d")

        e = np.sum(self.edge_vals_d, dtype=np.float32)

        # initial values for total user and total item masses of type K
        # set st \sum_k tim_k * tum_k = e (which is in fact a bit higher than it oughta be)
        # and using item_mass / user_mass ~ item_size / user_size (which is only kind of true)
        tum_init = np.sqrt(self.size_u / self.size_i * e / K)
        tim_init = np.sqrt(self.size_i / self.size_u * e / K)
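        # Note: tum_init * tim_init = e / K by construction, so summing the product over the
        # K components recovers e, which is the "\sum_k tim_k * tum_k = e" target stated above.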

        with tf.name_scope("user_params"), tf.device(self.device):
            # shape params are read off immediately from update equations
            # rate params set to be consistent w \gam_i ~ 1, \sum_j beta_jk beta_k ~ \sqrt(e/k) (which is self consistent)
            if ppm:
                # If building the principled predictive model (ppm), the user degrees are not
                # available yet; use a random initialization for now and update it with a
                # default value later.
                self.gam_shp = tf.Variable(tf.random_gamma([self.U, 1], 5., 5., seed=self.seed), dtype=tf.float32, name="gam_shp")
                self.gam_rte = tf.Variable(tf.random_gamma([self.U, 1], 5., 5., seed=self.seed), dtype=tf.float32, name="gam_rte")
                self.theta_shp = tf.Variable(tf.random_gamma([self.U, self.K], 10., 10., seed=self.seed), name="theta_shp")
                self.theta_rte = tf.Variable(tf.random_gamma([self.U, self.K], 5., 5., seed=self.seed), name="theta_rte")
                self.g = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="g")
            else:
                user_degs = np.expand_dims(self.user_degree, axis=1)
                self.gam_shp = tf.Variable((user_degs - su), name="gam_shp")  # s^U
                self.gam_rte = tf.Variable(np.sqrt(e) * (0.9 + 0.1*tf.random_gamma([self.U, 1], 5., 5., seed=self.seed)), dtype=tf.float32, name="gam_rte")  # r^U
                init_gam_mean = self.gam_shp.initial_value / self.gam_rte.initial_value
                self.theta_shp = tf.Variable((a + user_degs/K) * tf.random_gamma([self.U, self.K], 10., 10., seed=self.seed), name="theta_shp")  # kap^U
                self.theta_rte = tf.Variable((b + init_gam_mean * tim_init)*(0.9 + 0.1*tf.random_gamma([self.U, self.K], 5., 5., seed=self.seed)), name="theta_rte")  # lam^U
                self.g = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="g")  # g


        with tf.name_scope("item_params"), tf.device(self.device):
            ## Items
            if ppm:
                self.omega_shp = tf.Variable(tf.random_gamma([self.I, 1], 5., 5., seed=self.seed), name="omega_shp")  # s^I
                self.omega_rte = tf.Variable(tf.random_gamma([self.I, 1], 5., 5., seed=self.seed), dtype=tf.float32, name="omega_rte")  # r^I
                self.beta_shp = tf.Variable(tf.random_gamma([self.I, self.K], 10., 10., seed=self.seed), name="beta_shp")  # kap^I
                self.beta_rte = tf.Variable(tf.random_gamma([self.I, self.K], 5., 5., seed=self.seed), name="beta_rte")  # lam^I
                self.w = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="w")  # w
            else:
                item_degs = np.expand_dims(self.item_degree, axis=1)
                self.omega_shp = tf.Variable((item_degs - si), name="omega_shp")  # s^I
                self.omega_rte = tf.Variable(np.sqrt(e) * (0.9 + 0.1*tf.random_gamma([self.I, 1], 5., 5., seed=self.seed)), dtype=tf.float32, name="omega_rte")  # r^I
                init_omega_mean = self.omega_shp.initial_value / self.omega_rte.initial_value
                self.beta_shp = tf.Variable((c + item_degs/K) * tf.random_gamma([self.I, self.K], 10., 10., seed=self.seed), name="beta_shp")  # kap^I
                self.beta_rte = tf.Variable((d + init_omega_mean*tum_init) * (0.9 + 0.1*tf.random_gamma([self.I, self.K], 5., 5., seed=self.seed)), name="beta_rte")  # lam^I
                self.w = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="w")  # w

        with tf.device('/cpu:0'):
            with tf.variable_scope("edge_params", reuse=None):
                ## Edges
                if self.simple_graph:
                    # set init value so there's approximately 1 expected edge between each pair... WARNING: this may be profoundly stupid
                    self.sg_edge_param = tf.get_variable(name="sg_edge_param", shape=[self.occupied_pairs, self.K], dtype=tf.float32,
                                    initializer=tf.random_normal_initializer(mean=-np.log(K), stddev=1. / K, seed=self.seed),
                                    partitioner=tf.fixed_size_partitioner(self.edge_param_splits, 0))
                else:
                    self.lphi = tf.get_variable(name="lphi", shape=[self.occupied_pairs, self.K], dtype=tf.float32,
                                    initializer=tf.random_normal_initializer(mean=0, stddev=1. / K, seed=self.seed),
                                    partitioner=tf.fixed_size_partitioner(self.edge_param_splits, 0))

        with tf.name_scope("variational_post"), tf.device(self.device):

            # Variational posterior distributions
            self.q_gam = Gamma(concentration=self.gam_shp, rate=self.gam_rte, name="q_gam")
            self.q_theta = Gamma(concentration=self.theta_shp, rate=self.theta_rte, name="q_theta")
            self.q_g = PointMass(self.g, name="q_g")

            self.q_omega = Gamma(concentration=self.omega_shp, rate=self.omega_rte, name="q_omega")
            self.q_beta = Gamma(concentration=self.beta_shp, rate=self.beta_rte, name="q_beta")
            self.q_w = PointMass(self.w, name="q_w")

            if self.simple_graph:
                self.q_e_aux_vals = tPoissonMulti(log_lams=self.sg_edge_param, name="q_e_aux_vals") # q_edges_aux_flat
            else:
                self.q_e_aux_vals = Multinomial(total_count=self.edge_vals, logits=self.lphi, name="q_e_aux_vals") # q_edges_aux_flat
                self.q_e_aux_vals_mean = self.q_e_aux_vals.mean()

        with tf.name_scope("degree_vars"):
            # create some structures to make it easy to work with the expected value (wrt q) of the edges

            # qm_du[u,k] is the expected weighted degree of user u counting only edges of type k
            # qm_du[u,k] = E_q[e^k_i.] in the language of the paper
            # initialized arbitrarily, will override at end of init to set to
            # we use a tf.Variable here to cache the q_e_aux_vals.mean() value
            self.qm_du = tf.Variable(tf.ones([self.U, self.K], dtype=tf.float32), name="qm_du")
            self.qm_di = tf.Variable(tf.ones([self.I, self.K], dtype=tf.float32), name="qm_di")

        # Total Item Mass:
        self.i_tot_mass_m = self.q_w.mean() + tf.matmul(self.q_beta.mean(), self.q_omega.mean(), transpose_a=True)
        # Total User Mass:
        self.u_tot_mass_m = self.q_g.mean() + tf.matmul(self.q_theta.mean(), self.q_gam.mean(), transpose_a=True)

    def _build_computation_graph(self):
        with tf.name_scope("user_update"):
            with tf.device(self.device):
                [new_gam_shp, new_gam_rte,
                 new_theta_shp, new_theta_rte, new_g] = user_updates(
                    q_gam=self.q_gam, q_theta=self.q_theta, q_omega=self.q_omega, q_beta=self.q_beta, q_w=self.q_w,
                    qm_du=self.qm_du,
                    a=self.a, b=self.b, su=self.su, tu=self.tu,
                    size=self.size_u,
                    comp_rem=self.comp_rem,
                    n_samp=95
                    )

            # observation: gamma_rte depends on theta, and theta_rte depends on gamma
            # so these shouldn't update simultaneously
            # logical division: compute gamma update, then compute theta update.
            # we compute theta_shp as part of gamma update to avoid (huge) repeated computation
            self.u_update_one = assign_list(vars=[self.gam_shp, self.gam_rte, self.theta_shp, self.g],
                                            new_values=[new_gam_shp, new_gam_rte, new_theta_shp, new_g])
            self.u_update_two = assign_list(vars=[self.theta_rte], new_values=[new_theta_rte])

        with tf.name_scope("item_update"):
            with tf.device(self.device):
                [new_omega_shp, new_omega_rte,
                 new_beta_shp, new_beta_rte, new_w] = user_updates(
                    self.q_omega, self.q_beta,
                    self.q_gam, self.q_theta, self.q_g,
                    self.qm_di,
                    self.c, self.d, self.si, self.ti,
                    size=self.size_i,
                    comp_rem=self.comp_rem,
                    n_samp=95)

            # division into two updates for same reason as users
            self.i_update_one = assign_list(vars=[self.omega_shp, self.omega_rte, self.beta_shp, self.w],
                             new_values=[new_omega_shp, new_omega_rte, new_beta_shp, new_w])
            self.i_update_two = assign_list(vars=[self.beta_rte], new_values=[new_beta_rte])

        with tf.name_scope("edge_update"):
            with tf.device(self.device):
                # split the edge list to avoid memory issues
                edge_idx_split = tensor_split(self.edge_idx, self.edge_param_splits)

                if self.simple_graph:
                    new_sg_edge_params = \
                        [simple_graph_edge_update(self.q_theta, self.q_beta, self.q_gam, self.q_omega, edge_idx) for edge_idx in edge_idx_split]
                else:
                    new_lphis = \
                        [multi_graph_edge_update(self.q_theta, self.q_beta, edge_idx) for edge_idx in edge_idx_split]

            if self.simple_graph:
                self.sg_edge_param_update = [sg_edge_param.assign(new_sg_edge_param) for sg_edge_param, new_sg_edge_param
                                             in zip(self.sg_edge_param._get_variable_list(), new_sg_edge_params)]
            else:
                self.lphi_update = [lphi.assign(new_lphi) for (lphi, new_lphi) in zip(self.lphi._get_variable_list(), new_lphis)]

        with tf.name_scope("qm_deg_update"):
            with tf.device(self.device):
                new_qm_du, new_qm_di = compute_degrees2(self.q_e_aux_vals.mean(), self.edge_idx, self.U, self.I)
            
            self.deg_update = assign_list(vars=[self.qm_du, self.qm_di], new_values=[new_qm_du, new_qm_di])


    def _fix_post_assigns(self, true_omega, true_beta, users=False, items=True):
        """
        Method to be used for debugging:
        Fix item parameters to ground truth values
        """
        # fix variational posteriors to be tightly concentrated around true values

        item_assigns = assign_list([self.omega_shp, self.omega_rte, self.beta_shp, self.beta_rte, self.w],
                         [100 * true_omega, 100 * tf.ones_like(true_omega),
                       100 * true_beta, 100 * tf.ones_like(true_beta),
                       0.01 * tf.ones_like(self.w)]) # actually, I'm not sure about this one
        self.sess.run(item_assigns, feed_dict={self.edge_vals: self.edge_vals_d, self.edge_idx: self.edge_idx_d})

    def _edge_prob_samples(self, pred_edges, N=100):
        """

        :param pred_edges: edge list
        :param N: number of samples
        :param log: if True, return E[log(p(e_ij = 1 | params)]
        :return: E[p(e_ij = 1 | sampled_params)] for each ij in pred_edges
        """
        users_idx = pred_edges[:, 0]
        items_idx = pred_edges[:, 1]

        # MC estimate
        # this is logically equivalent to drawing samples from q and then gathering the necessary ones,
        # but much faster when there are many users and items

        # relevant params for simulation
        omega_shp = tf.gather(self.omega_shp, items_idx)
        omega_rte = tf.gather(self.omega_rte, items_idx)
        beta_shp = tf.gather(self.beta_shp, items_idx)
        beta_rte = tf.gather(self.beta_rte, items_idx)

        gam_shp = tf.gather(self.gam_shp, users_idx)
        gam_rte = tf.gather(self.gam_rte, users_idx)
        theta_shp = tf.gather(self.theta_shp, users_idx)
        theta_rte = tf.gather(self.theta_rte, users_idx)

        # samples for MC estimate
        omega_smp = tf.random_gamma([N], omega_shp, omega_rte, seed=self.seed)
        beta_smp = tf.random_gamma([N], beta_shp, beta_rte, seed=self.seed)
        gam_smp = tf.random_gamma([N], gam_shp, gam_rte, seed=self.seed)
        theta_smp = tf.random_gamma([N], theta_shp, theta_rte, seed=self.seed)

        user_weights_s = gam_smp * theta_smp
        item_weights_s = omega_smp * beta_smp
        edge_weight_s = tf.reduce_sum(user_weights_s * item_weights_s, axis=2)

        prob_samp = 1. - tf.exp(-edge_weight_s)
        return prob_samp

    def _build_predict_edges(self, N=100):
        """
        Only handles SG
        Returns prob given an edge list
        """
        with tf.device(self.device):
            self.pred_edges_ph = tf.placeholder(dtype=tf.int32)
            # MC estimate
            self.predict_edges = tf.reduce_mean(self._edge_prob_samples(self.pred_edges_ph, N=N), axis=0)


    def _build_rec_uncensored_edge_pops(self):
        """
        Builds matrix of expected number of edges between all items and self._rec_users
        """
        with tf.device(self.device):
            self._rec_users = tf.placeholder(dtype=tf.int32)

            q_gam_mean = self.q_gam.mean()
            q_theta_mean = self.q_theta.mean()

            q_omega_mean = self.q_omega.mean()
            q_beta_mean = self.q_beta.mean()

            user_params = tf.gather(q_gam_mean, self._rec_users) * tf.gather(q_theta_mean, self._rec_users)
            item_params  = q_omega_mean * q_beta_mean

            # edge_pops[user,item] gives the affinity of user to item
            self._rec_uncensored_edge_pops = tf.matmul(user_params, item_params, transpose_b=True)

    def _build_appx_elbo(self):
        """
        Returns an estimate of \sum_{e in test_idxs} log(prob(e)) + \sum{e not in test_idxs} log(1-prob(e))
        this is not actually the log likelihood because it ignores the contribution of uninstantiated atoms
        (actually, maybe this is handled after all...)
        :param test_idxs: tensor of shape [e, 2], indices of edges of graph
        :return: estimate of \sum_{e in test_idxs} log(prob(e)) + \sum{e not in test_idxs} log(1-prob(e))
        """

        # MC estimate of contribution from edges
        # obvious choice: uniformly sample terms... but resulting estimator is super high variance
        # edges_sample = np.copy(self.edge_idx_d[np.random.choice(self.edge_idx_d.shape[0], 3000, replace=False)]).astype(np.int32)
        # so instead use p-sampling... although it's unclear whether this really represents a major improvement
        e = self.edge_vals_d.shape[0]
        p_inc = np.sqrt(5000. / e) #use about 5000 edges for MC est
        edges_sample = item_p_sample(user_p_sample(self.edge_idx_d, p_inc)[0], p_inc)[0].astype(np.int32)

        # clip by value because of numerical issues
        p_edge_samples = tf.clip_by_value(self._edge_prob_samples(edges_sample), 1e-15, 1.)

        # reduce_mean is MC estimate over params of model, reduce_sum is summing cont from p-samp
        edge_llhd_est = 1. / p_inc**2 * tf.reduce_sum(tf.reduce_mean(tf.log(p_edge_samples), axis=0))

        # log(1-p_ij) = -lambda_ij, so:
        tot_lam_sum = tf.reduce_sum(self.i_tot_mass_m*self.u_tot_mass_m) # includes contribution from edges as well as non-edges
        # subtract off edge contribution:
        user_params = tf.gather(self.q_gam.mean() * self.q_theta.mean(), self.edge_idx[:,0])
        item_params = tf.gather(self.q_omega.mean() * self.q_beta.mean(), self.edge_idx[:,1])
        edges_lam_sum = tf.reduce_sum(user_params * item_params)
        nonedge_llhd_term = -(tot_lam_sum - edges_lam_sum)

        # hopefully lower variance than direct MC est
        #\sum_edges log(p_ij) = -\sum_edges lam_ij + \sum_ij log(p_ij / (1-p_ij))

        # note: the reduce mean here averages over both the sampled params in p_edge_samples, and over the random choice of edges
        # edge_llhd_est = -edges_lam_sum + e*tf.reduce_mean(tf.reduce_mean(tf.log(p_edge_samples / (1. - p_edge_samples)), axis=0))

        self.appx_elbo = [edge_llhd_est, nonedge_llhd_term]



    def load_pretrained_model(self, gam_shp, gam_rte, theta_shp, theta_rte, g, omega_shp, omega_rte, beta_shp, beta_rte, w):
        user_assign = assign_list([self.gam_shp, self.gam_rte, self.theta_shp, self.theta_rte, self.g],
                                           [gam_shp, gam_rte, theta_shp, theta_rte, g])
        item_assign = assign_list([self.omega_shp, self.omega_rte, self.beta_shp, self.beta_rte, self.w],
                                           [omega_shp, omega_rte, beta_shp, beta_rte, w])

        # apply the provided pretrained parameter values
        self.sess.run(user_assign)
        self.sess.run(item_assign)

        if self.simple_graph:
            self.sess.run(self.sg_edge_param_update, feed_dict={self.edge_idx: self.edge_idx_d})
        else:
            self.sess.run(self.lphi_update, feed_dict={self.edge_idx: self.edge_idx_d})

        self.sess.run(self.deg_update, feed_dict={self.edge_vals: self.edge_vals_d, self.edge_idx: self.edge_idx_d})

    def infer(self, n_iter=150):
        """
        Runs the co-ordinate ascent inference on the model. 
        """
        if self.ppm:
            print("Running infer is forbidden for principled predictive model.")
            return
        if DEBUG:
            # fix some variables to their true values
            self._fix_post_assigns(self.ground_truth['true_omega'], self.ground_truth['true_beta'])

        with self.sess.as_default():
            for i in range(n_iter):

                # users
                start_time = time.time()
                self.sess.run(self.u_update_one, feed_dict={self.edge_idx: self.edge_idx_d})
                self.sess.run(self.u_update_two, feed_dict={self.edge_idx: self.edge_idx_d})

                # items
                if not(self.fix_item_params):
                    start_time = time.time()
                    self.sess.run(self.i_update_one, feed_dict={self.edge_idx: self.edge_idx_d})
                    self.sess.run(self.i_update_two, feed_dict={self.edge_idx: self.edge_idx_d})

                # edges
                start_time = time.time()
                if self.simple_graph:
                    for sg_edge_param_update in self.sg_edge_param_update:
                        self.sess.run(sg_edge_param_update, feed_dict={self.edge_idx: self.edge_idx_d})
                else:
                    for lphi_update in self.lphi_update:
                        self.sess.run(lphi_update, feed_dict={self.edge_idx: self.edge_idx_d})

                # mean degree (caching)
                start_time = time.time()
                self.sess.run(self.deg_update, feed_dict={self.edge_vals: self.edge_vals_d, self.edge_idx: self.edge_idx_d})

                ### Print the total item and user mass ###
                if np.mod(i, 30) == 0:
                    self._logging(i)
                    print("appx_elbo: {}".format(self.sess.run(self.appx_elbo,
                                                           feed_dict={self.edge_idx: self.edge_idx_d})))

            ## DONE TRAINING
            self.user_affil_est = to_prob(self.theta_shp / self.theta_rte).eval()
            self.item_affil_est = to_prob(self.beta_shp / self.beta_rte).eval()
            if DEBUG: 
                self.true_user_affil = to_prob(self.ground_truth['true_theta']).eval()
                self.true_item_affil = to_prob(self.ground_truth['true_beta']).eval()

            # User params
            gam_shp, gam_rte, theta_shp, theta_rte, g = self.sess.run([self.gam_shp, self.gam_rte, self.theta_shp, self.theta_rte, self.g])

            # Item params
            omega_shp, omega_rte, beta_shp, beta_rte, w = self.sess.run([self.omega_shp, self.omega_rte, self.beta_shp, self.beta_rte, self.w])

            return gam_shp, gam_rte, theta_shp, theta_rte, g, omega_shp, omega_rte, beta_shp, beta_rte, w


    def test_llhd(self, test_idxs):
        """
        Returns an estimate of \sum_{e in test_idxs} log(prob(e)) and of \sum{e not in test_idxs} log(1-prob(e))
        :param test_idxs: tensor of shape [e, 2], indices of edges of graph
        :return: estimate of [\sum_{e in test_idxs} log(prob(e)), \sum{e not in test_idxs} log(1-prob(e))]
        """

        test_idxs_ = np.copy(test_idxs)

        users = np.unique(test_idxs_[:, 0])
        train_idxs = np.copy(self.edge_idx_d[np.in1d(self.edge_idx_d[:, 0], users),:])

        for en, user in enumerate(users):
            test_idxs_[test_idxs_[:, 0] == user, 0] = en
            train_idxs[train_idxs[:,0] == user, 0] = en

        matrix = np.ones((users.shape[0], self.I))
        matrix[train_idxs.T.tolist()] = 0
        matrix[test_idxs_.T.tolist()] = 0
        all_but_test_idxs = np.array(matrix.nonzero()).T

        # Select 1000 edges randomly from test_idx to get an estimate of the expected value
        np.random.seed(self.seed)
        selected_edges = np.random.choice(test_idxs_.shape[0], min(1000, test_idxs_.shape[0]), replace=False)
        test_idxs_ = test_idxs_[selected_edges]

        # Select 1000 edges randomly from all_but_traintest_idx to get an estimate of the expected value
        selected_edges = np.random.choice(all_but_test_idxs.shape[0], min(1000, all_but_test_idxs.shape[0]), replace=False)
        all_but_test_idxs = all_but_test_idxs[selected_edges]

        p_test_idx = self.sess.run(self.predict_edges, feed_dict={self.pred_edges_ph: test_idxs_})
        p_not_test_idx = self.sess.run(self.predict_edges, feed_dict={self.pred_edges_ph: all_but_test_idxs})

        return np.mean(np.log(p_test_idx)), np.mean(np.log(1.-p_not_test_idx))


    def recommend(self, K, users=None, excluded_items=None):
        """
        Recommend Top-K Items
        NOTE: Does not censor train edges while recommending
        Reasoning: If we pass holdout set as train data and do not run infer, 
                   then we don't want to censor "train" edges while making recommendations
        outputs top K recommendations for each user in users

        Warning: assumes number of items > K

        :param users: numpy array, users to make recommendations for
        :param K: number of recommendations to output
        :param excluded_items: (optional) numpy array, items to exclude from recommendations

        """
        # sort users and remove redundant (for easy 0-indexing later)
        # uniq_inv will be used to restore original ordering at output
        if users is None:
            users_ =  np.unique(self.edge_idx_d[:,0])
            uniq_inv = range(users_.shape[0])
        else:
            users_, uniq_inv = np.unique(users, return_inverse=True)

        # probability of connection for each user in users and all items
        edge_pops = self.sess.run(self._rec_uncensored_edge_pops, feed_dict={self._rec_users: users_})
        # do any necessary additional censoring
        if excluded_items is not None:
            edge_pops[:, excluded_items] = 0.

        recs = self.sess.run(self._top_k, feed_dict={self._censored_edge_pops: edge_pops, self._num_rec: K})
        # restore original ordering
        recs_orig_ordering = recs._replace(indices=recs.indices[uniq_inv, :])
        return recs_orig_ordering


    def nDCG(self, p, users=None, test=None, ranks=None, excluded_items=None):
        """
        Computes the normalized Discounted Cumulative Gain at rank p
        """

        # returns a sorted array
        if users is None:
            users = np.unique(self.edge_idx_d[:,0])
        if test is None:
            test = self.edge_idx_d
        if ranks is None:
            ranks = self.recommend(p, users, excluded_items).indices

        nDCG = np.zeros(users.shape[0])
        for en, user in enumerate(users):
            user_test = np.copy(test[test[:, 0] == user])
            test_ranks = np.isin(ranks[en, :], user_test[:, 1]).nonzero()[0] + 1
            DCG = np.sum(np.log(2.) / (np.log(test_ranks + 1)))
            num_rel = min(p, user_test.shape[0])  # number of relevant
            itest_ranks = np.array(range(num_rel)) + 1
            iDCG = np.sum(np.log(2.) / (np.log(itest_ranks + 1)))
            nDCG[en] = DCG / iDCG
        return nDCG
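
    # Worked example of the DCG formula used above (illustrative numbers only): with p = 3 and
    # relevant items recommended at ranks 1 and 3,
    #   DCG  = log(2)/log(2) + log(2)/log(4) = 1 + 0.5 = 1.5
    # and with two relevant items in total the ideal ranking occupies ranks 1 and 2,
    #   iDCG = log(2)/log(2) + log(2)/log(3) ~= 1.63
    # so nDCG ~= 1.5 / 1.63 ~= 0.92.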

    def sample_one(self, user_size = None, item_size = None, eps=1e-8):
        """
        Draw a posterior sample from the fitted model

        :param user_size: float, user size
        :param item_size: float, item size
        :param eps: float, approximation level for ggp (default 1e-8); atom weights smaller than this are ignored
        :return: An approximate sample of the multigraph with the associated parameters
        a numpy array [user_idx, item_idx, num_edges] of length equal to the total occupied pairs
        """

        if user_size is None:
            _user_size = self.size_u
        else:
            _user_size = user_size

        if item_size is None:
            _item_size = self.size_i
        else:
            _item_size = item_size

        i_mass_samp = self.sess.run(self.q_omega.sample(seed=self.seed) * self.q_beta.sample(seed=self.seed))
        u_mass_samp = self.sess.run(self.q_gam.sample(seed=self.seed) * self.q_theta.sample(seed=self.seed))

        i_mass_tots = np.sum(i_mass_samp, 0) # total mass of each type in items
        u_mass_tots = np.sum(u_mass_samp, 0)

        """
        edges between instantiated vertices
        """
        # total number of edges of each type
        tot_edges_mean = u_mass_tots * i_mass_tots
        tot_edges = np.random.poisson(tot_edges_mean)

        # K probability distributions over items / users
        i_probs = i_mass_samp / i_mass_tots
        i_probs[i_probs < 1e-8] = 0 # numerical precision hack
        u_probs = u_mass_samp / u_mass_tots
        u_probs[u_probs < 1e-8] = 0 # numerical precision hack

        # assign edges to pairs
        item_assignments = [np.random.choice(self.I, size=tot_edges[k],replace=True,p=i_probs[:,k]) for k in range(self.K)]
        user_assignments = [np.random.choice(self.U, size=tot_edges[k],replace=True,p=u_probs[:,k]) for k in range(self.K)]

        edge_list = np.concatenate(
            [np.vstack([user_assignments[k], item_assignments[k]]) for k in range(self.K)], -1).T

        """
        leftover mass contribution
        Approximation: uninstantiated points never connect to each other
        """
        if _item_size != 0:

            # total mass belonging to uninstantiated items in each dimension
            rem_item_mass = (_item_size / self.size_i) * self.sess.run(
                self.q_w.sample(seed=self.seed)[:, 0])  # since q_w = size * rate

            # number of edges between instantiated users and uninstantiated items
            n_insu_remi = np.random.poisson(u_mass_tots * rem_item_mass)

            # ids of users connecting to uninstantiated atoms
            u_assign = np.concatenate([np.random.choice(self.U, size=n_insu_remi[k], replace=True, p=u_probs[:, k]) for k in range(self.K)])

            """
            it remains to assign the termini to atoms in the uninstantiated part of the marked GGPs
            strategy: simulate the posterior marked GGPs, and use the same multinomial assignment
            warning: this is computationally pricey
            """

            # sample from the point process of atoms that failed to connect to anything when the dataset was originally generated
            si, ti, c, d = self.sess.run([self.si, self.ti, self.c, self.d])
            new_ggp = sample_ggp(_item_size, si, ti, eps)
            sim_marks = np.random.gamma(shape=c, scale=1./d, size=new_ggp.shape + (self.K,))
            atom_weights = np.expand_dims(new_ggp,1) * sim_marks

            # uninstantiated atoms
            not_inc_prob = np.exp(-np.sum(atom_weights * u_mass_tots, axis=1)) # probability each item atom failed to connect to any user
            uninstant_atom_weights = atom_weights[np.nonzero(np.random.binomial(1,p=not_inc_prob))] # weights
            uninstant_atom_dist = uninstant_atom_weights / np.sum(uninstant_atom_weights, 0) # K probability dists

            # assign edges to these new atoms in the usual multinomial way
            i_rem_assign = np.concatenate([np.random.choice(uninstant_atom_dist.shape[0], size=n_insu_remi[k], replace=True, p=uninstant_atom_dist[:, k]) for k in range(self.K)])
            # these atoms should have labels not already taken by any previously instantiated atom
            i_rem_assign += self.I

            # and now compile the edge list
            insu_remi = np.vstack([u_assign , i_rem_assign]).T
            edge_list = np.concatenate([edge_list, insu_remi], axis=0)

        # repeat this for instantiated items + remaining users
        if _user_size != 0:

            rem_user_mass = (_user_size / self.size_u) * self.sess.run(self.q_g.sample(seed=self.seed)[:, 0])

            # number of edges connecting to previously uninstantiated atoms
            n_insi_remu = np.random.poisson(i_mass_tots * rem_user_mass)  # instantiated items, remaining users

            # ids of atoms connecting to uninstantiated users
            i_assign = np.concatenate([np.random.choice(self.I, size=n_insi_remu[k], replace=True, p=i_probs[:, k]) for k in range(self.K)])

            """
            assign the termini to atoms in the uninstantiated part of the marked GGPs
            """

            # sample from the point process of atoms that failed to connect to anything when the dataset was originally generated
            su, tu, a, b = self.sess.run([self.su, self.tu, self.a, self.b])
            new_ggp = sample_ggp(_user_size, su, tu, eps)
            sim_marks = np.random.gamma(shape=a, scale=1./b, size=new_ggp.shape + (self.K,))
            atom_weights = np.expand_dims(new_ggp,1) * sim_marks
            not_inc_prob = np.exp(-np.sum(atom_weights * i_mass_tots, axis=1)) # probability each user atom failed to connect to any item
            # uninstantiated atoms
            uninstant_atom_weights = atom_weights[np.nonzero(np.random.binomial(1,p=not_inc_prob))] # weights
            uninstant_atom_dist = uninstant_atom_weights / np.sum(uninstant_atom_weights, 0) # K probability dists

            # now assign to these new atoms in the usual multinomial way
            u_rem_assign = np.concatenate([np.random.choice(uninstant_atom_dist.shape[0], size=n_insi_remu[k], replace=True, p=uninstant_atom_dist[:, k]) for k in range(self.K)])
            # these atoms should have labels not already taken by any previously instantiated atom
            u_rem_assign += self.U

            # and now do the edge assignment
            insi_remu = np.vstack([u_rem_assign, i_assign]).T
            edge_list = np.concatenate([edge_list, insi_remu], axis=0)

        # cleanup
        uniques = np.unique(edge_list, return_counts=True, axis=0)
        return np.hstack([uniques[0], np.expand_dims(uniques[1], 1)])
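
    # A minimal, self-contained sketch (not part of the original class and not used by it) of the
    # per-community multinomial edge assignment performed above: for each community k, user and item
    # endpoints are drawn independently with the per-atom probabilities, and duplicate pairs are
    # collapsed into (user, item, count) rows. Shapes and names here are illustrative assumptions;
    # each column of u_probs / i_probs is assumed to sum to 1.
    @staticmethod
    def _demo_multinomial_edge_assignment(tot_edges, u_probs, i_probs, rng=None):
        import numpy as np
        rng = np.random.RandomState(0) if rng is None else rng
        U, K = u_probs.shape
        I, _ = i_probs.shape
        users = [rng.choice(U, size=tot_edges[k], replace=True, p=u_probs[:, k]) for k in range(K)]
        items = [rng.choice(I, size=tot_edges[k], replace=True, p=i_probs[:, k]) for k in range(K)]
        edges = np.concatenate([np.vstack([users[k], items[k]]) for k in range(K)], -1).T
        pairs, counts = np.unique(edges, return_counts=True, axis=0)
        return np.hstack([pairs, np.expand_dims(counts, 1)])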


    def principled_predictive_model(self, test_look_edge_idx, test_look_edge_vals, test_holdout,  user_update_iters=100, p=0.8,
                                free_model_resources = True, device='/cpu:0', seed=None):
        """
        Idea: data is originally divided into a test and train set using p-sampling of the users. Test set is further
         divided into test_lookup and test_holdout using p-sampling of the items in test set. 
         The object owning this method has been trained on the train set.

         We now further divide the test set into test_look, which will be used to propagate the fitted model to get
         parameter values for the users in the test set, and test_holdout, which we use to assess our algorithm using 
         item .

         This function returns a GNMF object on [items in test, users in test] to be used for further prediction.
         The item parameters are inherited from the trained model. The user parameters are set to be compatible with
         the item parameters using the usual edge+user update scheme.

        Remark: we return a model that includes all the items (rather than just the ones in test_look) because the items
        in test_holdout generally contain items not in test_look

        :param test_look_edge_idx:
        :param test_look_edge_vals:
        :param user_update_iters: number of iterations used to set users to be compatible w items
        :return:
        """
        """
        WARNING: self.hparams doesn't reflect any updates that have happened to the hyperparams, so if we ever write
        tuning code we'll have to be cognizant of this
        """
        U = np.unique(test_look_edge_idx[:, 0]).shape[0]
        I = np.unique(test_look_edge_idx[:, 1]).shape[0]
        """
        First, propagate the trained item values to the test set users
        """
        lookup_items = np.unique(test_look_edge_idx[:,1]) # items connected to any lookup user
        lookup_I = lookup_items.shape[0]

        # make the lookup item labels contiguous for passing into GNMF (zero indexing)      
        [lookup_relabel, convert] = zero_index(test_look_edge_idx, 1)

        with tf.variable_scope("holdout_fitter"):
            holdout_hparams = self.hparams.copy()
            holdout_hparams['size_i'] = p * self.hparams['size_i']
            holdout_hparams['size_u'] = (1.-p) / p * self.hparams['size_u']

            with GraphexNMF(lookup_relabel, test_look_edge_vals, U, lookup_I,  self.K, holdout_hparams,
                                        ground_truth=None, simple_graph=self.simple_graph, GPU=self.GPU,
                                        comp_rem=False, # comp_rem won't work because item weights are wrong
                                        fix_item_params=True, device=device, seed=seed) \
                    as holdout_fitter:

                # item parameters for the items in the lookup set
                omega_shp_lookup_op = tf.gather(self.omega_shp, lookup_items)
                omega_rte_lookup_op = tf.gather(self.omega_rte, lookup_items)
                beta_shp_lookup_op = tf.gather(self.beta_shp, lookup_items)
                beta_rte_lookup_op = tf.gather(self.beta_rte, lookup_items)
                w_lookup_op = p * self.w # w is implicitly item size times w, size transforms as s -> p*s under p-sampling

                # run it
                [omega_shp_lookup, omega_rte_lookup, beta_shp_lookup, beta_rte_lookup, w_lookup] = self.sess.run(
                    [omega_shp_lookup_op, omega_rte_lookup_op, beta_shp_lookup_op, beta_rte_lookup_op, w_lookup_op])

                # fix the item parameters to the fitted values
                item_assign = assign_list([holdout_fitter.omega_shp, holdout_fitter.omega_rte, holdout_fitter.beta_shp, holdout_fitter.beta_rte, holdout_fitter.w],
                                           [omega_shp_lookup, omega_rte_lookup, beta_shp_lookup, beta_rte_lookup, w_lookup])
                holdout_fitter.sess.run(item_assign)

                # infer the user parameters
                holdout_fitter.infer(user_update_iters)

                [fit_gam_shp, fit_gam_rte, fit_theta_shp, fit_theta_rte, fit_g] = holdout_fitter.sess.run(
                    [holdout_fitter.gam_shp, holdout_fitter.gam_rte, holdout_fitter.theta_shp, holdout_fitter.theta_rte, holdout_fitter.g])
        
        """
        Next, return the model that we'll use for prediction by taking the item values from the original trained model,
        and the user values from the holdout_fitter
        """ 
        test_holdout_users = np.unique(test_holdout[:,0])
        test_holdout_items = np.unique(test_holdout[:,1])
        holdout_U = test_holdout_users.shape[0]
        holdout_I = test_holdout_items.shape[0]
        # fix the item parameters to the fitted values - only items in holdout
        [omega_shp, omega_rte, beta_shp, beta_rte, w] = \
            self.sess.run([self.omega_shp, self.omega_rte, self.beta_shp, self.beta_rte, self.w])
        omega_shp = omega_shp[test_holdout_items]
        omega_rte = omega_rte[test_holdout_items]
        beta_shp = beta_shp[test_holdout_items]
        beta_rte = beta_rte[test_holdout_items]
        # fix the user parameters to the fitted values - only users in holdout
        gam_shp = fit_gam_shp[test_holdout_users]
        gam_rte = fit_gam_rte[test_holdout_users]
        theta_shp = fit_theta_shp[test_holdout_users]
        theta_rte = fit_theta_rte[test_holdout_users]
        g = fit_g
        # make the holdout user and item labels contiguous for passing into GNMF (zero indexing)
        [holdout_relabel, convert_users] = zero_index(test_holdout, 0)
        [holdout_relabel, convert_items] = zero_index(holdout_relabel, 1)
        # release the session to free up resources to do recommendation with.
        # this is a bit nasty, and is used in part because sess.close() doesn't work properly
        # WARNING: I'm not sure what happens if this command is run on a server... might be a good way of making enemies
        if free_model_resources: tf.Session.reset(None)

        with tf.variable_scope("ppm_init"):
            ppm_hparams = self.hparams.copy()
            ppm_hparams['size_i'] = (1.-p) * ppm_hparams['size_i'] # 1-p of items get into the holdout
            ppm_hparams['size_u'] = (1.-p) / p * ppm_hparams['size_u'] # 1-p of users get into the holdout (and self.hparams[size_u] is size of *train*)

            # passing in holdout data so we can compute appx_llhd for holdout.
            # IMPORTANT: MUST NOT RUN ppm.infer()!!
            # We do not want to update user and item parameters based on holdout dataset
            # Holdout is strictly for testing
            ppm = GraphexNMF(holdout_relabel[:,:2], holdout_relabel[:,2], holdout_U, holdout_I, self.K, ppm_hparams,
                                        ground_truth=None, simple_graph=self.simple_graph, GPU=self.GPU,
                                        comp_rem=False, fix_item_params=False,
                                        device=device, seed=seed, ppm=True)

            # correct size factor on w
            w = (1. - p) * w

            ppm_item_assign = assign_list(
                [ppm.omega_shp, ppm.omega_rte, ppm.beta_shp, ppm.beta_rte, ppm.w],
                [omega_shp, omega_rte, beta_shp, beta_rte, w])

            # correct size factor on g (rescale the value that is actually assigned to ppm.g below)
            g = (1. - p) / p * g

            # fix the user parameters to the fitted values
            ppm_user_assign = assign_list(
                [ppm.gam_shp, ppm.gam_rte, ppm.theta_shp, ppm.theta_rte, ppm.g],
                [gam_shp, gam_rte, theta_shp, theta_rte, g])

            ppm.sess.run([ppm_user_assign, ppm_item_assign])

            # edge updates... strictly speaking, this doesn't matter for recommendations
            if ppm.simple_graph:
                ppm.sess.run(ppm.sg_edge_param_update, feed_dict={ppm.edge_idx: ppm.edge_idx_d})
            else:
                ppm.sess.run(ppm.lphi_update, feed_dict={ppm.edge_idx: ppm.edge_idx_d})
            ppm.sess.run(ppm.deg_update, feed_dict={ppm.edge_vals: ppm.edge_vals_d, ppm.edge_idx: ppm.edge_idx_d})

        return ppm
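
    # A minimal sketch of a p-sampling split as described in the docstring above (referenced there).
    # It assumes p-sampling of the users amounts to keeping each user independently with probability p
    # and partitioning the edges by whether their user endpoint was kept. This helper is illustrative
    # only and is not part of the original pipeline; edge_list is assumed to be an [n, 3] array of
    # (user, item, count) rows.
    @staticmethod
    def _demo_p_sample_users(edge_list, p, rng=None):
        import numpy as np
        rng = np.random.RandomState(0) if rng is None else rng
        users = np.unique(edge_list[:, 0])
        kept = users[rng.binomial(1, p, size=users.shape[0]).astype(bool)]
        in_kept = np.isin(edge_list[:, 0], kept)
        return edge_list[in_kept], edge_list[~in_kept]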


    def _logging(self, itr):
        print("----------------------------------------------------------------")
        print("ITERATION #{}".format(itr))
        print("mean community edge weights:{}").format(
            self.sess.run(self.edge_mean_summary, feed_dict={self.edge_vals: self.edge_vals_d, self.edge_idx: self.edge_idx_d}))
        print("----------------------------------------------------------------")
        print("P(inclusion | included): {}").format(
            np.mean(self.sess.run(self.predict_edges, feed_dict={self.pred_edges_ph: self.included_sample})))
        print("----------------------------------------------------------------")
        print("P(inclusion | random pair): {}").format(
            np.mean(self.sess.run(self.predict_edges, feed_dict={self.pred_edges_ph: self.pair_sample})))
        print("----------------------------------------------------------------")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.sess.close()
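
# Hedged usage sketch (not from the original repository): since the class defines __enter__/__exit__,
# it can be used as a context manager so that its tf.Session is released on exit. The edge data,
# hyperparameter values and hparams keys below are illustrative placeholders only; the constructor
# signature follows the calls made in principled_predictive_model above.
if __name__ == "__main__":
    import numpy as np
    edge_idx = np.array([[0, 0], [0, 1], [1, 1]])   # (user, item) pairs
    edge_vals = np.array([2., 1., 3.])              # edge weights
    hparams = {'su': 0.5, 'tu': 1., 'a': 0.3, 'b': 1., 'size_u': 10.,
               'si': 0.5, 'ti': 1., 'c': 0.3, 'd': 1., 'size_i': 10.}
    with GraphexNMF(edge_idx, edge_vals, 2, 2, 3, hparams,
                    ground_truth=None, simple_graph=True, GPU=False,
                    comp_rem=False, device='/cpu:0', seed=0) as model:
        model.infer(100)
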
Example #8
0
    def _initialize_parameters(self, hparams, ppm):

        K = np.float32(self.K)

        su, tu, a, b, self.size_u = (hparams['su'], hparams['tu'], hparams['a'], hparams['b'], hparams['size_u'])
        si, ti, c, d, self.size_i = (hparams['si'], hparams['ti'], hparams['c'], hparams['d'], hparams['size_i'])

        with tf.name_scope("hparams"), tf.device(self.device):
            ## Hyperparameters
            self.lsu = tf.Variable(softplus_inverse(-hparams['su'] + 1.), dtype=tf.float32, name="lsu")
            self.su = -tf.nn.softplus(self.lsu) + 1.

            self.tu = tf.Variable(hparams['tu'], dtype=tf.float32, name="tu")

            self.a = tf.Variable(hparams['a'], dtype=tf.float32, name="a")
            self.b = tf.Variable(hparams['b'], dtype=tf.float32, name="b")

            self.lsi = tf.Variable(softplus_inverse(-hparams['si'] + 1.), dtype=tf.float32, name="lsi")
            self.si = -tf.nn.softplus(self.lsi) + 1.

            self.ti = tf.Variable(hparams['ti'], dtype=tf.float32, name="ti")

            self.c = tf.Variable(hparams['c'], dtype=tf.float32, name="c")
            self.d = tf.Variable(hparams['d'], dtype=tf.float32, name="d")

        e = np.sum(self.edge_vals_d, dtype=np.float32)

        # initial values for the total user and total item masses of each of the K communities,
        # set so that \sum_k tim_k * tum_k = e (which is in fact a bit higher than it ought to be)
        # and using item_mass / user_mass ~ item_size / user_size (which is only approximately true)
        tum_init = np.sqrt(self.size_u / self.size_i * e / K)
        tim_init = np.sqrt(self.size_i / self.size_u * e / K)
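        # with these choices, tim_init * tum_init = e / K, so \sum_k tim_k * tum_k = e,
        # and tum_init / tim_init = size_u / size_i as intended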

        with tf.name_scope("user_params"), tf.device(self.device):
            # shape params are read off immediately from update equations
            # rate params set to be consistent with \gam_i ~ 1, \sum_j beta_jk beta_k ~ \sqrt(e/K) (which is self-consistent)
            if ppm:
                # If creating the principled predictive model (ppm), we don't have user_degree, so just use a
                # random initialization for now; these values get overwritten later (see principled_predictive_model)
                self.gam_shp = tf.Variable(tf.random_gamma([self.U, 1], 5., 5., seed=self.seed), dtype=tf.float32, name="gam_shp")
                self.gam_rte = tf.Variable(tf.random_gamma([self.U, 1], 5., 5., seed=self.seed), dtype=tf.float32, name="gam_rte")
                self.theta_shp = tf.Variable(tf.random_gamma([self.U, self.K], 10., 10., seed=self.seed), name="theta_shp")
                self.theta_rte = tf.Variable(tf.random_gamma([self.U, self.K], 5., 5., seed=self.seed), name="theta_rte")
                self.g = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="g")
            else:
                user_degs = np.expand_dims(self.user_degree, axis=1)
                self.gam_shp = tf.Variable((user_degs - su), name="gam_shp")  # s^U
                self.gam_rte = tf.Variable(np.sqrt(e) * (0.9 + 0.1*tf.random_gamma([self.U, 1], 5., 5., seed=self.seed)), dtype=tf.float32, name="gam_rte")  # r^U
                init_gam_mean = self.gam_shp.initial_value / self.gam_rte.initial_value
                self.theta_shp = tf.Variable((a + user_degs/K) * tf.random_gamma([self.U, self.K], 10., 10., seed=self.seed), name="theta_shp")  # kap^U
                self.theta_rte = tf.Variable((b + init_gam_mean * tim_init)*(0.9 + 0.1*tf.random_gamma([self.U, self.K], 5., 5., seed=self.seed)), name="theta_rte")  # lam^U
                self.g = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="g")  # g


        with tf.name_scope("item_params"), tf.device(self.device):
            ## Items
            if ppm:
                self.omega_shp = tf.Variable(tf.random_gamma([self.I, 1], 5., 5., seed=self.seed), name="omega_shp")  # s^I
                self.omega_rte = tf.Variable(tf.random_gamma([self.I, 1], 5., 5., seed=self.seed), dtype=tf.float32, name="omega_rte")  # r^I
                self.beta_shp = tf.Variable(tf.random_gamma([self.I, self.K], 10., 10., seed=self.seed), name="beta_shp")  # kap^I
                self.beta_rte = tf.Variable(tf.random_gamma([self.I, self.K], 5., 5., seed=self.seed), name="beta_rte")  # lam^I
                self.w = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="w")  # w
            else:
                item_degs = np.expand_dims(self.item_degree, axis=1)
                self.omega_shp = tf.Variable((item_degs - si), name="omega_shp")  # s^I
                self.omega_rte = tf.Variable(np.sqrt(e) * (0.9 + 0.1*tf.random_gamma([self.I, 1], 5., 5., seed=self.seed)), dtype=tf.float32, name="omega_rte")  # r^I
                init_omega_mean = self.omega_shp.initial_value / self.omega_rte.initial_value
                self.beta_shp = tf.Variable((c + item_degs/K) * tf.random_gamma([self.I, self.K], 10., 10., seed=self.seed), name="beta_shp")  # kap^I
                self.beta_rte = tf.Variable((d + init_omega_mean*tum_init) * (0.9 + 0.1*tf.random_gamma([self.I, self.K], 5., 5., seed=self.seed)), name="beta_rte")  # lam^I
                self.w = tf.Variable(tf.random_gamma([self.K, 1], 0.001, 1, seed=self.seed) + TINY, name="w")  # w

        with tf.device('/cpu:0'):
            with tf.variable_scope("edge_params", reuse=None):
                ## Edges
                if self.simple_graph:
                    # set init value so there's approximately 1 expected edge between each pair... WARNING: this may be profoundly stupid
                    self.sg_edge_param = tf.get_variable(name="sg_edge_param", shape=[self.occupied_pairs, self.K], dtype=tf.float32,
                                    initializer=tf.random_normal_initializer(mean=-np.log(K), stddev=1. / K, seed=self.seed),
                                    partitioner=tf.fixed_size_partitioner(self.edge_param_splits, 0))
                else:
                    self.lphi = tf.get_variable(name="lphi", shape=[self.occupied_pairs, self.K], dtype=tf.float32,
                                    initializer=tf.random_normal_initializer(mean=0, stddev=1. / K, seed=self.seed),
                                    partitioner=tf.fixed_size_partitioner(self.edge_param_splits, 0))

        with tf.name_scope("variational_post"), tf.device(self.device):

            # Variational posterior distributions
            self.q_gam = Gamma(concentration=self.gam_shp, rate=self.gam_rte, name="q_gam")
            self.q_theta = Gamma(concentration=self.theta_shp, rate=self.theta_rte, name="q_theta")
            self.q_g = PointMass(self.g, name="q_g")

            self.q_omega = Gamma(concentration=self.omega_shp, rate=self.omega_rte, name="q_omega")
            self.q_beta = Gamma(concentration=self.beta_shp, rate=self.beta_rte, name="q_beta")
            self.q_w = PointMass(self.w, name="q_w")

            if self.simple_graph:
                self.q_e_aux_vals = tPoissonMulti(log_lams=self.sg_edge_param, name="q_e_aux_vals") # q_edges_aux_flat
            else:
                self.q_e_aux_vals = Multinomial(total_count=self.edge_vals, logits=self.lphi, name="q_e_aux_vals") # q_edges_aux_flat
                self.q_e_aux_vals_mean = self.q_e_aux_vals.mean()

        with tf.name_scope("degree_vars"):
            # create some structures to make it easy to work with the expected value (wrt q) of the edges

            # qm_du[u,k] is the expected weighted degree of user u counting only edges of type k
            # qm_du[u,k] = E_q[e^k_i.] in the language of the paper
            # initialized arbitrarily; overridden at the end of init with the expected edge values
            # (we use a tf.Variable here to cache the q_e_aux_vals.mean() value)
            self.qm_du = tf.Variable(tf.ones([self.U, self.K], dtype=tf.float32), name="qm_du")
            self.qm_di = tf.Variable(tf.ones([self.I, self.K], dtype=tf.float32), name="qm_di")

        # Total Item Mass:
        self.i_tot_mass_m = self.q_w.mean() + tf.matmul(self.q_beta.mean(), self.q_omega.mean(), transpose_a=True)
        # Total User Mass:
        self.u_tot_mass_m = self.q_g.mean() + tf.matmul(self.q_theta.mean(), self.q_gam.mean(), transpose_a=True)
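        # i.e., per community k (both results are [K, 1] tensors; transpose_a=True makes the matmul a
        # sum over items / users):
        #   i_tot_mass_m[k] = E_q[w_k] + \sum_i E_q[beta_ik]  * E_q[omega_i]
        #   u_tot_mass_m[k] = E_q[g_k] + \sum_u E_q[theta_uk] * E_q[gam_u]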