Example #1
    def general_model_body(self, r, f_param, vis_transformer_params, m_t, v_t, prev_ll, the_number_of_rounds_under_acc_diff, prev_delta,
                           stable_r, stable_f_param, stable_vis_transformer_params, stable_m_t, stable_v_t, stable_prev_ll,
                           stable_the_number_of_rounds_under_acc_diff, stable_prev_delta,
                           step, alpha, max_ll, extra_took_steps):

        # every 100th step runs the recheck body; all other steps run the normal loop body
        new_r, new_f_param, new_vis_transformer_params, new_m_t, new_v_t, new_prev_ll, \
        new_the_number_of_rounds_under_acc_diff, new_prev_delta, \
        new_stable_r, new_stable_f_param, new_stable_vis_transformer_params, new_stable_m_t, new_stable_v_t, new_stable_prev_ll, \
        new_stable_the_number_of_rounds_under_acc_diff, new_stable_prev_delta, new_step, new_alpha, new_max_ll, new_extra_took_steps = \
            tf.cond(
                tf.equal(step % 100, tf.constant(0, dtype=tf.int32)),
                lambda: self.recheck_model_body(r, f_param, vis_transformer_params, m_t, v_t, prev_ll, the_number_of_rounds_under_acc_diff, prev_delta,
                                        stable_r, stable_f_param, stable_vis_transformer_params, stable_m_t, stable_v_t, stable_prev_ll,
                                        stable_the_number_of_rounds_under_acc_diff, stable_prev_delta, step, alpha, max_ll, extra_took_steps),
                lambda: self.normal_loop_body(r, f_param, vis_transformer_params, m_t, v_t, prev_ll, the_number_of_rounds_under_acc_diff, prev_delta,
                                      stable_r, stable_f_param, stable_vis_transformer_params, stable_m_t, stable_v_t, stable_prev_ll,
                                      stable_the_number_of_rounds_under_acc_diff, stable_prev_delta, step, alpha, max_ll, extra_took_steps)
            )

        # check for NaNs
        new_r = check_nan(new_r, 'r')
        new_f_param = check_nan(new_f_param, 'trans_f_params')
        new_vis_transformer_params = check_nan(new_vis_transformer_params, 'trans vis params')
        new_m_t = check_nan(new_m_t, 'm')
        new_v_t = check_nan(new_v_t, 'v')
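        # note: tf.cond can drop static shape information, so the set_shape calls below
        # re-assert the incoming shapes and keep the tf.while_loop shape invariants satisfied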

        new_r.set_shape(r.get_shape())
        new_f_param.set_shape(f_param.get_shape())
        new_vis_transformer_params.set_shape(vis_transformer_params.get_shape())
        new_m_t.set_shape(m_t.get_shape())
        new_v_t.set_shape(v_t.get_shape())
        new_prev_ll.set_shape(prev_ll.get_shape())
        new_the_number_of_rounds_under_acc_diff.set_shape(the_number_of_rounds_under_acc_diff.get_shape())
        new_prev_delta.set_shape(prev_delta.get_shape())
        new_stable_r.set_shape(stable_r.get_shape())
        new_stable_f_param.set_shape(stable_f_param.get_shape())
        new_stable_vis_transformer_params.set_shape(stable_vis_transformer_params.get_shape())
        new_stable_m_t.set_shape(stable_m_t.get_shape())
        new_stable_v_t.set_shape(stable_v_t.get_shape())
        new_stable_prev_ll.set_shape(stable_prev_ll.get_shape())
        new_stable_the_number_of_rounds_under_acc_diff.set_shape(stable_the_number_of_rounds_under_acc_diff.get_shape())
        new_stable_prev_delta.set_shape(stable_prev_delta.get_shape())
        new_step.set_shape(step.get_shape())
        new_alpha.set_shape(alpha.get_shape())
        new_max_ll.set_shape(max_ll.get_shape())
        new_extra_took_steps.set_shape(extra_took_steps.get_shape())

        return new_r, new_f_param, new_vis_transformer_params, new_m_t, new_v_t, new_prev_ll, \
            new_the_number_of_rounds_under_acc_diff, new_prev_delta, \
            new_stable_r, new_stable_f_param, new_stable_vis_transformer_params, new_stable_m_t, new_stable_v_t, new_stable_prev_ll, \
            new_stable_the_number_of_rounds_under_acc_diff, new_stable_prev_delta, new_step, new_alpha, new_max_ll, new_extra_took_steps
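A minimal sketch of how a loop body like this is typically driven with tf.while_loop. The names model, init_vals, max_steps and patience are assumptions for illustration, not taken from the original code; init_vals would be the tuple of 20 tensors matching the signature above:

    def _cond(r, f_param, vis_transformer_params, m_t, v_t, prev_ll,
              the_number_of_rounds_under_acc_diff, prev_delta, *rest):
        # rest holds the stable_* copies followed by step, alpha, max_ll, extra_took_steps
        step = rest[-4]
        return tf.logical_and(tf.less(step, max_steps),
                              tf.less(the_number_of_rounds_under_acc_diff, patience))

    final_state = tf.while_loop(_cond, model.general_model_body, init_vals)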
Example #2
    def normal_loop_body(self, r, f_params, vis_transformer_params, m_t, v_t,
                         prev_ll, the_number_of_rounds_under_acc_diff,
                         prev_delta, stable_r, stable_f_params,
                         stable_vis_transformer_params, stable_m_t, stable_v_t,
                         stable_prev_ll,
                         stable_the_number_of_rounds_under_acc_diff,
                         stable_prev_delta, step, alpha, max_ll,
                         extra_took_steps):

        # sub-sampling for mini batch
        nzero_mb = tf.cond(
            tf.equal(self.non_zero_batch_size, self.non_zero_samples_num),
            lambda: self.insig_interactions, lambda: tf.gather(
                self.insig_interactions,
                tf.random_uniform([self.non_zero_batch_size],
                                  maxval=self.non_zero_samples_num,
                                  dtype=tf.int32)))
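        # each row of nzero_mb holds an (i, j, x_ij) interaction: columns 0 and 1 index
        # rows of self.vis (their absolute difference is used as the distance d_ij),
        # and column 2 is the observed count x_ij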

        # calculating derivatives for the non-zero batch
        si = tf.gather(self.vis, tf.squeeze(nzero_mb[:, 0]))
        sj = tf.gather(self.vis, tf.squeeze(nzero_mb[:, 1]))

        xij = tf.cast(nzero_mb[:, 2], tf.float64)

        d_ij = tf.cast(tf.abs(nzero_mb[:, 0] - nzero_mb[:, 1]), tf.float64)
        ln_dij = tf.log(d_ij)

        # %%
        bv = vis_transformer_params[1]
        spower = vis_transformer_params[0]

        tsi = tf.pow(si, spower)
        tsj = tf.pow(sj, spower)

        tvi, tsi_share, bv_i_share = tf_soft_max(tsi, bv, True)
        tvj, tsj_share, bv_j_share = tf_soft_max(tsj, bv, True)

        f_var_ij = tf_cis_var_func(f_params, d_ij)
        f_free = tf.exp(f_params[4])

        f_d_ij, var_f_share, free_f_share = tf_soft_max(f_var_ij, f_free, True)
        mu_ij = tvi * tvj * f_d_ij

        common_der = (xij - mu_ij) / (r + mu_ij)
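        # the leading factor r in r_der below appears to convert d(logL)/dr into
        # d(logL)/d(log r), matching the log-space update of r further down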

        r_der = r * (
            -2.0 * self.init_regularization_factor * r +
            (tf.log(r) - tf.digamma(r - 1)) + tf.reduce_mean(
                tf.digamma(xij + r - 1) - common_der - tf.log(r + mu_ij)))

        # %%
        vis_transformer_params_der = tf.stack([
            2.0 * tf.sqrt(spower) * r * tf_weighted_average(
                common_der * ((tsi_share * tsi * tf.log(si)) +
                              (tsj_share * tsj * tf.log(sj)) /
                              (tsi_share + tsj_share)), tsi_share + tsj_share),
            2.0 * tf.sqrt(bv) * r *
            tf_weighted_average(common_der, bv_i_share + bv_j_share)
        ])

        f_var_common_der = common_der * f_var_ij / f_d_ij

        f_var_common_der = check_nan(f_var_common_der, 'f_var_common_der')
        ln_dij = check_nan(ln_dij, 'ln_dij')
        var_f_share = check_nan(var_f_share, 'var_f_share')
        var_f_share = check_inf(var_f_share, 'var_f_share')
        f_params = check_nan(f_params, 'f_params')

        f_drev = tf.stack([
            f_params[0] * r * tf_weighted_average(
                f_var_common_der * tf.pow(ln_dij, 3), var_f_share),
            r * tf_weighted_average(f_var_common_der * tf.pow(ln_dij, 2),
                                    var_f_share),
            r * tf_weighted_average(f_var_common_der * ln_dij, var_f_share),
            r * tf_weighted_average(f_var_common_der, var_f_share)
        ])

        f_drev = check_nan(f_drev, 'f_drev')

        free_f_drev = r * tf_weighted_average(
            (common_der / f_d_ij), free_f_share)

        # %%
        non_zero_g_t = tf.concat([
            tf.expand_dims(r_der, 0), f_drev,
            tf.expand_dims(free_f_drev, 0), vis_transformer_params_der
        ], axis=0)

        # ****************************************
        g_t = non_zero_g_t

        g_t = check_nan(g_t, 'g_t')

        # Adam-style parameter update:
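        # m_t / v_t are the first and second moment estimates; a_t applies the usual
        # bias correction sqrt(1 - beta2^t) / (1 - beta1^t) to the base learning rate alpha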
        b1 = tf.constant(self.beta1, dtype=tf.float64)
        b2 = tf.constant(self.beta2, dtype=tf.float64)

        r_t = tf.cast(step + 1, tf.float64)
        r_m = b1 * m_t + (1 - b1) * g_t
        r_v = b2 * v_t + (1 - b2) * tf.pow(g_t, 2)

        a_t = alpha * (tf.sqrt(1 - tf.pow(b2, r_t))) / (1 - tf.pow(b1, r_t))
        delta_vals = a_t * r_m / (tf.sqrt(r_v) + self.eps)

        delta_vals = check_nan(delta_vals, 'delta_vals')
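        # the updates below are applied in transformed spaces so the sign/positivity
        # constraints hold: r in log-space (stays positive), f_params[0] through log(-x)
        # (stays negative), and the vis transformer params in sqrt-space (stay non-negative)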

        r_p = tf.log(r) + delta_vals[0]
        new_r = tf.exp(r_p)

        a3_p = tf.log(-1.0 * f_params[0]) + delta_vals[1]
        new_f_params = tf.stack([
            -1.0 * tf.exp(a3_p), f_params[1] + delta_vals[2],
            f_params[2] + delta_vals[3], f_params[3] + delta_vals[4],
            f_params[4] + delta_vals[5]
        ])

        # %%
        new_vis_transformer_params = tf.pow(
            tf.sqrt(vis_transformer_params) +
            delta_vals[6:6 + self.n_vis_t_params], 2)

        new_the_number_of_rounds_under_acc_diff = tf.cond(
            tf.less(tf.reduce_max(tf.abs(delta_vals)), self.acc_diff_limit),
            lambda: the_number_of_rounds_under_acc_diff + 1,
            lambda: tf.constant(0))

        # if every component of the update flipped sign relative to the previous step,
        # shrink the learning rate by 10x
        new_alpha = tf.cond(
            tf.less(tf.reduce_max(delta_vals * prev_delta),
                    tf.constant(0.0, dtype=tf.float64)), lambda: 0.1 * alpha,
            lambda: alpha)

        new_step = step + tf.constant(1, dtype=tf.int32)

        return new_r, new_f_params, new_vis_transformer_params, r_m, r_v, prev_ll, \
            new_the_number_of_rounds_under_acc_diff, delta_vals, \
            stable_r, stable_f_params, stable_vis_transformer_params, stable_m_t, stable_v_t, stable_prev_ll,\
            stable_the_number_of_rounds_under_acc_diff, stable_prev_delta, new_step, new_alpha, max_ll, extra_took_steps
Example #3
    def normal_loop_body(self, r, dist_params, b1_v_params, b2_v_params, m_t,
                         v_t, prev_ll, the_number_of_rounds_under_acc_diff,
                         prev_delta, stable_r, stable_dist_params,
                         stable_b1_v_params, stable_b2_v_params, stable_m_t,
                         stable_v_t, stable_prev_ll,
                         stable_the_number_of_rounds_under_acc_diff,
                         stable_prev_delta, step, alpha, max_ll,
                         extra_took_steps):

        # sub-sampling for mini batch
        nzero_mb = tf.cond(
            tf.equal(self.non_zero_batch_size, self.non_zero_samples_num),
            lambda: self.training_ints, lambda: tf.gather(
                self.training_ints,
                tf.random_uniform([self.non_zero_batch_size],
                                  maxval=self.non_zero_samples_num,
                                  dtype=tf.int32)))

        # abbreviations: d_Y_d_X = dY/dX, l = log-likelihood, b = base, v = var (combined as softmax(v, b)), sh = share

        # calculating derivatives for the non-zero batch
        si = tf.gather(self.vis, tf.squeeze(nzero_mb[:, 0]))
        sj = tf.gather(self.vis, tf.squeeze(nzero_mb[:, 1]))

        xij = tf.cast(nzero_mb[:, 2], tf.float64)

        d_ij = tf.cast(tf.abs(nzero_mb[:, 0] - nzero_mb[:, 1]), tf.float64)
        ln_dij = tf.log(d_ij)

        # %%
        tsi = tf.pow(si, b1_v_params[0])
        tsj = tf.pow(sj, b2_v_params[0])

        tvi, tsi_share, bv_i_share = tf_soft_max(tsi, b1_v_params[1], True)
        tvj, tsj_share, bv_j_share = tf_soft_max(tsj, b2_v_params[1], True)

        f_var_ij = tf_cis_var_func(dist_params, d_ij)
        f_free = tf.exp(dist_params[4])

        f_d_ij, var_f_share, free_f_share = tf_soft_max(f_var_ij, f_free, True)
        mu_ij = tvi * tvj * f_d_ij

        # MAIN PARTS DERIVATION

        common_der = (xij - mu_ij) / (r + mu_ij)

        d_r = r * (
            -2.0 * self.init_regularization_factor * r +
            (tf.log(r) - tf.digamma(r - 1)) + tf.reduce_mean(
                tf.digamma(xij + r - 1) - common_der - tf.log(r + mu_ij)))

        # MU PARTS

        # VIS PARAMS
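        # when self.equal_v_params is set, both interacting variables share one transform,
        # so the two gradient stacks below are tied (b2_v_params_der = b1_v_params_der);
        # otherwise each variable gets its own power/base gradients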

        # %%
        #tf.maximum(w_f_sum,

        if self.equal_v_params:
            b1_v_params_der = tf.stack([
                2.0 * tf.sqrt(b1_v_params[0]) * r * tf_weighted_average(
                    common_der * ((tsi_share * tsi * tf.log(si)) +
                                  (tsj_share * tsj * tf.log(sj)) / tf.maximum(
                                      (tsi_share + tsj_share),
                                      tf.constant(1.0, dtype=tf.float64))),
                    tsi_share + tsj_share), 2.0 * tf.sqrt(b1_v_params[1]) * r *
                tf_weighted_average(common_der, bv_i_share + bv_j_share)
            ])
            b2_v_params_der = b1_v_params_der
        else:
            b1_v_params_der = tf.stack([
                2.0 * tf.sqrt(b1_v_params[0]) * r *
                tf_weighted_average(common_der *
                                    (tsi * tf.log(si)), tsi_share),
                2.0 * tf.sqrt(b1_v_params[1]) * r *
                tf_weighted_average(common_der, bv_i_share)
            ])
            b2_v_params_der = tf.stack([
                2.0 * tf.sqrt(b2_v_params[0]) * r *
                tf_weighted_average(common_der *
                                    (tsj * tf.log(sj)), tsj_share),
                2.0 * tf.sqrt(b2_v_params[1]) * r *
                tf_weighted_average(common_der, bv_j_share)
            ])

        #b1_v_params_der = tf.Print(b1_v_params_der, [b1_v_params_der, d_mu_ij_mult_v_mu_ij, sh_v_mu_ij, (sh_b_vi + sh_b_vj)], '>>>>', summarize=30)

        # FUNCTION PARAMS

        f_var_common_der = common_der * f_var_ij / f_d_ij

        # checking nans
        f_var_common_der = check_nan(f_var_common_der, 'f_var_common_der')
        ln_dij = check_nan(ln_dij, 'ln_dij')
        var_f_share = check_nan(var_f_share, 'var_f_share')
        var_f_share = check_inf(var_f_share, 'var_f_share')
        f_params = check_nan(dist_params, 'dist_params')  # dist_params re-bound to the shorter name used below

        f_drev = tf.stack([
            f_params[0] * r * tf_weighted_average(
                f_var_common_der * tf.pow(ln_dij, 3), var_f_share),
            r * tf_weighted_average(f_var_common_der * tf.pow(ln_dij, 2),
                                    var_f_share),
            r * tf_weighted_average(f_var_common_der * ln_dij, var_f_share),
            r * tf_weighted_average(f_var_common_der, var_f_share)
        ])

        f_drev = check_nan(f_drev, 'f_drev')

        free_f_drev = r * tf_weighted_average(
            (common_der / f_d_ij), free_f_share)

        # %%
        non_zero_g_t = tf.concat([
            tf.expand_dims(d_r, 0), f_drev,
            tf.expand_dims(free_f_drev, 0), b1_v_params_der, b2_v_params_der
        ], axis=0)

        # ****************************************
        g_t = non_zero_g_t

        g_t = check_nan(g_t, 'g_t')

        # Adam-style parameter update:
        b1 = tf.constant(self.beta1, dtype=tf.float64)
        b2 = tf.constant(self.beta2, dtype=tf.float64)

        r_t = tf.cast(step + 1, tf.float64)
        r_m = b1 * m_t + (1 - b1) * g_t
        r_v = b2 * v_t + (1 - b2) * tf.pow(g_t, 2)

        a_t = alpha * (tf.sqrt(1 - tf.pow(b2, r_t))) / (1 - tf.pow(b1, r_t))
        delta_vals = a_t * r_m / (tf.sqrt(r_v) + self.eps)

        delta_vals = check_nan(delta_vals, 'delta_vals')

        r_p = tf.log(r) + delta_vals[0]
        new_r = tf.exp(r_p)

        a3_p = tf.log(-1.0 * dist_params[0]) + delta_vals[1]
        new_dist_params = tf.stack([
            -1.0 * tf.exp(a3_p), dist_params[1] + delta_vals[2],
            dist_params[2] + delta_vals[3], dist_params[3] + delta_vals[4],
            dist_params[4] + delta_vals[5]
        ])

        # %%
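        # delta_vals[6:8] and delta_vals[8:10] line up with b1_v_params_der and
        # b2_v_params_der in the g_t concatenation above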
        new_b1_v_params = tf.pow(tf.sqrt(b1_v_params) + delta_vals[6:6 + 2], 2)
        new_b2_v_params = tf.pow(tf.sqrt(b2_v_params) + delta_vals[8:8 + 2], 2)

        new_the_number_of_rounds_under_acc_diff = tf.cond(
            tf.less(tf.reduce_max(tf.abs(delta_vals)), self.acc_diff_limit),
            lambda: the_number_of_rounds_under_acc_diff + 1,
            lambda: tf.constant(0))

        # shrink the learning rate when every update component flipped sign
        new_alpha = tf.cond(
            tf.less(tf.reduce_max(delta_vals * prev_delta),
                    tf.constant(0.0, dtype=tf.float64)), lambda: 0.1 * alpha,
            lambda: alpha)

        new_step = step + tf.constant(1, dtype=tf.int32)

        return new_r, new_dist_params, new_b1_v_params, new_b2_v_params, r_m, r_v, prev_ll, \
               new_the_number_of_rounds_under_acc_diff, delta_vals, \
               stable_r, stable_dist_params, stable_b1_v_params, stable_b2_v_params, \
               stable_m_t, stable_v_t, stable_prev_ll, stable_the_number_of_rounds_under_acc_diff, \
               stable_prev_delta, new_step, new_alpha, max_ll, extra_took_steps
Example #4
    def normal_loop_body(self, r, f_param, vis_transformer_params, m_t, v_t, prev_ll, the_number_of_rounds_under_acc_diff, prev_delta,
                         stable_r, stable_f_param, stable_vis_transformer_params, stable_m_t, stable_v_t, stable_prev_ll,
                         stable_the_number_of_rounds_under_acc_diff, stable_prev_delta, step, alpha, max_ll, extra_took_steps):

        # sub-sampling for mini batch
        nzero_mb = tf.cond(tf.equal(self.non_zero_batch_size, self.non_zero_samples_num),
                           lambda: self.insig_interactions,
                           lambda: tf.gather(self.insig_interactions,
                                             tf.random_uniform([self.non_zero_batch_size], maxval=self.non_zero_samples_num, dtype=tf.int32)))

        # calculating derivatives for the non-zero batch
        si = tf.gather(self.vis, tf.squeeze(nzero_mb[:, 0]))
        sj = tf.gather(self.vis, tf.squeeze(nzero_mb[:, 1]))

        # %%
        bv = vis_transformer_params[1]
        spower = vis_transformer_params[0]

        tsi = tf.pow(si, spower)
        tsj = tf.pow(sj, spower)

        tvi, tsi_share, bv_i_share = tf_soft_max(tsi, bv, True)
        tvj, tsj_share, bv_j_share = tf_soft_max(tsj, bv, True)

        xij = tf.cast(nzero_mb[:, 2], tf.float64)
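        # in this variant the distance-dependent rate function is collapsed into the single
        # scalar f_param, used through tf.exp below so the resulting factor stays positive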
        mu_ij = tvi * tvj * tf.exp(f_param)

        mu_ij = check_nan(mu_ij, 'mu ij')
        xij = check_nan(xij, 'x ij')

        common_der = (xij - mu_ij) / (r + mu_ij)

        r_der = r * (-2.0 * self.init_regularization_factor * r + (tf.log(r) - tf.digamma(r - 1)) +
                     tf.reduce_mean(tf.digamma(xij + r - 1) - common_der - tf.log(r + mu_ij)))

        # %%
        vis_transformer_params_der = tf.stack([
            2.0 * tf.sqrt(spower) * r * tf_weighted_average(
                common_der * ((tsi_share * tsi * tf.log(si)) + (tsj_share * tsj * tf.log(sj)) / (tsi_share + tsj_share)), tsi_share + tsj_share
            ),
            2.0 * tf.sqrt(bv) * r * tf_weighted_average(
                common_der, bv_i_share + bv_j_share
            )
        ])

        common_der = check_nan(common_der, 'common der')
        r = check_nan(r, 'r via update')

        # gradient for the single free rate parameter (this variant has no distance-dependent term to separate)
        free_f_drev = r * tf.reduce_mean(common_der)

        # %%
        non_zero_g_t = tf.stack([r_der, free_f_drev, vis_transformer_params_der[0], vis_transformer_params_der[1]])

        # ****************************************
        g_t = non_zero_g_t

        # Adam-style parameter update:
        b1 = tf.constant(self.beta1, dtype=tf.float64)
        b2 = tf.constant(self.beta2, dtype=tf.float64)

        r_t = tf.cast(step + 1, tf.float64)
        r_m = b1 * m_t + (1 - b1) * g_t
        r_v = b2 * v_t + (1 - b2) * tf.pow(g_t, 2)

        a_t = alpha * (tf.sqrt(1 - tf.pow(b2, r_t))) / (1 - tf.pow(b1, r_t))
        delta_vals = a_t * r_m / (tf.sqrt(r_v) + self.eps)

        r_p = tf.log(r) + delta_vals[0]
        new_r = tf.exp(r_p)
        new_f_param = tf.exp(tf.log(f_param) + delta_vals[1])
        new_vis_transformer_params = tf.pow(tf.sqrt(vis_transformer_params) + delta_vals[2:2 + self.n_vis_t_params], 2)

        new_the_number_of_rounds_under_acc_diff = tf.cond(
            tf.less(tf.reduce_max(tf.abs(delta_vals)), self.acc_diff_limit),
            lambda: the_number_of_rounds_under_acc_diff + 1,
            lambda: tf.constant(0))

        # shrink the learning rate when every update component flipped sign
        new_alpha = tf.cond(
            tf.less(tf.reduce_max(delta_vals * prev_delta), tf.constant(0.0, dtype=tf.float64)),
            lambda: 0.1 * alpha,
            lambda: alpha
        )

        new_step = step + 1

        # re-assert static shapes so the tf.while_loop shape invariants hold
        new_r.set_shape(r.get_shape())
        new_f_param.set_shape(f_param.get_shape())
        new_vis_transformer_params.set_shape(vis_transformer_params.get_shape())
        r_m.set_shape(m_t.get_shape())
        r_v.set_shape(v_t.get_shape())
        new_step.set_shape(step.get_shape())
        new_the_number_of_rounds_under_acc_diff.set_shape(the_number_of_rounds_under_acc_diff.get_shape())
        delta_vals.set_shape(prev_delta.get_shape())

        return new_r, new_f_param, new_vis_transformer_params, r_m, r_v, prev_ll, new_the_number_of_rounds_under_acc_diff, delta_vals,\
               stable_r, stable_f_param, stable_vis_transformer_params, stable_m_t, stable_v_t, stable_prev_ll,\
               stable_the_number_of_rounds_under_acc_diff, stable_prev_delta, new_step, new_alpha, max_ll, extra_took_steps
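The helpers used throughout these examples (check_nan, check_inf, tf_soft_max, tf_weighted_average, tf_cis_var_func) are defined elsewhere in the codebase and not shown here. Below is a minimal sketch of what three of them might look like, inferred only from how they are called above; the real implementations may well differ (for instance, check_nan might log or replace NaNs instead of raising):

    def check_nan(x, name):
        # assumed behaviour: fail fast if x contains NaN or Inf values
        return tf.verify_tensor_all_finite(x, 'NaN/Inf detected in ' + name)

    def tf_weighted_average(values, weights):
        # weighted mean: sum(w * v) / sum(w)
        return tf.reduce_sum(values * weights) / tf.reduce_sum(weights)

    def tf_soft_max(a, b, return_shares=False):
        # smooth, softmax-weighted combination of two tensors; the returned shares are
        # the mixture weights, matching how the *_share values are used above
        ea, eb = tf.exp(a), tf.exp(b)
        share_a = ea / (ea + eb)
        share_b = eb / (ea + eb)
        combined = share_a * a + share_b * b
        if return_shares:
            return combined, share_a, share_b
        return combined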