Exemplo n.º 1
0
    def train_one_epoch(self, step_size, n_list_set):
        """Perform one mini-batch update of phi/pi followed by theta/beta.

        ``self.mini_batch_nodes`` node indices are drawn without replacement
        from ``self.n``. The phi rows of those nodes are overwritten with the
        output of ``update_phi`` and pi is then recomputed from the whole phi
        matrix. After that, theta is refreshed through ``update_theta`` and
        beta is recomputed from the new theta.

        :param step_size: step size forwarded to ``update_phi`` and ``update_theta``
        :param n_list_set: forwarded unchanged to ``update_phi``
        :return: None; all results are stored on the instance
        """
        sampled_nodes = np.random.choice(
            self.n, size=self.mini_batch_nodes, replace=False)

        # Local parameters: only the sampled rows of phi change, but pi is
        # re-derived from the full matrix.
        self.phi[sampled_nodes] = self.update_phi(
            sampled_nodes, step_size, n_list_set)
        pi, phi_constant = reparameterized_to_pi(self.phi, self.n)
        self.pi = pi
        self.phi_constant = phi_constant

        # Global parameters: theta is stored only after beta has been derived
        # from it.
        updated_theta = self.update_theta(step_size)
        beta, theta_constant = reparameterized_to_beta(updated_theta)
        self.beta = beta
        self.theta_constant = theta_constant
        self.theta = updated_theta
Exemplo n.º 2
0
    def __init__(self,
                 flags,
                 n,
                 k,
                 edges,
                 nonedges,
                 beta_prior,
                 membership_prior,
                 theta_constant,
                 phi_constant,
                 true_labels,
                 better_initialization_flag,
                 step_size_scalar,
                 node_neighbors_dict,
                 val_set_index,
                 mu=1,
                 max_iter=10000):
        """Set up the model state; follows the notations in the original paper.

        :param flags: hyper-parameters for GCN and MMSBM; this constructor reads
            ``gamma_scale``, ``batch_size``, ``delta`` and
            ``sampled_non_edges_ratio`` from it
        :param n: node number
        :param k: class number
        :param edges: edge indices
        :param nonedges: non-edge indices

        :param beta_prior: prior for the community strength; only used when
            ``better_initialization_flag`` is truthy
        :param membership_prior: prior for the membership; only used when
            ``better_initialization_flag`` is truthy
        :param theta_constant: re-parameterization constant for community
            strength beta; ignored (recomputed) on the random-initialization path
        :param phi_constant: re-parameterization constant for membership;
            ignored (recomputed) on the random-initialization path
        :param true_labels: ground truth labels
        :param better_initialization_flag: a flag indicating whether we train
            the MMSBM from scratch (falsy) or use the better initialization
            output from gcn (truthy)
        :param step_size_scalar: step size for the MMSBM
        :param node_neighbors_dict: a dict for querying the neighborhood node
            indices
        :param val_set_index: indices for the validation set
        :param mu: shape argument of the gamma draw that randomly initializes
            theta
        :param max_iter: stored as ``self.max_iter``; presumably the iteration
            budget of the training loop (confirm against the caller)
        """
        self.gamma_scale = flags.gamma_scale
        self.better_initialization_flag = better_initialization_flag
        self.step_size_scalar = step_size_scalar
        self.flags = flags
        self.n = n  # number of nodes
        self.k = k
        self.val_set_index = val_set_index

        self.alpha = 1.0 / k  # gamma shape used for the random draw of phi
        self.mu = mu
        self.tao = 1024
        # Index array 0..n-1 of all nodes.
        self.n_list_set = np.arange(self.n)

        self.max_iter = max_iter
        self.mini_batch_nodes = flags.batch_size
        self.true_labels = true_labels

        self.sample_n = 20  # sample size for update each local parameters
        self.T = 1  # sample number of pi and beta for each edge during the evaluation process
        self.test_edges_n = 500  # test set edges for the perplexity test
        self.delta = flags.delta
        self.node_neighbors_dict = node_neighbors_dict
        self.avg_predict_label = 0

        if not self.better_initialization_flag:
            # Random initialization: draw phi and theta from gamma
            # distributions, then derive beta and pi from them. The
            # theta_constant / phi_constant arguments are not used here.
            self.phi = np.random.gamma(self.alpha, 1, size=(self.n, self.k))
            self.theta = np.random.gamma(self.mu, 1, size=(self.k, 2))

            self.beta, self.theta_constant = reparameterized_to_beta(
                self.theta)
            self.pi, self.phi_constant = reparameterized_to_pi(
                self.phi, self.n)
            self.initial_prediction_labels = self.pi.argmax(axis=1)
        else:
            # Better initialization: take beta / pi and both constants from
            # the caller and recover theta / phi from them.
            self.theta_constant = theta_constant
            self.phi_constant = phi_constant

            self.beta = beta_prior
            self.pi = membership_prior
            self.initial_prediction_labels = membership_prior.argmax(axis=1)
            self.theta, self.phi = initialize_theta_phi_with_better_initialization(
                self.beta, self.pi, self.theta_constant, self.phi_constant, k)

        self.MCMC_MMSBM_prediction_labels = self.initial_prediction_labels
        # k x k matrix with every entry equal to delta.
        self.B = np.ones((self.k, self.k)) * self.delta

        # Info of the given topology, split into the edges and non-edges
        self.edges = edges
        self.nonedges = nonedges
        (self.edges_n,
         self.nonedges_n,
         self.test_set,
         self.y_test_set) = graph_preparation(
            self.edges, self.nonedges, test_edges_n=self.test_edges_n)

        # Number of non-edges to sample: a fixed fraction of all non-edges,
        # truncated to an int.
        self.sampled_non_edges_ratio = self.flags.sampled_non_edges_ratio
        self.sampled_non_edges_n = int(self.sampled_non_edges_ratio *
                                       self.nonedges_n)
        self.dir = 'figures/'