예제 #1
0
 def _spacey_metagraph_only_random_walk(
         self, root_node):  # metapath, multi-metapath, metagraph
     """Generate a single metagraph-constrained random walk from ``root_node``.

     At each step the walker first jumps back to the root with probability
     ``self._walk_restart``. It then collects the node types listed in
     ``self._metagraph[cur_type]`` for which the current node has at least
     one neighbour. With probability ``self._alpha`` the next type is chosen
     using the ``history`` counters (via ``utils.unigram_sample`` when there
     is more than one candidate; presumably a weight-proportional draw --
     confirm against ``utils``); otherwise it is chosen uniformly. The next
     node is a uniform choice among the current node's neighbours of the
     chosen type, and the counter of that type is incremented.

     Args:
         root_node: Node the walk starts from; it is used as a key of
             ``self._adj_lookupdict``.

     Returns:
         The visited nodes as a list of strings, starting with the root.
         The walk stops early when no admissible next type exists. An empty
         list is returned when the root's type is not in the metagraph.
     """
     root_type = self._net.get_node_type(root_node)
     if root_type not in self._metagraph:
         return []
     if self._history_position == "global":
         # Shared counters: the increments below modify self._history.
         history = self._history
     else:
         # "local" and every other setting (such as "local_walktime") get
         # fresh counters for this walk. Previously only "local" was
         # handled, so other values failed with UnboundLocalError.
         history = np.ones([self.node_types_size], dtype=np.float64)
     cur_node = root_node
     cur_type = root_type
     path = [str(cur_node)]
     for _ in range(self._walk_length):
         if random.random() < self._walk_restart:
             cur_node = root_node
             cur_type = root_type
         # Drawn before the candidates are inspected so the sequence of
         # random numbers consumed matches the original implementation.
         use_history = random.random() < self._alpha
         adjacent = self._adj_lookupdict[cur_node]
         next_type_list = [
             v for v in self._metagraph[cur_type] if v in adjacent
         ]
         if not next_type_list:
             # Dead end: the restart (if any) is not recorded in the path.
             break
         if not use_history:
             cur_type = random.choice(next_type_list)
         elif len(next_type_list) == 1:
             cur_type = next_type_list[0]
         else:
             cur_type = utils.unigram_sample(
                 population=next_type_list,
                 size=1,
                 weight=history[next_type_list])[0]
         cur_node = random.choice(adjacent[cur_type])
         history[cur_type] += 1
         path.append(str(cur_node))
     return path
예제 #2
0
    def _spacey_metaschema_only_random_walk(self, root_node, file_vector,
                                            file_dist, window_size):
        """Run one long walk from ``root_node`` and log how visit counts drift.

        The walker moves to a neighbour of a type taken from the current
        node's adjacency keys. With probability ``self._alpha`` the type is
        chosen using the ``history`` counters (``utils.unigram_sample`` when
        several types are available; presumably weight-proportional --
        confirm against ``utils``); otherwise it is chosen uniformly. A node
        without neighbours sends the walker back to the root.

        Every ``window_size`` steps the normalised visit-count vector is
        compared with the one from the previous window and a line
        ``"<JS> <EUC>"`` is appended to ``file_dist``, where JS is the mean
        of the two ``scipy.stats.entropy`` values against the mixture and
        EUC is the Euclidean distance.

        Args:
            root_node: Start node; used as an index into the count vector,
                so it is expected to be an integer below ``self.nodes_size``.
            file_vector: Path that receives the final visit counts, one
                integer per line (``np.savetxt`` with ``fmt="%d"``).
            file_dist: Path that receives the per-window distance lines.
            window_size: Number of steps between two distance measurements.

        Returns:
            None.
        """
        root_type = self._net.get_node_type(root_node)
        if self._history_position == "global":
            # Shared counters: the increments below modify self._history.
            history = self._history
        else:
            # "local" and every other setting (such as "local_walktime")
            # start from fresh counters. Previously only "local" was
            # handled, so other values failed with UnboundLocalError.
            history = np.ones([self.node_types_size], dtype=np.float64)

        occur_count_vector = np.zeros(self.nodes_size, dtype=np.int32)
        occur_count_vector[root_node] = 1
        last_distribution = occur_count_vector / np.sum(occur_count_vector)

        cur_node = root_node
        cur_type = root_type
        # The context manager closes the log even if a step raises.
        with open(file_dist, "w") as fr_dist:
            for wl_cnt in range(1, self._walk_length + 1):
                adjacent = self._adj_lookupdict[cur_node]
                next_type_list = list(adjacent.keys())
                use_history = random.random() < self._alpha
                if not next_type_list:
                    # Isolated node: jump back to the root. The root type's
                    # counter is still incremented below.
                    cur_type = root_type
                    cur_node = root_node
                else:
                    if not use_history:
                        cur_type = random.choice(next_type_list)
                    elif len(next_type_list) == 1:
                        cur_type = next_type_list[0]
                    else:
                        cur_type = utils.unigram_sample(
                            population=next_type_list,
                            size=1,
                            weight=history[next_type_list])[0]
                    cur_node = random.choice(adjacent[cur_type])
                history[cur_type] += 1
                occur_count_vector[cur_node] += 1
                if wl_cnt % window_size == 0:
                    cur_distribution = occur_count_vector / np.sum(
                        occur_count_vector)
                    mix_distribution = (cur_distribution +
                                        last_distribution) / 2
                    JS_dist = (scipy.stats.entropy(
                        cur_distribution,
                        mix_distribution) + scipy.stats.entropy(
                            last_distribution, mix_distribution)) / 2
                    EUC_dist = np.sqrt(
                        np.sum(
                            np.square(cur_distribution - last_distribution)))
                    fr_dist.write("{} {}\n".format(JS_dist, EUC_dist))
                    # Flushed per window so progress is visible on disk.
                    fr_dist.flush()
                    last_distribution = cur_distribution
        np.savetxt(file_vector, occur_count_vector, fmt="%d")
예제 #3
0
    def _spacey_metatree_only_random_walk(
            self, root_node, file_vector, file_dist,
            window_size):  # metapath, multi-metapath, metagraph
        """Run one long metatree-guided walk and log how visit counts drift.

        The walker tracks a network node, its type and a metatree id. At
        each step the candidate ids are the entries of
        ``self._metagraph[cur_id]`` whose ``"type"`` attribute is a type the
        current node has neighbours of. With probability ``self._alpha`` the
        next id is chosen using the ``history`` counters
        (``utils.unigram_sample`` when several candidates exist; presumably
        weight-proportional -- confirm against ``utils``) and afterwards the
        id is re-sampled among all ids of the reached type; otherwise the id
        is chosen uniformly. Without candidates the walker returns to the
        root.

        Every ``window_size`` steps a line ``"<JS> <EUC>"`` comparing the
        current and previous normalised visit-count vectors is appended to
        ``file_dist``.

        Args:
            root_node: Start node; used as an index into the count vector,
                so it is expected to be an integer below ``self.nodes_size``.
            file_vector: Path that receives the final visit counts, one
                integer per line.
            file_dist: Path that receives the per-window distance lines.
            window_size: Number of steps between two distance measurements.

        Returns:
            ``[]`` (and no files are written) when the root's type has no
            metatree id; otherwise None.
        """
        root_type = self._net.get_node_type(root_node)
        if root_type not in self._metatree_type_id_dict:
            return []
        root_id = random.choice(self._metatree_type_id_dict[root_type])
        if self._history_position == "global":
            # Shared counters: the increments below modify self._history.
            history = self._history
        else:
            # "local" and every other setting (such as "local_walktime")
            # start from fresh counters. Previously only "local" was
            # handled, so other values failed with UnboundLocalError.
            history = np.ones([len(self._metagraph.nodes())],
                              dtype=np.float64)

        occur_count_vector = np.zeros(self.nodes_size, dtype=np.int32)
        occur_count_vector[root_node] = 1
        last_distribution = occur_count_vector / np.sum(occur_count_vector)

        cur_node = root_node
        cur_type = root_type
        cur_id = root_id
        # The context manager closes the log even if a step raises.
        with open(file_dist, "w") as fr_dist:
            for wl_cnt in range(1, self._walk_length + 1):
                use_history = random.random() < self._alpha
                adjacent = self._adj_lookupdict[cur_node]
                next_id_list = [
                    v for v in self._metagraph[cur_id]
                    if self._metagraph.nodes[v]["type"] in adjacent
                ]
                if next_id_list:
                    if not use_history:
                        cur_id = random.choice(next_id_list)
                    elif len(next_id_list) == 1:
                        cur_id = next_id_list[0]
                    else:
                        cur_id = utils.unigram_sample(
                            population=next_id_list,
                            size=1,
                            weight=history[next_id_list])[0]
                    cur_type = self._metagraph.nodes[cur_id]["type"]
                    cur_node = random.choice(adjacent[cur_type])
                else:
                    # Dead end: jump back to the root. The root id's counter
                    # is still incremented below.
                    cur_node = root_node
                    cur_type = root_type
                    cur_id = root_id
                history[cur_id] += 1
                if use_history:
                    # "Spacey out": re-draw the metatree id among all ids
                    # that carry the current node type.
                    same_type_ids = self._metatree_type_id_dict[cur_type]
                    cur_id = utils.unigram_sample(
                        population=same_type_ids,
                        size=1,
                        weight=history[same_type_ids])[0]
                occur_count_vector[cur_node] += 1
                if wl_cnt % window_size == 0:
                    cur_distribution = occur_count_vector / np.sum(
                        occur_count_vector)
                    mix_distribution = (cur_distribution +
                                        last_distribution) / 2
                    JS_dist = (scipy.stats.entropy(
                        cur_distribution,
                        mix_distribution) + scipy.stats.entropy(
                            last_distribution, mix_distribution)) / 2
                    EUC_dist = np.sqrt(
                        np.sum(
                            np.square(cur_distribution - last_distribution)))
                    fr_dist.write("{} {}\n".format(JS_dist, EUC_dist))
                    # Flushed per window so progress is visible on disk.
                    fr_dist.flush()
                    last_distribution = cur_distribution
        np.savetxt(file_vector, occur_count_vector, fmt="%d")
예제 #4
0
 def _spacey_metatree_random_walk(
         self,
         root_node,
         walk_times=0):  # metapath, multi-metapath, metagraph
     """Collect per-type context and negative samples via metatree walks.

     ``walk_times`` walks of ``self._walk_length`` steps are started from the
     root. If the root's type has no metatree id, a random type that has one
     and a random node of that type replace the requested root. Each step
     may restart at the root (probability ``self._walk_restart``), moves
     along a metatree id whose ``"type"`` the current node has neighbours
     of, and then re-samples the id among all ids of the reached type via
     ``utils.unigram_sample`` with the ``history`` counters as weights.
     Every step records the current node under its type.

     Args:
         root_node: Node the walks start from.
         walk_times: Number of walks to run; the default 0 runs none.

     Returns:
         A tuple ``(root_node, contexts, masks, negatives)`` whose three
         lists have one entry per type in ``range(self.node_types_size)``.
         For visited types the mask is 1, the context is the list of
         recorded nodes and the negatives come from ``utils.neg_sample``;
         for unvisited types the mask is 0 and both lists hold ``[0]``.
     """
     root_type = self._net.get_node_type(root_node)
     if root_type not in self._metatree_type_id_dict:
         # Fall back to a random root of a type the metatree knows about.
         root_type = random.choice(list(self._metatree_type_id_dict.keys()))
         root_node = random.choice(self._nodes_type_dict[root_type][0])
     root_id = random.choice(self._metatree_type_id_dict[root_type])

     visited_by_type = {}  # type -> [context list, exclusion set]
     position = self._history_position
     if position == "local":
         history = np.ones([len(self._metagraph.nodes())], dtype=np.float64)
     elif position == "global":
         history = self._history

     for _walk in range(walk_times):
         if position == "local_walktime":
             # Counters are reset at the start of every walk.
             history = np.ones([len(self._metagraph.nodes())],
                               dtype=np.float64)
         cur_node, cur_type, cur_id = root_node, root_type, root_id
         for _step in range(self._walk_length):
             if random.random() < self._walk_restart:
                 cur_node, cur_type, cur_id = root_node, root_type, root_id
             neighbours = self._adj_lookupdict[cur_node]
             candidate_ids = [
                 mid for mid in self._metagraph[cur_id]
                 if self._metagraph.nodes[mid]["type"] in neighbours
             ]
             if candidate_ids:
                 if len(candidate_ids) == 1:
                     cur_id = candidate_ids[0]
                 else:
                     weights = history[candidate_ids]
                     cur_id = utils.unigram_sample(population=candidate_ids,
                                                   size=1,
                                                   weight=weights)[0]
                 cur_type = self._metagraph.nodes[cur_id]["type"]
                 cur_node = random.choice(neighbours[cur_type])
                 history[cur_id] += 1
             else:
                 # Dead end: go back to the root without touching history.
                 cur_type, cur_node, cur_id = root_type, root_node, root_id
             # Re-draw the metatree id among the ids of the current type.
             same_type_ids = self._metatree_type_id_dict[cur_type]
             cur_id = utils.unigram_sample(population=same_type_ids,
                                           size=1,
                                           weight=history[same_type_ids])[0]
             record = visited_by_type.get(cur_type)
             if record is None:
                 # The root is excluded from negatives from the first visit.
                 visited_by_type[cur_type] = [[cur_node],
                                              {cur_node, root_node}]
             else:
                 record[0].append(cur_node)
                 record[1].add(cur_node)

     type_context_nodes_list = []
     type_neg_nodes_list = []
     type_mask_list = []
     for node_type in range(self.node_types_size):
         record = visited_by_type.get(node_type)
         if record is None:
             type_mask_list.append(0)
             type_context_nodes_list.append([0])
             type_neg_nodes_list.append([0])
             continue
         contexts, excluded = record
         type_mask_list.append(1)
         type_context_nodes_list.append(contexts)
         candidates, alias = self._nodes_type_dict[node_type][0], \
             self._nodes_type_dict[node_type][1]
         type_neg_nodes_list.append(
             utils.neg_sample(candidates,
                              excluded,
                              num=self._neg_sampled,
                              alias_table=alias))
     return (root_node, type_context_nodes_list, type_mask_list,
             type_neg_nodes_list)
예제 #5
0
 def _spacey_metaschema_random_walk(self, root_node, walk_times=0):
     """Collect per-type context and negative samples via type-guided walks.

     ``walk_times`` walks of ``self._walk_length`` steps are started from the
     root. Each step may restart at the root (probability
     ``self._walk_restart``), then picks a node type among the adjacency
     keys of the current node (``utils.unigram_sample`` with the ``history``
     counters as weights when several types exist) and moves to a uniformly
     chosen neighbour of that type. A node without neighbours sends the
     walker back to the root. Every step records the current node under its
     type.

     Args:
         root_node: Node the walks start from.
         walk_times: Number of walks to run; the default 0 runs none.

     Returns:
         A tuple ``(root_node, contexts, masks, negatives)`` whose three
         lists have one entry per type in ``range(self.node_types_size)``.
         For visited types the mask is 1, the context is the list of
         recorded nodes and the negatives come from ``utils.neg_sample``;
         for unvisited types the mask is 0 and both lists hold ``[0]``.
     """
     root_type = self._net.get_node_type(root_node)
     visited_by_type = {}  # type -> [context list, exclusion set]
     position = self._history_position
     if position == "local":
         history = np.ones([self.node_types_size], dtype=np.float64)
     elif position == "global":
         history = self._history

     for _walk in range(walk_times):
         if position == "local_walktime":
             # Counters are reset at the start of every walk.
             history = np.ones([self.node_types_size], dtype=np.float64)
         cur_node, cur_type = root_node, root_type
         for _step in range(self._walk_length):
             if random.random() < self._walk_restart:
                 cur_node, cur_type = root_node, root_type
             neighbours = self._adj_lookupdict[cur_node]
             candidate_types = list(neighbours.keys())
             if candidate_types:
                 if len(candidate_types) == 1:
                     cur_type = candidate_types[0]
                 else:
                     weights = history[candidate_types]
                     cur_type = utils.unigram_sample(
                         population=candidate_types,
                         size=1,
                         weight=weights)[0]
                 cur_node = random.choice(neighbours[cur_type])
                 history[cur_type] += 1
             else:
                 # Isolated node: go back to the root, history untouched.
                 cur_type, cur_node = root_type, root_node
             record = visited_by_type.get(cur_type)
             if record is None:
                 # The root is excluded from negatives from the first visit.
                 visited_by_type[cur_type] = [[cur_node],
                                              {cur_node, root_node}]
             else:
                 record[0].append(cur_node)
                 record[1].add(cur_node)

     type_context_nodes_list = []
     type_neg_nodes_list = []
     type_mask_list = []
     for node_type in range(self.node_types_size):
         record = visited_by_type.get(node_type)
         if record is None:
             type_mask_list.append(0)
             type_context_nodes_list.append([0])
             type_neg_nodes_list.append([0])
             continue
         contexts, excluded = record
         type_mask_list.append(1)
         type_context_nodes_list.append(contexts)
         candidates, alias = self._nodes_type_dict[node_type][0], \
             self._nodes_type_dict[node_type][1]
         type_neg_nodes_list.append(
             utils.neg_sample(candidates,
                              excluded,
                              num=self._neg_sampled,
                              alias_table=alias))
     return (root_node, type_context_nodes_list, type_mask_list,
             type_neg_nodes_list)
예제 #6
0
 def _spacey_metatree_only_random_walk(
         self, root_node):  # metapath, multi-metapath, metagraph
     """Generate a single metatree-guided random walk from ``root_node``.

     The walker tracks a network node, its type and a metatree id. Each step
     may restart at the root (probability ``self._walk_restart``). The
     candidate ids are the entries of ``self._metagraph[cur_id]`` whose
     ``"type"`` attribute is a type the current node has neighbours of. With
     probability ``self._alpha`` the next id is chosen using the ``history``
     counters (``utils.unigram_sample`` when several candidates exist;
     presumably weight-proportional -- confirm against ``utils``) and the id
     is afterwards re-sampled among all ids of the reached type; otherwise
     the id is chosen uniformly. The next node is a uniform choice among the
     current node's neighbours of the chosen id's type.

     Args:
         root_node: Node the walk starts from; it is used as a key of
             ``self._adj_lookupdict``.

     Returns:
         The visited nodes as a list of strings, starting with the root.
         The walk stops early when no candidate id exists. An empty list is
         returned when the root's type has no metatree id.
     """
     root_type = self._net.get_node_type(root_node)
     if root_type not in self._metatree_type_id_dict:
         return []
     root_id = random.choice(self._metatree_type_id_dict[root_type])
     if self._history_position == "global":
         # Shared counters: the increments below modify self._history.
         history = self._history
     else:
         # "local" and every other setting (such as "local_walktime") get
         # fresh counters for this walk. Previously only "local" was
         # handled, so other values failed with UnboundLocalError.
         history = np.ones([len(self._metagraph.nodes())], dtype=np.float64)
     cur_node = root_node
     cur_type = root_type
     cur_id = root_id
     path = [str(cur_node)]
     for _ in range(self._walk_length):
         if random.random() < self._walk_restart:
             cur_node = root_node
             cur_type = root_type
             cur_id = root_id
         # Drawn before the candidates are inspected so the sequence of
         # random numbers consumed matches the original implementation.
         use_history = random.random() < self._alpha
         adjacent = self._adj_lookupdict[cur_node]
         next_id_list = [
             v for v in self._metagraph[cur_id]
             if self._metagraph.nodes[v]["type"] in adjacent
         ]
         if not next_id_list:
             # Dead end: the restart (if any) is not recorded in the path.
             break
         if not use_history:
             cur_id = random.choice(next_id_list)
         elif len(next_id_list) == 1:
             cur_id = next_id_list[0]
         else:
             cur_id = utils.unigram_sample(population=next_id_list,
                                           size=1,
                                           weight=history[next_id_list])[0]
         cur_type = self._metagraph.nodes[cur_id]["type"]
         cur_node = random.choice(adjacent[cur_type])
         history[cur_id] += 1
         if use_history:
             # "Spacey out": re-draw the metatree id among all ids that
             # carry the current node type.
             same_type_ids = self._metatree_type_id_dict[cur_type]
             cur_id = utils.unigram_sample(
                 population=same_type_ids,
                 size=1,
                 weight=history[same_type_ids])[0]
         path.append(str(cur_node))
     return path