示例#1
0
    def __call__(self, nodes):
        """Build skip-gram training pairs with negative samples for ``nodes``.

        Random walks of length ``self.walk_len`` are started from ``nodes`` on
        ``self.graph``. Every walk is expanded into (source, positive) pairs
        by ``skip_gram_gen_pair`` with window ``self.win_size``, and
        ``self.neg_num`` negative node ids are drawn for each pair.

        Args:
            nodes: Start nodes handed to ``random_walk``.

        Returns:
            A tuple ``(src, dsts)``. ``src`` has shape ``[num_pairs, 1]`` and
            ``dsts`` has shape ``[num_pairs, 1 + neg_num]``, with the positive
            node in column 0 followed by the negatives.

        Raises:
            NotImplementedError: If ``self.neg_sample_type`` is
                ``"outdegree"`` or ``"inbatch"``; no sampler is implemented
                for them in this method.
            ValueError: If ``self.neg_sample_type`` is not a known type.
        """
        walks = random_walk(self.graph, nodes, self.walk_len)
        src, pos = [], []
        for walk in walks:
            s, p = skip_gram_gen_pair(walk, self.win_size)
            src.extend(s)
            pos.extend(p)
        # Column vectors, one row per (source, positive) pair.
        src = np.array(src, dtype=np.int64).reshape(-1, 1)
        pos = np.array(pos, dtype=np.int64).reshape(-1, 1)

        neg_sample_size = [len(pos), self.neg_num]
        if self.neg_sample_type == "average":
            # Uniform sampling over all node ids.
            negs = np.random.randint(
                low=0, high=self.graph.num_nodes, size=neg_sample_size)
        elif self.neg_sample_type in ("outdegree", "inbatch"):
            # These branches used to fall through with `negs` unbound and
            # crash at the concatenate below; fail with a clear message.
            raise NotImplementedError(
                "neg_sample_type %r is not implemented" %
                (self.neg_sample_type, ))
        else:
            raise ValueError(
                "unknown neg_sample_type: %r" % (self.neg_sample_type, ))
        dsts = np.concatenate([pos, negs], 1)
        # [batch_size, 1] [batch_size, neg_num+1]
        return src, dsts
示例#2
0
    def pair_generate(self):
        """Yield ``(src, pos, negs)`` training triples, one per walk batch.

        Each batch from ``self.walk_generator()`` is expanded into skip-gram
        (source, positive) pairs using ``self.config.win_size``. Negatives
        come from ``self.negative_sample``. ``src`` and ``pos`` are yielded as
        int64 arrays reshaped to ``[-1, 1, 1]``; ``negs`` is passed through
        unchanged. Batches producing no pairs are skipped, and any exception
        raised while handling a batch is logged and the batch dropped.
        """
        for walk_batch in self.walk_generator():
            try:
                centers, contexts = [], []
                for path in walk_batch:
                    path_src, path_pos = skip_gram_gen_pair(
                        path, self.config.win_size)
                    centers.extend(path_src)
                    contexts.extend(path_pos)

                # Nothing to train on for this batch.
                if not centers:
                    continue

                # The sampler receives the flat Python lists, not the arrays.
                negs = self.negative_sample(
                    centers,
                    contexts,
                    neg_num=self.config.neg_num,
                    neg_sample_type=self.config.neg_sample_type)

                src = np.reshape(np.array(centers, dtype=np.int64), (-1, 1, 1))
                pos = np.reshape(
                    np.array(contexts, dtype=np.int64), (-1, 1, 1))

                yield src, pos, negs

            except Exception as e:
                log.exception(e)
示例#3
0
文件: reader.py 项目: Yelrose/PGL
    def __call__(self):
        """Yield skip-gram batches whose node ids are joined with features.

        For every batch of walks from ``self.walk_generator()`` the walks are
        expanded into (source, positive) pairs by ``skip_gram_gen_pair``,
        ``self.neg_num`` negatives are drawn per pair, and every id is
        concatenated along the last axis with its entry of ``self.node_feat``.

        Yields:
            ``(src_feat, dst_feat)``, each with a trailing axis added by
            ``np.expand_dims`` and truncated to at most ``max_len`` rows.
            Batches that produce no pairs are skipped.

        Raises:
            ValueError: If ``self.neg_sample_type`` is neither ``"average"``
                nor ``"outdegree"``. Raised when iteration starts.
        """
        # Seed per process so parallel workers do not draw identical samples.
        np.random.seed(os.getpid())
        if self.neg_sample_type == "outdegree":
            # Alias table for sampling nodes proportionally to out-degree.
            outdegree = self.graph.outdegree()
            distribution = 1. * outdegree / outdegree.sum()
            alias, events = alias_sample_build_table(distribution)
        elif self.neg_sample_type != "average":
            # Previously this surfaced as a NameError on `negs` at the first
            # non-empty batch; reject the configuration explicitly instead.
            raise ValueError(
                "unsupported neg_sample_type: %r" % (self.neg_sample_type, ))
        max_len = int(self.batch_size * self.walk_len *
                      ((1 + self.win_size) - 0.3))
        for walks in self.walk_generator():
            src, pos = [], []
            for walk in walks:
                s, p = skip_gram_gen_pair(walk, self.win_size)
                src.extend(s)
                pos.extend(p)
            src = np.array(src, dtype=np.int64).reshape(-1, 1, 1)
            pos = np.array(pos, dtype=np.int64).reshape(-1, 1, 1)

            if src.shape[0] == 0:
                continue
            neg_sample_size = [len(pos), self.neg_num, 1]
            if self.neg_sample_type == "average":
                negs = self.graph.sample_nodes(neg_sample_size)
            else:
                negs = alias_sample(neg_sample_size, alias, events)
            # [batch_size, 1, 1] [batch_size, neg_num+1, 1]
            dst = np.concatenate([pos, negs], 1)
            # NOTE(review): assumes self.node_feat is a 2-D array indexed by
            # node id so the lookup result lines up with the id arrays; confirm.
            src_feat = np.concatenate([src, self.node_feat[src[:, :, 0]]], -1)
            dst_feat = np.concatenate([dst, self.node_feat[dst[:, :, 0]]], -1)
            src_feat = np.expand_dims(src_feat, -1)
            dst_feat = np.expand_dims(dst_feat, -1)
            yield src_feat[:max_len], dst_feat[:max_len]
示例#4
0
    def __call__(self):
        """Stream individual skip-gram pairs taken from generated walks.

        For each walk, ``skip_gram_gen_pair`` is run over the positions
        ``0 .. len(walk) - 1`` with ``self.config.win_size``, and the
        resulting position pairs are mapped back onto the walk.

        Yields:
            ``(walk[s], walk[p])`` for every position pair ``(s, p)``.

        A progress line is logged on rank 0 each time the running pair count
        reaches a multiple of the reporting interval, and the total is logged
        once the walk generator is exhausted. Exceptions raised while
        processing a batch of walks are logged and that batch is abandoned.
        """
        report_every = (20000000 * 24) // self.config.walk_len
        emitted = 0
        for walk_batch in self.walk_generator():
            try:
                for path in walk_batch:
                    positions = np.arange(len(path), dtype="int64")
                    src_idx, pos_idx = skip_gram_gen_pair(
                        positions, self.config.win_size)
                    for i, j in zip(src_idx, pos_idx):
                        yield path[i], path[j]
                        emitted += 1
                        if emitted % report_every != 0 or self.rank != 0:
                            continue
                        log.info("[%s] pairs have been loaded in rank [%s]" \
                                % (emitted, self.rank))

            except Exception as e:
                log.exception(e)

        log.info("total [%s] pairs in rank [%s]" % (emitted, self.rank))
示例#5
0
    def __call__(self):
        """Yield ``(src, dst)`` skip-gram batches with negative samples.

        For every batch of walks from ``self.walk_generator()`` the walks are
        expanded into (source, positive) pairs by ``skip_gram_gen_pair``
        (at most ``max_len`` pairs are kept per walk), and ``self.neg_num``
        negatives are drawn per pair.

        Yields:
            ``(src, dst)``: ``src`` has shape ``[num_pairs, 1, 1]`` and
            ``dst`` has shape ``[num_pairs, 1 + neg_num, 1]`` with the
            positive node first; both are truncated to at most ``max_len``
            rows. Batches that produce no pairs are skipped, and an exception
            raised while processing a batch is logged and the batch dropped.

        Raises:
            NotImplementedError: If ``self.neg_sample_type`` is ``"inbatch"``,
                for which no sampler is implemented in this method.
            ValueError: If ``self.neg_sample_type`` is not a known type.
                Both are raised when iteration starts.
        """
        # Seed per process so parallel workers do not draw identical samples.
        np.random.seed(os.getpid())
        if self.neg_sample_type == "outdegree":
            # Alias table for sampling nodes proportionally to out-degree.
            outdegree = self.graph.outdegree()
            distribution = 1. * outdegree / outdegree.sum()
            alias, events = alias_sample_build_table(distribution)
        elif self.neg_sample_type == "inbatch":
            # Previously `negs` stayed unbound, the resulting error was
            # swallowed per batch below, and nothing was ever yielded.
            raise NotImplementedError(
                "neg_sample_type 'inbatch' is not implemented")
        elif self.neg_sample_type != "average":
            raise ValueError(
                "unknown neg_sample_type: %r" % (self.neg_sample_type, ))
        max_len = int(self.batch_size * self.walk_len *
                      ((1 + self.win_size) - 0.3))
        for walks in self.walk_generator():
            try:
                src, pos = [], []
                for walk in walks:
                    s, p = skip_gram_gen_pair(walk, self.win_size)
                    src.extend(s[:max_len])
                    pos.extend(p[:max_len])
                src = np.array(src, dtype=np.int64).reshape(-1, 1, 1)
                pos = np.array(pos, dtype=np.int64).reshape(-1, 1, 1)

                if src.shape[0] == 0:
                    continue
                neg_sample_size = [len(pos), self.neg_num, 1]
                if self.neg_sample_type == "average":
                    # Uniform sampling over all node ids.
                    negs = np.random.randint(low=0,
                                             high=self.graph.num_nodes,
                                             size=neg_sample_size)
                else:
                    negs = alias_sample(neg_sample_size, alias, events)
                dst = np.concatenate([pos, negs], 1)
                # [batch_size, 1, 1] [batch_size, neg_num+1, 1]
                yield src[:max_len], dst[:max_len]
            except Exception as e:
                # Best effort: a failing batch is logged and skipped.
                log.exception(e)