# Assumed module-level imports; `F` is the DGL tensor backend, as in DGL's
# own sampler code.
import numpy
import torch
from dgl import backend as F


def _generate(self, g, eids, canonical_etype):
    dtype = F.dtype(eids)
    ctx = F.context(eids)
    # Find the source endpoint of each sampled edge.
    src, _dst = g.find_edges(eids, etype=canonical_etype)
    etype = self.etype_dict[eids]  # per-edge relation ids (eid -> relation-id tensor)
    src = F.repeat(src, self.k, 0)
    etype = F.repeat(etype, self.k, 0)
    dsts = None
    # Each negative dst must be drawn from the node set of its own node type.
    for i in _dst:
        '''
        Known limitations, corrected in UniformBaseOnTriples:
        - positive triples are not filtered out of the negatives, and the
          true original dst is not excluded;
        - no subsampling weight is generated for this edge;
        - only (true src, negative dst) pairs are sampled, i.e. tail-batch;
          the corruption side should instead be chosen according to `mode`.
        '''
        nid = i.item()  # node id (the original `i.numel()` always returned 1)
        ntype = self.ntype_dict[nid]
        node_set = self.type_set[ntype]
        node_limit = len(node_set)
        # Uniform sampling within this type's node set; draw k negatives per
        # edge to match the k-times repeated src (was 2 * k, a shape mismatch).
        dst = F.randint((1, self.k), dtype, ctx, 0, node_limit)
        dst = node_set[dst]
        if dsts is None:
            dsts = dst
        else:
            dsts = torch.cat((dsts, dst), dim=1)
    return (src, dsts.squeeze(dim=0)), etype
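# A hedged sketch of how the per-type lookup tables used above (`ntype_dict`,
# `type_set`) might be built; it assumes node ids are globally unique across
# types, and the id ranges below are hypothetical.
def _example_build_type_tables():
    import torch
    # Hypothetical global id layout: users occupy [0, 3), items [3, 8).
    ranges = {'user': (0, 3), 'item': (3, 8)}
    ntype_dict, type_set = {}, {}
    for ntype, (lo, hi) in ranges.items():
        ids = torch.arange(lo, hi)
        type_set[ntype] = ids          # candidate pool indexed by randint above
        for nid in ids.tolist():
            ntype_dict[nid] = ntype    # global node id -> node type
    return ntype_dict, type_set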
def _generate(self, g, eids, canonical_etype):
    _, _, vtype = canonical_etype
    shape = F.shape(eids)
    dtype = F.dtype(eids)
    ctx = F.context(eids)
    shape = (shape[0] * self.k,)
    src, _ = g.find_edges(eids, etype=canonical_etype)
    src = F.repeat(src, self.k, 0)
    # Corrupt the tail uniformly over all nodes of the destination type.
    dst = F.randint(shape, dtype, ctx, 0, g.number_of_nodes(vtype))
    return src, dst
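# The variant above matches the `_generate` hook of DGL's built-in
# `dgl.dataloading.negative_sampler.Uniform`; a minimal usage sketch, assuming
# a recent DGL (>= 0.6) with the PyTorch backend and a toy homogeneous graph.
def _example_uniform_usage():
    import dgl
    import torch
    from dgl.dataloading.negative_sampler import Uniform
    g = dgl.rand_graph(100, 500)   # toy graph: 100 nodes, 500 edges
    neg_sampler = Uniform(5)       # k = 5 negatives per positive edge
    eids = torch.arange(10)
    # Each src is repeated k times; each dst is uniform over all nodes.
    neg_src, neg_dst = neg_sampler(g, eids)
    print(neg_src.shape, neg_dst.shape)  # torch.Size([50]) torch.Size([50])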
def negative_sampling(self, nsrcs, ndsts, node_limit, node_set):
    subsampling_w = []
    neg_sample = None
    for indice, src in enumerate(nsrcs):
        negative_sample_size = 0
        negative_sample_list = []
        # Frequency-based subsampling weight: the rarer the triple's
        # endpoints, the larger the weight 1 / sqrt(count).
        subsampling_weight = self.count[src] + self.count[-ndsts[indice] - 1]
        subsampling_weight = torch.sqrt(1 / torch.Tensor([subsampling_weight]))
        while negative_sample_size < self.neg_num:
            # TODO: this is global negative sampling; edges with relations
            # should take the metapath into account.
            # Only tail corruption is performed here.
            negative_sample = numpy.random.randint(node_limit, size=self.neg_num * 2)
            # Filter out positives: keep only candidates that are not true tails.
            mask = numpy.in1d(
                negative_sample,
                self.true_tuple[src],  # sample pure tail (not true)
                assume_unique=True,
                invert=True)
            negative_sample = negative_sample[mask]
            negative_sample_list.append(negative_sample)
            negative_sample_size += negative_sample.size
        subsampling_w.append(subsampling_weight)
        # Trim the oversampled negatives down to exactly neg_num.
        negative_sample = numpy.concatenate(negative_sample_list)[:self.neg_num]
        if neg_sample is None:
            neg_sample = negative_sample
        else:
            neg_sample = numpy.concatenate((neg_sample, negative_sample))
    subsampling_w = torch.cat(subsampling_w, dim=-1)
    subsampling_w = F.repeat(subsampling_w, self.neg_num, 0)
    pos_src = torch.tensor(nsrcs, dtype=torch.long)
    pos_src = F.repeat(pos_src, self.neg_num, 0)
    dsts = torch.from_numpy(neg_sample)
    for i in dsts:
        node_set.add(i.item())
    return pos_src, dsts, subsampling_w
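# The weight above follows the frequency-based subsampling of word2vec/RotatE:
# rarer triples receive larger weights 1/sqrt(count). A self-contained sketch
# of building counts with the `-t - 1` tail key used above; the triples and
# the smoothing constant (commonly 4 in the RotatE reference code) are
# assumptions.
def _example_subsampling_weight():
    import torch

    def build_counts(triples, start=4):
        # Smoothed frequency counts keyed by head id and by the encoded
        # tail key (-t - 1), matching the self.count lookups above.
        count = {}
        for h, _r, t in triples:
            count[h] = count.get(h, start) + 1
            count[-t - 1] = count.get(-t - 1, start) + 1
        return count

    triples = [(0, 0, 1), (0, 1, 2), (3, 0, 1)]  # hypothetical (h, r, t) ids
    count = build_counts(triples)
    h, t = 0, 1
    # Weight for any triple (0, r, 1).
    return torch.sqrt(1 / torch.Tensor([count[h] + count[-t - 1]]))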
def _generate(self, g, eids, canonical_etype):
    _, _, vtype = canonical_etype
    shape = F.shape(eids)
    dtype = F.dtype(eids)
    ctx = F.context(eids)
    shape = (shape[0] * self.k,)
    src, _ = g.find_edges(eids, etype=canonical_etype)
    src = F.repeat(src, self.k, 0)
    # Non-uniform tail corruption: draw dst ids from the user-supplied
    # distribution self.p (must have length g.number_of_nodes(); for a
    # heterogeneous graph this should arguably be restricted to vtype).
    dst = numpy.random.choice(numpy.arange(0, g.number_of_nodes()),
                              shape, replace=True, p=self.p)
    # dst = F.randint(shape, dtype, ctx, 0, g.number_of_nodes(vtype))
    dst = torch.tensor(dst, dtype=dtype, device=ctx)
    return src, dst
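# A hedged sketch of one way the distribution `self.p` could be built, e.g.
# degree-proportional sampling with the word2vec-style 0.75 exponent; the toy
# graph and the exponent are assumptions for illustration.
def _example_degree_proportional_p():
    import dgl
    import numpy
    g = dgl.rand_graph(100, 500)   # toy homogeneous graph
    # Degree-proportional distribution, raised to 0.75 as in word2vec
    # negative sampling, then normalized to sum to 1.
    deg = g.in_degrees().numpy().astype('float64') ** 0.75
    p = deg / deg.sum()
    return numpy.random.choice(numpy.arange(0, g.number_of_nodes()),
                               (10,), replace=True, p=p)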
def _generate(self, g, eids, canonical_etype):
    srcs, dsts = g.find_edges(eids, etype=canonical_etype)
    neg_sample = None
    nsrcs, ndsts = srcs.numpy(), dsts.numpy()
    etype = self.etype_dict[eids]
    netype = etype.numpy()
    _, _, vtype = canonical_etype
    subsampling_w = []
    for indice, src in enumerate(nsrcs):
        negative_sample_size = 0
        rel = netype[indice]
        # Frequency-based subsampling weight for the triple (src, rel, dst).
        subsampling_weight = self.count[(src, rel)] + self.count[(ndsts[indice], -rel - 1)]
        subsampling_weight = torch.sqrt(1 / torch.Tensor([subsampling_weight]))
        negative_sample_list = []
        while negative_sample_size < self.k:
            # Oversample, then mask out true heads/tails so that no positive
            # triple leaks into the negatives.
            negative_sample = numpy.random.randint(g.number_of_nodes(vtype), size=self.k * 2)
            if self.mode == 'head-batch':
                mask = numpy.in1d(
                    negative_sample,
                    self.true_head[(rel, ndsts[indice])],  # sample pure head (not true)
                    assume_unique=True,
                    invert=True
                )
            elif self.mode == 'tail-batch':
                mask = numpy.in1d(
                    negative_sample,
                    self.true_tail[(src, rel)],  # sample pure tail (not true)
                    assume_unique=True,
                    invert=True
                )
            else:
                raise ValueError('Training batch mode %s not supported' % self.mode)
            negative_sample = negative_sample[mask]
            negative_sample_list.append(negative_sample)
            negative_sample_size += negative_sample.size
        subsampling_w.append(subsampling_weight)
        # Trim the oversampled negatives down to exactly k.
        negative_sample = numpy.concatenate(negative_sample_list)[:self.k]
        # Optionally prepend the positive sample to the corrupted side:
        # if self.mode == 'head-batch':
        #     negative_sample = numpy.append(numpy.array(ndsts[indice]), negative_sample)
        # elif self.mode == 'tail-batch':
        #     negative_sample = numpy.append(numpy.array(src), negative_sample)
        # else:
        #     raise ValueError('Training batch mode %s not supported' % self.mode)
        if neg_sample is None:
            neg_sample = negative_sample
        else:
            neg_sample = numpy.concatenate((neg_sample, negative_sample))
    if self.mode == 'head-batch':
        # Corrupt heads: keep the true tails, replace sources by negatives.
        dsts = F.repeat(dsts, self.k, 0)
        srcs = torch.from_numpy(neg_sample)
    elif self.mode == 'tail-batch':
        # Corrupt tails: keep the true heads, replace destinations by negatives.
        srcs = F.repeat(srcs, self.k, 0)
        dsts = torch.from_numpy(neg_sample)
    else:
        raise ValueError('Training batch mode %s not supported' % self.mode)
    subsampling_w = torch.cat(subsampling_w, dim=-1)
    etype = F.repeat(etype, self.k, 0)
    subsampling_w = F.repeat(subsampling_w, self.k, 0)
    return (srcs, dsts), etype, subsampling_w
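# For reference, a hedged sketch of how the returned subsampling weights are
# typically consumed in RotatE-style training; the score tensor and shapes are
# assumptions, and `Fn` aliases torch.nn.functional to avoid clashing with the
# DGL backend `F`.
def _example_weighted_negative_loss():
    import torch
    import torch.nn.functional as Fn
    neg_score = torch.randn(8)        # toy scores for batch=2, k=4 negatives
    subsampling_w = torch.rand(8)     # weights as returned by _generate above
    loss = Fn.logsigmoid(-neg_score)  # negative-sample log-likelihood
    return -(subsampling_w * loss).sum() / subsampling_w.sum()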