def getpairs(self, batch, params):
    """Encode a batch of gold pairs and build inputs for sampled pairs.

    Each entry of ``batch`` is indexed as ``entry[0]`` / ``entry[1]``; both
    items expose ``.embeddings`` and receive a ``.representation`` attribute
    as a side effect (the matching row of the feedforward output).

    Args:
        batch: sequence of two-item entries (iterated more than once).
        params: object providing ``samplingtype``, forwarded to
            ``utils.get_pairs_fast``.

    Returns:
        The tuple ``(g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask)``
        where the ``g*`` values come from ``prepare_data`` on the gold items
        and the ``p*`` values from ``prepare_data`` on the sampled pairs.
    """
    first_embs = [entry[0].embeddings for entry in batch]
    second_embs = [entry[1].embeddings for entry in batch]

    g1x, g1mask = self.prepare_data(first_embs)
    g2x, g2mask = self.prepare_data(second_embs)

    encoded_first = self.feedforward_function(g1x, g1mask)
    encoded_second = self.feedforward_function(g2x, g2mask)

    # Store the fresh encodings on the items before sampling;
    # presumably get_pairs_fast reads .representation - TODO confirm.
    for row, entry in enumerate(batch):
        entry[0].representation = encoded_first[row, :]
        entry[1].representation = encoded_second[row, :]

    sampled = utils.get_pairs_fast(batch, params.samplingtype)

    # Single pass over the sampled pairs, then split into the two sides.
    sampled_embs = [(pair[0].embeddings, pair[1].embeddings) for pair in sampled]
    left_embs = [left for left, _ in sampled_embs]
    right_embs = [right for _, right in sampled_embs]

    p1x, p1mask = self.prepare_data(left_embs)
    p2x, p2mask = self.prepare_data(right_embs)

    return (g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask)
def get_pairs(self, batch, params):
    """Encode gold pairs in chunks, sample pairs, and return inputs plus phrases.

    The gold items are encoded ``params.batchsize`` entries at a time and the
    per-chunk outputs are stacked with ``np.vstack``. Row ``k`` of the stacked
    result is then stored as ``.representation`` on the items of ``batch[k]``
    (a side effect on the batch).

    Args:
        batch: sequence of two-item entries; each item exposes
            ``.embeddings`` and ``.phrase``.
        params: object providing ``batchsize`` (chunk size for encoding) and
            ``samplingtype`` (forwarded to ``utils.get_pairs_fast``).

    Returns:
        A pair of tuples:
        ``(g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask)`` built by
        ``prepare_data`` over the full gold lists and the sampled lists, and
        ``(g1_s, g2_s, p1_s, p2_s)`` holding the corresponding phrases.
    """
    g1 = [entry[0].embeddings for entry in batch]
    g2 = [entry[1].embeddings for entry in batch]
    g1_s = [entry[0].phrase for entry in batch]
    g2_s = [entry[1].phrase for entry in batch]

    # Encode chunk by chunk so each feedforward call sees at most
    # params.batchsize examples.
    chunk = params.batchsize
    first_parts = []
    second_parts = []
    for start in range(0, len(g1), chunk):
        stop = start + chunk
        chunk1_x, chunk1_mask = self.prepare_data(g1[start:stop])
        chunk2_x, chunk2_mask = self.prepare_data(g2[start:stop])
        first_parts.append(self.feedforward_function(chunk1_x, chunk1_mask))
        second_parts.append(self.feedforward_function(chunk2_x, chunk2_mask))

    encoded_first = np.vstack(first_parts)
    encoded_second = np.vstack(second_parts)

    # update representations
    for row, entry in enumerate(batch):
        entry[0].representation = encoded_first[row, :]
        entry[1].representation = encoded_second[row, :]

    sampled = utils.get_pairs_fast(batch, params.samplingtype)

    p1 = []
    p2 = []
    p1_s = []
    p2_s = []
    for pair in sampled:
        left, right = pair[0], pair[1]
        p1.append(left.embeddings)
        p2.append(right.embeddings)
        p1_s.append(left.phrase)
        p2_s.append(right.phrase)

    p1x, p1mask = self.prepare_data(p1)
    p2x, p2mask = self.prepare_data(p2)
    # The returned gold inputs cover the whole batch, not the last chunk.
    g1x, g1mask = self.prepare_data(g1)
    g2x, g2mask = self.prepare_data(g2)

    model_inputs = (g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask)
    phrases = (g1_s, g2_s, p1_s, p2_s)
    return model_inputs, phrases
def get_pairs(self, batch, params):
    """Encode multi-view gold pairs and build inputs for sampled pairs per view.

    Every batch entry holds the views of the first sentence followed by the
    same views of the second sentence:

    * ``(ngram1, word1, ngram2, word2)`` by default;
    * ``(ngram1, word1, lstm1, ngram2, word2, lstm2)`` when
      ``params.combination_type == "ngram-word-lstm"``.

    All views of a sentence receive the same ``.representation`` (the row of
    the combined feedforward output for that sentence), set as a side effect.
    Pairs are then sampled separately for each view via
    ``utils.get_pairs_fast``.

    Args:
        batch: sequence of entries laid out as described above; each item
            exposes ``.embeddings``.
        params: object providing ``combination_type`` and ``samplingtype``.

    Returns:
        A flat tuple of ``(x, mask)`` values in the order g1 views, g2 views,
        p1 views, p2 views; views are ordered ngram, word and (only in the
        "ngram-word-lstm" case) lstm. That is 16 values by default and 24
        with the lstm view.
    """
    use_lstm = params.combination_type == "ngram-word-lstm"
    # Index at which the second sentence's views start inside an entry.
    offset = 3 if use_lstm else 2

    g1n = [entry[0].embeddings for entry in batch]
    g1w = [entry[1].embeddings for entry in batch]
    g2n = [entry[offset].embeddings for entry in batch]
    g2w = [entry[offset + 1].embeddings for entry in batch]
    if use_lstm:
        g1l = [entry[2].embeddings for entry in batch]
        g2l = [entry[offset + 2].embeddings for entry in batch]

    g1nx, g1nmask = self.prepare_data(g1n)
    g1wx, g1wmask = self.prepare_data(g1w)
    g2nx, g2nmask = self.prepare_data(g2n)
    g2wx, g2wmask = self.prepare_data(g2w)
    if use_lstm:
        g1lx, g1lmask = self.prepare_data(g1l)
        g2lx, g2lmask = self.prepare_data(g2l)

    if use_lstm:
        encoded_first = self.feedforward_function(
            g1nx, g1nmask, g1wx, g1wmask, g1lx, g1lmask)
        encoded_second = self.feedforward_function(
            g2nx, g2nmask, g2wx, g2wmask, g2lx, g2lmask)
    else:
        encoded_first = self.feedforward_function(
            g1nx, g1nmask, g1wx, g1wmask)
        encoded_second = self.feedforward_function(
            g2nx, g2nmask, g2wx, g2wmask)

    ngram_batch = []
    word_batch = []
    lstm_batch = []
    for row, entry in enumerate(batch):
        # Every view of a sentence shares that sentence's combined encoding.
        for view in range(offset):
            entry[view].representation = encoded_first[row, :]
        for view in range(offset, 2 * offset):
            entry[view].representation = encoded_second[row, :]
        ngram_batch.append((entry[0], entry[offset]))
        word_batch.append((entry[1], entry[offset + 1]))
        if use_lstm:
            lstm_batch.append((entry[2], entry[offset + 2]))

    def sample_view(view_batch):
        # Sample pairs for one view and prepare both sides of the result.
        left_embs = []
        right_embs = []
        for pair in utils.get_pairs_fast(view_batch, params.samplingtype):
            left_embs.append(pair[0].embeddings)
            right_embs.append(pair[1].embeddings)
        left_x, left_mask = self.prepare_data(left_embs)
        right_x, right_mask = self.prepare_data(right_embs)
        return left_x, left_mask, right_x, right_mask

    # Sampling order (ngram, word, lstm) is kept fixed.
    p1nx, p1nmask, p2nx, p2nmask = sample_view(ngram_batch)
    p1wx, p1wmask, p2wx, p2wmask = sample_view(word_batch)

    if not use_lstm:
        return (g1nx, g1nmask, g1wx, g1wmask,
                g2nx, g2nmask, g2wx, g2wmask,
                p1nx, p1nmask, p1wx, p1wmask,
                p2nx, p2nmask, p2wx, p2wmask)

    p1lx, p1lmask, p2lx, p2lmask = sample_view(lstm_batch)
    return (g1nx, g1nmask, g1wx, g1wmask, g1lx, g1lmask,
            g2nx, g2nmask, g2wx, g2wmask, g2lx, g2lmask,
            p1nx, p1nmask, p1wx, p1wmask, p1lx, p1lmask,
            p2nx, p2nmask, p2wx, p2wmask, p2lx, p2lmask)