import numpy as np
import torch
from torch import optim
from torch.utils.data import DataLoader

# get_pickle, skipDataset, SkipGram, and collate_fn are project-local
# helpers defined elsewhere in the repo.


def main(args):
    LongTensor = torch.cuda.LongTensor if args.gpu else torch.LongTensor
    data = get_pickle('assets/dataset.pkl')
    i2s = get_pickle('assets/i2s.pkl')
    dataset = skipDataset(data)
    model = SkipGram(len(i2s), 300)
    if args.gpu:
        model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    epoch_losses = [np.inf, np.inf, np.inf]
    total_n = len(dataset)
    tmplt = "E:{:2d} - i:{:5d}({:4.2f}%) - L:{:5.5f}"
    for epoch in range(args.epoch):
        dataloader = DataLoader(dataset, batch_size=args.bs,
                                collate_fn=collate_fn, shuffle=True)
        model.train()
        losses = []
        for i, batch in enumerate(dataloader):
            center, target = batch
            center = LongTensor(center)
            target = LongTensor(target)
            optimizer.zero_grad()
            loss = model(center, target)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
            if i % 100 == 0:
                ml = np.mean(losses)
                print(tmplt.format(epoch, i, i * args.bs / total_n * 100, ml))
                losses = []

        # Evaluation pass to track the epoch loss.
        model.eval()
        dataloader = DataLoader(dataset, batch_size=args.bs,
                                collate_fn=collate_fn, shuffle=False)
        losses = []
        with torch.no_grad():
            for batch in dataloader:
                center, target = batch
                # Use the device-aware LongTensor here as well; the original
                # built CPU tensors, which fails when the model is on GPU.
                center = LongTensor(center)
                target = LongTensor(target)
                loss = model(center, target)
                losses.append(loss.item())
        epoch_losses.append(np.mean(losses))
        print('Epoch loss {}'.format(epoch_losses[-1]))

        # Early stopping: stop once the loss is worse than three epochs ago;
        # otherwise checkpoint the current model.
        if epoch_losses[-1] > epoch_losses[-4]:
            break
        else:
            filename = 'assets/model/model_skip.torch'
            state = dict(state_dict=model.state_dict(),
                         loss=epoch_losses, args=args)
            torch.save(state, filename)
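# A minimal sketch of a command-line entry point for main() above. The flag
# names (--gpu, --lr, --bs, --epoch) are inferred from the attributes main()
# reads off args; the default values are assumptions, not part of the
# original script.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train a SkipGram model')
    parser.add_argument('--gpu', action='store_true', help='train on CUDA')
    parser.add_argument('--lr', type=float, default=1e-3, help='Adam learning rate')
    parser.add_argument('--bs', type=int, default=512, help='batch size')
    parser.add_argument('--epoch', type=int, default=10, help='number of epochs')
    main(parser.parse_args())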
import time

import torch
from torch import optim

# SkipGram and the batch generator self.op are defined elsewhere in the repo.


def train(self):
    if self.model_name == 'SkipGram':
        model = SkipGram(self.vocabulary_size, self.embedding_dim)
    elif self.model_name == 'CBOW':
        # CBOW training is not implemented.
        return
    if torch.cuda.is_available():
        model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=0.2)
    for epoch in range(self.epoch):
        start = time.time()
        self.op.process = True
        batch_num = 0
        batch_new = 0
        while self.op.process:
            pos_u, pos_v, neg_v = self.op.generate_batch(
                self.windows_size, self.batch_size, self.neg_sample_size)
            # Plain tensors; the Variable wrapper used originally is
            # deprecated since PyTorch 0.4.
            pos_u = torch.LongTensor(pos_u)
            pos_v = torch.LongTensor(pos_v)
            neg_v = torch.LongTensor(neg_v)
            if torch.cuda.is_available():
                pos_u = pos_u.cuda()
                pos_v = pos_v.cuda()
                neg_v = neg_v.cuda()
            optimizer.zero_grad()
            loss = model(pos_u, pos_v, neg_v, self.batch_size)
            loss.backward()
            optimizer.step()
            if batch_num % 3000 == 0:
                end = time.time()
                # loss.item() replaces the deprecated loss.data[0].
                print('epoch,batch = %2d %5d: pair/sec = %4.2f loss = %4.3f'
                      % (epoch, batch_num,
                         (batch_num - batch_new) * self.batch_size / (end - start),
                         loss.item()))
                batch_new = batch_num
                start = time.time()
            batch_num += 1
    model.save_embeddings(self.op.idx2word, 'word_embedding.txt',
                          torch.cuda.is_available())
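# A hedged sketch of the SkipGram forward pass that train() above relies on.
# The real model lives elsewhere in the repo; this version only illustrates
# the negative-sampling objective implied by the (pos_u, pos_v, neg_v)
# batches: maximize log sigma(u.v) for observed center/context pairs and
# log sigma(-u.v') for sampled negatives. The layer names and class name
# are assumptions.
import torch
import torch.nn as nn
import torch.nn.functional as F

class SkipGramSketch(nn.Module):
    def __init__(self, vocabulary_size, embedding_dim):
        super().__init__()
        self.u_embeddings = nn.Embedding(vocabulary_size, embedding_dim)  # center words
        self.v_embeddings = nn.Embedding(vocabulary_size, embedding_dim)  # context words

    def forward(self, pos_u, pos_v, neg_v, batch_size):
        u = self.u_embeddings(pos_u)                  # (B, D) center vectors
        v = self.v_embeddings(pos_v)                  # (B, D) positive contexts
        n = self.v_embeddings(neg_v)                  # (B, K, D) negative samples
        pos_score = F.logsigmoid((u * v).sum(dim=1))  # log sigma(u.v), shape (B,)
        neg_score = F.logsigmoid(
            -torch.bmm(n, u.unsqueeze(2)).squeeze(2)  # (B, K) scores u.v'
        ).sum(dim=1)
        return -(pos_score + neg_score).sum() / batch_size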
import time

import torch
from torch import optim

# SkipGram, self.data, self.data_loader, and showPlot are defined elsewhere
# in the repo.


def train(self, report=True):
    model = SkipGram(self.vocabulary_size, self.embedding_dim)
    loss_list = list()
    if torch.cuda.is_available():
        model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=0.2)
    for epoch in range(self.epoch):
        start = time.time()
        self.data.process = True
        batch_num = 0
        batch_new = 0
        for data_word, data_sentence in self.data_loader():
            optimizer.zero_grad()
            loss = model(data_word) / self.batch_size
            # loss = model(pos_u, pos_v, neg_v, self.batch_size, target, contex, labels)
            # Store a Python float; appending the tensor itself would keep
            # every batch's autograd graph alive in memory.
            loss_list.append(loss.item())
            loss.backward()
            optimizer.step()
            if report and batch_num % 7 == 0:  # 3000
                end = time.time()
                print('epoch,batch = %2d %5d: batch_size = %5d loss = %4.3f'
                      % (epoch, batch_num, self.batch_size, loss.item()))
                batch_new = batch_num
                start = time.time()
            batch_num += 1
    self.showPlot(loss_list, 'Losses')
    model.save_embeddings(self.data.idx2word, 'word_embedding.txt')
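# A hedged sketch of the save_embeddings() method that both trainers above
# call; the repo's actual implementation may differ. It writes the common
# word2vec text format: a header line with vocabulary size and dimension,
# then one "word v1 v2 ... vD" line per word. The u_embeddings attribute
# name (matching the sketch above) and idx2word being an index-to-word dict
# are assumptions.
def save_embeddings(self, idx2word, filename, use_cuda=False):
    weights = self.u_embeddings.weight.data
    if use_cuda:
        weights = weights.cpu()
    weights = weights.numpy()
    with open(filename, 'w') as f:
        f.write('%d %d\n' % (len(idx2word), weights.shape[1]))
        for idx, word in idx2word.items():
            vector = ' '.join(str(x) for x in weights[idx])
            f.write('%s %s\n' % (word, vector))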