import numpy as np


def gen_validation_data(p, data, seq_len, transliteration, trans_vocab_size,
                        trans_to_index):
    x = np.zeros((1, int(seq_len), trans_vocab_size))
    turned = False
    new_p = min(p + seq_len, len(data))
    raw_translit = data[p:new_p]
    if new_p != len(data):
        # Trim the window back to the last whitespace so a word is not split
        # across two batches.
        last_ws = max([raw_translit.rfind(u' '),
                       raw_translit.rfind(u'\t'),
                       raw_translit.rfind(u'\n')])
        if last_ws > 0:
            new_p = last_ws
            raw_translit = raw_translit[:new_p]
            p += new_p
        else:
            p = new_p
    else:
        # Reached the end of the corpus; restart from the beginning.
        p = 0
        turned = True
    (translit, non_valids) = utils.valid(raw_translit, transliteration)
    # One-hot encode the cleaned sequence, padding the tail with the filler
    # character u'\u2001'.
    for ind in range(len(translit)):
        x[0, ind, trans_to_index[translit[ind]]] = 1
    for ind in range(len(translit), int(seq_len)):
        x[0, ind, trans_to_index[u'\u2001']] = 1
    return (x, non_valids, p, turned)
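# A minimal driver sketch for the function above; `run_model` is a
# hypothetical stand-in for whatever consumes the one-hot batch, and the
# remaining names mirror the parameters of gen_validation_data.
p, turned = 0, False
while not turned:
    x, non_valids, p, turned = gen_validation_data(
        p, data, seq_len, transliteration, trans_vocab_size, trans_to_index)
    run_model(x)  # hypothetical consumer of the (1, seq_len, vocab) batch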
def login():
    if request.method == "GET":
        return render_template("webpage.html")
    else:
        email = request.form['Email']
        button = request.form['button']
        if button == 'Login':
            if utils.valid(email):
                # Known user: start a session and go to the home page.
                session['user'] = email
            else:
                flash('Incorrect Email/Password')
                return redirect(url_for('login'))
            return redirect(url_for('home'))
        return redirect(url_for('login'))
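# The view above delegates the credential check to utils.valid; a minimal
# sketch of such a helper, assuming a bare regex format check. The real
# project presumably also verifies a password, as the flash message implies.
import re

def valid(email):
    # True when the address matches a simple user@host.tld shape.
    return re.fullmatch(r'[^@\s]+@[^@\s]+\.[^@\s]+', email or '') is not None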
def add_dtd():
    file = {'lang': {'java': False, 'php': False}}
    language = request.args.get('lang')
    if language == 'java':
        file['lang']['java'] = True
    elif language == 'php':
        file['lang']['php'] = True
        # php filter: base64
        b64 = request.args.get("base64")
        if valid(b64):
            file['base64'] = True
    else:
        return 'Unsupported language.'
    # Every remaining parameter is required; bail out when one is missing.
    file['url'] = request.args.get('url')
    if not valid(file['url']):
        return 'No url defined'
    file['domain'] = request.args.get('domain')
    if not valid(file['domain']):
        return 'No domain specified.'
    file['port'] = request.args.get('port')
    if not valid(file['port']):
        return 'No port specified.'
    filename = str(len(files)) + '.dtd'
    file['name'] = filename
    files[filename] = file
    save()
    return redirect("/dtd/%s" % filename)
def get_dtd(file):
    if file in files:
        # static files
        f = files[file]
    else:
        # dynamically generated files
        f = {'lang': {'java': False, 'php': False}}
        url = ''
        # protocol, if one is requested
        if exist(request.args.get('f')):
            url += 'file'
        elif exist(request.args.get('j')):
            url += 'jar'
            f['lang']['java'] = True
        elif exist(request.args.get('p')):
            url += 'php'
            f['lang']['php'] = True
        elif exist(request.args.get('ft')):
            url += 'ftp'
        else:
            return "No url scheme specified."
        url += '://'
        if valid(request.args.get('fu')):
            url += request.args.get('fu')
        else:
            return "No url content specified."
        f['url'] = url
        # language
        if not f['lang']['java'] and not f['lang']['php']:
            if exist(request.args.get('lj')):
                f['lang']['java'] = True
            elif exist(request.args.get('lp')):
                f['lang']['php'] = True
            elif exist(request.args.get('lpb')):
                f['lang']['php'] = True
                f['base64'] = True
            else:
                return "No language specified."
        f['domain'] = host
        f['port'] = port
    return render_template('dtd.html', file=f)
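# Both DTD routes lean on exist() and valid() for query parameters. A
# minimal sketch, assuming they only test presence and non-emptiness; the
# project's real helpers may sanitize further.
def exist(value):
    # request.args.get() returns None when the parameter is missing.
    return value is not None

def valid(value):
    return value is not None and value.strip() != ''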
def calculate_moves(self, b):
    # Requires: from functools import reduce (Python 3).
    t, a = self.type, self.ally
    if t not in 'RNBQKP':
        return []
    p = self.pos
    # Pawn direction: white moves toward lower square indices.
    d = -1 if a == 'W' else 1
    if t in 'QK':
        offs = [-9, -8, -7, -1, 1, 7, 8, 9]
    elif t == 'R':
        offs = [-8, -1, 1, 8]
    elif t == 'B':
        offs = [-9, -7, 7, 9]
    elif t == 'N':
        offs = [-17, -15, -10, -6, 6, 10, 15, 17]
    else:
        # Pawn: single push, double push, and the two capture diagonals.
        offs = [i * d for i in [7, 8, 9, 16]]
    # Rooks, bishops and queens slide, so get_moves walks along the offset.
    moves = [
        list(utils.get_moves(b, self, p + o, o, t in 'RBQ'))
        for o in offs
        if utils.valid(p + o) and not self.has_exclusion(p, o)
    ]
    return reduce(lambda a, b: [*a, *b], moves, [])
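# A sketch of the utils.valid bounds check the method relies on, assuming
# squares are indexed 0..63 in a flat array; horizontal wrap-around at the
# board edges is presumably what has_exclusion filters out.
def valid(square):
    return 0 <= square < 64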
def train():
    # data
    train_data = DGDataset(file_path=args.train_data, tag2id=tag2id,
                           maxlen=args.max_len)
    train_dl = DataLoader(train_data, batch_size=args.batch_size,
                          num_workers=2)
    valid_data = DGDataset(file_path=args.valid_data, tag2id=tag2id,
                           maxlen=args.max_len)
    valid_dl = DataLoader(valid_data, batch_size=args.batch_size,
                          num_workers=2)
    num_of_batch = len(train_data) // args.batch_size + 1
    print("start training...")
    for epoch in range(args.epoch):
        if epoch == args.epoch - 1:
            torch.save(ner.state_dict(), args.save_path)
        running_loss = 0.0
        for i, data in enumerate(train_dl):
            sent2id, label, length = data
            if args.use_gpu:
                sent2id = sent2id.to(device)
                label = label.to(device)
            optimizer.zero_grad()
            # pred = ner(sent2id)
            # pred: [batch, seq_len, len(tag2id)], label: [batch, seq_len]
            # loss = criterion(pred.view(-1, len(tag2id)), label.view(-1))
            loss = ner.neg_log_likelihood(sent2id, label)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if i % 10 == 0 and i != 0:
                # Report the mean loss over the 10 batches since last report.
                print('epoch {}/{}, batch {}/{}, loss:{:.3f}'.format(
                    epoch + 1, args.epoch, i + 1, num_of_batch,
                    running_loss / 10))
                running_loss = 0.0
        # valid
        if args.isValid:
            print("===========epoch{},valid===========".format(epoch + 1))
            with torch.no_grad():
                preds = []
                lengths = []
                sent2ids = []
                for sent2id, label, length in valid_dl:
                    sent2ids.append(sent2id)
                    if args.use_gpu:
                        sent2id = sent2id.to(device)
                    # pred = ner(sent2id)                # [batch, seq_len, len(tag2id)]
                    # pred = torch.argmax(pred, dim=-1)  # [batch, seq_len]
                    pred = ner(sent2id)
                    preds.append(pred)
                    lengths.append(length.numpy())
                # acc = compute_accuracy(preds, labels, lengths)
                # print("epoch{}: ACC{:.3f}".format(epoch + 1, acc))
                preds = np.concatenate(preds, axis=0)
                lengths = np.concatenate(lengths, axis=0)
                sent2ids = np.concatenate(sent2ids, axis=0)
                write_to_file(preds, lengths, sent2ids,
                              "./result/valid_result.txt", tag2id)
                valid(result_file="./result/valid_result.txt",
                      label_file=args.valid_data)
parser.add_argument('--total-words', type=int, default=20000, help='')
parser.add_argument('--dropout', type=float, default=0.3,
                    help='dropout probability used all throughout')
parser.add_argument('--turn-hidden-state', type=int, default=1200,
                    help='encoder session hidden state')
parser.add_argument('--embedding', default=True, help='')

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

if __name__ == '__main__':
    options = parser.parse_args()
    # In toy mode, shrink both splits to a single batch for a quick check.
    if options.toy:
        valid_dataset = Dataset(data_type='valid', length=options.bs)
        test_dataset = Dataset(data_type='test', length=options.bs)
    else:
        valid_dataset = Dataset(data_type='valid')
        test_dataset = Dataset(data_type='test')
    valid_loader = DataLoader(dataset=valid_dataset, batch_size=options.bs,
                              shuffle=False, collate_fn=batch_collect,
                              drop_last=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=options.bs,
                             shuffle=False, collate_fn=batch_collect,
                             drop_last=True)
    model = Model(options)
    model = model.to(device)
    saved_state = torch.load(options.sustain)
    model.load_state_dict(saved_state)
    print('Loaded the pre-trained model')
    valid_loss, valid_ppl = valid(options, model, device, valid_loader)
    test_loss, test_ppl = valid(options, model, device, test_loader)
    # After fine-tuning with all turns enabled:
    # Validation Average Loss: 4.528276 PPL: 92.525948
    # Validation Average Loss: 4.533181 PPL: 92.969895
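# The script also reads options.toy, options.bs and options.sustain, which
# must be declared before parse_args(); a sketch of those declarations,
# with defaults assumed here purely for illustration.
parser.add_argument('--toy', action='store_true',
                    help='run on a single batch per split')
parser.add_argument('--bs', type=int, default=32, help='batch size')
parser.add_argument('--sustain', type=str, default='../model/best.pt',
                    help='path of the pre-trained checkpoint to load')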
print(f)
for epoch in range(n_epochs):
    # Enable the transform flag only once the warm-up period has passed,
    # and reset the patience counter at the switch-over epoch.
    t_f[4] = epoch > start_transforming
    if epoch == start_transforming:
        patience = 0
    c = train(data[0], data[1], batch_size - 1, n_tr_batch, optimizer,
              criterion, model, t_f[0], t_f[1], t_f[2], t_f[3], t_f[4], cuda)
    v = valid(data[2], data[3], n_va_batch, n_c, model, cuda)
    loss = test(data[4], data[5], n_te_batch, n_c, model, cuda)
    patience += 1
    cost.append(np.mean(c, 0, dtype='float64'))
    validation.append(np.mean(v, 0, dtype='float64'))
    score.append(np.mean(loss, 0, dtype='float64'))
    print(pandas.DataFrame(
        [epoch, np.mean(c, 0, dtype='float64'),
         datetime.datetime.now() - time_start],
        ['Iteration', 'Cost', 'Elapsed time'], [f]))
optimizer.step()
if batch_idx % options.log_interval == 0:
    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t'.format(
        epoch, batch_idx * options.bs, len(train_loader.dataset),
        100.0 * batch_idx / len(train_loader), loss))
# -------------------------------------------------------
if batch_idx % options.valid_interval == 0:
    valid_dataset = Dataset(data_type='valid', length=1000)
    valid_loader = DataLoader(dataset=valid_dataset, batch_size=options.bs,
                              shuffle=False, collate_fn=batch_collect,
                              drop_last=True)
    valid_loss, ppl = valid(options, model, device, valid_loader)
    history_list.append({
        'epoch': epoch,
        'batch_idx': batch_idx,
        'loss': valid_loss,
        'ppl': ppl
    })
    if valid_loss <= best_loss:
        best_epo = epoch
        best_loss = valid_loss
        best_step = batch_idx
        torch.save(model.state_dict(),
                   "../model/e{}s{}.pt".format(best_epo, best_step))
        print('save model when epoch = {} step = {} as e{}s{}.pt'.format(
            best_epo, best_step, best_epo, best_step))
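# The checkpoint logic above assumes tracking state created before the
# training loop; a minimal sketch of that initialization.
best_loss = float('inf')   # so the first validation loss becomes the best
best_epo, best_step = 0, 0
history_list = []          # one dict per validation run, as appended above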
# cw, cc, qw, qc, y1s, y2s, ids = next(train_iter)
for cw, cc, qw, qc, y1s, y2s, ids in train_dataloader:
    cw, cc, qw, qc, y1s, y2s = (cw.to(device), cc.to(device), qw.to(device),
                                qc.to(device), y1s.to(device), y2s.to(device))
    optimizer.zero_grad()  # clear gradients left over from the previous step
    p1, p2 = model(cw, cc, qw, qc)
    # Average the negative log-likelihoods of the start and end positions.
    loss_1 = F.nll_loss(p1, y1s)
    loss_2 = F.nll_loss(p2, y2s)
    loss = (loss_1 + loss_2) / 2
    losses.append(loss.item())
    loss.backward()
    optimizer.step()
    scheduler.step()
    f1s.append(f1_score(p1, p2, y1s, y2s))
    ems.append(utils.em(p1, p2, y1s, y2s))
    process_bar.update(config.batch_size)
    process_bar.set_postfix(NLL=loss.item())
    # if step % 100 == 0:
    #     print('Epoch: %2d | Step: %3d | Loss: %3f' % (epoch, step, loss))
print('Epoch: %2d | F1: %.2f | EM: %.2f | LOSS: %.2f' %
      (epoch, np.mean(f1s), np.mean(ems), loss.item()))
torch.save(model, 'model/model.pt')
f1, em, loss = valid(model, dev_dataset)
print('-' * 30)
print('Valid:')
message = 'Epoch: %2d | F1: %.2f | EM: %.2f | LOSS: %.2f' % (epoch, f1, em, loss)
print(message)
f.write(message + '\n')
def evaluate(self, label_list, seq_len_list, data, file):
    # Dump the predicted tag sequences, then score them against the gold file.
    sents = [x[0] for x in data]
    write_to_file(label_list, seq_len_list, sent2ids=sents, file=file)
    valid(result_file=file, label_file='./data/new_valid.txt')
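# Usage sketch: `tagger` is assumed to be an instance of the enclosing
# class, with label_list/seq_len_list its decoded predictions for dev_data.
tagger.evaluate(label_list, seq_len_list, data=dev_data,
                file='./result/eval_result.txt')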