Пример #1
0
def gen_validation_data(p, data, seq_len, transliteration, trans_vocab_size,
                        trans_to_index):
    """Build one one-hot encoded validation window starting at offset ``p``.

    A slice of at most ``seq_len`` characters is taken from ``data``. Unless
    the slice reaches the end of ``data``, it is cut back to its last
    whitespace character (space, tab or newline) when that character is not
    at position 0. The slice is passed through ``utils.valid`` and each
    resulting symbol is one-hot encoded via ``trans_to_index``; remaining
    positions are filled with the ``u'\\u2001'`` symbol.

    Returns:
        A tuple ``(x, non_valids, p, turned)`` where ``x`` has shape
        ``(1, int(seq_len), trans_vocab_size)``, ``non_valids`` is the second
        value returned by ``utils.valid``, ``p`` is the offset for the next
        call and ``turned`` is True when the end of ``data`` was reached
        (in which case ``p`` is reset to 0).
    """
    window_len = int(seq_len)
    x = np.zeros((1, window_len, trans_vocab_size))
    turned = False
    new_p = min(p + seq_len, len(data))
    raw_translit = data[p:new_p]

    if new_p == len(data):
        # The window touched the end of the data: start over next time.
        p = 0
        turned = True
    else:
        # Position of the last whitespace character inside the window
        # (-1 when there is none).
        cut = max(raw_translit.rfind(ws) for ws in (u' ', u'\t', u'\n'))
        if cut > 0:
            new_p = cut
            raw_translit = raw_translit[:cut]
            p += cut
        else:
            p = new_p

    translit, non_valids = utils.valid(raw_translit, transliteration)
    for position, symbol in enumerate(translit):
        x[0, position, trans_to_index[symbol]] = 1
    if len(translit) < window_len:
        # Pad the unused tail of the window with the filler symbol.
        x[0, len(translit):window_len, trans_to_index[u'\u2001']] = 1

    return (x, non_valids, p, turned)
Пример #2
0
def login():
    """Serve the login page on GET and process the login form otherwise.

    On a non-GET request the ``Email`` and ``button`` form fields are read.
    When the button is ``Login`` and ``utils.valid(email)`` is truthy, the
    email is stored in the session under ``'user'`` and the client is
    redirected to ``home``. In every other case the client is redirected back
    to ``login``; a rejected email additionally flashes an error message.
    """
    if request.method == "GET":
        return render_template("webpage.html")

    email = request.form['Email']
    button = request.form['button']

    if button != 'Login':
        return redirect(url_for('login'))

    if not utils.valid(email):
        flash('Incorrect Email/Password')
        return redirect(url_for('login'))

    session['user'] = email
    return redirect(url_for('home'))
Пример #3
0
def add_dtd():
    """Register a new DTD description from the query string and redirect to it.

    Query parameters read: ``lang`` (``java`` or ``php``), ``base64`` (PHP
    only), ``url``, ``domain`` and ``port``. The description is stored in the
    module-level ``files`` mapping under ``"<n>.dtd"`` where ``n`` is the
    current number of entries, persisted with ``save()``, and the client is
    redirected to ``/dtd/<name>``.

    Returns:
        A redirect response on success, or a plain error string when the
        language is unsupported or ``url``/``domain``/``port`` is missing.
    """
    entry = {'lang': {'java': False, 'php': False}}
    language = request.args.get('lang')
    if language == 'java':
        entry['lang']['java'] = True
    elif language == 'php':
        entry['lang']['php'] = True

        # php filter: base64
        if valid(request.args.get("base64")):
            entry['base64'] = True
    else:
        return 'Unsupported language.'

    # valid() is truthy when a value was supplied (see the base64 check
    # above), so a parameter is rejected when it is NOT valid.
    entry['url'] = request.args.get('url')
    if not valid(entry['url']):
        return 'No url defined'

    entry['domain'] = request.args.get('domain')
    if not valid(entry['domain']):
        return 'No domain specified.'

    entry['port'] = request.args.get('port')
    if not valid(entry['port']):
        return 'No port specified.'

    filename = str(len(files)) + '.dtd'
    entry['name'] = filename
    files[filename] = entry
    save()

    return redirect("/dtd/%s" % filename)
Пример #4
0
def get_dtd(file):
    """Render ``dtd.html`` for a stored DTD or for one built from the query.

    When ``file`` is a key of the module-level ``files`` mapping, that stored
    description is rendered. Otherwise a description is assembled from the
    request's query parameters:

    * URL scheme flag, first match wins: ``f`` (file), ``j`` (jar, implies
      Java), ``p`` (php, implies PHP), ``ft`` (ftp).
    * ``fu``: the part of the URL after ``://``.
    * Language flag, only consulted when the scheme did not imply one:
      ``lj`` (Java), ``lp`` (PHP), ``lpb`` (PHP with base64).

    Returns:
        The rendered template, or a plain error string when the scheme, the
        URL content or the language is missing.
    """
    if file in files:
        # static files
        return render_template('dtd.html', file=files[file])

    # dynamic generate files
    query = request.args
    f = {'lang': {'java': False, 'php': False}}

    # protocol: (query flag, url scheme, language implied by the scheme)
    scheme_table = (
        ('f', 'file', None),
        ('j', 'jar', 'java'),
        ('p', 'php', 'php'),
        ('ft', 'ftp', None),
    )
    for flag, scheme, implied_lang in scheme_table:
        if exist(query.get(flag)):
            if implied_lang is not None:
                f['lang'][implied_lang] = True
            break
    else:
        return "No url scheme specified."

    target = query.get('fu')
    if not valid(target):
        return "No url content specified."
    f['url'] = scheme + '://' + target

    # language
    if not (f['lang']['java'] or f['lang']['php']):
        if exist(query.get('lj')):
            f['lang']['java'] = True
        elif exist(query.get('lp')):
            f['lang']['php'] = True
        elif exist(query.get('lpb')):
            f['lang']['php'] = True
            f['base64'] = True
        else:
            return "No language specified."

    f['domain'] = host
    f['port'] = port
    return render_template('dtd.html', file=f)
Пример #5
0
 def calculate_moves(self, b):
     """Collect the moves reported by ``utils.get_moves`` for this piece.

     The piece type selects a list of position offsets; pawn offsets (the
     fallback case) are multiplied by -1 when ``self.ally`` is ``'W'`` and by
     1 otherwise. For each offset whose target passes ``utils.valid`` and is
     not rejected by ``self.has_exclusion``, ``utils.get_moves`` is called
     and its results are appended in offset order.

     Args:
         b: passed through unchanged as the first argument of
             ``utils.get_moves`` (presumably the board — confirm).

     Returns:
         A flat list of everything yielded by ``utils.get_moves``; an empty
         list when ``self.type`` is not found in ``'RNBQKP'``.
     """
     kind, side = self.type, self.ally
     if kind not in 'RNBQKP':
         return []

     origin = self.pos
     direction = -1 if side == 'W' else 1

     if kind in 'QK':
         offsets = [-9, -8, -7, -1, 1, 7, 8, 9]
     elif kind == 'R':
         offsets = [-8, -1, 1, 8]
     elif kind == 'B':
         offsets = [-9, -7, 7, 9]
     elif kind == 'N':
         offsets = [-17, -15, -10, -6, 6, 10, 15, 17]
     else:
         offsets = [step * direction for step in (7, 8, 9, 16)]

     # Last argument of get_moves is True for rook, bishop and queen.
     repeat_flag = kind in 'RBQ'

     collected = []
     for offset in offsets:
         target = origin + offset
         if not utils.valid(target):
             continue
         if self.has_exclusion(origin, offset):
             continue
         collected.extend(
             utils.get_moves(b, self, target, offset, repeat_flag))
     return collected
def train():
    """Train the module-level ``ner`` model and optionally validate each epoch.

    Reads its configuration from the module-level ``args`` and uses the
    module-level ``tag2id``, ``ner``, ``optimizer`` and ``device``. Training
    minimises ``ner.neg_log_likelihood``. When ``args.isValid`` is set, the
    validation predictions are written to ``./result/valid_result.txt`` after
    every epoch and scored with ``valid`` against ``args.valid_data``.

    Returns:
        None.
    """
    # data
    train_data = DGDataset(file_path=args.train_data,
                           tag2id=tag2id,
                           maxlen=args.max_len)
    train_dl = DataLoader(train_data,
                          batch_size=args.batch_size,
                          num_workers=2)

    valid_data = DGDataset(file_path=args.valid_data,
                           tag2id=tag2id,
                           maxlen=args.max_len)
    valid_dl = DataLoader(valid_data,
                          batch_size=args.batch_size,
                          num_workers=2)

    # Ceiling division, so a dataset whose size is an exact multiple of the
    # batch size is not reported as having one extra batch.
    num_of_batch = (len(train_data) + args.batch_size - 1) // args.batch_size
    print("start training...")
    for epoch in range(args.epoch):
        # NOTE(review): the checkpoint is written at the START of the final
        # epoch, so it does not contain that epoch's updates - confirm that
        # this is intended.
        if epoch == args.epoch - 1:
            torch.save(ner.state_dict(), args.save_path)
        running_loss = 0.0
        batches_in_window = 0  # batches accumulated since the last log line
        for i, data in enumerate(train_dl):
            sent2id, label, length = data
            if args.use_gpu:
                sent2id = sent2id.to(device)
                label = label.to(device)

            optimizer.zero_grad()

            loss = ner.neg_log_likelihood(sent2id, label)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            batches_in_window += 1
            if i % 10 == 0 and i != 0:
                # Report the mean loss over the batches actually accumulated
                # since the previous report.
                print('epoch {}/{}, batch {}/{}, loss:{:.3f}'.format(
                    epoch + 1, args.epoch, i + 1, num_of_batch,
                    running_loss / batches_in_window))
                running_loss = 0.0
                batches_in_window = 0

        # valid
        if args.isValid:
            print("===========epoch{},valid===========".format(epoch + 1))
            with torch.no_grad():
                preds = []
                lengths = []
                sent2ids = []
                for sent2id, label, length in valid_dl:
                    # Keep the un-moved batch so it can be written out later.
                    sent2ids.append(sent2id)
                    if args.use_gpu:
                        sent2id = sent2id.to(device)

                    pred = ner(sent2id)

                    preds.append(pred)
                    lengths.append(length.numpy())
            preds = np.concatenate(preds, axis=0)
            lengths = np.concatenate(lengths, axis=0)
            sent2ids = np.concatenate(sent2ids, axis=0)
            write_to_file(preds, lengths, sent2ids,
                          "./result/valid_result.txt", tag2id)
            valid(result_file="./result/valid_result.txt",
                  label_file=args.valid_data)
Пример #7
0
parser.add_argument('--total-words', type=int, default=20000, help='')
parser.add_argument('--dropout', type=float, default=0.3, help='dropout probability used all throughout')
parser.add_argument('--turn-hidden-state', type=int, default=1200, help='encoder session hidden state')
# NOTE(review): no ``type`` is given, so any value passed on the command line
# arrives as a non-empty (truthy) string - confirm this option is only ever
# used through its default.
parser.add_argument('--embedding', default=True, help='')
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
if __name__ == '__main__':
    # Evaluate a previously saved model on the validation and test splits.
    options = parser.parse_args()
    if options.toy:
        # Toy mode: limit each split to a single batch worth of samples.
        valid_dataset = Dataset(data_type='valid', length=options.bs)
        test_dataset = Dataset(data_type='test', length=options.bs)
    else:
        valid_dataset = Dataset(data_type='valid')
        test_dataset = Dataset(data_type='test')
    valid_loader = DataLoader(dataset=valid_dataset, batch_size=options.bs, shuffle=False, collate_fn=batch_collect,
                              drop_last=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=options.bs, shuffle=False, collate_fn=batch_collect,
                             drop_last=True)
    model = Model(options)
    model = model.to(device)
    # map_location remaps the stored tensors onto the selected device, so a
    # checkpoint saved on a GPU can still be loaded on a CPU-only host.
    saved_state = torch.load(options.sustain, map_location=device)
    model.load_state_dict(saved_state)
    print('Already Load Pre Train Model')
    valid_loss, valid_ppl = valid(options, model, device, valid_loader)
    test_loss, test_ppl = valid(options, model, device, test_loader)

    # Results after fine-tuning with all turns set:
    # Validation Average Loss: 4.528276        PPL: 92.525948
    # Validation Average Loss: 4.533181        PPL: 92.969895
Пример #8
0
print(f)

for epoch in range(n_epochs):

    # t_f[4] is switched on only for epochs strictly after start_transforming.
    t_f[4] = True if epoch > start_transforming else False

    # The patience counter restarts at the epoch where the switch happens.
    if epoch == start_transforming:
        patience = 0

    c = train(data[0], data[1], batch_size - 1, n_tr_batch, optimizer,
              criterion, model, t_f[0], t_f[1], t_f[2], t_f[3], t_f[4], cuda)
    v = valid(data[2], data[3], n_va_batch, n_c, model, cuda)
    loss = test(data[4], data[5], n_te_batch, n_c, model, cuda)

    patience += 1

    # Per-epoch means along axis 0, accumulated in the history lists.
    mean_cost = np.mean(c, 0, dtype='float64')
    cost.append(mean_cost)
    validation.append(np.mean(v, 0, dtype='float64'))
    score.append(np.mean(loss, 0, dtype='float64'))

    elapsed = datetime.datetime.now() - time_start
    report = pandas.DataFrame(
        data=[epoch, mean_cost, elapsed],
        index=['Iteration', 'Cost', 'Elapsed time'],
        columns=[f],
    )
    print(report)
Пример #9
0
 # Apply the parameter update for the current batch.
 optimizer.step()
 # Every `options.log_interval` batches, print the epoch, the number of
 # samples processed so far, the percentage of batches done and the loss.
 if batch_idx % options.log_interval == 0:
     print(
         'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t'.format(
             epoch, batch_idx * options.bs,
             len(train_loader.dataset),
             100.0 * batch_idx / len(train_loader), loss))
 # -------------------------------------------------------
 # Every `options.valid_interval` batches, run validation. A new validation
 # Dataset (length=1000) and DataLoader are built each time this runs.
 if batch_idx % options.valid_interval == 0:
     valid_dataset = Dataset(data_type='valid', length=1000)
     valid_loader = DataLoader(dataset=valid_dataset,
                               batch_size=options.bs,
                               shuffle=False,
                               collate_fn=batch_collect,
                               drop_last=True)
     valid_loss, ppl = valid(options, model, device, valid_loader)
     # Record this validation result in the running history.
     history_list.append({
         'epoch': epoch,
         'batch_idx': batch_idx,
         'loss': valid_loss,
         'ppl': ppl
     })
     # Checkpoint whenever the validation loss ties or beats the best seen so
     # far; the file name encodes the epoch and batch index.
     if valid_loss <= best_loss:
         best_epo = epoch
         best_loss = valid_loss
         best_step = batch_idx
         torch.save(
             model.state_dict(),
             "../model/e{}s{}.pt".format(best_epo, best_step))
         print('save model when epoch = {} step = {} as e{}s{}.pt'.
               format(best_epo, best_step, best_epo, best_step))
Пример #10
0
        # cw, cc, qw, qc, y1s, y2s, ids = next(train_iter)
        for cw, cc, qw, qc, y1s, y2s, ids in train_dataloader:
            cw, cc, qw, qc, y1s, y2s = cw.to(device), cc.to(device), qw.to(
                device), qc.to(device), y1s.to(device), y2s.to(device)
            p1, p2 = model(cw, cc, qw, qc)
            loss_1 = F.nll_loss(p1, y1s)
            loss_2 = F.nll_loss(p2, y2s)
            loss = (loss_1 + loss_2) / 2
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            scheduler.step()
            f1s.append(f1_score(p1, p2, y1s, y2s))
            ems.append(utils.em(p1, p2, y1s, y2s))
            process_bar.update(config.batch_size)
            process_bar.set_postfix(NLL=loss.item())

        # if(step % 100 == 0):
        #     print('Epoch: %2d | Step: %3d | Loss: %3f' % (epoch, step, loss))

    print('Epoch: %2d | F1: %.2f | EM: %.2f | LOSS: %.2f' %
          (epoch, np.mean(f1s), np.mean(ems), loss.item()))
    torch.save(model, 'model/model.pt')
    f1, em, loss = valid(model, dev_dataset)
    print('-' * 30)
    print('Valid:')
    message = 'Epoch: %2d | F1: %.2f | EM: %.2f | LOSS: %.2f' % (epoch, f1, em,
                                                                 loss)
    print(message)
    f.write(message + '\n')
 def evaluate(self, label_list, seq_len_list, data, file):
     """Write the predictions to ``file`` and run ``valid`` on the result.

     Args:
         label_list: passed to ``write_to_file`` as its first argument.
         seq_len_list: passed to ``write_to_file`` as its second argument.
         data: iterable of records; the first element of each record is
             passed to ``write_to_file`` as ``sent2ids``.
         file: path of the result file, written by ``write_to_file`` and then
             handed to ``valid`` together with ``./data/new_valid.txt``.
     """
     first_elements = [record[0] for record in data]
     write_to_file(
         label_list,
         seq_len_list,
         sent2ids=first_elements,
         file=file,
     )
     valid(
         result_file=file,
         label_file='./data/new_valid.txt',
     )