Example #1
def main(params):

    model = Task5Model(params)
    train_dataset = TaskDataset(params, 'data/task6_train_sequence.csv',
                                'data/task6_train_label.csv')
    test_dataset = TaskDataset(params, 'data/task6_test_sequence.csv',
                               'data/task6_test_label.csv')

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=params['batch_size'],
                                  shuffle=True)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=params['batch_size'],
                                 shuffle=False)

    loss_fn = torch.nn.BCEWithLogitsLoss()
    # loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=params['lr'])

    model = torcheras.Model(model, 'log/')
    model.compile(loss_fn, optimizer, metrics=['binary_acc'])

    writer = SummaryWriter()

    result = []

    def callback(epoch, i_batch, metrics_result):
        result.append(metrics_result['binary_acc'])

    model.fit(train_dataloader,
              test_dataloader,
              epochs=10,
              batch_callback=callback)
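
A hypothetical invocation of the snippet above; 'batch_size' and 'lr' are the
only params keys read directly here, and any extra keys consumed by Task5Model
or TaskDataset are not visible in this excerpt:

if __name__ == '__main__':
    main({'batch_size': 32, 'lr': 1e-3})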
Example #2
def evaluate_model(params, dtype='test', model_folder='', model_epoch=''):
    test_dataset = QANetDataset('data', dtype)
    test_dataloader = DataLoader(test_dataset, batch_size=32,
                                 shuffle=False)  # no need to shuffle for evaluation

    with open('data/' + dtype + '_eval.pkl', 'rb') as f:
        test_eval = pickle.load(f)

    with open(os.path.join(params['target_dir'], 'word_emb_mat.pkl'), 'rb') as f:
        word_emb_mat = np.array(pickle.load(f), dtype=np.float32)
    with open(os.path.join(params['target_dir'], 'char_emb_mat.pkl'), 'rb') as f:
        char_emb_mat = np.array(pickle.load(f), dtype=np.float32)

    qanet = QANet(params, word_emb_mat, char_emb_mat).to(device)
    qanet = torcheras.Model(qanet, 'log/qanet')
    qanet.load_model(model_folder, epoch=model_epoch, ema=True)
    qanet = qanet.model
    qanet.eval()

    all_scores = {'em': 0, 'f1': 0}
    with torch.no_grad():
        for i_batch, sample_batched in enumerate(test_dataloader):
            x, y_true = variable_data(sample_batched, device)
            y_pred = qanet(x)
            metrics = evaluate_scores(y_true, y_pred, test_eval)
            print(metrics)
            all_scores['em'] += metrics['exact_match']
            all_scores['f1'] += metrics['f1']

        n_batches = i_batch + 1  # enumerate is zero-based
        print('em', all_scores['em'] / n_batches,
              'f1', all_scores['f1'] / n_batches)
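
A hypothetical call, assuming 'target_dir' is the only params key read here
and that model_folder names a torcheras run folder under 'log/qanet' (the
exact folder naming is not shown in this excerpt):

evaluate_model({'target_dir': 'data'}, dtype='dev',
               model_folder='some_run_folder', model_epoch=5)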
Example #3
def main(params):

    model = Task3Model()
    dataset = TestDataset(params, 'data/task3_passage.csv',
                          'data/task3_query.csv', 'data/task3_label.csv')

    dataloader = DataLoader(dataset,
                            batch_size=params['batch_size'],
                            shuffle=True)

    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=params['lr'])

    print(model)
    model = torcheras.Model(model, 'log/')
    model.compile(loss_fn, optimizer, metrics=['categorical_acc'])

    writer = SummaryWriter()

    result = []

    def callback(epoch, i_batch, metrics_result):
        # print(i_batch, metrics_result)
        for metric, value in metrics_result.items():
            writer.add_scalar('data/' + metric, value, i_batch)
        result.append(metrics_result['categorical_acc'])

    model.fit(dataloader, epochs=1, batch_callback=callback)

    # writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
    print(sum(result[-100:]))
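
The final print above reports the summed per-batch accuracy over the last 100
batches; if a mean over that window is wanted instead, a one-line variant:

print(sum(result[-100:]) / max(1, len(result[-100:])))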
Example #4
def main(params):

    device = torch.device('cuda:0')

    model = TaskModel(params)
    train_dataset = TaskDataset(params, 'data/blob_train_image_data/',
                                'data/train_sym.txt')
    # test_dataset = train_dataset[int(len(train_dataset)*0.8):]
    # train_dataset = train_dataset[:int(len(train_dataset)*0.8)]

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=params['batch_size'],
                                  shuffle=True)
    # test_dataloader = DataLoader(test_dataset, batch_size=params['batch_size'], shuffle=False)

    loss_fn = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=params['lr'])

    model = torcheras.Model(model, 'log/')
    model.compile(loss_fn, optimizer, metrics=['binary_acc'], device=device)

    writer = SummaryWriter()

    result = []

    def callback(epoch, i_batch, metrics_result):
        result.append(metrics_result['binary_acc'])

    model.fit(train_dataloader, epochs=10, batch_callback=callback)
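
If the commented-out 80/20 split above is revived, note that indexing a
map-style Dataset with a slice generally fails; torch.utils.data.random_split
is the standard way to do it, as in this sketch:

from torch.utils.data import random_split

n_train = int(len(train_dataset) * 0.8)
train_dataset, test_dataset = random_split(
    train_dataset, [n_train, len(train_dataset) - n_train])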
Example #5
def main(params):

    model = Test2Model(params['in_channels'], params['out_channels'])
    dataset = TestDataset(params, 'data/task2_data.csv',
                          'data/task2_label.csv')

    dataloader = DataLoader(dataset,
                            batch_size=params['batch_size'],
                            shuffle=True)

    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=params['lr'])

    model = torcheras.Model(model, 'log/')
    model.compile(loss_fn, optimizer, metrics=['categorical_acc'])

    writer = SummaryWriter()

    def callback(epoch, i_batch, metrics_result):
        print(i_batch, metrics_result)
        for metric, value in metrics_result.items():
            writer.add_scalar('data/' + metric, value, i_batch)

    model.fit(dataloader, epochs=1, batch_callback=callback)

    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
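
Note that export_scalars_to_json is a tensorboardX API; the SummaryWriter
bundled with PyTorch (torch.utils.tensorboard) does not provide it, so these
snippets presumably import SummaryWriter from tensorboardX.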
Example #6
def main(params):

    model = Task5Model(params)
    dataset = TaskDataset(params, 'data/task5_2_train_paragraph1.csv',
                          'data/task5_2_train_paragraph2.csv',
                          'data/task5_2_train_label.csv')

    dataloader = DataLoader(dataset,
                            batch_size=params['batch_size'],
                            shuffle=True)

    loss_fn = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=params['lr'])

    model = torcheras.Model(model, 'log/')
    model.compile(loss_fn, optimizer, metrics=['binary_acc'])

    writer = SummaryWriter()

    result = []

    def callback(epoch, i_batch, metrics_result):
        result.append(metrics_result['binary_acc'])

    model.fit(dataloader, epochs=1, batch_callback=callback)
    print(sum(result[-100:]))

    p1 = np.array(pd.read_csv('data/task5_2_test_paragraph1.csv', header=None),
                  dtype=np.int64)  # np.int is deprecated in modern NumPy
    p2 = np.array(pd.read_csv('data/task5_2_test_paragraph2.csv', header=None),
                  dtype=np.int64)

    results = []
    model.model.eval()
    with torch.no_grad():
        for i in range(p1.shape[0]):
            # the model was trained with BCEWithLogitsLoss, so it outputs raw
            # logits; apply a sigmoid before thresholding at 0.5
            logit = model.model(
                (torch.LongTensor([p1[i]]), torch.LongTensor([p2[i]])))
            results.append(1 if torch.sigmoid(logit) > 0.5 else 0)
    print(results)
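
A batched variant of the loop above, under the same assumption that the model
accepts a (p1, p2) tuple of LongTensors and returns one logit per pair:

with torch.no_grad():
    logits = model.model((torch.LongTensor(p1), torch.LongTensor(p2)))
    results = (torch.sigmoid(logits) > 0.5).long().view(-1).tolist()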
Example #7
def train(params, description):
    train_dataset = QANetDataset('data', 'train')
    dev_dataset = QANetDataset('data', 'dev')

    with open('data/train_eval.pkl', 'rb') as f:
        train_eval = pickle.load(f)
    with open('data/dev_eval.pkl', 'rb') as f:
        dev_eval = pickle.load(f)

    def evaluate_em(y_true, y_pred):
        qa_id = y_true[1]
        c_mask, q_mask = y_pred[2:]

        y_p1 = F.softmax(y_pred[0], dim=-1)
        y_p2 = F.softmax(y_pred[1], dim=-1)

        p1 = []
        p2 = []

        # outer product of the start/end distributions; taking the upper
        # triangle below restricts predictions to spans with start <= end
        p_matrix = torch.bmm(y_p1.unsqueeze(2), y_p2.unsqueeze(1))
        for i in range(p_matrix.shape[0]):
            p = torch.triu(p_matrix[i])
            indexes = torch.argmax(p).item()
            p1.append(indexes // p.shape[0])
            p2.append(indexes % p.shape[0])

        # gradients flow only during training, so requires_grad distinguishes
        # train batches (scored against train_eval) from dev batches
        if y_pred[0].requires_grad:
            answer_dict, _ = convert_tokens(train_eval, qa_id.tolist(), p1, p2)
            metrics = evaluate(train_eval, answer_dict)
        else:
            answer_dict, _ = convert_tokens(dev_eval, qa_id.tolist(), p1, p2)
            metrics = evaluate(dev_eval, answer_dict)

        return torch.Tensor([metrics['exact_match']])

    train_loader = DataLoader(train_dataset,
                              batch_size=params['batch_size'],
                              shuffle=True)
    dev_loader = DataLoader(dev_dataset,
                            batch_size=params['batch_size'],
                            shuffle=True)

    with open(os.path.join(params['target_dir'], 'word_emb_mat.pkl'), 'rb') as f:
        word_emb_mat = np.array(pickle.load(f), dtype=np.float32)
    with open(os.path.join(params['target_dir'], 'char_emb_mat.pkl'), 'rb') as f:
        char_emb_mat = np.array(pickle.load(f), dtype=np.float32)

    qanet = QANet(params, word_emb_mat, char_emb_mat).to(device)
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        qanet.parameters()),
                                 lr=params['learning_rate'],
                                 betas=(params['beta1'], params['beta2']),
                                 weight_decay=params['weight_decay'])
    # warm the learning rate up logarithmically over the first 1000 steps,
    # then hold the multiplier at 1
    crit = 1 / math.log(1000)
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda ee: crit * math.log(ee + 1) if (ee + 1) <= 1000 else 1)

    qanet = torcheras.Model(qanet, 'log/qanet')

    print(description)
    qanet.set_description(description)

    custom_objects = {'em': evaluate_em}
    qanet.compile(loss_function,
                  scheduler,
                  metrics=['em'],
                  device=device,
                  custom_objects=custom_objects)
    qanet.fit(train_loader, dev_loader, ema_decay=0.9999, grad_clip=5)
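
A quick sanity check of the warm-up schedule above, runnable on its own: the
multiplier climbs logarithmically and reaches 1.0 exactly at step 1000, after
which it stays flat.

import math

crit = 1 / math.log(1000)
lr_lambda = lambda ee: crit * math.log(ee + 1) if (ee + 1) <= 1000 else 1
for ee in (0, 9, 99, 999, 5000):
    print(ee, round(lr_lambda(ee), 4))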
Example #8
import os

import numpy as np
import torch
import torcheras
from PIL import Image
from torch.utils.data import DataLoader

device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

model = TaskModel(params).to(device)  # params is defined elsewhere in the project

test_img_path = 'data/blob_test_image_data/'

model = torcheras.Model(model, 'log')
model.load_model('', epoch=5)
model = model.model
model.eval()

with torch.no_grad():
    for img_path in os.listdir(test_img_path):
        img = np.array(Image.open(test_img_path + img_path),
                       dtype=np.float32) / 255.
        img = torch.Tensor(img).to(device)
        y_pred = model(img)
        y_pred = torch.sigmoid(y_pred)  # F.sigmoid is deprecated

        print(y_pred)
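
Depending on how TaskModel is defined, the HxWxC image tensor above may need
to be rearranged into a batched CHW layout before the forward pass; a
hypothetical adjustment:

img = img.permute(2, 0, 1).unsqueeze(0)  # HWC -> 1xCxHxW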