Example #1
    def test_numpy_construction(self):
        # wrap a float32 numpy array directly in a q.TensorDataset
        x = np.random.random((100, 5)).astype(dtype="float32")
        dataset = q.TensorDataset(x)
        # indexing yields a tuple whose first element mirrors the numpy row
        x1 = x[1]
        xd1 = dataset[1][0]
        self.assertTrue(np.allclose(x1, xd1.numpy()))
        print(type(xd1))
        self.assertTrue(isinstance(xd1, torch.FloatTensor))
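For comparison, a minimal sketch of the same check using stock PyTorch's torch.utils.data.TensorDataset, which (unlike q.TensorDataset above) expects tensors rather than numpy arrays, so the conversion is done explicitly; a dtype check stands in for the FloatTensor isinstance test:

import numpy as np
import torch
from torch.utils.data import TensorDataset

x = np.random.random((100, 5)).astype("float32")
dataset = TensorDataset(torch.from_numpy(x))   # stock PyTorch wants tensors up front
xd1 = dataset[1][0]                            # indexing returns a tuple of tensors
assert np.allclose(x[1], xd1.numpy())
assert xd1.dtype == torch.float32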
Example #2
    def test_iter_single_tensor_error(self):
        x = np.arange(0, 100)
        dataset = q.TensorDataset(x)
        dl = DataLoader(dataset, shuffle=True, batch_size=10)
        batches = []
        dl_iter = iter(dl)

        def fn():
            for i in range(200):  # only 10 batches fit in one pass over the 100 items, so this over-iterates
                batch = next(dl_iter)[0].numpy()
                batches.append(batch)

        self.assertRaises(StopIteration, fn)
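The test hinges on a single DataLoader iterator being exhausted after one pass over the data; the same behaviour can be reproduced with stock PyTorch alone (nothing qelos-specific is assumed here):

import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.arange(0, 100))
dl = DataLoader(dataset, shuffle=True, batch_size=10)
it = iter(dl)
batches = [next(it) for _ in range(10)]   # 100 items / batch size 10 -> exactly 10 batches
try:
    next(it)                              # an 11th call exhausts the iterator
except StopIteration:
    print("exhausted after one epoch; iterate the DataLoader itself to get fresh epochs")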
Example #3
    def test_iter_single_tensor(self):
        x = np.arange(0, 100)
        dataset = q.TensorDataset(x)
        dl = DataLoader(dataset, shuffle=True, batch_size=10)
        epoch1batches = []
        epoch2batches = []
        for batch in dl:
            epoch1batches.append(batch[0].numpy())
        for batch in dl:
            epoch2batches.append(batch[0].numpy())
        # shuffling should give a different batch order in each epoch
        for batcha, batchb in zip(epoch1batches, epoch2batches):
            self.assertFalse(np.allclose(batcha, batchb))

        epoch1 = np.concatenate(epoch1batches)
        epoch2 = np.concatenate(epoch2batches)

        print(epoch1)
        print(epoch2)
        # ...but both epochs must still cover exactly the same set of elements
        self.assertEqual(set(epoch1), set(epoch2))
Example #4
def main(
    lr=0.5,
    epochs=30,
    batsize=32,
    embdim=90,
    encdim=90,
    mode="cell",  # "fast" or "cell"
    wreg=0.0001,
    cuda=False,
    gpu=1,
):
    if cuda:
        torch.cuda.set_device(gpu)
    usecuda = cuda
    vocsize = 50
    # create raw data for the datasets
    tt.tick("loading data")
    sequences = np.random.randint(0, vocsize, (batsize * 100, 16))
    # wrap in datasets: input and target are the same sequences (a copy task);
    # the first 80 batches' worth is used for training, the rest for validation
    dataset = q.TensorDataset(sequences[:batsize * 80],
                              sequences[:batsize * 80])
    validdataset = q.TensorDataset(sequences[batsize * 80:],
                                   sequences[batsize * 80:])
    dataloader = DataLoader(dataset=dataset, batch_size=batsize, shuffle=True)
    validdataloader = DataLoader(dataset=validdataset,
                                 batch_size=batsize,
                                 shuffle=False)
    tt.tock("data loaded")
    # model
    tt.tick("building model")
    embedder = nn.Embedding(vocsize, embdim)

    encoder = q.RecurrentStack(
        embedder,
        q.SRUCell(encdim).to_layer(),
        q.SRUCell(encdim).to_layer(),
        q.SRUCell(encdim).to_layer(),
        q.SRUCell(encdim).to_layer().return_final(),
    )
    if mode == "fast":
        decoder = q.AttentionDecoder(
            attention=q.Attention().forward_gen(encdim, encdim, encdim),
            embedder=embedder,
            core=q.RecurrentStack(q.GRULayer(embdim, encdim)),
            smo=q.Stack(nn.Linear(encdim + encdim, vocsize), q.LogSoftmax()),
            return_att=True)
    else:
        decoder = q.AttentionDecoderCell(
            attention=q.Attention().forward_gen(encdim, encdim + embdim,
                                                encdim),
            embedder=embedder,
            core=q.RecStack(
                q.GRUCell(embdim + encdim,
                          encdim,
                          use_cudnn_cell=False,
                          rec_batch_norm=None,
                          activation="crelu")),
            smo=q.Stack(nn.Linear(encdim + encdim, vocsize), q.LogSoftmax()),
            att_after_update=False,
            ctx_to_decinp=True,
            decinp_to_att=True,
            return_att=True,
        ).to_decoder()

    m = EncDec(encoder, decoder, mode=mode)

    losses = q.lossarray(q.SeqNLLLoss(ignore_index=None),
                         q.SeqAccuracy(ignore_index=None),
                         q.SeqElemAccuracy(ignore_index=None))
    validlosses = q.lossarray(q.SeqNLLLoss(ignore_index=None),
                              q.SeqAccuracy(ignore_index=None),
                              q.SeqElemAccuracy(ignore_index=None))

    optimizer = torch.optim.Adadelta(m.parameters(), lr=lr, weight_decay=wreg)
    tt.tock("model built")

    q.train(m).cuda(usecuda).train_on(dataloader, losses)\
        .set_batch_transformer(lambda x, y: (x, y[:, :-1], y[:, 1:]))\
        .valid_on(validdataloader, validlosses)\
        .optimizer(optimizer).clip_grad_norm(2.)\
        .train(epochs)

    testdat = np.random.randint(0, vocsize, (batsize, 20))
    testdata = q.var(torch.from_numpy(testdat)).cuda(usecuda).v
    testdata_out = q.var(torch.from_numpy(testdat)).cuda(usecuda).v
    if mode == "cell" and False:  # input-reversal branch, deliberately disabled
        inv_idx = torch.arange(testdata.size(1) - 1, -1, -1).long()
        testdata = testdata.index_select(1, inv_idx)
    probs, attw = m(testdata, testdata_out[:, :-1])

    def plot(x):
        sns.heatmap(x)
        plt.show()

    embed()  # drop into an interactive IPython shell to inspect probs/attw (e.g. with plot() above)
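The set_batch_transformer lambda above implements the usual teacher-forcing shift: the decoder is fed the gold sequence up to position t-1 and trained to predict position t. A plain-numpy illustration of that slicing on a made-up toy batch:

import numpy as np

y = np.array([[5, 7, 9, 2],
              [3, 4, 8, 2]])      # two gold output sequences
decoder_input = y[:, :-1]         # [[5, 7, 9], [3, 4, 8]]  -> fed to the decoder
decoder_target = y[:, 1:]         # [[7, 9, 2], [4, 8, 2]]  -> compared against by the loss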
Example #5
def dataload(*tensors, **kw):
    # build a DataLoader over the given tensors, shuffling by default
    if "shuffle" not in kw:
        kw["shuffle"] = True
    tensordataset = q.TensorDataset(*tensors)
    dataloader = DataLoader(tensordataset, **kw)
    return dataloader
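A possible usage sketch of the dataload helper above, assuming (as the earlier examples suggest) that q.TensorDataset accepts multiple aligned numpy arrays and that the remaining keyword arguments are passed straight through to DataLoader; the names and sizes here are made up for illustration:

import numpy as np

x = np.random.random((100, 5)).astype("float32")
y = np.random.randint(0, 10, (100,))

dl = dataload(x, y, batch_size=16)   # shuffle=True is filled in by default
for xbatch, ybatch in dl:
    pass                             # a training step would go here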