datasets = []
    for i, dataset_path in dataset_paths:
        datasets.append(
            TaskSpecificARCDataset(index=i,
                                   dataset_path=Path(dataset_path),
                                   method='train'))
    dataset = MixtureDataset(datasets)
    train = DataLoader(dataset,
                       shuffle=True,
                       num_workers=workernum,
                       batch_size=batchSize,
                       collate_fn=TaskSpecificARCDataset.collate_fn)
    for index, batch in enumerate(train):
        if index > 3500:  # 提前退出快速进行val
            break
        inputs, inputs_mask, targets, targets_mask, task, task_mask = to_device(
            device, *batch)
        # answers = inputs
        start_time = time.time()
        step += len(inputs)
        inp = inputs[:, 6:7]
        # output is (b, d1, d2, ..., dn, c) c is onehot logits

        # inputs = random_mask(targets, ARCDataset.WordMap["start_symbol"], ARCDataset.WordMap['pad_symbol'])

        answers = targets.ne(inp).to(torch.long)
        # print(inputs.shape, inputs_mask.shape)
        answers[inputs_mask[:, 6:7].eq(
            False)] = TaskSpecificARCDataset.WordMap['pad_symbol']

        outputs = train_forward(net, inputs, inputs_mask, task, task_mask)
# --- Example #2 (scraped separator; original text: "示例#2", vote count "0") ---
    for i, dataset_path in dataset_paths:
        datasets.append(
            ContextARCDataset(index=i,
                              dataset_path=Path(dataset_path),
                              method='train'))
    dataset = MixtureDataset(datasets)
    train = DataLoader(dataset,
                       shuffle=True,
                       num_workers=workernum,
                       batch_size=batchSize,
                       collate_fn=ContextARCDataset.collate_fn)
    for index, batch in enumerate(train):
        if index > 3500:  # 提前退出快速进行val
            break
        inputs, ctx_input, targets, ctx_targets, task = to_same_size(
            padding, *to_device(device, *batch))

        start_time = time.time()
        step += len(inputs)

        answers = targets.ne(inputs).to(torch.long)
        answers[inputs.eq(padding)] = padding

        outputs, predict_task = train_forward(
            net, inputs,
            inputs.ne(padding).to(torch.float), ctx_input,
            ctx_input.ne(padding).to(torch.float), ctx_targets,
            ctx_targets.ne(padding).to(torch.float))

        loss = compute_balance_loss(outputs, targets, answers,
                                    padding) + compute_task_loss(