datasets = []
# NOTE: the original iterated `dataset_paths` directly with tuple unpacking;
# `enumerate` is the likely intent, giving each dataset a stable index.
for i, dataset_path in enumerate(dataset_paths):
    datasets.append(
        TaskSpecificARCDataset(index=i, dataset_path=Path(dataset_path), method='train'))
dataset = MixtureDataset(datasets)
train = DataLoader(dataset, shuffle=True, num_workers=workernum,
                   batch_size=batchSize, collate_fn=TaskSpecificARCDataset.collate_fn)
for index, batch in enumerate(train):
    if index > 3500:  # break early so validation can start sooner
        break
    inputs, inputs_mask, targets, targets_mask, task, task_mask = to_device(
        device, *batch)
    # answers = inputs
    start_time = time.time()
    step += len(inputs)
    # Take the grid at position 6 of the packed example, kept as a length-1 slice.
    inp = inputs[:, 6:7]
    # output is (b, d1, d2, ..., dn, c); c holds one-hot logits
    # inputs = random_mask(targets, ARCDataset.WordMap["start_symbol"], ARCDataset.WordMap['pad_symbol'])
    # Per-cell change labels: 1 where the target differs from the input grid.
    answers = targets.ne(inp).to(torch.long)
    # print(inputs.shape, inputs_mask.shape)
    # Overwrite padded positions with the pad symbol so the loss ignores them.
    answers[inputs_mask[:, 6:7].eq(False)] = TaskSpecificARCDataset.WordMap['pad_symbol']
    outputs = train_forward(net, inputs, inputs_mask, task, task_mask)
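# The `to_device` helper called above is not defined in this excerpt. A minimal
# sketch of what it is assumed to do (move every tensor in the batch to the
# target device, preserving order) is given below; the name and signature match
# the call site, but the body is an assumption, not the repository's code.
def to_device(device, *tensors):
    """Move each tensor in `tensors` onto `device`, preserving order."""
    return tuple(t.to(device) for t in tensors)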
datasets = []  # assumed initialization, mirroring the task-specific variant above
for i, dataset_path in enumerate(dataset_paths):
    datasets.append(
        ContextARCDataset(index=i, dataset_path=Path(dataset_path), method='train'))
dataset = MixtureDataset(datasets)
train = DataLoader(dataset, shuffle=True, num_workers=workernum,
                   batch_size=batchSize, collate_fn=ContextARCDataset.collate_fn)
for index, batch in enumerate(train):
    if index > 3500:  # break early so validation can start sooner
        break
    inputs, ctx_input, targets, ctx_targets, task = to_same_size(
        padding, *to_device(device, *batch))
    start_time = time.time()
    step += len(inputs)
    # Per-cell change labels: 1 where the target differs from the input grid.
    answers = targets.ne(inputs).to(torch.long)
    # Overwrite padded positions with the padding id so the loss ignores them.
    answers[inputs.eq(padding)] = padding
    # Attention masks are derived on the fly by comparing against the padding id.
    outputs, predict_task = train_forward(
        net, inputs, inputs.ne(padding).to(torch.float),
        ctx_input, ctx_input.ne(padding).to(torch.float),
        ctx_targets, ctx_targets.ne(padding).to(torch.float))
    loss = compute_balance_loss(outputs, targets, answers, padding) + compute_task_loss(
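# `to_same_size` is likewise not defined in this excerpt. Judging from the call
# site, it pads every grid tensor to a shared height and width using the padding
# id, so that elementwise comparisons such as `targets.ne(inputs)` line up. The
# sketch below is a hedged guess under the assumption that every argument is a
# grid-shaped tensor whose last two dims are (H, W); it is not the repository's
# implementation.
import torch
import torch.nn.functional as F

def to_same_size(padding, *tensors):
    """Right/bottom-pad each (..., H, W) tensor with `padding` to the max H and W."""
    max_h = max(t.shape[-2] for t in tensors)
    max_w = max(t.shape[-1] for t in tensors)
    padded = []
    for t in tensors:
        pad_h = max_h - t.shape[-2]
        pad_w = max_w - t.shape[-1]
        # F.pad takes (left, right, top, bottom) for the last two dimensions.
        padded.append(F.pad(t, (0, pad_w, 0, pad_h), value=padding))
    return tuple(padded)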