Пример #1
0
 def train_dataloader(self):
     """Build the DataLoader that serves the training samples.

     Reads the pickled samples from
     ``Samples/<self.dataset>_train_samples.pickle``, wraps them in a
     ``SignedPairsDataset`` (together with ``get_index_dicts`` of the same
     samples) and returns a shuffled loader with a batch size of 128 and
     10 worker processes.

     Returns:
         A ``DataLoader`` whose batches are assembled by
         ``train_dataset.collate`` with this object's
         ``tcr_encoding_model`` and ``cat_encoding`` passed as the
         ``tcr_encoding`` / ``cat_encoding`` keyword arguments.
     """
     # Function-scope import so the method does not depend on a
     # file-level import being present.
     from functools import partial

     # NOTE: pickle.load can execute arbitrary code embedded in the file;
     # only load sample files that come from a trusted source.
     with open('Samples/' + self.dataset + '_train_samples.pickle',
               'rb') as handle:
         train = pickle.load(handle)
     train_dataset = SignedPairsDataset(train, get_index_dicts(train))
     # A partial over the bound method replaces the former lambda: lambdas
     # cannot be pickled, which breaks num_workers > 0 whenever worker
     # processes are started with the "spawn" method. The partial is
     # picklable as long as the dataset and the encoding settings are.
     collate_fn = partial(train_dataset.collate,
                          tcr_encoding=self.tcr_encoding_model,
                          cat_encoding=self.cat_encoding)
     return DataLoader(train_dataset,
                       batch_size=128,
                       shuffle=True,
                       num_workers=10,
                       collate_fn=collate_fn)
Пример #2
0
def predict(dataset, test_file):
    """Score the samples of ``test_file`` with the model for ``dataset``.

    Args:
        dataset: Identifier handed to ``get_model``, which returns the
            model and the training file it was built from.
        test_file: Input handed to ``read_input_file``, which returns the
            test samples and the matching dataframe.

    Returns:
        The dataframe from ``read_input_file`` with an added ``Score``
        column holding the ``y_hat`` values collected from
        ``model.validation_step`` over all batches, in input order
        (the loader does not shuffle).
    """
    # Function-scope import so the function does not depend on a
    # file-level ``import torch`` being present.
    import torch

    model, train_file = get_model(dataset)
    train_dicts = get_train_dicts(train_file)
    test_samples, dataframe = read_input_file(test_file)
    test_dataset = SignedPairsDataset(test_samples, train_dicts)
    batch_size = 1000
    loader = DataLoader(test_dataset,
                        batch_size=batch_size,
                        shuffle=False,
                        collate_fn=lambda b: test_dataset.collate(
                            b,
                            tcr_encoding=model.tcr_encoding_model,
                            cat_encoding=model.cat_encoding))
    outputs = []
    # Pure inference: disabling autograd avoids recording graph state for
    # every batch, which only costs memory and time here.
    # NOTE(review): assumes get_model already returns the model in eval
    # mode -- confirm, otherwise dropout/batch-norm would affect scores.
    with torch.no_grad():
        for batch_idx, batch in enumerate(loader):
            output = model.validation_step(batch, batch_idx)
            # A falsy result contributes no scores for that batch.
            # NOTE(review): if that ever happens, ``outputs`` will be
            # shorter than the dataframe and the assignment below will
            # presumably fail with a length mismatch -- verify that
            # validation_step always returns a result for test batches.
            if output:
                outputs.extend(output['y_hat'].tolist())
    dataframe['Score'] = outputs
    return dataframe