def train_dataloader(self):
    """Build the training ``DataLoader`` from the pickled sample file.

    Loads ``Samples/<self.dataset>_train_samples.pickle``, wraps the loaded
    samples in a ``SignedPairsDataset`` (with index dictionaries computed from
    those same samples by ``get_index_dicts``) and returns a ``DataLoader``
    over it configured with ``batch_size=128``, ``shuffle=True`` and
    ``num_workers=10``.

    Returns:
        The ``DataLoader`` over the training dataset. Batches are assembled by
        the dataset's own ``collate`` method, which receives
        ``self.tcr_encoding_model`` and ``self.cat_encoding``.
    """
    samples_path = 'Samples/' + self.dataset + '_train_samples.pickle'
    with open(samples_path, 'rb') as pickle_file:
        samples = pickle.load(pickle_file)

    index_dicts = get_index_dicts(samples)
    dataset = SignedPairsDataset(samples, index_dicts)

    def collate_batch(items):
        # The encoding settings are looked up on ``self`` each time a batch
        # is collated, not once when the loader is created.
        return dataset.collate(
            items,
            tcr_encoding=self.tcr_encoding_model,
            cat_encoding=self.cat_encoding,
        )

    return DataLoader(
        dataset,
        batch_size=128,
        shuffle=True,
        num_workers=10,
        collate_fn=collate_batch,
    )
def predict(dataset, test_file):
    """Score the samples in ``test_file`` with the model selected by ``dataset``.

    Args:
        dataset: Identifier handed to ``get_model``, which returns the model
            and the training file used to build the index dictionaries.
        test_file: Input handed to ``read_input_file``, which returns the test
            samples and a dataframe.

    Returns:
        The dataframe returned by ``read_input_file``, with the collected
        ``'y_hat'`` values stored in a new ``'Score'`` column.
    """
    # Function-scope import so this block does not rely on a module-level
    # ``import torch`` being present in the file.
    import torch

    model, train_file = get_model(dataset)
    train_dicts = get_train_dicts(train_file)
    test_samples, dataframe = read_input_file(test_file)
    test_dataset = SignedPairsDataset(test_samples, train_dicts)

    batch_size = 1000
    # shuffle=False keeps the batches in dataset order, so the concatenated
    # scores follow the order of ``test_samples``.
    loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=lambda b: test_dataset.collate(
            b,
            tcr_encoding=model.tcr_encoding_model,
            cat_encoding=model.cat_encoding,
        ),
    )

    # Inference only: switch the model to evaluation mode (harmless if it is
    # already in that mode) and disable gradient tracking so no autograd graph
    # is built for each batch.
    # NOTE(review): assumes ``model`` is a torch ``nn.Module`` - confirm
    # against ``get_model``.
    model.eval()
    outputs = []
    with torch.no_grad():
        for batch_idx, batch in enumerate(loader):
            output = model.validation_step(batch, batch_idx)
            # A falsy result (e.g. ``None`` or an empty dict) contributes no
            # scores; ``outputs`` then ends up shorter than the input.
            if output:
                outputs.extend(output['y_hat'].tolist())

    dataframe['Score'] = outputs
    return dataframe