def train_dataloader(self):
    # Load the pickled training pairs for the selected dataset and wrap them in a
    # SignedPairsDataset together with its categorical index dictionaries.
    with open('Samples/' + self.dataset + '_train_samples.pickle', 'rb') as handle:
        train = pickle.load(handle)
    train_dataset = SignedPairsDataset(train, get_index_dicts(train))
    return DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=10,
                      collate_fn=lambda b: train_dataset.collate(
                          b, tcr_encoding=self.tcr_encoding_model, cat_encoding=self.cat_encoding))
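# Usage sketch: train_dataloader() is a PyTorch Lightning hook, so it is not called
# directly; the Trainer invokes it during fit(). The class name ERGOLightning and the
# hparams fields (dataset, tcr_encoding_model, cat_encoding) below are assumptions used
# only for illustration.
#
#   import pytorch_lightning as pl
#   model = ERGOLightning(hparams)            # hypothetical LightningModule constructor
#   trainer = pl.Trainer(max_epochs=100)
#   trainer.fit(model)                        # train_dataloader() is invoked here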
def auc_predict(model, test, train_dicts, peptide=None):
    # Score a test set with a trained model; when a peptide is given, restrict the
    # evaluation to pairs involving that single peptide.
    if peptide:
        test_dataset = SinglePeptideDataset(test, train_dicts, peptide, force_peptide=False)
    else:
        test_dataset = SignedPairsDataset(test, train_dicts)
    loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0,
                        collate_fn=lambda b: test_dataset.collate(
                            b, tcr_encoding=model.tcr_encoding_model, cat_encoding=model.cat_encoding))
    # Reuse the Lightning validation hooks to score every batch, then aggregate the AUC.
    outputs = []
    for batch_idx, batch in enumerate(loader):
        output = model.validation_step(batch, batch_idx)
        if output:
            outputs.append(output)
    auc = model.validation_end(outputs)['val_auc']
    return auc
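# Usage sketch for auc_predict, assuming a trained model object that exposes
# tcr_encoding_model / cat_encoding and the validation_step / validation_end hooks,
# plus a pickled test-pair file under Samples/ mirroring the training files. The file
# name, train_file path, and example peptide are illustrative, not fixed by this code.
#
#   with open('Samples/mcpas_test_samples.pickle', 'rb') as handle:
#       test = pickle.load(handle)
#   train_dicts = get_train_dicts(train_file)                 # hypothetical train_file
#   overall_auc = auc_predict(model, test, train_dicts)
#   single_auc = auc_predict(model, test, train_dicts, peptide='NLVPMVATV')  # per-peptide AUC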
def predict(dataset, test_file):
    # Load the trained model for the requested dataset and score every TCR-peptide
    # pair in the input file.
    model, train_file = get_model(dataset)
    train_dicts = get_train_dicts(train_file)
    test_samples, dataframe = read_input_file(test_file)
    test_dataset = SignedPairsDataset(test_samples, train_dicts)
    batch_size = 1000
    loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                        collate_fn=lambda b: test_dataset.collate(
                            b, tcr_encoding=model.tcr_encoding_model, cat_encoding=model.cat_encoding))
    outputs = []
    for batch_idx, batch in enumerate(loader):
        output = model.validation_step(batch, batch_idx)
        if output:
            outputs.extend(output['y_hat'].tolist())
    # Attach the predicted binding scores to the original input dataframe.
    dataframe['Score'] = outputs
    return dataframe
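# A minimal command-line entry point, sketched under the assumption that this module is
# run directly with a dataset name and a CSV of TCR-peptide pairs; the argument order
# and the output file naming below are assumptions, not fixed by the functions above.
if __name__ == '__main__':
    import sys
    df = predict(sys.argv[1], sys.argv[2])
    df.to_csv(sys.argv[2].replace('.csv', '_output.csv'), index=False)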