示例#1
0
 def train_dataloader(self):
     """Build the shuffled training ``DataLoader`` from the pickled samples.

     Reads ``Samples/<self.dataset>_train_samples.pickle``, wraps the loaded
     samples in a ``SignedPairsDataset`` (using the index dictionaries
     derived from those same samples) and batches them with the dataset's
     own ``collate`` method, configured from ``self.tcr_encoding_model``
     and ``self.cat_encoding``.

     Returns:
         DataLoader: shuffled batches of 128 samples, loaded by 10 worker
         processes.
     """
     # Imported locally so this method stays self-contained.
     from functools import partial

     samples_path = f"Samples/{self.dataset}_train_samples.pickle"
     # NOTE(security): pickle.load can execute arbitrary code while
     # deserializing; only point this at sample files produced by this
     # project, never at untrusted input.
     with open(samples_path, 'rb') as handle:
         train = pickle.load(handle)
     train_dataset = SignedPairsDataset(train, get_index_dicts(train))
     # A lambda defined inside a method cannot be pickled, which breaks
     # num_workers > 0 on platforms that start worker processes with
     # "spawn". A partial over the bound ``collate`` method is picklable
     # whenever the dataset itself is, and it does not drag ``self`` along.
     # The two encoding settings are read once, here, when the loader is
     # built.
     collate_fn = partial(train_dataset.collate,
                          tcr_encoding=self.tcr_encoding_model,
                          cat_encoding=self.cat_encoding)
     return DataLoader(train_dataset,
                       batch_size=128,
                       shuffle=True,
                       num_workers=10,
                       collate_fn=collate_fn)
示例#2
0
def auc_predict(model, test, train_dicts, peptide=None):
    """Run ``model`` over ``test`` and return the resulting validation AUC.

    Args:
        model: Object exposing ``validation_step``, ``validation_end``,
            ``tcr_encoding_model`` and ``cat_encoding``.
        test: Test samples handed to the dataset constructor.
        train_dicts: Index dictionaries handed to the dataset constructor.
        peptide: Optional peptide. When truthy, the samples are wrapped in
            a ``SinglePeptideDataset`` for that peptide (with
            ``force_peptide=False``); otherwise a ``SignedPairsDataset``
            is used.

    Returns:
        The ``'val_auc'`` entry of ``model.validation_end(outputs)``.
    """
    # Imported locally so this function stays self-contained.
    import torch

    if peptide:
        test_dataset = SinglePeptideDataset(test,
                                            train_dicts,
                                            peptide,
                                            force_peptide=False)
    else:
        test_dataset = SignedPairsDataset(test, train_dicts)

    def collate_batch(batch):
        # Delegate to the dataset's collate with the model's encoding setup.
        return test_dataset.collate(batch,
                                    tcr_encoding=model.tcr_encoding_model,
                                    cat_encoding=model.cat_encoding)

    loader = DataLoader(test_dataset,
                        batch_size=64,
                        shuffle=False,
                        num_workers=0,
                        collate_fn=collate_batch)
    outputs = []
    # Gradients are never used during evaluation. Without no_grad() every
    # stored output would keep its whole autograd graph alive until the AUC
    # is computed, so memory would grow with the number of batches.
    # NOTE(review): this does not switch the model to eval mode; presumably
    # the caller does -- confirm.
    with torch.no_grad():
        for batch_idx, batch in enumerate(loader):
            output = model.validation_step(batch, batch_idx)
            # Falsy results (e.g. None) are skipped, not aggregated.
            if output:
                outputs.append(output)
    return model.validation_end(outputs)['val_auc']
示例#3
0
def predict(dataset, test_file):
    """Score every sample in ``test_file`` with the model for ``dataset``.

    Args:
        dataset: Identifier passed to ``get_model``, which returns the model
            together with the training file it was built from.
        test_file: Input passed to ``read_input_file``, which returns the
            test samples and the dataframe to annotate.

    Returns:
        The dataframe from ``read_input_file`` with a ``'Score'`` column
        holding the collected ``'y_hat'`` values.
    """
    # Imported locally so this function stays self-contained.
    import torch

    model, train_file = get_model(dataset)
    train_dicts = get_train_dicts(train_file)
    test_samples, dataframe = read_input_file(test_file)
    test_dataset = SignedPairsDataset(test_samples, train_dicts)

    def collate_batch(batch):
        # Delegate to the dataset's collate with the model's encoding setup.
        return test_dataset.collate(batch,
                                    tcr_encoding=model.tcr_encoding_model,
                                    cat_encoding=model.cat_encoding)

    batch_size = 1000
    loader = DataLoader(test_dataset,
                        batch_size=batch_size,
                        shuffle=False,  # keep scores in input order
                        collate_fn=collate_batch)
    scores = []
    # Pure inference: disable autograd so no graph is built per batch.
    # NOTE(review): this does not switch the model to eval mode; presumably
    # get_model returns it ready for inference -- confirm.
    with torch.no_grad():
        for batch_idx, batch in enumerate(loader):
            output = model.validation_step(batch, batch_idx)
            # NOTE(review): a falsy output drops that batch's scores, so
            # ``scores`` would no longer line up with the dataframe rows --
            # confirm validation_step always returns a result here.
            if output:
                scores.extend(output['y_hat'].tolist())
    dataframe['Score'] = scores
    return dataframe