def get_activator_web(dev_split, test_split):
    """Assemble the Web dataset and a GPU-backed multi-class activator.

    :param dev_split: fraction of samples assigned to the dev split
    :param test_split: fraction of samples assigned to the test split
    :return: ("Web", BilinearMultiClassActivator) pair
    """
    name = "Web"
    external = ExternalData(WebAllExternalDataParams())
    dataset = BilinearDataset(WebDatasetAllParams(), external_data=external)

    # activator hyper-parameters, with the requested splits patched in
    act_params = WebBilinearActivatorParams()
    act_params.DEV_SPLIT = dev_split
    act_params.TEST_SPLIT = test_split

    # module dimensions are derived from the dataset / embedding vocabulary
    mod_params = WebLayeredBilinearModuleParams(
        ftr_len=dataset.len_features, embed_vocab_dim=external.len_embed())
    activator = BilinearMultiClassActivator(
        LayeredBilinearModuleGPU(mod_params), act_params, dataset)
    return name, activator
def get_activator_protein(dev_split, test_split, topological_ftrs=True):
    """Assemble the Protein dataset and a multi-class activator for it.

    :param dev_split: fraction of samples assigned to the dev split
    :param test_split: fraction of samples assigned to the test split
    :param topological_ftrs: when False, the dataset's topological feature
        list is emptied before the dataset is built
    :return: ("Protein", BilinearMultiClassActivator) pair
    """
    name = "Protein"
    external = ExternalData(ProteinAllExternalDataParams())

    ds_params = ProteinDatasetAllParams()
    if not topological_ftrs:
        ds_params.FEATURES = []
    dataset = BilinearDataset(ds_params, external_data=external)

    # activator hyper-parameters, with the requested splits patched in
    act_params = ProteinBilinearActivatorParams()
    act_params.DEV_SPLIT = dev_split
    act_params.TEST_SPLIT = test_split

    # module dimensions are derived from the dataset / embedding vocabulary
    mod_params = ProteinLayeredBilinearModuleParams(
        ftr_len=dataset.len_features, embed_vocab_dim=external.len_embed())
    activator = BilinearMultiClassActivator(
        LayeredBilinearModule(mod_params), act_params, dataset)
    return name, activator
def run_trial(params, dataset_param_class, module_param_class, activator_param_class,
              ext_data, is_multi_class):
    """Run a single NNI hyper-parameter trial.

    :param params: trial dict with keys 'input_vec', 'layers_config',
        'dropout', 'regularization', 'learning_rate', 'optimizer',
        'batch_size', 'epochs' (feature/optimizer entries are names resolved
        through globals())
    :param dataset_param_class: params class for the dataset
    :param module_param_class: params class for the bilinear module
    :param activator_param_class: params class for the activator
    :param ext_data: ExternalData providing embeddings
    :param is_multi_class: selects BilinearMultiClassActivator vs
        BilinearActivator
    """
    # dataset
    ds_params = dataset_param_class()
    ds_params.FEATURES = [globals()[ftr] for ftr in params['input_vec']]
    dataset = BilinearDataset(ds_params, external_data=ext_data)

    # model: layer dimensions decoded from the NNI layers_config choice.
    # Fix: the original duplicated the "2_layers"/"3_layers" branches and
    # silently produced an empty layer list for any other depth; this now
    # handles any "N_layers" config uniformly.
    layers = _layers_from_config(params['layers_config'])
    model_params = module_param_class(ftr_len=dataset.len_features, layer_dim=layers,
                                      embed_vocab_dim=ext_data.len_embed())
    model_params.DROPOUT = params['dropout']
    model_params.WEIGHT_DECAY = params['regularization']
    model_params.LR = params['learning_rate']
    model_params.OPTIMIZER = globals()[params['optimizer']]

    # activator
    activator_params = activator_param_class()
    activator_params.BATCH_SIZE = params['batch_size']
    activator_params.EPOCHS = params['epochs']

    model = LayeredBilinearModule(model_params)
    if is_multi_class:
        activator = BilinearMultiClassActivator(model, activator_params, dataset, nni=True)
    else:
        activator = BilinearActivator(model, activator_params, dataset, nni=True)
    activator.train(show_plot=False, early_stop=True)


def _layers_from_config(config):
    """Translate an NNI layers_config dict into [in_dim, out_dim] pairs.

    Expects config["_name"] of the form "N_layers" and keys "h1_dim"..
    "hN_dim". The first layer's input dim is None (filled in by the module
    from the feature length). Unrecognized names yield an empty list, matching
    the original fall-through behavior.
    """
    name = config["_name"]
    if not name.endswith("_layers"):
        return []
    prefix = name.split("_", 1)[0]
    if not prefix.isdigit() or int(prefix) == 0:
        return []
    num_layers = int(prefix)
    dims = [int(config["h{}_dim".format(i)]) for i in range(1, num_layers + 1)]
    layers = [[None, dims[0]]]
    for i in range(1, num_layers):
        layers.append([dims[i - 1], dims[i]])
    return layers
def _all_configurations(self):
    """Yield (dataset, model_params, activator_params, config_str) for every
    grid point of the hyper-parameter search; each point is produced twice
    (two repetitions per configuration)."""
    split_options = [1]
    feature_options = [[DEG, CENTRALITY, BFS]]
    batch_options = [16]
    optimizer_options = [Adam, SGD]
    lr_options = [1e-3, 1e-1, 1]
    dropout_options = [0, 0.1, 0.15]
    decay_options = [0, 1e-2, 1e-3, 1e-4]
    if self._layers is None:
        layer_options = [[[None, 50], [50, 25]],
                         [[None, 100], [100, 50], [50, 25]]]
    else:
        layer_options = self._layers

    grid = product(split_options, optimizer_options, lr_options, dropout_options,
                   decay_options, feature_options, layer_options, batch_options)
    # prepare param objects
    for split, opt, lr, drop, decay, features, layers, batch in grid:
        for _repeat in range(2):
            # human-readable tag identifying this configuration
            config_str = "|".join(
                str(v) for v in (split, opt, lr, drop, decay, features, layers, batch))
            # dataset
            ds_params = self._dataset_param_class()
            ds_params.FEATURES = features
            ds_params.PERCENTAGE = split
            dataset = BilinearDataset(ds_params, external_data=self._ext_data)
            # model
            model_params = self._module_param_class(
                ftr_len=dataset.len_features, layer_dim=layers,
                embed_vocab_dim=self._ext_data.len_embed())
            model_params.DROPOUT = drop
            model_params.WEIGHT_DECAY = decay
            model_params.LR = lr
            model_params.OPTIMIZER = opt
            # activator
            activator_params = self._activator_param_class()
            activator_params.BATCH_SIZE = batch
            yield dataset, model_params, activator_params, config_str
class AidsBilinearActivatorParams(BilinearActivatorParams):
    """Activator hyper-parameters for the AIDS (binary) task."""

    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.1153   # fraction of samples for the dev split
        self.TEST_SPLIT = 0.538   # fraction of samples for the test split
        # logits-based BCE; the commented alternative was a custom factor loss
        self.LOSS = functional.binary_cross_entropy_with_logits  # f.factor_loss
        # self.BATCH_SIZE = 128
        self.EPOCHS = 250
        self.DATASET = "Aids"


if __name__ == '__main__':
    # ALL toggles between training on the full dataset (True) and using the
    # pre-made train/dev/test external-data splits (False).
    ALL = True
    if ALL == True:
        ext_train = ExternalData(AidsAllExternalDataParams())
        aids_train_ds = BilinearDataset(AidsDatasetAllParams(), external_data=ext_train)
        activator = BilinearActivator(
            LayeredBilinearModule(
                AidsLayeredBilinearModuleParams(
                    ftr_len=aids_train_ds.len_features,
                    embed_vocab_dim=ext_train.len_embed())),
            AidsBilinearActivatorParams(), aids_train_ds)
        activator.train()
    if ALL == False:
        # NOTE(review): this branch only builds the external-data objects here;
        # the rest of the split-based flow presumably continues outside this chunk.
        ext_train = ExternalData(AidsTrainExternalDataParams())
        ext_dev = ExternalData(AidsDevExternalDataParams(), idx_to_symbol=ext_train.idx_to_symbol_dict)
        ext_test = ExternalData(AidsTestExternalDataParams(), idx_to_symbol=ext_train.idx_to_symbol_dict)
# NOTE(review): this chunk begins mid-__init__ of a Protein activator-params
# class; the enclosing class and method headers are outside the visible chunk.
self.EPOCHS = 300
self.DATASET = "Protein - MultiClass"


if __name__ == '__main__':
    # local imports keep the demo self-contained when run as a script
    from bilinear_model import LayeredBilinearModule
    from dataset.dataset_model import BilinearDataset
    from dataset.dataset_external_data import ExternalData
    from multi_class_bilinear_activator import BilinearMultiClassActivator
    t = time.time()  # wall-clock start; presumably reported elsewhere — confirm
    # ALL toggles between full-dataset training (True) and the pre-made
    # train/dev/test external-data splits (False).
    ALL = True
    if ALL == True:
        ext_train = ExternalData(ProteinAllExternalDataParams())
        aids_train_ds = BilinearDataset(ProteinDatasetAllParams(), external_data=ext_train)
        activator = BilinearMultiClassActivator(
            LayeredBilinearModule(
                ProteinLayeredBilinearModuleParams(
                    ftr_len=aids_train_ds.len_features,
                    embed_vocab_dim=ext_train.len_embed())),
            ProteinBilinearActivatorParams(), aids_train_ds)
        activator.train()
    if ALL == False:
        # NOTE(review): only the external-data objects are built in this chunk;
        # the split-based training flow presumably continues outside this view.
        ext_train = ExternalData(ProteinTrainExternalDataParams())
        ext_dev = ExternalData(ProteinDevExternalDataParams(), idx_to_symbol=ext_train.idx_to_symbol_dict)
        ext_test = ExternalData(ProteinTestExternalDataParams(), idx_to_symbol=ext_train.idx_to_symbol_dict)
# calculate total loss loss_count += self._loss_func(output.squeeze(dim=1), l) true_labels += l.tolist() pred_labels += output.squeeze(dim=1).argmax(dim=1).tolist() pred_auc_labels += output.squeeze(dim=1).tolist() # update loss accuracy loss = float(loss_count / len(data_loader)) # pred_labels = [0 if np.isnan(i) else i for i in pred_labels] self._update_loss(loss, job=job) self._update_accuracy(pred_labels, true_labels, job=job) # self._update_auc(pred_auc_labels, true_labels, job=job) ##### TODO AUC check return loss if __name__ == '__main__': ds = BilinearDataset(YanivDatasetParams()) activator = BilinearActivator( LayeredBilinearModule( LayeredBilinearModuleParams(ftr_len=ds.len_features)), BilinearActivatorParams(), BilinearDataset(YanivDatasetParams())) # protein_train_ds = BilinearDataset(ProteinDatasetTrainParams()) # protein_dev_ds = BilinearDataset(ProteinDatasetDevParams()) # protein_test_ds = BilinearDataset(ProteinDatasetTestParams()) # _activator = BilinearActivator(LayeredBilinearModule(LayeredBilinearModuleParams( # ftr_len=protein_train_ds.len_features)), BilinearActivatorParams(), protein_train_ds, # dev_data=protein_dev_ds, test_data=protein_test_ds) activator.train()
class MutagenBilinearActivatorParams(BilinearActivatorParams):
    """Activator hyper-parameters for the Mutagen (binary) task."""

    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.125    # fraction of samples for the dev split
        self.TEST_SPLIT = 0.75    # fraction of samples for the test split
        # logits-based BCE; the commented alternative was a custom factor loss
        self.LOSS = functional.binary_cross_entropy_with_logits  # f.factor_loss
        # self.BATCH_SIZE = 16
        self.EPOCHS = 400
        self.DATASET = "Mutagen"


if __name__ == '__main__':
    # ALL toggles between full-dataset training (True) and the pre-made
    # train/dev/test external-data splits (False).
    ALL = True
    if ALL == True:
        ext_train = ExternalData(MutagenAllExternalDataParams())
        aids_train_ds = BilinearDataset(MutagenDatasetAllParams(), external_data=ext_train)
        activator = BilinearActivator(
            LayeredBilinearModule(
                MutagenLayeredBilinearModuleParams(
                    ftr_len=aids_train_ds.len_features,
                    embed_vocab_dim=ext_train.len_embed())),
            MutagenBilinearActivatorParams(), aids_train_ds)
        activator.train()
    if ALL == False:
        # NOTE(review): only the external-data objects are built in this chunk;
        # the split-based training flow presumably continues outside this view.
        ext_train = ExternalData(MutagenTrainExternalDataParams())
        ext_dev = ExternalData(MutagenDevExternalDataParams(), idx_to_symbol=ext_train.idx_to_symbol_dict)
        ext_test = ExternalData(MutagenTestExternalDataParams(), idx_to_symbol=ext_train.idx_to_symbol_dict)
# NOTE(review): this chunk begins mid-forward — the method header is outside
# the visible chunk. Embeddings are concatenated onto the node features along
# dim 2, then passed through the linear layer stack and a final bilinear layer.
x0 = torch.cat([x0] + list_embed, dim=2)
x1 = x0
self._sync()
for i in range(self._num_layers):
    x1 = self._linear_layers[i](A, x1)
# bilinear layer combines the original features x0 with the propagated x1
x2 = self._bilinear_layer(A, x0, x1)
return x2


if __name__ == "__main__":
    from dataset.datset_sampler import ImbalancedDatasetSampler
    from params.aids_params import AidsAllExternalDataParams, AidsDatasetAllParams
    from dataset.dataset_external_data import ExternalData
    from dataset.dataset_model import BilinearDataset
    ext_train = ExternalData(AidsAllExternalDataParams())
    ds = BilinearDataset(AidsDatasetAllParams(), external_data=ext_train)
    dl = DataLoader(dataset=ds, collate_fn=ds.collate_fn, batch_size=64,
                    sampler=ImbalancedDatasetSampler(ds))
    m_params = LayeredBilinearModuleParams(
        ftr_len=ds.len_features, embed_vocab_dim=ext_train.len_embed())
    m_params.EMBED_DIMS = [20, 20]
    module = LayeredBilinearModule(m_params)
    # module = BilinearModule(BilinearModuleParams())
    # smoke-run the module over one epoch of batches
    for i, (_A, _D, _x0, _l) in enumerate(dl):
        _x2 = module(_A, _D, _x0)
        e = 0  # no-op; presumably a breakpoint anchor — confirm
# NOTE(review): this chunk begins mid-__init__ — the tail of the Grec module
# params constructor, closing its LINEAR_PARAMS_LIST.
                       dropout=self.DROPOUT)
]
# bilinear layer pairs the last linear layer's output dim with the input row dim
self.BILINEAR_PARAMS = GrecBilinearLayerParams(
    self.LINEAR_PARAMS_LIST[self.NUM_LAYERS - 1].COL_DIM,
    self.LINEAR_PARAMS_LIST[0].ROW_DIM)


class GrecBilinearActivatorParams(BilinearActivatorParams):
    """Activator hyper-parameters for the GREC multi-class task."""

    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.15     # fraction of samples for the dev split
        self.TEST_SPLIT = 0.15    # fraction of samples for the test split
        # multi-class task -> cross-entropy; commented alternative was a factor loss
        self.LOSS = cross_entropy  # f.factor_loss
        # self.BATCH_SIZE = 16
        self.EPOCHS = 500
        self.DATASET = "GREC - MultiClass"


if __name__ == '__main__':
    ext_train = ExternalData(GrecAllExternalDataParams())
    aids_train_ds = BilinearDataset(GrecDatasetAllParams(), external_data=ext_train)
    activator = BilinearMultiClassActivator(
        LayeredBilinearModule(
            GrecLayeredBilinearModuleParams(
                ftr_len=aids_train_ds.len_features,
                embed_vocab_dim=ext_train.len_embed())),
        GrecBilinearActivatorParams(), aids_train_ds)
    activator.train()
if self._gpu: A, x0, embed, l = A.cuda(), x0.cuda(), embed.cuda(), l.cuda() # print progress if not self._nni: self._print_progress(batch_index, len_data, job=VALIDATE_JOB) output = self._model(A, x0, embed) # calculate total loss loss_count += self._loss_func(output.squeeze(dim=1).squeeze(dim=1), l.float()) true_labels += l.tolist() pred += output.squeeze(dim=1).squeeze(dim=1).tolist() # update loss accuracy loss = float(loss_count / len(data_loader)) # pred_labels = [0 if np.isnan(i) else i for i in pred_labels] self._update_loss(loss, job=job) self._update_accuracy(pred, true_labels, job=job) self._update_auc(pred, true_labels, job=job) return loss if __name__ == '__main__': from params.aids_params import AidsDatasetTrainParams, AidsDatasetDevParams, AidsDatasetTestParams aids_train_ds = BilinearDataset(AidsDatasetTrainParams()) aids_dev_ds = BilinearDataset(AidsDatasetDevParams()) aids_test_ds = BilinearDataset(AidsDatasetTestParams()) activator = BilinearActivator( LayeredBilinearModule(LayeredBilinearModuleParams(ftr_len=aids_train_ds.len_features)), BilinearActivatorParams(), aids_train_ds, dev_data=aids_dev_ds, test_data=aids_test_ds) activator.train()
# NOTE(review): this chunk begins mid-__init__ — the tail of the Yaniv module
# params constructor's LINEAR_PARAMS_LIST. The repeated (in_dim=50, out_dim=10)
# entries and the in_dim=200 jump look inconsistent with a chained layer stack —
# confirm against the dims actually produced upstream before changing.
                       dropout=self.DROPOUT),
YanivLinearLayerParams(in_dim=50, out_dim=10, dropout=self.DROPOUT),
YanivLinearLayerParams(in_dim=50, out_dim=10, dropout=self.DROPOUT),
YanivLinearLayerParams(in_dim=200, out_dim=1, dropout=self.DROPOUT)
]
# bilinear layer pairs the last linear layer's output dim with the input row dim
self.BILINEAR_PARAMS = YanivBilinearLayerParams(
    self.LINEAR_PARAMS_LIST[self.NUM_LAYERS - 1].COL_DIM,
    self.LINEAR_PARAMS_LIST[0].ROW_DIM)


class YanivBilinearActivatorParams(BilinearActivatorParams):
    """Activator hyper-parameters for the Yaniv (binary) task."""

    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.15     # fraction of samples for the dev split
        self.TEST_SPLIT = 0.15    # fraction of samples for the test split
        # logits-based BCE; the commented alternative was a custom factor loss
        self.LOSS = functional.binary_cross_entropy_with_logits  # f.factor_loss
        # self.BATCH_SIZE = 16
        self.EPOCHS = 100


if __name__ == '__main__':
    refael_train_ds = BilinearDataset(YanivDatasetParams())
    activator = BilinearActivator(
        LayeredBilinearModule(
            YanivLayeredBilinearModuleParams(
                ftr_len=refael_train_ds.len_features)),
        YanivBilinearActivatorParams(), refael_train_ds)
    activator.train()
# NOTE(review): this chunk begins mid-__init__ — the tail of the CoilRag module
# params constructor, closing its LINEAR_PARAMS_LIST.
                       dropout=self.DROPOUT)
]
# bilinear layer pairs the last linear layer's output dim with the input row dim
self.BILINEAR_PARAMS = CoilRagBilinearLayerParams(
    self.LINEAR_PARAMS_LIST[self.NUM_LAYERS - 1].COL_DIM,
    self.LINEAR_PARAMS_LIST[0].ROW_DIM)


class CoilRagBilinearActivatorParams(BilinearActivatorParams):
    """Activator hyper-parameters for the COIL-RAG multi-class task."""

    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.15     # fraction of samples for the dev split
        self.TEST_SPLIT = 0.15    # fraction of samples for the test split
        # multi-class task -> cross-entropy; commented alternative was a factor loss
        self.LOSS = cross_entropy  # f.factor_loss
        # self.BATCH_SIZE = 16
        self.EPOCHS = 500
        self.DATASET = "COIL_RAG - MultiClass"


if __name__ == '__main__':
    ext_train = ExternalData(CoilRagAllExternalDataParams())
    aids_train_ds = BilinearDataset(CoilRagDatasetAllParams(), external_data=ext_train)
    activator = BilinearMultiClassActivator(
        LayeredBilinearModule(
            CoilRagLayeredBilinearModuleParams(
                ftr_len=aids_train_ds.len_features,
                embed_vocab_dim=ext_train.len_embed())),
        CoilRagBilinearActivatorParams(), aids_train_ds)
    activator.train()