def LoadData(DATASET_NAME):
    """
        Build the dataset object matching DATASET_NAME (called from main.py).

        returns:
        ; dataset object, or None when DATASET_NAME belongs to no known family
    """
    # Ordered (accepted names, constructor) pairs. The constructors are wrapped
    # in lambdas so a dataset class is only looked up when its family matches.
    families = (
        # MNIST or CIFAR Superpixels
        (('MNIST', 'CIFAR10'), lambda name: SuperPixDataset(name)),
        # (ZINC) molecule dataset
        (('ZINC',), lambda name: MoleculeDataset(name)),
        # TU Datasets
        (('COLLAB', 'ENZYMES', 'DD', 'PROTEINS_full'), lambda name: TUsDataset(name)),
        # SBM datasets
        (('SBM_CLUSTER', 'SBM_PATTERN'), lambda name: SBMsDataset(name)),
        # TSP dataset
        (('TSP',), lambda name: TSPDataset(name)),
    )

    for accepted_names, build in families:
        if DATASET_NAME in accepted_names:
            return build(DATASET_NAME)

    # No family matched: fall through exactly like the original (implicit None).
    return None
def get_multiplicity(DATASET_NAME, first, second, tol, dim, norm, tol_scipy):
    """
        Count, over every graph of a dataset (train + val + test), how often the
        values at positions `first` and `second` of the sequence returned by
        get_eig_val differ by more than `tol`.

        parameters:
        ; DATASET_NAME  one of 'ZINC', 'SBM_PATTERN', 'CIFAR10'
        ; first, second indices into the per-graph result of get_eig_val
        ; tol           threshold on abs(eig[first] - eig[second])
        ; dim           forwarded to get_eig_val as pos_enc_dim
        ; norm          forwarded to get_eig_val as norm
        ; tol_scipy     forwarded to get_eig_val as tol

        returns:
        ; (i / n, i, n) where n is the number of graphs and i the number of
          graphs whose two selected values differ by more than tol

        raises:
        ; NotImplementedError  for 'COLLAB' (the original code skipped the
          eigenvalue computation for it and then crashed on undefined locals)
        ; ValueError           for any other unrecognised dataset name
        ; ZeroDivisionError    if the dataset contains no graphs
    """
    # Fail fast with an explicit message instead of loading a large dataset and
    # then dying with an UnboundLocalError further down.
    if DATASET_NAME == 'COLLAB':
        raise NotImplementedError(
            "get_multiplicity does not support the 'COLLAB' dataset")

    if DATASET_NAME == 'ZINC':
        dataset = MoleculeDataset(DATASET_NAME)
    elif DATASET_NAME == 'SBM_PATTERN':
        dataset = SBMsDataset(DATASET_NAME)
    elif DATASET_NAME == 'CIFAR10':
        dataset = SuperPixDataset(DATASET_NAME)
    else:
        raise ValueError(
            "get_multiplicity: unsupported dataset {!r}".format(DATASET_NAME))

    # Same order as before: train graphs first, then val, then test.
    eigs = []
    for split in (dataset.train, dataset.val, dataset.test):
        eigs.extend(
            get_eig_val(g, pos_enc_dim=dim, norm=norm, tol=tol_scipy)
            for g in split.graph_lists
        )

    n = len(eigs)
    i = sum(1 for eig in eigs if abs(eig[first] - eig[second]) > tol)
    return i / n, i, n
def LoadData(DATASET_NAME):
    """
        Build the dataset object matching DATASET_NAME (called from main.py).

        returns:
        ; dataset object, or None when DATASET_NAME is not a superpixel dataset
    """
    # Only the MNIST / CIFAR10 superpixel datasets are handled here.
    if DATASET_NAME in ('MNIST', 'CIFAR10'):
        return SuperPixDataset(DATASET_NAME)
    return None
def LoadData(DATASET_NAME):
    """
        Build the dataset object matching DATASET_NAME (called from main.py).

        returns:
        ; dataset object, or None when DATASET_NAME is neither a superpixel
          nor an SBM dataset
    """
    superpixel_names = ('MNIST', 'CIFAR10')
    sbm_names = ('SBM_CLUSTER', 'SBM_PATTERN')

    if DATASET_NAME in superpixel_names:
        # MNIST or CIFAR Superpixels
        loaded = SuperPixDataset(DATASET_NAME)
    elif DATASET_NAME in sbm_names:
        # SBM datasets
        loaded = SBMsDataset(DATASET_NAME)
    else:
        # Unknown names are not an error here; the caller receives None.
        loaded = None
    return loaded
def load_data(args):
    """
        Build the dataset selected by `args.data`.

        parameters:
        ; args  object exposing `data` (dataset name) and, for 'QM9' only,
                `extra` (passed through as the second constructor argument)

        returns:
        ; dataset object

        raises:
        ; Exception('Unknown dataset!') when `args.data` is not recognised
    """
    name = args.data

    if name == 'ZINC':
        return MoleculeDataset(name)
    if name == 'QM9':
        # QM9 is the only loader that takes the additional `extra` argument.
        return QM9Dataset(name, args.extra)
    if name == 'TSP':
        return TSPDataset(name)
    if name in ('MNIST', 'CIFAR10'):
        return SuperPixDataset(name)
    if name in ('SBM_CLUSTER', 'SBM_PATTERN'):
        return SBMsDataset(name)
    if name == 'Cora':
        return CoraDataset(name)

    raise Exception('Unknown dataset!')
def LoadData(DATASET_NAME):
    """
        Build the dataset object matching DATASET_NAME (called from main.py).

        returns:
        ; dataset object, or None when DATASET_NAME belongs to no known family
    """
    toxic_reps = (
        'PBT_Repn1', 'PBT_Repn2', 'PBT_Repn3', 'PBT_Repn4',
        'PBT_Rep1', 'PBT_Rep2', 'PBT_Rep3', 'PBT_Rep4',
        'CMR_Rep1', 'CMR_Rep2', 'CMR_Rep3', 'CMR_Rep4',
    )

    # Families are tried top to bottom; constructors sit behind lambdas so a
    # dataset class is only looked up when its family actually matches.
    dispatch = (
        # (TOX) molecule dataset
        (toxic_reps, lambda name: TOXDataset(name)),
        # MNIST or CIFAR Superpixels
        (('MNIST', 'CIFAR10'), lambda name: SuperPixDataset(name)),
        # (ZINC) molecule dataset
        (('ZINC',), lambda name: MoleculeDataset(name)),
        # TU Datasets
        (('COLLAB', 'ENZYMES', 'DD', 'PROTEINS_full'), lambda name: TUsDataset(name)),
        # SBM datasets
        (('SBM_CLUSTER', 'SBM_PATTERN'), lambda name: SBMsDataset(name)),
        # TSP dataset
        (('TSP',), lambda name: TSPDataset(name)),
        # CITATIONGRAPHS Datasets
        (('CORA', 'CITESEER', 'PUBMED'), lambda name: CitationGraphsDataset(name)),
    )

    for members, make in dispatch:
        if DATASET_NAME in members:
            return make(DATASET_NAME)
    return None
def LoadData(DATASET_NAME):
    """
        Build the dataset object matching DATASET_NAME (called from main.py).

        returns:
        ; dataset object, or None when DATASET_NAME belongs to no known family
    """
    if DATASET_NAME in ('MNIST', 'CIFAR10'):
        # MNIST or CIFAR Superpixels
        result = SuperPixDataset(DATASET_NAME)
    elif DATASET_NAME == 'ZINC':
        # (ZINC) molecule dataset
        result = MoleculeDataset(DATASET_NAME)
    elif DATASET_NAME in ('SBM_CLUSTER', 'SBM_PATTERN'):
        # SBM datasets
        result = SBMsDataset(DATASET_NAME)
    elif DATASET_NAME in ('SGS_HIGH_PASS', 'SGS_BAND_PASS', 'SGS_LOW_PASS'):
        # SGS (Synthetic Graph Spectrum) Dataset
        result = SGSDataset(DATASET_NAME)
    else:
        # Unrecognised names yield None rather than raising.
        result = None
    return result
def main():
    """
        USER CONTROLS

        Command-line entry point: reads the JSON file given by --config,
        overrides its 'gpu', 'params' and 'net_params' entries with any
        command-line value that was supplied, loads the superpixel dataset,
        derives the dataset-dependent network parameters (input sizes, number
        of classes, degree statistics) and starts train_val_pipeline.

        Exits with an argparse usage error when --config is missing.
    """

    def _str2bool(text):
        # argparse's type=bool calls bool() on the raw string, so '--gru False'
        # used to evaluate to True. Only the usual "false" spellings (and the
        # empty string, already falsy before) map to False; everything else
        # keeps evaluating to True as it did with type=bool.
        return text.lower() not in ('', 'false', '0', 'no', 'n', 'f')

    def _as_flag(text):
        # Convention of the untyped boolean options: only the literal 'True' enables.
        return text == 'True'

    def _keep(value):
        return value

    parser = argparse.ArgumentParser()

    def _add_plain(name, label=None):
        # Untyped option whose help text follows the common template.
        parser.add_argument('--' + name, help="Please give a value for " + (label or name))

    parser.add_argument('--config', help="Please give a config.json file with training/model/data/param details")
    _add_plain('gpu_id', 'gpu id')
    _add_plain('model', 'model name')
    _add_plain('dataset', 'dataset name')
    for name in ('out_dir', 'seed', 'epochs', 'batch_size', 'init_lr', 'lr_reduce_factor',
                 'lr_schedule_patience', 'min_lr', 'weight_decay', 'print_epoch_interval',
                 'L', 'hidden_dim', 'out_dim', 'residual'):
        _add_plain(name)
    parser.add_argument('--JK', default='last', help='Jumping Knowledge')
    for name in ('edge_feat', 'readout', 'kernel', 'n_heads', 'gated', 'in_feat_dropout',
                 'dropout', 'graph_norm', 'batch_norm', 'sage_aggregator', 'data_mode',
                 'num_pool', 'gnn_per_block', 'embedding_dim', 'pool_ratio', 'linkpred',
                 'cat', 'self_loop', 'max_time'):
        _add_plain(name)
    parser.add_argument('--expid', help='Experiment id.')
    parser.add_argument('--type_net', default='simple', help='Type of net')
    parser.add_argument('--augmentation', type=float, default=0.,
                        help='Dynamically augmenting with rotations, angle in degrees')
    parser.add_argument('--distortion', type=float, default=0., help='Distortion of the vector field')
    parser.add_argument('--proportion', type=float, default=1., help='Proportion of the dataset to use')
    parser.add_argument('--flip', action='store_true', default=False, help='Flip x-axis')

    # eig params
    parser.add_argument('--coord_eig', action='store_true', default=False, help='Having the coord. weights')
    parser.add_argument('--aggregators', type=str, help='Aggregators to use.')
    parser.add_argument('--scalers', type=str, help='Scalers to use.')
    parser.add_argument('--NN_eig', action='store_true', default=False, help='NN eig aggr.')
    parser.add_argument('--towers', type=int, help='Towers to use.')
    parser.add_argument('--divide_input_first', type=_str2bool,
                        help='Whether to divide the input in first layers.')
    parser.add_argument('--divide_input_last', type=_str2bool,
                        help='Whether to divide the input in last layer.')
    parser.add_argument('--gru', type=_str2bool, help='Whether to use gru.')
    parser.add_argument('--edge_dim', type=int, help='Size of edge embeddings.')
    parser.add_argument('--pretrans_layers', type=int, help='pretrans_layers.')
    parser.add_argument('--posttrans_layers', type=int, help='posttrans_layers.')
    parser.add_argument('--not_pre', action='store_true', default=False, help='Not applying pre-transformation')

    args = parser.parse_args()

    # open(None) would otherwise fail with an unhelpful TypeError.
    if args.config is None:
        parser.error('--config is required')

    with open(args.config) as f:
        config = json.load(f)

    def _apply_overrides(target, spec):
        # Copy every command-line value that is not None into `target`,
        # converted by its caster, in the order given by `spec`.
        for key, cast in spec:
            value = getattr(args, key)
            if value is not None:
                target[key] = cast(value)

    # device
    if args.gpu_id is not None:
        config['gpu']['id'] = int(args.gpu_id)
        config['gpu']['use'] = True
    device = gpu_setup(config['gpu']['use'], config['gpu']['id'])

    # model, dataset, out_dir
    MODEL_NAME = args.model if args.model is not None else config['model']
    DATASET_NAME = args.dataset if args.dataset is not None else config['dataset']
    dataset = SuperPixDataset(DATASET_NAME, args.coord_eig, proportion=args.proportion)
    out_dir = args.out_dir if args.out_dir is not None else config['out_dir']

    # parameters
    params = config['params']
    _apply_overrides(params, (
        ('seed', int),
        ('epochs', int),
        ('batch_size', int),
        ('init_lr', float),
        ('lr_reduce_factor', float),
        ('lr_schedule_patience', int),
        ('min_lr', float),
        ('weight_decay', float),
        ('print_epoch_interval', int),
        ('max_time', float),
    ))

    # network parameters
    net_params = config['net_params']
    net_params['device'] = device
    net_params['gpu_id'] = config['gpu']['id']
    net_params['batch_size'] = params['batch_size']
    _apply_overrides(net_params, (
        ('L', int),
        ('hidden_dim', int),
        ('out_dim', int),
        ('residual', _as_flag),
        ('JK', _keep),
        ('edge_feat', _as_flag),
        ('readout', _keep),
        ('kernel', int),
        ('n_heads', int),
        ('gated', _as_flag),
        ('in_feat_dropout', float),
        ('dropout', float),
        ('graph_norm', _as_flag),
        ('batch_norm', _as_flag),
        ('sage_aggregator', _keep),
        ('data_mode', _keep),
        ('num_pool', int),
        ('gnn_per_block', int),
        ('embedding_dim', int),
        ('pool_ratio', float),
        ('linkpred', _as_flag),
        ('cat', _as_flag),
        ('self_loop', _as_flag),
        ('aggregators', _keep),
        ('scalers', _keep),
        ('towers', _keep),
        ('divide_input_first', _keep),
        ('divide_input_last', _keep),
        ('NN_eig', _keep),
        ('gru', _keep),
        ('edge_dim', _keep),
        ('pretrans_layers', _keep),
        ('posttrans_layers', _keep),
        ('not_pre', _keep),
        ('type_net', _keep),
        ('augmentation', _keep),
        ('distortion', _keep),
        ('flip', _keep),
    ))

    # Superpixels
    net_params['in_dim'] = dataset.train[0][0].ndata['feat'][0].size(0)
    net_params['in_dim_edge'] = dataset.train[0][0].edata['feat'][0].size(0)
    num_classes = len(np.unique(np.array(dataset.train[:][1])))
    net_params['n_classes'] = num_classes

    # D concatenates, over all training graphs, the sums of each adjacency
    # matrix along its last dimension (presumably per-node degrees -- confirm
    # against the DGL version in use); avg_d stores three averages of it.
    D = torch.cat([torch.sparse.sum(g.adjacency_matrix(transpose=True), dim=-1).to_dense()
                   for g in dataset.train.graph_lists])
    net_params['avg_d'] = dict(lin=torch.mean(D),
                               exp=torch.mean(torch.exp(torch.div(1, D)) - 1),
                               log=torch.mean(torch.log(D + 1)))

    # Take the timestamp once so the four paths of a run always share the same
    # suffix (four separate strftime calls could straddle a second boundary).
    run_tag = (MODEL_NAME + "_" + DATASET_NAME + "_GPU" + str(config['gpu']['id'])
               + "_" + time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y'))
    root_log_dir = out_dir + 'logs/' + run_tag
    root_ckpt_dir = out_dir + 'checkpoints/' + run_tag
    write_file_name = out_dir + 'results/result_' + run_tag
    write_config_file = out_dir + 'configs/config_' + run_tag
    dirs = root_log_dir, root_ckpt_dir, write_file_name, write_config_file

    os.makedirs(out_dir + 'results', exist_ok=True)
    os.makedirs(out_dir + 'configs', exist_ok=True)

    net_params['total_param'] = view_model_param(MODEL_NAME, net_params)
    train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs)
graph_sizes.min().long().item(), graph_sizes.max().long().item())) pass plot_histo_graphs(dataset.train, 'trainset') plot_histo_graphs(dataset.val, 'valset') plot_histo_graphs(dataset.test, 'testset') Tools.print(len(dataset.train)) Tools.print(len(dataset.val)) Tools.print(len(dataset.test)) Tools.print(dataset.train[0]) Tools.print(dataset.val[0]) Tools.print(dataset.test[0]) ################################################################################# now_dataset_name = 'MNIST' now_data_file = "D:\data\GCN\{}.pkl".format(now_dataset_name) dataset = SuperPixDataset(now_dataset_name, data_file=now_data_file) # 54s trainset, valset, testset = dataset.train, dataset.val, dataset.test ################################################################################# start = time.time() batch_size = 10 train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, collate_fn=SuperPixDataset.collate) Tools.print('Time (sec):', time.time() - start) # 0.0003s
def main():
    """
        USER CONTROLS

        Command-line entry point: reads the JSON file given by --config,
        overrides its 'gpu', 'params' and 'net_params' entries with any
        command-line value that was supplied, loads the superpixel dataset,
        derives the dataset-dependent network parameters (input sizes, number
        of classes, degree statistics) and starts train_val_pipeline.

        --model, --expid and --lap_norm are accepted but not read by this
        function.

        Exits with an argparse usage error when --config is missing.
    """

    def _str2bool(text):
        # argparse's type=bool calls bool() on the raw string, so
        # '--divide_input_first False' used to evaluate to True. Only the usual
        # "false" spellings (and the empty string, already falsy before) map to
        # False; everything else keeps evaluating to True as with type=bool.
        return text.lower() not in ('', 'false', '0', 'no', 'n', 'f')

    def _as_flag(text):
        # Convention of the untyped boolean options: only the literal 'True' enables.
        return text == 'True'

    def _keep(value):
        return value

    parser = argparse.ArgumentParser()

    def _add_plain(name, label=None):
        # Untyped option whose help text follows the common template.
        parser.add_argument('--' + name, help="Please give a value for " + (label or name))

    parser.add_argument(
        '--config',
        help="Please give a config.json file with training/model/data/param details")
    _add_plain('gpu_id', 'gpu id')
    _add_plain('model', 'model name')
    _add_plain('dataset', 'dataset name')
    for name in ('seed', 'epochs', 'batch_size', 'init_lr', 'lr_reduce_factor',
                 'lr_schedule_patience', 'min_lr', 'weight_decay', 'print_epoch_interval',
                 'L', 'hidden_dim', 'out_dim', 'residual', 'edge_feat', 'readout',
                 'in_feat_dropout', 'dropout', 'graph_norm', 'batch_norm', 'max_time'):
        _add_plain(name)
    parser.add_argument('--expid', help='Experiment id.')
    parser.add_argument('--type_net', default='simple', help='Type of net')
    parser.add_argument('--lap_norm', default='none', help='Laplacian normalisation')
    parser.add_argument('--augmentation', type=float, default=0.,
                        help='Dynamically augmenting with rotations, angle in degrees')
    parser.add_argument('--distortion', type=float, default=0., help='Distortion of the vector field')
    parser.add_argument('--proportion', type=float, default=1., help='Proportion of the dataset to use')
    parser.add_argument('--flip', action='store_true', default=False, help='Flip x-axis')

    # eig params
    parser.add_argument('--coord_eig', action='store_true', default=False, help='Having the coord. weights')
    parser.add_argument('--aggregators', type=str, help='Aggregators to use.')
    parser.add_argument('--scalers', type=str, help='Scalers to use.')
    parser.add_argument('--towers', type=int, help='Towers to use.')
    parser.add_argument('--divide_input_first', type=_str2bool,
                        help='Whether to divide the input in first layers.')
    parser.add_argument('--divide_input_last', type=_str2bool,
                        help='Whether to divide the input in last layer.')
    parser.add_argument('--edge_dim', type=int, help='Size of edge embeddings.')
    parser.add_argument('--pretrans_layers', type=int, help='pretrans_layers.')
    parser.add_argument('--posttrans_layers', type=int, help='posttrans_layers.')

    args = parser.parse_args()

    # open(None) would otherwise fail with an unhelpful TypeError.
    if args.config is None:
        parser.error('--config is required')

    with open(args.config) as f:
        config = json.load(f)

    def _apply_overrides(target, spec):
        # Copy every command-line value that is not None into `target`,
        # converted by its caster, in the order given by `spec`.
        for key, cast in spec:
            value = getattr(args, key)
            if value is not None:
                target[key] = cast(value)

    # device
    if args.gpu_id is not None:
        config['gpu']['id'] = int(args.gpu_id)
        config['gpu']['use'] = True
    device = gpu_setup(config['gpu']['use'], config['gpu']['id'])

    # dataset
    DATASET_NAME = args.dataset if args.dataset is not None else config['dataset']
    dataset = SuperPixDataset(DATASET_NAME,
                              coord_eig=args.coord_eig,
                              proportion=args.proportion)

    # parameters
    params = config['params']
    _apply_overrides(params, (
        ('seed', int),
        ('epochs', int),
        ('batch_size', int),
        ('init_lr', float),
        ('lr_reduce_factor', float),
        ('lr_schedule_patience', int),
        ('min_lr', float),
        ('weight_decay', float),
        ('print_epoch_interval', int),
        ('max_time', float),
    ))

    # network parameters
    net_params = config['net_params']
    net_params['device'] = device
    net_params['gpu_id'] = config['gpu']['id']
    net_params['batch_size'] = params['batch_size']
    _apply_overrides(net_params, (
        ('L', int),
        ('hidden_dim', int),
        ('out_dim', int),
        ('residual', _as_flag),
        ('edge_feat', _as_flag),
        ('readout', _keep),
        ('in_feat_dropout', float),
        ('dropout', float),
        ('graph_norm', _as_flag),
        ('batch_norm', _as_flag),
        ('aggregators', _keep),
        ('scalers', _keep),
        ('towers', _keep),
        ('divide_input_first', _keep),
        ('divide_input_last', _keep),
        ('edge_dim', _keep),
        ('pretrans_layers', _keep),
        ('posttrans_layers', _keep),
        ('type_net', _keep),
        ('distortion', _keep),
        ('augmentation', _keep),
        ('flip', _keep),
    ))

    # Superpixels
    net_params['in_dim'] = dataset.train[0][0].ndata['feat'][0].size(0)
    net_params['in_dim_edge'] = dataset.train[0][0].edata['feat'][0].size(0)
    num_classes = len(np.unique(np.array(dataset.train[:][1])))
    net_params['n_classes'] = num_classes

    # calculate logarithmic average degree for scalers
    # D concatenates, over all training graphs, the sums of each adjacency
    # matrix along its last dimension (presumably per-node degrees -- confirm
    # against the DGL version in use).
    D = torch.cat([
        torch.sparse.sum(g.adjacency_matrix(transpose=True), dim=-1).to_dense()
        for g in dataset.train.graph_lists
    ])
    net_params['avg_d'] = dict(lin=torch.mean(D),
                               exp=torch.mean(torch.exp(torch.div(1, D)) - 1),
                               log=torch.mean(torch.log(D + 1)))

    net_params['total_param'] = view_model_param(net_params)
    train_val_pipeline(dataset, params, net_params)