import scipy.sparse as sp
import torch


def create_model(dataset: str, model_config: dict, adaptor_config: dict, device):
    # Load the support matrices and stack them into a dense (K, N, N) tensor
    # on the target device.
    supports = load_graph_data(dataset, 'doubletransition')
    supports = torch.tensor(list(map(sp.coo_matrix.toarray, supports)),
                            dtype=torch.float32, device=device)
    edge_dim = supports.size(0)
    adaptor = STAdaptor(supports, **adaptor_config)
    predictor = Ours(edge_dim=edge_dim, **model_config)
    return Model(predictor, adaptor)
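# A minimal usage sketch of create_model; the dataset name and the keys in
# model_config / adaptor_config are assumptions, since the Ours and STAdaptor
# signatures are not shown here.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_config = {'hidden_dim': 64}     # hypothetical hyperparameters
adaptor_config = {'learnable': True}  # hypothetical hyperparameters
model = create_model('METR-LA', model_config, adaptor_config, device)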
parser.add_argument('--sub_dataname', type=str, default='DE',
                    help='sub-dataset name.')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Load data
g, n_classes = load_graph_data(args.dataset)
features = g.ndata.pop('features')
labels = g.ndata.pop('labels')
num_class = labels.max() + 1

if args.cuda:
    features = features.cuda()
    # adj = adj.cuda()
    labels = labels.cuda()
    # idx_train = idx_train.cuda()
    # idx_val = idx_val.cuda()
    # idx_test = idx_test.cuda()


def test_sage(model, idx_train, idx_val, idx_test):
    ...
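# A minimal sketch of what test_sage might compute, assuming a standard
# transductive node-classification setup; this body is hypothetical, not the
# original implementation.
def _evaluate_split(model, features, labels, idx):
    model.eval()
    with torch.no_grad():
        logits = model(features)  # assumed forward signature
        preds = logits[idx].argmax(dim=1)
        return (preds == labels[idx]).float().mean().item()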
    type_num_dict = {"category": 0, "product": 1, "user": 2}
    num_type_dict = {0: "category", 1: "product", 2: "user"}
elif dataset_str == "yelp":
    edge_types_strings = ["business_category", "business_user", "user_user"]
    type_num_dict = {"business": 0, "category": 1, "user": 2}
    num_type_dict = {0: "business", 1: "category", 2: "user"}

# Expand each "src_dst" string into numeric (src, dst) pairs; asymmetric
# types are added in both directions.
edge_types = []
for et in edge_types_strings:
    (i, j) = et.split("_")
    edge_types.append((type_num_dict[i], type_num_dict[j]))
    if i != j:
        edge_types.append((type_num_dict[j], type_num_dict[i]))

# Load data
G = load_graph_data(graph_path)
adjs_orig = get_edge_adj_matrices(G, {et: None for et in edge_types_strings})

# Get adjacency matrices for the subgraphs and strip the diagonal
# (self-loops) from the full adjacency matrix.
adj_orig = nx.to_scipy_sparse_matrix(G)
adj_orig = adj_orig - sp.dia_matrix(
    (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()

p = dataset_path + "random_splits/" + edge_type + "/random" + str(random_seed) + "/"
G_train, test_positive_e, test_negative_e, val_positive_e, val_negative_e, train_edges = read_split(
    G, edge_type.split("_"), random_seed, p)

t0 = time.time()
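# A hypothetical sketch of what get_edge_adj_matrices might do, assuming nodes
# carry a "type" attribute matching the edge-type strings; the real helper is
# not shown in this snippet.
def get_edge_adj_matrices_sketch(G, edge_types_strings):
    adjs = {}
    for et in edge_types_strings:
        src_t, dst_t = et.split("_")
        # Restrict the graph to nodes of the two endpoint types and take the
        # adjacency matrix of that induced subgraph.
        nodes = [n for n, d in G.nodes(data=True) if d.get("type") in (src_t, dst_t)]
        adjs[et] = nx.to_scipy_sparse_matrix(G.subgraph(nodes))
    return adjs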
def main(config):
    # Set up the logger.
    if not os.path.exists(os.path.join(config.base_dir, config.log_dir)):
        os.mkdir(os.path.join(config.base_dir, config.log_dir))
    logger = utils.get_logger(os.path.join(config.base_dir, config.log_dir),
                              "test", log_filename=config.graph_name + ".log")

    # Load the dataset and the precomputed adjacency matrix.
    graph_pkl_filename = os.path.join(config.base_dir, config.dataset_dir,
                                      config.graph_pkl_filename)
    _, _, adj_mat = utils.load_graph_data(graph_pkl_filename)
    data = utils.load_dataset(dataset_dir=os.path.join(config.base_dir, config.dataset_dir),
                              batch_size=config.batch_size,
                              test_batch_size=config.batch_size)
    logger.info("data:")
    logger.info(f"x_train: {data['x_train'].shape}, y_train: {data['y_train'].shape}")
    logger.info(f"x_val: {data['x_val'].shape}, y_val: {data['y_val'].shape}")
    logger.info(f"x_test: {data['x_test'].shape}, y_test: {data['y_test'].shape}")

    train_data_loader = data['train_loader']
    val_data_loader = data['val_loader']
    test_data_loader = data['test_loader']
    num_train_sample = data['x_train'].shape[0]
    num_val_sample = data['x_val'].shape[0]
    num_test_sample = data['x_test'].shape[0]

    # Number of iterations per epoch, used for the progress bar.
    num_train_iteration_per_epoch = math.ceil(num_train_sample / config.batch_size)
    num_val_iteration_per_epoch = math.ceil(num_val_sample / config.batch_size)
    num_test_iteration_per_epoch = math.ceil(num_test_sample / config.batch_size)

    # Build the model architecture, then print it to the console.
    logger.info("model architecture:")
    logger.info(f"num_rnn_layers: {config.num_rnn_layers}, rnn_units: {config.rnn_units}, "
                f"max_diffusion_step: {config.max_diffusion_step}")
    logger.info(f"n_in: {config.n_in}, n_out: {config.n_out}, epochs: {config.epochs}")
    logger.info(f"gpu: {config.n_gpu}")
    logger.info(f"input_dim: {config.input_dim}, output_dim: {config.output_dim}, "
                f"num_nodes: {config.num_nodes}, batch_size: {config.batch_size}")
    logger.info(f"enc_input_dim: {config.enc_input_dim}, dec_input_dim: {config.dec_input_dim}")

    model = dcrnn_model.DCRNNModel(adj_mat, config.batch_size, config.enc_input_dim,
                                   config.dec_input_dim, config.max_diffusion_step,
                                   config.num_nodes, config.num_rnn_layers,
                                   config.rnn_units, config.output_dim, config.device)

    # Loss (masked MAE on rescaled values) and the inverse transform used to
    # report predictions and targets in the original scale.
    loss = module_metric.masked_mae_loss(data['scaler'], 0.0)
    inverse = module_metric.inverse_scaler(data['scaler'], 0.0)

    # Build the optimizer and learning-rate scheduler.
    # (To disable the scheduler, delete every line that mentions lr_scheduler.)
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(params=trainable_params, lr=config.base_lr,
                                 weight_decay=0.0, eps=config.epsilon, amsgrad=True)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=config.lr_milestones, gamma=config.lr_decay_ratio)

    trainer = DCRNNTrainer(model, loss, optimizer, config=config,
                           data_loader=train_data_loader, logger=logger,
                           valid_data_loader=val_data_loader,
                           lr_scheduler=lr_scheduler,
                           len_epoch=num_train_iteration_per_epoch,
                           val_len_epoch=num_val_iteration_per_epoch)
    train_logs = trainer.train()
    epoch_loss = [i['loss'] for i in train_logs]
    val_loss = [i['val_loss'] for i in train_logs]

    tester = DCRNNTester(model, loss, inverse, config,
                         data_loader=test_data_loader, logger=logger,
                         test_data_loader=test_data_loader,
                         test_len_epoch=num_test_iteration_per_epoch)
    test_loss, test_mae, test_rmse, test_mape, test_outputs, test_targets = tester.predict()

    # Collect results and dump them as JSON.
    results = {
        "test": test_targets,
        "prediction": test_outputs,
        "train_loss": epoch_loss,
        "val_loss": val_loss,
        "rmse": test_rmse.tolist(),
        "mae": test_mae.tolist(),
        "mape": test_mape.tolist(),
        "input_dim": config.input_dim,
        "output_dim": config.output_dim,
        "n_in": config.n_in,
        "n_out": config.n_out,
        "num_rnn_layers": config.num_rnn_layers,
        "rnn_units": config.rnn_units,
        "max_diffusion_step": config.max_diffusion_step,
        "enc_input_dim": config.enc_input_dim,
        "dec_input_dim": config.dec_input_dim,
        "num_nodes": config.num_nodes,
        "batch_size": config.batch_size,
    }

    if not os.path.exists(os.path.join(config.base_dir, config.results_dir)):
        os.mkdir(os.path.join(config.base_dir, config.results_dir))
    with open(os.path.join(config.base_dir, config.results_dir,
                           "{}.json".format(config.graph_name)), 'w') as fout:
        fout.write(json.dumps(results))
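# A minimal sketch of a masked MAE loss with inverse scaling, in the style of
# the DCRNN codebase; module_metric.masked_mae_loss's real implementation may
# differ in detail.
def masked_mae_loss_sketch(scaler, null_val=0.0):
    def loss(preds, labels):
        # Map predictions and targets back to the original scale.
        preds = scaler.inverse_transform(preds)
        labels = scaler.inverse_transform(labels)
        # Mask out missing entries (encoded as null_val) and renormalize.
        mask = (labels != null_val).float()
        mask /= mask.mean()
        mae = torch.abs(preds - labels) * mask
        mae = torch.where(torch.isnan(mae), torch.zeros_like(mae), mae)
        return mae.mean()
    return loss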
args.cuda = not args.no_cuda and torch.cuda.is_available()
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# =========
# Load data
# =========
lossfun = 1  # 0: null_loss(); 1: cross_entropy(); 2: MSE
n = 8
filename = 'Catbox{}_xi_norm5%'.format(n)
# imgs_data = (feature, label); imgs_data[0][0]: features, imgs_data[0][1]: labels
imgs_data = np.array([load_data(image='../data/{}/H1.txt'.format(filename))])
# adj = (indices, values)
adj = load_graph_data('../data/{}/Edge{}.txt'.format(filename, n),
                      '../data/{}/H1.txt'.format(filename))
n_feature = imgs_data[0][0].shape[1]  # c = 3 (R, G, B); .shape[0] = row count, .shape[1] = column count
n_class = 2
col = 1
train_idx = range(0, 1)
test_idx = range(0, 1)

# ===================
# Model and optimizer
# ===================
model = GCN(nfeat=n_feature, nhid=args.hidden, nclass=n_class,
            dropout=args.dropout)
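# A minimal training-loop sketch for the GCN built above, assuming the model's
# forward takes (features, adj) and that lossfun == 1 selects cross-entropy;
# args.lr, args.weight_decay, and args.epochs are assumed arguments, not shown
# in the original snippet.
import torch.nn.functional as F

optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                             weight_decay=args.weight_decay)
features = torch.as_tensor(imgs_data[0][0], dtype=torch.float32)
labels = torch.as_tensor(imgs_data[0][1], dtype=torch.long)

model.train()
for epoch in range(args.epochs):
    optimizer.zero_grad()
    output = model(features, adj)  # assumed forward signature
    loss = F.cross_entropy(output[list(train_idx)], labels[list(train_idx)])
    loss.backward()
    optimizer.step()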