def main(args):
    """Main function for timeGAN experiments.

    Args:
      args: namespace with the following attributes:
        - data_name: 'sine', 'stock', or 'energy'
        - seq_len: sequence length
        - Network parameters (should be optimized for different datasets)
          - module: 'gru', 'lstm', or 'lstmLN'
          - hidden_dim: hidden dimensions
          - num_layer: number of layers
          - iteration: number of training iterations
          - batch_size: the number of samples in each batch
        - metric_iteration: number of iterations for metric computation
          (unused here; the metric section below was removed as dead code)

    Returns:
      - ori_data: original data
      - generated_data: generated synthetic data

    Raises:
      ValueError: if args.data_name is not a supported dataset name.
    """
    # Data loading
    if args.data_name in ['stock', 'energy']:
        ori_data = real_data_loading(args.data_name, args.seq_len)
    elif args.data_name == 'sine':
        # Set number of samples and its dimensions
        no, dim = 10000, 5
        ori_data = sine_data_generation(no, args.seq_len, dim)
    else:
        # Fail fast: the original fell through and crashed later with a
        # confusing NameError on ori_data.
        raise ValueError('Unknown data_name: ' + str(args.data_name))

    print(args.data_name + ' dataset is ready.')

    # Synthetic data generation by TimeGAN: collect network parameters.
    parameters = dict()
    parameters['module'] = args.module
    parameters['hidden_dim'] = args.hidden_dim
    parameters['num_layer'] = args.num_layer
    parameters['iterations'] = args.iteration
    parameters['batch_size'] = args.batch_size

    generated_data = timegan(ori_data, parameters)
    print('Finish Synthetic Data Generation')

    # NOTE: the discriminative/predictive metric and visualization section
    # that used to sit here as a no-op triple-quoted string (dead code) was
    # removed; metric_results is therefore not computed or returned.
    return ori_data, generated_data
def main(args):
    """Main function for timeGAN experiments.

    Args:
      args: namespace with the following attributes:
        - data_name: 'sine', 'stock', 'energy', or 'hypo'
        - seq_len: sequence length
        - Network parameters (should be optimized for different datasets)
          - module: 'gru', 'lstm', or 'lstmLN'
          - hidden_dim: hidden dimensions
          - num_layer: number of layers
          - iteration: number of training iterations
          - batch_size: the number of samples in each batch
        - metric_iteration: number of iterations for metric computation

    Returns:
      - ori_data: original data
      - generated_data: generated synthetic data
      - metric_results: discriminative and predictive scores

    Raises:
      ValueError: if args.data_name is not a supported dataset name.
    """
    # Data loading
    if args.data_name in ["stock", "energy"]:
        ori_data = real_data_loading(args.data_name, args.seq_len)
    elif args.data_name == "sine":
        # Set number of samples and its dimensions
        no, dim = 10000, 5
        ori_data = sine_data_generation(no, args.seq_len, dim)
    elif args.data_name == "hypo":
        ori_data = real_data_loading(args.data_name, args.seq_len)
    else:
        # Fail fast: the original fell through and crashed later with a
        # confusing NameError on ori_data.
        raise ValueError("Unknown data_name: " + str(args.data_name))

    print(args.data_name + " dataset is ready.")

    # Synthetic data generation by TimeGAN: collect network parameters.
    parameters = dict()
    parameters["module"] = args.module
    parameters["hidden_dim"] = args.hidden_dim
    parameters["num_layer"] = args.num_layer
    parameters["iterations"] = args.iteration
    parameters["batch_size"] = args.batch_size

    print(len(ori_data), ori_data[0].shape)
    generated_data = timegan(ori_data, parameters)
    print("Finish Synthetic Data Generation")
    print(len(generated_data), generated_data[0].shape)

    # BUG FIX: generated_data_part was previously assigned only inside the
    # length-mismatch branch, so every run where len(generated_data) <=
    # len(ori_data) crashed with a NameError in the metric section below.
    generated_data_part = generated_data
    if len(generated_data) > len(ori_data):
        generated_data_part = generated_data[: len(ori_data)]
        print(
            "Generated data shape mismatch with original data, "
            + "calibrating part of generated data"
        )

    # Performance metrics
    # Output initialization
    metric_results = dict()

    # 1. Discriminative Score (averaged over metric_iteration runs)
    discriminative_score = list()
    for _ in range(args.metric_iteration):
        temp_disc = discriminative_score_metrics(ori_data, generated_data_part)
        discriminative_score.append(temp_disc)
    metric_results["discriminative"] = np.mean(discriminative_score)

    # 2. Predictive score (averaged over metric_iteration runs)
    predictive_score = list()
    for tt in range(args.metric_iteration):
        temp_pred = predictive_score_metrics(ori_data, generated_data_part)
        predictive_score.append(temp_pred)
    metric_results["predictive"] = np.mean(predictive_score)

    # 3. Visualization (PCA and tSNE)
    visualization(ori_data, generated_data_part, "pca", args)
    visualization(ori_data, generated_data_part, "tsne", args)

    # Print discriminative and predictive scores
    print(metric_results)

    return ori_data, generated_data, metric_results
# Experiments iterations Iteration = 2 Sub_Iteration = 10 speed = 200 # 100,200,300,400,500 feed = 6 # 6,12 #%% Data Loading seq_length = 24 if data_name == 'google': dataX = google_data_loading(seq_length) elif data_name == 'sine': No = 10000 F_No = 5 dataX = sine_data_generation(No, seq_length, F_No) elif data_name == 'TUD': dataX,dataXs, min, max, idx, data_true = load_real_samples(seq_length, speed, feed, False) print(data_name + ' dataset is ready.') #%% Newtork Parameters parameters = dict() parameters['hidden_dim'] = len(dataX[0][0,:]) * 4 parameters['num_layers'] = 3 parameters['iterations'] = 2 parameters['batch_size'] = 128 parameters['module_name'] = 'gru' # Other options: 'lstm' or 'lstmLN' parameters['z_dim'] = len(dataX[0][0,:]) #%% Experiments
# NOTE(review): the next three statements look like an orphaned fragment of an
# embedder training loop — X_mb, T_mb, and itt are not defined anywhere in the
# visible chunk, and the enclosing loop/function (if any) lies outside this
# view. Original indentation could not be recovered; confirm against the full
# file before running.
print(X_mb, T_mb)
train_step_embedder(X_mb, T_mb)
print(itt)
# train()

####TESTING####
from data_loading import real_data_loading, sine_data_generation

data_name = 'sine'
seq_len = 5

if data_name in ['stock', 'energy']:
    ori_data = real_data_loading(data_name, seq_len)
elif data_name == 'sine':
    # Set number of samples and its dimensions
    no, dim = 50, 2
    ori_data = sine_data_generation(no, seq_len, dim)

print(data_name + ' dataset is ready.')

## Network parameters (small values: this is a smoke test, not a real run)
parameters = dict()
parameters['module'] = 'lstm'
parameters['hidden_dim'] = 6
parameters['num_layer'] = 3
parameters['iterations'] = 10
parameters['batch_size'] = 4

timegan(ori_data, parameters)