def get_fundamental_data():
    now = time.strftime('%Y%m%d', time.localtime(time.time()))

    kospi = stock.get_market_fundamental_by_ticker(now, market="KOSPI")
    kosdaq = stock.get_market_fundamental_by_ticker(now, market="KOSDAQ")

    df = pd.concat([kospi, kosdaq])
    df['ticker'] = df.index
    df = df[['ticker', 'EPS', 'PER', 'BPS', 'PBR', 'DPS', 'DIV']]

    make_csv(df, 'fundament_')
def get_sector_tickers_data():
    kospi, kosdaq = get_index()
    markets = kospi + kosdaq

    stocks = []
    for idx in markets:
        stocks.append(stock.get_index_portfolio_deposit_file(idx))
        time.sleep(0.5)

    df = pd.DataFrame({'index': markets, 'stocks': stocks})
    make_csv(df, 'sector_tickers_')
def get_sector_index_data():
    kospi, kosdaq = get_index()

    kospi_df = pd.DataFrame({'index': kospi, 'name': get_name(kospi)})
    kosdaq_df = pd.DataFrame({'index': kosdaq, 'name': get_name(kosdaq)})

    df = pd.concat([kospi_df, kosdaq_df])
    make_csv(df, 'sector_index_')
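# Usage sketch (not from the original source): the three collectors above can
# be run back to back to refresh the daily snapshots. It assumes this module
# is importable and that pykrx's `stock` plus the get_index, get_name, and
# make_csv helpers are defined elsewhere in it; the output file names in the
# comments are guesses based on the prefixes passed to make_csv.

if __name__ == '__main__':
    get_fundamental_data()       # e.g. fundament_<YYYYMMDD>.csv
    get_sector_tickers_data()    # e.g. sector_tickers_<YYYYMMDD>.csv
    get_sector_index_data()      # e.g. sector_index_<YYYYMMDD>.csv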
def input_fn(filepath, mode=None):
    """
    Implements the recommended input pipeline architecture of TensorFlow.

    :param filepath: File to be loaded into memory line by line (MUST be a CSV).
    :param mode: One of the tf.estimator.ModeKeys (TRAIN, EVAL, PREDICT).
    :return: The input features and target values for the current step.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    repeat_count = None if is_training else 1

    default_val = [[0.0] for _ in range(PARAMS['CONTEXT_SIZE'])]
    # The output class should have dtype tf.int32 for the accuracy calculations in model_fn.
    default_val.append([0])

    def decode_csv(line):
        line = tf.decode_csv(line, default_val)
        return {'context': line[:-1]}, line[-1]

    dataset = tf.contrib.data \
        .TextLineDataset(utils.make_csv(filepath, PARAMS['CONTEXT_SIZE'])) \
        .map(decode_csv, num_threads=4 if IS_GPU_AVL else 2)  # preprocessing

    # Shuffle the input only while training
    if is_training:
        dataset = dataset.shuffle(buffer_size=PARAMS['BATCH_SIZE'] * 2)

    dataset = dataset.repeat(repeat_count)
    dataset = dataset.batch(PARAMS['BATCH_SIZE'])

    iterator = dataset.make_one_shot_iterator()
    next_feature, next_label = iterator.get_next()

    return next_feature, next_label
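# Usage sketch (assumption, not from the original source): input_fn is meant
# to be handed to the tf.estimator API as a closure. `rnn_regressor` and
# FLAGS['TEST_DATA'] appear later in this file; FLAGS['TRAIN_DATA'],
# FLAGS['EVAL_DATA'], and PARAMS['TRAIN_STEPS'] are hypothetical keys used
# here only for illustration.

rnn_regressor.train(
    input_fn=lambda: input_fn(FLAGS['TRAIN_DATA'],
                              mode=tf.estimator.ModeKeys.TRAIN),
    steps=PARAMS['TRAIN_STEPS'])

eval_results = rnn_regressor.evaluate(
    input_fn=lambda: input_fn(FLAGS['EVAL_DATA'],
                              mode=tf.estimator.ModeKeys.EVAL))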
def train(args):
    # Device Configuration #
    device = torch.device(
        f'cuda:{args.gpu_num}' if torch.cuda.is_available() else 'cpu')

    # Fix Seed for Reproducibility #
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Samples, Plots, Weights and CSV Paths #
    paths = [args.samples_path, args.plots_path, args.weights_path, args.csv_path]
    for path in paths:
        make_dirs(path)

    # Prepare Data #
    data = pd.read_csv(args.data_path)[args.column]

    # Pre-processing #
    scaler_1 = StandardScaler()
    scaler_2 = StandardScaler()
    preprocessed_data = pre_processing(data, scaler_1, scaler_2, args.delta)

    X = moving_windows(preprocessed_data, args.ts_dim)
    label = moving_windows(data.to_numpy(), args.ts_dim)

    # Prepare Networks #
    D = Discriminator(args.ts_dim).to(device)
    G = Generator(args.latent_dim, args.ts_dim, args.conditional_dim).to(device)

    # Loss Function #
    if args.criterion == 'l2':
        criterion = nn.MSELoss()
    elif args.criterion == 'wgangp':
        pass
    else:
        raise NotImplementedError

    # Optimizers #
    D_optim = torch.optim.Adam(D.parameters(), lr=args.lr, betas=(0.5, 0.9))
    G_optim = torch.optim.Adam(G.parameters(), lr=args.lr, betas=(0.5, 0.9))

    D_optim_scheduler = get_lr_scheduler(D_optim, args)
    G_optim_scheduler = get_lr_scheduler(G_optim, args)

    # Lists #
    D_losses, G_losses = list(), list()

    # Train #
    print("Training Time Series GAN started with a total of {} epochs.".format(
        args.num_epochs))

    for epoch in range(args.num_epochs):

        # The WGAN-GP critic is updated several times per generator step #
        if args.criterion == 'l2':
            n_critics = 1
        elif args.criterion == 'wgangp':
            n_critics = 5

        #######################
        # Train Discriminator #
        #######################

        for j in range(n_critics):
            series, start_dates = get_samples(X, label, args.batch_size)

            # Data Preparation #
            series = series.to(device)
            noise = torch.randn(args.batch_size, 1, args.latent_dim).to(device)

            # Adversarial Loss using Real Series #
            prob_real = D(series.float())

            if args.criterion == 'l2':
                real_labels = torch.ones(prob_real.size()).to(device)
                D_real_loss = criterion(prob_real, real_labels)
            elif args.criterion == 'wgangp':
                D_real_loss = -torch.mean(prob_real)

            # Adversarial Loss using Fake Series #
            fake_series = G(noise)
            fake_series = torch.cat(
                (series[:, :, :args.conditional_dim].float(), fake_series.float()),
                dim=2)

            prob_fake = D(fake_series.detach())

            if args.criterion == 'l2':
                fake_labels = torch.zeros(prob_fake.size()).to(device)
                D_fake_loss = criterion(prob_fake, fake_labels)
            elif args.criterion == 'wgangp':
                D_fake_loss = torch.mean(prob_fake)
                # Gradient penalty; weighted by lambda_gp when forming the total loss #
                D_gp_loss = get_gradient_penalty(
                    D, series.float(), fake_series.float(), device)

            # Calculate Total Discriminator Loss #
            D_loss = D_fake_loss + D_real_loss

            if args.criterion == 'wgangp':
                D_loss += args.lambda_gp * D_gp_loss

            # Back Propagation and Update #
            D_optim.zero_grad()
            D_loss.backward()
            D_optim.step()

        ###################
        # Train Generator #
        ###################

        # Adversarial Loss #
        fake_series = G(noise)
        fake_series = torch.cat(
            (series[:, :, :args.conditional_dim].float(), fake_series.float()),
            dim=2)
        prob_fake = D(fake_series)

        # Calculate Total Generator Loss #
        if args.criterion == 'l2':
            real_labels = torch.ones(prob_fake.size()).to(device)
            G_loss = criterion(prob_fake, real_labels)
        elif args.criterion == 'wgangp':
            G_loss = -torch.mean(prob_fake)

        # Back Propagation and Update #
        G_optim.zero_grad()
        G_loss.backward()
        G_optim.step()

        # Add Items to Lists #
        D_losses.append(D_loss.item())
        G_losses.append(G_loss.item())

        ####################
        # Print Statistics #
        ####################

        print("Epochs [{}/{}] | D Loss {:.4f} | G Loss {:.4f}".format(
            epoch + 1, args.num_epochs, np.average(D_losses), np.average(G_losses)))

        # Adjust Learning Rate #
        D_optim_scheduler.step()
        G_optim_scheduler.step()

        # Save Model Weights and Series #
        if (epoch + 1) % args.save_every == 0:
            torch.save(
                G.state_dict(),
                os.path.join(
                    args.weights_path,
                    'TimeSeries_Generator_using{}_Epoch_{}.pkl'.format(
                        args.criterion.upper(), epoch + 1)))

            series, fake_series = generate_fake_samples(
                X, label, G, scaler_1, scaler_2, args, device)

            plot_sample(series, fake_series, epoch, args)
            make_csv(series, fake_series, epoch, args)

    print("Training finished.")
def main(args):
    # Device Configuration #
    device = torch.device(
        f'cuda:{args.gpu_num}' if torch.cuda.is_available() else 'cpu')

    # Fix Seed for Reproducibility #
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Samples, Weights, CSV and Inference Paths #
    paths = [args.samples_path, args.weights_path, args.csv_path, args.inference_path]
    for path in paths:
        make_dirs(path)

    # Prepare Data #
    data = pd.read_csv(args.data_path)[args.column]

    # Pre-processing #
    scaler_1 = StandardScaler()
    scaler_2 = StandardScaler()
    preprocessed_data = pre_processing(data, scaler_1, scaler_2, args.constant, args.delta)

    train_X, train_Y, test_X, test_Y = prepare_data(data, preprocessed_data, args)

    train_X = moving_windows(train_X, args.ts_dim)
    train_Y = moving_windows(train_Y, args.ts_dim)

    test_X = moving_windows(test_X, args.ts_dim)
    test_Y = moving_windows(test_Y, args.ts_dim)

    # Prepare Networks #
    if args.model == 'conv':
        D = ConvDiscriminator(args.ts_dim).to(device)
        G = ConvGenerator(args.latent_dim, args.ts_dim).to(device)
    elif args.model == 'lstm':
        D = LSTMDiscriminator(args.ts_dim).to(device)
        G = LSTMGenerator(args.latent_dim, args.ts_dim).to(device)
    else:
        raise NotImplementedError

    #########
    # Train #
    #########

    if args.mode == 'train':

        # Loss Function #
        if args.criterion == 'l2':
            criterion = nn.MSELoss()
        elif args.criterion == 'wgangp':
            pass
        else:
            raise NotImplementedError

        # Optimizers #
        if args.optim == 'sgd':
            D_optim = torch.optim.SGD(D.parameters(), lr=args.lr, momentum=0.9)
            G_optim = torch.optim.SGD(G.parameters(), lr=args.lr, momentum=0.9)
        elif args.optim == 'adam':
            D_optim = torch.optim.Adam(D.parameters(), lr=args.lr, betas=(0., 0.9))
            G_optim = torch.optim.Adam(G.parameters(), lr=args.lr, betas=(0., 0.9))
        else:
            raise NotImplementedError

        D_optim_scheduler = get_lr_scheduler(D_optim, args)
        G_optim_scheduler = get_lr_scheduler(G_optim, args)

        # Lists #
        D_losses, G_losses = list(), list()

        # Train #
        print("Training Time Series GAN started with a total of {} epochs.".format(
            args.num_epochs))

        for epoch in range(args.num_epochs):

            #######################
            # Train Discriminator #
            #######################

            # The WGAN-GP critic is updated several times per generator step #
            if args.criterion == 'l2':
                n_critics = 1
            elif args.criterion == 'wgangp':
                n_critics = 5

            for j in range(n_critics):
                series, start_dates = get_samples(train_X, train_Y, args.batch_size)

                # Data Preparation #
                series = series.to(device)
                noise = torch.randn(args.batch_size, 1, args.latent_dim).to(device)

                # Adversarial Loss using Real Series #
                prob_real = D(series.float())

                if args.criterion == 'l2':
                    real_labels = torch.ones(prob_real.size()).to(device)
                    D_real_loss = criterion(prob_real, real_labels)
                elif args.criterion == 'wgangp':
                    D_real_loss = -torch.mean(prob_real)

                # Adversarial Loss using Fake Series #
                fake_series = G(noise)
                prob_fake = D(fake_series.detach())

                if args.criterion == 'l2':
                    fake_labels = torch.zeros(prob_fake.size()).to(device)
                    D_fake_loss = criterion(prob_fake, fake_labels)
                elif args.criterion == 'wgangp':
                    D_fake_loss = torch.mean(prob_fake)
                    # Gradient penalty; weighted by lambda_gp when forming the total loss #
                    D_gp_loss = get_gradient_penalty(
                        D, series.float(), fake_series.float(), device)

                # Calculate Total Discriminator Loss #
                D_loss = D_fake_loss + D_real_loss

                if args.criterion == 'wgangp':
                    D_loss += args.lambda_gp * D_gp_loss

                # Back Propagation and Update #
                D_optim.zero_grad()
                D_loss.backward()
                D_optim.step()

            ###################
            # Train Generator #
            ###################

            # Adversarial Loss #
            fake_series = G(noise)
            prob_fake = D(fake_series)

            # Calculate Total Generator Loss #
            if args.criterion == 'l2':
                real_labels = torch.ones(prob_fake.size()).to(device)
                G_loss = criterion(prob_fake, real_labels)
            elif args.criterion == 'wgangp':
                G_loss = -torch.mean(prob_fake)

            # Back Propagation and Update #
            G_optim.zero_grad()
            G_loss.backward()
            G_optim.step()

            # Add Items to Lists #
            D_losses.append(D_loss.item())
            G_losses.append(G_loss.item())

            # Adjust Learning Rate #
            D_optim_scheduler.step()
            G_optim_scheduler.step()

            # Print Statistics, Save Model Weights and Series #
            if (epoch + 1) % args.log_every == 0:

                # Print Statistics and Save Model #
                print("Epochs [{}/{}] | D Loss {:.4f} | G Loss {:.4f}".format(
                    epoch + 1, args.num_epochs, np.average(D_losses), np.average(G_losses)))

                torch.save(
                    G.state_dict(),
                    os.path.join(
                        args.weights_path,
                        'TS_using{}_and_{}_Epoch_{}.pkl'.format(
                            G.__class__.__name__, args.criterion.upper(), epoch + 1)))

                # Generate Samples and Save Plots and CSVs #
                series, fake_series = generate_fake_samples(
                    test_X, test_Y, G, scaler_1, scaler_2, args, device)

                plot_series(series, fake_series, G, epoch, args, args.samples_path)
                make_csv(series, fake_series, G, epoch, args, args.csv_path)

    ########
    # Test #
    ########

    elif args.mode == 'test':

        # Load Model Weights #
        G.load_state_dict(
            torch.load(
                os.path.join(
                    args.weights_path,
                    'TS_using{}_and_{}_Epoch_{}.pkl'.format(
                        G.__class__.__name__, args.criterion.upper(), args.num_epochs))))

        # Lists #
        real, fake = list(), list()

        # Inference #
        for idx in range(0, test_X.shape[0], args.ts_dim):

            # Do not plot if the remaining data is shorter than the time dimension #
            end_ix = idx + args.ts_dim
            if end_ix > len(test_X) - 1:
                break

            # Prepare Data #
            test_data = test_X[idx, :]
            test_data = np.expand_dims(test_data, axis=0)
            test_data = np.expand_dims(test_data, axis=1)
            test_data = torch.from_numpy(test_data).to(device)

            start = test_Y[idx, 0]

            noise = torch.randn(args.val_batch_size, 1, args.latent_dim).to(device)

            # Generate Fake Data #
            with torch.no_grad():
                fake_series = G(noise)

            # Convert to NumPy format for Saving #
            test_data = np.squeeze(test_data.cpu().data.numpy())
            fake_series = np.squeeze(fake_series.cpu().data.numpy())

            test_data = post_processing(test_data, start, scaler_1, scaler_2, args.delta)
            fake_series = post_processing(fake_series, start, scaler_1, scaler_2, args.delta)

            real += test_data.tolist()
            fake += fake_series.tolist()

        # Plot, Save to CSV file and Derive Metrics #
        plot_series(real, fake, G, args.num_epochs - 1, args, args.inference_path)
        make_csv(real, fake, G, args.num_epochs - 1, args, args.inference_path)
        derive_metrics(real, fake, args)

    else:
        raise NotImplementedError
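# Hypothetical entry point (not in the original source): main() expects an
# argparse.Namespace, so a driver along these lines is presumably used. Only
# a representative subset of the flags referenced above is sketched here, and
# the defaults are illustrative rather than the authors' settings.

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', type=str, default='train', choices=['train', 'test'])
    parser.add_argument('--model', type=str, default='conv', choices=['conv', 'lstm'])
    parser.add_argument('--criterion', type=str, default='wgangp', choices=['l2', 'wgangp'])
    parser.add_argument('--optim', type=str, default='adam', choices=['sgd', 'adam'])
    parser.add_argument('--data_path', type=str, required=True)
    parser.add_argument('--column', type=str, required=True)
    parser.add_argument('--num_epochs', type=int, default=200)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=2e-4)
    parser.add_argument('--latent_dim', type=int, default=100)
    parser.add_argument('--ts_dim', type=int, default=30)
    parser.add_argument('--lambda_gp', type=float, default=10.0)
    # Remaining flags (seed, gpu_num, paths, constant, delta, log_every,
    # val_batch_size, ...) omitted for brevity.

    main(parser.parse_args())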
def generate_timeseries(args):
    # Device Configuration #
    device = torch.device(
        f'cuda:{args.gpu_num}' if torch.cuda.is_available() else 'cpu')

    # Inference Path #
    make_dirs(args.inference_path)

    # Prepare Generator #
    if args.model == 'skip':
        G = SkipGenerator(args.latent_dim, args.ts_dim, args.conditional_dim).to(device)
        G.load_state_dict(
            torch.load(
                os.path.join(
                    args.weights_path,
                    'TimeSeries_Generator_using{}_Epoch_{}.pkl'.format(
                        args.criterion.upper(), args.num_epochs))))
    else:
        raise NotImplementedError

    # Prepare Data #
    data = pd.read_csv(args.data_path)[args.column]

    scaler_1 = StandardScaler()
    scaler_2 = StandardScaler()

    preprocessed_data = pre_processing(data, scaler_1, scaler_2, args.delta)

    X = moving_windows(preprocessed_data, args.ts_dim)
    label = moving_windows(data.to_numpy(), args.ts_dim)

    # Lists #
    real, fake = list(), list()

    # Inference #
    for idx in range(0, data.shape[0], args.ts_dim):

        end_ix = idx + args.ts_dim
        if end_ix > len(data) - 1:
            break

        samples = X[idx, :]
        samples = np.expand_dims(samples, axis=0)
        samples = np.expand_dims(samples, axis=1)
        samples = torch.from_numpy(samples).to(device)

        start_dates = label[idx, 0]

        noise = torch.randn(args.val_batch_size, 1, args.latent_dim).to(device)

        with torch.no_grad():
            fake_series = G(noise)

        fake_series = torch.cat(
            (samples[:, :, :args.conditional_dim].float(), fake_series.float()), dim=2)

        samples = np.squeeze(samples.cpu().data.numpy())
        fake_series = np.squeeze(fake_series.cpu().data.numpy())

        samples = post_processing(samples, start_dates, scaler_1, scaler_2, args.delta)
        fake_series = post_processing(fake_series, start_dates, scaler_1, scaler_2, args.delta)

        real += samples.tolist()
        fake += fake_series.tolist()

    plot_sample(real, fake, args.num_epochs - 1, args)
    make_csv(real, fake, args.num_epochs - 1, args)
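# A plausible sketch of the moving_windows helper used throughout these
# scripts, assuming it slides a length-ts_dim window over a 1-D series and
# stacks the windows row-wise; the repository's actual helper may differ.
import numpy as np


def _moving_windows_sketch(series, ts_dim):
    series = np.asarray(series)
    windows = [series[i:i + ts_dim] for i in range(len(series) - ts_dim + 1)]
    return np.stack(windows)  # shape: (len(series) - ts_dim + 1, ts_dim)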
        # Stop training midway if more tuning is required.
        print("Stop training now?\nType y for Yes")
        rlist = select([sys.stdin], [], [], TIMEOUT)[0]

        feedback = None
        if rlist:
            feedback = sys.stdin.readline().strip()

        if feedback in ('y', 'yes'):
            print('Finishing the training')
            break
        else:
            print('Training for the next iteration')
            continue

    # Make predictions on the test dataset
    if DO_PREDICTION and not force_create_vocab:
        # tf returns earlier than the json dump finishes; IDX_WORD raises an error
        # because of that. Get results on the test data after all the training.
        predictions = rnn_regressor.predict(
            input_fn=lambda: input_fn(FLAGS['TEST_DATA'],
                                      mode=tf.estimator.ModeKeys.PREDICT))

        with open(utils.make_csv(FLAGS['TEST_DATA'], PARAMS['CONTEXT_SIZE']), 'r') as infile:
            for i, pred in enumerate(predictions):
                list_idx = (infile.readline().strip().split(sep=',')[:-1]
                            + [pred['predictions'].tolist()])
                pred_word = utils.convert_index_to_word(list_idx)
                print(' '.join(pred_word))


def create_new_text(initializer, how_many_words=10):
    # TODO: predict() needs an input_fn built from `initializer`;
    # this stub is not functional yet.
    for i in range(how_many_words):
        rnn_regressor.predict()
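# A hedged sketch of how the unfinished create_new_text above could work:
# keep a sliding context of word indices, predict the next index, and append
# it to the context. The use of numpy_input_fn (TF 1.x) and the rounding of
# the regressor's float output to a vocabulary index are assumptions for
# illustration, not the original design.
import numpy as np


def create_new_text_sketch(initializer, how_many_words=10):
    context = list(initializer)  # seed: CONTEXT_SIZE word indices
    generated = []

    for _ in range(how_many_words):
        pred_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'context': np.array([context[-PARAMS['CONTEXT_SIZE']:]],
                                   dtype=np.float32)},
            shuffle=False)

        pred = next(rnn_regressor.predict(input_fn=pred_input_fn))
        next_idx = int(round(float(np.ravel(pred['predictions'])[0])))

        generated.append(next_idx)
        context.append(next_idx)

    return ' '.join(utils.convert_index_to_word(generated))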