Example #1
def get_fundamental_data():
    now = time.strftime('%Y%m%d', time.localtime(time.time()))
    kospi = stock.get_market_fundamental_by_ticker(now, market="KOSPI")
    kosdaq = stock.get_market_fundamental_by_ticker(now, market="KOSDAQ")

    df = pd.concat([kospi, kosdaq])
    df['ticker'] = df.index
    df = df[['ticker', 'EPS', 'PER', 'BPS', 'PBR', 'DPS', 'DIV']]
    make_csv(df, 'fundament_')
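
For context: get_market_fundamental_by_ticker comes from the pykrx library, while make_csv is a project-local helper that is not shown here. A minimal standalone sketch of the same fetch-and-save flow, with DataFrame.to_csv standing in for make_csv and an illustrative filename (assumes pykrx is installed):

import time

import pandas as pd
from pykrx import stock

today = time.strftime('%Y%m%d')  # strftime defaults to local time
kospi = stock.get_market_fundamental_by_ticker(today, market="KOSPI")
kosdaq = stock.get_market_fundamental_by_ticker(today, market="KOSDAQ")

df = pd.concat([kospi, kosdaq])
df.insert(0, 'ticker', df.index)  # pykrx indexes the result by ticker
df.to_csv('fundament_{}.csv'.format(today), index=False)
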
Example #2
def get_sector_tickers_data():
    kospi, kosdaq = get_index()
    markets = kospi + kosdaq
    stocks = []

    for idx in markets:
        stocks.append(stock.get_index_portfolio_deposit_file(idx))
        time.sleep(0.5)

    df = pd.DataFrame({'index': markets, 'stocks': stocks})

    make_csv(df, 'sector_tickers_')
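
Here get_index is a project helper returning lists of index codes, and get_index_portfolio_deposit_file is pykrx's own API for fetching an index's constituent tickers (the 0.5 s sleep rate-limits requests to the KRX server). A plausible standalone equivalent, assuming a recent pykrx where get_index_ticker_list exists:

from pykrx import stock

# Index codes for both markets (stands in for the project's get_index helper)
indices = (stock.get_index_ticker_list(market="KOSPI")
           + stock.get_index_ticker_list(market="KOSDAQ"))

# Constituent tickers of the first index
print(indices[0], stock.get_index_portfolio_deposit_file(indices[0])[:5])
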
Example #3
def get_sector_index_data():
    kospi, kosdaq = get_index()
    
    kospi_df = pd.DataFrame({
            'index': kospi,
            'name': get_name(kospi)
        })

    kosdaq_df = pd.DataFrame({
            'index': kosdaq,
            'name': get_name(kosdaq)
        })

    df = pd.concat([kospi_df, kosdaq_df])
    make_csv(df, 'sector_index_')
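
get_name presumably maps index codes to human-readable names; pykrx exposes this directly as get_index_ticker_name. A sketch under the same assumption of a recent pykrx:

from pykrx import stock

codes = stock.get_index_ticker_list(market="KOSPI")
names = [stock.get_index_ticker_name(code) for code in codes]
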
Example #4
def input_fn(filepath, mode=None):
    """
    Implements the recommended input pipeline architecture of TensorFlow.
    :param filepath: File to be loaded into memory line by line. (MUST be a CSV)
    :param mode: One of the tf.estimator.ModeKeys (TRAIN, EVAL, PREDICT)
    :return: The input features and target values for the current step
    """

    is_training = mode == tf.estimator.ModeKeys.TRAIN
    repeat_count = None if is_training else 1

    default_val = [[0.0] for _ in range(PARAMS['CONTEXT_SIZE'])]
    default_val.append([0])  # output class should have data-type tf.int32, for accuracy calculations in model_fn

    def decode_csv(line):
        line = tf.decode_csv(line, default_val)
        return {'context': line[:-1]}, line[-1]

    dataset = tf.contrib.data\
        .TextLineDataset(utils.make_csv(filepath, PARAMS['CONTEXT_SIZE']))\
        .map(decode_csv, num_threads=4 if IS_GPU_AVL else 2)  # preprocessing
    # shuffle input
    if is_training:
        dataset = dataset.shuffle(buffer_size=PARAMS['BATCH_SIZE'] * 2)
    dataset = dataset.repeat(repeat_count)
    dataset = dataset.batch(PARAMS['BATCH_SIZE'])
    iterator = dataset.make_one_shot_iterator()
    next_feature, next_label = iterator.get_next()
    return next_feature, next_label
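
This is TF 1.x-era code: tf.contrib.data and the num_threads argument to map() were removed in later releases. A sketch of the same pipeline against the stable tf.data API, with illustrative constants standing in for PARAMS:

import tensorflow as tf

CONTEXT_SIZE = 8   # illustrative; the original reads PARAMS['CONTEXT_SIZE']
BATCH_SIZE = 32    # illustrative; the original reads PARAMS['BATCH_SIZE']

def decode_csv(line):
    defaults = [[0.0]] * CONTEXT_SIZE + [[0]]  # last column is the int label
    fields = tf.io.decode_csv(line, defaults)
    return {'context': tf.stack(fields[:-1])}, fields[-1]

def input_fn(csv_path, training=True):
    ds = tf.data.TextLineDataset(csv_path)
    ds = ds.map(decode_csv, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    if training:
        ds = ds.shuffle(buffer_size=BATCH_SIZE * 2)
    return ds.repeat(None if training else 1).batch(BATCH_SIZE)
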
Example #5
def train(args):

    # Device Configuration #
    device = torch.device(
        f'cuda:{args.gpu_num}' if torch.cuda.is_available() else 'cpu')

    # Fix Seed for Reproducibility #
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Samples, Plots, Weights and CSV Path #
    paths = [
        args.samples_path, args.plots_path, args.weights_path, args.csv_path
    ]
    for path in paths:
        make_dirs(path)

    # Prepare Data #
    data = pd.read_csv(args.data_path)[args.column]

    # Pre-processing #
    scaler_1 = StandardScaler()
    scaler_2 = StandardScaler()
    preprocessed_data = pre_processing(data, scaler_1, scaler_2, args.delta)

    X = moving_windows(preprocessed_data, args.ts_dim)
    label = moving_windows(data.to_numpy(), args.ts_dim)

    # Prepare Networks #
    D = Discriminator(args.ts_dim).to(device)
    G = Generator(args.latent_dim, args.ts_dim,
                  args.conditional_dim).to(device)

    # Loss Function #
    if args.criterion == 'l2':
        criterion = nn.MSELoss()
    elif args.criterion == 'wgangp':
        pass
    else:
        raise NotImplementedError

    # Optimizers #
    D_optim = torch.optim.Adam(D.parameters(), lr=args.lr, betas=(0.5, 0.9))
    G_optim = torch.optim.Adam(G.parameters(), lr=args.lr, betas=(0.5, 0.9))

    D_optim_scheduler = get_lr_scheduler(D_optim, args)
    G_optim_scheduler = get_lr_scheduler(G_optim, args)

    # Lists #
    D_losses, G_losses = list(), list()

    # Train #
    print("Training Time Series GAN started with total epoch of {}.".format(
        args.num_epochs))

    for epoch in range(args.num_epochs):

        if args.criterion == 'l2':
            n_critics = 1
        elif args.criterion == 'wgangp':
            n_critics = 5

        #######################
        # Train Discriminator #
        #######################

        for j in range(n_critics):
            series, start_dates = get_samples(X, label, args.batch_size)

            # Data Preparation #
            series = series.to(device)
            noise = torch.randn(args.batch_size, 1, args.latent_dim).to(device)

            # Adversarial Loss using Real Series #
            prob_real = D(series.float())

            if args.criterion == 'l2':
                real_labels = torch.ones(prob_real.size()).to(device)
                D_real_loss = criterion(prob_real, real_labels)

            elif args.criterion == 'wgangp':
                D_real_loss = -torch.mean(prob_real)

            # Adversarial Loss using Fake Series #
            fake_series = G(noise)
            fake_series = torch.cat(
                (series[:, :, :args.conditional_dim].float(),
                 fake_series.float()),
                dim=2)

            prob_fake = D(fake_series.detach())

            if args.criterion == 'l2':
                fake_labels = torch.zeros(prob_fake.size()).to(device)
                D_fake_loss = criterion(prob_fake, fake_labels)

            elif args.criterion == 'wgangp':
                D_fake_loss = torch.mean(prob_fake)
                # gradient penalty; lambda_gp is applied once, when added below
                D_gp_loss = get_gradient_penalty(
                    D, series.float(), fake_series.float(), device)

            # Calculate Total Discriminator Loss #
            D_loss = D_fake_loss + D_real_loss

            if args.criterion == 'wgangp':
                D_loss += args.lambda_gp * D_gp_loss

            # Back Propagation and Update #
            D_optim.zero_grad()
            D_loss.backward()
            D_optim.step()

        ###################
        # Train Generator #
        ###################

        # Adversarial Loss #
        fake_series = G(noise)
        fake_series = torch.cat(
            (series[:, :, :args.conditional_dim].float(), fake_series.float()),
            dim=2)
        prob_fake = D(fake_series)

        # Calculate Total Generator Loss #
        if args.criterion == 'l2':
            real_labels = torch.ones(prob_fake.size()).to(device)
            G_loss = criterion(prob_fake, real_labels)

        elif args.criterion == 'wgangp':
            G_loss = -torch.mean(prob_fake)

        # Back Propagation and Update #
        G_optim.zero_grad()
        G_loss.backward()
        G_optim.step()

        # Add items to Lists #
        D_losses.append(D_loss.item())
        G_losses.append(G_loss.item())

        ####################
        # Print Statistics #
        ####################

        print("Epochs [{}/{}] | D Loss {:.4f} | G Loss {:.4f}".format(
            epoch + 1, args.num_epochs, np.average(D_losses),
            np.average(G_losses)))

        # Adjust Learning Rate #
        D_optim_scheduler.step()
        G_optim_scheduler.step()

        # Save Model Weights and Series #
        if (epoch + 1) % args.save_every == 0:
            torch.save(
                G.state_dict(),
                os.path.join(
                    args.weights_path,
                    'TimeSeries_Generator_using{}_Epoch_{}.pkl'.format(
                        args.criterion.upper(), epoch + 1)))

            series, fake_series = generate_fake_samples(
                X, label, G, scaler_1, scaler_2, args, device)
            plot_sample(series, fake_series, epoch, args)
            make_csv(series, fake_series, epoch, args)

    print("Training finished.")
Example #6
def main(args):

    # Device Configuration #
    device = torch.device(
        f'cuda:{args.gpu_num}' if torch.cuda.is_available() else 'cpu')

    # Fix Seed for Reproducibility #
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Samples, Plots, Weights and CSV Path #
    paths = [
        args.samples_path, args.weights_path, args.csv_path,
        args.inference_path
    ]
    for path in paths:
        make_dirs(path)

    # Prepare Data #
    data = pd.read_csv(args.data_path)[args.column]

    # Pre-processing #
    scaler_1 = StandardScaler()
    scaler_2 = StandardScaler()
    preprocessed_data = pre_processing(data, scaler_1, scaler_2, args.constant,
                                       args.delta)

    train_X, train_Y, test_X, test_Y = prepare_data(data, preprocessed_data,
                                                    args)

    train_X = moving_windows(train_X, args.ts_dim)
    train_Y = moving_windows(train_Y, args.ts_dim)

    test_X = moving_windows(test_X, args.ts_dim)
    test_Y = moving_windows(test_Y, args.ts_dim)

    # Prepare Networks #
    if args.model == 'conv':
        D = ConvDiscriminator(args.ts_dim).to(device)
        G = ConvGenerator(args.latent_dim, args.ts_dim).to(device)

    elif args.model == 'lstm':
        D = LSTMDiscriminator(args.ts_dim).to(device)
        G = LSTMGenerator(args.latent_dim, args.ts_dim).to(device)

    else:
        raise NotImplementedError

    #########
    # Train #
    #########

    if args.mode == 'train':

        # Loss Function #
        if args.criterion == 'l2':
            criterion = nn.MSELoss()

        elif args.criterion == 'wgangp':
            pass

        else:
            raise NotImplementedError

        # Optimizers #
        if args.optim == 'sgd':
            D_optim = torch.optim.SGD(D.parameters(), lr=args.lr, momentum=0.9)
            G_optim = torch.optim.SGD(G.parameters(), lr=args.lr, momentum=0.9)

        elif args.optim == 'adam':
            D_optim = torch.optim.Adam(D.parameters(),
                                       lr=args.lr,
                                       betas=(0., 0.9))
            G_optim = torch.optim.Adam(G.parameters(),
                                       lr=args.lr,
                                       betas=(0., 0.9))

        else:
            raise NotImplementedError

        D_optim_scheduler = get_lr_scheduler(D_optim, args)
        G_optim_scheduler = get_lr_scheduler(G_optim, args)

        # Lists #
        D_losses, G_losses = list(), list()

        # Train #
        print("Training Time Series GAN for {} epochs.".format(
            args.num_epochs))

        for epoch in range(args.num_epochs):

            #######################
            # Train Discriminator #
            #######################

            if args.criterion == 'l2':
                n_critics = 1
            elif args.criterion == 'wgangp':
                n_critics = 5

            for j in range(n_critics):
                series, start_dates = get_samples(train_X, train_Y,
                                                  args.batch_size)

                # Data Preparation #
                series = series.to(device)
                noise = torch.randn(args.batch_size, 1,
                                    args.latent_dim).to(device)

                # Adversarial Loss using Real Series #
                prob_real = D(series.float())

                if args.criterion == 'l2':
                    real_labels = torch.ones(prob_real.size()).to(device)
                    D_real_loss = criterion(prob_real, real_labels)

                elif args.criterion == 'wgangp':
                    D_real_loss = -torch.mean(prob_real)

                # Adversarial Loss using Fake Series #
                fake_series = G(noise)
                prob_fake = D(fake_series.detach())

                if args.criterion == 'l2':
                    fake_labels = torch.zeros(prob_fake.size()).to(device)
                    D_fake_loss = criterion(prob_fake, fake_labels)

                elif args.criterion == 'wgangp':
                    D_fake_loss = torch.mean(prob_fake)
                    # gradient penalty; lambda_gp is applied once, when added below
                    D_gp_loss = get_gradient_penalty(
                        D, series.float(), fake_series.float(), device)

                # Calculate Total Discriminator Loss #
                D_loss = D_fake_loss + D_real_loss

                if args.criterion == 'wgangp':
                    D_loss += args.lambda_gp * D_gp_loss

                # Back Propagation and Update #
                D_optim.zero_grad()
                D_loss.backward()
                D_optim.step()

            ###################
            # Train Generator #
            ###################

            # Adversarial Loss #
            fake_series = G(noise)
            prob_fake = D(fake_series)

            # Calculate Total Generator Loss #
            if args.criterion == 'l2':
                real_labels = torch.ones(prob_fake.size()).to(device)
                G_loss = criterion(prob_fake, real_labels)

            elif args.criterion == 'wgangp':
                G_loss = -torch.mean(prob_fake)

            # Back Propagation and Update #
            G_optim.zero_grad()
            G_loss.backward()
            G_optim.step()

            # Add items to Lists #
            D_losses.append(D_loss.item())
            G_losses.append(G_loss.item())

            # Adjust Learning Rate #
            D_optim_scheduler.step()
            G_optim_scheduler.step()

            # Print Statistics, Save Model Weights and Series #
            if (epoch + 1) % args.log_every == 0:

                # Print Statistics and Save Model #
                print("Epochs [{}/{}] | D Loss {:.4f} | G Loss {:.4f}".format(
                    epoch + 1, args.num_epochs, np.average(D_losses),
                    np.average(G_losses)))
                torch.save(
                    G.state_dict(),
                    os.path.join(
                        args.weights_path,
                        'TS_using{}_and_{}_Epoch_{}.pkl'.format(
                            G.__class__.__name__, args.criterion.upper(),
                            epoch + 1)))

                # Generate Samples and Save Plots and CSVs #
                series, fake_series = generate_fake_samples(
                    test_X, test_Y, G, scaler_1, scaler_2, args, device)
                plot_series(series, fake_series, G, epoch, args,
                            args.samples_path)
                make_csv(series, fake_series, G, epoch, args, args.csv_path)

    ########
    # Test #
    ########

    elif args.mode == 'test':

        # Load Model Weights #
        G.load_state_dict(
            torch.load(
                os.path.join(
                    args.weights_path, 'TS_using{}_and_{}_Epoch_{}.pkl'.format(
                        G.__class__.__name__, args.criterion.upper(),
                        args.num_epochs))))

        # Lists #
        real, fake = list(), list()

        # Inference #
        for idx in range(0, test_X.shape[0], args.ts_dim):

            # Stop once the remaining data is shorter than the window #
            end_ix = idx + args.ts_dim

            if end_ix > len(test_X) - 1:
                break

            # Prepare Data #
            test_data = test_X[idx, :]
            test_data = np.expand_dims(test_data, axis=0)
            test_data = np.expand_dims(test_data, axis=1)
            test_data = torch.from_numpy(test_data).to(device)

            start = test_Y[idx, 0]

            noise = torch.randn(args.val_batch_size, 1,
                                args.latent_dim).to(device)

            # Generate Fake Data #
            with torch.no_grad():
                fake_series = G(noise)

            # Convert to Numpy format for Saving #
            test_data = np.squeeze(test_data.cpu().data.numpy())
            fake_series = np.squeeze(fake_series.cpu().data.numpy())

            test_data = post_processing(test_data, start, scaler_1, scaler_2,
                                        args.delta)
            fake_series = post_processing(fake_series, start, scaler_1,
                                          scaler_2, args.delta)

            real += test_data.tolist()
            fake += fake_series.tolist()

        # Plot, Save to CSV file and Derive Metrics #
        plot_series(real, fake, G, args.num_epochs - 1, args,
                    args.inference_path)
        make_csv(real, fake, G, args.num_epochs - 1, args, args.inference_path)
        derive_metrics(real, fake, args)

    else:
        raise NotImplementedError
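
get_gradient_penalty is another project helper; the standard WGAN-GP penalty it presumably implements (Gulrajani et al., 2017) can be sketched as follows. This is a generic version for (batch, channel, time) tensors, not the repository's exact code:

import torch

def gradient_penalty(D, real, fake, device):
    # One random interpolation coefficient per sample, broadcast over (N, C, T)
    alpha = torch.rand(real.size(0), 1, 1, device=device)
    interp = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
    scores = D(interp)
    grads = torch.autograd.grad(outputs=scores, inputs=interp,
                                grad_outputs=torch.ones_like(scores),
                                create_graph=True, retain_graph=True)[0]
    grads = grads.view(grads.size(0), -1)
    # Penalize deviation of the per-sample gradient norm from 1
    return ((grads.norm(2, dim=1) - 1) ** 2).mean()
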
Example #7
def generate_timeseries(args):

    # Device Configuration #
    device = torch.device(
        f'cuda:{args.gpu_num}' if torch.cuda.is_available() else 'cpu')

    # Inference Path #
    make_dirs(args.inference_path)

    # Prepare Generator #
    if args.model == 'skip':
        G = SkipGenerator(args.latent_dim, args.ts_dim,
                          args.conditional_dim).to(device)
        G.load_state_dict(
            torch.load(
                os.path.join(
                    args.weights_path,
                    'TimeSeries_Generator_using{}_Epoch_{}.pkl'.format(
                        args.criterion.upper(), args.num_epochs))))

    else:
        raise NotImplementedError

    # Prepare Data #
    data = pd.read_csv(args.data_path)[args.column]

    scaler_1 = StandardScaler()
    scaler_2 = StandardScaler()

    preprocessed_data = pre_processing(data, scaler_1, scaler_2, args.delta)

    X = moving_windows(preprocessed_data, args.ts_dim)
    label = moving_windows(data.to_numpy(), args.ts_dim)

    # Lists #
    real, fake = list(), list()

    # Inference #
    for idx in range(0, data.shape[0], args.ts_dim):

        end_ix = idx + args.ts_dim

        if end_ix > len(data) - 1:
            break

        samples = X[idx, :]
        samples = np.expand_dims(samples, axis=0)
        samples = np.expand_dims(samples, axis=1)

        samples = torch.from_numpy(samples).to(device)
        start_dates = label[idx, 0]

        noise = torch.randn(args.val_batch_size, 1, args.latent_dim).to(device)

        with torch.no_grad():
            fake_series = G(noise)
        fake_series = torch.cat((samples[:, :, :args.conditional_dim].float(),
                                 fake_series.float()),
                                dim=2)

        samples = np.squeeze(samples.cpu().data.numpy())
        fake_series = np.squeeze(fake_series.cpu().data.numpy())

        samples = post_processing(samples, start_dates, scaler_1, scaler_2,
                                  args.delta)
        fake_series = post_processing(fake_series, start_dates, scaler_1,
                                      scaler_2, args.delta)

        real += samples.tolist()
        fake += fake_series.tolist()

    plot_sample(real, fake, args.num_epochs - 1, args)
    make_csv(real, fake, args.num_epochs - 1, args)
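
moving_windows is not shown in any of these examples; judging from how its output is indexed (X[idx, :], windows of length ts_dim), a plausible implementation is a simple sliding window over a 1-D series. A guess, not the project's actual code:

import numpy as np

def moving_windows(x, ts_dim):
    # Overlapping windows of length ts_dim: shape (len(x) - ts_dim + 1, ts_dim)
    x = np.asarray(x)
    return np.stack([x[i:i + ts_dim] for i in range(len(x) - ts_dim + 1)])
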
Example #8
    # Stop training midway if more tuning is required.
    print("Stop training now?\nType y for Yes")
    rlist = select([sys.stdin], [], [], TIMEOUT)[0]
    feedback = None
    if rlist:
        feedback = sys.stdin.readline().strip()
        if feedback in ('y', 'yes'):
            print('Finishing the training')
            break
    else:
        print('Training for the next iteration')
        continue

# Make predictions on the test dataset
if DO_PREDICTION and not force_create_vocab:  # TF returns earlier than the JSON dump; IDX_WORD errors because of that
    # get results on the test data after all the training
    predictions = rnn_regressor.predict(input_fn=lambda: input_fn(FLAGS['TEST_DATA'], mode=tf.estimator.ModeKeys.PREDICT))
    with open(utils.make_csv(FLAGS['TEST_DATA'], PARAMS['CONTEXT_SIZE']), 'r') as infile:
        for i, pred in enumerate(predictions):
            list_idx = infile.readline().strip().split(sep=',')[:-1] + [pred['predictions'].tolist()]
            pred_word = utils.convert_index_to_word(list_idx)
            print(' '.join(pred_word))


def create_new_text(initializer, how_many_words=10):
    # NOTE: tf.estimator's predict() requires an input_fn argument; it would
    # have to be built from `initializer` and the words generated so far.
    for i in range(how_many_words):
        rnn_regressor.predict()
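
The stop-prompt above polls stdin with select() so training is not blocked waiting for input. A self-contained version of that pattern (TIMEOUT is in seconds; select() on sys.stdin works on Unix-like systems, not on Windows):

import sys
from select import select

TIMEOUT = 5  # seconds to wait for a keypress (illustrative)

print("Stop training now?\nType y for Yes")
ready, _, _ = select([sys.stdin], [], [], TIMEOUT)
if ready:
    answer = sys.stdin.readline().strip().lower()
    should_stop = answer in ('y', 'yes')
else:
    should_stop = False  # timed out: keep training
print('Stopping' if should_stop else 'Continuing')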