Exemplo n.º 1
0
def get_same_industry_tickers(ticker, samplesize):
    """Return data for a random sample of tickers from the same industry.

    The industry of ``ticker`` is looked up in ``./tickers.csv`` (columns
    ``ticker`` and ``industry``). ``samplesize`` other tickers from that
    industry are drawn at random and the frame returned by
    ``yahoo_reader.finance_data(...).get_fix_yahoo_data()`` for each of them
    is stacked into a single DataFrame.

    Args:
        ticker: Ticker symbol, either a plain string or something whose
            ``str()`` looks like ``"['AAPL']"`` (e.g. a one-element list).
        samplesize: Number of same-industry tickers to draw.

    Returns:
        A DataFrame holding the rows of every sampled ticker with a fresh
        integer index, or an empty DataFrame when nothing was sampled.

    Raises:
        IndexError: ``ticker`` does not occur in ``./tickers.csv``.
        ValueError: ``samplesize`` is larger than the number of other
            tickers in the industry (raised by ``random.sample``).
    """
    # Normalise the ticker: drop the brackets and quotes left over from
    # str(list), so "['AAPL']" becomes "AAPL".
    ticker = str(ticker).translate(str.maketrans("", "", "[]'"))

    # Collect every other ticker that shares the industry of `ticker`.
    df_alltickers = pd.read_csv('./tickers.csv')
    industry = df_alltickers[df_alltickers.ticker == ticker].iloc[0]['industry']
    industrytickers = df_alltickers[df_alltickers.industry ==
                                    industry]['ticker'].tolist()
    industrytickers.remove(ticker)

    # Draw the random sample of same-industry tickers.
    sameindustrytickers = random.sample(industrytickers, samplesize)

    # Fetch the tickers one at a time (the original author notes that batch
    # requests trigger an error) and gather the frames in a list.
    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every call, so the frames are concatenated once at the end.
    frames = []
    for sit in sameindustrytickers:
        yr = yahoo_reader.finance_data(tickers=[sit])
        df, _ = yr.get_fix_yahoo_data()
        frames.append(df)

    # pd.concat refuses an empty list; keep returning an empty frame then.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
Exemplo n.º 2
0
def get_large_decreases_in_industry(ticker, percentage):
    """Return the rows surrounding large intraday drops of same-industry tickers.

    For every other ticker that shares the industry of ``ticker`` (looked up
    in ``./tickers.csv``), the data is fetched through ``yahoo_reader`` and the
    ratio ``close / open`` is computed per row. A row counts as a "drop" when
    that ratio is below ``percentage``. Each drop row is kept together with
    the 100 rows before it and the 49 rows after it (by position).

    Args:
        ticker: Ticker symbol, either a plain string or something whose
            ``str()`` looks like ``"['AAPL']"``.
        percentage: Threshold for ``close / open``; e.g. ``0.9`` selects
            rows where the close is more than 10% below the open.

    Returns:
        A DataFrame with the selected rows of all tickers (fresh integer
        index, helper columns removed), or an empty DataFrame when no ticker
        produced any data.

    Raises:
        IndexError: ``ticker`` does not occur in ``./tickers.csv``.
    """
    # Normalise the ticker: drop the brackets and quotes left over from
    # str(list), so "['AAPL']" becomes "AAPL".
    ticker = str(ticker).translate(str.maketrans("", "", "[]'"))

    # Collect every other ticker that shares the industry of `ticker`.
    df_alltickers = pd.read_csv('./tickers.csv')
    industry = df_alltickers[df_alltickers.ticker == ticker].iloc[0]['industry']
    industrytickers = df_alltickers[df_alltickers.industry ==
                                    industry]['ticker'].tolist()
    industrytickers.remove(ticker)

    # Countdown used only for the progress messages.
    number = len(industrytickers)

    # Fetch the tickers one at a time (the original author notes that batch
    # requests trigger an error) and gather the per-ticker frames in a list.
    frames = []
    for sit in industrytickers:
        print("Downloading data for " + str(number) +
              " same-industry tickers.")
        number -= 1
        try:
            yr = yahoo_reader.finance_data(tickers=[sit])
            df, _ = yr.get_fix_yahoo_data()

            # shift(X) moves values X rows down, so after the loop a row is
            # flagged when any row from 49 before to 100 after it is a drop,
            # i.e. each drop marks itself, the 100 rows above and 49 below.
            df['perc_change'] = df.close / df.open
            df['arounddecrease'] = 0
            for X in range(-100, 50):
                df['arounddecrease'] = np.where(
                    df.perc_change.shift(X) < percentage, 1, df.arounddecrease)
        except Exception:
            # Best effort: skip tickers that cannot be fetched or processed,
            # but let KeyboardInterrupt / SystemExit through.
            print("No data")
            continue

        frames.append(df)
        print("Ticker data added")

    # Without any data there is no 'arounddecrease' column to filter on.
    if not frames:
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df_largedecreases = pd.concat(frames, ignore_index=True)
    df_largedecreases = df_largedecreases[df_largedecreases.arounddecrease ==
                                          1]
    return df_largedecreases.drop(columns=['arounddecrease', 'perc_change'])
Exemplo n.º 3
0
def get_same_industry_similar_volatility_tickers(ticker, window, samplesize):
    """Return data for the same-industry tickers closest in volatility.

    Volatility is the rolling standard deviation of ``close`` over ``window``
    rows, evaluated on the last row of each ticker's data. Every other ticker
    in the industry of ``ticker`` (looked up in ``./tickers.csv``) is fetched
    through ``yahoo_reader``; the ``samplesize`` tickers whose last-row
    volatility is closest to that of ``ticker`` are selected and all of their
    rows are returned.

    Args:
        ticker: Ticker symbol, either a plain string or something whose
            ``str()`` looks like ``"['AAPL']"``. It is passed to
            ``yahoo_reader`` as given and normalised only for the CSV lookup.
        window: Rolling window size (rows) for the standard deviation.
        samplesize: Number of last-row records to keep after sorting by
            volatility difference.

    Returns:
        A DataFrame with all rows of the selected tickers, ordered from the
        most to the least similar ticker, with a fresh integer index. An
        empty DataFrame is returned when no ticker produced data or when
        ``samplesize`` is 0.

    Raises:
        IndexError: ``ticker`` does not occur in ``./tickers.csv``.
    """
    # Fetch the reference ticker ("ticker A").
    yr = yahoo_reader.finance_data(tickers=[ticker])
    dfA, _ = yr.get_fix_yahoo_data()

    # Last-row volatility of ticker A as a plain scalar. Keeping it as a
    # one-row Series (as before) made the later subtraction align on index
    # labels, which yields NaN whenever the two last rows carry different
    # labels (e.g. histories of different length on a positional index).
    dfA['volatility'] = dfA.close.rolling(window).std()
    volatilityA = dfA.volatility.iloc[-1] if len(dfA) else float("nan")

    # Normalise the ticker: drop the brackets and quotes left over from
    # str(list), so "['AAPL']" becomes "AAPL".
    ticker = str(ticker).translate(str.maketrans("", "", "[]'"))

    # Collect every other ticker that shares the industry of `ticker`.
    df_alltickers = pd.read_csv('./tickers.csv')
    industry = df_alltickers[df_alltickers.ticker == ticker].iloc[0]['industry']
    industrytickers = df_alltickers[df_alltickers.industry ==
                                    industry]['ticker'].tolist()
    industrytickers.remove(ticker)

    # Countdown used only for the progress messages.
    number = len(industrytickers)

    # Full per-ticker frames and their last rows (with the volatility gap).
    frames = []
    tails = []

    # Fetch the tickers one at a time (the original author notes that batch
    # requests trigger an error).
    for sit in industrytickers:
        print("Downloading data for " + str(number) +
              " same-industry tickers.")
        number -= 1
        try:
            yr = yahoo_reader.finance_data(tickers=[sit])
            df, _ = yr.get_fix_yahoo_data()

            df['volatility'] = df.close.rolling(window).std()

            # Copy the last row so adding a column does not write into a
            # slice of `df`, then store the scalar gap to ticker A.
            df_tail = df.tail(1).copy()
            df_tail['vol_diff'] = abs(volatilityA - df_tail.volatility.iloc[0])
        except Exception:
            # Best effort: skip tickers that cannot be fetched or processed
            # (including empty frames), but let KeyboardInterrupt through.
            print("No data")
            continue

        frames.append(df)
        tails.append(df_tail)
        print("Ticker data added")

    # Nothing usable was downloaded: there is no 'vol_diff' to sort on.
    if not tails:
        return pd.DataFrame()

    # Rank tickers by their volatility gap to ticker A and keep the closest.
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df_tails = pd.concat(tails, ignore_index=True)
    df_tails = df_tails.sort_values('vol_diff', ascending=True)
    df_tails = df_tails.head(samplesize)
    sample = list(df_tails.ticker.unique())

    # Assemble all rows of the selected tickers, most similar ticker first.
    df_industrytickers = pd.concat(frames, ignore_index=True)
    selected = [
        df_industrytickers[df_industrytickers.ticker == name]
        for name in sample
    ]
    if not selected:
        return pd.DataFrame()
    return pd.concat(selected, ignore_index=True)
Exemplo n.º 4
0
def get_sample(ticker, vol_samplesize, vol_window, decrease_percentage):
    """Build a sample from similar-volatility tickers and rows around drops.

    Every other ticker in the industry of ``ticker`` (looked up in
    ``./tickers.csv``) is fetched through ``yahoo_reader``. For each one:

    * volatility is the rolling standard deviation of ``close`` over
      ``vol_window`` rows; its last-row value is compared with that of
      ``ticker``;
    * a row is a "drop" when ``close / open`` is below
      ``decrease_percentage``; each drop row, the 100 rows before it and the
      49 rows after it (by position) get ``arounddecrease == 1``.

    The ``vol_samplesize`` tickers closest in volatility are selected. For
    each selected ticker the result receives all of its rows plus every row
    (of any industry ticker) that has ``arounddecrease == 1``.

    Args:
        ticker: Ticker symbol, either a plain string or something whose
            ``str()`` looks like ``"['AAPL']"``. It is passed to
            ``yahoo_reader`` as given and normalised only for the CSV lookup.
        vol_samplesize: Number of last-row records to keep after sorting by
            volatility difference.
        vol_window: Rolling window size (rows) for the standard deviation.
        decrease_percentage: Threshold for ``close / open``; e.g. ``0.9``
            marks rows where the close is more than 10% below the open.

    Returns:
        A DataFrame with the selected rows (fresh integer index), still
        carrying the helper columns ``volatility``, ``perc_change`` and
        ``arounddecrease``. An empty DataFrame is returned when no ticker
        produced data or when ``vol_samplesize`` is 0.

    Raises:
        IndexError: ``ticker`` does not occur in ``./tickers.csv``.
    """
    # Fetch the reference ticker ("ticker A").
    yr = yahoo_reader.finance_data(tickers=[ticker])
    dfA, _ = yr.get_fix_yahoo_data()

    # Last-row volatility of ticker A as a plain scalar. Keeping it as a
    # one-row Series (as before) made the later subtraction align on index
    # labels, which yields NaN whenever the two last rows carry different
    # labels (e.g. histories of different length on a positional index).
    dfA['volatility'] = dfA.close.rolling(vol_window).std()
    volatilityA = dfA.volatility.iloc[-1] if len(dfA) else float("nan")

    # Normalise the ticker: drop the brackets and quotes left over from
    # str(list), so "['AAPL']" becomes "AAPL".
    ticker = str(ticker).translate(str.maketrans("", "", "[]'"))

    # Collect every other ticker that shares the industry of `ticker`.
    df_alltickers = pd.read_csv('./tickers.csv')
    industry = df_alltickers[df_alltickers.ticker == ticker].iloc[0]['industry']
    industrytickers = df_alltickers[df_alltickers.industry ==
                                    industry]['ticker'].tolist()
    industrytickers.remove(ticker)

    # Countdown used only for the progress messages.
    number = len(industrytickers)

    # Full per-ticker frames and their last rows (with the volatility gap).
    frames = []
    tails = []

    # Fetch the tickers one at a time (the original author notes that batch
    # requests trigger an error).
    for sit in industrytickers:
        print("Downloading data for " + str(number) +
              " same-industry tickers.")
        number -= 1
        try:
            yr = yahoo_reader.finance_data(tickers=[sit])
            df, _ = yr.get_fix_yahoo_data()

            df['volatility'] = df.close.rolling(vol_window).std()

            # Copy the last row so adding a column does not write into a
            # slice of `df`, then store the scalar gap to ticker A.
            df_tail = df.tail(1).copy()
            df_tail['vol_diff'] = abs(volatilityA - df_tail.volatility.iloc[0])

            # shift(X) moves values X rows down, so after the loop a row is
            # flagged when any row from 49 before to 100 after it is a drop,
            # i.e. each drop marks itself, the 100 rows above and 49 below.
            df['perc_change'] = df.close / df.open
            df['arounddecrease'] = 0
            for X in range(-100, 50):
                df['arounddecrease'] = np.where(
                    df.perc_change.shift(X) < decrease_percentage, 1,
                    df.arounddecrease)
        except Exception:
            # Best effort: skip tickers that cannot be fetched or processed
            # (including empty frames), but let KeyboardInterrupt through.
            print("No data")
            continue

        # Record a ticker only once all of its processing has succeeded so
        # the ranking and the data frames always cover the same tickers.
        frames.append(df)
        tails.append(df_tail)
        print("Ticker data added")

    # Nothing usable was downloaded: there is no 'vol_diff' to sort on.
    if not tails:
        return pd.DataFrame()

    # Rank tickers by their volatility gap to ticker A and keep the closest.
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df_tails = pd.concat(tails, ignore_index=True)
    df_tails = df_tails.sort_values('vol_diff', ascending=True)
    df_tails = df_tails.head(vol_samplesize)
    sample = list(df_tails.ticker.unique())
    print(sample)

    # For each selected ticker take its own rows plus every near-drop row.
    # NOTE(review): because of the `|`, near-drop rows of ALL industry
    # tickers are repeated once per selected ticker. Kept as-is to preserve
    # the existing output; confirm whether the duplication is intended.
    df_industrytickers = pd.concat(frames, ignore_index=True)
    near_drop = df_industrytickers.arounddecrease == 1
    selected = [
        df_industrytickers[(df_industrytickers.ticker == name) | near_drop]
        for name in sample
    ]
    if not selected:
        return pd.DataFrame()
    return pd.concat(selected, ignore_index=True)
Exemplo n.º 5
0
import numpy as np

import yahoo_reader
import preprocessing as pp
import lstm_utils as utils
import lstm_model
import gc
import matplotlib.pyplot as plt

# Load the user description from the YAML settings file.
user = utils.load_user_from_yml(yml_file='./configs/user_settings.yml')
# Tickers assigned to this user, as reported by lstm_utils.
user_tickers = utils.get_tickers_for_a_user(user=user)
# Tickers lstm_utils reports as done for './results/'
# (presumably those that already have saved results -- verify in lstm_utils).
tickers_done = utils.get_tickers_done('./results/')
# Remaining work: the user's tickers that are not in tickers_done,
# in the order they appear in user_tickers.
tickers_to_do = [
    ticker for ticker in user_tickers if ticker not in tickers_done
]
# Only the first remaining ticker is handed to the reader; the slice gives an
# empty list (instead of raising) when nothing is left to do.
yr = yahoo_reader.finance_data(tickers=tickers_to_do[:1])
#df = pd.read_csv('forflight.csv', sep=',')

# Fetch the data for that ticker. The call returns two values; by its use
# elsewhere the first looks like a DataFrame and the second a ticker
# collection -- confirm against yahoo_reader.
df, tickers = yr.get_fix_yahoo_data()
#ichimoku cloud
#tickers = df.ticker.unique().tolist()
#df = df_main[df_main.ticker == 'AMAG'].reset_index(drop=True)
#%%
#df = df[df.ticker == 'ASRV'].reset_index(drop=True)
#new_df_main = pd.DataFrame([])
#split = 100
#rest = len(df)%split
#
#df = df[rest:]
#print(len(df))
#for i in range(int(len(df)/split)-2):
Exemplo n.º 6
0
import pandas as pd
import numpy as np
import yahoo_reader as yr
import processing as ps
from sklearn.model_selection import RandomizedSearchCV, train_test_split, ShuffleSplit
from sklearn.ensemble import RandomForestRegressor
from scipy.stats import randint as sp_randint
import plots

#%%
# Get stock data.
# NOTE(review): this rebinds `yr` from the imported yahoo_reader module to a
# finance_data instance, so the module is no longer reachable as `yr` below.
yr = yr.finance_data()
df = yr.getData()

#%%
# Prepare the data for prediction by running it through the `processing`
# steps in order: feature generation, feature processing, target generation.
# Each step takes the frame and returns the frame used by the next one.
#df = ps.prepData(df)
df = ps.genFeatures(df)
df = ps.featureProcessing(df)
df = ps.genTargets(df)

#%%
# Define X (the feature columns listed below) and y (the 'regressor_y'
# column). The columns are expected to exist after the processing steps
# above -- presumably created there; verify in `processing`.
X = df[['open_sc','high_sc','low_sc','close_sc','volume_sc',
        'year_sc',
        'ce_month_x','ce_month_y','ce_dow_x','ce_dow_y','ce_doy_x','ce_doy_y',
        'high_low_sc','open_min1_sc','close_min1_sc','volume_min1_sc',
        'high_low_min1_sc','open_min5_sc','close_min5_sc','volume_min5_sc',
        'high_low_min5_sc','open_cagr_sc','close_cagr_sc','ticker_en']]
y = df['regressor_y']