Example #1
import simfin as sf
from pandas.testing import assert_frame_equal


def test_equal_subsets():
    """
    Test the signals are identical when calculated for subsets of the tickers.
    """

    # Calculate and compare signals for these tickers.
    ticker = 'SAVE'
    tickers = ['FL', 'SAVE', 'TLYS']

    # Used for calculating signals for ALL tickers.
    hub1 = sf.StockHub()

    # Used for calculating signals for SOME tickers.
    hub2 = sf.StockHub(tickers=tickers)

    # Used for calculating signals for ONE ticker.
    hub3 = sf.StockHub(tickers=[ticker])

    # Helper-function to perform the actual signal calculation and comparison.
    def _test(func_name, variant, func=None):
        # Get the object-methods for the function with the given name.
        signal_func1 = getattr(hub1, func_name)
        signal_func2 = getattr(hub2, func_name)
        signal_func3 = getattr(hub3, func_name)

        # Calculate the signals.
        df_signals1 = signal_func1(variant=variant, func=func)
        df_signals2 = signal_func2(variant=variant, func=func)
        df_signals3 = signal_func3(variant=variant, func=func)

        # Compare the signals and ensure they are identical.
        assert_frame_equal(df_signals1.loc[tickers], df_signals2)
        assert_frame_equal(df_signals1.loc[ticker], df_signals3.loc[ticker])
        assert_frame_equal(df_signals2.loc[ticker], df_signals3.loc[ticker])

    # Test for the different signal-functions, variants and functions.
    for func_name in ['val_signals', 'fin_signals', 'growth_signals']:
        for variant in ['daily', 'latest']:
            for func in [None, sf.avg_ttm_2y]:
                _test(func_name=func_name, variant=variant, func=func)
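A minimal, self-contained sketch of the comparison pattern the test relies on, using made-up data in place of real StockHub output (the column name 'Signal' and the dates are illustrative only):

import pandas as pd
from pandas.testing import assert_frame_equal

# Toy signal data indexed by (ticker, date), mimicking the shape of
# the DataFrames returned by the StockHub signal functions.
index = pd.MultiIndex.from_product(
    [['FL', 'SAVE', 'TLYS'], pd.date_range('2020-01-01', periods=2)],
    names=['Ticker', 'Date'])
df = pd.DataFrame({'Signal': [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]},
                  index=index)

# .loc with a list of tickers keeps the 'Ticker' index level ...
df_some = df.loc[['FL', 'SAVE']]

# ... while .loc with a single label drops it, which is why the test
# above applies .loc[ticker] to BOTH sides before comparing.
assert_frame_equal(df.loc['SAVE'], df_some.loc['SAVE'])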
Example #2
import pandas as pd
import simfin as sf

# Market to load data for ('us' is an assumed example value; the
# original snippet expects `market` to be defined beforehand).
market = 'us'

# Add this date-offset to the fundamental data such as
# Income Statements etc., because the REPORT_DATE is not
# when it was actually made available to the public,
# which can be 1, 2 or even 3 months after the Report Date.
offset = pd.DateOffset(days=60)

# Refresh the fundamental datasets (Income Statements etc.)
# every 30 days.
refresh_days = 30

# Refresh the dataset with shareprices every 10 days.
refresh_days_shareprices = 10

hub = sf.StockHub(market=market,
                  offset=offset,
                  refresh_days=refresh_days,
                  refresh_days_shareprices=refresh_days_shareprices)

df_fin_signals = hub.fin_signals(variant='daily')
df_growth_signals = hub.growth_signals(variant='daily')
df_val_signals = hub.val_signals(variant='daily')

# Combine the DataFrames.
dfs = [df_fin_signals, df_growth_signals, df_val_signals]
df_signals = pd.concat(dfs, axis=1)

# Remove all rows with only NaN values.
df = df_signals.dropna(how='all').reset_index(drop=True)

# List of the columns before removing any.
columns_before = df_signals.columns
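The snippet saves `columns_before`, presumably so that the columns removed by a later filtering step can be listed. A sketch of that follow-up, assuming the same 80% non-NaN threshold used in Example #3 below:

# Require at least 80% non-NaN values per column and drop the rest,
# then list which columns were removed.
thresh = 0.80 * len(df_signals.dropna(how='all'))
df_signals = df_signals.dropna(axis='columns', thresh=thresh)
columns_removed = columns_before.difference(df_signals.columns)
print(columns_removed)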
Example #3
import pandas as pd
import simfin as sf
# Used for column names.
from simfin.names import *

# SimFin parameters.
# Where all the CSV files are stored.
sf.set_data_dir('./simfin_data/')
# SimFin needs an API key. (Using SimFin's free key, so we only get
# stock data from before 2018.)
sf.load_api_key(path='~/simfin_api_key.txt', default_key='free')
# Offset the fundamental data by 50 days; the SimFin documentation
# also recommends doing this.
dateOffset = pd.DateOffset(days=50)
refresh_days = 25
refresh_days_shareprices = 14
# Data collection from SimFin.
hub = sf.StockHub(market='us',
                  offset=dateOffset,
                  refresh_days=refresh_days,
                  refresh_days_shareprices=refresh_days_shareprices)
# Create pandas DataFrames from the SimFin data.
growthSignalsDf = hub.growth_signals(variant='daily')
valueSignalsDf = hub.val_signals(variant='daily')
financialSignalsDf = hub.fin_signals(variant='daily')
# Combine the 3 DataFrames into one big DataFrame.
dfs = [financialSignalsDf, growthSignalsDf, valueSignalsDf]
signalsDf = pd.concat(dfs, axis=1)
# Drop the rows where all elements are missing (preview the result).
signalsDf.dropna(how='all').head()
df = signalsDf.dropna(how='all').reset_index(drop=True)
# Columns must have at least 80% non-null values; any that don't are
# dropped (scikit-learn does not work well with lots of missing data).
thresh = 0.80 * len(signalsDf.dropna(how='all'))
signalsDf = signalsDf.dropna(axis='columns', thresh=thresh)
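Since the comment above motivates the column-dropping with scikit-learn, here is a sketch of one way to handle the NaN values that remain after the threshold filter (SimpleImputer with the mean strategy is an assumed choice, not part of the original snippet):

from sklearn.impute import SimpleImputer

# Replace each remaining missing value with its column's mean, so the
# signals can be fed to scikit-learn estimators.
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(signalsDf.dropna(how='all'))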
Example #4
import pandas as pd
import simfin as sf
# Column names from SimFin; `market_list`, `df_all`, `df_companies`
# and `df_industries` are assumed to be defined earlier in the script.
from simfin.names import INDUSTRY_ID, REPORT_DATE, SIMFIN_ID, TICKER

# Merge the company and industry details into the main DataFrame,
# then clean up the duplicate ticker columns created by the merge.
df_all = df_all.merge(
    df_companies.merge(df_industries, how='left', on=INDUSTRY_ID)
                .set_index(SIMFIN_ID),
    how='left', on=SIMFIN_ID).drop(TICKER + '_y', axis=1)
df_all[TICKER] = df_all[TICKER + '_x']
df_all = df_all.drop(TICKER + '_x', axis=1)

df_all = df_all.set_index([TICKER, REPORT_DATE])
fin_sig_list = list()
for mkt in market_list:
    hub = sf.StockHub(market=mkt, refresh_days=30, refresh_days_shareprices=1)
    # Load the financial signals for the current market.
    fin_sig_list.append(hub.fin_signals(variant='quarterly'))

df_all = pd.concat([df_all, pd.concat(fin_sig_list)], axis=1)


fin_price_list = list()
for mkt in market_list:
    hub = sf.StockHub(market=mkt, refresh_days=30, refresh_days_shareprices=1)
    # Load the share prices for the current market.
    fin_price_list.append(hub.load_shareprices(variant='daily'))

fin_prices = pd.concat(fin_price_list)

df_all = df_all.reset_index(level=[REPORT_DATE])
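Both loops above build an identical StockHub per market, so a single pass can collect the signals and the share prices together; a sketch of that equivalent refactoring (same arguments as above):

fin_sig_list = list()
fin_price_list = list()
for mkt in market_list:
    hub = sf.StockHub(market=mkt, refresh_days=30,
                      refresh_days_shareprices=1)
    # One hub per market serves both the signals and the share prices,
    # so each market's data only goes through one round of cache checks.
    fin_sig_list.append(hub.fin_signals(variant='quarterly'))
    fin_price_list.append(hub.load_shareprices(variant='daily'))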
Example #5
import pandas as pd
import seaborn as sns
import simfin as sf

# Set the local directory where data-files are stored.
sf.set_data_dir('C:/Users/think/Desktop/UVA/2020 Spring/STAT 4996 Capstone/python code/simfin_data/')

# Set up API key
sf.set_api_key(api_key='free')

# Set plotting style.
sns.set_style("whitegrid")

# -------------------------- Load data -----------------------------
offset = pd.DateOffset(days=60)
# Refresh the fundamental datasets (Income Statements etc.) every
# 30 days, and the dataset with shareprices every 10 days.
hub = sf.StockHub(market='us', offset=offset,
                  refresh_days=30,
                  refresh_days_shareprices=10)

# Calculate the signal data.
df_fin_signals = hub.fin_signals(variant='daily')
df_growth_signals = hub.growth_signals(variant='daily')
df_val_signals = hub.val_signals(variant='daily')
dfs = [df_fin_signals, df_growth_signals, df_val_signals]
df_signals = pd.concat(dfs, axis=1)


# ----------------------- Missing Data ----------------------------
# First remove all rows with only NaN values.
df = df_signals.dropna(how='all').reset_index(drop=True)

# For each column, show the fraction of the rows that are NaN.
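# A hedged completion of the step described above (the original
# snippet ends here): the per-column fraction of NaN values.
df.isna().mean().sort_values(ascending=False)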