""" Created on Thursday March 19 @author: Jeffrey J. Walker test_reader.py Simple script to test the yahoo_csv_reader.py function. """ import pandas as pd import numpy as np import matplotlib.pyplot as plt ## insert the path corresponding to the Yahoo csv reader. # insert at 1, 0 is the script path (or '' in REPL) sys.path.insert(1, '/home/jjwalker/Desktop/finance/codes/data_cleaning') from yahoo_csv_reader import yahoo_csv_reader ## path to the security you want to plot: path = '/home/jjwalker/Desktop/finance/data/commodities/' ticker = 'HG=F' filename = path + ticker df = yahoo_csv_reader(filename, ticker) ## What is a good default plot, to include the chart, and maybe the volume? plt.plot(df.Close, '.k') plt.show()
## Granger-causality exploration: Fed balance sheet (WALCL) vs. S&P 500 close.
## FIX: this script used `sys` (sys.path.insert) and `pd` (pd.concat,
## pd.to_datetime) without importing either — both raised NameError.
import sys

import pandas as pd

from statsmodels.tsa.stattools import grangercausalitytests
from statsmodels.tsa.stattools import adfuller
#from statsmodels.tsa.stattools import kpss

## import the csv reader for Yahoo finance data
# insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1, '/home/jjwalker/Desktop/finance/codes/data_cleaning')
from yahoo_csv_reader import yahoo_csv_reader
## import the csv reader for FRED data
from fred_csv_reader import fred_csv_reader

## path where the various data files are located.
spx_path = '/home/jjwalker/Desktop/finance/data/stocks/'
fed_path = '/home/jjwalker/Desktop/finance/data/us_economic_data/'

## WALCL: Fed total assets (weekly, from FRED); ^GSPC: S&P 500 index.
fed_bs = fred_csv_reader(fed_path + 'WALCL')
spx = yahoo_csv_reader(spx_path + '^GSPC', '^GSPC')

## Want to execute in python shell? then use:
#execfile('/home/jjwalker/Desktop/finance/codes/stocks/granger_causality.py')

## concatenate the close price for the two (or more) securities and find the
## correlation. Outer-joins on the date index, so mismatched dates produce
## NaNs that are dropped just below.
common_values = pd.concat([fed_bs.WALCL, spx.Close], axis=1)
## get rid of any nans?? Or, resample based on WALCL?
common_values.dropna(inplace=True)
## choose a startdate?
start_date = pd.to_datetime('2009-01-02')
## calculate pearson correlation coefficient, easy peasy:
corr_coef = common_values[common_values.index > start_date].corr(
    method='pearson')
## BEFORE RUNNING ADF TEST, YOU NEED TO DO KPSS TEST!
## Augmented Dickey-Fuller (ADF) stationarity test on a stock's close price.
## FIX: this script used `sys` (sys.path.insert) and `pd` (pd.Series) without
## importing either — both raised NameError.
import sys

import pandas as pd

from statsmodels.tsa.stattools import adfuller

##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Be sure that you use a valid ticker symbol, no indices!
## This will be the stock you are long on.
## NOTE(review): '^SPX' IS an index ticker, contradicting the comment above —
## confirm the intended symbol.
ticker = '^SPX'

## insert the path corresponding to the Yahoo option chain scraper;
## we will need this function!
# insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1, '/home/jjwalker/Desktop/finance/codes/data_cleaning')
from yahoo_csv_reader import yahoo_csv_reader

## the path:
path = '/home/jjwalker/Desktop/finance/data/stocks/'
df = yahoo_csv_reader(path + ticker, ticker)

## ADF test: Null hypothesis is that the time series has unit root
adftest = adfuller(df.Close, autolag='AIC')
## FIX: variable was named `dfResults` while every comment below referred to
## `adfResults`; renamed for consistency (script-local name only).
adfResults = pd.Series(adftest[0:4], index=[
    'ADF Test Statistic', 'P-Value', '# Lags Used', '# Observations Used'
])
## critical values come from
#adfResults[4].items()
## p-values from adfResults[1]
## The more negative the test statistic, the more likely we are to reject the
## NULL HYPOTHESIS that there is unit root.
## If the test statistic is less than the critical value, we reject Null
## hypothesis
## NOTE(review): this chunk begins mid-statement — it is the tail of a
## rename(columns={...}) call started above the visible region, mapping the
## remaining FRED constant-maturity series names to maturities in years.
'DGS20': '20',
'DGS30': '30',
}, inplace=True)
## restructure the dataframe, sort the columns numerically
## However, this sorts alphabetically instead of numerically
#yc.sort_index(axis=1,ascending=True,inplace=True)
## FINALLY found the way to do this:
## (reorders columns by their numeric value via an argsort over the
## float-cast column labels)
yc = yc[np.argsort(yc.columns.astype(float))]
## standard us treasury maturities
xtemp = np.array(yc.columns.astype(float))

## Load spx for comparison: (or use ^GSPC)
## EVEN BETTER! Use sp500TR, the sp500 total return index which assumes
## dividend reinvestment!
spx = yahoo_csv_reader('/home/jjwalker/Desktop/finance/data/stocks/^SP500TR',
                       '^SP500TR')
## SPX daily return, in case we need it:
## NOTE(review): numerator is a bare ndarray (.values) divided by a Series —
## presumably to avoid index alignment; confirm the resulting index is the
## one intended downstream.
spx_drf = spx.Close[1:].values / (spx.Close[:-1])

## Zero coupon yield curve from FRED:
## read in data; have to use (7)th line as the header, I don't know why
## (feds200628.csv ships with several preamble lines before the column row)
datafile = "/home/jjwalker/Desktop/finance/data/bonds/feds200628.csv"
zc = pd.read_csv(datafile, header=7)
## NOTE(review): infer_datetime_format is deprecated in recent pandas —
## confirm the pandas version pinned for this project.
zc['Date'] = pd.to_datetime(zc.Date, infer_datetime_format=True)
zc.set_index('Date', inplace=True)
## the SVENY variable names contain the zero-coupon yield, continuously
## compounded
## get the variables needed:
## 1 year zero coupon (for annual Sortino/information/Sharpe ratio)
zc01 = zc['SVENY01']
## 30 year zero coupon, the instrument we are interested in
## NOTE(review): this chunk begins mid-statement — it is the tail of a
## rename(columns={...}) call started above the visible region, mapping the
## last FRED constant-maturity series name to a maturity in years.
'DGS30': '30',
}, inplace=True)
## restructure the dataframe, sort the columns numerically
## However, this sorts alphabetically instead of numerically
#yc.sort_index(axis=1,ascending=True,inplace=True)
## FINALLY found the way to do this:
## (reorders columns by their numeric value via an argsort over the
## float-cast column labels)
yc = yc[np.argsort(yc.columns.astype(float))]
## standard us treasury maturities
xtemp = np.array(yc.columns.astype(float))

## data for spx: (or use ^GSPC)
## EVEN BETTER! Use sp500TR, the sp500 total return index which assumes
## dividend reinvestment!
#spx=yahoo_csv_reader('/home/jjwalker/Desktop/finance/data/stocks/^SP500TR','^SP500TR')
spx = yahoo_csv_reader('/home/jjwalker/Desktop/finance/data/stocks/^GSPC',
                       '^GSPC')
## SPX daily return, in case we need it:
#spx_drf=spx.Close[1:].values/(spx.Close[:-1])

## DCA inputs:
## starting cash; make this zero if you want to start by dca'ing first
## iteration of list
cash = 0
## DCA investing amount, dollars
dca_inv = 200
## initial number of spx shares:
nshares = 0
## Dates to use for dollar cost averaging; set to the first of the month
## Find a start date; Ideally this would be a treasury auction date?
#start_date='1985-12-02' # this start date is the first month that 30 year
# STRIPS data is available
import matplotlib.pyplot as plt ## Need this line to import the yahoo csv reader # insert at 1, 0 is the script path (or '' in REPL) sys.path.insert(1, '/home/jjwalker/Desktop/finance/codes/data_cleaning') from yahoo_csv_reader import yahoo_csv_reader ## path where the various data files are located. path='/home/jjwalker/Desktop/finance/data/stocks/' ## load data for each ticker ticker1='NSC' #ticker2='AMSC' div_data=ticker+'_div' ## Start date? stock=yahoo_csv_reader(path+ticker1,ticker1) div=yahoo_csv_reader(path+div_data,ticker1) ## make sure we have sorted by index! div.sort(inplace=True) ## initial number of shares or initial investment. ## initial investment p0=1e4 ## initial shares: n0=np.floor(p0/stock.Close[stock.index==div.index[0]])#n0=10.0 ## number of shares grows with time, can be fractional. ## just use the paid date for when these get added to the principal. #start_date= n=np.zeros((len(div)) n[0]=n0 for i in range(len(div)-1):