def groupholtf(groupdf, h=12, alpha=hw.hw_alpha, beta=hw.hw_beta):
    '''Forecast each column of a group dataframe h-periods ahead.

    Every column of groupdf is converted by tool.todf(), worked out by
    hw.holt() using alpha and beta, then projected by hw.holtforecast()
    for h periods. The per-column results are combined by tool.paste()
    and the combined columns are labeled with the original names.
    '''
    names = list(groupdf.columns)

    def _forecast(name):
        # One column: dataframe -> Holt workout -> h-period forecast.
        single = tool.todf(groupdf[name])
        return hw.holtforecast(hw.holt(single, alpha, beta), h)

    combined = tool.paste([_forecast(name) for name in names])
    # Restore the identifying column names on the pasted result.
    combined.columns = names
    return combined
def rates2prices(ratarr, mean=0 / 256., initialpx=0):
    '''Turn an array of rates of return into a DataFrame of prices.

    mean is added to every rate; REMEMBER to adjust an annualized mean
    to match the data frequency before passing it in.
    A falsy initialpx (the default 0) leaves the price path unscaled,
    i.e. the path implicitly starts from a price of 1.
    '''
    # Gross return per period.
    gross = (1 + mean) + ratarr
    # Price history is the running product of the gross returns:
    # http://docs.scipy.org/doc/numpy/reference/generated/numpy.cumprod.html
    path = np.cumprod(gross)
    if not initialpx:
        return todf(path)
    return todf(initialpx * path)
def smallsample_gmr(N, poparr, yearly=256, repeat=100,
                    inprice=1.0, replace=True):
    '''Demo small sample statistics: repeat geometric mean rates.

    For each of "repeat" trials, prices are generated by bsret2prices()
    and the first element of the gemrat() output is recorded.
    The collected values are returned through tool.todf().
    '''
    # One slot per trial; every slot is overwritten in the loop below.
    results = np.ones((repeat, ))
    for trial in range(repeat):
        path = bsret2prices(N, poparr, inprice=inprice, replace=replace)
        results[trial] = gemrat(path, yearly=yearly, pc=False)[0]
    # DataFrame format for the convenience of the user.
    return tool.todf(results)
def holt(data, alpha=hw_alpha, beta=hw_beta):
    '''Holt-Winters growth (linear) model outputs workout dataframe.

    The result carries the given data (after dropna) plus two added
    columns: 'Level' for the smoothed data and 'Growth' for the local
    slope, all on the original index.
    holt is an EXPENSIVE function, so retain its output for later.
    '''
    # dropna() is required, otherwise the column assignments fail with
    # "ValueError: Length of values does not match length of index".
    workout = todf(data).dropna()
    # The model works on the underlying array of values.
    level, growth = holt_winters_growth(workout.values, alpha, beta)
    workout['Level'] = level
    workout['Growth'] = growth
    return workout
def groupgeoret(groupdf, yearly=256, order=True):
    '''Geometric mean returns, non-overlapping, for group dataframe.

    Argument "yearly" refers to annual frequency, e.g.
    256 for daily trading days, 12 for monthly, 4 for quarterly.
    Returns a list with one entry per column: the list from
    tool.georet() with the column key appended as its last element.
    If "order" is true, entries are sorted in decreasing order.
    ATTN: Use groupgemrat() instead for greater accuracy.
    '''
    geo = []
    for key in groupdf.columns:
        stats = tool.georet(tool.todf(groupdf[key]), yearly)
        # Tag each georet list with its identifying key.
        geo.append(stats + [key])
    if order:
        # Decreasing with respect to georet, which leads each entry.
        geo.sort(reverse=True)
    return geo
def getfut(slang, maxi=512, col='Settle'):
    '''Retrieve a single column for one futures contract given slang.

    The slang string consists of a key from fut_dict concatenated with
    'yym' where yy is shorthand for year and m is the month symbol,
    all in lower case, e.g. 'f4xau15z' for December 2015 Comex Gold.

    Available col are: Open, High, Low, Last, Change, Settle,
    Volume, 'Open Interest'
    '''
    # Other than Eurodollars, more than 512 days of data should not be
    # needed due to the finite life of a futures contract.
    # 2015-09-11 quandl default seems to be maxi around 380.
    contract = fut_decode(slang)
    fut = _qget(contract, rows=maxi)
    # Double brackets select the column as a dataframe, not a Series.
    return tool.todf(fut[[col]])
def smallsample_loss(N, poparr, yearly=256, repeat=100, level=0.90,
                     inprice=1.0, replace=True):
    '''Demo small sample statistics: probability of loss: price < level.

    Relative to investment at initial price, inprice.
    For each of "repeat" trials, prices are generated by bsret2prices()
    and the number of prices below level is divided by N.
    The collected values are returned through tool.todf().
    '''
    # One slot per trial; every slot is overwritten in the loop below.
    losses = np.ones((repeat, ))
    for trial in range(repeat):
        path = bsret2prices(N, poparr, inprice=inprice, replace=replace)
        # Masking leaves NaN where the condition fails, so drop those.
        below = path[path < level].dropna()
        losses[trial] = below.shape[0] / float(N)
    # DataFrame format for the convenience of the user.
    return tool.todf(losses)
def groupgemrat(groupdf, yearly=256, order=False, n=2):
    '''Geometric mean rates, non-overlapping, for group dataframe.

    Argument "yearly" refers to annual frequency, e.g.
    256 for daily trading days, 12 for monthly, 4 for quarterly.
    Output is rounded to n-decimal places.
    Returns a list with one entry per column: the rounded gemrat()
    list with the column key appended as its last element.
    If "order" is true, entries are sorted in decreasing order.
    Algorithm takes KURTOSIS into account for greater accuracy.
    '''
    gem = []
    for key in groupdf.columns:
        rates = gemrat(tool.todf(groupdf[key]), yearly)
        # Round quietly, then tag the list with its identifying key.
        gem.append(tool.roundit(rates, n, echo=False) + [key])
    if order:
        # Decreasing with respect to gemrat, which leads each entry.
        gem.sort(reverse=True)
    return gem
def plotn(data, title='tmp'):
    '''Plot list, array, Series, or DataFrame where the index is numbered.

    Returns the list [title, fig] where fig is the created figure.
    '''
    # tool.todf converts a list, array, or Series to DataFrame.
    # It must dropna(), otherwise the index of the last point plotted
    # may be wrong.
    frame = tool.todf(data)
    fig, ax = plt.subplots()
    # No ax.xaxis_date() here: the x-axis values are not dates.
    # Show x labels vertically.
    plt.xticks(rotation='vertical')
    # x is the index, y the data, 'b-' a blue line (k is black).
    ax.plot(frame.index, frame, 'b-')
    # Title is suffixed by the index on the last data point.
    last = frame.index[-1]
    ax.set_title(title + ' / last ' + str(last))
    plt.grid(True)
    return [title, fig]
def cotr_position(futures='GC'):
    '''Extract market position from CFTC Commitment of Traders Report.

    Returns longs / (longs + shorts) through tool.todf(): a scale-free
    measure between 0 and 1 indicating bullishness.
    '''
    # Report for both futures and options requested by implicit "FO".
    cotr = cotr_get(futures)
    # Categories used for directionality:
    try:
        # FINANCIALS: "Asset Manager" takes the longer term perspective,
        # whereas "Leveraged Funds" appear short-term.
        longs, shorts = (cotr['Asset Manager Longs'],
                         cotr['Asset Manager Shorts'])
    except Exception:
        # COMMODITIES: that report is structured differently than
        # financials and uses "Money Manager" instead.
        longs, shorts = (cotr['Money Manager Longs'],
                         cotr['Money Manager Shorts'])
    bullishness = longs / (longs + shorts)
    return tool.todf(bullishness)
def test_qdl_fecon236_Check_xbt_prices_vSlow_oLocal():
    '''Check on xbt prices on various dates, only Local.'''
    # Download Bitcoin prices from Quandl; todf renames the column.
    xbt = tool.todf(qdl.getqdl(qdl.d7xbtusd), 'XBT')
    # Last value up to each date should match within 0.1:
    expected = (
        ('2014-02-01', 815.99),
        ('2015-02-01', 220.72),
        ('2016-02-01', 376.86),
    )
    for date, price in expected:
        assert abs(tool.tailvalue(xbt[:date]) - price) < 0.1
    # Q: Why only Local, i.e. oLocal?
    #
    # Without revealing one's private authtoken.p,
    # all Travis CI machines sharing an IP address
    # will be considered one anonymous user, thus
    # the limited calls to Quandl will be jointly exceeded,
    # resulting in "HTTP Error 429: Too Many Requests".
    # The Travis job would then fail for Quandl's server policy
    # reasons, not the validity of the code.
def plotdf(dataframe, title='tmp'):
    '''Plot dataframe where its index are dates.

    Returns the list [title, fig] where fig is the created figure.
    '''
    # tool.todf must dropna(), otherwise the index of the last point
    # plotted may be wrong. It also helps if the dataframe resulted
    # from synthetic operations, or if a Series was incorrectly
    # submitted as a DataFrame.
    frame = tool.todf(dataframe)
    fig, ax = plt.subplots()
    # Interpret x-axis values as dates.
    ax.xaxis_date()
    # Show x labels vertically.
    plt.xticks(rotation='vertical')
    # x is the index, y the data, 'b-' a blue line (k is black).
    ax.plot(frame.index, frame, 'b-')
    # Title is suffixed by the timestamp of the last data point.
    last = frame.index[-1]
    ax.set_title(title + ' / last ' + str(last))
    plt.grid(True)
    return [title, fig]
# 2013-04-16,1380.0
# 2013-04-17,1392.0
# 2013-04-18,1393.75


def test_fred_fecon236_Read_CSV_file():
    '''Read the sample CSV file, check column and shape, return it.'''
    path = sep.join(['tests', 'zdata-xau-13hj-c30.csv'])
    df = fred.readfile(path)
    # readfile disregards the XAU column name:
    assert list(df.columns) == ['Y']
    assert df.shape == (30, 1)
    return df


# Shared fixture for the tests below; todf is used to rename column.
xau = tool.todf(test_fred_fecon236_Read_CSV_file(), 'XAU')


def test_fred_fecon236_check_xau_DataFrame():
    '''Check xau dataframe: column name and last value.'''
    assert list(xau.columns) == ['XAU']
    assert tool.tailvalue(xau) == 1393.75


def test_fred_fecon236_check_xau_frequency():
    '''Check xau dataframe frequency.'''
    # Expect min daily frequency in seconds.
    assert fred.index_delta_secs(xau) == 86400.0
# 2013-04-16,1380.0
# 2013-04-17,1392.0
# 2013-04-18,1393.75


def test_tool_fecon236_Read_CSV_file():
    '''Read the sample CSV file, check column and shape, return it.'''
    path = sep.join(['tests', 'zdata-xau-13hj-c30.csv'])
    df = fred.readfile(path)
    # readfile disregards the XAU column name:
    assert list(df.columns) == ['Y']
    assert df.shape == (30, 1)
    return df


# Shared fixture for the tests below; todf is used to rename column.
xau = tool.todf(test_tool_fecon236_Read_CSV_file(), 'XAU')


def test_tool_fecon236_check_xau_DataFrame():
    '''Check xau dataframe: column name and last value.'''
    assert list(xau.columns) == ['XAU']
    assert tool.tailvalue(xau) == 1393.75


# Second fixture: xau shifted up by a constant, column renamed.
foo = tool.todf(xau + 5000.00, 'FOO')


def test_tool_fecon236_check_foo_DataFrame():
    '''Check foo dataframe which is just xau + 5000.00 increase.'''
    assert list(foo.columns) == ['FOO']
def holtgrow(data, alpha=hw_alpha, beta=hw_beta):
    '''Just the Growth dataframe from Holt-Winters growth model.

    Runs holt() on data and returns only its 'Growth' column,
    converted by todf(). Expressed in terms of the units of data.
    '''
    workout = holt(data, alpha, beta)
    return todf(workout['Growth'])
def holtlevel(data, alpha=hw_alpha, beta=hw_beta):
    '''Just smoothed Level dataframe from Holt-Winters growth model.

    Runs holt() on data and returns only its 'Level' column,
    converted by todf().
    Useful to filter out seasonals, e.g. see X-11 method:
    http://www.sa-elearning.eu/basic-algorithm-x-11
    '''
    workout = holt(data, alpha, beta)
    return todf(workout['Level'])