def volatility(
    self,
    n,
    freq=None,
    which="close",
    ann=True,
    model="ln",
    min_periods=1,
    rolling="simple",
):
    """Return the annualized volatility series. N is the number of lookback periods.

    :param n: int, number of lookback periods
    :param freq: resample frequency or None
    :param which: price series to use
    :param ann: if True, annualize
    :param model: {'ln', 'pct', 'bbg'}
                  ln - use logarithmic price changes
                  pct - use pct price changes
                  bbg - use logarithmic price changes, but reindex to actual
                        business days as Bloomberg does
    :param rolling: {'simple', 'exp'}; if 'exp', use an exponentially weighted
                    std, if 'simple', use a rolling std
    :return:
    """
    if model not in ("bbg", "ln", "pct"):
        raise ValueError("model must be one of (bbg, ln, pct), not %s" % model)
    if rolling not in ("simple", "exp"):
        raise ValueError("rolling must be one of (simple, exp), not %s" % rolling)

    px = self.frame[which]
    px = px if not freq else px.resample(freq).last()
    if model == "bbg" and periods_in_year(px) == 252:
        # Bloomberg uses business days, so need to convert and reindex
        orig = px.index
        px = px.resample("B").ffill()
        chg = np.log(px / px.shift(1))
        # mask the filled-in business days that were not in the original index
        chg[chg.index.difference(orig)] = np.nan
        if rolling == "simple":
            vol = chg.rolling(n, min_periods=min_periods).std().reindex(orig)
        else:
            vol = chg.ewm(span=n, min_periods=n).std()
        return vol if not ann else vol * np.sqrt(260)
    else:
        chg = px.pct_change() if model == "pct" else np.log(px / px.shift(1))
        if rolling == "simple":
            vol = chg.rolling(n, min_periods=min_periods).std()
        else:
            vol = chg.ewm(span=n, min_periods=n).std()
        return vol if not ann else vol * np.sqrt(periods_in_year(vol))
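# Editor's note (hedged): the pd.ewm*/pd.rolling_* module-level functions used
# in the original snippets were removed in pandas 0.23; the rewrites below use
# the method-chain equivalents. A minimal self-contained mapping sketch:
import pandas as pd

s = pd.Series([1.0, 2.0, 4.0, 3.0, 5.0])
ew_std = s.ewm(span=3, min_periods=2).std()          # was: pd.ewmstd(s, span=3, min_periods=2)
ew_mean = s.ewm(com=3).mean()                        # was: pd.ewma(s, 3) -- the positional arg was com, not span
roll_std = s.rolling(3, min_periods=2).std()         # was: pd.rolling_std(s, 3, min_periods=2)
roll_q = s.rolling(3, min_periods=2).quantile(0.05)  # was: pd.rolling_quantile(s, 3, 0.05, 2)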
def zscore_ranked(div, library):
    lookback = 5
    markets = 3
    data = pd.DataFrame()
    for mkt in library.list_symbols():
        try:
            data[mkt] = library.read(mkt).data.Price
        except Exception:
            print(mkt)
    # EW z-score of each market (the legacy pd.ewma(data, 20) used com=20)
    zscores = (data - data.ewm(com=20).mean()) / data.ewm(com=20).std()
    latest = zscores.tail(lookback)
    ranked = latest.T.sort_values(by=latest.T.columns[0]).dropna()
    # bottom and top `markets` movers
    zscore_ranked = pd.concat([ranked[:markets], ranked[-markets:]])
    final_data = pd.DataFrame()
    for i, d in enumerate(zscore_ranked.columns, start=1):
        final_data['T+' + str(i)] = zscore_ranked[d]
    return serialize(final_data, render_to=div, kind="bar",
                     title="Notable market moves", output_type='json')
def robust_vol_calc(x, days=35, min_periods=10, vol_abs_min=0.0000000001,
                    vol_floor=True, floor_min_quant=0.05,
                    floor_min_periods=100, floor_days=500):
    # Standard deviation will be nan for first 10 non-nan values
    vol = x.ewm(span=days, min_periods=min_periods).std()
    vol[vol < vol_abs_min] = vol_abs_min
    if vol_floor:
        # Find the rolling 5% quantile point to set as a minimum
        vol_min = vol.rolling(floor_days,
                              min_periods=floor_min_periods).quantile(floor_min_quant)
        # set this to zero for the first value then propagate forward; ensures
        # we always have a value
        vol_min.iloc[0] = 0.0
        vol_min = vol_min.ffill()
        # apply the vol floor
        vol_with_min = pd.concat([vol, vol_min], axis=1)
        vol_floored = vol_with_min.max(axis=1, skipna=False)
    else:
        vol_floored = vol
    return vol_floored
def vol_estimator(x, using_exponent=True, min_periods=20, ew_lookback=250):
    """
    Generic vol estimator used for optimisation; works on data frames and
    produces a single answer per column.

    :param x: data
    :type x: Tx1 pd.DataFrame

    :param using_exponent: Use exponential or normal vol (latter recommended for bootstrapping)
    :type using_exponent: bool

    :param min_periods: The minimum number of observations (*default* 20)
    :type min_periods: int

    :returns: list of floats -- volatility measure per column
    """
    if using_exponent:
        # keep the row as an array so list(vol) below works for any width
        vol = x.ewm(span=ew_lookback,
                    min_periods=min_periods).std().iloc[-1, :].values
    else:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            vol = x.apply(apply_with_min_periods, axis=0,
                          min_periods=min_periods, my_func=np.nanstd)
    stdev_list = list(vol)
    return stdev_list
def portfolio_return(asset_returns, cash_weights):
    index_returns = asset_returns.cumsum().ffill().diff()
    cash_align = cash_weights.reindex(asset_returns.index, method="ffill")
    cash_align[np.isnan(index_returns)] = 0.0
    cash_align[np.isnan(cash_align)] = 0.0
    vols = asset_returns.ewm(span=100, min_periods=1).std()
    riskweights = pd.DataFrame(cash_align.values / vols.values, index=vols.index)
    riskweights.columns = asset_returns.columns
    riskweights[np.isnan(riskweights)] = 0.0

    def _rowfix(x):
        # normalise each row of weights to sum to 1.0, leaving all-zero rows alone
        # (returning a Series keeps apply(axis=1) producing a DataFrame)
        if (x == 0.0).all():
            return x
        return x / x.sum()

    riskweights = riskweights.apply(_rowfix, axis=1)
    portfolio_returns = asset_returns * riskweights
    portfolio_returns[np.isnan(portfolio_returns)] = 0.0
    portfolio_returns = portfolio_returns.sum(axis=1)
    return portfolio_returns
def standardize(data):
    log_return = np.log(data).diff()
    # the legacy pd.ewmstd(log_return, 10) / pd.ewma(log_return, 10) used com=10
    std = log_return.ewm(com=10).std()
    ewma = log_return.ewm(com=10).mean()
    # logistic squash of the EW z-score into (0, 1)
    data_standardized = 1 / (1 + np.exp((log_return - ewma) / std))
    return data_standardized
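# Side note on the logistic squash above: 1 / (1 + exp(z)) maps the EW z-score
# into (0, 1) but is decreasing in z, so above-average returns land below 0.5.
# Quick illustration:
import numpy as np

z = np.array([-2.0, 0.0, 2.0])
print(1 / (1 + np.exp(z)))   # ~[0.881, 0.500, 0.119]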
def macro_factors(div, library):
    factors = {
        'Risk on': ['Russell 2000', 'DAX'],
        'Quantitative Easing': ['Gold', 'German Bund', 'Gilts', 'US Treasuries 10 Yr'],
        'Emerging Markets': ['Copper', 'MXN', 'BRL', 'Ibovespa', 'Taiwan (SIMEX)'],
        'EU': ['DAX', 'FTSE 100', 'German Bund', 'Italian 10 year bonds'],
        'Energies': ['Crude', 'Rotterdam Coal', 'Natural Gas'],
        'Industrials': ['Copper', 'Rotterdam Coal', 'Crude', 'Shanghai Rebar']
    }
    factor_data = pd.DataFrame()
    for f in factors.keys():
        df = pd.DataFrame()
        for m in factors[f]:
            try:
                df[m] = library.read(m).data.Price.replace(to_replace=0, method='ffill')
            except Exception:
                print(m)
        # average daily pct change across the factor's markets
        factor_data[f] = df.resample('D').last().dropna(how='all').pct_change().mean(axis=1)
    cum = factor_data.cumsum()
    # the legacy pd.ewma(cum, 60) / pd.ewmstd(cum, 60) used com=60
    zscores = (cum - cum.ewm(com=60).mean()) / cum.ewm(com=60).std()
    y = zscores.tail(1).T.columns[0].year
    return serialize(zscores[str(y)].ffill(), render_to=div, title='Factors',
                     output_type='json')
def robust_vol_calc(x, days=35, min_periods=10, vol_abs_min=0.0000000001,
                    vol_floor=True, floor_min_quant=0.05,
                    floor_min_periods=100, floor_days=500):
    """
    Robust exponential volatility calculation, assuming a daily series of prices.

    We apply an absolute minimum level of vol (vol_abs_min), and a vol floor
    based on the lowest vol over recent history.

    :param x: data
    :type x: Tx1 pd.Series

    :param days: Number of days in lookback (*default* 35)
    :type days: int

    :param min_periods: The minimum number of observations (*default* 10)
    :type min_periods: int

    :param vol_abs_min: The size of the absolute minimum (*default* 0.0000000001); 0.0 = not used
    :type vol_abs_min: float

    :param vol_floor: Apply a floor to volatility (*default* True)
    :type vol_floor: bool

    :param floor_min_quant: The quantile to use for the volatility floor (e.g. 0.05 means we use the 5% vol) (*default* 0.05)
    :type floor_min_quant: float

    :param floor_days: The lookback for calculating the volatility floor, in days (*default* 500)
    :type floor_days: int

    :param floor_min_periods: Minimum observations for the floor; until reached, the floor is zero (*default* 100)
    :type floor_min_periods: int

    :returns: pd.Series -- volatility measure
    """
    # Standard deviation will be nan for the first min_periods non-nan values
    vol = x.ewm(span=days, min_periods=min_periods).std()
    vol[vol < vol_abs_min] = vol_abs_min
    if vol_floor:
        # Find the rolling 5% quantile point to set as a minimum
        vol_min = vol.rolling(floor_days,
                              min_periods=floor_min_periods).quantile(floor_min_quant)
        # set this to zero for the first value then propagate forward; ensures
        # we always have a value
        vol_min.iloc[0] = 0.0
        vol_min = vol_min.ffill()
        # apply the vol floor
        vol_with_min = pd.concat([vol, vol_min], axis=1)
        vol_floored = vol_with_min.max(axis=1, skipna=False)
    else:
        vol_floored = vol
    return vol_floored
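# Hedged usage sketch for robust_vol_calc: synthetic daily price changes
# (all names below are illustrative, not from the original source).
import numpy as np
import pandas as pd

idx = pd.date_range("2020-01-01", periods=750, freq="B")
price_changes = pd.Series(np.random.default_rng(0).normal(0.0, 1.0, len(idx)),
                          index=idx)
vol = robust_vol_calc(price_changes)   # 35-day EW vol, floored at the rolling 5% quantile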
def tsmom_improved(data, months):
    # the legacy pd.ewmstd(..., 500) used com=500; annualize monthly vol by sqrt(12)
    vol = data.pct_change().ewm(com=500).std() * math.sqrt(12)
    data = data.resample('M').last()
    signal = data / data.shift(months) - 1
    signal = signal / abs(signal)  # reduce to the sign (+1 / -1)
    position = signal / vol
    return position
def expected_ewmstd(self, window_length, decay_rate):
    alpha = 1 - decay_rate
    span = (2 / alpha) - 1
    return rolling_apply(
        self.raw_data,
        window_length,
        # legacy form: ewmstd(window, span=span)[-1]
        lambda window: pd.Series(window).ewm(span=span).std().iloc[-1],
    )[window_length:]
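# The span/alpha identity the helper above relies on (pandas defines
# alpha = 2 / (span + 1) for ewm): inverting for a given decay rate gives
# span = 2 / (1 - decay_rate) - 1. A quick self-contained check:
decay_rate = 0.9
alpha = 1 - decay_rate        # 0.1
span = (2 / alpha) - 1        # 19.0
assert abs(2 / (span + 1) - alpha) < 1e-12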
def tsmom_daily(data, signal_lookback, vol_lookback=20):
    mul = get_contract_multipliers()[data.columns]
    # the legacy pd.ewmstd(data, vol_lookback, ...) used com=vol_lookback;
    # annualize daily vol by sqrt(256)
    vol = data.ewm(com=vol_lookback, min_periods=vol_lookback).std() * math.sqrt(256)
    signal = data.rolling(signal_lookback).mean()
    signal = signal / abs(signal)  # reduce to the sign (+1 / -1)
    position = signal / (vol * mul)
    return position.shift(1)
def ts(df, panel):
    hl = TS_HALFLIFE
    min_per = 12
    if panel:
        # scale the half-life and seed period by the size of the second index
        # level when df is a MultiIndex panel
        hl = hl * df.index.levels[1].shape[0]
        min_per = min_per * df.index.levels[1].shape[0]
    m = df.ewm(halflife=hl, min_periods=min_per).mean()
    std = df.ewm(halflife=hl, min_periods=min_per).std()
    return (df - m) / std
def calc_std(returns):
    downside_only = False
    if downside_only:
        returns = returns.copy()
        returns[returns > 0.0] = np.nan
    # halflife=20: four-week half-life (mid-term)
    b = returns.ewm(halflife=20, adjust=True, ignore_na=True).std().dropna()
    return b.iloc[-1]
def devol(self, _lambda=0.06, n_days=1):
    _com = (1 - _lambda) / _lambda
    self.df['LogReturns'] = np.log(self.df.Close.pct_change(periods=n_days) + 1)
    self.df['Vola'] = self.df.LogReturns.ewm(com=_com, ignore_na=True).std()[2:]
    self.df['DevolLogReturns'] = self.df.LogReturns / self.df.Vola
    self.df.set_index('Date', inplace=True)
def trend(self, tags, top_n=None, other=False, resample='D', cumulative=False,
          ewmaspan=None):
    """ show the supplied tags summed up per day """
    if top_n is not None:
        tags = self.top_n_tags(top_n, tags)
    D = self.D[tags] if tags is not None else self.D
    if other:
        D['other'] = self.D[[t for t in self.D.keys() if t not in tags]].sum(axis=1)
    D = D.resample(resample, label='left').sum()
    self._obfuscate(D)
    D = D.fillna(0)
    if ewmaspan is not None:
        ewma = D.ewm(span=ewmaspan).mean()
        ewmstd = D.ewm(span=2 * ewmaspan).std()
        if cumulative:
            ewmstd = ewmstd * 3
            ewma = ewma.cumsum()
    if cumulative:
        D = D.cumsum()
    alpha = 0.5 if not cumulative and ewmaspan is not None else 1
    ax = D.plot(linewidth=2, colormap=self.cmapname, legend=False, alpha=alpha)
    if ewmaspan is not None:
        colors = self.cmap(np.linspace(0., 1., len(D.keys())))
        if cumulative:
            for idx, k in enumerate(tags):
                ax.fill_between(D.index,
                                np.array(ewma[k] + ewmstd[k]).ravel(),
                                np.array(ewma[k] - ewmstd[k]).ravel(),
                                facecolor=colors[idx], alpha=0.2, linewidth=1)
        ewma.plot(style='--', legend=False, ax=ax, colormap=self.cmapname,
                  linewidth=2)
    ax.legend(ax.lines[:len(D.keys())],
              [line.get_label() for line in ax.lines[:len(D.keys())]],
              loc='best')
    ax.grid(True)
    ax.set_ylim(0, D.max().max())
    if cumulative:
        plt.ylabel('Time Spent (h)')
    else:
        plt.ylabel('Time Spent (h) per Interval (%s)' % resample)
    plt.xlabel('Interval ID')
def robust_vol_calc(x, days=35, min_periods=10, vol_abs_min=0.0000000001,
                    vol_floor=True, floor_min_quant=0.05,
                    floor_min_periods=100, floor_days=500):
    """
    Robust exponential volatility calculation, assuming a daily series of prices.

    We apply an absolute minimum level of vol (vol_abs_min), and a vol floor
    based on the lowest vol over recent history.

    :param x: data
    :type x: Tx1 pd.DataFrame

    :param days: Number of days in lookback (*default* 35)
    :type days: int

    :param min_periods: The minimum number of observations (*default* 10)
    :type min_periods: int

    :param vol_abs_min: The size of the absolute minimum (*default* 0.0000000001); 0.0 = not used
    :type vol_abs_min: float

    :param vol_floor: Apply a floor to volatility (*default* True)
    :type vol_floor: bool

    :param floor_min_quant: The quantile to use for the volatility floor (e.g. 0.05 means we use the 5% vol) (*default* 0.05)
    :type floor_min_quant: float

    :param floor_days: The lookback for calculating the volatility floor, in days (*default* 500)
    :type floor_days: int

    :param floor_min_periods: Minimum observations for the floor; until reached, the floor is zero (*default* 100)
    :type floor_min_periods: int

    :returns: pd.DataFrame -- volatility measure
    """
    # Standard deviation will be nan for the first min_periods non-nan values
    vol = x.ewm(span=days, min_periods=min_periods).std()
    vol[vol < vol_abs_min] = vol_abs_min
    if vol_floor:
        # Find the rolling 5% quantile point to set as a minimum
        vol_min = vol.rolling(floor_days,
                              min_periods=floor_min_periods).quantile(floor_min_quant)
        # set this to zero for the first value then propagate forward; ensures
        # we always have a value
        vol_min.iloc[0, 0] = 0.0
        vol_min = vol_min.ffill()
        # apply the vol floor
        vol_with_min = pd.concat([vol, vol_min], axis=1)
        vol_floored = vol_with_min.max(axis=1, skipna=False).to_frame()
    else:
        vol_floored = vol
    vol_floored.columns = ["vol"]
    return vol_floored
def __init__(self, prices,
             volFunc=lambda returns: returns.ewm(span=252).std(),
             name='Empirical'):
    self.name = name
    self.dates = prices.index
    self.returns = prices.pct_change()
    self.vol = volFunc(self.returns)
    self.volDyad = dict(
        (today, self.dyad(self.vol.loc[today.date()])) for today in self.dates)
    # empirical covariance: correlation of returns up to `today` (dropping the
    # first NaN row), scaled by the vol dyad
    self.empCov = dict(
        (today,
         self.returns.loc[:today.date()].iloc[1:].corr().to_numpy()
         * self.volDyad[today])
        for today in self.dates[1:])
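# The covariance construction above follows cov = corr * outer(vol, vol):
# an empirical correlation matrix scaled elementwise by the "dyad" (outer
# product) of the vol vector. Minimal sketch with illustrative numbers:
import numpy as np

vols = np.array([0.10, 0.20])
corr = np.array([[1.0, 0.5],
                 [0.5, 1.0]])
cov = corr * np.outer(vols, vols)   # elementwise scaling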
def ewma_mom_daily(data, short_lookback, long_lookback, vol_lookback=20):
    mkts = data.columns
    mul = get_contract_multipliers()[mkts]
    # the legacy pd.ewmstd/pd.ewma positional args used com, not span
    vol = data.ewm(com=vol_lookback, min_periods=vol_lookback).std() * math.sqrt(256)
    signal = data.ewm(com=short_lookback).mean() - data.ewm(com=long_lookback).mean()
    # rolling z-score using the longer lookback
    zscore = calc_zscore(signal, long_lookback)
    position = zscore / (vol * mul)
    return position.shift(1)
def get_raw_value(self):
    rtns = wind.get_wind_data("AShareEODPrices", "s_dq_pctchange").loc["2005-01-01":] / 100
    beta = Descriptor.Beta().get_raw_value()
    resid = {}
    common_index = sorted(set(rtns.index) & set(beta.index))
    R = get_estimation_universe().get_returns()
    for idx in common_index:
        row = rtns.loc[idx]
        # residual return after stripping the beta * market-return component
        resid[idx] = row - beta.loc[idx] * R.loc[idx]
    resid = pd.DataFrame(resid).T
    sigma = resid.ewm(halflife=self.T).std()
    return sigma
def ewma_mom_daily_signal(data, short_lookback, long_lookback, vol_lookback=20):
    # the legacy pd.ewmstd/pd.ewma positional args used com, not span
    vol = data.ewm(com=vol_lookback, min_periods=vol_lookback).std() * math.sqrt(256)
    signal = data.ewm(com=short_lookback).mean() - data.ewm(com=long_lookback).mean()
    # rolling z-score using the longer lookback
    zscore = calc_zscore(signal, long_lookback)
    position = zscore / vol
    return position.shift(1)
# requires: import statsmodels.api as sm
#           from statsmodels.regression.rolling import RollingOLS
def reversal_pl(btc_ret, long_only=True):
    # pd.ols was removed from pandas; statsmodels' RollingOLS is a modern
    # substitute for the rolling regression of returns on lagged returns
    exog = sm.add_constant(btc_ret.shift(1))
    betas = RollingOLS(btc_ret, exog, window=12 * 24 * 5).fit().params.iloc[:, 1]
    signal = timing_curve(betas * zscore(btc_ret))
    sigma = np.sqrt(365 * 24 * 12) * btc_ret.ewm(com=12 * 24 * 5,
                                                 min_periods=12 * 24 * 5).std()
    if long_only:
        view = (.3 * signal / sigma).clip(0, 9999999)
    else:
        view = .3 * signal / sigma
    t_cost = view.diff().abs() * .005
    pl = view.shift(1) * btc_ret
    net_pl = view.shift(1) * btc_ret - t_cost
    turnover = 365 * 12 * 24 * view.diff().abs().mean()
    print(calc_sharpe(pl))
    print(calc_sharpe(net_pl))
    print(turnover)
    return pd.DataFrame({'view': view, 'pl': pl, 'net_pl': net_pl})
def emstd(data, window):
    """Exponential Moving Standard Deviation: the exponentially weighted
    standard deviation of the price of a security over a specific number of
    periods.

    'data' is a pandas Series or DataFrame of prices; a ValueError is raised
    for any other data type.

    'window' is the number of observations. It must be a positive integer no
    greater than the length of the data, otherwise a ValueError is raised.
    """
    # todo: maybe add 'long' too?
    if not isinstance(window, int) or not 0 < window <= len(data):
        raise ValueError("'window' must be an integer "
                         "between 1 and %d." % len(data))
    if not isinstance(data, (pd.Series, pd.DataFrame)):
        raise ValueError("'data' must be a pandas Series or DataFrame.")
    return data.ewm(span=window).std()
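# Hedged usage sketch for emstd (illustrative prices):
import pandas as pd

prices = pd.Series([100.0, 101.5, 99.8, 102.3, 103.1])
emstd(prices, 3)   # EW std with span=3; the first value is NaN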
plt.show()

#d1=pd.datetime(2007,1,1)
#d2=pd.datetime(2009,12,31)

nerpu = data.apply(find_datediff, axis=1)
nerpu.plot()
plt.title("Nerpu")
plt.show()

## Shouldn't need changing
vol_lookback = 25
stdev_returns = (price - price.shift(1)).ewm(span=vol_lookback).std()
ann_stdev = stdev_returns * ROOT_DAYS_IN_YEAR
raw_carry = nerpu / ann_stdev
f_scalar = 30.0

raw_carry.plot()
plt.title("Raw carry")
plt.show()

forecast = raw_carry * f_scalar
c_forecast = cap_series(forecast)

data_to_plot = pd.concat([forecast, c_forecast], axis=1)
data_to_plot.columns = ['Forecast', 'Capped forecast']
def calc_zscore(df, mean_halflife=21, mean_seed_period=21, std_halflife=21,
                std_seed_period=21, smth_halflife=0, ewm=True,
                subtract_mean=True, cap=3.0, lag=0):
    """
    Calculate timeseries z-score (assuming normal distribution of input data)

    Parameters
    ----------
    df : DataFrame or Series
        DataFrame or Series object containing timeseries data
    mean_halflife : int, optional
        Half-life period (periodicity determined by index of df) for computing mean
    mean_seed_period : int, optional
        Seeding period (periodicity determined by index of df) for computing mean
    std_halflife : int, optional
        Half-life period (periodicity determined by index of df) for computing
        standard deviation
    std_seed_period : int, optional
        Seeding period (periodicity determined by index of df) for computing
        standard deviation
    smth_halflife : int, optional
        Smoothing half-life period (periodicity determined by index of df) for
        smoothing input data before computing z-score
    ewm : bool, optional
        If True, compute z-score based on ewm mean and standard deviation.
        If False, compute z-score based on simple mean and standard deviation.
    subtract_mean : bool, optional
        If True, subtract mean while computing z-score. If False, normalize the
        value by dividing by standard deviation.
    cap : float, optional
        Absolute cap for z-score
    lag : int, optional
        Periods (periodicity determined by index of df) by which to lag the z-score

    Returns
    -------
    score_df : DataFrame or Series
        DataFrame or Series object containing z-score
    """
    is_series = False
    if isinstance(df, pd.Series):
        df = pd.DataFrame(df)
        is_series = True
    elif not isinstance(df, pd.DataFrame):
        raise ValueError('df should be either a DataFrame or Series object')
    if mean_halflife < 0:
        raise ValueError('%d is not a valid mean half-life' % mean_halflife)
    if mean_halflife > df.shape[0]:
        raise ValueError('mean_halflife can not be larger than length of index of df')
    if mean_seed_period < 0:
        raise ValueError('%d is not a valid mean seed period' % mean_seed_period)
    if mean_seed_period > df.shape[0]:
        raise ValueError('mean_seed_period can not be larger than length of index of df')
    if std_halflife < 0:
        raise ValueError('%d is not a valid standard deviation half-life' % std_halflife)
    if std_halflife > df.shape[0]:
        raise ValueError('std_halflife can not be larger than length of index of df')
    if std_seed_period < 0:
        raise ValueError('%d is not a valid standard deviation seed period' % std_seed_period)
    if std_seed_period > df.shape[0]:
        raise ValueError('std_seed_period can not be larger than length of index of df')
    if smth_halflife < 0:
        raise ValueError('%d is not a valid smoothing half-life' % smth_halflife)
    if smth_halflife > df.shape[0]:
        raise ValueError('smth_halflife can not be larger than length of index of df')
    if not isinstance(ewm, bool):
        raise ValueError('ewm should be either True or False')
    if not isinstance(subtract_mean, bool):
        raise ValueError('subtract_mean should be either True or False')
    if cap <= 0:
        raise ValueError('%f is not a valid score cap' % cap)
    if lag < 0:
        raise ValueError('%d is not a valid lag period' % lag)
    if lag > df.shape[0]:
        raise ValueError('lag can not be larger than length of index of df')
    # apply smoothing
    if smth_halflife > 0:
        df = df.ewm(halflife=smth_halflife, min_periods=smth_halflife,
                    adjust=False).mean()
    # compute mean and standard deviation
    if ewm:
        mean_df = df.ewm(halflife=mean_halflife, min_periods=mean_seed_period,
                         adjust=False).mean()
        std_df = df.ewm(halflife=std_halflife, min_periods=std_seed_period,
                        adjust=False).std()
    else:
        mean_df = df.rolling(window=mean_halflife,
                             min_periods=mean_seed_period).mean()
        std_df = df.rolling(window=std_halflife,
                            min_periods=std_seed_period).std()
    # compute score
    if subtract_mean:
        score_df = (df - mean_df) / std_df
    else:
        score_df = df / std_df
    # cap score
    score_df = score_df.clip(-cap, cap)
    # lag score
    if lag > 0:
        score_df = score_df.shift(lag)
    if is_series:
        return pd.Series(score_df.squeeze())
    else:
        return score_df
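# Hedged usage sketch for calc_zscore (synthetic daily data; names illustrative):
import numpy as np
import pandas as pd

s = pd.Series(np.random.default_rng(1).normal(size=300),
              index=pd.date_range("2021-01-01", periods=300))
z = calc_zscore(s, mean_halflife=21, std_halflife=21, cap=3.0)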
def volatility(price, vol_lookback=25):
    return (price - price.shift(1)).ewm(span=vol_lookback,
                                        min_periods=vol_lookback).std()
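# Hedged usage sketch (illustrative price Series): with min_periods equal to
# the span, the first vol_lookback - 1 values are NaN.
import pandas as pd

price = pd.Series(range(100), dtype=float)
vol = volatility(price)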
def addEWSDev(self, fromIndex, addedSeriesName='ewstdev', win_length=1):
    # the legacy pd.ewmstd(series, win_length) used com=win_length
    self[addedSeriesName] = self[fromIndex].ewm(com=win_length).std()
def smoothSeriesEwmvar(self, series, span=5.0, adjust=True, halflife=None,
                       min_periods=0):
    # note: despite the "Ewmvar" name this returns the EW standard deviation
    # (the original called pandas.ewmstd); span and halflife are mutually
    # exclusive in pandas' ewm, so prefer halflife when it is given
    decay = {'halflife': halflife} if halflife is not None else {'span': span}
    return series.ewm(min_periods=min_periods, adjust=adjust,
                      ignore_na=True, **decay).std()
def spread_crossover(data_df, slow=1, fast=12):
    spread_log = pd.DataFrame(np.log(data_df.iloc[:, 0] * 100))
    data_df['spread_z_ma'] = ((spread_log - spread_log.expanding(min_periods=24).mean())
                              / spread_log.expanding(min_periods=24).std())
    data_df['spread_z_ema'] = ((spread_log - spread_log.ewm(halflife=12, min_periods=24).mean())
                               / spread_log.ewm(halflife=12).std())
    data_df['spread_z_ema'] = data_df['spread_z_ema'].rolling(window=3).mean()
    data_df['slow'] = data_df['US HY Spread'].rolling(slow).mean()
    data_df['fast'] = data_df['US HY Spread'].rolling(fast).mean()
    data_df['diff'] = (data_df['slow'] - data_df['fast']) * -1
    data_df['diff'] = data_df['diff'] + 1
    data_df['diff'] = np.log(data_df['diff'])
    data_df['tren_z_ma'] = ((data_df['diff'] - data_df['diff'].expanding(min_periods=24).mean())
                            / data_df['diff'].expanding(min_periods=24).std())
    data_df['tren_z_ma'] = data_df['tren_z_ma'].rolling(window=3).mean()

    trend_valuation_df = pd.concat([data_df['spread_z_ema'], data_df['tren_z_ma']], axis=1)
    trend_valuation_df.dropna(inplace=True)
    trend_valuation_df.plot()
    plt.show()

    algo_wghts_df = pd.DataFrame()
    wghts_array = []
    valuation_threshold_cheap = 1
    valuation_threshold_rich = -1.0
    trend_threshold_tightening = 0.1
    trend_threshold_widening = -0.1

    data_df['spread_z_ma'].plot()
    plt.show()

    for score in trend_valuation_df.values:
        valuation_score = score[0]
        trend_score = score[1]
        if trend_score >= -0.2 and valuation_score >= -1:
            wghts_array.append(min(1, abs(trend_score - valuation_score) / 1))
        else:
            wghts_array.append(0)
        #elif trend_score <= -0.1 and valuation_score <= valuation_threshold_cheap:
        #    wghts_array.append(-1)
        #elif valuation_score >= valuation_threshold_cheap:
        #    wghts_array.append(1)
        #else:
        #    wghts_array.append(0)

    wghts_df = pd.DataFrame(wghts_array, index=trend_valuation_df.index)
    long = wghts_df[wghts_df == 1].count()[0] / len(trend_valuation_df)
    neutral = wghts_df[wghts_df == 0].count()[0] / len(trend_valuation_df)
    short = wghts_df[wghts_df == -1].count()[0] / len(trend_valuation_df)
    wghts_df.columns = [data_df.columns.values[1]]
    wghts_df = wghts_df.shift(1)

    s1 = bt.Strategy('Valuation & Trend ', [bt.algos.WeighTarget(wghts_df),
                                            bt.algos.Rebalance()])
    return_data = data_df.iloc[:, 1].to_frame()
    return_data.columns = [data_df.columns.values[1]]
    strategy = bt.Backtest(s1, return_data)
    res = bt.run(strategy)
    res.plot(logy=True)
    res.display()
    print(long, neutral, short)
"""Now let's get some real data using quandl""" startDate = "2017-01-01" endDate = "2017-12-31" df = qd.get("WIKI/F", start_date=startDate, end_date=endDate) time = np.linspace(1, len(df['Adj. Close']), len(df['Adj. Close'])) returns = pd.Series.diff(df['Adj. Close']) / df['Adj. Close'] # Shift so that we're predicting today's close price from yesterday's returns: returns = returns.shift(-1)[:-1] k = 5 # Rolling average length sigma = pd.ewmstd(returns, span=k) * k # 'diffusion' coefficient mu = pd.ewma(returns, span=k) * k # 'drift' coefficient #del sigma.index.name # New length: N = len(df['Adj. Close']) # Number of sims to run: M = 100 XN = np.zeros([M, N]) # New time steps: dt = 1. / N """ NOT USED """ #xn = np.zeros(N) #xn[k] = df['Adj. Close'][k] #
def parse(date, mode):
    conn = sqlite3.connect('data/orderbook_' + date + '.db')
    freq = '5S'
    cursor = conn.cursor()
    # resLst = cursor.execute(SELECT_SQL, (exchange, pair))
    okex_df = pd.read_sql_query(SELECT_SQL, conn, params=('okex', ),
                                index_col='timestamp')
    # df = pd.read_sql_table('trades', conn)
    okex_df.index = pd.to_datetime(okex_df.index / 1000, unit='s')
    # print(df['price'].resample('1H').ohlc().tail())
    # print(df['amount'].resample('1H').sum().tail())
    # print(df['price'].resample('1H').mean().tail())
    # okex_mean_serial = okex_df['price'].resample(freq).mean()
    # okex_mean_serial.name = 'okex'
    poloniex_df = pd.read_sql_query(SELECT_SQL, conn, params=('poloniex', ),
                                    index_col='timestamp')
    poloniex_df.index = pd.to_datetime(poloniex_df.index / 1000, unit='s')
    conn.close()
    if mode == 1:
        res_df = pd.concat([okex_df, poloniex_df], axis=1)
        x = res_df.apply(exchange, axis=1)  # capture the result before printing
        print(x)
    elif mode == 2:
        # relative arbitrage profit after fees, both directions
        profit_ok_sell = 2 * (
            okex_df['bid1'] - poloniex_df['ask1']
            - (okex_df['bid1'] * 0.002 + poloniex_df['ask1'] * 0.0025)) / (
                okex_df['bid1'] + poloniex_df['ask1'])
        profit_ok_sell.name = 'ok sell and poloniex buy'
        profit_ok_buy = 2 * (
            poloniex_df['bid1'] - okex_df['ask1']
            - (okex_df['ask1'] * 0.002 + poloniex_df['bid1'] * 0.0025)) / (
                poloniex_df['bid1'] + okex_df['ask1'])
        profit_ok_buy.name = 'poloniex sell and ok buy'
        res_df = pd.concat([profit_ok_sell, profit_ok_buy], axis=1)
        # res_df.plot()
        # plt.figure()
        res_df.plot.hist(bins=20, alpha=0.5)
        # cost_ok_sell = okex_df['bid1'] * 0.001 + poloniex_df['ask1'] * 0.0015
        # cost_ok_sell.name = 'cost_ok_sell'
        # cost_ok_buy = okex_df['ask1'] * 0.001 + poloniex_df['bid1'] * 0.0015
        # cost_ok_buy.name = 'cost_ok_buy'
        # res_df = pd.concat([cost_ok_sell, cost_ok_buy], axis=1)
        # res_df = cost_ok_sell / profit_ok_sell
        # res_df.plot()
        plt.show()
    elif mode == 3:
        # absolute arbitrage profit after fees, both directions
        profit_ok_sell = okex_df['bid1'] - poloniex_df['ask1'] - (
            okex_df['bid1'] * 0.002 + poloniex_df['ask1'] * 0.0025)
        profit_ok_sell.name = 'ok sell and poloniex buy'
        profit_ok_buy = poloniex_df['bid1'] - okex_df['ask1'] - (
            okex_df['ask1'] * 0.002 + poloniex_df['bid1'] * 0.0025)
        profit_ok_buy.name = 'poloniex sell and ok buy'
        res_df = pd.concat([profit_ok_sell, profit_ok_buy], axis=1)
        res_df.plot()
        plt.show()
        # plt.figure()
        # res_df.plot.hist(bins=20, alpha=0.5)
    elif mode == 4:  # EWMA bands
        profit_ok_sell = okex_df['bid1'] - poloniex_df['ask1']
        profit_ok_sell.name = 'ok sell and poloniex buy'
        profit_ok_buy = poloniex_df['bid1'] - okex_df['ask1']
        profit_ok_buy.name = 'poloniex sell and ok buy'
        span = 20
        freq = '1H'
        # the legacy freq= argument of pd.ewma resampled first; approximate
        # that here by resampling to hourly bars before the ewm
        sell_h = profit_ok_sell.resample(freq).last()
        buy_h = profit_ok_buy.resample(freq).last()
        ewma = sell_h.ewm(span=span, adjust=True).mean()
        ewma.name = 'poloniex buy ewma1'
        ewmstd = sell_h.ewm(span=span).std()
        ewmstd.name = 'poloniex buy ewmstd1'
        ewma2 = buy_h.ewm(span=span, adjust=True).mean()
        ewma2.name = 'ok buy ewma2'
        ewmstd2 = buy_h.ewm(span=span).std()
        ewmstd2.name = 'ok buy ewmstd2'
        upper = ewma + 1.5 * ewmstd
        upper.name = 'poloniex buy'
        lower = ewma2 + 1.5 * ewmstd2
        lower.name = 'ok buy thres'
        res_df = pd.concat([ewma, upper, ewma2, lower], axis=1)
        res_df.plot()
        plt.show()
def calc_zscore_ew(df, lookback=24):
    # the legacy pd.ewma(df, lookback, ...) used com=lookback
    return ((df - df.ewm(com=lookback, min_periods=12).mean())
            / df.ewm(com=lookback, min_periods=12).std())
# print(playerData)
total_points = []
total_salary = []
unadjusted_points = []
for row in playerData.iterrows():
    oppt = row[1]['Oppt']
    week = row[1]['Week']
    year = row[1]['Year']
    pos = row[1]['Pos']
    if oppt == '-':
        continue
    #FFPG[int(row[0])] = np.mean(total_points[-win_size:])
    if EWMA and len(total_points) > 0:
        FFPG[int(row[0])] = pd.Series(total_points).ewm(span=win_size).mean().iloc[-1]
        price[int(row[0])] = pd.Series(total_salary).ewm(span=win_size).mean().iloc[-1]
        FDStd[int(row[0])] = pd.Series(unadjusted_points).ewm(span=win_size).std().iloc[-1]
    # ARIMA models need at least 6 points to adequately fit the data
    elif ARIMA and len(total_points) >= 6 and np.mean(total_points) > 10:
        # the input mathematica string needs to be formatted like {a,b,c}
        cmdString = '{' + ','.join(str(elem) for elem in total_points) + '}'
        # calls the mathematica script
        command = ('/usr/local/bin/MathematicaScript -script '
                   '~/FSA/1iaFantasy/mathTimeseries.sh %s' % cmdString)
        # reads the output of the mathematica script
        # that is printed to the terminal
        output = os.popen(command).read().split('\n')
        print(output)
country="US" yearband=5 ## blocks to divide data into #datatype="Three_assets" datatype="Developed_equities" if datatype=="Three_assets": data, initial_stdev, initial_risk_weights, yields = get_some_crossasset_data() elif datatype=="Developed_equities": data, initial_stdev, initial_risk_weights, yields = get_equities_data(case="devall") else: raise Exception() ## rolling stdev for estimates if used stdevest=pd.ewmstd(data, halflife=12)*(12**.5) for idx in range(12): stdevest.iloc[idx,:]=initial_stdev stdevest[stdevest==0]=np.nan #bootstrapped_weights=optimise_over_periods(data, "rolling", "bootstrap", rollyears=5, equalisemeans=True, equalisevols=True, # monte_carlo=50, monte_length=12*5) bootstrapped_weights=optimise_over_periods(data, "rolling", "shrinkage", rollyears=5, equalisevols=True, shrinkage_factors=(1.0, 0.8)) avg_size=1.0/len(data.columns) #windowsizes=[0.5]
ABuMarketDrawing.plot_candle_form_klpd(tsla_df, html_bk=True)

# Visualize the data with pandas
demo_list = np.array([2, 4, 16, 20])
demo_window = 3
pd.Series(demo_list).rolling(window=demo_window,
                             center=False).std() * np.sqrt(demo_window)

tsla_df_copy = tsla_df.copy()
# compute the investment return
tsla_df_copy['return'] = np.log(tsla_df['close'] / tsla_df['close'].shift(1))
# rolling standard deviation of returns
tsla_df_copy['mov_std'] = tsla_df_copy['return'].rolling(
    window=20, center=False).std() * np.sqrt(20)
# exponentially weighted standard deviation of returns
tsla_df_copy['std_ewm'] = tsla_df_copy['return'].ewm(
    span=20, min_periods=20, adjust=True).std() * np.sqrt(20)

tsla_df_copy[['close', 'mov_std', 'std_ewm', 'return']].plot(subplots=True, grid=True)

# plot moving averages
tsla_df.close.plot()
# ma 30
tsla_df.close.rolling(window=30).mean().plot()
# ma 60
tsla_df.close.rolling(window=60).mean().plot()
# ma 90
tsla_df.close.rolling(window=90).mean().plot()
plt.legend(['close', '30 mv', '60 mv', '90 mv'], loc='best')

# check the next-day trend after a low open and a strong close
NBER_rec.index.name = 'date'
for ticker in ticker_list:
    if ticker != 'USURTOT Index':
        continue
    NBER_rec_temp = NBER_rec.copy()
    temp_tick_data = bl_data[ticker].to_frame()
    temp_tick_data.dropna(inplace=True)
    look_back_sma = 24
    hl_ewm = 6
    ticker_ma = ((temp_tick_data - temp_tick_data.rolling(window=look_back_sma).mean())
                 / temp_tick_data.rolling(window=look_back_sma).std())
    ticker_z_ema = ((temp_tick_data - temp_tick_data.ewm(halflife=hl_ewm).mean())
                    / temp_tick_data.ewm(halflife=hl_ewm).std())
    recession_level, expansion_level = scenario(ticker, NBER_rec_temp, temp_tick_data)
    recession_ma, expansion_ma = scenario(ticker, NBER_rec_temp, ticker_ma)
    recession_ema, expansion_ema = scenario(ticker, NBER_rec_temp, ticker_z_ema)
    # plot
    new_idx = pd.date_range(temp_tick_data.index[0],
                            pd.Timestamp.today().date(), freq='B')
    temp_tick_data = temp_tick_data.reindex(new_idx, method='ffill')
    NBER_rec_temp = NBER_rec_temp.reindex(new_idx, method='ffill')
def spread_val_score(data_df):
    data_df = np.log(data_df * 100)
    data_df['spread_z_ema'] = ((data_df - data_df.ewm(halflife=60, min_periods=60).mean())
                               / data_df.ewm(halflife=60).std())
    data_df['spread_z_ema'].dropna(inplace=True)
    data_df['spread_z_ema'].plot()
    plt.show()
    return data_df['spread_z_ema']
data_index = pd.DataFrame()
for m in mkts.keys():
    # try the common quandl column names in turn until one works
    try:
        data_index[m] = quandl.get(mkts[m], authtoken=token).Last
    except Exception:
        try:
            data_index[m] = quandl.get(mkts[m], authtoken=token).Settle
        except Exception:
            try:
                data_index[m] = quandl.get(mkts[m], authtoken=token).Value
            except Exception:
                try:
                    data_index[m] = quandl.get(mkts[m], authtoken=token).value
                except Exception:
                    try:
                        data_index[m] = quandl.get(mkts[m], authtoken=token).Rate
                    except Exception:
                        print(m)

data_pct = data_index.pct_change()
# the legacy pd.ewma(data_pct, 260) / pd.ewmstd(data_pct, 260) used com=260
mu = data_pct.ewm(com=260).mean()
sd = data_pct.ewm(com=260).std()
zscores = (data_pct - mu) / sd
last = zscores.iloc[-2].dropna().sort_values()
last.plot(kind='barh', colormap='jet',
          ylim=[-3, 3]).get_figure().savefig('zscore.png', bbox_inches='tight')

e = Email(subject='Morning Update: Macro Dashboard')
e.add_attachment('zscore.png')
e.send()
def volatility(price, vol_lookback):
    price['volatility'] = (price['Close'] - price['Close'].shift(1)).ewm(
        span=vol_lookback).std()
    price['volatility'] = price['volatility'].fillna(0)
    return price
def devol(self, _lambda=0.06):
    _com = (1 - _lambda) / _lambda
    self.ts['Vola'] = self.ts.LogReturns.ewm(com=_com, ignore_na=True).std()
    self.ts['DevolLogReturns'] = self.ts.LogReturns / self.ts.Vola
def addBollBand_EW(self, fromIndex, addedSeriesName='BBand', scale=1, win_length=1):
    # the legacy pd.ewmstd(series, win_length) used com=win_length
    band = self[fromIndex].ewm(com=win_length).std()
    self[addedSeriesName + 'upper'] = scale * band + self[fromIndex]
    self[addedSeriesName + 'lower'] = -scale * band + self[fromIndex]
def ewbband(self, halflife):
    '''Create Exponential Weighted Bollinger Band.'''
    # note: the legacy pd.ewma/pd.ewmstd calls passed `halflife` positionally,
    # which the old API interpreted as com; the keyword below follows the
    # apparent intent of the parameter name
    shifted = self.df[self.close_index].shift(1)
    self.df[self.symbol + '.ewma'] = shifted.ewm(halflife=halflife).mean()
    self.df[self.symbol + '.ewmstd'] = shifted.ewm(halflife=halflife).std()
    self.df[self.symbol + '.ewbb_upper'] = (self.df[self.symbol + '.ewma']
                                            + self.df[self.symbol + '.ewmstd'])
    self.df[self.symbol + '.ewbb_lower'] = (self.df[self.symbol + '.ewma']
                                            - self.df[self.symbol + '.ewmstd'])
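# Minimal standalone sketch of the same EW Bollinger construction on a plain
# Series (illustrative data; the shift(1) keeps the bands out-of-sample):
import pandas as pd

close = pd.Series([10.0, 10.2, 10.1, 10.4, 10.3, 10.6])
mid = close.shift(1).ewm(halflife=2).mean()
width = close.shift(1).ewm(halflife=2).std()
upper, lower = mid + width, mid - width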
fast_ewma = price.ewm(span=Lfast).mean()
slow_ewma = price.ewm(span=Lslow).mean()
raw_ewmac = fast_ewma - slow_ewma

data_to_plot = pd.concat([price, fast_ewma, slow_ewma], axis=1)
data_to_plot.columns = ['Price', 'Fast', 'Slow']
data_to_plot[d1:d2].plot()
plt.show()

raw_ewmac[d1:d2].plot()
plt.title("Raw EWMAC")
plt.show()

## volatility adjustment
stdev_returns = (price - price.shift(1)).ewm(span=vol_lookback).std()
vol_adj_ewmac = raw_ewmac / stdev_returns

vol_adj_ewmac[d1:d2].plot()
plt.title("Vol adjusted")
plt.show()

## scaling adjustment
f_scalar = ewmac_forecast_scalar(Lfast, Lslow)
forecast = vol_adj_ewmac * f_scalar
cap_forecast = cap_series(forecast, capmin=-20.0, capmax=20.0)

data_to_plot = pd.concat([forecast, cap_forecast], axis=1)
data_to_plot.columns = ['Scaled Forecast', 'Capped forecast']
def std_space(s, years):
    return s.ewm(halflife=360 * years).std()