def test_pct_change_periods_freq(self): # GH 7292 rs_freq = self.tsframe.pct_change(freq='5B') rs_periods = self.tsframe.pct_change(5) assert_frame_equal(rs_freq, rs_periods) rs_freq = self.tsframe.pct_change(freq='3B', fill_method=None) rs_periods = self.tsframe.pct_change(3, fill_method=None) assert_frame_equal(rs_freq, rs_periods) rs_freq = self.tsframe.pct_change(freq='3B', fill_method='bfill') rs_periods = self.tsframe.pct_change(3, fill_method='bfill') assert_frame_equal(rs_freq, rs_periods) rs_freq = self.tsframe.pct_change(freq='7B', fill_method='pad', limit=1) rs_periods = self.tsframe.pct_change(7, fill_method='pad', limit=1) assert_frame_equal(rs_freq, rs_periods) rs_freq = self.tsframe.pct_change(freq='7B', fill_method='bfill', limit=3) rs_periods = self.tsframe.pct_change(7, fill_method='bfill', limit=3) assert_frame_equal(rs_freq, rs_periods) empty_ts = DataFrame(index=self.tsframe.index, columns=self.tsframe.columns) rs_freq = empty_ts.pct_change(freq='14B') rs_periods = empty_ts.pct_change(14) assert_frame_equal(rs_freq, rs_periods)
def test_pct_change_periods_freq(self, freq, periods, fill_method, limit): # GH 7292 rs_freq = self.tsframe.pct_change( freq=freq, fill_method=fill_method, limit=limit ) rs_periods = self.tsframe.pct_change( periods, fill_method=fill_method, limit=limit ) assert_frame_equal(rs_freq, rs_periods) empty_ts = DataFrame(index=self.tsframe.index, columns=self.tsframe.columns) rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) assert_frame_equal(rs_freq, rs_periods)
def replication_stats(df_price: pd.DataFrame, fund_name: str):
    """Summarise how each price column compares with the ``fund_name`` column.

    Prices are first resampled to 7-day bins (first observation per bin) and
    turned into simple returns.  All annualisation uses 52 periods per year.

    Args:
        df_price: Price frame with a datetime-like index (it is resampled).
        fund_name: Column of ``df_price`` used as the reference series.

    Returns:
        DataFrame with one row per price column and the columns
        'Correlation' (Pearson), 'Kendall tau', 'Tracking error',
        'R-squared', 'Sharpe ratio', 'Annual Return' and 'Maximum Drawdown'.
    """
    df_price = df_price.resample('7D').first()

    returns_track = df_price.pct_change()
    returns_fund = df_price[fund_name].pct_change()

    # Per-column return gap versus the fund; transposed so that the fund's
    # return vector broadcasts along the time axis.
    gap = returns_track.T - returns_fund.values

    rho = returns_track.corr(method="pearson")
    tau = returns_track.corr(method="kendall")
    annualiser = np.sqrt(52)

    df = pd.DataFrame()
    df['Correlation'] = rho[fund_name]
    df['Kendall tau'] = tau[fund_name]
    df['Tracking error'] = annualiser * gap.std(axis=1)
    df['R-squared'] = 1 - gap.var(axis=1) / returns_fund.var()
    # No risk-free rate is subtracted here.
    df['Sharpe ratio'] = annualiser * returns_track.mean() / returns_track.std()
    growth = df_price.iloc[-1] / df_price.iloc[0]
    df['Annual Return'] = growth ** (52 / len(df_price.index)) - 1
    df['Maximum Drawdown'] = max_drawdown(df_price.values)
    return df
def calculate_daily_return(data: pd.DataFrame) -> pd.DataFrame:
    """Return row-over-row percentage changes without the leading NaN row.

    Args:
        data: Numeric frame (float columns, no time column among the data).

    Returns:
        ``data.pct_change(1)`` with its first row removed.

    Example:
        >>> calculate_daily_return(data=pd.DataFrame([1, 2, 3, 4]))
    """
    changes = data.pct_change(periods=1)
    # The first row has no predecessor, so it is always dropped.
    return changes.iloc[1:]
def get_stocks(tickers, market, start_date, end_date, frequency):
    """Download adjusted closes for a market proxy and tickers, then resample.

    Args:
        tickers: Sequence of Yahoo ticker symbols.
        market: Market portfolio identifier.  'TA100' and 'TA25' are read
            through ``get_index_price``; 'SP500' is proxied by 'VFINX'; any
            other value is used directly as a Yahoo symbol.
        start_date: Start date as a 'dd/mm/YYYY' string.
        end_date: End date as a 'dd/mm/YYYY' string.
        frequency: 'Weekly' or 'Monthly'.

    Returns:
        Tuple ``(prices, changes)``: the resampled price frame (market column
        first, then one column per ticker) and its percentage changes without
        the first row.

    Raises:
        KeyError: If ``frequency`` is not one of the supported labels.
    """
    # Resampling rule for each supported frequency label.
    FREQ_DICT = {
        'Weekly': 'W-FRI',
        'Monthly': 'M',
    }
    rule = FREQ_DICT[frequency]

    start_yahoo = datetime.datetime.strptime(start_date, '%d/%m/%Y')
    end_yahoo = datetime.datetime.strptime(end_date, '%d/%m/%Y')

    def _adj_close(symbol, column):
        """Fetch ``symbol`` and return its last adjusted close per period."""
        series = web.get_data_yahoo(symbol, start_yahoo, end_yahoo)['Adj Close']
        # ``to_frame(column)`` is used instead of ``DataFrame(series,
        # columns=[column])``: the latter looks the new label up against the
        # series' own name ('Adj Close') and yields an all-NaN column.
        return series.resample(rule).last().to_frame(column)

    # Market portfolio
    if market in ('TA100', 'TA25'):
        prices = get_index_price(
            index_id=market,
            start_date=start_date,
            end_date=end_date,
            frequency=frequency,
        ).resample(rule).last()
    else:
        symbol = 'VFINX' if market == 'SP500' else market
        # Only the market series is forward-filled, as in the original call.
        prices = _adj_close(symbol, market).ffill()

    # Stock prices: one column per ticker, aligned on the union of dates.
    ticker_frames = [_adj_close(ticker, ticker) for ticker in tickers]
    if ticker_frames:
        prices = pd.concat([prices] + ticker_frames, join='outer', axis=1)

    changes = prices.pct_change()
    return prices, changes.iloc[1:]
def gen_fitness_value(weights: [float], universe_data: pd.DataFrame):
    """Score a weight vector by combining risk, entropy and positive-return terms.

    The score is ``VaR + entropy / max_entropy_renyi + prob_pos_returns``,
    rounded to five decimals, where the three terms come from
    ``gen_cond_var``, ``gen_entropy`` and ``gen_pos_returns``.

    Args:
        weights: Portfolio weights, passed through to the helper functions.
        universe_data: Price frame for the universe; converted to
            percentage changes before use.

    Returns:
        The fitness score rounded to 5 decimal places.
    """
    # portfolio returns
    returns = universe_data.pct_change()

    # CVaR (0.95 is passed straight to gen_cond_var)
    VaR = gen_cond_var(weights, returns, .95)

    # entropy
    n = returns.nunique().sum()  # number of unique returns across the universe
    # discrete probability of a uniform distribution over the return range
    p_x = 1 / ((returns.max().max() - returns.min().min()) + 1)

    # Discrete Renyi entropy is maximised when the distribution is uniform,
    # so this value is used to normalise the portfolio entropy.
    # NOTE(review): order-2 Renyi entropy is usually written
    # -log2(sum(p ** 2)); there is no leading minus here -- confirm against
    # the sign convention used by gen_entropy.
    max_entropy_renyi = math.log(n * (p_x**2), 2)
    entropy = gen_entropy(weights, returns, 'Renyi')

    # returns
    prob_pos_returns = gen_pos_returns(weights, returns, 'historical')

    # Per the original author, all objectives are unitless fractions in
    # [0, 1] and therefore weigh about equally; theoretical maximum
    # fitness score = 0 + 1 + 1 = 2.
    fitness_score = VaR + entropy / max_entropy_renyi + prob_pos_returns
    return round(fitness_score, 5)
def calc_info_ratio(data: pd.DataFrame) -> pd.Series:
    """Return the annualised return-to-volatility ratio of each column.

    Daily returns are the row-over-row percentage changes of ``data``
    (first row dropped).  The mean is annualised with 252 and the
    population standard deviation (``ddof=0``) with ``sqrt(252)``.

    Args:
        data: Price frame, one column per security.

    Returns:
        Series indexed by column name holding annualised return divided by
        annualised volatility.
    """
    daily_rtn = data.pct_change(1).iloc[1:]
    # Explicit per-column reductions: ``np.mean(frame)`` reduces over both
    # axes on recent pandas and would collapse every security into a single
    # number.  ``ddof=0`` matches what ``np.std`` computed.
    annual_rtn = daily_rtn.mean() * 252
    ann_vol = daily_rtn.std(ddof=0) * np.sqrt(252)
    info_ratio = np.divide(annual_rtn, ann_vol)
    return info_ratio
def beta(ret_carteira: pd.DataFrame, ret_ibvsp: pd.DataFrame) -> float:
    """Compute the portfolio beta against the Ibovespa by OLS regression.

    Args:
        ret_carteira (pd.DataFrame): daily returns of the portfolio.
        ret_ibvsp (pd.DataFrame): Ibovespa series.  NOTE(review): the code
            applies ``pct_change`` to it, so it is treated as a level/price
            series rather than as returns -- confirm with callers.

    Returns:
        float: slope of the regression of the first column (portfolio) on
        the second column (index), estimated with an intercept.
    """
    ret_carteira = ret_carteira.dropna()
    ret_ibvsp = ret_ibvsp.pct_change().dropna()

    # Keep only the dates present in both series.
    df = pd.concat(
        [ret_carteira, ret_ibvsp],
        axis=1,
        join='inner'
    )

    Y = df.iloc[:, 0]
    X = sm.add_constant(df.iloc[:, 1])

    fitted = sm.OLS(Y, X).fit()
    # ``params`` is label-indexed (constant first, then the regressor), so
    # the slope is fetched by position explicitly; plain ``params[1]`` is
    # deprecated positional indexing on a labelled Series.
    return fitted.params.iloc[1]
def calculate_financials(data:pd.DataFrame, rf:float=0.005)->pd.DataFrame:
    '''Build a per-column table of return and risk figures.

    Columns of the result: 'Days' (calendar days between first and last
    index entry), 'Start', 'End' (those two dates as YYYY-MM-DD), 'R'
    (total return), 'AR' (annualised with 365.25 days), 'Vol' (std of
    row-over-row returns), 'AVol' (Vol * sqrt(252)), 'AdjR' (AR / AVol),
    'MDD' and 'MDD_Date' (from ``max_drawdown``) and 'SR'
    ((AR - rf) / AVol).

    Args:
        data: Price frame with a datetime index, one column per asset.
        rf: Risk-free rate used for 'SR'.
    '''
    first_day, last_day = data.index[0], data.index[-1]
    invest_period = (last_day - first_day).days

    # The same scalar repeated for every column, so that it lines up with
    # the per-column statistics in the final concat.
    invest_period_srs = pd.Series(dict.fromkeys(data.columns, invest_period))
    start_srs = pd.Series(dict.fromkeys(data.columns, first_day.strftime("%Y-%m-%d")))
    end_srs = pd.Series(dict.fromkeys(data.columns, last_day.strftime("%Y-%m-%d")))

    R = (data.iloc[-1] - data.iloc[0]) / data.iloc[0]
    AR = (1 + R) ** (365.25 / invest_period) - 1

    # Row-over-row returns without the undefined first observation.
    data1 = data.pct_change()
    data1 = data1.drop(data1.index[0])
    Vol = data1.std()
    AVol = np.sqrt(252) * Vol
    AdjR = AR / AVol

    MDD, MDD_dates = max_drawdown(data)

    # The note below (in Chinese) records the overnight HKD interest
    # settlement rate, 0.50000, as of 11:15 am Hong Kong time on 1/11/2018;
    # presumably the source of the default rf=0.005 -- TODO confirm.
    ''' 香港時間1/11/2018 早上11時15分的結算率。 到期日 港元利息結算率 隔夜 0.50000 '''
    SR = (AR - rf) / AVol

    pieces = [invest_period_srs, start_srs, end_srs, R, AR, Vol, AVol, AdjR, MDD, MDD_dates, SR]
    df = pd.concat(pieces, axis=1)
    df.columns = ['Days', 'Start', 'End', 'R', 'AR', 'Vol', 'AVol', 'AdjR', 'MDD', 'MDD_Date', 'SR']
    return df
def get_returns(
    df: pd.DataFrame,
    return_type: ReturnType = "log",
) -> pd.DataFrame:
    """Calculates returns on a set of securities.

    Args:
        df: Pandas dataframe with the prices used to calculate returns,
            one column per security.
        return_type: Either `log`, `simple` or `diff` to specify how returns
            are calculated.

    Returns:
        Dataframe of returns without rows containing missing values.  Its
        columns are a two-level index ``(series, security)`` whose first
        level is always ``"adj_return"``.

    Raises:
        ValueError: If ``return_type`` is ``None`` or not a supported type.
            Previously this surfaced as an unbound-variable error.
    """
    if return_type is None:
        raise ValueError("return_type must be 'log', 'simple' or 'diff', not None")
    return_type = ReturnType(return_type)

    if return_type == ReturnType.LOG:
        returns = (df / df.shift(1)).apply(np.log).dropna()
    elif return_type == ReturnType.SIMPLE:
        returns = df.pct_change().dropna()
    elif return_type == ReturnType.DIFF:
        returns = df.diff().dropna()
    else:
        raise ValueError("Unsupported return type: {!r}".format(return_type))

    returns.columns = pd.MultiIndex.from_tuples(
        tuples=[("adj_return", security) for security in returns.columns],
        names=["series", "security"],
    )
    return returns
def capm(y: pd.Series, bases: pd.DataFrame, rf=0., fee=0.):
    """Regress the excess returns of ``y`` on those of ``bases`` (formula API).

    Args:
        y: Level/price series of the asset being explained.
        bases: Level/price frame of the explanatory assets.  Column names go
            into the regression formula unquoted.
        rf: Risk-free rate; divided by ``_freq(y.index)`` before use
            (presumably the number of periods per year -- TODO confirm).
        fee: Fee rate, scaled the same way as ``rf``.

    Returns:
        dict with 'alpha' (intercept times the frequency), 'betas', 'cumproxy'
        (cumulative value of the beta-weighted proxy), 'model' (the fitted
        regression) and 'residual' (cumulative value of ``y`` returns minus
        proxy returns).
    """
    freq = _freq(y.index)
    rf_step = rf / freq
    fee_step = fee / freq

    y_returns = y.pct_change()
    R = y_returns - rf_step
    R.name = y.name
    R_base = bases.pct_change().sub(rf_step, axis=0)

    # CAPM:
    # R = alpha + rf + beta * (Rm - rf)
    regressors = '+'.join(bases.columns)
    formula = f"Q('{y.name}') ~ {regressors}"
    model = OLS.from_formula(formula, R_base.join(R)).fit()
    alpha = model.params['Intercept'] * freq
    betas = model.params[bases.columns]

    # reconstruct artificial portfolio; whatever is not allocated to the
    # bases earns the per-period rf + fee
    proxy = R_base @ betas + (1 - betas.sum()) * (rf_step + fee_step)
    cumproxy = (1 + proxy).cumprod()

    # residual portfolio
    r = y_returns - cumproxy.pct_change()
    residual = (1 + r).cumprod()

    return {
        'alpha': alpha,
        'betas': betas,
        'cumproxy': cumproxy,
        'model': model,
        'residual': residual,
    }
def risk_fraction(self, data: pd.DataFrame, n: int = 3):
    """
    Computes the cumulative risk fraction of the system
    see ref: formula (6) of main paper

    The prices are converted to log returns, passed through
    ``self.sc.fit_transform`` and then ``self.pca.fit_transform``; the
    transformed values are stored on ``self.transformed_data``.

    :param data: (pd.DataFrame) end of month prices
                 shape = (n_samples, p_shares)
    :param n: (int) Number of principal components (3 by default);
              assumes the user has chosen the best n
    :return: (float) share of ``self.pca.explained_variance_`` carried by
             the first ``n`` components
    """
    # Remember the column names for the transformed frame
    asset_names = list(data)

    # Log returns, without rows that contain missing values
    log_returns = np.log(1 + data.pct_change()).dropna()

    scaled = self.sc.fit_transform(log_returns)
    components = self.pca.fit_transform(scaled)
    self.transformed_data = pd.DataFrame(components, columns=asset_names)

    variances = self.pca.explained_variance_
    # Total risk of system
    system_risk = np.sum(variances)
    # Risk associated with first n principal components
    return variances[:n].sum() / system_risk
def monte_carlo(period: int, n_inc: int, n_sim: int, type: str, data: pd.DataFrame, assets: List[str]):
    """Run a Monte Carlo simulation for each requested asset.

    Args:
        period: Passed through to the simulator.
        n_inc: Passed through to the simulator.
        n_sim: Passed through to the simulator.
        type: Simulation model; only 'GBM' (handled by ``gbm``) is supported.
        data: Price frame with one column per asset.
        assets: Names of the columns of ``data`` to simulate.

    Returns:
        The simulator outputs joined column-wise into one frame, an empty
        frame when ``assets`` is empty, or ``None`` (after printing a
        message) when an asset cannot be read from ``data``.

    Raises:
        ValueError: If ``type`` is not a supported simulation model.
    """
    df = pd.DataFrame()
    returns = data.pct_change()
    returns_mean = returns.mean()  # daily mean
    returns_std = returns.std()    # daily std

    for name in assets:
        # retrieve asset mean, std and most recent price
        try:
            asset_mean = returns_mean[name]
            asset_std = returns_std[name]
            asset_last_price = data[name].iloc[-1]
        except (KeyError, IndexError):
            print('asset not in data frame')
            return None

        # switch for monte carlo simulations (compare by value, not identity)
        if type == 'GBM':
            temp = gbm(asset_last_price, asset_mean, asset_std, period, n_inc, n_sim, name)
        else:
            raise ValueError("unsupported simulation type: {!r}".format(type))

        # append asset simulation to final data frame
        df = temp if df.empty else df.join(temp)

    return df
def capm(y: pd.Series, bases: pd.DataFrame, rf=0.0, fee=0.0):
    """Regress the excess returns of ``y`` on those of ``bases``.

    Args:
        y: Level/price series of the asset being explained.
        bases: Level/price frame of the explanatory assets.
        rf: Risk-free rate; divided by ``_freq(y.index)`` before use
            (presumably the number of periods per year -- TODO confirm).
        fee: Fee rate, scaled the same way as ``rf``.

    Returns:
        dict with "alpha" (intercept times the frequency), "betas",
        "cumproxy" (cumulative value of the beta-weighted proxy), "model"
        (the fitted regression) and "residual" (cumulative value of ``y``
        returns minus proxy returns).
    """
    freq = _freq(y.index)
    rf_step = rf / freq
    fee_step = fee / freq

    y_returns = y.pct_change()
    R = y_returns - rf_step
    R.name = y.name
    R_base = bases.pct_change().sub(rf_step, axis=0)

    # CAPM:
    # R = alpha + rf + beta * (Rm - rf)
    # The intercept is added as an explicit constant column; rows with
    # missing values are dropped by the estimator.
    design = R_base.assign(Intercept=1)
    model = OLS(R, design, missing="drop").fit()
    alpha = model.params["Intercept"] * freq
    betas = model.params[bases.columns]

    # reconstruct artificial portfolio; whatever is not allocated to the
    # bases earns the per-period rf + fee
    proxy = R_base @ betas + (1 - betas.sum()) * (rf_step + fee_step)
    cumproxy = (1 + proxy).cumprod()

    # residual portfolio
    r = y_returns - cumproxy.pct_change()
    residual = (1 + r).cumprod()

    return {
        "alpha": alpha,
        "betas": betas,
        "cumproxy": cumproxy,
        "model": model,
        "residual": residual,
    }
def _getDailyReturns(self, a_df: DataFrame = DataFrame()) -> DataFrame: # == (self._data / self._data.shift(1))-1 #new_df: DataFrame = a_df.pct_change(1) new_df: DataFrame = a_df.pct_change() new_df.iloc[0, :] = 0 new_df.columns = new_df.columns.str.replace(self._column, 'DailyReturns') return new_df
def returns(prices: pd.DataFrame, which: str = 'daily', period: str = 'a'):
    """Return daily, monthly, annual, total or accumulated returns of prices.

    Ex:
        - which = 'daily' (daily returns)
        - which = 'monthly' (monthly returns)
        - which = 'annual' (annual returns)
        - which = 'total' (total change over the whole sample)
        - which = 'acm' (accumulated returns)

    Args:
        prices (pd.DataFrame): closing prices with a datetime index.
        which (str, optional): kind of return wanted. Default: 'daily'.
        period (str, optional): only used when which = 'total'; rescales the
            total return to a monthly ('m') or annual ('a') rate assuming
            252 rows per year.  Any other value returns the total return
            unscaled.  Default: 'a'.

    Returns:
        pd.DataFrame or pd.Series.

    Raises:
        TypeError: if ``which`` is not one of the supported values.
    """
    r = prices.pct_change().dropna()

    if which == 'daily':
        return r

    if which == 'monthly':
        # Compound the daily returns inside each (year, month):
        # np.log1p(r) = np.log(1 + r) and np.expm1(x) = np.exp(x) - 1,
        # so expm1(sum(log1p(r))) == prod(1 + r) - 1.
        m_rets = r.groupby([r.index.year, r.index.month]).apply(
            lambda x: np.expm1(np.log1p(x).sum())
        )
        # Turn the (year, month) pairs into a monthly period index.
        m_rets.index = [
            dt.strptime(f'{d[0]}-{d[1]}', '%Y-%m') for d in m_rets.index
        ]
        m_rets.index = m_rets.index.to_period('M')
        return m_rets

    if which == 'annual':
        a_rets = r.groupby(r.index.year).apply(
            lambda x: np.expm1(np.log1p(x).sum())
        )
        a_rets.index = pd.to_datetime(a_rets.index.astype(str)).to_period('Y')
        return a_rets

    if which == 'total':
        rets = (prices.iloc[-1] - prices.iloc[0]) / prices.iloc[0]
        if period not in ('m', 'a'):
            return rets

        n_years = prices.shape[0] / 252
        if period == 'm':
            return (1 + rets) ** (1 / (12 * n_years)) - 1
        # period == 'a'
        return (1 + rets) ** (1 / n_years) - 1

    if which == 'acm':
        return (1 + r).cumprod()

    raise TypeError(
        "Tipo de retorno inválido: which -> 'daily', 'monthly', 'annual', 'total' ou 'acm'."
    )
def test_pct_change_shift_over_nas(self): s = Series([1., 1.5, np.nan, 2.5, 3.]) df = DataFrame({'a': s, 'b': s}) chg = df.pct_change() expected = Series([np.nan, 0.5, np.nan, 2.5 / 1.5 - 1, .2]) edf = DataFrame({'a': expected, 'b': expected}) assert_frame_equal(chg, edf)
def get_growth_rates_df(dataframe: pd.DataFrame) -> pd.DataFrame:
    """
    Compute row-over-row growth rates without filling gaps first.

    :param dataframe: the original data frame, with missing values
    :return: growth rates of the initial dataset (nan: infinite or unavailable)
    """
    rates = dataframe.pct_change(fill_method=None)

    # --- infinite rates (division by zero) become na (will be dropped later)
    return rates.replace([np.inf, -np.inf], np.nan)
def make_stats_maxence(df_price: pd.DataFrame):
    """Compare the return distributions of the first two price columns.

    Args:
        df_price: Price frame; only its first two columns enter the tests,
            while the descriptive statistics cover every column.

    Returns:
        4-tuple: ``scipy.stats.describe`` of the returns, then three
        formatted strings with the statistic and p-value of the paired
        t-test, the two-sample Kolmogorov-Smirnov test and Kendall's tau.
    """
    df_return = df_price.pct_change().dropna()
    first = df_return.iloc[:, 0]
    second = df_return.iloc[:, 1]

    t_tstat, p_tstat = stats.ttest_rel(first, second)  # paired t-test
    # KS: a small p-value means the two samples do not share a distribution
    t_KS, p_KS = stats.ks_2samp(first, second)
    tau, p_tau = stats.kendalltau(first, second)  # Kendall's tau

    return (
        stats.describe(df_return),
        "t test: t = %g p = %g" % (t_tstat, p_tstat),
        "KS test: t = %g p = %g" % (t_KS, p_KS),
        "KendallTau: t = %g p = %g" % (tau, p_tau),
    )
def test_pct_change_shift_over_nas(self): s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) df = DataFrame({"a": s, "b": s}) chg = df.pct_change() expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) edf = DataFrame({"a": expected, "b": expected}) assert_frame_equal(chg, edf)
def daily_returns(data: pd.DataFrame) -> pd.DataFrame:
    """Returns DataFrame with daily returns (percentage change)

    Rows in which every column is missing are dropped first; infinite
    values that remain are then replaced by NaN.

    :Input:
     :data: ``pandas.DataFrame`` with daily stock prices

    :Output:
     :ret: a ``pandas.DataFrame`` of daily percentage change of the given
         stock prices.
    """
    changes = data.pct_change()
    # Typically removes the first row, which has no previous price.
    changes = changes.dropna(how="all")
    return changes.replace([np.inf, -np.inf], np.nan)
def test_pct_change_periods_freq(self, freq, periods, fill_method, limit): # GH 7292 rs_freq = self.tsframe.pct_change(freq=freq, fill_method=fill_method, limit=limit) rs_periods = self.tsframe.pct_change(periods, fill_method=fill_method, limit=limit) assert_frame_equal(rs_freq, rs_periods) empty_ts = DataFrame(index=self.tsframe.index, columns=self.tsframe.columns) rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) assert_frame_equal(rs_freq, rs_periods)
def returns(df: pd.DataFrame, which: str='daily', period: str='a'):
    """Return daily, total or accumulated returns of a price dataframe.

    Ex:
        which = 'daily' returns df.pct_change().dropna() (daily returns);
        which = 'total' returns (df.iloc[-1] - df.iloc[0]) / df.iloc[0]
        for period = 'a', or that value rescaled through ``time_fraction``
        for period = 'd' or 'm';
        which = 'acm' returns (1 + df.pct_change().dropna()).cumprod().

    Args:
        df (pd.DataFrame): prices, with a datetime index when a rescaled
            total return is requested.
        which (str, optional): 'daily', 'total' or 'acm'. Default: 'daily'.
        period (str, optional): 'd', 'm' or 'a'; only used when
            which = 'total'. Default: 'a'.

    Returns:
        pd.DataFrame or pd.Series: daily returns (dataframe), total returns
        (series) or accumulated returns (dataframe).

    Raises:
        TypeError: if ``which`` or ``period`` is not a supported value.
    """
    if which == 'daily':
        return df.pct_change().dropna()

    if which == 'total':
        s = (df.iloc[-1] - df.iloc[0]) / df.iloc[0]
        if period == 'a':
            return s

        start = df.index[0].strftime('%d/%m/%Y')
        end = df.index[-1].strftime('%d/%m/%Y')
        if period == 'd':
            s = (1 + s) ** (1/252) - 1
            return (1 + s) ** (1 / time_fraction(start, end, 'd')) - 1
        if period == 'm':
            s = (1 + s) ** (1/12) - 1
            return (1 + s) ** (1 / time_fraction(start, end, 'm')) - 1
        raise TypeError("Período inválido: 'd', 'm' ou 'a'.")

    if which == 'acm':
        return (1 + df.pct_change().dropna()).cumprod()

    # A bare string cannot be raised; TypeError keeps the exception type
    # that the old ``raise "..."`` statement produced by accident.
    raise TypeError("Tipo de retorno inválido: which -> 'daily', 'total' ou 'acm'.")
def returns(prices: pd.DataFrame, which: str='daily', period: str='a', scaled: bool=True):
    """Return daily, total or accumulated returns of a price dataframe.

    Ex:
        which = 'daily' returns prices.pct_change().dropna();
        which = 'total' returns
        (prices.iloc[-1] - prices.iloc[0]) / prices.iloc[0], either as is
        (scaled=False) or converted to a monthly ('m') or annual ('a') rate
        assuming 252 rows per year;
        which = 'acm' returns (1 + prices.pct_change().dropna()).cumprod().

    Args:
        prices (pd.DataFrame): closing prices.
        which (str, optional): 'daily', 'total' or 'acm'. Default: 'daily'.
        period (str, optional): 'm' or 'a'; only used when which = 'total'
            and scaled is true. Default: 'a'.
        scaled (bool, optional): whether the total return is converted to a
            periodic rate. Default: True.

    Returns:
        pd.DataFrame or pd.Series: daily returns (dataframe), total returns
        (series) or accumulated returns (dataframe).

    Raises:
        TypeError: if ``which`` or ``period`` is not a supported value.
    """
    if which == 'daily':
        return prices.pct_change().dropna()
    if which == 'acm':
        return (1 + prices.pct_change().dropna()).cumprod()
    if which != 'total':
        raise TypeError("Tipo de retorno inválido: which -> 'daily', 'total' ou 'acm'.")

    first_row, last_row = prices.iloc[0], prices.iloc[-1]
    rets = (last_row - first_row) / first_row
    if not scaled:
        return rets

    n_years = prices.shape[0] / 252
    if period == 'm':
        exponent = 1 / (12 * n_years)
    elif period == 'a':
        exponent = 1 / n_years
    else:
        raise TypeError("Período inválido: 'm' ou 'a'.")
    return (1 + rets) ** exponent - 1
def _setSimpleReturnsTimely(self, a_letter: str = '', a_df: DataFrame = DataFrame()) -> DataFrame: if a_letter == 'W': return a_df.resample('W').ffill().pct_change() #.to_frame() elif a_letter == 'M': return a_df.resample('M').ffill().pct_change() #.to_frame() elif a_letter == 'Q': return a_df.resample('Q').ffill().pct_change() #.to_frame() elif a_letter == 'A': return a_df.resample('A').ffill().pct_change() #.to_frame() else: return a_df.pct_change() #.to_frame()
def ran_ports_avg_dly_ret(df,size_list,ports_names):
    """Build random equal-weight portfolios and return their average daily returns.

    For every size in ``size_list`` that many columns are drawn at random
    (without replacement) from the daily returns of ``df``; the portfolio
    return is the row-wise mean of the drawn columns.

    Args:
        df: Price frame for a group of stocks, one column per stock.
        size_list: Number of stocks in each portfolio.
        ports_names: Column labels for the result, one per portfolio.

    Returns:
        DataFrame with one column of average daily returns per portfolio.
    """
    daily = df.pct_change()[1:]  # drop the undefined first row

    # Draw all portfolios first, then average each one.
    draws = [daily.sample(n=size, replace=False, axis=1) for size in size_list]
    port_rets = pd.DataFrame(
        {position: draw.mean(axis=1) for position, draw in enumerate(draws)}
    )
    port_rets.columns = ports_names
    return port_rets
def compute_vol(df: pd.DataFrame, span: int=100) -> pd.DataFrame:
    '''
    Compute period volatility of returns as exponentially weighted
    moving standard deviation.

    Note:
        Missing prices are forward-filled **in place**, so the caller's
        ``df`` is modified by this call.

    Args:
        df (pd.DataFrame): Dataframe with price series in a single column.
        span (int): Span for exponential weighting.
    Returns:
        pd.DataFrame: Dataframe containing volatility estimates.
    '''
    # ``ffill`` replaces ``fillna(method='ffill')``, whose ``method``
    # keyword is deprecated; the in-place behaviour is kept for callers.
    df.ffill(inplace=True)
    r = df.pct_change()
    return r.ewm(span=span).std()
def graph(self, period, portfolio=None, drop_components=False):
    """Plot rebased series and return their return statistics.

    Every column of ``self.data`` is rebased to 100 at ``self.start``.  If a
    portfolio is given, a 'Portfolio' series (holdings-weighted sum, rebased
    the same way) is added.

    Args:
        period: Number of rows over which percentage changes are measured.
        portfolio: Optional mapping of column name to number of shares.
        drop_components: When a portfolio is given, remove its component
            columns so only the portfolio and the remaining series are shown.

    Returns:
        ``describe()`` statistics of the percentage changes (in percent),
        one row per series, plus 'shrp' and 'drawdown' columns, sorted by
        'shrp' in descending order.
    """
    def _rebase(series):
        """Scale ``series`` so that it equals 100 at ``self.start``."""
        return series * (100 / series[self.start])

    data = {col: _rebase(self.data[col]) for col in self.data.columns}

    if portfolio:
        holdings_value = sum(data[st] * sh for st, sh in portfolio.items())
        data['Portfolio'] = _rebase(holdings_value)
        if drop_components:
            for st in portfolio:
                del data[st]

    data = DataFrame(data)
    data.plot(figsize=(12, 8), grid=1)

    stat = (data.pct_change(period) * 100).describe().T
    # The risk-free rate is scaled from 252 rows down to ``period`` rows.
    excess = stat['mean'] - RISK_FREE_RATE * period / 252
    stat['shrp'] = excess / stat['std']
    stat['drawdown'] = data.apply(self._max_drawdown)
    return stat.sort_values('shrp', ascending=False)
def test_pct_change_numeric(self): # GH#11150 pnl = DataFrame( [np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange(0, 40, 10)] ).astype(np.float64) pnl.iat[1, 0] = np.nan pnl.iat[1, 1] = np.nan pnl.iat[2, 3] = 60 for axis in range(2): expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift(axis=axis) - 1 result = pnl.pct_change(axis=axis, fill_method="pad") tm.assert_frame_equal(result, expected)
def PnL(weights: pd.DataFrame, df: pd.DataFrame, returns_data=True):
    '''
    Portfolio profit/loss on each day.

    Args:
        weights: 1 x n DataFrame of portfolio weights, in the same column
            order as ``df``.
        df: Returns per asset (default) or prices per asset.
        returns_data: Set to False when ``df`` holds prices; they are then
            converted to percentage changes and incomplete rows are dropped.

    Returns:
        Single-column DataFrame named 'PnL', indexed like the returns.

    Raises:
        ValueError: If ``weights`` is not a DataFrame with exactly one row.
    '''
    # Type check first: reading ``.shape`` on a non-DataFrame would raise
    # AttributeError instead of the documented ValueError.
    if not isinstance(weights, pd.DataFrame) or weights.shape[0] != 1:
        raise ValueError("weights should be a 1 x n DataFrame")

    if not returns_data:
        df = df.pct_change().dropna()

    # Broadcast the single row of weights across every row of returns.
    weighted = df * weights.values[0]
    pnl = weighted.sum(axis=1).to_frame().rename(columns={0: 'PnL'})
    return pnl
def realized_volatility(
    price_df: pd.DataFrame,
    *vol_lag,
    annualized_factor: int = 252,
    allowed_number_na: int = 5
) -> pd.DataFrame:
    """Return the rolling annualized realized volatility.

    For every lag the rolling standard deviation of the returns is computed
    and annualized; the result is the element-wise maximum across lags, with
    a missing value in any lag making the maximum missing as well.

    Args:
        price_df: Daily prices as values, tickers as column names and
            observation dates as index.
        *vol_lag: One or more rolling window lengths (int >= 2).
        annualized_factor: Number of observations per year.
        allowed_number_na: Passed as ``min_periods`` of the rolling window.

    Returns:
        DataFrame shaped like ``price_df``.  Values are NaN until
        ``max(vol_lag)`` returns are available for a ticker.

    Raises:
        ValueError: If no lag is given or a lag is smaller than 2.
    """
    if not vol_lag:
        raise ValueError("at least one vol_lag needs to be provided.")
    if min(vol_lag) < 2:
        raise ValueError("vol_lag needs to be an 'int' larger or equal to 2.")

    # The returns do not depend on the lag, so compute them once.
    return_df = price_df.pct_change(fill_method=None)

    max_volatility_df = None
    for lag in vol_lag:
        volatility_sub_df = (
            return_df.rolling(window=lag, min_periods=allowed_number_na).std()
            * (annualized_factor ** 0.5)
        )
        if max_volatility_df is None:
            max_volatility_df = volatility_sub_df
        else:
            # Replacement for ``.max(level=0, skipna=False)``, whose ``level``
            # keyword no longer exists: group the stacked frames by index
            # label and take the NaN-propagating maximum of each column.
            max_volatility_df = (
                pd.concat([max_volatility_df, volatility_sub_df])
                .groupby(level=0, sort=False)
                .agg(lambda column: column.max(skipna=False))
            )

    # Before a price starts publishing, the value should be NaN regardless of
    # ``allowed_number_na``.  Forward-filling prices and then taking unfilled
    # returns reproduces the legacy default of ``pct_change()``.
    filled_returns = price_df.ffill().pct_change(fill_method=None).ffill()
    not_ready = filled_returns.rolling(window=max(vol_lag)).mean().isnull()
    adjustment_df = np.where(not_ready, np.nan, 1)
    max_volatility_df *= adjustment_df
    return max_volatility_df
def aagr(df: pd.DataFrame, window: int=10):  # TODO: don't include the window
    """Average annual growth rate.

    Takes the row-over-row percentage change of ``df`` and averages it over
    a rolling window; rows containing missing values are dropped.

    Parameters
    ----------
    df : `DataFrame`
        the values whose growth is measured
    window : `int`
        the rolling window size

    Returns
    -------
    return : `DataFrame`
        The rolling mean of the growth rates
    """
    growth = df.pct_change()
    averaged = growth.rolling(window).apply(np.mean)
    return averaged.dropna()
def show_rps(data: pd.DataFrame, interval: int = 1, start_index=None, show=False):
    """Return, and optionally plot, percentage changes over ``interval`` rows.

    Args:
        data: Frame of values; needs a datetime index when ``start_index``
            is used.
        interval: Number of rows between the two values being compared.
        start_index: Optional date (anything ``pd.to_datetime`` accepts);
            rows before it are left out of the result.
        show: Plot the result with one legend entry per column of ``data``.

    Returns:
        DataFrame of percentage changes.
    """
    rsp_data = data.pct_change(interval)

    if start_index:
        cutoff = pd.to_datetime(start_index)
        keep = rsp_data.index >= cutoff
        rsp_data = rsp_data.loc[keep]

    if show:
        plt.plot(rsp_data)
        plt.legend(data.columns, loc="best")
        plt.show()

    return rsp_data
import matplotlib.pyplot as plt
from random import randint

# Script fragment: download the adjusted close of '^GSPC' from a random
# start date until today and label every row with its weekday name.
now = datetime.datetime.now()
# NOTE(review): this name shadows the builtin ``list`` for the rest of the module.
list = '^GSPC'

# Draw random (year, month, day) triples until they form a valid date;
# impossible combinations such as 31 February raise and are retried.
start = None
while start is None:
    try:
        start = datetime.datetime(randint(1950,2015), randint(1,12), randint(1,31))
    except:
        pass
end = datetime.datetime(now.year, now.month, now.day)

# NOTE(review): ``pd.io.data`` only exists in old pandas releases.
df = pd.io.data.get_data_yahoo(list, start, end)['Adj Close']
df = DataFrame(df)
df['Returns'] = df.pct_change()
df['Date'] = df.index
df['Date'] = [time.date() for time in df['Date']]

# Weekday name of every row, derived from the string form of its date.
l = df.index.values
for i in range(0,len(l)):
    df.loc[l[i], 'DayoftheWeek'] = datetime.datetime.strptime(str(df.loc[l[i], 'Date']), '%Y-%m-%d').strftime('%A')

days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']

# Per-weekday variables, all initialised to zero or empty here; presumably
# filled in by code after this fragment -- TODO confirm.
Monday = 0
MonCount = 0
Mon = []
Tuesday = 0
TueCount = 0
Tue = []
Wednesday = 0
WedCount = 0
obj.describe()

## Correlation and Covariance
# NOTE(review): ``pandas.io.data`` only exists in old pandas releases.
import pandas.io.data as web

# Download ten years of daily data for each ticker.
all_data = {}
for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:
    all_data[ticker] = web.get_data_yahoo(ticker, '1/1/2000', '1/1/2010')

# One column per ticker; ``items`` works on Python 2 and 3 alike.
price = DataFrame({tic: data['Adj Close']
                   for tic, data in all_data.items()})
price
volume = DataFrame({tic: data['Volume']
                    for tic, data in all_data.items()})

# percent changes of the prices:
returns = price.pct_change()
returns.tail()

returns.MSFT.corr(returns.IBM)  # correlation of the overlapping non-NA
returns.MSFT.cov(returns.IBM)   # covariance of the overlapping non-NA
returns.corr()
returns.cov()
returns.corrwith(returns.IBM)
returns.corrwith(volume)

## Unique values, Value counts, and membership
obj = Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])
uniques = obj.unique()
uniques
obj.value_counts()
obj.value_counts()  # value frequencies
from pandas import value_counts
import matplotlib.pyplot as plt
from collections import defaultdict

plt.interactive(True)

names = ['AAPL', 'GOOG', 'MSFT', 'DELL', 'GS', 'MS', 'BAC', 'C']


def get_px(stock, start, end):
    """Print a progress line and return the adjusted close of ``stock``."""
    print('Get ' + stock)
    return web.get_data_yahoo(stock, start, end)['Adj Close']


# Business-day price panel with gaps forward-filled, its daily returns and
# a plot of the cumulative return of every name.
px = DataFrame({n: get_px(n, '1/1/2009', '6/1/2012') for n in names})
px = px.asfreq('B').ffill()
rets = px.pct_change()
((1 + rets).cumprod() - 1).plot()
print('block')


def calc_mom(price, lookback, lag):
    """Return cross-sectionally standardised momentum ranks.

    Args:
        price: Price frame, one column per asset.
        lookback: Number of rows over which the return is measured.
        lag: Number of rows the prices are shifted before measuring.

    Returns:
        Frame shaped like ``price``: for every row, the descending rank of
        each asset's lagged return minus the row mean of the ranks, divided
        by the row standard deviation.
    """
    mon_ret = price.shift(lag).pct_change(lookback)
    ranks = mon_ret.rank(axis=1, ascending=False)
    # The row statistics are indexed by date, so they must be aligned on
    # axis 0; the plain ``-`` and ``/`` operators align a Series with the
    # column labels and would produce nothing but NaN.
    demeaned = ranks.sub(ranks.mean(axis=1), axis=0)
    return demeaned.div(demeaned.std(axis=1), axis=0)


compound = lambda x: (1 + x).prod() - 1
daily_sr = lambda x: x.mean() / x.std()
def main():
    """
    Calculation and aggregation of summary statistics.

    Prints a tour of the descriptive-statistics API: reductions on a small
    frame with missing values, correlation/covariance of downloaded returns
    (skipped while the ticker list is empty) and unique values, value counts
    and membership tests.  Every ``print`` call passes a single argument so
    the output is the same under Python 2 and Python 3.
    """
    # Summary of statistics
    # return is not ndarray
    df = DataFrame([[1.4, np.nan],
                    [7.1, -4.5],
                    [np.nan, np.nan],
                    [0.75, -1.3]],
                   index=list('abcd'),
                   columns=['one', 'two'])
    print(df)
    print(df.sum())
    print(df.sum(axis=1))
    print(df.mean(axis=1))  # exclude nan
    print(df.mean(axis=1, skipna=False))
    print(df.idxmin())
    print(df.idxmax())
    print(df.cumsum())
    print(df.describe())

    # values are not number
    obj = Series(list('aabc') * 4)
    print(obj.describe())

    methods = ['count', 'min', 'max',  # 'argmin', 'argmax',
               'quantile', 'median', 'mad', 'var', 'std',
               'skew', 'kurt', 'cummin', 'cummax', 'cumprod',
               'diff', 'pct_change']
    for method in methods:
        print(u'「{0}」'.format(method))
        if hasattr(df, method):
            print(getattr(df, method)())
        else:
            # Some reductions (for example ``mad``) no longer exist in
            # recent pandas releases; keep the tour going instead of
            # failing.
            print('(not available in this pandas version)')
        print('')

    # Correlation and covariance
    all_data = {}
    # Ticker list left empty on purpose (was ['AAPL', 'IBM', 'MSFT']):
    # the Yahoo! download failed with
    # "IOError: after 3 tries, Yahoo! did not return a 200" for GOOG.
    lst = []
    for ticket in lst:
        all_data[ticket] = pd.io.data.get_data_yahoo(ticket, '1/1/2000', '1/1/2010')
    price = DataFrame({tic: data['Adj Close'] for tic, data in all_data.items()})
    volume = DataFrame({tic: data['Volume'] for tic, data in all_data.items()})
    if all_data:
        returns = price.pct_change()
        print(returns.tail())
        print('')
        print(returns.MSFT.corr(returns.IBM))
        print(returns.MSFT.cov(returns.IBM))
        print('')
        print(returns.corr())
        print(returns.cov())
        print('')
        print(returns.corrwith(returns.IBM))
        print(returns.corrwith(volume))

    # unique, frequency, belong
    # (a single space reproduces what ``print '', ''`` wrote)
    print(' ')
    obj = Series(list('cadaabbcc'))
    uniques = obj.unique()
    print(uniques)
    print(obj.value_counts())
    print(pd.value_counts(obj.values, sort=False))

    mask = obj.isin(['b', 'c'])
    print(mask)
    print(obj[mask])

    data = DataFrame({
        'Qu1': [1, 3, 4, 3, 4],
        'Qu2': [2, 3, 1, 2, 3],
        'Qu3': [1, 5, 2, 4, 4],
    })
    print(data)
    print(data.apply(pd.value_counts).fillna(0))