def iter_over_groups(data, group, features, key):
    row_num = 0
    for x in group:
        if row_num == 0:
            data_1 = data[data[key] == x]
            for i in features:
                data_1['avg_10' + i.lower() + '_' + key[0].lower()] = pd.rolling_mean(data_1[i], 7).shift(+1)
                data_1['sum_15' + i.lower() + '_' + key[0].lower()] = pd.rolling_mean(data_1[i], 10).shift(+1)
                data_1['avg_3' + i.lower() + '_' + key[0].lower()] = pd.rolling_mean(data_1[i], 3).shift(+1)
                data_1['expand' + i.lower() + '_' + key[0].lower()] = pd.expanding_mean(data_1[i]).shift(+1)
                data_1['expand_sum' + i.lower() + '_' + key[0].lower()] = pd.expanding_sum(data_1[i]).shift(+1)
        else:
            data_2 = data[data[key] == x]
            for i in features:
                data_2['avg_10' + i.lower() + '_' + key[0].lower()] = pd.rolling_mean(data_2[i], 7).shift(+1)
                data_2['sum_15' + i.lower() + '_' + key[0].lower()] = pd.rolling_mean(data_2[i], 10).shift(+1)
                data_2['avg_3' + i.lower() + '_' + key[0].lower()] = pd.rolling_mean(data_2[i], 3).shift(+1)
                # expanding stats for the current group's own columns
                data_2['expand' + i.lower() + '_' + key[0].lower()] = pd.expanding_mean(data_2[i]).shift(+1)
                data_2['expand_sum' + i.lower() + '_' + key[0].lower()] = pd.expanding_sum(data_2[i]).shift(+1)
            data_1 = data_1.append(data_2, ignore_index=True)
        row_num += 1
    return data_1
def plotall(basedir, plot=None):
    dirnames = [name for name in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, name))
                and os.path.isfile(os.path.join(basedir, name, 'onresults.csv'))]
    shortnames = getshortnames(dirnames)
    plotcount = len(plot)
    for dirname, shortname in zip(dirnames, shortnames):
        resultspath = os.path.join(basedir, dirname)
        hasdiverged = False
        if diverged(resultspath):
            hasdiverged = True
        resultsfile = os.path.join(resultspath, 'onresults.csv')
        if not os.path.isfile(resultsfile):
            continue
        csv = pd.read_csv(resultsfile)
        if isinstance(plot, list):
            for i, toplot in enumerate(plot):
                runaverages = False
                if isinstance(toplot, dict):
                    runaverages = toplot.get('runaverages', False)
                    toplot = toplot.get('plot')
                label = shortname + (" (diverged)" if hasdiverged else "")
                plt.subplot(1, plotcount, i + 1)
                if runaverages:
                    pd.expanding_mean(csv[toplot]).plot(label=label, legend=True)
                else:
                    csv[toplot].plot(label=label, legend=True)
        else:
            csv.plot(subplots=True)
    _setplotlabels(plot)
    plot_file = os.path.join(basedir, 'all.pdf')
    plt.savefig(plot_file, bbox_inches='tight')
    plt.show()
def returns_annualized(returns, geometric=True, scale=None, expanding=False):
    """Return the annualized cumulative returns.

    Parameters
    ----------
    returns : DataFrame or Series
    geometric : bool
        Link the returns geometrically.
    scale : None, scalar, or string (e.g. 12 for months in a year)
        If None, attempt to resolve from returns.
        If scalar, use this as the annualization factor.
        If string, pass it to the periodicity function to resolve the annualization factor.
    expanding : bool, default False
        If True, return expanding series/frames. If False, return the final result.
    """
    scale = _resolve_periods_in_year(scale, returns)
    if expanding:
        if geometric:
            n = pd.expanding_count(returns)
            return ((1. + returns).cumprod() ** (scale / n)) - 1.
        else:
            return pd.expanding_mean(returns) * scale
    else:
        if geometric:
            n = returns.count()
            return ((1. + returns).prod() ** (scale / n)) - 1.
        else:
            return returns.mean() * scale
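# Usage sketch for returns_annualized (illustrative, not from the original source):
# with monthly data and scale=12, the geometric branch reduces to the familiar
# CAGR formula (1 + r).prod() ** (12 / n) - 1. The series below is made up.
import pandas as pd

monthly_rets = pd.Series([0.01, -0.02, 0.015, 0.03])   # hypothetical monthly returns
n = monthly_rets.count()
cagr = (1. + monthly_rets).prod() ** (12. / n) - 1.     # geometric=True branch
simple_ann = monthly_rets.mean() * 12                   # geometric=False branch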
def chi2normal_transformation(df):
    risk_factor = pd.DataFrame(chi2.cdf(df, pd.rolling_mean(df, 150)),
                               columns=df.columns, index=df.index)
    risk_factor = (risk_factor - pd.expanding_mean(risk_factor)) / \
        pd.expanding_std(risk_factor)
    return risk_factor
def results():
    # import the csv we just created
    df = pd.read_csv('D:/Users/Antoine.Grappin/Dropbox/keyrus/python_scripts/tutorial_sample_files/performance_data_stocks_sentdex_dates_full.csv',
                     index_col='time',
                     parse_dates=True,
                     # usecols=['time',
                     #          'stock',
                     #          'currentValuation',
                     #          'percentChange'],
                     dtype={'currentValuation': np.float64,
                            'percentChange': np.float64})
    # sort index if unsorted
    df.sort_index(inplace=True)
    df['xMean'] = pd.expanding_mean(df['percentChange'], 1)
    print
    print(df.head())
    print
    pprint.pprint(df.describe())
    print
    pprint.pprint(df.dtypes)
    print
    return df['xMean']
def rolling_tstat(x):
    emean = pd.expanding_mean(x)
    estd = pd.expanding_std(x)
    t = np.arange(1, len(x) + 1)
    esqr = np.sqrt(t)
    rtstat = (emean / estd) * esqr
    return rtstat
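# The function above is an expanding one-sample t-statistic: at each point n it
# computes mean_n / (std_n / sqrt(n)). A minimal sketch with the current pandas
# API (an assumed equivalent, not part of the original code):
import numpy as np
import pandas as pd

def rolling_tstat_modern(x):
    n = pd.Series(np.arange(1, len(x) + 1), index=x.index)
    return x.expanding().mean() / x.expanding().std() * np.sqrt(n)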
def engineer_previous_game_averages(df, columns, previous_game_averages):
    for column in columns:
        df.sort_values(by=['date_game', 'team_id'], ascending=True, inplace=True)
        for offset in range(1, max(previous_game_averages) + 1):
            df[column + "-" + str(offset)] = df.groupby('team_id')[column].shift(periods=offset)
        for average in previous_game_averages:
            selector = [column + "-" + str(x) for x in range(1, average + 1)]
            df[column + str(average) + 'game-avg'] = df.loc[:, selector].mean(axis=1)
        for offset in range(1, max(previous_game_averages) + 1):
            del df[column + "-" + str(offset)]
        df[column + 'seasonavg'] = df.groupby('team_id')[column].apply(
            lambda x: pd.expanding_mean(x).shift())
    return df
def json_series(request, pk):
    screen = get_object_or_404(Screen, pk=pk)
    what = request.GET.get('mode', 'comp')  # choices: comp, hand
    ref = request.GET.get('ref', 'nap')     # choices: nap, bkb, mv, cm
    # filters = [
    #     RangeRule(name='range', lower=-5, upper=5),
    #     RollingRule(name='spike', count=3, tolerance=3, comp='LT')
    # ]
    # determine resampling rule
    rule = request.GET.get('rule', 'H')
    # if rule is None:
    #     series = screen.find_series()
    #     rule = 'H' if series.aantal() < 10000 else 'D'
    series = screen.get_series(ref, what, rule=rule)  # ,filters=filters)
    if series is None or series.empty:
        values = []
    else:
        values = zip(series.index, series.values)
    data = {'screen%s' % screen.nr: values}
    stats = request.GET.get('stats', '0')
    try:
        stats = int(stats)
        if stats:
            mean = pd.expanding_mean(series)
            std = pd.expanding_std(series)
            a = (mean - std).dropna()
            b = (mean + std).dropna()
            ranges = zip(a.index.to_pydatetime(), a.values, b.values)
            data.update({'stats%s' % screen.nr: ranges})
    except:
        pass
    return HttpResponse(json.dumps(data, ignore_nan=True, default=to_millis),
                        content_type='application/json')
def results():
    df = pd.read_csv('performance_data_sp500ish.csv', index_col='time', parse_dates=True)
    df.sort_index(inplace=True)
    df['x_mean'] = pd.expanding_mean(df['pc'], 0)
    return df['x_mean']
def daily_win_percentage(self):
    """
    Gets the winning percentage of each team through the season.

    Returns
    -------
    win_percent : winning percentage through time
    """
    X = self.game_stats
    store_outcome = pd.DataFrame(index=self.unique_days, columns=self.teams)
    for ii in range(len(X.index)):
        row = X.iloc[ii]
        day = X.index[ii]
        w_team = row['Wteam']  # winning team
        l_team = row['Lteam']  # losing team
        store_outcome.loc[day, w_team] = 1
        store_outcome.loc[day, l_team] = 0
    win_percent = pd.expanding_mean(store_outcome)
    self.daily_stat_dict['win_perc'] = win_percent
    print('win_perc saved to daily_stat_dict!')
def credit_momentum_test(data_df): test_data = data_df[['US HY Return','US Int. Trsy Return']].add(1).cumprod() rolling_period = 3 rolling_change = pd.DataFrame.pct_change(test_data,periods=rolling_period) column_one = test_data.columns.values[0] column_two = test_data.columns.values[1] data_diff = rolling_change[column_one] - rolling_change[column_two] data_diff['rolling_z'] = (data_diff - pd.expanding_mean(data_diff, min_periods=24))/ pd.expanding_std(data_diff, min_periods=24) weights = pd.DataFrame(index=data_diff.index) weights['bond_wght'] = data_diff['rolling_z'] weights['treasury_wght'] = data_diff['rolling_z'] * -1 weights = weights / 0.5 weights.dropna(inplace=True) weights = weights.clip(-1, 1) #weights['bond_wght'] = np.where(data_diff > 0, 1.0, np.where(data_diff< 0,-1.0, np.nan)) #weights['treasury_wght'] = np.where(data_diff > 0, -1.0, np.where(data_diff < 0,1.0, np.nan)) bond_wght = weights['bond_wght'].to_frame() bond_wght.columns = ['US HY Return'] treasury_wght = weights['treasury_wght'].to_frame() treasury_wght.columns = ['US Int. Trsy Return'] combined_wghts = pd.concat([bond_wght,treasury_wght], axis=1) combined_wghts = combined_wghts.shift(1) combined_wghts.dropna(inplace=True) weighted_returns = combined_wghts * data_df[['US HY Return','US Int. Trsy Return']] portfolio_return = weighted_returns.sum(axis=1).to_frame() portfolio_return = portfolio_return.add(1).cumprod() hy_mm = long_only_ew(portfolio_return, name='HY Momentum') return hy_mm, combined_wghts
def featurize(self, H):
    X = pd.DataFrame({
        'last_sh': H.shift(1).stack(),
        'history_sh': pd.expanding_mean(H).shift(1).stack(),
        'history_sh_vol': pd.expanding_std(H).shift(1).stack(),
        'nr_days': H.notnull().cumsum().stack()
    })
    return X
def VaR_norm(data, alpha=0.99, n=252):
    Z = stats.norm(0, 1).ppf(1 - alpha)  # inverse CDF (percent-point function) of the standard normal
    data['mean'] = pd.rolling_mean(data['return'], n)
    data['std'] = pd.rolling_std(data['return'], n)
    if math.isnan(data.tail(1).iat[0, 3]):
        data['mean'] = pd.expanding_mean(data['return'])
        data['std'] = pd.expanding_std(data['return'])
    data['delta'] = data['mean'] + Z * data['std']
    return data.tail(1).iat[0, 4]
def savemean(basedir, plot=False):
    count = 0
    dirnames = [name for name in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, name))
                and os.path.isfile(os.path.join(basedir, name, 'onresults.csv'))]
    for dirname in dirnames:
        resultspath = os.path.join(basedir, dirname)
        if diverged(resultspath):
            print("Diverged: " + resultspath)
            continue
        resultsfile = os.path.join(resultspath, 'onresults.csv')
        if not os.path.isfile(resultsfile):
            continue
        csv = pd.read_csv(resultsfile)
        if count == 0:
            csvsum = csv
        else:
            csvsum += csv
        count += 1
    savepath = os.path.join(basedir, 'onresults.csv')
    meancsv = csvsum / count
    meancsv.to_csv(savepath, index=False)
    if plot:
        plotcount = len(plot)
        for i, toplot in enumerate(plot):
            runaverages = False
            if isinstance(toplot, dict):
                runaverages = toplot.get('runaverages', False)
                toplot = toplot.get('plot')
            plt.subplot(1, plotcount, i + 1)
            if runaverages:
                pd.expanding_mean(meancsv[toplot]).plot()
            else:
                meancsv[toplot].plot()
        _setplotlabels(plot)
        plot_file = os.path.join(basedir, 'mean.pdf')
        plt.savefig(plot_file, bbox_inches='tight')
        plt.show()
    return savepath
def sharpe(returns, rfr=0, expanding=0):
    """
    returns: periodic return stream
    rfr: risk free rate
    expanding: bool
    """
    if expanding:
        excess = excess_returns(returns, rfr)
        return pd.expanding_mean(excess) / pd.expanding_std(returns)
    else:
        return excess_returns(returns, rfr).mean() / returns.std()
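# Sketch of the expanding branch above with the current pandas API (an assumption:
# excess_returns is treated here as simple subtraction of a constant risk-free
# rate, which may differ from the original helper):
def sharpe_expanding(returns, rfr=0):
    excess = returns - rfr
    return excess.expanding().mean() / returns.expanding().std()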
def expected_value(self, window=0, rebalanced=True, from_date=None, to_date=None):
    ret = None
    returns = self.returns(rebalanced, from_date, to_date)
    if window == 0:
        ret = np.asscalar(np.mean(returns))
    if window > 0:
        ret = pd.rolling_mean(returns, window)
    if window == -1:
        ret = pd.expanding_mean(returns)
    return ret
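# The three window modes above map onto the current pandas API as follows
# (pd.rolling_mean / pd.expanding_mean were removed in pandas 0.23); a sketch
# on a made-up return series:
import numpy as np
import pandas as pd

returns = pd.Series(np.random.normal(0.001, 0.01, 250))  # hypothetical returns
full_sample_mean = returns.mean()             # window == 0
rolling_mean = returns.rolling(21).mean()     # window > 0 (here 21 periods)
expanding_mean = returns.expanding().mean()   # window == -1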
def plotPanel(betaSeries, name):
    betaSeries = betaSeries["1995-01-01":]
    cumbetas = np.cumprod(betaSeries / 100 + 1) - 1
    fig = plt.figure()
    ax1 = fig.add_subplot(411)
    ax1.set_title(name)
    ax1.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(3))
    betaSeries.plot()
    ax2 = fig.add_subplot(412)
    ax2.set_title("Cumulative " + name)
    ax2.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(3))
    cumbetas.plot()
    ax3 = fig.add_subplot(413)
    ax3.set_title("Expanding Mean: " + name)
    ax3.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(3))
    pd.expanding_mean(betaSeries).plot()
    ax4 = fig.add_subplot(414)
    ax4.set_title("Rolling t-stat: " + name)
    ax4.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(3))
    rolling_tstat(betaSeries).plot()
    fig.tight_layout(pad=1.1)
def results():
    """ DOCSTRING """
    dataframe_a = pandas.read_csv('performance_data_sp500ish.csv',
                                  index_col='time', parse_dates=True)
    dataframe_a.sort_index(inplace=True)
    dataframe_a['expanding_mean'] = pandas.expanding_mean(
        dataframe_a['percent_change'], 0)
    dataframe_a['expanding_mean'].plot(label='Performance')
    pyplot.legend()
    pyplot.show()
def stat_select(self,stat): """ Gets the stats for all the teams for each day in the season. Parameters ---------- year : int , which year to grab data stat : string Can be: ['score', 'fgm', 'fga', 'fgm3', 'fga3', 'ftm', 'fta', 'or', 'dr', 'ast', 'to', 'stl', 'blk', 'pf', 'poss', 'oe', 'de', 'efg', 'eto', 'eor', 'eftr'] Not implemented: 'rpi','rpi_1', rpi_2, 'rpi_3' Returns ------- store_feature : dataframe with columns the team and rows the days values are the stats scored in the game store_outcome : dataframe with columns the team and rows the days values are 1 for win or 0 for loss. """ X = self.game_stats store_feature = pd.DataFrame(index = self.unique_days, columns=self.teams) #how it is represented in the game_data w_f = 'W' + stat l_f = 'L' + stat #loop through line and put the stat in correct places for ii in range(len(X.index)): day = X.index[ii] row = X.iloc[ii] w_team = row['Wteam'] #winning team w_feature = row[w_f] l_team = row['Lteam'] #losing team l_feature = row[l_f] store_feature.loc[day,w_team] = w_feature store_feature.loc[day,l_team] = l_feature # take the average of it through the season store_feature = pd.expanding_mean(store_feature) self.daily_stat_dict[stat] = store_feature print(stat + ' saved to daily_stat_dict!')
def movie_chart(request, movie_id):
    ratings = Rating.objects.filter(movie__id=movie_id)
    df = pd.DataFrame(model_to_dict(rating) for rating in ratings)
    df.index = df['timestamp']
    counts = df['rating']
    counts = counts.sort_index()
    series = pd.expanding_mean(counts).resample('M', how=np.max, fill_method='pad')
    response = HttpResponse(content_type='image/png')
    fig = plt.figure(figsize=(6, 4), facecolor="#272b30")
    plt.xticks(color="white")
    plt.yticks(color="white")
    series.plot()
    plt.title("Average Rating over Time", color="white")
    plt.xlabel("")
    canvas = FigureCanvas(fig)
    canvas.print_png(response)
    return response
def rater_chart(request, rater_id):
    ratings = Rating.objects.filter(rater_id=rater_id)
    df = pd.DataFrame(model_to_dict(rating) for rating in ratings)
    df.index = df['posted_at']
    ratings = df['rating']
    ratings = ratings.sort_index()
    series = pd.expanding_mean(ratings)
    series = series.resample('M', how=np.max, fill_method='pad')
    series = series[2:]
    response = HttpResponse(content_type='image/png')
    fig = plt.figure(figsize=(6, 5))
    series.plot()
    plt.title("User Average Rating Over Time")
    plt.xlabel("")
    canvas = FigureCanvas(fig)
    canvas.print_png(response)
    return response
def expanding_smoother(self, data, stype='rolling_mean', min_periods=None, freq=None):
    """
    Perform an expanding smoothing on the data.
    For complete help refer to http://pandas.pydata.org/pandas-docs/dev/computation.html

    :param data: pandas dataframe input data
    :param stype: smoothing type
    :param min_periods: periods
    :param freq: frequency

    smoothing types:
        expanding_count    Number of non-null observations
        expanding_sum      Sum of values
        expanding_mean     Mean of values
        expanding_median   Arithmetic median of values
        expanding_min      Minimum
        expanding_max      Maximum
        expanding_std      Unbiased standard deviation
        expanding_var      Unbiased variance
        expanding_skew     Unbiased skewness (3rd moment)
        expanding_kurt     Unbiased kurtosis (4th moment)
    """
    if stype == 'count':
        newy = pd.expanding_count(data, min_periods=min_periods, freq=freq)
    if stype == 'sum':
        newy = pd.expanding_sum(data, min_periods=min_periods, freq=freq)
    if stype == 'mean':
        newy = pd.expanding_mean(data, min_periods=min_periods, freq=freq)
    if stype == 'median':
        newy = pd.expanding_median(data, min_periods=min_periods, freq=freq)
    if stype == 'min':
        newy = pd.expanding_min(data, min_periods=min_periods, freq=freq)
    if stype == 'max':
        newy = pd.expanding_max(data, min_periods=min_periods, freq=freq)
    if stype == 'std':
        newy = pd.expanding_std(data, min_periods=min_periods, freq=freq)
    if stype == 'var':
        newy = pd.expanding_var(data, min_periods=min_periods, freq=freq)
    if stype == 'skew':
        newy = pd.expanding_skew(data, min_periods=min_periods, freq=freq)
    if stype == 'kurt':
        newy = pd.expanding_kurt(data, min_periods=min_periods, freq=freq)
    return newy
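# A minimal alternative sketch (not from the source): the same dispatch expressed
# with the Expanding accessor that replaced the pd.expanding_* functions in
# pandas 0.23+. The deprecated freq argument has no direct equivalent and is dropped.
import pandas as pd

def expanding_smoother_modern(data, stype='mean', min_periods=None):
    exp = data.expanding(min_periods=min_periods or 1)
    funcs = {'count': exp.count, 'sum': exp.sum, 'mean': exp.mean,
             'median': exp.median, 'min': exp.min, 'max': exp.max,
             'std': exp.std, 'var': exp.var, 'skew': exp.skew, 'kurt': exp.kurt}
    return funcs[stype]()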
def sortino_ratio(returns, mar=0, full=1, expanding=0, ann=1):
    """
    returns: periodic return stream
    mar: minimum acceptable return
    full: if true, use the entire series, else use the subset below mar
    expanding: bool
    """
    factor = ann and periodicity(returns) or 1.
    if expanding:
        avgexcess = pd.expanding_mean(excess_returns(returns, mar))
        avgexcess *= (ann and factor or 1.)
        downside = downside_deviation(returns, mar, full, expanding=1)
        downside *= (ann and np.sqrt(factor) or 1.)
        return avgexcess / downside
    else:
        avgexcess = excess_returns(returns, mar).mean()
        avgexcess *= (ann and factor or 1.)
        downside = downside_deviation(returns, mar, full)
        downside *= (ann and np.sqrt(factor) or 1.)
        return avgexcess / downside
def equity_vol_test(data_frame): rolling_period = 1 rolling_change = pd.DataFrame.pct_change(np.log(data_frame['Equity Volatility']),periods=rolling_period) rolling_change['rolling_z'] = (rolling_change - pd.expanding_mean(rolling_change, min_periods=24))/ pd.expanding_std(rolling_change, min_periods=24) rolling_change['rolling_z'] = rolling_change['rolling_z'].to_frame() weights = pd.DataFrame(index=rolling_change['rolling_z'].index) weights['bond_wght'] = rolling_change['rolling_z'] * -1 weights['treasury_wght'] = rolling_change['rolling_z'] weights = weights / 1.5 weights.dropna(inplace=True) weights = weights.clip(-1, 1) bond_wght = weights['bond_wght'].to_frame() bond_wght.columns = ['US HY Return'] treasury_wght = weights['treasury_wght'].to_frame() treasury_wght.columns = ['US Int. Trsy Return'] combined_wghts = pd.concat([bond_wght,treasury_wght], axis=1) combined_wghts = combined_wghts.shift(1) combined_wghts.dropna(inplace=True) weighted_returns = combined_wghts * data_frame[['US HY Return','US Int. Trsy Return']] portfolio_return = weighted_returns.sum(axis=1).to_frame() portfolio_return = portfolio_return.add(1).cumprod() eq_vol = long_only_ew(portfolio_return, name='Equity Volatility') return eq_vol, combined_wghts
def shift_columns(df, columns, shifts, averages):
    for column in columns:
        for shift in shifts:
            df[column + str(shift)] = df.groupby('player')[column].shift(periods=shift)
        for average in averages:
            selector = [column + str(x) for x in range(-1, average - 1, -1)]
            df[column + str(average) + 'avg'] = df.loc[:, selector].mean(axis=1)
        for shift in shifts:
            del df[column + str(shift)]
        df.sort_values(by=['date_game', 'player'], inplace=True)
        df[column + 'seasonavg'] = df.groupby('player')[column].apply(
            lambda x: pd.expanding_mean(x).shift())
    df['rest'] = df.groupby('player')['date_game'].diff().astype('timedelta64[D]')
    return df
def daily_win_percentage3(self): """ Calculates the third column of the rpi scores Inputs the winning percentage of the team played on that day """ perc = self.daily_stat_dict['win_perc2'] sched = self.schedule store_perc = pd.DataFrame(index = self.unique_days, columns=self.teams) for ii in range(len(perc.index)): # fill in nans for the first day since no teams have a # winning percentage yet if ii ==0: ind0 = perc.index[ii] store_perc.loc[ind0] = np.nan else: ind0 = perc.index[ii] ind1 = perc.index[ii-1] p = perc.loc[ind1] # percentages of team before that point s = sched.loc[ind0].values # teams played team_played = s[s!=0] #teams played ind_team = perc.columns.values[s!=0].astype(int) # get the teams store_perc.loc[ind0][ind_team] = p[team_played].values #now go through and have a running average of the teams played store_perc = pd.expanding_mean(store_perc) self.daily_stat_dict['win_perc3'] = store_perc print('win_perc3 saved to daily_stat_dict!')
def EMA_MACO(data, d):  # function header inferred from the MACD() calls below
    test = pd.ewma(data, span=d)
    return test

def MACD(data, FastLength, SlowLength, MACDLength):
    data['Diff'] = ''
    data['Diff'] = EMA_MACO(data['open'], FastLength) - EMA_MACO(data['open'], SlowLength)
    data['DEA'] = ''
    data['DEA'] = EMA_MACO(data['Diff'], MACDLength)
    data['MACD'] = ''
    data['MACD'] = data['Diff'] - data['DEA']
    return data

for h, k in [(5, 20), (15, 20), (5, 10), (5, 15), (10, 15)]:
    data1['fast_line'] = ''
    data1['slow_line'] = ''
    data1['fast_line'] = pd.rolling_mean(data1['close'], h)
    data1['slow_line'] = pd.rolling_mean(data1['close'], k)
    data1['fast_line'] = data1['fast_line'].fillna(value=pd.expanding_mean(data1['close']))
    data1['slow_line'] = data1['slow_line'].fillna(value=pd.expanding_mean(data1['close']))
    data1['dist_%s_%s' % (k, h)] = data1['fast_line'] - data1['slow_line']

for h in range(10, 26, 5):
    data1['fast_line'] = ''
    data1['slow_line'] = ''
    data1['fast_line'] = pd.rolling_max(data1['high'].shift(1), h)
    data1['slow_line'] = pd.rolling_min(data1['low'].shift(1), h)
    data1['fast_line'] = data1['fast_line'].fillna(value=pd.expanding_max(data1['high']))
    data1['slow_line'] = data1['slow_line'].fillna(value=pd.expanding_min(data1['low']))
    data1['dist_high_%s' % h] = data1['high'] - data1['fast_line']
    data1['dist_low_%s' % h] = data1['low'] - data1['slow_line']

data1 = MACD(data1, 12, 26, 9)
data2 = pd.read_csv('rb888_2017.csv', parse_dates=True, index_col='time')
data2.reset_index(inplace=True)
data2['log_return'] = np.log(data2['close'] / data2['close'].shift(1))
def expand_average_prod(self, station):
    assert 1 <= station <= 6, "Station does not exist."
    prod_data = self.prod_lists[station - 1]
    prod_expand_avg = pd.expanding_mean(pd.Series(prod_data))
    return self.prod_lists[7], prod_expand_avg.tolist()
# Difference functions allow us to identify seasonal changes when we see repeated up or downswings.
# An example from FiveThirtyEight:
# http://i2.wp.com/espnfivethirtyeight.files.wordpress.com/2015/03/casselman-datalab-wsj2.png?quality=90&strip=all&w=575&ssl=1

'''
Pandas Expanding Functions

In addition to the set of rolling_* functions, Pandas also provides a similar
collection of expanding_* functions which, instead of using a window of N values,
use all values up until that time.
'''

pd.expanding_mean(daily_store_sales)  # expanding average of daily sales, from the first date up to each date
pd.expanding_sum(daily_store_sales)   # expanding (cumulative) sum of daily sales up to each date

'''
EXERCISES

1. Plot the distribution of sales by month and compare the effect of promotions.
   Hint: try using hue in sns.
2. Are sales more correlated with the prior date, a similar date last year, or a similar date last month?
3. Plot the 15-day rolling mean of customers in the stores.
4. Identify the date with the largest drop in sales from the same date in the previous week.
5. Compute the total sales up until Dec. 2014.
6. When were the largest differences between 15-day moving/rolling averages?
   Hint: use rolling_mean and diff.
'''

# Plot the distribution of sales by month and compare the effect of promotions
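# Note: the pd.expanding_* helpers used above were deprecated in pandas 0.18 and
# removed in 0.23; on current pandas the equivalent calls use the .expanding()
# accessor. A sketch reusing the daily_store_sales series from above:
daily_store_sales.expanding().mean()  # same result as pd.expanding_mean(daily_store_sales)
daily_store_sales.expanding().sum()   # same result as pd.expanding_sum(daily_store_sales)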
def per_season_cummean(df, col_list):
    cumsum_df = (df.groupby(["PLAYER_NAME", "SEASON_ID"])
                   .apply(lambda x: add_game_date_pts_col(
                       pd.expanding_mean(x[col_list], min_periods=2),
                       x.GAME_DATE, x.OPP).reset_index(drop=True)))
    return (cumsum_df.reset_index()
                     .drop('level_2', axis=1)
                     .rename(columns=dict(zip(col_list, map(lambda x: 'C_' + x, col_list)))))
def sma(arg, n):
    """
    If n is 0 then return the ltd mean; else return the n day mean
    """
    if n == 0:
        return pd.expanding_mean(arg)
    else:
        return pd.rolling_mean(arg, n, min_periods=n)
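# Equivalent sketch with the current pandas API (assumed, not part of the source):
# the n == 0 case is the life-to-date (expanding) mean, otherwise an n-period mean
# that stays NaN until n observations are available.
def sma_modern(arg, n):
    if n == 0:
        return arg.expanding().mean()
    return arg.rolling(n, min_periods=n).mean()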
print "AP3 hits: %d Hit rate: %.2f%%" % (AP3_hits, AP3_hits / float(len(AP3_DELAY))) print "AP5 hits: %d Hit rate: %.2f%%" % (AP5_hits, AP5_hits / float(len(AP5_DELAY))) print "\nAP3 average miss del#ay: " + str(np.mean(np.array(AP3_miss_delays))) print "AP5 average miss delay: " + str(np.mean(np.array(AP5_miss_delays))) print "\nAP3 average hit delay: " + str(np.mean(np.array(AP3_hit_delays))) print "AP5 average hit delay: " + str(np.mean(np.array(AP5_hit_delays))) print "\nAP3 average total delay: " + str(np.mean(np.array(AP3_DELAY))) print "AP5 average total delay: " + str(np.mean(np.array(AP5_DELAY))) df1 = pd.DataFrame({'delay': AP3_DELAY}) rm1 = pd.expanding_mean(df1, 10) df2 = pd.DataFrame({'delay': AP5_DELAY}) rm2 = pd.expanding_mean(df2, 10) #plt.plot(range(1,len(rm1) + 1), rm1, 'r') #plt.plot(range(1,len(rm2) + 1), rm2, 'b') #plt.plot(range(1,len(AP3_DELAY) + 1), AP3_DELAY, 'r', label='Single Median - AP1') #plt.plot(range(1,len(AP5_DELAY) + 1), AP5_DELAY, 'b', label='2-median - S3') #plt.xlabel('Number of requests', fontsize=18) #plt.ylabel('Delay [ms]', fontsize=16) #plt.plot(range(1,len(AP3_td) + 1), AP3_td, 'r--', range(1,len(AP5_td) + 1), AP5_td, 'g--') #plt.show()
from scipy import stats

all_rewards = np.array([])
all_times = np.array([])
for runid in range(10001, 10018, 1):
    df = pd.read_csv("final_results/new_results" + str(runid) + "-4.csv")
    mean_reward = df.rewards.mean()
    duration = df.times[len(df.times) - 1] - df.times[0]
    all_times = np.append(all_times, [duration])
    all_rewards = np.append(all_rewards, [mean_reward])

em = pd.expanding_mean(all_rewards)
em_times = pd.expanding_mean(all_times)
x = np.arange(0, len(em), 1)

print("Overall mean time per runid (in sec)", all_times.mean())
print("Overall mean cumulative reward", all_rewards.mean())
print("SE of the times (in sec)", stats.sem(all_times))
print("SE of the mean cumulative rewards", stats.sem(all_rewards))

plt.plot(x, em)
plt.show()
raw = requests.get("http://www.google.com/finance/getprices?i=" + interval + "&p=" + lookback + "d&f=c&df=cpct&q=" + symbol).text # Take the data and put it into a DataFrame raw = raw.split()[7:] data = pd.DataFrame(raw) data = data.astype("float") data["price"] = data[0] del data[0] # We only need 60 minutes worth of data if len(data["price"] >= 60): data["price"] = data["price"][-60:] # Columns for expanding mean and standard deviation data["mean"] = pd.expanding_mean(data["price"]) data["vol"] = pd.expanding_std(data["price"]) # Linear regression on price data x = range(len(data["price"][-60:])) y = data["price"][-60:].values A, B = curve_fit(f, x, y) # Print the trend to the console if A[0] < 0: print("downtrend") else: print("uptrend") # Plot window plt.figure(1)
# var_list = ['dp', 'dy', 'ep', 'de', 'rvol', 'bm', 'ntis', 'tbl', 'lty', 'ltr', # 'tms','dfy','dfr','infl'] econ_var = ['dp', 'dy', 'ep', 'de', 'rvol', 'bm', 'ntis', 'tbl', 'lty', 'ltr', 'tms','dfy','dfr','infl'] tech_var = ['ma_1_9', 'ma_1_12', 'ma_2_9', 'ma_2_12', 'ma_3_9', 'ma_3_12', 'mom_9', 'mom_12', 'vol_1_9', 'vol_1_12', 'vol_2_9', 'vol_2_12', 'vol_3_9', 'vol_3_12'] var_list = econ_var + tech_var # get data for specified date range df_sub = df[beg_date_init:end_date_oos] # historical average (ha) forecast init_obs = len(pd.date_range(beg_date_init, beg_date_oos, freq='M')) # Should be 181 obs ha_forecast = pd.expanding_mean(df_sub['log_equity_premium'], min_periods=init_obs) ha_forecast = ha_forecast.shift(1) ha_err = df_sub['log_equity_premium'][beg_date_oos:end_date_oos] - \ ha_forecast[beg_date_oos:end_date_oos] ha_msfe = np.mean(np.power(ha_err, 2)) ha_msfe_exp = np.mean(np.power(ha_err[df_sub['recession'] == 0], 2)) ha_msfe_rec = np.mean(np.power(ha_err[df_sub['recession'] == 1], 2)) # initialize dictionary of lists d = {} for i in ['msfe', 'msfe_exp', 'msfe_rec', 'msfe_adj', 'p_value', 'r2', 'r2_exp', 'r2_rec', 'sq bias', 'rem term']: d[i] = [] # lag the x variables df[var_list] = df[var_list].shift(1)
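# Hedged sketch of the historical-average (HA) benchmark above with the current
# pandas API: an expanding mean of the log equity premium, shifted one period so
# each forecast uses only data available at forecast time. df_sub and init_obs
# are the objects defined in the surrounding script.
ha_forecast = (df_sub['log_equity_premium']
               .expanding(min_periods=init_obs)
               .mean()
               .shift(1))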
def get_context_data(self, **kwargs): context = super(WellChartView, self).get_context_data(**kwargs) well = Well.objects.get(pk=context['pk']) name = unicode(well) options = { 'rangeSelector': { 'enabled': True, 'inputEnabled': True, }, 'navigator': { 'adaptToUpdatedData': True, 'enabled': True }, 'chart': { 'type': 'arearange', 'zoomType': 'x' }, 'title': { 'text': name }, 'xAxis': { 'type': 'datetime' }, 'yAxis': [{ 'title': { 'text': 'm tov NAP' } }], 'tooltip': { 'valueSuffix': ' m', 'valueDecimals': 2, 'shared': True, }, 'legend': { 'enabled': True }, 'plotOptions': { 'line': { 'marker': { 'enabled': False } } }, 'credits': { 'enabled': True, 'text': 'acaciawater.com', 'href': 'http://www.acaciawater.com', }, } series = [] xydata = [] for screen in well.screen_set.all(): name = unicode(screen) data = screen.to_pandas(ref='nap') xydata = zip(data.index.to_pydatetime(), data.values) series.append({ 'name': name, 'type': 'line', 'data': xydata, 'zIndex': 1, }) mean = pd.expanding_mean(data) # series.append({'name': 'gemiddelde', # 'type': 'line', # 'data': zip(mean.index.to_pydatetime(), mean.values), # 'linkedTo' : ':previous', # }) std = pd.expanding_std(data) a = (mean - std).dropna() b = (mean + std).dropna() ranges = zip(a.index.to_pydatetime(), a.values, b.values) series.append({ 'name': 'spreiding', 'data': ranges, 'type': 'arearange', 'lineWidth': 0, 'fillOpacity': 0.2, 'linkedTo': ':previous', 'zIndex': 0, }) if len(xydata) > 0: mv = [] for i in range(len(xydata)): mv.append((xydata[i][0], screen.well.maaiveld)) series.append({'name': 'maaiveld', 'type': 'line', 'data': mv}) options['series'] = series context['options'] = json.dumps( options, default=lambda x: int(time.mktime(x.timetuple()) * 1000)) context['object'] = well return context
def comput_idicators(df, trading_days, required, save_file, save_address, whole=1): # TODO:net_value has some problem. # columns needed col = ['index_price', 'Interest_rate', 'nav', 'rebalancing', 'stoploss'] df_valid = df.ix[:, col] start_balance = df.index[df['rebalancing'] == 1][0] df_valid = df_valid[df_valid.index >= start_balance] # daily return df_valid['return'] = np.log(df['nav']) - np.log(df['nav'].shift(1)) # benchmark_net_value df_valid[ 'benchmark'] = df_valid['index_price'] / df_valid['index_price'].ix[0] # benchmark_return df_valid['benchmark_return'] = (df_valid['benchmark']- df_valid['benchmark'].shift(1))/\ df_valid['benchmark'].shift(1) # Annualized return df_valid['Annu_return'] = pd.expanding_mean( df_valid['return']) * trading_days # Volatility df_valid.loc[:, 'algo_volatility'] = pd.expanding_std( df_valid['return']) * np.sqrt(trading_days) df_valid.loc[:, 'xret'] = df_valid[ 'return'] - df_valid['Interest_rate'] / trading_days / 100 df_valid.loc[:, 'ex_return'] = df_valid['return'] - df_valid[ 'benchmark_return'] def ratio(x): return np.nanmean(x) / np.nanstd(x) # sharpe ratio df_valid.loc[:, 'sharpe'] = pd.expanding_apply(df_valid['xret'], ratio)\ * np.sqrt(trading_days) # information ratio df_valid.loc[:, 'IR'] = pd.expanding_apply(df_valid['ex_return'], ratio)\ * np.sqrt(trading_days) # Sortino ratio def modify_ratio(x, re): re /= trading_days ret = np.nanmean(x) - re st_d = np.nansum(np.square(x[x < re] - re)) / x[x < re].size return ret / np.sqrt(st_d) df_valid.loc[:, 'sortino'] = pd.expanding_apply( df_valid['return'], modify_ratio, args=(required, )) * np.sqrt(trading_days) # Transfer infs to NA df_valid.loc[np.isinf(df_valid.loc[:, 'sharpe']), 'sharpe'] = np.nan df_valid.loc[np.isinf(df_valid.loc[:, 'IR']), 'IR'] = np.nan # hit_rate wins = np.where(df_valid['return'] >= df_valid['benchmark_return'], 1.0, 0.0) df_valid.loc[:, 'hit_rate'] = wins.cumsum() / pd.expanding_apply(wins, len) # 95% VaR df_valid['VaR'] = -pd.expanding_quantile(df_valid['return'], 0.05)*\ np.sqrt(trading_days) # 95% CVaR df_valid['CVaR'] = -pd.expanding_apply(df_valid['return'], lambda x: x[x < np.nanpercentile(x, 5)].mean())\ * np.sqrt(trading_days) if whole == 1: # max_drawdown def exp_diff(x, type): if type == 'dollar': xret = pd.expanding_apply(x, lambda xx: (xx[-1] - xx.max())) else: xret = pd.expanding_apply( x, lambda xx: (xx[-1] - xx.max()) / xx.max()) return xret # dollar # xret = exp_diff(df_valid['cum_profit'],'dollar') # df_valid['max_drawdown_profit'] = abs(pd.expanding_min(xret)) # percentage xret = exp_diff(df_valid['nav'], 'percentage') df_valid['max_drawdown_ret'] = abs(pd.expanding_min(xret)) # max_drawdown_duration: # drawdown_enddate is the first time for restoring the max def drawdown_end(x, type): xret = exp_diff(x, type) minloc = xret[xret == xret.min()].index[0] x_sub = xret[xret.index > minloc] # if never recovering,then return nan try: return x_sub[x_sub == 0].index[0] except: return np.nan def drawdown_start(x, type): xret = exp_diff(x, type) minloc = xret[xret == xret.min()].index[0] x_sub = xret[xret.index < minloc] try: return x_sub[x_sub == 0].index[-1] except: return np.nan df_valid['max_drawdown_start'] = pd.Series() df_valid['max_drawdown_end'] = pd.Series() df_valid['max_drawdown_start'].ix[-1] = drawdown_start( df_valid['nav'], 'percentage') df_valid['max_drawdown_end'].ix[-1] = drawdown_end( df_valid['nav'], 'percentage') df_valid.to_csv(save_address) # =====result visualization===== plt.figure(1) if whole == 1: plt.subplot(224) 
plt.plot(df_valid['nav'], label='strategy') plt.plot(df_valid['benchmark'], label='S&P500') plt.xlabel('Date') plt.legend(loc=0, shadow=True) plt.ylabel('Nav') plt.title('Nav of ' + save_file + ' & SP500') # plt.subplot(223) # plt.plot(df_valid['cum_profit'],label = 'strategy') # plt.xlabel('Date') # plt.ylabel('Cum_profit') # plt.title('Cum_profit of ' + save_file) plt.subplot(221) plt.plot(df_valid['return'], label='strategy') plt.xlabel('Date') plt.ylabel('Daily_return') plt.title('Daily Return of ' + save_file) plt.subplot(222) x_return = df_valid[df_valid['return'].notna()].loc[:, 'return'] y_return = df_valid[ df_valid['benchmark_return'].notna()].loc[:, 'benchmark_return'] mu = x_return.mean() sigma = x_return.std() mybins = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100) count_x, _, _ = plt.hist(x_return, mybins, normed=1, alpha=0.5, label='strategy') count_y, _, _ = plt.hist(y_return, mybins, normed=1, alpha=0.5, label='S&P500') plt.ylabel('density') plt.xlabel('daily_return') plt.title('Histogram of Daily Return for ' + save_file + ' & SP500') plt.grid(True) # add normal distribution line y = mlab.normpdf(mybins, mu, sigma) plt.plot(mybins, y, 'r--', linewidth=1, label='Normal of strategy') plt.legend(loc=0, shadow=True) # plt.tight_layout() plt.show() return df_valid
data['MACD']='' data['MACD']=data['Diff']-data['DEA'] return data #导入数据,生成因子 data=pd.read_csv('rb888_2015.csv',parse_dates=True,index_col='time') data.reset_index(inplace=True) data['log_return']=np.log(data['open']/data['open'].shift(1)) data['log_return']=data['log_return'].fillna(0) data['log_return_5']=np.log(data['open']/data['open'].shift(5)) data['log_return_5']=data['log_return_5'].fillna(0) for h,k in [(5,10),(5,15),(5,20),(10,15),(10,20),(15,20),(15,30)]: data['fast_line']='' data['slow_line']='' data['fast_line']=pd.rolling_mean(data['open'],h) data['slow_line']=pd.rolling_mean(data['open'],k) data['fast_line']=data['fast_line'].fillna(value=pd.expanding_mean(data['open'])) data['slow_line']=data['slow_line'].fillna(value=pd.expanding_mean(data['open'])) data['dist_%s_%s'%(k,h)]=data['fast_line']-data['slow_line'] for i in range(5,31,5): data['MA_%s'%i]=pd.rolling_mean(data['open'],i) data['MA_%s'%i]=data['MA_%s'%i].fillna(0)-data['open'] data=MACD(data,12,26,9) for h in range(10,26,5): data['fast_line']='' data['slow_line']='' data['fast_line']=pd.rolling_max(data['high'].shift(1),h) data['slow_line']=pd.rolling_min(data['low'].shift(1),h) data['fast_line']=data['fast_line'].fillna(value=pd.expanding_max(data['high'])) data['slow_line']=data['slow_line'].fillna(value=pd.expanding_min(data['low'])) data['dist_high_%s'%h]=data['high']-data['fast_line'] data['dist_low_%s'%h]=data['low']-data['slow_line']
class CumulativeRets(object): def __init__(self, rets=None, ltd_rets=None): if rets is None and ltd_rets is None: raise ValueError('rets or ltd_rets must be specified') if rets is None: if ltd_rets.empty: rets = ltd_rets else: rets = (1. + ltd_rets).pct_change() rets.iloc[0] = ltd_rets.iloc[0] if ltd_rets is None: if rets.empty: ltd_rets = rets else: ltd_rets = (1. + rets).cumprod() - 1. self.rets = rets self.ltd_rets = ltd_rets pds_per_year = property(lambda self: periodicity(self.rets)) def asfreq(self, freq): other_pds_per_year = periodicity(freq) if self.pds_per_year < other_pds_per_year: msg = 'Cannot downsample returns. Cannot convert from %s periods/year to %s' raise ValueError(msg % (self.pds_per_year, other_pds_per_year)) if freq == 'B': rets = (1. + self.rets).groupby(self.rets.index.date).apply(lambda s: s.prod()) - 1. # If you do not do this, it will be an object index rets.index = pd.DatetimeIndex([i for i in rets.index]) return CumulativeRets(rets) else: rets = (1. + self.rets).resample(freq, how='prod') - 1. return CumulativeRets(rets) # ----------------------------------------------------------- # Resampled data dly = lazy_property(lambda self: self.asfreq('B'), 'dly') weekly = lazy_property(lambda self: self.asfreq('W'), 'weekly') monthly = lazy_property(lambda self: self.asfreq('M'), 'monthly') quarterly = lazy_property(lambda self: self.asfreq('Q'), 'quarterly') annual = lazy_property(lambda self: self.asfreq('A'), 'annual') # ----------------------------------------------------------- # Basic Metrics @lazy_property def ltd_rets_ann(self): return (1. + self.ltd_rets) ** (self.pds_per_year / pd.expanding_count(self.rets)) - 1. cnt = property(lambda self: self.rets.notnull().astype(int).sum()) mean = lazy_property(lambda self: self.rets.mean(), 'avg') mean_ann = lazy_property(lambda self: self.mean * self.pds_per_year, 'avg_ann') ltd = lazy_property(lambda self: self.ltd_rets.iloc[-1], name='ltd') ltd_ann = lazy_property(lambda self: self.ltd_rets_ann.iloc[-1], name='ltd_ann') std = lazy_property(lambda self: self.rets.std(), 'std') std_ann = lazy_property(lambda self: self.std * np.sqrt(self.pds_per_year), 'std_ann') drawdown_info = lazy_property(lambda self: drawdown_info(self.rets), 'drawdown_info') drawdowns = lazy_property(lambda self: drawdowns(self.rets), 'drawdowns') maxdd = lazy_property(lambda self: self.drawdown_info['maxdd'].min(), 'maxdd') dd_avg = lazy_property(lambda self: self.drawdown_info['maxdd'].mean(), 'dd_avg') kurtosis = lazy_property(lambda self: self.rets.kurtosis(), 'kurtosis') skew = lazy_property(lambda self: self.rets.skew(), 'skew') sharpe_ann = lazy_property(lambda self: np.divide(self.ltd_ann, self.std_ann), 'sharpe_ann') downside_deviation = lazy_property(lambda self: downside_deviation(self.rets, mar=0, full=0, ann=1), 'downside_deviation') sortino = lazy_property(lambda self: self.ltd_ann / self.downside_deviation, 'sortino') @lazy_property def maxdd_dt(self): ddinfo = self.drawdown_info if ddinfo.empty: return None else: return self.drawdown_info['maxdd dt'].ix[self.drawdown_info['maxdd'].idxmin()] # ----------------------------------------------------------- # Expanding metrics expanding_mean = property(lambda self: pd.expanding_mean(self.rets), 'expanding_avg') expanding_mean_ann = property(lambda self: self.expanding_mean * self.pds_per_year, 'expanding_avg_ann') expanding_std = lazy_property(lambda self: pd.expanding_std(self.rets), 'expanding_std') expanding_std_ann = lazy_property(lambda self: self.expanding_std * 
np.sqrt(self.pds_per_year), 'expanding_std_ann') expanding_sharpe_ann = property(lambda self: np.divide(self.ltd_rets_ann, self.expanding_std_ann)) # ----------------------------------------------------------- # Rolling metrics rolling_mean = property(lambda self: pd.rolling_mean(self.rets), 'rolling_avg') rolling_mean_ann = property(lambda self: self.rolling_mean * self.pds_per_year, 'rolling_avg_ann') def rolling_ltd_rets(self, n): return pd.rolling_apply(self.rets, n, lambda s: (1. + s).prod() - 1.) def rolling_ltd_rets_ann(self, n): tot = self.rolling_ltd_rets(n) return tot ** (self.pds_per_year / n) def rolling_std(self, n): return pd.rolling_std(self.rets, n) def rolling_std_ann(self, n): return self.rolling_std(n) * np.sqrt(self.pds_per_year) def rolling_sharpe_ann(self, n): return self.rolling_ltd_rets_ann(n) / self.rolling_std_ann(n) def iter_by_year(self): """Split the return objects by year and iterate""" for key, grp in self.rets.groupby(lambda x: x.year): yield key, CumulativeRets(rets=grp) def truncate(self, before=None, after=None): rets = self.rets.truncate(before=before, after=after) return CumulativeRets(rets=rets) @lazy_property def summary(self): d = OrderedDict() d['ltd'] = self.ltd d['ltd ann'] = self.ltd_ann d['mean'] = self.mean d['mean ann'] = self.mean_ann d['std'] = self.std d['std ann'] = self.std_ann d['sharpe ann'] = self.sharpe_ann d['sortino'] = self.sortino d['maxdd'] = self.maxdd d['maxdd dt'] = self.maxdd_dt d['dd avg'] = self.dd_avg d['cnt'] = self.cnt return pd.Series(d, name=self.rets.index.freq or guess_freq(self.rets.index)) def _repr_html_(self): from tia.util.fmt import new_dynamic_formatter fmt = new_dynamic_formatter(method='row', precision=2, pcts=1, trunc_dot_zeros=1, parens=1) df = self.summary.to_frame() return fmt(df)._repr_html_() def get_alpha_beta(self, bm_rets): if isinstance(bm_rets, pd.Series): bm = CumulativeRets(bm_rets) elif isinstance(bm_rets, CumulativeRets): bm = bm_rets else: raise ValueError('bm_rets must be series or CumulativeRetPerformace not %s' % (type(bm_rets))) bm_freq = guess_freq(bm_rets) if self.pds_per_year != bm.pds_per_year: tgt = {'B': 'dly', 'W': 'weekly', 'M': 'monthly', 'Q': 'quarterly', 'A': 'annual'}.get(bm_freq, None) if tgt is None: raise ValueError('No mapping for handling benchmark with frequency: %s' % bm_freq) tmp = getattr(self, tgt) y = tmp.rets y_ann = tmp.ltd_ann else: y = self.rets y_ann = self.ltd_ann x = bm.rets.truncate(y.index[0], y.index[-1]) x_ann = bm.ltd_ann model = pd.ols(x=x, y=y) beta = model.beta[0] alpha = y_ann - beta * x_ann return pd.Series({'alpha': alpha, 'beta': beta}, name=bm_freq) def plot_ltd(self, ax=None, style='k', label='ltd', show_dd=1, title=True, legend=1): ltd = self.ltd_rets ax = ltd.plot(ax=ax, style=style, label=label) if show_dd: dd = self.drawdowns dd.plot(style='r', label='drawdowns', alpha=.5, ax=ax) ax.fill_between(dd.index, 0, dd.values, facecolor='red', alpha=.25) fmt = PercentFormatter AxesFormat().Y.percent().X.label("").apply(ax) legend and ax.legend(loc='upper left', prop={'size': 12}) # show the actualy date and value mdt, mdd = self.maxdd_dt, self.maxdd bbox_props = dict(boxstyle="round", fc="w", ec="0.5", alpha=0.25) try: dtstr = '{0}'.format(mdt.to_period()) except: # assume daily dtstr = '{0}'.format(hasattr(mdt, 'date') and mdt.date() or mdt) ax.text(mdt, dd[mdt], "{1} \n {0}".format(fmt(mdd), dtstr).strip(), ha="center", va="top", size=8, bbox=bbox_props) if title is True: pf = new_percent_formatter(1, parens=False, trunc_dot_zeros=True) ff = 
new_float_formatter(precision=1, parens=False, trunc_dot_zeros=True) total = pf(self.ltd_ann) vol = pf(self.std_ann) sh = ff(self.sharpe_ann) mdd = pf(self.maxdd) title = 'ret$\mathregular{_{ann}}$ %s vol$\mathregular{_{ann}}$ %s sharpe %s maxdd %s' % ( total, vol, sh, mdd) title and ax.set_title(title, fontdict=dict(fontsize=10, fontweight='bold')) return ax def plot_ret_on_dollar(self, title=None, show_maxdd=1, figsize=None, ax=None, append=0, label=None, **plot_args): plot_return_on_dollar(self.rets, title=title, show_maxdd=show_maxdd, figsize=figsize, ax=ax, append=append, label=label, **plot_args) def plot_hist(self, ax=None, **histplot_kwargs): pf = new_percent_formatter(precision=1, parens=False, trunc_dot_zeros=1) ff = new_float_formatter(precision=1, parens=False, trunc_dot_zeros=1) ax = self.rets.hist(ax=ax, **histplot_kwargs) AxesFormat().X.percent(1).apply(ax) m, s, sk, ku = pf(self.mean), pf(self.std), ff(self.skew), ff(self.kurtosis) txt = '$\mathregular{\mu}$=%s $\mathregular{\sigma}$=%s skew=%s kurt=%s' % (m, s, sk, ku) bbox = dict(facecolor='white', alpha=0.5) ax.text(0, 1, txt, fontdict={'fontweight': 'bold'}, bbox=bbox, ha='left', va='top', transform=ax.transAxes) return ax def filter(self, mask, keep_ltd=0): if isinstance(mask, pd.Series): mask = mask.values rets = self.rets.ix[mask] ltd = None if keep_ltd: ltd = self.ltd_rets.ix[mask] return CumulativeRets(rets=rets, ltd_rets=ltd)
def cum_avg(data):
    data = pandas.DataFrame({'data': data})
    means = pandas.expanding_mean(data)
    stds = pandas.expanding_std(data)
    return numpy.array([i[0] for i in means.values]), numpy.array([i[0] for i in stds.values])
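# A compact alternative sketch (an assumption, not the original implementation):
# the same cumulative mean/std pair computed directly on a Series.
import numpy
import pandas

def cum_avg_modern(data):
    s = pandas.Series(data)
    return s.expanding().mean().to_numpy(), s.expanding().std().to_numpy()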
""" import numpy as np import pandas as pd from scipy.special import logit import matplotlib.pyplot as pp import seaborn sample_size = 100000 pp.ion() dists = pd.DataFrame(np.random.normal(size=sample_size), columns=['normal']) dists['inverse_normal'] = 1/(np.random.normal(size=sample_size)) dists['normal_ratio'] = np.random.normal(size=sample_size)/(np.random.normal(size=sample_size)) dists['poisson'] = (np.random.poisson(size=sample_size)) dists['poisson_ratio'] = (np.random.poisson(size=sample_size))/(np.random.poisson(size=sample_size)) dists['poisson_diff'] = (np.random.poisson(size=sample_size))-(np.random.poisson(size=sample_size)) dists['logit'] = logit(np.random.uniform(size=sample_size)) dists['cauchy'] = np.random.standard_cauchy(size=sample_size) dists['uniform_wide'] = np.random.uniform(low=-100,high=100, size=sample_size) bins = np.linspace(-100,100,100) dists.hist(bins=bins, log=True, alpha=0.5) pd.expanding_mean(dists).plot() pp.title('Expanding Means') pp.ylim(-3,3) pp.show()
def process_sample(x):
    x = keystrokes2events(x)
    tau = x['time'].diff()
    predictions = pd.expanding_mean(tau).shift()
    return SMAPE(tau, predictions).dropna().mean()
if __name__ == '__main__': start = datetime.now() PosSizeL = 1 PosSizeS = 1 data1 = pd.read_csv('rb888_2015.csv', parse_dates=True, index_col='time') data1.reset_index(inplace=True) for h, k in [(5, 20), (15, 20), (5, 10), (5, 15), (10, 15)]: data1['fast_line'] = '' data1['slow_line'] = '' data1['fast_line'] = pd.rolling_mean(data1['close'], h) data1['slow_line'] = pd.rolling_mean(data1['close'], k) data1['fast_line'] = data1['fast_line'].fillna( value=pd.expanding_mean(data1['close'])) data1['slow_line'] = data1['slow_line'].fillna( value=pd.expanding_mean(data1['close'])) data1['dist_%s_%s' % (k, h)] = data1['fast_line'] - data1['slow_line'] for h in range(10, 26, 5): data1['fast_line'] = '' data1['slow_line'] = '' data1['fast_line'] = pd.rolling_max(data1['high'].shift(1), h) data1['slow_line'] = pd.rolling_min(data1['low'].shift(1), h) data1['fast_line'] = data1['fast_line'].fillna( value=pd.expanding_max(data1['high'])) data1['slow_line'] = data1['slow_line'].fillna( value=pd.expanding_min(data1['low'])) data1['dist_high_%s' % h] = data1['high'] - data1['fast_line'] data1['dist_low_%s' % h] = data1['low'] - data1['slow_line'] data1 = MACD(data1, 12, 26, 9)
def duration_smape(x):
    d = x['timerelease'] - x['timepress']
    predictions = pd.expanding_mean(d).shift()
    return SMAPE(d, predictions).dropna().mean()
def pp_smape(x):
    tau = x['timepress'].diff()
    predictions = pd.expanding_mean(tau).shift()
    return SMAPE(tau, predictions).dropna().mean()
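# SMAPE is called in the snippets above but not defined there; the sketch below is
# an assumed elementwise symmetric MAPE, 2*|y - yhat| / (|y| + |yhat|), which the
# callers then reduce with .dropna().mean().
def SMAPE(actual, predicted):
    return 2.0 * (actual - predicted).abs() / (actual.abs() + predicted.abs())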
# Read the pickled data benmore = pd.read_pickle("benmore.pickle") otahuhu = pd.read_pickle("otahuhu.pickle") # Get OTA and BEN prices for current quarter connection = ea.DW_connect(linux=True) prices = ea.timeseries_convert( ea.FP_getter( connection, ea.query_prices(ea.current_quarter().start_time, datetime.now().date(), nodelist=["BEN2201", "OTA2201"]), ) ) # Calculate expanding mean over the quarter spot_OTA = prices.price.OTA2201.groupby(lambda x: x.date()).mean() spot_BEN = prices.price.BEN2201.groupby(lambda x: x.date()).mean() spot_OTA_EXMEAN = pd.expanding_mean(spot_OTA) # Otahuhu expanding mean spot_BEN_EXMEAN = pd.expanding_mean(spot_BEN) # Benmore expanding mean OTA_BA = ea.CQ_data(otahuhu, ota, spot_OTA_EXMEAN, CQ, "otahuhu") BEN_BA = ea.CQ_data(benmore, ben, spot_BEN_EXMEAN, CQ, "benmore") print "Generate LaTex table" ea.asx_table_maker(otahuhu, benmore, ota, ben, CQ, path + "/tables/asx_table_1.tex") # ea.asx_market_comment(ota,ben,path + '/comments/comment.tex') print "Printing Hedge Market data" ea.forward_price_curve(9, ota, "Reds", path + "/figures/ota_fpc.pdf") ea.forward_price_curve(9, ota, "Reds", path + "/figures/ota_fpc.png") ea.forward_price_curve(10, ben, "Blues", path + "/figures/ben_fpc.pdf")
lasso, predictionsLS = lasso(df_final10_LS) # pickles model pickle_model(lasso, filename = 'pickled/lasso10_model.pk') # unpickle model and get predictions predictions = unpickle_and_predict(df_final10_new, filename = 'pickled/lasso10_model.pk') # append predictions to df_final5_new and drop all columns that we don't care about df_final = df_final10_new[['home_team', 'away_team', 'date', 'home_team_win']] preds = pd.DataFrame(predictions) preds.columns = [['prediction']] final = pd.merge(df_final, preds, how = 'left', left_index = True, right_index = True) final.sort_values('date', ascending = True, inplace = True) final = final.reset_index(drop=True) final['match'] = np.where(final['home_team_win'] == final['prediction'], 1, 0) final['cumulative_average'] = pd.expanding_mean(final['match'], 1) # most recent games will be at the bottom # 58.8% accuracy # last season df_finalLS = df_final10_LS[['home_team', 'away_team', 'date', 'home_team_win']] predsLS = pd.DataFrame(predictionsLS) predsLS.columns = [['prediction']] finalLS = pd.merge(df_finalLS, predsLS, how = 'left', left_index = True, right_index = True) finalLS.sort_values('date', ascending = True, inplace = True) finalLS = finalLS.reset_index(drop=True) finalLS['match'] = np.where(finalLS['home_team_win'] == finalLS['prediction'], 1, 0) finalLS['cumulative_average'] = pd.expanding_mean(finalLS['match'], 1) # these results don't match what was found in ModelVisualization.py # 57.3% accuracy
raw = requests.get("http://www.google.com/finance/getprices?i="+interval+"&p="+lookback+"d&f=c&df=cpct&q="+symbol).text # Take the data and put it into a DataFrame raw = raw.split()[7:] data = pd.DataFrame(raw) data = data.astype("float") data["price"] = data[0] del data[0] # We only need 60 minutes worth of data if len(data["price"] >= 60): data["price"] = data["price"][-60:] # Columns for expanding mean and standard deviation data["mean"] = pd.expanding_mean(data["price"]) data["vol"] = pd.expanding_std(data["price"]) # Linear regression on price data x = range(len(data["price"][-60:])) y = data["price"][-60:].values A,B = curve_fit(f,x,y) # Print the trend to the console if A[0] < 0 : print("downtrend") else: print("uptrend") # Plot window plt.figure(1)
def get_context_data(self, **kwargs): context = super(WellChartView, self).get_context_data(**kwargs) well = Well.objects.get(pk=context['pk']) name = unicode(well) options = { 'rangeSelector': { 'enabled': True, 'inputEnabled': True, }, 'navigator': {'adaptToUpdatedData': True, 'enabled': True}, 'chart': {'type': 'arearange', 'zoomType': 'x'}, 'title': {'text': name}, 'xAxis': {'type': 'datetime'}, 'yAxis': [{'title': {'text': 'Grondwaterstand\n(m tov NAP)'}} ], 'tooltip': {'valueSuffix': ' m', 'valueDecimals': 2, 'shared': True, }, 'legend': {'enabled': True}, 'plotOptions': {'line': {'marker': {'enabled': False}}}, 'credits': {'enabled': True, 'text': 'acaciawater.com', 'href': 'http://www.acaciawater.com', }, } series = [] xydata = [] for screen in well.screen_set.all(): name = unicode(screen) data = screen.to_pandas(ref='nap') if data.size > 0: xydata = zip(data.index.to_pydatetime(), data.values) series.append({'name': name, 'type': 'line', 'data': xydata, 'lineWidth': 1, 'color': '#0066FF', 'zIndex': 2, }) mean = pd.expanding_mean(data) std = pd.expanding_std(data) a = (mean - std).dropna() b = (mean + std).dropna() ranges = zip(a.index.to_pydatetime(), a.values, b.values) series.append({'name': 'spreiding', 'data': ranges, 'type': 'arearange', 'lineWidth': 0, 'color': '#0066FF', 'fillOpacity': 0.2, 'linkedTo' : ':previous', 'zIndex': 0, }) data = screen.to_pandas(ref='nap',kind='HAND') if data.size > 0: hand = zip(data.index.to_pydatetime(), data.values) series.append({'name': 'handpeiling', 'type': 'scatter', 'data': hand, 'zIndex': 3, 'marker': {'symbol': 'circle', 'radius': 6, 'lineColor': 'white', 'lineWidth': 2, 'fillColor': 'red'}, }) if len(xydata)>0: mv = [] mv.append((xydata[0][0], screen.well.maaiveld)) mv.append((xydata[-1][0], screen.well.maaiveld)) series.append({'name': 'maaiveld', 'type': 'line', 'lineWidth': 2, 'color': '#009900', 'dashStyle': 'Dash', 'data': mv, 'zIndex': 4, }) # neerslag toevoegen try: closest = Station.closest(well.location) name = 'Meteostation {} (dagwaarden)'.format(closest.naam) neerslag = Series.objects.get(name='RH',mlocatie__name=name) data = neerslag.to_pandas(start=xydata[0][0], stop=xydata[-1][0]) / 10.0 # 0.1 mm -> mm data = zip(data.index.to_pydatetime(), data.values) series.append({'name': 'Neerslag '+ closest.naam, 'type': 'column', 'data': data, 'yAxis': 1, 'pointRange': 24 * 3600 * 1000, # 1 day 'pointPadding': 0.01, 'pointPlacement': 0.5, 'zIndex': 1, 'color': 'orange', 'borderColor': '#cc6600', }) options['yAxis'].append({'title': {'text': 'Neerslag (mm)'}, 'opposite': 1, 'min': 0, }) except: pass options['series'] = series context['options'] = json.dumps(options, default=lambda x: int(time.mktime(x.timetuple())*1000)) context['object'] = well return context
for i in ['ntis', 'tbl', 'lty', 'infl']: df[i] = df[i] * -1 econ_var = ['dp', 'dy', 'ep', 'de', 'rvol', 'bm', 'ntis', 'tbl', 'lty', 'ltr', 'tms','dfy','dfr','infl'] tech_var = ['ma_1_9', 'ma_1_12', 'ma_2_9', 'ma_2_12', 'ma_3_9', 'ma_3_12', 'mom_9', 'mom_12', 'vol_1_9', 'vol_1_12', 'vol_2_9', 'vol_2_12', 'vol_3_9', 'vol_3_12'] all_var = econ_var + tech_var # get data for specified date range df_sub = df[beg_date_init:end_date_oos] # Expanding window historical average forecast for equity premium df['ha_mean'] = Series(pd.expanding_mean(df_sub['equity_premium']/100, min_periods = window_size).shift(1), index = df_sub.index) # Rolling window historical average forecast for equity premium variance # note degree of freedom adjusted to match NRZ df['ha_var'] = Series(pd.rolling_var(df_sub['equity_premium']/100, window_size, min_periods = window_size, ddof = 0).shift(1), index = df_sub.index) # Perform asset allocation using historical average forecasts using c_bp = 0 # all months df_sub = df[beg_date_oos:end_date_oos] ha_results = perform_asset_allocation(df_sub['equity_premium']/100, df_sub['Rfree'], df_sub['ha_mean'], df_sub['ha_var'], gamma_MV, 0) # expansion months df_exp = df_sub[df_sub['recession']==0] ha_results_exp = perform_asset_allocation(df_exp['equity_premium']/100, df_exp['Rfree'],