def iter_over_groups(data, group, features, key):
    """Build lagged rolling/expanding aggregates of each feature within each group."""
    frames = []
    for x in group:
        sub = data[data[key] == x].copy()
        for i in features:
            # NOTE: the column prefixes do not match the window sizes
            # ('avg_10' uses a 7-period window, 'sum_15' a 10-period rolling mean);
            # the names are kept as-is so downstream code is unaffected.
            sub['avg_10' + i.lower() + '_' + key[0].lower()] = pd.rolling_mean(sub[i], 7).shift(1)
            sub['sum_15' + i.lower() + '_' + key[0].lower()] = pd.rolling_mean(sub[i], 10).shift(1)
            sub['avg_3' + i.lower() + '_' + key[0].lower()] = pd.rolling_mean(sub[i], 3).shift(1)
            sub['expand' + i.lower() + '_' + key[0].lower()] = pd.expanding_mean(sub[i]).shift(1)
            sub['expand_sum' + i.lower() + '_' + key[0].lower()] = pd.expanding_sum(sub[i]).shift(1)
        frames.append(sub)
    # the original computed the expanding columns of every later group from the
    # first group's frame (data_1), a copy-paste bug fixed by this single loop body
    return pd.concat(frames, ignore_index=True)
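For comparison, a minimal sketch of the same lagged features against the modern pandas API (an assumption here: pandas >= 1.0, where rolling_mean/expanding_mean became .rolling()/.expanding() methods; the helper name is made up). groupby().transform keeps the result aligned with the input rows, so no manual iteration or concatenation is needed:

import pandas as pd

def iter_over_groups_modern(data, features, key):
    out = data.copy()
    g = out.groupby(key)
    for i in features:
        suffix = i.lower() + '_' + key[0].lower()
        out['avg_10' + suffix] = g[i].transform(lambda s: s.rolling(7).mean().shift(1))
        out['sum_15' + suffix] = g[i].transform(lambda s: s.rolling(10).mean().shift(1))
        out['avg_3' + suffix] = g[i].transform(lambda s: s.rolling(3).mean().shift(1))
        out['expand' + suffix] = g[i].transform(lambda s: s.expanding().mean().shift(1))
        out['expand_sum' + suffix] = g[i].transform(lambda s: s.expanding().sum().shift(1))
    return out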
Example #2
def plotall(basedir, plot=None):
    dirnames = [name for name in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, name))
                                and os.path.isfile(os.path.join(basedir, name, 'onresults.csv'))]
    shortnames = getshortnames(dirnames)
    plotcount = len(plot) if isinstance(plot, list) else 0  # plot may be None
    for dirname, shortname in zip(dirnames, shortnames):
        resultspath = os.path.join(basedir, dirname)
        hasdiverged = False
        if diverged(resultspath):
            hasdiverged = True
        resultsfile = os.path.join(resultspath, 'onresults.csv')
        if not os.path.isfile(resultsfile):
            continue
        csv = pd.read_csv(resultsfile)
        if isinstance(plot, list):
            for i, toplot in enumerate(plot):
                runaverages = False
                if isinstance(toplot, dict):
                    runaverages = toplot.get('runaverages', False)
                    toplot = toplot.get('plot')
                label = shortname + (" (diverged)" if hasdiverged else "")
                plt.subplot(1, plotcount, i + 1)
                if runaverages:
                    pd.expanding_mean(csv[toplot]).plot(label=label, legend=True)
                else:
                    csv[toplot].plot(label=label, legend=True)
        else:
            csv.plot(subplots=True)
    _setplotlabels(plot)
    plot_file = os.path.join(basedir,'all.pdf')
    plt.savefig(plot_file, bbox_inches='tight')
    plt.show()
Example #3
File: perf.py  Project: ychaim/tia
def returns_annualized(returns, geometric=True, scale=None, expanding=False):
    """ return the annualized cumulative returns

    Parameters
    ----------
    returns : DataFrame or Series
    geometric : link the returns geometrically
    scale: None or scalar or string (e.g. 12 for months in a year),
           If None, attempt to resolve from returns
           If scalar, then use this as the annualization factor
           If string, then pass this to periodicity function to resolve annualization factor
    expanding: bool, default is False
               If True, return expanding series/frames.
               If False, return final result.
    """
    scale = _resolve_periods_in_year(scale, returns)
    if expanding:
        if geometric:
            n = pd.expanding_count(returns)
            return ((1. + returns).cumprod() ** (scale / n)) - 1.
        else:
            return pd.expanding_mean(returns) * scale
    else:
        if geometric:
            n = returns.count()
            return ((1. + returns).prod() ** (scale / n)) - 1.
        else:
            return returns.mean() * scale
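A small worked example of the geometric branch, with made-up numbers: 24 months of 1% returns and scale=12 compound to the same annualized figure whichever way you compute it:

import numpy as np
import pandas as pd

rets = pd.Series(np.full(24, 0.01))      # hypothetical: 1% per month for two years
n = rets.count()                         # 24 observations
ann = (1. + rets).prod() ** (12. / n) - 1.
# (1.01 ** 24) ** (12 / 24) - 1 == 1.01 ** 12 - 1, i.e. about 12.68% per year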
def chi2normal_transformation(df):
    risk_factor = pd.DataFrame(chi2.cdf(df, pd.rolling_mean(df, 150)),
                               columns=df.columns,
                               index=df.index)
    risk_factor = (risk_factor - pd.expanding_mean(risk_factor)) / \
        pd.expanding_std(risk_factor)
    return risk_factor
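A hedged modern-pandas rewrite of the same two steps (chi2 CDF against a 150-period rolling mean, then an expanding z-score), assuming pandas >= 1.0; behavior should match the function above:

import pandas as pd
from scipy.stats import chi2

def chi2normal_transformation_modern(df):
    risk_factor = pd.DataFrame(chi2.cdf(df, df.rolling(150).mean()),
                               columns=df.columns,
                               index=df.index)
    # expanding z-score, as in the original
    return (risk_factor - risk_factor.expanding().mean()) / risk_factor.expanding().std()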
def results():
    # import csv we just created
    df = pd.read_csv('D:/Users/Antoine.Grappin/Dropbox/keyrus/python_scripts/tutorial_sample_files/performance_data_stocks_sentdex_dates_full.csv',
                     index_col='time',
                     parse_dates=True,
#                     usecols=['time',
#                              'stock',
#                              'currentValuation',
#                              'percentChange'],
                     dtype={'currentValuation': np.float64,
                            'percentChange': np.float64})
    
    # sort index if unsorted
    df.sort_index(inplace=True)
    
    df['xMean'] = pd.expanding_mean(df['percentChange'],1)
    
    print()
    print(df.head())
    print()
    pprint.pprint(df.describe())
    print()
    pprint.pprint(df.dtypes)
    print()
    
    return df['xMean']
Example #6
def rolling_tstat(x):
    emean = pd.expanding_mean(x)
    estd = pd.expanding_std(x)
    t = np.arange(1, len(x) + 1)
    esqr = np.sqrt(t)
    rtstat = (emean / estd) * esqr
    return rtstat
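The quantity computed is the one-sample t-statistic at each point in time: the expanding mean divided by the standard error std/sqrt(n). A quick hedged sanity check (made-up data; requires the legacy pandas API used by rolling_tstat):

import numpy as np
import pandas as pd
from scipy import stats

x = pd.Series([0.5, 1.0, -0.2, 0.8, 0.3])   # made-up data
t_full, _ = stats.ttest_1samp(x, 0.0)
# the last element of the expanding t-stat equals the full-sample t-test
assert np.isclose(rolling_tstat(x).iloc[-1], t_full)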
Example #7
def engineer_previous_game_averages(df, columns, previous_game_averages):

    for column in columns:

        df.sort_values(by=['date_game', 'team_id'],
                       ascending=True,
                       inplace=True)

        for offset in range(1, max(previous_game_averages) + 1):

            df[column + "-" +
               str(offset)] = df.groupby('team_id')[column].shift(
                   periods=offset)

        for average in previous_game_averages:

            selector = [column + "-" + str(x) for x in range(1, average + 1)]
            df[column + str(average) +
               'game-avg'] = df.loc[:, selector].mean(axis=1)

        for offset in range(1, max(previous_game_averages) + 1):

            del df[column + "-" + str(offset)]

        df[column + 'seasonavg'] = df.groupby('team_id')[column].apply(
            lambda x: pd.expanding_mean(x).shift())

    return df
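As a hedged aside, on modern pandas (>= 1.0) the season-average step above can be written with transform, which keeps the result aligned to the original rows:

df[column + 'seasonavg'] = (df.groupby('team_id')[column]
                            .transform(lambda x: x.expanding().mean().shift()))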
Example #8
def json_series(request, pk):
    screen = get_object_or_404(Screen,pk=pk)
    what = request.GET.get('mode','comp') # choices: comp, hand
    ref = request.GET.get('ref','nap') # choices: nap, bkb, mv, cm
#     filters = [
#         RangeRule(name = 'range', lower = -5, upper = 5),
#         RollingRule(name = 'spike', count = 3, tolerance = 3, comp ='LT')
#         ]
    # determine resampling rule
    rule = request.GET.get('rule', 'H')
#     if rule is None:
#         series = screen.find_series()
#         rule = 'H' if series.aantal() < 10000 else 'D'
    series = screen.get_series(ref,what,rule=rule)#,filters=filters)
    if series is None or series.empty:
        values = []
    else:
        values = zip(series.index, series.values)
        
    data = {'screen%s'%screen.nr: values}
    stats = request.GET.get('stats','0')
    try:
        stats = int(stats)
        if stats:
            mean = pd.expanding_mean(series)
            std = pd.expanding_std(series)
            a = (mean - std).dropna()
            b = (mean + std).dropna()
            ranges = zip(a.index.to_pydatetime(), a.values, b.values)
            data.update({'stats%s'%screen.nr: ranges})
    except:
        pass
    return HttpResponse(json.dumps(data,ignore_nan=True,default=to_millis),content_type='application/json')
Example #9
def returns_annualized(returns, geometric=True, scale=None, expanding=False):
    """return the annualized cumulative returns

    Parameters
    ----------
    returns : DataFrame or Series
    geometric : link the returns geometrically
    scale: None or scalar or string (e.g. 12 for months in a year),
           If None, attempt to resolve from returns
           If scalar, then use this as the annualization factor
           If string, then pass this to periodicity function to resolve annualization factor
    expanding: bool, default is False
               If True, return expanding series/frames.
               If False, return final result.
    """
    scale = _resolve_periods_in_year(scale, returns)
    if expanding:
        if geometric:
            n = pd.expanding_count(returns)
            return ((1.0 + returns).cumprod()**(scale / n)) - 1.0
        else:
            return pd.expanding_mean(returns) * scale
    else:
        if geometric:
            n = returns.count()
            return ((1.0 + returns).prod()**(scale / n)) - 1.0
        else:
            return returns.mean() * scale
def results():
    df = pd.read_csv('performance_data_sp500ish.csv',
                     index_col='time',
                     parse_dates=True)
    df.sort_index(inplace=True)
    df['x_mean'] = pd.expanding_mean(df['pc'], 0)
    return df['x_mean']
Example #11
	def daily_win_percentage(self):
		"""
		Gets the winning percentage of each team through the season.

		Notes
		-----
		Stores win_percent, the winning percentage through time, in
		self.daily_stat_dict['win_perc'].
		"""

		X = self.game_stats
	
		store_outcome = pd.DataFrame(index = self.unique_days,
			columns=self.teams)

		for ii in range(len(X.index)):

			row = X.iloc[ii]
			day = X.index[ii]

			w_team = row['Wteam'] #winning team
			l_team = row['Lteam'] #losing team

			store_outcome.loc[day,w_team] = 1
			store_outcome.loc[day,l_team] = 0


		win_percent = pd.expanding_mean(store_outcome)

		self.daily_stat_dict['win_perc'] = win_percent
		print('win_perc saved to daily_stat_dict!')
def credit_momentum_test(data_df):
    
    test_data = data_df[['US HY Return','US Int. Trsy Return']].add(1).cumprod()
    
    rolling_period = 3
    
    
    rolling_change = pd.DataFrame.pct_change(test_data,periods=rolling_period)

    column_one = test_data.columns.values[0]
    column_two = test_data.columns.values[1]
    
    
    # keep the spread as a DataFrame so the z-score can be stored as a column
    data_diff = (rolling_change[column_one] - rolling_change[column_two]).to_frame('diff')

    data_diff['rolling_z'] = (data_diff['diff'] - pd.expanding_mean(data_diff['diff'], min_periods=24)) / pd.expanding_std(data_diff['diff'], min_periods=24)
    
    
    weights = pd.DataFrame(index=data_diff.index)
    
    
    weights['bond_wght'] = data_diff['rolling_z']
    weights['treasury_wght'] = data_diff['rolling_z'] * -1
    
    weights = weights / 0.5
    weights.dropna(inplace=True)
    
    
    weights = weights.clip(-1, 1)

    #weights['bond_wght'] = np.where(data_diff > 0, 1.0, np.where(data_diff< 0,-1.0, np.nan))
    
    
    #weights['treasury_wght'] = np.where(data_diff > 0, -1.0, np.where(data_diff < 0,1.0, np.nan))
    
    bond_wght =  weights['bond_wght'].to_frame()
    bond_wght.columns = ['US HY Return']
    treasury_wght = weights['treasury_wght'].to_frame()
    treasury_wght.columns = ['US Int. Trsy Return']
    
    
    
    combined_wghts = pd.concat([bond_wght,treasury_wght], axis=1)
    
    combined_wghts = combined_wghts.shift(1)
    
    combined_wghts.dropna(inplace=True)
    

    weighted_returns = combined_wghts * data_df[['US HY Return','US Int. Trsy Return']]
    
    portfolio_return = weighted_returns.sum(axis=1).to_frame()
    
    
    portfolio_return =  portfolio_return.add(1).cumprod()

    hy_mm = long_only_ew(portfolio_return, name='HY Momentum')
    
    return hy_mm, combined_wghts
 def featurize(self, H):
     X = pd.DataFrame({
         'last_sh': H.shift(1).stack(),
         'history_sh': pd.expanding_mean(H).shift(1).stack(),
         'history_sh_vol': pd.expanding_std(H).shift(1).stack(),
         'nr_days': H.notnull().cumsum().stack()
     })
     return X
Example #14
 def featurize(self, H):
     X = pd.DataFrame({
         'last_sh': H.shift(1).stack(),
         'history_sh': pd.expanding_mean(H).shift(1).stack(),
         'history_sh_vol': pd.expanding_std(H).shift(1).stack(),
         'nr_days': H.notnull().cumsum().stack()
     })
     return X
Example #15
def VaR_norm(data, alpha=0.99, n=252):
    Z = stats.norm(0, 1).ppf(1 - alpha)  # inverse CDF (percent point function)
    data['mean'] = pd.rolling_mean(data['return'], n)
    data['std'] = pd.rolling_std(data['return'], n)
    # fall back to expanding statistics while the n-period rolling window is not yet full
    if math.isnan(data.tail(1).iat[0, 3]):
        data['mean'] = pd.expanding_mean(data['return'])
        data['std'] = pd.expanding_std(data['return'])
    data['delta'] = data['mean'] + Z * data['std']
    return data.tail(1).iat[0, 4]
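For orientation: with alpha=0.99, Z is the 1% quantile of the standard normal, so delta = mean + Z * std is the 1% quantile of the fitted normal distribution of returns. A quick check of the constant:

from scipy import stats

Z = stats.norm(0, 1).ppf(1 - 0.99)
print(round(Z, 3))  # -2.326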
Example #16
def savemean(basedir, plot=False):
    count = 0

    dirnames = [
        name for name in os.listdir(basedir)
        if os.path.isdir(os.path.join(basedir, name))
        and os.path.isfile(os.path.join(basedir, name, 'onresults.csv'))
    ]
    for dirname in dirnames:
        resultspath = os.path.join(basedir, dirname)
        if diverged(resultspath):
            print("Diverged: " + resultspath)
            continue
        resultsfile = os.path.join(resultspath, 'onresults.csv')
        if not os.path.isfile(resultsfile):
            continue
        csv = pd.read_csv(resultsfile)
        if count == 0:
            csvsum = csv
        else:
            csvsum += csv
        count += 1
    savepath = os.path.join(basedir, 'onresults.csv')
    meancsv = csvsum / count
    meancsv.to_csv(savepath, index=False)
    if plot:
        plotcount = len(plot)
        for i, toplot in enumerate(plot):
            runaverages = False
            if isinstance(toplot, dict):
                runaverages = toplot.get('runaverages', False)
                toplot = toplot.get('plot')
            plt.subplot(1, plotcount, i + 1)
            if runaverages:
                pd.expanding_mean(meancsv[toplot]).plot()
            else:
                meancsv[toplot].plot()
        _setplotlabels(plot)
        plot_file = os.path.join(basedir, 'mean.pdf')
        plt.savefig(plot_file, bbox_inches='tight')
        plt.show()
    return savepath
Example #17
def sharpe(returns, rfr=0, expanding=0):
    """
    returns: periodic return stream
    rfr: risk free rate
    expanding: bool
    """
    if expanding:
        excess = excess_returns(returns, rfr)
        return pd.expanding_mean(excess) / pd.expanding_std(returns)
    else:
        return excess_returns(returns, rfr).mean() / returns.std()
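A hedged sketch of the expanding branch on modern pandas (>= 1.0), assuming the same excess_returns helper is available; note it divides by the expanding std of the raw returns, exactly as the original does:

def sharpe_modern(returns, rfr=0):
    excess = excess_returns(returns, rfr)
    return excess.expanding().mean() / returns.expanding().std()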
Example #18
File: perf.py  Project: ychaim/tia
def sharpe(returns, rfr=0, expanding=0):
    """
    returns: periodic return stream
    rfr: risk free rate
    expanding: bool
    """
    if expanding:
        excess = excess_returns(returns, rfr)
        return pd.expanding_mean(excess) / pd.expanding_std(returns)
    else:
        return excess_returns(returns, rfr).mean() / returns.std()
Example #19
    def expected_value(self, window=0, rebalanced=True, from_date=None, to_date=None):
        ret = None
        returns = self.returns(rebalanced, from_date, to_date)

        if window == 0:
            ret = np.asscalar(np.mean(returns))
        if window > 0:
            ret = pd.rolling_mean(returns, window)
        if window == -1:
            ret = pd.expanding_mean(returns)
        
        return ret
Example #20
def plotPanel(betaSeries, name):
    betaSeries = betaSeries["1995-01-01":]
    cumbetas = np.cumprod(betaSeries / 100 + 1) - 1
    fig = plt.figure()
    ax1 = fig.add_subplot(411)
    ax1.set_title(name)
    ax1.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(3))
    betaSeries.plot()
    ax2 = fig.add_subplot(412)
    ax2.set_title("Cumulative " + name)
    ax2.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(3))
    cumbetas.plot()
    ax3 = fig.add_subplot(413)
    ax3.set_title("Expanding Mean: " + name)
    ax3.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(3))
    pd.expanding_mean(betaSeries).plot()
    ax4 = fig.add_subplot(414)
    ax4.set_title("Rolling t-stat: " + name)
    ax4.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(3))
    rolling_tstat(betaSeries).plot()
    fig.tight_layout(pad=1.1)
Example #21
def savemean(basedir, plot=False):
    count = 0

    dirnames = [name for name in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, name))
                                and os.path.isfile(os.path.join(basedir, name, 'onresults.csv'))]
    for dirname in dirnames:
        resultspath = os.path.join(basedir, dirname)
        if diverged(resultspath):
            print("Diverged: " + resultspath)
            continue
        resultsfile = os.path.join(resultspath, 'onresults.csv')
        if not os.path.isfile(resultsfile):
            continue
        csv = pd.read_csv(resultsfile)
        if count == 0:
            csvsum = csv
        else:
            csvsum += csv
        count += 1
    savepath = os.path.join(basedir, 'onresults.csv')
    meancsv = csvsum / count
    meancsv.to_csv(savepath, index=False)
    if plot:
        plotcount = len(plot)
        for i, toplot in enumerate(plot):
            runaverages = False
            if isinstance(toplot, dict):
                runaverages = toplot.get('runaverages', False)
                toplot = toplot.get('plot')
            plt.subplot(1, plotcount, i + 1)
            if runaverages:
                pd.expanding_mean(meancsv[toplot]).plot()
            else:
                meancsv[toplot].plot()
        _setplotlabels(plot)
        plot_file = os.path.join(basedir,'mean.pdf')
        plt.savefig(plot_file, bbox_inches='tight')
        plt.show()
    return savepath
Example #22
def results():
    """
    DOCSTRING
    """
    dataframe_a = pandas.read_csv('performance_data_sp500ish.csv',
                                  index_col='time',
                                  parse_dates=True)
    dataframe_a.sort_index(inplace=True)
    dataframe_a['expanding_mean'] = pandas.expanding_mean(
        dataframe_a['percent_change'], 0)
    dataframe_a['expanding_mean'].plot(label='Performance')
    pyplot.legend()
    pyplot.show()
Example #23
	def stat_select(self,stat):
		"""
		Gets the stats for all the teams for each day in the season.

		Parameters
		----------
		year : int , which year to grab data
		stat : string
		Can be:
		['score', 'fgm', 'fga', 'fgm3', 'fga3', 'ftm', 'fta', 'or', 'dr', 'ast',
		 'to', 'stl', 'blk', 'pf', 'poss', 'oe', 'de', 'efg', 'eto', 'eor', 'eftr']
		 Not implemented:
		 'rpi','rpi_1', rpi_2, 'rpi_3'

		Returns
		-------
		store_feature : dataframe with columns the team and rows the days
			values are the stats scored in the game
		store_outcome : dataframe with columns the team and rows the days
			values are 1 for win or 0 for loss.
		"""

		X = self.game_stats
	
		store_feature = pd.DataFrame(index = self.unique_days,
			columns=self.teams)

		#how it is represented in the game_data
		w_f = 'W' + stat
		l_f = 'L' + stat

		#loop through line and put the stat in correct places
		for ii in range(len(X.index)):

			day = X.index[ii]
			row = X.iloc[ii]

			w_team = row['Wteam'] #winning team
			w_feature = row[w_f]
			l_team = row['Lteam'] #losing team
			l_feature = row[l_f]

			store_feature.loc[day,w_team] = w_feature
			store_feature.loc[day,l_team] = l_feature

		# take the average of it through the season
		store_feature = pd.expanding_mean(store_feature)

		self.daily_stat_dict[stat] = store_feature
		
		print(stat + ' saved to daily_stat_dict!')
Example #24
def plotall(basedir, plot=None):
    dirnames = [
        name for name in os.listdir(basedir)
        if os.path.isdir(os.path.join(basedir, name))
        and os.path.isfile(os.path.join(basedir, name, 'onresults.csv'))
    ]
    shortnames = getshortnames(dirnames)
    plotcount = len(plot) if isinstance(plot, list) else 0  # plot may be None
    for dirname, shortname in zip(dirnames, shortnames):
        resultspath = os.path.join(basedir, dirname)
        hasdiverged = False
        if diverged(resultspath):
            hasdiverged = True
        resultsfile = os.path.join(resultspath, 'onresults.csv')
        if not os.path.isfile(resultsfile):
            continue
        csv = pd.read_csv(resultsfile)
        if isinstance(plot, list):
            for i, toplot in enumerate(plot):
                runaverages = False
                if isinstance(toplot, dict):
                    runaverages = toplot.get('runaverages', False)
                    toplot = toplot.get('plot')
                label = shortname + (" (diverged)" if hasdiverged else "")
                plt.subplot(1, plotcount, i + 1)
                if runaverages:
                    pd.expanding_mean(csv[toplot]).plot(label=label,
                                                        legend=True)
                else:
                    csv[toplot].plot(label=label, legend=True)
        else:
            csv.plot(subplots=True)
    _setplotlabels(plot)
    plot_file = os.path.join(basedir, 'all.pdf')
    plt.savefig(plot_file, bbox_inches='tight')
    plt.show()
Example #25
def movie_chart(request, movie_id):
    ratings = Rating.objects.filter(movie__id=movie_id)
    df = pd.DataFrame(model_to_dict(rating) for rating in ratings)
    df.index = df['timestamp']
    counts = df['rating']
    counts = counts.sort_index()
    series = pd.expanding_mean(counts).resample('M', how=np.max, fill_method='pad')
    response = HttpResponse(content_type='image/png')

    fig = plt.figure(figsize=(6, 4), facecolor="#272b30")
    plt.xticks(color="white")
    plt.yticks(color="white")
    series.plot()
    plt.title("Average Rating over Time", color="white")
    plt.xlabel("")
    canvas = FigureCanvas(fig)
    canvas.print_png(response)
    return response
Example #26
def rater_chart(request, rater_id):
    ratings = Rating.objects.filter(rater_id = rater_id)
    df = pd.DataFrame(model_to_dict(rating) for rating in ratings)
    df.index=df['posted_at']
    ratings = df['rating']
    ratings = ratings.sort_index()
    series = pd.expanding_mean(ratings)
    series = series.resample('M', how=np.max, fill_method='pad')
    series = series[2:]

    response = HttpResponse(content_type='image/png')

    fig = plt.figure(figsize=(6,5))
    series.plot()
    plt.title("User Average Rating Over Time")
    plt.xlabel("")
    canvas = FigureCanvas(fig)
    canvas.print_png(response)
    return response
Example #27
 def expanding_smoother(self, data, stype='mean', min_periods=None, freq=None):
     """
     Perform an expanding smoothing on the data; for complete help refer to http://pandas.pydata.org/pandas-docs/dev/computation.html

     :param data: pandas dataframe input data
     :param stype: smoothing type (pass the suffix, e.g. 'mean' for expanding_mean)
     :param min_periods: minimum number of periods
     :param freq: frequency
     smoothing types:
     expanding_count	Number of non-null observations
     expanding_sum	Sum of values
     expanding_mean	Mean of values
     expanding_median	Arithmetic median of values
     expanding_min	Minimum
     expanding_max	Maximum
     expanding_std	Unbiased standard deviation
     expanding_var	Unbiased variance
     expanding_skew	Unbiased skewness (3rd moment)
     expanding_kurt	Unbiased kurtosis (4th moment)
     """
     if stype == 'count':
         newy = pd.expanding_count(data, min_periods=min_periods, freq=freq)
     elif stype == 'sum':
         newy = pd.expanding_sum(data, min_periods=min_periods, freq=freq)
     elif stype == 'mean':
         newy = pd.expanding_mean(data, min_periods=min_periods, freq=freq)
     elif stype == 'median':
         newy = pd.expanding_median(data, min_periods=min_periods, freq=freq)
     elif stype == 'min':
         newy = pd.expanding_min(data, min_periods=min_periods, freq=freq)
     elif stype == 'max':
         newy = pd.expanding_max(data, min_periods=min_periods, freq=freq)
     elif stype == 'std':
         newy = pd.expanding_std(data, min_periods=min_periods, freq=freq)
     elif stype == 'var':
         newy = pd.expanding_var(data, min_periods=min_periods, freq=freq)
     elif stype == 'skew':
         newy = pd.expanding_skew(data, min_periods=min_periods, freq=freq)
     elif stype == 'kurt':
         newy = pd.expanding_kurt(data, min_periods=min_periods, freq=freq)
     else:
         raise ValueError('unknown smoothing type: %s' % stype)
     return newy
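Since every branch above differs only in the function name, here is a hedged sketch of the same dispatch on modern pandas (>= 1.0), where the expanding_* functions became methods on .expanding() and the freq argument was dropped; the helper name is made up:

import pandas as pd

def expanding_smoother_modern(data, stype='mean', min_periods=None):
    # the whole if-chain collapses to attribute lookup on the Expanding object
    expanding = data.expanding(min_periods=min_periods or 1)
    if not hasattr(expanding, stype):
        raise ValueError('unknown smoothing type: %s' % stype)
    return getattr(expanding, stype)()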
Example #28
File: perf.py  Project: georgebdavis/tia
def sortino_ratio(returns, mar=0, full=1, expanding=0, ann=1):
    """
    returns: periodic return stream
    mar: minimum acceptable return
    full: if true, use the entire series, else use the subset below mar
    expanding: bool
    """
    factor = ann and periodicity(returns) or 1.
    if expanding:
        avgexcess = pd.expanding_mean(excess_returns(returns, mar))
        avgexcess *= (ann and factor or 1.)
        downside = downside_deviation(returns, mar, full, expanding=1)
        downside *= (ann and np.sqrt(factor) or 1.)
        return avgexcess / downside
    else:
        avgexcess = excess_returns(returns, mar).mean()
        avgexcess *= (ann and factor or 1.)
        downside = downside_deviation(returns, mar, full)
        downside *= (ann and np.sqrt(factor) or 1.)
        return avgexcess / downside
def equity_vol_test(data_frame):
    
    rolling_period = 1
    
    # keep the log-vol change as a DataFrame so the z-score can be stored as a column
    rolling_change = np.log(data_frame['Equity Volatility']).pct_change(periods=rolling_period).to_frame('change')

    rolling_change['rolling_z'] = (rolling_change['change'] - pd.expanding_mean(rolling_change['change'], min_periods=24)) / pd.expanding_std(rolling_change['change'], min_periods=24)
    
    weights = pd.DataFrame(index=rolling_change['rolling_z'].index)
    
    weights['bond_wght'] = rolling_change['rolling_z'] * -1
    
    weights['treasury_wght'] = rolling_change['rolling_z']
    
    weights = weights / 1.5
    weights.dropna(inplace=True)
    weights = weights.clip(-1, 1)

    bond_wght =  weights['bond_wght'].to_frame()
    bond_wght.columns = ['US HY Return']
    treasury_wght = weights['treasury_wght'].to_frame()
    treasury_wght.columns = ['US Int. Trsy Return']
    
    combined_wghts = pd.concat([bond_wght,treasury_wght], axis=1)
    
    combined_wghts = combined_wghts.shift(1)
    
    combined_wghts.dropna(inplace=True)    

    weighted_returns = combined_wghts * data_frame[['US HY Return','US Int. Trsy Return']]
    
    portfolio_return = weighted_returns.sum(axis=1).to_frame()
    
    portfolio_return =  portfolio_return.add(1).cumprod()

    eq_vol = long_only_ew(portfolio_return, name='Equity Volatility')

    
    return eq_vol, combined_wghts
Example #30
def shift_columns(df, columns, shifts, averages):

    for column in columns:
        for shift in shifts:
            df[column + str(shift)] =  df.groupby('player')[column].shift(periods=shift)

        for average in averages:
            selector = [column + str(x) for x in range(-1, average-1, -1)]
            df[column + str(average) + 'avg'] = df.loc[:,selector].mean(axis=1)

        for shift in shifts:
            del df[column +str(shift)]

        df.sort_values(by=['date_game', 'player'], inplace=True)
    
        df[column + 'seasonavg'] = df.groupby('player')[column].apply(
            lambda x: pd.expanding_mean(x).shift())

    df['rest'] = df.groupby('player')['date_game'].diff().astype('timedelta64[D]')

    return df
Example #31
	def daily_win_percentage3(self):

		"""
		Calculates the third column of the rpi scores
		Inputs the winning percentage of the team played
		on that day
		"""

		perc = self.daily_stat_dict['win_perc2']
		sched = self.schedule

		store_perc = pd.DataFrame(index = self.unique_days,
			columns=self.teams)

		for ii in range(len(perc.index)):

			# fill in nans for the first day since no teams have a 
			# winning percentage yet
			if ii ==0:
				ind0 = perc.index[ii]
				store_perc.loc[ind0] = np.nan

			else:
				ind0 = perc.index[ii]
				ind1 = perc.index[ii-1]
				p = perc.loc[ind1]		# percentages of team before that point
				s = sched.loc[ind0].values 	# teams played


				team_played = s[s!=0] #teams played

				ind_team = perc.columns.values[s!=0].astype(int) # get the teams
				
				store_perc.loc[ind0][ind_team] = p[team_played].values

		#now go through and have a running average of the teams played
		store_perc = pd.expanding_mean(store_perc)

		self.daily_stat_dict['win_perc3'] = store_perc
		print('win_perc3 saved to daily_stat_dict!')
def EMA_MACO(data, d):
    # signature reconstructed from the calls in MACD below (the def line was
    # missing from the scraped snippet)
    test = pd.ewma(data, span=d)
    return test

def MACD(data, FastLength, SlowLength, MACDLength):
    data['Diff'] = ''
    data['Diff'] = EMA_MACO(data['open'], FastLength) - EMA_MACO(data['open'], SlowLength)
    data['DEA'] = ''
    data['DEA'] = EMA_MACO(data['Diff'], MACDLength)
    data['MACD'] = ''
    data['MACD'] = data['Diff'] - data['DEA']
    return data

for h, k in [(5, 20), (15, 20), (5, 10), (5, 15), (10, 15)]:
    data1['fast_line'] = ''
    data1['slow_line'] = ''
    data1['fast_line'] = pd.rolling_mean(data1['close'], h)
    data1['slow_line'] = pd.rolling_mean(data1['close'], k)
    data1['fast_line'] = data1['fast_line'].fillna(value=pd.expanding_mean(data1['close']))
    data1['slow_line'] = data1['slow_line'].fillna(value=pd.expanding_mean(data1['close']))
    data1['dist_%s_%s' % (k, h)] = data1['fast_line'] - data1['slow_line']
for h in range(10, 26, 5):
    data1['fast_line'] = ''
    data1['slow_line'] = ''
    data1['fast_line'] = pd.rolling_max(data1['high'].shift(1), h)
    data1['slow_line'] = pd.rolling_min(data1['low'].shift(1), h)
    data1['fast_line'] = data1['fast_line'].fillna(value=pd.expanding_max(data1['high']))
    data1['slow_line'] = data1['slow_line'].fillna(value=pd.expanding_min(data1['low']))
    data1['dist_high_%s' % h] = data1['high'] - data1['fast_line']
    data1['dist_low_%s' % h] = data1['low'] - data1['slow_line']
data1 = MACD(data1, 12, 26, 9)
data2 = pd.read_csv('rb888_2017.csv', parse_dates=True, index_col='time')
data2.reset_index(inplace=True)
data2['log_return'] = np.log(data2['close'] / data2['close'].shift(1))
 def expand_average_prod(self, station):
     assert 1 <= station <= 6, "Station does not exist."
     prod_data = self.prod_lists[station-1]
     prod_expand_avg = pd.expanding_mean(pd.Series(prod_data))
     return self.prod_lists[7], prod_expand_avg.tolist()
Example #34
# Difference functions allow us to identify seasonal changes when we see repeated up or downswings.
# An example from FiveThirtyEight:
# http://i2.wp.com/espnfivethirtyeight.files.wordpress.com/2015/03/casselman-datalab-wsj2.png?quality=90&strip=all&w=575&ssl=1

'''
Pandas Expanding Functions

In addition to the set of rolling_* functions, Pandas also 
provides a similar collection of expanding_* functions, which, 
instead of using a window of N values, uses all values up until 
that time.
'''


pd.expanding_mean(daily_store_sales) # mean of daily sales from the first date up to each date
pd.expanding_sum(daily_store_sales) # cumulative sum of daily sales up to each date
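The expanding_* top-level functions were removed in pandas 1.0; a minimal sketch of the same two calls with the modern API, assuming daily_store_sales is a Series or DataFrame:

daily_store_sales.expanding().mean()  # replaces pd.expanding_mean(daily_store_sales)
daily_store_sales.expanding().sum()   # replaces pd.expanding_sum(daily_store_sales)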

'''
EXERCISES

1. Plot the distribution of sales by month and compare the effect of promotions.
hint: try using hue in sns
2. Are sales more correlated with the prior date, a similar date last year, or a similar date last month?
3. Plot the 15 day rolling mean of customers in the stores.
4. Identify the date with largest drop in sales from the same date in the previous week.
5. Compute the total sales up until Dec. 2014.
6. When were the largest differences between 15-day moving/rolling averages? HINT: Using rolling_mean and diff
'''

# Plot the distribution of sales by month and compare the effect of promotions
def per_season_cummean(df, col_list):
    cumsum_df = (df.groupby(["PLAYER_NAME", "SEASON_ID"])
                   .apply(lambda x: add_game_date_pts_col(
                       pd.expanding_mean(x[col_list], min_periods=2),
                       x.GAME_DATE, x.OPP).reset_index(drop=True)))
    return (cumsum_df.reset_index()
            .drop('level_2', axis=1)
            .rename(columns=dict(zip(col_list, ['C_' + c for c in col_list]))))
Example #36
File: ta.py  Project: x829901/tia
def sma(arg, n):
    """ If n is 0 then return the ltd mean; else return the n day mean """
    if n == 0:
        return pd.expanding_mean(arg)
    else:
        return pd.rolling_mean(arg, n, min_periods=n)
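A hedged usage note with a tiny made-up series (requires the legacy pandas API this page is built around):

px = pd.Series([1., 2., 3., 4.])
sma(px, 0)  # life-to-date (ltd) mean: 1.0, 1.5, 2.0, 2.5
sma(px, 2)  # 2-period mean, NaN until the window fills: NaN, 1.5, 2.5, 3.5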
Example #37
print "AP3 hits: %d  Hit rate: %.2f%%" % (AP3_hits,
                                          AP3_hits / float(len(AP3_DELAY)))
print "AP5 hits: %d  Hit rate: %.2f%%" % (AP5_hits,
                                          AP5_hits / float(len(AP5_DELAY)))

print "\nAP3 average miss del#ay: " + str(np.mean(np.array(AP3_miss_delays)))
print "AP5 average miss delay: " + str(np.mean(np.array(AP5_miss_delays)))

print "\nAP3 average hit delay: " + str(np.mean(np.array(AP3_hit_delays)))
print "AP5 average hit delay: " + str(np.mean(np.array(AP5_hit_delays)))

print "\nAP3 average total delay: " + str(np.mean(np.array(AP3_DELAY)))
print "AP5 average total delay: " + str(np.mean(np.array(AP5_DELAY)))

df1 = pd.DataFrame({'delay': AP3_DELAY})
rm1 = pd.expanding_mean(df1, 10)

df2 = pd.DataFrame({'delay': AP5_DELAY})
rm2 = pd.expanding_mean(df2, 10)

#plt.plot(range(1,len(rm1) + 1), rm1, 'r')
#plt.plot(range(1,len(rm2) + 1), rm2, 'b')

#plt.plot(range(1,len(AP3_DELAY) + 1), AP3_DELAY, 'r', label='Single Median - AP1')
#plt.plot(range(1,len(AP5_DELAY) + 1), AP5_DELAY, 'b', label='2-median - S3')
#plt.xlabel('Number of requests', fontsize=18)
#plt.ylabel('Delay [ms]', fontsize=16)

#plt.plot(range(1,len(AP3_td) + 1), AP3_td, 'r--', range(1,len(AP5_td) + 1), AP5_td, 'g--')
#plt.show()
from scipy import stats

all_rewards = np.array([])
all_times = np.array([])

for runid in range(10001, 10018, 1):

    df = pd.read_csv("final_results/new_results" + str(runid) + "-4.csv")

    mean_reward = df.rewards.mean()
    
    duration = df.times[len(df.times) - 1] - df.times[0]
    all_times = np.append(all_times, [duration])
    all_rewards = np.append(all_rewards, [mean_reward])

em = pd.expanding_mean(all_rewards)
em_times = pd.expanding_mean(all_times)

x = np.arange(0, len(em), 1)

print("Overall mean time per runid (in sec)", all_times.mean())
print("Overall mean cumulative reward", all_rewards.mean())
print("SE of the times (in sec)", stats.sem(all_times))
print("SE of the mean cumulative rewards", stats.sem(all_rewards))

plt.plot(x, em)
plt.show()



Example #39
File: ta.py  Project: georgebdavis/tia
def sma(arg, n):
    """ If n is 0 then return the ltd mean; else return the n day mean """
    if n == 0:
        return pd.expanding_mean(arg)
    else:
        return pd.rolling_mean(arg, n, min_periods=n)
Example #40
raw = requests.get("http://www.google.com/finance/getprices?i=" + interval +
                   "&p=" + lookback + "d&f=c&df=cpct&q=" + symbol).text

# Take the data and put it into a DataFrame
raw = raw.split()[7:]

data = pd.DataFrame(raw)
data = data.astype("float")
data["price"] = data[0]
del data[0]

# We only need 60 minutes worth of data
if len(data["price"]) >= 60:
    data["price"] = data["price"][-60:]

# Columns for expanding mean and standard deviation
data["mean"] = pd.expanding_mean(data["price"])
data["vol"] = pd.expanding_std(data["price"])

# Linear regression on price data
x = range(len(data["price"][-60:]))
y = data["price"][-60:].values

A, B = curve_fit(f, x, y)

# Print the trend to the console
if A[0] < 0: print("downtrend")
else: print("uptrend")

# Plot window
plt.figure(1)
# var_list = ['dp', 'dy', 'ep', 'de', 'rvol', 'bm', 'ntis', 'tbl', 'lty', 'ltr',
#             'tms','dfy','dfr','infl']
econ_var = ['dp', 'dy', 'ep', 'de', 'rvol', 'bm', 'ntis', 'tbl', 'lty', 'ltr',
            'tms','dfy','dfr','infl']
tech_var = ['ma_1_9', 'ma_1_12', 'ma_2_9', 'ma_2_12', 'ma_3_9', 'ma_3_12',
            'mom_9', 'mom_12', 'vol_1_9', 'vol_1_12', 'vol_2_9', 'vol_2_12',
            'vol_3_9', 'vol_3_12']
var_list = econ_var + tech_var


# get data for specified date range
df_sub = df[beg_date_init:end_date_oos]

# historical average (ha) forecast
init_obs = len(pd.date_range(beg_date_init, beg_date_oos, freq='M'))  # should be 181 obs
ha_forecast = pd.expanding_mean(df_sub['log_equity_premium'],
                                min_periods=init_obs)
ha_forecast = ha_forecast.shift(1)
ha_err = df_sub['log_equity_premium'][beg_date_oos:end_date_oos] - \
    ha_forecast[beg_date_oos:end_date_oos]
ha_msfe = np.mean(np.power(ha_err, 2))
ha_msfe_exp = np.mean(np.power(ha_err[df_sub['recession'] == 0], 2))
ha_msfe_rec = np.mean(np.power(ha_err[df_sub['recession'] == 1], 2))
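These three MSFEs form the historical-average benchmark for the r2 entries initialized below; a hedged sketch of the usual out-of-sample R-squared comparison, where model_err is a hypothetical competing model's forecast errors:

# r2_os > 0 means the competing model beats the historical-average forecast
r2_os = 1.0 - np.mean(np.power(model_err, 2)) / ha_msfe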

# initialize dictionary of lists
d = {}
for i in ['msfe', 'msfe_exp', 'msfe_rec', 'msfe_adj', 'p_value', 'r2', 'r2_exp',
          'r2_rec', 'sq bias', 'rem term']:
    d[i] = []

# lag the x variables
df[var_list] = df[var_list].shift(1)
Example #42
    def get_context_data(self, **kwargs):
        context = super(WellChartView, self).get_context_data(**kwargs)
        well = Well.objects.get(pk=context['pk'])
        name = unicode(well)
        options = {
            'rangeSelector': {
                'enabled': True,
                'inputEnabled': True,
            },
            'navigator': {
                'adaptToUpdatedData': True,
                'enabled': True
            },
            'chart': {
                'type': 'arearange',
                'zoomType': 'x'
            },
            'title': {
                'text': name
            },
            'xAxis': {
                'type': 'datetime'
            },
            'yAxis': [{
                'title': {
                    'text': 'm tov NAP'
                }
            }],
            'tooltip': {
                'valueSuffix': ' m',
                'valueDecimals': 2,
                'shared': True,
            },
            'legend': {
                'enabled': True
            },
            'plotOptions': {
                'line': {
                    'marker': {
                        'enabled': False
                    }
                }
            },
            'credits': {
                'enabled': True,
                'text': 'acaciawater.com',
                'href': 'http://www.acaciawater.com',
            },
        }
        series = []
        xydata = []
        for screen in well.screen_set.all():
            name = unicode(screen)
            data = screen.to_pandas(ref='nap')
            xydata = zip(data.index.to_pydatetime(), data.values)
            series.append({
                'name': name,
                'type': 'line',
                'data': xydata,
                'zIndex': 1,
            })
            mean = pd.expanding_mean(data)
            #             series.append({'name': 'gemiddelde',
            #                         'type': 'line',
            #                         'data': zip(mean.index.to_pydatetime(), mean.values),
            #                         'linkedTo' : ':previous',
            #                         })
            std = pd.expanding_std(data)
            a = (mean - std).dropna()
            b = (mean + std).dropna()
            ranges = zip(a.index.to_pydatetime(), a.values, b.values)
            series.append({
                'name': 'spreiding',
                'data': ranges,
                'type': 'arearange',
                'lineWidth': 0,
                'fillOpacity': 0.2,
                'linkedTo': ':previous',
                'zIndex': 0,
            })

        if len(xydata) > 0:
            mv = []
            for i in range(len(xydata)):
                mv.append((xydata[i][0], screen.well.maaiveld))
            series.append({'name': 'maaiveld', 'type': 'line', 'data': mv})

        options['series'] = series
        context['options'] = json.dumps(
            options, default=lambda x: int(time.mktime(x.timetuple()) * 1000))
        context['object'] = well
        return context
Example #43
File: utils.py  Project: luoli413/Cornell
def comput_idicators(df,
                     trading_days,
                     required,
                     save_file,
                     save_address,
                     whole=1):
    # TODO: net_value has a problem.
    # columns needed
    col = ['index_price', 'Interest_rate', 'nav', 'rebalancing', 'stoploss']
    df_valid = df.ix[:, col]
    start_balance = df.index[df['rebalancing'] == 1][0]
    df_valid = df_valid[df_valid.index >= start_balance]

    # daily return
    df_valid['return'] = np.log(df['nav']) - np.log(df['nav'].shift(1))
    # benchmark_net_value
    df_valid['benchmark'] = df_valid['index_price'] / df_valid['index_price'].ix[0]
    # benchmark_return
    df_valid['benchmark_return'] = (df_valid['benchmark'] -
                                    df_valid['benchmark'].shift(1)) / df_valid['benchmark'].shift(1)
    # Annualized return
    df_valid['Annu_return'] = pd.expanding_mean(
        df_valid['return']) * trading_days
    # Volatility
    df_valid.loc[:, 'algo_volatility'] = pd.expanding_std(
        df_valid['return']) * np.sqrt(trading_days)
    df_valid.loc[:, 'xret'] = df_valid[
        'return'] - df_valid['Interest_rate'] / trading_days / 100
    df_valid.loc[:, 'ex_return'] = df_valid['return'] - df_valid[
        'benchmark_return']

    def ratio(x):
        return np.nanmean(x) / np.nanstd(x)

    # sharpe ratio
    df_valid.loc[:, 'sharpe'] = pd.expanding_apply(df_valid['xret'], ratio)\
                                * np.sqrt(trading_days)
    # information ratio
    df_valid.loc[:, 'IR'] = pd.expanding_apply(df_valid['ex_return'], ratio)\
                                * np.sqrt(trading_days)

    # Sortino ratio
    def modify_ratio(x, re):
        re /= trading_days
        ret = np.nanmean(x) - re
        st_d = np.nansum(np.square(x[x < re] - re)) / x[x < re].size
        return ret / np.sqrt(st_d)

    df_valid.loc[:, 'sortino'] = pd.expanding_apply(
        df_valid['return'], modify_ratio,
        args=(required, )) * np.sqrt(trading_days)
    # Transfer infs to NA
    df_valid.loc[np.isinf(df_valid.loc[:, 'sharpe']), 'sharpe'] = np.nan
    df_valid.loc[np.isinf(df_valid.loc[:, 'IR']), 'IR'] = np.nan
    # hit_rate
    wins = np.where(df_valid['return'] >= df_valid['benchmark_return'], 1.0,
                    0.0)
    df_valid.loc[:, 'hit_rate'] = wins.cumsum() / pd.expanding_apply(wins, len)
    # 95% VaR
    df_valid['VaR'] = -pd.expanding_quantile(df_valid['return'], 0.05)*\
                      np.sqrt(trading_days)
    # 95% CVaR
    df_valid['CVaR'] = -pd.expanding_apply(df_valid['return'],
                                          lambda x: x[x < np.nanpercentile(x, 5)].mean())\
                       * np.sqrt(trading_days)

    if whole == 1:
        # max_drawdown
        def exp_diff(x, type):
            if type == 'dollar':
                xret = pd.expanding_apply(x, lambda xx: (xx[-1] - xx.max()))
            else:
                xret = pd.expanding_apply(
                    x, lambda xx: (xx[-1] - xx.max()) / xx.max())
            return xret
    # dollar
    #     xret = exp_diff(df_valid['cum_profit'],'dollar')
    #     df_valid['max_drawdown_profit'] = abs(pd.expanding_min(xret))
    # percentage

        xret = exp_diff(df_valid['nav'], 'percentage')
        df_valid['max_drawdown_ret'] = abs(pd.expanding_min(xret))

        # max_drawdown_duration:
        # drawdown_enddate is the first time for restoring the max
        def drawdown_end(x, type):
            xret = exp_diff(x, type)
            minloc = xret[xret == xret.min()].index[0]
            x_sub = xret[xret.index > minloc]
            # if never recovering,then return nan
            try:
                return x_sub[x_sub == 0].index[0]
            except:
                return np.nan

        def drawdown_start(x, type):
            xret = exp_diff(x, type)
            minloc = xret[xret == xret.min()].index[0]
            x_sub = xret[xret.index < minloc]
            try:
                return x_sub[x_sub == 0].index[-1]
            except:
                return np.nan

        df_valid['max_drawdown_start'] = pd.Series()
        df_valid['max_drawdown_end'] = pd.Series()
        df_valid['max_drawdown_start'].ix[-1] = drawdown_start(
            df_valid['nav'], 'percentage')
        df_valid['max_drawdown_end'].ix[-1] = drawdown_end(
            df_valid['nav'], 'percentage')
    df_valid.to_csv(save_address)
    # =====result visualization=====
    plt.figure(1)
    if whole == 1:
        plt.subplot(224)
        plt.plot(df_valid['nav'], label='strategy')
        plt.plot(df_valid['benchmark'], label='S&P500')
    plt.xlabel('Date')
    plt.legend(loc=0, shadow=True)
    plt.ylabel('Nav')
    plt.title('Nav of ' + save_file + ' & SP500')

    # plt.subplot(223)
    # plt.plot(df_valid['cum_profit'],label = 'strategy')
    # plt.xlabel('Date')
    # plt.ylabel('Cum_profit')
    # plt.title('Cum_profit of ' + save_file)

    plt.subplot(221)
    plt.plot(df_valid['return'], label='strategy')
    plt.xlabel('Date')
    plt.ylabel('Daily_return')
    plt.title('Daily Return of ' + save_file)

    plt.subplot(222)
    x_return = df_valid[df_valid['return'].notna()].loc[:, 'return']
    y_return = df_valid[
        df_valid['benchmark_return'].notna()].loc[:, 'benchmark_return']
    mu = x_return.mean()
    sigma = x_return.std()
    mybins = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100)
    count_x, _, _ = plt.hist(x_return,
                             mybins,
                             normed=1,
                             alpha=0.5,
                             label='strategy')
    count_y, _, _ = plt.hist(y_return,
                             mybins,
                             normed=1,
                             alpha=0.5,
                             label='S&P500')
    plt.ylabel('density')
    plt.xlabel('daily_return')
    plt.title('Histogram of Daily Return for ' + save_file + ' & SP500')
    plt.grid(True)
    # add normal distribution line
    y = mlab.normpdf(mybins, mu, sigma)
    plt.plot(mybins, y, 'r--', linewidth=1, label='Normal of strategy')
    plt.legend(loc=0, shadow=True)
    # plt.tight_layout()
    plt.show()
    return df_valid
Example #44
    data['MACD']=''
    data['MACD']=data['Diff']-data['DEA']
    return data
# Load the data and generate the factors
data = pd.read_csv('rb888_2015.csv', parse_dates=True, index_col='time')
data.reset_index(inplace=True)
data['log_return'] = np.log(data['open'] / data['open'].shift(1))
data['log_return'] = data['log_return'].fillna(0)
data['log_return_5'] = np.log(data['open'] / data['open'].shift(5))
data['log_return_5'] = data['log_return_5'].fillna(0)
for h, k in [(5, 10), (5, 15), (5, 20), (10, 15), (10, 20), (15, 20), (15, 30)]:
    data['fast_line'] = ''
    data['slow_line'] = ''
    data['fast_line'] = pd.rolling_mean(data['open'], h)
    data['slow_line'] = pd.rolling_mean(data['open'], k)
    data['fast_line'] = data['fast_line'].fillna(value=pd.expanding_mean(data['open']))
    data['slow_line'] = data['slow_line'].fillna(value=pd.expanding_mean(data['open']))
    data['dist_%s_%s' % (k, h)] = data['fast_line'] - data['slow_line']
for i in range(5, 31, 5):
    data['MA_%s' % i] = pd.rolling_mean(data['open'], i)
    data['MA_%s' % i] = data['MA_%s' % i].fillna(0) - data['open']
data = MACD(data, 12, 26, 9)
for h in range(10, 26, 5):
    data['fast_line'] = ''
    data['slow_line'] = ''
    data['fast_line'] = pd.rolling_max(data['high'].shift(1), h)
    data['slow_line'] = pd.rolling_min(data['low'].shift(1), h)
    data['fast_line'] = data['fast_line'].fillna(value=pd.expanding_max(data['high']))
    data['slow_line'] = data['slow_line'].fillna(value=pd.expanding_min(data['low']))
    data['dist_high_%s' % h] = data['high'] - data['fast_line']
    data['dist_low_%s' % h] = data['low'] - data['slow_line']
Example #45
File: ret.py  Project: xie3ge/tia
class CumulativeRets(object):
    def __init__(self, rets=None, ltd_rets=None):
        if rets is None and ltd_rets is None:
            raise ValueError('rets or ltd_rets must be specified')

        if rets is None:
            if ltd_rets.empty:
                rets = ltd_rets
            else:
                rets = (1. + ltd_rets).pct_change()
                rets.iloc[0] = ltd_rets.iloc[0]

        if ltd_rets is None:
            if rets.empty:
                ltd_rets = rets
            else:
                ltd_rets = (1. + rets).cumprod() - 1.

        self.rets = rets
        self.ltd_rets = ltd_rets

    pds_per_year = property(lambda self: periodicity(self.rets))

    def asfreq(self, freq):
        other_pds_per_year = periodicity(freq)
        if self.pds_per_year < other_pds_per_year:
            msg = 'Cannot downsample returns. Cannot convert from %s periods/year to %s'
            raise ValueError(msg % (self.pds_per_year, other_pds_per_year))

        if freq == 'B':
            rets = (1. + self.rets).groupby(self.rets.index.date).apply(lambda s: s.prod()) - 1.
            # If you do not do this, it will be an object index
            rets.index = pd.DatetimeIndex([i for i in rets.index])
            return CumulativeRets(rets)
        else:
            rets = (1. + self.rets).resample(freq, how='prod') - 1.
            return CumulativeRets(rets)

    # -----------------------------------------------------------
    # Resampled data
    dly = lazy_property(lambda self: self.asfreq('B'), 'dly')
    weekly = lazy_property(lambda self: self.asfreq('W'), 'weekly')
    monthly = lazy_property(lambda self: self.asfreq('M'), 'monthly')
    quarterly = lazy_property(lambda self: self.asfreq('Q'), 'quarterly')
    annual = lazy_property(lambda self: self.asfreq('A'), 'annual')

    # -----------------------------------------------------------
    # Basic Metrics
    @lazy_property
    def ltd_rets_ann(self):
        return (1. + self.ltd_rets) ** (self.pds_per_year / pd.expanding_count(self.rets)) - 1.

    cnt = property(lambda self: self.rets.notnull().astype(int).sum())
    mean = lazy_property(lambda self: self.rets.mean(), 'avg')
    mean_ann = lazy_property(lambda self: self.mean * self.pds_per_year, 'avg_ann')
    ltd = lazy_property(lambda self: self.ltd_rets.iloc[-1], name='ltd')
    ltd_ann = lazy_property(lambda self: self.ltd_rets_ann.iloc[-1], name='ltd_ann')
    std = lazy_property(lambda self: self.rets.std(), 'std')
    std_ann = lazy_property(lambda self: self.std * np.sqrt(self.pds_per_year), 'std_ann')
    drawdown_info = lazy_property(lambda self: drawdown_info(self.rets), 'drawdown_info')
    drawdowns = lazy_property(lambda self: drawdowns(self.rets), 'drawdowns')
    maxdd = lazy_property(lambda self: self.drawdown_info['maxdd'].min(), 'maxdd')
    dd_avg = lazy_property(lambda self: self.drawdown_info['maxdd'].mean(), 'dd_avg')
    kurtosis = lazy_property(lambda self: self.rets.kurtosis(), 'kurtosis')
    skew = lazy_property(lambda self: self.rets.skew(), 'skew')

    sharpe_ann = lazy_property(lambda self: np.divide(self.ltd_ann, self.std_ann), 'sharpe_ann')
    downside_deviation = lazy_property(lambda self: downside_deviation(self.rets, mar=0, full=0, ann=1),
                                       'downside_deviation')
    sortino = lazy_property(lambda self: self.ltd_ann / self.downside_deviation, 'sortino')

    @lazy_property
    def maxdd_dt(self):
        ddinfo = self.drawdown_info
        if ddinfo.empty:
            return None
        else:
            return self.drawdown_info['maxdd dt'].ix[self.drawdown_info['maxdd'].idxmin()]

    # -----------------------------------------------------------
    # Expanding metrics
    expanding_mean = property(lambda self: pd.expanding_mean(self.rets), 'expanding_avg')
    expanding_mean_ann = property(lambda self: self.expanding_mean * self.pds_per_year, 'expanding_avg_ann')
    expanding_std = lazy_property(lambda self: pd.expanding_std(self.rets), 'expanding_std')
    expanding_std_ann = lazy_property(lambda self: self.expanding_std * np.sqrt(self.pds_per_year), 'expanding_std_ann')
    expanding_sharpe_ann = property(lambda self: np.divide(self.ltd_rets_ann, self.expanding_std_ann))

    # -----------------------------------------------------------
    # Rolling metrics
    rolling_mean = property(lambda self: pd.rolling_mean(self.rets), 'rolling_avg')
    rolling_mean_ann = property(lambda self: self.rolling_mean * self.pds_per_year, 'rolling_avg_ann')

    def rolling_ltd_rets(self, n):
        return pd.rolling_apply(self.rets, n, lambda s: (1. + s).prod() - 1.)

    def rolling_ltd_rets_ann(self, n):
        tot = self.rolling_ltd_rets(n)
        return tot ** (self.pds_per_year / n)

    def rolling_std(self, n):
        return pd.rolling_std(self.rets, n)

    def rolling_std_ann(self, n):
        return self.rolling_std(n) * np.sqrt(self.pds_per_year)

    def rolling_sharpe_ann(self, n):
        return self.rolling_ltd_rets_ann(n) / self.rolling_std_ann(n)

    def iter_by_year(self):
        """Split the return objects by year and iterate"""
        for key, grp in self.rets.groupby(lambda x: x.year):
            yield key, CumulativeRets(rets=grp)

    def truncate(self, before=None, after=None):
        rets = self.rets.truncate(before=before, after=after)
        return CumulativeRets(rets=rets)

    @lazy_property
    def summary(self):
        d = OrderedDict()
        d['ltd'] = self.ltd
        d['ltd ann'] = self.ltd_ann
        d['mean'] = self.mean
        d['mean ann'] = self.mean_ann
        d['std'] = self.std
        d['std ann'] = self.std_ann
        d['sharpe ann'] = self.sharpe_ann
        d['sortino'] = self.sortino
        d['maxdd'] = self.maxdd
        d['maxdd dt'] = self.maxdd_dt
        d['dd avg'] = self.dd_avg
        d['cnt'] = self.cnt
        return pd.Series(d, name=self.rets.index.freq or guess_freq(self.rets.index))

    def _repr_html_(self):
        from tia.util.fmt import new_dynamic_formatter

        fmt = new_dynamic_formatter(method='row', precision=2, pcts=1, trunc_dot_zeros=1, parens=1)
        df = self.summary.to_frame()
        return fmt(df)._repr_html_()

    def get_alpha_beta(self, bm_rets):
        if isinstance(bm_rets, pd.Series):
            bm = CumulativeRets(bm_rets)
        elif isinstance(bm_rets, CumulativeRets):
            bm = bm_rets
        else:
            raise ValueError('bm_rets must be series or CumulativeRetPerformace not %s' % (type(bm_rets)))

        bm_freq = guess_freq(bm_rets)
        if self.pds_per_year != bm.pds_per_year:
            tgt = {'B': 'dly', 'W': 'weekly', 'M': 'monthly', 'Q': 'quarterly', 'A': 'annual'}.get(bm_freq, None)
            if tgt is None:
                raise ValueError('No mapping for handling benchmark with frequency: %s' % bm_freq)
            tmp = getattr(self, tgt)
            y = tmp.rets
            y_ann = tmp.ltd_ann
        else:
            y = self.rets
            y_ann = self.ltd_ann

        x = bm.rets.truncate(y.index[0], y.index[-1])
        x_ann = bm.ltd_ann

        model = pd.ols(x=x, y=y)
        beta = model.beta[0]
        alpha = y_ann - beta * x_ann
        return pd.Series({'alpha': alpha, 'beta': beta}, name=bm_freq)

    def plot_ltd(self, ax=None, style='k', label='ltd', show_dd=1, title=True, legend=1):
        ltd = self.ltd_rets
        ax = ltd.plot(ax=ax, style=style, label=label)
        if show_dd:
            dd = self.drawdowns
            dd.plot(style='r', label='drawdowns', alpha=.5, ax=ax)
            ax.fill_between(dd.index, 0, dd.values, facecolor='red', alpha=.25)
            fmt = PercentFormatter

            AxesFormat().Y.percent().X.label("").apply(ax)
            legend and ax.legend(loc='upper left', prop={'size': 12})

            # show the actualy date and value
            mdt, mdd = self.maxdd_dt, self.maxdd
            bbox_props = dict(boxstyle="round", fc="w", ec="0.5", alpha=0.25)
            try:
                dtstr = '{0}'.format(mdt.to_period())
            except:
                # assume daily
                dtstr = '{0}'.format(hasattr(mdt, 'date') and mdt.date() or mdt)
            ax.text(mdt, dd[mdt], "{1} \n {0}".format(fmt(mdd), dtstr).strip(), ha="center", va="top", size=8,
                    bbox=bbox_props)

        if title is True:
            pf = new_percent_formatter(1, parens=False, trunc_dot_zeros=True)
            ff = new_float_formatter(precision=1, parens=False, trunc_dot_zeros=True)
            total = pf(self.ltd_ann)
            vol = pf(self.std_ann)
            sh = ff(self.sharpe_ann)
            mdd = pf(self.maxdd)
            title = 'ret$\mathregular{_{ann}}$ %s     vol$\mathregular{_{ann}}$ %s     sharpe %s     maxdd %s' % (
            total, vol, sh, mdd)

        title and ax.set_title(title, fontdict=dict(fontsize=10, fontweight='bold'))
        return ax

    def plot_ret_on_dollar(self, title=None, show_maxdd=1, figsize=None, ax=None, append=0, label=None, **plot_args):
        plot_return_on_dollar(self.rets, title=title, show_maxdd=show_maxdd, figsize=figsize, ax=ax, append=append,
                              label=label, **plot_args)

    def plot_hist(self, ax=None, **histplot_kwargs):
        pf = new_percent_formatter(precision=1, parens=False, trunc_dot_zeros=1)
        ff = new_float_formatter(precision=1, parens=False, trunc_dot_zeros=1)

        ax = self.rets.hist(ax=ax, **histplot_kwargs)
        AxesFormat().X.percent(1).apply(ax)
        m, s, sk, ku = pf(self.mean), pf(self.std), ff(self.skew), ff(self.kurtosis)
        txt = '$\mathregular{\mu}$=%s   $\mathregular{\sigma}$=%s   skew=%s   kurt=%s' % (m, s, sk, ku)
        bbox = dict(facecolor='white', alpha=0.5)
        ax.text(0, 1, txt, fontdict={'fontweight': 'bold'}, bbox=bbox, ha='left', va='top', transform=ax.transAxes)
        return ax

    def filter(self, mask, keep_ltd=0):
        if isinstance(mask, pd.Series):
            mask = mask.values
        rets = self.rets.ix[mask]
        ltd = None
        if keep_ltd:
            ltd = self.ltd_rets.ix[mask]
        return CumulativeRets(rets=rets, ltd_rets=ltd)
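
# pd.ols (used in get_alpha_beta above) was removed in pandas 0.20; a minimal
# sketch of an equivalent slope fit with numpy, assuming x and y are the
# aligned benchmark and portfolio return series:
import numpy as np
import pandas as pd


def alpha_beta(x, y, x_ann, y_ann):
    # degree-1 least squares; np.polyfit returns the slope first
    beta, _intercept = np.polyfit(x.values, y.values, 1)
    return pd.Series({'alpha': y_ann - beta * x_ann, 'beta': beta})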
Example #46
import numpy
import pandas


def cum_avg(data):
    # cumulative mean and std of a 1-d sequence, returned as numpy arrays
    data = pandas.DataFrame({'data': data})
    means = pandas.expanding_mean(data)
    stds = pandas.expanding_std(data)
    return (numpy.array([i[0] for i in means.values]),
            numpy.array([i[0] for i in stds.values]))
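
# In pandas >= 0.18 the top-level expanding_* helpers were deprecated (and
# later removed) in favour of the .expanding() accessor; a minimal modern
# sketch of the same helper:
import numpy
import pandas


def cum_avg_modern(data):
    # cumulative mean and sample std up to each row, as numpy arrays
    s = pandas.Series(data, dtype=float)
    return s.expanding().mean().values, s.expanding().std().values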
Example #47
"""
import numpy as np
import pandas as pd
from scipy.special import logit
import matplotlib.pyplot as pp
import seaborn

sample_size = 100000

pp.ion()

dists = pd.DataFrame(np.random.normal(size=sample_size), columns=['normal'])
dists['inverse_normal'] = 1/(np.random.normal(size=sample_size))
dists['normal_ratio'] = np.random.normal(size=sample_size)/(np.random.normal(size=sample_size))
dists['poisson'] = (np.random.poisson(size=sample_size))
dists['poisson_ratio'] = (np.random.poisson(size=sample_size))/(np.random.poisson(size=sample_size))
dists['poisson_diff'] = (np.random.poisson(size=sample_size))-(np.random.poisson(size=sample_size))
dists['logit'] = logit(np.random.uniform(size=sample_size))
dists['cauchy'] = np.random.standard_cauchy(size=sample_size)
dists['uniform_wide'] = np.random.uniform(low=-100,high=100, size=sample_size)
bins = np.linspace(-100,100,100)

dists.hist(bins=bins, log=True, alpha=0.5)

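# The expanding means settle down for the finite-mean columns but keep
# wandering for the heavy-tailed ones (cauchy and the two normal ratios),
# whose means are undefined.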
pd.expanding_mean(dists).plot()
pp.title('Expanding Means')
pp.ylim(-3,3)
pp.show()


def process_sample(x):
    # keystrokes2events and SMAPE come from the original source (see the SMAPE sketch below)
    x = keystrokes2events(x)
    tau = x['time'].diff()
    predictions = pd.expanding_mean(tau).shift()
    return SMAPE(tau, predictions).dropna().mean()
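
# SMAPE is not defined in this snippet; a common elementwise definition (an
# assumption about what the original source used, and scalings vary) is:
import numpy as np


def SMAPE(actual, predicted):
    # symmetric mean absolute percentage error, elementwise
    return 2 * np.abs(actual - predicted) / (np.abs(actual) + np.abs(predicted))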

if __name__ == '__main__':
    start = datetime.now()
    PosSizeL = 1
    PosSizeS = 1
    data1 = pd.read_csv('rb888_2015.csv', parse_dates=True, index_col='time')
    data1.reset_index(inplace=True)

    for h, k in [(5, 20), (15, 20), (5, 10), (5, 15), (10, 15)]:
        # rolling means of the close; the NaN warm-up rows (first h-1 / k-1)
        # fall back to the expanding mean
        data1['fast_line'] = pd.rolling_mean(data1['close'], h)
        data1['slow_line'] = pd.rolling_mean(data1['close'], k)
        data1['fast_line'] = data1['fast_line'].fillna(
            value=pd.expanding_mean(data1['close']))
        data1['slow_line'] = data1['slow_line'].fillna(
            value=pd.expanding_mean(data1['close']))
        data1['dist_%s_%s' % (k, h)] = data1['fast_line'] - data1['slow_line']
    for h in range(10, 26, 5):
        # rolling channel highs/lows, shifted one bar; warm-up rows fall back
        # to the expanding max/min
        data1['fast_line'] = pd.rolling_max(data1['high'].shift(1), h)
        data1['slow_line'] = pd.rolling_min(data1['low'].shift(1), h)
        data1['fast_line'] = data1['fast_line'].fillna(
            value=pd.expanding_max(data1['high']))
        data1['slow_line'] = data1['slow_line'].fillna(
            value=pd.expanding_min(data1['low']))
        data1['dist_high_%s' % h] = data1['high'] - data1['fast_line']
        data1['dist_low_%s' % h] = data1['low'] - data1['slow_line']
    data1 = MACD(data1, 12, 26, 9)
def duration_smape(x):
    d = x['timerelease'] - x['timepress']
    predictions = pd.expanding_mean(d).shift()
    return SMAPE(d, predictions).dropna().mean()


def pp_smape(x):
    tau = x['timepress'].diff()
    predictions = pd.expanding_mean(tau).shift()
    return SMAPE(tau, predictions).dropna().mean()
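
# The warm-up pattern in the __main__ block above (a rolling statistic whose
# initial NaN rows fall back to its expanding counterpart) translates to the
# modern pandas API; a minimal sketch, with `close` a float Series:
def ma_with_warmup(close, window):
    ma = close.rolling(window).mean()
    return ma.fillna(close.expanding().mean())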
Example #52
# Read the pickled data
benmore = pd.read_pickle("benmore.pickle")
otahuhu = pd.read_pickle("otahuhu.pickle")

# Get OTA and BEN prices for current quarter
connection = ea.DW_connect(linux=True)
prices = ea.timeseries_convert(
    ea.FP_getter(
        connection,
        ea.query_prices(ea.current_quarter().start_time, datetime.now().date(), nodelist=["BEN2201", "OTA2201"]),
    )
)
# Calculate expanding mean over the quarter
spot_OTA = prices.price.OTA2201.groupby(lambda x: x.date()).mean()
spot_BEN = prices.price.BEN2201.groupby(lambda x: x.date()).mean()
spot_OTA_EXMEAN = pd.expanding_mean(spot_OTA)  # Otahuhu expanding mean
spot_BEN_EXMEAN = pd.expanding_mean(spot_BEN)  # Benmore expanding mean

OTA_BA = ea.CQ_data(otahuhu, ota, spot_OTA_EXMEAN, CQ, "otahuhu")
BEN_BA = ea.CQ_data(benmore, ben, spot_BEN_EXMEAN, CQ, "benmore")

print "Generate LaTex table"

ea.asx_table_maker(otahuhu, benmore, ota, ben, CQ, path + "/tables/asx_table_1.tex")
# ea.asx_market_comment(ota,ben,path + '/comments/comment.tex')

print "Printing Hedge Market data"

ea.forward_price_curve(9, ota, "Reds", path + "/figures/ota_fpc.pdf")
ea.forward_price_curve(9, ota, "Reds", path + "/figures/ota_fpc.png")
ea.forward_price_curve(10, ben, "Blues", path + "/figures/ben_fpc.pdf")
Example #53
    lasso_model, predictionsLS = lasso(df_final10_LS)

    # pickles model
    pickle_model(lasso_model, filename = 'pickled/lasso10_model.pk')
    # unpickle model and get predictions
    predictions = unpickle_and_predict(df_final10_new, filename = 'pickled/lasso10_model.pk')

    # append predictions to df_final10_new and drop all columns that we don't care about
    df_final = df_final10_new[['home_team', 'away_team', 'date', 'home_team_win']]
    preds = pd.DataFrame(predictions)
    preds.columns = ['prediction']
    final = pd.merge(df_final, preds, how = 'left', left_index = True, right_index = True)
    final.sort_values('date', ascending = True, inplace = True)
    final = final.reset_index(drop=True)
    final['match'] = np.where(final['home_team_win'] == final['prediction'], 1, 0)
    final['cumulative_average'] = pd.expanding_mean(final['match'], 1)
    # most recent games will be at the bottom
    # 58.8% accuracy

    # last season
    df_finalLS = df_final10_LS[['home_team', 'away_team', 'date', 'home_team_win']]
    predsLS = pd.DataFrame(predictionsLS)
    predsLS.columns = ['prediction']
    finalLS = pd.merge(df_finalLS, predsLS, how = 'left', left_index = True, right_index = True)
    finalLS.sort_values('date', ascending = True, inplace = True)
    finalLS = finalLS.reset_index(drop=True)
    finalLS['match'] = np.where(finalLS['home_team_win'] == finalLS['prediction'], 1, 0)
    finalLS['cumulative_average'] = pd.expanding_mean(finalLS['match'], 1)
    # these results don't match what was found in ModelVisualization.py
    # 57.3% accuracy
Example #54
raw = requests.get("http://www.google.com/finance/getprices?i="+interval+"&p="+lookback+"d&f=c&df=cpct&q="+symbol).text

# Take the data and put it into a DataFrame
raw = raw.split()[7:]

data = pd.DataFrame(raw)
data = data.astype("float")
data["price"] =  data[0]
del data[0]

# We only need 60 minutes worth of data
if len(data["price"] >= 60):  data["price"] = data["price"][-60:]

# Columns for expanding mean and standard deviation
data["mean"] = pd.expanding_mean(data["price"])
data["vol"] = pd.expanding_std(data["price"])

# Linear regression on price data
x = range(len(data["price"][-60:]))
y = data["price"][-60:].values

A, B = curve_fit(f, x, y)

# Print the trend to the console
if A[0] < 0 : print("downtrend")
else: print("uptrend")

# Plot window
plt.figure(1)
Example #55
    def get_context_data(self, **kwargs):
        context = super(WellChartView, self).get_context_data(**kwargs)
        well = Well.objects.get(pk=context['pk'])
        name = unicode(well)
        options = {
             'rangeSelector': { 'enabled': True,
                               'inputEnabled': True,
                               },
            'navigator': {'adaptToUpdatedData': True, 'enabled': True},
            'chart': {'type': 'arearange', 'zoomType': 'x'},
            'title': {'text': name},
            'xAxis': {'type': 'datetime'},
            'yAxis': [{'title': {'text': 'Grondwaterstand\n(m tov NAP)'}}
                      ],
            'tooltip': {'valueSuffix': ' m',
                        'valueDecimals': 2,
                        'shared': True,
                       }, 
            'legend': {'enabled': True},
            'plotOptions': {'line': {'marker': {'enabled': False}}},            
            'credits': {'enabled': True, 
                        'text': 'acaciawater.com', 
                        'href': 'http://www.acaciawater.com',
                       },
            }
        series = []
        xydata = []
        for screen in well.screen_set.all():
            name = unicode(screen)
            data = screen.to_pandas(ref='nap')
            if data.size > 0:
                xydata = zip(data.index.to_pydatetime(), data.values)
                series.append({'name': name,
                            'type': 'line',
                            'data': xydata,
                            'lineWidth': 1,
                            'color': '#0066FF',
                            'zIndex': 2,
                            })
                mean = pd.expanding_mean(data)
                std = pd.expanding_std(data)
                a = (mean - std).dropna()
                b = (mean + std).dropna()
                ranges = zip(a.index.to_pydatetime(), a.values, b.values)
                series.append({'name': 'spreiding',
                            'data': ranges,
                            'type': 'arearange',
                            'lineWidth': 0,
                            'color': '#0066FF',
                            'fillOpacity': 0.2,
                            'linkedTo' : ':previous',
                            'zIndex': 0,
                            })

            data = screen.to_pandas(ref='nap',kind='HAND')
            if data.size > 0:
                hand = zip(data.index.to_pydatetime(), data.values)
                series.append({'name': 'handpeiling',
                            'type': 'scatter',
                            'data': hand,
                            'zIndex': 3,
                            'marker': {'symbol': 'circle', 'radius': 6, 'lineColor': 'white', 'lineWidth': 2, 'fillColor': 'red'},
                            })

        if len(xydata)>0:
            mv = []
            mv.append((xydata[0][0], screen.well.maaiveld))
            mv.append((xydata[-1][0], screen.well.maaiveld))
            series.append({'name': 'maaiveld',
                        'type': 'line',
                        'lineWidth': 2,
                        'color': '#009900',
                        'dashStyle': 'Dash',
                        'data': mv,
                        'zIndex': 4,
                        })

        # add precipitation (neerslag) data from the nearest station
        try:
            closest = Station.closest(well.location)
            name = 'Meteostation {} (dagwaarden)'.format(closest.naam)
            neerslag = Series.objects.get(name='RH',mlocatie__name=name)
            data = neerslag.to_pandas(start=xydata[0][0], stop=xydata[-1][0]) / 10.0 # 0.1 mm -> mm
            data = zip(data.index.to_pydatetime(), data.values)
            series.append({'name': 'Neerslag '+ closest.naam,
                        'type': 'column',
                        'data': data,
                        'yAxis': 1,
                        'pointRange': 24 * 3600 * 1000, # 1 day
                        'pointPadding': 0.01,
                        'pointPlacement': 0.5,
                        'zIndex': 1,
                        'color': 'orange', 
                        'borderColor': '#cc6600', 
                        })
            options['yAxis'].append({'title': {'text': 'Neerslag (mm)'},
                                     'opposite': 1,
                                     'min': 0,
                                     })
        except:
            # no nearby station or precipitation series found; skip rainfall
            pass
        options['series'] = series
        context['options'] = json.dumps(options, default=lambda x: int(time.mktime(x.timetuple())*1000))
        context['object'] = well
        return context
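
# The mean-plus/minus-std 'spreiding' band above maps onto the modern
# expanding accessor; a minimal sketch, with `levels` standing in for the
# screen.to_pandas(ref='nap') series:
def expanding_band(levels):
    mean = levels.expanding().mean()
    std = levels.expanding().std()
    return (mean - std).dropna(), (mean + std).dropna()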
# flip the sign of these predictors so that, as in Neely-Rapach-Tu-Zhou (NRZ),
# higher values forecast a higher equity premium
for i in ['ntis', 'tbl', 'lty', 'infl']:
    df[i] = df[i] * -1

econ_var = ['dp', 'dy', 'ep', 'de', 'rvol', 'bm', 'ntis', 'tbl', 'lty', 'ltr',
            'tms','dfy','dfr','infl']
tech_var = ['ma_1_9', 'ma_1_12', 'ma_2_9', 'ma_2_12', 'ma_3_9', 'ma_3_12',
            'mom_9', 'mom_12', 'vol_1_9', 'vol_1_12', 'vol_2_9', 'vol_2_12',
            'vol_3_9', 'vol_3_12']
all_var =  econ_var + tech_var


# get data for specified date range
df_sub = df[beg_date_init:end_date_oos]

# Expanding window historical average forecast for equity premium
df['ha_mean'] = Series(pd.expanding_mean(df_sub['equity_premium']/100,
                min_periods = window_size).shift(1), index = df_sub.index)

# Rolling window historical average forecast for equity premium variance
# note degree of freedom adjusted to match NRZ
df['ha_var'] = Series(pd.rolling_var(df_sub['equity_premium']/100, window_size,
               min_periods = window_size, ddof = 0).shift(1), index = df_sub.index)
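
# Modern-pandas sketch of the same two forecasts; the .shift(1) is what keeps
# them out of sample (the forecast for month t uses data only through t-1):
def ha_forecasts(premium, window_size):
    mean = premium.expanding(min_periods=window_size).mean().shift(1)
    var = premium.rolling(window_size).var(ddof=0).shift(1)
    return mean, var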


# Perform asset allocation using historical average forecasts using c_bp = 0
#  all months
df_sub = df[beg_date_oos:end_date_oos]  
ha_results = perform_asset_allocation(df_sub['equity_premium']/100, df_sub['Rfree'],
                                      df_sub['ha_mean'], df_sub['ha_var'], gamma_MV, 0)
#  expansion months
df_exp = df_sub[df_sub['recession']==0]
ha_results_exp = perform_asset_allocation(df_exp['equity_premium']/100, df_exp['Rfree'],
                                          df_exp['ha_mean'], df_exp['ha_var'], gamma_MV, 0)