Example No. 1
def pandas_xy_dist(df, x_col='x', y_col='y'):
	"""
	Takes in a pandas dataframe containing x and y coordinates. Calculates the euclidean distance between point pairs as wella s the cumulative distance. 

	VARIABLES

	df : a pandas dataframe
	x_col : x column header (default 'x')
	y_col : y column header (default 'y')

	RETURN

	Neighbour didtance array
	Cumulative didtance array
	"""

	df = df.assign(x_diff=pd.rolling_apply(df[x_col], 2, lambda x: x[1] - x[0]))

	df = df.assign(y_diff=pd.rolling_apply(df[y_col], 2, lambda y: y[1] - y[0]))

	df['xy_distance']=np.hypot(df['y_diff'], df['x_diff'])

	spacing = df['xy_distance'].values
	cumulative = np.asarray(np.cumsum(df['xy_distance']))
	cumulative[0]=0

	df = df.drop('x_diff', 1)
	df = df.drop('y_diff', 1)

	df['cumulative']=cumulative
	
	return spacing, cumulative, df
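Note: the top-level pd.rolling_apply used throughout these examples was deprecated in pandas 0.18 and removed in 0.23. A minimal modern sketch of the same calculation, with Series.diff replacing the length-2 rolling window:

import numpy as np
import pandas as pd

def pandas_xy_dist_modern(df, x_col='x', y_col='y'):
    # diff() yields x[i] - x[i-1], the same as rolling_apply(..., 2, lambda x: x[1] - x[0])
    dx = df[x_col].diff()
    dy = df[y_col].diff()
    spacing = np.hypot(dx, dy)
    cumulative = np.nancumsum(spacing)  # the leading NaN is treated as 0
    return spacing.to_numpy(), cumulative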
Example No. 2
    def GenSingleAlphaWeight(self, dfData):
        n = len(dfData)
        dfAlpha = dfData[0][[self.alpha_name]].copy()
        for i in range(1,n):
            dfAlpha = pd.merge(dfAlpha, dfData[i][[self.alpha_name]], left_index=True, right_index=True, how='outer')
        asset_names = []
        for i in range(n):
            asset_names.append(dfData[i].index.name)
        dfAlpha.columns = asset_names
        dfAlpha.index.name = 'Date'
        
        dfAlpha['lower'] = pd.rolling_apply(np.arange(len(dfAlpha)), self.window_length,
                                            self.threshold_calc, args=(dfAlpha, self.lower_percentile),min_periods=1)
        dfAlpha['upper'] = pd.rolling_apply(np.arange(len(dfAlpha)), self.window_length,
                                            self.threshold_calc, args=(dfAlpha, self.upper_percentile),min_periods=1)
        dfAlpha['lower'] = dfAlpha['lower'].shift(1)
        dfAlpha['upper'] = dfAlpha['upper'].shift(1)
        dfAlpha.dropna(inplace=True)

        dfAlpha = dfAlpha[str(self.start_date):]

        dfAlphaWeight = dfAlpha.apply(self.func, axis=1)

        del dfAlpha['upper']
        del dfAlpha['lower']
        del dfAlphaWeight['upper']
        del dfAlphaWeight['lower']

        return (dfAlpha, dfAlphaWeight)    
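If threshold_calc computes a rolling percentile over the window (an assumption suggested by the lower_percentile/upper_percentile names), the built-in rolling quantile in current pandas covers the same ground without the np.arange indexing trick. A minimal single-series sketch:

# Hypothetical rewrite for one alpha series; percentiles assumed to be on a
# 0-100 scale, shifted one period so each row only sees past data.
lower = alpha.rolling(window_length, min_periods=1).quantile(lower_percentile / 100.0).shift(1)
upper = alpha.rolling(window_length, min_periods=1).quantile(upper_percentile / 100.0).shift(1)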
Example No. 3
def preProcess(zig):
	f1 = pd.rolling_apply(zig, 5, lambda x: 100*(x[2]-x[0])/x[0], center = True, min_periods=5)
	f2 = pd.rolling_apply(zig, 5, lambda x: 100*(x[4]-x[2])/x[2], center = True, min_periods=5)
	f3 = pd.rolling_apply(zig, 5, lambda x: 100*(x[4]-x[0])/x[0], center = True, min_periods=5)
	f4 = pd.rolling_apply(zig, 5, lambda x: 100*(x[3]-x[1])/x[1], center = True, min_periods=5)
	f5 = pd.rolling_apply(zig, 5, lambda x: 100*(x[1]-x[0])/x[0], center = True, min_periods=5)
	features = pd.DataFrame({'f1':f1, 'f2':f2, "f3":f3, "f4":f4, 'f5':f5})
	return features.dropna()
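Example No. 4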
def BBANDS(data, n = 20, m = 2):
    data['bbands_mid'] = ta.SMA(np.array(data[['high', 'low', 'close']].mean(axis=1)),n)
    data['bbands_up'] =  data['bbands_mid'] + m* pd.rolling_apply(data.close, n, np.std)
    data['bbands_dn'] =  data['bbands_mid'] - m* pd.rolling_apply(data.close, n, np.std)
    signal = pd.DataFrame(index = data.index)
    """
    当收盘价上穿上轨线,买入,信号为1
    当收盘价下穿下轨线,卖空,信号为-1
    参数为20
    """
    signal['1'] = (((data['close'] > data['bbands_up']) & (data['close'].shift(1) < data['bbands_up'].shift(1))) * 1
                   + ((data['close'] < data['bbands_dn']) & (data['close'].shift(1) > data['bbands_dn'].shift(1))) * (-1))
    signal['1'] = signal['1'][signal['1'].isin([1,-1])].reindex(data.index, method='ffill') 
    signal = signal.fillna(0)
    return signal
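For reference, the same bands under the current API, with a plain rolling mean standing in for ta.SMA; a sketch:

def bbands_modern(data, n=20, m=2):
    # ddof=0 matches the np.std default used above (rolling .std() defaults to ddof=1)
    mid = data[['high', 'low', 'close']].mean(axis=1).rolling(n).mean()
    std = data['close'].rolling(n).std(ddof=0)
    data['bbands_mid'] = mid
    data['bbands_up'] = mid + m * std
    data['bbands_dn'] = mid - m * std
    return data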
Example No. 5
def cci(df_typ, df_c, i_period):
    """
    http://en.wikipedia.org/wiki/Commodity_channel_index

    CCI = (p - SMA(p)) / (σ(p) * 0.015)

    p = typical price
    SMA = simple moving average
    σ = mean absolute deviation
    """
    i_len = len(df_typ)
    assert i_len >= i_period

    df_mad = pd.rolling_apply(df_c, i_period, lambda x: np.fabs(x - x.mean()).mean())

    df_sma = sma(df_c, i_period)

    df_cci = ( df_typ - df_sma) / (df_mad * 0.015)

    ## set values before i_period ( wait for enough data )
    df_cci[:i_period-1] = 0.

    df_cci.name = 'cci' + str(i_period)

    return df_cci
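The rolling mean absolute deviation line translates directly to the modern API; a sketch:

# Modern equivalent of the df_mad line, with raw ndarray windows for speed:
df_mad = df_c.rolling(i_period).apply(lambda x: np.fabs(x - x.mean()).mean(), raw=True)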
Example No. 6
def min_rolling_theta_entropy(pos, window=24, bins=24):
    """Compute the minimum Shannon entropy in any window.

    Parameters
    ----------
    pos : DataFrame with columns x and y, indexed by frame
    window : number of observations per window
    bins : number of equally-spaced bins in distribution. Default 24.

    Returns
    -------
    float : Shannon entropy

    Examples
    --------
    >>> theta_entropy(t[t['particle'] == 3].set_index('frame'))

    >>> S = t.set_index('frame').groupby('particle').apply(
    ...     tp.min_rolling_theta_entropy)
    """

    disp = pos - pos.shift(1)
    direction = np.arctan2(disp['y'], disp['x'])
    bins = np.linspace(-np.pi, np.pi, bins + 1)
    f = lambda x: shannon_entropy(x, bins)
    return pd.rolling_apply(direction.dropna(), window, f).min()
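Example No. 7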
def denormalize(df, col):    
    vals = df[col].copy().dropna().sort_values().round(8)
    vals = pd.rolling_apply(vals, 2, lambda x: x[1] - x[0])
    vals = vals[vals > 1e-5]
    denom = vals.value_counts().idxmax() 
    denormalized = np.round(np.array(df[col]/denom),0).astype(int)
    return denormalized
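Example No. 8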
def run():
    num_average_ticks = 12  # v=['B', 'H', 'S'] p=[0.05, 0.9, 0.05]
    d = pd.DataFrame(DATA[['timestamp', 'last']])
    d['returns'] = compute_returns(d['last'])
    print(d['returns'].head())

    print(d['returns'].rolling(window=2, center=False).mean().head())

    print(d['returns'])

    sr_column = 'sharpe_ratio_{}'.format(num_average_ticks)
    # Reversing the series makes this a forward-looking apply rather than
    # the usual backward-looking one.
    d[sr_column] = pd.rolling_apply(d['returns'][::-1],
                                    window=num_average_ticks,
                                    func=sharpe_ratio,
                                    center=False).fillna(0)[::-1]

    print(d.tail(100))

    labels = ['SELL', 'HOLD', 'BUY']
    d['signals'] = pd.qcut(d[sr_column], q=[0, 0.05, 0.95, 1], labels=[0, 1, 2])

    print(d.head(100))
    print(d['signals'].head(100))
    d['signals'].astype(np.float).plot()
    import matplotlib.pyplot as plt
    plt.show()
Example No. 9
def find_denominator(df, col):

    print(type(df[col].dropna()))
    vals = df[col].dropna().sort_values().round(8)
    vals = pd.rolling_apply(vals, 2, lambda x: x[1] - x[0])
    vals = vals[vals > 0.000001]
    return vals.value_counts().idxmax() 
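The same denominator-recovery trick in current pandas, with Series.diff replacing the length-2 rolling window; a sketch:

def find_denominator_modern(df, col):
    vals = df[col].dropna().sort_values().round(8)
    gaps = vals.diff()                # consecutive differences of sorted values
    gaps = gaps[gaps > 0.000001]
    return gaps.value_counts().idxmax()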
Example No. 10
def src_step1():
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import tushare as ts
    import sys
    sys.path.append("..")
    from JSONData import tdx_data_Day as tdd
    # http://stackoverflow.com/questions/21058333/compute-rolling-maximum-drawdown-of-pandas-series
    def max_dd(ser):
        max2here = pd.expanding_max(ser)
        dd2here = ser - max2here
        return dd2here.min()

    np.random.seed(0)
    n = 100
    s = pd.Series(np.random.randn(n).cumsum())
    # s.plot()
    # plt.show()

    code='999999'
    # d=ts.get_hist_data(code).sort_index()
    d=tdd.get_tdx_Exp_day_to_df(code, 'f').sort_index()

    rolling_dd = pd.rolling_apply(d.close, 200, max_dd, min_periods=0)
    df = pd.concat([d.close, rolling_dd], axis=1)
    df.columns = [code, 'rol_dd_200']
    df.plot()

    plt.show()
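pd.expanding_max and pd.rolling_apply are gone from current pandas; a minimal modern sketch of the same rolling max drawdown, assuming d.close as above:

def max_dd_modern(window):
    # drawdown of each point relative to the running peak within the window
    return (window - np.maximum.accumulate(window)).min()

rolling_dd = d.close.rolling(200, min_periods=1).apply(max_dd_modern, raw=True)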
Example No. 11
def rollingReturn(data, horizon):
    ''' Calculate rolling returns over a horizon.

    rollingReturn compounds the per-period returns over a rolling window.

    Example: rolling 1-year return from monthly data

        >> rollingReturn(data, 12)

    Input:
        - data (timeseries): timeseries of monthly return data
        - horizon (int): window size for rolling analysis

    Returns:
        - rollingReturn (timeseries): timeseries of the same size as data

    '''
    
    cleanData = utils.processData(data)
    
    if (1 <= horizon <= len(cleanData)) & isinstance(horizon, int):    

        # Calculate rolling returns
        rollingReturns = pd.rolling_apply(cleanData, horizon, lambda x: np.prod(1 + x) - 1)        
        
        return rollingReturns
        
    else:
        
        raise customExceptions.invalidInput('rollingReturn')
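The rolling compounding itself carries over directly to the modern API; a one-line sketch using the names above:

rollingReturns = cleanData.add(1).rolling(horizon).apply(np.prod, raw=True) - 1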
Example No. 12
def aspect_list(planet1, planet2, start, end, aspect, freq='3H', scale=1):
    """ return a list of aspect made by 2 planets in given time span and aspect
        I only need the exact day so the calculation can be simplified

        modify freq to get different accurance
    """

    # The distance is not normalized when calculating the 0/180 degree aspects.
    if aspect in [0, 180]:
        diffs = location_diff(planet1, planet2, start, end, freq,
                              scale=scale, fit180=False)
        # only search for the 180 degree aspect when the distance exceeds 90 degrees
        if aspect == 180:
            diffs_new = diffs[abs(diffs) > 90].apply(
                lambda x: x - _sign(x) * 180)
        else:
            diffs_new = diffs
    # for other aspects
    else:
        diffs = location_diff(planet1, planet2, start, end, freq, scale=scale)
        # now we can treat every aspect like a conjunction
        diffs_new = diffs - aspect
    aspectlist = pd.rolling_apply(diffs_new, 2, aspected)
    tindex = aspectlist[aspectlist==True].index
    res = pd.Series([aspect] * len(tindex), tindex)

    return res
Example No. 13
def n_period_return(price_series, n):
    ''' Using the given price series, returns a new series containing,
        for each period, the cumulative net return over the trailing
        n periods. '''
    gross_ret = 1 + price_series.pct_change()
    gross_np_ret = pd.rolling_apply(gross_ret, n, lambda x: x.prod())
    return gross_np_ret - 1
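Worth noting: the product of gross one-period returns telescopes to p_t / p_{t-n}, so the whole helper reduces to the built-in pct_change:

def n_period_return_modern(price_series, n):
    # prod(1 + r) over n periods equals p_t / p_{t-n}, i.e. pct_change(n) + 1
    return price_series.pct_change(periods=n)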
Example No. 14
def plot_cumulative_returns_by_quantile(quantile_returns, bin_list=None, period=1, ax=None):
    if ax is None:
        f, ax = plt.subplots(1, 1, figsize=(18, 6))

    return_wide = quantile_returns.reset_index().pivot(index='date', columns='factor_quantile', values=period)

    if period > 1:
        def compound_returns(ret, n):
            return (np.nanmean(ret) + 1)**(1./n) - 1
        return_wide = pd.rolling_apply(return_wide, period, compound_returns, min_periods=1, args=(period,))

    cum_ret = return_wide.add(1).cumprod()

    if bin_list is not None:
        cum_ret = cum_ret[bin_list]

    cum_ret.plot(lw=2, ax=ax)
    ax.legend()
    y_min, y_max = cum_ret.min().min(), cum_ret.max().max()

    ax.set(ylabel='Log Cumulative Returns',
           title='Cumulative Return by Quantile ({} Period Forward Return)'.format(period),
           xlabel='',
           yscale='symlog',
           yticks=np.linspace(y_min, y_max, 5),
           ylim=(y_min, y_max))

    ax.yaxis.set_major_formatter(ScalarFormatter())
    ax.axhline(1.0, linestyle='-', color='black', lw=1)

    return ax
Example No. 15
 def calculate_indicator(self, label): # self.stock.close_prices()
     prices = np.array(self.stock.get_data(label), dtype=np.float64)
     last = lambda x: x[-1]
     prices_last = pd.rolling_apply(prices, self.window, last)
     moving_average = pd.rolling_mean(prices, self.window)
     result = (prices_last-moving_average)/moving_average*100 # include nan
     return result.tolist()
Example No. 16
    def rolling_sparse_average(self, data_frame, periods):
        """
        rolling_sparse_average - Calculates the rolling moving average of a sparse time series

        Parameters
        ----------
        data_frame : DataFrame
            contains time series
        periods : int
            number of periods in the rolling sparse average

        Returns
        -------
        DataFrame
        """

        # 1. calculate rolling sum (ignore NaNs)
        # 2. count number of non-NaNs
        # 3. average of non-NaNs
        foo = lambda z: z[pandas.notnull(z)].sum()

        rolling_sum = pandas.rolling_apply(data_frame, periods, foo, min_periods=1)
        rolling_non_nans = pandas.stats.moments.rolling_count(data_frame, periods, freq=None, center=False, how=None)

        return rolling_sum / rolling_non_nans
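In current pandas, rolling .mean() already excludes NaNs, so the sum/count pair collapses into a single call; a sketch:

def rolling_sparse_average_modern(data_frame, periods):
    # NaNs inside each window are skipped automatically
    return data_frame.rolling(periods, min_periods=1).mean()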
Example No. 17
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    assert_eq(pd.rolling_skew(p, 3),
              dd.rolling_skew(d, 3), check_less_precise=True)
    assert_eq(pd.rolling_kurt(p, 3),
              dd.rolling_kurt(d, 3), check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        assert_eq(pd.rolling_window(p, 3, 'boxcar'),
                  dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    assert_eq(pd.rolling_sum(p, 3, min_periods=3),
              dd.rolling_sum(d, 3, min_periods=3))
Example No. 18
def next_aspect_states(planet1, planet2, start=None,
                        num=1, asps=None, freq='3H', scale=1):
    """return aspect state changes(station/direction) of given planets
       within given time span.
    """
    if not start:
        start = datetime.today()
    elif type(start) is str:
        start = pd.to_datetime(start)
    if not asps:
        asps = [0, 60, 90, 120, 180]

    res = dict()
    # get last asp
    pre_asp = previous_aspect(planet1, planet2, start=start, asps=asps, freq=freq,
                                    scale=scale, num=1)

    while len(res) < num:
        next_asps = next_aspect(planet1, planet2, start=start, asps=asps,
                                num=10, freq=freq, scale=scale)

        asps_list = pd.concat([pre_asp, next_asps])
        stay_list = pd.rolling_apply(asps_list, 2, lambda xs: True if xs[0] == xs[1] else np.nan)
        stay_list = stay_list.dropna()
        for i in range(len(stay_list)):
            i0 = asps_list.index.get_loc(stay_list.index[i]) - 1
            i1 = i0 + 1
            res[asps_list.index[i0]] = -1
            res[asps_list.index[i1]] = 1

        pre_asp = asps_list[-1:]
        start = asps_list.index[-1]

    return pd.Series(res)
Example No. 19
  def Rate(self, time_window):
    """Apply rate function to all time series in this query."""

    if self.time_series is None:
      raise RuntimeError("Rate must be called after Take*().")

    if self.sample_interval is None:
      raise RuntimeError("Resample() must be called prior to Rate().")

    if time_window.seconds % self.sample_interval.seconds:
      raise RuntimeError("Rate's time window should be divisible by sampling "
                         "time window (rate time window: %s, sampling time "
                         "window: %s)." % (time_window, self.sample_interval))

    num_samples = time_window.seconds // self.sample_interval.seconds + 1
    num_seconds = float(time_window.seconds)

    def Rate(x):
      return (x[-1] - x[0]) / num_seconds

    new_time_series = []
    for time_serie in self.time_series:
      new_time_series.append(pandas.rolling_apply(
          time_serie, num_samples, Rate)[(num_samples - 1):])

    self.time_series = new_time_series
    return self
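Since the rate only needs the two endpoints of each window, Series.diff over num_samples - 1 steps gives the same result without an apply. A hypothetical rewrite of the loop body, with the names defined above:

rate = (time_serie.diff(num_samples - 1) / num_seconds)[(num_samples - 1):]
new_time_series.append(rate)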
Example No. 20
def plot_cumulative_returns_by_quantile(quantile_returns, period=1, ax=None):
    """
    Plots the cumulative returns of various factor quantiles.

    When 'period' N is greater than 1, the cumulative returns plot is computed
    by building and averaging the cumulative returns of N interleaved
    portfolios (started at subsequent periods 1,2,3,...,N), each one
    rebalancing every N periods. This results in trading the factor at every
    value/signal computed by the factor, and the cumulative returns do not
    depend on a specific starting date.

    Parameters
    ----------
    quantile_returns : pd.DataFrame
        Cumulative returns by factor quantile.
    period: int, optional
        Period over which the daily returns are calculated
    ax : matplotlib.Axes, optional
        Axes upon which to plot.

    Returns
    -------
    ax : matplotlib.Axes
    """

    if ax is None:
        f, ax = plt.subplots(1, 1, figsize=(18, 6))

    ret_wide = quantile_returns.reset_index()\
        .pivot(index='date', columns='factor_quantile', values=period)

    if period > 1:
        # build N portfolios each rebalancing every N periods and average them
        ret_wide = pd.rolling_apply(
            ret_wide,
            period,
            # rate of 1 period returns
            lambda ret, period: ((np.nanmean(ret) + 1)**(1. / period)) - 1,
            min_periods=1,
            args=(period,))

    cum_ret = ret_wide.add(1).cumprod()
    cum_ret = cum_ret.loc[:, ::-1]

    cum_ret.plot(lw=2, ax=ax, cmap=cm.RdYlGn_r)
    ax.legend()
    ymin, ymax = cum_ret.min().min(), cum_ret.max().max()
    ax.set(ylabel='Log Cumulative Returns',
           title='''Cumulative Return by Quantile
                    ({} Period Forward Return)'''.format(period),
           xlabel='',
           yscale='symlog',
           yticks=np.linspace(ymin, ymax, 5),
           ylim=(ymin, ymax))

    ax.yaxis.set_major_formatter(ScalarFormatter())
    ax.axhline(1.0, linestyle='-', color='black', lw=1)

    return ax
Example No. 21
def find_denominator(df, col):
    # Finds the approximate denominator used for scaling 
    # (used to undo feature scaling)
    # Credit : http://bit.ly/1RV4w0y
    vals = df[col].dropna().sort_values().round(8)
    vals = pd.rolling_apply(vals, 2, lambda x: x[1] - x[0])
    vals = vals[vals > 0.000001]
    return vals.value_counts().idxmax()
Example No. 22
 def expected_ewmstd(self, window_length, decay_rate):
     alpha = 1 - decay_rate
     span = (2 / alpha) - 1
     return rolling_apply(
         self.raw_data,
         window_length,
         lambda window: ewmstd(window, span=span)[-1],
     )[window_length:]
Example No. 23
def blended_rolling_apply(series, window=2, fun=np.mean):
    # Seed the first window-1 entries with fun applied to the partial windows,
    # then append the ordinary rolling result for the rest.
    head = pd.Series(
        np.fromiter((fun(series[:i + 1]) for i in range(window - 1)),
                    type(series.values[0])),
        index=series.index[:window - 1])
    new_series = head.append(pd.rolling_apply(series.copy(), window, fun)[window - 1:])
    assert len(series) == len(new_series), (
        "blended_rolling_apply should always return a series of the same length! "
        "len(series) = {0} != {1} = len(new_series)".format(len(series), len(new_series)))
    assert not any(np.isnan(val) or val is None for val in new_series)
    return new_series
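In the modern API, min_periods=1 produces this blended behaviour directly (the function is applied to partial windows at the start, full windows afterwards); a sketch:

def blended_rolling_apply_modern(series, window=2, fun=np.mean):
    return series.rolling(window, min_periods=1).apply(fun, raw=True)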
Example No. 24
def computeInterpStep(df):
    steps = []
    grouped = df.groupby(['race_id','User','laccid'])
    for ix,group in grouped:
        step = np.mean(pd.rolling_apply(group['ratio'],2,np.diff))
        steps.append(step)
    interpStep = np.mean(steps)
    return interpStep
Example No. 25
def rolling_apply(df, lookback, fn):
    index_series = pd.Series(range(len(df)))
    result = pd.rolling_apply(
        index_series, lookback, 
        lambda ii: _window_apply(df, ii, fn))
    
    result.index = df.index
    return result
Example No. 26
def find_denominator(df, col):
    """
    Function that trying to find an approximate denominator used for scaling.
    So we can undo the feature scaling.
    """
    vals = df[col].dropna().sort_values().round(8)
    vals = pd.rolling_apply(vals, 2, lambda x: x[1] - x[0])
    vals = vals[vals > 0.000001]
    return vals.value_counts().idxmax()
Example No. 27
 def calculate_indicator(self):
     yesterday_value = lambda x: x[0]
     today_value = lambda x: x[1]
     previous_close = pd.rolling_apply(
                         np.array(self.stock.close_prices(),dtype=np.float64),
                         2, yesterday_value)
     current_high = pd.rolling_apply(
                         np.array(self.stock.high_prices(),dtype=np.float64),
                         2, today_value)
     current_low = pd.rolling_apply(
                         np.array(self.stock.low_prices(),dtype=np.float64),
                         2, today_value)
      # elementwise max/min replace the Python-2 map() calls, which would
      # return lazy iterators (and break np.array) under Python 3
      true_high = np.maximum(previous_close, current_high)
      true_low = np.minimum(previous_close, current_low)
      true_ranges = true_high - true_low
             
     return pd.rolling_mean(true_ranges, self.window).tolist()
Example No. 28
def compute_sign_change_cnt(series):
    """
    method for computing sign change counts of time series data
    :return: feature value
    """
    if series is None or len(series) == 0:
        return np.nan
    sc_series = series[pd.rolling_apply(series, 2, lambda x: (x[0] > 0 > x[1]) or (x[0] < 0 < x[1])) > 0]
    return len(sc_series)
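A vectorised equivalent, assuming a pd.Series: two consecutive points change sign exactly when their product is negative; a sketch:

def compute_sign_change_cnt_modern(series):
    if series is None or len(series) == 0:
        return np.nan
    return int((series * series.shift(1) < 0).sum())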
Example No. 29
    def calculate_indicator(self, label):
        prices = np.array(self.stock.get_data(label), dtype=np.float64)
        last = lambda x: x[-1]
        prices_last = pd.rolling_apply(prices, self.window, last)
        
        first = lambda x: x[0]
        prices_first = pd.rolling_apply(prices, self.window, first)
        
        direction = prices_last - prices_first
        direction[direction>0]=100
        direction[direction<0]=-100
        
        res = direction.astype(str)
        res[res=='100.0']='up'
        res[res=='-100.0']='down'
        res[res=='0.0']='flat'

        return res.tolist()
Example No. 30
def plot_score(ax, series, labels, colors, ylabel):
    """Score plot where score is calculated as 90th percentile. Quite useful
    for trends and dips analysis."""
    ax.set_ylabel("Percentile of score ({})".format(ylabel))
    ax.set_xlabel("Time elapsed, sec")
    for s, label, color in zip(series, labels, colors):
        scoref = lambda x: stats.percentileofscore(x, s.quantile(0.9))
        rolling_score = pd.rolling_apply(s, min(len(s) // 15, 40), scoref)
        ax.plot(s.index, rolling_score, label=label, color=color)
        plt.ylim(ymin=0, ymax=105)
Example No. 31
print(close_px)
spx_px = close_px_all['SPX']
spx_rets = spx_px / spx_px.shift(1) - 1
returns = close_px.pct_change()
corr = pd.rolling_corr(returns.AAPL, spx_rets, 125, min_periods=100)
corr.plot()
plt.show()

corr = pd.rolling_corr(returns, spx_rets, 125, min_periods=100)
corr.plot()
plt.show()

from scipy.stats import percentileofscore

score_at_2percent = lambda x: percentileofscore(x, 0.02)
result = pd.rolling_apply(returns.AAPL, 250, score_at_2percent)
result.plot()
plt.show()
print('-------------------------')
# Time series case study
# Parameter initialization
discfile = 'arima_data.xls'
forecastnum = 5

# Read the data with the date column as the index; pandas automatically
# recognises the u'日期' (date) column as Datetime.
data = pd.read_excel(discfile, index_col=u'日期')
data = pd.DataFrame(data, dtype=np.float64)
print('data : ', data)

# Time series plot
Example No. 32
 def getSpeeds(self, dfTrack):
     dfTrack['SpeedX'] = pd.rolling_apply(dfTrack["AvgPosX"], 2, self.getLastDiff)
     dfTrack['SpeedY'] = pd.rolling_apply(dfTrack["AvgPosY"], 2, self.getLastDiff)
     dfTrack['Speed'] = np.sqrt(dfTrack['SpeedX']**2 + dfTrack['SpeedY']**2)
     return dfTrack
Example No. 33
def rolling_profit_count(dataframe):
    # Returns the net count: +1 for each profitable trade, -1 for each loss.
    count = 0
    for profit in dataframe:
        if profit >= 0:
            count = count + 1
        else:
            count = count - 1
    return count


# Add Rolling stats to Order DF
window = 15
df_ord['Roll_Profit_Count'] = pd.rolling_apply(df_ord['Profit'], window,
                                               rolling_profit_count, 1)
df_ord['Roll_Mean'] = pd.rolling_mean(df_ord['Profit'], window)
df_ord['Roll_std'] = pd.rolling_std(df_ord['Profit'], window)
df_ord['Roll_var'] = pd.rolling_var(df_ord['Profit'], window)

# Create Trend Ranges, based on visual inspection of previous graph, and add Trends to Order dataframe
trend_range = [0, 6, 33, 60, 77, 86, 150, 171, 207, 222, 271, 314, 331, 348]
trend_labels = [
    'UP1', 'FLAT1', 'DOWN1', 'UP2', 'DOWN2', 'FLAT2', 'DOWN3', 'UP3', 'DOWN4',
    'UP4', 'DOWN5', 'UP6', 'FLAT3'
]

df_ord['Trend'] = pd.cut(df_ord.Ticket, trend_range,
                         labels=trend_labels).astype('category')

# Order Dataframe with Trends added
Example No. 34
 def decay_linear(self, x, n):
     return pd.rolling_apply(x, n, self.decay_linear_array)
Example No. 35
 def getAccelerations(self, dfTrack):
     dfTrack['AccX'] = pd.rolling_apply(dfTrack["SpeedX"], 2, self.getLastDiff)
     dfTrack['AccY'] = pd.rolling_apply(dfTrack["SpeedY"], 2, self.getLastDiff)
     dfTrack['Acc'] = np.sqrt(dfTrack['AccX']**2 + dfTrack['AccY']**2)
     return dfTrack
Example No. 36
 def AVEDEV(self, param):
     return pd.rolling_apply(param[0], param[1],
                             lambda x: pd.DataFrame(x).mad())
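Series.mad was removed in pandas 2.0; a modern AVEDEV sketch:

def avedev(series, n):
    # rolling mean absolute deviation
    return series.rolling(n).apply(lambda x: np.fabs(x - x.mean()).mean(), raw=True)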
Example No. 37
 def decay_exp(self, x, f, n):
     return pd.rolling_apply(x, n, self.decay_exp_array, args=[f])
Example No. 38
def ts_compoundFn(arr, min_periods, max_periods):
    if not (max_periods): max_periods = len(arr)
    return pd.rolling_apply(arr,
                            max_periods,
                            lambda arr: (1 + arr).prod() - 1,
                            min_periods=min_periods)
Example No. 39
data = data.sort(["id","trade_date"]).reset_index(drop=True)
data = data[data.trade_date.isin(np.arange(start_date, end_date , dtype='datetime64[D]'))]
data["trade_biweek"] = [ x.year * 100 + int(datetime.datetime.strftime(x,"%U"))/2 for x in data.trade_date ]
data_grouped = data.groupby(["id","trade_biweek"])
data['loss'] = data_grouped.accu_value.apply(lambda x: pd.expanding_apply(x,lambda y: (y[-1]/(np.max(y)))-1))
data['biggest_loss'] = data.loc[data_grouped.loss.idxmin(),'loss']
data['biggest_loss_day'] = data.loc[data_grouped.loss.idxmin(),'trade_date']

data_result = pd.DataFrame()
data_result['biweek_first_date'] = data_grouped.trade_date.first()
data_result['biweek_last_date'] = data_grouped.trade_date.last()
data_result['biweek_start_value'] = data_grouped.accu_value.first()

data_result['biweek_last_value'] = data_grouped.accu_value.last()
data_result['earning1'] = (data_result.biweek_last_value / data_result.biweek_start_value ) - 1
data_result['earning2'] = pd.concat([ pd.rolling_apply(v.biweek_last_value,2,lambda x:(x[1]/x[0])-1) for k,v in data_result.reset_index(level=0).groupby(["id"])]).values

data_result['earning'] = np.where(pd.isnull(data_result['earning2']), data_result['earning1'], data_result['earning2'])

data['rtn'] = data.groupby(['id']).apply(lambda y:pd.rolling_apply(y['accu_value'],2,lambda x:(x[1]/x[0])-1)).values

# This is a hack; needs a fix.
#data['rtn'] = data.rtn.fillna(0)

data_result['volatility'] = data_grouped.rtn.std()
data_result['win_days'] = data[data.rtn>= 0].groupby(["id","trade_biweek"]).rtn.count()
data_result['lose_days'] = data[data.rtn< 0].groupby(["id","trade_biweek"]).rtn.count()

data_result['biggest_loss_day'] = data.set_index(["id","trade_biweek"]).biggest_loss_day.dropna()
data_result['biggest_loss'] = data.set_index(["id","trade_biweek"]).biggest_loss.dropna()
Example No. 40
def ts_geomeanFn(arr, min_periods, max_periods):
    if not (max_periods): max_periods = len(arr)
    return pd.rolling_apply(arr,
                            max_periods,
                            lambda arr: (1 + arr).prod()**(1 / len(arr)) - 1,
                            min_periods=min_periods)
Example No. 41
def factor_return_rnn_predictor(df_factor_return_,
                                start_date=datetime(2006, 5, 1),
                                look_back=10,
                                rnn=None,
                                type='gru',
                                num_internal_projection=4,
                                dropout_probability=0.2,
                                init='he_uniform',
                                loss='mse',
                                optimizer='rmsprop',
                                nb_epoch=20,
                                batch_size=10,
                                save_to_csv=None,
                                train_freq="Monthly",
                                train_period=None,
                                verbosity=False):
    if train_freq == "Monthly":
        offset_begin = MonthBegin()
        offset_end = MonthEnd()
    else:
        raise ValueError('Frequency not implemented yet')

    df_factor_return = deepcopy(df_factor_return_).sort_index()
    predict_start = start_date + DateOffset(days=0)
    predict_end = predict_start + offset_end
    last_day = df_factor_return.index[-1]
    predict_df_list = []
    if rnn is None:
        rnn = {}
        for factor in df_factor_return.columns:
            rnn[factor] = RNN(look_back,
                              type=type,
                              num_internal_projection=num_internal_projection,
                              dropout_probability=dropout_probability,
                              init=init,
                              loss=loss,
                              optimizer=optimizer)

    while predict_start < last_day:
        print(predict_start)

        if train_period is None:
            df_train = df_factor_return[df_factor_return.index < predict_start]
        else:
            df_train = df_factor_return[
                (df_factor_return.index < predict_start)
                & (df_factor_return.index >=
                   (predict_start - DateOffset(days=train_period)))]
            if verbosity:
                print("train from")
                print(df_train.index[0])
                print("to")
                print(df_train.index[-1])

        df_predict = pd.concat([
            df_train.ix[-(look_back - 1):],
            df_factor_return[(df_factor_return.index >= predict_start)
                             & (df_factor_return.index <= predict_end)]
        ]).sort_index()

        df_res = pd.DataFrame(index=df_predict.index)

        for factor in df_predict.columns:
            factor_series = df_train[[factor]].as_matrix()
            X_train, Y_train, X_predict = series_to_matricise(
                factor_series, look_back, True)
            X_train = X_train.reshape([X_train.shape[0], 1, X_train.shape[1]])
            rnn[factor].train(X_train,
                              Y_train,
                              nb_epoch=nb_epoch,
                              batch_size=batch_size)
            rnn_func = lambda factor_series: rnn[factor].predict(
                factor_series.reshape([1, 1, look_back]))
            df_res[factor] = pd.rolling_apply(df_predict[factor], look_back,
                                              rnn_func)

        predict_df_list.append(df_res.dropna(axis=0))
        predict_start = predict_end + offset_begin
        predict_end = predict_start + offset_end

    return pd.concat(predict_df_list).sort_index()
Example No. 42
data_grouped = data.groupby(["id", "trade_biweek"])
data['loss'] = data_grouped.accu_value.apply(
    lambda x: pd.expanding_apply(x, lambda y: (y[-1] / (np.max(y))) - 1))
data['biggest_loss'] = data.loc[data_grouped.loss.idxmin(), 'loss']
data['biggest_loss_day'] = data.loc[data_grouped.loss.idxmin(), 'trade_date']

data_result = pd.DataFrame()

data_result['biweek_start_value'] = data_grouped.accu_value.first()

data_result['biweek_last_value'] = data_grouped.accu_value.last()
data_result['earning1'] = (data_result.biweek_last_value /
                           data_result.biweek_start_value) - 1
data_result['earning2'] = pd.concat([
    pd.rolling_apply(v.biweek_last_value, 2, lambda x: (x[1] / x[0]) - 1)
    for k, v in data_result.reset_index(level=0).groupby(["id"])
]).values

#data_result['gain2'] = pd.rolling_apply(data_result['last'],2,lambda x:(x[1]/x[0])-1)

data_result['earning'] = np.where(pd.isnull(data_result['earning2']),
                                  data_result['earning1'],
                                  data_result['earning2'])

data['rtn'] = data.groupby(
    ['id']).apply(lambda y: pd.rolling_apply(y['accu_value'], 2, lambda x:
                                             (x[1] / x[0]) - 1)).values

data_result['rtn_std'] = data_grouped.rtn.std()
Example No. 43
import time

array_size = 100000
foo = pd.DataFrame(np.random.uniform(size=array_size))
window_size = 50
# stupid floating point arithmetic messing up the scipy version comparison
epsilon = 0.00000001

start = time.time()
a = roll_rank(foo[0].values, window_size, window_size, 0.8)
end = time.time()
print('cython ranking: {0} seconds'.format(end - start))


def percentile_of_score(array):
    return stats.percentileofscore(array, 0.8)


start = time.time()
# adjust from percentile to rank
adj_factor = window_size / 100
b = pd.rolling_apply(foo[0], window_size, percentile_of_score) * adj_factor
end = time.time()
print('percentileofscore ranking: {0} seconds'.format(end - start))

bar = pd.DataFrame(abs(a - b) < epsilon)
if np.count_nonzero(bar[0]) == array_size - window_size + 1:
    print('Correct number of equal values')
else:
    print('Incorrect number of equal values')
Example No. 44
 def product(self, x, n):
     return pd.rolling_apply(x, n, np.product)
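And, for completeness, the rolling product under the modern API; a sketch:

def product_modern(x, n):
    return x.rolling(n).apply(np.prod, raw=True)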