Example #1
def BBands(Symbols, d_data):
    df_close = d_data['close']


    # Start from a deep copy of the close prices
    temp_data_set = copy.deepcopy(df_close)

    for symbol in Symbols:


        temp_data_set[symbol+'20d_ma'] = pd.rolling_mean(temp_data_set[symbol], window=20)
        temp_data_set[symbol+'50d_ma'] = pd.rolling_mean(temp_data_set[symbol], window=50)

        temp_data_set[symbol+'Bol_upper'] = pd.rolling_mean(temp_data_set[symbol], window=20) + 2 * pd.rolling_std(temp_data_set[symbol], 20, min_periods=20)
        temp_data_set[symbol+'Bol_lower'] = pd.rolling_mean(temp_data_set[symbol], window=20) - 2 * pd.rolling_std(temp_data_set[symbol], 20, min_periods=20)

        # Bollinger band widths
        #temp_data_set[symbol+'Bol_BW'] = ((temp_data_set[symbol+'Bol_upper'] - temp_data_set[symbol+'Bol_lower']) / temp_data_set[symbol+'20d_ma']) * 100
        #temp_data_set[symbol+'Bol_BW_200MA'] = pd.rolling_mean(temp_data_set[symbol+'Bol_BW'], window=50)  # can't get the full 200-day window
        #temp_data_set[symbol+'Bol_BW_200MA'] = temp_data_set[symbol+'Bol_BW_200MA'].fillna(method='backfill')  # backfill may not be appropriate

        # Convert the present value of the Bollinger bands into the range -1 to 1:
        # boll_val = 2 * ((current_price - lower_band) / (upper_band - lower_band)) - 1
        temp_data_set[symbol+'BB_norm'] = 2 * (temp_data_set[symbol] - temp_data_set[symbol+'Bol_lower']) / (temp_data_set[symbol+'Bol_upper'] - temp_data_set[symbol+'Bol_lower']) - 1
        #temp_data_set.plot(x=temp_data_set.index, y=[symbol, symbol + '20d_ma', symbol + 'Bol_upper', symbol + 'Bol_lower'])
        temp_data_set.plot(x=temp_data_set.index, y=[symbol+'BB_norm'])
        plt.show()


    temp_data_set.to_csv('/Users/jcovino/Desktop/dataSet.csv')
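Note: pd.rolling_mean and pd.rolling_std were deprecated in pandas 0.18 and removed in 0.23. A minimal sketch of the same Bollinger computation with the modern Series.rolling API (the helper name bollinger_bands and the k parameter are illustrative, not part of the original):

import pandas as pd

def bollinger_bands(close, window=20, k=2):
    # Rolling mean and standard deviation over the same window
    ma = close.rolling(window=window, min_periods=window).mean()
    sd = close.rolling(window=window, min_periods=window).std()
    upper, lower = ma + k * sd, ma - k * sd
    # Rescale the price into [-1, 1] between the bands, as BB_norm does above
    bb_norm = 2 * (close - lower) / (upper - lower) - 1
    return ma, upper, lower, bb_norm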
Example #2
def collection_freq(breath_df, win):
    print(breath_df.columns)
    for ds_type in ['ds', 'pl', 'pvt', 'ie']:
        breath_df['{0}_rolling'.format(ds_type)] = pd.rolling_sum(breath_df['analysis.' + ds_type], window = 60 * win,
                                                                  center = True, min_periods = 1)
        breath_df[ds_type + '_tot_rolling'] = pd.rolling_count(breath_df['analysis.' + ds_type], window = 60 * win,
                                                               center = True)
        breath_df[ds_type + '_freq'] = breath_df[ds_type + '_rolling'] / breath_df[ds_type + '_tot_rolling']

    # add rolling average for Fio2, PEEP, p_mean
    try:
        breath_df['peep_rolling'] = pd.rolling_mean(breath_df['vent_settings.PEEP'], window = 60 * win,
                                                    center = True, min_periods = 1)
    except KeyError:
        pass

    try:
        breath_df['p_mean_rolling'] = pd.rolling_mean(breath_df['vent_settings.p_mean'], window = 60 * win,
                                                      center = True, min_periods = 1)
    except KeyError:
        pass

    try:
        breath_df['fio2_rolling'] = pd.rolling_mean(breath_df['vent_settings.FiO2'], window = 60 * win,
                                                    center = True, min_periods = 1)
    except KeyError:
        pass

    return breath_df
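For reference, the centered rolling aggregations above map onto the modern rolling API roughly as follows (a toy sketch; edge-case NaN semantics of count may differ slightly):

import pandas as pd

s = pd.Series([1.0, None, 3.0, 4.0])
r = s.rolling(window=3, center=True, min_periods=1)
print(r.sum())    # replaces pd.rolling_sum(s, window=3, center=True, min_periods=1)
print(r.count())  # replaces pd.rolling_count(s, window=3, center=True)
print(r.mean())   # replaces pd.rolling_mean(s, window=3, center=True, min_periods=1)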
Example #3
 def getFeatures(self): 
     #print 1
     self.features = pd.DataFrame(self.acc2.copy())
     self.features.columns = ['acc2']
     self.features['acc'] = np.asarray(pd.rolling_mean(self.acc.val,window=2).ix[1:])
     #self.features['cacc'] = np.asarray(pd.rolling_mean(self.cacc.val,window=2).ix[1:])
     self.features['v'] = np.asarray(pd.rolling_mean(self.speed.val,window=3).ix[2:])
Example #4
def rsi(prices, params={"window": 14}):
    """
    Calculate the RSI indicator.

    Parameters
    ----------
    prices: DataFrame
    params: dict

    Returns
    ----------
    rsi_val: DataFrame
    """
    window = params["window"]
    close = prices["Close"]

    delta = close - close.shift(1)  # the difference between rows
    gain = delta.copy()
    lose = delta.copy()
    gain[gain < 0] = 0
    lose[lose > 0] = 0

    rs = pd.rolling_mean(gain, window) / abs(pd.rolling_mean(lose, window))
    rsi_val = 100 - 100 / (1 + rs)
    return pd.DataFrame(rsi_val.values, index=prices.index, columns=["RSI"])
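The same simple-moving-average RSI with the modern rolling API (a sketch; rsi_modern is not part of the original code):

import pandas as pd

def rsi_modern(close, window=14):
    delta = close.diff()
    gain = delta.clip(lower=0)     # upward moves
    loss = (-delta).clip(lower=0)  # magnitudes of downward moves
    rs = gain.rolling(window).mean() / loss.rolling(window).mean()
    return 100 - 100 / (1 + rs)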
Example #5
def getBackgroundKnowledge(df, periods):
    logging.info('Background knowledge: retrieving...')

    # HLC
    hlc = df.apply(lambda x: (x['high'] + x['low'] + x['close']) / 3, axis=1)

    for x in periods:
        avg_x = pd.rolling_mean(hlc, x)
        avg_x_yesterday = avg_x.shift(+1)
        df['ma_{0}_bullish'.format(x)] = avg_x >= avg_x_yesterday
        avg_x_delta = abs(avg_x - avg_x_yesterday)
        avg_x_delta_yesterday = avg_x_delta.shift(+1)
        df['ma_{0}_divergence'.format(x)] = avg_x_delta >= avg_x_delta_yesterday
        df['ma_{0}_magnitude'.format(x)] = avg_x_delta > avg_x_delta.mean()

    for x in periods:
        for y in periods:
            if y <= x:
                continue
            logging.info('MA for {0} and {1}'.format(x, y))
            avg_x = pd.rolling_mean(hlc, x)
            avg_y = pd.rolling_mean(hlc, y)
            df['ma_{0}_crossover_{1}_bullish'.format(x, y)] = avg_x >= avg_y

            ma_diff = avg_x - avg_y
            avg_x_yesterday = avg_x.shift(+1)
            avg_y_yesterday = avg_y.shift(+1)
            ma_diff_yesterday = avg_x_yesterday - avg_y_yesterday
            df['ma_{0}_crossover_{1}_divergence'.format(x, y)] = ma_diff >= ma_diff_yesterday
            df['ma_{0}_crossover_{1}_magnitude'.format(x, y)] = ma_diff >= ma_diff.mean()

    logging.info('Background knowledge: retrieved')
    return df
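Two idioms above can be tightened in modern pandas (a sketch assuming the same column names): the row-wise apply for the HLC typical price vectorizes to a column mean, and pd.rolling_mean(hlc, x) becomes hlc.rolling(x).mean():

import pandas as pd

def hlc_rolling_mean(df, period):
    hlc = df[['high', 'low', 'close']].mean(axis=1)  # equals (high+low+close)/3 on rows without NaNs
    return hlc.rolling(period).mean()                # replaces pd.rolling_mean(hlc, period)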
Example #6
    def calculate_ma(self, config, *args, **kwargs):

        """
        Calculate the moving average on the three periods choosen by the user

        :param config: configuration saved
        """

        # get period of moving average
        ma1_period = config["ma1_curve"]["period"]
        ma2_period = config["ma2_curve"]["period"]
        ma3_period = config["ma3_curve"]["period"]
        what_to_plot = config["ticks_curve"]["data"]

        length = config["ticks_curve"]["lenght"]    # actual curve length (config key spelling kept as-is)

        # calculate moving averages
        self.ticks["ma1_curve"] = pd.rolling_mean(self.ticks[what_to_plot], ma1_period)
        self.ticks["ma2_curve"] = pd.rolling_mean(self.ticks[what_to_plot], ma2_period)
        self.ticks["ma3_curve"] = pd.rolling_mean(self.ticks[what_to_plot], ma3_period)

        # keep only data within the chart length
        ma1_data = self.ticks["ma1_curve"].values[-length:]
        ma2_data = self.ticks["ma2_curve"].values[-length:]
        ma3_data = self.ticks["ma3_curve"].values[-length:]
        indexes  = self.ticks.index.values[-length:]

        # dict containing ma data.
        dict_ma = {"ma1_curve": (indexes, ma1_data),
                   "ma2_curve": (indexes, ma2_data),
                   "ma3_curve": (indexes, ma3_data)
                   }

        return dict_ma
Example #7
 def test_multiple_talib_with_args(self):
     zipline_transforms = [ta.MA(timeperiod=10),
                           ta.MA(timeperiod=25)]
     talib_fn = talib.abstract.MA
     algo = TALIBAlgorithm(talib=zipline_transforms)
     algo.run(self.source)
     # Test if computed values match those computed by pandas rolling mean.
     sid = 0
     talib_values = np.array([x[sid] for x in
                              algo.talib_results[zipline_transforms[0]]])
     np.testing.assert_array_equal(talib_values,
                                   pd.rolling_mean(self.panel[0]['price'],
                                                   10).values)
     talib_values = np.array([x[sid] for x in
                              algo.talib_results[zipline_transforms[1]]])
     np.testing.assert_array_equal(talib_values,
                                   pd.rolling_mean(self.panel[0]['price'],
                                                   25).values)
     for t in zipline_transforms:
         talib_result = np.array(algo.talib_results[t][-1])
         talib_data = dict()
         data = t.window
         # TODO: Figure out if we are clobbering the tests by this
         # protection against empty windows
         if not data:
             continue
         for key in ['open', 'high', 'low', 'volume']:
             if key in data:
                 talib_data[key] = data[key][0].values
         talib_data['close'] = data['price'][0].values
         expected_result = talib_fn(talib_data, **t.call_kwargs)[-1]
         np.testing.assert_allclose(talib_result, expected_result)
Example #8
def plot(name):
        name = str(name)
        data_ext = YFHistDataExtr()
        data_ext.set_interval_to_retrieve(200)
        data_ext.set_multiple_stock_list([name])
        data_ext.get_hist_data_of_all_target_stocks()
        # convert the date column to date object
        data_ext.all_stock_df['Date'] =  pandas.to_datetime( data_ext.all_stock_df['Date'])
        temp_data_set = data_ext.all_stock_df.sort('Date', ascending=True)

        temp_data_set['20d_ma'] = pandas.rolling_mean(temp_data_set['Adj Close'], window=20)
        temp_data_set['50d_ma'] = pandas.rolling_mean(temp_data_set['Adj Close'], window=50)
        temp_data_set['Bol_upper'] = pandas.rolling_mean(temp_data_set['Adj Close'], window=20) + 2* pandas.rolling_std(temp_data_set['Adj Close'], 20, min_periods=20)
        temp_data_set['Bol_lower'] = pandas.rolling_mean(temp_data_set['Adj Close'], window=20) - 2* pandas.rolling_std(temp_data_set['Adj Close'], 20, min_periods=20)
        temp_data_set['Bol_BW'] = ((temp_data_set['Bol_upper'] - temp_data_set['Bol_lower'])/temp_data_set['20d_ma'])*100
        temp_data_set['Bol_BW_200MA'] = pandas.rolling_mean(temp_data_set['Bol_BW'], window=50)
        temp_data_set['Bol_BW_200MA'] = temp_data_set['Bol_BW_200MA'].fillna(method='backfill')
        temp_data_set['20d_exma'] = pandas.ewma(temp_data_set['Adj Close'], span=20)
        temp_data_set['50d_exma'] = pandas.ewma(temp_data_set['Adj Close'], span=50)
        data_ext.all_stock_df = temp_data_set.sort('Date', ascending = False)

        data_ext.all_stock_df.plot(x='Date', y=['Adj Close', '20d_ma', '50d_ma', 'Bol_upper', 'Bol_lower'])

        cur_path = os.path.dirname(os.path.abspath(__file__))
        #cur_path = os.path.join(cur_path, "app/analysis")
        data_path = os.path.join(cur_path, "raw_stock_data")
        print(cur_path)
        img_path = os.path.join(cur_path, "static/img")
        history_img_path = os.path.join(img_path, "history.png")
        dividend_img_path = os.path.join(img_path, "dividend.png")
        plt.savefig(history_img_path)
        data_ext.all_stock_df.plot(x='Date', y=['Bol_BW','Bol_BW_200MA'])
        plt.savefig(dividend_img_path)

        return render_template('analysis.html')
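Two calls above were later removed from pandas: pandas.ewma(s, span=n) became s.ewm(span=n).mean(), and DataFrame.sort(column) became DataFrame.sort_values(column). A minimal sketch with toy data:

import pandas as pd

df = pd.DataFrame({'Date': pd.date_range('2020-01-01', periods=4),
                   'Adj Close': [10.0, 10.5, 10.2, 10.8]})
df['20d_exma'] = df['Adj Close'].ewm(span=20).mean()  # replaces pandas.ewma(..., span=20)
df = df.sort_values('Date', ascending=True)           # replaces df.sort('Date', ascending=True)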
Example #9
def rollingMean(ms):

    # RQ-decompose each 3x3 matrix into R (upper-triangular) and Q (orthogonal)
    mtxRs = np.array([cv2.RQDecomp3x3(x)[1] for x in ms])
    mtxQs = np.array([cv2.RQDecomp3x3(x)[2] for x in ms])
    n, _, _ = mtxRs.shape
    params = []
    for i in range(n):
        r = mtxRs[i]
        q = mtxQs[i]
        theta = -np.arcsin(q[0, 1])
        dx = r[0, 2]
        dy = r[1, 2]
        focus = r[1, 1]
        shear = r[0, 1]
        ratio = r[0, 0] / focus
        params.append([focus, shear, dx, theta, ratio, dy, q[2, 0], q[2, 1]])

    df = pd.DataFrame(params, columns=["focus", "shear", "dx", "theta", "ratio", "dy", "p1", "p2"])
    nmsdf = pd.rolling_mean(df, 10, min_periods=1, center=True).values
    nms = []
    # Rebuild the matrices from the smoothed parameters (nmsdf, not the raw df)
    for focus, shear, dx, theta, ratio, dy, p1, p2 in nmsdf:
        r = np.array([[ratio * focus, shear, dx], [0, focus, dy], [0, 0, 1]])
        q = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [p1, p2, 1]])
        nms.append(np.dot(r, q))

    df.plot(subplots=True, layout=(3, 3))
    # nms.plot(subplots=True, layout=(3, 3))
    plt.show()
    return nms
Example #10
def TimeSeries(df,stock_name,VARS,signal=None,longIN=None,longOUT=None,shortIN=None,shortOUT=None,
                         suptitle ='',fig_fn=None,date1=None,date2=None,mean_per=1,ymin=None,ymax=None,
                         includeTrades=False,includeTradesEarnings=False,includeSignals=False):
                             #for most stacked graphs enforce ymin=ymax=None as different scales apply
                    
    """Multiple Windows, shared X-axis. perfect for time series.
    
    This function allows to graphs as many variables (in vector VARS) on the shared X-axis
    but in separate windows, hence allowing to juxtaposition different varibles which cannot be
    compared well on the same Y-axis.

    the graphs have rolling mean plotted on top of each statistic.    
    """
    fig,axes = plt.subplots(nrows=len(VARS),ncols=1,sharex=True,sharey=False)
    fig.subplots_adjust(hspace=0.15)
    #fig.set_size_inches(25.5,20.5)
    
    suptitle = form_suptitle(suptitle,date1,date2)
    fig.suptitle(suptitle,ha='center', va='center',fontsize=10,color="#FF3300") 
    
    for f in range(len(VARS)):  
        if len(VARS) == 1:
            ax = axes
        else:
            ax = axes[f]
        t_title = VARS[f] 

        df[t_title].plot(kind='line',color=COLORS[f],ax=ax,alpha=0.9,label=t_title)

        if mean_per > 1 and t_title.find("Close") < 0:
            pd.rolling_mean(df[t_title],mean_per).plot(ax=ax,color=COLORS[f],linewidth=2,label=str(mean_per)+"D rolling mean") #color="#52A3CC",alpha=0.70,linewidth=1.9,style="k--"
        
        if ("RelRet" in t_title) or ("RawRet" in t_title):
            add_range_lines(ax,df,t_title)        

        if includeTradesEarnings:
            plot_TradesEarnings(df,ax)        

        if includeSignals and signal is not None:
            plotSignals(df,signal,ax,shortIN=shortIN,longIN=longIN,shortOUT=shortOUT,longOUT=longOUT)

        if includeTrades:
            plotTrades(df,ax)


        if f == len(VARS)-1:
            format_plot(ax,"Date",title=t_title,use_legends=False)        
        else:
            format_plot(ax,"",title=t_title,use_legends=False) 
        if ymin is not None:
            ax.axes.set_ylim(ymin,ymax)    
        if t_title.lower() in ["oorelret","ccrelret"]:
            ax.axes.set_ylim(-0.06,0.06)
        if t_title.lower() in ["oorelret(3d avg)","ccrelret(3d avg)"]:
            ax.axes.set_ylim(-0.03,0.03)
                  
    if fig_fn is not None:
        plt.savefig(fig_fn)  
    else:
        plt.show()         
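Example #11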
def handle_data(account):                  # buy/sell orders for each trading day
    hist = account.get_attribute_history('closePrice',window_long)
    fund = universe_tuple[0]
    today = account.current_date
    preday100 = today + timedelta(days = -100)
    yestoday = today + timedelta(days = -1)
    
    # Use yesterday rather than today to avoid look-ahead; with this change,
    # maIndexShort.values[-1] is safe to use.
    cIndex = DataAPI.MktIdxdGet(ticker='399006',beginDate=preday100,endDate=yestoday,field=["tradeDate","closeIndex"],pandas="1")
    
    maIndexShort  = np.round(pd.rolling_mean(cIndex['closeIndex'],window=window_short),2)
    maIndexLong  = np.round(pd.rolling_mean(cIndex['closeIndex'],window=window_long),2)
    
    # maIndexShort.values[-1] would otherwise use future data (no longer an issue)
    if maIndexShort.values[-1]>= maIndexLong.values[-1]:
        if account.position.secpos.get(fund, 0) == 0:
            # *1.02 guards against a gap up at the open, in case we cannot fill the full position
            approximationAmount = int(account.cash/(hist[universe_tuple[0]][-1]*1.02)/100.0) * 100
            order(universe_tuple[0],approximationAmount)
    elif maIndexShort.values[-1] < maIndexLong.values[-1]:
        if account.position.secpos.get(fund, 0) > 0:
            order_to(universe_tuple[0],0)
    else :
        if isnan(maIndexShort.values[-1]) or isnan(maIndexLong.values[-1]):
            print('Warning: MA is NaN.')
        pass
Example #12
    def generate_signals_MA(self):
        """Returns the DataFrame of symbols containing the signals
        to go long, short or hold (1, -1 or 0)."""
        signals = pd.DataFrame(index=self.bars.index)
        signals['signal'] = 0.0
        signals['tradesignal'] = 0.0
        signals['Longshortstatues'] = 0.0
        signals[self.pair[0]] = 0.0
        signals[self.pair[1]] = 0.0
        #create signal 
        AtoB = self.generate_AtoB()
        short_window = 10
        long_window = 30 
        signals['short_mavg'] = pd.rolling_mean( AtoB['A/B'], short_window, min_periods=1)
        signals['long_mavg'] = pd.rolling_mean( AtoB['A/B'], long_window, min_periods=1)       
        signals['signal'][short_window:] = np.where(signals['short_mavg'][short_window:] 
            > signals['long_mavg'][short_window:], 1.0, 0.0)   

        # Take the difference of the signals in order to generate actual trading orders
        signals['tradesignal'] = signals['signal'].diff()         
        
        # generate signals for each stock; these are trade signals, not positions
        
        signals[self.pair[0]] = signals['tradesignal']
        
        signals[self.pair[1]] = -signals['tradesignal'] * AtoB['MA']
        # the last value is unreliable; set it to zero when generating positions
        return signals.loc[:,self.pair]
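A caveat on the slicing assignment above: chained indexing like signals['signal'][short_window:] = ... can raise SettingWithCopyWarning in modern pandas. A sketch of the safe positional form (toy data):

import numpy as np
import pandas as pd

signals = pd.DataFrame({'short_mavg': [1.0, 2.0, 3.0], 'long_mavg': [2.0, 2.0, 2.0]})
signals['signal'] = 0.0
short_window = 1
signals.iloc[short_window:, signals.columns.get_loc('signal')] = np.where(
    signals['short_mavg'][short_window:] > signals['long_mavg'][short_window:], 1.0, 0.0)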
Example #13
    def test_dollar_volume(self):
        results = self.engine.run_pipeline(
            Pipeline(
                columns={
                    "dv1": AverageDollarVolume(window_length=1),
                    "dv5": AverageDollarVolume(window_length=5),
                    "dv1_nan": AverageDollarVolume(
                        window_length=1, inputs=[USEquityPricing.open, USEquityPricing.volume]
                    ),
                    "dv5_nan": AverageDollarVolume(
                        window_length=5, inputs=[USEquityPricing.open, USEquityPricing.volume]
                    ),
                }
            ),
            self.dates[5],
            self.dates[-1],
        )

        expected_1 = (self.raw_data[5:] ** 2) * 2
        assert_frame_equal(results["dv1"].unstack(), expected_1)

        expected_5 = rolling_mean((self.raw_data ** 2) * 2, window=5)[5:]
        assert_frame_equal(results["dv5"].unstack(), expected_5)

        # The following two use USEquityPricing.open and .volume as inputs.
        # The former uses self.raw_data_with_nans, and the latter uses
        # .raw_data * 2.  Thus we multiply instead of squaring as above.
        expected_1_nan = (self.raw_data_with_nans[5:] * self.raw_data[5:] * 2).fillna(0)
        assert_frame_equal(results["dv1_nan"].unstack(), expected_1_nan)

        expected_5_nan = rolling_mean((self.raw_data_with_nans * self.raw_data * 2).fillna(0), window=5)[5:]
        assert_frame_equal(results["dv5_nan"].unstack(), expected_5_nan)
Example #14
def test_run_0104_08():
    # Read Data
    dates = pd.date_range('2012-01-01', '2012-12-31')
    symbols = ['SPY']
    df = get_data(symbols, dates)
    
    # Plot SPY data, retain matplotlib axis object
    ax = df['SPY'].plot(title = 'SPY rolling mean', label = 'SPY')
    
    # Compute rolling means using 20-, 40-, and 60-day windows
    rm_SPY1 = pd.rolling_mean(df['SPY'], window = 20)
    rm_SPY2 = pd.rolling_mean(df['SPY'], window = 40)
    rm_SPY3 = pd.rolling_mean(df['SPY'], window = 60)
    
    # Add rolling means to the same plot
    rm_SPY1.plot(label='Rolling mean (20d)', ax = ax)
    rm_SPY2.plot(label='Rolling mean (40d)', ax = ax)
    rm_SPY3.plot(label='Rolling mean (60d)', ax = ax)
    
    # Add axis labels and legend
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc = 'upper left')
    plt.show()
Example #15
def find_events_using_bollingerBandIndicator(ls_symbols, d_data):
    '''Find event using bollinger band'''
    df_close = d_data['close']
    event_count = 0
    
    df_events = copy.deepcopy(df_close)
    df_events = df_events * np.NAN

    ldt_timestamps = df_close.index

    spyPrice = df_close['SPY']
    spyMean = pd.rolling_mean(spyPrice, 20)
    spyStd = pd.rolling_std(spyPrice, 20)
    spyBollinger = (spyPrice-spyMean)/spyStd

    for s_sym in ls_symbols:
        symprice = df_close[s_sym]
        mean = pd.rolling_mean(symprice, 20)
        std = pd.rolling_std(symprice, 20)
        bollingerVals = (symprice-mean)/std
        for i in range(1, len(ldt_timestamps)):
            if(bollingerVals.ix[ldt_timestamps[i]] <= -2.0 and bollingerVals.ix[ldt_timestamps[i-1]] >= -2.0 and spyBollinger.ix[ldt_timestamps[i]] >= 1.5):
                df_events[s_sym].ix[ldt_timestamps[i]] = 1
                event_count += 1

    print ("Total event number is %s."%(event_count))
    return df_events
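The normalized Bollinger value used above, written against the modern rolling API (a sketch; bollinger_value is an illustrative name):

import pandas as pd

def bollinger_value(price, window=20):
    mean = price.rolling(window).mean()  # replaces pd.rolling_mean(price, window)
    std = price.rolling(window).std()    # replaces pd.rolling_std(price, window)
    return (price - mean) / std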
Example #16
def KELCH(df, n,ksgn='close'):
    '''
    Keltner Channel (KC): a moving-average channel made up of three lines
    (upper, middle and lower). The upper and lower boundaries are usually
    treated as the strongest buy/sell levels; unusual price movement at a
    boundary signals a trading opportunity.
    [Inputs]
        df, data source as a pd.DataFrame
        n, window length
        ksgn, column name, normally 'close' (closing price)
    [Outputs]
        df, the same pd.DataFrame with 3 columns added:
            kc_m, middle channel
            kc_u, upper channel
            kc_d, lower channel
    '''
    xnam='kc_m'
    xnam2='kc_u'
    xnam3='kc_d'
    KelChM = pd.Series(pd.rolling_mean((df['high'] + df['low'] + df[ksgn]) / 3, n), name = xnam)  #'KelChM_' + str(n)
    KelChU = pd.Series(pd.rolling_mean((4 * df['high'] - 2 * df['low'] + df[ksgn]) / 3, n), name = xnam2)   #'KelChU_' + str(n)
    KelChD = pd.Series(pd.rolling_mean((-2 * df['high'] + 4 * df['low'] + df[ksgn]) / 3, n), name =xnam3)    #'KelChD_' + str(n)
    df = df.join(KelChM)  
    df = df.join(KelChU)  
    df = df.join(KelChD)  
    
    return df
Example #17
    def getBuySignals( self, measurement, colName ):
        """na wejsciu data frame o zadanych przez InputSettings parametrach,
        na wyjsciu 0,1,-1 kiedy kupowac z kierunkiem - nie wiem czy bedziemy
        mieli takie algorytmy co beda w wyniku dawac sygnaly -1,1?
        """
        #print (self.df)
        self.df = self.df.append(Series(measurement[colName],index = ['a']))
        #print measurement.name
        #sys.exit(1)
        if self.df.shape[0] == self.bigAvg:
            curBig = rolling_mean(self.df, self.bigAvg)
            curSmall = rolling_mean(self.df[(self.bigAvg-self.smallAvg):], self.smallAvg)
#            print "1=========================="
#            print curBig[-1]
#            print curSmall[-1]
            
            if curBig[-1] < curSmall[-1]:
                self.df = self.df[1:]
                #return [self.getReturn(1), measurement.name]
                return self.getReturn(1)
            else:
                self.df = self.df[1:]
                #return [self.getReturn(-1), measurement.name]
                return self.getReturn(-1)
        else:
            return 0
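Example #18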
def convert_test_runs_list_to_time_series_dict(test_runs_list, resample):
    test_runs = []
    for test_run in test_runs_list:
        tr = test_run.to_dict()
        # Populate dict
        start_time = test_run.start_time
        if start_time and test_run.start_time_microsecond:
            start_time = start_time.replace(
                microsecond=test_run.start_time_microsecond)
            tr['start_time'] = start_time
        tr.pop('start_time_microsecond')
        if test_run.stop_time:
            stop_time = test_run.stop_time
            if test_run.stop_time_microsecond:
                stop_time = stop_time.replace(
                    microsecond=test_run.stop_time_microsecond)
            tr['stop_time'] = stop_time
        tr['run_time'] = read_subunit.get_duration(start_time,
                                                   tr.pop('stop_time'))
        tr.pop('stop_time_microsecond')
        tr.pop('id')
        tr.pop('test_id')
        test_runs.append(tr)

    df = pd.DataFrame(test_runs).set_index('start_time')
    df.index = pd.DatetimeIndex(df.index)
    # Add rolling mean and std dev of run_time to the dataframe
    df['avg_run_time'] = pd.rolling_mean(df['run_time'], 20)
    df['stddev_run_time'] = pd.rolling_std(df['run_time'], 20)

    # Resample numeric data for the run_time graph from successful runs
    numeric_df = df[df['status'] == 'success'].resample(
        base.resample_matrix[resample], how='mean')
    # Drop duplicate or invalid columns
    del(numeric_df['run_id'])
    del(df['run_time'])
    # Interpolate missing data
    numeric_df['run_time'] = numeric_df.interpolate(method='time', limit=20)
    # Add rolling mean and std dev of run_time to the dataframe
    numeric_df['avg_run_time'] = pd.rolling_mean(numeric_df['run_time'], 20)
    numeric_df['stddev_run_time'] = pd.rolling_std(numeric_df['run_time'], 20)

    # Convert the dataframes to a dict
    numeric_dict = dict(
        (date.isoformat(),
            {
            'run_time': run_time,
            'avg_run_time': avg,
            'std_dev_run_time': stddev,
        }) for date, run_time, avg, stddev in zip(
            numeric_df.index, numeric_df.run_time, numeric_df.avg_run_time,
            numeric_df.stddev_run_time))
    temp_dict = dict(
        (date.isoformat(),
            {
            'run_id': run_id,
            'status': status,
            }) for date, run_id, status in zip(df.index, df.run_id, df.status))

    return {'numeric': numeric_dict, 'data': temp_dict}
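A side note on the snippet above: resample(rule, how='mean') was removed in later pandas; the aggregation is now chained, e.g. df.resample(rule).mean(). A minimal sketch with toy data:

import pandas as pd

df = pd.DataFrame({'run_time': [1.0, 2.0, 3.0]},
                  index=pd.date_range('2020-01-01', periods=3, freq='D'))
weekly = df.resample('W').mean()  # replaces df.resample('W', how='mean')

Example #19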
    def filter_by_MA(self):
        """
        Remove abrupt changes using a Moving Average filter
        """
        self.data['ma_entry'] = pd.rolling_mean(self.data.ENTRIES, window=3, min_periods=3)
        self.data['ma_exit'] = pd.rolling_mean(self.data.EXITS, window=3, min_periods=3)

        # Winsorize at 10 std_dev
        cap_entry = 10 * self.data.ENTRIES.std()
        cap_exit = 10 * self.data.EXITS.std()

        self.logger.debug("The 10 * std.dev is :{:.2f} , {:.2f} for entries and exits.".format(cap_entry, cap_exit))

        if cap_entry > 5000:
            cap_entry = 5000
        if cap_exit > 5000:
            cap_exit = 5000

        self.data['outlier'] = False
        self.data.ix[np.abs(self.data.ma_entry - self.data.ENTRIES) > cap_entry, "outlier"] = True
        self.data.ix[np.abs(self.data.ma_exit - self.data.EXITS) > cap_exit, "outlier"] = True

        self.logger.info("{} out of {} observations are flagged as outliers by Moving-average.".format(len(self.data.ix[self.data['outlier']]), len(self.data)))
        self.data = self.data.ix[~self.data['outlier']]
        self.data.drop(['ma_entry', 'ma_exit', 'outlier'], inplace=True, axis=1)
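Example #20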
def citi_surprise_test(data_df):
    
    data_df = data_df.reindex(pd.date_range(data_df.index[0],data_df.index[-1],freq='m'),method='ffill')
    equity_monthly_rtn = data_df['S&P 500 Price'].pct_change(periods=1).to_frame().dropna()
    absolute_monthly_test = pd.concat([data_df['Citi Suprise'],equity_monthly_rtn],axis=1).dropna(axis=0)
    absolute_monthly_test.columns = ['Citi Suprise','S&P 500 Monthly Return']
    citi_above_zero_test = absolute_monthly_test[absolute_monthly_test['Citi Suprise'] > 0]
    
    citi_below_zero_test = absolute_monthly_test[absolute_monthly_test['Citi Suprise'] <= 0]
  
    
    ax = citi_above_zero_test.plot(kind='scatter', x='Citi Suprise', y='S&P 500 Monthly Return', color='Red', label='Citi Surprise > 0')

    citi_below_zero_test.plot(kind='scatter', x='Citi Suprise', y='S&P 500 Monthly Return',color='Grey', label='Citi Surprise <= 0', ax=ax)   


    citi_three_month_average = pd.rolling_mean(data_df['Citi Suprise'],window=3)
    citi_six_month_average = pd.rolling_mean(data_df['Citi Suprise'],window=12)
    citi_trend = pd.DataFrame(citi_three_month_average - citi_six_month_average,index=citi_six_month_average.index)
    
    citi_trend_test = pd.concat([citi_trend,equity_monthly_rtn],axis=1).dropna(axis=0)
    citi_trend_test.columns = ['Citi Suprise','S&P 500 Monthly Return']
    
    
    
    citi_up_trend_test = citi_trend_test[citi_trend_test['Citi Suprise'] > 0]
    
    citi_down_trend_test = citi_trend_test[citi_trend_test['Citi Suprise'] <= 0]
  
    
    ax = citi_up_trend_test.plot(kind='scatter', x='Citi Suprise', y='S&P 500 Monthly Return', color='Red', label='Citi Surprise Up Trend')

    citi_down_trend_test.plot(kind='scatter', x='Citi Suprise', y='S&P 500 Monthly Return',color='Grey', label='Citi Surprise Down Trend', ax=ax)   
Example #21
 def select_Time_DMA(self):
     
     #MA 
     ma_list = [self.AVR_SHORT, self.AVR_LONG]
     ma_dea = 10
     
     if ma_list[0] == self.AVR_SHORT and ma_list[1] == self.AVR_LONG:
         ma_close_short = self.ma_12
         ma_close_long = self.ma_40
     else:    
         ma_close_short = pd.rolling_mean(self.close_price, ma_list[0])
         ma_close_long = pd.rolling_mean(self.close_price, ma_list[1])
     
     dma_price = ma_close_short - ma_close_long
     ama_price = pd.rolling_mean(dma_price, ma_dea)
     
     signal = SIGNAL_DEFAULT
         
     if dma_price[-1] > dma_price[-2] and dma_price[-1] > ama_price[-1] \
                                         and dma_price[-2] < ama_price[-2]:
         signal = SIGNAL_BUY
     elif dma_price[-1] < dma_price[-2] and dma_price[-1] < ama_price[-1] \
                         and dma_price[-2] > ama_price[-2]:
         signal = SIGNAL_SALE           
     return signal            
Example #22
def KELCH(df, n):
    """
    Keltner Channel
    """
    KelChM = pd.Series(
        pd.rolling_mean(
            (df['High'] + df['Low'] + df['Close']) / 3,
            n
        ),
        name='KelChM_' + str(n)
    )
    KelChU = pd.Series(
        pd.rolling_mean(
            (4 * df['High'] - 2 * df['Low'] + df['Close']) / 3,
            n
        ),
        name='KelChU_' + str(n)
    )
    KelChD = pd.Series(
        pd.rolling_mean(
            (-2 * df['High'] + 4 * df['Low'] + df['Close']) / 3,
            n
        ),
        name='KelChD_' + str(n)
    )
    result = pd.DataFrame([KelChM, KelChU, KelChD]).transpose()
    return out(SETTINGS, df, result)
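The same Keltner Channel written against the modern rolling API (a sketch; keltner_modern is an illustrative name, column names as above):

import pandas as pd

def keltner_modern(df, n):
    m = ((df['High'] + df['Low'] + df['Close']) / 3).rolling(n).mean()
    u = ((4 * df['High'] - 2 * df['Low'] + df['Close']) / 3).rolling(n).mean()
    d = ((-2 * df['High'] + 4 * df['Low'] + df['Close']) / 3).rolling(n).mean()
    return pd.DataFrame({'KelChM_' + str(n): m, 'KelChU_' + str(n): u, 'KelChD_' + str(n): d})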
Example #23
def avgDepth_speed(panel1,panel2,sep1,sep2,probet,adcpt):
    a = panel1.minor_axis[:]
    probem = []
    adcpm = []
    for i,j in enumerate(a):
        print(j)
        height1 = panel1.minor_xs(j)
        height2 = panel2.minor_xs(j)
        mean1 = pd.rolling_mean(height1,sep1)
        mean2 = pd.rolling_mean(height2,sep2)
        mean1t = mean1.apply(mean_vel,axis=1)
        mean2t = mean2.apply(mean_vel,axis=1)
        pmean = mean1t[probet]
        amean = mean2t[adcpt]
        probem.append(pmean)
        adcpm.append(amean)
    fig,ax = plt.subplots()
    ax.plot(probem,a,label='FVCOM')
    ax.plot(adcpm,a,label='ADCP')
    ax.xaxis.grid()
    ax.yaxis.grid()
    ax.set_xlabel('Mean Speed (m/s)')
    ax.set_ylabel('Depth (m)')
    ax.set_title('Velocity by Depth')
    plt.legend()
    plt.show()
Example #24
def turb_depth(panel1,panel2,sep1,sep2,probet,adcpt):
    a = panel1.minor_axis[:]
    probeturbint = []
    adcpturbint = []
    for i,j in enumerate(a):
        print(j)
        height1 = panel1.minor_xs(j)
        height2 = panel2.minor_xs(j)
        mean1 = pd.rolling_mean(height1,sep1)
        mean2 = pd.rolling_mean(height2,sep2)
        var1 = pd.rolling_var(height1,sep1)
        var2 = pd.rolling_var(height2,sep2)
        var1t = var1.apply(variance,axis=1)
        var2t = var2.apply(variance,axis=1)
        mean1t = mean1.apply(mean_vel,axis=1)
        mean2t = mean2.apply(mean_vel,axis=1)
        t_int1 = var1t/mean1t
        t_int2 = var2t/mean2t
        ptime = t_int1[probet]
        atime = t_int2[adcpt]
        print(ptime)
        print(atime)
        probeturbint.append(ptime)
        adcpturbint.append(atime)
    fig,ax = plt.subplots()
    ax.plot(probeturbint,a,label='FVCOM')
    ax.plot(adcpturbint,a,label='ADCP')
    ax.xaxis.grid()
    ax.yaxis.grid()
    ax.set_xlabel('Turbulence Intensity')
    ax.set_ylabel('Depth (m)')
    ax.set_title('Turbulence Intensity by Depth')
    plt.legend()
    plt.show()
Example #25
def reynolds_depth(panel1,panel2,sep1,sep2,probet,adcpt):
    a = panel1.minor_axis[:]
    probereystr = []
    adcpreystr = []
    for i,j in enumerate(a):
        print(j)
        height1 = panel1.minor_xs(j)
        height2 = panel2.minor_xs(j)
        mean1 = pd.rolling_mean(height1,sep1)
        mean2 = pd.rolling_mean(height2,sep2)
        rstress1 = mean1.apply(rey_stress,axis=1)
        rstress2 = mean2.apply(rey_stress,axis=1)
        pR = rstress1[probet]
        aR = rstress2[adcpt]
        print(pR)
        print(aR)
        probereystr.append(pR)
        adcpreystr.append(aR)
    fig,ax = plt.subplots()
    ax.plot(probereystr,a,label='FVCOM')
    ax.plot(adcpreystr,a,label='ADCP')
    ax.xaxis.grid()
    ax.yaxis.grid()
    ax.set_xlabel('Reynolds Stress')
    ax.set_ylabel('Depth (m)')
    ax.set_title('Reynolds Stress by Depth')
    plt.legend()
    plt.show()
Example #26
def btn_update__click(dom):
    symbol = dom['select_stock']['value']
    if symbol == 'AAPL':
        df = AAPL
    elif symbol == 'GOOG':
        df = GOOG
    else:
        return dom
    bounds = [dom[x]['value'] if dom[x]['value'] else None
              for x in ['date_start', 'date_end']]
    ts = df['Close'][bounds[0]:bounds[1]]
    if ts.any():
        try:
            ts.plot()
            for win in [int(dom[x]['value'])
                        for x in ['slider_window_1', 'slider_window_2']]:
                pd.rolling_mean(ts, win).plot()
            plt.title("Weekly closing prices for {}".format(symbol))

            # get_svg is added by ashiba.plot
            dom['img_plot'].set_image(plt.get_svg(), 'svg')
        finally:
            plt.close()

    return dom
Example #27
def get_data():

    with open("D:\diplomski_kod\lista_burzi.txt") as f:

        lista_burzi = f.read().splitlines()
        lista1 = []
        for ime_burze in lista_burzi:
            print "reading stock exchange " + ime_burze
            lista1.append("D:\diplomski_kod\podaci" + "\\" + ime_burze + "\lista_simbola.txt")
            for i in lista1:
                with open(i) as f:
                    lines = f.read().splitlines()
                    lista = []
                    for symbol in lines:
                        # set the paths for the files needed by indikatori.parse_raw_data
                        lista.append("D:\diplomski_kod\podaci" + "\\" + ime_burze + "\\" + symbol + ".csv")
            for i in lista:
                indikatori.parse_raw_data(i, ime_burze)
                ml_model.train_and_eval_Model(i, ime_burze)
                print(i)

    # calculating arith and std_dev for each exchange
    for burza in lista_burzi:
        path = "D:\diplomski_kod\podaci\\" + burza + "\metrics.csv"
        df = pd.read_csv(path)
        df["all_arithm"] = pd.rolling_mean(df["arithm"], 10)
        df["all_std_dev"] = pd.rolling_mean(df["std_dev"], 10)
        df.to_csv(path, sep=",", index=False, encoding="utf-8")

    # calculating stock exchange for entire thing
    df2 = pd.read_csv("D:\diplomski_kod\\all_exchange_metrics.csv")
    df2["arithm all exchange"] = df2["arithm"].mean()
    df2["std dev all exchange"] = df2["std_dev"].mean()
    df2.to_csv("D:\diplomski_kod\\all_exchange_metrics.csv", sep=",", index=False, encoding="utf-8")
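Example #28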
def convert_data_to_df(ticker):
    df = pd.read_csv("/Users/excalibur/Dropbox/datasets/quandl_data/{}.csv".format(ticker))
    df = df.drop("Adjusted Close", axis=1)

    df["50dravg"] = pd.rolling_mean(df["Close"], window=50)
    df["200dravg"] = pd.rolling_mean(df["Close"], window=200)

    df["OC%"] = (df["Close"] / df["Open"]) - 1
    df["HL%"] = (df["High"] / df["Low"]) - 1
    df["OH%"] = (df["High"] / df["Open"]) - 1

    df["LastOpen"] = df["Open"].shift(1)
    df["LastHigh"] = df["High"].shift(1)
    df["LastLow"] = df["Low"].shift(1)
    df["LastClose"] = df["Close"].shift(1)
    df["LastVolume"] = df["Volume"].shift(1)
    df["LastOC%"] = df["OC%"].shift(1)
    df["LastHL%"] = df["HL%"].shift(1)
    df["LastOH%"] = df["OH%"].shift(1)

    df["ticker"] = ticker

    df["label"] = df["OH%"].shift(-1)

    return df.copy()
Example #29
def kdj(prices, params={"windows": [9, 3, 3]}):
    """
    Calculate KDJ indicator:
    RSV = (Ct - Ln) / (Hn - Ln) * 100
    K = sma3(RSV)
    D = sma3(K)
    J = 3 * K - 2 * D

    Parameters
    ----------
    prices: DataFrame
        Includes the open, close, high, low and volume.
    params: dict

    Returns
    ----------
    kdj_val: DataFrame
    """
    windows = params["windows"]
    rsv = __rsv(prices, windows[0])
    k = pd.rolling_mean(rsv, windows[1])
    d = pd.rolling_mean(k, windows[2])
    j = 3 * k - 2 * d
    kdj_val = np.column_stack((k, d, j))

    return pd.DataFrame(kdj_val, index=prices.index, columns=["K", "D", "J"])
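The K/D smoothing above, restated with the modern rolling API (a sketch; kdj_from_rsv is an illustrative name and expects a precomputed RSV series):

import pandas as pd

def kdj_from_rsv(rsv, k_window=3, d_window=3):
    k = rsv.rolling(k_window).mean()  # replaces pd.rolling_mean(rsv, k_window)
    d = k.rolling(d_window).mean()
    j = 3 * k - 2 * d
    return pd.DataFrame({'K': k, 'D': d, 'J': j})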
Example #30
def rolling_table(ticker):

    fund = db.session.query(models.VanguardFund).filter(
        models.VanguardFund.ticker==ticker
    ).first()

    ts = pandas.Series(
        [x.price for x in fund.prices], 
        index=pandas.to_datetime([x.date for x in fund.prices]),
    )

    x = pandas.concat(
        [
            ts, 
            pandas.rolling_mean(ts, 30), 
            pandas.rolling_mean(ts, 90),
            pandas.rolling_mean(ts, 180)
        ],
        axis=1
    )

    #return [
    #    (d, x[0][d], x[1][d], x[2][d], x[3][d])
    #    for d in x.index
    #]


    return [ [d] + list(row) for d, row in x.iterrows() ]
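Example #31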
def EOM(df, n):
    EoM = (df['High'].diff(1) +
           df['Low'].diff(1)) * (df['High'] - df['Low']) / (2 * df['Volume'])
    Eom_ma = pd.Series(pd.rolling_mean(EoM, n), name='EoM_' + str(n))
    df = df.join(Eom_ma)
    return df
Example #32
def SL_PMT_plots(myCountry, economy, event_level, myiah, myHaz, my_PDS,
                 _wprime, base_str, to_usd):
    out_files = os.getcwd() + '/../output_country/' + myCountry + '/'

    listofquintiles = np.arange(0.20, 1.01, 0.20)
    quint_labels = [
        'Poorest\nquintile', 'Second', 'Third', 'Fourth',
        'Wealthiest\nquintile'
    ]

    myiah['hhid'] = myiah['hhid'].astype('str')
    myiah = myiah.set_index('hhid')
    pmt, _ = get_pmt(myiah)
    myiah['PMT'] = pmt

    for _loc in myHaz[0]:
        for _haz in myHaz[1]:
            for _rp in myHaz[2]:

                plt.cla()
                _ = myiah.loc[(myiah[economy] == _loc)
                              & (myiah['hazard'] == _haz) &
                              (myiah['rp'] == _rp)].copy()

                _ = _.reset_index().groupby(
                    economy, sort=True).apply(lambda x: match_percentiles(
                        x,
                        perc_with_spline(reshape_data(x.PMT),
                                         reshape_data(x.pcwgt_no),
                                         listofquintiles), 'quintile', 'PMT'))

                for _sort in ['PMT']:

                    _ = _.sort_values(_sort, ascending=True)

                    _['pcwgt_cum_' + base_str] = _['pcwgt_' +
                                                   base_str].cumsum()
                    _['pcwgt_cum_' + my_PDS] = _['pcwgt_' + my_PDS].cumsum()

                    _['dk0_cum'] = _[['pcwgt_' + base_str,
                                      'dk0']].prod(axis=1).cumsum()

                    _['cost_cum_' + my_PDS] = _[[
                        'pcwgt_' + my_PDS, 'help_received_' + my_PDS
                    ]].prod(axis=1).cumsum()
                    # ^ cumulative cost
                    _['cost_frac_' + my_PDS] = _[[
                        'pcwgt_' + my_PDS, 'help_received_' + my_PDS
                    ]].prod(axis=1).cumsum() / _[[
                        'pcwgt_' + my_PDS, 'help_received_' + my_PDS
                    ]].prod(axis=1).sum()
                    # ^ cumulative cost as fraction of total

                    # GET WELFARE COSTS
                    _['dw_cum_' +
                      base_str] = _[['pcwgt_' + base_str,
                                     'dw_' + base_str]].prod(axis=1).cumsum()
                    # Include public costs in baseline (dw_cum)
                    ext_costs_base = pd.read_csv(out_files +
                                                 'public_costs_tax_' +
                                                 base_str + '_.csv').set_index(
                                                     [economy, 'hazard', 'rp'])

                    ext_costs_base[
                        'dw_pub_curr'] = ext_costs_base['dw_pub'] / _wprime
                    ext_costs_base[
                        'dw_soc_curr'] = ext_costs_base['dw_soc'] / _wprime
                    ext_costs_base['dw_tot_curr'] = ext_costs_base[[
                        'dw_pub', 'dw_soc'
                    ]].sum(axis=1) / _wprime

                    ext_costs_base_sum = ext_costs_base.loc[
                        ext_costs_base['contributer'] != ext_costs_base.index.
                        get_level_values(event_level[0]),
                        ['dw_pub_curr', 'dw_soc_curr', 'dw_tot_curr']].sum(
                            level=[economy, 'hazard', 'rp']).reset_index()

                    ext_costs_base_pub = float(ext_costs_base_sum.loc[
                        (ext_costs_base_sum[economy] == _loc)
                        & ext_costs_base_sum.eval('(hazard==@_haz)&(rp==@_rp)'
                                                  ), 'dw_pub_curr'])
                    ext_costs_base_soc = float(ext_costs_base_sum.loc[
                        (ext_costs_base_sum[economy] == _loc)
                        & ext_costs_base_sum.eval('(hazard==@_haz)&(rp==@_rp)'
                                                  ), 'dw_soc_curr'])
                    ext_costs_base_sum = float(ext_costs_base_sum.loc[
                        (ext_costs_base_sum[economy] == _loc)
                        & ext_costs_base_sum.eval('(hazard==@_haz)&(rp==@_rp)'
                                                  ), 'dw_tot_curr'])

                    _['dw_cum_' +
                      my_PDS] = _[['pcwgt_' + my_PDS,
                                   'dw_' + my_PDS]].prod(axis=1).cumsum()
                    # ^ cumulative DW, with my_PDS implemented

                    # Include public costs in pds_dw_cum
                    ext_costs_pds = pd.read_csv(out_files +
                                                'public_costs_tax_' + my_PDS +
                                                '_.csv').set_index(
                                                    [economy, 'hazard', 'rp'])

                    ext_costs_pds[
                        'dw_pub_curr'] = ext_costs_pds['dw_pub'] / _wprime
                    ext_costs_pds[
                        'dw_soc_curr'] = ext_costs_pds['dw_soc'] / _wprime
                    ext_costs_pds['dw_tot_curr'] = ext_costs_pds[[
                        'dw_pub', 'dw_soc'
                    ]].sum(axis=1) / _wprime

                    ext_costs_pds_sum = ext_costs_pds.loc[
                        (ext_costs_pds['contributer'] != ext_costs_pds.index.
                         get_level_values(event_level[0])),
                        ['dw_pub_curr', 'dw_soc_curr', 'dw_tot_curr']].sum(
                            level=[economy, 'hazard', 'rp']).reset_index()

                    ext_costs_pds_pub = float(ext_costs_pds_sum.loc[
                        (ext_costs_pds_sum[economy] == _loc)
                        & ext_costs_pds_sum.eval('(hazard==@_haz)&(rp==@_rp)'),
                        'dw_pub_curr'])
                    ext_costs_pds_soc = float(ext_costs_pds_sum.loc[
                        (ext_costs_pds_sum[economy] == _loc)
                        & ext_costs_pds_sum.eval('(hazard==@_haz)&(rp==@_rp)'),
                        'dw_soc_curr'])
                    ext_costs_pds_sum = float(ext_costs_pds_sum.loc[
                        (ext_costs_pds_sum[economy] == _loc)
                        & ext_costs_pds_sum.eval('(hazard==@_haz)&(rp==@_rp)'),
                        'dw_tot_curr'])

                    _['dw_cum_' +
                      my_PDS] += (ext_costs_pds_pub +
                                  ext_costs_pds_soc) * _['cost_frac_' + my_PDS]
                    _['delta_dw_cum_' +
                      my_PDS] = _['dw_cum_' + base_str] - _['dw_cum_' + my_PDS]

                    ### PMT-ranked population coverage [%]
                    plt.plot(
                        100. * _['pcwgt_cum_' + base_str] /
                        _['pcwgt_' + base_str].sum(), 100. * _['dk0_cum'] /
                        _[['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum())
                    plt.annotate(
                        'Total asset losses\n$' +
                        str(round(1E-6 * to_usd * _.iloc[-1]['dk0_cum'], 1)) +
                        ' mil.',
                        xy=(0.1, 0.85),
                        xycoords='axes fraction',
                        color=greys_pal[7],
                        fontsize=10)
                    if False:
                        plt.plot(
                            100. * _['pcwgt_cum_' + base_str] /
                            _['pcwgt_' + base_str].sum(), 100. * _['dk0_cum'] /
                            _[['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum())

                plt.xlabel('Population percentile [%]',
                           labelpad=8,
                           fontsize=10)
                plt.ylabel('Cumulative asset losses [%]',
                           labelpad=8,
                           fontsize=10)
                plt.xlim(0)
                plt.ylim(-0.1)
                plt.gca().xaxis.set_ticks([20, 40, 60, 80, 100])
                sns.despine()
                plt.grid(False)

                plt.gcf().savefig('../output_plots/SL/PMT/pcwgt_vs_dk0_' +
                                  _loc + '_' + _haz + '_' + str(_rp) + '.pdf',
                                  format='pdf',
                                  bbox_inches='tight')
                plt.cla()

                #####################################
                ### PMT threshold vs dk (normalized)
                _ = _.sort_values('PMT', ascending=True)
                plt.plot(_['PMT'],
                         100. * _['dk0_cum'] /
                         _[['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum(),
                         linewidth=1.8,
                         zorder=99,
                         color=q_colors[1])

                for _q in [1, 2, 3, 4, 5]:
                    _q_x = _.loc[_['quintile'] == _q, 'PMT'].max()
                    _q_y = 100. * _.loc[_['quintile'] <= _q,
                                        ['pcwgt_' + base_str, 'dk0']].prod(
                                            axis=1).sum() / _[[
                                                'pcwgt_' + base_str, 'dk0'
                                            ]].prod(axis=1).sum()
                    if _q == 1: _q_yprime = _q_y / 20

                    plt.plot([_q_x, _q_x], [0, _q_y],
                             color=greys_pal[4],
                             ls=':',
                             linewidth=1.5,
                             zorder=91)

                    _usd = ' mil.'
                    plt.annotate((quint_labels[_q - 1] + '\n$' + str(
                        round(
                            1E-6 * to_usd *
                            _.loc[_['quintile'] == _q,
                                  ['pcwgt_' + base_str, 'dk0']].prod(
                                      axis=1).sum(), 1)) + _usd),
                                 xy=(_q_x, _q_y + _q_yprime),
                                 color=greys_pal[6],
                                 ha='right',
                                 va='bottom',
                                 style='italic',
                                 fontsize=8,
                                 zorder=91)

                if False:
                    plt.scatter(
                        _['PMT'],
                        100. * _['dk0_cum'] /
                        _[['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum(),
                        alpha=0.08,
                        s=6,
                        zorder=10,
                        color=q_colors[1])

                plt.xlabel('Household income [PMT]', labelpad=8, fontsize=10)
                plt.ylabel('Cumulative asset losses [%]',
                           labelpad=8,
                           fontsize=10)
                plt.annotate(
                    'Total asset losses\n$' +
                    str(round(1E-6 * to_usd * _.iloc[-1]['dk0_cum'], 1)) +
                    ' mil.',
                    xy=(0.1, 0.85),
                    xycoords='axes fraction',
                    color=greys_pal[7],
                    fontsize=10)
                plt.xlim(825)
                plt.ylim(-0.1)

                sns.despine()
                plt.grid(False)

                plt.gcf().savefig('../output_plots/SL/PMT/pmt_vs_dk_norm_' +
                                  _loc + '_' + _haz + '_' + str(_rp) + '.pdf',
                                  format='pdf',
                                  bbox_inches='tight')

                #####################################
                ### PMT threshold vs dk & dw
                plt.cla()
                plt.plot(_['PMT'],
                         _['dk0_cum'] * to_usd * 1E-6,
                         color=q_colors[1],
                         linewidth=1.8,
                         zorder=99)
                plt.plot(_['PMT'],
                         _['dw_cum_' + base_str] * to_usd * 1E-6,
                         color=q_colors[3],
                         linewidth=1.8,
                         zorder=99)

                _y1 = 1.08
                _y2 = 1.03
                if _['dk0_cum'].max() < _['dw_cum_' + base_str].max():
                    _y1 = 1.03
                    _y2 = 1.08

                plt.annotate(
                    'Total asset losses = $' +
                    str(round(_['dk0_cum'].max() * to_usd * 1E-6, 1)) +
                    ' million',
                    xy=(0.02, _y1),
                    xycoords='axes fraction',
                    color=q_colors[1],
                    ha='left',
                    va='top',
                    fontsize=10,
                    annotation_clip=False)

                wb_str = 'Total wellbeing losses = \$' + str(
                    round(_['dw_cum_' + base_str].max() * to_usd * 1E-6,
                          1)) + ' million'
                #wb_natl_str = '(+\$'+str(round(ext_costs_base_sum*to_usd*1E-6,1))+')'
                wb_natl_str = 'National welfare losses\n  $' + str(
                    round(ext_costs_base_sum * to_usd * 1E-6, 1)) + ' million'

                plt.annotate(wb_str,
                             xy=(0.02, _y2),
                             xycoords='axes fraction',
                             color=q_colors[3],
                             ha='left',
                             va='top',
                             fontsize=10,
                             annotation_clip=False)
                #plt.annotate(wb_natl_str,xy=(0.02,0.77),xycoords='axes fraction',color=q_colors[3],ha='left',va='top',fontsize=10)

                for _q in [1, 2, 3, 4, 5]:
                    _q_x = _.loc[_['quintile'] == _q, 'PMT'].max()
                    _q_y = max(
                        _.loc[_['quintile'] <= _q,
                              ['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum()
                        * to_usd * 1E-6,
                        _.loc[_['quintile'] <= _q,
                              ['pcwgt_' + base_str, 'dw_' +
                               base_str]].prod(axis=1).sum() * to_usd * 1E-6)
                    if _q == 1: _q_yprime = _q_y / 25

                    plt.plot([_q_x, _q_x], [0, _q_y],
                             color=greys_pal[4],
                             ls=':',
                             linewidth=1.5,
                             zorder=91)
                    plt.annotate(quint_labels[_q - 1],
                                 xy=(_q_x, _q_y + 7 * _q_yprime),
                                 color=greys_pal[6],
                                 ha='right',
                                 va='bottom',
                                 style='italic',
                                 fontsize=8,
                                 zorder=91,
                                 annotation_clip=False)

                    # This figures out label ordering (are cumulative asset or cumulative welfare losses higher?)
                    _cumk = round(
                        _.loc[_['quintile'] <= _q,
                              ['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum()
                        * to_usd * 1E-6, 1)
                    _cumw = round(
                        _.loc[_['quintile'] <= _q,
                              ['pcwgt_' + base_str, 'dw_' +
                               base_str]].prod(axis=1).sum() * to_usd * 1E-6,
                        1)
                    if _cumk >= _cumw:
                        _yprime_k = 4 * _q_yprime
                        _yprime_w = 1 * _q_yprime
                    else:
                        _yprime_k = 1 * _q_yprime
                        _yprime_w = 4 * _q_yprime

                    _qk = round(
                        _.loc[_['quintile'] == _q,
                              ['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum()
                        * to_usd * 1E-6, 1)
                    _qw = round(
                        _.loc[_['quintile'] == _q,
                              ['pcwgt_' + base_str, 'dw_' +
                               base_str]].prod(axis=1).sum() * to_usd * 1E-6,
                        1)

                    plt.annotate('$' + str(_qk) + ' mil.',
                                 xy=(_q_x, _q_y + _yprime_k),
                                 color=q_colors[1],
                                 ha='right',
                                 va='bottom',
                                 style='italic',
                                 fontsize=8,
                                 zorder=91,
                                 annotation_clip=False)
                    plt.annotate('$' + str(_qw) + ' mil.',
                                 xy=(_q_x, _q_y + _yprime_w),
                                 color=q_colors[3],
                                 ha='right',
                                 va='bottom',
                                 style='italic',
                                 fontsize=8,
                                 zorder=91,
                                 annotation_clip=False)

                plt.xlabel('Household income [PMT]', labelpad=8, fontsize=10)
                plt.ylabel('Cumulative losses [mil. US$]',
                           labelpad=8,
                           fontsize=10)
                plt.xlim(825)
                plt.ylim(-0.1)

                plt.title(' ' + str(_rp) + '-year ' + haz_dict[_haz].lower() +
                          ' in ' + _loc,
                          loc='left',
                          color=greys_pal[7],
                          pad=30,
                          fontsize=15)

                sns.despine()
                plt.grid(False)

                plt.gcf().savefig('../output_plots/SL/PMT/pmt_vs_dk0_' + _loc +
                                  '_' + _haz + '_' + str(_rp) + '.pdf',
                                  format='pdf',
                                  bbox_inches='tight')
                plt.close('all')

                #####################################
                ### Cost vs benefit of PMT

                show_net_benefit = False
                if show_net_benefit:
                    _['dw_cum_' +
                      base_str] += (ext_costs_base_pub + ext_costs_base_soc
                                    ) * _['cost_frac_' + my_PDS]
                #*_[['pcwgt_'+base_str,'dk0']].prod(axis=1).cumsum()/_[['pcwgt_'+base_str,'dk0']].prod(axis=1).sum()
                # ^ include national costs in baseline dw
                _['delta_dw_cum_' +
                  my_PDS] = _['dw_cum_' + base_str] - _['dw_cum_' + my_PDS]
                # redefine this because above changed

                plt.cla()
                plt.plot(_['PMT'],
                         _['cost_cum_' + my_PDS] * to_usd * 1E-6,
                         color=q_colors[1],
                         linewidth=1.8,
                         zorder=99)
                plt.plot(_['PMT'],
                         _['delta_dw_cum_' + my_PDS] * to_usd * 1E-6,
                         color=q_colors[3],
                         linewidth=1.8,
                         zorder=99)

                plt.annotate('PDS cost =\n$' + str(
                    round(_['cost_cum_' + my_PDS].max() * to_usd * 1E-6, 2)) +
                             ' mil.',
                             xy=(_['PMT'].max(),
                                 _['cost_cum_' + my_PDS].max() * to_usd *
                                 1E-6),
                             color=q_colors[1],
                             weight='bold',
                             ha='left',
                             va='top',
                             fontsize=10,
                             annotation_clip=False)
                plt.annotate('Avoided wellbeing\nlosses = $' + str(
                    round(_.iloc[-1]['delta_dw_cum_' + my_PDS] * to_usd * 1E-6,
                          2)) + ' mil.',
                             xy=(_['PMT'].max(),
                                 _.iloc[-1]['delta_dw_cum_' + my_PDS] *
                                 to_usd * 1E-6),
                             color=q_colors[3],
                             weight='bold',
                             ha='left',
                             va='top',
                             fontsize=10)

                #for _q in [1,2,3,4,5]:
                #    _q_x = _.loc[_['quintile']==_q,'PMT'].max()
                #    _q_y = max(_.loc[_['quintile']<=_q,['pcwgt','dk0']].prod(axis=1).sum()*to_usd*1E-6,
                #               _.loc[_['quintile']<=_q,['pcwgt','dw_no']].prod(axis=1).sum()*to_usd*1E-6)
                #    if _q == 1: _q_yprime = _q_y/20

                #    plt.plot([_q_x,_q_x],[0,_q_y],color=greys_pal[4],ls=':',linewidth=1.5,zorder=91)
                #    plt.annotate(quint_labels[_q-1],xy=(_q_x,_q_y+_q_yprime),color=greys_pal[6],ha='right',va='bottom',style='italic',fontsize=8,zorder=91)

                plt.xlabel('Upper PMT threshold for post-disaster support',
                           labelpad=8,
                           fontsize=12)
                plt.ylabel('Cost & benefit [mil. US$]',
                           labelpad=8,
                           fontsize=12)
                plt.xlim(825)  #;plt.ylim(0)

                plt.title(' ' + str(_rp) + '-year ' + haz_dict[_haz].lower() +
                          '\n  in ' + _loc,
                          loc='left',
                          color=greys_pal[7],
                          pad=25,
                          fontsize=15)
                plt.annotate(pds_dict[my_PDS],
                             xy=(0.02, 1.03),
                             xycoords='axes fraction',
                             color=greys_pal[6],
                             ha='left',
                             va='bottom',
                             weight='bold',
                             style='italic',
                             fontsize=8,
                             zorder=91,
                             clip_on=False)

                plt.plot(plt.gca().get_xlim(), [0, 0],
                         color=greys_pal[2],
                         linewidth=0.90)
                sns.despine(bottom=True)
                plt.grid(False)

                plt.gcf().savefig('../output_plots/SL/PMT/pmt_dk_vs_dw_' +
                                  _loc + '_' + _haz + '_' + str(_rp) + '_' +
                                  my_PDS + '.pdf',
                                  format='pdf',
                                  bbox_inches='tight')
                plt.close('all')
                continue

                #####################################
                ### Cost vs benefit of PMT
                _ = _.fillna(0)
                #_ = _.loc[_['pcwgt_'+my_PDS]!=0].copy()
                _ = _.loc[(_['help_received_' + my_PDS] != 0)
                          & (_['pcwgt_' + my_PDS] != 0)].copy()

                #_['dw_cum_'+my_PDS] = _[['pcwgt_'+my_PDS,'dw_'+my_PDS]].prod(axis=1).cumsum()
                #_['dw_cum_'+my_PDS] += ext_costs_pds_pub + ext_costs_pds_soc*_['cost_frac_'+my_PDS]
                # ^ unchanged from above

                _c1, _c1b = paired_pal[2], paired_pal[3]
                _c2, _c2b = paired_pal[0], paired_pal[1]

                _window = 100
                if _.shape[0] < 100: _window = int(_.shape[0] / 5)

                plt.cla()

                _y_values_A = (_['cost_cum_' + my_PDS] *
                               to_usd).diff() / _['pcwgt_' + my_PDS]
                _y_values_B = pd.rolling_mean(
                    (_['cost_cum_' + my_PDS] * to_usd).diff() /
                    _['pcwgt_' + my_PDS], _window)

                if _y_values_A.max() >= 1.25 * _y_values_B.max(
                ) or _y_values_A.min() <= 0.75 * _y_values_B.min():
                    plt.scatter(_['PMT'],
                                (_['cost_cum_' + my_PDS] * to_usd).diff() /
                                _['pcwgt_' + my_PDS],
                                color=_c1,
                                s=4,
                                zorder=98,
                                alpha=0.25)
                    plt.plot(_['PMT'],
                             pd.rolling_mean(
                                 (_['cost_cum_' + my_PDS] * to_usd).diff() /
                                 _['pcwgt_' + my_PDS], _window),
                             color=_c1b,
                             lw=1.0,
                             zorder=98)
                else:
                    plt.plot(_['PMT'],
                             (_['cost_cum_' + my_PDS] * to_usd).diff() /
                             _['pcwgt_' + my_PDS],
                             color=_c1b,
                             lw=1.0,
                             zorder=98)

                plt.scatter(_['PMT'],
                            (_['delta_dw_cum_' + my_PDS] * to_usd).diff() /
                            _['pcwgt_' + my_PDS],
                            color=_c2,
                            s=4,
                            zorder=98,
                            alpha=0.25)
                plt.plot(_['PMT'],
                         pd.rolling_mean(
                             (_['delta_dw_cum_' + my_PDS] * to_usd).diff() /
                             _['pcwgt_' + my_PDS], _window),
                         color=_c2b,
                         lw=1.0,
                         zorder=98)
                _y_min = 1.05 * pd.rolling_mean(
                    (_['delta_dw_cum_' + my_PDS] * to_usd).diff() /
                    _['pcwgt_' + my_PDS], _window).min()
                _y_max = 1.1 * max(
                    pd.rolling_mean(
                        (_['delta_dw_cum_' + my_PDS] * to_usd).diff() /
                        _['pcwgt_' + my_PDS], _window).max(), 1.05 *
                    ((_['cost_cum_' + my_PDS] * to_usd).diff() /
                     _['pcwgt_' + my_PDS]).mean() + _q_yprime)

                for _q in [1, 2, 3, 4, 5]:
                    _q_x = min(1150, _.loc[_['quintile'] == _q, 'PMT'].max())
                    #_q_y = max(_.loc[_['quintile']<=_q,['pcwgt_'+my_PDS,'dk0']].prod(axis=1).sum()*to_usd,
                    #           _.loc[_['quintile']<=_q,['pcwgt_'+my_PDS,'dw_no']].prod(axis=1).sum()*to_usd))
                    if _q == 1:
                        _q_xprime = (_q_x - 840) / 40
                        _q_yprime = _y_max / 200

                    plt.plot([_q_x, _q_x], [_y_min, _y_max],
                             color=greys_pal[4],
                             ls=':',
                             linewidth=1.5,
                             zorder=91)
                    plt.annotate(quint_labels[_q - 1],
                                 xy=(_q_x - _q_xprime, _y_max),
                                 color=greys_pal[6],
                                 ha='right',
                                 va='top',
                                 style='italic',
                                 fontsize=7,
                                 zorder=99)

                #toggle this
                plt.annotate('PDS cost',
                             xy=(_['PMT'].max() - _q_xprime,
                                 ((_['cost_cum_' + my_PDS] * to_usd).diff() /
                                  _['pcwgt_' + my_PDS]).mean() + _q_yprime),
                             color=_c1b,
                             weight='bold',
                             ha='right',
                             va='bottom',
                             fontsize=8,
                             annotation_clip=False)

                #plt.annotate('Avoided\nwellbeing losses',xy=(_['PMT'].max()-_q_xprime,pd.rolling_mean((_['delta_dw_cum']*to_usd/_['pcwgt_'+my_PDS]).diff(),_window).min()+_q_yprime),
                #             color=_c2b,weight='bold',ha='right',va='bottom',fontsize=8)

                plt.xlabel('Upper PMT threshold for post-disaster support',
                           labelpad=10,
                           fontsize=10)
                plt.ylabel(
                    'Marginal impact at threshold [US$ per next enrollee]',
                    labelpad=10,
                    fontsize=10)

                plt.title(str(_rp) + '-year ' + haz_dict[_haz].lower() +
                          ' in ' + _loc,
                          loc='right',
                          color=greys_pal[7],
                          pad=20,
                          fontsize=15)
                plt.annotate(pds_dict[my_PDS],
                             xy=(0.99, 1.02),
                             xycoords='axes fraction',
                             color=greys_pal[6],
                             ha='right',
                             va='bottom',
                             weight='bold',
                             style='italic',
                             fontsize=8,
                             zorder=91,
                             clip_on=False)

                plt.plot([840, 1150], [0, 0],
                         color=greys_pal[2],
                         linewidth=0.90)
                plt.xlim(840, 1150)
                plt.ylim(_y_min, _y_max)
                sns.despine(bottom=True)
                plt.grid(False)

                plt.gcf().savefig(
                    '../output_plots/SL/PMT/pmt_slope_cost_vs_benefit_' +
                    _loc + '_' + _haz + '_' + str(_rp) + '_' + my_PDS + '.pdf',
                    format='pdf',
                    bbox_inches='tight')
                plt.close('all')
"""
This file preprocesses the test files and saves the result as ordered_test.csv
"""
# same structure as preprocessing
import pandas as pd
import numpy as np

df_key = pd.read_csv("../input/key.csv")
df_test = pd.read_csv("../input/test.csv")
df_weather = pd.read_csv("../input/weather.csv")

df_test['date'] = pd.to_datetime(df_test['date'])
df_weather['date'] = pd.to_datetime(df_weather['date'])

temp = pd.merge(df_test, df_key,how='left', on=['store_nbr'])
df_main_test = pd.merge(temp, df_weather, how='left', on=['station_nbr','date'])

df_ordered = df_main_test.sort_values(['store_nbr','item_nbr','date']).reset_index(drop=True)
#df7 = df7.apply(pd.to_numeric, errors='coerce')
df_ordered = df_ordered.convert_objects(convert_numeric=True)
df_ordered['preciptotal'] = df_ordered['preciptotal'].fillna(0)
df_ordered['snowfall'] = df_ordered['snowfall'].fillna(0)
df_ordered = df_ordered.interpolate()


patternRA = 'RA'
patternSN = 'SN'
df_ordered['RA'], df_ordered['SN'] = df_ordered['codesum'].str.contains(patternRA), df_ordered['codesum'].str.contains(patternSN)
df_ordered['Condition'] = (df_ordered['RA'] & (df_ordered['preciptotal']>1.0)) | (df_ordered['SN'] & (df_ordered['preciptotal']>2.0))
df_ordered['WEvent'] = (pd.rolling_mean(df_ordered['Condition'],window=7,center=True) > 0)

df_ordered.to_csv('ordered_test.csv', sep=',')
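
The script above leans on APIs removed after pandas 0.23 (convert_objects, pd.rolling_mean). A minimal sketch of equivalent calls on current pandas, assuming the same df_ordered frame:

import pandas as pd

# numeric coercion; errors='ignore' mirrors the removed convert_objects()
# by leaving non-numeric columns such as codesum untouched
df_ordered = df_ordered.apply(pd.to_numeric, errors='ignore')

# centered 7-row rolling mean of the boolean Condition column; any window
# containing at least one True has a mean > 0, flagging a weather event
df_ordered['WEvent'] = df_ordered['Condition'].astype(float).rolling(window=7, center=True).mean() > 0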
Пример #34
0
    xzdf = xzdf[end:start]

    #resampling and filling XZ, XY and X dataframes
    fr_xzdf, fr_xydf, fr_xdf = resamp_fill_df(resampind, xzdf, xydf, xdf)
    fr_xzdf[which_node].plot()

    #computing cumulative node displacements
    cs_xzdf = fr_xzdf.cumsum(axis=1)
    cs_xydf = fr_xydf.cumsum(axis=1)
    cs_xdf = fr_xdf.cumsum(axis=1)

    for cur_node_ID in range(num_nodes):
        if cur_node_ID != which_node: continue

        #rolling mean in 3 hour-window and 3 minimum data points
        rm_xzdf = pd.rolling_mean(fr_xzdf, window=length)
        #    rm_xydf=pd.rolling_mean(fr_xydf,window=7)
        #    rm_xdf=pd.rolling_mean(fr_xdf,window=7)

        #linear regression in 3 hour-window and 3 minimum data points
        td_rm_xzdf = rm_xzdf.index.values - rm_xzdf.index.values[0]
        #    td_rm_xydf=rm_xydf.index.values-rm_xydf.index.values[0]
        #    td_rm_xdf=rm_xdf.index.values-rm_xdf.index.values[0]

        tdelta = pd.Series(td_rm_xzdf / np.timedelta64(1, 'D'),
                           index=rm_xzdf.index)
        #    tdelta=pd.Series(td_rm_xydf/np.timedelta64(1,'D'),index=rm_xydf.index)
        #    tdelta=pd.Series(td_rm_xdf/np.timedelta64(1,'D'),index=rm_xdf.index)

        plt.figure()
        lr_xzdf = ols(y=rm_xzdf[which_node],
Пример #35
0
def sma(df_in, periods):
    return pd.rolling_mean(df_in, abs(periods))
def MA(df, n):
    MA = pd.Series(pd.rolling_mean(df['Close'], n), name='MA_' + str(n))
    df = df.join(MA)
    return df
def CCI(df, n):
    PP = (df['High'] + df['Low'] + df['Close']) / 3
    CCI = pd.Series((PP - pd.rolling_mean(PP, n)) / pd.rolling_std(PP, n),
                    name='CCI_' + str(n))
    df = df.join(CCI)
    return df
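
For reference, a sketch of the same three indicators on the method-based rolling API (pandas >= 0.18), where pd.rolling_mean(x, n) becomes x.rolling(n).mean():

import pandas as pd

def sma(df_in, periods):
    # simple moving average
    return df_in.rolling(window=abs(periods)).mean()

def MA(df, n):
    ma = df['Close'].rolling(window=n).mean().rename('MA_' + str(n))
    return df.join(ma)

def CCI(df, n):
    # rolling z-score of the typical price; like the original, this omits
    # the conventional 0.015 scaling constant of the textbook CCI
    pp = (df['High'] + df['Low'] + df['Close']) / 3
    cci = ((pp - pp.rolling(n).mean()) / pp.rolling(n).std()).rename('CCI_' + str(n))
    return df.join(cci)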
Пример #38
0
t = rx['timestamp']

#Convert to pandas dataset, indexed by microsecond timestamp
print("Calculating throughput...")
t_pd = pd.to_datetime(t, unit='us')
len_pd = pd.Series(l, index=t_pd)

rs_interval = 100  #msec
rolling_window = 600  #samples

#Resample length vector, summing in each interval; fill empty intervals with 0
# Interval argument must be 'NL' for Nmsec ('100L' for 100msec)
len_rs = len_pd.resample(('%dL' % rs_interval), how='sum').fillna(value=0)

#Calculate rolling mean of throughput (units of bytes per rs_interval)
xput_roll = pd.rolling_mean(len_rs, rolling_window)

#Scale to Mb/sec
xput_roll = xput_roll * (1.0e-6 * 8.0 * (1.0 / (rs_interval * 1e-3)))

#----------------------
# Plot results

#X axis in units of minutes
t_p = np.linspace(0, (1.0 / 60) * 1e-6 * (max(t) - min(t)), len(xput_roll))

#enter interactive mode from script, so figures/plots update live
ion()

figure()
clf()
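
On current pandas the how= argument to resample is gone and rolling_mean is a method; a sketch of the same throughput pipeline under those APIs, assuming len_pd, rs_interval, and rolling_window as defined above:

# sum packet lengths per 100 ms bin (empty bins sum to 0), then smooth over 600 bins
len_rs = len_pd.resample('%dms' % rs_interval).sum()
xput_roll = len_rs.rolling(window=rolling_window).mean()

# bytes per rs_interval -> megabits per second, as in the original scaling
xput_roll = xput_roll * (1.0e-6 * 8.0 * (1.0 / (rs_interval * 1e-3)))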
Пример #39
0
    def testPolicy(self, symbol = "IBM", \
        sd=dt.datetime(2009,1,1), \
        ed=dt.datetime(2010,1,1), \
        sv = 10000):

        # here we build a fake set of trades
        # your code should return the same sort of data
        dates = pd.date_range(sd, ed)
        prices_all = ut.get_data([symbol], dates)  # automatically adds SPY
        prices = prices_all[[symbol]]
        prices = prices.fillna(method='ffill').fillna(method='bfill')
        trades = prices_all[[symbol,]]  # only portfolio symbols
        trades_SPY = prices_all['SPY']  # only SPY, for comparison later
        trades.values[:,:] = 0 # set them all to nothing
#        trades.values[3,:] = 200 # add a BUY at the 4th date
#        trades.values[5,:] = -200 # add a SELL at the 6th date 
#        trades.values[6,:] = 200 # add a SELL at the 7th date 
#        trades.values[8,:] = -400 # add a BUY at the 9th date
#        if self.verbose: print type(trades) # it better be a DataFrame!
#        if self.verbose: print trades
#        if self.verbose: print prices_all
        
        # start calculating three indicators
        X1_Momentum = prices.values[self.N-1:,:]/prices.values[0:-self.N+1,:] *100
#        X1_Momentum = prices.iloc[N:-1].divide(prices.iloc[0:-N-1])*100

        X2_SMA = pd.rolling_mean(prices,window=self.N)        
#        X3_middle = pd.rolling_mean(prices,window=self.N)
#        X3_std = pd.rolling_std(prices,window=self.N)
#        X3_upper = X3_middle.add(2*X3_std)
#        X3_lower = X3_middle.subtract(2*X3_std)
        
        self.X1_max = max(X1_Momentum)
        self.X1_min = min(X1_Momentum)
        self.X2_max = X2_SMA.max(axis=0).values
        self.X2_min = X2_SMA.min(axis=0).values
#        self.X3_max = X3_upper.max(axis=0).values
#        self.X3_min = X3_upper.min(axis=0).values
        
        position = 0
#        my_action = 0
        testing_day = 0
        while testing_day < trades.shape[0]:
            my_action = 0
            if not np.isnan(X2_SMA.iloc[testing_day].values):
                
                state_0 = int(position/200 + 1)
                state_1 = self.discretize(1, X1_Momentum[testing_day-self.N+1])
                state_2 = self.discretize(2, X2_SMA.iloc[testing_day].values)
#                state_3 = self.discretize(3, X3_upper.iloc[testing_day].values)
                
                state = state_0*self.states_N**2 + state_1*self.states_N + state_2
                action = self.learner.querysetstate(state)
                
                if state_0 == 0: #hold -200
                    new_position = position + action*200
                elif state_0 == 1:
                    new_position = position + (action-1)*200
                elif state_0 == 2:
                    new_position = position + (action-2)*200 
                    
                my_action = new_position - position
                if abs(position)>200:
                    print 'error'
                position = new_position                
                                
            if self.verbose:
                print testing_day, position
            trades.values[testing_day,:] = my_action
            testing_day = testing_day + 1
        
        return trades
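
The state index packed in testPolicy is a mixed-radix number: one base-3 digit for the position plus one base-states_N digit for each discretized indicator. A small self-contained illustration of the packing and its inverse, with states_n standing in for self.states_N:

def encode_state(position_bucket, momentum_bucket, sma_bucket, states_n):
    # position_bucket in {0, 1, 2}; indicator buckets in [0, states_n)
    return position_bucket * states_n ** 2 + momentum_bucket * states_n + sma_bucket

def decode_state(state, states_n):
    position_bucket, rest = divmod(state, states_n ** 2)
    momentum_bucket, sma_bucket = divmod(rest, states_n)
    return position_bucket, momentum_bucket, sma_bucket

assert decode_state(encode_state(2, 3, 1, 10), 10) == (2, 3, 1)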
Пример #40
0
k = 4

d1 = pd.cut(data, k, labels=range(k))  # equal-width discretization; the bins are labeled 0, 1, 2, 3

# equal-frequency discretization
w = [1.0 * i / k for i in range(k + 1)]
w = data.describe(percentiles=w)[4:4 + k + 1]  # use describe() to compute the quantiles automatically
w[0] = w[0] * (1 - 1e-10)
d2 = pd.cut(data, w, labels=range(k))

from sklearn.cluster import KMeans  # import KMeans

kmodel = KMeans(n_clusters=k, n_jobs=1)  # build the model; n_jobs is the number of parallel jobs, ideally the CPU count
kmodel.fit(data.reshape((len(data), 1)))  # fit the model
c = pd.DataFrame(kmodel.cluster_centers_).sort_values(0)  # cluster centers, sorted (their default order is random)
w = pd.rolling_mean(c, 2).iloc[1:]  # midpoints of adjacent centers serve as the bin boundaries
w = [0] + list(w[0]) + [data.max()]  # prepend the first and append the last boundary point
d3 = pd.cut(data, w, labels=range(k))


def cluster_plot(d, k):  # custom plotting function to display the clustering result
    import matplotlib.pyplot as plt
    plt.rcParams['font.sans-serif'] = ['SimHei']  # so that Chinese labels render correctly
    plt.rcParams['axes.unicode_minus'] = False  # so that minus signs render correctly

    plt.figure(figsize=(8, 3))
    for j in range(0, k):
        plt.plot(data[d == j], [j for i in d[d == j]], 'o')

    plt.ylim(-0.5, k - 0.5)
    return plt
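
The describe-based quantile construction above is exactly what pd.qcut does in one call; a minimal equivalent, assuming the same data Series and k:

# equal-frequency discretization; labels=False returns the integer bin
# codes 0..k-1 directly (assumes the quantile edges are distinct)
d2 = pd.qcut(data, q=k, labels=False)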
Пример #41
0

data['time']=data['time'].astype(str).replace('time','1490600000000000000')
data['time']=data['time'].astype(float)
data['time']=pd.to_datetime(data['time'],format=None)


temp=data[fields]
scatter_matrix(temp, alpha=0.2, figsize=(6, 6), diagonal='kde')



humidity = data['Humidity']
moist = data['Moisture']
temp = data['Temperature']
mavg = pd.rolling_mean(moist, 50, center = True)
havg = pd.rolling_mean(humidity, 50, center = True)
tavg = pd.rolling_mean(temp, 50, center = True)
time = data['time']



fig = plt.figure()

ax1 = plt.subplot2grid((20,1), (0,0), rowspan = 7, colspan = 1 )
ax1.plot(time, humidity, color='cyan', linewidth= 2.0, label = "")
ax1.plot(time,havg, '--',color='red', linewidth= 2.0, label = "Rolling Mean")
plt.ylabel('Humidity')
plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
           ncol=2, mode="expand", borderaxespad=0.)
ax1.yaxis.set_major_locator(mticker.MaxNLocator(nbins=5, prune='both'))
Пример #42
0
    def addEvidence(self, symbol = "IBM", \
        sd=dt.datetime(2008,1,1), \
        ed=dt.datetime(2009,1,1), \
        sv = 10000): 

        # add your code to do learning here
        position = 0 # number of share you hold
        
        # X1: Momentum, 10 days delay, M = close(i)/close(i-10)*100
        # X2: Moving average, avg(i) = mean(close(i-10+1):close(i))
        # X3: Bollinger Bands, 10 days mean & 2*std      
               
        # Create three learners, each for one position
        self.learner = ql.QLearner(num_states=3*self.states_N**2,\
            num_actions = 3, \
            alpha = 0.2, \
            gamma = 0.9, \
            rar = random.random(), radr = random.random(), \
            dyna = 0, \
            verbose=False) 
                             
#        self.learner1 = ql.QLearner(num_states=self.states_N**3,\
#            num_actions = 3, \
#            alpha = 0.2, \
#            gamma = 0.9, \
##            rar = 0.5, radr = 0.99, \
#            dyna = 0, \
#            verbose=False) #position 0
#        
#        self.learner2 = ql.QLearner(num_states=self.states_N**3,\
#            num_actions = 3, \
#            alpha = 0.2, \
#            gamma = 0.9, \
##            rar = 0.5, radr = 0.99, \
#            dyna = 0, \
#            verbose=False) #position 200
        
        flag_0 = True #hasn't been initialized yet
#        flag_1 = True
#        flag_2 = True
        
        # example usage of the old backward compatible util function
        syms=[symbol]
        dates = pd.date_range(sd, ed)
        prices_all = ut.get_data(syms, dates)  # automatically adds SPY
        prices = prices_all[syms]  # only portfolio symbols
        prices = prices.fillna(method='ffill').fillna(method='bfill')
        prices_SPY = prices_all['SPY']  # only SPY, for comparison later
        if self.verbose: print prices
        
        # start calculating three indicators
        X1_Momentum = prices.values[self.N-1:,:]/prices.values[0:-self.N+1,:] *100
#        X1_Momentum = prices.iloc[N:-1].divide(prices.iloc[0:-N-1])*100

        X2_SMA = pd.rolling_mean(prices,window=self.N)        
#        X3_middle = pd.rolling_mean(prices,window=self.N)
#        X3_std = pd.rolling_std(prices,window=self.N)
#        X3_upper = X3_middle.add(2*X3_std)
#        X3_lower = X3_middle.subtract(2*X3_std)
        
        self.X1_max = max(X1_Momentum)
        self.X1_min = min(X1_Momentum)
        self.X2_max = X2_SMA.max(axis=0).values
        self.X2_min = X2_SMA.min(axis=0).values
#        self.X3_max = X3_upper.max(axis=0).values
#        self.X3_min = X3_upper.min(axis=0).values
        
        training_day = 0
        port_value = prices_all[syms]        
        port_value.values[:,:] = 0
        port_value.values[0,:] = sv
        previous_end_port = 0
        cash = sv
        epoch = 0
        repeated = 0
        while (training_day < X2_SMA.shape[0]) & (previous_end_port<2.5*sv): 
            
            
            
            if not np.isnan(X2_SMA.iloc[training_day].values):
                                
                state_0 = int(position/200 + 1)
                state_1 = self.discretize(1, X1_Momentum[training_day-self.N+1])
                state_2 = self.discretize(2, X2_SMA.iloc[training_day].values)
#                state_3 = self.discretize(3, X3_upper.iloc[training_day].values)
                
                state = state_0*self.states_N**2 + state_1*self.states_N + state_2
#                if position == -200:
#                    if flag_0 == True:
#                        flag_0 = False
#                        action = self.learner0.querysetstate(state)
#                        
#                    r = prices.iloc[training_day].values - prices.iloc[training_day-1].values
#                    r = r*position
#                    action = self.learner0.query(state,r)
#                    new_position = position + action*200                    
#                    
#                if position == 0:
#                    if flag_1 == True:
#                        flag_1 = False
#                        action = self.learner1.querysetstate(state)
#                        
#                    r = prices.iloc[training_day].values - prices.iloc[training_day-1].values
#                    r = r*position
#                    action = self.learner1.query(state,r)
#                    new_position = position + (action-1)*200
#                    
#                if position == 200:
#                    if flag_2 == True:
#                        flag_2 = False
#                        action = self.learner2.querysetstate(state)
#                        
#                    r = prices.iloc[training_day].values - prices.iloc[training_day-1].values
#                    r = r*position
#                    action = self.learner2.query(state,r)
#                    new_position = position + (action-2)*200
                if flag_0 == True:
                    flag_0 = False
                    action = self.learner.querysetstate(state)
                
                r = (prices.iloc[training_day].values - prices.iloc[training_day-1].values)*position
#                r = prices.iloc[training_day].values*position + cash - sv
                action = self.learner.query(state,r)
                if state_0 == 0: #hold -200
                    new_position = position + action*200
                elif state_0 == 1:
                    new_position = position + (action-1)*200
                elif state_0 == 2:
                    new_position = position + (action-2)*200                
                               
                cash = cash - (new_position - position)*prices.values[training_day]
                position = new_position
                                
            if self.verbose:
                print training_day, position
            port_value.values[training_day] = cash + position*prices.values[training_day]
            training_day = training_day + 1
            
            if training_day == X2_SMA.shape[0]:
                training_day = 0
#                print epoch, port_value.values[-1]
                if port_value.values[-1] == previous_end_port:
                    repeated = repeated + 1
                    if repeated > 3:
                        if cash + position*prices.values[-1] > 2 * sv:
                            break
                        else:
                            self.learner = ql.QLearner(num_states=3*self.states_N**2,\
                                                       num_actions = 3, \
                                                       alpha = 0.2, \
                                                       gamma = 0.9, \
                                                       rar = random.random(), radr = random.random(), \
                                                       dyna = 0, \
                                                       verbose=False)
                            repeated = 0
                            continue
                else:
                    repeated = 0
                    
                epoch = epoch + 1
                if epoch > 100:
                    break
                current_port = cash + position*prices.values[-1]
                previous_end_port = current_port#port_value.copy.values[-1]
                
                port_value.values[:,:] = 0
                port_value.values[0,:] = sv
                position = 0
                cash = sv
Пример #43
0
    def sort(self, data, stock_list):
        day = max(self.periods) + 5
        # self.platform.log_info('stock_list: \n', stock_list)
        fields = ['close', 'volume']
        filter_stock = []
        stock_list['trend_val'] = 10
        for stock in stock_list['code']:
            df = self.platform.attribute_history(stock, day, unit=self.unit, fields=fields)
            for col in self.periods:
                # DataFrame has no rolling_mean() method; on py3-era pandas use .rolling().mean()
                ma = df[fields].rolling(window=col).mean() if self.platform.is_py3() \
                    else pd.rolling_mean(df[fields], window=col)
                for f in fields:
                    ma.rename(columns={f: 'MA' + f + str(col)}, inplace=True)
                df = df.join(ma)

            df['date'] = df.index
            df['code'] = stock
            jp_utils.jp_reset_range_index(df)
            df = df.dropna()
            # self.platform.log_info('dataframe: \n', df)
            # 5-day average volume > 10-day average volume
            temp_flag = []
            for f in fields:
                for index in range(len(self.periods) - 1):
                    field0 = 'MA' + f + str(self.periods[index])
                    field1 = 'MA' + f + str(self.periods[index + 1])
                    flag = df.iloc[-1][field0] > df.iloc[-1][field1]
                    temp_flag.append(flag)
                    self.platform.log_info(field0 + '-' + field1 + ':' + str(flag))
            # self.platform.log_info('temp_flag: ', temp_flag)
            if sum(temp_flag) == len(fields) * (len(self.periods) - 1):
                filter_stock.append(stock)
                self.platform.log_info('code:', self.platform.get_security_info(stock))
        stock_list.loc[stock_list['code'].isin(filter_stock), 'trend_val'] = stock_list['trend_val'] / 2
        new_params = self._params.copy()
        new_params['field'] = 'trend_val'
        return new_params, stock_list
Пример #44
0
#-*- coding:utf-8 -*-
# Peishichao

import numpy as np
import pandas as pd
inputfile = '../data/water_heater.xls'
n = 4

threshold = pd.Timedelta(minutes=5)
data = pd.read_excel(inputfile)

data[u'发生时间'] = pd.to_datetime(data[u'发生时间'], format='%Y%m%d%H%M%S')
data = data[data[u'水流量'] > 0]


def event_num(ts):
    d = data[u'发生时间'].diff() > ts
    return d.sum() + 1


dt = [pd.Timedelta(minutes=i) for i in np.arange(1, 9, 0.25)]
h = pd.DataFrame(dt, columns=[u'阈值'])
h[u'事件数'] = h[u'阈值'].apply(event_num)
h[u'斜率'] = h['事件数'].diff() / 0.25

h[u'斜率指标'] = pd.rolling_mean(h[u'斜率'].abs(), n)
ts = h[u'阈值'][h[u'斜率指标'].idxmin() - n]
if ts > threshold:
    ts = pd.Timedelta(minutes=4)
print(ts)
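
The same slope-stability search expressed on the method-based rolling API, assuming the h frame built above; the chosen threshold opens the flattest stretch of the event-count curve:

# mean absolute slope over n consecutive threshold candidates
h[u'斜率指标'] = h[u'斜率'].abs().rolling(window=n).mean()
# step back n rows from the flattest point to the start of that stretch
ts = h[u'阈值'][h[u'斜率指标'].idxmin() - n]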
Пример #45
0
full = set(devices)
n = len(full)
print('full set has %d items' % (n))

#######################################
# # II.Feature engineering
#######################################

#create useful features from the 9 attributes

#I calculate the rolling mean (and the rolling std for attributes 1 and 6) while keeping
#the window of the mean calculation as a variable that we can change.

for i in range(1, 10):
    device_failure['attribute' + str(i) + '_s_rolling_mean'] = pd.rolling_mean(
        device_failure['attribute' + str(i)], window=s_window,
        min_periods=5)  #.mean()
    device_failure['attribute' + str(i) + '_l_rolling_mean'] = pd.rolling_mean(
        device_failure['attribute' + str(i)], window=l_window,
        min_periods=30)  #.mean()

    if (i in (1, 6)):
        device_failure['attribute' + str(i) +
                       '_s_rolling_std'] = pd.rolling_std(
                           device_failure['attribute' + str(i)],
                           window=s_window,
                           min_periods=5)  #.std()
        device_failure['attribute' + str(i) +
                       '_l_rolling_std'] = pd.rolling_std(
                           device_failure['attribute' + str(i)],
                           window=l_window,
Пример #46
0
symbols = ["MSFT"]
startday = dt.datetime(2010, 1, 1)
endday = dt.datetime(2010, 12, 31)
timeofday = dt.timedelta(hours=16)
timestamps = du.getNYSEdays(startday, endday, timeofday)

dataobj = da.DataAccess('Yahoo')
voldata = dataobj.get_data(timestamps, symbols, "volume")
adjcloses = dataobj.get_data(timestamps, symbols, "close")
actualclose = dataobj.get_data(timestamps, symbols, "actual_close")

#adjcloses = adjcloses.fillna()
adjcloses = adjcloses.fillna(method='backfill')
adjcloses = adjcloses[symbols]

rolling_means = pandas.rolling_mean(adjcloses, 20, min_periods=20)
rolling_stds = pandas.rolling_std(adjcloses, 20, min_periods=20)
upperband = rolling_means + rolling_stds
lowerband = rolling_means - rolling_stds
Bollinger_val = (adjcloses - rolling_means) / (rolling_stds)

# Plot the prices
plt.clf()
#symtoplot = 'AAPL'
fig = plt.figure()
gs = gridspec.GridSpec(2, 1)
ax1 = fig.add_subplot(gs[0, :])
ax1.plot(adjcloses.index, adjcloses[symbols].values, label=symbols)
ax1.plot(adjcloses.index, rolling_means[symbols].values)
#upper band
ax1.plot(adjcloses.index, upperband[symbols].values)
Пример #47
0
def inspect(AcousticIndexes, BESTLOG):

    for i in np.arange(len(BESTLOG)):

        STARTOFFSET = 2000  # 1000#60000
        STOPOFFSET = 0  # 1000#4000

        QUENCH = BESTLOG.iloc[i]
        FILE = QUENCH.File + '.tdms'
        START = QUENCH.Start
        STOP = QUENCH.Stop
        BestStart = np.array([
            QUENCH.S1, QUENCH.S2, QUENCH.S3, QUENCH.S4, QUENCH.S5, QUENCH.S6,
            QUENCH.S8, QUENCH.S9
        ]) + START
        channels = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S8', 'S9']

        TDDF = getDataFrame(FILE)
        COLS = TDDF.columns[AcousticIndexes]

        for i in np.arange(len(AcousticIndexes)):

            mid = 0
            off = 2000

            AUDIO = TDDF[COLS[i]].iloc[START - STARTOFFSET:STOP + STOPOFFSET]
            MARK = BestStart[i]

            title = FILE + ' Channel: ' + channels[i] + ', start:' + str(MARK)
            ax = AUDIO.plot(c='blue', alpha=0.6)
            plt.plot(MARK,
                     np.mean(AUDIO),
                     marker='x',
                     linewidth=0,
                     markersize=14,
                     c='r')
            plt.title(title)

            IL = pd.rolling_mean(TDDF[TDDF.columns[2]], 500)
            OL = pd.rolling_mean(TDDF[TDDF.columns[3]], 500)
            VDIF = np.abs(IL - OL)

            VDIF.iloc[START - STARTOFFSET:STOP + STOPOFFSET].plot(ax=ax,
                                                                  label='VDIF')
            IL.iloc[START - STARTOFFSET:STOP + STOPOFFSET].plot(ax=ax,
                                                                linewidth=2)
            OL.iloc[START - STARTOFFSET:STOP + STOPOFFSET].plot(ax=ax,
                                                                linewidth=2)

            print(COLS[i])
            print(title)

            plt.axvline(START, c='g', alpha=0.5, linewidth=4)
            plt.axvline(STOP, c='r', alpha=0.5, linewidth=4)

            mid = MARK
            plt.xlim(mid - off, mid + off)

            plt.legend()
            plt.grid()
            #plt.savefig(FILE + '_channel_' +  channels[i] + '_start_' + str(MARK)+'.jpg')
            #plt.cla()
            #plt.clf()

            plt.show()
Пример #48
0
def feature_engineering(df, complete_dates):

    df = df.groupby( ["Ciclo_Estacion", "day_counter", "ITERATION", "iteration_start", "iteration_end"])["hora"].count()
    df = df.sort_index()
    df = df.reset_index()
    df = df.rename( columns = {"hora": "flow"})

    df_append = pd.DataFrame()
    for station in df.Ciclo_Estacion.unique():
        df_station = df[df.Ciclo_Estacion == station]
        df_merge = complete_dates.merge( df_station, on= ["day_counter", "ITERATION", "iteration_start", "iteration_end"], how ="left" )
        df_merge["Ciclo_Estacion"] = station
        df_merge.loc[pd.isnull(df_merge.flow), "flow"] =0 

        if len(df_append) ==0 :
            df_append = df_merge
        else:
            df_append = df_append.append(df_merge)

    df = df_append

    #ITERATION (15 minutes) LAG VALUES
    df["flow_lag1"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(1)
    df["flow_lag2"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(2)
    df["flow_lag3"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(3)
    df["flow_lag4"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(4)
    df["flow_lag5"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(5)
    df["flow_lag6"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(6)
    df["flow_lag7"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(7)
    df["flow_lag8"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(8)

    df["flow_rollingmean_lag1_4"]  = pd.rolling_mean( df["flow_lag1"], 4)
    df["flow_rollingmean_lag1_8"]  = pd.rolling_mean( df["flow_lag1"], 8)
    df["flow_rollingmean_lag1_12"] = pd.rolling_mean( df["flow_lag1"], 12)
    df["flow_rollingmean_lag1_16"] = pd.rolling_mean( df["flow_lag1"], 16)

    df["flow_rollingmean_lag4_4"]  = pd.rolling_mean( df["flow_lag4"], 4)
    df["flow_rollingmean_lag4_8"]  = pd.rolling_mean( df["flow_lag4"], 8)
    df["flow_rollingmean_lag4_12"] = pd.rolling_mean( df["flow_lag4"], 12)
    df["flow_rollingmean_lag4_16"] = pd.rolling_mean( df["flow_lag4"], 16)

    df["flow_rollingmean_lag8_4"]  = pd.rolling_mean( df["flow_lag8"], 4)
    df["flow_rollingmean_lag8_8"]  = pd.rolling_mean( df["flow_lag8"], 8)
    df["flow_rollingmean_lag8_12"] = pd.rolling_mean( df["flow_lag8"], 12)
    df["flow_rollingmean_lag8_16"] = pd.rolling_mean( df["flow_lag8"], 16)

    df["flow_ewma_lag1_4"]  = pd.ewma( df["flow_lag1"], 4)
    df["flow_ewma_lag1_8"]  = pd.ewma( df["flow_lag1"], 8)
    df["flow_ewma_lag1_12"] = pd.ewma( df["flow_lag1"], 12)    
    df["flow_ewma_lag1_16"] = pd.ewma( df["flow_lag1"], 16)

    df["flow_ewma_lag4_4"]  = pd.ewma( df["flow_lag4"], 4)
    df["flow_ewma_lag4_8"]  = pd.ewma( df["flow_lag4"], 8)
    df["flow_ewma_lag4_12"] = pd.ewma( df["flow_lag4"], 12)    
    df["flow_ewma_lag4_16"] = pd.ewma( df["flow_lag4"], 16)

    df["flow_ewma_lag8_4"]  = pd.ewma( df["flow_lag4"], 4)
    df["flow_ewma_lag8_8"]  = pd.ewma( df["flow_lag8"], 8)
    df["flow_ewma_lag8_12"] = pd.ewma( df["flow_lag8"], 12)    
    df["flow_ewma_lag8_16"] = pd.ewma( df["flow_lag8"], 16)

    #DAYs LAG VALUES
    df["flow_lag1day"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(94)
    df["flow_lag2day"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(95)
    df["flow_lag3day"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(96)
    df["flow_lag4day"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(97)
    df["flow_lag5day"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(98)
    df["flow_lag6day"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(99)
    df["flow_lag7day"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(100)
    df["flow_lag8day"] = df.groupby(["Ciclo_Estacion"])["flow"].shift(101)

    df["flow_rollingmean_lag1day_4"]  = pd.rolling_mean( df["flow_lag1day"], 4)
    df["flow_rollingmean_lag1day_8"]  = pd.rolling_mean( df["flow_lag1day"], 8)
    df["flow_rollingmean_lag1day_12"] = pd.rolling_mean( df["flow_lag1day"], 12)
    df["flow_rollingmean_lag1day_16"] = pd.rolling_mean( df["flow_lag1day"], 16)


    df["flow_rollingmean_lag4day_4"]  = pd.rolling_mean( df["flow_lag4day"], 4)
    df["flow_rollingmean_lag4day_8"]  = pd.rolling_mean( df["flow_lag4day"], 8)
    df["flow_rollingmean_lag4day_12"] = pd.rolling_mean( df["flow_lag4day"], 12)
    df["flow_rollingmean_lag4day_16"] = pd.rolling_mean( df["flow_lag4day"], 16)


    df["flow_rollingmean_lag8day_4"]  = pd.rolling_mean( df["flow_lag8day"], 4)
    df["flow_rollingmean_lag8day_8"]  = pd.rolling_mean( df["flow_lag8day"], 8)
    df["flow_rollingmean_lag8day_12"] = pd.rolling_mean( df["flow_lag8day"], 12)
    df["flow_rollingmean_lag8day_16"] = pd.rolling_mean( df["flow_lag8day"], 16)

    df["flow_ewma_lag1day_4"]  = pd.ewma( df["flow_lag1day"], 4)
    df["flow_ewma_lag1day_8"]  = pd.ewma( df["flow_lag1day"], 8)
    df["flow_ewma_lag1day_12"] = pd.ewma( df["flow_lag1day"], 12)    
    df["flow_ewma_lag1day_16"] = pd.ewma( df["flow_lag1day"], 16)

    df["flow_ewma_lag4day_4"]  = pd.ewma( df["flow_lag4day"], 4)
    df["flow_ewma_lag4day_8"]  = pd.ewma( df["flow_lag4day"], 8)
    df["flow_ewma_lag4day_12"] = pd.ewma( df["flow_lag4day"], 12)    
    df["flow_ewma_lag4day_16"] = pd.ewma( df["flow_lag4day"], 16)

    df["flow_ewma_lag8day_4"]  = pd.ewma( df["flow_lag8day"], 4)
    df["flow_ewma_lag8day_8"]  = pd.ewma( df["flow_lag8day"], 8)
    df["flow_ewma_lag8day_12"] = pd.ewma( df["flow_lag8day"], 12)    
    df["flow_ewma_lag8day_16"] = pd.ewma( df["flow_lag8day"], 16)

    #WEEK LAG VALUES
    df["month"]     = df.iteration_start.apply(lambda x: x.date().month)
    df["day_month"] = df.iteration_start.apply(lambda x: x.date().day)

    return df
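
pd.rolling_mean and pd.ewma are gone in pandas >= 0.23; the lag features above translate mechanically, e.g. for the 1-step lag. Note that the old positional second argument of pd.ewma was the center of mass, so the matching keyword is com=, not span=:

df["flow_rollingmean_lag1_4"] = df["flow_lag1"].rolling(window=4).mean()
df["flow_ewma_lag1_4"] = df["flow_lag1"].ewm(com=4).mean()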
Пример #49
0
def heatmap(col, t_timestamp, t_win='1d'):

    df_merge = pd.DataFrame()
    smin = 0
    smax = 255
    mini = 0
    maxi = 1300

    if (t_win == '1d'):
        for_base = 0
        timew = 24
        interval = '30T'
    elif (t_win == '3d'):
        for_base = 0
        timew = 72
        interval = '120T'
    elif (t_win == '30d'):
        for_base = int(t_timestamp[11] + t_timestamp[12])
        timew = 720
        interval = '24H'
    else:
        print "invalid monitoring window"

    f_timestamp = pd.to_datetime(
        pd.to_datetime(t_timestamp) - timedelta(hours=timew))
    t_timestamp = pd.to_datetime(
        pd.to_datetime(t_timestamp) + timedelta(minutes=30))

    if (len(col) > 4):

        query = "select num_nodes from senslopedb.site_column_props where name = '%s'" % col

        node = qs.GetDBDataFrame(query)
        for node_num in range(1, int(node.num_nodes[0]) + 1):

            df = CSR.getsomscaldata(col,
                                    node_num,
                                    f_timestamp,
                                    t_timestamp,
                                    if_multi=True)

            if (df.empty == False):
                df = df.reset_index()
                df.ts = pd.to_datetime(df.ts)

                df.index = df.ts
                df.drop('ts', axis=1, inplace=True)

                df = df[(df < 1300) & (df > 0)]
                df['cval'] = df['mval1'].apply(lambda x: (x - mini) * smax /
                                               (maxi) + smin)
                dfrs = pd.rolling_mean(
                    df.resample(interval, base=for_base),
                    window=3,
                    min_periods=1)  #mean for one day (dataframe)

                if 'mval1' in df.columns:
                    dfrs = dfrs.drop('mval1', axis=1)


#				n=len(dfrs)-1
                dfrs = dfrs.reset_index(0)

                #				dfp=dfrs[n-timew:n]
                #				dfp = dfp.reset_index()

                df_merge = pd.concat([df_merge, dfrs], axis=0)
                df_merge['ts'] = df_merge.ts.astype(object).astype(str)

        dfjson = df_merge.to_json(orient='records', double_precision=0)
        print dfjson
    else:
        return 'v1'
Пример #50
0
def inspect_voltage(AcousticIndexes, BESTLOG):

    for i in np.arange(len(BESTLOG)):

        STARTOFFSET = 2000  # 1000#60000
        STOPOFFSET = 0  # 1000#4000

        QUENCH = BESTLOG.iloc[i]
        FILE = QUENCH.File + '.tdms'
        START = QUENCH.Start
        STOP = QUENCH.Stop
        BestStart = np.array([
            QUENCH.S1, QUENCH.S2, QUENCH.S3, QUENCH.S4, QUENCH.S5, QUENCH.S6,
            QUENCH.S8, QUENCH.S9
        ]) + START
        channels = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S8', 'S9']

        TDDF = getDataFrame(FILE)
        COLS = TDDF.columns[AcousticIndexes]

        for i in np.arange(len(AcousticIndexes)):

            mid = 0
            off = 2000

            IL = pd.rolling_mean(TDDF[TDDF.columns[2]], 500)
            OL = pd.rolling_mean(TDDF[TDDF.columns[3]], 500)

            IL = IL - IL.dropna().iloc[:200].mean()
            OL = OL - OL.dropna().iloc[:200].mean()

            VDIF = np.abs(IL - OL)

            ax = VDIF.iloc[START - STARTOFFSET:STOP +
                           STOPOFFSET].plot(label='VDIF')
            IL.iloc[START - STARTOFFSET:STOP + STOPOFFSET].plot(ax=ax,
                                                                linewidth=2)
            OL.iloc[START - STARTOFFSET:STOP + STOPOFFSET].plot(ax=ax,
                                                                linewidth=2)

            MARK = BestStart[i]
            AUDIO = TDDF[COLS[i]].iloc[START - STARTOFFSET:STOP + STOPOFFSET]
            title = FILE + ' Channel: ' + channels[i] + ', start:' + str(MARK)

            ENV = MakePreciseEnvelope(AUDIO)
            ENV_DF = pd.DataFrame(ENV, index=AUDIO.index) / 400000
            ENV_DF.plot(ax=ax)

            print(COLS[i])
            print(title)

            plt.axvline(START, c='g', alpha=0.5, linewidth=4)
            plt.axvline(STOP, c='r', alpha=0.5, linewidth=4)

            AUDIO.plot(c='blue', alpha=0.4, ax=ax, secondary_y=True)
            plt.plot(MARK,
                     np.mean(AUDIO),
                     marker='x',
                     linewidth=0,
                     markersize=14,
                     c='r')
            plt.axvline(MARK, c='r', linewidth=1)
            plt.title(title)

            AFTBUFFER = 500
            FOREBUFFER = 500
            channel_data = TDDF[COLS[i]].iloc[MARK - FOREBUFFER:MARK +
                                              AFTBUFFER]
            channel_val = channel_data.values
            channel_env = MakePreciseEnvelope(channel_val)

            ENV_DF = pd.DataFrame(channel_env,
                                  index=channel_data.index) / 200000
            ENV_DF = ENV_DF - (pd.rolling_mean(ENV_DF, 15).diff()) * 30
            ENV_DF.plot(ax=ax, c='black')
            #
            #            ENV_DF = pd.DataFrame(channel_env,index=channel_data.index)/400000
            #            ENV_DF = pd.rolling_mean(1000*ENV_DF.diff(),30)
            #            ENV_DF.plot(ax=ax,c='purple')
            #
            #            ENV_DF = pd.DataFrame(channel_env,index=channel_data.index)/400000
            #            ENV_DF = 1000*pd.rolling_mean(ENV_DF,30).diff()
            #            ENV_DF.plot(ax=ax,c='orange')

            #            ENV_DF_2 = ((pd.DataFrame(channel_env,index=channel_data.index)/40000).diff().abs())-0.05
            #            ENV_DF_2.plot(ax=ax,c='purple',alpha=0.5)
            #
            #            (ENV_DF-ENV_DF_2).plot(ax=ax,c='orange')

            mid = MARK
            plt.xlim(mid - off, mid + off)

            plt.title(title)
            plt.legend()
            plt.grid()
            #plt.savefig('UBER' + FILE + '_channel_' +  channels[i] + '_start_' + str(MARK)+'.jpg')
            #plt.cla()
            #plt.clf()

            plt.show()
def main():
    """
	This demo is for simulating the strategy
	Variables
	"""
    dt_start = dt.datetime(2013, 1, 1)
    dt_end = dt.datetime(2015, 12, 31)

    sym_list = 'sp5002012.txt'
    market_sym = 'SPY'

    starting_cash = 100000
    bol_period = 20

    print "Setting Up ..."
    # Obtatining data from Yahoo
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    dataobj = da.DataAccess('Yahoo')
    ls_symbols = load_symlists(sym_list)
    ls_symbols.append(market_sym)
    """
	key values. Creating a dictionary.
	"""
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))
    """
	fill out N/A values
	"""
    for s_key in ls_keys:
        d_data[s_key] = d_data[s_key].fillna(method='ffill')
        d_data[s_key] = d_data[s_key].fillna(method='bfill')
        d_data[s_key] = d_data[s_key].fillna(1.0)
    """
	df_close contains only a close column.
	"""
    df_close = d_data['close']
    df_volume = d_data['volume']

    print "Finding Events ..."
    ''' 
	Finding the event dataframe 
	'''
    ts_market = df_close['SPY']

    # Creating an empty dataframe
    df_events = copy.deepcopy(df_close) * 0

    # Time stamps for the event range
    ldt_timestamps = df_close.index

    rolling_mean = pd.rolling_mean(df_close, window=bol_period)
    rolling_std = pd.rolling_std(df_close, window=bol_period)

    rolling_mean_vol = pd.rolling_mean(df_volume, window=bol_period)
    rolling_std_vol = pd.rolling_std(df_volume, window=bol_period)
    '''
	finding_events starts here
	'''

    bol_clo = (df_close - rolling_mean) / rolling_std

    delays = 14

    for s_sym in ls_symbols:
        for i in range(1, len(ldt_timestamps) - delays):
            bol_tod = bol_clo[s_sym].loc[ldt_timestamps[i]]
            bol_yes = bol_clo[s_sym].loc[ldt_timestamps[i - 1]]
            bol_tod_mark = bol_clo["SPY"].loc[ldt_timestamps[i]]

            if (bol_tod <= -3.0 and bol_yes >= -3.0 and bol_tod_mark >= 1.0):
                for delay in range(delays):
                    df_events[s_sym].loc[ldt_timestamps[i + delay]] += (
                        30000.00 / df_close[s_sym].loc[ldt_timestamps[i]])
                    if df_close[s_sym].loc[ldt_timestamps[
                            i +
                            delay]] > df_close[s_sym].loc[ldt_timestamps[i]]:
                        break
            elif (bol_tod >= 2.0 and bol_yes <= 2.0 and bol_tod_mark <= -1.0):
                for delay in range(delays):
                    df_events[s_sym].loc[ldt_timestamps[i + delay]] += (
                        10000.00 / df_close[s_sym].loc[ldt_timestamps[i]])
                    if df_close[s_sym].loc[ldt_timestamps[
                            i +
                            delay]] > df_close[s_sym].loc[ldt_timestamps[i]]:
                        break

    print "Starting Simulation ..."

    # Find symbols that satisfy the event condition.
    ls_symbols_red = []

    for sym in ls_symbols:
        for i in range(len(ldt_timestamps)):
            if df_events[sym].loc[ldt_timestamps[i]] != 0:
                ls_symbols_red.append(sym)
                break
    '''
	value and cash are zero arrays
	'''
    # df_orders = copy.deepcopy(df_events)
    print "ls_symbols_red", ls_symbols_red
    df_orders = df_events[ls_symbols_red]
    value = copy.deepcopy(df_events) * 0
    cash = copy.deepcopy(value[market_sym])
    '''
	Update value
	'''
    print "Updating Value and Cash Array..."
    for s_sym in ls_symbols_red:
        for i in range(len(ldt_timestamps)):
            ind_time = ldt_timestamps[i]
            if i == 0:
                if df_orders[s_sym].loc[ind_time] != 0:
                    sym_value = df_orders[s_sym].loc[ind_time] * df_close[
                        s_sym].loc[ind_time]
                    value[s_sym].loc[ind_time] = sym_value
                    cash[ind_time] -= sym_value
            else:
                ind_time_yest = ldt_timestamps[i - 1]
                if df_orders[s_sym].loc[ind_time] != 0 and df_orders[
                        s_sym].loc[ind_time_yest] == 0:
                    sym_value = df_orders[s_sym].loc[ind_time] * df_close[
                        s_sym].loc[ind_time]
                    value[s_sym].loc[ind_time] = sym_value
                    cash[ind_time] -= sym_value
                elif df_orders[s_sym].loc[ind_time_yest] != 0:
                    sym_value = df_orders[s_sym].loc[ind_time] * df_close[
                        s_sym].loc[ind_time]
                    value[s_sym].loc[ind_time] = sym_value
                    cash[ind_time] -= (df_orders[s_sym].loc[ind_time] -
                                       df_orders[s_sym].loc[ind_time_yest]
                                       ) * df_close[s_sym].loc[ind_time_yest]
    '''
	Update cash
	'''
    cash.to_csv("c:/cash_pre.csv", sep=",", mode="w")
    print "Modifying Cash Array..."
    cash[ldt_timestamps[0]] += starting_cash
    for i in range(1, len(ldt_timestamps)):
        ind_prev = cash[ldt_timestamps[i - 1]]
        ind_curr = cash[ldt_timestamps[i]]
        cash[ldt_timestamps[i]] = ind_curr + ind_prev

    # Save to csv files
    cash.to_csv("c:/cash.csv", sep=",", mode="w")
    value.to_csv("c:/portfolio.csv", sep=",", mode="w")

    print "Updating Total..."
    for i in range(len(ldt_timestamps)):
        sym_sum = 0
        for s_sym in ls_symbols_red:
            sym_sum += value[s_sym].loc[ldt_timestamps[i]]
        cash[ldt_timestamps[i]] += sym_sum

    # Save to csv files
    cash.to_csv("c:/total.csv", sep=",", mode="w")
    ts_market.to_csv("c:/ts_market.csv", sep=",", mode="w")

    # Normalizing dataframes.
    cash /= cash[0]
    ts_market /= ts_market[0]

    print "Summary..."
    tot_ret_fund = cash[-1]
    tot_ret_mark = ts_market[-1]
    '''
	Create new array for fund and market
	'''
    daily_ret_fund = np.zeros((len(ldt_timestamps), 1))
    daily_ret_mark = copy.deepcopy(daily_ret_fund)

    for i in range(1, len(ldt_timestamps)):
        daily_ret_fund[
            i] = cash[ldt_timestamps[i]] / cash[ldt_timestamps[i - 1]] - 1
        daily_ret_mark[i] = ts_market[ldt_timestamps[i]] / ts_market[
            ldt_timestamps[i - 1]] - 1

    vol_fund = np.std(daily_ret_fund)
    vol_mark = np.std(daily_ret_mark)

    avg_ret_fund = np.average(daily_ret_fund)
    avg_ret_mark = np.average(daily_ret_mark)

    sharpe_fund = np.sqrt(252) * avg_ret_fund / vol_fund
    sharpe_mark = np.sqrt(252) * avg_ret_mark / vol_mark

    print "Start Date:", dt_start
    print "End Date  :", dt_end
    print " "
    print "Sharpe Ratio of Fund: ", sharpe_fund
    print "Sharpe Ratio of $SPX: ", sharpe_mark
    print " "
    print "Total Return of Fund: ", tot_ret_fund
    print "Total Return of $SPX: ", tot_ret_mark
    print " "
    print "Standard Deviation of Fund: ", vol_fund
    print "Standard Deviation of $SPX: ", vol_mark
    print " "
    print "Average Daily Return of Fund: ", avg_ret_fund
    print "Average Daily Return of $SPX: ", avg_ret_mark

    # plt.plot(cash.index, cash, 'r', ts_market.index, ts_market, 'b')
    # f, axarr = plt.subplots(3, sharex=True)
    # axarr[0].plot(cash.index, cash, 'r', ts_market.index, ts_market, 'b')
    # axarr[0].set_title('Testing')
    # axarr[1].plot(ts_market.index, df_volume["SPY"], 'b')
    # axarr[2].plot(ts_market.index, rolling_std["SPY"], 'b')
    # plt.show()

    # df_volume_norm = df_volume["SPY"]/df_volume["SPY"][ldt_timestamps[0]]

    f, axarr = plt.subplots(3, sharex=True)
    axarr[0].plot(cash.index, cash, 'r', ts_market.index, ts_market, 'b')
    axarr[0].set_title('Testing')
    axarr[1].plot(ts_market.index, df_volume["SPY"], 'b', ts_market.index,
                  rolling_mean_vol["SPY"] + rolling_std_vol["SPY"], 'b--',
                  ts_market.index,
                  rolling_mean_vol["SPY"] - rolling_std_vol["SPY"], 'b--')
    axarr[2].plot(ts_market.index, rolling_std["SPY"], 'g')
    plt.show()
import pandas as pd
from datetime import datetime
from sklearn import datasets, linear_model
from sklearn.metrics import mean_absolute_error

hist = pd.read_csv('sphist.csv', parse_dates=['Date'])
hist.sort_values('Date', ascending=True, inplace=True)
hist['avg_5_days'] = pd.rolling_mean(hist.Close, window=5).shift(1)
hist['avg_30_days'] = pd.rolling_mean(hist.Close, window=30).shift(1)
hist['avg_365_days'] = pd.rolling_mean(hist.Close, window=365).shift(1)

clean_hist = hist[hist['Date'] > datetime(year=1951, month=1, day=2)].copy()
clean_hist.dropna(axis=0, inplace=True)
train = clean_hist[
    clean_hist['Date'] < datetime(year=2013, month=1, day=1)].copy()
test = clean_hist[
    clean_hist['Date'] >= datetime(year=2013, month=1, day=1)].copy()

features = ['avg_5_days', 'avg_30_days', 'avg_365_days']
lr = linear_model.LinearRegression()
lr.fit(train[features], train['Close'])
predictions = lr.predict(test[features])
test_msa = mean_absolute_error(test['Close'], predictions)
print(test_msa)
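
On a current pandas install the three moving-average features would be built with the rolling method instead; each trailing mean is shifted by one row so a prediction for a given day only sees past closes:

hist['avg_5_days'] = hist['Close'].rolling(window=5).mean().shift(1)
hist['avg_30_days'] = hist['Close'].rolling(window=30).mean().shift(1)
hist['avg_365_days'] = hist['Close'].rolling(window=365).mean().shift(1)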
Пример #53
0
def main():
    """
    This function is called from the main block. The purpose of this function is to contain all the calls to
    business logic functions
    :return: int - Return 0 or 1, used as the exit code, depending on successful or erroneous flow
    """
    # Wrap in a try block so that we catch any exceptions thrown by other functions and return a 1 for graceful exit
    try:
        # ===== Step 0: Sanitation =====
        # Fix pandas-datareader's issues with Yahoo Finance (since Yahoo abandoned its API)
        yahoo_finance_bridge()

        # ===== Step 1: Get the Ticker From user =====
        # Prompt the user to input the data that needs to be downloaded
        stock_ticker = get_ticker_from_user()
        logging.debug('Stock Ticker is: %s' % str(stock_ticker))

        # ===== Step 2: Download the data for the Ticker =====
        # Get the data fetched from Yahoo Finance
        data = get_data_from_yahoo_finance(str(stock_ticker))
        data = pd.DataFrame(data['Open'])
        data = data.sort_index(axis=0, ascending=True)

        # Calculate daily differences
        data['diff'] = data.diff(periods=1)
        # Calculate the cumulative returns
        data['cum'] = data['diff'].cumsum()

        # Mean reversion
        # Setting position long = 1 and short = -1 based on previous day move
        delta = 0.005
        # If the previous day's price difference was <= -delta, we go long
        # If the previous day's price difference was >= delta, we go short
        data['position_mr'] = np.where(
            data['diff'].shift(1) <= -delta, 1,
            np.where(data['diff'].shift(1) >= delta, -1, 0))
        data['result_mr'] = (data['diff'] * data['position_mr']).cumsum()

        # Only trade when the strategy's cumulative result is above its own 200-day moving average
        win = 200
        data['ma_mr'] = data['result_mr'].rolling(window=win).mean()
        filtering_mr = data['result_mr'].shift(1) > data['ma_mr'].shift(1)
        data['filteredresult_mr'] = np.where(
            filtering_mr, data['diff'] * data['position_mr'], 0).cumsum()
        # To run the strategy unfiltered, use the line below instead:
        # df['filteredresult_mr'] = (df['diff'] * df['position_mr']).cumsum()
        data[['ma_mr', 'result_mr', 'filteredresult_mr']].plot(figsize=(10, 8))
        plt.show()
        plt.close()

        # Breakout
        # Set position long = 1 / short = -1 based on the previous day's move.
        # A negative delta flips the logic, turning the strategy into a breakout system
        delta = -0.01
        # If the previous day's price difference was <= -delta, we go long;
        # if it was >= delta, we go short
        data['position_bo'] = np.where(
            data['diff'].shift(1) <= -delta, 1,
            np.where(data['diff'].shift(1) >= delta, -1, 0))
        data['result_bo'] = (data['diff'] * data['position_bo']).cumsum()

        # Again, only trade when the cumulative result is above its 200-day moving average
        win = 200
        data['ma_bo'] = data['result_bo'].rolling(window=win).mean()
        filtering_bo = data['result_bo'].shift(1) > data['ma_bo'].shift(1)
        data['filteredresult_bo'] = np.where(
            filtering_bo, data['diff'] * data['position_bo'], 0).cumsum()
        # df['filteredresult_bo'] = (df['diff'] * df['position_bo']).cumsum()
        data[['ma_bo', 'result_bo', 'filteredresult_bo']].plot(figsize=(10, 8))
        plt.show()
        plt.close()
        # Combine the mean-reversion and breakout strategy results
        data['combi'] = data['filteredresult_mr'] + data['filteredresult_bo']
        data[['combi', 'filteredresult_mr',
              'filteredresult_bo']].plot(figsize=(10, 8))

        # Split off the first 80% of the data for in-sample estimation
        split_idx = int(0.8 * len(data))
        eighty_data = data.iloc[:split_idx]

        # Calculate Optimal f on the 80% sample FOR MEAN REVERSION.
        # Win probability uses the in-sample row count (the source divided by
        # the full data set), and the payoff ratio uses the absolute average loss.
        p_mr = float(len(eighty_data[eighty_data['result_mr'] > 0])) / float(
            len(eighty_data))
        plr_mr = eighty_data[eighty_data['result_mr'] > 0]['result_mr'].mean() / \
            abs(eighty_data[eighty_data['result_mr'] < 0]['result_mr'].mean())
        # Kelly formula: f = (p * (b + 1) - 1) / b -- the source was missing
        # the outer parentheses
        op_f_mr = (p_mr * (plr_mr + 1) - 1) / plr_mr
        print('Optimal f for MR is: %s' % str(op_f_mr))

        # Calculate Optimal f on the 80% sample FOR BREAKOUT (same fixes as above)
        p_bo = float(len(eighty_data[eighty_data['result_bo'] > 0])) / float(
            len(eighty_data))
        plr_bo = eighty_data[eighty_data['result_bo'] > 0]['result_bo'].mean() / \
            abs(eighty_data[eighty_data['result_bo'] < 0]['result_bo'].mean())
        op_f_bo = (p_bo * (plr_bo + 1) - 1) / plr_bo
        print('Optimal f for BO is: %s' % str(op_f_bo))

        # Calculate KPIs on the remaining 20% of the data. The source sliced
        # the *first* 20% again, which overlaps the estimation window; the
        # held-out tail is what the 80/20 split implies.
        twenty_data = data.iloc[split_idx:]
        import ffn  # ffn attaches calc_stats() to pandas Series on import
        # FOR Moving Average
        df_portfolio_value_mr = twenty_data['result_mr']
        perf = df_portfolio_value_mr.calc_stats()
        perf.plot()
        plt.show()
        plt.close()
        perf.display()  # display() prints the stats table itself

        # FOR Breakout
        df_portfolio_value_bo = twenty_data['result_bo']
        perf_bo = df_portfolio_value_bo.calc_stats()
        perf_bo.plot()
        plt.show()
        plt.close()
        perf_bo.display()  # display() prints the stats table itself

    except BaseException as e:
        # Casting a wide net to catch all exceptions
        print('\n%s' % str(e))
        return 1
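# For reference, a minimal self-contained sketch of the Kelly / "Optimal f"
# calculation the code above approximates, applied to per-trade P&L values
# (the sample trades below are made up):
def optimal_f(trade_results):
    wins = [r for r in trade_results if r > 0]
    losses = [r for r in trade_results if r < 0]
    p = len(wins) / len(trade_results)                            # win probability
    b = (sum(wins) / len(wins)) / abs(sum(losses) / len(losses))  # payoff ratio
    return (p * (b + 1) - 1) / b


print(optimal_f([2, 2, 2, -1, -1]))  # 60% winners at +2 vs -1 losers -> 0.4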
Example #54
0
def get_rolling_mean(values, window):
    """Return rolling mean of given values, using specified window size."""
    return values.rolling(window=window).mean()
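# Quick usage sketch for the helper above (the price series is made up):
import pandas as pd

prices = pd.Series([10.0, 11.0, 12.0, 13.0, 14.0])
print(get_rolling_mean(prices, window=3))
# First two values are NaN; the third is (10 + 11 + 12) / 3 = 11.0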
Example #55
0
    def organize_data(self):
        position_data, index_data, position_turn_over = self.load_data()

        #### index_data ####
        ## Calculate moving averages to find the trend. Note: despite the
        ## MA5/MA10 names, the source uses 30- and 60-period windows.
        index_data['MA5'] = index_data['close'].rolling(30).mean()
        index_data['MA10'] = index_data['close'].rolling(60).mean()

        index_data['trend'] = index_data['MA5'] - index_data['MA10']

        index_data = index_data.sort_values(['update_date'])
        for col in ['trend', 'MA5', 'MA10', 'position_all']:
            index_data[col] = index_data[col].shift(1)
        index_data['log_open'] = np.log(index_data['open'])
        index_data['return_rate'] = index_data['log_open'].diff()
        index_data = index_data.drop(['log_open'], axis=1)

        def hmm(category):
            def hmm_with_category(day):
                return execute(day, category)

            return hmm_with_category

        exe = hmm(self.category.upper())

        # index_data['trend']=map(exe,index_data['update_date'])

        #### position_data ####
        def position_org(position_data):
            self.position_data_org = pd.DataFrame(columns=[
                'company_name', 'position', 'position_chg', 'update_date',
                'contract'
            ])
            temp = position_data[[
                'company_name_2', 'hold_vol_buy', 'hold_vol_buy_chg',
                'update_date', 'contract'
            ]]
            temp = temp.rename(
                columns={
                    'company_name_2': 'company_name',
                    'hold_vol_buy': 'position',
                    'hold_vol_buy_chg': 'position_chg'
                })
            temp['direction_tag'] = temp['position_chg'].apply(lambda x: 10
                                                               if x > 0 else 0)
            temp['tag'] = 'pos'
            self.position_data_org = pd.concat([self.position_data_org, temp])
            temp = position_data[[
                'company_name_3', 'hold_vol_sell', 'hold_vol_sell_chg',
                'update_date', 'contract'
            ]]
            temp = temp.rename(
                columns={
                    'company_name_3': 'company_name',
                    'hold_vol_sell': 'position',
                    'hold_vol_sell_chg': 'position_chg'
                })
            temp['position'] = -1 * temp['position']
            temp['position_chg'] = -1 * temp['position_chg']
            temp['direction_tag'] = temp['position_chg'].apply(lambda x: 1
                                                               if x < 0 else 0)
            temp['tag'] = 'neg'
            self.position_data_org = pd.concat([self.position_data_org, temp])
            return self.position_data_org

        self.position_data_org = position_org(position_data)

        self.position_data_org_2 = self.position_data_org.groupby(
            ['update_date', 'company_name']).sum()
        try:
            self.position_data_org_2 = self.position_data_org_2.drop(
                ['contract'], axis=1)
            self.position_data_org_1 = self.position_data_org.groupby(
                ['update_date', 'company_name']).contract.count()
            self.position_data_org = pd.concat(
                [self.position_data_org_2, self.position_data_org_1],
                axis=1,
                join='inner')
            self.position_data_org = self.position_data_org.loc[:, [
                'position', 'position_chg', 'direction_tag', 'contract'
            ]]
        except:
            self.position_data_org = self.position_data_org_2
            self.position_data_org['contract'] = 1
        self.position_data_org.reset_index(inplace=True)

        # Extract the position-change records for the selected brokers
        self.position_data_selected = pd.DataFrame(
            columns=self.position_data_org.columns)
        for item in self.brokerName:
            print(item)
            temp = self.position_data_org[
                self.position_data_org['company_name'] == item]
            if len(temp) != 0:
                self.position_data_selected = pd.concat(
                    [self.position_data_selected, temp])
            else:
                print('cannot find %s in data, please check...' % item)

        # Align the filtered records with the full set of trading dates
        self.position_data_selected = pd.merge(index_data[['update_date']],
                                               self.position_data_selected,
                                               on=['update_date'],
                                               how='outer')
        ## Use data available at today's close as the basis for tomorrow's decision
        self.position_data_lagged = pd.DataFrame()
        for i, j in self.position_data_selected.groupby('company_name'):
            j = j.sort_values('update_date')
            for col in ['position', 'position_chg', 'direction_tag']:
                j[col] = j[col].shift(1)
            # Append once per group; the source appended inside the inner loop,
            # duplicating each group three times
            self.position_data_lagged = pd.concat([self.position_data_lagged, j])

        #### position_turn_over ####
        position_turn_over = position_turn_over.loc[:, [
            'update_date', 'turn_over_rate'
        ]]
        position_turn_over['turn_over_rate'] = position_turn_over[
            'turn_over_rate'].shift(1)
        return index_data, self.position_data_lagged, position_turn_over
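# The MA-difference 'trend' column above is a plain moving-average crossover
# signal. A self-contained sketch of the same idea with the current rolling
# API, using the 5/10-period windows the column names suggest (synthetic data):
import numpy as np
import pandas as pd

close = pd.Series(100 + np.cumsum(np.random.randn(200)))
ma_fast = close.rolling(5).mean()
ma_slow = close.rolling(10).mean()
trend = ma_fast - ma_slow           # > 0 suggests an uptrend, < 0 a downtrend
signal = np.sign(trend).shift(1)    # shift so today trades on yesterday's trend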
Example #56
0
import pandas as pd
from datetime import timedelta
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor


def predict(df, prediction_start_date, prediction_end_date, predict_tomorrow):
    # Append a placeholder row for the day to be predicted
    df.loc[len(df)] = [predict_tomorrow, 0, 0, 0, 0, 0, 0]
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
    df['year'] = pd.DatetimeIndex(df['Date']).year
    df = df.set_index('Date', drop=True)
    df = df.sort_index(axis=0, ascending=True)
    # Rolling features; shift(1) keeps each row blind to its own day's data
    df['avg_close_price_day_5'] = df['Close'].rolling(window=5).mean().shift(1)
    df['avg_close_price_day_30'] = df['Close'].rolling(
        window=30).mean().shift(1)
    df['avg_close_price_day_365'] = df['Close'].rolling(
        window=365).mean().shift(1)
    df['ratio_avg_close_price_5_365'] = df['avg_close_price_day_5'] / df[
        'avg_close_price_day_365']
    df['std_close_price_day_5'] = df['Close'].rolling(window=5).std().shift(1)
    df['std_close_price_day_365'] = df['Close'].rolling(
        window=365).std().shift(1)
    df['ratio_std_close_price_5_365'] = df['std_close_price_day_5'] / df[
        'std_close_price_day_365']
    df['avg_volume_day_5'] = df['Volume'].rolling(window=5).mean().shift(1)
    df['avg_volume_day_365'] = df['Volume'].rolling(window=365).mean().shift(1)
    df['ratio_volume_5_365'] = df['avg_volume_day_5'] / df['avg_volume_day_365']
    # Rolling standard deviations of the average volume; the names say std, so
    # use .std() here (the source reused rolling_mean, a likely copy-paste slip)
    df['std_avg_volume_5'] = df['avg_volume_day_5'].rolling(
        window=5).std().shift(1)
    df['std_avg_volume_365'] = df['avg_volume_day_365'].rolling(
        window=365).std().shift(1)
    df['ratio_std_avg_volume_5_365'] = df['std_avg_volume_5'] / df[
        'std_avg_volume_365']
    df = df[['Close'] + list(df.columns[6:])]
    df = df.dropna(axis=0)

    predicted_values_regression = []
    predicted_values_random_forest = []

    df_prediction = pd.DataFrame()
    df_prediction['Actual'] = df.loc[
        prediction_start_date:prediction_end_date]['Close']

    regressor = LinearRegression()
    random_forest_regressor = RandomForestRegressor()

    for index in df_prediction.index:
        train = df.loc[df.index[0]:index - timedelta(days=1)]
        test = df.loc[index:index]
        train_predictors = train[list(df.columns[1:])]
        train_to_predict = train['Close']
        regressor.fit(train_predictors, train_to_predict)
        random_forest_regressor.fit(train_predictors, train_to_predict)
        test_predictors = test[list(df.columns[1:])]
        predicted_values_regression.append(
            regressor.predict(test_predictors)[0])
        predicted_values_random_forest.append(
            random_forest_regressor.predict(test_predictors)[0])

    df_prediction['Predicted_regression'] = predicted_values_regression
    df_prediction['Predicted_random_forest'] = predicted_values_random_forest
    mae_regression = sum(
        abs(df_prediction['Actual'] -
            df_prediction['Predicted_regression'])) / len(
                df_prediction['Predicted_regression'])
    mae_random_forest = sum(
        abs(df_prediction['Actual'] -
            df_prediction['Predicted_random_forest'])) / len(
                df_prediction['Predicted_random_forest'])

    tomorrow = df.loc[predict_tomorrow:predict_tomorrow + timedelta(days=1)]
    tomorrow_predictors = tomorrow[list(df.columns[1:])]

    if mae_regression <= mae_random_forest:
        prediction_for_tomorrow = regressor.predict(tomorrow_predictors)[0]
    else:
        prediction_for_tomorrow = random_forest_regressor.predict(
            tomorrow_predictors)[0]

    with open('predicted_value_for_tomorrow', 'w') as f:
        f.write(
            'The mean absolute errors of the linear regression and random '
            'forest models are %s and %s, respectively. Based on the model '
            'with the smaller MAE, the predicted value for tomorrow is %s.'
            % (mae_regression, mae_random_forest, prediction_for_tomorrow))
    return df_prediction
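# Hypothetical usage of predict(); the file name and its column layout (a Date
# column followed by six numeric columns) are assumptions, not part of the source:
from datetime import datetime

hist = pd.read_csv('sphist.csv')
df_prediction = predict(hist,
                        prediction_start_date=datetime(2015, 1, 2),
                        prediction_end_date=datetime(2015, 12, 31),
                        predict_tomorrow=datetime(2016, 1, 4))
print(df_prediction.head())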
Example #57
0
print('Generated date range:\n', pd.date_range('2012/1/4', '2012/4/6', freq='BM'))
print('Generated date range:\n', pd.date_range('2012/3/4', '2012/4/6', freq='W-FRI'))

ts = pd.Series(np.random.randn(4),
               index=pd.date_range('1/1/2000', periods=4, freq='M'))
print('Shifted by 2 periods:', ts.shift(2))
print('Shifted by -2 periods:', ts.shift(-2))
print('Shifted by 3 days:', ts.shift(1, freq='3D'))

s = pd.Series(df.trade_vol.values, index=df.time)
s2 = pd.Series(df.trade_pr.values, index=df.time)
ticks = pd.DataFrame(
    {
        'open': df.trade_pr.values,
        'high': df.s1pr.values,
        'low': df.b1pr.values,
        'close': df.trade_pr.values
    },
    index=df.time)
ms = s.resample('5min').sum()
print('Downsampled:', ms[:5])
mt = s2.resample('1min').ohlc().ffill()
print('OHLC downsampled:', mt[:10])

s15t = mt.resample('15s').ffill()
print('Upsampled:', s15t[:10])

mean_s = s.rolling(5, min_periods=1).mean()
print('Rolling mean:', mean_s[:10])
# pd.ewma(s, 60) passed 60 as the center of mass, so ewm(com=60) is equivalent
ema_s = s.ewm(com=60).mean()
print('Exponentially weighted mean:', ema_s[:10])
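# The tick DataFrame df (trade_pr, trade_vol, ...) comes from outside this
# snippet. A self-contained sketch of the same resampling flow on synthetic
# one-second ticks:
import numpy as np
import pandas as pd

idx = pd.date_range('2012-06-01 09:30', periods=3600, freq='s')
price = pd.Series(100 + np.cumsum(np.random.randn(len(idx))) * 0.01, index=idx)
volume = pd.Series(np.random.randint(1, 100, len(idx)), index=idx)

bars = price.resample('1min').ohlc()       # 1-minute OHLC bars
vol_5min = volume.resample('5min').sum()   # 5-minute traded volume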
Example #58
0
    def moving_average(self, values):
        ma = self.df[values].rolling(100).mean()
        return ma
Example #59
0
df = pd.read_excel(
    r"C:\Users\PAULINKENBRANDT\Downloads\E5382-MonitoringData (1)\North_Side_Weirs.xlsx",
    "Main",
    index_col=0)
# .mean() preserves the old resample() default, which aggregated by mean
wld = df.resample('60Min').mean()

# Rolling statistics operate on the column's values, not its name
wld['NB_wl_std'] = wld['NB_ft_water'].rolling(24).std()
wld['NB_wl_avg'] = wld['NB_ft_water'].rolling(24).mean()

wld['NB_ft_water'].plot(style='k--')
wld['NB_wl_avg'].plot(style='k')

wldata = wld.iloc[1050:].copy()

wldata['rollmean'] = wldata['wlelev_m'].rolling(30).mean()
wldata['wlnorm'] = (wldata['wlelev_m'] - wldata['wlelev_m'].mean()) / (
    wldata['wlelev_m'].max() - wldata['wlelev_m'].min())
wldata['bpnorm'] = (wldata['bp_mH2O'] - wldata['bp_mH2O'].mean()) / (
    wldata['bp_mH2O'].max() - wldata['bp_mH2O'].min())
wldata['tempnorm'] = (wldata['temp'] - wldata['temp'].mean()) / (
    wldata['temp'].max() - wldata['temp'].min())
wldata['condnorm'] = (wldata['cond'] - wldata['cond'].mean()) / (
    wldata['cond'].max() - wldata['cond'].min())
wldata['dwl'] = wldata['wlelev_m'].diff()
wldata['dbp'] = wldata['bp_mH2O'].diff()


########################################################   date conversion
# Function to convert a calendar date into a Julian date
def jday(Y, M, D, h, m, s):
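    # The body was truncated in the source; what follows is one plausible
    # reconstruction, not the original code: Julian date via the J2000 epoch
    # (2000-01-01 12:00 UT = JD 2451545.0). Seconds are assumed integral.
    from datetime import datetime
    delta = datetime(Y, M, D, h, m, int(s)) - datetime(2000, 1, 1, 12)
    return 2451545.0 + delta.total_seconds() / 86400.0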
Example #60
0
# The imports and function definition below were truncated in the source;
# reconstructed here from the calls that follow
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller


def test_stationarity(timeseries):
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4],
                         index=[
                             'Test Statistic', 'p-value', '#Lags Used',
                             'Number of Observations Used'
                         ])
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)


ts_log = np.log(ts)
plt.plot(ts_log)

moving_avg = ts_log.rolling(12).mean()
plt.plot(ts_log)
plt.plot(moving_avg, color='red')

ts_log_moving_avg_diff = ts_log - moving_avg
ts_log_moving_avg_diff.head(12)

ts_log_moving_avg_diff.dropna(inplace=True)
test_stationarity(ts_log_moving_avg_diff)

exp_weighted_avg = ts_log.ewm(halflife=12).mean()
plt.plot(ts_log)
plt.plot(exp_weighted_avg, color='red')

ts_log_ewma_diff = ts_log - exp_weighted_avg
test_stationarity(ts_log_ewma_diff)
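# Beyond subtracting a moving or exponentially weighted average, first
# differencing is the other standard detrending step at this point; a short
# sketch using the same series and helper as above:
ts_log_diff = ts_log.diff()
ts_log_diff.dropna(inplace=True)
test_stationarity(ts_log_diff)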