def BBands(Symbols, d_data):
    """Compute Bollinger-band indicators per symbol, plot them and dump a CSV.

    For each symbol the 20- and 50-period moving averages, the 20-period
    Bollinger bands (mean +/- 2 standard deviations) and the band-normalised
    price ``BB_norm`` (-1 at the lower band, +1 at the upper band) are added
    as new columns to a copy of the closing prices.

    Parameters
    ----------
    Symbols : iterable of str
        Column names of ``d_data['close']`` to process.
    d_data : mapping
        Must contain a ``'close'`` DataFrame with one column per symbol.

    Returns
    -------
    None
        The function only has side effects: it shows one figure per symbol
        and writes the augmented frame to a hard-coded CSV path.
    """
    # Work on a copy so the caller's price frame is not modified.
    temp_data_set = copy.deepcopy(d_data['close'])

    for symbol in Symbols:
        price = temp_data_set[symbol]
        # pd.rolling_mean / pd.rolling_std were removed from pandas;
        # Series.rolling is the supported equivalent.  The mean and the
        # standard deviation are computed once and reused for both bands.
        ma_20 = price.rolling(window=20).mean()
        std_20 = price.rolling(window=20, min_periods=20).std()
        upper = ma_20 + 2 * std_20
        lower = ma_20 - 2 * std_20

        temp_data_set[symbol + '20d_ma'] = ma_20
        temp_data_set[symbol + '50d_ma'] = price.rolling(window=50).mean()
        temp_data_set[symbol + 'Bol_upper'] = upper
        temp_data_set[symbol + 'Bol_lower'] = lower
        # Map the current price into [-1, 1] relative to the two bands:
        # boll_val = 2 * ((price - lower) / (upper - lower)) - 1
        temp_data_set[symbol + 'BB_norm'] = 2 * (price - lower) / (upper - lower) - 1

        temp_data_set.plot(x=temp_data_set.index, y=[symbol + 'BB_norm'])
        plt.show()

    temp_data_set.to_csv('/Users/jcovino/Desktop/dataSet.csv')
def collection_freq(breath_df, win):
    """Add centred rolling frequencies and ventilator-setting averages.

    For each of the ``ds``, ``pl``, ``pvt`` and ``ie`` columns (read from
    ``'analysis.<type>'``) three columns are added to ``breath_df``:
    ``<type>_rolling`` (rolling sum), ``<type>_tot_rolling`` (rolling count
    of non-NaN values) and ``<type>_freq`` (their ratio).  Rolling means of
    the PEEP, p_mean and FiO2 ventilator settings are added when the
    corresponding source column exists.

    Parameters
    ----------
    breath_df : pandas.DataFrame
        Modified in place and also returned.
    win : int
        Window multiplier; the rolling window spans ``60 * win`` rows.

    Returns
    -------
    pandas.DataFrame
        The same ``breath_df`` object with the new columns.
    """
    print(breath_df.columns)
    window = 60 * win

    for ds_type in ['ds', 'pl', 'pvt', 'ie']:
        series = breath_df['analysis.' + ds_type]
        # pd.rolling_sum / pd.rolling_count were removed from pandas.
        # min_periods=0 on count() reproduces the old rolling_count, which
        # reported a count for every row regardless of the window fill.
        breath_df['{0}_rolling'.format(ds_type)] = series.rolling(
            window, center=True, min_periods=1).sum()
        breath_df[ds_type + '_tot_rolling'] = series.rolling(
            window, center=True, min_periods=0).count()
        breath_df[ds_type + '_freq'] = (breath_df[ds_type + '_rolling'] /
                                        breath_df[ds_type + '_tot_rolling'])

    # Rolling averages of the ventilator settings; a setting that was not
    # recorded (missing column) is skipped.
    settings = [('peep_rolling', 'vent_settings.PEEP'),
                ('p_mean_rolling', 'vent_settings.p_mean'),
                ('fio2_rolling', 'vent_settings.FiO2')]
    for target, source in settings:
        if source in breath_df.columns:
            breath_df[target] = breath_df[source].rolling(
                window, center=True, min_periods=1).mean()

    return breath_df
def getFeatures(self):
    """Build ``self.features`` from the acc2, acc and speed signals.

    ``self.features`` becomes a DataFrame whose first column ``acc2`` is a
    copy of ``self.acc2``.  ``acc`` is the 2-sample rolling mean of
    ``self.acc.val`` and ``v`` the 3-sample rolling mean of
    ``self.speed.val``; the leading rows that have no full window are
    dropped so only defined values are stored.
    """
    self.features = pd.DataFrame(self.acc2.copy())
    self.features.columns = ['acc2']
    # pd.rolling_mean and .ix were removed from pandas.  .iloc drops the
    # warm-up rows by position, which is what the slice is for: a window of
    # k leaves k-1 leading NaNs.
    self.features['acc'] = np.asarray(
        self.acc.val.rolling(window=2).mean().iloc[1:])
    self.features['v'] = np.asarray(
        self.speed.val.rolling(window=3).mean().iloc[2:])
def rsi(prices, params=None):
    """
    Calculate the RSI indicator.

    The average gain and average loss are simple rolling means of the
    positive and negative close-to-close differences.

    Parameters
    ----------
    prices: DataFrame
        Must contain a "Close" column.
    params: dict, optional
        ``{"window": <int>}``; defaults to a 14-period window.

    Returns
    ----------
    rsi_val: DataFrame
        Single column "RSI" indexed like ``prices``.  The first ``window``
        rows are NaN.
    """
    # None instead of a dict literal avoids a mutable default argument.
    window = 14 if params is None else params["window"]
    close = prices["Close"]
    delta = close - close.shift(1)  # the difference between rows

    gain = delta.copy()
    lose = delta.copy()
    gain[gain < 0] = 0
    lose[lose > 0] = 0

    # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
    avg_gain = gain.rolling(window).mean()
    avg_loss = abs(lose.rolling(window).mean())
    rs = avg_gain / avg_loss
    rsi_val = 100 - 100 / (1 + rs)
    return pd.DataFrame(rsi_val.values, index=prices.index, columns=["RSI"])
def getBackgroundKnowledge(df, periods):
    """Add boolean moving-average features of the HLC price to ``df``.

    HLC is ``(high + low + close) / 3``.  For every period ``x`` the columns
    ``ma_x_bullish``, ``ma_x_divergence`` and ``ma_x_magnitude`` are added;
    for every pair ``x < y`` the columns ``ma_x_crossover_y_bullish``,
    ``ma_x_crossover_y_divergence`` and ``ma_x_crossover_y_magnitude`` are
    added.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``high``, ``low`` and ``close`` columns; modified in
        place.
    periods : iterable of int
        Moving-average window lengths.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` with the new boolean columns.
    """
    logging.info('Background knowledge: retrieving...')

    # Vectorised HLC instead of a row-wise apply.
    hlc = (df['high'] + df['low'] + df['close']) / 3

    # Each moving average is computed once and reused by both passes.
    # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
    averages = {}
    for x in periods:
        if x not in averages:
            averages[x] = hlc.rolling(x).mean()

    for x in periods:
        avg_x = averages[x]
        avg_x_yesterday = avg_x.shift(+1)
        df['ma_{0}_bullish'.format(x)] = avg_x >= avg_x_yesterday

        avg_x_delta = abs(avg_x - avg_x_yesterday)
        avg_x_delta_yesterday = avg_x_delta.shift(+1)
        df['ma_{0}_divergence'.format(x)] = avg_x_delta >= avg_x_delta_yesterday
        df['ma_{0}_magnitude'.format(x)] = avg_x_delta > avg_x_delta.mean()

    for x in periods:
        for y in periods:
            if y <= x:
                continue
            logging.info('MA for {0} and {1}'.format(x, y))
            avg_x = averages[x]
            avg_y = averages[y]
            df['ma_{0}_crossover_{1}_bullish'.format(x, y)] = avg_x >= avg_y

            ma_diff = avg_x - avg_y
            ma_diff_yesterday = avg_x.shift(+1) - avg_y.shift(+1)
            df['ma_{0}_crossover_{1}_divergence'.format(x, y)] = ma_diff >= ma_diff_yesterday
            df['ma_{0}_crossover_{1}_magnitude'.format(x, y)] = ma_diff >= ma_diff.mean()

    logging.info('Background knowledge: retrieved')
    return df
def calculate_ma(self, config, *args, **kwargs):
    """
    Calculate the moving averages for the three periods chosen by the user.

    The averages are stored in ``self.ticks`` as the columns ``ma1_curve``,
    ``ma2_curve`` and ``ma3_curve``.

    :param config: saved configuration; the period of each curve is read
        from ``config["maN_curve"]["period"]``, the source column from
        ``config["ticks_curve"]["data"]`` and the chart length from
        ``config["ticks_curve"]["lenght"]``
    :return: dict mapping each curve name to ``(indexes, values)``, both
        restricted to the last ``lenght`` ticks
    """
    what_to_plot = config["ticks_curve"]["data"]
    lenght = config["ticks_curve"]["lenght"]  # actual curve length
    source = self.ticks[what_to_plot]

    # Only the part of the data that fits inside the chart is returned.
    indexes = self.ticks.index.values[-lenght:]

    dict_ma = {}
    for curve in ("ma1_curve", "ma2_curve", "ma3_curve"):
        period = config[curve]["period"]
        # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
        self.ticks[curve] = source.rolling(period).mean()
        dict_ma[curve] = (indexes, self.ticks[curve].values[-lenght:])

    return dict_ma
def test_multiple_talib_with_args(self):
    """Check two TA-Lib MA transforms with different ``timeperiod`` values.

    The values collected by the algorithm are compared against the pandas
    rolling mean of the source prices, and the last value of each transform
    against a direct call to ``talib.abstract.MA`` on the transform window.
    """
    periods = (10, 25)
    zipline_transforms = [ta.MA(timeperiod=p) for p in periods]
    talib_fn = talib.abstract.MA
    algo = TALIBAlgorithm(talib=zipline_transforms)
    algo.run(self.source)

    # Test if computed values match those computed by pandas rolling mean.
    # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
    sid = 0
    prices = self.panel[0]['price']
    for transform, period in zip(zipline_transforms, periods):
        talib_values = np.array([x[sid] for x in
                                 algo.talib_results[transform]])
        np.testing.assert_array_equal(talib_values,
                                      prices.rolling(period).mean().values)

    for t in zipline_transforms:
        talib_result = np.array(algo.talib_results[t][-1])
        talib_data = dict()
        data = t.window
        # TODO: Figure out if we are clobbering the tests by this
        # protection against empty windows
        if not data:
            continue
        for key in ['open', 'high', 'low', 'volume']:
            if key in data:
                talib_data[key] = data[key][0].values
        talib_data['close'] = data['price'][0].values
        expected_result = talib_fn(talib_data, **t.call_kwargs)[-1]
        np.testing.assert_allclose(talib_result, expected_result)
def plot(name):
    """Render price and Bollinger-bandwidth charts for one stock.

    Retrieves history for ``name`` through ``YFHistDataExtr``, derives
    moving averages, Bollinger bands, bandwidth and exponential moving
    averages, saves two figures under ``static/img`` next to this module
    and returns the rendered ``analysis.html`` template.

    :param name: stock symbol (converted with ``str``)
    :return: result of ``render_template('analysis.html')``
    """
    name = str(name)
    data_ext = YFHistDataExtr()
    data_ext.set_interval_to_retrieve(200)
    data_ext.set_multiple_stock_list([name])
    data_ext.get_hist_data_of_all_target_stocks()

    # convert the date column to date object
    data_ext.all_stock_df['Date'] = pandas.to_datetime(
        data_ext.all_stock_df['Date'])
    # DataFrame.sort, pandas.rolling_* and pandas.ewma were removed from
    # pandas; sort_values / .rolling() / .ewm() are the replacements.
    temp_data_set = data_ext.all_stock_df.sort_values('Date', ascending=True)

    adj_close = temp_data_set['Adj Close']
    ma_20 = adj_close.rolling(window=20).mean()
    std_20 = adj_close.rolling(20, min_periods=20).std()

    temp_data_set['20d_ma'] = ma_20
    temp_data_set['50d_ma'] = adj_close.rolling(window=50).mean()
    temp_data_set['Bol_upper'] = ma_20 + 2 * std_20
    temp_data_set['Bol_lower'] = ma_20 - 2 * std_20
    temp_data_set['Bol_BW'] = ((temp_data_set['Bol_upper'] -
                                temp_data_set['Bol_lower']) /
                               temp_data_set['20d_ma']) * 100
    # NOTE(review): the column is named "200MA" but a 50-period window is
    # used -- confirm which one is intended.
    temp_data_set['Bol_BW_200MA'] = temp_data_set['Bol_BW'].rolling(
        window=50).mean()
    temp_data_set['Bol_BW_200MA'] = temp_data_set['Bol_BW_200MA'].bfill()
    temp_data_set['20d_exma'] = adj_close.ewm(span=20).mean()
    temp_data_set['50d_exma'] = adj_close.ewm(span=50).mean()

    data_ext.all_stock_df = temp_data_set.sort_values('Date', ascending=False)
    data_ext.all_stock_df.plot(
        x='Date',
        y=['Adj Close', '20d_ma', '50d_ma', 'Bol_upper', 'Bol_lower'])

    cur_path = os.path.dirname(os.path.abspath(__file__))
    print(cur_path)
    img_path = os.path.join(cur_path, "static/img")
    history_img_path = os.path.join(img_path, "history.png")
    dividend_img_path = os.path.join(img_path, "dividend.png")

    plt.savefig(history_img_path)
    # The bandwidth chart is written to the "dividend" image slot.
    data_ext.all_stock_df.plot(x='Date', y=['Bol_BW', 'Bol_BW_200MA'])
    plt.savefig(dividend_img_path)
    return render_template('analysis.html')
def rollingMean(ms):
    """Smooth a sequence of 3x3 matrices through their RQ parameters.

    Each matrix is decomposed with ``cv2.RQDecomp3x3``; elements 1 and 2 of
    the result are used as the R and Q factors.  Eight scalar parameters
    (focus, shear, dx, theta, ratio, dy, p1, p2) are read from the factors,
    smoothed with a centred rolling mean of 10 samples and recombined into
    matrices as ``R . Q``.

    The original code computed the smoothed parameters but rebuilt the
    matrices from the raw ones, so no smoothing reached the result; the
    smoothed values are used here.

    Parameters
    ----------
    ms : array-like of shape (n, 3, 3)

    Returns
    -------
    list of numpy.ndarray
        ``n`` recombined 3x3 matrices.

    Side effects: plots the raw parameters and calls ``plt.show()``.
    """
    columns = ["focus", "shear", "dx", "theta", "ratio", "dy", "p1", "p2"]

    params = []
    for matrix in ms:
        # Decompose once per matrix.  A list is built explicitly because
        # np.array(map(...)) yields a 0-d object array on Python 3.
        decomposition = cv2.RQDecomp3x3(matrix)
        r, q = decomposition[1], decomposition[2]
        focus = r[1, 1]
        params.append([focus,              # focus
                       r[0, 1],            # shear
                       r[0, 2],            # dx
                       -np.arcsin(q[0, 1]),  # theta
                       r[0, 0] / focus,    # ratio
                       r[1, 2],            # dy
                       q[2, 0],            # p1
                       q[2, 1]])           # p2

    df = pd.DataFrame(params, columns=columns)
    # pd.rolling_mean was removed from pandas; DataFrame.rolling replaces it.
    smoothed = df.rolling(10, min_periods=1, center=True).mean()

    nms = []
    for focus, shear, dx, theta, ratio, dy, p1, p2 in smoothed.values:
        r = np.array([[ratio * focus, shear, dx],
                      [0, focus, dy],
                      [0, 0, 1]])
        q = np.array([[np.cos(theta), -np.sin(theta), 0],
                      [np.sin(theta), np.cos(theta), 0],
                      [p1, p2, 1]])
        nms.append(np.dot(r, q))

    df.plot(subplots=True, layout=(3, 3))
    plt.show()
    return nms
def TimeSeries(df,stock_name,VARS,signal=None,longIN=None,longOUT=None,shortIN=None,shortOUT=None,
               suptitle ='',fig_fn=None,date1=None,date2=None,mean_per=1,ymin=None,ymax=None,
               includeTrades=False,includeTradesEarnings=False,includeSignals=False):
    """Plot several variables in stacked windows that share the X-axis.

    Each variable in ``VARS`` gets its own subplot, so variables that cannot
    be compared on a single Y-axis can still be placed side by side over the
    same dates.  When ``mean_per > 1`` a rolling mean is drawn on top of
    every variable whose name does not contain "Close".

    For most stacked graphs leave ``ymin`` and ``ymax`` as None, because
    the variables use different scales.

    Parameters
    ----------
    df : DataFrame holding one column per entry of ``VARS``.
    stock_name : not used by this function.
    VARS : list of column names, one subplot each.
    signal, longIN, longOUT, shortIN, shortOUT : forwarded to
        ``plotSignals`` when ``includeSignals`` is set and ``signal`` is
        not None.
    suptitle, date1, date2 : passed to ``form_suptitle`` for the title.
    fig_fn : file to save the figure to; the figure is shown when None.
    mean_per : rolling-mean window length.
    ymin, ymax : common Y limits, applied only when ``ymin`` is not None.
    includeTrades, includeTradesEarnings, includeSignals : toggle overlays.
    """
    fig,axes = plt.subplots(nrows=len(VARS),ncols=1,sharex=True,sharey=False)
    fig.subplots_adjust(hspace=0.15)
    suptitle = form_suptitle(suptitle,date1,date2)
    fig.suptitle(suptitle,ha='center', va='center',fontsize=10,color="#FF3300")

    last = len(VARS) - 1
    for f, t_title in enumerate(VARS):
        # plt.subplots returns a bare Axes, not an array, for a single row.
        ax = axes if len(VARS) == 1 else axes[f]

        df[t_title].plot(kind='line',color=COLORS[f],ax=ax,alpha=0.9,label=t_title)
        if mean_per > 1 and t_title.find("Close") < 0:
            # pd.rolling_mean was removed from pandas.
            df[t_title].rolling(mean_per).mean().plot(
                ax=ax,color=COLORS[f],linewidth=2,
                label=str(mean_per)+"D rolling mean")
        if ("RelRet" in t_title) or ("RawRet" in t_title):
            add_range_lines(ax,df,t_title)
        if includeTradesEarnings:
            plot_TradesEarnings(df,ax)
        # "is not None": "!= None" is element-wise and ambiguous when the
        # signal is a pandas or numpy object.
        if includeSignals and signal is not None:
            plotSignals(df,signal,ax,shortIN=shortIN,longIN=longIN,shortOUT=shortOUT,longOUT=longOUT)
        if includeTrades:
            plotTrades(df,ax)

        # Only the bottom window carries the X-axis label.
        format_plot(ax,"Date" if f == last else "",title=t_title,use_legends=False)

        if ymin is not None:
            ax.axes.set_ylim(ymin,ymax)
        # Fixed limits for the relative-return series.
        if t_title.lower() in ["oorelret","ccrelret"]:
            ax.axes.set_ylim(-0.06,0.06)
        if t_title.lower() in ["oorelret(3d avg)","ccrelret(3d avg)"]:
            ax.axes.set_ylim(-0.03,0.03)

    if fig_fn is not None:
        plt.savefig(fig_fn)
    else:
        plt.show()
def handle_data(account):
    """Issue the buy/sell orders for one trading day.

    A short and a long moving average of index 399006's close are compared
    using data up to yesterday: the fund is bought with all available cash
    when the short average is at or above the long one and no position is
    held, and sold completely when the short average is below the long one.
    """
    hist = account.get_attribute_history('closePrice', window_long)
    fund = universe_tuple[0]
    today = account.current_date
    preday100 = today + timedelta(days=-100)
    # The window must end yesterday: ending it today would use future data.
    # The original used days=-100 here, which made beginDate equal endDate
    # and left both moving averages NaN.
    yesterday = today + timedelta(days=-1)

    cIndex = DataAPI.MktIdxdGet(ticker='399006', beginDate=preday100,
                                endDate=yesterday,
                                field=["tradeDate", "closeIndex"], pandas="1")
    # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
    close = cIndex['closeIndex']
    maIndexShort = np.round(close.rolling(window=window_short).mean(), 2)
    maIndexLong = np.round(close.rolling(window=window_long).mean(), 2)

    ma_short = maIndexShort.values[-1]
    ma_long = maIndexLong.values[-1]

    if ma_short >= ma_long:
        if account.position.secpos.get(fund, 0) == 0:
            # The 1.02 factor guards against a gap-up open that would make
            # the full-cash order size unaffordable.
            approximationAmount = int(account.cash / (hist[fund][-1] * 1.02) / 100.0) * 100
            order(fund, approximationAmount)
    elif ma_short < ma_long:
        if account.position.secpos.get(fund, 0) > 0:
            order_to(fund, 0)
    else:
        # Neither comparison holds only when an average is NaN.
        if isnan(ma_short) or isnan(ma_long):
            print('Warning : MA is NaN.')
def generate_signals_MA(self):
    """Return the trading signals for the pair from an MA crossover.

    A 10-period and a 30-period moving average of the ``'A/B'`` series from
    ``self.generate_AtoB()`` are compared.  ``signal`` is 1.0 where the
    short average is above the long one (from row 10 onwards) and 0.0
    elsewhere; its first difference is the trade signal.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``self.bars`` with one column per element of
        ``self.pair``: the trade signal for the first symbol and the
        negated trade signal times ``AtoB['MA']`` for the second.  These
        are trading signals, not positions.
    """
    signals = pd.DataFrame(index=self.bars.index)
    signals['signal'] = 0.0
    signals['tradesignal'] = 0.0
    signals['Longshortstatues'] = 0.0
    signals[self.pair[0]] = 0.0
    signals[self.pair[1]] = 0.0

    # create signal
    AtoB = self.generate_AtoB()
    short_window = 10
    long_window = 30
    # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
    signals['short_mavg'] = AtoB['A/B'].rolling(short_window, min_periods=1).mean()
    signals['long_mavg'] = AtoB['A/B'].rolling(long_window, min_periods=1).mean()

    # Positional assignment replaces the chained
    # signals['signal'][short_window:] = ..., which may write to a copy.
    short_tail = signals['short_mavg'].values[short_window:]
    long_tail = signals['long_mavg'].values[short_window:]
    signal_col = signals.columns.get_loc('signal')
    signals.iloc[short_window:, signal_col] = np.where(short_tail > long_tail, 1.0, 0.0)

    # Take the difference of the signals in order to generate actual trading orders
    signals['tradesignal'] = signals['signal'].diff()

    # generate signal for stock, this is trading signal not position
    signals[self.pair[0]] = signals['tradesignal']
    signals[self.pair[1]] = -signals['tradesignal'] * AtoB['MA']
    # The last row is not reliable; it is set to zero when positions are generated.

    # list(...) so a tuple pair is treated as two labels, not one key.
    return signals.loc[:, list(self.pair)]
def test_dollar_volume(self):
    """Check AverageDollarVolume for 1- and 5-day windows.

    Covers the default inputs and the explicit ``open``/``volume`` inputs,
    whose NaNs are expected to appear as zeros in the result.
    """
    results = self.engine.run_pipeline(
        Pipeline(
            columns={
                "dv1": AverageDollarVolume(window_length=1),
                "dv5": AverageDollarVolume(window_length=5),
                "dv1_nan": AverageDollarVolume(
                    window_length=1,
                    inputs=[USEquityPricing.open, USEquityPricing.volume],
                ),
                "dv5_nan": AverageDollarVolume(
                    window_length=5,
                    inputs=[USEquityPricing.open, USEquityPricing.volume],
                ),
            }
        ),
        self.dates[5],
        self.dates[-1],
    )

    expected_1 = (self.raw_data[5:] ** 2) * 2
    assert_frame_equal(results["dv1"].unstack(), expected_1)

    # rolling_mean was removed from pandas; DataFrame.rolling replaces it.
    expected_5 = ((self.raw_data ** 2) * 2).rolling(window=5).mean()[5:]
    assert_frame_equal(results["dv5"].unstack(), expected_5)

    # The following two use USEquityPricing.open and .volume as inputs.
    # The former uses self.raw_data_with_nans, and the latter uses
    # .raw_data * 2.  Thus we multiply instead of squaring as above.
    dollar_volume_nan = (self.raw_data_with_nans * self.raw_data * 2).fillna(0)

    expected_1_nan = (self.raw_data_with_nans[5:] *
                      self.raw_data[5:] * 2).fillna(0)
    assert_frame_equal(results["dv1_nan"].unstack(), expected_1_nan)

    expected_5_nan = dollar_volume_nan.rolling(window=5).mean()[5:]
    assert_frame_equal(results["dv5_nan"].unstack(), expected_5_nan)
def test_run_0104_08():
    """Plot SPY for 2012 together with its 20-, 40- and 60-day rolling means."""
    # Read Data
    dates = pd.date_range('2012-01-01', '2012-12-31')
    symbols = ['SPY']
    df = get_data(symbols, dates)

    # Plot SPY data, retain matplotlib axis object
    ax = df['SPY'].plot(title='SPY rolling mean', label='SPY')

    # Compute and draw one rolling mean per window on the same axes.
    # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
    # The window is included in the label so the three legend entries can
    # be told apart.
    for window in (20, 40, 60):
        rm_SPY = df['SPY'].rolling(window=window).mean()
        rm_SPY.plot(label='Rolling Mean ({0})'.format(window), ax=ax)

    # Add axis labels and legend
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    # The original referenced plt.show without calling it, so no figure
    # was ever displayed.
    plt.show()
def find_events_using_bollingerBandIndicator(ls_symbols, d_data):
    '''Find events using the Bollinger band value.

    The Bollinger value is the 20-period z-score
    ``(price - rolling mean) / rolling std``.  An event is recorded for a
    symbol on a day when its value is <= -2.0, its value on the previous
    row was >= -2.0, and the SPY value on the same day is >= 1.5.

    Parameters
    ----------
    ls_symbols : iterable of str
        Symbols (columns of ``d_data['close']``) to scan.
    d_data : mapping
        Must contain a ``'close'`` DataFrame that includes a 'SPY' column.

    Returns
    -------
    pandas.DataFrame
        Same index and columns as the close prices; 1 where an event
        occurred and NaN elsewhere.  The event total is printed.
    '''
    df_close = d_data['close']
    window = 20

    def bollinger_value(price):
        # pd.rolling_mean / pd.rolling_std were removed from pandas.
        rolling = price.rolling(window)
        return (price - rolling.mean()) / rolling.std()

    # All-NaN frame with the shape of the price frame.
    df_events = pd.DataFrame(np.nan, index=df_close.index,
                             columns=df_close.columns)
    spy_ok = bollinger_value(df_close['SPY']) >= 1.5

    event_count = 0
    for s_sym in ls_symbols:
        values = bollinger_value(df_close[s_sym])
        # shift(1) is NaN on the first row, so that row can never be an
        # event -- the same as starting the scan at the second timestamp.
        is_event = (values <= -2.0) & (values.shift(1) >= -2.0) & spy_ok
        # A single .loc assignment replaces the chained df[col].ix[ts] = 1.
        df_events.loc[is_event, s_sym] = 1
        event_count += int(is_event.sum())

    print("Total event number is %s." % (event_count))
    return df_events
def KELCH(df, n, ksgn='close'):
    '''
    Keltner Channel (KC).

    A moving-average channel made of three lines: middle, upper and lower.
    The upper and lower lines are commonly read as boundaries; a price
    moving abnormally at a boundary is taken as a possible trade signal.

    Input:
        df, pandas DataFrame with 'high', 'low' and ``ksgn`` columns
        n, length of the rolling window
        ksgn, name of the price column, normally 'close'

    Output:
        df, a new DataFrame with three columns joined on:
            kc_m, middle line: mean of (high + low + price) / 3
            kc_u, upper line: mean of (4*high - 2*low + price) / 3
            kc_d, lower line: mean of (-2*high + 4*low + price) / 3
    '''
    high, low, price = df['high'], df['low'], df[ksgn]

    # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
    KelChM = ((high + low + price) / 3).rolling(n).mean().rename('kc_m')
    KelChU = ((4 * high - 2 * low + price) / 3).rolling(n).mean().rename('kc_u')
    KelChD = ((-2 * high + 4 * low + price) / 3).rolling(n).mean().rename('kc_d')

    df = df.join(KelChM)
    df = df.join(KelChU)
    df = df.join(KelChD)
    return df
def getBuySignals( self, measurement, colName ): """na wejsciu data frame o zadanych przez InputSettings parametrach, na wyjsciu 0,1,-1 kiedy kupowac z kierunkiem - nie wiem czy bedziemy mieli takie algorytmy co beda w wyniku dawac sygnaly -1,1? """ #print (self.df) self.df = self.df.append(Series(measurement[colName],index = ['a'])) #print measurement.name #sys.exit(1) if self.df.shape[0] == self.bigAvg: curBig = rolling_mean(self.df, self.bigAvg) curSmall = rolling_mean(self.df[(self.bigAvg-self.smallAvg):], self.smallAvg) # print "1==========================" # print curBig[-1] # print curSmall[-1] if curBig[-1] < curSmall[-1]: self.df = self.df[1:] #return [self.getReturn(1), measurement.name] return self.getReturn(1) else: self.df = self.df[1:] #return [self.getReturn(-1), measurement.name] return self.getReturn(-1) else: return 0
def convert_test_runs_list_to_time_series_dict(test_runs_list, resample):
    """Convert test-run records into two time-indexed dictionaries.

    :param test_runs_list: iterable of objects exposing ``to_dict()`` and
        the ``start_time``, ``start_time_microsecond``, ``stop_time`` and
        ``stop_time_microsecond`` attributes
    :param resample: key into ``base.resample_matrix`` selecting the
        resampling rule
    :return: ``{'numeric': {...}, 'data': {...}}``.  ``numeric`` maps an ISO
        timestamp to the resampled ``run_time`` of successful runs with its
        20-sample rolling mean and standard deviation; ``data`` maps an ISO
        start time to the ``run_id`` and ``status`` of each run.
    """
    test_runs = []
    for test_run in test_runs_list:
        tr = test_run.to_dict()
        # Populate dict
        start_time = test_run.start_time
        if start_time and test_run.start_time_microsecond:
            start_time = start_time.replace(
                microsecond=test_run.start_time_microsecond)
            tr['start_time'] = start_time
        tr.pop('start_time_microsecond')
        if test_run.stop_time:
            stop_time = test_run.stop_time
            if test_run.stop_time_microsecond:
                stop_time = stop_time.replace(
                    microsecond=test_run.stop_time_microsecond)
            tr['stop_time'] = stop_time
            tr['run_time'] = read_subunit.get_duration(start_time,
                                                       tr.pop('stop_time'))
        tr.pop('stop_time_microsecond')
        tr.pop('id')
        tr.pop('test_id')
        test_runs.append(tr)

    df = pd.DataFrame(test_runs).set_index('start_time')
    df.index = pd.DatetimeIndex(df.index)

    # Resample the run_time of successful runs.  Only run_time is selected,
    # so non-numeric columns never reach the mean and there is no run_id
    # column to delete afterwards.  resample(how=...) was removed from pandas.
    numeric_df = df.loc[df['status'] == 'success', ['run_time']].resample(
        base.resample_matrix[resample]).mean()
    del(df['run_time'])

    # Interpolate missing data.  The column itself is interpolated; the
    # original assigned the interpolated whole frame to one column.
    numeric_df['run_time'] = numeric_df['run_time'].interpolate(
        method='time', limit=20)

    # Add rolling mean and std dev of run_time to the dataframe.
    # pd.rolling_mean / pd.rolling_std were removed from pandas.
    rolling = numeric_df['run_time'].rolling(20)
    numeric_df['avg_run_time'] = rolling.mean()
    numeric_df['stddev_run_time'] = rolling.std()

    # Convert the dataframes to a dict
    numeric_dict = dict(
        (date.isoformat(),
         {
            'run_time': run_time,
            'avg_run_time': avg,
            'std_dev_run_time': stddev,
        }) for date, run_time, avg, stddev in zip(
            numeric_df.index, numeric_df.run_time, numeric_df.avg_run_time,
            numeric_df.stddev_run_time))
    temp_dict = dict(
        (date.isoformat(),
         {
            'run_id': run_id,
            'status': status,
        }) for date, run_id, status in zip(df.index, df.run_id, df.status))

    return {'numeric': numeric_dict, 'data': temp_dict}
def filter_by_MA(self):
    """
    Remove abrupt changes using a Moving Average filter.

    A row of ``self.data`` is flagged as an outlier when its ENTRIES or
    EXITS value differs from the 3-sample moving average by more than a
    cap.  The cap is 10 standard deviations of the column, limited to 5000.
    Flagged rows are removed from ``self.data``.
    """
    entries = self.data.ENTRIES
    exits = self.data.EXITS

    # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
    ma_entry = entries.rolling(window=3, min_periods=3).mean()
    ma_exit = exits.rolling(window=3, min_periods=3).mean()

    # Winsorize at 10 std_dev
    cap_entry = 10 * entries.std()
    cap_exit = 10 * exits.std()
    self.logger.debug("The 10 * std.dev is :{:.2f} , {:.2f} for entries and exits.".format(cap_entry, cap_exit))
    if cap_entry > 5000:
        cap_entry = 5000
    if cap_exit > 5000:
        cap_exit = 5000

    # Rows without a full window have a NaN average and are never flagged.
    outlier = ((np.abs(ma_entry - entries) > cap_entry) |
               (np.abs(ma_exit - exits) > cap_exit))

    self.logger.info("{} out of {} observations are flagged as outliers by Moving-average.".format(int(outlier.sum()), len(self.data)))
    # A boolean mask with .loc replaces .ix and avoids adding helper
    # columns that then have to be dropped in place from a slice.
    self.data = self.data.loc[~outlier]
def citi_surprise_test(data_df):
    """Scatter-plot S&P 500 monthly returns against the Citi surprise index.

    Two figures are produced: returns against the index level, split by the
    sign of the level, and returns against the index trend (short minus
    long moving average), split by the sign of the trend.

    :param data_df: DataFrame with 'Citi Suprise' and 'S&P 500 Price'
        columns; it is re-indexed to a monthly date range with
        forward-filling before use
    :return: None, the function only draws the two scatter plots
    """
    data_df = data_df.reindex(
        pd.date_range(data_df.index[0], data_df.index[-1], freq='m'),
        method='ffill')
    equity_monthly_rtn = data_df['S&P 500 Price'].pct_change(periods=1).to_frame().dropna()
    columns = ['Citi Suprise', 'S&P 500 Monthly Return']

    def scatter_by_sign(frame, positive_label, non_positive_label):
        # Red points where the first column is > 0, grey where it is <= 0.
        above = frame[frame['Citi Suprise'] > 0]
        below = frame[frame['Citi Suprise'] <= 0]
        ax = above.plot(kind='scatter', x='Citi Suprise',
                        y='S&P 500 Monthly Return', color='Red',
                        label=positive_label)
        below.plot(kind='scatter', x='Citi Suprise',
                   y='S&P 500 Monthly Return', color='Grey',
                   label=non_positive_label, ax=ax)

    # Level of the index against the monthly return.
    absolute_monthly_test = pd.concat(
        [data_df['Citi Suprise'], equity_monthly_rtn], axis=1).dropna(axis=0)
    absolute_monthly_test.columns = columns
    scatter_by_sign(absolute_monthly_test,
                    'Citi Surprise > 0', 'Citi Surprise <= 0')

    # Trend of the index: short minus long moving average.
    # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
    citi_three_month_average = data_df['Citi Suprise'].rolling(window=3).mean()
    # NOTE(review): the original name says "six month" but the window is 12
    # -- confirm which horizon is intended.
    citi_six_month_average = data_df['Citi Suprise'].rolling(window=12).mean()
    citi_trend = (citi_three_month_average - citi_six_month_average).to_frame()
    citi_trend_test = pd.concat(
        [citi_trend, equity_monthly_rtn], axis=1).dropna(axis=0)
    citi_trend_test.columns = columns
    scatter_by_sign(citi_trend_test,
                    'Citi Surprise Up Trend', 'Citi Surprise Down Trend')
def select_Time_DMA(self):
    """Return a timing signal from the DMA indicator.

    DMA is ``self.ma_12 - self.ma_40``; AMA is the 10-sample rolling mean
    of DMA.  The signal is ``SIGNAL_BUY`` when DMA rose on the last sample
    and crossed above AMA, ``SIGNAL_SALE`` when it fell and crossed below
    AMA, and ``SIGNAL_DEFAULT`` otherwise.
    """
    ma_dea = 10

    # The original compared [AVR_SHORT, AVR_LONG] with itself, so the
    # branch that recomputed the averages from close_price was unreachable;
    # the precomputed averages are always used.
    # Wrapping in a Series with a fresh index gives .rolling and positional
    # access whether the averages are ndarrays or Series.
    dma_price = pd.Series(self.ma_12 - self.ma_40).reset_index(drop=True)
    # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
    ama_price = dma_price.rolling(ma_dea).mean()

    dma_last, dma_prev = dma_price.iloc[-1], dma_price.iloc[-2]
    ama_last, ama_prev = ama_price.iloc[-1], ama_price.iloc[-2]

    signal = SIGNAL_DEFAULT
    if dma_last > dma_prev and dma_last > ama_last and dma_prev < ama_prev:
        signal = SIGNAL_BUY
    elif dma_last < dma_prev and dma_last < ama_last and dma_prev > ama_prev:
        signal = SIGNAL_SALE
    return signal
def KELCH(df, n):
    """
    Keltner Channel

    Computes three ``n``-period rolling means from the 'High', 'Low' and
    'Close' columns: ``KelChM_<n>`` of (H + L + C) / 3, ``KelChU_<n>`` of
    (4H - 2L + C) / 3 and ``KelChD_<n>`` of (-2H + 4L + C) / 3.

    :param df: DataFrame with 'High', 'Low' and 'Close' columns
    :param n: rolling window length
    :return: whatever ``out(SETTINGS, df, result)`` returns for the
        three-column result frame
    """
    high, low, close = df['High'], df['Low'], df['Close']
    suffix = str(n)

    # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
    KelChM = ((high + low + close) / 3).rolling(n).mean().rename('KelChM_' + suffix)
    KelChU = ((4 * high - 2 * low + close) / 3).rolling(n).mean().rename('KelChU_' + suffix)
    KelChD = ((-2 * high + 4 * low + close) / 3).rolling(n).mean().rename('KelChD_' + suffix)

    result = pd.concat([KelChM, KelChU, KelChD], axis=1)
    return out(SETTINGS, df, result)
def avgDepth_speed(panel1,panel2,sep1,sep2,probet,adcpt):
    """Plot the mean speed against depth for two data sets.

    For each entry of ``panel1.minor_axis`` the matching cross-section of
    both panels is smoothed with a rolling mean (windows ``sep1`` and
    ``sep2``), reduced row-wise with ``mean_vel`` and sampled at ``probet``
    (first panel, labelled FVCOM) and ``adcpt`` (second panel, labelled
    ADCP).
    """
    depths = panel1.minor_axis[:]
    probem = []
    adcpm = []
    for depth in depths:
        print(depth)
        height1 = panel1.minor_xs(depth)
        height2 = panel2.minor_xs(depth)
        # pd.rolling_mean was removed from pandas; .rolling replaces it.
        mean1t = height1.rolling(sep1).mean().apply(mean_vel, axis=1)
        mean2t = height2.rolling(sep2).mean().apply(mean_vel, axis=1)
        probem.append(mean1t[probet])
        adcpm.append(mean2t[adcpt])

    fig,ax = plt.subplots()
    ax.plot(probem,depths,label='FVCOM')
    ax.plot(adcpm,depths,label='ADCP')
    ax.xaxis.grid()
    ax.yaxis.grid()
    ax.set_xlabel('Mean Speed (m/s)')
    ax.set_ylabel('Depth (m)')
    ax.set_title('Velocity by Depth')
    plt.legend()
    plt.show()
def turb_depth(panel1,panel2,sep1,sep2,probet,adcpt):
    """Plot the turbulence intensity against depth for two data sets.

    For each entry of ``panel1.minor_axis`` the rolling variance and
    rolling mean of the matching cross-sections are reduced row-wise with
    ``variance`` and ``mean_vel``; their ratio is sampled at ``probet``
    (first panel, labelled FVCOM) and ``adcpt`` (second panel, labelled
    ADCP).
    """
    depths = panel1.minor_axis[:]
    probeturbint = []
    adcpturbint = []
    for depth in depths:
        print(depth)
        height1 = panel1.minor_xs(depth)
        height2 = panel2.minor_xs(depth)
        # pd.rolling_mean / pd.rolling_var were removed from pandas.
        roll1 = height1.rolling(sep1)
        roll2 = height2.rolling(sep2)
        var1t = roll1.var().apply(variance, axis=1)
        var2t = roll2.var().apply(variance, axis=1)
        mean1t = roll1.mean().apply(mean_vel, axis=1)
        mean2t = roll2.mean().apply(mean_vel, axis=1)

        t_int1 = var1t / mean1t
        t_int2 = var2t / mean2t
        ptime = t_int1[probet]
        atime = t_int2[adcpt]
        print(ptime)
        print(atime)
        probeturbint.append(ptime)
        adcpturbint.append(atime)

    fig,ax = plt.subplots()
    ax.plot(probeturbint,depths,label='FVCOM')
    ax.plot(adcpturbint,depths,label='ADCP')
    ax.xaxis.grid()
    ax.yaxis.grid()
    ax.set_xlabel('Turbulence Intensity')
    ax.set_ylabel('Depth (m)')
    ax.set_title('Turbulence Intensity by Depth')
    plt.legend()
    plt.show()
def reynolds_depth(panel1,panel2,sep1,sep2,probet,adcpt):
    """Plot the Reynolds stress against depth for two data sets.

    For each entry of ``panel1.minor_axis`` the matching cross-section of
    both panels is smoothed with a rolling mean (windows ``sep1`` and
    ``sep2``), reduced row-wise with ``rey_stress`` and sampled at
    ``probet`` (first panel, labelled FVCOM) and ``adcpt`` (second panel,
    labelled ADCP).
    """
    depths = panel1.minor_axis[:]
    probereystr = []
    adcpreystr = []
    for depth in depths:
        print(depth)
        height1 = panel1.minor_xs(depth)
        height2 = panel2.minor_xs(depth)
        # pd.rolling_mean was removed from pandas; .rolling replaces it.
        rstress1 = height1.rolling(sep1).mean().apply(rey_stress, axis=1)
        rstress2 = height2.rolling(sep2).mean().apply(rey_stress, axis=1)
        pR = rstress1[probet]
        aR = rstress2[adcpt]
        print(pR)
        print(aR)
        probereystr.append(pR)
        adcpreystr.append(aR)

    fig,ax = plt.subplots()
    ax.plot(probereystr,depths,label='FVCOM')
    ax.plot(adcpreystr,depths,label='ADCP')
    ax.xaxis.grid()
    ax.yaxis.grid()
    ax.set_xlabel('Reynolds Stress')
    ax.set_ylabel('Depth (m)')
    ax.set_title('Reynolds Stress by Depth')
    plt.legend()
    plt.show()
def btn_update__click(dom):
    """Redraw the price chart for the stock selected in ``dom``.

    The closing prices of the chosen stock (AAPL or GOOG) are sliced to the
    date bounds given in ``dom`` and plotted together with one rolling mean
    per window slider; the figure is stored as SVG in ``dom['img_plot']``.

    :param dom: mapping of widget names to widget state
    :return: the same ``dom`` object; it is returned untouched when the
        symbol is unknown, and without a new image when the slice has no
        truthy value
    """
    symbol = dom['select_stock']['value']
    if symbol == 'AAPL':
        df = AAPL
    elif symbol == 'GOOG':
        df = GOOG
    else:
        return dom

    # An empty bound means "open-ended" and becomes None for the slice.
    date_start, date_end = [dom[x]['value'] or None
                            for x in ['date_start', 'date_end']]
    ts = df['Close'][date_start:date_end]

    if ts.any():
        try:
            ts.plot()
            for slider in ['slider_window_1', 'slider_window_2']:
                win = int(dom[slider]['value'])
                # pd.rolling_mean was removed from pandas.
                ts.rolling(win).mean().plot()
            plt.title("Weekly closing prices for {}".format(symbol))

            # get_svg is added by ashiba.plot
            dom['img_plot'].set_image(plt.get_svg(), 'svg')
        finally:
            # Always release the figure, even if plotting failed.
            plt.close()

    return dom
def get_data():
    """Process every symbol of every stock exchange and aggregate metrics.

    For each exchange listed in ``lista_burzi.txt`` the symbol list of that
    exchange is read and every symbol file is passed to
    ``indikatori.parse_raw_data`` and ``ml_model.train_and_eval_Model``.
    Afterwards 10-row rolling means are added to each exchange's
    ``metrics.csv`` and overall means to ``all_exchange_metrics.csv``.
    All paths are hard-coded under ``D:\\diplomski_kod``.
    """
    # Raw strings keep the same path text without relying on invalid
    # escape sequences such as "\d" or "\p".
    root = r"D:\diplomski_kod"
    data_root = root + r"\podaci"

    with open(root + r"\lista_burzi.txt") as f:
        lista_burzi = f.read().splitlines()

    # Each exchange is handled together with its own symbols.  The original
    # reused the exchange loop variable after that loop had finished, so
    # every symbol was processed under the last exchange's name.
    for ime_burze in lista_burzi:
        print("reading stock exchange " + ime_burze)
        exchange_dir = data_root + "\\" + ime_burze
        with open(exchange_dir + r"\lista_simbola.txt") as f:
            symbols = f.read().splitlines()
        for symbol in symbols:
            # path of the raw-data file needed by indikatori.parse_raw_data
            symbol_path = exchange_dir + "\\" + symbol + ".csv"
            indikatori.parse_raw_data(symbol_path, ime_burze)
            ml_model.train_and_eval_Model(symbol_path, ime_burze)
            print(symbol_path)

    # calculating arith and std_dev for each exchange
    for burza in lista_burzi:
        path = data_root + "\\" + burza + r"\metrics.csv"
        df = pd.read_csv(path)
        # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
        df["all_arithm"] = df["arithm"].rolling(10).mean()
        df["all_std_dev"] = df["std_dev"].rolling(10).mean()
        df.to_csv(path, sep=",", index=False, encoding="utf-8")

    # calculating the metrics over all exchanges
    all_metrics_path = root + r"\all_exchange_metrics.csv"
    df2 = pd.read_csv(all_metrics_path)
    df2["arithm all exchange"] = df2["arithm"].mean()
    df2["std dev all exchange"] = df2["std_dev"].mean()
    df2.to_csv(all_metrics_path, sep=",", index=False, encoding="utf-8")
def convert_data_to_df(ticker):
    """Load one ticker's CSV and derive the feature and label columns.

    The file is read from a hard-coded Quandl data directory.  The
    "Adjusted Close" column is dropped; 50- and 200-row rolling means of
    "Close", intraday ratios, the previous row's values ("Last*") and the
    ``ticker`` are added.  ``label`` is the next row's "OH%".

    :param ticker: ticker symbol, used as the file name and stored in the
        ``ticker`` column
    :return: a copy of the resulting DataFrame
    """
    df = pd.read_csv("/Users/excalibur/Dropbox/datasets/quandl_data/{}.csv".format(ticker))
    df = df.drop("Adjusted Close", axis=1)

    # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
    df["50dravg"] = df["Close"].rolling(window=50).mean()
    df["200dravg"] = df["Close"].rolling(window=200).mean()

    df["OC%"] = (df["Close"] / df["Open"]) - 1
    df["HL%"] = (df["High"] / df["Low"]) - 1
    df["OH%"] = (df["High"] / df["Open"]) - 1

    # Previous-row value of every raw and derived column.
    for column in ["Open", "High", "Low", "Close", "Volume",
                   "OC%", "HL%", "OH%"]:
        df["Last" + column] = df[column].shift(1)

    df["ticker"] = ticker
    # The label is the following row's open-to-high move.
    df["label"] = df["OH%"].shift(-1)
    return df.copy()
def kdj(prices, params=None):
    """
    Calculate KDJ indicator:
        RSV = (Ct - Ln) / (Hn - Ln) * 100
        K = sma(RSV)
        D = sma(K)
        J = 3 * K - 2 * D

    The earlier docstring gave J as ``3 * D - 2 * K``; the formula above is
    what the code computes.

    Parameters
    ----------
    prices: DataFrame
        Includes the open, close, high, low and volume.
    params: dict, optional
        ``{"windows": [n_rsv, n_k, n_d]}``; defaults to ``[9, 3, 3]``.

    Returns
    ----------
    kdj_val: DataFrame
        Columns "K", "D" and "J", indexed like ``prices``.
    """
    # None instead of a dict literal avoids a mutable default argument.
    windows = [9, 3, 3] if params is None else params["windows"]
    rsv = __rsv(prices, windows[0])
    # pd.rolling_mean accepted plain arrays; Series.rolling needs a pandas
    # object, so anything without .rolling is wrapped first.
    if not hasattr(rsv, "rolling"):
        rsv = pd.Series(rsv)

    # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
    k = rsv.rolling(windows[1]).mean()
    d = k.rolling(windows[2]).mean()
    j = 3 * k - 2 * d
    kdj_val = np.column_stack((k, d, j))
    return pd.DataFrame(kdj_val, index=prices.index, columns=["K", "D", "J"])
def rolling_table(ticker):
    """Return a fund's price history with 30/90/180-sample rolling means.

    :param ticker: ticker of the ``VanguardFund`` to look up
    :return: list of rows ``[date, price, mean_30, mean_90, mean_180]``,
        one per price record; an empty list when no fund has that ticker
    """
    fund = db.session.query(models.VanguardFund).filter(
        models.VanguardFund.ticker==ticker
    ).first()
    # .first() returns None when nothing matches; without this guard the
    # attribute access below raises AttributeError.
    if fund is None:
        return []

    prices = fund.prices
    ts = pandas.Series(
        [x.price for x in prices],
        index=pandas.to_datetime([x.date for x in prices]),
    )
    # pandas.rolling_mean was removed from pandas; Series.rolling replaces it.
    x = pandas.concat(
        [ts] + [ts.rolling(window).mean() for window in (30, 90, 180)],
        axis=1
    )
    return [
        [d] + list(row)
        for d, row in x.iterrows()
    ]
def EOM(df, n):
    """Append the n-period Ease of Movement indicator to ``df``.

    The raw value is
    ``(diff(High) + diff(Low)) * (High - Low) / (2 * Volume)``; its
    ``n``-period rolling mean is joined as the column ``EoM_<n>``.

    :param df: DataFrame with 'High', 'Low' and 'Volume' columns
    :param n: rolling window length
    :return: a new DataFrame, ``df`` plus the ``EoM_<n>`` column
    """
    EoM = (df['High'].diff(1) + df['Low'].diff(1)) * (df['High'] - df['Low']) / (2 * df['Volume'])
    # pd.rolling_mean was removed from pandas; Series.rolling replaces it.
    Eom_ma = EoM.rolling(n).mean().rename('EoM_' + str(n))
    df = df.join(Eom_ma)
    return df
def SL_PMT_plots(myCountry, economy, event_level, myiah, myHaz, my_PDS, _wprime, base_str, to_usd):
    """Draw PMT-ranked loss and cost/benefit plots for every event in ``myHaz``.

    For each combination of location (``myHaz[0]``), hazard (``myHaz[1]``) and
    return period (``myHaz[2]``) the function selects the matching rows of
    ``myiah``, assigns PMT quintiles, builds cumulative weighted sums of asset
    losses (dk0), wellbeing losses (dw) and support cost, reads the public-cost
    CSVs for ``base_str`` and ``my_PDS`` from the country output directory, and
    saves a series of PDF figures under ``../output_plots/SL/PMT/``.

    Parameters
    ----------
    myCountry : str
        Used to build the ``output_country/<myCountry>/`` path.
    economy : str
        Name of the location column/index level (e.g. compared against
        ``_loc``).
    event_level : sequence
        ``event_level[0]`` names the index level compared with 'contributer'.
    myiah : DataFrame
        Household-level results; needs at least 'hhid', 'hazard', 'rp', 'dk0'
        and the ``pcwgt_*``, ``dw_*``, ``help_received_*`` columns used below.
    myHaz : sequence
        Three iterables: locations, hazards, return periods.
    my_PDS, base_str : str
        Column-name suffixes of the support scenario and of the baseline.
    _wprime : number
        Divisor applied to the public welfare costs read from CSV.
    to_usd : number
        Multiplier applied to monetary values before plotting.

    Returns
    -------
    None. All output is written to PDF files.

    Notes
    -----
    Relies on module-level names not defined here (get_pmt, match_percentiles,
    perc_with_spline, reshape_data, greys_pal, q_colors, paired_pal, haz_dict,
    pds_dict, sns, plt).
    NOTE(review): the nesting of this function was reconstructed from a
    whitespace-collapsed source; confirm the indentation marked below.
    """
    out_files = os.getcwd() + '/../output_country/' + myCountry + '/'
    listofquintiles = np.arange(0.20, 1.01, 0.20)
    quint_labels = [
        'Poorest\nquintile', 'Second', 'Third', 'Fourth', 'Wealthiest\nquintile'
    ]

    # This column assignment happens before `myiah` is rebound, so it writes
    # to the DataFrame object passed in by the caller.
    myiah['hhid'] = myiah['hhid'].astype('str')
    myiah = myiah.set_index('hhid')
    pmt, _ = get_pmt(myiah)
    myiah['PMT'] = pmt

    for _loc in myHaz[0]:
        for _haz in myHaz[1]:
            for _rp in myHaz[2]:
                plt.cla()

                # `_` is the working frame for this (location, hazard, rp) event.
                _ = myiah.loc[(myiah[economy] == _loc) & (myiah['hazard'] == _haz) & (myiah['rp'] == _rp)].copy()
                _ = _.reset_index().groupby(
                    economy, sort=True).apply(lambda x: match_percentiles(
                        x,
                        perc_with_spline(reshape_data(x.PMT), reshape_data(x.pcwgt_no), listofquintiles),
                        'quintile', 'PMT'))

                for _sort in ['PMT']:
                    _ = _.sort_values(_sort, ascending=True)

                _['pcwgt_cum_' + base_str] = _['pcwgt_' + base_str].cumsum()
                _['pcwgt_cum_' + my_PDS] = _['pcwgt_' + my_PDS].cumsum()
                _['dk0_cum'] = _[['pcwgt_' + base_str, 'dk0']].prod(axis=1).cumsum()
                _['cost_cum_' + my_PDS] = _[[
                    'pcwgt_' + my_PDS, 'help_received_' + my_PDS
                ]].prod(axis=1).cumsum()
                # ^ cumulative cost
                _['cost_frac_' + my_PDS] = _[[
                    'pcwgt_' + my_PDS, 'help_received_' + my_PDS
                ]].prod(axis=1).cumsum() / _[[
                    'pcwgt_' + my_PDS, 'help_received_' + my_PDS
                ]].prod(axis=1).sum()
                # ^ cumulative cost as fraction of total

                # GET WELFARE COSTS
                _['dw_cum_' + base_str] = _[['pcwgt_' + base_str, 'dw_' + base_str]].prod(axis=1).cumsum()

                # Include public costs in baseline (dw_cum)
                ext_costs_base = pd.read_csv(out_files + 'public_costs_tax_' + base_str + '_.csv').set_index(
                    [economy, 'hazard', 'rp'])
                ext_costs_base[
                    'dw_pub_curr'] = ext_costs_base['dw_pub'] / _wprime
                ext_costs_base[
                    'dw_soc_curr'] = ext_costs_base['dw_soc'] / _wprime
                ext_costs_base['dw_tot_curr'] = ext_costs_base[[
                    'dw_pub', 'dw_soc'
                ]].sum(axis=1) / _wprime
                # Keep only rows whose 'contributer' differs from the event's own
                # index value, then total per (economy, hazard, rp).
                # NOTE(review): `.sum(level=...)` is not available in recent
                # pandas releases — confirm the pandas version this runs on.
                ext_costs_base_sum = ext_costs_base.loc[
                    ext_costs_base['contributer'] != ext_costs_base.index.
                    get_level_values(event_level[0]),
                    ['dw_pub_curr', 'dw_soc_curr', 'dw_tot_curr']].sum(
                        level=[economy, 'hazard', 'rp']).reset_index()
                ext_costs_base_pub = float(ext_costs_base_sum.loc[
                    (ext_costs_base_sum[economy] == _loc)
                    & ext_costs_base_sum.eval('(hazard==@_haz)&(rp==@_rp)'
                                              ), 'dw_pub_curr'])
                ext_costs_base_soc = float(ext_costs_base_sum.loc[
                    (ext_costs_base_sum[economy] == _loc)
                    & ext_costs_base_sum.eval('(hazard==@_haz)&(rp==@_rp)'
                                              ), 'dw_soc_curr'])
                # From here on the name holds a scalar, not the summary frame.
                ext_costs_base_sum = float(ext_costs_base_sum.loc[
                    (ext_costs_base_sum[economy] == _loc)
                    & ext_costs_base_sum.eval('(hazard==@_haz)&(rp==@_rp)'
                                              ), 'dw_tot_curr'])

                _['dw_cum_' + my_PDS] = _[['pcwgt_' + my_PDS, 'dw_' + my_PDS]].prod(axis=1).cumsum()
                # ^ cumulative DW, with my_PDS implemented

                # Include public costs in pds_dw_cum
                ext_costs_pds = pd.read_csv(out_files + 'public_costs_tax_' + my_PDS + '_.csv').set_index(
                    [economy, 'hazard', 'rp'])
                ext_costs_pds[
                    'dw_pub_curr'] = ext_costs_pds['dw_pub'] / _wprime
                ext_costs_pds[
                    'dw_soc_curr'] = ext_costs_pds['dw_soc'] / _wprime
                ext_costs_pds['dw_tot_curr'] = ext_costs_pds[[
                    'dw_pub', 'dw_soc'
                ]].sum(axis=1) / _wprime
                ext_costs_pds_sum = ext_costs_pds.loc[
                    (ext_costs_pds['contributer'] != ext_costs_pds.index.
                     get_level_values(event_level[0])),
                    ['dw_pub_curr', 'dw_soc_curr', 'dw_tot_curr']].sum(
                        level=[economy, 'hazard', 'rp']).reset_index()
                ext_costs_pds_pub = float(ext_costs_pds_sum.loc[
                    (ext_costs_pds_sum[economy] == _loc)
                    & ext_costs_pds_sum.eval('(hazard==@_haz)&(rp==@_rp)'),
                    'dw_pub_curr'])
                ext_costs_pds_soc = float(ext_costs_pds_sum.loc[
                    (ext_costs_pds_sum[economy] == _loc)
                    & ext_costs_pds_sum.eval('(hazard==@_haz)&(rp==@_rp)'),
                    'dw_soc_curr'])
                # Same rebinding as for the baseline: frame -> scalar.
                ext_costs_pds_sum = float(ext_costs_pds_sum.loc[
                    (ext_costs_pds_sum[economy] == _loc)
                    & ext_costs_pds_sum.eval('(hazard==@_haz)&(rp==@_rp)'),
                    'dw_tot_curr'])

                # Public costs are spread over households in proportion to the
                # cumulative share of support cost.
                _['dw_cum_' + my_PDS] += (ext_costs_pds_pub + ext_costs_pds_soc) * _['cost_frac_' + my_PDS]
                _['delta_dw_cum_' + my_PDS] = _['dw_cum_' + base_str] - _['dw_cum_' + my_PDS]

                ### PMT-ranked population coverage [%]
                plt.plot(
                    100. * _['pcwgt_cum_' + base_str] / _['pcwgt_' + base_str].sum(),
                    100. * _['dk0_cum'] / _[['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum())
                plt.annotate(
                    'Total asset losses\n$' + str(round(1E-6 * to_usd * _.iloc[-1]['dk0_cum'], 1)) + ' mil.',
                    xy=(0.1, 0.85),
                    xycoords='axes fraction',
                    color=greys_pal[7],
                    fontsize=10)
                # Disabled duplicate of the plot above.
                if False:
                    plt.plot(
                        100. * _['pcwgt_cum_' + base_str] / _['pcwgt_' + base_str].sum(),
                        100. * _['dk0_cum'] / _[['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum())
                plt.xlabel('Population percentile [%]', labelpad=8, fontsize=10)
                plt.ylabel('Cumulative asset losses [%]', labelpad=8, fontsize=10)
                plt.xlim(0)
                plt.ylim(-0.1)
                plt.gca().xaxis.set_ticks([20, 40, 60, 80, 100])
                sns.despine()
                plt.grid(False)
                plt.gcf().savefig('../output_plots/SL/PMT/pcwgt_vs_dk0_' + _loc + '_' + _haz + '_' + str(_rp) + '.pdf',
                                  format='pdf',
                                  bbox_inches='tight')
                plt.cla()

                #####################################
                ### PMT threshold vs dk (normalized)
                _ = _.sort_values('PMT', ascending=True)
                plt.plot(_['PMT'],
                         100. * _['dk0_cum'] / _[['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum(),
                         linewidth=1.8,
                         zorder=99,
                         color=q_colors[1])
                for _q in [1, 2, 3, 4, 5]:
                    # x: highest PMT in the quintile; y: cumulative share of
                    # asset losses up to and including this quintile.
                    _q_x = _.loc[_['quintile'] == _q, 'PMT'].max()
                    _q_y = 100. * _.loc[_['quintile'] <= _q, ['pcwgt_' + base_str, 'dk0']].prod(
                        axis=1).sum() / _[[
                            'pcwgt_' + base_str, 'dk0'
                        ]].prod(axis=1).sum()
                    # Label offset is fixed from the first quintile's height.
                    if _q == 1:
                        _q_yprime = _q_y / 20
                    plt.plot([_q_x, _q_x], [0, _q_y], color=greys_pal[4], ls=':', linewidth=1.5, zorder=91)
                    _usd = ' mil.'
                    plt.annotate((quint_labels[_q - 1] + '\n$' + str(
                        round(
                            1E-6 * to_usd * _.loc[_['quintile'] == _q, ['pcwgt_' + base_str, 'dk0']].prod(
                                axis=1).sum(), 1)) + _usd),
                                 xy=(_q_x, _q_y + _q_yprime),
                                 color=greys_pal[6],
                                 ha='right',
                                 va='bottom',
                                 style='italic',
                                 fontsize=8,
                                 zorder=91)
                # Disabled scatter overlay.
                if False:
                    plt.scatter(
                        _['PMT'],
                        100. * _['dk0_cum'] / _[['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum(),
                        alpha=0.08,
                        s=6,
                        zorder=10,
                        color=q_colors[1])
                plt.xlabel('Household income [PMT]', labelpad=8, fontsize=10)
                plt.ylabel('Cumulative asset losses [%]', labelpad=8, fontsize=10)
                plt.annotate(
                    'Total asset losses\n$' + str(round(1E-6 * to_usd * _.iloc[-1]['dk0_cum'], 1)) + ' mil.',
                    xy=(0.1, 0.85),
                    xycoords='axes fraction',
                    color=greys_pal[7],
                    fontsize=10)
                plt.xlim(825)
                plt.ylim(-0.1)
                sns.despine()
                plt.grid(False)
                plt.gcf().savefig('../output_plots/SL/PMT/pmt_vs_dk_norm_' + _loc + '_' + _haz + '_' + str(_rp) + '.pdf',
                                  format='pdf',
                                  bbox_inches='tight')

                #####################################
                ### PMT threshold vs dk & dw
                plt.cla()
                plt.plot(_['PMT'], _['dk0_cum'] * to_usd * 1E-6, color=q_colors[1], linewidth=1.8, zorder=99)
                plt.plot(_['PMT'], _['dw_cum_' + base_str] * to_usd * 1E-6, color=q_colors[3], linewidth=1.8, zorder=99)

                # Put the label of whichever total is larger on the upper line.
                _y1 = 1.08
                _y2 = 1.03
                if _['dk0_cum'].max() < _['dw_cum_' + base_str].max():
                    _y1 = 1.03
                    _y2 = 1.08
                plt.annotate(
                    'Total asset losses = $' + str(round(_['dk0_cum'].max() * to_usd * 1E-6, 1)) + ' million',
                    xy=(0.02, _y1),
                    xycoords='axes fraction',
                    color=q_colors[1],
                    ha='left',
                    va='top',
                    fontsize=10,
                    annotation_clip=False)
                wb_str = 'Total wellbeing losses = \$' + str(
                    round(_['dw_cum_' + base_str].max() * to_usd * 1E-6, 1)) + ' million'
                #wb_natl_str = '(+\$'+str(round(ext_costs_base_sum*to_usd*1E-6,1))+')'
                # Built but only used by the commented-out annotate call below.
                wb_natl_str = 'National welfare losses\n $' + str(
                    round(ext_costs_base_sum * to_usd * 1E-6, 1)) + ' million'
                plt.annotate(wb_str,
                             xy=(0.02, _y2),
                             xycoords='axes fraction',
                             color=q_colors[3],
                             ha='left',
                             va='top',
                             fontsize=10,
                             annotation_clip=False)
                #plt.annotate(wb_natl_str,xy=(0.02,0.77),xycoords='axes fraction',color=q_colors[3],ha='left',va='top',fontsize=10)

                for _q in [1, 2, 3, 4, 5]:
                    _q_x = _.loc[_['quintile'] == _q, 'PMT'].max()
                    # Height of the marker: the larger of cumulative asset and
                    # cumulative wellbeing losses up to this quintile.
                    _q_y = max(
                        _.loc[_['quintile'] <= _q, ['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum() * to_usd * 1E-6,
                        _.loc[_['quintile'] <= _q, ['pcwgt_' + base_str, 'dw_' + base_str]].prod(axis=1).sum() * to_usd
                        * 1E-6)
                    if _q == 1:
                        _q_yprime = _q_y / 25
                    plt.plot([_q_x, _q_x], [0, _q_y], color=greys_pal[4], ls=':', linewidth=1.5, zorder=91)
                    plt.annotate(quint_labels[_q - 1],
                                 xy=(_q_x, _q_y + 7 * _q_yprime),
                                 color=greys_pal[6],
                                 ha='right',
                                 va='bottom',
                                 style='italic',
                                 fontsize=8,
                                 zorder=91,
                                 annotation_clip=False)

                    # This figures out label ordering (are cumulative asset or cum welfare losses higher?)
                    _cumk = round(
                        _.loc[_['quintile'] <= _q, ['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum() * to_usd * 1E-6, 1)
                    _cumw = round(
                        _.loc[_['quintile'] <= _q, ['pcwgt_' + base_str, 'dw_' + base_str]].prod(axis=1).sum() * to_usd
                        * 1E-6, 1)
                    if _cumk >= _cumw:
                        _yprime_k = 4 * _q_yprime
                        _yprime_w = 1 * _q_yprime
                    else:
                        _yprime_k = 1 * _q_yprime
                        _yprime_w = 4 * _q_yprime

                    # Per-quintile (not cumulative) totals shown in the labels.
                    _qk = round(
                        _.loc[_['quintile'] == _q, ['pcwgt_' + base_str, 'dk0']].prod(axis=1).sum() * to_usd * 1E-6, 1)
                    _qw = round(
                        _.loc[_['quintile'] == _q, ['pcwgt_' + base_str, 'dw_' + base_str]].prod(axis=1).sum() * to_usd
                        * 1E-6, 1)
                    plt.annotate('$' + str(_qk) + ' mil.',
                                 xy=(_q_x, _q_y + _yprime_k),
                                 color=q_colors[1],
                                 ha='right',
                                 va='bottom',
                                 style='italic',
                                 fontsize=8,
                                 zorder=91,
                                 annotation_clip=False)
                    plt.annotate('$' + str(_qw) + ' mil.',
                                 xy=(_q_x, _q_y + _yprime_w),
                                 color=q_colors[3],
                                 ha='right',
                                 va='bottom',
                                 style='italic',
                                 fontsize=8,
                                 zorder=91,
                                 annotation_clip=False)

                plt.xlabel('Household income [PMT]', labelpad=8, fontsize=10)
                plt.ylabel('Cumulative losses [mil. US$]', labelpad=8, fontsize=10)
                plt.xlim(825)
                plt.ylim(-0.1)
                plt.title(' ' + str(_rp) + '-year ' + haz_dict[_haz].lower() + ' in ' + _loc,
                          loc='left',
                          color=greys_pal[7],
                          pad=30,
                          fontsize=15)
                sns.despine()
                plt.grid(False)
                plt.gcf().savefig('../output_plots/SL/PMT/pmt_vs_dk0_' + _loc + '_' + _haz + '_' + str(_rp) + '.pdf',
                                  format='pdf',
                                  bbox_inches='tight')
                plt.close('all')

                #####################################
                ### Cost vs benefit of PMT
                show_net_benefit = False
                # NOTE(review): assumed that only the two statements below belong
                # to this `if`; the plotting that follows is taken to run
                # unconditionally — confirm against the original indentation.
                if show_net_benefit:
                    _['dw_cum_' + base_str] += (ext_costs_base_pub + ext_costs_base_soc
                                                ) * _['cost_frac_' + my_PDS]  #*_[['pcwgt_'+base_str,'dk0']].prod(axis=1).cumsum()/_[['pcwgt_'+base_str,'dk0']].prod(axis=1).sum()
                    # ^ include national costs in baseline dw
                    _['delta_dw_cum_' + my_PDS] = _['dw_cum_' + base_str] - _['dw_cum_' + my_PDS]
                    # redefine this because above changed

                plt.cla()
                plt.plot(_['PMT'], _['cost_cum_' + my_PDS] * to_usd * 1E-6, color=q_colors[1], linewidth=1.8, zorder=99)
                plt.plot(_['PMT'],
                         _['delta_dw_cum_' + my_PDS] * to_usd * 1E-6,
                         color=q_colors[3],
                         linewidth=1.8,
                         zorder=99)
                plt.annotate('PDS cost =\n$' + str(
                    round(_['cost_cum_' + my_PDS].max() * to_usd * 1E-6, 2)) + ' mil.',
                             xy=(_['PMT'].max(), _['cost_cum_' + my_PDS].max() * to_usd * 1E-6),
                             color=q_colors[1],
                             weight='bold',
                             ha='left',
                             va='top',
                             fontsize=10,
                             annotation_clip=False)
                plt.annotate('Avoided wellbeing\nlosses = $' + str(
                    round(_.iloc[-1]['delta_dw_cum_' + my_PDS] * to_usd * 1E-6, 2)) + ' mil.',
                             xy=(_['PMT'].max(), _.iloc[-1]['delta_dw_cum_' + my_PDS] * to_usd * 1E-6),
                             color=q_colors[3],
                             weight='bold',
                             ha='left',
                             va='top',
                             fontsize=10)

                #for _q in [1,2,3,4,5]:
                #    _q_x = _.loc[_['quintile']==_q,'PMT'].max()
                #    _q_y = max(_.loc[_['quintile']<=_q,['pcwgt','dk0']].prod(axis=1).sum()*to_usd*1E-6,
                #               _.loc[_['quintile']<=_q,['pcwgt','dw_no']].prod(axis=1).sum()*to_usd*1E-6)
                #    if _q == 1: _q_yprime = _q_y/20
                #    plt.plot([_q_x,_q_x],[0,_q_y],color=greys_pal[4],ls=':',linewidth=1.5,zorder=91)
                #    plt.annotate(quint_labels[_q-1],xy=(_q_x,_q_y+_q_yprime),color=greys_pal[6],ha='right',va='bottom',style='italic',fontsize=8,zorder=91)

                plt.xlabel('Upper PMT threshold for post-disaster support', labelpad=8, fontsize=12)
                plt.ylabel('Cost & benefit [mil. US$]', labelpad=8, fontsize=12)
                plt.xlim(825)  #;plt.ylim(0)
                plt.title(' ' + str(_rp) + '-year ' + haz_dict[_haz].lower() + '\n in ' + _loc,
                          loc='left',
                          color=greys_pal[7],
                          pad=25,
                          fontsize=15)
                plt.annotate(pds_dict[my_PDS],
                             xy=(0.02, 1.03),
                             xycoords='axes fraction',
                             color=greys_pal[6],
                             ha='left',
                             va='bottom',
                             weight='bold',
                             style='italic',
                             fontsize=8,
                             zorder=91,
                             clip_on=False)
                plt.plot(plt.gca().get_xlim(), [0, 0], color=greys_pal[2], linewidth=0.90)
                sns.despine(bottom=True)
                plt.grid(False)
                plt.gcf().savefig('../output_plots/SL/PMT/pmt_dk_vs_dw_' + _loc + '_' + _haz + '_' + str(_rp) + '_' +
                                  my_PDS + '.pdf',
                                  format='pdf',
                                  bbox_inches='tight')
                plt.close('all')

                # NOTE(review): this `continue` appears to be unconditional, in
                # which case the marginal cost/benefit section below never runs
                # (it also still calls pd.rolling_mean, which recent pandas no
                # longer provides) — confirm before re-enabling.
                continue

                #####################################
                ### Cost vs benefit of PMT
                _ = _.fillna(0)
                #_ = _.loc[_['pcwgt_'+my_PDS]!=0].copy()
                _ = _.loc[(_['help_received_' + my_PDS] != 0) & (_['pcwgt_' + my_PDS] != 0)].copy()

                #_['dw_cum_'+my_PDS] = _[['pcwgt_'+my_PDS,'dw_'+my_PDS]].prod(axis=1).cumsum()
                #_['dw_cum_'+my_PDS] += ext_costs_pds_pub + ext_costs_pds_soc*_['cost_frac_'+my_PDS]
                # ^ unchanged from above

                _c1, _c1b = paired_pal[2], paired_pal[3]
                _c2, _c2b = paired_pal[0], paired_pal[1]

                # Smoothing window: 100 rows, or a fifth of the rows when fewer
                # than 100 are left.
                _window = 100
                if _.shape[0] < 100:
                    _window = int(_.shape[0] / 5)

                plt.cla()

                _y_values_A = (_['cost_cum_' + my_PDS] * to_usd).diff() / _['pcwgt_' + my_PDS]
                _y_values_B = pd.rolling_mean(
                    (_['cost_cum_' + my_PDS] * to_usd).diff() / _['pcwgt_' + my_PDS], _window)

                # Show raw points plus the smoothed line only when the raw values
                # stray noticeably from the smoothed ones; otherwise one line.
                if _y_values_A.max() >= 1.25 * _y_values_B.max(
                ) or _y_values_A.min() <= 0.75 * _y_values_B.min():
                    plt.scatter(_['PMT'], (_['cost_cum_' + my_PDS] * to_usd).diff() / _['pcwgt_' + my_PDS],
                                color=_c1,
                                s=4,
                                zorder=98,
                                alpha=0.25)
                    plt.plot(_['PMT'],
                             pd.rolling_mean(
                                 (_['cost_cum_' + my_PDS] * to_usd).diff() / _['pcwgt_' + my_PDS],
                                 _window),
                             color=_c1b,
                             lw=1.0,
                             zorder=98)
                else:
                    plt.plot(_['PMT'], (_['cost_cum_' + my_PDS] * to_usd).diff() / _['pcwgt_' + my_PDS],
                             color=_c1b,
                             lw=1.0,
                             zorder=98)

                plt.scatter(_['PMT'], (_['delta_dw_cum_' + my_PDS] * to_usd).diff() / _['pcwgt_' + my_PDS],
                            color=_c2,
                            s=4,
                            zorder=98,
                            alpha=0.25)
                plt.plot(_['PMT'],
                         pd.rolling_mean(
                             (_['delta_dw_cum_' + my_PDS] * to_usd).diff() / _['pcwgt_' + my_PDS], _window),
                         color=_c2b,
                         lw=1.0,
                         zorder=98)

                _y_min = 1.05 * pd.rolling_mean(
                    (_['delta_dw_cum_' + my_PDS] * to_usd).diff() / _['pcwgt_' + my_PDS], _window).min()
                # `_q_yprime` here still holds the value set by the quintile loop
                # of the previous figure.
                _y_max = 1.1 * max(
                    pd.rolling_mean(
                        (_['delta_dw_cum_' + my_PDS] * to_usd).diff() / _['pcwgt_' + my_PDS], _window).max(),
                    1.05 * ((_['cost_cum_' + my_PDS] * to_usd).diff() / _['pcwgt_' + my_PDS]).mean() + _q_yprime)

                for _q in [1, 2, 3, 4, 5]:
                    _q_x = min(1150, _.loc[_['quintile'] == _q, 'PMT'].max())
                    #_q_y = max(_.loc[_['quintile']<=_q,['pcwgt_'+my_PDS,'dk0']].prod(axis=1).sum()*to_usd,
                    #           _.loc[_['quintile']<=_q,['pcwgt_'+my_PDS,'dw_no']].prod(axis=1).sum()*to_usd))
                    if _q == 1:
                        _q_xprime = (_q_x - 840) / 40
                        _q_yprime = _y_max / 200
                    plt.plot([_q_x, _q_x], [_y_min, _y_max], color=greys_pal[4], ls=':', linewidth=1.5, zorder=91)
                    plt.annotate(quint_labels[_q - 1],
                                 xy=(_q_x - _q_xprime, _y_max),
                                 color=greys_pal[6],
                                 ha='right',
                                 va='top',
                                 style='italic',
                                 fontsize=7,
                                 zorder=99)

                #toggle this
                plt.annotate('PDS cost',
                             xy=(_['PMT'].max() - _q_xprime,
                                 ((_['cost_cum_' + my_PDS] * to_usd).diff() / _['pcwgt_' + my_PDS]).mean() + _q_yprime),
                             color=_c1b,
                             weight='bold',
                             ha='right',
                             va='bottom',
                             fontsize=8,
                             annotation_clip=False)
                #plt.annotate('Avoided\nwellbeing losses',xy=(_['PMT'].max()-_q_xprime,pd.rolling_mean((_['delta_dw_cum']*to_usd/_['pcwgt_'+my_PDS]).diff(),_window).min()+_q_yprime),
                #             color=_c2b,weight='bold',ha='right',va='bottom',fontsize=8)

                plt.xlabel('Upper PMT threshold for post-disaster support', labelpad=10, fontsize=10)
                plt.ylabel(
                    'Marginal impact at threshold [US$ per next enrollee]', labelpad=10, fontsize=10)
                plt.title(str(_rp) + '-year ' + haz_dict[_haz].lower() + ' in ' + _loc,
                          loc='right',
                          color=greys_pal[7],
                          pad=20,
                          fontsize=15)
                plt.annotate(pds_dict[my_PDS],
                             xy=(0.99, 1.02),
                             xycoords='axes fraction',
                             color=greys_pal[6],
                             ha='right',
                             va='bottom',
                             weight='bold',
                             style='italic',
                             fontsize=8,
                             zorder=91,
                             clip_on=False)
                plt.plot([840, 1150], [0, 0], color=greys_pal[2], linewidth=0.90)
                plt.xlim(840, 1150)
                plt.ylim(_y_min, _y_max)
                sns.despine(bottom=True)
                plt.grid(False)
                plt.gcf().savefig(
                    '../output_plots/SL/PMT/pmt_slope_cost_vs_benefit_' + _loc + '_' + _haz + '_' + str(_rp) + '_' +
                    my_PDS + '.pdf',
                    format='pdf',
                    bbox_inches='tight')
                plt.close('all')
"""
This file preprocesses the test files and saves the result as ordered_test.csv
"""
# same structure as preprocessing
import pandas as pd
import numpy as np

df_key = pd.read_csv("../input/key.csv")
df_test = pd.read_csv("../input/test.csv")
df_weather = pd.read_csv("../input/weather.csv")

df_test['date'] = pd.to_datetime(df_test['date'])
df_weather['date'] = pd.to_datetime(df_weather['date'])

# Attach the weather station of each store, then that station's weather per day.
temp = pd.merge(df_test, df_key, how='left', on=['store_nbr'])
df_main_test = pd.merge(temp, df_weather, how='left', on=['station_nbr', 'date'])

df_ordered = df_main_test.sort_values(['store_nbr', 'item_nbr', 'date']).reset_index(drop=True)


def _coerce_numeric(column):
    """Convert an object column to numbers, leaving purely textual columns alone.

    Stands in for the removed ``DataFrame.convert_objects(convert_numeric=True)``:
    unparseable entries become NaN, but a column in which nothing parses
    (e.g. 'codesum') is returned unchanged so string operations still work.
    NOTE(review): intended to mirror the old behaviour — confirm on real data.
    """
    if column.dtype != object:
        return column
    converted = pd.to_numeric(column, errors='coerce')
    return column if converted.isnull().all() else converted


df_ordered = df_ordered.apply(_coerce_numeric)

df_ordered['preciptotal'] = df_ordered['preciptotal'].fillna(0)
df_ordered['snowfall'] = df_ordered['snowfall'].fillna(0)
df_ordered = df_ordered.interpolate()

patternRA = 'RA'
patternSN = 'SN'
df_ordered['RA'] = df_ordered['codesum'].str.contains(patternRA)
df_ordered['SN'] = df_ordered['codesum'].str.contains(patternSN)
df_ordered['Condition'] = (df_ordered['RA'] & (df_ordered['preciptotal'] > 1.0)) | (
    df_ordered['SN'] & (df_ordered['preciptotal'] > 2.0))

# A row is flagged when any row in the centred 7-row window meets the condition.
# pd.rolling_mean() was removed; Series.rolling().mean() is the replacement.
df_ordered['WEvent'] = (
    df_ordered['Condition'].astype(float).rolling(window=7, center=True).mean() > 0)

df_ordered.to_csv('ordered_test.csv', sep=',')
xzdf = xzdf[end:start] #resampling and filling XZ, XY and X dataframes fr_xzdf, fr_xydf, fr_xdf = resamp_fill_df(resampind, xzdf, xydf, xdf) fr_xzdf[which_node].plot() #computing cumulative node displacements cs_xzdf = fr_xzdf.cumsum(axis=1) cs_xydf = fr_xydf.cumsum(axis=1) cs_xdf = fr_xdf.cumsum(axis=1) for cur_node_ID in range(num_nodes): if cur_node_ID != which_node: continue #rolling mean in 3 hour-window and 3 minimum data points rm_xzdf = pd.rolling_mean(fr_xzdf, window=length) # rm_xydf=pd.rolling_mean(fr_xydf,window=7) # rm_xdf=pd.rolling_mean(fr_xdf,window=7) #linear regression in 3 hour-window and 3 minimum data points td_rm_xzdf = rm_xzdf.index.values - rm_xzdf.index.values[0] # td_rm_xydf=rm_xydf.index.values-rm_xydf.index.values[0] # td_rm_xdf=rm_xdf.index.values-rm_xdf.index.values[0] tdelta = pd.Series(td_rm_xzdf / np.timedelta64(1, 'D'), index=rm_xzdf.index) # tdelta=pd.Series(td_rm_xydf/np.timedelta64(1,'D'),index=rm_xydf.index) # tdelta=pd.Series(td_rm_xdf/np.timedelta64(1,'D'),index=rm_xdf.index) plt.figure() lr_xzdf = ols(y=rm_xzdf[which_node],
def sma(df_in, periods):
    """Return the simple moving average of ``df_in`` over ``abs(periods)`` rows.

    Parameters
    ----------
    df_in : pandas.Series, pandas.DataFrame or array-like
        Input data. pandas objects keep their index; other array-likes are
        returned as a NumPy array of the same shape.
    periods : int
        Window length; its sign is ignored.

    Returns
    -------
    Same kind of object as ``df_in``, with NaN until a full window is available.
    """
    window = abs(periods)
    # pd.rolling_mean() was removed from pandas; use the .rolling() method.
    if isinstance(df_in, (pd.Series, pd.DataFrame)):
        return df_in.rolling(window).mean()

    # The old function also accepted plain arrays, so keep supporting them.
    import numpy as np
    arr = np.asarray(df_in, dtype=float)
    rolled = pd.DataFrame(arr).rolling(window).mean().values
    return rolled.reshape(arr.shape)
def MA(df, n):
    """Append the n-period simple moving average of 'Close' to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Close' column.
    n : int
        Rolling window length.

    Returns
    -------
    pandas.DataFrame
        A new frame: ``df`` joined with the column ``'MA_<n>'``.
    """
    # pd.rolling_mean() was removed; Series.rolling(n).mean() is equivalent.
    moving_avg = df['Close'].rolling(n).mean().rename('MA_' + str(n))
    return df.join(moving_avg)
def CCI(df, n):
    """Append an n-period Commodity-Channel-Index style column to ``df``.

    The typical price ``PP = (High + Low + Close) / 3`` is standardised by its
    rolling mean and rolling standard deviation:
    ``(PP - mean_n(PP)) / std_n(PP)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'High', 'Low' and 'Close' columns.
    n : int
        Rolling window length.

    Returns
    -------
    pandas.DataFrame
        A new frame: ``df`` joined with the column ``'CCI_<n>'``.
    """
    typical_price = (df['High'] + df['Low'] + df['Close']) / 3
    # pd.rolling_mean()/pd.rolling_std() were removed; use the .rolling() API.
    window = typical_price.rolling(n)
    cci = ((typical_price - window.mean()) / window.std()).rename('CCI_' + str(n))
    return df.join(cci)
t = rx['timestamp']

# Convert to pandas dataset, indexed by microsecond timestamp
print("Calculating throughput...")
t_pd = pd.to_datetime(t, unit='us')
len_pd = pd.Series(l, index=t_pd)

rs_interval = 100  # msec
rolling_winow = 600  # samples

# Resample length vector, summing in each interval; fill empty intervals with 0.
# The interval argument is 'Nms' for N msec ('100ms' for 100 msec).
# resample(how='sum') is no longer supported; aggregate with .sum() instead.
len_rs = len_pd.resample('%dms' % rs_interval).sum().fillna(value=0)

# Calculate rolling mean of throughput (units of bytes per rs_interval).
# pd.rolling_mean() was removed; Series.rolling().mean() is equivalent.
xput_roll = len_rs.rolling(rolling_winow).mean()

# Scale to Mb/sec
xput_roll = xput_roll * (1.0e-6 * 8.0 * (1.0 / (rs_interval * 1e-3)))

#----------------------
# Plot results

# X axis in units of minutes
t_p = np.linspace(0, (1.0 / 60) * 1e-6 * (max(t) - min(t)), len(xput_roll))

# enter interactive mode from script, so figures/plots update live
ion()
figure()
clf()
def testPolicy(self, symbol="IBM",
               sd=dt.datetime(2009, 1, 1),
               ed=dt.datetime(2010, 1, 1),
               sv=10000):
    """Query the trained learner day by day and return the resulting trades.

    Parameters
    ----------
    symbol : str
        Symbol to trade.
    sd, ed : datetime
        Start and end of the test period.
    sv : number
        Starting value; not used in this method.

    Returns
    -------
    DataFrame
        Same index as the price data, one column (``symbol``); each entry is
        the change in position chosen for that day (0 while the moving average
        is not yet defined).

    Side effects: overwrites ``self.X1_max/min`` and ``self.X2_max/min`` with
    the indicator ranges of the test period.
    """
    dates = pd.date_range(sd, ed)
    prices_all = ut.get_data([symbol], dates)  # automatically adds SPY
    prices = prices_all[[symbol]]
    prices = prices.fillna(method='ffill').fillna(method='bfill')
    trades = prices_all[[symbol, ]]  # only portfolio symbols
    # NOTE(review): writing through `.values` relies on it returning a writable
    # view of the frame; confirm this still holds for the pandas version in use.
    trades.values[:, :] = 0  # set them all to nothing

    # Indicators: N-day momentum (price ratio * 100) and N-day moving average.
    X1_Momentum = prices.values[self.N-1:, :] / prices.values[0:-self.N+1, :] * 100
    # pd.rolling_mean() was removed; DataFrame.rolling().mean() is equivalent.
    X2_SMA = prices.rolling(window=self.N).mean()

    self.X1_max = max(X1_Momentum)
    self.X1_min = min(X1_Momentum)
    self.X2_max = X2_SMA.max(axis=0).values
    self.X2_min = X2_SMA.min(axis=0).values

    position = 0
    testing_day = 0
    while testing_day < trades.shape[0]:
        my_action = 0
        # Skip the warm-up days on which the moving average is still NaN.
        if not np.isnan(X2_SMA.iloc[testing_day].values):
            # state_0 encodes the holding: -200 -> 0, 0 -> 1, +200 -> 2.
            state_0 = int(position/200 + 1)
            # X1_Momentum starts N-1 rows later than the price frame.
            state_1 = self.discretize(1, X1_Momentum[testing_day-self.N+1])
            state_2 = self.discretize(2, X2_SMA.iloc[testing_day].values)
            state = state_0*self.states_N**2 + state_1*self.states_N + state_2
            action = self.learner.querysetstate(state)

            # The action is shifted by the current holding state before being
            # turned into a position change of 200 shares per step.
            if state_0 == 0:  # hold -200
                new_position = position + action*200
            elif state_0 == 1:
                new_position = position + (action-1)*200
            elif state_0 == 2:
                new_position = position + (action-2)*200
            my_action = new_position - position

            if abs(position) > 200:
                print('error')
            position = new_position
            if self.verbose:
                # Formatted explicitly so the output is the same on Python 2 and 3.
                print('%s %s' % (testing_day, position))
        trades.values[testing_day, :] = my_action
        testing_day = testing_day + 1

    return trades
k = 4

# Equal-width discretization; the bins are labelled 0, 1, 2, 3 in order.
d1 = pd.cut(data, k, labels=range(k))

# Equal-frequency discretization
w = [1.0 * i / k for i in range(k + 1)]
w = data.describe(percentiles=w).iloc[4:4 + k + 1]  # let describe() compute the quantiles
w.iloc[0] = w.iloc[0] * (1 - 1e-10)  # nudge the lowest edge down so the minimum falls inside the first bin
d2 = pd.cut(data, w, labels=range(k))

from sklearn.cluster import KMeans  # import KMeans

# Build the model. The former n_jobs=1 argument was the default and is no
# longer accepted by current scikit-learn, so it is omitted.
kmodel = KMeans(n_clusters=k)
# Series.reshape() was removed from pandas; reshape the underlying array.
kmodel.fit(data.values.reshape((len(data), 1)))  # train the model
c = pd.DataFrame(kmodel.cluster_centers_).sort_values(0)  # cluster centres, sorted (their order is arbitrary by default)
# Midpoints of adjacent centres serve as bin edges.
# pd.rolling_mean() was removed; DataFrame.rolling().mean() is equivalent.
w = c.rolling(2).mean().iloc[1:]
w = [0] + list(w[0]) + [data.max()]  # add the first and last edges
d3 = pd.cut(data, w, labels=range(k))


def cluster_plot(d, k):
    """Plot the discretization ``d`` of the module-level ``data``.

    Each of the ``k`` classes is drawn as a row of points at height ``j``.
    Returns the ``matplotlib.pyplot`` module so the caller can show or save.
    """
    import matplotlib.pyplot as plt
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

    plt.figure(figsize=(8, 3))
    for j in range(0, k):
        members = data[d == j]
        plt.plot(members, [j] * len(members), 'o')

    plt.ylim(-0.5, k - 0.5)
    return plt
# Replace a stray header value in the time column, then parse it as a timestamp.
data['time'] = data['time'].astype(str).replace('time', '1490600000000000000')
data['time'] = data['time'].astype(float)
data['time'] = pd.to_datetime(data['time'], format=None)

temp = data[fields]
scatter_matrix(temp, alpha=0.2, figsize=(6, 6), diagonal='kde')

humidity = data['Humidity']
moist = data['Moisture']
temp = data['Temperature']

# Centred 50-sample rolling means.
# pd.rolling_mean() was removed; Series.rolling().mean() is equivalent.
mavg = moist.rolling(50, center=True).mean()
havg = humidity.rolling(50, center=True).mean()
tavg = temp.rolling(50, center=True).mean()
time = data['time']

fig = plt.figure()
ax1 = plt.subplot2grid((20, 1), (0, 0), rowspan=7, colspan=1)
ax1.plot(time, humidity, color='cyan', linewidth=2.0, label="")
ax1.plot(time, havg, '--', color='red', linewidth=2.0, label="Rolling Mean")
plt.ylabel('Humidity')
plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)
ax1.yaxis.set_major_locator(mticker.MaxNLocator(nbins=5, prune='both'))
def addEvidence(self, symbol="IBM",
                sd=dt.datetime(2008, 1, 1),
                ed=dt.datetime(2009, 1, 1),
                sv=10000):
    """Train ``self.learner`` by replaying the price history repeatedly.

    State = (holding, discretized momentum, discretized moving average):
      X1: momentum over N days, close(i) / close(i-N+1) * 100
      X2: N-day simple moving average of the close
    The reward passed to the learner is the previous holding multiplied by the
    day's price change.

    Training loops over the period until one of these happens: the portfolio
    value at the end of a pass reaches 2.5 * sv, more than 100 passes have
    run, or the end value has repeated more than 3 times while being above
    2 * sv. If the end value repeats but is not above 2 * sv, a fresh learner
    is created.

    Parameters
    ----------
    symbol : str
        Symbol to trade.
    sd, ed : datetime
        Start and end of the training period.
    sv : number
        Starting cash.

    Side effects: sets ``self.learner`` and ``self.X1_max/min``,
    ``self.X2_max/min``. Returns None.
    """
    position = 0  # number of shares held

    # One learner covers all three holding states (-200, 0, +200).
    self.learner = ql.QLearner(num_states=3*self.states_N**2,
                               num_actions=3,
                               alpha=0.2,
                               gamma=0.9,
                               rar=random.random(), radr=random.random(),
                               dyna=0,
                               verbose=False)

    flag_0 = True  # the learner has not been given an initial state yet

    syms = [symbol]
    dates = pd.date_range(sd, ed)
    prices_all = ut.get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices = prices.fillna(method='ffill').fillna(method='bfill')
    if self.verbose:
        print(prices)

    # Indicators
    X1_Momentum = prices.values[self.N-1:, :] / prices.values[0:-self.N+1, :] * 100
    # pd.rolling_mean() was removed; DataFrame.rolling().mean() is equivalent.
    X2_SMA = prices.rolling(window=self.N).mean()

    self.X1_max = max(X1_Momentum)
    self.X1_min = min(X1_Momentum)
    self.X2_max = X2_SMA.max(axis=0).values
    self.X2_min = X2_SMA.min(axis=0).values

    training_day = 0
    # NOTE(review): writing through `.values` relies on it returning a writable
    # view of the frame; confirm this still holds for the pandas version in use.
    port_value = prices_all[syms]
    port_value.values[:, :] = 0
    port_value.values[0, :] = sv
    previous_end_port = 0
    cash = sv
    epoch = 0
    repeated = 0

    while (training_day < X2_SMA.shape[0]) & (previous_end_port < 2.5*sv):
        # Skip the warm-up days on which the moving average is still NaN.
        if not np.isnan(X2_SMA.iloc[training_day].values):
            # state_0 encodes the holding: -200 -> 0, 0 -> 1, +200 -> 2.
            state_0 = int(position/200 + 1)
            # X1_Momentum starts N-1 rows later than the price frame.
            state_1 = self.discretize(1, X1_Momentum[training_day-self.N+1])
            state_2 = self.discretize(2, X2_SMA.iloc[training_day].values)
            state = state_0*self.states_N**2 + state_1*self.states_N + state_2

            if flag_0:
                flag_0 = False
                action = self.learner.querysetstate(state)

            # Reward: gain or loss of the current holding since yesterday.
            r = (prices.iloc[training_day].values - prices.iloc[training_day-1].values)*position
            action = self.learner.query(state, r)

            # The action is shifted by the current holding state before being
            # turned into a position change of 200 shares per step.
            if state_0 == 0:  # hold -200
                new_position = position + action*200
            elif state_0 == 1:
                new_position = position + (action-1)*200
            elif state_0 == 2:
                new_position = position + (action-2)*200

            cash = cash - (new_position - position)*prices.values[training_day]
            position = new_position
            if self.verbose:
                # Formatted explicitly so the output is the same on Python 2 and 3.
                print('%s %s' % (training_day, position))
            port_value.values[training_day] = cash + position*prices.values[training_day]

        training_day = training_day + 1

        # End of one pass over the data: decide whether to stop, restart with
        # a new learner, or replay the period again.
        if training_day == X2_SMA.shape[0]:
            training_day = 0
            if port_value.values[-1] == previous_end_port:
                repeated = repeated + 1
                if repeated > 3:
                    if cash + position*prices.values[-1] > 2 * sv:
                        break
                    else:
                        # Converged to a poor result: start over with a new,
                        # randomly parameterised learner.
                        self.learner = ql.QLearner(num_states=3*self.states_N**2,
                                                   num_actions=3,
                                                   alpha=0.2,
                                                   gamma=0.9,
                                                   rar=random.random(), radr=random.random(),
                                                   dyna=0,
                                                   verbose=False)
                        repeated = 0
                        continue
            else:
                repeated = 0

            epoch = epoch + 1
            if epoch > 100:
                break

            current_port = cash + position*prices.values[-1]
            previous_end_port = current_port

            # Reset the simulated account for the next pass.
            port_value.values[:, :] = 0
            port_value.values[0, :] = sv
            position = 0
            cash = sv
def sort(self, data, stock_list):
    """Halve ``trend_val`` for stocks whose moving averages are in rising order.

    For every code in ``stock_list['code']`` the recent 'close' and 'volume'
    history is fetched and a moving average is computed for each window in
    ``self.periods``. A stock passes when, on the latest row and for both
    fields, each moving average is greater than the one for the next period
    in ``self.periods`` (e.g. 5-day average volume > 10-day average volume).

    Parameters
    ----------
    data
        Not used by this method.
    stock_list : DataFrame
        Must have a 'code' column; a 'trend_val' column is (re)set to 10 and
        then halved for the stocks that pass. Modified in place.

    Returns
    -------
    (dict, DataFrame)
        A copy of ``self._params`` with ``'field'`` set to ``'trend_val'``,
        and ``stock_list``.
    """
    day = max(self.periods) + 5
    fields = ['close', 'volume']
    filter_stock = []
    stock_list['trend_val'] = 10

    for stock in stock_list['code']:
        df = self.platform.attribute_history(stock, day, unit=self.unit, fields=fields)

        for col in self.periods:
            # DataFrame.rolling().mean() works on both Python 2 and 3; the
            # previous py3 branch called a non-existent DataFrame.rolling_mean
            # and the other branch used the removed pd.rolling_mean().
            ma = df[fields].rolling(window=col).mean()
            ma = ma.rename(columns={f: 'MA' + f + str(col) for f in fields})
            df = df.join(ma)

        df['date'] = df.index
        df['code'] = stock
        jp_utils.jp_reset_range_index(df)
        df = df.dropna()

        # e.g. 5-day average volume > 10-day average volume
        temp_flag = []
        for f in fields:
            for index in range(len(self.periods) - 1):
                field0 = 'MA' + f + str(self.periods[index])
                field1 = 'MA' + f + str(self.periods[index + 1])
                flag = df.iloc[-1][field0] > df.iloc[-1][field1]
                temp_flag.append(flag)
                self.platform.log_info(field0 + '-' + field1 + ':' + str(flag))

        # Every comparison for every field must hold.
        if sum(temp_flag) == len(fields) * (len(self.periods) - 1):
            filter_stock.append(stock)
            self.platform.log_info('code:', self.platform.get_security_info(stock))

    stock_list.loc[stock_list['code'].isin(filter_stock), 'trend_val'] = stock_list['trend_val'] / 2

    new_params = self._params.copy()
    new_params['field'] = 'trend_val'
    return new_params, stock_list
#-*- coding:utf-8 -*-
# Peishichao
import numpy as np
import pandas as pd

inputfile = '../data/water_heater.xls'
n = 4  # number of consecutive slopes averaged by the indicator below
threshold = pd.Timedelta(minutes=5)  # upper limit accepted for the chosen gap

data = pd.read_excel(inputfile)
# Column names: u'发生时间' = occurrence time, u'水流量' = water flow.
data[u'发生时间'] = pd.to_datetime(data[u'发生时间'], format='%Y%m%d%H%M%S')
data = data[data[u'水流量'] > 0]


def event_num(ts):
    """Return the number of events when records more than ``ts`` apart are split."""
    d = data[u'发生时间'].diff() > ts
    return d.sum() + 1


# Candidate gap thresholds from 1 to 8.75 minutes in 0.25-minute steps.
dt = [pd.Timedelta(minutes=i) for i in np.arange(1, 9, 0.25)]
h = pd.DataFrame(dt, columns=[u'阈值'])  # u'阈值' = threshold
h[u'事件数'] = h[u'阈值'].apply(event_num)  # u'事件数' = event count
# u'斜率' = slope of the event count between neighbouring thresholds.
# The key is written with the u'' prefix like everywhere else so the lookup
# also matches on Python 2.
h[u'斜率'] = h[u'事件数'].diff() / 0.25
# u'斜率指标' = slope indicator: mean absolute slope over n thresholds.
# pd.rolling_mean() was removed; Series.rolling().mean() is equivalent.
h[u'斜率指标'] = h[u'斜率'].abs().rolling(n).mean()

# Take the threshold n rows before the row where the indicator is smallest.
ts = h[u'阈值'][h[u'斜率指标'].idxmin() - n]
if ts > threshold:
    ts = pd.Timedelta(minutes=4)
print(ts)
full = set(devices) n = len(full) print('full set has %d items' % (n)) ####################################### # # II.Feature engineering ####################################### #create useful features from the 9 attributes #I calculate rolling mean (and rolling std for attribute 1)while keeping the window of mean calculation as a variable #that we could change. for i in range(1, 10): device_failure['attribute' + str(i) + '_s_rolling_mean'] = pd.rolling_mean( device_failure['attribute' + str(i)], window=s_window, min_periods=5) #.mean() device_failure['attribute' + str(i) + '_l_rolling_mean'] = pd.rolling_mean( device_failure['attribute' + str(i)], window=l_window, min_periods=30) #.mean() if (i in (1, 6)): device_failure['attribute' + str(i) + '_s_rolling_std'] = pd.rolling_std( device_failure['attribute' + str(i)], window=s_window, min_periods=5) #.std() device_failure['attribute' + str(i) + '_l_rolling_std'] = pd.rolling_std( device_failure['attribute' + str(i)], window=l_window,
symbols = ["MSFT"]
startday = dt.datetime(2010, 1, 1)
endday = dt.datetime(2010, 12, 31)
timeofday = dt.timedelta(hours=16)
timestamps = du.getNYSEdays(startday, endday, timeofday)

dataobj = da.DataAccess('Yahoo')
voldata = dataobj.get_data(timestamps, symbols, "volume")
adjcloses = dataobj.get_data(timestamps, symbols, "close")
actualclose = dataobj.get_data(timestamps, symbols, "actual_close")

# Fill gaps with the next available value (same as fillna(method='backfill')).
adjcloses = adjcloses.bfill()
adjcloses = adjcloses[symbols]

# 20-row rolling statistics; NaN until 20 observations are available.
# pandas.rolling_mean()/rolling_std() were removed; use the .rolling() API.
rolling_window = adjcloses.rolling(20, min_periods=20)
rolling_means = rolling_window.mean()
rolling_stds = rolling_window.std()

# Bands are one rolling standard deviation either side of the rolling mean.
upperband = rolling_means + rolling_stds
lowerband = rolling_means - rolling_stds
Bollinger_val = (adjcloses - rolling_means) / (rolling_stds)

# Plot the prices
plt.clf()
#symtoplot = 'AAPL'
fig = plt.figure()
gs = gridspec.GridSpec(2, 1)
ax1 = fig.add_subplot(gs[0, :])
ax1.plot(adjcloses.index, adjcloses[symbols].values, label=symbols)
ax1.plot(adjcloses.index, rolling_means[symbols].values)
#upper band
ax1.plot(adjcloses.index, upperband[symbols].values)
def inspect(AcousticIndexes, BESTLOG):
    """Show one figure per acoustic channel for every row of ``BESTLOG``.

    For each row, the frame returned by ``getDataFrame(row.File + '.tdms')``
    is loaded.  For each entry of ``AcousticIndexes`` a figure is shown with
    the selected channel, the 500-sample rolling means of the frame's third
    and fourth columns, their absolute difference, vertical lines at the
    row's ``Start``/``Stop`` and an ``x`` at the channel's start mark.

    Args:
        AcousticIndexes: positional indexer into the loaded frame's columns.
            Start marks come from the eight fields ``S1``..``S6``, ``S8``,
            ``S9``, so more than eight entries raises ``IndexError``.
        BESTLOG: DataFrame whose rows provide ``File``, ``Start``, ``Stop``
            and the eight ``S*`` offsets (relative to ``Start``).
    """
    STARTOFFSET = 2000  # samples shown before Start
    STOPOFFSET = 0  # samples shown after Stop
    half_width = 2000  # x-axis half-width around the start mark
    channels = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S8', 'S9']

    for row in range(len(BESTLOG)):
        QUENCH = BESTLOG.iloc[row]
        FILE = QUENCH.File + '.tdms'
        START = QUENCH.Start
        STOP = QUENCH.Stop
        BestStart = np.array([
            QUENCH.S1, QUENCH.S2, QUENCH.S3, QUENCH.S4, QUENCH.S5, QUENCH.S6,
            QUENCH.S8, QUENCH.S9
        ]) + START

        TDDF = getDataFrame(FILE)
        COLS = TDDF.columns[AcousticIndexes]
        shown = slice(START - STARTOFFSET, STOP + STOPOFFSET)

        # These do not depend on the channel, so compute them once per file.
        # (pd.rolling_mean was removed in pandas 0.23; .rolling() is the
        # equivalent.)
        IL = TDDF[TDDF.columns[2]].rolling(500).mean()
        OL = TDDF[TDDF.columns[3]].rolling(500).mean()
        VDIF = np.abs(IL - OL)

        # A distinct name avoids clobbering the outer row counter.
        for ch in range(len(AcousticIndexes)):
            AUDIO = TDDF[COLS[ch]].iloc[shown]
            MARK = BestStart[ch]
            title = FILE + ' Channel: ' + channels[ch] + ', start:' + str(MARK)

            ax = AUDIO.plot(c='blue', alpha=0.6)
            # 'markersize' must be lower-case: matplotlib >= 3.5 rejects the
            # mixed-case spelling that older releases silently normalised.
            plt.plot(MARK, np.mean(AUDIO), marker='x', linewidth=0,
                     markersize=14, c='r')
            plt.title(title)

            VDIF.iloc[shown].plot(ax=ax, label='VDIF')
            IL.iloc[shown].plot(ax=ax, linewidth=2)
            OL.iloc[shown].plot(ax=ax, linewidth=2)

            print(COLS[ch])
            print(title)
            plt.axvline(START, c='g', alpha=0.5, linewidth=4)
            plt.axvline(STOP, c='r', alpha=0.5, linewidth=4)
            plt.xlim(MARK - half_width, MARK + half_width)
            plt.legend()
            plt.grid()
            #plt.savefig(FILE + '_channel_' + channels[ch] + '_start_' + str(MARK)+'.jpg')
            plt.show()
_FLOW_SMOOTHING_LAGS = (1, 4, 8)
_FLOW_SMOOTHING_WINDOWS = (4, 8, 12, 16)


def _add_smoothed_flow_features(df, suffix):
    """Add rolling-mean and EWMA columns for ``flow_lag{1,4,8}<suffix>``."""
    for kind in ("rollingmean", "ewma"):
        for lag in _FLOW_SMOOTHING_LAGS:
            source = df["flow_lag%d%s" % (lag, suffix)]
            for size in _FLOW_SMOOTHING_WINDOWS:
                name = "flow_%s_lag%d%s_%d" % (kind, lag, suffix, size)
                if kind == "rollingmean":
                    df[name] = source.rolling(size).mean()
                else:
                    # pd.ewma's second positional argument was ``com``.
                    df[name] = source.ewm(com=size).mean()


def feature_engineering(df, complete_dates):
    """Build per-station flow counts plus lagged and smoothed flow features.

    Rows of ``df`` are counted per station and iteration (the ``hora``
    count becomes ``flow``).  Every station is then left-joined onto
    ``complete_dates`` so that iterations without rows get ``flow == 0``.
    On top of that the function adds, in this order:

    * ``flow_lag1`` .. ``flow_lag8``: per-station shifts of 1..8 iterations;
    * rolling means and EWMAs (sizes 4/8/12/16) of lags 1, 4 and 8;
    * ``flow_lag1day`` .. ``flow_lag8day``: per-station shifts of 94..101
      iterations, and the same rolling/EWMA features for them;
    * ``month`` and ``day_month`` taken from ``iteration_start``.

    Fixes relative to the previous version: ``flow_ewma_lag8_4`` is now
    computed from ``flow_lag8`` (it used ``flow_lag4``), and the removed
    ``pd.rolling_mean`` / ``pd.ewma`` / ``DataFrame.append`` calls are
    replaced by ``.rolling()`` / ``.ewm()`` / ``pd.concat``.

    Args:
        df: DataFrame with columns ``Ciclo_Estacion``, ``day_counter``,
            ``ITERATION``, ``iteration_start``, ``iteration_end``, ``hora``.
        complete_dates: DataFrame with one row per iteration and the columns
            ``day_counter``, ``ITERATION``, ``iteration_start``,
            ``iteration_end``.

    Returns:
        DataFrame with one row per (station, iteration) and the feature
        columns described above.
    """
    keys = ["day_counter", "ITERATION", "iteration_start", "iteration_end"]

    flow = (
        df.groupby(["Ciclo_Estacion"] + keys)["hora"].count()
        .sort_index()
        .reset_index()
        .rename(columns={"hora": "flow"})
    )

    frames = []
    for station in flow.Ciclo_Estacion.unique():
        merged = complete_dates.merge(
            flow[flow.Ciclo_Estacion == station], on=keys, how="left")
        merged["Ciclo_Estacion"] = station
        merged["flow"] = merged["flow"].fillna(0)
        frames.append(merged)
    df = pd.concat(frames) if frames else pd.DataFrame()

    # ITERATION (15 minutes) LAG VALUES
    for lag in range(1, 9):
        df["flow_lag%d" % lag] = (
            df.groupby(["Ciclo_Estacion"])["flow"].shift(lag))
    # NOTE(review): the rolling/EWMA features below run over the whole
    # concatenated frame, so windows can straddle two stations -- confirm
    # whether they should be computed per station.
    _add_smoothed_flow_features(df, "")

    # DAYs LAG VALUES
    # NOTE(review): with 15-minute iterations a day is 96 rows; these shifts
    # are 94..101 rows rather than whole days -- confirm intent.
    for lag in range(1, 9):
        df["flow_lag%dday" % lag] = (
            df.groupby(["Ciclo_Estacion"])["flow"].shift(93 + lag))
    _add_smoothed_flow_features(df, "day")

    df["month"] = df.iteration_start.apply(lambda x: x.date().month)
    df["day_month"] = df.iteration_start.apply(lambda x: x.date().day)
    return df
def heatmap(col, t_timestamp, t_win='1d'):
    """Print smoothed, colour-scaled readings of every node of ``col`` as JSON.

    The monitoring window ends 30 minutes after ``t_timestamp`` and starts
    ``timew`` hours before it (24, 72 or 720 hours for ``t_win`` of '1d',
    '3d' or '30d').  For every node of the column, readings are fetched with
    ``CSR.getsomscaldata``, values outside the open interval (0, 1300) are
    masked, ``mval1`` is mapped to ``cval``, and the frame is resampled and
    smoothed with a 3-period rolling mean.  The concatenated result is
    printed as a JSON list of records.

    Args:
        col: column (site) name; names of four characters or fewer are not
            processed and yield ``'v1'``.
        t_timestamp: end-of-window timestamp.  For ``t_win == '30d'`` it is
            indexed as a string (characters 11 and 12 are parsed as an int).
        t_win: one of '1d', '3d', '30d'.

    Returns:
        ``'v1'`` when ``len(col) <= 4``; otherwise ``None`` (the JSON is
        printed, not returned).
    """
    df_merge = pd.DataFrame()
    # Output range (smin..smax) and input range (mini..maxi) used to scale
    # mval1 into cval.
    smin = 0
    smax = 255
    mini = 0
    maxi = 1300
    if (t_win == '1d'):
        for_base = 0
        timew = 24
        interval = '30T'
    elif (t_win == '3d'):
        for_base = 0
        timew = 72
        interval = '120T'
    elif (t_win == '30d'):
        # Characters 11-12 of the timestamp string; presumably the hour of a
        # 'YYYY-MM-DD HH:MM' string -- TODO confirm the expected format.
        for_base = int(t_timestamp[11] + t_timestamp[12])
        timew = 720
        interval = '24H'
    else:
        # NOTE(review): execution continues after this message, so the next
        # statement raises NameError because timew was never assigned.
        print "invalid monitoring window"
    f_timestamp = pd.to_datetime(
        pd.to_datetime(t_timestamp) - timedelta(hours=timew))
    t_timestamp = pd.to_datetime(
        pd.to_datetime(t_timestamp) + timedelta(minutes=30))
    if (len(col) > 4):
        # NOTE(review): the query is built by string interpolation; if col
        # can come from untrusted input this should be parameterised.
        query = "select num_nodes from senslopedb.site_column_props where name = '%s'" % col
        node = qs.GetDBDataFrame(query)
        for node_num in range(1, int(node.num_nodes[0]) + 1):
            df = CSR.getsomscaldata(col,
                                    node_num,
                                    f_timestamp,
                                    t_timestamp,
                                    if_multi=True)
            if (df.empty == False):
                # Re-index the readings by their parsed timestamp.
                df = df.reset_index()
                df.ts = pd.to_datetime(df.ts)
                df.index = df.ts
                df.drop('ts', axis=1, inplace=True)
                # Mask (set to NaN) every value outside (0, 1300).
                df = df[((df < 1300) == True) & ((df > 0) == True)]
                df['cval'] = df['mval1'].apply(lambda x: (x - mini) * smax /
                                               (maxi) + smin)
                # NOTE(review): relies on the legacy resample() that returned
                # bin means directly, on its `base` argument and on
                # pd.rolling_mean; all three are gone from current pandas.
                dfrs = pd.rolling_mean(
                    df.resample(interval, base=for_base),
                    window=3,
                    min_periods=1)
                if 'mval1' in df.columns:
                    dfrs = dfrs.drop('mval1', axis=1)
                dfrs = dfrs.reset_index(0)
                df_merge = pd.concat([df_merge, dfrs], axis=0)
        # Timestamps are converted to strings before serialising.
        df_merge['ts'] = df_merge.ts.astype(object).astype(str)
        dfjson = df_merge.to_json(orient='records', double_precision=0)
        print dfjson
    else:
        return 'v1'
def inspect_voltage(AcousticIndexes, BESTLOG):
    """Show voltage traces and acoustic envelopes per channel and log row.

    For each row of ``BESTLOG`` the frame returned by
    ``getDataFrame(row.File + '.tdms')`` is loaded.  For each entry of
    ``AcousticIndexes`` a figure is shown with:

    * the 500-sample rolling means of the frame's third and fourth columns,
      each offset by the mean of its first 200 valid samples, and their
      absolute difference (``VDIF``);
    * the channel signal on a secondary y-axis and its envelope from
      ``MakePreciseEnvelope`` (divided by 400000);
    * a second envelope over ``MARK`` +/- 500 samples (divided by 200000)
      minus 30 times the first difference of its 15-sample rolling mean;
    * vertical lines at ``Start``, ``Stop`` and the channel's start mark.

    Args:
        AcousticIndexes: positional indexer into the loaded frame's columns.
            Start marks come from eight ``S*`` fields, so more than eight
            entries raises ``IndexError``.
        BESTLOG: DataFrame whose rows provide ``File``, ``Start``, ``Stop``
            and ``S1``..``S6``, ``S8``, ``S9`` (offsets relative to Start).
    """
    STARTOFFSET = 2000  # samples shown before Start
    STOPOFFSET = 0  # samples shown after Stop
    AFTBUFFER = 500  # samples after the mark used for the local envelope
    FOREBUFFER = 500  # samples before the mark used for the local envelope
    half_width = 2000  # x-axis half-width around the start mark
    channels = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S8', 'S9']

    for row in range(len(BESTLOG)):
        QUENCH = BESTLOG.iloc[row]
        FILE = QUENCH.File + '.tdms'
        START = QUENCH.Start
        STOP = QUENCH.Stop
        BestStart = np.array([
            QUENCH.S1, QUENCH.S2, QUENCH.S3, QUENCH.S4, QUENCH.S5, QUENCH.S6,
            QUENCH.S8, QUENCH.S9
        ]) + START

        TDDF = getDataFrame(FILE)
        COLS = TDDF.columns[AcousticIndexes]
        shown = slice(START - STARTOFFSET, STOP + STOPOFFSET)

        # Channel-independent traces: compute once per file.
        # (pd.rolling_mean was removed in pandas 0.23.)
        IL = TDDF[TDDF.columns[2]].rolling(500).mean()
        OL = TDDF[TDDF.columns[3]].rolling(500).mean()
        # Remove each trace's baseline (mean of its first 200 valid samples).
        IL = IL - IL.dropna().iloc[:200].mean()
        OL = OL - OL.dropna().iloc[:200].mean()
        VDIF = np.abs(IL - OL)

        # A distinct name avoids clobbering the outer row counter.
        for ch in range(len(AcousticIndexes)):
            ax = VDIF.iloc[shown].plot(label='VDIF')
            IL.iloc[shown].plot(ax=ax, linewidth=2)
            OL.iloc[shown].plot(ax=ax, linewidth=2)

            MARK = BestStart[ch]
            AUDIO = TDDF[COLS[ch]].iloc[shown]
            title = FILE + ' Channel: ' + channels[ch] + ', start:' + str(MARK)

            ENV = MakePreciseEnvelope(AUDIO)
            ENV_DF = pd.DataFrame(ENV, index=AUDIO.index) / 400000
            ENV_DF.plot(ax=ax)

            print(COLS[ch])
            print(title)
            plt.axvline(START, c='g', alpha=0.5, linewidth=4)
            plt.axvline(STOP, c='r', alpha=0.5, linewidth=4)
            AUDIO.plot(c='blue', alpha=0.4, ax=ax, secondary_y=True)
            # 'markersize' must be lower-case: matplotlib >= 3.5 rejects the
            # mixed-case spelling that older releases silently normalised.
            plt.plot(MARK, np.mean(AUDIO), marker='x', linewidth=0,
                     markersize=14, c='r')
            plt.axvline(MARK, c='r', linewidth=1)
            plt.title(title)

            # Envelope restricted to the neighbourhood of the start mark.
            channel_data = TDDF[COLS[ch]].iloc[MARK - FOREBUFFER:MARK +
                                               AFTBUFFER]
            channel_env = MakePreciseEnvelope(channel_data.values)
            ENV_DF = pd.DataFrame(channel_env,
                                  index=channel_data.index) / 200000
            ENV_DF = ENV_DF - (ENV_DF.rolling(15).mean().diff()) * 30
            ENV_DF.plot(ax=ax, c='black')

            plt.xlim(MARK - half_width, MARK + half_width)
            plt.title(title)
            plt.legend()
            plt.grid()
            #plt.savefig('UBER' + FILE + '_channel_' + channels[ch] + '_start_' + str(MARK)+'.jpg')
            plt.show()
def main():
    """Simulate a Bollinger-value event strategy and compare it with SPY.

    Steps, as implemented below:

    1. Load open/high/low/close/volume/actual_close for the symbols in
       ``sp5002012.txt`` plus SPY between 2013-01-01 and 2015-12-31.
    2. Compute a 20-day Bollinger value per symbol and build ``df_events``,
       which holds a share count for up to 14 days after each event.
    3. Turn the share counts into position values and cash flows,
       accumulate cash from ``starting_cash``, and add the position values
       to obtain the total fund value.
    4. Print Sharpe ratio, total return, volatility and average daily
       return for the fund and for SPY, and plot them.

    Side effects: writes cash_pre.csv, cash.csv, portfolio.csv, total.csv
    and ts_market.csv under ``c:/`` and opens a matplotlib window.

    NOTE(review): this is Python 2 code (print statements) and uses
    ``pd.rolling_mean``/``pd.rolling_std``/``.ix``, which no longer exist
    in current pandas.
    """
    dt_start = dt.datetime(2013, 1, 1)
    dt_end = dt.datetime(2015, 12, 31)
    sym_list = 'sp5002012.txt'
    market_sym = 'SPY'
    starting_cash = 100000
    bol_period = 20  # window (in trading days) of the Bollinger statistics
    print "Setting Up ..."
    # Obtaining data from Yahoo
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    dataobj = da.DataAccess('Yahoo')
    ls_symbols = load_symlists(sym_list)
    ls_symbols.append(market_sym)
    """ key values. Creating a dictionary. """
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))
    """ fill out N/A values """
    # Forward fill, then backward fill, then 1.0 for anything still missing.
    for s_key in ls_keys:
        d_data[s_key] = d_data[s_key].fillna(method='ffill')
        d_data[s_key] = d_data[s_key].fillna(method='bfill')
        d_data[s_key] = d_data[s_key].fillna(1.0)
    """ df_close contains only a close column. """
    df_close = d_data['close']
    df_volume = d_data['volume']
    print "Finding Events ..."
    ''' Finding the event dataframe '''
    ts_market = df_close['SPY']
    # Zero-filled frame with the same shape as df_close
    df_events = copy.deepcopy(df_close) * 0
    # Time stamps for the event range
    ldt_timestamps = df_close.index
    rolling_mean = pd.rolling_mean(df_close, window=bol_period)
    rolling_std = pd.rolling_std(df_close, window=bol_period)
    rolling_mean_vol = pd.rolling_mean(df_volume, window=bol_period)
    rolling_std_vol = pd.rolling_std(df_volume, window=bol_period)
    ''' finding_events starts here '''
    # Bollinger value: distance from the rolling mean in rolling std units.
    bol_clo = (df_close - rolling_mean) / rolling_std
    delays = 14  # maximum number of days a position is held after an event
    for s_sym in ls_symbols:
        for i in range(1, len(ldt_timestamps) - delays):
            bol_tod = bol_clo[s_sym].loc[ldt_timestamps[i]]
            bol_yes = bol_clo[s_sym].loc[ldt_timestamps[i - 1]]
            bol_tod_mark = bol_clo["SPY"].loc[ldt_timestamps[i]]
            # Event 1: symbol crosses down through -3 while SPY is >= 1.
            if (bol_tod <= -3.0 and bol_yes >= -3.0 and bol_tod_mark >= 1.0):
                for delay in range(delays):
                    # 30000 worth of shares at the event-day close.
                    # NOTE(review): chained indexing assignment; it only
                    # writes through when the column access returns a view.
                    df_events[s_sym].loc[ldt_timestamps[i + delay]] += (
                        30000.00 / df_close[s_sym].loc[ldt_timestamps[i]])
                    # Stop holding once the close exceeds the event-day close.
                    if df_close[s_sym].loc[ldt_timestamps[
                            i + delay]] > df_close[s_sym].loc[ldt_timestamps[i]]:
                        break
            # Event 2: symbol crosses up through 2 while SPY is <= -1.
            elif (bol_tod >= 2.0 and bol_yes <= 2.0 and bol_tod_mark <= -1.0):
                for delay in range(delays):
                    # 10000 worth of shares at the event-day close.
                    df_events[s_sym].loc[ldt_timestamps[i + delay]] += (
                        10000.00 / df_close[s_sym].loc[ldt_timestamps[i]])
                    # NOTE(review): same exit test as event 1 -- confirm it
                    # is intended for this branch as well.
                    if df_close[s_sym].loc[ldt_timestamps[
                            i + delay]] > df_close[s_sym].loc[ldt_timestamps[i]]:
                        break
    print "Starting Simulation ..."
    # Find symbols that satisfy the event condition.
    ls_symbols_red = []
    for sym in ls_symbols:
        for i in range(len(ldt_timestamps)):
            if df_events[sym].loc[ldt_timestamps[i]] != 0:
                ls_symbols_red.append(sym)
                break
    ''' value and cash are zero arrays '''
    # df_orders = copy.deepcopy(df_events)
    print "ls_symbols_red", ls_symbols_red
    df_orders = df_events[ls_symbols_red]
    value = copy.deepcopy(df_events) * 0
    cash = copy.deepcopy(value[market_sym])
    ''' Update value '''
    print "Updating Value and Cash Array..."
    for s_sym in ls_symbols_red:
        for i in range(len(ldt_timestamps)):
            ind_time = ldt_timestamps[i]
            if i == 0:
                # First day: any holding is a fresh purchase.
                if df_orders[s_sym].loc[ind_time] != 0:
                    sym_value = df_orders[s_sym].loc[ind_time] * df_close[
                        s_sym].loc[ind_time]
                    value[s_sym].loc[ind_time] = sym_value
                    cash[ind_time] -= sym_value
            else:
                ind_time_yest = ldt_timestamps[i - 1]
                # Position opened today (nothing held yesterday).
                if df_orders[s_sym].loc[ind_time] != 0 and df_orders[
                        s_sym].loc[ind_time_yest] == 0:
                    sym_value = df_orders[s_sym].loc[ind_time] * df_close[
                        s_sym].loc[ind_time]
                    value[s_sym].loc[ind_time] = sym_value
                    cash[ind_time] -= sym_value
                # Position held yesterday: mark to today's close and book
                # the change in share count at yesterday's close.
                elif df_orders[s_sym].loc[ind_time_yest] != 0:
                    sym_value = df_orders[s_sym].loc[ind_time] * df_close[
                        s_sym].loc[ind_time]
                    value[s_sym].loc[ind_time] = sym_value
                    cash[ind_time] -= (df_orders[s_sym].loc[ind_time] -
                                       df_orders[s_sym].loc[ind_time_yest]
                                       ) * df_close[s_sym].loc[ind_time_yest]
    ''' Update cash '''
    cash.to_csv("c:/cash_pre.csv", sep=",", mode="w")
    print "Modifying Cash Array..."
    # Turn the per-day cash flows into a running cash balance.
    cash[ldt_timestamps[0]] += starting_cash
    for i in range(1, len(ldt_timestamps)):
        ind_prev = cash[ldt_timestamps[i - 1]]
        ind_curr = cash[ldt_timestamps[i]]
        cash[ldt_timestamps[i]] = ind_curr + ind_prev
    # Save to csv files
    cash.to_csv("c:/cash.csv", sep=",", mode="w")
    value.to_csv("c:/portfolio.csv", sep=",", mode="w")
    print "Updating Total..."
    # From here on `cash` holds cash plus position value, i.e. the total.
    for i in range(len(ldt_timestamps)):
        sym_sum = 0
        for s_sym in ls_symbols_red:
            sym_sum += value[s_sym].ix[ldt_timestamps[i]]
        cash[ldt_timestamps[i]] += sym_sum
    # Save to csv files
    cash.to_csv("c:/total.csv", sep=",", mode="w")
    ts_market.to_csv("c:/ts_market.csv", sep=",", mode="w")
    # Normalizing both series by their first value.
    cash /= cash[0]
    ts_market /= ts_market[0]
    print "Summary..."
    # After normalisation the last value is the final-to-initial ratio.
    tot_ret_fund = cash[-1]
    tot_ret_mark = ts_market[-1]
    ''' Create new array for fund and market '''
    daily_ret_fund = np.zeros((len(ldt_timestamps), 1))
    daily_ret_mark = copy.deepcopy(daily_ret_fund)
    for i in range(1, len(ldt_timestamps)):
        daily_ret_fund[
            i] = cash[ldt_timestamps[i]] / cash[ldt_timestamps[i - 1]] - 1
        daily_ret_mark[i] = ts_market[ldt_timestamps[i]] / ts_market[
            ldt_timestamps[i - 1]] - 1
    vol_fund = np.std(daily_ret_fund)
    vol_mark = np.std(daily_ret_mark)
    avg_ret_fund = np.average(daily_ret_fund)
    avg_ret_mark = np.average(daily_ret_mark)
    # Annualised with sqrt(252).
    sharpe_fund = np.sqrt(252) * avg_ret_fund / vol_fund
    sharpe_mark = np.sqrt(252) * avg_ret_mark / vol_mark
    print "Start Date:", dt_start
    print "End Date :", dt_end
    print " "
    print "Sharpe Ratio of Fund: ", sharpe_fund
    print "Sharpe Ratio of $SPX: ", sharpe_mark
    print " "
    print "Total Return of Fund: ", tot_ret_fund
    print "Total Return of $SPX: ", tot_ret_mark
    print " "
    print "Standard Deviation of Fund: ", vol_fund
    print "Standard Deviation of $SPX: ", vol_mark
    print " "
    print "Average Daily Return of Fund: ", avg_ret_fund
    print "Average Daily Return of $SPX: ", avg_ret_mark
    # Top: fund (red) vs. market (blue).  Middle: SPY volume with a band of
    # one rolling std around its rolling mean.  Bottom: rolling std of the
    # SPY close.
    f, axarr = plt.subplots(3, sharex=True)
    axarr[0].plot(cash.index, cash, 'r', ts_market.index, ts_market, 'b')
    axarr[0].set_title('Testing')
    axarr[1].plot(ts_market.index, df_volume["SPY"], 'b', ts_market.index,
                  rolling_mean_vol["SPY"] + rolling_std_vol["SPY"], 'b--',
                  ts_market.index,
                  rolling_mean_vol["SPY"] - rolling_std_vol["SPY"], 'b--')
    axarr[2].plot(ts_market.index, rolling_std["SPY"], 'g')
    plt.show()
import pandas as pd
from datetime import datetime
from sklearn import datasets, linear_model
from sklearn.metrics import mean_absolute_error

# Predict the S&P 500 close from trailing averages of earlier closes.
hist = pd.read_csv('sphist.csv', parse_dates=['Date'])
hist.sort_values('Date', ascending=True, inplace=True)

# Trailing means shifted by one row so each feature only uses earlier rows.
# (pd.rolling_mean was removed in pandas 0.23; .rolling().mean() gives the
# same values.)
for days in (5, 30, 365):
    hist['avg_%d_days' % days] = (
        hist.Close.rolling(window=days).mean().shift(1))

# Drop the rows that cannot have a full 365-row history.
clean_hist = hist[hist['Date'] > datetime(year=1951, month=1, day=2)].copy()
clean_hist.dropna(axis=0, inplace=True)

# Train on everything before 2013, evaluate on 2013 onwards.
split_date = datetime(year=2013, month=1, day=1)
train = clean_hist[clean_hist['Date'] < split_date].copy()
test = clean_hist[clean_hist['Date'] >= split_date].copy()

features = ['avg_5_days', 'avg_30_days', 'avg_365_days']
lr = linear_model.LinearRegression()
lr.fit(train[features], train['Close'])
predictions = lr.predict(test[features])

# Mean absolute error on the held-out period.
test_msa = mean_absolute_error(test['Close'], predictions)
print(test_msa)
def _apply_strategy(data, delta, tag, win=200):
    """Add position/result/moving-average/filtered columns for one strategy and plot them."""
    prev_move = data['diff'].shift(1)
    # Long (1) after a move <= -delta, short (-1) after a move >= delta.
    data['position_' + tag] = np.where(
        prev_move <= -delta, 1, np.where(prev_move >= delta, -1, 0))
    data['result_' + tag] = (data['diff'] * data['position_' + tag]).cumsum()
    # Only trade while the previous result is above its moving average.
    # (pd.rolling_mean was removed in pandas 0.23.)
    data['ma_' + tag] = data['result_' + tag].rolling(window=win).mean()
    filtering = data['result_' + tag].shift(1) > data['ma_' + tag].shift(1)
    data['filteredresult_' + tag] = np.where(
        filtering, data['diff'] * data['position_' + tag], 0).cumsum()
    data[['ma_' + tag, 'result_' + tag,
          'filteredresult_' + tag]].plot(figsize=(10, 8))
    plt.show()
    plt.close()


def _head_slice(data, fraction):
    """Return the rows from the start up to the row at position int(fraction * len(data))."""
    cut = data.index[data['sel'] == int(fraction * len(data))]
    return data.loc[:cut.strftime('%Y%m%d').tolist()[0]]


def _optimal_f(sample, total_rows, column):
    """Return the 'optimal f' figure computed from the positive and negative values of *column*."""
    wins = sample[sample[column] > 0][column]
    losses = sample[sample[column] < 0][column]
    # NOTE(review): the share of winners is taken over the full data set,
    # not over `sample` -- confirm this is intended.
    p = float(len(wins)) / float(total_rows)
    plr = wins.mean() / losses.mean()
    # NOTE(review): evaluates as p*(plr+1) - (1/plr); if (p*(plr+1) - 1)/plr
    # was meant, parentheses are missing.  Left unchanged.
    return p * (plr + 1) - 1 / plr


def main():
    """
    This function is called from the main block. The purpose of this function is to contain all the calls to
    business logic functions.

    It downloads the 'Open' prices for a user-supplied ticker, evaluates a
    mean-reversion and a breakout strategy on the daily differences (each
    filtered by a 200-row moving average of its own result), plots the
    results, prints an "optimal F" figure for both on the first 80% of the
    rows, and prints ffn statistics for the first 20% of the rows.

    :return: int - 0 on success, 1 if any exception was raised (the
        exception text is printed).
    """
    # Wrap in a try block so that we catch any exceptions thrown by other functions and return a 1 for graceful exit
    try:
        # ===== Step 0: Sanitation =====
        # Fix Pandas Datareader's issues with Yahoo Finance
        yahoo_finance_bridge()

        # ===== Step 1: Get the Ticker From user =====
        stock_ticker = get_ticker_from_user()
        logging.debug('Stock Ticker is: %s' % str(stock_ticker))

        # ===== Step 2: Download the data for the Ticker =====
        data = get_data_from_yahoo_finance(str(stock_ticker))
        data = pd.DataFrame(data['Open'])
        data = data.sort_index(axis=0, ascending=True)
        # Daily differences and their running total.
        data['diff'] = data.diff(periods=1)
        data['cum'] = data['diff'].cumsum()

        # Mean reversion: long after a drop of at least delta, short after
        # a rise of at least delta.
        _apply_strategy(data, 0.005, 'mr')
        # Breakout: a negative delta flips the strategy.
        _apply_strategy(data, -0.01, 'bo')

        # Here we combine the Meanreversion and the Breakout strategy results
        data['combi'] = data['filteredresult_mr'] + data['filteredresult_bo']
        data[['combi', 'filteredresult_mr',
              'filteredresult_bo']].plot(figsize=(10, 8))

        # Row counter used to cut the data by position.
        data['sel'] = range(int(len(data)))
        eighty_data = _head_slice(data, 0.8)

        op_f_mr = _optimal_f(eighty_data, len(data), 'result_mr')
        print('Optimal F for MR is: %s' % str(op_f_mr))
        op_f_bo = _optimal_f(eighty_data, len(data), 'result_bo')
        print('Optimal F for BO is: %s' % str(op_f_bo))

        # Calculate KPIs on 20% data
        # NOTE(review): this is the FIRST 20% of the rows, which lies inside
        # the 80% slice above -- confirm whether the last 20% was intended.
        twenty_data = _head_slice(data, 0.2)

        # calc_stats is not a pandas method; presumably importing ffn
        # attaches it -- TODO confirm.
        import ffn

        # FOR Moving Average
        perf = twenty_data['result_mr'].calc_stats()
        perf.plot()
        plt.show()
        plt.close()
        print(perf.display())

        # FOR Breakout
        perf_bo = twenty_data['result_bo'].calc_stats()
        perf_bo.plot()
        plt.show()
        plt.close()
        print(perf_bo.display())
    except BaseException as e:
        # Casting a wide net to catch all exceptions
        print('\n%s' % str(e))
        return 1
    return 0
def get_rolling_mean(values, window):
    """Return rolling mean of given values, using specified window size.

    Args:
        values: a pandas Series or DataFrame; any other 1-D sequence is
            wrapped in a Series first.
        window: number of consecutive observations per mean.

    Returns:
        An object of the same shape as *values*; the first ``window - 1``
        entries are NaN because the window is not yet full.
    """
    if not hasattr(values, "rolling"):
        values = pd.Series(values)
    # pd.rolling_mean was removed in pandas 0.23; the .rolling() accessor is
    # the equivalent.
    return values.rolling(window=window).mean()
def organize_data(self):
    """Prepare index, broker-position and turnover data, all lagged by one row.

    Loads three frames through ``self.load_data()`` and returns them after:

    * index data: rolling means of ``close`` (30 and 60 rows, stored as
      ``MA5``/``MA10``), their difference ``trend``, and ``return_rate``
      (difference of log ``open``); ``trend``, ``MA5``, ``MA10`` and
      ``position_all`` are shifted down by one row;
    * position data: buy and sell holdings are stacked into one long frame
      (sell side negated), aggregated per date and company, restricted to
      ``self.brokerName``, outer-merged with every index date and shifted
      by one row per company;
    * turnover data: ``turn_over_rate`` shifted by one row.

    Side effects: sets ``self.position_data_org``,
    ``self.position_data_org_1``/``_2``, ``self.position_data_selected`` and
    ``self.position_data_lagged``; prints every broker name.

    Returns:
        tuple ``(index_data, self.position_data_lagged, position_turn_over)``.

    NOTE(review): Python 2 code; ``pd.rolling_mean``, ``DataFrame.sort`` and
    ``DataFrame.append`` no longer exist in current pandas.
    """
    position_data, index_data, position_turn_over = self.load_data()

    #### index_data ####
    # Calculate moving averages to find the trend.
    # NOTE(review): the columns are named MA5/MA10 but the windows are 30
    # and 60 rows, and the means are computed before the frame is sorted by
    # update_date -- confirm both are intended.
    index_data['MA5'] = pd.rolling_mean(index_data['close'], 30)
    index_data['MA10'] = pd.rolling_mean(index_data['close'], 60)
    index_data['trend'] = index_data['MA5'] - index_data['MA10']
    index_data = index_data.sort(['update_date'])
    # Shift by one row so a row only carries values from the previous row.
    for col in ['trend', 'MA5', 'MA10', 'position_all']:
        index_data[col] = index_data[col].shift(1)
    index_data['log_open'] = np.log(index_data['open'])
    index_data['return_rate'] = index_data['log_open'].diff()
    index_data = index_data.drop(['log_open'], axis=1)

    def hmm(category):
        # Returns a one-argument function that calls execute(day, category).
        def hmm_with_category(day):
            return execute(day, category)

        return hmm_with_category

    # Built but only referenced by the disabled line below.
    exe = hmm(self.category.upper())

    # index_data['trend']=map(exe,index_data['update_date'])
    #### position_data ####
    def position_org(position_data):
        # Stack the buy side and the (negated) sell side into one frame.
        self.position_data_org = pd.DataFrame(columns=[
            'company_name', 'position', 'position_chg', 'update_date',
            'contract'
        ])
        # Buy side: tagged 'pos'; direction_tag is 10 when the change is > 0.
        temp = position_data[[
            'company_name_2', 'hold_vol_buy', 'hold_vol_buy_chg',
            'update_date', 'contract'
        ]]
        temp = temp.rename(
            columns={
                'company_name_2': 'company_name',
                'hold_vol_buy': 'position',
                'hold_vol_buy_chg': 'position_chg'
            })
        temp['direction_tag'] = temp['position_chg'].apply(lambda x: 10
                                                           if x > 0 else 0)
        temp['tag'] = 'pos'
        self.position_data_org = self.position_data_org.append(temp)
        # Sell side: values negated, tagged 'neg'; direction_tag is 1 when
        # the negated change is < 0.
        temp = position_data[[
            'company_name_3', 'hold_vol_sell', 'hold_vol_sell_chg',
            'update_date', 'contract'
        ]]
        temp = temp.rename(
            columns={
                'company_name_3': 'company_name',
                'hold_vol_sell': 'position',
                'hold_vol_sell_chg': 'position_chg'
            })
        temp['position'] = -1 * temp['position']
        temp['position_chg'] = -1 * temp['position_chg']
        temp['direction_tag'] = temp['position_chg'].apply(lambda x: 1
                                                           if x < 0 else 0)
        temp['tag'] = 'neg'
        self.position_data_org = self.position_data_org.append(temp)
        return self.position_data_org

    self.position_data_org = position_org(position_data)
    # Sum per (date, company).
    self.position_data_org_2 = self.position_data_org.groupby(
        ['update_date', 'company_name']).sum()
    try:
        # Replace the summed 'contract' column by the row count per group.
        self.position_data_org_2 = self.position_data_org_2.drop(
            ['contract'], axis=1)
        self.position_data_org_1 = self.position_data_org.groupby(
            ['update_date', 'company_name']).contract.count()
        self.position_data_org = pd.concat(
            [self.position_data_org_2, self.position_data_org_1],
            axis=1,
            join='inner')
        self.position_data_org = self.position_data_org.loc[:, [
            'position', 'position_chg', 'direction_tag', 'contract'
        ]]
    except:
        # NOTE(review): bare except -- any failure above falls back to the
        # summed frame with a constant contract count of 1.
        self.position_data_org = self.position_data_org_2
        self.position_data_org['contract'] = 1
    self.position_data_org.reset_index(inplace=True)

    # Pull out the position-change records of the selected brokers.
    self.position_data_selected = pd.DataFrame(
        columns=self.position_data_org.columns)
    for item in self.brokerName:
        print item
        temp = self.position_data_org[
            self.position_data_org['company_name'] == item]
        if len(temp) != 0:
            self.position_data_selected = self.position_data_selected.append(
                temp)
        else:
            print 'cannot find %s in data, please check...' % item

    # Align the filtered data against the full set of dates.
    self.position_data_selected = pd.merge(index_data[['update_date']],
                                           self.position_data_selected,
                                           on=['update_date'],
                                           how='outer')

    # Use the data obtained at today's close as the basis for tomorrow's
    # decision: shift each broker's values down by one row.
    self.position_data_lagged = pd.DataFrame()
    for i, j in self.position_data_selected.groupby('company_name'):
        j = j.sort('update_date')
        for col in ['position', 'position_chg', 'direction_tag']:
            j[col] = j[col].shift(1)
        self.position_data_lagged = self.position_data_lagged.append(j)

    #### position_turn_over ####
    position_turn_over = position_turn_over.loc[:, [
        'update_date', 'turn_over_rate'
    ]]
    position_turn_over['turn_over_rate'] = position_turn_over[
        'turn_over_rate'].shift(1)
    return index_data, self.position_data_lagged, position_turn_over
def predict(df, prediction_start_date, prediction_end_date, predict_tommorrow):
    """Walk-forward predict closes and write a next-day forecast to disk.

    A placeholder row dated ``predict_tommorrow`` is appended to ``df``,
    trailing features (means, standard deviations and ratios over 5, 30 and
    365 rows, each shifted by one row) are derived from ``Close`` and
    ``Volume``, and for every date in the prediction range a linear
    regression and a random forest are re-fitted on all earlier rows.  The
    model with the smaller mean absolute error predicts the value for
    ``predict_tommorrow``, and a summary sentence is written to the file
    ``predicted_value_for_tommorrow`` in the working directory.

    Args:
        df: DataFrame with exactly seven columns including ``Date``
            (``%Y-%m-%d``), ``Close`` and ``Volume``, on a default integer
            index.  It is modified in place (row appended, ``Date``
            converted, ``year`` added).
        prediction_start_date: first date of the evaluation range.
        prediction_end_date: last date of the evaluation range.
        predict_tommorrow: date to forecast; must support
            ``+ timedelta(days=1)``.

    Returns:
        DataFrame indexed by date with columns ``Actual``,
        ``Predicted_regression`` and ``Predicted_random_forest``.
    """
    # Placeholder row so that features exist for the day to be forecast.
    df.loc[len(df)] = [predict_tommorrow, 0, 0, 0, 0, 0, 0]
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
    df['year'] = pd.DatetimeIndex(df['Date']).year
    df = df.set_index('Date', drop=True)
    df = df.sort_index(axis=0, ascending=True)

    # Every feature is shifted by one row so it only uses earlier rows.
    # (pd.rolling_mean / pd.rolling_std were removed in pandas 0.23.)
    df['avg_close_price_day_5'] = df['Close'].rolling(window=5).mean().shift(1)
    df['avg_close_price_day_30'] = df['Close'].rolling(window=30).mean().shift(1)
    df['avg_close_price_day_365'] = df['Close'].rolling(window=365).mean().shift(1)
    df['ratio_avg_close_price_5_365'] = df['avg_close_price_day_5'] / df[
        'avg_close_price_day_365']
    df['std_close_price_day_5'] = df['Close'].rolling(window=5).std().shift(1)
    df['std_close_price_day_365'] = df['Close'].rolling(window=365).std().shift(1)
    df['ratio_std_close_price_5_365'] = df['std_close_price_day_5'] / df[
        'std_close_price_day_365']
    df['avg_volume_day_5'] = df['Volume'].rolling(window=5).mean().shift(1)
    df['avg_volume_day_365'] = df['Volume'].rolling(window=365).mean().shift(1)
    df['ratio_volume_5_365'] = df['avg_volume_day_5'] / df['avg_volume_day_365']
    # NOTE(review): named "std" but computed as a rolling MEAN of the
    # averaged volume -- kept as is to preserve the model's features;
    # confirm whether a rolling std was intended.
    df['std_avg_volume_5'] = df['avg_volume_day_5'].rolling(window=5).mean().shift(1)
    df['std_avg_volume_365'] = df['avg_volume_day_365'].rolling(window=365).mean().shift(1)
    df['ratio_std_avg_volume_5_365'] = df['std_avg_volume_5'] / df[
        'std_avg_volume_365']

    # Keep the target plus everything from column position 6 onwards.
    df = df[['Close'] + list(df.columns[6:])]
    df = df.dropna(axis=0)
    predictors = list(df.columns[1:])

    predicted_values_regression = []
    predicted_values_random_forest = []
    df_prediction = pd.DataFrame()
    # .ix was removed in pandas 1.0; these are label-based slices on the
    # date index, i.e. .loc.
    df_prediction['Actual'] = df.loc[
        prediction_start_date:prediction_end_date]['Close']

    regressor = LinearRegression()
    random_forest_regressor = RandomForestRegressor()
    for index in df_prediction.index:
        # Train on everything up to the day before, predict the day itself.
        train = df.loc[df.index[0]:index - timedelta(days=1)]
        test = df.loc[index:index]
        train_predictors = train[predictors]
        train_to_predict = train['Close']
        regressor.fit(train_predictors, train_to_predict)
        random_forest_regressor.fit(train_predictors, train_to_predict)
        test_predictors = test[predictors]
        predicted_values_regression.append(
            regressor.predict(test_predictors)[0])
        predicted_values_random_forest.append(
            random_forest_regressor.predict(test_predictors)[0])

    df_prediction['Predicted_regression'] = predicted_values_regression
    df_prediction['Predicted_random_forest'] = predicted_values_random_forest

    mae_regression = sum(
        abs(df_prediction['Actual'] -
            df_prediction['Predicted_regression'])) / len(
                df_prediction['Predicted_regression'])
    mae_random_forest = sum(
        abs(df_prediction['Actual'] -
            df_prediction['Predicted_random_forest'])) / len(
                df_prediction['Predicted_random_forest'])

    # The models are in the state left by the last loop iteration.
    tomorrow = df.loc[predict_tommorrow:predict_tommorrow + timedelta(days=1)]
    tomorrow_predictors = tomorrow[predictors]
    if mae_regression <= mae_random_forest:
        prediction_for_tomorrow = regressor.predict(tomorrow_predictors)[0]
    else:
        prediction_for_tomorrow = random_forest_regressor.predict(
            tomorrow_predictors)[0]

    # The context manager closes the file even if the write fails.
    with open('predicted_value_for_tommorrow', 'w') as f:
        f.write(
            'The mean absolute error of linear regression model and randome forest model is %s and %s , respectively. Based on the model with smaller mae, predicted value for tomorrow is %s .'
            % (mae_regression, mae_random_forest, prediction_for_tomorrow))
    return df_prediction
# Date ranges: business month ends, and weekly on Fridays.
print('生成日期范围:\n', pd.date_range('2012/1/4', '2012/4/6', freq='BM'))
print('生成日期范围:\n', pd.date_range('2012/3/4', '2012/4/6', freq='W-FRI'))

# Shifting: by position (forwards / backwards) and by a time offset.
ts = pd.Series(np.random.randn(4),
               index=pd.date_range('1/1/2000', periods=4, freq='M'))
print('时间位移:', ts.shift(2))
print('时间位移:', ts.shift(-2))
print('时间位移:', ts.shift(1, freq='3D'))

# Traded volume and traded price indexed by time.
s = pd.Series(df.trade_vol.values, index=df.time)
s2 = pd.Series(df.trade_pr.values, index=df.time)
ticks = pd.DataFrame(
    {
        'open': df.trade_pr.values,
        'high': df.s1pr.values,
        'low': df.b1pr.values,
        'close': df.trade_pr.values
    },
    index=df.time)

# The how= / fill_method= arguments of resample() were removed from pandas;
# the aggregation and the fill are now chained method calls.
ms = s.resample('5min').sum()
print('降采样:', ms[:5])
mt = s2.resample('1min').ohlc().ffill()
print('open high low close降采样:', mt[:10])
s15t = mt.resample('15s').ffill()
print('升采样:', s15t[:10])

# pd.rolling_mean / pd.ewma were removed in pandas 0.23.
mean_s = s.rolling(5, min_periods=1).mean()
print('求移动均值:', mean_s[:10])
ema_s = s.ewm(com=60).mean()
# Fixed: this line used to print mean_s again instead of the EWMA.
print('求指数移动均值:', ema_s[:10])
def moving_average(self, values, window=100):
    """Return the rolling mean of column ``values`` of ``self.df``.

    Args:
        values: Label of the column in ``self.df`` to average.
        window: Number of consecutive rows in each window. Defaults to 100,
            the size that used to be hard-coded.

    Returns:
        The rolling mean of the selected column. Positions that do not yet
        have ``window`` observations behind them are NaN.
    """
    # ``pd.rolling_mean`` was removed from pandas; the ``.rolling()`` method
    # is its replacement and keeps the same default ``min_periods``.
    return self.df[values].rolling(window).mean()
df = pd.io.excel.read_excel( "C:\Users\PAULINKENBRANDT\Downloads\E5382-MonitoringData (1)\North_Side_Weirs.xlsx", "Main", index_col=0) wld = df.resample('60Min') wld['NB_wl_std'] = pd.stats.moments.rolling_std('NB_ft_water', 24) wld['NB_wl_avg'] = pd.stats.moments.rolling_mean('NB_ft_water', 24) wld['NB_ft_water'].plot(style='k--') wld['NB_wl_avg'].plot(style='k') wldata = wld.ix[1050:] wldata['rollmean'] = pd.rolling_mean(wldata['wlelev_m'], 30) wldata['wlnorm'] = (wldata['wlelev_m'] - wldata['wlelev_m'].mean()) / ( wldata['wlelev_m'].max() - wldata['wlelev_m'].min()) wldata['bpnorm'] = (wldata['bp_mH2O'] - wldata['bp_mH2O'].mean()) / ( wldata['bp_mH2O'].max() - wldata['bp_mH2O'].min()) wldata['tempnorm'] = (wldata['temp'] - wldata['temp'].mean()) / ( wldata['temp'].max() - wldata['temp'].min()) wldata['condnorm'] = (wldata['cond'] - wldata['cond'].mean()) / ( wldata['cond'].max() - wldata['cond'].min()) wldata['dwl'] = wldata['wlelev_m'].diff() wldata['dbp'] = wldata['bp_mH2O'].diff() ######################################################## date conversion #function to convert date into julian date def jday(Y, M, D, h, m, s):
# NOTE(review): the statements down to the `dfoutput` print read `timeseries`,
# which is not assigned in the visible code — presumably they are the tail of
# test_stationarity(timeseries); confirm the indentation against the full file.
# `print` is written as a function call so the code also parses on Python 3.
print('Results of Dickey-Fuller Test:')
dftest = adfuller(timeseries, autolag='AIC')
# The first four entries of the result are labelled; the fifth is a mapping of
# critical values that is flattened into the same Series.
dfoutput = pd.Series(dftest[0:4], index=[
    'Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'
])
for key, value in dftest[4].items():
    dfoutput['Critical Value (%s)' % key] = value
print(dfoutput)

# Log-transform the series and plot it.
ts_log = np.log(ts)
plt.plot(ts_log)

# 12-period moving average. `pd.rolling_mean` was removed from pandas; the
# `.rolling()` method is its replacement.
moving_avg = ts_log.rolling(12).mean()
plt.plot(ts_log)
plt.plot(moving_avg, color='red')

# Subtract the moving average; the rows where the window is incomplete are NaN
# and are dropped before testing.
ts_log_moving_avg_diff = ts_log - moving_avg
ts_log_moving_avg_diff.head(12)  # result is discarded unless run interactively
ts_log_moving_avg_diff.dropna(inplace=True)
test_stationarity(ts_log_moving_avg_diff)

# Exponentially weighted average with a half-life of 12. `pd.ewma` was removed
# from pandas; the `.ewm()` method is its replacement.
expwighted_avg = ts_log.ewm(halflife=12).mean()
plt.plot(ts_log)
plt.plot(expwighted_avg, color='red')

ts_log_ewma_diff = ts_log - expwighted_avg
test_stationarity(ts_log_ewma_diff)