def walk_foward(self):
    """Run the walk-forward analysis (optimization) over every period.

    The overall interval delimited by `self.start_date` and `self.end_date`
    is first split into periods by `dm.date_dict`; the result is stored in
    `self.dict_date_`. Each period holds one date range per entry of
    `self.dict_name_` (the training and testing sub-periods). For every
    sub-period the series is reloaded and the indicators are recomputed.
    Parameters are optimized only on the sub-period named
    `self.training_name_`; any other sub-period re-uses the parameters
    found during training.

    After each sub-period, `self.pnl_dict` and `self.params` are written
    through `md.write_csv_`. On the first period only, the output attached
    to each sub-period name is emptied beforehand.

    Note that `self.start_date` and `self.end_date` are overwritten with
    the bounds of each sub-period while the analysis runs.

    Parameters
    ----------
    `self.start_date` : datetime object
        Set in `initialize.py`. Beginning date of training and testing.
    `self.end_date` : datetime object
        Set in `initialize.py`. End date of training and testing.

    Raises
    ------
    Exception
        If splitting the interval yields no period at all.
    """
    self.dict_date_ = dm.date_dict(self.start_date, self.end_date, **self.dict_name_)
    if len(self.dict_date_) == 0:
        raise Exception("Total period not long enough for optimization")

    for period_no, (period_key, _) in enumerate(self.dict_date_.items()):
        for phase, _ in self.dict_name_.items():
            bounds = self.dict_date_[period_key][phase]
            self.start_date = bounds[0]
            self.end_date = bounds[1]

            # Start from a clean output, but only once per sub-period name.
            if period_no == 0:
                md(self.dir_output, self.name_out, extension=phase).erase_content()

            self.init_series()
            self.calcul_indicator()

            if phase != self.training_name_:
                # Test period: apply the parameters optimized in training.
                self.assign_value()
                self.pnl_()
            else:
                # Training period: the only place where we optimize.
                self.optimize_param()
                self.pnl_dict, self.params = ga(self)()

            # Results first, then the parameters that produced them.
            for attr_name in ("pnl_dict", "params"):
                md.write_csv_(self.dir_output, self.name_out, add_doc=phase,
                              is_walkfoward=self.is_walkfoward,
                              **getattr(self, attr_name))
def __call__(self):
    """Run the strategy, with or without walk-forward optimization.

    When `self.is_walkfoward` is truthy the whole job is delegated to
    `self.walk_foward()`. Otherwise the indicators are computed once, the
    P&L is evaluated with `self.pnl_()` and `self.pnl_dict` is written
    through `md.write_csv_` with an empty `add_doc`.
    """
    if self.is_walkfoward:
        self.walk_foward()
        return

    # Single pass: no optimization, just indicators -> P&L -> output.
    self.calcul_indicator()
    self.pnl_()
    md.write_csv_(
        self.dir_output,
        self.name_out,
        add_doc="",
        is_walkfoward=self.is_walkfoward,
        **self.pnl_dict
    )
def __init__(self, series_, self_):
    """Build the object from a deep copy of another object's state.

    Parameters
    ----------
    series_ : data container accepted by `md.sous_series_`
        Series from which the initial sub-series is extracted.
        (presumably a pandas DataFrame -- confirm against callers)
    self_ : object
        Object whose attributes are deep-copied into this instance, so
        that later changes here do not leak back into `self_`.
    """
    super().__init__()
    super().__call__()

    # Adopt every attribute of `self_`, working on an independent copy.
    vars(self).update(vars(copy.deepcopy(self_)))
    io.init_series(self)

    # First window of `self.nb_data` points taken from `series_`.
    self.sous_series = md.sous_series_(series_, self.nb_data)
def chart_trigger(self):
    """Mark the entry and exit levels on the chart.

    Entry points are taken from `self.trades_track` using `self.entry_row`
    and drawn with a green '^'; exit points are taken using
    `self.exit_row` and drawn with a red 'v'. Both marker descriptions are
    handed to `self.cht_.chart_marker` as keyword arguments.
    """
    def describe(symbol, colour, levels):
        # One marker description, keyed by the configured field names.
        return {
            self.marker_: symbol,
            self.color_mark: colour,
            self.marker_signal: levels,
        }

    entries = md.pd_tolist(self.trades_track, self.entry_row)
    exits = md.pd_tolist(self.trades_track, self.exit_row)

    self.cht_.chart_marker(
        self.marker_signal,
        self.marker_,
        self.color_mark,
        marker_entry=describe('^', 'g', entries),
        marker_exit=describe('v', 'r', exits),
    )
def init_series(self):
    """Load the data used by the strategy into `self.series`.

    The data is read through `md.csv_to_pandas` for the interval
    `self.start_date` .. `self.end_date`. When `self.is_detrend` is truthy,
    a second series is produced by `md.de_trend` and stored in
    `self.series_diff`.
    """
    load_args = (
        self.date_name,
        self.start_date,
        self.end_date,
        self.name,
        self.directory,
        self.asset,
    )
    load_options = {
        'ordinal_name': self.date_ordinal_name,
        'is_fx': self.is_fx,
        'dup_col': self.dup_col,
    }
    self.series = md.csv_to_pandas(*load_args, **load_options)

    if not self.is_detrend:
        return

    # Optional de-trended companion of the series just loaded.
    self.series_diff = md.de_trend(
        self.series,
        self.date_name,
        self.date_ordinal_name,
        self.default_data,
        period=self.period,
        p_value=self.p_value,
    )
def __init__(self, series_, self_, alpha=0.01, iteration=True):
    """Build the object from a deep copy of another object's state.

    Parameters
    ----------
    series_ : data container accepted by `md.sous_series_`
        Full series; kept as `self.series_mk` and used to extract the
        initial sub-series. (presumably a pandas DataFrame -- confirm
        against callers)
    self_ : object
        Object whose attributes are deep-copied into this instance.
    alpha : float, optional
        Significance level stored as `self.alpha` (default 0.01).
    iteration : bool, optional
        Initial value of `self.first_iteration` (default True).
    """
    super().__init__()
    super().__call__()

    # Adopt every attribute of `self_`, working on an independent copy.
    vars(self).update(vars(copy.deepcopy(self_)))
    io.init_series(self)

    self.alpha, self.first_iteration = alpha, iteration
    # Running sum of signs; starts empty.
    self.nb_sign = 0

    # First window of `self.nb_data` points, plus a handle on the full series.
    self.sous_series = md.sous_series_(series_, self.nb_data)
    self.series_mk = series_
def calcul_indicator(self):
    """Compute every indicator over the whole series, one window at a time.

    An indicator always takes into account the price on its own row.
    Example: at row 99 the indicator uses the data up to and including
    row 99 and its value is written on row 99, so the market can only be
    entered or exited on the next row.

    For each entry of `self.indicator`, a column named after the key is
    added to `self.series` (filled with NaN). The method then slides a
    window of `self.nb_data` rows over the series: the window is obtained
    from `md.sous_series_`, stored on the indicator object as
    `sous_series`, and the indicator method bearing the same name as the
    key is called. The result is written on the last row of the window.

    Parameters
    ----------
    `self.series` : pandas Dataframe
        It contains the series used to build the model.
    `self.nb_data` : int
        Number of rows in each window.

    Return
    ------
    Nothing. `self.indicator`, `self.r_square_key`, `self.mk_key` and
    `self.point_data` are (re)set, and one column per indicator is added
    to `self.series`.
    """
    super().__call__()

    self.indicator = {
        'r_square': lr.RegressionSlopeStrenght(self.series, self),
        'mk': mk.MannKendall(self.series, self),
    }
    indicator_keys = list(self.indicator)
    self.r_square_key = indicator_keys[0]
    self.mk_key = indicator_keys[1]
    self.point_data = 0

    index = self.series.index
    # Number of complete windows; zero or negative means nothing to compute.
    nb_windows = len(index) - self.nb_data + 1

    for key, indicator in self.indicator.items():
        self.series[key] = np.nan
        compute = getattr(indicator, key)
        indicator.point_data = 0
        for row in range(nb_windows):
            indicator.sous_series = md.sous_series_(
                self.series, self.nb_data, point_data=indicator.point_data)
            # Locate the window's last row by POSITION, then translate it to
            # its label. Adding an offset to the label itself is only right
            # for a contiguous integer index and would otherwise write to
            # the wrong row or silently append a new one.
            last_label = index[row + self.nb_data - 1]
            self.series.loc[last_label, key] = compute()
            indicator.point_data += 1
def mk(self):
    """Return the Mann-Kendall trend direction for the current window.

    This is not original work: it is adapted from
    https://github.com/mps9506/Mann-Kendall-Trend/blob/master/mk_test.py,
    itself derived from code originally posted by Sat Kumar Tomer.
    See also: http://vsp.pnnl.gov/help/Vsample/Design_Trend_Mann_Kendall.htm

    The purpose of the Mann-Kendall (MK) test (Mann 1945, Kendall 1975,
    Gilbert 1987) is to statistically assess whether there is a monotonic
    upward or downward trend of the variable of interest over time. A
    monotonic upward (downward) trend means that the variable consistently
    increases (decreases) through time, but the trend may or may not be
    linear. The MK test can be used in place of a parametric linear
    regression analysis; unlike the regression, it does not require the
    residuals to be normally distributed, i.e. it is a non-parametric
    (distribution-free) test. Hirsch, Slack and Smith (1982, page 107)
    indicate that the MK test is best viewed as an exploratory analysis.

    To save time when moving from one data point to the next, the sum of
    signs `self.nb_sign` is kept between calls. The first call
    (`self.first_iteration` truthy) computes it over every pair of the
    window. Later calls add the contribution of the newest value and
    remove the contribution of the value that left the window; that value
    is read from the window fetched with `point_data=self.point_data - 1`
    (presumably the window one step earlier -- confirm in
    `md.sous_series_`).

    The variance and its tie correction are always computed from the
    CURRENT window, and `self.sous_series` is left untouched.

    The test is two-sided, at significance level `self.alpha`.

    Reads
    -----
    `self.sous_series`, `self.default_data`, `self.first_iteration`,
    `self.nb_sign`, `self.alpha`; in incremental mode also
    `self.series_mk`, `self.nb_data` and `self.point_data`.

    Return
    ------
    int
        -1 if there is a negative trend (at the significance level),
        +1 if there is a positive trend (at the significance level),
        0 otherwise.
    """
    window = self.sous_series.loc[:, self.default_data].values
    n = len(window)

    if self.first_iteration:
        # Full pass: S = sum over all pairs k < j of sign(x_j - x_k).
        for k in range(n - 1):
            self.nb_sign += np.sign(window[k + 1:] - window[k]).sum()
    else:
        # Add the pairs formed by the newest value with the rest of the window.
        if n > 1:
            self.nb_sign += np.sign(window[-1] - window[:-1]).sum()
        # Remove the pairs formed by the value that dropped out. The older
        # window lives in its own local so it cannot leak into the tie
        # counts below.
        previous = md.sous_series_(
            self.series_mk, self.nb_data, point_data=self.point_data - 1
        ).loc[:, self.default_data].values
        if len(previous) > 1:
            self.nb_sign -= np.sign(previous[1:] - previous[0]).sum()
    self.first_iteration = False

    # Variance of S, corrected for groups of tied values when there are any.
    _, tie_counts = np.unique(window, return_counts=True)
    var_s = n * (n - 1) * (2 * n + 5)
    if len(tie_counts) != n:
        var_s -= np.sum(tie_counts * (tie_counts - 1) * (2 * tie_counts + 5))
    var_s = var_s / 18

    # Normalized test statistic, with continuity correction.
    if self.nb_sign > 0:
        z = (self.nb_sign - 1) / np.sqrt(var_s)
    elif self.nb_sign < 0:
        z = (self.nb_sign + 1) / np.sqrt(var_s)
    else:
        z = 0

    # Two-tailed test: a trend exists when |z| exceeds the critical value.
    significant = abs(z) > norm.ppf(1 - self.alpha / 2)
    if not significant:
        return 0
    if z < 0:
        return -1
    if z > 0:
        return 1
    return 0