def test_original_test(NoTrendData, TrendData, arbitrary_1d_data):
    # check with no trend data
    NoTrendRes = mk.original_test(NoTrendData)
    assert NoTrendRes.trend == 'no trend'
    assert NoTrendRes.h == False
    assert NoTrendRes.p == 1.0
    assert NoTrendRes.z == 0
    assert NoTrendRes.Tau == 0.0
    assert NoTrendRes.s == 0.0
    assert NoTrendRes.var_s == 0.0
    assert NoTrendRes.slope == 0.0

    # check with trendy data
    TrendRes = mk.original_test(TrendData)
    assert TrendRes.trend == 'increasing'
    assert TrendRes.h == True
    assert TrendRes.p == 0.0
    assert TrendRes.Tau == 1.0
    assert TrendRes.s == 64620.0
    np.testing.assert_allclose(TrendRes.slope, 1.0, rtol=1e-02)

    # check with arbitrary data
    result = mk.original_test(arbitrary_1d_data)
    assert result.trend == 'no trend'
    assert result.h == False
    assert result.p == 0.37591058740506833
    assert result.z == -0.8854562842589916
    assert result.Tau == -0.03153167653875869
    assert result.s == -1959.0
    assert result.var_s == 4889800.333333333
    assert result.slope == -0.0064516129032258064
def test_residuals(model, timeperiod, reg):
    """Test for a residual trend, applying a Mann-Kendall test.

    Parameters
    ----------
    model : GLMObject
        Best model
    timeperiod : np.array
        considered years (not used here)

    Returns
    -------
    float
        slope in residuals
    float
        p-value
    str
        trend direction from the Hamed-Rao modified test
    float
        p-value of the Hamed-Rao modified test
    """
    res_trend = mk.original_test(model.resid_response, alpha=0.1)
    fig, ax = plt.subplots(figsize=(12, 8))
    sm.graphics.tsa.plot_acf(model.resid_response, lags=39, ax=ax)
    ax.set_xlabel('lag')
    ax.set_title('Autocorrelation {}'.format(reg))
    # fig.savefig('/home/insauer/projects/NC_Submission/Climada_papers/Test/AutocorrResidualsGMT_{}.png'.format(reg), bbox_inches='tight', dpi=600)
    alt_trend_test = mk.hamed_rao_modification_test(model.resid_response)

    return res_trend.slope, res_trend.p, alt_trend_test.trend, alt_trend_test.p
def mk_col_slope(Data, YearCol, LocCol, window, TH, alpha=0.1):
    length = Data[LocCol].shape[0]
    start_index = Data[YearCol].values[window - 1]
    final_index = Data[YearCol].values[length - 1] + 1
    Year = pd.Series(range(start_index, final_index))
    iterations = length - (window - 1)
    stats_list2 = pd.DataFrame(index=range(0, iterations),
                               columns=['Year', LocCol])
    for instance in range(0, iterations):
        stats_list2['Year'].values[instance] = Year[instance]
    for instance in range(0, iterations):
        snip = Data[LocCol].loc[instance:instance + window - 1]
        if missing_values(snip, window, TH):
            snip_test = MK.original_test(snip, alpha)
            stats_list2[LocCol].values[instance] = snip_test.slope
        else:
            stats_list2[LocCol].values[instance] = 'Not enough data'
    # print(stats_list2)
    return stats_list2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--granularity', '-g', type=str, default='minute',
        help='one of [day, hour, minute], the granularity of the x-axis. default=minute')
    parser.add_argument(
        '--num_points', '-n', type=int, default=60 * 24,
        help='number of points to sample from historic prices (e.g. number of days). default=1440')
    parser.add_argument(
        '--mode', '-m', type=str, default='close',
        help='the cryptocompare price attribute to use (e.g. open or close). default=close')
    parser.add_argument(
        '--currency', '-c', type=str, default='CAD',
        help='currency (e.g. CAD, USD). default=CAD')
    args = parser.parse_args()

    fig, axs = plt.subplots(2, 1)
    colors = 'rgbykc'
    for ax, symbol, color in zip(axs, ['BTC', 'ETH'], colors):
        fn = getattr(cryptocompare, f'get_historical_price_{args.granularity}')
        prices = fn(symbol, args.currency, limit=args.num_points)
        data = [x[args.mode] for x in prices]
        cur = data[-1]
        result = mk.original_test(data)
        line = lambda t: result.slope * t + result.intercept
        print(symbol, ':', result, '\n\n')
        ax.plot(data,
                label=f'{symbol} = {cur} ({result.trend}, p={round(result.p, 4)})',
                c=color)
        ax.plot(list(map(line, range(len(data)))), label='MK fit')
        ax.legend()
        ax.set_ylabel(f'{symbol} Price ({args.mode})')
        ax.set_xlabel('previous ' + args.granularity + 's')
    fig.suptitle(
        f'Crypto prices in {args.currency} for the last {args.num_points} {args.granularity}s',
        fontsize=12)
    fig.tight_layout()
    plt.show()
def multip_function(x):
    """Estimate the trend in the discharge time series of one grid cell.

    Parameters
    ----------
    x : tuple
        lat, lon coordinates

    Returns
    -------
    reg.slope
        slope of the trend
    reg.p
        p-value of the trend
    """
    lat, lon = x
    # exclude coordinates over sea
    if (lat == -1000) or (lon == -1000):
        return np.nan, np.nan
    print(lon, lat)
    data = get_dis_gridcell(lat, lon)
    if np.isnan(data).all():
        return np.nan, np.nan
    reg = mk.original_test(data, alpha=0.1)
    return reg.slope, reg.p
def test_crash(norm_stats, crash_date, norm_name):
    (V, SD, AC) = norm_stats
    sys.stdout.write(f"Results of the Mann Kendall Test "
                     f"for the {norm_name} (crash: {crash_date}): \n")
    MKV = mk.original_test(V)
    sys.stdout.write(f"Variance: trend = {MKV.trend} | tau = {MKV.Tau:0.4f}\n")
    MKSD = mk.original_test(SD)
    sys.stdout.write(f"Spectral Density: trend = {MKSD.trend} | tau = {MKSD.Tau:0.4f}\n")
    MKAC = mk.original_test(AC)
    sys.stdout.write(f"Autocorrelation: trend = {MKAC.trend} | tau = {MKAC.Tau:0.4f}\n\n")
def test_residuals(model, timeperiod, reg):
    """Test for a residual trend, applying a Mann-Kendall test.

    Parameters
    ----------
    model : GLMObject
        Best model
    timeperiod : np.array
        considered years (not used here)

    Returns
    -------
    float
        slope in residuals
    float
        p-value
    str
        trend direction from the Hamed-Rao modified test
    float
        p-value of the Hamed-Rao modified test
    """
    res_trend = mk.original_test(model.resid_response, alpha=0.1)
    alt_trend_test = mk.hamed_rao_modification_test(model.resid_response)

    return res_trend.slope, res_trend.p, alt_trend_test.trend, alt_trend_test.p
def mann_kendall_price(self):
    """
    The Mann-Kendall trend test (sometimes called the M-K test) is used to
    analyze data collected over time for consistently increasing or
    decreasing (monotonic) trends in Y values.
    H0 - there is no monotonic trend; H1 - a trend exists, either positive
    or negative.

    :return: trend: trend direction,
             h: bool, whether a trend exists,
             p: p-value,
             z: z-statistic,
             Tau: Kendall's Tau,
             s: Mann-Kendall score,
             var_s: variance of S,
             slope: Theil-Sen estimator (slope),
             intercept: intercept of the Kendall-Theil robust line
    """
    return mk.original_test(self.center_price())
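# --- Hedged usage sketch (not part of the class above): shows how the
# --- namedtuple returned by mk.original_test exposes the fields listed in
# --- the docstring. The synthetic `prices` series is an illustrative
# --- assumption, not project data.
import numpy as np
import pymannkendall as mk

prices = np.linspace(100, 110, 50) + np.random.normal(0, 0.5, 50)
result = mk.original_test(prices)
print(result.trend, result.h, result.p)                        # direction, decision, p-value
print('slope:', result.slope, 'intercept:', result.intercept)  # Kendall-Theil robust line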
def doTheAnnMeanFlowTrends(pastStats, currentStats=None):
    tSeries = []
    for key in pastStats:
        # tSeries.extend(pastStats[key][:12])
        tSeries.append(pastStats[key][12])
        # print(pastStats[key])
    if currentStats is not None:
        for key in currentStats:
            tSeries.append(currentStats[key][12])
    # print(len(tSeries), sum(tSeries)/len(tSeries))
    # print(tSeries)
    return [pmk.original_test(tSeries), pmk.hamed_rao_modification_test(tSeries)]
def mkTest(self, series, seasonal):
    # `self` added: the original body referenced self, so this is assumed
    # to be a method that stores the detected trend on the instance
    if seasonal == False:
        data_mk = mk.original_test(series)
        trend = data_mk.trend
    else:
        data_mk_seasonal_test = mk.seasonal_test(series, period=12)
        trend = data_mk_seasonal_test.trend
    if trend == 'decreasing' or trend == 'increasing':
        trend = 'present'
    self.__trend__ = trend
    return trend
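# --- Hedged sketch of the original-vs-seasonal choice made in mkTest above,
# --- on a synthetic monthly series (the sine term injects a 12-month cycle;
# --- all names and numbers here are illustrative assumptions).
import numpy as np
import pymannkendall as mk

months = np.arange(120)
series = 0.05 * months + 2.0 * np.sin(2 * np.pi * months / 12)

print(mk.original_test(series).trend)             # seasonality can distort this
print(mk.seasonal_test(series, period=12).trend)  # compares same calendar months only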
def trendTest(output_dir, data):
    """
    Tests each of the call categories during a given time period for a
    monotonic trend, using the Mann-Kendall test. Results of this test are
    saved to a csv file.

    Inputs:
        - output_dir: string path to the output directory
        - data: incident data of the time period you want to graph
    """
    call_categories = [
        'injuries_external', 'motor', 'health', 'fire', 'mental_illness',
        'other'
    ]
    trend, h, p, z, Tau, s, var_s, slope = [], [], [], [], [], [], [], []
    for category in call_categories:
        # attribute access avoids depending on how many fields the result
        # tuple has (newer pymannkendall versions also return an intercept)
        res = mk.original_test(data[[category]])
        trend.append(res.trend)
        h.append(res.h)
        p.append(res.p)
        z.append(res.z)
        Tau.append(res.Tau)
        s.append(res.s)
        var_s.append(res.var_s)
        slope.append(res.slope)

    results = pd.DataFrame({
        'Call_Category': call_categories,
        'Trend': trend,
        'h': h,
        'p': p,
        'z': z,
        'Tau': Tau,
        's': s,
        'var_s': var_s,
        'slope': slope
    })
    results.to_csv(join(output_dir, "trend_test_results.csv"), index=False)
def getValuesMK(arry, p_sig):
    z_, lines, cols = arry.shape
    arryMk = np.empty((3, lines, cols))
    for l in range(lines):
        for c in range(cols):
            v = arry[:, l, c]
            if np.isnan(np.sum(v)):
                arryMk[0, l, c] = np.nan
                arryMk[1, l, c] = np.nan
                arryMk[2, l, c] = np.nan
                continue
            r = mk.original_test(v, p_sig)
            arryMk[0, l, c] = r.s if r.p <= p_sig else np.nan
            arryMk[1, l, c] = r.p
            arryMk[2, l, c] = r.slope if r.p <= p_sig else np.nan
    return arryMk
def test_autocorrelation(time_series):
    """
    Test for autocorrelation.

    Parameters
    ----------
    time_series : np.array
        time series to test

    Returns
    -------
    float
        Kendall's Tau
    """
    auto = mk.original_test(time_series, alpha=0.1)
    return auto.Tau
def determine_orientation(self, positions):
    "Given a list of minimizer positions, determine the orientation of the contig"
    if len(positions) > 1:
        if all(x < y for x, y in zip(positions, positions[1:])):
            return "+"
        if all(x > y for x, y in zip(positions, positions[1:])):
            return "-"
        if self.args.mkt:
            mkt_result = mk.original_test(positions)
            if mkt_result.h and mkt_result.p <= 0.05:
                return "+" if mkt_result.trend == "increasing" else "-"
        else:
            tally = Counter([x < y for x, y in zip(positions, positions[1:])])
            positive_perc = tally[True] / float(len(positions) - 1) * 100
            negative_perc = 100 - positive_perc
            if positive_perc >= self.args.m:
                return "+"
            if negative_perc >= self.args.m:
                return "-"
    return "?"
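# --- Hedged standalone illustration of the Mann-Kendall fallback used in
# --- determine_orientation above (the positions list is made up; the 0.05
# --- cutoff mirrors the method's check).
import pymannkendall as mk

positions = [10, 40, 35, 80, 120, 110, 200, 260, 250, 300]  # mostly increasing
res = mk.original_test(positions)
if res.h and res.p <= 0.05:
    print("+" if res.trend == "increasing" else "-")
else:
    print("?")  # no significant monotonic trend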
def mk_column(Data, YearCol, LocCol, windows, datalist, threshold, alpha=0.1):
    for each in windows:
        length = Data[LocCol].shape[0]
        start_index = Data[YearCol].values[each - 1]
        final_index = Data[YearCol].values[length - 1] + 1
        Year = pd.Series(range(start_index, final_index))
        iterations = length - (each - 1)
        stats_list2 = pd.DataFrame(index=range(0, iterations),
                                   columns=[LocCol, 'Year', 'trend', 'Ha',
                                            'p', 'Z', 'S', 'VAR(S)', 'slope'])
        stats_list2[LocCol].values[0] = str(each) + " Year window"
        for instance in range(0, iterations):
            stats_list2['Year'].values[instance] = Year[instance]
        for instance in range(0, iterations):
            snip = Data[LocCol].loc[instance:instance + each - 1]
            # test that no more than the allowed share of values is missing
            if missing_values(snip, each, threshold):
                snip_test = MK.original_test(snip, alpha)
                stats_list2['trend'].values[instance] = snip_test.trend
                stats_list2['Ha'].values[instance] = snip_test.h
                stats_list2['p'].values[instance] = snip_test.p
                stats_list2['Z'].values[instance] = snip_test.z
                stats_list2['S'].values[instance] = snip_test.s
                stats_list2['VAR(S)'].values[instance] = snip_test.var_s
                stats_list2['slope'].values[instance] = snip_test.slope
            else:
                stats_list2['trend'].values[instance] = 'Not enough data'
        # print(stats_list2)
        # collect the per-window data frames in a list
        datalist.append(stats_list2)
    # TODO: find a way to store the results in Excel
def test_autocorrelation(time_series):
    """
    Test for autocorrelation in a time series, applying a Mann-Kendall test.

    Parameters
    ----------
    time_series : np.array
        time series to test

    Returns
    -------
    float
        Kendall's Tau
    """
    auto = mk.original_test(time_series, alpha=0.1)
    return auto.Tau
def test_residuals(model, timeperiod, reg):
    """Test for a residual trend, applying a Mann-Kendall test.

    Parameters
    ----------
    model : GLMObject
        Best model
    timeperiod : np.array
        considered years (not used here)

    Returns
    -------
    float
        slope in residuals
    float
        p-value
    """
    res_trend = mk.original_test(model.resid_response, alpha=0.1)
    return res_trend.slope, res_trend.p
def calc_mann_kendall(data_file, info_file, out_file):
    """
    Characterize the developmental trend of each column with Kendall's tau.
    """
    # load
    df = pd.read_csv(data_file)
    info_df = pd.read_csv(info_file)
    ages = np.array(info_df['age in years'])
    age_uniq = np.unique(ages)

    # calculate
    out_df = pd.DataFrame(index=('tau', 'p'), columns=df.columns)
    for col in out_df.columns:
        meas_vec = np.array(df[col])
        y = np.zeros_like(age_uniq, dtype=np.float64)
        for age_idx, age in enumerate(age_uniq):
            y[age_idx] = np.mean(meas_vec[ages == age])
        mk_test = mk.original_test(y, 0.05)
        out_df.loc['tau', col] = mk_test.Tau
        out_df.loc['p', col] = mk_test.p

    # save
    out_df.to_csv(out_file)
def Trend_1(data_sku, star1):
    starting = star1
    data_sku3 = data_sku[starting:len(data_sku)]
    data_sku3 = data_sku3.reset_index(drop=True)
    index = data_sku3.ne(0).idxmax()
    data_sku4 = data_sku3[index:]  # first non-zero element
    data_sku4 = data_sku4.reset_index(drop=True)
    Zero = np.where(data_sku4 == 0)[0]
    sparsity = len(Zero) / len(data_sku4)
    if len(data_sku3) >= 8 and sparsity < 0.2:
        dd = mk.original_test(data_sku3)
        if dd.trend == 'increasing':
            Type_0 = 'Growing'
        elif dd.trend == 'decreasing':
            Type_0 = 'Degrowing'
        else:
            Type_0 = 'Normal'
    elif len(data_sku3) >= 12 and sparsity > 0.2:
        # rolling --> can be applied to a Series
        roll_sum = pd.Series(data_sku).rolling(6).apply(np.mean)
        roll_diff = np.diff(roll_sum)
        g_1 = len(np.where(roll_diff > 0)[0])
        l_1 = len(np.where(roll_diff < 0)[0])
        g_p = g_1 / len(roll_diff)
        l_p = l_1 / len(roll_diff)
        if g_p >= 0.75:
            Type_0 = 'Growing'
        elif l_p >= 0.75:
            Type_0 = 'Degrowing'
        else:
            Type_0 = 'Normal'
    else:
        Type_0 = 'Normal'
    # v = c(Type_0, starting)
    return Type_0
def mk_column(Data, YearCol, LocCol, windows):
    for each in windows:
        length = Data[LocCol].shape[0]
        start_index = Data[YearCol].values[each - 1]
        final_index = Data[YearCol].values[length - 1] + 1
        Year = pd.Series(range(start_index, final_index))
        iterations = length - (each - 1)
        stats_list2 = pd.DataFrame(
            index=range(0, iterations),
            columns=['Year', 'trend', 'Ha', 'p', 'Z', 'S', 'VAR(S)', 'slope'])
        stats_list2.insert(0, 'Name', None)
        stats_list2['Name'].values[0] = LocCol
        for instance in range(0, iterations):
            stats_list2['Year'].values[instance] = Year[instance]
        for instance in range(0, iterations):
            snip = Data[LocCol].loc[instance:instance + each - 1]
            snip_test = MK.original_test(snip, 0.1)
            stats_list2['trend'].values[instance] = snip_test.trend
            stats_list2['Ha'].values[instance] = snip_test.h
            stats_list2['p'].values[instance] = snip_test.p
            stats_list2['Z'].values[instance] = snip_test.z
            stats_list2['S'].values[instance] = snip_test.s
            stats_list2['VAR(S)'].values[instance] = snip_test.var_s
            stats_list2['slope'].values[instance] = snip_test.slope
        # stats_list2 = stats_list2.transpose()
        # stats_list2.insert(stats_list2.shape[1], None, None)
        # stats_list2 = stats_list2.transpose()
        # append a blank separator row (DataFrame.append was removed in pandas 2.0)
        stats_list2 = pd.concat([stats_list2, pd.Series([np.nan]).to_frame().T],
                                ignore_index=True)
        print(stats_list2)
        # stats_list2.to_excel("testNewColTranspose.xlsx", index=False)
    # TODO: find a way to store the results in Excel
def add_element(self, value):
    '''Add a new element to the statistic.'''
    # reset parameters if change was detected:
    if self.in_concept_change:
        self.reset()

    # append elements:
    self.instance_memory.append(value)

    if len(self.instance_memory) == self.min_instances:
        self.sample_count = 1
    if len(self.instance_memory) > self.min_instances:
        self.instance_count += 1

    # start drift detection: min_instances has to be reached, then always
    # perform the test once, after that perform the test every i-th instance
    # (instances_step)
    if len(self.instance_memory) >= self.min_instances and (
            (self.instance_count == self.instances_step) or (self.sample_count == 1)):
        # call the corresponding test from the package:
        if self.test_type == 'original_mk':
            print('Perform MK test')
            results_tuple = mk.original_test(self.instance_memory, self.alpha)
            print('MK test ended')
        elif self.test_type == 'hamed_rao_mod':
            results_tuple = mk.hamed_rao_modification_test(self.instance_memory, self.alpha)
        elif self.test_type == 'yue_wang_mod':
            results_tuple = mk.yue_wang_modification_test(self.instance_memory, self.alpha)
        elif self.test_type == 'trend_free_pre_whitening_mod':
            results_tuple = mk.trend_free_pre_whitening_modification_test(self.instance_memory, self.alpha)
        elif self.test_type == 'pre_whitening_mod':
            results_tuple = mk.pre_whitening_modification_test(self.instance_memory, self.alpha)
        elif self.test_type == 'seasonal':
            results_tuple = mk.seasonal_test(self.instance_memory, period=self.period, alpha=self.alpha)

        # reset counters every time a test was performed:
        self.sample_count = 0
        self.instance_count = 0

        # assign results (attribute access instead of positional indexing:
        # in recent pymannkendall versions the last tuple element is the
        # intercept, not Sen's slope):
        self.p_value = results_tuple.p
        self.sens_slope = results_tuple.slope
        self.trend = results_tuple.trend

        if self.p_value < self.alpha and np.abs(self.sens_slope) > self.slope_threshold:
            self.in_concept_change = True
        else:
            self.in_concept_change = False
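# --- Standalone sketch of the sliding Mann-Kendall drift check performed in
# --- add_element above, without the class state (the window bound, slope
# --- threshold, and synthetic stream are illustrative assumptions).
import numpy as np
import pymannkendall as mk

memory, min_instances, slope_threshold, alpha = [], 30, 0.01, 0.05
stream = np.concatenate([np.random.normal(0, 1, 50),
                         np.linspace(0, 5, 50) + np.random.normal(0, 1, 50)])

for value in stream:
    memory.append(value)
    if len(memory) >= min_instances:
        res = mk.original_test(memory, alpha)
        if res.p < alpha and abs(res.slope) > slope_threshold:
            print(f'drift after {len(memory)} instances '
                  f'({res.trend}, slope={res.slope:.3f})')
            memory = []  # mimic reset() after a detected change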
def rel_time_attr_MK(dataFrame71):
    """
    Theil-Sen slope estimation and Mann-Kendall test to estimate the
    contribution of each driver.

    Parameters
    ----------
    dataFrame71 : pd.DataFrame
        Time series starting in 1971

    Returns
    -------
    regH : list
        Sen's slope, MK p-value and uncertainty range of hazard
        (with 1980 fixed exposure) (TS_Haz), 1980-2010
    regHE : list
        Same for TS_HazExp, 1980-2010
    regH7 : list
        Same for hazard (with 1980 fixed exposure) (TS_Haz), 1971-2010
    regH107 : list
        Same for hazard (with 2010 fixed exposure) (TS_Haz), 1971-2010
    regH10 : list
        Same for hazard (with 2010 fixed exposure) (TS_Haz), 1980-2010
    regF : list
        Same for TS_Full, 1980-2010
    regN : list
        Same for observed damages
    """
    dataFrame = dataFrame71[dataFrame71['Year'] > 1979]

    regLHazExp = mk.original_test(dataFrame['Norm_Impact_2y_trend'], alpha=0.1)
    slopeLHazExp = stats.theilslopes(dataFrame['Norm_Impact_2y_trend'], alpha=0.1)
    regHE = [regLHazExp.slope, regLHazExp.p, slopeLHazExp[2], slopeLHazExp[3]]

    regLFull = mk.original_test(dataFrame['Norm_Impact_Pred'], alpha=0.1)
    slopeLFull = stats.theilslopes(dataFrame['Norm_Impact_Pred'], alpha=0.1)
    regF = [regLFull.slope, regLFull.p, slopeLFull[2], slopeLFull[3]]

    regHaz = mk.original_test(dataFrame['Norm_ImpFix_2y_trend'], alpha=0.1)
    slopeHaz = stats.theilslopes(dataFrame['Norm_ImpFix_2y_trend'], alpha=0.1)
    regH = [regHaz.slope, regHaz.p, slopeHaz[2], slopeHaz[3]]

    regHaz7 = mk.original_test(dataFrame71['Norm_ImpFix_2y_trend'], alpha=0.1)
    slopeHaz7 = stats.theilslopes(dataFrame71['Norm_ImpFix_2y_trend'], alpha=0.1)
    regH7 = [regHaz7.slope, regHaz7.p, slopeHaz7[2], slopeHaz7[3]]

    regHaz107 = mk.original_test(dataFrame71['Norm_Imp2010_2y_trend'], alpha=0.1)
    slopeHaz107 = stats.theilslopes(dataFrame71['Norm_Imp2010_2y_trend'], alpha=0.1)
    regH107 = [regHaz107.slope, regHaz107.p, slopeHaz107[2], slopeHaz107[3]]

    regHaz10 = mk.original_test(dataFrame['Norm_Imp2010_2y_trend'], alpha=0.1)
    slopeHaz10 = stats.theilslopes(dataFrame['Norm_Imp2010_2y_trend'], alpha=0.1)
    regH10 = [regHaz10.slope, regHaz10.p, slopeHaz10[2], slopeHaz10[3]]

    regNat = mk.original_test(dataFrame['natcat_flood_damages_2005_CPI'], alpha=0.1)
    slopeNat = stats.theilslopes(dataFrame['natcat_flood_damages_2005_CPI'], alpha=0.1)
    regN = [regNat.slope, regNat.p, slopeNat[2], slopeNat[3]]

    return regH, regHE, regH7, regH107, regH10, regF, regN
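# --- Hedged sketch of the MK + Theil-Sen pairing used in rel_time_attr_MK
# --- above, on a synthetic series (data and names are illustrative, not the
# --- study's): pymannkendall gives slope and p-value, scipy's theilslopes
# --- adds the confidence bounds stored in the reg* lists.
import numpy as np
import pymannkendall as mk
from scipy import stats

y = 0.5 * np.arange(31) + np.random.normal(0, 1, 31)  # e.g. one value per year
mk_res = mk.original_test(y, alpha=0.1)
ts_slope, ts_intercept, lo_slope, up_slope = stats.theilslopes(y, alpha=0.1)

reg = [mk_res.slope, mk_res.p, lo_slope, up_slope]    # same layout as regH etc.
print(reg)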
# Reading Data from a NetCDF File
Data = xr.open_dataset(in_file)
Data_2_use = Data['tmp']

# Selecting location 0.5W, 6.5N
loc_data = Data_2_use.sel(lon=-0.5, lat=6.5, method='nearest')
loc_data.plot()

# Single Location Selection
SLS = Data_2_use.sel(lon=0.5, lat=7.5, method='nearest')
SLS.plot()

# Areal Selection & Averaging
ASA = Data_2_use.sel(lon=np.arange(-1.5, 1.5, 0.5),
                     lat=np.arange(5, 15, 0.5),
                     method='nearest')
ASA = ASA.mean(dim=('lon', 'lat'))
ASA.plot()

# Annual Averaging
Ann_avg = ASA.groupby('time.year').mean('time')
Ann_avg.plot()

# Statistics of the Data: Mann-Kendall trend test
stat_result = pmk.original_test(Ann_avg)
print(stat_result)

# Seasonal Climatology
Seas_avg = ASA.groupby('time.season').mean('time')
print(Seas_avg)
        break
    x = struct.unpack("f", x)[0]
    if not np.isinf(x) and not np.isnan(x):
        ajat.append(x)
        ind.append(i)
    else:
        dnfind.append(i)
    i += 1
f.close()

ajat = np.array(ajat)
dnfind = np.array(dnfind)
ind = np.array(ind)
sr = pd.Series(ajat, ind)

ts = mk.original_test(sr)
pns = stat.linregress(ind, ajat)

plt.plot(ind, ajat, 'o', color='b')
plt.plot(ind, ind * pns.slope + pns.intercept, label="pns")
plt.plot(ind, ind * ts.slope + ts.intercept, label="ts")
plt.xlim(right=np.max(ind))
plt.ylim(top=np.max(ajat))

# plot the DNF entries along the top of the y-range
ala, ula = plt.ylim()
dnfaika = np.zeros(np.shape(dnfind)) + ula
plt.plot(dnfind, dnfaika, 'o', color='r')
plt.legend()
# plt.tight_layout()
def trend_test(df):
    result = mk.original_test(df)
    print(result)
import os

import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.api import Holt
from statsmodels.tsa.stattools import adfuller
import pymannkendall as mk

# We import the required libraries and read the csv files.
filenamemadrid = os.getcwd() + "\\weather_madrid_LEMD_1997_2015.csv"
df_madrid = pd.read_csv(filenamemadrid,
                        usecols=["CET", "Mean TemperatureC"],
                        sep=",")
df_madrid = df_madrid.dropna()
df_madrid = df_madrid.rename(columns={"CET": "date"})

filenamebrazil = os.getcwd() + "\\sudeste.csv"
df_brazil = pd.read_csv(filenamebrazil, usecols=["date", "temp"], sep=",")
df_brazil = df_brazil.dropna()
df_brazil = df_brazil.groupby(["date"])["temp"].mean()
df_brazil = df_brazil.to_frame()
df_brazil = df_brazil.reset_index(drop=False)
df_brazil = df_brazil.set_index('date')

df_madrid = df_madrid.set_index('date')

trend_brazil = mk.original_test(df_brazil)
print(trend_brazil)
trend_madrid = mk.original_test(df_madrid)
print(trend_madrid)
def do_backtest(df, symbol, end=None):
    trade_count = 0
    trade_history = []
    balance = initial_balance
    win_count = 0
    loss_count = 0
    profit = 0
    action = HOLD
    current_tick = 0
    entry_tick = 0
    buy_mode = True
    entry_price = 0
    buy_index = 0
    window_size = 1000
    last_size = 50

    if backtest_mode == 2:
        df = df.iloc[end - window_size * 1 - 100:end + window_size * 2]
    elif backtest_mode == 3:
        df_x = df
        df = df.iloc[195267:199267]
        # fragment = detect_anomaly(df)
        # detect_anomaly(df.iloc[11706:11074])
        # plot_whole(df_x)

    df = df.reset_index()
    df = df.fillna(0)

    for i, row in df.iterrows():
        start_time = time.time()
        current_price = row['last_price']
        current_ask_price = row['best_ask_price']
        current_bid_price = row['best_bid_price']
        current_tick += 1

        if i > window_size:
            last = df.iloc[i, :]
            prev1 = df.iloc[i - 2, :]
            prev25 = df.iloc[i - 25, :]
            prev50 = df.iloc[i - 50, :]
            prev100 = df.iloc[i - 100, :]
            prev200 = df.iloc[i - 200, :]
            prev500 = df.iloc[i - 500, :]

            diffx1 = last.qav_sma500 - last.qav_sma1000
            diffx2 = prev50.qav_sma500 - prev50.qav_sma1000
            diffx3 = prev100.qav_sma500 - prev100.qav_sma1000
            diffx4 = prev200.qav_sma500 - prev200.qav_sma1000

            first_check = (
                last['last_sma600'] > prev100['last_sma600']
                and last['last_sma600'] > prev500['last_sma600']
                and last.qav_sma500 > last.qav_sma1000
                and prev50.qav_sma500 > prev50.qav_sma1000
                and prev100.qav_sma500 > prev100.qav_sma1000
                and prev200.qav_sma500 > prev200.qav_sma1000
                and last.qav_sma500 > prev50.qav_sma500 > prev100.qav_sma500 > prev200.qav_sma500
                and diffx1 > diffx2 > diffx3 > diffx4
                and diffx1 > 0.3  # this one is also misleading!
                and diffx1 < 1    # misleading!
            )

            # if last['index'] == 114395:
            #     pdb.set_trace()

            if first_check == True and conditions[0]['buy_mode'] == True:
                fragment = df.iloc[i - window_size:i, :]
                fragment = detect_anomaly(fragment)
                fragment = fragment.reset_index()
                last = fragment.iloc[-1, :]
                prev1 = fragment.iloc[-2, :]
                first_n = fragment[:window_size - last_size]
                last_n = fragment[-last_size:]
                mk_test = mk.original_test(fragment.change_qav.to_numpy())
                fragment_sum = fragment.groupby(
                    ['score_qav', 'label_qav'], as_index=False,
                    sort=False)[["change_qav", "change_price"]].sum()
                conditions[0]['buy_cond'] = (
                    (fragment_sum[fragment_sum['label_qav'] == 1].change_qav < 3).all()
                    and mk_test.z > 1 and mk_test.z < 10
                    and mk_test.Tau < 0.1
                    and fragment_sum[fragment_sum['label_qav'] == 1].change_qav.sum() > 4
                    and fragment_sum[fragment_sum['label_qav'] == 1].change_qav.sum() < 10
                    and fragment_sum.label_qav.iloc[0] == 0
                    and fragment_sum.label_qav.iloc[-1] == 1
                    and fragment_sum.label_qav.iloc[-2] == 1
                    and (fragment_sum[fragment_sum['label_qav'] == 0].change_qav
                         < fragment_sum[fragment_sum['label_qav'] == 1].change_qav.max()).all()
                    and fragment_sum.iloc[-1].change_price + fragment_sum.iloc[-2].change_price > 0
                    and fragment_sum.change_price.sum() > 0
                    and (last_n.label_qav == 1).count() < 50)
            elif conditions[0]['buy_mode'] == False:
                conditions[0]['sell_cond'] = (last['last_sma600'] < prev1['last_sma600'])
            else:
                continue

            for ic, cond in enumerate(conditions):
                if cond['buy_mode'] and cond['buy_cond']:
                    conditions[ic]['action'] = BUY
                    conditions[ic]['entry_price'] = current_ask_price
                    conditions[ic]['buy_mode'] = False
                    if ic == 0:
                        printLog("CONDITION " + str(ic + 1) + " IS BUYING....")
                        printLog("##### TRADE " + str(cond['trade_count']) + " #####")
                        printLog("BUY: " + symbol + " for " + str(cond['entry_price']) +
                                 " at " + str(last.date) + " - index: " + str(last['index']))
                        printLog(fragment[[
                            'index', 'date', 'symbol', 'last_price',
                            'total_traded_quote_asset_volume',
                            'label_qav', 'score_qav', 'change_qav', 'change_price'
                        ]].tail(100))
                        printLog(mk.original_test(fragment.change_qav.to_numpy()))
                        printLog(fragment_sum)
                        printLog("diffx1: " + str(diffx1))
                        printLog("last.qav_sma500: " + str(last.qav_sma500))
                        printLog("last.qav_sma1000: " + str(last.qav_sma1000))
                        printLog("prev100.qav_sma500: " + str(prev100.qav_sma500))
                        printLog("prev100.qav_sma1000: " + str(prev100.qav_sma1000))
                        # plot_whole(df)
                        # pdb.set_trace()
                elif not cond['buy_mode'] and cond['sell_cond']:
                    printLog("CONDITION " + str(ic + 1) + " IS SELLING....")
                    conditions[ic]['action'] = SELL
                    exit_price = current_bid_price
                    profit = ((exit_price - cond['entry_price']) / cond['entry_price']
                              + 1) * (1 - transaction_fee) ** 2 - 1
                    conditions[ic]['balance'] = conditions[ic]['balance'] * (1.0 + profit)
                    conditions[ic]['trade_count'] += 1
                    conditions[ic]['buy_mode'] = True
                    printLog("SELL: " + symbol + " for " + str(exit_price) +
                             " at " + str(last.date) + " - index: " + str(last['index']))
                    printLog("PROFIT: " + str(profit * 100))
                    printLog("BALANCE: " + str(cond['balance']))
                else:
                    conditions[ic]['action'] = HOLD

        if current_tick > len(df) - 1:
            printLog("*********TOTAL RESULTS*************************")
            for ic, cond in enumerate(conditions):
                printLog("SYMBOL: " + symbol)
                printLog("CONDITION NUMBER: " + str(ic))
                printLog("TOTAL BALANCE: " + str(cond['balance']))
                printLog("TRADE COUNT: " + str(cond['trade_count']))
                printLog("**********************************")

        if i % 1000 == 0:
            printLog(symbol + "-" + str(row['index']))
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pymannkendall as mk

Birth_data = pd.read_csv("daily-total-female-births.csv",
                         parse_dates=['Date'],
                         index_col='Date')
# Birth_data
data = Birth_data

fig, ax = plt.subplots(figsize=(12, 8))
res = mk.original_test(data)
trend_line = np.arange(len(data)) * res.slope + res.intercept
ax.plot(data)
ax.plot(data.index, trend_line)
ax.legend(['data', 'trend line'])
ax.set(xlabel="Dates", ylabel="Births", title="Trend line")
fig.savefig('Trendline_plot1.png')
# IMPORTING THE FIRST DATA SET
Birth_data = pd.read_csv("daily-total-female-births.csv",
                         parse_dates=['Date'],
                         index_col='Date')

# SUMMARY STATISTICS
head = Birth_data.head()
Summary = Birth_data.describe()
print(head)
print(Summary)

# MANN-KENDALL TREND TEST
MKT = mk.original_test(Birth_data, alpha=0.05)
print(MKT)

# IMPORTING THE SECOND DATA SET
Shampoo_data = pd.read_csv("shampoo.csv",
                           parse_dates=['Month'],
                           index_col='Month')

# SUMMARY STATISTICS
head_shampoo = Shampoo_data.head()
Summary_shampoo = Shampoo_data.describe()
print(head_shampoo)
print(Summary_shampoo)

# TREND TEST 1
MKT1 = mk.hamed_rao_modification_test(Shampoo_data)
for month in range(1, 13, 1):
    name_month = datetime.date(1900, int(month), 1).strftime('%B')
    TREND = []
    for index, row in list_nom.iterrows():
        data = pd.read_csv(path_m + row[0] + '_MONTH_' + varin + '_' + indice +
                           '_' + str(yearmin) + '_' + str(yearmax) + '_' +
                           str('{:02d}'.format(month)) + '.csv',
                           skiprows=2)
        data = data.rename(columns={data.columns[1]: "var"}).set_index('datetime')
        if valeur['name'] == 'Original Mann-Kendall test':
            trend, h, p, z, Tau, s, var_s, slope, intercept = mk.original_test(data)
        elif valeur['name'] == 'Hamed and Rao Modified MK Test':
            trend, h, p, z, Tau, s, var_s, slope, intercept = mk.hamed_rao_modification_test(data)
        elif valeur['name'] == 'Yue and Wang Modified MK Test':
            trend, h, p, z, Tau, s, var_s, slope, intercept = mk.yue_wang_modification_test(data)
        elif valeur['name'] == 'Modified MK test using Pre-Whitening method':
            trend, h, p, z, Tau, s, var_s, slope, intercept = mk.pre_whitening_modification_test(data)
        elif valeur['name'] == 'Modified MK test using Trend free Pre-Whitening method':
            trend, h, p, z, Tau, s, var_s, slope, intercept = mk.trend_free_pre_whitening_modification_test(data)