def plot_data_trials(Nclasses, X_train, y_train, n_trials_to_show=2, colors=["r", "k"]):
    """Scatter in 3D the time evolution of a few trials of every class.

    For each class index ``i`` in ``range(Nclasses)`` the trials whose label
    in ``y_train`` equals ``i`` are selected, and at most
    ``n_trials_to_show`` of them are drawn, using columns 0, 1 and 2 of each
    trial as coordinates and ``colors[i]`` as color.

    Args:
        Nclasses: Number of classes to draw; class labels are compared
            against ``0 .. Nclasses - 1``.
        X_train: Sequence of trials. Each trial must convert to a 2-D array
            with at least three columns.
        y_train: Class label of every trial, in the same order as ``X_train``.
        n_trials_to_show: Maximum number of trials drawn per class. A class
            with fewer trials is drawn in full.
        colors: Color per class index. Only read here, never modified.

    Returns:
        None. All output goes through ``gl.scatter_3D``.
    """
    plot_labels = ["Time Evolution of trials different classes", "D1", "D2", "D3"]
    # Converted once; the comparison below is repeated for every class.
    labels_arr = np.array(y_train)
    trials_per_class = max(n_trials_to_show, 0)

    # Initial call at the origin with nf=1; every later call uses nf=0.
    # NOTE(review): presumably nf=1 starts a new figure and nf=0 draws on the
    # current one - confirm in graph_lib.
    gl.scatter_3D(0, 0, 0, nf=1, na=0)
    for i in range(Nclasses):
        class_idx = np.where(labels_arr == i)[0]
        # Slicing instead of indexing by trial number: a class with fewer
        # than n_trials_to_show trials no longer raises IndexError.
        for j in class_idx[:trials_per_class]:
            trial = np.asarray(X_train[j])
            gl.scatter_3D(trial[:, 0], trial[:, 1], trial[:, 2],
                          color=colors[i], nf=0, na=0, labels=plot_labels)
def plot_trials_for_same_instance(X_data_trials, X_data_labels, X_train, y_train, colors=["r", "k"], time_show=100, normalize=True):
    """Scatter in 3D one fixed time sample of every trial, colored by class.

    For each trial the row at index ``time_show`` is extracted, optionally
    passed through ``gf.normalize_module``, flattened, and its first three
    values are drawn as a single point.

    Args:
        X_data_trials: Sequence of trials, each indexable as
            ``trial[[time_show], :]``.
        X_data_labels: Class index of every trial; used to pick the color.
        X_train: Accepted but not used by this function.
        y_train: Accepted but not used by this function.
        colors: Color per class index.
        time_show: Time index of the sample drawn for every trial.
        normalize: The sample is normalized only when this compares equal
            to ``True``.

    Returns:
        None. All output goes through ``gl.scatter_3D``.
    """
    axis_labels = [
        "Trials for the same time instant for different classes",
        "D1", "D2", "D3",
    ]

    gl.scatter_3D(0, 0, 0, nf=1, na=0)
    for trial_idx in range(len(X_data_trials)):
        trial_class = X_data_labels[trial_idx]
        sample = X_data_trials[trial_idx][[time_show], :]
        if normalize == True:
            sample = gf.normalize_module(sample)
        point = sample.flatten()
        gl.scatter_3D(point[0], point[1], point[2],
                      nf=0, na=0,
                      color=colors[trial_class],
                      labels=axis_labels)
def plot_means(Nclasses, X_train, y_train, colors = ["r","k"], normalize = True):
    """Plot the per-class average profiles as curves and as a 3D scatter.

    The averages come from ``dp.get_average_from_train``. Two figures are
    produced through ``gl``: a line plot of the first class's average, and a
    3D scatter of columns 0, 1 and 2 of every class's average.

    Args:
        Nclasses: Number of classes; printed, forwarded to the averaging
            helper and used as the class count for the 3D scatter.
        X_train: Training trials, forwarded to the averaging helper.
        y_train: Training labels, forwarded to the averaging helper.
        colors: Color per class index. Only read here.
        normalize: Forwarded to ``dp.get_average_from_train``.

    Returns:
        None.
    """
    # Single-argument print() gives the same output on Python 2 and 3; the
    # original print statement is a syntax error on Python 3.
    print(Nclasses)

    # Per-class average profile.
    # NOTE(review): presumably one (time x channels) array per class, with
    # trials assumed to be roughly time-aligned - confirm in dp.
    X_data_ave = dp.get_average_from_train(Nclasses, X_train, y_train,
                                           normalize = normalize)

    # Evolution of the mean in the time representation.
    gl.plot([0],[0])
    # NOTE(review): only class 0 is drawn. The original looped over range(1)
    # and carried a vertical-offset branch guarded by `i >= 1` that could
    # never execute; that dead branch is removed and the drawn output is
    # unchanged. If every class should be stacked with an offset, that must
    # be added deliberately.
    gl.plot([], X_data_ave[0], color = colors[0], nf = 0,
            labels = ["Mean value of the 70 Channels","Time Index","Channels"])

    # Evolution of the means of each class in the spherical representation.
    gl.scatter_3D(0, 0,0, nf = 1, na = 0)
    for i in range(Nclasses):
        class_mean = X_data_ave[i]
        gl.scatter_3D(class_mean[:,0], class_mean[:,1], class_mean[:,2],
                      color = colors[i], nf = 0, na = 0,
                      labels = ["Mean Time Evolution the different classes", "D1","D2","D3"])
def plot_trials_for_same_instance(X_data_trials, X_data_labels, X_train, y_train, colors = ["r","k"], time_show = 100, normalize = True):
    """Scatter in 3D one fixed time sample of every trial, colored by class.

    For each trial the row at index ``time_show`` is extracted, optionally
    passed through ``gf.normalize_module``, flattened, and its first three
    values are drawn as a single point.

    Args:
        X_data_trials: Sequence of trials, each indexable as
            ``trial[[time_show], :]``.
        X_data_labels: Class index of every trial; used to pick the color.
        X_train: Accepted but not used by this function.
        y_train: Accepted but not used by this function.
        colors: Color per class index.
        time_show: Time index of the sample drawn for every trial.
        normalize: The sample is normalized only when this compares equal
            to ``True``.

    Returns:
        None. All output goes through ``gl.scatter_3D``.
    """
    axis_labels = [
        "Trials for the same time instant for different classes",
        "D1", "D2", "D3",
    ]

    gl.scatter_3D(0, 0, 0, nf = 1, na = 0)
    for trial_idx in range(len(X_data_trials)):
        trial_class = X_data_labels[trial_idx]
        sample = X_data_trials[trial_idx][[time_show], :]
        if normalize == True:
            sample = gf.normalize_module(sample)
        point = sample.flatten()
        gl.scatter_3D(point[0], point[1], point[2],
                      nf = 0, na = 0,
                      color = colors[trial_class],
                      labels = axis_labels)
def plot_data_trials(Nclasses, X_train, y_train, n_trials_to_show = 2 , colors = ["r","k"]):
    """Scatter in 3D the time evolution of a few trials of every class.

    For each class index ``i`` in ``range(Nclasses)`` the trials whose label
    in ``y_train`` equals ``i`` are selected, and at most
    ``n_trials_to_show`` of them are drawn, using columns 0, 1 and 2 of each
    trial as coordinates and ``colors[i]`` as color.

    Args:
        Nclasses: Number of classes to draw; class labels are compared
            against ``0 .. Nclasses - 1``.
        X_train: Sequence of trials. Each trial must convert to a 2-D array
            with at least three columns.
        y_train: Class label of every trial, in the same order as ``X_train``.
        n_trials_to_show: Maximum number of trials drawn per class. A class
            with fewer trials is drawn in full.
        colors: Color per class index. Only read here, never modified.

    Returns:
        None. All output goes through ``gl.scatter_3D``.
    """
    plot_labels = ["Time Evolution of trials different classes", "D1","D2","D3"]
    # Converted once; the comparison below is repeated for every class.
    labels_arr = np.array(y_train)
    trials_per_class = max(n_trials_to_show, 0)

    # Initial call at the origin with nf=1; every later call uses nf=0.
    # NOTE(review): presumably nf=1 starts a new figure and nf=0 draws on the
    # current one - confirm in graph_lib.
    gl.scatter_3D(0, 0,0, nf = 1, na = 0)
    for i in range(Nclasses):
        class_idx = np.where(labels_arr == i)[0]
        # Slicing instead of indexing by trial number: a class with fewer
        # than n_trials_to_show trials no longer raises IndexError.
        for j in class_idx[:trials_per_class]:
            trial = np.asarray(X_train[j])
            gl.scatter_3D(trial[:,0], trial[:,1], trial[:,2],
                          color = colors[i], nf = 0, na = 0, labels = plot_labels)
def scatter_deltaDailyMagic(self):
    """Scatter the magic delta against the "RangeCO" and "RangeHL" series.

    Reads the two named time series and the magic delta from this object and
    draws two figures through ``gl``: a 2D scatter of magic delta versus
    "RangeCO", and a 3D scatter that adds "RangeHL" as the third coordinate.
    Both use the same title/axis labels and ``self.symbolID`` as legend.

    Returns:
        None.
    """
    range_co = self.get_timeSeriesbyName("RangeCO")
    range_hl = self.get_timeSeriesbyName("RangeHL")
    magic = self.get_magicDelta()

    title_and_axes = ["Delta Magic Scatter", "Magic", "Delta"]

    # 2D view: magic delta against RangeCO.
    gl.scatter(magic, range_co,
               labels=title_and_axes,
               legend=[self.symbolID],
               nf=1)

    # 3D view: same two series plus RangeHL, with nf=1 again.
    gl.scatter_3D(magic, range_co, range_hl,
                  labels=title_and_axes,
                  legend=[self.symbolID],
                  nf=1)
def scatter_deltaDailyMagic(self):
    """Scatter the magic delta against the "RangeCO" and "RangeHL" series.

    Reads the two named time series and the magic delta from this object and
    draws two figures through ``gl``: a 2D scatter of magic delta versus
    "RangeCO", and a 3D scatter that adds "RangeHL" as the third coordinate.
    Both use the same title/axis labels and ``self.symbolID`` as legend.

    Returns:
        None.
    """
    range_co = self.get_timeSeriesbyName("RangeCO")
    range_hl = self.get_timeSeriesbyName("RangeHL")
    magic = self.get_magicDelta()

    title_and_axes = ["Delta Magic Scatter","Magic","Delta"]

    # 2D view: magic delta against RangeCO.
    gl.scatter(magic, range_co,
               labels = title_and_axes,
               legend = [self.symbolID],
               nf = 1)

    # 3D view: same two series plus RangeHL, with nf=1 again.
    gl.scatter_3D(magic, range_co, range_hl,
                  labels = title_and_axes,
                  legend = [self.symbolID],
                  nf = 1)
def plot_means(Nclasses, X_train, y_train, colors=["r", "k"], normalize=True):
    """Plot the per-class average profiles as curves and as a 3D scatter.

    The averages come from ``dp.get_average_from_train``. Two figures are
    produced through ``gl``: a line plot of the first class's average, and a
    3D scatter of columns 0, 1 and 2 of every class's average.

    Args:
        Nclasses: Number of classes; printed, forwarded to the averaging
            helper and used as the class count for the 3D scatter.
        X_train: Training trials, forwarded to the averaging helper.
        y_train: Training labels, forwarded to the averaging helper.
        colors: Color per class index. Only read here.
        normalize: Forwarded to ``dp.get_average_from_train``.

    Returns:
        None.
    """
    # Single-argument print() gives the same output on Python 2 and 3; the
    # original print statement is a syntax error on Python 3.
    print(Nclasses)

    # Per-class average profile.
    # NOTE(review): presumably one (time x channels) array per class, with
    # trials assumed to be roughly time-aligned - confirm in dp.
    X_data_ave = dp.get_average_from_train(Nclasses, X_train, y_train,
                                           normalize=normalize)

    # Evolution of the mean in the time representation.
    gl.plot([0], [0])
    # NOTE(review): only class 0 is drawn. The original looped over range(1)
    # and carried a vertical-offset branch guarded by `i >= 1` that could
    # never execute; that dead branch is removed and the drawn output is
    # unchanged. If every class should be stacked with an offset, that must
    # be added deliberately.
    gl.plot([], X_data_ave[0], color=colors[0], nf=0,
            labels=["Mean value of the 70 Channels", "Time Index", "Channels"])

    # Evolution of the means of each class in the spherical representation.
    gl.scatter_3D(0, 0, 0, nf=1, na=0)
    for i in range(Nclasses):
        class_mean = X_data_ave[i]
        gl.scatter_3D(class_mean[:, 0], class_mean[:, 1], class_mean[:, 2],
                      color=colors[i], nf=0, na=0,
                      labels=["Mean Time Evolution the different classes", "D1", "D2", "D3"])
def get_2EM_vectors(X_All_labels, label_classes, max_trials=50, channel_sel=None, plot_flag=0):
    """Run the two-cluster EM helpers on the labelled trials; optionally plot.

    Calls ``dp.get_labels_ave_EM`` (2-EM at each time instant, per label) and
    ``dp.get_X_trials_EM`` (2-EM per trial). Each returns a "plus" and a
    "minus" collection indexed by class.
    NOTE(review): per the original comments these are the mean directions of
    the high-kappa and low-kappa clusters - confirm in dp.

    Args:
        X_All_labels: Per-class trial data; its length is the class count.
        label_classes: Forwarded to both ``dp`` helpers.
        max_trials: Forwarded to both ``dp`` helpers.
        channel_sel: Forwarded to both ``dp`` helpers.
        plot_flag: When truthy, each of the four results is drawn in its own
            3D scatter figure. Defaults to 0 (no plots).

    Returns:
        None in the code visible here.
        NOTE(review): the four EM results are computed but not returned in
        this excerpt - confirm whether a return statement is missing.
    """
    Nclasses = len(X_All_labels)

    # 2-EM across time: instead of one mean per time instant, two vectors.
    X_data_ave_EM_plus, X_data_ave_EM_minus = dp.get_labels_ave_EM(
        X_All_labels, label_classes,
        max_trials=max_trials, channel_sel=channel_sel)

    # 2-EM across trials: one run per trial.
    X_trials_EM_plus, X_trials_EM_minus = dp.get_X_trials_EM(
        X_All_labels, label_classes,
        max_trials=max_trials, channel_sel=channel_sel)

    # The original reassigned plot_flag = 0 here, which silently ignored the
    # caller's argument and made the branch below unreachable. The default
    # (0) keeps the previous no-plot behavior.
    if plot_flag:
        # Same figure layout for each result: a new figure, then one scatter
        # per class. Order: across-time plus/minus, across-trials plus/minus.
        for per_class in (X_data_ave_EM_plus, X_data_ave_EM_minus,
                          X_trials_EM_plus, X_trials_EM_minus):
            gl.scatter_3D(0, 0, 0, nf=1, na=0)
            for i in range(Nclasses):
                points = per_class[i]
                gl.scatter_3D(points[:, 0], points[:, 1], points[:, 2],
                              nf=0, na=0)
import Watson_estimators as Wae
import general_func as gf

# Close every open figure before plotting.
plt.close("all")

# Column of 1000 ones; not used anywhere in this excerpt.
mu_caca = np.ones((1000, 1))

################################################################
######## Load and combine 3 sets ##############################
###############################################################
# Hard-coded switch: set to 0 to skip loading the data sets.
EM_data = 1
if (EM_data):
    K = 3  # number of data files, read as Wdata_1.csv .. Wdata_K.csv
    #gl.scatter_3D([0,1,1,1,1,-1,-1,-1,-1], [0,1,1,-1,-1,1,1,-1,-1],[0,1,-1,1,-1,1,-1,1,-1], nf = 1, na = 0)
    # Initial 3D scatter at the origin with nf=1.
    gl.scatter_3D(0, 0, 0, nf=1, na=0)
    kflag = 0  # not used in this excerpt
    for k in range(1, K + 1):
        # folder = "./EM_data/"
        folder = "./test_data/"
        filedir = folder + "Wdata_" + str(k) + ".csv"
        # Header-less CSV loaded as a plain array.
        Xdata_k = np.array(pd.read_csv(filedir, sep=",", header=None))
        # Keep at most the first 1000 rows.
        Xdata_k = Xdata_k[:1000, :]
        print Xdata_k.shape
        # Xdata_param = pkl.load_pickle( folder + "Wdata_"+ str(k)+".pkl",1)
        # mu = Xdata_param[0]
        # kappa = Xdata_param[1]
        # print "Real: ", mu,kappa
        # Generate and plot the data
def get_2EM_vectors(X_All_labels, label_classes, max_trials = 50, channel_sel= None, plot_flag = 0):
    """Run the two-cluster EM helpers on the labelled trials; optionally plot.

    Calls ``dp.get_labels_ave_EM`` (2-EM at each time instant, per label) and
    ``dp.get_X_trials_EM`` (2-EM per trial). Each returns a "plus" and a
    "minus" collection indexed by class.
    NOTE(review): per the original comments these are the mean directions of
    the high-kappa and low-kappa clusters - confirm in dp.

    Args:
        X_All_labels: Per-class trial data; its length is the class count.
        label_classes: Forwarded to both ``dp`` helpers.
        max_trials: Forwarded to both ``dp`` helpers.
        channel_sel: Forwarded to both ``dp`` helpers.
        plot_flag: When truthy, each of the four results is drawn in its own
            3D scatter figure. Defaults to 0 (no plots).

    Returns:
        None in the code visible here.
        NOTE(review): the four EM results are computed but not returned in
        this excerpt - confirm whether a return statement is missing.
    """
    Nclasses = len(X_All_labels)

    # 2-EM across time: instead of one mean per time instant, two vectors.
    X_data_ave_EM_plus, X_data_ave_EM_minus = dp.get_labels_ave_EM(
        X_All_labels, label_classes,
        max_trials = max_trials, channel_sel = channel_sel)

    # 2-EM across trials: one run per trial.
    X_trials_EM_plus, X_trials_EM_minus = dp.get_X_trials_EM(
        X_All_labels, label_classes,
        max_trials = max_trials, channel_sel = channel_sel)

    # The original reassigned plot_flag = 0 here, which silently ignored the
    # caller's argument and made the branch below unreachable. The default
    # (0) keeps the previous no-plot behavior.
    if plot_flag:
        # Same figure layout for each result: a new figure, then one scatter
        # per class. Order: across-time plus/minus, across-trials plus/minus.
        for per_class in (X_data_ave_EM_plus, X_data_ave_EM_minus,
                          X_trials_EM_plus, X_trials_EM_minus):
            gl.scatter_3D(0, 0,0, nf = 1, na = 0)
            for i in range(Nclasses):
                points = per_class[i]
                gl.scatter_3D(points[:,0], points[:,1], points[:,2],
                              nf = 0, na = 0)
# intervals etc. # Fit the model model = ols("Y ~ MACD + RSI + ATR + MACD_vel + ATR_vel + RSI_vel", data).fit() params = model._results.params # Print the summary print(model.summary()) print("OLS model Parameters") print(params) Xdata = np.array([MACD[:, indx], RSI[:, indx], MACD_vel[:, indx]]).T ## Graoca bonita !! TODO print(ul.fnp(np.sqrt(np.sum(Xdata * Xdata, axis=1))).shape) Xdata = Xdata / ul.fnp(np.sqrt(np.sum(Xdata * Xdata, axis=1))) gl.scatter_3D(Xdata[:, 0], Xdata[:, 1], Xdata[:, 2]) gl.scatter_3D(-Xdata[:, 0], -Xdata[:, 1], -Xdata[:, 2], nf=0) gl.scatter_3D(-Xdata[:, 0], Xdata[:, 1], -Xdata[:, 2], nf=0) gl.scatter_3D(Xdata[:, 0], -Xdata[:, 1], Xdata[:, 2], nf=0) # Peform analysis of variance on fitted linear model #anova_results = anova_lm(model) #print('\nANOVA results') #print(anova_results) ############################################################################## model_sklearn = 1 if (model_sklearn): ## Eliminate the Nans !! Even if they appear in just one dim mask_X = np.sum(np.isnan(X_data), axis=1) == 0
gl.plot(lags, residuals) ############## Plotting of 3D regression ############## plotting_3D = 1 if (plotting_3D): # We train 3 models # Y_data = np.sign(Y_data) mask_X = np.sum(np.isnan(X_data), axis = 1) == 0 mask_Y = np.isnan(Y_data) == 0 mask = mask_X & mask_Y[:,0] # Mask without NaNs. This is done automatically in the OLS # Create linear regression object regr = linear_model.LinearRegression() # Train the model using the training sets X_datafiltered = X_data[:,[0,1]] regr.fit(X_datafiltered[mask,:], Y_data[mask,:]) # coeffs = np.array([regr.intercept_, regr.coef_])[0] coeffs = np.append(regr.intercept_, regr.coef_) params = np.array(coeffs) gl.scatter_3D(X_data[:,0],X_data[:,1], Y_data, labels = ["","",""], legend = ["Pene"], nf = 1) grids = ul.get_grids(X_data) z = bMA.get_plane_Z(grids[0], grids[1],params) h = gl.plot_3D(grids[0],grids[1],z, nf = 0)
# Own libraries import import_folders from graph_lib import gl import sampler_lib as sl import EM_lib as EMl import pickle_lib as pkl import Watson_distribution as Wad import Watson_sampling as Was import Watson_estimators as Wae import general_func as gf import copy K = 3 gl.scatter_3D(0, 0, 0, nf=1, na=0) N = 10000 Xdata = [] # List will all the generated data K = 3 #gl.scatter_3D([0,1,1,1,1,-1,-1,-1,-1], [0,1,1,-1,-1,1,1,-1,-1],[0,1,-1,1,-1,1,-1,1,-1], nf = 1, na = 0) gl.scatter_3D(0, 0, 0, nf=1, na=0) kflag = 0 for k in range(1, K + 1): folder = "./EM_data/" folder = "./test_data/" filedir = folder + "Wdata_" + str(k) + ".csv" Xdata_k = np.array(pd.read_csv(filedir, sep=",", header=None)) Xdata_k = Xdata_k[:, :] print Xdata_k.shape
Module = Module.reshape(Nsamples, 1) # Check that the modulus is not 0 Xdata = Xdata[np.where(Module > tol)[0], :] Xdata = np.divide(Xdata, Module) gl.scatter(Xdata[:, 0], Xdata[:, 1], nf=0) ### POLLAS Nsamples = 1000 Ndim = 3 tol = 0.000000001 Xdata = np.random.randn(Nsamples, Ndim) + 2 Module = np.sqrt(np.sum(np.power(Xdata, 2), 1)) Module = Module.reshape(Nsamples, 1) # Check that the modulus is not 0 Xdata = Xdata[np.where(Module > tol)[0], :] Xdata = np.divide(Xdata, Module) gl.scatter_3D(Xdata[:, 0], Xdata[:, 1], Xdata[:, 2]) Nsamples = 100 Xdata = np.random.randn(Nsamples, Ndim) - 0 Module = np.sqrt(np.sum(np.power(Xdata, 2), 1)) Module = Module.reshape(Nsamples, 1) # Check that the modulus is not 0 Xdata = Xdata[np.where(Module > tol)[0], :] Xdata = np.divide(Xdata, Module) gl.scatter_3D(Xdata[:, 0], Xdata[:, 1], Xdata[:, 2], nf=0)
folder_EM = "../data/EM_data/"
folder_HMM = "../data/HMM_data/"  # not used in this excerpt
# Hard-coded switch for the generation section.
generate_data = 1
if (generate_data):
    ### Generate the data, save it to file, load it and estimate parameters
    kappa = 20
    N = 1000
    mu = np.array([2, 4, 5])
    # Scale mu to unit Euclidean norm.
    mu = mu / np.sqrt(np.sum(mu * mu))
    # NOTE(review): presumably draws N samples from a Watson distribution
    # with mean direction mu and concentration kappa - confirm in Was.
    RandWatson = Was.randWatson(N, mu, kappa)
    gl.scatter_3D(RandWatson[:, 0], RandWatson[:, 1], RandWatson[:, 2], nf=1, na=0)
    print "Generation Parameters"
    print mu
    print kappa
    print "Estimation Parameters"
    # mu and kappa are overwritten with the values estimated from the
    # samples; the generation values are not kept.
    mu = Wae.get_MLmean(RandWatson)
    kappa = Wae.get_MLkappa(mu, RandWatson)
    print mu
    print kappa
    ## Save the file to disk !!
    filedir = folder_EM + "Wdata.csv"
folder = "./HMM_data/"
# Three pickles loaded through the project helper (second argument is 1).
# SECURITY NOTE(review): pickle_lib presumably unpickles these files; only
# load pickles from a trusted source.
Chains_list = pkl.load_pickle(folder + "HMM_labels.pkl", 1)  # not used in this excerpt
HMM_list = pkl.load_pickle(folder + "HMM_datapoints.pkl", 1)
params = pkl.load_pickle(folder + "HMM_param.pkl", 1)
# The parameter pickle is read as [pi, A].
pi = params[0]
A = params[1]
# NOTE(review): presumably the state probabilities after 20 steps of the
# chain (pi, A) - confirm in HMMlf. Not used in this excerpt.
pi_end = HMMlf.get_final_probabilities(pi, A, 20)
#print pi_end
print "Real pi"
print pi
print "Real A"
print A
# New 3D figure, then one scatter per chain using columns 0, 1 and 2.
gl.scatter_3D(0, 0, 0, nf=1, na=0)
for XdataChain in HMM_list:
    gl.scatter_3D(XdataChain[:, 0], XdataChain[:, 1], XdataChain[:, 2], nf=0, na=0)

################################################################
######## Initialization of the parameters ##############################
###############################################################
I = 3
# Number of columns of the first chain.
D = HMM_list[0].shape[1]
init_with_EM = 0
# Own libraries import import_folders from graph_lib import gl import sampler_lib as sl import EM_lib as EMl import pickle_lib as pkl import Watson_distribution as Wad import Watson_sampling as Was import Watson_estimators as Wae import general_func as gf import copy K = 3 gl.scatter_3D(0, 0,0, nf = 1, na = 0) N = 10000 Xdata = [] # List will all the generated data K = 3 #gl.scatter_3D([0,1,1,1,1,-1,-1,-1,-1], [0,1,1,-1,-1,1,1,-1,-1],[0,1,-1,1,-1,1,-1,1,-1], nf = 1, na = 0) gl.scatter_3D(0, 0,0, nf = 1, na = 0) kflag = 0 for k in range(1,K+1): folder = "./EM_data/" folder = "./test_data/" filedir = folder + "Wdata_"+ str(k)+".csv" Xdata_k = np.array(pd.read_csv(filedir, sep = ",", header = None)) Xdata_k = Xdata_k[:,:] print Xdata_k.shape
X_All_labels = [X_All_labels[0], X_All_labels[2]] label_classes = [label_classes[0], label_classes[2]] # Now X_All_labels has in every postion the trials for each class in the form # of a matrix Ntrials x Ntimes x Ndim ################################################################ ######## Preprocessing ! ############################## ############################################################### # For the first label plotting_all_trials_one_instant = 0 if (plotting_all_trials_one_instant): gl.scatter_3D(0, 0, 0, nf=1, na=0) Ntrial, Nsam, Ndim = X_All_labels[0].shape i0 = 50 # Good one !! t_show = 30 for i in range(Ntrial): X_trial = X_All_labels[0][i, :, i0:i0 + 3] # Nsam x Ndim # X_trial = X_trial - np.sum(X_trial, axis = 1).reshape(X_trial.shape[0],1) # scaler = preprocessing.StandardScaler().fit(X_trial) # X_trial = scaler.transform(X_trial) X_trial = gf.normalize_data(X_trial) gl.scatter_3D(X_trial[t_show, 0], X_trial[t_show, 1], X_trial[t_show, 2],
# Evaluate the Watson pdf on an Nsa x Nsa grid of angles. `probs`, `Nsa`,
# `Xthetta`, `Xfi`, `mu`, `kappa` and `Nsampling` come from code before this
# excerpt.
for i in range(Nsa):
    probs.append([])
    for j in range(Nsa):
        # Point on the unit sphere for angles (Xthetta[i], Xfi[j]):
        # (sin(t)cos(f), sin(t)sin(f), cos(t)).
        XdataSample = [np.sin(Xthetta[i])*np.cos(Xfi[j]), np.sin(Xthetta[i])*np.sin(Xfi[j]), np.cos(Xthetta[i])]
        probs[i].append(Wad.Watson_pdf(XdataSample,mu,kappa ))
# After the transpose, rows follow Xfi and columns follow Xthetta.
probs = np.array(probs).T
## Plotting
gl.set_subplots(1,3)
## Plot it in terms of (angle, prob)
gl.plot_3D(Xthetta,Xfi, np.array(probs))
gl.plot_3D(Xthetta,Xfi, np.array(probs), project = "spher")

# New random 5-dimensional mean direction, normalized and flattened.
mu = np.random.randn(5,1);
mu = gf.normalize_module(mu.T).flatten()
## Generate samples
RandWatson = Was.randWatson(Nsampling, mu, kappa)
# Only the first three columns of the samples are drawn.
gl.scatter_3D(RandWatson[:,0],RandWatson[:,1], RandWatson[:,2])
# Two estimation routes; only the joint estimate (mu_est2, kappa_est2) is
# printed below.
mu_est = Wae.get_MLmean(RandWatson)
kappa_est = Wae.get_MLkappa(mu_est, RandWatson)
mu_est2, kappa_est2 = Wae.get_Watson_muKappa_ML(RandWatson)
print "Real: ", mu, kappa
print "Estimate: ", mu_est2, kappa_est2
# Close every open figure before plotting.
plt.close("all")
folder = "./data/test_data/"
folder_HMM = "./data/HMM_data/"  # not used in this excerpt
# Hard-coded switches. Only load_EM_data is read in this excerpt, and it is
# 0, so the block below is skipped as written.
load_EM_data = 0
load_HMM_data = 1
perform_EM = 1
################################################################
######## Load and combine 3 sets ##############################
###############################################################
if (load_EM_data):
    K = 3  # number of data files, read as Wdata_1.csv .. Wdata_K.csv
    #gl.scatter_3D([0,1,1,1,1,-1,-1,-1,-1], [0,1,1,-1,-1,1,1,-1,-1],[0,1,-1,1,-1,1,-1,1,-1], nf = 1, na = 0)
    # Initial 3D scatter at the origin with nf=1.
    gl.scatter_3D(0, 0, 0, nf=1, na=0)
    kflag = 0  # not used in this excerpt
    for k in range(1, K + 1):
        filedir = folder + "Wdata_" + str(k) + ".csv"
        # Header-less CSV loaded as a plain array.
        Xdata_k = np.array(pd.read_csv(filedir, sep=",", header=None))
        # Keep at most the first 1000 rows.
        Xdata_k = Xdata_k[:1000, :]
        # Xdata_param = pkl.load_pickle( folder + "Wdata_"+ str(k)+".pkl",1)
        # mu = Xdata_param[0]
        # kappa = Xdata_param[1]
        # print "Real: ", mu,kappa
        # Generate and plot the data
        gl.scatter_3D(Xdata_k[:, 0], Xdata_k[:, 1], Xdata_k[:, 2], nf=0, na=0)