# Assumed imports for the snippets in this file: `dist` is used with an
# sklearn-like pairwise signature (dist(X), dist(X, Y), squared=...), so
# sklearn's euclidean_distances is a reasonable stand-in.
import math
import random
from typing import List

import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.tsatools import lagmat
from sklearn.metrics.pairwise import euclidean_distances as dist


def fcnn(x_train, y_train, k, r, w, v):
    # k, r, w, v are dummies so the signature matches the other reduction algorithms.
    # Vectorized implementation; for a more interpretable implementation see below.
    n_classes = int(y_train.max() + 1)
    n_samples = x_train.shape[0]
    T = np.concatenate([x_train, np.reshape(y_train, [n_samples, 1])], axis=1)
    # Seed the condensed set with one medoid per class; centroid() returns a
    # class-local index, so map it back to an index into the full training set.
    ind_delta_S = np.array([np.where(y_train == i)[0][centroid(x_train[y_train == i])]
                            for i in range(n_classes)])
    X = T[:, :-1]
    Y = T[:, -1].astype(int)
    ind_T = np.arange(n_samples)
    ind_nearest = np.ones([n_classes + n_samples], dtype=int)
    min_distances = np.inf * np.ones([n_classes + n_samples])
    ind_S = np.array([], dtype=int)
    while ind_delta_S.shape[0] != 0:
        ind_S = np.array(list(set(ind_S.tolist()).union(set(ind_delta_S.tolist())))).astype(int)
        ind_Q = np.array(list(set(ind_T.tolist()) - set(ind_S.tolist()))).astype(int)
        # Update each remaining point's nearest prototype using only the
        # newly added prototypes in ind_delta_S.
        pairwise = dist(X[ind_Q], X[ind_delta_S])
        distances = np.min(pairwise, axis=1)
        ind_distances = ind_delta_S[np.argmin(pairwise, axis=1)].astype(int)
        ind_change_nearest = min_distances[ind_Q] > distances
        min_distances[ind_Q[ind_change_nearest]] = distances[ind_change_nearest]
        ind_nearest[ind_Q[ind_change_nearest]] = ind_distances[ind_change_nearest]
        # We do the same for representatives: for each prototype, pick one
        # representative among the points it currently misclassifies.
        ind_missmatch = ind_Q[np.logical_not(np.equal(Y[ind_Q], Y[ind_nearest[ind_Q]]))]  # Different labels
        ind_rep = np.array([], dtype=int)
        def_rep = np.unique(ind_nearest[ind_Q])
        delete = []
        for m, n in enumerate(def_rep):
            n_indexes = ind_Q[np.where(ind_nearest[ind_Q] == n)[0]]
            n_indexes = np.array(list(set(n_indexes.tolist()).intersection(set(ind_missmatch.tolist()))))
            if n_indexes.size == 0:
                delete.append(m)
                continue
            rep_distances = np.linalg.norm(X[ind_nearest[n_indexes]] - X[n_indexes], axis=1)
            ind_rep = np.concatenate([ind_rep, [n_indexes[np.argmin(rep_distances)]]])
        def_rep = np.delete(def_rep, delete)
        ind_delta_S = ind_rep[np.in1d(def_rep, ind_S)]
    # Return the condensed subset
    x_train = np.stack(X[ind_S])
    y_train = np.array(Y[ind_S])
    return x_train, y_train
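# Minimal usage sketch for fcnn (illustrative, not from the original source):
# toy two-class data, with the dummy arguments passed as None.
# rng = np.random.default_rng(0)
# x = np.vstack([rng.normal(0, 1, (50, 2)), rng.normal(5, 1, (50, 2))])
# y = np.concatenate([np.zeros(50), np.ones(50)])
# x_red, y_red = fcnn(x, y, None, None, None, None)
# print(x_red.shape, "condensed from", x.shape)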
def K_medoids_min_cost(x, k, iteration):
    # Run the medoid initialisation `iteration` times and record, per run,
    # the distance matrix, the cluster assignment, and the total cost.
    a = []
    b = []
    c = []
    for i in range(iteration):
        medoids = int_medoids(x, k)
        d = dist(x, medoids)
        a.append(d)
        target_class = d.argmin(axis=1)
        b.append(target_class)
        iteration_cost = d.min(axis=1).sum()
        c.append(iteration_cost)
    return a, b, c
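# Sketch of driving K_medoids_min_cost and keeping the cheapest restart.
# `int_medoids` is not defined in this snippet; the assumption is that it
# samples k rows of x as initial medoids, e.g.:
# def int_medoids(x, k):
#     return x[np.random.choice(len(x), k, replace=False)]
# d_all, labels_all, costs = K_medoids_min_cost(x, k=3, iteration=10)
# best = int(np.argmin(costs))
# best_labels = labels_all[best]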
def incremental_farthest_search(points, k):
    # Greedy farthest-point sampling: seed with a random point, then repeatedly
    # add the remaining point whose distance to the solution set is largest.
    # Note: `dist` here is assumed to be a point-to-point metric (e.g.
    # math.dist), not the pairwise-matrix version used elsewhere in this file.
    remaining_points = points[:]
    solution_set = [remaining_points.pop(random.randint(0, len(remaining_points) - 1))]
    for _ in range(k - 1):
        distances = [dist(p, solution_set[0]) for p in remaining_points]
        for i, p in enumerate(remaining_points):
            for j, s in enumerate(solution_set):
                distances[i] = min(distances[i], dist(p, s))
        solution_set.append(remaining_points.pop(distances.index(max(distances))))
    return solution_set
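# Usage sketch (assumes dist = math.dist or a similar point-to-point metric):
# pts = [(0, 0), (1, 0), (0, 1), (10, 10), (5, 5)]
# landmarks = incremental_farthest_search(pts, k=3)   # k well-spread points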
def Predict_Ngram(Inpath="../test/test.txt", Outpath="../Output/8_1.csv",
                  Train_Trump="../train/trump.txt", Train_Obama="../train/obama.txt"):
    '''
    Input:
        Inpath      : file path for test data
        Outpath     : file path for output csv file
        Train_Trump : file path to train Trump's bigram model
        Train_Obama : file path to train Obama's bigram model
    Output:
        Returns None; output goes straight to the .csv file
    '''
    f = open(Outpath, 'w')
    f.write('Id,Prediction\n')
    # Preprocess the training and test sets into paragraph vectors
    Paragraphs_Trump = NGram.corpora_preprocess(Train_Trump)
    P_Vecs_Trump = [Get_Vector(p) for p in Paragraphs_Trump]
    Paragraphs_Obama = NGram.corpora_preprocess(Train_Obama)
    P_Vecs_Obama = [Get_Vector(p) for p in Paragraphs_Obama]
    Paragraphs_test = NGram.corpora_preprocess(Inpath)
    P_Vecs_test = [Get_Vector(p) for p in Paragraphs_test]
    for idx, pvec in enumerate(P_Vecs_test):
        max_cosine = -50
        isTrump = True
        # Label a paragraph Trump (1) unless some Obama paragraph is more
        # similar than every Trump paragraph.
        for pv_trump in P_Vecs_Trump:
            max_cosine = max(max_cosine, dist(pvec, pv_trump))
        for pv_obama in P_Vecs_Obama:
            if dist(pvec, pv_obama) > max_cosine:
                isTrump = False
                break
        f.write(str(idx) + ',')
        f.write('1' if isTrump else '0')
        f.write('\n')
    f.close()  # flush the predictions to disk
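# Example invocation with the default corpus paths; NGram.corpora_preprocess
# and Get_Vector are assumed to be defined elsewhere in this project.
# Predict_Ngram()
# The resulting CSV has one row per test paragraph:
#   Id,Prediction
#   0,1
#   1,0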
def find_d_embedding(data: list, maxm: int) -> int:
    RT = 15.0
    AT = 2
    sigmay = np.std(data, ddof=1)
    nyr = len(data)
    m = maxm
    EM = lagmat(data, maxlag=m - 1)
    EEM = np.asarray([EM[j, :] for j in range(m - 1, EM.shape[0])])
    embedm = maxm
    for k in range(AT, EEM.shape[1] + 1):
        fnn1 = []
        fnn2 = []
        Ma = EEM[:, range(k)]
        D = dist(Ma)
        for i in range(1, EEM.shape[0] - m - k):
            d = D[i, :]
            pdnz = np.where(d > 0)
            dnz = d[pdnz]
            Rm = np.min(dnz)
            l = np.where(d == Rm)
            l = l[0]
            l = l[len(l) - 1]
            if l + m + k - 1 < nyr:
                fnn1.append(np.abs(data[i + m + k - 1] - data[l + m + k - 1]) / Rm)
                fnn2.append(np.abs(data[i + m + k - 1] - data[l + m + k - 1]) / sigmay)
        Ind1 = np.where(np.asarray(fnn1) > RT)
        Ind2 = np.where(np.asarray(fnn2) > AT)
        if len(Ind1[0]) / float(len(fnn1)) < 0.1 and len(Ind2[0]) / float(len(fnn2)) < 0.1:
            embedm = k
            break
    return embedm
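# Usage sketch on a synthetic scalar series (illustrative only); lagmat is
# statsmodels.tsa.tsatools.lagmat and `dist` returns a full pairwise matrix.
# series = np.sin(0.1 * np.arange(500)) + 0.01 * np.random.randn(500)
# d = find_d_embedding(series.tolist(), maxm=10)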
def fnn(data, maxm):
    """
    Compute the embedding dimension of a time series, used to build the
    phase space, via the false nearest neighbours criterion.
    data --> time series
    maxm --> maximum embedding dimension
    """
    RT = 15.0  # relative-distance threshold
    AT = 2     # attractor-size (sigma-scaled) threshold
    sigmay = np.std(data, ddof=1)
    nyr = len(data)
    m = maxm
    EM = lagmat(data, maxlag=m - 1)
    EEM = np.asarray([EM[j, :] for j in range(m - 1, EM.shape[0])])
    embedm = maxm
    for k in range(AT, EEM.shape[1] + 1):
        fnn1 = []
        fnn2 = []
        Ma = EEM[:, range(k)]
        D = dist(Ma)
        for i in range(1, EEM.shape[0] - m - k):
            d = D[i, :]
            pdnz = np.where(d > 0)
            dnz = d[pdnz]
            Rm = np.min(dnz)
            l = np.where(d == Rm)
            l = l[0]
            l = l[len(l) - 1]
            if l + m + k - 1 < nyr:
                fnn1.append(np.abs(data[i + m + k - 1] - data[l + m + k - 1]) / Rm)
                fnn2.append(np.abs(data[i + m + k - 1] - data[l + m + k - 1]) / sigmay)
        Ind1 = np.where(np.asarray(fnn1) > RT)
        Ind2 = np.where(np.asarray(fnn2) > AT)
        if len(Ind1[0]) / float(len(fnn1)) < 0.1 and len(Ind2[0]) / float(len(fnn2)) < 0.1:
            embedm = k
            break
    return embedm
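# fnn mirrors find_d_embedding above, with the standard false-neighbour
# thresholds RT and AT; typical call:
# m = fnn(series, maxm=10)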
def Dim_Corr(datas, Tao, m, graph=False):
    """
    Compute the correlation dimension of a time series with time lag Tao
    and embedding dimension m.
    datas --> time series to compute the correlation dimension of
    Tao --> time lag, computed from the first zero crossing of the
            auto-correlation function (see the Tao func)
    m --> embedding dimension of the time series, computed with the false
          nearest neighbours method (see the fnn func)
    graph (optional) --> plot the phase space (attractor) in 3D
    """
    x = PhaseSpace(datas, m, Tao, graph)
    ED2 = dist(x.T)
    posD = np.triu_indices_from(ED2, k=1)
    ED = ED2[posD]
    max_eps = np.max(ED)
    min_eps = np.min(ED[np.where(ED > 0)])
    max_eps = np.exp(math.floor(np.log(max_eps)))
    n_div = int(math.floor(np.log(max_eps / min_eps)))
    n_eps = n_div + 1
    eps_vec = range(n_eps)
    unos = np.ones([len(eps_vec)]) * -1
    eps_vec1 = max_eps * np.exp(unos * eps_vec - unos)
    Npairs = (len(x[1, :])) * (len(x[1, :]) - 1)
    C_eps = np.zeros(n_eps)
    for i in eps_vec:
        eps = eps_vec1[i]
        N = np.where((ED < eps) & (ED > 0))
        S = len(N[0])
        C_eps[i] = float(S) / Npairs
    # The correlation dimension is the slope of log C(eps) vs log eps,
    # omitting one point at each end of the scaling region.
    omit_pts = 1
    k1 = omit_pts
    k2 = n_eps - omit_pts
    xd = np.log(eps_vec1)
    yd = np.log(C_eps)
    xp = xd[k1:k2]
    yp = yd[k1:k2]
    p = np.polyfit(xp, yp, 1)
    return p[0]
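# Usage sketch chaining the helpers around this function (PhaseSpace and Tao
# are assumed to be defined alongside fnn in this codebase):
# tao = Tao(series)
# m = fnn(series, maxm=10)
# d2 = Dim_Corr(series, tao, m)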
def correlation_dim(datas: List[np.ndarray], tau: int, d: int) -> float:
    x = phase_space(datas, d, tau)
    print('Finding correlation dimension ...')
    ED2 = dist(x.T)
    posD = np.triu_indices_from(ED2, k=1)
    ED = ED2[posD]
    max_eps = np.max(ED)
    min_eps = np.min(ED[np.where(ED > 0)])
    max_eps = np.exp(math.floor(np.log(max_eps)))
    n_div = int(math.floor(np.log(max_eps / min_eps)))
    n_eps = n_div + 1
    eps_vec = range(n_eps)
    unos = np.ones([len(eps_vec)]) * -1
    eps_vec1 = max_eps * np.exp(unos * eps_vec - unos)
    Npairs = (len(x[1, :])) * (len(x[1, :]) - 1)
    C_eps = np.zeros(n_eps)
    for i in eps_vec:
        eps = eps_vec1[i]
        N = np.where((ED < eps) & (ED > 0))
        S = len(N[0])
        C_eps[i] = float(S) / Npairs
    omit_pts = 1
    k1 = omit_pts
    k2 = n_eps - omit_pts
    xd = np.log(eps_vec1)
    yd = np.log(C_eps)
    xp = xd[k1:k2]
    yp = yd[k1:k2]
    p = np.polyfit(xp, yp, 1)
    return p[0]
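# correlation_dim is a typed variant of Dim_Corr above, with phase_space in
# place of PhaseSpace; typical call:
# d2 = correlation_dim(series, tau=tao, d=m)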
def kernel(x, y, lengthscale):
    # Squared-exponential (RBF) kernel on pairwise squared distances; the
    # 1e-300 offset keeps entries strictly positive.
    return np.exp(-dist(x, y, squared=True) / (lengthscale ** 2)) + 1e-300
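# Quick sanity check (assumes dist is sklearn's euclidean_distances):
# X = np.random.default_rng(0).normal(size=(5, 2))
# K = kernel(X, X, lengthscale=1.0)   # K is (5, 5) with ones on the diagonal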
def get_distance(points):
    """points assumed to be latitude/longitude pairs in degrees; returns pairwise distances in m"""
    # Converting to radians and scaling by the Earth radius (6371 km) yields
    # great-circle distances only if `dist` is a haversine metric here.
    return dist(np.radians(points)) * 6371 * 1000
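# Example (assuming a haversine `dist`, e.g. sklearn's haversine_distances):
# pts = np.array([[51.5074, -0.1278], [48.8566, 2.3522]])  # London, Paris
# D = get_distance(pts)   # D[0, 1] is roughly 3.4e5 (metres)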
def centroid(class_x_train):
    # Return the index of the sample closest to the mean of the array it is
    # given (the medoid); note the index is local to class_x_train.
    mean = np.mean(class_x_train, axis=0, keepdims=True)
    distances = dist(class_x_train, mean)
    return np.argmin(distances)
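# Usage note: because the returned index is local to the slice, callers such
# as fcnn above must map it back to global indices, e.g.:
# medoid_global = np.where(y == 0)[0][centroid(x[y == 0])]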
query_radius = 15
x_ref = tracker.particles.mean(axis=0)[:2]
skip = 1
ref_points = s_init.points[s_init.tree.query_ball_point(x_ref, query_radius)][::skip]

# Subsample at most 1000 reference points for the GP training set
train_size = min(1000, ref_points.shape[0])
rand_ind = np.random.choice(range(len(ref_points)), train_size, replace=False)
ref_points = ref_points[rand_ind]

# GP hyperparameters: lengthscale l and noise variance sigma2
l = 0.1
sigma2 = 0.01

# Normalise the training points, then build the RBF Gram matrix with a noise
# term on the diagonal and precompute its inverse
r_mean = ref_points.mean(axis=0)
r_std = ref_points.std(axis=0)
x_train = (ref_points - r_mean) / r_std
K = np.exp(-dist(x_train[:, :2], x_train[:, :2], squared=True) / l ** 2) + sigma2 * np.eye(x_train.shape[0])
Kinv = np.linalg.inv(K)

particles_init = tracker.particles.copy()
init_index = np.arange(len(particles_init))
particle_list = []
mean_list = []

fig, axs = plt.subplots()
fig.set_size_inches(12, 12)
s0 = scanset.index(0)

# Initialise uniform particle weights
particles_0 = tracker.particles.copy()
w = np.ones(len(particles_0))
w /= w.sum()
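# Sketch of how Kinv would typically be used downstream (an assumption, not
# shown in this snippet): GP posterior mean of the third point coordinate at
# query locations x_query, normalised the same way as x_train.
# k_star = np.exp(-dist(x_query[:, :2], x_train[:, :2], squared=True) / l ** 2)
# z_mean = k_star @ Kinv @ x_train[:, 2]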