Example #1
def fcnn(x_train, y_train, k, r, w, v):
    # k, r, w, v are dummy arguments so all reduction algorithms share one interface
    # Vectorized implementation; for a more interpretable version see below
    n_classes = int(y_train.max() + 1)
    n_samples = x_train.shape[0]
    T = np.concatenate([x_train, np.reshape(y_train, [n_samples, 1])], axis=1)
    # centroid() (see Example #11) returns a within-class index, so map it
    # back to a global index into x_train
    ind_delta_S = np.array([np.where(y_train == i)[0][centroid(x_train[y_train == i])]
                            for i in range(n_classes)])
    X = T[:, :-1]
    Y = T[:, -1].astype(int)
    ind_T = np.array([i for i in range(n_samples)])
    ind_nearest = np.ones([n_classes + n_samples], dtype=int)
    min_distances = np.inf * np.ones([n_classes + n_samples])
    ind_S = np.array([], dtype=int)  # indices of the condensed subset

    while ind_delta_S.shape[0] != 0:
        ind_S = np.array(list(set(ind_S.tolist()).union(set(ind_delta_S.tolist()))))
        ind_Q = np.array(list(set(ind_T.tolist()) - set(ind_S.tolist())))
        pairwise = dist(X[ind_Q], X[ind_delta_S])
        distances = np.min(pairwise, axis=1)
        ind_distances = ind_delta_S[np.argmin(pairwise, axis=1)].astype(int)
        ind_change_nearest = min_distances[ind_Q] > distances
        min_distances[ind_Q[ind_change_nearest]] = distances[ind_change_nearest]
        ind_nearest[ind_Q[ind_change_nearest]] = ind_distances[ind_change_nearest]
        # Points whose nearest prototype carries a different label
        ind_mismatch = ind_Q[np.logical_not(np.equal(Y[ind_Q], Y[ind_nearest[ind_Q]]))]

        ind_rep = np.array([], dtype=int)

        def_rep = np.unique(ind_nearest[ind_Q])
        delete = []
        for m, n in enumerate(def_rep):
            n_indexes = ind_Q[np.where(ind_nearest[ind_Q] == n)[0]]
            n_indexes = np.array(list(set(n_indexes.tolist()).intersection(set(ind_mismatch.tolist()))))
            if n_indexes.size == 0:
                delete.append(m)
                continue
            # Representative: the mismatched point closest to its prototype
            rep_distances = np.linalg.norm(X[ind_nearest[n_indexes]] - X[n_indexes], axis=1)
            ind_rep = np.concatenate([ind_rep, [n_indexes[np.argmin(rep_distances)]]])
        def_rep = np.delete(def_rep, delete)
        ind_delta_S = ind_rep[np.isin(def_rep, ind_S)]

    # Return the condensed subset
    x_train = X[ind_S]
    y_train = Y[ind_S]
    return x_train, y_train
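
A minimal usage sketch, assuming dist is sklearn's euclidean_distances and the centroid helper from Example #11 is in scope (both are assumptions about the source project):

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.metrics.pairwise import euclidean_distances as dist

X, y = make_blobs(n_samples=500, centers=3, random_state=0)
x_red, y_red = fcnn(X, y, None, None, None, None)  # k, r, w, v are unused dummies
print(x_red.shape)  # the condensed subset is typically much smaller than X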
Example #2
def K_medoids_min_cost(x, k, iteration):
    """Run `iteration` random restarts of k-medoids initialization and record
    the distance matrix, assignment, and total cost of each restart."""
    all_distances = []
    all_assignments = []
    all_costs = []
    for i in range(iteration):
        medoids = int_medoids(x, k)          # pick k candidate medoids
        d = dist(x, medoids)                 # distance of every point to each medoid
        all_distances.append(d)
        target_class = d.argmin(axis=1)      # assign each point to its nearest medoid
        all_assignments.append(target_class)
        iteration_cost = sum(d.min(axis=1))  # total within-cluster distance
        all_costs.append(iteration_cost)
    return all_distances, all_assignments, all_costs
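
To pick the best restart one keeps the assignment with the lowest cost. A self-contained sketch, assuming dist is sklearn's euclidean_distances, with a hypothetical stand-in for int_medoids:

import numpy as np
from sklearn.metrics.pairwise import euclidean_distances as dist

def int_medoids(x, k):
    # assumed stand-in: k random rows of x as initial medoids
    return x[np.random.choice(len(x), k, replace=False)]

x = np.random.randn(200, 2)
distances, assignments, costs = K_medoids_min_cost(x, k=3, iteration=20)
best = int(np.argmin(costs))  # keep the restart with the smallest total cost
labels = assignments[best]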
Example #3
def incremental_farthest_search(points, k):
    remaining_points = points[:]
    solution_set = []
    solution_set.append(
        remaining_points.pop(random.randint(0,
                                            len(remaining_points) - 1)))
    for _ in range(k - 1):
        distances = [dist(p, solution_set[0]) for p in remaining_points]
        for i, p in enumerate(remaining_points):
            for j, s in enumerate(solution_set):
                distances[i] = min(distances[i], dist(p, s))
        solution_set.append(
            remaining_points.pop(distances.index(max(distances))))
    return solution_set
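
A self-contained usage sketch of this farthest-point sampling, assuming dist is a plain Euclidean distance between two points:

import math
import random

def dist(p, q):
    # assumed helper: Euclidean distance between two point tuples
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(p, q)))

points = [(0, 0), (1, 0), (0, 1), (10, 10), (11, 10)]
print(incremental_farthest_search(points, k=3))  # three mutually far-apart points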
Example #4
def Predict_Ngram(Inpath="../test/test.txt",
                  Outpath="../Output/8_1.csv",
                  Train_Trump="../train/trump.txt",
                  Train_Obama="../train/obama.txt"):
    '''
    Input:
        Inpath  : file path for the test data
        Outpath : file path for the output csv file
        Train_Trump : file path to train Trump's bigram model
        Train_Obama : file path to train Obama's bigram model
    Output:
        Returns None; predictions are written straight to the csv file.
    '''
    f = open(Outpath, 'w')
    f.write('Id,Prediction\n')
    # Preprocess the two training corpora and the test set
    Paragraphs_Trump = NGram.corpora_preprocess(Train_Trump)
    P_Vecs_Trump = [Get_Vector(p) for p in Paragraphs_Trump]
    Paragraphs_Obama = NGram.corpora_preprocess(Train_Obama)
    P_Vecs_Obama = [Get_Vector(p) for p in Paragraphs_Obama]
    Paragraphs_test = NGram.corpora_preprocess(Inpath)
    P_Vecs_test = [Get_Vector(p) for p in Paragraphs_test]
    for idx, pvec in enumerate(P_Vecs_test):
        max_cosine = -50  # sentinel below any attainable similarity
        isTrump = True
        # dist is used here as a similarity score: higher means more alike
        for pv_trump in P_Vecs_Trump:
            max_cosine = max(max_cosine, dist(pvec, pv_trump))
        for pv_obama in P_Vecs_Obama:
            if dist(pvec, pv_obama) > max_cosine:
                isTrump = False
                break
        f.write(str(idx) + ',')
        if isTrump:
            f.write('1')
        else:
            f.write('0')
        f.write('\n')
    f.close()
Example #5
def find_d_embedding(data: list, maxm: int) -> int:
    RT = 15.0  # relative-distance threshold of the false-neighbor test
    AT = 2     # absolute (attractor-size) threshold; also the first dimension tried

    sigmay = np.std(data, ddof=1)
    nyr = len(data)
    m = maxm

    EM = lagmat(data, maxlag=m - 1)
    EEM = np.asarray([EM[j, :] for j in range(m - 1, EM.shape[0])])
    embedm = maxm

    for k in range(AT, EEM.shape[1] + 1):
        fnn1 = []
        fnn2 = []
        Ma = EEM[:, :k]  # first k delay coordinates
        D = dist(Ma)     # pairwise distances in the k-dimensional embedding

        for i in range(1, EEM.shape[0] - m - k):
            d = D[i, :]
            dnz = d[np.where(d > 0)]
            Rm = np.min(dnz)               # distance to the nearest neighbor
            nn = np.where(d == Rm)[0][-1]  # index of that nearest neighbor

            if nn + m + k - 1 < nyr:
                fnn1.append(
                    np.abs(data[i + m + k - 1] - data[nn + m + k - 1]) / Rm)
                fnn2.append(
                    np.abs(data[i + m + k - 1] - data[nn + m + k - 1]) / sigmay)

        Ind1 = np.where(np.asarray(fnn1) > RT)
        Ind2 = np.where(np.asarray(fnn2) > AT)

        # Accept k once fewer than 10% of neighbors are false under both tests
        if len(Ind1[0]) / float(len(fnn1)) < 0.1 and len(Ind2[0]) / float(
                len(fnn2)) < 0.1:
            embedm = k
            break

    return embedm
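
A usage sketch, assuming dist is sklearn's euclidean_distances and lagmat comes from statsmodels:

import numpy as np
from statsmodels.tsa.tsatools import lagmat
from sklearn.metrics.pairwise import euclidean_distances as dist

t = np.arange(0, 100, 0.1)
series = np.sin(t) + 0.05 * np.random.randn(len(t))
print(find_d_embedding(series, maxm=8))  # a small dimension is expected for a noisy sine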
Example #6
def fnn(data, maxm):
    """
    Compute the embedding dimension of a time series data to build the phase space using the false neighbors criterion
    data--> time series
    maxm--> maximmum embeding dimension
    """
    RT = 15.0  # relative-distance threshold of the false-neighbor test
    AT = 2     # absolute (attractor-size) threshold; also the first dimension tried
    sigmay = np.std(data, ddof=1)
    nyr = len(data)
    m = maxm
    EM = lagmat(data, maxlag=m - 1)
    EEM = np.asarray([EM[j, :] for j in range(m - 1, EM.shape[0])])
    embedm = maxm
    for k in range(AT, EEM.shape[1] + 1):
        fnn1 = []
        fnn2 = []
        Ma = EEM[:, :k]
        D = dist(Ma)
        for i in range(1, EEM.shape[0] - m - k):
            d = D[i, :]
            dnz = d[np.where(d > 0)]
            Rm = np.min(dnz)               # distance to the nearest neighbor
            nn = np.where(d == Rm)[0][-1]  # index of that nearest neighbor
            if nn + m + k - 1 < nyr:
                fnn1.append(
                    np.abs(data[i + m + k - 1] - data[nn + m + k - 1]) / Rm)
                fnn2.append(
                    np.abs(data[i + m + k - 1] - data[nn + m + k - 1]) / sigmay)
        Ind1 = np.where(np.asarray(fnn1) > RT)
        Ind2 = np.where(np.asarray(fnn2) > AT)
        if len(Ind1[0]) / float(len(fnn1)) < 0.1 and len(Ind2[0]) / float(
                len(fnn2)) < 0.1:
            embedm = k
            break
    return embedm
Example #7
def Dim_Corr(datas, Tao, m, graph=False):
    """
	Compute the correlation dimension of a time series with a time-lag Tao and an embedding dimension m
	datas--> time series to compute the correlation dimension
	Tao--> time lag computed using the first zero crossing of the auto-correlation function (see Tao func)   
	m--> embeding dimension of the time-series, computed using the false neighbors method (see fnn func)  
	graph (optional)--> plot the phase space (attractor) in 3D
	"""
    x = PhaseSpace(datas, m, Tao, graph)
    ED2 = dist(x.T)  # pairwise distances between phase-space points
    posD = np.triu_indices_from(ED2, k=1)
    ED = ED2[posD]   # each pair counted once
    max_eps = np.max(ED)
    min_eps = np.min(ED[np.where(ED > 0)])
    max_eps = np.exp(math.floor(np.log(max_eps)))
    n_div = int(math.floor(np.log(max_eps / min_eps)))
    n_eps = n_div + 1
    eps_vec = np.arange(n_eps)
    eps_vec1 = max_eps * np.exp(1.0 - eps_vec)  # geometric grid of scales
    Npairs = len(x[1, :]) * (len(x[1, :]) - 1)  # number of ordered point pairs
    C_eps = np.zeros(n_eps)

    for i in eps_vec:
        eps = eps_vec1[i]
        N = np.where(((ED < eps) & (ED > 0)))
        S = len(N[0])
        C_eps[i] = float(S) / Npairs  # correlation sum at scale eps

    # The correlation dimension is the slope of log C(eps) vs. log eps,
    # fitted away from the extreme scales
    omit_pts = 1
    k1 = omit_pts
    k2 = n_eps - omit_pts
    xd = np.log(eps_vec1)
    yd = np.log(C_eps)
    xp = xd[k1:k2]
    yp = yd[k1:k2]
    p = np.polyfit(xp, yp, 1)
    return p[0]
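
A usage sketch, assuming dist is sklearn's euclidean_distances and with a hypothetical stand-in for the PhaseSpace helper (delay embedding, one phase-space point per column):

import math
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances as dist

def PhaseSpace(series, m, tao, graph=False):
    # assumed stand-in: delay embedding with one phase-space point per column
    n = len(series) - (m - 1) * tao
    return np.asarray([series[i * tao:i * tao + n] for i in range(m)])

t = np.arange(0, 200, 0.1)
signal = np.sin(t) + 0.01 * np.random.randn(len(t))
print(Dim_Corr(signal, Tao=15, m=3))  # a slope near 1 is expected for a noisy limit cycle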
Example #8
def correlation_dim(datas: List[np.ndarray], tau: int, d: int) -> float:
    x = phase_space(datas, d, tau)

    print('Finding correlation dimension ...')
    ED2 = dist(x.T)
    posD = np.triu_indices_from(ED2, k=1)
    ED = ED2[posD]

    max_eps = np.max(ED)
    min_eps = np.min(ED[np.where(ED > 0)])
    max_eps = np.exp(math.floor(np.log(max_eps)))

    n_div = int(math.floor(np.log(max_eps / min_eps)))
    n_eps = n_div + 1
    eps_vec = np.arange(n_eps)
    eps_vec1 = max_eps * np.exp(1.0 - eps_vec)  # geometric grid of scales

    Npairs = len(x[1, :]) * (len(x[1, :]) - 1)  # number of ordered point pairs
    C_eps = np.zeros(n_eps)

    for i in eps_vec:
        eps = eps_vec1[i]
        N = np.where(((ED < eps) & (ED > 0)))
        S = len(N[0])
        C_eps[i] = float(S) / Npairs

    omit_pts = 1
    k1 = omit_pts
    k2 = n_eps - omit_pts
    xd = np.log(eps_vec1)
    yd = np.log(C_eps)
    xp = xd[k1:k2]
    yp = yd[k1:k2]
    p = np.polyfit(xp, yp, 1)
    return p[0]
Example #9
def kernel(x, y, lengthscale):
    # Squared-exponential (RBF) kernel; the 1e-300 floor keeps entries strictly positive
    return np.exp(-dist(x, y, squared=True) / (lengthscale**2)) + 1e-300
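
A short usage sketch, assuming dist is sklearn's euclidean_distances (whose squared=True keyword matches the call above):

import numpy as np
from sklearn.metrics.pairwise import euclidean_distances as dist

X = np.random.randn(5, 2)
K = kernel(X, X, lengthscale=1.0)  # (5, 5) Gram matrix with ones on the diagonal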
Example #10
def get_distance(points):
    """Points assumed to be (latitude, longitude) in degrees; returns
    pairwise great-circle distances in meters."""
    # dist on radians yields central angles; scale by Earth's radius (6371 km)
    return dist(np.radians(points)) * 6371 * 1000
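
A usage sketch, assuming dist is sklearn's haversine_distances (which expects radians and returns central angles):

import numpy as np
from sklearn.metrics.pairwise import haversine_distances as dist

cities = np.array([[52.5200, 13.4050],   # Berlin
                   [48.8566, 2.3522]])   # Paris
print(get_distance(cities)[0, 1])  # roughly 878 km, in meters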
Example #11
def centroid(class_x_train):
    """Return the index of the class sample closest to the class mean
    (despite the name, this is an index, not the mean itself)."""
    mean = np.mean(class_x_train, axis=0, keepdims=True)
    distances = dist(class_x_train, mean)
    return np.argmin(distances)
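
A quick check, assuming dist is sklearn's euclidean_distances:

import numpy as np
from sklearn.metrics.pairwise import euclidean_distances as dist

cls = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]])
print(centroid(cls))  # -> 0: (0, 0) is nearest the mean (1/3, 1/3)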
Example #12
query_radius = 15
x_ref = tracker.particles.mean(axis=0)[:2]  # current mean particle position (x, y)
skip = 1

# Reference points within query_radius of the current position, subsampled to
# at most 1000 for training
ref_points = s_init.points[s_init.tree.query_ball_point(x_ref, query_radius)][::skip]
train_size = min(1000, ref_points.shape[0])
rand_ind = np.random.choice(len(ref_points), train_size, replace=False)
ref_points = ref_points[rand_ind]

l = 0.1        # GP kernel lengthscale
sigma2 = 0.01  # observation-noise variance
r_mean = ref_points.mean(axis=0)
r_std = ref_points.std(axis=0)
x_train = (ref_points - r_mean) / r_std  # standardize the training points
# Squared-exponential Gram matrix over the 2-D positions, plus noise on the diagonal
K = np.exp(-dist(x_train[:, :2], x_train[:, :2], squared=True) / l**2) + sigma2 * np.eye(x_train.shape[0])
Kinv = np.linalg.inv(K)  # a Cholesky solve would be more numerically stable
particles_init = tracker.particles.copy()

init_index = np.arange(len(particles_init))

particle_list = []
mean_list = []

fig, axs = plt.subplots()
fig.set_size_inches(12, 12)
s0 = scanset.index(0)
particles_0 = tracker.particles.copy()
w = np.ones(len(particles_0))
w /= w.sum()  # uniform initial particle weights
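
The excerpt ends right after computing Kinv. For context, in a GP regression the inverse Gram matrix would typically enter the posterior mean; a hypothetical sketch, where gp_mean, x_query, and targets are assumed names not present in the excerpt:

def gp_mean(x_query, x_train, Kinv, targets, l=0.1):
    # hypothetical continuation: GP posterior mean k(x*, X) @ Kinv @ targets,
    # using the same squared-exponential kernel that built K above
    Ks = np.exp(-dist(x_query[:, :2], x_train[:, :2], squared=True) / l**2)
    return Ks @ Kinv @ targets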