Пример #1
0
def xy_proj0(t, east, west, c, data, z, x, y, count, tz):
    """Project the test row and every row of ``data[z]`` onto the east-west axis.

    Each point is placed with the law of cosines: from its distance ``a`` to
    the east pole, its distance ``b`` to the west pole and the pole separation
    ``c``, ``x = (a^2 + c^2 - b^2) / (2c)`` and ``y = sqrt(a^2 - x^2)``.

    Args:
        t: index of the test row inside ``data[tz]``.
        east: index in ``data[z]`` of the east pole.
        west: index in ``data[z]`` of the west pole.
        c: distance between the two poles.
        data: table store, indexed as ``data[table][row]``.
        z: key of the table being projected.
        x: list updated in place; ``x[i]`` receives the x coordinate of row i.
        y: list updated in place; ``y[i]`` receives the y coordinate of row i.
        count: not used by this function; kept for call compatibility.
        tz: key of the table holding the test row.

    Returns:
        The ``xy`` object created from the test row's coordinates, after each
        projected row of ``data[z]`` has been passed to its ``keep`` method.
    """
    # NOTE: dist, row, xy, indep and nump are names defined outside this block.
    some = 0.000001  # keeps the division defined when c == 0
    denom = 2 * c + some
    table = data[z]
    east_row = table[east]
    west_row = table[west]

    # Coordinates of the test row.
    test_row = data[tz][t]
    ta = dist(test_row, east_row, data, z, indep, nump)
    tb = dist(test_row, west_row, data, z, indep, nump)
    tx = (ta ** 2 + c ** 2 - tb ** 2) / denom
    # Clamp: rounding can push the radicand slightly below zero.
    ty = max(ta ** 2 - tx ** 2, 0.0) ** 0.5
    xyobj = xy(t, tx, ty)

    # enumerate() gives each row its real position; list.index() would map
    # every duplicate row onto the first occurrence and cost O(n) per row.
    for ind, d in enumerate(table):
        a = dist(d, east_row, data, z, indep, nump)
        b = dist(d, west_row, data, z, indep, nump)
        x[ind] = (a ** 2 + c ** 2 - b ** 2) / denom
        y[ind] = max(a ** 2 - x[ind] ** 2, 0.0) ** 0.5
        r = row(d)
        r.x = x[ind]
        r.y = y[ind]
        xyobj.keep(r)
    return xyobj
Пример #2
0
def xycalc(z):
    """Compute 2-D projected coordinates for every row of ``data[z]``.

    Two poles are chosen: ``east`` is the row furthest from an arbitrary
    starting row, ``west`` is the row furthest from ``east``.  Every row is
    then placed with the law of cosines, using its distances ``a`` (to east)
    and ``b`` (to west) and the pole separation ``c``:
    ``x = (a^2 + c^2 - b^2) / (2c)`` and ``y = sqrt(a^2 - x^2)``.

    Args:
        z: key of the table in the module-level ``data`` store.

    Returns:
        A list with one ``xy.row`` object per row of ``data[z]``, in table
        order, each with its ``x`` and ``y`` attributes set.
    """
    # NOTE: data, anyi, furthest, dist, xy, indep and nump are names defined
    # outside this block.
    some = 0.00001  # keeps the division defined when c == 0
    table = data[z]

    # Pick any row index; pull it back in range if it equals len(table).
    start = anyi(table)
    if start == len(table):
        start -= 1

    # Find the two poles.
    east = furthest(start, data, z)
    inde = table.index(east)
    west = furthest(inde, data, z)
    indw = table.index(west)
    c = dist(table[inde], table[indw], z, indep, nump)

    # The law of cosines divides by 2*c (the original code used 2**c).
    denom = 2 * c + some

    rows = []
    for d in table:
        a = dist(d, table[inde], z, indep, nump)
        b = dist(d, table[indw], z, indep, nump)
        px = (a ** 2 + c ** 2 - b ** 2) / denom
        # Clamp: rounding can push the radicand slightly below zero.
        py = max(a ** 2 - px ** 2, 0.0) ** 0.5
        r = xy.row(d)
        r.x = px
        r.y = py
        rows.append(r)

    return rows
Пример #3
0
def xycalc(z):
    """Compute 2-D projected coordinates for every row of ``data[z]``.

    Two poles are chosen: ``east`` is the row furthest from an arbitrary
    starting row, ``west`` is the row furthest from ``east``.  Every row is
    then placed with the law of cosines, using its distances ``a`` (to east)
    and ``b`` (to west) and the pole separation ``c``:
    ``x = (a^2 + c^2 - b^2) / (2c)`` and ``y = sqrt(a^2 - x^2)``.

    Args:
        z: key of the table in the module-level ``data`` store.

    Returns:
        A list with one ``xy.row`` object per row of ``data[z]``, in table
        order, each with its ``x`` and ``y`` attributes set.
    """
    # NOTE: data, anyi, furthest, dist, xy, indep and nump are names defined
    # outside this block.
    some = 0.00001  # keeps the division defined when c == 0
    table = data[z]

    # Pick any row index; pull it back in range if it equals len(table).
    start = anyi(table)
    if start == len(table):
        start -= 1

    # Find the two poles.
    east = furthest(start, data, z)
    inde = table.index(east)
    west = furthest(inde, data, z)
    indw = table.index(west)
    c = dist(table[inde], table[indw], z, indep, nump)

    # The law of cosines divides by 2*c (the original code used 2**c).
    denom = 2 * c + some

    rows = []
    for d in table:
        a = dist(d, table[inde], z, indep, nump)
        b = dist(d, table[indw], z, indep, nump)
        px = (a ** 2 + c ** 2 - b ** 2) / denom
        # Clamp: rounding can push the radicand slightly below zero.
        py = max(a ** 2 - px ** 2, 0.0) ** 0.5
        r = xy.row(d)
        r.x = px
        r.y = py
        rows.append(r)

    return rows
Пример #4
0
def project0(east,west,data,z,x,y,count):
    """Project every row of ``data[z]`` onto the east-west axis, in place.

    For each row the distances ``a`` (to the east pole) and ``b`` (to the
    west pole) are measured.  If a row lies more than 5% further from a pole
    than the poles are from each other, that row replaces the opposite pole
    and the whole projection starts again.  Otherwise the row is placed with
    the law of cosines: ``x = (a^2 + c^2 - b^2) / (2c)``,
    ``y = sqrt(a^2 - x^2)``.

    Args:
        east: index in ``data[z]`` of the initial east pole.
        west: index in ``data[z]`` of the initial west pole.
        data: table store, indexed as ``data[table][row]``.
        z: key of the table being projected.
        x: list updated in place with the x coordinate of each row.
        y: list updated in place with the y coordinate of each row.
        count: not used by this function; kept for call compatibility.

    Returns:
        None.  Results are written into ``x`` and ``y``.
    """
    # NOTE: dist, indep and nump are names defined outside this block.
    bigger = 1.05
    some = 0.000001  # keeps the division defined when c == 0
    table = data[z]

    # Restart with a loop rather than recursion, so a long chain of pole
    # replacements cannot exhaust the interpreter's recursion limit.
    while True:
        print("+")  # one marker per projection attempt
        c = dist(table[east],table[west],data,z,indep,nump)
        denom = 2*c + some
        # enumerate() yields the real row position; list.index() would
        # return the first equal row when the table holds duplicates.
        for ind, d in enumerate(table):
            a = dist(d,table[east],data,z,indep,nump)
            b = dist(d,table[west],data,z,indep,nump)
            if a > c*bigger:
                west = ind  # row is further from east than west is
                break
            if b > c*bigger:
                east = ind  # row is further from west than east is
                break
            x[ind] = (a**2 + c**2 - b**2) / denom
            # Clamp: rounding can push the radicand slightly below zero.
            y[ind] = max(a**2 - x[ind]**2, 0.0)**0.5
        else:
            # Every row fitted between the current poles.
            return
Пример #5
0
def extend_path(path, ps, lookahead=0):
    """Move the best candidate point from ``ps`` onto one end of ``path``.

    Every point in ``ps`` is tried at both ends of ``path``.  With
    ``lookahead == 0`` the cost of a candidate is its distance to that end;
    with ``lookahead > 0`` the cost is the length of the tour produced by
    ``nn_tour`` after tentatively making the move.  The cheapest candidate is
    inserted at the chosen end of ``path`` and deleted from ``ps``.  Ties keep
    the first candidate found (lowest index, front end before back end).

    Args:
        path: list of points, modified in place.
        ps: list of remaining points, modified in place.
        lookahead: number of further steps to simulate when scoring a move.

    Returns:
        None.  If ``ps`` is empty nothing is changed.
    """
    # NOTE: dist, nn_tour and tour_length are names defined outside this block.
    def update_path(end, cur_path, p):
        "Add p to the front (end == 0) or the back (any other end) of cur_path."
        if end == 0:
            cur_path.insert(0, p)
        else:
            cur_path.append(p)

    if not ps:
        # Nothing to add; avoids unpacking min_indices while it is still None.
        return

    min_dist = None
    min_indices = None
    for i, p in enumerate(ps):
        for j in (0, -1):
            if lookahead > 0:
                # Score the move on copies so path and ps stay untouched.
                tmp_path = list(path)
                update_path(j, tmp_path, p)
                tmp_ps = list(ps)
                del tmp_ps[i]
                tour = nn_tour(tmp_path, tmp_ps, lookahead=lookahead - 1)
                d = tour_length(tour)
            else:
                d = dist(p, path[j])
            if min_dist is None or d < min_dist:
                min_dist = d
                min_indices = (i, j)

    i, j = min_indices
    update_path(j, path, ps[i])
    del ps[i]
Пример #6
0
def xy_proj(z, data, t, tz, check):
    """Project table ``data[z]`` and a test row, then reduce to the nearest leaf.

    Steps, in order: choose a starting row with ``anyi``, find the east and
    west poles with ``furthest``, measure their separation, project all rows
    and the test row with ``xy_proj0``, collect the tiles of the projection
    into a dict keyed by position, build the leaf table, locate the leaf
    closest to the test row and return ``out_reduced(leaves, close)``.

    Args:
        z: key of the table to project.
        data: table store, indexed as ``data[table][row]``.
        t: index of the test row inside ``data[tz]``.
        tz: key of the table holding the test row.
        check: when equal to ``True``, diagnostic output is printed and
            ``checkie`` is called.

    Returns:
        Whatever ``out_reduced(leaves, close)`` returns.
    """
    # NOTE: count and xy_d are read from outside this function.
    table = data[z]
    start = anyi(table)
    if start == len(table):
        start -= 1

    size = len(table)
    x = [0] * size
    y = [0] * size

    east = furthest(start, data, z)
    inde = table.index(east)
    west = furthest(inde, data, z)
    indw = table.index(west)
    c = dist(table[inde], table[indw], data, z, indep, nump)

    xyobj = xy_proj0(t, inde, indw, c, data, z, x, y, count, tz)

    oldd = 999999
    leaves = dict(enumerate(xyobj.tiles(20, 4, 0, oldd)))

    # The explicit comparison is kept on purpose: only a value equal to True
    # switches the diagnostics on.
    verbose = check == True
    if verbose:
        print("nearest d %s" % (xy_d,))
        print("test row: %s %s" % (xyobj.trow.x, xyobj.trow.y))

    ltab = leaftab(leaves)
    if verbose:
        printltab(ltab)

    close = nearleaf(ltab, xyobj)
    if verbose:
        checkie(leaves, ltab, close, data, tz, t)

    return out_reduced(leaves, close)
Пример #7
0
def nearestn(zlst, near, e):
    """Shrink each table in ``zlst[1:]`` to the rows nearest its centroid.

    For every table key after the first, the distance from ``expected1(z, e)``
    to each row is measured, the rows are ranked by that distance, and the
    table is rebuilt (``removeData`` followed by ``addRow``) from the closest
    ``floor(near * n / 100) + 1`` rows, where ``n`` is the table's row count.

    Args:
        zlst: list of table keys.  NOTE(review): ``zlst[0]`` is skipped, as in
            the original loop -- confirm this is intentional.
        near: percentage of rows to keep.
        e: passed through to ``expected1``.

    Returns:
        ``zlst`` itself, unchanged; the tables it names are modified.
    """
    # NOTE: data, dist, expected1, removeData, addRow, indep and nump are
    # names defined outside this block.
    for z in zlst[1:]:
        rows = data[z]
        total = len(rows)

        dists = []
        if total:
            # Computed once per table instead of once per row; assumes
            # expected1 is deterministic for fixed (z, e).
            centroid = expected1(z, e)
            dists = [dist(centroid, r, z, indep, nump) for r in rows]

        # Rank row positions by distance.  Sorting positions rather than
        # distances keeps rows with equal distances distinct; looking a
        # distance up with list.index() would return the same row repeatedly.
        order = sorted(range(total), key=dists.__getitem__)
        limit = (near * total) / 100
        temp_data = [rows[pos] for rank, pos in enumerate(order)
                     if rank <= limit]

        # Remove old data and add new data.
        removeData(z)
        for r in temp_data:
            addRow(r, z)

    return zlst
Пример #8
0
def project0(east, west, data, z, x, y, count):
    """Project every row of ``data[z]`` onto the east-west axis, in place.

    For each row the distances ``a`` (to the east pole) and ``b`` (to the
    west pole) are measured.  If a row lies more than 5% further from a pole
    than the poles are from each other, that row replaces the opposite pole
    and the whole projection starts again.  Otherwise the row is placed with
    the law of cosines: ``x = (a^2 + c^2 - b^2) / (2c)``,
    ``y = sqrt(a^2 - x^2)``.

    Args:
        east: index in ``data[z]`` of the initial east pole.
        west: index in ``data[z]`` of the initial west pole.
        data: table store, indexed as ``data[table][row]``.
        z: key of the table being projected.
        x: list updated in place with the x coordinate of each row.
        y: list updated in place with the y coordinate of each row.
        count: not used by this function; kept for call compatibility.

    Returns:
        None.  Results are written into ``x`` and ``y``.
    """
    # NOTE: dist, indep and nump are names defined outside this block.
    bigger = 1.05
    some = 0.000001  # keeps the division defined when c == 0
    table = data[z]

    # Restart with a loop rather than recursion, so a long chain of pole
    # replacements cannot exhaust the interpreter's recursion limit.
    while True:
        print("+")  # one marker per projection attempt
        c = dist(table[east], table[west], data, z, indep, nump)
        denom = 2 * c + some
        # enumerate() yields the real row position; list.index() would
        # return the first equal row when the table holds duplicates.
        for ind, d in enumerate(table):
            a = dist(d, table[east], data, z, indep, nump)
            b = dist(d, table[west], data, z, indep, nump)
            if a > c * bigger:
                west = ind  # row is further from east than west is
                break
            if b > c * bigger:
                east = ind  # row is further from west than east is
                break
            x[ind] = (a**2 + c**2 - b**2) / denom
            # Clamp: rounding can push the radicand slightly below zero.
            y[ind] = max(a**2 - x[ind]**2, 0.0)**0.5
        else:
            # Every row fitted between the current poles.
            return
Пример #9
0
def nearestn(zlst,near,e):
    """Shrink each table in ``zlst[1:]`` to the rows nearest its centroid.

    For every table key after the first, the distance from ``expected1(z, e)``
    to each row is measured, the rows are ranked by that distance, and the
    table is rebuilt (``removeData`` followed by ``addRow``) from the closest
    ``floor(near * n / 100) + 1`` rows, where ``n`` is the table's row count.

    Args:
        zlst: list of table keys.  NOTE(review): ``zlst[0]`` is skipped, as in
            the original loop -- confirm this is intentional.
        near: percentage of rows to keep.
        e: passed through to ``expected1``.

    Returns:
        ``zlst`` itself, unchanged; the tables it names are modified.
    """
    # NOTE: data, dist, expected1, removeData, addRow, indep and nump are
    # names defined outside this block.
    for z in zlst[1:]:
        rows = data[z]
        total = len(rows)

        dists = []
        if total:
            # Computed once per table instead of once per row; assumes
            # expected1 is deterministic for fixed (z, e).
            centroid = expected1(z,e)
            dists = [dist(centroid,r,z,indep,nump) for r in rows]

        # Rank row positions by distance.  Sorting positions rather than
        # distances keeps rows with equal distances distinct; looking a
        # distance up with list.index() would return the same row repeatedly.
        order = sorted(range(total), key=dists.__getitem__)
        limit = (near*total)/100
        temp_data = [rows[pos] for rank, pos in enumerate(order)
                     if rank <= limit]

        # Remove old data and add new data.
        removeData(z)
        for r in temp_data:
            addRow(r,z)

    return zlst
Пример #10
0
def computeWSS(centroids, clusters, dist=dist.euclidiandist):
  '''Return the within-cluster sum of squares (WSS).

  centroids -- list of records; centroids[i] is the centroid of clusters[i].
  clusters -- list of lists of indexes into the globally stored data, one
              inner list per cluster.
  dist -- distance function taking two records. Defaults to
          dist.euclidiandist.

  returns -- the sum, over every point of every cluster, of the squared
             distance between the point and its cluster's centroid.
  '''
  total = 0.0
  for cluster_no, members in enumerate(clusters):
    for point_index in members:
      gap = dist(centroids[cluster_no], data[point_index])
      total += math.pow(gap, 2)
  return total
Пример #11
0
def computeBSS(centroids, clusters, dist=dist.euclidiandist):
  '''Return the between-cluster sum of squares (BSS).

  centroids -- list of records; centroids[i] is the centroid of clusters[i].
  clusters -- list of lists of indexes into the globally stored data, one
              inner list per cluster.
  dist -- distance function taking two records. Defaults to
          dist.euclidiandist.

  returns -- the sum, over all clusters, of the cluster size times the
             squared distance between the cluster centroid and the centroid
             of the whole data set.
  '''
  # Centroid of the entire data set: the mean of each column.
  record_count = len(data)
  grand_centroid = []
  for column in range(len(data[0])):
    column_sum = 0.0
    for record in data:
      column_sum += record[column]
    grand_centroid.append(column_sum / record_count)

  # Weight each centroid's squared offset by the size of its cluster.
  total = 0.0
  for cluster_no, members in enumerate(clusters):
    offset = dist(grand_centroid, centroids[cluster_no])
    total += len(members) * math.pow(offset, 2)
  return total
Пример #12
0
def neighbors(t, data, z, lst):
    """Rank the rows of ``data[z]`` by their distance to row ``t``.

    One dict per row is appended to ``lst``: key ``'x'`` holds the distance
    from ``t`` to the row, key ``'d'`` holds the row itself.

    Args:
        t: the reference row.
        data: table store, indexed as ``data[table][row]``.
        z: key of the table to scan.
        lst: list that receives the new entries; it is extended in place, and
            any entries it already holds take part in the sort.

    Returns:
        A new list with the entries of ``lst`` sorted by ascending ``'x'``.
        ``lst`` itself keeps insertion order.
    """
    # NOTE: dist, indep and nump are names defined outside this block.
    for d in data[z]:
        lst.append({'x': dist(t, d, data, z, indep[z], nump[z]), 'd': d})
    return sorted(lst, key=lambda entry: entry['x'])
Пример #13
0
def k_means(data,k,dist):
  '''Runs the k-means algorithm with the given distance measure and number
  of clusters.

  data -- 2-d sequence of numeric records (lists or tuples); never modified
  k -- number of clusters
  dist -- distance function taking two records

  returns -- (centroids, clusters): centroids is a list of k records and
  clusters is a list of k sorted lists of indexes into data.

  The first k records seed the centroids. Each pass assigns every point to
  its closest centroid (ties go to the lowest centroid index) and then
  recomputes every centroid as the mean of its members; a cluster with no
  members has its centroid reset to all zeros. The loop ends when a pass
  leaves the assignment unchanged.

  If k is greater than len(data), an error message is printed and the
  process exits with status 1.
  '''
  if k>len(data):
    print("Error: k should be less than the number of records")
    # Non-zero status, so calling scripts can tell this is a failure.
    sys.exit(1)

  # Seed with copies of the first k records. list() accepts tuple rows as
  # well as list rows, and later centroid updates never touch data.
  centroids=[list(data[i]) for i in range(k)]
  # Start from k empty clusters so the first comparison is well defined.
  clusters=[[] for _ in range(k)]

  while True:
    # Assign every point (by index) to its closest centroid.
    new_clusters=[[] for _ in range(k)]
    for j,point in enumerate(data):
      # min over (distance, index) pairs picks the smallest distance and
      # breaks ties by the lowest centroid index.
      closest=min((dist(centroids[c],point),c) for c in range(k))[1]
      new_clusters[closest].append(j)

    # Indexes are appended in increasing order, so each cluster is already
    # sorted and the two assignments can be compared directly.
    if new_clusters==clusters:
      return centroids, clusters

    # Assignment changed: adopt it and recompute every centroid.
    clusters=new_clusters
    for q,members in enumerate(clusters):
      size=len(members)
      for w in range(len(data[0])):
        if size:
          runtot=0.0
          for index in members:
            runtot+=data[index][w]
          centroids[q][w]=runtot/size
        else:
          centroids[q][w]=0
#END OF KMEANS
Пример #14
0
def distedAll(csvfile, z):
    """Print the distance between every ordered pair of rows in ``data[z]``.

    Pairs are visited row by row: the first row against every row (itself
    included), then the second row against every row, and so on.  Each
    distance is printed on its own line.

    Args:
        csvfile: not used by this function.
        z: key of the table in the module-level ``data`` store.
    """
    # NOTE: data, dist, indep and nump are names defined outside this block.
    rows = data[z]
    for first in rows:
        for second in rows:
            print(dist(first, second, data, z, indep, nump))
Пример #15
0
def distedAll(csvfile,z):
    """Print the distance between every ordered pair of rows in ``data[z]``.

    Pairs are visited row by row: the first row against every row (itself
    included), then the second row against every row, and so on.  Each
    distance is printed on its own line.

    Args:
        csvfile: not used by this function.
        z: key of the table in the module-level ``data`` store.
    """
    # NOTE: data, dist, indep and nump are names defined outside this block.
    rows = data[z]
    for first in rows:
        for second in rows:
            print(dist(first,second,data,z,indep,nump))