def _proximity_filter(point, data, total):
    """
    Query a KDTree built from ``data`` for the entries closest to ``point``.

    ``data`` is a list of coordinate tuples.  A new tree is constructed on
    every call, then queried once with ``t=total``; whatever
    ``KDTree.query`` returns (per the original description, ``total``
    coordinate tuples) is handed back unchanged.
    """
    return KDTree.construct_from_data(data).query(query_point=point, t=total)
def autodiscover():
    """
    Populate the module-level ``kd_tree`` with a tree over every Pixel row.

    All ``Pixel`` objects are loaded into a list and handed to
    ``KDTree.construct_from_data``; the resulting tree replaces whatever
    ``kd_tree`` held before.  Nothing is returned.
    """
    global kd_tree
    # Imported inside the function, as in the original
    # (presumably to defer loading the models module -- confirm).
    from pixel.models import Pixel

    kd_tree = KDTree.construct_from_data(list(Pixel.objects.all()))
def __init__(self, eps, MinPts, pointlist):
    """
    Store the clustering parameters and index the given points.

    eps, MinPts -- stored as-is on the instance.
    pointlist   -- sequence of objects exposing ``coordinates`` and ``id``.

    Besides keeping the raw list, this records the indices 0..len-1 as
    "unvisited", builds a KDTree from ``self.formatpoints()`` and creates a
    lookup from each point's coordinate tuple to its id.
    """
    self.eps = eps
    self.MinPts = MinPts
    self.points = pointlist
    self.unvisited = list(range(len(pointlist)))
    # formatpoints() is called after self.points is set, before the id map exists.
    self.kdtree = KDTree.construct_from_data(self.formatpoints())
    # Points sharing identical coordinates keep the id of the last one seen.
    self.pointidmap = dict(
        (tuple(entry.coordinates), entry.id) for entry in pointlist
    )
def createTrackpointTree(self, trackpoints):
    '''
    Build and return a KDTree over the stored trackpoints.

    Each point in ``self.trackpoints`` is converted to a plain
    ``(x, y, z)`` tuple via its getX/getY/getZ accessors.  The tuples are
    kept in ``self.track_tupel_list``; a shallow copy is stored in
    ``self.list4tree`` and handed to the tree constructor.

    NOTE(review): the ``trackpoints`` argument is never read -- the points
    always come from ``self.trackpoints``.  Confirm this is intended.
    '''
    self.track_tupel_list = [
        (pt.getX(), pt.getY(), pt.getZ()) for pt in self.trackpoints
    ]
    # Separate list object for the tree, so the stored tuple list is not shared.
    self.list4tree = list(self.track_tupel_list)
    return KDTree.construct_from_data(self.list4tree)
def knn():
    """
    Run a 10-nearest-neighbour majority vote over a slice of ``train.csv``.

    First pass: reads roughly the first 40,000 data rows (the ``label``
    header row is skipped), converts every field to ``int`` and treats
    column 0 as the label and the remaining columns as the feature tuple.
    A KDTree is built over the feature tuples and a dict maps each feature
    tuple back to its label (identical feature tuples keep the last label).

    Second pass: for file lines 32000..32100 (1-based, header included in
    the count) the 10 nearest stored tuples are looked up, their labels
    are counted, and ``True``/``False`` is printed depending on whether
    the most frequent label equals the row's own label.

    NOTE(review): the evaluated lines fall inside the range that was
    loaded into the tree, so each query point is presumably its own
    nearest neighbour -- confirm whether a held-out range was intended.
    """
    label_by_point = {}
    train_points = []

    # Context managers guarantee the file is closed even if parsing fails.
    with open('train.csv', 'r') as f:
        num = 0
        for line in f:
            d = line.split(',')
            if d[0] == 'label':
                continue
            d = [int(v) for v in d]
            features = tuple(d[1:])
            train_points.append(features)
            label_by_point[features] = d[0]
            if num > 40000:
                break
            num += 1

    tree = KDTree.construct_from_data(train_points)

    with open('train.csv', 'r') as f:
        for num, line in enumerate(f, 1):
            if num < 32000:
                continue
            if num > 32100:
                break
            d = line.split(',')
            if d[0] == 'label':
                continue
            d = [int(v) for v in d]
            nn = tree.query(tuple(d[1:]), 10)
            counts = defaultdict(int)
            for neighbour in nn:
                counts[label_by_point[neighbour]] += 1
            # max() returns the first key with the highest count, the same
            # winner a stable descending sort would put at index 0.
            predicted = max(counts, key=counts.get)
            print(str(d[0] == predicted))
def nearest_filtered(Primary_Technology, Role):
    """
    Return selected columns of the rows nearest to a fixed query point.

    Reads ``C:\\DataMining\\IndexPredictionsOutput.csv``, keeps the rows
    whose column 2 equals ``Primary_Technology`` and whose column 11
    equals ``Role`` (via ``filter_lambda``), builds a KDTree over columns
    0 and 57-65 of those rows, and queries it for the 5 entries nearest to
    ``(0, 10, 10, 10, 10, 10, 10, 10, 10, 10)``.

    Returns a list with columns (1, 2, 8, 9, 10) of every filtered row
    whose column 0 (the serial number) matches one of the neighbours.

    NOTE(review): ``csv.reader`` yields strings while the query point is
    numeric -- verify that ``filter_lambda`` or ``KDTree`` copes with that.
    """
    # Raw string: the backslashes are path separators, not escape sequences.
    with open(r"C:\DataMining\IndexPredictionsOutput.csv", 'rb') as f:
        data = [tuple(row) for row in csv.reader(f)]

    # Row predicates handed to filter_lambda.
    def f1(t):
        return t[2] == Primary_Technology

    def f2(t):
        return t[11] == Role

    filtered_data = filter_lambda([f1, f2], data)

    # Column 0 is the serial number; it is part of the tree point so the
    # neighbours can be mapped back to their rows afterwards.
    pick_tree_columns = operator.itemgetter(0, 57, 58, 59, 60, 61, 62, 63, 64, 65)
    kd_filtered_data = [pick_tree_columns(row) for row in filtered_data]

    tree = KDTree.construct_from_data(kd_filtered_data)
    # t controls how many neighbours are returned.
    nearest = tree.query(query_point=(0, 10, 10, 10, 10, 10, 10, 10, 10, 10),
                         t=5)

    # A set gives constant-time membership tests while scanning the rows.
    nearest_index = set(x[0] for x in nearest)
    kd_filtered_nearest = [
        tup for tup in filtered_data if tup[0] in nearest_index
    ]

    pick_output_columns = operator.itemgetter(1, 2, 8, 9, 10)
    return [pick_output_columns(row) for row in kd_filtered_nearest]
def nearest_filtered(Primary_Technology, Role):
    """
    Return selected columns of the rows nearest to a fixed query point.

    Reads ``C:\\DataMining\\IndexPredictionsOutput.csv``, keeps the rows
    whose whitespace-stripped column 2 equals ``Primary_Technology`` and
    whose whitespace-stripped column 11 equals ``Role`` (via
    ``filter_lambda``), builds a KDTree over columns 0 and 58-66 of those
    rows, and queries it for the 10 entries nearest to
    ``(85, 10, 10, 10, 10, 10, 10, 10, 10, 10)``.

    Returns a list with columns (0, 1, 2, 26, 8, 23, 10) of every filtered
    row whose column 0 (the serial number) matches one of the neighbours.

    NOTE(review): ``csv.reader`` yields strings while the query point is
    numeric -- verify that ``filter_lambda`` or ``KDTree`` copes with that.
    """
    # Raw string: the backslashes are path separators, not escape sequences.
    with open(r"C:\DataMining\IndexPredictionsOutput.csv", 'rb') as f:
        data = [tuple(row) for row in csv.reader(f)]

    # Row predicates handed to filter_lambda.
    def f1(t):
        return t[2].strip() == Primary_Technology

    def f2(t):
        return t[11].strip() == Role

    filtered_data = filter_lambda([f1, f2], data)

    # Column 0 is the serial number; it is part of the tree point so the
    # neighbours can be mapped back to their rows afterwards.
    pick_tree_columns = operator.itemgetter(0, 58, 59, 60, 61, 62, 63, 64, 65, 66)
    kd_filtered_data = [pick_tree_columns(row) for row in filtered_data]

    tree = KDTree.construct_from_data(kd_filtered_data)
    # t controls how many neighbours are returned.
    nearest = tree.query(query_point=(85, 10, 10, 10, 10, 10, 10, 10, 10, 10),
                         t=10)

    # A set gives constant-time membership tests while scanning the rows.
    nearest_index = set(x[0] for x in nearest)
    kd_filtered_nearest = [
        tup for tup in filtered_data if tup[0] in nearest_index
    ]

    pick_output_columns = operator.itemgetter(0, 1, 2, 26, 8, 23, 10)
    return [pick_output_columns(row) for row in kd_filtered_nearest]
def proximity_filter(point, data, total):
    """
    Look up the points of ``data`` that lie closest to ``point``.

    A KDTree is built from ``data`` for this single lookup and queried
    with ``t=total``.  The query result is returned as-is; per the
    original description it is a list of at most ``total`` points ordered
    by proximity.
    """
    return KDTree.construct_from_data(data).query(query_point=point, t=total)
# declare a 2D array for confusion matrix confusionMatrix = [[0 for x in xrange(26)] for x in xrange(26)] listOfPoints = [] with open('training_data.txt', 'r') as f: for line in f: if counter < 15000: listOfPoints.append(getDataElementFrom(line)) counter += 1 continue if isKDTreeConstructed == False: start = time.clock() kdTree = KDTree.construct_from_data(listOfPoints) elapsedForKDTreeConstruction = (time.clock() - start) isKDTreeConstructed = True print "KDTree constructed in %.2fs" % (elapsedForKDTreeConstruction) searchStartTime = time.clock() print "Evaluating input data..." currentLine = getDataElementFrom(line) nearest = kdTree.query(currentLine) confusionMatrix[ord(currentLine.lettr) - 65][ord(nearest[0].lettr) - 65] += 1 if currentLine.lettr == nearest[0].lettr: properMatches += 1 counter += 1 if counter % 500 == 0:
from kdtree import KDTree

# Minimal 3-D usage example: build a tree over six points and print the
# result of asking for the two entries nearest to (5, 4, 3).
data = [(1, 2, 3), (4, 0, 1), (5, 3, 1), (10, 5, 4), (9, 8, 9), (4, 2, 4)]
tree = KDTree.construct_from_data(data)
nearest = tree.query(query_point=(5, 4, 3), t=2)
# Parenthesised form prints the same text on Python 2 and also runs on Python 3.
print(nearest)
def buildKDTree():
    """
    Build and return a KDTree over the keys of the module-level ``topic_dict``.

    The keys are presumably the topics' 2-D coordinates (per the original
    comment) -- verify against where ``topic_dict`` is populated.
    """
    # list(...) yields a real list on both Python 2 and 3; on Python 3
    # ``dict.keys()`` alone is a view that cannot be indexed or sliced.
    data = list(topic_dict)
    return KDTree.construct_from_data(data)
# For every city file: load its geo positions, then (while more than 2000
# points remain) pair every point with a neighbour found through a KDTree.
# NOTE(review): this is an excerpt -- nothing visible here shrinks ``points``,
# so the rest of the ``while`` body presumably lives beyond this snippet.
for city in files:
    points = []
    # File name without its extension selects the matching positions file.
    city_name = city.split('.')[0]
    with open('geopositions/%s.txt'%city_name, 'rb') as csvfile:
        georeader = csv.reader(csvfile, delimiter=',', quotechar='|')
        for row in georeader:
            # First two columns parsed as floats; third component is a constant 1.0.
            points.append((float(row[0]), float(row[1]), 1.0))
    block_size = 100
    start_time = time.time()
    while len(points) > 2000:
        print len(points)
        s_time = time.time()
        # The tree is rebuilt from the current point list on every pass.
        tree = KDTree.construct_from_data(points)
        # min_dist = 2000000000
        # f_point = None
        # s_point = None
        pairs = []
        for point in points:
            # Ask for two results and use the second one; presumably the first
            # is the query point itself, since it is in the tree -- confirm.
            nearest = tree.query(query_point=point, t=2)
            found_point = nearest[1]
            dist = get_dist(found_point, point)
            pairs.append((point, found_point, dist))
            # if dist < min_dist:
            # f_point = found_point
            # s_point = point
            # min_dist = dist
        # Order the (point, neighbour, distance) triples by ascending distance.
        pairs.sort(key=lambda x: x[2])
        all_pairs = []
print "\tEvaluated %d rows for condensed training set in %.2fs" %(counter, time.clock() - start) continue elif isPrinted == False: elapsed = (time.clock() - start) print ("Condensed Training data mapped to feature space in %.4fmin." % (elapsed/60)) print ("Boundary points evaluated: %d" % getTrainingData().__len__()) classificationTimeStart = time.clock() isPrinted = True # Implement the search of the next 5000 elements using a KDTree searchStartTime = time.clock() #test: Trying with KDTree kdTree = KDTree.construct_from_data(getTrainingData()) else: currentLine = getDataElementFrom(line) # nearest = evaluateLine(line, 1) nearest = kdTree.query(currentLine) confusionMatrix[ord(currentLine.lettr) - 65][ord(nearest[0].lettr) - 65] += 1 if currentLine.lettr == nearest[0].lettr: properMatches += 1 # confusionMatrix[ord(nearest[0]) - 65][ord(nearest[1]) - 65] += 1 # # if nearest[0] == nearest[1]: # properMatches += 1 counter += 1
from kdtree import KDTree

# Minimal 2-D usage example: index six points, then print the result of
# asking the tree for the single entry nearest to (10, 0).
data = [
    (1, 2),
    (4, 0),
    (8, 3),
    (10, 5),
    (9, 8),
    (4, 2),
]
tree = KDTree.construct_from_data(data)
nearest = tree.query(query_point=(10, 0), t=1)
print(nearest)
def fitting_obj_sample(param):
    """
    computes residuals based on distance from ellipsoid

    can be used with different loss-functions on residual

    ``param`` holds six values that are passed to ``ellipsoid`` as
    (cx, cy, cz, rx, ry, rz).  The ellipsoid is sampled, the samples are
    indexed in a KDTree, and for every data point taken from the
    module-level sequences ``x``, ``y``, ``z`` the Euclidean distance to
    the nearest sample is used as the residual.  Each residual goes
    through ``loss_functions.eps_loss_asym(residual, 2, 1.0, 0.2)``.

    Side effects: rebinds the module-level ``global_loss`` to an array
    holding the loss of every data point, and prints the sample count,
    query count, summed loss and regulariser value.

    Returns the summed loss plus ``10 * regularizer(param)``.
    """
    global global_loss

    # centers
    cx = param[0]
    cy = param[1]
    cz = param[2]
    # remaining three values (radii, going by the rx/ry/rz names)
    rx = param[3]
    ry = param[4]
    rz = param[5]

    # NOTE(review): the trailing 20 is presumably a sampling resolution -- confirm.
    sx, sy, sz = ellipsoid(cx, cy, cz, rx, ry, rz, 20)
    num_samples = len(sx)
    print("num_samples %s" % (num_samples,))

    # Materialise the samples so the tree gets a real list on Python 2 and 3.
    data = list(zip(sx, sy, sz))
    tree = KDTree.construct_from_data(data)

    num_queries = len(x)
    print("num_queries %s" % (num_queries,))

    global_loss = numpy.zeros(num_queries)
    obj = 0

    for idx in range(num_queries):
        # The residual is the distance to the nearest sampled surface point.
        # (The original carried a note on the exact point-to-ellipsoid
        # distance -- root tbar of F(t) -- which is not what is computed here.)
        query = (x[idx], y[idx], z[idx])
        nearest, = tree.query(query_point=query, t=1)
        residual = dist.euclidean(query, nearest)

        loss_xt = loss_functions.eps_loss_asym(residual, 2, 1.0, 0.2)
        obj += loss_xt
        # Record this point's own loss (previously the query count was stored).
        global_loss[idx] = loss_xt

    # add regularizer to keep radii close
    reg = 10 * regularizer(param)

    print("loss %s" % (obj,))
    print("reg %s" % (reg,))

    obj += reg
    return obj
]) + tuple([round(limitedListCA_ProtA[i][2], 3)]) + tuple([Va[i].T]) #eigenvector Va.T is appended to each node listXa.append(tupp) #listXb is the list of atoms in protein b listXb = [] for i in range(len(limitedListCA_ProtB)): tupp = tuple([round(limitedListCA_ProtB[i][0], 3)]) + tuple([ round(limitedListCA_ProtB[i][1], 3) ]) + tuple([round(limitedListCA_ProtB[i][2], 3)]) + tuple([Vb[i]]) #eigenvector Vb is appended to each node listXb.append(tupp) data1 = listXa data2 = listXb Tree1 = KDTree.construct_from_data(data1) Tree2 = KDTree.construct_from_data(data2) score = 0 #print("####################################") #Times for KD Tree approach startT = time.time() for i in range(len(data1)): #finds the atoms within radius 30 of query pt score += Tree2.queryrange(query_point=data1[i], r=50) solveTime = time.time() - startT print(solveTime) #Time for non-tree approach #startT = time.time() #total =0 #for i in range(len(data1)):
def __init__(self, data):
    """
    Keep ``data``, index it in a KDTree and collect its distinct ``rel`` values.

    Progress messages are printed before and after the tree is built.
    ``self.rel_levels`` ends up as the set of ``rel`` attributes found on
    the items of ``data``.
    """
    self.data = data

    print("Kd-tree will be constructed...")
    self.tree = KDTree.construct_from_data(data)
    print("Kd-tree construction done!")

    self.rel_levels = {item.rel for item in data}
def query_nearest(METAR_data, ref_point):
    """
    Build a KDTree over ``METAR_data`` and query it for ``ref_point``.

    The tree is constructed on each call.  ``KDTree.query`` is invoked
    with its default result count and its return value is passed back
    untouched.
    """
    return KDTree.construct_from_data(METAR_data).query(query_point=ref_point)
def top_down(grid, output, tile_size):
    '''
    Fill ``output`` with tiles, walking the grid from top-left to bottom-right.

    Every ``Pixel`` row is loaded and indexed in a KDTree.  For each grid
    cell the cell's ``color`` is quantized, the single nearest Pixel is
    looked up, its ``image`` is shrunk with ``thumbnail(tile_size)`` and
    pasted into ``output`` at (column * tile width, row * tile height).

    Returns a dict mapping each used Pixel id to the list of (x, y) paste
    positions where that Pixel's image was placed.

    NOTE(review): ``thumbnail`` is called on ``tile_pixel.image`` itself;
    if that is a PIL image it is resized in place -- confirm that is fine
    when the same Pixel is matched more than once.
    '''
    image_list = KDTree.construct_from_data(list(Pixel.objects.all()))
    _tile_list = dict()

    for yPos, row in enumerate(grid):
        for xPos, cell in enumerate(row):
            matches = image_list.query(
                query_point=quantize_color(cell.color), t=1)
            tile_pixel = matches[0]

            tile = tile_pixel.image
            tile.thumbnail(tile_size)

            xy = (xPos * tile_size[0], yPos * tile_size[1])
            output.paste(tile, xy)

            # Group paste positions by the id of the Pixel that supplied the tile.
            _tile_list.setdefault(tile_pixel.id, list()).append(xy)

    return _tile_list
# Build one node tuple per entry of protein A: the first three values of the
# entry rounded to 3 decimals, followed by the matching row of Va.
listXa=[]
for i in range(len(limitedListCA_ProtA)):
    tupp = tuple([round(limitedListCA_ProtA[i][0], 3)])+tuple([round(limitedListCA_ProtA[i][1],3)])+tuple([round(limitedListCA_ProtA[i][2], 3)])+tuple([Va[i].T]) # eigenvector Va.T is appended to each node
    listXa.append(tupp)
# listXb is the list of atoms in protein b
# NOTE(review): protein A appends Va[i].T while protein B appends Vb[i]
# (no transpose) -- confirm the asymmetry is intended.
listXb=[]
for i in range(len(limitedListCA_ProtB)):
    tupp = tuple([round(limitedListCA_ProtB[i][0], 3)])+tuple([round(limitedListCA_ProtB[i][1],3)])+tuple([round(limitedListCA_ProtB[i][2], 3)])+tuple([Vb[i]]) # eigenvector Vb is appended to each node
    listXb.append(tupp)
data1 = listXa
data2 = listXb
# One tree per protein; only Tree2 is queried in the code visible here.
Tree1 = KDTree.construct_from_data(data1)
Tree2 = KDTree.construct_from_data(data2)
score = 0
#print("####################################")
# Times for KD Tree approach
startT = time.time()
for i in range(len(data1)):
    # Range query around each protein-A node against the protein-B tree; the
    # result is added to the running score.
    # NOTE(review): the original comment said "radius 30" but the call passes
    # r = 50 -- confirm which value is intended.
    score += Tree2.queryrange(query_point=data1[i], r = 50)
solveTime = time.time() - startT
print(solveTime)
# Time for non-tree approach
#startT = time.time()