def testL1Distance(self):
    n1 = numpy.array([[1., 2., 3., 4.], [1., 1., 1., 1.]], dtype=numpy.float32)
    n2 = numpy.array([[5., 6., 7., -8.], [1., 1., 1., 1.]], dtype=numpy.float32)
    out = self.Run(functions.l1_distance(n1, n2))
    testing.assert_allclose(
        out[0],
        numpy.array([distance.cityblock(n1[0], n2[0]),
                     distance.cityblock(n1[1], n2[1])]),
        rtol=TOLERANCE)
def testL1DistanceWithBroadcast(self):
    n1 = numpy.array([[[1., 2., 3., 4.], [1., 1., 1., 1.]],
                      [[5., 6., 7., 8.], [1., 1., 1., 2.]]], dtype=numpy.float32)
    n2 = numpy.array([[5., 6., 7., -8.], [1., 1., 1., 1.]], dtype=numpy.float32)
    out = self.Run(functions.l1_distance(n1, n2))
    expected = numpy.array(
        [[distance.cityblock(n1[0, 0], n2[0]), distance.cityblock(n1[0, 1], n2[1])],
         [distance.cityblock(n1[1, 0], n2[0]), distance.cityblock(n1[1, 1], n2[1])]])
    testing.assert_allclose(expected, out[0], atol=TOLERANCE)
def question_6():
    a = (0, 0)
    b = (100, 40)
    pairs = [(55, 5), (59, 10), (56, 15), (50, 18)]
    for p in pairs:
        print('({}, {}):'.format(p[0], p[1]))
        print('L1norm(0, 0): {0}'.format(round(distance.cityblock(p, a), 5)))
        print('L1norm(100, 40): {0}'.format(round(distance.cityblock(p, b), 5)))
        print('L2norm(0, 0): {0}'.format(round(distance.euclidean(p, a), 5)))
        print('L2norm(100, 40): {0}'.format(round(distance.euclidean(p, b), 5)))
        print('')
def tmpAssignPoints(self, centroids):
    print("SELECTED METRIC: " + str(self.metric))
    # chebyshev = L-infinity, cityblock = L1, euclidean (default) = L2
    distCount = lambda a, b: euclidean(a, b)
    if self.metric == "chebyshev":
        distCount = lambda a, b: chebyshev(a, b)
    if self.metric == "cityblock":
        distCount = lambda a, b: cityblock(a, b)
    labels_centroids = []
    for i in self.df.index:
        distances = []
        c_dist = []
        for c in centroids:
            # distance from point i to centroid c
            x = distCount(self.df.loc[i].values, c)
            distances.append(x)
            c_dist.append(c)
        # pick the centroid with the minimum distance
        m = min(distances)
        dm = distances.index(m)
        tmp_nearest_centr = c_dist[dm]
        labels_centroids.append(tmp_nearest_centr)
    return (labels_centroids, centroids)
def Distance(i, j, type):
    if type == 1:
        return distance.euclidean(i, j)
    elif type == 2:
        return distance.cityblock(i, j)
    elif type == 3:
        return distance.cosine(i, j)
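# A minimal usage sketch for Distance above, assuming `distance` is
# scipy.spatial.distance as in the snippet; the points are illustrative.
p, q = (1, 2), (3, 4)
print(Distance(p, q, 1))  # euclidean: ~2.828
print(Distance(p, q, 2))  # cityblock: 4 = |1-3| + |2-4|
print(Distance(p, q, 3))  # cosine distance: ~0.016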
def Classify(self, sample, verbose=True):
    length = len(sample)
    features = MFCC.extract(numpy.frombuffer(sample, numpy.int16))
    gestures = {}
    minimum = None
    lowest = None
    for gesture in self.params:
        d = []
        for tsample in self.params[gesture]:
            total_distance = 0
            smpl_length = len(tsample)
            if numpy.abs(length - smpl_length) <= 0:
                continue
            # average per-frame Manhattan distance over the overlapping frames
            n = min(len(features), len(tsample))
            for i in range(n):
                total_distance += dist.cityblock(features[i], tsample[i])
            d.append(total_distance / float(n))
        score = numpy.min(d)
        gestures[gesture] = score
        if verbose:
            print("Gesture %s: %f" % (gesture, score))
        if minimum is None or score < minimum:
            minimum = score
            lowest = gesture
    if verbose:
        print(lowest, minimum)
    if minimum < THRESHOLD:
        return lowest
    return None
def getDistLambda(metric):
    if metric == "manhattan":
        return lambda x, y: distance.cityblock(x, y)
    elif metric == "cosine":
        return lambda x, y: distance.cosine(x, y)
    else:
        return lambda x, y: distance.euclidean(x, y)
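# A minimal usage sketch for getDistLambda above, assuming `distance` is
# scipy.spatial.distance as in the snippet; points are illustrative.
manhattan = getDistLambda("manhattan")
print(manhattan([0, 0], [3, 4]))               # 7: |3-0| + |4-0|
print(getDistLambda("other")([0, 0], [3, 4]))  # falls back to euclidean: 5.0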
def calc_dist(e1, e2, mode=1):
    if mode == 1:
        return ssd.euclidean(e1, e2)
    elif mode == 2:
        return ssd.cityblock(e1, e2)
    elif mode == 3:
        return ssd.cosine(e1, e2)
def k_means(data, k=2, distance='e'):
    centers = np.array(random.sample(list(data), k))
    centers_steps = [centers.tolist()]
    changed = True
    while changed:
        prev_centers = np.copy(centers)
        data_nr = data.shape[0]
        clusters = np.empty((data_nr, k))
        for i in range(data_nr):
            if distance == 'e':
                clusters[i] = np.array([euclidean(data[i], centers[j]) for j in range(k)])
            elif distance == 'm':
                clusters[i] = np.array([cityblock(data[i], centers[j]) for j in range(k)])
            elif distance == 'h':
                clusters[i] = np.array([hamming(data[i], centers[j]) for j in range(k)])
            else:
                raise ValueError('Unrecognized distance')
        clusters = np.argmin(clusters, axis=1)
        for i in range(k):
            centers[i] = np.mean(data[np.where(clusters == i)], axis=0)
        # converged when every previous center value reappears in the new centers
        changed = not np.intersect1d(prev_centers, centers).size == centers.size
        centers_steps.append(centers.tolist())
    return centers, centers_steps
def iter_kill_spots(cx, cz):
    c = w.getChunk(cx, cz)
    bedrock_blocks = (c.Blocks == w.materials.Bedrock.ID)
    for bx, bz, y in mcbuddy_util.get_block_pos_from_mask(bedrock_blocks):
        if is_kill_spot(c, bx, bz, y):
            x = bx + (cx << 4)
            z = bz + (cz << 4)
            dist = cityblock((100, 100), (x, z))
            yield x, z, dist
def __init__(self, rad):
    super().__init__(rad)
    # mark every voxel within Manhattan (L1) radius of the cube center
    self.mask_ = np.zeros((2*rad+1, 2*rad+1, 2*rad+1), dtype=bool)
    for r1 in range(2*self.rad+1):
        for r2 in range(2*self.rad+1):
            for r3 in range(2*self.rad+1):
                if cityblock((r1, r2, r3), (self.rad, self.rad, self.rad)) <= self.rad:
                    self.mask_[r1, r2, r3] = True
def calc_dist(di, dj, i=1):
    """Distance calculation for every distance function in use."""
    if i == 1:
        return ssd.euclidean(di, dj)   # built-in Euclidean fn
    elif i == 2:
        return ssd.cityblock(di, dj)   # built-in Manhattan fn
    elif i == 3:
        return ssd.cosine(di, dj)      # built-in Cosine fn
def sliced_wasserstein(PD1, PD2, M=50):
    """
    Implementation of the Sliced Wasserstein distance as described in
    "Sliced Wasserstein Kernel for Persistence Diagrams" by Mathieu Carriere,
    Marco Cuturi, and Steve Oudot (https://arxiv.org/abs/1706.03358).

    Parameters
    -----------
    PD1: np.array, shape (m, 2)
        Persistence diagram
    PD2: np.array, shape (n, 2)
        Persistence diagram
    M: int, default is 50
        Number of directions used to run the approximation.

    Returns
    --------
    sw: float
        Sliced Wasserstein distance between PD1 and PD2
    """
    diag_theta = np.array(
        [np.cos(0.25 * np.pi), np.sin(0.25 * np.pi)], dtype=np.float32)
    l_theta1 = [np.dot(diag_theta, x) for x in PD1]
    l_theta2 = [np.dot(diag_theta, x) for x in PD2]
    if (len(l_theta1) != PD1.shape[0]) or (len(l_theta2) != PD2.shape[0]):
        raise ValueError("The projected points and origin do not match")
    # projection of each diagram's points onto the diagonal
    PD_delta1 = [[np.sqrt(x**2 / 2.0)] * 2 for x in l_theta1]
    PD_delta2 = [[np.sqrt(x**2 / 2.0)] * 2 for x in l_theta2]
    # approximate the integral over directions theta
    sw = 0
    theta = 0.5
    step = 1.0 / M
    for i in range(M):
        l_theta = np.array([np.cos(theta * np.pi), np.sin(theta * np.pi)],
                           dtype=np.float32)
        V1 = [np.dot(l_theta, x) for x in PD1] \
            + [np.dot(l_theta, x) for x in PD_delta2]
        V2 = [np.dot(l_theta, x) for x in PD2] \
            + [np.dot(l_theta, x) for x in PD_delta1]
        sw += step * cityblock(sorted(V1), sorted(V2))
        theta += step
    return sw
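# Hypothetical usage sketch for sliced_wasserstein above; the persistence
# diagrams are made-up (birth, death) pairs, and np/cityblock are imported
# as the snippet assumes.
import numpy as np
from scipy.spatial.distance import cityblock

PD_a = np.array([[0.0, 1.0], [0.5, 2.0]])
PD_b = np.array([[0.1, 1.1], [0.4, 1.8]])
print(sliced_wasserstein(PD_a, PD_a))         # ~0 for identical diagrams
print(sliced_wasserstein(PD_a, PD_b, M=100))  # larger M = finer approximation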
def run(self, video_path=None):
    if video_path is not None:
        self.video_path = video_path
    assert (self.video_path is not None), "the video path must be set!"
    self.shots = []
    cap = cv2.VideoCapture(self.video_path)
    hists = []
    frames = []
    while True:
        success, frame = cap.read()
        if not success:
            break
        if self.output_dir is not None:
            frames.append(frame)
        # compute a normalized RGB histogram for each frame
        color_histograms = [cv2.calcHist([frame], [c], None, [self.hist_size], [0, 256])
                            for c in range(3)]
        color_histograms = np.array([chist / float(sum(chist)) for chist in color_histograms])
        hists.append(color_histograms.flatten())
    # Manhattan distance of two consecutive histograms
    scores = [cityblock(*h12) for h12 in zip(hists[:-1], hists[1:])]
    print("max diff:", max(scores), "min diff:", min(scores))
    # score statistics (an automatic threshold could be derived from these)
    mean_score = np.mean(scores)
    std_score = np.std(scores)
    threshold = self.absolute_threshold
    # decide shot boundaries
    prev_i = 0
    prev_score = scores[0]
    for i, score in enumerate(scores[1:]):
        if (score >= threshold) and (abs(score - prev_score) >= threshold / 2.0):
            self.shots.append((prev_i, i + 2))
            prev_i = i + 2
        prev_score = score
    video_length = len(hists)
    self.shots.append((prev_i, video_length))
    assert video_length >= self.min_duration, "duration error"
    self.merge_short_shots()
    # save key frames
    if self.output_dir is not None:
        for shot in self.shots:
            cv2.imwrite("%s/frame-%d.jpg" % (self.output_dir, shot[0]), frames[shot[0]])
        print("key frames written to %s" % self.output_dir)
def calculate(self, row):
    q1 = str(row['question1'])
    q2 = str(row['question2'])
    q1_keywords = self.calculate_keyword(q1)
    q2_keywords = self.calculate_keyword(q2)
    if not len(q1_keywords) or not len(q2_keywords):
        return [0.0, 0.0, 0.0]
    q1_vector = self.calculate_vector(q1_keywords)
    q2_vector = self.calculate_vector(q2_keywords)
    cos_sim = 1 - cosine(q1_vector, q2_vector)
    euclidean_sim = 1 - euclidean(q1_vector, q2_vector)
    manhattan_sim = 1 - cityblock(q1_vector, q2_vector)
    return [cos_sim, euclidean_sim, manhattan_sim]
def get_distances(X):
    nfeatures = len(X[0])
    man_dist = [cityblock(features, np.zeros(nfeatures)) for features in X]
    cosine_dist = [cosine(features, np.ones(nfeatures)) for features in X]
    # note: this column holds the *squared* Euclidean distance
    euclid_dist = [sqeuclidean(features, np.zeros(nfeatures)) for features in X]
    minkowski_dist = [minkowski(features, np.zeros(nfeatures), 2) for features in X]
    all_dist = np.column_stack((man_dist, cosine_dist, euclid_dist, minkowski_dist))
    return all_dist
def calc_puddle_penalization(self, state: tuple) -> float:
    """
    Return a float that represents a penalization; the penalization is the
    lowest Manhattan distance between the current state and the nearest
    free space.
    :param state:
    :return:
    """
    # minimum distance found
    min_distance = min(
        cityblock(self.current_state, free_space)
        for free_space in self.free_spaces)
    # set penalization per distance
    return -min_distance
def get_min_dist(incoming_coord):
    dist = {}
    for coordinate in coordinates.iterrows():
        coord = coordinate[0]
        x = coordinate[1][0]
        y = coordinate[1][1]
        dist[coord] = distance.cityblock(incoming_coord, (x, y))
    min_value = min(dist.values())
    min_keys = [k for k in dist if dist[k] == min_value]
    if len(min_keys) > 1:
        return -1  # location is equally far from two or more coordinates
    else:
        return min_keys[0]
def feature_construct(city, model_name, friends, walk_len=100, walk_times=20,
                      num_features=128):
    '''Construct the feature matrix.

    Args:
        city: city
        model_name: 20_locid
        friends: friends list (asymmetric) [u1, u2]
        walk_len: walk length
        walk_times: walk times
        num_features: dimension for vector
    Returns:
    '''
    feature_path = 'dataset/' + city + '/feature/' + city + '_' + model_name + '_' + \
        str(int(walk_len)) + '_' + str(int(walk_times)) + '_' + str(int(num_features)) + '.feature'
    if os.path.exists(feature_path):
        os.remove(feature_path)
    emb = pd.read_csv('dataset/' + city + '/emb/' + city + '_' + model_name + '_' +
                      str(int(walk_len)) + '_' + str(int(walk_times)) + '_' +
                      str(int(num_features)) + '.emb',
                      header=None, skiprows=1, sep=' ')
    emb = emb.rename(columns={0: 'uid'})  # first column is the user id
    emb = emb.loc[emb.uid > 0]  # only take users, no loc_type
    pair = pair_construct(emb.uid.unique(), friends)
    for i in range(len(pair)):
        u1 = pair.loc[i, 'u1']
        u2 = pair.loc[i, 'u2']
        label = pair.loc[i, 'label']
        u1_vector = emb.loc[emb.uid == u1, range(1, emb.shape[1])]
        u2_vector = emb.loc[emb.uid == u2, range(1, emb.shape[1])]
        i_feature = pd.DataFrame([[u1, u2, label,
                                   cosine(u1_vector, u2_vector),
                                   euclidean(u1_vector, u2_vector),
                                   correlation(u1_vector, u2_vector),
                                   chebyshev(u1_vector, u2_vector),
                                   braycurtis(u1_vector, u2_vector),
                                   canberra(u1_vector, u2_vector),
                                   cityblock(u1_vector, u2_vector),
                                   sqeuclidean(u1_vector, u2_vector)]])
        i_feature.to_csv(feature_path, index=False, header=None, mode='a')
def fit(self, data):
    # normalize the data if requested
    if self.l2norm:
        transformer = Normalizer().fit(data)
        data = transformer.transform(data)
    # initialize the centroids to the first k points of the data
    self.ctrds = {}
    for i in range(self.k):
        self.ctrds[i] = data[i]
    for iteration in range(self.max_iter):
        # reset the classification for the k classes
        self.classes = {}
        for j in range(self.k):
            self.classes[j] = []
        # distances between the data and the centroids: euclidean, cityblock, or cosine
        for data_rows in data:
            if self.method == "euclidean":
                dst = [distance.euclidean(data_rows, self.ctrds[ctrd]) for ctrd in self.ctrds]
            elif self.method == "cityblock":
                dst = [distance.cityblock(data_rows, self.ctrds[ctrd]) for ctrd in self.ctrds]
            elif self.method == "cosine":
                dst = [distance.cosine(data_rows, self.ctrds[ctrd]) for ctrd in self.ctrds]
            clf = dst.index(min(dst))
            self.classes[clf].append(data_rows)
        # remember the previous centroids
        prev_ctrds = dict(self.ctrds)
        # move each centroid to the average of its class
        for clf in self.classes:
            self.ctrds[clf] = np.average(self.classes[clf], axis=0)
        optimized = True
        # stop once every centroid has moved less than the tolerance
        for c in self.ctrds:
            default_ctrd = prev_ctrds[c]
            current_ctrd = self.ctrds[c]
            if np.sum((current_ctrd - default_ctrd) / default_ctrd * 100.0) > self.tol:
                optimized = False
        if optimized:
            break
def get_features(question1, question2):
    """
    Get all the features to input into the XGBoost model.

    Pre:
        Both questions cannot be None.

    Args:
        question1: The first question to match.
        question2: The second question to match.

    Returns:
        A dictionary with all the features for the XGBoost model.
    """
    w2v = word2vec_features(question1, question2, W2VModel)
    output_dict = {
        # length based features
        "len_q1": [len(question1)],
        "len_q2": [len(question2)],
        "diff_len": [len(question1) - len(question2)],
        "len_char_q1": [len(question1.replace(" ", ""))],
        "len_char_q2": [len(question2.replace(" ", ""))],
        "len_word_q1": [len(question1.split())],
        "len_word_q2": [len(question2.split())],
        "common_words": [
            len(set(question1.lower().split()).intersection(
                set(question2.lower().split())))
        ],
        # distance based features
        # (fuzzywuzzy library tutorial:
        # https://www.datacamp.com/community/tutorials/fuzzy-string-python)
        "fuzz_Qratio": [fuzz.QRatio(question1, question2)],
        "fuzz_Wratio": [fuzz.WRatio(question1, question2)],
        "fuzz_partial_ratio": [fuzz.partial_ratio(question1, question2)],
        "fuzz_partial_token_set_ratio": [fuzz.partial_token_set_ratio(question1, question2)],
        "fuzz_partial_token_sort_ratio": [fuzz.partial_token_sort_ratio(question1, question2)],
        "fuzz_token_set_ratio": [fuzz.token_set_ratio(question1, question2)],
        "fuzz_token_sort_ratio": [fuzz.token_sort_ratio(question1, question2)],
        # word2vec based features
        "cosine_distance": [cosine(w2v[0], w2v[1])],
        "cityblock_distance": [cityblock(w2v[0], w2v[1])],
        "jaccard_distance": [jaccard(w2v[0], w2v[1])],
        "canberra_distance": [canberra(w2v[0], w2v[1])],
        "euclidean_distance": [euclidean(w2v[0], w2v[1])],
        "minkowski_distance": [minkowski(w2v[0], w2v[1])],
        "braycurtis_distance": [braycurtis(w2v[0], w2v[1])],
        "wmd": [w2v[2]],
        "norm_wmd": [w2v[3]]
    }
    return output_dict
def find_distance(x, y, option):
    if option == "1":
        # Euclidean distance
        return np.linalg.norm(x - y)
    elif option == "2":
        # Manhattan distance
        return scp.cityblock(x, y)
    elif option == "3":
        # cosine distance
        return scp.cosine(x, y)
def add_lateral_connections_topology(layer, distance_to_weight):
    proj = sim.Projection(layer.population, layer.population,
                          sim.AllToAllConnector(), sim.StaticSynapse())
    weights = np.zeros((layer.size(), layer.size()))
    # for all combinations of neurons
    for x1, y1, x2, y2 in itertools.product(np.arange(layer.shape[0]),
                                            np.arange(layer.shape[1]),
                                            repeat=2):
        w = distance_to_weight(distance.cityblock([x1, y1], [x2, y2]))
        weights[layer.get_idx(x1, y1)][layer.get_idx(x2, y2)] = w
    proj.set(weight=weights)
def get_longest_shortest_routes(self):
    """Return the Manhattan distance for every pair of batteries and houses."""
    import math

    manhattan_distance = []
    for house_key, house_value in self.houses.items():
        house_position = house_value.x, house_value.y
        for battery_key, battery_value in self.batteries.items():
            battery_position = battery_value.x, battery_value.y
            manhattan_distance.append([
                house_key, battery_key,
                distance.cityblock(battery_position, house_position)
            ])
    # shortest route per house
    shortest_distance_per_house = []
    for house in self.houses:
        shortest_distance = math.inf
        for i in manhattan_distance:
            if i[0] == house:
                if i[2] < shortest_distance:
                    shortest_distance = i[2]
        shortest_distance_per_house.append(shortest_distance)
    smallest_object_value = sum(shortest_distance_per_house)
    print(smallest_object_value)
    # longest route per house
    largest_distance_per_house = []
    for house in self.houses:
        largest_distance = 0
        for i in manhattan_distance:
            if i[0] == house:
                if i[2] > largest_distance:
                    largest_distance = i[2]
        largest_distance_per_house.append(largest_distance)
    biggest_object_value = sum(largest_distance_per_house)
    print(biggest_object_value)
    return manhattan_distance
def similarity(svd1, svd2):
    scores = [0, 0, 0]
    array_depths = [32, number_svd_vectors, number_svd_vectors]
    # weighting factors chosen by trial and error for the highest accuracy;
    # they account for the SVD outputs not being of equal scale/importance
    importance_factors = [475, 1, 310]
    # Manhattan distance between the corresponding SVD arrays
    for i in range(3):
        for j in range(array_depths[i]):
            scores[i] += distance.cityblock(svd1[i][j], svd2[i][j])
    return np.array(scores).dot(np.array(importance_factors))
def calculate_similarity(self, question_ebd, relation_ebd, metric='cosine'):
    if metric == 'braycurtis':
        return distance.braycurtis(question_ebd, relation_ebd)
    elif metric == 'canberra':
        return distance.canberra(question_ebd, relation_ebd)
    elif metric == 'cityblock':
        return distance.cityblock(question_ebd, relation_ebd)
    elif metric == 'cosine':
        return distance.cosine(question_ebd, relation_ebd)
    elif metric == 'euclidean':
        return distance.euclidean(question_ebd, relation_ebd)
def manhattan(self, x=None, y=None, w=None):
    """
    The Manhattan distance, a term coined by Hermann Minkowski in the
    nineteenth century, is used in geometric metric spaces to denote the sum
    of the absolute differences of two points' coordinates along the
    standard axes. It is named after the shortest driving route through a
    city laid out in square blocks, like Manhattan (ignoring its one-way
    streets and the diagonal avenues at 3rd and 14th): any route going three
    blocks east and six blocks north must be at least nine blocks long, with
    no shortcut.

    x = [1, 2, 0]
    y = [0, 1, 0]
    """
    x = x or self.x
    y = y or self.y
    w = w or self.w
    return distance.cityblock(x, y, w)
def score_routing(self, routing, usage_matrix):
    """
    For the given layout and routing, produce the score of the routing.
    The score is composed of its constituent nets' scores, and the score of
    each net is based on its number of violations, the number of vias and
    pins, and the ratio of its actual length to the lower bound on its
    length.

    layout is the 3D matrix produced by the placer.
    """
    alpha = 3
    beta = 0.1
    gamma = 1
    net_scores = {}
    net_num_violations = {}
    # score each net segment in the entire net
    for net_name, d in routing.items():
        net_scores[net_name] = []
        net_num_violations[net_name] = []
        for i, segment in enumerate(d["segments"]):
            routed_net = segment["net"]
            # violations
            violation_matrix = segment["violation"]
            violations = self.compute_net_violations(violation_matrix, usage_matrix)
            net_num_violations[net_name].append(violations)
            # number of vias and pins
            vias = 0
            num_pins = 2
            pins_vias = vias - num_pins
            # lower length bound: Manhattan distance between the two pins
            coord_a = segment["pins"][0]["route_coord"]
            coord_b = segment["pins"][1]["route_coord"]
            lower_length_bound = max(1, cityblock(coord_a, coord_b))
            length_ratio = len(routed_net) / lower_length_bound
            score = (alpha * violations) + (beta * pins_vias) + (gamma * length_ratio)
            net_scores[net_name].append(score)
    return net_scores, net_num_violations
def __init__(self, height: int, width: int, seed: int = None, levelname: str = ""):
    super().__init__(height, width, seed=seed, levelname=levelname)
    # key variables
    room_count = max(1, round(self.size / Voronoi.ROOM_SIZE))
    rooms = self.random_points(room_count, margin=1)
    areas = collections.defaultdict(list)
    # generating the Voronoi areas: each cell belongs to its nearest room
    for c1 in range(self.height):
        for c2 in range(self.width):
            nearest_room = min(rooms, key=lambda room: dist.cityblock(room, (c1, c2)))
            areas[tuple(nearest_room)].append((c1, c2))
    # placing walls along the edge of each area
    for area in areas:
        for row in set(point[1] for point in areas[area]):
            points = [point for point in areas[area] if point[1] == row]
            minimum = min(points, key=lambda p: p[0])
            maximum = max(points, key=lambda p: p[0])
            self.map[minimum] = 1
            self.map[maximum] = 1
        for column in set(point[0] for point in areas[area]):
            points = [point for point in areas[area] if point[0] == column]
            minimum = min(points, key=lambda p: p[1])
            maximum = max(points, key=lambda p: p[1])
            self.map[minimum] = 1
            self.map[maximum] = 1
    # removing borders from the areas
    for area in list(areas):
        for point in areas[area][:]:
            if self.map[point]:
                areas[area].remove(point)
        if not areas[area]:
            areas.pop(area)
    # parameters
    self.areas = areas
def manhattan_distance(series1, series2):
    """
    Compute the City Block (Manhattan) distance between two series.

    Quantifies the absolute magnitude of the difference between time series.

    Args:
        series1 (numpy.ndarray): First series
        series2 (numpy.ndarray): Second series

    Returns:
        City Block (Manhattan) distance between the two series
    """
    return sc.cityblock(series1, series2)
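# Equivalence check for manhattan_distance above: scipy's cityblock(u, v)
# equals sum(|u_i - v_i|). Assumes `sc` is scipy.spatial.distance as in the
# snippet; the series values are illustrative.
import numpy as np

s1 = np.array([1.0, 2.0, 3.0])
s2 = np.array([2.0, 0.0, 3.0])
print(manhattan_distance(s1, s2))  # 3.0
print(np.sum(np.abs(s1 - s2)))     # 3.0, same value by definition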
def KNNsearch(query, k, metric):
    result = []
    for i in range(len(Collection["names"])):
        if metric == "euclidean":
            dist = distance.euclidean(query, Collection["encodings"][i])
        elif metric == "manhattan":
            dist = distance.cityblock(query, Collection["encodings"][i])
        # keep the k nearest neighbours in a max-heap of negated distances
        if len(result) < k:
            heapq.heappush(result, (-dist, Collection["names"][i]))
        elif heapq.nsmallest(1, result)[0][0] < -dist:
            heapq.heapreplace(result, (-dist, Collection["names"][i]))
    return [heapq.heappop(result) for i in range(len(result))]
def __init__(self, screen_width, screen_height, state_representation):
    self.wn = turtle.Screen()
    self.screen_width = screen_width
    self.screen_height = screen_height
    self.max_distance = distance.cityblock(
        np.array([0, 0]), np.array([screen_width, screen_height]))
    self.current_food = None
    self.snake = None
    self.actions = {0: "up", 1: "right", 2: "down", 3: "left"}
    self.points = 0
    self.n_moves = 0
    self.max_moves_without_food = 1000
    self.theta = 45
    self.state_representation = state_representation(self)
def manhattan_distance(a, b):
    """Compute the manhattan (L1) distance between two numpy arrays.

    Parameters
    ----------
    a: numpy array
    b: numpy array

    Returns
    -------
    distance: float
    """
    dist = distance.cityblock(a, b)
    return dist
def compute_distance(X, centroid, type="euclidian", weight=None):
    """Computes the distance using the type passed as parameter.
    Can compute a weighted distance only for minkowski."""
    # initialize the weight to all ones if not specified for weighted minkowski
    if type == "wminkowski" and weight is None:
        weight = np.ones(len(X))
    # compute the requested distance with one of the implemented formulas;
    # branching ensures only the chosen formula is evaluated
    if type == "euclidian":
        return sp.euclidean(X, centroid)
    elif type == "manhattan":
        return sp.cityblock(X, centroid)
    elif type == "wminkowski":
        return sp.wminkowski(X, centroid, 2, weight)
def chamfer_with_normalize(test, target):
    distances = []
    for i in range(0, len(test)):
        point_dst = []
        for j in range(0, len(target)):
            point_dst.append(distance.cityblock(test[i], target[j]))
        # nearest-neighbour L1 distance for this test point
        distances.append(min(point_dst))
    distances = np.array(distances)
    # min-max rescale to [0, 1]
    distances = (distances - np.min(distances)) / np.ptp(distances)
    return distances
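# Hypothetical usage sketch for chamfer_with_normalize above: for each test
# point it takes the minimum L1 distance to the target set, then rescales
# the result to [0, 1]. The points are illustrative.
test_pts = [(0, 0), (5, 5), (10, 10)]
target_pts = [(0, 1), (10, 10)]
print(chamfer_with_normalize(test_pts, target_pts))
# raw minima are [1, 9, 0]; after min-max scaling: [~0.111, 1.0, 0.0]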
def __call__(self):
    '''
    Returns vector features. They are:
    1. cosine similarity
    2. L2 normed distance
    3. L1 normed distance
    4. Bray-Curtis distance
    5. Correlation distance
    6. absolute distance vector between q1 and q2 (ndim features)
    '''
    from scipy.spatial import distance
    self.q1s = np.array_split(self.q1.values, self.n_batches)
    self.q2s = np.array_split(self.q2.values, self.n_batches)
    indices = np.array_split(self.feat.index, self.n_batches)
    dfs = []
    for index, (q1, q2) in zip(indices, zip(self.q1s, self.q2s)):
        df = pd.DataFrame(index=index.values)
        pre = self.prefix
        vec = self.vectorizer
        df[pre + 'cos_dist'] = [distance.cosine(vec(x), vec(y)) for x, y in zip(q1, q2)]
        df[pre + 'euc_dist'] = [distance.euclidean(vec(x), vec(y)) for x, y in zip(q1, q2)]
        df[pre + 'manhattan_dist'] = [distance.cityblock(vec(x), vec(y)) for x, y in zip(q1, q2)]
        df[pre + 'braycurt_dist'] = [distance.braycurtis(vec(x), vec(y)) for x, y in zip(q1, q2)]
        df[pre + 'correlation_dist'] = [distance.correlation(vec(x), vec(y)) for x, y in zip(q1, q2)]
        for i in range(self.size):
            if self.transformer is not None:
                df[pre + 'vec_{}'.format(i)] = abs(
                    self.transformer.transform(np.array([vec(x) for x in q1]))[:, i] -
                    self.transformer.transform(np.array([vec(x) for x in q2]))[:, i])
            else:
                df[pre + 'vec_{}'.format(i)] = abs(
                    np.array([vec(x) for x in q1]) -
                    np.array([vec(x) for x in q2]))[:, i]
        dfs += [df]
    df = pd.concat(dfs)
    return self.feat.join(df)
def cross_channel_distance_features(image):
    """Calculates the cross-channel distance features.

    Calculates the distances across channels.

    Parameters
    ----------
    image : 3D array, shape (M, N, C)
        The input image with multiple channels.

    Returns
    -------
    features : dict
        Dictionary including different distances across channels.
    """
    features = dict()
    for ch1 in range(image.shape[2]):
        for ch2 in range(ch1 + 1, image.shape[2]):
            # reshaping the channels to 1D
            channel1 = image[:, :, ch1].ravel()
            channel2 = image[:, :, ch2].ravel()
            # creating the suffix name for better readability
            suffix = "_Ch" + str(ch1 + 1) + "_Ch" + str(ch2 + 1)
            # storing the distance values
            features["braycurtis_distance" + suffix] = dist.braycurtis(channel1, channel2)
            features["canberra_distance" + suffix] = dist.canberra(channel1, channel2)
            features["chebyshev_distance" + suffix] = dist.chebyshev(channel1, channel2)
            features["cityblock_distance" + suffix] = dist.cityblock(channel1, channel2)
            features["correlation_distance" + suffix] = dist.correlation(channel1, channel2)
            features["cosine_distance" + suffix] = dist.cosine(channel1, channel2)
            features["jensenshannon_distance" + suffix] = dist.jensenshannon(channel1, channel2)
            features["minkowski_distance" + suffix] = dist.minkowski(channel1, channel2)
            features["sqeuclidean_distance" + suffix] = dist.sqeuclidean(channel1, channel2)
    return features
def trainingAccuracy(K, type):
    true = 0
    false = 0
    correct = 0
    wrong = 0
    for i in range(0, len(trainMatrix)):
        # leave-one-out: compare sample i against all other training samples
        initial = np.array(trainMatrix[i])
        a = initial[1:len(initial) - 1]
        b = np.delete(trainMatrix, (i), axis=0)
        result3 = []
        result4 = []
        for j in range(0, len(b)):
            c = np.array(b[j][1:len(b[j]) - 1])
            Edist = distance.euclidean(a, c)
            Mdist = distance.cityblock(a, c)
            result3.append([Edist, b[j][len(b[j]) - 1]])
            result4.append([Mdist, b[j][len(b[j]) - 1]])
        result3.sort(key=lambda row: row[0])
        result4.sort(key=lambda row: row[0])
        # labels of the K nearest neighbours under each metric
        minVal = [result3[k][1] for k in range(0, K)]
        minVal1 = [result4[k][1] for k in range(0, K)]
        count = Counter(minVal)
        count1 = Counter(minVal1)
        if count.most_common()[0][0] == initial[len(initial) - 1]:
            true += 1.0
        else:
            false += 1.0
        if count1.most_common()[0][0] == initial[len(initial) - 1]:
            correct += 1.0
        else:
            wrong += 1.0
    if type == 2:
        print('accuracy for K =', K, '=', (true / (true + false)) * 100, '%')
    else:
        print('accuracy for K =', K, '=', (correct / (correct + wrong)) * 100, '%')
def testingAccuracy(K):
    true = 0
    false = 0
    correct = 0
    wrong = 0
    for testrow in testMatrix:
        result = []
        result1 = []
        for trainrow in trainMatrix:
            a = np.array(trainrow[1:len(trainrow) - 1])
            b = np.array(testrow[1:len(testrow) - 1])
            dist1 = distance.cityblock(a, b)  # Manhattan distance
            dist = np.linalg.norm(a - b)      # Euclidean distance
            result.append([dist, trainrow[len(trainrow) - 1]])
            result1.append([dist1, trainrow[len(trainrow) - 1]])
        result.sort(key=lambda row: row[0])
        result1.sort(key=lambda row: row[0])
        # labels of the K nearest neighbours under each metric
        minVal = [result[k][1] for k in range(0, K)]
        minVal1 = [result1[k][1] for k in range(0, K)]
        count = Counter(minVal)
        count1 = Counter(minVal1)
        if count.most_common()[0][0] == testrow[len(testrow) - 1]:
            true += 1.0
        else:
            false += 1.0
        if count1.most_common()[0][0] == testrow[len(testrow) - 1]:
            correct += 1.0
        else:
            wrong += 1.0
    print('accuracy for K =', K, '=', (true / (true + false)) * 100, '%')
def determine_longest_at_turn_n(board_state: GameState):
    """
    Determine which snake is longest in the current state.
    Ties are broken by the closest head-to-fruit Manhattan distances.
    :return:
    """
    longest_snake_idx = None
    for snake_i, snake in enumerate(board_state.snakes):
        if snake.alive:
            if longest_snake_idx is None or \
                    snake.length > board_state.snakes[longest_snake_idx].length:
                longest_snake_idx = snake_i
                continue
            if snake.length == board_state.snakes[longest_snake_idx].length:
                # tie break
                snake_manhattan_dists = sorted([
                    cityblock(snake.head, trophy_i)
                    for trophy_i in board_state.fruits_locations
                ])
                longest_manhattan_dists = sorted([
                    cityblock(board_state.snakes[longest_snake_idx].head, trophy_i)
                    for trophy_i in board_state.fruits_locations
                ])
                for d1, d2 in zip(snake_manhattan_dists, longest_manhattan_dists):
                    if d1 < d2:
                        longest_snake_idx = snake_i
                        break
                    elif d1 > d2:
                        break
                    else:
                        # equal distance, tie break with a later trophy
                        pass
    return longest_snake_idx
def calculate_manhattan_score(self):
    # calculate the key hold manhattan scores
    # key hold genuine score
    for i in range(self.kh_test_genuine.shape[0]):
        current_score = cityblock(self.kh_test_genuine.iloc[i].values, self.kh_mean_vector)
        self.kh_genuine_score.append(current_score)
    # key hold impostor score
    for i in range(self.kh_test_impostor.shape[0]):
        current_score = cityblock(self.kh_test_impostor.iloc[i].values, self.kh_mean_vector)
        self.kh_impostor_score.append(current_score)
    # calculate the key interval manhattan scores
    # key interval genuine score
    for i in range(self.ki_test_genuine.shape[0]):
        current_score = cityblock(self.ki_test_genuine.iloc[i].values, self.ki_mean_vector)
        self.ki_genuine_score.append(current_score)
    # key interval impostor score
    for i in range(self.ki_test_impostor.shape[0]):
        current_score = cityblock(self.ki_test_impostor.iloc[i].values, self.ki_mean_vector)
        self.ki_impostor_score.append(current_score)
def testReplicaHistAdaptation(self):
    """Verify that adaptation leads to linear replica directions."""
    chain = self.GetAdaptiveIsingChain(
        self.nsteps_per_sweep, self.nswaps_per_sweep, self.burn_roundtrips)
    ntemps = len(self.temps)
    linear_hist = [(ntemps / (ntemps - 1)) * (x / 10) for x in reversed(range(ntemps))]
    # Run chain with linearly spaced temperatures.
    self.RunForRoundtrips(chain, self.adaptation_roundtrips)
    pre_ntransitions = chain.statistics.transitions
    pre_hist = chain.statistics.replica.hist
    # Verify that the histogram of replica directions is far from linear.
    self.assertGreater(distance.cityblock(linear_hist, pre_hist), 1.0)
    # Adapt temperatures.
    chain.AdaptTemperatures()
    chain.Reset()
    # Run chain with new temperatures.
    self.RunForRoundtrips(chain, self.adaptation_roundtrips)
    post_ntransitions = chain.statistics.transitions
    post_hist = chain.statistics.replica.hist
    # Verify that the histogram of replica directions is close to linear.
    self.assertLess(distance.cityblock(linear_hist, post_hist), 1.0)
    # Verify that the number of transitions necessary to reach the
    # same number of roundtrips is smaller after adaptation.
    self.assertLess(post_ntransitions, pre_ntransitions)
def calculate(self, row):
    seq1 = str(row['question1']).split()
    seq2 = str(row['question2']).split()
    seq1 = [word for word in seq1 if word in self.model]
    seq2 = [word for word in seq2 if word in self.model]
    if len(seq1) == 0 or len(seq2) == 0:
        return [0.0, 0.0, 0.0]
    # mean word vector for each question
    vec_seq1 = np.array([self.model[x] for x in seq1]).mean(axis=0)
    vec_seq2 = np.array([self.model[x] for x in seq2]).mean(axis=0)
    cos_sim = 1 - cosine(vec_seq1, vec_seq2)
    euclidean_sim = 1 - euclidean(vec_seq1, vec_seq2)
    manhattan_sim = 1 - cityblock(vec_seq1, vec_seq2)
    return [cos_sim, euclidean_sim, manhattan_sim]
def ae_to_distance(mat1, mat2, metric='euclidean', method='avg', cov=None):
    # Methods: min, max, avg, centroid
    # Metrics: euclidean, cityblock (Manhattan), hamming, correlation,
    # sqeuclidean, mahalanobis (requires a covariance matrix)
    if not method == 'centroid':
        vec_dist = cdist(mat1, mat2, metric=metric)
        if method == 'min':
            calc_dist = np.min(vec_dist)
        elif method == 'max':
            calc_dist = np.max(vec_dist)
        elif method == 'avg':
            calc_dist = np.nanmean(vec_dist)
        else:
            raise ValueError("Wrong name of method")
    else:
        cent_1 = np.mean(mat1, axis=0).reshape((1, -1))
        cent_2 = np.mean(mat2, axis=0).reshape((1, -1))
        if metric == 'euclidean':
            calc_dist = euclidean(cent_1, cent_2)
        elif metric == 'cityblock':
            calc_dist = cityblock(cent_1, cent_2)
        elif metric == 'hamming':
            calc_dist = hamming(cent_1, cent_2)
        elif metric == 'correlation':
            calc_dist = correlation(cent_1, cent_2)
        elif metric == 'sqeuclidean':
            calc_dist = sqeuclidean(cent_1, cent_2)
        elif metric == 'mahalanobis':
            if cov is None:
                raise ValueError("Insert covariance matrix")
            calc_dist = mahalanobis(cent_1, cent_2, VI=np.linalg.inv(cov))
        else:
            raise ValueError("Wrong name of metric")
    return calc_dist
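# A minimal call sketch for ae_to_distance above; the matrices are made-up
# 2-D embeddings with two rows each, and np/cdist/cityblock/euclidean are
# imported as the snippet assumes.
import numpy as np

A = np.array([[0.0, 0.0], [1.0, 1.0]])
B = np.array([[2.0, 2.0], [3.0, 3.0]])
print(ae_to_distance(A, B, metric='cityblock', method='centroid'))
# centroids are (0.5, 0.5) and (2.5, 2.5), so the L1 distance is 4.0
print(ae_to_distance(A, B, metric='euclidean', method='min'))
# smallest pairwise distance, between (1, 1) and (2, 2): ~1.414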
def calculate_shap_distance(alert, counterfactuals):
    alert_counterfactuals = pd.concat([alert, counterfactuals], axis=0)
    alert_counterfactuals_shap = get_shap(alert_counterfactuals)
    # Manhattan distance between the alert's SHAP values (row 0) and each
    # counterfactual's SHAP values
    dist = []
    for i in range(1, len(alert_counterfactuals.index)):
        dist.append(cityblock(alert_counterfactuals_shap[0], alert_counterfactuals_shap[i]))
    return dist
def __init__(self, rad): """Constructor Parameters ---------- rad: radius, in voxels, of the sphere inscribed in the searchlight cube, not counting the center voxel """ super().__init__(rad) self.mask_ = np.zeros((2*rad+1, 2*rad+1, 2*rad+1), dtype=np.bool) for r1 in range(2*self.rad+1): for r2 in range(2*self.rad+1): for r3 in range(2*self.rad+1): if(cityblock((r1, r2, r3), (self.rad, self.rad, self.rad)) <= self.rad): self.mask_[r1, r2, r3] = True
def __calculateStatistics(self, result):
    """
    Create the RefDB and some statistics.
    :param result:
    :return:
    """
    # Set the center sequence for each cluster
    for cluster in result.cluster_set.all():
        # the cluster representative is the sequence closest to the center
        best_dist = None
        representative = None
        statistics = {
            'dists-variance': 0,
            'dists-amax': 0,
            'dists-average': 0,
            'size': cluster.sequences.count(),
            'radius': 0
        }
        if cluster.sequences:
            distances = []
            for sequence in cluster.sequences.all():
                dist = cityblock(sequence.dna, cluster.centerMean)
                if dist > statistics['radius']:
                    statistics['radius'] = dist
                if (best_dist is None) or (dist < best_dist):
                    best_dist = dist
                    representative = sequence
                distances.append(dist)
            statistics['dists-variance'] = np.var(distances)
            statistics['dists-amax'] = np.amax(distances)
            statistics['dists-average'] = np.average(distances)
        cluster.statistics = statistics
        cluster.representative = representative
        cluster.save()
def compare_segments_image(seg1, seg2, slen, cwl):
    # slide a 50-column window over seg1 at 10 offsets and keep the best
    # (minimum) Manhattan distance to the template seg2
    dist = []
    for i in range(10):
        dist.append(cityblock(seg1[:, i:50 + i].flatten(), seg2.flatten()))
    return min(dist)
def compHist(hist1, hist2, method, formula):
    """Compare two histograms with the given method and formula.

    Parameters
    ----------
    hist1 : 1D array
        The first histogram
    hist2 : 1D array
        The second histogram
    method : str
        Options for method ('cv_comp', 'scipy_comp', 'kl_div')
    formula : str (or cv integer)
        Options for formula.
        For method == 'cv_comp': (cv.CV_COMP_CORREL, cv.CV_COMP_CHISQR,
        cv.CV_COMP_INTERSECT, cv.CV_COMP_BHATTACHARYYA)
        For method == 'scipy_comp': ("Euclidean", "Manhattan", "Chebysev")
    """
    # using OpenCV
    if method == 'cv_comp':
        dis = cv2.compareHist(np.float32(hist1), np.float32(hist2), formula)
        if formula == cv.CV_COMP_CORREL:
            dis = -dis + 1
    # using SciPy distance metrics
    if method == 'scipy_comp':
        if formula == 'Euclidean':
            dis = dist.euclidean(hist1, hist2)
        if formula == 'Manhattan':
            dis = dist.cityblock(hist1, hist2)
        if formula == 'Chebysev':
            dis = dist.chebyshev(hist1, hist2)
    # using symmetric KL divergence (add-one smoothing avoids log(0))
    if method == 'kl_div':
        hist1 = np.float32(hist1) + 1
        hist2 = np.float32(hist2) + 1
        kbp = np.sum(hist1 * np.log(hist1 / hist2), 0)
        kbq = np.sum(hist2 * np.log(hist2 / hist1), 0)
        dis = np.double(kbp + kbq) / 2
    return dis
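# A minimal call sketch for compHist above using the SciPy branch; the
# histogram counts are illustrative.
h1 = [10, 20, 30]
h2 = [12, 18, 31]
print(compHist(h1, h2, 'scipy_comp', 'Manhattan'))  # 5.0 = |10-12| + |20-18| + |30-31|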
def determineDistance(point1, point2, set_type=distance_type):
    '''
    This function determines the distance between two points in any number
    of dimensions. As such, it can also be used to determine the radius of a
    preference circle (by inputting a point and the status quo).
    '''
    if preference_shape == 'ellipse':
        point2 = (point2[0], point2[1])
    if set_type == 'pyth':
        # distance between two points using the Pythagorean theorem
        distance = dist.euclidean(point1, point2)
    elif set_type == 'city-block':
        # city-block or Manhattan distance between two points
        distance = dist.cityblock(point1, point2)
    return distance
def find_block_types(world, type_, player='Player', sort=True, chunks_to_search=9, limit=0):
    px, py, pz = world.getPlayerPosition(player=player)
    type_id = world.__getattribute__('materials').__getattribute__(type_).ID
    blocks = []
    try:
        for chunk in get_surrounding_chunks(world, num=chunks_to_search):
            cx, cz = chunk.chunkPosition
            mask = (chunk.Blocks == type_id)
            for bx, bz, y in get_block_pos_from_mask(mask):
                x = bx + (cx << 4)
                z = bz + (cz << 4)
                distance = cityblock((px, pz, py), (x, z, y))
                blocks.append((x, z, y, int(distance)))
    except ChunkNotPresent:
        pass
    if not blocks:
        return []
    if sort:
        blocks.sort(key=lambda tup: tup[-1])
    return blocks[0:min(len(blocks) - 1, limit - 1)]
def get(self):
    args = self.parser.parse_args()
    node1, node2 = args['node1'], args['node2']
    nodeType1, nodeType2 = nodes[node1], nodes[node2]
    nodeKey1, nodeKey2 = args[nodeType1 + '1'], args[nodeType2 + '2']
    result1 = self.getVector(node1, nodeType1, nodeKey1)
    if not len(result1) > 0:
        raise ValueError("%s does not exist in database" % node1)
    result2 = self.getVector(node2, nodeType2, nodeKey2)
    if not len(result2) > 0:
        raise ValueError("%s does not exist in database" % node2)
    result1 = np.array([float(v) for v in result1[0][self.getVecName()].split(' ')])
    result2 = np.array([float(v) for v in result2[0][self.getVecName()].split(' ')])
    diff = ','.join(map(str, result1 - result2))
    inner = np.inner(result1, result2)
    l1, l2, cos = cityblock(result1, result2), euclidean(result1, result2), cosine(result1, result2)
    return json.dumps({'Difference': diff,
                       'Manhattan Distance': l1,
                       'Euclidean Distance': l2,
                       'Cosine Distance': cos,
                       'Inner Product': inner})
def assignPoints(self, centroids):
    # set to True when there is a change in assigning points to clusters (centroids)
    changed = False
    assignedCentroids = pd.DataFrame(columns=['centroids'])
    for i in self.df.index:
        distances = {}
        for c in centroids.index:
            if self.metric == "euclidean":
                x = self.myEuclidean(self.df.loc[c], self.df.loc[i])
            if self.metric == "chebyshev":
                x = chebyshev(self.df.loc[c], self.df.loc[i])
            if self.metric == "cityblock":
                x = cityblock(self.df.loc[c], self.df.loc[i])
            # dictionary that stores the centroid as a key and the distance
            # between point and centroid as a value
            distances[c] = x
        # find the minimum by comparing the second element of each tuple (values)
        m = min(distances.items(), key=lambda x: x[1])
        # m[0] is the key of the min value in the dictionary, so m[0] is the
        # centroid: point i 'belongs' to centroid m[0]
        if i not in assignedCentroids.index or assignedCentroids.at[i, 'centroids'] != m[0]:
            changed = True
        assignedCentroids.at[i, 'centroids'] = m[0]
    return (assignedCentroids, changed)
def metrykaManhattan(self, array1, array2):
    """
    Computes the Manhattan distance between two n-vectors u and v,
    which is defined as

    .. math:: \\sum_i {\\left| u_i - v_i \\right|}.

    Parameters
    ----------
    u : ndarray
        An :math:`n`-dimensional vector.
    v : ndarray
        An :math:`n`-dimensional vector.

    Returns
    -------
    d : double
        The City Block distance between vectors ``u`` and ``v``.
    """
    # distance = abs(xa - xb) + abs(ya - yb)
    return cityblock(array1, array2)
def main():
    print("# KNN Classifier")
    parser = ld.parse_arguments()
    # printing args
    print('\t-k = ' + str(parser.k))
    print('\t-d = ' + parser.distance)
    stopwords = None
    if parser.stopwords_path:
        stopwords = ld.load_stopwords(parser.stopwords_path)
    voc = load_vocabulary(parser.train_path, stopwords)
    answers = load_answers(parser.train_path)
    train = transform(voc, parser.train_path)
    test = transform(voc, parser.test_path)
    # output file
    out_path = '../results/' + parser.distance + '_' + str(parser.k) + '.txt'
    out_file = open(out_path, 'w')
    for point in test:
        neighbors = []
        for i in range(len(train)):
            neigh = train[i]
            distance = 0.0
            if parser.distance == 'cosine':
                distance = spd.cosine(neigh, point)
            elif parser.distance == 'jaccard':
                distance = spd.jaccard(neigh, point)
            elif parser.distance == 'euclidean':
                distance = spd.euclidean(neigh, point)
            elif parser.distance == 'dice':
                distance = spd.dice(neigh, point)
            elif parser.distance == 'correlation':
                distance = spd.correlation(neigh, point)
            elif parser.distance == 'manhattan':
                distance = spd.cityblock(neigh, point)
            else:
                print("ERROR! - Invalid distance given.", file=stderr)
                exit()
            tup = (distance, i)
            heapq.heappush(neighbors, tup)
        # take the k most similar (smallest-distance) points
        top_k = heapq.nsmallest(parser.k, neighbors)
        # classifying
        classification = np.zeros(2)
        for (_, idi) in top_k:
            classe = answers[idi]
            classification[int(classe)] += 1
        # outputting the classification
        if classification[0] >= classification[1]:
            print('0', file=out_file)
            print('0')
        else:
            print('1', file=out_file)
            print('1')
    # outputting the results
    print()
    print("# Results saved to file: " + out_path)
    out_file.close()
    result.result("../data/imdb_test", out_path)
def _build_phase_data(self):
    profile = pyscarphase.proto.meta.load_profile(self.args.profile)
    thread = profile.threads[self.args.thread]
    reader = pyscarphase.proto.data.DataReader(
        thread.profile.filename, uuid=thread.profile.uuid)
    phases = {}
    # First pass: accumulate the signature vectors of each phase
    for w in reader:
        pid = w.phase_info.phase
        if pid not in phases:
            phases[pid] = self.Phase(pid)
            phases[pid].centroid = np.zeros(len(w.phase_info.signature.fv_values))
        phases[pid].centroid = np.add(
            phases[pid].centroid, w.phase_info.signature.fv_values[:])
    # Normalize each centroid (L1)
    for p in phases.values():
        p.centroid = p.centroid / np.linalg.norm(p.centroid, 1)
    # Second pass: Manhattan distance of every window to its phase centroid
    reader.seek(0)
    offset = 0
    for w in reader:
        pid = w.phase_info.phase
        d = spd.cityblock(phases[pid].centroid, w.phase_info.signature.fv_values[:])
        phases[pid].windows.append((offset, w.size, d))
        offset += w.size
    # Order phases in descending length
    phases = sorted(phases.values(), key=lambda p: len(p.windows), reverse=True)
    return phases
#print "[+] Matrix in use is: \n", a #print "===================================================================" normA = normalize(a, norm='l1') temp_max = np.zeros(k) temp_min = np.zeros(k) min_array = np.zeros(k) max_array = np.zeros(k) r = np.zeros(k) for i in range(k): temp_min[i] = sys.maxint temp_max[i] = -1 for i in range(k): for j in range(k): if i != j: if (dist.cityblock(normA[i],normA[j]) < temp_min[i]): min_array[i] = dist.cityblock(normA[i],normA[j]) temp_min[i] = min_array[i] if (dist.cityblock(normA[i],normA[j]) > temp_max[i]): max_array[i] = dist.cityblock(normA[i],normA[j]) temp_max[i] = max_array[i] for i in range(k): r[i] = min_array[i]/max_array[i] #print "[+] Min distances are: \n", min_array #print "===================================================================" #print "[+] Max distances are: \n", max_array #print "===================================================================" #print "[+] Ratios are: \n", r print "*******************************************************************"
Qcosines = cosine_similarity(QuestionTVectorArray[0:1], QuestionTVectorArray)
Acosines = cosine_similarity(AnswerTVectorArray[0:1], AnswerTVectorArray)
Qbray = [dist.braycurtis(QuestionTVectorArray[0].toarray(), u.toarray()) for u in QuestionTVectorArray]
Abray = [dist.braycurtis(AnswerTVectorArray[0].toarray(), u.toarray()) for u in AnswerTVectorArray]
Qcanberra = [dist.canberra(QuestionTVectorArray[0].toarray(), u.toarray()) for u in QuestionTVectorArray]
Acanberra = [dist.canberra(AnswerTVectorArray[0].toarray(), u.toarray()) for u in AnswerTVectorArray]
Qhamming = [dist.hamming(QuestionTVectorArray[0].toarray(), u.toarray()) for u in QuestionTVectorArray]
Ahamming = [dist.hamming(AnswerTVectorArray[0].toarray(), u.toarray()) for u in AnswerTVectorArray]
Qcorrelation = [dist.correlation(QuestionTVectorArray[0].toarray(), u.toarray()) for u in QuestionTVectorArray]
Acorrelation = [dist.correlation(AnswerTVectorArray[0].toarray(), u.toarray()) for u in AnswerTVectorArray]
Qcityblock = [dist.cityblock(QuestionTVectorArray[0].toarray(), u.toarray()) for u in QuestionTVectorArray]
Acityblock = [dist.cityblock(AnswerTVectorArray[0].toarray(), u.toarray()) for u in AnswerTVectorArray]
Qdice = [dist.dice(QuestionTVectorArray[0].toarray(), u.toarray()) for u in QuestionTVectorArray]
Adice = [dist.dice(AnswerTVectorArray[0].toarray(), u.toarray()) for u in AnswerTVectorArray]
Qyule = [dist.yule(QuestionTVectorArray[0].toarray(), u.toarray()) for u in QuestionTVectorArray]
Ayule = [dist.yule(AnswerTVectorArray[0].toarray(), u.toarray()) for u in AnswerTVectorArray]
def cityblock(pair):
    # Python 3 removed tuple parameter unpacking, so unpack explicitly
    x, y = pair
    return distance.cityblock(x, y)
def wvCity(a):
    return [distance.cityblock(x[0], x[1]) for x in a]
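# A minimal usage sketch for wvCity above: `a` is an iterable of vector
# pairs, and the result is the Manhattan distance per pair. Assumes
# `distance` is scipy.spatial.distance; values are illustrative.
pairs = [([0, 0], [1, 2]), ([1, 1], [4, 5])]
print(wvCity(pairs))  # [3, 7]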