import numpy as np


def cal_cost_matrix_with_later_pos(agents, targets, other_costs, later_pos):
    """Cost for agent i to serve target j and then continue to later_pos[i],
    plus agent i's costs accrued on other levels."""
    k = len(agents)
    distance_matrix = np.zeros((k, k), dtype=float)
    for i in range(k):
        for j in range(k):
            distance_matrix[i, j] = distance(agents[i][1], agents[i][0], targets[j][1], targets[j][0]) + \
                                    distance(targets[j][1], targets[j][0], later_pos[i][1], later_pos[i][0]) + \
                                    other_costs[i]
    return distance_matrix
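# The `distance` helper is assumed, not defined in this file. A minimal sketch of
# one plausible implementation (haversine great-circle distance in km, matching the
# (lat, lng, lat, lng) argument order used above); other call sites in this file use
# point-pair, grid, and tensor variants that are presumably separate helpers:
import math


def haversine_distance(lat1, lng1, lat2, lng2):
    """Great-circle distance in kilometres between two (lat, lng) points."""
    earth_radius_km = 6371.0
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    d_phi = math.radians(lat2 - lat1)
    d_lmb = math.radians(lng2 - lng1)
    h = math.sin(d_phi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(d_lmb / 2) ** 2
    return 2 * earth_radius_km * math.asin(math.sqrt(h))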
def staypoint_detection(self, tr, dr):
    """
    Mine staypoints from self.points.
    :param tr: time threshold
    :param dr: distance threshold
    """
    point_num = len(self.points)
    i = 0
    while i < point_num:
        if point_num - i < 2:  # only one point (or none) left
            return
        j = i + 1
        for j in range(i + 1, point_num):
            dist = distance(self.points[i], self.points[j])
            if dist > dr:
                break
        if j == i + 1:
            i += 1
            continue
        elif 2 <= j <= point_num - 2 or (j == point_num - 1 and distance(self.points[i], self.points[j]) > dr):
            duration = interval_points(self.points[i], self.points[j - 1])
            if duration >= tr:
                lng = 0
                lat = 0
                for k in range(i, j):
                    lng += self.points[k].lng
                    lat += self.points[k].lat
                lng /= (j - i)  # staypoint spans points i .. j-1
                lat /= (j - i)
                arrival_time = self.points[i].timestamp
                leaving_time = self.points[j - 1].timestamp
                staypoint = StayPoint(lng, lat, arrival_time, leaving_time)
                self.staypoints.append(staypoint)
                i = j
                continue
            else:
                i += 1
                continue
        else:
            # j == point_num - 1 and distance(self.points[i], self.points[j]) <= dr:
            # keep the trailing points of the trajectory from being wasted
            duration = interval_points(self.points[i], self.points[j])
            if duration >= tr:
                lng = 0
                lat = 0
                for k in range(i, j + 1):
                    lng += self.points[k].lng
                    lat += self.points[k].lat
                lng /= (j - i + 1)
                lat /= (j - i + 1)
                arrival_time = self.points[i].timestamp
                leaving_time = self.points[j].timestamp
                staypoint = StayPoint(lng, lat, arrival_time, leaving_time)
                self.staypoints.append(staypoint)
                return
            else:
                i += 1
                continue
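# Usage sketch for staypoint_detection (hypothetical names: a trajectory object
# whose `points` carry lng/lat/timestamp attributes; tr in seconds, dr in whatever
# unit the two-argument `distance` helper returns):
#
#   traj.staypoint_detection(tr=20 * 60, dr=0.2)   # e.g. 20 minutes within ~200 m
#   for sp in traj.staypoints:
#       print(sp.lng, sp.lat, sp.arrival_time, sp.leaving_time)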
def __init__(self, agent, already_spent_cost, seq_targets):
    self.agent = agent
    self.already_spent_cost = already_spent_cost
    self.seq_targets = seq_targets
    # total cost = agent -> first target, then along the target sequence,
    # plus the cost already spent
    cost = distance(self.agent[1], self.agent[0], self.seq_targets[0][1], self.seq_targets[0][0])
    for i in range(len(self.seq_targets) - 1):
        cost += distance(self.seq_targets[i][1], self.seq_targets[i][0],
                         self.seq_targets[i + 1][1], self.seq_targets[i + 1][0])
    self.cost = cost + self.already_spent_cost
def update_unit_costs(unit_costs, agents, seq_targets, assignment, changed_idx):
    k = len(agents)
    T = len(seq_targets)
    for j in range(k):
        if changed_idx == 0:
            pre_loc = agents[j]
        else:
            pre_loc = seq_targets[changed_idx - 1][assignment[changed_idx - 1, j]]
        cur_loc = seq_targets[changed_idx][assignment[changed_idx, j]]
        unit_costs[j, changed_idx] = distance(pre_loc[1], pre_loc[0], cur_loc[1], cur_loc[0])
        # if the changed arc is not the last arc, recompute the next unit costs
        if changed_idx != (T - 1):
            pre_loc = cur_loc
            cur_loc = seq_targets[changed_idx + 1][assignment[changed_idx + 1, j]]
            unit_costs[j, changed_idx + 1] = distance(pre_loc[1], pre_loc[0], cur_loc[1], cur_loc[0])
    return unit_costs
def solve_mbap(agents, seq_targets, already_spent_costs):
    """
    Solve the multi-level bottleneck assignment problem (MBAP); see
    "A multi-level bottleneck assignment approach to the bus drivers' rostering problem".
    :param agents: k agents, each agent is in a position
    :param seq_targets: two-dim array, T x k
    :param already_spent_costs: already spent costs of k agents
    :return: (min-max cost, assignment matrix of shape T x k whose rows are levels,
              columns are agents, and entries are target location indices)
    """
    k = len(agents)
    T = len(seq_targets)
    opt_assignment = -1 * np.ones((T, k), dtype=int)
    opt_cost = 0
    # phase 1: greedy initial assignment, one level at a time
    pre_costs = already_spent_costs.copy()
    pre_pos = agents.copy()
    for i in range(T):
        c_i, assign_i = solve_bap(pre_pos, seq_targets[i], pre_costs)
        opt_cost = c_i
        for j in range(k):
            cur_pos = seq_targets[i][assign_i[j]]
            # update opt_assignment
            opt_assignment[i, j] = assign_i[j]
            # update pre_costs (the cost from seq_targets[i-1] to seq_targets[i])
            pre_costs[j] += distance(pre_pos[j][1], pre_pos[j][0], cur_pos[1], cur_pos[0])
            # update agent pre_pos
            pre_pos[j] = cur_pos
    # phase 2: re-assign level by level until convergence
    unit_costs = cal_unit_costs(opt_assignment, agents, seq_targets)
    assignment_unstable = True
    while assignment_unstable:
        assignment_unstable = False
        for i in range(T):
            other_costs = cal_other_costs(i, unit_costs) + already_spent_costs
            if i == 0:
                pre_pos = agents.copy()
            else:
                for j in range(k):
                    pre_pos[j] = seq_targets[i - 1][opt_assignment[i - 1, j]]
            later_pos = None
            if i != (T - 1):
                later_pos = []
                for j in range(k):
                    later_pos.append(seq_targets[i + 1][opt_assignment[i + 1, j]])
            c_i, assign_i = solve_bap(pre_pos, seq_targets[i], other_costs, later_pos)
            if c_i < opt_cost:
                assignment_unstable = True
                opt_cost = c_i
                for j in range(k):
                    opt_assignment[i, j] = assign_i[j]
                unit_costs = update_unit_costs(unit_costs, agents, seq_targets, opt_assignment, i)
    return opt_cost, opt_assignment
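# Usage sketch for solve_mbap with made-up (lng, lat) positions; relies on the
# solve_bap and cal_other_costs helpers defined elsewhere in this module:
if __name__ == '__main__':
    agents = [(116.30, 39.98), (116.35, 39.90), (116.40, 39.95)]      # k = 3 agents
    seq_targets = [
        [(116.31, 39.97), (116.36, 39.91), (116.41, 39.94)],          # level 0
        [(116.32, 39.96), (116.37, 39.92), (116.42, 39.93)],          # level 1 (T = 2)
    ]
    cost, assignment = solve_mbap(agents, seq_targets, np.zeros(3))
    print('min bottleneck cost:', cost)
    print('assignment (T x k):\n', assignment)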
import torch


def contrastive_loss(output, label, margin=2):
    '''contrastive loss - Deep Supervised Hashing for Fast Image Retrieval
    '''
    batch_size = output.shape[0]
    # pairwise label-similarity matrix (1 where two samples share a class, for one-hot labels)
    S = torch.mm(label.float(), label.float().t())
    dist = distance(output, dist_type='euclidean2')
    # similar pairs are pulled together, dissimilar pairs pushed beyond the margin
    loss_1 = S * dist + (1 - S) * torch.max(margin - dist, torch.zeros_like(dist))
    loss = torch.sum(loss_1) / (batch_size * (batch_size - 1))
    return loss
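# Usage sketch for contrastive_loss with random data; shapes assumed: `output` is
# (batch, bits) real-valued codes, `label` is a (batch, num_classes) one-hot matrix,
# and `distance(..., dist_type='euclidean2')` is this module's pairwise squared
# Euclidean helper:
#
#   output = torch.randn(8, 32)
#   label = torch.eye(8)          # eight samples, one class each
#   loss = contrastive_loss(output, label, margin=2)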
def get_candidate_locations_ellipse(f1, f2, major_axis, row_num, col_num):
    """
    get candidate locations within the ellipse defined by two foci
    :param f1: focus point, (row, col)
    :param f2: focus point, (row, col)
    :param major_axis: length of the major axis (2a)
    :param row_num: number of grid rows
    :param col_num: number of grid columns
    :return: list of (row, col) candidates
    """
    f1_y, f1_x = f1
    f2_y, f2_x = f2
    a = major_axis / 2.0
    c = distance(f1_x, f1_y, f2_x, f2_y) / 2.0
    b = np.sqrt(a * a - c * c)
    # endpoints of the major axis
    major_bound_1 = ((f2_x - f1_x) * (a + c) / (2 * c) + f1_x,
                     (f2_y - f1_y) * (a + c) / (2 * c) + f1_y)
    major_bound_2 = ((f1_x - f2_x) * (a + c) / (2 * c) + f2_x,
                     (f1_y - f2_y) * (a + c) / (2 * c) + f2_y)
    # offset the major-axis endpoints by the half minor axis to get the corners
    # of the (possibly tilted) rectangle bounding the ellipse
    delta_x = b * (abs(major_bound_1[1] - major_bound_2[1])) / (2 * a)
    if f1_x == f2_x:
        delta_y = 0
    else:
        delta_y = np.sqrt(b * b - delta_x * delta_x)
    p1 = (major_bound_1[0] + delta_x, major_bound_1[1] - delta_y)
    p2 = (major_bound_1[0] - delta_x, major_bound_1[1] + delta_y)
    p3 = (major_bound_2[0] - delta_x, major_bound_2[1] + delta_y)
    p4 = (major_bound_2[0] + delta_x, major_bound_2[1] - delta_y)
    min_x = int(max(min([p1[0], p2[0], p3[0], p4[0]]), 0))
    max_x = int(min(max([p1[0], p2[0], p3[0], p4[0]]), col_num - 1))
    min_y = int(max(min([p1[1], p2[1], p3[1], p4[1]]), 0))
    max_y = int(min(max([p1[1], p2[1], p3[1], p4[1]]), row_num - 1))
    candidates = []
    for x in range(min_x, max_x + 1):
        for y in range(min_y, max_y + 1):
            # keep (x, y) if the sum of distances to the foci is under the major axis
            if distance(f1_x, f1_y, x, y) + distance(x, y, f2_x, f2_y) < major_axis:
                candidates.append((y, x))
    return candidates
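# Usage sketch: candidate cells inside an ellipse on a 100 x 100 grid, with foci
# given as (row, col) and a major axis longer than the focal distance (here the
# foci are 40 cells apart, so major_axis must exceed 40):
#
#   cells = get_candidate_locations_ellipse((40, 30), (40, 70), major_axis=60,
#                                           row_num=100, col_num=100)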
def cal_unit_costs(assignment, agents, seq_targets):
    k = len(agents)
    T = len(seq_targets)
    unit_costs = np.zeros((k, T), dtype=float)
    for j in range(k):
        for i in range(T):
            if i == 0:
                pre_loc = agents[j]
            else:
                pre_loc = seq_targets[i - 1][assignment[i - 1, j]]
            cur_loc = seq_targets[i][assignment[i, j]]
            unit_costs[j, i] = distance(pre_loc[1], pre_loc[0], cur_loc[1], cur_loc[0])
    return unit_costs
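# Sanity-check sketch: after update_unit_costs patches the column for a changed
# level i (and its successor), the incremental matrix should match a full
# recomputation with cal_unit_costs:
#
#   patched = update_unit_costs(unit_costs, agents, seq_targets, opt_assignment, i)
#   assert np.allclose(patched, cal_unit_costs(opt_assignment, agents, seq_targets))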
def get_cities_in_radius(radius, origin):
    cities = get_data()
    cities_in_radius = []
    for city in cities:
        dist = distance(origin['lat'], origin['lon'], city['lat'], city['lon'])
        # print('distance from Dublin to {:s} is {:f}'.format(city['city'], dist))
        if dist <= radius:
            cities_in_radius.append(city)
    city_names = map_city_names(cities_in_radius)
    sorted_names = sorted(city_names)
    print('Cities in a {:d} km radius from Dublin:'.format(radius))
    print('------------------------------------------')
    for name in sorted_names:
        print(name)
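# Usage sketch, assuming get_data() yields dicts with 'city', 'lat' and 'lon' keys
# and map_city_names() extracts the 'city' field:
#
#   get_cities_in_radius(100, {'lat': 53.3498, 'lon': -6.2603})   # Dublin city centre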
def get_candidate_locations(cur_location, radius, row_num, col_num):
    """
    get candidate locations within `radius` of the current location
    :param cur_location: (row, col) of the current location
    :param radius: search radius, in grid cells
    :param row_num: number of grid rows
    :param col_num: number of grid columns
    :return: list of (row, col) candidates
    """
    cur_y, cur_x = cur_location
    delta = int(radius)
    # clamp the bounding box to the grid
    max_x = cur_x + delta if cur_x + delta < col_num else col_num - 1
    min_x = cur_x - delta if cur_x - delta >= 0 else 0
    max_y = cur_y + delta if cur_y + delta < row_num else row_num - 1
    min_y = cur_y - delta if cur_y - delta >= 0 else 0
    candidates = []
    for x in range(min_x, max_x + 1):
        for y in range(min_y, max_y + 1):
            if distance(cur_x, cur_y, x, y) < radius:
                candidates.append((y, x))
    return candidates
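# Usage sketch: every cell within 5 cells of (row=10, col=10) on a 20 x 20 grid,
# assuming `distance` here is plain Euclidean distance on grid coordinates:
#
#   cells = get_candidate_locations((10, 10), radius=5, row_num=20, col_num=20)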
def exp_loss(output, label, wordvec=None, alpha=5.0, balanced=False):
    '''exponential loss
    '''
    batch_size, bit = output.shape
    # mask out the diagonal (self-pairs)
    mask = (torch.eye(batch_size) == 0).to(torch.device("cuda"))
    S = torch.mm(label.float(), label.float().t())
    S_m = torch.masked_select(S, mask)
    wordvec_u = torch.mm(label.float(), wordvec)
    W = distance(wordvec_u, dist_type='cosine')
    W_m = torch.masked_select(W, mask)  # semantic-similarity weights (unused by the active branch)

    ## inner product
    # balance = True
    # ip = torch.mm(output, output.t()) / 32
    # ip = F.linear(F.normalize(output), F.normalize(output))
    # ip_m = torch.masked_select(ip, mask)
    # loss_1 = (S_m - ip_m) ** 2

    ## sigmoid
    # D = distance(output, dist_type='cosine')
    # E = torch.log(1 + torch.exp(-alpha * (1 - 2 * D)))
    # E_m = torch.masked_select(E, mask)
    # loss_1 = 10 * S_m * E_m + (1 - S_m) * (E_m - torch.log((torch.exp(E_m) - 1).clamp(1e-6)))

    ## baseline
    balanced = True  # note: overrides the `balanced` argument
    alpha_1 = 8
    alpha_2 = 8
    m1 = 0
    m2 = 0
    scale = 1
    dot_product = torch.mm(output, output.t()) / 32
    E1 = torch.log(1 + torch.exp(-alpha_1 * (dot_product - m1)))
    E1_m = torch.masked_select(E1, mask)
    loss_s1 = scale * S_m * E1_m
    E2 = torch.log(1 + torch.exp(-alpha_2 * (dot_product - m2)))
    E2_m = torch.masked_select(E2, mask)
    loss_s0 = (1 - S_m) * (E2_m - torch.log((torch.exp(E2_m) - 1).clamp(1e-6)))
    loss_1 = loss_s1 + loss_s0
    # print(f'max:{dot_product.max().item():.4f} min:{dot_product.min().item():.4f}')
    print('loss_s1:{:.4f} loss_s0:{:.4f}'.format(loss_s1.sum().item(), loss_s0.sum().item()))

    ## hyper sigmoid
    # alpha = 9
    # belta = 20
    # gamma = 1.5
    # margin = 0.25
    # D = distance(output, dist_type='cosine')
    # E1 = torch.log(1 + torch.exp(-alpha * (1 - gamma * 2 * D)))
    # E1_m = torch.masked_select(E1, mask)
    # loss_s1 = S_m * E1_m
    # E2 = torch.log(1 + torch.exp(-alpha * (1 - gamma * 2 * (D - margin))))
    # E2_m = torch.masked_select(E2, mask)
    # loss_s0 = (1 - S_m) * (E2_m - torch.log((torch.exp(E2_m) - 1)).clamp(1e-6))
    # loss_1 = belta * loss_s1 + loss_s0

    ## margin hash
    # D = distance(output, dist_type='cosine')
    # E1 = torch.exp(2 * D) - 1
    # E2 = torch.exp(2 * (1 - D)) - 1
    # E1_m = torch.masked_select(E1, mask)
    # E2_m = torch.masked_select(E2, mask)
    # loss_1 = S_m * E1_m + (1 - S_m) * E2_m

    if balanced:
        # reweight so similar and dissimilar pairs contribute comparably
        S_all = batch_size * (batch_size - 1)
        S_1 = torch.sum(S)
        balance_param = (S_all / S_1) * S + (1 - S)
        B_m = torch.masked_select(balance_param, mask)
        loss_1 = B_m * loss_1
    loss = torch.mean(loss_1)
    return loss
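# Usage sketch for exp_loss (the hard-coded mask device means this requires CUDA;
# wordvec shape assumed (num_classes, embedding_dim)):
#
#   output = torch.randn(8, 32).cuda()
#   label = torch.eye(8).cuda()
#   wordvec = torch.randn(8, 300).cuda()
#   loss = exp_loss(output, label, wordvec)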
test_df['q2_tfidf'] = tfidf(test_df['question2'])

# 3. Find the length of the longest question and pad all other questions to it
train_longest = longest_question(train_df['q1_feats'], train_df['q2_feats'])
test_longest = longest_question(test_df['q1_feats'], test_df['q2_feats'])
train_questions1 = sequence.pad_sequences(train_df['q1_feats'], train_longest)
train_questions2 = sequence.pad_sequences(train_df['q2_feats'], train_longest)
test_questions1 = sequence.pad_sequences(test_df['q1_feats'], test_longest)
test_questions2 = sequence.pad_sequences(test_df['q2_feats'], test_longest)

# 4. Calculate features
x_train = pd.DataFrame()
x_test = pd.DataFrame()
x_train['euclidean'] = distance(train_df['q1_tfidf'], train_df['q2_tfidf'], 'euclidean')
# x_train['cos_similarity'] = distance(train_df['q1_tfidf'], train_df['q2_tfidf'], 'cos_similarity')
x_train['word_share'] = train_df.apply(word_match_share, axis=1, raw=True)
# bug fix: test features must come from test_df (q1 vs q2), not train_df (q1 vs q1)
x_test['euclidean'] = distance(test_df['q1_tfidf'], test_df['q2_tfidf'], 'euclidean')
# x_test['cos_similarity'] = distance(test_df['q1_tfidf'], test_df['q2_tfidf'], 'cos_similarity')
x_test['word_share'] = test_df.apply(word_match_share, axis=1, raw=True)

# 5. XGBoost
X_train, X_test, y_train, y_test = train_test_split(
    x_train, train_duplicate['is_duplicate'].values, test_size=0.2, random_state=0)
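# A minimal continuation sketch for step 5, assuming the standard xgboost sklearn
# wrapper (hyperparameters are illustrative, not from the original):
import xgboost as xgb

model = xgb.XGBClassifier(n_estimators=400, max_depth=4, learning_rate=0.05)
model.fit(X_train, y_train)
print('validation accuracy:', model.score(X_test, y_test))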