def testing(trajectories, alpha, max_r):
    """Print the average per-trajectory point count produced by several
    trajectory-compression schemes at approximation level ``alpha``.

    :param trajectories: list of trajectories (sequences of 2d points).
    :param alpha: approximation parameter handed to every compression scheme.
    :param max_r: largest disk radius swept for the directional grid kernel.
    """

    def _avg_pts(samples):
        # Average number of compressed points emitted per input trajectory.
        pts = list(itertools.chain.from_iterable(samples))
        return len(pts) / len(trajectories)

    # Sweep the disk radius geometrically for the direction-sensitive kernel.
    curr_r = alpha
    while curr_r <= max_r:
        # Chord length of a radius-curr_r disk cut at depth alpha.
        chord_l = math.sqrt(4 * alpha * curr_r - 2 * alpha * alpha)
        sample = [pyscan.grid_direc_kernel(pyscan.dp_compress(traj, alpha), chord_l, alpha)
                  for traj in trajectories]
        print("Grid Directional radius = {0:.4f} : {1:.4f} ".format(curr_r * 3000, _avg_pts(sample)))
        curr_r *= 2

    sample = [pyscan.grid_kernel(pyscan.dp_compress(traj, alpha), alpha) for traj in trajectories]
    print("Grid : {0:.2f}".format(_avg_pts(sample)))

    sample = [pyscan.halfplane_kernel(pyscan.dp_compress(traj, alpha), alpha) for traj in trajectories]
    print("Halfplane : {0:.2f}".format(_avg_pts(sample)))

    sample = [pyscan.dp_compress(traj, alpha) for traj in trajectories]
    print("DP : {0:.2f}".format(_avg_pts(sample)))

    # Lifting kernel deliberately disabled; kept for reference.
    # sample = [pyscan.lifting_kernel(pyscan.dp_compress(traj, alpha), .01) for traj in trajectories]
    # print("Lifting : {0:.2f}".format(_avg_pts(sample)))

    sample = [pyscan.convex_hull([pyscan.Point(pt[0], pt[1], 1.0) for pt in traj])
              for traj in trajectories]
    print("Hull : {0:.2f}".format(_avg_pts(sample)))

    sample = [pyscan.even_sample_error([pyscan.Point(pt[0], pt[1], 1.0) for pt in traj], alpha, False)
              for traj in trajectories]
    print("Even : {0:.2f}".format(_avg_pts(sample)))
def normalize_all_projection(traces):
    """Project all traces with an equirectangular projection and rescale the
    result into a normalized box.

    Two passes: the extreme points of the raw traces drive the projection and
    a first normalization, then the extreme points of the projected traces
    drive a second normalization.

    :param traces: list of traces; each trace is a sequence of 2d points.
    :return: list of normalized traces.
    """
    # Flatten once and reuse; the original recomputed this chain four times.
    # NOTE(review): max/min return the extreme *points* (not coordinates);
    # presumably normalize/equirectangular_projection_box index into them.
    all_pts = list(itertools.chain.from_iterable(traces))
    mxx = max(all_pts, key=lambda x: x[0])
    mnx = min(all_pts, key=lambda x: x[0])
    mxy = max(all_pts, key=lambda x: x[1])
    mny = min(all_pts, key=lambda x: x[1])

    proj_traces = []
    for trace in traces:
        proj_trace = []
        for pt in trace:
            x, y = equirectangular_projection_box(pt[0], pt[1], mnx, mxx, mny)
            proj_trace.append(normalize((x, y), mxx, mnx, mxy, mny))
        proj_traces.append(proj_trace)

    # Recompute the bounding extremes in projected space.
    all_proj = list(itertools.chain.from_iterable(proj_traces))
    mxx = max(all_proj, key=lambda x: x[0])
    mnx = min(all_proj, key=lambda x: x[0])
    mxy = max(all_proj, key=lambda x: x[1])
    mny = min(all_proj, key=lambda x: x[1])

    norm_traces = []
    for trace in proj_traces:
        norm_trace = []
        for pt in trace:
            x, y = pt
            norm_trace.append(normalize(pyscan.Point(x, y, 1.0), mxx, mnx, mxy, mny))
        norm_traces.append(norm_trace)
    return norm_traces
def testing_geometric_error(trajectories, alpha, max_r, count):
    """Report the halfspace approximation error of several compression
    schemes on a random subset of trajectories.

    :param trajectories: list of trajectories (sequences of 2d points).
    :param alpha: target approximation error for each compression scheme.
    :param max_r: upper bound on the disk radius swept for the directional
        grid kernel.
    :param count: unused -- TODO confirm whether it should bound the subset
        size (currently hard-coded to 30).
    """
    random.shuffle(trajectories)
    # NOTE(review): subset size is hard-coded; `count` is ignored.
    trajectories = trajectories[:30]
    print("got here")
    curr_r = alpha
    while curr_r < max_r:
        # Chord length of a radius-curr_r disk cut at depth alpha.
        chord_l = math.sqrt(4 * alpha * curr_r - 2 * alpha * alpha)
        sample = [pyscan.grid_direc_kernel(pyscan.dp_compress(traj, alpha), chord_l, alpha)
                  for traj in trajectories]
        # Report only the trajectories whose measured error exceeds alpha.
        for error, traj in zip(test_halfspace_error(trajectories, sample), trajectories):
            if error > alpha:
                print(error)
                print(traj)
        #print("Grid Direc Error {} {}".format(i, post_process_error(test_halfspace_error(trajectories, sample))))
        # BUGFIX: the radius was never advanced, so this loop never terminated
        # whenever alpha < max_r. Double it each pass, matching testing().
        curr_r *= 2

    sample = [pyscan.grid_kernel(pyscan.dp_compress(traj, alpha), alpha) for traj in trajectories]
    print("Grid : {}".format(post_process_error(test_halfspace_error(trajectories, sample))))

    sample = [pyscan.halfplane_kernel(pyscan.dp_compress(traj, alpha), alpha) for traj in trajectories]
    print("Halfplane : {}".format(post_process_error(test_halfspace_error(trajectories, sample))))

    sample = [pyscan.dp_compress(traj, alpha) for traj in trajectories]
    print("DP Error: {}".format(post_process_error(test_halfspace_error(trajectories, sample))))

    sample = [pyscan.convex_hull([pyscan.Point(pt[0], pt[1], 1.0) for pt in traj])
              for traj in trajectories]
    print("Hull Error: {}".format(post_process_error(test_halfspace_error(trajectories, sample))))

    sample = [pyscan.even_sample_error(pyscan.dp_compress(traj, alpha), alpha) for traj in trajectories]
    print("Even : {}".format(post_process_error(test_halfspace_error(trajectories, sample))))
def traj_to_labels(traj):
    """Flatten a list of traces into a point list plus a parallel label list.

    Every point of trace ``i`` receives label ``i``, so downstream labeled
    scanning can attribute points back to their trajectory.

    :param traj: list of traces; each trace is a sequence of 2d points.
    :return: (pts, labels) where pts is a flat list of pyscan.Point and
        labels[i] is the index of the trace pts[i] came from.
    """
    pts = []
    labels = []
    # enumerate replaces the original hand-maintained counter.
    for ix, trace in enumerate(traj):
        pts.extend([pyscan.Point(pt[0], pt[1], 1) for pt in trace])
        labels.extend([ix for _ in trace])
    return pts, labels
def alpha_simplification(alpha, trace, include_end_points=False):
    """Resample ``trace`` so that consecutive output samples are spaced
    ``alpha`` apart along the polyline (arc-length resampling).

    :param alpha: spacing between consecutive samples, measured along the trace.
    :param trace: sequence of 2d points (indexable as pt[0], pt[1]).
    :param include_end_points: when True, also emit every original waypoint
        and the final endpoint of the trace.
    :return: list of pyscan.Point with homogeneous coordinate 1.0.
    """
    new_trace = []
    curr_alpha = 0  # arc length accumulated since the last emitted sample
    for lpt, rpt in zip(trace, trace[1:]):
        curr_pt_seg = dist(lpt, rpt)  # length of this segment still unconsumed
        # Emit a sample each time the accumulated length reaches alpha.
        while curr_pt_seg + curr_alpha > alpha:
            curr_pt_seg -= alpha - curr_alpha
            # a = fraction of the segment remaining, so the sample slides from
            # lpt (a close to 1) toward rpt (a close to 0).
            a = curr_pt_seg / dist(lpt, rpt)
            new_trace.append(
                pyscan.Point(lpt[0] * a + rpt[0] * (1 - a),
                             lpt[1] * a + rpt[1] * (1 - a), 1.0))
            curr_alpha = 0
        curr_alpha += curr_pt_seg
        if include_end_points:
            new_trace.append(pyscan.Point(lpt[0], lpt[1], 1.0))
    if include_end_points:
        # The loop above only appends the left endpoint of each segment.
        new_trace.append(pyscan.Point(trace[-1][0], trace[-1][1], 1.0))
    return new_trace
def read_geolife_files(count):
    """Load up to ``count`` random Geolife ``.plt`` trajectories.

    :param count: maximum number of trajectory files to sample.
    :return: the traces after running them through clean().
    """
    traj_set = list(only_plt('/data/Trajectory_Sets/Geolife Trajectories 1.3'))
    trajectory_files = random.sample(traj_set, min(count, len(traj_set)))
    all_traces = []
    for fname in trajectory_files:
        with open(fname, 'r') as f:
            # Geolife .plt files carry a fixed 6-line header; discard it.
            # (Replaces the original manual counter loop.)
            for _ in itertools.islice(f, 6):
                pass
            reader = csv.reader(f)
            # Columns 0 and 1 are read as the point coordinates
            # (presumably latitude/longitude -- TODO confirm order).
            trace = [pyscan.Point(float(row[0]), float(row[1]), 1) for row in reader]
            all_traces.append(trace)
    normed_traces = clean(all_traces)
    return normed_traces
def testing_flux_framework(output_file, red, blue, l_s, h_s, count,
                           region_name="disk", two_level_sample=True,
                           ham_sample=False, max_time=None):
    """Benchmark max-region scanning on point sets and append one CSV row per
    epsilon value.

    :param output_file: path of the CSV file to (over)write.
    :param red: "measured" point set.
    :param blue: "baseline" point set.
    :param l_s, h_s, count: arguments to np.logspace generating the swept
        epsilon values.
    :param region_name: one of "halfplane", "disk", "rectangle"; any other
        value returns immediately.
    :param two_level_sample: draw a separate (smaller) net on top of the sample.
    :param ham_sample: additionally compress the samples with a ham-sandwich
        tree sample (only used with two_level_sample).
    :param max_time: stop once a single scan exceeds this many seconds.
    """
    fieldnames = ["disc", "region", "n", "s", "time", "m_disc", "m_disc_approx"]
    with open(output_file, 'w') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for i in np.logspace(l_s, h_s, count):
            eps = i
            # Net size n ~ 1/eps, sample size s ~ 1/eps^2; the +.1 guards
            # against round() landing just below the integer.
            n = 1 / eps
            s = 1 / (2 * eps * eps)
            n = int(round(n) + .1)
            s = int(round(s) + .1)
            # NOTE(review): disc name is hard-coded to "disc" here, unlike the
            # other harnesses which take a disc_name parameter -- confirm.
            disc = utils.disc_to_func("disc")
            start_time = time.time()
            m_sample = [pyscan.WPoint(1.0, p[0], p[1], 1.0)
                        for p in pyscan.my_sample(red, s)]
            b_sample = [pyscan.WPoint(1.0, p[0], p[1], 1.0)
                        for p in pyscan.my_sample(blue, s)]
            if two_level_sample:
                net_set1 = pyscan.my_sample(m_sample, n)
                net_set2 = pyscan.my_sample(b_sample, n)
                if ham_sample:
                    # Smaller sample size achievable with ham-sandwich sampling.
                    s = int(1 / (2 * eps**(4.0 / 3)) * math.log(1 / eps)**(2 / 3.0))
                    m_sample = pyscan.ham_tree_sample(m_sample, s)
                    b_sample = pyscan.ham_tree_sample(b_sample, s)
            else:
                # Single-level: the net is the whole sample.
                net_set1 = [pyscan.Point(p[0], p[1], p[2]) for p in m_sample]
                net_set2 = [pyscan.Point(p[0], p[1], p[2]) for p in b_sample]
                n = s
            # Strip weights so the net is a plain point set.
            net_set1 = [pyscan.Point(p[0], p[1], p[2]) for p in net_set1]
            net_set2 = [pyscan.Point(p[0], p[1], p[2]) for p in net_set2]
            net_set = net_set1 + net_set2
            if region_name == "halfplane":
                reg, mx = pyscan.max_halfplane(net_set, m_sample, b_sample, disc)
            elif region_name == "disk":
                reg, mx = pyscan.max_disk(net_set, m_sample, b_sample, disc)
            elif region_name == "rectangle":
                # Scan both sign conventions and keep the better subgrid.
                grid = pyscan.Grid(n, m_sample, b_sample)
                s1 = pyscan.max_subgrid_linear(grid, -1.0, 1.0)
                s2 = pyscan.max_subgrid_linear(grid, 1.0, -1.0)
                if s1.fValue() > s2.fValue():
                    reg = grid.toRectangle(s1)
                    mx = s1.fValue()
                else:
                    reg = grid.toRectangle(s2)
                    mx = s2.fValue()
            else:
                return
            end_time = time.time()
            st = time.time()
            # Ground-truth value of the found region on the full point sets.
            actual_mx = pyscan.evaluate_range(reg, red, blue, disc)
            et = time.time()
            print("Time to evaluate region {}".format(et - st))
            row = {
                "disc": "disc", "region": region_name, "n": n, "s": s,
                "time": end_time - start_time,
                "m_disc_approx": mx, "m_disc": actual_mx
            }
            writer.writerow(row)
            f.flush()
            print(row)
            if max_time is not None and end_time - start_time > max_time:
                return
def testing_partial_framework(red, blue, output_file, l_s, h_s, count,
                              r=.04, p=0.5, q=.2, error_thresh=3,
                              two_level_sample=True, ham_sample=True,
                              disc_name="disc", region_name="disk",
                              sample_method="block", max_time=None):
    """Benchmark partial-trajectory scanning: sample points from the
    trajectories, scan for the max region, and append one CSV row per epsilon.

    :param red: "measured" trajectory set.
    :param blue: "baseline" trajectory set.
    :param output_file: path of the CSV file to (over)write.
    :param l_s, h_s, count: arguments to np.logspace generating the epsilons.
    :param r, p, q: experiment parameters recorded verbatim into each row.
    :param error_thresh: ground truth is evaluated on 10**(2*error_thresh)
        uniformly sampled points per class.
    :param two_level_sample: draw a separate (smaller) net on the samples.
    :param ham_sample: additionally compress with ham-sandwich tree sampling.
    :param disc_name: discrepancy function name passed to utils.disc_to_func.
    :param region_name: "halfplane", "disk" or "rectangle"; otherwise return.
    :param sample_method: "block", "even" or "uniform".
        NOTE(review): any other value leaves f_sample undefined -> NameError.
    :param max_time: stop once a single scan exceeds this many seconds.
    """
    fieldnames = [
        "disc", "region", "n", "s", "r", "p", "q", "time",
        "m_disc", "m_disc_approx", "sample_method"
    ]
    with open(output_file, 'w') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        disc = utils.disc_to_func(disc_name)
        # Large fixed-size uniform samples used as ground truth for evaluation.
        s_prime = int(10**(2 * error_thresh) + .5)
        m_sample_prime = pyscan.uniform_sample(red, s_prime, False)
        b_sample_prime = pyscan.uniform_sample(blue, s_prime, False)
        for i in np.logspace(l_s, h_s, count):
            eps = i
            # Net size n ~ 1/eps, sample size s ~ 1/eps^2; +.1 guards round().
            n = 1 / eps
            s = 1 / (2 * eps * eps)
            n = int(round(n) + .1)
            s = int(round(s) + .1)
            start_time = time.time()
            if sample_method == "block":
                f_sample = pyscan.block_sample
            elif sample_method == "even":
                f_sample = pyscan.even_sample
            elif sample_method == "uniform":
                f_sample = pyscan.uniform_sample
            m_sample = f_sample(red, s, False)
            b_sample = f_sample(blue, s, False)
            m_sample = pyscan.to_weighted(m_sample)
            b_sample = pyscan.to_weighted(b_sample)
            if two_level_sample:
                net_set1 = pyscan.my_sample(m_sample, n)
                net_set2 = pyscan.my_sample(b_sample, n)
                if ham_sample:
                    # Smaller sample size achievable with ham-sandwich sampling.
                    s = int(1 / (2 * eps**(4.0 / 3)) * math.log(1 / eps)**(2 / 3.0))
                    m_sample = pyscan.ham_tree_sample(m_sample, s)
                    b_sample = pyscan.ham_tree_sample(b_sample, s)
            else:
                # Single-level: the net is the whole sample.
                net_set1 = [pyscan.Point(p[0], p[1], p[2]) for p in m_sample]
                net_set2 = [pyscan.Point(p[0], p[1], p[2]) for p in b_sample]
                n = s
            # Strip weights so the net is a plain point set.
            net_set1 = [pyscan.Point(p[0], p[1], p[2]) for p in net_set1]
            net_set2 = [pyscan.Point(p[0], p[1], p[2]) for p in net_set2]
            net_set = net_set1 + net_set2
            if region_name == "halfplane":
                reg, mx = pyscan.max_halfplane(net_set, m_sample, b_sample, disc)
            elif region_name == "disk":
                reg, mx = pyscan.max_disk(net_set, m_sample, b_sample, disc)
            elif region_name == "rectangle":
                # Scan both sign conventions and keep the better subgrid.
                grid = pyscan.Grid(n, m_sample, b_sample)
                s1 = pyscan.max_subgrid_linear(grid, -1.0, 1.0)
                s2 = pyscan.max_subgrid_linear(grid, 1.0, -1.0)
                if s1.fValue() > s2.fValue():
                    reg = grid.toRectangle(s1)
                    mx = s1.fValue()
                else:
                    reg = grid.toRectangle(s2)
                    mx = s2.fValue()
            else:
                return
            end_time = time.time()
            # Ground-truth value of the found region on the big uniform samples.
            actual_mx = pyscan.evaluate_range(
                reg, pyscan.to_weighted(m_sample_prime),
                pyscan.to_weighted(b_sample_prime), disc)
            row = {
                "disc": disc_name, "region": region_name, "n": n, "s": s,
                "r": r, "q": q, "p": p,
                "time": end_time - start_time,
                "m_disc_approx": mx, "m_disc": actual_mx,
                "sample_method": sample_method
            }
            writer.writerow(row)
            print(row)
            f.flush()
            if max_time is not None and end_time - start_time > max_time:
                return
def rescale(pt):
    """Affinely map a point into the unit square using the hard-coded
    bounding box [39.83, 40.2] x [116, 116.8] (presumably a Beijing
    lat/long window -- TODO confirm against the data set)."""
    first = (pt[0] - 39.83) / (40.2 - 39.83)
    second = (pt[1] - 116) / 0.8
    return pyscan.Point(first, second, 1.0)
# Top-level script section: build county regions and population weights, then
# scan for the max-discrepancy disk region. Relies on `reader`, `shape`,
# `population2017` and `population2010` defined earlier in the file.
for row in reader:
    # Key counties by the last 3 digits of the GEO id (county FIPS code).
    population2017[row['GEO.id2'][-3:]] = int(row['respop72017'])
    population2010[row['GEO.id2'][-3:]] = int(row['respop72010'])
regions = []
weights2017 = []
weights2010 = []
for reg in shape.shapeRecords():
    ignore = False
    for p in reg.shape.points:
        # remove counties outside of the continental US
        if not (-124.84 <= p[0] <= -66.9 and 24.396 <= p[1] <= 49.4):
            ignore = True
            break
    if not ignore:
        # reg.record[1] is assumed to be the county FIPS code -- TODO confirm.
        weights2010.append(
            population2010[reg.record[1]])  #reg.record[2], reg.record[5])
        weights2017.append(
            population2017[reg.record[1]])  #reg.record[2], reg.record[5])
        regions.append(
            [pyscan.Point(p[0], p[1], 1.0) for p in reg.shape.points])
disc_f = pyscan.DISC
alpha = 0.5
r_min = 1.0
r_max = 4.0
# NOTE(review): `regions` is passed three times (net, measured, baseline
# region sets, presumably) -- verify against the pyscan.max_disk_region
# signature.
disk, value = pyscan.max_disk_region(regions, regions, weights2010, regions,
                                     weights2017, r_min, r_max, alpha, disc_f)
print(disk)
def toTraj(pts):
    """Wrap a sequence of raw 2d points into a pyscan.Trajectory."""
    converted = []
    for p in pts:
        converted.append(pyscan.Point(p[0], p[1], 1.0))
    return pyscan.Trajectory(converted)
def testing_full_framework(
        red, blue, output_file, l_s, h_s, count,
        vparam="eps",
        eps=.01,
        alpha=.01,
        max_disk_r=None,
        min_disk_r=None,
        disc_name="disc",
        region_name="halfplane",
        sample_method="halfplane",
        fast_disk = True,
        two_level_sample=True,
        max_time=None):
    """Benchmark the full trajectory-scanning pipeline (compress each
    trajectory to a point set, scan for the max region, evaluate the region
    on the full trajectories) and append one CSV row per swept value.

    :param red: "measured" trajectory set.
    :param blue: "baseline" trajectory set.
    :param output_file: path of the CSV file to (over)write.
    :param l_s, h_s, count: arguments to np.logspace generating the swept values.
    :param vparam: which parameter the sweep varies: "eps" or "alpha".
    :param eps: scan accuracy (fixed value when vparam != "eps").
    :param alpha: compression accuracy (fixed value when vparam != "alpha").
    :param max_disk_r, min_disk_r: disk radius bounds for the multiscale and
        rectangle variants.
    :param disc_name: discrepancy function name passed to utils.disc_to_func.
    :param region_name: "multiscale_disk", "multiscale_disk_fixed",
        "halfplane", "disk", "rectangle" or "rectangle_scale".
    :param sample_method: trajectory-to-points scheme: "halfplane", "dp",
        "hull", None (raw waypoints), "grid", "lifting", "grid_direc" or "even".
    :param fast_disk: forwarded to the multiscale disk scanners.
    :param two_level_sample: draw a separate (smaller) net of trajectories.
    :param max_time: stop once a single scan exceeds this many seconds.
    """
    fieldnames = ["vparam", "disc", "region", "n", "s", "n_pts", "m_pts",
                  "b_pts", "alpha", "time", "m_disc", "m_disc_approx",
                  "sample_method"]
    with open(output_file, 'w') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for i in np.logspace(l_s, h_s, count):
            # The sweep varies either the scan accuracy or the compression
            # accuracy; the other keeps its keyword value.
            if vparam == "eps":
                eps = i
            elif vparam == "alpha":
                alpha = i
            # Net size n ~ 1/eps, sample size s ~ 1/eps^2; +.1 guards round().
            n = 1 / eps
            s = 1 / (2 * eps * eps)
            n = int(round(n) + .1)
            s = int(round(s) + .1)
            disc = utils.disc_to_func(disc_name)
            # Sample whole trajectories (not points) at both levels.
            red_sample = pyscan.my_sample(red, s)
            blue_sample = pyscan.my_sample(blue, s)
            if two_level_sample:
                red_net = pyscan.my_sample(red, n)
                blue_net = pyscan.my_sample(blue, n)
            else:
                red_net = red_sample
                blue_net = blue_sample
            net = red_net + blue_net
            print("Running: {} {}".format(n, s))
            start_time = time.time()
            if region_name == "multiscale_disk":
                if max_disk_r is not None and alpha > max_disk_r:
                    print("Max Disk Radius is greater than alpha")
                    continue
                reg, mx = multiscale_disk(min_disk_r, max_disk_r, alpha,
                                          red_sample, blue_sample, net, disc,
                                          fast_disk)
                # Placeholders so the row below can report sizes uniformly.
                m_sample, b_sample, net_set = [], [], []
            else:
                # Convert each sampled trajectory into an alpha-accurate
                # point set with the selected compression scheme.
                if sample_method == "halfplane":
                    m_sample = [pyscan.halfplane_kernel([pyscan.Point(pt[0], pt[1], 1.0) for pt in traj], alpha)
                                for traj in red_sample]
                    b_sample = [pyscan.halfplane_kernel([pyscan.Point(pt[0], pt[1], 1.0) for pt in traj], alpha)
                                for traj in blue_sample]
                    pt_net = [pyscan.halfplane_kernel([pyscan.Point(pt[0], pt[1], 1.0) for pt in traj], alpha)
                              for traj in net]
                elif sample_method == "dp":
                    m_sample = [pyscan.dp_compress(traj, alpha) for traj in red_sample]
                    b_sample = [pyscan.dp_compress(traj, alpha) for traj in blue_sample]
                    pt_net = [pyscan.dp_compress(traj, alpha) for traj in net]
                elif sample_method == "hull":
                    m_sample = [pyscan.convex_hull([pyscan.Point(pt[0], pt[1], 1.0) for pt in traj])
                                for traj in red_sample]
                    b_sample = [pyscan.convex_hull([pyscan.Point(pt[0], pt[1], 1.0) for pt in traj])
                                for traj in blue_sample]
                    pt_net = [pyscan.convex_hull([pyscan.Point(pt[0], pt[1], 1.0) for pt in traj])
                              for traj in net]
                elif sample_method is None:
                    #just takes the waypoints.
                    m_sample = [[pyscan.Point(pt[0], pt[1], 1.0) for pt in traj] for traj in red_sample]
                    b_sample = [[pyscan.Point(pt[0], pt[1], 1.0) for pt in traj] for traj in blue_sample]
                    pt_net = [[pyscan.Point(pt[0], pt[1], 1.0) for pt in traj] for traj in net]
                elif sample_method == "grid":
                    m_sample = [pyscan.grid_kernel(traj, alpha) for traj in red_sample]
                    b_sample = [pyscan.grid_kernel(traj, alpha) for traj in blue_sample]
                    pt_net = [pyscan.grid_kernel(traj, alpha) for traj in net]
                elif sample_method == "lifting":
                    m_sample = [pyscan.lifting_kernel(traj, alpha) for traj in red_sample]
                    b_sample = [pyscan.lifting_kernel(traj, alpha) for traj in blue_sample]
                    pt_net = [pyscan.lifting_kernel(traj, alpha) for traj in net]
                elif sample_method == "grid_direc":
                    if max_disk_r is not None and alpha > max_disk_r:
                        print("Max Disk Radius is greater than alpha")
                        continue
                    # Chord length of the smallest admissible disk cut at
                    # depth alpha.
                    chord_l = math.sqrt(4 * alpha * max(min_disk_r, alpha) - 2 * alpha * alpha)
                    m_sample = [pyscan.grid_direc_kernel(pyscan.dp_compress(traj, alpha), chord_l, alpha)
                                for traj in red_sample]
                    b_sample = [pyscan.grid_direc_kernel(pyscan.dp_compress(traj, alpha), chord_l, alpha)
                                for traj in blue_sample]
                    pt_net = [pyscan.grid_direc_kernel(pyscan.dp_compress(traj, alpha), chord_l, alpha)
                              for traj in net]
                elif sample_method == "even":
                    m_sample = [pyscan.even_sample_error(traj, alpha, False) for traj in red_sample]
                    b_sample = [pyscan.even_sample_error(traj, alpha, False) for traj in blue_sample]
                    pt_net = [pyscan.even_sample_error(traj, alpha, False) for traj in net]
                else:
                    return
                if region_name == "multiscale_disk_fixed":
                    # This scanner needs the net labeled by trajectory too.
                    m_sample = list(pyscan.trajectories_to_labels(m_sample))
                    b_sample = list(pyscan.trajectories_to_labels(b_sample))
                    net_set = list(pyscan.trajectories_to_labels(pt_net))
                    reg, mx = multiscale_disk_fixed(min_disk_r, max_disk_r,
                                                    m_sample, b_sample,
                                                    net_set, disc, fast_disk)
                else:
                    # Label sample points by trajectory; flatten the net.
                    m_sample = list(pyscan.trajectories_to_labels(m_sample))
                    b_sample = list(pyscan.trajectories_to_labels(b_sample))
                    net_set = list(itertools.chain.from_iterable(pt_net))
                    if region_name == "halfplane":
                        reg, mx = pyscan.max_halfplane_labeled(net_set, m_sample, b_sample, disc)
                    elif region_name == "disk":
                        reg, mx = pyscan.max_disk_labeled(net_set, m_sample, b_sample, disc)
                    elif region_name == "rectangle":
                        reg, mx = pyscan.max_rect_labeled(n, 2 * max_disk_r, m_sample, b_sample, disc)
                    elif region_name == "rectangle_scale":
                        reg, mx = pyscan.max_rect_labeled_scale(n, 2 * max_disk_r, alpha, net_set,
                                                                m_sample, b_sample, disc)
                    else:
                        return
            end_time = time.time()
            # Ground truth: evaluate the region on the full trajectory sets.
            actual_mx = pyscan.evaluate_range_trajectory(reg, red, blue, disc)
            row = {"vparam": vparam,
                   "disc": disc_name,
                   "region": region_name,
                   "n": n, "s": s,
                   "n_pts": len(net_set), "m_pts": len(m_sample), "b_pts": len(b_sample),
                   "alpha": alpha,
                   "time": end_time - start_time,
                   "m_disc_approx": mx,
                   "m_disc": actual_mx,
                   "sample_method": sample_method}
            writer.writerow(row)
            f.flush()
            print(row)
            if max_time is not None and end_time - start_time > max_time:
                return