def vis_focused_grid(kernels):
    """Accumulate the kernel densities on a set of focused (zoomed-in) grids."""
    res = 1000
    # Bounding boxes (in scaled coordinates) for the areas of interest.
    lats_lngs = []
    lats_lngs.append(np.mgrid[0.555:0.612:res * 1j, -1.265:-1.248:res * 1j])
    lats_lngs.append(np.mgrid[-0.565:-0.46:res * 1j, 1.46:1.6:res * 1j])
    lats_lngs.append(np.mgrid[-0.5:0.2:res * 1j, -0.85:-0.7:res * 1j])
    lats_lngs.append(np.mgrid[0:0.17:res * 1j, -0.78:-0.72:res * 1j])
    lats_lngs.append(np.mgrid[-0.65:-0.4:res * 1j, -0.6:-0.15:res * 1j])
    lats_lngs.append(np.mgrid[0.445:0.695:res * 1j, -1.2845:-1.2305:res * 1j])
    for j, (lat, lng) in enumerate(lats_lngs[0:1]):  # Only the first grid for now.
        pos = np.dstack((lng, lat))
        logger.info("%sx%s Grid created.", res, res)
        heatmap = np.zeros((res, res))
        T = len(kernels)
        percent = max(1, T // 100)  # Avoid modulo-by-zero when T < 100.
        for i, k in enumerate(kernels):
            if (i + 1) % percent == 0 or (i + 1) == T:
                print_progress_bar(
                    i + 1, T, prefix='Progress:', suffix='Complete', length=50)
            np.add(heatmap, k.pdf(pos), heatmap)  # In-place accumulation.
        logger.info("Probabilities for grid calculated.")
        hlp.save_array(
            heatmap, "combined_gp_heat_focused/{}_{}x{}".format(j, res, res),
            logger)
        plot_heatmap(
            heatmap,
            identifier="_focused/{}_{}x{}".format(j, res, res),
            show_title=False,
            with_alpha=True)

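# `print_progress_bar` is used above but defined elsewhere in the repo. A
# minimal sketch of the usual carriage-return console progress bar recipe,
# assuming this signature (the real helper may differ):
def print_progress_bar_sketch(iteration, total, prefix='', suffix='', length=50):
    """Render a single-line console progress bar (hypothetical stand-in)."""
    filled = int(length * iteration // total)
    bar = '█' * filled + '-' * (length - filled)
    percent = 100 * iteration / float(total)
    print('\r{} |{}| {:.1f}% {}'.format(prefix, bar, percent, suffix), end='')
    if iteration == total:
        print()  # Move to a fresh line once the bar completes.
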
def create_trajectory_distributions(trajectory_results):
    logger.info("Create trajectory distributions...")
    logger.info("Test trajectory IDs: {}".format(trajectory_results.keys()))
    trajectory_distributions = defaultdict(object)
    for test_id, test_trajectory_result in trajectory_results.items():
        logger.info("Segments: {}".format(len(test_trajectory_result)))
        segment_distributions = []
        for segment_i, segment_result in enumerate(test_trajectory_result):
            logger.info("Segment {} tested on {} GPs".format(
                segment_i, len(segment_result["pred"])))
            # segment_result elements have the keys:
            #   pred: Arrival time predictions (means, vars).
            #   true: True arrival times.
            #   feature: The tau feature for the segment.
            # TODO: Plot the ATP distribution for segment_i.
            # How? We create PDF distributions from the (mean, var)-pairs we
            # get from the results. We can then plot the PDFs, as they have
            # 1D input. We also show the true arrival time, so we can get a
            # grasp of how far "off" we are.
            y_true = segment_result["true"]
            feature_fitted = segment_result["feature"]
            distribution = defaultdict(list)
            #print(segment_result["feature_fitted"])
            #for mu, var, feature_fitted in zip(segment_result["pred"], segment_result["feature_fitted"]):
            #    for point_i, (mu_i, std_i, feature_i) in enumerate(zip(mu, np.sqrt(var), feature_fitted)):
            for mu, var in segment_result["pred"]:
                for point_i, (mu_i, std_i) in enumerate(zip(mu, np.sqrt(var))):
                    distribution[point_i].append(norm(mu_i, std_i))  #, feature_i])
            segment_distributions.append(
                (distribution, y_true, feature_fitted))
        trajectory_distributions[test_id] = segment_distributions
    hlp.save_array(trajectory_distributions, "trajectory_distributions",
                   logger, BASE_DIR)
    return trajectory_distributions

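# Each `distribution[point_i]` above is a list of frozen scipy.stats.norm
# objects, one per GP that scored the point. A minimal sketch of how such a
# list collapses into the equally weighted ("naive") mixture density that
# plot_arrival_time_distributions (further down) sums and averages in its
# commented-out plotting path. Hypothetical helper, not part of the original
# module:
def naive_mixture_pdf_sketch(components, xx):
    """Average the component densities of an equally weighted Gaussian mixture."""
    return sum(k.pdf(xx) for k in components) / len(components)
# Example use: naive_mixture_pdf_sketch(distribution[0], np.linspace(0, 600, 1000))
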
def test_model(test_ids, all_trajectories, model, logger):
    f1_gp = model["f1_gp"]
    f1_scaler = model["f1_scaler"]
    segment_GPs = model["segment_GPs"]
    segment_scalers = model["segment_scalers"]

    test_trajectories = []
    for test_id in test_ids:
        test_trajectories.append(all_trajectories[test_id])

    segments_list, arrival_times_list = segment_trajectories(
        test_trajectories, level=0, f1_gp=f1_gp)

    trajectory_results = defaultdict(object)
    for traj_j, (segments, arrival_times) in enumerate(
            zip(segments_list, arrival_times_list)):
        logger.info("Testing trajectory {}".format(traj_j))
        segment_results = []
        for seg_i, (segment, arrival_time) in enumerate(
                zip(segments, arrival_times)):
            #if seg_i > 1: continue
            feature = np.vstack(
                [event_to_tau(e, f1_scaler, f1_gp) for e in segment])
            # Ground truth: seconds remaining until arrival, per event.
            truth = np.vstack([(arrival_time - e["date"]).total_seconds()
                               for e in segment])
            result = defaultdict(list)
            result["true"] = truth
            result["feature"] = feature
            for gp_k, gp in enumerate(segment_GPs[seg_i]):
                if gp_k == test_ids[traj_j]:  # GP is trained on this data.
                    continue
                feature_fitted = segment_scalers[seg_i][gp_k].transform(feature)
                mean, var = gp.predict_y(feature_fitted)
                result["pred"].append([mean, var])
                #abs_errors = [abs(t - m) for t, m in zip(truth, mean)]
                #mae = mean_absolute_error(truth, mean)
                #if mae > 50:
                #    logger.warn("{}, {}: {}".format(traj_j, seg_i, mae))
                #mse = mean_squared_error(truth, mean)
                #metrics = {
                #    "mae": mae,
                #    "mse": mse,
                #    "rmse": np.sqrt(mse),
                #    "median_abs_err": median_absolute_error(truth, mean),
                #    "max_err": max(abs_errors),
                #    "min_err": min(abs_errors)
                #}
                #result["metrics"].append(metrics)
                #logger.info("Segment {}, GP {} metrics: {}".format(seg_i, gp_k, metrics))
            segment_results.append(result)
        trajectory_results[test_ids[traj_j]] = segment_results
    hlp.save_array(trajectory_results, "trajectory_results", logger, BASE_DIR)
    return trajectory_results

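# `event_to_tau` is defined elsewhere. Judging from the inline pattern in
# train_trajectory_GPs below (scale the (lng, lat) position, then predict the
# trajectory-progress tau with the f1 GP), it plausibly looks like this
# hypothetical sketch:
def event_to_tau_sketch(event, f1_scaler, f1_gp):
    """Map a single position event to its predicted tau (hypothetical)."""
    xy = np.asarray(event["gps"][::-1], dtype=float).reshape(1, -1)  # (lng, lat)
    tau_mean, _tau_var = f1_gp.predict_y(f1_scaler.transform(xy))
    return tau_mean[0]
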
def vis_whole_grid(kernels):
    """Accumulate the kernel densities on one grid covering the whole area."""
    res = 7500
    lat, lng = np.mgrid[-1.7:2:res * 1j, -1.35:1.65:res * 1j]
    pos = np.dstack((lng, lat))
    logger.info("%sx%s Grid created.", res, res)
    heatmap = np.zeros((res, res))
    T = len(kernels)
    percent = max(1, T // 100)  # Avoid modulo-by-zero when T < 100.
    for i, k in enumerate(kernels):
        if (i + 1) % percent == 0 or (i + 1) == T:
            print_progress_bar(
                i + 1, T, prefix='Progress:', suffix='Complete', length=50)
        np.add(heatmap, k.pdf(pos), heatmap)  # In-place accumulation.
    logger.info("Probabilities for grid calculated.")
    hlp.save_array(heatmap, "heatmap_{}x{}".format(res, res), logger)
    plot_heatmap(heatmap)

def train_trajectory_GPs(all_trajectories, f1_gp, f1_scaler, session):
    f2_f3_scalers = []
    for i, (vehicle_id, journey) in enumerate(all_trajectories):
        # Keep only position events where the vehicle is actually moving.
        events = [
            e for e in journey.route
            if e["event.type"] == "ObservedPositionEvent" and e["speed"] > 0.1
        ]
        events = hlp.filter_duplicates(events)
        xx = np.vstack([e["gps"][::-1] for e in events])  # (lng, lat) order.
        xx_fit = f1_scaler.transform(xx)
        xx_lng = xx_fit[:, 0].reshape(-1, 1)
        xx_lat = xx_fit[:, 1].reshape(-1, 1)
        # Map positions to trajectory progress tau via the f1 GP.
        tau_mean, _var = f1_gp.predict_y(xx_fit)
        tau_mean = tau_mean[:, 0].reshape(-1, 1)
        f2_f3_scaler = StandardScaler().fit(tau_mean)
        f2_f3_scalers.append(f2_f3_scaler)
        tau_mean_fitted = f2_f3_scaler.transform(tau_mean)
        # f2 maps tau -> longitude, f3 maps tau -> latitude.
        train_GP(tau_mean_fitted, xx_lng, session, number=i, gp_name="f2")
        train_GP(tau_mean_fitted, xx_lat, session, number=i, gp_name="f3")
    hlp.save_array(f2_f3_scalers, "f2_f3_scalers", logger, BASE_DIR)

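# `train_GP` is defined elsewhere. Since this module uses gpflow.saver.Saver
# (the GPflow 1.x API), here is a minimal sketch assuming an RBF kernel and
# the Scipy optimizer; the real kernel choice, hyperparameters, and save path
# are assumptions, not taken from this file:
def train_GP_sketch(X, Y, session, number=0, gp_name="f2"):
    """Fit a GPflow 1.x GP regression model and persist it (hypothetical)."""
    model = gpflow.models.GPR(X, Y, kern=gpflow.kernels.RBF(X.shape[1]))
    gpflow.train.ScipyOptimizer().minimize(model)
    # `session` is the gpflow.saver.Saver instance created in run().
    session.save("{}{}_{}".format(BASE_DIR, gp_name, number), model)
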
def run(line_number, load_model):
    visualise_trajectory_gp = False  # Quicker if False
    visualise_tau_gp = False  # Quicker if False
    session = gpflow.saver.Saver()
    trajectories = hlp.load_trajectories_from_file(line_number, logger)
    #hlp.plot_trajectories(trajectories, logger, print_only=True)
    trajectory_key = "Lötgatan:Linköpings resecentrum"
    #vehicle_id, journey = trajectories[trajectory_key][0]
    #pprint.pprint(journey.route)
    #hlp.plot_speed_time(trajectories["Lötgatan:Fönvindsvägen östra"][4][1].segment_at("Linköpings resecentrum")[0])
    #hlp.plot_speed_stops(journey.route)
    #plot_speed_stops(journey.route)
    #plot_speed_time(journey)
    all_trajectories = hlp.get_all_trajectories(trajectories, trajectory_key)

    if load_model:
        model = load_GPS_variation_GPs(session, load_only="all", f1_version="_ard")
        output_file = "f1_ard_contour_segment3_pdf"
    else:
        create_GPS_variation_GPs(all_trajectories, session, f1_version="_ard")
        exit(0)  # Training only; re-run with load_model=True to evaluate.

    f1_gp = model["f1_gp"]
    f1_scaler = model["f1_scaler"]
    f2_f3_GPs = model["f2_f3_GPs"]
    f2_f3_scalers = model["f2_f3_scalers"]

    # for i, (vehicle_id, journey) in enumerate(all_trajectories[1:]):
    #     events = [e for e in journey.route if e["event.type"] == "ObservedPositionEvent"]
    #     X = [[e["date"].isoformat(), e["gps"][::-1][0], e["gps"][::-1][-1], e["speed"], e["dir"]] for e in events]
    #     np.savetxt("d{}.txt".format(i + 2), X, fmt='%s', delimiter=";")
    # exit(1)

    res = 50
    #lat_step = get_step_size(lat_start, lat_stop, res)
    #lng_step = get_step_size(lng_start, lng_stop, res)
    #lat, lng = np.mgrid[58.410317:58.427006:res*1j, 15.490352:15.523200:res*1j]  # Big Grid
    #lat, lng = np.mgrid[58.416317:58.42256:res*1j, 15.494352:15.503200:res*1j]  # Middle Grid
    #lat, lng = np.mgrid[58.4173:58.419:res*1j, 15.4965:15.499:res*1j]  # Small Grid
    #lat, lng = np.mgrid[58.4174:58.4178:res*1j, 15.4967:15.4974:res*1j]  # Super small Grid (bend)
    #lat, lng = np.mgrid[58.4185:58.4188:res*1j, 15.4985:15.49875:res*1j]  # Super small Grid (straight stretch)
    #lat, lng = np.mgrid[58.4190:58.422:res*1j, 15.500:15.502:res*1j]  # Small Grid (new start)
    #lat, lng = np.mgrid[58.4175:58.422:res*1j, 15.508:15.517:res*1j]  # Small Grid (segment 2)
    lat, lng = np.mgrid[58.408:58.418:res * 1j, 15.61:15.63:res * 1j]  # Small Grid (segment 3, final)
    pos_grid = np.dstack((lng, lat)).reshape(-1, 2)
    logger.info("Grid created.")
    pos_grid_fitted = f1_scaler.transform(pos_grid)
    logger.info("Grid scaled.")
    grid_tau, _var = f1_gp.predict_y(pos_grid_fitted)
    logger.info("Grid predicted.")
    hlp.save_array(grid_tau, "grid_tau", logger, BASE_DIR)

    logger.info("Evaluate Grid with GPs...")
    probs = calculate_prob_grip(
        grid_tau, f2_f3_GPs, f2_f3_scalers, pos_grid_fitted, res, method="pdf")

    # Visualise each trajectory's grid, then the average over all of them.
    pdf_grid_sum = None
    for traj_j, pdf_grid in probs.items():
        visualise_probabilities(
            pdf_grid, all_trajectories, pos_grid, lat, lng, res,
            file_name=output_file + "_{}".format(traj_j))
        if pdf_grid_sum is None:
            pdf_grid_sum = pdf_grid
        else:
            np.add(pdf_grid_sum, pdf_grid, pdf_grid_sum)
    pdf_grid_sum /= len(probs.keys())
    visualise_probabilities(
        pdf_grid_sum, all_trajectories, pos_grid, lat, lng, res,
        file_name=output_file + "_all")
    exit(1)  # Halt here (development scaffolding).

def calculate_prob_grip(grid_tau, f2_f3_GPs, f2_f3_scalers, pos_grid_fitted,
                        res, method=None):
    lng_ss_scaled = get_step_size(pos_grid_fitted[0][0], pos_grid_fitted[-1][0], res)
    lat_ss_scaled = get_step_size(pos_grid_fitted[0][1], pos_grid_fitted[-1][1], res)
    points = grid_tau.shape[0]
    probs = np.zeros(points)
    if method == "combine":
        lng_means = []
        lng_vars = []
        lat_means = []
        lat_vars = []
    elif method == "pdf":
        kernels_list = defaultdict(list)
        probs = {}  # One probability grid per trajectory.
    else:
        result = defaultdict(list)

    for traj_j, (f2_gp, f3_gp) in f2_f3_GPs.items():
        grid_tau_fitted = f2_f3_scalers[traj_j].transform(grid_tau)
        lng_mean, lng_var = f2_gp.predict_y(grid_tau_fitted)
        lat_mean, lat_var = f3_gp.predict_y(grid_tau_fitted)
        if method == "combine":
            lng_means.append(lng_mean)
            lng_vars.append(lng_var)
            lat_means.append(lat_mean)
            lat_vars.append(lat_var)
        elif method == "pdf":
            probs[traj_j] = np.zeros(points)
            kernels = []
            # One independent 2D Gaussian per grid point.
            for m1, v1, m2, v2 in zip(lng_mean, lng_var, lat_mean, lat_var):
                k = multivariate_normal(
                    mean=[m1[0], m2[0]], cov=np.diag([v1[0], v2[0]]))
                kernels.append(k)
            kernels_list[traj_j] = kernels
        else:
            result[traj_j].extend((lng_mean, lng_var, lat_mean, lat_var))

    if method == "combine":
        lng_means = np.array(lng_means)
        lng_vars = np.array(lng_vars)
        lat_means = np.array(lat_means)
        lat_vars = np.array(lat_vars)
        c_lng_means = combine_mean(lng_means)
        c_lng_vars = combine_variance(lng_vars, lng_means, c_lng_means)
        c_lat_means = combine_mean(lat_means)
        c_lat_vars = combine_variance(lat_vars, lat_means, c_lat_means)

    traj_count = len(f2_f3_GPs.keys())
    for grid_i, (lng_i, lat_i) in enumerate(pos_grid_fitted):
        if grid_i % 100 == 0:
            logger.info(grid_i)
        if method == "combine":
            lng_prob = prob_of(lng_i, lng_ss_scaled, c_lng_means[grid_i], c_lng_vars[grid_i])
            lat_prob = prob_of(lat_i, lat_ss_scaled, c_lat_means[grid_i], c_lat_vars[grid_i])
            probs[grid_i] += lng_prob * lat_prob  # We assume independence!
        elif method == "pdf":
            for traj_j, kernels in kernels_list.items():
                probs[traj_j][grid_i] = kernels[grid_i].pdf((lng_i, lat_i))
        else:
            for traj_j, (lng_mean, lng_var, lat_mean, lat_var) in result.items():
                lng_prob = prob_of(lng_i, lng_ss_scaled, lng_mean[grid_i], lng_var[grid_i])
                lat_prob = prob_of(lat_i, lat_ss_scaled, lat_mean[grid_i], lat_var[grid_i])
                probs[grid_i] += lng_prob * lat_prob  # We assume independence!
        if method != "pdf":
            # Average over trajectories. For "pdf", probs is a dict keyed by
            # trajectory and must not be indexed (or divided) per grid point.
            probs[grid_i] = probs[grid_i] / traj_count
    logger.info("Combined predictions from {} trajectories.".format(traj_count))
    #probs[probs < 1e-03] = 0
    hlp.save_array(probs, "grid_probs", logger, BASE_DIR)
    return probs

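# `combine_mean`, `combine_variance`, `get_step_size`, and `prob_of` are
# helpers defined elsewhere. Hypothetical sketches of what they plausibly
# compute, assuming the "combine" method moment-matches an equally weighted
# Gaussian mixture and `prob_of` integrates a Normal density over one grid
# cell (assumptions not confirmed by this file):
def combine_mean_sketch(means):
    # Mixture mean: the average of the component means (equal weights).
    return np.mean(means, axis=0)

def combine_variance_sketch(variances, means, combined_means):
    # Law of total variance: mean of (var + mean^2) minus the squared mixture mean.
    return np.mean(variances + np.square(means), axis=0) - np.square(combined_means)

def get_step_size_sketch(start, stop, res):
    # Cell size of a res-point grid that includes both endpoints (as np.mgrid does).
    return (stop - start) / (res - 1)

def prob_of_sketch(x, step, mean, var):
    # Probability mass of N(mean, var) over the cell [x - step/2, x + step/2].
    rv = norm(mean, np.sqrt(var))
    return rv.cdf(x + step / 2) - rv.cdf(x - step / 2)
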
def plot_arrival_time_distributions(trajectory_distributions):
    logger.info("Plot Arrival Time Prediction Distributions...")
    path = BASE_DIR + "arrival_time_distributions"
    hlp.ensure_dir(path)
    precision = 1e-03
    for trajectory_i, segment_distributions in trajectory_distributions.items():
        logger.info("Trajectory {} has {} segment(s).".format(
            trajectory_i, len(segment_distributions)))
        traj_path = path + "/{}".format(trajectory_i)
        hlp.ensure_dir(traj_path)
        for segment_i, (point_distribution, truth,
                        feature) in enumerate(segment_distributions):
            logger.info("Plotting Segment {}...".format(segment_i))
            seg_path = traj_path + "/{}".format(segment_i)
            hlp.ensure_dir(seg_path)
            arrival_time_naive = []
            for point_i, distribution in point_distribution.items():
                #temp plt.figure()
                #temp plt.axvline(x=truth[point_i], color="r", linestyle='-', lw=0.5)
                #temp min_t = 1000
                #temp max_t = -1000
                #temp sum_of_weights = 0
                #temp for k in distribution:
                #temp     min_t = min(min_t, math.floor(k.ppf(precision)[0]))
                #temp     max_t = max(max_t, math.ceil(k.ppf(1 - precision)[0]))
                #temp     #sum_of_weights += calculate_weight(feature, k)
                #temp pdf_res = np.zeros((max_t - min_t) * 100)
                #temp pdf_res_weighted = np.zeros((max_t - min_t) * 100)
                #temp xx = np.linspace(min_t, max_t, (max_t - min_t) * 100)
                distr_mean = 0
                for k in distribution:
                    # TODO: Implement different things here.
                    #temp pdf = k.pdf(xx)
                    distr_mean += k.mean()
                    #temp np.add(pdf_res, pdf, pdf_res)  # naive mixture
                    #weight = calculate_weight(feature, k)
                    #np.add(pdf_res_weighted, pdf * (weight - sum_of_weights), pdf_res_weighted)  # weighted mixture
                    #temp plt.plot(xx, pdf)
                distr_mean = distr_mean / len(distribution)
                arrival_time_naive.append(distr_mean)
                #temp plt.savefig("{}/{}".format(seg_path, point_i))
                #temp plt.close()
                #temp pdf_res = pdf_res / len(distribution)  # naive mixture
                #pdf_res_weighted = pdf_res_weighted / sum_of_weights  # weighted mixture
                #temp plt.figure()
                #temp plt.axvline(x=distr_mean, color="k", linestyle="--", lw=0.5)
                #temp plt.axvline(x=truth[point_i], color="r", linestyle='-', lw=0.5)
                #temp plt.plot(xx, pdf_res)
                #temp plt.savefig("{}/res_{}".format(seg_path, point_i))
                #temp plt.close()
                # weighted mixture TODO: No idea if this works
                #plt.figure()
                #plt.axvline(x=distr_mean, color="k", linestyle="--", lw=0.5)
                #plt.axvline(x=truth[point_i], color="r", linestyle='-', lw=0.5)
                #plt.plot(xx, pdf_res_weighted)
                #plt.savefig("{}/weighted_res_{}".format(seg_path, point_i))
                #plt.close()
            # abs_errors = [abs(t - m) for t, m in zip(truth, arrival_time_naive)]
            # mae = mean_absolute_error(truth, arrival_time_naive)
            # mse = mean_squared_error(truth, arrival_time_naive)
            # metrics = {
            #     "mae": mae,
            #     "mse": mse,
            #     "rmse": np.sqrt(mse),
            #     "median_abs_err": median_absolute_error(truth, arrival_time_naive),
            #     "max_err": max(abs_errors),
            #     "min_err": min(abs_errors)
            # }
            # logger.info(metrics)
            hlp.save_array(
                {
                    "truth": truth,
                    "predicted": arrival_time_naive,
                    "feature": feature
                }, "{}/predicted".format(seg_path))

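# Sanity note: for the equally weighted mixture used above, the mixture mean
# equals the average of the component means, which is exactly what
# `arrival_time_naive` stores per point. A tiny self-contained check with
# made-up numbers (illustrative only):
def _mixture_mean_check():
    components = [norm(100, 5), norm(120, 8), norm(110, 3)]
    naive = sum(k.mean() for k in components) / len(components)
    assert abs(naive - np.mean([100, 120, 110])) < 1e-9
    return naive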