def main(args):
    """Generate R4R data from the original R2R data.

    Input entries are grouped by scan. Within a scan every ordered pair of
    entries (including an entry paired with itself) is considered: the end of
    the first path is connected to the start of the second path through the
    shortest path in the scan's connectivity graph. A pair is dropped when the
    connecting distance exceeds `args.distance_threshold`, or otherwise when
    the heading difference exceeds `args.heading_threshold`; a threshold of
    None disables that filter. The surviving joined paths are written as JSON
    and summary statistics are printed.

    Args:
        args: argparse namespace providing `input_file_path`,
            `output_file_path`, `connections_dir`, `distance_threshold` and
            `heading_threshold`.
    """
    print('******Generating R4R Data********')
    print(' Distance threshold: {} meters'.format(args.distance_threshold))
    print(' Heading threshold: {} radians'.format(args.heading_threshold))

    def _connections_file_path(scan):
        """Return the connectivity JSON path for `scan`."""
        return os.path.join(
            args.connections_dir, '{}_connectivity.json'.format(scan))

    # Use a context manager so the input file handle is closed promptly.
    with open(args.input_file_path) as input_file:
        inputs = json.load(input_file)
    outputs = []
    filtered = collections.Counter()

    # Group by scan to save memory (only one graph is loaded at a time).
    scans = {}
    for entry in inputs:
        scans.setdefault(entry['scan'], []).append(entry)

    for scan, values in scans.items():
        print('Loading graph for scan {}.'.format(scan))
        graph = graph_utils.load(_connections_file_path(scan))
        pos2d = nx.get_node_attributes(graph, 'pos2d')

        # Cache format: (node, (distance, path)) ((node obj, (dict, dict)))
        cache = dict(nx.all_pairs_dijkstra(graph, weight='weight3d'))
        shortest_distance = {k: v[0] for k, v in cache.items()}
        shortest_path = {k: v[1] for k, v in cache.items()}

        for first in values:
            for second in values:
                first_target = first['path'][-1]
                second_source = second['path'][0]

                # Compute the end-start distance (meters).
                distance = shortest_distance[first_target][second_source]

                # Compute the absolute end-start heading difference (radians).
                # The direction of the first path's last step is wrapped into
                # [0, 2*pi) before being subtracted from the second heading.
                # NOTE(review): the subtraction presumably relies on pos2d
                # values being numpy arrays -- confirm in graph_utils.load.
                x, y = pos2d[first['path'][-1]] - pos2d[first['path'][-2]]
                heading = abs(
                    second['heading'] - np.arctan2(y, x) % (2 * np.pi))

                if (args.distance_threshold is not None
                        and distance > args.distance_threshold):
                    filtered['distance'] += 1
                elif (args.heading_threshold is not None
                      and heading > args.heading_threshold):
                    filtered['heading'] += 1
                else:
                    value = {}
                    # The connecting shortest path already starts at
                    # first_target and ends at second_source, so those two
                    # nodes are trimmed from the outer paths.
                    value['path'] = (
                        first['path'][:-1]
                        + shortest_path[first_target][second_source]
                        + second['path'][1:])
                    value['distance'] = (
                        first['distance'] + distance + second['distance'])
                    # Every first instruction combined with every second one.
                    value['instructions'] = [
                        head + tail  # pylint: disable=g-complex-comprehension
                        for head in first['instructions']
                        for tail in second['instructions']
                    ]
                    value['heading'] = first['heading']
                    value['path_id'] = len(outputs)
                    value['scan'] = scan

                    # Additional data.
                    path_source = first['path'][0]
                    path_target = second['path'][-1]
                    value['shortest_path_distance'] = (
                        shortest_distance[path_source][path_target])
                    value['shortest_path'] = (
                        shortest_path[path_source][path_target])
                    value['first_path_id'] = first['path_id']
                    value['second_path_id'] = second['path_id']
                    outputs.append(value)

    with open(args.output_file_path, 'w') as f:
        json.dump(
            outputs, f, indent=2, sort_keys=True, separators=(',', ': '))

    # Dataset summary metrics.
    tot_instructions = np.sum([len(x['instructions']) for x in outputs])
    avg_distance = np.mean([x['distance'] for x in outputs])
    avg_path_len = np.mean([len(x['path']) for x in outputs])
    avg_sp_distance = np.mean([x['shortest_path_distance'] for x in outputs])
    avg_sp_path_len = np.mean([len(x['shortest_path']) for x in outputs])
    print('******Final Results********')
    print(' Total instructions generated: {}'.format(tot_instructions))
    print(' Average path distance (meters): {}'.format(avg_distance))
    print(' Average shortest path distance: {}'.format(avg_sp_distance))
    print(' Average path length (steps): {}'.format(avg_path_len))
    print(' Average shortest path length: {}'.format(avg_sp_path_len))
    print(' Total paths generated: {}'.format(len(outputs)))
    print(' Total distance filtered paths: {}'.format(filtered['distance']))
    print(' Total heading filtered paths: {}'.format(filtered['heading']))
nodes, m, seed), check_shortest_path=False) print(json.dumps(props, indent=4)) return props if __name__ == "__main__": if len(sys.argv) < 3: print_help() exit(1) pages = sys.argv[1] pagelinks = sys.argv[2] check_shortest_path = False if len(sys.argv) == 4: if re.match(r"true", sys.argv[3].strip(), re.IGNORECASE): check_shortest_path = True #graph_utils.dump_geffi(nx.barabasi_albert_graph(yi_nodes_no, 22), graph_BA_model_geffi_filename) #ba = analyze_ba(yi_nodes_no, 22) #graph_utils.dump_json(ba, BA_modeling_yi_properties_filename) #ba = analyze_ba_spectrum(yi_nodes_no) #graph_utils.dump_json(ba, BA_properties_filename_37k) #graph_utils.dump_geffi(nx.barabasi_albert_graph(yi_nodes_no, 50), graph_BA_model_geffi_m50_filename) G = graph_utils.load(graph_cache_filename) if G is None: G = generate_graph(pages, pagelinks, nx.Graph()) graph_utils.dump(G, graph_cache_filename) graph_utils.dump_geffi(G, graph_geffi_filename) printSomeThingsRegardingGraph(G, check_shortest_path=check_shortest_path)