import collections
import json
import os

import networkx as nx
import numpy as np

import graph_utils


def main(args):
    """Generate R4R data from the original R2R data.

  Args:
    args: argparse containing paths to input and output files.
  """
    print('******Generating R4R Data********')
    print('  Distance threshold: {} meters'.format(args.distance_threshold))
    print('  Heading threshold:  {} radians'.format(args.heading_threshold))

    def _connections_file_path(scan):
        return os.path.join(args.connections_dir,
                            '{}_connectivity.json'.format(scan))

    with open(args.input_file_path) as f:
        inputs = json.load(f)
    outputs = list()
    filtered = collections.Counter()

    # Group by scan to save memory.
    scans = dict()
    for value in inputs:
        scan = value['scan']
        if scan not in scans:
            scans[scan] = []
        scans[scan].append(value)

    for scan, values in scans.items():
        print('Loading graph for scan {}.'.format(scan))
        graph = graph_utils.load(_connections_file_path(scan))
        pos2d = nx.get_node_attributes(graph, 'pos2d')

        # Cache maps each node to a (distances, paths) pair of dicts, both
        # keyed by target node, as returned by nx.all_pairs_dijkstra.
        cache = dict(nx.all_pairs_dijkstra(graph, weight='weight3d'))
        shortest_distance = {k: v[0] for k, v in cache.items()}
        shortest_path = {k: v[1] for k, v in cache.items()}

        for first in values:
            for second in values:
                first_target = first['path'][-1]
                second_source = second['path'][0]

                # Compute the end-start distance (meters).
                distance = shortest_distance[first_target][second_source]

                # Compute the absolute end-start heading difference (radians).
                x, y = pos2d[first['path'][-1]] - pos2d[first['path'][-2]]
                heading = abs(second['heading'] - np.arctan2(y, x) %
                              (2 * np.pi))

                if (args.distance_threshold is not None
                        and distance > args.distance_threshold):
                    filtered['distance'] += 1
                elif (args.heading_threshold is not None
                      and heading > args.heading_threshold):
                    filtered['heading'] += 1
                else:
                    value = dict()
                    value['path'] = (
                        first['path'][:-1] +
                        shortest_path[first_target][second_source] +
                        second['path'][1:])
                    value['distance'] = (
                        first['distance'] +
                        shortest_distance[first_target][second_source] +
                        second['distance'])
                    value['instructions'] = [
                        x + y  # pylint: disable=g-complex-comprehension
                        for x in first['instructions']
                        for y in second['instructions']
                    ]
                    value['heading'] = first['heading']
                    value['path_id'] = len(outputs)
                    value['scan'] = scan

                    # Additional data.
                    path_source = first['path'][0]
                    path_target = second['path'][-1]
                    value['shortest_path_distance'] = cache[path_source][0][
                        path_target]
                    value['shortest_path'] = cache[path_source][1][path_target]
                    value['first_path_id'] = first['path_id']
                    value['second_path_id'] = second['path_id']

                    outputs.append(value)

    with open(args.output_file_path, 'w') as f:
        json.dump(outputs, f, indent=2, sort_keys=True, separators=(',', ': '))

    # Dataset summary metrics.
    tot_instructions = np.sum([len(x['instructions']) for x in outputs])
    avg_distance = np.mean([x['distance'] for x in outputs])
    avg_path_len = np.mean([len(x['path']) for x in outputs])
    avg_sp_distance = np.mean([x['shortest_path_distance'] for x in outputs])
    avg_sp_path_len = np.mean([len(x['shortest_path']) for x in outputs])

    print('******Final Results********')
    print('  Total instructions generated:    {}'.format(tot_instructions))
    print('  Average path distance (meters):  {}'.format(avg_distance))
    print('  Average shortest path distance:  {}'.format(avg_sp_distance))
    print('  Average path length (steps):     {}'.format(avg_path_len))
    print('  Average shortest path length:    {}'.format(avg_sp_path_len))
    print('  Total paths generated:           {}'.format(len(outputs)))
    print('  Total distance filtered paths:   {}'.format(filtered['distance']))
    print('  Total heading filtered paths:    {}'.format(filtered['heading']))
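For reference, here is a minimal sketch of how main(args) could be driven from the command line. The flag names simply mirror the attributes read inside main (input_file_path, output_file_path, connections_dir, distance_threshold, heading_threshold); the parser, its defaults, and the help strings are assumptions, not the original script's argument definitions.

import argparse

# Hypothetical command-line wiring for main(); flag names are inferred from
# the attributes accessed inside main() and may differ from the real script.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Generate R4R data by joining pairs of R2R paths.')
    parser.add_argument('--input_file_path', required=True,
                        help='Path to the original R2R JSON annotations.')
    parser.add_argument('--output_file_path', required=True,
                        help='Where to write the generated R4R JSON.')
    parser.add_argument('--connections_dir', required=True,
                        help='Directory holding <scan>_connectivity.json files.')
    parser.add_argument('--distance_threshold', type=float, default=None,
                        help='Max end-to-start distance (meters) allowed when '
                             'joining two paths; omit to disable the filter.')
    parser.add_argument('--heading_threshold', type=float, default=None,
                        help='Max end-to-start heading difference (radians); '
                             'omit to disable the filter.')
    main(parser.parse_args())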
Example #2
def analyze_ba(nodes, m, seed=None):
    # Build a Barabasi-Albert graph and report its properties, skipping the
    # expensive shortest-path statistics.
    props = printSomeThingsRegardingGraph(
        nx.barabasi_albert_graph(nodes, m, seed),
        check_shortest_path=False)
    print(json.dumps(props, indent=4))
    return props


if __name__ == "__main__":
    if len(sys.argv) < 3:
        print_help()
        exit(1)
    pages = sys.argv[1]
    pagelinks = sys.argv[2]

    check_shortest_path = False
    if len(sys.argv) == 4:
        if re.match(r"true", sys.argv[3].strip(), re.IGNORECASE):
            check_shortest_path = True
    #graph_utils.dump_geffi(nx.barabasi_albert_graph(yi_nodes_no, 22), graph_BA_model_geffi_filename)
    #ba = analyze_ba(yi_nodes_no, 22)
    #graph_utils.dump_json(ba, BA_modeling_yi_properties_filename)
    #ba = analyze_ba_spectrum(yi_nodes_no)
    #graph_utils.dump_json(ba, BA_properties_filename_37k)
    #graph_utils.dump_geffi(nx.barabasi_albert_graph(yi_nodes_no, 50), graph_BA_model_geffi_m50_filename)

    G = graph_utils.load(graph_cache_filename)
    if G is None:
        G = generate_graph(pages, pagelinks, nx.Graph())
        graph_utils.dump(G, graph_cache_filename)
    graph_utils.dump_geffi(G, graph_geffi_filename)
    printSomeThingsRegardingGraph(G, check_shortest_path=check_shortest_path)
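Example #2 relies on a local graph_utils module that is not shown here (and is a different helper from the graph_utils in Example #1, which loads a scan's connectivity file into a graph). Below is a minimal sketch of helpers consistent with how Example #2 calls them, assuming a pickle-based graph cache and GEXF export through networkx; the project's actual module may look different.

# graph_utils.py -- hypothetical helpers matching how Example #2 calls them.
import json
import os
import pickle

import networkx as nx


def load(path):
    """Return the pickled graph stored at `path`, or None if no cache exists."""
    if not os.path.exists(path):
        return None
    with open(path, 'rb') as f:
        return pickle.load(f)


def dump(graph, path):
    """Pickle `graph` to `path` so later runs can skip regeneration."""
    with open(path, 'wb') as f:
        pickle.dump(graph, f)


def dump_geffi(graph, path):
    """Export `graph` as GEXF so it can be opened in Gephi."""
    nx.write_gexf(graph, path)


def dump_json(obj, path):
    """Write `obj` (e.g. a dict of graph properties) as indented JSON."""
    with open(path, 'w') as f:
        json.dump(obj, f, indent=4)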