Пример #1
0
def test_skip_to_global_left_binary():
    """Check the parts computed for ``skip-to-left-test.nt`` (binary search).

    With a target size of 500 exactly two parts are expected; for each part
    everything after its first element is compared against known values.
    """
    test_args = get_test_args(
        input_path=base_path('samples/skip-to-left-test.nt'),
        target_size=500,
        search_type='binary',
    )
    parts = list(compute_parts(test_args))

    # Verify the count first so the unpacking below cannot fail on arity.
    assert len(parts) == 2
    first, second = parts
    assert first[1:] == (314, 1703)
    assert second[1:] == (1703, 3587)
Пример #2
0
def test_skip_to_global_right_binary():
    """Check the parts computed for ``skip-to-right-test.nt`` (binary search).

    With a target size of 500 exactly two parts are expected; for each part
    everything after its first element is compared against known values.
    """
    test_args = get_test_args(
        input_path=base_path('samples/skip-to-right-test.nt'),
        target_size=500,
        search_type='binary',
    )
    parts = list(compute_parts(test_args))

    # Verify the count first so the unpacking below cannot fail on arity.
    assert len(parts) == 2
    first, second = parts
    assert first[1:] == (4615, 7243)
    assert second[1:] == (7243, 7743)
Пример #3
0
def test_skip_to_global_middle_binary():
    """Check the parts computed for ``skip-to-middle-test.nt`` (binary search).

    With a target size of 500 exactly three parts are expected; for each part
    everything after its first element is compared against known values.
    """
    test_args = get_test_args(
        input_path=base_path('samples/skip-to-middle-test.nt'),
        target_size=500,
        search_type='binary',
    )
    parts = list(compute_parts(test_args))

    # Verify the count first so the unpacking below cannot fail on arity.
    assert len(parts) == 3
    first, second, third = parts
    assert first[1:] == (1834, 6236)
    assert second[1:] == (6236, 6949)
    assert third[1:] == (6949, 7676)
Пример #4
0
def make_graph_elements(args):
    """Transform every part of the input file and save the predicate counts.

    ``compute_parts(args)`` supplies ``(part_path, left, right)`` triples and
    each triple is handed to ``transform_part``.  When ``args.parallel`` is
    truthy the parts are processed by a ``multiprocessing.Pool``; otherwise
    they are processed one after another in the current process.  The
    collected results are converted with ``dict(...)`` (so every result must
    be a key/value pair) and written as JSON to ``predicate-counts.json``
    inside ``args.output_dir``.

    Args:
        args: Options object.  The attributes read here are ``input_path``,
            ``shorten_uris``, ``parallel``, ``global_id_marker``,
            ``task_timeout`` (parallel mode only) and ``output_dir``.  The
            object is also passed unchanged to ``compute_parts``.

    Raises:
        multiprocessing.TimeoutError: In parallel mode, when a part's result
            does not arrive within ``args.task_timeout``.  Exceptions raised
            inside a worker are re-raised here as well.
    """
    print(f'Reading from {args.input_path} ...')

    prefixer = NamespacePrefixer() if args.shorten_uris else None

    def part_arguments():
        # Single source of the positional arguments for transform_part, so
        # the parallel and sequential paths cannot drift apart.
        for part_path, left, right in compute_parts(args):
            yield (
                args.input_path,
                args.global_id_marker,
                part_path,
                left,
                right,
                prefixer,
            )

    if args.parallel:
        # The context manager terminates the pool on exit.  That matters on
        # the error path: if a get() below raises (timeout or worker error),
        # the worker processes are torn down instead of being leaked.
        with multiprocessing.Pool() as pool:
            tasks = [
                pool.apply_async(transform_part, part_args)
                for part_args in part_arguments()
            ]
            results = [task.get(timeout=args.task_timeout) for task in tasks]
            # Success path: every result is in, so shut the workers down
            # gracefully and wait for them before leaving the block.
            pool.close()
            pool.join()
    else:
        results = [
            transform_part(*part_args) for part_args in part_arguments()
        ]

    pcounts_path = os.path.join(args.output_dir, 'predicate-counts.json')
    with open(pcounts_path, 'w') as pcounts_file:
        json.dump(dict(results), pcounts_file, indent=4)
    print(f'\nDone! Predicate counts have been saved to {pcounts_path}')