def test_skip_to_global_left_binary():
    """Check binary-search partitioning of the skip-to-left sample.

    With a target size of 500 the sample is expected to be cut into
    exactly two parts; the trailing elements of each yielded entry
    (everything after the first item) are compared against the
    expected boundary pairs.
    """
    test_args = get_test_args(
        input_path=base_path('samples/skip-to-left-test.nt'),
        target_size=500,
        search_type='binary',
    )

    parts = list(compute_parts(test_args))
    assert len(parts) == 2

    # Drop the leading element of every entry and keep only the boundaries.
    spans = [part[1:] for part in parts]
    assert spans[0] == (314, 1703)
    assert spans[1] == (1703, 3587)
def test_skip_to_global_right_binary():
    """Check binary-search partitioning of the skip-to-right sample.

    With a target size of 500 the sample is expected to be cut into
    exactly two parts; the trailing elements of each yielded entry
    (everything after the first item) are compared against the
    expected boundary pairs.
    """
    test_args = get_test_args(
        input_path=base_path('samples/skip-to-right-test.nt'),
        target_size=500,
        search_type='binary',
    )

    parts = list(compute_parts(test_args))
    assert len(parts) == 2

    # Drop the leading element of every entry and keep only the boundaries.
    spans = [part[1:] for part in parts]
    assert spans[0] == (4615, 7243)
    assert spans[1] == (7243, 7743)
def test_skip_to_global_middle_binary():
    """Check binary-search partitioning of the skip-to-middle sample.

    With a target size of 500 the sample is expected to be cut into
    exactly three parts; the trailing elements of each yielded entry
    (everything after the first item) are compared against the
    expected boundary pairs.
    """
    test_args = get_test_args(
        input_path=base_path('samples/skip-to-middle-test.nt'),
        target_size=500,
        search_type='binary',
    )

    parts = list(compute_parts(test_args))
    assert len(parts) == 3

    # Drop the leading element of every entry and keep only the boundaries.
    spans = [part[1:] for part in parts]
    assert spans[0] == (1834, 6236)
    assert spans[1] == (6236, 6949)
    assert spans[2] == (6949, 7676)
def make_graph_elements(args):
    """Transform every part of the input file and save the predicate counts.

    The input is divided by ``compute_parts(args)`` into
    ``(part_path, left, right)`` entries, each of which is handed to
    ``transform_part``.  The collected results are converted with
    ``dict()`` (so each result must be a key/value pair) and written as
    JSON to ``<args.output_dir>/predicate-counts.json``.

    Args:
        args: Options object. The attributes read here are
            ``input_path``, ``shorten_uris``, ``parallel``,
            ``global_id_marker``, ``task_timeout`` (parallel mode only)
            and ``output_dir``; ``compute_parts`` may read others.

    Raises:
        multiprocessing.TimeoutError: In parallel mode, when a task does
            not finish within ``args.task_timeout`` seconds.
        Exception: Anything raised by ``transform_part`` is propagated
            (re-raised by ``AsyncResult.get`` in parallel mode).
    """
    print(f'Reading from {args.input_path} ...')

    prefixer = NamespacePrefixer() if args.shorten_uris else None

    if args.parallel:
        # NOTE(review): the prefixer is passed as a task argument, so each
        # worker presumably operates on its own pickled copy -- confirm
        # that this is the intended behaviour.
        pool = multiprocessing.Pool()
        try:
            tasks = [
                pool.apply_async(transform_part, (
                    args.input_path,
                    args.global_id_marker,
                    part_path,
                    left,
                    right,
                    prefixer,
                ))
                for part_path, left, right in compute_parts(args)
            ]
            results = [task.get(timeout=args.task_timeout) for task in tasks]
        except BaseException:
            # A timeout or worker failure must not leave worker processes
            # running in the background: stop them right away.
            pool.terminate()
            raise
        else:
            # All results are in; let the workers shut down normally.
            pool.close()
        finally:
            # Reap the worker processes in both the success and error case.
            pool.join()
    else:
        results = [
            transform_part(
                args.input_path,
                args.global_id_marker,
                part_path,
                left,
                right,
                prefixer,
            )
            for part_path, left, right in compute_parts(args)
        ]

    pcounts_path = os.path.join(args.output_dir, 'predicate-counts.json')
    with open(pcounts_path, 'w') as pcounts_file:
        json.dump(dict(results), pcounts_file, indent=4)

    print(f'\nDone! Predicate counts have been saved to {pcounts_path}')