async def cmd_index(args):
    """
    Print an index of the text found under strokes on each page of a PDF.

    The document given by `args.input` is looked up in the context
    metadata, its files are fetched with `ctx.sftp.mget` into
    `ctx.dir_data`, and the parsed document items are grouped into
    (page, strokes) pairs. For every pair a page header is printed,
    followed by one bullet line per stroke and a blank line.

    :param args: Parsed command line arguments; only `args.input` is read.
    """
    def page_heading(pdf_page):
        # header line carries its own trailing newline, so `print` leaves
        # an empty line between the header and the bullets
        return '#. Page {} ({})\n'.format(
            pdf_page.get_label(), pdf_page.get_index()
        )

    text_under = curry(pdf_text)
    wanted = flip(isinstance, (Page, Stroke))
    starts_group = flip(isinstance, Page)
    as_bullet = ' * ``{}``'.format

    doc_path = norm_path(args.input)
    # NOTE(review): the whole pipeline is assumed to run while the context
    # is still open, as the fetched files live in `ctx.dir_data` -- confirm
    async with remt_ctx() as ctx:
        meta = fn_metadata(ctx.meta, doc_path)

        # fetch every file belonging to the document
        await ctx.sftp.mget(
            fn_path(meta, ext='*'), ctx.dir_data, recurse=True
        )

        pdf_file = fn_path(meta, base=ctx.dir_data, ext='.pdf')
        page_at = pdf_open(pdf_file).get_page

        # keep pages and strokes only, then group strokes under their page
        relevant = filter(wanted, parse_document(ctx, meta))
        for page, strokes in split(starts_group, relevant):
            pdf_page = page_at(page.number)
            extract = text_under(pdf_page)

            print(page_heading(pdf_page))
            for stroke in strokes:
                print(as_bullet(extract(stroke)))
            print()
def test_split():
    """
    Test grouping of a sequence into (key, items) pairs by a key function.
    """
    sequence = [
        'a', 1, 2, 3,
        'b', 5, 6, 7, 8,
        'c', 4, 3,
        'd',
        'e', 1, 2, 3, 4,
    ]

    groups = list(split(flip(isinstance, str), sequence))

    # 'd' is followed by no items and does not appear in the expected output
    assert groups == [
        ('a', (1, 2, 3)),
        ('b', (5, 6, 7, 8)),
        ('c', (4, 3)),
        ('e', (1, 2, 3, 4)),
    ]
def test_flip():
    """
    Test calling a two-argument function with its arguments swapped.
    """
    # single definition: the test used to be declared twice under the same
    # name, so the later copy silently replaced the earlier one
    def f(a, b):
        return a, b

    assert flip(f, 'a', 'b') == ('b', 'a')