Пример #1
0
def subcommand_samples(input_file, state):
    """Render series of sample files to compare conversion settings.

    If the input is a PDF, one randomly chosen page is extracted as an image
    first and that image becomes the source of every sample. Depending on
    the flags set in ``state.args`` a series is generated for

    - ``threshold``: 40% to 95% in steps of 5, written as TIFF files,
    - ``quality``: 40 to 95 in steps of 5, written as colored PDF files,
    - ``blur``: 1 to 5, written as colored PDF files with quality 100.

    :param input_file: The input file.
    :type input_file: jfscripts._utils.FilePath
    :param state: The state object.
    :type state: jfscripts.pdf_compress.State

    :return: None
    """
    options = state.args

    def sample_path(extension, suffix):
        # Derive the output path from the current source file and drop the
        # '_-000' fragment from the resulting name.
        derived = input_file.new(extension=extension,
                                 append=suffix,
                                 del_substring=tmp_identifier)
        cleaned = str(derived).replace('_-000', '')
        return FilePath(cleaned, absolute=True)

    if state.input_is_pdf:
        total_pages = do_pdfinfo_page_count(input_file)
        chosen_page = random.randint(1, total_pages)
        notice = ('Used page number {} of {} pages to generate a series of '
                  'images with different threshold values.')
        print(notice.format(chosen_page, total_pages))
        do_pdfimages(input_file, state, chosen_page)
        extracted = collect_images(state)
        # From here on the extracted image is the source of all samples.
        input_file = FilePath(extracted[0], absolute=True)

    if options.threshold:
        for percent in range(40, 100, 5):
            target = sample_path('tiff', '_threshold-{}'.format(percent))
            do_magick_convert(input_file, target,
                              threshold='{}%'.format(percent))

    if options.quality:
        for level in range(40, 100, 5):
            target = sample_path('pdf', '_quality-{}'.format(level))
            do_magick_convert(input_file, target, color=True, quality=level)

    if options.blur:
        for radius in (1, 2, 3, 4, 5):
            target = sample_path('pdf', '_blur-{}'.format(radius))
            do_magick_convert(input_file, target, color=True, blur=radius,
                              quality=100)
 def test_extraction(self):
     """Run ``extract-pdftext.py`` on the test PDF and inspect the result.

     The script is expected to create a text file next to the PDF (same
     name, extension ``txt``) that contains a page heading, a known piece
     of text and a separator line of ``extract_pdftext.line_length``
     dashes.
     """
     pdf = FilePath(tmp_pdf)
     subprocess.check_output(['extract-pdftext.py', str(pdf)])
     txt = pdf.new(extension='txt')
     self.assertTrue(os.path.exists(str(txt)))
     # Read the file once and close the handle deterministically instead of
     # leaking one open file object per assertion.
     with open(str(txt)) as txt_file:
         content = txt_file.read()
     self.assertIn('## Seite', content)
     self.assertIn('Andrew Lloyd Webber', content)
     self.assertIn('-' * extract_pdftext.line_length, content)
Пример #3
0
def main():
    """Command line entry point: collect the text of a PDF page by page.

    Parses the command line arguments, sets up the ``run`` helper and checks
    the dependencies. A ``Txt`` object is then created for the path of the
    PDF with the extension ``txt``. It receives a title line with the
    basename of the PDF and, for every page, a dashed separator, a
    ``## Seite <number>`` heading and the output of ``get_text_per_page``.
    """
    args = get_parser().parse_args()
    run.setup(verbose=args.verbose, colorize=args.colorize)
    check_dependencies(*dependencies)

    pdf = FilePath(args.file, absolute=True)
    txt_file = Txt(pdf.new(extension='txt'))
    last_page = get_page_count(pdf)

    txt_file.add_line('# ' + pdf.basename)

    for page_number in range(1, last_page + 1):
        # Header of a page: separator and heading, each framed by blank lines.
        page_header = (
            '',
            '-' * line_length,
            '',
            '## Seite ' + str(page_number),
            '',
        )
        for header_line in page_header:
            txt_file.add_line(header_line)
        get_text_per_page(pdf, page_number, txt_file)
Пример #4
0
def main():
    """Command line entry point: add, convert or replace a page of a PDF.

    The image given on the command line is turned into a single PDF page
    (unless it already is a PDF) using the width of the image and the
    dimensions of the first page of the main PDF. Depending on the
    subcommand the page is then

    - ``add`` / ``ad`` / ``a``: inserted before or after a page of the main
      PDF (default: after the last page),
    - ``convert`` / ``cv`` / ``c``: moved to the path of the main PDF with
      ``_insert`` appended,
    - ``replace`` / ``re`` / ``r``: used to replace page ``args.number``.

    :raises ValueError: If the ``convert`` subcommand gets a PDF file
      instead of an image file.

    :return: None
    """
    args = get_parser().parse_args()

    run.setup(verbose=args.verbose, colorize=args.colorize)

    check_dependencies(*dependencies)

    main_pdf = FilePath(args.pdf, absolute=True)
    image = FilePath(args.image, absolute=True)

    if image.extension == 'pdf':
        # A PDF can be inserted as it is, no conversion needed.
        insert_pdf = image
    else:
        identify_pdf = do_pdftk_cat_first_page(main_pdf)
        pdf_dimensions = do_magick_identify_dimensions(identify_pdf)
        image_dimensions = do_magick_identify_dimensions(image)
        insert_pdf = convert_image_to_pdf_page(
            image,
            image_dimensions['width'],
            pdf_dimensions['width'],
            pdf_dimensions['x'],
        )

    info = get_pdf_info(main_pdf)

    if args.subcmd_args in ['add', 'ad', 'a']:

        if args.after:
            number = int(args.after[0])
            position = 'after'
        elif args.before:
            number = int(args.before[0])
            position = 'before'
        elif args.first:
            number = 1
            position = 'before'
        else:
            # ``args.last`` and "no position given" behave the same: the
            # page is appended after the last page.
            number = info['page_count']
            position = 'after'

        joined_pdf = assemble_pdf(main_pdf,
                                  insert_pdf,
                                  info['page_count'],
                                  number,
                                  mode='add',
                                  position=position)
        message = 'Successfully added the image “{}” {} page {} of the PDF ' \
                  'file “{}”. Result: “{}”'
        print(message.format(image, position, number, main_pdf, joined_pdf))

    elif args.subcmd_args in ['convert', 'cv', 'c']:
        if image.extension == 'pdf':
            # Raising a bare string is a TypeError in Python 3 and hides the
            # message, so raise a real exception.
            raise ValueError('Specify an image file, not a PDF file.')
        result_pdf = main_pdf.new(append='_insert')
        os.rename(str(insert_pdf), str(result_pdf))
        message = 'Successfully converted the image “{}” to the PDF file ' \
                  '“{}” using the dimensions of the PDF “{}”.'
        print(message.format(image, result_pdf, main_pdf))

    elif args.subcmd_args in ['replace', 're', 'r']:
        # Use the same page number for the replacement and for the message.
        number = args.number
        joined_pdf = assemble_pdf(main_pdf,
                                  insert_pdf,
                                  info['page_count'],
                                  number,
                                  mode='replace')
        message = 'Successfully replaced page {} of the PDF file “{}” with ' \
                  'the image “{}”. Result: “{}”'
        print(message.format(number, main_pdf, image, joined_pdf))
Пример #5
0
 def test_method_new(self):
     """Check the paths ``FilePath.new()`` derives from ``test.jpg``."""
     source = FilePath('test.jpg')
     # Keyword arguments for ``new()`` paired with the expected path string.
     expectations = (
         ({}, 'test.jpg'),
         ({'extension': 'png'}, 'test.png'),
         ({'append': '123'}, 'test123.jpg'),
         ({'del_substring': 'est'}, 't.jpg'),
     )
     for keywords, expected in expectations:
         self.assertEqual(str(source.new(**keywords)), expected)