Пример #1
0
def write_data_to_file(data, filename, quiet=False):
    """
        Writes the data to the given filename.
        If the data did not change, the file is not touched.

        data: bytes to write
        filename: destination path; a sanity check rejects overly long
            paths in case the arguments were swapped with `data`
        quiet: if True, suppress the debug log messages

        Raises ValueError if data is not bytes or filename is too long.
    """
    if not isinstance(data, bytes):
        msg = 'Expected "data" to be bytes, not %s.' % type(data).__name__
        raise ValueError(msg)
    if len(filename) > 256:
        msg = 'Invalid argument filename: too long. Did you confuse it with data?'
        raise ValueError(msg)

    make_sure_dir_exists(filename)

    if os.path.exists(filename):
        # Read in binary mode so we compare bytes to bytes; the original
        # text-mode read returned str, which on Python 3 never equals
        # `data` (bytes), so the file was rewritten on every call.
        # The context manager also closes the handle the original leaked.
        with open(filename, 'rb') as f:
            current = f.read()
        if current == data:
            if not 'assets' in filename:
                if not quiet:
                    logger.debug('already up to date %s' % friendly_path(filename))
            return

    with open(filename, 'wb') as f:
        f.write(data)

    if not quiet:
        logger.debug('Written to: %s' % friendly_path(filename))
Пример #2
0
def process_one(filename, page, target_pdf, target_svg, tmpdir, name):
    """ Extracts one page from a PDF, crops it, and writes PDF and SVG outputs. """
    # Intermediate artifacts are placed in tmpdir.
    page_pdf = os.path.join(tmpdir, name + '_extracted.pdf')
    unlabeled_pdf = os.path.join(tmpdir, name + '_extracted_no_label.pdf')

    extract_page(filename, page, page_pdf)
    # The extra 5cm margin presumably removes a label on one edge
    # — NOTE(review): confirm which edge with pdfcrop_margins' semantics.
    pdfcrop_margins(page_pdf, unlabeled_pdf, "0mm 0mm 0mm 5cm")
    pdfcrop(unlabeled_pdf, target_pdf)
    pdf2svg(target_pdf, target_svg)

    logger.info('Wrote %s' % friendly_path(target_pdf))
    logger.info('Wrote %s' % friendly_path(target_svg))
Пример #3
0
def embed_css_files(soup):
    """ Look for <link> elements of CSS and embed them if they are local files"""
    # <link href="..." rel="stylesheet" type="text/css"/>
    # Materialize the result list because we mutate the tree while iterating.
    for link in list(
            soup.findAll('link', attrs={
                'rel': 'stylesheet',
                'href': True
            })):
        href = link.attrs['href']
        if href.startswith('file://'):
            # NOTE: replaces every occurrence of 'file://', not only the prefix.
            filename = href.replace('file://', '')
        elif href.startswith('/'):  # not on windows?
            filename = href
        else:
            # Remote or relative URL: leave the <link> element untouched.
            filename = None

        if filename is not None:

            if not os.path.exists(filename):
                msg = 'Cannot find CSS file %s' % filename
                logger.error(msg)
            else:
                logger.info('Embedding %r' % friendly_path(filename))
                # Context manager closes the handle the original leaked.
                with open(filename) as f:
                    data = f.read()
                style = Tag(name='style')
                style.attrs['type'] = 'text/css'
                style.string = data
                link.replace_with(style)
Пример #4
0
 def check(ext, msg):
     """ Raises an Exception listing the offending files if `ext` is present. """
     if ext in ext2filenames:
         listing = ''.join('\n  %s ' % friendly_path(f)
                           for f in ext2filenames[ext])
         raise Exception(msg + '\nOffending files: ' + listing)
Пример #5
0
def process_svg_files(root, out, preamble):
    """
        Converts every *.svg found under `root` into a .pdf placed next
        to the source SVG, skipping targets that are already up to date.

        root: directory to search for SVG files
        out: directory created if missing
            (NOTE(review): otherwise unused — confirm whether targets
            were meant to go here; see note below)
        preamble: passed through to process_svg_file
    """
    logger.info('root = %s' % root)
    logger.info('out = %s' % out)
    logger.info('preamble = %s' % preamble)
    if not os.path.exists(out):
        os.makedirs(out)
    logger.info('Looking for *.svg files...')
    svgs = locate_files(root, '*.svg')
    logger.info('%d found in %s' % (len(svgs), friendly_path(root)))

    errors = []
    for f in svgs:
        dirname = os.path.dirname(f)
        basename, _ = os.path.splitext(os.path.basename(f))
        # The PDF goes next to the source SVG. The original also computed
        # a target inside `out` and immediately overwrote it with this one;
        # that dead assignment is removed here (effective behavior unchanged).
        target = os.path.join(dirname, basename + '.pdf')
        if not needs_remake(f, target):
            msg = 'The target %r is up to date.' % target
            logger.info(msg)

        else:
            msg = 'Will build target %r.' % target
            logger.info(msg)
            tmpdir = create_tmpdir('svg-%s' % basename)

            try:
                process_svg_file(f, target, preamble, tmpdir)
            except Exception as e:
                # Best-effort: log the failure and continue with other files.
                logger.error(e)
                errors.append(e)
    # NOTE(review): `errors` is collected but never returned or re-raised.
Пример #6
0
 def __repr__(self):
     """ Human-readable summary: filename plus GitHub info when available. """
     github = self.github_info if self.github_info is not None else '(not available)'
     d = OrderedDict([
         ('filename', friendly_path(self.filename)),
         ('github', github),
     ])
     return "LocalFile" + '\n' + indent(pretty_print_dict(d), '| ')
Пример #7
0
def check_bad_input_file_presence(d):
    """ Builds extension statistics for `d` and runs the input-file checks. """
    ext2filenames = collect_by_extension(d)

    # Report of file counts per extension, most frequent extension first.
    s = '## Filename extensions statistics'
    s += "\nFound in %s:" % friendly_path(d)
    by_frequency = sorted(ext2filenames, key=lambda k: -len(ext2filenames[k]))
    for ext in by_frequency:
        label = ext if ext else '(no ext)'
        s += '\n %3d  %10s  files' % (len(ext2filenames[ext]), label)
    # The report string is built but currently not logged anywhere.
    # from mcdp import logger
    # logger.info(s)

    no_forbidden(ext2filenames)
    check_lfs_checkout(ext2filenames)
Пример #8
0
def read_commands_from_file(filename, context):
    """
        Reads a compmake rc file and interprets each non-empty,
        non-comment line as a command.

        A file already recorded in context.rc_files_read is skipped,
        so each rc file is processed at most once per context.
    """
    from compmake.jobs.uptodate import CacheQueryDB

    filename = os.path.realpath(filename)
    # Guard: never read the same rc file twice.
    if filename in context.rc_files_read:
        return
    context.rc_files_read.append(filename)

    cq = CacheQueryDB(context.get_compmake_db())
    assert context is not None
    info('Reading configuration from %r.' % friendly_path(filename))
    with open(filename, 'r') as f:
        for raw in f:
            command = raw.strip()
            # Skip blank lines and '#' comments.
            if not command or command.startswith('#'):
                continue
            interpret_commands_wrap(command, context=context, cq=cq)
Пример #9
0
def read_commands_from_file(filename, context):
    """
        Reads commands from a compmake rc file, one per line, and
        executes each via interpret_commands_wrap().

        Blank lines and lines starting with '#' are skipped.
        A file already listed in context.rc_files_read is not read again.

        NOTE(review): this appears to be a duplicate of another
        read_commands_from_file definition in this file — consider
        keeping only one.
    """
    from compmake.jobs.uptodate import CacheQueryDB

    # Normalize so the "already read" check matches symlinked paths too.
    filename = os.path.realpath(filename)
    if filename in context.rc_files_read:
        return
    else:
        context.rc_files_read.append(filename)

    cq = CacheQueryDB(context.get_compmake_db())
    assert context is not None
    info('Reading configuration from %r.' % friendly_path(filename))
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            if line[0] == '#':
                continue
            interpret_commands_wrap(line, context=context, cq=cq)
Пример #10
0
def manual_jobs(context,
                src_dirs,
                output_file,
                generate_pdf,
                stylesheet,
                use_mathjax,
                raise_errors,
                resolve_references=True,
                remove=None,
                filter_soup=None,
                extra_css=None,
                symbols=None,
                do_last_modified=False):
    """
        Schedules the compmake jobs that render each markdown source,
        join the parts, and write the final book output_file.

        src_dirs: list of sources
        symbols: a TeX preamble (or None)

        Raises Exception if no markdown files are found, or if a file
        cannot be matched to any of src_dirs.
    """
    filenames = get_markdown_files(src_dirs)
    print('using:')
    print("\n".join(filenames))

    if not filenames:
        msg = 'Could not find any file for composing the book.'
        raise Exception(msg)

    files_contents = []
    for i, filename in enumerate(filenames):
        if is_ignored_by_catkin(filename):
            logger.debug('Ignoring because of CATKIN_IGNORE: %s' % filename)
            continue
        logger.info('adding document %s ' % friendly_path(filename))

        docname, _ = os.path.splitext(os.path.basename(filename))

        contents = open(filename).read()
        contents_hash = get_md5(contents)[:8]
        # because of hash job will be automatically erased if the source changes
        out_part_basename = '%03d-%s-%s' % (i, docname, contents_hash)
        job_id = '%s-%s-%s' % (docname, get_md5(filename)[:8], contents_hash)

        source_info = get_source_info(filename)

        # find the dir
        for d in src_dirs:
            if os.path.realpath(d) in filename:
                break
        else:
            msg = 'Could not find dir for %s in %s' % (filename, src_dirs)
            # FIX: the message was computed but never raised, so the error
            # was silently swallowed; the sibling manual_jobs raises here.
            raise Exception(msg)

        html_contents = context.comp(render_book,
                                     generate_pdf=generate_pdf,
                                     src_dirs=src_dirs,
                                     data=contents,
                                     realpath=filename,
                                     use_mathjax=use_mathjax,
                                     symbols=symbols,
                                     raise_errors=raise_errors,
                                     main_file=output_file,
                                     out_part_basename=out_part_basename,
                                     filter_soup=filter_soup,
                                     extra_css=extra_css,
                                     job_id=job_id)

        doc = DocToJoin(docname=out_part_basename,
                        contents=html_contents,
                        source_info=source_info)
        files_contents.append(tuple(doc))  # compmake doesn't do namedtuples

    bib_files = get_bib_files(src_dirs)

    logger.debug('Found bib files:\n%s' % "\n".join(bib_files))
    if bib_files:
        bib_contents = job_bib_contents(context, bib_files)
        entry = DocToJoin(docname='bibtex',
                          contents=bib_contents,
                          source_info=None)
        files_contents.append(tuple(entry))

    if do_last_modified:
        data = context.comp(make_last_modified, files_contents=files_contents)
        entry = DocToJoin(docname='last_modified',
                          contents=data,
                          source_info=None)
        files_contents.append(tuple(entry))

    root_dir = src_dirs[0]

    template = get_main_template(root_dir)

    references = OrderedDict()
    #     base_url = 'http://book.duckietown.org/master/duckiebook/pdoc'
    #     for extra_dir in extra_dirs:
    #         res = read_references(extra_dir, base_url, prefix='python:')
    #         references.update(res)

    #     extra = look_for_files(extra_dirs, "*.html")
    #
    #     for filename in extra:
    #         contents = open(filename).read()
    #         docname = os.path.basename(filename) + '_' + get_md5(filename)[:5]
    #         c = (('unused', docname), contents)
    #         files_contents.append(c)

    d = context.comp(manual_join,
                     template=template,
                     files_contents=files_contents,
                     stylesheet=stylesheet,
                     remove=remove,
                     references=references,
                     resolve_references=resolve_references)

    context.comp(write, d, output_file)

    if os.path.exists(MCDPManualConstants.pdf_metadata_template):
        context.comp(generate_metadata, root_dir)
Пример #11
0
def manual_jobs(context,
                src_dirs,
                resources_dirs,
                out_split_dir,
                output_file,
                generate_pdf,
                stylesheet,
                stylesheet_pdf,
                use_mathjax,
                raise_errors,
                resolve_references=True,
                remove=None,
                filter_soup=None,
                symbols=None,
                out_pdf=None,
                only_refs=False,
                permalink_prefix=None,
                compose_config=None,
                output_crossref=None,
                do_last_modified=False,
                wordpress_integration=False,
                ignore_ref_errors=False,
                likebtn=None,
                extra_crossrefs=None):
    """
        Schedules the compmake jobs that render each markdown source,
        join the parts, and produce the requested outputs: a single
        HTML file (output_file), a split version (out_split_dir),
        and/or a PDF (out_pdf).

        src_dirs: list of sources
        symbols: a TeX preamble (or None)
    """
    #
    # if symbols is not None:
    #     symbols = open(symbols).read()
    # Fall back to the HTML stylesheet when no PDF-specific one is given.
    if stylesheet_pdf is None:
        stylesheet_pdf = stylesheet
    # outdir = os.path.dirname(out_split_dir)  # XXX
    filenames = get_markdown_files(src_dirs)

    if not filenames:
        msg = "Could not find any file for composing the book."
        raise Exception(msg)

    files_contents = []
    for i, filename in enumerate(filenames):
        if is_ignored_by_catkin(filename):
            logger.debug('Ignoring because of CATKIN_IGNORE: %s' % filename)
            continue
        logger.info('adding document %s ' % friendly_path(filename))

        docname, _ = os.path.splitext(os.path.basename(filename))

        contents = open(filename).read()
        contents_hash = get_md5(contents)[:8]
        # because of hash job will be automatically erased if the source changes
        out_part_basename = '%03d-%s-%s' % (i, docname, contents_hash)
        job_id = '%s-%s-%s' % (docname, get_md5(filename)[:8], contents_hash)

        # Missing source info (e.g. file not under version control) is
        # tolerated: warn and continue with source_info = None.
        try:
            source_info = get_source_info(filename)
        except NoSourceInfo as e:
            logger.warn('No source info for %s:\n%s' % (filename, e))
            source_info = None

        # Sanity check: every file must live under one of src_dirs.
        for d in src_dirs:
            if filename.startswith(d):
                break
        else:
            msg = "Could not find dir for %s in %s" % (filename, src_dirs)
            raise Exception(msg)

        html_contents = context.comp(render_book,
                                     generate_pdf=generate_pdf,
                                     src_dirs=src_dirs + resources_dirs,
                                     data=contents,
                                     realpath=filename,
                                     use_mathjax=use_mathjax,
                                     symbols=symbols,
                                     raise_errors=raise_errors,
                                     filter_soup=filter_soup,
                                     ignore_ref_errors=ignore_ref_errors,
                                     job_id=job_id)

        doc = DocToJoin(docname=out_part_basename,
                        contents=html_contents,
                        source_info=source_info)
        files_contents.append(tuple(doc))  # compmake doesn't do namedtuples

    # Do not read back the crossref file we are about to write ourselves.
    ignore = []
    if output_crossref:
        ignore.append(output_crossref)

    crossrefs_aug = get_cross_refs(resources_dirs,
                                   permalink_prefix,
                                   extra_crossrefs,
                                   ignore=ignore)

    bib_files = get_bib_files(src_dirs)

    logger.debug('Found bib files:\n%s' % "\n".join(bib_files))
    if bib_files:
        bib_contents_aug = job_bib_contents(context, bib_files)
        entry = DocToJoin(docname='bibtex',
                          contents=bib_contents_aug,
                          source_info=None)
        files_contents.append(tuple(entry))

    if do_last_modified:
        data_aug = context.comp(make_last_modified,
                                files_contents=files_contents)
        entry = DocToJoin(docname='last_modified',
                          contents=data_aug,
                          source_info=None)
        files_contents.append(tuple(entry))

    root_dir = src_dirs[0]

    template = get_main_template(root_dir, resources_dirs)

    references = OrderedDict()
    #     base_url = 'http://book.duckietown.org/master/duckiebook/pdoc'
    #     for extra_dir in extra_dirs:
    #         res = read_references(extra_dir, base_url, prefix='python:')
    #         references.update(res)

    #     extra = look_for_files(extra_dirs, "*.html")
    #
    #     for filename in extra:
    #         contents = open(filename).read()
    #         docname = os.path.basename(filename) + '_' + get_md5(filename)[:5]
    #         c = (('unused', docname), contents)
    #         files_contents.append(c)

    # Hash of the crossrefs goes into the job id, so the join job is
    # re-run whenever the crossrefs change.
    cs = get_md5((crossrefs_aug.get_result()))[:8]

    joined_aug = context.comp(manual_join,
                              template=template,
                              files_contents=files_contents,
                              stylesheet=None,
                              remove=remove,
                              references=references,
                              resolve_references=resolve_references,
                              crossrefs_aug=crossrefs_aug,
                              permalink_prefix=permalink_prefix,
                              job_id='join-%s' % cs)

    if compose_config is not None:
        try:
            # NOTE(review): yaml.load without an explicit Loader is
            # deprecated and unsafe on untrusted input; prefer yaml.safe_load.
            data = yaml.load(open(compose_config).read())  # XXX
            compose_config_interpreted = ComposeConfig.from_yaml(data)
        except ValueError as e:
            msg = 'Cannot read YAML config file %s' % compose_config
            raise_wrapped(UserError, e, msg, compact=True)
        else:
            joined_aug = context.comp(make_composite,
                                      compose_config_interpreted, joined_aug)

    joined_aug = context.comp(mark_errors_and_rest, joined_aug)

    if likebtn:
        joined_aug = context.comp(add_likebtn, joined_aug, likebtn)

    if wordpress_integration:
        joined_aug = context.comp(add_related, joined_aug)

    if output_file is not None:
        context.comp(write, joined_aug, output_file)

    if out_split_dir is not None:

        # The split HTML version gets the HTML stylesheet applied.
        joined_aug_with_html_stylesheet = context.comp(add_style, joined_aug,
                                                       stylesheet)

        extra_panel_content = context.comp(get_extra_content,
                                           joined_aug_with_html_stylesheet)
        id2filename_aug = context.comp_dynamic(
            create_split_jobs,
            data_aug=joined_aug_with_html_stylesheet,
            mathjax=True,
            preamble=symbols,
            extra_panel_content=extra_panel_content,
            output_dir=out_split_dir,
            nworkers=0,
            output_crossref=output_crossref,
            permalink_prefix=permalink_prefix,
            only_refs=only_refs)

        if not only_refs:
            context.comp(write_errors_and_warnings_files, id2filename_aug,
                         out_split_dir)
        context.comp(write_manifest_html, out_split_dir)

    if out_pdf is not None:
        # The PDF pipeline uses the (possibly different) PDF stylesheet.
        joined_aug_with_pdf_stylesheet = context.comp(add_style, joined_aug,
                                                      stylesheet_pdf)
        prerendered = context.comp(prerender,
                                   joined_aug_with_pdf_stylesheet,
                                   symbols=symbols)
        pdf_data = context.comp(render_pdf, prerendered)
        context.comp(write_data_to_file, pdf_data, out_pdf)
        context.comp(write_manifest_pdf, out_pdf)