Пример #1
0
def add_a_target_blank(a):
    """Make an outbound anchor open in a new browser tab.

    An anchor is treated as outbound when it has an ``href`` attribute whose
    value starts with ``http://`` or ``https://``.  Such anchors get
    ``target="_blank"`` and the change is reported through ``status.log``;
    every other anchor is left untouched.

    :param a: anchor tag exposing ``has_attr`` and item access.
    :return: the same ``a`` object that was passed in.
    """
    if not a.has_attr('href'):
        return a
    href = a['href']
    if href.startswith(('http://', 'https://')):
        a['target'] = '_blank'
        message = "... an outbound link, add target='_blank' tag"
        status.log(NAME, message)
    return a
Пример #2
0
def add_a_target_blank(a):
    """Set ``target="_blank"`` on anchors that point to an absolute URL.

    Only anchors with an ``href`` beginning with ``http://`` or ``https://``
    are modified (and logged); relative links and anchors without ``href``
    pass through unchanged.

    :param a: anchor tag exposing ``has_attr`` and item access.
    :return: the anchor that was passed in.
    """
    outbound_schemes = ('http://', 'https://')
    if a.has_attr('href') and a['href'].startswith(outbound_schemes):
        a['target'] = '_blank'
        status.log(NAME, "... an outbound link, add target='_blank' tag")
    return a
Пример #3
0
def make_tree(tree_segments):
    """Join path segments into a directory path and create it if missing.

    :param tree_segments: sequence of path components, unpacked into
        ``os.path.join``.
    :return: the joined path, whether it was just created or already there.
    """
    tree = os.path.join(*tree_segments)
    if os.path.exists(tree):
        status.log(NAME, ('Tree', tree, 'already exists OK'))
        return tree
    # Path is absent: report it, then build every missing directory level
    status.log(NAME, ('Making', tree))
    os.makedirs(tree)
    return tree
Пример #4
0
def make_tree(tree_segments):
    """Ensure the directory described by ``tree_segments`` exists.

    The segments are joined with ``os.path.join``; the resulting path is
    created with ``os.makedirs`` unless something already exists there.
    Either outcome is reported through ``status.log``.

    :param tree_segments: sequence of path components.
    :return: the joined path.
    """
    tree = os.path.join(*tree_segments)
    already_there = os.path.exists(tree)
    if already_there:
        status.log(NAME, ('Tree', tree, 'already exists OK'))
    else:
        status.log(NAME, ('Making', tree))
        os.makedirs(tree)
    return tree
Пример #5
0
def add_a_class(a):
    """Tag a non-static anchor with the ``link--impt`` class.

    Anchors without ``href``, or whose ``href`` starts with ``/static``,
    are returned untouched.  For every other anchor the ``class`` attribute
    is replaced by ``link--impt`` and the change is logged.

    :param a: anchor tag exposing ``has_attr`` and item access.
    :return: the same ``a`` object.
    """
    css_class = "link--impt"
    if not a.has_attr('href'):
        return a
    if a['href'].startswith('/static'):
        return a
    a['class'] = css_class
    status.log(NAME, "... add class '{}' to anchor".format(css_class))
    return a
Пример #6
0
def add_a_class(a):
    """Set the ``link--impt`` class on anchors that do not target ``/static``.

    :param a: anchor tag exposing ``has_attr`` and item access.
    :return: the anchor that was passed in; its ``class`` attribute is
        overwritten only when it has an ``href`` not starting with
        ``/static``.
    """
    css_class = "link--impt"
    if a.has_attr('href') and not a['href'].startswith('/static'):
        a['class'] = css_class
        message = "... add class '{}' to anchor".format(css_class)
        status.log(NAME, message)
    return a
Пример #7
0
def overwrite_leaves(tree, leaves):
    """Write every leaf into ``tree``, replacing any existing file.

    :param tree: directory the files are written into.
    :param leaves: iterable of sequences whose first item is the content and
        whose second item is the file name relative to ``tree``.
    :return: None.

    A leaf whose name ends in ``.json`` is serialised with ``json.dump``
    (4-space indent) followed by a newline; any other leaf is written as
    ``str(content)``.
    """
    for leaf in leaves:
        content, name = leaf[0], leaf[1]
        path_leaf = os.path.join(tree, name)
        status.log(NAME, ('Writing in', path_leaf))
        # Text mode on purpose: json.dump() and str() both produce text, and
        # a file opened in binary mode rejects that with TypeError on
        # Python 3.
        with open(path_leaf, 'w') as f:
            if name.endswith('.json'):
                json.dump(content, f, indent=4)
                f.write("\n")
            else:
                f.write(str(content))
    return
Пример #8
0
def overwrite_leaves(tree, leaves):
    """Write each ``(content, file name)`` leaf to a file under ``tree``.

    :param tree: directory the files are written into.
    :param leaves: iterable of sequences; item 0 is the content, item 1 the
        file name relative to ``tree``.
    :return: None.

    ``.json`` leaves are dumped with ``json.dump`` (indent of 4) plus a
    trailing newline; every other leaf is written as ``str(content)``.
    Existing files are overwritten.
    """
    for leaf in leaves:
        content, name = leaf[0], leaf[1]
        path_leaf = os.path.join(tree, name)
        status.log(NAME, ('Writing in', path_leaf))
        # Opened as text rather than 'wb': both branches write str objects,
        # which a binary file refuses (TypeError) on Python 3.
        with open(path_leaf, 'w') as f:
            if name.endswith('.json'):
                json.dump(content, f, indent=4)
                f.write("\n")
            else:
                f.write(str(content))
    return
Пример #9
0
def add_img_alt(img):
    """Derive an ``alt`` text for ``img`` from its ``src`` path.

    The text is built from the name of the directory that directly contains
    the image (dashes turned into spaces, then capitalised), a space, and the
    file name without its extension and with every ``image`` substring
    removed.  The result is stored in ``img['alt']`` and logged.

    :param img: image tag supporting item access; must have a ``src``.
    :return: the same ``img`` object.
    """
    src = img['src']
    parent_dir = os.path.basename(os.path.dirname(src))
    stem = os.path.splitext(os.path.basename(src))[0]
    image_id = stem.replace('image', '')
    alt = parent_dir.replace('-', ' ').capitalize() + ' ' + image_id
    img['alt'] = alt
    status.log(NAME, "... img, add alt='{}'".format(alt))
    return img
Пример #10
0
def add_img_alt(img):
    """Set ``img['alt']`` to a readable label computed from ``img['src']``.

    Label layout: ``<Parent directory> <id>`` where the parent directory
    name has its dashes replaced by spaces and is capitalised, and ``<id>``
    is the extension-less file name with ``image`` stripped out.

    :param img: image tag supporting item access; must have a ``src``.
    :return: the image tag that was passed in.
    """
    directory, file_name = os.path.split(img['src'])
    folder_label = os.path.basename(directory).replace('-', ' ').capitalize()
    image_id = os.path.splitext(file_name)[0].replace('image', '')
    alt_text = folder_label + ' ' + image_id
    img['alt'] = alt_text
    status.log(NAME, "... img, add alt='{}'".format(alt_text))
    return img
Пример #11
0
def translate_script_src(soup, dir_url, translate_static):
    """Rewrite the static prefix of every ``<script src=...>`` in ``soup``.

    :param soup: parsed document exposing ``findAll``.
    :param dir_url: passed through to ``get_new``.
    :param translate_static: mapping whose keys are the ``src`` prefixes to
        look for; the matching value is passed to ``get_new``.
    :return: the same ``soup`` object.

    For each script tag the first key that prefixes its ``src`` wins: the
    prefix is swapped for whatever ``get_new`` returns and the remaining
    keys are skipped.  Tags without ``src`` are ignored.
    """
    for script in soup.findAll('script'):
        if not script.has_attr('src'):
            continue
        for src_head in translate_static:
            if not script['src'].startswith(src_head):
                continue
            status.log(NAME, ('src (static) to translate found: ', script['src']))
            new = get_new(script['src'], translate_static[src_head], dir_url)
            # Replace the matched leading prefix only; an unbounded replace
            # would also rewrite later occurrences of the same text (and
            # would scatter `new` through the value for an empty prefix).
            script['src'] = script['src'].replace(src_head, new, 1)
            status.log(NAME, ('... translated to (but will get stripped): ', script['src']))
            break
    return soup
Пример #12
0
def translate_link_href(soup, dir_url, translate_static):
    """Rewrite the static prefix of every ``<link href=...>`` in ``soup``.

    :param soup: parsed document exposing ``findAll``.
    :param dir_url: passed through to ``get_new``.
    :param translate_static: mapping whose keys are the ``href`` prefixes to
        look for; the matching value is passed to ``get_new``.
    :return: the same ``soup`` object.

    For each link tag the first key that prefixes its ``href`` wins: the
    prefix is swapped for whatever ``get_new`` returns and the remaining
    keys are skipped.  Tags without ``href`` are ignored.
    """
    for link in soup.findAll('link'):
        if not link.has_attr('href'):
            continue
        for href_head in translate_static:
            if not link['href'].startswith(href_head):
                continue
            status.log(NAME, ('href (static) to translate found: ', link['href']))
            new = get_new(link['href'], translate_static[href_head], dir_url)
            # Replace the matched leading prefix only; an unbounded replace
            # would also rewrite later occurrences of the same text (and
            # would scatter `new` through the value for an empty prefix).
            link['href'] = link['href'].replace(href_head, new, 1)
            status.log(NAME, ('... translated to: ', link['href']))
            break
    return soup
Пример #13
0
def strip(body):
    """Remove presentational attributes and all ``<script>`` tags from ``body``.

    The attributes ``class``, ``id``, ``name`` and ``style`` are deleted from
    ``body`` itself and from every tag yielded by calling ``body()``
    (presumably BeautifulSoup's shorthand for finding all descendants --
    confirm against the parser in use).  Afterwards every ``<script>`` found
    in ``body`` is extracted.  Both steps are logged.

    :param body: tag object supporting ``del tag[attr]``, ``body()`` and
        ``findAll``.
    :return: the same ``body`` object.
    """
    unwanted_attrs = ["class", "id", "name", "style"]
    for node in [body] + list(body()):
        for attr in unwanted_attrs:
            del node[attr]
    status.log(NAME, (
        "Striping attributes", ' | '.join(unwanted_attrs), 'inside body !'))
    for script in body.findAll('script'):
        script.extract()
    status.log(NAME, "Striping all <script> inside body !")
    return body
Пример #14
0
def translate_link_href(soup, dir_url, translate_static):
    """Translate static ``href`` prefixes on the ``<link>`` tags of ``soup``.

    :param soup: parsed document exposing ``findAll``.
    :param dir_url: passed through to ``get_new``.
    :param translate_static: mapping of ``href`` prefix to the value handed
        to ``get_new`` for that prefix.
    :return: the same ``soup`` object.

    Links without ``href`` are skipped.  Only the first prefix that matches
    a given link is applied.
    """
    for link in soup.findAll('link'):
        if not link.has_attr('href'):
            continue
        for href_head in translate_static:
            if not link['href'].startswith(href_head):
                continue
            status.log(NAME, (
                'href (static) to translate found: ', link['href']))
            new = get_new(link['href'],
                          translate_static[href_head], dir_url)
            # count=1: swap the leading prefix that was just matched and
            # leave any later occurrence of the same text alone.
            link['href'] = link['href'].replace(href_head, new, 1)
            status.log(NAME, (
                '... translated to: ', link['href']))
            break
    return soup
Пример #15
0
def strip(body):
    """Drop styling/table-layout attributes and ``<script>`` tags from ``body``.

    Deleted attributes: ``class``, ``id``, ``name``, ``style``, ``colspan``,
    ``rowspan``, ``cellpadding`` and ``cellspacing``.  They are removed from
    ``body`` and from every tag yielded by calling ``body()`` (presumably
    BeautifulSoup's shorthand for finding all descendants -- confirm against
    the parser in use).  Then each ``<script>`` inside ``body`` is extracted.

    :param body: tag object supporting ``del tag[attr]``, ``body()`` and
        ``findAll``.
    :return: the same ``body`` object.
    """
    unwanted_attrs = ["class", "id", "name", "style",
                      "colspan", "rowspan", "cellpadding", "cellspacing"]
    for node in [body] + list(body()):
        for attr in unwanted_attrs:
            del node[attr]
    status.log(NAME, (
        "Striping attributes", ' | '.join(unwanted_attrs), 'inside body !'))
    for script in body.findAll('script'):
        script.extract()
    status.log(NAME, "Striping all <script> inside body !")
    return body
Пример #16
0
def check_tree_config(tree, config, flags):
    """Prefer an existing ``config.json`` in ``tree`` and flag empty fields.

    :param tree: directory expected to contain ``config.json``.
    :param config: freshly generated configuration.
    :param flags: flags collected so far; returned untouched when the stored
        configuration cannot be read.
    :return: ``(config, flags)``.  When ``config.json`` loads, ``config`` is
        the stored version if it differs from the generated one, and
        ``flags`` is rebuilt as ``['show-config']`` plus one ``no-*`` entry
        per empty field among ``tags.title``, ``tags.meta_description`` and
        ``tutorial_name``.

    The check is best-effort: a missing or unreadable file, invalid JSON or
    an unexpected structure stops it silently and whatever was computed up
    to that point is returned.
    """
    try:
        path_config = os.path.join(tree, 'config.json')
        with open(path_config) as f:
            config_old = json.load(f)
        if config_old != config:
            config = config_old
            status.log(NAME, (
                "Not overwriting `{}`, as modifications were found"
            ).format(path_config))
        flags = ['show-config']
        if not config['tags']['title']:
            flags.append('no-title')
        if not config['tags']['meta_description']:
            flags.append('no-meta_description')
        if not config['tutorial_name']:
            flags.append('no-tutorial_name')
    except (IOError, OSError, ValueError, KeyError, TypeError):
        # Only the failures a missing/malformed config can cause are ignored;
        # a bare ``except`` would also hide KeyboardInterrupt and SystemExit.
        pass
    return config, flags
Пример #17
0
def translate_script_src(soup, dir_url, translate_static):
    """Translate static ``src`` prefixes on the ``<script>`` tags of ``soup``.

    :param soup: parsed document exposing ``findAll``.
    :param dir_url: passed through to ``get_new``.
    :param translate_static: mapping of ``src`` prefix to the value handed
        to ``get_new`` for that prefix.
    :return: the same ``soup`` object.

    Scripts without ``src`` are skipped.  Only the first prefix that matches
    a given script is applied.
    """
    for script in soup.findAll('script'):
        if not script.has_attr('src'):
            continue
        for src_head in translate_static:
            if not script['src'].startswith(src_head):
                continue
            status.log(NAME, (
                'src (static) to translate found: ', script['src']))
            new = get_new(script['src'],
                          translate_static[src_head], dir_url)
            # count=1: swap the leading prefix that was just matched and
            # leave any later occurrence of the same text alone.
            script['src'] = script['src'].replace(src_head, new, 1)
            status.log(NAME, (
                '... translated to (but will get stripped): ',
                script['src']))
            break
    return soup
Пример #18
0
def translate_img_src(soup, path_html, dir_url, translate_static):
    """Rewrite static ``src`` prefixes on ``<img>`` tags and collect the files.

    :param soup: parsed document exposing ``findAll``.
    :param path_html: path of the HTML file; its directory is joined with
        each matching (untranslated) ``src`` to build the collected paths.
    :param dir_url: passed through to ``get_new``.
    :param translate_static: mapping of ``src`` prefix to the value handed
        to ``get_new`` for that prefix.
    :return: ``(soup, paths_image)`` where ``paths_image`` lists one path per
        translated image, in document order.

    Images without ``src`` or without a matching prefix are left alone.
    Every translated image also goes through ``add_img_alt``.
    """
    folder_html = os.path.split(path_html)[0]
    paths_image = []
    for img in soup.findAll('img'):
        if not img.has_attr('src'):
            continue
        for src_head in translate_static:
            # TODO add support for no folder
            if not img['src'].startswith(src_head):
                continue
            status.log(NAME, ('src (static) to translate found: ', img['src']))
            # Record the on-disk location before the src is rewritten
            paths_image.append(os.path.join(folder_html, img['src']))
            new = get_new(img['src'], translate_static[src_head], dir_url)
            # Replace the matched leading prefix only; an unbounded replace
            # would also rewrite later occurrences of the same text (and
            # would scatter `new` through the value for an empty prefix).
            img['src'] = img['src'].replace(src_head, new, 1)
            status.log(NAME, ('... translated to: ', img['src']))
            img = add_img_alt(img)
            break
    return soup, paths_image
Пример #19
0
def get_display_latex_content(body):
    """Gather display-LaTeX that is spread over several tags into one <span>.

    Every <span> in ``body`` whose first content item contains ``$$`` or
    ``\\begin{equation}`` is treated as the start of a LaTeX block.  The
    contents of the following <span> tags are appended to that leading span
    until one of them has ``$$`` or ``\\end{equation}`` in its first content
    item; every tag visited after the leading span is then extracted.

    :param body: tag object exposing ``findAll``.
    :return: the same ``body`` object.

    ``IndexError`` and ``TypeError`` raised while handling a span are
    swallowed and processing moves on to the next span.
    """
    latex_starts = ("$$", "\\begin{equation}")
    latex_ends = ("$$", "\\end{equation}")
    Span = body.findAll('span')
    for span in Span:
        try:
            # Loop through possible latex starts
            for latex_start in latex_starts:
                if latex_start in span.contents[0]:
                    # Init. latex content and trackers
                    in_latex = True
                    # Same object as span.contents, not a copy: the `+=`
                    # further down therefore grows the leading span's own
                    # contents (assuming `contents` is a plain list --
                    # TODO confirm).
                    in_latex_content = span.contents
                    in_latex_tags = [span]
                    status.log(NAME, (
                        "<span> containing latex (i.e. {start}) found:"
                    ).format(start=latex_start))
                    # Get next tag, TODO generalize?
                    # NOTE(review): if findNext/findChild return None when
                    # nothing matches, `_next.name` below raises
                    # AttributeError, which the except clause does not
                    # catch -- confirm.
                    _next = span.findNext(('p', 'span'))
                    while in_latex:
                        if _next.name != 'span':
                            # If not span, find next span
                            in_latex_tags += [_next]
                            _next = _next.findChild('span')
                            continue
                        # Add content to leading span
                        in_latex_content += _next.contents
                        status.log(NAME, (
                            '... more in-line latex content:',
                            ' '.join((_next.contents)).replace('\n', '')
                        ))
                        for latex_end in latex_ends:
                            # Check if latex end is reached; only the first
                            # content item of the current span is inspected
                            if latex_end in _next.contents[0]:
                                in_latex = False
                        # Add in latex tag, for tracking
                        in_latex_tags += [_next]
                        _next = _next.findNext(('p', 'span'))
                    # Delete in latex tags (the leading span at index 0 is
                    # kept)
                    for in_latex_tag in in_latex_tags[1:]:
                        in_latex_tag.extract()
                    # TODO wrap in latex content in "$$" ?
        except (IndexError, TypeError):  # TODO generalize?
            pass
    return body
Пример #20
0
def get_display_latex_content(body):
    """Gather display-LaTeX that is spread over several tags into one <span>.

    Every <span> in ``body`` whose first content item contains ``$$`` or
    ``\\begin{equation}`` is treated as the start of a LaTeX block.  The
    contents of the following <span> tags are appended to that leading span
    until one of them has ``$$`` or ``\\end{equation}`` in its first content
    item; every tag visited after the leading span is then extracted.

    :param body: tag object exposing ``findAll``.
    :return: the same ``body`` object.

    ``IndexError`` and ``TypeError`` raised while handling a span are
    swallowed and processing moves on to the next span.
    """
    latex_starts = ("$$", "\\begin{equation}")
    latex_ends = ("$$", "\\end{equation}")
    Span = body.findAll('span')
    for span in Span:
        try:
            # Loop through possible latex starts
            for latex_start in latex_starts:
                if latex_start in span.contents[0]:
                    # Init. latex content and trackers
                    in_latex = True
                    # Same object as span.contents, not a copy: the `+=`
                    # further down therefore grows the leading span's own
                    # contents (assuming `contents` is a plain list --
                    # TODO confirm).
                    in_latex_content = span.contents
                    in_latex_tags = [span]
                    status.log(NAME, (
                        "<span> containing latex (i.e. {start}) found:"
                    ).format(start=latex_start))
                    # Get next tag, TODO generalize?
                    # NOTE(review): if findNext/findChild return None when
                    # nothing matches, `_next.name` below raises
                    # AttributeError, which the except clause does not
                    # catch -- confirm.
                    _next = span.findNext(('p', 'span'))
                    while in_latex:
                        if _next.name != 'span':
                            # If not span, find next span
                            in_latex_tags += [_next]
                            _next = _next.findChild('span')
                            continue
                        # Add content to leading span
                        in_latex_content += _next.contents
                        status.log(NAME, (
                            '... more in-line latex content:',
                            ' '.join((_next.contents)).replace('\n', '')
                        ))
                        for latex_end in latex_ends:
                            # Check if latex end is reached; only the first
                            # content item of the current span is inspected
                            if latex_end in _next.contents[0]:
                                in_latex = False
                        # Add in latex tag, for tracking
                        in_latex_tags += [_next]
                        _next = _next.findNext(('p', 'span'))
                    # Delete in latex tags (the leading span at index 0 is
                    # kept)
                    for in_latex_tag in in_latex_tags[1:]:
                        in_latex_tag.extract()
                    # TODO wrap in latex content in "$$" ?
        except (IndexError, TypeError):  # TODO generalize?
            pass
    return body
Пример #21
0
def translate_img_src(soup, path_html, dir_url, translate_static):
    """Translate static ``src`` prefixes on ``<img>`` tags; list the images.

    :param soup: parsed document exposing ``findAll``.
    :param path_html: path of the HTML file; its directory is joined with
        each matching (untranslated) ``src`` to build the returned paths.
    :param dir_url: passed through to ``get_new``.
    :param translate_static: mapping of ``src`` prefix to the value handed
        to ``get_new`` for that prefix.
    :return: ``(soup, paths_image)``; ``paths_image`` has one entry per
        translated image, in document order.

    Images without ``src`` or without a matching prefix are untouched.  Each
    translated image is also passed to ``add_img_alt``.
    """
    folder_html = os.path.split(path_html)[0]
    paths_image = []
    for img in soup.findAll('img'):
        if not img.has_attr('src'):
            continue
        for src_head in translate_static:
            if not img['src'].startswith(src_head):
                continue
            status.log(NAME, (  # TODO add support for no folder
                'src (static) to translate found: ',
                img['src']))
            # Remember where the file lives before the src is rewritten
            paths_image.append(os.path.join(folder_html, img['src']))
            new = get_new(img['src'], translate_static[src_head], dir_url)
            # count=1: swap the leading prefix that was just matched and
            # leave any later occurrence of the same text alone.
            img['src'] = img['src'].replace(src_head, new, 1)
            status.log(NAME, (
                '... translated to: ', img['src']))
            img = add_img_alt(img)
            break
    return soup, paths_image
Пример #22
0
def check_tree_config(tree, config, flags):
    """Prefer an existing ``config.json`` in ``tree`` and flag empty fields.

    :param tree: directory expected to contain ``config.json``.
    :param config: freshly generated configuration.
    :param flags: flags collected so far; returned untouched when the stored
        configuration cannot be read.
    :return: ``(config, flags)``.  When ``config.json`` loads, ``config`` is
        the stored version if it differs from the generated one, and
        ``flags`` is rebuilt as ``['show-config']`` plus one ``no-*`` entry
        per empty field among ``tags.title``, ``tags.meta_description`` and
        ``tutorial_name``, plus ``no-banner_image`` when ``banner_image`` is
        the empty string.

    The check is best-effort: a missing or unreadable file, invalid JSON or
    an unexpected structure stops it silently and whatever was computed up
    to that point is returned.
    """
    try:
        path_config = os.path.join(tree, 'config.json')
        with open(path_config) as f:
            config_old = json.load(f)
        if config_old != config:
            config = config_old
            status.log(NAME, ("Not overwriting `{}`, as modifications "
                              "from default were found").format(path_config))
        flags = ['show-config']
        if not config['tags']['title']:
            flags.append('no-title')
        if not config['tags']['meta_description']:
            flags.append('no-meta_description')
        if not config['tutorial_name']:
            flags.append('no-tutorial_name')
        # Compared against "" explicitly, so other falsy values do not
        # raise this flag.
        if config['banner_image'] == "":
            flags.append('no-banner_image')
    except (IOError, OSError, ValueError, KeyError, TypeError):
        # Only the failures a missing/malformed config can cause are ignored;
        # a bare ``except`` would also hide KeyboardInterrupt and SystemExit.
        pass
    return config, flags
Пример #23
0
def add_header_anchors(body):
    """Give each non-empty <h1>-<h4> an id, a self-linking anchor and a class.

    Headers are handled level by level (h1 first).  A header whose stripped
    text is empty is extracted.  Otherwise its id becomes the lower-cased
    text with spaces turned into dashes, ``inserter`` is called to add an
    ``<a href="#<id>" class="link--impt">`` carrying the text, and the
    header's class is set to ``heading alpha push--ends text--center``.
    Each step is logged.

    :param body: tag object exposing ``findAll``.
    :return: the same ``body`` object.
    """
    header_class = "heading alpha push--ends text--center"
    anchor_class = "link--impt"
    anchor_tag = 'a'
    for level in ('h1', 'h2', 'h3', 'h4'):
        for header in body.findAll(level):
            text = header.getText(strip=True, separator=u' ')
            status.log(NAME, ('Header found! text:', text.encode('utf8')))
            if not text:
                # Nothing to link to: drop the empty header altogether
                header.extract()
                status.log(NAME, '... is empty, removing it!')
                continue
            # The id doubles as the fragment the anchor points at
            slug = text.replace(' ', '-').lower()
            header['id'] = slug
            status.log(NAME, "... add id: '{}'".format(slug.encode('utf8')))
            href = '#' + slug
            inserter(header, anchor_tag,
                     {'href': href, 'class': anchor_class}, text)
            status.log(NAME, "... insert <a href='{}' class='{}'>".format(
                href.encode('utf8'), anchor_class.encode('utf8')))
            header['class'] = header_class
            status.log(
                NAME, "... add class '{}' to header".format(header_class))
    return body
Пример #24
0
def add_header_anchors(body):
    """Turn every non-empty <h1>-<h4> in ``body`` into a linkable heading.

    For each header, in level order h1 to h4:

    * empty text (after stripping): the header is extracted;
    * otherwise: ``id`` is set to the lower-cased text with spaces replaced
      by dashes, ``inserter`` is called with an ``a`` tag whose ``href`` is
      ``#<id>`` and whose class is ``link--impt``, and the header class is
      set to ``heading alpha push--ends text--center``.

    :param body: tag object exposing ``findAll``.
    :return: the same ``body`` object.
    """
    heading_css = "heading alpha push--ends text--center"
    link_css = "link--impt"
    for tag_name in ('h1', 'h2', 'h3', 'h4'):
        found = body.findAll(tag_name)
        for heading in found:
            label = heading.getText(strip=True, separator=u' ')
            status.log(NAME, ('Header found! text:', label.encode('utf8')))
            if label:
                anchor_id = label.replace(' ', '-').lower()
                heading['id'] = anchor_id
                status.log(
                    NAME, "... add id: '{}'".format(anchor_id.encode('utf8')))
                target = '#' + anchor_id
                attrs = {'href': target, 'class': link_css}
                inserter(heading, 'a', attrs, label)
                status.log(NAME, "... insert <a href='{}' class='{}'>".format(
                    target.encode('utf8'), link_css.encode('utf8')))
                heading['class'] = heading_css
                status.log(
                    NAME, "... add class '{}' to header".format(heading_css))
            else:
                # An empty heading has nothing to anchor: remove it
                heading.extract()
                status.log(NAME, '... is empty, removing it!')
    return body
Пример #25
0
def add_lightbox(body):
    """Wrap every ``<img>`` that is not inside an ``<a>`` in a lightbox link.

    The wrapping ``<a>`` gets ``href`` set to the image ``src`` and
    ``data-lightbox`` set to the image file name without its extension.
    Images that already have an ``<a>`` ancestor are left as they are.

    :param body: tag object exposing ``findAll``.
    :return: the same ``body`` object.
    """
    for img in body.findAll('img'):
        status.log(NAME, ('Image found! src:', img['src']))
        if img.findParent('a'):
            status.log(NAME, "... <a> found around it, doing nothing")
            continue
        # No <a> around <img />: add the lightbox wrapper
        lightbox_attrs = {
            'href': img['src'],
            'data-lightbox': os.path.splitext(
                os.path.basename(img['src']))[0],
        }
        wrap(img, 'a', lightbox_attrs)
        status.log(NAME, '... wrap with lightbox <a>')
    return body
Пример #26
0
def add_lightbox(body):
    """Add a lightbox ``<a>`` around each ``<img>`` lacking an ``<a>`` parent.

    :param body: tag object exposing ``findAll``.
    :return: the same ``body`` object.

    The new anchor links to the image ``src``; its ``data-lightbox`` value
    is the image file name stripped of directory and extension.
    """
    for img in body.findAll('img'):
        status.log(NAME, ('Image found! src:', img['src']))
        # TODO maybe only lightbox <a>
        if img.findParent('a'):
            status.log(NAME, "... <a> found around it, doing nothing")
            continue
        src = img['src']
        file_name = os.path.basename(img['src'])
        data, _ = os.path.splitext(file_name)
        wrap(img, 'a', {'href': src, 'data-lightbox': data})
        status.log(NAME, '... wrap with lightbox <a>')
    return body
Пример #27
0
def format_paragraphs(body):
    """Rebuild the contents of every ``<p>`` and give it a spacing class.

    For each paragraph the new contents come from ``get_inner_contents``
    (called with ``tag_ignore='span'`` and ``string_ignore=u'\\n'``); the
    paragraph is emptied with ``strip_contents`` and refilled with
    ``insert_inner_contents``.  Its class is then set to
    ``push-half--ends``.

    :param body: tag object exposing ``findAll``.
    :return: the same ``body`` object.
    """
    paragraph_class = 'push-half--ends'
    for paragraph in body.findAll('p'):
        status.log(NAME, 'Paragraph found!')
        # Compute the replacement first, then swap old contents for new
        new_contents = get_inner_contents(
            paragraph.contents, tag_ignore='span', string_ignore=u'\n')
        strip_contents(paragraph)
        insert_inner_contents(paragraph, new_contents)
        status.log(NAME, "... formatting it")
        paragraph['class'] = paragraph_class
        status.log(NAME, "... add class '{}'".format(paragraph_class))
    return body
Пример #28
0
def format_paragraphs(body):
    """Re-format each ``<p>`` in ``body`` and tag it ``push-half--ends``.

    :param body: tag object exposing ``findAll``.
    :return: the same ``body`` object.

    Per paragraph: ``get_inner_contents`` produces the new contents (spans
    and newline strings are passed as the things to ignore),
    ``strip_contents`` clears the paragraph, ``insert_inner_contents`` puts
    the new contents in, and finally the class attribute is overwritten.
    """
    css_class = 'push-half--ends'
    paragraphs = body.findAll('p')
    for p in paragraphs:
        status.log(NAME, 'Paragraph found!')
        replacement = get_inner_contents(p.contents,
                                         tag_ignore='span',
                                         string_ignore=u'\n')
        strip_contents(p)
        insert_inner_contents(p, replacement)
        status.log(NAME, "... formatting it")
        p['class'] = css_class
        class_message = "... add class '{}'".format(css_class)
        status.log(NAME, class_message)
    return body
Пример #29
0
def check_redirects(folder, translate_redirects):
    """Move published sub-directories from their old name to their new name.

    Looks under ``<folder>/published/includes`` and
    ``<folder>/published/static/images``.  For every ``new -> olds`` entry,
    when the directory named after the last item of ``olds`` exists and the
    ``new`` one does not, the new directory is created, every entry of the
    old one is copied over with ``shutil.copy`` and the old directory is
    removed.

    :param folder: project root containing ``published``.
    :param translate_redirects: mapping of new directory name to a sequence
        of old names; only the last old name is considered.
    :return: None.
    """
    published = os.path.join(folder, 'published')
    roots = [
        os.path.join(published, 'includes'),
        os.path.join(published, 'static', 'images'),
    ]
    for root in roots:
        for new, olds in translate_redirects.items():
            dir_new = os.path.join(root, new) + '/'
            dir_old = os.path.join(root, olds[-1]) + '/'
            if not os.path.isdir(dir_old) or os.path.isdir(dir_new):
                # Nothing to migrate, or the destination is already there
                continue
            status.log(NAME, ('Making', dir_new))
            os.makedirs(dir_new)
            for item in os.listdir(dir_old):
                source = os.path.join(dir_old, item)
                shutil.copy(source, dir_new)
                status.log(
                    NAME, 'Copying {} to {}'.format(source, dir_new))
            shutil.rmtree(dir_old)
            status.log(NAME, 'Removing directory {}'.format(dir_old))
    return
Пример #30
0
def check_redirects(folder, translate_redirects):
    """Migrate renamed sub-directories inside the published tree.

    :param folder: project root containing ``published``.
    :param translate_redirects: mapping of new directory name to a sequence
        of old names; only the last old name is looked up.
    :return: None.

    Both ``published/includes`` and ``published/static/images`` are checked.
    A migration happens only when the old directory exists and the new one
    does not: the new directory is created, each entry of the old one is
    copied with ``shutil.copy``, then the old directory is deleted.
    """
    published = os.path.join(folder, 'published')
    parents = (
        os.path.join(published, 'includes'),
        os.path.join(published, 'static', 'images'),
    )
    for parent in parents:
        for new, olds in translate_redirects.items():
            destination = os.path.join(parent, new) + '/'
            origin = os.path.join(parent, olds[-1]) + '/'
            needs_move = (os.path.isdir(origin)
                          and not os.path.isdir(destination))
            if not needs_move:
                continue
            status.log(NAME, ('Making', destination))
            os.makedirs(destination)
            for entry in os.listdir(origin):
                entry_path = os.path.join(origin, entry)
                shutil.copy(entry_path, destination)
                status.log(
                    NAME, 'Copying {} to {}'.format(entry_path, destination))
            shutil.rmtree(origin)
            status.log(NAME, 'Removing directory {}'.format(origin))
    return
Пример #31
0
def overwrite_redirects(folder, redirects_py):
    """Replace ``<folder>/published/redirects.py`` with ``redirects_py``.

    :param folder: project root containing ``published``.
    :param redirects_py: text to write; any previous file content is lost.
    :return: None.
    """
    target = "{}/published/redirects.py".format(folder)
    with open(target, "w") as handle:
        # Logged once the file has been opened, before the write
        status.log(NAME, ('Writes in', target))
        handle.write(redirects_py)
Пример #32
0
def print_flags(flags, config, path_html, tree):
    """Report every flag raised while processing ``path_html``.

    :param flags: iterable of flag names.
    :param config: mapping read for ``tutorial_name``, ``banner_image`` and
        ``tags`` (``title`` and ``meta_description``).
    :param path_html: path of the HTML file, quoted in the messages.
    :param tree: directory holding ``config.json``, quoted in the messages.
    :return: None.

    ``show-config`` and any unrecognised flag are written with
    ``status.log``; the ``no-*``, ``multiple-*`` and ``missing-*`` flags use
    ``status.important`` (the ``multiple-*`` ones also log the value that
    was kept).
    """
    for flag in flags:
        if flag == 'show-config':
            status.log(NAME, (
                "{}/config.json ['tutorial_name']:\n\t'{}'"
            ).format(tree, config['tutorial_name']))
            status.log(NAME, (
                "{}/config.json ['banner_image']:\n\t'{}'"
            ).format(tree, config['banner_image']))
            status.log(NAME, (
                "{}/config.json ['tags']['title']:\n\t'{}'"
            ).format(tree, config['tags']['title']))
            status.log(NAME, (
                "{}/config.json ['tags']['meta_description']:\n\t'{}'"
            ).format(tree, config['tags']['meta_description']))
        elif flag == 'no-title':
            status.important(NAME, (
                "There is no <title>\nin `{}`.\n"
                "Please fill in\n`{}/config.json`"
            ).format(path_html, tree))
        elif flag == 'multiple-title':
            status.important(NAME, (
                "There is more than one <title>\nin `{}`.\n"
                "Picking the last one for\n`{}/config.json`"
            ).format(path_html, tree))
            # The two fragments are concatenated: the trailing space keeps
            # "meta" and "title" apart.
            status.log(NAME, (
                'With last <title> tag, set meta '
                'title to "{}"'
            ).format(config['tags']['title']))
        elif flag == 'no-meta_description':
            # This flag means the tag is absent, so the message must not
            # repeat the "more than one" wording of the multiple-* case.
            status.important(NAME, (
                "There is no <meta name='description'> in\n`{}`.\n"
                "Please fill in\n`{}/config.json`"
            ).format(path_html, tree))
        elif flag == 'multiple-meta_descriptions':
            status.important(NAME, (
                "There is more than one <meta name='description'> in\n`{}`.\n"
                "Picking the last one for\n`{}/config.json`"
            ).format(path_html, tree))
            status.log(NAME, (
                'With last <meta name="description"> tag, '
                'set meta description to "{}"'
            ).format(config['tags']['meta_description']))
        elif flag == 'no-tutorial_name':
            status.important(NAME, (
                "Please fill 'tutorial_name' in\n`{}/config.json`"
            ).format(tree))
        elif flag == 'no-banner_image':
            status.important(NAME, (
                "Please fill 'banner_image' in\n`{tree}/config.json`:\n"
                "- For an iframe: set 'banner_image' to the url\n"
                "- For a static image: set 'banner_image' "
                "to the image file name\n"
                "      AND copy the image to:\n"
                "      ``{tree_image}``/\n"
                "- For no banner image, set 'banner_image' to false"
            ).format(tree=tree,
                     tree_image=tree.replace('includes', 'static/images')))
        elif flag == 'missing-banner_image':
            status.important(NAME, (
                "The static banner image linked to 'banner_image'  "
                "({image}) in\n  "
                "`{tree}/config.json`\n  "
                "is not found in\n  "
                "`{tree_image}`/\n  "
                "Please copy it over."
            ).format(image=config['banner_image'], tree=tree,
                     tree_image=tree.replace('includes', 'static/images')))
        else:
            # Any other flag: report the values taken from the document
            status.log(NAME, (
                'With <title> tag, set meta title to:\n\t"{}"'
            ).format(config['tags']['title']))
            status.log(NAME, (
                'With <meta name="description"> tag, '
                'set meta description to:\n\t"{}"'
            ).format(config['tags']['meta_description']))
    return
Пример #33
0
def print_flags(flags, config, path_html, tree):
    """Emit one report per flag collected for ``path_html``.

    :param flags: iterable of flag names.
    :param config: mapping read for ``tutorial_name``, ``banner_image`` and
        ``tags`` (``title`` and ``meta_description``).
    :param path_html: path of the HTML file, quoted in the messages.
    :param tree: directory holding ``config.json``, quoted in the messages.
    :return: None.

    ``show-config`` and unrecognised flags go through ``status.log``; the
    ``no-*``, ``multiple-*`` and ``missing-*`` flags go through
    ``status.important`` (``multiple-*`` additionally logs the kept value).
    """
    for flag in flags:
        if flag == 'show-config':
            status.log(NAME, (
                "{}/config.json ['tutorial_name']:\n\t'{}'"
            ).format(tree, config['tutorial_name']))
            status.log(NAME, (
                "{}/config.json ['banner_image']:\n\t'{}'"
            ).format(tree, config['banner_image']))
            status.log(NAME, (
                "{}/config.json ['tags']['title']:\n\t'{}'"
            ).format(tree, config['tags']['title']))
            status.log(NAME, (
                "{}/config.json ['tags']['meta_description']:\n\t'{}'"
            ).format(tree, config['tags']['meta_description']))
        elif flag == 'no-title':
            status.important(NAME, (
                "There is no <title>\nin `{}`.\n"
                "Please fill in\n`{}/config.json`"
            ).format(path_html, tree))
        elif flag == 'multiple-title':
            status.important(NAME, (
                "There is more than one <title>\nin `{}`.\n"
                "Picking the last one for\n`{}/config.json`"
            ).format(path_html, tree))
            # Adjacent literals are joined as-is; without the trailing
            # space the message read "set metatitle".
            status.log(NAME, (
                'With last <title> tag, set meta '
                'title to "{}"'
            ).format(config['tags']['title']))
        elif flag == 'no-meta_description':
            # The tag is missing here; the "more than one" wording belongs
            # to 'multiple-meta_descriptions' only.
            status.important(NAME, (
                "There is no <meta name='description'> in\n`{}`.\n"
                "Please fill in\n`{}/config.json`"
            ).format(path_html, tree))
        elif flag == 'multiple-meta_descriptions':
            status.important(NAME, (
                "There is more than one <meta name='description'> in\n`{}`.\n"
                "Picking the last one for\n`{}/config.json`"
            ).format(path_html, tree))
            status.log(NAME, (
                'With last <meta name="description"> tag, '
                'set meta description to "{}"'
            ).format(config['tags']['meta_description']))
        elif flag == 'no-tutorial_name':
            status.important(NAME, (
                "Please fill 'tutorial_name' in\n`{}/config.json`"
            ).format(tree))
        elif flag == 'no-banner_image':
            status.important(NAME, (
                "Please fill 'banner_image' in\n`{tree}/config.json`:\n"
                "- For an iframe: set 'banner_image' to the url\n"
                "- For a static image: set 'banner_image' "
                "to the image file name\n"
                "      AND copy the image to:\n"
                "      ``{tree_image}``/\n"
                "- For no banner image, set 'banner_image' to false"
            ).format(tree=tree,
                     tree_image=tree.replace('includes', 'static/images')))
        elif flag == 'missing-banner_image':
            status.important(NAME, (
                "The static banner image linked to 'banner_image'  "
                "({image}) in\n  "
                "`{tree}/config.json`\n  "
                "is not found in\n  "
                "`{tree_image}`/\n  "
                "Please copy it over."
            ).format(image=config['banner_image'], tree=tree,
                     tree_image=tree.replace('includes', 'static/images')))
        else:
            # Fallback for any other flag: report the extracted values
            status.log(NAME, (
                'With <title> tag, set meta title to:\n\t"{}"'
            ).format(config['tags']['title']))
            status.log(NAME, (
                'With <meta name="description"> tag, '
                'set meta description to:\n\t"{}"'
            ).format(config['tags']['meta_description']))
    return
def run_analysis(args):
    """Analyse one image or video and record the outcome in the batch logs.

    :param args: sequence whose first item is the path of the file to
        analyse and whose second item is the batch directory used for the
        log files and the saved output.
    :return: None.

    Files with an ``.mp4`` extension (case-insensitive) are handed to
    ``analyzeVideo`` and timed.  Anything else goes through ``algorithms``;
    the resulting heatmap is saved under ``Analyzed`` when its highest score
    is at least 50 and under ``Other Analyzed`` otherwise.
    """
    img = args[0]           # Image to be analyzed
    batch_path = args[1]    # Used for save location

    # The logs are managed by ``with`` so they are closed even when the
    # analysis raises part-way through.
    with open(os.path.join(batch_path, 'batch_log.txt'), 'a+') as batch_log, \
            open(os.path.join(batch_path, 'detected_log.txt'), 'a+') as detected_log, \
            open(os.path.join(batch_path, 'other_log.txt'), 'a+') as other_log:

        # Extension decides between the video and the still-image path
        ext = os.path.splitext(img)[-1]

        # Image name to save the heatmap under
        img_name = ".".join(os.path.split(img)[1].split(".")[:-1])

        # Handles video input
        if ext.lower() == ".mp4":
            # Time the video analysis
            t = timer.Timer()
            t.start()
            rtn_name = analyzeVideo((img, batch_path, 30))
            t.stop()

            # Log the results
            rtn_str = rtn_name, t.get_time(), 0.0
            status('-v-', rtn_str)
            log(detected_log, '-v-', rtn_str)
            return

        # Call the algorithms
        # TODO: unused_images (results that could not feed the heatmap) are
        # currently not saved anywhere.
        final_scores, final_time, final_stats, unused_images = algorithms(img)

        # Apply colormap to the combined heatmap if it is not already a
        # color image
        # TODO: Compare with COLORMAP_RAINBOW (First Responders stated that
        # they tend to use it instead of Jet)
        if len(final_scores.shape) < 3:
            final_image = cv.applyColorMap(
                final_scores.astype(np.uint8), cv.COLORMAP_JET)
        else:
            final_image = final_scores

        # Pick the log tag, the secondary log and the output folder
        # TODO: Re-Evaluate whether this is still a valid requirement
        if np.max(final_scores) >= 50:
            tag, extra_log, out_dir = '-d-', detected_log, "Analyzed"
        else:
            tag, extra_log, out_dir = '-o-', other_log, "Other Analyzed"

        # Report, log, and save the heatmap in the chosen folder
        results_str = [img_name, final_time, final_stats]
        status(tag, results_str)
        log(batch_log, tag, results_str)
        log(extra_log, tag, results_str)
        cv.imwrite(
            os.path.join(batch_path, out_dir, img_name + ".jpg"), final_image)
    return
Пример #35
0
def add_img_alt(img):
    """Give *img* an ``alt`` attribute derived from its ``src`` path.

    The alt text is ``<parent directory name>/<file name>`` of ``img['src']``.
    The same (mutated) *img* is returned.
    """
    parent_dir, file_name = os.path.split(img['src'])
    alt_text = os.path.basename(parent_dir) + '/' + file_name
    img['alt'] = alt_text
    message = "... img, add alt='{}'".format(alt_text)
    status.log(NAME, message)
    return img
Пример #36
0
def get_body_head(soup):
    """Return the ``(body, head)`` pair read from *soup*'s attributes."""
    status.log(NAME, 'Grabs <body> and <head>')
    body = soup.body
    head = soup.head
    return body, head
Пример #37
0
def overwrite_urls(folder, urls_py):
    """Write *urls_py* to ``<folder>/published/urls.py``, replacing its content."""
    target = "{}/published/urls.py".format(folder)
    with open(target, "w") as out:
        # Log once the file is open, right before writing
        status.log(NAME, ('Writes in', target))
        out.write(urls_py)
Пример #38
0
def overwrite_sitemaps(folder, sitemaps_py):
    """Write *sitemaps_py* to ``<folder>/published/sitemaps.py``, replacing its content."""
    target = "{}/published/sitemaps.py".format(folder)
    with open(target, "w") as out:
        out.write(sitemaps_py)
        # Logged after the write, while the file is still open
        status.log(NAME, ('Writes in', target))
def run_analysis(args):
    """Analyze one input file and record the outcome in the batch log files.

    args is an indexable pair: args[0] is the path of the file to analyze and
    args[1] is the batch directory that holds the log files and the output
    folders.

    Files whose extension is ".mp4" (case-insensitive) are handed to
    analyzeVideo and only written to the detected log. Any other file is run
    through algorithms(); its score map is color-mapped and saved as a JPEG in
    the "Detected" folder when the maximum score is at least 50, otherwise in
    the "Other" folder.

    Returns None.
    """
    img = args[0]  #Image to be analyzed
    batch_path = args[1]  #Used for save location

    #The with-block guarantees the three logs are closed even when the
    #analysis or the image write raises
    with open(os.path.join(batch_path, 'batch_log.txt'), 'a+') as batch_log, \
            open(os.path.join(batch_path, 'detected_log.txt'), 'a+') as detected_log, \
            open(os.path.join(batch_path, 'other_log.txt'), 'a+') as other_log:

        #Extension decides between video and still-image handling
        ext = os.path.splitext(img)[-1]

        #Image name to save the heatmap under (file name without its last extension)
        img_name = ".".join(os.path.split(img)[1].split(".")[:-1])

        #Handles video input
        if ext.lower() == ".mp4":
            #Time the video analysis
            t = timer.Timer()
            t.start()
            #NOTE(review): the meaning of the constant 30 is not visible here - confirm against analyzeVideo
            rtn_name = analyzeVideo((img, batch_path, 30))
            t.stop()

            #Log the results (videos are only written to the detected log)
            rtn_str = rtn_name, t.get_time(), 0.0
            status('-v-', rtn_str)
            log(detected_log, '-v-', rtn_str)
            return

        #Call the algorithms
        final_scores, final_time, final_stats = algorithms(img)

        #Apply colormap to the combined heatmap
        final_heatmap = cv.applyColorMap(final_scores.astype(np.uint8),
                                         cv.COLORMAP_JET)

        #Pick tag, category log and output folder from the maximum score
        if np.max(final_scores) >= 50:
            tag, category_log, out_folder = '-d-', detected_log, "Detected"
        else:
            tag, category_log, out_folder = '-o-', other_log, "Other"

        #Report the result and save the heatmap in the matching folder
        results_str = [img_name, final_time, final_stats]
        status(tag, results_str)
        log(batch_log, tag, results_str)
        log(category_log, tag, results_str)
        cv.imwrite(os.path.join(batch_path, out_folder, img_name + ".jpg"),
                   final_heatmap)
    return
Пример #40
0
def copy_leaves(tree, paths_leaf):
    """Copy every path in *paths_leaf* into the directory *tree*."""
    message = ('Copying leaves to', tree)
    status.log(NAME, message)
    for source_path in paths_leaf:
        shutil.copy(source_path, tree)
Пример #41
0
def overwrite_urls(folder, urls_py):
    """Replace the content of ``<folder>/published/urls.py`` with *urls_py*."""
    urls_path = "{}/published/urls.py".format(folder)
    with open(urls_path, "w") as urls_file:
        # The log entry is emitted before the content is written
        status.log(NAME, ('Writes in', urls_path))
        urls_file.write(urls_py)
Пример #42
0
def get_soup(path_html):
    """Open *path_html* for reading and return it parsed by ``BeautifulSoup``."""
    with open(path_html, "r") as html_file:
        status.log(NAME, ("Opening", path_html))
        soup = BeautifulSoup(html_file)
    return soup
Пример #43
0
def get_soup(path_html):
    """Parse the file at *path_html* with ``BeautifulSoup`` and return the result."""
    with open(path_html, "r") as source:
        opening_message = ("Opening", path_html)
        status.log(NAME, opening_message)
        parsed = BeautifulSoup(source)
    return parsed
Пример #44
0
def get_body_head(soup):
    """Log the split and return ``soup.body`` and ``soup.head`` as a tuple."""
    status.log(NAME, 'Grabs <body> and <head>')
    parts = (soup.body, soup.head)
    return parts
Пример #45
0
def translate_a_href(soup, dir_url,
                     translate_static, translate_filename_url):
    """Clean up and rewrite the ``href`` of every ``<a>`` found in *soup*.

    Anchors with neither text nor child tags, and anchors without an
    ``href`` attribute, are removed from the tree. For every remaining
    anchor the href goes through these steps, in order:

    1. a prefix listed in *translate_static* is replaced by the value
       computed by ``get_new`` (first matching prefix only);
    2. a Google redirect wrapper (``google.com/url?q=...``) is unwrapped;
    3. hrefs starting with a plot.ly prefix or ``/`` are handled: links
       whose path starts with ``~`` get the ``https://plot.ly/`` prefix,
       the others are made root-relative, and any key of
       *translate_filename_url* found in the href is replaced by its
       mapped value.

    Finally ``add_a_class`` and ``add_a_target_blank`` are applied.

    Parameters:
        soup: tree exposing ``findAll('a')``; modified in place.
        dir_url: forwarded unchanged to ``get_new``.
        translate_static: mapping of href prefix to the value handed to
            ``get_new``.
        translate_filename_url: mapping of href fragment to its
            replacement (presumably file name -> url; verify with caller).

    Returns:
        The same *soup* object.
    """
    google_starts = ('https://www.google.com/url?q=',
                     'http://www.google.com/url?q=')
    google_end = '&'  # TODO could this be more strict?
    href_starts = ['https://plot.ly/', 'plot.ly/', 'http://plot.ly/', '/']

    for a in soup.findAll('a'):
        is_translated = False  # to log relevant output
        # Clean up case
        if not a.getText(strip=True) and not a.findChildren():
            a.extract()
            status.log(NAME, (
                'Anchor with nothing in it found, removing it!'))
            continue
        if not a.has_attr('href'):
            a.extract()
            status.log(NAME, (
                'Anchor without href found, removing it!!'))
            continue
        # Now if 'real' anchor found
        status.log(NAME, ('Anchor found! href: ', a['href']))
        # Case 1: <a> to static location (translated from streambed)
        for href_head in translate_static:
            if a['href'].startswith(href_head):
                status.log(NAME, (
                    '... href has a *static* start: ', href_head))
                new = get_new(a['href'], translate_static[href_head], dir_url)
                a['href'] = a['href'].replace(href_head, new)
                is_translated = True
                break
        # Case *: handle Google redirects
        for google_start in google_starts:
            if a['href'].startswith(google_start):
                status.log(NAME, ('... href has a google redirect'))
                _s = len(google_start)
                _e = a['href'].find(google_end, _s)
                if _e == -1:
                    # No further query parameter: keep the whole target
                    # (slicing with -1 would drop its last character).
                    _e = len(a['href'])
                a['href'] = (
                    a['href'][_s:_e].replace('%3A', ':')
                                    .replace('%2F', '/')
                )
        # Case 2: <a> to url location (translated to relative domain)
        for href_start in href_starts:
            if a['href'].startswith(href_start):
                # 2.1 href to shareplot should have full URI
                if a['href'].startswith(href_start+'~'):
                    status.log(NAME, (
                        '... href links to shareplot:', a['href']))
                    status.log(NAME, (
                        '... guessing this is referring to a plot on prod'))
                    a['href'] = a['href'].replace(
                        href_start, 'https://plot.ly/', 1)
                    is_translated = True
                    continue
                # 2.2 Translate href start to django root
                if not a['href'].startswith('/'):
                    status.log(NAME, ('... href *url* start: ', href_start))
                    a['href'] = a['href'].replace(href_start, '/', 1)
                    is_translated = True
                # 2.3 Translate href to other docs using translate_filename_url
                for href_tail in translate_filename_url:
                    if href_tail in a['href']:
                        status.log(NAME, ('... href has tail: ', a['href']))
                        # Replace the matched tail with its own mapping; the
                        # loop variable and table of Case 1 do not apply here.
                        a['href'] = a['href'].replace(
                            href_tail, translate_filename_url[href_tail])
                        is_translated = True
                        break

        # Log output
        if is_translated:
            status.log(NAME, ('... translated to: ', a['href']))
        else:
            status.log(NAME, ('... no translation required'))
        # Add attributes
        a = add_a_class(a)
        a = add_a_target_blank(a)
    return soup
Пример #46
0
def print_flags(flags, config, path_html, tree):
    """Report every flag in *flags* through ``status``.

    Parameters:
        flags: iterable of flag strings. Recognised values are
            'show-config', 'no-title', 'multiple-title',
            'no-meta_description', 'multiple-meta_descriptions' and
            'no-tutorial_name'; any other value logs the title and meta
            description currently stored in *config*.
        config: mapping read at ``['tutorial_name']``, ``['tags']['title']``
            and ``['tags']['meta_description']``.
        path_html: path of the html file, used in the messages only.
        tree: directory containing ``config.json``, used in the messages only.

    Returns None.
    """
    tags = None  # looked up lazily so flags that do not need it never touch config
    for flag in flags:
        if flag == 'show-config':
            status.log(NAME, (
                "{}/config.json ['tutorial_name']:\n\t'{}'"
            ).format(tree, config['tutorial_name']))
            tags = config['tags']
            status.log(NAME, (
                "{}/config.json ['tags']['title']:\n\t'{}'"
            ).format(tree, tags['title']))
            status.log(NAME, (
                "{}/config.json ['tags']['meta_description']:\n\t'{}'"
            ).format(tree, tags['meta_description']))
        elif flag == 'no-title':
            status.important(NAME, (
                "There is no <title>\nin `{}`.\n"
                "Please fill in\n`{}/config.json`"
            ).format(path_html, tree))
        elif flag == 'multiple-title':
            status.important(NAME, (
                "There is more than one <title>\nin `{}`.\n"
                "Picking the last one for\n`{}/config.json`"
            ).format(path_html, tree))
            # The two literals are concatenated: the trailing space keeps
            # "meta title" from being printed as "metatitle".
            status.log(NAME, (
                'With last <title> tag, set meta '
                'title to "{}"'
            ).format(config['tags']['title']))
        elif flag == 'no-meta_description':
            # Missing description, not a duplicated one
            status.important(NAME, (
                "There is no <meta name='description'> in\n`{}`.\n"
                "Please fill in\n`{}/config.json`"
            ).format(path_html, tree))
        elif flag == 'multiple-meta_descriptions':
            status.important(NAME, (
                "There is more than one <meta name='description'> in\n`{}`.\n"
                "Picking the last one for\n`{}/config.json`"
            ).format(path_html, tree))
            status.log(NAME, (
                'With last <meta name="description"> tag, '
                'set meta description to "{}"'
            ).format(config['tags']['meta_description']))
        elif flag == 'no-tutorial_name':
            status.important(NAME, (
                "Please fill 'tutorial_name' in\n`{}/config.json`"
            ).format(tree))
        else:
            status.log(NAME, (
                'With <title> tag, set meta title to:\n\t"{}"'
            ).format(config['tags']['title']))
            status.log(NAME, (
                'With <meta name="description"> tag, set meta description to:\n\t"{}"'
            ).format(config['tags']['meta_description']))
    return
Пример #47
0
def translate_a_href(soup, dir_url,
                     translate_static, translate_filename_url):
    """Clean up and rewrite the ``href`` of every ``<a>`` found in *soup*.

    Anchors with neither text nor child tags, and anchors without an
    ``href`` attribute, are removed from the tree. For every remaining
    anchor the href goes through these steps, in order:

    1. a prefix listed in *translate_static* is replaced by the value
       computed by ``get_new`` (first matching prefix only);
    2. a Google redirect wrapper (``google.com/url?q=...``) is unwrapped;
    3. hrefs starting with a plot.ly prefix or ``/`` are handled: links
       whose path starts with ``~`` get the ``https://plot.ly/`` prefix,
       the others are made root-relative, and any key of
       *translate_filename_url* found in the href is replaced by its
       mapped value.

    Finally ``add_a_class`` and ``add_a_target_blank`` are applied.

    Parameters:
        soup: tree exposing ``findAll('a')``; modified in place.
        dir_url: forwarded unchanged to ``get_new``.
        translate_static: mapping of href prefix to the value handed to
            ``get_new``.
        translate_filename_url: mapping of href fragment to its
            replacement (presumably file name -> url; verify with caller).

    Returns:
        The same *soup* object.
    """
    google_starts = ('https://www.google.com/url?q=',
                     'http://www.google.com/url?q=')
    google_end = '&'  # TODO could this be more strict?
    href_starts = ['https://plot.ly/', 'plot.ly/', 'http://plot.ly/', '/']

    for a in soup.findAll('a'):
        is_translated = False  # to log relevant output
        # Clean up case
        if not a.getText(strip=True) and not a.findChildren():
            a.extract()
            status.log(NAME, (
                'Anchor with nothing in it found, removing it!'))
            continue
        if not a.has_attr('href'):
            a.extract()
            status.log(NAME, (
                'Anchor without href found, removing it!!'))
            continue
        # Now if 'real' anchor found
        status.log(NAME, ('Anchor found! href: ', a['href']))
        # Case 1: <a> to static location (translated from streambed)
        for href_head in translate_static:
            if a['href'].startswith(href_head):
                status.log(NAME, (
                    '... href has a *static* start: ', href_head))
                new = get_new(a['href'], translate_static[href_head], dir_url)
                a['href'] = a['href'].replace(href_head, new)
                is_translated = True
                break
        # Case *: handle Google redirects
        for google_start in google_starts:
            if a['href'].startswith(google_start):
                status.log(NAME, ('... href has a google redirect'))
                _s = len(google_start)
                _e = a['href'].find(google_end, _s)
                if _e == -1:
                    # No further query parameter: keep the whole target
                    # (slicing with -1 would drop its last character).
                    _e = len(a['href'])
                a['href'] = (
                    a['href'][_s:_e].replace('%3A', ':')
                                    .replace('%2F', '/')
                )
        # Case 2: <a> to url location (translated to relative domain)
        for href_start in href_starts:
            if a['href'].startswith(href_start):
                # 2.1 href to shareplot should have full URI
                if a['href'].startswith(href_start+'~'):
                    status.log(NAME, (
                        '... href links to shareplot:', a['href']))
                    status.log(NAME, (
                        '... guessing this is referring to a plot on prod'))
                    a['href'] = a['href'].replace(
                        href_start, 'https://plot.ly/', 1)
                    is_translated = True
                    continue
                # 2.2 Translate href start to django root
                if not a['href'].startswith('/'):
                    status.log(NAME, ('... href *url* start: ', href_start))
                    a['href'] = a['href'].replace(href_start, '/', 1)
                    is_translated = True
                # 2.3 Translate href to other docs using translate_filename_url
                for href_tail in translate_filename_url:
                    if href_tail in a['href']:
                        status.log(NAME, ('... href has tail: ', a['href']))
                        # Replace the matched tail with its own mapping; the
                        # loop variable and table of Case 1 do not apply here.
                        a['href'] = a['href'].replace(
                            href_tail, translate_filename_url[href_tail])
                        is_translated = True
                        break

        # Log output
        if is_translated:
            status.log(NAME, ('... translated to: ', a['href']))
        else:
            status.log(NAME, ('... no translation required'))
        # Add attributes
        a = add_a_class(a)
        a = add_a_target_blank(a)
    return soup
Пример #48
0
def main():
    """Publish every folder returned by ``get_args``.

    For each folder: load its translation tables, run the consistency
    checks, write ``body.html`` / ``config.json`` and copy the images for
    each raw html file, then generate the folder-wide urls, redirects and
    sitemaps files.
    """
    for folder in get_args():

        # Folder-specific translation tables
        static_map = translate.get_translate_static(folder)
        filename_url_map, redirect_map = (
            translate.get_translate_filename_url(folder))

        # All html files in {folder}/raw/, checked against the
        # filename -> url table (updated if necessary)
        html_paths = check_translate(
            folder, get_paths_html(folder), filename_url_map)

        # Directories to redirect, then published subdirectories
        check_redirects(folder, redirect_map)
        check_published_subdirectories(folder, filename_url_map)

        # (1) body.html and config.json for each html file
        for html_path in html_paths:

            # Published directory url (and name, they are the same!)
            url_dir = filename_url_map[os.path.basename(html_path)]

            # Published tree for this html file
            includes_tree = make_tree(
                [folder, 'published', 'includes', url_dir])

            # Split the parsed page into <body> and <head>
            page_body, page_head = get_body_head(get_soup(html_path))

            # Translate 'href' and 'src' in body
            page_body, image_paths = translate.translate(
                page_body, html_path, url_dir,
                static_map, filename_url_map)

            # Config comes from <head>; <body> is updated afterwards
            page_config = make_config.make_config(
                page_head, html_path, includes_tree)
            page_body = update_body.update_body(page_body)

            # Overwrite body.html and config.json leaves
            leaves = [(page_body, 'body.html'),
                      (page_config, 'config.json')]
            overwrite_leaves(includes_tree, leaves)

            # (2) Copy images in the appropriate published/ subdirectory
            images_tree = make_tree(
                [folder, 'published', 'static', 'images', url_dir])
            copy_leaves(images_tree, image_paths)

            status.log(NAME, '---- done with `{}`\n'.format(url_dir))

        # (3) Folder-wide urls, redirects and sitemaps files
        make_urls.make_urls(folder, filename_url_map)
        make_redirects.make_redirects(folder, redirect_map)
        make_sitemaps.make_sitemaps(folder, filename_url_map)
Пример #49
0
def copy_leaves(tree, paths_leaf):
    """Log the destination, then copy each entry of *paths_leaf* into *tree*."""
    status.log(NAME, ('Copying leaves to', tree))
    for leaf in paths_leaf:
        shutil.copy(leaf, tree)
Пример #50
0
def overwrite_redirects(folder, redirects_py):
    """Write *redirects_py* to ``<folder>/published/redirects.py``, replacing its content."""
    target = "{}/published/redirects.py".format(folder)
    with open(target, "w") as out:
        # Log once the file is open, right before writing
        status.log(NAME, ('Writes in', target))
        out.write(redirects_py)
Пример #51
0
def main():
    """Publish every folder returned by ``get_args``.

    For each folder: load its translation tables, run the consistency
    checks, write ``body.html`` / ``config.json`` and copy the images for
    each raw html file, then generate the folder-wide urls, redirects and
    sitemaps files.
    """
    for folder in get_args():

        # Folder-specific translation tables
        static_map = translate.get_translate_static(folder)
        filename_url_map, redirect_map = (
            translate.get_translate_filename_url(folder))

        # All html files in {folder}/raw/, checked against the
        # filename -> url table (updated if necessary)
        html_paths = check_translate(
            folder, get_paths_html(folder), filename_url_map)

        # Directories to redirect, then published subdirectories
        check_redirects(folder, redirect_map)
        check_published_subdirectories(folder, filename_url_map)

        # (1) body.html and config.json for each html file
        for html_path in html_paths:

            # Published directory url (and name, they are the same!)
            url_dir = filename_url_map[os.path.basename(html_path)]

            # Published tree for this html file
            includes_tree = make_tree(
                [folder, 'published', 'includes', url_dir])

            # Split the parsed page into <body> and <head>, strip the body
            page_body, page_head = get_body_head(get_soup(html_path))
            page_body = update_body.strip(page_body)

            # Translate 'href' and 'src' in body
            page_body, image_paths = translate.translate(
                page_body, html_path, url_dir,
                static_map, filename_url_map)

            # Config comes from <head>; <body> is updated afterwards
            page_config = make_config.make_config(
                page_head, html_path, includes_tree)
            page_body = update_body.update_body(page_body)

            # Overwrite body.html and config.json leaves
            leaves = [(page_body, 'body.html'),
                      (page_config, 'config.json')]
            overwrite_leaves(includes_tree, leaves)

            # (2) Copy images in the appropriate published/ subdirectory
            images_tree = make_tree(
                [folder, 'published', 'static', 'images', url_dir])
            copy_leaves(images_tree, image_paths)

            status.log(NAME, '---- done with `{}`\n'.format(url_dir))

        # (3) Folder-wide urls, redirects and sitemaps files
        make_urls.make_urls(folder, filename_url_map)
        make_redirects.make_redirects(folder, redirect_map)
        make_sitemaps.make_sitemaps(folder, filename_url_map)
Пример #52
0
def overwrite_sitemaps(folder,sitemaps_py):
    """Replace the content of ``<folder>/published/sitemaps.py`` with *sitemaps_py*."""
    sitemaps_path = "{}/published/sitemaps.py".format(folder)
    with open(sitemaps_path, "w") as sitemaps_file:
        sitemaps_file.write(sitemaps_py)
        # Logged after the write, while the file is still open
        written_message = ('Writes in', sitemaps_path)
        status.log(NAME, written_message)
    return
    #Create the log files if they do not exist
    #TODO: Add a time stamp when the file was created
    b_log = open(os.path.join(batch_path, 'batch_log.txt'), 'a+')
    d_log = open(os.path.join(batch_path, 'detected_log.txt'), 'a+')
    o_log = open(os.path.join(batch_path, 'other_log.txt'), 'a+')
    e_log = open(os.path.join(batch_path, 'error_log.txt'), 'a+')

    #--------------------------------------------------------------------------------------------------------
    #--------------------------------------------------------------------------------------------------------

    if __name__ == '__main__':
        total_time.start()
        status('-i-', 'Initialization completed')
        status('-i-', 'Beginning Image Analysis...')
        log(b_log, '-i-', 'Initialization completed')
        log(b_log, '-i-', 'Beginning Image Analysis...')

    #--------------------------------------------------------------------------------------------------------
    #--------------------------------------------------------------------------------------------------------

    if __name__ == '__main__':  #Free up the log files
        b_log.close()
        d_log.close()
        o_log.close()
        e_log.close()

    #--------------------------------------------------------------------------------------------------------
    #--------------------------------------------------------------------------------------------------------

    if __name__ == '__main__':  #In Windows you need to protect the thread creation froms each child thread. If not done, each child thread will create subthreads.