Python getNeighborParas示例，shared_utils.lxml_utils.getNeighborParas Python示例

示例#1

0

显示文件

文件： doc_prepare.py 项目： macmillanpublishers/sectionstart_converter

def insertEbookISBN(report_dict, doc_root, copyrightsection_stylename,
                    copyrightstyles, isbn, isbnstyle):
    """Append an ebook-ISBN paragraph to the end of the copyright block.

    Starting at the first paragraph styled ``copyrightsection_stylename``,
    walks forward while the following paragraph's style is one of
    ``copyrightstyles``; a new paragraph (styled ``copyrightstyles[0]``,
    containing ``isbn``) is inserted after the last such paragraph, and a
    run-style element referencing ``isbnstyle`` is placed just before the
    new paragraph's text element.

    Returns the ``report_dict`` that was passed in.
    """
    logger.info("* * * commencing insertEbookISBN function...")

    # begin at the section-start para, then advance through contiguous
    # copyright-styled paras to find the insertion point
    anchor_para = lxml_utils.findParasWithStyle(copyrightsection_stylename,
                                                doc_root)[0]
    neighbors = lxml_utils.getNeighborParas(anchor_para)
    while neighbors["nextstyle"] in copyrightstyles:
        anchor_para = neighbors["next"]
        neighbors = lxml_utils.getNeighborParas(anchor_para)

    # insert the isbn para directly after the last copyright para
    lxml_utils.insertPara(copyrightstyles[0], anchor_para, doc_root, isbn,
                          "after")
    isbn_para = anchor_para.getnext()
    isbn_text = isbn_para.find(".//*w:t", wordnamespaces)

    # build <w:rPr><w:rStyle w:val=isbnstyle/></w:rPr> and put it ahead of
    # the text element so the run picks up the isbn character style
    run_props = etree.Element("{%s}rPr" % wnamespace)
    run_style = etree.Element("{%s}rStyle" % wnamespace)
    run_style.set("{%s}val" % wnamespace, isbnstyle)
    run_props.append(run_style)
    isbn_text.addprevious(run_props)

    # record the insertion for the report
    lxml_utils.logForReport_old(report_dict, doc_root, isbn_para,
                                "added_ebook_isbn", "added '%s'" % isbn)
    return report_dict

示例#2

0

显示文件

文件： doc_prepare.py 项目： macmillanpublishers/sectionstart_converter

def concatTitleParas(titlestyle, report_dict, doc_root):
    """Merge consecutive title paragraphs into the first one.

    Finds the first paragraph carrying ``titlestyle``, then consumes every
    immediately-following paragraph with the same style: its text is appended
    (space-separated) to the running title and the paragraph is removed from
    the tree.  If anything was appended, the combined title is written to the
    first title paragraph via ``lxml_utils.addRunToPara`` and the change is
    logged for the report.

    Returns ``report_dict`` (as returned by the ``replaceSoftBreak`` calls).
    """
    logger.info("* * * commencing concatTitleParas function...")

    # pStyle -> pPr -> p : climb two levels from the style element to its para
    style_query = ".//*w:pStyle[@w:val='%s']" % titlestyle
    style_element = doc_root.find(style_query, wordnamespaces)
    first_title_para = style_element.getparent().getparent()

    # soft breaks in the first title para are replaced before reading its text
    report_dict = replaceSoftBreak(first_title_para, report_dict)

    original_title = lxml_utils.getParaTxt(first_title_para)
    combined_title = original_title
    neighbors = lxml_utils.getNeighborParas(first_title_para)

    while neighbors['nextstyle'] == titlestyle:
        extra_para = neighbors['next']
        # replace soft breaks here too (adds spaces where a softbreak was used)
        report_dict = replaceSoftBreak(extra_para, report_dict)
        combined_title = "%s %s" % (combined_title,
                                    lxml_utils.getParaTxt(extra_para))
        # look ahead *before* removing the para, while it is still in the tree
        neighbors = lxml_utils.getNeighborParas(extra_para)
        extra_para.getparent().remove(extra_para)

    # nothing was merged: leave the first title para untouched
    if combined_title == original_title:
        return report_dict

    # replace the first title para's contents with the full title, then log
    lxml_utils.addRunToPara(combined_title, first_title_para, True)
    lxml_utils.logForReport_old(
        report_dict, doc_root, neighbors['next'],
        "concatenated_extra_titlepara_and_removed", combined_title)
    return report_dict

示例#3

0

显示文件

def evalPosition(sectionname, section_start_rules, cbstring, sectionbegin_para,
                 sectiontypes):
    """Check the 'position' rule for ``sectionname`` against the document.

    Scans upward and downward from ``sectionbegin_para`` for the nearest
    paragraph style listed in ``sectiontypes["all"]``.  When none is found
    above, the first ``sectiontypes["frontmatter"]`` entry is assumed; when
    none is found below, the first ``sectiontypes["backmatter"]`` entry is
    assumed.  The rule's ``position`` value is then compared with those two
    surrounding section-start styles.

    Returns True if the position criteria are matched, otherwise False.
    """
    logger.debug("evaluate 'position' rule...")

    # nearest section-start style above
    neighbors = lxml_utils.getNeighborParas(sectionbegin_para)
    while neighbors['prevstyle'] and neighbors['prevstyle'] not in sectiontypes[
            "all"]:
        neighbors = lxml_utils.getNeighborParas(neighbors['prev'])
    last_sectionstart = neighbors['prevstyle']
    if last_sectionstart not in sectiontypes["all"]:
        # no preceding section start: treat as frontmatter
        last_sectionstart = sectiontypes["frontmatter"][0]

    # nearest section-start style below
    neighbors = lxml_utils.getNeighborParas(sectionbegin_para)
    while neighbors['nextstyle'] and neighbors['nextstyle'] not in sectiontypes[
            "all"]:
        neighbors = lxml_utils.getNeighborParas(neighbors['next'])
    next_sectionstart = neighbors['nextstyle']
    if next_sectionstart not in sectiontypes["all"]:
        # no following section start: treat as backmatter
        next_sectionstart = sectiontypes["backmatter"][0]

    # the position the rule asks for
    position = section_start_rules[sectionname]["position"]

    if position == "frontmatter" and last_sectionstart in sectiontypes[
            "frontmatter"]:
        logger.debug(
            "'frontmatter' criteria matched- prev_sectionstart: '%s'" %
            last_sectionstart)
        return True

    if position == "main" and (last_sectionstart in sectiontypes["main"]
                               or next_sectionstart in sectiontypes["main"]):
        logger.debug("'main' criteria matched- betweem '%s' and '%s'" %
                     (last_sectionstart, next_sectionstart))
        return True

    if position == "backmatter" and next_sectionstart in sectiontypes[
            "backmatter"]:
        logger.debug("'backmatter' criteria matched- next_sectionstart: '%s'" %
                     next_sectionstart)
        return True

    logger.debug("'%s' criteria not matched- betweem '%s' and '%s'" %
                 (position, last_sectionstart, next_sectionstart))
    return False

示例#4

0

显示文件

def evalSectionRequired(sectionname, section_start_rules, doc_root,
                        titlestyle):
    """Locate where a required-but-missing section start should be inserted.

    If ``checkForParaStyle`` reports False for the section's style, scans
    downward from the first paragraph styled ``titlestyle`` until it meets a
    paragraph whose style is one of the rule's ``insert_before`` styles.

    Returns that paragraph, or None when the check does not report False,
    no title paragraph exists, or no ``insert_before`` style follows the
    title paragraph.
    """
    logger.debug("evaluate section-required rule...")

    # comparison against False is kept as-is (not a plain truthiness test)
    style_present = checkForParaStyle(
        lxml_utils.transformStylename(sectionname), doc_root)
    if style_present != False:
        return None

    insert_before_styles = [
        lxml_utils.transformStylename(stylename) for stylename in
        section_start_rules[sectionname]["section_required"]["insert_before"]
    ]

    # Scanning down from the titlepage is used rather than locating the first
    # occurrence of every insert style and comparing paragraph indexes.
    title_query = ".//*w:pStyle[@w:val='%s']" % lxml_utils.transformStylename(
        titlestyle)
    title_style_element = doc_root.find(title_query, wordnamespaces)
    if title_style_element is None:
        logger.debug(
            "no titlepageTitle para, cannot process sectionrequired")
        return None

    # pStyle -> pPr -> p
    title_para = title_style_element.getparent().getparent()

    # advance until the next para has an insert_before style, or we run out
    neighbors = lxml_utils.getNeighborParas(title_para)
    while neighbors['nextstyle'] and neighbors[
            'nextstyle'] not in insert_before_styles:
        neighbors = lxml_utils.getNeighborParas(neighbors['next'])

    found_style = neighbors['nextstyle']
    if found_style not in insert_before_styles:
        logger.debug(
            "no 'insert_before' styles found, cannot insert sectionstart"
        )
        return None

    logger.debug(
        "section_required criteria met; 1st insertbefore_style: '%s'"
        % found_style)
    return neighbors['next']

示例#5

0

显示文件

def deletePrecedingPageBreak(para, report_dict):
    """Remove a page break sitting in the paragraph just before ``para``.

    Behaviour depends on how many page-break elements the preceding
    paragraph holds and whether it has non-whitespace text:

    * no page breaks                -> nothing is removed
    * page break(s) plus text       -> nothing is removed
    * exactly one break, no text    -> the whole preceding paragraph is removed
    * several breaks, no text       -> only the last break element is removed

    Returns ``report_dict`` unchanged.
    """
    logger.debug("checking for page break in preceding para...")
    neighbors = lxml_utils.getNeighborParas(para)
    prev_para = neighbors['prev']

    # NOTE(review): len() is used on purpose here; presumably the neighbor
    # lookup yields a zero-length value when there is no preceding para --
    # confirm against getNeighborParas before changing this check.
    if not len(prev_para):
        return report_dict

    page_breaks = prev_para.findall(".//*w:br[@w:type='page']",
                                    wordnamespaces)
    break_count = len(page_breaks)

    if break_count == 0:
        logger.info("preceding para is not a pagebreak, skipping delete")
        return report_dict

    # strip() is needed: apparently a page break carries some whitespace value
    if neighbors['prevtext'].strip():
        logger.info("preceding pagebreak has text contents, not deleting")
    elif break_count == 1:
        logger.info("empty preceding pb para, deleting it")
        prev_para.getparent().remove(prev_para)
    else:
        logger.info(
            "multiple pagebreak chars in preceding para: removing the last one"
        )
        last_break = page_breaks[-1]
        last_break.getparent().remove(last_break)

    return report_dict

示例#6

0

显示文件

def findSectionBegin(sectionname, section_start_rules, doc_root,
                     versatileblockparas, para, cbstring):
    """Find the paragraph at which the section containing ``para`` begins.

    Walks upward from ``para`` for as long as the previous paragraph's style
    is one of the rule's ``styles``, its ``optional_heading_styles`` (when
    present) or one of ``versatileblockparas``.  The section beginning is
    moved up to every visited paragraph whose own style is a header style
    (required or optional).

    Returns a tuple ``(sectionbegin_para, firstStyleOfBlock)``;
    ``firstStyleOfBlock`` becomes False once a paragraph with one of the
    rule's required ``styles`` has been seen above ``para``.
    """
    block_rules = section_start_rules[sectionname][cbstring]

    # required header styles, plus optional ones when the rule defines them
    required_headers = [
        lxml_utils.transformStylename(stylename)
        for stylename in block_rules["styles"]
    ]
    if "optional_heading_styles" in block_rules:
        optional_headers = [
            lxml_utils.transformStylename(stylename)
            for stylename in block_rules["optional_heading_styles"]
        ]
        header_styles = required_headers + optional_headers
    else:
        header_styles = required_headers
    scannable_styles = header_styles + versatileblockparas

    section_start = para
    first_style_of_block = True
    neighbors = lxml_utils.getNeighborParas(para)

    # scan upwards through optional headers, versatile block paras and
    # listed styles (contiguous-block criteria)
    while neighbors['prevstyle'] in scannable_styles:
        logger.debug("found leading header/versatile styled para:'%s'" %
                     neighbors['prevstyle'])
        current_para = neighbors['prev']
        neighbors = lxml_utils.getNeighborParas(current_para)
        current_style = lxml_utils.getParaStyle(current_para)

        # versatile block paras are stepped over without moving the start
        if current_style not in header_styles:
            continue
        section_start = current_para
        # a required style above us means this para is not first in its block
        if current_style in required_headers:
            first_style_of_block = False

    return section_start, first_style_of_block

示例#7

0

显示文件

def evalPrevUntil(sectionname, section_start_rules, cbstring,
                  sectionbegin_para):
    """Evaluate the 'previous_until' rule for a section.

    Scans upward from ``sectionbegin_para`` until the previous paragraph's
    style is either one of the rule's ``previous_sibling.required_styles`` or
    one of its ``previous_until`` styles, or until no previous style is left.

    Returns:
        True when a ``previous_until`` style is reached first; False when a
        required style is reached first or when the scan runs out of
        preceding styles.
    """
    logger.debug("evaluating previous until rule...")
    block_rules = section_start_rules[sectionname][cbstring]
    requiredstyles = [
        lxml_utils.transformStylename(style)
        for style in block_rules["previous_sibling"]["required_styles"]
    ]
    prevuntil_styles = [
        lxml_utils.transformStylename(style)
        for style in block_rules["previous_until"]
    ]
    required_plus_prevuntil_styles = requiredstyles + prevuntil_styles

    # walk upwards until a required/prev_until style is directly above us,
    # or there is no previous style left
    pneighbors = lxml_utils.getNeighborParas(sectionbegin_para)
    while pneighbors['prevstyle'] and pneighbors[
            'prevstyle'] not in required_plus_prevuntil_styles:
        pneighbors = lxml_utils.getNeighborParas(pneighbors['prev'])
    found_style = pneighbors['prevstyle']

    # required styles are tested first, so a style present in both lists
    # counts as a required style
    if found_style in requiredstyles:
        logger.debug(
            "false: found required-style before prev_until-style:'%s'" %
            found_style)
        return False
    if found_style in prevuntil_styles:
        # message corrected: this branch means the prev_until style came first
        logger.debug(
            "true: found prev_until-style before required-style:'%s'" %
            found_style)
        return True

    # the loop only ends without a match when no previous style remains
    logger.debug(
        "false: reached the beginning of the document, which indicates erroneous styling"
    )
    return False

示例#8

0

显示文件

def precedingStyleCheck(sectionname, section_start_rules, cbstring,
                        sectionbegin_para, sectiontypes):
    """Report whether the paragraph before the section already fits.

    The previous paragraph's style is acceptable when it is one of the rule's
    ``previous_sibling.required_styles`` or any style in
    ``sectiontypes["all"]``.

    Returns True if the preceding style is acceptable, otherwise False.
    """
    logger.debug("checking prev-sibling for existing acceptable style...")

    sibling_rule = section_start_rules[sectionname][cbstring][
        "previous_sibling"]
    acceptable_styles = [
        lxml_utils.transformStylename(stylename)
        for stylename in sibling_rule["required_styles"]
    ] + sectiontypes["all"]

    prev_style = lxml_utils.getNeighborParas(sectionbegin_para)["prevstyle"]
    already_acceptable = prev_style in acceptable_styles
    if already_acceptable:
        logger.debug("previous style already has section start style: '%s'" %
                     prev_style)
    return already_acceptable