Exemplo n.º 1
0
def logTextOfRunsWithStyle(report_dict,
                           doc_root,
                           stylename,
                           report_category,
                           sectionnames,
                           scriptname=""):
    """Log the text of each contiguous group of runs styled as `stylename`.

    Neighboring runs that share the style are treated as a single group: the
    group is handled when its first run is reached, and the text of the
    following same-styled runs is appended to it. One entry per group is
    passed to lxml_utils.logForReport under `report_category`.

    Args:
        report_dict: report structure handed to logForReport (its return
            value is ignored here, so it is presumably updated in place).
        doc_root: root element of the document xml being scanned.
        stylename: long style name; transformed once for all comparisons.
        report_category: report key the entries are logged under.
        sectionnames: forwarded to validateImageHolders only.
        scriptname: when equal to 'rsuitevalidate' and `stylename` is listed
            in cfg.imageholder_styles, validateImageHolders is also run.

    Returns:
        The `report_dict` object that was passed in.
    """
    logger.info("Logging runs styled as %s to report_dict['%s']" %
                (stylename, report_category))
    # the transformed name never changes inside the loops, so compute it once
    shortstylename = lxml_utils.transformStylename(stylename)
    for run in lxml_utils.findRunsWithStyle(shortstylename, doc_root):
        # skip if the previous run has the same style: that group was already
        # processed when its first run was reached
        rneighbors = lxml_utils.getNeighborRuns(run)
        if rneighbors['prevstyle'] == shortstylename:
            continue
        # aggregate the text of subsequent runs while the style stays the same
        runtxt = lxml_utils.getParaTxt(run)
        while rneighbors['nextstyle'] == shortstylename:
            nextrun = rneighbors['next']
            runtxt += lxml_utils.getParaTxt(nextrun)
            rneighbors = lxml_utils.getNeighborRuns(nextrun)
        para = run.getparent()
        # for rsuitevalidate with an imageholder style, extra checks are needed
        if stylename in cfg.imageholder_styles and scriptname == 'rsuitevalidate':
            validateImageHolders(report_dict, doc_root, stylename, para,
                                 runtxt, sectionnames)
        lxml_utils.logForReport(report_dict, doc_root, para, report_category,
                                runtxt, ['para_string', 'para_index'])
    return report_dict
Exemplo n.º 2
0
def getSectionTypes(section_start_rules):
    """Group transformed section-start style names by section type.

    Args:
        section_start_rules: mapping of section name -> rule dict; each rule
            dict must contain a "section_type" entry.

    Returns:
        A dict with the keys 'all', 'frontmatter', 'main' and 'backmatter'.
        Every transformed section name is listed under 'all', and also under
        its own section type when that type is one of the other three keys.
        Any other section type is only listed under 'all'.

    Raises:
        KeyError: if a rule has no "section_type" entry.
    """
    logger.debug("getting section type lists")
    sectiontypes = {'all': [], 'frontmatter': [], 'main': [], 'backmatter': []}
    # .items() works on Python 2 and 3; .iteritems() exists on Python 2 only
    for sectionname, rule in section_start_rules.items():
        shortname = lxml_utils.transformStylename(sectionname)
        sectiontypes["all"].append(shortname)
        section_type = rule["section_type"]
        # 'all' is deliberately excluded so a name is never added to it twice
        if section_type in ("frontmatter", "main", "backmatter"):
            sectiontypes[section_type].append(shortname)
    return sectiontypes
Exemplo n.º 3
0
def evalSectionRequired(sectionname, section_start_rules, doc_root,
                        titlestyle):
    """Find the para before which a required section start should be inserted.

    If no para styled as `sectionname` is present, the document is scanned
    downwards from the first para styled as `titlestyle` until a para is
    found whose style is one of the rule's "insert_before" styles.

    Returns:
        The first para following the title para that carries an
        "insert_before" style, or None when the section already exists, no
        title para is found, or no "insert_before" style follows the title.
    """
    logger.debug("evaluate section-required rule...")
    # the exact '== False' comparison of the helper's result is kept as-is
    section_missing = checkForParaStyle(
        lxml_utils.transformStylename(sectionname), doc_root) == False
    if not section_missing:
        # this section start is already present: nothing to insert
        return None

    required_rule = section_start_rules[sectionname]["section_required"]
    insertstyles = [
        lxml_utils.transformStylename(s)
        for s in required_rule["insert_before"]
    ]

    # Scanning down from the top of the document (the titlepage) was chosen
    #   over finding the first occurrence of every insert style and comparing
    #   para indexes; for the only section_required style in use when this
    #   was written (section-chapter) it seemed less resource intensive.
    # A doc without a titlepage is possible, but then there are bigger problems.
    searchstring = ".//*w:pStyle[@w:val='%s']" % lxml_utils.transformStylename(
        titlestyle)
    titlestyle_element = doc_root.find(searchstring, wordnamespaces)
    if titlestyle_element is None:
        logger.debug(
            "no titlepageTitle para, cannot process sectionrequired")
        return None

    # the pStyle element sits two levels below its paragraph element
    titlepara = titlestyle_element.getparent().getparent()
    pneighbors = lxml_utils.getNeighborParas(titlepara)
    # walk down until the next para has an insert style, or has no style
    while pneighbors['nextstyle'] and pneighbors[
            'nextstyle'] not in insertstyles:
        pneighbors = lxml_utils.getNeighborParas(pneighbors['next'])

    next_sectionstart = pneighbors['nextstyle']
    # the loop also ends when no insert style ever followed the title
    if next_sectionstart not in insertstyles:
        logger.debug(
            "no 'insert_before' styles found, cannot insert sectionstart")
        return None

    sectionbegin_para = pneighbors['next']
    logger.debug(
        "section_required criteria met; 1st insertbefore_style: '%s'" %
        next_sectionstart)
    return sectionbegin_para
Exemplo n.º 4
0
def getMatchingParas(sectionname, section_start_rules, doc_root, cbstring):
    """Collect the paras styled with any of the rule's "styles".

    The styles are read from section_start_rules[sectionname][cbstring] and
    are searched one after another, so the result is ordered by style first
    and by findall order within each style.

    Returns:
        A list of the grandparent elements of every matching pStyle element.
    """
    logger.debug("getting matchingParas...")
    rule_styles = section_start_rules[sectionname][cbstring]["styles"]
    matchingParas = [
        # the pStyle element sits two levels below its paragraph element
        pstyle.getparent().getparent()
        for rule_style in rule_styles
        for pstyle in doc_root.findall(
            ".//*w:pStyle[@w:val='%s']" %
            lxml_utils.transformStylename(rule_style), wordnamespaces)
    ]
    logger.debug("found '%s' matchingParas" % len(matchingParas))
    return matchingParas
Exemplo n.º 5
0
def findSectionBegin(sectionname, section_start_rules, doc_root,
                     versatileblockparas, para, cbstring):
    """Scan upwards from `para` to find where its section block begins.

    Preceding paras are walked for as long as their style is one of the
    rule's "styles", one of its "optional_heading_styles" (when present), or
    one of `versatileblockparas`.

    Returns:
        A tuple (sectionbegin_para, firstStyleOfBlock):
        sectionbegin_para is the topmost visited para whose style is a header
        or optional header style (or `para` itself if there is none);
        firstStyleOfBlock is False when an earlier para in the block carries
        one of the rule's "styles", i.e. `para` does not start its own block.
    """
    block_rule = section_start_rules[sectionname][cbstring]
    headers = [lxml_utils.transformStylename(s) for s in block_rule["styles"]]
    allheaders = list(headers)
    if "optional_heading_styles" in block_rule:
        allheaders += [
            lxml_utils.transformStylename(s)
            for s in block_rule["optional_heading_styles"]
        ]
    scan_styles = allheaders + versatileblockparas

    # loop state and return values
    sectionbegin_para = para
    firstStyleOfBlock = True
    pneighbors = lxml_utils.getNeighborParas(para)

    # scan upwards through optional headers, versatile block paras, or styles
    #  from the style list (for the contiguous block criteria)
    while pneighbors['prevstyle'] in scan_styles:
        logger.debug("found leading header/versatile styled para:'%s'" %
                     pneighbors['prevstyle'])
        # step one para up
        candidate = pneighbors['prev']
        pneighbors = lxml_utils.getNeighborParas(candidate)
        candidate_style = lxml_utils.getParaStyle(candidate)
        if candidate_style not in allheaders:
            # a versatile block para: keep scanning, the section start stays put
            continue
        # an (optional) header precedes the block: the section starts here
        sectionbegin_para = candidate
        if candidate_style in headers:
            # another matching style para starts this block, so the caller
            #  can stop processing this particular match
            firstStyleOfBlock = False

    return sectionbegin_para, firstStyleOfBlock
Exemplo n.º 6
0
def evalPrevUntil(sectionname, section_start_rules, cbstring,
                  sectionbegin_para):
    """Evaluate the "previous_until" rule for a candidate section start.

    Paras above `sectionbegin_para` are scanned upwards until one is found
    whose style is either a required previous-sibling style or a
    "previous_until" style, or until a para with no (falsy) style is reached.

    Returns:
        True when a "previous_until" style is reached first. False when a
        required style is reached first (it takes precedence if a style is in
        both lists), or when the scan ends without meeting either.
    """
    logger.debug("evaluating previous until rule...")
    block_rule = section_start_rules[sectionname][cbstring]
    requiredstyles = [
        lxml_utils.transformStylename(style)
        for style in block_rule["previous_sibling"]["required_styles"]
    ]
    prevuntil_styles = [
        lxml_utils.transformStylename(style)
        for style in block_rule["previous_until"]
    ]
    required_plus_prevuntil_styles = requiredstyles + prevuntil_styles

    # scan upwards until we hit one of the styles we are looking for
    pneighbors = lxml_utils.getNeighborParas(sectionbegin_para)
    while pneighbors['prevstyle'] and pneighbors[
            'prevstyle'] not in required_plus_prevuntil_styles:
        pneighbors = lxml_utils.getNeighborParas(pneighbors['prev'])

    # figure out whether we matched a prevuntil style or a required style
    prevstyle = pneighbors['prevstyle']
    if prevstyle in requiredstyles:
        logger.debug(
            "false: found required-style before prev_until-style:'%s'" %
            prevstyle)
        return False
    if prevstyle in prevuntil_styles:
        # this message used to repeat the 'required-style before' wording of
        # the False branch, which made the debug log contradict the result
        logger.debug(
            "true: found prev_until-style before required-style:'%s'" %
            prevstyle)
        return True
    # the loop can only end here because prevstyle was falsy
    logger.debug(
        "false: reached the beginning of the document, which indicates erroneous styling"
    )
    return False
Exemplo n.º 7
0
def getAllStylesUsed_RevertToBase(stylematch,
                                  macmillanstyles,
                                  report_dict,
                                  doc_root,
                                  stylename_full,
                                  para,
                                  sectionnames,
                                  run_style=None):
    """Revert a custom style to its Macmillan base style, or log its use.

    The style definition (the parent of `stylematch`) is searched for a
    w:basedOn element. If the based-on style is a Macmillan style, the run
    style (when `run_style` is given) or the para's pStyle is rewritten to
    it and the change is logged. Otherwise the style is logged as a
    non-Macmillan char style, table-para style or regular para style.
    Nothing is changed or logged when the style has no w:basedOn element.

    Args:
        stylematch: element inside the style definition in styles.xml.
        macmillanstyles: long names of the accepted styles.
        report_dict: report structure handed to logForReport.
        doc_root: root element of the document xml.
        stylename_full: long name of the style being evaluated.
        para: paragraph element used for logging and for the pStyle rewrite.
        sectionnames: forwarded to logForReport for para-style entries.
        run_style: rStyle element when a character style is being handled;
            None for paragraph styles.

    Returns:
        The `report_dict` object that was passed in.
    """
    # defined up front: it used to be assigned only in the para-style branch,
    # so the run-style branch raised UnboundLocalError
    attrib_style_key = '{%s}val' % wnamespace
    macmillanstyle_shortnames = [
        lxml_utils.transformStylename(s) for s in macmillanstyles
    ]
    basedon_element = stylematch.getparent().find(".//w:basedOn",
                                                  wordnamespaces)
    if basedon_element is None:
        return report_dict

    basedonstyle = basedon_element.get(attrib_style_key)
    if basedonstyle in macmillanstyle_shortnames:
        if run_style is not None:
            run_style.set(attrib_style_key, basedonstyle)
        else:
            para.find(".//*w:pStyle",
                      wordnamespaces).set(attrib_style_key, basedonstyle)
        # log the reversion for the report
        lxml_utils.logForReport(
            report_dict, doc_root, para,
            'changed_custom_style_to_Macmillan_basestyle',
            "'{}', based on '{}'".format(stylename_full, basedonstyle))
    elif run_style is not None:
        # log char styles
        lxml_utils.logForReport(report_dict, doc_root, para,
                                'non-Macmillan_charstyle_used',
                                stylename_full)
    # log para styles not reverted to base; separate category for table paras
    elif para.getparent().tag == '{{{}}}tc'.format(wnamespace):
        lxml_utils.logForReport(report_dict, doc_root, para,
                                'non-Macmillan_style_used_in_table',
                                stylename_full, ['section_info'],
                                sectionnames)
    # and regular paras
    else:
        lxml_utils.logForReport(report_dict, doc_root, para,
                                'non-Macmillan_style_used',
                                stylename_full, ['section_info'],
                                sectionnames)
    return report_dict
Exemplo n.º 8
0
def logTextOfParasWithStyle(report_dict,
                            doc_root,
                            stylename,
                            report_category,
                            sectionnames,
                            scriptname=""):
    """Log the text of every para styled as `stylename`.

    Each para returned by lxml_utils.findParasWithStyle is passed, with its
    text, to lxml_utils.logForReport under `report_category`. When
    `stylename` is listed in cfg.imageholder_styles and `scriptname` is
    'rsuitevalidate', validateImageHolders is run on the para first.

    Returns:
        The `report_dict` object that was passed in.
    """
    logger.info("Logging paras styled as '%s' to report_dict['%s']" %
                (stylename, report_category))
    shortstylename = lxml_utils.transformStylename(stylename)
    report_fields = ['para_string', 'para_index']
    for styled_para in lxml_utils.findParasWithStyle(shortstylename, doc_root):
        styled_text = lxml_utils.getParaTxt(styled_para)
        # imageholder styles get extra checks when run for rsuitevalidate
        needs_image_check = (stylename in cfg.imageholder_styles
                             and scriptname == 'rsuitevalidate')
        if needs_image_check:
            validateImageHolders(report_dict, doc_root, stylename,
                                 styled_para, styled_text, sectionnames)
        lxml_utils.logForReport(report_dict, doc_root, styled_para,
                                report_category, styled_text, report_fields)
    return report_dict
Exemplo n.º 9
0
def precedingStyleCheck(sectionname, section_start_rules, cbstring,
                        sectionbegin_para, sectiontypes):
    """Tell whether the para before `sectionbegin_para` is already acceptable.

    The previous para's style is acceptable when it is one of the rule's
    required previous-sibling styles or any style in sectiontypes["all"].

    Returns:
        True if the previous para's style is acceptable, else False.
    """
    logger.debug("checking prev-sibling for existing acceptable style...")
    sibling_rule = section_start_rules[sectionname][cbstring][
        "previous_sibling"]
    acceptable_styles = [
        lxml_utils.transformStylename(s)
        for s in sibling_rule["required_styles"]
    ] + sectiontypes["all"]
    # look at the style of the para directly above the section beginning
    pneighbors = lxml_utils.getNeighborParas(sectionbegin_para)
    if pneighbors["prevstyle"] not in acceptable_styles:
        return False
    logger.debug("previous style already has section start style: '%s'" %
                 pneighbors["prevstyle"])
    return True
            cfg.doc_xml, cfg.settings_xml, cfg.footnotes_xml, cfg.endnotes_xml)

        # log for the rest o the validator suite:
        isbn_dict["password_protected"] = protection

        ########## RUN STUFF
        # Basic requirements passed, proceed with validation & cleanup
        if protection == "":
            logger.info("Proceeding with isbn_check! protection='%s')" %
                        (protection))

            # get doc_root
            doc_xml = cfg.doc_xml
            doc_tree = etree.parse(doc_xml)
            doc_root = doc_tree.getroot()
            isbnstyle = lxml_utils.transformStylename(cfg.isbnstyle)
            hyperlinkstyle = lxml_utils.transformStylename(cfg.hyperlinkstyle)

            # # # scan for styled ISBNs and strip non-ISBN chars
            isbn_dict, isbn_dict[
                "styled_isbns"] = doc_prepare.removeNonISBNsfromISBNspans(
                    isbn_dict, doc_root, isbnstyle, cfg.isbnspanregex)

            # # # scan for unstyled ISBNs and style them. Also captures properly styled isbns that may have spanned multiple 'runs' in xml
            isbn_dict, isbn_dict[
                "programatically_styled_isbns"] = doc_prepare.styleLooseISBNs(
                    isbn_dict, cfg.isbnregex, cfg.isbnspanregex, doc_root,
                    isbnstyle, hyperlinkstyle)

            # # # run it again, to clean up any isbn-styled leading/trailing txt created incidentally from the last method
            isbn_dict, isbn_dict[
def docPrepare(report_dict):
    """Run the document-preparation pipeline on cfg.doc_xml.

    The document xml is parsed and passed, in order, through: object and
    section-break deletion, char-style removal from headings, ISBN span
    cleanup, insertion of the required Copyright and Titlepage section
    starts, ISBN / author / title insertion from the book-info lookup, title
    para concatenation, section-start tallying, removal of empty first/last
    paras and section autonumbering. The result is written back to
    cfg.doc_xml and the report's location info is recalculated.

    The order of the steps matters; see the inline notes.

    Args:
        report_dict: report structure threaded through every step.

    Returns:
        The report as returned by the final step.
    """
    logger.info("* * * commencing docPrepare function...")
    # local vars
    bookinfo_json = os.path.join(cfg.tmpdir, "book_info.json")
    config_json = os.path.join(cfg.tmpdir, "config.json")
    section_start_rules_json = cfg.section_start_rules_json
    styleconfig_json = cfg.styleconfig_json
    doc_xml = cfg.doc_xml
    doc_tree = etree.parse(doc_xml)
    doc_root = doc_tree.getroot()
    isbnstyle = lxml_utils.transformStylename(cfg.isbnstyle)
    isbnspanregex = cfg.isbnspanregex

    logger.info("reading in json resource files")
    # read rules & heading-style list from JSONs
    section_start_rules = os_utils.readJSON(section_start_rules_json)
    styleconfig_dict = os_utils.readJSON(styleconfig_json)

    # set vars based on JSON imports; the first character of each
    # "headingparas" entry is dropped
    headingstyles = [
        classname[1:] for classname in styleconfig_dict["headingparas"]
    ]
    bookinfo = getBookInfoFromExternalLookups(bookinfo_json, config_json)

    # get Section Start names & styles from sectionstartrules
    sectionnames = lxml_utils.getAllSectionNamesFromSSR(section_start_rules)

    # delete shapes, pictures, clip art, + section breaks
    report_dict, doc_root = deleteObjects(
        report_dict, doc_root, cfg.shape_objects + cfg.section_break,
        "shapes_and_section_breaks")

    # remove character styles from headings in list
    report_dict = rmCharStylesFromHeads(report_dict, doc_root,
                                        cfg.nocharstyle_headingstyles)
    report_dict = rmCharStylesFromHeads(report_dict, doc_root, headingstyles)

    # # # setup required frontmatter
    # remove non-isbn chars from ISBN span (the second return value is not
    # needed here)
    report_dict, _isbns = removeNonISBNsfromISBNspans(report_dict, doc_root,
                                                      isbnstyle, isbnspanregex)
    # make sure Copyright page exists, with isbn from lookup
    report_dict = insertRequiredSectionStart(cfg.copyrightsection_stylename,
                                             doc_root, "Copyright",
                                             report_dict)
    # rm existing styled ISBNs and append isbn from lookup after the last
    # Copyright Page section
    if bookinfo["isbn"]:
        report_dict = removeTextWithCharacterStyle(report_dict, doc_root,
                                                   isbnstyle)
        report_dict = insertEbookISBN(report_dict, doc_root,
                                      cfg.copyrightsection_stylename,
                                      cfg.copyrightstyles, bookinfo["isbn"],
                                      isbnstyle)
    else:
        # Logger.warn is a deprecated alias; Logger.warning is the
        # supported spelling
        logger.warning(
            "No lookup-ISBN available, skipping ISBN cleanup & auto-insertion."
        )
    # make sure Titlepage exists: leaving contents empty, that will get auto-added
    report_dict = insertRequiredSectionStart(cfg.titlesection_stylename,
                                             doc_root, "", report_dict)
    # add author info to titlepage if it's not present
    report_dict = insertBookinfo(
        report_dict, doc_root, lxml_utils.transformStylename(cfg.authorstyle),
        cfg.titlesection_stylename, bookinfo["author"])
    # add title info to titlepage if it's not present
    report_dict = insertBookinfo(report_dict, doc_root,
                                 lxml_utils.transformStylename(cfg.titlestyle),
                                 cfg.titlesection_stylename, bookinfo["title"])

    # concatenate consecutive titleparas and remove softbreaks
    report_dict = concatTitleParas(
        lxml_utils.transformStylename(cfg.titlestyle), report_dict, doc_root)

    # # # tally and repair section start paras & their contents
    # get all Section Start paras in the doc, add content to each para as needed:
    report_dict = lxml_utils.sectionStartTally(report_dict, sectionnames,
                                               doc_root, "insert",
                                               headingstyles)

    # remove first or last paras if they contain only white space
    #   (this has to come after the sectionStartTally function, otherwise it
    #   may rip out an empty Section Start para at the beginning of the doc)
    report_dict = rmEmptyFirstLastParas(doc_root, report_dict)

    # autonumber contents for chapter, Appendix, Part
    report_dict = lxml_utils.autoNumberSectionParaContent(
        report_dict, sectionnames, cfg.autonumber_sections, doc_root)

    # write our changes back to doc.xml
    logger.debug("writing changes out to doc_xml file")
    os_utils.writeXMLtoFile(doc_root, doc_xml)

    # add/update para index numbers
    logger.debug("Update all report_dict records with para_index-")
    report_dict = lxml_utils.calcLocationInfoForLog(report_dict, doc_root,
                                                    sectionnames)

    logger.info("* * * ending docPrepare function.")

    return report_dict
Exemplo n.º 12
0
def getAllStylesUsed(report_dict,
                     doc_root,
                     styles_xml,
                     sectionnames,
                     macmillanstyledata,
                     bookmakerstyles,
                     call_type,
                     valid_native_word_styles,
                     container_starts=None,
                     container_ends=None,
                     runs_only=False):
    """Log the para and char styles used in `doc_root`.

    Para styles (skipped when `runs_only` is true) are tracked per section
    and, when `container_starts` is non-empty, per container; each style not
    seen before in its context is handed to
    getAllStylesUsed_ProcessParaStyle. Char styles are then scanned: the
    first use of each accepted style is logged, and other char styles are
    logged, removed or reverted to a base style depending on `call_type` and
    on whether `container_starts` is non-empty.

    Args:
        report_dict: report structure; char styles already logged in it
            under 'Macmillan_charstyle_first_use' and
            'non-Macmillan_charstyle_used' are treated as already found.
        doc_root: root element of the xml being scanned.
        styles_xml: path of the styles xml, parsed to look up style names.
        sectionnames: section-start style names.
        macmillanstyledata: iterable of accepted long style names.
        bookmakerstyles: forwarded to getAllStylesUsed_ProcessParaStyle.
        call_type: "validate" enables removal / reversion of other styles.
        valid_native_word_styles: list of native Word styles that are also
            accepted; it is copied, not modified.
        container_starts: container-start style names; None means none.
        container_ends: container-end style names; None means none.
        runs_only: when true, only char styles are scanned.

    Returns:
        The report dict.
    """
    logger.info("** running function 'getAllStylesUsed'")
    # None replaces the former mutable [] defaults; normalize to lists here
    if container_starts is None:
        container_starts = []
    if container_ends is None:
        container_ends = []
    # attribute key for w:val, constant for the whole scan
    attrib_style_key = '{%s}val' % wnamespace

    styles_tree = etree.parse(styles_xml)
    styles_root = styles_tree.getroot()
    # accepted style names: native word styles first (sliced to make a
    # shallow copy of the original list), then the macmillan ones
    macmillanstyles = valid_native_word_styles[:]
    macmillanstyles.extend(macmillanstyledata)
    macmillan_styles_found = []  # <- non-rsuite Macmillan para styles
    macmillan_styles_found_dict = []  # <- for rsuite para styles
    # all char styles seen so far, so none is reported more than once
    charstyles_found = []
    # capture charstyles found in previous runs on other xml files in the doc
    for category in ("Macmillan_charstyle_first_use",
                     "non-Macmillan_charstyle_used"):
        if category in report_dict:
            for charstyle_dict in report_dict[category]:
                charstyles_found.append(
                    lxml_utils.transformStylename(
                        charstyle_dict['description']))

    # the "runs_only" option allows re-use of this function to capture
    # charstyles for footnotes/endnotes
    if runs_only == True:
        logger.info(
            "runs_only set to: %s, we are probably scanning xml other than doc itself, just for charstyles"
            % runs_only)
    else:
        logger.info(
            "logging 1st use of every Macmillan para style, and any use of other style"
        )
        this_section = ""
        container_prefix = ""
        for para in doc_root.findall(".//*w:p", wordnamespaces):
            # get stylename from each para
            stylename = lxml_utils.getParaStyle(para)

            # track current section & container as we loop through styles
            if stylename in sectionnames:
                this_section = stylename
                container_prefix = ""
                continue
            elif stylename in container_starts:
                container_prefix = lxml_utils.getStyleLongname(
                    stylename).split()[0] + " > "
                continue
            elif stylename in container_ends:
                container_prefix = ""
                continue

            shortstylename_with_container = container_prefix + stylename
            found_para_context = {this_section: shortstylename_with_container}

            # check whether the style has already been noted (with section /
            # container context where applicable)
            test_if_present = False
            if not container_starts and stylename in macmillan_styles_found:
                test_if_present = True
            elif container_starts:
                for d in macmillan_styles_found_dict:
                    if this_section in d and d[
                            this_section] == shortstylename_with_container:
                        test_if_present = True
                        # one match is enough
                        break

            # if the style was not noted yet, proceed to process / log it
            if test_if_present == False:
                container_styles = container_starts + container_ends
                report_dict = getAllStylesUsed_ProcessParaStyle(
                    report_dict, stylename, styles_root, doc_root,
                    macmillanstyles, sectionnames, found_para_context,
                    container_styles, container_prefix,
                    macmillan_styles_found_dict, macmillan_styles_found, para,
                    call_type, bookmakerstyles)

    # Now get runstyles!
    logger.info(
        "logging 1st use of every Macmillan char style, and any use of other char-style"
    )
    for run_style in doc_root.findall(".//*w:rStyle", wordnamespaces):
        # get run_stylename from each styled run
        stylename = run_style.get(attrib_style_key)

        # A style that was already encountered only needs more work when
        #   call_type is "validate"; otherwise we go straight to the next one
        if stylename in charstyles_found and call_type == "validate":
            # search styles.xml for the full stylename so we can determine
            # whether it's a Macmillan style
            # NOTE(review): find() returns None if the style id has no
            #   definition in styles.xml, and .get() would then raise
            #   AttributeError; confirm whether that can occur
            stylesearchstring = ".//w:style[@w:styleId='%s']/w:name" % stylename
            stylematch = styles_root.find(stylesearchstring, wordnamespaces)
            stylename_full = stylematch.get(attrib_style_key)
            if stylename_full not in macmillanstyles and container_starts:
                # for RSuite styles, just delete all previously encountered
                # non-Macmillan charstyles
                run_style.getparent().remove(run_style)
            # Subsequent non-Macmillan charstyles are currently not handled
            #   any differently outside of the RSuite validator.

        # cases for first time a stylename is encountered:
        elif stylename not in charstyles_found:
            # get para for report (three levels above the rStyle element)
            para = run_style.getparent().getparent().getparent()
            # search styles.xml for the full stylename so we can determine
            # whether it's a Macmillan style (same None caveat as above)
            stylesearchstring = ".//w:style[@w:styleId='%s']/w:name" % stylename
            stylematch = styles_root.find(stylesearchstring, wordnamespaces)
            stylename_full = stylematch.get(attrib_style_key)
            # First encounter of Macmillan charstyle: log for report and
            # append to the 'found' list
            if stylename_full in macmillanstyles:
                charstyles_found.append(stylename)
                lxml_utils.logForReport(report_dict, doc_root, para,
                                        'Macmillan_charstyle_first_use',
                                        stylename_full)
            # First encounter of non-Macmillan style, NOT 'validate' call-type
            elif call_type != "validate" and container_starts:
                # log for report
                lxml_utils.logForReport(report_dict, doc_root, para,
                                        'non-Macmillan_charstyle_used',
                                        stylename_full)
                # add to the list of found charstyles so we don't reprocess
                charstyles_found.append(stylename)
            # First encounter of non-Macmillan style, for RSuite-styled docs,
            # with 'validate' call-type
            elif call_type == "validate" and container_starts:
                # report the first encounter, then add to the list of found
                # charstyles so we don't re-log
                lxml_utils.logForReport(report_dict, doc_root, para,
                                        'non-Macmillan_charstyle_removed',
                                        stylename_full)
                charstyles_found.append(stylename)
                # then delete the runstyle!
                run_style.getparent().remove(run_style)
            # First encounter of non-Macmillan style, for NON-RSuite-styled
            # docs, with 'validate' call-type
            elif call_type == "validate" and not container_starts:
                # for non-RSuite styles, try to revert non-Macmillan
                # charstyles to their base style
                report_dict = getAllStylesUsed_RevertToBase(
                    stylematch, macmillanstyles, report_dict, doc_root,
                    stylename_full, para, sectionnames, run_style)

    return report_dict
Exemplo n.º 13
0
def runRule(sectionname, section_start_rules, doc_root, versatileblockparas,
            sectiontypes, call_type, report_dict, titlestyle, headingstyles,
            sectionnames):
    """Run the section-start rule for ``sectionname`` against the document.

    For each contiguous-block criteria key (as produced by
    ``getCBStrings(counter)``, counter starting at 1) present in
    ``section_start_rules[sectionname]``, every matching paragraph is tested
    against the rule's criteria (first-of-block, ``first_child``, preceding
    style, ``position``, ``previous_until``). A paragraph that passes all
    checks is logged under ``"section_start_needed"`` and, when ``call_type``
    is ``"insert"``, gets a section-start paragraph inserted before it.

    After the block criteria, a ``section_required`` entry whose ``"value"``
    is True is evaluated via ``evalSectionRequired``; if that yields an
    insertion point it is logged and (for ``"insert"``) inserted the same way.

    Args:
        sectionname: key of the rule inside ``section_start_rules``.
        section_start_rules: dict of rules, keyed by section name.
        doc_root: document root handed through to the helper functions.
        versatileblockparas: passed through to ``findSectionBegin``.
        sectiontypes: passed through to ``precedingStyleCheck`` and
            ``evalPosition``.
        call_type: ``"insert"`` triggers insertion; any other value only logs.
        report_dict: report accumulator; updated and returned.
        titlestyle: passed through to ``evalSectionRequired``.
        headingstyles: passed through to
            ``lxml_utils.getContentsForSectionStart``.
        sectionnames: passed through to
            ``lxml_utils.getContentsForSectionStart``.

    Returns:
        The updated ``report_dict``.

    Raises:
        KeyError: if ``sectionname`` is not in ``section_start_rules`` or a
            block entry lacks the ``"multiple"`` key.
    """
    rule = section_start_rules[sectionname]
    # Resolve the style name once, up front. It is also needed by the
    # section_required branch below, which can run even when the loop body
    # never executes (no contiguous-block key present for this section).
    sectionstylename = lxml_utils.transformStylename(sectionname)

    # NOTE(review): the explicit `== True` / `== False` comparisons below are
    # kept on purpose; a plain truthiness test would also treat None/empty
    # helper results as disqualifying. Confirm helper return types before
    # simplifying.

    # cycle through for multiple contiguous blocks, apply 'last' key=True, run the rule!
    counter = 1
    cbstring, _ = getCBStrings(counter)
    while cbstring in rule:
        logger.debug(
            "* Running sectionstart rule for: '%s', contiguous_block_criteria_%s"
            % (sectionname, counter))
        block_rule = rule[cbstring]

        # see if the section already exists; if so and multiple is False we
        # can move on to the next block. Re-checked on every pass because an
        # earlier pass may have inserted a section-start para.
        sectionpresent = checkForParaStyle(sectionstylename, doc_root)
        if sectionpresent == True and block_rule["multiple"] == False:
            # increment the cbstrings
            counter += 1
            cbstring, _ = getCBStrings(counter)
            continue

        # get all paras matching "styles" for this section
        matchingParas = getMatchingParas(sectionname, section_start_rules,
                                         doc_root, cbstring)

        # Walk through the criteria for each matching para!
        for para in matchingParas:
            # optional headers, blocks of matching styles are evaluated here,
            # returning a "sectionbegin_para"
            sectionbegin_para, firstStyleOfBlock = findSectionBegin(
                sectionname, section_start_rules, doc_root,
                versatileblockparas, para, cbstring)
            # this para-match is redundant, another matching style para
            # starts this block, so move on to the next para
            if firstStyleOfBlock == False:
                logger.debug(
                    "disqualified: not the first para in its own style block! next match"
                )
                continue

            # evaluate 1st child if criteria is present
            if "first_child" in block_rule:
                firstchild_results = evalFirstChild(sectionname,
                                                    section_start_rules,
                                                    cbstring,
                                                    sectionbegin_para)
                if firstchild_results == False:
                    logger.debug(
                        "disqualified: firstchild criteria not met. next match"
                    )
                    continue

            # evaluate previous sibling (see if there's already a section start)
            if precedingStyleCheck(sectionname, section_start_rules, cbstring,
                                   sectionbegin_para, sectiontypes) == True:
                logger.debug(
                    "disqualified: previous_style is acceptable. next match")
                continue

            # check criteria for position
            # NOTE(review): unlike the neighbouring checks this key is looked
            # up on the section rule, not on the block entry -- confirm this
            # matches the rules schema.
            if "position" in rule:
                position_results = evalPosition(sectionname,
                                                section_start_rules, cbstring,
                                                sectionbegin_para,
                                                sectiontypes)
                if position_results == False:
                    logger.debug(
                        "disqualified: position criteria not met. next match")
                    continue

            # check criteria for previous until
            if "previous_until" in block_rule:
                prev_until_results = evalPrevUntil(sectionname,
                                                   section_start_rules,
                                                   cbstring, sectionbegin_para)
                if prev_until_results == False:
                    # routed through the logger like every other
                    # disqualification message (was a bare print statement)
                    logger.debug(
                        "disqualified: prev_until criteria not met. next match!"
                    )
                    continue

            # if we made it this far, go ahead and insert our section start
            # &/or log it for style report!
            logger.info("All criteria met for '%s' rule!  %sing para" %
                        (sectionname, call_type))
            if call_type == "insert":
                report_dict = deletePrecedingPageBreak(sectionbegin_para,
                                                       report_dict)
                contents = lxml_utils.getContentsForSectionStart(
                    sectionbegin_para, doc_root, headingstyles,
                    sectionstylename, sectionnames)
                lxml_utils.insertPara(sectionstylename, sectionbegin_para,
                                      doc_root, contents, "before")
            report_dict = lxml_utils.logForReport_old(report_dict, doc_root,
                                                      sectionbegin_para,
                                                      "section_start_needed",
                                                      sectionname)

            # break the loop for this rule if 'multiple' value is False
            if block_rule["multiple"] == False:
                logger.debug(
                    "'Multiple' set to 'False', moving on to next rule.")
                break

        # increment the cbstrings
        counter += 1
        cbstring, _ = getCBStrings(counter)

    # evaluate section_required rule if present (could move this into 'main',
    # does not need to be in this function)
    if "section_required" in rule and rule["section_required"][
            "value"] == True:
        logger.info("section_required is true, evaluating for %s" %
                    sectionname)
        sectionbegin_para = evalSectionRequired(sectionname,
                                                section_start_rules, doc_root,
                                                titlestyle)
        # if we have an insertion point for Section_Required, log it and
        # (for 'insert') add the Section Start styled para. Here the log call
        # comes before the insertion; that order is preserved from the
        # original.
        if sectionbegin_para is not None:
            report_dict = lxml_utils.logForReport_old(report_dict, doc_root,
                                                      sectionbegin_para,
                                                      "section_start_needed",
                                                      "{}".format(sectionname))
            if call_type == "insert":
                report_dict = deletePrecedingPageBreak(sectionbegin_para,
                                                       report_dict)
                contents = lxml_utils.getContentsForSectionStart(
                    sectionbegin_para, doc_root, headingstyles,
                    sectionstylename, sectionnames)
                lxml_utils.insertPara(sectionstylename, sectionbegin_para,
                                      doc_root, contents, "before")

    return report_dict