Пример #1
0
 def render_act(self, act):
     """Write ``act`` to ``self.buf`` as an ``<article class="act">`` element.

     The output is, in order: a ``<header>`` holding ``act.name`` (with
     ``act.bookmark_id`` as its ``id`` attribute when that is truthy), an
     ``<ol class="history">`` with one ``<li>`` per entry of
     ``act.history``, whatever ``super().render_act(act)`` writes, and the
     closing ``</article>`` tag.

     Each history entry goes through the abbreviation table, the date and
     statute regex substitutions and the normalisation helpers below before
     being written.  A non-empty entry that does not already end in one of
     the characters checked at the bottom of the loop gets a trailing
     ``。`` appended.
     """
     buf = self.buf
     buf.write('<article class="act">\n')
     if act.bookmark_id:
         buf.write('<header id="')
         buf.write(act.bookmark_id)
         buf.write('">')
     else:
         buf.write('<header>')
     buf.write(act.name)
     buf.write('</header>\n'
               '<ol class="history">\n')
     for h in act.history:
         buf.write('<li>')
         # Shorten the nouns listed in the abbreviation table.
         for noun, abbr in HIST_ABBREVIATION_LIST:
             h = h.replace(noun, abbr)
         # Rewrite the date notations matched by each pattern.
         h = RE_NUMERIC_DATE_FORMAT.sub(utils.repl_numeric_date, h)
         h = RE_NUMERIC_DATE_INLINE_FORMAT.sub(utils.repl_numeric_inline_date, h)
         h = RE_REPUBLIC_DATE_FORMAT.sub(utils.repl_cjk_date, h)
         h = RE_SEMESTER_DATE_FORMAT.sub(utils.repl_cjk_semester, h)
         h = h.replace('中華民國', '民國')
         h = RE_HIST_INS_FORMAT.sub(r'\1-\2條', h)
         h = RE_HIST_STATUTE_FORMAT.sub(r'\1', h)
         h = utils.normalize_bracketed_numbers(h)
         h = normalize_spaces(h)
         h = apply_emphasis(h)
         buf.write(h)
         # An entry can end up empty (e.g. a whitespace-only history line);
         # check for that first so ``h[-1]`` cannot raise IndexError.
         if h and h[-1] not in '>))。':  # Consider <span> as well
             buf.write('。')
         buf.write('</li>\n')
     buf.write('</ol>\n')
     super().render_act(act)
     buf.write('</article>\n')
 def clear_text_for_snippets(self, text):
     """Return ``text`` with parenthesised spans and stray spaces removed.

     Three steps are applied in order:

     1. every ``(...)`` span is deleted (shortest match; ``.`` does not
        cross a newline);
     2. a single space directly in front of any of ``,.;:!?`` is dropped;
     3. the result is passed through ``normalize_spaces``.
     """
     # Raw string: in a plain literal ``\(`` is an invalid escape sequence,
     # which newer Python versions warn about at compile time.
     cleaner = re.compile(r'\(.*?\)')
     text = cleaner.sub('', text)
     for mark in ',.;:!?':
         text = text.replace(' ' + mark, mark)
     return normalize_spaces(text)
Пример #3
0
def parse_act(buf):
    """Parse the plain-text form of an act into an ``Act`` object.

    ``buf`` is either a string (wrapped in ``StringIO``) or an object with
    a ``readline()`` method.  The text is consumed as:

    1. the act name on the first line, followed by one empty line;
    2. history entries, one per line, terminated by an empty line;
    3. body lines, classified by their number of leading whitespace
       characters: 0 = chapter title, 2 = article heading (or foreword
       text when the heading pattern does not match), 4 = paragraph,
       6 = subsection, 8 = item.  Blank lines are skipped and lines with
       any other indent are ignored.

    Returns the populated ``Act``.

    Raises ``AssertionError`` when the name is empty, a separator line is
    missing, the history section is not terminated, or a chapter title
    does not match the expected pattern.  The line being processed is
    written to ``sys.stderr`` before the exception is re-raised.
    """
    if isinstance(buf, str):
        buf = StringIO(buf)

    # Always bound, so the error handler below can report the offending
    # line even when the very first assertions fail.
    line = ''
    try:
        act = Act()
        line = buf.readline()
        act.name = normalize_spaces(line.strip())
        assert act.name
        line = buf.readline()
        assert line == '\n'

        # History section: one entry per line until the empty separator.
        while True:
            line = buf.readline()
            if line == '\n':
                break
            else:
                assert line  # EOF before the separator
                act.history.append(line.strip())

        # Body section.
        # NOTE(review): nested lines attach to the most recently parsed
        # parent; a nested line with no parent parsed yet fails on an
        # unbound variable — presumably input is well-formed, confirm.
        while True:
            indented_line = buf.readline()
            if not indented_line:
                break  # EOF

            indented_line = indented_line.rstrip()
            if not indented_line:
                continue

            line = indented_line.lstrip()
            indent = len(indented_line) - len(line)

            if indent == 0:  # Chapter title
                m = re.match(r'^(第\S+)\s+(\S+)', line)
                assert m
                chapter = Chapter(number=m.group(1), caption=m.group(2))
                act.articles.append(chapter)
            elif indent == 2:  # Article
                m = re.match(
                    r'^(?P<number>(第|附件)[^【\s]+)\s*(【(?P<caption>\S+)】)?',
                    line)
                if m:
                    article = Article(number=m.group('number'),
                                      caption=(m.group('caption') or ''))
                    act.articles.append(article)
                else:
                    act.articles.append(line)  # Foreword text
            elif indent == 4:  # Paragraph
                paragraph = Paragraph(caption=line)
                article.subitems.append(paragraph)
            elif indent == 6:  # Subsection
                subsection = Subsection(caption=line)
                paragraph.subitems.append(subsection)
            elif indent == 8:  # Item
                subsection.subitems.append(line)

        return act
    except AssertionError:
        sys.stderr.write(line)
        raise  # parse failed, malformed file
Пример #4
0
 def print_constr(self, sexp_str):
     """Return the printed form of the term given as ``sexp_str``.

     Results are memoised per ``sexp_str`` in ``self.constr_cache``, which
     is created on first use.  On a cache miss a ``Print`` command for the
     term is sent through ``self.send``; the string is taken from the
     second response, or from the first one when handling the second
     raises ``TypeError``.

     Returns ``None`` (without caching) when the command fails with a
     ``CoqExn`` whose ``err_msg`` is ``'Not_found'``; any other ``CoqExn``
     propagates.
     """
     try:
         self.constr_cache
     except AttributeError:
         self.constr_cache = {}

     if sexp_str in self.constr_cache:
         return self.constr_cache[sexp_str]

     def to_text(response):
         # Pull the printed string out of one response and tidy it up.
         return normalize_spaces(symbol2str(response[2][1][0][1]))

     try:
         responses, _ = self.send(
             '(Print ((pp_format PpStr)) (CoqConstr %s))' % sexp_str)
         self.constr_cache[sexp_str] = to_text(responses[1])
     except CoqExn as ex:
         if ex.err_msg != 'Not_found':
             raise ex
         return None
     except TypeError:
         # Fall back to the first response.
         self.constr_cache[sexp_str] = to_text(responses[0])
     return self.constr_cache[sexp_str]
Пример #5
0
 def render_paragraph(self, paragraph):
     """Write ``paragraph`` to ``self.buf`` as a ``<li>`` element.

     The caption is space-normalised and run through ``apply_emphasis``.
     When the paragraph has subitems, the output of the superclass
     renderer follows, wrapped in ``<ol class="subsections">``.
     """
     write = self.buf.write
     write('<li>')
     caption = normalize_spaces(paragraph.caption)
     write(apply_emphasis(caption))
     write('</li>\n')
     if not paragraph.subitems:
         return
     write('<ol class="subsections">\n')
     super().render_paragraph(paragraph)
     write('</ol>\n')
Пример #6
0
 def render_subsection(self, subsection):
     """Write ``subsection`` to ``self.buf`` as a ``<li>`` element.

     Text matched by ``RE_SUBSECTION_NUMBERING`` is removed from the
     caption, which is then space-normalised and run through
     ``apply_emphasis``.  When the subsection has subitems, the output of
     the superclass renderer follows, wrapped in ``<ol class="items">``.
     """
     write = self.buf.write
     stripped = RE_SUBSECTION_NUMBERING.sub('', subsection.caption)
     write('<li>')
     write(apply_emphasis(normalize_spaces(stripped)))
     write('</li>\n')
     if not subsection.subitems:
         return
     write('<ol class="items">\n')
     super().render_subsection(subsection)
     write('</ol>\n')
Пример #7
0
def parse_interpretation(buf):
    """Parse the plain-text form of an interpretation.

    ``buf`` is either a string (wrapped in ``StringIO``) or an object with
    a ``readline()`` method.  The text is consumed as:

    1. the name on the first line (a leading ``解釋`` is dropped),
       followed by one empty line;
    2. metadata lines terminated by an empty line.  A line that fully
       matches ``RE_REPUBLIC_DATE_FORMAT`` is stored as the
       ``(year, month, day)`` tuple ``intp.date``; every other line is
       appended to ``intp.meta``;
    3. body lines, each appended to ``intp.subentries`` either as a
       ``Heading`` or as plain text, depending on indentation and the
       heading patterns.  Blank lines are skipped.

    Returns the populated ``Interpretation``.

    Raises ``AssertionError`` when the name is empty, a separator line is
    missing, or the metadata section contains a whitespace-only line or
    hits end of input.  The line being processed is written to
    ``sys.stderr`` before the exception is re-raised.
    """
    if isinstance(buf, str):
        buf = StringIO(buf)

    # Always bound, so the error handler below can report the offending
    # line even when the very first assertions fail.
    line = ''
    try:
        intp = Interpretation()
        line = buf.readline()
        intp.name = normalize_spaces(line.strip())
        if intp.name.startswith('解釋'):
            intp.name = intp.name[2:]  # Remove redundant title
        assert intp.name
        line = buf.readline()
        assert line == '\n'

        # Metadata section.
        while True:
            line = buf.readline()
            if line == '\n':
                break

            line = line.strip()
            assert line  # EOF or whitespace-only line

            m = RE_REPUBLIC_DATE_FORMAT.fullmatch(line)
            if m:
                intp.date = (m.group('year'), m.group('month'), m.group('day'))
                continue  # Eats the whole line, append later

            intp.meta.append(line)

        # Body section.
        while True:
            indented_line = buf.readline()
            if not indented_line:
                break  # EOF

            indented_line = indented_line.rstrip()
            if not indented_line:
                continue

            line = indented_line.lstrip()
            indent = len(indented_line) - len(line)

            if indent == 0 and RE_CHAPTER_FORMAT.fullmatch(line):
                heading = Heading(line, is_chapter=True)
                intp.subentries.append(heading)
            elif (indent <= 2 and
                  not line.startswith('邱丞正、')) or RE_HEADING_FORMAT.fullmatch(
                      line):  # workaround for bad indent
                heading = Heading(line)
                intp.subentries.append(heading)
            else:
                line = line.replace('【', '《').replace('】', '》')
                intp.subentries.append(line)

        return intp
    except AssertionError:
        sys.stderr.write(line)
        raise  # parse failed, malformed file
Пример #8
0
    def render_interpretation_text(self, text):
        """Write one body line of an interpretation to ``self.buf`` as a ``<p>``.

        The text is bracket- and space-normalised, then the jargon,
        citation and remark patterns wrap their matches in inline markup.
        Text that fully matches ``RE_INTP_FOOTER_FORMAT`` is written as
        ``<p class="footnote">``; any other text additionally gets its
        subheading match wrapped and is written as a plain ``<p>``.
        """
        write = self.buf.write

        # Normalise first, then apply the inline markup in a fixed order.
        text = normalize_spaces(normalize_brackets(text))
        inline_markup = (
            (RE_INTP_JARGON_FORMAT, r'<span class="jargon">\1</span>'),
            (RE_INTP_JARGON_INLINE_FORMAT, r'<span class="jargon">\1</span>'),
            (RE_INTP_CITATION_FORMAT, r'<cite>\1</cite>'),
            (RE_INTP_REMARK_FORMAT, r'<span class="note">\1</span>'),
        )
        for pattern, replacement in inline_markup:
            text = pattern.sub(replacement, text)

        # Footnotes get their own paragraph class; only non-footnotes are
        # scanned for a subheading.
        if RE_INTP_FOOTER_FORMAT.fullmatch(text):
            opening_tag = '<p class="footnote">'
        else:
            text = RE_INTP_SUBHEADING_FORMAT.sub(r'<span class="subheading">\1\2</span>', text)
            opening_tag = '<p>'
        write(opening_tag)
        write(text)
        write('</p>\n')
Пример #9
0
 def render_interpretation(self, intp):
     """Write ``intp`` to ``self.buf`` as an ``<article class="interpretation">``.

     The output is, in order: a ``<header>`` holding ``intp.name`` (with
     ``intp.bookmark_id`` as its ``id`` attribute when that is truthy), a
     ``<div class="meta">`` built from the joined ``intp.meta`` entries,
     every subentry (``Heading`` instances via ``render_heading``, all
     others via ``render_interpretation_text``), and a ``<time>`` element
     when ``intp.date`` is truthy.
     """
     emit = self.buf.write
     emit('<article class="interpretation">\n')

     if intp.bookmark_id:
         header_open = ('<header id="', intp.bookmark_id, '">')
     else:
         header_open = ('<header>',)
     for piece in header_open:
         emit(piece)
     emit(intp.name)
     emit('</header>\n'
          '<div class="meta">')

     joined_meta = normalize_spaces(','.join(intp.meta))
     emit(RE_INTP_META_REMARK_FORMAT.sub(r'<span class="note">\1</span>',
                                         joined_meta))
     emit('</div>\n')

     for entry in intp.subentries:
         if not isinstance(entry, Heading):
             self.render_interpretation_text(entry)
         else:
             self.render_heading(entry)

     if intp.date:
         emit('<time>民國 {} 年 {} 月 {} 日</time>\n'.format(*intp.date))
     emit('</article>\n')
Пример #10
0
 def render_heading(self, heading):
     """Write ``heading`` to ``self.buf`` as an ``<h5>`` or ``<h6>`` element.

     Chapter headings use level 5, all others level 6.  The caption is
     bracket-normalised and then space-normalised before being written.
     """
     if heading.is_chapter:
         level = 5
     else:
         level = 6
     text = normalize_brackets(heading.caption)
     text = normalize_spaces(text)
     self.buf.write('<h{}>{}</h{}>\n'.format(level, text, level))
Пример #11
0
 def render_item(self, item):
     """Write ``item`` to ``self.buf`` as a ``<li>`` element.

     Text matched by ``RE_ITEM_NUMBERING`` is removed first; the remainder
     is space-normalised and run through ``apply_emphasis``.
     """
     write = self.buf.write
     unnumbered = RE_ITEM_NUMBERING.sub('', item)
     write('<li>')
     body = normalize_spaces(unnumbered)
     write(apply_emphasis(body))
     write('</li>\n')