Exemplo n.º 1
0
def import_questions(tables, file, test):
    """Create Question and Choice rows from nested table paragraphs.

    Paragraphs are visited with ``enum_at_depth(tables, 4)``. Within each
    innermost list, every sixth paragraph (index 0, 6, 12, ...) becomes a
    ``Question`` attached to ``test``. Every other paragraph becomes a
    ``Choice`` of the most recently created question, and is created with
    ``correct=True`` when its text contains ``<b>``.

    :param tables: nested paragraph structure; empty paragraphs are removed
        with ``remove_empty_paragraphs`` before importing
    :param file: not used by this function; kept so existing callers keep working
    :param test: object passed as ``test=`` to every created Question
    :returns: number of Question rows created by this call
    :raises ValueError: if ``test`` is None
    """
    # Fail fast, before any other work. Raising a bare string is itself a
    # TypeError in Python 3, so a real exception class is required here.
    # NOTE(review): the message mentions "File" while the check is on `test`;
    # text kept unchanged in case callers display it -- confirm the intent.
    if test is None:
        raise ValueError('File cannot be empty')

    tables = remove_empty_paragraphs(tables)

    # Count locally instead of diffing two table-wide count() queries, which
    # would also include rows created by anything else in the meantime.
    created = 0
    for (_, _, _, position), paragraph in enum_at_depth(tables, 4):
        if position % 6 == 0:
            # Index 0 always comes first, so `question` is bound before any
            # Choice is created.
            question = Question.objects.create(title=paragraph, test=test)
            created += 1
        elif '<b>' in paragraph:
            Choice.objects.create(title=paragraph, question=question, correct=True)
        else:
            # `correct` is left to the model's default, as before.
            Choice.objects.create(title=paragraph, question=question)
    return created
Exemplo n.º 2
0
def get_text(xml: bytes, context: Dict[str, Any]) -> TablesList:
    """Collect the text of an xml document into nested lists of paragraph strings.

    :param xml: an xml bytes object which might contain text
    :param context: dictionary of document attributes generated in get_docx_text;
        this function reads ``context["do_html"]`` and ``context["rId2Target"]``
        and hands the whole dictionary to ``_get_bullet_string``
    :returns: A 4-deep nested list of strings.

    Text is first gathered in a five-level DepthCollector, where

    ``[table][row][cell][paragraph][run]`` is a string

    The runs of every paragraph are joined before returning, so in the result

    ``[table][row][cell][paragraph]`` is a string

    If you'd like to extend or edit this package, this function is probably where you
    want to do it. The only bookkeeping is the currently open text style.
    """
    collector = DepthCollector(5)
    do_html = context["do_html"]

    # Checked in order; the first matching tag decides the caret depth.
    caret_depths = ((TABLE, 1), (TABLE_ROW, 2), (TABLE_CELL, 3), (PARAGRAPH, 4))
    # Tags whose end raises the caret one level.
    raise_after = {TABLE_ROW, TABLE_CELL, PARAGRAPH}

    def is_separator(element: Element) -> bool:
        """True when the element's ``w:type`` attribute mentions "separator"."""
        return "separator" in element.attrib.get(qn("w:type"), "").lower()

    def insert_image(rel_id: str) -> None:
        """Insert an image placeholder if the relationship id has a known target."""
        target = context["rId2Target"].get(rel_id)
        if target:
            collector.insert("----{}----".format(target))

    def open_element(element: Element, tag: Any) -> None:
        """Insert whatever text or placeholder the start of ``element`` produces."""
        if tag == PARAGRAPH:
            collector.insert(_get_bullet_string(element, context))
            return

        if tag == RUN and do_html is True:
            # new text run: swap style tags only when the style changes
            run_style = get_run_style(element)
            previous_style = getattr(collector, "open_style", ())
            if run_style != previous_style:
                collector.insert(style_close(previous_style))
                collector.insert(style_open(run_style))
                collector.open_style = run_style
            return

        if tag == TEXT:
            # text elements do not always carry text
            text = element.text
            if text is None:
                text = ""
            if do_html is True:
                text = text.replace("<", "&lt;").replace(">", "&gt;")
            collector.insert(text)
            return

        if tag == FOOTNOTE:
            if not is_separator(element):
                collector.insert("footnote{})\t".format(element.attrib[qn('w:id')]))
            return

        if tag == ENDNOTE:
            if not is_separator(element):
                collector.insert("endnote{})\t".format(element.attrib[qn('w:id')]))
            return

        # placeholders
        if tag == FOOTNOTE_REFERENCE:
            collector.insert("----footnote{}----".format(element.attrib[qn('w:id')]))
            return

        if tag == ENDNOTE_REFERENCE:
            collector.insert("----endnote{}----".format(element.attrib[qn('w:id')]))
            return

        if tag == IMAGE:
            insert_image(element.attrib[qn("r:embed")])
            return

        if tag == IMAGEDATA:
            insert_image(element.attrib[qn("r:id")])
            return

        if tag == TAB:
            collector.insert("\t")

    def walk(parent: Element) -> None:
        """
        Recursively visit the descendants of ``parent``, adding text when found.

        :param parent: An Element from an xml file (ElementTree)
        :return: None. Adds text to the outer variable `collector`.
        """
        for element in parent:
            tag = element.tag

            # set caret depth
            for caret_tag, depth in caret_depths:
                if tag == caret_tag:
                    collector.set_caret(depth)
                    break

            # open the element, then descend into it
            open_element(element, tag)
            walk(element)

            # close the element
            if tag == PARAGRAPH and do_html is True:
                collector.insert(style_close(getattr(collector, "open_style", ())))
                collector.open_style = ()

            if tag in raise_after:
                collector.raise_caret()
            elif tag == TABLE:
                collector.set_caret(1)

    walk(ElementTree.fromstring(xml))

    # Collapse each paragraph's list of runs into one string, in place.
    tree = collector.tree
    for indices, runs in enum_at_depth(tree, 4):
        table_idx, row_idx, cell_idx, par_idx = indices
        tree[table_idx][row_idx][cell_idx][par_idx] = "".join(runs)

    return tree
Exemplo n.º 3
0
 def test_enum_at_depth_high(self) -> None:
     """Raise TypeError when the requested depth is too high (5 for TABLES)."""
     # The generator is lazy; tuple() forces it to run so the error surfaces.
     with pytest.raises(TypeError) as excinfo:
         tuple(enum_at_depth(TABLES, 5))
     assert "will not iterate over sequence item" in str(excinfo.value)
Exemplo n.º 4
0
 def test_enum_at_depth_low(self) -> None:
     """A depth below 1 is rejected with a ValueError stating the lower bound."""
     with pytest.raises(ValueError) as excinfo:
         tuple(enum_at_depth(TABLES, 0))
     error_text = str(excinfo.value)
     assert "must be >= 1" in error_text