示例#1
0
class NestedOuterPage(PageModel):
    """Outer page model that embeds ``NestedInnerPage`` as a sub-model.

    The tree is rooted at a ``StrictNode`` for ``div.outer`` with two child
    nodes: ``> span`` is bound to the ``outertxt`` field via ``Text()`` and
    ``div.inner`` is bound to the ``nested`` field via a
    ``NestedInnerPage()`` instance.
    """

    model_class = dict
    page_tree = Html(
        StrictNode("div.outer")(
            Node("> span")(
                outertxt=Text(),
            ),
            Node("div.inner")(
                nested=NestedInnerPage(),
            ),
        ),
    )
示例#2
0
class SimplePage(PageModel):
    """Page model binding text fields on ``body`` and two of its children.

    ``body`` itself is bound to the ``body`` field, ``div.div_1`` to
    ``div1``, and the node declared with the two selectors ``#div_2`` and
    ``#asdf`` to ``div2``.  The ``span`` node is declared without a field.
    """

    model_class = dict
    page_tree = Html(
        Node("body")(
            Node("div.div_1")(
                div1=Text(),
            ),
            Node("#div_2", "#asdf")(
                div2=Text(),
            ),
            Node("span"),
            body=Text(),
        ),
    )
示例#3
0
class SubSense(PageModel):
    """Page model whose ``model_class`` is ``models.SubSense``.

    The ``StrictHtml`` tree binds these fields:

    * ``style_level``   <- ``> span.STYLE-LEVEL`` (optional)
    * ``subject_area``  <- ``> span.SUBJECT-AREA`` (optional)
    * ``syntax_coding`` <- ``> span.SYNTAX-CODING`` (optional)
    * ``definition``    <- ``> span.DEFINITION`` or ``> span.QUICK-DEFINITION``
    * ``original_key``  <- list node over three selectors, with
      ``.concat(" | ")`` applied
    * ``examples``      <- list node ``> div.EXAMPLES``, each an ``Example()``

    ``> div.SENSE-NUM`` and ``> div.THES`` are declared as optional nodes
    with no field attached.
    """

    model_class = models.SubSense

    page_tree = StrictHtml(
        Node.optional("> div.SENSE-NUM"),
        Node.optional("> span.SYNTAX-CODING"),
        Node.optional("> span.STYLE-LEVEL")(style_level=Text()),
        Node.optional("> span.SUBJECT-AREA")(subject_area=Text()),
        # NOTE(review): "> span.SYNTAX-CODING" is declared twice, the first
        # time without a field and here with one -- confirm this is intended.
        Node.optional("> span.SYNTAX-CODING")(syntax_coding=Text()),
        Node("> span.DEFINITION", "> span.QUICK-DEFINITION")(definition=Text()),
        Node.list(
            "> strong",
            "> span.SENSE-VARIANT span.BASE",
            "> span.MULTIWORD span.BASE",
        ).concat(" | ")(original_key=Text()),
        Node.list("> div.EXAMPLES")(examples=Example()),
        Node.optional("> div.THES"),
    )
示例#4
0
class Example(PageModel):
    """Page model whose ``model_class`` is ``models.Example``.

    The ``StrictHtml`` tree binds ``original_key`` to an optional ``strong``
    node and ``content`` to the ``p.EXAMPLE`` node.  An optional ``div.SEP``
    node is declared with no field attached.
    """

    model_class = models.Example

    page_tree = StrictHtml(
        Node.optional("strong")(original_key=Text()),
        Node.optional("div.SEP"),
        Node("p.EXAMPLE")(content=Text()),
    )
示例#5
0
class PostprocPage(PageModel):
    """Page model with a ``postproc`` hook that renames and upper-cases a field.

    The tree binds the text of ``div.lower`` to the ``lower`` field.
    """

    model_class = dict
    page_tree = Html(
        Node("div.lower")(
            lower=Text(),
        ),
    )

    @classmethod
    def postproc(cls, dic):
        """Move ``dic['lower']`` to ``dic['upper']``, upper-cased.

        A missing ``'lower'`` entry is treated as the empty string.  The
        dictionary is modified in place and also returned.
        """
        lowered = dic.pop('lower', '')
        dic['upper'] = lowered.upper()
        return dic
示例#6
0
class AttrPage(PageModel):
    """Page model binding attribute and text fields of the ``a.mylink`` node.

    ``href`` and ``title`` are bound through ``Attr`` with the same-named
    arguments; ``text`` is bound through ``Text()``.
    """

    model_class = dict
    page_tree = Html(
        Node("a.mylink")(
            href=Attr("href"),
            title=Attr("title"),
            text=Text(),
        ),
    )
示例#7
0
class PhrasalVerbLink(PageModel):
    """Page model whose ``model_class`` is ``models.Link``, built from an ``a`` node.

    ``url`` and ``key`` are bound to ``Attr("href")`` and ``Attr("title")``.
    ``link_type`` and ``part_of_speech`` are bound to the ``Constant``
    values ``"phrasal verbs"`` and ``"phrasal verb"``.
    """

    model_class = models.Link

    page_tree = Html(
        Node("a")(
            url=Attr("href"),
            key=Attr("title"),
            link_type=Constant("phrasal verbs"),
            part_of_speech=Constant("phrasal verb"),
        ),
    )
示例#8
0
class RelatedWordLink(PageModel):
    """Page model whose ``model_class`` is ``models.Link``, built from an ``a`` node.

    ``key`` and ``url`` are bound to ``Attr("title")`` and ``Attr("href")``,
    ``link_type`` to the constant ``"related words"``, and
    ``part_of_speech`` to the text of an optional ``span.PART-OF-SPEECH``
    child node.
    """

    model_class = models.Link

    page_tree = Html(
        Node("a")(
            Node.optional("span.PART-OF-SPEECH")(
                part_of_speech=Text()
            ),
            key=Attr("title"),
            url=Attr("href"),
            link_type=Constant("related words"),
        )
    )

    @classmethod
    def postproc(cls, dic):
        """Trim the part-of-speech suffix from ``dic["key"]`` in place.

        As many trailing characters as ``dic["part_of_speech"]`` is long are
        cut from the key, then surrounding whitespace is stripped.  When
        ``part_of_speech`` is missing, ``None`` or empty, nothing is cut and
        the key is only stripped.

        Args:
            dic: Field dictionary; must contain ``"key"``.

        Returns:
            The same ``dic`` object, matching the other ``postproc`` hooks.

        Raises:
            KeyError: If ``dic`` has no ``"key"`` entry.
        """
        key = dic["key"]
        pos = dic.get("part_of_speech") or ""
        # Guard the slice: with an empty suffix, key[:-0] is key[:0], which
        # would wipe the whole key instead of leaving it untouched.
        if pos:
            key = key[:-len(pos)]
        dic["key"] = key.strip()
        return dic
示例#9
0
class Entry(PageModel):
    """Page model whose ``model_class`` is ``models.Entry``.

    The tree binds ``original_key``, ``style_level``, ``pron``,
    ``part_of_speech`` and ``intro_paragraph`` through ``Text()``.
    ``senses`` comes from a list node of ``Sense()`` sub-models.  Three
    optional ``ul`` containers each hold a ``li`` list node, bound to the
    temporary fields ``phrs``, ``phrvbs`` and ``relwrds``; ``postproc``
    merges those into a single ``links`` field.
    """

    model_class = models.Entry

    page_tree = Html(
        Node("div#headword div#headwordleft span.BASE")(original_key=Text()),
        Node("div#headbar")(
            Node.optional("span.STYLE-LEVEL")(style_level=Text()),
            Node.optional("span.PRON")(pron=Text()),
            Node.optional("span.PART-OF-SPEECH")(part_of_speech=Text()),
        ),
        Node.optional("div.SUMMARY div.p")(intro_paragraph=Text()),
        Node.list("div.SENSE-BODY")(senses=Sense()),
        Node.optional("div#phrases_container > ul")(
            Node.list("li")(phrs=PhraseLink()),
        ),
        Node.optional("div#phrasal_verbs_container > ul")(
            Node.list("li")(phrvbs=PhrasalVerbLink()),
        ),
        Node.optional("div.entrylist > ul")(
            Node.list("li")(relwrds=RelatedWordLink()),
        ),
    )

    @classmethod
    def postproc(cls, dic):
        """Merge the three temporary link fields into ``dic['links']``.

        ``relwrds``, ``phrvbs`` and ``phrs`` are popped from ``dic`` (each
        defaulting to an empty list) and combined in that order.  The
        dictionary is modified in place and also returned.
        """
        links = dic.pop('relwrds', [])
        dic['links'] = links
        extra = dic.pop('phrvbs', []) + dic.pop('phrs', [])
        # In-place extension followed by a re-store, as ``dic['links'] += ...`` does.
        links += extra
        dic['links'] = links
        return dic
示例#10
0
 class InvalidPage(PageModel):
     """Page model that binds the same field name under two different nodes.

     Both ``div.one`` and ``div.two`` are bound to ``dupfield``.
     NOTE(review): the class name suggests this definition is expected to
     be rejected -- confirm against the surrounding test.
     """

     model_class = dict
     page_tree = Html(
         Node("div.one")(
             dupfield=Text(),
         ),
         Node("div.two")(
             dupfield=Text(),
         ),
     )
示例#11
0
class OptionalNodePage(PageModel):
    """Page model with one optional node, ``div.missing``, bound to field ``x``."""

    model_class = dict
    page_tree = Html(
        Node.optional("div.missing")(
            x=Text(),
        ),
    )
示例#12
0
class ThisClassElem(PageModel):
    """Page model with a ``head`` text field and an optional ``tail`` sub-model.

    ``> div.head`` is bound to ``head`` via ``Text()``.  The optional
    ``> div.tail`` node is bound to ``tail`` via ``ThisClass()``.
    NOTE(review): ``ThisClass`` presumably refers back to this model
    itself, making the structure recursive -- confirm against the library.
    """

    model_class = dict
    page_tree = Html(
        Node("> div.head")(
            head=Text(),
        ),
        Node.optional("> div.tail")(
            tail=ThisClass(),
        ),
    )
示例#13
0
class MissingNodePage(PageModel):
    """Page model with one non-optional node, ``div.missing``, bound to field ``x``."""

    model_class = dict
    page_tree = Html(
        Node("div.missing")(
            x=Text(),
        ),
    )
示例#14
0
class ThisClassPage(PageModel):
    """Page model binding ``div.list`` to field ``li`` via ``ThisClassElem()``."""

    model_class = dict
    page_tree = Html(
        Node("div.list")(
            li=ThisClassElem(),
        ),
    )
示例#15
0
class StrictPage(PageModel):
    """Page model with a ``StrictNode`` for ``div.strict`` holding one ``span`` node.

    No fields are bound anywhere in the tree.
    """

    model_class = dict
    page_tree = Html(
        StrictNode("div.strict")(
            Node("span"),
        ),
    )
示例#16
0
class ConstantPage(PageModel):
    """Page model binding field ``const`` to ``Constant("myconstant")``.

    The field is attached to the ``div.doesnotmatter`` node.
    """

    model_class = dict
    page_tree = Html(
        Node("div.doesnotmatter")(
            const=Constant("myconstant"),
        ),
    )
示例#17
0
class ConcatPage(PageModel):
    """Page model using ``.concat(", ")`` on a list node.

    Inside ``div.list``, the ``span.elem`` list node has ``.concat(", ")``
    applied and is bound to the ``concatenated`` field via ``Text()``.
    """

    model_class = dict
    page_tree = Html(
        Node("div.list")(
            Node.list("span.elem").concat(", ")(
                concatenated=Text(),
            ),
        ),
    )
示例#18
0
class TakefirstPage(PageModel):
    """Page model using ``.take_first()`` on a list node.

    The ``div.listelem`` list node has ``.take_first()`` applied and is
    bound to the ``firstelem`` field via ``Text()``.
    """

    model_class = dict
    page_tree = Html(
        Node.list("div.listelem").take_first()(
            firstelem=Text(),
        ),
    )
示例#19
0
class NestedInnerPage(PageModel):
    """Page model binding the ``span`` node to field ``innertxt`` via ``Text()``."""

    model_class = dict
    page_tree = Html(
        Node("span")(
            innertxt=Text(),
        ),
    )
示例#20
0
 class InvalidPageTwo(PageModel):
     """Page model that passes ``Text()`` to a node positionally.

     Here ``Text()`` is given to ``Node("div")`` as a positional argument
     instead of being bound to a field name by keyword.
     NOTE(review): the class name suggests this definition is expected to
     be rejected -- confirm against the surrounding test.
     """

     model_class = dict
     page_tree = Html(
         Node("div")(
             Text(),
         ),
     )
示例#21
0
class ListPage(PageModel):
    """Page model binding a list node to field ``mylistfield``.

    Inside the ``.list`` node, the ``.listelem`` list node is bound to
    ``mylistfield`` via ``Text()``.
    """

    model_class = dict
    page_tree = Html(
        Node(".list")(
            Node.list(".listelem")(
                mylistfield=Text(),
            ),
        ),
    )