class NestedOuterPage(PageModel):
    """Page model that embeds ``NestedInnerPage`` inside an outer element.

    The tree is rooted at a ``StrictNode`` for ``div.outer``. Its ``> span``
    child is bound to the ``outertxt`` field and its ``div.inner`` child is
    bound to the ``nested`` field through a ``NestedInnerPage`` instance.
    """

    model_class = dict

    page_tree = Html(
        StrictNode("div.outer")(
            # Direct-child span of the outer div.
            Node("> span")(outertxt=Text()),
            # Sub-model declared by NestedInnerPage.
            Node("div.inner")(nested=NestedInnerPage()),
        )
    )
class AttrPage(PageModel):
    """Page model binding attributes and text of an ``a.mylink`` node.

    Declares three fields on the same node: ``href`` and ``title`` through
    ``Attr`` with the matching attribute names, and ``text`` through ``Text``.
    """

    model_class = dict

    page_tree = Html(
        Node("a.mylink")(
            href=Attr("href"),
            title=Attr("title"),
            text=Text(),
        )
    )
class PostprocPage(PageModel):
    """Page model whose ``postproc`` hook rewrites the collected dict.

    The tree binds ``div.lower`` to the ``lower`` field; ``postproc`` then
    replaces that entry with an upper-cased ``upper`` entry.
    """

    model_class = dict

    page_tree = Html(
        Node("div.lower")(lower=Text())
    )

    @classmethod
    def postproc(cls, dic):
        """Swap the ``lower`` entry of *dic* for an upper-cased ``upper`` one.

        A missing ``lower`` key is treated as the empty string. The dict is
        modified in place and also returned.
        """
        lowered = dic.pop('lower', '')
        dic['upper'] = lowered.upper()
        return dic
class SimplePage(PageModel):
    """Page model declaring a small tree under ``body``.

    Under the ``body`` node it declares ``div1`` on ``div.div_1``, ``div2``
    on a node built from the two selectors ``#div_2`` and ``#asdf``, a bare
    ``span`` node with no fields, and ``body`` as a ``Text`` field on the
    ``body`` node itself.
    """

    model_class = dict

    page_tree = Html(
        Node("body")(
            Node("div.div_1")(div1=Text()),
            # Node is given two selector arguments here.
            Node("#div_2", "#asdf")(div2=Text()),
            # Declared without any field bindings.
            Node("span"),
            body=Text(),
        ),
    )
class PhrasalVerbLink(PageModel):
    """Page model for a phrasal-verb link, built as ``models.Link``.

    On the ``a`` node, ``url`` is bound to the ``href`` attribute and ``key``
    to the ``title`` attribute. ``link_type`` and ``part_of_speech`` are
    fixed ``Constant`` values.
    """

    model_class = models.Link

    page_tree = Html(
        Node("a")(
            url=Attr("href"),
            key=Attr("title"),
            link_type=Constant("phrasal verbs"),
            part_of_speech=Constant("phrasal verb"),
        )
    )
class Entry(PageModel):
    """Page model for a dictionary entry, built as ``models.Entry``.

    The tree declares the headword, several optional header fields, an
    optional intro paragraph, a list of senses, and three optional link
    lists (``phrs``, ``phrvbs``, ``relwrds``). ``postproc`` merges the three
    link lists into a single ``links`` entry.
    """

    model_class = models.Entry

    page_tree = Html(
        Node("div#headword div#headwordleft span.BASE")(
            original_key=Text()
        ),
        Node("div#headbar")(
            Node.optional("span.STYLE-LEVEL")(
                style_level=Text()
            ),
            Node.optional("span.PRON")(
                pron=Text()
            ),
            Node.optional("span.PART-OF-SPEECH")(
                part_of_speech=Text()
            )
        ),
        Node.optional("div.SUMMARY div.p")(
            intro_paragraph=Text()
        ),
        Node.list("div.SENSE-BODY")(
            senses=Sense()
        ),
        # The three link containers below are all optional; postproc()
        # supplies empty lists for whichever of them are absent.
        Node.optional("div#phrases_container > ul")(
            Node.list("li")(
                phrs=PhraseLink()
            )
        ),
        Node.optional("div#phrasal_verbs_container > ul")(
            Node.list("li")(
                phrvbs=PhrasalVerbLink()
            )
        ),
        Node.optional("div.entrylist > ul")(
            Node.list("li")(
                relwrds=RelatedWordLink()
            )
        )
    )

    @classmethod
    def postproc(cls, dic):
        """Merge the separate link lists of *dic* into one ``links`` entry.

        The ``relwrds``, ``phrvbs`` and ``phrs`` keys are removed from *dic*
        (each defaulting to an empty list) and concatenated in that order
        under ``links``. The dict is modified in place and also returned.
        """
        related_words = dic.pop('relwrds', [])
        dic['links'] = related_words
        phrase_links = dic.pop('phrvbs', []) + dic.pop('phrs', [])
        dic['links'] += phrase_links
        return dic
class RelatedWordLink(PageModel):
    """Page model for a related-word link, built as ``models.Link``.

    On the ``a`` node, ``key`` is bound to the ``title`` attribute and
    ``url`` to the ``href`` attribute; ``link_type`` is a fixed ``Constant``.
    An optional ``span.PART-OF-SPEECH`` child supplies ``part_of_speech``.
    ``postproc`` trims the part-of-speech suffix off ``key``.
    """

    model_class = models.Link

    page_tree = Html(
        Node("a")(
            Node.optional("span.PART-OF-SPEECH")(
                part_of_speech=Text()
            ),
            key=Attr("title"),
            url=Attr("href"),
            link_type=Constant("related words"),
        )
    )

    @classmethod
    def postproc(cls, dic):
        """Strip the trailing part-of-speech text from ``dic["key"]``.

        As many characters as ``part_of_speech`` is long are cut from the end
        of the key, and the result is whitespace-stripped. When
        ``part_of_speech`` is missing or empty the key is only stripped.

        The dict is modified in place and also returned, matching the other
        ``postproc`` hooks in this file, which return the dict.
        """
        key = dic["key"]
        part_of_speech = dic.get("part_of_speech", "")
        # Guard the slice: with an empty suffix, key[:-0] is key[:0] and
        # would wipe the key instead of leaving it untouched.
        if part_of_speech:
            key = key[:-len(part_of_speech)]
        dic["key"] = key.strip()
        return dic
class NestedInnerPage(PageModel):
    """Page model binding a ``span`` node to the ``innertxt`` field."""

    model_class = dict

    page_tree = Html(
        Node("span")(innertxt=Text())
    )
class ThisClassElem(PageModel):
    """Page model with a ``head`` field and an optional ``ThisClass`` tail.

    ``> div.head`` is bound to ``head``. The optional ``> div.tail`` node is
    bound to ``tail`` through ``ThisClass()``.
    """

    model_class = dict

    page_tree = Html(
        Node("> div.head")(head=Text()),
        # NOTE(review): ThisClass() presumably refers back to this model
        # class (self-reference) - confirm against the library.
        Node.optional("> div.tail")(tail=ThisClass()),
    )
class OptionalNodePage(PageModel):
    """Page model binding field ``x`` on an optional ``div.missing`` node."""

    model_class = dict

    page_tree = Html(
        Node.optional("div.missing")(x=Text())
    )
class StrictPage(PageModel):
    """Page model with a ``StrictNode`` for ``div.strict`` holding one ``span``.

    No fields are bound anywhere in the tree.
    """

    model_class = dict

    page_tree = Html(
        StrictNode("div.strict")(
            Node("span"),
        )
    )
class MissingNodePage(PageModel):
    """Page model binding field ``x`` on a non-optional ``div.missing`` node."""

    model_class = dict

    page_tree = Html(
        Node("div.missing")(x=Text())
    )
class ThisClassPage(PageModel):
    """Page model binding ``div.list`` to ``li`` through ``ThisClassElem``."""

    model_class = dict

    page_tree = Html(
        Node("div.list")(li=ThisClassElem())
    )
class InvalidPageTwo(PageModel):
    """Page model that passes ``Text()`` to a node positionally.

    NOTE(review): judging by the class name this is presumably a deliberately
    malformed definition (the selector has no field name) - confirm before
    "fixing" it.
    """

    model_class = dict

    page_tree = Html(
        # Text() is positional on purpose: no field name is given.
        Node("div")(Text())
    )
class ConcatPage(PageModel):
    """Page model using ``Node.list(...).concat(", ")`` for one field.

    Under ``div.list``, the ``span.elem`` list node is chained with
    ``.concat(", ")`` and bound to the ``concatenated`` field.
    """

    model_class = dict

    page_tree = Html(
        Node("div.list")(
            Node.list("span.elem").concat(", ")(
                concatenated=Text()
            )
        )
    )
class ConstantPage(PageModel):
    """Page model binding ``const`` to ``Constant("myconstant")``.

    The binding is attached to a ``div.doesnotmatter`` node.
    """

    model_class = dict

    page_tree = Html(
        Node("div.doesnotmatter")(
            const=Constant("myconstant")
        )
    )
class TakefirstPage(PageModel):
    """Page model using ``Node.list(...).take_first()`` for one field.

    The ``div.listelem`` list node is chained with ``.take_first()`` and
    bound to the ``firstelem`` field.
    """

    model_class = dict

    page_tree = Html(
        Node.list("div.listelem").take_first()(
            firstelem=Text()
        )
    )
class InvalidPage(PageModel):
    """Page model that binds the same field name on two different nodes.

    NOTE(review): judging by the class name, the duplicated ``dupfield`` is
    presumably intentional (a negative fixture) - confirm before changing it.
    """

    model_class = dict

    page_tree = Html(
        Node("div.one")(dupfield=Text()),
        # Same field name as above, on a different node.
        Node("div.two")(dupfield=Text())
    )
class ListPage(PageModel):
    """Page model binding a list node to the ``mylistfield`` field.

    Under the ``.list`` node, a ``Node.list`` for ``.listelem`` is bound to
    ``mylistfield``.
    """

    model_class = dict

    page_tree = Html(
        Node(".list")(
            Node.list(".listelem")(
                mylistfield=Text()
            )
        )
    )