class Sense(PageModel):
    """Page model for a single sense block; results go to ``models.Sense``.

    Every top-level selector uses the ``>`` child combinator, so the nodes
    are presumably matched against direct children of the sense's root
    element only.
    """

    model_class = models.Sense

    page_tree = StrictHtml(
        # Declared without a field binding: nothing is stored for it.
        Node.optional("> div.SENSE-NUM"),
        # NOTE(review): span.SYNTAX-CODING is declared twice -- here with no
        # binding, and again after SUBJECT-AREA bound to ``syntax_coding``.
        # Presumably the span may sit on either side of the style/subject
        # labels; confirm this is intentional.
        Node.optional("> span.SYNTAX-CODING"),
        Node.optional("> span.STYLE-LEVEL")(
            style_level=Text(),
        ),
        Node.optional("> span.SUBJECT-AREA")(
            subject_area=Text(),
        ),
        Node.optional("> span.SYNTAX-CODING")(
            syntax_coding=Text(),
        ),
        # Two selectors are given for the (non-optional) definition node.
        Node("> span.DEFINITION", "> span.QUICK-DEFINITION")(
            definition=Text(),
        ),
        # Texts from the three selector variants are combined with " | ".
        Node.list(
            "> strong",
            "> span.SENSE-VARIANT span.BASE",
            "> span.MULTIWORD span.BASE",
        ).concat(" | ")(
            original_key=Text(),
        ),
        Node.list("> div.EXAMPLES")(
            examples=Example(),
        ),
        # Declared without a field binding: nothing is stored for it.
        Node.optional("> div.THES"),
        # Sub-senses are parsed with their own page model, one per
        # div.SUB-SENSE-CONTENT found under the optional ol.SUB-SENSES.
        Node.optional("> ol.SUB-SENSES")(
            Node.list("div.SUB-SENSE-CONTENT")(
                subsenses=SubSense(),
            ),
        ),
    )
class Entry(PageModel):
    """Page model for an entry page; results go to ``models.Entry``.

    The tree extracts the headword, the labels in the header bar, an
    optional summary paragraph, the list of senses and three separate link
    lists. ``postproc`` then folds the three link lists into one ``links``
    field.
    """

    model_class = models.Entry

    page_tree = Html(
        Node("div#headword div#headwordleft span.BASE")(
            original_key=Text(),
        ),
        # The header bar itself is required; each label inside is optional.
        Node("div#headbar")(
            Node.optional("span.STYLE-LEVEL")(
                style_level=Text(),
            ),
            Node.optional("span.PRON")(
                pron=Text(),
            ),
            Node.optional("span.PART-OF-SPEECH")(
                part_of_speech=Text(),
            ),
        ),
        Node.optional("div.SUMMARY div.p")(
            intro_paragraph=Text(),
        ),
        Node.list("div.SENSE-BODY")(
            senses=Sense(),
        ),
        # The three link containers below use temporary field names that
        # postproc() removes again.
        Node.optional("div#phrases_container > ul")(
            Node.list("li")(
                phrs=PhraseLink(),
            ),
        ),
        Node.optional("div#phrasal_verbs_container > ul")(
            Node.list("li")(
                phrvbs=PhrasalVerbLink(),
            ),
        ),
        Node.optional("div.entrylist > ul")(
            Node.list("li")(
                relwrds=RelatedWordLink(),
            ),
        ),
    )

    @classmethod
    def postproc(cls, dic):
        """Merge the three temporary link lists into a single ``links`` key.

        ``relwrds``, ``phrvbs`` and ``phrs`` are removed from ``dic`` (a
        missing key counts as an empty list) and their contents are stored
        under ``links`` in exactly that order.

        Presumably invoked by ``PageModel`` on the extracted field dict
        before the model object is built -- confirm against the base class.

        Args:
            dic: Mapping of extracted field names to values; modified in
                place.

        Returns:
            The same ``dic`` object, now carrying ``links``.
        """
        # Start from the related-words list and grow it in place, so the
        # object stored under "links" is the popped list itself.
        links = dic.pop("relwrds", [])
        links += dic.pop("phrvbs", []) + dic.pop("phrs", [])
        dic["links"] = links
        return dic
class ConcatPage(PageModel):
    """Minimal page model exercising ``Node.list(...).concat(...)``.

    Produces a plain ``dict`` whose ``concatenated`` field is built from
    the ``span.elem`` nodes inside ``div.list``, using ", " as the
    separator passed to ``concat``.
    """

    model_class = dict

    page_tree = Html(
        Node("div.list")(
            Node.list("span.elem").concat(", ")(
                concatenated=Text(),
            ),
        ),
    )
class TakefirstPage(PageModel):
    """Minimal page model exercising ``Node.list(...).take_first()``.

    Produces a plain ``dict`` with a single ``firstelem`` field taken from
    the ``div.listelem`` matches.
    """

    model_class = dict

    page_tree = Html(
        Node.list("div.listelem").take_first()(
            firstelem=Text(),
        ),
    )
class ListPage(PageModel):
    """Minimal page model exercising a plain ``Node.list``.

    Produces a plain ``dict`` whose ``mylistfield`` field is filled from
    the ``.listelem`` nodes found inside the ``.list`` container.
    """

    model_class = dict

    page_tree = Html(
        Node(".list")(
            Node.list(".listelem")(
                mylistfield=Text(),
            ),
        ),
    )