#!/usr/bin/python
#coding:utf8
# Created:  2013-11-11
#
# Manual smoke check for Page: fill in every field of one record and print
# it in each export form (tuple, dict, dict without None values, XML).

import sys
# Page is imported from the parent directory -- presumably Page.py lives one
# level above this script; the relative path depends on the working directory.
sys.path.append('..')

from Page import Page
from lxml import etree

page = Page()
page.title = u'title'
page.ns = u'0'
page.id = u'10'
page.text = u'text'
page.redirect = u'redirect'

# Single-argument print(...) behaves the same as the Python 2 print statement
# and is also valid syntax on Python 3.
print(page.toTuple())
print(page.toDict())
print(page.toDict(noNone=True))
print(etree.tostring(page.toXML()))
def getPageData(self,page):
    """Collect the selected fields of one page element into a ``Page``.

    The children of ``page`` are visited in order.  A child whose tag equals
    one of ``self.tags.title`` / ``ns`` / ``id`` / ``redirect`` / ``revision``
    supplies the ``title`` / ``ns`` / ``id`` / ``redirect`` / ``text`` field
    of the record:

    * title, ns and id take the child's ``.text``;
    * redirect takes the child's ``title`` attribute;
    * text takes the ``.text`` of the ``self.tags.text`` element found under
      the revision child, passed through ``self.parse_text`` when that is
      not None.

    For each field, ``getattr(self.select, <field>)`` is either None or an
    iterable of predicates.  Every predicate is called with
    ``data.toTuple()`` right after the field has been stored; if any of them
    returns a false value the whole page is rejected.  A field that is only
    needed for filtering (its name is not in ``self.fields``) is reset to
    None once the predicates have passed.

    Scanning stops after a revision child has been processed or when an
    upload child is met.

    :param page: iterable of child elements (objects exposing ``tag``,
        ``text``, ``get`` and ``find``) -- presumably an lxml element.
    :returns: the populated ``Page``, or None when a predicate rejected it.
    """
    def read_plain(elem):
        return elem.text

    def read_redirect(elem):
        # The redirect target is carried in an attribute, not in the body.
        return elem.get(u'title')

    def read_text(elem):
        text = elem.find(self.tags.text).text
        if self.parse_text is None:
            return text
        return self.parse_text(text)

    data = Page()
    for elem in page:
        tag = elem.tag

        # Map the element onto (field name, value reader).  The comparisons
        # stay an ordered chain so the first matching tag wins.
        if tag == self.tags.title:
            name, read = u'title', read_plain
        elif tag == self.tags.ns:
            name, read = u'ns', read_plain
        elif tag == self.tags.id:
            name, read = u'id', read_plain
        elif tag == self.tags.redirect:
            name, read = u'redirect', read_redirect
        elif tag == self.tags.revision:
            name, read = u'text', read_text
        elif tag == self.tags.upload:
            break
        else:
            continue

        filters = getattr(self.select, name)
        if filters is None and name not in self.fields:
            # Neither exported nor filtered on: skip without reading the
            # value.  NOTE(review): for a revision this also skips the break
            # below, so later children are still visited.
            continue

        setattr(data, name, read(elem))

        if filters is not None:
            if not all(accept(data.toTuple()) for accept in filters):
                return None
            if name not in self.fields:
                # The value was needed for filtering only.
                setattr(data, name, None)

        if name == u'text':
            # A processed revision ends the scan of this page.
            break
    return data