def build_doc(self, content):
    """Build the document for a JSON response that may wrap HTML.

    The raw content is first parsed by ``JsonPage.build_doc``. When the
    parsed result has a ``'data'`` entry, that entry is treated as HTML
    markup and the document produced by ``HTMLPage.build_doc`` for it is
    returned. Otherwise the parsed JSON document is returned as-is.
    """
    json_doc = JsonPage.build_doc(self, content)
    if 'data' not in json_doc:
        return json_doc

    # The 'data' value holds HTML. It is encoded to bytes before parsing
    # because HTMLPage.build_doc() feeds its input to BytesIO.
    html_builder = HTMLPage(self.browser, self.response)
    return html_builder.build_doc(json_doc['data'].encode(self.encoding))
def build_doc(self, content):
    """Build an HTML document from markup embedded in an XML response.

    The response is parsed as XML first. The text of the first and second
    ``update`` elements under ``partial-response/changes`` is encoded with
    ``self.encoding``, concatenated in that order (the original code names
    them the transactions and investments fragments), and the result is
    parsed by ``HTMLPage.build_doc``.
    """
    xml_doc = XMLPage.build_doc(self, content)

    # Order matters: transactions markup first, investments markup second.
    update_paths = (
        '//partial-response/changes/update[1]',
        '//partial-response/changes/update[2]',
    )
    fragments = []
    for path in update_paths:
        node = xml_doc.xpath(path)[0]
        fragments.append(node.text.encode(encoding=self.encoding))

    return HTMLPage.build_doc(self, b''.join(fragments))
def build_doc(self, text):
    """Build an HTML document from the 'top' and 'tab' parts of the response.

    The parent class parses ``text`` into a mapping. Its ``'top'`` and
    ``'tab'`` values are joined (in that order), encoded with
    ``self.encoding`` and handed to a fresh ``HTMLPage`` for parsing.
    """
    parsed = super(TrackPage, self).build_doc(text)
    markup = ''.join((parsed['top'], parsed['tab']))

    # A separate HTMLPage bound to the same browser/response does the
    # HTML parsing; it is given bytes rather than text.
    html_builder = HTMLPage(self.browser, self.response)
    return html_builder.build_doc(markup.encode(self.encoding))