def __init__(self, *args, **kwargs):
    """Build the HTML page, then replace ``self.doc`` with embedded JSON.

    The JSON is taken from the text of the page's ``<script>`` elements:
    it is the string literal handed to ``JSON.parse("...")`` in the
    ``window["initialData"]`` assignment that directly precedes the
    ``window["tags"]`` assignment.
    """
    HTMLPage.__init__(self, *args, **kwargs)

    # Capture group 1 of the pattern is the still-escaped JSON text.
    extract_initial_data = Regexp(
        CleanText('//script'),
        r"window\[\"initialData\"\] = JSON.parse\(\"({.*})\"\);window\[\"tags\"\]",
    )
    escaped_payload = extract_initial_data(self.doc)

    # The payload was embedded as a JS string literal, so backslash escapes
    # have to be resolved before the text is valid JSON.
    # NOTE(review): unicode_escape appears to treat raw non-ASCII input as
    # Latin-1 -- confirm the payload only carries escaped characters.
    unescaped_payload, _consumed = codecs.unicode_escape_decode(escaped_payload)

    # Round-trip through UTF-8, tolerating surrogates on the encode side.
    # NOTE(review): presumably aimed at surrogate escapes -- confirm intent.
    utf8_bytes = unescaped_payload.encode('utf-8', 'surrogatepass')
    payload_text = utf8_bytes.decode('utf-8')

    self.doc = json.loads(payload_text)
def __init__(self, *args, **kwargs):
    """Build the HTML page, then expose the JSON held in its scripts.

    After construction:

    * ``self.htmldoc`` is the document that ``HTMLPage.__init__`` produced;
    * ``self.api_content`` is the JSON parsed from the second
      ``<script>`` under ``<body>`` (the ``window.APP_CONFIG`` one);
    * ``self.doc`` is the JSON parsed from the third ``<script>`` under
      ``<body>`` (the ``window.FLUX_STATE`` one).
    """
    HTMLPage.__init__(self, *args, **kwargs)

    html_tree = self.doc

    # Text of each script with the JS assignment text replaced by ''.
    flux_state_text = CleanText(
        '(//body/script)[3]',
        replace=[('window.FLUX_STATE = ', '')],
    )(html_tree)
    app_config_text = CleanText(
        '(//body/script)[2]',
        replace=[('window.APP_CONFIG = ', '')],
    )(html_tree)

    # Keep the HTML document reachable before self.doc is overwritten.
    self.htmldoc = html_tree
    self.api_content = json.loads(app_config_text)
    self.doc = json.loads(flux_state_text)
def __init__(self, *args, **kwargs):
    """Build the HTML page, then expose the JSON held in its scripts.

    After construction:

    * ``self.htmldoc`` is the document that ``HTMLPage.__init__`` produced;
    * ``self.api_content`` is the JSON parsed from the third
      ``<script>`` under ``<body>`` (the ``window.APP_CONFIG`` one);
    * ``self.doc`` is the JSON parsed from the fourth ``<script>`` under
      ``<body>`` (the ``window.FLUX_STATE`` one).
    """
    HTMLPage.__init__(self, *args, **kwargs)

    html_tree = self.doc

    # Text of each script with the JS assignment text replaced by ''.
    flux_state_text = CleanText(
        '(//body/script)[4]',
        replace=[('window.FLUX_STATE = ', '')],
    )(html_tree)
    app_config_text = CleanText(
        '(//body/script)[3]',
        replace=[('window.APP_CONFIG = ', '')],
    )(html_tree)

    # Keep the HTML document reachable before self.doc is overwritten.
    self.htmldoc = html_tree
    self.api_content = json.loads(app_config_text)
    self.doc = json.loads(flux_state_text)