def __init__(self, *args, **kwargs):
    """Build the HTML page, then replace ``self.doc`` with the embedded JSON.

    After ``HTMLPage.__init__`` has run, the text matched between
    ``window["initialData"] = JSON.parse("`` and ``");window["tags"]`` is
    extracted from the page's ``//script`` content, un-escaped, parsed with
    ``json.loads`` and stored in ``self.doc``.

    All positional and keyword arguments are forwarded unchanged to
    ``HTMLPage.__init__``.
    """
    HTMLPage.__init__(self, *args, **kwargs)

    # The payload is the string literal handed to JSON.parse("...") in the
    # page's script; only the part captured by the group is kept.
    json_content = Regexp(
        CleanText('//script'),
        r"window\[\"initialData\"\] = JSON.parse\(\"({.*})\"\);window\[\"tags\"\]"
    )(self.doc)

    # Undo the backslash escaping of that string literal.
    json_content = codecs.unicode_escape_decode(json_content)[0]

    # unicode_escape leaves "\ud83d\ude00"-style escapes as two separate
    # surrogate code points.  A UTF-16 round trip merges each pair into the
    # real character; the former UTF-8 round trip raised UnicodeDecodeError
    # on them instead.  Text without surrogates is left untouched.
    json_content = json_content.encode('utf-16', 'surrogatepass').decode('utf-16')

    self.doc = json.loads(json_content)
def __init__(self, *args, **kwargs):
    """Build the HTML page, then replace ``self.doc`` with advert/agency data.

    After ``HTMLPage.__init__`` has run, the text matched between
    ``window["initialData"] = JSON.parse("`` and ``");`` is extracted from
    the page's ``//script`` content, un-escaped and parsed with
    ``json.loads``.  ``self.doc`` then becomes a dict with two keys:

    * ``"advert"``: the ``advert.mainAdvert`` object of the payload;
    * ``"agency"``: the ``agency`` object of the payload.

    A missing key yields an empty dict.  All positional and keyword
    arguments are forwarded unchanged to ``HTMLPage.__init__``.
    """
    HTMLPage.__init__(self, *args, **kwargs)

    # The payload is the string literal handed to JSON.parse("...") in the
    # page's script; only the part captured by the group is kept.
    json_content = Regexp(
        CleanText('//script'),
        r"window\[\"initialData\"\] = JSON.parse\(\"({.*})\"\);"
    )(self.doc)

    # Undo the backslash escaping of that string literal.
    json_content = codecs.unicode_escape_decode(json_content)[0]

    # unicode_escape leaves "\ud83d\ude00"-style escapes as two separate
    # surrogate code points.  A UTF-16 round trip merges each pair into the
    # real character; the former UTF-8 round trip raised UnicodeDecodeError
    # on them instead.  Text without surrogates is left untouched.
    json_content = json_content.encode('utf-16', 'surrogatepass').decode('utf-16')

    # Parse once and reuse the result for both sub-documents.
    initial_data = json.loads(json_content)
    self.doc = {
        "advert": initial_data.get('advert', {}).get('mainAdvert', {}),
        "agency": initial_data.get('agency', {}),
    }