def extract_user_info(doc):
    """Collect the personal-info entries of a page as cleaned strings.

    The personal-info panel is located among the scripts extracted from
    ``doc`` by its ``domid`` marker. Several known numeric suffixes are tried
    in order and the first script that matches is used. The text content of
    every ``<ul>/<li>`` element in that panel's HTML is cleaned with
    ``util.clean_text`` and collected. If ``extract_level_info(doc)`` yields
    a truthy value, it is appended as the last entry.

    Args:
        doc: The page object, passed unchanged to ``util.extract_script``
            and ``extract_level_info``.

    Returns:
        A list of cleaned strings, or ``None`` when no personal-info script
        is found or when any extraction step raises an ``Exception``.
        ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught.
    """
    # Known markers of the personal-info panel, in lookup order.
    domid_patterns = (
        r'"domid":"Pl_Official_PersonalInfo__63"',
        r'"domid":"Pl_Official_PersonalInfo__62"',
        r'"domid":"Pl_Official_PersonalInfo__61"',
        r'"domid":"Pl_Official_PersonalInfo__59"',
    )
    try:
        scripts = util.extract_script(doc)

        script = None
        for pattern in domid_patterns:
            script = util.select_script(scripts, pattern)
            if script is not None:
                break
        if script is None:
            # None of the known panels is present; there is nothing to parse,
            # so report "no info" directly instead of failing on ``None.text``.
            return None

        fragment = util.extract_html_from_script(script.text.strip())
        tree = etree.HTML(fragment)
        info = [
            util.clean_text(li.xpath("string()"))
            for li in tree.xpath(r'//ul/li')
        ]

        level_info = extract_level_info(doc)
        if level_info:
            info.append(level_info)
        return info
    except Exception:
        # Best-effort scraping: log the failure and signal it with None.
        traceback.print_exc()
        return None
def extract_user_info(doc):
    """Collect the personal-info entries of a page as cleaned strings.

    The personal-info panel is located among the scripts extracted from
    ``doc`` by its ``domid`` marker. Several known numeric suffixes are tried
    in order and the first script that matches is used. The text content of
    every ``<ul>/<li>`` element in that panel's HTML is cleaned with
    ``util.clean_text`` and collected. If ``extract_level_info(doc)`` yields
    a truthy value, it is appended as the last entry.

    Args:
        doc: The page object, passed unchanged to ``util.extract_script``
            and ``extract_level_info``.

    Returns:
        A list of cleaned strings, or ``None`` when no personal-info script
        is found or when any extraction step raises an ``Exception``.
        ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught.
    """
    # Known markers of the personal-info panel, in lookup order.
    domid_patterns = (
        r'"domid":"Pl_Official_PersonalInfo__63"',
        r'"domid":"Pl_Official_PersonalInfo__62"',
        r'"domid":"Pl_Official_PersonalInfo__61"',
        r'"domid":"Pl_Official_PersonalInfo__59"',
    )
    try:
        scripts = util.extract_script(doc)

        script = None
        for pattern in domid_patterns:
            script = util.select_script(scripts, pattern)
            if script is not None:
                break
        if script is None:
            # None of the known panels is present; there is nothing to parse,
            # so report "no info" directly instead of failing on ``None.text``.
            return None

        fragment = util.extract_html_from_script(script.text.strip())
        tree = etree.HTML(fragment)
        info = [
            util.clean_text(li.xpath("string()"))
            for li in tree.xpath(r'//ul/li')
        ]

        level_info = extract_level_info(doc)
        if level_info:
            info.append(level_info)
        return info
    except Exception:
        # Best-effort scraping: log the failure and signal it with None.
        traceback.print_exc()
        return None
def extract_level_info(doc):
    """Return the cleaned text of the page's ``level_info`` paragraph.

    The script whose content matches the ``Pl_Official_RightGrowNew``
    ``domid`` marker is selected from the scripts extracted from ``doc``.
    Its embedded HTML is parsed and the string value of the first
    ``<p class="level_info">`` element is cleaned with ``util.clean_text``.

    Args:
        doc: The page object, passed unchanged to ``util.extract_script``.

    Returns:
        The cleaned paragraph text, or ``None`` when the script or the
        paragraph is missing or when any extraction step raises an
        ``Exception``. ``KeyboardInterrupt`` and ``SystemExit`` are
        deliberately not caught.
    """
    try:
        scripts = util.extract_script(doc)
        # The marker is intentionally open-ended (no closing quote), so it
        # matches any domid that starts with this prefix.
        script = util.select_script(
            scripts, r'"domid":"Pl_Official_RightGrowNew'
        )
        if script is None:
            # Panel not present on this page; nothing to extract.
            return None

        fragment = util.extract_html_from_script(script.text.strip())
        tree = etree.HTML(fragment)
        paragraphs = tree.xpath(r'//p[@class="level_info"]')
        if not paragraphs:
            return None
        return util.clean_text(paragraphs[0].xpath("string()"))
    except Exception:
        # Best-effort scraping: log the failure and signal it with None.
        traceback.print_exc()
        return None
def extract_level_info(doc):
    """Return the cleaned text of the page's ``level_info`` paragraph.

    The script whose content matches the ``Pl_Official_RightGrowNew``
    ``domid`` marker is selected from the scripts extracted from ``doc``.
    Its embedded HTML is parsed and the string value of the first
    ``<p class="level_info">`` element is cleaned with ``util.clean_text``.

    Args:
        doc: The page object, passed unchanged to ``util.extract_script``.

    Returns:
        The cleaned paragraph text, or ``None`` when the script or the
        paragraph is missing or when any extraction step raises an
        ``Exception``. ``KeyboardInterrupt`` and ``SystemExit`` are
        deliberately not caught.
    """
    try:
        scripts = util.extract_script(doc)
        # The marker is intentionally open-ended (no closing quote), so it
        # matches any domid that starts with this prefix.
        script = util.select_script(
            scripts, r'"domid":"Pl_Official_RightGrowNew'
        )
        if script is None:
            # Panel not present on this page; nothing to extract.
            return None

        fragment = util.extract_html_from_script(script.text.strip())
        tree = etree.HTML(fragment)
        paragraphs = tree.xpath(r'//p[@class="level_info"]')
        if not paragraphs:
            return None
        return util.clean_text(paragraphs[0].xpath("string()"))
    except Exception:
        # Best-effort scraping: log the failure and signal it with None.
        traceback.print_exc()
        return None