Пример #1
0
def extract_user_info(doc):
    """Collect the personal-info list entries of a profile page.

    Looks for the script block carrying one of the known
    ``Pl_Official_PersonalInfo__NN`` dom ids, parses the HTML embedded in
    it and gathers the cleaned text of every ``<li>`` that is a child of a
    ``<ul>``.  If ``extract_level_info(doc)`` yields a truthy value it is
    appended as the last entry.

    Args:
        doc: Page document, handed unchanged to ``util.extract_script`` and
            ``extract_level_info`` (its exact type is defined by those
            helpers and is not visible here).

    Returns:
        A list of cleaned strings, or ``None`` when anything goes wrong.
        In the failure case the traceback is printed and no ``Exception``
        propagates to the caller.
    """
    # Known dom ids of the personal-info module, tried in this order;
    # the first one that selects a script wins.
    domid_patterns = (
        r'"domid":"Pl_Official_PersonalInfo__63"',
        r'"domid":"Pl_Official_PersonalInfo__62"',
        r'"domid":"Pl_Official_PersonalInfo__61"',
        r'"domid":"Pl_Official_PersonalInfo__59"',
    )
    try:
        scripts = util.extract_script(doc)

        script = None
        for pattern in domid_patterns:
            script = util.select_script(scripts, pattern)
            if script is not None:
                break
        if script is None:
            # Fail with a meaningful message instead of the incidental
            # AttributeError that ``None.text`` would produce.
            raise ValueError("no PersonalInfo script block found")

        html = util.extract_html_from_script(script.text.strip())
        html = etree.HTML(html)

        info = []
        for li in html.xpath(r'//ul/li'):
            # string() flattens the element and its descendants to text.
            text = li.xpath("string()")
            info.append(util.clean_text(text))

        level_info = extract_level_info(doc)
        if level_info:
            info.append(level_info)
        return info
    except Exception:
        # Best-effort scraper: report the problem and signal failure with
        # None.  ``Exception`` (not a bare except) lets KeyboardInterrupt
        # and SystemExit pass through.
        traceback.print_exc()
        return None
Пример #2
0
def extract_user_info(doc):
    """Return the personal-info entries scraped from a profile document.

    The script block is located by trying the known
    ``Pl_Official_PersonalInfo__NN`` dom ids in a fixed order.  The HTML
    embedded in that script is parsed, and the cleaned text of each
    ``<li>`` child of a ``<ul>`` becomes one entry of the result.  A truthy
    result of ``extract_level_info(doc)`` is appended at the end.

    Args:
        doc: Page document passed as-is to ``util.extract_script`` and
            ``extract_level_info``; its concrete type is determined by
            those helpers (not visible in this block).

    Returns:
        list of cleaned strings on success; ``None`` on any failure, in
        which case the traceback is printed rather than raised.
    """
    # Ordered fallbacks: the module's dom id suffix varies between pages.
    patterns = (
        r'"domid":"Pl_Official_PersonalInfo__63"',
        r'"domid":"Pl_Official_PersonalInfo__62"',
        r'"domid":"Pl_Official_PersonalInfo__61"',
        r'"domid":"Pl_Official_PersonalInfo__59"',
    )
    try:
        scripts = util.extract_script(doc)
        for pattern in patterns:
            script = util.select_script(scripts, pattern)
            if script is not None:
                break
        else:
            # None of the candidates matched: raise a descriptive error
            # rather than tripping over ``None.text`` further down.
            raise ValueError("no PersonalInfo script block found")

        html = util.extract_html_from_script(script.text.strip())
        html = etree.HTML(html)

        # One cleaned string per list item; string() yields the item's
        # full text content.
        info = [
            util.clean_text(li.xpath("string()"))
            for li in html.xpath(r'//ul/li')
        ]

        level_info = extract_level_info(doc)
        if level_info:
            info.append(level_info)
        return info
    except Exception:
        # Catch Exception only, so that KeyboardInterrupt / SystemExit are
        # not swallowed the way a bare ``except:`` would swallow them.
        traceback.print_exc()
        return None
Пример #3
0
def extract_level_info(doc):
    """Extract the cleaned text of the ``level_info`` paragraph.

    Selects the script block matching the ``Pl_Official_RightGrowNew`` dom
    id pattern, parses the HTML embedded in it and reads the first
    ``<p class="level_info">`` element.

    Args:
        doc: Page document passed unchanged to ``util.extract_script``
            (its concrete type is defined by that helper, not shown here).

    Returns:
        The cleaned paragraph text, or ``None`` when the paragraph is
        absent or any step fails.  Failures print a traceback; a merely
        missing paragraph does not.
    """
    try:
        scripts = util.extract_script(doc)
        # NOTE(review): the pattern has no closing quote -- presumably so
        # that any suffixed dom id matches; confirm against select_script.
        script = util.select_script(scripts,
                                    r'"domid":"Pl_Official_RightGrowNew')
        if script is None:
            # Descriptive error instead of AttributeError on ``None.text``.
            raise ValueError("no RightGrowNew script block found")

        html = util.extract_html_from_script(script.text.strip())
        html = etree.HTML(html)
        p = html.xpath(r'//p[@class="level_info"]')
        if not p:
            # Previously ``info`` stayed unbound here and the function only
            # returned None via an UnboundLocalError plus a spurious
            # traceback; an absent paragraph is now handled explicitly.
            return None

        text = p[0].xpath("string()")
        return util.clean_text(text)
    except Exception:
        # Report and signal failure with None; unlike a bare ``except:``
        # this lets KeyboardInterrupt and SystemExit propagate.
        traceback.print_exc()
        return None
Пример #4
0
def extract_level_info(doc):
    """Return the user's level-info text from a profile document.

    The script block whose content matches the ``Pl_Official_RightGrowNew``
    dom id pattern is selected, the HTML it embeds is parsed, and the text
    of the first ``<p class="level_info">`` is cleaned and returned.

    Args:
        doc: Page document given as-is to ``util.extract_script``; its
            concrete type is determined by that helper (not visible here).

    Returns:
        Cleaned level-info string, or ``None`` if there is no such
        paragraph or if any step raises (the traceback is printed in the
        latter case only).
    """
    info = None  # stays None when no level_info paragraph is present
    try:
        scripts = util.extract_script(doc)
        # NOTE(review): pattern intentionally(?) lacks a closing quote --
        # verify how util.select_script interprets it.
        script = util.select_script(
            scripts, r'"domid":"Pl_Official_RightGrowNew'
        )
        if script is None:
            # Clear message rather than the AttributeError that
            # ``script.text`` would raise on None.
            raise ValueError("no RightGrowNew script block found")

        html = util.extract_html_from_script(script.text.strip())
        html = etree.HTML(html)
        paragraphs = html.xpath(r'//p[@class="level_info"]')
        if paragraphs:
            # string() gives the paragraph's full text content.
            text = paragraphs[0].xpath("string()")
            info = util.clean_text(text)
        # With ``info`` pre-initialised, an empty match no longer triggers
        # an UnboundLocalError (and its misleading traceback).
        return info
    except Exception:
        # ``Exception`` instead of a bare except: do not swallow
        # KeyboardInterrupt / SystemExit.
        traceback.print_exc()
        return None