Example #1
import base64
import re
from tempfile import TemporaryFile

import requests
from bs4 import BeautifulSoup
from fontTools.ttLib import TTFont

# Request headers; set a User-Agent (plus any cookies the site requires).
headers = {'User-Agent': 'Mozilla/5.0'}

def crawler(url):
    text = requests.get(url, headers=headers).text
    # The custom font is embedded in the page as a base64-encoded @font-face block.
    font_face = re.findall(r'base64,(.*?)\)', text)
    with TemporaryFile() as f:
        f.write(base64.b64decode(font_face[0]))
        f.seek(0)
        font = TTFont(f)
    # font.saveXML('glided_sky.xml')  # dump the font to XML for inspection
    # Glyphs 1-10 are the digits 0-9 in order (glyph 0 is .notdef).
    number_list = font.getGlyphOrder()[1:11]
    # Map each character code point to its glyph name.
    font_map = font.getBestCmap()
    rows = BeautifulSoup(text, 'lxml').find_all('div', class_="col-md-1")
    scores = []
    for row in rows:
        numbers = []
        for char in row.text.strip():
            # Convert the obfuscated character to its code point, e.g. '\uf5e2' -> 0xf5e2.
            unicode_str = char.encode('unicode-escape').decode()
            sixteen_str = unicode_str.replace('\\u', '0x')
            # Code point -> glyph name -> position in the glyph order = real digit.
            number = number_list.index(font_map[int(sixteen_str, 16)])
            numbers.append(str(number))
        scores.append(int(''.join(numbers)))
    print(f"Page {url.split('=')[-1]}: total {sum(scores)}, details: {scores}")
    return sum(scores)
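For context, a minimal driver loop might look like the sketch below. The URL pattern and page count are assumptions (they depend on the target site and are presumably defined elsewhere in the original post); only the call to crawler() mirrors the code above.

# A minimal driver sketch, assuming pages are addressed as BASE_URL?page=N.
BASE_URL = 'https://example.com/list?page={}'  # hypothetical URL pattern

def crawl_all(pages=10):
    total = 0
    for page in range(1, pages + 1):
        # Each call returns the per-page sum printed by crawler().
        total += crawler(BASE_URL.format(page))
    print(f"Grand total over {pages} pages: {total}")
    return total

if __name__ == '__main__':
    crawl_all()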