Пример #1
0
def process(infile, outfile, layer):
    """Save a copy of a font in which only one layer's glyphs keep outlines.

    Args:
        infile: Font source handed to ``TTFont``.
        outfile: Destination handed to ``TTFont.save``.
        layer: ``"letters"``, ``"diacritics"`` or ``"quranic-signs"``. Any
            other value keeps no glyph at all, so every glyph is blanked.
    """
    font = TTFont(infile)
    glyf = font["glyf"]
    allGlyphNames = font.getGlyphNames()

    # Collect the names to keep in a set so the per-glyph membership test in
    # the loop below is O(1) instead of a linear scan of a list.
    if layer == "letters":
        glyphNamesToKeep = {
            glyphName for glyphName in allGlyphNames
            if glyphName not in _LAYER2_GLYPHS and glyphName not in _LAYER3_GLYPHS
        }
    elif layer == "diacritics":
        glyphNamesToKeep = set(_LAYER2_GLYPHS)
    elif layer == "quranic-signs":
        glyphNamesToKeep = set(_LAYER3_GLYPHS)
    else:
        glyphNamesToKeep = set()

    kashidaGlyphs = ("uni0670.medi", "uni06E5.medi", "uni06E6.medi")
    for glyphName in allGlyphNames:
        if glyphName in glyphNamesToKeep:
            continue
        glyph = glyf[glyphName]
        if layer == "letters" and glyphName in kashidaGlyphs:
            # We want to keep the kashida part of those glyphs: drop only the
            # component that refers to the base mark (the name before ".").
            baseName = glyphName.split(".")[0]
            glyph.components = [
                component for component in glyph.components
                if component.glyphName != baseName
            ]
        else:
            # This will cause FontTools not to output any outlines for that
            # glyph.
            glyph.numberOfContours = 0

    font.save(outfile)
Пример #2
0
def process(infile, outfile, layer):
    """Blank every glyph outline that does not belong to ``layer`` and save.

    Args:
        infile: Font source handed to ``TTFont``.
        outfile: Destination handed to ``TTFont.save``.
        layer: One of ``"letters"``, ``"diacritics"``, ``"quranic-signs"``.
            With any other value nothing is kept and all glyphs are blanked.
    """
    font = TTFont(infile)
    glyf = font["glyf"]
    glyphNames = font.getGlyphNames()

    # Use a set for the names to keep: the loop below tests membership once
    # per glyph, which is quadratic overall with a list.
    glyphNamesToKeep = set()
    if layer == "letters":
        for glyphName in glyphNames:
            if glyphName not in _LAYER2_GLYPHS and glyphName not in _LAYER3_GLYPHS:
                glyphNamesToKeep.add(glyphName)
    elif layer == "diacritics":
        glyphNamesToKeep = set(_LAYER2_GLYPHS)
    elif layer == "quranic-signs":
        glyphNamesToKeep = set(_LAYER3_GLYPHS)

    for glyphName in glyphNames:
        if glyphName in glyphNamesToKeep:
            continue
        glyph = glyf[glyphName]
        if glyphName in ("uni0670.medi", "uni06E5.medi", "uni06E6.medi") and layer == "letters":
            # We want to keep the kashida part of those glyphs, so only the
            # component named after the part before the "." is removed.
            markName = glyphName.split(".")[0]
            glyph.components = [c for c in glyph.components if c.glyphName != markName]
        else:
            # This will cause FontTools not to output any outlines for that
            # glyph.
            glyph.numberOfContours = 0

    font.save(outfile)
def read_metadata(font):
    """Collect summary metadata from a font.

    Args:
        font: Font source handed to ``TTFont`` (opened with ``lazy=True``).

    Returns:
        A dict holding those of the keys ``table_sizes``, ``names``, ``OS2``,
        ``post``, ``fvar`` and ``counts`` whose value is not ``None``, or
        ``None`` when the glyph names cannot be read.
    """
    ttf = TTFont(font, lazy=True)
    # The font is closed on every exit path, including the early return and
    # any exception raised by one of the _read_* helpers.
    try:
        try:
            ttf.getGlyphNames()
        except Exception:
            # NOTE(review): `request` is not defined in this function; it is
            # presumably a module-level object - confirm it is in scope here.
            logging.error('Not a vaild font: ' + request['url'])
            return None
        reader = ttf.reader

        metadata = {
            'table_sizes':
            {tag: reader.tables[tag].length
             for tag in sorted(reader.keys())},
            'names':
            _read_names(ttf, (_NAME_ID_VERSION, _NAME_ID_POSTSCRIPT_NAME,
                              _NAME_ID_LICENSE_URL)),
            'OS2':
            _read_os2(ttf),
            'post':
            _read_post(ttf),
            'fvar':
            _read_fvar(ttf),
            'counts':
            _read_codepoint_glyph_counts(ttf),
        }
    finally:
        ttf.close()

    return {k: v for k, v in metadata.items() if v is not None}
Пример #4
0
def process_font(url):
    """Fetch a page and replace its font-obfuscated digits with real digits.

    The page's ``.woff`` font is downloaded into the working directory and its
    glyphs are compared with the local reference font ``loc.woff``; every
    ``&#x....;`` entity whose glyph matches a reference glyph is replaced by
    the digit that glyph draws.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page source with the matched entities replaced.

    Raises:
        IndexError: If no ``.woff`` URL is found in the page source.
        KeyError: If a matching reference glyph is missing from the
            hand-made table below.
    """
    # loc.woff is a font file downloaded beforehand.
    # font1.saveXML() can be used to inspect its structure; the font behaves
    # like a dict whose keys are the tags seen in that XML.
    font1 = TTFont('loc.woff')
    # Glyph name -> digit for the local font, confirmed by hand with Baidu's
    # FontEditor.
    loc_dict = {
        'uniE8B2': '5',
        'uniF818': '3',
        'uniECCC': '8',
        'uniE622': '1',
        'uniEC92': '2',
        'uniF31A': '4',
        'uniE86D': '9',
        'uniE33C': '6',
        'uniE1FA': '7',
        'uniE13E': '0'
    }
    # The first and last glyph names were found not to be digits, hence the
    # slice.
    uni_list1 = font1.getGlyphNames()[1:-1]

    # Page source.
    rsp = urlopen(url).read().decode()
    # Locate the dynamically served font file and download it; the local file
    # name is the last path segment of its URL.
    font_url = 'http://' + re.findall(r'url\(\'//(.*?\.woff)', rsp)[0]
    filename = font_url.split('/')[-1]
    urlretrieve(font_url, filename)

    font2 = TTFont(filename)
    uni_list2 = font2.getGlyphNames()[1:-1]

    # Glyph name in the web font -> digit. Two glyph objects comparing equal
    # means they draw the same digit.
    new_map = {}
    for uni2 in uni_list2:
        obj2 = font2['glyf'][uni2]
        for uni1 in uni_list1:
            obj1 = font1['glyf'][uni1]
            if obj1 == obj2:
                new_map[uni2] = loc_dict[uni1]

    # Substitute only the glyphs that were matched: a glyph with no
    # counterpart in the reference font used to abort with KeyError. The
    # entity is a literal, so plain str.replace is used instead of a regex.
    for uni2, digit in new_map.items():
        rsp = rsp.replace('&#x' + uni2[3:].lower() + ';', digit)

    # Return the processed source.
    return rsp
Пример #5
0
    def __init__(self, bad_font_file, experiment_dir,
                 src_fonts_dir='charset/ZhongHuaSong',
                 fonts_json='/disks/sdb/projs/AncientBooks/data/chars/font_missing.json', fonts_root=None,
                 type_fonts='type/宋黑类字符集.txt',
                 input_nc=1, embedding_num=250, embedding_dim=128,  # model settings
                 Lconst_penalty=15, Lcategory_penalty=1.0, gpu_ids=['cuda'], resume=240000,  # model settings
                 char_size=250, canvas_size=256,
                 fake_prob=0.03):
        """Load the source fonts, the font tables and the trained model.

        Args:
            bad_font_file: Optional path to a file whose first line lists
                whitespace-separated integer font ids; falsy means none.
            experiment_dir: Directory whose ``checkpoint`` sub-directory is
                given to the model as ``save_dir``.
            src_fonts_dir: Directory containing the two ZhongHuaSong plane
                font files.
            fonts_json: Argument for ``self.get_fonts``.
            fonts_root: Stored on the instance as ``fonts_root``.
            type_fonts: UTF-8 text file with one entry per line; the line
                index becomes the key of ``self.type_fonts``.
            input_nc, embedding_num, embedding_dim, Lconst_penalty,
            Lcategory_penalty, gpu_ids: Forwarded to ``FontMagicModel``.
            resume: Argument for ``model.load_networks``.
            char_size: Size used for the PIL fonts.
            canvas_size: Stored on the instance.
            fake_prob: Stored on the instance, then overwritten (see below).
        """
        plane00_path = os.path.join(src_fonts_dir, 'FZSONG_ZhongHuaSongPlane00_2020051520200519101119.TTF')
        plane02_path = os.path.join(src_fonts_dir, 'FZSONG_ZhongHuaSongPlane02_2020051520200519101142.TTF')

        # fontTools is only needed for the glyph names, so each font is
        # closed as soon as they have been read instead of staying open.
        fontPlane00 = TTFont(plane00_path)
        try:
            self.charSetPlane00 = processGlyphNames(fontPlane00.getGlyphNames())
        finally:
            fontPlane00.close()
        fontPlane02 = TTFont(plane02_path)
        try:
            self.charSetPlane02 = processGlyphNames(fontPlane02.getGlyphNames())
        finally:
            fontPlane02.close()
        self.charSetTotal = self.charSetPlane00 | self.charSetPlane02
        self.charListTotal = list(self.charSetTotal)

        self.char_size = char_size
        self.canvas_size = canvas_size
        self.fake_prob = fake_prob

        self.fontPlane00 = ImageFont.truetype(plane00_path, char_size)
        self.fontPlane02 = ImageFont.truetype(plane02_path, char_size)

        self.fonts = self.get_fonts(fonts_json)
        self.fonts_root = fonts_root
        # Font name without its extension -> position in self.fonts.
        self.fonts2idx = {os.path.splitext(font['font_name'])[0]: idx for idx, font in enumerate(self.fonts)}

        with open(type_fonts, 'r', encoding='utf-8') as fp:
            self.type_fonts = {idx: font_line.strip() for idx, font_line in enumerate(fp)}
        self.type_fonts_rev = {v: k for k, v in self.type_fonts.items()}

        if bad_font_file:
            with open(bad_font_file, 'r', encoding='utf-8') as fp:
                self.bad_font_ids = [int(_) for _ in fp.readline().strip().split()]
        else:
            self.bad_font_ids = []
        # NOTE(review): this overrides the ``fake_prob`` argument stored
        # above, so the parameter has no lasting effect. Kept as is because
        # honouring the argument would change the effective default
        # (0.05 -> 0.03); confirm which value is intended.
        self.fake_prob = 0.05

        checkpoint_dir = os.path.join(experiment_dir, "checkpoint")

        self.model = FontMagicModel(
            input_nc=input_nc,
            embedding_num=embedding_num,
            embedding_dim=embedding_dim,
            Lconst_penalty=Lconst_penalty,
            Lcategory_penalty=Lcategory_penalty,
            save_dir=checkpoint_dir,
            gpu_ids=gpu_ids,
            is_training=False
        )
        self.model.setup()
        self.model.print_networks(True)
        self.model.load_networks(resume)
Пример #6
0
    def tran(self, text, html):
        """Decode the digits of ``text`` that the page's custom font scrambles.

        The font linked from ``html`` is downloaded to ``人人车01.ttf`` and its
        digit glyphs are compared with those of the local reference font
        ``人人车.ttf``. All replacements are applied in one pass, so a digit
        produced by one substitution is never substituted again.

        :param text: obfuscated string taken from the page
        :param html: page source containing the ``url('....woff')`` font link
        :return: ``text`` with every decodable digit replaced
        :raises IndexError: if ``html`` contains no matching font link
        """
        url = re.findall(r"url\('(.*?.woff)'", html)[0]
        with open('人人车01.ttf', 'wb') as f:
            f.write(requests.get(url=url).content)
        font1 = TTFont('人人车.ttf')
        uni_list1 = font1.getGlyphOrder()[1:]  # skip the first glyph
        font2 = TTFont('人人车01.ttf')
        uni_list2 = font2.getGlyphOrder()[1:]  # skip the first glyph
        # Digit that appears in the text for each glyph name of the
        # downloaded font.
        shown_digit = {
            'zero': '0',
            'one': '1',
            'two': '2',
            'three': '3',
            'four': '4',
            'five': '5',
            'six': '6',
            'seven': '7',
            'eight': '8',
            'nine': '9'
        }
        # Digit assigned to each glyph name of the reference font
        # (presumably decoded by hand - verify against 人人车.ttf).
        real_digit = {
            'zero': '0',
            'one': '1',
            'two': '2',
            'four': '3',
            'three': '4',
            'five': '5',
            'seven': '6',
            'nine': '7',
            'six': '8',
            'eight': '9'
        }

        # Build "digit in the text -> real digit" first and apply it once.
        # Calling text.replace per digit on the already-modified text made
        # later replacements overwrite earlier ones (3 -> 4, then 4 -> 3).
        translation = {}
        for uni2 in uni_list2:
            shown = shown_digit.get(str(uni2))
            if shown is None or shown not in text or shown in translation:
                continue
            obj2 = font2['glyf'][uni2]
            for uni1 in uni_list1:
                # Equal glyph objects mean both names draw the same digit.
                if uni1 in real_digit and font1['glyf'][uni1] == obj2:
                    translation[shown] = real_digit[uni1]
                    break
        return text.translate(str.maketrans(translation))
Пример #7
0
 def get_movie_ticket(self, html, flag=False):
     """Replace font-obfuscated digit codes in ``html`` with real digits.

     The woff font linked from ``html`` is saved as ``maoyan.woff`` and its
     glyphs are compared with the local reference font ``basefont.woff``.

     :param html: page source containing the ``url('...') format('woff');`` rule
     :param flag: when true return the decoded html, otherwise pass it to
         ``self.find_ticket`` and return that result
     :raises IndexError: if ``html`` contains no woff font rule
     """
     p = re.compile(r"url\('(.*?)'\) format\('woff'\);")
     uni_font_url = re.findall(p, html)
     url = 'http:%s' % uni_font_url[0]
     resp = requests.get(url)
     with open('maoyan.woff', 'wb') as fontfile:
         fontfile.write(resp.content)
     # Reference font stored locally; it has to be decoded by hand once and
     # serves as the fixed part of the comparison.
     baseFonts = TTFont('basefont.woff')
     base_nums = ['4', '1', '3', '0', '5', '6', '7', '9', '2', '8']  # digit table
     base_fonts = ['uniF66E', 'uniE944', 'uniE4BE', 'uniEF0F', 'uniEF8D', 'uniE963', 'uniE142', 'uniE023',
                   'uniE995',
                   'uniF3A0']  # glyph names, in the same order as base_nums
     onlineFonts = TTFont('maoyan.woff')  # font downloaded for this page
     uni_list = onlineFonts.getGlyphNames()[1:-1]  # only the middle names are digits
     # Look the reference glyphs up once instead of once per online glyph.
     base_glyphs = [(baseFonts['glyf'][name], num)
                    for name, num in zip(base_fonts, base_nums)]
     temp = {}
     # Slicing (rather than indexing 0..9) keeps a font with fewer than ten
     # digit glyphs from raising IndexError.
     for uni in uni_list[:10]:
         onlineGlyph = onlineFonts['glyf'][uni]
         for baseGlyph, num in base_glyphs:
             if onlineGlyph == baseGlyph:
                 temp[uni.replace('uni', '0x').lower()] = num
     for key, num in temp.items():
         html = html.replace(key + ';', str(num))
     if flag:
         return html
     else:
         return self.find_ticket(html)
Пример #8
0
    def parse_fonts(content):
        """
        Build a glyph-code -> character dictionary from raw font bytes.

        :param content: body of the response to the ttf request
        :return: dict keyed by each glyph name minus its first three characters
        """
        ttfont = TTFont(BytesIO(content))
        glyf_table = ttfont['glyf']

        # Describe every glyph except the first by its contour end points.
        unknown_list = []
        for glyphname in ttfont.getGlyphNames()[1:]:
            glyph = glyf_table[glyphname]
            item = {"cp": glyph.endPtsOfContours, "glyphname": glyphname}
            if item["cp"] == [11]:
                item["xy"] = glyph.coordinates[0]
            unknown_list.append(item)

        fonts = {}
        for known in fonts_list:
            known_cp = known.get("cp")
            for dom in unknown_list:
                cp = dom.get("cp")
                if cp == [12]:
                    # NOTE(review): "xy" is only stored above for glyphs whose
                    # end points are [11], so it is None on this path and the
                    # subscript raises TypeError - confirm whether [11] or
                    # [12] is the intended value.
                    if int(dom.get("xy")[0][1]) > 200:
                        fonts[dom['glyphname'][3:]] = "十"
                    else:
                        fonts[dom['glyphname'][3:]] = "上"
                elif cp == known_cp:
                    fonts[dom['glyphname'][3:]] = known.get("value")
        return fonts
Пример #9
0
    def extract_all_characters(self, woff_file):
        """Extract every character of a font using a thread pool.

        :param woff_file: font source handed to ``TTFont``
        :return: dict merged from the dicts returned by
            ``self.extract_single_character(glyph_name, glyphs)`` for every
            glyph except the placeholders ``glyph00000`` and ``x``
        """
        ttfont = TTFont(woff_file)
        glyphs = ttfont['glyf']
        # Filter the placeholder glyphs out instead of calling list.remove,
        # which raises ValueError when a font does not contain them.
        skipped = ('glyph00000', 'x')
        glyph_names = [name for name in ttfont.getGlyphNames()
                       if name not in skipped]

        font_map = dict()
        with ThreadPoolExecutor() as pool:
            futures = [
                pool.submit(self.extract_single_character, glyph_name, glyphs)
                for glyph_name in glyph_names
            ]
            # Merge results as they finish; result() re-raises any exception
            # raised inside a worker.
            for future in as_completed(futures):
                font_map.update(future.result())
        return font_map
Пример #10
0
    def _create_font_mapping(self, base_font: TTFont, base_font_mapping: dict,
                             content):
        """Rebuild ``self.mapping`` for a freshly downloaded font.

        ``content`` is written to ``./temp.woff`` and loaded from there. Each
        of its glyphs is compared with the glyphs of ``base_font`` named by
        the keys of ``base_font_mapping``; on a match, and when the online
        glyph name is itself a key of ``base_font_mapping``, the character
        decoded from the glyph name is mapped to the matching base entry.
        """
        font_file_path = './temp.woff'
        with open(font_file_path, 'wb') as font_file:
            font_file.write(content)

        self.mapping = {}

        online_font = TTFont(font_file_path)
        online_names = online_font.getGlyphNames()
        base_names = list(base_font_mapping.keys())

        for online_name in online_names:
            online_glyph = online_font['glyf'][online_name]
            for base_name in base_names:
                base_glyph = base_font['glyf'][base_name]
                if online_glyph == base_glyph and online_name in base_font_mapping:
                    # "uniXXXX" -> the character written as the JSON escape
                    # \uXXXX.
                    escaped = f'"\\u{online_name[3:]}"'
                    self.mapping[json.loads(escaped)] = base_font_mapping[
                        base_name]
Пример #11
0
    async def shouldParseFont(self, bodyClass):
        """Return the glyph mapping for ``bodyClass``, building it when needed.

        If the font file for ``bodyClass`` is not cached on disk it is
        downloaded first. The mapping is read from redis when the file was
        already present and redis has an entry; otherwise it is rebuilt from
        the font file and stored in redis.

        :param bodyClass: value substituted into the cache path and font URL,
            also used as the redis key
        :return: dict pairing glyph-order entries 2..11 with the first ten
            entries of the glyph-name list
        """
        print(bodyClass)
        file = self._fontCachedPath % (bodyClass)
        if os.path.exists(file):
            cached = self._redis.get(bodyClass)
            if cached is not None:
                return json.loads(cached)
            # The file is on disk but redis has no entry; fall through and
            # rebuild the mapping instead of failing in json.loads(None).
        else:
            async with aiohttp.ClientSession() as session:
                async with session.get(self._fontUrl %
                                       (bodyClass[0:2], bodyClass),
                                       headers=self._headers) as _resp:
                    assert _resp.status == 200
                    with open(file, 'wb') as fd:
                        while True:
                            chunk = await _resp.content.read(1024)
                            if not chunk:
                                break
                            fd.write(chunk)

        font = TTFont(file)
        gly_list = font.getGlyphOrder()[2:12]
        gly_names = font.getGlyphNames()[0:10]
        secrets = {gly_list[i]: gly_names[i] for i in range(10)}

        self._redis.set(bodyClass, json.dumps(secrets))
        return secrets
Пример #12
0
def ocr_processor(filename):
    """
    Run OCR on every character mapped by a font file.

    Args:
        filename: file name of the uploaded font

    Returns:
        A list with one dict per entry of the font's best cmap, holding the
        glyph name ('name'), the rendered character as a base64 PNG data URI
        ('img') and the recognised text ('ocr_result').

    """
    f = TTFont(filename)
    ProgressBar.max_length = len(f.getGlyphNames())

    ocr_results = []
    for codepoint, glyph_name in f.getBestCmap().items():
        image = uni_2_png_stream(codepoint, filename, 100)
        png_buffer = BytesIO()
        image.save(png_buffer, format="PNG")
        encoded_png = base64.b64encode(png_buffer.getvalue()).decode()
        entry = {
            'name': glyph_name,
            'img': 'data:image/png;base64,' + encoded_png,
            'ocr_result': tesseract_single_character(image)
        }
        ocr_results.append(entry)
        # Report the glyph that was just processed.
        SocketQueue.res_queue.put(glyph_name)

    return ocr_results
Пример #13
0
def findstar():
    """Decode the glyphs of ``to2333.xml`` against the template ``to.xml``.

    Returns:
        A tuple ``(new_font, font_list)``: the characters found for the glyphs
        of ``to2333.xml`` whose XML equals a template glyph's XML, and the
        glyph names of ``demo.woff`` (``glyph00000`` excluded), lower-cased
        and with ``"uni"`` removed.
    """
    words = '1234567890店中美家馆小车大市公酒行国品发电金心业商司超生装园场食有新' \
           '限天面工服海华水房饰城乐汽香部利籽老艺花专东肉菜学福饭人百餐茶务' \
           '通味所山区门药银农龙停尚安广鑫一容动南具源兴鲜记时机烤文康信果阳理' \
           '锅宝达地儿衣特产西批坊州牛佳化五米修爱北养卖建材三会鸡室红站' \
           '德王光名丽油院堂烧江社合星货型村自科快便日民营和活童明器烟育' \
           '宾精屋经居庄石顺林尔县手厅销用好客火雅盛体旅之鞋辣作粉包楼校' \
           '鱼平彩上吧保永万物教吃设医正造丰健点汤网庆技斯洗料配汇木缘加' \
           '麻联卫川泰色世方寓风幼羊烫来高厂兰阿贝皮全女拉成云维贸道术运' \
           '都口博河瑞宏京际路祥青镇厨培力惠连马鸿钢训影甲助窗布富牌头四' \
           '多妆吉苑沙恒隆春干饼氏里二管诚制售嘉长轩杂副清计黄讯太鸭号街' \
           '交与叉附近层旁对巷栋环省桥湖段乡厦府铺内侧元购前幢滨处向座下' \
           '県凤港开关景泉塘放昌线湾政步宁解白田町溪十八古双胜本单同九迎' \
           '第台玉锦底后七斜期武岭松角纪朝峰六振珠局岗洲横边济井办汉代临' \
           '弄团外塔杨铁浦字年岛陵原梅进荣友虹央桂沿事津凯莲丁秀柳集紫旗' \
           '张谷的是不了很还个也这我就在以可到错没去过感次要比觉看得说常' \
           '真们但最喜哈么别位能较镜非为欢然他挺着价那意种想出员两推做排' \
           '实分间甜度起满给热完格荐喝等其再几只现朋侯样直而买于般豆量选' \
           '奶打每评少算又因情找些份置适什蛋师气你姐棒试总定啊足级整带虾' \
           '如态且尝主话强当更板知己无酸让入啦式笑赞片酱差像提队走嫩才刚' \
           '午接重串回晚微周值费性桌拍跟块调糕.'
    print(len(words))
    words_list = list(words)

    # Template: the XML of every TTGlyph but the first; entry j belongs to
    # words_list[j].
    template_dom = xmldom.parse(os.path.abspath("to.xml"))
    template_glyphs = template_dom.documentElement.getElementsByTagName("TTGlyph")
    data = [template_glyphs[i].toprettyxml()
            for i in range(1, len(template_glyphs))]

    # Decode the font downloaded for this request against the template.
    target_dom = xmldom.parse(os.path.abspath("to2333.xml"))
    tunicode = target_dom.documentElement.getElementsByTagName("TTGlyph")
    new_font = []
    for i in range(1, len(tunicode)):
        get_code = tunicode[i].toprettyxml()
        for j, template_xml in enumerate(data):
            if get_code == template_xml:
                new_font.append(words_list[j])

    font = TTFont("demo.woff")
    # Filter rather than list.remove so a font without the placeholder glyph
    # does not raise ValueError.
    font_list = [
        str(name).lower().replace("uni", '')
        for name in font.getGlyphNames()
        if name != 'glyph00000'
    ]

    return (new_font, font_list)
Пример #14
0
class ItemListController(QObject):
    """Controller that lists a font's glyph names in the parent's item table."""

    def __init__(self, parent):
        """Store ``parent`` and attach a ligature table model to its table."""
        super(ItemListController, self).__init__()

        self._parent = parent
        self._items = []

        # TTFont of the currently loaded file; None until load_file is called.
        self.ttf = None

        self.font_name = None
        self.font_extension = None
        self.xml_file = None
        self.xml_out_file = None
        self.output_dir = None

        self._init_table()

    def load_file(self, filename):
        """Open ``filename`` with fontTools and list its glyph names.

        ``font_name`` is the base name without its last extension and
        ``font_extension`` that extension without the dot (empty when the
        file name has none).
        """
        # os.path.splitext copes with names that contain no dot; splitting on
        # "." produced an empty name and used the whole file name as the
        # extension in that case.
        stem, extension = os.path.splitext(os.path.basename(filename))
        self.font_name = stem
        self.font_extension = extension[1:]

        self.ttf = TTFont(filename)
        self._load_items()

    def _init_table(self):
        """Create the table model and bind it to the parent's item table."""
        self.table = self._parent.ui.item_table

        self.table_model = LigatureTableModel([], ['Name', 'Ligature'],
                                              self.table)
        self.table.setModel(self.table_model)

        self.table.setSortingEnabled(True)

    def _load_items(self):
        """Refill the model with one empty-ligature item per glyph name."""
        self.table_model.clear()

        for name in self.ttf.getGlyphNames():
            self.table_model.add(LigatureItem(name, ''))

        self.table_model.restore_mapping()

    @pyqtSlot()
    def save(self):
        """Save to ``output_dir``, or log a message when none is set."""
        if not self.output_dir:
            self._parent.log('no dir!')
        else:
            self.save_to_dir(self.output_dir)

    def save_to_dir(self, directory):
        """Write the processed font files for the current mapping.

        A ``ReferenceError`` raised while processing is passed to the
        parent's log instead of propagating.
        """
        try:
            mapping = self.table_model.get_mapping()
            processor = FontProcessor(self.ttf, mapping)
            processor.save_files(directory, self.font_name)
            self._parent.log('OK!')
        except ReferenceError as e:
            self._parent.log(e)
Пример #15
0
def get_font_map():
    """
    Build the table that maps a character code to its digit.

    The glyphs of ``a.woff`` are matched with those of ``base.woff`` by
    comparing the first ten coordinates of each outline; the hexadecimal
    codes of ``a.woff``'s best cmap are then mapped to the matched digits.
    """
    font1 = TTFont('base.woff')
    # Glyph name -> digit for base.woff.
    base_dict = {
        'glyph00009': 7,
        'glyph00013': 2,
        'glyph00018': 1,
        'glyph00023': 6,
        'glyph00028': 9,
        'glyph00030': 8,
        'glyph00034': 4,
        'glyph00039': 5,
        'glyph00044': 3,
        'glyph00048': 0
    }
    base_names = font1.getGlyphNames()

    font2 = TTFont('a.woff')
    target_names = font2.getGlyphNames()

    new_name_list1 = get_new_name_list(font1, base_names)
    new_name_list2 = get_new_name_list(font2, target_names)
    print(new_name_list1)
    print(new_name_list2)

    # Glyph name in a.woff -> digit.
    new_dict = {}
    for target_name in new_name_list2:
        target_coords = font2['glyf'][target_name].coordinates
        for base_name in new_name_list1:
            base_coords = font1['glyf'][base_name].coordinates
            if base_coords[:10] == target_coords[:10]:
                new_dict[target_name] = base_dict[base_name]
    print(new_dict)

    # getBestCmap gives the code -> glyph name relation.
    font_map = {
        hex(code): new_dict[glyph_name]
        for code, glyph_name in font2.getBestCmap().items()
        if glyph_name in new_dict
    }
    print(font_map)
    return font_map
Пример #16
0
def job():
    """Print the top film's name and box office before and after decoding.

    The maoyan.com home page is fetched, its woff font is saved as
    ``fonts.woff`` and compared glyph by glyph with the local reference font
    ``basefonts.woff``; matched ``&#x....;`` entities are replaced by digits
    before the values are extracted again.
    """
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/66.0.3359.139 Safari/537.36 "
    }
    info_xpath = (
        '//*[@id="app"]/div/div[1]/div[1]/div/div[2]/ul/li[1]/a/div[2]/div//text()'
    )

    index_url = 'http://maoyan.com/'
    # Fetch the home page.
    response_index = requests.get(index_url, headers=headers).text
    info_list = etree.HTML(response_index).xpath(info_xpath)
    a = u'电影名称:%s, 票房总数:%s' % (info_list[1], info_list[4])
    print(a)

    # URL of the font file.
    woff_ = re.search(r"url\('(.*\.woff)'\)", response_index).group(1)
    woff_url = 'http:' + woff_
    response_woff = requests.get(woff_url, headers=headers).content

    with open('fonts.woff', 'wb') as f:
        f.write(response_woff)

    # base_nums and base_fonts have to be decoded by hand and must agree with
    # basefonts.woff.
    baseFonts = TTFont('basefonts.woff')
    base_nums = ['7', '9', '0', '3', '6', '5', '2', '1', '4', '8']
    base_fonts = [
        'uniF04C', 'uniE374', 'uniF426', 'uniEAAA', 'uniF519', 'uniEEC4',
        'uniF543', 'uniF7C7', 'uniF046', 'uniF08E'
    ]
    base_glyphs = [(baseFonts['glyf'][name], num)
                   for name, num in zip(base_fonts, base_nums)]

    onlineFonts = TTFont('fonts.woff')
    uni_list = onlineFonts.getGlyphNames()[1:-1]
    temp = {}
    # Compare the fonts. Slicing instead of indexing 0..9 avoids IndexError
    # when the downloaded font has fewer than ten digit glyphs.
    for uni in uni_list[:10]:
        onlineGlyph = onlineFonts['glyf'][uni]
        for baseGlyph, num in base_glyphs:
            if onlineGlyph == baseGlyph:
                temp["&#x" + uni[3:].lower() + ';'] = num

    # Replace the entities. With no match the pattern would be "()", which
    # matches the empty string everywhere and makes the lookup fail with
    # KeyError, so the substitution is skipped in that case.
    if temp:
        pat = '(' + '|'.join(temp.keys()) + ')'
        response_index = re.sub(pat, lambda x: temp[x.group()], response_index)

    # Extract the values again from the decoded page.
    info_list = etree.HTML(response_index).xpath(info_xpath)
    a = u'电影名称:%s, 票房总数:%s' % (info_list[1], info_list[4])
    print(a)
Пример #17
0
def get_font_dict(font_face):
    '''
    Return the digit that each glyph code of the page's font stands for.

    ``font_face`` is the base64-encoded font taken from the page; it is
    decoded to ``002.ttf`` and compared with the reference font ``001.ttf``.
    The result maps glyph names of ``002.ttf`` to digits.
    '''
    # Decode the font_face found on the page and save it as a font file.
    font_code = base64.b64decode(font_face)
    with open('002.ttf', 'wb') as f:
        f.write(font_code)

    # 001.ttf is the reference font, 002.ttf the font of the current page.
    font1 = TTFont('001.ttf')
    font2 = TTFont('002.ttf')

    # Glyph names of the page font without the first and last entry.
    # font2.getGlyphOrder() would also list them, in a different order.
    uni_list_2 = font2.getGlyphNames()[1:-1]

    # Digits of the glyphs in 001.ttf, matched by hand.
    font_dict_1 = {
        'uniE035': 5,
        'uniE285': 0,
        'uniE8D4': 9,
        'uniED7F': 7,
        'uniF11E': 8,
        'uniF137': 2,
        'uniF1BF': 4,
        'uniF4EC': 1,
        'uniF59C': 3,
        'uniF750': 6
    }
    # Look the reference glyphs up once rather than once per page glyph.
    base_glyphs = [(font1['glyf'][name], digit)
                   for name, digit in font_dict_1.items()]

    # Glyph name -> digit for the current page; equal glyph objects mean the
    # same digit.
    font_dict_2 = {}
    for uni_code2 in uni_list_2:
        glyph2 = font2['glyf'][uni_code2]
        for glyph1, digit in base_glyphs:
            if glyph2 == glyph1:
                font_dict_2[uni_code2] = digit

    return font_dict_2
Пример #18
0
    def _create_base_mapping(self, base_font_file: str, base_font_mapping_file: str, type_: str) -> None:
        """Load the reference glyphs and their values for one font type.

        For every ``"uniXXXX": value`` entry of the JSON mapping file, the
        character with code point ``XXXX`` becomes the key under which the
        glyph object is stored in ``self.base_glyph_mapping[type_]`` and the
        value in ``self.base_str_mapping[type_]``.
        """
        font = TTFont(base_font_file)
        uni_list = font.getGlyphNames()
        logger.info(f'There is {len(uni_list)} fonts in {base_font_file}')

        with open(base_font_mapping_file, 'r') as ifile:
            mapping = json.load(ifile)

        for key, value in mapping.items():
            glyph = font['glyf'][key]
            # Convert the hex digits after "uni" directly instead of building
            # a string literal and passing it to eval, which would execute
            # whatever the mapping file contains.
            key = chr(int(key[3:], 16))
            self.base_glyph_mapping[type_][key] = glyph
            self.base_str_mapping[type_][key] = value
Пример #19
0
def get_base_map():
    """
    Write the skeleton of the hand-maintained mapping to ``base.map.json``.

    Every glyph name of ``base.ttf`` that contains ``uni`` becomes a key with
    an empty string as its value.
    :return:
    """
    font = TTFont('base.ttf')
    data = {name: '' for name in font.getGlyphNames() if 'uni' in name}
    with open('base.map.json', 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)
Пример #20
0
 def parse_font(self, value):
     """Restore data obfuscated by the ``tyc-num.woff`` font.

     Returns ``None`` when the font file is not present under ``self.temp``.
     Otherwise the strings in ``value`` are joined and every character found
     among glyph-order entries 2..9 is replaced by the glyph name at the same
     position among the first eight glyph names.
     """
     fontfile = '%s/%s' % (self.temp, 'tyc-num.woff')
     if not os.path.exists(fontfile):
         return None
     font = TTFont(fontfile)
     source_num = font.getGlyphOrder()[2:10]
     target_num = font.getGlyphNames()[:8]
     return ''.join(
         target_num[source_num.index(ch)] if ch in source_num else ch
         for ch in ''.join(value)
     )
def listFontGlyphNames(fontName):
    """Return the glyph names of a font, without ``.notdef``.

    An empty list is returned when ``fontPath`` finds no path for
    ``fontName`` or when fontTools cannot read the file.
    """
    path = fontPath(fontName)
    if path is None:
        return []
    try:
        fontToolsFont = TTFont(path, lazy=True, fontNumber=0)
    except TTLibError:
        # warn if fontTools cannot read the file
        return []
    # Close the font even if reading the glyph names fails.
    try:
        glyphNames = fontToolsFont.getGlyphNames()
    finally:
        fontToolsFont.close()
    if ".notdef" in glyphNames:
        glyphNames.remove(".notdef")
    return glyphNames
Пример #22
0
def decrypt_font(url,headers):
    '''
    Input: the page URL and the request headers.
    Output: the page source with the font-obfuscated digits decoded.

    The page's woff font is downloaded into ``./fonts/`` and compared with
    the reference font ``./fonts/base.woff``.
    '''

    font1=TTFont('./fonts/base.woff')
    # Glyph name -> digit for the local font, found with Baidu's FontEditor.
    base_dict={'uniE18E': '3', 'uniE585': '2', 'uniE194': '9', 'uniF439': '4', 'uniE7DB': '7','uniF115': '0',
                'uniF0A4': '5', 'uniE311': '1', 'uniF7EF': '8', 'uniEACB': '6'}
    name_list1=font1.getGlyphNames()[1:-1]
    # The headers must be passed by keyword: the second positional parameter
    # of requests.get is ``params``, so they were sent as a query string.
    response=requests.get(url,headers=headers).text
    # Find the woff file with a regular expression.
    font_file=re.findall(r'vfile\.meituan\.net\/colorstone\/(\w+\.woff)', response)[0]
    url2='http://vfile.meituan.net/colorstone/' + font_file
    new_file=requests.get(url2,headers=headers)
    with open('./fonts/'+font_file,'wb') as f:
        f.write(new_file.content)
    font2=TTFont('./fonts/'+font_file)
    font2.saveXML('font_2.xml')
    name_list2=font2.getGlyphNames()[1:-1]
    # Build the new mapping; equal glyph objects mean the same digit.
    new_dict={}
    for name2 in name_list2:
        obj2=font2['glyf'][name2]
        for name1 in name_list1:
            obj1=font1['glyf'][name1]
            if obj1==obj2:
                new_dict[name2]=base_dict[name1]

    # Replace only the glyphs that were matched; an unmatched glyph used to
    # abort the whole call with KeyError.
    for name2,digit in new_dict.items():
        response=response.replace('&#x'+name2[3:].lower()+';',digit)
    return response
Пример #23
0
def single_font_to_pic(filename, content):
    """Render the digit-named glyphs of a font and run OCR on them.

    Args:
        filename: Name of the file ``content`` is written to.
        content: Raw bytes of the font.

    Returns:
        A list of dicts with the OCR result (``ocr_result``) and the name of
        the glyph it was produced from (``name``).
    """
    with open(filename, 'wb') as f:
        f.write(content)

    font = TTFont('./' + filename)
    # Only glyphs whose name consists of digits are rendered. Keep their
    # names so each result is paired with the glyph it came from; indexing
    # the full glyph-name list by result position picked unrelated names
    # whenever the font contains other glyphs.
    digit_glyphs = [glyph for glyph in font.getGlyphNames() if glyph.isdigit()]
    pils = [generate_pic(glyph, font, 30, 0.04) for glyph in digit_glyphs]

    # Assumes ocr_func_for_digit returns one result per image, in order -
    # TODO confirm.
    res = ocr_func_for_digit(pils)

    return [
        {
            "ocr_result": foo,
            "name": str(glyph).replace('.png', '').replace('_', ''),
        }
        for foo, glyph in zip(res, digit_glyphs)
    ]
Пример #24
0
    def handle_fonts(self, url):
        """Download a font and map its character entities to digits.

        The font is saved as ``./static/damn.woff`` and dumped to
        ``./static/damnTo.xml``; its glyph definitions are compared with the
        template ``./static/temp.xml`` and the digits found are assigned, in
        order, to the ``&#x....;`` entities built from the glyph names.
        """
        print('downloading {}'.format(url))
        r = requests.get('http:' + url)
        with open("./static/damn.woff", "wb") as code:
            code.write(r.content)
        font = TTFont("./static/damn.woff")
        font.saveXML('./static/damnTo.xml')

        # Digit drawn by each glyph of the template, in template order.
        num = [8, 6, 2, 1, 4, 3, 0, 9, 5, 7]
        name_attr = re.compile(r"name=\"(.*)\"")

        def glyph_signatures(xml_path):
            # One string per TTGlyph element: its XML with the first name
            # match and all newlines removed, so that the contour data alone
            # identifies the digit.
            root = xmldom.parse(os.path.abspath(xml_path)).documentElement
            signatures = []
            for node in root.getElementsByTagName("TTGlyph"):
                xml_text = str(node.toprettyxml())
                first_match = name_attr.findall(xml_text)[0]
                signatures.append(
                    xml_text.replace(first_match, '').replace("\n", ''))
            return signatures

        # Load the font template.
        data = glyph_signatures("./static/temp.xml")

        # Decode the font downloaded for this request against the template.
        new_font = []
        for get_code in glyph_signatures("./static/damnTo.xml"):
            for j, template_code in enumerate(data):
                if get_code == template_code:
                    new_font.append(num[j])

        font = TTFont("./static/damn.woff")
        font_list = font.getGlyphNames()
        font_list.remove('glyph00000')
        font_list.remove('x')
        ans = {}
        for i, glyph_name in enumerate(font_list):
            entity = str(glyph_name).lower().replace("uni", '&#x') + ';'
            ans[entity] = new_font[i]

        return ans
Пример #25
0
    def __init__(self, ttf_path, default_ttf_path, char_size, canvas_size):
        """Collect the font files of two directories.

        ``ttf_path_list`` and ``default_ttf_path`` hold the paths of the
        ``.ttf``/``.otf``/``.ttc`` files (extension compared case-
        insensitively) found in ``ttf_path`` and ``default_ttf_path``;
        ``default_ttf_charset`` holds ``processGlyphNames`` of each default
        font's glyph names, in the same order.
        """
        font_suffixes = ['.ttf', '.otf', '.ttc']

        self.ttf_path_list = [
            os.path.join(ttf_path, entry)
            for entry in os.listdir(ttf_path)
            if os.path.splitext(entry)[-1].lower() in font_suffixes
        ]

        self.default_ttf_path = [
            os.path.join(default_ttf_path, entry)
            for entry in os.listdir(default_ttf_path)
            if os.path.splitext(entry)[-1].lower() in font_suffixes
        ]
        self.default_ttf_charset = [
            processGlyphNames(TTFont(font_file).getGlyphNames())
            for font_file in self.default_ttf_path
        ]
        self.char_size = char_size
        self.canvas_size = canvas_size
Пример #26
0
def font_creator(html):
    """
    Decode the dynamically loaded digits of a page.

    The woff font named in ``html`` is downloaded into ``./fonts`` unless it
    is already there, and its glyphs are compared with the hand-decoded
    reference font ``./fonts/basefonts.woff``.

    :param html: page source containing the font URL
    :return: the html with the matched ``&#x....;`` entities replaced by digits
    """
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    }
    # Match the URL that ends in .woff with a regular expression.
    woff_name = re.search(
        r"url\('//vfile.meituan.net/colorstone/(.*\.woff)'\)", html).group(1)

    # Download the file only when it is not already present.
    file_list = os.listdir('./fonts')
    if woff_name not in file_list:
        woff_url = 'http://vfile.meituan.net/colorstone/' + woff_name
        response_woff = requests.get(woff_url, headers=headers).content
        with open('./fonts/' + woff_name, 'wb') as f:
            f.write(response_woff)

    # Hand-decoded mapping for basefonts.woff.
    baseFonts = TTFont('./fonts/basefonts.woff')
    base_nums = ['9', '4', '2', '1', '3', '7', '8', '0', '6', '5']
    base_fonts = [
        'uniECE2', 'uniF284', 'uniF5F6', 'uniE3CA', 'uniF798', 'uniF7E7',
        'uniF020', 'uniE4A7', 'uniF4B5', 'uniE0FC'
    ]
    base_glyphs = [(baseFonts['glyf'][name], num)
                   for name, num in zip(base_fonts, base_nums)]

    # The font downloaded for this page.
    onlineFonts = TTFont('./fonts/' + woff_name)
    uni_list = onlineFonts.getGlyphNames()[1:-1]
    temp = {}
    # Compare the downloaded glyphs with the reference ones. Slicing instead
    # of indexing 0..9 avoids IndexError for a font with fewer glyphs.
    for uni in uni_list[:10]:
        onlineGlyph = onlineFonts['glyf'][uni]
        for baseGlyph, num in base_glyphs:
            if onlineGlyph == baseGlyph:
                temp["&#x" + uni[3:].lower() + ';'] = num

    # Replace the entities. With no match the pattern would be "()", which
    # matches the empty string and makes the lookup raise KeyError, so the
    # html is then returned unchanged.
    if temp:
        pat = '(' + '|'.join(temp.keys()) + ')'
        html = re.sub(pat, lambda x: temp[x.group()], html)
    # The html source with the correct digits.
    return html
def fonts(response_index):
    """
    Decode font-obfuscated digits in a page and parse the result.

    The ``.woff`` url found in ``response_index`` is downloaded to
    ``fonts.woff`` on every call, using the ``headers`` object from the
    enclosing scope. Its first ten glyphs (after dropping the first and
    last glyph name) are compared with ten reference glyphs of
    ``basefonts.woff`` whose digits are known, and each matching
    ``&#x....;`` entity is replaced by its digit.

    :param response_index: page source as text
    :return: ``etree.HTML`` of the decoded source, or ``None`` after
        printing a failure message when any step fails
    """
    try:
        # Get the url of the font file.
        woff_ = re.search(r"url\('(.*\.woff)'\)", response_index).group(1)
        woff_url = 'http:' + woff_
        response_woff = requests.get(woff_url, headers=headers).content
        # Save the font file locally; it is written again on every crawl.
        with open('fonts.woff', 'wb') as f:
            f.write(response_woff)

        # baseFonts: a woff downloaded beforehand from the url in the site's
        # font-face rule and renamed to basefonts.woff.
        baseFonts = TTFont('basefonts.woff')
        # base_nums / base_fonts are a hand-made mapping and have to agree
        # with basefonts.woff (it can be inspected with
        # http://fontstore.baidu.com/static/editor/index.html#).
        base_nums = ['9', '5', '6', '7', '3', '8', '4', '2', '1', '0']
        base_fonts = [
            'uniF59C', 'uniF65B', 'uniE3C2', 'uniECD9', 'uniE676', 'uniF7AD',
            'uniF4B7', 'uniF7F7', 'uniE683', 'uniF044'
        ]
        # onlineFonts: the font that was just written to fonts.woff.
        onlineFonts = TTFont('fonts.woff')

        # Glyph names of the downloaded font, without the first and last one.
        uni_list = onlineFonts.getGlyphNames()[1:-1]

        # Resolve the reference glyphs once instead of once per comparison.
        base_glyphs = [baseFonts['glyf'][name] for name in base_fonts]

        temp = {}
        for i in range(10):
            # i-th glyph of fonts.woff.
            online_name = uni_list[i]
            onlineGlyph = onlineFonts['glyf'][online_name]
            for digit, baseGlyph in zip(base_nums, base_glyphs):
                # Equal outlines mean the downloaded glyph draws this digit.
                if onlineGlyph == baseGlyph:
                    # Key: "&#x" + lower-cased glyph name without its first
                    # three characters + ";".  Value: the digit.
                    temp["&#x" + online_name[3:].lower() + ';'] = digit

        if not temp:
            # Nothing matched: report the failure explicitly instead of
            # relying on the KeyError('') an empty pattern would cause.
            print('解析失败!')
            return None

        # Replace the entities; keys are escaped so they match literally.
        pat = '(' + '|'.join(re.escape(key) for key in temp) + ')'
        response_index = re.sub(pat, lambda x: temp[x.group()], response_index)
        response = etree.HTML(response_index)
        return response
    except Exception:
        # Best effort: any failure is reported and signalled with None.
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        print('解析失败!')
        return None
Пример #28
0
    def parse(self, response):
        """
        Yield one ``{'title', 'piao'}`` dict per board entry of the page.

        The font referenced in the page's ``<style>`` blocks is downloaded
        to ``on_maoyan.woff``. Each entity inside an entry's ``stonefont``
        span is decoded with ``jiemi`` and the decoded pieces are joined
        into ``item['piao']``.

        :param response: response object providing ``xpath()`` and ``text``
        :return: generator of dicts; a fresh dict is yielded for every
            entry so that earlier results are not overwritten
        """
        # Find the font url in the <style> blocks.
        style = ''.join(response.xpath('//style').extract())
        font_url = 'http:' + ''.join(re.findall(r"url\('(.*?woff)'\)", style))
        # Download the font file.
        font_response = requests.get(url=font_url)
        with open('on_maoyan.woff', 'wb') as f:
            f.write(font_response.content)
        # Font stored on disk as base_maoyan.woff; also dumped as XML.
        tfont = TTFont('base_maoyan.woff')
        tfont.saveXML('shilie_maoyan.xml')
        # NOTE: despite its name, `basefont` is the font downloaded above.
        basefont = TTFont('on_maoyan.woff')
        # Glyph name -> digit table handed to jiemi().
        # NOTE(review): presumably describes base_maoyan.woff - confirm.
        font_guanxi = {
            'uniE877': '5', 'uniF0B5': '0', 'uniE3C8': '6', 'uniF076': '3', 'uniF833': '8',
            'uniF079': '2', 'uniECED': '9', 'uniE49B': '1', 'uniEB89': '4', 'uniE56F': '7',
        }

        # Nodes that hold the information of one entry each.
        node = etree.HTML(response.text)
        node = node.xpath('//div[@class="board-item-content"]')
        for i in node:
            # A new dict per entry: re-using a single dict would make every
            # yielded item refer to the same, repeatedly mutated object.
            item = {}
            item['title'] = ''.join(i.xpath('.//p[@class="name"]/a/text()'))
            piao = etree.tostring(i)
            piao = re.findall(rb'<span class="stonefont">(.*?)</span>', piao)[0]
            print(piao)
            # Drop the "&#" prefixes, then split on ';' while keeping a
            # decimal point as a token of its own.
            piao = re.sub(b'&#', b'', piao).decode('utf-8')
            piao = piao.replace('.', '.;')
            piao = piao.split(';')
            print(piao)
            decoded = []
            for p in piao:
                if p == '':
                    continue
                if p == '.':
                    decoded.append('.')
                else:
                    decoded.append(jiemi(tfont, p, basefont, font_guanxi))
            item['piao'] = ''.join(decoded)
            yield item
Пример #29
0
def main():
    """
    Write a ``<font>-glyph_widths.json`` file for every font in ``args.fonts``.

    For each font the best cmap is inverted into a glyph-name -> code point
    table. Every glyph whose first ``hmtx`` value differs from 600 and that
    has a code point is stored as ``str(code point) -> that value``. Glyphs
    with the value 600 only print a dot. The table is passed to
    ``saveToJSON``.
    """
    args = parser.parse_args()

    for font_path in args.fonts:
        print("\n-----------------------------------------\n")
        print(font_path)
        ttfont = TTFont(font_path)

        print(type(ttfont["cmap"].getBestCmap()))

        # The cmap maps code point -> glyph name; build the reverse table.
        unicodesDict = {}
        for codepoint, name in ttfont["cmap"].getBestCmap().items():
            print(codepoint, name)
            unicodesDict[name] = codepoint

        print('unicodesDict')
        print(unicodesDict)

        glyphWidths = {}
        for glyphName in ttfont.getGlyphNames():
            width = ttfont['hmtx'][glyphName][0]
            if width == 600:
                print(".")
                continue
            # Glyphs without a code point are left out of the result.
            if glyphName in unicodesDict:
                glyphWidths[str(unicodesDict[glyphName])] = width

        stem = os.path.splitext(font_path)[0]
        jsonPath = f'{stem}-glyph_widths.json'
        saveToJSON(glyphWidths, jsonPath)
Пример #30
0
 def parse_front_html(self, tmpe_file, html):
     """
     Build the entity -> value table for the font stored in ``tmpe_file``.

     Every glyph of the new font is compared with the glyphs of
     ``self.font`` named in ``self.num_list``. For each equal pair the
     new glyph's name, with ``"uni"`` replaced by ``"&#x"`` and
     lower-cased, is mapped to ``self.num_dict[<reference glyph name>]``.

     :param tmpe_file: font file to open with ``TTFont``
     :param html: not used by this method
     :return: dict mapping entity prefixes to the matched values
     """
     font1 = TTFont(tmpe_file)
     # Resolve the reference glyphs once instead of once per new glyph.
     references = [(name, self.font['glyf'][name]) for name in self.num_list]

     new_font_dict = dict()
     for fo in font1.getGlyphOrder():
         new_glyph = font1['glyf'][fo]
         for ref_name, ref_glyph in references:
             if new_glyph == ref_glyph:
                 key = fo.replace("uni", "&#x").lower()
                 new_font_dict[key] = self.num_dict[ref_name]
     return new_font_dict
Пример #31
0
def findstar(titles):
    """
    Decode the glyphs of ``to.xml`` against the template ``temp.xml``.

    Each ``TTGlyph`` element of both XML files is reduced to a signature:
    its pretty-printed XML with the glyph name and all newlines removed.
    A glyph of ``to.xml`` whose signature equals the j-th template
    signature contributes ``num[j]`` to the first result list. The second
    list holds the glyph names of ``demo.woff`` without ``glyph00000`` and
    ``x``, lower-cased and stripped of ``"uni"``.

    :param titles: not used by this function
    :return: tuple ``(new_font, font_list)``
    """
    # Digit drawn by the template glyph at the same position.
    num = [8, 6, 2, 1, 4, 3, 0, 9, 5, 7]
    name_attr = re.compile(r"name=\"(.*)\"")

    def glyph_signatures(xml_path):
        # One name-free, newline-free XML string per TTGlyph element.
        root = xmldom.parse(os.path.abspath(xml_path)).documentElement
        signatures = []
        for element in root.getElementsByTagName("TTGlyph"):
            text = str(element.toprettyxml())
            glyph_name = name_attr.findall(text)[0]
            signatures.append(text.replace(glyph_name, '').replace("\n", ''))
        return signatures

    # Load the font template.
    data = glyph_signatures(
        r"C:\Users\Administrator\Desktop\python-maoyan-spider\Maoyan\com\sider\temp.xml"
    )

    # Decode the font downloaded for this request against the template.
    new_font = []
    for get_code in glyph_signatures(
        r"C:\Users\Administrator\Desktop\python-maoyan-spider\Maoyan\com\sider\to.xml"
    ):
        for position, template in enumerate(data):
            if get_code == template:
                new_font.append(num[position])

    font = TTFont(
        r"C:\Users\Administrator\Desktop\python-maoyan-spider\Maoyan\com\sider\demo.woff"
    )
    glyph_names = font.getGlyphNames()
    for unwanted in ('glyph00000', 'x'):
        glyph_names.remove(unwanted)
    font_list = [str(name).lower().replace("uni", '') for name in glyph_names]
    return (new_font, font_list)