Пример #1
0
def get_glyph_name(font: TTFont, codepoint: int) -> Optional[str]:
  """Return the glyph name mapped to ``codepoint`` in the font's best cmap.

  Args:
    font: Font whose ``getBestCmap()`` mapping is queried.
    codepoint: Unicode code point to look up.

  Returns:
    The glyph name, or ``None`` when the code point is not mapped or the
    font exposes no usable cmap at all.
  """
  best_cmap = font.getBestCmap()

  # getBestCmap() yields None when the font has no Unicode cmap subtable;
  # treat that like "code point not mapped" instead of raising TypeError.
  if not best_cmap:
    return None

  return best_cmap.get(codepoint)
Пример #2
0
# Listing URL template; {0} is filled with the page number.
url  = 'https://bj.58.com/haidian/pinpaigongyu/pn/{0}/?minprice=2000_3000'
page = 0

# Output CSV path, placed in the current working directory.
file_dir = "{0}".format(os.getcwd())
path = os.path.join(file_dir,"BJ_haidian_2000_3000.csv")
columns = ['name','location','price','url']
house_data = []
# Walk the listing pages one by one until a page yields no list items.
while True:
    page += 1
    # `headers` is defined outside this snippet; one entry is picked at random per request.
    resp = requests.get(url.format(page),headers=random.choice(headers))

    if resp:
        # Pull the base64-encoded TrueType font embedded in the page's CSS.
        base64_str = re.findall('data:application/font-ttf;charset=utf-8;base64,(.*?)\'\) format\(\'truetype\'\)}',resp.text)
        bin_data = base64.b64decode(base64_str[0])
        fonts = TTFont(io.BytesIO(bin_data))
        bestcmap = fonts.getBestCmap()
        # Build hex(code point) -> (first number found in the glyph name) - 1.
        newmap = {}
        for key in bestcmap.keys():
            value = int(re.findall(r'(\d+)', bestcmap[key])[0]) - 1
            key = hex(key)
            newmap[key] = value

        print('==========', newmap)
        resp_ = resp.text
    # NOTE(review): resp_ is only assigned inside the `if resp:` branch above, so a
    # falsy response leaves it unbound (first page) or stale (later pages) -- confirm intent.
    doc = pq(resp_)
    house_list = doc.find('.list li')

    # An empty result list ends the pagination loop.
    if not house_list:
        break
    #print(doc.find('.page a').eq(-2).text())
    for each in house_list.items():
Пример #3
0
class UFOFont(BaseFont):
    """Font backed by a UFO source that is read through ``UFOReader``.

    ``load()`` reads info and lib from the UFO, compiles the UFO to binary
    font data with ``compileUFOToBytes`` and builds a ``TTFont`` and a shaper
    from that data. Glyphs are read on demand from the UFO glyph sets and
    cached per ``(layerName, glyphName)``.
    """

    # UFOState snapshot used for change detection; created on the first load().
    ufoState = None

    def resetCache(self):
        """Reset the base-class cache and delete the cached-property attributes
        for the default vertical metrics and the global color layer mapping."""
        super().resetCache()
        del self.defaultVerticalAdvance
        del self.defaultVerticalOriginY
        del self.globalColorLayerMapping

    def _setupReaderAndGlyphSet(self):
        """(Re)create the UFO reader, the default glyph set and an empty
        per-layer glyph set cache for the current ``fontPath``."""
        self.reader = UFOReader(self.fontPath, validate=False)
        self.glyphSet = self.reader.getGlyphSet()
        self.glyphSet.glyphClass = Glyph
        self.layerGlyphSets = {}

    async def load(self, outputWriter):
        """Load the UFO and compile it.

        When a reader already exists, only the glyph cache is cleared and
        nothing else is reloaded. Otherwise info, lib and (once) the UFOState
        are set up, the UFO is compiled, and ``ttFont`` and ``shaper`` are
        created from the compiled bytes. ``outputWriter`` is passed through
        to ``compileUFOToBytes``.
        """
        if hasattr(self, "reader"):
            self._cachedGlyphs = {}
            return
        self._setupReaderAndGlyphSet()
        self.info = SimpleNamespace()
        self.reader.readInfo(self.info)
        self.lib = self.reader.readLib()
        self._cachedGlyphs = {}
        if self.ufoState is None:
            includedFeatureFiles = extractIncludedFeatureFiles(
                self.fontPath, self.reader)
            self.ufoState = UFOState(
                self.reader,
                self.glyphSet,
                getUnicodesAndAnchors=self._getUnicodesAndAnchors,
                includedFeatureFiles=includedFeatureFiles)

        fontData = await compileUFOToBytes(self.fontPath, outputWriter)

        f = io.BytesIO(fontData)
        self.ttFont = TTFont(f, lazy=True)
        self.shaper = self._getShaper(fontData)

    def updateFontPath(self, newFontPath):
        """This gets called when the source file was moved.

        The reader and glyph sets are rebuilt for the new path.
        """
        super().updateFontPath(newFontPath)
        self._setupReaderAndGlyphSet()

    def getExternalFiles(self):
        """Return the included feature files recorded in the UFO state."""
        return self.ufoState.includedFeatureFiles

    def canReloadWithChange(self, externalFilePath):
        """Try to update the font in place after a change on disk.

        Returns ``False`` when a full reload is needed (non-package UFO,
        a changed external file, or a features update); otherwise applies the
        info/cmap/lib updates reported by the UFO state, resets the cache and
        returns ``True``.
        """
        if self.reader.fileStructure != UFOFileStructure.PACKAGE:
            # We can't (won't) partially reload .ufoz
            return False

        if externalFilePath:
            # Features need to be recompiled no matter what
            return False

        self.glyphSet.rebuildContents()

        # Replace the state with a fresh snapshot and ask what changed.
        self.ufoState = self.ufoState.newState()
        (needsFeaturesUpdate, needsGlyphUpdate, needsInfoUpdate,
         needsCmapUpdate, needsLibUpdate) = self.ufoState.getUpdateInfo()

        if needsFeaturesUpdate:
            return False

        if needsInfoUpdate:
            # font.info changed, all we care about is a possibly change unitsPerEm
            self.info = SimpleNamespace()
            self.reader.readInfo(self.info)

        if needsCmapUpdate:
            # The cmap changed. Let's update it in-place and only rebuild the shaper
            newCmap = {
                code: gn
                for gn, codes in self.ufoState.unicodes.items()
                for code in codes
            }
            fb = FontBuilder(font=self.ttFont)
            fb.setupCharacterMap(newCmap)
            f = io.BytesIO()
            self.ttFont.save(f, reorderTables=False)
            self.shaper = self._getShaper(f.getvalue())

        if needsLibUpdate:
            self.lib = self.reader.readLib()

        # We don't explicitly track changes in layers, but they may be involved
        # in building layered color glyphs, so let's just always reset the cache.
        self.resetCache()

        return True

    def _getUnicodesAndAnchors(self):
        """Return ``(unicodes, anchors)`` taken from the compiled font.

        ``unicodes`` maps glyph name -> list of code points (inverted best
        cmap); ``anchors`` is unpickled from the ``FGAx`` table.
        """
        unicodes = defaultdict(list)
        for code, gn in self.ttFont.getBestCmap().items():
            unicodes[gn].append(code)
        # NOTE(review): pickle.loads on table data -- presumably this table is
        # written by the compile step used in load(); confirm it never comes
        # from an untrusted font.
        anchors = pickle.loads(self.ttFont["FGAx"].data)
        return unicodes, anchors

    def _getShaper(self, fontData):
        """Create an ``HBShape`` for ``fontData`` wired to this font's metric callbacks."""
        return HBShape(fontData,
                       getHorizontalAdvance=self._getHorizontalAdvance,
                       getVerticalAdvance=self._getVerticalAdvance,
                       getVerticalOrigin=self._getVerticalOrigin,
                       ttFont=self.ttFont)

    @cachedProperty
    def unitsPerEm(self):
        """The ``unitsPerEm`` value from the UFO font info."""
        return self.info.unitsPerEm

    def _getGlyph(self, glyphName, layerName=None):
        """Return the (cached) glyph for ``glyphName`` in ``layerName``.

        A missing ``.notdef`` is synthesized. When reading a glyph fails, the
        error is printed to stderr and the ``.notdef`` glyph is returned (and
        cached under the requested key) instead.
        """
        glyph = self._cachedGlyphs.get((layerName, glyphName))
        if glyph is None:
            if glyphName == ".notdef" and glyphName not in self.glyphSet:
                # We need a .notdef glyph, so let's make one.
                glyph = NotDefGlyph(self.info.unitsPerEm)
                self._addOutlinePathToGlyph(glyph)
            else:
                try:
                    if layerName is None:
                        glyph = self.glyphSet[glyphName]
                    else:
                        glyph = self.getLayerGlyphSet(layerName)[glyphName]
                    self._addOutlinePathToGlyph(glyph)
                except Exception as e:
                    # TODO: logging would be better but then capturing in mainWindow.py is harder
                    print(f"Glyph '{glyphName}' could not be read: {e!r}",
                          file=sys.stderr)
                    # NOTE(review): if an existing ".notdef" itself fails to
                    # read, this fallback re-enters the same branch -- confirm
                    # this cannot recurse indefinitely.
                    glyph = self._getGlyph(".notdef")
            self._cachedGlyphs[(layerName, glyphName)] = glyph
        return glyph

    def _addOutlinePathToGlyph(self, glyph):
        """Draw ``glyph`` with a ``CocoaPen`` and store the resulting path as ``glyph.outline``."""
        pen = CocoaPen(self.glyphSet)
        glyph.draw(pen)
        glyph.outline = pen.path

    def _getHorizontalAdvance(self, glyphName):
        """Return the glyph's ``width``."""
        glyph = self._getGlyph(glyphName)
        return glyph.width

    @cachedProperty
    def defaultVerticalAdvance(self):
        """``ascender + abs(descender)``, or ``unitsPerEm`` when either is missing."""
        ascender = getattr(self.info, "ascender", None)
        descender = getattr(self.info, "descender", None)
        if ascender is None or descender is None:
            return self.info.unitsPerEm
        else:
            return ascender + abs(descender)

    @cachedProperty
    def defaultVerticalOriginY(self):
        """The ``ascender``, or ``unitsPerEm`` when no ascender is set."""
        ascender = getattr(self.info, "ascender", None)
        if ascender is None:
            return self.info.unitsPerEm  # ???
        else:
            return ascender

    def _getVerticalAdvance(self, glyphName):
        """Return the negated absolute vertical advance for the glyph.

        Falls back to ``defaultVerticalAdvance`` when the glyph height is
        ``None`` or 0.
        """
        glyph = self._getGlyph(glyphName)
        vAdvance = glyph.height
        if vAdvance is None or vAdvance == 0:  # XXX default vAdv == 0 -> bad UFO spec
            vAdvance = self.defaultVerticalAdvance
        return -abs(vAdvance)

    def _getVerticalOrigin(self, glyphName):
        """Return ``(True, x, y)`` for the glyph's vertical origin.

        ``x`` is half the glyph width; ``y`` comes from the glyph lib key
        ``public.verticalOrigin`` or, if absent, ``defaultVerticalOriginY``.
        """
        glyph = self._getGlyph(glyphName)
        vOrgX = glyph.width / 2
        lib = getattr(glyph, "lib", {})
        vOrgY = lib.get("public.verticalOrigin")
        if vOrgY is None:
            vOrgY = self.defaultVerticalOriginY
        return True, vOrgX, vOrgY

    def _getGlyphDrawing(self, glyphName, colorLayers):
        """Return a ``GlyphDrawing`` for the glyph.

        With ``colorLayers`` set, the glyph-level (or else global) color layer
        mapping is used to collect one ``(outline, colorID)`` entry per layer,
        skipping layers that resolved to a ``NotDefGlyph``. Without usable
        layers a single uncolored outline is returned.
        """
        glyph = self._getGlyph(glyphName)
        if colorLayers:
            colorLayerMapping = glyph.lib.get(COLOR_LAYER_MAPPING_KEY)
            if colorLayerMapping is None:
                colorLayerMapping = self.globalColorLayerMapping
            if colorLayerMapping is not None:
                layers = []
                for layerName, colorID in colorLayerMapping:
                    glyph = self._getGlyph(glyphName, layerName)
                    if not isinstance(glyph, NotDefGlyph):
                        layers.append((glyph.outline, colorID))
                if layers:
                    return GlyphDrawing(layers)
        # Note: `glyph` may have been rebound to the last layer glyph above.
        return GlyphDrawing([(glyph.outline, None)])

    @cachedProperty
    def colorPalettes(self):
        """The font lib entry stored under ``COLOR_PALETTES_KEY`` (or ``None``)."""
        return self.lib.get(COLOR_PALETTES_KEY)

    @cachedProperty
    def globalColorLayerMapping(self):
        """The font lib entry stored under ``COLOR_LAYER_MAPPING_KEY`` (or ``None``)."""
        return self.lib.get(COLOR_LAYER_MAPPING_KEY)

    def getLayerGlyphSet(self, layerName):
        """Return the glyph set for ``layerName``, creating and caching it on first use."""
        layerGlyphSet = self.layerGlyphSets.get(layerName)
        if layerGlyphSet is None:
            layerGlyphSet = self.reader.getGlyphSet(layerName)
            self.layerGlyphSets[layerName] = layerGlyphSet
        return layerGlyphSet
Пример #4
0
     "cid10920": '8',
     "cid00026": '9',
     "cid00771": '9',
     "cid00939": '9',
     "cid00919": '9',
     "cid01068": '9',
     "cid26924": '9',
     "cid19425": '8',
     "cid00783": '9',
     "cid01923": '9',
     "cid09631": '9',
     "cid02040": '9',
     "cid00959": "9",
 }
 # Re-key the best cmap by the character itself: hex code point ->
 # "\uXXXX" escape text -> decoded character. The value is the glyph name.
 # NOTE(review): the "\u" escape presumably assumes 4-hex-digit code points -- confirm.
 dict_ = {}
 for k, v in font.getBestCmap().items():
     k = hex(k).replace('0x',
                        '\\u').encode('utf-8').decode('unicode_escape')
     dict_[k] = v
 html = etree.HTML(text)
 nums = html.xpath("//div[@class='col-md-1']/text()")
 import re
 for num in nums:
     list_ = []
     list_2 = []
     # Keep only the first run of characters in the U+4E00..U+9FA5 range.
     num = re.findall(r"[\u4e00-\u9fa5]+", num)
     for n in num[0]:
         # `s`, `list_3` and `d` are defined outside this snippet; `d` is
         # indexed by glyph name here.
         s.add(dict_[n])
         list_2.append(dict_[n])
         list_3.append(dict_[n])
         list_.append(d[dict_[n]])
Пример #5
0
# Fetch the page source, then resolve code point -> glyph name -> glyph shape.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'
}
resp = requests.get("https://cs.58.com/chuzu/", headers=headers)
text = resp.text
# Extract the base64-encoded font embedded in the page's @font-face rule.
font_match = re.search(r"@font-face.+base64,(.+?)'\)", text)
if font_match is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise ValueError("no embedded base64 @font-face font found in the page")
font_face = font_match.group(1)
# Keep the decoded font in memory rather than writing it to disk.
font_bytes = io.BytesIO(base64.b64decode(font_face))
currentFont = TTFont(font_bytes)

# Mapping of code point -> glyph name for the page's font.
codeNameMap = currentFont.getBestCmap()
# Glyph shapes of the page's font.
currentGlyf = currentFont['glyf']
# `baseFontMap` (defined outside this snippet) maps a value to its known glyph shape.
for code, name in codeNameMap.items():
    # Shape drawn by the page font for this glyph name.
    currentShape = currentGlyf[name]
    for number, shape in baseFontMap.items():
        if currentShape == shape:
            # The page references the glyph as a hexadecimal HTML entity.
            webcode = "&#x{:x};".format(code)
            # Literal replacement: the entity is plain text, not a regex.
            text = text.replace(webcode, str(number))
            # Every occurrence is replaced already, so stop searching.
            break

# with open("58_1.html", 'w', encoding='utf-8') as fp:
# 	fp.write(text)
Пример #6
0
    def parse(self, response):
        """Decode a font-obfuscated post and print the readable text.

        The page's custom font is downloaded to ``online_qc.ttf`` and every
        glyph in it is compared by shape with the glyphs of the local
        reference font ``qiche.ttf``, whose glyph names have known characters
        in ``dict_font``. Each matching obfuscated character is then replaced
        in the extracted text.

        Args:
            response: Response object for the post page; its XPath selectors
                and ``text`` are used.
        """
        text = response.xpath('//div[@class="conttxt"]/div[1]').xpath(
            'string(.)').extract()[0]
        print(text)
        font_url = 'https:' + re.search(r",url\('(.*\.ttf)'\)", response.text,
                                        re.S).group(1)
        with open('online_qc.ttf', 'wb') as f:
            f.write(requests.get(font_url).content)

        base_font = TTFont('qiche.ttf')
        base_uni = base_font.getGlyphOrder()[1:]
        print('base_uni', base_uni)

        online_font = TTFont('online_qc.ttf')
        online_uni = online_font.getGlyphNames()[1:]
        print('online_uni', online_uni)

        bm = online_font.getBestCmap()
        print('bm', bm)

        # Reference font glyph name -> the character that glyph draws.
        dict_font = {
            'uniEC1B': '八',
            'uniEC6D': '大',
            'uniEDAE': '右',
            'uniECFA': '十',
            'uniED4C': '呢',
            'uniEC99': '四',
            'uniECEB': '小',
            'uniEC37': '好',
            'uniED78': '三',
            'uniEDCA': '是',
            'uniED16': '短',
            'uniEC63': '五',
            'uniECB5': '下',
            'uniEDF5': '少',
            'uniEC53': '近',
            'uniED94': '长',
            'uniECE0': '地',
            'uniED32': '多',
            'uniEC7F': '更',
            'uniEDBF': '左',
            'uniEC1D': '不',
            'uniED5E': '矮',
            'uniEDAF': '和',
            'uniECFC': '高',
            'uniEC49': '一',
            'uniEC9A': '很',
            'uniEDDB': '的',
            'uniED28': '六',
            'uniED79': '得',
            'uniECC6': '七',
            'uniED18': '坏',
            'uniEC64': '着',
            'uniEDA5': '九',
            'uniEDF7': '上',
            'uniED43': '远',
            'uniEC90': '低',
            'uniECE2': '了',
            'uniEC2E': '二'
        }

        base_glyf = base_font['glyf']
        online_glyf = online_font['glyf']
        # Obfuscated character (as it appears in the page) -> real character.
        temp = {}
        for bs_uni in base_uni:
            base_obj = base_glyf[bs_uni]
            for ol_uni in online_uni:
                if base_obj == online_glyf[ol_uni]:
                    # Glyph names look like "uniXXXX"; convert the hex part
                    # directly instead of eval()-ing text that comes from a
                    # downloaded font file.
                    try:
                        obfuscated_char = chr(int(ol_uni[3:], 16))
                    except ValueError:
                        # Not a "uni<hex>" name, so it has no code point to map.
                        continue
                    temp[obfuscated_char] = dict_font[bs_uni]

        print(temp)

        # Replace every matched character; the number of matches is not
        # assumed to be exactly 38.
        for obfuscated_char, real_char in temp.items():
            text = text.replace(obfuscated_char, real_char)

        print(text)
Пример #7
0
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
}
# Download the first font; `url` and `headers` at this point are defined
# above this snippet. The response body is written to ./font.woff.
r = requests.get(url, headers=headers)
with open("./font.woff", "wb") as f:
    f.write(r.content)

# Download the second font (tyc-num.ttf) the same way into ./font.ttf.
url = "https://static.tianyancha.com/fonts-styles/fonts/b1/b17d9d87/tyc-num.ttf"

headers = {
    "User-Agent":
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
}
r = requests.get(url, headers=headers)
with open("./font.ttf", "wb") as f:
    f.write(r.content)

# Only the .woff file is inspected below; font.ttf is downloaded but not opened here.
online_fonts = TTFont('font.woff')

# Dump the font as XML for manual inspection.
online_fonts.saveXML("text.xml")

# Best cmap of the font: code point -> glyph name.
_dict = online_fonts.getBestCmap()

print("字典:", _dict)

# online_fonts = TTFont('font.tff')
#
# online_fonts.saveXML("text.xml")
#
# _dict = online_fonts.getBestCmap()
#
# print("字典:", _dict)
Пример #8
0
    def parse(self, response):
        """Scrape one listing page and decode its font-obfuscated fields.

        Titles, room descriptions and prices are pulled out of the raw HTML
        with regular expressions. The page's embedded base64 font is written
        to ``font.woff`` and its best cmap is stored on ``self.dict`` before
        the fields are decoded with ``convert_title_room`` and
        ``convert_money``.

        Yields:
            One ``Request`` per detail URL (handled by ``parse_detail``),
            followed by a single ``HouseItem`` holding all collected fields.
        """
        flags = re.S | re.M
        title_urls = response.css(
            'div.des h2 a.strongbox::attr(href)').extract()
        page_source = response.text

        raw_titles = re.findall(
            r'(?:target="_blank" |target="_blank"  rel="nofollow" )>(.*?)</a>',
            page_source, flags)
        raw_rooms = re.findall(r'<p class="room">(.*?)</p>', page_source, flags)
        moneys = re.findall(r'<b class="strongbox">(.*?)</b>', page_source, flags)

        # Clean up the raw matches: drop newlines and spaces, then turn the
        # non-breaking-space entity into the separator used for splitting.
        titles = [t.replace('\n', '').replace(' ', '') for t in raw_titles]
        rooms = [r.replace(' ', '').replace('&nbsp;', ' ') for r in raw_rooms]

        # Each room text is "<layout> <size>"; split on the first space only.
        looks, sizes = [], []
        for room_text in rooms:
            looks.append(room_text.split(" ", 1)[0])
            sizes.append(room_text.split(" ", 1)[1])
        sizes = [s.replace(' ', '').replace('\n', '') for s in sizes]

        # Decode the embedded font; the last three captured characters are
        # trailing CSS syntax and are cut off before decoding.
        encoded_fonts = re.findall(r"base64,(.*?)format", page_source, flags)
        font_bytes = base64.decodebytes(encoded_fonts[0][:-3].encode())
        with open("font.woff", "wb") as font_file:
            font_file.write(font_bytes)
        self.dict = TTFont('font.woff').getBestCmap()

        # Translate the obfuscated digits in every field.
        titles = [self.convert_title_room(t) for t in titles]
        looks = [self.convert_title_room(look) for look in looks]
        sizes = [self.convert_title_room(size) for size in sizes]
        moneys = [self.convert_money(m) for m in moneys]

        house_item = HouseItem()
        house_item["title_urls"] = [title_urls]
        house_item["titles"] = [titles]
        house_item["moneys"] = [moneys]
        house_item["looks"] = [looks]
        house_item["sizes"] = [sizes]

        # Follow every listing to its detail page.
        for detail_url in title_urls:
            yield Request(url=detail_url, callback=self.parse_detail)

        # Pagination is currently disabled:
        # next_urls = response.css('div.pager a.next::attr(href)').extract()
        # if next_urls:
        #     yield Request(url=next_urls, callback=self.parse)

        yield house_item
Пример #9
0
def do_build(opt):
    """Build six cmap variants of the source font and bundle them via otf2otc.

    The source font's best cmap is folded into 16-bit code points with up to
    three source glyphs each (half-width normal, half-width italic,
    full-width). Depending on the East Asian Width of each code point, six
    glyph assignments are derived; each one is written to ``opt['ttf'][i]``
    with the style settings from ``opt['font'][i]``. Finally ``otf2otc``
    combines the files into ``opt['dst']``.

    Args:
        opt: Mapping with the keys ``src``, ``dst``, ``font`` and ``ttf``
            as read by this function.
    """
    PBAR_desc('prepare', opt['dst'])
    font = TTFont(opt['src'])
    lookups = font['GSUB'].table.LookupList.Lookup
    fwid = lookups[0].SubTable[0].mapping
    hwid = lookups[1].SubTable[0].mapping

    # code point (low 16 bits) -> six glyph slots. While collecting, only the
    # first three are used:
    #   0: src half normal, 1: src half italic, 2: src full normal
    glyph_map = {}
    for codepoint, glyph_name in font.getBestCmap().items():
        if codepoint < 0xF0000:
            slot = 0
        elif codepoint < 0x100000:
            slot = 1
        else:
            slot = 2
        glyph_map.setdefault(codepoint & 0xFFFF, [None] * 6)[slot] = glyph_name

    for codepoint, row in glyph_map.items():
        width_class = unicodedata.east_asian_width(chr(codepoint))
        norm = row[0] or row[1]
        ital = row[1] or row[0]
        full = row[2]
        if norm and full:
            # Register the half <-> full substitutions in both GSUB lookups.
            fwid[norm] = full
            hwid[full] = norm
        if width_class in ('H', 'Na'):
            row[:] = (norm, ital) * 3
        elif width_class in ('F', 'W'):
            row[:] = (full,) * 6
        elif width_class == 'N':
            row[:] = (norm, ital, norm, ital, full, full)
        elif width_class == 'A':
            row[:] = (norm, ital, full, full, full, full)

    # One code point -> glyph name mapping per output font, skipping empty slots.
    maps = []
    for column in range(6):
        maps.append({
            codepoint: row[column]
            for codepoint, row in glyph_map.items() if row[column]
        })

    os2 = font['OS/2']
    os2.xAvgCharWidth = 1024
    os2.panose.bProportion = 9
    os2.ulCodePageRange1 |= 0x00020000
    os2.ulCodePageRange1 ^= 0x00000004
    os2.ulCodePageRange2 ^= 0x00020000
    os2.ulUnicodeRange3 ^= 0x04C00000
    font['post'].isFixedPitch = 1
    del font['FFTM']
    del font['GPOS']
    PBAR.update(1)

    for index, variant_map in enumerate(maps):
        PBAR_desc('generate', f'{index}.ttf')
        variant_opt = opt['font'][index]

        # Refill both Windows cmap subtables from scratch for this variant.
        full_cmap = font['cmap'].getcmap(3, 10).cmap
        full_cmap.clear()
        base_cmap = font['cmap'].getcmap(3, 1).cmap
        base_cmap.clear()
        for codepoint, glyph_name in variant_map.items():
            full_cmap[codepoint] = glyph_name
            if codepoint <= 0xFFFF:
                base_cmap[codepoint] = glyph_name

        font['head'].macStyle = variant_opt['macStyle']
        font['post'].italicAngle = variant_opt['italicAngle']
        os2.fsSelection = variant_opt['fsSelection']
        os2.usWeightClass = variant_opt['usWeightClass']
        os2.panose.bWeight = variant_opt['panoseWeight']
        os2.panose.bLetterForm = variant_opt['panoseLetterForm']

        # Name records whose nameID is a key of the variant options are overridden.
        for record in font['name'].names:
            if record.nameID in variant_opt:
                record.string = variant_opt[record.nameID]

        font.save(opt['ttf'][index])
        PBAR.update(1)

    PBAR_desc('otf2otc', opt['dst'])
    run(['otf2otc', '-o', opt['dst']] + opt['ttf'], stdout=DEVNULL)
    PBAR.update(1)
Пример #10
0
#
# print(my_dic)
# Glyph name used by the font -> the character it actually draws.
map_str_2_number = {
    'period': '.',
    'zero': 0,
    'one': 1,
    'two': 2,
    'three': 3,
    'four': 4,
    'five': 5,
    'six': 6,
    'seven': 7,
    'eight': 8,
    'nine': 9
}

# `font` and `text` are defined earlier in the script.
font_value = font.getBestCmap()
print(font_value)

# Build a new mapping (code point -> digit) rather than overwriting entries
# of the dict returned by getBestCmap(), which is the font's own cmap data.
font_value = {
    code: map_str_2_number[glyph_name]
    for code, glyph_name in font_value.items()
}

print(font_value)

# The page references the glyphs as decimal HTML entities ("&#<code>;").
for key, value in font_value.items():
    text = text.replace("&#" + str(key) + ";", str(value))

# Explicit encoding so the output does not depend on the platform default.
with open('2.html', 'w', encoding='utf-8') as f:
    f.write(text)
Пример #11
0
from fontTools.ttLib import TTCollection, TTFont
from fontTools.unicode import Unicode
from itertools import chain
import json

if __name__ == "__main__":
    # Characters the font can render, taken from its best cmap.
    font = TTFont(
        'D:/work/git/CycleGan-handwriting_generation/data/fonts/simhei.ttf')
    cmap = font.getBestCmap()  # look up the encoding
    list_char_of_font = {chr(codepoint) for codepoint in cmap}

    # Characters that occur in the handwriting labels.
    list_char_of_hw = set()
    with open('labels.json', encoding='utf-8') as labels_file:
        json_load = json.load(labels_file)
    for key in json_load:
        list_char_of_hw.update(json_load[key])

    print('len of font: ', len(list_char_of_font))
    print('len of hw: ', len(list_char_of_hw))
    # Characters present in the font but never used in the labels.
    result = list_char_of_font - list_char_of_hw
    print('len list character miss: ', len(result))
Пример #12
0
    def get_novelcontent(self, response):
        """Decode the font-obfuscated counters of a novel page and yield the item.

        The item is taken from ``response.meta['targentcontent']``. The page
        at ``response.meta['url']`` is fetched again with ``requests`` to find
        the anti-scraping font, which is downloaded to ``./font/qidian.woff``.
        Four obfuscated numbers are then decoded with ``self.decode`` and
        stored on the item.
        """
        myPgae = response.body
        targentcontent = response.meta['targentcontent']
        unicodePage = myPgae.decode('utf-8')
        selector = Selector(response)
        url = response.meta['url']
        # Fetch the page content
        r = requests.get(url)
        response = html.fromstring(r.text)
        # print response
        # Match the font and get the randomly generated font URL, e.g. https://qidian.gtimg.com/qd_anti_spider/woqFfmqF.woff
        cmp = re.compile("url\('(//.*.woff)'\) format\('woff'\)")
        rst = cmp.findall(r.text)
        # NOTE(review): the greedy match is split on quotes and the 9th piece
        # is taken as the font URL -- presumably tied to the exact layout of
        # the page's @font-face rule; confirm against a live page.
        fontUrl = str(rst[0]).split('\'')[8]
        # print fontUrl
        # The font file's base name is also used as the CSS class of the obfuscated spans below.
        fontName = fontUrl.split('/')[4].split(".")[0]
        # print fontName
        # ttf = requests.get("http:" + fontUrl, stream=True)
        ttf = requests.get(fontUrl, stream=True)
        with open("./font/qidian.woff", "wb") as pdf:
            for chunk in ttf.iter_content(chunk_size=1024):
                if chunk:
                    pdf.write(chunk)
        # Parse the downloaded font file
        font = TTFont('./font/qidian.woff')
        uniList = font['cmap'].tables[0].ttFont.getGlyphOrder()
        # print uniList # ['.notdef', 'period', 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine']
        cmap = font.getBestCmap()
        # print(cmap) # {100064: 'one', 100065: 'six', 100066: 'two', 100067: 'three', 100068: 'five', 100069: 'nine', 100070: 'eight', 100071: 'four', 100072: 'period', 100061: 'seven', 100063: 'zero'}
        # for num, un_size in enumerate(cm):
        #     print(un_size, num)
        # All three methods above return the glyph set; comparing with the characters shown on the page reveals that getGlyphOrder() returns them in digit order.
        # print(font.getGlyphNames())
        # print(font.getGlyphNames2())
        # print(font.getGlyphOrder()) # ['.notdef', 'period', 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine']
        # Use getGlyphOrder() to get the glyph of each digit and build the dict tmp_dic
        # (the first two glyph names are skipped, so the index equals the digit).
        glyphs = font.getGlyphOrder()[2:]
        tmp_dic = {}
        for num, un_size in enumerate(glyphs):
            # print(un_size, num)
            font_uni = un_size.replace('uni', '0x').lower()
            tmp_dic[font_uni] = num
        tmp_dic['period'] = "."

        numbers = re.findall(r'<span class="' + fontName + '">(.*?)</span>',
                             unicodePage, re.S)  # obfuscated number spans on the current page
        # NOTE(review): re.findall returns a list, never None, so this check
        # always passes; fewer than four matches would raise IndexError below.
        if numbers is not None:
            serialnumber = numbers[0]
            serialnumber = self.decode(serialnumber, cmap, tmp_dic)
            # Total clicks
            click_num_total = numbers[1]
            click_num_total = self.decode(click_num_total, cmap, tmp_dic)
            # Clicks this month
            click_num_month = numbers[2]
            click_num_month = self.decode(click_num_month, cmap, tmp_dic)
            # Total favorites
            collect_num_total = numbers[3]
            collect_num_total = self.decode(collect_num_total, cmap, tmp_dic)

            targentcontent['serialnumber'] = int(serialnumber)
            targentcontent['click_num_total'] = int(click_num_total)
            # NOTE(review): the monthly value is multiplied by 4; the reason is not visible here -- confirm.
            targentcontent['click_num_month'] = int(click_num_month) * 4
            targentcontent['collect_num_total'] = int(collect_num_total)
            yield targentcontent
Пример #13
0
class ParseTTFFont:
    """Map the glyphs of an obfuscated TTF font to the characters they draw.

    Glyphs are identified by a fingerprint built from ``endPtsOfContours``.
    A per-project JSON file stores fingerprint -> character; when it does not
    exist yet, it is created by rendering the glyphs to an image and sending
    that image to the Baidu OCR client.
    """

    baidu = Baidu(access_token=ACCESS_TOKEN)

    def __init__(self, font, ignore_names=None, overwrite_ignore=False):
        """Open the font.

        :param font: path to a font file (``str``) or raw font data (``bytes``)
        :param ignore_names: glyph names to skip in addition to ``IGNORE_NAMES``
        :param overwrite_ignore: when true, ``ignore_names`` replaces
            ``IGNORE_NAMES`` instead of extending it
        :raises ValueError: if ``font`` is neither ``str`` nor ``bytes``
        """
        if isinstance(font, str):
            self.font = TTFont(font)
        elif isinstance(font, bytes):
            self.font = TTFont(BytesIO(font))
        else:
            raise ValueError('unknown font type')
        self.glyphnames = self.font.getGlyphOrder()
        # A fresh list per instance; a mutable default would be shared.
        if ignore_names is None:
            ignore_names = []
        self.ignore_names = ignore_names if overwrite_ignore else ignore_names + IGNORE_NAMES

    @staticmethod
    def _group_by_contours(items):
        """Group glyph records by their ``endPtsOfContours`` id, keeping input order."""
        groups = {}
        for item in items:
            groups.setdefault(item.get("endPtsOfContours"), []).append(item)
        return groups

    def parse_fonts(self, project):
        """
        Resolve every code point of the font to its character.

        The result-set JSON of the project the font belongs to is loaded from
        the ``font_jsons`` directory next to this module; for a new project it
        is generated via OCR and saved first.
        :param project: name of the project the font file belongs to
        :return: dict mapping ``hex(code point)`` to the recognised character
        """
        project = project + ".json"
        json_path = os.path.join(os.path.dirname(__file__), "font_jsons", project)
        if os.path.exists(json_path):
            with open(json_path, "r", encoding="utf-8") as f:
                font_json = json.load(f)
        else:
            font_json = self.get_fonts_by_orc()
            logging.info(f"结果json保存路径为:{json_path}")
            # Save the generated result set for later runs.
            with open(json_path, "w", encoding="utf-8") as f:
                json.dump(font_json, f)

        result = {}
        grouped = self._group_by_contours(self.get_font_message())
        for item_list in grouped.values():
            if len(item_list) == 1:
                # The endPtsOfContours id alone identifies the glyph.
                only = item_list[0]
                result[only.get("glyphname")] = font_json.get(only.get("endPtsOfContours"))
            else:
                # Several glyphs share one id: order them by the average of
                # their coordinates and pair them with the stored list by
                # position (this may misidentify glyphs in unusual cases).
                item_list.sort(key=lambda item: self.avg(item["coordinates"]))
                for index, item in enumerate(item_list):
                    result[item.get("glyphname")] = font_json.get(item.get("endPtsOfContours"))[index]

        cmaps = self.font.getBestCmap()
        return {hex(cmap_id): result.get(glyname) for cmap_id, glyname in cmaps.items()}

    def get_fonts_by_orc(self):
        """
        Build the known-glyph dictionary for this font via OCR.

        Glyph records are grouped by their ``endPtsOfContours`` id. An id used
        by a single glyph maps to that glyph's character; an id shared by
        several glyphs maps to the list of their characters, ordered by the
        average of each glyph's coordinates.
        :return: dict mapping the endPtsOfContours id to a character or a list of characters
        """
        grouped = self._group_by_contours(self.get_coordinate_matrix_and_value())
        result = {}
        for endPtsOfContours, item_list in grouped.items():
            if len(item_list) == 1:
                result[endPtsOfContours] = item_list[0].get("value")
            else:
                item_list.sort(key=lambda item: self.avg(item["coordinates"]))
                result[endPtsOfContours] = [item.get("value") for item in item_list]
        return result

    def accurate_basic(self):
        """
        Recognise the rendered glyph image through the Baidu client.

        When the number of recognised characters differs from the number of
        rendered glyphs, a warning is emitted; in debug mode the glyphs that
        received no character are shown and entered manually.
        :return: dict mapping glyph name to recognised character
        """
        image, name_list, image_dict = self.ttf_to_image()
        response = self.baidu.accurate_basic_of_pillow(image)
        payload = response.json()
        print(payload)
        word_list = []
        for entry in payload.get("words_result"):
            word_list.extend(list(entry.get("words")))
        logging.info(f"百度识图结果:{word_list}")
        words = dict(zip(name_list, word_list))
        # Compare by value: identity comparison of ints is unreliable.
        if len(word_list) != len(name_list):
            # Some glyphs were not recognised; a few can be added by hand.
            warnings.warn("words length is not equal to gly length,")
            if DEBGU:
                # Outside debug mode the unrecognised glyphs are simply left out.
                for glyname, faild in self.get_orc_faild_font(words, name_list, image_dict).items():
                    faild.show()
                    word = input("请输入图片中显示的文字:")
                    words[glyname] = word
        return words

    def get_coordinate_matrix_and_value(self):
        """
        Build the feature records of the OCR-recognised glyphs.
        :return: list of dicts with ``coordinates``, ``endPtsOfContours`` and ``value``
        """
        words = self.accurate_basic()
        fonts_coordinate_matrix = []
        for glyphname, word in words.items():
            # Skip names starting with '.' or 'g' (e.g. '.notdef', '.null') and ignored names.
            if glyphname[0] in ['.', 'g'] or glyphname in self.ignore_names:
                continue
            item = {}
            glyph = self.font['glyf'][glyphname]
            item["coordinates"] = glyph.coordinates._a.tolist()
            item["endPtsOfContours"] = base64.b64encode(str(glyph.endPtsOfContours).encode("utf-8")).decode("utf-8")
            item["value"] = word
            fonts_coordinate_matrix.append(item)
        return fonts_coordinate_matrix

    def get_font_message(self):
        """
        Collect the feature records of every glyph in the font.
            coordinates: the glyph's coordinate values as a list
                         (presumably flattened as [x, y, x1, y1, ...] -- confirm)
            endPtsOfContours: the glyph's endPtsOfContours list, rendered as
                              text and base64-encoded to serve as an id. For
                              example [3, 9] describes two contours: points
                              0-3 form the first and points 4-9 the second.
        :return: list of dicts with ``coordinates``, ``endPtsOfContours`` and ``glyphname``
        """
        fonts_coordinate_matrix = []  # result set
        for glyphname in self.glyphnames:  # every glyph of the font, by name
            # Skip names starting with '.' or 'g' (e.g. '.notdef', '.null') and ignored names.
            if glyphname[0] in ['.', 'g'] or glyphname in self.ignore_names:
                continue
            item = {}
            glyph = self.font['glyf'][glyphname]
            item["coordinates"] = glyph.coordinates._a.tolist()
            item["endPtsOfContours"] = base64.b64encode(str(glyph.endPtsOfContours).encode("utf-8")).decode("utf-8")
            item["glyphname"] = glyphname
            fonts_coordinate_matrix.append(item)
        if DEBGU:
            logging.debug(msg=fonts_coordinate_matrix)
        return fonts_coordinate_matrix

    def ttf_to_image(self):
        """
        Draw the glyphs of the font onto one PIL image, laid out as a grid.
        :return: tuple ``(image, name_list, image_dict)``: the combined image,
                 the drawn glyph names in order, and glyph name -> single glyph image
        """
        glyphset = self.font.getGlyphSet()
        size = (BASE_BACKGOUND_WIDTH * FONT_NUMS_PER_LINE,
                ceil(len(self.glyphnames) / FONT_NUMS_PER_LINE) * BASE_BACKGOUND_HEIGHT)  # background size
        image = Image.new("RGB", size=size, color=(255, 255, 255))  # white background
        name_list, image_dict = [], {}
        for index, glyphname in enumerate(self.glyphnames):
            # Skip names starting with '.' or 'g' (e.g. '.notdef', '.null') and ignored names.
            if glyphname[0] in ['.', 'g'] or glyphname in self.ignore_names:
                continue
            g = glyphset[glyphname]
            # The pen takes the glyph set (used to resolve components), not the name list.
            pen = ReportLabPen(glyphset, Path(fillColor=colors.black, strokeWidth=1))
            g.draw(pen)
            # Square canvas of at least 1000 units per side.
            w = h = max(g.width, 1000)
            g = Group(pen.path)
            g.translate(0, 200)
            d = Drawing(w, h)
            d.add(g)
            im = renderPM.drawToPIL(d, dpi=72).resize((FONT_WIDTH, FONT_HEIGHT))
            # The grid cell is derived from the glyph's index in the full glyph order.
            box = (
                (index % FONT_NUMS_PER_LINE) * BASE_BACKGOUND_WIDTH,
                (index // FONT_NUMS_PER_LINE) * BASE_BACKGOUND_HEIGHT)
            image.paste(im, box=box)
            name_list.append(glyphname)
            image_dict[glyphname] = im
        return image, name_list, image_dict

    @staticmethod
    def get_orc_faild_font(words, name_list, image_dict):
        """Return glyph name -> glyph image for every name without a recognised character."""
        faild_fonts_img = {}
        for glypname in name_list:
            if not words.get(glypname):
                faild_fonts_img[glypname] = image_dict.get(glypname)
        return faild_fonts_img

    @staticmethod
    def avg(alist):
        """Return the floor of the mean of ``alist``; 0 for an empty list."""
        if not alist:
            return 0
        return sum(alist) // len(alist)
Пример #14
0
    # Download the font referenced by the page (`url` and `session` come from
    # outside this snippet) and store it as base.ttf.
    url = 'https://www.shixiseng.com' + url[0]
    res = session.get(url).content
    with open('base.ttf', 'wb') as f:
        f.write(res)
    # font_str = re.findall(r";base64,(.*?)'\)", res, re.S)[0]
    # base_font = make_font_file(font_str, 'base')
    # Real characters, paired by position with the font's code points below.
    old = [
        '一', '师', '会', '四', '计', '财', '场', '聘', '招', '工', '周', '端', '年', '设',
        '程', '二', '五', '天', '前', '网', '广', '市', '月', '个', '告', '作', '三', '互',
        '生', '人', '政', '件', '行', '软', '银', '联', '0', '1', '2', '3', '4', '5',
        '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
        'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
        'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
        'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
    ]
    new_font = TTFont('base.ttf')
    new_font.saveXML('base.xml')

    # Code points of the font's best cmap, as hex strings in iteration order.
    keys = new_font.getBestCmap()
    new_map = []
    for key in keys:
        key = hex(key)
        new_map.append(key)
    # Drop the first code point and turn the rest into "&#xXXXX" entity
    # prefixes built from the last four hex characters.
    new_map = ['&#x%s' % i[-4:] for i in new_map[1:]]

    # NOTE(review): pairing is purely positional and assumes new_map is no
    # longer than `old` -- confirm against the font.
    items = [(new_map[i], old[i]) for i in range(len(new_map))]

    # `html` is defined outside this snippet; each entity prefix is replaced by its character.
    for i in range(len(items)):
        html = html.replace(items[i][0], items[i][1])

    print(html)
Пример #15
0
class MyFont:
    """Wrap a fontTools ``TTFont`` and expose glyph outlines as segments."""

    def __init__(self, font_path):
        """Open the font at ``font_path`` and keep its glyph set and best cmap."""
        font = TTFont(font_path)
        self.font = font
        self.glyph_set = font.getGlyphSet()
        self.cmap = font.getBestCmap()

    def getGlyph(self, char):
        """Look up the glyph object for a character.

        --- Parameters ---
        char : a single character, or its integer code point
        --- Return ---
        the glyph-set entry mapped to that code point (``KeyError`` if the
        cmap or glyph set has no such entry)
        """
        code_point = char if isinstance(char, int) else ord(char)
        return self.glyph_set[self.cmap[code_point]]

    def getVectorControl(self, char):
        """Record the drawing commands (control points) of a glyph.

        --- Parameters ---
        char : the target character
        --- Return ---
        the ``value`` list recorded by a ``DecomposingRecordingPen``
        """
        pen = DecomposingRecordingPen(self.glyph_set)
        self.getGlyph(char).draw(pen)
        return pen.value

    def control2Lines(self, control, log=True):
        """Convert recorded pen commands into Bezier curves and straight lines.

        --- Parameters ---
        control : command list as returned by getVectorControl()
        log     : print every command while it is processed
        --- Return ---
        list of ``Bezier`` and ``PlaneLine`` objects, in command order
        """
        contour_origin = None  # first point of the contour being traced
        cursor = None          # current pen position
        segments = []

        for command in control:
            if log:
                print(command)
            op = command[0]
            if op == "closePath":
                # Close the contour with a straight edge.
                segments.append(PlaneLine([contour_origin, cursor]))
            elif op == "moveTo":
                # A new contour starts here.
                cursor = contour_origin = command[1][0]
            elif op == "qCurveTo" or op == "curveTo":
                curve = Bezier([cursor] + list(command[1]))
                segments.append(curve)
                cursor = tuple(curve.plist[-1])
            elif op == "lineTo":
                segments.append(PlaneLine([cursor, command[1][0]]))
                cursor = command[1][0]
            elif op == "addComponent":
                print("[ERROR] 'addComponent' is not implemented!")
                print("Please use 'DecomposingRecordingPen'")
            else:
                print("[ERROR] Unknown command: ", op)
        return segments

    def draw(self, char, control_path=False, show=True):
        """Plot the outline of a character.

        --- Parameters ---
        char         : the character to draw
        control_path : passed to ``Bezier.plot`` (whether to draw control points)
        show         : call ``plt.show()`` after plotting
        --- Return ---
        the axes object returned by the last ``plot`` call (``None`` when
        nothing was plotted)
        """
        segments = self.control2Lines(self.getVectorControl(char), False)

        ax = None
        for segment in segments:
            if isinstance(segment, Bezier):
                ax = segment.plot(ax, control_path, resolution=20)
            elif isinstance(segment, PlaneLine):
                ax = segment.plot(ax, linestyle="-", color="black")
        if show:
            plt.show()
        return ax

    def make_lines(self, base_line, div_num):
        """Create ``div_num`` radial lines by rotating a base line.

        --- Parameters ---
        base_line : reference line (PlaneLine object)
        div_num   : number of lines to create
        --- Return ---
        list of lines, one per rotation step of 360 / div_num degrees
        """
        step_deg = 360 / div_num
        angles = [np.deg2rad(step_deg * k) for k in range(div_num)]
        # Each line is built as translation() -> rotate(angle) ->
        # translation(first point of the base line).
        return [
            base_line.translation().rotate(angle).translation(
                base_line.plist[0])
            for angle in angles
        ]

    def fetch_distance_vectors(self, char="a", line_num=32, debug=False):
        """Measure, per radial line, the farthest and nearest outline hit.

        Radial lines are cast from the centre of the glyph's bounding box and
        intersected with every outline segment.

        --- Parameters ---
        char     : the character to examine
        line_num : number of radial lines to cast
        debug    : plot the glyph, the radial lines and the selected points
        --- Return ---
        selected_point_max : coordinates of the farthest hit on each line
        selected_point_min : coordinates of the nearest hit on each line
        r_max : distances from the centre to the farthest hits
        r_min : distances from the centre to the nearest hits
        """
        segments = self.control2Lines(self.getVectorControl(char), log=False)

        # Bounding box of all control points (also sets the ray length).
        xs, ys = zip(*[pt for segment in segments for pt in segment.plist])
        box_min = Point(min(xs), min(ys))
        box_max = Point(max(xs), max(ys))

        if debug:
            # Draw the glyph itself (for debug).
            ax = self.draw(char, show=False)

        # Centre of the bounding box.
        gp = Point(box_min.x + (box_max.x - box_min.x) / 2,
                   box_min.y + (box_max.y - box_min.y) / 2)

        # Reference line pointing in +x from the centre.
        base_line = PlaneLine(
            [gp.point, (gp.x + max(box_max.x, box_max.y), gp.y)])
        rays = self.make_lines(base_line, line_num)
        if debug:
            for ray in rays:
                ray.plot(ax, color="gray")

        # Intersections of every ray with every outline segment.
        hits_per_ray = []
        clipper = BezierClipping()
        for ray in rays:
            hits = []
            for segment in segments:
                if isinstance(segment, Bezier):
                    found = clipper.detect_intersection(segment, ray)
                    if found == []:
                        continue
                    _, found_points = zip(*found)
                    hits.extend(found_points)
                elif isinstance(segment, PlaneLine):
                    hit = segment.intersection(ray)
                    if hit is not None:
                        hits.append(hit)
            hits_per_ray.append(hits)

        # Keep only the farthest and the nearest hit on each ray; a ray
        # without hits falls back to the centre point for both.
        selected_point_max = []
        selected_point_min = []
        for hits, ray in zip(hits_per_ray, rays):
            far_len = 0
            near_len = ray.length
            far_pt = gp
            near_pt = gp
            for hit in hits:
                dist = PlaneLine([gp, hit]).length
                if far_len < dist:
                    far_pt = hit
                    far_len = dist
                if near_len > dist:
                    near_pt = hit
                    near_len = dist
            selected_point_max.append(far_pt.point)
            selected_point_min.append(near_pt.point)

        if debug:
            # Plot the selected extreme points (for debug).
            far_x, far_y = zip(*selected_point_max)
            ax.plot(far_x, far_y, 'o', color="red")
            near_x, near_y = zip(*selected_point_min)
            ax.plot(near_x, near_y, '.', color="blue")

        # Distances from the centre to the selected points.
        r_max = [PlaneLine([gp.point, pt]).length for pt in selected_point_max]
        r_min = [PlaneLine([gp.point, pt]).length for pt in selected_point_min]

        return selected_point_max, selected_point_min, r_max, r_min
Пример #16
0
    'six': 6,
    'seven': 7,
    'eight': 8,
    'nine': 9
}

# Load the web font with fontTools' TTFont.
base_font = TTFont('OqcBUBPX.woff')
# Dump the font as XML so its tables can be inspected by hand.
base_font.saveXML('font.xml')

map_order = base_font.getGlyphOrder()
print(map_order)
# Mapping rule of the font: code point used in the page -> glyph name.
map_list = base_font.getBestCmap()
print(map_list)

# Turn the map into a replacement table, in place: each glyph name
# (e.g. 'period') is swapped for the value map_str_2_number holds for it.
for key, glyph_name in map_list.items():
    map_list[key] = map_str_2_number[glyph_name]

print(map_list)

with open('替换之前的.html', mode='r', encoding='utf-8') as f:
    text = f.read()

# Replace every decimal HTML entity with the value it stands for.
for key, value in map_list.items():
    entity = f'&#{key};'
    text = text.replace(entity, str(value))
Пример #17
0
def obfuscate_plus(plain_text,
                   filename: str,
                   only_ttf: bool,
                   target_path: str = 'output'):
    """Build a font that maps random private-use code points to real glyphs.

    Each distinct character of ``plain_text`` is drawn from the base font and
    stored under the glyph name of a randomly chosen CJK code point; the new
    font's cmap maps a random private-use code point to that glyph.

    :param plain_text: the text the user should see
    :param filename: output file name without extension
    :param only_ttf: when true, only the ``.ttf`` file is produced
    :param target_path: output directory, relative to ``root``
    :return: when ``only_ttf`` is true, a dict ``{'ttf': path}``; otherwise a
        tuple ``(paths, mapping)`` where ``paths`` merges that dict with the
        result of ``subset_ttf_font`` and ``mapping`` maps each plain
        character to its ``'&#x....'`` entity string.
        NOTE(review): the two branches return different shapes; kept as-is
        for existing callers.
    :raises Exception: if ``plain_text`` contains whitespace or emoji
    """

    if str_has_whitespace(plain_text):
        raise Exception('明文不允许含有空格')

    if str_has_emoji(plain_text):
        raise Exception('明文不允许含有emoji')

    plain_text = deduplicate_str(plain_text)

    original_font = TTFont(root / BASE_FONT_FILE)
    # https://github.com/fonttools/fonttools/blob/4.0.1/Lib/fontTools/fontBuilder.py#L28

    # <class 'dict'>: {32: 'cid00001', 33: 'cid00002', 34: 'cid00003'...}
    # keys are ord(character)
    original_cmap: dict = original_font.getBestCmap()

    # Presumably raises when the base font lacks a character of plain_text;
    # any exception simply propagates to the caller.
    ensure_cmap_has_all_text(original_cmap, plain_text)

    glyphs, metrics, cmap = {}, {}, {}

    # Unicode private use area
    # https://zh.wikipedia.org/wiki/Unicode%E5%AD%97%E7%AC%A6%E5%B9%B3%E9%9D%A2%E6%98%A0%E5%B0%84
    private_codes = random.sample(range(0xE000, 0xF8FF), len(plain_text))

    # Unicode range of common Chinese characters
    # https://www.zhangxinxu.com/study/201611/chinese-language-unicode-range.html
    cjk_codes = random.sample(range(0x4E00, 0x9FA5), len(plain_text))

    # https://github.com/fonttools/fonttools/blob/4.0.1/Tests/pens/ttGlyphPen_test.py#L21
    glyph_set = original_font.getGlyphSet()

    pen = TTGlyphPen(glyph_set)

    glyph_order = original_font.getGlyphOrder()

    final_shadow_text: list = []

    # Carry over the special glyphs when the base font has them.
    if 'null' in glyph_order:
        glyph_set['null'].draw(pen)
        glyphs['null'] = pen.glyph()
        metrics['null'] = original_font['hmtx']['null']

        final_shadow_text.append('null')

    if '.notdef' in glyph_order:
        glyph_set['.notdef'].draw(pen)
        glyphs['.notdef'] = pen.glyph()
        metrics['.notdef'] = original_font['hmtx']['.notdef']

        final_shadow_text.append('.notdef')

    html_entities = []

    # In theory the order could be shuffled once more here.
    for index, plain in enumerate(plain_text):
        try:
            shadow_cmap_name = original_cmap[cjk_codes[index]]
        except KeyError:
            # The randomly chosen CJK code point does not exist in the base
            # font: log it and start over with a fresh random draw.
            traceback.print_exc()
            # Arguments must follow the signature order (plain_text first).
            return obfuscate_plus(plain_text, filename, only_ttf, target_path)

        final_shadow_text.append(shadow_cmap_name)

        plain_glyph_name = original_cmap[ord(plain)]

        # Store the outline of the plain character under the shadow glyph name.
        glyph_set[plain_glyph_name].draw(pen)
        glyphs[shadow_cmap_name] = pen.glyph()

        metrics[shadow_cmap_name] = original_font['hmtx'][plain_glyph_name]

        cmap[private_codes[index]] = shadow_cmap_name
        html_entities.append(hex(private_codes[index]).replace('0x', '&#x'))

    horizontal_header = {
        'ascent': original_font['hhea'].ascent,
        'descent': original_font['hhea'].descent,
    }

    fb = FontBuilder(original_font['head'].unitsPerEm, isTTF=True)
    fb.setupGlyphOrder(final_shadow_text)
    fb.setupCharacterMap(cmap)
    fb.setupGlyf(glyphs)
    fb.setupHorizontalMetrics(metrics)
    fb.setupHorizontalHeader(**horizontal_header)
    fb.setupNameTable(NAME_STRING)
    fb.setupOS2()
    fb.setupPost()
    fb.save(f'{root}/{target_path}/{filename}.ttf')

    result = dict()
    result['ttf'] = f'{root}/{target_path}/{filename}.ttf'

    if only_ttf:
        return result
    else:
        woff_and_woff2 = subset_ttf_font(f'{root}/{target_path}/{filename}')
        return {
            **result,
            **woff_and_woff2
        }, dict(zip(plain_text, html_entities))
Пример #18
0
def qd_Font_url(font_url):
    """Download the font at ``font_url`` and return its best cmap.

    The request is sent with the module-level ``headers``; the response body
    is parsed in memory with ``TTFont``.

    :param font_url: URL of the font file
    :return: the mapping returned by ``TTFont.getBestCmap()``
    """
    raw_font = requests.get(font_url, headers=headers).content
    # Current character mapping of the downloaded font.
    return TTFont(io.BytesIO(raw_font)).getBestCmap()
Пример #19
0
def obfuscate(plain_text,
              shadow_text,
              filename: str,
              only_ttf: bool,
              target_path: str = 'output') -> dict:
    """Build a font in which the characters of ``shadow_text`` render as ``plain_text``.

    After de-duplication both texts are paired character by character: the
    outline of each plain character is stored under the glyph name of its
    shadow character, and the new cmap maps the shadow code point to it.

    :param plain_text: the text the user should see
    :param shadow_text: the text a crawler reads from the page source
    :param filename: output file name without extension
    :param only_ttf: when true, only the ``.ttf`` file is produced
    :param target_path: output directory, relative to ``root``
    :return: ``{'ttf': path}``, merged with the result of
        ``subset_ttf_font`` when ``only_ttf`` is false
    :raises Exception: if either text contains whitespace or emoji, if both
        texts are equal after de-duplication, or if their lengths differ
    :raises KeyError: if the base font's cmap lacks a shadow character
    """

    # Use boolean `or`, not bitwise `|`: it short-circuits and does not
    # require the predicates to return real bools.
    if str_has_whitespace(plain_text) or str_has_whitespace(shadow_text):
        raise Exception('明文或阴书不允许含有空格')

    if str_has_emoji(plain_text) or str_has_emoji(shadow_text):
        raise Exception('明文或阴书不允许含有emoji')

    plain_text = deduplicate_str(plain_text)
    shadow_text = deduplicate_str(shadow_text)

    if plain_text == shadow_text:
        raise Exception('没有意义的混淆')

    if len(plain_text) != len(shadow_text):
        raise Exception('阴书的有效长度需与明文一致')

    original_font = TTFont(root / BASE_FONT_FILE)
    # https://github.com/fonttools/fonttools/blob/4.0.1/Lib/fontTools/fontBuilder.py#L28

    # <class 'dict'>: {32: 'cid00001', 33: 'cid00002', 34: 'cid00003'...}
    # keys are ord(character)
    original_cmap: dict = original_font.getBestCmap()

    # Presumably raises when the base font lacks a character of plain_text;
    # any exception simply propagates to the caller.
    ensure_cmap_has_all_text(original_cmap, plain_text)

    glyphs, metrics, cmap = {}, {}, {}

    # https://github.com/fonttools/fonttools/blob/4.0.1/Tests/pens/ttGlyphPen_test.py#L21
    glyph_set = original_font.getGlyphSet()

    pen = TTGlyphPen(glyph_set)

    glyph_order = original_font.getGlyphOrder()

    final_shadow_text: list = []

    # Carry over the special glyphs when the base font has them.
    if 'null' in glyph_order:
        glyph_set['null'].draw(pen)
        glyphs['null'] = pen.glyph()
        metrics['null'] = original_font['hmtx']['null']

        final_shadow_text.append('null')

    if '.notdef' in glyph_order:
        glyph_set['.notdef'].draw(pen)
        glyphs['.notdef'] = pen.glyph()
        metrics['.notdef'] = original_font['hmtx']['.notdef']

        final_shadow_text.append('.notdef')

    for plain, shadow in zip(plain_text, shadow_text):
        shadow_cmap_name = original_cmap[ord(shadow)]
        plain_glyph_name = original_cmap[ord(plain)]

        final_shadow_text.append(shadow_cmap_name)

        # Store the outline of the plain character under the shadow glyph name.
        glyph_set[plain_glyph_name].draw(pen)
        glyphs[shadow_cmap_name] = pen.glyph()

        metrics[shadow_cmap_name] = original_font['hmtx'][plain_glyph_name]

        cmap[ord(shadow)] = shadow_cmap_name

    horizontal_header = {
        'ascent': original_font['hhea'].ascent,
        'descent': original_font['hhea'].descent,
    }

    fb = FontBuilder(original_font['head'].unitsPerEm, isTTF=True)
    fb.setupGlyphOrder(final_shadow_text)
    fb.setupCharacterMap(cmap)
    fb.setupGlyf(glyphs)
    fb.setupHorizontalMetrics(metrics)
    fb.setupHorizontalHeader(**horizontal_header)
    fb.setupNameTable(NAME_STRING)
    fb.setupOS2()
    fb.setupPost()
    fb.save(f'{root}/{target_path}/{filename}.ttf')

    result = dict()
    result['ttf'] = f'{root}/{target_path}/{filename}.ttf'

    if not only_ttf:
        woff_and_woff2 = subset_ttf_font(f'{root}/{target_path}/{filename}')
        result = {**result, **woff_and_woff2}

    return result
Пример #20
0
class TycTTF():
    """Recognise the characters of an obfuscated web font.

    The font is downloaded, each obfuscated character is rendered to an image
    and handed to ``AipClient`` for recognition; results are looked up in
    Redis first.

    Instances are cached per ``font_key`` (pseudo-singleton).
    NOTE(review): ``__init__`` still runs on every construction, so a cached
    instance downloads its font again each time it is "created".
    """
    # font_key -> instance
    _instance = {}

    def __init__(self,font_key,url=None,imgSize=(0,0),imgMode='RGB',bg_color=(0,0,0),fg_color=(255,255,255),fontsize=30):
        """
        :param font_key: key identifying the font; also used as the cache key
        :param url: font URL; built from ``font_key`` when omitted
        :param imgSize: image size; ``(0, 0)`` means "text size plus 10 pixels"
        :param imgMode: PIL image mode
        :param bg_color: background colour of the rendered image
        :param fg_color: text colour of the rendered image
        :param fontsize: font size used for rendering
        """
        self.imgSize = imgSize
        self.imgMode = imgMode
        self.fontsize = fontsize
        self.bg_color = bg_color
        self.fg_color = fg_color
        self.font_key = font_key
        self.url = url or self.make_url
        self.get_ttl()
        self.client = AipClient(APP_ID, API_KEY, SECRET_KEY,REDIS_URL)
        self.r = RedisClient(REDIS_URL)

    def __new__(cls, font_key, *args, **kw):
        '''
        Pseudo-singleton: return the cached instance for ``font_key``.

        The first parameter mirrors ``__init__``'s ``font_key`` so that
        ``url`` can also be passed by keyword without a name clash.
        '''
        if font_key not in cls._instance:
            cls._instance[font_key] = super().__new__(cls)
        return cls._instance[font_key]

    @property
    def make_url(self):
        """Default font URL derived from ``font_key``."""
        return 'https://static.tianyancha.com/fonts-styles/fonts/%s/%s/tyc-num.woff' % (self.font_key[:2],self.font_key)

    def get_ttl(self):
        """Download the font and build the PIL font, the TTFont and the character set."""
        res = requests.get(self.url)
        # PIL font object used for rendering
        self.font = ImageFont.truetype(BytesIO(res.content),self.fontsize)
        # fontTools font object
        self.ttf = TTFont(BytesIO(res.content))
        # Every character the font defines, taken from its cmap
        self.strings = self._chars_from_codepoints(self.ttf.getBestCmap().keys())

    @staticmethod
    def _chars_from_codepoints(codepoints):
        """Return the set of characters for an iterable of integer code points.

        ``chr`` handles every code point, including those below 0x10, in
        0x100-0xFFF and above 0xFFFF.
        """
        return {chr(codepoint) for codepoint in codepoints}

    def GenLetterImage(self,letters:str):
        """Render ``letters`` centred on a new image stored in ``self.img``."""
        self.letters = letters
        (self.letterWidth,self.letterHeight) = self.font.getsize(letters)
        if self.imgSize==(0,0):
            # Text size plus 10 pixels in each dimension
            self.imgSize=(self.letterWidth+10,self.letterHeight+10)
        self.imgWidth,self.imgHeight=self.imgSize
        # Blank image
        self.img = Image.new(self.imgMode, self.imgSize, self.bg_color)
        # Drawing handle
        self.drawBrush = ImageDraw.Draw(self.img)
        textY0 = (self.imgHeight-self.letterHeight+1)/2
        textY0 = int(textY0)
        textX0 = int((self.imgWidth-self.letterWidth+1)/2)
        # Draw the text with the downloaded font
        self.drawBrush.text((textX0,textY0), self.letters, fill=self.fg_color,font=self.font)

    def _orc(self, word:str):
        """Render ``word`` and pass the JPEG image to the recognition client."""
        self.GenLetterImage(word)
        # Image container receiving the encoded bytes
        img = ImageBytes()
        self.img.save(img, 'JPEG')
        if word in {'0','1','2','3','4','5','6','7','8','9','x'}:
            # Digits are recognised with the English model
            kwarg = {'language_type':'ENG'}
        else:
            # Everything else uses the Chinese + English model
            kwarg = {'language_type':'CHN_ENG'}
        return self.client.run(img.img,self.font_key,word,**kwarg)

    def orc(self,word:str):
        """Return the recognised text for ``word``, using the Redis hash first.

        The hash is keyed by ``font_key`` for both the existence check and
        the read, matching the key passed to the recognition client.
        """
        if self.r.hexists(self.font_key, word):
            return self.r.hget(self.font_key, word).decode('utf-8')
        else:
            return self._orc(word)

    def run(self, word:str):
        """Translate ``word``: characters defined by the font are recognised, others kept."""
        return ''.join(
            self.orc(letter) if letter in self.strings else letter
            for letter in word
        )
Пример #21
0
    def parse(self, response):
        """Parse a board page whose numbers are rendered with an obfuscated web font.

        Downloads the font referenced by the page, then yields one dict per
        ``board-item-content`` node with the keys ``'title'`` and ``'p'``
        (the decoded number string). Most ``print`` calls are debug output.
        """
        # Extract the .woff URL from the page's CSS and save the font to disk.
        font_url = 'http:' + re.search(r"url\('(.*\.woff)'\)",
                                       response.text).group(1)
        with open('on_maoyan.woff', 'wb') as f:
            f.write(requests.get(font_url).content)

        base_font = TTFont('base_maoyan.woff')  # reference (base) font, read from a local file
        # base_font.saveXML('base_maoyan.xml')
        base_uni = base_font.getGlyphOrder()[2:]  # glyph order of the base font, skipping the first two entries
        print('base_uni:', base_uni)
        base_obj = base_font.getGlyphNames()[1:-1]  # glyph names of the base font, without the first and last

        print('base_obj:', base_obj)
        # Glyph name in the base font -> the digit it draws.
        base_dict = {
            'uniE5A1': '9',
            'uniF2B5': '5',
            'uniE3BD': '8',
            'uniF48F': '1',
            'uniE6B8': '0',
            'uniF03F': '2',
            'uniEFB6': '6',
            'uniF7EF': '7',
            'uniF822': '3',
            'uniF14B': '4'
        }

        online_font = TTFont('on_maoyan.woff')  # the font just downloaded for this page
        online_font.saveXML('on_maoyan.xml')  # dump it as XML to inspect its structure

        on_name = online_font.getBestCmap()
        print('on_name:', on_name)

        online_uni = online_font.getGlyphOrder()[2:]
        print('online_uni:', online_uni)

        online_obj = online_font.getGlyphNames()[1:-1]
        print('online_obj:', online_obj)

        selector = etree.HTML(response.body.decode('utf-8'))
        node_list = selector.xpath('//*[@class="board-item-content"]')
        print(node_list)
        for node in node_list:
            print(node)
            item = {}
            item['title'] = node.xpath('.//p[@class="name"]/a/text()')[0]
            rt = node.xpath('.//p[@class="realtime"]/span/span/text()')[0]
            print('rt:', rt)
            print(etree.tostring(node))
            # Serialised node (bytes); the entities are extracted from it below.
            i = etree.tostring(node)
            a = node.xpath('.//p[@class="realtime"]/span/span/text()')
            print('a:', a)

            print(type(a[0]))
            print(type(i))

            # Debug output only: different views of the same text.
            print('a:', bytes(a[0], encoding="utf8").decode('unicode-escape'))
            print('a:', a[0].encode('utf-8').decode('utf-8'))
            print('a[0]:', b'a'[0])
            print('a[0]:', b'a[0]'.decode('utf-8'))
            # Raw content of the first "stonefont" span in the serialised node.
            b = re.findall(b'span class="stonefont">(.*?)</span>',
                           i)[0].decode('utf-8')
            print(b)
            # Strip the '&#' prefixes and make '.' its own ';'-separated token.
            b = re.sub('&#', '', b)
            b = re.sub('\.', '.;', b)
            b = b.split(';')
            item['p'] = []
            # NOTE: this loop variable reuses (and overwrites) `i` from above.
            for i in b:
                if i != '':
                    if i == '.':
                        item['p'].append(i)
                    else:
                        # presumably pojie() maps the entity code to its real
                        # digit via the base font — TODO confirm
                        item['p'].append(
                            pojie(online_font, i, base_dict, base_uni,
                                  base_font))
            item['p'] = ''.join(item['p'])
            yield item