Exemplo n.º 1
0
    def parse(self, response):
        """Decode sprite-obfuscated numbers on the page and yield them as one item.

        Each digit is a ``div`` whose CSS class selects a horizontal offset
        (``background-position-x``) into a base64-embedded PNG sprite. The
        absolute offset is matched against consecutive pairs of the values
        returned by ``self.split_img_number``; the index of the pair that
        contains the offset is used as the digit.

        Args:
            response: Response whose ``text`` contains the inline CSS rules and
                the embedded sprite, and whose ``xpath`` selects the
                ``col-md-1`` number cells.

        Yields:
            A single ``GlidedskyItem`` with ``numbers`` set to the decoded
            integers in page order.

        Raises:
            IndexError: If the page contains no embedded sprite image.
            KeyError: If a digit ``div`` uses a class without an offset rule.
            ValueError: If an offset is not an integer, or a cell produces no
                digits at all.
        """
        page_text = response.text
        sprite_b64 = re.findall(
            r'background-image:url\("data:image/png;base64,(.*?)"\)', page_text
        )[0]
        # Keys carry the " sprite" suffix so they match the full class
        # attribute value that is looked up for each digit div below.
        offset_by_class = {
            name + " sprite": offset
            for name, offset in re.findall(
                r"\.(.*?) { background-position-x:(.*?)px }", page_text
            )
        }

        img = Image.open(BytesIO(base64.b64decode(sprite_b64.encode("utf-8"))))
        # NOTE(review): presumably x-coordinates of the digit boundaries in the
        # sprite -- confirm against split_img_number.
        borders = self.split_img_number(img)
        # Adjacent (left, right) pairs; pair i is the interval for digit i.
        intervals = list(zip(borders, borders[1:]))

        numbers = []
        for col in response.xpath("//div[@class='col-md-1']"):
            digits = []
            for class_attr in col.xpath("./div/@class").extract():
                # Parse the offset once per digit instead of once per border.
                offset = abs(int(offset_by_class[class_attr]))
                for digit, (left, right) in enumerate(intervals):
                    if left <= offset < right:
                        digits.append(str(digit))
                        break
            numbers.append(int("".join(digits)))
        yield GlidedskyItem(numbers=numbers)
Exemplo n.º 2
0
 def parse(self, response):
     """Translate font-obfuscated digits on the page and yield them as one item.

     The page embeds a base64 font. Every glyph of that font (skipping the
     first entry of the glyph order) is compared with every glyph of the
     local reference font ``glidedsky/data/font.ttf``. When two glyphs are
     equal, the digit spelled by the embedded glyph's name is mapped to the
     digit registered for the matching reference glyph. The text of each
     ``col-md-1`` cell is then translated character by character.

     Args:
         response: Response exposing ``text`` (page source with the embedded
             font) and ``xpath``.

     Yields:
         A single ``GlidedskyItem`` whose ``numbers`` holds the translated
         integers in page order.
     """
     reference_font = TTFont(r"glidedsky/data/font.ttf")
     reference_names = reference_font.getGlyphOrder()[1:]
     # Reference glyph name -> digit.
     # NOTE(review): presumably the digit each reference glyph really draws --
     # confirm against font.ttf.
     reference_digit = {
         name: str(position)
         for position, name in enumerate(
             (
                 "five",
                 "four",
                 "two",
                 "three",
                 "zero",
                 "one",
                 "nine",
                 "six",
                 "eight",
                 "seven",
             )
         )
     }
     # Glyph name -> the digit character that name spells out.
     spelled_digit = {
         name: str(position)
         for position, name in enumerate(
             (
                 "zero",
                 "one",
                 "two",
                 "three",
                 "four",
                 "five",
                 "six",
                 "seven",
                 "eight",
                 "nine",
             )
         )
     }

     embedded_b64 = re.findall(
         r"data:font;charset=utf-8;base64,(.*?)\)", response.text
     )[0]
     page_font = TTFont(BytesIO(base64.decodebytes(embedded_b64.encode())))
     page_names = page_font.getGlyphOrder()[1:]

     # Digit character as it appears in the page text -> translated digit.
     translation = {}
     for page_name in page_names:
         page_glyph = page_font["glyf"][page_name]
         for ref_name in reference_names:
             ref_glyph = reference_font["glyf"][ref_name]
             # No early exit: a later equal reference glyph overrides an
             # earlier one, exactly as every pair is compared.
             if ref_glyph == page_glyph:
                 translation[spelled_digit[page_name]] = reference_digit[ref_name]

     decoded = []
     for cell in response.xpath("//div[@class='col-md-1']/text()").extract():
         real_digits = "".join(translation[ch] for ch in cell.strip())
         decoded.append(int(real_digits))
     yield GlidedskyItem(numbers=decoded)
Exemplo n.º 3
0
 def parse(self, response):
     """Rebuild CSS-obfuscated numbers on the page and yield them as one item.

     Three kinds of rules are read from the page's ``<style>`` text:
     ``:before { content:"..." }`` (text injected per class),
     ``margin-right:-1em`` (classes whose divs are skipped) and
     ``left:<n>em`` (a per-class shift of the output slot).

     For every ``col-md-1`` cell:
       * three or more child divs: the non-skipped divs are written into a
         three-slot buffer pre-filled with ``"0"``, each at its running
         position plus its ``left`` shift, and the buffer is read as a number;
       * exactly one child div: the number is that class's ``:before`` text;
       * otherwise: the ``:before`` text of the first class that has one is
         used; if none has one, the cell contributes nothing.

     Args:
         response: Response exposing ``xpath``.

     Yields:
         A single ``GlidedskyItem`` whose ``numbers`` holds the rebuilt
         integers in page order.
     """
     stylesheet = "".join(response.xpath("//style/text()").extract())
     # class -> text injected through its ``:before`` rule
     injected_text = dict(
         re.findall(r'\.(.*?):before { content:"(.*?)" }', stylesheet)
     )
     # classes carrying ``margin-right:-1em``
     skipped_classes = re.findall(r"\.(.*?) { margin-right:-1em }", stylesheet)
     # class -> ``left`` offset (in em), used as a slot shift
     slot_shift = dict(re.findall(r"\.(.*?) { left:(.*?)em }", stylesheet))

     numbers = []
     for col in response.xpath("//div[@class='col-md-1']"):
         class_names = col.xpath("./div/@class").extract()
         values = col.xpath("./div/text()").extract()
         div_count = len(class_names)

         if div_count >= 3:
             slots = ["0", "0", "0"]
             position = 0
             for class_name, value in zip(class_names, values):
                 if class_name in skipped_classes:
                     continue
                 if class_name in slot_shift:
                     shift = int(slot_shift[class_name])
                 else:
                     shift = 0
                 slots[position + shift] = value
                 position += 1
             numbers.append(int("".join(slots)))
         elif div_count == 1:
             numbers.append(int(injected_text[class_names[0]]))
         else:
             first_hit = next(
                 (name for name in class_names if name in injected_text), None
             )
             if first_hit is not None:
                 numbers.append(int(injected_text[first_hit]))

     yield GlidedskyItem(numbers=numbers)
Exemplo n.º 4
0
    def parse(self, response):
        """Decode font-obfuscated numbers via the embedded font's cmap.

        The page embeds a base64 font. Each character of a ``col-md-1`` cell
        is looked up in the font's best cmap to get a glyph name; that glyph's
        position within entries 1..10 of the glyph order is used as the digit.

        Args:
            response: Response exposing ``text`` (page source with the embedded
                font) and ``xpath``.

        Yields:
            A single ``GlidedskyItem`` whose ``numbers`` holds the decoded
            integers in page order.

        Raises:
            IndexError: If the page contains no embedded font.
            KeyError: If a character has no entry in the font's cmap.
            ValueError: If a glyph is outside the ten digit glyphs, or a cell
                is empty after stripping.
        """
        num_str_list = response.xpath(
            "//div[@class='col-md-1']/text()").extract()
        ttf_base64 = re.findall(r"data:font;charset=utf-8;base64,(.*?)\)",
                                response.text)[0]
        font = TTFont(BytesIO(base64.decodebytes(ttf_base64.encode())))
        # Entries 1..10 of the glyph order; a glyph's index here is its digit.
        uni_list = font.getGlyphOrder()[1:11]
        camp_list = font.getBestCmap()

        def str2num(string):
            """Return the integer spelled by the obfuscated characters of *string*."""
            # ord() yields the code point for every character. The previous
            # "unicode-escape" + "\\u" -> "0x" round trip only worked for
            # U+0100..U+FFFF: ASCII hex digits were read as their hex value,
            # and "\xNN" / "\UXXXXXXXX" escapes raised ValueError.
            digits = [
                str(uni_list.index(camp_list[ord(char)]))
                for char in string.strip()
            ]
            return int("".join(digits))

        numbers = [str2num(one.strip()) for one in num_str_list]
        yield GlidedskyItem(numbers=numbers)
Exemplo n.º 5
0
 def parse(self, response):
     """Collect the plain-text numbers on the page and yield them as one item.

     Args:
         response: Response exposing ``xpath``.

     Yields:
         A single ``GlidedskyItem`` whose ``numbers`` holds the integer value
         of every ``col-md-1`` text node, in page order.
     """
     cells = response.xpath("//div[@class='col-md-1']/text()").extract()
     numbers = []
     for cell in cells:
         # Surrounding whitespace is removed before the integer conversion.
         numbers.append(int(str(cell.strip())))
     yield GlidedskyItem(numbers=numbers)
Exemplo n.º 6
0
 def parse(self, response):
     """Read the numbers from a JSON response and yield them as one item.

     Args:
         response: Response whose ``text`` is a JSON document.

     Yields:
         A single ``GlidedskyItem`` whose ``numbers`` is the value stored
         under the ``"items"`` key, or an empty list when that key is absent.
     """
     payload = json.loads(response.text)
     yield GlidedskyItem(numbers=payload.get("items", []))