def parse(self, response):
    """Decode sprite-obfuscated numbers from the page and yield them as one item.

    Every digit is rendered as a ``div`` whose class selects a
    ``background-position-x`` offset into one base64-embedded PNG sprite.
    ``self.split_img_number(img)`` is used as a sequence of x boundaries
    (its implementation is not visible here; presumably ascending). A digit
    is the index of the pair of adjacent boundaries that encloses the
    absolute value of its offset.

    Args:
        response: Downloaded page exposing ``text`` and ``xpath``.

    Yields:
        One ``GlidedskyItem`` whose ``numbers`` holds one integer per
        ``col-md-1`` cell.

    Raises:
        IndexError: If the page embeds no base64 PNG sprite.
        KeyError: If a digit ``div`` has a class without an offset rule.
        ValueError: If an offset is not an integer literal, or a cell
            produces no digits at all.
    """
    page_text = response.text

    sprite_b64 = re.findall(
        r'background-image:url\("data:image/png;base64,(.*?)"\)', page_text
    )[0]
    # The offset map is keyed by "<name> sprite" because it is looked up
    # with the full class attribute of each digit div.
    offset_by_class = {
        name + " sprite": offset
        for name, offset in re.findall(
            r"\.(.*?) { background-position-x:(.*?)px }", page_text
        )
    }

    sprite = Image.open(BytesIO(base64.b64decode(sprite_b64.encode("utf-8"))))
    borders = self.split_img_number(sprite)
    # Pair each boundary with its successor once; pair i is the slot of digit i.
    # The final boundary only ever acts as an upper bound.
    slots = list(zip(borders, borders[1:]))

    numbers = []
    for cell in response.xpath("//div[@class='col-md-1']"):
        digits = []
        for class_attr in cell.xpath("./div/@class").extract():
            # Parse the offset once per digit rather than once per boundary
            # comparison; abs() discards the sign of the CSS offset.
            offset = abs(int(offset_by_class[class_attr]))
            for digit, (left, right) in enumerate(slots):
                if left <= offset < right:
                    digits.append(str(digit))
                    break
            # An offset outside every slot contributes no digit.
        numbers.append(int("".join(digits)))

    yield GlidedskyItem(numbers=numbers)
def parse(self, response):
    """Decode font-obfuscated numbers by matching glyphs against a reference font.

    The page embeds a base64 font whose digit glyphs are shuffled. Each
    glyph of that served font is compared for equality with every glyph of
    the bundled reference font ``glidedsky/data/font.ttf``; a match tells
    which real digit a character of the page text stands for.

    Args:
        response: Downloaded page exposing ``text`` and ``xpath``.

    Yields:
        One ``GlidedskyItem`` whose ``numbers`` holds one integer per
        ``col-md-1`` text node.

    Raises:
        IndexError: If the page embeds no base64 font.
        KeyError: If a matched glyph name is missing from the lookup
            tables, or the text contains a character that was not mapped.
        ValueError: If a text node is empty after stripping.
    """
    # Real digit drawn by each named glyph of the reference font.
    reference_digit = {
        "five": "0",
        "four": "1",
        "two": "2",
        "three": "3",
        "zero": "4",
        "one": "5",
        "nine": "6",
        "six": "7",
        "eight": "8",
        "seven": "9",
    }
    # Character that appears in the page text for each served glyph name.
    page_char = {
        "zero": "0",
        "one": "1",
        "two": "2",
        "three": "3",
        "four": "4",
        "five": "5",
        "six": "6",
        "seven": "7",
        "eight": "8",
        "nine": "9",
    }

    # NOTE(review): the reference font is re-read from disk for every
    # response; caching it on the spider would avoid that.
    reference_font = TTFont(r"glidedsky/data/font.ttf")
    # Entry 0 of the glyph order is skipped (presumably ``.notdef``).
    reference_names = reference_font.getGlyphOrder()[1:]

    font_b64 = re.findall(
        r"data:font;charset=utf-8;base64,(.*?)\)", response.text
    )[0]
    served_font = TTFont(BytesIO(base64.decodebytes(font_b64.encode())))
    served_names = served_font.getGlyphOrder()[1:]

    # Resolve the reference glyphs once instead of once per served glyph.
    reference_glyf = reference_font["glyf"]
    reference_glyphs = [(name, reference_glyf[name]) for name in reference_names]
    served_glyf = served_font["glyf"]

    real_digit_for = {}
    for served_name in served_names:
        served_glyph = served_glyf[served_name]
        for reference_name, reference_glyph in reference_glyphs:
            # No early exit: if several reference glyphs compare equal,
            # the last one wins, exactly as before.
            if reference_glyph == served_glyph:
                real_digit_for[page_char[served_name]] = reference_digit[
                    reference_name
                ]

    cell_texts = response.xpath("//div[@class='col-md-1']/text()").extract()
    numbers = [
        int("".join(real_digit_for[char] for char in text.strip()))
        for text in cell_texts
    ]

    yield GlidedskyItem(numbers=numbers)
def parse(self, response):
    """Rebuild CSS-scrambled numbers and yield them as one item.

    Three kinds of rules are read from the page's ``<style>`` text:
    ``:before { content:"..." }`` (text injected by a class),
    ``margin-right:-1em`` (classes whose div is skipped) and
    ``left:<n>em`` (classes whose digit is moved by ``n`` positions).

    Cells with three or more child divs are assembled digit by digit into
    a three-character number. Cells with exactly one child take the
    injected text of that child's class. Any other cell takes the injected
    text of its first child class that has one, or adds nothing.

    Args:
        response: Downloaded page exposing ``xpath``.

    Yields:
        One ``GlidedskyItem`` whose ``numbers`` holds the decoded integers.
    """
    stylesheet = "".join(response.xpath("//style/text()").extract())

    # class -> text injected via ``:before``
    injected_text = dict(
        re.findall(r'\.(.*?):before { content:"(.*?)" }', stylesheet)
    )
    # classes whose div is skipped
    hidden_classes = re.findall(r"\.(.*?) { margin-right:-1em }", stylesheet)
    # class -> shift in em, kept as the captured string
    shift_by_class = dict(re.findall(r"\.(.*?) { left:(.*?)em }", stylesheet))

    numbers = []
    for cell in response.xpath("//div[@class='col-md-1']"):
        classes = cell.xpath("./div/@class").extract()
        texts = cell.xpath("./div/text()").extract()
        child_count = len(classes)

        if child_count >= 3:
            slots = ["0", "0", "0"]
            position = 0
            for css_class, text in zip(classes, texts):
                if css_class in hidden_classes:
                    # Skipped divs do not consume a position.
                    continue
                if css_class in shift_by_class:
                    target = position + int(shift_by_class[css_class])
                else:
                    target = position
                slots[target] = text
                position += 1
            numbers.append(int("".join(slots)))
        elif child_count == 1:
            numbers.append(int(injected_text[classes[0]]))
        else:
            first_injected = next(
                (css_class for css_class in classes if css_class in injected_text),
                None,
            )
            if first_injected is not None:
                numbers.append(int(injected_text[first_injected]))

    yield GlidedskyItem(numbers=numbers)
def parse(self, response):
    """Decode font-obfuscated numbers using the embedded font's character map.

    The page embeds a base64 font. Each character of a cell is looked up
    in the font's best cmap to get a glyph name; the position of that name
    within entries 1..10 of the glyph order is the digit it represents.

    Args:
        response: Downloaded page exposing ``text`` and ``xpath``.

    Yields:
        One ``GlidedskyItem`` whose ``numbers`` holds one integer per
        ``col-md-1`` text node.

    Raises:
        IndexError: If the page embeds no base64 font.
        KeyError: If a character has no entry in the font's cmap.
        ValueError: If a glyph is not among the ten digit glyphs, or a
            text node is empty after stripping.
    """
    cell_texts = response.xpath("//div[@class='col-md-1']/text()").extract()

    font_b64 = re.findall(
        r"data:font;charset=utf-8;base64,(.*?)\)", response.text
    )[0]
    font = TTFont(BytesIO(base64.decodebytes(font_b64.encode())))
    # Entry 0 of the glyph order is skipped; the next ten are the digits.
    digit_glyphs = font.getGlyphOrder()[1:11]
    glyph_by_codepoint = font.getBestCmap()

    def str2num(string):
        """Translate one obfuscated cell text into the integer it displays."""
        # ord() gives the code point for every character. The previous
        # unicode-escape round trip was only right for "\uXXXX" escapes:
        # ASCII gave a wrong value, and "\xNN" / "\UXXXXXXXX" escapes raised.
        digits = [
            str(digit_glyphs.index(glyph_by_codepoint[ord(char)]))
            for char in string.strip()
        ]
        return int("".join(digits))

    numbers = [str2num(text) for text in cell_texts]
    yield GlidedskyItem(numbers=numbers)
def parse(self, response):
    """Collect the plain-text numbers of the page and yield them as one item.

    Args:
        response: Downloaded page exposing ``xpath``.

    Yields:
        One ``GlidedskyItem`` whose ``numbers`` holds the integer value of
        every ``col-md-1`` text node, with surrounding whitespace stripped.
    """
    cell_texts = response.xpath("//div[@class='col-md-1']/text()").extract()
    numbers = [int(text.strip()) for text in cell_texts]
    yield GlidedskyItem(numbers=numbers)
def parse(self, response):
    """Read the numbers from a JSON response and yield them as one item.

    Args:
        response: Downloaded response whose ``text`` is a JSON object.

    Yields:
        One ``GlidedskyItem`` whose ``numbers`` is the object's ``"items"``
        value, or an empty list when that key is absent.
    """
    payload = json.loads(response.text)
    yield GlidedskyItem(numbers=payload.get("items", []))