def get_alias(self, meta, response):
    """Store the alias text found on the page in ``meta['alias']``.

    Looks for text nodes that have a preceding-sibling ``<span>`` whose
    text is "又名:" (Chinese for "also known as:") and a following-sibling
    ``<br>``. When at least one such node exists, the first one is passed
    through ``validator.process_slash_str`` (defined outside this block)
    and the result is stored under the ``'alias'`` key. When nothing
    matches, ``meta`` is left untouched.

    Args:
        meta: Mutable mapping that is updated in place.
        response: Object exposing ``xpath(expr)`` whose result provides
            ``extract()`` (presumably a Scrapy response -- confirm).

    Returns:
        The same ``meta`` object that was passed in.
    """
    # The expression is assembled with implicit literal concatenation so
    # that the ``following-sibling`` axis name stays intact; splitting the
    # literal with a backslash continuation leaks stray characters into the
    # XPath and makes it invalid.
    alias_xpath = (
        '//text()[preceding-sibling::span[text()="又名:"]]'
        '[following-sibling::br]'
    )
    data = response.xpath(alias_xpath).extract()
    if data:
        meta['alias'] = validator.process_slash_str(data[0])
    return meta
def set_alias(self, meta, response):
    """Store the alias text found on the page in ``meta["alias"]``.

    Selects the first text node that has a preceding-sibling ``<span>``
    whose text is "又名:" (Chinese for "also known as:") and a
    following-sibling ``<br>``. A truthy match is passed through
    ``validator.process_slash_str`` (defined outside this block) and stored
    under the ``"alias"`` key; otherwise ``meta`` is left untouched.

    Args:
        meta: Mutable mapping that is updated in place.
        response: Object exposing ``xpath(expr)`` whose result provides
            ``get()`` (presumably a Scrapy response -- confirm).

    Returns:
        The same ``meta`` object that was passed in.
    """
    # Implicit literal concatenation keeps the ``following-sibling`` axis
    # name in one piece; a backslash continuation inside the literal leaks
    # stray characters into the XPath and makes it invalid.
    regex = (
        '//text()[preceding-sibling::span[text()="又名:"]]'
        '[following-sibling::br]'
    )
    match = response.xpath(regex).get()
    if match:
        meta["alias"] = validator.process_slash_str(match)
    return meta
def get_actor_ids(self, meta, response):
    """Store ``name:id`` pairs for the starring links in ``meta['actor_ids']``.

    Every ``<a rel="v:starring">`` element contributes one ``name:id``
    entry, where ``name`` is the anchor's first text node and ``id`` is the
    second-to-last ``/``-separated segment of its ``href``. Entries are
    joined with ``|``, passed through ``validator.process_slash_str``
    (defined outside this block) and stored under ``'actor_ids'``.

    Anchors lacking a text node, an ``href``, or an ``href`` with fewer
    than two ``/``-separated segments are skipped.

    Args:
        meta: Mutable mapping that is updated in place.
        response: Object exposing ``xpath(expr)`` returning an iterable of
            selectors that themselves support relative ``xpath(...)`` and
            ``get()`` (presumably a Scrapy response -- confirm).

    Returns:
        The same ``meta`` object that was passed in.
    """
    cmb_actor = []
    # Read name and href from the same anchor element. Pairing two
    # independently extracted lists by position breaks as soon as one
    # anchor is missing its text or its href: the lists differ in length
    # and every later name is attached to the wrong id.
    for anchor in response.xpath('//a[@rel="v:starring"]'):
        name = anchor.xpath('text()').get()
        href = anchor.xpath('@href').get()
        if name is None or href is None:
            continue
        segments = href.split("/")
        if len(segments) < 2:
            # No second-to-last segment, hence no id to report.
            continue
        cmb_actor.append(name + ":" + segments[-2])
    meta['actor_ids'] = validator.process_slash_str('|'.join(cmb_actor))
    return meta
def get_director_ids(self, meta, response):
    """Store ``name:id`` pairs for director links in ``meta['director_ids']``.

    Every ``<a rel="v:directedBy">`` element contributes one ``name:id``
    entry, where ``name`` is the anchor's first text node and ``id`` is the
    second-to-last ``/``-separated segment of its ``href``. Entries are
    joined with ``|``, passed through ``validator.process_slash_str``
    (defined outside this block) and stored under ``'director_ids'``.

    Anchors lacking a text node, an ``href``, or an ``href`` with fewer
    than two ``/``-separated segments are skipped.

    Args:
        meta: Mutable mapping that is updated in place.
        response: Object exposing ``xpath(expr)`` returning an iterable of
            selectors that themselves support relative ``xpath(...)`` and
            ``get()`` (presumably a Scrapy response -- confirm).

    Returns:
        The same ``meta`` object that was passed in.
    """
    cmb_directors = []
    # Read name and href from the same anchor element. Pairing two
    # independently extracted lists by position breaks as soon as one
    # anchor is missing its text or its href: the lists differ in length
    # and every later name is attached to the wrong id.
    for anchor in response.xpath('//a[@rel="v:directedBy"]'):
        name = anchor.xpath('text()').get()
        href = anchor.xpath('@href').get()
        if name is None or href is None:
            continue
        segments = href.split("/")
        if len(segments) < 2:
            # No second-to-last segment, hence no id to report.
            continue
        cmb_directors.append(name + ":" + segments[-2])
    meta['director_ids'] = validator.process_slash_str('|'.join(cmb_directors))
    return meta
def get_actors(self, meta, response):
    """Record the starring names in ``meta['actors']``.

    Collects the text nodes of every ``<a rel="v:starring">`` element,
    joins them with ``/``, passes the joined string through
    ``validator.process_slash_str`` (defined outside this block) and stores
    the result under the ``'actors'`` key.

    Args:
        meta: Mutable mapping that is updated in place.
        response: Object exposing ``xpath(expr)`` whose result provides
            ``extract()``.

    Returns:
        The same ``meta`` object that was passed in.
    """
    names = response.xpath('//a[@rel="v:starring"]/text()').extract()
    joined_names = '/'.join(names)
    meta['actors'] = validator.process_slash_str(joined_names)
    return meta
def get_directors(self, meta, response):
    """Record the director names in ``meta['directors']``.

    Collects the text nodes of every ``<a rel="v:directedBy">`` element,
    joins them with ``/``, passes the joined string through
    ``validator.process_slash_str`` (defined outside this block) and stores
    the result under the ``'directors'`` key.

    Args:
        meta: Mutable mapping that is updated in place.
        response: Object exposing ``xpath(expr)`` whose result provides
            ``extract()``.

    Returns:
        The same ``meta`` object that was passed in.
    """
    names = response.xpath('//a[@rel="v:directedBy"]/text()').extract()
    joined_names = '/'.join(names)
    meta['directors'] = validator.process_slash_str(joined_names)
    return meta
def set_actors(self, meta, response):
    """Record the starring names in ``meta["actors"]``.

    Gathers the text nodes of every ``<a rel="v:starring">`` element,
    joins them with ``/``, passes the joined string through
    ``validator.process_slash_str`` (defined outside this block) and stores
    the result under the ``"actors"`` key.

    Args:
        meta: Mutable mapping that is updated in place.
        response: Object exposing ``xpath(expr)`` whose result provides
            ``getall()``.

    Returns:
        The same ``meta`` object that was passed in.
    """
    starring_names = response.xpath('//a[@rel="v:starring"]/text()').getall()
    slash_joined = "/".join(starring_names)
    meta["actors"] = validator.process_slash_str(slash_joined)
    return meta