def get_release_date(self, meta, response):
    """Store the page's initial release date in ``meta['release_date']``.

    Reads the ``content`` attribute of the first
    ``<span property="v:initialReleaseDate">`` node, passes it through
    ``validator.match_date`` and then ``validator.str_to_date``, and
    writes the result to ``meta`` only when that result is truthy.

    Args:
        meta: Mapping being filled in; mutated in place.
        response: Object exposing ``xpath(...).extract()``.

    Returns:
        The same ``meta`` object, whether or not a date was stored.
    """
    xpath = '//span[@property="v:initialReleaseDate"]/@content'
    values = response.xpath(xpath).extract()
    if not values:
        return meta

    # Only the first matching node is considered.
    parsed = validator.str_to_date(validator.match_date(values[0]))
    if parsed:
        meta['release_date'] = parsed
    return meta
def set_release_date(self, meta, response):
    """Set ``meta["release_date"]`` from the page's initial release date.

    Takes the first ``content`` attribute of a
    ``<span property="v:initialReleaseDate">`` node via ``.get()``.
    A missing or empty value leaves ``meta`` untouched. Otherwise the
    value goes through ``validator.match_date`` and
    ``validator.str_to_date``, and is stored only if the result is truthy.

    Args:
        meta: Mapping being filled in; mutated in place.
        response: Object exposing ``xpath(...).get()``.

    Returns:
        The same ``meta`` object.
    """
    xpath = '//span[@property="v:initialReleaseDate"]/@content'
    raw = response.xpath(xpath).get()
    if not raw:
        return meta

    parsed = validator.str_to_date(validator.match_date(raw))
    if not parsed:
        return meta

    meta["release_date"] = parsed
    return meta
def get_birth(self, meta, response):
    """Fill ``meta['birth']`` from the "出生日期" (date of birth) list item.

    Selects the text nodes that follow a ``<span>`` whose text is
    "出生日期" inside ``div.info > ul > li`` and uses the first one. The
    text, with leading/trailing newlines removed, is passed through
    ``validator.match_date`` and ``validator.str_to_date``. If that yields
    a falsy result, the part of the text after the last ":" is stored
    instead, so the raw value is not lost.

    Args:
        meta: Mapping being filled in; mutated in place.
        response: Object exposing ``xpath(...).extract()``.

    Returns:
        The same ``meta`` object; ``meta['birth']`` is left untouched when
        no matching text node exists.
    """
    # Imported locally so this method does not rely on a module-level import.
    import logging

    regx = '//div[@class="info"]/ul/li/text()[preceding-sibling::span[text()="出生日期"]]'
    data = response.xpath(regx).extract()
    # Diagnostics go through logging at DEBUG level instead of an
    # unconditional print to stdout.
    logging.getLogger(__name__).debug("get_birth: %s", data)
    if not data:
        return meta

    raw = data[0].strip("\n")
    birth = validator.str_to_date(validator.match_date(raw))
    if not birth:
        # No parseable date: fall back to the raw text after the last ":".
        birth = raw.split(":")[-1]
    meta['birth'] = birth
    return meta