示例#1
0
class VnnewscrawlerPipeline(object):
    """Item pipeline that writes every item to its own JSON file.

    Files land in ``<download_dir>/<spider name>/<category slug>/``.
    """

    def __init__(self, download_dir):
        self.exporter = BaseItemExporter()
        self.download_dir = download_dir

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the ``DOWNLOAD_DIR`` setting.

        Falls back to ``"downloads"`` when the setting is absent.
        """
        target = crawler.settings.get("DOWNLOAD_DIR", "downloads")
        return cls(download_dir=target)

    def process_item(self, item, spider):
        """Dump *item* as indented JSON on disk and return it unchanged."""
        # Category slug: passed through unidecode, runs of whitespace,
        # underscores and hyphens collapsed to one "-", then lower-cased.
        category = unidecode(item.get("category", "unknown"))
        slug = regex.sub(r"[\s_-]+", "-", category).lower()

        target_dir = os.path.join(self.download_dir, spider.name, slug)
        os.makedirs(target_dir, exist_ok=True)

        # The item's "code" names the file; a microsecond-resolution
        # timestamp is the fallback when the item carries no code.
        fallback_name = datetime.now().strftime("%Y%m%d%H%M%S%f")
        path = os.path.join(target_dir, item.get("code", fallback_name))

        with open(path, "w", encoding="UTF-8") as fp:
            payload = dict(self.exporter._get_serialized_fields(item))
            json.dump(payload, fp, indent=4, ensure_ascii=False)
        return item
示例#2
0
class BeautyCrawlerPipeline(object):
    """Item pipeline that saves gallery and image items via their models."""

    def __init__(self):
        self.exporter = BaseItemExporter()

    def process_item(self, item, spider):
        """Hand *item* to the handler of each type it matches; return it."""
        # Every pair is checked, so an item that is an instance of both
        # types is passed to both handlers.
        handlers = (
            (GalleryItem, self.process_gallery),
            (ImageItem, self.process_image),
        )
        for item_type, handler in handlers:
            if isinstance(item, item_type):
                handler(item)
        return item

    def process_gallery(self, item):
        """Build a ``Gallery`` from the item's serialized fields and save it."""
        fields = dict(self.exporter._get_serialized_fields(item))
        gallery = Gallery(**fields)
        gallery.save()

    def process_image(self, item):
        """Build an ``Image`` from the item's serialized fields and save it."""
        fields = dict(self.exporter._get_serialized_fields(item))
        image = Image(**fields)
        image.save()
示例#3
0
 def _get_exporter(self, **kwargs):
     """Create a ``BaseItemExporter``, forwarding all keyword arguments."""
     exporter = BaseItemExporter(**kwargs)
     return exporter
示例#4
0
 def __init__(self, download_dir):
     """Keep *download_dir* and create a default ``BaseItemExporter``."""
     self.exporter = BaseItemExporter()
     self.download_dir = download_dir
示例#5
0
 def __init__(self):
     """Create the ``BaseItemExporter`` stored on ``self.exporter``."""
     exporter = BaseItemExporter()
     self.exporter = exporter
示例#6
0
 def __init__(self, arquivo, *args, **kwargs):
     """Initialise the base exporter, then remember *arquivo*.

     All extra positional and keyword arguments are forwarded untouched to
     ``BaseItemExporter.__init__``.
     """
     # NOTE(review): "arquivo" is Portuguese for "file"; presumably the
     # output target of this exporter -- confirm against the callers.
     BaseItemExporter.__init__(self, *args, **kwargs)
     self.arquivo = arquivo
示例#7
0
 def process_item(self, item, spider):
   """Serialize each field of *item* in place and return the item.

   Every name yielded by iterating *item* has its value replaced by the
   result of ``BaseItemExporter.serialize_field``, called with the field's
   entry in ``item.fields``, the field name and the current value.

   Args:
     item: The scraped item; must support iteration over field names,
       lookup/assignment by name, and expose a ``fields`` mapping.
     spider: The spider that produced the item (unused here).

   Returns:
     The same *item* object, so later pipeline stages keep receiving it.
   """
   itemExporter = BaseItemExporter()
   # Iterate the field names directly: the previous enumerate() call
   # produced an index that was never used.
   for field_name in item:
     item[field_name] = itemExporter.serialize_field(
       item.fields[field_name], field_name, item[field_name]
     )
   # A pipeline stage must hand the item on; without this the method
   # returned None.
   return item