示例#1
0
 def spider_opened(self, spider):
     """Open a timestamped CSV file and start an exporter that writes to it.

     The file is created inside the ``EXPORT_DIR`` setting (current directory
     when the setting is absent) and is named after the current time shifted
     to UTC+7. The open handle is stored on ``self.file`` and the exporter on
     ``self.exporter``.

     Args:
         spider: The spider that was opened; not used here.

     Raises:
         Exception: Whatever the exporter setup raises; the freshly opened
             file is closed before the error propagates.
     """
     export_dir = settings.get("EXPORT_DIR", ".")
     # Shift the epoch by seven hours so gmtime() yields UTC+7 wall-clock time.
     shifted = time.gmtime(time.time() + 7 * 3600)
     stamp = time.strftime("%Y-%m-%d %H-%M-%S GMT+7", shifted)
     path = os.path.join(export_dir, "%s.csv" % stamp)
     self.file = open(path, "w+b")
     try:
         self.exporter = CSVItemExporter(self.file)
         self.exporter.start_exporting()
     except Exception:
         # Do not leak the handle when the exporter cannot be set up.
         self.file.close()
         raise
示例#2
0
class CafelandPipeline(object):
    """Clean scraped items and stream them into a timestamped CSV file.

    The class appears to be a Scrapy item pipeline: it is built through
    ``from_crawler``, listens to the ``spider_opened`` / ``spider_closed``
    signals, and pushes every processed item through a ``CSVItemExporter``.
    """

    # Characters trimmed from both ends of every scraped string.
    _WHITESPACE = " \t\n\r"
    # Fields holding lists of strings that are trimmed and pruned of blanks.
    _LIST_FIELDS = ("name", "updated_time", "particulars")

    def process_item(self, item, spider):
        """Normalize *item*, expand its particulars into fields, and export it.

        Steps, in order:

        1. Trim every string in ``name``, ``updated_time`` and
           ``particulars`` and drop the ones that end up empty.
        2. Read ``particulars`` as alternating label/value entries. Each
           label (transliterated with ``unidecode`` and trimmed) becomes a
           new field on the item holding its value with surrounding
           whitespace and colons removed. A trailing label without a value
           gets an empty string.
        3. Replace ``particulars`` with one ``label + value`` string per pair.
        4. Drop the first 11 characters of every ``updated_time`` entry.
        5. Hand the item to the exporter.

        Args:
            item: Mapping-like item exposing a ``fields`` registry.
            spider: The spider that produced the item; not used here.

        Returns:
            The same *item* object, modified in place.
        """
        blanks = self._WHITESPACE
        for field in self._LIST_FIELDS:
            trimmed = (val.strip(blanks) for val in item[field])
            item[field] = [val for val in trimmed if val]

        pars = item["particulars"]
        merged = []
        for i in range(0, len(pars), 2):
            label = pars[i]
            # An odd-length list leaves the last label without a value.
            detail = pars[i + 1] if i + 1 < len(pars) else ""
            new_field = unidecode(label).strip(blanks)
            # The field must be registered before it can be assigned.
            # NOTE(review): ``item.fields`` looks like a registry shared by
            # all items of the class, so this likely persists -- confirm.
            item.fields[new_field] = Field()
            item[new_field] = detail.strip(blanks + ":")
            merged.append(label + detail)
        item["particulars"] = merged

        # NOTE(review): presumably strips a fixed-width 11-character prefix
        # from the scraped timestamp text -- confirm against the page markup.
        item["updated_time"] = [val[11:] for val in item["updated_time"]]

        self.exporter.export_item(item)
        return item

    @classmethod
    def from_crawler(cls, crawler):
        """Build a pipeline and subscribe it to the crawler's spider signals.

        Args:
            crawler: Object exposing ``signals.connect``.

        Returns:
            A new pipeline whose ``spider_opened`` and ``spider_closed``
            methods are connected to the matching signals.
        """
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open a timestamped CSV file and start an exporter that writes to it.

        The file is created inside the ``EXPORT_DIR`` setting (current
        directory when the setting is absent) and is named after the current
        time shifted to UTC+7.

        Args:
            spider: The spider that was opened; not used here.

        Raises:
            Exception: Whatever the exporter setup raises; the freshly opened
                file is closed before the error propagates.
        """
        export_dir = settings.get("EXPORT_DIR", ".")
        # Shift the epoch by seven hours so gmtime() yields UTC+7 wall time.
        shifted = time.gmtime(time.time() + 7 * 3600)
        stamp = time.strftime("%Y-%m-%d %H-%M-%S GMT+7", shifted)
        path = os.path.join(export_dir, "%s.csv" % stamp)
        self.file = open(path, "w+b")
        try:
            self.exporter = CSVItemExporter(self.file)
            self.exporter.start_exporting()
        except Exception:
            # Do not leak the handle when the exporter cannot be set up.
            self.file.close()
            raise

    def spider_closed(self, spider):
        """Finish the export and close the output file.

        The file is closed even when ``finish_exporting`` raises, so the
        handle is never left open.

        Args:
            spider: The spider that was closed; not used here.
        """
        try:
            self.exporter.finish_exporting()
        finally:
            self.file.close()