import os
import time

from scrapy import signals
from scrapy.exporters import CsvItemExporter  # Scrapy's built-in CSV exporter
from scrapy.item import Field
from unidecode import unidecode


class CafelandPipeline(object):

    def process_item(self, item, spider):
        # Strip surrounding whitespace from every value and drop empties.
        for field in ["name", "updated_time", "particulars"]:
            item[field] = [val.strip(" \t\n\r") for val in item[field]]
            item[field] = [val for val in item[field] if val]

        # "particulars" alternates label, value, label, value, ...
        # Promote each (label, value) pair to its own item field, with the
        # label ASCII-folded by unidecode so it works as a CSV header.
        pars = item["particulars"]
        for i in range(0, len(pars), 2):
            new_field = unidecode(pars[i]).strip(" \t\n\r")
            if i + 1 < len(pars):
                new_val = pars[i + 1].strip(" \t\n\r:")
            else:
                # Trailing label without a value.
                new_val = ""
            # Register the field dynamically so the item accepts it.
            item.fields[new_field] = Field()
            item[new_field] = new_val
            # Collapse the consumed pair into one entry; the empty slot
            # is filtered out below.
            if i + 1 < len(pars):
                pars[i] = pars[i] + pars[i + 1]
                pars[i + 1] = ""
        item["particulars"] = [p for p in pars if p]

        # Drop the fixed 11-character label prefix from each timestamp.
        item["updated_time"] = [val[11:] for val in item["updated_time"]]

        self.exporter.export_item(item)
        return item

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # Name the export file after the current time in GMT+7; read the
        # crawler settings via the spider (the module-level scrapy.conf
        # settings object is deprecated).
        export_dir = spider.settings.get("EXPORT_DIR", ".")
        t = time.strftime("%Y-%m-%d %H-%M-%S GMT+7",
                          time.gmtime(time.time() + 7 * 3600))
        path = os.path.join(export_dir, "%s.csv" % t)
        self.file = open(path, "w+b")
        self.exporter = CsvItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()
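
# Usage note (a hedged sketch, not from the original source): the pipeline
# only runs once it is registered in the project's settings.py. The module
# path "cafeland.pipelines", the priority 300, and the EXPORT_DIR value
# below are assumptions for illustration:
#
#     ITEM_PIPELINES = {
#         "cafeland.pipelines.CafelandPipeline": 300,
#     }
#     EXPORT_DIR = "exports"  # custom setting read by spider_opened()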