class RealstateMonthlyPipeline(object):
    """Scrapy pipeline that exports scraped items to a timestamped CSV file.

    On spider open, a CSV file named after the current time (labelled GMT+6)
    is created under the ``EXPORT_PATH`` setting (default: current directory)
    and a ``CSVRealstateItemExporter`` is started on it; on spider close the
    exporter is finished and the file handle released.

    NOTE(review): ``process_item`` returns items unchanged and never calls
    ``exporter.export_item`` — confirm items are fed to the exporter
    elsewhere, otherwise only headers ever reach the CSV.
    """

    # Default export location; kept for backward compatibility with code that
    # reads this class attribute. May be None if HOME is unset.
    EXPORT_PATH = os.getenv("HOME")

    def __init__(self):
        # Handle to the open export file; assigned in spider_opened.
        # (Previously initialized to a dict that was never used as one.)
        self.files = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and hook it into the spider lifecycle signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open a timestamped CSV export file and start the exporter."""
        export_dir = settings.get("EXPORT_PATH", ".")
        # Clock shifted by 6 hours so the filename reflects GMT+6 local time.
        t = time.strftime("%Y-%m-%d %H-%M-%S GMT+6",
                          time.gmtime(time.time() + 6 * 3600))
        path = os.path.join(export_dir, "%s.csv" % t)
        self.files = open(path, "w+b")
        self.exporter = CSVRealstateItemExporter(self.files)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close the file; safe if opening failed."""
        # Guard: spider_closed fires even when spider_opened raised before
        # the exporter/file were created.
        if getattr(self, "exporter", None) is not None:
            self.exporter.finish_exporting()
        if self.files is not None:
            self.files.close()
            self.files = None

    def process_item(self, item, spider):
        """Pass the item through unchanged."""
        return item
class RealstateMonthlyPipeline(object):
    """Scrapy pipeline that exports scraped items to a timestamped CSV file.

    On spider open, a CSV file named after the current time (labelled GMT+6)
    is created under the ``EXPORT_PATH`` setting (default: current directory)
    and a ``CSVRealstateItemExporter`` is started on it; on spider close the
    exporter is finished and the file handle released.

    NOTE(review): this class is a duplicate definition — the file contains
    several copies of ``RealstateMonthlyPipeline`` and only the last one
    defined wins; consolidate to a single definition.
    """

    # Default export location; kept for backward compatibility with code that
    # reads this class attribute. May be None if HOME is unset.
    EXPORT_PATH = os.getenv("HOME")

    def __init__(self):
        # Handle to the open export file; assigned in spider_opened.
        # (Previously initialized to a dict that was never used as one.)
        self.files = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and hook it into the spider lifecycle signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open a timestamped CSV export file and start the exporter."""
        export_dir = settings.get("EXPORT_PATH", ".")
        # Clock shifted by 6 hours so the filename reflects GMT+6 local time.
        t = time.strftime("%Y-%m-%d %H-%M-%S GMT+6",
                          time.gmtime(time.time() + 6 * 3600))
        path = os.path.join(export_dir, "%s.csv" % t)
        self.files = open(path, "w+b")
        self.exporter = CSVRealstateItemExporter(self.files)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close the file; safe if opening failed."""
        # Guard: spider_closed fires even when spider_opened raised before
        # the exporter/file were created.
        if getattr(self, "exporter", None) is not None:
            self.exporter.finish_exporting()
        if self.files is not None:
            self.files.close()
            self.files = None

    def process_item(self, item, spider):
        """Pass the item through unchanged."""
        return item
class RealstateMonthlyPipeline(object):
    """Export pipeline: writes items to a timestamped CSV while a spider runs.

    The export file lives in the ``EXPORT_DIR`` setting (default '.') and is
    named after the spider-open time, labelled GMT+6.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and subscribe it to spider open/close signals."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        """Create the timestamped export file and start the exporter."""
        target_dir = settings.get('EXPORT_DIR', '.')
        stamp = time.strftime('%Y-%m-%d %H-%M-%S GMT+6',
                              time.gmtime(time.time() + 6 * 3600))
        self.file = open(os.path.join(target_dir, '%s.csv' % stamp), 'w+b')
        self.exporter = CSVRealstateItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Stop the exporter and release the export file."""
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Hand the item back unchanged."""
        return item
class RealstateMonthlyPipeline(object):
    """Pipeline that streams scraped items into a per-run CSV export file.

    The CSV is created under the ``EXPORT_DIR`` setting (default '.') when the
    spider opens, named with a GMT+6 timestamp, and closed when it finishes.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Wire a new pipeline instance into the crawler's spider signals."""
        obj = cls()
        crawler.signals.connect(obj.spider_opened, signals.spider_opened)
        crawler.signals.connect(obj.spider_closed, signals.spider_closed)
        return obj

    def spider_opened(self, spider):
        """Open the export file and begin exporting."""
        base = settings.get('EXPORT_DIR', '.')
        shifted = time.time() + 6 * 3600  # label the file in GMT+6
        name = time.strftime('%Y-%m-%d %H-%M-%S GMT+6', time.gmtime(shifted))
        self.file = open(os.path.join(base, '%s.csv' % name), 'w+b')
        self.exporter = CSVRealstateItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Flush the exporter and close the file."""
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """Return the item untouched."""
        return item
class RealstateMonthlyPipeline(object):
    """CSV export pipeline keyed to the spider lifecycle.

    A timestamped (GMT+6) CSV file is opened in the ``EXPORT_DIR`` directory
    when the spider starts and closed when it stops.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Construct the pipeline and register its signal handlers."""
        new_pipeline = cls()
        crawler.signals.connect(new_pipeline.spider_opened,
                                signals.spider_opened)
        crawler.signals.connect(new_pipeline.spider_closed,
                                signals.spider_closed)
        return new_pipeline

    def spider_opened(self, spider):
        """Open the destination CSV and start the item exporter."""
        out_dir = settings.get('EXPORT_DIR', '.')
        label = time.strftime('%Y-%m-%d %H-%M-%S GMT+6',
                              time.gmtime(time.time() + 6 * 3600))
        full_path = os.path.join(out_dir, '%s.csv' % label)
        self.file = open(full_path, 'w+b')
        self.exporter = CSVRealstateItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finalize the exporter and close the destination file."""
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        """No-op pass-through of the scraped item."""
        return item
class RealstateMonthlyPipeline(object):
    """Scrapy pipeline that exports scraped items to a timestamped CSV file.

    On spider open, a CSV file named after the current time (labelled GMT+6)
    is created under the ``EXPORT_PATH`` setting (default: current directory)
    and a ``CSVRealstateItemExporter`` is started on it; on spider close the
    exporter is finished and the file handle released.

    NOTE(review): ``process_item`` never calls ``exporter.export_item`` — the
    commented-out history suggests an explicit ``fields_to_export`` list was
    once set here; confirm items actually reach the exporter.
    """

    # Default export location; kept for backward compatibility with code that
    # reads this class attribute. May be None if HOME is unset.
    EXPORT_PATH = os.getenv("HOME")

    def __init__(self):
        # Handle to the open export file; assigned in spider_opened.
        # (Previously initialized to a dict that was never used as one.)
        self.files = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and hook it into the spider lifecycle signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open a timestamped CSV export file and start the exporter."""
        export_dir = settings.get('EXPORT_PATH', '.')
        # Clock shifted by 6 hours so the filename reflects GMT+6 local time.
        t = time.strftime('%Y-%m-%d %H-%M-%S GMT+6',
                          time.gmtime(time.time() + 6 * 3600))
        path = os.path.join(export_dir, '%s.csv' % t)
        self.files = open(path, 'w+b')
        self.exporter = CSVRealstateItemExporter(self.files)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close the file; safe if opening failed."""
        # Guard: spider_closed fires even when spider_opened raised before
        # the exporter/file were created.
        if getattr(self, "exporter", None) is not None:
            self.exporter.finish_exporting()
        if self.files is not None:
            self.files.close()
            self.files = None

    def process_item(self, item, spider):
        """Pass the item through unchanged."""
        return item