def test_header(self):
    """Check CsvItemExporter header emission under each configuration."""
    # Field list taken from the item: header row plus one data row.
    buf = StringIO()
    exporter = CsvItemExporter(buf, fields_to_export=self.i.fields.keys())
    exporter.start_exporting()
    exporter.export_item(self.i)
    exporter.finish_exporting()
    self.assertEqual(buf.getvalue(), 'age,name\r\n22,John\xc2\xa3\r\n')

    # Explicit subset of fields: only that column is written.
    buf = StringIO()
    exporter = CsvItemExporter(buf, fields_to_export=['age'])
    exporter.start_exporting()
    exporter.export_item(self.i)
    exporter.finish_exporting()
    self.assertEqual(buf.getvalue(), 'age\r\n22\r\n')

    # No explicit fields; the header appears once even for repeated items.
    buf = StringIO()
    exporter = CsvItemExporter(buf)
    exporter.start_exporting()
    exporter.export_item(self.i)
    exporter.export_item(self.i)
    exporter.finish_exporting()
    self.assertEqual(
        buf.getvalue(),
        'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')

    # Header line suppressed entirely.
    buf = StringIO()
    exporter = CsvItemExporter(buf, include_headers_line=False)
    exporter.start_exporting()
    exporter.export_item(self.i)
    exporter.finish_exporting()
    self.assertEqual(buf.getvalue(), '22,John\xc2\xa3\r\n')
def __init__(self):
    """Open the keystat and summary CSV streams and start both exporters."""
    # Files stay open for the whole crawl; spider_closed handles cleanup.
    self.keystat_file = open('yahoo_keystat.csv', 'w+b')
    self.summary_file = open('yahoo_summary.csv', 'w+b')
    self.summary_exporter = CsvItemExporter(self.summary_file)
    self.keystat_exporter = CsvItemExporter(self.keystat_file)
    for exporter in (self.summary_exporter, self.keystat_exporter):
        exporter.start_exporting()
    # Hook cleanup to the spider_closed signal.
    dispatcher.connect(self.spider_closed, signals.spider_closed)
def __init__(self):
    """Open today's CSV data file and start a gbk-encoded exporter.

    Appends when the file already exists, creates it otherwise.  The
    header line is only written for a fresh file: the original passed
    ``include_headers_line=True`` in both branches, which inserts a
    duplicate header row mid-file on every append run.
    """
    data_file = "%s.csv" % (datetime.datetime.now().strftime("%Y-%m-%d"))
    is_new = not os.path.exists(data_file)
    # 'ab+' preserves existing rows; 'wb+' creates a fresh file.
    self.file = open(data_file, "wb+" if is_new else "ab+")
    self.exporter = CsvItemExporter(
        self.file, include_headers_line=is_new, encoding="gbk")
    self.exporter.start_exporting()
def open_spider(self, spider):
    """Open one CSV file + exporter per record type and begin exporting.

    Keeps the original attribute names (``<name>_file`` / ``<name>_csv``)
    so close_spider and the item handlers keep working.  Fixes the
    original's missing ``start_exporting()`` calls — the Scrapy exporter
    contract requires it before ``export_item()``.
    """
    for name in ('contracts', 'contestants', 'invitees',
                 'documents', 'places'):
        handle = open('%s.csv' % name, 'w+b')
        setattr(self, '%s_file' % name, handle)
        exporter = CsvItemExporter(handle)
        exporter.start_exporting()
        setattr(self, '%s_csv' % name, exporter)
def spider_opened(self, spider):
    """Set up the per-spider CSV exporter and load previously seen polls.

    Opens the CSV in binary mode ('wb') — CsvItemExporter writes encoded
    bytes, so the original text-mode 'w' open fails under Python 3.

    Raises:
        ValueError: if the previous-polls JSON exists but is malformed,
        so it can be inspected before being overwritten.
    """
    latest_polls_file = open('data/' + spider.name + '_latest.csv', 'wb')
    self.latest_polls_files[spider] = latest_polls_file
    exporter = CsvItemExporter(
        latest_polls_file, fields_to_export=spider.fields_to_export)
    exporter.start_exporting()
    self.exporters[spider] = exporter

    prev_polls_fName = 'data/' + spider.name + '_dict.json'
    try:
        with open(prev_polls_fName, 'r') as prev_polls_file:
            prev_polls = json.load(prev_polls_file)
    except IOError:
        # data/<name>_dict.json doesn't exist yet: start with no history.
        prev_polls = []
    except ValueError:
        raise ValueError(
            "Malformed prev_polls_file for " + spider.name + ".")
    self.prev_polls_fNames[spider] = prev_polls_fName
    self.prev_polls[spider] = prev_polls
    self.newitems[spider] = []
def __init__(self):
    """Create one pipe-delimited CSV exporter per city in cities.txt.

    Output files live under ../../Results/Reviews/ (created on demand).
    The per-city CSVs are opened in binary mode — CsvItemExporter writes
    bytes, so the original text-mode 'w+' open breaks under Python 3 —
    and cities.txt is closed via a context manager even on error.
    """
    self.files = {}
    out_dir = os.path.dirname('../../Results/Reviews/')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    self.exportcitymap = {}
    with open("cities.txt") as city_list:
        for city in city_list:
            # Slashes would be interpreted as path separators in the
            # output filename.
            city = city.rstrip('\n').replace("/", "_")
            out = open(
                "../../Results/Reviews/Review_" + city + ".csv", 'wb')
            self.exportcitymap[city] = CsvItemExporter(
                out,
                fields_to_export=Review.fields.keys(),
                delimiter='|')
            print('CITY ', city)

@classmethod
def from_crawler(cls, crawler):
    """Standard Scrapy factory: build the pipeline and hook spider signals."""
    pipeline = cls()
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
    return pipeline
def spider_opened(self, spider):
    """Open the Las Vegas pets CSV and start exporting the fixed columns."""
    out = open('Pets_LasVegas.csv', 'w+b')
    self.files[spider] = out
    exporter = CsvItemExporter(out)
    # Fixed output column order.
    exporter.fields_to_export = [
        'business_name', 'industry_category', 'city', 'state',
        'phone_number', 'street_address', 'website', 'email',
        'url', 'count',
    ]
    exporter.start_exporting()
    self.exporter = exporter
def open_spider(self, spider):
    """For the fto_branch spider, open the alternate transactions CSV."""
    if spider.name != 'fto_branch':
        return
    self.file = open('./output/transactions_alt.csv', 'w+b')
    self.exporter = CsvItemExporter(self.file)
    self.exporter.start_exporting()
def open_spider(self, spider):
    """For the fto_urls spider, open an output CSV named after its stage."""
    if spider.name != 'fto_urls':
        return
    self.file = open('./output/' + spider.stage + '.csv', 'w+b')
    self.exporter = CsvItemExporter(self.file)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open <spider>_items.csv and start exporting the movie columns.

    Renames the original's local ``list``, which shadowed the builtin.
    """
    out = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    # Fixed output column order.
    self.exporter.fields_to_export = [
        'id', 'title', 'time', 'director', 'year', 'star', 'cost']
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open the data file matching the spider and start the CSV exporter.

    Raises:
        ValueError: for an unrecognised spider name.  The original left
        ``self.file`` unset in that case and crashed at the
        ``CsvItemExporter(self.file)`` call with an opaque AttributeError.
    """
    paths = {
        'mcc1': 'MicrocapCollector/spiders/data/data1.csv',
        'mcc2': 'MicrocapCollector/spiders/data/data2.csv',
    }
    try:
        path = paths[spider.name]
    except KeyError:
        raise ValueError(
            'No output file configured for spider %r' % spider.name)
    self.file = open(path, 'w+b')
    self.exporter = CsvItemExporter(self.file, delimiter=',')
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open the per-spider CSV output and begin exporting."""
    csv_path = self.get_file_name(spider, "csv")
    handle = open(csv_path, 'w+b')
    self.files[spider] = handle
    self.exporter = CsvItemExporter(
        handle, fields_to_export=self.fields_to_export)
    self.exporter.start_exporting()
def spider_closed(self, spider):
    """Write the collected results to res.csv in ``self.ordered`` order.

    Each item is exported once per entry in ``self.result`` sharing its
    name, preserving the original multiplicity but replacing the
    original O(len(ordered) * len(result)) nested scan with a Counter
    lookup.  The bare ``except: pass`` (which silently hid any failure)
    is replaced by a logged handler, and the file is closed via ``with``
    even on error.
    """
    import logging
    from collections import Counter
    try:
        with open('res.csv', 'w+b') as out:
            self.exporter = CsvItemExporter(out)
            self.exporter.fields_to_export = [
                'name', 'image', 'link', 'address']
            self.exporter.start_exporting()
            name_counts = Counter(res['name'] for res in self.result)
            for item in self.ordered:
                for _ in range(name_counts[item['name']]):
                    self.exporter.export_item(item)
            self.exporter.finish_exporting()
    except Exception:
        # Best effort, as in the original — but at least say what failed.
        logging.getLogger(__name__).exception('Failed to write res.csv')
def open_spider(self, spider):
    """For the fto_material spider, open its output CSV and start exporting."""
    if spider.name != 'fto_material':
        return
    self.file = open('./output/fto_material.csv', 'w+b')
    self.exporter = CsvItemExporter(self.file)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open the douban TV export file and start the CSV exporter."""
    filename = 'douban_tv_hanju.csv'
    out = open(filename, 'wb+')
    self.files[spider] = out
    print('==========pipeline==========spider_opened==========')
    self.exporter = CsvItemExporter(out)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open a date-stamped CSV for this spider and start exporting."""
    stamp = datetime.datetime.strftime(datetime.datetime.now(), '%Y%m%d')
    out = open('%s_%s.csv' % (spider.name, stamp), 'w+b')
    self.files[spider] = out
    exporter = CsvItemExporter(out)
    # Fixed output column order.
    exporter.fields_to_export = [
        'title', 'save', 'desc', 'long_desc', 'image']
    exporter.start_exporting()
    self.exporter = exporter
def spider_opened(self, spider):
    """Open <spider>.csv and pick the column set matching the spider."""
    out = open('%s.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    # Column sets per known spider; unknown names keep the exporter's
    # default field handling, as in the original if/elif chain.
    fields_by_spider = {
        "all-coins": [
            'rank', 'name', 'type', 'symbol', 'website',
            'market_cap_usd', 'price_usd', 'price_btc',
            'volume_24_usd', 'change_24',
        ],
        "historical-data": [
            'date', 'open_price', 'high_price', 'low_price',
            'close_price', 'volume', 'market_cap',
        ],
    }
    fields = fields_by_spider.get(spider.name)
    if fields is not None:
        self.exporter.fields_to_export = fields
    self.exporter.start_exporting()
def assertExportResult(self, item, expected, **kwargs):
    """Export ``item`` through CsvItemExporter and compare the CSV bytes.

    Extra keyword arguments are forwarded to the exporter constructor.
    """
    buffer = BytesIO()
    exporter = CsvItemExporter(buffer, **kwargs)
    exporter.start_exporting()
    exporter.export_item(item)
    exporter.finish_exporting()
    self.assertCsvEqual(buffer.getvalue(), expected)
def spider_opened(self, spider):
    """Open the spider's output file and start the CSV exporter.

    The spider may override the file name via ``output_filename`` and
    the column list via ``output_fields``.
    """
    filename = getattr(
        spider, 'output_filename', 'result_{}.csv'.format(spider.name))
    out = open(filename, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = getattr(spider, 'output_fields', None)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open <spider>_items.csv and export the category-tree columns."""
    out = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = out
    exporter = CsvItemExporter(out)
    exporter.fields_to_export = ['id', 'name', 'link', 'index', 'parent_id']
    exporter.start_exporting()
    self.exporter = exporter
def __init__(self):
    """Create the clothing and pants CSV exporters.

    Tracks the opened file handles in ``self.files`` so a close hook can
    actually close them — the original declared ``self.files = {}`` but
    opened the files inline and kept no reference, leaking both handles.
    """
    clothing_file = open("clothing.csv", 'wb')
    pants_file = open("pants.csv", 'wb')
    self.files = {'clothing': clothing_file, 'pants': pants_file}
    self.clothingExporter = CsvItemExporter(
        clothing_file, fields_to_export=ClothingItem.fields.keys())
    self.pantsExporter = CsvItemExporter(
        pants_file, fields_to_export=PantsItem.fields.keys())

@classmethod
def from_crawler(cls, crawler):
    """Standard Scrapy factory: build the pipeline and hook spider signals."""
    pipeline = cls()
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
    return pipeline
def spider_opened(self, spider):
    """Open <spider>_items.csv and export the deal-listing columns."""
    out = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = out
    exporter = CsvItemExporter(out)
    exporter.fields_to_export = [
        'title', 'link', 'location', 'original_price', 'price', 'end_date']
    exporter.start_exporting()
    self.exporter = exporter
def __init__(self, spider):
    """Open the three temp CSVs (physicians, specialities, locations)."""
    self.files = []
    self.full_path = result_path(spider.result_path_type, spider.name)

    def _make_exporter(filename, item_cls):
        # Open one temp CSV under full_path, track the handle for later
        # cleanup, and return its configured exporter.
        handle = open(self.full_path + filename, 'wb')
        self.files.append(handle)
        return CsvItemExporter(
            handle, fields_to_export=item_cls.fields.keys())

    self.exporter1 = _make_exporter('temp_physicians.csv', PhysiciansItem)
    self.exporter2 = _make_exporter('temp_specialities.csv', SpecialtiesItem)
    self.exporter3 = _make_exporter('temp_locations.csv', LocationsItem)
def spider_opened(self, spider):
    """Open the spider's CSV output using the pipeline's header list."""
    out = open('%s.csv' % (spider.name), 'w+b')
    self.file = out
    exporter = CsvItemExporter(out)
    exporter.fields_to_export = self.headers
    exporter.start_exporting()
    self.exporter = exporter
def spider_opened(self, spider):
    """Open the world-cup results CSV and start exporting.

    Opens in binary mode ('wb') — CsvItemExporter writes encoded bytes,
    so the original text-mode 'w' open fails under Python 3 — and passes
    include_headers_line by keyword instead of a bare positional True.
    """
    out = open('webScrape.csv', 'wb')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out, include_headers_line=True)
    self.exporter.fields_to_export = [
        'match', 'wcRound', 'group', 'date', 'year', 'venue',
        'hometeam', 'results', 'awayteam',
    ]
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open data.csv and begin exporting the product columns."""
    self.file = open('data.csv', 'w+b')
    exporter = CsvItemExporter(self.file)
    exporter.fields_to_export = [
        'product_asin', 'product_name', 'product_is_have_patten',
        'product_description', 'image_link', 'original_image',
        'color', 'patten', 'price', 'imported_code',
    ]
    exporter.start_exporting()
    self.exporter = exporter
def spider_opened(self, spider):
    """Open <spider>_result.csv and export the product-detail columns."""
    out = open('%s_result.csv' % spider.name, 'w+b')
    self.files[spider] = out
    exporter = CsvItemExporter(out)
    exporter.fields_to_export = [
        'title', 'brand', 'description', 'price', 'main_image_url',
        'additional_image_urls', 'sku', 'category',
    ]
    exporter.start_exporting()
    self.exporter = exporter
def __init__(self, settings):
    """Append-mode CSV exporter keyed on the My_Exporter_URI setting.

    Two cases:
      * the file already exists -> append without re-writing the header
        (include_headers_line=False);
      * the file does not exist -> create it and write the header line.
    Replaces the original's ``False if ... else True`` expression with
    the direct ``not``.
    """
    self.filename = settings['My_Exporter_URI']
    include_headers_line = not os.path.isfile(self.filename)
    self.fileCsv = open(self.filename, 'ab')
    self.exporter = CsvItemExporter(
        self.fileCsv, include_headers_line=include_headers_line)
def spider_opened(self, spider):
    """Open the spider's data CSV(s) and start exporting.

    Spiders with ``market_type == 'direct'`` additionally get a unified
    marketplace export with the same column set.  Files are opened in
    binary mode ('wb') — CsvItemExporter writes encoded bytes, so the
    original text-mode 'w' opens fail under Python 3.
    """
    fields = [
        'identifier', 'sku', 'name', 'price', 'url', 'category',
        'brand', 'image_url', 'shipping_cost', 'stock', 'dealer',
    ]
    f = open(self.get_local_data_filepath(spider), 'wb')
    self.files[spider] = f
    self.exporter = CsvItemExporter(f)
    self.exporter.fields_to_export = fields
    self.exporter.start_exporting()

    if getattr(spider, 'market_type', None) == 'direct':
        f1 = open(
            self.get_local_data_unified_marketplace_data_filepath(spider),
            'wb')
        self.unified_marketplace_files[spider] = f1
        self.exporter_market = CsvItemExporter(f1)
        # Same columns; copy so the two exporters don't share one list.
        self.exporter_market.fields_to_export = fields[:]
        self.exporter_market.start_exporting()
def spider_opened(self, spider):
    """Open <spider>_items.csv and export the calendar-event columns."""
    out = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = out
    exporter = CsvItemExporter(out)
    exporter.fields_to_export = [
        'Subject', 'Start_Date', 'Start_Time', 'End_Date',
        'End_Time', 'Location', 'All_Day_Event',
    ]
    exporter.start_exporting()
    self.exporter = exporter