def test_header(self):
        """Check the CSV header line under several exporter configurations.

        Covers an explicit full field list, a single-field subset, the
        default configuration with two items (header expected once), and
        ``include_headers_line=False``.
        """
        def exported(items, **exporter_kwargs):
            # Run one complete export cycle into a fresh buffer and return
            # everything that was written to it.
            buf = StringIO()
            exporter = CsvItemExporter(buf, **exporter_kwargs)
            exporter.start_exporting()
            for entry in items:
                exporter.export_item(entry)
            exporter.finish_exporting()
            return buf.getvalue()

        item = self.i

        # All fields requested explicitly.
        self.assertEqual(
            exported([item], fields_to_export=item.fields.keys()),
            'age,name\r\n22,John\xc2\xa3\r\n')

        # Only a subset of the fields.
        self.assertEqual(
            exported([item], fields_to_export=['age']),
            'age\r\n22\r\n')

        # Two items, default settings: a single header line is expected.
        self.assertEqual(
            exported([item, item]),
            'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')

        # Header line disabled.
        self.assertEqual(
            exported([item], include_headers_line=False),
            '22,John\xc2\xa3\r\n')
Exemplo n.º 2
0
    def __init__(self):
        """Open the two Yahoo CSV outputs, start an exporter on each, and
        subscribe ``spider_closed`` to the spider-closed signal."""
        self.keystat_file = open('yahoo_keystat.csv', 'w+b')
        self.summary_file = open('yahoo_summary.csv', 'w+b')

        self.summary_exporter = CsvItemExporter(self.summary_file)
        self.keystat_exporter = CsvItemExporter(self.keystat_file)
        for exporter in (self.summary_exporter, self.keystat_exporter):
            exporter.start_exporting()

        dispatcher.connect(self.spider_closed, signals.spider_closed)
Exemplo n.º 3
0
    def __init__(self):
        """Open today's CSV file for appending and start a GBK CSV exporter.

        The output file is named after the current date (``YYYY-MM-DD.csv``)
        and is created if it does not exist yet. The header line is written
        only when the file is new or empty, so that a second run on the same
        day appends rows without inserting another header in the middle of
        the data.
        """
        data_file = "%s.csv" % (datetime.datetime.now().strftime("%Y-%m-%d"))

        # Decide this BEFORE opening: "ab+" creates a missing file, after
        # which an existence check could no longer tell new from old.
        needs_header = (not os.path.exists(data_file)
                        or os.path.getsize(data_file) == 0)

        # Append mode both creates a missing file and preserves existing rows.
        self.file = open(data_file, "ab+")
        self.exporter = CsvItemExporter(self.file,
                                        include_headers_line=needs_header,
                                        encoding="gbk")

        self.exporter.start_exporting()
Exemplo n.º 4
0
    def open_spider(self, spider):
        """Open one CSV file per record type and wrap each in an exporter.

        For every record type ``X`` this sets ``self.X_file`` (the open file)
        and ``self.X_csv`` (its ``CsvItemExporter``).
        """
        outputs = (
            ('contracts', 'contracts.csv'),
            ('contestants', 'contestants.csv'),
            ('invitees', 'invitees.csv'),
            ('documents', 'documents.csv'),
            ('places', 'places.csv'),
        )
        for prefix, filename in outputs:
            handle = open(filename, 'w+b')
            setattr(self, prefix + '_file', handle)
            setattr(self, prefix + '_csv', CsvItemExporter(handle))
Exemplo n.º 5
0
    def spider_opened(self, spider):
        """Prepare the per-spider CSV exporter and load previously seen polls.

        Opens ``data/<spider.name>_latest.csv`` for writing, starts a
        ``CsvItemExporter`` on it restricted to ``spider.fields_to_export``,
        and loads ``data/<spider.name>_dict.json`` into
        ``self.prev_polls[spider]`` (an empty list if that file cannot be
        opened). Also resets ``self.newitems[spider]`` to an empty list.

        Raises:
            ValueError: if the previous-polls JSON file exists but cannot be
                parsed; it is left untouched so it can be inspected.
        """
        latest_polls_file = open('data/' + spider.name + '_latest.csv', 'w')
        self.latest_polls_files[spider] = latest_polls_file

        exporter = CsvItemExporter(latest_polls_file,
                                   fields_to_export=spider.fields_to_export)
        exporter.start_exporting()
        self.exporters[spider] = exporter

        prev_polls_fName = 'data/' + spider.name + '_dict.json'
        try:
            # The context manager closes the file even when json.load raises.
            with open(prev_polls_fName, 'r') as prev_polls_file:
                prev_polls = json.load(prev_polls_file)
        except IOError:
            # data/<name>_dict.json could not be opened: start from scratch.
            prev_polls = []
        except ValueError:
            # The JSON is malformed; it should be inspected before being
            # overwritten, so fail loudly instead of discarding it.
            raise ValueError("Malformed prev_polls_file for " + spider.name +
                             ".")
        self.prev_polls_fNames[spider] = prev_polls_fName
        self.prev_polls[spider] = prev_polls

        self.newitems[spider] = []
    def __init__(self):
        """Create one '|'-delimited review exporter per city in cities.txt.

        Ensures the ``../../Results/Reviews/`` directory exists, then reads
        ``cities.txt`` (one city per line). For each city a
        ``Review_<city>.csv`` file is opened in that directory and a
        ``CsvItemExporter`` over ``Review.fields`` is stored in
        ``self.exportcitymap`` under the city name, with ``/`` replaced by
        ``_`` so the name is usable as a file name. Blank lines are skipped.
        """
        self.files = {}
        if not os.path.exists(os.path.dirname('../../Results/Reviews/')):
            os.makedirs(os.path.dirname('../../Results/Reviews/'))

        self.exportcitymap = {}
        # The context manager closes cities.txt even if creating an exporter
        # fails part-way through the list.
        with open("cities.txt") as f:
            for city in f:
                city = city.rstrip('\n').replace("/", "_")
                if not city:
                    # A blank line would otherwise produce "Review_.csv".
                    continue
                myexport = CsvItemExporter(
                    fields_to_export=Review.fields.keys(),
                    file=open("../../Results/Reviews/Review_" + city + ".csv",
                              'w+'),
                    delimiter='|')
                print('CITY ', city)
                self.exportcitymap[city] = myexport

        # NOTE(review): this function is defined INSIDE __init__, so it is a
        # local name that is discarded when __init__ returns; it never
        # becomes a classmethod of the pipeline. It was presumably meant to
        # sit at class level -- confirm before moving it, since doing so
        # changes how the pipeline gets instantiated.
        @classmethod
        def from_crawler(cls, crawler):
            pipeline = cls()
            crawler.signals.connect(pipeline.spider_opened,
                                    signals.spider_opened)
            crawler.signals.connect(pipeline.spider_closed,
                                    signals.spider_closed)
            return pipeline
Exemplo n.º 7
0
 def spider_opened(self, spider):
   """Open Pets_LasVegas.csv, remember it for this spider, and start a CSV
   exporter limited to the business-listing columns."""
   columns = ['business_name', 'industry_category', 'city', 'state', 'phone_number',
              'street_address', 'website', 'email', 'url', 'count']
   handle = open('Pets_LasVegas.csv', 'w+b')
   self.files[spider] = handle
   self.exporter = CsvItemExporter(handle)
   self.exporter.fields_to_export = columns
   self.exporter.start_exporting()
Exemplo n.º 8
0
    def open_spider(self, spider):
        """Start CSV export to ./output/transactions_alt.csv, but only for
        the spider named 'fto_branch'; other spiders are left alone."""
        if spider.name != 'fto_branch':
            return

        self.file = open('./output/transactions_alt.csv', 'w+b')
        csv_exporter = CsvItemExporter(self.file)
        self.exporter = csv_exporter
        csv_exporter.start_exporting()
Exemplo n.º 9
0
    def open_spider(self, spider):
        """Start CSV export to ./output/<spider.stage>.csv, but only for the
        spider named 'fto_urls'; other spiders are left alone."""
        if spider.name != 'fto_urls':
            return

        target = './output/' + spider.stage + '.csv'
        self.file = open(target, 'w+b')
        csv_exporter = CsvItemExporter(self.file)
        self.exporter = csv_exporter
        csv_exporter.start_exporting()
Exemplo n.º 10
0
 def spider_opened(self, spider):
     """Open <spider.name>_items.csv, remember it for this spider, and start
     a CSV exporter with a fixed column order."""
     columns = ['id', 'title', 'time', 'director', 'year', 'star', 'cost']
     handle = open('%s_items.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     self.exporter = CsvItemExporter(handle)
     self.exporter.fields_to_export = columns
     self.exporter.start_exporting()
Exemplo n.º 11
0
 def spider_opened(self, spider):
     """Open the data file that belongs to this spider and start exporting.

     'mcc1' writes to data1.csv and 'mcc2' to data2.csv. For any other
     spider name no file is opened here and whatever ``self.file`` already
     holds is used.
     """
     targets = {
         'mcc1': 'MicrocapCollector/spiders/data/data1.csv',
         'mcc2': 'MicrocapCollector/spiders/data/data2.csv',
     }
     path = targets.get(spider.name)
     if path is not None:
         self.file = open(path, 'w+b')
     # NOTE(review): for an unrecognised spider name this relies on
     # self.file having been set elsewhere -- confirm that is intended.
     self.exporter = CsvItemExporter(self.file, delimiter=',')
     self.exporter.start_exporting()
Exemplo n.º 12
0
 def spider_opened(self, spider):
     """Open the spider's CSV file (name from ``get_file_name``), remember it
     for this spider, and start an exporter limited to
     ``self.fields_to_export``."""
     handle = open(self.get_file_name(spider, "csv"), 'w+b')
     self.files[spider] = handle
     csv_exporter = CsvItemExporter(
         handle, fields_to_export=self.fields_to_export)
     self.exporter = csv_exporter
     csv_exporter.start_exporting()
Exemplo n.º 13
0
    def spider_closed(self, spider):
        """Write the collected results to res.csv when the spider closes.

        Walks ``self.ordered`` in order and exports an item once for every
        entry of ``self.result`` carrying the same ``'name'``. Only the
        columns name, image, link and address are written.

        The export stays best-effort: a failure does not propagate to the
        caller. It is logged with its traceback instead of being silently
        discarded, and the output file is closed in every case.
        """
        import logging

        try:
            # The context manager closes res.csv even if exporting fails.
            with open('res.csv', 'w+b') as out:
                self.exporter = CsvItemExporter(out)
                self.exporter.fields_to_export = [
                    'name', 'image', 'link', 'address'
                ]
                self.exporter.start_exporting()

                for item in self.ordered:
                    for res in self.result:
                        if item['name'] == res['name']:
                            self.exporter.export_item(item)

                self.exporter.finish_exporting()
        except Exception:
            # `Exception` rather than a bare except, so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            logging.getLogger(__name__).exception(
                "Failed to export results to res.csv")
0
    def open_spider(self, spider):
        """Start CSV export to ./output/fto_material.csv, but only for the
        spider named 'fto_material'; other spiders are left alone."""
        if spider.name != 'fto_material':
            return

        self.file = open('./output/fto_material.csv', 'w+b')
        csv_exporter = CsvItemExporter(self.file)
        self.exporter = csv_exporter
        csv_exporter.start_exporting()
Exemplo n.º 15
0
 def spider_opened(self, spider):
     """Open douban_tv_hanju.csv, remember it for this spider, print a trace
     banner, and start a CSV exporter on the file."""
     handle = open('douban_tv_hanju.csv', 'wb+')
     self.files[spider] = handle
     print('==========pipeline==========spider_opened==========')
     csv_exporter = CsvItemExporter(handle)
     self.exporter = csv_exporter
     csv_exporter.start_exporting()
Exemplo n.º 16
0
 def spider_opened(self, spider):
     """Open <spider.name>_<YYYYMMDD>.csv (today's date), remember it for
     this spider, and start a CSV exporter with a fixed column order."""
     stamp = datetime.datetime.now().strftime('%Y%m%d')
     handle = open('%s_%s.csv' % (spider.name, stamp), 'w+b')
     self.files[spider] = handle
     csv_exporter = CsvItemExporter(handle)
     csv_exporter.fields_to_export = ['title', 'save', 'desc', 'long_desc', 'image']
     self.exporter = csv_exporter
     csv_exporter.start_exporting()
Exemplo n.º 17
0
 def spider_opened(self, spider):
     """Open <spider.name>.csv, remember it for this spider, and start a CSV
     exporter.

     The 'all-coins' and 'historical-data' spiders each get a fixed column
     list; for any other spider ``fields_to_export`` is left untouched.
     """
     columns_by_spider = {
         "all-coins": [
             'rank',
             'name',
             'type',
             'symbol',
             'website',
             'market_cap_usd',
             'price_usd',
             'price_btc',
             'volume_24_usd',
             'change_24',
         ],
         "historical-data": [
             'date',
             'open_price',
             'high_price',
             'low_price',
             'close_price',
             'volume',
             'market_cap',
         ],
     }

     handle = open('%s.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     self.exporter = CsvItemExporter(handle)

     columns = columns_by_spider.get(spider.name)
     if columns is not None:
         self.exporter.fields_to_export = columns
     self.exporter.start_exporting()
Exemplo n.º 18
0
 def assertExportResult(self, item, expected, **kwargs):
     """Export ``item`` through a ``CsvItemExporter`` built with ``kwargs``
     and compare the bytes written against ``expected`` via
     ``assertCsvEqual``."""
     buf = BytesIO()
     exporter = CsvItemExporter(buf, **kwargs)
     exporter.start_exporting()
     exporter.export_item(item)
     exporter.finish_exporting()
     written = buf.getvalue()
     self.assertCsvEqual(written, expected)
Exemplo n.º 19
0
 def spider_opened(self, spider):
     """Open the spider's output file and start a CSV exporter on it.

     The file name comes from ``spider.output_filename`` when the spider
     defines it, otherwise ``result_<spider.name>.csv``. The column list
     comes from ``spider.output_fields`` when defined, otherwise ``None``.
     """
     fallback_name = 'result_{}.csv'.format(spider.name)
     target = getattr(spider, 'output_filename', fallback_name)
     handle = open(target, 'w+b')
     self.files[spider] = handle
     self.exporter = CsvItemExporter(handle)
     columns = getattr(spider, 'output_fields', None)
     self.exporter.fields_to_export = columns
     self.exporter.start_exporting()
Exemplo n.º 20
0
 def spider_opened(self, spider):
     """Open <spider.name>_items.csv, remember it for this spider, and start
     a CSV exporter with a fixed column order."""
     columns = ['id', 'name', 'link', 'index', 'parent_id']
     handle = open('%s_items.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     self.exporter = CsvItemExporter(handle)
     self.exporter.fields_to_export = columns
     self.exporter.start_exporting()
Exemplo n.º 21
0
    def __init__(self):
        """Create the clothing and pants CSV exporters, each writing to its
        own file with the columns of its item class."""
        self.files = {}

        clothing_columns = ClothingItem.fields.keys()
        clothing_out = open("clothing.csv", 'wb')
        self.clothingExporter = CsvItemExporter(
            fields_to_export=clothing_columns, file=clothing_out)

        pants_columns = PantsItem.fields.keys()
        pants_out = open("pants.csv", 'wb')
        self.pantsExporter = CsvItemExporter(
            fields_to_export=pants_columns, file=pants_out)

        # NOTE(review): this function is defined INSIDE __init__, so it is a
        # local name that is discarded when __init__ returns; it never
        # becomes a classmethod of the pipeline. Presumably it was meant to
        # live at class level -- confirm before moving it.
        @classmethod
        def from_crawler(cls, crawler):
            instance = cls()
            connect = crawler.signals.connect
            connect(instance.spider_opened, signals.spider_opened)
            connect(instance.spider_closed, signals.spider_closed)
            return instance
Exemplo n.º 22
0
 def spider_opened(self, spider):
     """Open <spider.name>_items.csv, remember it for this spider, and start
     a CSV exporter with a fixed column order."""
     columns = [
         'title', 'link', 'location', 'original_price', 'price', 'end_date'
     ]
     handle = open('%s_items.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     self.exporter = CsvItemExporter(handle)
     self.exporter.fields_to_export = columns
     self.exporter.start_exporting()
Exemplo n.º 23
0
    def __init__(self, spider):
        """Open the three temporary CSV outputs under the spider's result
        path and create one exporter per item class.

        Every opened file is appended to ``self.files`` in the order
        physicians, specialities, locations.
        """
        self.files = []
        self.full_path = result_path(spider.result_path_type, spider.name)

        def build(filename, item_cls):
            # Open one output under full_path, track it, and wrap it in an
            # exporter restricted to the item class's declared fields.
            handle = open(self.full_path + filename, 'wb')
            self.files.append(handle)
            return CsvItemExporter(
                fields_to_export=item_cls.fields.keys(), file=handle)

        self.exporter1 = build('temp_physicians.csv', PhysiciansItem)
        self.exporter2 = build('temp_specialities.csv', SpecialtiesItem)
        self.exporter3 = build('temp_locations.csv', LocationsItem)
    def spider_opened(self, spider):
        """Open <spider.name>.csv and start a CSV exporter whose columns are
        taken from ``self.headers``."""
        self.file = open('%s.csv' % (spider.name), 'w+b')
        csv_exporter = CsvItemExporter(self.file)
        self.exporter = csv_exporter
        csv_exporter.fields_to_export = self.headers
        csv_exporter.start_exporting()
Exemplo n.º 25
0
 def spider_opened(self, spider):
     """Open webScrape.csv, remember it for this spider, and start a CSV
     exporter with the match-result columns."""
     columns = [
         'match', 'wcRound', 'group', 'date', 'year', 'venue', 'hometeam',
         'results', 'awayteam'
     ]
     handle = open('webScrape.csv', 'w')
     self.files[spider] = handle
     self.exporter = CsvItemExporter(handle, True)
     self.exporter.fields_to_export = columns
     self.exporter.start_exporting()
Exemplo n.º 26
0
 def spider_opened(self, spider):
     """Open data.csv and start a CSV exporter with the product columns."""
     columns = [
         'product_asin', 'product_name', 'product_is_have_patten',
         'product_description', 'image_link', 'original_image', 'color',
         'patten', 'price', 'imported_code'
     ]
     self.file = open('data.csv', 'w+b')
     self.exporter = CsvItemExporter(self.file)
     self.exporter.fields_to_export = columns
     self.exporter.start_exporting()
Exemplo n.º 27
0
 def spider_opened(self, spider):
     """Open <spider.name>_result.csv, remember it for this spider, and
     start a CSV exporter with the product-listing columns."""
     columns = [
         'title', 'brand', 'description', 'price', 'main_image_url',
         'additional_image_urls', 'sku', 'category'
     ]
     handle = open('%s_result.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     self.exporter = CsvItemExporter(handle)
     self.exporter.fields_to_export = columns
     self.exporter.start_exporting()
Exemplo n.º 28
0
 def __init__(self, settings):
     """Open the file named by ``settings['My_Exporter_URI']`` for appending
     and create a CSV exporter on it.

     Two cases are distinguished:
     - the file already exists: the exporter is created with
       ``include_headers_line=False``;
     - the file does not exist yet: the exporter is created with
       ``include_headers_line=True``.
     """
     self.filename = settings['My_Exporter_URI']
     # Must be evaluated before open(): append mode creates a missing file.
     write_header = not os.path.isfile(self.filename)
     self.fileCsv = open(self.filename, 'ab')
     self.exporter = CsvItemExporter(
         self.fileCsv, include_headers_line=write_header)
Exemplo n.º 29
0
 def spider_opened(self, spider):
     """Open the spider's local data file and start a CSV exporter on it.

     When the spider has ``market_type == 'direct'``, a second exporter
     (``self.exporter_market``) is started on the unified-marketplace file,
     using a copy of the same column list.
     """
     columns = [
         'identifier', 'sku', 'name', 'price', 'url', 'category', 'brand',
         'image_url', 'shipping_cost', 'stock', 'dealer'
     ]
     main_out = open(self.get_local_data_filepath(spider), 'w')
     self.files[spider] = main_out
     self.exporter = CsvItemExporter(main_out)
     self.exporter.fields_to_export = columns
     self.exporter.start_exporting()

     # A spider without a market_type attribute is treated as not direct.
     if getattr(spider, 'market_type', None) != 'direct':
         return

     market_path = self.get_local_data_unified_marketplace_data_filepath(
         spider)
     market_out = open(market_path, 'w')
     self.unified_marketplace_files[spider] = market_out
     self.exporter_market = CsvItemExporter(market_out)
     # Copy the list so the two exporters do not share one mutable object.
     self.exporter_market.fields_to_export = list(
         self.exporter.fields_to_export)
     self.exporter_market.start_exporting()
Exemplo n.º 30
0
 def spider_opened(self, spider):
     """Open <spider.name>_items.csv, remember it for this spider, and start
     a CSV exporter with the calendar-event columns."""
     columns = [
         'Subject', 'Start_Date', 'Start_Time', 'End_Date', 'End_Time',
         'Location', 'All_Day_Event'
     ]
     handle = open('%s_items.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     self.exporter = CsvItemExporter(handle)
     self.exporter.fields_to_export = columns
     self.exporter.start_exporting()