示例#1
0
 def test_nonstring_types_item(self):
     item = self._get_nonstring_types_item()
     item.pop('time')  # datetime is not marshallable
     fp = tempfile.TemporaryFile()
     ie = MarshalItemExporter(fp)
     ie.start_exporting()
     ie.export_item(item)
     ie.finish_exporting()
     fp.seek(0)
     self.assertEqual(marshal.load(fp), item)
示例#2
0
 def test_nonstring_types_item(self):
     item = self._get_nonstring_types_item()
     item.pop('time')  # datetime is not marshallable
     fp = tempfile.TemporaryFile()
     ie = MarshalItemExporter(fp)
     ie.start_exporting()
     ie.export_item(item)
     ie.finish_exporting()
     fp.seek(0)
     self.assertEqual(marshal.load(fp), item)
示例#3
0
class MarshalExporterPipeline(object):

    def __init__(self, file_name):
        # Storing output filename
        self.file_name = file_name
        # Creating a file handle and setting it to None
        self.file_handle = None

    @classmethod
    def from_crawler(cls, crawler):
        return cls(
            file_name=crawler.settings.get('MARSHAL_PATH'),
        )


    def open_spider(self, spider):
        print('Custom export opened')

        # Opening file in binary-write mode
        file = open(self.file_name, 'wb')
        self.file_handle = file

        # Creating a FanItemExporter object and initiating export
        self.exporter = MarshalItemExporter(file)
        self.exporter.start_exporting()
    
    def close_spider(self, spider):
        print('Custom Exporter closed')

        # Ending the export to file from FanItemExport object
        self.exporter.finish_exporting()

        # Closing the opened output file
        self.file_handle.close()
    
    def process_item(self, item, spider):
        # passing the item to FanItemExporter object for expoting to file
        self.exporter.export_item(item)
        return item