Exemplo n.º 1
0
 def iteritems(self):
     """Yield every deserialized record from the streams not yet consumed.

     Each entry produced by ``self.get_read_streams()`` is wrapped with
     ``cohere_stream``; when its name is already listed in
     ``self.last_position['readed_streams']`` the stream is closed without
     being read. Otherwise it is decompressed, deserialized, its records
     are yielded, and its name is appended to that list. ``self.finished``
     is set once all streams have been walked.
     """
     for file_obj, fn, _size in self.get_read_streams():
         stream = cohere_stream(file_obj)
         try:
             already_consumed = fn in self.last_position['readed_streams']
             if not already_consumed:
                 # Rebind ``stream`` step by step so the ``finally`` below
                 # always closes the most recently created wrapper.
                 stream = self.decompressor.decompress(stream)
                 stream = cohere_stream(stream)
                 for record in self.deserializer.deserialize(stream):
                     yield record
                 # Only reached when the stream was read to the end.
                 self.last_position['readed_streams'].append(fn)
         finally:
             stream.close()
     self.finished = True
Exemplo n.º 2
0
 def execute(self):
     """Copy each stream from the configured reader to the configured writer.

     Streams found in ``self.bypass_state.skipped`` are only logged. Every
     other stream is opened through the reader, passed to
     ``writer.write_stream`` and then recorded with
     ``self.bypass_state.commit_copied``. The reader and the writer are
     both closed when the loop ends, including on error.
     """
     # We can't count items on streamed bypasses
     self.valid_total_count = False
     self.bypass_state = StreamBypassState(self.config, self.metadata)
     loader = ModuleLoader()
     reader = loader.load_reader(
         self.config.reader_options, self.metadata)
     writer = loader.load_writer(
         self.config.writer_options, self.metadata)
     with closing(reader), closing(writer):
         for stream in reader.get_read_streams():
             if stream in self.bypass_state.skipped:
                 skip_msg = 'Skip file {}'.format(stream.filename)
                 logging.log(logging.INFO, skip_msg)
                 continue
             source = cohere_stream(reader.open_stream(stream))
             start_msg = 'Starting to copy file {}'.format(stream.filename)
             logging.log(logging.INFO, start_msg)
             # closing() releases the source even when the write fails.
             with closing(source):
                 writer.write_stream(stream, source)
             done_msg = 'Finished copying file {}'.format(stream.filename)
             logging.log(logging.INFO, done_msg)
             # Committed only after a successful write.
             self.bypass_state.commit_copied(stream)
Exemplo n.º 3
0
 def iteritems_retrying(self, stream_data):
     """Yield the items of one stream, skipping those already emitted.

     Nothing is yielded when ``stream_data.filename`` is already listed in
     ``self.last_position['readed_streams']``. Otherwise the stream is
     opened, decompressed and deserialized. The 1-based count of the last
     yielded item is stored in ``self.last_position['stream_offset']``
     under the filename, so a later call for the same stream skips the
     items that were already yielded.

     When the stream is exhausted its filename is appended to
     ``readed_streams`` and its offset entry is dropped.
     """
     if stream_data.filename in self.last_position['readed_streams']:
         return
     stream = cohere_stream(self.open_stream(stream_data))
     try:
         stream = self.decompressor.decompress(stream)
         stream = cohere_stream(stream)
         stream_offset = self.last_position['stream_offset']
         items_offset = stream_offset.get(stream_data.filename, 0)
         items = self.deserializer.deserialize(stream)
         for items_readed, item in enumerate(items, 1):
             if items_readed > items_offset:
                 # Record progress before handing the item out, so the
                 # offset already covers it if the consumer fails.
                 stream_offset[stream_data.filename] = items_readed
                 yield item
     finally:
         stream.close()
     self.last_position['readed_streams'].append(stream_data.filename)
     # The offset entry only exists if at least one item was yielded now or
     # on a previous attempt; an empty stream never creates it, so a plain
     # ``del`` would raise KeyError here.
     stream_offset.pop(stream_data.filename, None)
Exemplo n.º 4
0
 def iteritems_retrying(self, stream_data):
     """Yield the items of one stream, skipping those already emitted.

     Nothing is yielded when ``stream_data.filename`` is already listed in
     ``self.last_position['readed_streams']``. Otherwise the stream is
     opened, decompressed and deserialized. The 1-based count of the last
     yielded item is stored in ``self.last_position['stream_offset']``
     under the filename, so a later call for the same stream skips the
     items that were already yielded.

     When the stream is exhausted its filename is appended to
     ``readed_streams`` and its offset entry is dropped.
     """
     if stream_data.filename in self.last_position['readed_streams']:
         return
     stream = cohere_stream(self.open_stream(stream_data))
     try:
         stream = self.decompressor.decompress(stream)
         stream = cohere_stream(stream)
         stream_offset = self.last_position['stream_offset']
         items_offset = stream_offset.get(stream_data.filename, 0)
         items = self.deserializer.deserialize(stream)
         for items_readed, item in enumerate(items, 1):
             if items_readed > items_offset:
                 # Record progress before handing the item out, so the
                 # offset already covers it if the consumer fails.
                 stream_offset[stream_data.filename] = items_readed
                 yield item
     finally:
         stream.close()
     self.last_position['readed_streams'].append(stream_data.filename)
     # The offset entry only exists if at least one item was yielded now or
     # on a previous attempt; an empty stream never creates it, so a plain
     # ``del`` would raise KeyError here.
     stream_offset.pop(stream_data.filename, None)
Exemplo n.º 5
0
 def execute(self):
     """Copy each stream from the configured reader to the configured writer.

     Streams found in ``self.bypass_state.skipped`` are only logged. Every
     other stream is opened through the reader, passed to
     ``writer.write_stream`` and then recorded with
     ``self.bypass_state.commit_copied``. The reader and the writer are
     both closed when the loop ends, including on error.
     """
     # We can't count items on streamed bypasses
     self.valid_total_count = False
     self.bypass_state = StreamBypassState(self.config, self.metadata)
     loader = ModuleLoader()
     reader = loader.load_reader(self.config.reader_options, self.metadata)
     writer = loader.load_writer(self.config.writer_options, self.metadata)
     with closing(reader), closing(writer):
         for stream in reader.get_read_streams():
             if stream in self.bypass_state.skipped:
                 logging.log(logging.INFO, 'Skip file {}'.format(stream.filename))
                 continue
             source = cohere_stream(reader.open_stream(stream))
             logging.log(logging.INFO, 'Starting to copy file {}'.format(stream.filename))
             # closing() releases the source even when the write fails.
             with closing(source):
                 writer.write_stream(stream, source)
             logging.log(logging.INFO, 'Finished copying file {}'.format(stream.filename))
             # Committed only after a successful write.
             self.bypass_state.commit_copied(stream)