Exemplo n.º 1
0
 def process(self, block_iter):
     """Yield processed blocks, applying the fractional per-dataset file limit.

     With a non-negative ``self._limit_files_fraction`` all blocks are
     processed up front, a per-dataset file goal is derived from
     ``self._files_per_ds`` (never less than 1), and each block is handed
     to ``self._reduce_fn_list`` before being yielded. Otherwise the blocks
     produced by the base class processing are yielded unchanged.
     """
     # Start from empty bookkeeping each time the generator is run
     self._limit_files_per_ds, self._files_per_ds = {}, {}

     if not self._limit_files_fraction >= 0:
         # Fractional limit not active - stream the blocks straight through
         for plain_block in DataProcessor.process(self, block_iter):
             yield plain_block
         return

     # Everything is materialized first; the per-dataset counts read below
     # are presumably collected while the base class walks the blocks.
     processed_blocks = list(DataProcessor.process(self, block_iter))

     file_goal = {}
     for (ds_name, ds_file_count) in self._files_per_ds.items():
         scaled = int(self._limit_files_fraction * ds_file_count)
         file_goal[ds_name] = scaled or 1  # a goal of zero is bumped to one

     for current_block in processed_blocks:
         self._reduce_fn_list(current_block, file_goal)
         yield current_block
Exemplo n.º 2
0
	def process(self, block_iter):
		"""Process the blocks once and set the split options if targets exist.

		When the processor is disabled or has no config left, the incoming
		iterator is returned untouched. Otherwise the base class processing
		is run to completion, 'files per job' and 'events per job' are passed
		to ``self._set_split_opt`` if either job target is positive, and the
		config reference is dropped so later calls return the input directly.
		"""
		if not (self.enabled() and self._config):
			return block_iter

		processed_blocks = list(DataProcessor.process(self, block_iter))

		has_job_target = (self._target_jobs > 0) or (self._target_jobs_ds > 0)
		if has_job_target:
			files_snapshot = dict(self._files)
			self._set_split_opt(self._config, 'files per job', files_snapshot,
				self._target_jobs, self._target_jobs_ds)
			entries_snapshot = dict(self._entries)
			self._set_split_opt(self._config, 'events per job', entries_snapshot,
				self._target_jobs, self._target_jobs_ds)

		# Clearing the config makes every following call take the early return
		self._config = None
		return processed_blocks
Exemplo n.º 3
0
 def process(self, block_iter):
     """Run the base processing eagerly and forward the split options.

     Returns ``block_iter`` unchanged when the processor is disabled or
     ``self._config`` is falsy. Otherwise returns the list of blocks from
     the base class processing; if a positive job target is set, copies of
     ``self._files`` and ``self._entries`` are passed to
     ``self._set_split_opt`` under 'files per job' and 'events per job'.
     ``self._config`` is reset to None afterwards.
     """
     if not (self.enabled() and self._config):
         return block_iter

     result = list(DataProcessor.process(self, block_iter))

     if (self._target_jobs > 0) or (self._target_jobs_ds > 0):
         # (option name, attribute holding the per-dataset numbers); the
         # attribute is looked up right before each call, in this order
         option_sources = (
             ('files per job', '_files'),
             ('events per job', '_entries'),
         )
         for (option_name, attr_name) in option_sources:
             self._set_split_opt(self._config, option_name,
                                 dict(getattr(self, attr_name)),
                                 self._target_jobs, self._target_jobs_ds)

     self._config = None  # later calls return the input directly
     return result
Exemplo n.º 4
0
 def process(self, blockIter):
     """Reset the recorded URL / block sets and delegate to the base class."""
     # Both records start out empty for every processing run
     (self._recordedURL, self._recordedBlock) = (set(), set())
     return DataProcessor.process(self, blockIter)
Exemplo n.º 5
0
	def process(self, blockIter):
		"""Start with empty URL and block records, then run the base processing.

		Returns whatever ``DataProcessor.process`` returns for ``blockIter``.
		"""
		self._recordedURL, self._recordedBlock = set(), set()
		base_result = DataProcessor.process(self, blockIter)
		return base_result
Exemplo n.º 6
0
	def process(self, block_iter):
		"""Clear the URL and block records before delegating to the base class."""
		# Reset both records in one step
		(self._recorded_url, self._recorded_block) = (set(), set())
		return DataProcessor.process(self, block_iter)
Exemplo n.º 7
0
 def process(self, block_iter):
     """Reset the recorded URLs and blocks, then run the base processing.

     Returns the result of ``DataProcessor.process`` for ``block_iter``.
     """
     self._recorded_url, self._recorded_block = set(), set()
     base_result = DataProcessor.process(self, block_iter)
     return base_result