import datetime
import json
import logging

from barman.utils import BarmanEncoder, total_seconds


def statistics(self):
    """
    Return statistics about the S3UploadController object.

    :rtype: dict
    """
    logging.info("S3UploadController.statistics()")
    # This method can only run at the end of a non-empty copy
    assert self.copy_end_time
    assert self.upload_stats

    # Initialise the result, calculating the total runtime
    stat = {
        'total_time': total_seconds(
            self.copy_end_time - self.copy_start_time),
        'number_of_workers': self.cloud_interface.worker_processes_count,
        # Cloud uploads have no analysis
        'analysis_time': 0,
        'analysis_time_per_item': {},
        'copy_time_per_item': {},
        'serialized_copy_time_per_item': {},
    }

    # Calculate the time spent uploading
    upload_start = None
    upload_end = None
    serialized_time = datetime.timedelta(0)
    for name in self.upload_stats:
        data = self.upload_stats[name]
        logging.debug('Calculating statistics for file %s, data: %s',
                      name,
                      json.dumps(data, indent=2, sort_keys=True,
                                 cls=BarmanEncoder))
        if upload_start is None or upload_start > data['start_time']:
            upload_start = data['start_time']
        if upload_end is None or upload_end < data['end_time']:
            upload_end = data['end_time']

        # Cloud uploads have no analysis
        stat['analysis_time_per_item'][name] = 0
        stat['copy_time_per_item'][name] = total_seconds(
            data['end_time'] - data['start_time'])

        # Sum the time spent in every upload part of this file
        parts = data['parts']
        total_time = datetime.timedelta(0)
        for num in parts:
            part = parts[num]
            total_time += part['end_time'] - part['start_time']
        stat['serialized_copy_time_per_item'][name] = total_seconds(
            total_time)
        serialized_time += total_time

    # Store the total time spent copying
    stat['copy_time'] = total_seconds(upload_end - upload_start)
    stat['serialized_copy_time'] = total_seconds(serialized_time)
    return stat
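
# A minimal, hypothetical sketch (not from the source) of the
# ``upload_stats`` structure that statistics() above iterates over: one
# entry per uploaded file, each with per-part timings for multipart
# uploads. All names and timestamps here are illustrative assumptions.
_example_upload_stats = {
    'data.tar': {
        'start_time': datetime.datetime(2020, 1, 1, 12, 0, 0),
        'end_time': datetime.datetime(2020, 1, 1, 12, 0, 10),
        'parts': {
            1: {'start_time': datetime.datetime(2020, 1, 1, 12, 0, 0),
                'end_time': datetime.datetime(2020, 1, 1, 12, 0, 6)},
            2: {'start_time': datetime.datetime(2020, 1, 1, 12, 0, 4),
                'end_time': datetime.datetime(2020, 1, 1, 12, 0, 10)},
        },
    },
}
# With these two overlapping parts, copy_time_per_item would be 10
# seconds (wall clock from first start to last end), while
# serialized_copy_time_per_item would be 12 seconds (6 + 6, summed
# across parts regardless of overlap).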
def statistics(self):
    """
    Return statistics about the copy object.

    :rtype: dict
    """
    # This method can only run at the end of a non-empty copy
    assert self.copy_end_time
    assert self.item_list
    assert self.jobs_done

    # Initialise the result, calculating the total runtime
    stat = {
        'total_time': total_seconds(
            self.copy_end_time - self.copy_start_time),
        'number_of_workers': self.workers,
        'analysis_time_per_item': {},
        'copy_time_per_item': {},
        'serialized_copy_time_per_item': {},
    }

    # Calculate the time spent during the analysis of the items
    analysis_start = None
    analysis_end = None
    for item in self.item_list:
        # Some items don't require analysis
        if not item.analysis_end_time:
            continue
        # Build a human-readable name to refer to an item in the output
        ident = item.label

        if not analysis_start:
            analysis_start = item.analysis_start_time
        elif analysis_start > item.analysis_start_time:
            analysis_start = item.analysis_start_time

        if not analysis_end:
            analysis_end = item.analysis_end_time
        elif analysis_end < item.analysis_end_time:
            analysis_end = item.analysis_end_time

        stat['analysis_time_per_item'][ident] = total_seconds(
            item.analysis_end_time - item.analysis_start_time)
    stat['analysis_time'] = total_seconds(analysis_end - analysis_start)

    # Calculate the time spent per job
    # WARNING: this code assumes that every item is copied separately,
    # so it's strictly tied to the `_job_generator` method code
    item_data = {}
    for job in self.jobs_done:
        # WARNING: the item contained in the job is not the same object
        # contained in self.item_list, as it has gone through two
        # pickling/unpickling cycles
        # Build a human-readable name to refer to an item in the output
        ident = self.item_list[job.item_idx].label

        # If this is the first time we see this item, just store the
        # values from the job
        if ident not in item_data:
            item_data[ident] = {
                'start': job.copy_start_time,
                'end': job.copy_end_time,
                'total_time': job.copy_end_time - job.copy_start_time,
            }
        else:
            # Otherwise, widen the observed interval and accumulate the
            # total busy time
            data = item_data[ident]
            if data['start'] > job.copy_start_time:
                data['start'] = job.copy_start_time
            if data['end'] < job.copy_end_time:
                data['end'] = job.copy_end_time
            data['total_time'] += job.copy_end_time - job.copy_start_time

    # Calculate the time spent copying
    copy_start = None
    copy_end = None
    serialized_time = datetime.timedelta(0)
    for ident in item_data:
        data = item_data[ident]
        if copy_start is None or copy_start > data['start']:
            copy_start = data['start']
        if copy_end is None or copy_end < data['end']:
            copy_end = data['end']
        stat['copy_time_per_item'][ident] = total_seconds(
            data['end'] - data['start'])
        stat['serialized_copy_time_per_item'][ident] = total_seconds(
            data['total_time'])
        serialized_time += data['total_time']

    # Store the total time spent copying
    stat['copy_time'] = total_seconds(copy_end - copy_start)
    stat['serialized_copy_time'] = total_seconds(serialized_time)
    return stat
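
# Hypothetical usage sketch (not part of the source): given a dict
# shaped like the return value of statistics(), estimate the achieved
# parallelism. serialized_copy_time sums per-item busy time while
# copy_time is the wall-clock span of the copy phase, so their ratio
# approximates how well the workers overlapped. The helper name and the
# sample values below are assumptions for illustration only.
def _effective_parallelism(stats):
    if not stats.get('copy_time'):
        return 0.0
    return stats['serialized_copy_time'] / stats['copy_time']


# Example: 36 seconds of cumulative work completed in a 10 second
# window suggests roughly 3.6 workers were busy on average.
assert abs(_effective_parallelism(
    {'copy_time': 10.0, 'serialized_copy_time': 36.0}) - 3.6) < 1e-9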