def bench_outputs_with_multiple_files_multiple_writers(
        self, container_name, directory_name, file_name, output_per_rank,
        data=None):
    '''
    Benchmarking outputs with pattern `Multiple Files Multiple Writers`

    Each process writes exclusively to its own blob inside the given
    container.  The blob name is file_name + '{:0>5}'.format(__mpi_rank),
    i.e. blob_name00001 where the suffix is the rank of the process.

    param:
     container_name: target container
     directory_name: unused for blob storage; kept for interface parity
      with the other bench_outputs_* methods
     file_name: target file base, target blob name is composed of
      file_name + '{:0>5}'.format(__mpi_rank)
     output_per_rank: size of outputs per rank in MiB
     data: optional cached data for outputs, currently only data less
      than SECTION_LIMIT is allowed
    return:
     max_write_time: maximum writing time
     min_write_time: minimum writing time
     avg_write_time: average writing time
    '''
    # Data prepare: a single create_blob_from_bytes call can only carry
    # up to SECTION_LIMIT MiB, so refuse anything larger up front.
    if output_per_rank > self.SECTION_LIMIT:
        raise ValueError(
            'Not support for {} MiB output per rank now'.format(
                output_per_rank))
    # `is None` rather than `== None`: identity test per PEP 8, and it
    # cannot be fooled by a custom __eq__ on the payload object.
    if data is None:
        output_per_rank_in_bytes = output_per_rank << 20  # MiB -> bytes
        data = common.workload_generator(self.__mpi_rank,
                                         output_per_rank_in_bytes)
    output_blob_name = file_name + '{:0>5}'.format(self.__mpi_rank)

    # Synchronize all ranks so every process starts timing together.
    MPI.COMM_WORLD.Barrier()
    start = MPI.Wtime()
    self.__storage_service.create_blob_from_bytes(container_name,
                                                  output_blob_name, data)
    end = MPI.Wtime()
    MPI.COMM_WORLD.Barrier()

    return common.collect_bench_metrics(end - start)
def bench_outputs_with_multiple_files_multiple_writers(
        self, container_name, directory_name, file_name, output_per_rank,
        data=None):
    '''
    Benchmarking outputs with pattern `Multiple Files Multiple Writers`

    Each process writes exclusively to its own local file within the same
    directory (POSIX-filesystem variant of the pattern).

    param:
     container_name: target container base (unused here; kept for
      interface parity with the object-storage variants)
     directory_name: target directory (unused here; the file is created
      relative to the current working directory)
     file_name: target file base, target file name is composed of
      file_name + '{:0>5}'.format(__mpi_rank)
     output_per_rank: size of outputs per rank in MiB
     data: optional cached data for outputs
    return:
     max_write_time: maximum writing time
     min_write_time: minimum writing time
     avg_write_time: average writing time
    '''
    # Data prepare
    output_per_rank_in_bytes = output_per_rank << 20  # MiB -> bytes
    # `is None` rather than `== None`: identity test per PEP 8.
    if data is None:
        data = common.workload_generator(self.__mpi_rank,
                                         output_per_rank_in_bytes)
    output_file_name = file_name + '{:0>5}'.format(self.__mpi_rank)

    # Synchronize all ranks so every process starts timing together.
    MPI.COMM_WORLD.Barrier()
    start = MPI.Wtime()
    with open(output_file_name, 'wb') as f:
        f.write(data)
    end = MPI.Wtime()
    MPI.COMM_WORLD.Barrier()

    # NOTE(review): the second argument (5) is presumably a rounding /
    # precision parameter of collect_bench_metrics — confirm in common.
    return common.collect_bench_metrics(end - start, 5)
def bench():
    '''
    Entry point: read config.ini, build the bench tool for the configured
    target, and run the configured input/output pattern `repeat_times`
    times, printing max/min/avg metrics for each run.

    raises:
     NotImplementedError: when bench_pattern is not recognised for the
      selected bench_items.
    '''
    # Configurations
    config = configparser.ConfigParser()
    config.read('config.ini')
    config_bench = config['BENCH']
    config_azure = config['AZURE']

    # MPI envs
    rank, size, proc_name = common.get_mpi_env()
    # BUG FIX: bool() on a non-empty string is always True (even the
    # string "False"); getboolean() parses yes/no/true/false/0/1 properly.
    if config_bench.getboolean('show_mpi_env'):
        print('Rank {0} of {1}. Proc name:{2}'.format(rank, size, proc_name))
        print()

    # Bench specifications
    bench_items = config_bench['bench_items']
    bench_targets = config_bench['bench_targets']
    repeat_times = int(config_bench['repeat_time'])
    bench_pattern = config_bench['bench_pattern']

    # Bench infos
    account_name = config_azure['account_name']
    account_key = config_azure['account_key']
    container_name = config_azure['container_name']
    directory_name = config_azure['directory_name']
    file_name = config_azure['file_name']
    output_per_rank = int(config_bench['output_per_rank'])

    MPI.COMM_WORLD.Barrier()

    # Benchmarking
    if 0 == rank:
        print(
            'Bench Target: {0}, Bench Item: {1}, Bench Pattern:{2}, Bench repeat {3} times'
            .format(bench_targets, bench_items, bench_pattern, repeat_times))

    # Get tool for the configured backend; BaseBench acts as a no-op
    # fallback for unknown targets.
    if bench_targets == 'azure_blob':
        bench_tool = AzureBlobBench(account_name, account_key,
                                    [container_name])
    elif bench_targets == 'azure_file':
        bench_tool = AzureFileBench(account_name, account_key,
                                    [container_name])
    elif bench_targets == 'cirrus_lustre':
        bench_tool = CirrusLustreBench()
    else:
        bench_tool = BaseBench(None, None, [])

    if bench_items == 'input':
        if bench_pattern == 'SFMR':
            for _ in range(0, repeat_times):
                max_time, min_time, avg_time = \
                    bench_tool.bench_inputs_with_single_file_multiple_readers(
                        container_name, None, file_name)
                __print_metrics(max_time, min_time, avg_time)
        elif bench_pattern == 'MFMR':
            for _ in range(0, repeat_times):
                max_time, min_time, avg_time = \
                    bench_tool.bench_inputs_with_multiple_files_multiple_readers(
                        container_name, None, file_name)
                __print_metrics(max_time, min_time, avg_time)
        elif bench_pattern == 'MFMRMC':
            for _ in range(0, repeat_times):
                max_time, min_time, avg_time = \
                    bench_tool.bench_inputs_with_multiple_files_multiple_readers_multiple_containers(
                        container_name, None, file_name)
                __print_metrics(max_time, min_time, avg_time)
        else:
            raise NotImplementedError()
    elif bench_items == 'output':
        # Generate the per-rank payload once and reuse it across repeats
        # so data generation time is not measured.
        if bench_pattern == 'SFMW':
            data = common.workload_generator(rank, output_per_rank << 20)
            for _ in range(0, repeat_times):
                max_time, min_time, avg_time = \
                    bench_tool.bench_outputs_with_single_file_multiple_writers(
                        container_name, directory_name, file_name,
                        output_per_rank, data)
                __print_metrics(max_time, min_time, avg_time)
        elif bench_pattern == 'MFMW':
            data = common.workload_generator(rank, output_per_rank << 20)
            for _ in range(0, repeat_times):
                max_time, min_time, avg_time = \
                    bench_tool.bench_outputs_with_multiple_files_multiple_writers(
                        container_name, directory_name, file_name,
                        output_per_rank, data=data)
                __print_metrics(max_time, min_time, avg_time)
        elif bench_pattern == 'MFMWMC':
            data = common.workload_generator(rank, output_per_rank << 20)
            for _ in range(0, repeat_times):
                max_time, min_time, avg_time = \
                    bench_tool.bench_outputs_with_multiple_files_multiple_writers_multiple_containers(
                        container_name, directory_name, file_name,
                        output_per_rank, data=data)
                __print_metrics(max_time, min_time, avg_time)
        else:
            raise NotImplementedError()
def bench_outputs_with_single_file_multiple_writers(self, container_name,
                                                    directory_name,
                                                    file_name,
                                                    output_per_rank,
                                                    data=None):
    '''
    Benchmarking outputs with pattern `Single File Multiple Writers`

    Each process writes to a single shared file, each in its own byte
    range exclusively.  Data from different ranks is stored in different
    ranges.

    The process is:
    1. Rank 0 creates the file with the full size
    2. Each process updates its own range of the file in chunks of at
       most FILE_CHUNK_LIMIT MiB

    param:
     container_name: target container
     directory_name: target directory
     file_name: target file
     output_per_rank: size of outputs per rank in MiB
     data: optional cached data for outputs
    return:
     max_write_time: maximum writing time
     min_write_time: minimum writing time
     avg_write_time: average writing time
    '''
    # Data prepare
    output_per_rank_in_bytes = output_per_rank << 20  # MiB -> bytes
    # `is None` rather than `== None`: identity test per PEP 8.
    if data is None:
        data = common.workload_generator(self.__mpi_rank,
                                         self.FILE_CHUNK_LIMIT_IN_BYTES)
    else:
        # BUG FIX: the slice previously ended at
        # FILE_CHUNK_LIMIT_IN_BYTES - 1, truncating each full chunk to
        # one byte short while range offsets below advance by the full
        # FILE_CHUNK_LIMIT_IN_BYTES — leaving a 1-byte hole per chunk.
        data = data[0:self.FILE_CHUNK_LIMIT_IN_BYTES]
    data_last_chunk = data
    chunk_count = output_per_rank // self.FILE_CHUNK_LIMIT
    # Last chunk isn't full: generate a shorter payload for the remainder.
    if output_per_rank % self.FILE_CHUNK_LIMIT:
        chunk_count = chunk_count + 1
        data_last_chunk = common.workload_generator(
            self.__mpi_rank,
            (output_per_rank % self.FILE_CHUNK_LIMIT) << 20)

    # Step .1 File create (rank 0 only pre-allocates the shared file).
    create_start = 0
    create_end = 0
    if 0 == self.__mpi_rank:
        create_start = MPI.Wtime()
        self.__storage_service.create_file(
            container_name, directory_name, file_name,
            output_per_rank_in_bytes * self.__mpi_size)
        create_end = MPI.Wtime()
    # NOTE(review): create_time is non-zero only on rank 0, so the
    # adjustment below differs across ranks after the collective metric
    # collection — confirm whether it should be broadcast instead.
    create_time = create_end - create_start

    MPI.COMM_WORLD.Barrier()
    start = MPI.Wtime()
    for i in range(0, chunk_count):
        if i != (chunk_count - 1):
            start_range = (self.__mpi_rank * output_per_rank_in_bytes +
                           i * self.FILE_CHUNK_LIMIT_IN_BYTES)
            end_range = start_range + len(data) - 1
            self.__storage_service.update_range(container_name,
                                                directory_name, file_name,
                                                data, start_range,
                                                end_range)
        elif i == (chunk_count - 1):
            start_range = (self.__mpi_rank * output_per_rank_in_bytes +
                           i * self.FILE_CHUNK_LIMIT_IN_BYTES)
            end_range = start_range + len(data_last_chunk) - 1
            self.__storage_service.update_range(container_name,
                                                directory_name, file_name,
                                                data_last_chunk,
                                                start_range, end_range)
    end = MPI.Wtime()
    MPI.COMM_WORLD.Barrier()

    max_write, min_write, avg_write = common.collect_bench_metrics(
        end - start)
    # Include the (rank-0) file-creation time in the reported metrics.
    max_write = round(max_write + create_time, 3)
    min_write = round(min_write + create_time, 3)
    avg_write = round(avg_write + create_time, 3)
    return max_write, min_write, avg_write
def bench_outputs_with_single_file_multiple_writers(
        self, container_name, directory_name, file_name, output_per_rank,
        data=None):
    '''
    Benchmarking outputs with pattern `Single File Multiple Writers`

    Each process writes to a single shared blob, each in its own blocks
    exclusively.  Data from different ranks is stored in different
    blocks.  Pattern of global block ids: 00002-00005, where the first
    section is the rank and the second section is the block index
    written by that rank.

    The process is:
    1. Each rank puts its blocks to Azure
    2. MPI_Barrier() to wait for all ranks
    3. Rank 0 gets the uncommitted block list and sorts it so the data
       order matches rank/block order
    4. Rank 0 commits the block list

    param:
     container_name: target container
     directory_name: target directory
     file_name: target file
     output_per_rank: size of outputs per rank in MiB
     data: optional cached data for outputs, in this case stands for
      data of a full block (BLOCK_LIMIT MiB)
    return:
     max_write_time: maximum writing time
     min_write_time: minimum writing time
     avg_write_time: average writing time
    '''
    # Data prepare
    # `is None` rather than `== None`: identity test per PEP 8.
    if data is None:
        data = common.workload_generator(self.__mpi_rank,
                                         self.BLOCK_LIMIT_IN_BYTES)
    else:
        # BUG FIX: the slice previously ended at BLOCK_LIMIT_IN_BYTES - 1,
        # so every "full" block was one byte short of BLOCK_LIMIT and the
        # committed blob was smaller than the requested output size.
        data = data[0:self.BLOCK_LIMIT_IN_BYTES]
    last_block_data = data
    block_count = output_per_rank // self.BLOCK_LIMIT
    # Last block isn't full: generate a shorter payload for the remainder.
    if output_per_rank % self.BLOCK_LIMIT:
        block_count = block_count + 1
        last_block_data = common.workload_generator(
            self.__mpi_rank, (output_per_rank % self.BLOCK_LIMIT) << 20)

    # Step.1 put blocks
    MPI.COMM_WORLD.Barrier()
    start = MPI.Wtime()
    for i in range(0, block_count):
        # Zero-padded "rank-index" ids keep lexicographic order equal to
        # the intended data order, so a plain sort below suffices.
        block_id = '{:0>5}-{:0>5}'.format(self.__mpi_rank, i)
        if i != (block_count - 1):
            self.__storage_service.put_block(container_name, file_name,
                                             data, block_id)
        elif i == (block_count - 1):
            self.__storage_service.put_block(container_name, file_name,
                                             last_block_data, block_id)
    end = MPI.Wtime()
    MPI.COMM_WORLD.Barrier()
    max_write, min_write, avg_write = common.collect_bench_metrics(
        end - start)

    if 0 == self.__mpi_rank:
        start_postprocessing = MPI.Wtime()
        # Step.3 get block list and sort according to block id
        block_list = self.__storage_service.get_block_list(
            container_name, file_name,
            block_list_type=blob.BlockListType.All).uncommitted_blocks
        block_list.sort(key=lambda block: block.id)
        # Step.4 commit
        self.__storage_service.put_block_list(container_name, file_name,
                                              block_list)
        end_postprocessing = MPI.Wtime()
        postprocessing_time = end_postprocessing - start_postprocessing
        # NOTE(review): this adjustment happens on rank 0 only, so the
        # returned metrics differ across ranks — confirm whether the
        # commit time should be broadcast to all ranks.
        max_write = round(max_write + postprocessing_time, 3)
        min_write = round(min_write + postprocessing_time, 3)
        avg_write = round(avg_write + postprocessing_time, 3)
    return max_write, min_write, avg_write