Example #1
    def bench_outputs_with_multiple_files_multiple_writers(
            self,
            container_name,
            directory_name,
            file_name,
            output_per_rank,
            data=None):
        '''
		Benchmarking outputs with pattern `Multiple Files Multiple Writers`
		
		Each process accesses a single file in a different container exclusively.

		Output blob names follow the pattern blob_name + '00001', where the numeric suffix represents the rank of the writing process

		param:
		 container_name: target container base, target container name is composed of container_name + '{:0>5}'.format(__mpi_rank)
		 directory_name: target container directory
		 file_name: target file base, target file name is composed of file_name + '{:0>5}'.format(__mpi_rank)
		 output_per_rank: size of outputs per rank in MiB
		 data: optional cached data for outputs; currently only outputs smaller than SECTION_LIMIT MiB are supported

		return:
		 max_write_time: maximum writing time
		 min_write_time: minimum writing time
		 avg_write_time: average writing time
		'''
        # Data prepare
        if output_per_rank > self.SECTION_LIMIT:
            raise ValueError(
                '{} MiB output per rank is not supported (exceeds SECTION_LIMIT)'.format(
                    output_per_rank))
        if data is None:
            output_per_rank_in_bytes = output_per_rank << 20
            data = common.workload_generator(self.__mpi_rank,
                                             output_per_rank_in_bytes)

        output_blob_name = file_name + '{:0>5}'.format(self.__mpi_rank)

        MPI.COMM_WORLD.Barrier()
        start = MPI.Wtime()
        self.__storage_service.create_blob_from_bytes(container_name,
                                                      output_blob_name, data)
        end = MPI.Wtime()
        MPI.COMM_WORLD.Barrier()

        return common.collect_bench_metrics(end - start)
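
The per-rank elapsed time is reduced to cluster-wide statistics by `common.collect_bench_metrics`. A minimal sketch of such a helper, assuming it performs MPI reductions over COMM_WORLD and that the optional second argument is the rounding precision (the actual implementation in `common` may differ):

from mpi4py import MPI

def collect_bench_metrics(elapsed, ndigits=3):
    # Hypothetical sketch: reduce a per-rank elapsed time to
    # cluster-wide max/min/avg, returned on every rank.
    comm = MPI.COMM_WORLD
    max_time = comm.allreduce(elapsed, op=MPI.MAX)
    min_time = comm.allreduce(elapsed, op=MPI.MIN)
    avg_time = comm.allreduce(elapsed, op=MPI.SUM) / comm.Get_size()
    return (round(max_time, ndigits), round(min_time, ndigits),
            round(avg_time, ndigits))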
Example #2
    def bench_outputs_with_multiple_files_multiple_writers(
            self,
            container_name,
            directory_name,
            file_name,
            output_per_rank,
            data=None):
        '''
		Benchmarking outputs with pattern `Multiple Files Multiple Writers`
		
		Each process accesses its own file within the same container exclusively.

		param:
		 container_name: target container base
		 directory_name: target directory
		 file_name: target file base, target file name is composed of file_name + '{:0>5}'.format(__mpi_rank)
		 output_per_rank: size of outputs per rank in MiB
		 data: optional cached data for outputs
		
		return:
		 max_write_time: maximum writing time
		 min_write_time: minimum writing time
		 avg_write_time: average writing time
		'''
        # Data prepare
        output_per_rank_in_bytes = output_per_rank << 20  # in bytes
        if data is None:
            data = common.workload_generator(self.__mpi_rank,
                                             output_per_rank_in_bytes)

        output_file_name = file_name + '{:0>5}'.format(self.__mpi_rank)

        MPI.COMM_WORLD.Barrier()
        start = MPI.Wtime()
        with open(output_file_name, 'wb') as f:
            f.write(data)
        end = MPI.Wtime()
        MPI.COMM_WORLD.Barrier()

        return common.collect_bench_metrics(end - start, 5)
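
Both patterns above obtain their payload from `common.workload_generator(rank, size_in_bytes)`. A minimal sketch, assuming the helper simply returns a deterministic, rank-seeded byte string of the requested length (the real helper may differ):

import random

def workload_generator(rank, size_in_bytes):
    # Hypothetical sketch: seed with the rank so each process writes
    # distinct but reproducible data.
    rng = random.Random(rank)
    return rng.randbytes(size_in_bytes)  # Python 3.9+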
Example #3
def bench():
    # Configurations
    config = configparser.ConfigParser()
    config.read('config.ini')
    config_bench = config['BENCH']
    config_azure = config['AZURE']

    # MPI envs
    rank, size, proc_name = common.get_mpi_env()
    if config_bench.getboolean('show_mpi_env'):  # bool() on a non-empty string is always True
        print('Rank {0} of {1}. Proc name:{2}'.format(rank, size, proc_name))
        print()

    # Bench specifications
    bench_items = config_bench['bench_items']
    bench_targets = config_bench['bench_targets']
    repeat_times = int(config_bench['repeat_time'])
    bench_pattern = config_bench['bench_pattern']

    # Bench infos
    account_name = config_azure['account_name']
    account_key = config_azure['account_key']
    container_name = config_azure['container_name']
    directory_name = config_azure['directory_name']
    file_name = config_azure['file_name']
    output_per_rank = int(config_bench['output_per_rank'])

    MPI.COMM_WORLD.Barrier()

    # Benchmarking
    if 0 == rank:
        print(
            'Bench Target: {0}, Bench Item: {1}, Bench Pattern: {2}, repeated {3} times'
            .format(bench_targets, bench_items, bench_pattern, repeat_times))

    # Get tool
    if bench_targets == 'azure_blob':
        bench_tool = AzureBlobBench(account_name, account_key,
                                    [container_name])
    elif bench_targets == 'azure_file':
        bench_tool = AzureFileBench(account_name, account_key,
                                    [container_name])
    elif bench_targets == 'cirrus_lustre':
        bench_tool = CirrusLustreBench()
    else:
        bench_tool = BaseBench(None, None, [])

    if bench_items == 'input':
        if bench_pattern == 'SFMR':
            for _ in range(0, repeat_times):
                max_time, min_time, avg_time = bench_tool.bench_inputs_with_single_file_multiple_readers(
                    container_name, None, file_name)
                __print_metrics(max_time, min_time, avg_time)
        elif bench_pattern == 'MFMR':
            for _ in range(0, repeat_times):
                max_time, min_time, avg_time = bench_tool.bench_inputs_with_multiple_files_multiple_readers(
                    container_name, None, file_name)
                __print_metrics(max_time, min_time, avg_time)
        elif bench_pattern == 'MFMRMC':
            for _ in range(0, repeat_times):
                max_time, min_time, avg_time = bench_tool.bench_inputs_with_multiple_files_multiple_readers_multiple_containers(
                    container_name, None, file_name)
                __print_metrics(max_time, min_time, avg_time)
        else:
            raise NotImplementedError()
    elif bench_items == 'output':
        if bench_pattern == 'SFMW':
            data = common.workload_generator(rank, output_per_rank << 20)
            for _ in range(0, repeat_times):
                max_time, min_time, avg_time = bench_tool.bench_outputs_with_single_file_multiple_writers(
                    container_name, directory_name, file_name, output_per_rank,
                    data)
                __print_metrics(max_time, min_time, avg_time)
        elif bench_pattern == 'MFMW':
            data = common.workload_generator(rank, output_per_rank << 20)
            for _ in range(0, repeat_times):
                max_time, min_time, avg_time = bench_tool.bench_outputs_with_multiple_files_multiple_writers(
                    container_name,
                    directory_name,
                    file_name,
                    output_per_rank,
                    data=data)
                __print_metrics(max_time, min_time, avg_time)
        elif bench_pattern == 'MFMWMC':
            data = common.workload_generator(rank, output_per_rank << 20)
            for _ in range(0, repeat_times):
                max_time, min_time, avg_time = bench_tool.bench_outputs_with_multiple_files_multiple_writers_multiple_containers(
                    container_name,
                    directory_name,
                    file_name,
                    output_per_rank,
                    data=data)
                __print_metrics(max_time, min_time, avg_time)
        else:
            raise NotImplementedError()
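
bench() expects a config.ini with [BENCH] and [AZURE] sections containing exactly the keys read above. A sketch that writes a matching file; all values, in particular the account, container, and file names, are placeholders:

import configparser

config = configparser.ConfigParser()
config['BENCH'] = {
    'show_mpi_env': 'yes',
    'bench_items': 'output',         # 'input' or 'output'
    'bench_targets': 'azure_blob',   # 'azure_blob', 'azure_file' or 'cirrus_lustre'
    'bench_pattern': 'MFMW',         # SFMR/MFMR/MFMRMC for inputs, SFMW/MFMW/MFMWMC for outputs
    'repeat_time': '3',
    'output_per_rank': '64',         # MiB written by each rank
}
config['AZURE'] = {
    'account_name': 'mystorageaccount',
    'account_key': '<account-key>',
    'container_name': 'bench-container',
    'directory_name': 'bench-dir',
    'file_name': 'bench-file',
}
with open('config.ini', 'w') as f:
    config.write(f)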
Example #4
	def bench_outputs_with_single_file_multiple_writers(self, container_name, directory_name, file_name, output_per_rank, data=None):
		'''
		Benchmarking outputs with pattern `Single File Multiple Writers`
		
		Each process accesses a distinct section of a single shared file exclusively.

		Data from different ranks is stored in different ranges

		The process is:
		 1. Create the file with the specified total size
		 2. Each process updates its own range of the file

		param:
		 container_name: target container
		 directory_name: target directory
		 file_name: target file
		 output_per_rank: size of outputs per rank in MiB
		 data: optional cached data for outputs
		
		return:
		 max_write_time: maximum writing time
		 min_write_time: minimum writing time
		 avg_write_time: average writing time
		'''
		# Data prepare
		output_per_rank_in_bytes = output_per_rank << 20 # in bytes
		if data is None:
			data = common.workload_generator(self.__mpi_rank, self.FILE_CHUNK_LIMIT_IN_BYTES)
		else:
			data = data[0:self.FILE_CHUNK_LIMIT_IN_BYTES]  # cap the cached data at one full chunk
		data_last_chunk = data
		chunk_count = output_per_rank // self.FILE_CHUNK_LIMIT
		# Last chunk is not full
		if output_per_rank % self.FILE_CHUNK_LIMIT:
			chunk_count = chunk_count + 1
			data_last_chunk = common.workload_generator(self.__mpi_rank, (output_per_rank % self.FILE_CHUNK_LIMIT) << 20)

		# Step .1 File create
		create_start = 0
		create_end = 0
		if 0 == self.__mpi_rank:
			create_start = MPI.Wtime()
			self.__storage_service.create_file(container_name, directory_name, file_name, output_per_rank_in_bytes * self.__mpi_size)
			create_end = MPI.Wtime()
		create_time = create_end - create_start

		MPI.COMM_WORLD.Barrier()
		start = MPI.Wtime()
		for i in range(0, chunk_count):
			if i != (chunk_count - 1):
				start_range = self.__mpi_rank * output_per_rank_in_bytes + i * self.FILE_CHUNK_LIMIT_IN_BYTES
				end_range = start_range + len(data) - 1
				self.__storage_service.update_range(container_name, directory_name, file_name, data, start_range, end_range)
			elif i == (chunk_count - 1):
				start_range = self.__mpi_rank * output_per_rank_in_bytes + i * self.FILE_CHUNK_LIMIT_IN_BYTES
				end_range = start_range + len(data_last_chunk) - 1
				self.__storage_service.update_range(container_name, directory_name, file_name, data_last_chunk, start_range, end_range)
		end = MPI.Wtime()
		MPI.COMM_WORLD.Barrier()

		max_write, min_write, avg_write = common.collect_bench_metrics(end - start)
		max_write = round(max_write + create_time, 3)
		min_write = round(min_write + create_time, 3)
		avg_write = round(avg_write + create_time, 3)

		return max_write, min_write, avg_write
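
The chunk bookkeeping above can be checked in isolation. A small sketch that reproduces the offset arithmetic for one rank, assuming a placeholder FILE_CHUNK_LIMIT of 4 MiB (the real class constant may differ):

FILE_CHUNK_LIMIT = 4                       # MiB, placeholder value
FILE_CHUNK_LIMIT_IN_BYTES = FILE_CHUNK_LIMIT << 20

def chunk_ranges(rank, output_per_rank):
    # Reproduces the start_range/end_range logic of the loop above for one rank.
    output_per_rank_in_bytes = output_per_rank << 20
    full_chunks, remainder = divmod(output_per_rank, FILE_CHUNK_LIMIT)
    sizes = [FILE_CHUNK_LIMIT_IN_BYTES] * full_chunks
    if remainder:
        sizes.append(remainder << 20)
    ranges = []
    for i, size in enumerate(sizes):
        start = rank * output_per_rank_in_bytes + i * FILE_CHUNK_LIMIT_IN_BYTES
        ranges.append((start, start + size - 1))
    return ranges

# Rank 1 writing 10 MiB: two full 4 MiB chunks plus a 2 MiB tail.
print(chunk_ranges(1, 10))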
Example #5
    def bench_outputs_with_single_file_multiple_writers(
            self,
            container_name,
            directory_name,
            file_name,
            output_per_rank,
            data=None):
        '''
		Benchmarking outputs with pattern `Single File Multiple Writers`
		
		Each process accesses a distinct section of a single shared file exclusively.

		Data from different ranks is stored in different blocks

		Global block ids follow the pattern '00002-00005', where the first part is the rank and the second part is the index of the block written by that rank

		The process is:
		1. Each rank puts its blocks to Azure
		2. MPI_Barrier() to wait for all ranks
		3. Get the uncommitted block list and sort it to restore the data order
		4. Commit the changes

		param:
		 container_name: target container
		 directory_name: target directory
		 file_name: target file
		 output_per_rank: size of outputs per rank in MiB
		 data: optional cached data for outputs; in this case the data for one full block (100 MiB)
		
		return:
		 max_write_time: maximum writing time
		 min_write_time: minimum writing time
		 avg_write_time: average writing time
		'''
        # Data prepare
        if data is None:
            data = common.workload_generator(self.__mpi_rank,
                                             self.BLOCK_LIMIT_IN_BYTES)
        else:
            data = data[0:self.BLOCK_LIMIT_IN_BYTES]  # cap the cached data at one full block
        last_block_data = data
        block_count = output_per_rank // self.BLOCK_LIMIT
        # Last block is not full
        if output_per_rank % self.BLOCK_LIMIT:
            block_count = block_count + 1
            last_block_data = common.workload_generator(
                self.__mpi_rank, (output_per_rank % self.BLOCK_LIMIT) << 20)

        # Step.1 put blocks
        MPI.COMM_WORLD.Barrier()
        start = MPI.Wtime()
        for i in range(0, block_count):
            block_id = '{:0>5}-{:0>5}'.format(self.__mpi_rank, i)
            if i != (block_count - 1):
                self.__storage_service.put_block(container_name, file_name,
                                                 data, block_id)
            elif i == (block_count - 1):
                self.__storage_service.put_block(container_name, file_name,
                                                 last_block_data, block_id)
        end = MPI.Wtime()
        MPI.COMM_WORLD.Barrier()
        max_write, min_write, avg_write = common.collect_bench_metrics(end -
                                                                       start)

        if 0 == self.__mpi_rank:
            start_postprocessing = MPI.Wtime()
            # Step.3 get block list and sort according to block id
            block_list = self.__storage_service.get_block_list(
                container_name,
                file_name,
                block_list_type=blob.BlockListType.All).uncommitted_blocks
            block_list.sort(key=lambda block: block.id)

            # Step.4 commit
            self.__storage_service.put_block_list(container_name, file_name,
                                                  block_list)
            end_postprocessing = MPI.Wtime()

            postprocessing_time = end_postprocessing - start_postprocessing
            max_write = round(max_write + postprocessing_time, 3)
            min_write = round(min_write + postprocessing_time, 3)
            avg_write = round(avg_write + postprocessing_time, 3)

        return max_write, min_write, avg_write
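
The commit step depends on the lexicographic sort of the block ids reproducing the original data order. A quick check that the zero-padded 'rank-index' ids used above sort rank-major and then block-major, regardless of the order in which ranks finished:

import random

# Ids as generated in the put_block loop, for 3 ranks writing 2 blocks each.
ids = ['{:0>5}-{:0>5}'.format(rank, i) for rank in range(3) for i in range(2)]
random.shuffle(ids)  # ranks may finish in any order
ids.sort()           # same ordering as block_list.sort(key=lambda block: block.id)
print(ids)
# ['00000-00000', '00000-00001', '00001-00000', '00001-00001', '00002-00000', '00002-00001']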