def create_node(name: str, cpus: int, mem: str, labels: Dict[str, str] = None) -> Node:
    capacity = Capacity(cpu_millis=cpus * 1000, memory=parse_size_string(mem))
    allocatable = Capacity(cpu_millis=cpus * 1000, memory=parse_size_string(mem))
    return Node(name, capacity=capacity, allocatable=allocatable, labels=labels)
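# Usage sketch (hypothetical values; assumes parse_size_string accepts
# Kubernetes-style quantities, e.g. '8Gi' == 8 * 2**30 bytes):
# node = create_node('node-1', cpus=4, mem='8Gi',
#                    labels={'beta.kubernetes.io/arch': 'x86'})
# # node.capacity.cpu_millis == 4000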
def initialize_data(self, env: FaasSimEnvironment, deployments):
    for i, _, _, _ in deployments:
        bucket = f'bucket_{i}'
        raw_data = DataItem(bucket, 'raw_data', parse_size_string('12Mi'))
        train_data = DataItem(bucket, 'train_data', parse_size_string('209Mi'))
        model = DataItem(bucket, 'model', parse_size_string('1500Ki'))
        env.cluster.storage_index.put(raw_data)
        env.cluster.storage_index.put(train_data)
        env.cluster.storage_index.put(model)
def simulate_data_upload(env: FaasSimEnvironment, replica: FunctionReplica):
    node = replica.node
    func = replica.function
    started = env.now

    if 'data.skippy.io/sends-to-storage' not in func.pod.spec.labels:
        return

    # FIXME: storage
    size = parse_size_string(func.pod.spec.labels['data.skippy.io/sends-to-storage'])
    path = func.pod.spec.labels['data.skippy.io/sends-to-storage/path']

    storage_node_name = env.cluster.get_storage_nodes(path)[0]
    logger.debug('%.2f replica %s uploading data %s to %s', env.now, node, path, storage_node_name)

    if storage_node_name == node.name:
        # FIXME this is essentially a disk read and not a network connection
        yield env.timeout(size / 1.25e+8)  # 1.25e+8 bytes/s = 1 GBit/s
        return

    storage_node = env.cluster.get_node(storage_node_name)
    route = env.topology.get_route(node, storage_node)
    flow = SafeFlow(env, size, route)

    yield flow.start()

    for hop in route.hops:
        env.metrics.log_network(size, 'data_upload', hop)
    env.metrics.log_flow(size, env.now - started, route.source, route.destination, 'data_upload')
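# Worked example for the local-node shortcut above (hypothetical size): a 12 MiB
# upload at 1 GBit/s takes parse_size_string('12Mi') / 1.25e+8
# == 12582912 / 1.25e+8 ~= 0.1 seconds of simulated time.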
def setup(self, env: Environment):
    containers: docker.ContainerRegistry = env.container_registry

    # populate the global container registry with images
    containers.put(ImageProperties('python-pi-cpu', parse_size_string('58M'), arch='arm32'))
    containers.put(ImageProperties('python-pi-cpu', parse_size_string('58M'), arch='x86'))
    containers.put(ImageProperties('python-pi-cpu', parse_size_string('58M'), arch='aarch64'))

    containers.put(ImageProperties('resnet50-inference-cpu', parse_size_string('56M'), arch='arm32'))
    containers.put(ImageProperties('resnet50-inference-cpu', parse_size_string('56M'), arch='x86'))
    containers.put(ImageProperties('resnet50-inference-cpu', parse_size_string('56M'), arch='aarch64'))

    containers.put(ImageProperties('resnet50-inference-gpu', parse_size_string('56M'), arch='arm32'))
    containers.put(ImageProperties('resnet50-inference-gpu', parse_size_string('56M'), arch='x86'))
    containers.put(ImageProperties('resnet50-inference-gpu', parse_size_string('56M'), arch='aarch64'))

    # log all the images in the registry
    for name, tag_dict in containers.images.items():
        for tag, images in tag_dict.items():
            logger.info('%s, %s, %s', name, tag, images)
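# Equivalent, more compact registration (a sketch; same images, sizes and
# architectures as the put() calls above):
# for image, size in [('python-pi-cpu', '58M'), ('resnet50-inference-cpu', '56M'),
#                     ('resnet50-inference-gpu', '56M')]:
#     for arch in ('arm32', 'x86', 'aarch64'):
#         containers.put(ImageProperties(image, parse_size_string(size), arch=arch))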
def create_container(container: V1Container) -> Container:
    name = container.image
    resources = None

    if container.resources.requests is not None:
        resources = ResourceRequirements()
        resources.requests = {
            key: parse_size_string(value)
            for key, value in container.resources.requests.items()
        }

    return Container(name, resources)
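# Usage sketch (assumes the kubernetes client's V1Container/V1ResourceRequirements
# constructors; values are hypothetical):
# v1c = V1Container(name='fn', image='resnet50-inference-cpu',
#                   resources=V1ResourceRequirements(requests={'memory': '256Mi'}))
# container = create_container(v1c)  # requests parsed to bytes: {'memory': 268435456}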
def create_node(v1node: V1Node) -> Node:
    name = v1node.metadata.name
    labels = v1node.metadata.labels
    cpu_millis = int(v1node.status.capacity['cpu']) * 1000
    memory = parse_size_string(v1node.status.capacity['memory'])

    capacity = Capacity(cpu_millis, memory)
    allocatable = Capacity(cpu_millis, memory)

    return Node(name=name, labels=labels, capacity=capacity, allocatable=allocatable)
def __init__(self, filename):
    csvs = glob.glob(filename)
    dfs = [pd.read_csv(csv) for csv in csvs]
    df = pd.concat(dfs)

    # Filter failed ones (training on pi)
    df = df.loc[df['status'].isin(['passed'])]

    # Transform the bandwidth to bytes/s
    df['bandwidth'] = df['bandwidth'].apply(eval)
    # Assume 1 GBit/s (1.25e+8 bytes/s) where no limit was set
    df['bandwidth'] = df['bandwidth'].apply(
        lambda x: 1.25e+8 if x is None else parse_size_string(f'{x.mbit}M') / 8)

    # Transform the hostname to only contain the type (cloud, tegra, pi)
    df['host'] = df['host'].apply(lambda x: make_tuple(x)[0][:-1])

    self.dataset = df
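# Worked example for the conversion above (hypothetical row, assuming the 'M'
# suffix parses as 10**6): a parsed limit with x.mbit == 100 becomes
# parse_size_string('100M') / 8 == 1e+8 / 8 == 1.25e+7 bytes/s, i.e. 100 MBit/s.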
def estimate(self, context: ClusterContext, pod: Pod,
             scheduling_result: SchedulingResult) -> Tuple[str, Optional[str]]:
    if scheduling_result is None or scheduling_result.suggested_host is None:
        return 'bandwidth_usage', None

    # Calculate the image pull bandwidth
    bandwidth_usage = 0
    node = scheduling_result.suggested_host

    for image_name in scheduling_result.needed_images:
        try:
            image_state: ImageState = context.images_on_nodes[node.name][image_name]
            bandwidth_usage += image_state.size[node.labels['beta.kubernetes.io/arch']]
        except KeyError:
            pass

    # Add the storage data usage
    bandwidth_usage += parse_size_string(pod.spec.labels.get('data.skippy.io/receives-from-storage', '0'))
    bandwidth_usage += parse_size_string(pod.spec.labels.get('data.skippy.io/sends-to-storage', '0'))

    return 'bandwidth_usage', str(bandwidth_usage)
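# Illustration (hypothetical numbers): if the suggested host still needs one
# 56 MB image and the pod carries 'data.skippy.io/receives-from-storage': '12Mi',
# the estimate is 56e6 + 12 * 1024**2 == 68582912 bytes transferred for this placement.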
def create_pod(cnt: int, image_name: str, memory: str = None, cpu: int = None,
               labels: Dict[str, str] = None) -> Pod:
    spec = PodSpec()

    resource_requirements = ResourceRequirements()
    if memory:
        resource_requirements.requests['memory'] = parse_size_string(memory)
    if cpu:
        resource_requirements.requests['cpu'] = cpu

    container = Container(image_name, resource_requirements)
    spec.containers = [container]
    spec.labels = labels

    pod = Pod(f'pod-{cnt}', 'openfaas-fn')
    pod.spec = spec

    return pod
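# Usage sketch (hypothetical arguments; cpu presumably in millicores, matching
# the cpu_millis convention used in create_node above):
# pod = create_pod(1, 'resnet50-inference-cpu', memory='256Mi', cpu=500,
#                  labels={'data.skippy.io/receives-from-storage': '12Mi'})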