def generate(cls, graph: Graph, **kwargs):
    """Assemble a GraphExecutionData for the fallback (pure-JS) backend.

    Allocates the memory layout for *graph*, encodes its constants with the
    encoder named in ``kwargs["constant_encoder_name"]`` (``None`` selects the
    default), generates the kernels, and wraps everything in a GraphDescriptor.
    """
    if flags.DEBUG:
        traverse.dump(graph)

    memory_layout = allocate(graph)
    # Sizes are element counts; *4 converts to bytes (float32).
    console.debug(f"[FallbackDescriptorGenerator] memory_layout total size: {memory_layout.total_size * 4}")
    console.debug(f"[FallbackDescriptorGenerator] memory_layout static size: {memory_layout.static_size * 4}")
    console.debug(f"[FallbackDescriptorGenerator] memory_layout dynamic size: {memory_layout.dynamic_size * 4}")

    encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name", None))
    constants_bytes = encoder.encode(memory_layout)
    console.debug(f"[FallbackDescriptorGenerator] constants encoded size: {len(constants_bytes)}")

    kernels = cls.generate_kernels(graph, memory_layout)
    descriptor = GraphDescriptor(
        kernels=kernels,
        memory_layout=memory_layout,
        inputs=graph.inputs,
        outputs=graph.outputs,
        constants_encoding=encoder.name,
        licenses=graph.licenses,
    )
    return GraphExecutionData(graph, descriptor, constants_bytes)
def generate(cls, graph: Graph, **kwargs):
    """Assemble a GraphExecutionData for the WebGL backend.

    Runs the WebGL optimization rule, allocates the memory layout, records
    where each constant lives in the encoded buffer, and builds the descriptor.
    """
    graph, _ = WebGLOptimizeRule().optimize(graph)
    if flags.DEBUG:
        traverse.dump(graph)
        with open("cg.dot", "w") as f:
            f.write(traverse.dump_dot(graph))

    memory_layout = allocate(graph)

    # Map each constant's name to its byte offset and element count so the
    # runtime can locate it inside the decoded constant buffer.
    constants_map = {
        constant.name: {
            "byte_offset": memory_layout[constant].offset * 4,
            "size": constant.size,
        }
        for constant in traverse.filter_nodes(traverse.listup_nodes(graph), ConstantVariable)
    }

    encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name", None))
    constants_bytes = encoder.encode(memory_layout)

    descriptor = GraphDescriptor(
        kernels=cls.generate_kernels(graph),
        memory_layout=memory_layout,
        inputs=graph.inputs,
        outputs=graph.outputs,
        constants_encoding=encoder.name,
        constants_map=constants_map,
        licenses=graph.licenses,
    )
    return GraphExecutionData(graph, descriptor, constants_bytes)
def generate(cls, graph: Graph, **kwargs):
    """Assemble a GraphExecutionData for the WebGPU backend.

    Optimizes the graph, allocates memory, encodes constants, generates
    kernels, and optionally validates the generated kernel sources.
    """
    graph, _ = WebGPUOptimizeRule().optimize(graph)
    if flags.DEBUG:
        traverse.dump(graph)

    memory_layout = allocate(graph)
    # Sizes are element counts; *4 converts to bytes (float32).
    console.debug(f"[WebGPUDescriptorGenerator] memory_layout total size: {memory_layout.total_size * 4}[B]")
    console.debug(f"[WebGPUDescriptorGenerator] memory_layout static size: {memory_layout.static_size * 4}[B]")
    console.debug(f"[WebGPUDescriptorGenerator] memory_layout dynamic size: {memory_layout.dynamic_size * 4}[B]")

    encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name", None))
    constants_bytes = encoder.encode(memory_layout)
    console.debug(f"[WebGPUDescriptorGenerator] constants encoded size: {len(constants_bytes)}[B]")

    descriptor = GraphDescriptor(
        kernels=cls.generate_kernels(graph, memory_layout),
        memory_layout=memory_layout,
        inputs=graph.inputs,
        outputs=graph.outputs,
        constants_encoding=encoder.name,
        licenses=graph.licenses,
    )

    # Optional compile-check of the generated kernel sources.
    if flags.optimize.VALIDATE_GENERATED_SOURCE:
        validate_kernel_source(descriptor)

    return GraphExecutionData(graph, descriptor, constants_bytes)
def main():
    """CLI entry point: convert TF-slim's pretrained ResNet-50 to WebDNN descriptors."""
    # Graph conversion recurses deeply over large graphs.
    sys.setrecursionlimit(10000)

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="resnet50", choices=["resnet50"])
    parser.add_argument('--out', '-o', default='output_tensorflow', help='Directory to output the graph descriptor')
    parser.add_argument("--encoding", help="name of weight encoder")
    parser.add_argument("--backend", default="webgpu,webgl,webassembly,fallback", help="backend")
    args = parser.parse_args()

    os.makedirs(args.out, exist_ok=True)

    # Fetch the TF-slim model sources and the pretrained checkpoint if absent.
    slim_dir = os.path.join(args.out, "models/slim")
    if not os.path.exists(slim_dir):
        clone_slim(args.out)
    checkpoint_path = download_model(args.out)

    sys.path.append(slim_dir)
    from nets import resnet_v1

    image_size = resnet_v1.resnet_v1.default_image_size
    checkpoints_dir = args.out

    session = tf.Session()
    images_ph = tf.placeholder(tf.float32, [1, image_size, image_size, 3])

    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        logits, _ = resnet_v1.resnet_v1_50(images_ph, num_classes=1000, is_training=False)
    probabilities = tf.nn.softmax(logits)

    init_fn = slim.assign_from_checkpoint_fn(checkpoint_path, slim.get_model_variables())
    init_fn(session)

    graph = TensorFlowConverter(session, batch_size=1).convert([images_ph], [probabilities])

    from webdnn.graph import traverse
    traverse.dump(graph)

    # Emit one descriptor per requested backend.
    for backend in args.backend.split(","):
        exec_data = generate_descriptor(backend, graph, constant_encoder_name=args.encoding)
        exec_data.save(args.out)

    console.stderr("Done.")
def optimize(self, graph: Graph):
    """Validation pass: assert every variable's texture fits the WebGL limit.

    Raises AssertionError on an oversized texture; otherwise returns the
    graph unchanged with ``False`` (no modification performed).
    """
    traverse.dump(graph)
    limit = config.WEBGL_MAX_TEXTURE_SIZE

    for variable in traverse.listup_variables(graph):
        height, width = TextureShape.get(variable)
        assert height <= limit and width <= limit, f"Texture size is invalid: {variable.name} \n" \
            f"  (variable shape)={variable.shape}, \n" \
            f"  (channel mode)={ChannelMode.get(variable).name}, \n" \
            f"  (texture shape)=(width={width}, height={height}), \n" \
            f"  (WEBGL_MAX_TEXTURE_SIZE)={config.WEBGL_MAX_TEXTURE_SIZE}"

    return graph, False
def generate(cls, graph: Graph, **kwargs):
    """Assemble a GraphExecutionData for the WebAssembly backend.

    Besides the usual allocate/encode/kernel steps, computes the Emscripten
    heap size required to hold the static and dynamic buffers.
    """
    graph, _ = WebassemblyOptimizeRule().optimize(graph)
    if flags.DEBUG:
        traverse.dump(graph)

    memory_layout = Allocator.allocate(graph)
    # Sizes are element counts; *4 converts to bytes (float32).
    console.debug(f"[WebassemblyDescriptorGenerator] memory_layout total size: {memory_layout.total_size * 4}")
    console.debug(f"[WebassemblyDescriptorGenerator] memory_layout static size: {memory_layout.static_size * 4}")
    console.debug(f"[WebassemblyDescriptorGenerator] memory_layout dynamic size: {memory_layout.dynamic_size * 4}")

    encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name", None))
    constants_bytes = encoder.encode(memory_layout)
    console.debug(f"[WebassemblyDescriptorGenerator] constants encoded size: {len(constants_bytes)}")

    kernels = cls.generate_kernels(graph, memory_layout)

    # Heap sizing: the dynamic part is either already resolved to an int, or
    # taken from kwargs with one 16MB block as the default.
    heap_block_size = 16 * 1024 * 1024
    if isinstance(memory_layout.dynamic_size, int):
        dynamic_size_byte = memory_layout.dynamic_size * 4
    else:
        dynamic_size_byte = kwargs.get("dynamic_allocation_size", heap_block_size)
    total_size_byte = memory_layout.static_size * 4 + dynamic_size_byte

    # required for calculation (size ceiling to one block) + one block
    required_heap = ((total_size_byte + heap_block_size - 1) // heap_block_size + 1) * heap_block_size

    descriptor = GraphDescriptor(
        kernels=kernels,
        memory_layout=memory_layout,
        inputs=graph.inputs,
        outputs=graph.outputs,
        constants_encoding=encoder.name,
        required_heap=required_heap,
        licenses=graph.licenses,
    )
    return GraphExecutionData(graph, descriptor, constants_bytes)
def optimize(self, graph: Graph):
    """Validation pass for SplitTexture: assert resolved textures fit the limit.

    Variables whose size is still an unresolved placeholder are skipped.
    Returns the graph unchanged with ``False`` (no modification performed).
    """
    traverse.dump(graph)
    limit = config.WEBGL_MAX_TEXTURE_SIZE

    for variable in traverse.listup_variables(graph):
        # Texture shape cannot be checked until the variable size is concrete.
        if not Placeholder.check_resolved(variable.size):
            continue

        height, width = TextureShape.get(variable)
        assert height <= limit and width <= limit, f""" [SplitTexture] Texture size is invalid: {variable.name} (variable shape)={variable.shape} (channel mode)={ChannelMode.get(variable).name} (texture shape)=(width={width}, height={height}) (WEBGL_MAX_TEXTURE_SIZE)={config.WEBGL_MAX_TEXTURE_SIZE}"""

    return graph, False
def generate(graph: Graph, constant_encoder_name: str = None) -> GraphExecutionData:
    """Build a WebGPU GraphExecutionData (legacy two-layout allocator API)."""
    graph, _ = WebGPUOptimizeRule().optimize(graph)
    if flags.DEBUG:
        traverse.dump(graph)

    # Legacy allocator returns separate layouts for variables and constants.
    variables_layout, constants_layout, constants_data = Allocator.allocate(graph)

    encoder = ConstantEncoder.get_encoder(constant_encoder_name)
    constants_bytes = encoder.encode(constants_layout, constants_data)

    if flags.DEBUG:
        print(f"[GraphDescriptorGeneratorWebGPU] allocated constant-buffer size: {constants_layout.size * 4} [Byte]")
        print(f"[GraphDescriptorGeneratorWebGPU] encoded constant-buffer size: {len(constants_bytes)} [Byte]")
        print(f"[GraphDescriptorGeneratorWebGPU] allocated variable-buffer size: {variables_layout.size * 4} [Byte]")

    kernels = generate_kernels(graph, constants_layout, variables_layout)

    descriptor = GraphDescriptor(
        kernels=kernels,
        constants_layout=constants_layout,
        variables_layout=variables_layout,
        inputs=graph.inputs,
        outputs=graph.outputs,
        constants_encoding=encoder.name,
        licenses=graph.licenses,
    )

    # Optional compile-check of the generated kernel sources.
    if flags.optimize.VALIDATE_GENERATED_SOURCE:
        if flags.DEBUG:
            print("[GraphDescriptorGeneratorWebGPU] validate generated kernel source")
        validate_kernel_source(descriptor)

    return GraphExecutionData(descriptor, constants_bytes)
def generate(graph: Graph, constant_encoder_name: str = None) -> GraphExecutionData:
    """Build a WebAssembly GraphExecutionData (legacy two-layout allocator API).

    Optimizes the graph, allocates variable/constant layouts, encodes the
    constants, generates kernels, and computes the Emscripten heap size
    needed at runtime.

    :param graph: computation graph to compile
    :param constant_encoder_name: name of the weight encoder; None selects the default
    :return: GraphExecutionData holding the descriptor and encoded constants
    """
    graph, _ = WebassemblyOptimizeRule().optimize(graph)
    if flags.DEBUG:
        traverse.dump(graph)

    variables_layout, constants_layout, constants_data = Allocator.allocate(graph)
    if flags.DEBUG:
        # NOTE(review): label says "constants_layout" but prints constants_data.size;
        # presumably the two match — confirm against Allocator.allocate.
        print(f"[GraphDescriptorGeneratorWebassembly] constants_layout total size: {constants_data.size} * sizeof(float)")
        print(f"[GraphDescriptorGeneratorWebassembly] variables_layout total size: {variables_layout.size} * sizeof(float)")

    constant_encoder = ConstantEncoder.get_encoder(constant_encoder_name)
    constants_bytes = constant_encoder.encode(constants_layout, constants_data)
    if flags.DEBUG:
        # Fixed log tag: previously said "GraphDescriptorGeneratorWebGPU"
        # (copy-paste from the WebGPU generator) inside this WebAssembly generator.
        print(f"[GraphDescriptorGeneratorWebassembly] constants encoded size: {len(constants_bytes)}")

    kernels = generate_kernels(graph, constants_layout, variables_layout)

    weight_data_size = (variables_layout.size + constants_layout.size) * 4  # sizeof(float)
    # required + 16MB: round the data size up in 16MB blocks, plus headroom.
    required_heap = (int(weight_data_size // (16 * 1048576)) + 2) * 16 * 1048576

    descriptor = GraphDescriptor(
        kernels=kernels,
        constants_layout=constants_layout,
        variables_layout=variables_layout,
        inputs=graph.inputs,
        outputs=graph.outputs,
        constants_encoding=constant_encoder.name,
        required_heap=required_heap,
        licenses=graph.licenses,
    )
    return GraphExecutionData(descriptor, constants_bytes)
def generate(cls, graph: Graph, **kwargs):
    """Build WebGL descriptors for several MAX_TEXTURE_SIZE limits.

    One (descriptor, constants) pair is produced per candidate texture-size
    limit, keyed by that limit, so the client can pick the variant matching
    its actual GL capability at load time.
    """
    data_dict = {}  # type: Dict[int, Tuple[GraphDescriptor, bytes]]
    original_graph = graph

    for max_texture_size in [4096, 8192, 16384]:
        config.WEBGL_MAX_TEXTURE_SIZE = max_texture_size

        # Optimize a deep copy so each variant starts from the untouched graph.
        graph, _ = WebGLOptimizeRule().optimize(copy.deepcopy(original_graph))
        if flags.DEBUG:
            traverse.dump(graph)

        memory_layout = allocate(graph)

        # Map each constant's name to its byte offset and element count in
        # the encoded constant buffer.
        constants_map = {
            constant.name: {
                "byte_offset": memory_layout[constant].offset * 4,
                "size": constant.size,
            }
            for constant in traverse.filter_nodes(traverse.listup_nodes(graph), ConstantVariable)
        }

        encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name", None))
        constants_bytes = encoder.encode(memory_layout)

        descriptor = GraphDescriptor(
            kernels=cls.generate_kernels(graph),
            memory_layout=memory_layout,
            inputs=graph.inputs,
            outputs=graph.outputs,
            constants_encoding=encoder.name,
            constants_map=constants_map,
            licenses=graph.licenses,
        )
        data_dict[max_texture_size] = (descriptor, constants_bytes)

    return GraphExecutionData(graph, data_dict)
def main():
    """CLI entry point: transpile a Keras model into WebDNN graph descriptors.

    Loads the model, converts it with KerasConverter, resolves placeholder
    input shapes from --input_shape, then emits one descriptor per backend
    listed in --backend. Exits with status 1 if any backend fails.
    """
    sys.setrecursionlimit(10000)  # workaround for deep copying large graph

    parser = argparse.ArgumentParser()
    parser.add_argument("kerasmodel")
    parser.add_argument("--backend", default="webgpu,webgl,webassembly,fallback",
                        help="comma-separated list of backends")
    parser.add_argument("--input_shape", required=True, action="append",
                        help="shape of blobs for inputs (example: '(1,3,224,224)'), can be specified multiple times")
    # parser.add_argument("--input_data_format", choices=["channels_first", "channels_last"])
    parser.add_argument("--out",
                        help="output directory (default: <model>/webdnn_graph_descriptor)")
    parser.add_argument("--encoding", help="name of weight encoder")
    parser.add_argument("--visualize_ir", action="store_true")
    parser.add_argument("--plugin", action="append",
                        help="plugin python files which are imported before transpiling")
    args = parser.parse_args()

    console.stderr(f"[{path.basename(__file__)}] Generating feedforward graph")

    # Collect (name, class) pairs exported by each plugin file.
    class_list = []
    if args.plugin:
        for plugin_path in args.plugin:
            class_list += _load_plugin(plugin_path)

    custom_objects = {}
    if len(class_list) > 0:
        # custom_objects is a dictionary for load_model to load user-defined custom layers
        for k, v in class_list:
            custom_objects[k] = v

    input_shapes = [Shape.parse(input_shape)[0] for input_shape in args.input_shape]

    model = keras.models.load_model(args.kerasmodel, custom_objects=custom_objects, compile=False)
    model.build(input_shape=None)
    converter = KerasConverter(batch_size=Placeholder(label='N'))
    graph = converter.convert(model)

    traverse.dump(graph)

    # Reconcile each graph input's (possibly symbolic) shape against the
    # user-supplied --input_shape, dimension by dimension.
    for graph_input, input_shape in zip(graph.inputs, input_shapes):
        for p1, p2 in zip(graph_input.shape, input_shape):
            if not Placeholder.check_resolved(p1) and Placeholder.check_resolved(p2):
                # Graph dimension is symbolic, user supplied a concrete value: adopt it.
                p1.value = Placeholder.force_int(p2)
            elif Placeholder.check_resolved(p1) and not Placeholder.check_resolved(p2):
                # Graph dimension is fixed but user left it symbolic: cannot reconcile.
                raise ValueError(
                    f'Shape mismatch: expected:{input_shape}, real:{graph_input.shape}, {p1} != {p2}')
            elif Placeholder.check_resolved(p1) and Placeholder.check_resolved(p2):
                # Both concrete: they must agree.
                assert p1 == p2, f'Shape mismatch: expected:{input_shape}, real:{graph_input.shape}, {p1} != {p2}'

    if args.out:
        output_dir = args.out
    else:
        output_dir = path.join(path.dirname(args.kerasmodel), "webdnn_graph_descriptor")
    os.makedirs(output_dir, exist_ok=True)

    if args.visualize_ir:
        ir_dot_path = path.join(output_dir, "ir.dot")
        with open(ir_dot_path, "w") as f:
            f.write(dump_dot(graph))
        console.stderr(
            f"IR graph can be visualized with graphviz command: 'dot {ir_dot_path} -T png -o output.png'")

    console.stderr(f"[{path.basename(__file__)}] Generating graph descriptor")

    any_backend_failed = False
    backends = args.backend.split(",")
    for i, backend in enumerate(backends):
        console.stderr(
            f"[{path.basename(__file__)}] BackendName: {console.colorize(backend, console.Color.Cyan)}")
        try:
            graph_exec_data = generate_descriptor(backend, graph, constant_encoder_name=args.encoding)
            graph_exec_data.save(output_dir)
        except Exception as ex:
            # In DEBUG mode fail fast; otherwise record the failure and try
            # the remaining backends.
            if flags.DEBUG:
                raise ex
            any_backend_failed = True
            console.error(
                f"[{path.basename(__file__)}] Failed generating descriptor for {backend} backend")
            console.stderr(traceback.format_exc())
            continue

    if any_backend_failed:
        exit(1)