def f(maker):
    def local_opt(node):
        dev = theano.sandbox.gpuarray.init_dev.device
        if cuda_only and not dev.startswith('cuda'):
            return
        if type(node.op) in OP:
            # Either one of our inputs is on the gpu or
            # all of our clients are on the gpu
            if (any([i.owner and i.owner.op == host_from_gpu
                     for i in node.inputs]) or
                    all([c != 'output' and c.op == gpu_from_host
                         for c, idx in node.outputs[0].clients])):
                new_op = maker(node)
                # This is needed as sometimes new_op inherits from OP.
                if new_op and new_op != node.op:
                    if isinstance(new_op, theano.Op):
                        return [safe_to_cpu(o) for o in
                                new_op(*node.inputs, return_list=True)]
                    elif isinstance(new_op, (tuple, list)):
                        return [safe_to_cpu(o) for o in new_op]
                    else:  # suppose it is a variable on the GPU
                        return [host_from_gpu(new_op)]
        return False
    local_opt.__name__ = maker.__name__
    return local_optimizer(OP)(local_opt)
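
# A minimal usage sketch of the factory above, not from the original source:
# `GpuCrop` and `local_gpu_crop` are hypothetical names. Assuming `OP`,
# `cuda_only`, `host_from_gpu`, and `local_optimizer` are in scope, `f` is
# used as a decorator: the wrapped "maker" receives the CPU node and returns
# the GPU op that should replace it (or None to decline), and `f` wraps it
# into a local optimizer carrying the maker's name.
@f
def local_gpu_crop(node):
    # Return the GPU op to substitute for node.op, or None to decline.
    return GpuCrop()
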
def f(maker):
    def local_opt(node):
        if type(node.op) is OP:
            # This does not support nodes that have more than one output.
            assert len(node.outputs) == 1
            # Either one of our inputs is on the gpu or
            # all of our clients are on the gpu
            if (any([i.owner and i.owner.op == host_from_gpu
                     for i in node.inputs]) or
                    all([c != 'output' and c.op == gpu_from_host
                         for c, idx in node.outputs[0].clients])):
                new_op = maker(node)
                # This is needed as sometimes new_op inherits from OP.
                if new_op and new_op != node.op:
                    if isinstance(new_op, theano.Op):
                        return [host_from_gpu(new_op(*node.inputs))]
                    else:  # suppose it is a variable on the GPU
                        return [host_from_gpu(new_op)]
        return False
    local_opt.__name__ = maker.__name__
    return local_optimizer([OP])(local_opt)
def f(maker):
    def local_opt(node):
        if type(node.op) in OP:
            # Either one of our inputs is on the gpu or
            # all of our clients are on the gpu
            replace = False
            # TODO: Maybe set context_name with infer_context_name()?
            context_name = None
            # We replace if any input is a host_from_gpu
            for i in node.inputs:
                if i.owner and i.owner.op == host_from_gpu:
                    context_name = i.owner.inputs[0].type.context_name
                    replace = True
                    break

            if not replace:
                # We replace if *all* clients are on the GPU
                clients = [c for o in node.outputs for c in o.clients]
                replace = len(clients) != 0
                for c, idx in clients:
                    if (c == 'output' or
                            not isinstance(c.op, GpuFromHost)):
                        replace = False
                # TODO: check that the clients want the same context?
                if replace:
                    # All clients are GpuFromHost and we have at least one
                    context_name = clients[0][0].op.context_name

            # Check if we should replace
            if (not replace or
                    (cuda_only and
                     get_context(context_name).kind != 'cuda')):
                return False

            new_op = maker(node, context_name)
            # This is needed as sometimes new_op inherits from OP.
            if new_op and new_op != node.op:
                if isinstance(new_op, theano.Op):
                    # tag the inputs with the context in case
                    # the context was derived from the outputs
                    def tag(i, ctx):
                        i.tag.context_name = ctx
                        return i
                    inputs = [tag(i, context_name) for i in node.inputs]
                    return [safe_to_cpu(o) for o in
                            new_op(*inputs, return_list=True)]
                elif isinstance(new_op, (tuple, list)):
                    return [safe_to_cpu(o) for o in new_op]
                else:  # suppose it is a variable on the GPU
                    return [host_from_gpu(new_op)]
        return False
    local_opt.__name__ = maker.__name__
    return local_optimizer(OP)(local_opt)
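
# Sketch of a maker matching the context-aware variant above (hypothetical
# `GpuCrop` again, not from the original source): this version of `f` also
# hands the maker the `context_name` inferred from the inputs or the clients,
# so the GPU op can be bound to the right gpuarray context.
@f
def local_gpu_crop(node, context_name):
    return GpuCrop(context_name=context_name)
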
def f(maker):
    def local_opt(node):
        if type(node.op) in OP:
            # Either one of our inputs is on the gpu or
            # all of our clients are on the gpu
            if (any([i.owner and i.owner.op == host_from_gpu
                     for i in node.inputs]) or
                    all([c != "output" and c.op == gpu_from_host
                         for c, idx in node.outputs[0].clients])):
                new_op = maker(node)
                # This is needed as sometimes new_op inherits from OP.
                if new_op and new_op != node.op:
                    if isinstance(new_op, theano.Op):
                        return [host_from_gpu(o) for o in
                                new_op(*node.inputs, return_list=True)]
                    elif isinstance(new_op, (tuple, list)):
                        return [host_from_gpu(o) for o in new_op]
                    else:  # suppose it is a variable on the GPU
                        return [host_from_gpu(new_op)]
        return False
    local_opt.__name__ = maker.__name__
    return local_optimizer(OP)(local_opt)
# add CPU TO GPU merge
# @register_specialize
# @local_optimizer([LargeSparseTargets])
def local_large_sparse_targets_gpu(node):
    if (not isinstance(node.op, LargeSparseTargets) or
            theano.config.device == "cpu"):
        return False

    if node.op.what_to_output == 0:
        return [GpuLargeSparseTargets(node.op.what_to_output)(*node.inputs)]
    elif node.op.what_to_output == 1:
        return [host_from_gpu(
            GpuLargeSparseTargets(node.op.what_to_output)(*node.inputs))]
    else:
        out = GpuLargeSparseTargets(node.op.what_to_output)(*node.inputs)
        return [out[0], host_from_gpu(out[1])]


optdb.register(
    "local_large_sparse_targets_gpu",
    TopoOptimizer(
        local_optimizer([LargeSparseTargets])(local_large_sparse_targets_gpu)),
    49, "fast_run")


def optimize_large_sparse_target(inputs, H, outputs, updates):
    """
    TODO: WRITEME
    """
    # Need to rewrite MergeLargeSparseTargetOps because there will be
    # multiple updates containing gradH!
    if not isinstance(updates, OrderedDict):
        raise ValueError("Updates needs to be an OrderedDict, otherwise keys"
                         " and values may not match after optimization")

    fgraph = gof.FunctionGraph(inputs,
def f(maker):
    def local_opt(node):
        if type(node.op) in OP:
            # Either one of our inputs is on the gpu or
            # all of our clients are on the gpu
            replace = False
            # TODO: Maybe set context_name with infer_context_name()?
            context_name = None
            # We replace if any input is a host_from_gpu
            for i in node.inputs:
                if (i.owner and i.owner.op == host_from_gpu and
                        move_to_gpu(i)):
                    context_name = i.owner.inputs[0].type.context_name
                    replace = True
                    break

            if not replace:
                # We replace if *all* clients are on the GPU
                clients = [c for o in node.outputs for c in o.clients]
                replace = len(clients) != 0
                for c, idx in clients:
                    if c == "output" or not isinstance(c.op, GpuFromHost):
                        replace = False
                # TODO: check that the clients want the same context?
                if replace:
                    # All clients are GpuFromHost and we have at least one
                    context_name = clients[0][0].op.context_name

            # Check if we should replace
            if (not replace or
                    (cuda_only and
                     get_context(context_name).kind != b"cuda") or
                    any(["complex" in getattr(i, "dtype", "")
                         for i in node.inputs])):
                return False

            # tag the inputs with the context in case
            # the context was derived from the outputs
            for i in node.inputs:
                i.tag.context_name = context_name

            new_op = maker(node.op, context_name, node.inputs, node.outputs)
            # This is needed as sometimes new_op inherits from OP.
            if new_op and new_op != node.op:
                if isinstance(new_op, Op):
                    new_outputs = new_op(*node.inputs, return_list=True)
                    to_cpu_fn = safe_to_cpu
                elif isinstance(new_op, (tuple, list)):
                    new_outputs = new_op
                    to_cpu_fn = safe_to_cpu
                else:  # suppose it is a variable on the GPU
                    new_outputs = [new_op]

                    def to_cpu_fn(x):
                        return x.transfer("cpu")

                # copy stack traces onto gpu outputs
                # also copy the stack traces onto HostFromGpu outputs
                on_cpu = []
                for old_output, new_output in zip(node.outputs, new_outputs):
                    copy_stack_trace(old_output, new_output)
                    cpu = to_cpu_fn(new_output)
                    on_cpu.append(cpu)
                    copy_stack_trace(old_output, cpu)
                return on_cpu
        return False
    local_opt.__name__ = maker.__name__
    return local_optimizer(OP)(local_opt)
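
# Sketch of a maker for this final variant (hypothetical `GpuCrop`, and
# `axis` is an assumed attribute of the CPU op): the maker's contract changed
# again and it now receives the op, the context name, and the node's inputs
# and outputs rather than the node itself, which makes it easier for one
# maker to serve every op type listed in OP.
@f
def local_gpu_crop(op, context_name, inputs, outputs):
    # Mirror the CPU op's parameters on its GPU counterpart.
    return GpuCrop(axis=op.axis, context_name=context_name)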