def gpu_dag(dag):
    """ The GPU version of a CPU dag - including gpu communication """
    nc_dag, sent, recvd = non_comm_dag(dag)

    # Each recv job becomes a host->gpu transfer of the received variable
    recvs = {cpu_to_gpu_var(var)[0].clone():
                 {'fn': GpuFromHost(), 'args': (var,)}
             for var, it in dag.items() if isrecv(it['fn'])}

    # Each send job becomes a gpu->host transfer of the sent variable
    sends = {it['args'][0]:
                 {'fn': HostFromGpu(),
                  'args': (cpu_to_gpu_var(it['args'][0])[0].clone(),)}
             for it in dag.values() if issend(it['fn'])}

    def gpu_item(item):
        # Convert a single (output, job) pair to its GPU equivalent
        k, v = item
        i, op, o = v['args'], v['fn'], (k,)
        gi, gop, go = gpu_job(i, op, o)
        return (go[0], {'fn': gop, 'args': gi})

    # GPU-convert every non-communication job, then merge in the transfers
    gdag = dict(map(gpu_item, nc_dag.items()))

    return merge(gdag, recvs, sends)
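
# A hedged usage sketch for gpu_dag, assuming the module-local helpers
# (non_comm_dag, gpu_job, isrecv, issend, cpu_to_gpu_var, merge) behave as
# used above.  A dag maps each output variable to a job dict; a, b, c here
# are hypothetical Theano variables, not names from this module:
#
#   import theano.tensor as T
#   a, b = T.matrix('a'), T.matrix('b')
#   c = T.dot(a, b)
#   dag = {c: {'fn': c.owner.op, 'args': (a, b)}}
#   gdag = gpu_dag(dag)   # same jobs, with GPU ops/variables substituted
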
def gpu_dag_transfers(dag):
    """ Edges/jobs for moving data from gpu version dag to cpu version

    >>> dag = {c: {'fn': dot, 'args': (a, b)}}
    >>> gpu_dag_transfers(dag)
    {c:     {'fn': HostFromGpu(), 'args': (gpu_c,)},
     gpu_a: {'fn': GpuFromHost(), 'args': (a,)},
     gpu_b: {'fn': GpuFromHost(), 'args': (b,)}}
    """
    # Transfer each input variable of the dag from the host to the gpu
    recv_inputs = {cpu_to_gpu_var(var)[0].clone():
                       {'fn': GpuFromHost(), 'args': (var,)}
                   for var in inputs_of(dag)
                   if isinstance(var, theano.Variable)}

    # Transfer each output variable of the dag from the gpu back to the host
    send_outputs = {var: {'fn': HostFromGpu(),
                          'args': (cpu_to_gpu_var(var)[0].clone(),)}
                    for var in outputs_of(dag)
                    if isinstance(var, theano.Variable)}

    return merge(recv_inputs, send_outputs)
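
# inputs_of and outputs_of are module-local helpers.  A hedged sketch of one
# plausible implementation (not necessarily this project's): inputs are
# variables consumed by some job but produced by none; outputs are variables
# produced by some job but consumed by none:
#
#   def inputs_of(dag):
#       produced = set(dag)
#       consumed = set(v for it in dag.values() for v in it['args'])
#       return consumed - produced
#
#   def outputs_of(dag):
#       consumed = set(v for it in dag.values() for v in it['args'])
#       return set(dag) - consumed
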
def local_gpua_row_switch(node):
    """
    Detects eligible Switch instances and replaces them with a GPU row
    switch.
    """
    # Only rewrite Elemwise nodes whose scalar op is Switch
    if (node.op.__class__ != T.Elemwise
            or node.op.scalar_op.__class__ != theano.scalar.Switch):
        return False

    cond, ift, iff = node.inputs
    out, = node.outputs

    # Only applies to Switch instances where a vector mask broadcasts over
    # matrices: the leading dimension varies, all others are broadcastable.
    bcast = cond.broadcastable
    if not bcast or not (not bcast[0] and all(bcast[1:])
                         and ift.ndim in [2, 3]):
        return False

    if not (ift.dtype == iff.dtype == "float32"):
        return False

    # Reuse existing GPU values where the inputs are host copies of them;
    # otherwise transfer the inputs onto the GPU.
    if cond.owner and isinstance(cond.owner.op, HostFromGpu):
        gpu_cond, = cond.owner.inputs
    else:
        gpu_cond = as_cuda_ndarray_variable(
            T.cast(cond.flatten(), "float32"))

    if ift.owner and isinstance(ift.owner.op, HostFromGpu):
        gpu_ift, = ift.owner.inputs
    else:
        gpu_ift = as_cuda_ndarray_variable(ift)

    if iff.owner and isinstance(iff.owner.op, HostFromGpu):
        gpu_iff, = iff.owner.inputs
    else:
        gpu_iff = as_cuda_ndarray_variable(iff)

    # Run the switch on the GPU and transfer the result back to the host
    gpu_op = GpuRowSwitch()
    return [HostFromGpu()(gpu_op(gpu_cond, gpu_ift, gpu_iff))]
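
# Hedged sketch of how this rewrite could be registered with the old CUDA
# backend's optimizer database; the exact decorators vary across Theano
# versions, so treat this as an assumption rather than this project's code:
#
#   from theano.gof.opt import local_optimizer
#   from theano.sandbox.cuda.opt import register_opt
#
#   local_gpua_row_switch = register_opt()(
#       local_optimizer([T.Elemwise])(local_gpua_row_switch))
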
def _as_TensorVariable(self):
    # Convert this GPU variable to a CPU TensorVariable by inserting a
    # gpu->host transfer
    return HostFromGpu()(self)
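
# Hedged illustration: theano.tensor.as_tensor_variable calls
# _as_TensorVariable when a GPU variable appears where a CPU TensorVariable
# is expected, so the transfer is inserted implicitly (gpu_x below is a
# hypothetical CUDA variable):
#
#   y = T.exp(gpu_x)   # behaves like T.exp(HostFromGpu()(gpu_x))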