def __init__(self, solver_file, snapshot = 0, gpu = 1, sync_freq=1, report=False, do_histogram=False):
    """Record the training options and create the owl devices to run on.

    Args:
        solver_file: stored unchanged as ``self.solver_file``.
        snapshot: stored unchanged as ``self.snapshot``.
        gpu: device selector. Exactly ``1`` takes the GPU paths; every
            other value takes the CPU paths.
        sync_freq: stored unchanged as ``self.sync_freq``.
        report: stored unchanged as ``self.report``.
        do_histogram: stored unchanged as ``self.do_histogram``.

    After construction ``self.gpu`` is the list of created device handles
    (CPU handles included, despite the name) and ``self.num_gpu`` is its
    length.
    """
    self.solver_file = solver_file
    self.snapshot = snapshot
    self.num_gpu = gpu
    self.sync_freq = sync_freq
    self.report = report
    self.do_histogram = do_histogram

    # NOTE(review): the selector is tested with ``== 1``, so in the non-MPI
    # GPU path ``range(self.num_gpu)`` is always ``range(1)``. Confirm that
    # ``gpu`` is meant to be a flag and not a device count.
    want_gpu = (gpu == 1)

    if owl.has_mpi():
        self.gpu = []
        if want_gpu:
            # Query the device count of every node from rank 1 upwards
            # before creating any device; local GPUs are not enumerated on
            # this path.
            per_node = [owl.get_mpi_device_count(rank)
                        for rank in range(1, owl.get_mpi_node_count())]
            for rank, count in enumerate(per_node, 1):
                print("using {} gpu's on node {}\n".format(count, rank))
                # Device ids passed to owl are 1-based here.
                self.gpu.extend(owl.create_mpi_device(rank, slot + 1)
                                for slot in range(count))
            self.num_gpu = len(self.gpu)
        else:
            # One local CPU device, then device id 0 on every node from
            # rank 1 upwards (presumably each node's CPU -- TODO confirm).
            self.gpu.append(owl.create_cpu_device())
            self.gpu.extend(owl.create_mpi_device(rank, 0)
                            for rank in range(1, owl.get_mpi_node_count()))
            self.num_gpu = len(self.gpu)
            print("using {} cpu's over all nodes".format(self.num_gpu))
    elif want_gpu:
        self.gpu = [owl.create_gpu_device(index)
                    for index in range(self.num_gpu)]
        self.num_gpu = len(self.gpu)
        print("using {} gpu devices".format(self.num_gpu))
    else:
        self.gpu = [owl.create_cpu_device()]
        self.num_gpu = len(self.gpu)
        print("using {} cpus".format(self.num_gpu))
def __init__(self, data_file='mnist_all.mat', num_epochs=100, mb_size=256, eps_w=0.01, eps_b=0.01):
    """Create the devices, store the hyper-parameters and initialise weights.

    Args:
        data_file: stored unchanged as ``self.data_file``.
        num_epochs: stored unchanged as ``self.num_epochs``.
        mb_size: stored unchanged as ``self.mb_size``.
        eps_w: stored unchanged as ``self.eps_w``.
        eps_b: stored unchanged as ``self.eps_b``.

    The layer sizes are fixed at 784, 256 and 10 and are exposed as
    ``self.l1``, ``self.l2`` and ``self.l3``.
    """
    # Devices are created CPU first, then GPU 0; neither is selected here.
    self.cpu = owl.create_cpu_device()
    self.gpu = owl.create_gpu_device(0)

    self.data_file = data_file
    self.num_epochs = num_epochs
    self.mb_size = mb_size
    self.eps_w = eps_w
    self.eps_b = eps_b

    # Fixed layer widths.
    n_in, n_hidden, n_out = 784, 256, 10
    self.l1, self.l2, self.l3 = n_in, n_hidden, n_out

    # Weights have shape [fan_out, fan_in]. The last two owl.randn arguments
    # are 0.0 and sqrt(4 / (fan_in + fan_out)) -- presumably mean and
    # standard deviation; confirm against the owl API.
    scale_1 = math.sqrt(4.0 / (n_in + n_hidden))
    scale_2 = math.sqrt(4.0 / (n_hidden + n_out))
    self.w1 = owl.randn([n_hidden, n_in], 0.0, scale_1)
    self.w2 = owl.randn([n_out, n_hidden], 0.0, scale_2)

    # Biases are column vectors initialised to zero.
    self.b1 = owl.zeros([n_hidden, 1])
    self.b2 = owl.zeros([n_out, 1])
def test(self):
    """Check ``owl.conv.Pooler(2, 2, 2, 2).ff`` against hand-computed values.

    The input is 0..31 as float32 reshaped to (1, 2, 4, 4). The expected
    values are the maximum of each non-overlapping 2x2 window of that array
    as laid out in numpy.
    """
    # Run on a CPU device.
    cpu_device = owl.create_cpu_device()
    owl.set_device(cpu_device)

    source = np.arange(32, dtype=np.float32).reshape(1, 2, 4, 4)
    expected = np.asarray([[[5, 7],
                            [13, 15]],
                           [[21, 23],
                            [29, 31]]])
    # An alternative expectation was left disabled by the original author:
    #   [[[110.25, 124.25], [166.25, 180.25]],
    #    [[278.25, 324.25], [462.25, 508.25]]]

    # NOTE(review): the four Pooler arguments are presumably window height,
    # window width and the two strides -- confirm against owl.conv.
    pooled = owl.conv.Pooler(2, 2, 2, 2).ff(owl.from_numpy(source))

    # Each message is built as one string so the output is identical under
    # both the print statement and the print function.
    print('Expected\n' + str(expected))
    print("Actual\n" + str(pooled.to_numpy()))
    print("This test must be run with a fractional bit width of 12")

    # Tolerance: four steps of 1 / 2**12.
    tolerance = 1.0 / (1 << 12) * 4
    self.assertTrue(np.allclose(expected, pooled.to_numpy(), atol=tolerance))
import sys

import owl

# Start owl with the process command line, then create the devices:
# one CPU device (handle discarded) followed by GPU 0 and GPU 1, in order.
owl.initialize(sys.argv)
owl.create_cpu_device()
gpu0, gpu1 = map(owl.create_gpu_device, (0, 1))

# Select the first GPU as the current device.
owl.set_device(gpu0)
import owl

# Handles of every device created below; the local CPU device comes first.
devices = [owl.create_cpu_device()]

if owl.has_mpi():
    # Add device 0 of every MPI node from rank 1 upwards. A generator
    # expression keeps the loop temporaries out of the module namespace;
    # in particular the builtin ``id`` is no longer shadowed.
    devices.extend(
        owl.create_mpi_device(node, 0)
        for node in range(1, owl.get_mpi_node_count())
    )

# Run on the most recently created device. This is the local CPU when MPI is
# unavailable or no node at rank 1 or above exists.
owl.set_device(devices[-1])
# Pick the devices to run on, then build and initialise the model.
# NOTE(review): ``args``, ``usegpu`` and ``usempi`` are expected to be defined
# before this point; ``usegpu`` is only switched on here, never off.
if args.gpu == 1:
    usegpu = True

devs = []
print("enumerating devices")

if usempi:
    nodes = owl.get_mpi_node_count()
    print("{} mpi nodes found".format(nodes))
    if usegpu:
        # Only devices on nodes from rank 1 upwards are used, with 1-based
        # device ids; no local CPU or GPU device is created on this path.
        for node in range(1, nodes):
            for slot in range(owl.get_mpi_device_count(node)):
                devs.append(owl.create_mpi_device(node, slot + 1))
        print("Using {} MPI GPU's".format(len(devs)))
    else:
        # Local CPU first, then device id 0 of each node from rank 1 upwards.
        devs = [owl.create_cpu_device()]
        for node in range(1, nodes):
            devs.append(owl.create_mpi_device(node, 0))
        print("Using {} MPI nodes".format(len(devs)))
elif usegpu:
    print("Using available GPU's")
    for index in range(owl.get_gpu_device_count()):
        devs.append(owl.create_gpu_device(index))
else:
    print("Using CPU only")
    devs = [owl.create_cpu_device()]

# NOTE(review): this raises IndexError if no device was created above
# (for example a GPU path that found no devices).
owl.set_device(devs[0])

print("Starting model creation")
model = MNISTCNNModel()
print("Starting random initialization")
model.init_random()
print("Starting training")