def caluculateGrapgh(x, target, n_epoch=10, batchsize=10, using_gpu=False,
                     kf_value=5):
    if using_gpu:
        import cupy as xp
    else:
        import numpy as xp
    sum_loss_all, sum_accuracy_all = [], []
    y_trueall, y_pridictall = [], []
    vmats0, vmats1, vmats2 = [], [], []
    skf = StratifiedKFold(target, n_folds=kf_value, shuffle=False)
    y_all = target.astype(np.int32)
    modellist = []
    for train, test in skf:
        accuracylist = []
        loss_list = []
        NNmodel = Classifier(graph_polarity_metohd())
        if using_gpu:
            NNmodel.to_gpu()
        optimizer = optimizers.Adam()
        optimizer.setup(NNmodel)
        first_time = time.time()
        x_train = x.astype(np.float32)[train]
        x_test = x.astype(np.float32)[test]
        y_train = y_all[train]
        y_test = y_all[test]
        datasize = len(y_train)
        for epoch in range(n_epoch):
            print(NNmodel.predictor.W_graph.W.data.sum())
            indexes = np.random.permutation(datasize)
            loss_sum = 0
            accuracy_sum = 0
            for i in range(0, datasize, batchsize):
                x_batch = chainer.Variable(
                    xp.array(x_train[indexes[i:i + batchsize]]))
                y_batch = chainer.Variable(
                    xp.array(y_train[indexes[i:i + batchsize]]))
                NNmodel.zerograds()
                loss = NNmodel(x_batch, y_batch)
                loss_sum += loss.data * len(y_batch)
                accuracy_sum += NNmodel.accuracy.data * len(y_batch)
                loss.backward()
                optimizer.update()
            print("epoch:", epoch, "time:", time.time() - first_time,
                  "training_loss:", loss_sum / len(y_train))
            loss_sum_test = 0
            accuracy_sum_test = 0
            for i in range(0, len(y_test), batchsize):
                x_batch_test = chainer.Variable(
                    xp.array(x_test[i:i + batchsize]))
                y_batch_test = chainer.Variable(
                    xp.array(y_test[i:i + batchsize]))
                loss = NNmodel(x_batch_test, y_batch_test)
                loss_sum_test += loss.data * len(y_batch_test)
                accuracy_sum_test += NNmodel.accuracy.data * len(y_batch_test)
            print("test_loss:", loss_sum_test / len(y_test),
                  "accuracy_test:", accuracy_sum_test / len(y_test))
            accuracylist.append(float(accuracy_sum_test / len(y_test)))
            loss_list.append(float(loss_sum_test / len(y_test)))
        if using_gpu:
            NNmodel.to_cpu()
        modellist.append(NNmodel)
        sum_loss_all.append(loss_list)
        sum_accuracy_all.append(accuracylist)
    return modellist, sum_accuracy_all, sum_loss_all
def testInitWithRadius(self):
    """Verifies you can use radius to specify a log encoder"""
    # Create the encoder
    le = LogEncoder(w=1,
                    radius=1,
                    minval=1,
                    maxval=10000,
                    name="amount",
                    forced=True)

    self.assertEqual(le.encoder.n, 5)

    # Verify a couple powers of 10 are encoded as expected
    value = 1.0
    output = le.encode(value)
    expected = [1, 0, 0, 0, 0]
    # Convert to an array for comparison
    expected = cupy.array(expected, dtype="uint8")
    self.assertTrue(numpy.array_equal(output, expected))

    value = 100.0
    output = le.encode(value)
    expected = [0, 0, 1, 0, 0]
    # Convert to an array for comparison
    expected = cupy.array(expected, dtype="uint8")
    self.assertTrue(numpy.array_equal(output, expected))
def test_cupy_indices_integer_array(self):
    shape = (2, 3)
    a = cupy.zeros(shape)
    indexes = cupy.array([0, 1])
    a[:, indexes] = cupy.array(1.)
    testing.assert_array_equal(
        a, cupy.array([[1., 1., 0.], [1., 1., 0.]]))
def test_backward_case1(self):
    vertices = [
        [-0.9, -0.9, 2.],
        [-0.8, 0.8, 1.],
        [0.8, 0.8, 0.5]]
    faces = [[0, 1, 2]]

    renderer = neural_renderer.Renderer()
    renderer.image_size = 64
    renderer.anti_aliasing = False
    renderer.perspective = False
    renderer.camera_mode = 'none'

    vertices = cp.array(vertices, 'float32')
    faces = cp.array(faces, 'int32')
    vertices, faces = utils.to_minibatch((vertices, faces))
    vertices = chainer.Variable(vertices)

    images = renderer.render_depth(vertices, faces)
    loss = cf.sum(cf.square(images[0, 15, 20] - 1))
    loss.backward()
    grad = vertices.grad.get()
    grad2 = np.zeros_like(grad)

    for i in range(3):
        for j in range(3):
            eps = 1e-3
            vertices2 = vertices.data.copy()
            vertices2[i, j] += eps
            images = renderer.render_depth(vertices2, faces)
            loss2 = cf.sum(cf.square(images[0, 15, 20] - 1))
            grad2[i, j] = ((loss2 - loss) / eps).data.get()

    chainer.testing.assert_allclose(grad, grad2, atol=1e-3)
def test_scatter_add_cupy_arguments(self, dtype):
    shape = (2, 3)
    a = cupy.zeros(shape, dtype)
    slices = (cupy.array([1, 1]), slice(None))
    a.scatter_add(slices, cupy.array(1.))
    testing.assert_array_equal(
        a, cupy.array([[0., 0., 0.], [2., 2., 2.]], dtype))
def test_cupy_indices_boolean_array(self):
    shape = (2, 3)
    a = cupy.zeros(shape)
    indexes = cupy.array([True, False])
    a[indexes] = cupy.array(1.)
    testing.assert_array_equal(
        a, cupy.array([[1., 1., 1.], [0., 0., 0.]]))
def test_scatter_add_cupy_arguments_mask(self, dtype):
    shape = (2, 3)
    a = cupy.zeros(shape, dtype)
    slices = (cupy.array([True, False]), slice(None))
    a.scatter_add(slices, cupy.array(1.))
    testing.assert_array_equal(
        a, cupy.array([[1., 1., 1.], [0., 0., 0.]], dtype))
def testGetBucketValues(self):
    """
    Verify that the values of buckets are as expected for given init params
    """
    # Create the encoder
    le = LogEncoder(w=5,
                    resolution=0.1,
                    minval=1,
                    maxval=10000,
                    name="amount",
                    forced=True)

    # Build our expected values
    inc = 0.1
    exp = 0
    expected = []
    # Incrementing to exactly 4.0 runs into fp issues
    while exp <= 4.0001:
        val = 10 ** exp
        expected.append(val)
        exp += inc

    expected = cupy.array(expected)
    actual = cupy.array(le.getBucketValues())

    numpy.testing.assert_almost_equal(expected, actual, 7)
def testFilterLikelihoods(self):
    """
    Tests _filterLikelihoods function for several cases:
      i.   Likelihood goes straight to redzone, skipping over yellowzone,
           repeats
      ii.  Case (i) with different values, and a cupy array instead of a
           float list
      iii. A scenario where changing the redzone from four to five 9s should
           filter differently
    """
    redThreshold = 0.9999
    yellowThreshold = 0.999

    # Case (i): values at indices 1 and 7 should be filtered to yellowzone
    l = [1.0, 1.0, 0.9, 0.8, 0.5, 0.4, 1.0, 1.0, 0.6, 0.0]
    l = [1 - x for x in l]
    l2 = copy.copy(l)
    l2[1] = 1 - yellowThreshold
    l2[7] = 1 - yellowThreshold
    l3 = an._filterLikelihoods(l, redThreshold=redThreshold)

    for i in range(len(l2)):
        self.assertAlmostEqual(l2[i], l3[i], msg="Failure in case (i)")

    # Case (ii): values at indices 1-10 should be filtered to yellowzone
    l = cupy.array([0.999978229, 0.999978229, 0.999999897, 1, 1, 1, 1,
                    0.999999994, 0.999999966, 0.999999966, 0.999994331,
                    0.999516576, 0.99744487])
    l = 1.0 - l
    l2 = copy.copy(l)
    l2[1:11] = 1 - yellowThreshold
    l3 = an._filterLikelihoods(l, redThreshold=redThreshold)

    for i in range(len(l2)):
        self.assertAlmostEqual(l2[i], l3[i], msg="Failure in case (ii)")

    # Case (iii): redThreshold difference should be at index 2
    l = cupy.array([0.999968329, 0.999999897, 1, 1, 1, 1,
                    0.999999994, 0.999999966, 0.999999966,
                    0.999994331, 0.999516576, 0.99744487])
    l = 1.0 - l
    l2a = copy.copy(l)
    l2b = copy.copy(l)
    l2a[1:10] = 1 - yellowThreshold
    l2b[2:10] = 1 - yellowThreshold
    l3a = an._filterLikelihoods(l, redThreshold=redThreshold)
    l3b = an._filterLikelihoods(l, redThreshold=0.99999)

    for i in range(len(l2a)):
        self.assertAlmostEqual(l2a[i], l3a[i],
                               msg="Failure in case (iii), list a")

    for i in range(len(l2b)):
        self.assertAlmostEqual(l2b[i], l3b[i],
                               msg="Failure in case (iii), list b")

    self.assertFalse(cupy.array_equal(l3a, l3b),
                     msg="Failure in case (iii), list 3")
def caluculatemodel_without_kf(model_type, x_train, x_test, y_train, y_test,
                               NewpreW, NewpreWdict, NewDimentionN, DimentionN,
                               n_epoch=10, batchsize=10):
    accuracylist = []
    sum_loss_all, sum_accuracy_all = [], []
    y_trueall, y_pridictall = [], []
    vmats0, vmats1, vmats2 = [], [], []
    n_units = DimentionN
    modellist = []
    #model = Classifier(IIalgorithm())
    #model = Classifier(IIalgorithm_simple(NewpreW, NewpreWdict, NewDimentionN, DimentionN))
    model = Classifier(model_type)
    #model.to_gpu()
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    datasize = len(y_train)
    first_time = time.time()
    for epoch in range(n_epoch):
        indexes = np.random.permutation(datasize)
        loss_sum = 0
        accuracy_sum = 0
        for i in range(0, datasize, batchsize):
            x_batch = {}
            for label in range(0, DimentionN):
                x_batch[label] = chainer.Variable(
                    xp.array(x_train[label][indexes[i:i + batchsize]]))
            y_batch = chainer.Variable(
                xp.array(y_train[indexes[i:i + batchsize]]))
            model.zerograds()
            loss = model(x_batch, y_batch)
            loss_sum += loss.data * len(y_batch)
            accuracy_sum += model.accuracy.data * len(y_batch)
            loss.backward()
            optimizer.update()
        print("epoch:", epoch, "time:", time.time() - first_time,
              "training_loss:", loss_sum / datasize)
        loss_sum_test = 0
        accuracy_sum_test = 0
        for i in range(0, len(y_test), batchsize):
            x_batch_test = {}
            for label in range(0, DimentionN):
                x_batch_test[label] = chainer.Variable(
                    xp.array(x_test[label][i:i + batchsize]))
            y_batch_test = chainer.Variable(xp.array(y_test[i:i + batchsize]))
            loss = model(x_batch_test, y_batch_test)
            loss_sum_test += loss.data * len(y_batch_test)
            accuracy_sum_test += model.accuracy.data * len(y_batch_test)
        print("test_loss:", loss_sum_test / len(y_test),
              "accuracy_test:", accuracy_sum_test / len(y_test))
        #training_loss = model(x_batch, y_batch)
        #print("training_loss, epoch:", training_loss.data)
    y_pred, vmat0, vmat1, vmat2, accuracy = predict_with_SVM(
        x_train, x_test, y_train, y_test, model, NewDimentionN)
    vmats0.append(vmat0)
    vmats1.append(vmat1)
    vmats2.append(vmat2)
    y_trueall = y_trueall + list(y_test)
    y_pridictall = y_pridictall + list(y_pred)
    modellist.append(model)
    accuracylist.append(accuracy)
    print(classification_report(y_trueall, y_pridictall))
    print(np.mean(accuracylist))
    return vmats0, vmats1, vmats2, modellist, accuracylist, y_trueall, y_pridictall
def testConstructClassificationVector(self):
    modelParams = {
        '__numRunCalls': 0
    }
    spVals = {
        'params': {
            'activeOutputCount': 5
        },
        'output': {
            'bottomUpOut': cupy.array([1, 1, 0, 0, 1])
        }
    }
    tpVals = {
        'params': {
            'cellsPerColumn': 2,
            'columnCount': 2
        },
        'output': {
            'lrnActive': cupy.array([1, 0, 0, 1]),
            'topDownOut': cupy.array([1, 0, 0, 0, 1])
        }
    }
    self.helper.clamodel.getParameter.side_effect = modelParams.get
    sp = self.helper.clamodel._getSPRegion()
    tp = self.helper.clamodel._getTPRegion()
    tpImp = tp.getSelf()._tfdr

    sp.getParameter.side_effect = spVals['params'].get
    sp.getOutputData.side_effect = spVals['output'].get

    self.helper._activeColumnCount = 5

    tp.getParameter.side_effect = tpVals['params'].get
    tp.getOutputData.side_effect = tpVals['output'].get

    tpImp.getLearnActiveStateT.return_value = tpVals['output']['lrnActive']

    # Test TP Cell vector
    self.helper._vectorType = 'tpc'
    vector = self.helper._constructClassificationRecord()
    self.assertEqual(vector.anomalyVector,
                     tpImp.getLearnActiveStateT().nonzero()[0].tolist())

    # Test SP and TP Column Error vector
    self.helper._vectorType = 'sp_tpe'
    self.helper._prevPredictedColumns = cupy.array([1, 0, 0, 0, 1]).nonzero()[0]
    vector = self.helper._constructClassificationRecord()
    self.assertEqual(vector.anomalyVector, [0, 1, 4])

    self.helper._prevPredictedColumns = cupy.array([1, 0, 1, 0, 0]).nonzero()[0]
    vector = self.helper._constructClassificationRecord()
    self.assertEqual(vector.anomalyVector, [0, 1, 4, 7])

    self.helper._vectorType = 'invalidType'
    self.assertRaises(TypeError,
                      self.helper._constructClassificationRecord)
def main():
    model = SuperResolution()
    chainer.serializers.load_hdf5('model.hdf5', model)
    if DEVICE >= 0:
        chainer.cuda.get_device_from_id(DEVICE).use()
        chainer.cuda.check_cuda_available()
        model.to_gpu(DEVICE)

    in_file = 'test.png'
    dest_file = 'dest.png'

    img = Image.open(in_file).convert('YCbCr')
    org_w = w = img.size[0]
    org_h = h = img.size[1]

    # resize the image so both sides are multiples of PATCH_SIZE
    if w % PATCH_SIZE != 0:
        w = (math.floor(w / PATCH_SIZE) + 1) * PATCH_SIZE
    if h % PATCH_SIZE != 0:
        h = (math.floor(h / PATCH_SIZE) + 1) * PATCH_SIZE
    if w != img.size[0] or h != img.size[1]:
        img = img.resize((w, h))

    dst = Image.new("YCbCr", (10 * w // 4, 10 * h // 4), 'white')

    cur_x = 0
    while cur_x <= img.size[0] - PATCH_SIZE:
        cur_y = 0
        while cur_y <= img.size[1] - PATCH_SIZE:
            rect = (cur_x, cur_y, cur_x + PATCH_SIZE, cur_y + PATCH_SIZE)
            cropimg = img.crop(rect)
            hpix = xp.array(cropimg, dtype=xp.float32)
            hpix = hpix[:, :, 0] / 255
            x = xp.array([[hpix]], dtype=xp.float32)
            t = model(x, train=False)
            dstimg = cropimg.resize((40, 40), Image.BICUBIC)
            hpix = np.array(dstimg, dtype=xp.float32)
            hpix.flags.writeable = True
            if DEVICE >= 0:
                hpix[:, :, 0] = t.data[0].get() * 255
                #hpix[:, :, 0] = chainer.cuda.to_cpu(t.data[0]) * 255
            else:
                hpix[:, :, 0] = t.data[0] * 255
            buf = np.array(hpix.clip(0, 255), dtype=np.uint8)
            himg = Image.fromarray(buf, 'YCbCr')
            dst.paste(himg, (10 * cur_x // 4, 10 * cur_y // 4,
                             10 * cur_x // 4 + 40, 10 * cur_y // 4 + 40))
            cur_y += PATCH_SIZE
        cur_x += PATCH_SIZE

    dst = dst.convert('RGB')
    dst.save(dest_file)
def testBucketIndexSupport(self):
    """Check bucket index support"""
    bucketIndices = self._e.getBucketIndices(self._d)
    topDown = self._e.getBucketInfo(bucketIndices)
    topDownValues = cupy.array([elem.value for elem in topDown])
    errs = topDownValues - cupy.array([320.25, 3.5, .167, 14.8])
    self.assertAlmostEqual(errs.max(), 0, 4)

    encodings = []
    for x in topDown:
        encodings.extend(x.encoding)
    self.assertTrue(cupy.array_equal(encodings, self._expected))
def _array_to_gpu(array, device, stream):
    if array is None:
        return None

    if isinstance(array, chainerx.ndarray):
        # TODO(niboshi): Update this logic once both CuPy and ChainerX support
        # the array interface.
        if array.device.backend.name == 'cuda':
            # Convert to cupy.ndarray on the same device as source array
            array = cupy.ndarray(
                array.shape,
                array.dtype,
                cupy.cuda.MemoryPointer(
                    cupy.cuda.UnownedMemory(
                        array.data_ptr + array.offset,
                        array.data_size,
                        array,
                        array.device.index),
                    0),
                strides=array.strides)
        else:
            array = chainerx.to_numpy(array)
    elif isinstance(array, (numpy.number, numpy.bool_)):
        array = numpy.asarray(array)
    elif isinstance(array, intel64.mdarray):
        array = numpy.asarray(array)

    if isinstance(array, ndarray):
        if array.device == device:
            return array
        is_numpy = False
    elif isinstance(array, numpy.ndarray):
        is_numpy = True
    else:
        raise TypeError(
            'The array sent to gpu must be an array or a NumPy scalar.'
            '\nActual type: {0}.'.format(type(array)))

    if stream is not None:
        with device:
            with stream:
                if is_numpy:
                    return cupy.asarray(array)
                # Need to make a copy when an array is copied to another device
                return cupy.array(array, copy=True)

    with device:
        if is_numpy:
            return cupy.asarray(array)
        # Need to make a copy when an array is copied to another device
        return cupy.array(array, copy=True)
def update_core(self):
    batch = self.get_iterator('main').next()
    optimizer = self.get_optimizer('main')

    x_batch = []
    y_batch = []
    for img in batch:
        hpix = np.array(img, dtype=np.float32) / 255.0
        y_batch.append([hpix[:, :, 0]])
        low = img.resize((16, 16), Image.NEAREST)
        lpix = np.array(low, dtype=np.float32) / 255.0
        x_batch.append([lpix[:, :, 0]])

    xs = xp.array(x_batch, dtype=xp.float32)
    ys = xp.array(y_batch, dtype=xp.float32)
    optimizer.update(optimizer.target, xs, ys)
def tile(A, reps):
    """Construct an array by repeating A the number of times given by reps.

    Args:
        A (cupy.ndarray): Array to transform.
        reps (int or tuple): The number of repeats.

    Returns:
        cupy.ndarray: Transformed array with repeats.

    .. seealso:: :func:`numpy.tile`

    """
    try:
        tup = tuple(reps)
    except TypeError:
        tup = (reps,)
    d = len(tup)
    if tup.count(1) == len(tup) and isinstance(A, cupy.ndarray):
        # Fixes the problem that the function does not make a copy if A is an
        # array and the repetitions are 1 in all dimensions
        return cupy.array(A, copy=True, ndmin=d)
    else:
        # Note that no copy of zero-sized arrays is made. However since they
        # have no data there is no risk of an inadvertent overwrite.
        c = cupy.array(A, copy=False, ndmin=d)
        if d < c.ndim:
            tup = (1,) * (c.ndim - d) + tup
        shape_out = tuple(s * t for s, t in zip(c.shape, tup))
        if c.size == 0:
            return cupy.empty(shape_out, dtype=c.dtype)
        c_shape = []
        ret_shape = []
        for dim_in, nrep in zip(c.shape, tup):
            if nrep == 1:
                c_shape.append(dim_in)
                ret_shape.append(dim_in)
            elif dim_in == 1:
                c_shape.append(dim_in)
                ret_shape.append(nrep)
            else:
                c_shape.append(1)
                c_shape.append(dim_in)
                ret_shape.append(nrep)
                ret_shape.append(dim_in)
        ret = cupy.empty(ret_shape, dtype=c.dtype)
        if ret.size:
            ret[...] = c.reshape(c_shape)
        return ret.reshape(shape_out)
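# Minimal usage sketch for tile() above (assumes CuPy and a CUDA device are
# available); behavior mirrors numpy.tile: an int repeats along the last
# axis, a tuple gives per-axis repetition counts.
import cupy

a = cupy.array([0, 1, 2])
assert cupy.tile(a, 2).shape == (6,)         # [0 1 2 0 1 2]
assert cupy.tile(a, (2, 2)).shape == (2, 6)  # repeated along both axes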
def caluculateMLP_without_kf(model_type, x_train, x_test, y_train, y_test,
                             NewpreW, NewpreWdict, NewDimentionN, DimentionN,
                             n_epoch=10, batchsize=10):
    accuracylist = []
    loss_list = []
    sum_loss_all, sum_accuracy_all = [], []
    y_trueall, y_pridictall = [], []
    n_units = DimentionN
    modellist = []
    #model = Classifier(MLP(x_train.shape[1], NewDimentionN))
    model = Classifier(model_type)
    #model.to_gpu()
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    datasize = len(y_train)
    first_time = time.time()
    for epoch in range(n_epoch):
        indexes = np.random.permutation(datasize)
        loss_sum = 0
        accuracy_sum = 0
        for i in range(0, datasize, batchsize):
            x_batch = chainer.Variable(
                xp.array(x_train[indexes[i:i + batchsize]]))
            y_batch = chainer.Variable(
                xp.array(y_train[indexes[i:i + batchsize]]))
            model.zerograds()
            loss = model(x_batch, y_batch)
            loss_sum += loss.data * len(y_batch)
            accuracy_sum += model.accuracy.data * len(y_batch)
            loss.backward()
            optimizer.update()
        print("epoch:", epoch, "time:", time.time() - first_time,
              "training_loss:", loss_sum / datasize)
        loss_sum_test = 0
        accuracy_sum_test = 0
        for i in range(0, len(y_test), batchsize):
            x_batch_test = chainer.Variable(xp.array(x_test[i:i + batchsize]))
            y_batch_test = chainer.Variable(xp.array(y_test[i:i + batchsize]))
            loss = model(x_batch_test, y_batch_test)
            loss_sum_test += loss.data * len(y_batch_test)
            accuracy_sum_test += model.accuracy.data * len(y_batch_test)
        print("test_loss:", loss_sum_test / len(y_test),
              "accuracy_test:", accuracy_sum_test / len(y_test))
        accuracylist.append(accuracy_sum_test / len(y_test))
        loss_list.append(loss_sum_test / len(y_test))
    training_loss = model(x_batch, y_batch)
    print("training_loss, epoch:", training_loss.data)
    #y_trueall = y_trueall + list(y_test)
    #y_pridictall = y_pridictall + list(y_pred)
    modellist.append(model)
    #accuracylist.append(accuracy)
    return modellist, accuracylist, loss_list
def diag(v, k=0):
    """Returns a diagonal or a diagonal array.

    Args:
        v (array-like): Array or array-like object.
        k (int): Index of diagonals. Zero indicates the main diagonal, a
            positive value an upper diagonal, and a negative value a lower
            diagonal.

    Returns:
        cupy.ndarray: If ``v`` indicates a 1-D array, then it returns a 2-D
        array with the specified diagonal filled by ``v``. If ``v`` indicates
        a 2-D array, then it returns the specified diagonal of ``v``. In the
        latter case, if ``v`` is a cupy.ndarray object, then its view is
        returned.

    .. seealso:: :func:`numpy.diag`

    """
    if isinstance(v, cupy.ndarray):
        if v.ndim == 1:
            size = v.size + abs(k)
            ret = cupy.zeros((size, size), dtype=v.dtype)
            ret.diagonal(k)[:] = v
            return ret
        else:
            return v.diagonal(k)
    else:
        return cupy.array(numpy.diag(v, k))
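# Minimal usage sketch for diag() above (assumes CuPy is available): a 1-D
# input builds a diagonal matrix, a 2-D input extracts the requested
# diagonal, so the two calls round-trip.
import cupy

v = cupy.array([1, 2, 3])
m = cupy.diag(v)            # 3x3 matrix with v on the main diagonal
assert m.shape == (3, 3)
assert bool((cupy.diag(m) == v).all())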
def testEstimateNormal(self):
    """
    This passes in a known set of data and ensures the estimateNormal
    function returns the expected results.
    """
    # 100 samples drawn from mean=0.4, stdev=0.5
    samples = cupy.array(
        [0.32259025, -0.44936321, -0.15784842, 0.72142628, 0.8794327,
         0.06323451, -0.15336159, -0.02261703, 0.04806841, 0.47219226,
         0.31102718, 0.57608799, 0.13621071, 0.92446815, 0.1870912,
         0.46366935, -0.11359237, 0.66582357, 1.20613048, -0.17735134,
         0.20709358, 0.74508479, 0.12450686, -0.15468728, 0.3982757,
         0.87924349, 0.86104855, 0.23688469, -0.26018254, 0.10909429,
         0.65627481, 0.39238532, 0.77150761, 0.47040352, 0.9676175,
         0.42148897, 0.0967786, -0.0087355, 0.84427985, 1.46526018,
         1.19214798, 0.16034816, 0.81105554, 0.39150407, 0.93609919,
         0.13992161, 0.6494196, 0.83666217, 0.37845278, 0.0368279,
         -0.10201944, 0.41144746, 0.28341277, 0.36759426, 0.90439446,
         0.05669459, -0.11220214, 0.34616676, 0.49898439, -0.23846184,
         1.06400524, 0.72202135, -0.2169164, 1.136582, -0.69576865,
         0.48603271, 0.72781008, -0.04749299, 0.15469311, 0.52942518,
         0.24816816, 0.3483905, 0.7284215, 0.93774676, 0.07286373,
         1.6831539, 0.3851082, 0.0637406, -0.92332861, -0.02066161,
         0.93709862, 0.82114131, 0.98631562, 0.05601529, 0.72214694,
         0.09667526, 0.3857222, 0.50313998, 0.40775344, -0.69624046,
         -0.4448494, 0.99403206, 0.51639049, 0.13951548, 0.23458214,
         1.00712699, 0.40939048, -0.06436434, -0.02753677, -0.23017904])

    params = an.estimateNormal(samples)
    self.assertWithinEpsilon(params["mean"], 0.3721)
    self.assertWithinEpsilon(params["variance"], 0.22294)
    self.assertWithinEpsilon(params["stdev"], 0.47216)
    self.assertEqual(params["name"], "normal")
def to_gpu(array, device=None, stream=None):
    """Copies the given CPU array to specified device.

    Args:
        array: Array to be sent to GPU.
        device: Device specifier.
        stream (cupy.cuda.Stream): CUDA stream.

    Returns:
        cupy.ndarray: Array on GPU.

        If ``array`` is already on GPU, then this function just returns
        ``array`` without performing any copy. Note that this function does
        not copy :class:`cupy.ndarray` into specified device.

    """
    check_cuda_available()
    assert stream is None  # TODO(beam2d): FIX IT
    with get_device(device):
        dev_id = int(get_device(array))
        if dev_id != -1 and dev_id != cupy.cuda.device.get_device_id():
            # Need to make a copy when an array is copied to another device
            return cupy.array(array, copy=True)
        else:
            return cupy.asarray(array)
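# Minimal usage sketch for to_gpu() above (assumes Chainer with CUDA support
# and at least one GPU; device id 0 is a hypothetical choice).
import numpy as np
from chainer.cuda import to_gpu

x_cpu = np.arange(6, dtype=np.float32).reshape(2, 3)
x_gpu = to_gpu(x_cpu, device=0)  # copies the NumPy array onto GPU 0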
def setUp(self):
    # 3 bits for season, 1 bit for day of week, 1 for weekend, 5 for time of
    # day
    # use of forced is not recommended, used here for readability, see
    # scalar.py
    self._e = DateEncoder(season=3, dayOfWeek=1, weekend=1, timeOfDay=5)
    # in the middle of fall, Thursday, not a weekend, afternoon - 4th Nov,
    # 2010, 14:55
    self._d = datetime.datetime(2010, 11, 4, 14, 55)
    self._bits = self._e.encode(self._d)

    # season is aaabbbcccddd (1 bit/month)  # TODO should be <<3?
    # should be 000000000111 (centered on month 11 - Nov)
    seasonExpected = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]

    # week is MTWTFSS
    # contrary to localtime documentation, Monday = 0 (for python
    # datetime.datetime.timetuple())
    dayOfWeekExpected = [0, 0, 0, 1, 0, 0, 0]

    # not a weekend, so it should be "False"
    weekendExpected = [1, 0]

    # time of day has radius of 4 hours and w of 5, so each bit = 240/5
    # min = 48 min; 14:55 is minute 14*60 + 55 = 895; 895/48 = bit 18.6
    # should be 30 bits total (30 * 48 minutes = 24 hours)
    timeOfDayExpected = (
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    self._expected = cupy.array(seasonExpected + dayOfWeekExpected +
                                weekendExpected + timeOfDayExpected,
                                dtype=defaultDtype)
def testSamplePopulationTooSmall(self):
    r = Random(42)
    population = cupy.array([1, 2, 3, 4], dtype="uint32")
    choices = cupy.zeros([5], dtype="uint32")
    self.assertRaises(ValueError, r.sample, population, choices)
def testSampleWrongDimensionsChoices(self):
    """Check that passing a multi-dimensional array throws a ValueError."""
    r = Random(42)
    population = cupy.array([1, 2, 3, 4], dtype="uint32")
    choices = cupy.zeros([2, 2], dtype="uint32")
    self.assertRaises(ValueError, r.sample, population, choices)
def update_model(self, old_seq, action, reward, new_seq):
    '''Update the model.'''
    # Skip the update until the experience memory has enough samples
    if len(self.experienceMemory) < self.batch_num:
        return

    # Build a batch from the experience memory
    memsize = len(self.experienceMemory)
    batch_index = list(np.random.randint(0, memsize, (self.batch_num)))
    batch = np.array([self.experienceMemory[i] for i in batch_index])

    x = Variable(cuda.to_gpu(batch[:, 0:INPUT_NODE].reshape(
        (self.batch_num, -1)).astype(np.float32)))
    targets = self.model.predict(x).data.copy()

    for i in range(self.batch_num):
        # [seq..., action, reward, seq_new]
        a = batch[i, INPUT_NODE]
        r = batch[i, INPUT_NODE + 1]
        ai = int((a + 1) / 2)  # map the action ±1 to an index (0, 1)
        new_seq = batch[i, (INPUT_NODE + 2):(INPUT_NODE * 2 + 2)]
        targets[i, ai] = r + self.gamma * np.max(self.get_action_value(new_seq))

    t = Variable(xp.array(targets).reshape(
        (self.batch_num, -1)).astype(xp.float32))

    # Update the network
    self.model.zerograds()
    loss = self.model(x, t, self.epsilon * 0.5)
    self.loss = loss.data
    self.outloss += loss.data / OUTPUT_FRAME
    loss.backward()
    self.optimizer.update()
def test_adv_getitem_cupy_indices2(self):
    shape = (2, 3, 4)
    a = cupy.zeros(shape)
    index = cupy.array([1, 0])
    b = a[(slice(None), index)]
    b_cpu = a.get()[(slice(None), index.get())]
    testing.assert_array_equal(b, b_cpu)
def test_adv_getitem_cupy_indices3(self):
    shape = (2, 3, 4)
    a = cupy.zeros(shape)
    index = cupy.array([True, False])
    b = a[index]
    b_cpu = a.get()[index.get()]
    testing.assert_array_equal(b, b_cpu)
def load(file, mmap_mode=None):
    """Loads arrays or pickled objects from ``.npy``, ``.npz`` or pickled
    file.

    This function just calls ``numpy.load`` and then sends the arrays to the
    current device. NPZ file is converted to NpzFile object, which defers the
    transfer to the time of accessing the items.

    Args:
        file (file-like object or string): The file to read.
        mmap_mode (None, 'r+', 'r', 'w+', 'c'): If not ``None``, memory-map
            the file to construct an intermediate :class:`numpy.ndarray`
            object and transfer it to the current device.

    Returns:
        CuPy array or NpzFile object depending on the type of the file.
        NpzFile object is a dictionary-like object with the context manager
        protocol (which enables us to use *with* statement on it).

    .. seealso:: :func:`numpy.load`

    """
    obj = numpy.load(file, mmap_mode)
    if isinstance(obj, numpy.ndarray):
        return cupy.array(obj)
    elif isinstance(obj, numpy.lib.npyio.NpzFile):
        return NpzFile(obj)
    else:
        return obj
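# Minimal usage sketch for load() above (assumes CuPy is available and the
# working directory is writable; 'weights.npy' is a hypothetical file name):
# a .npy file written with cupy.save round-trips back to the device as a
# cupy.ndarray.
import cupy

cupy.save('weights.npy', cupy.arange(10))
w = cupy.load('weights.npy')
assert isinstance(w, cupy.ndarray)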
def _setup_cuda_fft_multiply_repeated(n_jobs, h, n_fft):
    """Set up repeated CUDA FFT multiplication with a given filter.

    Parameters
    ----------
    n_jobs : int | str
        If n_jobs == 'cuda', the function will attempt to set up for CUDA
        FFT multiplication.
    h : array
        The filtering function that will be used repeatedly.
    n_fft : int
        The number of points in the FFT.

    Returns
    -------
    n_jobs : int
        Sets n_jobs = 1 if n_jobs == 'cuda' was passed in, otherwise
        original n_jobs is passed.
    cuda_dict : dict
        Dictionary with the following CUDA-related variables:
            use_cuda : bool
                Whether CUDA should be used.
            fft_plan : instance of FFTPlan
                FFT plan to use in calculating the FFT.
            ifft_plan : instance of FFTPlan
                FFT plan to use in calculating the IFFT.
            x_fft : instance of gpuarray
                Empty allocated GPU space for storing the result of the
                frequency-domain multiplication.
            x : instance of gpuarray
                Empty allocated GPU space for the data to filter.
    h_fft : array | instance of gpuarray
        This will either be a gpuarray (if CUDA enabled) or ndarray.

    Notes
    -----
    This function is designed to be used with fft_multiply_repeated().
    """
    cuda_dict = dict(n_fft=n_fft, rfft=np.fft.rfft, irfft=np.fft.irfft,
                     h_fft=np.fft.rfft(h, n=n_fft))
    if n_jobs == 'cuda':
        n_jobs = 1
        init_cuda()
        if _cuda_capable:
            import cupy
            try:
                # do the IFFT normalization now so we don't have to later
                h_fft = cupy.array(cuda_dict['h_fft'])
                logger.info('Using CUDA for FFT FIR filtering')
            except Exception as exp:
                logger.info('CUDA not used, could not instantiate memory '
                            '(arrays may be too large: "%s"), falling back '
                            'to n_jobs=1' % str(exp))
            else:
                cuda_dict.update(h_fft=h_fft,
                                 rfft=_cuda_upload_rfft,
                                 irfft=_cuda_irfft_get)
        else:
            logger.info('CUDA not used, CUDA could not be initialized, '
                        'falling back to n_jobs=1')
    return n_jobs, cuda_dict
def update_model(self):
    '''Update the model.'''
    # Skip the update until the experience memory has enough samples
    if len(self.experienceMemory) < self.batch_num:
        return

    # Build a batch from the experience memory
    memsize = len(self.experienceMemory)
    batch_index = list(np.random.randint(0, memsize, (self.batch_num)))
    batch = np.array([self.experienceMemory[i] for i in batch_index])

    x = Variable(cuda.to_gpu(batch[:, 0:INPUT_NODE].reshape(
        (self.batch_num, -1)).astype(np.float32)))
    targets = self.model.predict(x).data.copy()

    for i in range(self.batch_num):
        # [seq..., action, reward, seq_new]
        a = batch[i, INPUT_NODE]
        r = batch[i, INPUT_NODE + 1]
        end = batch[i, INPUT_NODE + 2]
        next_Q = 0
        if end == 0:
            dummy_seq = batch[i, (INPUT_NODE + 3):
                                 (INPUT_NODE + KIND_OF_PAI * 2 + 3)]
            random_pai = []
            for pai_index in range(len(PAI_NUM2ACT)):
                for j in range(int(4 - dummy_seq[pai_index]
                                   - dummy_seq[pai_index + KIND_OF_PAI])):
                    random_pai.append(PAI_NUM2ACT[pai_index])
            np.random.shuffle(random_pai)
            if len(random_pai) > self.monte_size:
                random_pai = random_pai[0:self.monte_size]
            # important ######################################################
            for pai in random_pai:
                dummy_seq[pai] += 1
                dummy_new_seq = self.conv_state(
                    dummy_seq[0:KIND_OF_PAI],
                    dummy_seq[KIND_OF_PAI:KIND_OF_PAI * 2])
                next_Q += (np.max(self.get_action_value(dummy_new_seq))
                           / len(random_pai))
                dummy_seq[pai] -= 1
            if a != 0:
                targets[i, PAI_ACT2NUM[int(a)]] = r + self.gamma * next_Q
            else:
                assert False, "update_model() a=0"
                #targets[i, PAI_ACT2NUM[int(a)]] = r
        else:
            if a != 0:
                targets[i, PAI_ACT2NUM[int(a)]] = r
            else:
                for pai in range(KIND_OF_PAI):
                    targets[i, pai] = r
        #if PAI_ACT2NUM[int(a)] >= 34:
        #    print(str(i) + "," + str(int(a)))

    t = Variable(xp.array(targets).reshape(
        (self.batch_num, -1)).astype(xp.float32))

    # Update the network
    self.model.zerograds()
    loss = self.model(x, t)
    self.loss = loss.data
    self.outloss += loss.data / OUTPUT_FRAME
    loss.backward()
    self.optimizer.update()
def lstsq(a, b, rcond=1e-15):
    """Return the least-squares solution to a linear matrix equation.

    Solves the equation `a x = b` by computing a vector `x` that minimizes
    the Euclidean 2-norm `|| b - a x ||^2`. The equation may be under-,
    well-, or over-determined (i.e., the number of linearly independent rows
    of `a` can be less than, equal to, or greater than its number of linearly
    independent columns). If `a` is square and of full rank, then `x` (but
    for round-off error) is the "exact" solution of the equation.

    Args:
        a (cupy.ndarray): "Coefficient" matrix with dimension ``(M, N)``
        b (cupy.ndarray): "Dependent variable" values with dimension ``(M,)``
            or ``(M, K)``
        rcond (float): Cutoff parameter for small singular values.
            For stability it computes the largest singular value denoted by
            ``s``, and sets all singular values smaller than ``s`` to zero.

    Returns:
        tuple:
            A tuple of ``(x, residuals, rank, s)``. Note ``x`` is the
            least-squares solution with shape ``(N,)`` or ``(N, K)``
            depending on whether ``b`` was two-dimensional. The sums in
            ``residuals`` are the squared Euclidean 2-norms for each column
            of ``b - a*x``. ``residuals`` is an empty array if the rank of
            ``a`` is < N or M <= N; otherwise it has shape ``(1,)`` if ``b``
            is 1-dimensional and shape ``(K,)`` if it is 2-dimensional. The
            ``rank`` of matrix ``a`` is an integer. The singular values of
            ``a`` are ``s``.

    .. seealso:: :func:`numpy.linalg.lstsq`
    """
    util._assert_cupy_array(a, b)
    util._assert_rank2(a)
    if b.ndim > 2:
        raise linalg.LinAlgError('{}-dimensional array given. Array must be '
                                 'at most two-dimensional'.format(b.ndim))
    m, n = a.shape[-2:]
    m2 = b.shape[0]
    if m != m2:
        raise linalg.LinAlgError('Incompatible dimensions')

    u, s, vt = cupy.linalg.svd(a, full_matrices=False)
    # number of singular values and matrix rank
    cutoff = rcond * s.max()
    s1 = 1 / s
    sing_vals = s <= cutoff
    s1[sing_vals] = 0
    rank = s.size - sing_vals.sum()

    if b.ndim == 2:
        s1 = cupy.repeat(s1.reshape(-1, 1), b.shape[1], axis=1)
    # Solve the least-squares solution
    z = core.dot(u.transpose(), b) * s1
    x = core.dot(vt.transpose(), z)
    # Calculate squared Euclidean 2-norm for each column in b - a*x
    if rank != n or m <= n:
        resids = cupy.array([], dtype=a.dtype)
    elif b.ndim == 2:
        e = b - core.dot(a, x)
        resids = cupy.sum(cupy.square(e), axis=0)
    else:
        e = b - cupy.dot(a, x)
        resids = cupy.dot(e.T, e).reshape(-1)
    return x, resids, rank, s
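# Minimal usage sketch for lstsq() above (assumes CuPy is available): fit a
# line y = m*x + c through four points, mirroring the numpy.linalg.lstsq
# documentation example.
import cupy

x = cupy.array([0., 1., 2., 3.])
y = cupy.array([-1., 0.2, 0.9, 2.1])
A = cupy.stack([x, cupy.ones_like(x)], axis=1)  # design matrix [x, 1]
sol, residuals, rank, s = cupy.linalg.lstsq(A, y, rcond=1e-15)
# sol is approximately [1.0, -0.95], i.e. slope m and intercept c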
def test_invalid_type(self):
    a = numpy.array([1, 2, 3], dtype=object)
    with self.assertRaises(ValueError):
        cupy.array(a)
def cosine_similarity(vec1, vec2):
    xp_vec1 = xp.array(vec1)
    xp_vec2 = xp.array(vec2)
    # dot product divided by the product of both vectors' norms
    return xp.dot(xp_vec1, xp_vec2) / (xp.linalg.norm(xp_vec1) *
                                       xp.linalg.norm(xp_vec2))
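# Minimal usage sketch for cosine_similarity() above (assumes xp is bound to
# numpy or cupy as elsewhere in this module): parallel vectors score 1.0,
# orthogonal vectors 0.0.
assert abs(float(cosine_similarity([1, 0], [1, 0])) - 1.0) < 1e-6
assert abs(float(cosine_similarity([1, 0], [0, 1]))) < 1e-6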
def main(args):
    global hypo, data_vis, data_uvw, data_uvw_cp, data_nant, data_nbl, \
        data_uniqtimes, data_uniqtime_indices, data_ntime, data_inttime, \
        data_chan_freq, data_chan_freq_cp, data_nchan, data_chanwidth, \
        data_flag, data_flag_row, data_ant1, data_ant2, baseline_dict

    # Set command line parameters
    hypo = args.hypo

    ####### Read data from MS
    # INI: always exclude autocorrs for our purposes
    tab = pt.table(args.ms).query("ANTENNA1 != ANTENNA2")
    data_vis = tab.getcol(args.col)
    data_ant1 = tab.getcol('ANTENNA1')
    data_ant2 = tab.getcol('ANTENNA2')
    ant_unique = np.unique(np.hstack((data_ant1, data_ant2)))
    baseline_dict = make_baseline_dictionary(ant_unique)

    # Read uvw coordinates; necessary for computing the source coherency
    # matrix
    data_uvw = tab.getcol('UVW')
    if args.invert_uvw:
        # Invert uvw coordinates for comparison with MeqTrees
        data_uvw = -data_uvw

    # get data from ANTENNA subtable
    anttab = pt.table(args.ms + '/ANTENNA')
    stations = anttab.getcol('STATION')
    data_nant = len(stations)
    data_nbl = int((data_nant * (data_nant - 1)) / 2)
    anttab.close()

    # Obtain indices of unique times in 'TIME' column
    data_uniqtimes, data_uniqtime_indices = np.unique(tab.getcol('TIME'),
                                                      return_inverse=True)
    data_ntime = data_uniqtimes.shape[0]
    data_inttime = tab.getcol('EXPOSURE', 0, data_nbl)

    # Get flag info from MS
    data_flag = tab.getcol('FLAG')
    data_flag_row = tab.getcol('FLAG_ROW')
    data_flag = np.logical_or(data_flag,
                              data_flag_row[:, np.newaxis, np.newaxis])
    tab.close()

    # get frequency info from SPECTRAL_WINDOW subtable
    freqtab = pt.table(args.ms + '/SPECTRAL_WINDOW')
    data_chan_freq = freqtab.getcol('CHAN_FREQ')[0]
    data_nchan = freqtab.getcol('NUM_CHAN')[0]
    data_chanwidth = freqtab.getcol('CHAN_WIDTH')[0, 0]
    freqtab.close()

    # Set up the GPU
    cp.cuda.Device(args.device).use()

    # Move necessary arrays to cupy from numpy
    data_vis = cp.array(data_vis)
    data_ant1 = cp.array(data_ant1)
    data_ant2 = cp.array(data_ant2)
    data_uvw_cp = cp.array(data_uvw)
    data_uniqtime_indices = cp.array(data_uniqtime_indices, dtype=cp.int32)
    data_chan_freq_cp = cp.array(data_chan_freq)

    # Make a callable for running dyPolyChord
    my_callable = dyPolyChord.pypolychord_utils.RunPyPolyChord(
        loglike, prior_transform, args.npar)

    settings_dict = {'file_root': args.fileroot,
                     'base_dir': args.basedir,
                     'seed': seed}

    comm = MPI.COMM_WORLD

    # Run dyPolyChord
    dyPolyChord.run_dypolychord(my_callable, dynamic_goal, settings_dict,
                                ninit=nlive_init, nlive_const=nlive,
                                comm=comm)

    return 0
def to_cupy(self, copy=False):
    self.host_to_device()
    if copy:
        return cp.array(self)
    return cp.asarray(self)
def test_csr(self):
    x = sparse.csr_matrix(
        (cupy.array([], 'f'),
         cupy.array([], 'i'),
         cupy.array([0], 'i')),
        shape=(0, 0), dtype='f')
    self.assertFalse(sparse.isspmatrix_csc(x))
def test_offsets(self):
    self.assertEqual(self.m.offsets.dtype, numpy.int32)
    testing.assert_array_equal(
        self.m.offsets, cupy.array([0, -1], self.dtype))
def test_indices(self):
    self.assertEqual(self.m.indices.dtype, numpy.int32)
    testing.assert_array_equal(
        self.m.indices, cupy.array([0, 0, 2, 1], self.dtype))
def unique(ar, axis=None, *args, **kwargs):
    """ For cupy v0.6.0 compatibility """
    return cp.array(np.unique(cp.asnumpy(ar), axis=axis, *args, **kwargs))
def test_indptr(self):
    self.assertEqual(self.m.indptr.dtype, numpy.int32)
    testing.assert_array_equal(
        self.m.indptr, cupy.array([0, 1, 2, 3, 4], self.dtype))
def loglike(theta):
    """
    Compute the loglikelihood function.

    NOTE: Not called directly by user code; the function signature must
    correspond to the requirements of the numerical sampler used.

    Parameters
    ----------
    theta : Input parameter vector

    Returns
    -------
    loglike : float
    """
    global init_loglike, ndata_unflagged, per_bl_sig, weight_vector, \
        data_vis, einschema

    if init_loglike == False:

        # Find total number of visibilities
        # 8 because each polarisation has two real numbers (real & imaginary)
        ndata = data_vis.shape[0] * data_vis.shape[1] * data_vis.shape[2] * 2
        flag_ll = np.logical_not(data_flag[:, 0, 0])
        ndata_unflagged = ndata - np.where(flag_ll == False)[0].shape[0] * 8
        print('Percentage of unflagged visibilities: ', ndata_unflagged, '/',
              ndata, '=', (ndata_unflagged / ndata) * 100)

        # Set visibility weights
        # ndata/2 because the weight_vector is the same for both real and
        # imag parts of the vis.
        weight_vector = np.zeros(data_vis.shape, dtype='float')
        if not sigmaSim:
            per_bl_sig = np.zeros((data_nbl))
            bl_incr = 0
            for a1 in np.arange(data_nant):
                for a2 in np.arange(a1 + 1, data_nant):
                    #per_bl_sig[bl_incr] = np.sqrt((sefds[a1]*sefds[a2])/(data_chanwidth*data_inttime[bl_incr])) # INI: Removed the sq(2) from the denom. It's for 2 pols.
                    # INI: Added the sq(2) bcoz MeqS uses this convention
                    per_bl_sig[bl_incr] = (1.0 / corr_eff) * np.sqrt(
                        (sefds[a1] * sefds[a2]) /
                        (2 * data_chanwidth * data_inttime[bl_incr]))
                    weight_vector[baseline_dict[(a1, a2)]] = \
                        1.0 / np.power(per_bl_sig[bl_incr], 2)
                    bl_incr += 1
        else:
            weight_vector[:] = 1.0 / np.power(sigmaSim, 2)
        weight_vector *= np.logical_not(data_flag)
        weight_vector = cp.array(weight_vector.reshape(
            (data_vis.shape[0], data_vis.shape[1], 2, 2)))

        # Compute einsum schema
        einschema = einsum_schema(hypo)

        # loglike initialised; will not enter on subsequent iterations
        init_loglike = True

    # Set up arrays necessary for forward modelling
    # Set up the phase delay matrix
    lm = cp.array([[theta[1], theta[2]]])
    phase = phase_delay(lm, data_uvw_cp, data_chan_freq_cp)

    if hypo == 1:
        # Set up the shape matrix for Gaussian sources
        gauss_shape = gaussian_shape(
            data_uvw, data_chan_freq,
            np.array([[theta[3], theta[4], theta[5]]]))
        gauss_shape = cp.array(gauss_shape)

    # Set up the brightness matrix
    stokes = cp.array([[theta[0], 0, 0, 0]])
    brightness = convert(stokes, ['I', 'Q', 'U', 'V'],
                         [['RR', 'RL'], ['LR', 'LL']])

    '''print('einschema: ', einschema)
    print('phase.shape: ', phase.shape)
    print('gauss_shape.shape: ', gauss_shape.shape)
    print('brightness.shape: ', brightness.shape)'''

    # Compute the source coherency matrix (the uncorrupted visibilities,
    # except for the phase delay)
    if hypo == 0:
        source_coh_matrix = cp.einsum(einschema, phase, brightness)
    elif hypo == 1:
        source_coh_matrix = cp.einsum(einschema, phase, gauss_shape,
                                      brightness)

    # Uncomment the following and assign sampled complex gains per
    # ant/chan/time to the Jones matrices
    '''# Set up the G-Jones matrices
    die_jones = cp.zeros((data_ntime, data_nant, data_nchan, 2, 2),
                         dtype=cp.complex)
    if hypo == 0:
        for ant in np.arange(data_nant):
            for chan in np.arange(data_nchan):
                # delayterm in 'turns'; 17th chan (index 16) freq is the
                # reference frequency.
                delayterm = theta[ant+12] * (chan - refchan_delay) * data_chanwidth
                # convert 'turns' to degrees; pherr = pec_ph + delay + rate;
                # rates are zero
                pherr = theta[ant+3] + delayterm * 360
                re, im = pol_to_rec(1, pherr)
                die_jones[:, ant, chan, 0, 0] = die_jones[:, ant, chan, 1, 1] = re + 1j*im
    elif hypo == 1:
        for ant in np.arange(data_nant):
            for chan in np.arange(data_nchan):
                # delayterm in 'turns'; 17th chan (index 16) freq is the
                # reference frequency.
                delayterm = theta[ant+15] * (chan - refchan_delay) * data_chanwidth
                # convert 'turns' to degrees; pherr = pec_ph + delay + rate;
                # rates are zero
                pherr = theta[ant+6] + delayterm * 360
                re, im = pol_to_rec(1, pherr)
                die_jones[:, ant, chan, 0, 0] = die_jones[:, ant, chan, 1, 1] = re + 1j*im'''

    # Predict (forward model) visibilities
    # If the die_jones matrix has been declared above, assign it to both the
    # kwargs die1_jones and die2_jones in predict_vis()
    model_vis = predict_vis(data_uniqtime_indices, data_ant1, data_ant2,
                            die1_jones=None, dde1_jones=None,
                            source_coh=source_coh_matrix, dde2_jones=None,
                            die2_jones=None, base_vis=None)

    # Compute chi-squared and loglikelihood
    diff = model_vis - data_vis.reshape(
        (data_vis.shape[0], data_vis.shape[1], 2, 2))
    chi2 = cp.sum((diff.real * diff.real + diff.imag * diff.imag)
                  * weight_vector)
    loglike = cp.float(-chi2 / 2.0 - cp.log(
        2 * cp.pi * (1.0 / weight_vector.flatten()[
            cp.nonzero(weight_vector.flatten())])).sum())

    return loglike, []
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
import numpy as np
from chainladder import ARRAY_BACKEND

try:
    import cupy as cp
    cp.array([1])  # probe the GPU so a broken install falls back cleanly
    module = 'cupy'
except Exception:
    if ARRAY_BACKEND == 'cupy':
        import warnings
        warnings.warn('Unable to load CuPy. Using numpy instead.')
    import numpy as cp
    module = 'numpy'


def get_array_module(*args, **kwargs):
    """ default array module when cupy is not present """
    return np


def nansum(a, *args, **kwargs):
    """ For cupy v0.6.0 compatibility """
    return cp.sum(cp.nan_to_num(a), *args, **kwargs)


def nanmean(a, *args, **kwargs):
    """ For cupy v0.6.0 compatibility """
    return cp.sum(cp.nan_to_num(a), *args, **kwargs) / \
        cp.sum(~cp.isnan(a), *args, **kwargs)
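# Minimal usage sketch for the NaN-aware helpers above (works whether cp is
# CuPy or the NumPy fallback): NaNs count as zero in the sum and are dropped
# from the mean's denominator.
a = cp.array([1.0, cp.nan, 3.0])
assert float(nansum(a)) == 4.0
assert float(nanmean(a)) == 2.0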
def nanmedian(a, *args, **kwargs):
    """ For cupy v0.6.0 compatibility """
    return cp.array(np.nanmedian(cp.asnumpy(a), *args, **kwargs))
def numpy_error(_, xp):
    if xp == numpy:
        raise ValueError()
    elif xp == cupy:
        return cupy.array(1)
    expect = cudf.Series(cudf.Series(data)._column[slc].view("int8"))
    got = cudf.Series(str_host_view(data[slc], "int8"))

    assert_eq(expect, got)


@pytest.mark.parametrize(
    "data,expected",
    [
        (
            np.array([1, 2, 3, 4, 5], dtype="uint8"),
            cudf.core.column.as_column([1, 2, 3, 4, 5], dtype="uint8"),
        ),
        (
            cp.array([1, 2, 3, 4, 5], dtype="uint8"),
            cudf.core.column.as_column([1, 2, 3, 4, 5], dtype="uint8"),
        ),
        (
            cp.array([], dtype="uint8"),
            cudf.core.column.as_column([], dtype="uint8"),
        ),
        (
            cp.array([453], dtype="uint8"),
            cudf.core.column.as_column([453], dtype="uint8"),
        ),
    ],
)
def test_as_column_buffer(data, expected):
    actual_column = cudf.core.column.as_column(
        cudf.core.Buffer(data), dtype=data.dtype)
def _syevj_batched(a, UPLO, with_eigen_vector):
    if a.dtype == 'f' or a.dtype == 'e':
        dtype = 'f'
        inp_w_dtype = 'f'
        inp_v_dtype = 'f'
        ret_w_dtype = a.dtype
        ret_v_dtype = a.dtype
    elif a.dtype == 'd':
        dtype = 'd'
        inp_w_dtype = 'd'
        inp_v_dtype = 'd'
        ret_w_dtype = 'd'
        ret_v_dtype = 'd'
    elif a.dtype == 'F':
        dtype = 'F'
        inp_w_dtype = 'f'
        inp_v_dtype = 'F'
        ret_w_dtype = 'f'
        ret_v_dtype = 'F'
    elif a.dtype == 'D':
        dtype = 'D'
        inp_w_dtype = 'd'
        inp_v_dtype = 'D'
        ret_w_dtype = 'd'
        ret_v_dtype = 'D'
    else:
        # NumPy uses float64 when an input is not floating point number.
        dtype = 'd'
        inp_w_dtype = 'd'
        inp_v_dtype = 'd'
        ret_w_dtype = 'd'
        ret_v_dtype = 'd'

    *batch_shape, m, lda = a.shape
    batch_size = _numpy.prod(batch_shape)
    a = a.reshape(batch_size, m, lda)
    v = _cupy.array(a.swapaxes(-2, -1), order='C', copy=True,
                    dtype=inp_v_dtype)

    w = _cupy.empty((batch_size, m), inp_w_dtype).swapaxes(-2, 1)
    dev_info = _cupy.empty((), _numpy.int32)
    handle = _device.Device().cusolver_handle

    if with_eigen_vector:
        jobz = _cusolver.CUSOLVER_EIG_MODE_VECTOR
    else:
        jobz = _cusolver.CUSOLVER_EIG_MODE_NOVECTOR

    if UPLO == 'L':
        uplo = _cublas.CUBLAS_FILL_MODE_LOWER
    else:  # UPLO == 'U'
        uplo = _cublas.CUBLAS_FILL_MODE_UPPER

    if dtype == 'f':
        buffer_size = _cusolver.ssyevjBatched_bufferSize
        syevjBatched = _cusolver.ssyevjBatched
    elif dtype == 'd':
        buffer_size = _cusolver.dsyevjBatched_bufferSize
        syevjBatched = _cusolver.dsyevjBatched
    elif dtype == 'F':
        buffer_size = _cusolver.cheevjBatched_bufferSize
        syevjBatched = _cusolver.cheevjBatched
    elif dtype == 'D':
        buffer_size = _cusolver.zheevjBatched_bufferSize
        syevjBatched = _cusolver.zheevjBatched
    else:
        raise RuntimeError('Only float and double and cuComplex and '
                           'cuDoubleComplex are supported')

    params = _cusolver.createSyevjInfo()
    work_size = buffer_size(
        handle, jobz, uplo, m, v.data.ptr, lda, w.data.ptr, params,
        batch_size)
    work = _cupy.empty(work_size, inp_v_dtype)
    syevjBatched(
        handle, jobz, uplo, m, v.data.ptr, lda, w.data.ptr, work.data.ptr,
        work_size, dev_info.data.ptr, params, batch_size)
    _cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        syevjBatched, dev_info)

    _cusolver.destroySyevjInfo(params)

    w = w.astype(ret_w_dtype, copy=False)
    w = w.swapaxes(-2, -1).reshape(*batch_shape, m)
    if not with_eigen_vector:
        return w

    v = v.astype(ret_v_dtype, copy=False)
    v = v.swapaxes(-2, -1).reshape(*batch_shape, m, m)
    return w, v
def tensor(data, dtype=numpy.float64):
    """Create a tensor (a device array) from ``data``."""
    return cp.array(data, dtype=dtype)
def test_data(self):
    self.assertEqual(self.m.data.dtype, self.dtype)
    testing.assert_array_equal(
        self.m.data, cupy.array([0, 1, 3, 2], self.dtype))
def forward(self, h_s, c_s, n_speakers=None, to_train=1):
    # h_s: (1, B, F) h_0
    # c_s: (1, B, F) c_0
    # n_speakers: (B,) number of speakers (None for the test set)
    # to_train: 1 to grab S+1 speakers while training; 0 to grab S speakers
    #           if given for inference
    batch_size = h_s.shape[1]

    if n_speakers:
        # zeros: (B, 1, F)
        zeros = [cp.zeros((1, self.in_size)).astype(cp.float32)
                 for i in range(batch_size)]
        max_speakers = max(n_speakers).tolist()
        A = cp.array([])
        for i in range(max_speakers + to_train):
            h_s, c_s, _ = self.lstm(h_s, c_s, zeros)
            a_s = h_s[0]
            A = F.vstack((A, a_s)) if A.size else a_s
        # we will use sigmoid_cross_entropy, so no F.sigmoid here
        #P = F.sigmoid(self.linear(A))
        P = self.linear(A)

        # dimension manipulation to get
        #   A: (B, n_speakers, F)
        #   P: (B, n_speakers, 1)
        A = F.swapaxes(
            A.reshape(max_speakers + to_train, batch_size, -1), 0, 1)
        P = F.swapaxes(
            P.reshape(max_speakers + to_train, batch_size, -1), 0, 1)

        # strip
        A = [F.get_item(a, slice(0, n_spk))
             for a, n_spk in zip(A, n_speakers)]
        P = [F.get_item(p, slice(0, n_spk + to_train))
             for p, n_spk in zip(P, n_speakers)]
    else:
        # don't know the number of speakers, so generate a_s and p_s until
        # p_s < 0.5; cannot do this batch-wise like above, so process each
        # group in the batch separately
        # zeros: (1, 1, F)
        zeros = [cp.zeros((1, self.in_size)).astype(cp.float32)]
        A = []
        for batch in range(batch_size):
            h_b, c_b = h_s[:, batch:batch + 1, :], c_s[:, batch:batch + 1, :]
            a = p = cp.array([])
            while True:
                h_b, c_b, _ = self.lstm(h_b, c_b, zeros)
                a_s = h_b[0]
                p_s = F.sigmoid(self.linear(a_s))
                if p_s.array[0] < 0.5:
                    break
                a = F.vstack((a, a_s)) if a.size else a_s
                # p = F.vstack((p, p_s)) if p.size else p_s
            a = a if a.size else cp.zeros((1, h_s.shape[2])).astype(cp.float32)
            # p = p if p.size else Variable(np.array([[0]]).astype(np.float32))
            A.append(a)
            # P.append(p)
        P = P if to_train else None

    return A, None
def test_csc(self):
    x = cupy.sparse.csc_matrix(
        (cupy.array([], 'f'),
         cupy.array([], 'i'),
         cupy.array([0], 'i')),
        shape=(0, 0), dtype='f')
    self.assertTrue(cupy.sparse.isspmatrix_csc(x))
def test_dia(self):
    x = sparse.dia_matrix(
        (cupy.array([], 'f'), cupy.array([0], 'i')),
        shape=(0, 0), dtype='f')
    self.assertTrue(sparse.isspmatrix_dia(x))
def gesvdj(a, full_matrices=True, compute_uv=True, overwrite_a=False):
    """Singular value decomposition using cusolverDn<t>gesvdj().

    Factorizes the matrix ``a`` into two unitary matrices ``u`` and ``v`` and
    a singular values vector ``s`` such that ``a == u @ diag(s) @ v*``.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``.
        full_matrices (bool): If True, it returns u and v with dimensions
            ``(M, M)`` and ``(N, N)``. Otherwise, the dimensions of u and v
            are respectively ``(M, K)`` and ``(K, N)``, where
            ``K = min(M, N)``.
        compute_uv (bool): If ``False``, it only returns singular values.
        overwrite_a (bool): If ``True``, matrix ``a`` might be overwritten.

    Returns:
        tuple of :class:`cupy.ndarray`: A tuple of ``(u, s, v)``.
    """
    if not check_availability('gesvdj'):
        raise RuntimeError('gesvdj is not available.')

    if a.ndim == 3:
        return _gesvdj_batched(a, full_matrices, compute_uv, overwrite_a)

    assert a.ndim == 2

    if a.dtype == 'f':
        helper = _cusolver.sgesvdj_bufferSize
        solver = _cusolver.sgesvdj
        s_dtype = 'f'
    elif a.dtype == 'd':
        helper = _cusolver.dgesvdj_bufferSize
        solver = _cusolver.dgesvdj
        s_dtype = 'd'
    elif a.dtype == 'F':
        helper = _cusolver.cgesvdj_bufferSize
        solver = _cusolver.cgesvdj
        s_dtype = 'f'
    elif a.dtype == 'D':
        helper = _cusolver.zgesvdj_bufferSize
        solver = _cusolver.zgesvdj
        s_dtype = 'd'
    else:
        raise TypeError

    handle = _device.get_cusolver_handle()
    m, n = a.shape
    a = _cupy.array(a, order='F', copy=not overwrite_a)
    lda = m
    mn = min(m, n)
    s = _cupy.empty(mn, dtype=s_dtype)
    ldu = m
    ldv = n
    if compute_uv:
        jobz = _cusolver.CUSOLVER_EIG_MODE_VECTOR
    else:
        jobz = _cusolver.CUSOLVER_EIG_MODE_NOVECTOR
        full_matrices = False
    if full_matrices:
        econ = 0
        u = _cupy.empty((ldu, m), dtype=a.dtype, order='F')
        v = _cupy.empty((ldv, n), dtype=a.dtype, order='F')
    else:
        econ = 1
        u = _cupy.empty((ldu, mn), dtype=a.dtype, order='F')
        v = _cupy.empty((ldv, mn), dtype=a.dtype, order='F')
    params = _cusolver.createGesvdjInfo()
    lwork = helper(handle, jobz, econ, m, n, a.data.ptr, lda, s.data.ptr,
                   u.data.ptr, ldu, v.data.ptr, ldv, params)
    work = _cupy.empty(lwork, dtype=a.dtype)
    info = _cupy.empty(1, dtype=_numpy.int32)
    solver(handle, jobz, econ, m, n, a.data.ptr, lda, s.data.ptr,
           u.data.ptr, ldu, v.data.ptr, ldv, work.data.ptr, lwork,
           info.data.ptr, params)
    _cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        gesvdj, info)
    _cusolver.destroyGesvdjInfo(params)
    if compute_uv:
        return u, s, v
    else:
        return s
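# Minimal usage sketch for gesvdj() above (assumes CuPy with cuSOLVER and a
# CUDA device are available): for a real matrix the thin SVD reconstructs
# the input as u @ diag(s) @ v.T.
a = _cupy.random.rand(4, 3).astype('f')
u, s, v = gesvdj(a, full_matrices=False)
_cupy.testing.assert_allclose(u @ _cupy.diag(s) @ v.T, a, atol=1e-5)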
def histogram(x, bins=10, range=None, weights=None, density=False):
    """Computes the histogram of a set of data.

    Args:
        x (cupy.ndarray): Input array.
        bins (int or cupy.ndarray): If ``bins`` is an int, it represents the
            number of bins. If ``bins`` is an :class:`~cupy.ndarray`, it
            represents a bin edges.
        range (2-tuple of float, optional): The lower and upper range of the
            bins. If not provided, range is simply ``(a.min(), a.max())``.
            Values outside the range are ignored. The first element of the
            range must be less than or equal to the second. `range` affects
            the automatic bin computation as well. While bin width is
            computed to be optimal based on the actual data within `range`,
            the bin count will fill the entire range including portions
            containing no data.
        density (bool, optional): If False, the default, returns the number
            of samples in each bin. If True, returns the probability
            *density* function at the bin,
            ``bin_count / sample_count / bin_volume``.
        weights (cupy.ndarray, optional): An array of weights, of the same
            shape as `x`. Each value in `x` only contributes its associated
            weight towards the bin count (instead of 1).

    Returns:
        tuple: ``(hist, bin_edges)`` where ``hist`` is a
        :class:`cupy.ndarray` storing the values of the histogram, and
        ``bin_edges`` is a :class:`cupy.ndarray` storing the bin edges.

    .. warning:: This function may synchronize the device.

    .. seealso:: :func:`numpy.histogram`
    """
    if x.dtype.kind == "c":
        # TODO(unno): comparison between complex numbers is not implemented
        raise NotImplementedError("complex number is not supported")

    if not isinstance(x, cupy.ndarray):
        raise ValueError("x must be a cupy.ndarray")

    x, weights = _ravel_and_check_weights(x, weights)
    bin_edges, uniform_bins = _get_bin_edges(x, bins, range)

    if weights is None:
        y = cupy.zeros(bin_edges.size - 1, dtype="l")
        _histogram_kernel(x, bin_edges, bin_edges.size, y)
    else:
        simple_weights = cupy.can_cast(
            weights.dtype, cupy.double
        ) or cupy.can_cast(weights.dtype, complex)
        if not simple_weights:
            # object dtype such as Decimal are supported in NumPy, but not
            # here
            raise NotImplementedError(
                "only weights with dtype that can be cast to float or "
                "complex are supported"
            )
        if weights.dtype.kind == "c":
            y = cupy.zeros(bin_edges.size - 1, dtype=complex)
            _weighted_histogram_kernel(
                x, bin_edges, bin_edges.size, weights.real, y.real
            )
            _weighted_histogram_kernel(
                x, bin_edges, bin_edges.size, weights.imag, y.imag
            )
        else:
            if weights.dtype.kind in "bui":
                y = cupy.zeros(bin_edges.size - 1, dtype=int)
            else:
                y = cupy.zeros(bin_edges.size - 1, dtype=float)
            _weighted_histogram_kernel(
                x, bin_edges, bin_edges.size, weights, y)

    if density:
        db = cupy.array(cupy.diff(bin_edges), float)
        return y / db / y.sum(), bin_edges
    return y, bin_edges
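# Minimal usage sketch for histogram() above (assumes CuPy is available);
# semantics mirror numpy.histogram.
import cupy

samples = cupy.array([0.1, 0.4, 0.4, 0.9])
hist, edges = cupy.histogram(samples, bins=2, range=(0.0, 1.0))
# hist -> [3, 1]: three samples in [0.0, 0.5), one in [0.5, 1.0]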
def strange_kw_func(self, foo):
    return make_result(foo, numpy.array(1), cupy.array(1))
def make_classification(n_samples=100, n_features=20, n_informative=2,
                        n_redundant=2, n_repeated=0, n_classes=2,
                        n_clusters_per_class=2, weights=None, flip_y=0.01,
                        class_sep=1.0, hypercube=True, shift=0.0, scale=1.0,
                        shuffle=True, random_state=None, order='F',
                        dtype='float32', _centroids=None,
                        _informative_covariance=None,
                        _redundant_covariance=None,
                        _repeated_indices=None):
    """Generate a random n-class classification problem.

    This initially creates clusters of points normally distributed (std=1)
    about vertices of an ``n_informative``-dimensional hypercube with sides
    of length ``2*class_sep`` and assigns an equal number of clusters to
    each class. It introduces interdependence between these features and
    adds various types of further noise to the data.

    Without shuffling, ``X`` horizontally stacks features in the following
    order: the primary ``n_informative`` features, followed by
    ``n_redundant`` linear combinations of the informative features,
    followed by ``n_repeated`` duplicates, drawn randomly with replacement
    from the informative and redundant features. The remaining features are
    filled with random noise. Thus, without shuffling, all useful features
    are contained in the columns
    ``X[:, :n_informative + n_redundant + n_repeated]``.

    Examples
    --------

    .. code-block:: python

        from cuml.datasets.classification import make_classification

        X, y = make_classification(n_samples=10, n_features=4,
                                   n_informative=2, n_classes=2)

        print("X:")
        print(X)

        print("y:")
        print(y)

    Output:

    .. code-block:: python

        X:
        [[-2.3249989  -0.8679415  -1.1511791   1.3525577 ]
         [ 2.2933831   1.3743551   0.63128835 -0.84648645]
         [ 1.6361488  -1.3233329   0.807027   -0.894092  ]
         [-1.0093077  -0.9990691  -0.00808992  0.00950443]
         [ 0.99803793  2.068382    0.49570698 -0.8462848 ]
         [-1.2750955  -0.9725835  -0.2390058   0.28081596]
         [-1.3635055  -0.9637669  -0.31582272  0.37106958]
         [ 1.1893625   2.227583    0.48750278 -0.8737561 ]
         [-0.05753583 -1.0939395   0.8188342  -0.9620734 ]
         [ 0.47910076  0.7648213  -0.17165393  0.26144698]]

        y:
        [0 1 0 0 1 0 0 1 0 1]

    Parameters
    ----------
    n_samples : int, optional (default=100)
        The number of samples.
    n_features : int, optional (default=20)
        The total number of features. These comprise ``n_informative``
        informative features, ``n_redundant`` redundant features,
        ``n_repeated`` duplicated features and
        ``n_features-n_informative-n_redundant-n_repeated`` useless features
        drawn at random.
    n_informative : int, optional (default=2)
        The number of informative features. Each class is composed of a
        number of gaussian clusters each located around the vertices of a
        hypercube in a subspace of dimension ``n_informative``. For each
        cluster, informative features are drawn independently from N(0, 1)
        and then randomly linearly combined within each cluster in order to
        add covariance. The clusters are then placed on the vertices of the
        hypercube.
    n_redundant : int, optional (default=2)
        The number of redundant features. These features are generated as
        random linear combinations of the informative features.
    n_repeated : int, optional (default=0)
        The number of duplicated features, drawn randomly from the
        informative and the redundant features.
    n_classes : int, optional (default=2)
        The number of classes (or labels) of the classification problem.
    n_clusters_per_class : int, optional (default=2)
        The number of clusters per class.
    weights : array-like of shape (n_classes,) or (n_classes - 1,), \
            (default=None)
        The proportions of samples assigned to each class. If None, then
        classes are balanced. Note that if
        ``len(weights) == n_classes - 1``, then the last class weight is
        automatically inferred. More than ``n_samples`` samples may be
        returned if the sum of ``weights`` exceeds 1.
    flip_y : float, optional (default=0.01)
        The fraction of samples whose class is assigned randomly. Larger
        values introduce noise in the labels and make the classification
        task harder.
    class_sep : float, optional (default=1.0)
        The factor multiplying the hypercube size. Larger values spread out
        the clusters/classes and make the classification task easier.
    hypercube : boolean, optional (default=True)
        If True, the clusters are put on the vertices of a hypercube. If
        False, the clusters are put on the vertices of a random polytope.
    shift : float, array of shape [n_features] or None, optional (default=0.0)
        Shift features by the specified value. If None, then features are
        shifted by a random value drawn in [-class_sep, class_sep].
    scale : float, array of shape [n_features] or None, optional (default=1.0)
        Multiply features by the specified value. If None, then features are
        scaled by a random value drawn in [1, 100]. Note that scaling
        happens after shifting.
    shuffle : boolean, optional (default=True)
        Shuffle the samples and the features.
    random_state : int, RandomState instance or None (default)
        Determines random number generation for dataset creation. Pass an
        int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.
    order : str, optional (default='F')
        The order of the generated samples
    dtype : str, optional (default='float32')
        Dtype of the generated samples
    _centroids : array of centroids of shape (n_clusters, n_informative)
    _informative_covariance : array for covariance between informative
        features of shape (n_clusters, n_informative, n_informative)
    _redundant_covariance : array for covariance between redundant features
        of shape (n_informative, n_redundant)
    _repeated_indices : array of indices for the repeated features of
        shape (n_repeated, )

    Returns
    -------
    X : device array of shape [n_samples, n_features]
        The generated samples.
    y : device array of shape [n_samples]
        The integer labels for class membership of each sample.

    Notes
    -----
    The algorithm is adapted from Guyon [1] and was designed to generate
    the "Madelon" dataset. How we optimized for GPUs:

    1. Firstly, we generate X from a standard univariate instead of zeros.
       This saves memory as we don't need to generate univariates each
       time for each feature class (informative, repeated, etc.) while
       also providing the added speedup of generating a big matrix on GPU
    2. We generate ``order=F`` construction. We exploit the fact that X is
       generated from a univariate normal, and covariance is introduced
       with matrix multiplications. Which means, we can generate X as a 1D
       array and just reshape it to the desired order, which only updates
       the metadata and eliminates copies
    3. Lastly, we also shuffle by construction. Centroid indices are
       permuted for each sample, and then we construct the data for each
       centroid. This shuffle works for both ``order=C`` and ``order=F``
       and eliminates any need for secondary copies

    References
    ----------
    .. [1] I. Guyon, "Design of experiments for the NIPS 2003 variable
           selection benchmark", 2003.
    """
    generator = _create_rs_generator(random_state)
    np_seed = int(generator.randint(n_samples, size=1))
    np.random.seed(np_seed)

    # Count features, clusters and samples
    if n_informative + n_redundant + n_repeated > n_features:
        raise ValueError("Number of informative, redundant and repeated "
                         "features must sum to less than the number of total"
                         " features")
    # Use log2 to avoid overflow errors
    if n_informative < np.log2(n_classes * n_clusters_per_class):
        msg = "n_classes({}) * n_clusters_per_class({}) must be"
        msg += " smaller or equal 2**n_informative({})={}"
        raise ValueError(msg.format(n_classes, n_clusters_per_class,
                                    n_informative, 2**n_informative))

    if weights is not None:
        if len(weights) not in [n_classes, n_classes - 1]:
            raise ValueError("Weights specified but incompatible with number "
                             "of classes.")
        if len(weights) == n_classes - 1:
            if isinstance(weights, list):
                weights = weights + [1.0 - sum(weights)]
            else:
                weights = np.resize(weights, n_classes)
                weights[-1] = 1.0 - sum(weights[:-1])
    else:
        weights = [1.0 / n_classes] * n_classes

    n_clusters = n_classes * n_clusters_per_class

    # Distribute samples among clusters by weight
    n_samples_per_cluster = [
        int(n_samples * weights[k % n_classes] / n_clusters_per_class)
        for k in range(n_clusters)
    ]

    for i in range(n_samples - sum(n_samples_per_cluster)):
        n_samples_per_cluster[i % n_clusters] += 1

    # Initialize X and y
    X = generator.randn(n_samples * n_features, dtype=dtype)
    X = X.reshape((n_samples, n_features), order=order)
    y = cp.zeros(n_samples, dtype=np.int64)

    # Build the polytope whose vertices become cluster centroids
    if _centroids is None:
        centroids = cp.array(
            _generate_hypercube(n_clusters, n_informative,
                                generator)).astype(dtype, copy=False)
    else:
        centroids = _centroids
    centroids *= 2 * class_sep
    centroids -= class_sep
    if not hypercube:
        centroids *= generator.rand(n_clusters, 1, dtype=dtype)
        centroids *= generator.rand(1, n_informative, dtype=dtype)

    # Create redundant features
    if n_redundant > 0:
        if _redundant_covariance is None:
            B = 2 * generator.rand(n_informative, n_redundant,
                                   dtype=dtype) - 1
        else:
            B = _redundant_covariance

    # Create each cluster; a variant of make_blobs
    if shuffle:
        proba_samples_per_cluster = np.array(n_samples_per_cluster) / np.sum(
            n_samples_per_cluster)
        shuffled_sample_indices = cp.array(
            np.random.choice(n_clusters, n_samples, replace=True,
                             p=proba_samples_per_cluster))
        for k, centroid in enumerate(centroids):
            centroid_indices = cp.where(shuffled_sample_indices == k)
            y[centroid_indices[0]] = k % n_classes

            X_k = X[centroid_indices[0], :n_informative]

            if _informative_covariance is None:
                A = 2 * generator.rand(n_informative, n_informative,
                                       dtype=dtype) - 1
            else:
                A = _informative_covariance[k]
            X_k = cp.dot(X_k, A)

            # NOTE: This could be done outside the loop, but a current
            # cupy bug does not allow that
            # https://github.com/cupy/cupy/issues/3284
            if n_redundant > 0:
                X[centroid_indices[0],
                  n_informative:n_informative + n_redundant] = cp.dot(X_k, B)

            X_k += centroid  # shift the cluster to a vertex
            X[centroid_indices[0], :n_informative] = X_k
    else:
        stop = 0
        for k, centroid in enumerate(centroids):
            start, stop = stop, stop + n_samples_per_cluster[k]
            y[start:stop] = k % n_classes  # assign labels
            X_k = X[start:stop, :n_informative]  # slice a view of the cluster

            if _informative_covariance is None:
                A = 2 * generator.rand(n_informative, n_informative,
                                       dtype=dtype) - 1
            else:
                A = _informative_covariance[k]
            X_k = cp.dot(X_k, A)  # introduce random covariance

            if n_redundant > 0:
                X[start:stop,
                  n_informative:n_informative + n_redundant] = cp.dot(X_k, B)

            X_k += centroid  # shift the cluster to a vertex
            X[start:stop, :n_informative] = X_k

    # Repeat some features
    if n_repeated > 0:
        n = n_informative + n_redundant
        if _repeated_indices is None:
            indices = ((n - 1) * generator.rand(n_repeated, dtype=dtype)
                       + 0.5).astype(np.intp)
        else:
            indices = _repeated_indices
        X[:, n:n + n_repeated] = X[:, indices]

    # Randomly replace labels
    if flip_y >= 0.0:
        flip_mask = generator.rand(n_samples, dtype=dtype) < flip_y
        y[flip_mask] = generator.randint(n_classes,
                                         size=int(flip_mask.sum()))

    # Randomly shift and scale
    if shift is None:
        shift = (2 * generator.rand(n_features, dtype=dtype) - 1) * class_sep
    X += shift

    if scale is None:
        scale = 1 + 100 * generator.rand(n_features, dtype=dtype)
    X *= scale

    return X, y
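# Minimal usage sketch for make_classification() above, mirroring the
# docstring example (assumes cuML is installed with a CUDA device);
# random_state pins the generator for reproducible output.
from cuml.datasets.classification import make_classification

X, y = make_classification(n_samples=10, n_features=4, n_informative=2,
                           n_classes=2, random_state=42)
print(X.shape, y.shape)  # (10, 4) (10,)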
def predict_dense(x):
    inplace_predt = booster.inplace_predict(x)
    d = xgb.DMatrix(x)
    copied_predt = cp.array(booster.predict(d))
    return cp.all(copied_predt == inplace_predt)
def invalid_func(self, xp):
    return make_result(xp, numpy.array(1), cupy.array(2))
def nanpercentile(a, *args, **kwargs):
    """ For cupy v0.6.0 compatibility """
    return cp.array(np.nanpercentile(cp.asnumpy(a), *args, **kwargs))
def invalid_func(self, xp):
    return make_result(xp, [numpy.array(1)], [cupy.array(2)])
def get_device_dmat(self):
    w = None if self.w is None else cp.array(self.w)
    X = cp.array(self.X, dtype=np.float32)
    y = cp.array(self.y, dtype=np.float32)
    return xgb.DeviceQuantileDMatrix(X, y, w)
def conv(x, w, b, count=1, x_nhwc=False, w_nhwc=False):
    y_shape = (bsize, ochan, ow, oh)
    d_layout = cudnn.CUDNN_TENSOR_NCHW
    w_layout = cudnn.CUDNN_TENSOR_NCHW
    if x_nhwc:
        d_layout = cudnn.CUDNN_TENSOR_NHWC
        x = np.transpose(x, (0, 2, 3, 1))
        y_shape = (bsize, ow, oh, ochan)
    if w_nhwc:
        w_layout = cudnn.CUDNN_TENSOR_NHWC
        w = np.transpose(w, (0, 2, 3, 1))
    x = cupy.array(x)
    w = cupy.array(w)
    b = cupy.array(b)
    y = cupy.ones(y_shape, dtype=x.dtype)
    times = [time.time()]
    for _ in range(count):
        cupy.cudnn.convolution_forward(
            x, w, b, y, (0, 0), (1, 1), (1, 1), 1,
            auto_tune=True, tensor_core='auto',
            d_layout=d_layout, w_layout=w_layout)
        cupy.cuda.device.Device().synchronize()
        times.append(time.time())
    if count > 1:
        print('Elapsed:', (times[-1] - times[1]) / (count - 1))
    y = chainer.cuda.to_cpu(y)
    if x_nhwc:
        y = np.transpose(y, (0, 3, 1, 2))
    return y