def test_02_02_mask_invert(self):
    labels = np.zeros((10, 15), int)
    labels[2:5, 3:8] = 1
    labels[5:8, 10:14] = 2
    object_set = cpo.ObjectSet()
    objects = cpo.Objects()
    objects.segmented = labels
    object_set.add_objects(objects, OBJECTS_NAME)

    image_set_list = cpi.ImageSetList()
    image_set = image_set_list.get_image_set(0)
    np.random.seed(0)
    pixel_data = np.random.uniform(size=(10, 15)).astype(np.float32)
    image_set.add(IMAGE_NAME, cpi.Image(pixel_data))

    pipeline = cpp.Pipeline()
    module = M.MaskImage()
    module.source_choice.value = M.IO_OBJECTS
    module.object_name.value = OBJECTS_NAME
    module.image_name.value = IMAGE_NAME
    module.masked_image_name.value = MASKED_IMAGE_NAME
    module.invert_mask.value = True
    module.module_num = 1

    workspace = cpw.Workspace(pipeline, module, image_set, object_set,
                              cpmeas.Measurements(), image_set_list)
    module.run(workspace)
    masked_image = workspace.image_set.get_image(MASKED_IMAGE_NAME)
    self.assertTrue(isinstance(masked_image, cpi.Image))
    self.assertTrue(np.all(masked_image.pixel_data[labels == 0] ==
                           pixel_data[labels == 0]))
    self.assertTrue(np.all(masked_image.pixel_data[labels > 0] == 0))
    self.assertTrue(np.all(masked_image.mask == (labels == 0)))
    self.assertTrue(np.all(masked_image.masking_objects.segmented == labels))
def test_neg(vector_array):
    v = vector_array
    c = v.copy()
    cc = v.copy()
    c.scal(-1)
    assert np.all(almost_equal(c, -v))
    assert np.all(almost_equal(v, cc))
def test_scal(vector_array):
    v = vector_array
    for ind in valid_inds(v):
        if v.len_ind(ind) != v.len_ind_unique(ind):
            with pytest.raises(Exception):
                c = v.copy()
                c[ind].scal(1.)
            continue
        ind_complement_ = ind_complement(v, ind)
        c = v.copy()
        c[ind].scal(1.)
        assert len(c) == len(v)
        assert np.all(almost_equal(c, v))

        c = v.copy()
        c[ind].scal(0.)
        assert np.all(almost_equal(c[ind], v.zeros(v.len_ind(ind))))
        assert np.all(almost_equal(c[ind_complement_], v[ind_complement_]))

        for x in (1., 1.4, np.random.random(v.len_ind(ind))):
            c = v.copy()
            c[ind].scal(x)
            assert np.all(almost_equal(c[ind_complement_], v[ind_complement_]))
            assert np.allclose(c[ind].sup_norm(), v[ind].sup_norm() * abs(x))
            assert np.allclose(c[ind].l2_norm(), v[ind].l2_norm() * abs(x))
            if hasattr(v, 'data'):
                y = v.data.copy()
                if NUMPY_INDEX_QUIRK and len(y) == 0:
                    pass
                else:
                    if isinstance(x, np.ndarray) and not isinstance(ind, Number):
                        x = x[:, np.newaxis]
                    y[ind] *= x
                    assert np.allclose(c.data, y)
def test_03_03_color_mask(self):
    image_set_list = cpi.ImageSetList()
    image_set = image_set_list.get_image_set(0)
    np.random.seed(0)
    pixel_data = np.random.uniform(size=(10, 15, 3)).astype(np.float32)
    image_set.add(IMAGE_NAME, cpi.Image(pixel_data))

    masking_image = np.random.uniform(size=(10, 15))
    image_set.add(MASKING_IMAGE_NAME, cpi.Image(masking_image))
    expected_mask = masking_image > .5

    pipeline = cpp.Pipeline()
    module = M.MaskImage()
    module.source_choice.value = M.IO_IMAGE
    module.object_name.value = OBJECTS_NAME
    module.image_name.value = IMAGE_NAME
    module.masking_image_name.value = MASKING_IMAGE_NAME
    module.masked_image_name.value = MASKED_IMAGE_NAME
    module.invert_mask.value = False
    module.module_num = 1

    workspace = cpw.Workspace(pipeline, module, image_set, cpo.ObjectSet(),
                              cpmeas.Measurements(), image_set_list)
    module.run(workspace)
    masked_image = workspace.image_set.get_image(MASKED_IMAGE_NAME)
    self.assertTrue(isinstance(masked_image, cpi.Image))
    self.assertTrue(np.all(masked_image.pixel_data[expected_mask, :] ==
                           pixel_data[expected_mask, :]))
    self.assertTrue(np.all(masked_image.pixel_data[~expected_mask, :] == 0))
    self.assertTrue(np.all(masked_image.mask == expected_mask))
    self.assertFalse(masked_image.has_masking_objects)
def test_dofs(vector_array):
    v = vector_array
    np.random.seed(len(v) + 24 + v.dim)
    for ind in valid_inds(v):
        c = v.copy()
        # use the builtin int dtype (np.int is deprecated/removed in newer numpy)
        dofs = c[ind].dofs(np.array([], dtype=int))
        assert isinstance(dofs, np.ndarray)
        assert dofs.shape == (v.len_ind(ind), 0)

        c = v.copy()
        dofs = c[ind].dofs([])
        assert isinstance(dofs, np.ndarray)
        assert dofs.shape == (v.len_ind(ind), 0)

        if v.dim > 0:
            for count in (1, 5, 10):
                c_ind = np.random.randint(0, v.dim, count)
                c = v.copy()
                dofs = c[ind].dofs(c_ind)
                assert dofs.shape == (v.len_ind(ind), count)
                c = v.copy()
                dofs2 = c[ind].dofs(list(c_ind))
                assert np.all(dofs == dofs2)
                c = v.copy()
                c.scal(3.)
                dofs2 = c[ind].dofs(c_ind)
                assert np.allclose(dofs * 3, dofs2)
                c = v.copy()
                dofs2 = c[ind].dofs(np.hstack((c_ind, c_ind)))
                assert np.all(dofs2 == np.hstack((dofs, dofs)))
                if hasattr(v, 'data'):
                    assert np.all(dofs == indexed(v.data, ind)[:, c_ind])
def lnpriorfn(self, x):
    if np.all(self.pmin < x) and np.all(self.pmax > x):
        return 0.0
    else:
        return -np.inf
def testLoadSave(self):
    """Plot with an image: test MaskToolsWidget operations"""
    self.plot.addImage(numpy.arange(1024**2).reshape(1024, 1024),
                       legend='test')
    self.qapp.processEvents()

    # Draw a polygon mask
    toolButton = getQToolButtonFromAction(self.maskWidget.polygonAction)
    self.assertIsNot(toolButton, None)
    self.mouseClick(toolButton, qt.Qt.LeftButton)
    self._drawPolygon()

    ref_mask = self.maskWidget.getSelectionMask()
    self.assertFalse(numpy.all(numpy.equal(ref_mask, 0)))

    with temp_dir() as tmp:
        success = self.maskWidget.save(
            os.path.join(tmp, 'mask.npy'), 'npy')
        self.assertTrue(success)

        self.maskWidget.resetSelectionMask()
        self.assertTrue(
            numpy.all(numpy.equal(self.maskWidget.getSelectionMask(), 0)))

        result = self.maskWidget.load(os.path.join(tmp, 'mask.npy'))
        self.assertTrue(result)
        self.assertTrue(numpy.all(numpy.equal(
            self.maskWidget.getSelectionMask(), ref_mask)))
def test_2d_array_parameters_2d_array_input(self):
    """
    When given an array input it must be broadcastable with all the
    parameters.
    """
    t = TModel_1_2([[1, 2], [3, 4]], [[10, 20], [30, 40]],
                   [[1000, 2000], [3000, 4000]])

    y1, z1 = t([[100, 200], [300, 400]])
    assert np.shape(y1) == np.shape(z1) == (2, 2)
    assert np.all(y1 == [[111, 222], [333, 444]])
    assert np.all(z1 == [[1111, 2222], [3333, 4444]])

    y2, z2 = t([[[[100]], [[200]]], [[[300]], [[400]]]])
    assert np.shape(y2) == np.shape(z2) == (2, 2, 2, 2)
    assert np.all(y2 == [[[[111, 122], [133, 144]],
                          [[211, 222], [233, 244]]],
                         [[[311, 322], [333, 344]],
                          [[411, 422], [433, 444]]]])
    assert np.all(z2 == [[[[1111, 2122], [3133, 4144]],
                          [[1211, 2222], [3233, 4244]]],
                         [[[1311, 2322], [3333, 4344]],
                          [[1411, 2422], [3433, 4444]]]])

    with pytest.raises(ValueError):
        # Doesn't broadcast
        y3, z3 = t([[100, 200, 300], [400, 500, 600]])
def test_apply_mne_inverse_raw():
    """Test MNE with precomputed inverse operator on Raw."""
    start = 3
    stop = 10
    raw = read_raw_fif(fname_raw)
    label_lh = read_label(fname_label % 'Aud-lh')
    _, times = raw[0, start:stop]
    inverse_operator = read_inverse_operator(fname_full)
    inverse_operator = prepare_inverse_operator(inverse_operator, nave=1,
                                                lambda2=lambda2, method="dSPM")
    for pick_ori in [None, "normal", "vector"]:
        stc = apply_inverse_raw(raw, inverse_operator, lambda2, "dSPM",
                                label=label_lh, start=start, stop=stop,
                                nave=1, pick_ori=pick_ori, buffer_size=None,
                                prepared=True)
        stc2 = apply_inverse_raw(raw, inverse_operator, lambda2, "dSPM",
                                 label=label_lh, start=start, stop=stop,
                                 nave=1, pick_ori=pick_ori, buffer_size=3,
                                 prepared=True)

        if pick_ori is None:
            assert_true(np.all(stc.data > 0))
            assert_true(np.all(stc2.data > 0))

        assert_true(stc.subject == 'sample')
        assert_true(stc2.subject == 'sample')
        assert_array_almost_equal(stc.times, times)
        assert_array_almost_equal(stc2.times, times)
        assert_array_almost_equal(stc.data, stc2.data)
def test_scalar_parameters_1d_array_input(self):
    """
    The dimension of the input should match the number of models unless
    model_set_axis=False is given, in which case the input is copied across
    all models.
    """
    t = TModel_1_1([1, 2], [10, 20], n_models=2)

    with pytest.raises(ValueError):
        y = t(np.arange(5) * 100)

    y1 = t([100, 200])
    assert np.shape(y1) == (2,)
    assert np.all(y1 == [111, 222])

    y2 = t([100, 200], model_set_axis=False)
    # In this case the value [100, 200, 300] should be evaluated on each
    # model rather than evaluating the first model with 100 and the second
    # model with 200
    assert np.shape(y2) == (2, 2)
    assert np.all(y2 == [[111, 211], [122, 222]])

    y3 = t([100, 200, 300], model_set_axis=False)
    assert np.shape(y3) == (2, 3)
    assert np.all(y3 == [[111, 211, 311], [122, 222, 322]])
def test_1d_array_parameters_1d_array_input(self):
    """
    When the input is an array, if model_set_axis=False then it must
    broadcast with the shapes of the parameters (excluding the
    model_set_axis). Otherwise all dimensions must be broadcastable.
    """
    t = TModel_1_1([[1, 2, 3], [4, 5, 6]],
                   [[10, 20, 30], [40, 50, 60]], n_models=2)

    with pytest.raises(ValueError):
        y1 = t([100, 200, 300])

    y1 = t([100, 200])
    assert np.shape(y1) == (2, 3)
    assert np.all(y1 == [[111, 122, 133], [244, 255, 266]])

    with pytest.raises(ValueError):
        # Doesn't broadcast with the shape of the parameters, (3,)
        y2 = t([100, 200], model_set_axis=False)

    y2 = t([100, 200, 300], model_set_axis=False)
    assert np.shape(y2) == (2, 3)
    assert np.all(y2 == [[111, 222, 333], [144, 255, 366]])
def testSetAllLayersInvisible(self):
    tiling = Tiling((900, 400), blockSize=100)
    tp = TileProvider(tiling, self.sims)

    tp.requestRefresh(QRectF(100, 100, 200, 200))
    tp.waitForTiles()
    tiles = tp.getTiles(QRectF(100, 100, 200, 200))
    for tile in tiles:
        aimg = byte_view(tile.qimg)
        self.assertTrue(np.all(aimg[:, :, 0:3] == self.GRAY3))
        self.assertTrue(np.all(aimg[:, :, 3] == 255))

    self.layer1.visible = False
    self.layer2.visible = False
    self.layer3.visible = False
    tp.requestRefresh(QRectF(100, 100, 200, 200))
    tp.waitForTiles()
    tiles = tp.getTiles(QRectF(100, 100, 200, 200))
    for tile in tiles:
        # If all tiles are invisible, then no tile is even rendered at all.
        assert tile.qimg is None

    self.layer1.visible = False
    self.layer2.visible = True
    self.layer2.opacity = 1.0
    self.layer3.visible = False
    tp.requestRefresh(QRectF(100, 100, 200, 200))
    tp.waitForTiles()
    tiles = tp.getTiles(QRectF(100, 100, 200, 200))
    for tile in tiles:
        aimg = byte_view(tile.qimg)
        self.assertTrue(np.all(aimg[:, :, 0:3] == self.GRAY2))
        self.assertTrue(np.all(aimg[:, :, 3] == 255))
def test_mean_std_12bit(self):
    # Input 12-bit, with an 8-bit color target
    input_scene = np.tile(np.arange(4096)[:, None, None], (1, 1, 3))
    color_target = np.tile(np.arange(256)[:, None, None], (1, 1, 3))
    luts = hm.mean_std_luts(input_scene.astype(np.uint16),
                            color_target.astype(np.uint8))

    np.testing.assert_array_equal(luts[0], luts[1])
    np.testing.assert_array_equal(luts[1], luts[2])

    lut = luts[0]
    assert np.all(lut[:8] == 0)
    assert np.all(lut[-8:] == 4096)
    assert np.diff(lut[8:-8]).min() == 1
    assert np.diff(lut[8:-8]).max() == 2

    # Input 12-bit, with a 12-bit color target
    input_scene = np.tile(np.arange(4096)[:, None, None], (1, 1, 3))
    color_target = np.tile(np.arange(4096)[:, None, None], (1, 1, 3))
    luts = hm.mean_std_luts(input_scene.astype(np.uint16),
                            color_target.astype(np.uint16))

    # Should be a 1 to 1 look-up-table...
    np.testing.assert_array_equal(luts[0], np.arange(4097))
def test_07_01_make_ijv_outlines(self):
    np.random.seed(70)
    x = cpo.Objects()
    ii, jj = np.mgrid[0:10, 0:20]
    masks = [(ii - ic) ** 2 + (jj - jc) ** 2 < r ** 2
             for ic, jc, r in ((4, 5, 5), (4, 12, 5), (6, 8, 5))]
    i = np.hstack([ii[mask] for mask in masks])
    j = np.hstack([jj[mask] for mask in masks])
    v = np.hstack([[k + 1] * np.sum(mask) for k, mask in enumerate(masks)])

    x.set_ijv(np.column_stack((i, j, v)), ii.shape)
    x.parent_image = cpi.Image(np.zeros((10, 20)))
    colors = np.random.uniform(size=(3, 3)).astype(np.float32)
    image = x.make_ijv_outlines(colors)
    i1 = [i for i, color in enumerate(colors)
          if np.all(color == image[0, 5, :])]
    self.assertEqual(len(i1), 1)
    i2 = [i for i, color in enumerate(colors)
          if np.all(color == image[0, 12, :])]
    self.assertEqual(len(i2), 1)
    i3 = [i for i, color in enumerate(colors)
          if np.all(color == image[-1, 8, :])]
    self.assertEqual(len(i3), 1)
    self.assertNotEqual(i1[0], i2[0])
    self.assertNotEqual(i2[0], i3[0])

    colors = colors[np.array([i1[0], i2[0], i3[0]])]
    outlines = np.zeros((10, 20, 3), np.float32)
    alpha = np.zeros((10, 20))
    for i, (color, mask) in enumerate(zip(colors, masks)):
        my_outline = outline(mask)
        outlines[my_outline] += color
        alpha[my_outline] += 1
    alpha[alpha == 0] = 1
    outlines /= alpha[:, :, np.newaxis]
    np.testing.assert_almost_equal(outlines, image)
def test_01_04_size_color(self):
    secondary, mask = cpo.size_similarly(np.zeros((10, 20), int),
                                         np.zeros((10, 15, 3), np.float32))
    self.assertEqual(tuple(secondary.shape), (10, 20, 3))
    self.assertTrue(np.all(mask[:10, :15]))
    self.assertTrue(np.all(~mask[:10, 15:]))
    self.assertEqual(secondary.dtype, np.dtype(np.float32))
def test_no_bounds(self):
    x0 = np.zeros(3)
    h = np.ones(3) * 1e-2
    inf_lower = np.empty_like(x0)
    inf_upper = np.empty_like(x0)
    inf_lower.fill(-np.inf)
    inf_upper.fill(np.inf)

    h_adjusted, one_sided = _adjust_scheme_to_bounds(
        x0, h, 1, '1-sided', inf_lower, inf_upper)
    assert_allclose(h_adjusted, h)
    assert_(np.all(one_sided))

    h_adjusted, one_sided = _adjust_scheme_to_bounds(
        x0, h, 2, '1-sided', inf_lower, inf_upper)
    assert_allclose(h_adjusted, h)
    assert_(np.all(one_sided))

    h_adjusted, one_sided = _adjust_scheme_to_bounds(
        x0, h, 1, '2-sided', inf_lower, inf_upper)
    assert_allclose(h_adjusted, h)
    assert_(np.all(~one_sided))

    h_adjusted, one_sided = _adjust_scheme_to_bounds(
        x0, h, 2, '2-sided', inf_lower, inf_upper)
    assert_allclose(h_adjusted, h)
    assert_(np.all(~one_sided))
def test_06_05_ijv_three_overlapping(self):
    #
    # This is a regression test of a bug where a segmentation consists
    # of only one point, labeled three times yielding two planes instead
    # of three.
    #
    ijv = np.array([[4, 5, 1], [4, 5, 2], [4, 5, 3]])
    x = cpo.Objects()
    x.set_ijv(ijv, (8, 9))
    labels = []
    indices = np.zeros(3, bool)
    for l, i in x.get_labels():
        labels.append(l)
        self.assertEqual(len(i), 1)
        self.assertTrue(i[0] in (1, 2, 3))
        indices[i[0] - 1] = True
    self.assertTrue(np.all(indices))
    self.assertEqual(len(labels), 3)
    lstacked = np.dstack(labels)
    i, j, k = np.mgrid[0:lstacked.shape[0],
                       0:lstacked.shape[1],
                       0:lstacked.shape[2]]
    self.assertTrue(np.all(lstacked[(i != 4) | (j != 5)] == 0))
    self.assertEqual((1, 2, 3), tuple(sorted(lstacked[4, 5, :])))
def test_material_functions(self):
    from sfepy.discrete import Material

    problem = self.problem
    conf = problem.conf
    ts = problem.get_default_ts(step=0)

    conf_mat1 = conf.get_item_by_name('materials', 'mf1')
    mat1 = Material.from_conf(conf_mat1, problem.functions)
    mat1.time_update(ts, None, mode='normal', problem=problem)

    coors = problem.domain.get_mesh_coors()
    assert_(nm.all(coors[:, 0] == mat1.get_data(None, 'x_0')))

    conf_mat2 = conf.get_item_by_name('materials', 'mf2')
    mat2 = Material.from_conf(conf_mat2, problem.functions)
    mat2.time_update(ts, None, mode='normal', problem=problem)

    assert_(nm.all(coors[:, 1] == mat2.get_data(None, 'x_1')))

    materials = problem.get_materials()
    materials.time_update(ts, problem.equations, mode='normal', problem=problem)

    mat3 = materials['mf3']
    key = mat3.get_keys(region_name='Omega')[0]

    assert_(nm.all(mat3.get_data(key, 'a') == 10.0))
    assert_(nm.all(mat3.get_data(key, 'b') == 2.0))
    assert_(mat3.get_data(None, 'c') == 'ahoj')

    return True
def test_normalization():
    """Test that `match_template` gives the correct normalization.

    Normalization gives 1 for a perfect match and -1 for an inverted-match.
    This test adds positive and negative squares to a zero-array and matches
    the array with a positive template.
    """
    n = 5
    N = 20
    ipos, jpos = (2, 3)
    ineg, jneg = (12, 11)
    image = np.full((N, N), 0.5)
    image[ipos:ipos + n, jpos:jpos + n] = 1
    image[ineg:ineg + n, jneg:jneg + n] = 0

    # white square with a black border
    template = np.zeros((n + 2, n + 2))
    template[1:1 + n, 1:1 + n] = 1

    result = match_template(image, template)

    # get the max and min results.
    sorted_result = np.argsort(result.flat)
    iflat_min = sorted_result[0]
    iflat_max = sorted_result[-1]
    min_result = np.unravel_index(iflat_min, result.shape)
    max_result = np.unravel_index(iflat_max, result.shape)

    # shift result by 1 because of template border
    assert np.all((np.array(min_result) + 1) == (ineg, jneg))
    assert np.all((np.array(max_result) + 1) == (ipos, jpos))

    assert np.allclose(result.flat[iflat_min], -1)
    assert np.allclose(result.flat[iflat_max], 1)
def get_resampling_matrix(global_grid, local_grid):
    """Build the rectangular matrix that linearly resamples from the global
    grid to a local grid.

    The local grid range must be contained within the global grid range.

    Args:
        global_grid(numpy.ndarray): Sorted array of n global grid wavelengths.
        local_grid(numpy.ndarray): Sorted array of m local grid wavelengths.

    Returns:
        numpy.ndarray: Array of (m,n) matrix elements that perform the linear
            resampling.
    """
    assert np.all(np.diff(global_grid) > 0), 'Global grid is not strictly increasing.'
    assert np.all(np.diff(local_grid) > 0), 'Local grid is not strictly increasing.'
    # Locate each local wavelength in the global grid.
    global_index = np.searchsorted(global_grid, local_grid)
    assert local_grid[0] >= global_grid[0], 'Local grid extends below global grid.'
    assert local_grid[-1] <= global_grid[-1], 'Local grid extends above global grid.'
    # Lookup the global-grid bracketing interval (xlo,xhi) for each local grid point.
    # Note that this gives xlo = global_grid[-1] if local_grid[0] == global_grid[0]
    # but this is fine since the coefficient of xlo will be zero.
    global_xhi = global_grid[global_index]
    global_xlo = global_grid[global_index - 1]
    # Create the rectangular interpolation matrix to return.
    alpha = (local_grid - global_xlo) / (global_xhi - global_xlo)
    local_index = np.arange(len(local_grid), dtype=int)
    matrix = np.zeros((len(local_grid), len(global_grid)))
    matrix[local_index, global_index] = alpha
    matrix[local_index, global_index - 1] = 1 - alpha
    return matrix
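# Usage sketch for get_resampling_matrix (illustration only): the grids and
# flux below are made-up assumptions, not data from the original project.
# Applying the returned (m, n) matrix to values sampled on the global grid
# performs linear interpolation onto the local grid.
def _example_resampling():
    import numpy as np

    global_grid = np.linspace(3500.0, 9500.0, 601)   # n global wavelengths
    local_grid = np.linspace(4000.0, 5000.0, 101)    # m local wavelengths
    flux = np.exp(-0.5 * ((global_grid - 4500.0) / 200.0) ** 2)

    R = get_resampling_matrix(global_grid, local_grid)   # shape (m, n)
    local_flux = R.dot(flux)                             # resampled values
    # Matches direct linear interpolation at the same points.
    assert np.allclose(local_flux, np.interp(local_grid, global_grid, flux))
    return local_flux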
def max_err(self, g_pt, abs_tol, rel_tol):
    """Find the biggest error between g_pt and self.gf.

    What is measured is the violation of relative and absolute errors,
    wrt the provided tolerances (abs_tol, rel_tol).
    A value > 1 means both tolerances are exceeded.

    Return the argmax of min(abs_err / abs_tol, rel_err / rel_tol) over
    g_pt, as well as abs_err and rel_err at this point.
    """
    pos = []
    errs = []
    abs_errs = []
    rel_errs = []

    abs_rel_errs = self.abs_rel_errors(g_pt)
    for abs_err, rel_err in abs_rel_errs:
        if not numpy.all(numpy.isfinite(abs_err)):
            raise ValueError('abs_err not finite', repr(abs_err))
        if not numpy.all(numpy.isfinite(rel_err)):
            raise ValueError('rel_err not finite', repr(rel_err))
        scaled_err = numpy.minimum(abs_err / abs_tol, rel_err / rel_tol)
        max_i = scaled_err.argmax()

        pos.append(max_i)
        errs.append(scaled_err.flatten()[max_i])
        abs_errs.append(abs_err.flatten()[max_i])
        rel_errs.append(rel_err.flatten()[max_i])

    # max over the arrays in g_pt
    max_arg = numpy.argmax(errs)
    max_pos = pos[max_arg]
    return (max_arg, pos[max_arg], abs_errs[max_arg], rel_errs[max_arg])
def test_non_quantity_with_unit(self):
    """Test that unit attributes in objects get recognized."""
    class MyQuantityLookalike(np.ndarray):
        pass

    a = np.arange(3.)
    mylookalike = a.copy().view(MyQuantityLookalike)
    mylookalike.unit = 'm'
    q1 = u.Quantity(mylookalike)
    assert isinstance(q1, u.Quantity)
    assert q1.unit is u.m
    assert np.all(q1.value == a)

    q2 = u.Quantity(mylookalike, u.mm)
    assert q2.unit is u.mm
    assert np.all(q2.value == 1000. * a)

    q3 = u.Quantity(mylookalike, copy=False)
    assert np.all(q3.value == mylookalike)
    q3[2] = 0
    assert q3[2] == 0.
    assert mylookalike[2] == 0.

    mylookalike = a.copy().view(MyQuantityLookalike)
    mylookalike.unit = u.m
    q4 = u.Quantity(mylookalike, u.mm, copy=False)
    q4[2] = 0
    assert q4[2] == 0.
    assert mylookalike[2] == 2.

    mylookalike.unit = 'nonsense'
    with pytest.raises(TypeError):
        u.Quantity(mylookalike)
def test_path_no_doubled_point_in_to_polygon():
    hand = np.array(
        [[1.64516129, 1.16145833], [1.64516129, 1.59375],
         [1.35080645, 1.921875], [1.375, 2.18229167],
         [1.68548387, 1.9375], [1.60887097, 2.55208333],
         [1.68548387, 2.69791667], [1.76209677, 2.56770833],
         [1.83064516, 1.97395833], [1.89516129, 2.75],
         [1.9516129, 2.84895833], [2.01209677, 2.76041667],
         [1.99193548, 1.99479167], [2.11290323, 2.63020833],
         [2.2016129, 2.734375], [2.25403226, 2.60416667],
         [2.14919355, 1.953125], [2.30645161, 2.36979167],
         [2.39112903, 2.36979167], [2.41532258, 2.1875],
         [2.1733871, 1.703125], [2.07782258, 1.16666667]])

    (r0, c0, r1, c1) = (1.0, 1.5, 2.1, 2.5)

    poly = Path(np.vstack((hand[:, 1], hand[:, 0])).T, closed=True)
    clip_rect = transforms.Bbox([[r0, c0], [r1, c1]])
    poly_clipped = poly.clip_to_bbox(clip_rect).to_polygons()[0]

    assert np.all(poly_clipped[-2] != poly_clipped[-1])
    assert np.all(poly_clipped[-1] == poly_clipped[0])
def test_pickle():
    """Test that a module can be pickled"""
    M = Module()
    M.x = (T.dmatrix())
    M.y = (T.dmatrix())
    a = T.dmatrix()
    M.f = Method([a], a + M.x + M.y)
    M.g = Method([a], a * M.x * M.y)
    mode = get_mode()
    m = M.make(x=numpy.zeros((4, 5)), y=numpy.ones((2, 3)), mode=mode)

    m_dup = cPickle.loads(cPickle.dumps(m, protocol=-1))

    assert numpy.all(m.x == m_dup.x) and numpy.all(m.y == m_dup.y)

    m_dup.x[0, 0] = 3.142
    assert m_dup.f.input_storage[1].data[0, 0] == 3.142
    assert m.x[0, 0] == 0.0  # ensure that m is not aliased to m_dup

    # check that the unpickled version has the same argument/property aliasing
    assert m_dup.x is m_dup.f.input_storage[1].data
    assert m_dup.y is m_dup.f.input_storage[2].data
    assert m_dup.x is m_dup.g.input_storage[1].data
    assert m_dup.y is m_dup.g.input_storage[2].data
def test_tally_results(capi_run):
    t = openmc.capi.tallies[1]
    assert t.num_realizations == 5
    assert np.all(t.mean >= 0)
    nonzero = (t.mean > 0.0)
    assert np.all(t.std_dev[nonzero] >= 0)
    assert np.all(t.ci_width()[nonzero] >= 1.95 * t.std_dev[nonzero])
def pop_planes(geometry, kwargs):
    # Convert miller index specifications to normal vectors
    miller_defs = kwargs.pop("planes_miller", None)
    if miller_defs is not None:
        if np.any(np.all(abs(miller_defs[:, 0:3]) < EPSILON, axis=1)):
            error("Empty miller index tuple")
        miller_defs[:, 0:3] = miller_to_normal(
            np.dot(geometry.latvecs, geometry.bravais_cell),
            miller_defs[:, 0:3])
    else:
        miller_defs = np.zeros((0, 4), dtype=float)

    # Convert plane normal vector specifications into cartesian coords.
    normal_defs = kwargs.pop("planes_normal", None)
    if normal_defs is not None:
        normal_defs[:, 0:3] = geometry.coord_transform(
            normal_defs[:, 0:3],
            kwargs.pop("planes_normal_coordsys", "lattice"))
        if np.any(np.all(abs(normal_defs[:, 0:3]) < EPSILON, axis=1)):
            error("Empty normal vector definition")
    else:
        normal_defs = np.zeros((0, 4), dtype=float)

    # Append the two definitions
    planes_normal = np.vstack((miller_defs, normal_defs))
    return planes_normal
def test_rand(self):
    # Simple distributional checks for sparse.rand.
    for random_state in None, 4321, np.random.RandomState():
        x = sprand(10, 20, density=0.5, dtype=np.float64,
                   random_state=random_state)
        assert_(np.all(np.less_equal(0, x.data)))
        assert_(np.all(np.less_equal(x.data, 1)))
def __getitem__(self, key):
    if type(key) == slice:
        # if all in cache, then use slice, else don't
        start, stop, step = key.start, key.stop, key.step
        in_cache = self.existence_cache[start:stop:step]
        if np.all(in_cache):
            return self.cache[self.data_name][start:stop:step]
        elif np.all(np.logical_not(in_cache)):
            return self.__get_from_data_source(slice(start, stop, step))
        # partially cached: fall through to element-wise handling below
        key = slice_to_range(key, len(self))

    if is_int_like(key):
        index = key
        if self.existence_cache[index]:
            return self.cache[self.data_name][index]
        else:
            return self.__get_from_data_source(index)

    if is_array_like(key):
        data = []
        for index, in_cache in zip(key, self.existence_cache[key]):
            if in_cache:
                datum = self.cache[self.data_name][index]
            else:
                datum = self.__get_from_data_source(index)
            data.append(datum)
        return np.array(data)
    else:
        raise RuntimeError('key: {} is not compatible with this datasource'.format(str(key)))
def test_data_scaling(self):
    hdr = self.header_class()
    hdr.set_data_shape((1, 2, 3))
    hdr.set_data_dtype(np.int16)
    S3 = BytesIO()
    data = np.arange(6, dtype=np.float64).reshape((1, 2, 3))
    # This uses scaling
    hdr.data_to_fileobj(data, S3)
    data_back = hdr.data_from_fileobj(S3)
    # almost equal
    assert_array_almost_equal(data, data_back, 4)
    # But not quite
    assert_false(np.all(data == data_back))
    # This is exactly the same call, just testing it works twice
    data_back2 = hdr.data_from_fileobj(S3)
    assert_array_equal(data_back, data_back2, 4)
    # Rescaling is the default
    hdr.data_to_fileobj(data, S3, rescale=True)
    data_back = hdr.data_from_fileobj(S3)
    assert_array_almost_equal(data, data_back, 4)
    assert_false(np.all(data == data_back))
    # This doesn't use scaling, and so gets perfect precision
    hdr.data_to_fileobj(data, S3, rescale=False)
    data_back = hdr.data_from_fileobj(S3)
    assert_true(np.all(data == data_back))
def test_reset(Simulator, learning_rule, plt, seed, rng):
    """Make sure resetting learning rules resets all state."""
    m, activity_p, trans_p = learning_net(
        learning_rule, nengo.Network(seed=seed), rng)

    sim = Simulator(m)
    sim.run(0.1)
    sim.run(0.2)

    first_t = sim.trange()
    first_t_trans = sim.trange(dt=0.01)
    first_activity_p = np.array(sim.data[activity_p], copy=True)
    first_trans_p = np.array(sim.data[trans_p], copy=True)

    sim.reset()
    sim.run(0.3)

    plt.subplot(2, 1, 1)
    plt.ylabel("Neural activity")
    plt.plot(first_t, first_activity_p, c='b')
    plt.plot(sim.trange(), sim.data[activity_p], c='g')
    plt.subplot(2, 1, 2)
    plt.ylabel("Connection weight")
    plt.plot(first_t_trans, first_trans_p[..., 0], c='b')
    plt.plot(sim.trange(dt=0.01), sim.data[trans_p][..., 0], c='g')

    assert np.all(sim.trange() == first_t)
    assert np.all(sim.trange(dt=0.01) == first_t_trans)
    assert np.all(sim.data[activity_p] == first_activity_p)
    assert np.all(sim.data[trans_p] == first_trans_p)
def boxplot_local_evaluation(metric="RMSE",
                             paths=["../result/result_oracle/default-model/mode_test_.list",
                                    "../result/baselines/log/predictions_raw_RF.list"]):
    dic_measure = {"MAE": 0, "MSE": 1, "R2_S": 2, "RRMSE": 3, "RMSE": 4, "MARE": 5, "R2": 6}
    data = []
    data2 = []
    data3 = []
    for path in paths:
        print(path)
        if str.find(path, 'baselines') >= 0:
            _, y_true, y_pred = pickle.load(open(path, 'rb'))
        else:
            y_true, y_pred = pickle.load(open(path, 'rb'))
        y_true = np.array(y_true)
        y_pred = np.array(y_pred)
        max_value = np.max(y_true)
        min_value = np.min(y_true)
        steps = [[min_value + (50 * i), min_value + (50 * (i + 1))]
                 for i in range(0, int((max_value - min_value) / 50) - 4)]
        steps[len(steps) - 1][1] = max_value + 1
        x = []
        y = []
        x2 = []
        y2 = []
        sd = []
        mare = []
        mare_sd = []
        for step in steps:
            aux_true = y_true[np.all([step[0] <= y_true, y_true < step[1]], axis=0)]
            aux_pred = y_pred[np.all([step[0] <= y_true, y_true < step[1]], axis=0)]
            diff = aux_pred - aux_true
            y = y + diff.tolist()
            x = x + (["{0} - {1}".format(step[0], step[1])] * diff.shape[0])
            x2 = x2 + ["{0} - {1}".format(step[0], step[1])]
            aux_mare = (np.abs(aux_true - aux_pred) / aux_pred) * 100
            mare = mare + [np.mean(aux_mare)]
            mare_sd = mare_sd + [np.std(aux_mare)]
            y2 = y2 + [np.mean(np.abs(diff))]
            sd = sd + [np.std(np.abs(diff))]
        data.append([x, y])
        data2.append([x2, y2, sd])
        data3.append([x2, mare, mare_sd])

    trace0 = go.Box(x=data[0][0], y=data[0][1], name='mode',
                    marker=dict(color='#3D9970'), boxmean=True)
    trace1 = go.Box(x=data[1][0], y=data[1][1], name='baseline-RF',
                    marker=dict(color='#FF851B'), boxmean=True)
    data = [trace0, trace1]
    layout = go.Layout(title="Difference between predicted and truth by range",
                       yaxis=dict(title='Difference (predict-truth)', zeroline=False),
                       boxmode='group')
    fig = go.Figure(data=data, layout=layout)
    plot(fig, filename='boxplot.html', auto_open=True)

    trace0 = go.Bar(x=data2[0][0], y=data2[0][1], name='mode',
                    error_y=dict(type='data', array=data2[0][2], visible=True))
    trace1 = go.Bar(x=data2[1][0], y=data2[1][1], name='baseline-RF',
                    error_y=dict(type='data', array=data2[1][2], visible=True))
    data = [trace0, trace1]
    layout = go.Layout(title="Absolute difference between predicted and truth by range",
                       barmode='group', yaxis=dict(title="Abs Diff"))
    fig = go.Figure(data=data, layout=layout)
    plot(fig, filename='barplot-mare.html', auto_open=True)

    trace0 = go.Bar(x=data3[0][0], y=data3[0][1], name='mode',
                    error_y=dict(type='data', array=data3[0][2], visible=True))
    trace1 = go.Bar(x=data3[1][0], y=data3[1][1], name='baseline-RF',
                    error_y=dict(type='data', array=data3[1][2], visible=True))
    data = [trace0, trace1]
    layout = go.Layout(title="MARE measure by range", barmode='group',
                       yaxis=dict(title="MARE"))
    fig = go.Figure(data=data, layout=layout)
    plot(fig, filename='barplot-diff.html', auto_open=True)
def test_gzip(filename):
    t_comp = read(os.path.join(ROOT, filename))
    t_uncomp = read(os.path.join(ROOT, filename.replace('.gz', '')))
    assert t_comp.dtype.names == t_uncomp.dtype.names
    assert np.all(t_comp.as_array() == t_uncomp.as_array())
def neighbor_mean_std(df,
                      col_val,
                      col_group,
                      col_axis,
                      axis_offset=None,
                      radius=None,
                      compute_mad=False):
    """Compute the neighbor mean and std of the residual matrix.

    Args:
        df (pd.DataFrame): Residual data frame.
        col_val ('str'): Name for column that store the residual.
        col_group ('str'): Name for column that store the group label.
        col_axis (list{str}): List of two axis column names.
        axis_offset (list{int} | None, optional):
            List of offset for each axis to make it suitable as numpy array.
        radius (list{int} | None, optional):
            List of the neighbor radius for each dimension.
        compute_mad (bool, optional):
            If compute_mad, also compute median absolute deviation.

    Returns:
        pd.DataFrame: Return the data frame with two extra columns contains
            neighbor mean and std.
    """
    axis_offset = [0, 0] if axis_offset is None else axis_offset
    radius = [1, 1] if radius is None else radius
    assert col_val in df
    assert col_group in df
    assert len(col_axis) == 2
    assert len(axis_offset) == 2
    assert len(radius) == 2
    assert all([col in df for col in col_axis])
    assert all([isinstance(offset, int) for offset in axis_offset])
    assert all([isinstance(r, int) for r in radius])

    df_list = [
        df[df[col_group] == group].reset_index()
        for group in df[col_group].unique()
    ]  # separate dataset by groups

    for i, df_sub in enumerate(df_list):
        index = np.unique(np.asarray(df_sub[col_axis].values), axis=0).astype(int)
        new_df = pd.DataFrame({
            'group': df_sub[col_group].iloc[0],
            col_axis[0]: index[:, 0],
            col_axis[1]: index[:, 1],
            'residual_mean': np.nan,
            'residual_std': np.nan
        })
        for j in index:
            print(j, end='\r')
            df_filter = df_sub.copy()
            for k, ax in enumerate(col_axis):
                rad = radius[k]
                ax_filter = np.abs(df_sub[col_axis[k]] - j[k]) <= rad
                df_filter = df_filter.loc[ax_filter]

            mean = df_filter[col_val].mean()
            std = df_filter[col_val].std()

            subset = np.all(new_df[col_axis] == j, axis=1).values
            new_df.loc[subset, 'residual_mean'] = mean
            new_df.loc[subset, 'residual_std'] = std
        df_list[i] = new_df

    return pd.concat(df_list)
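# Usage sketch for neighbor_mean_std (illustration only): the column names and
# values below are made-up assumptions. Each (row, col) cell gets the mean and
# std of residuals within a +/-1 window along both axes, computed per group.
def _example_neighbor_mean_std():
    import numpy as np
    import pandas as pd

    residual_df = pd.DataFrame({
        'group': ['A'] * 4 + ['B'] * 4,
        'row': [0, 0, 1, 1] * 2,
        'col': [0, 1, 0, 1] * 2,
        'residual': np.random.randn(8),
    })
    summary = neighbor_mean_std(residual_df,
                                col_val='residual',
                                col_group='group',
                                col_axis=['row', 'col'],
                                radius=[1, 1])
    return summary[['group', 'row', 'col', 'residual_mean', 'residual_std']]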
def test_psd_from_freq_series(self):
    freq_data = np.array([1, 2, 3])
    df = 0.1
    psd = gwutils.psd_from_freq_series(freq_data, df)
    self.assertTrue(np.all(psd == (freq_data * 2 * df ** 0.5) ** 2))
def test_asd_from_freq_series(self):
    freq_data = np.array([1, 2, 3])
    df = 0.1
    asd = gwutils.asd_from_freq_series(freq_data, df)
    self.assertTrue(np.all(asd == freq_data * 2 * df ** 0.5))
def assert_correct_split_candidates(split_candidates, counts):
    assert isinstance(split_candidates, np.ndarray)
    assert split_candidates[0] == 0
    assert split_candidates[-1] == len(counts)
    assert np.all(
        split_candidates[1:] > split_candidates[:-1])  # strictly ascending
def assert_correct_counts(counts):
    assert isinstance(counts, np.ndarray)
    assert counts.dtype == int
    assert np.all(counts >= 0)
    assert len(counts) > 0
def test_torchscript(tmpdir, csv_filename, should_load_model, model_type):
    #######
    # Setup
    #######
    dir_path = tmpdir
    data_csv_path = os.path.join(tmpdir, csv_filename)

    # Single sequence input, single category output
    input_features = [
        binary_feature(),
        number_feature(),
        category_feature(vocab_size=3),
    ]
    if model_type == "ecd":
        image_dest_folder = os.path.join(tmpdir, "generated_images")
        audio_dest_folder = os.path.join(tmpdir, "generated_audio")
        input_features.extend([
            sequence_feature(vocab_size=3),
            text_feature(vocab_size=3),
            vector_feature(),
            image_feature(image_dest_folder),
            audio_feature(audio_dest_folder),
            timeseries_feature(),
            date_feature(),
            date_feature(),
            h3_feature(),
            set_feature(vocab_size=3),
            bag_feature(vocab_size=3),
        ])

    output_features = [
        category_feature(vocab_size=3),
    ]
    if model_type == "ecd":
        output_features.extend([
            binary_feature(),
            number_feature(),
            set_feature(vocab_size=3),
            vector_feature(),
            sequence_feature(vocab_size=3),
            text_feature(vocab_size=3),
        ])

    predictions_column_name = "{}_predictions".format(output_features[0]["name"])

    # Generate test data
    data_csv_path = generate_data(input_features, output_features, data_csv_path)

    #############
    # Train model
    #############
    backend = LocalTestBackend()
    config = {
        "model_type": model_type,
        "input_features": input_features,
        "output_features": output_features,
    }
    if model_type == "ecd":
        config[TRAINER] = {"epochs": 2}
    else:
        config[TRAINER] = {"num_boost_round": 2}
    ludwig_model = LudwigModel(config, backend=backend)
    ludwig_model.train(
        dataset=data_csv_path,
        skip_save_training_description=True,
        skip_save_training_statistics=True,
        skip_save_model=True,
        skip_save_progress=True,
        skip_save_log=True,
        skip_save_processed_input=True,
    )

    ###################
    # save Ludwig model
    ###################
    ludwigmodel_path = os.path.join(dir_path, "ludwigmodel")
    shutil.rmtree(ludwigmodel_path, ignore_errors=True)
    ludwig_model.save(ludwigmodel_path)

    ###################
    # load Ludwig model
    ###################
    if should_load_model:
        ludwig_model = LudwigModel.load(ludwigmodel_path, backend=backend)

    ##############################
    # collect weight tensors names
    ##############################
    original_predictions_df, _ = ludwig_model.predict(dataset=data_csv_path)
    original_weights = deepcopy(list(ludwig_model.model.parameters()))
    original_weights = [t.cpu() for t in original_weights]

    # Move the model to CPU for tracing
    ludwig_model.model.cpu()

    #################
    # save torchscript
    #################
    torchscript_path = os.path.join(dir_path, "torchscript")
    shutil.rmtree(torchscript_path, ignore_errors=True)
    ludwig_model.model.save_torchscript(torchscript_path)

    ###################################################
    # load Ludwig model, obtain predictions and weights
    ###################################################
    ludwig_model = LudwigModel.load(ludwigmodel_path, backend=backend)
    loaded_prediction_df, _ = ludwig_model.predict(dataset=data_csv_path)
    loaded_weights = deepcopy(list(ludwig_model.model.parameters()))
    loaded_weights = [t.cpu() for t in loaded_weights]

    #####################################################
    # restore torchscript, obtain predictions and weights
    #####################################################
    training_set_metadata_json_fp = os.path.join(ludwigmodel_path,
                                                 TRAIN_SET_METADATA_FILE_NAME)

    dataset, training_set_metadata = preprocess_for_prediction(
        ludwig_model.config,
        dataset=data_csv_path,
        training_set_metadata=training_set_metadata_json_fp,
        include_outputs=False,
        backend=backend,
    )

    restored_model = torch.jit.load(torchscript_path)

    # Check the outputs for one of the features for correctness
    # Here we choose the first output feature (categorical)
    of_name = list(ludwig_model.model.output_features.keys())[0]

    data_to_predict = {
        name: torch.from_numpy(dataset.dataset[feature.proc_column])
        for name, feature in ludwig_model.model.input_features.items()
    }

    # Get predictions from restored torchscript.
    logits = restored_model(data_to_predict)
    restored_predictions = torch.argmax(
        output_feature_utils.get_output_feature_tensor(logits, of_name, "logits"), -1)

    restored_predictions = [
        training_set_metadata[of_name]["idx2str"][idx] for idx in restored_predictions
    ]

    restored_weights = deepcopy(list(restored_model.parameters()))
    restored_weights = [t.cpu() for t in restored_weights]

    ###############################################
    # Check if weights and predictions are the same
    ###############################################

    # Check to weight values match the original model.
    assert utils.is_all_close(original_weights, loaded_weights)
    assert utils.is_all_close(original_weights, restored_weights)

    # Check that predictions are identical to the original model.
    assert np.all(original_predictions_df[predictions_column_name] ==
                  loaded_prediction_df[predictions_column_name])
    assert np.all(original_predictions_df[predictions_column_name] == restored_predictions)
def download_data(client=None, sta=None, start=UTCDateTime, end=UTCDateTime, stdata=[], ndval=nan, new_sr=0., verbose=False): """ Function to build a stream object for a seismogram in a given time window either by downloading data from the client object or alternatively first checking if the given data is already available locally. Note ---- Currently only supports NEZ Components! Parameters ---------- client : :class:`~obspy.client.fdsn.Client` Client object sta : Dict Station metadata from :mod:`~StDb` data base start : :class:`~obspy.core.utcdatetime.UTCDateTime` Start time for request end : :class:`~obspy.core.utcdatetime.UTCDateTime` End time for request stdata : List Station list ndval : float or nan Default value for missing data Returns ------- err : bool Boolean for error handling (`False` is associated with success) trN : :class:`~obspy.core.Trace` Trace of North component of motion trE : :class:`~obspy.core.Trace` Trace of East component of motion trZ : :class:`~obspy.core.Trace` Trace of Vertical component of motion """ from fnmatch import filter from obspy import read, Stream from os.path import dirname, join, exists from numpy import any from math import floor # Output print(("* {0:s}.{1:2s} - ZNE:".format(sta.station, sta.channel.upper()))) # Set Error Default to True erd = True # Check if there is local data if len(stdata) > 0: # Only a single day: Search for local data # Get Z localdata errZ, stZ = parse_localdata_for_comp(comp='Z', stdata=stdata, sta=sta, start=start, end=end, ndval=ndval) # Get N localdata errN, stN = parse_localdata_for_comp(comp='N', stdata=stdata, sta=sta, start=start, end=end, ndval=ndval) # Get E localdata errE, stE = parse_localdata_for_comp(comp='E', stdata=stdata, sta=sta, start=start, end=end, ndval=ndval) # Retreived Succesfully? erd = errZ or errN or errE if not erd: # Combine Data st = stZ + stN + stE # No local data? Request using client if erd: erd = False for loc in sta.location: tloc = loc # Construct location name if len(tloc) == 0: tloc = "--" # Construct Channel List channelsZNE = sta.channel.upper() + 'Z,' + sta.channel.upper() + \ 'N,' + sta.channel.upper() + 'E' print(("* {1:2s}[ZNE].{2:2s} - Checking Network".format( sta.station, sta.channel.upper(), tloc))) # Get waveforms, with extra 1 second to avoid # traces cropped too short - traces are trimmed later try: st = client.get_waveforms(network=sta.network, station=sta.station, location=loc, channel=channelsZNE, starttime=start, endtime=end + 1., attach_response=False) if len(st) == 3: print("* - ZNE Data Downloaded") # It's possible if len(st)==1 that data is Z12 else: # Construct Channel List channelsZ12 = sta.channel.upper() + 'Z,' + \ sta.channel.upper() + '1,' + \ sta.channel.upper() + '2' msg = "* {1:2s}[Z12].{2:2s} - Checking Network".format( sta.station, sta.channel.upper(), tloc) print(msg) try: st = client.get_waveforms(network=sta.network, station=sta.station, location=loc, channel=channelsZ12, starttime=start, endtime=end + 1., attach_response=False) if len(st) == 3: print("* - Z12 Data Downloaded") else: st = None except: st = None except: st = None # Break if we successfully obtained 3 components in st if not erd: break # Check the correct 3 components exist if st is None: print("* Error retrieving waveforms") print("**************************************************") return True, None # Three components successfully retrieved else: # Detrend and apply taper st.detrend('demean').detrend('linear').taper(max_percentage=0.05, max_length=5.) 
# Check start times if not np.all([tr.stats.starttime == start for tr in st]): print("* Start times are not all close to true start: ") [ print("* " + tr.stats.channel + " " + str(tr.stats.starttime) + " " + str(tr.stats.endtime)) for tr in st ] print("* True start: " + str(start)) print("* -> Shifting traces to true start") delay = [tr.stats.starttime - start for tr in st] st_shifted = Stream( traces=[traceshift(tr, dt) for tr, dt in zip(st, delay)]) st = st_shifted.copy() # Check sampling rate sr = st[0].stats.sampling_rate sr_round = float(floor_decimal(sr, 0)) if not sr == sr_round: print("* Sampling rate is not an integer value: ", sr) print("* -> Resampling") st.resample(sr_round, no_filter=False) # Try trimming try: st.trim(start, end) except: print("* Unable to trim") print("* -> Skipping") print("**************************************************") return True, None # Check final lengths - they should all be equal if start times # and sampling rates are all equal and traces have been trimmed if not np.allclose([tr.stats.npts for tr in st[1:]], st[0].stats.npts): print("* Lengths are incompatible: ") [print("* " + str(tr.stats.npts)) for tr in st] print("* -> Skipping") print("**************************************************") return True, None elif not np.allclose( [st[0].stats.npts], int((end - start) * sr), atol=1): print("* Length is too short: ") print("* " + str(st[0].stats.npts) + " ~= " + str(int((end - start) * sr))) print("* -> Skipping") print("**************************************************") return True, None else: print("* Waveforms Retrieved...") return False, st
def _cal_entropy(info):
    print(np.all(info / NUM_NODES))
    return np.sum(np.multiply(info, np.log(info / NUM_NODES)))
def compute_pgv_contour_sequence_supplement(self): ''' Compute the supplement data representing the PGV sequence. ''' # Load the event metadata from the supplement file. meta = self.meta # Load the PGV data stream. pgv_stream = util.get_supplement_data(self.event_public_id, category = 'detectiondata', name = 'pgv', directory = self.supplement_dir) # Trim the stream. pgv_stream.trim(starttime = meta['start_time'] - 6, endtime = meta['end_time'] + 6, pad = True) inventory = self.project.inventory station_nsl = [('MSSNet', x.stats.station, x.stats.location) for x in pgv_stream] station_nsl = [':'.join(x) for x in station_nsl] stations = [inventory.get_station(nsl_string = x)[0] for x in station_nsl] times = pgv_stream[0].times("utcdatetime") data = np.array([x.data for x in pgv_stream]).transpose() detection_limits = meta['detection_limits'] sequence_df = None last_pgv_df = None last_krig_z = None no_change_cnt = 0 for k in range(len(times)): cur_time = times[k] self.logger.info("Computing frame {time}.".format(time = str(cur_time))) triggered = [] for cur_station in stations: if cur_station.nsl_string not in detection_limits.keys(): cur_trigger = False else: cur_detection_limit = detection_limits[cur_station.nsl_string] if cur_time >= cur_detection_limit[0] and cur_time <= cur_detection_limit[1]: cur_trigger = True else: cur_trigger = False triggered.append(cur_trigger) cur_points = [shapely.geometry.Point(x.x, x.y) for x in stations] cur_df = gpd.GeoDataFrame({'geom_vor': [shapely.geometry.Polygon([])] * len(stations), 'geom_stat': cur_points, 'time': [util.isoformat_tz(cur_time)] * len(stations), 'nsl': [x.nsl_string for x in stations], 'x': [x.x for x in stations], 'y': [x.y for x in stations], 'x_utm': [x.x_utm for x in stations], 'y_utm': [x.y_utm for x in stations], 'pgv': data[k, :], 'triggered': triggered}, crs = "epsg:4326", geometry = 'geom_stat') # Add the station amplification factors. self.add_station_amplification(cur_df) # Compute the corrected pgv values. cur_df['pgv_corr'] = cur_df.pgv / cur_df.sa # Use only the stations with a valid corrected pgv. cur_df = cur_df[cur_df['pgv_corr'].notna()] cur_df = cur_df.reset_index() # Update the pgv values to keep the event maximum pgv. # Track changes of the event maximum pgv. if last_pgv_df is not None: # Use the current PGV values only, if they are higher than # the last ones. # # Update the last_pgv_df with the current df. It is possible, that # rows are missing or new ones are available. # Remove the rows, that are not present in the cur_df. tmp_df = last_pgv_df[last_pgv_df.nsl.isin(cur_df.nsl)] # Add the rows, that are not present in the last_pgv_df. mask_df = tmp_df.append(cur_df[~cur_df.nsl.isin(last_pgv_df.nsl)], ignore_index = True) # Sort the two dataframes using the nsl. tmp_df = tmp_df.sort_values(by = 'nsl', ignore_index = True) mask_df = mask_df.sort_values(by = 'nsl', ignore_index = True) # Check for correct station snl. if (np.any(tmp_df['nsl'].values != mask_df['nsl'].values)): raise RuntimeError("The statin SNL codes of the two dataframes to compare are not equal.") # Reset the values for the stations, that already had a larger pgv value. mask = cur_df.pgv_corr < mask_df.pgv_corr cur_df.loc[mask, 'pgv_corr'] = mask_df.loc[mask, 'pgv_corr'] if np.all(mask): no_change_cnt += 1 else: no_change_cnt = 0 self.logger.info('no_change_cnt: ' + str(no_change_cnt)) # Exit if the was no change of the max event pgv data for some time. 
if no_change_cnt >= 5: self.logger.info('No change for some time, stop computation of contours.') break # Keep the last pgv dataframe. # Get the rows, that are not available in cur_df and keep them. if last_pgv_df is not None: tmp_df = last_pgv_df[~last_pgv_df.nsl.isin(cur_df.nsl)] last_pgv_df = cur_df.copy() last_pgv_df = last_pgv_df.append(tmp_df.copy(), ignore_index = True) else: last_pgv_df = cur_df.copy() # Interpolate to a regular grid using ordinary kriging. self.logger.info("Interpolate") krig_z, krig_sigmasq, grid_x, grid_y = util.compute_pgv_krigging(x = cur_df.x_utm.values, y = cur_df.y_utm.values, z = np.log10(cur_df.pgv_corr), nlags = 40, verbose = False, enable_plotting = False, weight = True) # Update the interpolated pgv values only if they are higher than the last ones. #if last_krig_z is not None: # cur_mask = krig_z < last_krig_z # krig_z[cur_mask] = last_krig_z[cur_mask] #last_krig_z = krig_z self.logger.info("Contours") # Compute the contours. intensity = np.arange(2, 8.1, 0.1) # Add lower and upper limits to catch all the data below or # above the desired intensity range. intensity = np.hstack([[-10], intensity, [20]]) # Use a low intensity_I_pgv value to make sure, that the lowest countour # level captures all PGV values. intensity_pgv = util.intensity_to_pgv(intensity = intensity, intensity_I_pgv = 1e-9) # Create and delete a figure to prevent pyplot from plotting the # contours. fig = plt.figure() ax = fig.add_subplot(111) cs = ax.contourf(grid_x, grid_y, krig_z, np.log10(intensity_pgv[:, 1])) contours = util.contourset_to_shapely(cs) fig.clear() plt.close(fig) del ax del fig del cs self.logger.info('dataframe') # Create a geodataframe of the contour polygons. cont_data = {'time': [], 'geometry': [], 'intensity': [], 'pgv': []} for cur_level, cur_poly in contours.items(): cur_intensity = util.pgv_to_intensity(pgv = [10**cur_level] * len(cur_poly)) cont_data['time'].extend([util.isoformat_tz(cur_time)] * len(cur_poly)) cont_data['geometry'].extend(cur_poly) cont_data['intensity'].extend(cur_intensity[:, 1].tolist()) cont_data['pgv'].extend([10**cur_level] * len(cur_poly)) cur_cont_df = gpd.GeoDataFrame(data = cont_data) # Convert the polygon coordinates to EPSG:4326. src_proj = pyproj.Proj(init = 'epsg:' + self.project.inventory.get_utm_epsg()[0][0]) dst_proj = pyproj.Proj(init = 'epsg:4326') cur_cont_df = util.reproject_polygons(df = cur_cont_df, src_proj = src_proj, dst_proj = dst_proj) # Clip to the network boundary. # Clipping a polygon may created multiple polygons. # Therefore create a new dataframe to have only one polygon per, # entry. Thus avoiding possible problems due to a mixture of # multipolygons and polygons. 
self.logger.info('Clipping.') cont_data = {'time': [], 'geometry': [], 'intensity': [], 'pgv': []} for cur_id, cur_row in cur_cont_df.iterrows(): cur_poly = cur_row.geometry clipped_poly = cur_poly.intersection(self.network_boundary.loc[0, 'geometry']) self.logger.info(type(clipped_poly)) if isinstance(clipped_poly, shapely.geometry.multipolygon.MultiPolygon): cont_data['time'].extend([cur_row.time] * len(clipped_poly)) cont_data['geometry'].extend([x for x in clipped_poly]) cont_data['intensity'].extend([cur_row.intensity] * len(clipped_poly)) cont_data['pgv'].extend([cur_row.pgv] * len(clipped_poly)) else: cont_data['time'].append(cur_row.time) cont_data['geometry'].append(clipped_poly) cont_data['intensity'].append(cur_row.intensity) cont_data['pgv'].append(cur_row.pgv) cur_cont_df = gpd.GeoDataFrame(data = cont_data) # Remove rows having an empty geometry. self.logger.info(cur_cont_df['geometry']) cur_cont_df = cur_cont_df[~cur_cont_df['geometry'].is_empty] self.logger.info(cur_cont_df['geometry']) self.logger.info('Appending to sequence.') # Add the dataframe to the sequence. if sequence_df is None: sequence_df = cur_cont_df else: sequence_df = sequence_df.append(cur_cont_df) # Get some event properties to add to the properties of the feature collections. props = {'db_id': meta['db_id'], 'event_start': util.isoformat_tz(meta['start_time']), 'event_end': util.isoformat_tz(meta['end_time']), 'sequence_start': min(sequence_df.time), 'sequence_end': max(sequence_df.time), 'author_uri': self.project.author_uri, 'agency_uri': self.project.agency_uri, 'station_correction_applied': True} # Write the voronoi dataframe to a geojson file. filepath = util.save_supplement(self.event_public_id, sequence_df, output_dir = self.supplement_dir, category = 'pgvsequence', name = 'pgvcontour', props = props) self.logger.info('Saved pgv contour sequence to file %s.', filepath)
def test_griffinlim_cqt(y_chirp, hop_length, window, use_length, over_sample,
                        fmin, res_type, pad_mode, scale, momentum, init,
                        random_state, dtype):
    if use_length:
        length = len(y_chirp)
    else:
        length = None

    sr = 22050
    bins_per_octave = 12 * over_sample
    n_bins = 6 * bins_per_octave
    C = librosa.cqt(y_chirp, sr=sr, hop_length=hop_length, window=window,
                    fmin=fmin, bins_per_octave=bins_per_octave, n_bins=n_bins,
                    scale=scale, pad_mode=pad_mode, res_type=res_type)

    Cmag = np.abs(C)

    y_rec = librosa.griffinlim_cqt(Cmag, hop_length=hop_length, window=window,
                                   sr=sr, fmin=fmin,
                                   bins_per_octave=bins_per_octave,
                                   scale=scale, pad_mode=pad_mode, n_iter=2,
                                   momentum=momentum, random_state=random_state,
                                   length=length, res_type=res_type,
                                   init=init, dtype=dtype)

    y_inv = librosa.icqt(Cmag, sr=sr, fmin=fmin, hop_length=hop_length,
                         window=window, bins_per_octave=bins_per_octave,
                         scale=scale, length=length, res_type=res_type)

    # First check for length
    if use_length:
        assert len(y_rec) == length

    assert y_rec.dtype == dtype

    # Check that the data is okay
    assert np.all(np.isfinite(y_rec))
def test_no_other(self):
    """omit "other" land classification types"""
    v = [1, 4, 6, 7]
    expected = [0, 2, 4, 4]
    result = ba.landcover_classification(v)
    self.assertTrue(np.all(expected == result))
def is_2Dlistlike(x):
    return np.all([is_listlike(xi) for xi in x])
def _check_bg_stats(stats):
    # Check that bg mean and std are close
    assert np.all((stats[:, :, 0] - bg_mean) ** 2 < 3 ** 2)
    assert np.all((stats[:, :, 1] - bg_std) ** 2 < 2 ** 2)
def test_basic(self):
    """not too hard"""
    v = [1, 4, 6, 7, 11, 16]
    expected = [0, 2, 4, 6]
    result = ba.landcover_classification(v)
    self.assertTrue(np.all(expected == result))
def it_is_robust_to_different_image_sizes():
    cy_ims, true_aln_offsets = _ims(mea=128)
    pred_aln_offsets, aln_scores = worker._align(cy_ims)
    assert np.all(true_aln_offsets == pred_aln_offsets)
def test_no_nonforest(self):
    """omit nonforested landcover types"""
    v = [1, 4, 11, 16]
    expected = [0, 2, 2, 4]
    result = ba.landcover_classification(v)
    self.assertTrue(np.all(expected == result))
def it_handles_zeros():
    psfs = np.zeros((2, 2, 4, 4))
    got = worker._psf_normalize(psfs)
    assert np.all(got == 0.0)
def it_is_robust_to_different_peak_sizes():
    cy_ims, true_aln_offsets = _ims(std=3.0)
    pred_aln_offsets, aln_scores = worker._align(cy_ims)
    assert np.all(true_aln_offsets == pred_aln_offsets)
def it_handles_all_zeros():
    _, calib = _setup(1.0)
    all_zeros = np.zeros((2, 4, 512, 512))
    bal_ims = worker._regional_balance_chcy_ims(all_zeros, calib)
    assert np.all(np.abs(bal_ims - (0 - 100) * 1) < 1.0)
def it_removes_the_noise_floor():
    cy_ims, true_aln_offsets = _ims()
    pred_aln_offsets, aln_scores = worker._align(cy_ims)
    assert np.all(true_aln_offsets == pred_aln_offsets)
    # (fragment of a legacy Python 2 header parser; the opening try: restored
    # so the except clauses below have a matching block)
    try:
        d = safe_eval(header)
    except SyntaxError, e:
        msg = "Cannot parse header: %r\nException: %r"
        raise ValueError(msg % (header, e))
    if not isinstance(d, dict):
        msg = "Header is not a dictionary: %r"
        raise ValueError(msg % d)
    keys = d.keys()
    keys.sort()
    if keys != ['descr', 'fortran_order', 'shape']:
        msg = "Header does not contain the correct keys: %r"
        raise ValueError(msg % (keys,))

    # Sanity-check the values.
    if (not isinstance(d['shape'], tuple) or
            not numpy.all([isinstance(x, (int, long)) for x in d['shape']])):
        msg = "shape is not valid: %r"
        raise ValueError(msg % (d['shape'],))
    if not isinstance(d['fortran_order'], bool):
        msg = "fortran_order is not a valid bool: %r"
        raise ValueError(msg % (d['fortran_order'],))
    try:
        dtype = numpy.dtype(d['descr'])
    except TypeError, e:
        msg = "descr is not a valid dtype descriptor: %r"
        raise ValueError(msg % (d['descr'],))

    return d['shape'], d['fortran_order'], dtype


def write_array(fp, array, version=(1, 0)):
def it_normalizes_4_dim():
    psfs = np.ones((2, 2, 4, 4))
    got = worker._psf_normalize(psfs)
    assert got.shape == (2, 2, 4, 4) and np.all(got == 1.0 / 16.0)
def test_slice_tails():
    profiler = Profile().from_tuples(PROFILER).resample_x(0.1)
    lt_tail, rt_tail = profiler.slice_tails()
    assert np.all(lt_tail.x < min(rt_tail.x))
    assert np.all(rt_tail.x > max(lt_tail.x))
def test_ci_report_with_ndigits(confidence_interval, ndigits):
    """Verify output of CI report when specifying ndigits."""
    report_split = ci_report(confidence_interval, ndigits=ndigits).split('\n')
    period_values = [val for val in report_split[2].split()[2:]]
    length = [len(val.split('.')[-1]) for val in period_values]
    assert np.all(np.equal(length, ndigits))
def loadData(self, input_dir, name_run, script_dir, data_type, data_type_lo,
             delTmax, delTmin, tau, tfa_bool, timehorizon, percent_LO_points,
             num_ets_lo, time_step, thres_coeff_var, prior_type, prior_file):
    """Load expression and meta data, de-duplicate condition names, optionally
    export to dynGenie3 format, split off leave-out steady-state and time-series
    points, and build the design/response (and TFA) matrices used for inference."""
    str_output = ""
    uniq_dups = []
    np.random.seed(self.rnd_seed)
    pps = Preprocess(self.rnd_seed)
    pps.delTmax = delTmax
    pps.delTmin = delTmin
    pps.tau = tau
    pps.input_dir = input_dir
    pps.str_output = str_output
    pps.flag_print = self.flag_print
    pps.priors_file = prior_file

    # IF CONDITIONS HAVE DUPLICATED NAMES, PRINT A META DATA FILE CALLED "meta_data_uniq.tsv" with only unique conds
    metadata_1 = pps.input_dataframe(pps.meta_data_file, has_index=False, strict=False)
    num_dups_conds = len(metadata_1.condName[metadata_1.condName.duplicated(keep=False)])
    if num_dups_conds > 0:
        uniq_dups = (metadata_1.condName[metadata_1.condName.duplicated(keep=False)]).unique()
        num_uniq_dups = len(uniq_dups)
        if self.flag_print:
            print("number of duplicated conds in meta data: ", num_dups_conds)
            print("number of unique names among duplicated conds: ", num_uniq_dups)
        metadata_1.set_index(['condName'], inplace=True)
        metadata_1_series = metadata_1.groupby(level=0).cumcount()
        metadata_1_series = "repet" + metadata_1_series.astype(str)
        metadata_1.index = metadata_1.index + metadata_1_series.replace('repet0', '')
        # metadata_1.index = metadata_1.index + "_dup_" + metadata_1.groupby(level=0).cumcount().astype(str).replace('0', '')

        # The following code fixes the names of prevCol for duplicated conditions
        metadata_copy = metadata_1.copy()
        name_prev_cond = np.nan
        count = 0
        for index, row in (metadata_1[metadata_1.isTs == True]).iterrows():
            if (row['is1stLast'] == 'm') or (row['is1stLast'] == 'l'):
                if row['prevCol'] != name_prev_cond:
                    if self.flag_print:
                        print(index, row)
                    metadata_copy.at[index, 'prevCol'] = name_prev_cond
                    count = count + 1
            name_prev_cond = index
        if self.flag_print:
            print(count)
        if count != num_dups_conds - num_uniq_dups:
            raise ValueError('Wrong meta data format')

        # metadata_copy.drop(['Unnamed: 0'], axis=1, inplace=True)
        metadata_copy.reset_index(inplace=True)
        metadata_copy.columns = ['condName', 'isTs', 'is1stLast', 'prevCol', 'del.t']
        cols = ['isTs', 'is1stLast', 'prevCol', 'del.t', 'condName']
        metadata_copy = metadata_copy[cols]
        pps.meta_data_file = "meta_data_uniq.tsv"
        path_file = pps.input_path(pps.meta_data_file)
        # metadata_copy.is1stLast = '"' + metadata_copy.is1stLast + '"'
        # metadata_copy.prevCol = '"' + metadata_copy.prevCol + '"'
        # metadata_copy.condName = '"' + metadata_copy.condName + '"'
        # metadata_copy.columns = ['"isTs"', '"is1stLast"', '"prevCol"', '"del.t"', '"condName"']
        metadata_copy.to_csv(path_file, sep="\t", index=False, na_rep='NA')  # , quoting=csv.QUOTE_NONE)

        # Add duplicated conds to the expression file; this is important for how the leave-out section is implemented
        expression_1 = pps.input_dataframe(pps.expression_matrix_file, has_index=False, strict=False)
        count = 0
        for ud in uniq_dups:
            pattern = re.compile(ud + "repet" + r"\d")
            for cond_tmp in metadata_copy.condName:
                if pattern.match(cond_tmp):
                    expression_1[cond_tmp] = expression_1[ud]
                    count = count + 1
        if count != num_dups_conds - num_uniq_dups:
            raise ValueError('Wrong expression/meta_data format')
        col_arr = np.asarray(expression_1.columns[1:])
        expression_1.columns = np.insert(col_arr, 0, "")
        pps.expression_matrix_file = "expression_new.tsv"
        path_file = pps.input_path(pps.expression_matrix_file)
        expression_1.to_csv(path_file, sep="\t", index=False, na_rep='NA')  # , quoting=csv.QUOTE_NONE)
    # END CODE FOR PRINTING NEW UNIQUE META DATA FILE AND NEW EXPRESSION FILE

    str_output = pps.get_data(thres_coeff_var, str_output, prior_type)
    pps.compute_common_data(uniq_dups, time_step)

    # CODE FOR LEAVE OUT DATA
    TS_vectors, steady_state_cond, index_steady_state, num_total_timeseries_points = \
        self.readDatasetFromMetaDataFile(pps.meta_data)

    # Parse data to dynGenie3 format in case parse_4dyng3 is set to "True"
    # print pps.expression_matrix.head()
    # print pps.expression_matrix.index.tolist()
    # print pps.expression_matrix.loc["G1", :]
    if self.parse_4dyng3:
        # (TS_data, time_points, genes, TFs, alphas)
        # import sys
        # reload(sys)
        # sys.setdefaultencoding('utf8')
        print("Start parsing data to dynGenie3 format")
        TS_data = list()
        time_points = list()
        genes = pps.expression_matrix.index.tolist()
        genes = np.asarray(genes).astype(str)
        genes = genes.tolist()
        num_gene_names = len(genes)
        alphas = [0.02] * num_gene_names
        alphas = np.asarray(alphas).astype(float)
        alphas = alphas.tolist()
        for ts_tmp in TS_vectors:  # loop over the single timeseries
            ts_tmp_vect = list(ts_tmp.keys())
            num_time_points_intstmp = len(ts_tmp_vect)
            ts_dynGenie3 = np.zeros((num_time_points_intstmp, num_gene_names))
            ts_dynGenie3 = np.transpose(pps.expression_matrix.loc[:, ts_tmp_vect])
            TS_data.append(np.asarray(ts_dynGenie3))
            time_points_i = np.zeros(num_time_points_intstmp)
            for j, key in enumerate(ts_tmp_vect):
                time_points_i[j] = float(ts_tmp[key])
            time_points.append(time_points_i)
        # print TS_data
        # print type(TS_data[1])
        SS_data = np.transpose(pps.expression_matrix[steady_state_cond])
        # (TS_data, time_points, genes, TFs, alphas)
        TFs = np.asarray(pps.tf_names).astype(str)
        TFs = TFs.tolist()
        TS_data_file = "TS_data.pkl"
        path_file = pps.input_path(TS_data_file)
        with open(path_file, 'wb') as f:
            pickle.dump([TS_data, time_points, genes, TFs, alphas], f)
            # cPickle.dump(TS_data, f)
            # cPickle.dump(time_points, f)
            # cPickle.dump(alphas, f)
            # cPickle.dump(genes, f)
        f.close()
        # with open(output_path_estimators+'/Gene'+str(output_idx), 'rb') as f:
        #     treeEstimator = cPickle.load(f)
        SS_data_file = "SS_data.txt"
        path_file = pps.input_path(SS_data_file)
        SS_data.to_csv(path_file, sep="\t", index=False, na_rep='NA')
        print("End parsing data to dynGenie3 format")
    # END parse data to dynGenie3 format

    # Debug
    # pps.design.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_design.txt", sep="\t")
    # pps.response.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_response.txt", sep="\t")
    # pps.meta_data.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_meta_data.txt", sep="\t")

    if data_type == "TS" or (data_type == "TS-SS" and (data_type_lo == "TS-SS" or data_type_lo == "TS")):
        if num_ets_lo > 0:
            ts_lopoints_x, ts_lopoints_y, timeseries_indices_lo = \
                self.choose_LO_timeseries_random_withTimehorizon(num_ets_lo, TS_vectors, timehorizon)
        else:
            ts_lopoints_x, ts_lopoints_y, t0_lopoints, timeseries_indices_lo = \
                self.choose_timeseries_LO_lastPoints_random_withTimehorizon(
                    percent_LO_points, num_total_timeseries_points, TS_vectors, timehorizon)
    if data_type == "SS" or (data_type == "TS-SS" and (data_type_lo == "TS-SS" or data_type_lo == "SS")):
        ss_lo_cond_names = np.asarray(list())
        ss_lo_indices = np.asarray(list())
        if len(steady_state_cond) > 0:
            ss_lo_cond_names, ss_lo_indices = self.choose_steadystate_LO_points_random(
                percent_LO_points, steady_state_cond)

    # Debug
    # print "num_total_timeseries_points", num_total_timeseries_points
    # print "len(ss_lo_cond_names)", len(steady_state_cond)
    # print "len(pps.meta_data)", len(pps.meta_data)
    #
    # Examples of the structures returned by readDatasetFromMetaDataFile:
    # TS_vectors: [OrderedDict([('S0_1', 0), ('S1_1', 60.0), ('S2_1', 120.0), ('S3_1', 180.0),
    #                           ('S4_1', 240.0), ('S5_1', 300.0), ('S6_1', 360.0)]),
    #              OrderedDict([('S0_2', 0), ('S1_2', 60.0), ('S2_2', 120.0), ('S3_2', 180.0),
    #                           ('S4_2', 240.0), ('S5_2', 300.0), ('S6_2', 360.0)]), ...]
    # steady_state_cond: array(['LBexp_1', 'LBexp_2', 'LBexp_3', ...]
    # index_steady_state: array([163, 164, 165, 166, 167, ...]
    # num_total_timeseries_points: 163
    #
    # Leave-out time-series points (ts_lopoints_x, ts_lopoints_y, timeseries_indices_lo):
    # timeseries_indices_lo: array([31, 15, 26, 17])
    # ts_lopoints_x: OrderedDict([('MG+90_2', 95.0), ('SMM_1', 0), ('dia5_3', 5.0), ('SMM_3', 0)])
    # ts_lopoints_y: OrderedDict([('MG+120_2', 125.0), ('Salt_1', 10.0), ('dia15_3', 15.0), ('Salt_3', 10.0)])
    #
    # Leave-out steady-state points (ss_lo_cond_names, ss_lo_indices):
    # array(['H2O2_1', 'LBGexp_2', 'LBtran_2', ...]
    # array([100, 10, 4, 81, 97, 65, ...]

    if self.flag_print:
        print("Shape of design var before leaving-out data: ", str(pps.design.shape))
        print("Shape of response var before leaving-out data: ", str(pps.response.shape))
    str_output = str_output + "Shape of design var before leaving-out data: " + str(pps.design.shape) + "\n"
    str_output = str_output + "Shape of response var before leaving-out data: " + str(pps.response.shape) + "\n"

    # Debug
    # w = csv.writer(open("ts_lopoints_x.csv", "w"))
    # for key, val in ts_lopoints_x.items():
    #     w.writerow([key, val])
    # pps.design.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_design.txt", sep="\t")
    # pps.response.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_response.txt", sep="\t")

    # Before splitting the dataset into training and test, check whether we want to learn on SS only or TS only
    if data_type == "SS":
        str_output = str_output + "::::::::STEADY-STATE ONLY - LOOK AT JUST THE SHAPES OF DESIGN AND RESPONSE VARIABLES" + "\n"
        only_steady_state_indxes = pps.design.columns.isin(steady_state_cond)
        pps.design = pps.design.loc[:, only_steady_state_indxes]
        pps.response = pps.response.loc[:, only_steady_state_indxes]
        pps.half_tau_response = pps.half_tau_response.loc[:, only_steady_state_indxes]
        pps.delta_vect = pps.delta_vect.loc[:, pps.delta_vect.columns.isin(steady_state_cond)]
    if data_type == "TS":
        str_output = str_output + "::::::::TIME-SERIES ONLY - LOOK AT JUST THE SHAPES OF DESIGN AND RESPONSE VARIABLES" + "\n"
        pps.design.drop(steady_state_cond, axis=1, inplace=True)
        pps.response.drop(steady_state_cond, axis=1, inplace=True)
        pps.half_tau_response.drop(steady_state_cond, axis=1, inplace=True)
        pps.delta_vect.drop(steady_state_cond, axis=1, inplace=True)

    # print "Shape of design before splitting: "+str(pps.design.shape)
    # print "Shape of response before splitting: "+str(pps.response.shape)
    #
    # design_tmp = pps.design
    # tfs_tmp = list(set(pps.tf_names).intersection(pps.expression_matrix.index))
    # X_tmp = np.asarray(design_tmp.loc[tfs_tmp,:].values)
    # X_tmp = (X_tmp - (X_tmp.mean(axis=1)).reshape(-1,1)) / (X_tmp.std(axis=1)).reshape(-1,1)
    # design_tmp_2 = pd.DataFrame(X_tmp, index=tfs_tmp, columns=design_tmp.columns)
    # pps.design = design_tmp_2
    # print "Shape of design after normalization/standardization: ", pps.design.shape
    #
    # response_tmp = pps.response
    # Y_tmp = np.asarray(response_tmp.values)
    # Y_tmp = (Y_tmp - (Y_tmp.mean(axis=1)).reshape(-1,1)) / (Y_tmp.std(axis=1)).reshape(-1,1)
    # response_tmp_2 = pd.DataFrame(Y_tmp, index=response_tmp.index, columns=response_tmp.columns)
    # pps.response = response_tmp_2
    # print "Shape of response after normalization/standardization: ", pps.response.shape

    if data_type == "SS" or (data_type == "TS-SS" and (data_type_lo == "TS-SS" or data_type_lo == "SS")):
        # Leaving out steady state points
        pps.leave_out_ss_design = pps.design[ss_lo_cond_names]
        pps.design.drop(ss_lo_cond_names, axis=1, inplace=True)
        pps.leave_out_ss_response = pps.response[ss_lo_cond_names]
        pps.response.drop(ss_lo_cond_names, axis=1, inplace=True)
        pps.half_tau_response.drop(ss_lo_cond_names, axis=1, inplace=True)
        if self.flag_print:
            print("Shape of leave out SS design var: ", pps.leave_out_ss_design.shape)
            print("Shape of leave out SS response var: ", pps.leave_out_ss_response.shape)
        pps.delta_vect.drop(ss_lo_cond_names, axis=1, inplace=True)
    if data_type == "TS" or (data_type == "TS-SS" and (data_type_lo == "TS-SS" or data_type_lo == "TS")):
        # Leaving out time series points
        pps.leave_out_ts_design = pps.design[list(ts_lopoints_x.keys())]
        pps.design.drop(list(ts_lopoints_x.keys()), axis=1, inplace=True)
        pps.leave_out_ts_response = pps.response[list(ts_lopoints_x.keys())]
        pps.response.drop(list(ts_lopoints_x.keys()), axis=1, inplace=True)
        pps.half_tau_response.drop(list(ts_lopoints_x.keys()), axis=1, inplace=True)
        if self.flag_print:
            print("Shape of leave out TS design var: ", pps.leave_out_ts_design.shape)
            print("Shape of leave out TS response var: ", pps.leave_out_ts_response.shape)
        pps.delta_vect.drop(list(ts_lopoints_x.keys()), axis=1, inplace=True)

    if self.flag_print:
        print("Shape of design var after leaving-out data: ", pps.design.shape)
        print("Shape of response var after leaving-out data: ", pps.response.shape)
    str_output = str_output + "Shape of design var after leaving-out data: " + str(pps.design.shape) + "\n"
    str_output = str_output + "Shape of response var after leaving-out data: " + str(pps.response.shape) + "\n"
    if data_type == "SS" or (data_type == "TS-SS" and (data_type_lo == "TS-SS" or data_type_lo == "SS")):
        str_output = str_output + "Shape of leave out SS design var: " + str(pps.leave_out_ss_design.shape) + "\n"
        str_output = str_output + "Shape of leave out SS response var: " + str(pps.leave_out_ss_response.shape) + "\n"
    if data_type == "TS" or (data_type == "TS-SS" and (data_type_lo == "TS-SS" or data_type_lo == "TS")):
        str_output = str_output + "Shape of leave out TS design var: " + str(pps.leave_out_ts_design.shape) + "\n"
        str_output = str_output + "Shape of leave out TS response var: " + str(pps.leave_out_ts_response.shape) + "\n"
    # END CODE FOR LEAVE OUT DATA

    if data_type == "SS" or (data_type == "TS-SS" and (data_type_lo == "TS-SS" or data_type_lo == "SS")):
        steady_state_cond_new = list(steady_state_cond.copy())
        for element in ss_lo_cond_names:
            steady_state_cond_new.remove(element)
    else:
        steady_state_cond_new = steady_state_cond
    index_steady_state_new = []
    indexes_all = list(range(0, len(pps.design.columns)))
    delta_vect = list()
    # Debug
    # print len(indexes_all)
    if data_type == "SS" or data_type == "TS-SS":
        for element in steady_state_cond_new:
            index_steady_state_new.append(pps.design.columns.get_loc(element))
        index_steady_state_new = np.asarray(index_steady_state_new)
    index_time_points_new = []
    if data_type == "TS" or data_type == "TS-SS":
        index_time_points_new = set(indexes_all) - set(index_steady_state_new)
        index_time_points_new = np.asarray(list(index_time_points_new))
    # Debug
    # print len(index_time_points_new)
    # print len(index_steady_state_new)

    # Debug
    # print "pps.priors_data.shape", pps.priors_data.shape
    # print "len(pps.priors_data.abs().sum(axis=0))", len(pps.priors_data.abs().sum(axis=0))
    # print "len(pps.priors_data.abs().sum(axis=0))", len(pps.priors_data.abs().sum(axis=1))
    # print "len(pps.priors_data.sum(axis=0))", len(pps.priors_data.sum(axis=0))
    # print "type(np.abs(pps.priors_data))", type(np.abs(pps.priors_data))
    # pps.priors_data.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_ppspriors_data.txt", sep="\t")
    # pps.gold_standard.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_ppsgold_standard.txt", sep="\t")
    # print type(pps.gold_standard)
    # pps.design.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_design.txt", sep="\t")
    # pps.response.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_response.txt", sep="\t")

    if prior_type == "binary_all":
        num_edges_prior = np.sum(pps.priors_data.values != 0)
        num_edges_gs = np.sum(pps.gold_standard.values != 0)
    if self.flag_print:
        if prior_type == "binary_all":
            print("Number of edges in the prior: ", num_edges_prior, pps.priors_data.shape)
            print("Number of edges in the evaluation part of the gold standard: ", num_edges_gs, pps.gold_standard.shape)
    if prior_type == "binary_all":
        str_output = str_output + "Number of edges in the prior: " + str(num_edges_prior) + str(pps.priors_data.shape) + "\n"
        str_output = str_output + "Number of edges in the evaluation part of the gold standard: " + str(num_edges_gs) + str(pps.gold_standard.shape) + "\n"

    # print "pps.activity.shape", pps.activity.shape
    # print pps.expression_matrix.shape
    # print len(pps.tf_names)
    # print pps.gold_standard.shape
    # print pps.response.shape

    if tfa_bool:
        # Compute Transcription Factor Activity
        if self.flag_print:
            print('Computing Transcription Factor Activity ... ')
        tfs = list(set(pps.tf_names).intersection(pps.expression_matrix.index))
        # TFA_calculator = TFA(pps.priors_data, pps.design, pps.half_tau_response, tfs)
        pps.activity = pps.compute_transcription_factor_activity(tfs)
        # pps.activity, pps.priors_data = TFA_calculator.compute_transcription_factor_activity()
    else:
        if self.flag_print:
            print('Using just expression, NO Transcription Factor Activity')
        expression_matrix = pps.design
        tfs = list(set(pps.tf_names).intersection(pps.expression_matrix.index))
        activity = pd.DataFrame(expression_matrix.loc[tfs, :].values,
                                index=tfs, columns=expression_matrix.columns)
        if self.flag_print:
            print('Design matrix of shape: {}'.format(activity.shape))
        pps.activity = activity

    tf_names = pps.activity.index.tolist()  # pps.priors_data.columns  # pps.tf_names
    # Leave-out SS
    if data_type == "SS" or (data_type == "TS-SS" and (data_type_lo == "TS-SS" or data_type_lo == "SS")):
        expression_matrix_lo_ss = pps.leave_out_ss_design
        leave_out_ss_design = pd.DataFrame(expression_matrix_lo_ss.loc[tf_names, :].values,
                                           index=tf_names, columns=expression_matrix_lo_ss.columns)
        pps.leave_out_ss_design = leave_out_ss_design
    # Leave-out TS
    if data_type == "TS" or (data_type == "TS-SS" and (data_type_lo == "TS-SS" or data_type_lo == "TS")):
        expression_matrix_lo_ts = pps.leave_out_ts_design
        leave_out_ts_design = pd.DataFrame(expression_matrix_lo_ts.loc[tf_names, :].values,
                                           index=tf_names, columns=expression_matrix_lo_ts.columns)
        pps.leave_out_ts_design = leave_out_ts_design

    expression = pps.expression_matrix  # this is the initial one but then there is filtering and stuff
    goldstandard = pps.gold_standard
    genelist = pps.response.index.tolist()  # pps.expression_matrix.index.tolist()
    numtfs = len(tf_names)
    X = pps.activity.transpose().values   # X [n_samples, n_features]
    y = pps.response.transpose().values   # y [n_samples, num_genes]
    if self.flag_print:
        print("Shape of design var X: " + str(X.shape))
        print("Shape of response var Y: " + str(y.shape))
    str_output = str_output + "Shape of design var X: " + str(X.shape) + "\n"
    str_output = str_output + "Shape of response var Y: " + str(y.shape) + "\n"
    if self.flag_print:
        print("X False", np.any(np.isnan(X)))
        print("X True", np.all(np.isfinite(X)))
        print("y False", np.any(np.isnan(y)))
        print("y True", np.all(np.isfinite(y)))
    X = np.float64(X)
    y = np.float64(y)

    output_path = script_dir + "/output/" + name_run + "_numgenes" + str(len(genelist)) + "_numtfs" + str(numtfs)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    # else:
    #     if self.poot or not(self.auto_meth):
    #         num_folders = len([name for name in os.listdir(script_dir+"/output/") if
    #             os.path.isdir(os.path.join(script_dir+"/output/",name)) and
    #             (name_run+"_numgenes"+str(len(genelist))+"_numtfs"+str(numtfs)) in name])
    #         os.makedirs(output_path + "_" + str(num_folders))
    #         output_path = output_path + "_" + str(num_folders)

    if prior_type == "binary_all":
        if not os.path.exists(input_dir + "/priors"):
            os.makedirs(input_dir + "/priors")

    if prior_type == "binary_all":
        # Save plot of the prior's number of targets (outdegree) for each TF
        priors_data_tmp = np.abs(pps.priors_data)
        index_tmp = priors_data_tmp.sum(axis=0) != 0
        prior_num_tfs = np.sum(index_tmp)
        # Debug print TFs
        # print priors_data_tmp.columns[index_tmp]
        # print priors_data_tmp.sum(axis=0)[index_tmp]
        max_outdegree = np.max(priors_data_tmp.sum(axis=0)[index_tmp])
        # print "max_outdegree", max_outdegree
        max_outdegree = int(max_outdegree)
        out_prior_tfs_outdegrees = ("Num of TFs in prior: " + str(prior_num_tfs) +
                                    " Mean and var of targets for TFs in prior: " +
                                    str(np.mean(priors_data_tmp.sum(axis=0)[index_tmp])) + " , " +
                                    str(np.std(priors_data_tmp.sum(axis=0)[index_tmp])))
        str_output = str_output + out_prior_tfs_outdegrees + "\n"
        ax = priors_data_tmp.sum(axis=0)[index_tmp].plot(kind="hist", bins=list(range(0, max_outdegree + 1)))
        ax.set_title("Prior outdegrees distribution")
        ax.set_xlabel("outdegree of TFs ( i.e. TFs num of targets)")
        if self.flag_print:
            plt.savefig(output_path + "/Prior outdegrees distribution_numTFs" + str(prior_num_tfs) +
                        "_numEdges" + str(num_edges_prior))
        plt.close()

        # Save plot of the eval gold standard's number of targets (outdegree) for each TF
        gold_standard_tmp = np.abs(pps.gold_standard)
        index_tmp2 = gold_standard_tmp.sum(axis=0) != 0
        gs_num_tfs = np.sum(index_tmp2)
        max_outdegree2 = np.max(gold_standard_tmp.sum(axis=0)[index_tmp2])
        max_outdegree2 = int(max_outdegree2)
        # Debug
        # print gold_standard_tmp.sum(axis=0)[index_tmp2]
        # print max_outdegree2
        out_gs_tfs_outdegrees = ("Num of TFs in eval gold standard: " + str(gs_num_tfs) +
                                 " Mean and var of targets for TFs in eval GS: " +
                                 str(np.mean(gold_standard_tmp.sum(axis=0)[index_tmp2])) + " , " +
                                 str(np.std(gold_standard_tmp.sum(axis=0)[index_tmp2])))
        str_output = str_output + out_gs_tfs_outdegrees + "\n"
        # Debug print TFs
        # print gold_standard_tmp.columns[index_tmp2]
        ax1 = gold_standard_tmp.sum(axis=0)[index_tmp2].plot(kind="hist", bins=list(range(0, max_outdegree2 + 1)))
        ax1.set_title("Eval Gold standard outdegrees distribution")
        ax1.set_xlabel("outdegree of TFs ( i.e. TFs num of targets)")
        if self.flag_print:
            plt.savefig(output_path + "/Eval Gold standard outdegrees distribution_numTFs" + str(gs_num_tfs) +
                        "_numEdges" + str(num_edges_gs))
        plt.close()

    if prior_type == "binary_all":
        # Write gold standard priors to file
        pps.priors_data.to_csv(input_dir + "/priors/" + prior_file, sep="\t")

    if self.flag_print:
        outfile = open(output_path + "/_preprocessing.txt", 'w')
        outfile.write("Run name: " + str(name_run) + "\n")
        outfile.write(str_output)
    if data_type == "SS" or (data_type == "TS-SS" and (data_type_lo == "TS-SS" or data_type_lo == "SS")):
        if len(steady_state_cond) > 0:
            if self.flag_print:
                print("Leave-out points for steady state: ", ss_lo_cond_names, ss_lo_indices)
                outfile.write("Leave-out points for steady state: " + str(ss_lo_cond_names) + str(ss_lo_indices) + "\n")
    if data_type == "TS" or (data_type == "TS-SS" and (data_type_lo == "TS-SS" or data_type_lo == "TS")):
        if self.flag_print:
            print("Leave-out points for timeseries: ", ts_lopoints_x, ts_lopoints_y, timeseries_indices_lo)
            outfile.write("Leave-out points for timeseries: " + str(ts_lopoints_x) + str(ts_lopoints_y) +
                          str(timeseries_indices_lo) + "\n")
    # print "New dimensions after coeff of var filter..."
    # outfile.write("New dimensions after coeff of var filter... \n")
    if self.flag_print:
        print("Expression dim: ", expression.shape)
        outfile.write("Expression dim: " + str(expression.shape) + "\n")
    if self.flag_print:
        print("Num of tfs: ", len(tf_names))
        outfile.write("Num of tfs: " + str(len(tf_names)) + "\n")
    if self.flag_print:
        print("Num of genes: ", len(genelist))
        outfile.write("Num of genes: " + str(len(genelist)) + "\n")
    if self.flag_print:
        if prior_type == "binary_all":
            print("Priors dim: ", pps.priors_data.shape)
            outfile.write("Priors dim: " + str(pps.priors_data.shape) + "\n")
    if self.flag_print:
        print("Goldstandard dim: ", goldstandard.shape)
        outfile.write("Goldstandard dim: " + str(goldstandard.shape) + "\n")

    # Print INFO to log file
    if self.flag_print:
        print("The number of genes is: ", len(genelist))
        outfile.write("The number of genes is: " + str(len(genelist)) + "\n")
    if self.flag_print:
        print("The number of TFs is: ", len(tf_names))
        outfile.write("The number of TFs is: " + str(len(tf_names)) + "\n")
    if self.flag_print:
        print("The total Number of data points in the dataset is: ", len(pps.meta_data))
        outfile.write("The total Number of data points in the dataset is: " + str(len(pps.meta_data)) + "\n")
    if self.flag_print:
        print("The total number of time series is: ", len(TS_vectors))
        outfile.write("The total number of time series is: " + str(len(TS_vectors)) + "\n")
    if self.flag_print:
        print("The number of total time points is: ", num_total_timeseries_points)
        outfile.write("The number of total time points is: " + str(num_total_timeseries_points) + "\n")
    if self.flag_print:
        print("The number of total steady state points is: ", len(steady_state_cond))
        outfile.write("The number of total steady state points is: " + str(len(steady_state_cond)) + "\n")
    if data_type == "SS" or (data_type == "TS-SS" and (data_type_lo == "TS-SS" or data_type_lo == "SS")):
        if self.flag_print:
            print("The percentage of leave-out steady state points is: ",
                  str(100 * float(len(ss_lo_indices)) / len(steady_state_cond)))
            outfile.write("The percentage of leave-out steady state points is: " +
                          str(100 * float(len(ss_lo_indices)) / len(steady_state_cond)) + "\n")
    if data_type == "TS" or (data_type == "TS-SS" and (data_type_lo == "TS-SS" or data_type_lo == "TS")):
        if self.flag_print:
            print("The percentage of leave-out time series points is: ",
                  str(100 * float(len(timeseries_indices_lo)) / num_total_timeseries_points))
            outfile.write("The percentage of leave-out time series points is: " +
                          str(100 * float(len(timeseries_indices_lo)) / num_total_timeseries_points) + "\n")
    if self.flag_print:
        outfile.close()

    # All variables that can be returned if necessary:
    # (All points)
    #   TS_vectors, steady_state_cond, num_total_timeseries_points
    # Training and leave out points
    #   index_time_points_new, index_steady_state_new, pps.leave_out_ss_design (X_test_ss),
    #   pps.leave_out_ss_response, pps.leave_out_ts_design, pps.leave_out_ts_response
    # Leave out points
    #   ss_lo_cond_names, ts_lopoints_x, ts_lopoints_y, timeseries_indices_lo
    if data_type == "SS" or (data_type == "TS-SS" and (data_type_lo == "TS-SS" or data_type_lo == "SS")):
        X_test_ss = pps.leave_out_ss_design.transpose().values
        y_test_ss = pps.leave_out_ss_response.transpose().values
    else:
        X_test_ss = ""
        y_test_ss = ""
    deltas = []
    if data_type == "TS" or (data_type == "TS-SS" and (data_type_lo == "TS-SS" or data_type_lo == "TS")):
        X_test_ts = pps.leave_out_ts_design.transpose().values
        y_test_ts = pps.leave_out_ts_response.transpose().values
        ts_lopoints_y_keys = list(ts_lopoints_y.keys())
        for i, k in enumerate(ts_lopoints_x.keys()):
            # Debug
            # print "ts_lopoints_x[k]", ts_lopoints_x[k]
            # if float((ts_lopoints_x[k])) == 0:
            #     log_of_frac = 1
            # else:
            #     # No log
            #     # log_of_frac = float(ts_lopoints_y[ts_lopoints_y_keys[i]]) / float((ts_lopoints_x[k]))
            #     log_of_frac = np.log(float(ts_lopoints_y[ts_lopoints_y_keys[i]]) / float((ts_lopoints_x[k])))
            # deltas.append(log_of_frac)
            # Original
            deltas.append(ts_lopoints_y[ts_lopoints_y_keys[i]] - (ts_lopoints_x[k]))
        y_test_ts_future_timepoint = pps.expression_matrix.loc[genelist, ts_lopoints_y_keys].transpose().values
        x_test_ts_current_timepoint = pps.expression_matrix.loc[genelist, list(ts_lopoints_x.keys())].transpose().values
        x_test_ts_timepoint0 = pps.expression_matrix.loc[genelist, list(t0_lopoints.keys())].transpose().values
    else:
        X_test_ts = ""
        y_test_ts = ""
        y_test_ts_future_timepoint = ""
        x_test_ts_current_timepoint = ""
        x_test_ts_timepoint0 = ""
    # Debug
    # print y_test_ts_future_timepoint
    # print x_test_ts_current_timepoint

    return (X, y, genelist, tf_names, goldstandard, output_path, pps.priors_data,
            X_test_ss, X_test_ts, y_test_ss, y_test_ts,
            x_test_ts_current_timepoint, y_test_ts_future_timepoint, deltas, x_test_ts_timepoint0,
            index_steady_state_new, index_time_points_new, pps.design, pps.delta_vect, pps.res_mat2)
def __init__(self, zs, coords, basis='cc-pvdz'):  # fn='test'):
    """Set up the pyscf molecule, per-atom AO index bookkeeping, and saturate
    under-coordinated C/N/O sites with hydrogens based on reference coordination numbers."""
    self.rcs = Elements().rcs
    self.basis = basis
    # self.fn = fn
    # assert np.sum(self.zs)%2 == 0, '#ERROR: spin polarised?'
    RawMol.__init__(self, list(zs), coords)
    spin = sum(self.zs) % 2
    symbs = [chemical_symbols[zi] for zi in self.zs]
    OBJ = pyscf_object(symbs, coords, basis, spin=spin)
    self.mol = OBJ.mol
    self.nbf = OBJ.mol.nao
    ids = OBJ.mol.offset_ao_by_atom()[:, 2:4]
    ibs, ies = ids[:, 0], ids[:, 1]
    self.aoidxs = [np.arange(ibs[i], ies[i]) for i in range(self.na)]
    self.T0 = pre_orth_ao_atm_scf(OBJ.mol)
    # self.T = np.eye(OBJ.mol.nao)

    _cnsr = {1: 1, 6: 4, 7: 3, 8: 2}  # reference coordination numbers
    cnsr = np.array([_cnsr[zi] for zi in self.zs], int)
    cns = self.g.sum(axis=0)
    dvs = cnsr - cns
    bidxs = []
    # print 'dvs = ', dvs
    assert np.all(dvs >= 0)

    # first add H's to sp3 N and O
    for ia in self.ias:
        zi = self.zs[ia]
        jas = self.ias[self.g[ia] > 0]
        d = np.sum(self.rcs[[1, zi]])
        if zi == 7 and cns[ia] == 3:
            v = get_v_sp3(self.coords[[ia] + list(jas)])
            bidxs.append([ia, self.coords[ia] + v * d])
        elif zi == 8 and cns[ia] == 2:
            v1, v2 = get_v12_sp3(self.coords[[ia] + list(jas)])
            for v in [v1, v2]:
                bidxs.append([ia, self.coords[ia] + v * d])

    # add H's to sp2 C, N and O
    _jas = self.ias[dvs == 1]
    # print _jas
    if len(_jas) > 0:
        _jasr = cg.find_cliques(self.g[_jas][:, _jas])
        for kdxr in _jasr:
            naj = len(kdxr)
            assert naj % 2 == 0
            jas = _jas[kdxr]
            # print ' * jas = ', jas
            cnsj = cns[jas]
            seq = np.argsort(cnsj)
            vs = []
            for _j in range(naj):
                j = seq[_j - 1]
                ja = jas[j]
                # print ' |__ ja = ', ja
                zj = self.zs[ja]
                jas2 = self.ias[self.g[ja] > 0]
                nbr = len(jas2)
                d = np.sum(self.rcs[[1, zj]])
                if nbr == 3 and zj == 6:
                    v = get_v3(self.coords[[ja] + list(jas2)])
                    vu = update_vs(v, vs)
                    vs.append(vu)
                    bidxs.append([ja, self.coords[ja] + vu * d])
                    # print ' |__ dot(v,vs) = ', np.dot([vu],np.array(vs).T)
                elif nbr == 2 and zj == 7:
                    v, v1 = get_v2(self.coords[[ja] + list(jas2)])
                    for _v in [v, v1]:
                        vu = update_vs(_v, vs)
                        vs.append(vu)
                        bidxs.append([ja, self.coords[ja] + vu * d])
                elif nbr == 1 and zj == 8:
                    ja2 = jas2[0]
                    vz = vs[list(jas).index(ja2)]
                    vx = self.coords[ja2] - self.coords[ja]
                    v1, v2 = get_v12(vx, vz)
                    for _v in [v, v1, v2]:
                        vu = update_vs(_v, vs)
                        vs.append(vu)
                        bidxs.append([ja, self.coords[ja] + vu * d])
                else:
                    raise Exception('#unknown case')

    # append the new H atoms to the graph, charges and coordinates
    nadd = len(bidxs)
    na = self.na
    if nadd > 0:
        na2 = na + nadd
        g2 = np.zeros((na2, na2)).astype(int)
        g2[:na, :na] = self.g
        ih = na
        cs2 = []  # coords of H's
        for bidx in bidxs:
            ia, ci = bidx
            g2[ih, ia] = g2[ia, ih] = 1
            cs2.append(ci)
            ih += 1
        zs = np.concatenate((self.zs, [1, ] * nadd))
        coords = np.concatenate((self.coords, cs2))
        self.zs = zs
        self.coords = coords
        self.g = g2
        self.ias = np.arange(na2)
        self.na = na2
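# A minimal, standalone sketch (assuming pyscf is installed) of the per-atom AO
# bookkeeping used in __init__ above: Mole.offset_ao_by_atom() returns one row
# per atom as [shell_start, shell_end, ao_start, ao_end], so columns 2:4 give
# each atom's slice of atomic-orbital indices. The water geometry is illustrative only.
import numpy as np
from pyscf import gto

mol = gto.M(atom='O 0 0 0; H 0 0 0.96; H 0 0.96 0', basis='cc-pvdz')
offsets = mol.offset_ao_by_atom()[:, 2:4]            # shape (natm, 2): [ao_start, ao_end)
aoidxs = [np.arange(ib, ie) for ib, ie in offsets]   # AO indices owned by each atom
assert sum(len(a) for a in aoidxs) == mol.nao        # the slices partition all AOs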
def test_pid_user_input():
    """Test if user input is handled correctly."""
    # Test missing estimator name
    pid = PartialInformationDecomposition()
    with pytest.raises(RuntimeError):
        pid.analyse_single_target(settings={}, data=Data(), target=0,
                                  sources=[1, 2])

    # Test wrong estimator name
    settings = {'pid_estimator': 'TestPID'}
    with pytest.raises(RuntimeError):
        pid.analyse_single_target(settings=settings, data=Data(), target=0,
                                  sources=[1, 2])

    # Test default lags for network_analysis
    settings = {'pid_estimator': 'TartuPID'}
    dat = Data(np.random.randint(0, 10, size=(5, 100)), dim_order='ps',
               normalise=False)
    res = pid.analyse_network(settings=settings, data=dat, targets=[0, 1, 2],
                              sources=[[1, 3], [2, 4], [0, 1]])
    assert np.all(res[0]['settings']['lags'] == [1, 1]), (
        'Lags were not set to default.')
    assert np.all(res[1]['settings']['lags'] == [1, 1]), (
        'Lags were not set to default.')
    assert np.all(res[2]['settings']['lags'] == [1, 1]), (
        'Lags were not set to default.')

    n = 20
    alph = 2
    x = np.random.randint(0, alph, n)
    y = np.random.randint(0, alph, n)
    z = np.logical_xor(x, y).astype(int)
    dat = Data(np.vstack((x, y, z)), 'ps', normalise=False)

    # Test two-tailed significance test
    settings = {'pid_estimator': 'TartuPID', 'tail': 'two', 'lags': [0, 0]}
    pid = PartialInformationDecomposition()

    with pytest.raises(RuntimeError):  # Test incorrect number of sources
        pid.analyse_single_target(settings=settings, data=dat, target=2,
                                  sources=[1, 2, 3])
    settings['lags'] = [0, 0, 0]
    with pytest.raises(RuntimeError):  # Test incorrect number of lags
        pid.analyse_single_target(settings=settings, data=dat, target=2,
                                  sources=[1, 3])
    settings['lags'] = [n * 3, 0]
    with pytest.raises(RuntimeError):  # Test lag > no. samples
        pid.analyse_single_target(settings=settings, data=dat, target=2,
                                  sources=[0, 1])
    settings['lags'] = [n, 0]
    with pytest.raises(RuntimeError):  # Test lag == no. samples
        pid.analyse_single_target(settings=settings, data=dat, target=2,
                                  sources=[0, 1])
    settings['lags'] = [0, 0]
    with pytest.raises(RuntimeError):  # Test target in sources
        pid.analyse_single_target(settings=settings, data=dat, target=2,
                                  sources=[2, 3])
    with pytest.raises(IndexError):  # Test target not in processes
        pid.analyse_single_target(settings=settings, data=dat, target=5,
                                  sources=[0, 1])
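# A minimal sketch (assumptions: IDTxl with the TartuPID estimator available, and the
# dict-style results used by the test above) of the "happy path" those error checks
# guard: two binary sources and an XOR target, analysed with zero lags. Keys of the
# result other than 'settings' are estimator-specific and not shown here.
import numpy as np
from idtxl.data import Data
from idtxl.partial_information_decomposition import PartialInformationDecomposition

n = 1000
x = np.random.randint(0, 2, n)
y = np.random.randint(0, 2, n)
z = np.logical_xor(x, y).astype(int)                       # target is a synergistic function of x and y
dat = Data(np.vstack((x, y, z)), dim_order='ps', normalise=False)

pid = PartialInformationDecomposition()
settings = {'pid_estimator': 'TartuPID', 'lags': [0, 0]}
res = pid.analyse_single_target(settings=settings, data=dat, target=2, sources=[0, 1])
print(res['settings'])                                      # settings actually used, including the lags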
def test_slice_shoulders():
    profiler = Profile().from_tuples(PROFILER).resample_x(0.1)
    lt_should, rt_should = profiler.slice_shoulders()
    assert np.all(lt_should.x < min(rt_should.x))
    assert np.all(rt_should.x > max(lt_should.x))