Example #1
    def test_02_02_mask_invert(self):
        labels = np.zeros((10,15),int)
        labels[2:5,3:8] = 1
        labels[5:8, 10:14] = 2
        object_set = cpo.ObjectSet()
        objects = cpo.Objects()
        objects.segmented = labels
        object_set.add_objects(objects, OBJECTS_NAME)
        
        image_set_list = cpi.ImageSetList()
        image_set=image_set_list.get_image_set(0)
        np.random.seed(0)
        pixel_data = np.random.uniform(size=(10,15)).astype(np.float32)
        image_set.add(IMAGE_NAME, cpi.Image(pixel_data))

        pipeline = cpp.Pipeline()
        module = M.MaskImage()
        module.source_choice.value = M.IO_OBJECTS
        module.object_name.value = OBJECTS_NAME
        module.image_name.value = IMAGE_NAME
        module.masked_image_name.value = MASKED_IMAGE_NAME
        module.invert_mask.value = True
        module.module_num = 1
        
        workspace = cpw.Workspace(pipeline, module, image_set, object_set,
                                  cpmeas.Measurements(), image_set_list)
        module.run(workspace)
        masked_image = workspace.image_set.get_image(MASKED_IMAGE_NAME)
        self.assertTrue(isinstance(masked_image, cpi.Image))
        self.assertTrue(np.all(masked_image.pixel_data[labels == 0] ==
                               pixel_data[labels == 0]))
        self.assertTrue(np.all(masked_image.pixel_data[labels > 0] == 0))
        self.assertTrue(np.all(masked_image.mask == (labels == 0)))
        self.assertTrue(np.all(masked_image.masking_objects.segmented == labels))
Example #2
def test_neg(vector_array):
    v = vector_array
    c = v.copy()
    cc = v.copy()
    c.scal(-1)
    assert np.all(almost_equal(c, -v))
    assert np.all(almost_equal(v, cc))
Example #3
def test_scal(vector_array):
    v = vector_array
    for ind in valid_inds(v):
        if v.len_ind(ind) != v.len_ind_unique(ind):
            with pytest.raises(Exception):
                c = v.copy()
                c[ind].scal(1.)
            continue
        ind_complement_ = ind_complement(v, ind)
        c = v.copy()
        c[ind].scal(1.)
        assert len(c) == len(v)
        assert np.all(almost_equal(c, v))

        c = v.copy()
        c[ind].scal(0.)
        assert np.all(almost_equal(c[ind], v.zeros(v.len_ind(ind))))
        assert np.all(almost_equal(c[ind_complement_], v[ind_complement_]))

        for x in (1., 1.4, np.random.random(v.len_ind(ind))):
            c = v.copy()
            c[ind].scal(x)
            assert np.all(almost_equal(c[ind_complement_], v[ind_complement_]))
            assert np.allclose(c[ind].sup_norm(), v[ind].sup_norm() * abs(x))
            assert np.allclose(c[ind].l2_norm(), v[ind].l2_norm() * abs(x))
            if hasattr(v, 'data'):
                y = v.data.copy()
                if NUMPY_INDEX_QUIRK and len(y) == 0:
                    pass
                else:
                    if isinstance(x, np.ndarray) and not isinstance(ind, Number):
                        x = x[:, np.newaxis]
                    y[ind] *= x
                assert np.allclose(c.data, y)
Example #4
    def test_03_03_color_mask(self):
        image_set_list = cpi.ImageSetList()
        image_set=image_set_list.get_image_set(0)
        np.random.seed(0)
        pixel_data = np.random.uniform(size=(10,15,3)).astype(np.float32)
        image_set.add(IMAGE_NAME, cpi.Image(pixel_data))
        
        masking_image = np.random.uniform(size=(10,15))
        
        image_set.add(MASKING_IMAGE_NAME, cpi.Image(masking_image))
        expected_mask = masking_image > .5

        pipeline = cpp.Pipeline()
        module = M.MaskImage()
        module.source_choice.value = M.IO_IMAGE
        module.object_name.value = OBJECTS_NAME
        module.image_name.value = IMAGE_NAME
        module.masking_image_name.value = MASKING_IMAGE_NAME
        module.masked_image_name.value = MASKED_IMAGE_NAME
        module.invert_mask.value = False
        module.module_num = 1
        
        workspace = cpw.Workspace(pipeline, module, image_set, cpo.ObjectSet(),
                                  cpmeas.Measurements(), image_set_list)
        module.run(workspace)
        masked_image = workspace.image_set.get_image(MASKED_IMAGE_NAME)
        self.assertTrue(isinstance(masked_image, cpi.Image))
        self.assertTrue(np.all(masked_image.pixel_data[expected_mask,:] ==
                               pixel_data[expected_mask,:]))
        self.assertTrue(np.all(masked_image.pixel_data[~expected_mask,:] == 0))
        self.assertTrue(np.all(masked_image.mask == expected_mask))
        self.assertFalse(masked_image.has_masking_objects)
Example #5
def test_dofs(vector_array):
    v = vector_array
    np.random.seed(len(v) + 24 + v.dim)
    for ind in valid_inds(v):
        c = v.copy()
        dofs = c[ind].dofs(np.array([], dtype=int))
        assert isinstance(dofs, np.ndarray)
        assert dofs.shape == (v.len_ind(ind), 0)

        c = v.copy()
        dofs = c[ind].dofs([])
        assert isinstance(dofs, np.ndarray)
        assert dofs.shape == (v.len_ind(ind), 0)

        if v.dim > 0:
            for count in (1, 5, 10):
                c_ind = np.random.randint(0, v.dim, count)
                c = v.copy()
                dofs = c[ind].dofs(c_ind)
                assert dofs.shape == (v.len_ind(ind), count)
                c = v.copy()
                dofs2 = c[ind].dofs(list(c_ind))
                assert np.all(dofs == dofs2)
                c = v.copy()
                c.scal(3.)
                dofs2 = c[ind].dofs(c_ind)
                assert np.allclose(dofs * 3, dofs2)
                c = v.copy()
                dofs2 = c[ind].dofs(np.hstack((c_ind, c_ind)))
                assert np.all(dofs2 == np.hstack((dofs, dofs)))
                if hasattr(v, 'data'):
                    assert np.all(dofs == indexed(v.data, ind)[:, c_ind])
Example #6
    def lnpriorfn(self, x):
        if np.all(self.pmin < x) and np.all(self.pmax > x):
            return 0.0
        else:
            return -np.inf
Example #7
    def testLoadSave(self):
        """Plot with an image: test MaskToolsWidget operations"""
        self.plot.addImage(numpy.arange(1024**2).reshape(1024, 1024),
                           legend='test')
        self.qapp.processEvents()

        # Draw a polygon mask
        toolButton = getQToolButtonFromAction(self.maskWidget.polygonAction)
        self.assertIsNot(toolButton, None)
        self.mouseClick(toolButton, qt.Qt.LeftButton)
        self._drawPolygon()

        ref_mask = self.maskWidget.getSelectionMask()
        self.assertFalse(numpy.all(numpy.equal(ref_mask, 0)))

        with temp_dir() as tmp:
            success = self.maskWidget.save(
                os.path.join(tmp, 'mask.npy'), 'npy')
            self.assertTrue(success)

            self.maskWidget.resetSelectionMask()
            self.assertTrue(
                numpy.all(numpy.equal(self.maskWidget.getSelectionMask(), 0)))

            result = self.maskWidget.load(os.path.join(tmp, 'mask.npy'))
            self.assertTrue(result)
            self.assertTrue(numpy.all(numpy.equal(
                self.maskWidget.getSelectionMask(), ref_mask)))
Example #8
    def test_2d_array_parameters_2d_array_input(self):
        """
        When given an array input it must be broadcastable with all the
        parameters.
        """

        t = TModel_1_2([[1, 2], [3, 4]], [[10, 20], [30, 40]],
                          [[1000, 2000], [3000, 4000]])

        y1, z1 = t([[100, 200], [300, 400]])
        assert np.shape(y1) == np.shape(z1) == (2, 2)
        assert np.all(y1 == [[111, 222], [333, 444]])
        assert np.all(z1 == [[1111, 2222], [3333, 4444]])

        y2, z2 = t([[[[100]], [[200]]], [[[300]], [[400]]]])
        assert np.shape(y2) == np.shape(z2) == (2, 2, 2, 2)
        assert np.all(y2 == [[[[111, 122], [133, 144]],
                              [[211, 222], [233, 244]]],
                             [[[311, 322], [333, 344]],
                              [[411, 422], [433, 444]]]])
        assert np.all(z2 == [[[[1111, 2122], [3133, 4144]],
                              [[1211, 2222], [3233, 4244]]],
                             [[[1311, 2322], [3333, 4344]],
                              [[1411, 2422], [3433, 4444]]]])

        with pytest.raises(ValueError):
            # Doesn't broadcast
            y3, z3 = t([[100, 200, 300], [400, 500, 600]])
Example #9
def test_apply_mne_inverse_raw():
    """Test MNE with precomputed inverse operator on Raw."""
    start = 3
    stop = 10
    raw = read_raw_fif(fname_raw)
    label_lh = read_label(fname_label % 'Aud-lh')
    _, times = raw[0, start:stop]
    inverse_operator = read_inverse_operator(fname_full)
    inverse_operator = prepare_inverse_operator(inverse_operator, nave=1,
                                                lambda2=lambda2, method="dSPM")
    for pick_ori in [None, "normal", "vector"]:
        stc = apply_inverse_raw(raw, inverse_operator, lambda2, "dSPM",
                                label=label_lh, start=start, stop=stop, nave=1,
                                pick_ori=pick_ori, buffer_size=None,
                                prepared=True)

        stc2 = apply_inverse_raw(raw, inverse_operator, lambda2, "dSPM",
                                 label=label_lh, start=start, stop=stop,
                                 nave=1, pick_ori=pick_ori,
                                 buffer_size=3, prepared=True)

        if pick_ori is None:
            assert_true(np.all(stc.data > 0))
            assert_true(np.all(stc2.data > 0))

        assert_true(stc.subject == 'sample')
        assert_true(stc2.subject == 'sample')
        assert_array_almost_equal(stc.times, times)
        assert_array_almost_equal(stc2.times, times)
        assert_array_almost_equal(stc.data, stc2.data)
Example #10
    def test_scalar_parameters_1d_array_input(self):
        """
        The dimension of the input should match the number of models unless
        model_set_axis=False is given, in which case the input is copied across
        all models.
        """

        t = TModel_1_1([1, 2], [10, 20], n_models=2)

        with pytest.raises(ValueError):
            y = t(np.arange(5) * 100)

        y1 = t([100, 200])
        assert np.shape(y1) == (2,)
        assert np.all(y1 == [111, 222])

        y2 = t([100, 200], model_set_axis=False)
        # In this case the value [100, 200] should be evaluated on each
        # model rather than evaluating the first model with 100 and the
        # second model with 200
        assert np.shape(y2) == (2, 2)
        assert np.all(y2 == [[111, 211], [122, 222]])

        y3 = t([100, 200, 300], model_set_axis=False)
        assert np.shape(y3) == (2, 3)
        assert np.all(y3 == [[111, 211, 311], [122, 222, 322]])
Example #11
    def test_1d_array_parameters_1d_array_input(self):
        """
        When the input is an array, if model_set_axis=False then it must
        broadcast with the shapes of the parameters (excluding the
        model_set_axis).

        Otherwise all dimensions must be broadcastable.
        """

        t = TModel_1_1([[1, 2, 3], [4, 5, 6]],
                          [[10, 20, 30], [40, 50, 60]], n_models=2)

        with pytest.raises(ValueError):
            y1 = t([100, 200, 300])

        y1 = t([100, 200])
        assert np.shape(y1) == (2, 3)
        assert np.all(y1 == [[111, 122, 133], [244, 255, 266]])

        with pytest.raises(ValueError):
            # Doesn't broadcast with the shape of the parameters, (3,)
            y2 = t([100, 200], model_set_axis=False)

        y2 = t([100, 200, 300], model_set_axis=False)
        assert np.shape(y2) == (2, 3)
        assert np.all(y2 == [[111, 222, 333],
                             [144, 255, 366]])
Example #12
    def testSetAllLayersInvisible( self ):
        tiling = Tiling((900,400), blockSize=100)
        tp = TileProvider(tiling, self.sims)

        tp.requestRefresh(QRectF(100,100,200,200))
        tp.waitForTiles()
        tiles = tp.getTiles(QRectF(100,100,200,200))
        for tile in tiles:
            aimg = byte_view(tile.qimg)
            self.assertTrue(np.all(aimg[:,:,0:3] == self.GRAY3))
            self.assertTrue(np.all(aimg[:,:,3] == 255))

        self.layer1.visible = False
        self.layer2.visible = False
        self.layer3.visible = False
        tp.requestRefresh(QRectF(100,100,200,200))
        tp.waitForTiles()
        tiles = tp.getTiles(QRectF(100,100,200,200))
        for tile in tiles:
            # If all tiles are invisible, then no tile is even rendered at all.
            assert tile.qimg is None

        self.layer1.visible = False
        self.layer2.visible = True
        self.layer2.opacity = 1.0
        self.layer3.visible = False
        tp.requestRefresh(QRectF(100,100,200,200))
        tp.waitForTiles()
        tiles = tp.getTiles(QRectF(100,100,200,200))
        for tile in tiles:
            aimg = byte_view(tile.qimg)
            self.assertTrue(np.all(aimg[:,:,0:3] == self.GRAY2))
            self.assertTrue(np.all(aimg[:,:,3] == 255))
Example #13
    def test_mean_std_12bit(self):
        # Input 12-bit, with an 8-bit color target
        input_scene = np.tile(np.arange(4096)[:, None, None], (1, 1, 3))
        color_target = np.tile(np.arange(256)[:, None, None], (1, 1, 3))

        luts = hm.mean_std_luts(input_scene.astype(np.uint16),
                                color_target.astype(np.uint8))

        np.testing.assert_array_equal(luts[0], luts[1])
        np.testing.assert_array_equal(luts[1], luts[2])

        lut = luts[0]
        assert np.all(lut[:8] == 0)
        assert np.all(lut[-8:] == 4096)
        assert np.diff(lut[8:-8]).min() == 1
        assert np.diff(lut[8:-8]).max() == 2

        # Input 12-bit, with a 12-bit color target
        input_scene = np.tile(np.arange(4096)[:, None, None], (1, 1, 3))
        color_target = np.tile(np.arange(4096)[:, None, None], (1, 1, 3))

        luts = hm.mean_std_luts(input_scene.astype(np.uint16),
                                color_target.astype(np.uint16))

        # Should be a 1 to 1 look-up-table...
        np.testing.assert_array_equal(luts[0], np.arange(4097))
Example #14
    def test_07_01_make_ijv_outlines(self):
        np.random.seed(70)
        x = cpo.Objects()
        ii, jj = np.mgrid[0:10, 0:20]
        masks = [(ii - ic) ** 2 + (jj - jc) ** 2 < r ** 2
                 for ic, jc, r in ((4, 5, 5), (4, 12, 5), (6, 8, 5))]
        i = np.hstack([ii[mask] for mask in masks])
        j = np.hstack([jj[mask] for mask in masks])
        v = np.hstack([[k + 1] * np.sum(mask) for k, mask in enumerate(masks)])

        x.set_ijv(np.column_stack((i, j, v)), ii.shape)
        x.parent_image = cpi.Image(np.zeros((10, 20)))
        colors = np.random.uniform(size=(3, 3)).astype(np.float32)
        image = x.make_ijv_outlines(colors)
        i1 = [i for i, color in enumerate(colors) if np.all(color == image[0, 5, :])]
        self.assertEqual(len(i1), 1)
        i2 = [i for i, color in enumerate(colors) if np.all(color == image[0, 12, :])]
        self.assertEqual(len(i2), 1)
        i3 = [i for i, color in enumerate(colors) if np.all(color == image[-1, 8, :])]
        self.assertEqual(len(i3), 1)
        self.assertNotEqual(i1[0], i2[0])
        self.assertNotEqual(i2[0], i3[0])
        colors = colors[np.array([i1[0], i2[0], i3[0]])]
        outlines = np.zeros((10, 20, 3), np.float32)
        alpha = np.zeros((10, 20))
        for i, (color, mask) in enumerate(zip(colors, masks)):
            my_outline = outline(mask)
            outlines[my_outline] += color
            alpha[my_outline] += 1
        alpha[alpha == 0] = 1
        outlines /= alpha[:, :, np.newaxis]
        np.testing.assert_almost_equal(outlines, image)
Example #15
 def test_01_04_size_color(self):
     secondary, mask = cpo.size_similarly(np.zeros((10, 20), int),
                                          np.zeros((10, 15, 3), np.float32))
     self.assertEqual(tuple(secondary.shape), (10, 20, 3))
     self.assertTrue(np.all(mask[:10, :15]))
     self.assertTrue(np.all(~mask[:10, 15:]))
     self.assertEqual(secondary.dtype, np.dtype(np.float32))
Example #16
    def test_no_bounds(self):
        x0 = np.zeros(3)
        h = np.ones(3) * 1e-2
        inf_lower = np.empty_like(x0)
        inf_upper = np.empty_like(x0)
        inf_lower.fill(-np.inf)
        inf_upper.fill(np.inf)

        h_adjusted, one_sided = _adjust_scheme_to_bounds(
            x0, h, 1, '1-sided', inf_lower, inf_upper)
        assert_allclose(h_adjusted, h)
        assert_(np.all(one_sided))

        h_adjusted, one_sided = _adjust_scheme_to_bounds(
            x0, h, 2, '1-sided', inf_lower, inf_upper)
        assert_allclose(h_adjusted, h)
        assert_(np.all(one_sided))

        h_adjusted, one_sided = _adjust_scheme_to_bounds(
            x0, h, 1, '2-sided', inf_lower, inf_upper)
        assert_allclose(h_adjusted, h)
        assert_(np.all(~one_sided))

        h_adjusted, one_sided = _adjust_scheme_to_bounds(
            x0, h, 2, '2-sided', inf_lower, inf_upper)
        assert_allclose(h_adjusted, h)
        assert_(np.all(~one_sided))
Example #17
 def test_06_05_ijv_three_overlapping(self):
     #
     # This is a regression test of a bug where a segmentation consists
     # of only one point, labeled three times yielding two planes instead
     # of three.
     #
     ijv = np.array([[4, 5, 1],
                     [4, 5, 2],
                     [4, 5, 3]])
     x = cpo.Objects()
     x.set_ijv(ijv, (8, 9))
     labels = []
     indices = np.zeros(3, bool)
     for l, i in x.get_labels():
         labels.append(l)
         self.assertEqual(len(i), 1)
         self.assertTrue(i[0] in (1, 2, 3))
         indices[i[0] - 1] = True
     self.assertTrue(np.all(indices))
     self.assertEqual(len(labels), 3)
     lstacked = np.dstack(labels)
     i, j, k = np.mgrid[0:lstacked.shape[0],
               0:lstacked.shape[1],
               0:lstacked.shape[2]]
     self.assertTrue(np.all(lstacked[(i != 4) | (j != 5)] == 0))
     self.assertEqual((1, 2, 3), tuple(sorted(lstacked[4, 5, :])))
Example #18
    def test_material_functions(self):
        from sfepy.discrete import Material

        problem = self.problem
        conf = problem.conf

        ts = problem.get_default_ts(step=0)

        conf_mat1 = conf.get_item_by_name('materials', 'mf1')
        mat1 = Material.from_conf(conf_mat1, problem.functions)
        mat1.time_update(ts, None, mode='normal', problem=problem)

        coors = problem.domain.get_mesh_coors()
        assert_(nm.all(coors[:,0] == mat1.get_data(None, 'x_0')))

        conf_mat2 = conf.get_item_by_name('materials', 'mf2')
        mat2 = Material.from_conf(conf_mat2, problem.functions)
        mat2.time_update(ts, None, mode='normal', problem=problem)

        assert_(nm.all(coors[:,1] == mat2.get_data(None, 'x_1')))

        materials = problem.get_materials()
        materials.time_update(ts, problem.equations, mode='normal',
                              problem=problem)
        mat3 = materials['mf3']
        key = mat3.get_keys(region_name='Omega')[0]

        assert_(nm.all(mat3.get_data(key, 'a') == 10.0))
        assert_(nm.all(mat3.get_data(key, 'b') == 2.0))
        assert_(mat3.get_data(None, 'c') == 'ahoj')

        return True
Example #19
def test_normalization():
    """Test that `match_template` gives the correct normalization.

    Normalization gives 1 for a perfect match and -1 for an inverted-match.
    This test adds positive and negative squares to a zero-array and matches
    the array with a positive template.
    """
    n = 5
    N = 20
    ipos, jpos = (2, 3)
    ineg, jneg = (12, 11)
    image = np.full((N, N), 0.5)
    image[ipos:ipos + n, jpos:jpos + n] = 1
    image[ineg:ineg + n, jneg:jneg + n] = 0

    # white square with a black border
    template = np.zeros((n + 2, n + 2))
    template[1:1 + n, 1:1 + n] = 1

    result = match_template(image, template)

    # get the max and min results.
    sorted_result = np.argsort(result.flat)
    iflat_min = sorted_result[0]
    iflat_max = sorted_result[-1]
    min_result = np.unravel_index(iflat_min, result.shape)
    max_result = np.unravel_index(iflat_max, result.shape)

    # shift result by 1 because of template border
    assert np.all((np.array(min_result) + 1) == (ineg, jneg))
    assert np.all((np.array(max_result) + 1) == (ipos, jpos))

    assert np.allclose(result.flat[iflat_min], -1)
    assert np.allclose(result.flat[iflat_max], 1)
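
The normalization described in the docstring above is the standard zero-mean, unit-variance cross-correlation evaluated at every template position, which is why a perfect match scores +1 and an inverted match scores -1. The short sketch below is an illustration only (it assumes scikit-image is installed and is not part of the original test): it computes that correlation by hand at a known match position and compares it with the match_template result.

import numpy as np
from skimage.feature import match_template

rng = np.random.default_rng(0)
image = rng.random((20, 20))
template = image[5:10, 7:12].copy()   # cut a patch, so a perfect match exists at (5, 7)

result = match_template(image, template)

# Manual normalized cross-correlation at the matching position.
window = image[5:10, 7:12]
num = np.sum((window - window.mean()) * (template - template.mean()))
den = np.sqrt(np.sum((window - window.mean()) ** 2) *
              np.sum((template - template.mean()) ** 2))
print(result[5, 7], num / den)        # both are ~1.0 for the perfect match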
Example #20
def get_resampling_matrix(global_grid,local_grid):
    """Build the rectangular matrix that linearly resamples from the global grid to a local grid.

    The local grid range must be contained within the global grid range.

    Args:
        global_grid(numpy.ndarray): Sorted array of n global grid wavelengths.
        local_grid(numpy.ndarray): Sorted array of m local grid wavelengths.

    Returns:
        numpy.ndarray: Array of (m,n) matrix elements that perform the linear resampling.
    """
    assert np.all(np.diff(global_grid) > 0),'Global grid is not strictly increasing.'
    assert np.all(np.diff(local_grid) > 0),'Local grid is not strictly increasing.'
    # Locate each local wavelength in the global grid.
    global_index = np.searchsorted(global_grid,local_grid)
    assert local_grid[0] >= global_grid[0],'Local grid extends below global grid.'
    assert local_grid[-1] <= global_grid[-1],'Local grid extends above global grid.'
    # Lookup the global-grid bracketing interval (xlo,xhi) for each local grid point.
    # Note that this gives xlo = global_grid[-1] if local_grid[0] == global_grid[0]
    # but this is fine since the coefficient of xlo will be zero.
    global_xhi = global_grid[global_index]
    global_xlo = global_grid[global_index-1]
    # Create the rectangular interpolation matrix to return.
    alpha = (local_grid - global_xlo)/(global_xhi - global_xlo)
    local_index = np.arange(len(local_grid),dtype=int)
    matrix = np.zeros((len(local_grid),len(global_grid)))
    matrix[local_index,global_index] = alpha
    matrix[local_index,global_index-1] = 1 - alpha
    return matrix
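
As a quick illustration of the matrix returned above (a usage sketch with made-up grids, not part of the original source), resampling any quantity tabulated on the global grid onto the local grid is a single matrix-vector product:

import numpy as np

global_grid = np.linspace(3500.0, 7500.0, 401)   # n = 401 global wavelengths
local_grid = np.linspace(4000.0, 5000.0, 101)    # m = 101 local wavelengths
matrix = get_resampling_matrix(global_grid, local_grid)

global_flux = np.sin(global_grid / 500.0)        # some quantity on the global grid
local_flux = matrix.dot(global_flux)             # linearly resampled onto the local grid
assert np.allclose(local_flux, np.sin(local_grid / 500.0), atol=1e-3)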
Example #21
    def max_err(self, g_pt, abs_tol, rel_tol):
        """Find the biggest error between g_pt and self.gf.

        What is measured is the violation of relative and absolute errors,
        wrt the provided tolerances (abs_tol, rel_tol).
        A value > 1 means both tolerances are exceeded.

        Return the argmax of min(abs_err / abs_tol, rel_err / rel_tol) over
        g_pt, as well as abs_err and rel_err at this point.
        """
        pos = []
        errs = []
        abs_errs = []
        rel_errs = []

        abs_rel_errs = self.abs_rel_errors(g_pt)
        for abs_err, rel_err in abs_rel_errs:
            if not numpy.all(numpy.isfinite(abs_err)):
                raise ValueError('abs_err not finite', repr(abs_err))
            if not numpy.all(numpy.isfinite(rel_err)):
                raise ValueError('rel_err not finite', repr(rel_err))
            scaled_err = numpy.minimum(abs_err / abs_tol, rel_err / rel_tol)
            max_i = scaled_err.argmax()

            pos.append(max_i)
            errs.append(scaled_err.flatten()[max_i])
            abs_errs.append(abs_err.flatten()[max_i])
            rel_errs.append(rel_err.flatten()[max_i])

        # max over the arrays in g_pt
        max_arg = numpy.argmax(errs)
        max_pos = pos[max_arg]
        return (max_arg, pos[max_arg], abs_errs[max_arg], rel_errs[max_arg])
Example #22
    def test_non_quantity_with_unit(self):
        """Test that unit attributes in objects get recognized."""
        class MyQuantityLookalike(np.ndarray):
            pass

        a = np.arange(3.)
        mylookalike = a.copy().view(MyQuantityLookalike)
        mylookalike.unit = 'm'
        q1 = u.Quantity(mylookalike)
        assert isinstance(q1, u.Quantity)
        assert q1.unit is u.m
        assert np.all(q1.value == a)

        q2 = u.Quantity(mylookalike, u.mm)
        assert q2.unit is u.mm
        assert np.all(q2.value == 1000.*a)

        q3 = u.Quantity(mylookalike, copy=False)
        assert np.all(q3.value == mylookalike)
        q3[2] = 0
        assert q3[2] == 0.
        assert mylookalike[2] == 0.

        mylookalike = a.copy().view(MyQuantityLookalike)
        mylookalike.unit = u.m
        q4 = u.Quantity(mylookalike, u.mm, copy=False)
        q4[2] = 0
        assert q4[2] == 0.
        assert mylookalike[2] == 2.

        mylookalike.unit = 'nonsense'
        with pytest.raises(TypeError):
            u.Quantity(mylookalike)
Example #23
def test_path_no_doubled_point_in_to_polygon():
    hand = np.array(
        [[1.64516129, 1.16145833],
         [1.64516129, 1.59375],
         [1.35080645, 1.921875],
         [1.375, 2.18229167],
         [1.68548387, 1.9375],
         [1.60887097, 2.55208333],
         [1.68548387, 2.69791667],
         [1.76209677, 2.56770833],
         [1.83064516, 1.97395833],
         [1.89516129, 2.75],
         [1.9516129, 2.84895833],
         [2.01209677, 2.76041667],
         [1.99193548, 1.99479167],
         [2.11290323, 2.63020833],
         [2.2016129, 2.734375],
         [2.25403226, 2.60416667],
         [2.14919355, 1.953125],
         [2.30645161, 2.36979167],
         [2.39112903, 2.36979167],
         [2.41532258, 2.1875],
         [2.1733871, 1.703125],
         [2.07782258, 1.16666667]])

    (r0, c0, r1, c1) = (1.0, 1.5, 2.1, 2.5)

    poly = Path(np.vstack((hand[:, 1], hand[:, 0])).T, closed=True)
    clip_rect = transforms.Bbox([[r0, c0], [r1, c1]])
    poly_clipped = poly.clip_to_bbox(clip_rect).to_polygons()[0]

    assert np.all(poly_clipped[-2] != poly_clipped[-1])
    assert np.all(poly_clipped[-1] == poly_clipped[0])
Example #24
def test_pickle():
    """Test that a module can be pickled"""
    M = Module()
    M.x = (T.dmatrix())
    M.y = (T.dmatrix())
    a = T.dmatrix()
    M.f = Method([a], a + M.x + M.y)
    M.g = Method([a], a * M.x * M.y)

    mode = get_mode()
    m = M.make(x=numpy.zeros((4,5)), y=numpy.ones((2,3)), mode=mode)

    m_dup = cPickle.loads(cPickle.dumps(m, protocol=-1))

    assert numpy.all(m.x == m_dup.x) and numpy.all(m.y == m_dup.y)

    m_dup.x[0,0] = 3.142
    assert m_dup.f.input_storage[1].data[0,0] == 3.142
    assert m.x[0,0] == 0.0 #ensure that m is not aliased to m_dup

    #check that the unpickled version has the same argument/property aliasing
    assert m_dup.x is m_dup.f.input_storage[1].data
    assert m_dup.y is m_dup.f.input_storage[2].data
    assert m_dup.x is m_dup.g.input_storage[1].data
    assert m_dup.y is m_dup.g.input_storage[2].data
Example #25
def test_tally_results(capi_run):
    t = openmc.capi.tallies[1]
    assert t.num_realizations == 5
    assert np.all(t.mean >= 0)
    nonzero = (t.mean > 0.0)
    assert np.all(t.std_dev[nonzero] >= 0)
    assert np.all(t.ci_width()[nonzero] >= 1.95*t.std_dev[nonzero])
Example #26
    def pop_planes(geometry, kwargs):
        # Convert miller index specifications to normal vectors
        miller_defs = kwargs.pop("planes_miller", None)
        if miller_defs is not None:
            if np.any(np.all(abs(miller_defs[:,0:3]) < EPSILON, axis=1)):
                error("Emtpy miller index tuple")
            miller_defs[:,0:3] = miller_to_normal(
                np.dot(geometry.latvecs, geometry.bravais_cell),
                miller_defs[:,0:3])
        else:
            miller_defs = np.zeros((0, 4), dtype=float)
            
        # Convert plane normal vector specifications into cartesian coords.
        normal_defs = kwargs.pop("planes_normal", None)
        if normal_defs is not None:
            normal_defs[:,0:3] = geometry.coord_transform(
                normal_defs[:,0:3],
                kwargs.pop("planes_normal_coordsys", "lattice"))
            if np.any(np.all(abs(normal_defs[:,0:3]) < EPSILON, axis=1)):
                error("Emtpy normal vector definition")
        else:
            normal_defs = np.zeros((0, 4), dtype=float)

        # Append the two sets of plane definitions
        planes_normal = np.vstack(( miller_defs, normal_defs ))
        return planes_normal
Example #27
 def test_rand(self):
     # Simple distributional checks for sparse.rand.
     for random_state in None, 4321, np.random.RandomState():
         x = sprand(10, 20, density=0.5, dtype=np.float64,
                    random_state=random_state)
         assert_(np.all(np.less_equal(0, x.data)))
         assert_(np.all(np.less_equal(x.data, 1)))
Example #28
    def __getitem__(self, key):
        if type(key) == slice:
            # if all in cache, then use slice, else don't
            start, stop, step = key.start, key.stop, key.step

            in_cache = self.existence_cache[start:stop:step]
            if np.all(in_cache):
                return self.cache[self.data_name][start:stop:step]
            elif np.all(np.logical_not(in_cache)):
                return self.__get_from_data_source(slice(start, stop, step))

            key = slice_to_range(key, len(self))

        if is_int_like(key):
            index = key
            if self.existence_cache[index]:
                return self.cache[self.data_name][index]
            else:
                return self.__get_from_data_source(index)

        if is_array_like(key):
            data = []

            for index, in_cache in zip(key, self.existence_cache[key]):
                if in_cache:
                    datum = self.cache[self.data_name][index]
                else:
                    datum = self.__get_from_data_source(index)

                data.append(datum)
            return np.array(data)

        else:
            raise RuntimeError('key: {} is not compatible with this datasource'.format(str(key)))
Example #29
 def test_data_scaling(self):
     hdr = self.header_class()
     hdr.set_data_shape((1,2,3))
     hdr.set_data_dtype(np.int16)
     S3 = BytesIO()
     data = np.arange(6, dtype=np.float64).reshape((1,2,3))
     # This uses scaling
     hdr.data_to_fileobj(data, S3)
     data_back = hdr.data_from_fileobj(S3)
     # almost equal
     assert_array_almost_equal(data, data_back, 4)
     # But not quite
     assert_false(np.all(data == data_back))
     # This is exactly the same call, just testing it works twice
     data_back2 = hdr.data_from_fileobj(S3)
      assert_array_equal(data_back, data_back2)
     # Rescaling is the default
     hdr.data_to_fileobj(data, S3, rescale=True)
     data_back = hdr.data_from_fileobj(S3)
     assert_array_almost_equal(data, data_back, 4)
     assert_false(np.all(data == data_back))
     # This doesn't use scaling, and so gets perfect precision
     hdr.data_to_fileobj(data, S3, rescale=False)
     data_back = hdr.data_from_fileobj(S3)
     assert_true(np.all(data == data_back))
Example #30
def test_reset(Simulator, learning_rule, plt, seed, rng):
    """Make sure resetting learning rules resets all state."""
    m, activity_p, trans_p = learning_net(
        learning_rule, nengo.Network(seed=seed), rng)

    sim = Simulator(m)
    sim.run(0.1)
    sim.run(0.2)

    first_t = sim.trange()
    first_t_trans = sim.trange(dt=0.01)
    first_activity_p = np.array(sim.data[activity_p], copy=True)
    first_trans_p = np.array(sim.data[trans_p], copy=True)

    sim.reset()
    sim.run(0.3)

    plt.subplot(2, 1, 1)
    plt.ylabel("Neural activity")
    plt.plot(first_t, first_activity_p, c='b')
    plt.plot(sim.trange(), sim.data[activity_p], c='g')
    plt.subplot(2, 1, 2)
    plt.ylabel("Connection weight")
    plt.plot(first_t_trans, first_trans_p[..., 0], c='b')
    plt.plot(sim.trange(dt=0.01), sim.data[trans_p][..., 0], c='g')

    assert np.all(sim.trange() == first_t)
    assert np.all(sim.trange(dt=0.01) == first_t_trans)
    assert np.all(sim.data[activity_p] == first_activity_p)
    assert np.all(sim.data[trans_p] == first_trans_p)
Example #31
def boxplot_local_evaluation(metric="RMSE",
    paths=["../result/result_oracle/default-model/mode_test_.list", "../result/baselines/log/predictions_raw_RF.list"]):

    dic_measure = {"MAE":0, "MSE":1, "R2_S":2, "RRMSE":3, "RMSE":4, "MARE":5, "R2":6}

    data = []
    data2 = []
    data3 = []
    for path in paths:
        print(path)
        if str.find(path, 'baselines') >= 0:
            _, y_true, y_pred = pickle.load(open(path, 'rb'))
        else:
            y_true, y_pred = pickle.load(open(path, 'rb'))

        y_true = np.array(y_true)
        y_pred = np.array(y_pred)
        max_value = np.max(y_true)
        min_value = np.min(y_true)
        steps = [[min_value+(50*i),min_value+(50*(i+1))] for i in range(0,int((max_value-min_value)/50)-4)]
        steps[len(steps)-1][1] = max_value+1

        x = []
        y = []
        x2 = []
        y2 = []
        sd = []
        mare = []
        mare_sd = []

        for step in steps:
            aux_true = y_true[np.all([step[0]<=y_true, y_true<step[1]], axis=0)]
            aux_pred = y_pred[np.all([step[0]<=y_true, y_true<step[1]], axis=0)]
            diff = aux_pred - aux_true
            y = y+diff.tolist()
            x = x+(["{0} - {1}".format(step[0],step[1])]*diff.shape[0])
            x2 = x2+["{0} - {1}".format(step[0],step[1])]
            aux_mare = (np.abs(aux_true - aux_pred) / aux_pred)*100
            mare = mare + [np.mean(aux_mare)]
            mare_sd = mare_sd + [np.std(aux_mare)]
            y2 = y2+[np.mean(np.abs(diff))]
            sd = sd+[np.std(np.abs(diff))]

        data.append([x,y])
        data2.append([x2, y2, sd])
        data3.append([x2, mare, mare_sd])

    trace0 = go.Box(x=data[0][0], y=data[0][1], name='mode', marker=dict(color='#3D9970'), boxmean=True)
    trace1 = go.Box(x=data[1][0], y=data[1][1], name='baseline-RF', marker=dict(color='#FF851B'), boxmean=True)
    data = [trace0, trace1]
    layout = go.Layout(title="Difference between predicted and truth by range",
        yaxis=dict(title='Difference (predict-truth)', zeroline=False),
        boxmode='group')
    fig = go.Figure(data=data, layout=layout)
    plot(fig, filename='boxplot.html', auto_open=True)


    trace0 = go.Bar(x=data2[0][0], y=data2[0][1], name='mode',
                    error_y=dict( type='data', array=data2[0][2], visible=True))
    trace1 = go.Bar(x=data2[1][0], y=data2[1][1], name='baseline-RF',
                    error_y=dict( type='data', array=data2[1][2], visible=True))
    data = [trace0, trace1]
    layout = go.Layout(title="Absolute difference between predicted and truth by range",
        barmode='group', yaxis=dict(title="Abs Diff"))
    fig = go.Figure(data=data, layout=layout)
    plot(fig, filename = 'barplot-mare.html', auto_open=True)

    trace0 = go.Bar(x=data3[0][0], y=data3[0][1], name='mode',
                    error_y=dict( type='data', array=data3[0][2], visible=True))
    trace1 = go.Bar(x=data3[1][0], y=data3[1][1], name='baseline-RF',
                    error_y=dict( type='data', array=data3[1][2], visible=True))
    data = [trace0, trace1]
    layout = go.Layout(title="MARE measure by range",
        barmode='group', yaxis=dict(title="MARE"))
    fig = go.Figure(data=data, layout=layout)
    plot(fig, filename = 'barplot-diff.html', auto_open=True)
Example #32
def test_gzip(filename):
    t_comp = read(os.path.join(ROOT, filename))
    t_uncomp = read(os.path.join(ROOT, filename.replace('.gz', '')))
    assert t_comp.dtype.names == t_uncomp.dtype.names
    assert np.all(t_comp.as_array() == t_uncomp.as_array())
Example #33
def neighbor_mean_std(df,
                      col_val,
                      col_group,
                      col_axis,
                      axis_offset=None,
                      radius=None,
                      compute_mad=False):
    """Compute the neighbor mean and std of the residual matrix.

    Args:
        df (pd.DataFrame): Residual data frame.
        col_val ('str'): Name for column that store the residual.
        col_group ('str'): Name for column that store the group label.
        col_axis (list{str}): List of two axis column names.
        axis_offset (list{int} | None, optional):
            List of offset for each axis to make it suitable as numpy array.
        radius (list{int} | None, optional):
            List of the neighbor radius for each dimension.
        compute_mad (bool, optional):
            If compute_mad, also compute median absolute deviation.

    Returns:
        pd.DataFrame:
            Return the data frame with two extra columns contains neighbor
            mean and std.
    """
    axis_offset = [0, 0] if axis_offset is None else axis_offset
    radius = [1, 1] if radius is None else radius
    assert col_val in df
    assert col_group in df
    assert len(col_axis) == 2
    assert len(axis_offset) == 2
    assert len(radius) == 2
    assert all([col in df for col in col_axis])
    assert all([isinstance(offset, int) for offset in axis_offset])
    assert all([isinstance(r, int) for r in radius])

    df_list = [
        df[df[col_group] == group].reset_index()
        for group in df[col_group].unique()
    ]   # separate dataset by groups

    for i, df_sub in enumerate(df_list):

        index = np.unique(np.asarray(df_sub[col_axis].values), axis=0).astype(int)
        new_df = pd.DataFrame({
            'group': df_sub[col_group].iloc[0],
            col_axis[0]: index[:, 0],
            col_axis[1]: index[:, 1],
            'residual_mean': np.nan,
            'residual_std': np.nan
        })
        for j in index:
            print(j, end='\r')
            df_filter = df_sub.copy()
            for k, ax in enumerate(col_axis):
                rad = radius[k]
                ax_filter = np.abs(df_sub[col_axis[k]] - j[k]) <= rad
                df_filter = df_filter.loc[ax_filter]
            mean = df_filter[col_val].mean()
            std = df_filter[col_val].std()
            subset = np.all(new_df[col_axis] == j, axis=1).values
            new_df.loc[subset, 'residual_mean'] = mean
            new_df.loc[subset, 'residual_std'] = std

        df_list[i] = new_df

    return pd.concat(df_list)
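
A minimal usage sketch for the function above (the column names and toy data are illustrative assumptions, not taken from the original source): residuals on a 5x5 integer grid for a single group, averaged over a radius-1 neighborhood along each axis.

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame({
    'residual': rng.normal(size=25),
    'location': ['A'] * 25,
    'axis_0': np.repeat(np.arange(5), 5),
    'axis_1': np.tile(np.arange(5), 5),
})

result = neighbor_mean_std(df,
                           col_val='residual',
                           col_group='location',
                           col_axis=['axis_0', 'axis_1'],
                           radius=[1, 1])
print(result[['axis_0', 'axis_1', 'residual_mean', 'residual_std']].head())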
Example #34
 def test_psd_from_freq_series(self):
     freq_data = np.array([1, 2, 3])
     df = 0.1
     psd = gwutils.psd_from_freq_series(freq_data, df)
     self.assertTrue(np.all(psd == (freq_data * 2 * df ** 0.5) ** 2))
Example #35
 def test_asd_from_freq_series(self):
     freq_data = np.array([1, 2, 3])
     df = 0.1
     asd = gwutils.asd_from_freq_series(freq_data, df)
     self.assertTrue(np.all(asd == freq_data * 2 * df ** 0.5))
Example #36
def assert_correct_split_candidates(split_candidates, counts):
    assert isinstance(split_candidates, np.ndarray)
    assert split_candidates[0] == 0
    assert split_candidates[-1] == len(counts)
    assert np.all(
        split_candidates[1:] > split_candidates[:-1])  # strictly ascending
Example #37
def assert_correct_counts(counts):
    assert isinstance(counts, np.ndarray)
    assert counts.dtype == int
    assert np.all(counts >= 0)
    assert len(counts) > 0
Example #38
def test_torchscript(tmpdir, csv_filename, should_load_model, model_type):
    #######
    # Setup
    #######
    dir_path = tmpdir
    data_csv_path = os.path.join(tmpdir, csv_filename)

    # Single sequence input, single category output
    input_features = [
        binary_feature(),
        number_feature(),
        category_feature(vocab_size=3),
    ]
    if model_type == "ecd":
        image_dest_folder = os.path.join(tmpdir, "generated_images")
        audio_dest_folder = os.path.join(tmpdir, "generated_audio")
        input_features.extend([
            sequence_feature(vocab_size=3),
            text_feature(vocab_size=3),
            vector_feature(),
            image_feature(image_dest_folder),
            audio_feature(audio_dest_folder),
            timeseries_feature(),
            date_feature(),
            date_feature(),
            h3_feature(),
            set_feature(vocab_size=3),
            bag_feature(vocab_size=3),
        ])

    output_features = [
        category_feature(vocab_size=3),
    ]
    if model_type == "ecd":
        output_features.extend([
            binary_feature(),
            number_feature(),
            set_feature(vocab_size=3),
            vector_feature(),
            sequence_feature(vocab_size=3),
            text_feature(vocab_size=3),
        ])

    predictions_column_name = "{}_predictions".format(
        output_features[0]["name"])

    # Generate test data
    data_csv_path = generate_data(input_features, output_features,
                                  data_csv_path)

    #############
    # Train model
    #############
    backend = LocalTestBackend()
    config = {
        "model_type": model_type,
        "input_features": input_features,
        "output_features": output_features,
    }
    if model_type == "ecd":
        config[TRAINER] = {"epochs": 2}
    else:
        config[TRAINER] = {"num_boost_round": 2}
    ludwig_model = LudwigModel(config, backend=backend)
    ludwig_model.train(
        dataset=data_csv_path,
        skip_save_training_description=True,
        skip_save_training_statistics=True,
        skip_save_model=True,
        skip_save_progress=True,
        skip_save_log=True,
        skip_save_processed_input=True,
    )

    ###################
    # save Ludwig model
    ###################
    ludwigmodel_path = os.path.join(dir_path, "ludwigmodel")
    shutil.rmtree(ludwigmodel_path, ignore_errors=True)
    ludwig_model.save(ludwigmodel_path)

    ###################
    # load Ludwig model
    ###################
    if should_load_model:
        ludwig_model = LudwigModel.load(ludwigmodel_path, backend=backend)

    ##############################
    # collect weight tensors names
    ##############################
    original_predictions_df, _ = ludwig_model.predict(dataset=data_csv_path)
    original_weights = deepcopy(list(ludwig_model.model.parameters()))
    original_weights = [t.cpu() for t in original_weights]

    # Move the model to CPU for tracing
    ludwig_model.model.cpu()

    #################
    # save torchscript
    #################
    torchscript_path = os.path.join(dir_path, "torchscript")
    shutil.rmtree(torchscript_path, ignore_errors=True)
    ludwig_model.model.save_torchscript(torchscript_path)

    ###################################################
    # load Ludwig model, obtain predictions and weights
    ###################################################
    ludwig_model = LudwigModel.load(ludwigmodel_path, backend=backend)
    loaded_prediction_df, _ = ludwig_model.predict(dataset=data_csv_path)
    loaded_weights = deepcopy(list(ludwig_model.model.parameters()))
    loaded_weights = [t.cpu() for t in loaded_weights]

    #####################################################
    # restore torchscript, obtain predictions and weights
    #####################################################
    training_set_metadata_json_fp = os.path.join(ludwigmodel_path,
                                                 TRAIN_SET_METADATA_FILE_NAME)

    dataset, training_set_metadata = preprocess_for_prediction(
        ludwig_model.config,
        dataset=data_csv_path,
        training_set_metadata=training_set_metadata_json_fp,
        include_outputs=False,
        backend=backend,
    )

    restored_model = torch.jit.load(torchscript_path)

    # Check the outputs for one of the features for correctness
    # Here we choose the first output feature (categorical)
    of_name = list(ludwig_model.model.output_features.keys())[0]

    data_to_predict = {
        name: torch.from_numpy(dataset.dataset[feature.proc_column])
        for name, feature in ludwig_model.model.input_features.items()
    }

    # Get predictions from restored torchscript.
    logits = restored_model(data_to_predict)
    restored_predictions = torch.argmax(
        output_feature_utils.get_output_feature_tensor(logits, of_name,
                                                       "logits"), -1)

    restored_predictions = [
        training_set_metadata[of_name]["idx2str"][idx]
        for idx in restored_predictions
    ]

    restored_weights = deepcopy(list(restored_model.parameters()))
    restored_weights = [t.cpu() for t in restored_weights]

    ###############################################
    # Check if weights and predictions are the same
    ###############################################

    # Check to weight values match the original model.
    assert utils.is_all_close(original_weights, loaded_weights)
    assert utils.is_all_close(original_weights, restored_weights)

    # Check that predictions are identical to the original model.
    assert np.all(original_predictions_df[predictions_column_name] ==
                  loaded_prediction_df[predictions_column_name])

    assert np.all(original_predictions_df[predictions_column_name] ==
                  restored_predictions)
Example #39
def download_data(client=None,
                  sta=None,
                  start=UTCDateTime,
                  end=UTCDateTime,
                  stdata=[],
                  ndval=nan,
                  new_sr=0.,
                  verbose=False):
    """
    Function to build a stream object for a seismogram in a given time window either
    by downloading data from the client object or alternatively first checking if the
    given data is already available locally.

    Note
    ----
    Currently only supports NEZ Components!

    Parameters
    ----------
    client : :class:`~obspy.client.fdsn.Client`
        Client object
    sta : Dict
        Station metadata from :mod:`~StDb` data base
    start : :class:`~obspy.core.utcdatetime.UTCDateTime`
        Start time for request
    end : :class:`~obspy.core.utcdatetime.UTCDateTime`
        End time for request
    stdata : List
        Station list
    ndval : float or nan
        Default value for missing data

    Returns
    -------
    err : bool
        Boolean for error handling (`False` is associated with success)
    trN : :class:`~obspy.core.Trace`
        Trace of North component of motion
    trE : :class:`~obspy.core.Trace`
        Trace of East component of motion
    trZ : :class:`~obspy.core.Trace`
        Trace of Vertical component of motion

    """

    from fnmatch import filter
    from obspy import read, Stream
    from os.path import dirname, join, exists
    from numpy import any
    from math import floor

    # Output
    print(("*     {0:s}.{1:2s} - ZNE:".format(sta.station,
                                              sta.channel.upper())))

    # Set Error Default to True
    erd = True

    # Check if there is local data
    if len(stdata) > 0:
        # Only a single day: Search for local data
        # Get Z localdata
        errZ, stZ = parse_localdata_for_comp(comp='Z',
                                             stdata=stdata,
                                             sta=sta,
                                             start=start,
                                             end=end,
                                             ndval=ndval)
        # Get N localdata
        errN, stN = parse_localdata_for_comp(comp='N',
                                             stdata=stdata,
                                             sta=sta,
                                             start=start,
                                             end=end,
                                             ndval=ndval)
        # Get E localdata
        errE, stE = parse_localdata_for_comp(comp='E',
                                             stdata=stdata,
                                             sta=sta,
                                             start=start,
                                             end=end,
                                             ndval=ndval)
        # Retrieved successfully?
        erd = errZ or errN or errE
        if not erd:
            # Combine Data
            st = stZ + stN + stE

    # No local data? Request using client
    if erd:
        erd = False

        for loc in sta.location:
            tloc = loc
            # Construct location name
            if len(tloc) == 0:
                tloc = "--"
            # Construct Channel List
            channelsZNE = sta.channel.upper() + 'Z,' + sta.channel.upper() + \
                'N,' + sta.channel.upper() + 'E'
            print(("*          {1:2s}[ZNE].{2:2s} - Checking Network".format(
                sta.station, sta.channel.upper(), tloc)))

            # Get waveforms, with extra 1 second to avoid
            # traces cropped too short - traces are trimmed later
            try:
                st = client.get_waveforms(network=sta.network,
                                          station=sta.station,
                                          location=loc,
                                          channel=channelsZNE,
                                          starttime=start,
                                          endtime=end + 1.,
                                          attach_response=False)
                if len(st) == 3:
                    print("*              - ZNE Data Downloaded")

                # It's possible if len(st)==1 that data is Z12
                else:
                    # Construct Channel List
                    channelsZ12 = sta.channel.upper() + 'Z,' + \
                        sta.channel.upper() + '1,' + \
                        sta.channel.upper() + '2'
                    msg = "*          {1:2s}[Z12].{2:2s} - Checking Network".format(
                        sta.station, sta.channel.upper(), tloc)
                    print(msg)
                    try:
                        st = client.get_waveforms(network=sta.network,
                                                  station=sta.station,
                                                  location=loc,
                                                  channel=channelsZ12,
                                                  starttime=start,
                                                  endtime=end + 1.,
                                                  attach_response=False)
                        if len(st) == 3:
                            print("*              - Z12 Data Downloaded")
                        else:
                            st = None
                    except:
                        st = None
            except:
                st = None

            # Break if we successfully obtained 3 components in st
            if st is not None:
                break

    # Check the correct 3 components exist
    if st is None:
        print("* Error retrieving waveforms")
        print("**************************************************")
        return True, None

    # Three components successfully retrieved
    else:

        # Detrend and apply taper
        st.detrend('demean').detrend('linear').taper(max_percentage=0.05,
                                                     max_length=5.)

        # Check start times
        if not np.all([tr.stats.starttime == start for tr in st]):
            print("* Start times are not all close to true start: ")
            [
                print("*   " + tr.stats.channel + " " +
                      str(tr.stats.starttime) + " " + str(tr.stats.endtime))
                for tr in st
            ]
            print("*   True start: " + str(start))
            print("* -> Shifting traces to true start")
            delay = [tr.stats.starttime - start for tr in st]
            st_shifted = Stream(
                traces=[traceshift(tr, dt) for tr, dt in zip(st, delay)])
            st = st_shifted.copy()

        # Check sampling rate
        sr = st[0].stats.sampling_rate
        sr_round = float(floor_decimal(sr, 0))
        if not sr == sr_round:
            print("* Sampling rate is not an integer value: ", sr)
            print("* -> Resampling")
            st.resample(sr_round, no_filter=False)

        # Try trimming
        try:
            st.trim(start, end)
        except:
            print("* Unable to trim")
            print("* -> Skipping")
            print("**************************************************")
            return True, None

        # Check final lengths - they should all be equal if start times
        # and sampling rates are all equal and traces have been trimmed
        if not np.allclose([tr.stats.npts for tr in st[1:]], st[0].stats.npts):
            print("* Lengths are incompatible: ")
            [print("*     " + str(tr.stats.npts)) for tr in st]
            print("* -> Skipping")
            print("**************************************************")

            return True, None

        elif not np.allclose(
            [st[0].stats.npts], int((end - start) * sr), atol=1):
            print("* Length is too short: ")
            print("*    " + str(st[0].stats.npts) + " ~= " +
                  str(int((end - start) * sr)))
            print("* -> Skipping")
            print("**************************************************")

            return True, None

        else:
            print("* Waveforms Retrieved...")
            return False, st
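
A hedged usage sketch for download_data (the client, station metadata, and time window below are illustrative assumptions, not taken from the original source). An StDb-style station entry is stood in for with a SimpleNamespace carrying only the attributes the function reads:

from types import SimpleNamespace
from obspy import UTCDateTime
from obspy.clients.fdsn import Client

client = Client("IRIS")
# Hypothetical stand-in for an StDb station entry.
sta = SimpleNamespace(network="IU", station="ANMO", channel="BH", location=["00"])

err, st = download_data(client=client,
                        sta=sta,
                        start=UTCDateTime("2015-01-01T00:00:00"),
                        end=UTCDateTime("2015-01-01T00:30:00"),
                        stdata=[])   # no local data, so the FDSN client is queried
if not err:
    print(st)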
Example #40
 def _cal_entropy(info):
     print(np.all(info / NUM_NODES))
     return np.sum(np.multiply(info, np.log(info / NUM_NODES)))
    def compute_pgv_contour_sequence_supplement(self):
        ''' Compute the supplement data representing the PGV sequence.
        '''
        # Load the event metadata from the supplement file.
        meta = self.meta

        # Load the PGV data stream.
        pgv_stream = util.get_supplement_data(self.event_public_id,
                                                  category = 'detectiondata',
                                                  name = 'pgv',
                                                  directory = self.supplement_dir)

        # Trim the stream.
        pgv_stream.trim(starttime = meta['start_time'] - 6,
                        endtime = meta['end_time'] + 6,
                        pad = True)

        inventory = self.project.inventory

        station_nsl = [('MSSNet', x.stats.station, x.stats.location) for x in pgv_stream]
        station_nsl = [':'.join(x) for x in station_nsl]
        stations = [inventory.get_station(nsl_string = x)[0] for x in station_nsl]
        times = pgv_stream[0].times("utcdatetime")
        data = np.array([x.data for x in pgv_stream]).transpose()

        detection_limits = meta['detection_limits']

        sequence_df = None
        last_pgv_df = None
        last_krig_z = None
        no_change_cnt = 0
        
        for k in range(len(times)):
            cur_time = times[k]
            self.logger.info("Computing frame {time}.".format(time = str(cur_time)))
            triggered = []
            for cur_station in stations:
                if cur_station.nsl_string not in detection_limits.keys():
                    cur_trigger = False
                else:
                    cur_detection_limit = detection_limits[cur_station.nsl_string]
                    if cur_time >= cur_detection_limit[0] and cur_time <= cur_detection_limit[1]:
                        cur_trigger = True
                    else:
                        cur_trigger = False
                triggered.append(cur_trigger)

            cur_points = [shapely.geometry.Point(x.x, x.y) for x in stations]
            cur_df = gpd.GeoDataFrame({'geom_vor': [shapely.geometry.Polygon([])] * len(stations),
                                       'geom_stat': cur_points,
                                       'time': [util.isoformat_tz(cur_time)] * len(stations),
                                       'nsl': [x.nsl_string for x in stations],
                                       'x': [x.x for x in stations],
                                       'y': [x.y for x in stations],
                                       'x_utm': [x.x_utm for x in stations],
                                       'y_utm': [x.y_utm for x in stations],
                                       'pgv': data[k, :],
                                       'triggered': triggered},
                                      crs = "epsg:4326",
                                      geometry = 'geom_stat')

            # Add the station amplification factors.
            self.add_station_amplification(cur_df)

            # Compute the corrected pgv values.
            cur_df['pgv_corr'] = cur_df.pgv / cur_df.sa

            # Use only the stations with a valid corrected pgv.
            cur_df = cur_df[cur_df['pgv_corr'].notna()]
            cur_df = cur_df.reset_index()

            # Update the pgv values to keep the event maximum pgv.
            # Track changes of the event maximum pgv.
            if last_pgv_df is not None:
                # Use the current PGV values only if they are higher than
                # the last ones.
                #
                # Update the last_pgv_df with the current df. It is possible that
                # rows are missing or new ones are available.
                # Remove the rows that are not present in cur_df.
                tmp_df = last_pgv_df[last_pgv_df.nsl.isin(cur_df.nsl)]
                # Add the rows that are not present in last_pgv_df.
                mask_df = tmp_df.append(cur_df[~cur_df.nsl.isin(last_pgv_df.nsl)],
                                        ignore_index = True)

                # Sort the two dataframes using the nsl.
                tmp_df = tmp_df.sort_values(by = 'nsl',
                                            ignore_index = True)
                mask_df = mask_df.sort_values(by = 'nsl',
                                              ignore_index = True)

                # Check for matching station NSL codes.
                if np.any(tmp_df['nsl'].values != mask_df['nsl'].values):
                    raise RuntimeError("The station NSL codes of the two dataframes to compare are not equal.")

                # Reset the values for the stations that already had a larger pgv value.
                mask = cur_df.pgv_corr < mask_df.pgv_corr
                cur_df.loc[mask, 'pgv_corr'] = mask_df.loc[mask, 'pgv_corr']

                if np.all(mask):
                    no_change_cnt += 1
                else:
                    no_change_cnt = 0
                self.logger.info('no_change_cnt: ' + str(no_change_cnt))

            # Exit if there was no change of the maximum event pgv data for some time.
            if no_change_cnt >= 5:
                self.logger.info('No change for some time, stop computation of contours.')
                break

            # Keep the last pgv dataframe.
            # Get the rows that are not available in cur_df and keep them.
            if last_pgv_df is not None:
                tmp_df = last_pgv_df[~last_pgv_df.nsl.isin(cur_df.nsl)]
                last_pgv_df = cur_df.copy()
                last_pgv_df = last_pgv_df.append(tmp_df.copy(),
                                                 ignore_index = True)
            else:
                last_pgv_df = cur_df.copy()
           
            # Interpolate to a regular grid using ordinary kriging.
            self.logger.info("Interpolate")
            krig_z, krig_sigmasq, grid_x, grid_y = util.compute_pgv_krigging(x = cur_df.x_utm.values,
                                                                             y = cur_df.y_utm.values,
                                                                             z = np.log10(cur_df.pgv_corr),
                                                                             nlags = 40,
                                                                             verbose = False,
                                                                             enable_plotting = False,
                                                                             weight = True)

            # Update the interpolated pgv values only if they are higher than the last ones.
            #if last_krig_z is not None:
            #    cur_mask = krig_z < last_krig_z
            #    krig_z[cur_mask] = last_krig_z[cur_mask]
            #last_krig_z = krig_z

            self.logger.info("Contours")
            # Compute the contours.
            intensity = np.arange(2, 8.1, 0.1)
            # Add lower and upper limits to catch all the data below or 
            # above the desired intensity range.
            intensity = np.hstack([[-10], intensity, [20]])
            # Use a low intensity_I_pgv value to make sure that the lowest contour
            # level captures all PGV values.
            intensity_pgv = util.intensity_to_pgv(intensity = intensity,
                                                  intensity_I_pgv = 1e-9)

            # Create and delete a figure to prevent pyplot from plotting the
            # contours.
            fig = plt.figure()
            ax = fig.add_subplot(111)
            cs = ax.contourf(grid_x, grid_y, krig_z, np.log10(intensity_pgv[:, 1]))
            contours = util.contourset_to_shapely(cs)
            fig.clear()
            plt.close(fig)
            del ax
            del fig
            del cs

            self.logger.info('dataframe')
            # Create a geodataframe of the contour polygons.
            cont_data = {'time': [],
                         'geometry': [],
                         'intensity': [],
                         'pgv': []}

            for cur_level, cur_poly in contours.items():
                cur_intensity = util.pgv_to_intensity(pgv = [10**cur_level] * len(cur_poly))
                cont_data['time'].extend([util.isoformat_tz(cur_time)] * len(cur_poly))
                cont_data['geometry'].extend(cur_poly)
                cont_data['intensity'].extend(cur_intensity[:, 1].tolist())
                cont_data['pgv'].extend([10**cur_level] * len(cur_poly))
            cur_cont_df = gpd.GeoDataFrame(data = cont_data)

            # Convert the polygon coordinates to EPSG:4326.
            src_proj = pyproj.Proj(init = 'epsg:' + self.project.inventory.get_utm_epsg()[0][0])
            dst_proj = pyproj.Proj(init = 'epsg:4326')
            cur_cont_df = util.reproject_polygons(df = cur_cont_df,
                                                  src_proj = src_proj,
                                                  dst_proj = dst_proj)

            # Clip to the network boundary.
            # Clipping a polygon may create multiple polygons.
            # Therefore create a new dataframe holding only one polygon per
            # entry, thus avoiding possible problems due to a mixture of
            # multipolygons and polygons.
            self.logger.info('Clipping.')
            cont_data = {'time': [],
                         'geometry': [],
                         'intensity': [],
                         'pgv': []}
            for cur_id, cur_row in cur_cont_df.iterrows():
                cur_poly = cur_row.geometry
                clipped_poly = cur_poly.intersection(self.network_boundary.loc[0, 'geometry'])
                self.logger.info(type(clipped_poly))
                if isinstance(clipped_poly, shapely.geometry.multipolygon.MultiPolygon):
                    cont_data['time'].extend([cur_row.time] * len(clipped_poly))
                    cont_data['geometry'].extend([x for x in clipped_poly])
                    cont_data['intensity'].extend([cur_row.intensity] * len(clipped_poly))
                    cont_data['pgv'].extend([cur_row.pgv] * len(clipped_poly))
                else:
                    cont_data['time'].append(cur_row.time)
                    cont_data['geometry'].append(clipped_poly)
                    cont_data['intensity'].append(cur_row.intensity)
                    cont_data['pgv'].append(cur_row.pgv)
            cur_cont_df = gpd.GeoDataFrame(data = cont_data)

            # Remove rows having an empty geometry.
            self.logger.info(cur_cont_df['geometry'])
            cur_cont_df = cur_cont_df[~cur_cont_df['geometry'].is_empty]
            self.logger.info(cur_cont_df['geometry'])
            
            self.logger.info('Appending to sequence.')
            # Add the dataframe to the sequence.
            if sequence_df is None:
                sequence_df = cur_cont_df
            else:
                sequence_df = sequence_df.append(cur_cont_df)

        # Get some event properties to add to the properties of the feature collections.
        props = {'db_id': meta['db_id'],
                 'event_start': util.isoformat_tz(meta['start_time']),
                 'event_end': util.isoformat_tz(meta['end_time']),
                 'sequence_start': min(sequence_df.time),
                 'sequence_end': max(sequence_df.time),
                 'author_uri': self.project.author_uri,
                 'agency_uri': self.project.agency_uri,
                 'station_correction_applied': True}

        # Write the voronoi dataframe to a geojson file.
        filepath = util.save_supplement(self.event_public_id,
                                        sequence_df,
                                        output_dir = self.supplement_dir,
                                        category = 'pgvsequence',
                                        name = 'pgvcontour',
                                        props = props)
        self.logger.info('Saved pgv contour sequence to file %s.', filepath)
Exemplo n.º 42
0
def test_griffinlim_cqt(
    y_chirp,
    hop_length,
    window,
    use_length,
    over_sample,
    fmin,
    res_type,
    pad_mode,
    scale,
    momentum,
    init,
    random_state,
    dtype,
):

    if use_length:
        length = len(y_chirp)
    else:
        length = None

    sr = 22050
    bins_per_octave = 12 * over_sample
    n_bins = 6 * bins_per_octave
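    # A 6-octave CQT with 12 * over_sample bins per octave.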
    C = librosa.cqt(
        y_chirp,
        sr=sr,
        hop_length=hop_length,
        window=window,
        fmin=fmin,
        bins_per_octave=bins_per_octave,
        n_bins=n_bins,
        scale=scale,
        pad_mode=pad_mode,
        res_type=res_type,
    )

    Cmag = np.abs(C)

    y_rec = librosa.griffinlim_cqt(
        Cmag,
        hop_length=hop_length,
        window=window,
        sr=sr,
        fmin=fmin,
        bins_per_octave=bins_per_octave,
        scale=scale,
        pad_mode=pad_mode,
        n_iter=2,
        momentum=momentum,
        random_state=random_state,
        length=length,
        res_type=res_type,
        init=init,
        dtype=dtype,
    )

    y_inv = librosa.icqt(
        Cmag,
        sr=sr,
        fmin=fmin,
        hop_length=hop_length,
        window=window,
        bins_per_octave=bins_per_octave,
        scale=scale,
        length=length,
        res_type=res_type,
    )

    # First check for length
    if use_length:
        assert len(y_rec) == length

    assert y_rec.dtype == dtype

    # Check that the data is okay
    assert np.all(np.isfinite(y_rec))
Exemplo n.º 43
0
    def test_no_other(self):
        """omit "other" land classification types"""
        v = [1, 4, 6, 7]
        expected = [0, 2, 4, 4]
        result = ba.landcover_classification(v)
        self.assertTrue(np.all(expected == result))
Exemplo n.º 44
0
def is_2Dlistlike(x):
    return np.all([is_listlike(xi) for xi in x])
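# Example (assuming is_listlike accepts lists, tuples and similar sequences):
#   is_2Dlistlike([[1, 2], [3, 4]])  # True: every element is list-like
#   is_2Dlistlike([1, 2, 3])         # False: the elements are scalars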
Exemplo n.º 45
0
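# Helper from a background-estimation test; bg_mean and bg_std are the expected
# background statistics, presumably defined in the enclosing test scope.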
def _check_bg_stats(stats):
    # Check that bg mean and std are close
    assert np.all((stats[:, :, 0] - bg_mean) ** 2 < 3 ** 2)
    assert np.all((stats[:, :, 1] - bg_std) ** 2 < 2 ** 2)
Exemplo n.º 46
0
    def test_basic(self):
        """ not too hard """
        v = [1, 4, 6, 7, 11, 16]
        expected = [0, 2, 4, 6]
        result = ba.landcover_classification(v)
        self.assertTrue(np.all(expected == result))
Exemplo n.º 47
0
def it_is_robust_to_different_image_sizes():
    cy_ims, true_aln_offsets = _ims(mea=128)
    pred_aln_offsets, aln_scores = worker._align(cy_ims)
    assert np.all(true_aln_offsets == pred_aln_offsets)
Exemplo n.º 48
0
    def test_no_nonforest(self):
        """omit nonforested landcover types"""
        v = [1, 4, 11, 16]
        expected = [0, 2, 2, 4]
        result = ba.landcover_classification(v)
        self.assertTrue(np.all(expected == result))
Exemplo n.º 49
0
def it_handles_zeros():
    psfs = np.zeros((2, 2, 4, 4))
    got = worker._psf_normalize(psfs)
    assert np.all(got == 0.0)
Exemplo n.º 50
0
def it_is_robust_to_different_peak_sizes():
    cy_ims, true_aln_offsets = _ims(std=3.0)
    pred_aln_offsets, aln_scores = worker._align(cy_ims)
    assert np.all(true_aln_offsets == pred_aln_offsets)
Exemplo n.º 51
0
def it_handles_all_zeros():
    _, calib = _setup(1.0)
    all_zeros = np.zeros((2, 4, 512, 512))
    bal_ims = worker._regional_balance_chcy_ims(all_zeros, calib)
    assert np.all(np.abs(bal_ims - (0 - 100) * 1) < 1.0)
Exemplo n.º 52
0
def it_removes_the_noise_floor():
    cy_ims, true_aln_offsets = _ims()
    pred_aln_offsets, aln_scores = worker._align(cy_ims)
    assert np.all(true_aln_offsets == pred_aln_offsets)
Exemplo n.º 53
0
        d = safe_eval(header)
    except SyntaxError, e:
        msg = "Cannot parse header: %r\nException: %r"
        raise ValueError(msg % (header, e))
    if not isinstance(d, dict):
        msg = "Header is not a dictionary: %r"
        raise ValueError(msg % d)
    keys = d.keys()
    keys.sort()
    if keys != ['descr', 'fortran_order', 'shape']:
        msg = "Header does not contain the correct keys: %r"
        raise ValueError(msg % (keys, ))

    # Sanity-check the values.
    if (not isinstance(d['shape'], tuple)
            or not numpy.all([isinstance(x, (int, long))
                              for x in d['shape']])):
        msg = "shape is not valid: %r"
        raise ValueError(msg % (d['shape'], ))
    if not isinstance(d['fortran_order'], bool):
        msg = "fortran_order is not a valid bool: %r"
        raise ValueError(msg % (d['fortran_order'], ))
    try:
        dtype = numpy.dtype(d['descr'])
    except TypeError, e:
        msg = "descr is not a valid dtype descriptor: %r"
        raise ValueError(msg % (d['descr'], ))

    return d['shape'], d['fortran_order'], dtype


def write_array(fp, array, version=(1, 0)):
Exemplo n.º 54
0
def it_normalizes_4_dim():
    psfs = np.ones((2, 2, 4, 4))
    got = worker._psf_normalize(psfs)
    assert got.shape == (2, 2, 4, 4) and np.all(got == 1.0 / 16.0)
Exemplo n.º 55
0
def test_slice_tails():
    profiler = Profile().from_tuples(PROFILER).resample_x(0.1)
    lt_tail, rt_tail = profiler.slice_tails()
    assert np.all(lt_tail.x < min(rt_tail.x))
    assert np.all(rt_tail.x > max(lt_tail.x))
Exemplo n.º 56
0
def test_ci_report_with_ndigits(confidence_interval, ndigits):
    """Verify output of CI report when specifiying ndigits."""
    report_split = ci_report(confidence_interval, ndigits=ndigits).split('\n')
    period_values = [val for val in report_split[2].split()[2:]]
    length = [len(val.split('.')[-1]) for val in period_values]
    assert np.all(np.equal(length, ndigits))
Exemplo n.º 57
0
    def loadData(self, input_dir, name_run, script_dir, data_type,
                 data_type_lo, delTmax, delTmin, tau, tfa_bool, timehorizon,
                 percent_LO_points, num_ets_lo, time_step, thres_coeff_var,
                 prior_type, prior_file):
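        # Assemble the training data: read expression and metadata, de-duplicate
        # condition names, split off the leave-out points and optionally compute
        # transcription factor activities before returning the design/response
        # matrices used downstream.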

        str_output = ""
        uniq_dups = []

        np.random.seed(self.rnd_seed)
        pps = Preprocess(self.rnd_seed)

        pps.delTmax = delTmax
        pps.delTmin = delTmin
        pps.tau = tau
        pps.input_dir = input_dir
        pps.str_output = str_output
        pps.flag_print = self.flag_print
        pps.priors_file = prior_file

        #IF CONDITIONS HAVE DUPLICATED NAMES, PRINT A META DATA FILE CALLED "meta_data_uniq.tsv" with only unique conds
        metadata_1 = pps.input_dataframe(pps.meta_data_file,
                                         has_index=False,
                                         strict=False)
        num_dups_conds = len(
            metadata_1.condName[metadata_1.condName.duplicated(keep=False)])

        if num_dups_conds > 0:
            uniq_dups = (metadata_1.condName[metadata_1.condName.duplicated(
                keep=False)]).unique()
            num_uniq_dups = len(uniq_dups)
            if self.flag_print:
                print("name of duplicated conds in meta data: ",
                      num_dups_conds)
                print("number of unique in dups conds", num_uniq_dups)
            metadata_1.set_index(['condName'], inplace=True)

            metadata_1_series = metadata_1.groupby(level=0).cumcount()
            metadata_1_series = "repet" + metadata_1_series.astype(str)
            metadata_1.index = metadata_1.index + metadata_1_series.replace(
                'repet0', '')
            #metadata_1.index = metadata_1.index + "_dup_"+ metadata_1.groupby(level=0).cumcount().astype(str).replace('0','')

            #The following code is to fix names of prevCol for duplicated conditions
            metadata_copy = metadata_1.copy()
            name_prev_cond = np.nan
            count = 0
            for index, row in (metadata_1[metadata_1.isTs == True]).iterrows():
                if (row['is1stLast'] == 'm') or (row['is1stLast'] == 'l'):
                    if row['prevCol'] != name_prev_cond:
                        if self.flag_print:
                            print(index, row)
                        metadata_copy.at[index, 'prevCol'] = name_prev_cond
                        count = count + 1
                name_prev_cond = index

            if self.flag_print:
                print(count)
            if count != num_dups_conds - num_uniq_dups:
                raise ValueError('Wrong meta data format')

            #metadata_copy.drop(['Unnamed: 0'], axis=1, inplace=True)
            metadata_copy.reset_index(inplace=True)
            metadata_copy.columns = [
                'condName', 'isTs', 'is1stLast', 'prevCol', 'del.t'
            ]
            cols = ['isTs', 'is1stLast', 'prevCol', 'del.t', 'condName']
            metadata_copy = metadata_copy[cols]

            pps.meta_data_file = "meta_data_uniq.tsv"
            path_file = pps.input_path(pps.meta_data_file)
            # metadata_copy.is1stLast = '"' + metadata_copy.is1stLast + '"'
            # metadata_copy.prevCol = '"' + metadata_copy.prevCol + '"'
            # metadata_copy.condName = '"' + metadata_copy.condName + '"'
            # metadata_copy.columns = ['"isTs"', '"is1stLast"', '"prevCol"', '"del.t"', '"condName"']
            metadata_copy.to_csv(path_file, sep="\t", index=False,
                                 na_rep='NA')  #, quoting=csv.QUOTE_NONE)

            #Add to expression file duplicated conds, this is important for how the leave-out section is implemented
            expression_1 = pps.input_dataframe(pps.expression_matrix_file,
                                               has_index=False,
                                               strict=False)
            count = 0
            for ud in uniq_dups:
                pattern = re.compile(ud + "repet" + r"\d")
                for cond_tmp in metadata_copy.condName:
                    if pattern.match(cond_tmp):
                        expression_1[cond_tmp] = expression_1[ud]
                        count = count + 1

            if count != num_dups_conds - num_uniq_dups:
                raise ValueError('Wrong expression/meta_data format')

            col_arr = (np.asarray(expression_1.columns[1:]))
            expression_1.columns = np.insert(col_arr, 0, "")
            pps.expression_matrix_file = "expression_new.tsv"
            path_file = pps.input_path(pps.expression_matrix_file)
            expression_1.to_csv(path_file, sep="\t", index=False,
                                na_rep='NA')  #, quoting=csv.QUOTE_NONE)

        #END CODE FOR PRINTING NEW UNIQUE META DATA FILE AND NEW EXPRESSION FILE

        str_output = pps.get_data(thres_coeff_var, str_output, prior_type)

        pps.compute_common_data(uniq_dups, time_step)

        #CODE FOR LEAVE OUT DATA
        TS_vectors, steady_state_cond, index_steady_state, num_total_timeseries_points = self.readDatasetFromMetaDataFile(
            pps.meta_data)

        #Parse data to dynGenie3 format in case parse_4dyng3 is set to "True"

        # print pps.expression_matrix.head()
        # print pps.expression_matrix.index.tolist()
        # print pps.expression_matrix.loc["G1", :]

        if self.parse_4dyng3:
            #(TS_data,time_points,genes,TFs,alphas)

            # import sys
            # reload(sys)
            # sys.setdefaultencoding('utf8')
            print("Start parsing data to dynGenie3 format")
            TS_data = list()
            time_points = list()
            genes = pps.expression_matrix.index.tolist()
            genes = np.asarray(genes).astype(str)
            genes = genes.tolist()
            num_gene_names = len(genes)
            alphas = [0.02] * num_gene_names
            alphas = np.asarray(alphas).astype(float)
            alphas = alphas.tolist()

            for ts_tmp in TS_vectors:
                #for loop over a single timeseries

                ts_tmp_vect = list(ts_tmp.keys())

                num_time_points_intstmp = len(ts_tmp_vect)

                ts_dynGenie3 = np.zeros(
                    (num_time_points_intstmp, num_gene_names))
                ts_dynGenie3 = np.transpose(
                    pps.expression_matrix.loc[:, ts_tmp_vect])
                TS_data.append(np.asarray(ts_dynGenie3))

                time_points_i = np.zeros(num_time_points_intstmp)

                for j, key in enumerate(ts_tmp_vect):
                    time_points_i[j] = float(ts_tmp[key])

                time_points.append(time_points_i)

            # print TS_data
            # print type(TS_data[1])

            SS_data = np.transpose(pps.expression_matrix[steady_state_cond])

            #(TS_data,time_points,genes,TFs,alphas)
            TFs = np.asarray(pps.tf_names).astype(str)
            TFs = TFs.tolist()

            TS_data_file = "TS_data.pkl"
            path_file = pps.input_path(TS_data_file)
            with open(path_file, 'wb') as f:
                pickle.dump([TS_data, time_points, genes, TFs, alphas], f)
            # cPickle.dump(TS_data, f)
            # print type(TS_data)
            # cPickle.dump(time_points, f)
            # print type(time_points)
            # cPickle.dump(alphas, f)
            # print type(alphas)
            # cPickle.dump(genes, f)
            # print type(genes)
            f.close()
            # with open(output_path_estimators+'/Gene'+str(output_idx), 'rb') as f:
            #     treeEstimator = cPickle.load(f)
            SS_data_file = "SS_data.txt"
            path_file = pps.input_path(SS_data_file)
            SS_data.to_csv(path_file, sep="\t", index=False, na_rep='NA')
            print("End parsing data to dynGenie3 format")
            # # #END parse data to dynGenie3 format

        #Debug
        # pps.design.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_design.txt", sep="\t")
        # pps.response.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_response.txt", sep="\t")
        # pps.meta_data.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_meta_data.txt", sep="\t")

        if data_type == "TS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "TS")):
            if num_ets_lo > 0:
                ts_lopoints_x, ts_lopoints_y, timeseries_indices_lo = self.choose_LO_timeseries_random_withTimehorizon(
                    num_ets_lo, TS_vectors, timehorizon)
            else:
                ts_lopoints_x, ts_lopoints_y, t0_lopoints, timeseries_indices_lo = self.choose_timeseries_LO_lastPoints_random_withTimehorizon(
                    percent_LO_points, num_total_timeseries_points, TS_vectors,
                    timehorizon)

        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):
            ss_lo_cond_names = list()
            ss_lo_cond_names = np.asarray(ss_lo_cond_names)
            ss_lo_indices = list()
            ss_lo_indices = np.asarray(ss_lo_indices)

            if len(steady_state_cond) > 0:
                ss_lo_cond_names, ss_lo_indices = self.choose_steadystate_LO_points_random(
                    percent_LO_points, steady_state_cond)

        #Debug
        # print "num_total_timeseries_points", num_total_timeseries_points
        # print "len(ss_lo_cond_names)", len(steady_state_cond)
        # print "len(pps.meta_data)", len(pps.meta_data)

        #TS_vectors, steady_state_cond, index_steady_state, num_total_timeseries_points
        # TS_vectors [OrderedDict([('S0_1', 0),
        #               ('S1_1', 60.0),
        #               ('S2_1', 120.0),
        #               ('S3_1', 180.0),
        #               ('S4_1', 240.0),
        #               ('S5_1', 300.0),
        #               ('S6_1', 360.0)]),
        #  OrderedDict([('S0_2', 0),
        #               ('S1_2', 60.0),
        #               ('S2_2', 120.0),
        #               ('S3_2', 180.0),
        #               ('S4_2', 240.0),
        #               ('S5_2', 300.0),
        #               ('S6_2', 360.0)]),......]
        # steady_state_cond
        # array(['LBexp_1', 'LBexp_2', 'LBexp_3',....]

        # index_steady_state
        # array([163, 164, 165, 166, 167,....]

        # num_total_timeseries_points
        # 163

        #Leave-out Time-series points
        #ts_lopoints_x, ts_lopoints_y, timeseries_indices_lo
        # timeseries_indices_lo left out
        # array([31, 15, 26, 17])
        # ts_lopoints_x, ts_lopoints_y
        # OrderedDict([('MG+90_2', 95.0), ('SMM_1', 0), ('dia5_3', 5.0), ('SMM_3', 0)])
        # OrderedDict([('MG+120_2', 125.0), ('Salt_1', 10.0), ('dia15_3', 15.0), ('Salt_3', 10.0)])

        #Leave-out Steady state points
        #ss_lo_cond_names, ss_lo_indices
        # array(['H2O2_1', 'LBGexp_2', 'LBtran_2', ....]
        # array([100,  10,   4,  81,  97,  65, ... ]

        if self.flag_print:
            print("Shape of design var before leaving-out data: ",
                  str(pps.design.shape))
            print("Shape of response var before leaving-out data: ",
                  str(pps.response.shape))

        str_output = str_output + "Shape of design var before leaving-out data: " + str(
            pps.design.shape) + "\n"
        str_output = str_output + "Shape of response var before leaving-out data: " + str(
            pps.response.shape) + "\n"

        #Debug
        # w = csv.writer(open("ts_lopoints_x.csv", "w"))
        # for key, val in ts_lopoints_x.items():
        #     w.writerow([key, val])

        # pps.design.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_design.txt", sep="\t")
        # pps.response.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_response.txt", sep="\t")

        #Before splitting the dataset in training and test, check if want to learn on SS only or TS only
        if data_type == "SS":
            str_output = str_output + "::::::::STEADY-STATE ONLY - LOOK AT JUST THE SHAPES OF DESIGN AND RESPONSE VARIABLES" + "\n"
            only_steady_state_indxes = (
                pps.design.columns.isin(steady_state_cond))
            pps.design = pps.design.loc[:,
                                        only_steady_state_indxes]  #, axis=1, inplace=True)
            pps.response = pps.response.loc[:,
                                            only_steady_state_indxes]  #, axis=1, inplace=True)
            pps.half_tau_response = pps.half_tau_response.loc[:,
                                                              only_steady_state_indxes]

            pps.delta_vect = pps.delta_vect.loc[:, (
                pps.delta_vect.columns.isin(steady_state_cond)
            )]  #, axis=1, inplace=True)

        if data_type == "TS":
            str_output = str_output + "::::::::TIME-SERIES ONLY - LOOK AT JUST THE SHAPES OF DESIGN AND RESPONSE VARIABLES" + "\n"
            pps.design.drop(steady_state_cond, axis=1, inplace=True)
            pps.response.drop(steady_state_cond, axis=1, inplace=True)
            pps.half_tau_response.drop(steady_state_cond, axis=1, inplace=True)

            pps.delta_vect.drop(steady_state_cond, axis=1, inplace=True)

        # print "Shape of design design before splitting: "+str(pps.design.shape)
        # print "Shape of response response before splitting: "+str(pps.response.shape)
        #
        # design_tmp = pps.design
        # tfs_tmp = list(set(pps.tf_names).intersection(pps.expression_matrix.index))
        # X_tmp = np.asarray(design_tmp.loc[tfs_tmp,:].values)
        # X_tmp = (X_tmp - (X_tmp.mean(axis=1)).reshape(-1,1)) / (X_tmp.std(axis=1)).reshape(-1,1)
        # design_tmp_2 = pd.DataFrame(X_tmp ,index = tfs_tmp, columns = design_tmp.columns)
        # pps.design = design_tmp_2
        #
        # print "Shape of design after normalization/standardization: ", pps.design.shape
        #
        # response_tmp = pps.response
        # Y_tmp = np.asarray(response_tmp.values)
        # Y_tmp = (Y_tmp - (Y_tmp.mean(axis=1)).reshape(-1,1)) / (Y_tmp.std(axis=1)).reshape(-1,1)
        # response_tmp_2 = pd.DataFrame(Y_tmp ,index = response_tmp.index, columns = response_tmp.columns)
        # pps.response = response_tmp_2
        #
        # print "Shape of response after normalization/standardization: ", pps.response.shape

        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):
            #Leaving out Steady state points
            pps.leave_out_ss_design = pps.design[ss_lo_cond_names]
            pps.design.drop(ss_lo_cond_names, axis=1, inplace=True)
            pps.leave_out_ss_response = pps.response[ss_lo_cond_names]
            pps.response.drop(ss_lo_cond_names, axis=1, inplace=True)
            pps.half_tau_response.drop(ss_lo_cond_names, axis=1, inplace=True)
            if self.flag_print:
                print("Shape of leave out SS design var: ",
                      pps.leave_out_ss_design.shape)
                print("Shape of leave out SS response var: ",
                      pps.leave_out_ss_response.shape)

            pps.delta_vect.drop(ss_lo_cond_names, axis=1, inplace=True)

        if data_type == "TS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "TS")):
            #Leaving out Time series points
            pps.leave_out_ts_design = pps.design[list(ts_lopoints_x.keys())]
            pps.design.drop(list(ts_lopoints_x.keys()), axis=1, inplace=True)
            pps.leave_out_ts_response = pps.response[list(
                ts_lopoints_x.keys())]
            pps.response.drop(list(ts_lopoints_x.keys()), axis=1, inplace=True)
            pps.half_tau_response.drop(list(ts_lopoints_x.keys()),
                                       axis=1,
                                       inplace=True)
            if self.flag_print:
                print("Shape of leave out TS design var: ",
                      pps.leave_out_ts_design.shape)
                print("Shape of leave out TS response var: ",
                      pps.leave_out_ts_response.shape)

            pps.delta_vect.drop(list(ts_lopoints_x.keys()),
                                axis=1,
                                inplace=True)

        if self.flag_print:
            print("Shape of design var after leaving-out data: ",
                  pps.design.shape)
            print("Shape of response var after leaving-out data: ",
                  pps.response.shape)

        str_output = str_output + "Shape of design var after leaving-out data: " + str(
            pps.design.shape) + "\n"
        str_output = str_output + "Shape of response var after leaving-out data: " + str(
            pps.response.shape) + "\n"

        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):
            str_output = str_output + "Shape of leave out SS design var: " + str(
                pps.leave_out_ss_design.shape) + "\n"
            str_output = str_output + "Shape of leave out SS response var: " + str(
                pps.leave_out_ss_response.shape) + "\n"

        if data_type == "TS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "TS")):
            str_output = str_output + "Shape of leave out TS design var: " + str(
                pps.leave_out_ts_design.shape) + "\n"
            str_output = str_output + "Shape of leave out TS response var: " + str(
                pps.leave_out_ts_response.shape) + "\n"

        #END CODE FOR LEAVE OUT DATA

        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):
            steady_state_cond_new = list(steady_state_cond.copy())
            for element in ss_lo_cond_names:
                steady_state_cond_new.remove(element)
        else:
            steady_state_cond_new = steady_state_cond

        index_steady_state_new = []
        indexes_all = list(range(0, len(pps.design.columns)))
        delta_vect = list()
        #Debug
        #print len(indexes_all)
        if data_type == "SS" or data_type == "TS-SS":
            for element in steady_state_cond_new:
                index_steady_state_new.append(
                    pps.design.columns.get_loc(element))
            index_steady_state_new = np.asarray(index_steady_state_new)

        index_time_points_new = []
        if data_type == "TS" or data_type == "TS-SS":
            index_time_points_new = set(indexes_all) - set(
                index_steady_state_new)
            index_time_points_new = np.asarray(list(index_time_points_new))

        #Debug
        #print len(index_time_points_new)
        #print len(index_steady_state_new)

        #Debug
        # print "pps.priors_data.shape", pps.priors_data.shape
        # print "len(pps.priors_data.abs().sum(axis=0))", len(pps.priors_data.abs().sum(axis=0))
        # print "len(pps.priors_data.abs().sum(axis=0))", len(pps.priors_data.abs().sum(axis=1))
        # print "len(pps.priors_data.sum(axis=0))", len(pps.priors_data.sum(axis=0))
        # print "type(np.abs(pps.priors_data))", type(np.abs(pps.priors_data))
        # pps.priors_data.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_ppspriors_data.txt", sep="\t")
        # pps.gold_standard.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_ppsgold_standard.txt", sep="\t")
        # print type(pps.gold_standard)
        # pps.design.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_design.txt", sep="\t")
        # pps.response.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_response.txt", sep="\t")

        if prior_type == "binary_all":
            num_edges_prior = np.sum(pps.priors_data.values != 0)
        num_edges_gs = np.sum(pps.gold_standard.values != 0)
        if self.flag_print:
            if prior_type == "binary_all":
                print("Number of edges in the prior: ", num_edges_prior,
                      pps.priors_data.shape)
            print(
                "Number of edges in the evaluation part of the gold standard: ",
                num_edges_gs, pps.gold_standard.shape)
        if prior_type == "binary_all":
            str_output = str_output + "Number of edges in the prior: " + str(
                num_edges_prior) + str(pps.priors_data.shape) + "\n"
        str_output = str_output + "Number of edges in the evaluation part of the gold standard: " + str(
            num_edges_gs) + str(pps.gold_standard.shape) + "\n"

        # print "pps.activity.shape", pps.activity.shape
        # print pps.expression_matrix.shape
        # print len(pps.tf_names)
        # print pps.gold_standard.shape
        # print pps.response.shape

        if tfa_bool:
            #compute_activity()
            # """
            # Compute Transcription Factor Activity
            # """
            if self.flag_print:
                print('Computing Transcription Factor Activity ... ')
            tfs = list(
                set(pps.tf_names).intersection(pps.expression_matrix.index))
            #TFA_calculator = TFA(pps.priors_data, pps.design, pps.half_tau_response, tfs)
            pps.activity = pps.compute_transcription_factor_activity(tfs)
            #pps.activity, pps.priors_data= TFA_calculator.compute_transcription_factor_activity()

        else:
            if self.flag_print:
                print(
                    'Using just expression, NO Transcription Factor Activity')
            expression_matrix = pps.design
            tfs = list(
                set(pps.tf_names).intersection(pps.expression_matrix.index))
            activity = pd.DataFrame(expression_matrix.loc[tfs, :].values,
                                    index=tfs,
                                    columns=expression_matrix.columns)
            if self.flag_print:
                print(('Design matrix of shape: {}'.format(activity.shape)))
            pps.activity = activity

        tf_names = pps.activity.index.tolist(
        )  #pps.priors_data.columns #pps.tf_names

        #Leave-out SS
        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):
            expression_matrix_lo_ss = pps.leave_out_ss_design
            leave_out_ss_design = pd.DataFrame(
                expression_matrix_lo_ss.loc[tf_names, :].values,
                index=tf_names,
                columns=expression_matrix_lo_ss.columns)
            pps.leave_out_ss_design = leave_out_ss_design

        #Leave-out TS
        if data_type == "TS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "TS")):
            expression_matrix_lo_ts = pps.leave_out_ts_design
            leave_out_ts_design = pd.DataFrame(
                expression_matrix_lo_ts.loc[tf_names, :].values,
                index=tf_names,
                columns=expression_matrix_lo_ts.columns)
            pps.leave_out_ts_design = leave_out_ts_design

        expression = pps.expression_matrix  #this is the initial one but then there is filtering and stuff

        goldstandard = pps.gold_standard
        genelist = pps.response.index.tolist(
        )  #pps.expression_matrix.index.tolist()
        numtfs = len(tf_names)

        X = pps.activity.transpose().values  #X [n_samples, n_features]
        y = pps.response.transpose().values  #y [n_samples, num_genes]

        if self.flag_print:
            print("Shape of design var X: " + str(X.shape))
            print("Shape of response var Y: " + str(y.shape))
        str_output = str_output + "Shape of design var X: " + str(
            X.shape) + "\n"
        str_output = str_output + "Shape of response var Y: " + str(
            y.shape) + "\n"

        if self.flag_print:
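            # Sanity checks: no NaNs expected (prints False) and only finite
            # values expected (prints True).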
            print("X False", np.any(np.isnan(X)))

            print("X True", np.all(np.isfinite(X)))

            print("y False", np.any(np.isnan(y)))

            print("y True", np.all(np.isfinite(y)))

        X = np.float64(X)

        y = np.float64(y)

        output_path = script_dir + "/output/" + name_run + "_numgenes" + str(
            len(genelist)) + "_numtfs" + str(numtfs)

        if not os.path.exists(output_path):
            os.makedirs(output_path)
        # else:
        # 	if self.poot or not(self.auto_meth):
        # 		num_folders = len([name for name in os.listdir(script_dir+"/output/") if
        # 							   os.path.isdir(os.path.join(script_dir+"/output/",name)) and (name_run+"_numgenes"+str(len(genelist))+"_numtfs"+str(numtfs)) in name])
        # 		os.makedirs(output_path + "_" + str(num_folders))
        # 		output_path = output_path + "_" + str(num_folders)

        if prior_type == "binary_all":
            if not os.path.exists(input_dir + "/priors"):
                os.makedirs(input_dir + "/priors")

        if prior_type == "binary_all":
            #Save plot of prior number of targets for each TF distribution
            priors_data_tmp = np.abs(pps.priors_data)
            index_tmp = priors_data_tmp.sum(axis=0) != 0
            prior_num_tfs = np.sum(index_tmp)
            #Debug print TFs
            #print priors_data_tmp.columns[index_tmp]
            #Debug #print priors_data_tmp.sum(axis=0)[index_tmp]
            max_outdegree = np.max(priors_data_tmp.sum(axis=0)[index_tmp])
            #Debug #print "max_outdegree", max_outdegree
            max_outdegree = int(max_outdegree)
            out_prior_tfs_outdegrees = "Num of TFs in prior: " + str(
                prior_num_tfs
            ) + " Mean and var of targets for TFs in prior: " + str(
                np.mean(priors_data_tmp.sum(axis=0)[index_tmp])) + " , " + str(
                    np.std(priors_data_tmp.sum(axis=0)[index_tmp]))
            str_output = str_output + out_prior_tfs_outdegrees + "\n"
            ax = priors_data_tmp.sum(axis=0)[index_tmp].plot(
                kind="hist", bins=list(range(0, max_outdegree + 1)))
            ax.set_title("Prior outdegrees distribution")
            ax.set_xlabel("outdegree of TFs ( i.e. TFs num of targets)")
            if self.flag_print:
                plt.savefig(output_path +
                            "/Prior outdegrees distribution_numTFs" +
                            str(prior_num_tfs) + "_numEdges" +
                            str(num_edges_prior))
            plt.close()

        #Save plot of Eval GS number of targets for each TF distribution
        gold_standard_tmp = np.abs(pps.gold_standard)
        index_tmp2 = gold_standard_tmp.sum(axis=0) != 0
        gs_num_tfs = np.sum(index_tmp2)
        max_outdegree2 = np.max(gold_standard_tmp.sum(axis=0)[index_tmp2])
        max_outdegree2 = int(max_outdegree2)
        #Debug #print gold_standard_tmp.sum(axis=0)[index_tmp2]
        #Debug #print max_outdegree2
        out_gs_tfs_outdegrees = "Num of TFs in eval gold standard: " + str(
            gs_num_tfs
        ) + " Mean and var of targets for TFs in eval GS: " + str(
            np.mean(gold_standard_tmp.sum(axis=0)[index_tmp2])) + " , " + str(
                np.std(gold_standard_tmp.sum(axis=0)[index_tmp2]))
        str_output = str_output + out_gs_tfs_outdegrees + "\n"
        #Debug print TFs
        #print gold_standard_tmp.columns[index_tmp2]
        ax1 = gold_standard_tmp.sum(axis=0)[index_tmp2].plot(
            kind="hist", bins=list(range(0, max_outdegree2 + 1)))
        ax1.set_title("Eval Gold standard outdegrees distribution")
        ax1.set_xlabel("outdegree of TFs ( i.e. TFs num of targets)")
        if self.flag_print:
            plt.savefig(output_path +
                        "/Eval Gold standard outdegrees distribution_numTFs" +
                        str(gs_num_tfs) + "_numEdges" + str(num_edges_gs))
        plt.close()

        if prior_type == "binary_all":
            #Write gold standard priors to file
            pps.priors_data.to_csv(input_dir + "/priors/" + prior_file,
                                   sep="\t")

        if self.flag_print:
            outfile = open(output_path + "/_preprocessing.txt", 'w')
            outfile.write("Run name: " + str(name_run) + "\n")
            outfile.write(str_output)

        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):
            if len(steady_state_cond) > 0:
                #Debug
                if self.flag_print:
                    print("Leave-out points for steady state: ",
                          ss_lo_cond_names, ss_lo_indices)
                    outfile.write("Leave-out points for steady state: " +
                                  str(ss_lo_cond_names) + str(ss_lo_indices) +
                                  "\n")

        if data_type == "TS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "TS")):
            if self.flag_print:
                print("Leave-out points for timeseries: ", ts_lopoints_x,
                      ts_lopoints_y, timeseries_indices_lo)
                outfile.write("Leave-out points for timeseries: " +
                              str(ts_lopoints_x) + str(ts_lopoints_y) +
                              str(timeseries_indices_lo) + "\n")

        # print "New dimensions after coeff of var filter..."
        # outfile.write("New dimensions after coeff of var filter... \n")
        if self.flag_print:
            print("Expression dim: ", expression.shape)
            outfile.write("Expression dim: " + str(expression.shape) + "\n")
        if self.flag_print:
            print("Num of tfs: ", len(tf_names))
            outfile.write("Num of tfs: " + str(len(tf_names)) + "\n")
        if self.flag_print:
            print("Num of genes: ", len(genelist))
            outfile.write("Num of genes: " + str(len(genelist)) + "\n")
        if self.flag_print:
            if prior_type == "binary_all":
                print("Priors dim: ", pps.priors_data.shape)
                outfile.write("Priors dim: " + str(pps.priors_data.shape) +
                              "\n")
        if self.flag_print:
            print("Goldstandard dim: ", goldstandard.shape)
            outfile.write("Goldstandard dim: " + str(goldstandard.shape) +
                          "\n")

        #Print INFO to log file
        if self.flag_print:
            print("The number of genes is: ", len(genelist))
            outfile.write("The number of genes is: " + str(len(genelist)) +
                          "\n")
        if self.flag_print:
            print("The number of TFs is: ", len(tf_names))
            outfile.write("The number of TFs is: " + str(len(tf_names)) + "\n")
        if self.flag_print:
            print("The total Number of data points in the dataset is: ",
                  len(pps.meta_data))
            outfile.write(
                "The total Number of data points in the dataset is: " +
                str(len(pps.meta_data)) + "\n")
        if self.flag_print:
            print("The total number of time series is: ", len(TS_vectors))
            outfile.write("The total number of time series is: " +
                          str(len(TS_vectors)) + "\n")
        if self.flag_print:
            print("The number of total time points is: ",
                  num_total_timeseries_points)
            outfile.write("The number of total time points is: " +
                          str(num_total_timeseries_points) + "\n")
        if self.flag_print:
            print("The number of total steady state points is: ",
                  len(steady_state_cond))
            outfile.write("The number of total steady state points is: " +
                          str(len(steady_state_cond)) + "\n")

        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):
            if self.flag_print:
                print(
                    "The percentage of leave-out steady state points is: ",
                    str(100 * float(len(ss_lo_indices)) /
                        len(steady_state_cond)))
                outfile.write(
                    "The percentage of leave-out steady state points is: " +
                    str(100 * float(len(ss_lo_indices)) /
                        len(steady_state_cond)) + "\n")

        if data_type == "TS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "TS")):
            if self.flag_print:
                print(
                    "The percentage of leave-out time series points is: ",
                    str(100 * float(len(timeseries_indices_lo)) /
                        num_total_timeseries_points))
                outfile.write(
                    "The percentage of leave-out time series points is: " +
                    str(100 * float(len(timeseries_indices_lo)) /
                        num_total_timeseries_points) + "\n")
                outfile.close()

        #All variables that can be returned if necessary
        # (All points)
        # TS_vectors, steady_state_cond, num_total_timeseries_points

        # #Training and leave out points
        # index_time_points_new, index_steady_state_new, pps.leave_out_ss_design(X_test_ss), pps.leave_out_ss_response, pps.leave_out_ts_design, pps.leave_out_ts_response

        # #leave out points
        # ss_lo_cond_names, ts_lopoints_x, ts_lopoints_y, timeseries_indices_lo

        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):

            X_test_ss = pps.leave_out_ss_design.transpose().values

            y_test_ss = pps.leave_out_ss_response.transpose().values
        else:
            X_test_ss = ""
            y_test_ss = ""

        deltas = []
        if data_type == "TS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "TS")):
            X_test_ts = pps.leave_out_ts_design.transpose().values

            y_test_ts = pps.leave_out_ts_response.transpose().values

            ts_lopoints_y_keys = list(ts_lopoints_y.keys())

            for i, k in enumerate(ts_lopoints_x.keys()):
                # #Debug
                # #print "ts_lopoints_x[k]", ts_lopoints_x[k]
                # if float((ts_lopoints_x[k])) == 0:
                # 	log_of_frac = 1
                # else:
                # 	#No log
                # 	#log_of_frac = float(ts_lopoints_y[ts_lopoints_y_keys[i]]) / float((ts_lopoints_x[k]))
                #
                # 	log_of_frac = np.log(float(ts_lopoints_y[ts_lopoints_y_keys[i]]) / float((ts_lopoints_x[k])))
                #deltas.append(log_of_frac)

                #Original
                deltas.append(ts_lopoints_y[ts_lopoints_y_keys[i]] -
                              (ts_lopoints_x[k]))

            y_test_ts_future_timepoint = pps.expression_matrix.loc[
                genelist, ts_lopoints_y_keys].transpose().values

            x_test_ts_current_timepoint = pps.expression_matrix.loc[
                genelist, list(ts_lopoints_x.keys())].transpose().values

            x_test_ts_timepoint0 = pps.expression_matrix.loc[
                genelist, list(t0_lopoints.keys())].transpose().values

        else:
            X_test_ts = ""
            y_test_ts = ""
            y_test_ts_future_timepoint = ""
            x_test_ts_current_timepoint = ""
            x_test_ts_timepoint0 = ""

        #Debug
        #print y_test_ts_future_timepoint
        #print x_test_ts_current_timepoint

        return X, y, genelist, tf_names, goldstandard, output_path, pps.priors_data, X_test_ss, X_test_ts, y_test_ss, y_test_ts, x_test_ts_current_timepoint, y_test_ts_future_timepoint, deltas, x_test_ts_timepoint0, index_steady_state_new, index_time_points_new, pps.design, pps.delta_vect, pps.res_mat2
Exemplo n.º 58
0
    def __init__(self, zs, coords, basis='cc-pvdz'): # fn='test'):
        self.rcs = Elements().rcs
        self.basis = basis
        #self.fn = fn
        #assert np.sum(self.zs)%2 == 0, '#ERROR: spin polarised?'
        RawMol.__init__(self, list(zs), coords)

        spin = sum(self.zs)%2
        symbs = [ chemical_symbols[zi] for zi in self.zs ]
        OBJ = pyscf_object(symbs, coords, basis, spin=spin)
        self.mol = OBJ.mol
        self.nbf = OBJ.mol.nao
        ids = OBJ.mol.offset_ao_by_atom()[:, 2:4]
        ibs, ies = ids[:,0], ids[:,1]
        self.aoidxs = [ np.arange(ibs[i],ies[i]) for i in range(self.na) ]
        self.T0 = pre_orth_ao_atm_scf(OBJ.mol) #
        self.T = np.eye(OBJ.mol.nao)

        _cnsr = {1:1, 6:4, 7:3, 8:2}
        cnsr = np.array([_cnsr[zi] for zi in self.zs], int)
        cns = self.g.sum(axis=0)
        dvs = cnsr - cns
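        # dvs is the valence deficit per atom: the reference coordination number
        # minus the current degree in the connectivity graph. It must not be
        # negative, and atoms with dvs == 1 are treated as sp2 sites below.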

        bidxs = []
        #print 'dvs = ', dvs
        assert np.all(dvs>=0)

        # first add H's to sp3 N and O
        for ia in self.ias:
            zi = self.zs[ia]
            jas = self.ias[self.g[ia]>0]
            d = np.sum( self.rcs[ [1,zi] ] )
            if zi==7 and cns[ia]==3:
                v = get_v_sp3( self.coords[ [ia]+list(jas) ] )
                bidxs.append( [ia,self.coords[ia]+v*d] )
            elif zi==8 and cns[ia]==2:
                v1,v2 = get_v12_sp3( self.coords[ [ia]+list(jas) ] )
                for v in [v1,v2]:
                    bidxs.append( [ia,self.coords[ia]+v*d] )

        # add H's to sp2 C, N and O
        _jas = self.ias[dvs==1]; #print _jas
        if len(_jas) > 0:
            _jasr = cg.find_cliques(self.g[_jas][:,_jas])
            for kdxr in _jasr:
                naj = len(kdxr)
                assert naj%2==0
                jas = _jas[kdxr]
                #print ' * jas = ', jas
                cnsj = cns[jas]
                seq = np.argsort(cnsj)
                vs = []
                for _j in range(naj):
                    j = seq[_j-1]
                    ja = jas[j]
                    #print '  |__ ja = ', ja
                    zj = self.zs[ja]
                    jas2 = self.ias[self.g[ja]>0]
                    nbr = len(jas2)
                    d = np.sum( self.rcs[ [1,zj] ] )
                    if nbr==3 and zj==6:
                        v = get_v3(self.coords[ [ja]+list(jas2) ])
                        vu = update_vs(v,vs); vs.append(vu)
                        bidxs.append( [ja,self.coords[ja]+vu*d] )
                        #print '  |__ dot(v,vs) = ', np.dot([vu],np.array(vs).T)
                    elif nbr==2 and zj==7:
                        v,v1 = get_v2(self.coords[ [ja]+list(jas2) ])
                        for _v in [v,v1]:
                            vu= update_vs(_v,vs); vs.append(vu)
                            bidxs.append( [ja,self.coords[ja]+vu*d] )
                    elif nbr==1 and zj==8:
                        ja2 = jas2[0]
                        vz = vs[list(jas).index(ja2)]
                        vx = self.coords[ja2]-self.coords[ja]
                        v1,v2 = get_v12(vx,vz)
                        for _v in [v,v1,v2]:
                            vu = update_vs(_v,vs); vs.append(vu)
                            bidxs.append( [ja,self.coords[ja]+vu*d] )
                    else:
                        raise ValueError('#unknown case')

        nadd = len(bidxs)
        na = self.na
        if nadd > 0:
            na2 = na + nadd
            g2 = np.zeros((na2, na2)).astype(int)
            g2[:na, :na] = self.g
            ih = na
            cs2 = [] # coords of H's
            for bidx in bidxs:
                ia, ci = bidx
                g2[ih,ia] = g2[ia,ih] = 1
                cs2.append(ci)
                ih += 1

            zs = np.concatenate((self.zs,[1,]*nadd))
            coords = np.concatenate((self.coords,cs2))
            self.zs = zs
            self.coords = coords
            self.g = g2
            self.ias = np.arange(na2)
            self.na = na2
def test_pid_user_input():
    """Test if user input is handled correctly."""
    # Test missing estimator name
    pid = PartialInformationDecomposition()
    with pytest.raises(RuntimeError):
        pid.analyse_single_target(settings={},
                                  data=Data(),
                                  target=0,
                                  sources=[1, 2])

    # Test wrong estimator name
    settings = {'pid_estimator': 'TestPID'}
    with pytest.raises(RuntimeError):
        pid.analyse_single_target(settings=settings,
                                  data=Data(),
                                  target=0,
                                  sources=[1, 2])

    # Test default lags for network_analysis
    settings = {'pid_estimator': 'TartuPID'}
    dat = Data(np.random.randint(0, 10, size=(5, 100)),
               dim_order='ps',
               normalise=False)
    res = pid.analyse_network(settings=settings,
                              data=dat,
                              targets=[0, 1, 2],
                              sources=[[1, 3], [2, 4], [0, 1]])
    assert np.all(res[0]['settings']['lags'] == [1, 1]), (
        'Lags were not set to default.')
    assert np.all(res[1]['settings']['lags'] == [1, 1]), (
        'Lags were not set to default.')
    assert np.all(res[2]['settings']['lags'] == [1, 1]), (
        'Lags were not set to default.')

    n = 20
    alph = 2
    x = np.random.randint(0, alph, n)
    y = np.random.randint(0, alph, n)
    z = np.logical_xor(x, y).astype(int)
    dat = Data(np.vstack((x, y, z)), 'ps', normalise=False)

    # Test two-tailed significance test
    settings = {'pid_estimator': 'TartuPID', 'tail': 'two', 'lags': [0, 0]}
    pid = PartialInformationDecomposition()

    with pytest.raises(RuntimeError):  # Test incorrect number of sources
        pid.analyse_single_target(settings=settings,
                                  data=dat,
                                  target=2,
                                  sources=[1, 2, 3])
    settings['lags'] = [0, 0, 0]
    with pytest.raises(RuntimeError):  # Test incorrect number of lags
        pid.analyse_single_target(settings=settings,
                                  data=dat,
                                  target=2,
                                  sources=[1, 3])
    settings['lags'] = [n * 3, 0]
    with pytest.raises(RuntimeError):  # Test lag > no. samples
        pid.analyse_single_target(settings=settings,
                                  data=dat,
                                  target=2,
                                  sources=[0, 1])
    settings['lags'] = [n, 0]
    with pytest.raises(RuntimeError):  # Test lag == no. samples
        pid.analyse_single_target(settings=settings,
                                  data=dat,
                                  target=2,
                                  sources=[0, 1])
    settings['lags'] = [0, 0]
    with pytest.raises(RuntimeError):  # Test target in sources
        pid.analyse_single_target(settings=settings,
                                  data=dat,
                                  target=2,
                                  sources=[2, 3])
    with pytest.raises(IndexError):  # Test target not in processes
        pid.analyse_single_target(settings=settings,
                                  data=dat,
                                  target=5,
                                  sources=[0, 1])
def test_slice_shoulders():
    profiler = Profile().from_tuples(PROFILER).resample_x(0.1)
    lt_should, rt_should = profiler.slice_shoulders()
    assert np.all(lt_should.x < min(rt_should.x))
    assert np.all(rt_should.x > max(lt_should.x))