Example #1
def masks(mask: np.ndarray) -> Sequence[np.ndarray]:
    masks = [mask]
    mask2 = mask.copy()
    mask2[0, 0, 0] = 1
    masks.append(mask2)
    mask3 = mask.copy()
    mask3[2, 2, 2] = 0
    masks.append(mask3)
    return masks
Example #2
    def __init__(self, train_x: np.ndarray, train_y: np.ndarray, features_name=None, do_standardization=True):
        # ensure that train_y is (N x 1)
        train_y = train_y.reshape((train_y.shape[0], 1))
        self.train_x = train_x
        self._raw_train_x = train_x.copy()
        self._raw_train_y = train_y.copy()
        self.train_y = train_y
        self.features_name = features_name

        self.do_standardization = do_standardization
        self._x_std_ = None
        self._x_mean_ = None
Example #3
def inv_zform(data: np.ndarray,
              out: Optional[np.ndarray] = None,
              clone: bool = True,
              sigma: float = 1,
              mu: float = 1) -> np.ndarray:
    if clone or out is None:
        out = data.copy()
    else:
        out[...] = data
    # Assumed completion (the snippet is truncated here): undo a z-score
    # transform by scaling by sigma and shifting by mu.
    out *= sigma
    out += mu
    return out
Example #4
def inverse_additive_log_ratio(Y: np.ndarray, ind=-1):
    """
    Inverse additive log ratio transform.
    """
    assert Y.ndim in [1, 2]
    
    X = Y.copy()
    dimensions = X.shape[X.ndim-1]
    idx = np.arange(0, dimensions+1)
    
    if ind != -1:
        idx = np.array(list(idx[idx < ind]) + 
                       [-1] + 
                       list(idx[idx >= ind+1]-1))
    
    # Add a zero-column and reorder columns
    if Y.ndim == 2:
        X = np.concatenate((X, np.zeros((X.shape[0], 1))), axis=1)
        X = X[:, idx]
    else:
        X = np.append(X, np.array([0]))
        X = X[idx]
    
    # Inverse log and closure operations
    X = np.exp(X)
    X = close(X)
    return X
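The close() function called above is not part of this snippet. In compositional-data code it is conventionally the closure operation, i.e. rescaling each composition to unit sum; a minimal sketch under that assumption:

import numpy as np

def close(X: np.ndarray) -> np.ndarray:
    # Closure (assumed behaviour): rescale each composition so its parts sum to 1.
    if X.ndim == 2:
        return X / X.sum(axis=1, keepdims=True)
    return X / X.sum()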
Example #5
    def make_move(self, board: np.ndarray, move: int) -> np.ndarray:
        moving_player = self.get_active_player(board)
        new_board: np.ndarray = board.copy()
        available_idx, = np.where(new_board[:, move] == 0)

        new_board[available_idx[-1]][move] = moving_player
        return new_board
Example #6
def old_sweep(A: np.ndarray, ind: Sequence[int]):
    """This subroutine executes the sweep operator.

    "As input, SWEEP requires a symmetric matrix A where mean vector m
    and covariance matrix S are arranged in a special manner that simplifies the
    calculations."

    See Dempster (1969), Goodnight (1979).
    The SWEEP operator allows a statistician to quickly regress all variables against
    one specified variable, obtaining OLS estimates for regression coefficients and
    variances in a single application. Subsequent applications of the SWP operator
    allows for regressing against more variables.
    """
    S = A.copy()
    p = A.shape[1]

    for j in ind:
        S[j, j] = -1 / A[j, j]
        for i in range(0, p):
            if i != j:
                S[i, j] = -A[i, j] * S[j, j]
                S[j, i] = S[i, j]

        for i in range(0, p):
            if i != j:
                for k in range(0, p):
                    if k != j:
                        S[i, k] = A[i, k] - S[i, j] * A[j, k]
                        S[k, i] = S[i, k]

    return S
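A minimal single-sweep sketch of how the result is read: treating A as the covariance matrix of two centered variables (x, y) and sweeping on the x index leaves the OLS slope of y on x in the off-diagonal entry and the residual variance on the diagonal. The full mean-augmented layout from Dempster (1969) is used the same way.

import numpy as np

A = np.array([[2.0, 1.5],     # covariance matrix of centered (x, y)
              [1.5, 3.0]])
S = old_sweep(A, range(1))    # sweep on index 0 only
print(S[0, 1])                # 0.75  = Sxy / Sxx, the OLS slope of y on x
print(S[1, 1])                # 1.875 = Syy - Sxy**2 / Sxx, the residual variance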
Example #7
    def make_move(self, board: np.ndarray, move: int) -> np.ndarray:
        new_board: np.ndarray = board.copy()
        player = new_board[-1]
        new_board[-1] = -player

        if move == self.board_width * self.board_height:
            return new_board  # It's a pass.

        spaces = self.get_spaces(new_board)
        start_row = move // self.board_width
        start_column = move % self.board_width
        for di in range(-1, 2):
            for dj in range(-1, 2):
                if not (di or dj):
                    continue
                to_flip: typing.List[typing.Tuple[int, int]] = []  # [(i, j)]
                i = start_row + di
                j = start_column + dj
                while 0 <= i < self.board_height and 0 <= j < self.board_width:
                    piece = spaces[i, j]
                    if piece == player:
                        for i, j in to_flip:
                            spaces[i, j] *= -1
                        break
                    if piece == self.NO_PLAYER:
                        break
                    else:
                        to_flip.append((i, j))
                    i += di
                    j += dj
        spaces[start_row, start_column] = player
        return new_board
Example #8
def logsumexp_double_complement(a: np.ndarray, rel_tol: float = 1e-3) -> float:
    """Calculates the following expression in a numerically stable fashion:

        log(1 - (1 - exp(a_0)) x (1 - exp(a_1)) x ...)

    where a_i are the entries of `a` and assumed to be non-positive. The algorithm is as follows:

    We define:

        exp(x_n) = 1 - \prod_{i=0}^n (1 - exp(a_i)),

    Thus, we have x_0 = a_0 and the recursion relation:

        exp(x_{n+1}) = exp(x_n) + exp(b_{n+1}),

    where

        b_{n+1} = a_{n+1} + log(1 - exp(x_n)).

    We sort `a` in the descending order and update `x` term by term. It is easy to show that x_{n} is monotonically
    increasing and that |x_{N} - x_{n}| < (N - n) |x_{n} - x_{n-1}|. We use the last inequality to bound the error
    for early stopping.

    Args:
        a: a float array
        rel_tol: relative error tolerance for early stopping of calculation

    Returns:
        a float scalar
    """
    try:
        assert isinstance(a, np.ndarray)
        a = np.asarray(a.copy(), dtype=float)
    except AssertionError:
        try:
            a = np.asarray(a, dtype=float)
        except ValueError:
            raise ValueError("The input argument must be castable to a float ndarray.")
    assert len(a) > 0
    assert 0. <= rel_tol < 1.0

    # enforce all entries of a to be negative or zero
    a[a > 0.] = 0.

    if len(a) == 1:
        return a.item()
    else:
        a = np.sort(a.flatten())[::-1]
        x = a[0]
        sz = len(a)
        for i, entry in enumerate(a[1:]):
            x_new = np.logaddexp(x, entry + logp_complement(x))
            if np.abs(x_new - x) * (sz - i - 1) < rel_tol * np.abs(x):
                return x_new
            else:
                x = x_new
        return x
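The helper logp_complement(x) is not shown here; it evidently computes log(1 - exp(x)) for non-positive x. A minimal sketch under that assumption, followed by a check against the naive formula from the docstring:

import numpy as np

def logp_complement(x):
    # log(1 - exp(x)) for x <= 0 (assumed behaviour of the missing helper).
    return np.log(-np.expm1(x))

a = np.log(np.array([0.3, 0.2, 0.05]))                    # non-positive log-probabilities
naive = np.log(1.0 - np.prod(1.0 - np.exp(a)))            # log(1 - (1-0.3)(1-0.2)(1-0.05))
print(np.isclose(logsumexp_double_complement(a), naive))  # True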
Example #9
def luv_to_lch(luv_nd: ndarray) -> ndarray:
    uv_nd = luv_nd[..., slice(1, 2)]
    uv_nd[uv_nd == -0.0] = 0.0   # -0.0 screws up atan2
    lch_nd = luv_nd.copy()
    U, V = luv_nd[..., 1], luv_nd[..., 2]
    C, H = lch_nd[..., 1], lch_nd[..., 2]
    C[:] = ne.evaluate("(U ** 2 + V ** 2) ** 0.5")
    H[:] = np.degrees(ne.evaluate("arctan2(V, U)"))
    H[H < 0.0] += 360.0
    return lch_nd
Example #10
def luv_to_lch(luv_nd: ndarray) -> ndarray:
    uv_nd = _channel(luv_nd, slice(1, 2))
    uv_nd[uv_nd == -0.0] = 0.0   # -0.0 screws up atan2
    lch_nd = luv_nd.copy()
    U, V = (_channel(luv_nd, n) for n in range(1, 3))
    C, H = (_channel(lch_nd, n) for n in range(1, 3))
    C[:] = (U ** 2 + V ** 2) ** 0.5
    hrad = np.arctan2(V, U)
    H[:] = np.degrees(hrad)
    H[H < 0.0] += 360.0
    return lch_nd
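_channel is an external helper in both variants above; a plausible minimal sketch is a thin wrapper returning a view of the selected channel(s) along the last axis, which makes the NumPy-only variant runnable:

import numpy as np
from numpy import ndarray

def _channel(arr: ndarray, idx) -> ndarray:
    # Assumed behaviour: view of the selected channel(s) along the last axis.
    return arr[..., idx]

luv = np.array([[[50.0, 10.0, -10.0]]])   # a single L, U, V pixel
print(luv_to_lch(luv))                    # L unchanged, C = hypot(U, V) ~ 14.14, H = 315 degrees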
Example #11
def find_zilany_scaling_factor(anr: np.ndarray) -> float:
    w1_max = 0.0
    w1_target_amplitude = 0.15e-6
    tolerance = 1e-9
    step = 1e-18
    v1 = 1e-18
    while abs(w1_max - w1_target_amplitude) >= tolerance:
        temp_anr = anr.copy() * v1
        w1_max = _total_hack(temp_anr)
        v1 += step
        if v1 > 0.5:
            print("couldn't converge")
            break

    print("V1 set to {:0.5E} and gave a wave 1 amplitude of {:0.5E}".format(v1, w1_max))
    return v1
Example #12
def add_mask(field: np.ndarray):
    t_field = None
    for k in range(4):
        good = True
        t_field = field.copy()
        for i in range(field.shape[0]):
            for j in range(mask[(i + k) % 4], field.shape[1], 4):
                if t_field[i, j] != shaft_const:
                    t_field[i, j] = mask_const
                else:
                    good = False
        if good:
            break
    return t_field
Example #13
def additive_log_ratio(X: np.ndarray, ind: int = -1):
    """Additive log ratio transform. """
    
    Y = X.copy()
    assert Y.ndim in [1, 2]
    dimensions = Y.shape[Y.ndim-1]
    if ind < 0: ind += dimensions
    
    if Y.ndim == 2:
        Y = np.divide(Y, Y[:, ind][:, np.newaxis])
        Y = np.log(Y[:, [i for i in range(dimensions) if not i==ind]])
    else:
        Y = np.divide(X, X[ind])
        Y = np.log(Y[[i for i in range(dimensions) if not i==ind]])
        
    return Y
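A small round-trip check of the two transforms, assuming the unit-sum close() sketched after the inverse transform above; a composition that already sums to 1 per row is recovered exactly:

import numpy as np

comp = np.array([[0.2, 0.3, 0.5],
                 [0.1, 0.6, 0.3]])
Y = additive_log_ratio(comp, ind=-1)            # log ratios against the last component
back = inverse_additive_log_ratio(Y, ind=-1)    # exp, re-append the reference, then close
print(np.allclose(back, comp))                  # True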
Example #14
    def _find(self, goban_img: np.ndarray):
        """ The stones detection main algorithm. Delegate work to each Region and show some results on an image.

        Baselines :
        - trust contours to find single stones. but it will fail to detect all stones in a chain or cluster.
        - trust clustering to find all stones if their density is high enough. but it will fail if too few stones
        -> dynamically assign one finder per region, periodically trying to introduce clustering where it's not assigned

        """
        # 0. if startup phase: let background model get initialized (see StonesFinder._doframe())
        if self.total_f_processed < self.bg_init_frames:
            self.display_message("BACKGROUND SAMPLING ({0}/{1})".format(self.total_f_processed, self.bg_init_frames))
            return
        # 1. delegate all the work to Regions
        else:
            ref_stones = self.get_stones()
            canvas = goban_img.copy()
            for r in range(self.split):
                for c in range(self.split):
                    self.regions[r, c].process(goban_img, ref_stones, canvas=canvas)
            self._show(canvas)
Example #15
def calculate(X: np.ndarray, V: np.ndarray, E: np.array, B: np.array,
              t0: float, dt: float, iter_count: int) -> np.ndarray:
    """
    Computes electron trajectories in crossed force fields. All coordinates, velocities and force vectors are
    assumed to be three-dimensional. The position at time t + dt is computed using Euler's method.

    :param X: Initial electron positions, a two-dimensional numpy array
    :param V: Initial electron velocities
    :param E: Electric field strength vector
    :param B: Magnetic induction vector
    :param t0: Initial time of the computation
    :param dt: Time step
    :param iter_count: Number of algorithm steps.
    :return: A three-dimensional numpy array with the layout
        (number of iterations) x (number of electrons) x (space dimension, 3),
    containing the electron positions at every iteration of the algorithm. The dtype of the array is
    inferred from the position array X.
    """
    # Specific charge (charge-to-mass ratio) of the electron:
    EM = -1.602176565e-19 / 9.10938356e-31
    t = t0
    electron_count = len(X)
    result = np.empty((iter_count, electron_count, DIMENSION), dtype=X.dtype)
    # Over the specified time interval
    for j in range(iter_count):
        # Store the current coordinates in the result array
        result[j] = X.copy()
        # For each electron...
        for i in range(electron_count):
            # First update its position
            X[i] += V[i] * dt
            # Then compute the instantaneous acceleration
            a = (E + np.cross(V[i], B)) * EM
            # Then update the electron's velocity
            V[i] += a * dt
        # Having computed the positions of all electrons, move on to the next time step
        t += dt
    return result
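A usage sketch, assuming DIMENSION = 3 is defined at module level as the function requires; a single electron starting at rest in crossed E and B fields, with its position recorded at every Euler step:

import numpy as np

DIMENSION = 3                                 # assumed module-level constant

X = np.array([[0.0, 0.0, 0.0]])               # one electron at the origin
V = np.array([[0.0, 0.0, 0.0]])               # initially at rest
E = np.array([0.0, 1e3, 0.0])                 # electric field along y, V/m
B = np.array([0.0, 0.0, 1e-2])                # magnetic field along z, T

trajectory = calculate(X, V, E, B, t0=0.0, dt=1e-12, iter_count=1000)
print(trajectory.shape)                       # (1000, 1, 3)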
Example #16
def shared_nearest_neighbors(D:np.ndarray, k:int=10, metric='distance'):
    """Transform distance matrix using shared nearest neighbors [1]_.
    
    SNN similarity is based on computing the overlap between the `k` nearest 
    neighbors of two objects. SNN approaches try to symmetrize nearest neighbor 
    relations using only rank and not distance information [2]_.
    
    Parameters
    ----------
    D : np.ndarray
        The ``n x n`` symmetric distance (similarity) matrix.
        
    k : int, optional (default: 10)
        Neighborhood radius: The `k` nearest neighbors are used to calculate SNN.
        
    metric : {'distance', 'similarity'}, optional (default: 'distance')
        Define, whether the matrix `D` is a distance or similarity matrix

    Returns
    -------
    D_snn : ndarray
        Secondary distance SNN matrix
        
    References
    ---------- 
    .. [1] R. Jarvis and E. A. Patrick, “Clustering using a similarity measure 
           based on shared near neighbors,” IEEE Transactions on Computers, 
           vol. 22, pp. 1025–1034, 1973.
    
    .. [2] Flexer, A., & Schnitzer, D. (2013). Can Shared Nearest Neighbors 
           Reduce Hubness in High-Dimensional Spaces? 2013 IEEE 13th 
           International Conference on Data Mining Workshops, 460–467. 
           http://doi.org/10.1109/ICDMW.2013.101
    """
    IO._check_distance_matrix_shape(D)
    IO._check_valid_metric_parameter(metric)
    if metric == 'distance':
        self_value = 0.
        sort_order = 1
        exclude = np.inf
    if metric == 'similarity':
        self_value = 1.
        sort_order = -1
        exclude = -np.inf
    
    distance = D.copy()
    np.fill_diagonal(distance, exclude)
    n = np.shape(distance)[0]
    knn = np.zeros_like(distance, bool)
    
    # find nearest neighbors for each point
    for i in range(n):
        di = distance[i, :]
        nn = np.argsort(di)[::sort_order]
        knn[i, nn[0:k]] = True
    
    D_snn = np.zeros_like(distance)
    for i in range(n):
        knn_i = knn[i, :]
        j_idx = slice(i+1, n)
        
        # using broadcasting
        Dij = np.sum(np.logical_and(knn_i, knn[j_idx, :]), 1)
        if metric == 'distance':
            D_snn[i, j_idx] = 1. - Dij / k
        else: # metric == 'similarity':
            D_snn[i, j_idx] = Dij / k
        
    D_snn += D_snn.T
    np.fill_diagonal(D_snn, self_value)
    return D_snn
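A usage sketch, assuming the hub-toolbox style module-level imports (IO, etc.) are available; the input is an ordinary symmetric Euclidean distance matrix and the output is a secondary distance matrix with values in [0, 1]:

import numpy as np
from scipy.spatial.distance import pdist, squareform

points = np.random.rand(50, 3)
D = squareform(pdist(points))                         # 50 x 50 symmetric distances
D_snn = shared_nearest_neighbors(D, k=10, metric='distance')
print(D_snn.shape, float(D_snn.min()), float(D_snn.max()))   # values lie in [0, 1]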
Example #17
    def get_sample_fn(self,
                      image: np.ndarray,
                      p_sample: float = 0.5) -> Tuple[np.ndarray, Callable]:
        """
        Create sampling function and superpixel mask.

        Parameters
        ----------
        image
            Image to be explained
        p_sample
            Probability for a pixel to be represented by the average value of its superpixel or
            the pixel value of a superimposed image

        Returns
        -------
        segments
            Superpixels generated from image
        sample_fn
            Function returning the sampled images with label
        """
        # check if grayscale images need to be converted to RGB for superpixel generation
        if not self.custom_segmentation and image.shape[-1] == 1:
            image_segm = np.repeat(image, 3, axis=2)
        else:
            image_segm = image.copy()

        segments = self.segmentation_fn(image_segm)  # generate superpixels

        # each superpixel is a feature
        features = list(np.unique(segments))
        n_features = len(features)

        # true label is prediction on original image
        true_label = self.predict_fn(np.expand_dims(image, axis=0))[0]

        def sample_fn_image(
            present: list,
            num_samples: int,
            compute_labels: bool = True
        ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
            """
            Create sampling function by masking certain superpixels from the original image and replacing them
            with the pixel values from superimposed images.

            Parameters
            ----------
            present
                List with features (= superpixels) present in the proposed anchor
            num_samples
                Number of samples used
            compute_labels
                Boolean whether to use labels coming from model predictions as 'true' labels

            Returns
            -------
            raw_data
                "data" output concatenated with the indices of the chosen background images for each sample
            data
                Nb of samples times nb of features matrix indicating whether a feature (= a superpixel) is
                present in the sample or masked
            labels
                Create labels using model predictions if compute_labels equals True
            """
            if not compute_labels:
                # for each sample, randomly sample whether a superpixel is represented by its average value or not
                data = np.random.randint(0, 2,
                                         num_samples * n_features).reshape(
                                             (num_samples, n_features))
                data[:,
                     present] = 1  # superpixels in candidate anchor need to be present
                return np.array([]), data, np.array([])

            # for each sample, randomly sample whether a superpixel is represented by its
            # average value or not according to p_sample
            data = np.random.choice([0, 1],
                                    num_samples * n_features,
                                    p=[p_sample, 1 - p_sample]).reshape(
                                        (num_samples, n_features))
            data[:,
                 present] = 1  # superpixels in candidate anchor need to be present

            # for each sample, need to sample one of the background images
            chosen = np.random.choice(range(len(self.images_background)),
                                      data.shape[0],
                                      replace=True)

            # create masked images
            imgs = []
            for d, r in zip(data, chosen):
                temp = copy.deepcopy(image)
                zeros = np.where(
                    d == 0)[0]  # unused superpixels for the sample
                # create mask for each superpixel not present in the sample
                mask = np.zeros(segments.shape).astype(bool)
                for z in zeros:
                    mask[segments == z] = True
                # for mask: replace values with those of background image
                temp[mask] = self.images_background[r][mask]
                imgs.append(temp)
            imgs = np.array(imgs)

            preds = self.predict_fn(imgs)  # make prediction on masked images

            # check if label for the masked images are the same as the true label
            labels = np.array((preds == true_label).astype(int))

            # concat data and indices of chosen background images for each sample
            raw_data = np.hstack(
                (data,
                 chosen.reshape(-1,
                                1)))  # nb of samples * (nb of superpixels + 1)
            return raw_data, data, labels

        if type(self.images_background) == np.ndarray:
            return segments, sample_fn_image

        # create fudged image where the pixel value in each superpixel is set to the average over the
        # superpixel for each channel
        fudged_image = image.copy()
        for x in np.unique(segments):
            fudged_image[segments == x] = [
                np.mean(image[segments == x][:, i])
                for i in range(image.shape[-1])
            ]

        def sample_fn_fudged(
            present: list,
            num_samples: int,
            compute_labels: bool = True
        ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
            """
            Create sampling function by masking certain superpixels from the original image and replacing them
            with that superpixel's average value.

            Parameters
            ----------
            present
                List with features (= superpixels) present in the proposed anchor
            num_samples
                Number of samples used
            compute_labels
                Boolean whether to use labels coming from model predictions as 'true' labels

            Returns
            -------
            raw_data
                Same as data
            data
                Nb of samples times nb of features matrix indicating whether a feature (= a superpixel) is
                present in the sample or masked
            labels
                Create labels using model predictions if compute_labels equals True
            """
            if not compute_labels:
                # for each sample, randomly sample whether a superpixel is represented by its average value or not
                data = np.random.randint(0, 2,
                                         num_samples * n_features).reshape(
                                             (num_samples, n_features))
                data[:,
                     present] = 1  # superpixels in candidate anchor need to be present
                return np.array([]), data, np.array([])

            # for each sample, randomly sample whether a superpixel is represented by its
            # average value or not according to p_sample
            data = np.random.choice([0, 1],
                                    num_samples * n_features,
                                    p=[p_sample, 1 - p_sample]).reshape(
                                        (num_samples, n_features))
            data[:,
                 present] = 1  # superpixels in candidate anchor need to be present

            # create perturbed (fudged) image for each sample using image masks
            imgs = []
            for row in data:
                temp = copy.deepcopy(image)
                zeros = np.where(
                    row == 0)[0]  # superpixels to be averaged for the sample
                # create mask for each pixel in the superpixels that are averaged
                mask = np.zeros(segments.shape).astype(bool)
                for z in zeros:
                    mask[segments == z] = True
                temp[mask] = fudged_image[mask]
                imgs.append(temp)
            imgs = np.array(imgs)

            preds = self.predict_fn(imgs)  # make prediction on masked images

            # check if labels for the masked images are the same as the true label
            labels = (preds == true_label).astype(int)

            raw_data = data
            return raw_data, data, labels

        return segments, sample_fn_fudged
Example #18
def mutual_proximity_empiric(
    D: np.ndarray, metric: str = "distance", test_set_ind: np.ndarray = None, verbose: int = 0
):
    """Transform a distance matrix with Mutual Proximity (empiric distribution).
    
    Applies Mutual Proximity (MP) [1]_ on a distance/similarity matrix using 
    the empiric data distribution (EXACT, rather SLOW). The resulting 
    secondary distance/similarity matrix should show lower hubness.
    
    Parameters
    ----------
    D : ndarray or csr_matrix
        - ndarray: The ``n x n`` symmetric distance or similarity matrix.
        - csr_matrix: The ``n x n`` symmetric similarity matrix.
          
        NOTE: In case of sparse ``D``, zeros are interpreted as missing values
        and ignored during calculations. Thus, results may differ 
        from using a dense version.
    
    metric : {'distance', 'similarity'}, optional (default: 'distance')
        Define, whether matrix `D` is a distance or similarity matrix.
        
        NOTE: In case of sparse `D`, only 'similarity' is supported.
        
    test_set_ind : ndarray, optional (default: None)
        Define data points to be hold out as part of a test set. Can be:
        
        - None : Rescale all distances
        - ndarray : Hold out points indexed in this array as test set. 
        
    verbose : int, optional (default: 0)
        Increasing level of output (progress report).
        
    Returns
    -------
    D_mp : ndarray
        Secondary distance MP empiric matrix.
    
    References
    ----------
    .. [1] Schnitzer, D., Flexer, A., Schedl, M., & Widmer, G. (2012). 
           Local and global scaling reduce hubs in space. The Journal of Machine 
           Learning Research, 13(1), 2871–2902.
    """
    # Initialization
    n = D.shape[0]
    log = Logging.ConsoleLogging()

    # Check input
    IO._check_distance_matrix_shape(D)
    IO._check_valid_metric_parameter(metric)
    if metric == "similarity":
        self_value = 1
        exclude_value = np.inf
    else:  # metric == 'distance':
        self_value = 0
        exclude_value = -np.inf
        if issparse(D):
            raise ValueError("MP sparse only supports similarity matrices.")
    if test_set_ind is None:
        pass  # TODO implement
        # train_set_ind = slice(0, n)
    elif not np.all(~test_set_ind):
        raise NotImplementedError("MP empiric does not yet support train/" "test splits.")
        # train_set_ind = np.setdiff1d(np.arange(n), test_set_ind)

    # Start MP
    D = D.copy()

    if issparse(D):
        return _mutual_proximity_empiric_sparse(D, test_set_ind, verbose, log)

    # ensure correct self distances (NOT done for sparse matrices!)
    np.fill_diagonal(D, exclude_value)

    D_mp = np.zeros_like(D)

    # Calculate MP empiric
    for i in range(n - 1):
        if verbose and ((i + 1) % 1000 == 0 or i == n - 2):
            log.message("MP_empiric: {} of {}.".format(i + 1, n - 1), flush=True)
        # Calculate only triu part of matrix
        j_idx = i + 1

        dI = D[i, :][np.newaxis, :]
        dJ = D[j_idx:n, :]
        d = D[j_idx:n, i][:, np.newaxis]

        if metric == "similarity":
            D_mp[i, j_idx:] = np.sum((dI <= d) & (dJ <= d), 1) / (n - 1)
        else:  # metric == 'distance':
            D_mp[i, j_idx:] = 1 - (np.sum((dI > d) & (dJ > d), 1) / (n - 1))

    # Mirror, so that matrix is symmetric
    D_mp += D_mp.T
    np.fill_diagonal(D_mp, self_value)

    return D_mp
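A usage sketch in the same style (module-level IO/Logging imports assumed available); empiric MP turns a primary distance matrix into secondary distances bounded by [0, 1] and keeps the matrix symmetric:

import numpy as np
from scipy.spatial.distance import pdist, squareform

D = squareform(pdist(np.random.rand(30, 5)))
D_mp = mutual_proximity_empiric(D, metric='distance')
print(D_mp.min() >= 0.0 and D_mp.max() <= 1.0)   # True
print(np.allclose(D_mp, D_mp.T))                 # True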
Example #19
def hubness(D:np.ndarray, k:int=5, metric='distance', verbose:int=0):
    """Compute hubness of a distance matrix.
    
    Hubness [1]_ is the skewness of the `k`-occurrence histogram (reverse 
    nearest neighbor count, i.e. how often does a point occur in the 
    `k`-nearest neighbor lists of other points).
    
    Parameters
    ----------
    D : ndarray
        The ``n x n`` symmetric distance (similarity) matrix.
    
    k : int, optional (default: 5)
        Neighborhood size for `k`-occurence.
    
    metric : {'distance', 'similarity'}, optional (default: 'distance')
        Define, whether matrix `D` is a distance or similarity matrix
    
    verbose : int, optional (default: 0)
        Increasing level of output (progress report).
        
    Returns
    -------
    S_k : float
        Hubness (skewness of `k`-occurence distribution)
    D_k : ndarray
        `k`-nearest neighbor lists
    N_k : ndarray
        `k`-occurence list    
    
    References
    ----------
    .. [1] Radovanović, M., Nanopoulos, A., & Ivanović, M. (2010). 
           Hubs in Space : Popular Nearest Neighbors in High-Dimensional Data. 
           Journal of Machine Learning Research, 11, 2487–2531. Retrieved from 
           http://jmlr.csail.mit.edu/papers/volume11/radovanovic10a/
           radovanovic10a.pdf
    """
    log = Logging.ConsoleLogging()
    IO._check_distance_matrix_shape(D)
    IO._check_valid_metric_parameter(metric)
    if metric == 'distance':
        d_self = np.inf
        sort_order = 1
    if metric == 'similarity':
        d_self = -np.inf
        sort_order = -1
        
    if verbose:
        log.message("Hubness calculation (skewness of {}-occurence)".format(k))
    D = D.copy()           
    D_k = np.zeros((k, D.shape[1]), dtype=np.float32)
    n = D.shape[0]
    
    if issparse(D): 
        pass # correct self-distance must be ensured upstream for sparse
    else:
        # Set self dist to inf
        np.fill_diagonal(D, d_self)
        # make non-finite (NaN, Inf) appear at the end of the sorted list
        D[~np.isfinite(D)] = d_self
    
    for i in range(n):
        if verbose and ((i+1)%10000==0 or i+1==n):
            log.message("NN: {} of {}.".format(i+1, n), flush=True)
        if issparse(D):
            d = D[i, :].toarray().ravel() # dense copy of one row
        else: # normal ndarray
            d = D[i, :]
        d[i] = d_self
        d[~np.isfinite(d)] = d_self
        # Randomize equal values in the distance matrix rows to avoid the 
        # problem case if all numbers to sort are the same, which would yield 
        # high hubness, even if there is none.
        rp = np.random.permutation(n)
        d2 = d[rp]
        d2idx = np.argsort(d2, axis=0)[::sort_order]
        D_k[:, i] = rp[d2idx[0:k]]      
               
    # N-occurence
    N_k = np.bincount(D_k.astype(int).ravel(), minlength=n)    
    # Hubness
    S_k = stats.skew(N_k)
     
    # return k-hubness, k-nearest neighbors, k-occurence
    if verbose:
        log.message("Hubness calculation done.", flush=True)
    return S_k, D_k, N_k    
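A usage sketch (again assuming the surrounding module's IO, Logging and scipy imports are in scope); for i.i.d. points in a high-dimensional space the k-occurrence distribution is typically right-skewed, i.e. hubs appear:

import numpy as np
from scipy.spatial.distance import pdist, squareform

X = np.random.rand(200, 50)                      # 200 i.i.d. points in 50 dimensions
D = squareform(pdist(X))
S_k, D_k, N_k = hubness(D, k=5, metric='distance')
print(S_k > 0)                                   # typically True in high dimensions
print(N_k.sum() == 5 * 200)                      # k neighbors recorded for each of the 200 points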
Example #20
 def __init__(self, mean: np.ndarray, cov: np.ndarray) -> None:
     super().__init__()
     self._mean = mean.flatten()
     self._cov = cov.copy()
Example #21
 def _update_obs(array: np.ndarray):
     return torch.unsqueeze(torch.from_numpy(array.copy()), dim=0)
Example #22
    def _projected_sinkhorn(
        self, x: np.ndarray, x_init: np.ndarray, cost_matrix: np.ndarray, eps: np.ndarray
    ) -> np.ndarray:
        """
        The projected sinkhorn_optimizer.

        :param x: Current adversarial examples.
        :param x_init: An array with the original inputs.
        :param cost_matrix: A non-negative cost matrix.
        :param eps: Maximum perturbation that the attacker can introduce.
        :return: Adversarial examples.
        """
        # Normalize inputs
        normalization = x_init.reshape(x.shape[0], -1).sum(-1).reshape(x.shape[0], 1, 1, 1)
        x = x.copy() / normalization
        x_init = x_init.copy() / normalization

        # Dimension size for each example
        m = np.prod(x_init.shape[1:])

        # Initialize
        beta = np.log(np.ones(x.shape) / m)
        exp_beta = np.exp(-beta)

        psi = np.ones(x.shape[0])

        var_k = np.expand_dims(np.expand_dims(np.expand_dims(psi, -1), -1), -1)
        var_k = np.exp(-var_k * cost_matrix - 1)

        convergence = -np.inf

        for _ in range(self.projected_sinkhorn_max_iter):
            # Block coordinate descent iterates
            x_init[x_init == 0.0] = EPS_LOG  # Prevent divide by zero in np.log
            alpha = np.log(self._local_transport(var_k, exp_beta, self.kernel_size)) - np.log(x_init)
            exp_alpha = np.exp(-alpha)

            beta = (
                self.regularization
                * np.exp(self.regularization * x)
                * self._local_transport(var_k, exp_alpha, self.kernel_size)
            )
            beta[beta > 1e-10] = np.real(lambertw(beta[beta > 1e-10]))
            beta -= self.regularization * x
            exp_beta = np.exp(-beta)

            # Newton step
            var_g = -eps + self._batch_dot(
                exp_alpha, self._local_transport(cost_matrix * var_k, exp_beta, self.kernel_size)
            )

            var_h = -self._batch_dot(
                exp_alpha, self._local_transport(cost_matrix * cost_matrix * var_k, exp_beta, self.kernel_size)
            )

            delta = var_g / var_h

            # Ensure psi >= 0
            tmp = np.ones(delta.shape)
            neg = psi - tmp * delta < 0

            while neg.any() and np.min(tmp) > 1e-2:
                tmp[neg] /= 2
                neg = psi - tmp * delta < 0

            psi = np.maximum(psi - tmp * delta, 0)

            # Update K
            var_k = np.expand_dims(np.expand_dims(np.expand_dims(psi, -1), -1), -1)
            var_k = np.exp(-var_k * cost_matrix - 1)

            # Check for convergence
            next_convergence = self._projected_sinkhorn_evaluation(
                x,
                x_init,
                alpha,
                exp_alpha,
                beta,
                exp_beta,
                psi,
                var_k,
                eps,
            )

            if (np.abs(convergence - next_convergence) <= 1e-4 + 1e-4 * np.abs(next_convergence)).all():
                break

            convergence = next_convergence

        result = (beta / self.regularization + x) * normalization

        return result
Example #23
def spectral_heart_rate(filtered_sig: np.ndarray,
                        fs: Real,
                        hr_fs_band: Optional[Sequence[Real]] = None,
                        sig_fmt: str = "channel_first",
                        mode: str = 'hr',
                        verbose: int = 0) -> Real:
    """ finished, NOT checked,

    compute the heart rate of an ecg signal using a spectral method (from the frequency domain)

    Parameters:
    -----------
    filtered_sig: ndarray,
        the filtered 12-lead ecg signal, with units in mV
    fs: real number,
        sampling frequency of `filtered_sig`
    hr_fs_band: sequence of real number, optional,
        frequency band (bounds) of heart rate
    sig_fmt: str, default "channel_first",
        format of the multi-lead ecg signal,
        'channel_last' (alias 'lead_last'), or
        'channel_first' (alias 'lead_first', original)
    mode: str, default 'hr',
        mode of computation (return mean heart rate or mean rr intervals),
        can also be 'heart_rate' (alias of 'hr'), and 'rr' (with an alias of 'rr_interval'),
        case insensitive
    verbose: int, default 0,
        print verbosity
    
    Returns:
    --------
    ret_val: real number,
        mean heart rate of the ecg signal, with units in bpm;
        or mean rr intervals, with units in ms

    NOTE:
    for high frequency signal with short duration,
    the lowest frequency of the spectrogram might be too high for computing heart rate
    """
    assert sig_fmt.lower() in [
        'channel_first', 'lead_first', 'channel_last', 'lead_last'
    ]
    if sig_fmt.lower() in ['channel_last', 'lead_last']:
        s = filtered_sig.T
    else:
        s = filtered_sig.copy()

    # psd of shape (c,n,k), freqs of shape (n,)
    # where n = length of signal, c = number of leads, k rel. to freq bands
    # freqs, _, psd = SS.spectrogram(s, fs, axis=-1)
    freqs, psd = SS.welch(s, fs, axis=-1)

    if not _check_feasibility(freqs):
        raise ValueError(
            "it is not feasible to compute heart rate in frequency domain")

    fs_band = hr_fs_band or FeatureCfg.spectral_hr_fs_band
    assert len(fs_band) >= 2, \
        "frequency band of heart rate should have at least 2 bounds"
    fs_band = sorted(fs_band)
    fs_band = [fs_band[0], fs_band[-1]]

    if verbose >= 1:
        print(f"signal shape = {s.shape}")
        print(f"fs_band = {fs_band}")
        print(f"freqs.shape = {freqs.shape}, psd.shape = {psd.shape}")
        print(f"freqs = {freqs.tolist()}")

    inds_of_interest = np.where((fs_band[0] <= freqs)
                                & (freqs <= fs_band[-1]))[0]
    # psd_of_interest of shape (c, m), freqs_of_interest of shape (m,)
    # where m = length of inds_of_interest
    freqs_of_interest = freqs[inds_of_interest]
    psd_of_interest = psd[..., inds_of_interest]
    peak_inds = np.argmax(psd_of_interest, axis=-1)

    if verbose >= 1:
        print(f"inds_of_interest = {inds_of_interest.tolist()}")
        print(f"freqs_of_interest = {freqs_of_interest.tolist()}")
        print(
            f"peak_inds.shape = {peak_inds.shape}, peak_inds = {peak_inds.tolist()}"
        )
        print(f"psd_of_interest.shape = {psd_of_interest.shape}")

    # averaging at a neighborhood of `peak_idx`
    n_nbh = 1
    psd_mask = np.zeros_like(psd_of_interest, dtype=int)
    for l in range(psd_mask.shape[0]):
        psd_mask[l,
                 max(0, peak_inds[l] -
                     n_nbh):min(psd_mask.shape[-1], peak_inds[l] + n_nbh)] = 1
    psd_of_interest = psd_of_interest * psd_mask
    # ret_val with units in second^{-1}
    ret_val = np.mean(
        np.dot(psd_of_interest, freqs_of_interest) /
        np.sum(psd_of_interest, axis=-1))
    if mode.lower() in ['hr', 'heart_rate']:
        ret_val = 60 * ret_val
    elif mode.lower() in ['rr', 'rr_interval']:
        ret_val = 1000 / ret_val
    return ret_val
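A synthetic sanity check, assuming the module-level dependencies used above (SS = scipy.signal, _check_feasibility, FeatureCfg) are in scope; a pure 1.2 Hz oscillation corresponds to 72 bpm, and the estimate is limited by the width of the Welch frequency bins:

import numpy as np

fs = 100                                              # Hz
t = np.arange(0, 60, 1 / fs)                          # 60 seconds of "signal"
sig = np.stack([np.sin(2 * np.pi * 1.2 * t)] * 2)     # two leads, channel_first
hr = spectral_heart_rate(sig, fs, hr_fs_band=[0.5, 3], sig_fmt="channel_first", mode="hr")
print(hr)                                             # close to 72 bpm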
Example #24
 def extract_buildings(x: np.ndarray):
     """ Returns a mask of the buildings in x """
     buildings = x.copy()
     buildings[x < 4] = 1
     buildings[x >= 4] = 0
     return buildings
Example #25
    def _conjugate_sinkhorn(self, x: np.ndarray, grad: np.ndarray, cost_matrix: np.ndarray) -> np.ndarray:
        """
        The conjugate sinkhorn_optimizer.

        :param x: Current adversarial examples.
        :param grad: The loss gradients.
        :param cost_matrix: A non-negative cost matrix.
        :return: Adversarial examples.
        """
        # Normalize inputs
        normalization = x.reshape(x.shape[0], -1).sum(-1).reshape(x.shape[0], 1, 1, 1)
        x = x.copy() / normalization

        # Dimension size for each example
        m = np.prod(x.shape[1:])

        # Initialize
        alpha = np.log(np.ones(x.shape) / m) + 0.5
        exp_alpha = np.exp(-alpha)

        beta = -self.regularization * grad
        beta = beta.astype(np.float64)
        exp_beta = np.exp(-beta)

        # Check for overflow
        if (exp_beta == np.inf).any():
            raise ValueError("Overflow error in `_conjugate_sinkhorn` for exponential beta.")

        cost_matrix_new = cost_matrix.copy() + 1
        cost_matrix_new = np.expand_dims(np.expand_dims(cost_matrix_new, 0), 0)

        i_nonzero = self._batch_dot(x, self._local_transport(cost_matrix_new, grad, self.kernel_size)) != 0
        i_nonzero_ = np.zeros(alpha.shape).astype(bool)
        i_nonzero_[:, :, :, :] = np.expand_dims(np.expand_dims(np.expand_dims(i_nonzero, -1), -1), -1)

        psi = np.ones(x.shape[0])

        var_k = np.expand_dims(np.expand_dims(np.expand_dims(psi, -1), -1), -1)
        var_k = np.exp(-var_k * cost_matrix - 1)

        convergence = -np.inf

        for _ in range(self.conjugate_sinkhorn_max_iter):
            # Block coordinate descent iterates
            x[x == 0.0] = EPS_LOG  # Prevent divide by zero in np.log
            alpha[i_nonzero_] = (np.log(self._local_transport(var_k, exp_beta, self.kernel_size)) - np.log(x))[
                i_nonzero_
            ]
            exp_alpha = np.exp(-alpha)

            # Newton step
            var_g = -self.eps_step + self._batch_dot(
                exp_alpha, self._local_transport(cost_matrix * var_k, exp_beta, self.kernel_size)
            )

            var_h = -self._batch_dot(
                exp_alpha, self._local_transport(cost_matrix * cost_matrix * var_k, exp_beta, self.kernel_size)
            )

            delta = var_g / var_h

            # Ensure psi >= 0
            tmp = np.ones(delta.shape)
            neg = psi - tmp * delta < 0

            while neg.any() and np.min(tmp) > 1e-2:
                tmp[neg] /= 2
                neg = psi - tmp * delta < 0

            psi[i_nonzero] = np.maximum(psi - tmp * delta, 0)[i_nonzero]

            # Update K
            var_k = np.expand_dims(np.expand_dims(np.expand_dims(psi, -1), -1), -1)
            var_k = np.exp(-var_k * cost_matrix - 1)

            # Check for convergence
            next_convergence = self._conjugated_sinkhorn_evaluation(x, alpha, exp_alpha, exp_beta, psi, var_k)

            if (np.abs(convergence - next_convergence) <= 1e-4 + 1e-4 * np.abs(next_convergence)).all():
                break

            convergence = next_convergence

        result = exp_beta * self._local_transport(var_k, exp_alpha, self.kernel_size)
        result[~i_nonzero] = 0
        result *= normalization

        return result
Example #26
 def _sink_callback(self, y: np.ndarray, meta: FrameMetaData):
     r"""Callback where features are collected."""
     self._y.append(y.copy())
     self._starts.append(meta.time)
     self._ends.append(meta.time + meta.lengthSec)
Example #27
    def train(self,
              x_array: np.ndarray,
              y_array: np.ndarray,
              epochs: int = None,
              batch_size: int = None,
              epoch_callback=None,
              scale_data=False,
              shuffle_data=False):

        if not epochs:
            raise ValueError("Missing required kwarg: epochs")

        if not batch_size:
            batch_size = int(1e20)

        optimizer = torch.optim.Adam(self.mlp.parameters())
        # optimizer = torch.optim.RMSprop(self.mlp.parameters())

        x_array = x_array.copy()
        y_array = y_array.copy()

        if scale_data:

            self._input_scaler = StandardScaler()
            self._input_scaler.fit(x_array)
            self._target_scaler = StandardScaler()
            self._target_scaler.fit(y_array)

            self._input_scaler.transform(x_array, copy=False)
            self._target_scaler.transform(y_array, copy=False)

        n_batches = int(np.ceil(1.0 * x_array.shape[0] / batch_size))
        losses = np.zeros(epochs)

        for t in range(epochs):

            if shuffle_data:
                idx = np.random.permutation(x_array.shape[0])
                x_array = x_array[idx]
                y_array = y_array[idx]

            for batch in range(n_batches):
                idx_lb = batch_size * batch
                idx_up = min(batch_size * (batch + 1), x_array.shape[0])
                x_batch = x_array[idx_lb:idx_up]
                y_batch = y_array[idx_lb:idx_up]

                x_variable = torch.from_numpy(x_batch.astype(np.float32)).to(
                    self.device)  # type: tensor.Tensor
                y_variable = torch.from_numpy(y_batch.astype(np.float32)).to(
                    self.device)  # type: tensor.Tensor
                assert hasattr(y_variable, 'requires_grad')
                y_variable.requires_grad = False

                output = self.mlp.forward(x_variable)
                loss = self.loss_function(output, y_variable)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                losses[t] += loss.item()

            losses[t] /= n_batches

            if callable(epoch_callback):
                epoch_callback(t, losses[t])
            else:
                print(t, losses[t])

        return losses
Example #28
 def __init__(self, grid: np.ndarray):
     self._grid = grid.copy()
Example #29
def intrinsic_dimension(X:np.ndarray, k1:int=6, k2:int=12, 
                        estimator:str='levina', metric:str='vector', 
                        trafo:str='var', mem_threshold:int=5000):
    """Calculate intrinsic dimension based on the MLE by Levina and Bickel [1]_.
    
    Parameters
    ----------
    X : ndarray
        - An ``m x n`` vector data matrix with ``n`` objects in an 
          ``m`` dimensional feature space 
        - An ``n x n`` distance matrix.
        
        NOTE: The type must be defined via parameter `metric`!
        
    k1 : int, optional (default: 6)
        Start of neighborhood range to search in.
        
    k2 : int, optional (default: 12)
        End of neighborhood range to search in.
        
    estimator : {'levina', 'mackay'}, optional (default: 'levina')
        Determine the summation strategy: see [2]_.
    
    metric : {'vector', 'distance'}, optional (default: 'vector')
        Determine data type of `X`. 
        
        NOTE: the MLE was derived for euclidean distances. Using 
        other dissimilarity measures may lead to undefined results.
        
    trafo : {None, 'std', 'var'}, optional (default: 'var')
        Transform vector data. 
        
        - None: no transformation
        - 'std': standardization 
        - 'var': subtract mean, divide by variance (default behavior of 
          Laurens van der Maaten's DR toolbox; most likely for other 
          ID/DR techniques).

    mem_threshold : int, optional (default: 5000)
        Controls speed-memory usage trade-off: If number of points is higher
        than the given value, don't calculate complete distance matrix at
        once (fast, high memory), but per row (slower, less memory).

    Returns
    -------
    d_mle : int
        Intrinsic dimension estimate (rounded to next integer)
    
    References
    ----------
    .. [1] Levina, E., & Bickel, P. (2004). Maximum likelihood estimation of 
           intrinsic dimension. Advances in Neural Information …, 17, 777–784. 
           http://doi.org/10.2307/2335172
    .. [2] http://www.inference.phy.cam.ac.uk/mackay/dimension/
    """
    n = X.shape[0]
    if estimator not in ['levina', 'mackay']:
        raise ValueError("Parameter 'estimator' must be 'levina' or 'mackay'.")
    if k1 < 1 or k2 < k1 or k2 >= n:
        raise ValueError("Invalid neighborhood: Please make sure that "
                         "0 < k1 <= k2 < n. (Got k1={} and k2={}).".
                         format(k1, k2))
    X = X.copy().astype(float)
        
    if metric == 'vector':
        # New array with unique rows   
        X = X[np.lexsort(np.fliplr(X).T)]
        
        if trafo is None:
            pass
        elif trafo == 'var':
            X -= X.mean(axis=0) # broadcast
            X /= X.var(axis=0) + 1e-7 # broadcast
        elif trafo == 'std':
            # Standardization
            X -= X.mean(axis=0) # broadcast
            X /= X.std(axis=0) + 1e-7 # broadcast
        else:
            raise ValueError("Transformation must be None, 'std', or 'var'.")
        
        # Compute matrix of log nearest neighbor distances
        X2 = (X**2).sum(1)
        
        if n <= mem_threshold: # speed-memory trade-off
            distance = X2.reshape(-1, 1) + X2 - 2*np.dot(X, X.T) #2x br.cast
            distance.sort(1)
            # Replace invalid values with a small number
            distance[distance<0] = 1e-7
            knnmatrix = .5 * np.log(distance[:, 1:k2+1])
        else:
            knnmatrix = np.zeros((n, k2))
            for i in range(n):
                distance = np.sort(X2[i] + X2 - 2 * np.dot(X, X[i, :]))
                # Replace invalid values with a small number
                distance[distance < 0] = 1e-7
                knnmatrix[i, :] = .5 * np.log(distance[1:k2+1])
    
    elif metric == 'distance':
        raise NotImplementedError("ID currently only supports vector data.")
        #=======================================================================
        # # TODO calculation WRONG
        # X.sort(1)
        # X[X < 0] = 1e-7
        # knnmatrix = np.log(X[:, 1:k2+1])
        #=======================================================================
    elif metric == 'similarity':
        raise NotImplementedError("ID currently only supports vector data.")
        #=======================================================================
        # # TODO calculation WRONG
        # print("WARNING: using similarity data may return "
        #       "undefined results.", file=sys.stderr)
        # X[X < 0] = 0
        # distance = 1 - (X / X.max())
        # knnmatrix = np.log(distance[:, 1:k2+1])
        #=======================================================================
    else:
        raise ValueError("Parameter 'metric' must be 'vector' or 'distance'.")
    
    # Compute the ML estimate
    S = np.cumsum(knnmatrix, 1)
    indexk = np.arange(k1, k2+1) # broadcasted afterwards
    dhat = -(indexk - 2) / (S[:, k1-1:k2] - knnmatrix[:, k1-1:k2] * indexk)
       
    if estimator == 'levina':  
        # Average over estimates and over values of k
        no_dims = dhat.mean()
    if estimator == 'mackay':
        # Average over inverses
        dhat **= -1
        dhat_k = dhat.mean(0)
        no_dims = (dhat_k ** -1).mean()
           
    return int(no_dims.round())
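A quick self-check of the estimator (the function above only needs NumPy, so this runs as-is): points confined to a 2-D linear subspace of a 10-D ambient space should yield an estimate of about 2.

import numpy as np

rng = np.random.RandomState(0)
latent = rng.randn(1000, 2)                  # 2 intrinsic degrees of freedom
X = latent @ rng.randn(2, 10)                # embedded in 10 ambient dimensions
print(intrinsic_dimension(X, k1=6, k2=12))   # typically 2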
Example #30
def fill_in_data(data_all_original: np.ndarray, data_mean):
    # this will fill in data_all_original's all NaN with data_mean, in corresponding locations.
    data_all_original = data_all_original.copy()
    assert np.isscalar(data_mean)
    data_all_original[np.isnan(data_all_original)] = data_mean
    return data_all_original
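For example:

import numpy as np

raw = np.array([1.0, np.nan, 3.0, np.nan])
print(fill_in_data(raw, 2.0))   # [1. 2. 3. 2.]; raw itself keeps its NaNs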
Example #31
def hubness(D:np.ndarray, k:int=5, metric='distance', 
            verbose:int=0, n_jobs:int=-1):
    """Compute hubness of a distance matrix.
    
    Hubness [1]_ is the skewness of the `k`-occurrence histogram (reverse 
    nearest neighbor count, i.e. how often does a point occur in the 
    `k`-nearest neighbor lists of other points).
    
    Parameters
    ----------
    D : ndarray
        The ``n x n`` symmetric distance (similarity) matrix.
    
    k : int, optional (default: 5)
        Neighborhood size for `k`-occurence.
    
    metric : {'distance', 'similarity'}, optional (default: 'distance')
        Define, whether matrix `D` is a distance or similarity matrix
    
    verbose : int, optional (default: 0)
        Increasing level of output (progress report).
        
    n_jobs : int, optional (default: -1)
        Number of parallel processes spawned for hubness calculation.
        Default value (-1): number of available CPUs.
        
    Returns
    -------
    S_k : float
        Hubness (skewness of `k`-occurence distribution)
    D_k : ndarray
        `k`-nearest neighbor lists
    N_k : ndarray
        `k`-occurence list    
    
    References
    ----------
    .. [1] Radovanović, M., Nanopoulos, A., & Ivanović, M. (2010). 
           Hubs in Space : Popular Nearest Neighbors in High-Dimensional Data. 
           Journal of Machine Learning Research, 11, 2487–2531. Retrieved from 
           http://jmlr.csail.mit.edu/papers/volume11/radovanovic10a/
           radovanovic10a.pdf
    """
    log = Logging.ConsoleLogging()
    IO._check_distance_matrix_shape(D)
    IO._check_valid_metric_parameter(metric)
    if metric == 'distance':
        d_self = np.inf
        sort_order = 1
    if metric == 'similarity':
        d_self = -np.inf
        sort_order = -1
    
    if verbose:
        log.message("Hubness calculation (skewness of {}-occurence)".format(k))
        
    # Initialization
    n = D.shape[0]
    D = D.copy()
    D_k = np.zeros((k, D.shape[1]), dtype=np.float32 )
    
    if issparse(D): 
        pass # correct self-distance must be ensured upstream for sparse
    else:
        # Set self dist to inf
        np.fill_diagonal(D, d_self)
        # make non-finite (NaN, Inf) appear at the end of the sorted list
        D[~np.isfinite(D)] = d_self
                        
    # Parallelization
    if n_jobs == -1: # take all cpus
        NUMBER_OF_PROCESSES = mp.cpu_count()  # @UndefinedVariable
    else:
        NUMBER_OF_PROCESSES = n_jobs
    tasks = []
    
    batches = []
    batch_size = n // NUMBER_OF_PROCESSES
    for i in range(NUMBER_OF_PROCESSES-1):
        batches.append( np.arange(i*batch_size, (i+1)*batch_size) )
    batches.append( np.arange((NUMBER_OF_PROCESSES-1)*batch_size, n) )
    
    for idx, batch in enumerate(batches):
        submatrix = D[batch[0]:batch[-1]+1]
        tasks.append((_partial_hubness, 
                     (k, d_self, log, sort_order, 
                      batch, submatrix, idx, n, verbose)))   
    
    task_queue = mp.Queue()  # @UndefinedVariable
    done_queue = mp.Queue()  # @UndefinedVariable
    
    for task in tasks:
        task_queue.put(task)
        
    for i in range(NUMBER_OF_PROCESSES):  # @UnusedVariable
        mp.Process(target=_worker, args=(task_queue, done_queue)).start()  # @UndefinedVariable
    
    for i in range(len(tasks)):  # @UnusedVariable
        rows, Dk_part = done_queue.get()
        D_k[:, rows[0]:rows[-1]+1] = Dk_part
        
    for i in range(NUMBER_OF_PROCESSES):  # @UnusedVariable
        task_queue.put('STOP')        
               
    # k-occurence
    N_k = np.bincount(D_k.astype(int).ravel())    
    # Hubness
    S_k = stats.skew(N_k)
     
    if verbose:
        log.message("Hubness calculation done.", flush=True)
        
    # return hubness, k-nearest neighbors, N occurence
    return S_k, D_k, N_k
Example #32
def soft_convert_objects(
    values: np.ndarray,
    datetime: bool = True,
    numeric: bool = True,
    timedelta: bool = True,
    coerce: bool = False,
    copy: bool = True,
):
    """ if we have an object dtype, try to coerce dates and/or numbers """

    validate_bool_kwarg(datetime, "datetime")
    validate_bool_kwarg(numeric, "numeric")
    validate_bool_kwarg(timedelta, "timedelta")
    validate_bool_kwarg(coerce, "coerce")
    validate_bool_kwarg(copy, "copy")

    conversion_count = sum((datetime, numeric, timedelta))
    if conversion_count == 0:
        raise ValueError("At least one of datetime, numeric or timedelta must be True.")
    elif conversion_count > 1 and coerce:
        raise ValueError(
            "Only one of 'datetime', 'numeric' or "
            "'timedelta' can be True when coerce=True."
        )

    if not is_object_dtype(values.dtype):
        # If not object, do not attempt conversion
        values = values.copy() if copy else values
        return values

    # If 1 flag is coerce, ensure 2 others are False
    if coerce:
        # Immediate return if coerce
        if datetime:
            from pandas import to_datetime

            return to_datetime(values, errors="coerce").to_numpy()
        elif timedelta:
            from pandas import to_timedelta

            return to_timedelta(values, errors="coerce").to_numpy()
        elif numeric:
            from pandas import to_numeric

            return to_numeric(values, errors="coerce")

    # Soft conversions
    if datetime:
        # GH 20380, when datetime is beyond year 2262, hence outside
        # bound of nanosecond-resolution 64-bit integers.
        try:
            values = lib.maybe_convert_objects(values, convert_datetime=True)
        except OutOfBoundsDatetime:
            pass

    if timedelta and is_object_dtype(values.dtype):
        # Object check to ensure only run if previous did not convert
        values = lib.maybe_convert_objects(values, convert_timedelta=True)

    if numeric and is_object_dtype(values.dtype):
        try:
            converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True)
            # If all NaNs, then do not-alter
            values = converted if not isna(converted).all() else values
            values = values.copy() if copy else values
        except Exception:
            pass

    return values
Example #33
def create_dla_partitions(
    dataset: XY,
    dirichlet_dist: np.ndarray = np.empty(0),
    num_partitions: int = 100,
    concentration: float = 0.5,
) -> Tuple[np.ndarray, XYList]:
    """Create imbalanced non-iid partitions using Latent Dirichlet
    Allocation (LDA) without resampling.
    Args:
        dataset (XY): Datasets containing samples X
            and labels Y.
        dirichlet_dist (numpy.ndarray, optional): previously generated distribution to
            be used. This is useful when applying the same distribution for train and
            validation sets.
        num_partitions (int, optional): Number of partitions to be created.
            Defaults to 100.
        concentration (float, optional): Dirichlet Concentration (:math:`\\alpha`)
            parameter.
            An :math:`\\alpha \\to \\infty` generates uniform distributions over classes.
            An :math:`\\alpha \\to 0.0` generates one class per client. Defaults to 0.5.
    Returns:
        Tuple[numpy.ndarray, XYList]: List of XYList containing partitions
            for each dataset.
    """

    x, y = dataset
    x, y = shuffle(x, y)
    x, y = sort_by_label(x, y)
    x_l: List[np.ndarray] = list(x)

    # Get number of classes and verify that they match the provided Dirichlet distribution, if any
    classes, num_samples_per_class = np.unique(y, return_counts=True)
    num_classes: int = classes.size
    remaining_indices = [j for j in range(num_classes)]

    if dirichlet_dist.size != 0:
        dist_num_partitions, dist_num_classes = dirichlet_dist.shape
        if dist_num_classes != num_classes:
            raise ValueError(f"""Number of classes in dataset ({num_classes})
              differs from the one in the provided partitions {dist_num_classes}."""
                             )
        if dist_num_partitions != num_partitions:
            raise ValueError(
                f"""The value in `num_partitions` ({num_partitions})
                differs from the one from `dirichlet_dist` {dist_num_partitions}."""
            )

    # Assuming balanced distribution
    num_samples = x.shape[0]
    num_samples_per_partition = num_samples // num_partitions

    boundaries: List[int] = np.append([0],
                                      np.cumsum(num_samples_per_class,
                                                dtype=int))
    list_samples_per_class: List[List[np.ndarray]] = [
        x_l[boundaries[idx]:boundaries[idx + 1]]  # noqa: E203
        for idx in range(num_classes)  # noqa: E203
    ]

    if dirichlet_dist.size == 0:
        dirichlet_dist = np.random.dirichlet(alpha=[concentration] *
                                             num_classes,
                                             size=num_partitions)
    original_dirichlet_dist = dirichlet_dist.copy()

    data: List[List[Optional[np.ndarray]]] = [[]
                                              for _ in range(num_partitions)]
    target: List[List[Optional[np.ndarray]]] = [[]
                                                for _ in range(num_partitions)]

    for partition_id in range(num_partitions):
        for _ in range(num_samples_per_partition):
            sample_class: int = np.where(
                np.random.multinomial(1, dirichlet_dist[partition_id]) ==
                1)[0][0]
            sample: np.ndarray = list_samples_per_class[sample_class].pop()

            data[partition_id].append(sample)
            target[partition_id].append(sample_class)

            # If last sample of the class was drawn,
            # then set pdf to zero for that class.
            num_samples_per_class[sample_class] -= 1
            if num_samples_per_class[sample_class] == 0:
                remaining_indices.remove(
                    np.where(classes == sample_class)[0][0])
                # Be careful to distinguish between original zero-valued
                # classes and classes that are empty
                dirichlet_dist[:, sample_class] = 0.0
                dirichlet_dist[:, remaining_indices] += 1e-5

                sum_rows = np.sum(dirichlet_dist, axis=1)
                dirichlet_dist = dirichlet_dist / (sum_rows[:, np.newaxis] +
                                                   np.finfo(float).eps)

    partitions = [(np.concatenate([data[idx]]),
                   np.concatenate([target[idx]])[..., np.newaxis])
                  for idx in range(num_partitions)]

    return partitions, original_dirichlet_dist
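For intuition, the core of the partitioning above can be reproduced standalone: each client's class proportions are drawn from a Dirichlet prior and individual samples are assigned by categorical draws. The toy sizes below are made up for illustration, and this sketch skips the class-exhaustion and renormalization logic of the full function:

import numpy as np

num_classes, num_partitions, concentration = 3, 4, 0.5
rng = np.random.default_rng(0)
dirichlet_dist = rng.dirichlet([concentration] * num_classes, size=num_partitions)

samples_per_partition = 10
for p in range(num_partitions):
    labels = rng.choice(num_classes, size=samples_per_partition, p=dirichlet_dist[p])
    print(f"partition {p}: class counts = {np.bincount(labels, minlength=num_classes)}")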
Exemplo n.º 34
0
    def _minimal_perturbation(self, x: np.ndarray, y: np.ndarray,
                              mask: np.ndarray) -> np.ndarray:
        """
        Iteratively compute the minimal perturbation necessary to make the class prediction change. Stop when the
        first adversarial example is found.

        :param x: An array with the original inputs.
        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes).
        :param mask: Mask applied to the perturbation; either broadcastable over all inputs or one mask per input.
        :return: An array holding the adversarial examples.
        """
        adv_x = x.copy()

        # Compute perturbation with implicit batching
        for batch_id in range(
                int(np.ceil(adv_x.shape[0] / float(self.batch_size)))):
            batch_index_1, batch_index_2 = (
                batch_id * self.batch_size,
                (batch_id + 1) * self.batch_size,
            )
            batch = adv_x[batch_index_1:batch_index_2]
            batch_labels = y[batch_index_1:batch_index_2]

            mask_batch = mask
            if mask is not None:
                # Here we need to make a distinction: if the masks are different for each input, we need to index
                # those for the current batch. Otherwise (i.e. mask is meant to be broadcasted), keep it as it is.
                if len(mask.shape) == len(x.shape):
                    mask_batch = mask[batch_index_1:batch_index_2]

            # Get perturbation
            perturbation = self._compute_perturbation(batch, batch_labels,
                                                      mask_batch)

            # Get current predictions
            active_indices = np.arange(len(batch))

            if isinstance(self.eps, np.ndarray):
                if len(self.eps.shape) == len(
                        x.shape) and self.eps.shape[0] == x.shape[0]:
                    current_eps = self.eps_step[batch_index_1:batch_index_2]
                    partial_stop_condition = (
                        current_eps <=
                        self.eps[batch_index_1:batch_index_2]).all()

                else:
                    current_eps = self.eps_step
                    partial_stop_condition = (current_eps <= self.eps).all()

            else:
                current_eps = self.eps_step
                partial_stop_condition = current_eps <= self.eps

            while active_indices.size > 0 and partial_stop_condition:
                # Adversarial crafting
                current_x = self._apply_perturbation(
                    x[batch_index_1:batch_index_2], perturbation, current_eps)

                # Update
                batch[active_indices] = current_x[active_indices]
                adv_preds = self.estimator.predict(batch)

                # If targeted active check to see whether we have hit the target, otherwise head to anything but
                if self.targeted:
                    active_indices = np.where(
                        np.argmax(batch_labels, axis=1) != np.argmax(
                            adv_preds, axis=1))[0]
                else:
                    active_indices = np.where(
                        np.argmax(batch_labels, axis=1) == np.argmax(
                            adv_preds, axis=1))[0]

                # Update current eps and check the stop condition
                if isinstance(self.eps, np.ndarray):
                    if len(self.eps.shape) == len(
                            x.shape) and self.eps.shape[0] == x.shape[0]:
                        current_eps = current_eps + self.eps_step[
                            batch_index_1:batch_index_2]
                        partial_stop_condition = (
                            current_eps <=
                            self.eps[batch_index_1:batch_index_2]).all()

                    else:
                        current_eps = current_eps + self.eps_step
                        partial_stop_condition = (current_eps <=
                                                  self.eps).all()

                else:
                    current_eps = current_eps + self.eps_step
                    partial_stop_condition = current_eps <= self.eps

            adv_x[batch_index_1:batch_index_2] = batch

        return adv_x
Exemplo n.º 35
0
 def __init__(self, x: np.ndarray):
     """
     :param x: list of the rocket's coordinates, m
     """
     self.x = x.copy()
     self.dim = len(x)
Exemplo n.º 36
0
    def _compute(
        self,
        x: np.ndarray,
        x_init: np.ndarray,
        y: np.ndarray,
        mask: Optional[np.ndarray],
        eps: Union[int, float, np.ndarray],
        eps_step: Union[int, float, np.ndarray],
        project: bool,
        random_init: bool,
    ) -> np.ndarray:
        if random_init:
            n = x.shape[0]
            m = np.prod(x.shape[1:]).item()
            random_perturbation = random_sphere(n, m, eps, self.norm).reshape(
                x.shape).astype(ART_NUMPY_DTYPE)
            if mask is not None:
                random_perturbation = random_perturbation * (
                    mask.astype(ART_NUMPY_DTYPE))
            x_adv = x.astype(ART_NUMPY_DTYPE) + random_perturbation

            if self.estimator.clip_values is not None:
                clip_min, clip_max = self.estimator.clip_values
                x_adv = np.clip(x_adv, clip_min, clip_max)
        else:
            if x.dtype == object:
                x_adv = x.copy()
            else:
                x_adv = x.astype(ART_NUMPY_DTYPE)

        # Compute perturbation with implicit batching
        for batch_id in range(int(np.ceil(x.shape[0] /
                                          float(self.batch_size)))):
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (
                batch_id + 1) * self.batch_size
            batch_index_2 = min(batch_index_2, x.shape[0])
            batch = x_adv[batch_index_1:batch_index_2]
            batch_labels = y[batch_index_1:batch_index_2]

            mask_batch = mask
            if mask is not None:
                # Here we need to make a distinction: if the masks are different for each input, we need to index
                # those for the current batch. Otherwise (i.e. mask is meant to be broadcasted), keep it as it is.
                if len(mask.shape) == len(x.shape):
                    mask_batch = mask[batch_index_1:batch_index_2]

            # Get perturbation
            perturbation = self._compute_perturbation(batch, batch_labels,
                                                      mask_batch)

            # Compute batch_eps and batch_eps_step
            if isinstance(eps, np.ndarray):
                if len(eps.shape) == len(
                        x.shape) and eps.shape[0] == x.shape[0]:
                    batch_eps = eps[batch_index_1:batch_index_2]
                    batch_eps_step = eps_step[batch_index_1:batch_index_2]

                else:
                    batch_eps = eps
                    batch_eps_step = eps_step

            else:
                batch_eps = eps
                batch_eps_step = eps_step

            # Apply perturbation and clip
            x_adv[batch_index_1:batch_index_2] = self._apply_perturbation(
                batch, perturbation, batch_eps_step)

            if project:
                if x_adv.dtype == object:
                    for i_sample in range(batch_index_1, batch_index_2):
                        if isinstance(
                                batch_eps, np.ndarray
                        ) and batch_eps.shape[0] == x_adv.shape[0]:
                            perturbation = projection(
                                x_adv[i_sample] - x_init[i_sample],
                                batch_eps[i_sample], self.norm)

                        else:
                            perturbation = projection(
                                x_adv[i_sample] - x_init[i_sample], batch_eps,
                                self.norm)

                        x_adv[i_sample] = x_init[i_sample] + perturbation

                else:
                    perturbation = projection(
                        x_adv[batch_index_1:batch_index_2] -
                        x_init[batch_index_1:batch_index_2], batch_eps,
                        self.norm)
                    x_adv[batch_index_1:batch_index_2] = x_init[
                        batch_index_1:batch_index_2] + perturbation

        return x_adv
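After each step the accumulated perturbation is projected back into the eps-ball via the `projection` helper. A minimal sketch of that step for the L-infinity norm (an assumption about the helper's behaviour for `norm=np.inf`; the real ART helper also handles L1/L2 norms and batches):

import numpy as np

def project_linf(x_adv: np.ndarray, x_init: np.ndarray, eps: float) -> np.ndarray:
    # Clip the accumulated perturbation element-wise into [-eps, eps].
    perturbation = np.clip(x_adv - x_init, -eps, eps)
    return x_init + perturbation

x_init = np.array([0.2, 0.5, 0.9])
x_adv = np.array([0.6, 0.4, 0.1])
print(project_linf(x_adv, x_init, eps=0.1))  # -> [0.3 0.4 0.8]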
Exemplo n.º 37
0
def mutual_proximity_gammai(D: np.ndarray, metric: str = "distance", test_set_ind: np.ndarray = None, verbose: int = 0):
    """Transform a distance matrix with Mutual Proximity (indep. Gamma distr.).
    
    Applies Mutual Proximity (MP) [1]_ on a distance/similarity matrix. Gammai 
    variant assumes independent Gamma distributed distances (FAST).
    The resulting secondary distance/similarity matrix should show lower hubness.
    
    Parameters
    ----------
    D : ndarray or csr_matrix
        - ndarray: The ``n x n`` symmetric distance or similarity matrix.
        - csr_matrix: The ``n x n`` symmetric similarity matrix.
        
        NOTE: In case of sparse `D`, zeros are interpreted as missing values 
        and ignored during calculations. Thus, results may differ 
        from using a dense version.
    
    metric : {'distance', 'similarity'}, optional (default: 'distance')
        Define whether matrix `D` is a distance or similarity matrix.
        
        NOTE: In case of sparse `D`, only 'similarity' is supported.
        
    test_set_ind : ndarray, optional (default: None)
        Define data points to be held out as part of a test set. Can be:
        
        - None : Rescale all distances
        - ndarray : Hold out points indexed in this array as test set. 
        
    verbose : int, optional (default: 0)
        Increasing level of output (progress report).
        
    Returns
    -------
    D_mp : ndarray
        Secondary distance MP gammai matrix.
    
    References
    ----------
    .. [1] Schnitzer, D., Flexer, A., Schedl, M., & Widmer, G. (2012). 
           Local and global scaling reduce hubs in space. The Journal of Machine 
           Learning Research, 13(1), 2871–2902.
    """
    # Initialization
    n = D.shape[0]
    log = Logging.ConsoleLogging()

    # Checking input
    IO._check_distance_matrix_shape(D)
    IO._check_valid_metric_parameter(metric)
    if metric == "similarity":
        self_value = 1
    else:  # metric == 'distance':
        self_value = 0
    if test_set_ind is None:
        train_set_ind = slice(0, n)
    else:
        train_set_ind = np.setdiff1d(np.arange(n), test_set_ind)

    # Start MP
    if verbose:
        log.message("Mutual proximity Gammai rescaling started.", flush=True)
    D = D.copy()

    if issparse(D):
        return _mutual_proximity_gammai_sparse(D, test_set_ind, verbose, log)

    np.fill_diagonal(D, np.nan)

    mu = np.nanmean(D[train_set_ind], 0)
    va = np.nanvar(D[train_set_ind], 0, ddof=1)
    A = (mu ** 2) / va
    B = va / mu

    D_mp = np.zeros_like(D)

    # MP gammai
    for i in range(n):
        if verbose and ((i + 1) % 1000 == 0 or i + 1 == n):
            log.message("MP_gammai: {} of {}".format(i + 1, n), flush=True)
        j_idx = slice(i + 1, n)

        if metric == "similarity":
            p1 = _local_gamcdf(D[i, j_idx], A[i], B[i])
            p2 = _local_gamcdf(D[j_idx, i], A[j_idx], B[j_idx])
            D_mp[i, j_idx] = (p1 * p2).ravel()
        else:  # distance
            p1 = 1 - _local_gamcdf(D[i, j_idx], A[i], B[i])
            p2 = 1 - _local_gamcdf(D[j_idx, i], A[j_idx], B[j_idx])
            D_mp[i, j_idx] = (1 - p1 * p2).ravel()

    # Mirroring the matrix
    D_mp += D_mp.T
    # set correct self dist/sim
    np.fill_diagonal(D_mp, self_value)

    return D_mp
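The update above presumably relies on `_local_gamcdf` being a Gamma CDF with method-of-moments parameters (shape A = mu**2/var, scale B = var/mu). Under that assumption, the per-pair computation can be sketched with SciPy on a toy matrix:

import numpy as np
from scipy.stats import gamma

rng = np.random.default_rng(0)
D = rng.random((5, 5))
D = (D + D.T) / 2                  # symmetric toy distance matrix
np.fill_diagonal(D, np.nan)

mu = np.nanmean(D, axis=0)
va = np.nanvar(D, axis=0, ddof=1)
A, B = mu**2 / va, va / mu

i, j = 0, 1
p1 = 1 - gamma.cdf(D[i, j], a=A[i], scale=B[i])
p2 = 1 - gamma.cdf(D[j, i], a=A[j], scale=B[j])
print("secondary distance D_mp[i, j]:", 1 - p1 * p2)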
Exemplo n.º 38
0
 def homogeneous_func(values: np.ndarray):
     if values.size == 0:
         return values.copy()
     return np.apply_along_axis(func, self.axis, values)
Exemplo n.º 39
0
def time_conv(
        dt: float,
        c_in: _np.ndarray,
        rtd: _np.ndarray,
        c_equilibration: _typing.Optional[_np.ndarray] = None,
        logger: _typing.Optional[_logger.RtdLogger] = None) -> _np.ndarray:
    """Perform convolution on time axis.

    First time-point of `c_in` and `rtd` is at t == 0 (and not `dt`).

    Convolution is applied to all species of `c_in`.

    Parameters
    ----------
    dt
        Time step.
    c_in
        Starting concentration profile for each species.

        `c_in`.shape == [n_species, n_time_steps]
    rtd
        Residence time distribution (= unit impulse response).
    c_equilibration
        Initial concentrations inside the unit operation.

        E.g.: Composition of equilibration buffer for flow-through
        chromatography.
    logger
        Logger for messaging events.

    Returns
    -------
    c_out: ndarray
        Final concentration profile for each species.

        `c_out`.shape == `c_in`.shape

    """

    # If the input array is empty, just return a copy of it.
    if c_in.size == 0:
        if logger:
            logger.i("Convolution: Got empty c_in")
        return c_in.copy()
    if rtd.size == 0:
        if logger:
            logger.w("Convolution: Got empty bio_rtd")
        return c_in.copy()

    if c_equilibration is not None and _np.all(c_equilibration == 0):
        c_equilibration = None

    c_out = _np.zeros_like(c_in)

    # simulate pre-flushing and washout
    c_ext = c_in

    n_prepend = rtd.size if c_equilibration is not None else 0
    if c_equilibration is not None:
        c_ext = _np.pad(c_ext, ((0, 0), (n_prepend, 0)), mode="constant")
        c_ext[:, :n_prepend] = c_equilibration

    # convolution
    for j in range(c_out.shape[0]):
        c_out[j] = _np.convolve(c_ext[j],
                                rtd)[n_prepend:n_prepend + c_in.shape[1]] * dt

    return c_out
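In short, the outlet profile is the discrete convolution of the (optionally equilibration-padded) inlet profile with the RTD, scaled by `dt`. A standalone sketch for a single species with a made-up exponential RTD:

import numpy as np

dt = 0.1
t = np.arange(0, 20, dt)
rtd = np.exp(-t / 2.0)
rtd /= rtd.sum() * dt              # normalize the impulse response to unit area

c_in = np.ones_like(t)             # step inlet concentration
c_out = np.convolve(c_in, rtd)[:t.size] * dt
print(c_out[0], c_out[-1])         # rises from ~0 towards 1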
Exemplo n.º 40
0
 def make_move(self, board: np.ndarray, move: int) -> np.ndarray:
     moving_player = self.get_active_player(board)
     new_board: np.ndarray = board.copy()
     i, j = move // self.board_width, move % self.board_width
     new_board[i, j] = moving_player
     return new_board
Exemplo n.º 41
0
 def proximal_l0(self, x: np.ndarray, eta: float) -> np.ndarray:
     z = x.copy()
     z[np.abs(x) < np.sqrt(2 * eta)] = 0
     return z
Exemplo n.º 42
0
 def _impute_inactive(self, X: np.ndarray) -> np.ndarray:
     X = X.copy()
     X[~np.isfinite(X)] = -1
     return X
Exemplo n.º 43
0
def draw_inference_on_hook2(img: np.ndarray,
                            cleaned_keypoints,
                            kpt_labels: List[str],
                            kpt_skeleton: List[list],
                            score: float,
                            bbox: BBox,
                            vis_keypoints: list,
                            kpt_confidences: list,
                            conf_idx_list: list,
                            not_conf_idx_list: list,
                            conf_keypoints,
                            conf_kpt_labels,
                            not_conf_keypoints,
                            not_conf_kpt_labels,
                            conf_thresh: float = 0.3,
                            show_bbox_border: bool = False,
                            bbox_label_mode: str = 'euler',
                            index_offset: int = 0,
                            diameter=1):
    # printj.red(len(vis_keypoints))
    result = img.copy()
    # diameter = 10
    # printj.yellow(f'bbox = {bbox}')
    # printj.yellow(f'vis_keypoints = {vis_keypoints}')
    # printj.yellow(f'dist = {self.dist([0, 1], [1, 2])}')
    point_a = vis_keypoints[0]
    point_b = vis_keypoints[1]
    point_cb = vis_keypoints[2]
    point_c = vis_keypoints[3]
    point_cd = vis_keypoints[4]
    point_d = vis_keypoints[5]
    point_e = vis_keypoints[6]
    # point_dl = vis_keypoints[5]
    # point_dr = vis_keypoints[6]
    len_ab = dist(point_a, point_b)
    # printj.red(len_ab)
    if diameter <= 0:
        length_ratio = np.inf
    else:
        length_ratio = len_ab / diameter
    pass_condition = (length_ratio > 4)
    bbox_color = [0, 0, 255]  # fail: ab < 4*d_rl
    kpt_ab_color = [50, 255, 255]
    c_text = 'ab < 4D'
    if pass_condition:
        bbox_color = [0, 255, 0]  # pass: ab > 4*d_rl
        kpt_ab_color = [0, 255, 0]
        c_text = 'ab > 4D'

    # bbox_height = np.absolute(bbox.ymax - bbox.ymin)
    # bbox_width = np.absolute(bbox.xmax - bbox.xmin)
    # length_diff = np.absolute(bbox_height - bbox_width)
    # if bbox_height > bbox_width:
    #     bbox.xmin = bbox.xmin - int(length_diff/2)
    #     bbox.xmax = bbox.xmax + int(length_diff/2)
    # else:
    #     bbox.ymin = bbox.ymin - int(length_diff/2)
    #     bbox.ymax = bbox.ymax + int(length_diff/2)

    # printj.cyan(bbox)
    # printj.cyan(bbox.to_int())
    # printj.cyan(bbox.to_int().to_list())
    if bbox_label_mode == 'euler':
        # bbox_text = str(round(length_ratio, 2)) + 'D'
        bbox_text = f'h {score}'
        result = draw_bbox(img=result,
                           color=bbox_color,
                           bbox=bbox,
                           text=bbox_text,
                           label_only=not show_bbox_border,
                           label_orientation='top')
        # result = draw_bbox(img=result, color=bbox_color,  bbox=bbox, text=c_text,
        #                    label_only=not show_bbox_border, label_orientation='bottom')
        result = draw_bbox(img=result,
                           color=bbox_color,
                           bbox=bbox,
                           text=str(score),
                           label_only=not show_bbox_border,
                           label_orientation='bottom')
    result = draw_skeleton(img=result,
                           keypoints=vis_keypoints,
                           keypoint_skeleton=kpt_skeleton,
                           index_offset=index_offset,
                           thickness=2,
                           color=[255, 0, 0],
                           ignore_kpt_idx=[])
    # ab
    result = draw_skeleton(img=result,
                           keypoints=vis_keypoints,
                           keypoint_skeleton=kpt_skeleton,
                           index_offset=index_offset,
                           thickness=2,
                           color=kpt_ab_color,
                           ignore_kpt_idx=[2, 3, 4, 5, 6])
    # d_lr
    # result = draw_skeleton(
    #     img=result, keypoints=vis_keypoints, keypoint_skeleton=kpt_skeleton, index_offset=index_offset, thickness=2, color=[255, 255, 0],
    #     ignore_kpt_idx=[0, 1, 2, 3, 4]
    # )
    result = draw_keypoints(img=result,
                            keypoints=vis_keypoints,
                            radius=2,
                            color=[0, 0, 255],
                            keypoint_labels=kpt_labels,
                            show_keypoints_labels=True,
                            label_thickness=1,
                            ignore_kpt_idx=conf_idx_list)
    if len(conf_keypoints) > 0:
        result = draw_keypoints(img=result,
                                keypoints=vis_keypoints,
                                radius=2,
                                color=[0, 255, 0],
                                keypoint_labels=kpt_labels,
                                show_keypoints_labels=True,
                                label_thickness=1,
                                ignore_kpt_idx=not_conf_idx_list)
    return result, len_ab
Exemplo n.º 44
0
def nankurt(
    values: np.ndarray,
    axis: Optional[int] = None,
    skipna: bool = True,
    mask: Optional[np.ndarray] = None,
) -> float:
    """
    Compute the sample excess kurtosis

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G2, computed directly from the second and fourth
    central moment.

    Parameters
    ----------
    values : ndarray
    axis: int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 1, 3, 2])
    >>> nanops.nankurt(s)
    -1.2892561983471076
    """
    values = extract_array(values, extract_numpy=True)
    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna and mask is not None:
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted ** 2
    adjusted4 = adjusted2 ** 2
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m4 = adjusted4.sum(axis, dtype=np.float64)

    with np.errstate(invalid="ignore", divide="ignore"):
        adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
        numer = count * (count + 1) * (count - 1) * m4
        denom = (count - 2) * (count - 3) * m2 ** 2

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_kurt follow this behavior
    # to fix the fperr to treat denom <1e-14 as zero
    numer = _zero_out_fperr(numer)
    denom = _zero_out_fperr(denom)

    if not isinstance(denom, np.ndarray):
        # if ``denom`` is a scalar, check these corner cases first before
        # doing division
        if count < 4:
            return np.nan
        if denom == 0:
            return 0

    with np.errstate(invalid="ignore", divide="ignore"):
        result = numer / denom - adj

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        result = np.where(denom == 0, 0, result)
        result[count < 4] = np.nan

    return result
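As a hedged cross-check of the value quoted in the docstring: dropping the NaN and computing the bias-corrected (Fisher) kurtosis with SciPy gives the same number.

import numpy as np
from scipy.stats import kurtosis

x = np.array([1.0, np.nan, 1.0, 3.0, 2.0])
print(kurtosis(x[~np.isnan(x)], fisher=True, bias=False))  # ~ -1.2892561983471076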
Exemplo n.º 45
0
def score(D:np.ndarray, target:np.ndarray, k=5, 
          metric:str='distance', test_set_ind:np.ndarray=None, verbose:int=0):
    """Perform `k`-nearest neighbor classification.
    
    Use the ``n x n`` symmetric distance matrix `D` and target class 
    labels `target` to perform a `k`-NN experiment (leave-one-out 
    cross-validation or evaluation of test set; see parameter `test_set_ind`).
    Ties are broken by the nearest neighbor.
    
    Parameters
    ----------
    D : ndarray
        The ``n x n`` symmetric distance (similarity) matrix.
    
    target : ndarray (of dtype=int)
        The ``n x 1`` target class labels (ground truth).
    
    k : int or array_like (of dtype=int), optional (default: 5)
        Neighborhood size for `k`-NN classification.
        For each value in `k`, one `k`-NN experiment is performed.
        
        HINT: Providing more than one value for `k` is a cheap means to perform 
        multiple `k`-NN experiments at once. Try e.g. ``k=[1, 5, 20]``.
    
    metric : {'distance', 'similarity'}, optional (default: 'distance')
        Define whether matrix `D` is a distance or similarity matrix.
    
    test_set_ind : ndarray, optional (default: None)
        Define data points to be held out as part of a test set. Can be:
        
        - None : Perform a LOO-CV experiment
        - ndarray : Hold out points indexed in this array as test set. Fit 
          model to remaining data. Evaluate model on test set.
    
    verbose : int, optional (default: 0)
        Increasing level of output (progress report).
    
    Returns
    -------
    acc : ndarray (shape=(n_k x 1), dtype=float)
        Classification accuracy (`n_k`... number of items in parameter `k`)
        
        HINT: Referring to the above example... 
        ... ``acc[0]`` gives the accuracy of the ``k=1`` experiment.
    corr : ndarray (shape=(n_k x n), dtype=int)
        Raw vectors of correctly classified items
        
        HINT: ... ``corr[1, :]`` gives these items for the ``k=5`` experiment.
    cmat : ndarray (shape=(n_k x n_t x n_t), dtype=int) 
        Confusion matrix (``n_t`` number of unique items in parameter target)
        
        HINT: ... ``cmat[2, :, :]`` gives the confusion matrix of 
        the ``k=20`` experiment.
    """
    
    # Check input sanity
    log = Logging.ConsoleLogging()
    IO._check_distance_matrix_shape(D)
    IO._check_distance_matrix_shape_fits_labels(D, target)
    IO._check_valid_metric_parameter(metric)
    if metric == 'distance':
        d_self = np.inf
        sort_order = 1
    if metric == 'similarity':
        d_self = -np.inf
        sort_order = -1
    
    # Copy, because data is changed
    D = D.copy()
    target = target.astype(int)
    
    if verbose:
        log.message("Start k-NN experiment.")
    # Handle LOO-CV vs. test set mode
    if test_set_ind is None:
        n = D.shape[0]
        test_set_ind = range(n)    # dummy 
        train_set_ind = n   # dummy
    else:  
        # number of points to be classified
        n = test_set_ind.size
        # Indices of training examples
        train_set_ind = np.setdiff1d(np.arange(n), test_set_ind)
    # Number of k-NN parameters
    try:
        k_length = k.size
    except AttributeError as e:
        if isinstance(k, int):
            k = np.array([k])
            k_length = k.size
        elif isinstance(k, list):
            k = np.array(k)
            k_length = k.size
        else:
            raise e
        
    acc = np.zeros((k_length, 1))
    corr = np.zeros((k_length, D.shape[0]))
        
    cl = np.sort(np.unique(target))
    cmat = np.zeros((k_length, len(cl), len(cl)))
    
    classes = target.copy()
    for idx, cur_class in enumerate(cl):
        # change labels to 0, 1, ..., len(cl)-1
        classes[target == cur_class] = idx
    
    cl = range(len(cl))
    
    # Classify each point in test set
    for i in test_set_ind:
        seed_class = classes[i]
        
        if issparse(D):
            row = D.getrow(i).toarray().ravel()
        else:
            row = D[i, :]
        row[i] = d_self
        
        # Sort points in training set according to distance
        # Randomize, in case there are several points of same distance
        # (this is especially relevant for SNN rescaling)
        rp = train_set_ind
        rp = np.random.permutation(rp)
        d2 = row[rp]
        d2idx = np.argsort(d2, axis=0)[::sort_order]
        idx = rp[d2idx]      
        
        # More than one k is useful for cheap multiple k-NN experiments at once
        for j in range(k_length):
            nn_class = classes[idx[0:k[j]]]
            cs = np.bincount(nn_class.astype(int))
            max_cs = np.where(cs == np.max(cs))[0]
            
            # "tie": use nearest neighbor
            if len(max_cs) > 1:
                if seed_class == nn_class[0]:
                    acc[j] += 1/n 
                    corr[j, i] = 1
                cmat[j, seed_class, nn_class[0]] += 1       
            # majority vote
            else:
                if cl[max_cs[0]] == seed_class:
                    acc[j] += 1/n
                    corr[j, i] = 1
                cmat[j, seed_class, cl[max_cs[0]]] += 1
                       
    if verbose:
        log.message("Finished k-NN experiment.")
        
    return acc, corr, cmat
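The heart of the leave-one-out experiment is: mask the self-distance, pick the k nearest training points, and vote (with ties broken by the single nearest neighbour). A minimal standalone sketch for k=1 on a toy symmetric distance matrix:

import numpy as np

D = np.array([[0.0, 0.1, 0.9, 0.8],
              [0.1, 0.0, 0.8, 0.9],
              [0.9, 0.8, 0.0, 0.2],
              [0.8, 0.9, 0.2, 0.0]])
target = np.array([0, 0, 1, 1])

correct = 0
for i in range(D.shape[0]):
    row = D[i].copy()
    row[i] = np.inf                # exclude the query point itself
    nn = np.argmin(row)
    correct += int(target[nn] == target[i])
print("LOO 1-NN accuracy:", correct / D.shape[0])  # -> 1.0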
Exemplo n.º 46
0
def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: Optional[str] = None,
    mask: Optional[np.ndarray] = None,
) -> Tuple[np.ndarray, Optional[np.ndarray], np.dtype, np.dtype, Any]:
    """
    Utility to get the values view, mask, dtype, dtype_max, and fill_value.

    If both mask and fill_value/fill_value_typ are not None and skipna is True,
    the values array will be copied.

    For input arrays of boolean or integer dtypes, copies will only occur if a
    precomputed mask, a fill_value/fill_value_typ, and skipna=True are
    provided.

    Parameters
    ----------
    values : ndarray
        input array to potentially compute mask for
    skipna : bool
        boolean for whether NaNs should be skipped
    fill_value : Any
        value to fill NaNs with
    fill_value_typ : str
        Set to '+inf' or '-inf' to handle dtype-specific infinities
    mask : Optional[np.ndarray]
        nan-mask if known

    Returns
    -------
    values : ndarray
        Potential copy of input value array
    mask : Optional[ndarray[bool]]
        Mask for values, if deemed necessary to compute
    dtype : dtype
        dtype for values
    dtype_max : dtype
        platform independent dtype
    fill_value : Any
        fill value used
    """
    # In _get_values is only called from within nanops, and in all cases
    #  with scalar fill_value.  This guarantee is important for the
    #  maybe_upcast_putmask call below
    assert is_scalar(fill_value)
    values = extract_array(values, extract_numpy=True)

    mask = _maybe_get_mask(values, skipna, mask)

    dtype = values.dtype

    if needs_i8_conversion(values.dtype):
        # changing timedelta64/datetime64 to int64 needs to happen after
        #  finding `mask` above
        values = np.asarray(values.view("i8"))

    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(
        dtype, fill_value=fill_value, fill_value_typ=fill_value_typ
    )

    if skipna and (mask is not None) and (fill_value is not None):
        values = values.copy()
        if dtype_ok and mask.any():
            np.putmask(values, mask, fill_value)

        # promote if needed
        else:
            values, _ = maybe_upcast_putmask(values, mask, fill_value)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64

    return values, mask, dtype, dtype_max, fill_value
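A toy illustration of the fill step (assumed typical usage, not pandas-internal code): with `skipna=True` and a known mask, NaNs are overwritten with the fill value on a copy, so the caller's array is untouched.

import numpy as np

values = np.array([1.0, np.nan, 3.0])
mask = np.isnan(values)
filled = values.copy()
np.putmask(filled, mask, -np.inf)  # e.g. fill_value_typ='-inf' before a max reduction
print(filled, values)              # the original still contains NaN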
Exemplo n.º 47
0
def etrs_to_enu(positions: np.ndarray,
                location: EarthLocation = nenufar_position) -> np.ndarray:
    r""" Local east, north, up (ENU) coordinates centered on the 
        position ``location``.

        The conversion from cartesian coordinates :math:`(x, y, z)`
        to ENU :math:`(e, n, u)` is done as follows:

        .. math::
                \pmatrix{
                    e \\
                    n \\
                    u
                } =
                \pmatrix{
                    -\sin(b) & \cos(b) & 0\\
                    -\sin(l) \cos(b) & -\sin(l) \sin(b) & \cos(l)\\
                    \cos(l)\cos(b) & \cos(l) \sin(b) & \sin(l)
                }
                \pmatrix{
                    \delta x\\
                    \delta y\\
                    \delta z
                }

        where :math:`b` is the longitude, :math:`l` is the
        latitude and :math:`(\delta x, \delta y, \delta z)` are
        the cartesian coordinates with respect to the center
        ``location``.
    
        :param positions:
            ETRS positions
        :type positions:
            :class:`~numpy.ndarray`
        :param location:
            Center of ENU frame. Default is NenuFAR's location.
        :type location:
            :class:`~astropy.coordinates.EarthLocation`

        :returns:
            ENU coordinates, same shape as ``positions``. 
        :rtype:
            :class:`~numpy.ndarray`
        
        :Example:
            .. code-block:: python

                from nenupy import nenufar_position
                from nenupy.astro import etrs_to_enu

                etrs_positions = np.array([
                    [4323934.57369062,  165585.71569665, 4670345.01314493],
                    [4323949.24009871,  165567.70236494, 4670332.18016874]
                ])
                enu = etrs_to_enu(
                    positions=etrs_positions,
                    location=nenufar_position
                )

    """
    assert (len(positions.shape)==2) and positions.shape[1]==3,\
        'positions should be an array of shape (n, 3)'
    xyz = positions.copy()
    xyz_center = geo_to_etrs(location)
    xyz -= xyz_center

    cos_lat = np.cos(location.lat.rad)
    sin_lat = np.sin(location.lat.rad)
    cos_lon = np.cos(location.lon.rad)
    sin_lon = np.sin(location.lon.rad)
    transformation = np.array(
        [[-sin_lon, cos_lon, 0],
         [-sin_lat * cos_lon, -sin_lat * sin_lon, cos_lat],
         [cos_lat * cos_lon, cos_lat * sin_lon, sin_lat]])

    return np.matmul(xyz, transformation.T)
Exemplo n.º 48
0
def nanskew(
    values: np.ndarray,
    axis: Optional[int] = None,
    skipna: bool = True,
    mask: Optional[np.ndarray] = None,
) -> float:
    """
    Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson standardized
    moment coefficient G1. The algorithm computes this coefficient directly
    from the second and third central moment.

    Parameters
    ----------
    values : ndarray
    axis: int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float64
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, np.nan, 1, 2])
    >>> nanops.nanskew(s)
    1.7320508075688787
    """
    values = extract_array(values, extract_numpy=True)
    mask = _maybe_get_mask(values, skipna, mask)
    if not is_float_dtype(values.dtype):
        values = values.astype("f8")
        count = _get_counts(values.shape, mask, axis)
    else:
        count = _get_counts(values.shape, mask, axis, dtype=values.dtype)

    if skipna and mask is not None:
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna and mask is not None:
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted ** 2
    adjusted3 = adjusted2 * adjusted
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m3 = adjusted3.sum(axis, dtype=np.float64)

    # floating point error
    #
    # #18044 in _libs/windows.pyx calc_skew follow this behavior
    # to fix the fperr to treat m2 <1e-14 as zero
    m2 = _zero_out_fperr(m2)
    m3 = _zero_out_fperr(m3)

    with np.errstate(invalid="ignore", divide="ignore"):
        result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2 ** 1.5)

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        result = np.where(m2 == 0, 0, result)
        result[count < 3] = np.nan
        return result
    else:
        result = 0 if m2 == 0 else result
        if count < 3:
            return np.nan
        return result
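The analogous cross-check for skewness: SciPy's bias-corrected estimator on the non-NaN values reproduces the docstring value.

import numpy as np
from scipy.stats import skew

x = np.array([1.0, np.nan, 1.0, 2.0])
print(skew(x[~np.isnan(x)], bias=False))  # ~ 1.7320508075688787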
Exemplo n.º 49
0
def logmart(A: np.ndarray, b: np.ndarray,
            *,
            relax: float = 1.,
            x0: float = None,
            sigma: float = None,
            max_iter: int = 200) -> tuple:
    """
    Displays the change in chi-square at each iteration.
    The iteration stops if chi-square increases.
    A is an N x M array
    b is an N x 1 vector
    returns an M x 1 vector

    relax        user-specified relaxation constant (default is 1.)
    x0           user-specified initial guess (M vector) (default: backproject b, i.e., A.T @ b)
    max_iter     user-specified maximum number of iterations (default is 200)

    AUTHOR:	Joshua Semeter
    LAST MODIFIED:	5-2015

      Simple test problem
    A = np.diag([5, 5, 5])
    x = np.array([1,2,3])
    b = A @ x
    """
# %% parameter check
    if b.ndim != 1:
        raise ValueError('b must be a 1-D vector')
    if A.ndim != 2:
        raise ValueError('A must be a matrix')
    if A.shape[0] != b.size:
        raise ValueError('A and b number of rows must match')
    if not isinstance(relax, float):
        raise ValueError('relax must be a scalar float')

    b = b.copy()  # needed to avoid modifying outside this function!
# %% set defaults
    if sigma is None:
        sigma = np.ones_like(b)

    if x0 is None:  # backproject
        x = A.T @ b / A.ravel().sum()
        xA = A @ x
        x = x * b.max() / xA.max()
    elif isinstance(x0, (float, int)) or x0.size == 1:  # replicate
        x = x0 * np.ones_like(b)
    else:
        x = x0
# %% make sure there are no 0's in y
    b[b <= 1e-8] = 1e-8
    # W=sigma;
    # W=linspace(1,0,size(A,1))';
    # W=rand(size(A,1),1);
    W = np.ones(A.shape[0])
    W = W / W.sum()

    i = 0
    done = False
    arg = ((A @ x - b)/sigma)**2.
    chi2 = np.sqrt(arg.sum())

# %%  iterate solution, plot estimated data (diag elems of x#A)
    while not done:
        i += 1
        xold = x
        xA = A @ x
        t = (1/xA).min()
        C = relax*t*(1.-(xA/b))
        x = x / (1 - x*(A.T @ (W*C)))
# %% monitor solution
        chiold = chi2
        chi2 = np.sqrt((((xA - b)/sigma)**2).sum())
        # dchi2=(chi2-chiold);
        done = ((chi2 > chiold) & (i > 2)) | (i == max_iter) | (chi2 < 0.7)
# %% plot
#        figure(9); clf; hold off;
#        Nest=reshape(x,69,83);
#        imagesc(Nest); caxis([0,1e11]);
#        set(gca,'YDir','normal'); set(gca,'XDir','normal');
#        pause(0.02)
    y_est = A @ xold

    return xold, y_est, chi2, i
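A usage sketch based on the simple test problem from the docstring, assuming the `logmart` function above is available in the same module; with A = 5*I the backprojected initial guess already solves the system, so the iteration stops almost immediately.

import numpy as np

A = np.diag([5.0, 5.0, 5.0])
x_true = np.array([1.0, 2.0, 3.0])
b = A @ x_true

x_est, y_est, chi2, n_iter = logmart(A, b)
print(x_est)   # close to [1, 2, 3]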
Exemplo n.º 50
0
def piece_wise_time_conv(
        dt: float,
        f_in: _np.ndarray,
        c_in: _np.ndarray,
        t_cycle: float,
        rt_mean: float,
        rtd: _np.ndarray,
        c_equilibration: _typing.Optional[_np.ndarray] = None,
        c_wash: _typing.Optional[_np.ndarray] = None,
        logger: _typing.Optional[_logger.RtdLogger] = None) -> _np.ndarray:
    """Perform convolution on time axis with periodic switching.

    First time-point of `c_in` and `rtd` is at t == 0 (and not `dt`).

    Convolution is applied to all species of `c_in`.

    Parameters
    ----------
    dt
        Time step.
    f_in
        Flow rate profile. It has to be either constant or box-shaped.
    c_in
        Starting concentration profile for each species.

        `c_in`.shape == [n_species, n_time_steps]
    t_cycle
        Switch cycle duration.
    rt_mean
        Delay between inlet and outlet switch times.
    rtd
        Residence time distribution (= unit impulse response).
    c_equilibration
        Composition of equilibration buffer.
    c_wash
        Composition of wash buffer.
    logger
        Logger for messaging events.

    Returns
    -------
    c_out: ndarray
        Final concentration profile for each species.

        `c_out`.shape == `c_in`.shape

    """

    assert c_in.shape[1] == f_in.size
    assert t_cycle > 0
    assert rt_mean >= 0

    # If input array is empty, then return empty.
    if c_in.size == 0:
        if logger:
            logger.i("Convolution: Got empty c_in")
        return c_in.copy()
    elif rtd.size == 0:
        if logger:
            logger.w("Convolution: Got empty bio_rtd")
        return c_in.copy()
    elif f_in.sum() == 0:
        if logger:
            logger.i("Convolution: Got empty f_in")
        return _np.zeros_like(c_in)

    i_cycle = int(round(t_cycle / dt))
    i_rt_mean = int(round(rt_mean / dt))
    i_start, i_end = _vectors.true_start_and_end(f_in > 0)
    assert _np.all(f_in[i_start:i_end] == f_in.max()), \
        "Flow rate profile must be boxed shaped"
    i_switch_inlet = _np.rint(_np.arange(i_start, i_end,
                                         t_cycle / dt)).astype(int)
    i_switch_inlet_off = _np.append(i_switch_inlet[1:], i_end)
    i_switch_outlet = (i_switch_inlet + i_rt_mean).clip(max=f_in.size)
    i_switch_outlet_off = _np.append(
        i_switch_outlet[1:], min(i_switch_outlet[-1] + i_cycle, f_in.size))

    c_out = _np.zeros_like(c_in)

    for i in range(i_switch_inlet.size):
        # Inlet concentration profile for the cycle.
        # Profile is prolonged by wash buffer.
        c_conv_inlet = c_in[:, i_switch_inlet[i]:i_switch_outlet_off[i]].copy()
        c_conv_inlet[:, i_switch_inlet_off[i] - i_switch_inlet[i]:] = \
            c_wash if c_wash is not None else 0

        # Calculate outlet concentration profile.
        c_conv_outlet = time_conv(dt, c_conv_inlet, rtd, c_equilibration,
                                  logger)

        # Insert the result into outlet vector.
        c_out[:, i_switch_outlet[i]:i_switch_outlet_off[
            i]] = c_conv_outlet[:, i_switch_outlet[i] -
                                i_switch_inlet[i]:i_switch_outlet_off[i] -
                                i_switch_inlet[i]]
    return c_out
Exemplo n.º 51
0
 async def output(self, offset: Tuple[int, ...], value: np.ndarray,
                  present: np.ndarray) -> None:
     self.calls.append((offset, value.copy(), present.copy()))
Exemplo n.º 52
0
def add_shafts(field: np.ndarray):
    t_field = field.copy()
    for shaft in conf.shafts:
        t_field[shaft[1], shaft[0]] = shaft_const
    return t_field
Exemplo n.º 53
0
 def transition(self, state: np.ndarray, action: np.ndarray) -> float:
     """Action a = (i, j) swaps elements in positions i and j."""
     new_state = state.copy()
     i, j = action
     new_state[[i, j]] = new_state[[j, i]]
     return new_state
Exemplo n.º 54
0
def mutual_proximity_gauss(D: np.ndarray, metric: str = "distance", test_set_ind: np.ndarray = None, verbose: int = 0):
    """Transform a distance matrix with Mutual Proximity (normal distribution).
    
    Applies Mutual Proximity (MP) [1]_ on a distance/similarity matrix. Gauss 
    variant assumes dependent normal distributions (VERY SLOW).
    The resulting secondary distance/similarity matrix should show lower hubness.
    
    Parameters
    ----------
    D : ndarray
        - ndarray: The ``n x n`` symmetric distance or similarity matrix.
    
    metric : {'distance', 'similarity'}, optional (default: 'distance')
        Define whether matrix `D` is a distance or similarity matrix.
        
    test_set_ind : ndarray, optional (default: None)
        Define data points to be held out as part of a test set. Can be:
        
        - None : Rescale all distances
        - ndarray : Hold out points indexed in this array as test set. 
        
    verbose : int, optional (default: 0)
        Increasing level of output (progress report).
        
    Returns
    -------
    D_mp : ndarray
        Secondary distance MP gauss matrix.
    
    References
    ----------
    .. [1] Schnitzer, D., Flexer, A., Schedl, M., & Widmer, G. (2012). 
           Local and global scaling reduce hubs in space. The Journal of Machine 
           Learning Research, 13(1), 2871–2902.
    """
    # Initialization
    n = D.shape[0]
    log = Logging.ConsoleLogging()

    # Checking input
    IO._check_distance_matrix_shape(D)
    IO._check_valid_metric_parameter(metric)
    if metric == "similarity":
        self_value = 1
    else:  # metric == 'distance':
        self_value = 0
    if issparse(D):
        log.error("Sparse matrices not supported by MP Gauss.")
        raise TypeError("Sparse matrices not supported by MP Gauss.")
    if test_set_ind is None:
        train_set_ind = slice(0, n)
    else:
        train_set_ind = np.setdiff1d(np.arange(n), test_set_ind)

    # Start MP
    D = D.copy()

    np.fill_diagonal(D, self_value)
    # np.fill_diagonal(D, np.nan)

    mu = np.mean(D[train_set_ind], 0)
    sd = np.std(D[train_set_ind], 0, ddof=0)
    # ===========================================================================
    # mu = np.nanmean(D[train_set_ind], 0)
    # sd = np.nanstd(D[train_set_ind], 0, ddof=0)
    # ===========================================================================

    # Code for the BadMatrixSigma error [derived from matlab]
    # ===========================================================================
    # eps = np.spacing(1)
    # epsmat = np.array([[1e5 * eps, 0], [0, 1e5 * eps]])
    # ===========================================================================

    D_mp = np.zeros_like(D)

    # MP Gauss
    for i in range(n):
        if verbose and ((i + 1) % 1000 == 0 or i + 1 == n):
            log.message("MP_gauss: {} of {}.".format(i + 1, n))
        for j in range(i + 1, n):
            # ===================================================================
            # mask = np.isnan(D[[i, j], :])
            # D_mask = np.ma.array(D[[i, j], :], mask=mask)
            # c = np.ma.cov(D_mask, ddof=0)
            # ===================================================================
            c = np.cov(D[[i, j], :], ddof=0)
            x = np.array([D[i, j], D[j, i]])
            m = np.array([mu[i], mu[j]])

            low = np.tile(np.finfo(np.float32).min, 2)
            p12 = mvn.mvnun(low, x, m, c)[0]  # [0]...p, [1]...inform
            if np.isnan(p12):
                # ===============================================================
                # power = 7
                # while np.isnan(p12):
                #     c += epsmat * (10**power)
                #     p12 = mvn.mvnun(low, x, m, c)[0]
                #     power += 1
                # log.warning("p12 is NaN: i={}, j={}. Increased cov matrix by "
                #             "O({}).".format(i, j, epsmat[0, 0]*(10**power)))
                # ===============================================================

                p12 = 0.0
                log.warning("p12 is NaN: i={}, j={}. Set to zero.".format(i, j))

            if metric == "similarity":
                D_mp[i, j] = p12
            else:  # distance
                p1 = norm.cdf(D[i, j], mu[i], sd[i])
                p2 = norm.cdf(D[i, j], mu[j], sd[j])
                D_mp[i, j] = p1 + p2 - p12
    D_mp += D_mp.T
    np.fill_diagonal(D_mp, self_value)
    return D_mp
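The joint term `p12` above is a bivariate normal CDF (computed via `mvn.mvnun`). Under that reading, the per-pair computation can be sketched with SciPy on made-up numbers:

import numpy as np
from scipy.stats import multivariate_normal, norm

d_ij = 0.4                          # toy symmetric distance, D[i, j] == D[j, i]
m = np.array([0.5, 0.6])            # empirical means mu[i], mu[j]
sd = np.array([0.2, 0.25])          # empirical standard deviations sd[i], sd[j]
c = np.array([[0.04, 0.01],         # empirical 2x2 covariance of rows i and j
              [0.01, 0.0625]])

p12 = multivariate_normal.cdf([d_ij, d_ij], mean=m, cov=c)
p1 = norm.cdf(d_ij, loc=m[0], scale=sd[0])
p2 = norm.cdf(d_ij, loc=m[1], scale=sd[1])
print("secondary distance D_mp[i, j]:", p1 + p2 - p12)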
Exemplo n.º 55
0
    def sample(self, angles: np.ndarray, out: np.ndarray = None) -> np.ndarray:
        r"""

        Sample the continuous basis elements on the discrete set of angles in ``angles``.
        Optionally, store the resulting multidimensional array in ``out``.

        A value of ``nan`` is interpreted as the angle of a point placed on the origin of the axes.

        ``angles`` must be an array of shape `(1, N)`, where `N` is the number of points.

        Args:
            angles (~numpy.ndarray): angles where to evaluate the basis elements
            out (~numpy.ndarray, optional): pre-existing array to use to store the output

        Returns:
            the sampled basis

        """
        assert len(angles.shape) == 2
        assert angles.shape[0] == 1

        if out is None:
            out = np.empty(
                (self.shape[0], self.shape[1], self.dim, angles.shape[1]))

        assert out.shape == (self.shape[0], self.shape[1], self.dim,
                             angles.shape[1])

        # find points in the origin
        origin = np.isnan(angles)
        angles = angles.copy()
        angles[origin] = 0.

        angles -= self.axis

        # the basis vectors depends on the shape of the input and output irreps,
        # while their frequencies depend on the irreps frequencies
        if self.shape[0] == 2 and self.shape[1] == 2:
            out = psichi(angles,
                         s=self.s,
                         k=self.mu,
                         gamma=self.gamma,
                         out=out)

        elif self.shape[0] == 1 and self.shape[1] == 2:

            out[0, 0, ...] = np.cos(self.mu * angles + self.gamma)
            out[0, 1, ...] = np.sin(self.mu * angles + self.gamma)

        elif self.shape[0] == 2 and self.shape[1] == 1:

            out[0, 0, ...] = np.cos(self.mu * angles + self.gamma)
            out[1, 0, ...] = np.sin(self.mu * angles + self.gamma)

        elif self.shape[0] == 1 and self.shape[1] == 1:
            out[0, 0, ...] = np.cos(self.mu * angles + self.gamma)
        else:
            raise ValueError(f"Shape {self.shape} not recognized!")

        if self._has_non_zero_frequencies:
            # In the origin, only 0-frequencies are permitted.
            # Therefore, any non-0 frequency base is set to 0 in the origin

            if np.any(origin):
                mask = self._non_zero_frequencies * origin
                out *= 1 - mask

        return out
Exemplo n.º 56
0
 def gradient(self, input_tensor: np.ndarray) -> np.ndarray:
     _result = input_tensor.copy()
     _result[input_tensor >= 0] = 1
     _result[input_tensor < 0] = 0
     return _result