def eof(x, svd=False, transform=False):
    '''
    Empirical Orthogonal Function (EOF) analysis to find both time series and
    spatial patterns.

    :param x: (*array_like*) Input 2-D array with space-time field. It must
        provide a ``contains_nan()`` method.
    :param svd: (*boolean*) Using SVD or eigen method.
    :param transform: (*boolean*) Do space-time transform or not. This
        transform will speed up the computation if the space location number
        is much more than time stamps. Only valid when ``svd=False``.

    :returns: (EOF, E, PC) EOF: eigen vector 2-D array; E: eigen values 1-D
        array; PC: Principal component 2-D array.
    '''
    has_nan = False
    if x.contains_nan():
        # NaN never compares equal to anything (itself included), so a
        # ``!= np.nan`` test cannot single out missing rows; use an explicit
        # isnan test instead. A row is treated as missing when its first
        # column is NaN.
        valid_idx = np.where(np.logical_not(np.isnan(x[:, 0])))[0]
        xx = x[valid_idx, :]
        has_nan = True
    else:
        xx = x

    m, n = xx.shape
    if svd:
        U, S, V = np.linalg.svd(xx)
        EOF = U
        # Put the singular values on the diagonal of an (m, n) matrix so that
        # PC = C . V
        C = np.zeros((m, n))
        for i in range(len(S)):
            C[i, i] = S[i]
        PC = np.dot(C, V)
        E = S**2 / n
    else:
        if transform:
            # Decompose the smaller (n, n) matrix, then map the eigen vectors
            # back through xx.
            # NOTE(review): unlike the other eigen branch, C is not divided by
            # n here, so E is on a different scale - confirm this is intended.
            C = np.dot(xx.T, xx)
            E1, EOF1 = np.linalg.eig(C)
            # NOTE(review): the reversal assumes eig returns the eigen values
            # in ascending order - confirm for the linalg backend in use.
            EOF1 = EOF1[:, ::-1]
            E = E1[::-1]
            EOFa = np.dot(xx, EOF1)
            EOF = np.zeros((m, n))
            for i in range(n):
                # Scale each mapped vector by sqrt(|eigen value|)
                EOF[:, i] = EOFa[:, i] / np.sqrt(abs(E[i]))
            PC = np.dot(EOF.T, xx)
        else:
            C = np.dot(xx, xx.T) / n
            E, EOF = np.linalg.eig(C)
            PC = np.dot(EOF.T, xx)
            # NOTE(review): the reversal assumes eig returns the eigen values
            # in ascending order - confirm for the linalg backend in use.
            EOF = EOF[:, ::-1]
            PC = PC[::-1, :]
            E = E[::-1]

    if has_nan:
        # Write the results back at the rows that were kept; the dropped rows
        # stay NaN.
        # NOTE(review): both outputs are allocated with the shape of x and are
        # negated on this path only - confirm the shapes of EOF / PC always
        # match and that the sign flip is intended.
        _EOF = np.ones(x.shape) * np.nan
        _PC = np.ones(x.shape) * np.nan
        _EOF[valid_idx, :] = -EOF
        _PC[valid_idx, :] = -PC
        return _EOF, E, _PC
    else:
        return EOF, E, PC
def varimax(x, normalize=False, tol=1e-10, it_max=1000):
    '''
    Rotate EOFs according to varimax algorithm

    :param x: (*array_like*) Input 2-D array.
    :param normalize: (*boolean*) If ``True``, every row of the loadings is
        scaled to unit length before the rotation and scaled back to its
        original length afterwards (Kaiser normalization). A row whose length
        is zero leads to a division by zero.
    :param tol: (*float*) Tolerance.
    :param it_max: (*int*) Specifies the maximum number of iterations to do.

    :returns: Rotated EOFs and rotate matrix.
    '''
    p, nc = x.shape
    TT = np.eye(nc)
    if nc < 2:
        # Fewer than two columns leave nothing to rotate; the general path
        # would also fail because squeeze yields a 0-d array that cannot be
        # turned into a diagonal matrix.
        return np.dot(x, TT), TT

    if normalize:
        # Row lengths, kept so the rotated loadings can be scaled back
        sc = np.sqrt(np.sum(x**2, axis=1))
        x = (x.T / sc).T

    d = 0
    for _ in range(it_max):
        z = np.dot(x, TT)
        # Column sums of the squared rotated loadings
        col_sq_sum = np.squeeze(np.dot(np.ones((1, p)), z**2))
        B = np.dot(x.T, z**3 - np.dot(z, np.diag(col_sq_sum)) / p)
        U, S, Vh = np.linalg.svd(B)
        TT = np.dot(U, Vh)
        d_prev = d
        d = np.sum(S)
        # Stop once the sum of singular values no longer grows by more than
        # the relative tolerance.
        if d < d_prev * (1 + tol):
            break

    # Final matrix.
    r = np.dot(x, TT)
    if normalize:
        # Undo the row scaling applied before the rotation
        r = (r.T * sc).T
    return r, TT
def _learn(self):
    '''
    Build the random forest model from the stored training data and keep it
    in ``self._model``.

    Reads ``self._x`` (2-D samples array), ``self._y`` (labels) and the
    configuration attributes set on the instance. ``self._attributes`` is
    filled with numeric attributes when it has not been set.
    '''
    # Number of features = number of columns of the samples array
    p = self._x.shape[1]
    if self._attributes is None:
        self._attributes = numeric_attributes(p)

    # A non-positive mtry falls back to floor(sqrt(number of features))
    m = int(math.floor(math.sqrt(p))) if self._mtry <= 0 else self._mtry

    # max_nodes <= 1 falls back to (number of samples / node size).
    # Floor division keeps this an integer even when ``/`` is true division.
    if self._max_nodes <= 1:
        j = self._x.shape[0] // self._node_size
    else:
        j = self._max_nodes

    split_rule = DecisionTree.SplitRule.valueOf(self._split_rule.upper())

    # One weight per class; presumably labels are 0-based integers so that
    # max label + 1 is the class count - verify against callers.
    k = int(self._y.max()) + 1
    if self._class_weight is None:
        weight = np.ones(k, dtype='int')
    else:
        weight = self._class_weight

    self._model = JRandomForest(self._attributes, self._x.tojarray('double'),
                                self._y.tojarray('int'), self._ntrees, j,
                                self._node_size, m, self._sub_sample,
                                split_rule, weight.tojarray('int'))