def add(x, y):
    """add

    >>> add(1, 2)
    3
    >>> add([1, 2, 3], [1, 2, 3])
    array([2, 4, 6])
    """
    return np_add(x, y)
def score_icfof(self, query: np_array, ntss: np_ndarray, rho=[0.001, 0.005, 0.01, 0.05, 0.1],
                each_tree_score: bool = False, fast_method: bool = True):
    """
    Compute the *i*\ CFOF approximations.
    Calls one of two functions according to the parameter ``fast_method``:

     - if ``True`` (default): :func:`~pyCFOFiSAX._forest_iSAX.ForestISAX.vranglist_by_idtree_faster`
     - if ``False``: :func:`~pyCFOFiSAX._forest_iSAX.ForestISAX.vranglist_by_idtree`

    Then sorts the vrang list to obtain the CFOF score approximations based on the ``rho`` parameter values.

    :param numpy.array query: The sequence to be evaluated
    :param numpy.ndarray ntss: Reference sequences
    :param list rho: Rho values for the computation of the approximations
    :param bool each_tree_score: if ``True``, also returns the scores obtained in each of the trees
    :param bool fast_method: if ``True``, uses numpy functions for the computation, otherwise walks the tree via a FIFO list of nodes

    :returns: the *i*\ CFOF score approximations
    :rtype: numpy.ndarray
    """

    k_rho = _convert_rho_to_krho(rho, len(ntss))

    k_list_result_mean = np_zeros(len(ntss))

    if each_tree_score:
        k_list_result_ndarray = np_ndarray(shape=(self.forest_isax.number_tree, len(ntss)))

    for id_tree, tree in self.forest_isax.forest.items():

        ntss_tmp = np_array(ntss)[:, self.forest_isax.indices_partition[id_tree]]
        sub_query = query[self.forest_isax.indices_partition[id_tree]]

        if fast_method:
            k_list_result_tmp = tree.vrang_list_faster(sub_query, ntss_tmp)
        else:
            k_list_result_tmp = tree.vrang_list(sub_query, ntss_tmp)

        ratio_klist_tmp = (len(self.forest_isax.indices_partition[id_tree]) / self.forest_isax.size_word)
        k_list_result_mean = np_add(k_list_result_mean, np_array(k_list_result_tmp) * ratio_klist_tmp)
        if each_tree_score:
            k_list_result_ndarray[id_tree] = k_list_result_tmp

    k_list_result_mean = np_sort(k_list_result_mean, axis=None)

    if each_tree_score:
        k_list_result_ndarray.sort()
        return score_by_listvrang(k_list_result_mean.tolist(), k_rho), \
            [score_by_listvrang(list(k_l_r), k_rho) for k_l_r in k_list_result_ndarray]

    return score_by_listvrang(k_list_result_mean.tolist(), k_rho)
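# --- usage sketch (illustrative, not part of the library source above) -----------------
# Shows the expected call signature of ``score_icfof``. It assumes ``detector`` is an
# already-constructed object exposing this method with a fitted ``forest_isax`` (e.g. the
# pyCFOFiSAX CFOF/iSAX wrapper); ``ntss`` is the 2-D array of reference sequences and
# ``query`` a single sequence of the same length. The variable names are placeholders.
def example_score_icfof(detector, query, ntss):
    # One iCFOF approximation per value in ``rho``.
    scores = detector.score_icfof(query, ntss, rho=[0.01, 0.05, 0.1])
    # With ``each_tree_score=True`` the per-tree approximations are returned as well.
    scores, per_tree_scores = detector.score_icfof(query, ntss, each_tree_score=True)
    return scores, per_tree_scores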
def distrib_nn_for_cdf(self, ntss_tmp, bool_print: bool = False):
    """
    Computes the two indicators, mean and standard deviation of the distances, necessary for
    the use of the CDF of the normal distribution.
    The computation of these indicators is described in
    `Scoring Message Stream Anomalies in Railway Communication Systems, L. Foulon et al., 2019, ICDM Workshop
    <https://ieeexplore.ieee.org/abstract/document/8955558>`_.

    :param numpy.ndarray ntss_tmp: Reference sequences
    :param boolean bool_print: if True, displays the node statistics on the standard output

    :returns:
    :rtype: list(numpy.ndarray, numpy.array)
    """

    start_time = time_time()
    node_list, node_list_leaf, node_leaf_ndarray_mean = self.get_list_nodes_and_barycentre()
    if bool_print:
        print("node preprocessing --- %s seconds ---" % (time_time() - start_time))
        stdout.flush()
        print(len(node_list), "nodes, of which", len(node_list_leaf), "are leaves in the tree")
        stdout.flush()

    nb_leaf = len(node_list_leaf)

    cdf_mean = np_zeros((nb_leaf, len(ntss_tmp)))
    cdf_std = np_zeros(nb_leaf)
    nb_ts_by_node = np_zeros(nb_leaf, dtype=np_uint32)
    centroid_dist = np_square(cdist(node_leaf_ndarray_mean, ntss_tmp))

    for num, node in enumerate(node_list_leaf):
        cdf_std[node.id_numpy_leaf] = np_mean(node.std)
        nb_ts_by_node[node.id_numpy_leaf] = node.get_nb_sequences()

    dist_list = np_array([np_zeros(i) for i in nb_ts_by_node], dtype=object)

    # squared distance between the barycentre and the sequences of the same node
    # TODO: np.vectorize?
    for node_nn in node_list_leaf:
        dist_list[node_nn.id_numpy_leaf] = cdist([node_nn.mean], node_nn.get_sequences())[0]
    dist_list = np_square(dist_list)

    # TODO: np.vectorize?
    for num, node in enumerate(node_list_leaf):
        node_id = node.id_numpy_leaf

        centroid_dist_tmp = centroid_dist[node_id]
        centroid_dist_tmp = centroid_dist_tmp.reshape(centroid_dist_tmp.shape + (1,))
        centroid_dist_tmp = np_repeat(centroid_dist_tmp, nb_ts_by_node[node_id], axis=1)

        cdf_mean_tmp = np_add(centroid_dist_tmp, dist_list[node_id])
        cdf_mean[node_id] = np_sum(cdf_mean_tmp, axis=1)

    del dist_list
    del cdf_mean_tmp
    del centroid_dist_tmp

    cdf_mean = np_divide(cdf_mean.T, nb_ts_by_node)
    cdf_mean = np_sqrt(cdf_mean)

    self.cdf_mean = cdf_mean
    self.cdf_std = cdf_std
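# --- illustrative sketch (not from the library source) ---------------------------------
# Shows how per-leaf distance indicators such as ``cdf_mean`` and ``cdf_std`` can feed the
# CDF of the normal distribution. The helper name and the exact way the library combines
# these values into its vrang/iCFOF scores are assumptions made for illustration only.
from scipy.stats import norm

def leaf_cdf_estimate(distance, leaf_mean, leaf_std):
    # Probability that a sequence of the leaf lies within ``distance`` of the query,
    # under a normal approximation of the leaf's distance distribution.
    return norm.cdf(distance, loc=leaf_mean, scale=leaf_std)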
def triad(lhs, rhs1, rhs2, alpha):
    # Store the result in the caller-provided output array; rebinding the local name
    # ``lhs`` would leave the caller's array unchanged.
    np_add(rhs1, np_kron(alpha, rhs2), out=lhs)
def add(lhs, rhs1, rhs2):
    # Write into the preallocated output array instead of rebinding the local name.
    np_add(rhs1, rhs2, out=lhs)
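# --- usage sketch (illustrative) --------------------------------------------------------
# Demonstrates the in-place semantics of the ``add`` and ``triad`` kernels defined just
# above, with preallocated numpy arrays. The alias imports mirror the module-level imports
# these kernels are assumed to rely on.
import numpy as np
from numpy import add as np_add, kron as np_kron

def example_kernels():
    rhs1 = np.arange(4, dtype=np.float64)
    rhs2 = np.ones(4, dtype=np.float64)
    lhs = np.empty_like(rhs1)
    add(lhs, rhs1, rhs2)          # lhs now holds rhs1 + rhs2
    triad(lhs, rhs1, rhs2, 2.0)   # lhs now holds rhs1 + 2.0 * rhs2
    return lhs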
def add(self, point1, point2):
    # Element-wise addition of two points, returned as a tuple of coordinates.
    return tuple(np_add(point1, point2))