Python SubCMediansWrapper_c示例

 def _get_model_features(self):
     """
     Ask the C model for its main features and return them as a new list.

     The wrapper receives ``self._features`` as its output buffer
     (presumably filled in place -- confirm against the C extension).
     The returned list is that buffer with the current generation
     counter appended as its last element.
     """
     feature_buffer = self._features
     SubCMediansWrapper_c.get_features(feature_buffer, self._p_subcmedians_c)
     return feature_buffer + [self.generation]

示例#2

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

 def fit(self, X, y=None, verbose=1):
     """
     sklearn-like fit function, receives a dataset and builds the subspace
     clustering that models the data.

     An initial sample is drawn with ``_set_data_sample``. Then, for
     ``self.NbIter`` iterations, a randomly chosen object that is not in
     the sample replaces the oldest object of the sample, and the model is
     trained with that new object.

     Parameters
     ----------
     X : dataset to learn from; ``None`` makes the call a no-op.
     y : optional labels indexed like the rows of X. When given, the label
         of each object is forwarded to the C library with it.
     verbose : when truthy, an ``iteration/NbIter`` progress counter is
         written to stdout.

     Raises
     ------
     RuntimeError
         If ``X.size`` is smaller than the sample size ``self.N``.
     """
     # Emits the same single newline as the former bare print statement,
     # but is valid syntax under both Python 2 and Python 3.
     sys.stdout.write("\n")
     if X is None:
         return None
     # NOTE(review): if X is a NumPy array, ``size`` is the total number of
     # elements, not the number of rows -- confirm this is the intended test.
     if X.size < self.N:
         raise RuntimeError(
             'The dataset provided is smaller than the sample size, use instead the fit_online function'
         )
     X_ = self._check_X_matrix_validity(X)
     self._set_data_sample(X_, y)
     # ``if y:`` is ambiguous (ValueError) for multi-element arrays and
     # wrongly false for empty ones; only the absence of labels matters.
     has_labels = y is not None
     for iteration in xrange(self.NbIter):
         random_element = np.random.randint(
             0, len(self.data_objects_index_not_in_sample))
         random_index = self.data_objects_index_not_in_sample.pop(
             random_element)
         # FIFO replacement: the oldest sampled object leaves the sample
         # and becomes available to be drawn again.
         oldest_index = self.data_objects_index_in_sample.pop(0)
         self.data_objects_index_in_sample.append(random_index)
         self.data_objects_index_not_in_sample.append(oldest_index)
         if has_labels:
             self._send_array(X_[random_index, :], y[random_index])
         else:
             self._send_array(X_[random_index, :])
         SubCMediansWrapper_c.train_model_with_SubCMedianspoint(
             self._p_subcmedians_c, self._data_object)
         self.generation += 1
         if verbose:
             sys.stdout.write("\r" + str(iteration) + "/" +
                              str(self.NbIter))
             sys.stdout.flush()
     sys.stdout.write("\n")

示例#3

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

 def __init__(self,
              SDmax=STD_SDmax,
              D=STD_D,
              N=STD_N,
              NbIter=STD_NbIter,
              threshold_cluster_validity=STD_THRESHOLD_CLUSTER_VALIDITY,
              seed=STD_SEED,
              option_deletion=STD_OPT_DEL,
              option_insertion=STD_OPT_INS,
              option_FIFO=STD_FIFO,
              option_train_with_latest=STD_TRAIN_WITH_LATEST,
              option_lazy_hill_climbing=STD_LAZY_HILL_CLIMBING,
              population_size=STD_LAMBDA,
              nb_generations_generation_update=STD_ETA):
     """
     Creates a SubCMedians customizable object. This version has more options than the one presented in the paper, we suggest to use the SubCMedians object instead.

     Every argument is stored on the instance under its own name. All of
     them except ``NbIter`` are also forwarded to the C constructor
     ``generate_SubCMediansclust``.

     Parameters
     ----------
     SDmax : size given to the C model and to the cluster buffer object;
         ``set_subspace_model`` rejects models whose number of non-NaN
         coordinates exceeds it.
     D : size of the data-object buffer exchanged with the C library
         (presumably the data dimensionality -- TODO confirm).
     N : sample size; number of objects drawn by ``_set_data_sample``.
     NbIter : number of training iterations performed by ``fit``.
     threshold_cluster_validity, seed, option_deletion, option_insertion,
     option_FIFO, option_train_with_latest, option_lazy_hill_climbing,
     population_size, nb_generations_generation_update :
         forwarded unchanged to the C library; their exact semantics are
         defined there.
     """
     # Keep a Python-side copy of each constructor argument.
     self.SDmax = SDmax
     self.D = D
     self.N = N
     self.NbIter = NbIter
     self.threshold_cluster_validity = threshold_cluster_validity
     self.option_deletion = option_deletion
     self.option_insertion = option_insertion
     self.option_FIFO = option_FIFO
     self.option_train_with_latest = option_train_with_latest
     self.seed = seed
     self.population_size = population_size
     self.nb_generations_generation_update = nb_generations_generation_update
     self.option_lazy_hill_climbing = option_lazy_hill_climbing
     # Handle on the C-side model; every wrapper call takes it as argument.
     self._p_subcmedians_c = SubCMediansWrapper_c.generate_SubCMediansclust(
         SDmax, D, N, threshold_cluster_validity, seed, option_deletion,
         option_insertion, option_FIFO, option_train_with_latest,
         option_lazy_hill_climbing, population_size,
         nb_generations_generation_update)
     # Reusable Python containers handed to the C wrapper as output buffers.
     self._model_getter = []
     self._distances_to_cluster_getter = []
     self._lengths = []
     self._features = []
     self._object_class_cluster = []
     self._cluster_getter = []
     self._aggregatedstats = {}
     # C-side random generator and the point buffers allocated from it.
     self._prng = SubCMediansWrapper_c.generate_prng(self.seed)
     self._stream = SubCMediansWrapper_c.generate_array_SubCMedians_point(
         self._prng, N, D)
     self._data_object = SubCMediansWrapper_c.generate_SubCMedians_point(
         self._prng, D)
     self._cluster_object = SubCMediansWrapper_c.generate_SubCMedians_point(
         self._prng, SDmax)
     self.time_start = timer()
     # Attribute names compared, in this order, with the values reported by
     # the C side in _check_consistency_C_params_Py_params.
     # NOTE(review): "M" is listed but no ``self.M`` is assigned in this
     # constructor, and ``threshold_cluster_validity`` is absent -- confirm
     # against the order returned by the C ``get_parameters``.
     self._parameters = [
         "SDmax", "D", "N", "M", "option_deletion", "option_insertion",
         "option_FIFO", "option_train_with_latest", "seed",
         "option_lazy_hill_climbing", "population_size",
         "nb_generations_generation_update"
     ]
     # Counts the training steps performed so far.
     self.generation = 0

示例#4

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

 def _get_subcmedians_model(self):
     """
     Fetch the current SubCMedians model as a list of trimmed rows.

     The C wrapper is handed ``self._model_getter`` and ``self._lengths``
     as output buffers. Afterwards ``self._lengths[0]`` is read as the
     number of rows to return, and ``self._lengths[k + 1]`` as the number
     of leading entries to keep from row ``k``.
     """
     SubCMediansWrapper_c.get_SubCMediansclust_model(
         self._model_getter, self._lengths, self._p_subcmedians_c)
     rows, sizes = self._model_getter, self._lengths
     trimmed_model = []
     for k in xrange(sizes[0]):
         trimmed_model.append(rows[k][:sizes[k + 1]])
     return trimmed_model

示例#5

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

 def _transform_array(self, x):
     """
     Transform a single object x into its distances to the candidate
     centers of the model.

     The object is sent to the C library and clustered, then the distances
     are collected into ``self._distances_to_cluster_getter`` and returned
     as an array.
     """
     self._send_array(x)
     assignment = SubCMediansWrapper_c.clusterize_SubCMedianspoint_with_model(
         self._p_subcmedians_c, self._data_object)
     # Only the cluster is needed here; the distance is discarded.
     nearest_cluster, _ = assignment
     SubCMediansWrapper_c.get_distances_to_core_point(
         nearest_cluster, self._p_subcmedians_c, self._data_object,
         self._distances_to_cluster_getter)
     return array(self._distances_to_cluster_getter)

示例#6

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

 def _get_class_clusters_current_data_sample(self):
     """
     Build a DataFrame with the class / cluster membership of the current
     data sample.

     One row is added per object of the C-side data window, with the
     columns ``class`` and ``cluster``.
     """
     membership = DataFrame(columns=["class", "cluster"])
     window_size = SubCMediansWrapper_c.get_data_window_size(
         self._p_subcmedians_c)
     row = 0
     while row < window_size:
         # The wrapper receives self._object_class_cluster as its output
         # buffer for object number ``row``.
         SubCMediansWrapper_c.get_D_point_class_cluster(
             row, self._p_subcmedians_c, self._object_class_cluster)
         membership.loc[row] = self._object_class_cluster
         row += 1
     return membership

示例#7

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

 def _send_array(self, x, y=None):
     """
     Encode a data object (numpy array or list) and hand it to the C
     library, which stores it in ``self._data_object``.

     The encoded list starts with POINTDESCRIPTORS header slots set to 0,
     followed by one ``[dimension, 1, value]`` triple per non-NaN
     coordinate of x. The class id slot is filled when y is given, and the
     weight slot receives the number of triples.
     """
     encoded = [0] * POINTDESCRIPTORS
     encoded.extend(
         [position, 1, float(value)]
         for position, value in enumerate(x)
         if not isnan(value))
     if y is not None:
         encoded[POINTCLASSID] = int(y)
     encoded[POINTWEIGHT] = len(encoded) - POINTDESCRIPTORS
     SubCMediansWrapper_c.py2C_convert_SubCMedianspoint(
         encoded, self._data_object)

示例#8

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

 def _cluster_data_object(self, x, y=None):
     """
     Send a data object encoded as a numpy array or a list to the C
     library and cluster it.

     Parameters
     ----------
     x : the data object (numpy array or list of coordinates).
     y : optional class label of the object.

     Returns
     -------
     ([label, cluster], distance), where ``label`` is ``int(y)``, or
     ``None`` when no label was provided.
     """
     self._send_array(x, y)
     cluster, distance = SubCMediansWrapper_c.clusterize_SubCMedianspoint_with_model(
         self._p_subcmedians_c, self._data_object)
     # int(None) raises TypeError, which made the default y=None unusable.
     label = None if y is None else int(y)
     return [label, cluster], distance

示例#9

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

 def _set_data_sample(self, X, y=None):
     """
     Set the data sample by randomly drawing ``self.N`` objects from the
     dataset X, without replacement.

     The drawn indices are recorded in ``self.data_objects_index_in_sample``
     and the remaining ones in ``self.data_objects_index_not_in_sample``.
     Each drawn object is sent to the C library and inserted into its data
     sample.

     Parameters
     ----------
     X : dataset supporting ``len(X)`` and ``X[i, :]`` indexing.
     y : optional labels indexed like the rows of X.
     """
     self.data_objects_index_in_sample = []
     # An explicit list is required because indices are popped from it;
     # a bare range() is not a list under Python 3.
     self.data_objects_index_not_in_sample = list(range(len(X)))
     # ``if y:`` is ambiguous (ValueError) for multi-element arrays and
     # wrongly false for empty ones; only the absence of labels matters.
     has_labels = y is not None
     for _ in xrange(self.N):
         random_element = np.random.randint(
             0, len(self.data_objects_index_not_in_sample))
         random_index = self.data_objects_index_not_in_sample.pop(
             random_element)
         self.data_objects_index_in_sample.append(random_index)
         if has_labels:
             self._send_array(X[random_index, :], y[random_index])
         else:
             self._send_array(X[random_index, :])
         SubCMediansWrapper_c.insert_SubCMedians_point_in_D(
             self._p_subcmedians_c, self._data_object)

示例#10

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

 def _check_consistency_C_params_Py_params(self):
     """
     Verify that the parameters held by the C object match the attributes
     of this Python object.

     Each name in ``self._parameters`` is compared, position by position,
     with the values returned by the C wrapper. A RuntimeError is raised
     at the first mismatch.
     """
     c_side = SubCMediansWrapper_c.get_parameters(self._p_subcmedians_c)
     for position, name in enumerate(self._parameters):
         py_value = getattr(self, name)
         c_value = c_side[position]
         if py_value != c_value:
             details = (name, str(py_value), c_value)
             raise RuntimeError(
                 'C capsule parameters and Python parameters are different '
                 '%s %s != %s' % details)

示例#11

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

 def score(self, X):
     """
     Return the mean of the distances reported by the C library when each
     object of X is clustered with the current model (the mean
     intra-cluster distance).
     """
     clusterize = SubCMediansWrapper_c.clusterize_SubCMedianspoint_with_model
     distances = []
     for x in self._check_X_matrix_validity(X):
         self._send_array(x)
         _, dist = clusterize(self._p_subcmedians_c, self._data_object)
         distances.append(dist)
     distance_array = np.asarray(distances)
     return distance_array.mean()

示例#12

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

 def predict(self, X):
     """
     sklearn-like predict function, receives a dataset and computes the
     cluster membership of its data objects.

     Parameters
     ----------
     X : dataset; validated through ``_check_X_matrix_validity``.

     Returns
     -------
     A one-dimensional array with one cluster identifier per object of X,
     in the same order.
     """
     X_ = self._check_X_matrix_validity(X)
     # Labels are collected in a list first: calling ``append`` on the
     # array for every row copies the whole array each time (quadratic).
     labels = []
     for x in X_:
         self._send_array(x)
         cluster, _ = SubCMediansWrapper_c.clusterize_SubCMedianspoint_with_model(
             self._p_subcmedians_c, self._data_object)
         labels.append(cluster)
     # One append onto an empty array gives the same result and dtype as
     # the former row-by-row appends.
     return append(array([]), labels)

示例#13

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

 def set_subspace_model(self, model, base_weight=1):
     """
     Replace the model held by the C library with the given centers.

     Parameters
     ----------
     model : iterable of centers, each an iterable of coordinates. NaN
         coordinates are skipped, i.e. left out of the encoded center.
     base_weight : value stored as the second element of every
         ``[dimension, base_weight, value]`` triple.

     Raises
     ------
     ValueError
         If the total number of non-NaN coordinates over all centers
         exceeds ``self.SDmax``. Nothing is sent to the C library then.
     """
     model_translation = []
     total_size = 0
     for i, center in enumerate(model):
         scm_py_list = [0 for _ in xrange(POINTDESCRIPTORS)]
         scm_py_list.extend(
             [dim, base_weight, float(dim_pos)]
             for dim, dim_pos in enumerate(center)
             if not isnan(dim_pos))
         # Number of coordinates actually encoded for this center.
         w = len(scm_py_list) - POINTDESCRIPTORS
         total_size += w
         scm_py_list[POINTINDEX] = i
         scm_py_list[POINTWEIGHT] = w
         model_translation.append(scm_py_list)
     if total_size > self.SDmax:
         # The adjacent literals used to be joined without separating
         # spaces ("...%s.Check..." and "...modelwith...").
         raise ValueError(
             'Invalid new model size %s for estimator %s. '
             'Check the size of your model and provide a smaller or equal size model '
             'with `SubCMedians.SDmax`.' % (total_size, self))
     SubCMediansWrapper_c.clone_SubCMedians_point_from_list(
         model_translation, self._p_subcmedians_c)

示例#14

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

 def fit_online_mode(self, X, y=None):
     """
     Sklearn-like fit function, receives a dataset and builds the subspace
     clustering that models the data.

     This function has been created to deal with streams of data: the
     dataset provided as an input will never appear again, so no record of
     which objects are in the sample is kept. Every object is sent to the
     C library and used for one training step, in order.

     Parameters
     ----------
     X : a single object (one-dimensional after validation) or a dataset
         iterated row by row; ``None`` makes the call a no-op.
     y : optional label. For a single object it is passed as is; for a
         dataset it is indexed like the rows of X.
     """
     if X is None:
         return None
     X_ = self._check_X_matrix_validity(X)
     if len(X_.shape) == 1:
         self._send_array(X_, y)
         SubCMediansWrapper_c.train_model_with_SubCMedianspoint(
             self._p_subcmedians_c, self._data_object)
         self.generation += 1
     else:
         # ``if y:`` is ambiguous (ValueError) for multi-element arrays
         # and wrongly false for empty ones.
         has_labels = y is not None
         for i, x in enumerate(X_):
             if has_labels:
                 self._send_array(x, y[i])
             else:
                 self._send_array(x)
             SubCMediansWrapper_c.train_model_with_SubCMedianspoint(
                 self._p_subcmedians_c, self._data_object)
             self.generation += 1

示例#15

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

    def set_params(self, **params):
        """
        Set the parameters provided to the constructor.

        All names are validated before any attribute is modified, so an
        invalid name leaves the estimator untouched. The new values are
        then pushed to the C library and its memory is reallocated.

        Parameters
        ----------
        **params : attribute names of this estimator mapped to new values.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If a name is not an existing attribute of the estimator.
        """
        if not params:
            self._reallocate_memory()
            return self
        # Validate everything first: setting attributes while validating
        # left the object half-updated when a later name was invalid.
        for name in params:
            if not hasattr(self, name):
                raise ValueError('Invalid parameter %s for estimator %s. '
                                 'Check the list of available parameters '
                                 'with `SubCMedians.get_params().keys()`.' %
                                 (name, self))
        for name in params:
            setattr(self, name, params[name])

        SubCMediansWrapper_c.set_parameters(
            self._p_subcmedians_c, self.SDmax, self.D, self.N,
            self.threshold_cluster_validity, self.seed, self.option_deletion,
            self.option_insertion, self.option_FIFO,
            self.option_train_with_latest, self.option_lazy_hill_climbing,
            self.population_size, self.nb_generations_generation_update)
        self._reallocate_memory()
        return self

示例#16

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

 def _train_on_current_training_set(self, iterations):
     """
     Run ``iterations`` training steps on the data sample currently held
     by the C library, leaving that sample unchanged on the Python side.
     """
     for _ in xrange(iterations):
         capsule = self._p_subcmedians_c
         SubCMediansWrapper_c.train_on_current_D(capsule)

示例#17

显示文件

文件： pySubCMedians.py 项目： SergioPeignier/SubCMedians

 def _print_me(self):
     """
     Ask the C library to print a description of the current SubCMedians
     model.
     """
     capsule = self._p_subcmedians_c
     SubCMediansWrapper_c.print_SubCMediansClust(capsule)