Пример #1
0
 def merge(self, frame):
     """
     Add another DataFrame to the accumulated stats for each column.

     Every column tracked in ``self._column_stats`` is summarised for
     ``frame`` (count, mean, min, max and sum of squared deviations) and
     that summary is merged into the column's existing counter.

     Parameters
     ----------
     frame: pandas DataFrame we will update our stats counter with.

     Returns
     -------
     self, so that successive merges can be chained.
     """
     # Snapshot the keys: entries are reassigned inside the loop.
     for column_name in list(self._column_stats):
         # Double brackets keep the selection two-dimensional, so the
         # per-column mean/min/max below come back as arrays.
         data_arr = frame[[column_name]].values
         if data_arr.size == 0:
             # scipy's describe() raises ValueError on empty input; a
             # frame with no rows adds no observations, so the existing
             # counter for this column is left untouched.
             continue
         count, min_max_tup, mean, _, _, _ = \
             scistats.describe(data_arr)
         stats_counter = StatCounter()
         stats_counter.n = count
         stats_counter.mu = mean
         # Sum of squared deviations from this frame's own mean.
         stats_counter.m2 = np.sum((data_arr - mean)**2)
         stats_counter.minValue, stats_counter.maxValue = min_max_tup
         self._column_stats[column_name] = self._column_stats[
             column_name].mergeStats(stats_counter)
     return self
Пример #2
0
 def merge(self, frame):
     """
     Add another DataFrame to the accumulated stats for each column.

     For each column name held in ``self._column_stats`` a fresh
     StatCounter is filled from ``frame`` and combined with the stored
     counter through ``mergeStats``.

     Parameters
     ----------
     frame: pandas DataFrame we will update our stats counter with.

     Returns
     -------
     self, which allows calls to be chained.
     """
     # Iterate over a copy of the keys because values are replaced below.
     for column_name in list(self._column_stats):
         # Selecting with a list yields a 2-D array for the column.
         data_arr = frame[[column_name]].values
         if data_arr.size == 0:
             # Nothing to add: describe() rejects empty input with a
             # ValueError, and zero rows cannot change the running stats.
             continue
         # Only the first three fields of describe() are needed:
         # number of observations, (min, max) and mean.
         count, min_max_tup, mean = scistats.describe(data_arr)[:3]
         stats_counter = StatCounter()
         stats_counter.n = count
         stats_counter.mu = mean
         # Sum of squared deviations from the mean of this frame.
         stats_counter.m2 = np.sum((data_arr - mean) ** 2)
         stats_counter.minValue, stats_counter.maxValue = min_max_tup
         self._column_stats[column_name] = self._column_stats[
             column_name].mergeStats(stats_counter)
     return self
Пример #3
0
 def merge(self, frame):
     """
     Add another DataFrame to the accumulated stats for each column.

     A temporary StatCounter is built from ``frame`` for every tracked
     column and merged into the counter already stored for that column.

     Parameters
     ----------
     frame: pandas DataFrame we will update our stats counter with.

     Returns
     -------
     self, so that merges can be chained.
     """
     for column_name, counter in self._column_stats.items():
         # Double brackets keep the column selection two-dimensional.
         data_arr = frame[[column_name]].values
         if data_arr.size == 0:
             # scipy's describe() raises ValueError for empty input; an
             # empty frame contributes no observations, so skip it.
             continue
         # Variance, skewness and kurtosis from describe() are not used.
         count, min_max_tup, mean = scistats.describe(data_arr)[:3]
         stats_counter = StatCounter()
         stats_counter.n = count
         stats_counter.mu = mean
         # TODO(juliet): look up paper they base their streaming stat alg
         # on, write docs for statcounter class in spark
         # line below will likely need to be modified to match the alg
         # (currently: sum of squared deviations from this frame's mean)
         stats_counter.m2 = np.sum((data_arr - mean)**2)
         stats_counter.minValue, stats_counter.maxValue = min_max_tup
         # Replacing the value of an existing key is safe while iterating.
         self._column_stats[column_name] = counter.mergeStats(
             stats_counter)
     return self
Пример #4
0
 def merge(self, frame):
     """
     Add another DataFrame to the accumulated stats for each column.

     Each tracked column of ``frame`` is reduced to a count, mean, min,
     max and sum of squared deviations, which are then folded into the
     stored counter for that column via ``mergeStats``.

     Parameters
     ----------
     frame: pandas DataFrame we will update our stats counter with.

     Returns
     -------
     self, to allow chained calls.
     """
     for column_name, counter in self._column_stats.items():
         # List-style selection returns a 2-D array for the column.
         data_arr = frame[[column_name]].values
         if data_arr.size == 0:
             # describe() refuses empty input (ValueError); with no rows
             # there is nothing to merge for this column.
             continue
         # Keep only nobs, (min, max) and mean; the remaining fields of
         # describe() are unused here.
         count, min_max_tup, mean = scistats.describe(data_arr)[:3]
         stats_counter = StatCounter()
         stats_counter.n = count
         stats_counter.mu = mean
         # TODO(juliet): look up paper they base their streaming stat alg
         # on, write docs for statcounter class in spark
         # line below will likely need to be modified to match the alg
         # (currently: sum of squared deviations from this frame's mean)
         stats_counter.m2 = np.sum((data_arr - mean) ** 2)
         stats_counter.minValue, stats_counter.maxValue = min_max_tup
         # Only the value of an existing key changes, so iterating over
         # items() while assigning is safe.
         self._column_stats[column_name] = counter.mergeStats(
             stats_counter)
     return self