class QuadScoreSample5(QuadScoreSample4):
    """Like QuadScoreSample4, but tracks a global std and mean across partitions."""

    def __init__(self, *args, **kwargs):
        QuadScoreSample4.__init__(self, *args, **kwargs)
        self.global_std = StdFunc()
        self.global_mean = AvgFunc()
        self.global_bounds = [inf, -inf]  # running [min, max] of all scores seen
        self.epsilon = kwargs.get('epsilon', 0.005)

    def evaluate_split(self, stats_list):
        probs = []
        for stat in stats_list:
            if not stat:
                continue
            est, std = stat.est, stat.std
            if not len(stat.vals):
                prob = 0.
            elif std == 0:
                prob = 1.
            else:
                weight = self.weight(max(stat.vals))
                if weight == 0:
                    prob = 1.
                else:
                    bound = max(stat.vals) - min(stat.vals)  # value range (currently unused)
                    prob = (std * (2.58 + 2.58)) / weight
                    prob = 1 - prob / (self.global_bounds[1] - self.global_bounds[0])
                    # Alternative erf-based score, kept for reference:
                    # prob = est + 2.58 * std
                    # if std == 0:
                    #     prob = 1.
                    # else:
                    #     # Prob( (X-mean)^2 < epsilon ) >= 0.95
                    #     w = self.weight(est + 2.58 * std)
                    #     alpha = self.epsilon * abs(est) / w
                    #     prob = math.erf(alpha / (std * math.sqrt(2.)))
            probs.append(prob)
        return np.mean(probs) if probs else 0.

    def evaluate(self, table):
        if not len(table):
            return PartitionStats(self.global_mean.value(),
                                  std=self.global_std.value(),
                                  vals=[])

        # score any rows that have not been evaluated yet
        vals = []
        newvals = []
        for row in table:
            if row[self.SCORE_ID].value == -inf:
                est = self.err_func([row])
                row[self.SCORE_ID] = est
                newvals.append(est)
            vals.append(row[self.SCORE_ID].value)

        samp_size = len(vals)
        newvals = np.array(newvals)

        # fold the new scores into the global statistics
        self.global_std.delta(add=[newvals], update=True)
        self.global_mean.delta(add=[newvals], update=True)
        self.global_bounds[0] = min(self.global_bounds[0], min(vals))
        self.global_bounds[1] = max(self.global_bounds[1], max(vals))

        if samp_size == 1:
            est, std = vals[0], 0.
        else:
            # slightly biased std estimator
            est = np.mean(vals)
            S2 = 1. / (samp_size - 1) * sum([(v - est) ** 2 for v in vals])
            S = math.sqrt(S2)
            std = self.kn(samp_size) * S

        if samp_size > 2:
            _logger.debug('\tsampsize(%d)\t%.4f+-%.4f\t%.4f - %.4f',
                          samp_size, est, std,
                          self.global_bounds[0], self.global_bounds[1])

        return PartitionStats(est, std=std, vals=vals)

    def weight(self, val):
        u = self.global_mean.value()
        std = self.global_std.value()
        if std == 0:
            return 1.

        max_std = 2.58
        #max_std = 1.6

        # weight grows quadratically with the number of stds above the mean,
        # clamped to [0, max_std]
        nstds = (val - u) / std
        nstds = min(max(0, nstds), max_std)
        y = (nstds / max_std) ** 2
        return y

        # Alternative weighting schemes, kept for reference (unreachable):
        #
        # linear scale; hits its maximum around 2.58 - 0.5
        # r = 2.58 + 2.58 + 0.5  # why is a 0.5 here?
        # v = min(r, max(0., (val - u) / std - 0.5))
        # return 0.0001 + (v / r) * (1 - 0.0001)
        #
        # erf-based, rescaled to (0.001, 1)
        # w = .5 * (1 + math.erf((val - u) / math.sqrt(2 * std ** 2)))
        # return 0.001 + w * (1 - 0.001)

    def should_stop(self, table, stats):
        if len(table) <= self.min_points:
            return True

        std, est = stats.std, stats.est

        # stop if every row has identical aggregate-column values
        val, allsame = None, True
        for i, row in enumerate(table):
            if i == 0:
                val = tuple([row[aggcol].value for aggcol in self.aggcols])
            elif val != tuple([row[aggcol].value for aggcol in self.aggcols]):
                allsame = False
                break
        if allsame:
            return True

        # stop once Prob( (X-mean)^2 < epsilon ) >= 0.95
        w = self.weight(est + 2.58 * std)
        if w == 0 or std == 0:
            prob = 1.
        else:
            alpha = math.sqrt(self.epsilon * abs(est) / w)
            #alpha = self.epsilon * 2.58 * self.global_std.value() / w
            #alpha = math.sqrt( self.epsilon * 2 * 2.58 * self.global_std.value() / w )
            prob = math.erf(alpha / (std * math.sqrt(2.)))
        return prob >= 0.95
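# A minimal sketch (not part of the original module) of the statistics behind
# should_stop above: for X ~ N(mu, sigma^2), P(|X - mu| < alpha) =
# erf(alpha / (sigma * sqrt(2))), which is the closed form the stopping rule
# evaluates with alpha = sqrt(epsilon * |est| / w). The helper below checks
# that identity against a Monte Carlo estimate; its name, default arguments,
# and sample count are assumptions made for this illustration only.
def _demo_stop_probability(est=10.0, std=0.5, epsilon=0.005, w=1.0, n=200000):
    """Compare the erf-based stopping probability with a sampled estimate."""
    alpha = math.sqrt(epsilon * abs(est) / w)       # same alpha as should_stop
    prob = math.erf(alpha / (std * math.sqrt(2.)))  # P(|X - est| < alpha)
    draws = np.random.normal(est, std, n)           # Monte Carlo check
    sampled = float(np.mean(np.abs(draws - est) < alpha))
    return prob, sampled  # the two values should agree to ~2 decimal places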
class Evaluator(object):
    def __init__(self, SCORE_ID, errprob, err_funcs, aggcols, epsilon, **kwargs):
        self.global_std = StdFunc()
        self.global_mean = AvgFunc()
        self.global_bounds = [inf, -inf]  # running [min, max] of all scores seen
        self.SCORE_ID = SCORE_ID
        self.err_funcs = err_funcs
        self.aggcols = aggcols
        self.epsilon = epsilon
        self.errprob = errprob
        self.min_points = kwargs.get('min_points', 2)
        self.sampler = Sampler(self.errprob, self.SCORE_ID)

    def kn(self, n):
        """Return kn = sqrt(2/(n-1)) * gamma(n/2) / gamma((n-1)/2), the
        normal-sample correction factor for the sample std S."""
        try:
            return math.sqrt(2. / (n - 1)) * (math.gamma(n / 2.) / math.gamma((n - 1.) / 2.))
        except (OverflowError, ZeroDivisionError, ValueError):
            # gamma overflows for large n (and n=1 divides by zero);
            # kn tends to 1 in that regime anyway
            return 1.

    def evaluate(self, tables, sample=True):
        if not isinstance(tables, list):
            # normalize a single table into a list of tables
            return self.evaluate([tables], sample=sample)

        self.samples = self.sampler(tables) if sample else tables
        if not self.samples:
            return None

        ests, stds, vals = [], [], []
        for table, err_func in zip(self.samples, self.err_funcs):
            est, std, vs = self.evaluate_table(table, err_func)
            ests.append(est)
            stds.append(std)
            vals.extend(vs)
        est = np.mean(ests)
        std = np.mean(stds)

        # SUM aggregates: scale the per-sample estimate up to the full tables
        if len(self.err_funcs) and 'Sum' in str(self.err_funcs[0].klass):
            est = est / sum(map(len, self.samples)) * sum(map(len, tables))

        if len(vals) != sum(map(len, self.samples)):
            raise RuntimeError("# vals != # samples")
        if sample and len(vals) != sum(map(len, tables)):
            raise RuntimeError("# vals != # pts")

        return PartitionStats(est, std=std, vals=vals)

    def evaluate_table(self, table, err_func):
        if not len(table):
            return (err_func([]), 0., [])

        # score any rows that have not been evaluated yet
        vals = []
        newvals = []
        for row in table:
            if row[self.SCORE_ID].value == -inf:
                est = err_func([row])
                row[self.SCORE_ID] = est
                newvals.append(est)
            vals.append(row[self.SCORE_ID].value)

        samp_size = len(vals)
        newvals = np.array(newvals)

        # fold the new scores into the global statistics
        self.global_std.delta(add=[newvals], update=True)
        self.global_mean.delta(add=[newvals], update=True)
        self.global_bounds[0] = min(self.global_bounds[0], min(vals))
        self.global_bounds[1] = max(self.global_bounds[1], max(vals))

        if samp_size == 1:
            est, std = vals[0], 0.
        else:
            # slightly biased std estimator
            est = np.mean(vals)
            S2 = 1. / (samp_size - 1) * sum([(v - est) ** 2 for v in vals])
            S = math.sqrt(S2)
            std = self.kn(samp_size) * S

        if samp_size > 2:
            _logger.debug('\tsampsize(%d)\t%.4f+-%.4f\t%.4f - %.4f',
                          samp_size, est, std,
                          self.global_bounds[0], self.global_bounds[1])

        return est, std, vals

    def weight(self, val):
        u = self.global_mean.value()
        std = self.global_std.value()
        if std == 0:
            return 1.

        max_std = 2.58
        #max_std = 1.6

        # weight grows quadratically with the number of stds above the mean,
        # shifted by 2 and clamped to [0, max_std]
        nstds = (val - u) / std
        nstds = min(max(0, nstds + 2), max_std)
        y = (nstds / max_std) ** 2
        return y

        # Alternative weighting schemes, kept for reference (unreachable):
        #
        # linear scale; hits its maximum around 2.58 - 0.5
        # r = 2.58 + 2.58 + 0.5  # why is a 0.5 here?
        # v = min(r, max(0., (val - u) / std - 0.5))
        # return 0.0001 + (v / r) * (1 - 0.0001)
        #
        # erf-based, rescaled to (0.001, 1)
        # w = .5 * (1 + math.erf((val - u) / math.sqrt(2 * std ** 2)))
        # return 0.001 + w * (1 - 0.001)

    def should_stop(self, tables, bad_stats, good_stats):
        if max(map(len, tables)) <= self.min_points:
            return True

        # previous all-rows-identical check, kept for reference:
        # val, allsame = None, True
        # for i, row in enumerate(table):
        #     if i == 0:
        #         val = tuple([row[aggcol].value for aggcol in self.aggcols])
        #     elif val != tuple([row[aggcol].value for aggcol in self.aggcols]):
        #         allsame = False
        #         break
        # if allsame or std == 0:
        #     return True

        if bad_stats.std == 0:
            return True

        weight = self.weight(max(bad_stats.vals))
        if weight == 0:
            return True

        # stop when the partition's spread (its value range, or a +-2.58 std
        # interval, whichever is wider) falls below a weighted fraction of the
        # global value range
        threshold = (self.global_bounds[1] - self.global_bounds[0]) * self.epsilon / weight
        bounds = max(bad_stats.vals) - min(bad_stats.vals)
        bounds = max(bounds, bad_stats.std * 2.58 * 2)
        return bounds < threshold

        # Alternative weighted-MSE criterion, kept for reference (unreachable):
        # w = self.weight(est + 2.58 * std)
        # wmse = np.mean([self.weight(v) * (abs(v - bad_stats.est)) ** 2
        #                 for v in bad_stats.vals])
        # return wmse < self.epsilon * (self.global_bounds[1] * 0.8)
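# A minimal sketch (not part of the original module) of what Evaluator.kn
# computes: the formula matches the c4(n) constant from normal sampling theory,
# for which the expected sample standard deviation of n normal draws satisfies
# E[S] = kn(n) * sigma. The helper below checks that empirically; its name and
# the trial counts are assumptions made for this illustration only.
def _demo_kn_factor(n=5, trials=50000, sigma=1.0):
    """Empirically verify E[S] ~= kn(n) * sigma for n normal draws."""
    kn = math.sqrt(2. / (n - 1)) * (math.gamma(n / 2.) / math.gamma((n - 1.) / 2.))
    samples = np.random.normal(0., sigma, (trials, n))
    mean_S = float(np.mean(np.std(samples, axis=1, ddof=1)))  # average sample std
    return mean_S, kn * sigma  # e.g. n=5 gives kn ~= 0.94; the values should match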