def __init__(self, arr):
    """
    Build the per-row model of ``arr`` and load it into the bit tree.

    :param arr: 2-D object exposing ``shape`` as ``(m, n)`` and row access
        through ``arr[i]``; entries equal to 1 are counted for each row.
    """
    self.M = arr
    self.m, self.n = arr.shape
    # m + 1 boundary flags, all set at the start.
    self.bound = array([1] * (self.m + 1))
    self.bit_tree = BitTree(maxsize=self.m)

    # One (row index, count of ones, ratio of ones) tuple per row.
    self.model = []
    for row in range(self.m):
        ne = sum(1 for x in self.M[row] if x == 1)
        # float() forces true division: on Python 2 without
        # `from __future__ import division`, int / int would truncate the
        # ratio to 0 or 1. The ratio is capped just below 1.
        pe = min(float(ne) / self.n, 1 - 1e-6)
        self.model.append((row, ne, pe))

    # Rows with the most ones come first; the sort is stable, so rows with
    # equal counts keep their original relative order.
    self.model.sort(key=lambda entry: entry[1], reverse=True)
    for rank, entry in enumerate(self.model):
        self.bit_tree.update(rank, entry[1])
class BaseSegmentUnit(object):
    """
    Base class that groups the rows of a matrix and scores the grouping.

    Rows are ranked by how many entries equal 1. ``self.bound`` holds one
    flag per rank position (``m + 1`` in total); a non-zero flag marks a
    group boundary. Subclasses implement :meth:`find` to choose the
    boundaries.
    """

    def __init__(self, arr):
        """
        Build the per-row model of ``arr`` and load it into the bit tree.

        :param arr: 2-D object exposing ``shape`` as ``(m, n)`` and row
            access through ``arr[i]``; entries equal to 1 are counted for
            each row.
        """
        self.M = arr
        self.m, self.n = arr.shape
        # m + 1 boundary flags, all set at the start.
        self.bound = array([1] * (self.m + 1))
        self.bit_tree = BitTree(maxsize=self.m)

        # One (row index, count of ones, ratio of ones) tuple per row.
        self.model = []
        for row in range(self.m):
            ne = sum(1 for x in self.M[row] if x == 1)
            # float() forces true division: on Python 2 without
            # `from __future__ import division`, int / int would truncate
            # the ratio to 0 or 1. The ratio is capped just below 1.
            pe = min(float(ne) / self.n, 1 - 1e-6)
            self.model.append((row, ne, pe))

        # Rows with the most ones come first; the sort is stable, so rows
        # with equal counts keep their original relative order.
        self.model.sort(key=lambda entry: entry[1], reverse=True)
        for rank, entry in enumerate(self.model):
            self.bit_tree.update(rank, entry[1])

    def show(self):
        """Print the stored matrix and the sorted model (debugging aid)."""
        # Single-argument parenthesised prints produce the same output under
        # the Python 2 print statement and the Python 3 print function.
        print("获取的部分S[]数组")
        print(self.M)
        print("model:")
        pprint(self.model)

    @property
    def l(self):
        """
        :return: number of groups in the current partition, i.e. the number
            of flags in ``self.bound`` equal to 1, minus one.
        """
        return sum(1 for flag in self.bound if flag == 1) - 1

    @property
    def lm(self):
        """
        :return: Lm of the current partition:
            ``2 * l * log2(m) + m * log2(m)``.
        """
        bits = log(self.m, 2)
        return 2 * self.l * bits + self.m * bits

    @property
    def ld(self):
        """
        :return: Ld of the current partition, ``-log2(Pr)``.
        """
        # NOTE(review): ``pr`` is a product of many probabilities; if it
        # underflows to 0.0 this log call cannot succeed. Consider summing
        # logs instead if large inputs are expected.
        return -log(self.pr, 2)

    @property
    def ll(self):
        """
        :return: Ll = Ld + Lm.
        """
        return self.ld + self.lm

    @property
    def pr(self):
        """
        Pr of the current grouping (``Ld = -log(Pr, 2)``).

        For every boundary ``i`` with a preceding boundary ``a``, the rows
        at ranks ``a .. i - 1`` share one ratio ``px``, and each such row
        contributes ``px ** ones * (1 - px) ** (n - ones)`` to the product.

        :return: Pr as a float.
        """
        res = 1.0
        for i, flag in enumerate(self.bound):
            if flag == 0 or i == 0:
                continue
            a = self.prev_bound(i)
            if a is None:
                continue
            # Presumably query(a, i - 1) is the total count of ones over
            # ranks a .. i - 1 (BitTree is defined elsewhere -- confirm).
            # float() forces true division on Python 2.
            px = float(self.bit_tree.query(a, i - 1)) / ((i - a) * self.n)
            # A separate index name avoids shadowing the outer loop's ``i``.
            for rank in range(a, i):
                ones = self.model[rank][1]
                res *= px ** ones * (1 - px) ** (self.n - ones)
        return res

    def prev_bound(self, i):
        """
        :param i: position in ``self.bound``.
        :return: the largest boundary position strictly below ``i``, or
            ``None`` when there is none (``pr`` relies on the ``None``
            result). This is a linear scan; caching and maintaining the
            boundaries would make it cheaper.
        """
        found = None
        for idx, flag in enumerate(self.bound):
            if idx >= i:
                break
            if flag:
                found = idx
        return found

    def next_bound(self, i):
        """
        :param i: boundary position.
        :return: the smallest boundary position strictly above ``i``, or
            ``None`` when there is none.
        """
        for idx, flag in enumerate(self.bound):
            if flag and idx > i:
                return idx
        return None

    def find(self):
        """
        Run the algorithm. Implementations return a value representing the
        optimal Ll and store the result in ``self.model`` and
        ``self.bound`` (a better way of storing the result could be
        considered).

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()