def fitMuse(self, samples):
    """Grid-search MUSE hyperparameters (histogram type, mean-normalization,
    word length f) by liblinear cross-validation, then fit the final model.

    Parameters
    ----------
    samples : dict-like training set; reads samples["Dimensions"] and
        samples["Samples"].

    Returns
    -------
    MUSEMODEL summarizing the best configuration found.
    """
    dimensionality = samples["Dimensions"]
    maxCorrect = -1
    bestF = -1
    bestNorm = False
    bestHistType = None

    # Renamed from `min`/`Max`: the original shadowed the `min` builtin.
    min_window = 4
    max_window = self.GetMax(samples, self.MAX_WINDOW_SIZE)
    self.windowLengths = list(range(min_window, max_window + 1))

    # `breaker` lets a perfect training score abort all three nested loops.
    breaker = False
    for histType in self.histTypes:
        for normMean in [True, False]:
            model = MUSE(self.maxF, self.maxS, histType,
                         self.windowLengths, normMean, True)
            words = model.createWORDS(samples)

            f = self.minF
            while f <= self.maxF:
                bag = model.createBagOfPatterns(words, samples,
                                                dimensionality, f)
                bag = model.filterChiSquared(bag, self.chi)
                problem = self.initLibLinearProblem(bag, model.dict,
                                                    self.bias)
                correct = self.trainLibLinear(problem, 10)

                if correct > maxCorrect:
                    maxCorrect = correct
                    bestF = f
                    bestNorm = normMean
                    bestHistType = histType
                # Perfect training accuracy: no better config is possible.
                if correct == samples["Samples"]:
                    breaker = True
                    break
                f += 2
            if breaker:
                break
        if breaker:
            break

    # Refit once with the winning configuration on the full training set.
    self.word_model = MUSE(bestF, self.maxS, bestHistType,
                           self.windowLengths, bestNorm, True)
    words = self.word_model.createWORDS(samples)
    bag = self.word_model.createBagOfPatterns(words, samples,
                                              dimensionality, bestF)
    bag = self.word_model.filterChiSquared(bag, self.chi)
    problem = self.initLibLinearProblem(bag, self.word_model.dict, self.bias)
    param = Parameter(self.solverType, self.c, self.iter, self.p)
    self.model = Linear()
    self.linear_model = self.model.train(problem, param)

    return MUSEMODEL(bestNorm, bestHistType, bestF, maxCorrect,
                     samples["Samples"], problem.n)
def trainLibLinear(self, prob, n_folds=10):
    """Run n-fold cross-validation on a liblinear problem.

    Parameters
    ----------
    prob : liblinear Problem with attributes l, n, bias, x, y.
    n_folds : number of folds (clamped to the sample count).

    Returns
    -------
    int -- number of held-out samples classified correctly across all folds.
    """
    param = Parameter(self.solverType, self.c, self.iter, self.p)
    random.seed(1234)  # fixed seed: deterministic fold assignment

    n_samples = prob.l
    n_folds = min(n_folds, n_samples)
    perm = list(range(n_samples))
    random.shuffle(perm)

    # fold_start[i]..fold_start[i+1] delimits fold i within `perm`.
    fold_start = [0]
    for i in range(1, n_folds):
        fold_start.append(int(math.floor(i * n_samples / n_folds)))
    fold_start.append(n_samples)

    correct = 0
    for i in range(n_folds):
        b = fold_start[i]
        e = fold_start[i + 1]

        # BUG FIX: the original did `subprob = Problem`, binding the class
        # object itself, so every fold wrote its data onto shared class
        # attributes instead of a fresh per-fold instance.
        subprob = Problem()
        subprob.bias = prob.bias
        subprob.n = prob.n
        subprob.l = n_samples - (e - b)

        # Train on everything outside the held-out window [b, e).
        train_rows = perm[:b] + perm[e:]
        subprob.y = [prob.y[j] for j in train_rows]
        subprob.x = [prob.x[j] for j in train_rows]

        model = Linear()
        fold_model = model.train(subprob, param)

        # Score the held-out fold.
        for u in range(b, e):
            prediction = model.predict(fold_model, prob.x[perm[u]])
            if prob.y[perm[u]] == prediction:
                correct += 1
    return correct
def fitWeasel(self, train):
    """Grid-search WEASEL hyperparameters (mean-normalization, word length f)
    by liblinear cross-validation, then fit the final model.

    Parameters
    ----------
    train : dict-like training set; reads train["Samples"] and per-sample
        train[i].data.

    Returns
    -------
    WEASELMODEL summarizing the best configuration found.
    """
    maxCorrect = -1
    bestF = -1
    bestNorm = False

    self.minWindowLength = 4
    maxWindowLength = self.MAX_WINDOW_LENGTH
    # Cap the window length at the shortest series in the training set.
    for i in range(train["Samples"]):
        maxWindowLength = min(len(train[i].data), maxWindowLength)
    # NOTE(review): range() excludes maxWindowLength itself — confirm the
    # upper bound is intentionally exclusive.
    self.windows = range(self.minWindowLength, maxWindowLength)

    keep_going = True
    for normMean in [True, False]:
        if not keep_going:
            break
        model = WEASEL(self.maxF, self.maxS, self.windows, normMean)
        words = model.createWORDS(train)

        f = self.minF
        # Fixed: original used bitwise `&` and `== True` on booleans.
        while f <= self.maxF and keep_going:
            model.dict.reset()
            bop = model.createBagOfPatterns(words, train, f)
            bop = model.filterChiSquared(bop, self.chi)
            problem = self.initLibLinearProblem(bop, model.dict, self.bias)
            correct = self.trainLibLinear(problem, 10)

            if correct > maxCorrect:
                maxCorrect = correct
                bestF = f
                bestNorm = normMean
            # Perfect training accuracy: stop the whole search.
            if correct == train["Samples"]:
                keep_going = False
            f += 2

    # Refit once with the winning configuration on the full training set.
    self.word_model = WEASEL(self.maxF, self.maxS, self.windows, bestNorm)
    words = self.word_model.createWORDS(train)
    bop = self.word_model.createBagOfPatterns(words, train, bestF)
    bop = self.word_model.filterChiSquared(bop, self.chi)
    problem = self.initLibLinearProblem(bop, self.word_model.dict, self.bias)
    param = Parameter(self.solverType, self.c, self.iter, self.p)
    self.model = Linear()
    self.linear_model = self.model.train(problem, param)

    return WEASELMODEL(bestNorm, bestF, maxCorrect, train["Samples"],
                       problem.n)
def fitWeasel(self, samples):
    """Grid-search WEASEL hyperparameters (mean-normalization, word length f)
    by liblinear cross-validation, logging progress, then fit the final model.

    Parameters
    ----------
    samples : dict-like training set; reads samples["Samples"].

    Returns
    -------
    WEASELMODEL summarizing the best configuration found.
    """
    maxCorrect = -1
    bestF = -1
    bestNorm = False

    keep_going = True
    for normMean in self.NORMALIZATION:
        if not keep_going:
            break
        # Window lengths may depend on the normalization setting.
        self.windows = self.getWindowLengths(samples, normMean)
        self.logger.Log("Windows: %s" % self.windows)
        model = WEASEL(self.maxF, self.maxS, self.windows, normMean,
                       self.lowerBounding, logger=self.logger)
        words = model.createWORDS(samples)

        f = self.minF
        # Fixed: original used bitwise `&` and `== True` on booleans.
        while f <= self.maxF and keep_going:
            model.dict.reset()
            bop = model.createBagOfPatterns(words, samples, f)
            bop = model.filterChiSquared(bop, self.chi)
            problem = self.initLibLinearProblem(bop, model.dict, self.bias)
            correct = self.trainLibLinear(problem, 10)
            # Routed through the logger; the original had a stray debug
            # print(correct) inconsistent with the rest of the method.
            self.logger.Log("CV correct: %s" % correct)

            if correct > maxCorrect:
                self.logger.Log(
                    "New Best Correct at Norm=%s and F=%s of: %s"
                    % (normMean, f, correct))
                maxCorrect = correct
                bestF = f
                bestNorm = normMean
            # Perfect training accuracy: stop the whole search.
            if correct == samples["Samples"]:
                keep_going = False
            f += 2

    self.logger.Log("Best Model: Norm=%s Features=%s Correct=%s/%s"
                    % (bestNorm, bestF, maxCorrect, samples['Samples']))
    self.logger.Log("Final Fitting...")

    # Refit once with the winning configuration on the full training set.
    self.windows = self.getWindowLengths(samples, bestNorm)
    self.word_model = WEASEL(self.maxF, self.maxS, self.windows, bestNorm,
                             self.lowerBounding, logger=self.logger)
    words = self.word_model.createWORDS(samples)
    bop = self.word_model.createBagOfPatterns(words, samples, bestF)
    bop = self.word_model.filterChiSquared(bop, self.chi)
    problem = self.initLibLinearProblem(bop, self.word_model.dict, self.bias)
    param = Parameter(self.solverType, self.c, self.iter, self.p)
    self.model = Linear()
    self.linear_model = self.model.train(problem, param)
    self.bestF = bestF

    # NOTE(review): the `##` before this return in the original source is
    # ambiguous (possibly a commented-out return); kept active for parity
    # with the sibling fit methods — confirm callers expect a return value.
    return WEASELMODEL(bestNorm, bestF, maxCorrect, samples["Samples"],
                       problem.n)
def fitMuse(self, samples):
    """Grid-search MUSE hyperparameters (histogram type, mean-normalization,
    word length f) by liblinear cross-validation, logging progress, then fit
    the final model.

    Parameters
    ----------
    samples : dict-like training set; reads samples["Dimensions"] and
        samples["Samples"].

    Returns
    -------
    MUSEMODEL summarizing the best configuration found.
    """
    dimensionality = samples["Dimensions"]
    maxCorrect = -1
    bestF = -1
    bestNorm = False
    bestHistType = None

    # `breaker` lets a perfect training score abort all three nested loops.
    breaker = False
    for histType in self.histTypes:
        for normMean in [True, False]:
            # Window lengths may depend on the normalization setting.
            self.windowLengths = self.getWindowLengths(samples, normMean)
            self.logger.Log("Fitting for Norm=%s and histType=%s"
                            % (normMean, histType))
            model = MUSE(self.maxF, self.maxS,
                         histogramType=histType,
                         windowLengths=self.windowLengths,
                         normMean=normMean,
                         lowerBounding=self.lowerBounding,
                         logger=self.logger)
            words = model.createWORDS(samples)

            f = self.minF
            while f <= self.maxF:
                bag = model.createBagOfPatterns(words, samples,
                                                dimensionality, f)
                bag = model.filterChiSquared(bag, self.chi)
                problem = self.initLibLinearProblem(bag, model.dict,
                                                    self.bias)
                correct = self.trainLibLinear(problem, 10)

                if correct > maxCorrect:
                    self.logger.Log(
                        "New Best Correct at Norm=%s and histType=%s and "
                        "F=%s of: %s" % (normMean, histType, f, correct))
                    maxCorrect = correct
                    bestF = f
                    bestNorm = normMean
                    bestHistType = histType
                # Perfect training accuracy: no better config is possible.
                if correct == samples["Samples"]:
                    breaker = True
                    break
                f += 2
            if breaker:
                break
        if breaker:
            break

    self.logger.Log("Final Fitting")
    # FIX: recompute window lengths for the winning normalization (the
    # original reused whatever the last loop iteration left behind) and pass
    # self.lowerBounding instead of a hard-coded True, consistent with the
    # search loop above and with fitWeasel.
    self.windowLengths = self.getWindowLengths(samples, bestNorm)
    self.word_model = MUSE(bestF, self.maxS, bestHistType,
                           self.windowLengths, bestNorm,
                           self.lowerBounding, logger=self.logger)
    words = self.word_model.createWORDS(samples)
    bag = self.word_model.createBagOfPatterns(words, samples,
                                              dimensionality, bestF)
    bag = self.word_model.filterChiSquared(bag, self.chi)
    problem = self.initLibLinearProblem(bag, self.word_model.dict, self.bias)
    param = Parameter(self.solverType, self.c, self.iter, self.p)
    self.model = Linear()
    self.linear_model = self.model.train(problem, param)

    return MUSEMODEL(bestNorm, bestHistType, bestF, maxCorrect,
                     samples["Samples"], problem.n)