Пример #1
0
    def fitMuse(self, samples):
        """Pick MUSE hyper-parameters by cross-validation, then fit the final model.

        Every combination of histogram type (``self.histTypes``), mean
        normalisation (True, then False) and feature count ``f``
        (``self.minF`` up to ``self.maxF`` in steps of 2) is scored with
        ``self.trainLibLinear(problem, 10)``. The search stops as soon as a
        score equals ``samples["Samples"]``. The first combination reaching
        the highest score is used to build ``self.word_model`` and to train
        ``self.linear_model``.

        :param samples: training data; read here through its ``"Dimensions"``
            and ``"Samples"`` keys and otherwise passed on to the MUSE helpers.
        :return: ``MUSEMODEL`` built from the best normalisation flag,
            histogram type and feature count, the best score,
            ``samples["Samples"]`` and ``problem.n`` of the final problem.
        """
        dimensionality = samples["Dimensions"]

        # Candidate window lengths: 4 up to and including the GetMax() bound.
        shortest_window = 4
        longest_window = self.GetMax(samples, self.MAX_WINDOW_SIZE)
        self.windowLengths = list(range(shortest_window, longest_window + 1))

        top_score = -1
        top_f = -1
        top_norm = False
        top_hist = None

        perfect = False
        for hist in self.histTypes:
            for norm in (True, False):
                candidate = MUSE(self.maxF, self.maxS, hist,
                                 self.windowLengths, norm, True)
                words = candidate.createWORDS(samples)

                f = self.minF
                while not perfect and f <= self.maxF:
                    bag = candidate.filterChiSquared(
                        candidate.createBagOfPatterns(words, samples,
                                                      dimensionality, f),
                        self.chi)
                    problem = self.initLibLinearProblem(
                        bag, candidate.dict, self.bias)
                    score = self.trainLibLinear(problem, 10)

                    # Strict '>' keeps the earliest combination on ties.
                    if score > top_score:
                        top_score, top_f = score, f
                        top_norm, top_hist = norm, hist

                    # A score equal to the sample count ends the whole search.
                    perfect = score == samples["Samples"]
                    f += 2

                if perfect:
                    break
            if perfect:
                break

        # Rebuild the word model with the winning settings and train on it.
        self.word_model = MUSE(top_f, self.maxS, top_hist,
                               self.windowLengths, top_norm, True)
        words = self.word_model.createWORDS(samples)
        bag = self.word_model.filterChiSquared(
            self.word_model.createBagOfPatterns(words, samples,
                                                dimensionality, top_f),
            self.chi)
        problem = self.initLibLinearProblem(bag, self.word_model.dict,
                                            self.bias)

        param = Parameter(self.solverType, self.c, self.iter, self.p)
        self.model = Linear()
        self.linear_model = self.model.train(problem, param)

        return MUSEMODEL(top_norm, top_hist, top_f, top_score,
                         samples["Samples"], problem.n)
Пример #2
0
    def trainLibLinear(self, prob, n_folds=10):
        """Cross-validate a linear model on ``prob`` and count correct predictions.

        The ``prob.l`` rows are shuffled with a fixed seed (1234) and split
        into ``n_folds`` contiguous folds (capped at ``prob.l``). For each
        fold a model is trained on the remaining rows and used to predict the
        held-out rows.

        :param prob: problem exposing ``l``, ``n``, ``bias``, ``x`` and ``y``.
        :param n_folds: requested number of folds (default 10).
        :return: number of held-out rows whose predicted label equals
            ``prob.y`` for that row, summed over all folds.
        """
        param = Parameter(self.solverType, self.c, self.iter, self.p)

        # Fixed seed so the shuffle (and hence the folds) is reproducible.
        # This reseeds the module-level random generator.
        random.seed(1234)
        total = prob.l
        if n_folds > total:
            n_folds = total

        order = list(range(total))
        random.shuffle(order)

        # boundaries[k] .. boundaries[k + 1] delimits fold k inside `order`.
        boundaries = [0]
        boundaries.extend(
            int(math.floor(k * total / n_folds)) for k in range(1, n_folds))
        boundaries.append(total)

        hits = 0
        for k in range(n_folds):
            learner = Linear()
            start = boundaries[k]
            stop = boundaries[k + 1]

            # Everything outside [start, stop) is training data for this fold.
            train_rows = order[:start] + order[stop:]

            # NOTE(review): `Problem` is referenced without being called, so
            # if it is a class these attributes are set on the class itself
            # rather than on a fresh instance -- confirm this is intended.
            subprob = Problem
            subprob.bias = prob.bias
            subprob.n = prob.n
            subprob.l = total - (stop - start)
            subprob.y = [prob.y[row] for row in train_rows]
            subprob.x = [prob.x[row] for row in train_rows]
            fold_model = learner.train(subprob, param)

            for row in order[start:stop]:
                predicted = learner.predict(fold_model, prob.x[row])
                if prob.y[row] == predicted:
                    hits += 1

        return hits
Пример #3
0
    def fitWeasel(self, train):
        """Pick WEASEL hyper-parameters by cross-validation, then fit the final model.

        For mean normalisation True, then False, and for each feature count
        ``f`` from ``self.minF`` to ``self.maxF`` in steps of 2, a
        bag-of-patterns problem is built and scored with
        ``self.trainLibLinear(problem, 10)``. The search stops once a score
        equals ``train["Samples"]``. The first best-scoring setting is used to
        build ``self.word_model`` and to train ``self.linear_model``.

        :param train: training data; ``train["Samples"]`` is the number of
            series and ``train[i].data`` the values of series ``i``.
        :return: ``WEASELMODEL`` built from the best normalisation flag and
            feature count, the best score, ``train["Samples"]`` and
            ``problem.n`` of the final problem.
        """
        self.minWindowLength = 4

        # Upper window bound: the shortest series, capped at MAX_WINDOW_LENGTH.
        series_lengths = [len(train[i].data) for i in range(train["Samples"])]
        window_cap = min(series_lengths + [self.MAX_WINDOW_LENGTH])
        # range() excludes its end value, so window_cap itself is not a window.
        self.windows = range(self.minWindowLength, window_cap)

        top_score = -1
        top_f = -1
        top_norm = False

        searching = True
        for norm in (True, False):
            if not searching:
                break

            candidate = WEASEL(self.maxF, self.maxS, self.windows, norm)
            words = candidate.createWORDS(train)

            f = self.minF
            while searching and f <= self.maxF:
                # Clear the model's dictionary before rebuilding the bag for
                # the next f.
                candidate.dict.reset()
                bop = candidate.filterChiSquared(
                    candidate.createBagOfPatterns(words, train, f), self.chi)

                problem = self.initLibLinearProblem(
                    bop, candidate.dict, self.bias)
                score = self.trainLibLinear(problem, 10)

                # Strict '>' keeps the earliest setting on ties.
                if score > top_score:
                    top_score, top_f, top_norm = score, f, norm
                if score == train["Samples"]:
                    searching = False

                f += 2

        # Refit on the full training data with the winning settings.
        self.word_model = WEASEL(self.maxF, self.maxS, self.windows, top_norm)
        words = self.word_model.createWORDS(train)
        bop = self.word_model.filterChiSquared(
            self.word_model.createBagOfPatterns(words, train, top_f),
            self.chi)
        problem = self.initLibLinearProblem(bop, self.word_model.dict,
                                            self.bias)

        param = Parameter(self.solverType, self.c, self.iter, self.p)
        self.model = Linear()
        self.linear_model = self.model.train(problem, param)

        return WEASELMODEL(top_norm, top_f, top_score, train["Samples"],
                           problem.n)
Пример #4
0
    def fitWeasel(self, samples):
        """Pick WEASEL hyper-parameters by cross-validation, then fit the final model.

        For each normalisation setting in ``self.NORMALIZATION`` the window
        lengths are recomputed, and for each feature count ``f`` from
        ``self.minF`` to ``self.maxF`` in steps of 2 a bag-of-patterns problem
        is scored with ``self.trainLibLinear(problem, 10)``. The search stops
        once a score equals ``samples["Samples"]``. The first best-scoring
        setting is used to rebuild ``self.windows`` and ``self.word_model``
        and to train ``self.linear_model``. ``self.bestF`` is also stored.

        :param samples: training data; read here through its ``"Samples"``
            key and otherwise passed on to the WEASEL helpers.
        :return: ``WEASELMODEL`` built from the best normalisation flag and
            feature count, the best score, ``samples["Samples"]`` and
            ``problem.n`` of the final problem.
        """
        top_score = -1
        top_f = -1
        top_norm = False

        searching = True
        for norm in self.NORMALIZATION:
            if not searching:
                break

            self.windows = self.getWindowLengths(samples, norm)
            self.logger.Log("Windows: %s" % self.windows)
            candidate = WEASEL(self.maxF,
                               self.maxS,
                               self.windows,
                               norm,
                               self.lowerBounding,
                               logger=self.logger)
            words = candidate.createWORDS(samples)

            f = self.minF
            while searching and f <= self.maxF:
                # Clear the model's dictionary before rebuilding the bag for
                # the next f.
                candidate.dict.reset()
                bop = candidate.filterChiSquared(
                    candidate.createBagOfPatterns(words, samples, f),
                    self.chi)
                problem = self.initLibLinearProblem(
                    bop, candidate.dict, self.bias)
                score = self.trainLibLinear(problem, 10)
                # NOTE(review): raw print next to the logger calls; looks
                # like leftover debugging output.
                print(score)

                # Strict '>' keeps the earliest setting on ties.
                if score > top_score:
                    self.logger.Log(
                        "New Best Correct at Norm=%s and F=%s of: %s" %
                        (norm, f, score))
                    top_score, top_f, top_norm = score, f, norm
                if score == samples["Samples"]:
                    searching = False

                f += 2

        self.logger.Log("Best Model: Norm=%s  Features=%s  Correct=%s/%s" %
                        (top_norm, top_f, top_score, samples['Samples']))
        self.logger.Log("Final Fitting...")

        # The windows depend on the normalisation flag, so recompute them for
        # the winner before building the final word model.
        self.windows = self.getWindowLengths(samples, top_norm)
        self.word_model = WEASEL(self.maxF,
                                 self.maxS,
                                 self.windows,
                                 top_norm,
                                 self.lowerBounding,
                                 logger=self.logger)
        words = self.word_model.createWORDS(samples)
        bop = self.word_model.filterChiSquared(
            self.word_model.createBagOfPatterns(words, samples, top_f),
            self.chi)
        problem = self.initLibLinearProblem(bop, self.word_model.dict,
                                            self.bias)
        param = Parameter(self.solverType, self.c, self.iter, self.p)
        self.model = Linear()
        self.linear_model = self.model.train(problem, param)

        self.bestF = top_f
        return WEASELMODEL(top_norm, top_f, top_score, samples["Samples"],
                           problem.n)
Пример #5
0
    def fitMuse(self, samples):
        """Pick MUSE hyper-parameters by cross-validation, then fit the final model.

        Every combination of histogram type (``self.histTypes``), mean
        normalisation (True, then False) and feature count ``f``
        (``self.minF`` up to ``self.maxF`` in steps of 2) is scored with
        ``self.trainLibLinear(problem, 10)``. The search stops as soon as a
        score equals ``samples["Samples"]``. The first combination reaching
        the highest score is then refitted on the training data with the same
        window lengths and lower-bounding setting it was scored with.

        Sets ``self.windowLengths``, ``self.word_model``, ``self.model`` and
        ``self.linear_model``.

        :param samples: training data; read here through its ``"Dimensions"``
            and ``"Samples"`` keys and otherwise passed on to the MUSE helpers.
        :return: ``MUSEMODEL`` built from the best normalisation flag,
            histogram type and feature count, the best score,
            ``samples["Samples"]`` and ``problem.n`` of the final problem.
        """
        dimensionality = samples["Dimensions"]

        maxCorrect = -1
        bestF = -1
        bestNorm = False
        bestHistType = None

        breaker = False
        for histType in self.histTypes:
            for normMean in [True, False]:
                # Window lengths are derived per normalisation setting.
                self.windowLengths = self.getWindowLengths(samples, normMean)
                self.logger.Log("Fitting for Norm=%s and histType=%s" %
                                (normMean, histType))
                model = MUSE(self.maxF,
                             self.maxS,
                             histogramType=histType,
                             windowLengths=self.windowLengths,
                             normMean=normMean,
                             lowerBounding=self.lowerBounding,
                             logger=self.logger)
                words = model.createWORDS(samples)

                f = self.minF
                while f <= self.maxF:
                    bag = model.createBagOfPatterns(words, samples,
                                                    dimensionality, f)
                    bag = model.filterChiSquared(bag, self.chi)

                    problem = self.initLibLinearProblem(
                        bag, model.dict, self.bias)
                    correct = self.trainLibLinear(problem, 10)

                    # Strict '>' keeps the earliest combination on ties.
                    if correct > maxCorrect:
                        self.logger.Log(
                            "New Best Correct at Norm=%s and histType=%s and F=%s of: %s"
                            % (normMean, histType, f, correct))
                        maxCorrect = correct
                        bestF = f
                        bestNorm = normMean
                        bestHistType = histType

                    # A score equal to the sample count ends the whole search.
                    if correct == samples["Samples"]:
                        breaker = True
                        break

                    f += 2
                if breaker:
                    break
            if breaker:
                break

        self.logger.Log("Final Fitting")
        # The loop leaves self.windowLengths at the value for the last
        # normMean tried, which need not be bestNorm; recompute it so the
        # final model uses the windows the winning combination was scored on.
        self.windowLengths = self.getWindowLengths(samples, bestNorm)
        # Use the configured lower-bounding flag (not a hard-coded True) so
        # the final model matches the candidates evaluated above.
        self.word_model = MUSE(bestF,
                               self.maxS,
                               bestHistType,
                               self.windowLengths,
                               bestNorm,
                               self.lowerBounding,
                               logger=self.logger)
        words = self.word_model.createWORDS(samples)
        bag = self.word_model.createBagOfPatterns(words, samples,
                                                  dimensionality, bestF)
        bag = self.word_model.filterChiSquared(bag, self.chi)
        problem = self.initLibLinearProblem(bag, self.word_model.dict,
                                            self.bias)

        param = Parameter(self.solverType, self.c, self.iter, self.p)
        self.model = Linear()
        self.linear_model = self.model.train(problem, param)

        return MUSEMODEL(bestNorm, bestHistType, bestF, maxCorrect,
                         samples["Samples"], problem.n)