Example no. 1
    def __init__(self, histogram_type, lowerBounding=False, logger=None):
        self.initialized = False
        self.HistogramType = histogram_type
        self.lowerBounding = lowerBounding
        self.MUSE_Bool = False

        logger.Log(self.__dict__, level=0)
        self.logger = logger

        self.sfa = SFA(histogram_type,
                       LB=self.lowerBounding,
                       logger=self.logger)
Example no. 2
    def createWords(self, samples):
        if self.signature is None:
            self.signature = SFA("EQUI_DEPTH")
            self.signature.fitWindowing(samples, self.windowLength, self.maxF, self.symbols, self.normMean, True)

        words = []
        for i in range(samples["Samples"]):
            sfaWords = self.signature.transformWindowing(samples[i])
            words_small = []
            for word in sfaWords:
                words_small.append(self.createWord(word, self.maxF, int2byte(self.symbols)))
            words.append(words_small)

        return words
Example no. 3
    def lookup(self, windowLength, wordLength, symbols, sequence, val):
        normMean = True
        key = (windowLength, wordLength, symbols)
        if key not in self.sfa:
            sfa = SFA("EQUI_DEPTH")
            sfa.fitWindowing(self.raw, windowLength, wordLength, symbols,
                             normMean, True)
            self.sfa[key] = []
            for i in range(self.raw["Samples"]):
                sfa_sr = []
                wordList = sfa.transformWindowing(self.raw[i])
                for word in wordList:
                    sfa_sr.append(self.sfaToDWord(word, symbols))
                # print(str(i) + "-th transformed time series SFA word " + "\t" + sfaToWordList(wordList))
                self.sfa[key].append(sfa_sr)
        for i in range(self.raw["Samples"]):
            for j in range(len(self.sfa[key][i])):
                if sequence in self.sfa[key][i][j]:
                    self.acscores[i, j:(j + windowLength)] += val
Example no. 4
    def createWords(self, samples, index, bar=None):
        if self.signature[index] is None:
            self.signature[index] = SFA("INFORMATION_GAIN", True, False)
            self.signature[index].fitWindowing(samples,
                                               self.windowLengths[index],
                                               self.maxF, self.symbols,
                                               self.normMean, False)
            # self.signature[index].printBins()

        words = []
        for i in range(samples["Samples"]):
            words.append(self.signature[index].transformWindowingInt(
                samples[i], self.maxF))

        self.words[index] = words
        if bar is not None:
            bar.update(index)
Example no. 5
    def createWords(self, samples, index, data):
        if self.signature[index] is None:
            self.signature[index] = [None for _ in range(samples['Dimensions'])]
            for i in range(samples['Dimensions']):
                self.signature[index][i] = SFA(self.histogramType, self.lowerBounding, logger=self.logger, mftUseMaxOrMin=False)
                self.signature[index][i].mv_fitWindowing(samples, self.windowLengths[index], self.maxF, self.alphabetSize, self.normMean, self.lowerBounding, dim=i)
                self.signature[index][i].printBins(self.logger)

        words = []
        for m in range(samples["Samples"]):
            for n in range(samples["Dimensions"]):
                if len(samples[m][n].data) >= self.windowLengths[index]:
                    words.append(self.signature[index][n].transformWindowingInt(samples[m][n], self.maxF))
                else:
                    words.append([])

        self.logger.Log("Generating %s Words for Norm=%s and Window=%s" % (data, self.normMean, self.windowLengths[index]))
        self.words[index] = words
Example no. 6
    def createWords(self, samples, index, bar=None):
        if self.signature[index] is None:
            self.signature[index] = SFA(self.histogramType, False,
                                        self.lowerBounding, False)
            self.signature[index].mv_fitWindowing(samples,
                                                  self.windowLengths[index],
                                                  self.maxF, self.alphabetSize,
                                                  self.normMean, False)
            # self.signature[index].printBins()

        words = []
        for m in range(samples["Samples"]):
            for n in range(samples["Dimensions"]):
                if len(samples[m][n].data) >= self.windowLengths[index]:
                    words.append(self.signature[index].transformWindowingInt(
                        samples[m][n], self.maxF))
                else:
                    words.append([])

        self.words[index] = words

        if bar is not None:
            bar.update(index)
Example no. 7
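# Supervised SFA: selects the Fourier coefficients that best separate the classes
# (via a one-way ANOVA F test, see getFoneway below) before quantisation. This excerpt
# assumes `math`, `SFA`, `MFT` and `getDisjointSequences` are imported at module level.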
class SFASupervised():
    def __init__(self, histogram_type, lowerBounding=False, logger=None):
        self.initialized = False
        self.HistogramType = histogram_type
        self.lowerBounding = lowerBounding
        self.MUSE_Bool = False

        logger.Log(self.__dict__, level=0)
        self.logger = logger

        self.sfa = SFA(histogram_type,
                       LB=self.lowerBounding,
                       logger=self.logger)

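    # Splits each training series into disjoint windows of length `windowSize`,
    # then fits the supervised quantisation on those windows.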
    def fitWindowing(self, samples, windowSize, wordLength, symbols, normMean,
                     lowerBounding):
        self.sfa.quantization = self.quantizationSupervised
        self.transformation = MFT(windowSize, normMean, lowerBounding,
                                  self.MUSE_Bool)
        sa = {}
        index = 0

        for i in range(samples["Samples"]):
            new_list = getDisjointSequences(samples[i], windowSize, normMean)
            for j in range(len(new_list)):
                sa[index] = new_list[j]
                index += 1

        sa["Samples"] = index
        self.fitTransformed(sa, wordLength, symbols, normMean)

    def fitTransformed(self, samples, wordLength, symbols, normMean):
        length = len(samples[0].data)
        transformedSignal = self.sfa.fitTransformDouble(
            samples, length, symbols, normMean)

        best = self.calcBestCoefficients(samples, transformedSignal)
        self.bestValues = [0 for i in range(min(len(best), wordLength))]
        self.maxWordLength = 0

        for i in range(len(self.bestValues)):
            if best[i][1] != -math.inf:
                self.bestValues[i] = best[i][0]
                self.maxWordLength = max(best[i][0] + 1, self.maxWordLength)

        self.maxWordLength += self.maxWordLength % 2
        self.sfa.maxWordLength = self.maxWordLength
        return self.sfa.transform(samples, transformedSignal)

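    # Ranks coefficient indices by descending ANOVA F score; indices whose score is
    # -inf are skipped, so `best` only holds usable coefficients.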
    def calcBestCoefficients(self, samples, transformedSignal):
        classes = {}
        for i in range(samples["Samples"]):
            if samples[i].label in classes.keys():
                classes[samples[i].label].append(transformedSignal[i])
            else:
                classes[samples[i].label] = [transformedSignal[i]]

        nSamples = len(transformedSignal)
        nClasses = len(classes.keys())
        length = len(transformedSignal[1])

        f = self.getFoneway(length, classes, nSamples, nClasses)
        f_sorted = sorted(f, reverse=True)
        best = []
        inf_index = 0

        for value in f_sorted:
            if value == -math.inf:
                index = f.index(value) + inf_index
                inf_index += 1
            else:
                index = f.index(value)
                best.append([index, value])  #NOTE Changed to indent

        return best

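    # One-way ANOVA per Fourier coefficient:
    # F = (SS_between / df_between) / (SS_within / df_within);
    # a coefficient with zero within-class variance is assigned -inf.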
    def getFoneway(self, length, classes, nSamples, nClasses):
        ss_alldata = [0. for i in range(length)]
        sums_args = {}
        keys_class = list(classes.keys())

        for key in keys_class:
            allTs = classes[key]
            sums = [0. for i in range(len(ss_alldata))]
            sums_args[key] = sums
            for ts in allTs:
                for i in range(len(ts)):
                    ss_alldata[i] += ts[i] * ts[i]
                    sums[i] += ts[i]

        square_of_sums_alldata = [0. for i in range(len(ss_alldata))]
        square_of_sums_args = {}
        for key in keys_class:
            # square_of_sums_alldata2 = [0. for i in range(len(ss_alldata))]
            sums = sums_args[key]
            for i in range(len(sums)):
                square_of_sums_alldata[i] += sums[i]
            # square_of_sums_alldata += square_of_sums_alldata2

            squares = [0. for i in range(len(sums))]
            square_of_sums_args[key] = squares
            for i in range(len(sums)):
                squares[i] += sums[i] * sums[i]

        for i in range(len(square_of_sums_alldata)):
            square_of_sums_alldata[i] *= square_of_sums_alldata[i]

        sstot = [0. for i in range(len(ss_alldata))]
        for i in range(len(sstot)):
            sstot[i] = ss_alldata[i] - square_of_sums_alldata[i] / nSamples

        ssbn = [0. for i in range(len(ss_alldata))]  ## sum of squares between
        sswn = [0. for i in range(len(ss_alldata))]  ## sum of squares within

        for key in keys_class:
            sums = square_of_sums_args[key]
            n_samples_per_class = len(classes[key])
            for i in range(len(sums)):
                ssbn[i] += sums[i] / n_samples_per_class

        for i in range(len(square_of_sums_alldata)):
            ssbn[i] += -square_of_sums_alldata[i] / nSamples

        dfbn = nClasses - 1  ## degrees of freedom between
        dfwn = nSamples - nClasses  ## degrees of freedom within
        msb = [0. for i in range(len(ss_alldata))]  ## variance (mean square) between classes
        msw = [0. for i in range(len(ss_alldata))]  ## variance (mean square) within samples
        f = [0. for i in range(len(ss_alldata))]  ## f-ratio

        for i in range(len(sswn)):
            sswn[i] = sstot[i] - ssbn[i]
            msb[i] = ssbn[i] / dfbn
            msw[i] = sswn[i] / dfwn
            f[i] = msb[i] / msw[i] if msw[i] != 0. else -math.inf

        return f

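    # Quantises one DFT approximation using only the selected coefficients: for each
    # chosen index, counts how many bin edges the value passes and stores that count - 1.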
    def quantizationSupervised(self, one_approx):
        signal = [0 for _ in range(min(len(one_approx), len(self.bestValues)))]

        for a in range(len(signal)):
            i = self.bestValues[a]
            b = 0
            for beta in range(self.sfa.bins.shape[1]):
                if one_approx[i] < self.sfa.bins.iloc[i, beta]:
                    break
                else:
                    b += 1
            signal[a] = b - 1

        return signal
Example no. 8
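# Note: this excerpt starts mid-script; the imports and the windowLength, wordLength,
# symbols and normMean settings are presumably defined above it in the original file
# (Example no. 9 shows a comparable full setup).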
def sfaToWord(word):
    word_string = ""
    for w in word:
        word_string += chr(w + 97)
    return word_string
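# e.g. sfaToWord([0, 3, 1]) -> "adb": chr(w + 97) maps symbol 0 to 'a', 1 to 'b', and so on.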


def sfaToWordList(wordList):
    list_string = ""
    for word in wordList:
        list_string += sfaToWord(word)
        list_string += "; "
    return list_string


train, test, train_labels, test_labels = load("CBF", "\t")

sfa = SFA("EQUI_DEPTH")

sfa.fitWindowing(train, train_labels, windowLength, wordLength, symbols,
                 normMean, True)

sfa.printBins()

for i in range(test.shape[0]):
    wordList = sfa.transformWindowing(test.iloc[i, :])
    print(
        str(i) + "-th transformed time series SFA word " + "\t" +
        sfaToWordList(wordList))
Example no. 9
import os
import sys

sys.path.append(os.getcwd()[:-5])

from src.timeseries.TimeSeriesLoader import load
from src.transformation.SFA import *

symbols = 8
wordLength = 16
normMean = False


def sfaToWord(word):
    word_string = ""
    for w in word:
        word_string += chr(w + 97)
    return word_string


train, test, train_labels, test_labels = load("CBF", "\t")

sfa = SFA("EQUI_DEPTH")

sfa.fitTransform(train, train_labels, wordLength, symbols, normMean)

sfa.printBins()

for i in range(test.shape[0]):
    wordList = sfa.transform2(test.iloc[i, :], "null")
    print(
        str(i) + "-th transformed time series SFA word " + "\t" +
        sfaToWord(wordList))
Example no. 10
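# Bag-of-SFA-Symbols (BOSS): turns each series into SFA words over sliding windows and
# counts them as a bag of patterns. Assumes `SFA` and a module-level `int2byte` helper
# are imported (omitted from this excerpt).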
class BOSS():

    def __init__(self, maxF, maxS, windowLength, normMean):
        self.maxF = maxF
        self.symbols = maxS
        self.windowLength = windowLength
        self.normMean = normMean
        self.signature = None


    def createWords(self, samples):
        if self.signature is None:
            self.signature = SFA("EQUI_DEPTH")
            self.signature.fitWindowing(samples, self.windowLength, self.maxF, self.symbols, self.normMean, True)
            # self.signature.printBins()

        words = []
        for i in range(samples["Samples"]):
            sfaWords = self.signature.transformWindowing(samples[i])
            words_small = []
            for word in sfaWords:
                words_small.append(self.createWord(word, self.maxF, int2byte(self.symbols)))
            words.append(words_small)

        return words


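    # Packs up to `shortsPerLong` SFA symbols into a single integer, `bits` bits per
    # symbol, then folds values above 2**31 - 1 back down by repeatedly subtracting 2**32.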
    def createWord(self, numbers, maxF, bits):
        shortsPerLong = int(round(60 / bits))
        to = min([len(numbers), maxF])

        b = 0
        s = 0
        shiftOffset = 1
        for i in range(s, (min(to, shortsPerLong + s))):
            shift = 1
            for j in range(bits):
                if (numbers[i] & shift) != 0:
                    b |= shiftOffset
                shiftOffset <<= 1
                shift <<= 1

        limit = 2147483647
        total = 2147483647 + 2147483648
        while b > limit:
            b = b - total - 1
        return b


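    # Builds one histogram per series: `mask` keeps only the first `f` symbols of each
    # packed word, and identical consecutive words are counted once (numerosity reduction).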
    def createBagOfPattern(self, words, samples, f):
        bagOfPatterns = []
        usedBits = int2byte(self.symbols)
        mask = (1 << (usedBits * f)) - 1

        for j in range(len(words)):
            BOP = {}
            lastWord = -9223372036854775808
            for offset in range(len(words[j])):
                word = words[j][offset] & mask
                if word != lastWord:
                    if word in BOP.keys():
                        BOP[word] += 1
                    else:
                        BOP[word] = 1
                lastWord = word
            bagOfPatterns.append(BOP)
        return bagOfPatterns


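    # Floor of log2(number); used above as the number of bits reserved per SFA symbol,
    # e.g. int2byte(8) == 3.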
    def int2byte(self, number):
        log = 0
        if (number & 0xffff0000) != 0:
            number >>= 16
            log = 16
        if number >= 256:
            number >>= 8
            log += 8
        if number >= 16:
            number >>= 4
            log += 4
        if number >= 4:
            number >>= 2
            log += 2
        return log + (number >> 1)


    def bag2dict(self, bag):
        bag_dict = []
        for word_list in bag:
            new_dict = {}
            for element in word_list:
                if element in new_dict.keys():
                    new_dict[element] += 1
                else:
                    new_dict[element] = 1
            bag_dict.append(new_dict)
        return bag_dict
Example no. 11
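# BOSS in Vector Space (BOSS VS): like BOSS, but builds per-class tf-idf weight vectors
# over the bags of SFA words. Assumes `SFA`, `BagOfBigrams`, a module-level `int2byte`
# helper, `math` and a logger with a Log() method are available (imports omitted here).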
class BOSSVS():

    def __init__(self, maxF, maxS, windowLength, normMean, logger=None):
        self.maxF = maxF
        self.symbols = maxS
        self.windowLength = windowLength
        self.normMean = normMean
        self.signature = None
        logger.Log(self.__dict__, level=0)
        self.logger = logger


    def createWords(self, samples):
        if self.signature is None:
            self.signature = SFA("EQUI_DEPTH", logger=self.logger)
            self.signature.fitWindowing(samples, self.windowLength, self.maxF, self.symbols, self.normMean, True)
            self.signature.printBins(self.logger)

        words = []
        for i in range(samples["Samples"]):
            sfaWords = self.signature.transformWindowing(samples[i])
            words_small = []
            for word in sfaWords:
                words_small.append(self.createWord(word, self.maxF, int2byte(self.symbols)))
            words.append(words_small)

        return words


    def createWord(self, numbers, maxF, bits):
        shortsPerLong = int(round(60 / bits))
        to = min([len(numbers), maxF])

        b = 0
        s = 0
        shiftOffset = 1
        for i in range(s, (min(to, shortsPerLong + s))):
            shift = 1
            for j in range(bits):
                if (numbers[i] & shift) != 0:
                    b |= shiftOffset
                shiftOffset <<= 1
                shift <<= 1

        limit = 2147483647
        total = 2147483647 + 2147483648
        while b > limit:
            b = b - total - 1
        return b


    def createBagOfPattern(self, words, samples, f):
        bagOfPatterns = []
        usedBits = int2byte(self.symbols)
        mask = (1 << (usedBits * f)) - 1

        for j in range(len(words)):
            BOP = BagOfBigrams(samples[j].label)  # {}
            lastWord = -9223372036854775808
            for offset in range(len(words[j])):
                word = words[j][offset] & mask
                if word != lastWord:
                    if word in BOP.bob.keys():
                        BOP.bob[word] += 1
                    else:
                        BOP.bob[word] = 1
                lastWord = word
            bagOfPatterns.append(BOP)
        return bagOfPatterns


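    # One weight vector per class: a word occurring in every class is zeroed; otherwise
    # its weight combines a log term frequency (1 + log10 of the in-class count) with
    # log10(1 + nClasses / wordCount), and each class vector is L2-normalised afterwards.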
    def createTfIdf(self, bagOfPatterns, sampleIndices, uniqueLabels, labels):
        matrix = {}
        for label in uniqueLabels:
            matrix[label] = {}

        for j in sampleIndices:
            label = labels[j]
            for key, value in bagOfPatterns[j].bob.items():
                matrix[label][key] = matrix[label][key] + value if key in matrix[label].keys() else value

        wordInClassFreq = {}
        for key, value in matrix.items():
            for key2, value2 in matrix[key].items():
                wordInClassFreq[key2] = wordInClassFreq[key2] + 1 if key2 in wordInClassFreq.keys() else 1

        for key, value in matrix.items():
            tfIDFs = matrix[key]
            for key2, value2 in tfIDFs.items():
                wordCount = wordInClassFreq.get(key2)
                if value2 > 0 and len(uniqueLabels) != wordCount:
                    tfValue = 1. + math.log10(value2)
                    idfValue = math.log10(1. + len(uniqueLabels) / wordCount)
                    tfIdf = tfValue / idfValue
                    tfIDFs[key2] = tfIdf
                else:
                    tfIDFs[key2] = 0.
            matrix[key] = tfIDFs

        matrix = self.normalizeTfIdf(matrix)
        return matrix


    def normalizeTfIdf(self, classStatistics):
        for key, values in classStatistics.items():
            squareSum = 0.
            for key2, value2 in classStatistics[key].items():
                squareSum += value2 ** 2
            squareRoot = math.sqrt(squareSum)
            if squareRoot > 0:
                for key2, value2 in classStatistics[key].items():
                    classStatistics[key][key2] /= squareRoot
        return classStatistics
Example no. 12
from src.timeseries.TimeSeriesLoader import uv_load
from src.transformation.SFA import *

symbols = 8
wordLength = 16
normMean = False

def sfaToWord(word):
    word_string = ""
    alphabet = "abcdefghijklmnopqrstuv"
    for w in word:
        word_string += alphabet[w]
    return word_string


train, test = uv_load("Gun_Point")

sfa = SFA("EQUI_DEPTH")

sfa.fitTransform(train, wordLength, symbols, normMean)

sfa.printBins()

for i in range(test["Samples"]):
    wordList = sfa.transform2(test[i].data, "null")
    print(str(i) + "-th transformed time series SFA word " + "\t" + sfaToWord(wordList))




Example no. 13
                   (FIXED_PARAMETERS['dataset'], scoreShotgunEnsemble))

    if FIXED_PARAMETERS['test'] == 'Shotgun':
        logger.Log("Test: Shotgun")
        from src.classification.ShotgunClassifier import *
        shotgun = ShotgunClassifier(FIXED_PARAMETERS, logger)
        scoreShotgun = shotgun.eval(train, test)[0]
        logger.Log("%s: %s" % (FIXED_PARAMETERS['dataset'], scoreShotgun))

    ##=========================================================================================
    ## SFA Word Tests
    ##=========================================================================================
    if FIXED_PARAMETERS['test'] == 'SFAWordTest':
        logger.Log("Test: SFAWordTest")
        from src.transformation.SFA import *
        sfa = SFA(FIXED_PARAMETERS["histogram_type"], logger=logger)
        sfa.fitTransform(train, FIXED_PARAMETERS['wordLength'],
                         FIXED_PARAMETERS['symbols'],
                         FIXED_PARAMETERS['normMean'])
        logger.Log(sfa.__dict__)

        for i in range(test["Samples"]):
            wordList = sfa.transform2(test[i].data, "null", str_return=True)
            logger.Log("%s-th transformed TEST time series SFA word \t %s " %
                       (i, wordList))

    if FIXED_PARAMETERS['test'] == 'SFAWordWindowingTest':
        logger.Log("Test: SFAWordWindowingTest")
        from src.transformation.SFA import *

        sfa = SFA(FIXED_PARAMETERS["histogram_type"], logger=logger)