Пример #1
0
 def lookup(self, windowLength, wordLength, symbols, sequence, val):
     """Add ``val`` to ``self.acscores`` wherever ``sequence`` occurs.

     The per-sample encoded SFA words are cached in ``self.sfa`` under the
     key ``(windowLength, wordLength, symbols)``; they are computed on the
     first call for a given key. For every sample ``i`` and window ``j``
     whose encoded word contains ``sequence``, ``val`` is added to
     ``self.acscores[i, j:j + windowLength]``.
     """
     cacheKey = (windowLength, wordLength, symbols)

     if cacheKey not in self.sfa:
         useNormMean = True
         transformer = SFA("EQUI_DEPTH")
         transformer.fitWindowing(self.raw, windowLength, wordLength,
                                  symbols, useNormMean, True)
         # Register the (still empty) entry before filling it, sample by sample.
         self.sfa[cacheKey] = []
         perSample = self.sfa[cacheKey]
         for sampleIdx in range(self.raw["Samples"]):
             windowWords = transformer.transformWindowing(self.raw[sampleIdx])
             perSample.append(
                 [self.sfaToDWord(w, symbols) for w in windowWords])

     for sampleIdx in range(self.raw["Samples"]):
         for offset, encoded in enumerate(self.sfa[cacheKey][sampleIdx]):
             if sequence in encoded:
                 # A window starting at `offset` covers `windowLength` points.
                 self.acscores[sampleIdx, offset:offset + windowLength] += val
Пример #2
0
def sfaToWord(word):
    """Return the symbols of ``word`` rendered as a lowercase-letter string.

    Each integer symbol ``w`` is mapped to ``chr(w + 97)``, i.e. 0 -> "a",
    1 -> "b", and so on. An empty ``word`` yields an empty string.
    """
    # join() builds the result in one pass instead of repeated concatenation.
    return "".join(chr(w + 97) for w in word)


def sfaToWordList(wordList):
    """Return all words of ``wordList`` as one string.

    Every word is rendered with ``sfaToWord`` and followed by ``"; "``, so a
    non-empty result always ends with the separator. An empty ``wordList``
    yields an empty string.
    """
    # join() builds the result in one pass instead of repeated concatenation.
    return "".join(sfaToWord(word) + "; " for word in wordList)


# Demo script: fit a windowed SFA model on the training split and print the
# SFA words of every test series.
# NOTE(review): windowLength, wordLength, symbols and normMean, as well as
# load and SFA, are not defined in this excerpt -- they must be provided
# earlier in the file.
train, test, train_labels, test_labels = load("CBF", "\t")

sfa = SFA("EQUI_DEPTH")

# NOTE(review): this call passes train_labels as the second argument; confirm
# that this matches the fitWindowing signature of the SFA version in use.
sfa.fitWindowing(train, train_labels, windowLength, wordLength, symbols,
                 normMean, True)

sfa.printBins()

# test is indexed with .shape / .iloc, one row per iteration.
for i in range(test.shape[0]):
    wordList = sfa.transformWindowing(test.iloc[i, :])
    print(
        str(i) + "-th transformed time series SFA word " + "\t" +
        sfaToWordList(wordList))
Пример #3
0
class BOSS():
    """Turn windowed SFA words into integer-coded words and word histograms.

    The SFA transformer (``SFA``, defined elsewhere) is created and fitted
    lazily on the first call to ``createWords`` and reused afterwards.
    """

    def __init__(self, maxF, maxS, windowLength, normMean):
        """Store the configuration; no fitting happens here.

        maxF         -- word length passed to SFA and maximum number of
                        symbols packed into one integer word
        maxS         -- alphabet size (stored as ``self.symbols``)
        windowLength -- window length passed to ``SFA.fitWindowing``
        normMean     -- flag passed through to ``SFA.fitWindowing``
        """
        self.maxF = maxF
        self.symbols = maxS
        self.windowLength = windowLength
        self.normMean = normMean
        self.signature = None  # fitted SFA instance, created on first use


    def createWords(self, samples):
        """Return, for every sample, the list of its integer-coded SFA words.

        ``samples`` must support ``samples["Samples"]`` (the sample count)
        and ``samples[i]`` (the i-th sample).
        """
        if self.signature is None:
            self.signature = SFA("EQUI_DEPTH")
            self.signature.fitWindowing(samples, self.windowLength, self.maxF, self.symbols, self.normMean, True)

        # int2byte is a method of this class; it is loop-invariant, so it is
        # evaluated once.
        bits = self.int2byte(self.symbols)

        words = []
        for i in range(samples["Samples"]):
            sfaWords = self.signature.transformWindowing(samples[i])
            words.append([self.createWord(word, self.maxF, bits) for word in sfaWords])

        return words


    def createWord(self, numbers, maxF, bits):
        """Pack the low ``bits`` bits of the leading symbols into one integer.

        At most ``min(len(numbers), maxF, round(60 / bits))`` symbols are
        used; the first symbol occupies the lowest bits. The packed value is
        then wrapped into the signed 32-bit range [-2**31, 2**31 - 1].

        Raises ZeroDivisionError when ``bits`` is 0.
        """
        # NOTE(review): round() can allow more than 60 packed bits (e.g.
        # bits=7 gives 9 symbols = 63 bits); confirm whether floor division
        # was intended.
        shortsPerLong = int(round(60 / bits))
        count = min(len(numbers), maxF, shortsPerLong)
        symbolMask = (1 << bits) - 1

        b = 0
        for i in range(count):
            # int() keeps the arithmetic in unbounded Python integers even
            # when the symbols are fixed-width numeric scalars.
            b |= (int(numbers[i]) & symbolMask) << (i * bits)

        # b is non-negative here. This is the closed form of "subtract 2**32
        # while b > 2**31 - 1", without a loop whose length grows with b.
        return ((b + 2147483648) & 0xFFFFFFFF) - 2147483648


    def createBagOfPattern(self, words, samples, f):
        """Build one ``{word: count}`` histogram per entry of ``words``.

        Each word is first truncated to its lowest ``f`` symbols. A word that
        equals the directly preceding (truncated) word of the same sample is
        not counted again. ``samples`` is accepted for interface
        compatibility but not used.
        """
        usedBits = self.int2byte(self.symbols)
        mask = (1 << (usedBits * f)) - 1
        noWord = -9223372036854775808  # sentinel; masked words are never negative

        bagOfPatterns = []
        for sampleWords in words:
            bag = {}
            lastWord = noWord
            for packed in sampleWords:
                word = packed & mask
                if word != lastWord:
                    bag[word] = bag.get(word, 0) + 1
                lastWord = word
            bagOfPatterns.append(bag)
        return bagOfPatterns


    def int2byte(self, number):
        """Return the index of the highest set bit of ``number``.

        Computed by successive halving of the search range (16, 8, 4, 2, 1
        bits); 0 and 1 both give 0. The first step only inspects bits 16-31,
        so the result is only meaningful for values below 2**32.
        """
        log = 0
        if (number & 0xffff0000) != 0:
            number >>= 16
            log = 16
        if number >= 256:
            number >>= 8
            log += 8
        if number >= 16:
            number >>= 4
            log += 4
        if number >= 4:
            number >>= 2
            log += 2
        return log + (number >> 1)


    def bag2dict(self, bag):
        """Return one ``{element: occurrences}`` dict per sequence in ``bag``."""
        bag_dict = []
        for elements in bag:
            counts = {}
            for element in elements:
                counts[element] = counts.get(element, 0) + 1
            bag_dict.append(counts)
        return bag_dict
Пример #4
0
class BOSSVS():
    """Integer-coded SFA words, per-sample word bags and per-class tf-idf.

    The SFA transformer (``SFA``, defined elsewhere) is created and fitted
    lazily on the first call to ``createWords`` and reused afterwards.
    """

    def __init__(self, maxF, maxS, windowLength, normMean, logger = None):
        """Store the configuration and, if a logger is given, log it.

        maxF         -- word length passed to SFA and maximum number of
                        symbols packed into one integer word
        maxS         -- alphabet size (stored as ``self.symbols``)
        windowLength -- window length passed to ``SFA.fitWindowing``
        normMean     -- flag passed through to ``SFA.fitWindowing``
        logger       -- optional object exposing ``Log(message, level=...)``
        """
        self.maxF = maxF
        self.symbols = maxS
        self.windowLength = windowLength
        self.normMean = normMean
        self.signature = None  # fitted SFA instance, created on first use
        # The default logger is None, so only log when one was supplied.
        # Logging happens before self.logger is set, so the logged attribute
        # dict does not contain the logger itself.
        if logger is not None:
            logger.Log(self.__dict__, level = 0)
        self.logger = logger


    def createWords(self, samples):
        """Return, for every sample, the list of its integer-coded SFA words.

        ``samples`` must support ``samples["Samples"]`` (the sample count)
        and ``samples[i]`` (the i-th sample).
        """
        if self.signature is None:
            self.signature = SFA("EQUI_DEPTH", logger = self.logger)
            self.signature.fitWindowing(samples, self.windowLength, self.maxF, self.symbols, self.normMean,True)
            self.signature.printBins(self.logger)

        words = []
        for i in range(samples["Samples"]):
            sfaWords = self.signature.transformWindowing(samples[i])
            # NOTE(review): int2byte is not defined in this class; it is
            # expected to be available at module level -- confirm.
            words.append([self.createWord(word, self.maxF, int2byte(self.symbols)) for word in sfaWords])

        return words


    def createWord(self, numbers, maxF, bits):
        """Pack the low ``bits`` bits of the leading symbols into one integer.

        At most ``min(len(numbers), maxF, round(60 / bits))`` symbols are
        used; the first symbol occupies the lowest bits. The packed value is
        then wrapped into the signed 32-bit range [-2**31, 2**31 - 1].

        Raises ZeroDivisionError when ``bits`` is 0.
        """
        # NOTE(review): round() can allow more than 60 packed bits (e.g.
        # bits=7 gives 9 symbols = 63 bits); confirm whether floor division
        # was intended.
        shortsPerLong = int(round(60 / bits))
        count = min(len(numbers), maxF, shortsPerLong)
        symbolMask = (1 << bits) - 1

        b = 0
        for i in range(count):
            # int() keeps the arithmetic in unbounded Python integers even
            # when the symbols are fixed-width numeric scalars.
            b |= (int(numbers[i]) & symbolMask) << (i * bits)

        # b is non-negative here. This is the closed form of "subtract 2**32
        # while b > 2**31 - 1", without a loop whose length grows with b.
        return ((b + 2147483648) & 0xFFFFFFFF) - 2147483648


    def createBagOfPattern(self, words, samples, f):
        """Build one ``BagOfBigrams`` per entry of ``words``.

        Bag ``j`` is created with ``samples[j].label`` and its ``bob``
        mapping counts the words of sample ``j`` truncated to their lowest
        ``f`` symbols. A word that equals the directly preceding (truncated)
        word of the same sample is not counted again.
        """
        # NOTE(review): int2byte is not defined in this class; it is expected
        # to be available at module level -- confirm.
        usedBits = int2byte(self.symbols)
        mask = (1 << (usedBits * f)) - 1
        noWord = -9223372036854775808  # sentinel; masked words are never negative

        bagOfPatterns = []
        for j in range(len(words)):
            bag = BagOfBigrams(samples[j].label)
            lastWord = noWord
            for packed in words[j]:
                word = packed & mask
                if word != lastWord:
                    bag.bob[word] = bag.bob.get(word, 0) + 1
                lastWord = word
            bagOfPatterns.append(bag)
        return bagOfPatterns


    def createTfIdf(self, bagOfPatterns, sampleIndices, uniqueLabels, labels):
        """Return ``{label: {word: weight}}`` with per-label unit L2 norm.

        Word counts of the bags selected by ``sampleIndices`` are summed per
        label (``labels[j]`` is the label of bag ``j``). A word that occurs
        in every label gets weight 0; otherwise the weight is
        ``(1 + log10(count)) / log10(1 + len(uniqueLabels) / classesWithWord)``.
        Each label's weights are finally normalised by ``normalizeTfIdf``.

        Raises KeyError if a selected label is missing from ``uniqueLabels``.
        """
        matrix = {label: {} for label in uniqueLabels}

        # Term frequencies: sum the word counts of all selected bags per label.
        for j in sampleIndices:
            classCounts = matrix[labels[j]]
            for word, count in bagOfPatterns[j].bob.items():
                classCounts[word] = classCounts.get(word, 0) + count

        # Number of labels in which each word occurs at least once.
        wordInClassFreq = {}
        for classCounts in matrix.values():
            for word in classCounts:
                wordInClassFreq[word] = wordInClassFreq.get(word, 0) + 1

        labelCount = len(uniqueLabels)
        for tfIDFs in matrix.values():
            # Only values are replaced, so iterating while assigning is safe.
            for word, count in tfIDFs.items():
                wordCount = wordInClassFreq.get(word)
                if count > 0 and labelCount != wordCount:
                    tfValue = 1. + math.log10(count)
                    idfValue = math.log10(1. + labelCount / wordCount)
                    tfIDFs[word] = tfValue / idfValue
                else:
                    tfIDFs[word] = 0.

        matrix = self.normalizeTfIdf(matrix)
        return matrix


    def normalizeTfIdf(self, classStatistics):
        """Scale every label's weights to unit Euclidean length, in place.

        Labels whose weights are all zero (or empty) are left untouched. The
        same ``classStatistics`` object is returned.
        """
        for weights in classStatistics.values():
            squareRoot = math.sqrt(sum(value ** 2 for value in weights.values()))
            if squareRoot > 0:
                for word in weights:
                    weights[word] /= squareRoot
        return classStatistics
Пример #5
0
        logger.Log("Test: SFAWordTest")
        from src.transformation.SFA import *
        sfa = SFA(FIXED_PARAMETERS["histogram_type"], logger=logger)
        sfa.fitTransform(train, FIXED_PARAMETERS['wordLength'],
                         FIXED_PARAMETERS['symbols'],
                         FIXED_PARAMETERS['normMean'])
        logger.Log(sfa.__dict__)

        for i in range(test["Samples"]):
            wordList = sfa.transform2(test[i].data, "null", str_return=True)
            logger.Log("%s-th transformed TEST time series SFA word \t %s " %
                       (i, wordList))

    if FIXED_PARAMETERS['test'] == 'SFAWordWindowingTest':
        logger.Log("Test: SFAWordWindowingTest")
        from src.transformation.SFA import *

        sfa = SFA(FIXED_PARAMETERS["histogram_type"], logger=logger)
        sfa.fitWindowing(train, FIXED_PARAMETERS['windowLength'],
                         FIXED_PARAMETERS['wordLength'],
                         FIXED_PARAMETERS['symbols'],
                         FIXED_PARAMETERS['normMean'], True)
        logger.Log(sfa.__dict__)

        for i in range(test["Samples"]):
            wordList = sfa.transformWindowing(test[i], str_return=True)
            logger.Log("%s-th transformed time series SFA word \t %s " %
                       (i, wordList))
except:
    logger.Log("Test and Dataset combo entered is not available")