Exemplo n.º 1
0
    def setWordCount(self, category, word, count, verbose=False):
        """Store ``count`` as the occurrence count of ``word`` in ``category``.

        When ``category`` is not yet a key of ``self.data`` it is created
        through ``addCategory`` first. Any count previously stored for the
        word is overwritten.

        Args:
            category: Category name; must pass ``TypeValidator.isString``.
            word: Word whose count is set; must pass
                ``TypeValidator.isString``.
            count: Value stored for the word (not validated here).
            verbose: Forwarded to ``addCategory`` when the category has to
                be created.

        Raises:
            ValueError: If ``category`` or ``word`` is rejected by
                ``TypeValidator.isString``.
        """
        if not TypeValidator.isString(category):
            # Single formatted message: passing several positional arguments
            # to the exception would render the message as a tuple.
            raise ValueError(
                'wordRepository::setWordCount: expected a string for '
                '[category], got {}.'.format(type(category)))
        if not TypeValidator.isString(word):
            raise ValueError(
                'wordRepository::setWordCount: expected a string for '
                '[word], got {}.'.format(type(word)))

        if category not in self.data:
            self.addCategory(category, verbose=verbose)

        self.data[category][word] = count
Exemplo n.º 2
0
    def appendInBulk(self,
                     url,
                     category,
                     words,
                     imagesCount,
                     autoAddCategory=True,
                     verbose=False):
        """Merge the word counts of one article into ``category``.

        A URL that is already in ``self.urls`` is skipped. Otherwise the
        article counter of the category is incremented by one, the image
        counter by ``imagesCount``, and every count in ``words`` is added to
        the count already stored for that word.

        Args:
            url: Identifier of the article; used only for de-duplication.
            category: Category name; must pass ``TypeValidator.isString``.
            words: Mapping of word -> count to add (iterated with
                ``.items()``).
            imagesCount: Amount added to ``self.imagesCounts[category]``.
            autoAddCategory: When true, a missing category is created via
                ``addCategory``; when false, a missing category raises.
            verbose: Enables diagnostic printing here and in the helpers
                this method forwards it to.

        Raises:
            ValueError: If ``category`` is rejected by
                ``TypeValidator.isString``.
            KeyError: If ``category`` is unknown and ``autoAddCategory`` is
                false.
        """
        if url in self.urls:
            if verbose:
                print('Url "', url, '" has been already added, skipping')
            return

        if not TypeValidator.isString(category):
            raise ValueError(
                'wordRepository::appendInBulk: expected a string for '
                '[category], got {}.'.format(type(category)))

        if category not in self.data:
            if not autoAddCategory:
                # Report the missing key itself, not its type.
                raise KeyError(
                    'wordRepository::appendInBulk: key "{}" not found.'
                    .format(category))
            self.addCategory(category, verbose=verbose)

        # Register the URL only after validation succeeded; otherwise a
        # rejected call would mark the URL as processed and a corrected
        # retry would be silently skipped.
        self.urls.add(url)

        self.articlesCounts[category] += 1
        self.imagesCounts[category] += imagesCount
        for word, count in words.items():
            newCount = self.getWordCount(category, word,
                                         verbose=verbose) + count
            self.setWordCount(category, word, newCount)
Exemplo n.º 3
0
    def getAllCategoryWords(self, category):
        """Return the word -> count mapping stored for ``category``.

        The returned object is the dictionary held in ``self.data`` itself,
        not a copy, so changes made by the caller are visible to this
        repository.

        Args:
            category: Category name; must pass ``TypeValidator.isString``.

        Returns:
            The value stored under ``self.data[category]``.

        Raises:
            ValueError: If ``category`` is rejected by
                ``TypeValidator.isString``.
            KeyError: If ``category`` is not a key of ``self.data``.
        """
        if not TypeValidator.isString(category):
            raise ValueError(
                'wordRepository::getAllCategoryWords: expected a string for '
                '[category], got {}.'.format(type(category)))

        if category not in self.data:
            # Report the missing key itself, not its type.
            raise KeyError(
                'wordRepository::getAllCategoryWords: key "{}" not found.'
                .format(category))

        return self.data[category]
Exemplo n.º 4
0
    def getWordCount(self, category, word, verbose=False):
        """Return the count stored for ``word`` in ``category``.

        NOTE: this lookup has a side effect. A word that is not yet present
        in the category is inserted with a count of 0 before the value is
        returned, so it will show up in ``self.data[category]`` afterwards.

        Args:
            category: Category name; must pass ``TypeValidator.isString``.
            word: Word to look up; must pass ``TypeValidator.isString``.
            verbose: When true, prints a notice if the word was missing.

        Returns:
            The value stored under ``self.data[category][word]`` (0 for a
            word that was not present before the call).

        Raises:
            ValueError: If ``category`` or ``word`` is rejected by
                ``TypeValidator.isString``.
            KeyError: If ``category`` is not a key of ``self.data``.
        """
        if not TypeValidator.isString(category):
            raise ValueError(
                'wordRepository::getWordCount: expected a string for '
                '[category], got {}.'.format(type(category)))
        if not TypeValidator.isString(word):
            raise ValueError(
                'wordRepository::getWordCount: expected a string for '
                '[word], got {}.'.format(type(word)))

        if category not in self.data:
            # Report the missing key itself, not its type.
            raise KeyError(
                'wordRepository::getWordCount: key "{}" not found.'
                .format(category))

        categoryWords = self.data[category]
        if word not in categoryWords:
            if verbose:
                print('wordRepository::getWordCount: didn\'t found "', word,
                      '" in "', category, '", assuming count = 0')
            # Kept from the original behaviour: unknown words are
            # materialised with a zero count.
            categoryWords[word] = 0

        return categoryWords[word]
Exemplo n.º 5
0
    def loadData(self, dictOfWords: list, dictOfClasses: dict,
                 tabOfImageInfo: list):
        """Load the word, class and image databases into this object.

        All three inputs are validated with ``TypeValidator`` first. Only
        when every check passes is the current content discarded via
        ``self.clear()`` and replaced with the result of ``__fillData``.

        Args:
            dictOfWords: Converted with ``list()`` and checked with
                ``TypeValidator.isTabOfDictsOfInts`` against
                ``len(dictOfClasses)``.
            dictOfClasses: Must pass ``TypeValidator.isDict`` and
                ``TypeValidator.isDictOfInt``.
            tabOfImageInfo: Must pass ``TypeValidator.isTabOfInts``.

        Raises:
            ValueError: If any of the inputs fails its validation; the
                number in the message identifies the failed check.
        """
        # dictOfClasses is checked first because the next check calls
        # len() on it.
        if not TypeValidator.isDict(dictOfClasses):
            raise ValueError(
                'Bad types of input data (1). dictOfClasses must be a dict')
        if not TypeValidator.isTabOfDictsOfInts(list(dictOfWords),
                                                len(dictOfClasses)):
            raise ValueError(
                'Bad types of input data (2). dictOfWords must be a tab of '
                'dicts of ints and have the same len as dictOfClasses'
            )
        if not TypeValidator.isDictOfInt(dictOfClasses):
            raise ValueError(
                'Bad types of input data (3). dictOfClasses must be a dict of ints'
            )
        if not TypeValidator.isTabOfInts(tabOfImageInfo):
            raise ValueError(
                'Bad types of input data (4). tabOfImageInfo must be a tab of ints'
            )

        self.clear()
        self.__words, self.__classes, self.__images = self.__fillData(
            dictOfWords, dictOfClasses, tabOfImageInfo)
Exemplo n.º 6
0
 def addCategory(self, category, verbose=False):
     """Register ``category`` with empty word data and zeroed counters.

     Args:
         category: Category name; must pass ``TypeValidator.isString``.
         verbose: When true, prints a notice after the category was added.

     Returns:
         ``True`` if the category was created, ``False`` if it was already
         a key of ``self.data`` (nothing is modified in that case).

     Raises:
         ValueError: If ``category`` is rejected by
             ``TypeValidator.isString``.
     """
     if not TypeValidator.isString(category):
         raise ValueError(
             'wordRepository::addCategory: expected a string, got',
             type(category), '.')

     # Guard clause: an existing category is left untouched.
     if category in self.data:
         return False

     self.data[category] = {}
     for counters in (self.articlesCounts, self.imagesCounts):
         counters[category] = 0

     if verbose:
         print('wordRepository::addCategory: added category "',
               category, '".')
     return True