Python Dictionary.clean示例

编程语言: Python

命名空间/包名称: models.dictionary

类/类型: Dictionary

方法/功能: clean

hotexamples.com的示例: 2

Python Dictionary.clean - 共找到 2 个示例。这些是从开源项目中提取的、评价最高的 models.dictionary.Dictionary.clean 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示/隐藏

all(11)

load_from_file(7)

Dictionary(5)

clean(2)

meaning(2)

message_id(2)

pronounce(2)

put(2)

searchAndAddWord(2)

served(2)

source_lang(2)

tokenize(2)

twitter_user(2)

word(2)

serialize(1)

target_lang(1)

示例#1

显示文件

class Document:
    """A text file on disk plus the word counts collected from it.

    ``name`` and ``path`` are stored as given. ``dictionary`` starts as a
    fresh ``Dictionary``; ``readWords`` empties and refills it on each call.
    ``readedWords`` holds the vectorizer's feature names after a non-fast
    read, and ``totalWords`` holds the size reported by the last read.
    """

    def __init__(self, name, path):
        super().__init__()
        self.name = name
        self.path = path
        self.dictionary = Dictionary()
        self.readedWords = []
        self.totalWords = 0

    @staticmethod
    def _dropHeaderLines(lines, headers):
        """Return the lines that do not start with any of the given headers.

        ``headers`` may be ``None`` or empty, in which case ``lines`` is
        returned unchanged. A new list is built rather than removing from
        ``lines`` during iteration, which would skip the line following each
        removed one.
        """
        if not headers:
            return lines
        prefixes = tuple(headers)
        return [line for line in lines if not line.startswith(prefixes)]

    def readWords(self, stopWords=None, headers=None, fastReading=False):
        """Read the file at ``self.path`` and rebuild ``self.dictionary``.

        Args:
            stopWords: Words to leave out of the dictionary. ``None`` is
                treated as an empty list.
            headers: Line prefixes; every line starting with one of them is
                discarded before counting. ``None`` disables the filtering.
            fastReading: When exactly ``False``, the lines are counted with
                ``CountVectorizer``; ``self.readedWords`` becomes its feature
                names and ``self.totalWords`` the number of those features.
                Any other value takes the fast path: lines are split on
                whitespace, ``self.totalWords`` becomes the number of tokens
                (stop words included), and each non-stop token is added
                lower-cased.
        """
        if stopWords is None:
            stopWords = []

        self.dictionary.clean()

        # The context manager guarantees the handle is closed even if
        # reading fails part-way.
        with open(self.path, 'r', encoding="ISO-8859-1") as file:
            lines = file.readlines()

        lines = self._dropHeaderLines(lines, headers)

        if fastReading is False:
            vectorizer = CountVectorizer(stop_words=stopWords)
            x = vectorizer.fit_transform(lines)

            # NOTE(review): assuming CountVectorizer is scikit-learn's, newer
            # releases only provide get_feature_names_out(); fall back to the
            # older accessor when it is absent.
            featureNames = getattr(vectorizer, "get_feature_names_out", None)
            if featureNames is None:
                featureNames = vectorizer.get_feature_names
            self.readedWords = list(featureNames())
            self.totalWords = len(self.readedWords)

            # One row per input line; every non-zero cell is a word that
            # occurred in that line, with its count.
            for arrayLine in x.toarray():
                for i, count in enumerate(arrayLine):
                    if count != 0:
                        self.dictionary.searchAndAddWord(
                            CountedWord(self.readedWords[i], count))
        else:
            words = [word for line in lines for word in line.split()]
            self.totalWords = len(words)

            if isinstance(stopWords, str):
                # A plain string is matched by substring, as str.index did.
                stopLookup = stopWords
            else:
                try:
                    stopLookup = frozenset(stopWords)
                except TypeError:
                    # Not iterable or not hashable: filter nothing, which is
                    # what the previous index()-based lookup ended up doing.
                    stopLookup = frozenset()

            # The stop-word check uses the token's original case; only the
            # stored word is lower-cased.
            for word in words:
                if word not in stopLookup:
                    self.dictionary.searchAndAddWord(CountedWord(word.lower()))

    def clearReadedWords(self):
        """Reset ``self.readedWords`` to an empty list."""
        self.readedWords = []

示例#2

显示文件

class Group:
    """A named set of documents whose words are merged into one dictionary."""

    def __init__(self, name, path, type):
        super().__init__()
        self.name, self.path, self.type = name, path, type

        self.dictionary = Dictionary()
        self.documents = []

        self.totalCountedWords = 0

    def readDocuments(self, stopWords=[], headers=[], fastReading=False):
        """Read every document in ``self.documents`` and merge their words.

        The group's dictionary is emptied first. Each document is read with
        the given ``stopWords``, ``headers`` and ``fastReading`` (forwarded
        unchanged and never mutated here). Every word in the document's
        dictionary is then added to the group's dictionary as a
        ``GroupedWord``. A progress bar is advanced once per document and
        ``self.totalCountedWords`` is refreshed at the end.
        """
        self.dictionary.clean()

        print(f"Start reading group {self.name}, type: {self.type}")
        progress = defaultProgress(len(self.documents)).start()

        for done, doc in enumerate(self.documents, start=1):
            doc.readWords(stopWords, headers, fastReading)

            for entry in doc.dictionary.words:
                grouped = GroupedWord(entry.text, self, entry.counted, 1)
                self.dictionary.searchAndAddWord(grouped)

            # Drop the document's word list once its words are merged.
            doc.clearReadedWords()
            progress.update(done)

        self.setTotalCountedWords()
        progress.finish()
        print(f"Done reading group {self.name}")

    def setTotalCountedWords(self):
        """Store the sum of ``counted`` over all dictionary words."""
        self.totalCountedWords = sum(
            entry.counted for entry in self.dictionary.words)

    def __str__(self):
        return "Group: {}".format(self.name)