def createDocumentVector(fileObj, tfRawD):
    """Tokenise a document and accumulate raw term counts.

    The content exposed by ``FileReader(fileObj).content`` is cleaned
    character by character: every character whose code point is below
    ``'0'`` (except ``'.'``) or strictly between ``'9'`` and ``'A'`` is
    replaced by a space.  The cleaned text is then split on whitespace.

    Each raw token is passed through ``modify_word`` (defined elsewhere);
    tokens for which it returns a falsy value are ignored, the rest are
    added to the set of unique words and counted in ``tfRawD``.

    Args:
        fileObj: Whatever ``FileReader`` accepts as its argument.
        tfRawD: Mapping from word to count; updated in place.

    Returns:
        A 4-tuple ``(unique_words, raw_tokens, token_count, tfRawD)`` where
        ``raw_tokens`` are the tokens *before* ``modify_word`` is applied
        and ``token_count`` is ``len(raw_tokens)``.
    """
    raw_text = FileReader(fileObj).content

    # Blank out separators: code points < 48 ('0') other than '.', and the
    # range 58..64 (':' through '@').
    cleaned = []
    for ch in raw_text:
        code = ord(ch)
        is_separator = (ch != '.' and code < 48) or (57 < code < 65)
        cleaned.append(' ' if is_separator else ch)

    # split() without arguments already discards leading/trailing whitespace.
    tokens = ''.join(cleaned).split()
    token_count = len(tokens)

    unique_words = set()
    for token in tokens:
        normalised = modify_word(token)
        if not normalised:
            continue
        unique_words.add(normalised)
        tfRawD[normalised] = tfRawD.get(normalised, 0) + 1

    return unique_words, tokens, token_count, tfRawD
def createTopicVector(fileObj, T, tfRaw):
    """Tokenise a topic file, extend ``T`` and accumulate raw term counts.

    The content exposed by ``FileReader(fileObj).content`` is cleaned
    character by character: every character whose code point is below
    ``'0'`` (except ``'.'``) or strictly between ``'9'`` and ``'A'`` is
    replaced by a space.  The cleaned text is then split on whitespace.

    All raw tokens are appended to ``T``.  Each token is then passed
    through ``modify_word`` (defined elsewhere); tokens for which it
    returns a falsy value are ignored, the rest are added to the result
    set and counted in ``tfRaw``.

    Args:
        fileObj: Whatever ``FileReader`` accepts as its argument.
        T: List extended in place with the raw (unmodified) tokens.
        tfRaw: Mapping from word to count; updated in place.

    Returns:
        A 2-tuple ``(unique_words, token_count)`` where ``token_count`` is
        the number of raw tokens, including those later skipped.
    """
    raw_text = FileReader(fileObj).content

    # Blank out separators: code points < 48 ('0') other than '.', and the
    # range 58..64 (':' through '@').
    cleaned = []
    for ch in raw_text:
        code = ord(ch)
        is_separator = (ch != '.' and code < 48) or (57 < code < 65)
        cleaned.append(' ' if is_separator else ch)

    # split() without arguments already discards leading/trailing whitespace.
    tokens = ''.join(cleaned).split()
    token_count = len(tokens)

    T.extend(tokens)

    unique_words = set()
    for token in tokens:
        normalised = modify_word(token)
        if not normalised:
            continue
        unique_words.add(normalised)
        tfRaw[normalised] = tfRaw.get(normalised, 0) + 1

    return unique_words, token_count
def createDocumentVector(fileObj, tfRawD):
    """Tokenise a document and accumulate raw term counts.

    The content exposed by ``FileReader(fileObj).content`` is cleaned
    character by character: every character whose code point is below
    ``'0'`` (except ``'.'``) or strictly between ``'9'`` and ``'A'`` is
    replaced by a space.  The cleaned text is then split on whitespace.

    Each raw token is passed through ``modify_word`` (defined elsewhere);
    tokens for which it returns a falsy value are ignored, the rest are
    added to the set of unique words and counted in ``tfRawD``.

    Args:
        fileObj: Whatever ``FileReader`` accepts as its argument.
        tfRawD: Mapping from word to count; updated in place.

    Returns:
        A 4-tuple ``(unique_words, raw_tokens, token_count, tfRawD)`` where
        ``raw_tokens`` are the tokens *before* ``modify_word`` is applied
        and ``token_count`` is ``len(raw_tokens)``.
    """
    raw_text = FileReader(fileObj).content

    # Blank out separators: code points < 48 ('0') other than '.', and the
    # range 58..64 (':' through '@').
    cleaned = []
    for ch in raw_text:
        code = ord(ch)
        is_separator = (ch != '.' and code < 48) or (57 < code < 65)
        cleaned.append(' ' if is_separator else ch)

    # split() without arguments already discards leading/trailing whitespace.
    tokens = ''.join(cleaned).split()
    token_count = len(tokens)

    unique_words = set()
    for token in tokens:
        normalised = modify_word(token)
        if not normalised:
            continue
        unique_words.add(normalised)
        tfRawD[normalised] = tfRawD.get(normalised, 0) + 1

    return unique_words, tokens, token_count, tfRawD
def createTopicVector(fileObj, T, tfRaw):
    """Tokenise a topic file, extend ``T`` and accumulate raw term counts.

    The content exposed by ``FileReader(fileObj).content`` is cleaned
    character by character: every character whose code point is below
    ``'0'`` (except ``'.'``) or strictly between ``'9'`` and ``'A'`` is
    replaced by a space.  The cleaned text is then split on whitespace.

    All raw tokens are appended to ``T``.  Each token is then passed
    through ``modify_word`` (defined elsewhere); tokens for which it
    returns a falsy value are ignored, the rest are added to the result
    set and counted in ``tfRaw``.

    Args:
        fileObj: Whatever ``FileReader`` accepts as its argument.
        T: List extended in place with the raw (unmodified) tokens.
        tfRaw: Mapping from word to count; updated in place.

    Returns:
        A 2-tuple ``(unique_words, token_count)`` where ``token_count`` is
        the number of raw tokens, including those later skipped.
    """
    raw_text = FileReader(fileObj).content

    # Blank out separators: code points < 48 ('0') other than '.', and the
    # range 58..64 (':' through '@').
    cleaned = []
    for ch in raw_text:
        code = ord(ch)
        is_separator = (ch != '.' and code < 48) or (57 < code < 65)
        cleaned.append(' ' if is_separator else ch)

    # split() without arguments already discards leading/trailing whitespace.
    tokens = ''.join(cleaned).split()
    token_count = len(tokens)

    T.extend(tokens)

    unique_words = set()
    for token in tokens:
        normalised = modify_word(token)
        if not normalised:
            continue
        unique_words.add(normalised)
        tfRaw[normalised] = tfRaw.get(normalised, 0) + 1

    return unique_words, token_count