def processConversation(conversation):
    """Rebuild the module-level ``bag_of_words`` from one conversation.

    The text is split on ``"."``, the pieces are passed through
    ``PreProcess.tokenize_sentences`` and then
    ``PreProcess.RemovePunctAndStopWords``, and the resulting words are
    lower-cased and counted with ``FreqDist``.

    Nothing is returned; the result is published through the global
    ``bag_of_words``.
    """
    global bag_of_words

    # Reset first, so the global holds an empty dict while the pipeline runs.
    bag_of_words = {}

    pieces = conversation.split(".")
    words = PreProcess.RemovePunctAndStopWords(
        PreProcess.tokenize_sentences(pieces)
    )

    bag_of_words = FreqDist(token.lower() for token in words)
def processConversation(conversation, category):
    """Count the words of one conversation and pass them to addTermFrequency.

    The text is split on ``"."``, the pieces are passed through
    ``PreProcess.tokenize_sentences`` and then
    ``PreProcess.RemovePunctAndStopWords``. Each resulting word is tallied
    (case is left untouched) in the module-level ``bag_of_words`` dict, which
    is then handed to ``addTermFrequency``.

    ``category`` is accepted but not used by this version. Nothing is
    returned.
    """
    global bag_of_words, documentClass

    # Start from a fresh dict; the global is filled in place below.
    bag_of_words = {}

    pieces = conversation.split(".")
    words = PreProcess.RemovePunctAndStopWords(
        PreProcess.tokenize_sentences(pieces)
    )

    for token in words:
        bag_of_words[token] = bag_of_words.get(token, 0) + 1

    # NOTE: the calculateFrequencies(len(words)) step is commented out in
    # this version, so the raw counts are what addTermFrequency receives.
    addTermFrequency(bag_of_words)
def processConversation(conversation, category):
    """Count one conversation's words and record them under ``category``.

    The text is split on ``"."``, the pieces are passed through
    ``PreProcess.tokenize_sentences`` and then
    ``PreProcess.RemovePunctAndStopWords``. Each resulting word is tallied in
    the module-level ``bag_of_words`` dict, which is then replaced by the
    result of ``calculateFrequencies(total)``, where ``total`` is the number
    of filtered words.

    The final ``bag_of_words`` is stored in the global ``documentClass``:
    directly when ``category`` is new, otherwise combined with the existing
    entry through ``merge_two_dicts``. Nothing is returned.
    """
    global bag_of_words, documentClass

    # Start from a fresh dict; the global is filled in place below.
    bag_of_words = {}

    pieces = conversation.split(".")
    words = PreProcess.RemovePunctAndStopWords(
        PreProcess.tokenize_sentences(pieces)
    )

    for token in words:
        bag_of_words[token] = bag_of_words.get(token, 0) + 1

    # calculateFrequencies only receives the word total, so it presumably
    # reads the global counts filled in above -- verify against its definition.
    total = len(words)
    bag_of_words = calculateFrequencies(total)

    if category not in documentClass:
        documentClass[category] = bag_of_words
        return

    documentClass[category] = merge_two_dicts(
        documentClass[category], bag_of_words
    )