from nltk.corpus import stopwords

# rssfeedmanager and browseropener are project-local modules.
# Requires a one-time nltk.download('stopwords') before first use.


def __init__(self):
    ###########################################################################
    # Get tokens from RSS feeds
    feedparser = rssfeedmanager()
    word_list = feedparser.get_keyword_from_articles()
    # word_list.append('denial of service')
    # word_list.append('dictionary attack')
    # word_list.append('botnet')
    # word_list = ['ack piggybacking', 'security', 'help', 'apple',
    #              'access list', 'firmware']
    print('Total words : %d\n' % len(word_list))

    # Remove stopwords related to special characters
    filtered_words_special = self.removespecialchar(word_list,
                                                    self.specialStopwords)

    # Remove blank entries
    filtered_words_blank = self.removeblanklist(filtered_words_special)

    # Remove stopwords with the nltk corpus; build the set once so the
    # membership test is O(1) per word instead of re-reading the corpus.
    english_stopwords = set(stopwords.words('english'))
    filtered_words_rss = [word for word in filtered_words_blank
                          if word not in english_stopwords]
    print('Removed stopwords : %d\n' % len(filtered_words_rss))

    # Remove custom stopwords
    filtered_words_custom = self.removestopwords(filtered_words_rss, 'english')
    print('Removed custom stopwords : %d\n' % len(filtered_words_custom))

    # Count words
    counted_words = self.countwords(filtered_words_custom)
    print('Counted words : %d\n' % len(counted_words))

    # Weighted word count
    weighted_words = self.weightedwords(counted_words, 1000)
    print('Weighted words : %d\n' % len(weighted_words))

    # Take the five highest-weighted (word, weight) pairs as search keywords
    top_keywords = [pair[0] for pair in weighted_words[:5]]
    print('Top 5 Keywords : %s\n' % top_keywords)

    # Open the Google Trends page for the top keywords in a web browser
    browseropener.opengoogletrendpage(top_keywords)

    # Show the ten highest-weighted entries
    for entry in weighted_words[:10]:
        print(entry)
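# ---------------------------------------------------------------------------
# Minimal sketches of some helper methods called above. These are NOT the
# project's originals; they are hypothetical implementations inferred only
# from how __init__ calls them (names, arguments, and return shapes).
# ---------------------------------------------------------------------------
from collections import Counter


def removespecialchar(self, words, special_stopwords):
    # Strip each special character from every word, then lower-case it.
    cleaned = []
    for word in words:
        for character in special_stopwords:
            word = word.replace(character, '')
        cleaned.append(word.lower())
    return cleaned


def removeblanklist(self, words):
    # Drop the empty strings left behind after character stripping.
    return [word for word in words if word.strip()]


def countwords(self, words):
    # Return (word, count) pairs, most frequent first, so callers can
    # index pair[0] for the word itself.
    return Counter(words).most_common()


def weightedwords(self, counted_words, scale):
    # Normalise raw counts to integer weights out of `scale` (1000 above),
    # preserving the most-frequent-first ordering.
    total = sum(count for _, count in counted_words) or 1
    return [(word, count * scale // total) for word, count in counted_words]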
# Model-based filtering
# model = word2vec.load('./text8.bin')
# indexes, metrics = model.cosine('queen')
# indexes, metrics = model.analogy(pos=['king', 'man'], neg=['woman'], n=20)
# result = model.generate_response(indexes, metrics).tolist()
# for item in result:
#     print(item)

# Gathering statements for training data
specialStopwords = ['.', ',', '(', ')', '[', ']', ':', '!', '--', '"']

feedparser = rssfeedmanager()
word_list = feedparser.get_keyword_from_articles()

filteredWords = []
for filteredWord in word_list:
    # Strip punctuation and normalise to lower case
    for character in specialStopwords:
        filteredWord = filteredWord.replace(character, '')
    filteredWords.append(filteredWord.lower())
print(filteredWords)

# Join the cleaned words into one training corpus. Avoid shadowing the
# built-in `str`, write text (not bytes) with an explicit encoding, and
# let the context manager close the file.
corpus = ' '.join(filteredWords).strip()
with open('./sample-phrases', 'w', encoding='utf-8') as f:
    f.write(corpus)
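# ---------------------------------------------------------------------------
# A minimal sketch of training on the corpus written above. It mirrors the
# commented-out calls at the top of this block and assumes the `word2vec`
# package (pip install word2vec); the ./sample-phrases.bin output name and
# the query word 'security' are illustrative, and the query assumes that
# word actually appears in the training vocabulary.
# ---------------------------------------------------------------------------
import word2vec

# Train a model on the gathered phrases; writes the binary model file.
word2vec.word2vec('./sample-phrases', './sample-phrases.bin',
                  size=100, verbose=True)

# Load it back and query nearest neighbours, as in the cosine() example.
model = word2vec.load('./sample-phrases.bin')
indexes, metrics = model.cosine('security')
for item in model.generate_response(indexes, metrics).tolist():
    print(item)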