示例#1
0
    def __init__(self):
        """Run the RSS keyword pipeline and report the leading keywords.

        Tokens come from ``rssfeedmanager().get_keyword_from_articles()`` and
        pass, in order, through ``self.removespecialchar``,
        ``self.removeblanklist``, an NLTK English stopword filter,
        ``self.removestopwords``, ``self.countwords`` and
        ``self.weightedwords``. The first element of each of the first five
        weighted entries is handed to ``browseropener.opengoogletrendpage``,
        and up to ten weighted entries are printed at the end.

        A size count is printed after most stages. Every ``print`` uses the
        single-argument parenthesised form, which produces the same output
        under the Python 2 print statement and the Python 3 print function.
        """
        # Get tokens from RSS feeds.
        # For local debugging, word_list can be swapped for a fixed list such
        # as ['ack piggybacking', 'security', 'help', 'apple', 'access list',
        # 'firmware'].
        feed_manager = rssfeedmanager()
        word_list = feed_manager.get_keyword_from_articles()

        print('Total words : %d' % len(word_list))
        print('\n')

        # Remove stopwords related with special characters
        filtered_words_special = self.removespecialchar(word_list, self.specialStopwords)

        # Remove blank list
        filtered_words_blank = self.removeblanklist(filtered_words_special)

        # Remove stopwords with nltk library.
        # Build the stopword collection once instead of once per token; a set
        # also makes each membership test constant-time.
        # NOTE(review): assumes the tokens are hashable (plain strings) - confirm
        # against removeblanklist's return value.
        english_stopwords = frozenset(stopwords.words('english'))
        filtered_words_rss = [word for word in filtered_words_blank
                              if word not in english_stopwords]
        print('Removed stopwords : %d' % len(filtered_words_rss))
        print('\n')

        # Remove custom stopwords
        filtered_words_custom = self.removestopwords(filtered_words_rss, 'english')
        print('Removed custom stopwords : %d' % len(filtered_words_custom))
        print('\n')

        # Count words
        counted_words = self.countwords(filtered_words_custom)
        print('Counted words : %d' % len(counted_words))
        print('\n')

        # Weighted word count
        weighted_words = self.weightedwords(counted_words, 1000)
        print('Weighted words : %d' % len(weighted_words))
        print('\n')

        # Keywords for the Google Trends lookup: first element of each of the
        # first five weighted entries.
        top_keywords = [entry[0] for entry in weighted_words[:5]]

        print('Top 5 Keywords : %s' % top_keywords)
        print('\n')

        # Open web browser
        browseropener.opengoogletrendpage(top_keywords)

        # Slice rather than index 0..9 so a short result list cannot raise
        # IndexError.
        for entry in weighted_words[:10]:
            print(entry)
示例#2
0
    def __init__(self):
        """Execute the keyword-extraction steps and show the results.

        Steps, in order: fetch tokens with
        ``rssfeedmanager().get_keyword_from_articles()``; filter them with
        ``self.removespecialchar`` (using ``self.specialStopwords``),
        ``self.removeblanklist``, the NLTK English stopword list and
        ``self.removestopwords``; then call ``self.countwords`` and
        ``self.weightedwords``. The first item of each of the first five
        weighted entries is passed to ``browseropener.opengoogletrendpage``;
        finally up to ten weighted entries are printed.

        Counts are printed along the way. All output goes through
        ``print(<one expression>)``, which behaves the same as a Python 2
        print statement and as the Python 3 print function.
        """
        # Get tokens from RSS feeds.
        # A fixed list (e.g. ['ack piggybacking', 'security', 'help', 'apple',
        # 'access list', 'firmware']) can be substituted here when debugging.
        feed_manager = rssfeedmanager()
        word_list = feed_manager.get_keyword_from_articles()

        print('Total words : %d' % len(word_list))
        print('\n')

        # Remove stopwords related with special characters
        filtered_words_special = self.removespecialchar(
            word_list, self.specialStopwords)

        # Remove blank list
        filtered_words_blank = self.removeblanklist(filtered_words_special)

        # Remove stopwords with nltk library.
        # The stopword list is loop-invariant: load it once and keep it in a
        # set so each token costs a single hash lookup.
        # NOTE(review): relies on tokens being hashable (plain strings) -
        # verify against what removeblanklist returns.
        english_stopwords = frozenset(stopwords.words('english'))
        filtered_words_rss = [
            word for word in filtered_words_blank
            if word not in english_stopwords
        ]
        print('Removed stopwords : %d' % len(filtered_words_rss))
        print('\n')

        # Remove custom stopwords
        filtered_words_custom = self.removestopwords(filtered_words_rss,
                                                     'english')
        print('Removed custom stopwords : %d' % len(filtered_words_custom))
        print('\n')

        # Count words
        counted_words = self.countwords(filtered_words_custom)
        print('Counted words : %d' % len(counted_words))
        print('\n')

        # Weighted word count
        weighted_words = self.weightedwords(counted_words, 1000)
        print('Weighted words : %d' % len(weighted_words))
        print('\n')

        # Collect the first item of each of the first five weighted entries
        # for the Google Trends lookup.
        top_keywords = [entry[0] for entry in weighted_words[0:5]]

        print('Top 5 Keywords : %s' % top_keywords)
        print('\n')

        # Open web browser
        browseropener.opengoogletrendpage(top_keywords)

        # Print at most ten entries; slicing avoids an IndexError when fewer
        # than ten are available.
        for entry in weighted_words[0:10]:
            print(entry)
示例#3
0
# model based filtering

# model = word2vec.load('./text8.bin')

# indexes, metrics = model.cosine('queen')
# indexes, metrics = model.analogy(pos=['king', 'man'], neg=['woman'], n=20)
# result = model.generate_response(indexes, metrics).tolist()

# for item in result:
#     print (item)

# Gathering statements for training data

# Substrings removed from every keyword before it is written out.
specialStopwords = ['.', ',', '(', ')', '[', ']', ':', '!', '--', '\"']

feedparser = rssfeedmanager()
word_list = feedparser.get_keyword_from_articles()

# Remove each listed substring (in list order), then lowercase once per word.
# Lowercasing after the inner loop avoids repeating it for every substring and
# does not depend on the inner loop having run at least once.
filteredWords = []
for filteredWord in word_list:
    for character in specialStopwords:
        filteredWord = filteredWord.replace(character, '')
    filteredWords.append(filteredWord.lower())

print(filteredWords)

# encode() produces bytes, so the output file is opened in binary mode; a
# text-mode handle rejects bytes on Python 3. The name avoids shadowing the
# builtin `str`.
phrases = ' '.join(filteredWords).encode('utf-8').strip()

# The context manager guarantees the handle is flushed and closed.
with open('./sample-phrases', 'wb') as f:
    f.write(phrases)
示例#4
0
# model = word2vec.load('./text8.bin')

# indexes, metrics = model.cosine('queen')
# indexes, metrics = model.analogy(pos=['king', 'man'], neg=['woman'], n=20)
# result = model.generate_response(indexes, metrics).tolist()

# for item in result:
#     print (item)



# Gathering statements for training data

# Substrings removed from every keyword before the words are joined.
specialStopwords = ['.', ',', '(', ')', '[', ']', ':', '!', '--', '\"']

feedparser = rssfeedmanager()
word_list = feedparser.get_keyword_from_articles()

# Remove each listed substring (in list order), then lowercase once per word
# instead of once per substring.
filteredWords = []
for filteredWord in word_list:
    for character in specialStopwords:
        filteredWord = filteredWord.replace(character, '')
    lowercase_str = filteredWord.lower()
    filteredWords.append(lowercase_str)


print(filteredWords)

# Space-joined keywords as UTF-8 bytes with surrounding whitespace stripped.
# NOTE(review): `str` shadows the builtin; the name is kept because statements
# after this block may read it - rename together with those.
str = ' '.join(filteredWords).encode('utf-8').strip()

f = open('./sample-phrases', 'w')