Python KeyWords示例，keywords.KeyWords Python示例

示例#1

0

显示文件

 def test_single_word(self):
     """A file holding the single word "hello" must load as ['hello']."""
     # Write and close first so the content is on disk before KeyWords reads it.
     self.file.write("hello")
     self.file.close()
     self.word = KeyWords('./temp.csv')
     loaded = self.word.get_keywords()
     failure_note = "KeyWords couldn't get keywords from the file correctly."
     self.assertEqual(loaded, ['hello'], failure_note)

示例#2

0

显示文件

 def test_remove_keyword_via_file(self):
     """Adding 'hello' then removing 'Hello' must leave the file with no keywords.

     A second KeyWords built from the same path is used for the check, so the
     assertion reflects what that fresh instance loads rather than the state
     of the instance that performed the edits.
     """
     self.word = KeyWords('./temp.csv')
     self.word.add_keyword('hello')
     self.word.remove_keyword('Hello')
     reloaded = KeyWords('./temp.csv')
     remaining = reloaded.get_keywords()
     failure_note = "KeyWords couldn't remove keywords from a file"
     self.assertEqual(remaining, [], failure_note)

示例#3

0

显示文件

文件： secret.py 项目： mihoubnaceri/project-two-treehouse

def main():
    """Run the interactive Secret Messages prompt until the user declines another round.

    Each round lists the available ciphers, asks for one by name
    (case-insensitive, re-asking until a known name is entered), reads a
    message, asks whether to encrypt or decrypt it, prints the result and
    offers another round.

    For the Caesar cipher the shift key is read as text. When it is not a
    valid integer the cipher is constructed with no arguments instead of
    aborting the program with a ValueError.
    """
    # A loop replaces the original self-recursion so that long sessions do not
    # keep growing the call stack.
    while True:
        print()
        print("This is the Secret Messages project for Treehouse  \n")
        cipher_text = None
        print("These are the current available ciphers:\n")
        for cipher in ("Caesar", "Atbash", "Keyword", "Affine"):
            print("- {}".format(cipher))

        while True:
            answer = input("Which cipher would you like to use? ")
            print()
            choice = answer.lower()
            if choice == "atbash":
                cipher_text = Atbash()
            elif choice == "keyword":
                keyword = input("what key word would you like to use ")
                # An empty reply constructs the cipher with no arguments.
                cipher_text = KeyWords(keyword) if keyword else KeyWords()
            elif choice == "affine":
                cipher_text = Affine()
            elif choice == "caesar":
                key_value = input("Shift Key Value:")
                try:
                    shift = int(key_value)
                except ValueError:
                    # Blank or non-numeric input: fall back to the no-argument
                    # constructor, which the original code intended but could
                    # never reach.
                    cipher_text = Caesar()
                else:
                    cipher_text = Caesar(shift)
            else:
                print("not a valid cipher")
                continue
            break

        # The prompt echoes the cipher name exactly as the user typed it.
        message = input("{} Cipher,type in your message ".format(answer))
        while True:
            what_to_do = input("Do you want to encrypt or decrypt? ")
            if what_to_do == "encrypt":
                encrypted_message = cipher_text.encrypt(message)
                break
            elif what_to_do == "decrypt":
                encrypted_message = cipher_text.decrypt(message)
                break
            else:
                print("not valid command")

        print("Your {}ed message is {} ".format(what_to_do, encrypted_message))
        regame = input("Do you want to encrypt/decrypt again y/n ")
        clear()
        if regame.lower() != "y":
            print("Good bye hope you had fun {}ing ".format(what_to_do))
            return

示例#4

0

显示文件

 def test_multiple_words(self):
     """A file holding "hello,world" must load as ['hello', 'world']."""
     # Write and close first so the content is on disk before KeyWords reads it.
     self.file.write("hello,world")
     self.file.close()
     self.word = KeyWords('./temp.csv')
     loaded = self.word.get_keywords()
     failure_note = "KeyWords couldn't get keywords from the file correctly."
     self.assertEqual(loaded, ['hello', 'world'], failure_note)

示例#5

0

显示文件

def get_research_kwords(searched_tweets, orig_kwords):
    """Feed each distinct tweet text into a KeyWords object and return its calc_top().

    Every item of *searched_tweets* contributes its own ``text`` once per
    ``id_str``. When an item has a ``retweeted_status`` attribute, that
    status's text is contributed too, under the same de-duplication by
    ``id_str``.

    *orig_kwords* is passed to KeyWords unchanged, together with an empty
    filter list.
    """
    filters = []  # candidate patterns, currently disabled: ['^#.*', '^@.*']
    collector = KeyWords(orig_kwords, filters)
    seen_ids = set()

    def ingest(status):
        # Add the status text only the first time its id is encountered.
        if status.id_str in seen_ids:
            return
        seen_ids.add(status.id_str)
        collector.add_source(status.text)

    for tweet in searched_tweets:
        ingest(tweet)
        if hasattr(tweet, 'retweeted_status'):
            ingest(tweet.retweeted_status)

    return collector.calc_top()

示例#6

0

显示文件

 def test_good_path(self):
     """Loading the freshly created, still-empty temp.csv must yield no keywords."""
     self.word = KeyWords('./temp.csv')
     loaded = self.word.get_keywords()
     failure_note = "KeyWords couldn't access the newly made file correctly."
     self.assertEqual(loaded, [], failure_note)

示例#7

0

显示文件

 def test_init_type_error(self):
     """Constructing KeyWords from the integer 69 must raise TypeError."""
     expecting_type_error = self.assertRaises(TypeError)
     with expecting_type_error:
         self.word = KeyWords(69)

示例#8

0

显示文件

 def test_bad_path(self):
     """Constructing KeyWords from './BAD_PATH.NONEXISTENT' must raise FileNotFoundError."""
     expecting_missing_file = self.assertRaises(FileNotFoundError)
     with expecting_missing_file:
         self.word = KeyWords('./BAD_PATH.NONEXISTENT')

示例#9

0

显示文件

 def setUp(self):
     """Give each test a no-argument KeyWords and a writable temp.csv handle."""
     # KeyWords is built before the file is opened, as in the original order.
     blank_keywords = KeyWords()
     self.word = blank_keywords
     scratch_file = open('temp.csv', 'w')
     self.file = scratch_file

示例#10

0

显示文件

class KeyWordUnit(unittest.TestCase):
    """Tests for KeyWords: construction, file loading, add/remove and occurrence counts.

    Each test starts with a no-argument KeyWords in ``self.word`` and a freshly
    opened, writable ``temp.csv`` in ``self.file``; the file is deleted again
    once the test finishes.
    """

    def setUp(self):
        """Build the default KeyWords, then open temp.csv for writing."""
        self.word = KeyWords()
        self.file = open('temp.csv', 'w')

    def tearDown(self):
        """Drop the KeyWords reference, close the handle and delete temp.csv."""
        self.word = None
        self.file.close()
        os.remove('temp.csv')

    # --- construction -----------------------------------------------------

    def test_blank_init(self):
        """A no-argument KeyWords must report an empty keyword list."""
        current = self.word.get_keywords()
        self.assertEqual(
            current, [], "KeyWords doesn't initialize empty correctly.")

    def test_bad_path(self):
        """A non-existent path must raise FileNotFoundError."""
        expecting_missing_file = self.assertRaises(FileNotFoundError)
        with expecting_missing_file:
            self.word = KeyWords('./BAD_PATH.NONEXISTENT')

    def test_init_type_error(self):
        """An integer constructor argument must raise TypeError."""
        expecting_type_error = self.assertRaises(TypeError)
        with expecting_type_error:
            self.word = KeyWords(69)

    # --- argument type checks ---------------------------------------------

    def test_add_type_error(self):
        """add_keyword(69) must raise TypeError."""
        self.assertRaises(TypeError, self.word.add_keyword, 69)

    def test_remove_type_error(self):
        """remove_keyword(69) must raise TypeError."""
        self.assertRaises(TypeError, self.word.remove_keyword, 69)

    def test_occurrence_type_error(self):
        """occurrence(69) must raise TypeError."""
        self.assertRaises(TypeError, self.word.occurrence, 69)

    # --- loading from a file ----------------------------------------------

    def test_good_path(self):
        """The still-empty temp.csv must load as an empty keyword list."""
        self.word = KeyWords('./temp.csv')
        loaded = self.word.get_keywords()
        self.assertEqual(
            loaded, [],
            "KeyWords couldn't access the newly made file correctly.")

    def test_single_word(self):
        """A file holding "hello" must load as ['hello']."""
        self.file.write("hello")
        self.file.close()
        self.word = KeyWords('./temp.csv')
        loaded = self.word.get_keywords()
        self.assertEqual(
            loaded, ['hello'],
            "KeyWords couldn't get keywords from the file correctly.")

    def test_multiple_words(self):
        """A file holding "hello,world" must load as ['hello', 'world']."""
        self.file.write("hello,world")
        self.file.close()
        self.word = KeyWords('./temp.csv')
        loaded = self.word.get_keywords()
        self.assertEqual(
            loaded, ['hello', 'world'],
            "KeyWords couldn't get keywords from the file correctly.")

    # --- adding and removing ----------------------------------------------

    def test_added_keyword(self):
        """After add_keyword('hello') the keyword list must be ['hello']."""
        self.word.add_keyword('hello')
        current = self.word.get_keywords()
        self.assertEqual(
            current, ['hello'], "KeyWords couldn't add keywords correctly")

    def test_add_keyword_via_file(self):
        """A keyword added through one file-backed instance must be seen by a second."""
        self.word = KeyWords('./temp.csv')
        self.word.add_keyword('hello')
        reloaded = KeyWords('./temp.csv')
        self.assertEqual(
            reloaded.get_keywords(), ['hello'],
            "KeyWords couldn't add keywords correctly to a file")

    def test_removed_keyword(self):
        """Removing 'Hello' after adding 'hello' must leave no keywords."""
        self.word.add_keyword('hello')
        self.word.remove_keyword('Hello')
        current = self.word.get_keywords()
        self.assertEqual(
            current, [], "KeyWords couldn't remove a keyword correctly")

    def test_remove_keyword_via_file(self):
        """A removal through one file-backed instance must be seen by a second."""
        self.word = KeyWords('./temp.csv')
        self.word.add_keyword('hello')
        self.word.remove_keyword('Hello')
        reloaded = KeyWords('./temp.csv')
        self.assertEqual(
            reloaded.get_keywords(), [],
            "KeyWords couldn't remove keywords from a file")

    # --- occurrence counting ----------------------------------------------

    def test_single_occurrence(self):
        """With keyword 'hello', the sample text must be counted as 3 matches."""
        self.word.add_keyword('hello')
        sample = "Hello None of this hello text makes yhello much hElLo"
        counts = self.word.occurrence(sample)
        self.assertEqual(
            counts, [('hello', 3)],
            "Couldn't count all instances of a keyword")

    def test_multi_occurrence(self):
        """With 'hello' and 'world', the sample text must give 2 matches each."""
        self.word.add_keyword('hello')
        self.word.add_keyword('world')
        sample = "Hello None world this helloworld text WoRld yhello much hElLo"
        counts = self.word.occurrence(sample)
        self.assertEqual(
            counts, [('hello', 2), ('world', 2)],
            "Couldn't count all instances of multiple keyword")

    def test_empty_occurrenceA(self):
        """Empty text must still report the keyword, with a count of 0."""
        self.word.add_keyword('hello')
        counts = self.word.occurrence("")
        self.assertEqual(
            counts, [('hello', 0)], "Couldn't handle empty text")

    def test_empty_occurrenceB(self):
        """With no keywords registered, occurrence() must return an empty list."""
        counts = self.word.occurrence("This is a fun test text")
        self.assertEqual(counts, [], "Couldn't handle empty KeyWords")

示例#11

0

显示文件

 def test_add_keyword_via_file(self):
     """A keyword added through a file-backed KeyWords must be seen by a second one.

     A separate KeyWords built from the same path is used for the check, so the
     assertion reflects what that fresh instance loads.
     """
     self.word = KeyWords('./temp.csv')
     self.word.add_keyword('hello')
     reloaded = KeyWords('./temp.csv')
     stored = reloaded.get_keywords()
     failure_note = "KeyWords couldn't add keywords correctly to a file"
     self.assertEqual(stored, ['hello'], failure_note)

示例#12

0

显示文件

文件： test.py 项目： suyoufu1/legalPaltform

# -*- coding: utf-8 -*-
import sys
import codecs
# Route console output through a cp850 encoder: when a stream does not already
# report 'cp850' as its encoding, replace it with a codecs writer layered over
# the stream's underlying binary buffer. With 'strict' error handling, a
# character that cp850 cannot encode raises an error instead of being replaced.
if sys.stdout.encoding != 'cp850':
    sys.stdout = codecs.getwriter('cp850')(sys.stdout.buffer, 'strict')
# Same treatment for the error stream.
if sys.stderr.encoding != 'cp850':
    sys.stderr = codecs.getwriter('cp850')(sys.stderr.buffer, 'strict')


from keywords import KeyWords
from nltk.corpus import stopwords

def _slurp(path, **open_kwargs):
    """Return the full text of *path*, opened read-only with *open_kwargs*."""
    with open(path, 'r', **open_kwargs) as handle:
        return handle.read()


# The script to analyse is opened with the platform default encoding; the
# transcripts are opened explicitly as UTF-8.
data = _slurp('script.txt')
corpus_1 = _slurp('transcript_1.txt', encoding="utf8")
# corpus_2 and corpus_3 are loaded but not used further in this script.
corpus_2 = _slurp('transcript_2.txt', encoding="utf8")
corpus_3 = _slurp('transcript_3.txt', encoding="utf8")

stopWords = stopwords.words('english')
keyword = KeyWords(corpus=corpus_1, stop_words=stopWords, alpha=0.8)

# Request 20 keywords for the script text and print each entry's first two
# fields as keyword and score.
d = keyword.get_keywords(data, n=20)
for i in d:
    print("Keyword : %s \n Score : %f" % (i[0], i[1]))

示例#13

0

显示文件

文件： script.py 项目： bluepokeboy/Megathon-Qualcomm-Hackathon-winning-solution

fdist = FreqDist(all_words)
# Keep the most frequent words, sized as 1/2.8 of the number of distinct words.
k = int(len(fdist) / 2.8)
# most_common() yields (word, count) pairs; only the words are kept. Building
# the set directly also works when k == 0, where the former
# `top_k_words, _ = zip(*...)` unpacking raised ValueError.
top_k_words = {word for word, _count in fdist.most_common(k)}
dfToList = df['text'].tolist()

final_list = []
# Only every 9th document (indices 0, 9, 18, ...) goes through keyword extraction.
for i, document in enumerate(dfToList):
    if i % 9 != 0:
        continue
    # NOTE(review): the corpus file, stop-word list and KeyWords object are
    # rebuilt for every sampled document, as before. They look loop-invariant;
    # confirm KeyWords keeps no per-call state before hoisting them.
    with open('testcorpus.txt', 'r', encoding="utf8") as f1:
        corpus_1 = f1.read()
    stopWords = stopwords.words('english')
    keyword = KeyWords(corpus=corpus_1, stop_words=stopWords, alpha=0.8)
    d = keyword.get_keywords(str(document), n=2)
    # Each result's first field is the keyword text; split it into tokens.
    for pair in d:
        final_list.extend(word_tokenize(pair[0]))

ps = PorterStemmer()

# Add the stemmed form of every extracted keyword token to the kept vocabulary.
for token in final_list:
    top_k_words.add(ps.stem(token))

df['tokenized'] = df['tokenized'].apply(keep_top_k_words)
df['doc_len'] = df['tokenized'].apply(len)