def test_single_word(self):
    """Load a file holding one word and expect exactly that keyword back."""
    # Close the handle first so the written text is on disk before
    # KeyWords opens the same path.
    self.file.write("hello")
    self.file.close()
    self.word = KeyWords('./temp.csv')
    loaded = self.word.get_keywords()
    self.assertEqual(
        loaded,
        ['hello'],
        "KeyWords couldn't get keywords from the file correctly.",
    )
def test_remove_keyword_via_file(self):
    """Add then remove a keyword; a second KeyWords on the same path must be empty."""
    self.word = KeyWords('./temp.csv')
    self.word.add_keyword('hello')
    # The removal deliberately uses different capitalisation than the add.
    self.word.remove_keyword('Hello')
    reloaded = KeyWords('./temp.csv')
    self.assertEqual(
        reloaded.get_keywords(),
        [],
        "KeyWords couldn't remove keywords from a file",
    )
def main():
    """Run one interactive round of the Secret Messages prompt.

    Lists the available ciphers, asks the user to pick one (repeating until
    a valid name is given), reads a message, asks whether to encrypt or
    decrypt it, prints the result, and offers to start over.

    The cipher classes (``Atbash``, ``KeyWords``, ``Affine``, ``Caesar``)
    and ``clear`` are expected to be defined elsewhere in the module.
    """
    print()
    print("This is the Secret Messages project for Treehouse \n")
    cipher_text = None
    print("These are the current available ciphers:\n")
    ciphers = ["Caesar", "Atbash", "Keyword", "Affine"]
    for cipher in ciphers:
        print("- {}".format(cipher))

    while True:
        answer = input("Which cipher would you like to use? ")
        print()
        choice = answer.lower()
        if choice == "atbash":
            cipher_text = Atbash()
            break
        elif choice == "keyword":
            keyword = input("what key word would you like to use ")
            # input() never returns None, so an empty string is the only
            # "no keyword" case; fall back to the default constructor.
            cipher_text = KeyWords(keyword) if keyword else KeyWords()
            break
        elif choice == "affine":
            cipher_text = Affine()
            break
        elif choice == "caesar":
            key_value = input("Shift Key Value:")
            # int() raises ValueError on non-numeric input; use the default
            # shift in that case instead of crashing the program.
            try:
                shift = int(key_value)
            except ValueError:
                cipher_text = Caesar()
            else:
                cipher_text = Caesar(shift)
            break
        else:
            print("not a valid cipher")

    message = input("{} Cipher,type in your message ".format(answer))

    while True:
        what_to_do = input("Do you want to encrypt or decrypt? ")
        if what_to_do == "encrypt":
            encrypted_message = cipher_text.encrypt(message)
            break
        elif what_to_do == "decrypt":
            encrypted_message = cipher_text.decrypt(message)
            break
        else:
            print("not valid command")

    print("Your {}ed message is {} ".format(what_to_do, encrypted_message))
    regame = input("Do you want to encrypt/decrypt again y/n ")
    clear()
    if regame.lower() == "y":
        main()
    else:
        print("Good bye hope you had fun {}ing ".format(what_to_do))
def test_multiple_words(self):
    """Load a comma-separated file and expect both keywords, in file order."""
    # Close the handle first so the written text is on disk before
    # KeyWords opens the same path.
    self.file.write("hello,world")
    self.file.close()
    self.word = KeyWords('./temp.csv')
    loaded = self.word.get_keywords()
    self.assertEqual(
        loaded,
        ['hello', 'world'],
        "KeyWords couldn't get keywords from the file correctly.",
    )
def get_research_kwords(searched_tweets, orig_kwords):
    """Feed the text of each distinct tweet into KeyWords and return its top result.

    Every tweet in ``searched_tweets`` contributes its ``text`` once, keyed
    by ``id_str``. When a tweet carries a ``retweeted_status``, that status
    is treated the same way. The return value is whatever
    ``KeyWords.calc_top()`` produces.
    """
    filters = []  # ['^#.*', '^@.*']
    kw = KeyWords(orig_kwords, filters)
    seen_ids = set()

    def absorb(status):
        # Skip statuses whose id has already been fed to the extractor.
        if status.id_str in seen_ids:
            return
        seen_ids.add(status.id_str)
        kw.add_source(status.text)

    for tweet in searched_tweets:
        absorb(tweet)
        if hasattr(tweet, 'retweeted_status'):
            absorb(tweet.retweeted_status)

    return kw.calc_top()
def test_good_path(self):
    """Open the still-empty temp file and expect no keywords."""
    self.word = KeyWords('./temp.csv')
    loaded = self.word.get_keywords()
    self.assertEqual(
        loaded,
        [],
        "KeyWords couldn't access the newly made file correctly.",
    )
def test_init_type_error(self):
    """Constructing KeyWords from an int must raise TypeError."""
    not_a_path = 69
    with self.assertRaises(TypeError):
        self.word = KeyWords(not_a_path)
def test_bad_path(self):
    """Constructing KeyWords from a missing file must raise FileNotFoundError."""
    missing_path = './BAD_PATH.NONEXISTENT'
    with self.assertRaises(FileNotFoundError):
        self.word = KeyWords(missing_path)
def setUp(self):
    """Create a default KeyWords and open temp.csv for writing before each test."""
    self.word = KeyWords()
    # Mode 'w' creates the file, or truncates it if it already exists.
    scratch_file = open('temp.csv', 'w')
    self.file = scratch_file
class KeyWordUnit(unittest.TestCase):
    """Unit tests for KeyWords: construction, file loading, add/remove and occurrence counting."""

    def setUp(self):
        """Create a default KeyWords and open temp.csv for writing before each test."""
        self.word = KeyWords()
        self.file = open('temp.csv', 'w')

    def tearDown(self):
        """Drop the KeyWords instance, close the temp file and delete it."""
        self.word = None
        self.file.close()
        os.remove('temp.csv')

    def test_blank_init(self):
        """A KeyWords built with no arguments starts with no keywords."""
        self.assertEqual(self.word.get_keywords(), [],
                         "KeyWords doesn't initialize empty correctly.")

    def test_bad_path(self):
        """Constructing KeyWords from a missing file must raise FileNotFoundError."""
        with self.assertRaises(FileNotFoundError):
            self.word = KeyWords('./BAD_PATH.NONEXISTENT')

    def test_init_type_error(self):
        """Constructing KeyWords from an int must raise TypeError."""
        with self.assertRaises(TypeError):
            self.word = KeyWords(69)

    def test_add_type_error(self):
        """add_keyword must reject a non-string argument with TypeError."""
        with self.assertRaises(TypeError):
            self.word.add_keyword(69)

    def test_remove_type_error(self):
        """remove_keyword must reject a non-string argument with TypeError."""
        with self.assertRaises(TypeError):
            self.word.remove_keyword(69)

    def test_occurrence_type_error(self):
        """occurrence must reject a non-string argument with TypeError."""
        with self.assertRaises(TypeError):
            self.word.occurrence(69)

    def test_good_path(self):
        """Opening the still-empty temp file yields no keywords."""
        self.word = KeyWords('./temp.csv')
        self.assertEqual(
            self.word.get_keywords(), [],
            "KeyWords couldn't access the newly made file correctly.")

    def test_single_word(self):
        """A file holding one word yields exactly that keyword."""
        # Close the handle so the text is on disk before KeyWords reads it.
        self.file.write("hello")
        self.file.close()
        self.word = KeyWords('./temp.csv')
        self.assertEqual(
            self.word.get_keywords(), ['hello'],
            "KeyWords couldn't get keywords from the file correctly.")

    def test_multiple_words(self):
        """A comma-separated file yields each word as a keyword, in order."""
        # Close the handle so the text is on disk before KeyWords reads it.
        self.file.write("hello,world")
        self.file.close()
        self.word = KeyWords('./temp.csv')
        self.assertEqual(
            self.word.get_keywords(), ['hello', 'world'],
            "KeyWords couldn't get keywords from the file "
            "correctly.")

    def test_added_keyword(self):
        """A keyword added in memory shows up in get_keywords."""
        self.word.add_keyword('hello')
        self.assertEqual(self.word.get_keywords(), ['hello'],
                         "KeyWords couldn't add keywords correctly")

    def test_add_keyword_via_file(self):
        """A keyword added to a file-backed KeyWords is seen by a second instance."""
        self.word = KeyWords('./temp.csv')
        self.word.add_keyword('hello')
        word2 = KeyWords('./temp.csv')
        self.assertEqual(word2.get_keywords(), ['hello'],
                         "KeyWords couldn't add keywords correctly to a file")

    def test_removed_keyword(self):
        """Removing 'Hello' is expected to drop the stored keyword 'hello'."""
        self.word.add_keyword('hello')
        self.word.remove_keyword('Hello')
        self.assertEqual(self.word.get_keywords(), [],
                         "KeyWords couldn't "
                         "remove a keyword correctly")

    def test_remove_keyword_via_file(self):
        """A removal on a file-backed KeyWords is seen by a second instance."""
        self.word = KeyWords('./temp.csv')
        self.word.add_keyword('hello')
        self.word.remove_keyword('Hello')
        word2 = KeyWords('./temp.csv')
        self.assertEqual(word2.get_keywords(), [],
                         "KeyWords couldn't remove keywords from a file")

    def test_single_occurrence(self):
        """Expect three matches: mixed-case 'hello' counts, the embedded 'yhello' does not."""
        self.word.add_keyword('hello')
        self.assertEqual(
            self.word.occurrence(
                "Hello None of this hello text makes yhello much hElLo"),
            [('hello', 3)], "Couldn't count all instances of a keyword")

    def test_multi_occurrence(self):
        """Expect two matches per keyword; 'helloworld' and 'yhello' are not counted."""
        self.word.add_keyword('hello')
        self.word.add_keyword('world')
        self.assertEqual(
            self.word.occurrence(
                "Hello None world this helloworld text WoRld yhello much hElLo"
            ), [('hello', 2), ('world', 2)],
            "Couldn't count all instances of multiple keyword")

    def test_empty_occurrenceA(self):
        """Empty text reports a zero count for each stored keyword."""
        self.word.add_keyword('hello')
        self.assertEqual(self.word.occurrence(""), [('hello', 0)],
                         "Couldn't handle empty text")

    def test_empty_occurrenceB(self):
        """With no keywords stored, occurrence returns an empty list."""
        self.assertEqual(self.word.occurrence("This is a fun test text"), [],
                         "Couldn't handle empty KeyWords")
def test_add_keyword_via_file(self):
    """Add a keyword through a file-backed KeyWords; a second instance must see it."""
    self.word = KeyWords('./temp.csv')
    self.word.add_keyword('hello')
    reloaded = KeyWords('./temp.csv')
    self.assertEqual(
        reloaded.get_keywords(),
        ['hello'],
        "KeyWords couldn't add keywords correctly to a file",
    )
# -*- coding: utf-8 -*-
import sys
import codecs

# Re-wrap stdout and stderr (in that order) with a strict cp850 writer
# whenever the stream is not already using that encoding.
for _stream_name in ('stdout', 'stderr'):
    _stream = getattr(sys, _stream_name)
    if _stream.encoding != 'cp850':
        setattr(sys, _stream_name,
                codecs.getwriter('cp850')(_stream.buffer, 'strict'))

from keywords import KeyWords
from nltk.corpus import stopwords

# Text to extract keywords from (read with the platform default encoding).
with open('script.txt', 'r') as f:
    data = f.read()

# Reference transcripts; only corpus_1 is passed to KeyWords below.
with open('transcript_1.txt', 'r', encoding="utf8") as f1:
    corpus_1 = f1.read()
with open('transcript_2.txt', 'r', encoding="utf8") as f2:
    corpus_2 = f2.read()
with open('transcript_3.txt', 'r', encoding="utf8") as f3:
    corpus_3 = f3.read()

stopWords = stopwords.words('english')
keyword = KeyWords(corpus=corpus_1, stop_words=stopWords, alpha=0.8)

# Ask for 20 results and print each item's first two fields.
d = keyword.get_keywords(data, n=20)
for entry in d:
    print("Keyword : %s \n Score : %f" % (entry[0], entry[1]))
# Build the vocabulary to keep: the most frequent len(fdist) / 2.8 words.
fdist = FreqDist(all_words)
k = int(len(fdist) / 2.8)
# A set comprehension replaces `zip(*most_common(k))`, which raises
# ValueError on unpacking when k == 0, and avoids a second most_common call.
top_k_words = {word for word, _count in fdist.most_common(k)}

dfToList = df['text'].tolist()
final_list = []

# Only every 9th row (indices 0, 9, 18, ...) is sampled for keyword extraction.
sampled_rows = dfToList[::9]
if sampled_rows:
    # The corpus and stop-word list do not change between rows, so load
    # them once instead of once per sampled row.
    with open('testcorpus.txt', 'r', encoding="utf8") as f1:
        corpus_1 = f1.read()
    stopWords = stopwords.words('english')
    for row_text in sampled_rows:
        # NOTE(review): KeyWords is still rebuilt per row, as before; it
        # could be hoisted too if the extractor keeps no per-call state.
        keyword = KeyWords(corpus=corpus_1, stop_words=stopWords, alpha=0.8)
        d = keyword.get_keywords(str(row_text), n=2)
        for pair in d:
            final_list.extend(word_tokenize(pair[0]))

# Add the stemmed tokens of the extracted keywords to the kept vocabulary.
ps = PorterStemmer()
top_k_words.update(ps.stem(token) for token in final_list)

df['tokenized'] = df['tokenized'].apply(keep_top_k_words)
df['doc_len'] = df['tokenized'].apply(len)