# Requires: import re; from math import ceil; from os import path;
# import numpy as np; from PIL import Image; from wordcloud_fa import WordCloudFa
def execute(self):
    numberOfPages = 1
    numberOfTweetsPerPage = 200
    counter = 0
    txt = ""
    if self.numberOfTweets > 200:
        numberOfPages = ceil(self.numberOfTweets / 200)
    else:
        numberOfTweetsPerPage = self.numberOfTweets

    # Collect the user's tweets page by page.
    for i in range(numberOfPages):
        tweets = self.api.user_timeline(screen_name=self.username,
                                        count=numberOfTweetsPerPage,
                                        page=i)
        for each in tweets:
            counter += 1
            txt = txt + ' ' + each.text

    # Strip Latin characters and @-mentions before building the cloud.
    txt = re.sub(r'[A-Za-z@]*', '', txt)

    twitter_mask = np.array(
        Image.open(path.join(self.d, "templates/cloud/twitter-logo.jpg")))

    # Common Persian stop words (duplicate and empty entries removed).
    stop = [
        'می', 'من', 'که', 'به', 'رو', 'از', 'ولی', 'با', 'یه', 'این', 'نمی',
        'هم', 'شد', 'ها', 'اما', 'تو', 'واقعا', 'در', 'نه', 'دارم', 'باید',
        'آره', 'برای', 'تا', 'چه', 'کنم', 'بود', 'همه', 'دیگه', 'ای', 'اون',
        'تی', 'حالا', 'بی', 'د', 'چرا', 'بابا', 'منم', 'کیه', 'توی', 'نیست',
        'چی', 'باشه', 'بودم', 'می کنم', 'اینه', 'بهتر', 'داره', 'کردن', 'کن',
        'بعد'
    ]

    wc = WordCloudFa(
        # font_path='IranNastaliq.ttf',
        persian_normalize=True,
        max_words=1000,
        margin=0,
        width=3000,
        height=2500,
        min_font_size=1,
        max_font_size=1000,
        background_color=self.backGround,
        mask=twitter_mask,
        include_numbers=False,
        collocations=False)
    wc.add_stop_words(stop)
    wc.generate(txt)

    directory = 'static/images/' + self.username + '.png'
    directory = path.join(self.d, directory)
    image = wc.to_image()
    image.save(directory)
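# A quick, standalone check of the paging arithmetic used in execute() above:
# ceil rounds up, so a partial last page is still fetched (a sketch only).
from math import ceil

assert ceil(450 / 200) == 3  # 450 tweets -> 3 pages of up to 200
assert ceil(200 / 200) == 1  # an exact multiple needs no extra page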
# Requires: import codecs, random, re; import tweepy; import numpy as np;
# import arabic_reshaper; from bidi.algorithm import get_display;
# from PIL import Image; from wordcloud_fa import WordCloudFa;
# from hazm import WordTokenizer, Lemmatizer, Normalizer.
# STOPWORDS_PATH, MASK_PATH, FONT_PATH, BACKGROUND_COLOR, cmaps, and
# word_cloud_address are module-level constants defined elsewhere.
def save_word_cloud(user_name: str, api):
    # Fetch the user's full timeline.
    raw_tweets = []
    for tweet in tweepy.Cursor(api.user_timeline, id=user_name).items():
        raw_tweets.append(tweet.text)

    # Normalize words.
    tokenizer = WordTokenizer()
    lemmatizer = Lemmatizer()
    normalizer = Normalizer()
    with codecs.open(STOPWORDS_PATH, encoding='utf8') as f:
        stopwords = set(line.strip() for line in f)

    words = []
    for raw_tweet in raw_tweets:
        raw_tweet = re.sub(r"[,.;:?!،()]+", " ", raw_tweet)         # punctuation
        raw_tweet = re.sub(r'[^\u0600-\u06FF]+', " ", raw_tweet)    # keep the Persian/Arabic block only
        raw_tweet = re.sub(r'[\u200c\s]*\s[\s\u200c]*', " ", raw_tweet)  # collapse whitespace around ZWNJ
        raw_tweet = re.sub(r'[\u200c]+', " ", raw_tweet)
        raw_tweet = re.sub(r'[\n]+', " ", raw_tweet)
        raw_tweet = re.sub(r'[\t]+', " ", raw_tweet)
        raw_tweet = normalizer.normalize(raw_tweet)
        raw_tweet = normalizer.character_refinement(raw_tweet)
        tweet_words = tokenizer.tokenize(raw_tweet)
        # hazm verb lemmas look like "past#present"; keep the first form.
        tweet_words = [
            lemmatizer.lemmatize(tweet_word).split('#', 1)[0]
            for tweet_word in tweet_words
        ]
        tweet_words = [w for w in tweet_words if w not in stopwords]
        words.extend(tweet_words)

    if len(words) == 0:
        return

    # Build the word cloud.
    mask = np.array(Image.open(MASK_PATH))
    clean_string = ' '.join(words)
    clean_string = arabic_reshaper.reshape(clean_string)
    clean_string = get_display(clean_string)
    word_cloud = WordCloudFa(persian_normalize=False,
                             mask=mask,
                             colormap=random.sample(cmaps, 1)[0],
                             background_color=BACKGROUND_COLOR,
                             include_numbers=False,
                             font_path=FONT_PATH,
                             no_reshape=True,
                             max_words=1000,
                             min_font_size=2)
    wc = word_cloud.generate(clean_string)
    image = wc.to_image()
    image.save(word_cloud_address)
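# A self-contained sketch of the regex cleaning steps from save_word_cloud,
# run on one made-up sample tweet. The hazm normalization and lemmatization
# steps are omitted so this runs with the standard library alone.
import re

sample = "سلام! این یک توییت تستی است... http://t.co/xyz @user"
sample = re.sub(r"[,.;:?!،()]+", " ", sample)              # drop punctuation
sample = re.sub(r'[^\u0600-\u06FF]+', " ", sample)         # keep only the Persian/Arabic block
sample = re.sub(r'[\u200c\s]*\s[\s\u200c]*', " ", sample)  # collapse whitespace/ZWNJ runs
print(sample)  # roughly: 'سلام این یک توییت تستی است '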
def show_chat_word_cloud(directory):
    with codecs.open(os.path.join(directory, 'chats.txt'), 'r',
                     encoding='utf8') as file:
        print("Start putting words in picture")
        mask_array = np.array(Image.open("telegram.png"))
        wordcloud = WordCloudFa(persian_normalize=True,
                                mask=mask_array,
                                collocations=False)

        # Load stop words, one per line; strip() handles both '\n' and '\r\n'
        # (the original sliced word[:-2], which assumed Windows line endings).
        stop_words = []
        with codecs.open("stop_words.txt", 'r', encoding='utf8') as words:
            for word in words:
                stop_words.append(word.strip())
        wordcloud.add_stop_words(stop_words)

        text = delete_extra_characters(file.read())
        wc = wordcloud.generate(text)
        image = wc.to_image()
        image.show()
        image.save(os.path.join(directory, 'wordcloud.png'))
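# delete_extra_characters is called above but not shown. A minimal stand-in
# (an assumption, not the original implementation) that strips links,
# mentions, and non-Persian characters could look like:
import re

def delete_extra_characters(text: str) -> str:
    text = re.sub(r'https?://\S+', ' ', text)         # links
    text = re.sub(r'@\w+', ' ', text)                 # mentions
    text = re.sub(r'[^\u0600-\u06FF\s]+', ' ', text)  # keep Persian letters
    return text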
def main():
    database = DB("ganjoor.s3db")
    database.connect()
    rows = database.select("""select p.cat_id, v.text
                              from poem as p
                              join verse as v on p.id = v.poem_id
                              where p.cat_id = 24""")

    # Dump the selected verses to a plain-text file, one per line.
    with open("verses.txt", "w") as f:
        for r in rows:
            f.write(r[1])
            f.write('\n')

    wc = WordCloudFa(width=1200, height=800)
    with open('verses.txt', 'r') as file:
        text = file.read()
    word_cloud = wc.generate(text)
    image = word_cloud.to_image()
    image.show()
    image.save('hafez.png')
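# The DB helper used in main() is not shown. Since ganjoor.s3db is a SQLite
# file, a minimal stand-in (an assumption, not the original class) using the
# standard sqlite3 module would be:
import sqlite3

class DB:
    def __init__(self, path):
        self.path = path
        self.conn = None

    def connect(self):
        self.conn = sqlite3.connect(self.path)

    def select(self, query):
        return self.conn.execute(query).fetchall()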
from wordcloud_fa import WordCloudFa
import numpy as np
from PIL import Image

mask = np.array(Image.open("mask.png"))

# Passing the `no_reshape` parameter may cause problems when showing Farsi
# text. If the output of this example looks wrong, remove that parameter.
wordcloud = WordCloudFa(persian_normalize=True, include_numbers=False,
                        background_color="white", mask=mask, no_reshape=True)

# Adding extra stop words:
wordcloud.add_stop_words(['the', 'and', 'with', 'by', 'in', 'to', 'to the',
                          'of', 'it', 'is', 'th', 'its', 'for', '[ ]', '. [',
                          '] ['])

with open('mixed-example.txt', 'r') as file:
    text = file.read()

wc = wordcloud.generate(text)
image = wc.to_image()
image.show()
image.save('masked-example.png')
# exit()  # debugging guard left in the original; everything below it is
#         # unreachable while active, so it is commented out here.

print("cleaning")
text = " ".join([clean_word(word) for word in raw_text.split()])

#################################

print_stats(text)

print("generating cloud")
mask_array = np_array(Image.open(MASK))
wc_instance = WordCloudFa(
    width=900,
    height=900,
    background_color=BG_COLOR,
    font_path=FONT,
    mask=mask_array,
    persian_normalize=True,
    include_numbers=False,
    stopwords=load_stop_words(),
)
word_cloud = wc_instance.generate(text)
result_image = word_cloud.to_image()
result_image.save(RESULT_FILE_ADD)
result_image.show()
# print("\n----\n".join(tweets_simple))
to_print = "\n\n".join(tweets_simple)
with open("out/cleaned.txt", "w") as f:
    f.write(to_print)

#######################################

mask_array = np.array(Image.open("masks/tw.png"))
with open('out/cleaned.txt', 'r') as file:
    text = file.read()

wc = WordCloudFa(
    width=900,
    height=900,
    background_color="white",
    font_path="fonts/font2.ttf",
    mask=mask_array,
    persian_normalize=True,
    include_numbers=False,
)
word_cloud = wc.generate(text)
image = word_cloud.to_image()
image.save(f"out/{idish}.png")
image.show()
# This fragment continues an input-selection loop whose beginning is not shown.
        text = get_tweets_from_user(
            username)  # get the tweets of a specific user by its username
        break
    else:
        print("you should enter f or u!")

text = get_tweets(text)
text = remove_bad_tweets(text)
text = "\n".join(text)
text = get_words(text)
print(len(text))
text = remove_bad_words(text)
print(len(text))
text1 = "\n".join(text)
text1 = removeWeirdChars(text1)

mask_array = np.array(Image.open(mask_path))
my_wc = WordCloudFa(width=1200, height=1200,
                    background_color=background_color,
                    mask=mask_array,
                    persian_normalize=True,
                    repeat=False,
                    collocations=True)
my_wc.add_stop_words_from_file("../stop_words_kian.txt")

with open("edited_tweets.txt", "w") as f:
    f.write(text1)

my_wc.generate(text1)
image = my_wc.to_image()
image.show()
filename = datetime.now().strftime("%Y-%m-%d-%H-%M")
image.save('Images/{time}_photo.png'.format(time=filename))
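# For context: the truncated branch at the top of the previous fragment most
# plausibly belongs to an input-selection loop along these lines (a guess at
# the missing code, not the original):
#
# while True:
#     mode = input("enter f (fetch by query) or u (fetch by user): ")
#     if mode == "f":
#         ...               # some query-based fetch, then break
#         break
#     elif mode == "u":
#         username = input("username: ")
#         # ... the fragment above starts here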