def create_word_cloud():
    """Render a masked word cloud of the most frequent words in a text file.

    Reads ``telegramtxt.txt``, counts its words with
    ``WordCloudFa.process_text``, discards words that occur only once, and
    then keeps only the words whose count is strictly greater than the
    floor-average count of the remaining words.  The surviving words are
    drawn using ``mask.jpg`` as the mask and the image is saved to
    ``cloud.png``.

    Note that if every remaining word has the same count, nothing is
    strictly above the average and an empty frequency dict is handed to
    ``generate_from_frequencies``.

    Raises:
        ValueError: if no word occurs more than once, so no average count
            can be computed.
    """
    # mask.jpg is a black-and-white picture; the words are written inside it.
    mask_array = np.array(Image.open("mask.jpg"))
    wc = WordCloudFa(persian_normalize=True,
                     include_numbers=False,
                     mask=mask_array,
                     background_color="white",
                     collocations=False)

    # Decode explicitly as UTF-8 so the text is read the same way regardless
    # of the platform's default locale encoding.
    with open('telegramtxt.txt', 'r', encoding='utf-8') as file:
        text = file.read()

    # Words that appear only once are treated as noise and dropped first.
    frequencies = {
        k: v for k, v in wc.process_text(text).items() if v > 1
    }
    if not frequencies:
        # Previously this case surfaced as a ZeroDivisionError below.
        raise ValueError(
            "no word occurs more than once in 'telegramtxt.txt'; "
            "cannot compute an average word count")

    avr = sum(frequencies.values()) // len(frequencies)
    print(f'avr of word count : {avr}')

    # Keep only above-average words, ordered from most to least frequent.
    frequencies = dict(
        sorted(
            ((k, v) for k, v in frequencies.items() if v > avr),
            key=lambda item: item[1],
            reverse=True,
        )
    )

    word_cloud = wc.generate_from_frequencies(frequencies)
    image = word_cloud.to_image()
    image.save('cloud.png')
def create_word_clod(all_cluster_hashtags, path):
    """Save one word-cloud image per cluster from its top five hashtags.

    :param all_cluster_hashtags: iterable of ``collections.Counter`` objects,
        one per cluster, each mapping a hashtag to the number of times it
        occurs in that cluster.
    :param path: directory the images are written to as
        ``cluster_<index>.png``; this function does not create it.
    :return: None

    A cluster with no hashtags is skipped instead of being passed on as an
    empty frequency dict, so it cannot abort the remaining clusters.  The
    index in the file name is still the cluster's position in
    ``all_cluster_hashtags``, so skipped clusters leave a gap in the
    numbering.
    """
    for i, hashtags in enumerate(all_cluster_hashtags):
        if not hashtags:
            # Nothing to draw for this cluster.
            continue
        # most_common(5) returns the same pairs as most_common()[0:5].
        top_hashtags = dict(hashtags.most_common(5))
        wc = WordCloudFa().generate_from_frequencies(top_hashtags)
        image = wc.to_image()
        image.save('{0}/cluster_{1}.png'.format(path, i))
# Strip every newline character out of the raw text before tokenizing.
# NOTE(review): newlines are deleted rather than replaced by a space, so the
# last word of one line and the first word of the next are joined unless a
# space separates them -- confirm this is intended.
text = text.replace("\n", "")
word_list = WordTokenizer().tokenize(text)
stop_words = stopwords.words('english')
punctuations = list(string.punctuation)

# Keep (stripped) tokens that are neither an English stop word, nor listed in
# stop_words_main, nor a single punctuation character.
words = []
for token in word_list:
    if token in stop_words:
        continue
    if token in stop_words_main:
        continue
    if token in punctuations:
        continue
    words.append(token.strip())

# Re-assemble the kept tokens into one string, each followed by a space.
text = "".join(token + " " for token in words)

# alice = np.array(Image.open("mask.png"))
cloud_options = {
    "persian_normalize": True,
    "width": 2000,
    "height": 2000,
    "margin": 20,
    "repeat": False,
    "max_words": 500,
}
word_cloud = WordCloudFa(**cloud_options)
frequencies = word_cloud.process_text(text)
wc = word_cloud.generate_from_frequencies(frequencies)

# NOTE(review): the bare plt.figure() call below opens a second figure, so the
# 20x20 figure stored in `fig` does not appear to be the one imshow/savefig
# act on -- confirm which figure is meant to be saved.
fig = plt.figure(figsize=(20, 20), facecolor=None)
plt.figure()
plt.imshow(word_cloud)
plt.axis('off')
plt.savefig('WordsCloud.png', dpi=2000, transparent=True)
plt.show()