Example No. 1
def create_word_cloud():
    """ create the cloud of word with wordcloud_fa module"""
    # mask.jpg is a image in black and white picture that word will write in that
    mask_array = np.array(Image.open("mask.jpg"))
    wc = WordCloudFa(persian_normalize=True,
                     include_numbers=False,
                     mask=mask_array,
                     background_color="white",
                     collocations=False)
    with open('telegramtxt.txt', 'r', encoding='utf-8') as file:
        text = file.read()
    frequencies = wc.process_text(text)
    # keep words that occur more than once, then drop everything below the average frequency
    frequencies = {k: v for k, v in frequencies.items() if v > 1}
    avg = sum(frequencies.values()) // len(frequencies)
    print(f'average word frequency: {avg}')
    frequencies = {k: v for k, v in frequencies.items() if v > avg}
    frequencies = dict(
        sorted(frequencies.items(), key=lambda item: item[1], reverse=True))
    word_cloud = wc.generate_from_frequencies(frequencies)
    image = word_cloud.to_image()
    image.save('cloud.png')
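
This example omits its imports; a minimal sketch of the assumed header:

import numpy as np
from PIL import Image
from wordcloud_fa import WordCloudFa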
Example No. 2
def save_word_cloud(user_name: str, api):
    raw_tweets = []
    for tweet in tweepy.Cursor(api.user_timeline, id=user_name).items():
        raw_tweets.append(tweet.text)

    # Normalize words
    tokenizer = WordTokenizer()
    lemmatizer = Lemmatizer()
    normalizer = Normalizer()
    with codecs.open(STOPWORDS_PATH, encoding='utf8') as f:
        stopwords = {w.strip() for w in f}
    words = []
    for raw_tweet in raw_tweets:
        raw_tweet = re.sub(r"[,.;:?!،()]+", " ", raw_tweet)
        raw_tweet = re.sub('[^\u0600-\u06FF]+', " ", raw_tweet)
        raw_tweet = re.sub(r'[\u200c\s]*\s[\s\u200c]*', " ", raw_tweet)
        raw_tweet = re.sub(r'[\u200c]+', " ", raw_tweet)
        raw_tweet = re.sub(r'[\n]+', " ", raw_tweet)
        raw_tweet = re.sub(r'[\t]+', " ", raw_tweet)
        raw_tweet = normalizer.normalize(raw_tweet)
        raw_tweet = normalizer.character_refinement(raw_tweet)
        tweet_words = tokenizer.tokenize(raw_tweet)
        tweet_words = [
            # hazm's lemmatizer returns "past#present" for verbs; keep the past stem
            lemmatizer.lemmatize(tweet_word).split('#', 1)[0]
            for tweet_word in tweet_words
        ]
        tweet_words = list(filter(lambda x: x not in stopwords, tweet_words))
        words.extend(tweet_words)

    if len(words) == 0:
        return

    # Build word_cloud
    mask = np.array(Image.open(MASK_PATH))
    clean_string = ' '.join([str(elem) for elem in words])
    clean_string = arabic_reshaper.reshape(clean_string)
    clean_string = get_display(clean_string)
    word_cloud = WordCloudFa(persian_normalize=False,
                             mask=mask,
                             colormap=random.sample(cmaps, 1)[0],
                             background_color=BACKGROUND_COLOR,
                             include_numbers=False,
                             font_path=FONT_PATH,
                             no_reshape=True,
                             max_words=1000,
                             min_font_size=2)
    wc = word_cloud.generate(clean_string)
    image = wc.to_image()
    image.save(word_cloud_address)
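
The function relies on module-level names that are not shown. A hedged reconstruction of the assumed context; the constant values below are illustrative placeholders, not the source project's:

import codecs
import random
import re

import arabic_reshaper
import numpy as np
import tweepy
from PIL import Image
from bidi.algorithm import get_display
from hazm import Lemmatizer, Normalizer, WordTokenizer
from wordcloud_fa import WordCloudFa

STOPWORDS_PATH = 'stopwords.txt'        # placeholder
MASK_PATH = 'mask.png'                  # placeholder
FONT_PATH = 'font.ttf'                  # placeholder
BACKGROUND_COLOR = 'white'
cmaps = ['viridis', 'plasma', 'magma']  # any matplotlib colormap names
word_cloud_address = 'word_cloud.png'   # output path used by image.save(...)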
Example No. 3
def create_word_cloud(all_cluster_hashtags, path):
    '''
    :param all_cluster_hashtags: list of Counter objects; each Counter maps the
        hashtags of one cluster to their occurrence counts in that cluster
    :param path: output directory for the cluster images
    '''
    for i, hashtags in enumerate(all_cluster_hashtags):
        wordcloud = WordCloudFa()
        wc = wordcloud.generate_from_frequencies(dict(hashtags.most_common(5)))
        image = wc.to_image()
        # image.show()
        image.save('{0}/cluster_{1}.png'.format(path, i))
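
A short usage sketch under the docstring's assumptions (the hashtag counts are made up, and the out/ directory is assumed to exist):

from collections import Counter

clusters = [Counter({'#python': 12, '#nlp': 7, '#wordcloud': 3}),
            Counter({'#tehran': 9, '#travel': 4})]
create_word_cloud(clusters, 'out')  # writes out/cluster_0.png and out/cluster_1.png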
Example No. 4
def get_image(UserID, chat_id, message, bot, update):
    if len(UserID) < 2:
        message(chat_id=chat_id.chat_id, text="آی دی نامعتبر است!")
    else:
        message(chat_id=chat_id.chat_id, text="در حال اتصال به اینستاگرام...")
        allword = query.start(UserID, chat_id, message, bot, update)
        allword_edited = removeWeirdChars(allword)
        my_wordcloud = WordCloudFa(font_path="Sahel.ttf",
                                   background_color="white",
                                   width=720,
                                   height=1280,
                                   margin=2).generate(allword_edited)

        image = my_wordcloud.to_image()
        saved_dir = 'analysis/' + str(UserID) + '.jpg'
        image.save(saved_dir)
        message(chat_id=chat_id.chat_id, text="درحال ارسال عکس...")
        return saved_dir
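
`query.start` and `removeWeirdChars` come from the source project and are not shown. A hedged stand-in for the latter, assuming its job is to drop emoji and other symbols the font cannot render:

import re

def removeWeirdChars(text):
    # keep Arabic-script characters, printable ASCII, and whitespace; drop the rest
    return re.sub(r'[^\u0600-\u06FF\u0020-\u007E\s]+', ' ', text)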
Example No. 5
    def execute(self):

        numberOfPages = 1
        numberOfTweetsPerPage = 200

        counter = 0
        cloud = ""
        txt = ""

        if self.numberOfTweets > 200:
            numberOfPages = ceil(self.numberOfTweets / 200)
        else:
            numberOfTweetsPerPage = self.numberOfTweets
        for i in range(1, numberOfPages + 1):  # Twitter timeline pages are 1-indexed
            tweets = self.api.user_timeline(screen_name=self.username,
                                            count=numberOfTweetsPerPage,
                                            page=i)
            for each in tweets:
                cloud = re.sub(r'[A-Za-z@_]+', '', each.text)  # strip Latin characters and @handles
                counter += 1
                txt = txt + ' ' + each.text

        txt = re.sub(r'[A-Za-z@]+', '', txt)

        twitter_mask = np.array(
            Image.open(path.join(self.d, "templates/cloud/twitter-logo.jpg")))

        stop = [
            'می', 'من', 'که', 'به', 'رو', 'از', 'ولی', 'با', 'یه', 'این',
            'نمی', 'هم', 'شد', 'ها', 'اما', 'تو', 'واقعا', 'در', 'نه', 'دارم',
            'باید', 'آره', 'برای', 'تا', 'چه', 'کنم', 'بود', 'همه', 'دیگه',
            'ای', 'اون', 'تی', 'حالا', 'بی', 'د', 'چرا', 'بابا', 'منم', 'کیه',
            'توی', 'نیست', 'چی', 'باشه', 'که', 'بودم', 'می کنم', 'که', 'اینه',
            'بهتر', 'داره', 'اینه', 'که', 'کردن', 'می', 'کن', 'بعد', 'دیگه'
        ]

        wc = WordCloudFa(
            # font_path='IranNastaliq.ttf',
            persian_normalize=True,
            max_words=1000,
            margin=0,
            width=3000,
            height=2500,
            min_font_size=1,
            max_font_size=1000,
            background_color=self.backGround,
            mask=twitter_mask,
            include_numbers=False,
            collocations=False)

        wc.add_stop_words(stop)
        wc.generate(txt)

        directory = 'static/images/' + self.username + '.png'
        directory = path.join(self.d, directory)
        image = wc.to_image()
        image.save(directory)
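
The method is shown without its class. A hedged sketch of the context it assumes; the class and constructor are invented for illustration:

import re
from math import ceil
from os import path

import numpy as np
from PIL import Image
from wordcloud_fa import WordCloudFa

class TweetCloud:                            # hypothetical wrapper
    def __init__(self, api, username, numberOfTweets, backGround='white'):
        self.api = api                       # an authenticated tweepy.API instance
        self.username = username
        self.numberOfTweets = numberOfTweets
        self.backGround = backGround
        self.d = path.dirname(__file__)      # base directory for templates/ and static/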
Example No. 6
def show_chat_word_cloud(directory):
    with codecs.open(os.path.join(directory, 'chats.txt'),
                     'r',
                     encoding='utf8') as file:
        print("Start putting words in picture")
        mask_array = np.array(Image.open("telegram.png"))
        wordcloud = WordCloudFa(persian_normalize=True,
                                mask=mask_array,
                                collocations=False)
        stop_words = []
        with codecs.open("stop_words.txt", 'r', encoding='utf8') as words:
            for word in words:
                stop_words.append(word.strip())  # robust to any line ending, unlike word[:-2]
        wordcloud.add_stop_words(stop_words)
        text = delete_extra_characters(file.read())
        wc = wordcloud.generate(text)
        image = wc.to_image()
        image.show()
        image.save(os.path.join(directory, 'wordcloud.png'))
Example No. 7
def main():
    database = DB("ganjoor.s3db")
    database.connect()
    rows = database.select("""select p.cat_id,v.text 
                                from poem as p join verse as v on p.id=v.poem_id 
                                where p.cat_id=24
                            """)

    f = open("verses.txt", "w")
    for r in rows:
        f.write(r[1])
        f.write('\n')
    f.close()

    wc = WordCloudFa(width=1200, height=800)
    with open('verses.txt', 'r', encoding='utf-8') as file:
        text = file.read()
    word_cloud = wc.generate(text)
    image = word_cloud.to_image()
    image.show()
    image.save('hafez.png')
Example No. 9
from wordcloud_fa import WordCloudFa

wordcloud = WordCloudFa(no_reshape=True,
                        persian_normalize=True,
                        include_numbers=False,
                        collocations=False,
                        width=800,
                        height=400)
text = ""
with open('persian-example.txt', 'r') as file:
    text = file.read()
wc = wodcloud.generate(text)
image = wc.to_image()
image.show()
image.save('persian-example.png')
Example No. 10
        def remove_punctuations(text):
            translator = str.maketrans('', '', punctuations_list)
            return text.translate(translator)

        words = remove_punctuations(words)
        words = re.sub(
            'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',  # trailing + matches the whole URL
            '',
            words,
            flags=re.MULTILINE)
        words = re.sub(r"@(\w+)", ' ', words, flags=re.MULTILINE)
        wordcloud = WordCloudFa(persian_normalize=True,
                                stopwords=list(STOPWORDS) +
                                hazm.stopwords_list(),
                                include_numbers=False,
                                background_color='white',
                                width=700,
                                height=500)
        frequencies = wordcloud.process_text(words)
        wc = wordcloud.generate_from_frequencies(frequencies)
        image = wc.to_image()
        st.image(image)

        # Dataframe
        st.subheader('**Data**')
        st.write(data)
        # Random Tweet
        col1, col2 = st.beta_columns(2)  # renamed to st.columns in newer Streamlit releases
        with col1:
            st.markdown('')
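
This fragment belongs to a Streamlit app; `punctuations_list`, `STOPWORDS`, `words`, and `data` are defined earlier in that app. A hedged sketch of the assumed context (the punctuation string and sample text are illustrative):

import re

import hazm
import streamlit as st
from wordcloud import STOPWORDS          # assumption: the stock word_cloud stop-word set
from wordcloud_fa import WordCloudFa

punctuations_list = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~،؛؟"  # Latin plus Persian punctuation
words = 'متن توییت‌ها ...'                                    # placeholder for the collected tweet text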
Example No. 11
from wordcloud_fa import WordCloudFa

wordcloud = WordCloudFa(persian_normalize=True,
                        include_numbers=False,
                        background_color="white")
text = ""
with open('mixed-example.txt', 'r', encoding='utf-8') as file:
    text = file.read()
wc = wordcloud.generate(text)
image = wc.to_image()
image.show()
image.save('mixed-example.png')
Example No. 12
def make_wordcloud(font_path=None,
                   text_path='tweets/result.txt',
                   bw_img="input/southpark1.png",
                   img="input/southpark2.png",
                   add_stopwords=ADD_STOPWORDS,
                   bg_color='black',
                   include_numbers=True,
                   max_words=500,
                   random_color=False):
    assert isinstance(add_stopwords, list)
    # get data directory (using getcwd() is needed to support running example in generated IPython notebook)
    d = path.dirname(__file__) if "__file__" in locals() else getcwd()

    # load text
    text = open(path.join(d, text_path), encoding='utf-8').read()

    # load image. This has been modified in gimp to be brighter and have more saturation.
    image = np.array(Image.open(path.join(d, bw_img)))

    # load the color image; it drives the recoloring step at the end
    mask_color = np.array(Image.open(path.join(d, img)))

    # create the mask: white (255) areas are "masked out"
    twitter_mask = image.copy()
    twitter_mask[twitter_mask.sum(axis=2) == 0] = 255

    # some finesse: we enforce boundaries between colors so they get less washed out.
    # For that we do some edge detection in the image
    edges = np.mean([
        gaussian_gradient_magnitude(mask_color[:, :, i] / 255., 2)
        for i in range(3)
    ],
                    axis=0)
    twitter_mask[edges > .01] = 255

    # Extend the stop-word sets
    stop_words = stopwords.words('english')
    for word in add_stopwords:
        STOPWORDS.add(Normalizer().normalize(word))
    stop_words.extend(STOPWORDS)
    stop_words.extend(EN_STOPWORDS)
    stop_words = set(stop_words)

    # Getting rid of the stopwords
    text_list = [word for word in text.split() if word not in stop_words]

    # Converting the list to a text
    text = ' '.join([str(elem) for elem in text_list])
    text = text.replace('\u200c', '')  # str.replace returns a new string, so assign the result

    # Generate a word cloud image

    wordcloud = WordCloudFa(font_path=font_path,
                            persian_normalize=True,
                            include_numbers=include_numbers,
                            max_words=max_words,
                            stopwords=stop_words,
                            margin=0,
                            width=3000,
                            height=3000,
                            min_font_size=1,
                            max_font_size=2300,
                            random_state=True,
                            background_color=bg_color,
                            mask=twitter_mask,
                            relative_scaling=0,
                            repeat=True).generate(text)

    if not random_color:
        image_colors = ImageColorGenerator(mask_color)
        wordcloud.recolor(color_func=image_colors)
    image = wordcloud.to_image()
    image.show()
    image.save('output/twitter_mask.png')
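
Example No. 12 depends on several imports and two stop-word constants defined elsewhere in its script. A hedged reconstruction; the constant values are illustrative:

from os import getcwd, path

import numpy as np
from PIL import Image
from hazm import Normalizer
from nltk.corpus import stopwords
from scipy.ndimage import gaussian_gradient_magnitude
from wordcloud import STOPWORDS, ImageColorGenerator
from wordcloud_fa import WordCloudFa

ADD_STOPWORDS = ['رو', 'که']    # illustrative Persian additions
EN_STOPWORDS = {'rt', 'amp'}    # illustrative English additions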
Example No. 13
    exit()

text = ""

print("cleaning")
text = " ".join([clean_word(word) for word in raw_text.split()])

#################################

print_stats(text)

print("generating cloud")
mask_array = np_array(Image.open(MASK))

wc_instance = WordCloudFa(
    width=900,
    height=900,
    background_color=BG_COLOR,
    font_path=FONT,
    mask=mask_array,
    persian_normalize=True,
    include_numbers=False,
    stopwords=load_stop_words(),
)

word_cloud = wc_instance.generate(text)

result_image = word_cloud.to_image()
result_image.save(RESULT_FILE_ADD)
result_image.show()
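
The fragment references helpers and constants from earlier in its script. A hedged reconstruction; the names come from the snippet, but the bodies and values are illustrative:

import re
from numpy import array as np_array
from PIL import Image
from wordcloud_fa import WordCloudFa

MASK = 'mask.png'
BG_COLOR = 'white'
FONT = 'font.ttf'
RESULT_FILE_ADD = 'result.png'
raw_text = open('input.txt', encoding='utf-8').read()  # hypothetical input file

def clean_word(word):
    return re.sub(r'[^\u0600-\u06FF]+', '', word)   # keep Arabic-script characters only

def load_stop_words():
    with open('stop_words.txt', encoding='utf-8') as f:
        return {line.strip() for line in f}

def print_stats(text):
    print(f'{len(text.split())} words, {len(set(text.split()))} unique')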
Example No. 14
#print( "\n----\n".join(tweets_simple) )

to_print = "\n\n".join(tweets_simple)

f = open("out/cleaned.txt", "w")
f.write(to_print)
f.close()

#######################################

mask_array = np.array(Image.open("masks/tw.png"))

with open('out/cleaned.txt', 'r', encoding='utf-8') as file:
    text = file.read()

    wc = WordCloudFa(
        width=900,
        height=900,
        background_color="white",
        font_path="fonts/font2.ttf",
        mask=mask_array,
        persian_normalize=True,
        include_numbers=False,
    )

    word_cloud = wc.generate(text)

    image = word_cloud.to_image()
    image.save(f"out/{idish}.png")
    image.show()
Example No. 15
class WordCloudGen:
    """Word Cloud Generator"""
    def __init__(self,
                 mask=None,
                 size=900,
                 stop_words_addr=default_stop_words_path,
                 mask_addr=None):
        self.hazm_normalizer = hazm.Normalizer()
        self.parsivar_normalizer = parsivar.Normalizer()
        self.stemmer = hazm.Stemmer()
        self.lemmatizer = hazm.Lemmatizer()
        self.stop_words = set(hazm.stopwords_list(stop_words_addr))
        mask = np.array(
            Image.open(mask_addr)) if mask_addr is not None else None
        self.generator = WordCloud(width=size,
                                   height=size,
                                   include_numbers=False,
                                   persian_normalize=False,
                                   collocations=True,
                                   mask=mask,
                                   background_color='white')

    def get_word_cloud(self, msgs):
        return self.generator.generate_from_text(
            self._preprocess(msgs)).to_image()

    def _preprocess(self, msgs):
        words = []
        for msg in msgs:
            msg = re.sub(r"https?:\/\/\S*", "",
                         msg)  # https://github.com/MasterScrat/Chatistics
            msg = re.sub(r"\@\S*", "", msg)
            msg = self._normalize(msg)
            msg = msg.replace("ؤ", "و")
            msg = msg.replace("أ", "ا")
            msg = self._remove_punctuations(msg)
            msg = self._remove_weird_chars(msg)
            msg = self._remove_postfixes(msg)
            for word in msg.split():
                if self._is_stop_word(word):
                    word = ""
                if word:
                    # word = self.stemmer.stem(word)
                    word = word.replace(u"\u200c", "")
                    words.append(word)
        return " ".join(words)

    def _normalize(self, text):
        text = self.hazm_normalizer.normalize(text)
        text = self.parsivar_normalizer.normalize(text)
        return text

    def _is_stop_word(self, word):
        # Heuristic check: rewrite colloquial Persian spellings into their formal
        # forms (e.g. همون -> همان) and test each candidate against the stop-word
        # list and the stop-verb lemmatizer.
        if word in {
                "بابا", "کار", "وقت", "دست", "خدا", "انقد", "چقد", "نیس",
                "جدی", "ینی", "واسه", "دگ", "اینقد", "gt", "lt"
        }:
            return True
        if word.startswith("در"):
            modified_word = word[3:]
            if self._is_stop_verb(modified_word):
                return True
        if word.startswith("ب"):
            modified_word = word[2:]
            if self._is_stop_verb(modified_word):
                return True
        if word in self.stop_words:
            return True
        if self.stemmer.stem(word) in self.stop_words:
            return True
        if self._is_stop_verb(word):
            return True
        if self._is_stop_verb(word.replace("می", "می\u200c",
                                           1)):  # میمیرد -> می‌میرد
            return True
        if (word[0] == "ب" or word[0] == "ن"):  # برو، نره
            if word[1:] in self.stop_words:
                return True
            if word[-1] == "ه":
                if word[1:-1] + "ود" in self.stop_words:
                    return True
        if word[-1] == "ه":
            modified_word = word[:-1] + "د"  # داره
            if self._is_stop_verb(modified_word):
                return True
            if self._is_stop_verb(modified_word.replace("می", "می\u200c", 1)):
                return True
            if modified_word in self.stop_words:
                return True
            modified_word = word[:-1] + "ود"  # می‌ره
            if self._is_stop_verb(modified_word):
                return True
            if self._is_stop_verb(modified_word.replace("می", "می\u200c", 1)):
                return True
            modified_word = word + "د"  # می‌ده
            if self._is_stop_verb(modified_word):
                return True
            if self._is_stop_verb(modified_word.replace("می", "می\u200c", 1)):
                return True
            modified_word = word[:-1]  # رفته
            if self._is_stop_verb(modified_word):
                return True
            if self._is_stop_verb(modified_word.replace("می", "می\u200c", 1)):
                return True
        if word[-1] == "ن":
            modified_word = word + "د"  # داره
            if self._is_stop_verb(modified_word):
                return True
            if self._is_stop_verb(modified_word.replace("می", "می\u200c", 1)):
                return True
        if "میا" in word:
            modified_word = word.replace("میا", "می\u200cآی")
            if self._is_stop_verb(modified_word):
                return True
        if "گ" in word:
            modified_word = word.replace("گ", "گوی")
            modified_word = modified_word.replace("گویه", "گوید")
            modified_word = modified_word.replace("گوین", "گویند")
            if self._is_stop_verb(modified_word):
                return True
            if self._is_stop_verb(modified_word.replace("می", "می\u200c", 1)):
                return True
        if word[-1] == "ا":
            modified_word = word[:-1] + "ی"  # حتا -> حتی
            if modified_word in self.stop_words:
                return True
        if "ا" in word:
            modified_word = word[::-1].replace("ا", "اه\u200c", 1)[::-1]
            if modified_word in self.stop_words:
                return True
            if self.stemmer.stem(modified_word) in self.stop_words:
                return True
            modified_word = word[::-1].replace("ا", "یاه\u200c", 1)[::-1]
            if modified_word in self.stop_words:
                return True
            if self.stemmer.stem(modified_word) in self.stop_words:
                return True
        if word[-1] == "ن":
            modified_word = word[:-1]  # حتمن -> حتماً
            if self.stemmer.stem(modified_word) in self.stop_words:
                return True
            modified_word = word[:-1] + "ا"  # حتمن -> حتماً
            if modified_word in self.stop_words:
                return True
            modified_word = word[:-1] + "لا"  # اصن -> اصلاً
            if modified_word in self.stop_words:
                return True
        if word[-1] == "و":  # خودشو -> خودش را
            modified_word = word[:-1]
            if modified_word in self.stop_words:
                return True
            if self.stemmer.stem(modified_word) in self.stop_words:
                return True
        if "و" in word:
            modified_word = word.replace("و", "ا")  # همون -> همان
            modified_word = modified_word.replace("اا", "آ")  # اومده -> آمده
            if modified_word in self.stop_words:
                return True
            if self.stemmer.stem(modified_word) in self.stop_words:
                return True
            if self._is_stop_verb(modified_word):  # نمیدونم -> نمی‌دانم
                return True
            if self._is_stop_verb(modified_word.replace("می", "می\u200c", 1)):
                return True
            if word[-1] == "ا":  # خودشونو -> خودشان را
                modified_word = word[:-1]
                if modified_word in self.stop_words:
                    return True
                if self.stemmer.stem(modified_word) in self.stop_words:
                    return True
        if "خوا" in word:  # می‌خوام
            modified_word = word.replace("خوا", "خواه", 1)
            if self._is_stop_verb(modified_word):
                return True
            if self._is_stop_verb(modified_word.replace("می", "می\u200c", 1)):
                return True
        if "خا" in word:  # می‌خام
            modified_word = word.replace("خا", "خواه", 1)
            if self._is_stop_verb(modified_word):
                return True
            if self._is_stop_verb(modified_word.replace("می", "می\u200c", 1)):
                return True
        if "تو" in word:  # می‌خام
            modified_word = word.replace("تو", "اوت", 1)
            if self._is_stop_verb(modified_word):
                return True
            if self._is_stop_verb(modified_word.replace("می", "می\u200c", 1)):
                return True
        if "ر" in word:  # می‌رم
            modified_word = word.replace("ر", "رو", 1)
            if self._is_stop_verb(modified_word):
                return True
            if self._is_stop_verb(modified_word.replace("می", "می\u200c", 1)):
                return True
        if "بود" in word:  # رفته بودم
            modified_word = re.sub("ه[\u200c ]بود.*", "", word)
            if self._is_stop_verb(modified_word):
                return True
            if self._is_stop_verb(modified_word.replace("می", "می\u200c", 1)):
                return True
        if word == "فک":
            modified_word = "فکر"
            if modified_word in self.stop_words:
                return True
        return False

    def _is_stop_verb(self, word):
        lem = self.lemmatizer.lemmatize(word).split("#")
        if len(lem) == 2:
            if lem[0] in self.stop_words or lem[1] in self.stop_words:
                return True
        return False

    @staticmethod
    def _remove_punctuations(text):
        return punctuation_patterns.sub(" ", text)

    @staticmethod
    def _remove_weird_chars(text):
        return weird_patterns.sub(" ", text)

    @staticmethod
    def _remove_postfixes(text):
        text = text.replace("ٔ ", " ")
        text = text.replace(" ی ", " ")
        text = text.replace(" ها ", " ")
        text = text.replace("‌ها ", " ")
        text = text.replace(" های ", " ")
        text = text.replace("‌های ", " ")
        return text
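
A hedged usage sketch. It assumes the module-level names the class relies on (`WordCloud` as an alias of `WordCloudFa` or a compatible class, plus `default_stop_words_path`, `punctuation_patterns`, and `weird_patterns`) are defined; the messages are made up:

gen = WordCloudGen()                     # or WordCloudGen(mask_addr='mask.png')
image = gen.get_word_cloud(['سلام دنیا چطوری', 'امروز هوا خیلی خوبه'])
image.save('chat_cloud.png')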
Example No. 16
from wordcloud_fa import WordCloudFa
import numpy as np
from PIL import Image

mask = np.array(Image.open("mask.png"))

# Passing the `no_reshape` parameter may break the rendering of Farsi text. If the output of this example
# does not look right, remove that parameter.
wordcloud = WordCloudFa(persian_normalize=True, include_numbers=False, background_color="white", mask=mask, no_reshape=True)

# Adding extra stop words:
wordcloud.add_stop_words(['the', 'and', 'with', 'by', 'in', 'to', 'to the', 'of', 'it', 'is', 'th', 'its', 'for', '[ ]', '. [', '] ['])

text = ""
with open('mixed-example.txt', 'r') as file:
    text = file.read()
wc = wodcloud.generate(text)
image = wc.to_image()
image.show()
image.save('masked-example.png')
Example No. 17
from wordcloud_fa import WordCloudFa

wordcloud = WordCloudFa(persian_normalize=True)
text = ""
with open('persian-example.txt', 'r', encoding='utf-8') as file:
    text = file.read()
wc = wordcloud.generate(text)
image = wc.to_image()
image.show()
image.save('persian-example.png')
Example No. 18
text = text.translate(str.maketrans('', '', "\n"))  # delete newlines (the old space-to-space mapping was a no-op)

word_list = WordTokenizer().tokenize(text)
stop_words = stopwords.words('english')
punctuations = list(string.punctuation)
words = [
    word.strip() for word in word_list if word not in stop_words
    and word not in stop_words_main and word not in punctuations
]

text = ""
for x in words:
    text += x + " "

# alice = np.array(Image.open("mask.png"))

word_cloud = WordCloudFa(persian_normalize=True,
                         width=2000,
                         height=2000,
                         margin=20,
                         repeat=False,
                         max_words=500)
frequencies = word_cloud.process_text(text)
wc = word_cloud.generate_from_frequencies(frequencies)
plt.figure(figsize=(20, 20), facecolor=None)  # create the figure once; a second bare plt.figure() would discard it
plt.imshow(word_cloud)
plt.axis('off')
plt.savefig('WordsCloud.png', dpi=2000, transparent=True)
plt.show()
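
A hedged sketch of the imports and names this fragment assumes (`stop_words_main` is project-specific; the values below are placeholders):

import string

import matplotlib.pyplot as plt
from hazm import WordTokenizer
from nltk.corpus import stopwords
from wordcloud_fa import WordCloudFa

stop_words_main = ['رو', 'که', 'از']                  # illustrative Persian stop words
text = open('input.txt', encoding='utf-8').read()     # hypothetical input file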
Example No. 19
        text = get_tweets_from_user(
            username)  # fetch the tweets of a specific user by username
        break
    else:
        print("you should enter f or u!")

text = get_tweets(text)
text = remove_bad_tweets(text)
text = "\n".join(text)
text = get_words(text)
print(len(text))
text = remove_bad_words(text)
print(len(text))
text1 = "\n".join(text)
text1 = removeWeirdChars(text1)
mask_array = np.array(Image.open(mask_path))
my_wc = WordCloudFa(width=1200,
                    height=1200,
                    background_color=background_color,
                    mask=mask_array,
                    persian_normalize=True,
                    repeat=False,
                    collocations=True)
my_wc.add_stop_words_from_file("../stop_words_kian.txt")
open("edited_tweets.txt", "w").write(text1)
my_wc.generate(text1)
image = my_wc.to_image()
image.show()
filename = datetime.now().strftime("%Y-%m-%d-%H-%M")
image.save('Images/{time}_photo.png'.format(time=filename))
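
The fragment depends on helpers defined earlier in its script (`get_tweets`, `get_tweets_from_user`, `remove_bad_tweets`, `get_words`, `remove_bad_words`, `removeWeirdChars`) plus a few settings. A hedged sketch of the settings and imports only; the helper bodies are project-specific:

from datetime import datetime

import numpy as np
from PIL import Image
from wordcloud_fa import WordCloudFa

mask_path = 'mask.png'       # placeholder
background_color = 'white'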
Example No. 20
from wordcloud_fa import WordCloudFa

wordcloud = WordCloudFa(persian_normalize=True, include_numbers=False)
text = ""
with open('english-example.txt', 'r', encoding='utf-8') as file:
    text = file.read()
wc = wordcloud.generate(text)
image = wc.to_image()
image.show()
image.save('english-example.png')
Example No. 21
import numpy as np

d = path.dirname(__file__)

text = open(path.join(d, 'tweets/result.txt'), encoding='utf-8').read()

twitter_mask = np.array(Image.open(path.join(d, "input/southpark1.png")))

# Combine the stop-word sets
stopwords = set(STOPWORDS)
stopwords |= EN_STOPWORDS

# Generate a word cloud image

wordcloud = WordCloudFa(persian_normalize=True,
                        include_numbers=False,
                        max_words=200,
                        stopwords=stopwords,
                        margin=0,
                        width=3000,
                        height=3000,
                        min_font_size=10,
                        max_font_size=2300,
                        random_state=True,
                        background_color="black",
                        mask=twitter_mask).generate(text)

image = wordcloud.to_image()
image.show()
image.save('output/twitter_mask.png')
Example No. 22
        counter += 1
        txt = txt + ' ' + each.text
        print(counter, cloud)

txt = re.sub(r'[A-Za-z@]+', '', txt)

twitter_mask = np.array(Image.open(path.join(d, "twitter-logo.jpg")))

stop = [
    'می', 'من', 'که', 'به', 'رو', 'از', 'ولی', 'با', 'یه', 'این', 'نمی', 'هم',
    'شد', 'ها', 'اما', 'تو', 'واقعا', 'در', 'نه', 'دارم', 'باید', 'آره',
    'برای', 'تا', 'چه', 'کنم', 'بود', 'همه', 'دیگه', 'ای', 'اون', 'تی', 'حالا',
    'بی', 'د', 'چرا', 'بابا', 'منم', 'کیه', 'توی', 'نیست', 'چی', 'باشه', 'که',
    'بودم', 'می کنم', 'که', 'اینه', 'بهتر', 'داره', 'اینه', 'که'
]
wc = WordCloudFa(persian_normalize=True,
                 max_words=30000,
                 margin=0,
                 width=3000,
                 height=2500,
                 min_font_size=1,
                 max_font_size=3000,
                 background_color="white",
                 mask=twitter_mask,
                 include_numbers=False,
                 stopwords=stop).generate(txt)

image = wc.to_image()
image.show()
image.save('twitter.png')