Пример #1
0
    def word_cloud(self, model: LdaModel, stopwords_path, save_path):
        """Render one word-cloud image per topic of *model*, show it and save it.

        Args:
            model: LDA model whose topics are read through
                ``model.show_topics(formatted=False)``.
            stopwords_path: UTF-8 text file holding one stop word per line.
            save_path: Path prefix; the image for topic ``i`` is written to
                ``<save_path>_topic_<i>.png``.
        """
        # ``readlines`` would keep the trailing newline on every entry, and a
        # stop word such as "word\n" can never equal a real token. Strip each
        # line and drop the blank ones before registering them.
        with open(stopwords_path, 'r', encoding='utf8') as f:
            stripped_lines = [line.strip() for line in f]
        words = [word for word in stripped_lines if word]

        stopwords = add_stop_words(words)
        print('stop words added')
        word_cloud = PersianWordCloud(only_persian=True,
                                      max_words=10,
                                      stopwords=stopwords,
                                      width=800,
                                      height=800,
                                      background_color='black',
                                      min_font_size=1,
                                      max_font_size=300)
        topics = model.show_topics(formatted=False)

        for i, topic in enumerate(topics):
            # topic[1] is turned into a word -> weight mapping; presumably it
            # is a list of (word, weight) pairs -- confirm against the model.
            topic_words = dict(topic[1])
            print(topic_words)
            # Reshape and reorder each word for display before using it as
            # the label drawn in the cloud.
            new = {
                get_display(arabic_reshaper.reshape(word)): weight
                for word, weight in topic_words.items()
            }
            print(new)
            word_cloud.generate_from_frequencies(new)
            image = word_cloud.to_image()
            image.show()
            s = f'{save_path}_topic_{i}.png'
            print(s)
            image.save(s)
Пример #2
0
def word_cloud_generator(text):
    """Build a masked word cloud from *text* and return it as an in-memory JPEG.

    The mask is read from ``twitter-logo.jpg`` located next to this module.
    The returned ``BytesIO`` is named ``image.jpeg`` and rewound to offset 0
    so it can be read straight away.
    """
    from io import BytesIO

    module_dir = path.dirname(__file__)
    mask_file = path.join(module_dir, "twitter-logo.jpg")
    twitter_mask = np.array(Image.open(mask_file))

    # Extend the stop-word set in place with the English stop words.
    stopwords = add_stop_words(['کاسپین'])
    stopwords |= EN_STOPWORDS

    cloud_options = dict(
        only_persian=False,
        max_words=200,
        stopwords=stopwords,
        margin=0,
        width=800,
        height=800,
        min_font_size=1,
        max_font_size=500,
        random_state=True,
        background_color="white",
        mask=twitter_mask,
    )
    wordcloud = PersianWordCloud(**cloud_options).generate(text)
    image = wordcloud.to_image()

    # Serialise into memory instead of a file on disk.
    bio = BytesIO()
    bio.name = 'image.jpeg'
    image.save(bio, 'JPEG')
    bio.seek(0)
    return bio
Пример #3
0
def wc_without_removing_stopWords(text, number):
    """Generate a word cloud from *text*, display it and save it as a JPEG.

    No ``stopwords`` argument is passed to ``PersianWordCloud``, so whatever
    default that class applies is used. The image is written to
    ``../out/<number>.jpg``.
    """
    cloud_options = {
        'only_persian': True,
        'max_words': 100,
        'margin': 0,
        'width': 800,
        'height': 800,
        'min_font_size': 1,
        'max_font_size': 500,
        'background_color': "black",
    }
    cloud = PersianWordCloud(**cloud_options).generate(text)

    rendered = cloud.to_image()
    rendered.show()
    rendered.save('../out/%s.jpg' % number)
Пример #4
0
def draw_cloud(cleantweets):
    """Print the most common tokens of *cleantweets* and draw their word cloud.

    Relies on the module-level names ``max_words`` (number of words to print
    and to draw) and ``image_file_path`` (where the cloud is written). The
    mask is read from ``twitter-logo.jpg`` in the current working directory.
    """
    text = " ".join(map(str, cleantweets))

    # Token frequencies are only printed; the cloud is built from the raw text.
    token_counts = Counter(word_tokenize(text))
    print(token_counts.most_common(max_words))

    twitter_mask = np.array(Image.open("twitter-logo.jpg"))
    cloud_options = dict(
        only_persian=True,
        max_words=max_words,
        margin=0,
        width=800,
        height=800,
        min_font_size=1,
        max_font_size=500,
        background_color="white",
        mask=twitter_mask,
    )
    wordcloud = PersianWordCloud(**cloud_options).generate(text)

    # Render for display first, then write the file, then show the render.
    rendered = wordcloud.to_image()
    wordcloud.to_file(image_file_path)
    rendered.show()
Пример #5
0
def _str_to_bool(value):
    """Convert a command-line string such as ``True``/``False`` to a bool.

    ``argparse`` with ``type=bool`` calls ``bool(text)``, which is ``True`` for
    every non-empty string -- including ``"False"``. This converter parses the
    text instead.

    Raises:
        argparse.ArgumentTypeError: if *value* is not a recognised boolean.
    """
    normalized = value.strip().lower()
    if normalized in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays falsy, as it was under ``type=bool``.
    if normalized in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected True or False, got %r' % value)


def main():
    """Command-line entry point: build a masked word cloud from a text file.

    Reads the text (``--txt``), the mask (``--mask``) and optional stop words
    (``--stopwords``), generates the cloud, shows it and saves it to ``--out``
    (``.png`` is appended when the name does not end in ``png`` or ``jpg``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--txt', help='main text file', type=str, required=True)
    parser.add_argument('--mask', help='binary mask(generated by mask_generator.py)', type=str, required=True)
    parser.add_argument('--stopwords', help='words you don\'t want', type=str, required=False)
    # parser.add_argument('--startwords',help='stopwords that come first',type=str,required=False)
    # Parsed explicitly so that "--persian False" really yields False.
    parser.add_argument('--persian', help='language is persian?(True or False)', type=_str_to_bool, default=True)
    parser.add_argument('--maxwords', help='maximum number of words to show in image', type=int, default=1000)
    parser.add_argument('--bgcolor', help='background color(black or white)', type=str, default='white')
    parser.add_argument('--out', help='image output name', type=str, default='out.png')
    parser.add_argument('--cmap', help='output image colormap', type=str, default='plasma')

    args = parser.parse_args()

    stop = read_words(args.stopwords) if args.stopwords else ""
    # Only png/jpg extensions are kept as given; anything else gets ".png".
    out = args.out if args.out.split('.')[-1] in ['png', 'jpg'] else args.out + '.png'

    wordcloud = PersianWordCloud(
        only_persian=args.persian,
        max_words=args.maxwords,
        stopwords=stop,
        margin=0,
        width=800,
        height=800,
        min_font_size=1,
        max_font_size=500,
        random_state=True,
        background_color=args.bgcolor,
        colormap=args.cmap,
        mask=read_img(args.mask)
    ).generate(read_file(args.txt))

    image = wordcloud.to_image()
    image.show()
    image.save(out)
Пример #6
0
class TweetCloud(object):
    """Build a word cloud from stored tweets and post it as a status update.

    ``generate`` selects the tweets of a time window and builds the cloud;
    ``send`` renders it to ``tmp/`` next to this module, uploads it and posts
    a status describing the window.
    """

    # Tags whose tokens are kept for the cloud -- presumably part-of-speech
    # tags for nouns and adjectives; confirm against the tagger that fills
    # ``clean_text``.
    _KEPT_TAGS = ('Ne', 'N', 'AJ', 'AJe')

    def __init__(self):
        self.tweet_cloud = None
        self.file_names = []
        self.d = path.dirname(__file__)
        self.all_tweets_count = None
        self.from_date = None
        self.from_time = None
        self.to_date = None

    @staticmethod
    def _to_datetime(value):
        """Return *value* as a ``datetime``; a plain ``date`` becomes midnight."""
        # ``datetime`` is a subclass of ``date``, so it must be tested first.
        if isinstance(value, datetime.datetime):
            return value
        return datetime.datetime.combine(value, datetime.time.min)

    @staticmethod
    def _persian_only(token):
        """Keep only the characters of *token* in the range U+0600..U+06FF."""
        return ''.join(ch for ch in token if u'\u0600' <= ch <= u'\u06FF')

    def generate(self,
                 from_date=None,
                 to_date="Today",
                 from_time=None,
                 to_time="Now",
                 max_words=1000):
        """Collect the tweets of a time window and build the word cloud.

        Args:
            from_date: ``"Today"`` (window is yesterday to today) or a float
                day offset added to today's date (negative for the past).
            to_date: Only ``"Today"`` is handled.
            from_time: Float hour offset added to the current time (negative
                for the past). When usable it overrides the date window.
            to_time: Only ``"Now"`` is handled.
            max_words: Maximum number of words drawn in the cloud.

        Raises:
            ValueError: if the arguments do not describe a supported window.
        """
        start = end = None
        if from_date and to_date:
            if from_date == to_date and from_date == "Today":
                start = datetime.date.today() - datetime.timedelta(1)
                end = datetime.date.today()
            elif isinstance(from_date, float) and to_date == "Today":
                start = datetime.date.today() + datetime.timedelta(from_date)
                end = datetime.date.today()
        if from_time and to_time:
            if isinstance(from_time, float) and to_time == "Now":
                start = datetime.datetime.now() + datetime.timedelta(
                    hours=from_time)
                end = datetime.datetime.now()
        if start is None or end is None:
            raise ValueError(
                'unsupported time window: from_date=%r, to_date=%r, '
                'from_time=%r, to_time=%r'
                % (from_date, to_date, from_time, to_time))

        # ``date.replace`` does not accept ``tzinfo``; promote plain dates to
        # datetimes so the timezone can be attached for the query below.
        self.from_date = self._to_datetime(start)
        self.to_date = self._to_datetime(end)

        # Number of hours shown in the status text. ``from_time`` defaults to
        # None, so fall back to the length of the resolved window.
        if from_time is not None:
            self.from_time = abs(from_time)
        else:
            window = self.to_date - self.from_date
            self.from_time = abs(window.total_seconds()) / 3600

        all_tweets = Analysis.objects(
            Q(create_date__lt=self.to_date.replace(tzinfo=tz.tzlocal()))
            & Q(create_date__gte=self.from_date.replace(tzinfo=tz.tzlocal()))
            & Q(user_mentions=[])).all()
        self.all_tweets_count = all_tweets.count()

        all_words = []
        for tweet in all_tweets:
            # ``clean_text`` is iterated as sentences of (token, tag) pairs.
            for sentence in tweet.clean_text:
                for token, tag in sentence:
                    if tag not in self._KEPT_TAGS:
                        continue
                    word = self._persian_only(token)
                    # Tokens with no character in the range add nothing.
                    if word:
                        all_words.append(word)

        text = ' '.join(all_words)
        # Close the image file as soon as its pixels are copied into the array.
        with Image.open(path.join(self.d, "image/twitter-logo.jpg")) as mask:
            twitter_mask = np.array(mask)
        # Generate a word cloud image
        stopwords = add_stop_words(['توییت', 'رو', 'توییتر'])
        self.tweet_cloud = PersianWordCloud(only_persian=True,
                                            max_words=max_words,
                                            stopwords=stopwords,
                                            margin=0,
                                            min_font_size=12,
                                            max_font_size=100,
                                            random_state=1,
                                            background_color="white",
                                            mask=twitter_mask).generate(text)

    def send(self):
        """Render the cloud to ``tmp/``, upload it and post the status update.

        ``generate`` must have been called first: this method reads
        ``tweet_cloud``, ``from_time`` and ``all_tweets_count``.
        """
        # %M is minutes; %m (month) was used here by mistake.
        filename = datetime.datetime.today().strftime('%Y-%m-%d-%H:%M')
        image_path = path.join(self.d, 'tmp/' + filename + '.png')
        self.tweet_cloud.to_image().save(image_path)
        # NOTE(review): file_names is never cleared, so a reused instance
        # uploads every earlier image again -- confirm this is intended.
        self.file_names.append(image_path)
        media_ids = [
            api.media_upload(file_name).media_id
            for file_name in self.file_names
        ]
        status_text = "ابر کلمات {} ساعت گذشته \n در تاریخ {} \n از {} توییت".format(
            int(self.from_time),
            jdatetime.datetime.fromgregorian(
                datetime=datetime.datetime.now()).strftime('%H:%M - %m/%d'),
            self.all_tweets_count,
        )
        api.update_status(status=status_text, media_ids=media_ids)

    @staticmethod
    def send_text_cloud(f_date, f_time, max_words):
        """Generate and send a cloud in one step, reporting progress messages."""
        command_cloud = TweetCloud()
        MessageBoot.send('im going to generate Text CLOUD')
        command_cloud.generate(from_date=f_date,
                               from_time=f_time,
                               max_words=max_words)
        command_cloud.send()
        MessageBoot.send('Text Cloud send')
Пример #7
0
    return dict


# Read the source text, closing the file as soon as it has been read.
with open(path.join(d, 'sohrab.txt'), encoding='utf-8') as text_file:
    text = text_file.read()

# Add another stopword
stopwords = add_stop_words(['شاسوسا'])

# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load these pickles if they come from a trusted source.
with open("sohrab_data.pkl", "rb") as pickle_file:
    data_s = pickle.load(pickle_file)
with open("moshiri_data.pkl", "rb") as pickle_file:
    data_m = pickle.load(pickle_file)

frequency_data = difference(data_s, data_m)

# Generate a word cloud image from the computed frequencies
wordcloud = PersianWordCloud(
    only_persian=True,
    max_words=100,
    stopwords=stopwords,
    margin=0,
    width=800,
    height=800,
    min_font_size=1,
    max_font_size=500,
    background_color="black").generate_from_frequencies(
        frequencies=frequency_data)

image = wordcloud.to_image()
image.show()
image.save('difference_word_map.png')
Пример #8
0
    background_color="Black",
    collocations=False).generate(difference_text)

# Options for the cloud built from the similarity text.
similarity_cloud_options = {
    'only_persian': True,
    'max_words': 100,
    'stopwords': stopwords,
    'margin': 0,
    'width': 800,
    'height': 800,
    'min_font_size': 1,
    'max_font_size': 500,
    'background_color': "Black",
    'collocations': False,
}
wordcloud_similarity = PersianWordCloud(
    **similarity_cloud_options).generate(similarity_text)


def _show_and_save(cloud, file_name):
    """Render *cloud* to an image, display it, save it as *file_name*, return it."""
    rendered = cloud.to_image()
    rendered.show()
    rendered.save(file_name)
    return rendered


# Each cloud is rendered, shown and saved in turn, in this order.
image_difference = _show_and_save(wordcloud_difference, 'difference.png')
image_similarity = _show_and_save(wordcloud_similarity, 'similarity.png')
image_emam = _show_and_save(wordcloud_emam, 'emam.png')
image_shah = _show_and_save(wordcloud_shah, 'shah.png')