Example #1
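# Hypothetical stand-ins (not in the original) for the two variables this
# fragment assumes; they come from earlier scraping code that is not shown:
pairs = [('张涵予', '湄公河行动'), ('黄渤', '疯狂的石头')]  # (actor, movie title)
burst = ['2016', '2017', '2017', '2018']  # one release year per top-100 film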
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

index = [j[0] for j in pairs]
data = [j[1] for j in pairs]
# Convert to a DataFrame ('演员' = actor, '电影名称' = movie title)
dfl = pd.DataFrame({'演员': index, '电影名称': data})

result = dfl.groupby('演员', as_index=False).count()
result = result.sort_values(by='电影名称', ascending=False)
result.columns = ['演员', '参演电影数量']
print(result)

# Enable Chinese text in matplotlib
mpl.rcParams['font.sans-serif'] = ['FangSong']
mpl.rcParams['axes.unicode_minus'] = False

# The 10 actors with the most films among the top-100 movies
result1 = result[0:10]
plt.figure()
result1.plot(x='演员', kind='bar')
plt.savefig('most', dpi=1200)

# The 5 years with the most films among the top-100 movies
burstr = ' '.join(burst)

wc = wordcloud.WordCloud(r'msyh.ttc',
                         width=640,
                         height=640,
                         background_color='white',
                         max_words=50)

wcim = wc.generate(burstr)
wcim.to_image().save('wc_news.png')
Example #2
        # (fragment of a larger script; assumes json, jieba, wordcloud,
        #  `from jieba import analyse`, and a local `model` module are imported)
        json_file.truncate(0)  # clear out the original contents first
        file_dict['entry'].append(result_dict)
        json.dump(file_dict, json_file)
    print("Successfully saved to the JSON file!")
    # Save the results to the database
    for result in result_list:
        sina_news = model.SinaNews(id=None,
                                   title=result['title'],
                                   date=result['date'],
                                   link=result['link'])
        sina_news.save()
    fname = "./sinanews.txt"
    with open(fname, 'w+', encoding='utf-8') as f:
        for result in result_list:
            f.write(result['title'] + "    " + str(result['date']) + "    " +
                    result['link'] + "\n")
    # Analyze the results with TextRank and build a word cloud
    textrank = analyse.textrank
    with open(fname, "r", encoding="utf-8") as nmsg:
        nmdsg = nmsg.read()
    print("\nkeywords by textrank:")
    keywords = textrank(nmdsg)
    txt = ''
    for keyword in keywords:
        print(keyword)
        txt = txt + ' ' + keyword
    w = wordcloud.WordCloud(width=1000, font_path="msyh.ttc", height=700)
    w.generate(" ".join(jieba.cut(txt)))
    w.to_file("focus.png")
Example #3
import collections

import numpy as np
import wordcloud
import matplotlib.pyplot as plt
from PIL import Image
from nltk.corpus import stopwords

# 'content2' is assumed to be the cleaned English text from an earlier step (not shown)
tokens = content2.split(' ')
stoplist = stopwords.words('english')
stoplist.append('I')
words = []  # filtered tokens
for word in tokens:
    if word not in stoplist:
        words.append(word)

# Word-frequency count
word_counts=collections.Counter(words)
word_counts_top=word_counts.most_common()
print(word_counts_top)

# Render the frequencies as a word cloud
background = np.array(Image.open('wordcloud4.jpg'))
wc = wordcloud.WordCloud(
    font_path='C:/Windows/Fonts/simhei.ttf',  # font
    mask=background,           # shape mask image
    background_color='white',  # background color
    max_words=800,             # maximum number of words shown
    max_font_size=700,         # largest font size
    min_font_size=20
)

wc.generate_from_frequencies(word_counts)
image_colors=wordcloud.ImageColorGenerator(background)
wc.recolor(color_func=image_colors)
plt.imshow(wc)
plt.axis('off')
wc.to_file('ciyun.png')
plt.show()
Example #4
# GovRptWordCloudv2.py
import jieba
import wordcloud
from imageio import imread  # scipy.misc.imread was removed from SciPy
mask = imread("fivestart.png")
excludes = {}
f = open("关于实施乡村振兴战略的意见.txt", "r", encoding="utf-8")
t = f.read()
f.close()
ls = jieba.lcut(t)
txt = " ".join(ls)
w = wordcloud.WordCloud(width=1000, height=700,
                        background_color="white",
                        font_path="msyh.ttc", mask=mask)
w.generate(txt)
w.to_file("grwordcloudm.png")
Example #5
# Import the word-cloud library wordcloud and the Chinese segmentation library jieba
import jieba
import wordcloud
# Build and configure the word-cloud object w
w = wordcloud.WordCloud(
    width=1000,
    height=400,
    background_color='#6c909e',
    colormap='GnBu',
    font_path='./res/font/SimHei.ttf'
)

# The raw lyrics text; jieba's lcut() segments it below (see the assumed ending)
txt = '''
从此天涯奔走 穷尽一生探寻挚友 
从此放下离愁 生生世世酒敬自由
从此泼墨煮茶 闭口不谈世间繁华
从此安分守己 也在不说浪迹天涯
从此闭心锁魂 闭口不谈一往情深
从此一人流浪 再也没有我的码头
从此青灯常守 闭口不谈往事情愁
从此江水东流 再也不说玉簪红袖
从此人海逐流 闭口不谈相依为命
从此孑然一身 再也没有十里春风
从此养花遛狗 闭口不说天长地久 
从此白日寻酒 再也不说爱恨情仇
从此莫念莫愁 闭口不言长相厮守
从此清风配酒 路长水远我一人走
从此心随风动 闭口不谈你情我浓
从此宰鸡杀狗 再也不想为你停留
从此花天酒地 闭口不说真心真意
'''
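# Assumed ending (the original fragment is cut off here): segment the lyrics
# with jieba, join with spaces, and render; the output filename is a guess.
string = ' '.join(jieba.lcut(txt))
w.generate(string)
w.to_file('output.png')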
Example #6
import jieba
import wordcloud
from imageio import imread

mask = imread("中国地图.png")
f = open("2019-nCoV.txt", "r", encoding="utf-8")
t = f.read()
f.close()
ls = jieba.lcut(t)
txt = "".join(ls)
w = wordcloud.WordCloud(font_path="msyh.ttc", \
                        width=1000, height=700, background_color="white", \
                        stopwords={"治法", "临床表现", "主症", "结合", "PCR", "2012第二版"}, mask=mask, \
                        scale=10, max_words=30)
w.generate(txt)
w.to_file("2019-nCoV1.png")
Example #7
    def post(self, request):
        form = StockSentimentAnalystForm(request.POST or None)
        error = ''

        if form.is_valid():
            ticker = form.cleaned_data['tickers']
            tweet_counts = form.cleaned_data['tweet_count']
            tweet_days = form.cleaned_data['tweet_day']

            if ticker == "0" or tweet_days == "0" or tweet_counts == "0":
                error = 'Y'
                errorDescription = 'Please select values for Stock Ticker & Tweet Count & Days'
                form = StockSentimentAnalystForm()
                context = {
                    'form': form,
                    'error': error,
                    'errorDescription': errorDescription,
                }
            else:

                # get news for the ticker via NewApiCall
                newapicall = NewApiCall()
                newsDataResponse = newapicall.getlatestnewsonticker(ticker)
                # convert the returned news DataFrame to JSON for the template
                newsDataJson = newsDataResponse[:6].reset_index().to_json(
                    orient='records')
                newsData = []
                newsData = json.loads(newsDataJson)
                print(newsData)
                # Tweet data
                tweets, positivetweets, negativetweets, positivepercentage, negativepercentage = \
                    self.tweetanalyzer.analyzetweets(ticker, tweet_days,tweet_counts)
                # build a word cloud from the tweets
                #wordCloudFilePath = os.path.join(pathlib.Path(__file__).parent.absolute(),
                #                                 "static/assets/images/wordcloud/wordcloud.png")
                wordCloudFilePath = os.path.join(
                    settings.BASE_DIR,
                    "static/StockSentimentAnalyst/assets/images/wordcloud/wordcloud.png"
                )
                allWords = ' '.join(twts for twts in tweets['text'])
                wordCloud = wordcloud.WordCloud(
                    width=400, height=300, random_state=21,
                    max_font_size=119).generate(allWords)
                plt.imshow(wordCloud)
                plt.axis('off')
                plt.savefig(wordCloudFilePath)
                print(ticker)
                # get the ticker's financial data from Yahoo
                startDate = datetime.datetime.strptime(
                    tweets['created_date'].min(),
                    '%Y-%m-%dT%H:%M:%S.%fZ').strftime('%Y-%m-%d')
                endDate = datetime.datetime.strptime(
                    tweets['created_date'].max(),
                    '%Y-%m-%dT%H:%M:%S.%fZ').strftime('%Y-%m-%d')
                yfdf = self.yfapicall.getyfinancedata(ticker,
                                                      startDate=startDate,
                                                      endDate=endDate)

                plt_div = self.getplotdiv.getplot(tweets, yfdf)
                if not plt_div:
                    plt_div = "Data Not Present for the Selected Ticker"

                context = {
                    'form': form,
                    'plt_div': plt_div,
                    'ticker': ticker,
                    'positivetweets': positivetweets,
                    'negativetweets': negativetweets,
                    'positivepercentage': positivepercentage,
                    'negativepercentage': negativepercentage,
                    'newsdata': newsData,
                    'error': error,
                    'errorDescription': ''
                }
        else:
            # invalid form: cleaned_data is unreliable here, so fall back to a message
            plt_div = 'Invalid selection'
            form = StockSentimentAnalystForm()

            context = {'form': form, 'plt_div': plt_div}
        return render(request, self.template_name, context=context)
Example #8
# -*- coding: utf-8 -*-
"""
Created on Sat Feb 15 17:10:13 2020

@author: Administrator
"""

import jieba
import wordcloud
import numpy as np
from PIL import Image

f = open('聊天记录.txt', 'r', encoding='utf-8')
t = f.read()
f.close()
ls = jieba.lcut(t)
txt = ' '.join(ls)

img = Image.open('black_mask.png')
mk = np.array(img)
w = wordcloud.WordCloud(mask=mk, font_path='msyh.ttc', mode='RGBA',
                        background_color=None,
                        stopwords={"Tiwo13", 'Tiwo08', 'Tiwo09', 'Tiwo11',
                                   '小猪', "Tiwo14", "Tiwo16"},
                        max_words=20).generate(txt)

w.to_file('聊天记录.png')
Example #9
import jieba
import wordcloud
from imageio import imread  # scipy.misc.imread was removed from SciPy
mask = imread("chinamap.jpg")
f = open("新时代中国特色社会主义.txt", "r", encoding="utf-8")
t = f.read()
f.close()
ls = jieba.lcut(t)
txt = " ".join(ls)  # join with spaces so WordCloud can tokenize
w = wordcloud.WordCloud(font_path="msyh.ttf", mask=mask,
                        width=1000, height=700, background_color="white")
w.generate(txt)
w.to_file("wordcloud-2.png")
Example #10

import imageio
import wordcloud
import jieba.posseg as pseg
from matplotlib import colors

L = int(input("L:"))
R = int(input("R:"))
for i in range(L, R + 1):
    text = ""
    mk = imageio.imread("./img/" + str(i) + ".png")
    color_list = [
        '#F5FFFA', "#FFFFE0", '#FAFAD2', '#FFFACD', '#FDF5E6', "#F5DEB3",
        "#FAF0E6", "#FFF5EE", "#FFEFD5", "#FDF5E6"
    ]  # build the color palette
    colormap = colors.ListedColormap(color_list)  # wrap the palette as a colormap
    w = wordcloud.WordCloud(scale=5,
                            font_path='simhei.ttf',
                            background_color=None,
                            colormap=colormap,
                            mode='RGBA',
                            mask=mk,
                            max_words=500,
                            relative_scaling=0.7)
    get_file_list('./' + 'tokens', i, i)  # helper defined elsewhere; assumed to fill `text` for file i
    words = text.split(" ")
    dic_word = {}
    for word in words:
        tags = pseg.lcut(word)
        if len(tags) == 0:
            continue
        tag = tags[0]
        if tag.flag == 'x' or tag.flag == 'm' or tag.flag == 'r' or tag.flag == 'f':
            continue
        else:
            if tag.word not in dic_word:
                dic_word[tag.word] = 1
            else:
                dic_word[tag.word] += 1
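    # Assumed ending (the original fragment is cut off here): render the
    # frequency dict with the cloud configured above; the output path is a guess.
    w.generate_from_frequencies(dic_word)
    w.to_file("./out/" + str(i) + ".png")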
Example #11
'''
@Author       : sean cheng
@Email        : [email protected]
@CreateTime   : 2018/7/31
@Program      : Segment 笑傲江湖, count word frequencies, build a word cloud, and list the nouns and stative words that appear
'''
import jieba
import jieba.analyse
import wordcloud

with open('../sample/笑傲江湖-网络版.txt', 'r', encoding='utf-8') as inFile:
    txtData = inFile.read()

wordlist = jieba.lcut(txtData)

wordcount = jieba.analyse.extract_tags(''.join(wordlist),
                                       topK=200,
                                       allowPOS=('n',))  # restrict to nouns

print(wordcount)

cloud = wordcloud.WordCloud(font_path='simhei.ttf',
                            max_words=200,
                            width=800,
                            height=600)
cloud.generate(' '.join(wordcount))
# cloud.generate_from_frequencies(' '.join(wordlist))
cloud.to_file('笑傲江湖名词Top200.png')
Example #12
# Word cloud No. 12: sentiment-analysis word clouds for 三体Ⅱ黑暗森林 (The Dark Forest)
# Bilibili column: 同济子豪兄, 2019-5-23

# Import the word-cloud library wordcloud and the Chinese segmentation library jieba
import jieba
import wordcloud

# Import imread from imageio and read a local image as the cloud's shape mask
import imageio
mk = imageio.imread("chinamap.png")

# Build and configure two word-cloud objects: w1 for positive words, w2 for negative words
w1 = wordcloud.WordCloud(width=1000,
                         height=700,
                         background_color='white',
                         font_path='msyh.ttc',
                         mask=mk,
                         scale=15)
w2 = wordcloud.WordCloud(width=1000,
                         height=700,
                         background_color='white',
                         font_path='msyh.ttc',
                         mask=mk,
                         scale=15)

# Segment the text from the external file to get positive and negative word lists
f = open('三体黑暗森林.txt', encoding='utf-8')
txt = f.read()
txtlist = jieba.lcut(txt)
positivelist = []
negativelist = []
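# The fragment ends before the two lists are filled. A minimal sketch of the
# missing steps, assuming the SnowNLP sentiment library (the original's
# classifier is not shown): words scoring above 0.5 count as positive.
from snownlp import SnowNLP

for word in txtlist:
    if len(word.strip()) > 1:  # skip single characters and whitespace
        if SnowNLP(word).sentiments > 0.5:  # sentiment score in [0, 1]
            positivelist.append(word)
        else:
            negativelist.append(word)

# Render one cloud per list (output filenames are assumed)
w1.generate(' '.join(positivelist))
w1.to_file('positive.png')
w2.generate(' '.join(negativelist))
w2.to_file('negative.png')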
Example #13
File: 2.py  Project: cznc/nlp_demo
'''
https://blog.csdn.net/zhuzuwei/article/details/80766563
鬼吹灯 (Ghost Blows Out the Light) text mining 2: word-cloud display
@author: admin
'''
# 1. Prepare the data (for details see the previous post: 鬼吹灯 text mining 1)
import wordcloud  # pip install wordcloud
import matplotlib.pyplot as plt
myfont = r'C:\Windows\Fonts\simkai.ttf'  # a locally installed font

import pickle
gcd1_words_list = pickle.load(open('gcd1_words_list.txt', 'rb'))

# 2.1 Load the stop-word list (copied verbatim)
my_stop_words_path = '停用词.txt'  # 'stopword.txt'
stop_words_dict = []
with open(my_stop_words_path, errors='ignore') as fr:
    for line in fr.readlines():
        stop_words_dict.append(line.strip())
print('number of stop words = {}'.format(len(stop_words_dict)))


cloudobj = wordcloud.WordCloud(font_path=myfont, width=1200, height=800,
                               prefer_horizontal=0.9, mode='RGBA',
                               background_color=None,
                               stopwords=stop_words_dict,
                               max_words=100).generate(' '.join(gcd1_words_list))
plt.imshow(cloudobj)
plt.show()

#cloudobj.to_file('IMG_9475.jpg')
# RGBA is the red-green-blue-alpha color space, where alpha is transparency.
# JPG has no transparency, so either drop the alpha channel or save as .png
# cloudobj.to_file('IMG_9475.png')  # this one is used by 2_1.py
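# A minimal sketch of the two save options the note above describes, reusing
# the filenames from the commented-out lines:
cloudobj.to_file('IMG_9475.png')  # keeps the alpha channel
cloudobj.to_image().convert('RGB').save('IMG_9475.jpg')  # drops alpha for JPG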
Example #14
def getWordCloud(text):
    wordc = wordcloud.WordCloud().generate(text)
    return wordc.words_


# (the listing jumps into an unrelated fragment here; the opening of this dict
#  was truncated in the original. `d` maps Spanish-phrase regexes to per-movie
#  match counts.)
d = {
    "como": [],
    "me gusta[n]?": [],
    "yo soy": [],
    "dios": [],
    "dios mio": [],
    "Cuban[s]?": [],
    "cuban cigar[s]?": [],
    "peso[s]?": [],
    "tortilla[s]?": []
}
sp = list(d.keys())  # assumed: the phrase list iterated below
# `mov` (rows whose first element is a script text) and the imports re,
# wordcloud, and matplotlib.pyplot as plt are assumed from earlier code

for i in mov:
    for j in sp:
        d[j].append(
            len(
                re.findall('[!?.\[\]\s\'\"]' + j + '[!?.\[\]\s\'\",]?',
                           i[0],
                           flags=re.IGNORECASE)))

total = {}
for i in sp:
    total[i] = sum(d[i])

cloud = wordcloud.WordCloud(background_color="white",
                            width=800,
                            height=400,
                            random_state=23)
plt.imshow(cloud.generate_from_frequencies(total), interpolation='bilinear')
plt.axis("off")
plt.show()
Example #16
import collections

import pandas as pd
import wordcloud
import matplotlib.pyplot as plt

fr = open(r'D:\StanceDetection\LDA_test\output/2018标题_jieba.txt',
          'r',
          encoding='utf-8')
b = []
for line in fr.readlines():
    str_list = line.split()
    #print(str_list)
    b.extend(str_list)
word_counts = collections.Counter(b)
#print(word_counts)
# mask = np.array(Image.open('xin.jpg'))  # shape mask for the cloud
wc = wordcloud.WordCloud(
    font_path='simhei.ttf',    # font path
    background_color='white',  # background color
    width=1000,
    height=1000,
    max_font_size=300,         # largest font size
    min_font_size=50,
    # mask=plt.imread('xin.jpg'),  # shape mask image
    max_words=20  # or len(word_counts) to show every word
)

wc.generate_from_frequencies(word_counts)  # build the cloud from the frequency dict
wc.to_file('2018标题.png')  # save the image
plt.figure('2018标题')  # figure window title
plt.imshow(wc)
plt.axis('off')  # hide the axes
plt.show()
Example #17
    # (method of a larger analysis class; assumes `import wordcloud as wc`,
    #  a module-level `stopwords` set, and imports of collections, itertools,
    #  re, pprint, and matplotlib.pyplot as plt)
    def __init__(self, path, games, logger, suffix):
        super(WordCloud, self).__init__(path, self.__class__.__name__, suffix)

        questions = []

        for game in games:
            questions.append(game.questions)
        questions = list(itertools.chain(*questions))

        # split questions into words
        word_list = []
        word_counter = collections.Counter()
        for q in questions:
            q = re.sub('[?]', '', q)
            words = re.findall(r'\w+', q)
            word_list.append(words)

            for w in words:
                word_counter[w.lower()] += 1

        word_list = list(itertools.chain(*word_list))
        pprint(word_counter)

        def color_func(word=None,
                       font_size=None,
                       position=None,
                       orientation=None,
                       font_path=None,
                       random_state=None):
            color_list = [
                "green", 'blue', 'brown', "red", 'white', "black", "yellow",
                "color", "orange", "pink"
            ]
            people_list = [
                'people', 'person', "he", "she", "human", "man", "woman",
                "guy", 'alive', "girl", "boy", "head", 'animal'
            ]
            prep = [
                'on', "in", 'of', 'to', "with", "by", "at", "or", "and", "from"
            ]
            number = [
                'one', "two", "three", "four", "five", "six", "first",
                "second", "third", "half"
            ]
            spatial = [
                "top", "left", "right", "side", "next", "front", "middle",
                "foreground", "bottom", "background", "near", "behind", "back",
                "at", "row", "far", "whole", "closest"
            ]
            verb = [
                "wearing", "have", "can", "holding", "sitting", "building",
                "standing", "see"
            ]
            obj = [
                "hand",
                "table",
                'car',
                "food",
                "plate",
                "shirt",
                "something",
                "thing",
                "object",
                "light",
                "hat",
                "tree",
                "bag",
                "book",
                "sign",
                "bottle",
                "glass",
                "bus",
                "wall",
                "vehicle",
                "chair",
                "dog",
                "cat",
                "windows",
                "boat",
                "item",
                "shelf",
                "horse",
                "furniture",
                "water",
                "camera",
                "bike",
                "train",
                "window",
                "bowl",
                "plant",
                "ball",
                "cup",
            ]
            misc = ['visible', "made", "part", "piece", "all"]

            if word in color_list: return 'rgb(0, 102, 204)'  #blue
            if word in people_list: return 'rgb(255, 0, 0)'  #red
            if word in prep: return 'rgb(0, 153, 0)'  #green
            if word in number: return 'rgb(255, 128, 0)'  #orange
            if word in spatial: return 'rgb(204, 0, 102)'  #purple
            if word in verb: return 'rgb(0, 204, 102)'  #turquoise
            if word in obj: return 'rgb(64, 64, 64)'  #grey
            if word in misc: return 'rgb(102, 102, 0)'  #yellow
            else:
                logger.warning("Unexpected in cloud of words : " + word)
                return 'rgb(0, 0, 0)'

        # take relative word frequencies into account, lower max_font_size
        wordcloud = wc.WordCloud(background_color="white", color_func=color_func, max_font_size=40, max_words=80,
                              stopwords=stopwords, prefer_horizontal=1, width=400, height=200)\
            .generate(" ".join(str(x) for x in word_list))

        plt.figure()
        plt.imshow(wordcloud)
        plt.axis("off")
Example #18
# GovRptWordCloudV1.py
import jieba
import wordcloud
from imageio import imread  # scipy.misc.imread was removed from SciPy
mask = imread("fivestar.png")
f = open("新时代中国特色社会主义.txt", "r", encoding="utf-8")
t = f.read()
f.close()
ls = jieba.lcut(t)
txt = " ".join(ls)
w = wordcloud.WordCloud(font_path="/Library/Fonts/Arial Unicode.ttf", mask=mask, width=1000, height=700, background_color="white", max_words=15)
w.generate(txt)
w.to_file("grwordcloud.png")
Example #19
# Word cloud No. 1: the Gettysburg Address on the default black background
# Bilibili column: 同济子豪兄, 2019-5-23

# Import the third-party word-cloud library wordcloud
import wordcloud

# Create a word cloud object and assign it to w; w now represents one word cloud
w = wordcloud.WordCloud()

# Call the object's generate method, passing in the text
w.generate('and that government of the people, by the people, for the people, shall not perish from the earth.')

# Save the generated cloud as output1.png in the current folder
w.to_file('output1.png')
Example #20
import wordcloud
import jieba
c = wordcloud.WordCloud(font_path="msyh.ttc")
s = "新时代中国特色社会主义思想是全党全国人民为实现中华民族伟大复兴而奋斗的行动指南"
c.generate(" ".join(jieba.lcut(s)))  # generate() expects a string, so join the tokens
c.to_file("outfile.png")
Example #21
import wordcloud
txt = "life is short, you need python" 
w = wordcloud.WordCloud()  # create a cloud object with all default settings
w.generate(txt)  # load the text the cloud is built from
w.to_file("pywcloud.png")  # write an image file (png or jpg)
Example #22
def getChinaallpro():
    # (assumes module-level imports: datetime as dt, requests, json, jieba,
    #  wordcloud, and pandas as pd)
    # Get the current time
    now_time = dt.datetime.now().strftime('%F')  # %F formats as year-month-day only

    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    area = requests.get(url).json()
    data = json.loads(area['data'])
    update_time = data['lastUpdateTime']
    all_counties = data['areaTree']
    all_list = []
    for country_data in all_counties:
        if country_data['name'] != '中国':
            continue
        else:
            all_provinces = country_data['children']
            for province_data in all_provinces:
                province_name = province_data['name']
                province_today = province_data['today']
                province_total = province_data['total']
                province_result = {
                    '省份': province_name,
                    '更新日期': update_time,
                    '现有确诊': province_total['nowConfirm'],
                    '累计确诊': province_total['confirm'],
                    '累计治愈': province_total['heal'],
                    '累计死亡': province_total['dead'],
                    '今日新增确诊': province_today['confirm'],
                    '今日治愈': province_today['confirmCuts']
                }
                all_list.append(province_result)  # append this province's data to the list

    def yxhcreater(city1, city2, city3):
        yxhtxt1 = " {0}{1}是怎么回事呢?{0}相信大家都很熟悉,但是{0}{1}是怎么回事呢,\n下面就让小编带大家一起了解吧。".format(
            city1, city2)
        yxhtxt2 = "{0}{1},其实就是{2},大家可能会很惊讶{0}怎么会{1}呢?\n但事实就是这样,小编也感到非常惊讶。".format(
            city1, city2, city3)
        yxhtxt3 = "这就是关于{0}{1}的事情了,\n大家有什么想法呢,欢迎在评论区告诉小编一起讨论哦!".format(
            city1, city2)
        print(yxhtxt1 + yxhtxt2 + yxhtxt3)

    list00 = []
    namestr = ""
    namelist = []
    for row in all_list:  # `row` avoids shadowing the built-in `list`
        num = row["今日新增确诊"]
        # address = row["省份"]
        list00.append(num)
    list00.sort(reverse=True)
    tem = []
    for i in range(0, 5):
        tem.append(list00[i])
    try_num = 0
    for province in all_list:
        if (province["今日新增确诊"] in tem) and try_num <= 4:
            namelist.append(province["省份"])
            namestr = namestr + str(province["省份"])
            try_num = try_num + 1

    print("")
    print("新增最多的五个城市:")
    print(namestr)  #新增最多的五个城市
    print("")

    w = wordcloud.WordCloud()
    txt = " ".join(jieba.lcut(namestr))
    print("txt:", end="")
    print(txt)

    # Convert the Chinese to pinyin: my machine had an inexplicable bug that
    # kept it from rendering Chinese word clouds
    from xpinyin import Pinyin
    p = Pinyin()
    ret = p.get_pinyin(txt, '')
    print(ret)

    w.generate(ret)
    w.to_file("result.png")
    print("词云已生成至本文件夹下result文件\n")
    print("营销号生成文章:")
    yxhcreater(str(namelist[0]), str(namelist[1]), str(namelist[2]))

    df = pd.DataFrame(all_list)
    df.to_csv('China/' + now_time + '.csv', index=False, encoding="utf_8_sig")
Example #23
import wordcloud

f = open("D:\\Python-based\\name.txt", "r", encoding="utf-8")
t = f.read()
f.close()
w = wordcloud.WordCloud(font_path="msyh.ttc",
                        width=1000,
                        height=700,
                        background_color="white",
                        max_words=80)
w.generate(t)
w.to_file("new7.png")
Example #24
    # (fragment of a larger function; `gg`, `items`, and the imports wordcloud,
    #  PIL, and numpy as np are assumed from the surrounding code)
    notin = gg.getTheText()  # fetch the exclusion word list
    #print(notin)
    top_word50 = []
    j = 0
    for item in items:
        if item[0] not in notin:
            #print(item)
            top_word50.append(item)
            j += 1
        if j == 50:
            break

    print(top_word50)
    theTxt = " ".join([word[0] for word in top_word50])
    print(theTxt)
    # (an earlier version of the same logic follows, kept inert as a string literal)
    '''
    the_top50 = [('精装', 8177), ('地铁口', 7281), ('业主', 6791), ('三房', 5518), ('花园', 5466), ('看房', 3785), ('首付', 3573), ('学位', 3374), ('红本', 3146), ('物业', 3079), ('急售', 3035), ('户型', 3001), ('地铁', 2771), ('南北', 2559), ('出售', 2495), ('两房', 2442), ('阳台', 2421), ('小区', 2406), ('精装修', 2229), ('装修', 2228), ('楼层', 2217), ('方便', 2197), ('一手', 2151), ('在手', 2113), ('诚心', 2078), ('通透', 2032), ('新房', 2011), ('朝南', 1874), ('深圳', 1793), ('学校', 1668), ('社区', 1618), ('号线', 1581), ('价格', 1555), ('安静', 1553), ('豪宅', 1546), ('采光', 1512), ('税费', 1403), ('万科', 1375), ('光明', 1327), ('高层', 1308), ('满五', 1301), ('外国语', 1290), ('公园', 1267), ('方正', 1247), ('海景', 1238), ('实验', 1231), ('30', 1224), ('赠送', 1185), ('入住', 1179), ('复式', 1177)]

    the_txt = '精装'
    for item in the_top50:
        for i in range(item[1]):
            the_txt  =the_txt+ ' '+item[0]
    print(the_txt)

    image1 = PIL.Image.open(r'image/love.jpg')
    MASK = np.array(image1)
    txt = "life is short, you need python"
    w = wordcloud.WordCloud(background_color="white",font_path="D://pythonRoot//venv//FZZH-RHJW.TTF",repeat=False,collocations=False,mask= MASK)
    w.generate(the_txt)
    w.to_file("image/pywcloud.png")
Example #25
def generate_word_cloud():
    """Creates a wordcloud showing the most optimal genes to study (low ratio,
     high count) large and in red."""
    # (function from a larger module; assumes imports of wordcloud, math, logging,
    #  zipfile, tkinter.filedialog, tkinter.messagebox, openpyxl's load_workbook,
    #  and a project `main` module)
    global symbols, num_ratio_zero, quartile1, median, quartile3
    _filename = main.form_elements['filename']
    initial_dir = _filename[:_filename.rfind('/')] \
        if _filename is not None else None
    initial_file = _filename.split('/')[-1] if _filename is not None else None
    options = {
        'defaultextension': '.xlsx',
        'filetypes': [('excel files', '.xlsx')],
        'initialdir': initial_dir,
        'title': "Choose an output file",
        'initialfile': initial_file
    }
    file = tkinter.filedialog.askopenfilename(**options)
    try:
        wb = load_workbook(filename=file, data_only=True)
    except (FileNotFoundError, zipfile.BadZipfile) as e:
        logging.warning("Exit generate_word_cloud because of error" + str(e))
        return
    ws = wb.active
    rows = main.read_sheet(ws)
    headers = rows[0]
    run_again_msg = " Run the program to generate a correctly formatted "\
                    "output file to graph."
    showinfo = messagebox.showinfo

    # output will change header to 'Gene title'
    # will not be compatible with old output
    if 'Gene title' in headers:  # GEO format
        symbol_col = headers.index('Gene symbol')
    else:
        showinfo(title="Incorrect Output Format",
                 message="No 'Gene title' column found. Please rerun process "
                 "to generate an updated output spreadsheet.")
        return
    if 'TOTAL COUNT' in headers:
        total_count_index = headers.index('TOTAL COUNT')
    else:
        showinfo(title="Incorrect Output Format",
                 message="No 'TOTAL COUNT' column found." + run_again_msg)
        return
    if 'COUNT RATIO' in headers:
        count_ratio_index = headers.index('COUNT RATIO')
    else:
        showinfo(title="Incorrect Output Format",
                 message="No 'COUNT RATIO' column found." + run_again_msg)
        return

    symbols = []
    for row in rows[1:]:
        if row[symbol_col] is None or row[total_count_index] is None or \
                        row[count_ratio_index] is None:
            showinfo(title="Incorrect Output Format",
                     message="Empty cells in mandatory columns." +
                     run_again_msg)
            return

        if type(row[symbol_col]) == str and row[total_count_index] > 0 and \
                row[symbol_col] not in [symbol[0] for symbol in symbols]:
            # does not repeat ^
            # took out: row[count_ratio_index] > 0

            # excludes the datetime rows
            # creates a list of tuples (symbol, total count, ratio)
            symbols.append((row[symbol_col], int(row[total_count_index]),
                            row[count_ratio_index]))

    symbols.sort(key=lambda x: x[2])

    # number of symbols with 0 count ratio- will be excluded from quartiles
    num_ratio_zero = len([symbol for symbol in symbols if symbol[2] == 0])

    cloud_width = len(symbols) * 2 if len(symbols) > 200 else 400
    cloud_height = len(symbols) if len(symbols) > 200 else 200
    cloud = wordcloud.WordCloud(max_words=int(len(symbols) / 2),
                                max_font_size=100,
                                width=cloud_width,
                                height=cloud_height)

    # generate_from_frequencies expects a mapping of word -> frequency
    cloud.generate_from_frequencies(dict(symbol[:2] for symbol in symbols))

    quarter_size = math.floor((len(symbols) - num_ratio_zero) / 4)
    quartile1 = num_ratio_zero + quarter_size
    median = num_ratio_zero + quarter_size * 2
    quartile3 = num_ratio_zero + quarter_size * 3

    cloud.recolor(color_func=set_color_scale)
    output_file = file[:-5] + '_wordcloud.png'
    cloud.to_file(output_file)
    return output_file
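# `set_color_scale` is used above but not shown. A hypothetical color function
# consistent with the quartile globals computed in generate_word_cloud: words
# are ranked by their position in the ratio-sorted `symbols` list, and the
# most promising quartile (lowest ratios) is drawn in red.
def set_color_scale(word, font_size, position, orientation,
                    random_state=None, **kwargs):
    rank = next((i for i, s in enumerate(symbols) if s[0] == word), len(symbols))
    if rank < quartile1:
        return 'rgb(200, 0, 0)'      # lowest ratios: red (most optimal)
    if rank < median:
        return 'rgb(200, 100, 0)'    # second quartile: orange
    if rank < quartile3:
        return 'rgb(100, 100, 100)'  # third quartile: dark grey
    return 'rgb(180, 180, 180)'      # highest ratios: light grey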
Example #26
import codecs
import re

import jieba
import wordcloud
import matplotlib.pyplot as plt
from imageio import imread


# Data preparation
f = codecs.open('/Users/yjhmit/Documents/best.txt', 'r', encoding='utf-8')  # your own chat-log file
fl = f.readlines()  # read the lines (read() takes no path argument)
del fl[:8]
fl = fl[1::3]
strf = ' '.join(fl)
list1 = re.findall(r'/.{2,3}', strf)
list2 = re.findall(r'\[.+?\]', strf)
set1 = set(list1)
set2 = set(list2)
strf = strf.replace('请使用最新版本手机QQ查看', '')
strf = strf.replace('请使用最新版手机QQ体验新功能', '')
for item in set1:
    strf = strf.replace(item, '')
for item in set2:
    strf = strf.replace(item, '')

# Build the word cloud
word_list = jieba.cut(strf, cut_all=True)
word = ' '.join(word_list)
pic = imread('/Users/yjhmit/Pictures/love.jpg')
wc = wordcloud.WordCloud(mask=pic, font_path='/Library/Fonts/Songti.ttc',
                         width=1000, height=500,
                         background_color='white').generate(word)

plt.imshow(wc)
plt.axis('off')
plt.show()
Example #27
# Introduction to Tongji University (同济大学)

import wordcloud
import jieba

w = wordcloud.WordCloud(width=1000,
                        height=700,
                        background_color='white',
                        font_path='msyh.ttc')

f = open('同济大学.txt', encoding='utf-8')
txt = f.read()

txtlist = jieba.lcut(txt)
string = ' '.join(txtlist)

w.generate(string)

w.to_file('output4.png')
Example #28
import wordcloud
file = open(r'./vue.txt', encoding='utf-8')
text = file.read()
wc = wordcloud.WordCloud(
    font_path=r'./yahei.ttc',
    scale=32,
    margin=10,
    background_color='white',
    mode='RGBA'
    )
wc.generate(text)
image = wc.to_image()
image.show()
wc.to_file('./vue.png')
Example #29
import random
from collections import Counter

import numpy as np
import wordcloud
import matplotlib.pyplot as plt

# (fragment: the palette and the head of this color function were truncated;
#  the palette below is a hypothetical stand-in)
colors = ['#268bd2', '#859900', '#cb4b16', '#6c71c4']


def col_func(word, font_size, position, orientation, random_state=None, **kwargs):
    # pick a random palette color for each word
    return random.choice(colors)


fd = {
    'fontsize': '32',
    'fontweight': 'normal',
    'verticalalignment': 'baseline',
    'horizontalalignment': 'center',
}

# `subreddits`, `subreddit_names`, and `subreddit_words` (lists of DataFrames,
# names, and token lists) are assumed from earlier code (not shown)
for df, name, words in zip(subreddits, subreddit_names, subreddit_words):
    wc = wordcloud.WordCloud(width=1000,
                             height=500,
                             collocations=False,
                             background_color="#fdf6e3",
                             color_func=col_func,
                             max_words=200,
                             random_state=np.random.randint(
                                 1, 8)).generate_from_frequencies(
                                     dict(Counter(words)))
    wc.to_file("india.png")
    fig, ax = plt.subplots(figsize=(20, 10))
    ax.imshow(wc, interpolation='bilinear')
    ax.axis("off")
    ax.set_title(name, pad=24, fontdict=fd)

for df in subreddits:
    df['title_length'] = df['title'].apply(lambda x: len(x))

fig, ax = plt.subplots(figsize=(20, 20), sharex=True, sharey=True)
#fig.subplots_adjust(hspace=0.5, wspace=0.4)
Example #30
import wordcloud
import numpy as np
from PIL.Image import open as o

with open('wyyyy.txt','r',encoding='utf-8') as r:
    s = r.read()
# with open('xufulin.png','rb') as r:
#     n = r.read()
n = np.array(o(r'E:\Python186共享文件夹\第三阶段\代码\day10\xufulin.jpg'))
w = wordcloud.WordCloud(font_path=r'E:\Python186共享文件夹\第三阶段\代码\day10\simhei.ttf',mask=n)
w.generate(s)
w.to_file('wyyyy.jpg')