Example 1
def hot_key_word_cloud():
    white_hotkey_list = [
        '分布式', '算法', '嵌入式', '前端', '机器学习', '公众号', '微信公众号', '数据库', '计算机', '人工智能',
        '后端', '框架', '数据结构', '程序', '大数据', '程序设计', '计算机网络', '网络', '视觉', '数据',
        '图像', '小程序', '图像分析', '操作系统', '架构', '安卓', '微服务', '爬虫', '设计模式'
    ]

    wordcloud = WordCloud(width=1300, height=900)
    name = []
    value = []
    for i, bucket in enumerate(CSDN2018BlogStar.hot_key()['aggregations']
                               ['term_comment']['buckets']):
        if re.compile(u'[\u4e00-\u9fa5]').search(bucket['key']):
            if bucket['key'] in white_hotkey_list:
                name.append(bucket['key'])
                value.append(bucket['doc_count'])
        elif re.findall('[a-zA-Z]+', bucket['key']):
            if ('http' not in bucket['key'] and 'csdn' not in bucket['key']
                    and bucket['key'] != 'details'
                    and bucket['key'] != '1&orderby'):
                name.append(bucket['key'])
                value.append(bucket['doc_count'])

    wordcloud.add("", name, value, word_size_range=[30, 120])
    wordcloud.render('csdn_blogstar_hotkey.html')
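
The loop above assumes an Elasticsearch-style terms-aggregation response from CSDN2018BlogStar.hot_key(). A minimal sketch of that assumed shape, useful for exercising the filtering logic offline (the stub below is hypothetical, not part of the original project):

def fake_hot_key():
    # Hypothetical stand-in for CSDN2018BlogStar.hot_key(), mirroring the keys
    # the loop above reads: aggregations -> term_comment -> buckets.
    return {
        'aggregations': {
            'term_comment': {
                'buckets': [
                    {'key': '算法', 'doc_count': 42},
                    {'key': 'python', 'doc_count': 17},
                    {'key': 'http://example.com', 'doc_count': 5},  # would be filtered out
                ]
            }
        }
    }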
Example 2
def analyze_signature():

    # List of friends' signatures
    data = []
    for user in friends:
        data.append(user.signature)

    # Join the signature list into one string
    data = ','.join(data)

    # Segment the text by calling a remote web API
    # jieba and snownlp are not used here because they cannot be packed into an exe, or make the packed file very large
    postData = {
        'data': data,
        'type': 'exportword',
        'arg': '',
        'beforeSend': 'undefined'
    }
    response = post('http://life.chacuo.net/convertexportword', data=postData)
    data = response.text.replace('{"status":1,"info":"ok","data":["',
                                 '').replace('\\/', '').replace('\\\\', '')

    # Decode; Windows behaves differently from other systems
    if ('Windows' in system()):
        data = data.encode('unicode_escape').decode('unicode_escape')
    else:
        data = data.encode('utf-8').decode('unicode_escape')

    # Trim the returned segmentation result: keep only the part before the separator line
    data = data.split("=====================================")[0]

    # Remove meaningless tokens from the segmentation result
    stop_words = [
        ',', ',', '.', '。', '!', '!', ':', ':', '\'', '‘', '’', '“', '”', '的',
        '了', '是', '=', '\r', '\n', '\r\n', '\t', '以下关键词', '[', ']', '{', '}',
        '(', ')', '(', ')', 'span', '<', '>', 'class', 'html', '?'
    ]
    for x in stop_words:
        data = data.replace(x, "")
    data = data.replace('    ', '')

    # Split the result into a list; the API separates tokens with two spaces
    data = data.split('  ')

    # Count word frequencies into the dict signature_dict
    signature_dict = {}
    for word in data:
        if (word in signature_dict.keys()):
            signature_dict[word] += 1
        else:
            signature_dict[word] = 1

    # Draw the word cloud
    name = list(signature_dict.keys())
    value = list(signature_dict.values())
    wordcloud = WordCloud('微信好友个性签名词云图')
    wordcloud.add("", name, value, word_size_range=[20, 100])
    wordcloud.render('data/好友个性签名词云.html')

    print(signature_dict)
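
For comparison, the manual counting loop above can be written with collections.Counter; a small equivalent sketch (assuming `data` is the same token list):

from collections import Counter

signature_dict = dict(Counter(data))  # same word -> frequency mapping as the loop above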
Example 3
    def sign(self):
        """分析bilibili用户的签名出现的词频,并且可视化"""
        print("开始分析:%s" % self.__class__.sign.__name__)
        cursor = self.collection_user.find({"sign": {"$ne": None}}, {"sign": 1, "_id": 0})
        print('数据基数:%s' % cursor.count())
        sign = list(filter(lambda x: len(x), [sign.get('sign') for sign in cursor]))
        stop = [line.strip() for line in open('stop_words.txt', 'r', encoding='utf-8').readlines()]  # load the stop-word list
        filed, attr, words = [], [], []
        for s in sign:
            segs = posseg.cut(s)
            for seg, flag in segs:
                if seg not in stop:
                    if flag != 'm' and flag != 'x':     # drop numerals ('m') and non-morpheme tokens ('x')
                        words.append(seg)
        counter = Counter()
        for word in words:
            counter[word] += 1

        for k, v in counter.most_common():
            filed.append(k)
            attr.append(v)

        wordcloud = WordCloud(width=1300, height=620)
        wordcloud.add("", filed, attr, word_size_range=[30, 100], shape='diamond')
        wordcloud.render("sign.html")
        print('Generated a word cloud!')
        print("分析结束:%s" % self.__class__.sign.__name__)
Example 4
def world_cloud_chart():
    CAT1 = 1000
    CAT2 = 800
    OFFSET = 20
    item_dict = {
        # "Python": CAT1 + random.randrange(-OFFSET, OFFSET),
        # "Anywhere": CAT1 + random.randrange(-OFFSET, OFFSET),
        "Web Apps": CAT1 + random.randrange(-OFFSET, OFFSET),
        "Files": CAT1 + random.randrange(-OFFSET, OFFSET),
        "Consoles": CAT1 + random.randrange(-OFFSET, OFFSET),
        "Databases": CAT1 + random.randrange(-OFFSET, OFFSET),
        "Scheduled Tasks": CAT1 + random.randrange(-OFFSET, OFFSET),
        "Easy Deploy": CAT2 + random.randrange(-OFFSET, OFFSET),
        "Develop Anywhere": CAT2 + random.randrange(-OFFSET, OFFSET),
        "Amazing Support": CAT2 + random.randrange(-OFFSET, OFFSET),
        "Teach & Learn": CAT2 + random.randrange(-OFFSET, OFFSET),
    }
    name_list = item_dict.keys()
    value_list = item_dict.values()
    wordcloud = WordCloud(title="Python Anywhere Features and Advantages",
                          width=1000,
                          height=500,
                          page_title="Python anywhere Word Cloud")
    wordcloud.add("", name_list, value_list, word_size_range=[30, 60])
    return wordcloud
Example 5
    def get_welfare_wordcloud(self):
        '''
        Word cloud of welfare/benefit keywords
        :return:
        '''
        jieba.load_userdict('D:/编程练习题/datasite/test/analysis/welfare_dict.txt')
        sql = 'select welfare from job'
        res = self.db.execute_sql(sql)[0]
        unuse_keywords = [
            '二', '2', '好', '可', ' ', ',', '[', ']', '#', ',', 'x', 'h', '=',
            's', '!', '+', '.', ':', '、', 'd', 'in', '~', '上', '宿', '享',
            'order', '(', ')', '广', '/', '17', '-', '原'
        ]
        d = {}
        for r in res:
            tmp = jieba.lcut(r[0])
            for s in tmp:
                if s in unuse_keywords:
                    continue
                if s not in d.keys():
                    d[s] = 0
                d[s] += 1

        # d_order=sorted(d.items(),key=lambda x:x[1],reverse=True)
        # for s in d_order:
        #     print(s)
        wordcloud = WordCloud(width=1300, height=620)
        wordcloud.add("", d.keys(), d.values(), word_size_range=[20, 100])
        wordcloud.render(path="D:/编程练习题/datasite/test/templates/charts/welfare.html")
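
jieba.load_userdict above reads a plain-text dictionary; per jieba's documented format, each line is "word [freq] [POS]" with the frequency and POS tag optional. A small illustrative sketch (the file name is an example, not the project's real dictionary):

import jieba

with open('welfare_dict_example.txt', 'w', encoding='utf-8') as f:
    f.write('五险一金 10 n\n')   # word, optional frequency, optional POS tag
    f.write('带薪年假\n')        # a bare word is also accepted

jieba.load_userdict('welfare_dict_example.txt')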
Example 6
def gen_gwzz_word(zwlb):
    qs = ZpWordByZwlbModel.objects
    if zwlb:
        qs = qs.filter(zwlb=zwlb)
        path = f'zp_word/{zwlb}.html'
    else:
        path = 'zp_word.html'
    df = read_frame(qs.all())
    if len(df) > 0:
        page = Page()
        Grid_chart1 = Timeline(width=1500, height=800, timeline_bottom=0)
        df_group = df.groupby(['year', 'month'])
        for name, group in df_group:
            month = group['month'].tolist()[0]
            year = group['year'].tolist()[0]
            df_new = group.groupby('word').apply(get_echarts_all_by_value,
                                                 'word')
            chart = WordCloud(f'{zwlb}岗位需求词云', width=1500)
            shape_list = [
                None, 'circle', 'cardioid', 'diamond', 'triangle-forward',
                'triangle', 'pentagon', 'star'
            ]
            chart.add("",
                      df_new['word'].tolist(),
                      df_new['count'].tolist(),
                      word_size_range=[30, 100],
                      rotate_step=66,
                      shape=random.choice(shape_list))
            Grid_chart1.add(chart, f'{year}年{month}月')
        page.add(Grid_chart1)
        page.render(os.path.join(BASE_DIR, 'templates/{}'.format(path)))
Example 7
    def draw_publisher_author_word_cloud(self, publisher: list, author: list):
        """
        画出版社和作者的词云
        :param publisher:
        :param author:
        :return: 无返回值(如果self._json_mode为True则有返回值)
        """
        if len(publisher) < 1:
            logger.error("No Data!")
            raise ValueError("Publisher list is empty!")
        elif len(author) < 1:
            logger.error("No Data!")
            raise ValueError("Author list is empty!")

        name = ['热门出版社', '热门作者']

        # Draw the charts
        page = Page()
        chart = WordCloud(name[0], **self.style.init_style)
        chart.add("", publisher[0], publisher[1], word_size_range=[12, 80], shape="cardioid")
        page.add(chart)

        chart_1 = WordCloud(name[1], **self.style.init_style)
        chart_1.add("", author[0], author[1], word_size_range=[12, 80], shape="pentagon")
        page.add(chart_1)

        if self._json_mode:
            chart_json = [chart.options, chart_1.options]
            return chart_json
        else:
            logger.debug("正在导出: " + Echart_Output_Path + "&".join(name) + ".html")
            page.render(Echart_Output_Path + "&".join(name) + ".html")
Example 8
def create_charts():
    page = Page()

    style = Style(
        width=1100, height=600
    )

    name = [
        'Sam S Club', 'Macys', 'Amy Schumer', 'Jurassic World',
        'Charter Communications', 'Chick Fil A', 'Planet Fitness',
        'Pitch Perfect', 'Express', 'Home', 'Johnny Depp', 'Lena Dunham',
        'Lewis Hamilton', 'KXAN', 'Mary Ellen Mark', 'Farrah Abraham',
        'Rita Ora', 'Serena Williams', 'NCAA baseball tournament', 'Point Break']
    value = [
        10000, 6181, 4386, 4055, 2467, 2244, 1898, 1484, 1112,
        965, 847, 582, 555, 550, 462, 366, 360, 282, 273, 265]
    chart = WordCloud("词云图-默认形状", **style.init_style)
    chart.add("", name, value, word_size_range=[30, 100], rotate_step=66)
    page.add(chart)

    chart = WordCloud("词云图-自定义形状", **style.init_style)
    chart.add("", name, value, word_size_range=[30, 100], shape='diamond')
    page.add(chart)

    return page
Example 9
def draw_word_count(word_counter):
    '''
    Visualize the word frequencies of the corpus as a word cloud
    :param word_counter: WordsCounter
    :return: None; saves the rendered chart file
    '''
    wordsCounter = word_counter

    # draw word cloud
    def counter2list(_counter):
        wordslist, nums = [], []
        for item in _counter:
            wordslist.append(item[0])
            nums.append(item[1])
        return wordslist, nums

    outputFile = COULD_HTML_PATH

    # Extract keywords
    wordslist, nums = counter2list(wordsCounter.most_common(1000))

    cloud = WordCloud("wordCloud", width=1200, height=600, title_pos='center')
    cloud.add(
        ' ',
        wordslist,
        nums,
        shape='circle',
    )

    cloud.render(outputFile)
    logging.info('Done draw cloud')
    return
Example 10
def get_WorldCloud():
    content = ''
    for advantage in all_advantage:
        if advantage != '':
            content = content + ',' + advantage
    # Strip punctuation and extra characters
    content = re.sub(r'[^\w\s]', ' ', content)
    segment = content.split(' ')
    # print(segment)
    # Count occurrences of each word with pandas
    # (dict-based column renaming in .agg() was removed in newer pandas, so use size())
    words_df = pd.DataFrame({'segment': segment})
    words_stat = words_df.groupby('segment').size().reset_index(name='计数')
    words_stat = words_stat.sort_values(by=["计数"], ascending=False)

    test = words_stat.head(100).values
    # Unpack into keyword and count lists
    codes = [test[i][0] for i in range(0, len(test))]
    counts = [test[i][1] for i in range(0, len(test))]
    # Build the word cloud
    wordcloud = WordCloud(width=1300, height=620, page_title="福利关键词")
    wordcloud.add("福利关键词",
                  codes,
                  counts,
                  word_size_range=[20, 100],
                  shape='pentagon')
    wordcloud.render('../chart/world_cloud.html')
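
If only the top counts are needed, the groupby step above can be condensed with pandas value_counts; a minimal sketch over the same `segment` list:

counts_series = pd.Series(segment).value_counts()  # words sorted by frequency, descending
codes = counts_series.index[:100].tolist()
counts = counts_series.values[:100].tolist()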
Example 11
def word_cloud(item_name,item_name_list,item_num_list,word_size_range):

    wordcloud = WordCloud(width=1400,height= 900)
    
    wordcloud.add("", item_name_list, item_num_list,word_size_range=word_size_range,shape='pentagon')
    out_file_name = './analyse/'+item_name+'.html'
    wordcloud.render(out_file_name)
Example 12
def create_city_wordcloud(city):
    city_name = get_city_name(city)
    client = MongoClient('localhost', 27017)
    db = client['tianqihoubao']
    data = db[city]
    keywords = [
        "小雨", "雷阵雨", "中雨", "大雨", "暴雨", "晴", "雾", "阴", "小到中雨", "多云", "中到大雨",
        "大到暴雨", "阵雨"
    ]
    values = []
    for k in keywords:
        value = data.count_documents({
            "weather": {
                "$regex": k
            },
            "date": {
                "$regex": "06月"
            }
        })
        values.append(value)
    wordcloud = WordCloud(city_name + "六月天气词云",
                          width=1000,
                          height=620,
                          page_title="六月天气词云")
    wordcloud.add("", keywords, values, word_size_range=[20, 100])

    wordcloud.render(city_name + "词云图.html")
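
The per-keyword loop above can also be expressed as a single list comprehension over count_documents; an equivalent sketch:

values = [
    data.count_documents({"weather": {"$regex": k}, "date": {"$regex": "06月"}})
    for k in keywords
]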
Example 13
def world_cloud_test(d, num):
    X = pd.Series(unique(d)).values
    Y = pd.Series(num.groupby(d).sum()).values
    wordcloud = WordCloud("", width=1300, height=620)
    wordcloud.add("", X, Y, word_size_range=[20, 100])
    wordcloud.show_config()
    wordcloud.render("E:\\py_data_html\\world_cloud_test.html")
Example 14
def DrawWordCloud(data, num):
    s1 = []
    for i in range(len(data)):
        s = str(data[i]).replace('[', '').replace(']', '')  # strip the brackets; these two lines depend on the data format
        s = s.replace("'", '').replace(',', '')  # strip single quotes and commas
        # Derive a pseudo word-cloud frequency from the ranking and the title length
        for j in range(i * 2, 60 - len(data[i])):
            s1.append(s)
    word_counts = collections.Counter(s1)  # count word frequencies
    #word_counts_top10 = word_counts.most_common(10)  # top 10 most frequent words

    keylist = [k[0] for k in word_counts.items()]
    valuelist = [k[1] for k in word_counts.items()]
    wordcloud = WCD(width=725, height=530)

    wordcloud.add('wordcloud',
                  keylist,
                  valuelist,
                  word_size_range=[
                      13 * (20 / num)**0.5,
                      26 * (1 + 0.02 * num) * (20 / num)**0.5
                  ],
                  rotate_step=36.4)
    wordcloud.render(path="./SE12_Cache/WordCloud.html")
Example 15
def test_wordcloud():

    # wordcloud_0
    name = [
        'Sam S Club', 'Macys', 'Amy Schumer', 'Jurassic World',
        'Charter Communications', 'Chick Fil A', 'Planet Fitness',
        'Pitch Perfect', 'Express', 'Home', 'Johnny Depp', 'Lena Dunham',
        'Lewis Hamilton', 'KXAN', 'Mary Ellen Mark', 'Farrah Abraham',
        'Rita Ora', 'Serena Williams', 'NCAA baseball tournament',
        'Point Break'
    ]
    value = [
        10000, 6181, 4386, 4055, 2467, 2244, 1898, 1484, 1112, 965, 847, 582,
        555, 550, 462, 366, 360, 282, 273, 265
    ]

    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("", name, value, word_size_range=[30, 100], rotate_step=66)
    wordcloud.show_config()
    wordcloud.render()

    # wordcloud_1
    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("", name, value, word_size_range=[30, 100], shape='diamond')
    wordcloud.show_config()
    wordcloud.render()
Example 16
def Generator():
    with open('content.txt', 'r', encoding='utf-8') as f:
        text_body = f.read()

    # Segment the text with jieba
    words_lst = jieba.cut(text_body.replace('\n', '').replace(' ', ''))
    # Count word frequencies
    total = {}
    for i in words_lst:
        total[i] = total.get(i, 0) + 1

    # Sort by frequency and keep only words of two or more characters (top 200)
    data = dict(sorted({k: v for k, v in total.items() if len(k) >= 2}.items(),\
                        key=lambda x: x[1], reverse=True)[:200])

    name = list(data.keys())
    value = list(data.values())  # materialize the values as a list

    # Build a WordCloud object and feed in all the words
    word_cloud = WordCloud(width=1600, height=1024)
    # shape='triangle' renders the cloud as a triangle
    word_cloud.add("", name, value, word_size_range=[20, 100], shape='triangle')
    # Render the cloud to an HTML page
    word_cloud.render('content.html')
Example 17
    def extract_keyword(self):
        """对电影《无双》微博进行关键词提取,并且数据可视化"""
        print("开始分析:%s" % self.__class__.extract_keyword.__name__)
        count = self.collection.find().count()
        print("数据基数:%s" % count)
        cursor = self.collection.find({}, {"content": 1, "_id": 0})
        content = [item.get('content') for item in cursor]
        weibos = list(filter(lambda x: len(x) != 0, content))
        jieba.load_userdict("userdict.txt")  # load the custom user dictionary

        filed, attr, keywords = [], [], []
        for weibo in weibos:
            for x in jieba.analyse.extract_tags(weibo,
                                                withWeight=False,
                                                topK=3):
                keywords.append(x)

        counter = Counter()
        for keyword in keywords:
            counter[keyword] += 1

        for word, count in counter.most_common(50):
            filed.append(word)
            attr.append(count)
        """制作词云图"""
        wordcloud = WordCloud(width=1300, height=620)
        wordcloud.add("电影《无双》关键词统计",
                      filed,
                      attr,
                      word_size_range=[30, 100],
                      shape='diamond')
        wordcloud.render("Word_Keyword_Count.html")
        print("分析结束:%s" % self.__class__.extract_keyword.__name__)
Example 18
 def click2(self):
     year = int(self.comboBox.currentText())
     top = int(self.comboBox_2.currentText())
     t1 = threading.Thread(target=self.myshow)
     t2 = threading.Thread(target=self.mytr, args=(year, top))
     conn = pymysql.connect(host='localhost',
                            user='******',
                            password='******',
                            port=3306,
                            db='maoyan',
                            charset='utf8mb4')
     sql = "select * from films"
     db = pd.read_sql(sql, conn)
     db_ = db[(db.year == year)]
     db_ = db_.sort_values(axis=0, ascending=False, by='box_office')
     top_num = 0
     top_ = []
     for index, item in db_.iterrows():
         top_num += 1
         if top_num < (top + 1):
             top_.append(item['name'])
     value10 = [965, 847, 582, 555, 550, 462, 366, 360, 282, 273]
     value = value10[0:top]
     word = WordCloud(width=1000)
     word.add("", top_, value, word_size_range=[20, 40])
     word.render("wc%d.html" % (year * 100 + top))
     self.widget.load(QUrl('file:///wc%d.html' % (year * 100 + top)))
     t1.start()
     t2.start()
Example 19
def pic(data, file):
    # all_poet = [i[0] for i in data[:30]]
    # all_num = [i[1] for i in data[:30]]
    # br = pyecharts.Bar(title=file.rstrip('.txt')+'最爱用的单字意象:', title_top=0,  width=1200, height=700,)

    # br.add('', all_poet, all_num,  label_pos='center',is_convert=True, xaxis_interval=0, yaxis_interval=0, is_yaxis_inverse=True)
    # br.use_theme('dark')
    # br.render(path=file.rstrip('.txt')+'最爱用的单字意象:_条形图'+'.html')

    all_poet = [i[0] for i in data[:500]]
    all_num = [i[1] for i in data[:500]]
    wordcloud = WordCloud(
        width=1300,
        height=620,
    )
    shape = [
        'circle', 'cardioid', 'diamond', 'triangle-forward', 'triangle',
        'pentagon', 'star'
    ]
    wordcloud.add('',
                  all_poet,
                  all_num,
                  shape=random.choice(shape),
                  word_gap=20,
                  word_size_range=[10, 120],
                  rotate_step=45)
    # str.rstrip strips a set of characters rather than the '.txt' suffix, so drop it explicitly
    base_name = file[:-4] if file.endswith('.txt') else file
    wordcloud.render(path=base_name + '最爱用的单字意象_词云' + '.html')
Example 20
def stack(request):
    if request.method == 'POST':
        language = request.POST.get('language', '')
        if not language:
            return HttpResponse('error language')
        language = language.capitalize()
        # language = 'Java'
        engine = mysql.connect(host="localhost",
                               user="******",
                               passwd="root",
                               db="Jobs",
                               charset='utf8')
        # sql = "SELECT `desc` FROM jobanalysis WHERE type = 'Python'"
        if language == 'All':
            sql = "SELECT `desc` FROM jobanalysis"
        else:
            sql = "SELECT `desc` FROM jobanalysis WHERE type = '{language}'".format(
                language=language)
        df = read_sql(sql, engine)
        desc = ' '.join(df['desc'].tolist())
        word_lst = jieba.cut(desc)
        words = [
            re.sub(r'\s', '', w).lower().capitalize() for w in word_lst
            if re.sub(r'\s', '', w) and len(w) > 2
            and re.findall(r'[0-9a-zA-Z]+', w)
        ]

        # words = words.remove('Python')
        ret = Counter(words)
        # for k, v in ret.most_common(50):
        #     print k.capitalize(), v
        dct = dict(ret.most_common(40))
        pop_lst = [
            '211', '985', 'Python', 'Java', 'And', 'Android', 'C++', 'Php',
            'Ios', 'Web', 'Api'
        ]
        for p in pop_lst:
            if p in dct:
                dct.pop(p)

        wordcloud = WordCloud(width=1300, height=620)
        rename = {
            'Mysql': 'MySQL',
            'Sql': 'SQL',
            'Mongodb': 'MongoDB',
            'Nosql': 'NoSQL',
            'Html': 'HTML',
            'Css': 'CSS',
            'Openstack': 'OpenStack'
        }
        keys = [rename.get(k, k) for k in dct.keys()]

        wordcloud.add("", keys, dct.values(), word_size_range=[25, 80])

        html = wordcloud.render_embed()
        data = {'data': html}
        return HttpResponse(json.dumps(data), content_type="application/json")

    elif request.method == 'GET':
        return render(request, 'backend/stack.html')
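
Since `language` comes from request data, a parameterized query is safer than str.format; a minimal sketch of the same lookup (pandas.read_sql forwards `params` to the DB-API driver, and pymysql uses %s placeholders):

sql = "SELECT `desc` FROM jobanalysis WHERE type = %s"
df = read_sql(sql, engine, params=[language])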
Example 21
    def CWordcloud(self):
        from pyecharts import WordCloud
        name = []
        value = []
        f = open("热评分词词频.txt", 'r', encoding='utf-8')
        lines = f.readlines()
        i = 1
        j = 1
        for line in lines:
            li = line.strip()
            p = re.compile(r'[^\u4e00-\u9fa5a-zA-Z]')
            zh = "".join(p.split(li)).strip()
            zh = ",".join(zh.split())
            name.append(zh)
            if i == 10:
                break
            i += 1
        for line in lines:
            pattern1 = re.compile(r"(\d+)")
            v = re.findall(pattern1, line)
            li = int(v[0])
            value.append(li)
            if j == 10:
                break
            j += 1

        wordcloud = WordCloud(width=1300, height=620)
        wordcloud.add("",
                      name,
                      value,
                      shape='diamond',
                      word_size_range=[20, 100])
        wordcloud.render(r"/tmp/pycharm_project_120/Wangyiyun/result/" + song +
                         "热评词云.html")
Example 22
def create_charts(data):
    # data dict format (for the three-table case):
    # {'charcloud': [str: HTML before the table, str: HTML after the table, data1, data2, ..., dataN], '...': [...]}
    html = ''
    page = Page()
    style = Style(width=900, height=600)
    # This page contains: 1. the all-characters word cloud 'charcloud' (two datasets: chars, values)
    # Table 1:
    # fetch the data for table 1
    html_before = data['charcloud'][0]
    html_after = data['charcloud'][1]
    chars = data['charcloud'][2]
    values = data['charcloud'][3]
    wordcloud = WordCloud("唐诗用字云图", **style.init_style)
    wordcloud.add("字云",
                  chars,
                  values,
                  word_size_range=[10, 100],
                  shape='pentagon')
    java_script = wordcloud.render_embed()
    html += html_before + java_script + html_after
    page.add(wordcloud)
    # Table 2:
    html_before = data['chartop10'][0]
    html_after = data['chartop10'][1]
    chars = data['chartop10'][2]
    values = data['chartop10'][3]
    bar = Bar("唐诗高频十字", **style.init_style)
    bar.add("柱状图", chars, values)
    java_script = bar.render_embed()
    html += html_before + java_script + html_after
    page.add(bar)
    # Table 3:
    html_before = data['frequency&times'][0]
    html_after = data['frequency&times'][1]
    keys = data['frequency&times'][2]
    values = data['frequency&times'][3]
    line = Line("唐诗字频-字数", **style.init_style)
    line.add("字频--字数",
             keys,
             values,
             is_smooth=True,
             is_fill=True,
             area_opacity=0.2,
             is_datazoom_show=True,
             datazoom_type="both",
             datazoom_range=[0, 60],
             xaxis_interval=1,
             yaxis_formatter="字",
             xaxis_name="频次",
             yaxis_name="字数",
             xaxis_name_pos="end",
             yaxis_name_pos="end",
             is_more_utils=True)
    java_script = line.render_embed()
    html += html_before + java_script + html_after
    page.add(line)
    # finally, collect the JS dependencies
    script = page.get_js_dependencies()
    return html, script
Example 23
def show_word_cloud(document):
    # Punctuation to be removed
    left_words = ['.', ',', '?', '!', ';', ':', '\'', '(', ')']
    # Build the dictionary
    dic = corpora.Dictionary([document])
    # Compute each word's usage frequency (bag of words)
    words_set = dic.doc2bow(document)

    # Build the word list and the frequency list
    words, frequences = [], []
    for item in words_set:
        key = item[0]
        frequence = item[1]
        word = dic.get(key=key)
        if word not in left_words:
            words.append(word)
            frequences.append(frequence)
    # Generate the word cloud with pyecharts
    word_cloud = WordCloud(width=1000, height=620)
    word_cloud.add(name='Alice\'s word cloud',
                   attr=words,
                   value=frequences,
                   shape='circle',
                   word_size_range=[20, 100])
    word_cloud.render()
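
For reference, doc2bow above returns a list of (token_id, count) pairs, which is why item[0] is looked up in the dictionary and item[1] is used as the frequency; a tiny sketch of that shape:

from gensim import corpora

doc = ['alice', 'was', 'beginning', 'to', 'get', 'very', 'tired', 'alice']
dic = corpora.Dictionary([doc])
print(dic.doc2bow(doc))  # e.g. [(0, 2), (1, 1), ...] -- ids depend on the dictionary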
Example 24
 def get_word(name=NAME, value=VALUE):
     path = sys.path[1]
     maskimg = plt.imread(path + '/Image/brunomars.jpeg')
     wc = WordCloud(width=600, height=600, background_color='white')
     wc.use_theme('dark')
     wc.add("词云图", name, value, shape="circle", word_gap=20, word_size_range=None, rotate_step=45, mask=maskimg)
     return wc
Example 25
 def world_cloud2():
     """ 生成职位词云
     """
     counter = {}
     with open(os.path.join("data", "post_pre_counter.csv"),
               "r",
               encoding="utf-8") as f:
         f_csv = csv.reader(f)
         for row in f_csv:
             counter[row[0]] = counter.get(row[0], int(row[1]))
         # pprint(counter)
     x = [0] * 101
     y = [0] * 101
     i = 0
     # tuple(counter)
     for c in counter:
         x[i] = c
         y[i] = counter[c]
         i = i + 1
         if i > 100:
             break
     wordcloud = WordCloud(width=1300, height=620)
     wordcloud.add("", x, y, word_size_range=[30, 100], shape='diamond')
     wordcloud.render('wc.html')
     make_a_snapshot('wc.html', os.path.join("images", "wc2.png"))
Example 27
 def Geo(self):
     wordcloud = WordCloud(width=900, height=400)
     wordcloud.add("wordcloud",brands, sales_num, word_size_range=[20, 80],shape='circle')
     wordcloud.render("wordcloud.html")
     bar = Bar("销量与品牌")
     bar.add("sales", brands, sales_num,xaxis_interval=0, xaxis_rotate=30, yaxis_rotate=30,bar_category_gap='35%')
     bar.render("sales and brands1.html")
Example 28
def show_cloud(qianming_list, friend_name_list):
    '''Word cloud chart'''
    # Create the word cloud object
    wordcloud = WordCloud(width=1600, height=1000)
    # Add the data; friend_name_list contains strings, so the values will not display properly...
    wordcloud.add("", qianming_list, friend_name_list, word_size_range=[20, 100])
    wordcloud.render('./img/个签.html')
Example 29
    def run(self,
            num_pages=5,
            consider_flags=['n', 'nr', 'nrt'],
            include_heat=True,
            adjustment=None):

        # "n"是名词,“a”是形容词,“v”是动词,“d”是副词,“x”是非语素词
        # https://blog.csdn.net/suibianshen2012/article/details/53487157

        self.get_words_list(num_pages, include_heat)
        df_wl = self.create_words_table(adjustment)
        df_part = df_wl.loc[df_wl.flag.isin(consider_flags), :]
        df_part = df_part.loc[~df_part.words.isin(self.list_of_drop_words), :]

        print(df_part.shape[0])

        graph_title = "The hot words in douban/blabla within {} pages at {}".format(
            num_pages, time.asctime(time.localtime(time.time())))
        wordcloud_2 = WordCloud(title=graph_title,
                                title_pos='center',
                                width=1600,
                                height=800)
        wordcloud_2.add("",
                        list(df_part.words),
                        df_part.heat,
                        word_size_range=[10, 50],
                        shape='diamond')

        wordcloud_2.render("material/Hot_words_cloud.html")
Example 30
def show_word_cloud(document):
    left_words = ['.', ',', '?', '!', ';', ':', '\'', '(', ')']
    # create dictionary
    dic = corpora.Dictionary([document])
    # calculate frequency
    words_set = dic.doc2bow(document)

    # create frequency list
    words, frequences = [], []
    for item in words_set:
        key = item[0]
        frequence = item[1]
        word = dic.get(key=key)
        if word not in left_words:
            words.append(word)
            frequences.append(frequence)

    # use pyecharts
    word_cloud = WordCloud(width=1000, height=620)
    word_cloud.add(name='Alice\'s word cloud',
                   attr=words,
                   value=frequences,
                   shape='circle',
                   word_size_range=[20, 100])
    word_cloud.render()
Example 31
 def wordcloud(self):
     cmtlist = [i[3] for i in self.data]
     for i in range(len(cmtlist)):
         cmtlist[i] = regex_change(cmtlist[i])  # regex-based filtering
     txt = " ".join(cmtlist)
     ls = jieba.lcut(txt)
     count = {}
     for word in ls:
         count[word] = count.get(word, 0) + 1
     items = list(count.items())  # list of (word, count) tuples
     items.sort(key=lambda x: x[1], reverse=True)
     items = delete_stopwords(items)
     for i in range(10):
         word, count = items[i]
         print("{0:<10}{1:>5}".format(word, count))
     wordcloud = WordCloud(width=800, height=600)
     wordcloud.add("Top20个人说说词云", [items[i][0] for i in range(20)],
                   [items[i][1] for i in range(20)],
                   word_size_range=[30, 100])
     ts = str(int(time.time()))
     out_file = self.path + 'wordcloud_' + ts + '.html'
     wordcloud.render(out_file)
     webbrowser.open_new_tab(out_file)
     self.ins.analysis_pb['value'] = 100
     self.ins.analysis_lb.configure(text='完毕')
Example 32
def stack(request):
    if request.method == 'POST':
        language = request.POST.get('language', '')
        if not language:
            return HttpResponse('error language')
        language = language.capitalize()
        # language = 'Java'
        engine = mysql.connect(host="localhost", user="******", passwd="root", db="Jobs", charset='utf8')
        # sql = "SELECT `desc` FROM jobanalysis WHERE type = 'Python'"
        if language == 'All':
            sql = "SELECT `desc` FROM jobanalysis"
        else:
            sql = "SELECT `desc` FROM jobanalysis WHERE type = '{language}'".format(language=language)
        df = read_sql(sql, engine)
        desc = ' '.join(df['desc'].tolist())
        word_lst = jieba.cut(desc)
        words = [re.sub(r'\s', '', w).lower().capitalize() for w in word_lst if re.sub(r'\s', '', w) and len(w) > 2 and
                 re.findall(r'[0-9a-zA-Z]+', w)]

        # words = words.remove('Python')
        ret = Counter(words)
        # for k, v in ret.most_common(50):
        #     print k.capitalize(), v
        dct = dict(ret.most_common(40))
        pop_lst = ['211', '985', 'Python', 'Java', 'And', 'Android', 'C++', 'Php', 'Ios', 'Web', 'Api']
        for p in pop_lst:
            if p in dct:
                dct.pop(p)

        wordcloud = WordCloud(width=1300, height=620)
        rename = {
            'Mysql': 'MySQL',
            'Sql': 'SQL',
            'Mongodb': 'MongoDB',
            'Nosql': 'NoSQL',
            'Html': 'HTML',
            'Css': 'CSS',
            'Openstack': 'OpenStack'
        }
        keys = [rename.get(k, k) for k in dct.keys()]

        wordcloud.add("", keys, dct.values(), word_size_range=[25, 80])

        html = wordcloud.render_embed()
        data = {
            'data': html
        }
        return HttpResponse(json.dumps(data), content_type="application/json")

    elif request.method == 'GET':
        return render(request, 'backend/stack.html')
Example 33
def test_wordcloud():

    # wordcloud_0
    name = ['Sam S Club', 'Macys', 'Amy Schumer', 'Jurassic World', 'Charter Communications',
            'Chick Fil A', 'Planet Fitness', 'Pitch Perfect', 'Express', 'Home', 'Johnny Depp',
            'Lena Dunham', 'Lewis Hamilton', 'KXAN', 'Mary Ellen Mark', 'Farrah Abraham',
            'Rita Ora', 'Serena Williams', 'NCAA baseball tournament', 'Point Break']
    value = [10000, 6181, 4386, 4055, 2467, 2244, 1898, 1484, 1112, 965, 847, 582, 555,
             550, 462, 366, 360, 282, 273, 265]

    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("", name, value, word_size_range=[30, 100], rotate_step=66)
    wordcloud.show_config()
    wordcloud.render()

    # wordcloud_1
    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("", name, value, word_size_range=[30, 100], shape='diamond')
    wordcloud.show_config()
    wordcloud.render()
Example 34
def pythonWordCloud(x,y,label):
    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("", x, y, word_size_range=[20, 100],shape="triangle-forward")
    wordcloud.render()
    # open the rendered default file; relies on the OS shell/file association (Windows-style)
    os.system(r"render.html")
Example 35
# A word cloud is well suited to showing the frequency or importance of different keywords.
from pyecharts import WordCloud

words = ['python','jupyter','numpy','pandas','matplotlib','sklearn',
        'xgboost','lightGBM','simpy','keras','tensorflow',
         'hive','hadoop','spark']
counts = [100,90,65,95,50,60,70,70,20,70,80,80,60,60]

cloud = WordCloud(title = '数据算法常用工具',width = 600,height = 420)
cloud.add(name = 'utils',attr = words,value = counts,
          shape = "circle",word_size_range = (10,70))
cloud.render('result.词云图示范.html')
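
The shape presets seen across these examples ('circle', 'cardioid', 'diamond', 'triangle-forward', 'triangle', 'pentagon', 'star') come from the underlying echarts word-cloud plugin; a quick sketch reusing the data above to compare a few of them (output file names are illustrative):

for shape in ['circle', 'diamond', 'star']:
    c = WordCloud(title='数据算法常用工具', width=600, height=420)
    c.add(name='utils', attr=words, value=counts, shape=shape,
          word_size_range=(10, 70))
    c.render('result.词云图_{}.html'.format(shape))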
Example 37
def test_more():
    page = Page()

    # line
    line = Line("折线图示例")
    line.add(
        "最高气温",
        WEEK,
        [11, 11, 15, 13, 12, 13, 10],
        mark_point=["max", "min"],
        mark_line=["average"],
    )
    line.add(
        "最低气温",
        WEEK,
        [1, -2, 2, 5, 3, 2, 0],
        mark_point=["max", "min"],
        mark_line=["average"],
    )

    # pie
    v1 = [11, 12, 13, 10, 10, 10]
    pie = Pie("饼图-圆环图示例", title_pos="center")
    pie.add(
        "",
        CLOTHES,
        v1,
        radius=[40, 75],
        label_text_color=None,
        is_label_show=True,
        legend_orient="vertical",
        legend_pos="left",
    )

    page.add([line, pie])

    # kline
    v1 = [
        [2320.26, 2320.26, 2287.3, 2362.94],
        [2300, 2291.3, 2288.26, 2308.38],
        [2295.35, 2346.5, 2295.35, 2345.92],
        [2347.22, 2358.98, 2337.35, 2363.8],
        [2360.75, 2382.48, 2347.89, 2383.76],
        [2383.43, 2385.42, 2371.23, 2391.82],
        [2377.41, 2419.02, 2369.57, 2421.15],
        [2425.92, 2428.15, 2417.58, 2440.38],
        [2411, 2433.13, 2403.3, 2437.42],
        [2432.68, 2334.48, 2427.7, 2441.73],
        [2430.69, 2418.53, 2394.22, 2433.89],
        [2416.62, 2432.4, 2414.4, 2443.03],
        [2441.91, 2421.56, 2418.43, 2444.8],
        [2420.26, 2382.91, 2373.53, 2427.07],
        [2383.49, 2397.18, 2370.61, 2397.94],
        [2378.82, 2325.95, 2309.17, 2378.82],
        [2322.94, 2314.16, 2308.76, 2330.88],
        [2320.62, 2325.82, 2315.01, 2338.78],
        [2313.74, 2293.34, 2289.89, 2340.71],
        [2297.77, 2313.22, 2292.03, 2324.63],
        [2322.32, 2365.59, 2308.92, 2366.16],
        [2364.54, 2359.51, 2330.86, 2369.65],
        [2332.08, 2273.4, 2259.25, 2333.54],
        [2274.81, 2326.31, 2270.1, 2328.14],
        [2333.61, 2347.18, 2321.6, 2351.44],
        [2340.44, 2324.29, 2304.27, 2352.02],
        [2326.42, 2318.61, 2314.59, 2333.67],
        [2314.68, 2310.59, 2296.58, 2320.96],
        [2309.16, 2286.6, 2264.83, 2333.29],
        [2282.17, 2263.97, 2253.25, 2286.33],
        [2255.77, 2270.28, 2253.31, 2276.22],
    ]
    kline = Kline("K 线图示例")
    kline.add(
        "日K",
        ["2017/7/{}".format(i + 1) for i in range(31)],
        v1,
        is_datazoom_show=True,
    )
    page.add(kline)

    # radar
    schema = [
        ("销售", 6500),
        ("管理", 16000),
        ("信息技术", 30000),
        ("客服", 38000),
        ("研发", 52000),
        ("市场", 25000),
    ]
    v1 = [[4300, 10000, 28000, 35000, 50000, 19000]]
    v2 = [[5000, 14000, 28000, 31000, 42000, 21000]]
    radar = Radar("雷达图示例")
    radar.config(schema)
    radar.add("预算分配", v1, is_splitline=True, is_axisline_show=True)
    radar.add(
        "实际开销",
        v2,
        label_color=["#4e79a7"],
        is_area_show=False,
        legend_selectedmode="single",
    )
    page.add(radar)

    # scatter3d
    import random

    data = [
        [
            random.randint(0, 100),
            random.randint(0, 100),
            random.randint(0, 100),
        ]
        for _ in range(80)
    ]
    scatter3D = Scatter3D("3D 散点图示例", width=1200, height=600)
    scatter3D.add("", data, is_visualmap=True, visual_range_color=RANGE_COLOR)
    page.add(scatter3D)

    # wordcloud
    name = [
        "Sam S Club",
        "Macys",
        "Amy Schumer",
        "Jurassic World",
        "Charter Communications",
        "Chick Fil A",
        "Planet Fitness",
        "Pitch Perfect",
        "Express",
        "Home",
        "Johnny Depp",
        "Lena Dunham",
        "Lewis Hamilton",
        "KXAN",
        "Mary Ellen Mark",
        "Farrah Abraham",
        "Rita Ora",
        "Serena Williams",
        "NCAA baseball tournament",
        "Point Break",
    ]
    value = [
        10000,
        6181,
        4386,
        4055,
        2467,
        2244,
        1898,
        1484,
        1112,
        965,
        847,
        582,
        555,
        550,
        462,
        366,
        360,
        282,
        273,
        265,
    ]
    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("", name, value, word_size_range=[30, 100], rotate_step=66)
    page.add(wordcloud)

    # liquid
    liquid = Liquid("水球图示例")
    liquid.add("Liquid", [0.6])
    page.add(liquid)
    assert len(page) == 7
    assert isinstance(page[0], Line)
    assert (
        ("echarts" in page.js_dependencies)
        or ("echarts.min" in page.js_dependencies)
    )
    page.render()
Example 38
def test_wordcloud_shape_diamond():
    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("", name, value, word_size_range=[30, 100], shape='diamond')
    assert "diamond" in wordcloud._repr_html_()
Example 39
def test_wordcloud_default():
    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("", name, value, word_size_range=[30, 100], rotate_step=66)
    assert "diamond" not in wordcloud._repr_html_()
Example 40
def draw_word_wc(name, count):
    wc = WordCloud(width=1300, height=620)
    wc.add("", name, count, word_size_range=[20, 100], shape='diamond')
    wc.render()