Example #1
File: wc.py  Project: njr175/ERP_SCANR
def create_wc(words_in):
    """Create WordCloud object.

    Parameters
    ----------
    words_in : list of tuple
        Words to plot, with their corresponding frequencies.

    Returns
    -------
    wc : WordCloud() object
        Wordcloud definition.
    """

    # Create the WordCloud object
    wc = WordCloud(background_color=None,
                   mode='RGBA',
                   width=800,
                   height=400,
                   prefer_horizontal=1,
                   relative_scaling=0.5,
                   min_font_size=25,
                   max_font_size=80).generate_from_frequencies(words_in)

    # Change colour scheme to grey
    wc.recolor(color_func=_grey_color_func, random_state=3)

    return wc
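`_grey_color_func` is not shown in this excerpt. Below is a minimal, hedged sketch of what such a grey color function and a call to `create_wc` could look like; the frequency values and output filename are made up, and note that the docstring describes a list of (word, frequency) tuples while newer wordcloud releases expect a dict.

import random

def _grey_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    # Hypothetical grey color function: zero saturation, random lightness
    return 'hsl(0, 0%%, %d%%)' % random.randint(25, 75)

# Hedged usage sketch
example_freqs = {'brain': 120.0, 'cortex': 75.0, 'memory': 40.0}
wc = create_wc(example_freqs)
wc.to_file('erp_scanr_wordcloud.png')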
    def get_tagcloud(self, tags, tag_limit=None):
        tag_limit = tag_limit or len(tags)
        tags = sorted(tags, key=lambda kv: -kv['count'])[:tag_limit]  # Get top X tags
        tag_dict = {t['tag_name']: t['count'] for t in tags}

        # Generate a word cloud image
        wordcloud = WordCloud(
            background_color='white',
            min_font_size=10,
            max_font_size=60,
            width=self.tagcloud_width,
            height=self.tagcloud_height or 30 * len(tags) / 2 + 10,
            font_path=os.path.sep.join([settings.STATIC_ROOT, 'fonts', 'OpenSans-Regular.ttf'])
        ).generate_from_frequencies(tag_dict)

        tag_counts = [t['count'] for t in tags]
        step = (float(max(tag_counts))) / len(self.color_selection)
        thresholds = list(reversed([int(round(i * step)) for i in range(len(self.color_selection))]))

        def get_color(word, font_size, position, orientation, random_state=None, **kwargs):
            index = next((i for i, t in enumerate(thresholds) if tag_dict[word] >= t), 0)
            return self.color_selection[index]

        wordcloud.recolor(color_func=get_color)
        image = wordcloud.to_image()
        filepath = self.get_write_to_path(ext="png")
        image.save(filepath)
        return encode_file_to_base64(filepath, "data:image/png;base64,")
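`encode_file_to_base64` is also not part of this excerpt; a plausible standard-library sketch of such a helper is shown below (the signature is inferred from the call above and is an assumption, not the project's actual implementation).

import base64

def encode_file_to_base64(filepath, prefix=""):
    # Hypothetical helper: return the file contents as a base64 data-URI string
    with open(filepath, "rb") as f:
        return prefix + base64.b64encode(f.read()).decode("ascii")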
def make_word_cloud(product, sentiment):
    if sentiment == "all":
        pos, neg = get_top_five_phrases(product,sentiment)

        pos.index = range(0,len(pos))
        neg.index = range(0,len(neg))

        pos_words_array = []
        neg_words_array = []
        for i in range(0,len(pos)):
            pos_words_array.append((pos["vocab"][i].upper(), float(pos["count"][i])))

        for i in range(0,len(neg)):
            neg_words_array.append((neg["vocab"][i].upper(), float(neg["count"][i])))

        wc = WordCloud(background_color="white", max_words=2000,
               max_font_size=300, random_state=42)

        # generate word cloud for positive
        positive_name = '../app/static/img/pos_wordcloud.png'
        wc.generate_from_frequencies(pos_words_array)
        wc.recolor(color_func=pos_color_func, random_state=3)
        wc.to_file(positive_name)

        # generate word cloud for negative
        negative_name = '../app/static/img/neg_wordcloud.png'
        wc.generate_from_frequencies(neg_words_array)
        wc.recolor(color_func=neg_color_func, random_state=3)
        wc.to_file(negative_name)

        return positive_name, negative_name
Example #4
def test_recolor_too_small():
    # check exception is raised when image is too small
    colouring = np.array(Image.new('RGB', size=(20, 20)))
    wc = WordCloud(width=30, height=30, random_state=0, min_font_size=1).generate(THIS)
    image_colors = ImageColorGenerator(colouring)
    with pytest.raises(ValueError, match='ImageColorGenerator is smaller than the canvas'):
        wc.recolor(color_func=image_colors)
def word_cloud(csv_file, stopwords_path, pic_path):
    pic_name = csv_file+"_词云图.png"
    path = os.path.abspath(os.curdir)
    csv_file = path+ "\\" + csv_file + ".csv"
    csv_file = csv_file.replace('\\', '\\\\')
    d = pd.read_csv(csv_file, engine='python', encoding='utf-8')
    content = []
    for i in d['content']:
        try:
            i = translate(i)
        except AttributeError as e:
            continue
        else:
            content.append(i)
    comment_after_split = jieba.cut(str(content), cut_all=False)
    wl_space_split = " ".join(comment_after_split)
    background_image = plt.imread(pic_path)
    stopwords = STOPWORDS.copy()
    with open(stopwords_path, 'r', encoding='utf-8') as f:
        for i in f.readlines():
            stopwords.add(i.strip('\n'))

    wc = WordCloud(width=1024, height=768, background_color='white',
                   mask=background_image, font_path=r"C:\simhei.ttf",
                   stopwords=stopwords, max_font_size=400,
                   random_state=50)
    wc.generate_from_text(wl_space_split)
    img_colors = ImageColorGenerator(background_image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')  
    plt.show() 
    wc.to_file(pic_name)
Example #6
def wordCloud(text_array,name,keyword=""):
	new_text_arr=[]
	if keyword != "":
		keyword=keyword.split(" ")[1]
	for text in text_array:
		if keyword in text:
			new_text_arr.append(text)

	text_array=new_text_arr

	cloud_text=""
	for text in text_array:
		cloud_text+=text+" "

	m_stopwords=['police','traffic','sir']

	for word in m_stopwords:
		STOPWORDS.add(word)

	image_mask = os.path.join(BASE_DIR, 'static/tool/img/nebula.png')
	coloring = imread(image_mask)
	
	wordcloud = WordCloud(stopwords=STOPWORDS,background_color="white",mask=coloring,ranks_only=True,max_words=50).generate(cloud_text)
	filename=os.path.join(BASE_DIR, 'static/tool/img/'+name+'.png')

	image_colors = ImageColorGenerator(coloring)
	wordcloud.recolor(color_func=image_colors)
	wordcloud.to_file(filename)
	import base64
	data_uri = base64.b64encode(open(filename, 'rb').read()).decode('ascii')

	img_tag = '<img src="data:image/png;base64,{0}" style="height:400px;">'.format(data_uri)
	
	layout=wordcloud.layout_
	words_colours={}
	count=1
	for lo in layout:
		entry={}
		entry['word']=lo[0][0]
		color=lo[len(lo)-1]
		color=color[4:]
		color=color[:-1]
		color_split=color.split(',')
		color_num=[int(x) for x in color_split]
		color_hex='#%02x%02x%02x' % tuple(color_num)
		# print color_num
		entry['color']=color_hex
		words_colours[count]=entry
		count+=1

	# print words_colours
	list_html=""
	cap=51
	if cap>len(words_colours):
		cap=len(words_colours)

	for i in range(1,cap):
		list_html+='<li class="list-group-item" ><a class="cloud-key-'+name+'" href="#" style="color:'+words_colours[i]['color']+'">'
		list_html+="#"+str(i)+" "+words_colours[i]['word']+'</a></li>'

	return (img_tag,list_html)
def generateWordCloud(node, contribs, maskImg=None, wordsToShow=100, normalize=True, normMin=0, normMax=1):
    contrib = contribs[node]
    
    # Find side of largest magnitude
    # take k words from that side
    # normalize those words (may need abs val)
    # generate text from those normalized values
    
    # contrib should be sorted high to low
    maxVal = abs(contrib[0][1])
    minVal = abs(contrib[-1][1])
    
    #print (contrib[0][1], contrib[-1][1])
    #print ("max min = ", maxVal, minVal)
    
    newContrib = []
    if (maxVal > minVal): 
        # use front
        
        #newContrib = contrib[0:min(len(contrib), wordsToShow))]
        
        for i in range(min(len(contrib), wordsToShow)):
            newContrib.append(contrib[i])
    else:
        # use back
        for i in range(min(len(contrib), wordsToShow) - 1, -1, -1):
            newContrib.append(contrib[i])
        
        # if the most significant value was negative
        # swap the sign of all values in contrib
        for j in range(len(newContrib)):
            c = newContrib[j]
            newContrib[j] = (c[0], -1*c[1])
    
    #print ("new contrib = ", newContrib)
    
    if (normalize):
        contrib = normalizeContrib(newContrib, normMin, normMax)
    
    # for each value in normalized contrib
    # assign color value
    for c in contrib:
        word, val = c
        # add word to color map
        #wordColorMap[word] = int(round(255*(1-val)))
        wordColorMap[word] = int(round(200*(1-val)))
        
    
    # generate text
    text = generateText(contrib, min(len(contrib), wordsToShow))
    
    # gen word cloud
    #s = time.time()
    wc = WordCloud(background_color="white", max_words=2000, mask=maskImg)
    wc.generate(text)
    wc.recolor(color_func=gray_color_func)
    #e = time.time()
    #print ("word cloud only time = ", (e-s))
    return wc    
Example #8
def wd_cloud(request):
    base_path = path.dirname(__file__)
    font_path = path.join(base_path, 'static/fonts/simsun.ttc')
    text = [list_.list_name for list_ in SongList.objects.all()]
    # join concatenates the strings in the list into one long string
    text = ','.join(text)
    # For more ways to use jieba, see the original author's GitHub
    topK = 160
    tags = jieba.analyse.extract_tags(text, topK=topK, withWeight=True)
    text = ','.join([tag[0] for tag in tags])
    queryword = request.GET.get('queryword')
    # objects.filter returns a QuerySet, whereas objects.get returns a single object,
    # so filter is used here; to select records that do NOT match a condition, use exclude
    # (see the sketch after this function)
    res = SongList.objects.filter(list_name__contains=queryword)

    # Check whether any songs appear in more than one of these playlists
    links = ['http://music.163.com/api/playlist/detail?id=' + str(r.list_id) for r in res]
    id_lists = list()
    name_dict = dict()
    json_texts = [json.loads(requests.get(link).text) for link in links]
    # songs_appear_manytimes is a list of tuples with each tuple containing
    #  a song list's id, time it appears and its name
    songs_appear_manytimes = list()
    for i in range(len(json_texts)):
        t = json_texts[i]['result']['tracks']
        for j in range(len(t)):
            song_id = t[j]['id']
            song_name = t[j]['name']
            id_lists.append(song_id)
            name_dict[song_id] = song_name
    d = collections.Counter(id_lists)
    for k in d:
        if d[k] > 1:
            songs_appear_manytimes.append((name_dict[k], d[k], k))
            print(k)
            print(d[k])
            print(name_dict[k])

    json_response = dict()
    json_response['lists_contain_queryword'] = [(r.list_name, r.list_link) for r in res]
    json_response['songs_appear_manytimes'] = songs_appear_manytimes
    pprint(json_response)
    region = (32, 107, 992, 661)
    mask = np.array(Image.open(path.join(base_path, "static/images/nike-logo.jpg")).crop(region).rotate(90))
    mulan_style = np.array(Image.open(path.join(base_path, "static/images/a.png")).rotate(90))
    color_style = ImageColorGenerator(mulan_style)
    wordcloud = WordCloud(font_path=font_path, mask=mask, background_color='white', max_words=400, width=400,
                          height=800, max_font_size=50, min_font_size=20, relative_scaling=.9, scale=2.0).generate(text)
    wordcloud.recolor(color_func=color_style)
    cloud_img_path = path.join(base_path, "static/images/cloud.png")
    wordcloud.to_file(cloud_img_path)

    return JsonResponse(json_response)
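To illustrate the filter/get/exclude note in the comments above, here is a small hedged sketch against the same SongList model; `some_id` is a placeholder, not a value from the example.

matches = SongList.objects.filter(list_name__contains=queryword)   # QuerySet (possibly empty)
one = SongList.objects.get(list_id=some_id)                        # single object; raises if not exactly one match
others = SongList.objects.exclude(list_name__contains=queryword)   # records that do NOT match the condition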
Example #9
def test_check_errors():
    wc = WordCloud()
    assert_raises(NotImplementedError, wc.to_html)

    try:
        np.array(wc)
        raise AssertionError("np.array(wc) didn'm raise")
    except ValueError as e:
        assert_true("call generate" in str(e))

    try:
        wc.recolor()
        raise AssertionError("wc.recolor didn'm raise")
    except ValueError as e:
        assert_true("call generate" in str(e))
Example #10
def test_check_errors():
    wc = WordCloud()
    with pytest.raises(NotImplementedError):
        wc.to_html()

    try:
        np.array(wc)
        raise AssertionError("np.array(wc) didn't raise")
    except ValueError as e:
        assert "call generate" in str(e)

    try:
        wc.recolor()
        raise AssertionError("wc.recolor didn't raise")
    except ValueError as e:
        assert "call generate" in str(e)
Example #11
def make_wordle_from_mallet(word_weights_file, 
                            num_topics, 
                            words,
                            TopicRanksFile,
                            outfolder,
                            font_path, 
                            dpi):
    """
    # Generate wordles from Mallet output, using the wordcloud module.
    """
    print("\nLaunched make_wordle_from_mallet.")
    for topic in range(0,num_topics):
        ## Gets the text for one topic.
        text = get_wordlewords(words, word_weights_file, topic)
        wordcloud = WordCloud(font_path=font_path, width=1600, height=1200, background_color="white", margin=4).generate(text)
        default_colors = wordcloud.to_array()
        rank = get_topicRank(topic, TopicRanksFile)
        figure_title = "topic "+ str(topic) + " ("+str(rank)+"/"+str(num_topics)+")"       
        plt.imshow(wordcloud.recolor(color_func=get_color_scale, random_state=3))
        plt.imshow(default_colors)
        plt.imshow(wordcloud)
        plt.title(figure_title, fontsize=30)
        plt.axis("off")
        
        ## Saving the image file.
        if not os.path.exists(outfolder):
            os.makedirs(outfolder)
        figure_filename = "wordle_tp"+"{:03d}".format(topic) + ".png"
        plt.savefig(outfolder + figure_filename, dpi=dpi)
        plt.close()
    print("Done.")
def generateWordCloud(node, contribs, wordsToShow=None, normalize=True, normMin=0, normMax=1):
    contrib = contribs[node]
    
    if (normalize):
        contrib = normalizeContrib(contrib, normMin, normMax)
    
    # generate text
    text = generateText(contrib, wordsToShow)
    
    # load mask
    d = path.dirname(__file__)
    circle_mask = imread(path.join(d, "black_circle_mask_whitebg.png"))
    
    # gen word cloud
    wc = WordCloud(background_color="white", max_words=2000, mask=circle_mask)
    wc.generate(text)

    # store to file
    wc.to_file(path.join(d, "node.png"))

    # show
    useColorFunc = False #normalize
    if (useColorFunc):
        plt.imshow(wc.recolor( color_func=pos_neg_color_func ))
    else:
        plt.imshow(wc)
        
    plt.axis("off")
    plt.show()
Example #13
def test_recolor():
    wc = WordCloud(max_words=50)
    wc.generate(THIS)
    array_before = wc.to_array()
    wc.recolor()
    array_after = wc.to_array()
    # check that the same places are filled
    assert_array_equal(array_before.sum(axis=-1) != 0,
                       array_after.sum(axis=-1) != 0)
    # check that they are not the same
    assert_greater(np.abs(array_before - array_after).sum(), 10000)

    # check that recoloring is deterministic
    wc.recolor(random_state=10)
    wc_again = wc.to_array()
    assert_array_equal(wc_again, wc.recolor(random_state=10))
def run_yt():
    yt = ds.acquire_youtube()
    ytimg = imread("ytlogo.png")
    wc = WordCloud(mask=ytimg)
    image_colors = ImageColorGenerator(ytimg)
    wc.generate(word_list_yt(ds.mean(yt[0]), ds.standard_deviation(yt[0])))
    
    plt.imshow(wc.recolor(color_func = image_colors))
    plt.axis("off")
    plt.savefig('popularWordsYT.png', bbox_inches = 'tight', dpi = 200)
    
    words, vidcount = ds.word_count_yt('title', ds.mean(yt[0]), ds.standard_deviation(yt[0]))
    data1 = []
    data2 = []
    labels = []
    count = 0
    for w in sorted(words, key=words.get, reverse=True):   
        labels.append(w)
        data1.append(1000 * words[w]/vidcount)
        count +=1
        if count == 10:
            break
    words, vidcount = ds.word_count_dailymotion( 0, 0 )
    for w in labels:
        data2.append(1000 * words[w]/vidcount)
    create_dualbargraph(data1, data2, labels, 'wordUseCompYT.png')
    Theta = da.yt_thetas()
    for x in range(len(Theta)):
        Theta[x] = Theta[x]/10000
    print(Theta)
    create_bargraph(Theta,('duration', 'date created', 'y-intercept'), 'barGraphYT.png')
Example #15
def run():
    f = open(u'words2.txt', 'r').read()
    words = list(jieba.cut(f))
    a = []
    for w in words:
        if len(w) > 1:
            a.append(w)
    text = r' '.join(a)
    
    bg = np.array(Image.open('bg.jpg'))
    wordcloud = WordCloud(
            background_color = 'white',
            #width = 1500,
            #height = 960,
            #margin = 10,
            mask = bg,
            font_path='C:/Windows/Fonts/simkai.ttf',
            ).generate(text)

    image_colors=ImageColorGenerator(bg)

    plt.imshow(wordcloud.recolor(color_func=image_colors))
    plt.axis('off')
    plt.show()
    wordcloud.to_file('words_result3.png')
    return
Example #16
def create_wordcloud(corpus, output, stopword_dict):
    lex_dic = build_lex_dic(corpus, stopword_dict=stopword_dict)
    total_words = get_total_words(lex_dic)
    ordered_freq_list = build_freq_list(lex_dic, total_words)

    fig = plt.figure(figsize=(10, 8), frameon=False)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    wordcloud = WordCloud(width=1000, height=800, max_words=100, background_color='white',
                          relative_scaling=0.7, random_state=15, prefer_horizontal=0.5).generate_from_frequencies(
        ordered_freq_list[0:100])
    wordcloud.recolor(random_state=42, color_func=my_color_func)

    ax.imshow(wordcloud)
    fig.savefig(output, facecolor='white')
Example #17
def draw_tag_cloud(users_tokens):
    from PIL import Image
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud, ImageColorGenerator

    trump_coloring = np.array(Image.open("pics/trump.png"))

    freqs = get_full_frequencies(users_tokens)
    freq_pairs = freqs.items()
    wc = WordCloud(max_words=2000, mask=trump_coloring,
                   max_font_size=40, random_state=42)
    wc.generate_from_frequencies(freq_pairs)

    image_colors = ImageColorGenerator(trump_coloring)

    # plt.imshow(wc)
    # plt.axis("off")
    #
    # plt.figure()
    plt.imshow(wc.recolor(color_func=image_colors))
    # recolor wordcloud and show
    # we could also give color_func=image_colors directly in the constructor
    # plt.imshow(trump_coloring, cmap=plt.cm.gray)
    plt.axis("off")
    plt.show()
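As the comment above notes, the image-based coloring can also be supplied directly in the constructor instead of calling recolor afterwards. A minimal hedged sketch reusing the same variables from this example:

# Hedged variant: pass the color function up front, no recolor step needed
wc = WordCloud(max_words=2000, mask=trump_coloring, max_font_size=40,
               random_state=42, color_func=ImageColorGenerator(trump_coloring))
wc.generate_from_frequencies(freq_pairs)
plt.imshow(wc)
plt.axis("off")
plt.show()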
Example #18
def wcloud(wf, color, save_as=None):
    """Create a word cloud based on word frequencies,
    `wf`, using a color function from `wc_colors.py`

    Parameters
    ----------
    wf : list
        (token, value) tuples
    color : function
        from `wc_colors.py`
    save_as : str
        filename

    Returns
    -------
    None
    """
    wc = WordCloud(background_color=None, mode='RGBA',
                   width=2400, height=1600, relative_scaling=0.5,
                   font_path='/Library/Fonts/Futura.ttc')
    wc.generate_from_frequencies(wf)
    plt.figure()
    plt.imshow(wc.recolor(color_func=color, random_state=42))
    plt.axis("off")
    if save_as:
        plt.savefig(save_as, dpi=300, transparent=True)
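A hedged usage sketch for wcloud, following the (token, value) format described in the docstring; the color function here is only a stand-in for one from `wc_colors.py`, and newer wordcloud releases expect a dict rather than a list of tuples in generate_from_frequencies.

import random

def _example_color(word, font_size, position, orientation, random_state=None, **kwargs):
    # Placeholder color function in the style wc_colors.py presumably provides
    return 'hsl(210, 60%%, %d%%)' % random.randint(30, 70)

wf = [('corpus', 12.0), ('tokens', 9.0), ('cloud', 5.0)]
wcloud(wf, _example_color, save_as='cloud_example.png')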
Example #19
def make_wordcloud(user_name, sent_words, sentiment):
	text = ""
	for word in sent_words:
		for i in range(1,int(100*np.abs(word[1]))):
			text = text + " " + word[0]

	if text == "": 
		return
	wc = WordCloud(background_color="white",max_words=10, margin=10, random_state=1).generate(text)
	
	
	if sentiment == 'pos':
		wc.recolor(color_func=pos_color_func, random_state=3)
	else:
		wc.recolor(color_func=neg_color_func, random_state=3)
	wc.to_file(os.path.join('static/img/', sentiment+"_wordcloud_"+user_name+".png"))
def run_dm():
    dm = ds.acquire_dailymotion()
    dmimg = imread("dmlogo.png")
    # Read the whole text.
    wc = WordCloud(mask=dmimg)
    image_colors = ImageColorGenerator(dmimg)
    wc.generate(word_list_dailymotion(ds.mean(dm[0]), ds.standard_deviation(dm[0])))
    
    
    # Open a plot of the generated image.
    plt.imshow(wc.recolor(color_func=image_colors))
    plt.axis("off")
    plt.savefig('popularWordsDM.png', bbox_inches = 'tight', dpi = 200)
    words, vidcount = ds.word_count_dailymotion(ds.mean(dm[0]), ds.standard_deviation(dm[0]))
    data1 = []
    data2 = []
    labels = []
    count = 0
    for w in sorted(words, key=words.get, reverse=True):   
        labels.append(w)
        data1.append(1000 * words[w]/vidcount)
        count +=1
        if count == 10:
            break
    words, vidcount = ds.word_count_dailymotion( 0, 0 )
    for w in labels:
        data2.append(1000 * words[w]/vidcount)
    create_dualbargraph(data1, data2, labels, 'wordUseCompDM.png')
    create_bargraph(da.dm_thetas(),('fans','duration','date created', 'y-intercept'), 'barGraphDM.png')
Example #21
def cloudplot(person):

    person = re.sub(r'\+', ' ', person)

    text = GetTextRange(Emails, person)
    text = rmBoring(rmNonAlpha(text)).decode('ascii', 'ignore')

    plt.clf()

    d = path.dirname(path.abspath(__file__))

    hilcolor = np.array(Image.open(path.join(d, "static/img/hillarylogo.jpg")))

    wc = WordCloud(background_color="white", max_words=150, mask=hilcolor,
               stopwords=STOPWORDS.add("said"),
               max_font_size=80, random_state=42,
               relative_scaling = 0.5)


    wc.generate(text)
    image_colors = ImageColorGenerator(hilcolor)

    plt.imshow(wc.recolor(color_func=image_colors))
    plt.axis("off")

    fig = plt.gcf()
    img = StringIO.StringIO()
    fig.savefig(img)
    img.seek(0)

    return send_file(img, mimetype='image/png')
Example #22
def generate_word_cloud(text, mask_filename):
    d = path.dirname(__file__)  #??
    mask = imread(path.join(d, mask_filename))

    # adding stopwords specific to these meeting logs
    stopwords = STOPWORDS.copy()
    stopwords.add("info")
    stopwords.add("meetbot")
    stopwords.add("supybot")

    wc = WordCloud(max_words=1000, mask=mask, stopwords=stopwords, margin=10,
                random_state=1).generate(text)

    wc.recolor(color_func=grey_color_func, random_state=3)

    _, tmpfilename = tempfile.mkstemp('-wordcloud.png')
    wc.to_file(tmpfilename)
    return tmpfilename
    def generatewordcloud(freqTable, inputImageFileName, outputImageFileName):
        global stopwordshearing
        
        ImageFile.LOAD_TRUNCATED_IMAGES = True

        img = Image.open(inputImageFileName)
        img = img.resize((980,1080), Image.ANTIALIAS)
        sl = STOPWORDS | stopwordshearing
        speakerArray = np.array(img)
        wc = WordCloud(background_color="white", max_words=1000, mask=speakerArray, stopwords=sl,
                random_state=42)
        
        wc.generate_from_frequencies(freqTable)
        #print wc.words_
        # create coloring from image
        image_colors = ImageColorGenerator(speakerArray)
        wc.recolor(color_func=image_colors)
        wc.to_file(outputImageFileName)
def post_process():

    #with open('clda_data/out_prism', 'r') as fin:
    #    phi_prism = [np.array(ast.literal_eval(line.strip())) for line in fin]
    #phi_prism = np.array(phi_prism)

    #theta_pb = np.load('/tmp/peircebayes/avg_samples.npz')
    #theta_pb = np.load('/home/rares/Desktop/peircebayes_all_no_sampling/last_sample.npz')
    theta_pb = np.load('data/avg_samples.npz')
    phi = theta_pb['arr_1']
    print phi.shape

    vocab = pickle.load(open('data/vocab.pkl', 'r'))
    inv = dict((v, k) for k, v in vocab.iteritems())

    axis = 1
    index = list(np.ix_(*[np.arange(i) for i in phi.shape]))
    index[axis] = phi.argsort(axis)
    a = phi[index][:,-20:]
    counts = np.rint(a/np.sum(a, axis=1).reshape(-1,1)*1000).tolist()
    idx_l = index[axis][:,-20:].tolist()
    words = [[inv[i] for i in subl] for subl in idx_l]
    #pprint(words)

    # The PRISM comparison below depends on phi_prism, whose load is commented out above
    #index_prism = list(np.ix_(*[np.arange(i) for i in phi_prism.shape]))
    #index_prism[axis] = phi_prism.argsort(axis)
    #a_prism = phi_prism[index_prism][:,-20:]
    #idx_l_prism = index_prism[axis][:,-20:].tolist()
    #words_prism = [[inv[i] for i in subl] for subl in idx_l_prism]

    #pprint(words_prism)

    # topic 1
    freq1 = list(reversed(zip(words[0], list(a[0,:]))))
    # topic 2
    freq2 = list(reversed(zip(words[1], list(a[1,:]))))

    # topic 1
    #freq1_prism = list(reversed(zip(words_prism[19], list(a_prism[19,:]))))
    # topic 2
    #freq2_prism = list(reversed(zip(words_prism[18], list(a_prism[18,:]))))


    wc = WordCloud(background_color="white", width=400, height=400,
        random_state=1234).fit_words(freq1)

    plt.imshow(wc.recolor(color_func=grey_color_func, random_state=3))
    plt.axis("off")
    plt.savefig('data/topic_1.pdf', format='pdf')
    plt.close()

    plt.imshow(wc.fit_words(freq2).recolor(color_func=grey_color_func, random_state=3))
    plt.axis("off")
    plt.savefig('data/topic_2.pdf', format='pdf')
    plt.close()
Example #25
    def generatewordcloud(speaker, inputImageFileName, outputImageFileName):

        speakerData = data[data.Speaker == speaker]
        allText = ""
        for index, row in speakerData.iterrows():
            allText += str(row['Text']) + " "
    
        print (allText)
        ImageFile.LOAD_TRUNCATED_IMAGES = True

        img = Image.open(inputImageFileName)
        img = img.resize((980,1080), Image.ANTIALIAS)

        speakerArray = np.array(img)
        wc = WordCloud(background_color="white", max_words=1000, mask=speakerArray, stopwords=STOPWORDS)
        wc.generate(allText)
        # create coloring from image
        image_colors = ImageColorGenerator(speakerArray)
        wc.recolor(color_func=image_colors)
        wc.to_file(outputImageFileName)
Example #26
def generate(text):
	resources = os.path.join(os.path.dirname(os.path.dirname(__file__)), "resources")
	masks = os.path.join(resources, "masks")
	fonts = os.path.join(resources, "fonts")

	mask = numpy.array(Image.open(os.path.join(masks, random.choice(os.listdir(masks)))))

	cloud = WordCloud(
		font_path=os.path.join(fonts, random.choice(os.listdir(fonts))),
		background_color="#1A1A1A",
		mask=mask,
		scale=2,
		max_words=None,
		relative_scaling=0.5,
		prefer_horizontal=1.0
	)

	cloud.generate(text)

	image_colors = ImageColorGenerator(mask)
	cloud.recolor(color_func=image_colors)

	cloud.to_file("cloud.png")
Example #27
File: Wc.py  Project: NotSoSuper/NotSoBot
	def make_wc_custom(self, mask, text, max):
		try:
			coloring = np.array(PIL.Image.open(mask))
			wc = WordCloud(width=1024, height=768, max_words=max, mask=coloring)
			wc = wc.generate(' '.join(text))
			image_colors = ImageColorGenerator(coloring)
			wc = wc.recolor(color_func=image_colors)
			img = wc.to_image()
			b = BytesIO()
			img.save(b, 'png')
			b.seek(0)
			return b
		except Exception as e:
			return str(e)
def wordcloud(wordSource):
    # writes the original category list to a text file
    d = os.path.dirname(__file__)
    file = open("catagory.txt", 'w')
    for item in wordSource:
        file.write("%s\n" % item)
    thefile = open(os.path.join(d, "catagory.txt")).read()

    #adds words to exclude list
    STOPWORDS.add("chronic")
    STOPWORDS.add("disease")
    STOPWORDS.add("obstructive")
    STOPWORDS.add("status")

    # generate word cloud
    wordcloud = WordCloud(stopwords=STOPWORDS,
        background_color="white",
        width = 650,
        height = 250).generate_from_text(thefile)

    # re-colors and saves the wordcloud as a png
    wordcloud.recolor(color_func=grey_color_func, random_state=3)
    wordcloud.to_file("wordcloud.png")
Example #29
def generateImage():
    if len(stdin_input) > 0:
        text = stdin_input[-1] # get the latest text
    else:
        text = ""

    wc = WordCloud(font_path=r'C:\Windows\Fonts\Verdana.ttf', width=750, height=400).generate(text)
    wc = wc.recolor(color_func=grey_color_func, random_state=3)
    img = wc.to_image()
    
    scale = 0.5 # % of the whole screen
    size = tuple(int(i * scale) for i in screensize)
    img = img.resize(size, Image.ANTIALIAS)
    return img
Example #30
    if word not in stopwords:
        new_text.append(word)

counter = Counter(new_text)
pprint.pprint(counter.most_common(50))

# Plot the word cloud
font_path = '/System/Library/fonts/PingFang.ttc'
mask = imread(MASKFILE)
img_color = ImageColorGenerator(mask)
other_stopwords = ['这是']

wordcloud = WordCloud(
    font_path=font_path,
    margin=2,  # page margin
    mask=mask,
    scale=2,
    max_words=200,  # maximum number of words
    min_font_size=4,  # minimum font size
    random_state=42,
    background_color='white',  # background color
    max_font_size=150,  # maximum font size
)
wordcloud.generate_from_frequencies(counter)
wordcloud.recolor(color_func=img_color)

plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
#plt.show()
plt.savefig(SAVEIMGFILE)
Example #31
               stopwords = stopwords
               )
wc = wc.generate(text)  # generate the word cloud

save_path = path.join(pwd, "profile.png")
wc.to_file(save_path)

# Generate an image with a black background and a colored word cloud

# Random colors transitioning from grey to white
def gray_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    # HSL color (hue, saturation, lightness); the lightness is randomized
    return 'hsl(0, 0%%, %d%%)' % random.randint(60, 100)

# Show the image
plt.imshow(wc)
plt.axis("off")

# Draw another figure
plt.figure()
# grey version
wc_gray = wc.recolor(color_func = gray_color_func, random_state=None)
plt.imshow(wc_gray)
plt.axis("off")

# Draw one more figure
plt.figure()
plt.imshow(alice_mask, cmap=plt.cm.gray)
plt.axis("off")
plt.show()
Example #32
def run_visualize():
    check = "\u2714"
    wrong = "\u2716"
    filepath = os.path.join(os.path.expanduser("~"), "Downloads/Image Analytics")
    workbook=xlsxwriter.Workbook(filepath+"/Image Analytics Data Visualization.xlsx")
    worksheet=workbook.add_worksheet("Dashboard")
    worksheet.hide_gridlines(2)

    merge_format_title = workbook.add_format({'bold': 1,'font_size':20,'font':"Georgia",'border': 10,'align': 'center','valign': 'vcenter',})
    merge_format = workbook.add_format({'bold': 1,'font_size':10,'font':"Georgia",'border': 1,'align': 'center','valign': 'vcenter',})
    
    worksheet.merge_range('C2:V5', 'Dashboard for Image Analytics', merge_format_title)

    worksheet.merge_range('C7:K8', 'Image and Tweets Statistics', merge_format)
    worksheet.merge_range('C26:K27', 'Label Analysis', merge_format)
    worksheet.merge_range('C53:K54', 'Safe Search Properties', merge_format)
    worksheet.merge_range('C72:K73', 'Word Cloud of Tweets', merge_format)
    worksheet.merge_range('C90:K91', 'Logo Analysis', merge_format)

    worksheet.merge_range('N7:V8', 'Statistics of Collected Images', merge_format)
    worksheet.merge_range('N26:V27', 'Web Search Properties', merge_format)
    worksheet.merge_range('N53:V54', 'Facial Expression', merge_format)
    worksheet.merge_range('N72:V73', 'Word Cloud of Image Text', merge_format)
    worksheet.merge_range('N90:V91', 'Landmark Analysis', merge_format)

    plt.style.use("seaborn")
    plt.rcParams["font.family"] = "Georgia"

    label = pd.read_excel(filepath+"/ImageDescription.xlsx",sheet_name="Label")
    web = pd.read_excel(filepath+"/ImageDescription.xlsx",sheet_name="Web Search")
    safe = pd.read_excel(filepath+"/ImageDescription.xlsx",sheet_name="Safe Search")
    face = pd.read_excel(filepath+"/ImageDescription.xlsx",sheet_name="Face")
    logo = pd.read_excel(filepath+"/ImageDescription.xlsx",sheet_name="Logo")
    landmark = pd.read_excel(filepath+"/ImageDescription.xlsx",sheet_name="Landmark")
    text_img = pd.read_excel(filepath+"/ImageDescription.xlsx",sheet_name="Text")

    plt.figure(figsize=(11,6))
    try:
        stats = pd.read_excel(filepath+"/ImageDescription.xlsx",sheet_name="Stats")
        x = []
        if "No of Tweets" in stats.columns:
            x = ("No of Tweets","Total Images","Unique Images")
            stats = pd.Series.from_array([int(stats["No of Tweets"].values),int(stats["Total Images"].values),int(stats["Unique Images"].values)])
        else:
            x = ("No of Links","Total Images","Unique Images")
            stats = pd.Series.from_array([int(stats["No of Links"].values),int(stats["Total Images"].values),int(stats["Unique Images"].values)])
        ax = stats.plot(kind="bar",fontsize=17,rot=0)
        ax.set_xticklabels(x)
        for bar in ax.patches:
            ax.annotate(str(bar.get_height()), (bar.get_x() + bar.get_width() / 2, bar.get_height()), ha='center', va='bottom', fontsize=14)
        plt.ylabel("No of occurrences",fontsize=17)
        plt.yticks(fontsize=14)
        plt.savefig(filepath+"/stats.png",bbox_inches='tight')
        worksheet.insert_image("C10",filepath+"/stats.png",{'x_scale': 0.63, 'y_scale': 0.62})
        wb.sheets[0].range('E27').value = check
    except xlrd.biffh.XLRDError:
        plt.plot([],[])
        plt.text(-0.06, 0,"In Local System Image Analytics Real Tweet Stats not available",fontdict={'family': 'Georgia','color':'red','weight': 'normal','size': 25})
        plt.axis("off")
        plt.savefig(filepath+"/stats.png",bbox_inches='tight',facecolor="#EEEEEE")
        worksheet.insert_image("C10",filepath+"/stats.png",{'x_scale': 0.61, 'y_scale': 0.62})
        wb.sheets[0].range('E27').value = wrong
        pass

    plt.figure(figsize=(11,6))
    x = ("Label","Face","Logo","Landmark","Text","Web Search","Safe Search")
    stats_img = pd.Series.from_array([label["Label Names"].count(),face["File Name"].count(),logo["Logo Names"].count(),landmark["Landmark Names"].count(),text_img["Text"].count(),web["Web Search Properties"].count(),safe["File Name"].count()])
    ax = stats_img.plot(kind="bar",fontsize=14,rot=0)
    ax.set_xticklabels(x)
    for bar in ax.patches:
        ax.annotate(str(bar.get_height()), (bar.get_x() + bar.get_width() / 2, bar.get_height()), ha='center', va='bottom', fontsize=14)
    plt.ylabel("No of occurrences",fontsize=17)
    plt.yticks(fontsize=14)
    plt.savefig(filepath+"/stats_properties.png",bbox_inches='tight')
    worksheet.insert_image("N10",filepath+"/stats_properties.png",{'x_scale': 0.65, 'y_scale': 0.62})

    plt.figure(figsize=(11,6))
    ax = label["Label Names"].value_counts().nlargest(10).plot(kind="bar")
    plt.xticks(fontsize=16,rotation=65)
    for bar in ax.patches:
        ax.annotate(str(bar.get_height()), (bar.get_x() + bar.get_width() / 2, bar.get_height()), ha='center', va='bottom', fontsize=14)
    plt.yticks(fontsize=14)
    plt.ylabel("No of occurrences",fontsize=18)
    plt.savefig(filepath+"/label.png",bbox_inches='tight')
    worksheet.insert_image("C29",filepath+"/label.png",{'x_scale': 0.64, 'y_scale': 0.62})
    wb.sheets[0].range('F27').value = check

    plt.figure(figsize=(11,6))
    ax = web["Web Search Properties"].value_counts().nlargest(10).plot(kind="bar")
    plt.xticks(fontsize=16,rotation=65)
    for bar in ax.patches:
        ax.annotate(str(bar.get_height()), (bar.get_x() + bar.get_width() / 2, bar.get_height()), ha='center', va='bottom', fontsize=14)
    plt.yticks(fontsize=14)
    plt.ylabel("No of occurrences",fontsize=18)
    plt.savefig(filepath+"/websearch.png",bbox_inches='tight')
    worksheet.insert_image("N29",filepath+"/websearch.png",{'x_scale': 0.65, 'y_scale': 0.62})
    wb.sheets[0].range('G27').value = check

    sw= set(STOPWORDS)
    plt.figure(figsize=(13,6))
    try:
        content = pd.read_excel(filepath+"/ImageDescription.xlsx",sheet_name="Text Analytics")
        text = process_text("".join(content["CONTENT"]))
        tweet = WordCloud(font_path=fm.findfont("Georgia"),background_color="#EEEEEE",max_words=2000,normalize_plurals= True,stopwords=sw,
                                                  width=1500, height=750).generate(text=text)
        tweet.recolor(color_func=rgb_func)
        plt.imshow(tweet)
        plt.axis("off")
        plt.savefig(filepath+"/tweet.png",bbox_inches='tight',facecolor="#EEEEEE")
        worksheet.insert_image("C75",filepath+"/tweet.png",{'x_scale': 0.61, 'y_scale': 0.58})
        wb.sheets[0].range('H27').value = check
    except xlrd.biffh.XLRDError:
        plt.plot([],[])
        plt.text(-0.05, 0,"In Local System Image Analytics Tweets not available",fontdict={'family': 'Georgia','color':'red','weight': 'normal','size': 25})
        plt.axis("off")
        plt.savefig(filepath+"/tweet.png",bbox_inches='tight',facecolor="#EEEEEE")
        worksheet.insert_image("C75",filepath+"/tweet.png",{'x_scale': 0.56, 'y_scale': 0.59})
        wb.sheets[0].range('H27').value = wrong
        pass

    plt.figure(figsize=(13,6))
    if not text_img.empty:
        text = process_text("".join(text_img["Text"]))
        img_text = WordCloud(font_path=fm.findfont("Georgia"),background_color="#EEEEEE",max_words=2000,normalize_plurals= True,stopwords=sw,
                                                         width=1500, height=750).generate(text=text)
        img_text.recolor(color_func=rgb_func)
        plt.imshow(img_text)
        plt.axis("off")
        plt.savefig(filepath+"/img_text.png",bbox_inches='tight',facecolor="#EEEEEE")
        worksheet.insert_image("N75",filepath+"/img_text.png",{'x_scale': 0.61, 'y_scale': 0.58})
        wb.sheets[0].range('I27').value = check
    else:
        plt.pie([],labels=[])
        plt.text(-0.35, 0,"Data Not Available",fontdict={'family': 'Georgia','color':'red','weight': 'normal','size': 25})
        plt.axis("off")
        plt.savefig(filepath+"/img_text.png",bbox_inches='tight',facecolor="#EEEEEE")
        worksheet.insert_image("N75",filepath+"/img_text.png",{'x_scale': 0.58, 'y_scale': 0.59})
        wb.sheets[0].range('I27').value = wrong

    plt.figure(figsize=(11,6))
    plt.subplot(2,2,1)
    safe["Adult"].value_counts().plot(kind="pie",autopct='%1.1f%%',startangle=0,fontsize=14)
    pylab.ylabel('')
    plt.title("Adult",fontsize=15,fontweight="bold")
    plt.axis("equal")
    plt.subplot(2,2,2)
    safe["Medical"].value_counts().plot(kind="pie",autopct='%1.1f%%',startangle=0,fontsize=14)
    pylab.ylabel('')
    plt.title("Medical",fontsize=15,fontweight="bold")
    plt.axis("equal")
    plt.subplot(2,2,3)
    safe["Spoof"].value_counts().plot(kind="pie",autopct='%1.1f%%',startangle=0,fontsize=14)
    pylab.ylabel('')
    plt.title("Spoof",fontsize=15,fontweight="bold")
    plt.axis("equal")
    plt.subplot(2,2,4)
    safe["Violence"].value_counts().plot(kind="pie",autopct='%1.1f%%',startangle=0,fontsize=14)
    plt.title("Violence",fontsize=15,fontweight="bold")
    pylab.ylabel('')
    plt.axis("equal")
    plt.savefig(filepath+"/safe.png",bbox_inches='tight',facecolor="#EEEEEE")
    worksheet.insert_image("C56",filepath+"/safe.png",{'x_scale': 0.69, 'y_scale': 0.62})
    wb.sheets[0].range('J27').value = check

    plt.figure(figsize=(11,6))
    if not face.empty:
        plt.subplot(221)
        face["Anger"].value_counts().plot(kind="pie",autopct='%1.1f%%',startangle=0,fontsize=14)
        pylab.ylabel('')
        plt.title("Anger",fontsize=15,fontweight="bold")
        plt.axis("equal")
        plt.subplot(222)
        face["Joy"].value_counts().plot(kind="pie",autopct='%1.1f%%',startangle=0,fontsize=14)
        pylab.ylabel('')
        plt.title("Joy",fontsize=15,fontweight="bold")
        plt.axis("equal")
        plt.subplot(212)
        face["Surprise"].value_counts().plot(kind="pie",autopct='%1.1f%%',startangle=0,fontsize=14)
        pylab.ylabel('')
        plt.title("Surprise",fontsize=15,fontweight="bold")
        plt.axis("equal")
        plt.savefig(filepath+"/face.png",bbox_inches='tight',facecolor="#EEEEEE")
        worksheet.insert_image("N56",filepath+"/face.png",{'x_scale': 0.69, 'y_scale': 0.61})
        wb.sheets[0].range('K27').value = check
    else:
        plt.pie([],labels=[])
        plt.text(-0.35, 0,"Data Not Available",fontdict={'family': 'Georgia','color':'red','weight': 'normal','size': 25})
        plt.savefig(filepath+"/face.png",bbox_inches='tight',facecolor="#EEEEEE")
        worksheet.insert_image("N56",filepath+"/face.png",{'x_scale': 0.69, 'y_scale': 0.62})
        wb.sheets[0].range('K27').value = wrong
    
    plt.figure(figsize=(11,6))
    if not logo.empty:
        ax = logo["Logo Names"].value_counts().nlargest(10).plot(kind="bar")
        plt.xticks(fontsize=16,rotation=65)
        for bar in ax.patches:
            ax.annotate(str(bar.get_height()), (bar.get_x() + bar.get_width() / 2, bar.get_height()), ha='center', va='bottom', fontsize=14)
        plt.yticks(fontsize=14)
        plt.ylabel("No of occurrences",fontsize=18)
        plt.savefig(filepath+"/logo.png",bbox_inches='tight',facecolor="#EEEEEE")
        worksheet.insert_image("C93",filepath+"/logo.png",{'x_scale': 0.64, 'y_scale': 0.63})
        wb.sheets[0].range('L27').value = check
    else:
        plt.plot([],[])
        plt.axis("off")
        plt.text(-0.022, 0,"Data Not Available",fontdict={'family': 'Georgia','color':'red','weight': 'normal','size': 25})
        plt.savefig(filepath+"/logo.png",bbox_inches='tight',facecolor="#EEEEEE")
        worksheet.insert_image("C93",filepath+"/logo.png",{'x_scale': 0.65, 'y_scale': 0.64})
        wb.sheets[0].range('L27').value = wrong

    plt.figure(figsize=(11,6))
    if not landmark.empty:
        ax = landmark["Landmark Names"].value_counts().nlargest(10).plot(kind="bar")
        plt.xticks(fontsize=16,rotation=65)
        for bar in ax.patches:
            ax.annotate(str(bar.get_height()), (bar.get_x() + bar.get_width() / 2, bar.get_height()), ha='center', va='bottom', fontsize=14)
        plt.yticks(fontsize=14)
        plt.ylabel("No of occurrences",fontsize=18)
        plt.savefig(filepath+"/landmark.png",bbox_inches='tight',facecolor="#EEEEEE")
        worksheet.insert_image("N93",filepath+"/landmark.png",{'x_scale': 0.64, 'y_scale': 0.63})
        wb.sheets[0].range('M27').value = check
    else:
        plt.plot([],[])
        plt.axis("off")
        plt.text(-0.022, 0,"Data Not Available",fontdict={'family': 'Georgia','color':'red','weight': 'normal','size': 25})
        plt.savefig(filepath+"/landmark.png",bbox_inches='tight',facecolor="#EEEEEE")
        worksheet.insert_image("N93",filepath+"/landmark.png",{'x_scale': 0.65, 'y_scale': 0.64})
        wb.sheets[0].range('M27').value = wrong

    workbook.close()

    try:
        os.remove(filepath+"/stats.png")
        os.remove(filepath+"/stats_properties.png")
        os.remove(filepath+"/label.png")
        os.remove(filepath+"/websearch.png")
        os.remove(filepath+"/tweet.png")
        os.remove(filepath+"/img_text.png")
        os.remove(filepath+"/safe.png")
        os.remove(filepath+"/face.png")
        os.remove(filepath+"/logo.png")
        os.remove(filepath+"/landmark.png")
    except OSError:
        pass

from PIL import Image
from nltk.corpus import stopwords
stop_words=set(stopwords.words())
clean_mask = np.array(Image.open("../input/images/safe-zone.png"))
clean_mask = clean_mask[:,:,1]
#wordcloud for clean comments
subset = train_data[train_data.clean==1]
text = subset.comment_text.values
wc = WordCloud(background_color='black',max_words=2000,mask=clean_mask,stopwords=stop_words)
wc.generate(" ".join(text))
plt.figure(figsize=(20,10))
plt.axis('off')
plt.title('Words frequent in clean comments',fontsize=20)
plt.imshow(wc.recolor(colormap = 'viridis',random_state=17), alpha=0.98)
plt.show()


# In[17]:


from PIL import Image
from nltk.corpus import stopwords
stop_words=set(stopwords.words())
clean_mask = np.array(Image.open("../input/images/nuclear.png"))
clean_mask = clean_mask[:,:,1]
#wordcloud for clean comments
subset = train_data[train_data.toxic==1]
text = subset.comment_text.values
wc = WordCloud(background_color='black',max_words=2000,mask=clean_mask,stopwords=stop_words)
Example #34
from wordcloud import WordCloud, ImageColorGenerator
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import jieba

# Open the text file
text = open('xyj.txt').read()

# Chinese word segmentation
text = ' '.join(jieba.cut(text))
print(text[:100])

# Create the WordCloud object
mask = np.array(Image.open("color_mask.png"))
wc = WordCloud(mask=mask,
               font_path='Hiragino.ttf',
               mode='RGBA',
               background_color=None).generate(text)

# Generate colors from the image
image_colors = ImageColorGenerator(mask)
wc.recolor(color_func=image_colors)

# Display the word cloud
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.show()

# Save to file
wc.to_file('wordcloud4.png')
    margin=2,  # margin around the words
    ranks_only=None,
    prefer_horizontal=0.9,
    mask=background_Image,  # background shape; set this to draw the cloud inside an image
    color_func=None,
    max_words=200,  # maximum number of words shown
    stopwords=None,  # stopwords; set this when cleaning up the cloud
    random_state=None,
    background_color='#ffffff',  # background color; a name such as white or a hex value
    font_step=1,
    mode='RGB',
    regexp=None,
    collocations=True,
    normalize_plurals=True,
    contour_width=0,
    colormap='viridis',  # matplotlib colormap; change the name to change the overall style
    contour_color='Blues',
    repeat=False,
    scale=2,
    min_font_size=10,
    max_font_size=200)

wc.generate_from_text(text)
wc.recolor(color_func=img_colors)

# Display and save the image
plt.imshow(wc, interpolation='bilinear')
plt.axis('off')
plt.tight_layout()
wc.to_file('sanguo.png')
plt.show()
async def _(event):
    if not event.reply_to_msg_id:
        await event.edit("`Mohon Balas Ke Media Apapun Petercord`")
        return
    reply_message = await event.get_reply_message()
    if not reply_message.media:
        await event.edit("`Mohon Balas Ke Gambar/Sticker/Video Petercord`")
        return
    await event.edit("`Mendownload Media.....`")
    if reply_message.photo:
        await bot.download_media(
            reply_message,
            "wc.png",
        )
    elif (DocumentAttributeFilename(file_name="AnimatedSticker.tgs")
          in reply_message.media.document.attributes):
        await bot.download_media(
            reply_message,
            "wc.tgs",
        )
        os.system("lottie_convert.py wc.tgs wc.png")
    elif reply_message.video:
        video = await bot.download_media(
            reply_message,
            "wc.mp4",
        )
        extractMetadata(createParser(video))
        os.system("ffmpeg -i wc.mp4 -vframes 1 -an -s 480x360 -ss 1 wc.png")
    else:
        await bot.download_media(
            reply_message,
            "wc.png",
        )
    try:
        await event.edit("`Sedang Memproses....`")
        text = open("userbot/utils/styles/alice.txt", encoding="utf-8").read()
        image_color = np.array(Image.open("wc.png"))
        image_color = image_color[::1, ::1]
        image_mask = image_color.copy()
        image_mask[image_mask.sum(axis=2) == 0] = 255
        edges = np.mean(
            [
                gaussian_gradient_magnitude(image_color[:, :, i] / 255.0, 2)
                for i in range(3)
            ],
            axis=0,
        )
        image_mask[edges > 0.08] = 255
        wc = WordCloud(
            max_words=2000,
            mask=image_mask,
            max_font_size=40,
            random_state=42,
            relative_scaling=0,
        )
        wc.generate(text)
        image_colors = ImageColorGenerator(image_color)
        wc.recolor(color_func=image_colors)
        wc.to_file("wc.png")
        await event.client.send_file(
            event.chat_id,
            "wc.png",
            reply_to=event.reply_to_msg_id,
        )
        await event.delete()
        os.system("rm *.png *.mp4 *.tgs *.webp")
    except BaseException as e:
        os.system("rm *.png *.mp4 *.tgs *.webp")
        return await event.edit(str(e))
hashtags_column = df["place_name"].values  # text column
all_hashtags = join_data(hashtags_column)

stopwords = []
# https://www.flaticon.com/free-icon/world-map_290185#term=world%20map&page=3&position=19
image_name = "earth-globe.png"
mask = np.array(Image.open(image_name))
wordcloud = WordCloud(stopwords=stopwords,
                          background_color="white",
                          mode="RGBA",
                          max_words=1000, mask=mask).generate(",".join(all_hashtags))

# create coloring from image
image_colors = ImageColorGenerator(mask)
plt.figure(figsize=[7, 7])
plt.imshow(wordcloud.recolor(color_func=image_colors), interpolation="bilinear")
plt.axis("off")
# store to file
plt.savefig("wordcloud.png", format="png")
 
df_sentiment = df.drop(['created_at', 'text', 'place_type', "hashtags", "place_type", "place_fullname",
                        "place_country_code", "place_country"], axis=1)
df_sentiment = df_sentiment.dropna(subset=['place_name'])
sort_by_city = df_sentiment.sort_values("place_name")
print(sort_by_city.head(n=100))

# group by city and find mean
sum_data = df_sentiment.groupby(['place_name'])['polarity', 'subjectivity'].mean().reset_index()
print(sum_data)
fig = plt.figure()
ax = fig.add_subplot(111)
Example #38
    bfreq = nltk.FreqDist(bwords)
    #for word, freq in bfreq.most_common(100):
    #    print('%s: %g' % (word, freq))

    awords = nltk.word_tokenize(atext)
    awords = [word for word in awords if word not in stopwords and word not in
            commonwords and len(word) > 2 and "'" not in word]
    afreq = nltk.FreqDist(awords)
    #for word, freq in afreq.most_common(100):
    #    print('%s: %g' % (word, freq))

    cloud = np.array(Image.open('cloud.jpg'))
    bwc = WordCloud(background_color='white', stopwords = stopwords, width
            = 500, height = 500, mask = cloud, max_words = 1000).generate(btext)
    awc = WordCloud(background_color='white', stopwords = stopwords, width
            = 500, height = 500, mask = cloud, max_words = 1000).generate(atext)

    plt.imshow(bwc.recolor(colormap='viridis'), interpolation = 'bilinear')
    plt.title('Brian')
    ax = plt.gca()
    ax.axis('off')
    plt.tight_layout()
    plt.show()

    plt.imshow(awc.recolor(colormap='winter'), interpolation = 'bilinear')
    plt.title('Annie')
    ax = plt.gca()
    ax.axis('off')
    plt.tight_layout()
    plt.show()
Example #39
def test_recolor_too_small_set_default():
    # check no exception is raised when default colour is used
    colouring = np.array(Image.new('RGB', size=(20, 20)))
    wc = WordCloud(max_words=50, width=30, height=30, min_font_size=1).generate(THIS)
    image_colors = ImageColorGenerator(colouring, default_color=(0, 0, 0))
    wc.recolor(color_func=image_colors)
Example #40
tokenized_earl = Tokenizer(inputCol="text", outputCol="words")
tWords_earl = tokenized_earl.transform(earl)
SWremover_earl = StopWordsRemover(inputCol="words", outputCol="filtered")
SWremoved_earl = SWremover_earl.transform(tWords_earl).select("filtered")
FWords_earl = SWremoved_earl.rdd.flatMap(once)
WCount_earl = FWords_earl.reduceByKey(operator.add)
FreqWords_earl = WCount_earl.sortBy(lambda t: t[1], ascending = False).take(400)
FreqWordDict_earl = dict(FreqWords_earl)

mask_earl = np.array(Image.open("visualization/likesimba.png"))
wordcloud_earl = WordCloud(width =1600,height=800, background_color="white", max_words=1000, mask=mask_earl).generate_from_frequencies(FreqWordDict_earl)
image_colors_earl = ImageColorGenerator(mask_earl)

title_earl = 'Most frequent words from tips review for Earl of Sandwich'
plt.figure(figsize=[20,10],facecolor='k')
plt.imshow(wordcloud_earl.recolor(color_func=image_colors_earl),interpolation="bilinear")
plt.title(title_earl, size=25, y=1.01)
plt.axis("off")
plt.savefig("visualization/earl.png", format="png")

#WordCloud for restaurants in Ontario --> location based

df_yelp_tip.registerTempTable("df_yelp_tip")

ontario = spark.sql("""SELECT * FROM df_yelp_tip where  state == 'ON' """)
ontario = ontario.select("text")
tokenized_ontario = Tokenizer(inputCol="text", outputCol="words")
tWords_ontario = tokenized_ontario.transform(ontario)
SWremover_ontario = StopWordsRemover(inputCol="words", outputCol="filtered")
SWremoved_ontario = SWremover_ontario.transform(tWords_ontario).select("filtered")
FWords_ontario = SWremoved_ontario.rdd.flatMap(once)
# WordCloud(font_path=None, width=400, height=200, margin=2, ranks_only=None,
# prefer_horizontal=0.9, mask=None, scale=1, color_func=None, max_words=200,
# min_font_size=4, stopwords=None, random_state=None, background_color='black',
# max_font_size=None, font_step=1, mode='RGB', relative_scaling='auto',
# regexp=None, collocations=True, colormap=None, normalize_plurals=True,
# contour_width=0, contour_color='black', repeat=False, include_numbers=False,
# min_word_length=0)
# max_font_size, max_words, and background_color are the primary arguments used
# to manipulate the wordcloud.
# contour_width and contour_color are used to create an outline to the cloud.
# background_color=None and mode='RGBA' at the same time makes the background
# transparent.
# stopwords=None does not mean stopwords will not be removed. It actually means
# that the default in-built stopwords list will be used. To keep stopwords in
# the wordcloud, pass an empty list.
cloud = WordCloud(background_color='white', max_words=200,
                  mask=mask, stopwords=stops, mode='RGB')
cloud.generate(full_text)
fig, ax = plt.subplots(figsize=(12.5, 7.5))
# Recoloring the wordcloud is done in this step. If a default wordcloud is
# desired, pass cloud without recolor.
# ax.imshow(cloud, interpolation='bilinear')
ax.imshow(cloud.recolor(color_func=image_colors), interpolation='bilinear')
ax.axis('off')
plt.show()
plt.close()

# Demonstrates exporting the wordcloud to a file.
export_path = r'.\images\wordcloud.jpg'
cloud.to_file(export_path)
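Two variants mentioned in the comment block above, shown as a hedged sketch reusing the same `mask` and `full_text`: a transparent background (background_color=None with mode='RGBA') and keeping stopwords by passing an empty list. The output filename is an assumption.

cloud_rgba = WordCloud(background_color=None, mode='RGBA', max_words=200,
                       mask=mask, stopwords=[])   # empty list: no stopwords are removed
cloud_rgba.generate(full_text)
cloud_rgba.to_file(r'.\images\wordcloud_transparent.png')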
Example #42
w = int(input("Width: "))
h = int(input("Height: "))

wordcloud = WordCloud(width=w,
                      height=h,
                      background_color='black',
                      min_font_size=5,
                      stopwords=stopwords,
                      random_state=42).generate(finalText)


# for grey scale
def grey_color_func(word,
                    font_size,
                    position,
                    orientation,
                    random_state=None,
                    **kwargs):
    return "hsl(0, 0%%, %d%%)" % random.randint(60, 100)


# plot the wordcloud image
plt.figure(figsize=(8, 2), facecolor=None)
plt.imshow(wordcloud)
plt.imshow(wordcloud.recolor(color_func=grey_color_func, random_state=3),
           interpolation="bilinear")
plt.axis("off")
plt.tight_layout(pad=0)
plt.savefig("wordcloud.png")
plt.show()
Example #43
wc = WordCloud(
    background_color="white",  #背景颜色
    font_path=font_path,  #字体选择
    max_words=1000,  #最大词数
    mask=back_coloring,  #背景图片
    max_font_size=100,  #最大字体大小
    width=1000,
    height=860,
    margin=2)

wc.fit_words(dict(over_list[20:]))

plt.figure()
# show the image
plt.imshow(wc)
plt.axis("off")
plt.show()
# save the image
wc.to_file(
    path.join(path.dirname(__file__), "C:/Users/TTT/Desktop/杂/wordcloud1.png"))
# change colors
image_color = ImageColorGenerator(back_coloring)  # generate color values from the background image
plt.imshow(wc.recolor(color_func=image_color))  # recolor the layout with the new colors
plt.axis('off')  # turn off the axes
# plot the background coloring image
plt.figure()
plt.imshow(back_coloring, cmap=plt.cm.gray)
plt.axis('off')
plt.show()
wc.to_file(
    path.join(path.dirname(__file__), "C:/Users/TTT/Desktop/杂/wordcloud2.png"))
Example #44
    for feature in row['features']:
        text = " ".join([text, "_".join(feature.strip().split(" "))])
    text_da = " ".join(
        [text_da, "_".join(row['display_address'].strip().split(" "))])
    #text_desc = " ".join([text_desc, row['description']])
text = text.strip()
text_da = text_da.strip()
text_desc = text_desc.strip()

plt.figure(figsize=(12, 6))
wordcloud = WordCloud(background_color='white',
                      width=600,
                      height=300,
                      max_font_size=50,
                      max_words=40).generate(text)
wordcloud.recolor(random_state=0)
plt.imshow(wordcloud)
plt.title("Wordcloud for features", fontsize=30)
plt.axis("off")
plt.show()

# wordcloud for display address
plt.figure(figsize=(12, 6))
wordcloud = WordCloud(background_color='white',
                      width=600,
                      height=300,
                      max_font_size=50,
                      max_words=40).generate(text_da)
wordcloud.recolor(random_state=0)
plt.imshow(wordcloud)
plt.title("Wordcloud for Display Address", fontsize=30)
Example #45
plt.figure(figsize=(10, 10))
plt.axis('off')
plt.pause(2)
count = 0
for words in text.split(" "):
    txt += words
    if count > 100:
        wc.generate(txt)
        count=0
        plt.imshow(wc)
        plt.pause(0.1)
    else:
        count = count + 1
plt.show()

"""
# create coloring from image
image_colors = ImageColorGenerator(parrot_color)
wc.recolor(color_func=image_colors)
plt.figure(figsize=(10, 10))
plt.imshow(wc, interpolation="bilinear")
wc.to_file("parrot_new.png")

plt.figure(figsize=(10, 10))
plt.title("Original Image")
plt.imshow(parrot_color)

plt.figure(figsize=(10, 10))
plt.title("Edge map")
plt.imshow(edges)
plt.show()
Example #46
def TXT2WC():

    # Get the path of the current file
    # __file__ is the current file; if this line errors when run in an IDE, change it to
    # d = path.dirname('.')
    d = path.dirname(__file__)
    global stopwords_path
    # stopwords = {}

    isCN = 1  # enable Chinese word segmentation by default
    pic_name = var1.get()
    file_name = var2.get()
    new_name = var3.get()
    back_col = var4.get()
    back_coloring_path = "file/{}".format(pic_name)  # 设置背景图片路径
    text_path = 'file/{}'.format(file_name)  #设置要分析的文本路径
    font_path = 'file/msyhbd.ttf'  # 为matplotlib设置中文字体路径没
    stopwords_path = 'file/stopwords1893.txt'  # 停用词词表
    imgname1 = "{}-Str.jpg".format(new_name)
    # imgname1 = "云图BStr{}.png".format(random.randint(1,30)) # 保存的图片名字1(只按照背景图片形状)
    imgname2 = imgname1.replace("Str", "Col")  # 保存的图片名字2(颜色按照背景图片颜色布局生成)

    my_words_list = ['路明非']  # new words to add to jieba's dictionary
    back_coloring = imread(path.join(d, back_coloring_path))  # load the mask image

    # word cloud settings
    wc = WordCloud(
        font_path=font_path,  # font
        background_color="white",  # background colour
        max_words=400,  # maximum number of words shown
        mask=back_coloring,  # mask image
        max_font_size=350,  # maximum font size
        random_state=42,
        scale=10,
        width=1366,
        height=768,
        margin=2,  # default image size; if a mask is used the saved image follows the mask's size, margin is the spacing around each word
    )
    add_word(my_words_list)
    text = open(path.join(d, text_path), 'r', encoding='utf-8').read()

    if isCN:
        text = jiebaclearText(text)

    # Generate the cloud: either feed the full text to generate() (wordcloud's own Chinese
    # segmentation is poor, so enabling jieba is recommended), or pre-compute word
    # frequencies and call generate_from_frequencies()
    wc.generate(text)
    # wc.generate_from_frequencies(txt_freq)
    # example txt_freq: [('词a', 100), ('词b', 90), ('词c', 80)] (newer wordcloud versions expect a dict)
    # build colour values from the mask image
    # save image 1 (shaped by the mask only)
    image_colors = ImageColorGenerator(back_coloring)
    wc.to_file(path.join(d, "img/{}".format(imgname1)))
    plt.figure()
    # show the image
    plt.imshow(wc)
    plt.axis("off")
    plt.show()
    # draw the word cloud

    if back_col == 'n':
        pass
    else:
        # save image 2 (coloured from the mask image)
        image_colors = ImageColorGenerator(back_coloring)
        plt.imshow(wc.recolor(color_func=image_colors))
        # save the image
        wc.to_file(path.join(d, "img/{}".format(imgname2)))
        plt.axis("off")
        # (optionally) show the mask image itself
        # plt.figure()
        # plt.imshow(back_coloring, cmap=plt.cm.gray)
        # plt.axis("off")
        plt.show()
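add_word() and jiebaclearText() are called above but not defined in this excerpt; a plausible minimal implementation (an assumption, not the original author's code):

import jieba

def add_word(words):
    # register custom words with jieba's dictionary
    for w in words:
        jieba.add_word(w)

def jiebaclearText(text):
    # segment Chinese text with jieba and drop stop words
    with open(stopwords_path, 'r', encoding='utf-8') as f:
        stop = {line.strip() for line in f}
    return " ".join(w for w in jieba.cut(text) if w.strip() and w not in stop)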
Example #47
def main():
    """ Fonction principale, trouve les mots clés et genere le nuage de mot affiche avec matplotlib """
    global listeFichier
    if len(listeFichier) > 0:
        l1 = 9
        if langue.get() == 'anglais': l1 = 7
        print(listeFichier)
        if methode.get() == 'Intervalle':
            ListeMotCle = methodeIntervalle(l1)
        else:  # page-by-page method
            ListeMotCle = methodePageParPage(l1)
        SuppTemp()  # delete temporary files
        ListeMotCle = tirets(ListeMotCle)
        ListeMotCleAffichage = remake(ListeMotCle)
        if export.get() != 0:
            AddFichierCVS(ListeMotCleAffichage)
        AffichageMotCle(ListeMotCleAffichage)
        # build the word string (with repetitions)
        text = ""
        for mot in ListeMotCle:
            text = text + " " + mot
        stopwords = set(STOPWORDS)
        stopwords.add("said")
        alice_coloring = np.array(Image.open(path.join(d, "fond.png")))
        wc = WordCloud(background_color="white",
                       max_words=2000,
                       mask=alice_coloring,
                       stopwords=stopwords,
                       max_font_size=400,
                       random_state=42)
        # generate word cloud
        wc.generate(text)

        # create coloring from image
        image_colors = ImageColorGenerator(alice_coloring)

        # show
        plt.imshow(wc, interpolation="bilinear")
        plt.axis("off")
        plt.figure()
        # recolor wordcloud and show
        # we could also give color_func=image_colors directly in the constructor
        plt.imshow(wc.recolor(color_func=image_colors),
                   interpolation="bilinear")
        plt.axis("off")
        #plt.figure()
        plt.show()
        """cloud_mask = np.array(Image.open(path.join(dirName,"fond.png")))
        stopwords = set(STOPWORDS)
        for mot in ListeMotCle:
            stopwords.add(mot)
        wordcloud = WordCloud(background_color="white", max_words=2000, mask=cloud_mask,
        stopwords=stopwords)
        # generate the word cloud
        wordcloud = WordCloud().generate(text)
        # lower max_font_size
        wordcloud = WordCloud(max_font_size=40).generate(text)
        plt.figure()
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        plt.show()"""
        MAJAffFic()
Example #48
            pass
BigBag = BagOfWords + BagOfHashes
stop_words = set(stopwords.words('english'))
rawWord = [w for w in BigBag if w.lower() not in stop_words]
IgnoreThese = ["yr-old", "here's", "year-old", "thi", "let's"]
rawWord = [w for w in rawWord if w.lower() not in IgnoreThese]
text = ' '.join(rawWord)

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
# requires: pip install wordcloud
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

#%%
mask = np.array(Image.open("MD.png"))
# random_state=7
wordcloud = WordCloud(background_color="white",
                      mask=mask,
                      collocations=False,
                      stopwords=STOPWORDS,
                      max_font_size=65).generate_from_text(text)
image_colors = ImageColorGenerator(mask)
# Open a plot of the generated image.
plt.figure(figsize=(12, 24))
image = wordcloud.recolor(color_func=image_colors)
plt.imshow(image, interpolation="bilinear")
plt.title('WordCloud - Python')
plt.axis("off")
plt.show()
plt.imsave('WordCloud.png', image)
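ImageColorGenerator colours each word from the pixels of MD.png beneath it; in recent wordcloud versions a fallback colour can be supplied for regions of the mask that carry no usable colour (illustrative):

# illustrative: fall back to black where the mask has no usable colour
image_colors = ImageColorGenerator(mask, default_color=(0, 0, 0))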
Example #49
# movie script of "a new hope"
# http://www.imsdb.com/scripts/Star-Wars-A-New-Hope.html
# May the lawyers deem this fair use.
text = open("a_new_hope.txt").read()

# preprocessing the text a little bit
text = text.replace("HAN", "Han")
text = text.replace("LUKE'S", "Luke")

# adding movie script specific stopwords
stopwords = STOPWORDS.copy()
stopwords.add("int")
stopwords.add("ext")

wc = WordCloud(max_words=1000,
               mask=mask,
               stopwords=stopwords,
               margin=10,
               random_state=1).generate(text)
# store default colored image
default_colors = wc.to_array()
plt.title("Custom colors")
plt.imshow(wc.recolor(color_func=grey_color_func, random_state=3))
wc.to_file("a_new_hope.png")
plt.axis("off")
plt.figure()
plt.title("Default colors")
plt.imshow(default_colors)
plt.axis("off")
plt.show()
Example #50
    data = myfile.read()

    final = rake_words(data)
    final = str(final)
    print(final)

    alice_coloring = np.array(Image.open("maxresdefault_burned.png"))
    stopwords = set(STOPWORDS)
    stopwords.add("said")

    wc = WordCloud(background_color="white", max_words=2000, mask=alice_coloring,
               stopwords=stopwords, max_font_size=40, random_state=42)
    # generate word cloud
    wc.generate(final)

    wc.to_file("C:\\Users\\sidharth.m\\Desktop\\srk_tweetcolor.jpg")
    # create coloring from image
    image_colors = ImageColorGenerator(alice_coloring)

    # show
    fig, axes = plt.subplots(1, 3)
    axes[0].imshow(wc, interpolation="bilinear")
    # recolor wordcloud and show
    # we could also give color_func=image_colors directly in the constructor
    axes[1].imshow(wc.recolor(color_func=image_colors), interpolation="bilinear")
    axes[2].imshow(alice_coloring, cmap=plt.cm.gray, interpolation="bilinear")
    for ax in axes:
        ax.set_axis_off()
    plt.show()

Example #51
    def topic_word_art(self,
                       topic_idx=None,
                       n_words=20,
                       save_file=None,
                       color_func=None,
                       random_state=1,
                       fig_row_size=16,
                       **kwargs):
        if topic_idx is None:
            ncols = int(floor(sqrt(self.n_topics)))
            nrows = int(ceil(self.n_topics / float(ncols)))
            ncols_to_nrows_ratio = ncols / nrows
            plt.figure(figsize=(fig_row_size,
                                ncols_to_nrows_ratio * fig_row_size))
            for i in range(self.n_topics):
                plt.subplot(nrows, ncols, i + 1)
                self.topic_word_art(topic_idx=i,
                                    n_words=n_words,
                                    save_file=save_file,
                                    color_func=color_func,
                                    random_state=random_state,
                                    **kwargs)
            plt.gcf().subplots_adjust(wspace=.1, hspace=.1)
        # elif isinstance(topic_idx, (list, tuple, ndarray)) and len(topic_idx) == self.n_topics:
        #     ncols = int(floor(sqrt(self.n_topics)))
        #     nrows = int(ceil(self.n_topics / float(ncols)))
        #     ncols_to_nrows_ratio = ncols / nrows
        #     plt.figure(figsize=(fig_row_size, ncols_to_nrows_ratio * fig_row_size))
        #     for i in range(self.n_topics):
        #         plt.subplot(nrows, ncols, i + 1)
        #         self.topic_word_art(topic_idx=i, n_words=n_words, save_file=save_file,
        #                             color_func=color_func, random_state=random_state,
        #                             width=int(self.wordcloud_params['width'] * topic_idx[i]),
        #                             height=int(self.wordcloud_params['height'] * topic_idx[i]))
        #     plt.gcf().subplots_adjust(wspace=.1, hspace=.1)
        else:
            kwargs = dict(self.wordcloud_params, **kwargs)
            if color_func is None:
                color_func = self.word_art_params.get(
                    'color_func', self.topic_color[topic_idx])
            if isinstance(color_func, tuple):
                color_func = "rgb({}, {}, {})".format(*map(int, color_func))
            if isinstance(color_func, basestring):
                color = color_func

                def color_func(word,
                               font_size,
                               position,
                               orientation,
                               random_state=None,
                               **kwargs):
                    return color
            elif not callable(color_func):
                raise TypeError("Unrecognized color_func type ({})".format(
                    type(color_func)))

            # kwargs = dict(self.word_art_params, **kwargs)
            wc = WordCloud(random_state=random_state, **kwargs)
            wc.fit_words([(self.word_preprocessor(k), v)
                          for k, v in self.topic_word_weights[topic_idx].
                          iloc[:n_words].to_dict().iteritems()])
            # wc.recolor(color_func=kwargs['color_func'], random_state=random_state)
            plt.imshow(
                wc.recolor(color_func=color_func, random_state=random_state))
            plt.grid(False)
            plt.xticks([])
            plt.yticks([])
Example #52
File: cloud.py  Project: shtyui/python
import random
import jieba
import numpy as np
from PIL import Image
from wordcloud import WordCloud, ImageColorGenerator
from matplotlib import pyplot as plt
from palettable.colorbrewer.sequential import YlGnBu_9

text = open('test.txt', 'r',encoding= 'UTF-8-sig').read()
text =' '.join(jieba.cut(text))
icon_path ='icon.png'
icon = Image.open(icon_path)
mask = Image.new("RGB" ,icon.size, (255,255,255))
mask.paste(icon,icon)
mask = np.array(mask)


def color_func(word, font_size,position,orientation, random_state=None, **kwargs):
    return tuple(YlGnBu_9.colors[random.randint(0,8)])

font_path = 'SNsanafonGyou.ttf'
wc = WordCloud(font_path=font_path,background_color="black",max_words=2000,\
               mask=mask,max_font_size=300,random_state=1)
wc.generate_from_text(text)
wc.recolor(color_func=color_func,random_state=2)

output_path = 'wordcloud.png'
wc.to_file(output_path)

plt.rcParams["figure.figsize"] = (25, 25)  # set the display figure size
plt.imshow(wc)
plt.axis("off")
plt.show()
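The palettable-based color_func above draws random colours from the YlGnBu palette; a similar effect is available through WordCloud's built-in colormap parameter (an illustrative alternative, not the original code):

# illustrative alternative: let WordCloud sample colours from a matplotlib colormap
wc_alt = WordCloud(font_path=font_path, background_color="black", max_words=2000,
                   mask=mask, max_font_size=300, colormap="YlGnBu",
                   random_state=1).generate_from_text(text)
wc_alt.to_file("wordcloud_colormap.png")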
Example #53
def word_cloud_bigram(bigrams, mask):
    len_bigrams = len(bigrams)
    print("   * Were computed a total of {} bigrams from dataset.\n".format(len_bigrams))
    
    mask_default = "cloud_mask.png"
    #if os.path.join("data", "img", mask)
        
    def color_function(mask):
        if mask == "mapa_brasil_mask.png":
            def color_func(word, font_size, position, orientation, random_state=None,**kwargs):
                return "hsl(190, 40%%, %d%%)" % random.randint(30, 60) #sky
            color_cont = (219, 236, 240)
        elif mask == "thumbs_down_mask_3.png":
            def color_func(word, font_size, position, orientation, random_state=None,**kwargs):
                return "hsl(0, 80%%, %d%%)" % random.randint(30, 60) #fake
            color_cont = (250, 209, 209)
        elif mask == "thumbs_up_mask.png":
            def color_func(word, font_size, position, orientation, random_state=None,**kwargs):
                return "hsl(130, 40%%, %d%%)" % random.randint(30, 60) #real
            color_cont = (219, 240, 223)
        else:
            def color_func(word, font_size, position, orientation, random_state=None,**kwargs):
                return "hsl(0, 0%%, %d%%)" % random.randint(60, 100) #grey
            color_cont = (219, 236, 240)
        return color_func, color_cont

    color_function, color_cont = color_function(mask)

    vectorizer = CountVectorizer(ngram_range=(2, 2))
    bag_of_words = vectorizer.fit_transform(bigrams)
    sum_words = bag_of_words.sum(axis=0)
    words_freq = [(word, sum_words[0, idx]) for word, idx in vectorizer.vocabulary_.items()]
    words_freq = sorted(words_freq, key=lambda x: x[1], reverse=True)
    words_dict = dict(words_freq)

    mask = np.array(Image.open(os.path.join("data", "img", mask)))

    # cloud_of_words = WordCloud(width = 1080,
    #                            height = 1080,
    #                            max_font_size = 110,
    #                            collocations = False,
    #                            mask = mask,
    #                            background_color = "white",
    #                            contour_width = 3,
    #                            contour_color = (219, 236, 240)).generate_from_frequencies(words_dict)
    # cloud_of_words.recolor(color_func=color_func, random_state=3)

    WC_height = 1000
    WC_width = 1500
    WC_max_words = 200
    cloud_of_words = WordCloud(height=1080,
                               width=1080,
                               max_font_size = 110,
                               collocations = False,
                               background_color = "white",
                               mask = mask,
                               contour_width = 3,
                               contour_color = color_cont)
    cloud_of_words.generate_from_frequencies(words_dict)
    cloud_of_words.recolor(color_func=color_function, random_state=3)
    #plt.title('Most frequently occurring bigrams connected by same colour and font size')
    plt.figure(figsize = (12, 10))
    plt.imshow(cloud_of_words, interpolation = 'bilinear')
    plt.axis('off')
    plt.show()
    return cloud_of_words, len_bigrams
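An illustrative call of the function above (the input strings and mask file name are assumptions; the mask is expected under data/img/):

sample_bigrams = ["fake news spreads fast", "real news checks facts"]  # assumed data
cloud, n_bigrams = word_cloud_bigram(sample_bigrams, "thumbs_up_mask.png")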
Example #54
wc = WordCloud(
    background_color = 'white',   # background colour
    max_words = 400,  # maximum number of words shown
    mask = bg,    # mask image
    max_font_size = 60,   # maximum font size
    random_state = 42,
    font_path='C:/Windows/Fonts/simkai.ttf' # font
).generate(text1)

# set a font for the figure
my_font = fm.FontProperties(fname='C:/Windows/Fonts/simkai.ttf')

# colour generator based on the colours of the mask image
image_colors = ImageColorGenerator(bg)
# draw
plt.imshow(wc.recolor(color_func=image_colors))

# hide the axes of the word cloud
plt.axis('off')
# open a new figure for the mask image
plt.figure()

# hide the axes of the background image
plt.axis('off')
plt.imshow(bg,cmap=plt.cm.gray)

# save
wc.to_file('man.png')


Example #55
    with open(file_path, 'r')as f:
        lines=f.readlines()
    text = ''.join(lines)
    txt_freq = Keywords(text).tf_idf()

    image_path = "/home/zxingoo/supertags/tags/material/shapes/pikachu.png"
    mask = np.array(Image.open(image_path))

    wc = WordCloud( font_path,  # font
                    background_color="black",  # background colour
                    max_words=400,  # maximum number of words shown
                    mask=mask,  # mask image
                    )
    wc.generate_from_frequencies(txt_freq)

    #plt.imshow(wc)
    plt.imshow(wc.recolor(color_func=image_colors_func(image_path)), interpolation="bilinear")
    #plt.imshow(wc.recolor(color_func=grey_color_func, random_state=3),
    #       interpolation="bilinear")
    wc.to_file("/home/zxingoo/supertags/tags/temp/cartoon4.png")
    #plt.axis("off")
    #plt.savefig('/home/zxingoo/supertags/tags/temp/thumbs-up2.png',dpi = 400, bbox_inches="tight")
    # store default colored image
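    # image_colors_func() referenced above is not defined in this excerpt; it
    # presumably wraps ImageColorGenerator. A minimal sketch of such a helper
    # (an assumption, not the original author's code; requires
    # from wordcloud import ImageColorGenerator):
    def image_colors_func(image_path):
        # build per-pixel colours from the shape image
        return ImageColorGenerator(np.array(Image.open(image_path)))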

    
    
    
    
    
    
Example #56
def make_word_cloud(imagemaskurl, relative_scaling, nwords, text, title,
                    customstopwords, width, height, color, colormap, maxfont,
                    minfont, scale):
    if imagemaskurl is not None and imagemaskurl != '':
        # imgstr = re.search(r'base64,(.*)', imagemask).group(1)
        try:
            if imagemaskurl.startswith('data:image'):
                imgstr = re.search(r'base64,(.*)', imagemaskurl).group(1)
                b = base64.b64decode(imgstr)
            else:
                r = requests.get(imagemaskurl)
                b = r.content
            image_bytes = io.BytesIO(b)
            im = Image.open(image_bytes).convert('RGBA')
            canvas = Image.new('RGBA', im.size, (255, 255, 255, 255))
            canvas.paste(im, mask=im)
            mask = np.array(canvas)
            width, height = im.size
        except:
            mask = None
            text = 'Invalid Image Mask!'
    else:
        mask = None
    from wordcloud import STOPWORDS
    STOPWORDS = list(STOPWORDS)

    for word in customstopwords:
        STOPWORDS.append(word)
        STOPWORDS.append(word + 's')
        STOPWORDS.append(word + "'s")
    if color == '':
        color = None
    cloud = WordCloud(width=width, height=height, mask=mask, background_color=color,
                      stopwords=STOPWORDS, max_words=nwords, colormap=colormap,
                      max_font_size=maxfont, min_font_size=minfont,
                      random_state=42, scale=scale, mode='RGBA',
                      relative_scaling=relative_scaling).generate(text)
    try:
        coloring = ImageColorGenerator(mask)
        cloud.recolor(color_func=coloring)
    except:
        pass
    image = cloud.to_image()

    byte_io = io.BytesIO()
    image.save(byte_io, 'PNG')
    byte_io.seek(0)
    data_uri = base64.b64encode(byte_io.getvalue()).decode('utf-8').replace('\n', '')
    src = 'data:image/png;base64,{0}'.format(data_uri)
    x = np.array(list(cloud.words_.keys()))
    y = np.array(list(cloud.words_.values()))
    order = np.argsort(y)[::-1]
    x = x[order]
    y = y[order]
    trace = go.Bar(x=x, y=y)
    layout = go.Layout(margin=go.Margin(l=10, r=0),
                       title='Relative frequency of words/bigrams')
    fig = go.Figure(data=[trace], layout=layout)
    children = [
        H2(title, className='card-title'),
        Img(src=src, width=image.size[0], height=image.size[1],
            style={'maxWidth': '100%', 'height': 'auto',
                   'margin': '0 auto', 'display': 'block'}),
        # Details([
        #     Summary('View Frequency Plot'),
        #     dcc.Graph(id='word-freq', figure=fig, config={'displayModeBar': False})
        # ])
    ]

    return children
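An illustrative call of make_word_cloud (all argument values here are assumptions; in the original app they are wired to Dash inputs):

children = make_word_cloud(
    imagemaskurl=None, relative_scaling=0.5, nwords=100,
    text="open source word clouds are fun to build", title="Demo",
    customstopwords=[], width=600, height=300, color='white',
    colormap='viridis', maxfont=60, minfont=4, scale=1)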
Example #57
File: wechat.py  Project: xxh0823/weChat
my_wordcloud3 = WordCloud(background_color='white', max_words=5000, mask=coloring, max_font_size=50,random_state=
                        20,scale=2,font_path='C:/Windows/Fonts/simhei.ttf').generate(nicknametext)




image_colors = ImageColorGenerator(coloring)
image_colors2 = ImageColorGenerator(coloring)



plt.figure(figsize=(16,9))
plt.imshow(my_wordcloud.recolor(color_func=image_colors))
plt.axis('off')
plt.savefig('D:/4.jpg')

plt.figure(figsize=(16,9))
plt.imshow(my_wordcloud2.recolor(color_func=image_colors2))
plt.axis('off')
plt.savefig('D:/5.jpg',dpi=600)

plt.figure(figsize=(16,9))
plt.imshow(my_wordcloud3.recolor(color_func=image_colors))
plt.axis('off')
plt.savefig('D:/6.jpg',dpi=600)
Example #58
def main(city,keyword,region,pages):
    """ 主函数 """
    csv_filename='zl_'+city+'_'+keyword+'.csv'
    txt_filename='zl_'+city+'_'+keyword+'.txt'
    headers=['职位','经验','学历','公司','规模','月薪','地点']
    write_csv_headers(csv_filename, headers)

    for i in range(pages):
        # get every job posting on this page and write it to the csv file
        job_dict={}
        html=get_one_page(city,keyword,region,i)
        items=parse_one_page(html)
        for item in items:
            html=get_detail_page(item.get('job_url'))
            job_detail=get_job_detail(html)
            job_dict['职位']=item.get('职位')
            job_dict['经验']=job_detail.get('years')
            job_dict['学历']=job_detail.get('education')
            job_dict['公司']=item.get('公司')
            job_dict['规模']=job_detail.get('scale')
            job_dict['月薪']=item.get('月薪')
            job_dict['地点']=item.get('地点')
            # clean the data: strip punctuation and anything else that would skew the word-frequency counts
            # [\u4E00-\u9FD5] is the Unicode range of Chinese characters
            pattern = re.compile(r'[\u4E00-\u9FD5]+')
            filterdata = re.findall(pattern, job_detail.get('requirement'))
            # filterdata = job_detail.get('requirement')
            write_txt_file(txt_filename,''.join(filterdata))
            write_csv_rows(csv_filename,headers,job_dict)

    # salary statistics
    if True:
        salaries=[]
        sal=read_csv_column(csv_filename,5)
        # skip the first item (the header) and convert the rest to int, building a new list

        for i in range(len(sal)-1):
            print(i)
            # a salary of '0' means the posting said 'negotiable'; exclude it from the statistics
            if not (sal[i]=='0' or sal[i]=='月薪'):
                salaries.append(int(sal[i]))
        # show the distribution as a histogram
        # note: this may raise ValueError: invalid literal for int() with base 10: '月薪' after the file is generated
        plt.hist(salaries,bins=10,)
        plt.show()

    if True:
        # word-frequency statistics over the job descriptions
        content=read_txt_file(txt_filename)
        segment=jieba.lcut(content)

        words_df=pd.DataFrame({'segment':segment})
        # ignore common stop words
        stopwords=pd.read_csv("stopwords.txt",index_col=False,quoting=3,sep=" ",names=['stopword'],encoding='utf-8')
        words_df=words_df[~words_df.segment.isin(stopwords.stopword)]

        words_stat=words_df.groupby(by=['segment'])['segment'].agg({"计数":numpy.size})
        words_stat=words_stat.reset_index().sort_values(by=["计数"],ascending=False)
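        # note: an equivalent frequency table could be obtained more directly with
        # value_counts(), e.g. words_df['segment'].value_counts().head(100)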

        # word cloud settings
        color_mask = imread('china.jfif')
        wordcloud=WordCloud(font_path='simhei.ttf', # a font that can render Chinese
        background_color="white",                   # background colour
        max_words=100,                              # maximum number of words shown
        mask=color_mask,                            # mask image
        max_font_size=100,                          # maximum font size
        random_state=42,
        width=1000,height=860,margin=2,             # default image size; with a mask the saved image follows the mask's size, margin is the spacing around each word
        )
        # generate the cloud: either feed the full text to generate() or pre-compute frequencies and use generate_from_frequencies()
        word_frequence={x[0]:x[1] for x in words_stat.head(100).values}
        # e.g. {'经验': 34, '开发': 34, '网站': 29, ...
        word_frequence_dict={}
        for key in word_frequence:
            word_frequence_dict[key]=word_frequence[key]
        wordcloud.generate_from_frequencies(word_frequence_dict)
        # build colour values from the mask image
        image_colors=ImageColorGenerator(color_mask)
        # recolour
        wordcloud.recolor(color_func=image_colors)
        # save the image
        wordcloud.to_file('output.jpg')
        plt.imshow(wordcloud)
        plt.axis("off")
        plt.show()
Example #59
    text = jiebaClearText(text)

# Generate the cloud: either feed the full text to generate() (wordcloud's own Chinese segmentation is poor, so enabling jieba is recommended) or pre-compute frequencies and use generate_from_frequencies()
cY.generate(text)

# build colour values from the mask image
imageColors = ImageColorGenerator(backColoring)

# draw the word cloud
plt.figure()
plt.imshow(cY)
plt.axis("off")
plt.show()

# save the image
cY.to_file(path.join(dangQian, imgName1))

imageColors = ImageColorGenerator(backColoring)

plt.imshow(cY.recolor(color_func= imageColors))
plt.axis("off")

# show the mask image itself
plt.figure()
plt.imshow(backColoring, cmap= plt.cm.gray)
plt.axis('off')
plt.show()

# save the image
cY.to_file(path.join(dangQian, imgName2))
Example #60
    def generatewordcloud(party, inputImageFileName, outputImageFileName):
        global stopwordshearing
        speakerData = data[data.Party == party]
        allText = ""
        for index, row in speakerData.iterrows():
            allText += str(row['Text']).lower()+" "
        allText = allText.replace("e-mail","email")
        allText = allText.replace("e- mail","email")
        allText = allText.replace("op-ed","oped")
        #print (allText)
        ImageFile.LOAD_TRUNCATED_IMAGES = True

        img = Image.open(inputImageFileName)
        img = img.resize((980,1080), Image.ANTIALIAS)
        sl = STOPWORDS | stopwordshearing
        speakerArray = np.array(img)
        wc = WordCloud(background_color="white", max_words=1000, mask=speakerArray, stopwords=sl,
                random_state=42)
        
        wc.generate(allText)
        print(wc.words_)
        # create coloring from image
        image_colors = ImageColorGenerator(speakerArray)
        wc.recolor(color_func=image_colors)
        wc.to_file(outputImageFileName)