示例#1
1
def wordcloud(datafile):
    """Render one word-cloud image per category found in ``datafile``.

    ``datafile`` is read through ``datafile["Category"]`` and
    ``datafile.iterrows()`` with ``"Category"`` / ``"Content"`` fields
    (presumably a pandas DataFrame -- confirm against the caller).
    One image is saved per category as
    ``../wordcloud/wordcloud_<category>.jpg``.
    """
    # Work on a private copy: adding words to the shared STOPWORDS set would
    # silently change every other word cloud built in the same process.
    stop_words = set(STOPWORDS)
    stop_words.update(CountVectorizer(stop_words='english').get_stop_words())
    stop_words.add("said")

    pony_mask = np.array(Image.open("../pinkyB.jpg"))
    wc = WordCloud(background_color="black", max_words=2000, mask=pony_mask,
                   stopwords=stop_words)

    # Gather the documents of each category in a list instead of growing one
    # string per category.
    documents = {category: [] for category in set(datafile["Category"])}
    for _, row in datafile.iterrows():
        documents[row["Category"]].append(str(row["Content"]))

    for category, parts in documents.items():
        # Join with a space so the last word of one document does not fuse
        # with the first word of the next one.
        wc.generate(" ".join(parts))
        wc.to_image().save("../wordcloud/wordcloud_" + category + ".jpg")
示例#2
0
def main():
	"""Build a word cloud from every file below ``data2`` and display it."""
	reader = WordReader()

	# Append whatever word_reader() returns for each file, in os.walk order.
	corpus = ''
	for folder, _subdirs, names in os.walk('data2'):
		for name in names:
			corpus += reader.word_reader(os.path.join(folder, name))

	sky_mask = np.array(Image.open("./sky.png"))
	options = {
		"background_color": "white",  # canvas colour
		"max_words": 1000,  # maximum number of words drawn
		"mask": sky_mask,  # image used as the mask
		"max_font_size": 150,  # largest font size
		"random_state": 42,
	}
	cloud = WordCloud(**options)
	cloud.generate(corpus)

	plt.figure()
	plt.imshow(cloud)
	plt.axis("off")
	plt.show()
def run_yt():
    """Create the YouTube word cloud and the two YouTube bar charts.

    Saves ``popularWordsYT.png`` through matplotlib and passes the file names
    ``wordUseCompYT.png`` and ``barGraphYT.png`` to ``create_dualbargraph``
    and ``create_bargraph``.
    """
    yt = ds.acquire_youtube()
    # Both statistics are needed twice below, so compute them once.
    mean = ds.mean(yt[0])
    deviation = ds.standard_deviation(yt[0])

    ytimg = imread("ytlogo.png")
    wc = WordCloud(mask=ytimg)
    image_colors = ImageColorGenerator(ytimg)
    wc.generate(word_list_yt(mean, deviation))

    plt.imshow(wc.recolor(color_func=image_colors))
    plt.axis("off")
    plt.savefig('popularWordsYT.png', bbox_inches='tight', dpi=200)

    # The ten most frequent words, scaled to occurrences per 1000 videos.
    words, vidcount = ds.word_count_yt('title', mean, deviation)
    labels = sorted(words, key=words.get, reverse=True)[:10]
    data1 = [1000 * words[w] / vidcount for w in labels]

    # A word that is popular on YouTube may not occur in the Dailymotion
    # counts at all; treat it as zero occurrences instead of raising KeyError.
    words, vidcount = ds.word_count_dailymotion(0, 0)
    data2 = [1000 * words.get(w, 0) / vidcount for w in labels]
    create_dualbargraph(data1, data2, labels, 'wordUseCompYT.png')

    Theta = da.yt_thetas()
    for x in range(len(Theta)):
        Theta[x] = Theta[x] / 10000
    print(Theta)
    create_bargraph(Theta, ('duration', 'date created', 'y-intercept'), 'barGraphYT.png')
示例#4
0
文件: wc.py 项目: bbxyard/bbxyard
def txt2pic(txt_file, out_png, font_path, mask_file):
  """Segment a text file with jieba and draw it as a masked word cloud.

  Parameters:
    txt_file: path of the text to visualise.
    out_png: path the finished image is written to.
    font_path: font handed to ``WordCloud``.
    mask_file: image used both as the mask and as the colour source.

  The cloud is shown with matplotlib first and written to ``out_png``
  afterwards.
  """
  # Read inside a context manager so the handle is closed right away.
  with open(path.abspath(txt_file)) as text_file:
    text = text_file.read()
  text_cut = jieba.cut(text)   # word segmentation
  new_textlist = ' '.join(text_cut)   # space-separated tokens for WordCloud
  pic_address = path.abspath(mask_file)
  pic = imread(pic_address)  # load the mask image
  pic_color = ImageColorGenerator(pic)   # colour function derived from the image
  wc = WordCloud(background_color='white',
    mask=pic,
    width = 750,
    height = 750,
    max_font_size = 80,
    random_state=30,
    font_path=font_path,
    max_words=500,
    min_font_size=2,
    color_func=pic_color
  )
  wc.generate(new_textlist)    # lay out the word cloud
  plt.figure()
  plt.imshow(wc)
  plt.axis("off")
  plt.show()
  wc.to_file(out_png)   # save the image
def run_dm():
    """Create the Dailymotion word cloud and the two Dailymotion bar charts.

    Saves ``popularWordsDM.png`` through matplotlib and passes the file names
    ``wordUseCompDM.png`` and ``barGraphDM.png`` to the bar-graph helpers.
    """
    dm = ds.acquire_dailymotion()
    logo = imread("dmlogo.png")
    cloud = WordCloud(mask=logo)
    logo_colors = ImageColorGenerator(logo)
    cloud.generate(word_list_dailymotion(ds.mean(dm[0]), ds.standard_deviation(dm[0])))

    # Plot the recoloured cloud and write it to disk.
    plt.imshow(cloud.recolor(color_func=logo_colors))
    plt.axis("off")
    plt.savefig('popularWordsDM.png', bbox_inches = 'tight', dpi = 200)

    # Ten most frequent words, scaled to occurrences per 1000 videos.
    counts, total = ds.word_count_dailymotion(ds.mean(dm[0]), ds.standard_deviation(dm[0]))
    top_words = sorted(counts, key=counts.get, reverse=True)[:10]
    filtered_rates = [1000 * counts[word]/total for word in top_words]

    # The same words measured against the (0, 0) counts.
    counts, total = ds.word_count_dailymotion( 0, 0 )
    overall_rates = [1000 * counts[word]/total for word in top_words]

    create_dualbargraph(filtered_rates, overall_rates, top_words, 'wordUseCompDM.png')
    create_bargraph(da.dm_thetas(),('fans','duration','date created', 'y-intercept'), 'barGraphDM.png')
示例#6
0
def cloudplot(person):
    """Return a PNG word cloud of the text associated with ``person``.

    ``+`` characters in ``person`` are replaced by spaces before the text is
    looked up with ``GetTextRange(Emails, person)``.  The rendered figure is
    returned through ``send_file`` with the ``image/png`` mimetype.
    """
    person = re.sub(r'\+', ' ', person)

    text = GetTextRange(Emails, person)
    text = rmBoring(rmNonAlpha(text)).decode('ascii', 'ignore')

    plt.clf()

    d = path.dirname(path.abspath(__file__))

    hilcolor = np.array(Image.open(path.join(d, "static/img/hillarylogo.jpg")))

    # set.add() returns None, so the former ``stopwords=STOPWORDS.add("said")``
    # passed None to WordCloud and permanently modified the shared STOPWORDS
    # set.  Build a private copy instead.
    stopwords = set(STOPWORDS)
    stopwords.add("said")

    wc = WordCloud(background_color="white", max_words=150, mask=hilcolor,
               stopwords=stopwords,
               max_font_size=80, random_state=42,
               relative_scaling = 0.5)

    wc.generate(text)
    image_colors = ImageColorGenerator(hilcolor)

    plt.imshow(wc.recolor(color_func=image_colors))
    plt.axis("off")

    # Render the current figure into an in-memory buffer and rewind it so
    # send_file() reads from the start.
    fig = plt.gcf()
    img = StringIO.StringIO()
    fig.savefig(img)
    img.seek(0)

    return send_file(img, mimetype='image/png')
def generateWordCloud(node, contribs, wordsToShow=None, normalize=True, normMin=0, normMax=1):
    """Draw, save and show the word cloud for ``contribs[node]``.

    The contribution entry is optionally passed through ``normalizeContrib``,
    turned into text by ``generateText`` and rendered inside the circular
    mask stored next to this file.  The image is written to ``node.png`` in
    the same directory and then displayed with matplotlib.
    """
    node_contrib = contribs[node]
    if normalize:
        node_contrib = normalizeContrib(node_contrib, normMin, normMax)

    cloud_text = generateText(node_contrib, wordsToShow)

    # The mask image lives beside this module.
    here = path.dirname(__file__)
    mask = imread(path.join(here, "black_circle_mask_whitebg.png"))

    cloud = WordCloud(background_color="white", max_words=2000, mask=mask)
    cloud.generate(cloud_text)
    cloud.to_file(path.join(here, "node.png"))

    # Recolouring with pos_neg_color_func is currently switched off.
    recolour = False #normalize
    shown = cloud.recolor( color_func=pos_neg_color_func ) if recolour else cloud
    plt.imshow(shown)

    plt.axis("off")
    plt.show()
示例#8
0
def get_word_cloud(content, file_name,
                   dict = BASE_DICT,
                   folder_path=BASE_PATH+os.sep+"WordCloud",
                   font_path=BASE_PATH+os.sep+"WordCloud"+os.sep+"幼圆.ttf",
                   width=400,
                   height=200,
                   margin=5,
                   ranks_only=False,
                   prefer_horizontal=0.9,
                   mask=None,
                   scale=1,
                   color_func=None,
                   max_words=200,
                   stopwords=None,
                   random_state=None,
                   background_color='white',
                   max_font_size=None):
    """Render ``content`` as a word cloud saved under ``folder_path``.

    The layout keyword arguments are forwarded unchanged to ``WordCloud``.
    ``dict`` and ``color_func`` are accepted but not used by this body.
    Prints the full output path and returns ``file_name``.
    """
    options = {
        "font_path": font_path,
        "width": width,
        "height": height,
        "margin": margin,
        "ranks_only": ranks_only,
        "prefer_horizontal": prefer_horizontal,
        "mask": mask,
        "scale": scale,
        "max_words": max_words,
        "stopwords": stopwords,
        "random_state": random_state,
        "background_color": background_color,
        "max_font_size": max_font_size,
    }
    wc = WordCloud(**options)

    # Destination: folder, platform separator and file name glued together.
    file_with_path = "{BASE_PATH}{sep}{file}".format( BASE_PATH=folder_path ,sep=os.sep,file=file_name)

    wc.generate(content)
    print(file_with_path)
    wc.to_file(file_with_path)
    return file_name
    def wordcloudplot_focus(self, yizhongzhazha=None, backimage=None):
        """Save one word cloud per column of ``self._relationship``.

        ``relationship()`` has to be run first so that ``self._relationship``
        and ``self._contacts_topN`` are available.

        Parameters
            yizhongzhazha: the loaded message table; only checked for ``None``
            backimage: path of an optional background (mask) image

        Returns: nothing; images are written to
            ``./wordcloud/<column name>2.png``
        """
        if yizhongzhazha is None:
            print("Need load message table first.")
            return
        if self._contacts_topN is None:
            print("need to run relationship() first.")
            return

        custompic = imread(backimage) if backimage is not None else None

        if not os.path.exists('./wordcloud'):
            os.makedirs('./wordcloud')
        wordcloud = WordCloud(background_color="white", mask=custompic,
                              max_words=2000,scale=3)
        for k in range(len(self._contacts_topN)):
            counts = self._relationship.iloc[:, k]
            # Pair every index label with its value positionally.  The former
            # ``text[i]`` lookup relied on pandas falling back to positional
            # access for integer keys on a label index, which is deprecated.
            text = ''.join((word + ' ') * count
                           for word, count in zip(counts.index.values, counts.values))
            wordcloud.generate(text)
            wordcloud.to_file("./wordcloud/"+self._relationship.columns[k]+'2.png')
示例#10
0
def genwordcloud(texts,mask=None,font_path=None,background_color='white'):
    '''Generate a word cloud image.

    parameter
    ----------
    texts: text handed to ``WordCloud.generate``
    mask: path (or file object) of a mask image opened with ``Image.open``;
        when None, a 900x1200 RGBA mask holding an ellipse is built
    font_path: font to use; the Android default font DroidSansFallback.ttf
        is suggested
    background_color: background colour passed to ``WordCloud``

    return
    -------
    img: the rendered image, usable as ``img.save('test.png')``; None when
        the wordcloud package cannot be imported
    '''
    from PIL import Image
    try:
        from wordcloud import WordCloud
    except ImportError:
        # Only a missing package is treated as "feature unavailable"; any
        # other error should surface instead of being swallowed.
        print('wordcloud need install wordcloud package.')
        return None
    if mask is None:
        # Vectorised form of the per-pixel test: pixels outside the ellipse
        # centred on (449.5, 599.5) with semi-axes 430 / 580 become 255,
        # pixels inside stay 0.
        rows, cols = np.ogrid[:900, :1200]
        outside = (rows-449.5)**2/(430**2)+(cols-599.5)**2/(580**2) > 1
        tmp = np.where(outside, 255, 0).astype(np.uint8)
        mask = np.zeros((900,1200,4),dtype=np.uint8)
        mask[:,:,:3] = tmp[:,:,np.newaxis]
        mask[:,:,3] = 255
    else:
        mask = np.array(Image.open(mask))
    wordcloud = WordCloud(background_color = background_color,font_path=font_path, mask = mask)
    wordcloud.generate(texts)
    img = wordcloud.to_image()
    return img
示例#11
0
def word_cloud(posts):
    """Return a URL-quoted, base64-encoded PNG word cloud of ``posts``.

    The ``'content'`` entries of all posts are joined with spaces, rendered
    on the current matplotlib figure and captured in an in-memory buffer.
    (A disabled CoreNLP lemmatisation step used to sit in a string literal
    here; it never ran.)
    """
    joined = u' '.join(entry['content'] for entry in posts)

    cloud = WordCloud(background_color="white", width=1200, height=900, margin=0)
    cloud.generate(joined)

    figure = plt.gcf()
    plt.imshow(cloud)
    plt.axis("off")

    png_buffer = StringIO.StringIO()
    figure.savefig(png_buffer, format='png', bbox_inches='tight')
    png_buffer.seek(0)  # rewind before the buffer content is read
    plt.close()

    return urllib.quote(base64.b64encode(png_buffer.buf))
def main(save_files = False, db_filename = '../output/database.sqlite'):
    """Show a word cloud built from column 1 of every ``Papers`` row.

    Parameters:
        save_files: when true, also write ``../files/title_cloud.png``.
        db_filename: SQLite database holding the ``Papers`` table.
    """
    conn = sqlite3.connect(db_filename)
    try:
        c = conn.cursor()

        # Retrieve papers
        c.execute('''SELECT *
                 FROM Papers''')

        paper_content = c.fetchall()
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Separate titles with a space; plain concatenation fused the last word
    # of one title with the first word of the next.
    titles = ' '.join(pc[1] for pc in paper_content)

    # A Marvin Minsky mask
    mask = np.array(Image.open("../files/minsky_mask.png"))

    wc = WordCloud(background_color="white", max_words=2000, mask=mask, stopwords=STOPWORDS.copy())
    # Generate word cloud
    wc.generate(titles)

    if save_files:
        # Store to file
        wc.to_file("../files/title_cloud.png")

    # Show word cloud
    plt.imshow(wc)
    plt.axis("off")
    plt.show()
示例#13
0
def make_cloud(words, image, size=10, filename='figures/cloud.png', max_words=200, horizontal=0.8):
    """Save a logo-shaped, logo-coloured word cloud of ``words``.

    ``words`` is a whitespace-separated string, ``image`` the path of the
    logo used as mask and colour source.  The figure is ``size`` x ``size``
    inches and is saved to ``filename``.
    """
    # Drop the tokens 'RT', 'amp' and 'lt' before the text reaches WordCloud.
    noise_tokens = ['RT', 'amp', 'lt']
    kept = []
    for token in words.split():
        if token not in noise_tokens:
            kept.append(token)
    cleaned_text = ' '.join(kept)

    # The same tokens are also registered as stop words on a private copy.
    cloud_stopwords = STOPWORDS.copy()
    for token in ("RT", 'amp', 'lt'):
        cloud_stopwords.add(token)

    # The logo serves as both the mask and the colour source.
    logo = imread(image)
    logo_colors = ImageColorGenerator(logo)

    cloud = WordCloud(
        stopwords=cloud_stopwords,
        mask=logo,
        color_func=logo_colors,
        scale=0.8,
        max_words=max_words,
        background_color='white',
        random_state=42,
        prefer_horizontal=horizontal,
    )
    cloud.generate(cleaned_text)

    plt.figure(figsize=(size, size))
    plt.imshow(cloud)
    plt.axis("off")
    plt.savefig(filename)
示例#14
0
def generate_cloud():
    """Render ``messages.txt`` as a masked word cloud saved to ``masked.jpg``.

    All three files are resolved relative to the directory of this module.
    """
    d = path.dirname(__file__)
    # The file content used to be stored in an unused variable while
    # generate() was called with an undefined name ``text``.
    with open(path.join(d, 'messages.txt')) as message_file:
        text = message_file.read()
    group_mask = misc.imread(path.join(d, "mask.png"), flatten=True)
    wc = WordCloud(background_color="white", max_words = 2000, mask=group_mask)
    wc.generate(text)
    wc.to_file(path.join(d, "masked.jpg"))
示例#15
0
def test_coloring_black_works():
    """An all-zero (black) colour image must be usable as a colour source."""
    black_image = np.zeros((50, 50, 3))
    black_colors = ImageColorGenerator(black_image)
    cloud = WordCloud(
        width=50,
        height=50,
        random_state=42,
        color_func=black_colors,
        min_font_size=1,
    )
    cloud.generate(THIS)
示例#16
0
def test_repeat():
    """Check layout size and word frequencies when ``repeat=True``."""
    sample = "Some short text"

    # Without repeat, three words give three layout entries.
    cloud = WordCloud(stopwords=[]).generate(sample)
    assert_equal(len(cloud.layout_), 3)

    cloud = WordCloud(max_words=50, stopwords=[], repeat=True).generate(sample)
    # multiple of word count larger than max_words
    assert_equal(len(cloud.layout_), 51)
    # relative scaling doesn't work well with repeat
    assert_equal(cloud.relative_scaling, 0)
    # all frequencies are 1
    assert_equal(len(cloud.words_), 3)
    assert_array_equal(list(cloud.words_.values()), 1)
    laid_out = [entry[0][1] for entry in cloud.layout_]
    assert_array_equal(laid_out, 1)

    sample_with_duplicate = "Some short text with text"
    cloud = WordCloud(max_words=52, stopwords=[], repeat=True)
    cloud.generate(sample_with_duplicate)
    assert_equal(len(cloud.words_), 4)
    # normalized frequencies
    assert_equal(cloud.words_['text'], 1)
    assert_equal(cloud.words_['with'], .5)
    assert_equal(len(cloud.layout_), cloud.max_words)
    laid_out = [entry[0][1] for entry in cloud.layout_]
    # check that frequencies are sorted
    assert_true(np.all(np.diff(laid_out) <= 0))
示例#17
0
def make_word_cloud(text, save_path, background_color='black'):
    """Render ``text`` as a single-colour word cloud saved to ``app/<save_path>``.

    ``text`` is either a string or an iterable of ``(word, count)`` items; in
    the latter case every word is repeated ``count`` times.
    """
    from wordcloud import WordCloud

    def col_fun(word, *args, **kw):
        # Every word is drawn in the same dark grey.
        return '#333'

    if isinstance(text, str):
        big_string = text
    else:
        # One join instead of repeated string concatenation.
        big_string = ''.join((item[0] + ' ') * item[1] for item in text)

    # generate() used to be called twice in a row, doing the layout work
    # twice; one call is enough.
    wc = WordCloud(background_color=background_color,
                   color_func=col_fun,
                   max_words=10000,
                   height=200,
                   width=700,
                   font_path='app/static/fonts/NanumScript.ttc').generate(big_string)
    wc.to_file('app/%s' % save_path)
示例#18
0
def create_word_cloud(ballots, chart_directory, image_name, mask_file,
                      stop_words, word_counts=None):
    """
    Generates word clouds from the ``feedback`` of the given ballots.

    One image named ``<image_name>-<word_count>.png`` is saved in
    ``chart_directory`` for every entry of ``word_counts`` (default
    ``[25, 50, 100, 1000]``).  When ``mask_file`` is truthy the image is used
    as mask and colour source.  ``stop_words`` extends the default stop words.
    """
    if word_counts is None:
        word_counts = [25, 50, 100, 1000]

    # Separate ballots with a space so neighbouring words do not fuse.
    text = ' '.join(ballot.feedback for ballot in ballots)

    # Build a new set: ``|=`` on STOPWORDS itself changed the shared set for
    # every later caller.
    all_stop_words = set(STOPWORDS) | set(stop_words or ())

    # The mask does not depend on the word count, so load it only once.
    mask_options = {}
    if mask_file:
        color_mask = imread(mask_file)
        mask_options = {
            'mask': color_mask,
            'color_func': ImageColorGenerator(color_mask),
        }

    for word_count in word_counts:
        wc = WordCloud(background_color="white", max_words=word_count,
                       stopwords=all_stop_words,
                       max_font_size=80, random_state=42,
                       **mask_options)
        wc.generate(text)
        plt.imshow(wc)
        plt.axis("off")
        image_name_with_count = '{0}-{1}.png'.format(image_name, str(word_count))
        logger.info('...creating word cloud %s', image_name_with_count)
        save_location = os.path.join(chart_directory, image_name_with_count)
        plt.savefig(save_location)
        plt.close()
示例#19
0
def test_random_state():
    """Two clouds built with the same ``random_state`` must be equal."""
    clouds = [WordCloud(random_state=0) for _ in range(2)]
    for cloud in clouds:
        cloud.generate(THIS)
    assert_array_equal(clouds[0], clouds[1])
def generateWordCloud(node, contribs, maskImg=None, wordsToShow=100, normalize=True, normMin=0, normMax=1):
    """Build the word cloud for the dominant side of ``contribs[node]``.

    ``contribs[node]`` is a sequence of ``(word, value)`` pairs that is
    expected to be sorted from high to low.  The side (front or back) whose
    extreme value has the larger magnitude supplies up to ``wordsToShow``
    words; values taken from the back are sign-flipped.  The selection is
    optionally normalised, recorded in ``wordColorMap`` and rendered.

    Returns the generated ``WordCloud``.  Raises ``IndexError`` when the
    contribution list is empty.
    """
    contrib = contribs[node]
    count = min(len(contrib), wordsToShow)

    # contrib should be sorted high to low
    maxVal = abs(contrib[0][1])
    minVal = abs(contrib[-1][1])

    if maxVal > minVal:
        # The front dominates: keep the first `count` entries.
        selected = list(contrib[:count])
    else:
        # The back dominates: take the LAST `count` entries, strongest
        # first, and flip their sign.  The previous loop walked
        # contrib[count-1 .. 0], i.e. the front of the list reversed, which
        # only matched the back when count == len(contrib).
        tail = contrib[len(contrib) - count:]
        selected = [(c[0], -1 * c[1]) for c in reversed(tail)]

    # Keep working with the selected words even when normalisation is off;
    # previously the selection was dropped in that case.
    if normalize:
        selected = normalizeContrib(selected, normMin, normMax)

    # Assign each word a value in the colour map; larger contributions give
    # smaller numbers.
    for c in selected:
        word, val = c
        wordColorMap[word] = int(round(200 * (1 - val)))

    # generate text
    text = generateText(selected, min(len(selected), wordsToShow))

    # gen word cloud
    wc = WordCloud(background_color="white", max_words=2000, mask=maskImg)
    wc.generate(text)
    wc.recolor(color_func=gray_color_func)
    return wc
示例#21
0
def word_cloud(text):
    """
    Generate a word cloud from ``text`` with default settings and write it
    to ``test.png``.
    """
    WordCloud().generate(text).to_file('test.png')
示例#22
0
	def make_cloud(self, text):
		"""Render ``text`` with a randomly chosen font and return ``self.out``.

		``self.start`` is set to a random integer in [0, 255] before the cloud
		is built (presumably read by ``self.light_colour_func`` -- confirm).
		"""
		self.start = random.randint(0, 255)

		font_file = 'bot/fonts/' + random.choice(os.listdir('bot/fonts/'))
		cloud = WordCloud(
			font_path = font_file,
			background_color = 'black',
			width = 1280,
			height = 720,
			scale = 1,
			color_func = self.light_colour_func,
		)
		cloud.generate(text)
		cloud.to_file(self.out)

		return self.out
示例#23
0
    def generate_wordcloud(self, filename, bg_color='white',
                                            color_func=monochrome_color_func):
        """Write a 1280x1024 word cloud of ``self.text`` to ``filename``.

        The items of ``self.text`` are joined with spaces; at most 100 words
        are drawn, using ``bg_color`` as background and ``color_func`` for
        the word colours.
        """
        options = {
            "width": 1280,
            "height": 1024,
            "stopwords": STOPWORDS,
            "background_color": bg_color,
            "color_func": color_func,
            "max_words": 100,
        }
        cloud = WordCloud(**options)
        cloud.generate(' '.join(self.text))
        cloud.to_file(filename)
示例#24
0
def test_writing_to_file():
    """A cloud written with ``to_file`` loads back with the cloud's size."""
    wc = WordCloud()
    wc.generate(THIS)
    # The context manager closes (and thereby removes) the temporary file
    # once the check is done instead of leaving that to garbage collection.
    with NamedTemporaryFile(suffix=".png") as f:
        filename = f.name
        wc.to_file(filename)
        loaded_image = Image.open(filename)
        assert_equal(loaded_image.size, (wc.width, wc.height))
示例#25
0
def test_writing_to_file(tmpdir):
    """A cloud saved into ``tmpdir`` loads back with the cloud's size."""
    cloud = WordCloud()
    cloud.generate(THIS)

    # Write the image, then reopen it from disk.
    target = str(tmpdir.join("word_cloud.png"))
    cloud.to_file(target)
    written = Image.open(target)

    expected_size = (cloud.width, cloud.height)
    assert written.size == expected_size
示例#26
0
def test_empty_text():
    """``generate`` must raise ValueError when no words are left."""
    cases = (
        ([], ''),                 # text that is empty from the start
        (['a', 'b'], 'a b a'),    # text that is empty after stop-word filtering
    )
    for stop_list, sample in cases:
        wc = WordCloud(stopwords=stop_list)
        with pytest.raises(ValueError):
            wc.generate(sample)
示例#27
0
 def make_wordcloud(self):
     """Build a 1920x1080 cloud from ``self.tags`` and save it as
     ``static/images/wordcloud.png``."""
     print('Creating wordcloud')

     # self.tags is a sequence of tag sequences; flatten it into one list.
     flat_tags = []
     for tag_group in self.tags:
         flat_tags.extend(tag_group)

     cloud = WordCloud(width=1920, height=1080, relative_scaling=.5)
     cloud.generate(' '.join(flat_tags))

     target = os.path.join('static', 'images', 'wordcloud.png')
     cloud.to_file(target)
示例#28
0
文件: aces.py 项目: Statistica/Aces
def create_wordcloud(posts):
	"""Show the word cloud of all post messages and save it.

	The ``'message'`` fields of ``posts`` are joined with spaces and drawn
	inside the ``aces.png`` mask; the result is displayed and then written
	to ``aces_wordcloud.png``.
	"""
	wordcloud_str=' '.join(post['message'] for post in posts) #join all posts together
	aces_mask=imread("aces.png") #add aces mask
	# Exclude "will": not an interesting word, and it took up a large chunk
	# of the cloud.  set.add() returns None, so the former
	# ``stopwords=STOPWORDS.add("will")`` passed None and modified the shared
	# STOPWORDS set; use a private copy instead.
	stopwords=set(STOPWORDS)
	stopwords.add("will")
	wc=WordCloud(background_color="BLACK", mask=aces_mask, stopwords=stopwords)
	wc.generate(wordcloud_str)
	plt.axis("off")
	plt.imshow(wc)
	plt.show()
	wc.to_file("aces_wordcloud.png")
示例#29
0
def test_collocations():
    """Bigrams appear in ``words_`` only when ``collocations`` is enabled."""
    without_bigrams = WordCloud(collocations=False, stopwords=[])
    without_bigrams.generate(THIS)

    with_bigrams = WordCloud(collocations=True, stopwords=[])
    with_bigrams.generate(THIS)

    assert_in("is better", with_bigrams.words_)
    assert_not_in("is better", without_bigrams.words_)
    assert_not_in("way may", with_bigrams.words_)
示例#30
0
    return ' '.join(mywordlist)


# Configure the cloud; font_path, back_coloring, text, d and imgname1 are
# defined earlier in the script, outside this excerpt.
wc = WordCloud(
    font_path=font_path,
    background_color="white",
    max_words=2000,
    mask=back_coloring,
    max_font_size=100,
    random_state=42,
    width=1000,
    height=860,
    margin=2,
)

# Lay out the cloud from the jieba-processed text.
wc.generate(jieba_processing_txt(text))

# create coloring from image
# NOTE(review): image_colors_default is not used in the visible code.
image_colors_default = ImageColorGenerator(back_coloring)

plt.figure()
# show the cloud as generated (no recolor() is applied here)
plt.imshow(wc, interpolation="bilinear")
plt.axis("off")
plt.show()

# save wordcloud
wc.to_file(path.join(d, imgname1))

# create a second coloring from the same image
image_colors_byImg = ImageColorGenerator(back_coloring)
示例#31
0
文件: lesson5.py 项目: hezuier/hzw
    else:
        itchat.send("啦啦啦", msg["FromUserName"])


@itchat.msg_register([PICTURE])
def pic_reply(msg):
    """Answer every picture message with the fixed text "666"."""
    sender = msg["FromUserName"]
    itchat.send("666", sender)


# Log in and start itchat.
# NOTE(review): run() presumably blocks until the client stops, so the code
# below would only execute afterwards -- confirm.
itchat.auto_login()
itchat.run()

# 8. Make a poster with wordcloud
from wordcloud import WordCloud
import chardet
import matplotlib.pyplot as plt

# Read the source text as UTF-8.
with open("txt.txt", 'r', encoding="utf-8") as file:
    text = file.read()
# Pink 1000x860 canvas drawn with a font from the Windows fonts folder.
wc1 = WordCloud(background_color="pink",
                width=1000,
                height=860,
                font_path="C:\\Windows\\Fonts\\STFANGSO.ttf",
                margin=2)
wc2 = wc1.generate(text)

# Display the cloud without axes, then save it.
plt.imshow(wc2)
plt.axis("off")
plt.show()
wc2.to_file('hzw.jpg')
示例#32
0
             wordlist_jieba = jieba.cut_for_search(mytext,HMM=True)
 # 在每个词之间添加空格
             wl_space_split = " ".join(wordlist_jieba)
 # 设置云词参数
             wc = WordCloud(
                 font_path=r'C:\Windows\Fonts\SIMYOU.TTF',
                 width=800,
                 height=600,
                 margin=10,
                 max_font_size=100,
                 background_color='white',
                 min_font_size=10,
                 max_words=500,
             )
 # 生成分词图
             wc.generate(wl_space_split)
 # 将分词图保存
             wc.to_file("pngs/"+str(av)+".png")
             f.close()
             print(str(av)+"ok")
     mytext = tt.read()
 # 删除特殊符号
     mytext = re.sub(r"[\s+\.\!\/_,$%^*(+\"\']+|[+——!,。?、~@#¥%……&*()]+", "", mytext)
 # 进行分词,这里选用的jieba.cut_for_search(互联网分词方法)
     wordlist_jieba = jieba.cut_for_search(mytext, HMM=True)
 # 在每个词之间添加空格
     wl_space_split = " ".join(wordlist_jieba)
 # 设置云词参数
     wc = WordCloud(
         font_path=r'C:\Windows\Fonts\SIMYOU.TTF',
         width=800,
示例#33
0
def main():
    """
    Build a word cloud from the selected text source and save the results.

    Steps: determine the context, load the mask and the stop words, fetch and
    clean the text, render the cloud, then write the image and the cleaned
    text to ``general_config['OUT_FOLDER']`` and show the image.
    """
    # Find out where the text comes from (Twitter, Telegram or plain text);
    # this is the only function that interacts with the user directly.
    context = determine_context()

    # Load the mask image (png or jpg), processed if necessary, as a numpy
    # array.
    mask = load_mask()

    # Load the stop words from the stop-word list.
    stop_words = load_stop_words()

    # Load the text together with the id used to name the output files.
    text, user_id = get_text(context)

    # Clean the text and remove stop words where necessary.
    text = clean_text(text=text, context=context, stop_words=stop_words)

    # Print some statistics to show the program is working.
    print_stats(text)

    wc = WordCloud(
        mask=mask,
        background_color=general_config["BG_COLOR"],
        font_path=general_config["FONT"],
        include_numbers=False,
        stopwords=stop_words,
        max_words=general_config["MAX_WORDS"],
        contour_width=general_config["LINE_WIDTH"],
        contour_color=general_config["LINE_COLOR"],
        max_font_size=general_config["MAX_FONT"],
        min_font_size=general_config["MIN_FONT"],
        relative_scaling=0.2,
    )
    wc.generate(text)

    # Render the image exactly once: recoloured from the mask when
    # COLORFUL_IMAGE is set, plain otherwise.  The plain image used to be
    # rendered first and thrown away in the colourful case.
    if general_config["COLORFUL_IMAGE"]:
        image_colors = ImageColorGenerator(mask)
        result_image = wc.recolor(color_func=image_colors).to_image()
    else:
        result_image = wc.to_image()

    # Output names are based on user_id; per the original notes this is the
    # Twitter username, or "telegram" / "text" for the other contexts.
    OUT_FOLDER = general_config['OUT_FOLDER']
    make_dir(OUT_FOLDER)

    image_path = f"{OUT_FOLDER}{user_id}.png"
    # The message previously lacked the space before the path.
    print("saving output image to " + image_path)
    result_image.save(image_path)

    # Write the cleaned text as UTF-8 explicitly; the platform default
    # encoding can fail on non-ASCII text.
    with open(f"{OUT_FOLDER}cleaned_{user_id}.txt",
              "w", encoding="utf-8") as cleaned_result_file:
        cleaned_result_file.write(text)

    result_image.show()
示例#34
0
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
from scipy.misc import imread
import random

# Expand every "word weight" line of the skills file into `weight` copies of
# the word.
words = []
with open('data/skills.txt', 'r', encoding='utf-8') as f:
    # e.g. "'SQL' 100"
    for line in f:
        items = line.split()
        words.extend([items[0].replace("'", '')] * int(items[1]))
random.shuffle(words)
text = ' '.join(words)

# background mask
color_mask = imread('src/bigdata.jpg')
wc = WordCloud(
    font_path="msyh.ttc",
    background_color="white",
    max_words=2000,
    mask=color_mask,
    max_font_size=500,
    random_state=10,
)
image_colors = ImageColorGenerator(color_mask)
my_wordcloud = wc.generate(text)

# Show the cloud recoloured from the mask image.
plt.imshow(my_wordcloud.recolor(color_func=image_colors))
plt.axis('off')
plt.show()
示例#35
0
def wc(data, bgcolor, title):
    """Draw a word cloud of ``data`` on a new matplotlib figure.

    ``data`` is an iterable of strings joined with spaces; ``bgcolor`` is the
    background colour.  ``title`` is accepted but not used by this body.
    """
    plt.figure()
    cloud = WordCloud(background_color=bgcolor, max_words=1000, max_font_size=50)
    joined = ' '.join(data)
    cloud.generate(joined)
    plt.imshow(cloud)
    plt.axis('off')
示例#36
0
# Fit an LDA model with number_topics components on listofConversations
# (both defined in earlier cells, outside this excerpt).
lda = LDA(n_components=number_topics)
lda.fit(listofConversations)

print("Topics found via LDA:")
print_topics(lda, count_vectorizer, number_words)


# In[ ]:


from wordcloud import WordCloud

# Word cloud over the comma-joined entries of withoutStop.
wordcloud = WordCloud(background_color="white", max_words=5000, contour_width=3, contour_color='steelblue')
text = ','.join(withoutStop)
wordcloud.generate(text)
wordcloud.to_image()


# In[ ]:


# NOTE(review): this rebinds the name `plt` to the object returned by
# .plot(); if `plt` was matplotlib.pyplot before, later pyplot calls through
# `plt` will no longer reach the module -- confirm this is intended.
plt = bof[:10].plot(kind="barh", title="10 Most Common Words") 


# In[ ]:




# read the mask / color image taken from
# http://jirkavinse.deviantart.com/art/quot-Real-Life-quot-Alice-282261010
alice_coloring = np.array(Image.open(path.join(d, "hasak.png")))
# private copy of the default stop words plus "said"
stopwords = set(STOPWORDS)
stopwords.add("said")

# background colour, maximum number of words, mask image, maximum font size, etc.
wc = WordCloud(background_color="white",
               max_words=1000,
               mask=alice_coloring,
               stopwords=stopwords,
               max_font_size=500,
               random_state=42)
# generate word cloud
wc.generate(text)

# create coloring from image
image_colors = ImageColorGenerator(alice_coloring)

# show
plt.imshow(wc, interpolation="bilinear")
plt.axis("off")
# start a new figure for the recoloured cloud
plt.figure()
# recolor wordcloud and show
# we could also give color_func=image_colors directly in the constructor
plt.imshow(wc.recolor(color_func=image_colors), interpolation="bilinear")
plt.axis("off")
# third figure: the mask / color image itself
plt.figure()
plt.imshow(alice_coloring, cmap=plt.cm.gray, interpolation="bilinear")
示例#38
0
from wordcloud import WordCloud, ImageColorGenerator  # 词云库

# 1. Read the text data; the context manager closes the file right after
#    reading instead of leaving the handle open.
with open(r'./src/file.txt', "r") as text_file:
    text = text_file.read()

# 2. jieba segmentation: cut_all=True is full mode, False is accurate mode
#    (accurate mode is the default)
cut_text = jieba.cut(text, cut_all=False)
result = "/".join(cut_text)  # tokens must be separated by some symbol, otherwise no cloud can be drawn

# 3. Load the custom background image
image = Image.open(r'./src/beijing.jpeg')
graph = np.array(image)

# 4. Build the word cloud
# With a custom background image, the size of the cloud is determined by the
# pixel size of that image.
wc = WordCloud(font_path=r"./hanti.ttf",
               background_color='white',
               max_font_size=50,
               mask=graph)
wc.generate(result)

# 5. Colour the words after the background image
image_color = ImageColorGenerator(graph)  # colour values taken from the background image
wc.recolor(color_func=image_color)
wc.to_file(r"./img/pear_heart.png")  # saved at the background image's size; sharper than the on-screen view below

# 6. Show the picture
plt.figure("词云图")  # name of the figure
plt.imshow(wc)  # display the word cloud as an image
plt.axis("off")  # hide the axes
plt.show()
示例#39
0
# Print column 1 of the fifth row flagged identity_hate.
print(train[train.identity_hate == 1].iloc[4, 1])

# !ls ../input/imagesforkernal/
stopword = set(STOPWORDS)

# clean comments
clean_mask = np.array(Image.open("../input/imagesforkernal/safe-zone.png"))
# keep only channel index 1 of the mask image
clean_mask = clean_mask[:, :, 1]
# wordcloud for clean comments
subset = train[train.clean == True]
text = subset.comment_text.values
wc = WordCloud(background_color="black",
               max_words=2000,
               mask=clean_mask,
               stopwords=stopword)
wc.generate(" ".join(text))
plt.figure(figsize=(20, 10))
plt.axis("off")
plt.title("Words frequented in Clean Comments", fontsize=20)
plt.imshow(wc.recolor(colormap='viridis', random_state=17), alpha=0.98)
# plt.show()

# toxic comments
toxic_mask = np.array(Image.open("../input/imagesforkernal/toxic-sign.png"))
# keep only channel index 1 of the mask image
toxic_mask = toxic_mask[:, :, 1]
# wordcloud for toxic comments
subset = train[train.toxic == 1]
text = subset.comment_text.values
wc = WordCloud(background_color="black",
               max_words=4000,
               mask=toxic_mask,
               stopwords=stopword)
示例#40
0
def plot_wordcloud(text: List[str]) -> None:
    """Show a word cloud of the given tweets with stop words removed.

    The stop-word set combines NLTK's English list, a handful of extra tokens,
    the digits 0-9, a built-in Hinglish list and the entries of
    ``../data/stop_hinglish.txt`` (one per line). Tokens are lower-cased and
    filtered before the cloud is generated and displayed with matplotlib.

    Args:
        text: The tweets, one string per tweet.
    """
    #     nltk.download('stopwords')
    stop = set(stopwords.words("english"))
    # Extra tokens to ignore, plus the single digits "0".."9"
    stop.update(("https", "mention", "retweet", "hashtag", "co", "rt", "tco"))
    stop.update(str(digit) for digit in range(10))

    # Built-in Hinglish stop words (a set, so repeated entries are harmless)
    hindi_stopwords = {
        "ye", "tu", "k", "ki", "se", "bhi", "kya", "mai", "kuch", "mein",
        "aur", "ab", "toh", "ho", "kyu", "nahi", "ko", "jo", "woh", "tum",
        "meri", "teri", "apna", "apni", "yeh", "h", "hai", "hain", "pe",
        "tha",
    }
    stop |= hindi_stopwords
    with open("../data/stop_hinglish.txt") as f:
        stop.update(line.strip("\n") for line in f)

    def _preprocess_text(tweets):
        """Return one list of kept, lower-cased tokens per tweet."""
        corpus = []
        for tweet in tweets:
            # Lower-case once per token instead of at every comparison
            lowered = [w.lower() for w in tweet.split()]
            corpus.append([
                w for w in lowered
                if w not in stop and w not in string.punctuation
            ])
        return corpus

    corpus = _preprocess_text(text)

    wordcloud = WordCloud(
        background_color="white",
        stopwords=stop,
        max_words=100,
        max_font_size=30,
        scale=3,
        random_state=1,
    )

    # Hand WordCloud plain text. The original passed str(corpus), i.e. the
    # repr of a list of lists, so brackets and quote characters were part of
    # the input and could leak into the extracted tokens.
    cloud_text = " ".join(" ".join(words) for words in corpus)
    wordcloud = wordcloud.generate(cloud_text)

    plt.figure(1, figsize=(15, 13))
    plt.axis("off")

    plt.imshow(wordcloud)
    plt.show()
示例#41
0
# Echo the page title that will be looked up
print(Sheershak)

# Fetch the Wikipedia page for that title
Prushtha = wikipedia.page(Sheershak)

# Extract the text content of the page
Soochana = Prushtha.content

# Build the set of stop words that is handed to the word-cloud generator
anavanchit_shabdh = set(Anavanchit_Shabdh)

# Configure the word cloud: shaped by the selected mask image and limited to
# 100 words
shabdh_megh = Shabdh_Megh(background_color="white",
                          max_words=100,
                          mask=RangHeen_Chitra,
                          stopwords=anavanchit_shabdh,
                          contour_color='red')

# Generate the word cloud from the page text
shabdh_megh.generate(Soochana)  # the text is filled into the word cloud here

# Draw the generated cloud, hide the axes and show the figure
# (Chitra_Pradarshan is presumably the matplotlib pyplot alias - confirm
# against the imports)
Chitra_Pradarshan.imshow(shabdh_megh, interpolation='bilinear')
Chitra_Pradarshan.axis(
    "off")  # ensures the axis bars are not displayed
Chitra_Pradarshan.show()

# Save the generated cloud to an image file
shabdh_megh.to_file("Temp_Picture.png")
示例#42
0
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import jieba
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Read the novel; the context manager closes the handle once the text is
# loaded (the original never closed it).
with open("E:\\文学\\天局.txt", 'r') as fh:
    text = fh.read()
# Segment with jieba and join the tokens with spaces for WordCloud
fenci = " ".join(jieba.cut(text))

backgroud_Image = np.array(
    Image.open("F:\\Python\\procedure\\词云\\materials\\alice_color.png"))

# Stop words: the library defaults plus a custom addition
stopwords = set(STOPWORDS)
stopwords.add("仿佛")

wc = WordCloud(
    background_color='white',  # background colour
    mask=backgroud_Image,  # background picture used as the mask
    # Pass the extended set built above; the original passed STOPWORDS, so
    # the added "仿佛" was never filtered out.
    stopwords=stopwords,
    font_path='C:\\Windows\\Fonts\\simkai.ttf',  # font; Chinese cannot be displayed without one
)
wc.generate(fenci)
# Recolour the words from the background picture
image_colors = ImageColorGenerator(backgroud_Image)
wc.recolor(color_func=image_colors)
plt.imshow(wc)
plt.axis('off')
plt.show()
示例#43
0

# 对句子进行分词
def seg_sentence(sentence):
    """Segment a sentence with jieba and drop stop words.

    Args:
        sentence: The raw sentence; surrounding whitespace is stripped first.

    Returns:
        A string in which every kept token is followed by one space. Tokens
        found in ``stopwordslist()`` and tab tokens are omitted.
    """
    # Build the lookup once; a set gives constant-time membership tests
    stopwords = set(stopwordslist())
    # join() avoids growing the result string piece by piece in the loop
    return ''.join(
        word + " "
        for word in jieba.cut(sentence.strip())
        if word not in stopwords and word != '\t'
    )


if __name__ == '__main__':
    # Segment every entry of the first CSV column (the file has no header row)
    # and concatenate the results into one text
    df = pd.read_csv("csv/xiebuyazheng.csv", header=None, encoding="utf-8")
    cut_text = ''.join(seg_sentence(row) for row in df[0].values)

    # The mask image is loaded, but the mask option stays disabled below
    color_mask = imread("timg.jpeg")
    cloud_settings = dict(
        # The font is best kept next to the script, and it must be set
        font_path='simhei.ttf',
        background_color='white',
        # mask=color_mask,
        max_words=2000,
        max_font_size=100,
    )
    cloud = WordCloud(**cloud_settings)
    word_cloud = cloud.generate(cut_text)

    # Show the cloud without axes
    plt.imshow(word_cloud)
    plt.axis('off')
    plt.show()
###开始绘制
import matplotlib as mpl
import matplotlib.pyplot as plt
#mpl.rcParams['font.sans-serif'] = ['SimHei']
#mpl.rcParams['font.family']='sans-serif'
mpl.rcParams['font.size'] = 10

from wordcloud import WordCloud, STOPWORDS

# Options common to both clouds; only the mask image differs between them
shared_cloud_kwargs = dict(font_path='simhei.ttf',
                           stopwords=STOPWORDS,
                           background_color='white',
                           max_words=1000)

# First cloud: tokens from ys_cut, shaped by 词云1.jpg, saved and shown
backgroud_Image = plt.imread(path + '词云\\词云1.jpg')
wc = WordCloud(mask=backgroud_Image, **shared_cloud_kwargs)
ys_text = ' '.join(ys_cut)
wc.generate(ys_text)
plt.imshow(wc)
plt.axis("off")
plt.savefig(path + '词云\\ys.png', dpi=1000)
plt.show()

# Second cloud: tokens from ls_cut, shaped by 词云5.jpg, saved only
backgroud_Image = plt.imread(path + '词云\\词云5.jpg')
wc = WordCloud(mask=backgroud_Image, **shared_cloud_kwargs)
ls_text = ' '.join(ls_cut)
wc.generate(ls_text)
plt.imshow(wc)
plt.axis("off")
plt.savefig(path + '词云\\ls.png', dpi=1000)
示例#45
0
# Random suffix that makes the output file name unique-ish
number = str(random.uniform(1, 9999))

templates = os.listdir("./Template/")

# Pick one template image at random and use it as the mask
alien_mask = np.array(PIL.Image.open('./Template/' + choice(templates)))

wc = WordCloud(font_path=path,
               background_color='white',
               margin=5,
               width=800,
               height=800,
               mask=alien_mask,
               max_words=2000,
               max_font_size=60,
               random_state=42)
wc = wc.generate(comment)

image_colors = ImageColorGenerator(alien_mask)

plt.figure()
# Recolour the cloud using the colours of the background picture
plt.imshow(wc.recolor(color_func=image_colors))
plt.axis("off")

# Save the picture. The path is built once so that the reported path is the
# one actually written (the original printed 'out-image-out...' while saving
# to 'out-image-...').
out_path = './OutFiles/out-image-' + number + '.jpg'
wc.to_file(out_path)

plt.close()
# Parenthesised form works as a print statement on Python 2 and as a call on
# Python 3
print(out_path)
from wordcloud import WordCloud # Biblioteca para trabalhar com nuvem de palavras 
# pip install wordcloud

# Inspect the Portuguese stop-word list (the value is only displayed, e.g. in
# a notebook cell; it is not stored)
stopwords.words('portuguese')

# NOTE(review): the list actually used below is the English one - confirm this
# matches the language of todo_texto
stops = stopwords.words('english')

# Colour map: the colours used to paint the words
mapa_cores = ListedColormap(['orange', 'green', 'red', 'magenta'])

# Create the word cloud
nuvem = WordCloud(background_color='white', colormap=mapa_cores, stopwords=stops, max_words=100)

# Generate the cloud from the full text
nuvem.generate(todo_texto)

plt.imshow(nuvem)

# Frequent-terms matrix:

# First remove the stop words
palavras_semstop = [p for p in palavras if p not in stops]
len(palavras_semstop)
# Then remove punctuation
import string 

palavras_sem_pontuacao = [p for p in palavras_semstop if p not in string.punctuation]

# Frequency distribution of the remaining words
frequencia = nltk.FreqDist(palavras_sem_pontuacao)
示例#47
0
 def make_wordcloud(strs, stopwords=[]):
     import base64
     from io import BytesIO
     from jieba import cut
     from wordcloud import WordCloud
     from wordcloud.wordcloud import np
     from wordcloud.wordcloud import Image
     from collections import Counter
     sw = ['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '--', '.', '..', '...', '......', '...................', './', '.一', '.数', '.日', '/', '//', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', '://', '::', ';', '<', '=', '>', '>>', '?', '@', 'A', 'Lex', '[', '\\', ']', '^', '_', '`', 'exp', 'sub', 'sup', '|', '}', '~', '~~~~', 
             '·', '×', '×××', 'Δ', 'Ψ', 'γ', 'μ', 'φ', 'φ.', 'В', '—', '——', '———', '‘', '’', '’‘', '“', '”', '”,', '…', '……', '…………………………………………………③', 
             '′∈', '′|', '℃', 'Ⅲ', '↑', '→', '∈[', '∪φ∈', '≈', '①', '②', '②c', '③', '③]', '④', '⑤', '⑥', '⑦', '⑧', '⑨', '⑩', '──', '■', '▲', '、',
             '。', '〈', '〉', '《', '》', '》),', '」', '『', '』', '【', '】', '〔', '〕', '〕〔', '㈧', '一', '一.', '一一', '一下', '一个', '一些',
             '一何', '一切', '一则', '一则通过', '一天', '一定', '一方面', '一旦', '一时', '一来', '一样', '一次', '一片', '一番', '一直', '一致', '一般',
             '一起', '一转眼', '一边', '一面', '七', '万一', '三', '三天两头', '三番两次', '三番五次', '上', '上下', '上升', '上去', '上来', '上述', '上面',
             '下', '下列', '下去', '下来', '下面', '不', '不一', '不下', '不久', '不了', '不亦乐乎', '不仅', '不仅...而且', '不仅仅', '不仅仅是', '不会',
             '不但', '不但...而且', '不光', '不免', '不再', '不力', '不单', '不变', '不只', '不可', '不可开交', '不可抗拒', '不同', '不外', '不外乎', '不够',
             '不大', '不如', '不妨', '不定', '不对', '不少', '不尽', '不尽然', '不巧', '不已', '不常', '不得', '不得不', '不得了', '不得已', '不必', '不怎么',
             '不怕', '不惟', '不成', '不拘', '不择手段', '不敢', '不料', '不断', '不日', '不时', '不是', '不曾', '不止', '不止一次', '不比', '不消', '不满',
             '不然', '不然的话', '不特', '不独', '不由得', '不知不觉','不管', '不管怎样', '不经意', '不胜', '不能', '不能不', '不至于', '不若', '不要', '不论',
             '不起', '不足', '不过', '不迭', '不问', '不限', '与', '与其', '与其说', '与否', '与此同时', '专门', '且', '且不说', '且说', '两者', '严格',
             '严重', '个', '个人', '个别', '中小', '中间', '丰富', '串行', '临', '临到', '为', '为主', '为了', '为什么', '为什麽', '为何', '为止', '为此', 
             '为着', '主张', '主要', '举凡', '举行', '乃', '乃至', '乃至于', '么', '之', '之一', '之前', '之后', '之後', '之所以', '之类', '乌乎', '乎', 
             '乒', '乘', '乘势', '乘机', '乘胜', '乘虚', '乘隙', '九', '也', '也好', '也就是说', '也是', '也罢', '了', '了解', '争取', '二', '二来', '二话不说',
             '二话没说', '于', '于是', '于是乎', '云云', '云尔', '互', '互相', '五', '些', '交口', '亦', '产生', '亲口', '亲手', '亲眼', '亲自', '亲身', '人', 
             '人人', '人们', '人家', '人民', '什么', '什么样', '什麽', '仅', '仅仅', '今','今后', '今天', '今年', '今後', '介于', '仍', '仍旧', '仍然', '从', 
             '从不', '从严', '从中', '从事', '从今以后', '从优', '从古到今', '从古至今', '从头', '从宽', '从小', '从新', '从无到有', '从早到晚', '从未', '从来', 
             '从此', '从此以后', '从而', '从轻', '从速', '从重', '他', '他人', '他们', '他是', '他的', '代替', '以', '以上', '以下', '以为', '以便', '以免', 
             '以前', '以及', '以后', '以外', '以後', '以故', '以期', '以来', '以至', '以至于', '以致', '们', '任', '任何', '任凭', '任务', '企图', '伙同', 
             '会', '伟大', '传', '传说','传闻', '似乎', '似的', '但', '但凡', '但愿', '但是', '何', '何乐而不为', '何以', '何况', '何处', '何妨', '何尝', 
             '何必', '何时', '何止', '何苦', '何须', '余外', '作为', '你', '你们', '你是', '你的', '使', '使得', '使用', '例如', '依', '依据', '依照', '依靠', 
             '便', '便于', '促进', '保持', '保管', '保险', '俺', '俺们', '倍加', '倍感', '倒不如', '倒不如说', '倒是', '倘', '倘使', '倘或', '倘然', '倘若', 
             '借', '借以', '借此', '假使', '假如', '假若', '偏偏', '做到', '偶尔', '偶而', '傥然', '像', '儿', '允许', '元/吨', '充其极','充其量', '充分', 
             '先不先', '先后', '先後', '先生', '光', '光是', '全体', '全力', '全年', '全然', '全身心', '全部', '全都', '全面', '八', '八成', '公然', '六', 
             '兮', '共', '共同', '共总', '关于', '其', '其一', '其中', '其二', '其他', '其余', '其后', '其它', '其实', '其次', '具体', '具体地说', '具体来说', 
             '具体说来', '具有', '兼之', '内', '再', '再其次', '再则', '再有', '再次', '再者', '再者说', '再说', '冒', '冲', '决不', '决定', '决非', '况且', 
             '准备', '凑巧', '凝神', '几', '几乎', '几度', '几时', '几番', '几经', '凡', '凡是', '凭', '凭借', '出', '出于', '出去', '出来', '出现', '分别', 
             '分头', '分期', '分期分批', '切', '切不可', '切切', '切勿', '切莫', '则', '则甚', '刚', '刚好', '刚巧', '刚才', '初', '别', '别人', '别处', 
             '别是', '别的', '别管', '别说', '到', '到了儿', '到处', '到头', '到头来', '到底', '到目前为止', '前后', '前此', '前者', '前进', '前面', 
             '加上', '加之', '加以', '加入', '加强', '动不动', '动辄', '勃然', '匆匆', '十分', '千', '千万', '千万千万', '半', '单', '单单', '单纯', 
             '即', '即令', '即使', '即便', '即刻', '即如', '即将', '即或', '即是说', '即若', '却', '却不', '历', '原来', '去', '又', '又及', '及', 
             '及其', '及时', '及至', '双方', '反之', '反之亦然', '反之则', '反倒', '反倒是', '反应', '反手', '反映', '反而', '反过来', '反过来说', '取得', 
             '取道', '受到', '变成', '古来', '另', '另一个', '另一方面', '另外', '另悉', '另方面', '另行', '只', '只当', '只怕', '只是', '只有','只消', 
             '只要', '只限', '叫', '叫做', '召开', '叮咚', '叮当', '可', '可以', '可好', '可是', '可能', '可见', '各', '各个', '各人', '各位', '各地', 
             '各式', '各种', '各级', '各自', '合理', '同', '同一', '同时', '同样', '后', '后来', '后者', '后面', '向', '向使', '向着', '吓', '吗', 
             '否则', '吧', '吧哒', '吱', '呀', '呃', '呆呆地', '呐', '呕', '呗', '呜','呜呼', '呢', '周围', '呵', '呵呵', '呸', '呼哧', '呼啦', '咋', 
             '和', '咚', '咦', '咧', '咱', '咱们', '咳', '哇', '哈', '哈哈', '哉', '哎', '哎呀', '哎哟', '哗', '哗啦', '哟', '哦', '哩', '哪', '哪个', 
             '哪些', '哪儿', '哪天', '哪年', '哪怕', '哪样', '哪边', '哪里', '哼', '哼唷', '唉', '唯有', '啊', '啊呀', '啊哈', '啊哟', '啐', '啥', '啦', 
             '啪达', '啷当', '喀', '喂', '喏', '喔唷', '喽', '嗡', '嗡嗡', '嗬', '嗯', '嗳', '嘎', '嘎嘎', '嘎登', '嘘', '嘛', '嘻', '嘿', '嘿嘿', 
             '四', '因', '因为', '因了', '因此', '因着', '因而', '固', '固然', '在', '在下', '在于', '地', '均', '坚决', '坚持', '基于', '基本', '基本上', 
             '处在', '处处', '处理', '复杂', '多', '多么', '多亏', '多多', '多多少少', '多多益善', '多少', '多年前', '多年来', '多数', '多次', '够瞧的', 
             '大', '大不了', '大举', '大事', '大体', '大体上', '大凡', '大力', '大多', '大多数', '大大', '大家', '大张旗鼓', '大批', '大抵', '大概', 
             '大略', '大约', '大致', '大都', '大量', '大面儿上', '失去', '奇', '奈', '奋勇', '她', '她们', '她是', '她的', '好', '好在', '好的', '好象', 
             '如', '如上', '如上所述', '如下', '如今', '如何', '如其', '如前所述', '如同', '如常', '如是', '如期', '如果', '如次', '如此', '如此等等', 
             '如若', '始而', '姑且', '存在', '存心', '孰料', '孰知', '宁', '宁可', '宁愿', '宁肯', '它', '它们', '它们的', '它是', '它的', '安全', 
             '完全', '完成', '定', '实现', '实际', '宣布', '容易', '密切', '对', '对于', '对应', '对待', '对方', '对比', '将', '将才', '将要', '将近', 
             '小', '少数', '尔', '尔后', '尔尔', '尔等', '尚且', '尤其', '就', '就地', '就是', '就是了', '就是说', '就此', '就算', '就要', '尽', '尽可能',
             '尽如人意', '尽心尽力', '尽心竭力', '尽快', '尽早', '尽然', '尽管', '尽管如此', '尽量', '局外', '居然', '届时', '属于', '屡', '屡屡', 
             '屡次', '屡次三番', '岂', '岂但', '岂止', '岂非', '川流不息', '左右', '巨大', '巩固', '差一点', '差不多', '己', '已', '已矣', '已经', 
             '巴', '巴巴', '带', '帮助', '常', '常常', '常言说', '常言说得好', '常言道', '平素', '年复一年', '并', '并不', '并不是', '并且', '并排', 
             '并无', '并没', '并没有', '并肩', '并非', '广大', '广泛', '应当', '应用', '应该', '庶乎', '庶几', '开外', '开始', '开展', '引起', '弗', 
             '弹指之间', '强烈', '强调', '归', '归根到底', '归根结底', '归齐', '当', '当下', '当中', '当儿', '当前', '当即', '当口儿', '当地', '当场', 
             '当头', '当庭', '当时', '当然', '当真', '当着', '形成', '彻夜', '彻底', '彼', '彼时', '彼此', '往', '往往', '待', '待到', '很', '很多', 
             '很少', '後来', '後面', '得', '得了', '得出', '得到', '得天独厚', '得起', '心里', '必', '必定', '必将', '必然', '必要', '必须', '快', 
             '快要', '忽地', '忽然', '怎', '怎么', '怎么办', '怎么样', '怎奈', '怎样', '怎麽', '怕', '急匆匆', '怪', '怪不得', '总之', '总是', '总的来看', 
             '总的来说', '总的说来', '总结', '总而言之', '恍然', '恐怕', '恰似', '恰好', '恰如', '恰巧', '恰恰', '恰恰相反', '恰逢', '您', '您们', '您是', 
             '惟其','惯常', '意思', '愤然', '愿意', '慢说', '成为', '成年', '成年累月', '成心', '我', '我们', '我是', '我的', '或', '或则', '或多或少', 
             '或是', '或曰', '或者', '或许', '战斗', '截然', '截至', '所', '所以', '所在', '所幸', '所有', '所谓', '才', '才能', '扑通', '打', '打从', 
             '打开天窗说亮话', '扩大', '把', '抑或', '抽冷子', '拦腰', '拿', '按', '按时', '按期', '按照', '按理', '按说', '挨个', '挨家挨户', '挨次', 
             '挨着', '挨门挨户', '挨门逐户', '换句话说', '换言之', '据', '据实', '据悉', '据我所知', '据此', '据称', '据说', '掌握', '接下来', '接着', 
             '接著', '接连不断', '放量', '故', '故意', '故此', '故而', '敞开儿', '敢', '敢于', '敢情', '数/', '整个', '断然', '方', '方便', '方才', 
             '方能', '方面', '旁人', '无', '无宁', '无法', '无论', '既', '既...又', '既往', '既是', '既然', '日复一日', '日渐', '日益', '日臻', '日见',
             '时候', '昂然', '明显', '明确', '是', '是不是', '是以', '是否', '是的', '显然', '显著', '普通', '普遍', '暗中', '暗地里', '暗自', '更',
             '更为', '更加', '更进一步', '曾', '曾经', '替', '替代', '最', '最后', '最大', '最好', '最後', '最近', '最高', '有', '有些', '有关',
             '有利', '有力', '有及', '有所', '有效', '有时', '有点', '有的', '有的是', '有着', '有著', '望', '朝', '朝着', '末##末', '本', '本人', 
             '本地', '本着','本身', '权时', '来', '来不及', '来得及', '来看', '来着', '来自', '来讲', '来说', '极', '极为', '极了', '极其', '极力', 
             '极大', '极度', '极端', '构成', '果然', '果真', '某', '某个', '某些', '某某', '根据', '根本', '格外', '梆', '概', '次第', '欢迎', 
             '欤', '正值', '正在', '正如', '正巧', '正常', '正是', '此', '此中', '此后', '此地', '此处', '此外', '此时', '此次', '此间', '殆', 
             '毋宁', '每', '每个', '每天', '每年', '每当', '每时每刻', '每每', '每逢', '比', '比及', '比如', '比如说', '比方', '比照', '比起', 
             '比较', '毕竟', '毫不', '毫无', '毫无例外', '毫无保留地', '汝', '沙沙', '没', '没奈何', '没有', '沿', '沿着', '注意', '活', '深入', 
             '清楚', '满', '满足', '漫说', '焉', '然', '然则', '然后', '然後', '然而', '照', '照着', '牢牢', '特别是', '特殊', '特点', '犹且', 
             '犹自', '独', '独自', '猛然', '猛然间', '率尔', '率然', '现代', '现在', '理应', '理当', '理该', '瑟瑟', '甚且', '甚么', '甚或', 
             '甚而', '甚至', '甚至于', '用', '用来', '甫', '甭', '由', '由于', '由是', '由此', '由此可见', '略', '略为', '略加', '略微', '白', 
             '白白', '的', '的确', '的话','皆可', '目前', '直到', '直接', '相似', '相信', '相反', '相同', '相对', '相对而言', '相应', '相当', 
             '相等', '省得', '看', '看上去', '看出', '看到', '看来', '看样子', '看看', '看见', '看起来', '真是', '真正', '眨眼', '着', '着呢', 
             '矣', '矣乎', '矣哉', '知道', '砰', '确定', '碰巧', '社会主义', '离', '种', '积极', '移动', '究竟', '穷年累月', '突出', '突然', 
             '窃', '立', '立刻', '立即', '立地', '立时', '立马', '竟', '竟然', '竟而', '第', '第二', '等', '等到', '等等', '策略地', '简直', 
             '简而言之', '简言之', '管', '类如', '粗', '精光', '紧接着', '累年', '累次', '纯', '纯粹', '纵', '纵令', '纵使', '纵然', '练习', 
             '组成', '经', '经常', '经过', '结合', '结果', '给', '绝', '绝不', '绝对', '绝非', '绝顶', '继之', '继后', '继续', '继而', '维持', 
             '综上所述', '缕缕', '罢了', '老', '老大', '老是', '老老实实', '考虑', '者', '而', '而且', '而况', '而又', '而后', '而外', '而已', 
             '而是','而言', '而论', '联系', '联袂', '背地里', '背靠背', '能', '能否', '能够', '腾', '自', '自个儿', '自从', '自各儿', '自后', 
             '自家', '自己', '自打', '自身', '臭', '至', '至于', '至今', '至若', '致', '般的', '良好', '若', '若夫', '若是', '若果', '若非',
             '范围', '莫', '莫不', '莫不然', '莫如', '莫若', '莫非', '获得', '藉以', '虽', '虽则', '虽然', '虽说', '蛮', '行为', '行动', 
             '表明', '表示', '被', '要', '要不', '要不是', '要不然', '要么', '要是', '要求', '见', '规定', '觉得', '譬喻', '譬如', '认为', 
             '认真', '认识', '让', '许多','论', '论说', '设使', '设或', '设若', '诚如', '诚然', '话说', '该', '该当', '说明', '说来', '说说', 
             '请勿', '诸', '诸位', '诸如', '谁', '谁人', '谁料', '谁知', '谨', '豁然', '贼死', '赖以', '赶', '赶快', '赶早不赶晚', '起', 
             '起先', '起初', '起头', '起来', '起见', '起首', '趁', '趁便', '趁势', '趁早', '趁机', '趁热', '趁着', '越是', '距', '跟', '路经',
             '转动', '转变', '转贴', '轰然', '较', '较为', '较之', '较比', '边', '达到', '达旦', '迄', '迅速', '过', '过于', '过去', '过来', 
             '运用', '近', '近几年来', '近年来', '近来', '还', '还是', '还有', '还要', '这', '这一来', '这个', '这么', '这么些', '这么样', 
             '这么点儿', '这些', '这会儿', '这儿', '这就是说', '这时', '这样', '这次', '这点', '这种', '这般', '这边', '这里', '这麽', 
             '进入', '进去', '进来', '进步', '进而', '进行', '连', '连同', '连声', '连日', '连日来', '连袂', '连连', '迟早', '迫于', '适应', 
             '适当', '适用', '逐步', '逐渐', '通常', '通过', '造成', '逢', '遇到', '遭到', '遵循', '遵照', '避免', '那', '那个', '那么', 
             '那么些', '那么样', '那些', '那会儿', '那儿', '那时', '那末', '那样', '那般','那边', '那里', '那麽', '部分', '都', '鄙人', 
             '采取', '里面', '重大', '重新', '重要', '鉴于', '针对', '长期以来', '长此下去', '长线', '长话短说', '问题', '间或', '防止', 
             '阿', '附近', '陈年', '限制', '陡然', '除', '除了', '除却', '除去', '除外', '除开', '除此', '除此之外', '除此以外', '除此而外', 
             '除非', '随', '随后', '随时', '随着', '随著', '隔夜', '隔日', '难得', '难怪', '难说', '难道', '难道说', '集中', '零', '需要', 
             '非但', '非常', '非徒', '非得', '非特', '非独', '靠', '顶多', '顷', '顷刻', '顷刻之间', '顷刻间', '顺', '顺着', '顿时', '颇', 
             '风雨无阻', '饱', '首先', '马上', '高低', '高兴', '默然', '默默地', '齐', '︿', '!', '#', '$', '%', '&', ''', '(', 
             ')', ')÷(1-', ')、', '*','+', '+ξ', '++', ',', ',也', '-', '-β', '--', '-[*]-', '.', '/', '0', '0:2', 
             '1', '1.', '12%', '2', '2.3%', '3', '4', '5', '5:0', '6', '7', '8', '9', ':', ';', '<', '<±', '<Δ', 
             '<λ', '<φ', '<<', '=', '=″', '=☆', '=(', '=-', '=[', '={', '>', '>λ', '?', '@', 'A', 'LI', 'R.L.', 
             'ZXFITL', '[', '[①①]', '[①②]', '[①③]', '[①④]', '[①⑤]', '[①⑥]', '[①⑦]', '[①⑧]', '[①⑨]', '[①A]', 
             '[①B]', '[①C]', '[①D]', '[①E]', '[①]', '[①a]', '[①c]', '[①d]', '[①e]', '[①f]', '[①g]', '[①h]', 
             '[①i]', '[①o]', '[②', '[②①]', '[②②]', '[②③]', '[②④', '[②⑤]', '[②⑥]', '[②⑦]', '[②⑧]', '[②⑩]', '[②B]', 
             '[②G]', '[②]', '[②a]', '[②b]', '[②c]', '[②d]', '[②e]', '[②f]', '[②g]', '[②h]', '[②i]', '[②j]', 
             '[③①]', '[③⑩]', '[③F]', '[③]', '[③a]', '[③b]', '[③c]', '[③d]', '[③e]', '[③g]', '[③h]', '[④]', 
             '[④a]', '[④b]', '[④c]', '[④d]', '[④e]', '[⑤]', '[⑤]]', '[⑤a]', '[⑤b]', '[⑤d]', '[⑤e]', '[⑤f]', 
             '[⑥]', '[⑦]', '[⑧]', '[⑨]', '[⑩]', '[*]', '[-', '[]', ']', ']∧′=[', '][', '_', 'a]', 'b]', 'c]', 
             'e]', 'f]', 'ng昉', '{', '{-', '|', '}', '}>', '~', '~±', '~+', '¥', '-------------------']
     if not stopwords:
         sw += stopwords
     cutResult = []
     for word in cut(strs):
         if word.strip() and word.strip() not in sw:
             cutResult.append(word.strip())
     # 统计词频
     wordFrequency = Counter(cutResult).most_common()
     # 统计词数量
     wordNum = len(wordFrequency)
     # 生成词云图
     imgMask = np.array(Image.open('myapps/static/wordcloud/default.jpg'))
     wc = WordCloud(
         font_path='static/msyh.ttc', 
         background_color='white',
         mask=imgMask)
     img = wc.generate(' '.join(cutResult)).to_image()
     # 将词云图转换为 base64 串
     imgIo = BytesIO()
     img.save(imgIo, format='JPEG')
     img = base64.b64encode(imgIo.getvalue()).decode('utf8')
     return (True, wordNum, wordFrequency, img)
示例#48
0
    signatures += ' '.join(jieba.analyse.extract_tags(signature))  # 关键字提取
    signatures += ' '

# Image.open reads the given picture.
im = Image.open('photo.jpg')  # swap in any picture you like; the path is relative to the current folder
# np.array converts the loaded image into the array used as background/mask data.
mask = np.array(im)
# Create the WordCloud object.
# `mask` sets the shape of the cloud (the original note says it is a rectangle
# by default) and accepts a picture of your choice; `margin` is the spacing
# passed to the layout, set to 15 here.
word_cloud = WordCloud(font_path='simhei.ttf',
                       background_color='white',
                       max_words=1200,
                       mask=mask,
                       margin=15)
# generate() loads the `signatures` text into the WordCloud object and
# tokenises the whole text itself (the original note warns that this works
# poorly for Chinese).
word_cloud.generate(signatures)
# ImageColorGenerator derives the word colours from the mask image
image_colors = ImageColorGenerator(mask)
# recolor() repaints the cloud with color_func=image_colors
word_clour = word_cloud.recolor(color_func=image_colors)
# figsize is the width and height of the figure; dpi is its resolution in
# pixels per inch.
plt.figure(figsize=(12, 12), dpi=100)
# imshow draws the recoloured cloud onto the figure; nothing is displayed
# until plt.show() is called.
plt.imshow(word_clour)
# Hide the axes
plt.axis('off')
# Display the word cloud
plt.show()
# Write the cloud to a file
word_cloud.to_file('signatures.png')
示例#49
0
background_color:背景颜色
mask:背景图片
stopwords
max_font_size:字体最大大小
'''
# Configure the cloud; the first positional argument is the font file path.
# NOTE(review): random_state is given a bool here - confirm an integer seed
# (or None) was not intended.
wc = WordCloud('./font/Arial.ttf',
               width=width,
               height=height,
               background_color="white",
               mask=diao_img,
               font_step=3,
               max_font_size=30,
               random_state=False,
               prefer_horizontal=0.9)

wc.generate(lstr)

# Extract the colours of the background picture

img_cl = ImageColorGenerator(diao_img)

# Show the cloud with its generated colours
plt.imshow(wc)
plt.axis("off")

# Draw a second figure with the cloud recoloured from the background picture
plt.figure()

plt.imshow(wc.recolor(color_func=img_cl))
plt.axis("off")
示例#50
0
            if word.lower() not in stopwords:
                word_list.append(word.lower())

# Keep only purely alphabetic tokens, normalised to lower case
text_list = [word.lower() for word in txt_words if word.isalpha()]

# Count the tokens and report the ten most frequent ones
word_counts = Counter(text_list)
top_10_words = word_counts.most_common(10)

print('\nThe following are the top 10 words (Count):')
for term, count in top_10_words:
    print(' -', term, '(' + str(count) + ')')

##### WORDCLOUD #####
# The cloud is built from one space-separated string of all kept tokens
text_str = ' '.join(text_list)

# Configure the cloud, generate it from the text and write it to disk
wc = WordCloud(background_color='white', max_words=2000)
wc.generate(text_str)
wc.to_file('txt.png')

# Display the cloud without axes
plt.imshow(wc)
plt.axis('off')
plt.show()
示例#51
0
fig, ax1 = plt.subplots()
ax1.bar(repeat.keys(), repeat.values())
fig.autofmt_xdate()
plt.savefig('graph.png')
plt.show()

'''

custom_mask = np.array(Image.open('static/img/twitter_mask.jpg'))
wordcloud = WordCloud(background_color='white',
                      contour_width=3,
                      contour_color='Black',
                      max_font_size=300,
                      min_font_size=25)

wordcloud.generate(only_emotion)
'''
# plot the WordCloud image
plt.figure(figsize=(8, 8), facecolor=None)
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.tight_layout(pad=0)
plt.margins(x=0, y=0)
plt.savefig('graph1.png')
plt.show()
'''
'''
# shaping my word cloud
twitter_mask = np.array(Image.open("static/img/twitter_mask.png"))
print(twitter_mask)  # Values of 255 are pure white, whereas values of 1 are black.
twitter_mask = twitter_mask.reshape((twitter_mask.shape[0], -1), order='F') #3d into 2d
示例#52
0
# The whole novel as one space-separated string
novel_as_string = ' '.join(word_list)

# In[89]:

icon = Image.open(WHALE_FILE)
# White RGB canvas of the icon's size
image_mask = Image.new(mode='RGB', size=icon.size, color=(255, 255, 255))
# Paste the icon through itself as the mask. The original passed it as
# `box=icon`, which only worked through Pillow's abbreviated paste(im, mask)
# fallback; the explicit keyword states the intent.
image_mask.paste(icon, mask=icon)

rgb_array = np.array(image_mask)  # converts the image object to an array

word_cloud = WordCloud(mask=rgb_array,
                       background_color='white',
                       max_words=400,
                       colormap='ocean')

word_cloud.generate(novel_as_string)

plt.figure(figsize=[16, 8])
plt.imshow(word_cloud, interpolation='bilinear')
plt.axis('off')
plt.show()

# In[90]:

# Notebook cell: display the array dimensions
rgb_array.shape

# In[91]:

# Notebook cell: display the value stored at row 1023, column 2047
rgb_array[1023, 2047]

# In[92]:
示例#53
0
# Text the cloud is built from; filled by the read below
text = ""
with open("section10/res/이상한나라의앨리스.txt", "r", encoding="utf-8") as f:      # relative to the default root 'python'
    text = f.read()

print(text)

# Stop-word setup: call add() as many times as needed to add more
# Stop words added here: Alice, said
ignore=set(STOPWORDS)
ignore.add("Alice")
ignore.add("said")

# Create the WordCloud object
wc=WordCloud(width=1200, height=800, scale=2.0, 
                     stopwords=ignore,      # stop words
                     max_font_size=150,     # maximum font size
                     max_words=100        # maximum number of words shown
                    )

# Generate the cloud and print its words_ attribute
gen=wc.generate(text)
print(gen.words_)

pyplot.figure()

# Draw the cloud on the figure, then write it to a PNG file
pyplot.imshow(gen, interpolation="bilinear")
wc.to_file("simple2.png")

# Close the figure without showing it
pyplot.close()

示例#54
0
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from matplotlib.image import imread

# Read the whole cached text file as UTF-8
with open("cache/befull523.txt", 'r', encoding='utf-8') as f:
    data = f.read()
    # print(data)

mask_img = imread('common_library/wordcloud/asset/tree.jpg')  # needs a picture with a white background

# Word-cloud configuration
wc_config = WordCloud(
    font_path='simhei.ttf',
    width=800,
    height=600,
    background_color=None,
    mask=mask_img,  # shape of the cloud
)

# Generate the word cloud
word_cloud = wc_config.generate(text=data)
# Save the word cloud
word_cloud.to_file("cache/befull523.jpg")
# Display the word cloud
plt.imshow(word_cloud, interpolation='bilinear')
plt.axis('off')
plt.show()
# importar libreria de realización de gráficas de MathPlotLib
import matplotlib.pyplot as plt 

# Importar de la libreria para generar nubes de palabras
from wordcloud import WordCloud, STOPWORDS

# Importar libreria para imagenes
from PIL import Image

# Create the word-cloud generator.
# NOTE: each assignment below replaces the previous object, so only the last
# line's option (the mask) is in effect when generate() is called; the lines
# read as a list of individual options.
wordCloud = WordCloud()											# create a word-cloud generator with defaults
wordCloud = WordCloud(background_color='white')					# set the background colour
wordCloud = WordCloud(max_words=2000)							# set the maximum number of words to draw
wordCloud = WordCloud(stopwords=set(STOPWORDS))					# remove stop words (words without meaning)
wordCloud = WordCloud(mask=np.array(Image.open('dirección')))	# use an image as the mask for the word cloud

# Generate the word cloud for the given text
wordCloud.generate('texto')

# Plot the word cloud
plt.imshow(wordCloud, interpolation='bilinear')
plt.axis('off')

示例#56
0
def generate_mask_word_cloud(words, mask, tv_show):
    """Save a mask-shaped word cloud for a TV show.

    Args:
        words: Text the cloud is generated from.
        mask: Image file opened with PIL and converted to RGB for the mask.
        tv_show: Used both as the output directory name and as the file-name
            prefix: ``./<tv_show>/<tv_show>-mask-word-cloud.png``.
    """
    mask_image = Image.open(mask, 'r')
    char_mask = np.array(mask_image.convert('RGB'))

    cloud = WordCloud(background_color="white", width=400, height=400, mask=char_mask)
    cloud.generate(words)

    output_dir = "./" + tv_show + "/"
    cloud.to_file(output_dir + tv_show + '-mask-word-cloud.png')
示例#57
0
        tips.append(json.loads(line))
df_tips = pd.DataFrame(tips)
df_ri = df_business_restaurant.loc[(df_business_restaurant['name']=='Ramen Isshin') & criteria]
df_ri_tips = df_tips.loc[df_tips['business_id'].isin(df_ri.business_id)]


# replace in text
def text_prep(text):
    """Lower-case ``text``, remove non-letters and drop English stop words.

    Args:
        text: The raw string to clean.

    Returns:
        The remaining words joined by single spaces.
    """
    # Keep only a-z and whitespace. Raw string: '\s' in a plain literal is an
    # invalid escape sequence.
    letters_only = re.sub(r'[^a-z\s]', '', text.lower())
    # Fetch the stop-word list once per call (the original re-evaluated it for
    # every word) and use a set for constant-time lookups
    english_stopwords = set(stopwords.words('english'))
    return ' '.join(w for w in letters_only.split() if w not in english_stopwords)


# Silence the chained-assignment warning for the column added below
pd.set_option('mode.chained_assignment', None)
# Clean every tip with text_prep
df_ri_tips['text_cl'] = df_ri_tips['text'].apply(text_prep)
# Create a word cloud
wc = WordCloud(width=1600, height=800, random_state=42, max_words=1000000)
# Feed the cleaned text itself. The original used str(Series), which is the
# pandas display repr (index labels, possible "..." truncation and the
# trailing "Name: ..., dtype: ..." line), not the full text.
wc.generate(' '.join(df_ri_tips['text_cl']))
plt.figure(figsize=(15, 10), facecolor='black')
plt.title('Tips of Ramen Isshin', fontsize=40, color='white')
plt.imshow(wc, interpolation='bilinear')
plt.axis('off')
plt.tight_layout(pad=10)




import re

df = pd.read_csv("megadata_csv.csv")
print(df)

df.head()

# Remove punctuation. Raw string: '\.' in a plain literal is an invalid
# escape sequence.
df['title'] = df['title'].map(lambda x: re.sub(r'[,\.!?]', '', x))
# Convert the titles to lowercase
df['title'] = df['title'].map(lambda x: x.lower())

df.to_csv("clean.csv")

# Join the different processed titles together.
long_string = ','.join(list(df['title'].values))

# Create a WordCloud object
wordcloud = WordCloud(background_color="white",
                      max_words=5000,
                      contour_width=3,
                      contour_color='steelblue')

# Generate a word cloud
wordcloud.generate(long_string)

# Visualize the word cloud (the returned image is displayed when this is the
# last expression of a notebook cell)
wordcloud.to_image()
示例#59
0
# Draw a word cloud for the sentiment chosen in the expander below
world_clouds_exp = st.beta_expander('Word Clouds By Sentiment')
# Only build the section when wdf has rows
if len(wdf):
    with world_clouds_exp:
        word_sentiment = st.selectbox('Sentiment Type', 
                        ['positive', 'neutral', 'negative'], key='1')
        # Rows whose sentiment matches the selection
        wcdf = wdf[wdf['sentiment']==word_sentiment].copy()
        words = ' '.join(wcdf['text'])
        # Drop tokens starting with '@' and the literal token 'RT'
        wc_words = ' '.join([w for w in words.split() if not w.startswith('@') and w != 'RT'])

        if len(wc_words):
            st.markdown('#### Word cloud for %s sentiment' % (word_sentiment))
            wc = WordCloud(stopwords=STOPWORDS, background_color='white', width=768, height=480)
            fig = plt.figure()
            plt.imshow(wc.generate(wc_words))
            # Hide the tick marks on both axes
            plt.xticks([])
            plt.yticks([])
            st.pyplot(fig)
        else:
            # Nothing left after filtering: show a placeholder message instead
            st.markdown("#### No Words to Plot.")
            st.write('\n\n\n')


## show ent types (bar) by sentiment
ent_exp = st.beta_expander('Entities By Sentiment')
if len(wdf):
    with ent_exp:
        ent_sentiment = st.selectbox('Sentiment Type', 
                        ['positive', 'neutral', 'negative'], key='3')
        etdf = wdf[wdf['sentiment']==ent_sentiment].copy()
示例#60
-1
    def generatewordcloud(party, inputImageFileName, outputImageFileName):
        global stopwordshearing
        speakerData = data[data.Party == party]
        allText = ""
        for index, row in speakerData.iterrows():
        	allText += str(row['Text']).lower()+" "
        allText = allText.replace("e-mail","email")
        allText = allText.replace("e- mail","email")
        allText = allText.replace("op-ed","oped")
        #print (allText)
        ImageFile.LOAD_TRUNCATED_IMAGES = True

        img = Image.open(inputImageFileName)
        img = img.resize((980,1080), Image.ANTIALIAS)
        sl = STOPWORDS | stopwordshearing
        speakerArray = np.array(img)
        wc = WordCloud(background_color="white", max_words=1000, mask=speakerArray, stopwords=sl,
                random_state=42)
        
        wc.generate(allText)
        print wc.words_
        # create coloring from image
        image_colors = ImageColorGenerator(speakerArray)
        wc.recolor(color_func=image_colors)
        wc.to_file(outputImageFileName)