import os

import wordcloud

# w_words/h_words and w_emotes/h_emotes are module-level canvas dimensions.
def make_cloud(channel, time, myType=None, drawLabels=True, font_path=None):
    # get the log file
    directory = "logs/" + channel + '/' + time
    if myType is None:
        file_path = os.path.relpath(directory + '/words.log')
        with open(file_path, 'r') as f:
            words = f.read().upper()
        file_path = os.path.relpath(directory + '/emotes.log')
        with open(file_path, 'r') as f:
            # drop short tokens and the 'double'/'triple' modifier words
            emotes = " ".join(filter(lambda x: len(x) > 3 and x != 'double' and x != 'triple',
                                     f.read().split('\n')))
        directory = "images/" + channel + '/' + time
        if not os.path.exists(directory):
            os.makedirs(directory)
        print "Generating word cloud... Hold on! (This takes a while if there are a lot of messages)"
        scale = 2
        w = wordcloud.process_text(words, max_features=1500)
        elements = wordcloud.fit_words(w, width=w_words / scale, height=h_words / scale)
        wordcloud.draw(elements, os.path.relpath(directory + '/wordcloud.png'),
                       width=w_words / scale, height=h_words / scale, scale=scale)
        print "Word cloud created!"
        print "Generating emote cloud..."
        w = wordcloud.process_text(emotes, max_features=1500)
        elements = wordcloud.fit_words(w, width=w_emotes, height=h_emotes)
        wordcloud.draw(elements, os.path.relpath(directory + '/emotecloud.png'),
                       width=w_emotes, height=h_emotes)
        print "Emote cloud created!"
    else:
        # if running the program manually; this is mainly for my debugging purposes.
        w_custom = 1100
        h_custom = 700
        file_path = os.path.relpath(directory + '/' + myType + '.log')
        directory = "images/" + channel + '/' + time
        if not os.path.exists(directory):
            os.makedirs(directory)
        with open(file_path, 'r') as f:
            data = f.read()
        if myType.lower() == 'authors':
            data = data.upper()
        print "Generating " + myType + " cloud... Hold on!"
        scale = 2
        w = wordcloud.process_text(data, max_features=1000)
        elements = wordcloud.fit_words(w, width=w_custom / scale, height=h_custom / scale,
                                       font_path=font_path)
        wordcloud.draw(elements, os.path.relpath(directory + '/' + myType + 'cloud.png'),
                       width=w_custom / scale, height=h_custom / scale, scale=scale,
                       font_path=font_path)
        print myType + " cloud created!"
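# A minimal usage sketch for make_cloud above, assuming the logs/<channel>/<time>/
# directory layout it reads from; the channel name and timestamp below are
# hypothetical placeholders, and w_words/h_words/w_emotes/h_emotes must be defined.
if __name__ == '__main__':
    make_cloud('somechannel', '2014-06-01')                    # word + emote clouds
    make_cloud('somechannel', '2014-06-01', myType='authors')  # single custom cloud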
def make_cloud(self, text):
    words = wordcloud.process_text(text)
    elements = wordcloud.fit_words(words, width=400, height=400)
    wordcloud.draw(elements, self.out, width=400, height=400, scale=2)
    return self.out
import random

import wordcloud

def makeCloud(self, text, font=None):
    if font is None:
        font = random.choice(self.fonts)
    # this older wordcloud API returns words and counts as two separate sequences
    words, counts = wordcloud.process_text(text, max_features=2000)
    elements = wordcloud.fit_words(words, counts, width=self.size, height=self.size,
                                   font_path=font)
    wordcloud.draw(elements, self.outFile, width=self.size, height=self.size,
                   scale=self.scale, font_path=font)
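# A hypothetical owner class for makeCloud above, sketching the four attributes the
# method reads (fonts, size, outFile, scale); the font path and numbers here are
# placeholder assumptions, not values from the original project.
class CloudMaker(object):
    def __init__(self, out_file, fonts, size=600, scale=2):
        self.fonts = fonts        # candidate font paths for random.choice
        self.size = size          # square canvas edge, in pixels
        self.scale = scale        # upscaling factor passed to wordcloud.draw
        self.outFile = out_file   # destination PNG path

CloudMaker.makeCloud = makeCloud  # attach the function above as a method

# maker = CloudMaker('cloud.png', ['/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf'])
# maker.makeCloud(open('corpus.txt').read())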
def make_word_cloud(text, filepath):
    import wordcloud  # @UnresolvedImport
    if isinstance(text, str):
        # raw strings are tokenized into (word, frequency) pairs first
        text = wordcloud.process_text(text, max_features=20)
    w, h = (400, 400)
    text = remove_letters(text)  # helper defined elsewhere in this project
    elements = wordcloud.fit_words(text, width=w, height=h)
    wordcloud.draw(elements, filepath, width=w, height=h, scale=1)
    return filepath
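# remove_letters is not shown in the snippet above; a plausible reading, given that
# process_text yields (word, frequency) pairs, is a filter dropping one-letter tokens.
# This is a guess at its intent, not the original helper:
def remove_letters(word_counts):
    return [(word, count) for (word, count) in word_counts if len(word) > 1]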
import urllib2
from os import path

import wordcloud
from readability import Document  # Document(...).summary() matches the readability-lxml API

d = path.dirname(__file__)

# String to hold the text from the webpages.
text = ""

# List of webpages we'll loop through (from googling Deonte Burton draft).
url_list = [
    "http://www.draftexpress.com/profile/Deonte-Burton-6487/",
    "http://blogs.rgj.com/chrismurray/2014/01/10/nba-scouts-view-nevadas-deonte-burton-as-solid-draft-pick-but-not-a-first-rounder/",
    "http://www.nbadraftroom.com/2014/01/deonte-burton.html",
    "http://www.nbadraftinsider.com/deonte-burton/",
    "http://nbaprospects.blogspot.com/2012/08/scouting-report-deonte-burton-nevada.html",
    "http://rushthecourt.net/2014/01/09/a-college-basketball-resolution-for-2014-get-to-know-nevadas-deonte-burton/",
    "http://mrsportsblog.wordpress.com/2014/03/05/trust-me-on-this-dynamic-deonte-burton-of-nevada-will-be-making-a-living-in-the-nba/",
    "http://www.draftexpress.com/article/NBA-Draft-Prospect-of-the-Week-Deonte-Burton-4392/",
    "http://www.nevadawolfpack.com/sports/m-baskbl/spec-rel/021214aad.html",
]

# Loop through the URLs and append the readable text from each page.
for url in url_list:
    content = urllib2.urlopen(url)
    text += Document(content).summary() + " "

# Separate into a list of (word, frequency).
words = wordcloud.process_text(text)

# Compute the position of the words.
elements = wordcloud.fit_words(words)

# Draw the positioned words to a PNG file.
wordcloud.draw(elements, path.join(d, 'db2.png'))
# lemmatize
txtfeatWords5 = [lmtzr.lemmatize(word) for word in txtfeatWords4]
# stem using snowball stemmer
txtfeatWords6 = [stemmer.stem(word) for word in txtfeatWords5]
# remove punctuation
txtfeatWords7 = [word.encode('utf-8').translate(None, string.punctuation)
                 for word in txtfeatWords6]
# remove empty strings
txtfeatWords8 = [word for word in txtfeatWords7 if word != '']
txtfeatWordList[i] = ' '.join(txtfeatWords8)
#pprint('Iteration: %d' % i)
#pprint(txtfeatWordList[i])

pprint(txtfeatWordList)
text = '\n'.join([str(txtfeatWordList[i]) for i in range(num_total)])
#tags = make_tags(get_tag_counts('\n'.join([str(txtfeatWordList[i]) for i in range(num_total)])))
#create_tag_image(tags, 'cloud_large.png', size=(1800, 1200), fontname='Lobster')

d = os.path.dirname(__file__)
words = wordcloud.process_text(text)
elements = wordcloud.fit_words(words)
wordcloud.draw(elements, os.path.join(d, 'lemmatized_wordle.png'))
#!/usr/bin/env python2
from os import path

import wordcloud

d = path.dirname(__file__)

# Read the whole text.
text = open(path.join(d, 'presinaug-addresses.txt')).read()

# Separate into a list of (word, frequency).
words = wordcloud.process_text(text, max_features=10000)

# Compute the position of the words.
elements = wordcloud.fit_words(words, width=900, height=1600)

# Draw the positioned words to a PNG file.
wordcloud.draw(elements, path.join(d, 'presinaug-wordcloud-1600x900.png'),
               width=900, height=1600, scale=1)
def getTopWords(self, text):
    words = wordcloud.process_text(text, self.config['maxWords'], self.stopwords)
    return words
#!/usr/bin/env python2
from os import path
import sys

import wordcloud

d = path.dirname(__file__)

# Read the whole text.
text = open(path.join(d, '4chdata/all.dat')).read()

# Separate into a list of (word, frequency).
words = wordcloud.process_text(text, max_features=1000)

# Draw the positioned words to a PNG file named by the first CLI argument.
elements = wordcloud.fit_words(words, width=1000, height=1000)
wordcloud.draw(elements, path.join(d, sys.argv[1]), width=1000, height=1000, scale=2)
import wordcloud

def produceWordCloud(inputText, outputPng):
    words = wordcloud.process_text(inputText, max_features=400)
    elements = wordcloud.fit_words(words, width=800, height=500)
    wordcloud.draw(elements, outputPng, width=800, height=500, scale=2)
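# Example call for produceWordCloud above; the input and output file names are
# hypothetical placeholders:
# produceWordCloud(open('speech.txt').read(), 'speech_cloud.png')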
from os import path

import wordcloud
from IPython.display import Image  # Image(filename=...) matches the IPython.display API

def wordclouds(x):
    d = path.dirname("/Users/MrG/Capstone/")
    words = wordcloud.process_text(str(x), max_features=500)
    elements = wordcloud.fit_words(words)
    wordcloud.draw(elements, path.join(d, "WC.png"), scale=5)
    # return the rendered PNG for inline display in a notebook cell
    return Image(filename='/Users/MrG/Capstone/WC.png', height=1000, width=618)
from os import path

import wordcloud

def generateCloud(text):
    out_dir = path.dirname(__file__)  # renamed from 'dir' to avoid shadowing the builtin
    words = wordcloud.process_text(text, max_features=1000)
    elements = wordcloud.fit_words(words, width=1000, height=1000)
    wordcloud.draw(elements, path.join(out_dir, 'wordcloud.png'), width=1000, height=1000)