import os

import wordcloud

# w_words/h_words and w_emotes/h_emotes are module-level canvas dimensions.
def make_cloud(channel, time, myType=None, drawLabels=True, font_path=None):
    # get the log file
    directory = "logs/" + channel + '/' + time
    if myType is None:
        file_path = os.path.relpath(directory + '/words.log')
        with open(file_path, 'r') as f:
            words = f.read().upper()
        file_path = os.path.relpath(directory + '/emotes.log')
        with open(file_path, 'r') as f:
            # drop short tokens and the 'double'/'triple' modifier words
            emotes = " ".join(filter(lambda x: len(x) > 3 and x != 'double' and x != 'triple',
                                     f.read().split('\n')))
        directory = "images/" + channel + '/' + time
        if not os.path.exists(directory):
            os.makedirs(directory)
        print "Generating word cloud... Hold on! (This takes a while if there are a lot of messages)"
        scale = 2
        w = wordcloud.process_text(words, max_features=1500)
        elements = wordcloud.fit_words(w, width=w_words / scale, height=h_words / scale)
        wordcloud.draw(elements, os.path.relpath(directory + '/wordcloud.png'),
                       width=w_words / scale, height=h_words / scale, scale=scale)
        print "Word cloud created!"
        print "Generating emote cloud..."
        w = wordcloud.process_text(emotes, max_features=1500)
        elements = wordcloud.fit_words(w, width=w_emotes, height=h_emotes)
        wordcloud.draw(elements, os.path.relpath(directory + '/emotecloud.png'),
                       width=w_emotes, height=h_emotes)
        print "Emote cloud created!"
    else:
        # if running the program manually; this is mainly for my debugging purposes.
        w_custom = 1100
        h_custom = 700
        file_path = os.path.relpath(directory + '/' + myType + '.log')
        directory = "images/" + channel + '/' + time
        if not os.path.exists(directory):
            os.makedirs(directory)
        with open(file_path, 'r') as f:
            data = f.read()
        if myType.lower() == 'authors':
            data = data.upper()
        print "Generating " + myType + " cloud... Hold on!"
        scale = 2
        w = wordcloud.process_text(data, max_features=1000)
        elements = wordcloud.fit_words(w, width=w_custom / scale, height=h_custom / scale,
                                       font_path=font_path)
        wordcloud.draw(elements, os.path.relpath(directory + '/' + myType + 'cloud.png'),
                       width=w_custom / scale, height=h_custom / scale, scale=scale,
                       font_path=font_path)
        print myType + " cloud created!"
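# A minimal usage sketch for make_cloud above, assuming the logs/<channel>/<time>/
# directory layout it reads from; the channel name and timestamp below are
# hypothetical placeholders, and w_words/h_words/w_emotes/h_emotes must be defined.
if __name__ == '__main__':
    make_cloud('somechannel', '2014-06-01')                    # word + emote clouds
    make_cloud('somechannel', '2014-06-01', myType='authors')  # single custom cloud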
def make_cloud(self, text):
    words = wordcloud.process_text(text)
    elements = wordcloud.fit_words(words, width=400, height=400)
    wordcloud.draw(elements, self.out, width=400, height=400, scale=2)
    return self.out
import random

import wordcloud

def makeCloud(self, text, font=None):
    if font is None:
        font = random.choice(self.fonts)
    # this older wordcloud API returns words and counts as two separate sequences
    words, counts = wordcloud.process_text(text, max_features=2000)
    elements = wordcloud.fit_words(words, counts, width=self.size, height=self.size,
                                   font_path=font)
    wordcloud.draw(elements, self.outFile, width=self.size, height=self.size,
                   scale=self.scale, font_path=font)
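# A hypothetical owner class for makeCloud above, sketching the four attributes the
# method reads (fonts, size, outFile, scale); the font path and numbers here are
# placeholder assumptions, not values from the original project.
class CloudMaker(object):
    def __init__(self, out_file, fonts, size=600, scale=2):
        self.fonts = fonts        # candidate font paths for random.choice
        self.size = size          # square canvas edge, in pixels
        self.scale = scale        # upscaling factor passed to wordcloud.draw
        self.outFile = out_file   # destination PNG path

CloudMaker.makeCloud = makeCloud  # attach the function above as a method

# maker = CloudMaker('cloud.png', ['/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf'])
# maker.makeCloud(open('corpus.txt').read())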
def make_word_cloud(text, filepath):
    import wordcloud  # @UnresolvedImport
    if isinstance(text, str):
        # raw strings are tokenized into (word, frequency) pairs first
        text = wordcloud.process_text(text, max_features=20)
    w, h = (400, 400)
    text = remove_letters(text)  # helper defined elsewhere in this project
    elements = wordcloud.fit_words(text, width=w, height=h)
    wordcloud.draw(elements, filepath, width=w, height=h, scale=1)
    return filepath
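# remove_letters is not shown in the snippet above; a plausible reading, given that
# process_text yields (word, frequency) pairs, is a filter dropping one-letter tokens.
# This is a guess at its intent, not the original helper:
def remove_letters(word_counts):
    return [(word, count) for (word, count) in word_counts if len(word) > 1]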
import urllib2
from os import path

import wordcloud
from readability import Document  # Document(...).summary() matches the readability-lxml API

d = path.dirname(__file__)

# String to hold the text from the webpages.
text = ""

# List of webpages we'll loop through (from googling Deonte Burton draft).
url_list = [
    "http://www.draftexpress.com/profile/Deonte-Burton-6487/",
    "http://blogs.rgj.com/chrismurray/2014/01/10/nba-scouts-view-nevadas-deonte-burton-as-solid-draft-pick-but-not-a-first-rounder/",
    "http://www.nbadraftroom.com/2014/01/deonte-burton.html",
    "http://www.nbadraftinsider.com/deonte-burton/",
    "http://nbaprospects.blogspot.com/2012/08/scouting-report-deonte-burton-nevada.html",
    "http://rushthecourt.net/2014/01/09/a-college-basketball-resolution-for-2014-get-to-know-nevadas-deonte-burton/",
    "http://mrsportsblog.wordpress.com/2014/03/05/trust-me-on-this-dynamic-deonte-burton-of-nevada-will-be-making-a-living-in-the-nba/",
    "http://www.draftexpress.com/article/NBA-Draft-Prospect-of-the-Week-Deonte-Burton-4392/",
    "http://www.nevadawolfpack.com/sports/m-baskbl/spec-rel/021214aad.html",
]

# Loop through the URLs and append the readable text from each page.
for url in url_list:
    content = urllib2.urlopen(url)
    text += Document(content).summary() + " "

# Separate into a list of (word, frequency).
words = wordcloud.process_text(text)

# Compute the position of the words.
elements = wordcloud.fit_words(words)

# Draw the positioned words to a PNG file.
wordcloud.draw(elements, path.join(d, 'db2.png'))
# lemmatize
txtfeatWords5 = [lmtzr.lemmatize(word) for word in txtfeatWords4]
# stem using snowball stemmer
txtfeatWords6 = [stemmer.stem(word) for word in txtfeatWords5]
# remove punctuation
txtfeatWords7 = [word.encode('utf-8').translate(None, string.punctuation)
                 for word in txtfeatWords6]
# remove empty strings
txtfeatWords8 = [word for word in txtfeatWords7 if word != '']
txtfeatWordList[i] = ' '.join(txtfeatWords8)
#pprint('Iteration: %d' % i)
#pprint(txtfeatWordList[i])

pprint(txtfeatWordList)
text = '\n'.join([str(txtfeatWordList[i]) for i in range(num_total)])
#tags = make_tags(get_tag_counts('\n'.join([str(txtfeatWordList[i]) for i in range(num_total)])))
#create_tag_image(tags, 'cloud_large.png', size=(1800, 1200), fontname='Lobster')

d = os.path.dirname(__file__)
words = wordcloud.process_text(text)
elements = wordcloud.fit_words(words)
wordcloud.draw(elements, os.path.join(d, 'lemmatized_wordle.png'))
#!/usr/bin/env python2
from os import path

import wordcloud

d = path.dirname(__file__)

# Read the whole text.
text = open(path.join(d, 'presinaug-addresses.txt')).read()

# Separate into a list of (word, frequency).
words = wordcloud.process_text(text, max_features=10000)

# Compute the position of the words.
elements = wordcloud.fit_words(words, width=900, height=1600)

# Draw the positioned words to a PNG file.
wordcloud.draw(elements, path.join(d, 'presinaug-wordcloud-1600x900.png'),
               width=900, height=1600, scale=1)
def getTopWords(self, text):
    words = wordcloud.process_text(text, self.config['maxWords'], self.stopwords)
    return words
#!/usr/bin/env python2
from os import path
import sys

import wordcloud

d = path.dirname(__file__)

# Read the whole text.
text = open(path.join(d, '4chdata/all.dat')).read()

# Separate into a list of (word, frequency).
words = wordcloud.process_text(text, max_features=1000)

# Draw the positioned words to a PNG file named by the first CLI argument.
elements = wordcloud.fit_words(words, width=1000, height=1000)
wordcloud.draw(elements, path.join(d, sys.argv[1]), width=1000, height=1000, scale=2)
import wordcloud

def produceWordCloud(inputText, outputPng):
    words = wordcloud.process_text(inputText, max_features=400)
    elements = wordcloud.fit_words(words, width=800, height=500)
    wordcloud.draw(elements, outputPng, width=800, height=500, scale=2)
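# Example call for produceWordCloud above; the input and output file names are
# hypothetical placeholders:
# produceWordCloud(open('speech.txt').read(), 'speech_cloud.png')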
from os import path

import wordcloud
from IPython.display import Image  # Image(filename=...) matches the IPython.display API

def wordclouds(x):
    d = path.dirname("/Users/MrG/Capstone/")
    words = wordcloud.process_text(str(x), max_features=500)
    elements = wordcloud.fit_words(words)
    wordcloud.draw(elements, path.join(d, "WC.png"), scale=5)
    # return the rendered PNG for inline display in a notebook cell
    return Image(filename='/Users/MrG/Capstone/WC.png', height=1000, width=618)
from os import path

import wordcloud

def generateCloud(text):
    out_dir = path.dirname(__file__)  # renamed from 'dir' to avoid shadowing the builtin
    words = wordcloud.process_text(text, max_features=1000)
    elements = wordcloud.fit_words(words, width=1000, height=1000)
    wordcloud.draw(elements, path.join(out_dir, 'wordcloud.png'), width=1000, height=1000)