def make_cloud(channel, time, myType=None, drawLabels=True, font_path=None):
    # Get the log file directory.
    directory = "logs/" + channel + '/' + time
    if myType is None:
        file_path = os.path.relpath(directory + '/words.log')
        with open(file_path, 'r') as f:
            words = f.read().upper()
        file_path = os.path.relpath(directory + '/emotes.log')
        with open(file_path, 'r') as f:
            emotes = " ".join(filter(lambda x: len(x) > 3 and x != 'double' and x != 'triple',
                                     f.read().split('\n')))
        directory = "images/" + channel + '/' + time
        if not os.path.exists(directory):
            os.makedirs(directory)
        print "Generating word cloud... Hold on! (This takes a while if there are a lot of messages)"
        scale = 2
        w = wordcloud.process_text(words, max_features=1500)
        elements = wordcloud.fit_words(w, width=w_words / scale, height=h_words / scale)
        wordcloud.draw(elements, os.path.relpath(directory + '/wordcloud.png'),
                       width=w_words / scale, height=h_words / scale, scale=scale)
        print "Word cloud created!"
        print "Generating emote cloud..."
        w = wordcloud.process_text(emotes, max_features=1500)
        elements = wordcloud.fit_words(w, width=w_emotes, height=h_emotes)
        wordcloud.draw(elements, os.path.relpath(directory + '/emotecloud.png'),
                       width=w_emotes, height=h_emotes)
        print "Emote cloud created!"
    else:
        # Used when running the program manually; mainly for debugging.
        w_custom = 1100
        h_custom = 700
        file_path = os.path.relpath(directory + '/' + myType + '.log')
        directory = "images/" + channel + '/' + time
        if not os.path.exists(directory):
            os.makedirs(directory)
        with open(file_path, 'r') as f:
            data = f.read()
        if myType.lower() == 'authors':
            data = data.upper()
        print "Generating " + myType + " cloud... Hold on!"
        scale = 2
        w = wordcloud.process_text(data, max_features=1000)
        elements = wordcloud.fit_words(w, width=w_custom / scale, height=h_custom / scale,
                                       font_path=font_path)
        wordcloud.draw(elements, os.path.relpath(directory + '/' + myType + 'cloud.png'),
                       width=w_custom / scale, height=h_custom / scale, scale=scale,
                       font_path=font_path)
        print myType + " cloud created!"
def make_cloud(self, text):
    words = wordcloud.process_text(text)
    elements = wordcloud.fit_words(words, width=400, height=400)
    wordcloud.draw(elements, self.out, width=400, height=400, scale=2)
    return self.out
def analizer(query, num_topics, dictionary, corpus, alpha):
    image_path = "/media/University/UniversityDisc/2-Master/MasterThesis/EjecucionTesis/Desarrollo/PythonProjects/QueryAnalyzer/Models/"
    model_path = image_path
    #lda = models.ldamodel.LdaModel(corpus, num_topics=num_topics, id2word=dictionary, update_every=1, chunksize=50, passes=1)
    lda_2 = models.ldamodel.LdaModel(corpus, num_topics=num_topics, id2word=dictionary,
                                     alpha=alpha, update_every=1, chunksize=50, passes=1)
    dictionary.save(model_path + "tmp_dictionary.dict")
    corpora.MmCorpus.serialize(model_path + "tmp_corpus.mm", corpus)
    lda_2.save(model_path + 'tmp_model.lda')  # same for tfidf, lsi, ...
    goals_distribution = ldam.perQueryGoalProportions(query, dictionary, lda_2)
    max_goal = ldam.viewPerQueryGoalProportions(goals_distribution)
    show_goal = lda_2.show_topic(max_goal)
    #print show_goal
    new_goal = []
    for goal in show_goal:
        weight = goal[0]
        tag = goal[1]
        new_goal.append((tag, weight))
    # Compute the position of the words.
    elements = wordcloud.fit_words(new_goal, width=100, height=100)
    # Draw the positioned words to a PNG file.
    wordcloud.draw(elements, path.join(image_path, 'image.png'), width=100, height=100, scale=2)
    lda_model = lda_2
    return image_path, lda_model
def makeCloud(self, text, font=None):
    if font is None:
        font = random.choice(self.fonts)
    words, counts = wordcloud.process_text(text, max_features=2000)
    elements = wordcloud.fit_words(words, counts, width=self.size, height=self.size,
                                   font_path=font)
    wordcloud.draw(elements, self.outFile, width=self.size, height=self.size,
                   scale=self.scale, font_path=font)
def draw_goal(lsi, topic):
    other_goal = lsi.show_topic(topic)
    new_goal = []
    image_path = "/media/University/UniversityDisc/2-Master/MasterThesis/EjecucionTesis/Desarrollo/PythonProjects/QueryAnalyzer/Models/"
    for goal in other_goal:
        weight = goal[0]
        tag = goal[1]
        new_goal.append((tag, weight))
    elements = wordcloud.fit_words(new_goal, width=100, height=100)
    wordcloud.draw(elements, path.join(image_path, 'other_image.png'), width=100, height=100, scale=2)
def make_word_cloud(text, filepath):
    import wordcloud  # @UnresolvedImport
    if isinstance(text, str):
        text = wordcloud.process_text(text, max_features=20)
    w, h = (400, 400)
    text = remove_letters(text)
    elements = wordcloud.fit_words(text, width=w, height=h)
    wordcloud.draw(elements, filepath, width=w, height=h, scale=1)
    return filepath
def makeDoge(self, words, sid):
    image = random.choice(self.images)
    img = Image.open(image['original'])
    width, height = img.size
    initialFontSize = int(height * self.config['initialFontSize'])
    elements = wordcloud.fit_words(words, width=width, height=height,
                                   font_path=self.config['fontPath'],
                                   prefer_horiz=1.0,
                                   initial_font_size=initialFontSize)
    imagePath = '{0}-{1}.png'.format(datetime.now().isoformat(), sid)
    imagePath = path.join(self.doneDir, imagePath)
    self.draw(image, elements, imagePath)
    return imagePath
def words_cloud(args):
    voc = sio.loadmat(args.vocabulary)[args.voc_key]
    folds = [x for x in os.listdir(args.exp) if "fold" in x]
    folds.sort()
    if args.force_fold >= 0:
        folds = [x for x in folds if "%d" % args.force_fold in x]
    exp_opts = pickle.load(file(os.sep.join([args.exp, "cmd_opts"]), "rb"))
    out_dir = os.sep.join([args.out,
                           "u_lambda=%s" % exp_opts.u_lambda,
                           "w_lambda=%s" % exp_opts.w_lambda])
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    logger.info("Number of folds: %d" % len(folds))
    for fold in folds:
        fold_dir = os.sep.join([args.exp, fold])
        epochs = [x for x in os.listdir(fold_dir) if "epoch" in x]
        epochs.sort()
        if args.force_epoch >= 0:
            epochs = [x for x in epochs if "%d" % args.force_epoch in x]
        for epoch_name in epochs:
            ef_out_dir = os.sep.join([out_dir, fold, epoch_name])
            if not os.path.exists(ef_out_dir):
                os.makedirs(ef_out_dir)
            epoch_path = os.sep.join([fold_dir, epoch_name])
            epoch = load_compressed(epoch_path)
            region_task_words = scored_epoch_words(epoch, voc, args.nwords, args.force_region)
            for r in region_task_words:
                for t in region_task_words[r]:
                    for pn in region_task_words[r][t]:
                        if args.for_wordle:
                            outname = "%s_r%dt%d.wordle" % (pn, r, t)
                            outfile = os.sep.join([ef_out_dir, outname])
                            f = file(outfile, "w")
                            for word, score in region_task_words[r][t][pn]:
                                f.write("%s: %2.5f\n" % (word, score))
                            f.close()
                        else:
                            outname = "%s_r%dt%d.png" % (pn, r, t)
                            fit_rtw = wordcloud.fit_words(region_task_words[r][t][pn],
                                                          font_path=args.font)
                            wordcloud.draw(fit_rtw,
                                           os.sep.join([ef_out_dir, outname]),
                                           font_path=args.font)
def ldaGoalDistribution(goals_distribution, max_goal, image_path, lda_n, name):
    show_goal = lda_n.show_topic(max_goal)
    print show_goal
    new_goal = []
    for goal in show_goal:
        weight = goal[0]
        tag = goal[1]
        new_goal.append((tag, weight))
    # Compute the position of the words.
    elements = wordcloud.fit_words(new_goal, width=100, height=100)
    # Draw the positioned words to a PNG file.
    wordcloud.draw(elements, path.join(image_path + name), width=100, height=100, scale=2)
def freq_words(string):
    print "\n\n\n\t\t\tReading from file"
    # Tokenize on white space.
    raw_word_list = word_tokenize(string)
    # Remove stop words.
    processed_word_list = [word for word in raw_word_list if word not in total_stop_words]
    # Create an NLTK text object.
    text_obj = nltk.Text(processed_word_list)
    print "\n\n\n\t\t\tProcessing"
    # Compute the frequency distribution, mapping words to their frequencies.
    fd = FreqDist(text_obj)
    # Convert the distribution to a list of (word, frequency) tuples.
    result = fd.items()
    # Select the 100 most frequent words; if there are fewer, adjust accordingly.
    if len(result) < 100:
        result_length = len(result)
        chosen_words = result[:result_length / 2]
    else:
        chosen_words = result[:100]
    print "\n\n\n\t\t\tDrawing cloud"
    # Specify the canvas measurements.
    elements = wordcloud.fit_words(chosen_words, width=500, height=500)
    # Draw the cloud.
    wordcloud.draw(elements, path.join(d, 'frequent_words.png'), width=500, height=500, scale=2)
    print "\n\n\n\t\t\tWord cloud generated in frequent_words.png file"
    return
def drawTags(model, lsi, query, dictionary, image_path, tfidf):
    print "Init drawTags"
    goals_distribution = model.perQueryGoalProportions(query, dictionary, tfidf, lsi)
    max_goal = model.viewPerQueryGoalProportions(goals_distribution)
    show_goal = lsi.show_topic(max_goal)
    print show_goal
    new_goal = []
    for goal in show_goal:
        weight = goal[0]
        tag = goal[1]
        new_goal.append((tag, weight))
    # Compute the position of the words.
    elements = wordcloud.fit_words(new_goal, width=100, height=100)
    # Draw the positioned words to a PNG file.
    wordcloud.draw(elements, path.join(image_path + 'lsa-image.png'), width=100, height=100, scale=2)
def main():
    dictionary = gensim.corpora.Dictionary.load("dict_abstracts_corpus_1_cleaned.dict")
    corpus = gensim.corpora.MmCorpus("abstracts_corpus_1_cleaned.mm")
    lda = gensim.models.LdaModel.load("abstracts_corpus_1_cleaned.lda")
    list_of_topics = lda.show_topics(100, formatted=False)
    for i in range(len(list_of_topics)):
        # Compute the position of the words.
        elements = wordcloud.fit_words([(str(l[1]), l[0]) for l in list_of_topics[i]],
                                       font_path="/Library/Fonts/Tahoma.ttf")
        # Draw the positioned words to a PNG file.
        wordcloud.draw(elements, "../topic_%i.png" % i,
                       font_path="/Library/Fonts/Tahoma.ttf")
def plot_word_cloud(predata):
    # Word cloud based on word frequency.
    word_df = predata['Comment_Text'].copy()
    wordlist = word_df.values.tolist()
    wordlist1 = ','.join(wordlist)
    segment = jieba.lcut(wordlist1)
    words_df = pd.DataFrame({'segment': segment})
    # print(words_df.head())
    # quoting=3 disables quoting entirely.
    stopwords = pd.read_csv("C:\\Users\\13174\\Desktop\\my_flask\\flask_01_mysql_Css\\MLmodel\\hit_stopwords.txt",
                            index_col=False, quoting=3, sep="\t",
                            names=['stopword'], encoding='utf-8')
    # stopwords = pd.read_csv("..\\MLmodel\\hit_stopwords.txt", index_col=False, quoting=3, sep="\t", names=['stopword'], encoding='utf-8')
    # print(stopwords)
    word_df1 = words_df[~words_df.segment.isin(stopwords.stopword)]
    # Word frequency statistics.
    words_stat = word_df1.groupby('segment').agg(
        计数=pd.NamedAgg(column='segment', aggfunc='size')
    ).reset_index().sort_values(by='计数', ascending=False)
    # print(words_stat.head())
    matplotlib.rcParams['figure.figsize'] = (10.0, 5.0)
    # Specify the font, background color, and maximum font size.
    wordcloud = WordCloud(font_path="simhei.ttf", background_color="white", max_font_size=80)
    # Build a {word: frequency} mapping from the 1000 most frequent words.
    word_frequence = {x[0]: x[1] for x in words_stat.head(1000).values}
    wordcloud = wordcloud.fit_words(word_frequence)
    plt.imshow(wordcloud)
    plt.xticks([])  # Remove the x-axis ticks.
    plt.yticks([])  # Remove the y-axis ticks.
    # plt.savefig('../static/img/WordCloud.jpg')
    plt.savefig('C:\\Users\\13174\\Desktop\\my_flask\\flask_01_mysql_Css\\static\\img\\WordCloud.jpg')
    plt.show()
import os import wordcloud MODELS_DIR = "models" final_topics = open(os.path.join(MODELS_DIR, "final_topics.txt"), 'rb') curr_topic = 0 for line in final_topics: line = line.strip()[line.rindex(":") + 2:] scores = [float(x.split("*")[0]) for x in line.split(" + ")] words = [x.split("*")[1] for x in line.split(" + ")] freqs = [] for word, score in zip(words, scores): freqs.append((word, score)) elements = wordcloud.fit_words(freqs, width=120, height=120) wordcloud.draw(elements, "gs_topic_%d.png" % (curr_topic), width=120, height=120) curr_topic += 1 final_topics.close()
#!/usr/bin/env python2
from os import path
import sys

import wordcloud

# print 'Number of arguments:', len(sys.argv), 'arguments.'
# print 'Argument List:', str(sys.argv)

# Experimenting with random seeds.
import random
random.seed(42)

if len(sys.argv) != 5:
    print "[ USAGE ]: ", sys.argv[0], " <WordsFile> <OutputFile> <Width> <Height>"
    sys.exit()

d = path.dirname(__file__)

# Read the whole text.
text = open(path.join(d, sys.argv[1])).read()

# Separate into a list of (word, frequency).
words = wordcloud.process_text(text, max_features=500)

# Compute the position of the words.
elements = wordcloud.fit_words(words, width=int(sys.argv[3]), height=int(sys.argv[4]))

# Draw the positioned words to a PNG file.
wordcloud.draw(elements, path.join(d, sys.argv[2]),
               width=int(sys.argv[3]), height=int(sys.argv[4]), scale=2)
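A usage sketch for the script above; the script and file names are hypothetical, only the argument order (words file, output file, width, height) comes from its usage string:

#   python2 make_cloud.py alice.txt alice.png 800 600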
with open(keysFile, 'rU') as csvFile:
    fileReader = csv.reader(csvFile, delimiter='\t', quotechar='|')
    for row in fileReader:
        topicKeys[int(row[0])] = row[2].split(' ')

# Main work.
for source in sources:
    print "Processing %s..." % source
    topics, weightSum = get_top_topics([source], docWeights, weightThreshold)
    text = ""
    for topic in topics:
        t = " ".join(topicKeys[topic]) + " "
        text += t
    words = wordcloud.process_text(text)
    elements = wordcloud.fit_words(words, font_path=fontPath)
    outpath = "/Users/jchan/Desktop/Dropbox/Research/Dissertation/OpenIDEO/Pipeline/Validation/wordclouds/k400t50/%s.png" % source
    wordcloud.draw(elements, outpath, font_path=fontPath)

## read in the concept list
#concepts = {}
#with open(conceptFile, 'rU') as csvfile:
#    filereader = csv.reader(csvfile, delimiter=',', quotechar='|')
#    for row in filereader:
#        concepts[row[0]] = row[1]
#
## grab topic-keys -> hash: key = topic, value = list of words
#topicKeys = {}
#with open(keysFile, 'rU') as csvFile:
#    fileReader = csv.reader(csvFile, delimiter='\t', quotechar='|')
#    for row in fileReader:
def generateCloud(text):
    dir = path.dirname(__file__)
    words = wordcloud.process_text(text, max_features=1000)
    elements = wordcloud.fit_words(words, width=1000, height=1000)
    wordcloud.draw(elements, path.join(dir, 'wordcloud.png'), width=1000, height=1000)
import os import wordcloud MODELS_DIR = "." final_topics = open(os.path.join(MODELS_DIR, "final_topics.txt"), 'rb') curr_topic = 0 for line in final_topics: line = line.strip()[line.rindex(":") + 2:] scores = [float(x.split("*")[0]) for x in line.split(" + ")] words = [x.split("*")[1] for x in line.split(" + ")] freqs = [] for word, score in zip(words, scores): freqs.append((word, score)) elements = wordcloud.fit_words(freqs, width=120, height=120) wordcloud.draw(elements, "gs_topic_%d.png" % (curr_topic), width=120, height=120) curr_topic += 1 final_topics.close()
__FILENAME__ = more
#!/usr/bin/env python2
from os import path
import sys

import wordcloud

d = path.dirname(__file__)

# Read the whole text.
text = open(path.join(d, 'alice.txt')).read()

# Separate into a list of (word, frequency).
words = wordcloud.process_text(text, max_features=2000)

# Compute the position of the words.
elements = wordcloud.fit_words(words, width=500, height=500)

# Draw the positioned words to a PNG file.
wordcloud.draw(elements, path.join(d, 'alice.png'), width=500, height=500, scale=2)

########NEW FILE########
__FILENAME__ = simple
#!/usr/bin/env python2
from os import path
import sys

import wordcloud

d = path.dirname(__file__)

# Read the whole text.
text = open(path.join(d, 'constitution.txt')).read()
#!/usr/bin/env python2
from os import path
import sys

import wordcloud

d = path.dirname(__file__)

# Read the whole text.
text = open(path.join(d, 'presinaug-addresses.txt')).read()

# Separate into a list of (word, frequency).
words = wordcloud.process_text(text, max_features=10000)

# Compute the position of the words.
elements = wordcloud.fit_words(words, width=900, height=1600)

# Draw the positioned words to a PNG file.
wordcloud.draw(elements, path.join(d, 'presinaug-wordcloud-1600x900.png'),
               width=900, height=1600, scale=1)
def wordclouds(x):
    d = path.dirname("/Users/MrG/Capstone/")
    words = wordcloud.process_text(str(x), max_features=500)
    elements = wordcloud.fit_words(words)
    wordcloud.draw(elements, path.join(d, "WC.png"), scale=5)
    return Image(filename='/Users/MrG/Capstone/WC.png', height=1000, width=618)
def produceWordCloud(inputText, outputPng):
    words = wordcloud.process_text(inputText, max_features=400)
    elements = wordcloud.fit_words(words, width=800, height=500)
    wordcloud.draw(elements, outputPng, width=800, height=500, scale=2)
# Cluster.
km = KMeans(n_clusters=true_k, init='k-means++', max_iter=100, n_init=1, verbose=False)
km.fit(X)

# Create cluster outputs.
output_dict = {'cluster': km.labels_, 'values': dataset}
output_df = pd.DataFrame(output_dict)

# Create text files.
for i in range(true_k):
    print len(output_df[output_df.cluster == i]), \
        round(100 * len(output_df[output_df.cluster == i]) / float(len(output_df)), 2)
    cluster_text = output_df['values'][output_df.cluster == i].values
    temp = "cluster " + str(i) + ".txt"
    with open(temp, "w") as outfile:
        for j in cluster_text:
            outfile.write("%s\n" % j)

# Create word clouds.
for i in range(true_k):
    text = open('cluster ' + str(i) + '.txt').read()
    # Separate into a list of (word, frequency).
    words = wordcloud.process_text(text)
    # Compute the position of the words.
    elements = wordcloud.fit_words(words, font_path='/Library/Fonts/Arial Black.ttf',
                                   width=600, height=300)
    # Draw the positioned words to a PNG file.
    wordcloud.draw(elements, 'cluster ' + str(i) + '.png',
                   font_path="/Library/Fonts/Arial Black.ttf", width=600, height=300)
for doc_top in topics:
    for ti, _ in doc_top:
        counts[ti] += 1

# Most talked-about topic.
words_max = model.show_topic(counts.argmax(), 50)
# Least talked-about topic.
words_min = model.show_topic(counts.argmin(), 50)

wf_max = []
wlist_max = []
for i, j in words_max:
    wlist_max.append(j)
for i in range(50):
    wf_max.append((wlist_max[i], counts[i]))

wf_min = []
wlist_min = []
for i, j in words_min:
    wlist_min.append(j)
for i in range(50):
    wf_min.append((wlist_min[i], counts[i + 50]))

d = path.dirname(__file__)
elements_max = wordcloud.fit_words(wf_max)
wordcloud.draw(elements_max, path.join(d, 'top50.png'), scale=3)
elements_min = wordcloud.fit_words(wf_min)
wordcloud.draw(elements_min, path.join(d, 'bottom50.png'), scale=3)
#!/usr/bin/env python2
from os import path
import sys

import wordcloud

d = path.dirname(__file__)

# Read the whole text.
text = open(path.join(d, '4chdata/all.dat')).read()

# Separate into a list of (word, frequency).
words = wordcloud.process_text(text, max_features=1000)

# Compute the position of the words.
elements = wordcloud.fit_words(words, width=1000, height=1000)

# Draw the positioned words to a PNG file.
wordcloud.draw(elements, path.join(d, str(sys.argv[1])), width=1000, height=1000, scale=2)
txtfeatWords5 = [lmtzr.lemmatize(word) for word in txtfeatWords4]
# Stem using the Snowball stemmer.
txtfeatWords6 = [stemmer.stem(word) for word in txtfeatWords5]
# Remove punctuation.
txtfeatWords7 = [word.encode('utf-8').translate(None, string.punctuation)
                 for word in txtfeatWords6]
# Remove empty strings.
txtfeatWords8 = [word for word in txtfeatWords7 if word != '']
txtfeatWordList[i] = ' '.join(txtfeatWords8)
#pprint('Iteration: %d' % i)
#pprint(txtfeatWordList[i])

pprint(txtfeatWordList)
text = '\n'.join([str(txtfeatWordList[i]) for i in range(num_total)])
#tags = make_tags(get_tag_counts('\n'.join([str(txtfeatWordList[i]) for i in range(num_total)])))
#create_tag_image(tags, 'cloud_large.png', size=(1800, 1200), fontname='Lobster')

d = os.path.dirname(__file__)
words = wordcloud.process_text(text)
elements = wordcloud.fit_words(words)
wordcloud.draw(elements, os.path.join(d, 'lemmatized_wordle.png'))
stopwords = pd.read_csv("D:\\my_documents\\competition\\government\\stopwords_addition.txt",
                        encoding='utf8', index_col=False, quoting=3, sep="\t")
segmentDF = segmentDF[~segmentDF.segment.isin(stopwords)]
segStat = segmentDF.groupby(by=["segment"])["segment"].agg({
    "计数": numpy.size
}).reset_index().sort_values(["计数"], ascending=False)
segStat.head(100)

# Draw the word cloud.
# Prebuilt Windows wheels: http://www.lfd.uci.edu/~gohlke/pythonlibs/
wordcloud = WordCloud(font_path='simhei.ttf', background_color="white")
words = segStat.set_index('segment').to_dict()
wordcloud = wordcloud.fit_words(words['计数'])
plt.figure(num=None, figsize=(100, 80), dpi=100, facecolor='w', edgecolor='k')
plt.axis("off")
plt.imshow(wordcloud)
plt.show()
plt.close()
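The class-based snippets above feed fit_words a precomputed {word: frequency} mapping rather than raw text. A minimal self-contained sketch of that call; the words, counts, and styling here are illustrative assumptions, not values from the source:

from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Hypothetical precomputed {word: frequency} mapping.
freqs = {"cloud": 10, "word": 7, "frequency": 5}

# fit_words skips tokenization and lays out the supplied words directly.
wc = WordCloud(background_color="white", max_font_size=80).fit_words(freqs)

plt.axis("off")
plt.imshow(wc)
plt.show()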
d = path.dirname(__file__)

# String to hold the text from the webpages.
text = ""

# List of webpages to loop through (from googling "Deonte Burton draft").
url_list = ["http://www.draftexpress.com/profile/Deonte-Burton-6487/",
            "http://blogs.rgj.com/chrismurray/2014/01/10/nba-scouts-view-nevadas-deonte-burton-as-solid-draft-pick-but-not-a-first-rounder/",
            "http://www.nbadraftroom.com/2014/01/deonte-burton.html",
            "http://www.nbadraftinsider.com/deonte-burton/",
            "http://nbaprospects.blogspot.com/2012/08/scouting-report-deonte-burton-nevada.html",
            "http://rushthecourt.net/2014/01/09/a-college-basketball-resolution-for-2014-get-to-know-nevadas-deonte-burton/",
            "http://mrsportsblog.wordpress.com/2014/03/05/trust-me-on-this-dynamic-deonte-burton-of-nevada-will-be-making-a-living-in-the-nba/",
            "http://www.draftexpress.com/article/NBA-Draft-Prospect-of-the-Week-Deonte-Burton-4392/",
            "http://www.nevadawolfpack.com/sports/m-baskbl/spec-rel/021214aad.html"]

# Loop through the URLs and accumulate the text from each page.
for url in url_list:
    content = urllib2.urlopen(url)
    text += Document(content).summary() + " "

# Separate into a list of (word, frequency).
words = wordcloud.process_text(text)

# Compute the position of the words.
elements = wordcloud.fit_words(words)

# Draw the positioned words to a PNG file.
wordcloud.draw(elements, path.join(d, 'db2.png'))
def genWordCloud(filename):
    textArray = openTxt(filename)
    count = countWords(textArray, 1000)
    words = wordcloud.fit_words(count, width=500, height=500)
    wordcloud.draw(words, pngPath + os.path.splitext(filename)[0] + '.png',
                   width=500, height=500, scale=2)
    return 'Cloud generated for {}'.format(filename)