Example #1
def make_cloud(channel, time, myType=None, drawLabels=True, font_path=None):
    #get the log file
    directory = "logs/" + channel + '/' + time 

    if myType is None:
        file_path = os.path.relpath(directory + '/words.log')
        with open(file_path, 'r') as f:
            words = f.read().upper()

        file_path = os.path.relpath(directory + '/emotes.log')
        with open(file_path, 'r') as f:
            emotes = " ".join(filter(lambda x:len(x)>3 and x != 'double' and x != 'triple', f.read().split('\n')))

        directory = "images/" + channel + '/' + time
        if not os.path.exists(directory):
            os.makedirs(directory)

        print "Generating word cloud... Hold on! (This takes a while if there are a lot of messages)"
        scale = 2
        w = wordcloud.process_text(words, max_features=1500)
        elements = wordcloud.fit_words(w, width=w_words/scale, height=h_words/scale)
        wordcloud.draw(elements, os.path.relpath(directory + '/wordcloud.png'), 
                       width=w_words/scale, height=h_words/scale, scale=scale)
        print "Word cloud created!"

        print "Generating emote cloud..."
        w = wordcloud.process_text(emotes, max_features=1500)
        elements = wordcloud.fit_words(w, width=w_emotes, height=h_emotes)
        wordcloud.draw(elements, os.path.relpath(directory + '/emotecloud.png'), 
                       width=w_emotes, height=h_emotes)
        print "Emote cloud created!"
    else:  # used when running the program manually; mainly for debugging purposes
        w_custom = 1100
        h_custom = 700
        file_path = os.path.relpath(directory + '/'+myType+'.log')

        directory = "images/" + channel + '/' + time
        if not os.path.exists(directory):
            os.makedirs(directory)

        with open(file_path, 'r') as f:
            data = f.read()
        if myType.lower() == 'authors':
            data = data.upper()
        print "Generating " +myType+ " cloud... Hold on!"
        scale = 2
        w = wordcloud.process_text(data, max_features=1000)
        elements = wordcloud.fit_words(w, width=w_custom/scale, 
                                       height=h_custom/scale, font_path=font_path)
        wordcloud.draw(elements, os.path.relpath(directory + '/'+myType+'cloud.png'), 
              width=w_custom/scale, height=h_custom/scale, scale=scale, font_path=font_path)
        print myType + " cloud created!"
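Note: this function also relies on names defined elsewhere in the original script (`os`, `wordcloud`, and the canvas sizes `w_words`, `h_words`, `w_emotes`, `h_emotes`). A minimal set of assumed stand-ins so the snippet runs:

import os
import wordcloud

w_words, h_words = 1280, 720    # assumed word-cloud canvas size
w_emotes, h_emotes = 1280, 720  # assumed emote-cloud canvas size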
Example #2
    def make_cloud(self, text):

        words = wordcloud.process_text(text)
        elements = wordcloud.fit_words(words, width=400, height=400)
        wordcloud.draw(elements, self.out, width=400, height=400, scale=2)

        return self.out
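A minimal usage sketch, assuming a hypothetical wrapper class whose `out` attribute holds the output PNG path:

# maker = CloudMaker(out='cloud.png')   # hypothetical class providing self.out
# png_path = maker.make_cloud(open('alice.txt').read())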
Example #4
def analizer(query, num_topics, dictionary, corpus, alpha):    
    image_path = "/media/University/UniversityDisc/2-Master/MasterThesis/EjecucionTesis/Desarrollo/PythonProjects/QueryAnalyzer/Models/"
    model_path = image_path
    #lda = models.ldamodel.LdaModel(corpus, num_topics=num_topics, id2word=dictionary, update_every=1, chunksize=50, passes=1)
    lda_2 = models.ldamodel.LdaModel(corpus, num_topics=num_topics, id2word=dictionary, alpha=alpha, update_every=1, chunksize=50, passes=1)    
    
    dictionary.save(model_path + "tmp_dictionary.dict")  
    corpora.MmCorpus.serialize(model_path + "tmp_corpus.mm", corpus)
    lda_2.save(model_path + 'tmp_model.lda') # same for tfidf, lsi, ...
    
    
    goals_distribution = ldam.perQueryGoalProportions(query, dictionary, lda_2)
    max_goal = ldam.viewPerQueryGoalProportions(goals_distribution)
    show_goal = lda_2.show_topic(max_goal)
    #print show_goal
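    # Note: show_topic() here returns (weight, word) pairs (older gensim
    # ordering), so the loop below swaps them into (word, weight) tuples,
    # which is what fit_words() expects.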
    new_goal = []
    for goal in show_goal:
        weight = goal[0]
        tag = goal[1]
        new_goal.append((tag, weight))
        
    # Compute the position of the words.
    elements = wordcloud.fit_words(new_goal, width=100, height=100)
    image_path = "/media/University/UniversityDisc/2-Master/MasterThesis/EjecucionTesis/Desarrollo/PythonProjects/QueryAnalyzer/Models/"
    # Draw the positioned words to a PNG file.    
    wordcloud.draw(elements, path.join(image_path + 'image.png'), width=100, height=100, scale=2)    
    lda_model = lda_2
    return image_path, lda_model
Example #5
 def makeCloud(self, text, font=None):
     if font is None:
         font = random.choice(self.fonts)
     words, counts = wordcloud.process_text(text, max_features=2000)
     elements = wordcloud.fit_words(words, counts, width=self.size,
             height=self.size, font_path=font)
     wordcloud.draw(elements, self.outFile, width=self.size,
             height=self.size, scale=self.scale, font_path=font)
Example #7
def draw_goal(lsi, topic):
    other_goal = lsi.show_topic(topic)
    new_goal = []
    image_path = "/media/University/UniversityDisc/2-Master/MasterThesis/EjecucionTesis/Desarrollo/PythonProjects/QueryAnalyzer/Models/"
    
    for weight, tag in other_goal:
        new_goal.append((tag, weight))
    
    elements = wordcloud.fit_words(new_goal, width=100, height=100)
    wordcloud.draw(elements, path.join(image_path + 'other_image.png'), width=100, height=100, scale=2)  
Example #8
def make_word_cloud(text, filepath):
    import wordcloud  #@UnresolvedImport

    if isinstance(text, str):
        text = wordcloud.process_text(text, max_features=20)

    w, h = (400, 400)
    text = remove_letters(text)
    elements = wordcloud.fit_words(text, width=w, height=h)

    wordcloud.draw(elements, filepath, width=w, height=h, scale=1)
    return filepath
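`remove_letters` is defined elsewhere in the original project; a plausible stand-in (an assumption, not the original implementation) that drops single-character tokens:

def remove_letters(words):
    # keep only (word, frequency) pairs whose word is longer than one character
    return [(w, c) for w, c in words if len(w) > 1]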
Example #9
 def makeDoge(self, words, sid):
     image = random.choice(self.images)
     
     img = Image.open(image['original'])
     width, height = img.size
     initialFontSize = int(height * self.config['initialFontSize'])
     elements = wordcloud.fit_words(words, width=width, height=height,
             font_path=self.config['fontPath'], prefer_horiz=1.0, 
             initial_font_size=initialFontSize)
     imagePath = '{0}-{1}.png'.format(datetime.now().isoformat(), sid)
     imagePath = path.join(self.doneDir, imagePath)                
     self.draw(image, elements, imagePath)
     return imagePath
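# Assumed context, inferred from the calls above: self.images is a list of
# dicts with at least an 'original' image path, self.config supplies
# 'initialFontSize' (a fraction of the image height) and 'fontPath', and
# self.draw composites the fitted words onto the chosen image.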
Example #10
def words_cloud(args):

    voc = sio.loadmat(args.vocabulary)[args.voc_key]

    folds = [x for x in os.listdir(args.exp) if "fold" in x]
    folds.sort()

    if args.force_fold >= 0:
        folds = [x for x in folds if "%d" % args.force_fold in x]
    exp_opts = pickle.load(file(os.sep.join([args.exp, "cmd_opts"]), "rb"))
    out_dir = os.sep.join([
        args.out,
        "u_lambda=%s" % exp_opts.u_lambda,
        "w_lambda=%s" % exp_opts.w_lambda
    ])
    if not os.path.exists(out_dir): os.makedirs(out_dir)

    logger.info("Number of folds: %d" % len(folds))
    for fold in folds:
        fold_dir = os.sep.join([args.exp, fold])
        epochs = [x for x in os.listdir(fold_dir) if "epoch" in x]
        epochs.sort()
        if args.force_epoch >= 0:
            epochs = [x for x in epochs if "%d" % args.force_epoch in x]
        for epoch_name in epochs:
            ef_out_dir = os.sep.join([out_dir, fold, epoch_name])
            if not os.path.exists(ef_out_dir): os.makedirs(ef_out_dir)
            epoch_path = os.sep.join([fold_dir, epoch_name])
            epoch = load_compressed(epoch_path)

            region_task_words = scored_epoch_words(epoch, voc, args.nwords,
                                                   args.force_region)

            for r in region_task_words:
                for t in region_task_words[r]:
                    for pn in region_task_words[r][t]:
                        if args.for_wordle:
                            outname = "%s_r%dt%d.wordle" % (pn, r, t)
                            outfile = os.sep.join([ef_out_dir, outname])
                            f = file(outfile, "w")
                            for word, score in region_task_words[r][t][pn]:
                                f.write("%s: %2.5f\n" % (word, score))
                            f.close()
                        else:
                            outname = "%s_r%dt%d.png" % (pn, r, t)
                            fit_rtw = wordcloud.fit_words(
                                region_task_words[r][t][pn],
                                font_path=args.font)
                            wordcloud.draw(fit_rtw,
                                           os.sep.join([ef_out_dir, outname]),
                                           font_path=args.font)
Example #11
def ldaGoalDistribution(goals_distribution, max_goal, image_path, lda_n, name):    
    show_goal = lda_n.show_topic(max_goal)
    print show_goal
    new_goal = []
    for goal in show_goal:
        weight = goal[0]
        tag = goal[1]
        new_goal.append((tag, weight))
        
    # Compute the position of the words.
    elements = wordcloud.fit_words(new_goal, width=100, height=100)
    
    # Draw the positioned words to a PNG file.    
    wordcloud.draw(elements, path.join(image_path + name), width=100, height=100, scale=2)                
Example #12
def freq_words(string):


	print "\n\n\n\t\t\tReading from file"

	#tokenize on white spaces
	raw_word_list=word_tokenize(string)

	#remove stop words
	processed_word_list=[word for word in raw_word_list if word not in total_stop_words]

	#create an nltk text object
	text_obj=nltk.Text(processed_word_list)

	print "\n\n\n\t\t\tProcessing"

	#Call the frequency distribution method and store the words and corresponding frequencies in a dictionary
	fd=FreqDist(text_obj)

	
	#convert the dictionary to a list of tuples containing key-value pairs
	result=fd.items()
	

	#select the 100 most frequent words. If number of words in the result is less than 100, adjust accordingly
	if len(result) < 100:
		result_length=len(result)
		chosen_words=result[: result_length/2]
	else:
		chosen_words=result[:100]
	
	print "\n\n\n\t\t\tDrawing cloud"


	#specify the canvas measurement
	elements = wordcloud.fit_words(chosen_words, width=500, height=500)


	#draw the cloud
	wordcloud.draw(elements, path.join(d, 'frequent_words.png'), width=500, height=500,
        scale=2)


	print "\n\n\n\t\t\tWord cloud generated in frequent_words.png file"
	

	return
Example #13
def drawTags(model, lsi, query, dictionary, image_path, tfidf):
    print "Init drawTags"   
    goals_distribution = model.perQueryGoalProportions(query, dictionary, tfidf, lsi)
    max_goal = model.viewPerQueryGoalProportions(goals_distribution)    
    show_goal = lsi.show_topic(max_goal)
    print show_goal
      
    new_goal = []
    for goal in show_goal:
        weight = goal[0]
        tag = goal[1]
        new_goal.append((tag, weight))
        
    # Compute the position of the words.
    elements = wordcloud.fit_words(new_goal, width=100, height=100)    
    # Draw the positioned words to a PNG file.    
    wordcloud.draw(elements, path.join(image_path + 'lsa-image.png'), width=100, height=100, scale=2)    
Example #14
def main():

    dictionary = gensim.corpora.Dictionary.load(
        "dict_abstracts_corpus_1_cleaned.dict")
    corpus = gensim.corpora.MmCorpus("abstracts_corpus_1_cleaned.mm")
    lda = gensim.models.LdaModel.load("abstracts_corpus_1_cleaned.lda")

    list_of_topics = lda.show_topics(100, formatted=False)

    for i in range(len(list_of_topics)):
        # Compute the position of the words.
        elements = wordcloud.fit_words([(str(l[1]), l[0])
                                        for l in list_of_topics[i]],
                                       font_path="/Library/Fonts/Tahoma.ttf")
        # Draw the positioned words to a PNG file.
        wordcloud.draw(elements,
                       "../topic_%i.png" % i,
                       font_path="/Library/Fonts/Tahoma.ttf")
Example #16
def plot_word_cloud(predata):
    # word cloud based on word frequency
    word_df=predata['Comment_Text'].copy()
    wordlist=word_df.values.tolist()
    wordlist1=','.join(wordlist)
    segment=jieba.lcut(wordlist1)
    words_df=pd.DataFrame({'segment':segment})
    # print(words_df.head())

    stopwords=pd.read_csv("C:\\Users\\13174\\Desktop\\my_flask\\flask_01_mysql_Css\\MLmodel\\hit_stopwords.txt",index_col=False,quoting=3,sep="\t",names=['stopword'], encoding='utf-8')#quoting=3全不引用
    # stopwords=pd.read_csv("..\MLmodel\hit_stopwords.txt",index_col=False,quoting=3,sep="\t",names=['stopword'], encoding='utf-8')#quoting=3全不引用
    # print(stopwords)
    word_df1=words_df[~words_df.segment.isin(stopwords.stopword)]

    # word frequency counts
    words_stat = word_df1.groupby('segment').agg(计数=pd.NamedAgg(column='segment', aggfunc='size')).reset_index().sort_values(
        by='计数', ascending=False)

    # print(words_stat.head())

    matplotlib.rcParams['figure.figsize'] = (10.0, 5.0)

    wordcloud=WordCloud(font_path="simhei.ttf",background_color="white",max_font_size=80)  # specify the font, background color, and maximum font size
    word_frequence = {x[0]: x[1] for x in words_stat.head(1000).values}
    wordcloud = wordcloud.fit_words(word_frequence)
    plt.imshow(wordcloud)
    plt.xticks([])  # hide the x-axis ticks
    plt.yticks([])  # hide the y-axis ticks
    # plt.savefig('../static/img/WordCloud.jpg')
    plt.savefig('C:\\Users\\13174\\Desktop\\my_flask\\flask_01_mysql_Css\\static\\img\\WordCloud.jpg')
    plt.show()
Example #17
import os
import wordcloud

MODELS_DIR = "models"

final_topics = open(os.path.join(MODELS_DIR, "final_topics.txt"), 'rb')
curr_topic = 0
for line in final_topics:
    line = line.strip()[line.rindex(":") + 2:]
    scores = [float(x.split("*")[0]) for x in line.split(" + ")]
    words = [x.split("*")[1] for x in line.split(" + ")]
    freqs = []
    for word, score in zip(words, scores):
        freqs.append((word, score))
    elements = wordcloud.fit_words(freqs, width=120, height=120)
    wordcloud.draw(elements, "gs_topic_%d.png" % (curr_topic),
                   width=120, height=120)
    curr_topic += 1
final_topics.close()
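The parsing above assumes each line of final_topics.txt follows gensim's formatted topic-string layout; an illustrative (made-up) line:

# topic #0: 0.025*market + 0.018*price + 0.012*trade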
Example #18
#!/usr/bin/env python2

from os import path
import sys
import wordcloud

# print 'Number of arguments:', len(sys.argv), 'arguments.'
# print 'Argument List:', str(sys.argv)

# Experimenting with random seeds
import random
random.seed(42)

if len(sys.argv) != 5:
    print "[ USAGE ]: ", sys.argv[0], " <WordsFile> <OutputFile> <Width> <Height>"
    sys.exit()

d = path.dirname(__file__)

# Read the whole text
text = open(path.join(d, sys.argv[1])).read()

# Separate into a list of (word, frequency).
words = wordcloud.process_text(text, max_features=500)
# Compute the position of the words.
elements = wordcloud.fit_words(words, width=int(sys.argv[3]), height=int(sys.argv[4]))
# Draw the positioned words to a PNG file.
wordcloud.draw(elements, path.join(d, sys.argv[2]), width=int(sys.argv[3]), height=int(sys.argv[4]), scale=2)
Example #19
topicKeys = {}  # assumed initialization; see the commented-out block below
with open(keysFile,'rU') as csvFile:
    fileReader = csv.reader(csvFile, delimiter='\t', quotechar='|')
    for row in fileReader:
        topicKeys[int(row[0])] = row[2].split(' ')

# main work        
for source in sources:
    print "Processing %s..." %source
    topics, weightSum = get_top_topics([source],docWeights,weightThreshold)
    text = ""
    for topic in topics:
        t = " ".join(topicKeys[topic]) + " "
        text += t
    
    words = wordcloud.process_text(text)
    elements = wordcloud.fit_words(words,font_path=fontPath)
    outpath = "/Users/jchan/Desktop/Dropbox/Research/Dissertation/OpenIDEO/Pipeline/Validation/wordclouds/k400t50/%s.png" %source
    wordcloud.draw(elements, outpath, font_path=fontPath)

## read in the concept list
#concepts = {}
#with open(conceptFile, 'rU') as csvfile:
#    filereader = csv.reader(csvfile, delimiter=',', quotechar='|')
#    for row in filereader:
#        concepts[row[0]] = row[1]
#
## grab topic-keys -> hash: key = topic, value = list of words
#topicKeys = {}
#with open(keysFile,'rU') as csvFile:
#    fileReader = csv.reader(csvFile, delimiter='\t', quotechar='|')
#    for row in fileReader:
Example #20
def generateCloud(text):
	d = path.dirname(__file__)  # renamed from `dir` to avoid shadowing the builtin
	words = wordcloud.process_text(text, max_features=1000)
	elements = wordcloud.fit_words(words, width=1000, height=1000)
	wordcloud.draw(elements, path.join(d, 'wordcloud.png'), width=1000, height=1000)
Example #21
import os
import wordcloud

MODELS_DIR = "."

final_topics = open(os.path.join(MODELS_DIR, "final_topics.txt"), 'rb')
curr_topic = 0
for line in final_topics:
    line = line.strip()[line.rindex(":") + 2:]
    scores = [float(x.split("*")[0]) for x in line.split(" + ")]
    words = [x.split("*")[1] for x in line.split(" + ")]
    freqs = []
    for word, score in zip(words, scores):
        freqs.append((word, score))
    elements = wordcloud.fit_words(freqs, width=120, height=120)
    wordcloud.draw(elements,
                   "gs_topic_%d.png" % (curr_topic),
                   width=120,
                   height=120)
    curr_topic += 1
final_topics.close()
Example #22
__FILENAME__ = more
#!/usr/bin/env python2

from os import path
import sys
import wordcloud

d = path.dirname(__file__)

# Read the whole text.
text = open(path.join(d, 'alice.txt')).read()
# Separate into a list of (word, frequency).
words = wordcloud.process_text(text, max_features=2000)
# Compute the position of the words.
elements = wordcloud.fit_words(words, width=500, height=500)
# Draw the positioned words to a PNG file.
wordcloud.draw(elements, path.join(d, 'alice.png'), width=500, height=500,
        scale=2)

########NEW FILE########
__FILENAME__ = simple
#!/usr/bin/env python2

from os import path
import sys
import wordcloud

d = path.dirname(__file__)

# Read the whole text.
text = open(path.join(d, 'constitution.txt')).read()
Example #23
#!/usr/bin/env python2

from os import path
import sys
import wordcloud

d = path.dirname(__file__)

# Read the whole text.
text = open(path.join(d, 'presinaug-addresses.txt')).read()
# Separate into a list of (word, frequency).
words = wordcloud.process_text(text, max_features=10000)
# Compute the position of the words.
elements = wordcloud.fit_words(words, width=900, height=1600)
# Draw the positioned words to a PNG file.
wordcloud.draw(elements,
               path.join(d, 'presinaug-wordcloud-1600x900.png'),
               width=900,
               height=1600,
               scale=1)
Example #24
def wordclouds(x):
    d = path.dirname("/Users/MrG/Capstone/")
    words = wordcloud.process_text(str(x), max_features=500)
    elements = wordcloud.fit_words(words)
    wordcloud.draw(elements, path.join(d, "WC.png"), scale=5)
    return Image(filename='/Users/MrG/Capstone/WC.png', height=1000, width=618)
Example #25
def produceWordCloud(inputText, outputPng):
	words = wordcloud.process_text(inputText, max_features=400)
	elements = wordcloud.fit_words(words, width=800, height=500)
	wordcloud.draw(elements, outputPng, width=800, height=500, scale=2)
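A hypothetical call, assuming a plain-text file as input:

# produceWordCloud(open('speech.txt').read(), 'speech.png')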
Example #26
# Cluster
km = KMeans(n_clusters=true_k, init='k-means++', max_iter=100, n_init=1, verbose=False)
km.fit(X)


# Create cluster outputs
output_dict = {'cluster': km.labels_, 'values': dataset}
output_df = pd.DataFrame(output_dict)

# Create text files 
for i in range(true_k):
    print len(output_df[output_df.cluster == i]), round(100*len(output_df[output_df.cluster == i]) / float(len(output_df)), 2)

    cluster_text = output_df['values'][output_df.cluster == i].values
    temp = "cluster " + str(i) + ".txt"
    
    with open(temp, "w") as outfile:
       for j in cluster_text:
           outfile.write("%s\n" % j)

# Create wordclouds
for i in range(true_k):
    text = open('cluster ' + str(i) + '.txt').read()
    # Separate into a list of (word, frequency).
    words = wordcloud.process_text(text)
    # Compute the position of the words.
    elements = wordcloud.fit_words(words, font_path='/Library/Fonts/Arial Black.ttf', width=600, height=300)
    # Draw the positioned words to a PNG file.
    wordcloud.draw(elements, 'cluster ' + str(i) + '.png', font_path="/Library/Fonts/Arial Black.ttf", width=600, height=300)
Example #28
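# Assumed context for this fragment: `model` is a fitted gensim LDA model,
# `topics` holds per-document (topic_id, weight) lists, `counts` is a numpy
# array tallying topic occurrences, and `path` / `wordcloud` are imported.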
for doc_top in topics:
	for ti,_ in doc_top:
		counts[ti] += 1

# most talked about topics
words_max = model.show_topic(counts.argmax(), 50)

# least talked about topics
words_min = model.show_topic(counts.argmin(), 50)

wf_max = []
wlist_max = []
for i,j in words_max:
	wlist_max.append(j)
for i in range(50):
	wf_max.append((wlist_max[i],counts[i]))

wf_min = []
wlist_min = []
for i,j in words_min:
	wlist_min.append(j)
for i in range(50):
	wf_min.append((wlist_min[i],counts[i+50]))

d = path.dirname(__file__)

elements_max = wordcloud.fit_words(wf_max)
wordcloud.draw(elements_max, path.join(d, 'top50.png'),scale=3)

elements_min = wordcloud.fit_words(wf_min)
wordcloud.draw(elements_min, path.join(d, 'bottom50.png'),scale=3)
Example #29
File: mkcloud.py  Project: bstrds/4chdm
#!/usr/bin/env python2

from os import path
import sys
import wordcloud

d = path.dirname(__file__)

# Read the whole text.
text = open(path.join(d, '4chdata/all.dat')).read()
# Separate into a list of (word, frequency).
words = wordcloud.process_text(text, max_features=1000)
# Compute the position of the words.
elements = wordcloud.fit_words(words, width=1000, height=1000)
# Draw the positioned words to a PNG file.
wordcloud.draw(elements, path.join(d, str(sys.argv[1])), width=1000, height=1000,
        scale=2)
Example #30
        txtfeatWords5 = [lmtzr.lemmatize(word) for word in txtfeatWords4]

        # stem using snowball stemmer
        txtfeatWords6 = [stemmer.stem(word) for word in txtfeatWords5]

        # remove punctuations
        txtfeatWords7 = [
            word.encode('utf-8').translate(None, string.punctuation)
            for word in txtfeatWords6
        ]

        # remove empty strings
        txtfeatWords8 = [word for word in txtfeatWords7 if word != '']

        txtfeatWordList[i] = ' '.join(txtfeatWords8)

        #pprint('Iteration: %d'% i)
        #pprint(txtfeatWordList[i])

pprint(txtfeatWordList)

text = '\n'.join([str(txtfeatWordList[i]) for i in range(num_total)])

#tags = make_tags(get_tag_counts( '\n'.join([ str(txtfeatWordList[i]) for i in range(num_total) ]) ))
#create_tag_image(tags, 'cloud_large.png', size=(1800, 1200), fontname='Lobster')

d = os.path.dirname(__file__)
words = wordcloud.process_text(text)
elements = wordcloud.fit_words(words)
wordcloud.draw(elements, os.path.join(d, 'lemmatized_wordle.png'))
Example #31
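# Assumed imports for this fragment: numpy, pandas as pd,
# matplotlib.pyplot as plt, and WordCloud from the wordcloud package;
# segmentDF is assumed to be a DataFrame of tokenized segments.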
stopwords = pd.read_csv(
    "D:\my_documents\competition\government\\stopwords_addition.txt",
    encoding='utf8',
    index_col=False,
    quoting=3,
    sep="\t")

segmentDF = segmentDF[~segmentDF.segment.isin(stopwords.iloc[:, 0])]  # pass the stopword column, not the whole DataFrame, to isin()

segStat = segmentDF.groupby(by=["segment"])["segment"].agg({
    "计数": numpy.size
}).reset_index().sort_values(["计数"], ascending=False)

segStat.head(100)

# Draw the word cloud
#http://www.lfd.uci.edu/~gohlke/pythonlibs/
wordcloud = WordCloud(font_path='simhei.ttf', background_color="white")

words = segStat.set_index('segment').to_dict()

wordcloud = wordcloud.fit_words(words['计数'])

plt.figure(num=None, figsize=(100, 80), dpi=100, facecolor='w', edgecolor='k')

plt.axis("off")
plt.imshow(wordcloud)
plt.show()

plt.close()
Example #32
d = path.dirname(__file__)

# String to hold the text from the webpages. 
text = "";

# Array of webpages which we'll loop through (from googling Deonte Burton draft).
url_list = ["http://www.draftexpress.com/profile/Deonte-Burton-6487/", 
            "http://blogs.rgj.com/chrismurray/2014/01/10/nba-scouts-view-nevadas-deonte-burton-as-solid-draft-pick-but-not-a-first-rounder/", 
            "http://www.nbadraftroom.com/2014/01/deonte-burton.html", 
            "http://www.nbadraftinsider.com/deonte-burton/", 
            "http://nbaprospects.blogspot.com/2012/08/scouting-report-deonte-burton-nevada.html",
            "http://rushthecourt.net/2014/01/09/a-college-basketball-resolution-for-2014-get-to-know-nevadas-deonte-burton/",
            "http://mrsportsblog.wordpress.com/2014/03/05/trust-me-on-this-dynamic-deonte-burton-of-nevada-will-be-making-a-living-in-the-nba/", 
            "http://www.draftexpress.com/article/NBA-Draft-Prospect-of-the-Week-Deonte-Burton-4392/",
            "http://www.nevadawolfpack.com/sports/m-baskbl/spec-rel/021214aad.html"] 

# Loop through url items and get the text from each. 
for url in url_list:
    content = urllib2.urlopen(url)

    text += Document(content).summary() + " "

# Separate into a list of (word, frequency).
words = wordcloud.process_text(text)

# Compute the position of the words. 
elements = wordcloud.fit_words(words)

# Draw the positioned words to a PNG file. 
wordcloud.draw(elements, path.join(d, 'db2.png'))
Example #33
def genWordCloud(filename):
    textArray = openTxt(filename)
    count = countWords(textArray, 1000)
    words = wordcloud.fit_words(count, width=500, height=500)
    wordcloud.draw(words, pngPath + os.path.splitext(filename)[0] + '.png', width=500, height=500, scale=2)
    return 'Cloud generated for {}'.format(filename)
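A hypothetical call, assuming `openTxt`, `countWords`, and `pngPath` are defined elsewhere in the project:

# genWordCloud('alice.txt')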