def make_wordcloud(self, *total):
    """Build a tag-cloud image ('wordcloud.jpg') from per-category rating sums.

    The first vararg is expected to be a sequence of 7 rating sums, in the
    fixed category order below.  Word size in the cloud is driven by
    frequency, so each category name is repeated in proportion to its
    (scaled) rating.
    """
    ratings = list(total)[0]
    # Scale each rating by 300; a 0 rating is bumped to 1 so the category
    # still appears in the cloud.
    weights = [(r if r != 0 else 1) * 300 for r in ratings]
    categories = ["농작물 경작", "공예(만들기)", "음식체험", "전통문화",
                  "자연생태", "건강레포츠", "산·어촌 생활"]
    # range(1, w) in the original produced w-1 repetitions; max(..., 0)
    # reproduces that exactly, replacing seven copy-pasted loops.
    cloud_words = []
    for name, weight in zip(categories, weights):
        cloud_words.extend([name] * max(int(weight) - 1, 0))
    count = Counter(cloud_words)
    tags = count.most_common(7)
    taglist = pytagcloud.make_tags(tags, maxsize=45)
    pytagcloud.create_tag_image(taglist, 'wordcloud.jpg', size=(500, 200),
                                fontname='Noto Sans CJK',
                                layout=pytagcloud.LAYOUT_MOST_HORIZONTAL)
def _create_image(self, text):
    """Render *text* as a tag-cloud PNG at /tmp/cloud_large.png.

    Exits the process when no tags can be extracted; returns 0 on success.
    """
    tag_counts = get_tag_counts(text)
    if tag_counts is None:
        sys.exit(-1)
    if self._repeat_tags:
        # Repeat every tag at three fixed weights so each word appears in
        # three sizes.
        tag_counts = (
            [(t[0], 5) for t in tag_counts]
            + [(t[0], 2) for t in tag_counts]
            + [(t[0], 1) for t in tag_counts]
        )
    tags = make_tags(tag_counts, maxsize=150, colors=self._color_scheme)
    path = os.path.join('/tmp/cloud_large.png')
    # Fit a 4:3 canvas inside the screen, keyed off the shorter dimension.
    if Gdk.Screen.height() < Gdk.Screen.width():
        height = Gdk.Screen.height()
        width = int(height * 4 / 3)
    else:
        width = Gdk.Screen.width()
        height = int(width * 3 / 4)
    extra = {} if self._font_name is None else {'fontname': self._font_name}
    create_tag_image(tags, path, layout=self._layout,
                     size=(width, height), **extra)
    return 0
def draw_cloud(tags, filename, fontname='NotoSansCJKkr-Bold', size=(1200, 800)):
    """Write *tags* to *filename* as a cloud image and open it in a browser.

    fontname='Noto Sans CJK' handles Korean text; other options include
    NotoSansCJKkr-Medium, NotoSansCJKkr-Bold, NotoSansCJKkr-Regular_0 and
    NotoSansCJKkr-Light.
    """
    pytagcloud.create_tag_image(tags, filename, size=size, fontname=fontname)
    webbrowser.open(filename)
def finance_cloud(tag):
    """Generate cloud.png (1280x800, black background) from raw text *tag*."""
    counts = get_tag_counts(tag)
    cloud_tags = make_tags(counts, maxsize=100)
    create_tag_image(cloud_tags, "cloud.png", size=(1280, 800),
                     background=(0, 0, 0, 255), fontname="SimHei")
def generate_word_cloud(counts, title):
    """Render the 20 most frequent entries of *counts* to '<title>.png'.

    counts : mapping of word -> frequency
    title  : output file basename
    """
    # dict.items() (rather than the Python-2-only iteritems()) works on
    # both Python 2 and 3.
    sorted_wordscount = sorted(counts.items(),
                               key=operator.itemgetter(1), reverse=True)[:20]
    # Generate the word cloud image
    create_tag_image(make_tags(sorted_wordscount, minsize=50, maxsize=150),
                     title + '.png', size=(1300, 1150),
                     background=(0, 0, 0, 255), layout=LAYOUT_MIX,
                     fontname='Molengo', rectangular=True)
def make_tag_cloud(data, can_be_noun_arg, process_option='freqs'):
    """Create tag-cloud image(s) for *data* using one of three weighting
    strategies ('concatenate', 'freqs' or 'race').

    A list result produces one image; a dict result produces one image per
    key.  Output names encode the strategy (and key), prefixed with 'not_'
    when can_be_noun_arg is falsy.
    """
    stop_words = sw.words()
    process_f = {
        'concatenate': lambda: concatenate(data, can_be_noun_arg, stop_words),
        'freqs': lambda: freq_weight(data, can_be_noun_arg, stop_words),
        'race': lambda: race_tfidf(data, can_be_noun_arg, stop_words),
    }
    freqs = process_f[process_option]()
    if isinstance(freqs, list):
        freqs = freqs[:30]
        # Normalize freqs in case they are counts.  The builtin sum()
        # handles a generator correctly (np.sum over a generator falls
        # back unreliably), and float() replaces the removed np.float alias.
        sum_freqs = sum(x for _, x in freqs)
        freqs = [(w, float(f) / sum_freqs) for w, f in freqs]
        tags = make_tags(freqs, maxsize=80)
        fname = 'noun_last_words_{}.png'.format(process_option)
        if not can_be_noun_arg:
            fname = 'not_' + fname
        create_tag_image(tags, fname, size=(900, 600), fontname='Lobster')
    elif isinstance(freqs, dict):
        for k in freqs:
            top_freqs = freqs[k][:30]
            # normalize
            sum_freqs = sum(x for _, x in top_freqs)
            top_freqs = [(w, float(f) / sum_freqs) for w, f in top_freqs]
            # Single-argument print() is valid on Python 2 as well.
            print(top_freqs)
            tags = make_tags(top_freqs, maxsize=15)
            fname = 'noun_last_words_{}_{}.png'.format(process_option, k)
            create_tag_image(tags, fname, size=(900, 600), fontname='Lobster')
def draw_wordcloud(self, tag, name):
    """Save *tag* counts as '<name>.jpg' using the 'Korean' font."""
    cloud = pytagcloud.make_tags(tag, maxsize=80)
    out_path = '%s.jpg' % name
    pytagcloud.create_tag_image(cloud, out_path, size=(900, 600),
                                fontname='Korean', rectangular=False)
def plot(game_name, game_id):
    """Build a word cloud ('c:/<game_name>.png') from the comments stored
    for *game_id*.

    Extracts up to 3 keywords per comment with jieba, drops single-char
    and stop words, then renders the most frequent terms.
    """
    # NOTE(review): the query is built by string interpolation; if game_id
    # can come from untrusted input this should be parameterized.
    comments = DbUtil.getAllResult(
        "select * from comment where game_id = %s" % game_id)
    word_counts = Counter()  # avoids shadowing the builtin `dict`
    for comment in comments:
        for word in jieba.analyse.extract_tags(comment[2], topK=3):
            if len(word) < 2 or word in stop:
                continue
            word_counts[word] += 1
    print(dict(word_counts))
    swd = sorted(word_counts.items(), key=itemgetter(1), reverse=True)
    # [1:50] skips the single most frequent word — preserved from the
    # original; confirm whether index 0 should actually be kept.
    swd = swd[1:50]
    tags = make_tags(swd, minsize=30, maxsize=120,
                     colors=random.choice(list(COLOR_SCHEMES.values())))
    create_tag_image(tags, 'c:/%s.png' % game_name,
                     background=(0, 0, 0, 255), size=(900, 600),
                     fontname='SimHei')
    print('having save file to dick')
def create_wordcloud(topic_id):
    """Render the top-20 words of LDA topic *topic_id* to
    WORDCLOUD_PATH/cloud<topic_id>.png.
    """
    word_tuples = LDA_MODEL.show_topic(topic_id, 20)  # [(word, freq), ...]
    words_arr = []
    freq_arr = []
    for word_tuple in word_tuples:
        try:
            # str() may fail on non-ASCII words under Python 2; such
            # words are simply skipped (narrowed from a bare except).
            word = str(word_tuple[0])
        except Exception:
            continue
        words_arr.append(word)
        freq_arr.append(word_tuple[1])
    print(words_arr)
    normalize(freq_arr)  # assumed to rescale freq_arr in place — confirm
    print(freq_arr)
    # pytagcloud wants integer counts, so scale the normalized weights.
    counts = [(w, int(f * 100)) for w, f in zip(words_arr, freq_arr)]
    # (The original also built an unused repeated-word string here; that
    # dead code has been removed.)
    tags = make_tags(counts, minsize=20, maxsize=60,
                     colors=COLOR_SCHEMES['audacity'])
    output = join(WORDCLOUD_PATH, 'cloud' + str(topic_id) + '.png')
    create_tag_image(tags=tags, output=output, size=(500, 333),
                     background=(255, 255, 255, 255), layout=3,
                     fontname='PT Sans Regular', rectangular=True)
def search(query_word):
    """Find businesses whose name contains *query_word* and render their
    aggregated review-word frequencies to static/cloud_large.jpg.

    Returns 0 when neither index has a match; otherwise None after the
    image has been written.
    """
    es = Elasticsearch()
    name_query = {"query": {"wildcard": {"name": {"value": "*" + query_word + "*"}}}}
    res = es.search(index="urban", body=name_query)
    if res['hits']['total'] == 0:
        # Fall back to the secondary index.
        res = es.search(index="champ", body=name_query)
        if res['hits']['total'] == 0:
            return 0
    freq_by_word = defaultdict(int)
    for hit in res['hits']['hits']:
        business_id = hit['_source']['business_id']
        id_query = {"query": {"match": {"business_id": business_id}}}
        doc = es.search(index="my_data", body=id_query)
        # word_freq is a list of [word, count] pairs.
        for word, count in doc['hits']['hits'][0]['_source']['word_freq']:
            freq_by_word[word] += count
    words = [(word, total) for word, total in freq_by_word.items()]
    tags = make_tags(words, maxsize=80)
    create_tag_image(tags, 'static/cloud_large.jpg', size=(900, 600),
                     fontname='Lobster')
def make_tag_cloud(data, can_be_noun_arg, process_option='freqs'):
    """Create tag-cloud image(s) for *data* using one of three weighting
    strategies ('concatenate', 'freqs' or 'race').

    A list result produces one image; a dict result produces one image per
    key.  Output names encode the strategy (and key), prefixed with 'not_'
    when can_be_noun_arg is falsy.
    """
    stop_words = sw.words()
    process_f = {
        'concatenate': lambda: concatenate(data, can_be_noun_arg, stop_words),
        'freqs': lambda: freq_weight(data, can_be_noun_arg, stop_words),
        'race': lambda: race_tfidf(data, can_be_noun_arg, stop_words)
    }
    freqs = process_f[process_option]()
    if isinstance(freqs, list):
        freqs = freqs[:30]
        # Normalize freqs in case they are counts.  The builtin sum()
        # handles a generator correctly (np.sum over a generator is
        # unreliable), and float() replaces the removed np.float alias.
        sum_freqs = sum(x for _, x in freqs)
        freqs = [(w, float(f) / sum_freqs) for w, f in freqs]
        tags = make_tags(freqs, maxsize=80)
        fname = 'noun_last_words_{}.png'.format(process_option)
        if not can_be_noun_arg:
            fname = 'not_' + fname
        create_tag_image(tags, fname, size=(900, 600), fontname='Lobster')
    elif isinstance(freqs, dict):
        for k in freqs:
            top_freqs = freqs[k][:30]
            # normalize
            sum_freqs = sum(x for _, x in top_freqs)
            top_freqs = [(w, float(f) / sum_freqs) for w, f in top_freqs]
            # Single-argument print() is valid on Python 2 as well.
            print(top_freqs)
            tags = make_tags(top_freqs, maxsize=15)
            fname = 'noun_last_words_{}_{}.png'.format(process_option, k)
            create_tag_image(tags, fname, size=(900, 600), fontname='Lobster')
def run():
    """Profiling helper: render the module-level `tags` to
    cloud_profile.png inside `test_output`."""
    out_path = os.path.join(test_output, 'cloud_profile.png')
    create_tag_image(tags, out_path, size=(1280, 900),
                     background=(0, 0, 0, 255), layout=LAYOUT_MIX,
                     fontname='Lobster')
def __generate_tag_cloud(self):
    # Extract the top-100 weighted keywords from the saved comments file
    # with jieba and render them as a 600x600 tag-cloud image.
    import jieba.analyse
    # Teach jieba a few domain words (monetization slang, game title)...
    jieba.add_word('氪金')
    jieba.add_word('逼氪')
    jieba.add_word('骗氪')
    jieba.add_word('王者荣耀')
    # ...and drop noise tokens / filler words that pollute the keywords.
    jieba.del_word('...')
    jieba.del_word('只能')
    jieba.del_word('可能')
    jieba.del_word('觉得')
    jieba.del_word('而且')
    jieba.del_word('然后')
    jieba.del_word('还有')
    jieba.del_word('游戏')
    comments_file = open(self.__comment_file_name, 'r')
    tags = jieba.analyse.extract_tags(comments_file.read(), topK=100, withWeight=True)
    comments_file.close()
    dd = []
    for i in tags:
        # jieba weights are small floats; scale to integer counts for
        # pytagcloud.
        dd.append((i[0], int(float(i[1] * 1000))))
        print 'i is ', i[0], i[1]
    tags = make_tags(dd, minsize=10, maxsize=80, colors=COLOR_SCHEMES['audacity'])
    create_tag_image(
        tags,
        self.__tag_image_file_name,
        size=(600, 600),
        layout=LAYOUT_HORIZONTAL,
        fontname='SimHei'  # !!! the font only takes effect once configured locally — see the ReadMe
    )
    print self.__tag_image_file_name
def create_cloud(oname, words, maxsize=120, fontname='Lobster'):
    '''Creates a word cloud (when pytagcloud is installed)

    Parameters
    ----------
    oname : output filename
    words : list of (value, str)
    maxsize : int, optional
        Size of maximum word.  Often needs manual tuning per input.
    fontname : str, optional
        Font to use.
    '''
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        if not warned_of_error:
            print("Could not import pytagcloud. Skipping cloud generation")
        return
    # gensim yields (weight, word) with weights in [0, 1]; pytagcloud
    # expects (word, integer count), so flip the pairs and scale up.
    scaled = [(word, int(weight * 10000)) for weight, word in words]
    tags = make_tags(scaled, maxsize=maxsize)
    create_tag_image(tags, oname, size=(1800, 1200), fontname=fontname)
def show_token_df():
    """Explore document-frequency (df) statistics of the corpus vocabulary
    and render the very frequent tokens (df > 5000) as a tag cloud."""
    dic = _build_vocabulary(dictionary_path='../data/vocabulary_all.dict')
    # tokenid -> token and tokenid -> document frequency, joined into a
    # token -> document frequency map.
    id2token = {tokenid: token for (tokenid, token) in dic.items()}
    id2df = dic.dfs
    token2df = {id2token[tokenid]: df for (tokenid, df) in id2df.items()}
    df = pd.DataFrame()
    df['token'] = token2df.keys()
    df['df'] = token2df.values()
    print(df['df'].describe())
    '''
    count 125156.000000 mean 63.621824 std 858.189270 min 1.000000
    25% 1.000000 50% 2.000000 75% 7.000000 max 39912.000000
    '''
    # Tokens that appear in almost every document (boilerplate legal terms).
    print({token: df for (token, df) in token2df.items() if df > 30000} )
    '''
    {'起诉书': 38442, '公诉': 39386, '现已': 39136, '参加': 38840, '检察员': 37974,
    '检': 37350, '机关': 39859, '元': 31317, '指控': 39265, '终结': 39468,
    '月': 39911, '证据': 37175, '年': 39912, '上述事实': 33553, '犯': 39459,
    '人民检察院': 39234, '号': 39814, '审理': 39629, '开庭审理': 35738,
    '到庭': 38301, '供述': 30093, '证实': 32083, '被告人': 39864,
    '提起公诉': 38118, '依法': 39123, '指派': 33070, '本案': 36616,
    '出庭': 34811, '支持': 35414, '公开': 38635, '中': 31875, '本院': 39852,
    '刑诉': 38329, '日': 39902, '诉讼': 38437} len 35
    '''
    # Distribution once the rare (df <= 3) and ubiquitous tokens are removed.
    print(df[(df['df'] > 3) & (df['df'] < 30000)].describe())
    filter_words = {token:df for (token,df) in token2df.items() if df>5000 }
    print(filter_words,'len %s' % len(filter_words) )
    # Sort by df (descending) and draw the cloud.
    swd = sorted(filter_words.items(), key=itemgetter(1), reverse=True)
    tags = make_tags(swd, minsize=10, maxsize=50, colors=COLOR_SCHEMES['goldfish'])
    create_tag_image(tags, 'keyword_tag_cloud4.png',size=(2400, 1000), background=(240, 255, 255), layout=LAYOUT_HORIZONTAL, fontname="SimHei")
def wordcloud(query, layout, font, max_words, verbosity=False):
    """Fetch tweets matching *query* and save a word cloud as '<query>.png'.

    Relies on module-level helpers/constants: twitter(), max_word_size,
    width, height, background_color — verify those are defined at import.
    """
    my_oauth, complete_url, stop_words = twitter(query)
    punctuation = "#@!\"$%&'()*+,-./:;<=>?[\]^_`{|}~\'" # characters exluded from tweets
    my_text = ''
    r = requests.get(complete_url, auth=my_oauth)
    tweets = r.json()
    if verbosity == True:
        print tweets
    for tweet in tweets['statuses']:
        text = tweet['text'].lower()
        text = ''.join(ch for ch in text if ch not in punctuation) # exclude punctuation from tweets
        my_text += text
    words = my_text.split()
    counts = Counter(words)
    # Counter.__delitem__ is a no-op for missing keys, so absent stop
    # words are harmless here.
    for word in stop_words:
        del counts[word]
    # Drop short tokens and URLs.  NOTE(review): deleting while iterating
    # counts.keys() is only safe on Python 2, where keys() is a snapshot
    # list — this loop raises RuntimeError on Python 3.
    for key in counts.keys():
        if len(key) < 3 or key.startswith('http'):
            del counts[key]
    final = counts.most_common(max_words)
    # Rescale counts to fractions of the most frequent word.
    max_count = max(final, key=operator.itemgetter(1))[1]
    final = [(name, count / float(max_count))for name, count in final]
    tags = make_tags(final, maxsize=max_word_size)
    create_tag_image(tags, query + '.png', size=(width, height), layout=layout, fontname=font, background=background_color)
    print "new png created"
def action(counts):
    """Render *counts* to weibo_liu.png (transparent background, SimHei)."""
    cloud_tags = make_tags(counts, minsize=15, maxsize=120)
    create_tag_image(cloud_tags, 'weibo_liu.png', background=(0, 0, 0, 0),
                     size=(1200, 1200), fontname="simhei")
def createTagCloud(self, wordline):
    """Create the tag-cloud image for *wordline*.

    Returns False for empty input, True once the image has been written
    to self.png.  The first whitespace token is dropped before counting.
    """
    if wordline == '':
        return False
    tokens = WhitespaceTokenizer().tokenize(wordline)
    # Discard the leading token, keep the rest as one text stream.
    del tokens[0]
    text = ' '.join(tokens)
    scheme = self.colorSchemes[self.color]
    tags = make_tags(get_tag_counts(text)[:self.wordCount],
                     minsize=3, maxsize=40,
                     colors=COLOR_SCHEMES[scheme])
    create_tag_image(tags, self.png,
                     size=(960, 400),
                     background=(255, 255, 255, 255),
                     layout=LAYOUT_HORIZONTAL,
                     fontname='Neuton')
    return True
def create_cloud(oname, words, maxsize=120, fontname='Lobster'):
    '''Creates a word cloud (when pytagcloud is installed)

    Parameters
    ----------
    oname : output filename
    words : list of (value,str)
    maxsize : int, optional
        Size of maximum word. The best setting for this parameter will often
        require some manual tuning for each input.
    fontname : str, optional
        Font to use.
    '''
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        if not warned_of_error:
            print("Could not import pytagcloud. Skipping cloud generation")
        return
    # gensim returns a weight between 0 and 1 for each word, while pytagcloud
    # expects an integer word count. So, we multiply by a large number and
    # round. For a visualization this is an adequate approximation.
    # NOTE(review): this unpacking treats each pair as (word, value), but
    # the docstring (and the comment above) describe (value, word) input —
    # sibling implementations in this file unpack `for v, w in words`.
    # Confirm the caller's pair order before changing anything.
    words = [(w, int(v * 10000)) for w, v in words]
    tags = make_tags(words, maxsize=maxsize)
    create_tag_image(tags, oname, size=(1800, 1200), fontname=fontname)
def word_cloud(final_object, cloud_object):
    # Count the detected object labels (ignoring the "no_object" marker and
    # single-character labels) and render them to cloud_1_All_Objects.png.
    # Python-2-only code: has_key(), iteritems() and print statements.
    import re
    from pytagcloud.lang.stopwords import StopWords
    from operator import itemgetter
    final_object = [x for x in final_object if x != "no_object"]
    counted = {}
    for word in final_object:
        if len(word) > 1:
            if counted.has_key(word):
                counted[word] += 1
            else:
                counted[word] = 1
    counts = sorted(counted.iteritems(), key=itemgetter(1), reverse=True)
    print "Total count of Word Cloud List Items: ",counts
    words = make_tags(counts, maxsize=100)
    print "Word Cloud List items: ", words
    create_tag_image(words, 'cloud_1_All_Objects.png', size=(1280, 900), fontname='Lobster')

# Module-level rendering defaults (presumably consumed by other cloud
# helpers in this file — confirm placement against the original layout).
width = 1280
height = 800
layout = 3
background_color = (255, 255, 255)
def create_cloud(out_name, words, maxsize=120, fontname='Lobster'):
    """
    Create a word cloud when pytagcloud is installed.

    :param out_name: output filename
    :param words: list of (value, str) pairs, as returned by gensim
    :param maxsize: int, optional — size of the largest word; often needs
        manual tuning per input
    :param fontname: str, optional — font to use
    :return: None
    """
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        if not warned_error:
            print("Could not import pytagcloud. Skipping cloud generation!")
        return
    # gensim yields (weight, word) with weights in [0, 1]; pytagcloud
    # expects (word, integer count) — flip the order and scale up, which
    # is an adequate approximation for visualization.
    scaled = [(word, int(weight * 10000)) for weight, word in words]
    create_tag_image(make_tags(scaled, maxsize=maxsize), out_name,
                     size=(1800, 1200), fontname=fontname)
def make_cloud(self, output_html):
    """Build a keyword cloud from all stored keywords.

    Returns HTML cloud data when *output_html* is true; otherwise writes a
    PNG via the resolver and returns {'image': <url>}.
    """
    keywords = KeywordManager().all()
    text = ' '.join(kw.keyword for kw in keywords)
    # HTML output uses fewer, smaller tags than the PNG rendering.
    max_tags, max_size = (30, 42) if output_html else (100, self.maxsize)
    tags = make_tags(get_tag_counts(text)[:max_tags],
                     minsize=self.minsize, maxsize=max_size)
    if output_html:
        return create_html_data(tags, size=(900, 300),
                                layout=LAYOUT_HORIZONTAL)
    cloud = self.resolver.resolve('jcuwords:keyword-cloud.png')
    create_tag_image(tags, cloud.abspath(), size=(1024, 500),
                     fontname='IM Fell DW Pica', layout=LAYOUT_MIX)
    image_url = self.request.resource_url(None, 'keyword-cloud.png')
    return {'image': image_url}
def tagCloud(self):
    """Render every document in self.docSet as one tag cloud
    ('filename.png')."""
    # join() instead of repeated string concatenation (O(n) vs O(n^2));
    # the leading space before the first document matches the original
    # '' + ' ' + item accumulation exactly.
    texts = ''.join(' ' + item for item in self.docSet)
    tags = make_tags(get_tag_counts(texts), maxsize=120)
    create_tag_image(tags, 'filename.png', size=(2000, 1000),
                     background=(0, 0, 0, 255), layout=LAYOUT_MIX,
                     fontname='Lobster', rectangular=True)
def words_check(request):
    """Count the nouns in the POSTed 'data' text, render a word cloud under
    a randomized filename, and return the counts + filename hash as JSON."""
    text = request.POST.get('data')
    komoran = Komoran()
    counted = Counter(komoran.nouns(text))
    # Drop one-character nouns, then sort by frequency (descending).
    multi_char = {word: cnt for word, cnt in counted.items() if len(word) != 1}
    nouns = sorted(multi_char.items(), key=lambda pair: pair[1], reverse=True)
    hashing = random.choice(range(100))
    context = {
        'nouns': nouns,
        'hashing': hashing,
    }
    # Word-cloud image, written under a randomized name so repeated
    # requests don't collide on the same file.
    taglist = pytagcloud.make_tags(nouns, minsize=10, maxsize=60)
    link = 'static/wordcloud/wordcloud' + str(hashing) + '.jpg'
    pytagcloud.create_tag_image(taglist, link, size=(600, 600), layout=3,
                                fontname='CookieRun', rectangular=True)
    return HttpResponse(json.dumps(context), content_type='application/json')
def create_wordcloud_file(self, tags, output_file):
    """Write a wordcloud PNG for *tags* to *output_file*, using the tag
    count, size limits, image size, font and background taken from
    self.configuration."""
    cfg = self.configuration
    logging.info("Creating wordcloud image file: %s" % output_file)
    # Keep only the most frequently occurring tags.
    top_tags = tags[:cfg['num_tags']]
    sized_tags = wc.make_tags(top_tags,
                              minsize=cfg['min_tag_size'],
                              maxsize=cfg['max_tag_size'])
    # Render and save the image.
    wc.create_tag_image(sized_tags, output_file,
                        size=cfg['image_size'],
                        fontname=cfg['font'],
                        layout=wc.LAYOUT_HORIZONTAL,
                        background=cfg['background'])
    logging.info("Created wordcloud image file: %s" % output_file)
    print("Created wordcloud image file: %s" % output_file)
def create_cloud(oname, words,maxsize=120, fontname='Lobster'):
    '''Creates a word cloud (when pytagcloud is installed)

    Parameters
    ----------
    oname : output filename
    words : list of (value,str)
    maxsize : int, optional
        Size of maximum word. The best setting for this parameter will often
        require some manual tuning for each input.
    fontname : str, optional
        Font to use.
    '''
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        if not warned_of_error:
            print("Could not import pytagcloud. Skipping cloud generation")
        return
    # gensim returns a weight between 0 and 1 for each word, while pytagcloud
    # expects an integer word count. So, we multiply by a large number and
    # round. For a visualization this is an adequate approximation.
    # NOTE(review): this unpacking treats each pair as (word, value) even
    # though the docstring says (value, word); sibling versions in this
    # file unpack `for v, w in words`.  Confirm the caller's order.
    words = [(w,int(v*10000)) for w,v in words]
    tags = make_tags(words, maxsize=maxsize)
    create_tag_image(tags, oname, size=(1800, 1200), fontname=fontname)
def create_cloud(oname, words, maxsize=120, fontname='Lobster'):
    '''Creates a word cloud (when pytagcloud is installed)

    Parameters
    ----------
    oname : output filename
    words : list of (value, str)
    maxsize : int, optional
        Size of maximum word; usually needs per-input tuning.
    fontname : str, optional
        Font to use.
    '''
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        if not warned_of_error:
            print("Could not import pytagcloud. Skipping cloud generation")
        return
    # gensim emits (weight, word) with weights in [0, 1]; pytagcloud wants
    # (word, count) with integer counts, so swap the order and scale up.
    counts = [(word, int(weight * 10000)) for weight, word in words]
    create_tag_image(make_tags(counts, maxsize=maxsize), oname,
                     size=(1800, 1200), fontname=fontname)
def create_file(res_id):
    # Fetch Zomato reviews for restaurant *res_id* and save a word-cloud
    # image to static/img/<res_id>.png.
    all_reviews = ''
    # NOTE(review): hard-coded API key — move to configuration or an
    # environment variable.
    api_key = 'db837d5e88fefd82d146b8e2e4e45c35'
    headers = {
        'Content-Type': 'application/json',
        'Access-Control-Allow-Origin': '*',
        'user-key': api_key
    }
    url = "https://developers.zomato.com/api/v2.1/reviews?res_id=%s" % (res_id)
    try:
        response = requests.get(url, headers=headers)
    except:
        print 'Network Issues!'
        return
    if response.status_code == 200:
        data = response.json()
        count = data["reviews_count"]
        if count == 0:
            print 'No Reviews!'
        else:
            # Concatenate every review text into one blob for counting.
            for review in data["user_reviews"]:
                review = review["review"]
                all_reviews = all_reviews + review["review_text"] + ' '
            # convert() is a project helper; presumably normalizes the
            # text before counting — confirm.
            all_reviews = convert(all_reviews)
            tags = make_tags(get_tag_counts(all_reviews), maxsize=50, colors=COLOR_SCHEMES['goldfish'])
            create_tag_image(tags, 'static/img/' + res_id + '.png', size=(900, 600), fontname='Lobster')
    else:
        print 'Api Issues'
def create_word_claod(words, output_file_name, maxsize, fontname='Lobster'):
    """Render gensim-style (word, weight) pairs as a word-cloud image.

    words : list of (str, float) pairs with weights in [0, 1]
    output_file_name : path of the image to write
    maxsize : largest font size
    fontname : font to render with
    """
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        print("Could not import pytagcloud. Skipping cloud generation")
        # Bail out: without the import, make_tags below would raise
        # NameError (the original fell through and crashed here).
        return
    # pytagcloud expects integer counts, so scale the 0..1 weights up.
    counts = [(w, int(v * 10000)) for w, v in words]
    tags = make_tags(counts, maxsize=maxsize)
    create_tag_image(tags, output_file_name, size=(1800, 1200),
                     fontname=fontname)
def make_image(tag_count, file_name, font_max_size=120, size=(900, 600)):
    """Save *tag_count* pairs as a wordcloud image at *file_name*."""
    cloud = make_tags(tag_count, maxsize=font_max_size)
    create_tag_image(cloud, file_name, size=size, fontname='Korean',
                     rectangular=False)
    banner = "-" * 6
    print(banner + "make wordcloud Image" + banner)
def draw_pytagcloud(data_array, image_filename):
    """Render the 50 most common items of *data_array* to *image_filename*."""
    top_counts = Counter(data_array).most_common(50)
    cloud_tags = make_tags(top_counts, maxsize=50)
    create_tag_image(cloud_tags, image_filename, size=(900, 600),
                     fontname='Nanum Gothic')
def saveWordCloud(wordinfo, filename):
    """Write *wordinfo* (word/count pairs) to *filename* and open it."""
    pairs = dict(wordinfo).items()
    taglist = pytagcloud.make_tags(pairs, maxsize=80)
    pytagcloud.create_tag_image(taglist, filename, size=(640, 480),
                                fontname="korean")
    webbrowser.open(filename)
def saveWordCloud(wordInfo, filename):
    """Render *wordInfo* as a cloud image at *filename*, then open it
    in the default browser."""
    tag_pairs = dict(wordInfo).items()
    taglist = pytagcloud.make_tags(tag_pairs, maxsize=100)
    pytagcloud.create_tag_image(taglist, filename, size=(700, 480),
                                fontname='korean', rectangular=False)
    webbrowser.open(filename)
def saveWordCloud(wordInfo):
    """Render *wordInfo* to 'wordcloud.png' and open it in the browser."""
    taglist = pytagcloud.make_tags(dict(wordInfo).items(), maxsize=80)
    print(type(taglist))  # <class 'list'>
    filename = 'wordcloud.png'
    pytagcloud.create_tag_image(taglist, filename, size=(640, 480),
                                fontname='korean', rectangular=False)
    webbrowser.open(filename)
def _test_large_tag_image(self):
    # Benchmark helper: build a large cloud from the 80 most frequent tags
    # of the self.hound text fixture and report how long rendering took.
    start = time.time()
    tags = make_tags(get_tag_counts(self.hound.read())[:80], maxsize=120, colors=COLOR_SCHEMES['audacity'])
    create_tag_image(tags, os.path.join(self.test_output, 'cloud_large.png'), ratio=0.75, background=(0, 0, 0, 255), layout=LAYOUT_HORIZONTAL, fontname='Lobster')
    print "Duration: %d sec" % (time.time() - start)
def saveWordCloud(wordInfo, fileName):
    """Save *wordInfo* as a word-cloud image and open it in a browser.

    Font size and color of each word are derived from its count value.
    """
    # make_tags sizes/colors each entry from its value.
    taglist = pytagcloud.make_tags(wordInfo.items(), maxsize=100)
    # Render the converted tag list to the target image file.
    pytagcloud.create_tag_image(taglist, fileName, size=(720, 480),
                                fontname="korean")
    webbrowser.open(fileName)
def semantic_cloud(topic):
    # Build a cloud of the hashtags associated with *topic* and save it
    # to semantic_cloud.png.
    topic_list = TopicList(topic)
    tlist = topic_list.GetTopicList()
    htagsl = HashtagsList(tlist['statuses'], topic)
    hl = htagsl.GetHashtagsList()
    # One space-separated string, as get_tag_counts() expects raw text.
    cadena = " ".join(hl)
    print cadena
    tags = make_tags(get_tag_counts(cadena), maxsize=120)
    create_tag_image(tags, 'semantic_cloud.png', size=(900, 600), fontname='Lobster')
def make_tag_cloud():
    """Read text from stdin and write a tag cloud to '<argv[1]>.png'."""
    # The original appended to `text` without ever initializing it, so a
    # NameError was raised — and swallowed by a bare except — on every
    # line, leaving `text` undefined for the calls below.  Initialize the
    # accumulator and drop the silent swallow.
    text = ''
    for line in sys.stdin:
        text += ' ' + line.strip().lower()
    tags = make_tags(get_tag_counts(text), maxsize=150)
    create_tag_image(tags, sys.argv[1] + '.png', size=(1024, 768))
def wordcloud(wordfreq, filename):
    """Render *wordfreq* (word -> count mapping) to
    RESULT_DIRECTORY/wordcloud_<filename>.jpg."""
    cloud_tags = pytagcloud.make_tags(wordfreq.items(), maxsize=100)
    save_filename = '%s/wordcloud_%s.jpg' % (RESULT_DIRECTORY, filename)
    pytagcloud.create_tag_image(cloud_tags, save_filename,
                                size=(900, 600), fontname='Malgun',
                                rectangular=False, background=(0, 0, 0))
def create_cloud(counter, filename):
    ''' Creates a word cloud from a counter '''
    # NOTE: *counter* is passed straight to get_tag_counts(); confirm the
    # callers supply whatever that helper expects here.
    top = get_tag_counts(counter)[:80]
    cloud_tags = make_tags(top, maxsize=120, colors=COLOR_SCHEMES['goldfish'])
    create_tag_image(cloud_tags, './img/' + filename + '.png',
                     size=(900, 600), background=(0, 0, 0, 255),
                     layout=LAYOUT_HORIZONTAL, fontname='Lobster')
def test_layouts(self):
    # Render the same 80 most frequent tags once per available layout and
    # report how long the whole sweep took.
    start = time.time()
    tags = make_tags(get_tag_counts(self.hound.read())[:80], maxsize=120)
    for layout in LAYOUTS:
        create_tag_image(tags, os.path.join(self.test_output, 'cloud_%s.png' % layout), size=(900, 600), background=(255, 255, 255, 255), layout=layout, fontname='Lobster')
    print "Duration: %d sec" % (time.time() - start)
def make_wordcloud(text, image_name, width, height):
    """Turn *text* into (word, count) pairs via return_list_of_tuples()
    and save a cloud image of the given dimensions to *image_name*."""
    count_pairs = tuple(return_list_of_tuples(text))
    cloud = pytagcloud.make_tags(count_pairs, maxsize=80, minsize=10)
    pytagcloud.create_tag_image(cloud, image_name, size=(width, height),
                                fontname='Nanum Gothic', rectangular=False)
def build_pytag_cloud(self):
    """Render the self.wordcount most frequent entries of self.tagcloud to
    a PNG named after the city/state, then open it in a browser."""
    width = 900
    height = 575
    fileName = '{0}/{1}.{2}.{3}.{4}.png'.format(
        self.img_directory, self.state, self.city, width, height)
    # items() instead of the Python-2-only iteritems() keeps this working
    # on both major versions.
    items = sorted(self.tagcloud.items(), key=itemgetter(1), reverse=True)
    tags = make_tags(items[:self.wordcount], maxsize=80)
    create_tag_image(tags, fileName, size=(width, height),
                     fontname='Droid Sans')
    import webbrowser
    webbrowser.open(fileName)  # see results
def run(textpath):
    # Build two clouds from the file at *textpath*: one from the raw tag
    # counts ('cloud.png') and one from the processed tags ('rcloud.png'),
    # then report the elapsed time.  Python-2 code (print statement,
    # str.decode on the file contents).
    # NOTE(review): the file handle is never closed.
    text = open(textpath, 'r')
    start = time.time()
    taglist = get_tag_counts(text.read().decode('utf8'))
    cleantaglist = process_tags(taglist)
    tags = make_tags(taglist[0:100], colors=COLOR_MAP)
    create_tag_image(tags, 'cloud.png', size=(1280, 900), background=(0, 0, 0 , 255), layout=LAYOUT_MOST_HORIZONTAL, crop=False, fontname='Cuprum', fontzoom=2)
    tags2 = make_tags(cleantaglist[0:100], colors=COLOR_MAP)
    create_tag_image(tags2, 'rcloud.png', size=(1280, 900), background=(0, 0, 0, 255), layout=LAYOUT_MOST_HORIZONTAL, crop=False, fontname='Cuprum', fontzoom=2)
    print "Duration: %d sec" % (time.time() - start)
def makeCloud(self, tagsCounts, name="tag_cloud.png", height=500,
              width=500, font="Droid Sans"):
    """Write a rectangular tag cloud for *tagsCounts* to *name*."""
    # Get rid of unigrams contained in bigrams.
    filtered = self.parseWords(tagsCounts)
    cloud_tags = pytagcloud.make_tags(filtered, colors=self._colors)
    pytagcloud.create_tag_image(cloud_tags, name, size=(width, height),
                                fontname=font, rectangular=True)
def make_cloud(text, fname):
    '''Create the wordcloud image '<fname>.png' from the text in *text*.'''
    # Strip noise tokens (links, retweet markers) before counting.
    cleaned = text.lower().replace('http', '').replace('rt ', '').replace('.co', '')
    # The original also computed sorted bigram frequencies here
    # (collections.Counter over adjacent word pairs) but never used the
    # result; that dead code — also the only Python-2-specific part
    # (iteritems) — has been removed.
    tags = make_tags(get_tag_counts(cleaned)[:50], maxsize=350, minsize=100)
    create_tag_image(tags, fname + '.png', size=(3000, 3250),
                     background=(0, 0, 0, 255), layout=LAYOUT_MIX,
                     fontname='Lobster', rectangular=True)
def run():
    """Profiling entry point: write the module-level `tags` to
    cloud_profile.png inside `test_output`."""
    target = os.path.join(test_output, "cloud_profile.png")
    create_tag_image(tags, target, size=(1280, 900),
                     background=(0, 0, 0, 255), layout=LAYOUT_MIX,
                     crop=True, fontname="Lobster", fontzoom=1)
def createTagCloud(self, rapper):
    """Create 'cloud_large.png' from the given artist's raw lyrics,
    keeping only the tags rendered larger than size 20."""
    # For some reason these imports only work when placed in the function
    # but they do not if they are placed at the top of the document.
    from pytagcloud import create_tag_image, make_tags
    from pytagcloud.lang.counter import get_tag_counts
    lyrics = rapper.rawLyrics
    all_tags = make_tags(get_tag_counts(lyrics), maxsize=100)
    big_tags = [t for t in all_tags if t['size'] > 20]
    create_tag_image(big_tags, 'cloud_large.png', size=(800, 400),
                     background=(239, 101, 85, 255),
                     fontname='PT Sans Regular')
def cr_tagcloud(words, fn, minsize=17, maxsize=50, size=(680, 500),
                fontname='Nobile'):
    """Render *words* (triples whose first and third fields are the word
    and its weight) to the image file *fn*."""
    pairs = [(entry[0], entry[2]) for entry in words]
    tags = make_tags(pairs, minsize=minsize, maxsize=maxsize)
    create_tag_image(tags, fn, size=size, fontname=fontname)
def newsearch(query_word):
    # Build a sentiment-colored word cloud for businesses whose name
    # contains *query_word*.  Returns 0 when nothing matches; otherwise
    # writes static/cloud_large.jpg.
    result = []
    es = Elasticsearch()
    query1 = {"query": {"wildcard": {"name": {"value": "*" + query_word + "*" } } } }
    res = es.search(index="urban", body=query1)
    if res['hits']['total'] == 0:
        # Fall back to the secondary index.
        res = es.search(index="champ", body=query1)
        if res['hits']['total'] == 0:
            return 0
    ret = res['hits']['hits']
    temp = defaultdict(int)
    items = []
    for item in ret:
        ids = item['_source']['business_id']
        query2 = {"query": {"match": {"business_id": ids } } }
        res = es.search(index="alchem", body=query2)
        # Collect each word-frequency record and count occurrences of the
        # same text across businesses.
        for item in res['hits']['hits'][0]['_source']['word_freq']:
            items.append(item)
            temp[item['text'].encode('utf-8')] += 1
    words = []
    for item in items:
        t = {}
        scale = 1
        # Color/scale by sentiment.  Two record shapes are handled:
        # item['sentiment'] as a dict with a 'type' field, or as a plain
        # string.  NOTE(review): in the dict branch, a type other than
        # positive/negative leaves t without a 'color' key — confirm
        # create_tag_image tolerates that.
        if 'sentiment' not in item:
            continue
        elif 'type' in item['sentiment']:
            if item['sentiment']['type'] == 'positive':
                scale = 1.75
                t['color'] = (0,255,0)
            elif item['sentiment']['type'] == 'negative':
                scale = 1.25
                t['color'] = (255,0,0)
        elif item['sentiment'] == 'positive':
            scale = 1.75
            t['color'] = (0,255,0)
        elif item['sentiment'] == 'negative':
            scale = 1.25
            t['color'] = (255,0,0)
        elif item['sentiment'] == 'neutral':
            t['color'] = (0,0,255)
        else:
            t['color'] = (128,128,128)
        t['tag'] = item['text'].encode('utf-8')
        # NOTE(review): `temp` was keyed by the utf-8-*encoded* text but is
        # looked up here with the raw text — on Python 2 this only matches
        # for ASCII words; verify against real data.
        t['size'] = int( math.ceil( temp[item['text']] * float(item['relevance']) * 30 * scale) )
        words.append(t)
    create_tag_image(words, 'static/cloud_large.jpg', size=(900, 600), fontname='Philosopher')
def openWestCoastCloud():
    """Build and display a word cloud of tags gathered from three
    west-coast cities."""
    # Gather tags from each city into one text blob.
    corpus = getTags(34, 118)           # Los Angeles
    corpus += ' ' + getTags(37, 122)    # San Francisco
    corpus += ' ' + getTags(47, 122)    # Seattle
    # Draw the cloud, save it, and show the result in the browser.
    cloud_tags = make_tags(get_tag_counts(corpus), maxsize=80)
    create_tag_image(cloud_tags, 'cloud_large.png', size=(900, 600),
                     fontname='Lobster')
    webbrowser.open('cloud_large.png')  # see results
def showCloud(tagSimilarities):
    """Render a tag cloud built around the first (most similar) tag of
    *tagSimilarities* — a list of (tag, weight) pairs, highest weight
    first — save it as cloud.png and open it in the browser."""
    main_tag, main_weight = tagSimilarities[0]
    min_weight = tagSimilarities[-1][1]
    # Normalize against the smallest weight, rescale by 100, and boost the
    # main tag a further 1.25x so it dominates the cloud.
    cloud = [(main_tag, 1.25 * 100 * (main_weight - min_weight))]
    # Index 1 is skipped, exactly as in the original (only index > 1 kept).
    cloud.extend((tag, 100 * (weight - min_weight))
                 for idx, (tag, weight) in enumerate(tagSimilarities)
                 if idx > 1)
    tags = make_tags(cloud, maxsize=100)
    create_tag_image(tags, 'cloud.png', size=(900, 600), fontname='Lobster')
    webbrowser.open('cloud.png')
def keys_cloud():
    # For each of the 9 pickled tag-frequency distributions, build a word
    # cloud of its 100 most common keys.  Each key is repeated `count`
    # times so get_tag_counts() sees the right frequencies.  Python-2
    # code: the file() builtin and print statements.
    for i in range(9):
        f = file('../data_preprocess/Data/ftags_{}.pkl'.format(i), 'rb')
        fdist = pickle.load(f)
        tag = ''
        # Log the single most common key and its count.
        print fdist.most_common()[0][0], fdist.most_common()[0][1]
        for key, count in fdist.most_common(100):
            tag +=( key+" ")*count
        tags = make_tags(get_tag_counts(tag),maxsize=100)
        # Set your output filename
        create_tag_image(tags,"Data/word_cloud_{}.png".format(i), size=(600,400),background=(0, 0, 0, 255), fontname="SimHei")
def My_make_word_could(fileStr, outPNGStr):
    # Segment the text in *fileStr* with jieba, count noun/adjective
    # tokens (minus stop words), and render the 25 most frequent ones to
    # *outPNGStr*.  Python-2 code: has_key(), iteritems(), str.decode and
    # print statements.
    jieba.load_userdict("ap_dict.txt")
    # Build the stop-word set from stopwords.txt (both the segmented
    # tokens of each line and the raw line itself).
    STOP_WORD = set()
    stopword_file = open("stopwords.txt")
    for each_line in stopword_file:
        each_line_list = pseg.cut(each_line)
        for elem in each_line_list:
            STOP_WORD.add(elem.word)
        STOP_WORD.add(each_line.strip().decode('utf-8'))
    stopword_file.close()
    # ---- cut and count word frequencies -----------------------------------
    word_freq = {}
    raw_file = open(fileStr)
    for line in raw_file:
        seg_list = pseg.cut(line)
        for ele in seg_list:
            words = ele.word.strip()
            # Keep nouns ('n') and adjectives ('a') that are not stop words.
            if ((ele.flag == 'n' or ele.flag == 'a' ) and (words not in STOP_WORD)):
                if(word_freq.has_key(words)):
                    word_freq[words] += 1
                else:
                    word_freq[words] = 1
    raw_file.close()
    # ---- sort the result and keep the top 25 ------------------------------
    paixu= sorted(word_freq.iteritems(), key=lambda d:d[1], reverse = True)
    paixu_tiqu=paixu[0:25]
    print "over"
    # ---- make the word cloud ----------------------------------------------
    tags = make_tags(dict(paixu_tiqu))
    create_tag_image(tags, outPNGStr, size=(2000, 1600), fontname='haokan.ttf',fontzoom=4)
    print "all over"
def getImageCloudForText(text, filename='cloud.png'):
    # Render *text* — presumably a pre-counted (word, count) sequence,
    # given it is passed straight to make_tags — TODO confirm.  Only the
    # first 120 entries are used.
    wordCounts = text[:120]
    func = makeFunc(wordCounts)  # project helper; used as the scalef sizing callback
    tags = make_tags(wordCounts, maxsize=50, minsize=1, colors=getColors(), scalef=func)
    create_tag_image(tags, filename, size=(500,300), background=(0x1c, 0x1b, 0x1f), layout=pytagcloud.LAYOUT_MIX, fontname='PT Sans Regular', rectangular=False)
    # When imported as a module, read the image back, delete the file and
    # upload it, returning the upload result.  When run as a script the
    # function implicitly returns None and leaves the file on disk.
    if __name__!="__main__":
        cloudfile = open(filename, 'rb')
        data = cloudfile.read()
        cloudfile.close()
        os.remove(filename)
        return uploadimage.uploadImage(data)