Exemplo n.º 1
0
def make_tag_cloud(data, can_be_noun_arg, process_option='freqs'):
    """Render tag-cloud PNG image(s) from word weights computed over *data*.

    The weights come from one of three processors selected by
    *process_option* ('concatenate', 'freqs' or 'race'); an unknown option
    raises ``KeyError``.  A list result produces a single image, a dict
    result produces one image per key.  Only the first 30 (word, weight)
    pairs of each result are drawn, after being normalised to sum to 1.

    :param data: input handed unchanged to the selected processor.
    :param can_be_noun_arg: flag handed to the processor; when falsy the
        single-image file name is prefixed with ``not_``.
    :param process_option: name of the processor to run.
    """
    stop_words = sw.words()
    process_f = {
        'concatenate': lambda: concatenate(data, can_be_noun_arg, stop_words),
        'freqs': lambda: freq_weight(data, can_be_noun_arg, stop_words),
        'race': lambda: race_tfidf(data, can_be_noun_arg, stop_words),
    }
    freqs = process_f[process_option]()
    if isinstance(freqs, list):
        freqs = freqs[:30]
        # Normalize freqs in case they are counts.  Built-in sum()/float()
        # replace np.sum(<generator>) (deprecated) and np.float (removed
        # from NumPy) without changing the result.
        sum_freqs = sum(x for _, x in freqs)
        freqs = [(w, float(f) / sum_freqs) for w, f in freqs]
        tags = make_tags(freqs, maxsize=80)
        fname = 'noun_last_words_{}.png'.format(process_option)
        if not can_be_noun_arg:
            fname = 'not_' + fname
        create_tag_image(tags, fname, size=(900, 600), fontname='Lobster')
    elif isinstance(freqs, dict):
        for k in freqs:
            top_freqs = freqs[k][:30]
            # Normalize each group independently.
            sum_freqs = sum(x for _, x in top_freqs)
            top_freqs = [(w, float(f) / sum_freqs) for w, f in top_freqs]
            print(top_freqs)
            tags = make_tags(top_freqs, maxsize=15)
            fname = 'noun_last_words_{}_{}.png'.format(process_option, k)
            create_tag_image(tags, fname, size=(900, 600), fontname='Lobster')
     def wordcloud(self,OVERALLTEXT,NEGATIVETEXT,POSITIVETEXT,test_output, font):
         """Render overall, negative and positive tag clouds in every layout.

         Each of the three text files is read, reduced to its first 50 tag
         counts and drawn once per entry of ``LAYOUTS``.  The images are
         written into *test_output* as ``Overall_<layout>.png``,
         ``negative_<layout>.png`` and ``positive_<layout>.png``.

         :param OVERALLTEXT: path of the text file for the overall cloud.
         :param NEGATIVETEXT: path of the text file for the negative cloud.
         :param POSITIVETEXT: path of the text file for the positive cloud.
         :param test_output: directory that receives the PNG files.
         :param font: font name handed to ``create_tag_image``.
         """
         # Read all inputs before drawing anything, and close every handle;
         # the previous version left all three files open.
         texts = []
         for path in (OVERALLTEXT, NEGATIVETEXT, POSITIVETEXT):
             with open(path, 'r') as handle:
                 texts.append(handle.read())
         overall, negative, positive = texts

         #Overall
         tags = make_tags(get_tag_counts(overall)[:50], maxsize=90, minsize=15)
         for layout in LAYOUTS:
             create_tag_image(tags, os.path.join(test_output, 'Overall_%s.png' % layout),
                              size=(900, 600), background=(255, 255, 255),
                              layout=layout, fontname=font, rectangular=True)

         #Negative
         tags = make_tags(get_tag_counts(negative)[:50], maxsize=90, minsize=15,
                          colors=COLOR_SCHEMES['audacity'])
         for layout in LAYOUTS:
             create_tag_image(tags, os.path.join(test_output, 'negative_%s.png' % layout),
                              size=(900, 600), background=(205, 50, 50),
                              layout=layout, fontname=font)

         #Positive
         tags = make_tags(get_tag_counts(positive)[:50], maxsize=120, minsize=25,
                          colors=COLOR_SCHEMES['oldschool'])
         for layout in LAYOUTS:
             create_tag_image(tags, os.path.join(test_output, 'positive_%s.png' % layout),
                              size=(900, 600), background=(0, 255, 15),
                              layout=layout, fontname=font)
Exemplo n.º 3
0
def make_tag_cloud(data, can_be_noun_arg, process_option='freqs'):
    """Render tag-cloud PNG image(s) from word weights computed over *data*.

    *process_option* selects the processor ('concatenate', 'freqs' or
    'race'); an unknown option raises ``KeyError``.  When the processor
    returns a list a single image is written; when it returns a dict one
    image is written per key.  In both cases only the first 30
    (word, weight) pairs are used, normalised so that they sum to 1.

    :param data: input handed unchanged to the selected processor.
    :param can_be_noun_arg: flag handed to the processor; when falsy the
        single-image file name is prefixed with ``not_``.
    :param process_option: name of the processor to run.
    """
    stop_words = sw.words()
    process_f = {
        'concatenate': lambda: concatenate(data, can_be_noun_arg, stop_words),
        'freqs': lambda: freq_weight(data, can_be_noun_arg, stop_words),
        'race': lambda: race_tfidf(data, can_be_noun_arg, stop_words),
    }
    freqs = process_f[process_option]()
    if isinstance(freqs, list):
        freqs = freqs[:30]
        # Normalize freqs in case they are counts.  np.float no longer
        # exists in current NumPy and np.sum() over a generator is
        # deprecated, so the equivalent built-ins are used instead.
        sum_freqs = sum(x for _, x in freqs)
        freqs = [(w, float(f) / sum_freqs) for w, f in freqs]
        tags = make_tags(freqs, maxsize=80)
        fname = 'noun_last_words_{}.png'.format(process_option)
        if not can_be_noun_arg:
            fname = 'not_' + fname
        create_tag_image(tags, fname, size=(900, 600), fontname='Lobster')
    elif isinstance(freqs, dict):
        for k in freqs:
            top_freqs = freqs[k][:30]
            # Normalize each group independently.
            sum_freqs = sum(x for _, x in top_freqs)
            top_freqs = [(w, float(f) / sum_freqs) for w, f in top_freqs]
            print(top_freqs)
            tags = make_tags(top_freqs, maxsize=15)
            fname = 'noun_last_words_{}_{}.png'.format(process_option, k)
            create_tag_image(tags, fname, size=(900, 600), fontname='Lobster')
Exemplo n.º 4
0
def run(textpath):
	"""Render ``cloud.png`` and ``rcloud.png`` from the text file at *textpath*.

	``cloud.png`` is drawn from the first 100 raw tag counts and
	``rcloud.png`` from the first 100 entries returned by ``process_tags``.
	The elapsed time is printed at the end.

	:param textpath: path of a UTF-8 encoded text file.
	"""
	# The context manager closes the file; it used to be left open.
	with open(textpath, 'r') as text:
		start = time.time()
		raw_text = text.read()
	taglist = get_tag_counts(raw_text.decode('utf8'))
	cleantaglist = process_tags(taglist)
	tags = make_tags(taglist[0:100], colors=COLOR_MAP)
	create_tag_image(tags, 'cloud.png', size=(1280, 900), background=(0, 0, 0, 255), layout=LAYOUT_MOST_HORIZONTAL, crop=False, fontname='Cuprum', fontzoom=2)
	tags2 = make_tags(cleantaglist[0:100], colors=COLOR_MAP)
	create_tag_image(tags2, 'rcloud.png', size=(1280, 900), background=(0, 0, 0, 255), layout=LAYOUT_MOST_HORIZONTAL, crop=False, fontname='Cuprum', fontzoom=2)
	print("Duration: %d sec" % (time.time() - start))
Exemplo n.º 5
0
 def createTagCloud(self,wordline):
     """
     Create tag cloud image from a line of whitespace-separated words.

     The first token of *wordline* is dropped and the remaining tokens are
     counted; at most ``self.wordCount`` tags are drawn to ``self.png``
     using the colour scheme selected by ``self.color``.

     Returns False when *wordline* is empty or contains only whitespace
     (nothing is drawn), True once the image has been created.
     """
     # A whitespace-only line yields no tokens; bail out instead of failing
     # on the first-token removal below.
     if not wordline.strip():
         return False

     wordsTokens = WhitespaceTokenizer().tokenize(wordline)
     # Skip the first token and rebuild the text from the rest.
     wordstream = ' '.join(wordsTokens[1:])
     thresh = self.wordCount
     colorS = self.colorSchemes[self.color]

     tags = make_tags(get_tag_counts(wordstream)[:thresh],
                      minsize=3, maxsize=40,
                      colors=COLOR_SCHEMES[colorS])

     create_tag_image(tags, self.png,
                      size=(960, 400),
                      background=(255, 255, 255, 255),
                      layout=LAYOUT_HORIZONTAL,
                      fontname='Neuton')
     return True
Exemplo n.º 6
0
def action(counts):
    """Draw *counts* as a tag cloud and save it to ``weibo_liu.png``."""
    image_options = {
        'background': (0, 0, 0, 0),
        'size': (1200, 1200),
        'fontname': "simhei",
    }
    sized_tags = make_tags(counts, minsize=15, maxsize=120)
    create_tag_image(sized_tags, 'weibo_liu.png', **image_options)
Exemplo n.º 7
0
def plot(game_name, game_id):
    """Draw a tag cloud of keywords extracted from one game's comments.

    Up to three keywords are extracted from the third column of every
    comment row; keywords shorter than two characters or present in
    ``stop`` are ignored.  The resulting counts are printed and the image
    is written to ``c:/<game_name>.png``.

    :param game_name: used to build the output file name.
    :param game_id: value interpolated into the comment query.
    """
    # NOTE(review): the query is built by string interpolation; if game_id
    # can come from untrusted input this is an SQL-injection risk - switch
    # to a parameterised query if DbUtil offers one.
    comments = DbUtil.getAllResult("select * from comment where game_id = %s" %
                                   game_id)

    # Named word_counts so the builtin ``dict`` is no longer shadowed.
    word_counts = {}
    for comment in comments:
        for word in jieba.analyse.extract_tags(comment[2], topK=3):
            if len(word) < 2 or word in stop:
                continue
            word_counts[word] = word_counts.get(word, 0) + 1

    print(word_counts)

    swd = sorted(word_counts.items(), key=itemgetter(1), reverse=True)
    # NOTE(review): the slice starts at 1, so the single most frequent word
    # is left out of the cloud - confirm this is intentional.
    swd = swd[1:50]
    tags = make_tags(swd,
                     minsize=30,
                     maxsize=120,
                     colors=random.choice(list(COLOR_SCHEMES.values())))

    create_tag_image(tags,
                     'c:/%s.png' % game_name,
                     background=(0, 0, 0, 255),
                     size=(900, 600),
                     fontname='SimHei')

    print('having save file to disk')
Exemplo n.º 8
0
def get_tag_cloud(request, region_code):
    """Render ``tag_cloud.html`` with a tag cloud of one region's tweets.

    The bodies of all tweets located in the zone identified by
    *region_code* are concatenated, filtered through ``TagCloud.filter_body``
    and reduced to the 50 first tag counts.  When nothing is left after
    filtering, the placeholder text "Region Empty" is used instead.

    :param request: incoming request object (not read by this function).
    :param region_code: code of the ``DataZone`` to look up.
    :return: the response produced by ``render_to_response``.
    """
    # Get all tweets in the region
    data_zone = DataZone.objects.get(code=region_code)
    tweet_locations = TweetLocation.objects.filter(zone=data_zone)

    # Join once instead of growing a string with += for every tweet.
    body_text = ''.join(x.tweet.body + ' ' for x in tweet_locations)

    tc = TagCloud()
    body_text = tc.filter_body(body_text)

    if body_text.strip() == '':
        body_text = "Region Empty"

    tags = make_tags(get_tag_counts(body_text)[:50], maxsize=50, colors=COLOR_SCHEMES['audacity'])
    data = create_html_data(tags, (560,450), layout=LAYOUT_HORIZONTAL, fontname='PT Sans Regular')

    # NOTE(review): tag text originates from tweet bodies and is inserted
    # into the markup without escaping - confirm the template escapes it.
    tags_template = '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;"><a class="tag %(cls)s" href="#%(tag)s" style="top: %(top)dpx;\
    left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)dpx;">%(tag)s</a></li>'

    tags_html = ''.join([tags_template % link for link in data['links']])
    css = "".join("a.%(cname)s{color:%(normal)s;}a.%(cname)s:hover{color:%(hover)s;}" % {'cname':k, 'normal': v[0], 'hover': v[1]} for k,v in data['css'].items())

    return render_to_response('tag_cloud.html', {'tags': tags_html, 'css': css})
def word_cloud(final_object, cloud_object):
    """Count the words in *final_object* and draw them as a tag cloud.

    Entries equal to ``"no_object"`` and words of a single character are
    ignored.  The sorted counts and the generated tags are printed, and the
    image is saved as ``cloud_1_All_Objects.png``.

    :param final_object: iterable of words.
    :param cloud_object: not used by the code shown here.
    """
    # re and StopWords are not used by the code shown here; the imports are
    # kept in case later code relies on them.
    import re
    from pytagcloud.lang.stopwords import StopWords
    from operator import itemgetter
    final_object = [x for x in final_object if x != "no_object"]

    counted = {}
    for word in final_object:
        if len(word) > 1:
            # dict.get replaces has_key(), which does not exist on Python 3.
            counted[word] = counted.get(word, 0) + 1

    # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    counts = sorted(counted.items(), key=itemgetter(1), reverse=True)

    # Formatting keeps the exact text the old print statements produced.
    print("Total count of Word Cloud List Items:  %s" % (counts,))

    words = make_tags(counts, maxsize=100)
    print("Word Cloud List items:  %s" % (words,))

    create_tag_image(words, 'cloud_1_All_Objects.png', size=(1280, 900), fontname='Lobster')

    # Not used within the code shown here.
    width = 1280
    height = 800
    layout = 3
    background_color = (255, 255, 255)
Exemplo n.º 10
0
def plot(game_name, game_id):
    """Draw a tag cloud of keywords extracted from one game's comments.

    For every comment row up to three keywords are extracted from its third
    column; keywords shorter than two characters or listed in ``stop`` are
    skipped.  The counts are printed and the image is saved as
    ``c:/<game_name>.png``.

    :param game_name: used to build the output file name.
    :param game_id: value interpolated into the comment query.
    """
    # NOTE(review): string-built SQL; if game_id can be supplied by a user
    # this is an injection risk - use a parameterised query if DbUtil
    # supports one.
    comments = DbUtil.getAllResult("select * from comment where game_id = %s" % game_id)

    # A descriptive name avoids shadowing the builtin ``dict``.
    word_counts = {}
    for comment in comments:
        for word in jieba.analyse.extract_tags(comment[2], topK=3):
            if len(word) < 2 or word in stop:
                continue
            word_counts[word] = word_counts.get(word, 0) + 1

    print(word_counts)

    swd = sorted(word_counts.items(), key=itemgetter(1), reverse=True)
    # NOTE(review): slicing from index 1 omits the most frequent word -
    # confirm this is intended.
    swd = swd[1:50]
    tags = make_tags(swd,
                     minsize=30,
                     maxsize=120,
                     colors=random.choice(list(COLOR_SCHEMES.values())))

    create_tag_image(tags,
                     'c:/%s.png' % game_name,
                     background=(0, 0, 0, 255),
                     size=(900, 600),
                     fontname='SimHei')

    print('having save file to disk')
Exemplo n.º 11
0
    def _create_image(self, text):
        """Draw the tags found in *text* to ``/tmp/cloud_large.png``.

        The image is sized to a 4:3 rectangle derived from the shorter
        screen dimension.  The process exits with status -1 when no tag
        counts are available; otherwise 0 is returned.
        """
        counts = get_tag_counts(text)
        if counts is None:
            sys.exit(-1)

        if self._repeat_tags:
            # Every tag is listed three times, with weights 5, 2 and 1,
            # one full pass over the tags per weight.
            repeated = [(entry[0], weight)
                        for weight in (5, 2, 1)
                        for entry in counts]
            counts = repeated

        tags = make_tags(counts, maxsize=150, colors=self._color_scheme)
        path = os.path.join('/tmp/cloud_large.png')

        screen_height = Gdk.Screen.height()
        screen_width = Gdk.Screen.width()
        if screen_height < screen_width:
            height = screen_height
            width = int(height * 4 / 3)
        else:
            width = screen_width
            height = int(width * 3 / 4)

        # The font is only passed on when one has been configured.
        options = {'layout': self._layout, 'size': (width, height)}
        if self._font_name is not None:
            options['fontname'] = self._font_name
        create_tag_image(tags, path, **options)
        return 0
Exemplo n.º 12
0
    def make_wordcloud(self,*total):
        """Draw ``wordcloud.jpg`` with seven fixed category labels.

        The size of each label follows the rating total of its category.

        :param total: the first positional argument is an iterable holding
            at least seven numeric rating totals, in the order of the
            category labels below; further arguments are ignored.
        :raises IndexError: if fewer than seven totals are supplied.
        """
        total_list = total[0]
        # Every total is scaled by 300; a total of 0 is treated as 1.
        scaled = [1 * 300 if value == 0 else value * 300 for value in total_list]

        categories = ("농작물 경작", "공예(만들기)", "음식체험", "전통문화",
                      "자연생태", "건강레포츠", "산·어촌 생활")

        # The word size is driven by a per-label frequency.  The frequency
        # used to be obtained by appending each label int(scaled) - 1 times
        # to a list and counting it; the same number is computed directly
        # here.  Labels whose frequency is not positive are left out.
        frequencies = []
        for index, label in enumerate(categories):
            repeats = int(scaled[index]) - 1
            if repeats > 0:
                frequencies.append((label, repeats))

        # Highest frequency first; ties keep the category order (stable sort).
        tags = sorted(frequencies, key=lambda pair: pair[1], reverse=True)
        taglist = pytagcloud.make_tags(tags, maxsize=45)
        pytagcloud.create_tag_image(taglist, 'wordcloud.jpg', size=(500, 200), fontname='Noto Sans CJK',
                                    layout=pytagcloud.LAYOUT_MOST_HORIZONTAL)
Exemplo n.º 13
0
def finance_cloud(tag):
    """Count the tags in the text *tag* and draw them to ``cloud.png``."""
    tag_counts = get_tag_counts(tag)
    sized_tags = make_tags(tag_counts, maxsize=100)
    render_options = dict(size=(1280, 800),
                          background=(0, 0, 0, 255),
                          fontname="SimHei")
    create_tag_image(sized_tags, "cloud.png", **render_options)
    def generate_html(self, word_freq_tup_list, search_term):
        """
        Generate the HTML page depicting a word cloud.

        The first 100 tags built from *word_freq_tup_list* are laid out and
        substituted, together with a plain list of every (word, frequency)
        pair, into ``templates/template.html`` (located three directories
        above this module's directory).

        :param word_freq_tup_list: list of (word, frequency) tuples passed
            by the caller
        :param search_term: search term shown in the page title
        :return: generated word cloud html text
        """
        tags = make_tags(word_freq_tup_list)

        cloud_html = create_html_data(tags[:100], (500, 500),
                                      layout=LAYOUT_HORIZONTAL,
                                      fontname='PT Sans Regular')
        temp_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), '..', '..', '..',
                         'templates', 'template.html'))
        # Close the template once it has been read; the handle used to be
        # left open.
        with codecs.open(temp_path, mode='r', encoding='utf-8') as template_file:
            html_template = Template(template_file.read())
        context = {}

        # TODO: change href attribute in a tag to give link to data display
        tags_template = '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;">' \
                        '<a class="tag %(cls)s" href="#%(tag)s" style="top: %(top)dpx;\
            left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)dpx;">%(tag)s</a></li>'

        context['tags'] = ''.join(
            [tags_template % link for link in cloud_html['links']])
        context['width'] = cloud_html['size'][0]
        context['height'] = cloud_html['size'][1]
        context['css'] = "".join("a.%(cname)s{color:%(normal)s;}\
            a.%(cname)s:hover{color:%(hover)s;}" % {
            'cname': k,
            'normal': v[0],
            'hover': v[1]
        } for k, v in cloud_html['css'].items())
        context['mycss'] = "\
            #word_f { \
            height: 300px; \
            -webkit-column-count: 5; \
            -moz-column-count: 5; \
            column-count: 5; \
            } \
            #word_f li { \
            display: block; \
            }\
            #word_f li a { \
            color: rgb(0, 162, 232); \
            }"

        context['word_freq'] = "".join("<li> %(key)s => %(val)s </li>" % {
            'key': t[0],
            'val': str(t[1])
        } for t in word_freq_tup_list)

        context[
            'page_title'] = 'Search Term : %s - Frequency Based Word Cloud - Top 100 words from search \
            results (length(word) >= 3)' % search_term
        context[
            'list_title'] = '300 features extracted and their corresponding frequencies'
        html_text = html_template.substitute(context)
        return html_text
Exemplo n.º 15
0
def create_cloud(oname, words,maxsize=120, fontname='Lobster'):
    '''Creates a word cloud (when pytagcloud is installed)

    When pytagcloud cannot be imported, a warning is printed the first time
    and the function returns without creating anything.

    Parameters
    ----------
    oname : output filename
    words : list of (value,str)
    maxsize : int, optional
        Size of maximum word. The best setting for this parameter will often
        require some manual tuning for each input.
    fontname : str, optional
        Font to use.
    '''
    global warned_of_error
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        if not warned_of_error:
            print("Could not import pytagcloud. Skipping cloud generation")
            # Record that the warning was shown; the flag was never set
            # before, so the message was repeated on every call.
            warned_of_error = True
        return

    # gensim returns a weight between 0 and 1 for each word, whereas
    # pytagcloud takes word counts, so the weight is multiplied by a large
    # number.  gensim returns (value, word) while pytagcloud expects
    # (word, value) as input, so the pair is flipped as well.
    words = [(w,int(v*10000)) for v,w in words]
    tags = make_tags(words, maxsize=maxsize)
    create_tag_image(tags, oname, size=(1800, 1200), fontname=fontname)
Exemplo n.º 16
0
	def __generate_tag_cloud(self):
		"""Extract weighted keywords from the comment file and draw them.

		The jieba dictionary is adjusted first (four words added, eight
		removed), then the 100 highest weighted keywords of the comment
		file are drawn to the tag image file.  Each keyword and weight, and
		finally the image file name, are printed.
		"""
		import jieba.analyse
		for word in ('氪金', '逼氪', '骗氪', '王者荣耀'):
			jieba.add_word(word)
		for word in ('...', '只能', '可能', '觉得', '而且', '然后', '还有', '游戏'):
			jieba.del_word(word)

		# The context manager closes the file even when reading or keyword
		# extraction raises.
		with open(self.__comment_file_name, 'r') as comments_file:
			tags = jieba.analyse.extract_tags(comments_file.read(), topK=100, withWeight=True)

		dd = []
		for entry in tags:
			# Weights are scaled by 1000 and truncated to integers.
			dd.append((entry[0], int(float(entry[1] * 1000))))
			print('i is  %s %s' % (entry[0], entry[1]))
		tags = make_tags(dd, minsize=10, maxsize=80, colors=COLOR_SCHEMES['audacity'])
		create_tag_image(
			tags,
			self.__tag_image_file_name,
			size=(600, 600),
			layout=LAYOUT_HORIZONTAL,
			fontname='SimHei'  #  !!! Note: the font only takes effect once you have set it up yourself, see the ReadMe
		)
		print(self.__tag_image_file_name)
Exemplo n.º 17
0
    def create_wordcloud_file(self, tags, output_file):
        """Write a word-cloud image of *tags* to *output_file*.

        Tag count, tag sizes, image size, font and background are taken
        from ``self.configuration``.  Progress is logged, and the final
        message is printed as well.
        """
        # Read the configuration entries in one pass.
        settings = self.configuration
        (tag_limit, smallest, largest,
         canvas_size, font_name, background_colour) = [
            settings[key] for key in (
                'num_tags', 'min_tag_size', 'max_tag_size',
                'image_size', 'font', 'background')]

        logging.info("Creating wordcloud image file: %s" % output_file)
        # Keep only the leading entries, which are the most frequent ones,
        # and turn them into sized tags.
        sized_tags = wc.make_tags(tags[:tag_limit],
                                  minsize=smallest,
                                  maxsize=largest)
        # Render the tags and save the result.
        render_options = dict(size=canvas_size,
                              fontname=font_name,
                              layout=wc.LAYOUT_HORIZONTAL,
                              background=background_colour)
        wc.create_tag_image(sized_tags, output_file, **render_options)

        done_message = "Created wordcloud image file: %s" % output_file
        logging.info(done_message)
        print(done_message)
Exemplo n.º 18
0
 def test_create_html_data(self):
     """
     HTML code sample

     Builds tags from the first 100 tag counts of ``self.hound``, lays
     them out as HTML data, fills ``web/template.html`` (next to this
     module) and writes the result to ``cloud.html`` in
     ``self.test_output``.
     """
     tags = make_tags(get_tag_counts(self.hound.read())[:100], maxsize=120, colors=COLOR_SCHEMES['audacity'])
     data = create_html_data(tags, (440,600), layout=LAYOUT_HORIZONTAL, fontname='PT Sans Regular')

     template_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'web/template.html')
     # Close the template after reading; the handle used to be left open.
     with open(template_path, 'r') as template_file:
         html_template = Template(template_file.read())

     context = {}

     tags_template = '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;"><a class="tag %(cls)s" href="#%(tag)s" style="top: %(top)dpx;\
     left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)dpx;">%(tag)s</a></li>'

     context['tags'] = ''.join([tags_template % link for link in data['links']])
     context['width'] = data['size'][0]
     context['height'] = data['size'][1]
     context['css'] = "".join("a.%(cname)s{color:%(normal)s;}\
     a.%(cname)s:hover{color:%(hover)s;}" %
                               {'cname':k,
                                'normal': v[0],
                                'hover': v[1]}
                              for k,v in data['css'].items())

     html_text = html_template.substitute(context)

     # The output file is closed even when the write fails.
     with open(os.path.join(self.test_output, 'cloud.html'), 'w') as html_file:
         html_file.write(html_text)
Exemplo n.º 19
0
def make_pytagcloud_image(data, imagefilename, most_top_number=50):
    """
    Draw the most common items of *data* as three tag-cloud images.

    *data* must be passed in as a list.  The image with the default layout
    is saved to *imagefilename*; the vertical and horizontal layouts are
    saved next to it with ``_vertical`` and ``_horizontal`` inserted before
    the file extension.
    """
    common_options = {
        'size': (1000, 1000),
        'background': (255, 255, 255),
        'fontname': 'NotoSansCJKkr-Bold',
    }

    most_common = collections.Counter(data).most_common(most_top_number)
    tags = make_tags(most_common, maxsize=200)

    stem, extension = os.path.splitext(imagefilename)
    # (output path, layout-specific options), in drawing order.
    renders = (
        (imagefilename, {}),
        (stem + "_vertical" + extension, {'layout': LAYOUT_VERTICAL}),
        (stem + "_horizontal" + extension, {'layout': LAYOUT_HORIZONTAL}),
    )
    for target, layout_options in renders:
        options = dict(common_options)
        options.update(layout_options)
        create_tag_image(tags, target, **options)
Exemplo n.º 20
0
def create_wordcloud(topic_id):
    """Draw the 20 top words of one LDA topic as ``cloud<topic_id>.png``.

    The words and their weights are read from ``LDA_MODEL.show_topic`` and
    printed; each weight is multiplied by 100 and truncated to an integer
    before being used as the tag count.  The image is written into
    ``WORDCLOUD_PATH``.

    :param topic_id: id of the topic to visualise.
    """
    word_tuples = LDA_MODEL.show_topic(topic_id, 20)
    # Parallel lists of words and their frequencies.
    words_arr = []
    freq_arr = []
    for word_tuple in word_tuples:
        try:
            word = str(word_tuple[0])
            freq = word_tuple[1]
        except Exception:
            # Best effort: skip entries that cannot be read or converted.
            continue
        # Append only after both values were obtained so that the two
        # lists can never get out of step.
        words_arr.append(word)
        freq_arr.append(freq)
    print(words_arr)
    # NOTE(review): the return value is discarded, so this only has an
    # effect if normalize() works in place - confirm.
    normalize(freq_arr)
    print(freq_arr)

    # Build the (word, count) pairs for the word cloud.
    counts = [(word, int(freq * 100)) for word, freq in zip(words_arr, freq_arr)]

    tags = make_tags(counts, minsize=20, maxsize=60, colors=COLOR_SCHEMES['audacity'])

    output = join(WORDCLOUD_PATH, 'cloud' + str(topic_id) + '.png')

    create_tag_image(tags=tags, output=output,
                     size=(500, 333),
                     background=(255, 255, 255, 255),
                     layout=3, fontname='PT Sans Regular', rectangular=True)
Exemplo n.º 21
0
def generate_word_cloud(counts, title):
	"""Draw the 20 highest counted keywords of *counts* to ``<title>.png``.

	:param counts: mapping of keyword to count.
	:param title: output file name without the ``.png`` extension.
	"""
	# Sort the keywords, highest count first.  items() is available on both
	# Python 2 and Python 3, unlike iteritems().
	sorted_wordscount = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)[:20]

	# Generate the word cloud image
	tags = make_tags(sorted_wordscount, minsize=50, maxsize=150)
	create_tag_image(tags, title + '.png', size=(1300,1150),
		background=(0, 0, 0, 255), layout=LAYOUT_MIX, fontname='Molengo', rectangular=True)
Exemplo n.º 22
0
def create_cloud(out_name, words, maxsize=120, fontname='Lobster'):
    """
    Create a word cloud when pytagcloud is installed

    When pytagcloud cannot be imported, a warning is printed the first time
    and the function returns without creating anything.

    :param out_name: output filename
    :param words: list of (value,str), a gensim returns (value, word)
    :param maxsize: int, optional
        Size of maximum word. The best setting for this parameter will often
        require some manual tuning for each input.
    :param fontname: str, optional, Font to use.
    :return: None
    """
    global warned_error

    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        if not warned_error:
            print("Could not import pytagcloud. Skipping cloud generation!")
            # Record that the warning was shown; the flag was never set
            # before, so the message was repeated on every call.
            warned_error = True
        return

    # gensim returns a weight between 0 and 1 for each word, while pytagcloud
    # expects an integer word count. So, we multiply by a large number and
    # truncate. For a visualization this is an adequate approximation.
    # We also need to flip the order as gensim returns (value, word), whilst
    # pytagcloud expects (word, value):

    words = [(w, int(v*10000)) for v, w in words]
    tags = make_tags(words, maxsize=maxsize)
    create_tag_image(tags, out_name, size=(1800, 1200), fontname=fontname)
def interactive_wordcloud(all_texts):
    '''
    Write an HTML tag cloud of the 200 most frequent words.

    The words of all texts are counted, the 200 highest counts are laid
    out as HTML data and substituted into ``out/template.html`` (next to
    this module).  The page is written to ``out/cloud.html`` below the
    current working directory.  Intermediate values are printed.

    :param all_texts: iterable of texts, each an iterable of words
    :return: None
    '''
    flat_text = [word for text in all_texts for word in text]
    print(flat_text)

    counts = Counter(flat_text).items()
    print(counts)

    # Sort by count, highest first, and select the top 200 word counts.
    # The sort used to be ascending, which kept the 200 LEAST frequent
    # words instead.
    sorted_wordscount = sorted(counts, key=lambda tup: tup[1],
                               reverse=True)[:200]
    print(sorted_wordscount)
    # Running get_tag_counts result in error UnicodeDecodeError: 'charmap' codec can't decode byte 0xaa in position 90: character maps to <undefined>
    # This is because in file stopwords.py, that is called by counter.py (contains code for get_tag_counts), the stopwords are not read in utf-8
    tags = make_tags(sorted_wordscount, maxsize=100)
    print('tags', tags)
    data = create_html_data(tags,
                            size=(1600, 800),
                            layout=LAYOUT_MIX,
                            fontname='Philosopher',
                            rectangular=True)
    print('data', data)

    # ======================================================================================================================
    # Write wordcloud on HTML file
    # ======================================================================================================================

    template_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'out/template.html')
    # Close the template after reading; the handle used to be left open.
    with open(template_path, 'r') as template_file:
        html_template = Template(template_file.read())

    context = {}

    tags_template = '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;"><a class="tag %(cls)s" href="#%(tag)s" style="top: %(top)dpx;\
            left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)dpx;">%(tag)s</a></li>'

    context['tags'] = ''.join([tags_template % link for link in data['links']])
    context['width'] = data['size'][0]
    context['height'] = data['size'][1]
    context['css'] = "".join("a.%(cname)s{color:%(normal)s;}\
            a.%(cname)s:hover{color:%(hover)s;}" % {
        'cname': k,
        'normal': v[0],
        'hover': v[1]
    } for k, v in data['css'].items())

    html_text = html_template.substitute(context)

    test_output = os.path.join(os.getcwd(), 'out')
    # The output file is closed even when the write fails.
    with open(os.path.join(test_output, 'cloud.html'), 'w') as html_file:
        html_file.write(html_text)
    '''
Exemplo n.º 24
0
def create_cloud(oname, words, maxsize=120, fontname='Lobster'):
    '''Creates a word cloud (when pytagcloud is installed)

    When pytagcloud cannot be imported, a warning is printed the first time
    and the function returns without creating anything.

    Parameters
    ----------
    oname : output filename
    words : list of (value,str)
    maxsize : int, optional
        Size of maximum word. The best setting for this parameter will often
        require some manual tuning for each input.
    fontname : str, optional
        Font to use.
    '''
    global warned_of_error
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        if not warned_of_error:
            print("Could not import pytagcloud. Skipping cloud generation")
            # Record that the warning was shown; the flag was never set
            # before, so the message was repeated on every call.
            warned_of_error = True
        return

    # gensim returns a weight between 0 and 1 for each word, whereas
    # pytagcloud takes word counts, so the weight is multiplied by a large
    # number.  gensim returns (value, word) while pytagcloud expects
    # (word, value) as input, so the pair is flipped as well.
    words = [(w, int(v * 10000)) for v, w in words]
    tags = make_tags(words, maxsize=maxsize)
    create_tag_image(tags, oname, size=(1800, 1200), fontname=fontname)
def wordcloud(query, layout, font, max_words, verbosity=False):
    """Draw a tag cloud of the tweets returned for *query* to ``<query>.png``.

    Tweet texts are lower-cased and stripped of punctuation before the
    words are counted.  Stop words, words shorter than three characters and
    words starting with ``http`` are discarded, and the counts of the
    *max_words* most common words are scaled relative to the highest count.

    :param query: search query; also used as the output file name stem.
    :param layout: layout handed to ``create_tag_image``.
    :param font: font name handed to ``create_tag_image``.
    :param max_words: maximum number of words in the cloud.
    :param verbosity: when true, the raw response is printed.
    :raises ValueError: if no word is left after filtering.
    """
    my_oauth, complete_url, stop_words = twitter(query)
    # Characters excluded from tweets (same set as before; the backslash is
    # now written as an explicit escape).
    punctuation = "#@!\"$%&'()*+,-./:;<=>?[\\]^_`{|}~\'"
    r = requests.get(complete_url, auth=my_oauth)
    tweets = r.json()
    if verbosity:
        print(tweets)

    cleaned = []
    for tweet in tweets['statuses']:
        text = tweet['text'].lower()
        # Exclude punctuation from tweets.
        cleaned.append(''.join(ch for ch in text if ch not in punctuation))

    # Join with a space: tweets used to be concatenated back to back, which
    # fused the last word of one tweet with the first word of the next.
    words = ' '.join(cleaned).split()
    counts = Counter(words)
    for word in stop_words:
        del counts[word]

    # Iterate over a snapshot of the keys; deleting while iterating the
    # live key view fails on Python 3.
    for key in list(counts):
        if len(key) < 3 or key.startswith('http'):
            del counts[key]

    final = counts.most_common(max_words)
    max_count = max(final, key=operator.itemgetter(1))[1]
    final = [(name, count / float(max_count)) for name, count in final]
    tags = make_tags(final, maxsize=max_word_size)
    create_tag_image(tags, query + '.png', size=(width, height), layout=layout, fontname=font, background=background_color)
    print("new png created")
def create_cloud(oname, words,maxsize=120, fontname='Lobster'):
    '''Creates a word cloud (when pytagcloud is installed)

    When pytagcloud cannot be imported, a warning is printed the first time
    and the function returns without creating anything.

    Parameters
    ----------
    oname : output filename
    words : list of (str,value)
        Each pair is unpacked as (word, weight).
    maxsize : int, optional
        Size of maximum word. The best setting for this parameter will often
        require some manual tuning for each input.
    fontname : str, optional
        Font to use.
    '''
    global warned_of_error
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        if not warned_of_error:
            print("Could not import pytagcloud. Skipping cloud generation")
            # Record that the warning was shown; the flag was never set
            # before, so the message was repeated on every call.
            warned_of_error = True
        return

    # gensim returns a weight between 0 and 1 for each word, while pytagcloud
    # expects an integer word count. So, we multiply by a large number and
    # truncate. For a visualization this is an adequate approximation.
    words = [(w,int(v*10000)) for w,v in words]
    tags = make_tags(words, maxsize=maxsize)
    create_tag_image(tags, oname, size=(1800, 1200), fontname=fontname)
Exemplo n.º 27
0
def make_html_data(tag_count,
                   file_name,
                   font_max_size=120,
                   html_size=(900, 600)):
    """Write an HTML tag cloud of *tag_count* to ``../dist/<file_name>``.

    The tags are laid out horizontally and substituted into
    ``web/template.html``, which is looked up next to this module.

    :param tag_count: tag counts handed to ``make_tags``.
    :param file_name: name of the HTML file created below ``../dist``.
    :param font_max_size: maximum tag size handed to ``make_tags``.
    :param html_size: size handed to ``create_html_data``.
    """
    tag_list = make_tags(tag_count, maxsize=font_max_size)
    data = create_html_data(tag_list,
                            size=html_size,
                            layout=LAYOUT_HORIZONTAL,
                            fontname='Korean')
    template_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'web/template.html')
    # Close the template after reading; the handle used to be left open.
    with open(template_path, 'r') as template_file:
        html_template = Template(template_file.read())

    context = {}

    # The backslash continuations are part of the string literal, so the
    # indentation of the continued lines ends up in the markup.
    tags_template = '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;"><a class="tag %(cls)s" href="#%(tag)s"\
            style="top: %(top)dpx;left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)\
            dpx;">%(tag)s</a></li>'

    context['tags'] = ''.join([tags_template % link for link in data['links']])
    context['width'] = data['size'][0]
    context['height'] = data['size'][1]
    context['css'] = "".join("a.%(cname)s{color:%(normal)s;}\
            a.%(cname)s:hover{color:%(hover)s;}" % {
        'cname': k,
        'normal': v[0],
        'hover': v[1]
    } for k, v in data['css'].items())

    html_text = html_template.substitute(context)

    # The output file is closed even when the write fails.
    with open(os.path.join('../dist', file_name), 'w') as html_file:
        html_file.write(html_text)
Exemplo n.º 28
0
def words_check(request):
    """Count the nouns of the posted text and draw them as a word cloud.

    The text is read from the ``data`` POST field.  Nouns of exactly one
    character are dropped and the rest is sorted by count, highest first.
    The image is saved under ``static/wordcloud/`` with a random number
    (0-99) in its name.  The sorted nouns and that number are returned as
    a JSON response.
    """
    # Set up the required library objects and variables
    posted_text = request.POST.get('data')
    analyzer = Komoran()
    words = Counter(analyzer.nouns(posted_text))
    # Filter out one-character words
    kept = {noun: count for noun, count in words.items() if len(noun) != 1}
    nouns = sorted(kept.items(), key=lambda pair: pair[1], reverse=True)
    hashing = random.choice(range(100))
    context = {'nouns': nouns, 'hashing': hashing}
    # Word cloud
    taglist = pytagcloud.make_tags(nouns, minsize=10, maxsize=60)
    link = 'static/wordcloud/wordcloud' + str(hashing) + '.jpg'
    render_options = dict(size=(600, 600),
                          layout=3,
                          fontname='CookieRun',
                          rectangular=True)
    pytagcloud.create_tag_image(taglist, link, **render_options)

    payload = json.dumps(context)
    return HttpResponse(payload, content_type='application/json')
Exemplo n.º 29
0
def show_token_df():
    """Print document-frequency statistics for the vocabulary and draw a tag cloud.

    Loads the vocabulary from ``../data/vocabulary_all.dict``, builds a
    token -> document-frequency mapping, prints several summaries of it, and
    renders the tokens whose document frequency exceeds 5000 to
    ``keyword_tag_cloud4.png``.
    """
    # presumably returns a gensim-style dictionary (has .items() and .dfs) -- TODO confirm
    dic = _build_vocabulary(dictionary_path='../data/vocabulary_all.dict')
    # Copy the pairs yielded by dic.items() into a plain id -> token dict.
    id2token = {tokenid: token for (tokenid, token) in dic.items()}
    id2df = dic.dfs
    # Re-key the frequencies by token instead of by token id.
    token2df = {id2token[tokenid]: df for (tokenid, df) in id2df.items()}
    df = pd.DataFrame()
    df['token'] = token2df.keys()
    df['df'] = token2df.values()

    print(df['df'].describe())
    # The bare string below appears to be sample output of the call above.
    '''
    count    125156.000000
    mean         63.621824
    std         858.189270
    min           1.000000
    25%           1.000000
    50%           2.000000
    75%           7.000000
    max       39912.000000

    '''

    # Tokens with a document frequency above 30000.
    print({token: df for (token, df) in token2df.items() if df > 30000} )
    # The bare string below appears to be sample output of the call above.
    '''
    {'起诉书': 38442, '公诉': 39386, '现已': 39136, '参加': 38840, '检察员': 37974, '检': 37350, '机关': 39859, '元': 31317, '指控': 39265, '终结': 39468, '月': 39911, '证据': 37175, '年': 39912, '上述事实': 33553, '犯': 39459, '人民检察院': 39234, '号': 39814, '审理': 39629, '开庭审理': 35738, '到庭': 38301, '供述': 30093, '证实': 32083, '被告人': 39864, '提起公诉': 38118, '依法': 39123, '指派': 33070, '本案': 36616, '出庭': 34811, '支持': 35414, '公开': 38635, '中': 31875, '本院': 39852, '刑诉': 38329, '日': 39902, '诉讼': 38437} len 35
    '''
    # Summary restricted to document frequencies strictly between 3 and 30000.
    print(df[(df['df'] > 3) & (df['df'] < 30000)].describe())

    # Tokens with a document frequency above 5000 feed the tag cloud.
    filter_words = {token:df for  (token,df) in token2df.items() if df>5000 }
    print(filter_words,'len %s' % len(filter_words) )
    # Sort by document frequency, highest first.
    swd = sorted(filter_words.items(), key=itemgetter(1), reverse=True)
    tags = make_tags(swd, minsize=10, maxsize=50, colors=COLOR_SCHEMES['goldfish'])
    create_tag_image(tags, 'keyword_tag_cloud4.png',size=(2400, 1000), background=(240, 255, 255),
                     layout=LAYOUT_HORIZONTAL, fontname="SimHei")
Exemplo n.º 30
0
 def draw_wordcloud(self, tag, name):
     """Render ``tag`` as a 900x600 word cloud saved to ``<name>.jpg``.

     ``tag`` is handed unchanged to ``pytagcloud.make_tags`` with a
     maximum font size of 80.
     """
     cloud_tags = pytagcloud.make_tags(tag, maxsize=80)
     output_path = '%s.jpg' % name
     pytagcloud.create_tag_image(
         cloud_tags,
         output_path,
         size=(900, 600),
         fontname='Korean',
         rectangular=False,
     )
Exemplo n.º 31
0
def create_cloud(oname, words, maxsize=120, fontname='Lobster'):
    '''Creates a word cloud (when pytagcloud is installed)

    If pytagcloud cannot be imported, a warning is printed the first time
    only and the function returns without producing any output.

    Parameters
    ----------
    oname : output filename
    words : list of (str, value)
        Pairs of word and weight; each weight is scaled by 10000 and
        truncated to an integer before being handed to pytagcloud.
    maxsize : int, optional
        Size of maximum word. The best setting for this parameter will often
        require some manual tuning for each input.
    fontname : str, optional
        Font to use.
    '''
    global warned_of_error
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        # Read the flag through globals() so a module that never defined it
        # does not hit a NameError, and set it so the warning is shown once.
        if not globals().get('warned_of_error', False):
            print("Could not import pytagcloud. Skipping cloud generation")
            warned_of_error = True
        return

    # gensim returns a weight between 0 and 1 for each word, while pytagcloud
    # expects an integer word count. So, we multiply by a large number and
    # round. For a visualization this is an adequate approximation.
    words = [(w, int(v * 10000)) for w, v in words]
    tags = make_tags(words, maxsize=maxsize)
    create_tag_image(tags, oname, size=(1800, 1200), fontname=fontname)
Exemplo n.º 32
0
    def make_cloud(self, output_html):
        """Build the keyword cloud either as HTML data or as a PNG image.

        All stored keywords are joined into one text and counted. When
        ``output_html`` is truthy the top 30 tags (max font size 42) are
        returned as the result of ``create_html_data``. Otherwise the top
        100 tags (max font size ``self.maxsize``) are drawn to the resolved
        ``jcuwords:keyword-cloud.png`` asset and a dict holding the image
        URL under ``'image'`` is returned.
        """
        text = ' '.join(kw.keyword for kw in KeywordManager().all())

        max_tags = 30 if output_html else 100
        max_size = 42 if output_html else self.maxsize

        counts = get_tag_counts(text)[:max_tags]
        tags = make_tags(counts, minsize=self.minsize, maxsize=max_size)

        if output_html:
            return create_html_data(tags, size=(900, 300),
                                    layout=LAYOUT_HORIZONTAL)

        # Overwrite the packaged cloud image in place.
        target = self.resolver.resolve('jcuwords:keyword-cloud.png').abspath()
        create_tag_image(tags, target, size=(1024, 500),
                         fontname='IM Fell DW Pica',
                         layout=LAYOUT_MIX)
        return {'image': self.request.resource_url(None, 'keyword-cloud.png')}
Exemplo n.º 33
0
def search(query_word):
	"""Draw a word cloud for businesses whose name contains ``query_word``.

	Runs a wildcard name query against the "urban" index, falling back to
	"champ" when there are no hits. Returns 0 when neither index matches.
	Otherwise the ``word_freq`` pairs of the first "my_data" document found
	for each matched business are summed per word and rendered to
	``static/cloud_large.jpg``; nothing is returned in that case.
	"""
	client = Elasticsearch()
	name_query = {"query": {"wildcard": {"name": {"value": "*" + query_word + "*"}}}}

	# Primary index first, secondary index only when the first is empty.
	response = client.search(index="urban", body=name_query)
	if response['hits']['total'] == 0:
		response = client.search(index="champ", body=name_query)
	if response['hits']['total'] == 0:
		return 0

	totals = defaultdict(int)
	for hit in response['hits']['hits']:
		business_id = hit['_source']['business_id']
		lookup = {"query": {"match": {"business_id": business_id}}}
		details = client.search(index="my_data", body=lookup)

		# Only the first matching document contributes its frequencies.
		for entry in details['hits']['hits'][0]['_source']['word_freq']:
			totals[entry[0]] += entry[1]

	words = [(word, totals[word]) for word in totals]
	tags = make_tags(words, maxsize=80)
	create_tag_image(tags, 'static/cloud_large.jpg', size=(900, 600), fontname='Lobster')
Exemplo n.º 34
0
def create_file(res_id):
    """Fetch Zomato reviews for a restaurant and render them as a tag cloud.

    Requests the reviews of restaurant ``res_id`` from the Zomato API, joins
    all review texts, passes the result through ``convert`` and writes a
    900x600 cloud to ``static/img/<res_id>.png``.

    A message is printed and the function returns early when the request
    fails or the API answers with a status other than 200. When the
    restaurant has no reviews a message is printed and the cloud is still
    built from empty text. Always returns ``None``.
    """
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration or the environment instead.
    api_key = 'db837d5e88fefd82d146b8e2e4e45c35'
    headers = {
        'Content-Type': 'application/json',
        'Access-Control-Allow-Origin': '*',
        'user-key': api_key,
    }
    url = "https://developers.zomato.com/api/v2.1/reviews?res_id=%s" % (res_id)
    try:
        response = requests.get(url, headers=headers)
    except requests.exceptions.RequestException:
        # Only transport-level failures are treated as network problems;
        # anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        print('Network Issues!')
        return

    if response.status_code != 200:
        print('Api Issues')
        return

    data = response.json()
    if data["reviews_count"] == 0:
        print('No Reviews!')
        all_reviews = ''
    else:
        all_reviews = ''.join(
            entry["review"]["review_text"] + ' '
            for entry in data["user_reviews"]
        )

    all_reviews = convert(all_reviews)
    tags = make_tags(get_tag_counts(all_reviews),
                     maxsize=50,
                     colors=COLOR_SCHEMES['goldfish'])
    # str() lets callers pass the id as an int as well as a string.
    create_tag_image(tags,
                     'static/img/' + str(res_id) + '.png',
                     size=(900, 600),
                     fontname='Lobster')
0
    def _create_image(self, text):
        """Render ``text`` as a tag cloud image at ``/tmp/cloud_large.png``.

        Exits the process with status -1 when ``get_tag_counts`` returns
        ``None``. With ``self._repeat_tags`` set, every tag is emitted three
        times with the fixed weights 5, 2 and 1 in place of its real count.
        The canvas is sized 4:3 from the shorter screen dimension. Returns 0.
        """
        tag_counts = get_tag_counts(text)
        if tag_counts is None:
            sys.exit(-1)

        if self._repeat_tags:
            # All tags at weight 5 first, then all at 2, then all at 1.
            tag_counts = [(entry[0], weight)
                          for weight in (5, 2, 1)
                          for entry in tag_counts]

        tags = make_tags(tag_counts, maxsize=150, colors=self._color_scheme)
        path = os.path.join('/tmp/cloud_large.png')

        screen_h = Gdk.Screen.height()
        screen_w = Gdk.Screen.width()
        if screen_h < screen_w:
            height = screen_h
            width = int(height * 4 / 3)
        else:
            width = screen_w
            height = int(width * 3 / 4)

        # Pass a font only when one was configured, so pytagcloud's default
        # font applies otherwise.
        options = {'layout': self._layout, 'size': (width, height)}
        if self._font_name is not None:
            options['fontname'] = self._font_name
        create_tag_image(tags, path, **options)
        return 0
    def tagCloud(self):
        """Draw a tag cloud of every document in ``self.docSet``.

        The documents are concatenated, each prefixed by a single space, and
        the resulting cloud is written to ``filename.png`` at 2000x1000.
        """
        texts = "".join(" " + item for item in self.docSet)

        tags = make_tags(get_tag_counts(texts), maxsize=120)
        create_tag_image(
            tags,
            'filename.png',
            size=(2000, 1000),
            background=(0, 0, 0, 255),
            layout=LAYOUT_MIX,
            fontname='Lobster',
            rectangular=True,
        )
Exemplo n.º 37
0
def saveWordCloud(wordinfo, filename):
    """Write a 640x480 word cloud for ``wordinfo`` to ``filename`` and open it.

    ``wordinfo`` is converted with ``dict()`` and its items are used as the
    (word, count) pairs. The saved file is then opened via ``webbrowser``.
    """
    word_counts = dict(wordinfo).items()
    cloud_tags = pytagcloud.make_tags(word_counts, maxsize=80)
    pytagcloud.create_tag_image(cloud_tags, filename, size=(640, 480), fontname="korean")

    webbrowser.open(filename)
Exemplo n.º 38
0
def draw_pytagcloud(data_array, image_filename):
    """Draw the 50 most common items of ``data_array`` as a 900x600 tag cloud.

    The image is written to ``image_filename``.
    """
    top_words = Counter(data_array).most_common(50)
    cloud_tags = make_tags(top_words, maxsize=50)
    create_tag_image(cloud_tags, image_filename, size=(900, 600), fontname='Nanum Gothic')
Exemplo n.º 39
0
def make_image(tag_count, file_name, font_max_size=120, size=(900, 600)):
    """Render ``tag_count`` as a word cloud image saved to ``file_name``.

    ``font_max_size`` caps the largest tag and ``size`` is the canvas size
    passed to pytagcloud. A banner line is printed once the image is written.
    """
    cloud_tags = make_tags(tag_count, maxsize=font_max_size)
    create_tag_image(cloud_tags, file_name, size=size, fontname='Korean', rectangular=False)
    rule = "-" * 6
    print(rule + "make wordcloud Image" + rule)
Exemplo n.º 40
0
def create_word_claod(words, output_file_name, maxsize, fontname='Lobster'):
    """Create an 1800x1200 word cloud image, if pytagcloud is available.

    ``words`` is an iterable of (word, weight) pairs; each weight is scaled
    by 10000 and truncated to an integer before being passed to pytagcloud.
    ``maxsize`` is the largest font size and ``fontname`` the font to use.

    When pytagcloud cannot be imported a message is printed and the function
    returns ``None`` without writing anything.
    """
    words = [(w, int(v * 10000)) for w, v in words]
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        print("Could not import pytagcloud. Skipping cloud generation")
        # Stop here: continuing would fail on the names that were not imported.
        return
    tags = make_tags(words, maxsize=maxsize)
    create_tag_image(tags, output_file_name, size=(1800, 1200), fontname=fontname)
Exemplo n.º 41
0
def saveWordCloud(wordInfo):
    """Write a 640x480 word cloud for ``wordInfo`` to ``wordcloud.png`` and open it.

    ``wordInfo`` is converted with ``dict()`` and its items are used as the
    (word, count) pairs. The type of the generated tag list is printed
    before the image is drawn.
    """
    word_counts = dict(wordInfo).items()
    cloud_tags = pytagcloud.make_tags(word_counts, maxsize=80)
    print(type(cloud_tags))  # <class 'list'>

    filename = 'wordcloud.png'
    pytagcloud.create_tag_image(
        cloud_tags,
        filename,
        size=(640, 480),
        fontname='korean',
        rectangular=False,
    )
    webbrowser.open(filename)
Exemplo n.º 42
0
def saveWordCloud(wordInfo, filename):
    """Write a 700x480 word cloud for ``wordInfo`` to ``filename`` and open it.

    ``wordInfo`` is converted with ``dict()`` and its items are used as the
    (word, count) pairs. The saved file is then opened via ``webbrowser``.
    """
    word_counts = dict(wordInfo).items()
    cloud_tags = pytagcloud.make_tags(word_counts, maxsize=100)
    pytagcloud.create_tag_image(cloud_tags, filename, size=(700, 480),
                                fontname='korean', rectangular=False)
    webbrowser.open(filename)
Exemplo n.º 43
0
 def _test_large_tag_image(self):
     """Render the 80 most frequent tags of ``self.hound`` and time the run.

     The cloud is written to ``cloud_large.png`` inside ``self.test_output``
     and the elapsed time is printed in whole seconds.
     """
     start = time.time()
     tags = make_tags(get_tag_counts(self.hound.read())[:80], maxsize=120,
                      colors=COLOR_SCHEMES['audacity'])
     create_tag_image(tags, os.path.join(self.test_output, 'cloud_large.png'),
                      ratio=0.75, background=(0, 0, 0, 255),
                      layout=LAYOUT_HORIZONTAL, fontname='Lobster')
     # Single-argument call form prints the same on Python 2 and Python 3.
     print("Duration: %d sec" % (time.time() - start))
Exemplo n.º 44
0
def make_tag_cloud():
    """Build a 1024x768 tag cloud image from the text on standard input.

    Every stdin line is stripped and lower-cased, the lines are joined with
    single spaces, and the cloud is written to ``<sys.argv[1]>.png``.
    """
    # Build the text in one pass from an explicit empty start; no exception
    # handling is needed because nothing here depends on prior state.
    text = ''.join(' ' + line.strip().lower() for line in sys.stdin)

    tags = make_tags(get_tag_counts(text), maxsize=150)
    create_tag_image(tags, sys.argv[1] + '.png', size=(1024, 768))
Exemplo n.º 45
0
def make_tag_cloud():
    """Build a 1024x768 tag cloud image from the text on standard input.

    Every stdin line is stripped and lower-cased, the lines are joined with
    single spaces, and the cloud is written to ``<sys.argv[1]>.png``.
    """
    # Build the text in one pass from an explicit empty start; no exception
    # handling is needed because nothing here depends on prior state.
    text = ''.join(' ' + line.strip().lower() for line in sys.stdin)

    tags = make_tags(get_tag_counts(text), maxsize=150)
    create_tag_image(tags, sys.argv[1] + '.png', size=(1024, 768))
Exemplo n.º 46
0
Arquivo: ftes.py Projeto: kqdtran/FTES
def create_cloud(counter, filename):
    '''
    Draws the top 80 tag counts of ``counter`` as a 900x600 word cloud.

    ``counter`` is passed to ``get_tag_counts`` as-is and the image is saved
    to ``./img/<filename>.png`` on a black background.
    '''
    top_counts = get_tag_counts(counter)[:80]
    cloud_tags = make_tags(top_counts, maxsize=120,
                           colors=COLOR_SCHEMES['goldfish'])
    output_path = './img/' + filename + '.png'
    create_tag_image(cloud_tags, output_path,
                     size=(900, 600), background=(0, 0, 0, 255),
                     layout=LAYOUT_HORIZONTAL, fontname='Lobster')
Exemplo n.º 47
0
def make_wordcloud(text, image_name, width, height):
    """Render ``text`` as a ``width`` x ``height`` word cloud at ``image_name``.

    The pairs produced by ``return_list_of_tuples(text)`` are frozen into a
    tuple and sized between 10 and 80 before drawing.
    """
    word_counts = tuple(return_list_of_tuples(text))
    cloud_tags = pytagcloud.make_tags(word_counts, maxsize=80, minsize=10)
    pytagcloud.create_tag_image(
        cloud_tags,
        image_name,
        size=(width, height),
        fontname='Nanum Gothic',
        rectangular=False,
    )
 def build_pytag_cloud(self):
     """Draw the most frequent entries of ``self.tagcloud`` and open the image.

     Entries are sorted by count, highest first, and the first
     ``self.wordcount`` of them are rendered at 900x575. The file is named
     ``<img_directory>/<state>.<city>.900.575.png`` and is opened with
     ``webbrowser`` once written.
     """
     width = 900
     height = 575
     fileName = '{0}/{1}.{2}.{3}.{4}.png'.format(self.img_directory, self.state, self.city, width, height)
     # items() works on both Python 2 and 3; iteritems() exists only on 2.
     items = sorted(self.tagcloud.items(), key=itemgetter(1), reverse=True)
     tags = make_tags(items[:self.wordcount], maxsize=80)
     create_tag_image(tags, fileName, size=(width, height), fontname='Droid Sans')
     import webbrowser
     webbrowser.open(fileName)  # see results
Exemplo n.º 49
0
	def calAccuracy(self):	
		"""List the known phones, prompt for one, and draw two review tag clouds.

		Prints every (id, name) row of the ``phone_id`` table, asks the user
		for a phone id and a name, creates a directory named after
		``self.name``, then writes ``positive.png`` after ``self.getReview(1)``
		and ``negative.png`` after ``self.getReview(0)``.
		"""
		self.cursor1.execute("select id, name from phone_id;")
		result = self.cursor1.fetchall()
		# Show the available choices before prompting.
		for data in result:
			print data[0],data[1]
		self.phone_id=raw_input("Enter phone id\n");
		self.name = raw_input("Enter name:")
		import os
		# NOTE(review): the directory is created under an absolute path, but the
		# images below are written relative to the current working directory --
		# confirm the two locations are meant to coincide.
		os.mkdir('/home/darshan-ubuntu/Project/Products/Features/'+self.name)

		# presumably getReview(1) fills self.actual_review with positive reviews -- TODO confirm
		self.getReview(1)
		tags = make_tags(get_tag_counts(self.actual_review), maxsize=120)
		create_tag_image(tags, self.name+'/positive.png', size=(900, 600))
		# Clear the collected text before gathering the second batch.
		self.actual_review=""
		
		# presumably getReview(0) gathers the negative reviews -- TODO confirm
		self.getReview(0)
		tags = make_tags(get_tag_counts(self.actual_review), maxsize=60)
		create_tag_image(tags, self.name+'/negative.png', size=(900, 600))
Exemplo n.º 50
0
 def _test_make_tags(self):
     """Check that the top 60 tags contain 'sir' rendered at size 40."""
     mtags = make_tags(get_tag_counts(self.hound.read())[:60])
     found = any(
         entry['tag'] == 'sir' and entry['size'] == 40
         for entry in mtags
     )
     self.assertTrue(found)
Exemplo n.º 51
0
    def test_make_tags(self):
        """Check that the top 60 tags contain 'sir' rendered at size 40."""
        mtags = make_tags(get_tag_counts(self.hound.read())[:60])
        found = any(
            entry['tag'] == 'sir' and entry['size'] == 40
            for entry in mtags
        )
        self.assertTrue(found)
Exemplo n.º 52
0
def semantic_cloud(topic):
    """Draw a tag cloud of the hashtags found for ``topic``.

    The statuses returned by ``TopicList(topic).GetTopicList()`` are passed
    to ``HashtagsList``; the resulting hashtags are joined with spaces,
    printed, and rendered to ``semantic_cloud.png`` at 900x600.
    """
    topic_list = TopicList(topic)
    tlist = topic_list.GetTopicList()
    htagsl = HashtagsList(tlist['statuses'], topic)
    hl = htagsl.GetHashtagsList()
    cadena = " ".join(hl)
    # Single-argument call form prints the same on Python 2 and Python 3.
    print(cadena)
    tags = make_tags(get_tag_counts(cadena), maxsize=120)
    create_tag_image(tags, 'semantic_cloud.png', size=(900, 600), fontname='Lobster')
Exemplo n.º 53
0
 def test_layouts(self):
     """Render the top 80 tags of ``self.hound`` once per entry in ``LAYOUTS``.

     Each image is written to ``cloud_<layout>.png`` inside
     ``self.test_output`` and the total elapsed time is printed in whole
     seconds.
     """
     start = time.time()
     tags = make_tags(get_tag_counts(self.hound.read())[:80], maxsize=120)
     for layout in LAYOUTS:
         create_tag_image(tags, os.path.join(self.test_output, 'cloud_%s.png' % layout),
                          size=(900, 600),
                          background=(255, 255, 255, 255),
                          layout=layout, fontname='Lobster')
     # Single-argument call form prints the same on Python 2 and Python 3.
     print("Duration: %d sec" % (time.time() - start))
Exemplo n.º 54
0
    def makeCloud(self, tagsCounts, name="tag_cloud.png", height=500,
                  width=500, font="Droid Sans"):
        """Render ``tagsCounts`` as a rectangular tag cloud image.

        The counts are first passed through ``self.parseWords`` (to drop
        unigrams already contained in bigrams), coloured with
        ``self._colors`` and drawn to ``name`` on a ``width`` x ``height``
        canvas using ``font``.
        """
        cleaned_counts = self.parseWords(tagsCounts)
        cloud_tags = pytagcloud.make_tags(cleaned_counts, colors=self._colors)
        canvas = (width, height)
        pytagcloud.create_tag_image(cloud_tags, name, size=canvas,
                                    fontname=font, rectangular=True)
Exemplo n.º 55
0
def make_cloud(text, fname):
    '''Create a word cloud image from ``text`` and save it as ``<fname>.png``.

    The text is lower-cased and the fragments 'http', 'rt ' and '.co' are
    stripped before counting. The 50 most frequent tags are drawn on a
    3000x3250 black canvas with font sizes between 100 and 350.
    '''
    cleaned = text.lower().replace('http', '').replace('rt ', '').replace('.co', '')

    tags = make_tags(get_tag_counts(cleaned)[:50], maxsize=350, minsize=100)
    create_tag_image(tags, fname + '.png', size=(3000, 3250),
                     background=(0, 0, 0, 255), layout=LAYOUT_MIX,
                     fontname='Lobster', rectangular=True)
Exemplo n.º 56
0
 def __init__(self, raw_text, except_words):
     """Filter ``raw_text`` and build tag-cloud tags from what remains.

     Words of two characters or fewer, and words found in ``except_words``,
     are dropped. The surviving text is stored in ``self.filtered`` and the
     sized tags (font sizes 6 to 60) in ``self.tags``.
     """
     kept_words = []
     for word in raw_text.split():
         if len(word) <= 2 or word in except_words:
             continue
         kept_words.append(word)
     filtered = " ".join(kept_words)

     # Count occurrences of each remaining word.
     tag_counts = get_tag_counts(filtered)

     self.filtered = filtered
     self.tags = ptc.make_tags(tag_counts, maxsize=60, minsize=6)
Exemplo n.º 57
0
 def createTagCloud(self, rapper):
     """Draw an 800x400 tag cloud of ``rapper.rawLyrics`` to ``cloud_large.png``.

     Only tags whose computed size is greater than 20 are drawn.
     """
     # These imports are kept local on purpose: the original author found
     # they only worked here and not at the top of the module.
     from pytagcloud import create_tag_image, make_tags
     from pytagcloud.lang.counter import get_tag_counts

     lyrics = rapper.rawLyrics
     visible_tags = []
     for entry in make_tags(get_tag_counts(lyrics), maxsize=100):
         if entry['size'] > 20:
             visible_tags.append(entry)
     create_tag_image(visible_tags, 'cloud_large.png', size=(800, 400),
                      background=(239, 101, 85, 255), fontname='PT Sans Regular')
Exemplo n.º 58
0
def cr_tagcloud(words,
                fn,
                minsize=17,
                maxsize=50,
                size=(680, 500),
                fontname='Nobile'):
    """Draw a tag cloud from ``words`` and save it to ``fn``.

    Each element of ``words`` contributes its item at index 0 as the tag and
    its item at index 2 as the count. ``minsize``/``maxsize`` bound the font
    sizes; ``size`` and ``fontname`` are forwarded to pytagcloud.
    """
    pairs = []
    for record in words:
        pairs.append((record[0], record[2]))

    cloud_tags = make_tags(pairs, minsize=minsize, maxsize=maxsize)
    create_tag_image(cloud_tags, fn, size=size, fontname=fontname)
Exemplo n.º 59
0
def init():
    global tags
    global test_output
    
    home_folder = os.getenv('USERPROFILE') or os.getenv('HOME')
    test_output = os.path.join(home_folder, 'pytagclouds')
    
    if not os.path.exists(test_output):
        os.mkdir(test_output )         
    
    hound = open(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../test/pg2852.txt'), 'r')
    tags = make_tags(get_tag_counts(hound.read())[:50], maxsize=120, colors=COLOR_SCHEMES['audacity'])