def tag_cloud(dist, id_ = "", class_ = "", width=None, height=None,
              max_size=None, min_size=None, max_words = None,
              start_color=None, end_color=None, color_steps=None,
              sort_order="random"):
    '''
    Build an HTML tag cloud from a word distribution, store it in MongoDB and
    return the stored record.

    Parameters:
      dist        -- word distribution; it is passed to sort(), which returns
                     a list of (word, count) tuples in order of decreasing
                     frequency.
      id_         -- optional id attribute for the wrapping div.
      class_      -- optional extra class for the wrapping div (the div always
                     carries the class "tagcloud").
      width,
      height      -- size written into the .tagcloud style rule. When
                     max_size/min_size are not both given they are also passed
                     to fit_to_area(); if either is missing there, both
                     default to 600 x 800.
      max_size,
      min_size    -- when both are given, font sizes come from
                     min_max_extrapolate() instead of fit_to_area().
      max_words   -- if given, only the max_words most frequent words are kept.
      start_color,
      end_color,
      color_steps -- when all three are given they are passed to
                     color_scheme(); otherwise the default 5-colour scheme is
                     used.
      sort_order  -- 'random' (default), 'frequency' or 'alphabetical'. Any
                     other value prints a warning and falls back to 'random'.

    Returns a dict with the keys '_id', 'body' (the html markup), 'style'
    (default styling for the tag cloud) and 'metadata'. The same dict is
    inserted into the wordapi.tagclouds collection before it is returned.
    '''
    # sort() returns a list of (word, count) tuples in order of decreasing
    # frequency
    dist = sort(dist)

    # truncate the list of items if max_words was specified
    if max_words:
        dist = dist[:int(max_words)]

    # the number of tags in the tagcloud, passed back in the metadata
    words_in_cloud = len(dist)

    # get the font size function. do this AFTER truncating to max_words and
    # BEFORE changing the order around.
    if max_size and min_size:
        font_size_fn = min_max_extrapolate(dist, max_size, min_size)
    else:
        if not (width and height):
            width = 600
            height = 800
        font = 'times new roman'
        font_size_fn = fit_to_area(width, height, dist, font)

    # determine the sort order. if the sort order is frequency, there's
    # nothing to do since the distribution is already sorted by frequency.
    if sort_order not in ('random', 'frequency', 'alphabetical'):
        print('invalid sort order; using default = random')
        sort_order = 'random'
    if sort_order == 'random':
        # shuffles in place
        random.shuffle(dist)
    elif sort_order == 'alphabetical':
        # tuples compare on the word first, so this orders by word
        dist.sort()

    # assemble the class and id attributes for the tag cloud's wrapping div.
    # NOTE(review): id_, class_ and the words themselves are inserted into
    # the markup unescaped -- confirm that callers only pass trusted values.
    css_classes = 'tagcloud'
    if class_ != "":
        css_classes += ' ' + class_
    div_attrs = 'class="%s"' % css_classes
    # the user can specify a unique id for the tag cloud if they want
    # additional styling applied from their own style sheets.
    if id_ != "":
        div_attrs += ' id="%s"' % id_

    # each word has a class of 'word' in addition to its frequency so that
    # the user may specify additional styling. **note**: make sure the space
    # after the span is maintained; otherwise the spans within the div won't
    # wrap.
    body_parts = ['<div %s>' % div_attrs]
    for word, freq in dist:
        body_parts.append('<span title="%d" class="word %s">%s</span> '
                          % (freq, num_to_word(freq), word))
    body_parts.append('</div>')
    body = ''.join(body_parts)

    if start_color and end_color and color_steps:
        num_colors = int(color_steps)
        colors = color_scheme(start_color, end_color, color_steps)
    else:
        num_colors = 5
        colors = color_scheme()

    # get the distinct frequencies (in order of first appearance) so that a
    # font-size and color can be specified for each
    seen_freqs = set()
    freqs = []
    for _, freq in dist:
        if freq not in seen_freqs:
            seen_freqs.add(freq)
            freqs.append(freq)

    # width/height may legitimately be None when min_size/max_size drive the
    # font sizes; emit 'auto' rather than the invalid CSS value 'None'.
    # NOTE(review): numeric sizes are written without a unit, as before.
    css_width = 'auto' if width is None else width
    css_height = 'auto' if height is None else height

    style_parts = ['''<style type="text/css">
    .tagcloud {width: %s; height: %s; text-align: center; }
    .word { text-align: center; vertical-align: middle; }
    ''' % (css_width, css_height)]
    for freq in freqs:
        color = colors[freq % num_colors]
        style_parts.append('''
    .%s {padding-left: 15px; padding-right: 15px; font-size: %s; color: %s }'''
                           % (num_to_word(freq), font_size_fn(freq), color))
    style_parts.append('''
    </style>''')
    style = ''.join(style_parts)

    # assemble the response
    oid = pymongo.objectid.ObjectId()
    uid = str(oid)
    long_url = settings.HOME_PAGE + '/cloud/' + uid
    short_url = bitly_shorten(long_url)

    metadata = {
        'utc_created': datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S"),
        'total_tags' : words_in_cloud,
        'short_url' : short_url,
    }
    record = {'_id': oid, 'body': body, 'style': style, 'metadata': metadata}
    print(style)

    # save to the database
    con = pymongo.Connection()
    collection = con.wordapi.tagclouds
    collection.insert(record)
    return record
def word_cloud(dist, css_id = "", css_class = "", layout = "svg",
               width=800, height=600, max_words = None,
               start_color=None, end_color=None, color_steps=None,
               sort_order="random", equn="linear", slope=0.15,
               link_prefix=None):
    '''
    Build a word cloud from a word distribution, store it in MongoDB and
    return the stored record.

    Parameters:
      dist        -- word distribution; it is passed to sort(), which returns
                     a list of (word, count) tuples in order of decreasing
                     frequency.
      css_id      -- optional user-supplied id for the wrapping div.
      css_class   -- optional class for the wrapping div.
      layout      -- "svg" (default) renders the body with
                     svg_based_layout(); any other value uses
                     div_based_layout().
      width,
      height      -- target size in pixels, written into the hidden
                     'tagcloud_size' div so the javascript resizer can read it.
      max_words   -- if given, only the max_words most frequent words are kept.
      start_color,
      end_color,
      color_steps -- when all three are given they are passed to
                     color_scheme(); otherwise the default 5-colour scheme is
                     used.
      sort_order  -- 'random' (default) shuffles the words, 'alphabetical'
                     sorts them; any other value keeps frequency order.
      equn        -- "log", "exp" or anything else for linear; selects the
                     function mapping a frequency to a relative font size.
      slope       -- slope handed to extrapolate_linear() in the linear case.
      link_prefix -- accepted but not used in this function.

    Returns a dict with the keys '_id', 'body', 'style', 'wordcloud_size',
    'script' and 'metadata'. The same dict is inserted into the
    wordapi.tagclouds collection before it is returned.
    '''
    # sort() returns a list of (word, count) tuples in order of decreasing
    # frequency
    dist = sort(dist)

    # truncate the list of items if max_words was specified
    if max_words:
        dist = dist[:int(max_words)]

    # the number of tags in the tagcloud, passed back in the metadata
    words_in_cloud = len(dist)

    # the equation of the line used here determines the *relative* sizes of
    # different frequency words. actual font sizes are computed with
    # javascript to dynamically fit the words into the specified area.
    # get the equation of the line AFTER truncating to max_words and BEFORE
    # changing the order around.
    if equn == "log":
        font_size_fn = lambda freq: 10*math.log(freq, 2) + 1
    elif equn == "exp":
        font_size_fn = lambda freq: math.pow(freq, 1.2)
    else:
        # linear
        min_size = 1
        font_size_fn = extrapolate_linear(dist, min_size, slope)

    # determine the sort order. if the sort order is frequency, there's
    # nothing to do since the distribution is already sorted by frequency.
    if sort_order == 'random':
        # shuffles in place
        random.shuffle(dist)
    elif sort_order == 'alphabetical':
        dist.sort()

    # assemble the id and class attributes for the word cloud's wrapping div.
    # the id must stay exactly "tagcloud" because the stylesheet below targets
    # #tagcloud, so the optional class goes into its own attribute.
    div_attrs = 'id="tagcloud"'
    if css_class != "":
        div_attrs += ' class="%s"' % css_class
    if css_id != "":
        # NOTE(review): this adds a second id attribute next to
        # id="tagcloud", which is not valid HTML. Kept to preserve the
        # intended output shape -- decide how a user-supplied id should
        # coexist with the fixed one.
        div_attrs += ' id="%s"' % css_id

    if layout == "svg":
        body = svg_based_layout(div_attrs, dist)
    else:
        body = div_based_layout(div_attrs, dist)

    if start_color and end_color and color_steps:
        num_colors = int(color_steps)
        colors = color_scheme(start_color, end_color, color_steps)
    else:
        num_colors = 5
        colors = color_scheme()

    # get the distinct frequencies (in order of first appearance) so that a
    # font-size and color can be specified for each
    seen_freqs = set()
    freqs = []
    for _, freq in dist:
        if freq not in seen_freqs:
            seen_freqs.add(freq)
            freqs.append(freq)

    # CSS only understands /* ... */ comments; a '//' line would be parsed as
    # part of the next selector and silently drop that rule.
    style_parts = ['''<style type="text/css">
    /* important clearfix for divs which wrap floating elements */
    /* see http://nicolasgallagher.com/micro-clearfix-hack/ */
    #hidden-resizer { zoom: 1; }
    #hidden-resizer:before, #hidden-resizer:after { content: ""; display: table; }
    #hidden-resizer:after { clear: both; }
    #tagcloud { font-size: 10px; text-align: center; zoom: 1; }
    #tagcloud:before, #tagcloud:after { content: ""; display: table; }
    #tagcloud:after { clear: both; }
    .word { text-align: center; vertical-align: middle; line-height:1; padding-right:5px; float:left; }
    ''']
    for freq in freqs:
        color = colors[freq % num_colors]
        style_parts.append('''
    .%s {font-size: %sem; color: %s }'''
                           % (num_to_word(freq), font_size_fn(freq), color))
    style_parts.append('''
    </style>''')
    style = ''.join(style_parts)

    # assemble the response (if you get an error on this line, probably mongo
    # is not running)
    oid = pymongo.objectid.ObjectId()
    uid = str(oid)
    long_url = settings.HOME_PAGE + '/cloud/' + uid
    if settings.DEBUG:
        short_url = long_url
    else:
        short_url = bitly_shorten(long_url)

    metadata = {
        'utc_created': datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S"),
        'total_tags' : words_in_cloud,
        'short_url' : short_url,
    }
    record = {
        '_id': oid,
        'body': body,
        'style': style,
        # hidden div to pass target width and height information onto the
        # javascript resizer. int() lets numeric strings through as well.
        # TODO not all tagclouds will necessarily have width and height?
        'wordcloud_size': '''
        <div id="tagcloud_size" style="width:%dpx; height:%dpx; position: absolute; left:-999em;top:-999em;"></div>''' % (int(width), int(height)),
        'script': '''<script type="text/javascript" src="http://code.jquery.com/jquery-1.7.1.min.js"></script>
        <script type="text/javascript" src="/media/js/wordcloud.js"></script>
        ''',
        'metadata': metadata
    }
    print(style)

    # save to the database
    con = pymongo.Connection()
    collection = con.wordapi.tagclouds
    collection.insert(record)
    return record