def doc_categorize (repo, response, params):
    """
    Show a page from which categories can be added to, or removed from,
    a document.

    The page shows the document's abstract, a short list of likely
    categories (as computed by ``find_likely_tags``), a form for adding a
    new category by name, and every category in the repository that has
    documents.  A category the document is already in is drawn in red and
    links to the ``doc_remove_category`` action; any other category is
    drawn in black and links to ``doc_add_category``.  Categories whose
    lower-cased name matches one of the regular expressions listed in the
    ``categorize-excluded-categories`` configuration option are not shown.

    :param doc_id: the document to categorize
    :type doc_id: UpLib doc ID string

    :result: an HTML page.  A BAD_REQUEST error is signalled instead if \
             ``doc_id`` is missing or does not name a document in the repository.
    :rtype: HTML
    """

    # imported here rather than at module level, as in the original code
    from uplib.basicPlugins import show_abstract, _is_sensible_browser
    from uplib.basicPlugins import show_title, STANDARD_BACKGROUND_COLOR, STANDARD_TOOLS_COLOR, STANDARD_LEGEND_COLOR
    from uplib.basicPlugins import __issue_javascript_head_boilerplate as issue_javascript_head_boilerplate
    from uplib.basicPlugins import __issue_menu_definition as issue_menu_definition
    from uplib.basicPlugins import __issue_title_styles as issue_title_styles

    global _CONFIGURATION
    if _CONFIGURATION is None:
        # compile the exclusion patterns once, on first use
        _CONFIGURATION = { "exclusions": [
            re.compile(x.strip())
            for x in configurator.default_configurator().get("categorize-excluded-categories", "").split(",")
            if x.strip()]}

    def figure_size(count, avgsize):
        # font size (in ems) for a category holding "count" documents when
        # the average category holds "avgsize"; yields 1.0 when they are equal
        if avgsize < 0.0001:
            return 0.0001
        return math.sqrt(math.log((count * (math.e - 1))/avgsize + 1))

    doc_id = params.get("doc_id")
    if not doc_id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc_id parameter specified.")
        return
    doc = repo.valid_doc_id(doc_id) and repo.get_document(doc_id)
    if not doc:
        response.error(HTTPCodes.BAD_REQUEST, "Invalid doc_id parameter '%s' specified." % doc_id)
        return

    # "or []" guards against a configuration dict without an "exclusions" entry
    exclusions = (_CONFIGURATION and _CONFIGURATION.get("exclusions")) or []

    def is_excluded(name):
        # true if the configuration says this category should not be shown
        lowered = name.lower()
        for pattern in exclusions:
            if pattern.match(lowered):
                return True
        return False

    fp = response.open()
    title = (doc.get_metadata("title") or doc.id).encode("UTF-8", "strict")
    fp.write("<head><title>Categorizing '%s'</title>\n" % htmlescape(title))
    fp.write('<meta http-equiv="Content-Script-Type" content="text/javascript">\n')
    fp.write('<link REL="SHORTCUT ICON" HREF="/favicon.ico">\n')
    fp.write('<link REL="ICON" type="image/ico" HREF="/favicon.ico">\n')
    issue_javascript_head_boilerplate(fp)
    issue_title_styles(fp)
    fp.write('</head><body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
    issue_menu_definition(fp)
    show_abstract(repo, doc, fp, _is_sensible_browser(response.user_agent), showpagesearch=False)
    fp.write("<hr />\n")

    doccats = [x.lower() for x in doc.get_category_strings()]
    # a document in "a/b/c" is also treated as being in "a" and in "a/b"
    for cat in doccats[:]:
        if cat.find('/') >= 0:
            parts = cat.split('/')
            for i in range(1, len(parts)):
                doccats.append('/'.join(parts[:i]))

    # the add/remove actions live under the first components of this request's path
    action_base = '/'.join(response.request_path.split('/')[:3])

    def category_link(name):
        # returns (verb, color, url) for the link that toggles "name" on this document
        if name.lower() in doccats:
            verb, color = "remove", "red"
        else:
            verb, color = "add", "black"
        url = action_base + '/doc_%s_category?doc_id=%s&tag=%s' % (
            verb, doc.id, urllib.quote_plus(name))
        return verb, color, url

    tags = find_likely_tags(doc)
    if tags:
        # show at most ten likely categories
        stags = min(10, len(tags))
        fp.write("<center><small><i>Likely categories</i></small><br />")
        count = 0
        for name, (score, ndocs, ascore) in tags:
            # ">=", not ">": the original test let stags + 1 links through
            if count >= stags:
                break
            if is_excluded(name):
                continue
            if count > 0:
                fp.write(" · ")
            size = 1
            verb, color, action = category_link(name)
            fp.write('<a style="font-size: %fem; color: %s;" href="%s" title="%s the \'%s\' category (score=%.3f)">%s</a>' % (
                size, color, action, verb, htmlescape(name), ascore, htmlescape(name)))
            count += 1
        # no <p> is open here, so only the <center> is closed
        fp.write("</center><hr />\n")

    fp.write('<form action="%s" method=get><center>Add a new category to this document: ' % (
        action_base + '/doc_add_category'))
    fp.write('<input type=hidden name="doc_id" value="%s">\n' % doc.id)
    # close the elements in the reverse of the order in which they were opened
    fp.write('<input type=text name="tag" value="" size=40></center></form>\n')

    note(4, "doc_categorize: retrieving repository categories... (%s)", time.ctime())
    cats = repo.get_categories_with_docs()
    note(4, "doc_categorize: have categories (%s)", time.ctime())
    if cats:
        fp.write("<hr>\n<center><small><i>All categories</i></small><br />")
        avgsize = sum([len(x) for x in cats.values()]) / float(len(cats))
        catkeys = cats.keys()
        catkeys.sort(lambda x, y: cmp(x.lower(), y.lower()))
        first = True
        for name in catkeys:
            if is_excluded(name):
                continue
            if not first:
                fp.write(" · ")
            else:
                first = False
            ndocs_in_cat = len(cats[name])
            # more heavily populated categories are drawn larger
            size = max(0.5, figure_size(ndocs_in_cat, avgsize))
            verb, color, action = category_link(name)
            actionsee = '/action/basic/repo_search?query=%s' % (
                urllib.quote_plus('categories:"%s"' % name))
            fp.write('<a style="font-size: %fem; color: %s;" href="%s" title="%s the \'%s\' category">%s</a>' % (
                size, color, action, verb, htmlescape(name), htmlescape(name)))
            # superscript document count, linking to a search for the category's documents
            fp.write('<a style="font-size: %fem; color: %s; vertical-align: super;" href="%s" ' % (
                max(0.4, size/2), STANDARD_LEGEND_COLOR, actionsee) +
                     'title="see the %s document%s in the \'%s\' category" target="_blank">%d</a>' % (
                         (ndocs_in_cat == 1) and "one" or str(ndocs_in_cat),
                         (ndocs_in_cat != 1) and "s" or "",
                         htmlescape(name), ndocs_in_cat))
        fp.write("</center>\n")
    fp.write("</body>\n")
    # finish the response explicitly, as related() does on each of its branches
    fp.close()
def related (repo, response, params):
    """
    Find other documents related to the query document.

    :param doc_id: the query document
    :type doc_id: UpLib doc ID string

    :param use-authorship: whether or not to use co-authorship as a measure of relatedness.  Defaults to "true".
    :type use-authorship: "true" or "false"

    :param use-history: whether or not to use the use history (most recently used list) as a factor in the calculation.  Defaults to "true".
    :type use-history: "true" or "false"

    :param format: whether to return non-browser format results.  Specifying "xml" will cause an XML document \
           to be returned containing the results.  Specifying "ziplist" will cause a zip file containing extra \
           information about each document to be returned.  If the ``format`` parameter is not specified, \
           an HTML page showing the results broken down by category is returned.
    :type format: "xml" or "ziplist" or none

    :param no-icon: only used with the "ziplist" format.  If "true", the ``first.png`` document icon is \
           left out of each document's entry in the zip file.
    :type no-icon: "true" or anything else

    :param include-doc-functions: only used with the "ziplist" format.  If given a non-empty value, \
           a ``doc_functions.txt`` member is added to each document's entry in the zip file.
    :type include-doc-functions: any non-empty string

    :result: list of other documents related to the query document.  See discussion of the ``format`` parameter.
    :rtype: varies
    """

    doc_id = params.get("doc_id")
    if not doc_id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc_id specified.")
        return
    if not repo.valid_doc_id(doc_id):
        response.error(HTTPCodes.NOT_FOUND, "Invalid doc_id %s specified." % doc_id)
        return
    doc = repo.get_document(doc_id)
    use_authorship = (params.get("use-authorship") or "true") == "true"
    use_history = (params.get("use-history") or "true") == "true"

    docs, likethis, authored, recent, others, qstring = find_related(
        doc, True, use_history=use_history, use_authorship=use_authorship)

    def split_other(entry):
        # NOTE(review): the original XML branch unpacked the entries of "others"
        # as (document, score) while the HTML branch unpacked them as
        # (explanation, document, score); whichever shape find_related really
        # produces, one of those branches failed.  Accept both shapes here --
        # TODO confirm the real shape against find_related.
        if len(entry) == 2:
            return None, entry[0], entry[1]
        explanation, other, score = entry
        return explanation, other, score

    if response.xml_request or (params.get("format") == "xml"):

        retval = getDOMImplementation().createDocument(None, "result", None)

        # describe the query document and the options used
        e = retval.createElement('paradigm')
        e.setAttribute('id', doc_id)
        e.setAttribute("title", _safe_title(doc))
        e.setAttribute('use-history', use_history and "true" or "false")
        e.setAttribute('use-authorship', use_authorship and "true" or "false")
        retval.documentElement.appendChild(e)

        def add_group(group, scored):
            # add one <document> child to "group" per (document, score) pair,
            # then attach "group" to the result document
            for member, score in scored:
                element = retval.createElement('document')
                element.setAttribute('doc_id', member.id)
                element.setAttribute('score', str(score))
                title = _safe_title(member)
                note("title is %s", repr(title))
                element.setAttribute('title', title)
                group.appendChild(element)
            retval.documentElement.appendChild(group)

        similar = retval.createElement('similar')
        similar.setAttribute('query', qstring)
        add_group(similar, likethis)
        add_group(retval.createElement('co-authored'), authored)
        add_group(retval.createElement('recent'), recent)
        add_group(retval.createElement('linked'),
                  [split_other(entry)[1:] for entry in others])
        add_group(retval.createElement('combined'), docs)

        fp = response.open("application/xml;charset=utf-8")
        fp.write(retval.toxml("UTF-8") + "\n")
        fp.close()

    elif params.get("format") == "ziplist":

        no_icon = (params.get("no-icon") == "true")
        include_doc_functions = params.get("include-doc-functions")
        # mkstemp creates the file atomically; mktemp only returns a name that
        # another process could claim before the zip file is opened
        fd, tpath = tempfile.mkstemp()
        os.close(fd)
        try:
            zf = zipfile.ZipFile(tpath, "w")
            try:
                for member, score in docs:
                    prefix = member.id.encode("ASCII", "strict") + "/"
                    zf.writestr(prefix, "")
                    zf.writestr(prefix + "score", str(score))
                    if not no_icon:
                        zf.writestr(prefix + "first.png", member.document_icon())
                    if include_doc_functions:
                        zf.writestr(prefix + "doc_functions.txt", get_doc_functions(member))
                    zf.writestr(prefix + "metadata.txt", member.metadata_text())
            finally:
                zf.close()
            # NOTE(review): the final True presumably asks return_file to delete
            # the temp file once it has been sent -- confirm against return_file
            response.return_file("application/x-uplib-searchresults-zipped", tpath, True)
        except:
            # request boundary: report any failure to the client instead of propagating it
            msg = string.join(traceback.format_exception(*sys.exc_info()))
            # the file may already be gone; a failing remove must not hide the real error
            if os.path.exists(tpath):
                os.remove(tpath)
            note("Exception building zipfile for search results:\n%s", msg)
            response.error(HTTPCodes.INTERNAL_SERVER_ERROR,
                           "Can't build zipfile for search results:\n%s\n" % htmlescape(msg))

    else:

        fp = response.open()
        title = "Documents related to %s" % repr(doc.get_metadata("title") or doc.id)
        fp.write("<head><title>%s</title>\n" % htmlescape(title))
        fp.write('<meta http-equiv="Content-Script-Type" content="text/javascript">\n')
        fp.write('<link REL="SHORTCUT ICON" HREF="/favicon.ico">\n')
        fp.write('<link REL="ICON" type="image/ico" HREF="/favicon.ico">\n')
        issue_javascript_head_boilerplate(fp)
        issue_title_styles(fp)
        fp.write('</head><body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
        issue_menu_definition(fp)
        fp.write('<h2>%s</h2><br>\n' % htmlescape(title))
        show_abstract(repo, doc, fp, True, showpagesearch=False)

        # loop variables are named "other" so that neither the query document
        # ("doc") nor this function's own name ("related") is shadowed
        fp.write('<p><hr><b>Context documents:</b><br>')
        for other, score in docs:
            show_title (fp, other, { other.id: score }, True)
        fp.write('<p><hr><b>Like this:</b><br>')
        for other, score in likethis:
            show_title (fp, other, { other.id: score }, True)
        fp.write('<p><i>query was: %s</i>\n' % htmlescape(qstring))
        fp.write('<p><hr><b>Co-authored:</b><br>')
        for other, score in authored:
            show_title (fp, other, { other.id: score }, True)
        fp.write('<p><hr><b>Recently consulted:</b><br>')
        for other, score in recent:
            show_title (fp, other, { other.id: score }, True)
        fp.write('<p><hr><b>Other considerations:</b><br>')
        for entry in others:
            explanation, other, score = split_other(entry)
            # two-element entries carry no explanation to show
            if explanation is not None:
                fp.write('<p><i>%s</i><br>\n' % htmlescape(explanation))
            show_title (fp, other, { other.id: score }, True)
        fp.write('</body>\n')
        fp.close()