def python_exception_html(excn, extra = None):
    """
    Render an exception as a minimal standalone HTML error page.

    :param excn: a ``(type, value, traceback)`` triple; it is unpacked and
                 handed to ``traceback.format_exception``
    :param extra: optional text placed right after "Error:"; it is
                  HTML-escaped before insertion
    :returns: the HTML page, with the escaped traceback inside a ``<pre>`` element
    """
    exc_type, exc_value, exc_tb = excn
    trace_text = ''.join(traceback.format_exception(exc_type, exc_value, exc_tb))
    pieces = ['<html><body><p>Error:']
    if extra:
        pieces.append(' ' + htmlescape(extra))
    pieces.append('<br>\n<p><pre>' + htmlescape(trace_text) + '</pre></body></html>')
    return ''.join(pieces)
def upload(repo, response, params):
    """
    Obtain a Web form which supports file upload from a Web browser.

    The form posts to ``/action/UploadDocument/add``.  An inline script guesses
    the content type from the extension of the chosen file and copies the file
    name into the hidden ``documentname`` field.

    :param repo: the repository; only ``repo.name()`` is used, for the page title
    :param response: the response object; the page is written to ``response.open()``
    :param params: request parameters (not used here)
    :returns: a Web form supporting file upload
    :rtype: text/html
    """
    fp = response.open()
    # the head element is now closed with </head>; it used to be re-opened with <head>
    fp.write('<html><head><title>Upload document to "%s"</title></head>\n' % htmlescape(repo.name()))
    fp.write('<body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
    fp.write('<script type="text/javascript" src="/html/javascripts/prototype.js"></script>\n')
    # CONTENT_TYPES is keyed by content type; the script needs the inverse
    # (extension -> content type), so the pair is written value-first
    fp.write('<script type="text/javascript">\n'
             'var ext_to_content_type_mapping = {\n')
    for key in CONTENT_TYPES:
        fp.write(' "%s": "%s",\n' % (CONTENT_TYPES[key], key))
    fp.write(' };\n\n'
             'function choose_appropriate_type(filename) {\n'
             # take the LAST dot-separated component, so that "a.b.pdf" yields
             # "pdf" rather than "b"
             ' var ext = filename.split(".").pop();\n'
             ' for (var e in ext_to_content_type_mapping) {\n'
             ' if (e == ext)\n'
             ' return ext_to_content_type_mapping[e];\n'
             ' };\n'
             ' return "undefined";\n'
             '}\n\n'
             'function on_filename_change (e) {\n'
             ' var a = choose_appropriate_type(document.forms.uploadform.content.value);\n'
             ' document.forms.uploadform.contenttype.value = a;\n'
             ' document.forms.uploadform.documentname.value = document.forms.uploadform.content.value;\n'
             '}\n'
             '</script>\n')
    fp.write('<form enctype="multipart/form-data" id="uploadform" action="/action/UploadDocument/add" method="POST" target="_top">\n')
    fp.write('<input type=hidden name=wait value=watch>\n')
    fp.write('<input type=hidden name=documentname value="">\n')
    # The Referer header is supplied by the client, so it must be escaped
    # before being placed inside an attribute value.
    # NOTE(review): the second htmlescape argument is assumed to request quote
    # escaping, as at the other attribute-value call sites -- confirm.
    referer = htmlescape(response.request.get_header('referer') or "", True)
    fp.write('<p>File to upload: <input type="file" name=content size=50 value="%s"' % referer
             + ' onchange="{void(on_filename_change(this));}">\n')
    fp.write('<p>Content-Type of file: <select name="contenttype" size=1>\n')
    fp.write('<option value="undefined" selected>-- undefined --</option>\n')
    for key in CONTENT_TYPES:
        hkey = htmlescape(key)
        fp.write('<option value="%s">%s</option>\n' % (hkey, hkey))
    fp.write('</select>\n')
    fp.write('<p>Optional metadata for the document:<br><table>'
             '<tr><td>Title for document: </td><td><input type=text name="md-title" size=60></td></tr>\n'
             '<tr><td>Authors <i>(" and "-separated)</i>: </td><td><input type=text name="md-authors" size=60></td></tr>\n'
             '<tr><td>Publication date <i>(mm/dd/yyyy)</i>: </td><td><input type=text name="md-date" size=60></td></tr>\n'
             '<tr><td>Categories <i>(comma-separated)</i>: </td><td><input type=text name="md-categories" size=60></td></tr>\n'
             '</table>\n')
    fp.write('<p><input type=submit name=submit value=submit>\n')
    fp.write('</form></body></html>\n')
def addnote(repo, response, params):
    """
    Obtain a Web form with which to add a note to the repository.
    Useful for taking notes in meeting.

    The form posts to ``/action/UploadDocument/add`` with a fixed document
    name of ``note.3x5`` and a content type of ``text/plain``.

    :param repo: the repository; only ``repo.name()`` is used, for the page title
    :param response: the response object; the page is written to ``response.open()``
    :param params: request parameters (not used here)
    :return: a Web form with which to add a note to the repository
    :rtype: text/html
    """
    # send back a note to upload
    fp = response.open()
    # the head element is now closed with </head>; it used to be re-opened with <head>
    fp.write('<html><head><title>Add note to "%s"</title></head>\n' % htmlescape(repo.name()))
    fp.write('<body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
    fp.write('<form enctype="multipart/form-data" id="addnote" action="/action/UploadDocument/add" method="POST" target="_top">\n')
    fp.write('<input type=hidden name=wait value=true>\n')
    # add fake filename with 3x5 extension to trigger CardDoc parser
    fp.write('<input type=hidden name=documentname value="note.3x5">\n')
    fp.write('<input type=hidden name=contenttype value="text/plain">\n')
    # NOTE(review): "textarea" is not an <input> type, so this is presumably
    # rendered as a single-line text field; left unchanged -- confirm intent.
    fp.write('<p><input type=textarea name="content" value="" style="width: 100%; height: 50%;">\n')
    fp.write('<p><input type=submit name=submit value=submit>\n')
    fp.write('<p>Optional metadata for the document:<br><table>'
             '<tr><td>Categories <i>(comma-separated)</i>: </td><td><input type=text name="md-categories" size=60></td></tr>\n'
             '<tr><td>Title for document: </td><td><input type=text name="md-title" size=60></td></tr>\n'
             '<tr><td>Authors <i>(" and "-separated)</i>: </td><td><input type=text name="md-authors" size=60></td></tr>\n'
             '</table>\n')
    fp.write('</form></body></html>\n')
def do_attendee (attendee, annotation, c, x, y, framesize):
    """
    Draw one attendee entry as a paragraph and return the new vertical position.

    The label is taken from the first ``CN`` parameter of the attendee (real
    name if one can be parsed out, otherwise the e-mail address, otherwise the
    raw CN) and falls back to the attendee's value.  When the attendee has a
    value, the label is wrapped in a ``<link>`` pointing at it.

    :param attendee: object with a ``value`` attribute and a ``params`` mapping
    :param annotation: optional markup appended in italics after the label;
                       it is inserted as-is (not escaped)
    :param c: canvas handed to ``Paragraph.wrapOn`` / ``Paragraph.drawOn``
    :param x: horizontal drawing position
    :param y: vertical position above the paragraph
    :param framesize: sequence expanded as the size arguments of ``wrapOn``
    :returns: ``y`` lowered by the height of the drawn paragraph, or ``y``
              unchanged when there is nothing to draw
    """
    link = attendee.value
    cn = attendee.params.get("CN")
    nameaddr = cn and cn[0]
    text = ""
    if nameaddr:
        # commas are hidden from parseaddr and restored afterwards --
        # presumably so a comma inside the display name is not treated as an
        # address separator
        realname, emailaddr = parseaddr(nameaddr.replace(",", "%2C"))
        if realname:
            text = realname.replace("%2C", ",")
        elif emailaddr:
            text = emailaddr
        else:
            text = nameaddr
    elif link:
        text = link
    if not text and not annotation:
        # neither a CN nor a value: previously this fell through to an
        # unbound 'text'; there is simply nothing to draw
        return y
    text = htmlescape(text)
    if link:
        # The link lands inside an attribute value, so it is escaped as well.
        # NOTE(review): the second htmlescape argument is assumed to request
        # quote escaping -- confirm.
        text = '<link href="' + htmlescape(link, True) + '">' + text + '</link>'
    if annotation:
        text += ' <i>' + annotation + '</i>'
    p = Paragraph(text, ParagraphStyle("normal"))
    w, h = p.wrapOn(c, *framesize)
    y -= h
    p.drawOn(c, x, y)
    # y -= (0.1 * inch)
    return y
def show_images (repo, response, params):
    """
    Reply with an HTML page showing every illustration recorded for a document.

    The ``illustrations-bounding-boxes`` metadata value is a comma-separated
    list of ``pageno:type:left:top:width:height`` records.  Each region is
    cropped out of the matching ``page-images/pageNNNNN.png`` file, rescaled
    to 75 dpi, and embedded in the page as a base64 ``data:`` URL.

    :param repo: the repository, used to look up the document
    :param response: the response object; errors go through ``response.error``,
                     the page is written to ``response.open()``
    :param params: request parameters; ``doc_id`` is required
    """
    import Image
    doc_id = params.get("doc_id")
    if not doc_id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc ID specified")
        return
    doc = repo.get_document(doc_id)
    images = doc.get_metadata("illustrations-bounding-boxes")
    if not images:
        response.reply("No illustration data found for %s." % doc)
        return
    dpi = int(doc.get_metadata("dpi") or doc.get_metadata("images-dpi") or 300)
    fp = response.open()
    fp.write("<body><h1>Illustrations in %s</h1>" % htmlescape(str(doc)))
    currentpage = None
    im = None           # page image of the current page, opened lazily
    for image in images.split(","):
        pageno, kind, left, top, width, height = image.split(":")
        pageno = int(pageno)
        if pageno != currentpage:
            if currentpage is not None:
                fp.write('<hr>\n')
            fp.write("<p>Page %s" % (pageno + 1))
            currentpage = pageno
            im = None
        left = int(left)
        top = int(top)
        width = int(width)
        height = int(height)
        # size of the region at 75 dpi; regions that vanish at that scale are skipped
        newwidth, newheight = (width * 75) // dpi, (height * 75) // dpi
        if (newwidth < 1) or (newheight < 1):
            continue
        filepath = os.path.join(doc.folder(), "page-images", "page%05d.png" % (pageno + 1))
        if im is None:
            if not os.path.exists(filepath):
                fp.write('<p>No image file %s for page %s' % (filepath, (pageno + 1)))
                # without a page image there is nothing to crop; previously
                # this fell through and failed on im.crop with im still None
                continue
            im = Image.open(filepath)
            if im.mode in ("1", "P", "L"):
                im = im.convert("RGB")
        img = im.crop((left, top, left + width + 1, top + height + 1))
        img.load()
        # rescale to 75 dpi
        if dpi != 75:
            img = img.resize((newwidth, newheight), Image.ANTIALIAS)
        # convert to data: URL
        fpi = StringIO.StringIO()
        img.save(fpi, "PNG")
        bits = fpi.getvalue()
        fpi.close()
        fp.write('<p>%s:<br><img src="data:image/png;base64,%s">\n' % (image, base64.encodestring(bits).strip()))
def format_description(self, c, x, y, framesize):
    """
    Draw the event's description, if it has one, and return the new vertical position.

    :param c: canvas handed to ``Paragraph.wrapOn`` / ``Paragraph.drawOn``
    :param x: horizontal drawing position
    :param y: vertical position above the paragraph
    :param framesize: sequence expanded as the size arguments of ``wrapOn``
    :returns: ``y`` lowered by the paragraph height, or ``y`` unchanged when
              the event has no ``description`` entry
    """
    if 'description' not in self.__event.contents:
        return y
    description = self.__event.description.value.strip()
    # escape first, then turn line breaks into markup so the <br /> survives
    markup = ('<i>Description:</i> ' + htmlescape(description)).replace('\n', '<br />')
    para = Paragraph(markup, ParagraphStyle("normal"))
    _width, height = para.wrapOn(c, *framesize)
    y -= height
    para.drawOn(c, x, y)
    return y
def render (self, fp):
    """
    Write this document out as a UTF-8 HTML page.

    The page consists of a head with the escaped title, an ``<h1>`` with the
    title, optional author and publication-date paragraphs, and the cleaned
    content.

    :param fp: either an object with a ``write`` method, or a file name; a
               file name is opened in binary append mode and closed again
               before returning
    :raises ValueError: if the document has no content
    """
    from uplib.plibUtil import note
    from uplib.webutils import htmlescape
    if not self.content:
        raise ValueError("can't score this document")
    # remember whether the file is ours to close; a caller-supplied stream is left open
    opened_here = isinstance(fp, types.StringTypes)
    if opened_here:
        fp = open(fp, "ab")
    try:
        fp.write(u'<head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'.encode("UTF-8", "strict"))
        fp.write((u'<title>%s</title></head>\n' % htmlescape(self.title)).encode("UTF-8", "strict"))
        fp.write((u'<body>\n<h1>%s</h1>\n' % htmlescape(self.title)).encode("UTF-8", "strict"))
        if self.authors:
            self.clean_authors(self.authors)
            fp.write('<p class="uplib-authors">%s</p>\n' % self.authors.renderContents())
        if self.date:
            self.clean_date(self.date)
            fp.write('<p class="uplib-pubdate">%s</p>\n' % self.date.renderContents())
        self.clean_content(self.content)
        fp.write(self.content.prettify())
        fp.write(u'</body>\n'.encode("UTF-8", "strict"))
    finally:
        # previously a file opened from a path was never closed
        if opened_here:
            fp.close()
def get_login_page(self):
    """
    Build the HTML of the pass-phrase login page.

    The form posts to ``/login``; the URI of the current request is escaped
    and carried along in the hidden ``originaluri`` field.

    :returns: the page markup as a single string
    """
    original_uri_field = ('<input type=hidden name=originaluri value="%s"><P> <br>\n'
                          % htmlescape(self.request.uri, True))
    fragments = [
        '<head><title>Login Page</title>\n',
        '<script>\n',
        '<!--\n',
        'function sf(){document.f.password.focus();}\n',
        '// -->\n',
        '</script></head>\n',
        '<body bgcolor="#ef280e" onload="sf()">\n',
        '<table width=100% height=100%><tr align=center><td align=center>',
        '<table bgcolor=black cellpadding=10><tr bgcolor=white><td>',
        '<center>Please enter pass-phrase:<br>',
        '<form action="/login" method=POST enctype="multipart/form-data" name=f>\n',
        '<input type=password size=60 name=password value=""><P> <br>\n',
        original_uri_field,
        '<input type=submit value="Login">\n',
        '</center></form></td></tr></table></td></tr></table></body>',
        ]
    return ''.join(fragments)
def get_error_html (self, status_code, **kwargs):
    """
    Produce the HTML body for an error response.

    When both ``message`` and ``content_type`` keyword arguments are given,
    the message is wrapped in a small HTML page; a ``text/plain`` message is
    escaped and placed in a ``<pre>`` element first.  Otherwise the request
    is delegated to ``RequestHandler.get_error_html``.

    :param status_code: numeric HTTP status; also used to look up the
                        standard reason phrase in ``httplib.responses``
    :param kwargs: may contain ``message`` and ``content_type``
    :returns: the HTML page, or whatever the base class returns
    :raises RuntimeError: if ``content_type`` is neither ``None``,
                          ``text/html`` nor a ``text/plain`` variant
    """
    if 'message' in kwargs and 'content_type' in kwargs:
        content_type = kwargs.get('content_type')
        message = kwargs.get('message')
        # test for None before calling startswith; the None case is handled
        # below and used to be unreachable because startswith failed first
        if (content_type is not None) and content_type.startswith("text/plain"):
            message = '<pre>' + htmlescape(message) + '</pre>'
            content_type = "text/html"
        if (content_type is None) or (content_type == "text/html"):
            return "<html><title>%(code)d: %(stdmsg)s</title>" \
                   "<body>%(code)d: %(message)s</body></html>" % {
                "code": status_code,
                "stdmsg": httplib.responses[status_code],
                "message" : message,
                }
        raise RuntimeError("Error messages must be HTML")
    # the result of the base implementation must be returned; it used to be dropped
    return RequestHandler.get_error_html(self, status_code, **kwargs)
note(4, "forked off request") return false except Exception, x: note(0, "signalling exception <%s> at point 1a:", x) excn_data = sys.exc_info() signal_python_exception(request, excn_data) # s2 = python_exception_html (excn_data, None) # request.reply_code = 500 # request['Content-Type'] = 'text/html' # request['Content-Length'] = len(s2) # request.push(s2) return true else: # can't use request.error() here because request.done() will be called twice request.reply_code = 501 action = htmlescape("/action/" + module_name + "/" + function_name) if exception: s = u"<html><head><title>Error loading module: %s</title></head><body><p>Attempt to load module/function <i>%s/%s</i> raised an exception:\n<pre>%s</pre><p>(extensions path = [<tt>%s</tt>], sys.path = <tt>%s</tt>)</body></html>" % (action, module_name, function_name, exception, self.__repo__.get_actions_path(), htmlescape(str(sys.path))) else: s = u"<html><head><title>No such action: %s</title></head><body><p>No such action: %s.<br>actions path = [%s]</body></html>" % (action, action, self.__repo__.get_actions_path()) s = s.encode("UTF-8", "replace") request['Content-Type'] = "text/html; charset=UTF-8" request['Content-Length'] = len(s) request.push(s) return true def handle_request (self, request): request['Server'] = "UpLib/%s" % self.version request.version = '1.0' # stick with 1.0 for Medusa
def do_HTML (dirpath, html_dir, doc_id, port): note(3, " HTMLing in %s...", dirpath) html_index = os.path.join(dirpath, "index.html") doc_id = os.path.basename(dirpath) retval = false try: if not os.path.exists(html_dir): os.mkdir(html_dir) os.chmod(html_dir, 0700) metadata = read_metadata(os.path.join(dirpath, "metadata.txt")) title = metadata.get('name') or metadata.get('title') or doc_id pagewidth = None pageheight = None bts = metadata.get('big-thumbnail-size') if bts: pagewidth, pageheight = [int(x) for x in string.split(bts, ',')] note(3, " title is %s, pagesize is %sx%s", title, pagewidth, pageheight) # start with summary.html note(3, " summary.html") summarypath = os.path.join(dirpath, "summary.txt") if os.path.exists(summarypath): f = open(summarypath, 'r') summary_text = f.read() f.close() html_summary = htmlescape(summary_text, true) else: html_summary = "" html_summary_path = os.path.join(html_dir, "summary.html") f = open(html_summary_path, 'w') f.write('<html><body>' + html_summary + '</body></html>'); f.close() os.chmod(html_summary_path, 0600) # next thumbs.html note(3, " thumbs.html") thumbs_path = os.path.join(html_dir, "thumbs.html") f = open(thumbs_path, "w") if USE_VIRTUAL_INK: bgcolor = "white" else: bgcolor = STANDARD_TOOLS_COLOR f.write('<html><body bgcolor="%s"><center>\n' % bgcolor) thumbnail_dir = os.path.join(dirpath, "thumbnails") thumbnail_files = os.listdir(thumbnail_dir) thumbs = [] for thumbnail in thumbnail_files: m = re.match(r"(\d+).png", thumbnail) if m: thumbs.append((int(m.group(1)), thumbnail,)) thumbs.sort() for thumbnail in thumbs: page_no = int(thumbnail[0]) f.write('<a href="page%s.html" target=viewarea>' % page_no) f.write('<img src="../thumbnails/%s" border=1></a><br>\n' % thumbnail[1]) # now write the HTML connected to that thumbnail page_html = os.path.join(html_dir, "page%s.html" % page_no) f2 = open (page_html, 'w') # get width of large page if not pagewidth or not pageheight: im = 
Image.open(os.path.join(thumbnail_dir, "big%s.png" % page_no)) pagewidth, pageheight = im.size[0] - 25, im.size[1] note(3, " title is %s, pagesize is %sx%s", title, pagewidth, pageheight) del im f2.write('<html><body bgcolor="white"><img src="../thumbnails/big%s.png" usemap="#page%smap" border=0>\n' % (page_no, page_no)) f2.write('<map name="page%smap">\n' % page_no) if (page_no < len(thumbs)): f2.write('<area href="page%s.html" alt="to Page %s" shape="circle" coords="%s,60,10">\n' % (page_no + 1, page_no + 1, pagewidth + 15)) f2.write('<area href="page%s.html" alt="to Page %s" shape="rect" coords="%s,0,%s,%s">\n' % (page_no + 1, page_no + 1, pagewidth/2, pagewidth, pageheight)) if (page_no > 1): f2.write('<area href="page%s.html" alt="to Page %s" shape="circle" coords="%s,90,10">\n' % (page_no - 1, page_no - 1, pagewidth + 15)) f2.write('<area href="page%s.html" alt="to Page %s" shape="rect" coords="0,0,%s,%s">\n' % (page_no - 1, page_no - 1, (pagewidth/2)-1, pageheight)) f2.write('<area href="/" alt="to repository" target="_top" shape="circle" coords="%s,207,10">\n' % (pagewidth + 15)) f2.write('</map></body></html>\n') f2.close() os.chmod(page_html, 0600) f.write('</center></body></html>') f.close() os.chmod (thumbs_path, 0600) # next is controls.html note(3, " controls.html") controls_path = os.path.join(html_dir, "controls.html") f = open(controls_path, "w") if CONTROLS_TEMPLATE: f.write(CONTROLS_TEMPLATE % { 'doc-id': doc_id }) else: f.write('<html>\n<head>\n') f.write('<script type="text/javascript">\n') f.write('function newInWindow(did, title, w, h, sidebar, twopage) {\n') f.write(' var s = "/action/basic/dv_show?doc_id=" + did + "&no-margin=1";\n') f.write(' var c = "width=" + w + ",height=" + h;\n') f.write(' if (!sidebar)\n') f.write(' s = s + "&no-sidebar=1";\n') f.write(' if (twopage)\n') f.write(' s = s + "&two-pages=1";\n') f.write(' defaultStatus = s;\n') f.write(' window.open(s, title, config=c);\n') f.write('}\n') f.write('</script></head><body 
bgcolor="%s">\n<center>\n' % STANDARD_TOOLS_COLOR) f.write("""<a href="javascript:newInWindow('%s','%s', %d+30, %d+10, false, false); void 0;">Detach</a>""" % (doc_id, htmlescape(title, true), pagewidth, pageheight)) f.write(""" <a href="javascript:newInWindow('%s','%s', (2 * %d)+30, %d+10, false, true); void 0;">(2)</a>\n""" % (doc_id, htmlescape(title, true), pagewidth, pageheight)) buttons = get_buttons_sorted(FN_DOCUMENT_SCOPE) for button in buttons: url = button[1][4] target = button[1][3] label = button[1][0] if url: f.write('<br>\n<a href="%s"' % htmlescape(url % doc_id, true)) else: f.write('<br>\n<a href="/action/basic/repo_userbutton?uplib_userbutton_key=%s&doc_id=%s"' % (button[0], doc_id)) if target: f.write(' target="%s"' % target) f.write('>%s</a>\n' % label) f.write("</center></body></html>") f.close() os.chmod(controls_path, 0600) # then index.html note(3, " index.html") f = open(html_index, "w") f.write('<head>\n') f.write('<title>%s</title>\n</head>\n' % htmlescape(title)) f.write('<base target="_top">' '<frameset cols="%s,*">' '<frameset rows="%s,*">' '<frame name=controls src="./html/controls.html">' '<frame name=thumbs src="./html/thumbs.html">' '</frameset>' '<frame name="viewarea" src="./html/page1.html">' '</frameset>\n' % (THUMBNAIL_COLWIDTH, CONTROLS_HEIGHT)) f.close() os.chmod(html_index, 0600) # indicate successful completion note(3, " finished.") retval = true except: info = sys.exc_info() note(0, "exception raised in createHTML:\n%s\n", string.join(traceback.format_exception(*info))) raise else: if not retval: note("bad retval %s", retval) if os.path.exists(html_index): os.unlink(html_index) if os.path.exists(html_dir): shutil.rmtree(html_dir)
def get_epub_version (repo, response, params): doc_id = params.get("doc_id") if not doc_id: response.error(HTTPCodes.BAD_REQUEST, "No doc specified.\n") return elif not repo.valid_doc_id(doc_id): response.error(HTTPCodes.BAD_REQUEST, "Invalid doc ID %s specified.\n" % doc_id) return doc = repo.get_document(doc_id) bookid = "uplibhash:" + doc.sha_hash() page_count = int(doc.get_metadata("page-count") or doc.get_metadata("pagecount") or "0") language = doc.text_language() or "en-US" package = (u'<?xml version="1.0"?>\n' + u'<package version="2.0" xmlns="http://www.idpf.org/2007/opf" unique-identifier="BookId">\n') metadata = (u'<metadata xmlns:dc="http://purl.org/dc/elements/1.1/"\n' + u' xmlns:opf="http://www.idpf.org/2007/opf">\n' + u' <dc:identifier id="BookId">%s</dc:identifier>\n' % htmlescape(bookid) + u' <dc:language>%s</dc:language>\n' % htmlescape(language)) ncx = u"""<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd"> <ncx version="2005-1" xml:lang="en" xmlns="http://www.daisy.org/z3986/2005/ncx/"> <head> <meta name="dtb:uid" content="%s"/> <meta name="dtb:depth" content="6"/> <meta name="dtb:generator" content="UpLib %s"/> <meta name="dtb:totalPageCount" content="%s"/> <meta name="dtb:maxPageNumber" content="0"/> </head> """ % (bookid, UPLIB_VERSION, page_count) title = doc.get_metadata("title") or unicode(doc) authors = doc.get_metadata("authors") ncx += u"<docTitle><text>" + htmlescape(title) + u"</text></docTitle>\n" metadata += u' <dc:title>%s</dc:title>\n' % htmlescape(title) if authors: authors = authors.split(" and ") for author in authors: ncx += u"<docAuthor><text>" + htmlescape(author) + u"</text></docAuthor>\n" metadata += u' <dc:creator>%s</dc:creator>\n' % htmlescape(author) metadata += u'</metadata>\n' ncx += u'<navMap>\n' manifest = u'<manifest>\n' spine = u'<spine toc="toc.ncx">\n' contentpath = _get_html_filepath(doc, debug=("rebuild",)) content = 
open(contentpath, "rb").read() # remove META tags start = content.index("</head>") content = ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" ' + ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n' + '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="%s">\n<head>\n' % language.encode("UTF-8", "strict") + '<title>%s</title>\n</head>\n' % htmlescape(title)) + content[start + len('</head>'):] manifest += u' <item id="contents" href="contents.xhtml" media-type="application/xhtml+xml" />\n' spine += u' <itemref idref="contents" />\n' ncx += u'<navPoint id="contents" playOrder="1"><navLabel><text>Content</text></navLabel><content src="contents.xhtml" /></navPoint>\n' content, images = _separate_images(content) for image in images: content_type, bits = images[image] manifest += u' <item id="%s" href="images/%s" media-type="%s" />\n' % ( image, image, content_type) # for page_index, bboxes in wordboxes_page_iterator(doc.folder()): # page_xhtml = (u'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n' + # u'<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="%s">\n' % language + # u'<body>\n') # # if pageno in illustrations: # # for left, top, width, height, tp, image, junk in illustrations.get(pageno): # # page_xhtml += u'<img width="%s" height="%s" alt="image on page" style="position:absolute; left:%spt; top:%spt;" src="%s" />' % ( # # width, height, left, top, _form_data_url(image)) # for bbox in bboxes: # face = (bbox.is_italic() and "Italic") or "Regular" # family = (bbox.is_fixedwidth() and "Monospace") or (bbox.is_serif() and "Serif") or "Sans-Serif" # weight = (bbox.is_bold() and "Bold") or "Regular" # page_xhtml += u'<span style="font-family: %s; font-style: %s; font-weight: %s; font-size: %spt">%s</span>' % ( # bbox.left(), bbox.top(), family, face, weight, bbox.font_size() * 0.8, htmlescape(bbox.text())) # if bbox.ends_word(): # page_xhtml += u"\n" # page_xhtml += u"</body></html>\n" 
# pages[page_index] = page_xhtml # manifest += u' <item id="page-%d" href="page-%d.xhtml" media-type="application/xhtml+xml" />\n' % (page_index, page_index) # spine += u' <itemref idref="page-%d" />\n' % page_index # ncx += u'<navPoint class="page" id="page-%d" playOrder="%d"><navLabel><text>Page %s</text></navLabel><content src="page-%d.xhtml" /></navPoint>\n' % ( # page_index, page_index + 1, doc.page_index_to_page_number_string(page_index), page_index) # close up the spine elements ncx += "</navMap>\n</ncx>" manifest += u' <item id="toc.ncx" href="toc.ncx" media-type="application/x-dtbncx+xml" />\n' manifest += u'</manifest>\n' spine += u'</spine>\n' package += metadata + manifest + spine + u'</package>\n' # build the zip container filepath = os.path.join(doc.folder(), "versions") if not os.path.exists(filepath): os.mkdir(filepath) os.chmod(filepath, 0700) filepath = os.path.join(filepath, "document.epub") zf = zipfile.ZipFile(filepath, "w", zipfile.ZIP_STORED, True) zf.comment = "%s (from UpLib repository '%s', doc ID %s)" % (htmlescape(doc.get_metadata("title")), htmlescape(repo.name()), doc_id) zf.writestr("mimetype", "application/epub+zip") zf.writestr("META-INF/container.xml", """<?xml version="1.0"?> <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container"> <rootfiles> <rootfile full-path="packagelayout.opf" media-type="application/oebps-package+xml" /> </rootfiles> </container> """) zf.writestr(_get_zip_info("packagelayout.opf"), package.encode("UTF-8", "strict")) for image in images: content_type, bits = images[image] zf.writestr("images/%s" % image, bits) zf.writestr(_get_zip_info("contents.xhtml"), content) zf.writestr(_get_zip_info("toc.ncx"), ncx.encode("UTF-8", "strict")) zf.close() response.return_file("application/epub", filepath)
def get_svg_version (repo, response, params):
    """
    Reply with an SVG rendering of one page of a document.

    Each page is built from its illustrations (as ``<image>`` elements),
    its ``uri`` links (as transparent ``<rect>`` elements wrapped in
    ``<a>``), and its word boxes (as ``<text>`` elements).  SVG is built for
    every page produced by ``wordboxes_page_iterator``; only the requested
    page is sent back.

    :param repo: the repository, used to validate and fetch the document
    :param response: the response object; errors go through ``response.error``,
                     the SVG through ``response.reply``
    :param params: request parameters; ``doc_id`` and ``page`` (a zero-based
                   page index) are required
    """
    doc_id = params.get("doc_id")
    if not doc_id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc specified.\n")
        return
    elif not repo.valid_doc_id(doc_id):
        response.error(HTTPCodes.BAD_REQUEST, "Invalid doc ID %s specified.\n" % doc_id)
        return
    note("doc_id is %s", doc_id)
    doc = repo.get_document(doc_id)
    page = params.get("page")
    if not page:
        response.error(HTTPCodes.BAD_REQUEST, "No page index specified.")
        return
    try:
        page = int(page)
    except ValueError:
        # a non-numeric page is a client error, not a server failure
        response.error(HTTPCodes.BAD_REQUEST, "Invalid page index %s specified." % page)
        return
    note("page is %s", page)
    page_count = int(doc.get_metadata("page-count") or doc.get_metadata("pagecount") or "0")
    if (page < 0) or (page >= page_count):
        response.error(HTTPCodes.BAD_REQUEST, "No such page %d." % page)
        return

    language = doc.text_language() or "en-US"
    dpi = int(doc.get_metadata('images-dpi') or doc.get_metadata('tiff-dpi') or doc.get_metadata("dpi") or 300)
    # page size converted from pixels at `dpi` to points (72 per inch)
    # NOTE(review): this fails if neither size metadata value is present -- confirm
    # that one of them is always set for documents that reach this point.
    page_image_size = tuple([(float(x.strip())*72/float(dpi))
                             for x in (doc.get_metadata("images-size") or doc.get_metadata("tiff-size")).split(",")])

    pages = {}
    illustrations = {}
    links = {}

    # group the illustrations by page, ignoring very small ones
    imd = read_illustrations_metadata(doc.folder(), True)
    for (left, top, width, height, kind, bits, pageno) in imd:
        if ((width * height) < 100):
            continue
        illustrations.setdefault(pageno, []).append((left, top, width, height, bits, pageno))

    # group the URI links by the page they start on
    for link in doc.links().values():
        if hasattr(link, "from_page") and (link.typename == "uri"):
            links.setdefault(link.from_page, []).append(link)
    note("links are %s", links)

    for page_index, bboxes in wordboxes_page_iterator(doc.folder()):
        page_svg = ((u'<?xml version="1.0" standalone="no"?> '
                     u'<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> '
                     u'<svg width="%spt" height="%spt" version="1.1" '
                     u'xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> ')
                    % page_image_size)
        if page_index in illustrations:
            for left, top, width, height, image, junk in illustrations.get(page_index):
                page_svg += u'<image x="%spt" y="%spt" width="%spt" height="%spt" xlink:href="%s" />\n' % (
                    left, top, width, height, _form_data_url(image))
        if page_index in links:
            note("links for %s are %s", page_index, links.get(page_index))
            for link in links[page_index]:
                # links without a source rectangle cannot be drawn
                fr = getattr(link, "from_rect", None)
                if fr:
                    left, top, width, height = fr
                    # Escape for use inside an attribute value.  The previous
                    # urllib.quote_plus call percent-encoded ':' and '/' too,
                    # which turns an absolute URI into a relative reference.
                    # NOTE(review): the second htmlescape argument is assumed
                    # to request quote escaping -- confirm.
                    uri = htmlescape(link.to_uri, True)
                    page_svg += (u'<a xlink:href="%s"><rect x="%spt" y="%spt" ' % (uri, left, top) +
                                 u'width="%spt" height="%spt" fill="none" stroke="none" /></a>\n' % (
                                     width, height))
        for bbox in bboxes:
            face = (bbox.is_italic() and "Italic") or "Regular"
            family = (bbox.is_fixedwidth() and "Monospace") or (bbox.is_serif() and "Serif") or "Sans-Serif"
            weight = (bbox.is_bold() and "Bold") or "Regular"
            page_svg += u'<text x="%spt" y="%spt" font-family="%s" font-size="%spt" font-style="%s" font-weight="%s">%s</text>' % (
                bbox.left(), bbox.top(), family, bbox.font_size() * 0.9, face, weight, htmlescape(bbox.text()))
            if bbox.ends_word():
                page_svg += u"\n"
        page_svg += u"</svg>\n"
        pages[page_index] = page_svg

    for pageno in pages:
        note("%s: %s\n", pageno, len(pages.get(pageno)))
    requested = pages.get(page)
    if requested is None:
        # the iterator produced nothing for this page; do not reply with None
        response.error(HTTPCodes.BAD_REQUEST, "No such page %d." % page)
        return
    response.reply(requested, "image/svg+xml")
def doc_categorize (repo, response, params):
    """
    Write a page for adding categories to, or removing them from, a document.

    The page shows the document abstract, then up to three sections: the
    likely categories suggested by ``find_likely_tags``, a form for typing a
    new category, and every category in the repository that has documents.
    Categories the document already has are shown in red and link to the
    "remove" action; the others are black and link to the "add" action.
    Categories matching one of the configured
    ``categorize-excluded-categories`` patterns are left out.

    :param repo: the repository
    :param response: the response object; errors go through ``response.error``,
                     the page is written to ``response.open()``
    :param params: request parameters; ``doc_id`` is required
    """
    from uplib.basicPlugins import show_abstract, _is_sensible_browser
    from uplib.basicPlugins import show_title, STANDARD_BACKGROUND_COLOR, STANDARD_TOOLS_COLOR, STANDARD_LEGEND_COLOR
    from uplib.basicPlugins import __issue_javascript_head_boilerplate as issue_javascript_head_boilerplate
    from uplib.basicPlugins import __issue_menu_definition as issue_menu_definition
    from uplib.basicPlugins import __issue_title_styles as issue_title_styles

    global _CONFIGURATION
    if _CONFIGURATION is None:
        # compile the exclusion patterns once and cache them at module level
        raw_patterns = configurator.default_configurator().get("categorize-excluded-categories", "").split(",")
        _CONFIGURATION = { "exclusions": [re.compile(x.strip()) for x in raw_patterns if x.strip()] }

    def figure_size(count, avgsize):
        # font size for a category holding `count` documents, relative to the average
        if avgsize < 0.0001:
            return 0.0001
        return math.sqrt(math.log((count * (math.e - 1))/avgsize + 1))

    def is_excluded(name, patterns):
        # true if the lower-cased category name matches any exclusion pattern
        for pattern in patterns:
            if pattern.match(name.lower()):
                return True
        return False

    doc_id = params.get("doc_id")
    if not doc_id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc_id parameter specified.")
        return
    doc = repo.valid_doc_id(doc_id) and repo.get_document(doc_id)
    if not doc:
        response.error(HTTPCodes.BAD_REQUEST, "Invalid doc_id parameter '%s' specified." % doc_id)
        return

    fp = response.open()
    title = (doc.get_metadata("title") or doc.id).encode("UTF-8", "strict")
    fp.write("<head><title>Categorizing '%s'</title>\n" % htmlescape(title))
    fp.write('<meta http-equiv="Content-Script-Type" content="text/javascript">\n')
    fp.write('<link REL="SHORTCUT ICON" HREF="/favicon.ico">\n')
    fp.write('<link REL="ICON" type="image/ico" HREF="/favicon.ico">\n')
    issue_javascript_head_boilerplate(fp)
    issue_title_styles(fp)
    fp.write('</head><body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
    issue_menu_definition(fp)
    show_abstract(repo, doc, fp, _is_sensible_browser(response.user_agent), showpagesearch=False)
    fp.write("<hr />\n")

    # the document's categories, lower-cased, plus every proper prefix of
    # each hierarchical ("a/b/c") category
    doccats = [x.lower() for x in doc.get_category_strings()]
    for cat in doccats[:]:
        if cat.find('/') >= 0:
            parts = cat.split('/')
            for i in range(1, len(parts)):
                doccats.append('/'.join(parts[:i]))

    tags = find_likely_tags(doc)
    if tags:
        # duplicate removal is not attempted; only the list length is capped
        stags = min(10, len(tags))
        fp.write("<center><small><i>Likely categories</i></small><br />")
        count = 0
        topscore = _adjust_score(*tags[0][1][:2])
        exclusions = _CONFIGURATION and _CONFIGURATION.get("exclusions")
        for name, (score, ndocs, ascore) in tags:
            if count > stags:
                break
            if is_excluded(name, exclusions):
                continue
            if count > 0:
                fp.write(" · ")
            # every likely category is drawn at the same size
            size = 1
            if name.lower() in doccats:
                color = "red"
                verb = "remove"
            else:
                color = "black"
                verb = "add"
            action = '/'.join(response.request_path.split('/')[:3]) + '/doc_%s_category?doc_id=%s&tag=%s' % (
                verb, doc.id, urllib.quote_plus(name))
            fp.write('<a style="font-size: %fem; color: %s;" href="%s" title="%s the \'%s\' category (score=%.3f)">%s</a>' % (
                size, color, action, verb, htmlescape(name), ascore, htmlescape(name)))
            count += 1
        fp.write("</center></p><hr />\n")

    add_action = '/'.join(response.request_path.split('/')[:3]) + '/doc_add_category'
    fp.write('<form action="%s" method=get><center>Add a new category to this document: ' % add_action)
    fp.write('<input type=hidden name="doc_id" value="%s">\n' % doc.id)
    fp.write('<input type=text name="tag" value="" size=40></form></center>\n')

    note(4, "doc_categorize: retrieving repository categories... (%s)", time.ctime())
    cats = repo.get_categories_with_docs()
    note(4, "doc_categorize: have categories (%s)", time.ctime())
    if cats:
        fp.write("<hr>\n<center><small><i>All categories</i></small><br />")
        avgsize = sum([len(x) for x in cats.values()]) / float(len(cats))
        # case-insensitive alphabetical order
        catkeys = sorted(cats.keys(), key=lambda k: k.lower())
        first = True
        exclusions = _CONFIGURATION and _CONFIGURATION.get("exclusions")
        for name in catkeys:
            if is_excluded(name, exclusions):
                continue
            if first:
                first = False
            else:
                fp.write(" · ")
            ndocs_in_cat = len(cats[name])
            size = max(0.5, figure_size(ndocs_in_cat, avgsize))
            if name.lower() in doccats:
                color = "red"
                verb = "remove"
            else:
                color = "black"
                verb = "add"
            action = '/'.join(response.request_path.split('/')[:3]) + '/doc_%s_category?doc_id=%s&tag=%s' % (
                verb, doc.id, urllib.quote_plus(name))
            actionsee = '/action/basic/repo_search?query=%s' % (
                urllib.quote_plus('categories:"%s"' % name))
            fp.write('<a style="font-size: %fem; color: %s;" href="%s" title="%s the \'%s\' category">%s</a>' % (
                size, color, action, verb, htmlescape(name), htmlescape(name)))
            # superscript document count, linking to a search for the category
            if ndocs_in_cat == 1:
                howmany = "one"
                plural = ""
            else:
                howmany = str(ndocs_in_cat)
                plural = "s"
            fp.write('<a style="font-size: %fem; color: %s; vertical-align: super;" href="%s" ' % (
                max(0.4, size/2), STANDARD_LEGEND_COLOR, actionsee) +
                     'title="see the %s document%s in the \'%s\' category" target="_blank">%d</a>' % (
                         howmany, plural, htmlescape(name), ndocs_in_cat))
    fp.write("</body>\n")
def _add_internal (ostream, percent_done_fn, repo, response, params, content, wait):
    """Add one document to the repository by running ``uplib-add-document``
    in a subprocess, optionally waiting for it to come on-line.

    This can be called in several different ways.  In general, you post a
    multipart/form-data body which contains a "contenttype" for the
    document, and either a "URL" for the content, or a "content" parameter
    containing the actual content.  If both "URL" and "content" are
    present, the URL is added as the "original-url" value for the metadata,
    and if the content is HTML, it's used as the "original.html" and the
    URL is used to pull ancillary content referenced in it.

    :param ostream: job output stream, or ``None``.  When given, progress and \
        results are reported with ``_rewrite_job_output`` rather than through ``response``.
    :param percent_done_fn: optional callable; called with 40 once the \
        subprocess has succeeded and with 100 once the wait loop has ended.
    :param repo: the repository to add the document to
    :param response: used for error / reply / redirect when ``wait`` is true and \
        there is no ``ostream``
    :param params: request parameters.  Reads "contenttype", "URL", "no-redirect", \
        "documentname", "suppress-duplicates", "bury", "verbosity", "md-title", \
        "md-authors", "md-date", "md-categories" and "metadata".
    :param content: the bits of the document, or a false value if only a URL was given
    :param wait: true if some caller is waiting for the outcome to be reported
    """

    content_type = params.get("contenttype")
    url = params.get("URL")
    noredir = params.get("no-redirect")
    noredir = noredir and (noredir.lower() == "true")
    uploadloc = url
    docname = params.get("documentname")
    tempf = None
    # path of the temporary metadata file, if any; removed in the "finally" below
    mdtmpfile = None
    suppress_duplicates = params.get("suppress-duplicates")
    suppress_duplicates = suppress_duplicates and (suppress_duplicates.lower() == "true")
    bury = params.get("bury")
    bury = bury and (bury.lower() == "true")
    verbosity = int(params.get("verbosity") or "0")

    if content:
        if wait and ostream:
            _rewrite_job_output(ostream, '{ state: 0, msg: "Caching page..."}')
        extension = CONTENT_TYPES.get(content_type)
        if not extension:
            if wait:
                msg = "Don't know what to do with contenttype \"%s\"" % content_type
                if ostream:
                    _rewrite_job_output(ostream, '{state: 1, msg: "' + urllib.quote(msg) + '"}')
                else:
                    response.error(HTTPCodes.UNSUPPORTED_MEDIA_TYPE, msg)
            return

        # special case HTML/XHTML
        if content and (content_type.lower() in ("text/html", "application/xhtml+xml")):
            tempf = tempfile.mkdtemp()
            uploadloc = os.path.join(tempf, "original.html")
            # make sure that the folder for other parts exists, even if empty
            os.mkdir(os.path.join(tempf, "original_files"))
            # remove our bookmarklet, if present
            content = _BOOKMARKLET_PATTERN.sub('', content)
            content = _ADD_FORM_PATTERN.sub('', content)
            c = _OurCacher(url, filename=uploadloc, bits=content, content_type=content_type)
            # make sure that the folder for other parts exists, even if empty
            other_parts = os.path.join(tempf, "original_files")
            if not os.path.exists(other_parts):
                os.mkdir(other_parts)
        # special case 3x5 cards
        elif (docname and (content_type.lower() == "text/plain") and
              os.path.splitext(docname)[1] == ".3x5"):
            fd, tempf = tempfile.mkstemp(".3x5")
            fp = os.fdopen(fd, "wb")
            fp.write(content)
            fp.close()
            uploadloc = tempf
        else:
            fd, tempf = tempfile.mkstemp("." + extension)
            fp = os.fdopen(fd, "wb")
            fp.write(content)
            fp.close()
            uploadloc = tempf

        if suppress_duplicates:
            fingerprint = calculate_originals_fingerprint(tempf)
            results = repo.do_query("sha-hash:" + fingerprint)
            if results:
                # it's a duplicate
                doc = results[0][1]
                if os.path.isdir(tempf):
                    shutil.rmtree(tempf)
                elif os.path.exists(tempf):
                    os.remove(tempf)
                if ostream:
                    _rewrite_job_output(ostream, '{ state: 2, doc_id: "' + doc.id + '"}')
                elif noredir:
                    response.reply(doc.id, "text/plain")
                else:
                    response.redirect("/action/basic/dv_show?doc_id=%s" % doc.id)
                return

    try:
        try:
            # get a cookie for authentication
            cookie = repo.new_cookie(url or content[:100])
            cookie_str = '%s=%s; path=/; Secure' % (cookie.name(), cookie.value())
            # NOTE(review): os.environ is process-wide; concurrent adds overwrite
            # each other's cookie here -- confirm this is acceptable
            os.environ["UPLIB_COOKIE"] = cookie_str
            doctitle = params.get("md-title")
            docauthors = params.get("md-authors")
            docdate = params.get("md-date")
            doccats = params.get("md-categories")
            metadata = params.get("metadata")
            if metadata:
                # mkstemp creates the file atomically (mktemp only picks a name),
                # and the handle is closed explicitly so the data is flushed
                # before the subprocess reads it
                mdfd, mdtmpfile = tempfile.mkstemp()
                mdfp = os.fdopen(mdfd, "w")
                try:
                    mdfp.write(metadata)
                finally:
                    mdfp.close()
                # check to see if we're replacing an existing document
                md2 = read_metadata(StringIO.StringIO(metadata))
                existing_doc_id = md2.get("replacement-contents-for")
                if existing_doc_id and not repo.valid_doc_id(existing_doc_id):
                    raise ValueError("Invalid doc ID %s specified for replacement" % existing_doc_id)
            else:
                existing_doc_id = None

            # now form the command; every request-derived value is shell-quoted
            scheme = ((repo.get_param("use-http", "false").lower() == "true") or _use_http) and "http" or "https"
            cmd = '%s --verbosity=%s --repository=%s://127.0.0.1:%s ' % (
                _uplib_add_document, verbosity, scheme, repo.port())
            if doctitle:
                cmd += ' --title=%s' % pipes.quote(doctitle)
            if docauthors:
                cmd += ' --authors=%s' % pipes.quote(docauthors)
            if docdate:
                cmd += ' --date=%s' % pipes.quote(docdate)
            if doccats:
                cmd += ' --categories=%s' % pipes.quote(doccats)
            if mdtmpfile:
                cmd += ' --metadata=%s' % pipes.quote(mdtmpfile)
            # uploadloc is either our temp file or the caller-supplied URL
            cmd += ' %s' % pipes.quote(uploadloc)

            if ostream:
                _rewrite_job_output(ostream, '{state: 0, msg: "' + urllib.quote(cmd) + '"}')

            # and invoke the command
            status, output, tsignal = subproc(cmd)
            note(4, "cmd is %s, status is %s, output is %s", repr(cmd), status, repr(output.strip()))
            if mdtmpfile:
                os.unlink(mdtmpfile)
                mdtmpfile = None

            if status == 0:
                # success; output should be doc-id
                doc_id = existing_doc_id or output.strip().split()[-1]
                note(4, "output is '%s'; doc_id for new doc is %s", output.strip(), doc_id)
                if wait and ostream:
                    _rewrite_job_output(ostream, '{ state: 1, doc_id: "' + doc_id + '", msg: "' + urllib.quote(output) + '"}')
                # wait for it to come on-line
                if percent_done_fn:
                    percent_done_fn(40)         # estimate 40% of work done on client side
                while not repo.valid_doc_id(doc_id):
                    if ostream:
                        pending = repo.list_pending(full=True)
                        s = _first(pending, lambda x: x['id'] == doc_id)
                        if not s:
                            break
                        dstatus = s['status']
                        if dstatus == 'error':
                            _rewrite_job_output(ostream, '{ state: 3, doc_id: "' + doc_id + '", msg: "' + urllib.quote(s['error']) + '"}')
                            break
                        if dstatus == 'unpacking':
                            msg = 'starting ripper process...'
                        elif dstatus == 'ripping':
                            msg = "ripping with ripper '" + s['ripper'] + "'..."
                        elif dstatus == 'moving':
                            msg = 'adding to registered document set...'
                        else:
                            # unknown status: report it instead of failing on an unbound "msg"
                            msg = "status is '%s'..." % dstatus
                        _rewrite_job_output(ostream, '{ state: 1, doc_id: "' + doc_id + '", msg: "' + urllib.quote(msg) + '"}')
                    time.sleep(1.0)
                if percent_done_fn:
                    percent_done_fn(100)        # finished
                if repo.valid_doc_id(doc_id):
                    if bury:
                        # wait up to 100 seconds for it to show up in history list
                        # after that, wait another second, then bury it
                        counter = 100
                        while counter > 0:
                            h = [x.id for x in repo.history()]
                            if doc_id in h:
                                break
                            counter -= 1
                            time.sleep(1)
                        time.sleep(1)
                        repo.touch_doc(doc_id, bury=True, notify=False)
                        note(3, "buried %s", doc_id)
                    if wait:
                        if ostream:
                            _rewrite_job_output(ostream, '{ state: 2, doc_id: "' + doc_id + '"}')
                        elif noredir:
                            response.reply(doc_id, "text/plain")
                        else:
                            response.redirect("/action/basic/dv_show?doc_id=%s" % doc_id)
            else:
                note("cmd <<%s>> failed with status %s:\n%s", cmd, status, output)
                if wait:
                    if ostream:
                        _rewrite_job_output(ostream, '{ state: 3, msg: "' + urllib.quote('Error processing the document:\n' + output) + '"}')
                    else:
                        response.error(HTTPCodes.INTERNAL_SERVER_ERROR, "<pre>" + htmlescape(output) + "</pre>")
        except:
            # deliberately broad: this is the outermost frame of the worker, and
            # the failure has to be reported to whoever is waiting
            e = ''.join(traceback.format_exception(*sys.exc_info()))
            if wait:
                note(3, "Exception processing uplib-add-document request:\n%s", htmlescape(e))
                if ostream:
                    _rewrite_job_output(ostream, '{state: 3, msg: "' + urllib.quote("Exception processing uplib-add-document request:\n" + e) + '"}')
                else:
                    response.error(HTTPCodes.INTERNAL_SERVER_ERROR,
                                   "Exception processing uplib-add-document request:\n<pre>" +
                                   htmlescape(e) + "\n</pre>")
            else:
                note("Exception processing uplib-add-document request:\n%s", e)
    finally:
        if tempf and os.path.isfile(tempf):
            os.unlink(tempf)
        elif tempf and os.path.isdir(tempf):
            shutil.rmtree(tempf)
        # the metadata file is still around if we failed before the unlink above
        if mdtmpfile and os.path.exists(mdtmpfile):
            os.unlink(mdtmpfile)
def build_html_abstract_display (self, doc, icon_cid):
    """Render an HTML table summarizing ``doc``.

    The table shows the document icon (referenced as ``cid:<icon_cid>``)
    linked to the document viewer, the date derived from the document id,
    title, authors, publication date, a summary, the page count, the
    categories, and links to the original, PDF and HTML versions.  Links
    are built from ``self.ip`` and ``doc.repo.secure_port()``.

    :param doc: the document to describe
    :param icon_cid: content-id under which the icon image is referenced
    :returns: a 2-tuple ``(html, name)`` where ``name`` is the title, the \
        bracketed "name" metadata value, or the document id
    """

    fp = StringIO()
    metadata = doc.get_metadata()
    pubdate = metadata.get("date")
    # strip the zero padding from the day and hour fields
    date = re.sub(" 0|^0", " ", time.strftime("%d %b %Y, %I:%M %p", time.localtime(id_to_time(doc.id))))
    name = doc.id
    page_count = metadata.get('page-count')
    summary = '<i>(No summary available.)</i>'
    if metadata.has_key('title'):
        name = metadata.get('title')
    elif metadata.has_key('name'):
        name = '[' + metadata.get('name') + ']'

    fp.write(u'<table border=0><tr><td>')
    fp.write(u'<center>')
    fp.write(u'<a href="https://%s:%d/action/basic/dv_show?doc_id=%s" border=0>' % (self.ip, doc.repo.secure_port(), doc.id))
    fp.write(u'<img src="cid:%s">' % icon_cid)
    fp.write(u'</a><p><small><font color="%s">(%s)</font></small></center></td><td> </td>' % (STANDARD_DARK_COLOR, date))
    fp.write(u'<td valign=top><h3>%s</h3>' % htmlescape(name))

    if metadata.has_key(u'authors') or pubdate:
        fp.write(u'<p><small>')
        if metadata.has_key('authors'):
            # authors come from document metadata, so escape them like the title
            fp.write(u'<b> %s</b>' % htmlescape(re.sub(' and ', ', ', metadata['authors'])))
        if pubdate:
            formatted_date = format_date(pubdate, True)
            fp.write(u' <i><font color="%s">%s</font></i>' % (STANDARD_DARK_COLOR, formatted_date))
        fp.write(u'</small>\n')

    if metadata.has_key('comment'):
        summary = htmlescape(metadata.get('comment', ''))
    elif metadata.has_key('abstract'):
        summary = "<i>" + htmlescape(metadata.get('abstract', '')) + '</i>'
    elif metadata.has_key('summary'):
        summary = '<font color="%s">' % STANDARD_DARK_COLOR + htmlescape(metadata.get('summary')) + '</font>'
    fp.write(u'<P>%s' % summary)

    if page_count:
        fp.write(u'<small><i><font color="%s"> · (%s page%s)'
                 % (STANDARD_DARK_COLOR, page_count, ((int(page_count) != 1) and "s") or ""))
        fp.write(u'</font></i></small>\n')

    cstrings = doc.get_category_strings()
    fp.write(u'<p>Categories: ')
    if cstrings:
        fp.write(u' · '.join([htmlescape(s) for s in cstrings]))
    else:
        fp.write('(none)')

    typ = doc.get_metadata("apparent-mime-type")
    if typ:
        mtype = ' · <small>%s</small>' % htmlescape(typ)
    else:
        mtype = ''
    fp.write(u'<p><a href="https://%s:%s/action/externalAPI/fetch_original?doc_id=%s&browser=true"><font color="%s">(Original%s)</font></a>'
             % (self.ip, doc.repo.secure_port(), doc.id, STANDARD_DARK_COLOR, mtype))
    fp.write(u' · <a href="https://%s:%s/action/basic/doc_pdf?doc_id=%s"><font color="%s">(PDF)</font></a>'
             % (self.ip, doc.repo.secure_port(), doc.id, STANDARD_DARK_COLOR))
    # Test the MIME type itself: "mtype" is the decorated display string and
    # never starts with "text/html", so testing it always offered the link.
    if not (typ or "").lower().startswith("text/html"):
        fp.write(u' · <a href="https://%s:%s/action/basic/doc_html?doc_id=%s"><font color="%s">(HTML)</font></a>'
                 % (self.ip, doc.repo.secure_port(), doc.id, STANDARD_DARK_COLOR))
    fp.write(u'</td></tr></table>')

    d = fp.getvalue()
    fp.close()
    return d, name
def search_repository (repository, response, params):
    """Search repository using specified query, and return hits (matching documents)
    as either a comma-separated values list of (score, ID, title) lines, or as an XML document,
    or as a zipped folder which includes the ``metadata.txt`` file and the document icon
    for each hit.

    TODO: the exact format of the XML bundle should be documented here.

    :param query: an UpLib query string
    :type query: string
    :param no-icon: optional, indicates whether to not return icons in the ziplist format, defaults to ``False``
    :type no-icon: boolean
    :param format: optional, indicates whether to return results as plain-text CSV, XML, or a Zip file. \
        if not specified, the plain-text CSV file is returned.
    :type format: string, either ``"xml"`` or ``"ziplist"``
    :return: a listing of the documents matching the query, in the specified format
    :rtype: either ``text/plain``, ``application/xml``, or ``application/x-uplib-searchresults-zipped``
    """

    from uplib.basicPlugins import get_buttons_sorted, FN_DOCUMENT_SCOPE

    if not params.has_key('query'):
        response.error(HTTPCodes.BAD_REQUEST, "No query specified.\n")
        return

    query = unicode(params.get('query'), INTERACTION_CHARSET, "replace")
    results = repository.do_query(query)
    # highest score first
    results.sort()
    results.reverse()

    def get_doc_functions (doc):
        # one "key, url, <field 3>, <field 0>" line per document-scope button
        # whose optional predicate (field 5) accepts this document
        buttons = get_buttons_sorted(FN_DOCUMENT_SCOPE)
        retval = ""
        for button in buttons:
            if (not button[1][5]) or (button[1][5](doc)):
                url = button[1][4]
                if url is None:
                    url = "/action/basic/repo_userbutton?uplib_userbutton_key=%s&doc_id=%%s" % button[0]
                retval += "%s, %s, %s, %s\n" % (button[0], url, button[1][3], button[1][0])
        return retval

    no_icon = (params.get("no-icon") == "true")

    if response.xml_request or (params.get("format") == "xml"):

        retval = getDOMImplementation().createDocument(None, "result", None)
        e = retval.createElement('query')
        e.setAttribute('query', query)
        retval.documentElement.appendChild(e)
        for score, doc in results:
            e = retval.createElement('hit')
            e.setAttribute('doc_id', doc.id)
            e.setAttribute('score', str(score))
            title = doc.get_metadata("title") or u""
            title = title.replace("\r", " ")
            note("title is '%s'", title)
            e.setAttribute('title', title)
            retval.documentElement.appendChild(e)
        fp = response.open("application/xml;charset=utf-8")
        fp.write(retval.toxml("UTF-8") + "\n")
        fp.close()
        return

    elif params.get("format") == "ziplist":

        include_doc_functions = params.get("include-doc-functions")
        # mkstemp creates the file atomically; mktemp only hands out a name
        fd, tpath = tempfile.mkstemp()
        os.close(fd)
        try:
            zf = zipfile.ZipFile(tpath, "w")
            try:
                for score, doc in results:
                    folder = doc.id.encode("ASCII", "strict")
                    zf.writestr(folder + "/", "")
                    zf.writestr(folder + "/score", str(score))
                    if not no_icon:
                        zf.writestr(folder + "/first.png", doc.document_icon())
                    if include_doc_functions:
                        zf.writestr(folder + "/doc_functions.txt", get_doc_functions(doc))
                    zf.writestr(folder + "/metadata.txt", doc.metadata_text())
            finally:
                zf.close()
            # was "true", an undefined name: every ziplist request failed with NameError
            response.return_file("application/x-uplib-searchresults-zipped", tpath, True)
        except:
            msg = ''.join(traceback.format_exception(*sys.exc_info()))
            # the file may already be gone; don't let cleanup mask the real error
            if os.path.exists(tpath):
                os.remove(tpath)
            note("Exception building zipfile for search results:\n%s", msg)
            response.error(HTTPCodes.INTERNAL_SERVER_ERROR,
                           "Can't build zipfile for search results:\n%s\n" % htmlescape(msg))

    else:

        fp = response.open('text/plain; charset=UTF-8')
        for score, doc in results:
            title = doc.get_metadata("title") or u""
            title = title.replace("\r", " ")
            fp.write("%f,%s,%s\n" % (score, doc.id, title.encode("UTF-8", "replace")))
        fp.close()
        return
def add(repo, response, params):
    """
    Add a document to the repository, calling ``uplib-add-document`` in a subprocess.

    :param wait: optional, whether to wait for the incorporation and ripping to \
        happen.  If not specified, ``add`` returns immediately after starting \
        the incorporation process.  If specified as ``true``, ``add`` will wait \
        until the document is available in the repository.  If specified as ``watch``, \
        ``add`` will start a new ``Job`` which can be "watched" with the ``fetch_job_output`` \
        function in ``uplib.externalAPI``.  If specified as ``bounce``, and the ``URL`` \
        parameter is also specified, the incorporation \
        will be started, and ``add`` will immediately return an HTTP redirect to \
        the value of ``URL``.  If specified as ``watchexternal``, will start a new ``Job`` \
        and immediately return the Job ID as a text/plain string.
    :type wait: string containing either ``watch`` or ``true`` or ``bounce``
    :param content: the actual bits of the document.  One of either ``content`` or ``URL`` must be specified.
    :type content: byte sequence
    :param contenttype: the MIME type for the document content
    :type contenttype: string containing MIME type
    :param URL: the URL for the document.  One of either ``content`` or ``URL`` must be specified.
    :type URL: string
    :param documentname: the name of the document
    :type documentname: string
    :param no-redirect: if specified as ``true``, no redirect to the incorporated document \
        will be returned; instead, a document ID string as "text/plain" will be returned, \
        if ``wait`` is specified as ``true``.  Optional, defaults to "false".
    :type no-redirect: boolean
    :param bury: optional, defaults to "false", if specified as "true" will cause \
        the newly added document to be "buried" in the history list, so that it \
        won't show up in the most-recently-used listing, as it normally would
    :type bury: boolean
    :param md-title: title to put in the document metadata
    :type md-title: string
    :param md-authors: standard UpLib authors line (" and "-separated) to put in the document metadata
    :type md-authors: string
    :param md-date: standard UpLib date ([MM[/DD]/]YYYY) to put in the document metadata
    :type md-date: string
    :param md-categories: standard UpLib categories string (comma-separated category names) to put in the document metadata
    :type md-categories: string
    :param metadata: contents of a standard UpLib metadata.txt file.  If this file is provided, \
        it is typically just passed unchanged to ``uplib-add-document``.  However, it is \
        inspected for the metadata element ``replacement-contents-for``, and if that is found, \
        ``add`` will check to see that the specified document ID is still valid in that repository.
    :type metadata: string containing "text/rfc822-headers" format data
    :returns: depends on what parameters are passed.  If ``wait`` is specified as ``true`` and ``no-redirect`` \
        is specified as ``true``, will simply wait until the document has been incorporated and \
        return the document ID as a plain text string.  If ``no-redirect`` is not specified, \
        and ``wait`` is ``true``, will return an HTTP redirect to the new document in the repository. \
        If ``wait`` is specified as ``bounce``, will return an immediate redirect to the original \
        URL for the document.  If ``wait`` is not specified, will simply immediately return an HTTP \
        200 (Success) code and a non-committal message.
    :rtype: various
    """

    wait = params.get("wait")
    content = params.get("content")
    url = params.get("URL")
    docname = params.get("documentname")

    # "wait" is optional; normalize once so no branch calls .lower() on None
    wait_mode = (wait or "").lower()

    if content and (not params.get("contenttype")):
        note(3, "add: No contenttype specified.")
        response.error(HTTPCodes.BAD_REQUEST, "No contenttype specified")
        return

    if (not content) and (not url):
        note(3, "add: Neither content nor URL specified.")
        response.error(HTTPCodes.BAD_REQUEST, "Nothing to upload!")
        return

    if wait_mode in ("watch", "watchexternal"):
        job = Job(_add_internal, repo, None, params, content, True)
        note(3, "job id is %s", job.id)
        if url:
            title = htmlescape(url)
        elif docname:
            title = htmlescape(docname)
        else:
            title = 'document'
        if wait_mode == "watchexternal":
            # the caller polls the job itself; just hand back the id
            response.reply(job.id, "text/plain")
        else:
            # serve a page whose script monitors the job and shows its progress
            fp = response.open()
            fp.write('<head><title>Adding %s to repository...</title>\n' % title)
            fp.write('<script type="text/javascript" language="javascript" src="/html/javascripts/prototype.js"></script>\n')
            fp.write(JOBS_JAVASCRIPT)
            fp.write('</head><body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
            fp.write('<p style="background-color: %s;"><span id="swirl">%s</span> <span id="titlespan">Adding <b>%s</b>...</span></p>\n' % (
                STANDARD_TOOLS_COLOR, SWIRLIMG, title))
            fp.write('<p id="progressreport"></p>\n')
            fp.write('<script type="text/javascript">\n'
                     'function report_error (req) {\n'
                     ' // alert("Can\'t check status of job");\n'
                     '}\n'
                     'function update_progress_report(jobid, percent_done, update_text) {\n'
                     ' // alert("update_text is " + update_text);\n'
                     ' var state = eval("(" + update_text + ")");\n'
                     ' // alert("state is " + state);\n'
                     ' if (percent_done >= 100) {\n'
                     ' $("swirl").innerHTML = \'' + SWIRLSPACER + '\';\n'
                     ' $("titlespan").innerHTML = "Finished adding ' + title + '.";\n'
                     ' }\n'
                     ' if (state.state == 2) {\n'
                     ' $("progressreport").innerHTML = \'Finished.\\n<p>Click here <a href="/action/basic/dv_show?doc_id=\' + unescape(state.doc_id) + \'"><img src="/docs/\' + unescape(state.doc_id) + \'/thumbnails/first.png" border=0></a> to open the document in the UpLib browser viewer.\';\n'
                     ' } else if (state.state == 0) {\n'
                     ' $("progressreport").innerHTML = "Extracting page images and text...";\n'
                     ' } else if (state.state == 1) {\n'
                     ' $("progressreport").innerHTML = "Finished client side, ID is " + unescape(state.doc_id) + "<br>" + unescape(state.msg);\n'
                     ' } else {\n'
                     ' $("progressreport").innerHTML = "Error:<br><pre>" + unescape(state.msg) + "</pre>";\n'
                     ' }\n'
                     '}\n'
                     'Jobs.monitor("' + job.id + '", update_progress_report, 3, report_error);\n'
                     '</script>\n')
            fp.write('</body>\n')
        return

    elif wait_mode == "true":
        response.fork_request(_add_internal, None, None, repo, response, params, content, True)

    else:
        uthread.start_new_thread(_add_internal, (None, None, repo, response, params, content, False),
                                 "UploadDocument: adding %s" % (docname or url or time.ctime()))
        # Previously wait.lower() ran here even when "wait" was absent, so a
        # plain URL-only request raised AttributeError after starting the thread.
        if url and (wait_mode == "bounce"):
            response.redirect(url)
        else:
            response.reply("Started new thread to add document", "text/plain")
def related (repo, response, params):
    """
    Find other documents related to the query document.

    :param doc_id: the query document
    :type doc_id: UpLib doc ID string
    :param use-authorship: whether or not to use co-authorship as a measure of relatedness.  Defaults to "true".
    :type use-authorship: "true" or "false"
    :param use-history: whether or not to to use the use history (most recently used list) as a factor in the calculation.  Defaults to "true".
    :type use-history: "true" or "false"
    :param format: whether to return non-browser format results.  Specifying "xml" will cause an XML document to be returned containing the results.  Specifying "ziplist" will cause a zip file containing extra information about each document to be returned.  If the ``format`` parameter is not specified, an HTML page showing the results broken down by category is returned.
    :type format: "xml" or "ziplist" or none
    :result: list of other documents related to the query document.  See discussion of the ``format`` parameter.
    :rtype: varies
    """

    doc_id = params.get("doc_id")
    if not doc_id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc_id specified.")
        return
    if not repo.valid_doc_id(doc_id):
        response.error(HTTPCodes.NOT_FOUND, "Invalid doc_id %s specified." % doc_id)
        return
    doc = repo.get_document(doc_id)

    use_authorship = (params.get("use-authorship") or "true") == "true"
    use_history = (params.get("use-history") or "true") == "true"

    docs, likethis, authored, recent, others, qstring = find_related(
        doc, True, use_history=use_history, use_authorship=use_authorship)

    # NOTE(review): "others" was unpacked as (doc, score) for XML but as
    # (explanation, doc, score) for HTML, so one of the two had to fail.
    # Both shapes are accepted below; confirm what find_related really returns.

    if response.xml_request or (params.get("format") == "xml"):

        retval = getDOMImplementation().createDocument(None, "result", None)
        root = retval.documentElement

        def append_group (tag, pairs):
            # add a <tag> element holding one <document> child per (doc, score) pair
            group = retval.createElement(tag)
            for hit_doc, score in pairs:
                e = retval.createElement('document')
                e.setAttribute('doc_id', hit_doc.id)
                e.setAttribute('score', str(score))
                title = _safe_title(hit_doc)
                note("title is %s", repr(title))
                e.setAttribute('title', title)
                group.appendChild(e)
            root.appendChild(group)
            return group

        e = retval.createElement('paradigm')
        e.setAttribute('id', doc_id)
        e.setAttribute("title", _safe_title(doc))
        e.setAttribute('use-history', use_history and "true" or "false")
        e.setAttribute('use-authorship', use_authorship and "true" or "false")
        root.appendChild(e)

        append_group('similar', likethis).setAttribute('query', qstring)
        append_group('co-authored', authored)
        append_group('recent', recent)
        # the last two items of each entry are the document and its score
        append_group('linked', [(hit[-2], hit[-1]) for hit in others])
        append_group('combined', docs)

        fp = response.open("application/xml;charset=utf-8")
        fp.write(retval.toxml("UTF-8") + "\n")
        fp.close()

    elif params.get("format") == "ziplist":

        no_icon = (params.get("no-icon") == "true")
        include_doc_functions = params.get("include-doc-functions")
        # mkstemp creates the file atomically; mktemp only hands out a name
        fd, tpath = tempfile.mkstemp()
        os.close(fd)
        try:
            zf = zipfile.ZipFile(tpath, "w")
            try:
                for hit_doc, score in docs:
                    folder = hit_doc.id.encode("ASCII", "strict")
                    zf.writestr(folder + "/", "")
                    zf.writestr(folder + "/score", str(score))
                    if not no_icon:
                        zf.writestr(folder + "/first.png", hit_doc.document_icon())
                    if include_doc_functions:
                        # NOTE(review): get_doc_functions is not defined in this
                        # function; verify a module-level definition exists
                        zf.writestr(folder + "/doc_functions.txt", get_doc_functions(hit_doc))
                    zf.writestr(folder + "/metadata.txt", hit_doc.metadata_text())
            finally:
                zf.close()
            response.return_file("application/x-uplib-searchresults-zipped", tpath, True)
        except:
            msg = ''.join(traceback.format_exception(*sys.exc_info()))
            # the file may already be gone; don't let cleanup mask the real error
            if os.path.exists(tpath):
                os.remove(tpath)
            note("Exception building zipfile for search results:\n%s", msg)
            response.error(HTTPCodes.INTERNAL_SERVER_ERROR,
                           "Can't build zipfile for search results:\n%s\n" % htmlescape(msg))

    else:

        fp = response.open()
        title = "Documents related to %s" % repr(doc.get_metadata("title") or doc.id)
        fp.write("<head><title>%s</title>\n" % htmlescape(title))
        fp.write('<meta http-equiv="Content-Script-Type" content="text/javascript">\n')
        fp.write('<link REL="SHORTCUT ICON" HREF="/favicon.ico">\n')
        fp.write('<link REL="ICON" type="image/ico" HREF="/favicon.ico">\n')
        issue_javascript_head_boilerplate(fp)
        issue_title_styles(fp)
        fp.write('</head><body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
        issue_menu_definition(fp)
        fp.write('<h2>%s</h2><br>\n' % htmlescape(title))
        show_abstract(repo, doc, fp, True, showpagesearch=False)

        fp.write('<p><hr><b>Context documents:</b><br>')
        for other, score in docs:
            show_title (fp, other, { other.id: score }, True)

        fp.write('<p><hr><b>Like this:</b><br>')
        for other, score in likethis:
            show_title (fp, other, { other.id: score }, True)
        fp.write('<p><i>query was: %s</i>\n' % htmlescape(qstring))

        fp.write('<p><hr><b>Co-authored:</b><br>')
        for other, score in authored:
            show_title (fp, other, { other.id: score }, True)

        fp.write('<p><hr><b>Recently consulted:</b><br>')
        for other, score in recent:
            show_title (fp, other, { other.id: score }, True)

        fp.write('<p><hr><b>Other considerations:</b><br>')
        for hit in others:
            # entries with three items carry a leading explanation string
            if len(hit) > 2:
                fp.write('<p><i>%s</i><br>\n' % htmlescape(hit[0]))
            other, score = hit[-2], hit[-1]
            show_title (fp, other, { other.id: score }, True)

        fp.write('</body>\n')
        fp.close()