def rerip (repo, response, params):
    """Refresh document metadata from the ACM Digital Library.

    Reads the ``doc_id`` request parameter (a single ID or a sequence of
    IDs).  For each valid ID the document's folder is passed to
    ``update_document_metadata_from_acm_diglib`` and the document is
    re-cached.  A single-document request is redirected to the document's
    metadata page; otherwise a progress line is written per document.

    :param repo: the repository; must provide ``valid_doc_id`` and ``get_document``
    :param response: the response object used for errors, redirects and output
    :param params: mapping of request parameters
    """
    # Local import so this block stays self-contained; escape() exists in
    # both Python 2 and Python 3.
    from xml.sax.saxutils import escape

    def html_text(value):
        # '%s' formatting copes with None and with unicode values alike.
        return escape('%s' % (value,))

    ids = params.get('doc_id')
    if not ids:
        response.error(HTTPCodes.BAD_REQUEST, "No document(s) specified.")
        return
    # A lone ID arrives as a plain (byte or unicode) string; normalize it to
    # a 1-tuple so that it is not iterated character by character.
    if isinstance(ids, (str, type(u''))):
        ids = (ids,)

    fp = None       # output stream, opened lazily on first write
    for doc_id in ids:
        if not repo.valid_doc_id(doc_id):
            if fp is None:
                fp = response.open()
            # the ID comes straight from the request, so escape it
            fp.write("** Doc ID \"%s\" is not a valid doc ID.\n" % html_text(doc_id))
            continue

        doc = repo.get_document(doc_id)
        try:
            update_document_metadata_from_acm_diglib(doc.folder())
            doc.recache()
            if len(ids) == 1:
                response.redirect("/action/basic/doc_meta?doc_id=" + doc.id)
            else:
                if fp is None:
                    fp = response.open()
                fp.write('<p>Updated metadata for document \"%s\".\n'
                         % html_text(doc.get_metadata("title")))
        except Exception:
            # Best effort: report the failure and carry on with the next ID.
            # The traceback lines already end in newlines, so join with ''.
            msg = ''.join(traceback.format_exception(*sys.exc_info()))
            note("Exception running ACMBibTexRipper:\n%s", msg)
            if fp is None:
                fp = response.open()
            fp.write("<p><pre>Exception running looking for document metadata in the ACM Digital Library:\n"
                     + html_text(msg) + "</pre>")
def page_index_to_page_number_string (self, page_index):
    """Return the printed page number for the page at ``page_index``.

    The numbering comes from ``self.get_page_numbering()``, which is either

    * an integer: the number of the page at index 0, or
    * a list of ``(pagerange, numbering_type, first_page_number)`` tuples,
      where ``pagerange`` is ``[first]`` or ``[first, last]`` (page indices)
      and ``numbering_type`` is ``'d'`` (decimal), ``'r'`` (lower-case roman
      numerals) or ``'b'`` (blank).

    :param page_index: index of the page
    :return: the page number as a string; ``""`` for blank or unmatched pages
    """
    pn = self.get_page_numbering()
    if isinstance(pn, int):
        return str(pn + page_index)

    if isinstance(pn, list):
        for pagerange, numbering_type, fpn in pn:
            first_page = pagerange[0]
            if len(pagerange) > 1:
                last_page = pagerange[1]
            else:
                last_page = pagerange[0]
            if not (first_page <= page_index <= last_page):
                continue
            value = page_index - first_page + fpn
            if numbering_type == 'd':
                return str(value)
            if numbering_type == 'r':
                # roman numeral; imported lazily, only when actually needed
                from roman import toRoman
                return toRoman(value).lower()
            # 'b' (blank) and any unrecognized type produce no page number
            return ""
        return ""

    # Neither form: log it, reset to plain numbering starting at 1, and
    # answer as if that numbering had been in effect.
    note("document %s: bad page numbering %s", self.id, pn)
    self.__pagenumbering = 1
    return str(page_index + 1)
def fork_request(self, fn, *args):
    """Run ``fn(*args)`` in a new thread and signal that the request was forked.

    Starts ``run_fn_in_new_thread`` via ``uthread.start_new_thread`` and then
    always raises ``ForkRequestInNewThread`` carrying the value returned by
    ``start_new_thread``.
    """
    note(3, "forking %s in new thread...", fn)
    http_request = self.request.request
    thread_name = "handling request %s %s at %s" % (
        http_request.method, http_request.uri, time.ctime())
    thread_id = uthread.start_new_thread(
        run_fn_in_new_thread, (self, fn, args), thread_name)
    raise ForkRequestInNewThread(thread_id)
def do_request(self):
    """Handle the current request, logging any exception before re-raising it."""
    try:
        self.current_request.handle_request()
    except:
        # Deliberately catches everything: the exception is only logged here
        # and then re-raised unchanged.  The traceback lines already end in
        # newlines, so they are joined with ''.
        note("exception handling request %s:\n%s", repr(self),
             ''.join(traceback.format_exception(*sys.exc_info())))
        # A bare raise keeps the original traceback; "raise v" would restart
        # the traceback at this line.
        raise
def rerip (self, changed_fields=None, wait=False):
    """Re-run the repository's rippers on this document in a new thread.

    A ripper is run when it asks to be re-run for ``changed_fields`` or
    after any ripper that has already been re-run in this pass.  A failure
    in one ripper is logged and does not stop the others.  The document is
    re-cached once all rippers have been considered.

    :param changed_fields: passed to each ripper's ``rerun_after_metadata_changes``
    :param wait: if true, join the new thread before returning
    :return: the object returned by ``uthread.start_new_thread``, or ``None``
             if the thread could not be started (the failure is logged)
    """
    try:
        # NOTE(review): "thread" is not referenced below; presumably imported
        # to verify thread support is present -- confirm before removing.
        import thread

        def rip_it (self):
            reruns = []
            for ripper in self.repo.rippers():
                try:
                    if (ripper.rerun_after_metadata_changes(changed_fields=changed_fields) or
                        any([ripper.rerun_after_other_ripper(x.name()) for x in reruns])):
                        ripper.rip(self.folder(), self.id)
                        reruns.append(ripper)
                except Exception:
                    # best effort: log and move on to the next ripper
                    note("Exception running %s on %s:\n%s", ripper, self,
                         ''.join(traceback.format_exception(*sys.exc_info())))
            self.recache()

        newthread = uthread.start_new_thread(rip_it, (self,))
        if wait:
            newthread.join()
        return newthread
    except Exception:
        # Log the traceback as text; format_exception() returns a list of
        # lines, which previously got logged as the repr of that list.
        note("%s", ''.join(traceback.format_exception(*sys.exc_info())))
def htmlize_folder (repo, path, doc_id):
    """Generate the "html" subfolder for a document folder.

    Does nothing unless ``path`` is a directory containing a "thumbnails"
    subdirectory.  Any exception raised by ``do_HTML`` is logged, not
    propagated.
    """
    update_configuration()
    if not os.path.isdir(path):
        return
    if not os.path.isdir(os.path.join(path, "thumbnails")):
        return
    try:
        do_HTML(path, os.path.join(path, "html"), doc_id, repo.secure_port())
    except:
        trace = ''.join(traceback.format_exception(*sys.exc_info()))
        note(0, "exception raised in do_HTML:\n%s\n", trace)
def create_temporary_icons (metadata, dirpath, output_dir, params):
    """Create placeholder thumbnails for a document.

    Creates ``output_dir`` and writes three PNG files into it: "first.png"
    (a crossed-out document icon), "big1.png" and "1.png" (large and small
    page thumbnails).  Then records the matching page-count and size values
    in ``<dirpath>/metadata.txt`` via ``update_metadata``.

    ``params`` is accepted but not used here.
    """
    global TEMPORARY_BACKGROUND, UNDER_CONSTRUCTION

    thumbnails_path = output_dir
    os.mkdir(thumbnails_path)
    note("thumbnails_path is %s", thumbnails_path)
    title = metadata.get("title")

    def save_png(image, filename):
        image.save(os.path.join(thumbnails_path, filename), "PNG")

    # document icon: a bordered rectangle with both diagonals drawn
    icon = Image.new("RGB", (150, 194), TEMPORARY_BACKGROUND)
    icon_width, icon_height = icon.size
    pen = ImageDraw.Draw(icon)
    pen.line((0, 0, icon_width, icon_height), LEGEND_COLOR)
    pen.line((0, icon_height, icon_width, 0), LEGEND_COLOR)
    pen.rectangle((0, 0, icon_width - 1, icon_height - 1), outline=LEGEND_COLOR)
    if title:
        icon = add_legend(icon, ("(255, 255, 255)" + title,))
    save_png(icon, "first.png")

    # large page thumbnail, carrying a legend
    big_legend = ["(255,255,255)[temporary document]"]
    if title:
        big_legend.append("(0,255,0)%s" % title)
    big_page = add_legend(Image.new("RGB", (425, 550), TEMPORARY_BACKGROUND), big_legend)
    save_png(big_page, "big1.png")

    # small page thumbnail, carrying the page number
    small_page = Image.new("RGB", (85, 110), TEMPORARY_BACKGROUND)
    add_page_no (small_page, (5, 5), "1")
    save_png(small_page, "1.png")

    placeholder_metadata = {
        "page-count" : "1",
        "tiff-width" : 2550,
        "images-width" : 2550,
        "images-size" : "2550,3300",
        "cropping-bounding-box" : "0,0;2550,3300",
        "big-thumbnail-size" : "425,550",
        "small-thumbnail-size" : "85,110",
        "small-thumbnail-scaling" : "%f" % (1.0 / 30.0),
        "images-height" : "3300",
        "tiff-height" : "3300",
        }
    update_metadata(os.path.join(dirpath, "metadata.txt"), placeholder_metadata)
def retry_folder (repo, folderpath, id):
    """Retry incorporation of the document folder at ``folderpath``.

    Removes a leftover "LOCK" file, if present, and runs
    ``_finish_inclusion`` again.  If that raises
    ``AbortDocumentIncorporation`` for this document, the folder is deleted
    and the document is removed from the index.
    """
    lockfile = os.path.join(folderpath, "LOCK")
    try:
        if os.path.exists(lockfile):
            os.unlink(lockfile)
        _finish_inclusion (repo, folderpath, id)
    except AbortDocumentIncorporation as x:
        # ripper signalled to stop adopting this document, for good
        note(2, "AbortDocumentIncorporation exception on %s: %s", x.id, x.message)
        if x.id == id:
            shutil.rmtree(folderpath)
            remove_from_index(repo.index_path(), id)
def run_fn_in_new_thread(resp, fn, args):
    """Call ``fn(*args)``; on any exception, log it and report it on ``resp``.

    The formatted traceback is logged at level 0 and also sent back as a
    "text/plain" INTERNAL_SERVER_ERROR response.
    """
    try:
        fn(*args)
    except:
        trace = ''.join(traceback.format_exception(*sys.exc_info()))
        note(0, "Exception calling %s with %s:\n%s", fn, args, trace)
        resp.error(HTTPCodes.INTERNAL_SERVER_ERROR, trace, "text/plain")
def format_event(self, outputfile, event):
    """Render an event as a 5x3 inch card written to ``outputfile``.

    The clock icon is loaded once and kept in ``self._CLOCKIMAGE``; it is
    drawn in the top-right corner.  The name, date/time, location,
    description and attendees sections are then laid out top to bottom, each
    formatter returning the y position at which the next one starts.

    ``event`` is not read directly by this method.
    """
    if self._CLOCKIMAGE is None:
        from PIL import Image
        self._CLOCKIMAGE = Image.open(_CLOCKIMAGEFILEPATH)
        self._CLOCKIMAGE.load()
        note(3, "vcard _CLOCKIMAGE icon is %s", self._CLOCKIMAGE)

    # new 3x5 card
    card_width = 5 * inch
    card_height = 3 * inch
    c = canvas.Canvas(outputfile, pagesize=(card_width, card_height))
    border = .3 * inch
    iconsize = 0.5 * inch
    x = 0 + border
    y = card_height - border
    width = card_width - (2 * border)

    c.drawImage(ImageReader(self._CLOCKIMAGE),
                card_width - border - iconsize,
                card_height - border - iconsize,
                iconsize, iconsize)

    # each section starts where the previous one ended
    for section in ("format_name", "format_date_and_time", "format_location",
                    "format_description", "format_attendees"):
        y = getattr(self, section)(c, x, y, framesize=(width, y - border))
    c.save()
def _initialize_windows_timezones(cls):
    """Register Windows timezone names with vobject, then mark ``cls`` initialized.

    Reads "windows-timezones.xml" from this module's directory and, for
    every Windows name whose Olson timezone ``zoneinfo.gettz`` can resolve,
    registers it with ``vobject.icalendar.registerTzid``.
    """

    def _windows_timezones (filename):
        """Returns mapping of Windows timezone names onto Olson names.

        This uses the Unicode Consortium's supplemental data file, available at
        <http://unicode.org/cldr/data/common/supplemental/supplementalData.xml>.

        @param filename the file to read
        @type filename a string filename path
        @return a mapping of Windows timezone names to Olson timezone names
        @rtype dict(string->string)
        """
        import xml.dom.minidom
        mapping = {}
        document = xml.dom.minidom.parse(filename)
        if not document:
            return mapping
        for section in document.getElementsByTagName("mapTimezones"):
            # probably only one section is of type "windows"
            if not (section.hasAttribute("type") and
                    (section.getAttribute("type") == u"windows")):
                continue
            for zone in section.getElementsByTagName("mapZone"):
                if zone.hasAttribute("other") and zone.hasAttribute("type"):
                    mapping[zone.getAttribute("other")] = zone.getAttribute("type")
        return mapping

    filepath = os.path.join(os.path.dirname(__file__), "windows-timezones.xml")
    for key, tzname in _windows_timezones(filepath).items():
        tz = zoneinfo.gettz(tzname)
        if not tz:
            continue
        vobject.icalendar.registerTzid(key, tz)
        note(5, "registered %s for '%s'", tz, key)
    cls.INITIALIZED = True
def __init__(self, repository, name=None, initial_values = ()):
    """Initialize an instance.

    If a storage file for this collection already exists, the collection is
    loaded from it and ``initial_values`` is ignored; a load failure is
    logged and leaves whatever was loaded so far.

    :param repository: the repository we're running in
    :type repository: ``repository.Repository``
    :param name: unique ID to use as the collection name.  If not specified, one is generated.
    :type name: string
    :param initial_values: a sequence of values to start off the collection with.  Defaults to empty list.
    :type initial_values: sequence of either Document or Collection instances
    """
    self.__xitems = {}          # maps id or name to element.  Elements may be documents or other collections.
    self.scantime = repository.mod_time()       # last time looked at
    self.repository = repository
    self.id = name or create_new_id()
    self.storage_path = os.path.join(repository.collections_folder(), self.id)
    if os.path.exists(self.storage_path):
        try:
            fp = open(self.storage_path, 'rb')
            try:
                self.load(fp)
            finally:
                # close the file even when load() raises
                fp.close()
        except Exception:
            note(2, "Couldn't load collection %s:\n%s", self.id,
                 ''.join(traceback.format_exception(*sys.exc_info())))
    elif initial_values:
        for item in initial_values:
            if isinstance(item, Document):
                self.__xitems[item.id] = DocumentPointer(item)
            elif isinstance(item, Collection):
                self.__xitems[item.name()] = CollectionPointer(item.id, item)
def load(self, fp):
    """Populate SELF with the elements listed in a file written by "store()".

    Each line is parsed with ``parse_storage_line`` into a class name, an ID
    and a name.  Document entries are looked up in the repository;
    Collection entries are fetched from the repository, or created and
    registered there when missing.  Unusable lines are logged and skipped.
    """
    for line in fp.readlines():
        classname, element_id, name = self.parse_storage_line(line)
        clss = find_class(classname)
        if not clss:
            note("invalid class <%s> found in collection file %s", classname, self)
            continue

        if issubclass(clss, Document):
            if self.repository.valid_doc_id(element_id):
                self[name] = self.repository.get_document(element_id)
            else:
                note("invalid doc ID %s found in collection file %s", element_id, self)
        elif issubclass(clss, Collection):
            coll = self.repository.get_collection(element_id, true)
            if not coll:
                # not known to the repository yet: create and register it
                coll = clss(self.repository, element_id)
                self.repository.add_collection(element_id, coll)
            self[name] = coll
            self.add_storage_extras(coll, line)
        else:
            note("invalid class <%s> found in collection file %s", classname, self)
def repo_index (repository, response, params):
    """
    Fetch the repository index.  This may have the usually harmless side-effect
    of saving the repository, if no index for the repository has been created yet.

    :param modtime: Optional.  If supplied, and if the repository has not been modified since this time, this call will return an HTTP "not modified" code, instead of the repository index.  A value which cannot be parsed as a number yields an HTTP "bad request" code.
    :type modtime: a string containing a floating point number giving seconds past the Python (UNIX) epoch.
    :return: the repository index
    :rtype: a binary structure containing a 'sketch' of the current state of the repository.  See the ARCHITECTURE document for more information on the structure of this data.  Uses MIME type "application/x-uplib-repository-index".
    """
    modtime = params.get("modtime")
    if modtime is not None:
        try:
            modtime = float(modtime.strip())
        except (AttributeError, ValueError):
            # Not a numeric string (or not a single string at all): this is
            # the client's mistake, so say so instead of letting the
            # exception escape.
            response.error(HTTPCodes.BAD_REQUEST,
                           "Invalid modtime parameter; expected a floating point number of seconds.")
            return
        note("modtime is %s, repository.mod_time() is %s", modtime, repository.mod_time())
        if repository.mod_time() <= modtime:
            response.error(HTTPCodes.NOT_MODIFIED, "Not modified since %s" % time.ctime(modtime))
            return

    fname = os.path.join(repository.overhead_folder(), "index.upri")
    if not os.path.exists(fname):
        # no index yet; saving the repository is expected to write one
        repository.save(force = True)
    if os.path.exists(fname):
        response.return_file("application/x-uplib-repository-index", fname, false)
    else:
        response.error(HTTPCodes.NOT_FOUND, "No index file for this repository.")
def prepare(self):
    """Parse the request URI into ``self.angel_action`` before the request is handled.

    Recognized paths:

    * ``/action/MODULE/FUNCTION``
    * ``/action/MODULE/FUNCTION/seq...``
    * ``/action/EXTENSION/any/further/path`` when EXTENSION is hierarchical
    * ``/`` which is redirected to the top-level action

    Anything else raises ``HTTPError`` with a BAD_REQUEST code.
    """
    global TOP_LEVEL_ACTION
    super(ActionHandler, self).prepare()
    self.set_header('Server', "UpLib/%s (Tornado %s)" % (self.repo.get_version(), TornadoVersion))
    self.angel_action = None

    parsed = urlparse.urlparse(self.request.uri)
    path, params, query, fragment = parsed[2:6]
    parts = [segment for segment in path.split('/') if segment.strip()]
    count = len(parts)
    is_action = (count > 0) and (parts[0] == 'action')

    if is_action and count == 3:
        self.angel_action = (parts[1:], params, query, fragment)
    elif is_action and count == 4 and parts[3][0:3] == 'seq':
        note(2, "in match with 4 parts, parts = %s", str(parts))
        self.angel_action = (parts[1:3], params, query, fragment)
    elif is_action and count > 3 and is_hierarchical_extension(parts[1]):
        # to support extensions with hierarchical static elements, like GWT-generated UIs
        note(4, "hierarchical request, parts = %s", str(parts))
        self.angel_action = ((parts[1], '/'.join(parts[2:])), params, query, fragment)
    elif path == '/':
        self.redirect('/'.join(("/action",) + TOP_LEVEL_ACTION))
    else:
        raise HTTPError(HTTPCodes.BAD_REQUEST, "Invalid /action request %s received", self.request.uri)
def figure_author_name (basename):
    """Convert "First Middle Last [honorifics]" into "Last, First Middle[, honorifics]".

    Trailing honorifics (e.g. "Jr.", "PhD") are moved to the end of the
    result, in the order they are peeled off the end of the name.  A
    lower-case particle ("van", "de", "von") directly before the last name
    is kept with it.  A name consisting only of honorific-like tokens keeps
    its first token as the name instead of failing.

    :param basename: the author's name as written
    :return: the re-ordered name, or "" if ``basename`` has no usable tokens
    """

    def clean_token(t):
        v = t.strip()
        if v[-1] == ",":
            v = v[:-1]
        return v

    honorifics = ("MD", "M.D.", "PhD", "Ph.D.", "Jr.", "Sr.", "II", "III", "IV", "V", "MPA")

    cleaned = [clean_token(x) for x in basename.strip().split(' ') if x.strip()]
    # a stray "," cleans to an empty token; drop those
    tokens = [t for t in cleaned if t]
    if not tokens:
        note("Author name \"%s\" => %s", basename, tokens)
        return ""

    # Peel honorifics off the end, but always leave one token for the name.
    suffixes = []
    while len(tokens) > 1 and tokens[-1] in honorifics:
        suffixes.append(tokens.pop())

    surname = tokens[-1]
    if len(tokens) > 2 and (tokens[-2] in ("van", "de", "von")):
        surname = tokens[-2] + " " + surname
        tokens = tokens[:-1]

    given_names = tokens[:-1]
    result = surname
    if given_names:
        result = result + ", " + " ".join(given_names)
    if suffixes:
        result = result + ", " + " ".join(suffixes)
    return result
# _common(self): look up and invoke the action function named by
# self.angel_action[0], a (module_name, function_name) pair.
#
# On first use it fills in the module globals REPO_EXTENSION_DIRS (the
# repository's "extensions/active" folder joined with the library's
# "site-extensions" folder) and ALLOW_OLD_EXTENSIONS from the default
# configurator.  An exception from find_action_function is logged at level 0
# and its formatted traceback is kept in the local "exception".  When a
# callable is found it is called with (self.repo, resp, field_values); the
# method returns True afterwards, or False with self._auto_finish cleared if
# the callable raised ForkRequestInNewThread.
#
# NOTE(review): the definition is cut off after its final "except:" in this
# view, so that handler and anything following it are not documented here.
# NOTE(review): the local name "callable" shadows the Python builtin.
def _common(self): global REPO_EXTENSION_DIRS, ALLOW_OLD_EXTENSIONS if not REPO_EXTENSION_DIRS: conf = configurator.default_configurator() REPO_EXTENSION_DIRS = PATH_SEPARATOR.join(( os.path.join(self.repo.overhead_folder(), "extensions", "active"), os.path.join(conf.get('uplib-lib'), 'site-extensions'))) ALLOW_OLD_EXTENSIONS = conf.get_bool("allow-old-extensions") module_name, function_name = self.angel_action[0] exception = None callable = None try: callable = find_action_function(module_name, function_name, self.repo.get_actions_path()) except: t, v, b = sys.exc_info() exception = ''.join(traceback.format_exception(t, v, b)) note(0, "find_action_function(%s/%s) raised an exception:\n%s", module_name, function_name, exception) if callable: field_values = request_to_field_dict(self.request) or {} try: resp = response(self, self.current_user is not None) callable(self.repo, resp, field_values) return True except ForkRequestInNewThread, x: note(4, "forked off request") self._auto_finish = False return False except:
def format_name (c, name, organization, x, y):
    """Draw the person's name, and/or organization, at (x, y) on canvas ``c``.

    With a name, the name is drawn as the heading and the organization (if
    any) on a smaller, indented line below it.  Without a name, the
    organization is drawn as the heading.  Nothing is drawn if both are
    missing.

    :param c: the canvas to draw on
    :param name: object whose ``value`` is the name text, or a false value
    :param organization: object whose ``value`` is the organization (a
        sequence of strings, or a single string), or a false value
    """
    note("name is %s, organization is %s, x,y = %s,%s", name, organization, x, y)

    def organization_text(value):
        # The organization is normally a sequence of components; join it the
        # same way in both branches, and leave a plain string untouched.
        if isinstance(value, (list, tuple)):
            return ", ".join(value)
        return value

    def draw_heading(text):
        # define a large font
        c.setFont("Helvetica-Bold", 14)
        # choose some colors
        c.setStrokeColorRGB(0, 0, 0)
        c.setFillColorRGB(0, 0, 0)
        c.drawString(x, y, text)

    if name:
        draw_heading(name.value)
        if organization:
            c.setFont("Helvetica", 12)
            c.drawString(x + (0.5 * inch), y - (0.2 * inch),
                         organization_text(organization.value))
    elif organization:
        draw_heading(organization_text(organization.value))
def handle_malformed_starttag(cacher):
    """Skip over a malformed start tag and return the position to resume at.

    Finds the next ">" at or after position ``i``, advances the parser's
    position bookkeeping past it, and logs the tag text.

    NOTE(review): ``i`` is not defined in this function; presumably it comes
    from an enclosing scope -- confirm against the surrounding code.

    :return: the index just past the closing ">", or the (negative) result
             of ``find`` when no ">" was found
    """
    lineno, offset = cacher.getpos()
    close_at = cacher.rawdata.find(">", i)
    cacher.updatepos(i, close_at)
    tagtext = cacher.rawdata[i : close_at + 1]
    note(3, "Malformed start tag '%s' at line %s, column %s", tagtext, lineno, offset)
    if close_at < 0:
        return close_at
    return close_at + 1
def format_phone (c, phone, x, y):
    """Draw a "phone: <number> (<types>)" line at (x, y) on canvas ``c``.

    All-digit numbers of 10 or 7 digits are punctuated as "(123) 456-7890"
    or "456-7890"; anything else is drawn as given.  The TYPE parameters, if
    any, follow the number, lower-cased and in parentheses.

    :param c: the canvas to draw on
    :param phone: object with a ``value`` (the number) and, optionally,
        ``params`` (a mapping which may contain "TYPE")
    """
    # tp must be bound even when the phone has no params at all
    tp = None
    if hasattr(phone, "params"):
        tp = phone.params.get("TYPE")
    kinds = tp and [q.lower() for q in tp]

    text = phone.value
    note("phone is %s, tp is %s", text, kinds)
    if text.isdigit():
        if len(text) == 10:
            text = '(' + text[:3] + ') ' + text[3:6] + '-' + text[6:]
        elif len(text) == 7:
            text = text[:3] + '-' + text[3:]

    label = "phone: "
    labelFontName = "Helvetica-Oblique"
    labelFontSize = 12
    textFontName = "Courier-Bold"
    textFontSize = 12

    # choose some colors
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)

    c.setFont(labelFontName, labelFontSize)
    c.drawString(x, y, label)
    x += c.stringWidth(label, labelFontName, labelFontSize)

    c.setFont(textFontName, textFontSize)
    c.drawString(x, y, text)
    x += c.stringWidth(text + " ", textFontName, textFontSize)

    if kinds:
        c.setFont(labelFontName, labelFontSize)
        c.drawString(x, y, '(' + ' '.join(kinds) + ')')
def starttls(self):
    """Begin switching this channel's socket over to TLS (server side).

    Raises ``RuntimeError`` if the ``ssl`` module is unavailable, if there
    is no socket, or if the socket is already an ``ssl.SSLSocket``.
    Otherwise removes the channel from the event loop and wraps the socket
    using the server's certificate file, with the handshake deferred
    (``do_handshake_on_connect=False``).

    NOTE(review): within the code visible here, ``wrapped_socket`` is never
    used after it is created and the channel is not put back into the event
    loop; the rest of the method may lie outside this view -- confirm.
    """
    if not ssl:
        raise RuntimeError("No SSL support in this Python")
    if not self.socket:
        raise RuntimeError("No socket to wrap with SSL")
    if isinstance(self.socket, ssl.SSLSocket):
        raise RuntimeError("Already wrapped with SSL")
    note(3, "Starting TLS handshake...")
    # remove the channel handler from the event loop
    self.del_channel()
    # now wrap with an SSL context
    try:
        wrapped_socket = ssl.wrap_socket(
            self.socket,
            server_side=True,
            certfile=self.server.mailcontext.server_certificate_file(),
            do_handshake_on_connect=False,
            )
    except ssl.SSLError, err:
        # Apple Mail seems to do one connect just to check the certificate,
        # then it drops the connection
        if err.args[0] == ssl.SSL_ERROR_EOF:
            # the peer went away: close quietly instead of raising
            self.handle_close()
            return
        else:
            raise
def format_email (c, email, x, y):
    """Draw an "email: <address> (<types>)" line at (x, y) on canvas ``c``.

    The TYPE parameters, lower-cased and with "internet" left out, follow
    the address in parentheses when there are any.

    :param c: the canvas to draw on
    :param email: object with a ``value`` (the address) and, optionally,
        ``params`` (a mapping which may contain "TYPE")
    """
    tp = None
    if hasattr(email, "params"):
        # TYPE may be absent, in which case get() returns None
        tp = [q.lower() for q in (email.params.get("TYPE") or ())
              if (q.lower() not in (u"internet",))]

    text = email.value
    note('email is %s, tp is %s, x is %s, y is %s', text, tp, x, y)

    label = "email: "
    labelFontName = "Helvetica-Oblique"
    labelFontSize = 12
    textFontName = "Courier-Bold"
    textFontSize = 12

    # choose some colors
    c.setStrokeColorRGB(0, 0, 0)
    c.setFillColorRGB(0, 0, 0)

    c.setFont(labelFontName, labelFontSize)
    c.drawString(x, y, label)
    x += c.stringWidth(label, labelFontName, labelFontSize)

    c.setFont(textFontName, textFontSize)
    c.drawString(x, y, text)
    x += c.stringWidth(text + " ", textFontName, textFontSize)

    if tp:
        c.setFont(labelFontName, labelFontSize)
        c.drawString(x, y, '(' + ' '.join(tp) + ')')
def page_bboxes (self, pageno):
    """Return the raw contents of the bounding-box file for page ``pageno``.

    The file read is ``thumbnails/<pageno>.bboxes`` in the document's folder.

    :param pageno: the page number used in the file name
    :type pageno: int
    :return: the file's bytes, or ``None`` if the file does not exist
    """
    filename = os.path.join(self.__folder, "thumbnails", "%d.bboxes" % pageno)
    exists = os.path.exists(filename)
    note(4, "bboxes for page %s/%d %s exist", self.id, pageno,
         (exists and "does") or "doesn't")
    if not exists:
        return None
    fp = open(filename, 'rb')
    try:
        return fp.read()
    finally:
        # don't leave closing the file to the garbage collector
        fp.close()
def __init__(self, doc, options):
    """Set up a fake PDF document for the card ``doc``.

    Besides the ``FakePDFDoc`` initialization, this records a copy of
    ``options``, the card itself, and the card's timestamp: the latest of
    its REV values that parses as either "YYYYMMDDTHHMMSSZ" or "YYYYMMDD".
    The timestamp stays ``None`` if there is no parseable REV value.
    """
    note("processing card %s", doc.fn.value)
    note(4, "card data is %s", doc.prettyPrint())
    self.pdffile = None
    FakePDFDoc.__init__(self, doc, options)
    self.checkocr = false
    self.__options = options.copy()
    self.__card = doc
    self.__timestamp = None

    timestamps = []
    for x in (doc.contents.get("REV") or []):
        # x.value is a timestamp; try the full form first, then date-only
        for timestamp_format in ("%Y%m%dT%H%M%SZ", "%Y%m%d"):
            try:
                timestamps.append(time.strptime(x.value, timestamp_format))
                break
            except (AttributeError, TypeError, ValueError):
                # unparseable in this format; try the next, or give up on x
                continue
    # Guard against REV entries none of which could be parsed; indexing an
    # empty list here would raise IndexError.
    if timestamps:
        self.__timestamp = max(timestamps)
def get_page_numbering (self):
    """Return this document's page numbering, computing and caching it on first use.

    The "page-numbers" metadata value, if present, is a ";"-separated list
    of range specs, each either

    * ``TYPE,START,FIRST-LAST`` -- TYPE is ``d``, ``b`` or ``r``; START is
      the first page number (1 if empty); FIRST-LAST are page indices, or
    * ``BEGIN-END`` (or ``BEGIN--END``) -- decimal numbering from BEGIN,
      covering page indices 0 through END-BEGIN.

    Specs which cannot be understood are logged and skipped.  Without
    "page-numbers", the "first-page-number" metadata value is used,
    defaulting to 1.

    :return: a list of ``(index_range, type, first_page_number)`` tuples, or
             an integer giving the number of the first page
    """
    if self.__pagenumbering is None:
        page_numbers = self.get_metadata("page-numbers")
        if page_numbers:
            self.__pagenumbering = []
            for rangespec in page_numbers.split(";"):
                try:
                    if ',' in rangespec:
                        t, s, r = rangespec.split(",")
                        if t not in ('d', 'b', 'r'):
                            note("document %s:  Invalid page-numbers string (unrecognized numbering-type code '%s'):  %s",
                                 self.id, t, page_numbers)
                            continue
                    elif '-' in rangespec:
                        # "BEGIN--END" and "BEGIN-END" mean the same thing
                        separator = (('--' in rangespec) and '--') or '-'
                        begin, end = [int(x) for x in rangespec.split(separator)]
                        t = 'd'
                        s = begin
                        r = "0-" + str(end - begin)
                    else:
                        # No separator at all.  Skip it explicitly rather
                        # than fall through with values left over from the
                        # previous spec.
                        note("document %s:  Invalid page-numbers string (unrecognized range '%s'):  %s",
                             self.id, rangespec, page_numbers)
                        continue
                    # NOTE(review): a START of 0 (or an empty START) becomes 1 here.
                    self.__pagenumbering.append(([int(x) for x in r.split("-")], t, (s and int(s)) or 1,))
                except Exception:
                    note("exception processing range string '%s' in page_numbers value '%s':\n%s",
                         rangespec, page_numbers,
                         ''.join(traceback.format_exception(*sys.exc_info())))
        else:
            first_page = self.get_metadata("first-page-number")
            if first_page:
                self.__pagenumbering = int(first_page)
            else:
                self.__pagenumbering = 1
    return self.__pagenumbering
def _common(self):
    """Authorize the request; on success set the login cookie and redirect.

    The request is accepted if ``authorized(self.repo, self)`` says so, or
    -- only when ``authorized`` is ``is_logged_in`` -- if a "password"
    argument was supplied and the repository accepts it.

    NOTE(review): the redirect target is taken from the "originaluri"
    request argument as given -- confirm that is acceptable.

    :return: True if authorized (cookie set, redirect issued), else False
    """
    self.set_header('Server', "UpLib/%s (Tornado %s)" % (self.repo.get_version(), TornadoVersion))
    password = self.get_argument("password", default=None)

    permitted = authorized(self.repo, self)
    if not permitted:
        permitted = ((authorized is is_logged_in) and password
                     and self.repo.check_password(password))
    if not permitted:
        return False

    # OK, authorized, so set cookie and redirect to original URI
    randomness = str(self.request.headers)
    cookie = self.repo.new_cookie(randomness)

    # would be nice if we could omit the secure if the web browser is local,
    # but we can't tell, because with stunnel in the chain, every request we
    # receive comes from localhost.  If we could get rid of stunnel, we could
    # tell, though.
    cookie_str = '%s=%s; path=/' % (cookie.name(), cookie.value())
    if not self.allow_insecure_cookies:
        cookie_str += "; Secure"
    note("request.channel.addr is %s, cookie_str is \"%s\"", self.request.host, cookie_str)

    self.set_header('Set-Cookie', cookie_str)
    self.set_header('Cache-Control', 'no-cache="set-cookie"')   # cf RFC 2109
    self.redirect(self.get_argument("originaluri", default=None) or "/")
    return True
# unix_mainloop(repodir, port, logfilename): start the angel for the
# repository in "repodir" and run the asyncore event loop.
#
# "port" is converted with int(); a ValueError leaves it as None.  If
# "repodir" is not a directory, or the port is None, a message is written to
# stderr and usage() is called.  start_angel() is then called; if it raises,
# the traceback is written to stderr and the process exits with status 1.
# After that asyncore.loop() is run inside "while True"; KeyboardInterrupt
# and SystemExit are logged at level 4 and re-raised.
#
# NOTE(review): sys.stderr.write() is given two arguments in the "Bad port
# number" branch, and "portnum" is not defined anywhere in the visible code;
# this looks like it was meant to be '... % port' -- confirm and fix.
# NOTE(review): int(None) raises TypeError rather than ValueError, so a
# missing port would not reach the "port is None" branch -- confirm callers.
# NOTE(review): start_angel() is still reached after usage() unless usage()
# exits the process -- confirm.
# NOTE(review): the definition is cut off after its final "except:" in this
# view, so that handler and anything following it are not documented here.
def unix_mainloop(repodir, port, logfilename): try: port = int(port) except ValueError: port = None if not os.path.isdir(repodir): sys.stderr.write("Specified directory, %s, is not a directory!\n" % repodir) usage() elif port is None: sys.stderr.write("Bad port number, %s, specified.\n", portnum) usage() try: repo = start_angel(repodir, port, logfilename) except: sys.stderr.write("%s: exception initializing angel:\n%s" % ( time.ctime(), ''.join(traceback.format_exception(*sys.exc_info())))) sys.exit(1) # Finally, start up the server loop! This loop will not exit until # all clients and servers are closed. You may cleanly shut the system # down by sending SIGINT (a.k.a. KeyboardInterrupt). from uplib.plibUtil import note while True: try: asyncore.loop() except (KeyboardInterrupt, SystemExit), x: note(4, "Exited from main loop due to exception:\n%s", ''.join(traceback.format_exception(*sys.exc_info()))) raise except:
def collect_incoming_data(self, data):
    """Log the arrival of ``data``, then hand it to the base HTTP channel.

    The log line names the current request's command and URI when there is
    a current request; otherwise the data is reported as header data.
    """
    request = self.current_request
    if not request:
        note(4, "%s:  new header data:  %d bytes", self.addr, len(data))
    else:
        note(4, "%s:  new data:  %d bytes => %s:%s...",
             self.addr, len(data), request.command, request.uri)
    http_server.http_channel.collect_incoming_data(self, data)
def get_page_images(self):
    """Write the single page image, "page00001.png", into the images folder.

    If the track has cover art it is converted to PNG with PIL.  Otherwise,
    or if the conversion fails, the icon for the track's "music-genre"
    metadata value is copied instead, falling back to the generic music
    icon, and "images-dpi" is set to '100'.
    """
    # io.BytesIO is the in-memory file to use for binary image data.  (The
    # former "from PIL import ..., StringIO" named a module PIL does not
    # provide, so the cover art could never be decoded.)
    import io

    if not os.path.exists(self.images_path()):
        os.makedirs(self.images_path())
    imagespath = os.path.join(self.images_path(), "page00001.png")

    if self.artwork:
        try:
            from PIL import Image
            # assumes self.artwork[1] holds the raw image bytes -- TODO confirm
            im = Image.open(io.BytesIO(self.artwork[1]))
            im.save(imagespath, "PNG")
            return
        except Exception:
            # Best effort: fall through to the genre icon so that a page
            # image is always produced.
            note("exception trying to use PIL on MP3 cover art")

    # check self.metadata['music-genre'] and pick the right genre icon
    self.metadata['images-dpi'] = '100'
    genre = self.metadata.get('music-genre', 'generic')
    note("music-genre is %s", genre)
    genre_icon_path = os.path.join(UPLIB_SHARE, "images", "music", genre.lower() + ".png")
    if not os.path.exists(genre_icon_path):
        genre_icon_path = os.path.join(UPLIB_SHARE, "images", "music", "generic.png")
    shutil.copyfile(genre_icon_path, imagespath)
def rip (self, location, doc_id):
    """Calculate page bounding boxes for the document folder at ``location``.

    Any exception from ``calculate_page_bboxes`` is logged, together with
    ``doc_id``, and then re-raised.
    """
    try:
        calculate_page_bboxes(self.repository(), location)
    except:
        trace_lines = traceback.format_exception(*sys.exc_info())
        note("Exception processing %s:\n%s\n" % (doc_id, ''.join(trace_lines)))
        note("No page bounding boxes generated.")
        raise