def __setstate__(self, state):
    """Rebuild the document from its pickled state.

    `state` is a sequence whose first element is the document id; nothing
    else is read from it.  The repository is taken from the
    `TheRepository` object exported by `uplib.repository`.

    Unpickling does not run `__init__`, so every attribute that `__init__`
    establishes is also established here, with the same initial values.
    Otherwise a restored document would be missing those attributes.
    """
    # we do this import here so that we can import 'document' without
    # causing a recursive import of 'repository'
    from uplib.repository import TheRepository

    self.id = state[0]
    self.repo = TheRepository
    self.__metadata = None
    self.__folder = self.repo.doc_location(self.id)
    self.__addtime = id_to_time(self.id)
    self.__pdforig = None

    # Same initial values as in __init__.
    self.__category_strings = None
    self.__citation = None
    self.__date = None
    self.__bboxes = {}
    self.__links = {}
    self.__removed_links = {}
    self.__touch_time = None
    self.__pagenumbering = None
    self.__icon_size = None
def __init__ (self, repo, id):
    """Create the document object for document `id` held by `repo`.

    The document's folder is obtained from `repo.doc_location(id)` and its
    add time from `id_to_time(id)`.  Every other private attribute starts
    out unset (None) or as an empty dict.
    """
    self.repo, self.id = repo, id
    self.__metadata = None

    # Values derived directly from the repository and the id.
    self.__folder = repo.doc_location(id)
    self.__addtime = id_to_time(id)

    # Attributes that start out unset.
    self.__pdforig = self.__category_strings = None
    self.__citation = self.__date = None

    # Each mapping gets its own fresh dict.
    self.__bboxes = {}
    self.__links = {}
    self.__removed_links = {}

    self.__touch_time = self.__pagenumbering = self.__icon_size = None
def read_document (doc, categories, collections, authors):
    """Build the index record for `doc` and update the shared indexes.

    Returns a dict with the keys 'id', 'rloc', 'title', 'page-count',
    'date', 'addtime', 'reftime', 'categories' and 'authors', filled in
    from `doc.get_metadata()` and `doc.id`.

    As a side effect, `categories` and `authors` (dicts keyed by name) gain
    or extend an entry of the form {'rloc': 0, 'docs': [...], 'name': name}
    for every category and author of the document.  `collections` is
    accepted but not used here.
    """

    def encode_date(datestring):
        # Pack the parsed date into a single integer; 0 means "no date".
        parts = parse_date(datestring)
        if parts and sum(parts) != 0:
            # NOTE(review): parts is presumably (year, month, day).  With a
            # month weight of 13 and day values up to 31, distinct dates can
            # map to the same number (month 1/day 14 and month 2/day 1 both
            # contribute 27) -- confirm against whatever decodes this value
            # before changing the formula.
            return parts[0] * (13 * 32) + parts[1] * 13 + parts[2]
        return 0

    def register(index, key):
        # File doc.id under `key`, creating the index entry on first use.
        if key in index:
            index[key]['docs'].append(doc.id)
        else:
            index[key] = { 'rloc': 0, 'docs': [ doc.id, ], 'name': key }

    docdata = {'id': doc.id, 'rloc': 0}
    mdata = doc.get_metadata()

    docdata['title'] = mdata.get('title', "")
    docdata['page-count'] = int(mdata.get('page-count', 1))

    raw_date = mdata.get('date')
    if not raw_date:
        docdata['date'] = 0
    else:
        docdata['date'] = encode_date(raw_date)

    added = int(id_to_time(doc.id))
    docdata['addtime'] = added
    # we don't really know the reftime (FIXME) but we'll use the document
    # add time as an approximation
    docdata['reftime'] = added

    category_names = []
    cstring = mdata.get('categories', "")
    if cstring:
        for category in split_categories_string(cstring):
            register(categories, category)
            category_names.append(category)
    docdata['categories'] = category_names

    author_names = []
    for raw_author in mdata.get('authors', "").split(" and "):
        if not raw_author:
            continue
        authname = figure_author_name(raw_author)
        register(authors, authname)
        author_names.append(authname)
    docdata['authors'] = author_names

    return docdata
def build_html_abstract_display (self, doc, icon_cid):
    """Render the HTML summary block for `doc`.

    The markup is a one-row table.  The left cell holds the document icon
    (referenced as ``cid:<icon_cid>``) linked to the document viewer, with
    the timestamp derived from the document id underneath.  The right cell
    holds the title, authors, publication date, a summary, the page count,
    the categories, and links to the original, PDF and HTML renditions.

    The summary is taken from the first of the metadata fields 'comment',
    'abstract' or 'summary' that is present.

    Returns a 2-tuple ``(html, name)`` where `name` is the metadata
    'title', else the metadata 'name' in square brackets, else the
    document id.

    Differences from the earlier implementation:
      * the "(HTML)" link is now really omitted when the document's
        apparent MIME type starts with "text/html"; the old test looked at
        the decorated display fragment, which never matched;
      * the authors string and the MIME type are HTML-escaped like the
        other metadata values written into the page.
    """
    metadata = doc.get_metadata()
    pubdate = metadata.get("date")
    page_count = metadata.get('page-count')
    port = doc.repo.secure_port()

    # Timestamp derived from the document id; zero padding on the day and
    # hour is replaced by a space.
    stamp = time.strftime("%d %b %Y, %I:%M %p", time.localtime(id_to_time(doc.id)))
    date = re.sub(" 0|^0", " ", stamp)

    name = doc.id
    if 'title' in metadata:
        name = metadata.get('title')
    elif 'name' in metadata:
        name = '[' + metadata.get('name') + ']'

    if 'comment' in metadata:
        summary = htmlescape(metadata.get('comment', ''))
    elif 'abstract' in metadata:
        summary = "<i>" + htmlescape(metadata.get('abstract', '')) + '</i>'
    elif 'summary' in metadata:
        summary = '<font color="%s">' % STANDARD_DARK_COLOR + htmlescape(metadata.get('summary')) + '</font>'
    else:
        summary = '<i>(No summary available.)</i>'

    fp = StringIO()

    # Left cell: icon linking to the viewer, with the timestamp below it.
    fp.write(u'<table border=0><tr><td>')
    fp.write(u'<center>')
    fp.write(u'<a href="https://%s:%d/action/basic/dv_show?doc_id=%s" border=0>' % (self.ip, port, doc.id))
    fp.write(u'<img src="cid:%s">' % icon_cid)
    fp.write(u'</a><p><small><font color="%s">(%s)</font></small></center></td><td> </td>' % (STANDARD_DARK_COLOR, date))

    # Right cell: title, then authors and publication date if known.
    fp.write(u'<td valign=top><h3>%s</h3>' % htmlescape(name))
    has_authors = 'authors' in metadata
    if has_authors or pubdate:
        fp.write(u'<p><small>')
        if has_authors:
            # Escape first; the " and " separators are unaffected by it.
            fp.write(u'<b> %s</b>' % htmlescape(metadata['authors']).replace(' and ', ', '))
        if pubdate:
            formatted_date = format_date(pubdate, True)
            fp.write(u' <i><font color="%s">%s</font></i>' % (STANDARD_DARK_COLOR, formatted_date))
        fp.write(u'</small>\n')

    fp.write(u'<P>%s' % summary)

    if page_count:
        if int(page_count) != 1:
            plural = "s"
        else:
            plural = ""
        fp.write(u'<small><i><font color="%s"> · (%s page%s)' % (STANDARD_DARK_COLOR, page_count, plural))
        fp.write(u'</font></i></small>\n')

    cstrings = doc.get_category_strings()
    fp.write(u'<p>Categories: ')
    if cstrings:
        fp.write(u' · '.join([htmlescape(s) for s in cstrings]))
    else:
        fp.write('(none)')

    typ = doc.get_metadata("apparent-mime-type")
    if typ:
        # Unicode literal so the separator character combines safely with
        # the unicode templates below.
        mtype = u' · <small>%s</small>' % htmlescape(typ)
    else:
        mtype = ''

    fp.write(u'<p><a href="https://%s:%s/action/externalAPI/fetch_original?doc_id=%s&browser=true"><font color="%s">(Original%s)</font></a>' % (self.ip, port, doc.id, STANDARD_DARK_COLOR, mtype))
    fp.write(u' · <a href="https://%s:%s/action/basic/doc_pdf?doc_id=%s"><font color="%s">(PDF)</font></a>' % (self.ip, port, doc.id, STANDARD_DARK_COLOR))

    # Test the raw MIME type, not the decorated `mtype` fragment: when the
    # original is itself HTML the separate "(HTML)" link is left out.
    original_is_html = bool(typ) and typ.lower().startswith("text/html")
    if not original_is_html:
        fp.write(u' · <a href="https://%s:%s/action/basic/doc_html?doc_id=%s"><font color="%s">(HTML)</font></a>' % (self.ip, port, doc.id, STANDARD_DARK_COLOR))

    fp.write(u'</td></tr></table>')
    d = fp.getvalue()
    fp.close()
    return d, name
def get_msg_as_email (self, doc):
    """Return `doc` as an e-mail message object, or None on failure.

    If the document's "apparent-mime-type" metadata is "message/rfc822",
    the first file in the document's "originals" folder is read and parsed
    with `message_from_string`.

    Otherwise a "multipart/related" message is synthesised.  It carries an
    HTML description (from `build_html_abstract_display`) and the document
    icon as a PNG part.  Message-ID, Date, Subject and, when the document
    has authors, From headers are added.  The Date comes from
    `doc.get_date()` when that yields a usable value, else from the
    document id.

    Any exception is logged through `note` and turned into a None return.

    Fixes relative to the earlier implementation:
      * the From display name now escapes backslashes before quotes; the
        old order re-escaped the backslash placed before each quote and
        left the quote itself unescaped;
      * the original message file is closed even if reading it fails;
      * bare `except:` clauses are narrowed.
    """

    def make_header(name, value):
        # Use US-ASCII when the value fits, otherwise fall back to UTF-8.
        try:
            v = value.encode("US-ASCII")
            charset = "US-ASCII"
        except UnicodeError:
            v = value.encode("UTF-8")
            charset = "UTF-8"
        return name, email.Header.Header(v, charset, 77, name).encode()

    def build_icon(doc):
        # Base64-encoded PNG part holding the document icon.
        icon = doc.document_icon()
        img_part = email.Message.Message()
        img_part.set_type("image/png")
        cid = "%s.%s.%s.icon" % (self.ip, doc.repo.secure_port(), doc.id)
        img_part.add_header("Content-ID", cid)
        img_part.add_header("Content-Transfer-Encoding", "base64")
        img_part.set_payload(base64.encodestring(icon))
        return img_part

    def build_description(doc, display):
        # Quoted-printable HTML part wrapping the summary markup.
        desc_part = email.Message.Message()
        desc_part.set_type("text/html")
        desc_part.add_header("Content-Transfer-Encoding", "quoted-printable")
        html = ('<html><body bgcolor="%s">' % STANDARD_BACKGROUND_COLOR
                + display.encode('UTF-8')
                + "</body></html>\n")
        desc_part.set_payload(quopri.encodestring(html), "UTF-8")
        return desc_part

    try:
        mime_type = doc.get_metadata("apparent-mime-type")
        if mime_type == "message/rfc822":
            originals = os.path.join(doc.folder(), "originals")
            filepath = os.path.join(originals, os.listdir(originals)[0])
            fp = open(filepath, 'r')
            try:
                s = fp.read()
            finally:
                # Release the handle even when read() raises.
                fp.close()
            return message_from_string(s)

        icon_payload = build_icon(doc)
        display, name = self.build_html_abstract_display(doc, icon_payload.get("Content-ID"))

        msg = email.Message.Message()
        msg.set_type("multipart/related;boundary=%s%s%s%s" % (self.ip, doc.repo.secure_port(), doc.id, int(time.time())))
        msg.add_header(*make_header("Message-ID", "%s:%s:%s" % (self.ip, doc.repo.secure_port(), doc.id)))

        # Prefer the document's own date; fall back to the time derived
        # from the document id when there is none or it cannot be converted.
        date_header = None
        d = doc.get_date()
        if d:
            try:
                date_header = email.Utils.formatdate(time.mktime((d[0], (d[1] or 1), (d[2] or 1), 0, 0, 0, 0, 1, -1,)))
            except Exception:
                date_header = None
        if date_header is None:
            date_header = email.Utils.formatdate(id_to_time(doc.id))
        msg.add_header(*make_header("Date", date_header))

        msg.add_header(*make_header("Subject", name))

        authors = doc.get_metadata("authors")
        if authors:
            authors = authors.replace(" and ", ", ")
            # Backslashes must be doubled first, so the escapes added for
            # '"' and CR afterwards are not themselves escaped again.
            authors = authors.replace('\\', '\\\\')
            authors = authors.replace('"', '\\"').replace('\r', '\\\r')
            msg.add_header(*make_header("From", '"' + authors + '"'))

        msg.attach(build_description(doc, display))
        msg.attach(icon_payload)
        # note("msg is:\n%s", str(msg))
        return msg
    except Exception:
        note("Exception getting document %s as email:\n%s", doc.id,
             " ".join(traceback.format_exception(*sys.exc_info())))
        return None