def __setstate__(self, state):
     """Rebuild the document from its pickled state.

     `state` is indexed as a sequence and only its first element, the
     document id, is used.  The repository is not taken from the pickle;
     the instance is attached to ``TheRepository`` from
     ``uplib.repository``.  The folder and add time are recomputed from
     the id, and every cached field is reset to the same empty value that
     ``__init__`` gives it, because unpickling does not call ``__init__``.
     """
     # we do this import here so that we can import 'document' without
     # causing a recursive import of 'repository'
     from uplib.repository import TheRepository
     self.id = state[0]
     self.repo = TheRepository
     self.__metadata = None
     self.__folder = self.repo.doc_location(self.id)
     self.__addtime = id_to_time(self.id)
     self.__pdforig = None
     # The remaining fields mirror __init__, so that a restored instance
     # carries the same attributes as a freshly constructed one.
     self.__category_strings = None
     self.__citation = None
     self.__date = None
     self.__bboxes = {}
     self.__links = {}
     self.__removed_links = {}
     self.__touch_time = None
     self.__pagenumbering = None
     self.__icon_size = None
    def __init__ (self, repo, id):
        """Set up a document object for the document `id` held by `repo`.

        The document folder comes from ``repo.doc_location(id)`` and the add
        time from ``id_to_time(id)``.  Every other private field starts out
        empty: ``None`` for single values, an empty dict for the tables.
        """
        # identity of the document
        self.id = id
        self.repo = repo

        # values computed from the id
        self.__folder = repo.doc_location(id)
        self.__addtime = id_to_time(id)

        # single-valued fields, unset until something fills them in
        self.__metadata = self.__pdforig = None
        self.__category_strings = self.__citation = self.__date = None
        self.__touch_time = self.__pagenumbering = self.__icon_size = None

        # tables; each needs its own dict object
        self.__bboxes = {}
        self.__links = {}
        self.__removed_links = {}
    def read_document (doc, categories, collections, authors):
        """Summarize `doc` as a plain dict and register it in the shared indexes.

        `categories` and `authors` are mappings that are updated in place:
        each category / author name of the document maps to a dict with the
        keys 'rloc', 'docs' (list of document ids) and 'name'.  `collections`
        is accepted but not used by this function.

        Returns a dict with the keys 'id', 'rloc', 'title', 'page-count',
        'date', 'addtime', 'reftime', 'categories' and 'authors'.
        """

        def pack_date(text):
            # Fold the parsed date into one integer; a date that does not
            # parse, or whose fields are all zero, is reported as 0.
            # NOTE(review): if the fields are (year, month, day), the month
            # multiplier of 13 is smaller than the largest day number, so the
            # packed values would not sort chronologically -- confirm with
            # whatever reads this value before changing it.
            fields = parse_date(text)
            if fields and sum(fields) != 0:
                return fields[0] * (13 * 32) + fields[1] * 13 + fields[2]
            return 0

        def file_under(index, key):
            # Add this document to index[key], creating the entry on first use.
            if key in index:
                index[key]['docs'].append(doc.id)
            else:
                index[key] = { 'rloc': 0, 'docs': [ doc.id, ], 'name': key }

        summary = {'id': doc.id, 'rloc': 0}
        metadata = doc.get_metadata()
        summary['title'] = metadata.get('title', "")
        summary['page-count'] = int(metadata.get('page-count', 1))

        summary['date'] = 0
        pubdate = metadata.get('date')
        if pubdate:
            summary['date'] = pack_date(pubdate)

        added = int(id_to_time(doc.id))
        summary['addtime'] = added
        # we don't really know the reftime (FIXME) but we'll use the document add time as an approximation
        summary['reftime'] = added

        doc_categories = []
        summary['categories'] = doc_categories
        category_text = metadata.get('categories', "")
        if category_text:
            for category in split_categories_string(category_text):
                file_under(categories, category)
                doc_categories.append(category)

        doc_authors = []
        summary['authors'] = doc_authors
        for raw_author in metadata.get('authors', "").split(" and "):
            if not raw_author:
                # empty piece, e.g. when there is no authors value at all
                continue
            author_name = figure_author_name(raw_author)
            file_under(authors, author_name)
            doc_authors.append(author_name)

        return summary
    def build_html_abstract_display (self, doc, icon_cid):
        """Render an HTML summary table for `doc`.

        The table holds the document icon (referenced as ``cid:<icon_cid>``
        and linked to the viewer on ``self.ip``), the add time, the display
        name, authors and publication date when present, a summary, the page
        count, the categories, and links to the original, PDF and HTML
        renditions.  The HTML link is left out when the document's
        "apparent-mime-type" already is text/html.

        Returns a ``(html, name)`` tuple.  `name` is the 'title' metadata
        value, else the 'name' value in square brackets, else the document id.
        """

        metadata = doc.get_metadata()
        pubdate = metadata.get("date")
        page_count = metadata.get('page-count')
        # add time of the document; zero padding ("05") is replaced by a space
        added = re.sub(" 0|^0", " ",
                       time.strftime("%d %b %Y, %I:%M %p",
                                     time.localtime(id_to_time(doc.id))))

        # display name: title, else "[name]", else the raw document id
        name = doc.id
        if 'title' in metadata:
            name = metadata.get('title')
        elif 'name' in metadata:
            name = '[' + metadata.get('name') + ']'

        fp = StringIO()
        fp.write(u'<table border=0><tr><td>')
        fp.write(u'<center>')
        fp.write(u'<a href="https://%s:%d/action/basic/dv_show?doc_id=%s" border=0>' % (self.ip, doc.repo.secure_port(), doc.id))
        fp.write(u'<img src="cid:%s">' % icon_cid)
        fp.write(u'</a><p><small><font color="%s">(%s)</font></small></center></td><td>&nbsp;</td>'
                 % (STANDARD_DARK_COLOR, added))
        fp.write(u'<td valign=top><h3>%s</h3>' % htmlescape(name))

        has_authors = 'authors' in metadata
        if has_authors or pubdate:
            fp.write(u'<p><small>')
            if has_authors:
                # authors come from document metadata, so escape them like
                # the title and summary before putting them into the markup
                fp.write(u'<b>&nbsp;&nbsp;&nbsp;&nbsp;%s</b>'
                         % htmlescape(metadata['authors'].replace(' and ', ', ')))
            if pubdate:
                formatted_date = format_date(pubdate, True)
                fp.write(u'&nbsp;&nbsp;&nbsp;&nbsp;<i><font color="%s">%s</font></i>' % (STANDARD_DARK_COLOR,
                                                                                        formatted_date))
            fp.write(u'</small>\n')

        # summary text: comment, else abstract, else summary, else a placeholder
        if 'comment' in metadata:
            summary = htmlescape(metadata.get('comment', ''))
        elif 'abstract' in metadata:
            summary = "<i>" + htmlescape(metadata.get('abstract', '')) + '</i>'
        elif 'summary' in metadata:
            summary = '<font color="%s">' % STANDARD_DARK_COLOR + htmlescape(metadata.get('summary')) + '</font>'
        else:
            summary = '<i>(No summary available.)</i>'
        fp.write(u'<P>%s' % summary)

        if page_count:
            if int(page_count) != 1:
                plural = "s"
            else:
                plural = ""
            fp.write(u'<small><i><font color="%s"> &middot; (%s page%s)'
                     % (STANDARD_DARK_COLOR, page_count, plural))
            fp.write(u'</font></i></small>\n')

        cstrings = doc.get_category_strings()
        fp.write(u'<p>Categories:  ')
        if cstrings:
            fp.write(u' &middot; '.join([htmlescape(s) for s in cstrings]))
        else:
            fp.write('(none)')

        typ = doc.get_metadata("apparent-mime-type")
        if typ:
            mtype = ' &middot; <small>%s</small>' % typ
        else:
            mtype = ''
        fp.write(u'<p><a href="https://%s:%s/action/externalAPI/fetch_original?doc_id=%s&browser=true"><font color="%s">(Original%s)</font></a>'
                 % (self.ip, doc.repo.secure_port(), doc.id, STANDARD_DARK_COLOR, mtype))
        fp.write(u' &middot; <a href="https://%s:%s/action/basic/doc_pdf?doc_id=%s"><font color="%s">(PDF)</font></a>'
                 % (self.ip, doc.repo.secure_port(), doc.id, STANDARD_DARK_COLOR))
        # Test the raw MIME type here, not the decorated `mtype` label: the
        # label starts with " &middot;" (or is empty) and so can never start
        # with "text/html".
        if not (typ and typ.lower().startswith("text/html")):
            fp.write(u' &middot; <a href="https://%s:%s/action/basic/doc_html?doc_id=%s"><font color="%s">(HTML)</font></a>'
                     % (self.ip, doc.repo.secure_port(), doc.id, STANDARD_DARK_COLOR))
        fp.write(u'</td></tr></table>')

        html = fp.getvalue()
        fp.close()
        return html, name
    def get_msg_as_email (self, doc):
        """Return `doc` as an email message object, or None on failure.

        A document whose "apparent-mime-type" is "message/rfc822" is parsed
        from the first file listed in its "originals" folder.  Any other
        document becomes a synthetic multipart/related message: an HTML
        description produced by build_html_abstract_display() plus the
        document icon as a base64 PNG part, with Message-ID, Date, Subject
        and, when the document has authors, From headers.

        Any exception is logged with note() and reported by returning None.
        """
        try:
            mime_type = doc.get_metadata("apparent-mime-type")
            if mime_type == "message/rfc822":
                originals = os.path.join(doc.folder(), "originals")
                filepath = os.path.join(originals, os.listdir(originals)[0])
                fp = open(filepath, 'r')
                # close the file even when the read fails
                try:
                    s = fp.read()
                finally:
                    fp.close()
                return message_from_string(s)

            def make_header(name, value):
                # Encode the header as plain ASCII when the value allows it,
                # and as UTF-8 otherwise.
                try:
                    v = value.encode("US-ASCII")
                    charset = "US-ASCII"
                except UnicodeError:
                    v = value.encode("UTF-8")
                    charset = "UTF-8"
                return name, email.Header.Header(v, charset, 77, name).encode()

            def build_icon(doc):
                # The icon travels as a base64 PNG part; its Content-ID is
                # what the HTML description refers to.
                icon = doc.document_icon()
                img_part = email.Message.Message()
                img_part.set_type("image/png")
                cid = "%s.%s.%s.icon" % (self.ip, doc.repo.secure_port(), doc.id)
                img_part.add_header("Content-ID", cid)
                img_part.add_header("Content-Transfer-Encoding", "base64")
                img_part.set_payload(base64.encodestring(icon))
                return img_part

            def build_description(doc, display):
                # Wrap the HTML fragment in a page and ship it quoted-printable.
                desc_part = email.Message.Message()
                desc_part.set_type("text/html")
                desc_part.add_header("Content-Transfer-Encoding", "quoted-printable")
                desc_part.set_payload(quopri.encodestring('<html><body bgcolor="%s">' % STANDARD_BACKGROUND_COLOR +
                                                          display.encode('UTF-8') + "</body></html>\n"), "UTF-8")
                return desc_part

            icon_payload = build_icon(doc)
            display, name = self.build_html_abstract_display(doc, icon_payload.get("Content-ID"))
            msg = email.Message.Message()
            msg.set_type("multipart/related;boundary=%s%s%s%s" % (self.ip, doc.repo.secure_port(), doc.id, long(time.time())))
            msg.add_header(*make_header("Message-ID", "%s:%s:%s" % (self.ip, doc.repo.secure_port(), doc.id)))

            # Date header: the document's own date when it has a usable one
            # (missing month/day count as 1), otherwise the time encoded in
            # the document id.
            d = doc.get_date()
            if d:
                try:
                    d = email.Utils.formatdate(time.mktime((d[0], (d[1] or 1), (d[2] or 1), 0, 0, 0, 0, 1, -1,)))
                except Exception:
                    d = email.Utils.formatdate(id_to_time(doc.id))
            else:
                d = email.Utils.formatdate(id_to_time(doc.id))
            msg.add_header(*make_header("Date", d))
            msg.add_header(*make_header("Subject", name))

            authors = doc.get_metadata("authors")
            if authors:
                # Escape backslashes first.  Doing it last would double the
                # backslashes just placed in front of quotes and CRs, which
                # undoes that quoting inside the quoted From value.
                authors = authors.replace(" and ", ", ").replace('\\', '\\\\').replace('"', '\\"').replace('\r', '\\\r')
                msg.add_header(*make_header("From", '"' + authors + '"'))

            body_payload = build_description(doc, display)
            msg.attach(body_payload)
            msg.attach(icon_payload)
            # note("msg is:\n%s", str(msg))
            return msg
        except Exception:
            # format_exception() lines already end in newlines, so join them as-is
            note("Exception getting document %s as email:\n%s", doc.id, "".join(traceback.format_exception(*sys.exc_info())))
            return None