def _index_file(self, request, writer, filename, mode='update'):
    """ Index a file from the file system below the hardcoded root page 'FS'

    The file is stored like an attachment: the 'pagename' sort key is the
    root page name and the 'attachment' sort key is the filename.

    Assumes that the write lock is acquired.

    If an OSError / IOError is raised while processing the file, it is
    logged at debug level and the file is skipped.

    @arg request: the request object (request.cfg.interwikiname is read here)
    @arg writer: the index writer object
    @arg filename: path of the file to index
    @arg mode: 'add' = just add, no checks
               'update' = check if already in index and update if needed (mtime)
               any other value = nothing gets indexed
    """
    fs_rootpage = 'FS' # XXX FS hardcoded
    # Defaults for an unexpected mode: index nothing instead of failing on
    # an unbound local.
    uid = None
    updated = False
    try:
        wikiname = request.cfg.interwikiname or 'Self'
        itemid = "%s:%s" % (wikiname, os.path.join(fs_rootpage, filename))
        mtime = os.path.getmtime(filename)
        mtime = wikiutil.timestamp2version(mtime)
        if mode == 'update':
            query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', itemid))
            enq, mset, docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
            if docs:
                doc = docs[0] # there should be only one
                uid = doc['uid']
                docmtime = long(doc['values']['mtime'])
                # only re-index when the file is newer than the indexed copy
                updated = mtime > docmtime
                logging.debug("uid %r: mtime %r > docmtime %r == updated %r", uid, mtime, docmtime, updated)
            else:
                # not in the index yet; uid stays None
                updated = True
        elif mode == 'add':
            updated = True
        logging.debug("%s %r", filename, updated)
        if not updated:
            return

        xitemid = xapdoc.Keyword('itemid', itemid)
        mimetype, file_content = self.contentfilter(filename)
        xwname = xapdoc.SortKey('wikiname', wikiname)
        xpname = xapdoc.SortKey('pagename', fs_rootpage)
        xattachment = xapdoc.SortKey('attachment', filename) # XXX we should treat files like real pages, not attachments
        xmtime = xapdoc.SortKey('mtime', mtime)
        xrev = xapdoc.SortKey('revision', '0')
        title = " ".join(os.path.join(fs_rootpage, filename).split("/"))
        xtitle = xapdoc.Keyword('title', title)
        # one keyword for the full mimetype plus one for each of its parts
        xmimetypes = [xapdoc.Keyword('mimetype', mt) for mt in [mimetype, ] + mimetype.split('/')]
        xcontent = xapdoc.TextField('content', file_content)
        doc = xapdoc.Document(textFields=(xcontent, ),
                              keywords=xmimetypes + [xtitle, xitemid, ],
                              sortFields=(xpname, xattachment, xmtime, xwname, xrev, ),
                             )
        doc.analyzerFactory = getWikiAnalyzerFactory()
        # updated can only be True here for mode 'update' or 'add'
        if mode == 'update':
            logging.debug("%s (replace %r)", filename, uid)
            doc.uid = uid
        else:
            logging.debug("%s (add)", filename)
        writer.index(doc)
    except (OSError, IOError):
        # best effort: skip this file, but leave a trace in the log
        logging.debug("%s (skipped, OSError/IOError)", filename, exc_info=True)
def _index_page(self, writer, page, mode='update'):
    """ Index a page and its attachments - assumes that the write lock is acquired

    The page is indexed first, then every attachment returned by
    AttachFile._get_files() as a separate document. An attachment whose
    file modification time can not be read (OSError / IOError) is logged
    at debug level and skipped; the remaining attachments still get
    processed.

    @arg writer: the index writer object
    @arg page: a page object
    @arg mode: 'add' = just add, no checks
               'update' = check if already in index and update if needed (mtime)
               any other value = nothing gets indexed
    """
    request = page.request
    wikiname = request.cfg.interwikiname or "Self"
    pagename = page.page_name
    mtime = page.mtime_usecs()
    revision = str(page.get_real_rev())
    itemid = "%s:%s:%s" % (wikiname, pagename, revision)
    author = page.edit_info().get('editor', '?')
    # XXX: Hack until we get proper metadata
    language, stem_language = self._get_languages(page)
    categories = self._get_categories(page)
    domains = tuple(self._get_domains(page))
    uid = None
    updated = False

    if mode == 'update':
        # from #xapian: if you generate a special "unique id" term,
        # you can just call database.replace_document(uid_term, doc)
        # -> done in xapwrap.index.Index.index()
        query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', itemid))
        enq, mset, docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
        if docs:
            doc = docs[0] # there should be only one
            uid = doc['uid']
            docmtime = long(doc['values']['mtime'])
            # only re-index when the page is newer than the indexed copy
            updated = mtime > docmtime
            logging.debug("uid %r: mtime %r > docmtime %r == updated %r", uid, mtime, docmtime, updated)
        else:
            uid = None
            updated = True
    elif mode == 'add':
        updated = True
    logging.debug("%s %r", pagename, updated)

    if updated:
        xwname = xapdoc.SortKey('wikiname', wikiname)
        xpname = xapdoc.SortKey('pagename', pagename)
        xattachment = xapdoc.SortKey('attachment', '') # this is a real page, not an attachment
        xmtime = xapdoc.SortKey('mtime', str(mtime))
        xrev = xapdoc.SortKey('revision', revision)
        xtitle = xapdoc.TextField('title', pagename, True) # prefixed
        mimetype = 'text/%s' % page.pi['format'] # XXX improve this
        xkeywords = [xapdoc.Keyword('itemid', itemid),
                     xapdoc.Keyword('lang', language),
                     xapdoc.Keyword('stem_lang', stem_language),
                     xapdoc.Keyword('fulltitle', pagename),
                     xapdoc.Keyword('revision', revision),
                     xapdoc.Keyword('author', author),
                    ]
        # one keyword for the full mimetype plus one for each of its parts
        xkeywords += [xapdoc.Keyword('mimetype', mt) for mt in [mimetype, ] + mimetype.split('/')]
        xkeywords += [xapdoc.Keyword('linkto', pagelink) for pagelink in page.getPageLinks(request)]
        xkeywords += [xapdoc.Keyword('category', category) for category in categories]
        xkeywords += [xapdoc.Keyword('domain', domain) for domain in domains]
        xcontent = xapdoc.TextField('content', page.get_raw_body())
        doc = xapdoc.Document(textFields=(xcontent, xtitle),
                              keywords=xkeywords,
                              sortFields=(xpname, xattachment, xmtime, xwname, xrev),
                             )
        doc.analyzerFactory = getWikiAnalyzerFactory(request, stem_language)
        # updated can only be True here for mode 'update' or 'add'
        if mode == 'update':
            logging.debug("%s (replace %r)", pagename, uid)
            doc.uid = uid
        else:
            logging.debug("%s (add)", pagename)
        writer.index(doc)

    from MoinMoin.action import AttachFile

    attachments = AttachFile._get_files(request, pagename)
    for att in attachments:
        filename = AttachFile.getFilename(request, pagename, att)
        att_itemid = "%s:%s//%s" % (wikiname, pagename, att)
        try:
            att_mtime = os.path.getmtime(filename)
        except (OSError, IOError):
            # best effort, like _index_file: one unreadable attachment must
            # not stop indexing of the remaining ones
            logging.debug("%s %s (skipped, OSError/IOError)", pagename, att, exc_info=True)
            continue
        att_mtime = wikiutil.timestamp2version(att_mtime)
        if mode == 'update':
            query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', att_itemid))
            enq, mset, docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', ])
            logging.debug("##%r %r", filename, docs)
            if docs:
                doc = docs[0] # there should be only one
                uid = doc['uid']
                docmtime = long(doc['values']['mtime'])
                updated = att_mtime > docmtime
                logging.debug("uid %r: mtime %r > docmtime %r == updated %r", uid, att_mtime, docmtime, updated)
            else:
                uid = None
                updated = True
        elif mode == 'add':
            updated = True
        logging.debug("%s %s %r", pagename, att, updated)
        if not updated:
            continue

        xatt_itemid = xapdoc.Keyword('itemid', att_itemid)
        xpname = xapdoc.SortKey('pagename', pagename)
        xwname = xapdoc.SortKey('wikiname', wikiname)
        xattachment = xapdoc.SortKey('attachment', att) # this is an attachment, store its filename
        xmtime = xapdoc.SortKey('mtime', att_mtime)
        xrev = xapdoc.SortKey('revision', '0')
        xtitle = xapdoc.Keyword('title', '%s/%s' % (pagename, att))
        # attachments use the language and domains determined for their page
        xlanguage = xapdoc.Keyword('lang', language)
        xstem_language = xapdoc.Keyword('stem_lang', stem_language)
        mimetype, att_content = self.contentfilter(filename)
        xmimetypes = [xapdoc.Keyword('mimetype', mt) for mt in [mimetype, ] + mimetype.split('/')]
        xcontent = xapdoc.TextField('content', att_content)
        xtitle_txt = xapdoc.TextField('title', '%s/%s' % (pagename, att), True)
        xfulltitle = xapdoc.Keyword('fulltitle', pagename)
        xdomains = [xapdoc.Keyword('domain', domain) for domain in domains]
        doc = xapdoc.Document(textFields=(xcontent, xtitle_txt),
                              keywords=xdomains + xmimetypes + [xatt_itemid, xtitle, xlanguage, xstem_language, xfulltitle, ],
                              sortFields=(xpname, xattachment, xmtime, xwname, xrev, ),
                             )
        doc.analyzerFactory = getWikiAnalyzerFactory(request, stem_language)
        if mode == 'update':
            logging.debug("%s (replace %r)", pagename, uid)
            doc.uid = uid
        else:
            logging.debug("%s (add)", pagename)
        writer.index(doc)