def create_link(self, source, target):
    '''Determine best way to represent a link between two pages

    @param source: a L{Path} object
    @param target: a L{Path} object
    @returns: a L{HRef} object
    '''
    if target == source:
        # Self-link: weird edge case, fall back to a floating
        # reference by basename
        return HRef(HREF_REL_FLOATING, target.basename)

    if target.ischild(source):
        # Child pages are best expressed as relative links
        return HRef(HREF_REL_RELATIVE, target.relname(source))

    # Prefer a short floating link when one resolves unambiguously,
    # else fall back to the full absolute name
    floating = self._find_floating_link(source, target)
    return floating or HRef(HREF_REL_ABSOLUTE, target.name)
def on_finish_update(self, o):
    '''Post-update hook: warn about and remove "ghost" links, resolve
    all links flagged C{needscheck}, and drop placeholder pages that
    are no longer the target of any link.

    @param o: the object emitting the signal (unused)
    '''
    # Check for ghost links - links whose source page has no source
    # file anymore - warn but still clean them up
    for row in self.db.execute('''
        SELECT DISTINCT pages.* FROM pages
        INNER JOIN links ON pages.id=links.source
        WHERE pages.source_file IS NULL
    ''').fetchall():
        # logger.warn() is deprecated in favor of logger.warning()
        logger.warning('Found ghost links from: %s', row['name'])
        self.on_page_row_deleted(None, row)

    # Resolve pending links
    for row in self.db.execute(
        'SELECT * FROM links WHERE needscheck=1 '
        'ORDER BY anchorkey, names'
    ):
        href = HRef(row['rel'], row['names'])
        source = self._pages.get_pagename(row['source'])
        target_id, targetname = self._pages.resolve_link(source, href)
        if target_id is None:
            # Target does not exist (yet): index a placeholder page
            target_id = self._pagesindexer.insert_link_placeholder(targetname)

        # Filter on "rel" as well (matching update_iter()) so two links
        # with the same source/names but different link type are not
        # clobbered together
        self.db.execute(
            'UPDATE links SET target=?, needscheck=0 WHERE source=? and names=? and rel=?',
            (target_id, row['source'], row['names'], row['rel'])
        )

    # Delete un-used placeholders
    for row in self.db.execute('''
        SELECT pages.id FROM pages
        LEFT JOIN links ON pages.id=links.target
        WHERE pages.is_link_placeholder=1 and pages.n_children=0
        and links.source IS NULL
    '''):
        pagename = self._pages.get_pagename(row['id'])
        self._pagesindexer.remove_page(pagename, self._allow_cleanup)
def runTest(self):
    '''Exercise the links indexer against an in-memory sqlite index:
    first the low-level PagesViewInternal resolve methods, then the
    LinksIndexer insert / change / delete life cycle.
    '''

    def basename(name):
        # Last element of a ":"-separated zim page path
        if ":" in name:
            return name.split(":")[-1]
        else:
            return name

    # Fresh in-memory index; Row factory gives dict-like access by column
    db = sqlite3.connect(':memory:')
    db.row_factory = sqlite3.Row
    pi = PagesIndexer(db, None, tests.MockObject())
    # Seed the pages table from the test fixture self.PAGES
    # (tuples of (id, name, content))
    for i, name, cont in self.PAGES:
        db.execute(
            'INSERT INTO pages(id, name, lowerbasename, sortkey, parent, source_file) VALUES (?, ?, ?, ?, 1, 1)',
            (i, name, basename(name).lower(), natural_sort_key(name)))

    ## Test PagesViewInternal methods
    # resolve_pagename / resolve_link return (page_id, Path); the
    # expected ids follow from the fixture rows inserted above
    iview = PagesViewInternal(db)
    i, pn = iview.resolve_pagename(Path(''), ['foo'])
    self.assertEqual((i, pn), (3, Path('Foo')))
    i, pn = iview.resolve_link(Path('Foo'), HRef.new_from_wiki_link('Bar'))
    self.assertEqual((i, pn), (2, Path('Bar')))

    ## Test the actual indexer
    # MaskedObject only exposes the listed methods; anything else the
    # indexer calls on it raises - used here to pin the exact API used
    pageindexer = tests.MaskedObject(pi, 'connect')
    indexer = LinksIndexer(db, pageindexer)
    for i, name, cont in self.PAGES:
        row = { 'id': i, 'name': name, 'sortkey': natural_sort_key(name), 'is_link_placeholder': False }
        indexer.on_page_row_inserted(pageindexer, row)

    ### Feed page contents; resolving may create link placeholders
    pageindexer.setObjectAccess('insert_link_placeholder')
    for i, name, text in self.PAGES:
        tree = WikiParser().parse(text)
        row = {'id': i, 'name': name}
        indexer.on_page_changed(pageindexer, row, tree)

    indexer.update()

    # Expected (source, target) pairs after resolving the fixture links
    links = sorted(
        (r['source'], r['target'])
            for r in db.execute('SELECT * FROM links')
    )
    self.assertEqual(links, [(3, 2), (3, 4)])

    ### Deleting all pages must also remove all links
    pageindexer.setObjectAccess('remove_page')
    for i, name, cont in self.PAGES:
        row = {'id': i, 'name': name, 'is_link_placeholder': False}
        indexer.on_page_row_deleted(pageindexer, row)

    indexer.update()

    rows = db.execute('SELECT * FROM links').fetchall()
    self.assertEqual(rows, [])
def update_iter(self):
    '''Generator doing incremental index update for links: cleans up
    "ghost" links, resolves all links flagged C{needscheck} (committing
    every 100 rows), then drops unused placeholder pages. Yields after
    each unit of work so the caller can keep the UI responsive.
    '''
    # Check for ghost links - links whose source page has no source
    # file anymore - warn but still clean them up
    for row in self.db.execute('''
        SELECT DISTINCT pages.* FROM pages
        INNER JOIN links ON pages.id=links.source
        WHERE pages.source_file IS NULL
    ''').fetchall():
        # logger.warn() is deprecated in favor of logger.warning()
        logger.warning('Found ghost links from: %s', row['name'])
        self.on_page_row_deleted(None, row)
        yield

    # Check total, for progress logging below
    n, = self.db.execute(
        'SELECT COUNT(*) FROM links WHERE needscheck=1').fetchone()

    # Resolve pending links
    for i, row in enumerate(
        self.db.execute('SELECT * FROM links WHERE needscheck=1 '
            'ORDER BY anchorkey, names')):
        if i > 0 and i % 100 == 0:
            # Periodic commit keeps transactions small on big updates
            self.db.commit()
            logger.debug('Update link %i of %i', i, n)
        href = HRef(row['rel'], row['names'])
        source = self._pages.get_pagename(row['source'])
        target_id, targetname = self._pages.resolve_link(
            source, href, source_id=row['source'])
        if target_id is None:
            # Target does not exist (yet): index a placeholder page
            target_id = self._pagesindexer.insert_link_placeholder(
                targetname)
        self.db.execute(
            'UPDATE links SET target=?, needscheck=0 WHERE source=? and names=? and rel=?',
            (target_id, row['source'], row['names'], row['rel']))
        yield

    # Delete un-used placeholders
    for row in self.db.execute('''
        SELECT pages.id FROM pages
        LEFT JOIN links ON pages.id=links.target
        WHERE pages.is_link_placeholder=1 and pages.n_children=0
        and links.source IS NULL
    '''):
        pagename = self._pages.get_pagename(row['id'])
        self._pagesindexer.remove_page(pagename, self._allow_cleanup)
        yield
        # The allow_cleanup function checks whether a parent has links or not.
        # Without this guard function we would need to iterate several times
        # through this cleanup function.

    self.db.commit()
def iter_href(self):
    '''Generator for links in the text

    @returns: yields a list of unique L{HRef} objects
    '''
    from zim.notebook.page import HRef # XXX

    seen = set()
    # Element.getiterator() is deprecated (and removed in Python 3.9);
    # iter() is the supported equivalent since Python 2.7 / 3.2.
    # NOTE(review): assumes self._etree follows the ElementTree API -
    # confirm if it is a custom wrapper.
    for elt in itertools.chain(
        self._etree.iter(LINK),
        self._etree.iter(IMAGE)
    ):
        href = elt.attrib.get('href')
        if href and href not in seen:
            seen.add(href)
            if link_type(href) == 'page':
                try:
                    yield HRef.new_from_wiki_link(href)
                except ValueError:
                    # Malformed page name - skip silently
                    pass
def lookup_from_user_input(self, name, reference=None):
    '''Lookup a pagename based on user input
    @param name: the user input as string
    @param reference: a L{Path} in case relative links are supported as
    customer input
    @returns: a L{Path} object for C{name}
    @raises ValueError: when C{name} would reduce to empty string after
    removing all invalid characters, or if C{name} is a relative link
    while no C{reference} page is given.
    @raises IndexNotFoundError: when C{reference} is not indexed
    '''
    # This method re-uses most of resolve_link() but is defined
    # separate because it has a distinct different purpose.
    # Only accidental that we treat user input as links ... ;)
    href = HRef.new_from_wiki_link(name)
    if reference is None and href.rel == HREF_REL_RELATIVE:
        # Call-form raise: the old "raise E, msg" statement is a
        # SyntaxError on Python 3; this form works on both 2 and 3
        raise ValueError('Got relative page name without parent: %s' % name)
    else:
        source = reference or ROOT_PATH
        id, pagename = self._pages.resolve_link(
            source, href, ignore_link_placeholders=False)
        return pagename
def on_finish_update(self, o):
    '''Post-update hook: resolve all links flagged C{needscheck} and
    drop placeholder pages that are no longer the target of any link.

    @param o: the object emitting the signal (unused)
    '''
    # Resolve pending links
    for row in self.db.execute(
        'SELECT * FROM links WHERE needscheck=1 '
        'ORDER BY anchorkey, names'
    ):
        href = HRef(row['rel'], row['names'])
        source = self._pages.get_pagename(row['source'])
        target_id, targetname = self._pages.resolve_link(source, href)
        if target_id is None:
            # Target does not exist (yet): index a placeholder page
            target_id = self._pagesindexer.insert_link_placeholder(targetname)

        # Filter on "rel" as well (matching update_iter()) so two links
        # with the same source/names but different link type are not
        # clobbered together
        self.db.execute(
            'UPDATE links SET target=?, needscheck=0 WHERE source=? and names=? and rel=?',
            (target_id, row['source'], row['names'], row['rel'])
        )

    # Delete un-used placeholders
    for row in self.db.execute('''
        SELECT pages.id FROM pages
        LEFT JOIN links ON pages.id=links.target
        WHERE pages.is_link_placeholder=1 and pages.n_children=0
        and links.source IS NULL
    '''):
        pagename = self._pages.get_pagename(row['id'])
        self._pagesindexer.remove_page(pagename, self._allow_cleanup)
def try_link(names):
    # Build a floating link from the given name parts and accept it
    # only if it resolves back to the intended target page
    assert names
    candidate = HRef(HREF_REL_FLOATING, ':'.join(names))
    _, resolved = self._pages.resolve_link(source, candidate)
    if resolved == target:
        return candidate
    return None