def info(self, path: str, fields: FileInfoReqIsh = FileInfoReq.internal(),
         path_resolved=False, skip_parse=None) -> FileInfo:
    if not path_resolved:
        path = os.path.abspath(path)
    if skip_parse is None:
        skip_parse = self._should_skip_parse(path)
    fields = FileInfoReq.parse(fields)

    if skip_parse or not os.path.exists(path):
        info = FileInfo(path)
    else:
        try:
            info = self.accessor_factory(path).info()
        except Exception as ex:
            raise IOError(f'Unable to parse {path}') from ex

    if fields.backlinks:
        # There is no backlink index here: backlinks are found by loading the links
        # of every other file and keeping those that resolve to this path.
        for other in self.query(fields=FileInfoReq(path=True, links=True)):
            info.backlinks.extend(link for link in other.links if link.referent() == path)
        info.backlinks.sort(key=attrgetter('referrer', 'href'))

    return info
def test_info_and_referrers(fs):
    doc = """---
title: A Note
created: 2012-01-02 03:04:05
...
I link to [two](two.md) and [three](../otherdir/three.md#heading) and have #two #tags."""
    path1 = '/notes/dir/one.md'
    path2 = '/notes/dir/two.md'
    path3 = '/notes/otherdir/three.md'
    fs.create_file(path1, contents=doc)
    fs.create_file(path2, contents='---\ntitle: Note 2\n...\n')
    repo = config().instantiate()
    assert repo.info(path1, FileInfoReq.full()) == FileInfo(
        path1,
        title='A Note',
        created=datetime(2012, 1, 2, 3, 4, 5),
        tags={'tags', 'two'},
        links=[LinkInfo(path1, h) for h in ['../otherdir/three.md#heading', 'two.md']])
    assert repo.info(path2, FileInfoReq.full()) == FileInfo(
        path2, title='Note 2', backlinks=[LinkInfo(path1, 'two.md')])
    assert repo.info(path3, FileInfoReq.full()) == FileInfo(
        path3, backlinks=[LinkInfo(path1, '../otherdir/three.md#heading')])
def _query(args, nd: Notesdir) -> int:
    query = args.query or ''
    infos = [i for i in nd.repo.query(query) if os.path.isfile(i.path)]
    if args.fields:
        fields = FileInfoReq.parse(args.fields[0])
    else:
        fields = FileInfoReq(path=True, tags=True, title=True, created=True)
    if args.json:
        infos.sort(key=attrgetter('path'))
        print(json.dumps([i.as_json() for i in infos]))
    elif args.table:
        # TODO make sorting / path resolution consistent with json output
        data = []
        for info in infos:
            row = ()
            if fields.path:
                row += (os.path.basename(info.path),)
            if fields.title:
                row += (info.title or '',)
            if fields.created:
                row += (info.created.strftime('%Y-%m-%d') if info.created else '',)
            if fields.tags:
                row += ('\n'.join(sorted(info.tags)),)
            if fields.links:
                row += ('\n'.join(sorted({os.path.relpath(link.referent())
                                          for link in info.links if link.referent()})),)
            if fields.backlinks:
                row += ('\n'.join(sorted({os.path.relpath(link.referrer)
                                          for link in info.backlinks})),)
            data.append(row)
        data.sort(key=itemgetter(0))
        heading = ()
        if fields.path:
            heading += ('Filename',)
        if fields.title:
            heading += ('Title',)
        if fields.created:
            heading += ('Created',)
        if fields.tags:
            heading += ('Tags',)
        if fields.links:
            heading += ('Link paths',)
        if fields.backlinks:
            heading += ('Backlink paths',)
        data.insert(0, heading)
        table = AsciiTable(data)
        print(table.table)
    else:
        for info in infos:
            print('--------------------')
            _print_file_info(info, fields, nd)
    return 0
def _info(args, nd: Notesdir) -> int:
    fields = FileInfoReq.parse(args.fields[0]) if args.fields else FileInfoReq.full()
    info = nd.repo.info(args.path[0], fields)
    if args.json:
        print(json.dumps(info.as_json()))
    else:
        _print_file_info(info, fields, nd)
    return 0
def test_change(fs):
    fs.cwd = '/notes'
    path1 = '/notes/one.md'
    path2 = '/notes/two.md'
    path3 = '/notes/moved.md'
    fs.create_file(path1, contents='[1](old)')
    fs.create_file(path2, contents='[2](foo)')
    edits = [SetTitleCmd(path1, 'New Title'),
             ReplaceHrefCmd(path1, 'old', 'new'),
             MoveCmd(path1, path3),
             ReplaceHrefCmd(path2, 'foo', 'bar')]
    repo = config().instantiate()
    repo.change(edits)
    assert not Path(path1).exists()
    assert Path(path3).read_text() == '---\ntitle: New Title\n...\n[1](new)'
    assert Path(path2).read_text() == '[2](bar)'
    assert repo.info(path1, FileInfoReq.full()) == FileInfo(path1)
    assert repo.info(path3, FileInfoReq.full()) == FileInfo(
        path3, title='New Title', links=[LinkInfo(path3, 'new')])
    assert repo.info(path2, FileInfoReq.full()) == FileInfo(
        path2, links=[LinkInfo(path2, 'bar')])
    assert repo.info('old', FileInfoReq.full()) == FileInfo('/notes/old')
    assert repo.info('foo', FileInfoReq.full()) == FileInfo('/notes/foo')
    assert repo.info('new', FileInfoReq.full()) == FileInfo(
        '/notes/new', backlinks=[LinkInfo(path3, 'new')])
    assert repo.info('bar', FileInfoReq.full()) == FileInfo(
        '/notes/bar', backlinks=[LinkInfo(path2, 'bar')])
    # regression test for bug where invalidate removed entries for files that were referred to
    # only by files that had not been changed
    repo.invalidate()
    assert repo.info('new', FileInfoReq.full()) == FileInfo(
        '/notes/new', backlinks=[LinkInfo(path3, 'new')])
    assert repo.info('bar', FileInfoReq.full()) == FileInfo(
        '/notes/bar', backlinks=[LinkInfo(path2, 'bar')])
def query(self, query: FileQueryIsh = FileQuery(),
          fields: FileInfoReqIsh = FileInfoReq.internal()) -> Iterator[FileInfo]:
    # Parse both arguments before using their attributes: `query` may arrive as a
    # string and `fields` as a string or list, in which case attribute access
    # (query.include_tags, fields.tags) would raise AttributeError.
    query = FileQuery.parse(query)
    fields = FileInfoReq.parse(fields)
    # Tag data must be loaded whenever the query filters on tags, even if the
    # caller did not request it.
    fields = dataclasses.replace(
        fields, tags=bool(fields.tags or query.include_tags or query.exclude_tags))
    filtered = query.apply_filtering(
        self.info(e.dir_entry.path, fields, path_resolved=True, skip_parse=e.skip_parse)
        for e in self._paths())
    yield from query.apply_sorting(filtered)
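# Hedged usage sketch for the query method above, not part of the original source.
# It assumes FileInfoReq lives in notesdir.models (per the Sphinx references in the
# docstrings below) and that FileQuery.parse understands a 'tag:...' string form; the
# implementations here only show that strings are accepted. `repo` is any instantiated
# repo, e.g. DirectRepoConf(root_paths={'/notes'}).instantiate() as in test_ignore.
from notesdir.models import FileInfoReq

def print_tagged_titles(repo) -> None:
    # Request only the fields we need, so the repo can skip loading everything else.
    for info in repo.query('tag:journal', FileInfoReq(path=True, title=True, tags=True)):
        print(info.path, info.title or '(untitled)')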
def test_invalidate(fs):
    repo = config().instantiate()
    path = '/notes/one.md'
    assert repo.info(path, FileInfoReq.full()) == FileInfo(path)
    fs.create_file(path, contents='#hello [link](foo.md)')
    assert repo.info(path, FileInfoReq.full()) == FileInfo(path)
    repo.invalidate()
    assert repo.info(path, FileInfoReq.full()) == FileInfo(
        path, tags={'hello'}, links=[LinkInfo(path, 'foo.md')])
    repo.invalidate()
    Path(path).write_text('#goodbye')
    repo.invalidate()
    assert repo.info(path, FileInfoReq.full()) == FileInfo(path, tags={'goodbye'})
def test_ignore(fs):
    path1 = '/notes/one.md'
    path2 = '/notes/.two.md'
    fs.create_file(path1, contents='I link to [two](.two.md)')
    fs.create_file(path2, contents='I link to [one](one.md)')
    repo = DirectRepoConf(root_paths={'/notes'}).instantiate()
    assert list(repo.query()) == [repo.info(path1)]
    assert not repo.info(path1, FileInfoReq.full()).backlinks
    assert repo.info(path2, FileInfoReq.full()).backlinks == [LinkInfo(path1, '.two.md')]
    repo.conf.ignore = lambda _1, _2: False
    assert list(repo.query()) == [repo.info(path1), repo.info(path2)]
    assert repo.info(path1, FileInfoReq.full()).backlinks == [LinkInfo(path2, 'one.md')]
    assert repo.info(path2, FileInfoReq.full()).backlinks == [LinkInfo(path1, '.two.md')]
def query(self, query: FileQueryIsh = FileQuery(),
          fields: FileInfoReqIsh = FileInfoReq.internal()) -> Iterator[FileInfo]:
    self._refresh_if_needed()
    # Parse both arguments before using their attributes (see the note in the
    # DirectRepo implementation above).
    query = FileQuery.parse(query)
    fields = FileInfoReq.parse(fields)
    # Tag data must be loaded whenever the query filters on tags, even if the
    # caller did not request it.
    fields = dataclasses.replace(
        fields, tags=bool(fields.tags or query.include_tags or query.exclude_tags))
    cursor = self.connection.cursor()
    cursor.execute('SELECT path FROM files WHERE existent = TRUE')
    # TODO: Obviously, this is super lazy and inefficient. We should do as much filtering
    # and data loading in the query as we reasonably can.
    filtered = query.apply_filtering(
        self.info(path, fields, path_resolved=True) for (path,) in cursor)
    yield from query.apply_sorting(filtered)
def test_ignore(fs):
    path1 = '/notes/one.md'
    path2 = '/notes/.two.md'
    fs.create_file(path1, contents='I link to [two](.two.md)')
    fs.create_file(path2, contents='I link to [one](one.md)')
    with config().instantiate() as repo:
        assert list(repo.query()) == [repo.info(path1)]
        assert not repo.info(path1, FileInfoReq.full()).backlinks
        assert repo.info(path2, FileInfoReq.full()).backlinks == [LinkInfo(path1, '.two.md')]
    conf = config()
    conf.ignore = lambda _1, _2: False
    with conf.instantiate() as repo:
        assert list(repo.query()) == [repo.info(path1), repo.info(path2)]
        assert repo.info(path1, FileInfoReq.full()).backlinks == [LinkInfo(path2, 'one.md')]
        assert repo.info(path2, FileInfoReq.full()).backlinks == [LinkInfo(path1, '.two.md')]
def backfill(self) -> Tuple[List[str], List[Exception]]:
    """Finds all files missing title or created metadata, and attempts to set that metadata.

    Missing titles are set to the filename, minus the file extension.

    Missing created dates are set based on the birthtime or ctime of the file.

    Returns a list of all successfully changed files, and a list of exceptions
    encountered for other files.
    """
    modified = []
    exceptions = []
    for info in self.repo.query(fields=FileInfoReq(path=True, title=True, created=True)):
        edits = []
        if not info.title:
            _, filename = os.path.split(info.path)
            title, _ = os.path.splitext(filename)
            edits.append(SetTitleCmd(info.path, title))
        if not info.created:
            edits.append(SetCreatedCmd(info.path, info.guess_created()))
        if edits:
            try:
                self.repo.change(edits)
                modified.append(info.path)
            except Exception as ex:
                exceptions.append(ex)
    return modified, exceptions
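# Minimal sketch (not from the original source) of calling backfill and reporting the
# results. Assumes `nd` is an instantiated Notesdir exposing the method above; the
# return shape (modified paths, exceptions) comes from the docstring.
def report_backfill(nd) -> None:
    modified, exceptions = nd.backfill()
    print(f'backfilled {len(modified)} file(s)')
    for ex in exceptions:
        print(f'error: {ex}')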
def tag_counts(self, query: FileQueryIsh = FileQuery()) -> Dict[str, int]:
    query = FileQuery.parse(query)
    result = defaultdict(int)
    for info in self.query(query, FileInfoReq(path=True, tags=True)):
        for tag in info.tags:
            result[tag] += 1
    return result
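# Sketch (an illustration, not original source): printing a tag histogram sorted by
# descending count. tag_counts accepts any FileQueryIsh; passing '' to match everything
# follows the _query CLI handler above.
def print_tag_histogram(repo) -> None:
    counts = repo.tag_counts('')
    for tag, n in sorted(counts.items(), key=lambda kv: (-kv[1], kv[0])):
        print(f'{n:5d}  {tag}')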
def info(self, path: str, fields: FileInfoReqIsh = FileInfoReq.internal(),
         path_resolved=False) -> FileInfo:
    self._refresh_if_needed()
    if not path_resolved:
        path = os.path.abspath(path)
    fields = FileInfoReq.parse(fields)
    cursor = self.connection.cursor()
    cursor.execute('SELECT id, title, created FROM files WHERE path = ?', (path,))
    file_row = cursor.fetchone()
    info = FileInfo(path)
    if file_row:
        file_id = file_row[0]
        info.title = file_row[1]
        time_field = file_row[2]
        if time_field:
            if time_field.isnumeric():
                # Numeric values are epoch milliseconds.
                info.created = datetime.utcfromtimestamp(int(time_field) / 1000)
            else:
                # Otherwise the value is an ISO 8601 string.
                info.created = datetime.fromisoformat(time_field)
        if fields.tags:
            cursor.execute('SELECT tag FROM file_tags WHERE file_id = ?', (file_id,))
            info.tags = {r[0] for r in cursor}
        if fields.links:
            cursor.execute('SELECT href FROM file_links WHERE referrer_id = ?', (file_id,))
            info.links = [LinkInfo(path, href) for href in sorted(r[0] for r in cursor)]
        if fields.backlinks:
            cursor.execute('SELECT referrers.path, file_links.href'
                           ' FROM files referrers'
                           ' INNER JOIN file_links ON referrers.id = file_links.referrer_id'
                           ' WHERE file_links.referent_id = ?', (file_id,))
            info.backlinks = [LinkInfo(referrer, href) for referrer, href in cursor]
            info.backlinks.sort(key=attrgetter('referrer', 'href'))
    return info
def edits_for_rearrange(store: Repo, renames: Dict[str, str]) -> Iterator[FileEditCmd]:
    """Yields commands that will rename files and update links accordingly.

    The keys of the dictionary are the paths to be renamed, and the values are what
    they should be renamed to. (If a path appears as both a key and as a value, it
    will be moved to a temporary file as an intermediate step.)

    The given store is used to search for files that link to any of the paths that are
    keys in the dictionary, so that ReplaceHrefCmd instances can be generated for them.
    The files that are being renamed will also be checked for outbound links, and
    ReplaceHrefCmd edits will be generated for those too.

    Source paths may be directories; the directory as a whole will be moved, and links
    to/from all files/folders within it will be updated too.
    """
    to_move = {os.path.realpath(s): os.path.realpath(d) for s, d in renames.items()}
    all_moves = {}
    for src, dest in to_move.items():
        all_moves[src] = dest
        if os.path.isdir(src):
            for path in glob(os.path.join(src, '**', '*'), recursive=True):
                all_moves[path] = os.path.join(dest, os.path.relpath(path, src))

    for src, dest in all_moves.items():
        info = store.info(src, FileInfoReq(path=True, links=True, backlinks=True))
        if info:
            for link in info.links:
                referent = link.referent()
                if not referent:
                    continue
                url = urlparse(link.href)
                if referent == src and url.path == '':
                    continue
                if referent in all_moves:
                    referent = all_moves[referent]
                elif os.path.isabs(url.path):
                    # Don't try to rewrite absolute paths, unless they refer to a file we're moving.
                    continue
                newhref = path_as_href(href_path(dest, referent), url)
                if not link.href == newhref:
                    yield ReplaceHrefCmd(src, link.href, newhref)

            for link in info.backlinks:
                if link.referrer in all_moves:
                    continue
                # TODO either pass in all the hrefs at once, or change method to not take in a set
                yield from edits_for_path_replacement(link.referrer, {link.href}, dest)

    yield from edits_for_raw_moves(to_move)
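# Sketch (illustration only) of driving edits_for_rearrange directly, mirroring how
# organize() below uses it: collect all the commands first, then apply them in a single
# change() call so the href rewrites and the moves themselves happen together. `repo`
# is any instantiated Repo.
def rename_with_link_updates(repo, renames: Dict[str, str]) -> None:
    edits = list(edits_for_rearrange(repo, renames))
    if edits:
        repo.change(edits)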
def info(self, path: str, fields: FileInfoReqIsh = FileInfoReq.internal()) -> FileInfo:
    """Looks up the specified fields for the given file or folder.

    Additional fields might or might not be populated.

    May raise a :exc:`notesdir.accessors.base.ParseError` or IO-related exception, but
    otherwise will always return an instance. If no file or folder exists at the given
    path, or if the file type is unrecognized, it can still populate the ``path`` and
    ``backlinks`` attributes.
    """
    raise NotImplementedError()
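# Sketch of the three FileInfoReqIsh forms this method accepts, per test_parse_info_req
# below: a comma-separated string, a list of field names, or a FileInfoReq instance.
def show_backlinks(repo, path: str) -> None:
    info = repo.info(path, 'path,backlinks')  # same as FileInfoReq(path=True, backlinks=True)
    for link in info.backlinks:
        print(f'{link.referrer} -> {link.href}')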
def replace_path_hrefs(self, original: str, replacement: str) -> None:
    """Finds and replaces links to the original path with links to the new path.

    Note that this does not currently replace links to children of the original path -
    e.g., if original is "/foo/bar", a link to "/foo/bar/baz" will not be updated.

    No files are moved, and this method does not care whether the original or
    replacement paths refer to actual files.
    """
    info = self.repo.info(original, FileInfoReq(path=True, backlinks=True))
    edits = []
    for link in info.backlinks:
        # TODO group links from the same referrer for this call
        edits.extend(edits_for_path_replacement(link.referrer, {link.href}, replacement))
    if edits:
        self.repo.change(edits)
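# Sketch (illustration only): repointing every link from a stale note to its replacement
# without moving any files. `nd` is an instantiated Notesdir, as in the backfill sketch
# above; per the docstring, links to children of `old` are NOT rewritten.
def redirect_note(nd, old: str, new: str) -> None:
    nd.replace_path_hrefs(old, new)
    # Afterwards the old path should have no remaining backlinks.
    assert not nd.repo.info(old, FileInfoReq(path=True, backlinks=True)).backlinks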
def test_parse_info_req(): expected = FileInfoReq(path=True, backlinks=True) assert FileInfoReq.parse('path,backlinks') == expected assert FileInfoReq.parse(['path', 'backlinks']) == expected assert FileInfoReq.parse(expected) == expected
def organize(self) -> Dict[str, str]:
    """Reorganizes files using the function set in :attr:`notesdir.conf.NotesdirConf.path_organizer`.

    For every file in your note directories (defined by :attr:`notesdir.conf.RepoConf.root_paths`),
    this method will call that function with the file's FileInfo, and move the file to
    the path the function returns.

    Note that the function will only be called for files, not directories. You cannot
    directly move a directory by this method, but you can effectively move one by moving
    all the files from it to the same new directory.

    This method deletes any empty directories that result from the moves it makes, and
    creates any directories it needs to.

    The FileInfo is retrieved using :meth:`notesdir.models.FileInfoReq.full`.
    """
    infos = self.repo.query('', FileInfoReq.full())
    moves = {}
    move_fns = {}
    info_map = {}
    unavailable = set()
    for info in infos:
        if not os.path.isfile(info.path):
            continue
        info_map[info.path] = info
        dest = self.conf.path_organizer(info)
        if isinstance(dest, DependentPathFn):
            move_fns[info.path] = dest
        else:
            dest = find_available_name(dest, unavailable, info.path)
            if info.path == dest:
                continue
            moves[info.path] = dest
            unavailable.add(dest)

    def process_fn(src: str):
        # A DependentPathFn's destination depends on where its determinant file ends
        # up, so resolve the determinant's own move (possibly recursively) first.
        dpfn = move_fns[src]
        determinant = dpfn.determinant
        dinfo = info_map.get(determinant, FileInfo(determinant))
        if determinant in move_fns:
            process_fn(determinant)
        if determinant in moves:
            dinfo = replace(dinfo, path=moves[determinant])
        srcdest = dpfn.fn(dinfo)
        del move_fns[src]
        srcdest = find_available_name(srcdest, unavailable, src)
        if src == srcdest:
            return
        moves[src] = srcdest
        unavailable.add(srcdest)

    while move_fns:
        process_fn(next(iter(move_fns)))

    if not moves:
        return {}

    edits = list(edits_for_rearrange(self.repo, moves))
    for edit in edits:
        if isinstance(edit, MoveCmd):
            edit.create_parents = True
            edit.delete_empty_parents = True
    self.repo.change(edits)
    return moves
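# A hedged example (not from the original source) of a path_organizer that organize()
# could call: file each note under a year directory derived from its created date,
# keeping the filename. The '/notes' root is an assumption, and organizers that return
# a DependentPathFn are omitted for brevity.
import os

def by_year_organizer(info) -> str:
    year = info.created.strftime('%Y') if info.created else 'undated'
    return os.path.join('/notes', year, os.path.basename(info.path))

# Wiring it up (attribute name per the docstring above):
#   conf.path_organizer = by_year_organizer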
def query(self, query: FileQueryIsh = FileQuery(),
          fields: FileInfoReqIsh = FileInfoReq.internal()) -> Iterator[FileInfo]:
    """Returns the requested fields for all files matching the given query."""
    raise NotImplementedError()
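# Sketch (illustration only): querying with an explicitly constructed FileQuery instead
# of a string. The include_tags/exclude_tags attributes are used by the implementations
# above; treating them as constructor keywords assumes FileQuery is a dataclass.
def unarchived_paths(repo):
    q = FileQuery(exclude_tags={'archived'})
    return [info.path for info in repo.query(q, FileInfoReq(path=True, tags=True))]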