def merge(src, dst, mode="no_move"):
    """Move the contents of directory `src` into the existing directory `dst`.

    Every directory found below `src` is created below `dst`, and every file
    is renamed to the same relative location. `mode` (validated against
    MODES via assert_choice) decides what happens when the target file
    already exists:

    - "fail": raise FileExistsError for the source file
    - "no_move": leave the source file where it is
    - "overwrite": replace the target with the source file
    - "rename": hand source and target over to `move_rename`

    Finally `remove_empty_dirs` is run on `src`.

    Raises:
        ValueError: if `src` or `dst` is not a directory.
        FileExistsError: in "fail" mode when a target file already exists.
        RuntimeError: for entries which are neither a directory nor a file.
    """
    assert_choice("mode", mode, MODES)

    if not (src.is_dir() and dst.is_dir()):
        raise ValueError("src and dst must be directories")

    for entry in src.rglob("*"):
        if entry.is_dir():
            subdir = entry.relative_to(src)
            (dst / subdir).mkdir(parents=True, exist_ok=True)
            continue

        if not entry.is_file():
            raise RuntimeError("Unhandled file: {}".format(entry))

        relative = entry.relative_to(src)
        (dst / relative.parent).mkdir(parents=True, exist_ok=True)
        destination = dst / relative

        if not destination.exists():
            # race condition on linux, use renameat2 with RENAME_NOREPLACE?
            entry.rename(destination)
        elif mode == "fail":
            raise FileExistsError(fspath(entry))
        elif mode == "overwrite":
            replace(fspath(entry), fspath(destination))
        elif mode == "rename":
            move_rename(entry, destination)
        # Any other mode (e.g. "no_move") keeps the source file in place.

    remove_empty_dirs(fspath(src), onerror=remove_empty_error_log)
def get_paths(self, field: str, token: str, scoring: str = "unscored") -> Iterable[OptionalSearchResult]:
    """Yield `(document, score)` pairs for all documents matching `token` in `field`.

    The documents are obtained from `self.get_docs(field, token)` and each
    document id is translated through `self.ids2docs`. The meaning of the
    second tuple element depends on `scoring` (validated against
    SCORING_METHODS via assert_choice):

    - "unscored": the enumeration index of the document id
    - "term_freq": the value stored for the document id in the docs mapping
    - "tfidf": `self._tfidf(term_freq, inv_doc_freq)`, where `inv_doc_freq`
      is `log10(len(self.table[field]) / len(docs))`

    If no document matches, the result is empty for every scoring method.
    """
    assert_choice("scoring", scoring, SCORING_METHODS)

    docs = self.get_docs(field, token)

    # Start with an empty result. Previously "tfidf" scoring with no matching
    # documents left `paths` unassigned and `return paths` raised
    # UnboundLocalError instead of yielding nothing.
    paths: Iterable[OptionalSearchResult] = ()

    if scoring == "unscored":
        paths = ((self.ids2docs[doc_id], i) for i, doc_id in enumerate(docs.keys()))
    elif scoring == "term_freq":
        paths = ((self.ids2docs[doc_id], term_freq) for doc_id, term_freq in docs.items())
    elif scoring == "tfidf":
        # Guard against division by zero: len(docs) is the divisor below.
        if docs:
            num_docs = len(self.table[field])
            inv_doc_freq = log10(num_docs / len(docs))
            paths = (
                (self.ids2docs[doc_id], self._tfidf(term_freq, inv_doc_freq))
                for doc_id, term_freq in docs.items()
            )

    return paths
def _sorted(self, groupname: str, paths: Iterable[OptionalSearchResult], sortby="path") -> list[SearchResult]:
    """Filter `paths` down to one group and return them as a sorted list.

    Only `(path, score)` pairs whose path (as returned by `os.fspath`) starts
    with the string form of one of the entries in `self.groups[groupname]`
    are kept.

    `sortby` (validated against SORTBY_METHODS via assert_choice) selects the
    order: "path" sorts ascending by the first tuple element, "score" sorts
    descending by the second tuple element.

    Raises:
        RuntimeError: if filtering raises a TypeError, e.g. because an entry
            has a path which `os.fspath` does not accept.
        AssertionError: if `sortby` passed validation but is not handled here.
    """
    assert_choice("sortby", sortby, SORTBY_METHODS)

    grouppaths = list(map(str, self.groups[groupname]))

    try:
        # Build the list inside the try block. A generator expression is only
        # evaluated once it is consumed, so with a lazy filter the TypeError
        # raised by fspath() would surface later inside sorted() and bypass
        # this handler.
        filtered_paths = cast(
            Iterable[SearchResult],
            [
                (path, score)
                for path, score in paths
                if any(fspath(path).startswith(gp) for gp in grouppaths)  # type: ignore[arg-type]
            ],
        )
    except TypeError as e:
        raise RuntimeError(e) from e

    if sortby == "path":
        return sorted(filtered_paths, key=itemgetter(0), reverse=False)
    elif sortby == "score":
        return sorted(filtered_paths, key=itemgetter(1), reverse=True)
    else:
        # Explicit raise instead of `assert False`, which is removed under -O
        # and would make this function silently return None.
        raise AssertionError("Unhandled sortby: {}".format(sortby))
def get_episodes_by_bohne_preview(self, bohne_id, order="ASC"):
    # type: (int, str) -> Dict[str, Any]
    """ Returns reduced information about all episodes for the given Bohne.

    `bohne_id` is inserted into the endpoint path.
    `order` must be "ASC" or "DESC" and is forwarded to the request as a
    query parameter.
    """

    assert_choice("order", order, {"ASC", "DESC"})

    # Forward `order` like the other episode getters do; it used to be
    # validated and then dropped, so "DESC" had no effect.
    return self._request_single(
        "/v1/media/episode/bybohne/preview/{}".format(bohne_id), order=order
    )
def get_newest_episodes_preview(self, order="ASC"):
    # type: (str, ) -> Iterator[Dict[str, Any]]
    """ Returns the paged result of the newest-episodes preview endpoint.

    `order` must be "ASC" or "DESC" and is forwarded to the request.
    """

    allowed_orders = {"ASC", "DESC"}
    assert_choice("order", order, allowed_orders)

    endpoint = "/v1/media/episode/preview/newest"
    # NOTE(review): 50 and False are passed positionally to _request_paged;
    # presumably page size and a flag - confirm against its signature.
    return self._request_paged(endpoint, 50, False, order=order)
def get_shows(self, sortby="LastEpisode", only=None):
    # type: (str, Optional[str]) -> Iterator[Dict[str, Any]]
    """ Returns the paged result of the all-shows endpoint.

    `sortby` must be "LastEpisode"; `only` must be None or "podcast".
    Both are forwarded to the request.
    """

    assert_choice("sortby", sortby, {"LastEpisode"})
    assert_choice("only", only, {None, "podcast"})

    query = {"sortby": sortby, "only": only}
    return self._request_paged("/v1/media/show/all", 50, **query)
def get_shows_mini(self, sortby="LastEpisode", only=None):
    # type: (str, Optional[str]) -> List[Dict[str, Any]]
    """ Returns minimal information about all shows.

    `sortby` must be "LastEpisode"; `only` must be None or "podcast".
    Both are forwarded to a single (non-paged) request.
    """

    assert_choice("sortby", sortby, {"LastEpisode"})
    assert_choice("only", only, {None, "podcast"})

    query = {"sortby": sortby, "only": only}
    return self._request_single("/v1/media/show/preview/mini/all", **query)
def get_shows_preview(self, sortby="LastEpisode", only=None):
    # type: (str, Optional[str]) -> Iterator[Dict[str, Any]]
    """ Returns paginated, reduced information about all shows.

    `sortby` must be "LastEpisode"; `only` must be None or "podcast".
    Both are forwarded to the paged request.
    """

    assert_choice("sortby", sortby, {"LastEpisode"})
    assert_choice("only", only, {None, "podcast"})

    query = {"sortby": sortby, "only": only}
    return self._request_paged("/v1/media/show/preview/all", 50, **query)
def get_episodes_by_show_preview(self, show_id, order="ASC"):
    # type: (int, str) -> Iterator[Dict[str, Any]]
    """ Returns reduced information about all episodes for the given show.

    `show_id` is inserted into the endpoint path.
    `order` must be "ASC" or "DESC" and is forwarded to the paged request.
    """

    allowed_orders = {"ASC", "DESC"}
    assert_choice("order", order, allowed_orders)

    endpoint = "/v1/media/episode/byshow/preview/{}".format(show_id)
    return self._request_paged(endpoint, 50, False, order=order)
def get_episodes_by_season(self, season_id, order="ASC"):
    # type: (int, str) -> Iterator[Dict[str, Any]]
    """ Returns information about all episodes of a given season.

    `season_id` is inserted into the endpoint path.
    `order` must be "ASC" or "DESC" and is forwarded to the paged request.
    """

    allowed_orders = {"ASC", "DESC"}
    assert_choice("order", order, allowed_orders)

    endpoint = "/v1/media/episode/byseason/{}".format(season_id)
    return self._request_paged(endpoint, 50, False, order=order)
def get_episodes_by_bohne(self, bohne_id, order="ASC"):
    # type: (int, str) -> Iterator[Dict[str, Any]]
    """ Returns information about all episodes for the given Bohne.

    `bohne_id` is inserted into the endpoint path.
    `order` must be "ASC" or "DESC" and is forwarded to the paged request.
    """

    allowed_orders = {"ASC", "DESC"}
    assert_choice("order", order, allowed_orders)

    endpoint = "/v1/media/episode/bybohne/{}".format(bohne_id)
    return self._request_paged(endpoint, 50, False, order=order)
def get_paths_op(
    self,
    field: str,
    tokens: Sequence[str],
    setop: Callable[..., set[int]],
    scoring: str = "unscored",
) -> Iterable[OptionalSearchResult]:
    """Yield `(document, score)` pairs for a set operation over several tokens.

    For every token the set of matching document ids is collected from
    `self.get_docs(field, token)`; `setop` is called with all of these sets
    as positional arguments and its result selects the returned documents.
    Each id is translated through `self.ids2docs`.

    The second tuple element depends on `scoring` (validated against
    SCORING_METHODS via assert_choice):

    - "unscored": the enumeration index of the id in the `setop` result
    - "term_freq": the sum of the values stored for the id over all tokens
    - "tfidf": the sum of `self._tfidf(term_freq, inv_doc_freq)` over all
      tokens, where `inv_doc_freq` is
      `log10(len(self.table[field]) / len(docs))` for the token's docs
    """
    assert_choice("scoring", scoring, SCORING_METHODS)

    id_sets = tuple(set(self.get_docs(field, token).keys()) for token in tokens)
    selected_ids = setop(*id_sets)

    paths: Iterable[OptionalSearchResult]

    if scoring == "unscored":
        paths = (
            (self.ids2docs[doc_id], position)
            for position, doc_id in enumerate(selected_ids)
        )

    elif scoring == "term_freq":
        freq_totals = defaultdict(int)  # type: DefaultDict[int, int]
        for token in tokens:
            for doc_id, term_freq in self.get_docs(field, token).items():
                freq_totals[doc_id] += term_freq

        paths = ((self.ids2docs[doc_id], freq_totals[doc_id]) for doc_id in selected_ids)

    elif scoring == "tfidf":
        score_totals = defaultdict(float)  # type: DefaultDict[int, float]
        for token in tokens:
            matches = self.get_docs(field, token)
            if not matches:
                # A token without matches contributes nothing; skipping it
                # also avoids dividing by len(matches) == 0.
                continue

            num_docs = len(self.table[field])
            inv_doc_freq = log10(num_docs / len(matches))
            for doc_id, term_freq in matches.items():
                score_totals[doc_id] += self._tfidf(term_freq, inv_doc_freq)

        paths = ((self.ids2docs[doc_id], score_totals[doc_id]) for doc_id in selected_ids)

    return paths