def filter_root_names_for_path(self, roots: List[str], path: DocPath) -> List[str]:
    """Keep only the root names that accept the filetype of ``path``.

    A path containing the configured hashmark is treated as filetype
    "cdocs"; any other path gets its filetype from ``self.get_filetype``.
    The accepted root names for that filetype are read from
    ``self.metadata.accepted_by``; a filetype with no entry accepts no roots.

    Args:
        roots: candidate root names.
        path: the docpath being looked up.

    Returns:
        The members of ``roots`` accepted for the filetype, in their
        original order.
    """
    logging.info(
        f"Context.filter_root_names_for_path: starting roots: {roots}, path: {path}"
    )
    marker = self.metadata.config.get("filenames", "hashmark", "#")
    logging.info(f"Context.filter_root_names_for_path: hashmark: {marker}")

    has_marker = path.find(marker) >= 0
    kind = "cdocs" if has_marker else self.get_filetype(path)
    logging.info(f"Context.filter_root_names_for_path: filetype: {kind}")

    accepted_by = self.metadata.accepted_by
    logging.info(f"Context.filter_root_names_for_path: ab: {accepted_by}")

    accepting = accepted_by.get(kind)
    accepting = [] if accepting is None else accepting
    logging.info(
        f"Context.filter_root_names_for_path: found {accepting} for filetype. filtering roots using that list."
    )

    kept = []
    for name in roots:
        if name in accepting:
            kept.append(name)

    # Only log when the filter actually removed something.
    if roots != kept:
        logging.info(
            f"Context.filter_root_names_for_path: filtered (by accepted) {roots} to {kept}"
        )
    return kept
def _add_labels_to_tokens(self, path:DocPath, tokens:JsonDict, recurse:Optional[bool]=True) -> JsonDict:
    """Merge the labels found for ``path`` into ``tokens``.

    The path is first cut at the hashmark (if present) and then at the
    plus mark (if present). Labels for the remaining path come from
    ``self.get_labels`` and are added under keys prefixed with
    ``label__``. On a key collision the value already in ``tokens`` wins.

    Args:
        path: the docpath; may contain a hashmark and/or plus mark.
        tokens: the tokens to extend. Not modified; a new dict is built.
        recurse: passed through to ``self.get_labels``.

    Returns:
        A new JsonDict holding the prefixed labels plus ``tokens``.
    """
    base = path
    # Cut at the hashmark first, then at the plus mark, in that order.
    for marker in (self._hashmark, self._plus):
        cut = base.find(marker)
        if cut > -1:
            base = base[:cut]

    merged = {}
    for key, value in self.get_labels(base, recurse).items():
        merged["label__" + key] = value
    # Applied last so caller-supplied tokens override same-named labels.
    merged.update(tokens)
    return JsonDict(merged)
def get_concat_doc(self, path:DocPath) -> Doc:
    """Build a doc by concatenating the paths listed for a .concat path.

    Args:
        path: a docpath that contains ``.concat``.

    Returns:
        A Doc wrapping the output of ``self.concatter.concat``.

    Raises:
        DocNotFoundException: if ``path`` is None, or if
            ``self._get_concat_paths`` returns None for it.
        BadDocPath: if ``.concat`` does not occur in ``path``.
    """
    if path is None:
        raise DocNotFoundException("path can not be None")
    # NOTE(review): this is a substring test, so ".concat" anywhere in the
    # path passes, not only as the file extension -- confirm that is intended.
    if path.find('.concat') < 0:
        raise BadDocPath("path must have a .concat file extension")

    concat_paths = self._get_concat_paths(path)
    if concat_paths is None:
        raise DocNotFoundException(f'No concat instruction file at {path}')
    return Doc(self.concatter.concat(concat_paths))
def _get_doc_for_root(self, path:DocPath, pluspaths:List[DocPath], root:FilePath) -> Doc:
    """Read, transform and (optionally) plus-concatenate a doc from one root.

    Args:
        path: the docpath. When ``pluspaths`` is non-empty, everything from
            the first plus mark onwards is cut off before the lookup.
        pluspaths: paths whose concatenated content is joined onto the
            base doc; may be empty.
        root: the root handed to the pather to resolve the file path.

    Returns:
        A Doc wrapping the transformed (and possibly joined) content.
    """
    logging.info(f"Cdocs._get_doc_for_root: path: {path}. plus paths: {pluspaths}. root: {root}")
    has_plus = len(pluspaths) > 0
    if has_plus:
        logging.info("Cdocs._get_doc_for_root: stripping down base path from plus path(s)")
        path = path[0:path.find(self._plus)]
        logging.info(f"Cdocs._get_doc_for_root: base path is now {path}")

    logging.info(f"Cdocs._get_doc_for_root: checking pather: {self._pather} for path: {path}")
    filepath = self._pather.get_full_file_path_for_root(path, root)
    logging.info(f"Cdocs._get_doc_for_root: filepath from pather: {filepath}")

    raw = self._read_doc(filepath)
    size = 0 if raw is None else len(raw)
    logging.info(f"Cdocs._get_doc_for_root: content from {filepath} is {size} chars. transforming with: {self.transformer}.")

    content = self.transformer.transform(raw, path, None, True)
    if has_plus:
        # Append the concatenated plus docs to the transformed base doc.
        extra = self.concatter.concat(pluspaths)
        content = self.concatter.join(content, extra)
    return Doc(content)
def _get_doc(self, path:DocPath, notfound) -> Optional[Doc]:
    """Look up ``path`` in this object's doc root.

    Args:
        path: the docpath. A path containing a dot is rejected when
            ``self.filer.get_filetype`` reports it as "cdocs".
        notfound: if true, ``self.get_404()`` is returned when the lookup
            yields None. None is treated as False.

    Returns:
        The doc from ``self._get_doc_for_root``; the 404 doc when that is
        None and ``notfound`` is true; otherwise None.

    Raises:
        DocNotFoundException: if ``path`` is None.
        BadDocPath: if a cdocs path contains a dot.
    """
    logging.info(f"Cdocs._get_doc: looking for path: {path} in root: {self.rootname}. notfound: {notfound}")
    if path is None:
        raise DocNotFoundException("path can not be None")
    # The filetype is only consulted when the path actually contains a dot.
    if path.find('.') > -1 and self.filer.get_filetype(path) == 'cdocs':
        raise BadDocPath("dots are not allowed in cdoc paths")
    if notfound is None:
        logging.info("Cdocs._get_doc: notfound is None. you should fix this unless you want None returns.")
        notfound = False

    logging.info(f"Cdocs._get_doc: path: {path}")
    plus_paths = self._get_plus_paths(path)
    logging.info(f"Cdocs._get_doc: pluspaths to concationate: {plus_paths}")
    doc_root = self.get_doc_root()
    logging.info(f"Cdocs._get_doc: root {doc_root}")

    found = self._get_doc_for_root(path, plus_paths, doc_root)
    if found is not None or not notfound:
        return found
    return self.get_404()
def get_doc_from_roots(self, rootnames: List[str], path: DocPath, notfound: Optional[bool] = True, splitplus: Optional[bool] = True) -> Optional[Doc]: """ rootnames: a list of named roots to search path: the docpath. may have hash and plusses notfound: if true, return a default notfound if no results splitplus: if true, plus concats can be on different roots. i.e. for /x/y/z+a+b /x/y/z, /x/y/z/a, /x/y/z/b can all be on different roots. """ logging.info( f"Context.get_doc_from_roots: first match wins. rootnames: {rootnames}, path: {path}, notfound: {notfound}, splitplus: {splitplus}" ) plusmark = self._metadata.config.get("filenames", "plus") plus = path.find(plusmark) if plus > -1 and splitplus: if self._nosplitplus is None: nsp = self._metadata.config.get("defaults", "nosplitplus", "") self._nosplitplus = nsp.split(',') if len(self._nosplitplus) > 0: rootnames = [ name for name in rootnames if not name in self._nosplitplus ] logging.info( f"Context.get_doc_from_roots: nsp filtered rootnames: {rootnames}" ) # split into paths and call get_doc_from_roots on each, then concat # # /r/o/o/t.html#fish # needs to become /r/o/o # # /r/o/o/t#fish # needs to become /r/o/o/t # # /r/o/o/t # needs to become /r/o/o/t # logging.info(f"Context.get_doc_from_roots: path: {path}") logging.info(f"Context.get_doc_from_roots: rootnames: {rootnames}") logging.info(f"Context.get_doc_from_roots: notfound: {notfound}") logging.info(f"Context.get_doc_from_roots: splitplus: {splitplus}") paths = path.split(plusmark) logging.info(f"Context.get_doc_from_roots: paths: {paths}") rootpath = paths[0] logging.info(f"Context.get_doc_from_roots: rootpath: {rootpath}") hashmark = self._metadata.config.get("filenames", "hashmark") logging.info(f"Context.get_doc_from_roots: hashmark: {hashmark}") rootpath = rootpath.split(hashmark)[0] logging.info(f"Context.get_doc_from_roots: rootpath: {rootpath}") paths = [ p if p.find(rootpath) > -1 else rootpath + "/" + p for p in paths ] 
logging.info(f"Context.get_doc_from_roots: paths: {paths}") result = [] for path in paths: logging.info( f"Context.get_doc_from_roots: .... next path: {path}") r = self.get_doc_from_roots(rootnames, path, notfound) if r is not None: result.append(r) if len(result) == 0 and notfound: return self._get_default_not_found() return "".join(result) else: rootnames = self.filter_root_names_for_path(rootnames, path) logging.info( f"Context.get_doc_from_roots: rootnames: {rootnames} - not spliting pluses, first root locks in the pluses" ) for _ in rootnames: cdocs = self.keyed_cdocs[_] logging.info( f"Context.get_doc_from_roots: cdocs: {_} -> {cdocs.get_doc_root()}" ) doc = cdocs.get_doc(path, False) logging.info(f"found doc: {type(doc)}") if doc is not None: return doc if notfound: return self._get_default_not_found()
def get_full_file_path_for_root(self, path: DocPath, root: FilePath) -> FilePath:
    """Resolve a docpath to a file path under ``root``.

    Leading and trailing slashes/backslashes are stripped from ``path``.
    An empty result means the path points at the root, and ``root`` is
    returned unchanged. Otherwise the path is joined onto ``root`` and,
    depending on ``self.get_filename`` and on where the first '.' sits,
    passed through ``self._find_path``.

    Args:
        path: the docpath to resolve.
        root: the root the path is resolved against.

    Returns:
        ``root`` for an empty path, otherwise ``FilePath`` of the resolved
        path. The wrapped value is None when ``self._find_path`` returned
        None.
    """
    logging.info(
        f"SimplePather.get_full_file_path_for_root: path: {path}, root: {root}"
    )
    relative = path.strip('/\\')
    if relative == '':
        logging.info(
            "SimplePather.get_full_file_path_for_root: path points to root. returning root."
        )
        return root

    logging.info(
        f"SimplePather.get_full_file_path_for_root: getting filename for {relative}"
    )
    filename = self.get_filename(relative)
    logging.info(
        f"SimplePather.get_full_file_path_for_root: path: {relative}, filename: {filename}, root: {root}"
    )
    if filename is not None:
        # Keep only the part before the hashmark; the filename is appended
        # again further down.
        relative = relative[0:relative.find(self._hashmark)]

    # An earlier revision replaced root with self._docs_path at this point;
    # that swap is disabled and the caller's root is used as given.
    joined = os.path.join(root, relative)
    full = joined
    logging.info(
        f"SimplePather.get_full_file_path_for_root: joined root: {root} with path to get: {full}"
    )
    dot_at = joined.find(".")
    logging.info(
        f"SimplePather.get_full_file_path_for_root: index of '.': {dot_at}")

    if filename is None:
        # A '.' at or before index len(root) (or none at all, index -1) is
        # treated as "no '.'" in the path.
        if dot_at <= len(root):
            logging.info(
                "SimplePather.get_full_file_path_for_root: filename is None and no '.'"
            )
            full = self._find_path(full)
        else:
            # Path already has a '.'; it is used as joined.
            logging.info(
                "SimplePather.get_full_file_path_for_root: filename is None and there is a '.'"
            )
    else:
        logging.info(
            f"SimplePather.get_full_file_path_for_root: apath: {full}, last char: {full[-1:]}, filename: {filename}"
        )
        separator = '' if full[-1:] == '/' else os.path.sep
        full = self._find_path(full + separator + filename)

    if full is None:
        logging.info(
            f"SimplePather.get_full_file_path_for_root: apath is None! from: {self._rootname}->{full}"
        )
    return FilePath(full)