Example #1
  def tagsetses_from_path(cls, ont_path: str):
    ''' Return `(tagsets,ont_pfx_map)` from `ont_path`,
        being the default `TagSets` and a mapping of name->`TagSets`
        for various subontologies.

        If `ont_path` resolves to a file the mapping will be empty;
        return an `SQLTags` if `ont_path` ends with `'.sqlite'`,
        otherwise a `TagFile`.

        If `ont_path` resolves to a directory, scan the entries.
        An entry named *prefix*`.sqlite` adds a *prefix*->`SQLTags`
        entry to the mapping.
        An entry named *prefix*`.tags` adds a *prefix*->`TagFile`
        entry to the mapping.
        After the scan, `tagsets` is set from the entry
        whose prefix was `'_'`, or `None`.
    '''
    ont_pfx_map = {}
    if isfilepath(ont_path):
      if ont_path.endswith('.sqlite'):
        tagsets = SQLTags(ont_path)
      else:
        tagsets = TagFile(ont_path)
    elif isdirpath(ont_path):
      with Pfx("listdir(%r)", ont_path):
        for subont_name in os.listdir(ont_path):
          if not subont_name or subont_name.startswith('.'):
            continue
          subont_path = joinpath(ont_path, subont_name)
          with Pfx(subont_path):
            if not isfilepath(subont_path):
              warning("not a file")
              continue
            prefix = cutsuffix(subont_name, '.sqlite')
            if prefix is not subont_name:
              ont_pfx_map[prefix] = SQLTags(subont_path)
              continue
            prefix = cutsuffix(subont_name, '.tags')
            if prefix is not subont_name:
              ont_pfx_map[prefix] = TagFile(subont_path)
              continue
            warning("unsupported name, does not end in .sqlite or .tags")
            continue
      tagsets = ont_pfx_map.pop('_', None)
    else:
      if not ont_path.endswith('.sqlite'):
        ont_path_sqlite = ont_path + '.sqlite'
        if isfilepath(ont_path_sqlite):
          return cls.tagsetses_from_path(ont_path_sqlite)
      raise ValueError(f"unsupported ont_path={ont_path!r}")
    return tagsets, ont_pfx_map
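
Usage sketch (illustrative only: the directory layout and the owning class name are assumptions, not taken from this excerpt):

# ont.d/
#   _.sqlite        -> becomes the default `tagsets`
#   movies.sqlite   -> ont_pfx_map['movies']
#   music.tags      -> ont_pfx_map['music']
#
# tagsets, ont_pfx_map = SomeOntologyClass.tagsetses_from_path('ont.d')
# assert sorted(ont_pfx_map) == ['movies', 'music']
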
Example #2
 def startup_shutdown(self):
   # Define the window's contents
   self.tree = PathListWidget(
       self.fspaths,
       key="paths",
       fixed_size=(200, None),
       expand_x=True,
       expand_y=True,
       show_expanded=True,
       pad=(3, 3),
   )
   self.pathview = PathView(tagger=self.tagger)
   layout = [
       [
           self.tree,
           self.pathview,
       ],
       [sg.Text("BAH")],
   ]
   self.window = sg.Window(
       str(self),
       layout,
       size=(2200, 1500),
       finalize=True,
   )
   print("window made")
   yield self
   print("closing")
   self.window.close()
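
The generator shape above (setup, `yield self`, teardown) is the pattern used by context-manager mixins in these libraries; a minimal self-contained sketch of the same shape, with stand-ins for the PySimpleGUI pieces:

from contextlib import contextmanager

class WindowedApp:
  ''' Toy stand-in demonstrating the setup/yield/teardown shape. '''

  @contextmanager
  def startup_shutdown(self):
    self.window = "window"   # stands in for sg.Window(...)
    try:
      yield self             # the application body runs here
    finally:
      self.window = None     # stands in for self.window.close()

with WindowedApp().startup_shutdown() as app:
  print("the window exists only inside this block")
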
Example #3
    def __init__(self, filename, dictclass=UUIDedDict, create=False):
        ''' Initialise the mapping.

            Parameters:
            * `filename`: the file containing the newline delimited JSON data;
              this need not yet exist
            * `dictclass`: an optional `dict` subclass to hold each record,
              default `UUIDedDict`
            * `create`: if true, ensure the file exists
              by transiently opening it for append if it is missing;
              default `False`
        '''
        if hasattr(self, '_lock'):
            return
        self.__ndjson_filename = filename
        self.__dictclass = dictclass
        if (create and not isfilepath(filename)
                and not isfilepath(filename + '.gz')):
            # make sure the file exists
            with gzifopen(filename, 'a'):  # pylint: disable=unspecified-encoding
                pass
        self.scan_errors = []
        self._lock = RLock()
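
`gzifopen` is a cs helper not shown in this excerpt; a plausible stdlib stand-in, assuming (from the paired `isfilepath` checks above) that it opens `filename` or its `.gz` variant, whichever exists:

import gzip
import os

def gzifopen_sketch(path, mode='r', **kw):
    # Prefer the gzipped variant when present, else the plain file;
    # in append mode on a missing file this creates the plain file.
    # A sketch of the assumed semantics, not the real cs implementation.
    gzpath = path + '.gz'
    if os.path.isfile(gzpath):
        return gzip.open(gzpath, mode, **kw)
    return open(path, mode, **kw)
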
Example #4
 def cmd_linktree(self, argv):
   ''' Usage: {cmd} srctrees... dsttree
         Link media files from the srctrees into the dsttree
         using the Plex naming conventions.
   '''
   if len(argv) < 2:
     raise GetoptError("missing srctrees or dsttree")
   dstroot = argv.pop()
   srcroots = argv
   options = self.options
   fstags = options.fstags
   if not isdirpath(dstroot):
     raise GetoptError("dstroot does not exist: %s" % (dstroot,))
   for srcroot in srcroots:
     with Pfx(srcroot):
       for filepath in [srcroot] if isfilepath(srcroot) else sorted(
           rfilepaths(srcroot)):
         with Pfx(filepath):
           plex_linkpath(fstags, filepath, dstroot)
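
`rfilepaths` comes from the cs file utilities; for readers without that dependency, a stdlib sketch of its assumed behaviour (yield every regular file under a directory):

import os

def rfilepaths_sketch(dirpath):
  # Yield the path of every regular file under dirpath.
  for subdir, _, filenames in os.walk(dirpath):
    for filename in filenames:
      path = os.path.join(subdir, filename)
      if os.path.isfile(path):
        yield path
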
Example #5
def VTDStore(name, path, *, hashclass, preferred_indexclass=None):
    ''' Factory to return a `MappingStore` using a `BackingFile`
        backed by a single `.vtd` file.
    '''
    if hashclass is None:
        hashclass = DEFAULT_HASHCLASS
    with Pfx(path):
        if not path.endswith('.vtd'):
            warning("does not end with .vtd")
        if not isfilepath(path):
            raise ValueError("missing path %r" % (path, ))
        pathbase, _ = splitext(path)
        index_basepath = f"{pathbase}-index-{hashclass.HASHNAME}"
        indexclass = choose_indexclass(
            index_basepath, preferred_indexclass=preferred_indexclass)
        binary_index = indexclass(index_basepath)
        index = BinaryHashCodeIndex(hashclass=hashclass,
                                    binary_index=binary_index,
                                    index_entry_class=BackingFileIndexEntry)
        return MappingStore(name,
                            CompressibleBackingFile(path,
                                                    hashclass=hashclass,
                                                    index=index),
                            hashclass=hashclass)
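
An illustrative call (the store name and `.vtd` path are hypothetical; `DEFAULT_HASHCLASS` is the module default used above):

# S = VTDStore('media', 'blocks.vtd', hashclass=DEFAULT_HASHCLASS)
# The index files land beside the data file as blocks-index-<hashname>.*
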
Example #6
def get_store_spec(s, offset=0):
    ''' Get a single Store specification from a string.
        Return `(matched, type, params, offset)`
        being the matched text, store type, parameters and the new offset.

        Recognised specifications:
        * `"text"`: Quoted store spec, needed to enclose some of the following
          syntaxes if they do not consume the whole string.
        * `[clause_name]`: The name of a clause to be obtained from a Config.
        * `/path/to/something`, `./path/to/something`:
          A filesystem path to a local resource.
          Supported paths:
          - `.../foo.sock`: A UNIX socket based StreamStore.
          - `.../dir`: A DataDirStore directory.
          - `.../foo.vtd` (STILL TODO): A VTDStore.
        * `|command`: A subprocess implementing the streaming protocol.
        * `store_type(param=value,...)`:
          A general Store specification.
        * `store_type:params...`:
          An inline Store specification.
          Supported inline types: `tcp:[host]:port`

        TODO:
        * `ssh://host/[store-designator-as-above]`:
        * `unix:/path/to/socket`:
          Connect to a daemon implementing the streaming protocol.
        * `http[s]://host/prefix`:
          A Store presenting content under `prefix`:
          + `/h/hashcode.hashtype`: Block data by hashcode.
          + `/i/hashcode.hashtype`: Indirect block by hashcode.
        * `s3://bucketname/prefix/hashcode.hashtype`:
          An AWS S3 bucket with raw blocks.
    '''
    offset0 = offset
    if offset >= len(s):
        raise ValueError("empty string")
    if s.startswith('"', offset):
        # "store_spec"
        qs, offset = get_qstr(s, offset, q='"')
        _, store_type, params, offset2 = get_store_spec(qs, 0)
        if offset2 < len(qs):
            raise ValueError("unparsed text inside quotes: %r" %
                             (qs[offset2:], ))
    elif s.startswith('[', offset):
        # [clause_name]
        store_type = 'config'
        clause_name, offset = get_ini_clausename(s, offset)
        params = {'clause_name': clause_name}
    elif s.startswith('/', offset) or s.startswith('./', offset):
        path = s[offset:]
        offset = len(s)
        if path.endswith('.sock'):
            store_type = 'socket'
            params = {'socket_path': path}
        elif isdirpath(path):
            store_type = 'datadir'
            params = {'path': path}
        elif isfilepath(path):
            store_type = 'datafile'
            params = {'path': path}
        else:
            raise ValueError("%r: not a directory or a socket" % (path, ))
    elif s.startswith('|', offset):
        # |shell command
        store_type = 'shell'
        params = {'shcmd': s[offset + 1:].strip()}
        offset = len(s)
    else:
        store_type, offset = get_identifier(s, offset)
        if not store_type:
            raise ValueError("expected identifier at offset %d, found: %r" %
                             (offset, s[offset:]))
        with Pfx(store_type):
            if s.startswith('(', offset):
                params, offset = get_params(s, offset)
            elif s.startswith(':', offset):
                offset += 1
                params = {}
                if store_type == 'tcp':
                    colon2 = s.find(':', offset)
                    if colon2 < offset:
                        raise ValueError(
                            "missing second colon after offset %d" %
                            (offset, ))
                    hostpart = s[offset:colon2]
                    offset = colon2 + 1
                    if not hostpart:
                        hostpart = 'localhost'
                    params['host'] = hostpart
                    portpart, offset = get_token(s, offset)
                    params['port'] = portpart
                else:
                    raise ValueError("unrecognised Store type for inline form")
            else:
                raise ValueError("no parameters")
    return s[offset0:offset], store_type, params, offset
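
Assuming `get_store_spec` above is importable, the branches parse like this (the expected values follow directly from the code; the exact `port` value depends on `get_token`, which is not shown):

# matched, store_type, params, offset = get_store_spec('[media]')
#   -> store_type == 'config', params == {'clause_name': 'media'}
# matched, store_type, params, offset = get_store_spec('tcp::9999')
#   -> store_type == 'tcp', params['host'] == 'localhost'
# matched, store_type, params, offset = get_store_spec('|vt-backend')
#   -> store_type == 'shell', params == {'shcmd': 'vt-backend'}
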
Example #7
 def cmd_autofile(self, argv):
      ''' Usage: {cmd} pathnames...
        Link pathnames to destinations based on their tags.
        -d    Treat directory pathnames like files - file the
              directory itself, not its contents.
              (TODO: we file by linking - this needs a rename.)
        -n    No link (default). Just print filing actions.
        -r    Recurse. Required to autofile a directory tree.
        -x    Remove the source file if linked successfully. Implies -y.
        -y    Link files to destinations.
      '''
     direct = False
     recurse = False
     no_link = True
     do_remove = False
     opts, argv = getopt(argv, 'dnrxy')
     for opt, _ in opts:
         with Pfx(opt):
             if opt == '-d':
                 direct = True
             elif opt == '-n':
                 no_link = True
                 do_remove = False
             elif opt == '-r':
                 recurse = True
             elif opt == '-x':
                 no_link = False
                 do_remove = True
             elif opt == '-y':
                 no_link = False
             else:
                 raise RuntimeError("unimplemented option")
     if not argv:
         raise GetoptError("missing pathnames")
     tagger = self.options.tagger
     fstags = tagger.fstags
     for path in argv:
         with Pfx(path):
             if direct or not isdirpath(path):
                 self._autofile(path,
                                tagger=tagger,
                                no_link=no_link,
                                do_remove=do_remove)
             elif not recurse:
                 pfxprint("not autofiling directory, use -r for recursion")
             else:
                 for subpath, dirnames, filenames in os.walk(path):
                     with Pfx(subpath):
                         # order the descent
                         dirnames[:] = sorted(
                             dname for dname in dirnames
                             if dname and not dname.startswith('.'))
                         tagged = fstags[subpath]
                         if 'tagger.skip' in tagged:
                             # prune this directory tree
                             dirnames[:] = []
                             continue
                         for filename in sorted(filenames):
                             with Pfx(filename):
                                 filepath = joinpath(subpath, filename)
                                 if not isfilepath(filepath):
                                     pfxprint(
                                         "not a regular file, skipping")
                                     continue
                                 self._autofile(
                                     filepath,
                                     tagger=tagger,
                                     no_link=no_link,
                                     do_remove=do_remove,
                                 )
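
The in-place `dirnames[:] = sorted(...)` assignment above is what prunes the walk: os.walk keeps reading the same list object to decide where to descend. A self-contained illustration of the pattern:

import os

def visible_files(top):
  # Assigning through dirnames[:] mutates the list os.walk holds,
  # so pruned directories are never visited; rebinding would not work.
  for dirpath, dirnames, filenames in os.walk(top):
    dirnames[:] = sorted(d for d in dirnames if not d.startswith('.'))
    for filename in sorted(filenames):
      yield os.path.join(dirpath, filename)
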
Example #8
def pngfor(path, max_size=None, *, min_size=None, cached=None, force=False):
  ''' Create a PNG version of the image at `path`,
      scaled to fit within some size constraints.
      Return the pathname of the PNG file.

      Parameters:
      * `max_size`: optional `(width,height)` tuple, default `(1920,1080)`
      * `min_size`: optional `(width,height)` tuple, default half of `max_size`
      * `cached`: optional mapping of `(path,'png',size)`->`pngof_path`
        where size is the chosen final size tuple
      * `force`: optional flag (default `False`)
        to force recreation of the PNG version and associated cache entry
  '''
  if max_size is None:
    max_size = 1920, 1080
  if min_size is None:
    min_size = max_size[0] // 2, max_size[1] // 2
  if cached is None:
    cached = _conv_cache
  tagged = _fstags[path]
  path = tagged.filepath
  size = image_size(path)
  if size is None:
    return None
  # choose a target size
  if size[0] > max_size[0] or size[1] > max_size[1]:
    scale = min(max_size[0] / size[0], max_size[1] / size[1])
    re_size = int(size[0] * scale), int(size[1] * scale)
    ##warning("too big, rescale by %s from %r to %r", scale, size, re_size)
    key = path, 'png', re_size
  elif size[0] < min_size[0] or size[1] < min_size[1]:
    # upscale: use max() so that both dimensions reach the minimum
    scale = max(min_size[0] / size[0], min_size[1] / size[1])
    re_size = int(size[0] * scale), int(size[1] * scale)
    ##warning("too small, rescale by %s from %r to %r", scale, size, re_size)
    key = path, 'png', re_size
  else:
    re_size = None
    key = path, 'png', size
  cached_path = cached.get(key)
  if cached_path:
    return cached_path
  if tagged['pil.format'] == 'PNG' and re_size is None:
    # right format, same size - return ourself
    cached[key] = tagged.filepath
    return tagged.filepath
  # path to converted file
  hashcode = SHA256.from_pathname(path)
  pngbase = f'{hashcode}.png'
  if not isdirpath(CONVCACHE_ROOT):
    pfx_call(os.mkdir, CONVCACHE_ROOT)
  convsize = re_size or size
  convdirpath = joinpath(CONVCACHE_ROOT, f'png/{convsize[0]}x{convsize[1]}')
  if not isdirpath(convdirpath):
    pfx_call(os.makedirs, convdirpath)
  pngpath = joinpath(convdirpath, pngbase)
  if force or not isfilepath(pngpath):
    try:
      with Image.open(path) as im:
        if re_size is None:
          pfx_call(im.save, pngpath, 'PNG')
        else:
          im2 = im.resize(re_size)
          pfx_call(im2.save, pngpath, 'PNG')
    except UnidentifiedImageError as e:
      warning("unhandled image: %s", e)
      pngpath = None
  cached[key] = pngpath
  return pngpath
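
Illustrative calls, assuming the module-level `_fstags` and `_conv_cache` used above are initialised:

# png = pngfor('photo.jpg')                     # bounded by (1920, 1080)
# thumb = pngfor('photo.jpg', max_size=(320, 200))
# fresh = pngfor('photo.jpg', force=True)       # recreate, refresh cache
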
Example #9
 def _monitor_datafiles(self):
   ''' Thread body to poll the ideal tree for new or changed files.
   '''
   proxy = upd_state.proxy
   proxy.prefix = str(self) + " monitor "
   meta_store = self.meta_store
   filemap = self._filemap
   index = self.index  # assumption: the class's hashcode index, used below
   datadirpath = self.pathto('data')
   if meta_store is not None:
     topdir = self.topdir
   else:
     warning("%s: no meta_store!", self)
   updated = False
   disabled = False
   while not self.cancelled:
     sleep(self.DELAY_INTERSCAN)
     if self.flag_scan_disable:
       if not disabled:
         info("scan %r DISABLED", shortpath(datadirpath))
         disabled = True
       continue
     if disabled:
       info("scan %r ENABLED", shortpath(datadirpath))
       disabled = False
     # scan for new datafiles
     with Pfx("%r", datadirpath):
       seen = set()
       info("scan tree...")
       with proxy.extend_prefix(" scan"):
         for dirpath, dirnames, filenames in os.walk(datadirpath,
                                                     followlinks=True):
           dirnames[:] = sorted(dirnames)
           filenames = sorted(filenames)
           sleep(self.DELAY_INTRASCAN)
           if self.cancelled or self.flag_scan_disable:
             break
           rdirpath = relpath(dirpath, datadirpath)
           with Pfx(rdirpath):
             with (proxy.extend_prefix(" " + rdirpath)
                   if filenames else nullcontext()):
               # this will be the subdirectories into which to recurse
               pruned_dirnames = []
               for dname in dirnames:
                 if self.exclude_dir(joinpath(rdirpath, dname)):
                   # unwanted
                   continue
                 subdirpath = joinpath(dirpath, dname)
                 try:
                   S = os.stat(subdirpath)
                 except OSError as e:
                    # inaccessible
                   warning("stat(%r): %s, skipping", subdirpath, e)
                   continue
                 ino = S.st_dev, S.st_ino
                 if ino in seen:
                   # we have seen this subdir before, probably via a symlink
                   # TODO: preserve symlinks? attach alter ego directly as a Dir?
                   debug(
                       "seen %r (dev=%s,ino=%s), skipping", subdirpath,
                       ino[0], ino[1]
                   )
                   continue
                 seen.add(ino)
                 pruned_dirnames.append(dname)
               dirnames[:] = pruned_dirnames
               if meta_store is None:
                 warning("no meta_store")
                 D = None
               else:
                 with meta_store:
                   D = topdir.makedirs(rdirpath, force=True)
                   # prune removed names
                   names = list(D.keys())
                   for name in names:
                     if name not in dirnames and name not in filenames:
                       info("del %r", name)
                       del D[name]
               for filename in filenames:
                 with Pfx(filename):
                   if self.cancelled or self.flag_scan_disable:
                     break
                   rfilepath = joinpath(rdirpath, filename)
                   if self.exclude_file(rfilepath):
                     continue
                   filepath = joinpath(dirpath, filename)
                   if not isfilepath(filepath):
                     continue
                   # look up this file in our file state index
                   DFstate = filemap.get(rfilepath)
                   if (DFstate is not None and D is not None
                       and filename not in D):
                     # in filemap, but not in dir: start again
                     warning("in filemap but not in Dir, rescanning")
                     filemap.del_path(rfilepath)
                     DFstate = None
                   if DFstate is None:
                     DFstate = filemap.add_path(rfilepath)
                   try:
                     new_size = DFstate.stat_size(self.follow_symlinks)
                   except OSError as e:
                     if e.errno == errno.ENOENT:
                       warning("forgetting missing file")
                       self._del_datafilestate(DFstate)
                     else:
                       warning("stat: %s", e)
                     continue
                   if new_size is None:
                     # skip non files
                     debug("SKIP non-file")
                     continue
                   if meta_store:
                     try:
                       E = D[filename]
                     except KeyError:
                       E = FileDirent(filename)
                       D[filename] = E
                     else:
                       if not E.isfile:
                         info(
                             "new FileDirent replacing previous nonfile: %s",
                             E
                         )
                         E = FileDirent(filename)
                         D[filename] = E
                   if new_size > DFstate.scanned_to:
                     with proxy.extend_prefix(
                         " scan %s[%d:%d]" %
                         (filename, DFstate.scanned_to, new_size)):
                       if DFstate.scanned_to > 0:
                         info("scan from %d", DFstate.scanned_to)
                       if meta_store is not None:
                         blockQ = IterableQueue()
                         R = meta_store._defer(
                             lambda B, Q: top_block_for(spliced_blocks(B, Q)),
                             E.block, blockQ
                         )
                       scan_from = DFstate.scanned_to
                       scan_start = time()
                        scanner = DFstate.scanfrom(offset=DFstate.scanned_to)
                        if defaults.show_progress:
                          scanner = progressbar(
                              scanner,
                              "scan " + rfilepath,
                             position=DFstate.scanned_to,
                             total=new_size,
                             units_scale=BINARY_BYTES_SCALE,
                             itemlenfunc=lambda t3: t3[2] - t3[0],
                             update_frequency=128,
                         )
                       for pre_offset, data, post_offset in scanner:
                         hashcode = self.hashclass.from_chunk(data)
                         entry = FileDataIndexEntry(
                             filenum=DFstate.filenum,
                             data_offset=pre_offset,
                             data_length=len(data),
                             flags=0,
                         )
                         entry_bs = bytes(entry)
                         with self._lock:
                           index[hashcode] = entry_bs
                         if meta_store is not None:
                           B = Block(data=data, hashcode=hashcode, added=True)
                           blockQ.put((pre_offset, B))
                         DFstate.scanned_to = post_offset
                         if self.cancelled or self.flag_scan_disable:
                           break
                     if meta_store is not None:
                       blockQ.close()
                       try:
                         top_block = R()
                       except MissingHashcodeError as e:
                         error("missing data, forcing rescan: %s", e)
                         DFstate.scanned_to = 0
                       else:
                         E.block = top_block
                         D.changed = True
                         updated = True
                     elapsed = time() - scan_start
                     scanned = DFstate.scanned_to - scan_from
                     if elapsed > 0:
                       scan_rate = scanned / elapsed
                     else:
                       scan_rate = None
                     if scan_rate is None:
                       info(
                           "scanned to %d: %s", DFstate.scanned_to,
                           transcribe_bytes_geek(scanned)
                       )
                     else:
                       info(
                           "scanned to %d: %s at %s/s", DFstate.scanned_to,
                           transcribe_bytes_geek(scanned),
                           transcribe_bytes_geek(scan_rate)
                       )
                     # stall after a file scan, briefly, to limit impact
                     if elapsed > 0:
                       sleep(min(elapsed, self.DELAY_INTRASCAN))
           # update the archive after updating from a directory
           if updated and meta_store is not None:
             self.sync_meta()
             updated = False
     self.flush()
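
The `(st_dev, st_ino)` seen-set in the scan above is the standard guard against revisiting a directory through symlinks when `followlinks=True`; distilled to a stdlib sketch:

import os

def walk_physical_once(top):
  # Follow symlinks but visit each physical directory at most once,
  # keyed by (st_dev, st_ino) as in the monitor loop above.
  seen = set()
  for dirpath, dirnames, filenames in os.walk(top, followlinks=True):
    keep = []
    for dname in dirnames:
      try:
        st = os.stat(os.path.join(dirpath, dname))
      except OSError:
        continue  # inaccessible: skip, as the monitor does
      key = st.st_dev, st.st_ino
      if key not in seen:
        seen.add(key)
        keep.append(dname)
    dirnames[:] = keep
    yield dirpath, filenames
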
Example #10
 def __init__(self, mobipath):
   if not isfilepath(mobipath):
     raise ValueError("mobipath %r is not a file" % (mobipath,))
   self.path = mobipath
Example #11
    def parse_special(self, special, readonly):
        ''' Parse the mount command's special device from `special`.
            Return `(fsname,readonly,Store,Dir,basename,archive)`.

            Supported formats:
            * `D{...}`: a raw `Dir` transcription.
            * `[`*clause*`]`: a config clause name.
            * `[`*clause*`]`*archive*: a config clause name
              and a reference to a named archive associated with that clause.
            * *archive_file*`.vt`: a path to a `.vt` archive file.
        '''
        fsname = special
        specialD = None
        special_store = None
        archive = None
        if special.startswith('D{') and special.endswith('}'):
            # D{dir}
            specialD, offset = parse(special)
            if offset != len(special):
                raise ValueError("unparsed text: %r" % (special[offset:], ))
            if not isinstance(specialD, Dir):
                raise ValueError(
                    "does not seem to be a Dir transcription, looks like a %s"
                    % (type(specialD), ))
            special_basename = specialD.name
            if not readonly:
                warning("setting readonly")
                readonly = True
        elif special.startswith('['):
            if special.endswith(']'):
                # expect "[clause]"
                clause_name, offset = get_ini_clausename(special)
                archive_name = ''
                special_basename = clause_name
            else:
                # expect "[clause]archive"
                # TODO: just pass to Archive(special,config=self)?
                # what about special_basename then?
                clause_name, archive_name, offset = get_ini_clause_entryname(
                    special)
                special_basename = archive_name
            if offset < len(special):
                raise ValueError("unparsed text: %r" % (special[offset:], ))
            fsname = str(self) + special
            try:
                special_store = self[clause_name]
            except KeyError as e:
                raise ValueError("unknown config clause [%s]" %
                                 (clause_name, )) from e
            if not archive_name:
                special_basename = clause_name
            else:
                special_basename = archive_name
            archive = special_store.get_Archive(archive_name)
        else:
            # pathname to archive file
            arpath = special
            if not isfilepath(arpath):
                raise ValueError("not a file")
            fsname = shortpath(realpath(arpath))
            spfx, sext = splitext(basename(arpath))
            if spfx and sext == '.vt':
                special_basename = spfx
            else:
                special_basename = special
            archive = FilePathArchive(arpath)
        return fsname, readonly, special_store, specialD, special_basename, archive
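
The branch order above dispatches purely on the leading characters of `special`; a stdlib-only classifier capturing that dispatch (function name and labels hypothetical):

def classify_special(special):
    # Mirror the branch order of parse_special above.
    if special.startswith('D{') and special.endswith('}'):
        return 'dir-transcription'
    if special.startswith('['):
        return 'config-clause' if special.endswith(']') else 'clause-archive'
    return 'archive-file-path'
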