def user_pw(self, username, password):
    try:
        user = db.session.query(User).filter_by(name=username).one()
    except NoResultFound:
        raise ValueError('User {} does not exist'.format(username))
    user.password = bcrypt.hashpw(password.encode('utf-8'), bcrypt.gensalt())
    user.time_updated = int(utils.now())
    db.session.commit()
async def make_dataset(self, connection, files, name, time_added=None, discarded=False,
                       setid=None, status=0, timestamp=None, _restore=None):
    # pylint: disable=too-many-arguments
    time_added = int(utils.now() * 1000) if time_added is None else time_added
    collection = await CollectionModel.filter(name=self.name).using_db(connection).first()
    # The dataset is created with a placeholder timestamp of 0; the real value is
    # set below, once the file mtimes are known.
    dataset = await Dataset.create(collection=collection, name=name, discarded=discarded,
                                   status=status, time_added=time_added, timestamp=0,
                                   setid=setid or SetID.random(), acn_id=collection.acn_id,
                                   using_db=connection)
    if _restore:
        # restore path: files already come as dicts of field values
        files = [File(dataset=dataset, idx=i, **x) for i, x in enumerate(files)]
    else:
        # scan path: files come as paths; stat them to get mtime and size
        files = [
            File(dataset=dataset, idx=i, mtime=int(utils.mtime(path) * 1000),
                 path=path, size=stat.st_size)
            for i, (path, stat) in enumerate((path, utils.stat(path)) for path in files)
        ]
    dataset.timestamp = timestamp or max(x.mtime for x in files)
    await dataset.save(using_db=connection)
    await File.bulk_create(files, using_db=connection)
    await dataset.fetch_related('files', using_db=connection)
    storedir = self.config.marv.storedir
    store = Store(storedir, self.nodes)
    store.add_dataset(dataset, exists_okay=_restore)
    self.render_detail(dataset)
    return dataset
def user_add(self, name, password, realm, realmuid, given_name=None, family_name=None,
             email=None, time_created=None, time_updated=None, _restore=None):
    try:
        if not _restore:
            # during restore the password is already hashed; otherwise hash it now
            password = bcrypt.hashpw(password.encode('utf-8'), bcrypt.gensalt())
        now = int(utils.now())
        if not time_created:
            time_created = now
        if not time_updated:
            time_updated = now
        user = User(name=name, password=password, realm=realm, given_name=given_name,
                    family_name=family_name, email=email, realmuid=realmuid,
                    time_created=time_created, time_updated=time_updated)
        db.session.add(user)
        db.session.commit()
    except IntegrityError:
        raise ValueError('User {} exists already'.format(name))
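# Usage sketch (illustrative only) for the two user methods above: create a user,
# then rotate its password. The receiver object, realm name and credentials below
# are placeholders for illustration and do not appear in the excerpt.
def example_user_setup(site):
    # add a user; the plaintext password is hashed inside user_add
    site.user_add('alice', 's3cret', 'marv', None,
                  given_name='Alice', email='alice@example.com')
    # later, replace the password; user_pw hashes the new value as well
    site.user_pw('alice', 'new-s3cret')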
def make_dataset(self, files, name, time_added=None, discarded=None, setid=None,
                 status=None, timestamp=None, _restore=None):
    setid = setid or SetID.random()
    if _restore:
        # restore path: files already come as dicts of field values
        files = [File(idx=i, **x) for i, x in enumerate(files)]
    else:
        # scan path: files come as paths; stat them to get mtime and size
        files = [File(idx=i, mtime=int(utils.mtime(path) * 1000), path=path, size=stat.st_size)
                 for i, (path, stat) in enumerate((path, os.stat(path)) for path in files)]
    time_added = int(utils.now() * 1000) if time_added is None else time_added
    dataset = Dataset(collection=self.name, files=files, name=name, discarded=discarded,
                      status=status, time_added=time_added,
                      timestamp=timestamp or max(x.mtime for x in files), setid=setid)
    storedir = self.config.marv.storedir
    store = Store(storedir, self.nodes)
    store.add_dataset(dataset, exists_okay=_restore)
    self.render_detail(dataset)
    return dataset
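# Call-site sketch contrasting the two make_dataset variants above: the sync
# variant takes only file paths and a name, while the async variant additionally
# needs an open database connection and must be awaited. The ``collection``,
# ``connection`` and path values are placeholders, not taken from the excerpt.
def example_make_dataset_sync(collection):
    return collection.make_dataset(['/data/scans/run1/a.bag'], 'run1')

async def example_make_dataset_async(collection, connection):
    return await collection.make_dataset(connection, ['/data/scans/run1/a.bag'], 'run1')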
def scan(self, scanpath, dry_run=False):
    Listing = self.model.Listing
    log = getLogger('.'.join([__name__, self.name]))
    scanroot = (x for x in self.scanroots if scanpath.startswith(x)).next()
    if not os.path.isdir(scanpath):
        log.warn('%s does not exist or is not a directory', scanpath)

    log.verbose("scanning %s'%s'", 'dry_run ' if dry_run else '', scanpath)

    # missing/changed flag for known files
    startswith = File.path.like('{}%'.format(esc(scanpath)), escape='$')
    known_files = File.query.filter(startswith)\
                            .join(Dataset)\
                            .filter(Dataset.discarded.isnot(True))
    known_filenames = defaultdict(set)
    changes = defaultdict(list)  # all mtime/missing changes in one transaction
    for file in known_files:
        path = file.path
        known_filenames[os.path.dirname(path)].add(os.path.basename(path))
        try:
            mtime = utils.mtime(path)
            missing = False
        except OSError:
            mtime = None
            missing = True
        if missing ^ bool(file.missing):
            log.info("%s '%s'", 'lost' if missing else 'recovered', path)
            changes[file.dataset_id].append((file, missing))
        if mtime and int(mtime * 1000) > file.mtime:
            log.info("mtime newer '%s'", path)
            changes[file.dataset_id].append((file, mtime))

    # Apply missing/mtime changes
    if not dry_run and changes:
        ids = changes.keys()
        for dataset in Dataset.query.filter(Dataset.id.in_(ids)):
            for file, change in changes.pop(dataset.id):
                check_outdated = False
                if type(change) is bool:
                    file.missing = change
                    dataset.missing = change
                else:
                    file.mtime = int(change * 1000)
                    check_outdated = True
                if check_outdated:
                    self._check_outdated(dataset)
            dataset.time_updated = int(utils.now())
        assert not changes
        db.session.commit()

    # Scan for new files
    batch = []
    for directory, subdirs, filenames in os.walk(scanpath):
        # Ignore directories containing a .marvignore file
        if os.path.exists(os.path.join(directory, '.marvignore')):
            subdirs[:] = []
            continue

        # Ignore hidden directories and traverse subdirs alphabetically
        subdirs[:] = sorted([x for x in subdirs if x[0] != '.'])

        # Ignore hidden and known files
        known = known_filenames[directory]
        filenames = {x for x in filenames if x[0] != '.'}
        filenames = sorted(filenames - known)

        for name, files in self.scanner(directory, subdirs, filenames):
            files = [x if os.path.isabs(x) else os.path.join(directory, x) for x in files]
            assert all(x.startswith(directory) for x in files), files
            if dry_run:
                log.info("would add '%s': '%s'", directory, name)
            else:
                dataset = self.make_dataset(files, name)
                batch.append(dataset)
                if len(batch) > 50:
                    self._add_batch(log, batch)

    if not dry_run and batch:
        self._add_batch(log, batch)

    log.verbose("finished %s'%s'", 'dry_run ' if dry_run else '', scanpath)
async def scan(self, scanpath, dry_run=False):  # noqa: C901
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    log = getLogger('.'.join([__name__, self.name]))
    if not os.path.isdir(scanpath):
        log.warning('%s does not exist or is not a directory', scanpath)

    log.verbose("scanning %s'%s'", 'dry_run ' if dry_run else '', scanpath)

    # missing/changed flag for known files
    async with scoped_session(self.site.db) as connection:
        known_files = await File.filter(path__startswith=scanpath)\
                                .filter(dataset__discarded__not=True)\
                                .using_db(connection)
        known_filenames = defaultdict(set)
        changes = defaultdict(list)  # all mtime/missing changes in one transaction
        for file in known_files:
            path = file.path
            known_filenames[os.path.dirname(path)].add(os.path.basename(path))
            try:
                mtime = utils.mtime(path)
                missing = False
            except OSError:
                mtime = None
                missing = True
            if missing ^ bool(file.missing):
                log.info("%s '%s'", 'lost' if missing else 'recovered', path)
                changes[file.dataset_id].append((file, missing))
            if mtime and int(mtime * 1000) > file.mtime:
                log.info("mtime newer '%s'", path)
                changes[file.dataset_id].append((file, mtime))

        # Apply missing/mtime changes
        if not dry_run and changes:
            ids = changes.keys()
            for dataset in await Dataset.filter(id__in=ids).using_db(connection):
                for file, change in changes.pop(dataset.id):
                    check_outdated = False
                    if isinstance(change, bool):
                        file.missing = change
                        dataset.missing = change
                    else:
                        file.mtime = int(change * 1000)
                        check_outdated = True
                    await file.save(connection)
                    if check_outdated:
                        await dataset.fetch_related('files', using_db=connection)
                        self._check_outdated(dataset)
                dataset.time_updated = int(utils.now())
                await dataset.save(connection)
            assert not changes

        # Scan for new files
        batch = []
        for directory, subdirs, filenames in utils.walk(scanpath):
            directory = str(directory)  # TODO: for now we don't pass Path into scanner

            # Ignore directories containing a .marvignore file
            if os.path.exists(os.path.join(directory, '.marvignore')):
                subdirs.clear()
                continue

            # Ignore hidden directories and traverse subdirs alphabetically
            subdirs[:] = sorted(x for x in subdirs if x[0] != '.')

            # Ignore hidden and known files
            known = known_filenames[directory]
            filenames = sorted(x for x in filenames if x[0] != '.' and x not in known)

            if not filenames and not subdirs:
                continue

            for name, files in self.scanner(directory, subdirs, filenames):
                files = [x if os.path.isabs(x) else os.path.join(directory, x) for x in files]
                assert all(x.startswith(directory) for x in files), files
                if dry_run:
                    log.info("would add '%s': '%s'", directory, name)
                else:
                    dataset = await self.make_dataset(connection, files, name)
                    batch.append(dataset)
                    if len(batch) >= 50:
                        await self._upsert_listing(connection, log, batch)
                        batch.clear()

        if not dry_run and batch:
            await self._upsert_listing(connection, log, batch)

    log.verbose("finished %s'%s'", 'dry_run ' if dry_run else '', scanpath)
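# Sketch of driving the async scan above from a coroutine. marv normally triggers
# scans through its CLI/site layer, which is not part of this excerpt; the
# ``collection`` instance and the scan path below are placeholders.
import asyncio

async def example_rescan(collection, scanpath='/data/scans'):
    await collection.scan(scanpath, dry_run=True)  # log what would be added, change nothing
    await collection.scan(scanpath)                # add new datasets and update known files

# asyncio.run(example_rescan(collection))  # requires a configured collection object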