def ingest(path, dataset, languages=None):
    """Queue a set of files for ingest.

    Args:
        path: File or directory to ingest. It is passed through
            ``ensure_path``; nothing is queued if that yields ``None``.
        dataset: Dataset identifier the ingest job is created for.
        languages: Value stored under the ``languages`` key of the context
            handed to the manager (``None`` by default).
    """
    context = {'languages': languages}
    conn = get_redis()
    job = Job.create(conn, dataset)
    stage = job.get_stage(Stage.INGEST)
    manager = Manager(stage, context)
    try:
        path = ensure_path(path)
        if path is not None:
            if path.is_file():
                # A single file becomes one 'Document' entity whose ID is
                # derived from the checksum returned by storing the file.
                entity = manager.make_entity('Document')
                checksum = manager.store(path)
                entity.set('contentHash', checksum)
                entity.make_id(checksum)
                entity.set('fileName', path.name)
                manager.queue_entity(entity)
            if path.is_dir():
                DirectoryIngestor.crawl(manager, path)
    finally:
        # Close the manager even when storing, queueing or crawling raises,
        # so it is not left open on the error path.
        manager.close()
def get_cache():
    """Return the cache stored on ``settings``, building it on first use.

    When ``settings._cache`` is missing or ``None``, a ``Cache`` is created
    from the Redis connection with ``settings.APP_NAME`` as its prefix and
    stored on ``settings._cache`` for later calls.
    """
    cache = getattr(settings, '_cache', None)
    if cache is None:
        cache = Cache(get_redis(), prefix=settings.APP_NAME)
        settings._cache = cache
    return cache
def connect_redis():
    """Return a Redis connection; use the fake one if ``settings.TESTING``."""
    factory = get_fakeredis if settings.TESTING else get_redis
    return factory()
def killthekitten():
    """Wipe everything stored in redis by issuing ``flushall``."""
    get_redis().flushall()
def cancel(dataset):
    """Cancel the scheduled tasks belonging to the given dataset."""
    redis = get_redis()
    target = Dataset(redis, dataset)
    target.cancel()
def ingest(path, dataset, languages=None):
    """Schedule the files at ``path`` for ingest into ``dataset``.

    Obtains the Redis connection and the dataset object opened with
    ``OP_INGEST``, then delegates to ``_ingest_path``; ``languages`` is
    forwarded unchanged.
    """
    redis = get_redis()
    store = get_dataset(dataset, OP_INGEST)
    _ingest_path(store, redis, dataset, path, languages=languages)
def ingest(path, dataset, languages=None):
    """Schedule the files at ``path`` for ingest into ``dataset``.

    Obtains the Redis connection and delegates to ``_ingest_path``;
    ``languages`` is forwarded unchanged.
    """
    redis = get_redis()
    _ingest_path(redis, dataset, path, languages=languages)
def setUp(self):
    """Set up the connection, dataset name, operation name and job."""
    redis = get_redis()
    dataset_name = "my-dataset"
    self.conn = redis
    self.dataset = dataset_name
    self.operation = "OP_FOO"
    self.job = Job.create(redis, dataset_name)