Пример #1
0
def crawldir(path, language=None, foreign_id=None):
    """Crawl the given directory."""
    path = Path(path)
    if foreign_id is None:
        foreign_id = 'directory:%s' % slugify(path)
    collection = ensure_collection(foreign_id, path.name)
    log.info('Crawling %s to %s (%s)...', path, foreign_id, collection.id)
    crawl_directory(collection, path)
    log.info('Complete. Make sure a worker is running :)')
    update_collection(collection)
Пример #2
0
def crawldir(path, language=None, foreign_id=None):
    """Crawl the given directory."""
    path = Path(path)
    if foreign_id is None:
        foreign_id = 'directory:%s' % slugify(path)
    create_collection({'foreign_id': foreign_id, 'label': path.name})
    collection = Collection.by_foreign_id(foreign_id)
    log.info('Crawling %s to %s (%s)...', path, foreign_id, collection.id)
    crawl_directory(collection, path)
    ingest_wait(collection)
Пример #3
0
def crawldir(path, language=None, foreign_id=None):
    """Crawl the given directory."""
    path = Path(path)
    if foreign_id is None:
        foreign_id = 'directory:%s' % slugify(path)
    authz = Authz.from_role(Role.load_cli_user())
    config = {'foreign_id': foreign_id, 'label': path.name, 'casefile': False}
    create_collection(config, authz)
    collection = Collection.by_foreign_id(foreign_id)
    log.info('Crawling %s to %s (%s)...', path, foreign_id, collection.id)
    crawl_directory(collection, path)
    log.info('Complete. Make sure a worker is running :)')
Пример #4
0
 def test_crawl_sample_directory(self):
     samples_path = self.get_fixture_path("samples")
     crawl_directory(self.collection, samples_path)
     assert Document.all().count() == 4, Document.all().count()