Example #1
def devpiserver_mirror_initialnames(stage, projectnames):
    ix = get_indexer(stage.xom)
    threadlog.info("indexing '%s' mirror with %s projects", stage.name,
                   len(projectnames))
    ix.update_projects(
        ProjectIndexingInfo(stage=stage, name=name) for name in projectnames)
    threadlog.info("finished mirror indexing operation")
Example #2
 def __init__(self, path):
     self.path = os.path.abspath(path)
     if not os.path.exists(self.path):
         fatal("No config at '%s'." % self.path)
     with open(self.path) as f:
         _config = yaml.safe_load(f)  # needs no Loader argument on modern PyYAML
     self.update(_config.get('devpi-ldap', {}))
     if 'url' not in self:
         fatal("No url in LDAP config.")
     if 'user_template' in self:
         if 'user_search' in self:
             fatal("The LDAP options 'user_template' and 'user_search' are mutually exclusive.")
     else:
         if 'user_search' not in self:
             fatal("You need to set either 'user_template' or 'user_search' in LDAP config.")
         self._validate_search_settings('user_search')
     if 'group_search' not in self:
         threadlog.info("No group search setup for LDAP.")
     else:
         self._validate_search_settings('group_search')
     known_keys = set((
         'url',
         'user_template',
         'user_search',
         'group_search',
         'referrals',
         'reject_as_unknown',
         'tls',
     ))
     unknown_keys = set(self.keys()) - known_keys
     if unknown_keys:
         fatal("Unknown option(s) '%s' in LDAP config." % ', '.join(
             sorted(unknown_keys)))
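For illustration, a hypothetical mapping that the validation above accepts: 'url' is mandatory, exactly one of 'user_template'/'user_search' may be set, and every key must come from known_keys. All values here are made up:

# hypothetical config, mirroring what yaml.safe_load would return above
sample_config = {
    'devpi-ldap': {
        'url': 'ldap://ldap.example.com',
        'user_template': 'CN={username},CN=Users,DC=example,DC=com',
        'tls': True,
    }
}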
Example #3
 def ensure_tables_exist(self):
     with self.get_connection() as conn:
         sqlconn = conn._sqlconn
         c = sqlconn.cursor()
         try:
             c.execute("select * from changelog limit 1")
             c.fetchall()
             c.execute("select * from kv limit 1")
             c.fetchall()
         except pg8000.ProgrammingError:
             sqlconn.rollback()
             threadlog.info("DB: Creating schema")
             c.execute("""
                 CREATE TABLE kv (
                     key TEXT NOT NULL PRIMARY KEY,
                     keyname TEXT,
                     serial INTEGER
                 )
             """)
             c.execute("""
                 CREATE TABLE changelog (
                     serial INTEGER PRIMARY KEY,
                     data BYTEA NOT NULL
                 )
             """)
             c.execute("""
                 CREATE TABLE files (
                     path TEXT PRIMARY KEY,
                     size INTEGER NOT NULL,
                     data BYTEA NOT NULL
                 )
             """)
             sqlconn.commit()
         finally:
             c.close()
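The SELECT probe plus rollback above emulates "create only if missing". On PostgreSQL 9.1+ the same guard can be written directly with IF NOT EXISTS; a minimal sketch against the same connection objects:

 def ensure_tables_exist(self):
     with self.get_connection() as conn:
         sqlconn = conn._sqlconn
         c = sqlconn.cursor()
         try:
             # IF NOT EXISTS makes the probe-and-rollback dance unnecessary
             c.execute("CREATE TABLE IF NOT EXISTS kv (key TEXT NOT NULL PRIMARY KEY, keyname TEXT, serial INTEGER)")
             c.execute("CREATE TABLE IF NOT EXISTS changelog (serial INTEGER PRIMARY KEY, data BYTEA NOT NULL)")
             c.execute("CREATE TABLE IF NOT EXISTS files (path TEXT PRIMARY KEY, size INTEGER NOT NULL, data BYTEA NOT NULL)")
             sqlconn.commit()
         finally:
             c.close()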
Example #4
def replay(xom, replica_xom, events=True):
    if replica_xom.replica_thread.replica_in_sync_at is None:
        # allow on_import to run right away, so we don't need to rely
        # on the initial import thread for tests
        replica_xom.replica_thread.replica_in_sync_at = 0

    threadlog.info("test: replaying replica")
    for serial in range(replica_xom.keyfs.get_next_serial(),
                        xom.keyfs.get_next_serial()):
        if serial == -1:
            continue
        with xom.keyfs._storage.get_connection() as conn:
            change_entry = conn.get_changes(serial)
        threadlog.info("test: importing to replica %s", serial)
        replica_xom.keyfs.import_changes(serial, change_entry)

    # replay notifications
    if events:
        replica_xom.replica_thread.wait()
        noti_thread = replica_xom.keyfs.notifier
        event_serial = noti_thread.read_event_serial()
        thread_push_log("NOTI")
        while event_serial < replica_xom.keyfs.get_current_serial():
            event_serial += 1
            noti_thread._execute_hooks(event_serial, threadlog, raising=True)
            noti_thread.write_event_serial(event_serial)
        thread_pop_log("NOTI")
Example #5
def devpiserver_mirror_initialnames(stage, projectnames):
    xom = stage.xom
    ix = get_indexer(xom.config)
    ix.delete_index()
    indexer = get_indexer(xom.config)
    # directly use projectnames?
    indexer.update_projects(iter_projects(xom), clear=True)
    threadlog.info("finished initial indexing op")
Example #6
def devpiserver_pypi_initial(stage, name2serials):
    xom = stage.xom
    ix = get_indexer(xom.config)
    ix.delete_index()
    indexer = get_indexer(xom.config)
    # directly use name2serials?
    indexer.update_projects(iter_projects(xom), clear=True)
    threadlog.info("finished initial indexing op")
Example #7
 def _userdn(self, username):
     if 'user_template' in self:
         return self['user_template'].format(username=username)
     else:
         result = self._search(None, self['user_search'], username=username)
         if len(result) == 1:
             return result[0]
         elif not result:
             threadlog.info("No user '%s' found." % username)
         else:
             threadlog.error("Multiple results for user '%s' found.")
Example #8
 def delete_projects(self, projects):
     counter = itertools.count()
     count = next(counter)
     writer = self.project_ix.writer()
     searcher = self.project_ix.searcher()
     for project in projects:
         path = u"/%s/%s" % (project.indexname, project.name)
         count = next(counter)
         writer.delete_by_term('path', path, searcher=searcher)
     log.debug("Committing %s deletions to search index." % count)
     writer.commit()
     log.info("Finished committing %s deletions to search index." % count)
Example #9
 def update_projects(self, projects, clear=False):
     counter = itertools.count()
     count = next(counter)
     writer = self.project_ix.writer()
     main_keys = self.project_ix.schema.names()
     text_keys = (
         ('author', 0.5),
         ('author_email', 0.5),
         ('description', 1.5),
         ('summary', 1.75),
         ('keywords', 1.75))
     for project in projects:
         data = dict((u(x), project[x]) for x in main_keys if x in project)
         data['path'] = u"/{user}/{index}/{name}".format(**data)
         if not clear:
             writer.delete_by_term('path', data['path'])
         data['type'] = "project"
         data['text'] = "%s %s" % (data['name'], project_name(data['name']))
         with writer.group():
             writer.add_document(**data)
             count = next(counter)
             for key, boost in text_keys:
                 if key not in project:
                     continue
                 writer.add_document(**{
                     "path": data['path'],
                     "type": key,
                     "text": project[key],
                     "_text_boost": boost})
                 count = next(counter)
             if '+doczip' not in project:
                 continue
             for page in project['+doczip']:
                 writer.add_document(**{
                     "path": data['path'],
                     "type": "title",
                     "text": page['title'],
                     "text_path": page['path'],
                     "text_title": page['title']})
                 count = next(counter)
                 writer.add_document(**{
                     "path": data['path'],
                     "type": "page",
                     "text": page['text'],
                     "text_path": page['path'],
                     "text_title": page['title']})
                 count = next(counter)
     log.info("Committing index with %s documents." % count)
     if clear:
         writer.commit(mergetype=CLEAR)
     else:
         writer.commit()
     log.info("Committed %s documents to index." % count)
Example #10
 def handler(self, is_from_mirror, serial, indexname, names):
     log.debug("Got %s projects from %s at serial %s for indexing",
               len(names), indexname, serial)
     ix = get_indexer(self.xom)
     counter = itertools.count()
     project_ix = ix.get_project_ix()
     main_keys = project_ix.schema.names()
     writer = project_ix.writer()
     searcher = project_ix.searcher()
     try:
         with self.xom.keyfs.transaction(write=False) as tx:
             stage = self.xom.model.getstage(indexname)
             if stage is not None:
                 for name in names:
                     data = preprocess_project(
                         ProjectIndexingInfo(stage=stage, name=name))
                     if data is None:
                         ix._delete_project(indexname,
                                            name,
                                            tx.at_serial,
                                            counter,
                                            writer,
                                            searcher=searcher)
                         continue
                     # because we use the current transaction, we also
                     # use the current serial for indexing
                     ix._update_project(data,
                                        tx.at_serial,
                                        counter,
                                        main_keys,
                                        writer,
                                        searcher=searcher)
             else:
                 # stage was deleted
                 for name in names:
                     ix._delete_project(indexname,
                                        name,
                                        tx.at_serial,
                                        counter,
                                        writer,
                                        searcher=searcher)
         count = next(counter)
     except Exception:
         writer.cancel()
         # let the queue handle retries
         raise
     else:
         if count:
             log.info("Committing %s new documents to search index." %
                      count)
         else:
             log.debug("Committing no new documents to search index.")
         writer.commit()
Example #11
def devpiserver_get_credentials(request):
    """Search request for REMOTE_USER header.

    Returns a tuple with (REMOTE_USER, '') if credentials could be
    extracted, or None if no credentials were found.

    The first plugin to return credentials is used, the order of plugin
    calls is undefined.
    """
    if 'REMOTE_USER' in request.headers:
        remote_user = request.headers['REMOTE_USER']
        threadlog.info("Found REMOTE_USER in request: %s", remote_user)
        return remote_user, ''
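Both outcomes of the hook can be exercised without a running server; a minimal sketch where FakeRequest is a hypothetical stand-in providing only the headers mapping the hook reads:

class FakeRequest:
    def __init__(self, headers):
        self.headers = headers

# header present: credentials are extracted
assert devpiserver_get_credentials(FakeRequest({'REMOTE_USER': 'alice'})) == ('alice', '')
# header absent: the hook returns None and other plugins get a chance
assert devpiserver_get_credentials(FakeRequest({})) is None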
Example #12
 def delete_projects(self, projects):
     counter = itertools.count()
     count = next(counter)
     writer = self.project_ix.writer()
     main_keys = self.project_ix.schema.names()
     for project in projects:
         data = dict((u(x), project[x]) for x in main_keys if x in project)
         data['path'] = u"/{user}/{index}/{name}".format(**data)
         count = next(counter)
         writer.delete_by_term('path', data['path'])
     log.debug("Committing %s deletions to search index." % count)
     writer.commit()
     log.info("Finished committing %s deletions to search index." % count)
Example #13
 def __init__(self, config, settings):
     if 'path' not in settings:
         index_path = config.serverdir.join('.indices')
     else:
         index_path = settings['path']
         if not os.path.isabs(index_path):
             fatal("The path for Whoosh index files must be absolute.")
         index_path = py.path.local(index_path)
     index_path.ensure_dir()
     log.info("Using %s for Whoosh index files." % index_path)
     self.index_path = index_path.strpath
     self.indexer_thread = None
     self.shared_data = None
     self.xom = None
Example #14
 def update_projects(self, projects, clear=False):
     writer = self.project_ix.writer()
     try:
         count = self._update_projects(writer, projects, clear=clear)
     except Exception:
         log.exception("Aborted write to search index after exception.")
         writer.cancel()
     else:
         log.info("Committing %s new documents to search index." % count)
         if clear:
             writer.commit(mergetype=CLEAR)
         else:
             writer.commit()
         log.info("Finished committing %s documents to search index." %
                  count)
Example #15
 def queue_projects(self, projects, at_serial, searcher):
     log.debug("Queuing projects for index update")
     queued_counter = itertools.count()
     queued = next(queued_counter)
     last_time = time.time()
     mirror_projects = {}
     processed = 0
     for processed, project in enumerate(projects, start=1):
         if time.time() - last_time > 5:
             last_time = time.time()
             log.debug(
                 "Processed a total of %s projects and queued %s so far. "
                 "Currently in %s" % (processed, queued, project.indexname))
         if project.is_from_mirror:
             # find the last serial at which the project changed, to avoid re-indexing
             project_serial = project.stage.get_last_project_change_serial_perstage(
                 project.name, at_serial=at_serial)
             # mirrors have no docs, so we can shortcut
             path = '/%s/%s' % (project.indexname, project.name)
             existing = None
             doc_num = searcher.document_number(path=path)
             if doc_num is not None:
                 existing = searcher.stored_fields(doc_num)
             if existing:
                 existing_serial = existing.get('serial', -1)
                 if existing_serial >= project_serial:
                     continue
             # we use at_serial here, because indexing is always done
             # with the latest metadata
             key = (project.indexname, at_serial)
             _projects = mirror_projects.setdefault(key, [])
             _projects.append(project)
             if len(_projects) >= self.QUEUE_MAX_NAMES:
                 self.extend(_projects, at_serial)
                 _projects.clear()
         else:
             # private projects need to be checked in IndexerThread.handler,
             # because preprocess_project may depend on files (like doczips)
             # that are not yet available while a replica is still syncing
             self.add(project, at_serial)
         queued = next(queued_counter)
     for (indexname, serial), _projects in mirror_projects.items():
         self.extend(_projects, serial)
     log.info("Processed a total of %s projects and queued %s" %
              (processed, queued))
Example #16
def iter_projects(xom):
    timestamp = time.time()
    for user in xom.model.get_userlist():
        username = ensure_unicode(user.name)
        user_info = user.get(user)
        for index, index_info in user_info.get('indexes', {}).items():
            index = ensure_unicode(index)
            stage = xom.model.getstage(username, index)
            if stage is None:  # this is async, so the stage may be gone
                continue
            log.info("Search-Indexing %s:", stage.name)
            names = stage.list_projects_perstage()
            for count, name in enumerate(names, start=1):
                name = ensure_unicode(name)
                current_time = time.time()
                if current_time - timestamp > 3:
                    log.debug("currently search-indexed %s", count)
                    timestamp = current_time
                yield preprocess_project(stage, name)
Example #17
def devpiserver_on_upload(stage, project, version, link):
    """ called when a file is uploaded to a private stage for
    a projectname/version.  link.entry.file_exists() may be false because
    a more recent revision deleted the file (and files are not revisioned).
    NOTE that this hook is currently NOT called for the implicit "caching"
    uploads to the pypi mirror.

    If the uploaded file is a wheel and is the latest version on this index,
    store its metadata in a JSON file at the root of the index/+f/ directory.
    With the standard nginx setup, nginx will serve this file directly.
    """
    if link.entry and link.entry.file_exists(
    ) and link.entry.basename.endswith('.whl'):
        threadlog.info("Wheel detected: %s", link.entry.basename)
        new_version = parse_version(version)
        latest_version = parse_version(
            stage.get_latest_version_perstage(project))
        if latest_version > new_version:
            threadlog.debug(
                "A newer release has already been uploaded: %s - nothing to do",
                latest_version)
            return
        metadata = extract_metadata_from_wheel_file(link.entry.file_os_path())
        linkstore = stage.get_linkstore_perstage(link.project, link.version)
        project_dir = '%s/%s/+f/%s' % (linkstore.filestore.storedir,
                                       stage.name, project)

        if not os.path.exists(project_dir):
            os.mkdir(project_dir)

        json_path = '%s/%s-%s.json' % (project_dir, project, new_version)
        with open(json_path, 'w') as fd:
            fd.write(json.dumps(metadata))

        threadlog.info("Stored %s to: %s", metadata, json_path)

        # We symlink the latest version
        symlink_path = '%s.json' % project_dir
        if os.path.exists(symlink_path):
            os.unlink(symlink_path)
        os.symlink(json_path, symlink_path)
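The unlink/symlink pair at the end leaves a brief window with no .json link at all. A common pattern, sketched here under the assumption of a POSIX filesystem, creates the link under a temporary name and renames it over the old one, which is atomic:

import os

def replace_symlink(target, link_path):
    # build the new link aside, then atomically move it into place,
    # so readers never observe a missing link
    tmp_path = link_path + '.tmp'
    if os.path.lexists(tmp_path):
        os.unlink(tmp_path)
    os.symlink(target, tmp_path)
    os.replace(tmp_path, link_path)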
Example #18
def replay(xom, replica_xom, events=True):
    threadlog.info("test: replaying replica")
    for serial in range(replica_xom.keyfs.get_next_serial(),
                        xom.keyfs.get_next_serial()):
        if serial == -1:
            continue
        with xom.keyfs._storage.get_connection() as conn:
            change_entry = conn.get_changes(serial)
        threadlog.info("test: importing to replica %s", serial)
        replica_xom.keyfs.import_changes(serial, change_entry)

    # replay notifications
    if events:
        noti_thread = replica_xom.keyfs.notifier
        event_serial = noti_thread.read_event_serial()
        thread_push_log("NOTI")
        while event_serial < replica_xom.keyfs.get_current_serial():
            event_serial += 1
            noti_thread._execute_hooks(event_serial, threadlog, raising=True)
            noti_thread.write_event_serial(event_serial)
        thread_pop_log("NOTI")
Example #19
 def thread_run(self):
     thread_push_log("[IDX]")
     last_time = time.time()
     event_serial = None
     serial = -1
     while 1:
         try:
             if time.time() - last_time > 5:
                 last_time = time.time()
                 size = self.shared_data.queue.qsize()
                 if size:
                     log.info("Indexer queue size ~ %s" % size)
                 event_serial = self.xom.keyfs.notifier.read_event_serial()
                 serial = self.xom.keyfs.get_current_serial()
             if event_serial is not None and event_serial < serial:
                 # be nice to everything else
                 self.thread.sleep(1.0)
             self.tick()
         except mythread.Shutdown:
             raise
         except Exception:
             log.exception("Unhandled exception in indexer thread.")
             self.thread.sleep(1.0)
Example #20
 def queue_projects(self, projects, serial, searcher):
     log.debug("Queuing projects for index update")
     queued_counter = itertools.count()
     queued = next(queued_counter)
     last_time = time.time()
     mirror_projects = {}
     processed = 0
     for processed, project in enumerate(projects, start=1):
         if time.time() - last_time > 5:
             last_time = time.time()
             log.debug(
                 "Processed a total of %s projects and queued %s so far. "
                 "Currently in %s" % (processed, queued, project.indexname))
          # find the last serial at which the project changed, to avoid
          # re-indexing; use a separate name so the at_serial argument
          # ('serial') is not clobbered across loop iterations
          project_serial = project.stage.get_last_project_change_serial_perstage(
              project.name, at_serial=serial)
          if project.is_from_mirror:
              # mirrors have no docs, so we can shortcut
              path = '/%s/%s' % (project.indexname, project.name)
              existing = searcher.document(path=path)
              if existing:
                  existing_serial = existing.get('serial', -1)
                  if existing_serial >= project_serial:
                      continue
              key = (project.indexname, project_serial)
              _projects = mirror_projects.setdefault(key, [])
              _projects.append(project)
              if len(_projects) >= self.QUEUE_MAX_NAMES:
                  self.extend(_projects, project_serial)
                  _projects.clear()
          else:
              self.add(project, project_serial)
         queued = next(queued_counter)
     for (indexname, serial), _projects in mirror_projects.items():
         self.extend(_projects, serial)
     log.info("Processed a total of %s projects and queued %s" %
              (processed, queued))
Example #21
def devpiserver_on_upload(stage, projectname, version, link):
    """ called when a file is uploaded to a private stage for
    a projectname/version.  link.entry.file_exists() may be false because
    a more recent revision deleted the file (and files are not revisioned).
    NOTE that this hook is currently NOT called for the implicit "caching"
    uploads to the pypi mirror.

    If the uploaded file is a wheel and is the latest version on this index,
    store its metadata in a JSON file at the root of the index/+f/ directory.
    With the standard nginx setup, nginx will serve this file directly.
    """
    if link.entry and link.entry.file_exists() and link.entry._filepath.endswith('.whl'):
        threadlog.info("Wheel detected: %s", link.entry._filepath)
        new_version = parse_version(version)
        latest_version = parse_version(stage.get_latest_version_perstage(projectname))
        if latest_version > new_version:
            threadlog.debug("A newer release has already been uploaded: %s - nothing to do", latest_current_version)
            return
        metadata = extract_metadata_from_wheel_file(link.entry._filepath)
        linkstore = stage.get_linkstore_perstage(link.projectname, link.version)
        json_path = '%s/%s/+f/%s.json' % (linkstore.filestore.storedir, stage.name, projectname)
        with open(json_path, 'w') as fd:
            fd.write(json.dumps(metadata))
        threadlog.info("Stored %s to: %s", metadata, json_path)
Example #22
def info(msg):
    threadlog.info("devpi-rss: %s" % msg)
Example #23
def devpiserver_auth_user(userdict, username, password):
    """Since we accept all remote_user, no password checks are needed."""
    threadlog.info("devpi-remoteuser accepting user: %s", username)
    return {'status': 'ok', 'groups': ['remote_user']}
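The returned dict follows the devpi-server auth plugin convention, where 'status' may also be 'unknown' (fall through to other plugins) or 'reject' (deny outright). A hedged sketch of a variant that actually checks a password; the {username: password} layout of userdict is an assumption made for illustration:

def devpiserver_auth_user(userdict, username, password):
    expected = userdict.get(username)  # assumed layout, for illustration only
    if expected is None:
        return {'status': 'unknown'}   # let other plugins decide
    if password != expected:
        return {'status': 'reject'}    # deny, do not fall through
    return {'status': 'ok', 'groups': ['remote_user']}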
Example #24
def devpiserver_stage_created(stage):
    if stage.ixconfig["type"] == "mirror":
        threadlog.info("triggering load of initial projectnames for %s",
                       stage.name)
        stage.list_projects_perstage()
Example #25
def test_threadlog(caplog):
    threadlog.info("hello")
    assert caplog.records[0].msg == "NOCTX hello"
    thread_push_log("this")
    threadlog.info("hello")
    assert caplog.records[1].msg == "this hello"
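The test documents the tagging scheme used throughout these examples: records carry NOCTX until a prefix is pushed, and thread_push_log/thread_pop_log bracket a section with its own tag (compare the "NOTI" pair in the replay() helpers above). A minimal sketch:

thread_push_log("[IDX]")
try:
    threadlog.info("indexer started")  # emitted as "[IDX] indexer started"
finally:
    thread_pop_log("[IDX]")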
Example #26
 def ensure_tables_exist(self):
     with self.get_connection() as conn:
         sqlconn = conn._sqlconn
         c = sqlconn.cursor()
         try:
             c.execute("select * from changelog limit 1")
             c.fetchall()
             c.execute("select * from kv limit 1")
             c.fetchall()
         except pg8000.ProgrammingError:
             sqlconn.rollback()
             threadlog.info("DB: Creating schema")
             c.execute("""
                 CREATE TABLE kv (
                     key TEXT NOT NULL PRIMARY KEY,
                     keyname TEXT,
                     serial INTEGER
                 )
             """)
             c.execute("""
                 CREATE TABLE changelog (
                     serial INTEGER PRIMARY KEY,
                     data BYTEA NOT NULL
                 )
             """)
             c.execute("""
                 CREATE TABLE files (
                     path TEXT PRIMARY KEY,
                     size INTEGER NOT NULL,
                     data BYTEA NOT NULL
                 )
             """)
             c.execute("""
                 CREATE FUNCTION set_kv(_key TEXT, _keyname TEXT, _serial INT) RETURNS VOID AS
                 $$
                 BEGIN
                     LOOP
                         -- first try to update the key
                         UPDATE kv SET keyname = _keyname, serial = _serial WHERE key = _key;
                         IF found THEN
                             RETURN;
                         END IF;
                         -- not there, so try to insert the key
                         -- if someone else inserts the same key concurrently,
                         -- we could get a unique-key failure
                         BEGIN
                             INSERT INTO kv(key, keyname, serial) VALUES (_key, _keyname, _serial);
                             RETURN;
                         EXCEPTION WHEN unique_violation THEN
                             -- Do nothing, and loop to try the UPDATE again.
                         END;
                     END LOOP;
                 END;
                 $$
                 LANGUAGE plpgsql;
             """)
             c.execute("""
                 CREATE FUNCTION set_files(_path TEXT, _size INTEGER, _data BYTEA) RETURNS VOID AS
                 $$
                 BEGIN
                     LOOP
                         -- first try to update the key
                         UPDATE files SET size = _size, data = _data WHERE path = _path;
                         IF found THEN
                             RETURN;
                         END IF;
                         -- not there, so try to insert the key
                         -- if someone else inserts the same key concurrently,
                         -- we could get a unique-key failure
                         BEGIN
                             INSERT INTO files(path, size, data) VALUES (_path, _size, _data);
                             RETURN;
                         EXCEPTION WHEN unique_violation THEN
                             -- Do nothing, and loop to try the UPDATE again.
                         END;
                     END LOOP;
                 END;
                 $$
                 LANGUAGE plpgsql;
             """)
             sqlconn.commit()
         finally:
             c.close()
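The plpgsql loop above is the classic upsert workaround for servers that predate native support. On PostgreSQL 9.5+ the same effect is a single statement; a sketch of the kv case (files is analogous; key, keyname, and serial are bound at call time, using pg8000's %s paramstyle):

c.execute("""
    INSERT INTO kv (key, keyname, serial)
    VALUES (%s, %s, %s)
    ON CONFLICT (key) DO UPDATE
    SET keyname = EXCLUDED.keyname, serial = EXCLUDED.serial
""", (key, keyname, serial))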
Example #27
 def _update_projects(self, writer, projects, clear=False):
     add_document = partial(self._add_document, writer)
     counter = itertools.count()
     count = next(counter)
     proj_counter = itertools.count()
     main_keys = self.project_ix.schema.names()
     text_keys = (
         ('author', 0.5),
         ('author_email', 0.5),
         ('description', 1.5),
         ('summary', 1.75),
         ('keywords', 1.75))
     for project in projects:
         proj_count = next(proj_counter)
         if proj_count % 1000 == 0:
             log.info("Processed %s projects", proj_count)
         data = dict((u(x), get_mutable_deepcopy(project[x]))
                     for x in main_keys if x in project)
         data['path'] = u"/{user}/{index}/{name}".format(**data)
         if not clear:
             # because we use hierarchical documents, we have to delete
             # everything we got for this path and index it again
             writer.delete_by_term('path', data['path'])
         data['type'] = "project"
         data['text'] = "%s %s" % (data['name'], project_name(data['name']))
         with writer.group():
             add_document(**data)
             count = next(counter)
             for key, boost in text_keys:
                 if key not in project:
                     continue
                 add_document(
                     **{
                         "path": data['path'],
                         "type": key,
                         "text": project[key],
                         "_text_boost": boost
                     })
                 count = next(counter)
             if '+doczip' not in project:
                 continue
             if not project['+doczip'].exists():
                 log.error("documentation zip file is missing %s",
                           data['path'])
                 continue
             for page in project['+doczip'].values():
                 if page is None:
                     continue
                 add_document(
                     **{
                         "path": data['path'],
                         "type": "title",
                         "text": page['title'],
                         "text_path": page['path'],
                         "text_title": page['title']
                     })
                 count = next(counter)
                 add_document(
                     **{
                         "path": data['path'],
                         "type": "page",
                         "text": page['text'],
                         "text_path": page['path'],
                         "text_title": page['title']
                     })
                 count = next(counter)
     return count
Example #28
def devpiserver_mirror_initialnames(stage, projectnames):
    ix = get_indexer(stage.xom.config)
    ix.update_projects(
        [preprocess_project(stage, name) for name in projectnames])
    threadlog.info("finished mirror indexing operation")
Example #29
 def update_projects(self, projects, clear=False):
     results = streaming_bulk(client=self.es,
                              actions=self._update_projects(projects))
     for i, result in enumerate(results):
         if i % 1000 == 0:
             log.info("Indexed %s", i)