def __init__(self, url, index, mappings=None, clean=False, insecure=True,
             analyzers=None, aliases=None):
    """Handle the operations with the ElasticSearch database, such as
    creating indexes and mappings, setting up aliases and uploading
    documents.

    :param url: ES url
    :param index: index name
    :param mappings: an instance of the Mapping class
    :param clean: if True, deletes an existing index and create it again
    :param insecure: support https with invalid certificates
    :param analyzers: analyzers for ElasticSearch
    :param aliases: list of aliases, defined as strings, to be added to the index
    """
    # Detect the major version of the ES instance first; mappings and
    # analyzers below are resolved per-version.
    self.major = self.check_instance(url, insecure)
    logger.debug("Found version of ES instance at {}: {}.".format(
        anonymize_url(url), self.major))

    self.url = url
    # Valid index for elastic
    self.index = self.safe_index(index)
    self.aliases = aliases
    self.index_url = self.url + "/" + self.index
    self.wait_bulk_seconds = 2  # time to wait to complete a bulk operation
    self.requests = grimoire_con(insecure)

    # Version-specific analyzer settings, when analyzers were provided
    analyzer_settings = None
    if analyzers:
        analyzer_settings = analyzers.get_elastic_analyzers(es_major=self.major)['items']

    self.create_index(analyzer_settings, clean)

    if analyzers:
        self.update_analyzers(analyzer_settings)

    if mappings:
        self.create_mappings(mappings.get_elastic_mappings(es_major=self.major))

    # Attach each requested alias unless it is already taken elsewhere
    for alias in (aliases or []):
        if self.alias_in_use(alias):
            logger.debug(
                "Alias {} won't be set on {}, it already exists on {}".
                format(alias, anonymize_url(self.index_url),
                       anonymize_url(self.url)))
            continue
        self.add_alias(alias)
def check_instance(url, insecure):
    """Check that there is an Elasticsearch instance at `url`.

    A GET on the url must return a JSON document with a
    version.number field, from which the major version is extracted.

    :param url: url of the instance to check
    :param insecure: don't verify ssl connection (boolean)
    :returns: major version of Elasticsearch, as string
    :raises ElasticError: if the url does not answer 200 OK, or the
        answer is not a proper Elasticsearch welcome message
    """
    res = grimoire_con(insecure).get(url)
    if res.status_code != 200:
        # Anonymize the url so credentials embedded in it don't leak
        # into logs or the exception message (as the branch below does)
        msg = "Got {} from url {}".format(res.status_code, anonymize_url(url))
        logger.error(msg)
        raise ElasticError(cause=msg)

    try:
        version_str = res.json()['version']['number']
        return version_str.split('.')[0]
    # ValueError: the body was not JSON; KeyError: JSON without version info
    except (ValueError, KeyError):
        msg = "Could not read proper welcome message from url {}, {}".format(
            anonymize_url(url), res.text)
        logger.error(msg)
        raise ElasticError(cause=msg)
def _check_instance(url, insecure):
    """Check that there is an Elasticsearch instance at `url`.

    A GET on the url must return a JSON document with a
    version.number field, from which the major version is extracted.

    :param url: url of the instance to check
    :param insecure: don't verify ssl connection (boolean)
    :returns: major version of Elasticsearch, as string
    :raises ElasticConnectException: if the instance does not answer
        200 OK or the welcome message can't be parsed
    """
    res = grimoire_con(insecure).get(url)
    if res.status_code != 200:
        # Report the actual status code received, to ease debugging
        logger.error("Got %s instead of 200 OK from url %s", res.status_code, url)
        raise ElasticConnectException
    else:
        try:
            version_str = res.json()['version']['number']
            version_major = version_str.split('.')[0]
            return version_major
        except Exception:
            logger.error(
                "Could not read proper welcome message from url %s", url)
            logger.error("Message read: %s", res.text)
            raise ElasticConnectException
def __init__(self, config):
    """Initialize the task with the shared configuration and the
    SortingHat database credentials.

    :param config: a Config object; its 'sortinghat' section, when
        present, provides the identities database credentials
    """
    self.backend_section = None
    self.config = config
    self.conf = config.get_conf()
    # Guard against configurations without a 'sortinghat' section
    # instead of failing with a KeyError (same approach as sibling
    # task constructors): attributes default to None in that case.
    sortinghat = self.conf.get('sortinghat', None)
    self.db_sh = sortinghat['database'] if sortinghat else None
    self.db_user = sortinghat['user'] if sortinghat else None
    self.db_password = sortinghat['password'] if sortinghat else None
    self.db_host = sortinghat['host'] if sortinghat else None
    self.grimoire_con = grimoire_con(conn_retries=12)  # 30m retry
def __init__(self, url, index, mappings=None, clean=False, insecure=True, analyzers=None, aliases=None): ''' clean: remove already existing index insecure: support https with invalid certificates ''' # Get major version of Elasticsearch instance self.major = self._check_instance(url, insecure) logger.debug("Found version of ES instance at %s: %s.", self.anonymize_url(url), self.major) self.url = url # Valid index for elastic self.index = self.safe_index(index) self.aliases = aliases self.index_url = self.url + "/" + self.index self.wait_bulk_seconds = 2 # time to wait to complete a bulk operation self.requests = grimoire_con(insecure) res = self.requests.get(self.index_url) headers = {"Content-Type": "application/json"} if res.status_code != 200: # Index does no exists r = self.requests.put(self.index_url, data=analyzers, headers=headers) if r.status_code != 200: logger.error("Can't create index %s (%s)", self.anonymize_url(self.index_url), r.status_code) raise ElasticWriteException() else: logger.info("Created index " + self.anonymize_url(self.index_url)) else: if clean: res = self.requests.delete(self.index_url) res.raise_for_status() res = self.requests.put(self.index_url, data=analyzers, headers=headers) res.raise_for_status() logger.info("Deleted and created index " + self.anonymize_url(self.index_url)) if mappings: map_dict = mappings.get_elastic_mappings(es_major=self.major) self.create_mappings(map_dict) if aliases: for alias in aliases: if self.alias_in_use(alias): logger.debug("Alias %s won't be set on %s, it already exists on %s", alias, self.anonymize_url(self.index_url), self.anonymize_url(self.url)) continue self.add_alias(alias)
def __init__(self, config):
    """Set up the task: keep the configuration, the SortingHat database
    credentials (when a 'sortinghat' section is available) and a
    resilient HTTP session.
    """
    self.backend_section = None
    self.config = config
    self.conf = config.get_conf()

    sh_section = self.conf.get('sortinghat', None)

    def _sh_value(key):
        # All credentials default to None when there is no section at all
        return sh_section[key] if sh_section else None

    self.db_sh = _sh_value('database')
    self.db_user = _sh_value('user')
    self.db_password = _sh_value('password')
    self.db_host = _sh_value('host')
    self.db_unaffiliate_group = _sh_value('unaffiliated_group')

    self.sh_kwargs = {'user': self.db_user, 'password': self.db_password,
                      'database': self.db_sh, 'host': self.db_host,
                      'port': None}

    self.grimoire_con = grimoire_con(conn_retries=12)  # 30m retry
def __init__(self, config):
    """Keep the global configuration needed to run the task.

    :param config: a Config object
    """
    self.config = config
    self.conf = config.get_conf()
    # 12 connection retries amount to roughly 30 minutes of waiting
    self.grimoire_con = grimoire_con(conn_retries=12)
import json import logging import os import tempfile import requests from grimoire_elk.elk import load_identities from grimoire_elk.enriched.gerrit import GerritEnrich from grimoire_elk.enriched.git import GitEnrich from grimoire_elk.enriched.utils import grimoire_con logger = logging.getLogger(__name__) requests_ses = grimoire_con() def fetch_track_items(upstream_file_url, data_source): """ The file format is: # Upstream contributions, bitergia will crawl this and extract the relevant information # system is one of Gerrit, Bugzilla, Launchpad (insert more) --- - url: https://review.openstack.org/169836 system: Gerrit """ track_uris = [] req = requests_ses.get(upstream_file_url)