def main():
    # Define the globals
    global index_names
    global STARTED_TIMESTAMP
    global es
    global es_indices
    try:
        # Initiate the elasticsearch session using the ES low-level client.
        # By default, nodes are randomized before being passed into the pool
        # and a round-robin strategy is used for load balancing.
        es = Elasticsearch(ES_HOSTS, timeout=30)
        es_indices = IndicesClient(es)
    except Exception:
        print("Could not connect to elasticsearch!")
        sys.exit(1)

    print("Creating indices..\n")
    indices = generate_indices()
    print("Done!\n")

    print("GET Settings\n")
    print(json.dumps(es_indices.get_settings(index="_all"),
                     sort_keys=True, indent=4, separators=(',', ': ')))
    print("Done!\n")

    # Clean up the created indices by default (CLEANUP defaults to True)
    if CLEANUP:
        print("Cleaning up created indices.. ")
        cleanup_indices()
        print("Done!\n")
def setUp(self): """ Instantiate our ES client and make sure all indexes are deleted before each test """ super().setUp() self.indices_client = IndicesClient(client=ES_CLIENT) self.indices_client.delete(index="_all")
def create_wikipedia_index(ic: IndicesClient) -> None:
    """
    Add an index to Elasticsearch called 'wikipedia'.

    Parameters
    ----------
    ic : IndicesClient
        The client to control Elasticsearch index settings

    Returns
    -------
    None
    """
    request_body = {
        "settings": {
            "analysis": {
                "analyzer": {
                    "my_analyzer": {
                        "type": "custom",
                        "tokenizer": "standard",
                        "filter": ["lowercase", "my_stops"]
                    }
                },
                "filter": {
                    "my_stops": {
                        "type": "stop",
                        "stopwords_path": "stopwords.txt"
                    }
                }
            }
        }
    }
    ic.create(index="wikipedia", body=request_body)
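# A minimal usage sketch for the function above, assuming a local
# Elasticsearch node (pre-8.x client) and a stopwords.txt file present in
# the node's config directory -- both assumptions, not part of the snippet.
from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient

es = Elasticsearch(["http://localhost:9200"])
ic = IndicesClient(es)
create_wikipedia_index(ic)
# The custom analyzer can then be exercised via the _analyze API;
# "the" should be dropped by the stop filter if listed in stopwords.txt.
print(ic.analyze(index="wikipedia",
                 body={"analyzer": "my_analyzer", "text": "The Quick Foxes"}))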
def setUp(self): """ Make sure all indexes are deleted before each new test is run. """ super().setUp() self.indices_client = IndicesClient(client=settings.ES_CLIENT) self.indices_client.delete(index="_all")
def __init__(self, driver, connection_name, connection_config):
    super(ElasticsearchConnection, self).__init__(
        driver, connection_name, connection_config)
    self.uri = self.connection_config.get('uri').split(',')
    self.cnx_opts = {}
    use_ssl = self.connection_config.get('use_ssl', True)
    if isinstance(use_ssl, str):
        # Accept "true"/"false" strings from the config file
        use_ssl = use_ssl.lower() != 'false'
    self.cnx_opts['use_ssl'] = use_ssl
    if use_ssl:
        verify_certs = self.connection_config.get('verify_certs', True)
        if isinstance(verify_certs, str):
            verify_certs = verify_certs.lower() != 'false'
        self.cnx_opts['verify_certs'] = verify_certs
        self.cnx_opts['ca_certs'] = self.connection_config.get('ca_certs', None)
        self.cnx_opts['client_cert'] = self.connection_config.get('client_cert', None)
        self.cnx_opts['client_key'] = self.connection_config.get('client_key', None)
    self.es = Elasticsearch(self.uri, **self.cnx_opts)
    try:
        self.log.debug("Elasticsearch info: %s" % self.es.info())
    except Exception as e:
        self.log.warning("An error occurred while establishing the "
                         "connection to Elasticsearch: %s" % e)
    self.ic = IndicesClient(self.es)
class TestSingleDocSigTerms(TestCase):
    def setUp(self):
        super(TestSingleDocSigTerms, self).setUp()
        self.es = Elasticsearch(hosts=['localhost:%d' % es_runner.es_state.port])
        self.ic = IndicesClient(self.es)
        self.index = 'single_doc_sigterms_test'
        self.doc_type = 'test-doc'
        self.field = 'text'
        if self.ic.exists(self.index):
            self.ic.delete(self.index)
        self.ic.create(self.index)
        self.es.create(self.index, self.doc_type,
                       {self.field: 'foo ba knark foo knirk knark foo'},
                       id='doc_1')

    def test_tf_for_doc_id(self):
        sigterms = SingleDocSigTerms(self.es, self.index, self.doc_type,
                                     self.field, None)
        resp = dict(sigterms.tf_for_doc_id('doc_1'))
        self.assertEqual(4, len(resp))
        self.assertEqual(3, resp['foo'])
        self.assertEqual(2, resp['knark'])
        self.assertEqual(1, resp['ba'])
        self.assertEqual(1, resp['knirk'])
def test_tokenize(self):
    test_case = {
        "text": "2018-06-08T00:00:00Z INFO GET /v1/bundles/7ef8966b-45ef-4e0a-a51b-44a865372050.2018-06-08T230333.785338Z?param1=1&param2=2 {\"key\": \"value\"}"
    }
    index_name = self.es_client._format_today_index_name(self.index_prefix)
    index_client = IndicesClient(TestESClient.es)
    with self.new_index(index_name):
        response = index_client.analyze(index=index_name, body=test_case)
    tokens = [t['token'] for t in response['tokens']]
    self.assertEqual(set(tokens), {
        '7ef8966b-45ef-4e0a-a51b-44a865372050',
        '2018-06-08T230333.785338Z',
        ':',
        'INFO',
        '1',
        '2',
        'v1',
        'bundles',
        'key',
        'GET',
        'param2',
        'param1',
        '2018-06-08T00:00:00Z',
        'value'
    })
    self.assertEqual(len(tokens), 14)
def setUp(self): """ Make sure tests get clean indexes to run """ self.indices_client = IndicesClient(client=settings.ES_CLIENT) # Delete any existing indexes so we get a clean slate self.indices_client.delete(index="_all") # Create an index we'll use to test the ES features self.indices_client.create(index="test_index") # Add a mapping for a test document type. It needs to include different fields for the # various features we'll be running tests on self.indices_client.put_mapping( body={ "properties": { "datetime_field": { "type": "date" }, "keyword_field": { "type": "keyword" }, "text_field": { "type": "text" }, } }, doc_type="test_doc", index="test_index", )
def import_examples_into_es(examples: list):
    index_name = config.index_name
    type_name = config.type_name
    buck_size = config.buck_size
    es = Elasticsearch(config.es_url)
    es_index = IndicesClient(es)
    if es_index.exists(index=index_name):
        es_index.delete(index=index_name)

    # Create the index
    with open(config.es_index_json) as f:
        mappings = json.load(f)
    res = es.indices.create(index=index_name, body=mappings)

    # Bulk-import the data into ES
    for i in range(len(examples)):
        examples[i] = {
            "_index": index_name,
            "_type": type_name,
            "_id": examples[i]["ntc_id"],
            "_source": examples[i]
        }
    for i in tqdm(range(ceil(len(examples) / buck_size)), desc="Import into ES"):
        bulk(es, actions=examples[i * buck_size: min((i + 1) * buck_size, len(examples))])
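# A hedged alternative sketch: elasticsearch.helpers.bulk can chunk the
# action stream itself via its chunk_size parameter, which avoids the
# manual bucketing loop above. Assumes the same action dicts as the
# snippet builds; the function name here is illustrative.
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

def import_with_chunking(es_url, actions, chunk=500):
    es = Elasticsearch(es_url)
    # bulk() returns a (success_count, errors) tuple by default
    return bulk(es, actions=actions, chunk_size=chunk)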
def __init__(self, conf, queue):
    self.conf = conf
    host = self.conf.get("host", "es")
    port = self.conf.get("port", 9200)
    self.log = logging.getLogger("pulsar.indexer")
    logging.getLogger("elasticsearch").setLevel(logging.INFO)
    self.log.debug("port: %r" % port)
    self.es = Elasticsearch([{"host": host, "port": port}])
    self.cluster_client = ClusterClient(self.es)
    health = self.cluster_client.health()
    if not health or health.get("number_of_nodes") < 1:
        raise Exception("No Elasticsearch nodes found: %r" % health)
    # Put our template
    self.indices_client = IndicesClient(self.es)
    self.index_prefix = self.conf.get("index_prefix", self.INDEX_PREFIX)
    self.indices_client.put_template(
        name=self.index_prefix, body=open("conf/es-template.json").read())
    self.log.info("Put template to ES for pulsar indexes")
    self.last_event_time = time()
    self.index_prefix = self.index_prefix + "-"
    self.index_name = self.get_index_name()
    self.queue = queue
    self.counter = 0
    self.stats_checkpoint = time()
    self.stats_every = 10000
    try:
        # This will block as it reads from the queue
        self.bulk(self.es, self.iterator(), stats_only=True)
    except Exception as e:
        self.log.exception("Error with bulk", exc_info=e)
class IndexBase:
    def __init__(self, **kwargs):
        self.index = kwargs.pop('index')
        self.client = client_es
        self.client_index = IndicesClient(self.client)
        if kwargs.get('settings'):
            self.settings = kwargs.pop('settings')
        else:
            self.settings = DEFAULT_SETTINGS

        # Recreate the index from scratch on every instantiation
        if self.exist_index():
            self.delete_index()
        self.create_index()

    def exist_index(self):
        return self.client_index.exists(index=self.index)

    def delete_index(self):
        return self.client_index.delete(index=self.index, ignore=[400, 404])

    def create_index(self):
        return self.client_index.create(index=self.index, body=self.settings)
def setUp(self): """ Starts a new connector for every test """ try: os.unlink("config.txt") except OSError: pass open("config.txt", "w").close() self.connector = Connector( address='%s:%s' % (mongo_host, self.primary_p), oplog_checkpoint='config.txt', target_url=elastic_pair, ns_set=['test.test'], u_key='_id', auth_key=None, doc_manager='mongo_connector/doc_managers/elastic_doc_manager.py', auto_commit_interval=0 ) # Clean out test databases try: self.elastic_doc._remove() except OperationFailed: try: # Create test.test index if necessary client = Elasticsearch(hosts=[elastic_pair]) idx_client = IndicesClient(client) idx_client.create(index='test.test') except es_exceptions.TransportError: pass self.conn.test.test.drop() self.connector.start() assert_soon(lambda: len(self.connector.shard_set) > 0) assert_soon(lambda: sum(1 for _ in self.elastic_doc._search()) == 0)
def _create_index(self):
    es_index = IndicesClient(self._es)
    if es_index.exists(self._store_index):
        logging.info('Index ' + self._store_index +
                     ' already exists. Skipping index creation.')
        return None

    es_mapping = {
        "mappings": {
            'last_runtime': {
                'properties': {
                    'plugin_name': {'index': 'not_analyzed', 'type': 'string'},
                    'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                    'plugin_sid': {'index': 'not_analyzed', 'type': 'long'},
                    '@timestamp': {'format': 'dateOptionalTime||epoch_millis',
                                   'type': 'date'}
                }
            }
        }
    }
    self._es.indices.create(self._store_index, body=es_mapping)
    time.sleep(1)
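# The mapping above targets Elasticsearch 2.x, where non-analyzed text was
# declared as {"type": "string", "index": "not_analyzed"}. A sketch of the
# equivalent mapping for ES 5+ (an assumption about the target version, not
# part of the original snippet) would use the keyword type instead:
es_mapping_5x = {
    "mappings": {
        "last_runtime": {
            "properties": {
                "plugin_name": {"type": "keyword"},
                "rule_name": {"type": "keyword"},
                "plugin_sid": {"type": "long"},
                "@timestamp": {"format": "dateOptionalTime||epoch_millis",
                               "type": "date"}
            }
        }
    }
}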
def get_es_indices():
    es_idx = IndicesClient(es)
    indices = es_idx.get('appcompat-*')
    result = []
    for index_name, v in indices.items():
        result.append((index_name, index_name,
                       v['settings']['index']['creation_date']))
    return result
def handle(self, *args, **options):
    es = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])
    fop = open('spider/management/commands/' + str(argv[2]), 'r')
    inds = IndicesClient(es)
    mapping = {
        "mappings": {
            "product_type": {
                "properties": {
                    "code": {"type": "string"},
                    "name": {"type": "string"},
                    "img": {"type": "string"},
                    "url": {"type": "string"},
                    "price_reg": {"type": "float"},
                    "price_discount": {"type": "float"}
                }
            }
        }
    }
    if not inds.exists(index='gearbest_index'):
        inds.create(index='gearbest_index', body=mapping)
        print('gearbest_index created')
    for jsonline in fop:
        jobj = loads(jsonline)
        del jobj["_type"]
        es.index(index="gearbest_index", doc_type='product_type',
                 body=jobj, id=jobj['code'])
        disc = 0
        reg = 0
        if len(jobj['price_discount']) > 0:
            disc = float(jobj['price_discount'][0])
        if len(jobj['price_reg']) > 0:
            reg = float(jobj['price_reg'][0])
        add_price = Price_gb(price=reg, price_disc=disc,
                             code=str(jobj['code']),
                             date=datetime.date.today())
        add_price.save()
        print('code=' + str(jobj['code']))
def __init__(self, host, port, db_config):
    self.es = Elasticsearch([{"host": host, "port": port}])
    if not self.es.ping():
        error("Cannot connect to elasticsearch cluster for users. "
              "Check database configuration in user_db_config.json.")
        exit(0)
    index = db_config["index"]
    self.index = index["name"]
    user_type = db_config["user_type"]
    self.user_type_name = user_type["name"]
    mappings = dict()
    if "mapping" in user_type:
        mappings[self.user_type_name] = user_type["mapping"]
    body = dict()
    if "settings" in index:
        body["settings"] = index["settings"]
    if mappings:
        body["mappings"] = mappings
    try:
        self.indices_client = IndicesClient(self.es)
        if not self.indices_client.exists(self.index):
            self.indices_client.create(index=self.index, body=body)
    except TransportError:
        error("Error while creating elasticsearch cluster for users. "
              "Check type mappings in user_db_config.json.")
        print(traceback.format_exc())
        exit(0)
def test_index_manager_regenerate_indices_from_broken_state(self, *args):
    """
    `regenerate_indices` should succeed and give us a working ElasticSearch
    when it runs and finds a broken state (eg. with an existing, incorrect
    index with the name of an alias).

    This can occur when ES restarts and an update signal is triggered before
    Richie had a chance to bootstrap ES.
    """
    # The indices client will be used to test the actual indices in ElasticSearch
    indices_client = IndicesClient(client=ES_CLIENT)

    # Create a course and trigger a signal to index it. This will create a
    # broken "richie_test_courses" index
    course = CourseFactory(should_publish=True)
    update_course(course.extended_object, "en")
    self.assertIsNotNone(indices_client.get("richie_test_courses"))

    # Call our `regenerate_indices` command
    creation_datetime = datetime(2010, 1, 1, tzinfo=timezone.utc)
    creation_string = creation_datetime.strftime("%Y-%m-%d-%Hh%Mm%S.%fs")
    with mock.patch.object(timezone, "now", return_value=creation_datetime):
        regenerate_indices(None)

    # No error was thrown, the courses index (like all others) was bootstrapped
    self.assertIsNotNone(
        indices_client.get(f"richie_test_courses_{creation_string}")
    )
    # The expected alias is associated with the index
    self.assertEqual(
        list(indices_client.get_alias("richie_test_courses").keys())[0],
        f"richie_test_courses_{creation_string}",
    )
def __init__(self, host, port, db_config):
    self.es = Elasticsearch([{"host": host, "port": port}])
    try:
        if self.es.ping():
            es_logger = logging.getLogger('elasticsearch')
            es_logger.setLevel(logging.CRITICAL)
            self.indices_client = IndicesClient(self.es)
            index_definitions = db_config["index_definitions"]
            self.settings = db_config["settings"]

            self.data_point_definition = index_definitions["data_point"]
            self.create_index_from_definition(self.data_point_definition,
                                              self.settings)
            self.data_point_type_name = self.data_point_definition["name"]
            self.data_point_index = self.data_point_definition["index_name"]
            self.definitions.append(self.data_point_definition)

            self.experiment_definition = index_definitions["experiment"]
            self.create_index_from_definition(self.experiment_definition,
                                              self.settings)
            self.experiment_type_name = self.experiment_definition["name"]
            self.experiment_index = self.experiment_definition["index_name"]
            self.definitions.append(self.experiment_definition)

            self.target_system_definition = index_definitions["target_system"]
            self.create_index_from_definition(self.target_system_definition,
                                              self.settings)
            self.target_system_type_name = self.target_system_definition["name"]
            self.target_system_index = self.target_system_definition["index_name"]
            self.definitions.append(self.target_system_definition)

            self.analysis_definition = index_definitions["analysis"]
            self.create_index_from_definition(self.analysis_definition,
                                              self.settings)
            self.analysis_type_name = self.analysis_definition["name"]
            self.analysis_index = self.analysis_definition["index_name"]
            self.definitions.append(self.analysis_definition)

            self.stage_definition = index_definitions["stage"]
            self.create_index_from_definition(self.stage_definition,
                                              self.settings)
            self.stage_type_name = self.stage_definition["name"]
            self.stage_index = self.stage_definition["index_name"]
            self.definitions.append(self.stage_definition)
        else:
            raise ConnectionError("Host/port values are not valid")
    except TransportError as err1:
        error("TransportError while creating elasticsearch instance for "
              "experiments. Check type mappings in experiment_db_config.json.")
        raise err1
def create_index(name):
    es = get_es()
    ic = IndicesClient(es)
    body = {}
    # body.update(settings.INDEX_SETTINGS)
    body.update(settings.INDEX_MAPPINGS)
    resp = ic.create(name, json.dumps(body))
    logger.debug('index create: ' + str(resp))
def __mapFile(self, json_map_file):
    es = Elasticsearch([{'host': self.elasticsearch_host,
                         'port': self.elasticsearch_port}])
    ic = IndicesClient(es)
    with open(json_map_file) as json_data:
        d = json.load(json_data)
        doc_type = list(d.keys())[0]
        ic.put_mapping(index='wow', doc_type=doc_type, body=d)
def delete_all(self):
    """Delete the index."""
    try:
        indices_client = IndicesClient(self._es)
        indices_client.delete(index=self._index)
    except Exception as e:
        _eprint("exception on delete_index {}".format(e))
def create_index():
    es = Elasticsearch()
    client = IndicesClient(es)
    try:
        client.delete('physicians')
    except Exception as e:
        print(e)
def create_index(es, index_name):
    es_indices = IndicesClient(es)
    with open('data/FifaRecords.mappings.json') as json_data:
        d = json.load(json_data)
    es_indices.create(index=index_name, body=d)
    print("Created ES index {}".format(index_name))
def _create_main_index_if_not_exists(self):
    """
    Create the main elastic index if it does not exist yet.
    """
    ic = IndicesClient(self.es)
    if not ic.exists(MAIN_INDEX_NAME):
        ic.create(MAIN_INDEX_NAME)
def create_index_excel(excel_filename):
    indices_client = IndicesClient(models.client)
    index_name = 'excel'
    if len(excel_filename):
        doc_type = os.path.splitext(excel_filename)[0]
        index_name = 'excel_' + doc_type
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
def _reset_mapping(self, mapping_path):
    esi = IndicesClient(es.get_es_handle())
    index = settings.ES_INDEX
    if not esi.exists(index):
        raise CommandError("Non existing index : %s" % index)
    self.stdout.write(str(esi.delete(index=index)))
def cmd_print_mapping(ctx, index):
    context = get_conn()
    doctype = context.get_doctype()
    conn = context.connection()
    indices_client = IndicesClient(conn)
    resp = indices_client.get_mapping(index=index)
    mapping = resp[index]["mappings"][doctype]["properties"]
    pprint(mapping)
def __init__(self, index, doc_type, hosts=None):
    if hosts is None:
        hosts = ['localhost']
    self.es = Elasticsearch(hosts=hosts, verify_certs=False, timeout=60)
    self.ic = IndicesClient(self.es)
    self.index = index
    self.doc_type = doc_type
def remover_indice(nome_indice):
    """Remove the index from Elasticsearch.

    An Elasticsearch index is analogous to a table in an RDBMS.
    """
    es = conectar_em_elastic_search()
    client_indice = IndicesClient(es)
    if client_indice.exists(index=[nome_indice]):
        client_indice.delete(nome_indice)
def create_index_if_not_exists(self):
    """
    Check if the index exists; if not, create the index and its types
    and store their mappings.
    """
    ic = IndicesClient(self.es)
    response = ic.exists(index=[self.index_name])
    if not response:
        es_mappings = ElasticSearchController.get_index_mapper_dict()
        index_response = ic.create(index=self.index_name,
                                   body={"mappings": es_mappings})
def __init__(self):
    print(os.path.join(self.SETTINGS_DIR, 'corpus.json'))
    f = open(os.path.join(self.SETTINGS_DIR, 'corpus.json'), 'r', encoding='utf-8')
    self.settings = json.loads(f.read())
    f.close()
    self.name = self.settings['corpus_name']
    self.languages = self.settings['languages']
    if len(self.languages) <= 0:
        self.languages = [self.name]
    self.input_format = self.settings['input_format']
    self.corpus_dir = os.path.join('../corpus', self.name)
    self.iterSent = None
    if self.input_format in ['json', 'json-gzip']:
        self.iterSent = JSONDocReader(format=self.input_format)
    self.goodWordFields = ['lex', 'wf', 'wf_display', 'parts', 'gloss',
                           'gloss_index', 'n_ana', 'trans_en', 'trans_ru']
    self.AdditionalWordFields = set()
    if 'word_fields' in self.settings:
        self.AdditionalWordFields |= set(self.settings['word_fields'])
    if 'word_table_fields' in self.settings:
        self.AdditionalWordFields |= set(self.settings['word_table_fields'])
    if 'accidental_word_fields' in self.settings:
        self.AdditionalWordFields -= set(self.settings['accidental_word_fields'])
    f = open(os.path.join(self.SETTINGS_DIR, 'categories.json'), 'r', encoding='utf-8')
    categories = json.loads(f.read())
    self.goodWordFields += ['gr.' + v for lang in categories
                            for v in categories[lang].values()]
    self.goodWordFields = set(self.goodWordFields)
    f.close()
    self.pd = PrepareData()
    self.es = Elasticsearch()
    self.es_ic = IndicesClient(self.es)
    self.shuffled_ids = [i for i in range(1, 1000000)]
    random.shuffle(self.shuffled_ids)
    self.shuffled_ids.insert(0, 0)    # id=0 is special and should not change
    self.tmpWordIDs = [{} for i in range(len(self.languages))]    # word as JSON -> its integer ID
    self.tmpLemmaIDs = [{} for i in range(len(self.languages))]   # lemma as string -> its integer ID
    self.word2lemma = [{} for i in range(len(self.languages))]    # word's ID -> ID of its lemma (or -1, if none)
    self.wordFreqs = [{} for i in range(len(self.languages))]     # word's ID -> its frequency
    self.wordSFreqs = [{} for i in range(len(self.languages))]    # word's ID -> its number of sentences
    self.wordDocFreqs = [{} for i in range(len(self.languages))]  # (word's ID, dID) -> word frequency in the document
    # self.wordSIDs = [{} for i in range(len(self.languages))]    # word's ID -> set of sentence IDs
    self.wordDIDs = [{} for i in range(len(self.languages))]      # word's ID -> set of document IDs
    self.wfs = set()       # set of word forms (for sorting)
    self.lemmata = set()   # set of lemmata (for sorting)
    self.sID = 0           # current sentence ID for each language
    self.dID = 0           # current document ID
    self.wID = 0           # current word ID
    self.wordFreqID = 0
    self.numWords = 0      # number of words in current document
    self.numSents = 0      # number of sentences in current document
    self.numWordsLang = [0] * len(self.languages)   # number of words in each language in current document
    self.numSentsLang = [0] * len(self.languages)   # number of sentences in each language in current document
    self.totalNumWords = 0
def deleteIndex(self):
    self.es = Elasticsearch([{'host': elasticConfig['host'],
                              'port': elasticConfig['port']}])
    esIndices = IndicesClient(self.es)
    index = elasticConfig['index']
    doc_type = elasticConfig['doc_type']
    esIndices.delete(index=index)
class EsSchema:
    def __init__(self, client):
        self.client = client
        self.indicesClient = IndicesClient(self.client)
        self.schema = self.load_schema()

    def load_schema(self):
        with codecs.open('data/esdata.json', mode="r", encoding='UTF-8') as file:
            return json.load(file)

    def make_index_template(self):
        if not self.indicesClient.exists_template('soaktest:template'):
            self.indicesClient.put_template("soaktest:template",
                                            self.schema['soaktest:template'])
        if not self.indicesClient.exists_template('soakdownload:template'):
            self.indicesClient.put_template(
                "soakdownload:template",
                self.schema['soakdownload:template'])

    def make_kibana_index(self):
        if not self.indicesClient.exists_template(
                "kibana_index_template:.kibana"):
            self.indicesClient.put_template(
                "kibana_index_template:.kibana",
                self.schema["kibana_index_template:.kibana"])

    def make_kibana_visualization(self):
        for item in self.schema["kibana:data"]:
            self.client.index(index=".kibana", doc_type=item["_type"],
                              id=item["_id"], body=item["_source"])

    def make_schema(self):
        self.make_index_template()
def create_index_survey():
    indices_client = IndicesClient(models.client)
    index_name = models.SurveyMap._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    #put_settings(models.ScentemotionMap)
    # add qstfld fields
    es_mapping = models.SurveyMap._meta.es_mapping
    for qst, mapping in survey.qst2fld.items():
        fields = mapping[0]
        field_type = mapping[1]
        if field_type == 'nested_qst_ans':
            for field in fields:
                if field not in es_mapping['properties']:
                    es_mapping['properties'][field] = {
                        'type': 'nested',
                        'properties': {
                            'question': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
                            'answer': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
                        },
                    }
    indices_client.put_mapping(
        doc_type=models.SurveyMap._meta.es_type_name,
        body=es_mapping,
        index=index_name
    )
def __createIndex(self):
    es = Elasticsearch([{'host': self.elasticsearch_host,
                         'port': self.elasticsearch_port}])
    ic = IndicesClient(es)
    if ic.exists(index='wow'):
        print("deleting old index")
        self.deleteIndex()
    ic.create(index='wow')
    for currentFile in glob.glob(os.path.join(self.map_directory, '*')):
        print("MAP FILE: " + currentFile)
        self.__mapFile(currentFile)
def status(self):
    idx_client = IndicesClient(self.es)
    for idx in ['raw-article', 'enhanced-article']:
        es_index = self.indexinfo(idx)[0]
        if idx_client.exists(es_index):
            self.logger.info("%s contains %s documents." %
                             (idx, self.es.count(index=es_index)['count']))
            if idx == 'article':
                query = {"query": {"term": {"status": 1}}}
                self.logger.info(
                    "%s articles have been processed." %
                    self.es.count(index=es_index, body=query)['count'])
        else:
            self.logger.info("%s does not exist" % es_index)
def main():
    es_client = Elasticsearch([{'host': args.host, 'port': args.port}])
    es_index = IndicesClient(es_client)
    list_indexes = [index for index in es_index.status()['indices']]
    regexp = re.compile(r'(\d{4})\.(\d{2})\.(\d{2})',
                        re.IGNORECASE | re.UNICODE)
    current_date = datetime.date.today()
    for index in list_indexes:
        res = regexp.search(index)
        if res:
            date_indx = datetime.date(year=int(res.group(1)),
                                      month=int(res.group(2)),
                                      day=int(res.group(3)))
            if (current_date - date_indx).days > args.old:
                es_index.delete(index)
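# Note: IndicesClient.status() only exists in elasticsearch-py 1.x; the
# underlying _status API was removed in Elasticsearch 2.0. A hedged sketch
# of the same index listing for newer (pre-8) clients -- an adaptation,
# not part of the original snippet:
from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient

def list_index_names(es_client):
    # indices.get returns a dict keyed by concrete index name
    return list(IndicesClient(es_client).get(index='*'))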
def delete_index(self, es):
    """
    Delete the dataset index.

    :param es: Elasticsearch client instance
    :type es: elasticsearch.client.Elasticsearch
    :rtype : NewsgroupsDataset
    """
    ic = IndicesClient(es)
    ic.delete(index=self.es_index, ignore=[400, 404])
    return self
def setUp(self): """Empty ElasticSearch at the start of every test """ try: self.elastic_doc._remove() except OperationFailed: try: # Create test.test index if necessary client = Elasticsearch(hosts=['localhost:9200']) idx_client = IndicesClient(client) idx_client.create(index='test.test') except es_exceptions.TransportError: pass
def initialize(self, conf, context):
    host = conf.get('zeit.recommend.elasticsearch.host', 'localhost')
    port = conf.get('zeit.recommend.elasticsearch.port', 9200)
    self.es = Elasticsearch(hosts=[{'host': host, 'port': port}])
    self.match = re.compile('seite-[0-9]|komplettansicht').match
    self.index = '%s-%s' % date.today().isocalendar()[:2]
    ic = IndicesClient(self.es)
    try:
        if not ic.exists(self.index):
            ic.create(self.index)
    except ConnectionError as e:
        log('[UserIndexBolt] ConnectionError, index unreachable: %s' % e)
        return
def _create_weight_index(es, index):
    """
    Creates the index with the right mapping if it doesn't exist.

    :param es:
    :type es: elasticsearch.Elasticsearch
    :param index:
    :type index: str|unicode
    """
    ic = IndicesClient(es)
    if ic.exists(index):
        logging.info('Index %s already exists ...' % index)
    else:
        ic.create(index=index, body=ES_TERMWEIGHTING_INDEX_SETTINGS)
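# A minimal sketch of calling the helper above. The settings body is a
# stand-in for ES_TERMWEIGHTING_INDEX_SETTINGS, whose real contents are not
# shown in the snippet; the index name is likewise illustrative.
from elasticsearch import Elasticsearch

ES_TERMWEIGHTING_INDEX_SETTINGS = {
    "settings": {"number_of_shards": 1, "number_of_replicas": 0}
}
es = Elasticsearch(["http://localhost:9200"])
_create_weight_index(es, "termweights")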
def initialize(self, idx):
    es_index, es_doctype = self.indexinfo(idx)
    self.logger.info("Initializing %s" % es_index)
    idx_client = IndicesClient(self.es)
    if idx_client.exists(es_index):
        idx_client.delete(es_index)
    idx_client.create(es_index)
    if idx == 'event':
        idx_client.put_mapping(doc_type=es_doctype, index=[es_index],
                               body=event_mapping())
    self.logger.info("%s ready." % es_index)
def recreate_index(self):
    indices_client = IndicesClient(client=settings.ES_CLIENT)
    index_name = Student._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    indices_client.put_mapping(
        doc_type=Student._meta.es_type_name,
        body=Student._meta.es_mapping,
        index=index_name
    )
def init_state(self, index, host, port):
    self._queue = []
    self.index = index
    self.host = host
    self.port = port
    if host is None:
        self.es = Elasticsearch()
    else:
        self.es = Elasticsearch(hosts=[{'host': host, 'port': port}])
    self.idx_manager = IndicesClient(self.es)
    self.mapper = ESMapper()
def _init_mapping(self, mapping_path):
    esi = IndicesClient(es.get_es_handle())
    index = settings.ES_INDEX

    # First create the index if it does not exist
    if not esi.exists(index):
        self.stdout.write("Creating index for db : %s" % index)
        esi.create(index=index)
        self.stdout.write("Index created for : %s" % index)

    if not mapping_path or not os.path.exists(mapping_path):
        raise CommandError("Mapping path does not exist")

    mapping_str = open(mapping_path, "r").read()
    mappings = json.loads(mapping_str)

    for k, v in mappings.items():
        res = esi.put_mapping(index, k, {k: mappings[k]})
        self.stdout.write(str(res))
def _remove_index_if_exists():
    es = elasticsearch.Elasticsearch()
    from elasticsearch.client import IndicesClient
    es_index = IndicesClient(es)
    if es_index.exists(STORAGE_INDEX_NAME):
        logger.info(
            "Elasticsearch index '{0}' already exists and "
            "will be deleted".format(STORAGE_INDEX_NAME))
        try:
            es_index.delete(STORAGE_INDEX_NAME)
            logger.info('Verifying Elasticsearch index was deleted...')
            deadline = time.time() + 45
            while es_index.exists(STORAGE_INDEX_NAME):
                if time.time() > deadline:
                    raise RuntimeError(
                        'Elasticsearch index was not deleted after '
                        '45 seconds')
                time.sleep(0.5)
        except BaseException as e:
            logger.warn('Ignoring caught exception on Elasticsearch delete'
                        ' index - {0}: {1}'.format(e.__class__, e))
def create_stations_mapping():
    idx_client = IndicesClient(es)
    mapping = {
        "properties": {
            "name": {"type": "text"},
            "link": {"type": "text"},
            "elevation": {"type": "float"},
            "coordinates": {"type": "geo_point"}
        }
    }
    idx_client.put_mapping(doc_type=stations_mapping,
                           index=[stations_index],
                           body=mapping)
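# A small hedged sketch of indexing a document against the geo_point
# mapping above. The index and type names mirror the snippet's
# stations_index/stations_mapping variables; the values used here are
# assumptions for illustration only.
from elasticsearch import Elasticsearch

es = Elasticsearch(["http://localhost:9200"])
es.index(
    index="stations",        # assumed value of stations_index
    doc_type="station",      # assumed value of stations_mapping
    body={
        "name": "Jungfraujoch",
        "link": "https://example.org/jungfraujoch",
        "elevation": 3463.0,
        # geo_point accepts {"lat": ..., "lon": ...} among other formats
        "coordinates": {"lat": 46.5475, "lon": 7.9851},
    },
)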
def remove_log_indices():
    es = elasticsearch.Elasticsearch()
    from elasticsearch.client import IndicesClient
    es_index = IndicesClient(es)
    log_index_pattern = '{0}*'.format(LOG_INDICES_PREFIX)
    if es_index.exists(log_index_pattern):
        logger.info(
            "Elasticsearch indices '{0}' already exist and "
            "will be deleted".format(log_index_pattern))
        try:
            es_index.delete(log_index_pattern)
            logger.info('Verifying Elasticsearch index was deleted...')
            deadline = time.time() + 45
            while es_index.exists(log_index_pattern):
                if time.time() > deadline:
                    raise RuntimeError(
                        'Elasticsearch index was not deleted after '
                        '45 seconds')
                time.sleep(0.5)
        except BaseException as e:
            logger.warn('Ignoring caught exception on Elasticsearch delete'
                        ' index - {0}: {1}'.format(e.__class__, e))
class RedisEsSetupMixin(object):

    def setUp(self):
        self.settings = TEST_SETTINGS_OBJECT
        self.es = get_es(self.settings)
        self.esi = IndicesClient(self.es)
        self.index = self.settings.get("ES_INDEX")

        # Create the index first
        if self.esi.exists(self.index):
            self.esi.delete(index=self.index)
        self.esi.create(index=self.index)

        mapping_path = os.path.join(SCRAPY_ROOT, "resources/mappings.json")
        mapping_str = open(mapping_path, "r").read()
        mappings = json.loads(mapping_str)

        for k, v in mappings.items():
            res = self.esi.put_mapping(self.index, k, {k: mappings[k]})

        self.redis_conn = get_redis(self.settings)

    def tearDown(self):
        if self.esi.exists(self.index):
            self.esi.delete(index=self.index)
            print("ES INDEX DELETED")
        # Remove redis stuff
        self.redis_conn.flushdb()
        print("REDIS DB DELETED")
def setUp(self):
    super(TestESTermAggregationWeightProvider, self).setUp()
    self.es = Elasticsearch(hosts=['localhost:%d' % es_runner.es_state.port])
    self.ic = IndicesClient(self.es)
    self.index = 'es_term_weight_provider_test'
    self.doc_type = 'test-doc'
    self.field = 'text'
    if self.ic.exists(self.index):
        self.ic.delete(self.index)
    self.ic.create(self.index)
    self.es.create(self.index, self.doc_type, {self.field: 'foo'})
    self.es.create(self.index, self.doc_type, {self.field: 'knark'})
    self.es.create(self.index, self.doc_type, {self.field: 'ba'})
    self.es.create(self.index, self.doc_type, {self.field: 'knirk'})
    self.es.create(self.index, self.doc_type, {self.field: 'ba'})
    self.es.create(self.index, self.doc_type, {self.field: 'ba'})
    self.es.create(self.index, self.doc_type, {self.field: 'knark '})
    self.es.create(self.index, self.doc_type, {self.field: 'ba'}, refresh=True)
def import_ontology(ontology: lib.obo.Ontology, index_name: str):
    es = elasticsearch.Elasticsearch()
    ies = IndicesClient(es)
    actions = [dict(
        _index=index_name,
        _type=index_name,
        _source=dict(
            id=item.id,
            names=item.names()
        )
    ) for item in ontology.items()]

    if ies.exists(index_name):
        ies.delete(index_name)
    ies.create(index_name)
    return bulk(es, actions=actions)
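# A hedged usage sketch: helpers.bulk returns a (success_count, errors)
# tuple, so callers can verify the import succeeded. The loader call is
# purely illustrative -- lib.obo's real API is not shown in the snippet.
# ontology = lib.obo.Ontology("go-basic.obo")   # hypothetical loader
# success, errors = import_ontology(ontology, "go_terms")
# assert not errors, "some ontology items failed to index"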
def __init__(self): """ setup Neo4j database connection and node labels and Elasticsearch mapping attachments index """ self.db = GraphDatabase(self.db_path) self.pdf_documents = self.db.labels.create("PDFDocument") self.authors = self.db.labels.create("Author") self.keywords = self.db.labels.create("Keyword") self.es = Elasticsearch(self.es_cluster) self.es_ixc = IndicesClient(self.es) self.es_ixc.create( index="pdf_documents", body={ 'mappings': { 'pdf': { 'properties': { 'url': {'type': "string"}, 'pdf_file': {'type': "attachment"} } } } } )
def main():
    # Define the globals
    global index_names
    global STARTED_TIMESTAMP
    global es
    global es_indices
    try:
        # Initiate the elasticsearch session using the ES low-level client.
        # By default, nodes are randomized before being passed into the pool
        # and a round-robin strategy is used for load balancing.
        es = Elasticsearch(ES_HOSTS, timeout=30)
        es_indices = IndicesClient(es)
    except Exception:
        print("Could not connect to elasticsearch!")
        sys.exit(1)

    print("Creating indices..\n")
    indices = generate_indices()
    print("Done!\n")

    # Register a specific mapping definition for each specific type.
    print("Put Mapping\n")
    es_indices.put_mapping(doc_type="_default_",
                           body=mappings_body["_default_"],
                           index="_all")
    for type_name in types:
        es_indices.put_mapping(doc_type=type_name,
                               body=mappings_body[type_name],
                               index="_all")
    print("Done!\n")

    # Retrieve the mapping definition of an index or index/type.
    print("GET Mapping\n")
    print(json.dumps(es_indices.get_mapping(index=["metrics_0", "metrics_1"],
                                            doc_type=types),
                     sort_keys=True, indent=4, separators=(',', ': ')))
    # print(json.dumps(es_indices.get_settings(index="_all"),
    #                  sort_keys=True, indent=4, separators=(',', ': ')))
    print("Done!\n")

    # Clean up the created indices by default (CLEANUP defaults to True)
    if CLEANUP:
        print("Cleaning up created indices.. ")
        cleanup_indices()
        print("Done!\n")