Example No. 1
def create_es_index(index_name):
    es = Elasticsearch()
    client = IndicesClient(es)

    # take this opportunity to create training index if it doesn't exist
    if not client.exists('appcompat-training'):
        client.create(index='appcompat-training', body=CONFIG)

    if client.exists(index_name):
        raise Exception('Index already exists: {}'.format(index_name))
    client.create(index=index_name, body=CONFIG)
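
Note: the snippet assumes a module-level CONFIG dict holding the index body. A minimal sketch of the kind of body it might contain; the field names here are illustrative, not taken from the original source:

CONFIG = {
    'settings': {'number_of_shards': 1, 'number_of_replicas': 0},
    'mappings': {
        'entry': {
            'properties': {
                # pre-5.x "keyword-like" string field, as in the examples below
                'hostname': {'type': 'string', 'index': 'not_analyzed'},
                '@timestamp': {'type': 'date', 'format': 'dateOptionalTime'}
            }
        }
    }
}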
Example No. 2
    def _create_index(self):
        es_index = IndicesClient(self._es)
        if es_index.exists(self._store_index):
            logging.info('Index ' + self._store_index +
                         ' already exists. Skipping index creation.')
            return None

        es_mapping = {
            "mappings": {
                'last_runtime': {
                    'properties': {
                        'plugin_name': {
                            'index': 'not_analyzed',
                            'type': 'string'
                        },
                        'rule_name': {
                            'index': 'not_analyzed',
                            'type': 'string'
                        },
                        'plugin_sid': {
                            'index': 'not_analyzed',
                            'type': 'long'
                        },
                        '@timestamp': {
                            'format': 'dateOptionalTime||epoch_millis',
                            'type': 'date'
                        }
                    }
                }
            }
        }

        self._es.indices.create(self._store_index, body=es_mapping)

        time.sleep(1)
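
Note: the mapping above uses pre-5.x syntax ('string' with 'index': 'not_analyzed'). Elasticsearch 5.x replaced that combination with the 'keyword' type; a roughly equivalent mapping on a 5.x+ cluster would be:

es_mapping = {
    'mappings': {
        'last_runtime': {
            'properties': {
                'plugin_name': {'type': 'keyword'},
                'rule_name': {'type': 'keyword'},
                'plugin_sid': {'type': 'long'},
                '@timestamp': {'type': 'date',
                               'format': 'dateOptionalTime||epoch_millis'}
            }
        }
    }
}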
Example No. 3
class TestSingleDocSigTerms(TestCase):
    def setUp(self):
        super(TestSingleDocSigTerms, self).setUp()

        self.es = Elasticsearch(
            hosts=['localhost:%d' % es_runner.es_state.port])
        self.ic = IndicesClient(self.es)
        self.index = 'single_doc_sigterms_test'
        self.doc_type = 'test-doc'
        self.field = 'text'

        if self.ic.exists(self.index):
            self.ic.delete(self.index)

        self.ic.create(self.index)
        self.es.create(self.index,
                       self.doc_type,
                       {self.field: 'foo ba knark foo knirk knark foo'},
                       id='doc_1')

    def test_tf_for_doc_id(self):
        sigterms = SingleDocSigTerms(self.es, self.index, self.doc_type,
                                     self.field, None)

        resp = dict(sigterms.tf_for_doc_id('doc_1'))
        self.assertEqual(4, len(resp))
        self.assertEqual(3, resp['foo'])
        self.assertEqual(2, resp['knark'])
        self.assertEqual(1, resp['ba'])
        self.assertEqual(1, resp['knirk'])
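
The SingleDocSigTerms class under test is not shown anywhere in this listing. A minimal sketch consistent with the assertions above, assuming it is backed by the term-vectors API (the real implementation may differ):

class SingleDocSigTerms(object):
    def __init__(self, es, index, doc_type, field, seed):
        self.es = es
        self.index = index
        self.doc_type = doc_type
        self.field = field

    def tf_for_doc_id(self, doc_id):
        # fetch per-term frequencies for a single stored document
        resp = self.es.termvectors(index=self.index, doc_type=self.doc_type,
                                   id=doc_id, fields=[self.field])
        terms = resp['term_vectors'][self.field]['terms']
        return [(term, info['term_freq']) for term, info in terms.items()]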
Example No. 5
class IndexBase:

    def __init__(self, **kwargs):
        self.index = kwargs.pop('index')
        self.client = client_es
        self.client_index = IndicesClient(self.client)

        if kwargs.get('settings'):
            self.settings = kwargs.pop('settings')
        else:
            self.settings = DEFAULT_SETTINGS

        if self.exist_index():
            self.delete_index()
        self.create_index()

    def exist_index(self):
        return self.client_index.exists(index=self.index)

    def delete_index(self):
        return self.client_index.delete(index=self.index, ignore=[400, 404])

    def create_index(self):
        return self.client_index.create(index=self.index, body=self.settings)
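
Usage sketch; client_es and DEFAULT_SETTINGS are module-level names the class relies on, and the settings body below is illustrative:

idx = IndexBase(index='products',
                settings={'settings': {'number_of_shards': 1}})
assert idx.exist_index()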
Example No. 6
	def handle(self, *args, **options):

		es = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

		fop=open('spider/management/commands/'+str(argv[2]), 'r')
		inds = IndicesClient(es)

		mapping = {
			"mappings": {
				"product_type": {
					"properties": {
						"code": {"type": "string"},
						"name": {"type": "string"},
						"img": {"type": "string"},
						"url": {"type": "string"},
						"price_reg": {"type": "float"},
						"price_discount": {"type": "float"}
					}
				}
			}
		}

		if not inds.exists(index='gearbest_index'):
			inds.create(index='gearbest_index',body=mapping)
			print 'gearbest_index created'

		for jsonline in fop:
			jobj=loads(jsonline)
			del jobj["_type"]
			es.index(index="gearbest_index",doc_type='product_type', body=jobj, id=jobj['code'])
			
			disc=0
			reg=0

			if len(jobj['price_discount'])>0:
				disc  = float(jobj['price_discount'][0])
			if len(jobj['price_reg'])>0:
				reg  = float(jobj['price_reg'][0])

			#insert="INSERT into 'price_gb' ('price','price_disc','code','date') values ("+str(reg)+", "+str(disc)+", '"+str(jobj['code'])+"', '"+str(datetime.today())+"')"
			#cursor = connection.cursor()
			#cursor.execute(insert)

			add_price=Price_gb(price=reg,price_disc=disc,code=str(jobj['code']),date=datetime.date.today())
			add_price.save()

			print 'code='+str(jobj['code'])
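
Indexing one document per HTTP round trip is slow for large files. A hedged sketch of the same load using the bulk helper, assuming the parsed JSON objects are first collected in a list named products:

from elasticsearch.helpers import bulk

actions = [{'_index': 'gearbest_index', '_type': 'product_type',
            '_id': jobj['code'], '_source': jobj}
           for jobj in products]
bulk(es, actions)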
Example No. 7
def import_examples_into_es(examples: list):
    index_name = config.index_name
    type_name = config.type_name
    buck_size = config.buck_size

    es = Elasticsearch(config.es_url)
    es_index = IndicesClient(es)
    if es_index.exists(index=index_name):
        es_index.delete(index=index_name)
    # create the index
    with open(config.es_index_json) as f:
        mappings = json.load(f)

    res = es.indices.create(index=index_name, body=mappings)

    # bulk-import the data into ES
    for i in range(len(examples)):
        examples[i] = {
            "_index": index_name,
            "_type": type_name,
            "_id": examples[i]["ntc_id"],
            "_source": examples[i]
        }

    for i in tqdm(range(ceil(len(examples) / buck_size)), desc="Import into ES"):
        bulk(es, actions=examples[i * buck_size: min((i + 1) * buck_size, len(examples))])
Example No. 8
def create_index_conf():
    indices_client = IndicesClient(models.client)
    index_name = 'conf'
    doc_type = index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
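
Many of the examples in this collection repeat the same delete-then-create pattern. A generic sketch of that pattern (simplified, with illustrative argument handling):

def recreate_index(client, index_name, body=None, mapping=None, doc_type=None):
    ic = IndicesClient(client)
    if ic.exists(index_name):
        ic.delete(index=index_name)
    ic.create(index=index_name, body=body)
    if mapping is not None:
        ic.put_mapping(doc_type=doc_type, body=mapping, index=index_name)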
Example No. 9
def create_index_survey():
    indices_client = IndicesClient(models.client)
    index_name = models.SurveyMap._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    #put_settings(models.ScentemotionMap)
    # add qstfld fields
    es_mapping = models.SurveyMap._meta.es_mapping
    for qst, mapping in survey.qst2fld.items():
        fields = mapping[0]
        field_type = mapping[1]
        if field_type == 'nested_qst_ans':
            for field in fields:
                if field not in es_mapping['properties']:
                    es_mapping['properties'][field] = {}
                    es_mapping['properties'][field]['type'] = 'nested'
                    es_mapping['properties'][field]['properties'] = {}
                    es_mapping['properties'][field]['properties']['question'] = {'type' : 'text', 'fields' : {'keyword' : {'type' : 'keyword', 'ignore_above' : 256}}}
                    es_mapping['properties'][field]['properties']['answer'] = {'type' : 'text', 'fields' : {'keyword' : {'type' : 'keyword', 'ignore_above' : 256}}}
                        #'type'       : 'nested',
                        #'properties' : {
                        #    'question' : {'type' : 'text', 'fields' : {'keyword' : {'type' : 'keyword', 'ignore_above' : 256}}},
                        #    'answer'   : {'type' : 'text', 'fields' : {'keyword' : {'type' : 'keyword', 'ignore_above' : 256}}},
                        #    }
                        #},
    indices_client.put_mapping(
        doc_type=models.SurveyMap._meta.es_type_name,
        #body=models.SurveyMap._meta.es_mapping,
        body=es_mapping,
        index=index_name
        )
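
A hedged example of querying one of the nested question/answer fields created above (the field name 'q1' is illustrative):

query = {
    'query': {
        'nested': {
            'path': 'q1',
            'query': {'match': {'q1.answer': 'citrus'}}
        }
    }
}
results = models.client.search(index=index_name, body=query)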
Example No. 10
    def _reset_mapping(self, mapping_path):
        esi = IndicesClient(es.get_es_handle())
        index = settings.ES_INDEX

        if not esi.exists(index):
            raise CommandError("Non existing index : %s"%index)

        self.stdout.write(str(esi.delete(index=index)))
Example No. 11
def create_index_excel(excel_filename):
    indices_client = IndicesClient(models.client)
    index_name = 'excel'
    if len(excel_filename):
        doc_type = os.path.splitext(excel_filename)[0]
        index_name = 'excel_' + doc_type
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
Example No. 12
 def recreate_index(self, index_name, index_mapping):
     indices_client = IndicesClient(client=ES_CLIENT)
     if indices_client.exists(index_name):
         indices_client.delete(index=index_name)
     indices_client.create(index=index_name)
     indices_client.put_mapping(doc_type='page',
                                index=index_name,
                                body=index_mapping)
Example No. 13
def create_index():
    indices_client = IndicesClient(client=settings.ES)
    index_name = Apartments._meta.es_index_name
    if not indices_client.exists(index_name):
        indices_client.create(index=index_name)
        indices_client.put_mapping(doc_type=Apartments._meta.es_type_name,
                                   body=Apartments._meta.es_mapping,
                                   index=index_name)
Example No. 14
def create_index_mi():
    indices_client = IndicesClient(models.client)
    index_name = models.PostMap._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    indices_client.put_mapping(body=models.PostMap._meta.es_mapping,
                               index=index_name)
Example No. 15
 def _create_main_index_if_not_exists(self):
     """
     method that creates new elastic index if not existed
     :return:
     """
     ic = IndicesClient(self.es)
     if not ic.exists(MAIN_INDEX_NAME):
         ic.create(MAIN_INDEX_NAME)
Example No. 16
    def create_index_if_not_exists(self):
        """ Check if index exists & if not exists create index & types & store their mappings.  """

        ic = IndicesClient(self.es)
        response = ic.exists(index=[self.index_name])
        if not response:
            es_mappings = ElasticSearchController.get_index_mapper_dict()
            index_response = ic.create(index=self.index_name,
                                       body={"mappings": es_mappings})
Example No. 17
def remover_indice(nome_indice):
    """Remove the index from Elasticsearch.

    An Elasticsearch index is analogous to a table in an RDBMS.
    """
    es = conectar_em_elastic_search()
    client_indice = IndicesClient(es)
    if client_indice.exists(index=[nome_indice]):
        client_indice.delete(nome_indice)
Example No. 18
 def recreate_index(self):
     indices_client = IndicesClient(client=settings.ES_CLIENT)
     index_name = Student._meta.es_index_name
     if indices_client.exists(index_name):
         indices_client.delete(index=index_name)
     indices_client.create(index=index_name)
     indices_client.put_mapping(doc_type=Student._meta.es_type_name,
                                body=Student._meta.es_mapping,
                                index=index_name)
Example No. 19
def create_index_pi():
    #   indices_client = IndicesClient(client=settings.ES_HOSTS)
    indices_client = IndicesClient(models.client)
    index_name = models.Review._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    indices_client.put_mapping(body=models.Review._meta.es_mapping,
                               index=index_name)
Example No. 21
def create_index_bestmatch():
    indices_client = IndicesClient(models.client)
    index_name = models.bestmatchMap._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    #put_settings(models.bestmatchMap)
    indices_client.put_mapping(body=models.bestmatchMap._meta.es_mapping,
                               index=index_name)
Example No. 22
def create_index_si_sites():
    indices_client = IndicesClient(models.client)
    index_name = models.PageMap._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    indices_client.put_mapping(doc_type=models.PageMap._meta.es_type_name,
                               body=models.PageMap._meta.es_mapping,
                               index=index_name)
Example No. 23
def create_index_dhk():
    indices_client = IndicesClient(models.client)
    index_name = 'recipes'
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    indices_client.put_mapping(
        # ES7.0 does not support types anymore doc_type=index_name,
        body={'properties': wb_excel.recipes},
        index=index_name)
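
As the comment notes, mapping types are gone in ES 7; the same mapping can also be supplied at index-creation time, avoiding the separate put_mapping call (a sketch):

indices_client.create(index=index_name, body={
    'mappings': {'properties': wb_excel.recipes}
})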
Example No. 24
 def initialize(self, idx):
     es_index, es_doctype = self.indexinfo(idx)
     self.logger.info("Initializing %s" % es_index)
     idx_client = IndicesClient(self.es)
     if idx_client.exists(es_index):
         idx_client.delete(es_index)
     idx_client.create(es_index)
     if idx == 'event':
         idx_client.put_mapping(doc_type=es_doctype, index=[es_index], body=event_mapping())
     self.logger.info("%s ready." % es_index)
Example No. 25
def create_index_mi_feedly():
    indices_client = IndicesClient(models.client)
    index_name = models.FeedlyMap._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    #put_settings(models.FeedlyMap)
    indices_client.put_mapping(doc_type=models.FeedlyMap._meta.es_type_name,
                               body=models.FeedlyMap._meta.es_mapping,
                               index=index_name)
Example No. 26
def create_index_survey():
    indices_client = IndicesClient(models.client)
    index_name = models.SurveyMap._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    #put_settings(models.ScentemotionMap)
    indices_client.put_mapping(doc_type=models.SurveyMap._meta.es_type_name,
                               body=models.SurveyMap._meta.es_mapping,
                               index=index_name)
Example No. 27
 def recreate_index(self):
     indices_client = IndicesClient(client=settings.ES_CLIENT)
     index_name = es_index_name
     if indices_client.exists(index_name):
         indices_client.delete(index=index_name)
     indices_client.create(index=index_name, body=es_ind_settings)
     for model_name in es_models:
         indices_client.put_mapping(
             doc_type=model_es_indices[model_name]['type'],
             body=es_mappings[model_name],
             index=es_index_name)
Example No. 29
 def __createIndex(self):
     es = Elasticsearch([{'host': self.elasticsearch_host, 'port': self.elasticsearch_port}])
     ic = IndicesClient(es)
     if(ic.exists(index='wow')):
         print("deleting old index")
         self.deleteIndex()
     ic.create(index='wow')
     # blah = glob.glob(os.path.join(self.map_directory, '*'))
     for currentFile in glob.glob(os.path.join(self.map_directory, '*')):
         print("MAP FILE: " + currentFile)
         self.__mapFile(currentFile)
Example No. 30
 def _remove_index_if_exists():
     es = elasticsearch.Elasticsearch()
     from elasticsearch.client import IndicesClient
     es_index = IndicesClient(es)
     if es_index.exists(STORAGE_INDEX_NAME):
         logger.info("Elasticsearch index '{0}' already exists and "
                     "will be deleted".format(STORAGE_INDEX_NAME))
         try:
             es_index.delete(STORAGE_INDEX_NAME)
             logger.info('Verifying Elasticsearch index was deleted...')
             deadline = time.time() + 45
             while es_index.exists(STORAGE_INDEX_NAME):
                 if time.time() > deadline:
                     raise RuntimeError(
                         'Elasticsearch index was not deleted after '
                         '45 seconds')
                 time.sleep(0.5)
         except BaseException as e:
             logger.warn('Ignoring caught exception on Elasticsearch delete'
                         ' index - {0}: {1}'.format(e.__class__, e.message))
Example No. 31
 def status(self):
     idx_client = IndicesClient(self.es)
     for idx in ['raw-article', 'enhanced-article']:
         es_index = self.indexinfo(idx)[0]
         if idx_client.exists(es_index):
             self.logger.info("%s contains %s documents." % (idx, self.es.count(index=es_index)['count']))
             if idx == 'raw-article':
                 query = {"query": {"term": {"status": 1}}}
                 self.logger.info(
                     "%s articles have been processed." % self.es.count(index=es_index, body=query)['count'])
         else:
             self.logger.info("%s does not exist" % es_index)
Example No. 32
 def recreate_index(self):
     indices_client = IndicesClient(client=settings.ES_CLIENT)
     index_name = self.es_index_name
     if indices_client.exists(index_name):
         indices_client.delete(index=index_name)
     indices_client.create(index=index_name, body=self.es_ind_settings)
     ## create mapping for one model only for now
     model_name = 'place'
     indices_client.put_mapping(
         doc_type=model_es_indices[model_name]['type'],
         body=es_mappings[model_name],
         index=index_name)
Example No. 33
def create_parcel_mapping():

    idx_client = IndicesClient(es)

    if not idx_client.exists(index=parcel_index):
        idx_client.create(index=parcel_index)

    with open('osc/util/mappings/parcel.json') as mapping_file:
        mapping = json.load(mapping_file)
        idx_client.put_mapping(doc_type=parcel_mapping,
                               index=[parcel_index],
                               body=mapping)
Example No. 35
 def recreateIndex(self):
     """Recreate the index in Elasticsearch."""
     print("deleting the previous index and creating the new one...")
     indices_client = IndicesClient(client=settings.ES_CLIENT)
     index_name = Product._meta.es_index_name
     type_type = Product._meta.es_type_name
     if indices_client.exists(index=index_name):
         indices_client.delete(index=index_name)
     indices_client.create(index_name)
     indices_client.put_mapping(doc_type=Product._meta.es_type_name,
                                body=Product._meta.es_mapping,
                                index=index_name)
Example No. 36
 def remove_log_indices():
     es = elasticsearch.Elasticsearch()
     from elasticsearch.client import IndicesClient
     es_index = IndicesClient(es)
     log_index_pattern = '{0}*'.format(LOG_INDICES_PREFIX)
     if es_index.exists(log_index_pattern):
         logger.info("Elasticsearch indices '{0}' already exist and "
                     "will be deleted".format(log_index_pattern))
         try:
             es_index.delete(log_index_pattern)
             logger.info('Verifying Elasticsearch index was deleted...')
             deadline = time.time() + 45
             while es_index.exists(log_index_pattern):
                 if time.time() > deadline:
                     raise RuntimeError(
                         'Elasticsearch index was not deleted after '
                         '45 seconds')
                 time.sleep(0.5)
         except BaseException as e:
             logger.warn('Ignoring caught exception on Elasticsearch delete'
                         ' index - {0}: {1}'.format(e.__class__, e.message))
Example No. 38
 def get_index(self, index=INDEX):
     """
     get the index status
     :param index:
     :return: True or False
     """
     try:
         indexcli = IndicesClient(self.es)
         index_status = indexcli.exists(index=index)
         # LOG.info('Get index status successful ,index status is {}'.format(index_status))
         return index_status
     except Exception as e:
         LOG.error('Get index status failed ,cause {}'.format(e))
Example No. 39
    def createIndex(es, indexName, settingsFile, mappingFile, delete=False):
        iclient = IndicesClient(es)

        #If specified, delete any existing index with the same name
        if delete and iclient.exists(indexName):
            iclient.delete(indexName)

        #else only create it if it does not exist
        if not iclient.exists(indexName):
            #Load the settings and mapping
            f = open(settingsFile)
            settings = json.load(f)
            f.close()
            f = open(mappingFile)
            mapping = json.load(f)
            f.close()

            #Create the index with the settings and mapping
            iclient.create(indexName, {
                'settings': settings,
                'mappings': mapping
            })
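
Hypothetical invocation, assuming the function is reachable as shown and the settings and mapping JSON files exist on disk:

es = Elasticsearch()
createIndex(es, 'articles', 'settings.json', 'mapping.json', delete=True)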
Example No. 40
class RedisEsSetupMixin(object):

    def setUp(self):
        self.settings = TEST_SETTINGS_OBJECT
        self.es = get_es(self.settings)
        self.esi = IndicesClient(self.es)

        self.index = self.settings.get("ES_INDEX")

        #create the index firstly
        if self.esi.exists(self.index):
            self.esi.delete(index=self.index)

        self.esi.create(index=self.index)

        mapping_path = os.path.join(SCRAPY_ROOT,
                                 "resources/mappings.json")

        mapping_str = open(mapping_path, "r").read()
        mappings = json.loads(mapping_str)


        for k,v in mappings.iteritems():
            res = self.esi.put_mapping(self.index, k, {k:mappings[k]})
            #print res


        self.redis_conn = get_redis(self.settings)


    def tearDown(self):
        if self.esi.exists(self.index):
            self.esi.delete(index=self.index)
            print "ES INDEX DELETED"

        #remove redis stuff
        self.redis_conn.flushdb()
        print "REDIS DB DELETED"
Example No. 41
    def initialize(self, conf, context):
        host = conf.get('zeit.recommend.elasticsearch.host', 'localhost')
        port = conf.get('zeit.recommend.elasticsearch.port', 9200)
        self.es = Elasticsearch(hosts=[{'host': host, 'port': port}])
        self.match = re.compile('seite-[0-9]|komplettansicht').match
        self.index = '%s-%s' % date.today().isocalendar()[:2]
        ic = IndicesClient(self.es)

        try:
            if not ic.exists(self.index):
                ic.create(self.index)
        except ConnectionError, e:
            log('[UserIndexBolt] ConnectionError, index unreachable: %s' % e)
            return
Example No. 42
    def _create_weight_index(es, index):
        """
        Creates the index with the right mapping if it doesn't exist.

        :param es:
        :type es:elasticsearch.Elasticsearch
        :param index:
        :type index:str|unicode
        """
        ic = IndicesClient(es)

        if ic.exists(index):
            logging.info('Index %s already exists ...' % index)
        else:
            ic.create(index=index, body=ES_TERMWEIGHTING_INDEX_SETTINGS)
Example No. 43
def import_ontology(ontology: lib.obo.Ontology, index_name: str):
    es = elasticsearch.Elasticsearch()

    ies = IndicesClient(es)

    actions = [dict(
        _index=index_name,
        _type=index_name,
        _source=dict(
            id=item.id,
            names=item.names()
        )
    ) for item in ontology.items()]

    if ies.exists(index_name):
        ies.delete(index_name)
    ies.create(index_name)
    return bulk(es, actions=actions)
Example No. 44
    def _init_mapping(self, mapping_path):
        esi = IndicesClient(es.get_es_handle())
        index = settings.ES_INDEX

        #first create index if not exists
        if not esi.exists(index):
            self.stdout.write("Creating index for db : %s"%index)
            esi.create(index=index)
            self.stdout.write("Index Created for : %s"%index)


        if not mapping_path or not os.path.exists(mapping_path):
            raise CommandError("not existing mapping path")

        mapping_str = open(mapping_path, "r").read()
        mappings = json.loads(mapping_str)


        for k,v in mappings.iteritems():
            res = esi.put_mapping(index, k, {k:mappings[k]})
            self.stdout.write(str(res))
Example No. 45
def setup(forced):
	properties = {}
	properties["fail_symptom"] = {"type" : "string", "index": "not_analyzed"}
	properties["ats_log"] = {"type" : "string"}
	properties["file_path"] = {"type" : "string", "analyzer": "path-analyzer"}
	add_unique_mapping(properties, "Test Start Time", {"VALUE" : {"type" : "date", "format": "yyyy/MM/dd HH:mm:ssZ||yyyy/MM/ddZ"}})
	add_unique_mapping(properties, "Test end Time", {"VALUE" : {"type" : "date", "format": "yyyy/MM/dd HH:mm:ssZ||yyyy/MM/ddZ"}})

	es = Elasticsearch([{'host': 'localhost', 'port': 9200}], max_retries=10, retry_on_timeout=True)
	idx_client = IndicesClient(es)
	if (idx_client.exists(index=PROJECT)):
		if (forced):
			idx_client.delete(index=PROJECT)
		else :
			print "Index already exists!"
			return

	runin_csv_status = {"runin_csv_status" : {"path_match": "RunInLog.*.STATUS", "mapping": {"index": "not_analyzed"}}}
	runin_csv_value = {"runin_csv_value" : {"path_match": "RunInLog.*.VALUE", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	runin_csv_u_limit = {"runin_csv_u_limit" : {"path_match": "RunInLog.*.U_LIMIT", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	runin_csv_l_limit = {"runin_csv_l_limit" : {"path_match": "RunInLog.*.L_LIMIT", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	runin_csv_test_time = {"runin_csv_test_time" : {"path_match": "RunInLog.*.TEST_TIME", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	csv_status = {"csv_status" : {"path_match": "*.STATUS", "mapping": {"index": "not_analyzed"}}}
	csv_value = {"csv_value" : {"path_match": "*.VALUE", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	csv_u_limit = {"csv_u_limit" : {"path_match": "*.U_LIMIT", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	csv_l_limit = {"csv_l_limit" : {"path_match": "*.L_LIMIT", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	csv_test_time = {"csv_test_time" : {"path_match": "*.TEST_TIME", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	dynamic_templates = [runin_csv_status, runin_csv_value, runin_csv_u_limit, runin_csv_l_limit, runin_csv_test_time, csv_status, csv_value, csv_u_limit, csv_l_limit, csv_test_time]

	analysis = {}
	analysis["analyzer"] = {}
	analysis["tokenizer"] = {}
	analysis["analyzer"]["path-analyzer"] = {"type": "custom", "tokenizer": "path-tokenizer"}
	analysis["tokenizer"]["path-tokenizer"] = {"type": "path_hierarchy"}

	mappings = {"dynamic_templates" : dynamic_templates, "properties" : properties}
	data = {"settings" : {"index.mapping.ignore_malformed": True, "number_of_replicas": 1, "analysis": analysis}, "mappings" : {STAGE: mappings}}
	print json.dumps(data)
	idx_client.create(index=PROJECT, body=data)
Example No. 46
class ElasticSearchEngine(object):
    '''
    ElasticSearch Engine.
    '''

    # make it compatible with services
    LOAD_PRIORITY = 30

    def __init__(self, index, host=None, port=None):
        '''Only one host for now.'''
        if not es_installed:
            raise ValueError('elasticsearch not installed')

        assert(index.isalpha())
        self.init_state(index, host, port)

    def init_state(self, index, host, port):
        self._queue = []
        self.index = index
        self.host = host
        self.port = port
        if host is None:
            self.es = Elasticsearch()
        else:
            self.es = Elasticsearch(hosts=[{'host': host, 'port': port}])
        self.idx_manager = IndicesClient(self.es)
        self.mapper = ESQueryMapper()

    # be persistence friendly
    def __getstate__(self):
        return (self.index, self.host, self.port)

    def __setstate__(self, state):
        self.init_state(*state)

    def _index(self, document, update=False):
        # for efficiency, nothing is executed yet,
        # we prepare and queue the operation
        cursor = IndexCursor(self.index)
        document.save(cursor, update)
        cursor.enqueue(self._queue)

    def add_document(self, document):
        '''
        Add a document to the data store, in index (a.k.a. collection),
        under the document type.
        '''
        self._index(document)

    def delete_document(self, schema, docid):
        '''
        Remove document from index and storage.
        '''
        op = {
            '_op_type': 'delete',
            '_index': self.index,
            '_type': schema.type_name,
            '_id': docid
        }
        self._queue.append(op)

    def update_document(self, document):
        '''Update document (partial update from delta document)'''
        self._index(document, True)

    def commit(self, sync=False):
        '''
        If ``sync``, index synchronously, else let Elasticsearch
        manage its index.
        '''
        helpers.bulk(self.es, self._queue)
        if sync:
            self.idx_manager.refresh(self.index)
        self._queue = []

    def cancel(self):
        '''Forget operations scheduled since the last commit.'''
        self._queue = []

    def search(self, query, size=20):
        '''
        Search the database.
        '''
        index_cursor = IndexCursor(self.index, self.es.search)
        return query.search(index_cursor, self.mapper, size)

    def delete_collection(self):
        if self.idx_manager.exists(self.index):
            self.idx_manager.delete(index=self.index)

    def create_collection(self, schemas):
        '''
        Init the collections the first time.
        Just use once! Or you'll have to reindex all your documents.
        `schemas` is a list of Document classes or Schema instances.
        '''

        idx_manager = self.idx_manager
        if idx_manager.exists(self.index):
            idx_manager.delete(index=self.index)

        mapper = ESSchemaMapper(idx_manager)
        for schema in schemas:
            schema.map(mapper)

        mapper.create(self.index)
Example No. 47
File: index.py Project: zuloo/frisc
class IndicesManager(object):

    def __init__(self, options=None):
        self.options = options or {}
        self.es = get_elasticsearch(self.options)
        self.esc = IndicesClient(self.es)
        self.conf_dir = sys.path[0]

    def __create__(self, name, config=None, type=None):
        result = None

        try:
            if not config:
                file_name = "{}/config/{}_index.json".format(
                    self.conf_dir, type)
                with open(file_name) as fp:
                    config = fp.read()

            # create the index with version number
            result = self.esc.create(index=name, body=config)

        except es_exceptions.TransportError:
            print("unable to connect to Elasticsearch")

        return result

    def create(self, doc_type):
        alias_name = 'frisc_{}'.format(doc_type)
        index_name = '{}_v1'.format(alias_name)

        try:
            if self.esc.exists_alias(alias_name):
                print('Index {} already exists, updating'.format(alias_name))
                self.update(doc_type)
                return

            self.__create__(index_name, type=doc_type)

            # set an alias to the index
            self.esc.put_alias(index=index_name, name=alias_name)

        except es_exceptions.TransportError:
            print("unable to connect to Elasticsearch")

    def update(self, doc_type):
        alias_name = 'frisc_{}'.format(doc_type)
        index_name = '{}_v1'.format(alias_name)

        try:
            if not self.esc.exists_alias(alias_name):
                self.create(doc_type)
                return

            version_number = 0
            old_index_name = ''

            old_indexes = self.esc.get_alias(name=alias_name)
            for index in old_indexes.keys():
                match = re.search(r'^({})_v(\d+)$'.format(alias_name), index)
                if match:
                    version = int(match.group(2))
                    if version > version_number:
                        version_number = version
                        old_index_name = match.group(0)

            version_number += 1
            index_name = '{}_v{}'.format(alias_name, version_number)

            if self.esc.exists(index_name):
                raise RuntimeError(
                    'Index {} already exists'.format(index_name))

            self.__create__(index_name, type=doc_type)

            reindex(self.es, old_index_name, index_name)

            self.esc.update_aliases(
                body={'actions': [
                    {'remove': {'alias': alias_name, 'index': old_index_name}},
                    {'add': {'alias': alias_name, 'index': index_name}}
                ]}
            )

        except es_exceptions.TransportError:
            print("unable to connect to Elasticsearch")
Example No. 48
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--host', help='Elasticsearch host')
    parser.add_argument('--port', type=int, help='Elasticsearch port')
    parser.add_argument('--url-prefix', help='Elasticsearch URL prefix')
    parser.add_argument('--no-auth', action='store_const', const=True, help='Suppress prompt for basic auth')
    parser.add_argument('--ssl', action='store_true', default=None, help='Use TLS')
    parser.add_argument('--no-ssl', dest='ssl', action='store_false', help='Do not use TLS')
    parser.add_argument('--verify-certs', action='store_true', default=None, help='Verify TLS certificates')
    parser.add_argument('--no-verify-certs', dest='verify_certs', action='store_false', help='Do not verify TLS certificates')
    parser.add_argument('--index', help='Index name to create')
    parser.add_argument('--old-index', help='Old index name to copy')
    parser.add_argument('--send_get_body_as', default='GET', help='Method for querying Elasticsearch - POST, GET or source')
    parser.add_argument('--boto-profile', default=None, help='Boto profile to use for signing requests')
    parser.add_argument('--aws-region', default=None, help='AWS Region to use for signing requests')
    args = parser.parse_args()

    if os.path.isfile('../config.yaml'):
        filename = '../config.yaml'
    elif os.path.isfile('config.yaml'):
        filename = 'config.yaml'
    else:
        filename = ''

    if filename:
        with open(filename) as config_file:
            data = yaml.safe_load(config_file)
        host = args.host if args.host else data.get('es_host')
        port = args.port if args.port else data.get('es_port')
        username = data.get('es_username')
        password = data.get('es_password')
        url_prefix = args.url_prefix if args.url_prefix is not None else data.get('es_url_prefix', '')
        use_ssl = args.ssl if args.ssl is not None else data.get('use_ssl')
        verify_certs = args.verify_certs if args.verify_certs is not None else data.get('verify_certs') is not False
        aws_region = data.get('aws_region', None)
        send_get_body_as = data.get('send_get_body_as', 'GET')
    else:
        username = None
        password = None
        aws_region = args.aws_region
        host = args.host if args.host else raw_input('Enter elasticsearch host: ')
        port = args.port if args.port else int(raw_input('Enter elasticsearch port: '))
        use_ssl = (args.ssl if args.ssl is not None
                   else raw_input('Use SSL? t/f: ').lower() in ('t', 'true'))
        if use_ssl:
            verify_certs = (args.verify_certs if args.verify_certs is not None
                            else raw_input('Verify TLS certificates? t/f: ').lower() not in ('f', 'false'))
        else:
            verify_certs = True
        if args.no_auth is None:
            username = raw_input('Enter optional basic-auth username (or leave blank): ')
            password = getpass.getpass('Enter optional basic-auth password (or leave blank): ')
        url_prefix = (args.url_prefix if args.url_prefix is not None
                      else raw_input('Enter optional Elasticsearch URL prefix (prepends a string to the URL of every request): '))
        send_get_body_as = args.send_get_body_as

    auth = Auth()
    http_auth = auth(host=host,
                     username=username,
                     password=password,
                     aws_region=aws_region,
                     boto_profile=args.boto_profile)

    es = Elasticsearch(
        host=host,
        port=port,
        use_ssl=use_ssl,
        verify_certs=verify_certs,
        connection_class=RequestsHttpConnection,
        http_auth=http_auth,
        url_prefix=url_prefix,
        send_get_body_as=send_get_body_as)

    silence_mapping = {'silence': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                  'until': {'type': 'date', 'format': 'dateOptionalTime'},
                                                  '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    ess_mapping = {'elastalert_status': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                        '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    es_mapping = {'elastalert': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'},
                                                'alert_time': {'format': 'dateOptionalTime', 'type': 'date'},
                                                'match_body': {'enabled': False, 'type': 'object'},
                                                'aggregate_id': {'index': 'not_analyzed', 'type': 'string'}}}}
    past_mapping = {'past_elastalert': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                       'match_body': {'enabled': False, 'type': 'object'},
                                                       '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'},
                                                       'aggregate_id': {'index': 'not_analyzed', 'type': 'string'}}}}
    error_mapping = {'elastalert_error': {'properties': {'data': {'type': 'object', 'enabled': False},
                                                         '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}

    index = args.index if args.index is not None else raw_input('New index name? (Default elastalert_status) ')
    if not index:
        index = 'elastalert_status'

    old_index = (args.old_index if args.old_index is not None
                 else raw_input('Name of existing index to copy? (Default None) '))

    es_index = IndicesClient(es)
    if es_index.exists(index):
        print('Index ' + index + ' already exists. Skipping index creation.')
        return None

    es.indices.create(index)
    # To avoid a race condition. TODO: replace this with a real check
    time.sleep(2)
    es.indices.put_mapping(index=index, doc_type='elastalert', body=es_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_status', body=ess_mapping)
    es.indices.put_mapping(index=index, doc_type='silence', body=silence_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_error', body=error_mapping)
    es.indices.put_mapping(index=index, doc_type='past_elastalert', body=past_mapping)
    print('New index %s created' % index)

    if old_index:
        print("Copying all data from old index '{0}' to new index '{1}'".format(old_index, index))
        # Use the defaults for chunk_size, scroll, scan_kwargs, and bulk_kwargs
        elasticsearch.helpers.reindex(es, old_index, index)

    print('Done!')
Example No. 49
def main(index_num):
    n_out      = int(10e6)
    n_batch    = int(4e3)
    n_batches  = n_out // n_batch
    index      = 'image_hashes_%02d' % index_num
    
    client = Elasticsearch('localhost:9200')
    index_client = IndicesClient(client)
    
    if index_client.exists(index):
        # refuse to touch an existing index; delete it manually if that is intended
        print('Not deleting %s!' % index)
        return
    
    es_short = {
        'type': 'short',
    }
    
    field_name = lambda i: '%x' % i
    fields = {field_name(i): es_short for i in range(n_samples)}
    fields['raw'] = {
        'type': 'string',
        'store': True,
        'index': 'not_analyzed',
        'doc_values': True
    }
    
    index_client.create(index=index, body={
        'settings': {
            'number_of_shards':   4,
            'number_of_replicas': 0
        },
        'mappings': {
            'images': {
                '_source': {'enabled': False},
                'properties': fields
            }
        }
    })
    
    sampler, pow2 = get_sampler(n_samples, b_p_sample)
    start_time = time.time()
    
    for i_batch in range(1, n_batches+1):
        data = np.random.randn(n_batch, dim_in)
        hash = (data.dot(proj) > 0).astype(np.uint64)
        hash_int = hash.dot(2**np.arange(dim_out).astype(np.uint64))
		
        #print('\n'.join(repr(i.astype(np.uint8)) for i in hash)); return
        
        sampled = np.vstack(
            hash.dot(sampler[:,:,j]).dot(pow2)
            for j in range(n_samples)
        ).astype(np.int16).T.tolist()
        
        #print(repr(sampled)); print(repr([len(sampled), len(sampled[0])])); return
        
        docs = []
        
        for i in range(n_batch):
            doc = {
                field_name(j): sampled[i][j] for j in range(n_samples)
            }
            doc['raw'] = '{0:064b}'.format(hash_int[i])
            doc_id = random.getrandbits(63)
            
            docs.append('{"index":{"_index": "%s", "_type": "images", "_id": "%d"}}' % (index, doc_id))
            docs.append(json.dumps(doc))
        
        #print(json.dumps(json.loads(docs[1]), indent=4)); return
        
        try:
            response = client.bulk(body='\n'.join(docs))
        except Exception:
            # Even when an exception is thrown typically documents were stored in ES
            sleep_seconds = 10
            print('\rHTTP timed out, sleeping %d seconds...' % sleep_seconds)
            time.sleep(sleep_seconds)

        print('\rChunk %5d/%d, %5.2f%%' % (i_batch, n_batches, i_batch*100.0/n_batches), end='')
    
    index_time = time.time()
    print('\nCalling optimize, indexing took %.1f s...' % (index_time - start_time))
    sys.stdout.flush()
    
    index_client.optimize(index=index, max_num_segments=3, request_timeout=1e6)
    print('Optimization done in %.1f s' % (time.time() - index_time))
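
Note: the optimize API was deprecated in Elasticsearch 2.1 and removed in 5.0 in favor of force merge; on a newer client the equivalent call would be:

index_client.forcemerge(index=index, max_num_segments=3, request_timeout=1e6)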
Example No. 50
class ElasticSearchEngine(object):
    '''
    ElasticSearch Engine.
    '''

    def __init__(self, index, host=None, port=None):
        '''Only one host for now.'''
        assert(index.isalpha())
        self.init_state(index, host, port)

    def init_state(self, index, host, port):
        self._queue = []
        self.index = index
        self.host = host
        self.port = port
        if host is None:
            self.es = Elasticsearch()
        else:
            self.es = Elasticsearch(hosts=[{'host': host, 'port': port}])
        self.idx_manager = IndicesClient(self.es)
        self.mapper = ESMapper()

    # be persistence friendly
    def __getstate__(self):
        return (self.index, self.host, self.port)

    def __setstate__(self, state):
        self.init_state(*state)

    def _index(self, document, update=False):
        # for efficiency, nothing is executed yet,
        # we prepare and queue the operation
        doc = 'doc' if update else '_source'
        op = {
            '_index': self.index,
            '_type': document.__class__.__name__,
            '_op_type': 'update' if update else 'create',
            '_id': document._id,
            doc: {k: getattr(document, k)
                  for k in document.fields
                  if getattr(document, k) is not None}
        }
        self._queue.append(op)

    def add_document(self, document):
        '''
        Add a document to the data store, in index (a.k.a. collection),
        under the document type.
        '''
        self._index(document)

    def delete_document(self, doctype, docid):
        '''
        Remove document from index and storage.
        '''
        op = {
            '_op_type': 'delete',
            '_index': self.index,
            '_type': doctype.__name__,
            '_id': docid
        }
        self._queue.append(op)

    def update_document(self, document):
        '''Update document (partial update from delta document)'''
        self._index(document, True)

    def commit(self, sync=False):
        '''
        If ``sync``, index synchronously, else let Elasticsearch
        manage its index.
        '''
        helpers.bulk(self.es, self._queue)
        if sync:
            self.idx_manager.refresh(self.index)
        self._queue = []

    def cancel(self):
        '''Forget operations scheduled since the last commit.'''
        self._queue = []

    def search(self, query, size=20):
        '''
        Search the database.
        '''
        dsl = query(self.mapper)
        hits = self.es.search(index=self.index,
                              doc_type=query.queried_doc.__name__,
                              body={'query': dsl},
                              size=size)
        res = [
            (h['_score'], query.queried_doc.delta(h['_id'],
                                                  **h['_source']))
            for h in hits['hits']['hits']
        ]
        return res

    def delete_collection(self):
        if self.idx_manager.exists(self.index):
            self.idx_manager.delete(index=self.index)

    def create_collection(self, schema):
        '''
        Init the collections the first time.
        Just use once! Or you'll have to reindex all your documents.
        Schema is a list of Document classes.
        '''

        idx_manager = self.idx_manager
        if idx_manager.exists(self.index):
            idx_manager.delete(index=self.index)

        mappings = {}
        for doctype in schema:
            properties = {'_full': {"type": "string",
                                    "index_analyzer":  "autocomplete",
                                    "search_analyzer": "standard"}}
            excludes = []
            for name, ftype in doctype.fields.iteritems():
                properties[name] = ESProperty(ftype)
                if not ftype.stored:
                    excludes.append(name)
            mappings[doctype.__name__] = {'properties': properties,
                                          '_source': {"excludes": excludes}}
        settings = {
            "number_of_shards": 1,
            "analysis": {
                "filter": {
                    "autocomplete_filter": {
                        "type":     "edge_ngram",
                        "min_gram": 1,
                        "max_gram": 20
                    }
                },
                "analyzer": {
                    "autocomplete": {
                        "type":      "custom",
                        "tokenizer": "standard",
                        "filter": [
                            "lowercase",
                            "autocomplete_filter"
                        ]
                    }
                }
            }
        }
        body = {"mappings": mappings, "settings": settings}
        idx_manager.create(index=self.index, body=body)
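
A hedged usage example searching the autocomplete-analyzed '_full' field defined above, assuming an engine instance:

engine.es.search(index=engine.index,
                 body={'query': {'match': {'_full': 'elas'}}})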
Example No. 51
def aggs_error_count(topic_name, group_name, app_name, ip, time_scope=1):
    index_list = []
    # Build index names from the time scope, check that each exists, and collect the existing ones
    indicesClient = IndicesClient(app.es)
    for count in range((int(time_scope)/24)+1):
        index_name = 'kafka_msg_log_' + time.strftime('%Y.%m.%d', time.localtime(time.time() - int(count)*24*60*60))
        if indicesClient.exists(index_name):
            index_list.append(index_name)
    if len(index_list) == 0:
        error_stat_result = {
            "xAxis": [],
            "send_error_list": [],
            "business_error_list": [],
            "success": "true",
            "group_name": group_name,
            "topic_name": topic_name,
            "app_name": app_name,
            "ip": ip
        }
        return json.dumps(error_stat_result, encoding='utf8', ensure_ascii=False, indent=2)

    start_time = "now-" + str(time_scope) + "h/h"
    range_dict = {
                    "range" : {
                        "timestamp" : {
                            "gte" : start_time,
                            "lte" :  "now/h"
                        }
                    }
                }

    must_list = _assemble_must_terms(topic_name, group_name, app_name, ip)
    must_list.append(range_dict)
    res = app.es.search(
            index=index_list,
            body={
                    "from": 0,
                    "size": 10000,
                    "query": {
                        "bool": {
                            "must_not": {
                                "missing": {
                                    "field": "etype"
                                }
                            },
                            "must": must_list
                        }
                    },
                    "fields": "etype",
                    "aggregations": {
                        "aggs": {
                            "date_histogram": {
                                "field": "timestamp",
                                "interval": "10m",
                                "format": "yyyy-MM-dd HH:mm",
                                "time_zone": "+08:00",
                                "min_doc_count": 0
                            },
                            "aggregations": {
                                "etype": {
                                    "terms": {
                                        "field": "etype",
                                        "min_doc_count": 0,
                                        "size": 10000
                                    },
                                    "aggregations": {
                                        "etype_count": {
                                            "value_count": {
                                                "field": "etype"
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            )

    xAxis = set([])
    error_stat_dict = {}
    for obj in res['aggregations']['aggs']['buckets']:
        date_time = obj['key_as_string']
        xAxis.add(date_time)
        # collect the x-axis timestamps
        etype_count_aggs = obj['etype']['buckets']
        for etype_count_obj in etype_count_aggs:
            etype_count = etype_count_obj['etype_count']['value']
            etype = etype_count_obj['key']

            if date_time not in error_stat_dict:
                error_stat_dict[date_time] = [{"etype":etype, "count":etype_count}]
            else:
                temp_list = error_stat_dict[date_time]
                temp_list.append({"etype":etype, "count":etype_count})
                error_stat_dict[date_time] = temp_list

            # error_stat_result.append(error_stat_dict)
            logger.debug('etype:[' + str(etype) + ']  datetime: [' + date_time + ']  count: [' + str(etype_count) + ']')
    xAxis = sorted(xAxis)

    # send-error data series
    send_error_list = []
    # business-error data series
    business_error_list = []
    for x_date_time in xAxis:
        if x_date_time in error_stat_dict:
            temp_etype_dict_list = error_stat_dict[x_date_time]
            if temp_etype_dict_list:
                for etype_dict in temp_etype_dict_list:
                    if etype_dict['etype'] == 1:
                        send_error_list.append(etype_dict['count'])
                    else:
                        business_error_list.append(etype_dict['count'])
        else:
            send_error_list.append('0')
            business_error_list.append('0')


    error_stat_result = {
        "xAxis": xAxis,
        "send_error_list": send_error_list,
        "business_error_list": business_error_list,
        "success": "true",
        "group_name": group_name,
        "topic_name": topic_name,
        "app_name": app_name,
        "ip": ip
    }
    return json.dumps(error_stat_result, encoding='utf8', ensure_ascii=False, indent=2)
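
A note on the listing above: the parsing loop walks a date_histogram aggregation named 'aggs' with a nested 'etype' terms aggregation. A minimal hand-written sample of that response shape (only the key names come from the listing; the bucket values are invented) shows what the loop expects:

# Sketch: a hand-built sample of the aggregation response shape parsed above.
# Only the key names ('aggs', 'buckets', 'key_as_string', 'etype',
# 'etype_count') come from the listing; the values are invented.
sample_res = {
    'aggregations': {
        'aggs': {
            'buckets': [{
                'key_as_string': '2016-01-01 00:00',
                'etype': {
                    'buckets': [
                        {'key': 1, 'etype_count': {'value': 3}},
                        {'key': 2, 'etype_count': {'value': 5}},
                    ]
                },
            }]
        }
    }
}

for obj in sample_res['aggregations']['aggs']['buckets']:
    for bucket in obj['etype']['buckets']:
        print(obj['key_as_string'], bucket['key'], bucket['etype_count']['value'])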
Exemplo n.º 52
0
    body = {
        'item': {
            'properties': {
                'path': {
                    'type': 'string',
                    'store': 'yes',
                    'index': 'not_analyzed'
                    },
                'title': {'type': 'string'},
                'body': {'type': 'string'},
                'teaser': {'type': 'string'},
                'timestamp': {'type': 'date'}
                },
            '_id': {'path': 'path'}
            }
        }

    ic = IndicesClient(es)

    if not ic.exists(index):
        ic.create(index)

    if not ic.exists_type(index=index, doc_type='item'):
        ic.put_mapping(
            index=index,
            ignore_conflicts=True,
            doc_type='item',
            body=body
            )

    while 1:
        try:
            main()
        except KeyboardInterrupt:
            raise SystemExit(0)
Exemplo n.º 53
0
def main(argv):
	es_server_addr = 'localhost'
	input_location = os.path.abspath(".")
	input_subfolder = None
	output_location = None
	fail_location = os.path.abspath(DEFAULT_FAIL_PATH)
	doctype = ES_DOC_TYPE
	is_looping = False
	try:
		opts, args = getopt.getopt(argv, "i:s:o:f:t:c:l", ["input_location=", "input_subfolder=", "output_location=", "fail_location=", "doc_type=", "config=", "loop"])
	except getopt.GetoptError:
		usage()
		sys.exit(2)
	if len(args) < 1:
		usage()
		sys.exit(2)
	elif len(args) > 1:
		es_server_addr = args[1]
	project = args[0]
	for opt, arg in opts:
		if opt in ("-i", "--input_location"):
			input_location = os.path.abspath(arg)
		elif opt in ("-s", "--input_subfolder"):
			input_subfolder = arg
		elif opt in ("-o", "--output_location"):
			output_location = os.path.abspath(arg)
		elif opt in ("-f", "--fail_location"):
			fail_location = os.path.abspath(arg)
		elif opt in ("-t", "--doc_type"):
			doctype = arg
		elif opt in ("-c", "--config"):
			(input_location, input_subfolder, output_location, fail_location, doctype) = parse_config(arg, input_location, input_subfolder, output_location, fail_location, doctype)
		elif opt in ("-l", "--loop"):
			is_looping = True
	logger = logging.getLogger('mla_logger')
	logger.setLevel(logging.INFO)
	fh = logging.FileHandler(LOG_PREFIX + "_" + project + "_" + time.strftime("%Y%m%d-%H%M%S") + LOG_POSTFIX)
	fh.setLevel(logging.DEBUG)
	ch = logging.StreamHandler()
	ch.setLevel(logging.DEBUG)
	formatter = logging.Formatter('[%(asctime)s] [%(levelname)8s] %(message)s')
	fh.setFormatter(formatter)
	logger.addHandler(fh)
	logger.addHandler(ch)
	logger.info("%s run with es_server_addr=%s, project=%s, input_location=%s, input_subfolder=%s, output_location=%s, fail_location=%s, doctype=%s, is_looping=%s" % (LOG_PREFIX, es_server_addr, project, input_location, input_subfolder, output_location, fail_location, doctype, is_looping))
	print "Press Enter key to continue"
	raw_input()
	es = Elasticsearch([{'host': es_server_addr, 'port': 9200}], max_retries=10, retry_on_timeout=True)
	idx_client = IndicesClient(es)
	if not idx_client.exists(index=project):
		logger.info("Index %s does not exist, press Enter to create the schema" % project)
		raw_input()
		mla_setup.schema_setup(es, project, False, logger)
		print "Press Enter to continue"
		raw_input()
	try:
		mla_import_loop(es, project, doctype, input_location, input_subfolder, output_location, fail_location, is_looping)
	except:
		msg = traceback.format_exc()
		print msg
		notify_mail(msg)
Exemplo n.º 54
0
def create_index_mappings(es_client, ea_index, recreate=False, old_ea_index=None):
    esversion = es_client.info()["version"]["number"]
    print("Elastic Version: " + esversion)

    es_index_mappings = read_es_index_mappings() if is_atleastsix(esversion) else read_es_index_mappings(5)

    es_index = IndicesClient(es_client)
    if not recreate:
        if es_index.exists(ea_index):
            print('Index ' + ea_index + ' already exists. Skipping index creation.')
            return None

    # (Re-)Create indices.
    if is_atleastsix(esversion):
        index_names = (
            ea_index,
            ea_index + '_status',
            ea_index + '_silence',
            ea_index + '_error',
            ea_index + '_past',
        )
    else:
        index_names = (
            ea_index,
        )
    for index_name in index_names:
        if es_index.exists(index_name):
            print('Deleting index ' + index_name + '.')
            try:
                es_index.delete(index_name)
            except NotFoundError:
                # Why does this ever occur?? It shouldn't. But it does.
                pass
        es_index.create(index_name)

    # To avoid a race condition. TODO: replace this with a real check
    time.sleep(2)

    if is_atleastseven(esversion):
        # TODO remove doc_type completely when the elasticsearch client allows doc_type=None
        # doc_type is a deprecated feature and will be completely removed in Elasticsearch 8
        es_client.indices.put_mapping(index=ea_index, doc_type='_doc',
                                      body=es_index_mappings['elastalert'], include_type_name=True)
        es_client.indices.put_mapping(index=ea_index + '_status', doc_type='_doc',
                                      body=es_index_mappings['elastalert_status'], include_type_name=True)
        es_client.indices.put_mapping(index=ea_index + '_silence', doc_type='_doc',
                                      body=es_index_mappings['silence'], include_type_name=True)
        es_client.indices.put_mapping(index=ea_index + '_error', doc_type='_doc',
                                      body=es_index_mappings['elastalert_error'], include_type_name=True)
        es_client.indices.put_mapping(index=ea_index + '_past', doc_type='_doc',
                                      body=es_index_mappings['past_elastalert'], include_type_name=True)
    elif is_atleastsixtwo(esversion):
        es_client.indices.put_mapping(index=ea_index, doc_type='_doc',
                                      body=es_index_mappings['elastalert'])
        es_client.indices.put_mapping(index=ea_index + '_status', doc_type='_doc',
                                      body=es_index_mappings['elastalert_status'])
        es_client.indices.put_mapping(index=ea_index + '_silence', doc_type='_doc',
                                      body=es_index_mappings['silence'])
        es_client.indices.put_mapping(index=ea_index + '_error', doc_type='_doc',
                                      body=es_index_mappings['elastalert_error'])
        es_client.indices.put_mapping(index=ea_index + '_past', doc_type='_doc',
                                      body=es_index_mappings['past_elastalert'])
    elif is_atleastsix(esversion):
        es_client.indices.put_mapping(index=ea_index, doc_type='elastalert',
                                      body=es_index_mappings['elastalert'])
        es_client.indices.put_mapping(index=ea_index + '_status', doc_type='elastalert_status',
                                      body=es_index_mappings['elastalert_status'])
        es_client.indices.put_mapping(index=ea_index + '_silence', doc_type='silence',
                                      body=es_index_mappings['silence'])
        es_client.indices.put_mapping(index=ea_index + '_error', doc_type='elastalert_error',
                                      body=es_index_mappings['elastalert_error'])
        es_client.indices.put_mapping(index=ea_index + '_past', doc_type='past_elastalert',
                                      body=es_index_mappings['past_elastalert'])
    else:
        es_client.indices.put_mapping(index=ea_index, doc_type='elastalert',
                                      body=es_index_mappings['elastalert'])
        es_client.indices.put_mapping(index=ea_index, doc_type='elastalert_status',
                                      body=es_index_mappings['elastalert_status'])
        es_client.indices.put_mapping(index=ea_index, doc_type='silence',
                                      body=es_index_mappings['silence'])
        es_client.indices.put_mapping(index=ea_index, doc_type='elastalert_error',
                                      body=es_index_mappings['elastalert_error'])
        es_client.indices.put_mapping(index=ea_index, doc_type='past_elastalert',
                                      body=es_index_mappings['past_elastalert'])

    print('New index %s created' % ea_index)
    if old_ea_index:
        print("Copying all data from old index '{0}' to new index '{1}'".format(old_ea_index, ea_index))
        # Use the defaults for chunk_size, scroll, scan_kwargs, and bulk_kwargs
        elasticsearch.helpers.reindex(es_client, old_ea_index, ea_index)

    print('Done!')
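
The listing calls is_atleastsix, is_atleastsixtwo and is_atleastseven without showing them. A minimal sketch of such version checks, assuming they compare the major/minor components of the version string returned by es_client.info() (the names come from the listing, the bodies are assumptions):

def is_atleastsix(es_version):
    # Assumed: "6.3.1" -> major version 6
    return int(es_version.split('.')[0]) >= 6

def is_atleastsixtwo(es_version):
    # Assumed: major.minor at least 6.2
    major, minor = (int(x) for x in es_version.split('.')[:2])
    return major > 6 or (major == 6 and minor >= 2)

def is_atleastseven(es_version):
    return int(es_version.split('.')[0]) >= 7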
Exemplo n.º 55
0
Arquivo: ese.py Projeto: merlin83/ese
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--src-host", action="store", default="127.0.0.1", type=unicode, help="Source host [default: %(default)s]")
    parser.add_argument("--src-port", action="store", default=9200, help="Source port [default: %(default)s]")
    parser.add_argument("--src-index", action="store", default="", type=unicode, help="Source index")
    parser.add_argument("--src-batch-size", action="store", type=int, default=5000, help="Source query batchsize [default: %(default)s]")
    parser.add_argument("--src-scroll-interval", action="store", type=unicode, default="60m", help="Interval for source scroll query [default: %(default)s]")

    parser.add_argument("--dest-host", action="store", default="127.0.0.1", type=unicode, help="Destination host [default: %(default)s]")
    parser.add_argument("--dest-port", action="store", default=9200, help="Destination port [default: %(default)s]")
    parser.add_argument("--dest-index", action="store", default="", type=unicode, help="Destination index")
    parser.add_argument("--dest-batch-size", action="store", type=int, default=5000, help="Destination batchsize [default: %(default)s]")
    parser.add_argument("--dest-alias", action="store", help="Destination index alias (to be set after we have finished populating)")
    parser.add_argument("--dest-concurrency", action="store", type=int, default=4, help="Destination batchsize [default: %(default)s]")
    parser.add_argument("--dest-delete-index", action="store_true", help="Delete destination index at before starting")

    parser.add_argument("--query", action="store", type=unicode, default="", help="Query to use [if None is specified, a match_all will be used]")

    args = parser.parse_args()

    if args.src_index is None or len(args.src_index) == 0:
        raise Exception("--src-index must be specified!")

    if args.dest_index is None or len(args.dest_index) == 0:
        raise Exception("--dest-index must be specified!")

    dt_start = datetime.now()
    # copy mapping
    src_es_instance = get_elasticsearch(args.src_host, args.src_port)
    dest_es_instance = get_elasticsearch(args.dest_host, args.dest_port)
    # check if src_index exists
    src_es_ic = IndicesClient(src_es_instance)
    if not src_es_ic.exists(args.src_index):
        raise Exception("--src-index %s does not exist!" % args.src_index)
    # check if dest_index exists
    dest_es_ic = IndicesClient(dest_es_instance)
    if dest_es_ic.exists(args.dest_index):
        if args.dest_delete_index:
            dest_es_ic.delete(index=args.dest_index)
        else:
            raise Exception("--dest-index %s already exists! Use --dest-delete-index if you want to drop it" % args.dest_index)
    log.info("Copying mapping...")
    # copy mapping over to dest
    src_index_information = src_es_ic.get(index=args.src_index)
    dest_es_ic.create(index=args.dest_index, body=src_index_information.get(args.src_index, {}))
    # set num_of_replicas to 0
    dest_es_ic.put_settings(index=args.dest_index, body={"settings": {"index": {"number_of_replicas": 0}}})
    # perform multiprocessing
    log.info("Copying data...")
    MAGIC_STRING = "%s:%s" % (str(uuid4()), str(uuid4()))
    DEST_QUEUE = Queue()
    DEST_COUNTER = Value('i', 0)
    src_process = Process(target=src_worker, args=(args, DEST_QUEUE, MAGIC_STRING))
    src_process.start()
    dest_processes = [Process(target=dest_worker, args=(args, DEST_QUEUE, MAGIC_STRING, DEST_COUNTER)) for i in xrange(args.dest_concurrency)]
    for i in dest_processes: i.start()
    src_process.join()
    for i in dest_processes: i.join()
    log.info("[dest_worker] Total processed %s" % DEST_COUNTER.value)
    if args.dest_alias is not None and len(args.dest_alias) > 0:
        # we remove all existing mappings to this alias, then add it to the current dest_index
        for idx_name, aliases_mapping in dest_es_ic.get_aliases().iteritems():
            if args.dest_alias in aliases_mapping.get("aliases", {}):
                dest_es_ic.delete_alias(index=idx_name, name=args.dest_alias)
        dest_es_ic.put_alias(index=args.dest_index, name=args.dest_alias)
    dest_es_ic.refresh(args.dest_index)
    dt_end = datetime.now()
    log.info("Time elapsed: %s" % (dt_end-dt_start, ))
Exemplo n.º 56
0
    def handle(self, *args, **options):
        Student.objects.all().delete()
        University.objects.all().delete()
        Course.objects.all().delete()
        start = time.time()

        # database part
        # make some Universities
        university_names = (
            'MIT', 'MGU', 'CalTech', 'KPI', 'DPI', 'PSTU'
        )
        universities = []
        for name in university_names:
            uni = mommy.make(University, name=name)
            universities.append(uni)
        # make some courses
        template_options = ['CS%s0%s', 'MATH%s0%s', 'CHEM%s0%s', 'PHYS%s0%s']
        courses = []
        for num in range(1, 4):
            for course_num in range(1, 4):
                for template in template_options:
                    name = template % (course_num, num)
                    course = mommy.make(Course, name=name)
                    courses.append(course)

        students = []
        for _ in xrange(options.get('count')[0]):
            stud = mommy.prepare(
                Student,
                university=random.choice(universities),
                first_name=names.get_first_name(),
                last_name=names.get_last_name(),
                age=random.randint(17, 25)
            )
            students.append(stud)
        Student.objects.bulk_create(students)

        ThroughModel = Student.courses.through
        stud_courses = []
        for student_id in Student.objects.values_list('pk', flat=True):
            courses_already_linked = []
            for _ in range(random.randint(1, 10)):
                index = random.randint(0, len(courses) - 1)
                if index not in courses_already_linked:
                    courses_already_linked.append(index)
                else:
                    continue
                stud_courses.append(
                    ThroughModel(
                        student_id=student_id,
                        course_id=courses[index].pk
                    )
                )
        ThroughModel.objects.bulk_create(stud_courses)

        # recreate index
        indices_client = IndicesClient(client=settings.ES_CLIENT)
        if indices_client.exists('django'):
            indices_client.delete(index='django')
        indices_client.create(index='django')
        indices_client.put_mapping(
            doc_type='student',
            body=Student._meta.es_mapping,
            index='django'
        )
        # update part
        put_all_to_index(Student)

        finish = time.time() - start
        print '%s items  %s seconds' % (options.get('count')[0], finish)
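
put_all_to_index is imported from elsewhere in this project. A minimal sketch of what it could look like with the bulk helper (the field names match the Student factory above and ES_CLIENT comes from the listing; the document shape is assumed):

from django.conf import settings
from elasticsearch.helpers import bulk

def put_all_to_index(model):
    # Assumed: one document per model instance, indexed into 'django'
    # under the 'student' mapping created above.
    actions = [{
        '_index': 'django',
        '_type': 'student',
        '_id': obj.pk,
        '_source': {
            'first_name': obj.first_name,
            'last_name': obj.last_name,
            'age': obj.age,
            'university': obj.university.name,
        },
    } for obj in model.objects.all()]
    bulk(settings.ES_CLIENT, actions)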
Exemplo n.º 57
0
class ESExporter:
    def __init__(self, sm_config):
        self.es = Elasticsearch(hosts=[{"host": sm_config['elasticsearch']['host']}])
        self.ind_client = IndicesClient(self.es)

    def _index(self, annotations):
        to_index = []
        for r in annotations:
            d = dict(zip(COLUMNS, r))
            d['comp_names'] = u'|'.join(d['comp_names']).replace(u'"', u'')
            d['comp_ids'] = u'|'.join(d['comp_ids'])
            d['mz'] = '{:010.4f}'.format(d['mz']) if d['mz'] else ''

            to_index.append({
                '_index': 'sm',
                '_type': 'annotation',
                '_id': '{}_{}_{}_{}'.format(d['ds_name'], d['db_name'], d['sf'], d['adduct']),
                '_source': d
            })

        bulk(self.es, actions=to_index, timeout='60s')

    def _delete(self, annotations):
        to_delete = []
        for r in annotations:
            d = dict(zip(COLUMNS, r))
            to_delete.append({
                '_op_type': 'delete',
                '_index': 'sm',
                '_type': 'annotation',
                '_id': '{}_{}_{}_{}'.format(d['ds_name'], d['db_name'], d['sf'], d['adduct']),
            })
        try:
            bulk(self.es, to_delete)
        except BulkIndexError as e:
            logger.warn('{} - {}'.format(e.args[0], e.args[1][1]))

    def index_ds(self, db, ds_name, db_name):
        annotations = db.select(RESULTS_TABLE_SQL, ds_name, db_name)

        logger.info('Deleting documents from the index: {}-{}'.format(ds_name, db_name))
        self._delete(annotations)

        logger.info('Indexing documents: {}-{}'.format(ds_name, db_name))
        self._index(annotations)

    def create_index(self, name='sm'):
        body = {
            'settings': {
                "index": {
                    'max_result_window': 2147483647,
                    "analysis": {
                        "analyzer": {
                            "analyzer_keyword": {
                                "tokenizer": "keyword",
                                "filter": "lowercase"
                            }
                        }
                    }
                }
            },
            'mappings': {
                "annotation": {
                    "properties": {
                        "db_name": {"type": "string", "index": "not_analyzed"},
                        "ds_name": {"type": "string", "index": "not_analyzed"},
                        "sf": {"type": "string", "index": "not_analyzed"},
                        "comp_names": {
                            "type": "string",
                            "analyzer": "analyzer_keyword",
                        },
                        "comp_ids": {"type": "string", "index": "not_analyzed"},
                        "chaos": {"type": "float", "index": "not_analyzed"},
                        "image_corr": {"type": "float", "index": "not_analyzed"},
                        "pattern_match": {"type": "float", "index": "not_analyzed"},
                        "msm": {"type": "float", "index": "not_analyzed"},
                        "adduct": {"type": "string", "index": "not_analyzed"},
                        "fdr": {"type": "float", "index": "not_analyzed"},
                        "mz": {"type": "string", "index": "not_analyzed"}
                    }
                }
            }
        }
        if not self.ind_client.exists(name):
            out = self.ind_client.create(index=name, body=body)
            logger.info('Index {} created\n{}'.format(name, out))
        else:
            logger.info('Index {} already exists'.format(name))

    def delete_index(self, name='sm'):
        out = self.ind_client.delete(name)
        logger.info('Index {} deleted\n{}'.format(name, out))
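
A short usage sketch for ESExporter (the sm_config shape comes from __init__; the stub db and the dataset/database names are invented):

class _StubDB(object):
    # Hypothetical stand-in: index_ds only needs a .select(sql, *args)
    # method that returns annotation rows.
    def select(self, sql, *args):
        return []

sm_config = {'elasticsearch': {'host': 'localhost'}}
exporter = ESExporter(sm_config)
exporter.create_index()          # no-op if the 'sm' index already exists
exporter.index_ds(_StubDB(), 'example_ds', 'example_db')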
Exemplo n.º 58
0
def main(argv):
    index = 'user_topics'
    client = Elasticsearch('localhost:9200')
    index_client = IndicesClient(client)
    
    if index_client.exists(index):
        index_client.delete(index)
    
    index_client.create(index=index, body={
        'settings': {
            'number_of_shards':   4,
            'number_of_replicas': 0
        },
        'mappings': {
            'user': {
                'properties': {
                    #'id': {
                    #    'type': 'long',
                    #    'doc_values': True
                    #},
                    'topics': {
                        'type': 'integer',
                        'doc_values': True
                    },
                    'n_topics': {
                        'type': 'integer',
                        'doc_values': True
                    }
                }
            }
        }
    })
    
    n_users           = int(argv[1])
    n_topics          = int(argv[2]) * 0.15
    n_topics_per_user = int(argv[3]) * 4.2
    
    docs_per_chunk = int(2e4)
    n_chunks       = int(ceil(n_users / docs_per_chunk))
    
    start_time = time.time()
    
    for i_chunk in range(1, n_chunks+1):
        docs = []
        
        for i in range(docs_per_chunk):
            n_user_topics = rand(n_topics_per_user)[0]
            topics = list(set(rand(n_topics, n_user_topics)))
            
            doc_id = str(random.getrandbits(63))

            docs.append('{"index":{"_index": "%s", "_type": "user", "_id": "%s"}})' % (index, doc_id))
            docs.append(json.dumps({
                #'id':      doc_id,
                'topics':   topics,
                'n_topics': len(topics)
            }))
        
        #print(json.dumps(json.loads(docs[1]), indent=4)); return
        
        try:
            response = client.bulk(body='\n'.join(docs))
        except:
            # Even when an exception is thrown typically documents were stored in ES
            sleep_seconds = 10
            print('\rHTTP timed out, sleeping %d seconds...' % sleep_seconds)
            time.sleep(sleep_seconds)
        
        print('\rChunk %5d/%d, %5.2f%%' % (i_chunk, n_chunks, i_chunk*100.0/n_chunks), end='')
    
    index_time = time.time()
    print('\nCalling optimize, indexing took %.1f s...' % (index_time - start_time))
    sys.stdout.flush()
    
    index_client.optimize(index=index, max_num_segments=3, request_timeout=1e6)
    print('Optimization done in %.1f s' % (time.time() - index_time))
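
The body handed to client.bulk above is newline-delimited JSON: each action line is immediately followed by its document line. A standalone illustration of one such pair (index name and fields as in the listing, values invented):

import json

action = json.dumps({'index': {'_index': 'user_topics', '_type': 'user', '_id': '42'}})
source = json.dumps({'topics': [1, 2, 3], 'n_topics': 3})
body = '\n'.join([action, source]) + '\n'
# client.bulk(body=body) would index this single document.
print(body)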
Exemplo n.º 59
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--host', help='Elasticsearch host')
    parser.add_argument('--port', type=int, help='Elasticsearch port')
    parser.add_argument('--url-prefix', help='Elasticsearch URL prefix')
    parser.add_argument('--no-auth', action='store_const', const=True, help='Suppress prompt for basic auth')
    parser.add_argument('--ssl', action='store_true', default=None, help='Use SSL')
    parser.add_argument('--no-ssl', dest='ssl', action='store_false', help='Do not use SSL')
    parser.add_argument('--index', help='Index name to create')
    parser.add_argument('--old-index', help='Old index name to copy')
    parser.add_argument('--boto-profile', default=None, help='Boto profile to use for signing requests')
    parser.add_argument('--aws-region', default=None, help='AWS Region to use for signing requests')
    args = parser.parse_args()

    if os.path.isfile('../config.yaml'):
        filename = '../config.yaml'
    elif os.path.isfile('config.yaml'):
        filename = 'config.yaml'
    else:
        filename = ''

    if filename:
        with open(filename) as config_file:
            data = yaml.load(config_file)
        host = args.host if args.host else data.get('es_host')
        port = args.port if args.port else data.get('es_port')
        username = data.get('es_username')
        password = data.get('es_password')
        url_prefix = args.url_prefix if args.url_prefix is not None else data.get('es_url_prefix', '')
        use_ssl = args.ssl if args.ssl is not None else data.get('use_ssl')
        aws_region = data.get('aws_region', None)
    else:
        username = None
        password = None
        aws_region = args.aws_region
        host = args.host if args.host else raw_input('Enter elasticsearch host: ')
        port = args.port if args.port else int(raw_input('Enter elasticsearch port: '))
        use_ssl = (args.ssl if args.ssl is not None
                   else raw_input('Use SSL? t/f: ').lower() in ('t', 'true'))
        if args.no_auth is None:
            username = raw_input('Enter optional basic-auth username: ')
            password = getpass.getpass('Enter optional basic-auth password: ')
        url_prefix = (args.url_prefix if args.url_prefix is not None
                      else raw_input('Enter optional Elasticsearch URL prefix: '))

    auth = Auth()
    http_auth = auth(host=host,
                     username=username,
                     password=password,
                     aws_region=aws_region,
                     boto_profile=args.boto_profile)

    es = Elasticsearch(
        host=host,
        port=port,
        use_ssl=use_ssl,
        connection_class=RequestsHttpConnection,
        http_auth=http_auth,
        url_prefix=url_prefix)

    silence_mapping = {'silence': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                  'until': {'type': 'date', 'format': 'dateOptionalTime'},
                                                  '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    ess_mapping = {'elastalert_status': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                        '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    es_mapping = {'elastalert': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'},
                                                'alert_time': {'format': 'dateOptionalTime', 'type': 'date'},
                                                'match_body': {'enabled': False, 'type': 'object'},
                                                'aggregate_id': {'index': 'not_analyzed', 'type': 'string'}}}}
    past_mapping = {'past_elastalert': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                       'match_body': {'enabled': False, 'type': 'object'},
                                                       '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'},
                                                       'aggregate_id': {'index': 'not_analyzed', 'type': 'string'}}}}
    error_mapping = {'elastalert_error': {'properties': {'data': {'type': 'object', 'enabled': False},
                                                         '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}

    index = args.index if args.index is not None else raw_input('New index name? (Default elastalert_status) ')
    if not index:
        index = 'elastalert_status'

    old_index = (args.old_index if args.old_index is not None
                 else raw_input('Name of existing index to copy? (Default None) '))

    res = None
    if old_index:
        print('Downloading existing data...')
        res = es.search(index=old_index, body={}, size=500000)
        print('Got %s documents' % (len(res['hits']['hits'])))

    es_index = IndicesClient(es)
    if es_index.exists(index):
        print('Index ' + index + ' already exists. Skipping index creation.')
        return None

    es.indices.create(index)
    # To avoid a race condition. TODO: replace this with a real check
    time.sleep(2)
    es.indices.put_mapping(index=index, doc_type='elastalert', body=es_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_status', body=ess_mapping)
    es.indices.put_mapping(index=index, doc_type='silence', body=silence_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_error', body=error_mapping)
    es.indices.put_mapping(index=index, doc_type='past_elastalert', body=past_mapping)
    print('New index %s created' % index)

    if res:
        bulk = ''.join(['%s\n%s\n' % (json.dumps({'create': {'_type': doc['_type'], '_index': index}}),
                                      json.dumps(doc['_source'])) for doc in res['hits']['hits']])
        print('Uploading data...')
        es.bulk(body=bulk, index=index)

    print('Done!')
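
For comparison, the hand-built bulk body in the copy step could also be written with the bulk helper (a sketch, not part of the original listing):

from elasticsearch.helpers import bulk

def copy_hits(es, hits, index):
    # One 'create' action per hit, mirroring the string-built body above.
    actions = [{'_op_type': 'create', '_index': index,
                '_type': doc['_type'], '_source': doc['_source']}
               for doc in hits]
    bulk(es, actions)

# e.g. copy_hits(es, res['hits']['hits'], index)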