def index_kb(all_docs_kb: dict, client: Elasticsearch, args: argparse.Namespace, logger: logging.Logger) -> None:
    """
    Recreate the KB index from the mapping file and bulk-upload all documents.

    Parameters
    ----------
    all_docs_kb : dict
        Bulk actions (KB documents) passed straight to ``bulk()``.
    client : Elasticsearch
        Connected Elasticsearch client used for all index operations.
    args : argparse.Namespace
        The parsed command line arguments given.
    logger : logging.Logger
        logger on DEBUG level

    Returns
    -------
    None
    """
    # Delete Index — ignore 404 so a missing index is not an error.
    client.indices.delete(index=INDEX_NAME, ignore=[404])
    # Create Index from the JSON mapping definition on disk.
    with open(MAPPING_JSON_FILE) as mapping_json_file:
        source = mapping_json_file.read().strip()
        client.indices.create(index=INDEX_NAME, body=source)

    # upload the KB
    logger.info("# upload the KB")
    bulk(client, all_docs_kb)
Exemplo n.º 2
0
def bulk_index_documents_to_es6(documents):
    """Bulk-insert *documents* into the ES6 cluster.

    Documents are prepared via ``_prepare_docs_for_bulk_insert`` and sent in
    chunks of 100; transport errors are printed rather than raised
    (best-effort indexing).
    """
    try:
        bulk(es_client6,
             _prepare_docs_for_bulk_insert(documents),
             index=INDEX,
             doc_type=GENERIC_DOC_TYPE,
             chunk_size=100)
    except TransportError6 as e:
        # The original passed a logging-style "%s" placeholder plus the
        # exception as two print() arguments; format the message explicitly.
        print("Failed to index documents: %s" % e)
Exemplo n.º 3
0
    def test_errors_are_collected_properly(self):
        """stats_only=True + raise_on_error=False yields (success, failure) counts."""
        # "a" is mapped as integer, so the string value indexed below must fail.
        self.client.indices.create(
            "i", {
                "mappings": {
                    "t": {
                        "properties": {
                            "a": {
                                "type": "integer"
                            }
                        }
                    }
                },
                "settings": {
                    "number_of_shards": 1,
                    "number_of_replicas": 0
                }
            })
        self.client.cluster.health(wait_for_status="yellow")

        success, failed = helpers.bulk(self.client, [{
            "a": 42
        }, {
            "a": "c"
        }],
                                       index="i",
                                       doc_type="t",
                                       stats_only=True,
                                       raise_on_error=False)
        # assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual.
        self.assertEqual(1, success)
        self.assertEqual(1, failed)
Exemplo n.º 4
0
 def process_item(self, item, spider):
     """Bulk-save scraped China-patent results to Elasticsearch.

     The item is always returned so downstream pipelines keep receiving it;
     the original only returned it for the 'chinapatent' spider, silently
     dropping items from every other spider.
     """
     if spider.name == 'chinapatent' and isinstance(item, ChinapatentScrapyItem):
         # Only index when the scrape actually produced results.
         if item['cp_result']:
             helpers.bulk(self.es,
                          item['cp_result'],
                          request_timeout=240)
             print(f'es{len(item["cp_result"])}条数据保存成功..')
     return item
Exemplo n.º 5
0
    def test_stats_only_reports_numbers(self):
        """With stats_only=True, bulk() returns (success_count, failure_count)."""
        docs = [{"answer": x} for x in range(100)]
        success, failed = helpers.bulk(self.client,
                                       docs,
                                       index='test-index',
                                       doc_type='answers',
                                       refresh=True,
                                       stats_only=True)

        # assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual.
        self.assertEqual(100, success)
        self.assertEqual(0, failed)
        self.assertEqual(
            100,
            self.client.count(index='test-index', doc_type='answers')['count'])
Exemplo n.º 6
0
    def test_all_documents_get_inserted(self):
        """Documents carrying explicit _id fields are all indexed and retrievable."""
        docs = [{"answer": x, '_id': x} for x in range(100)]
        success, failed = helpers.bulk(self.client,
                                       docs,
                                       index='test-index',
                                       doc_type='answers',
                                       refresh=True)

        # assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual.
        self.assertEqual(100, success)
        self.assertFalse(failed)
        self.assertEqual(
            100,
            self.client.count(index='test-index', doc_type='answers')['count'])
        self.assertEqual({"answer": 42},
                         self.client.get(index='test-index',
                                         doc_type='answers',
                                         id=42)['_source'])
Exemplo n.º 7
0
    def test_bulk_works_with_single_item(self):
        """A one-element action list is indexed without chunking problems."""
        docs = [{"answer": 42, '_id': 1}]
        success, failed = helpers.bulk(self.client,
                                       docs,
                                       index='test-index',
                                       doc_type='answers',
                                       refresh=True)

        # assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual.
        self.assertEqual(1, success)
        self.assertFalse(failed)
        self.assertEqual(
            1,
            self.client.count(index='test-index', doc_type='answers')['count'])
        self.assertEqual({"answer": 42},
                         self.client.get(index='test-index',
                                         doc_type='answers',
                                         id=1)['_source'])
Exemplo n.º 8
0
    def test_errors_are_reported_correctly(self):
        """raise_on_error=False returns the per-action error details in `failed`."""
        # "a" is mapped as integer, so the string value indexed below must fail.
        self.client.indices.create(
            "i", {
                "mappings": {
                    "t": {
                        "properties": {
                            "a": {
                                "type": "integer"
                            }
                        }
                    }
                },
                "settings": {
                    "number_of_shards": 1,
                    "number_of_replicas": 0
                }
            })
        self.client.cluster.health(wait_for_status="yellow")

        success, failed = helpers.bulk(self.client, [{
            "a": 42
        }, {
            "a": "c",
            '_id': 42
        }],
                                       index="i",
                                       doc_type="t",
                                       raise_on_error=False)
        # assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual. The leftover debug print of the error payload
        # has been removed.
        self.assertEqual(1, success)
        self.assertEqual(1, len(failed))
        error = failed[0]
        self.assertEqual('42', error['index']['_id'])
        self.assertEqual('t', error['index']['_type'])
        self.assertEqual('i', error['index']['_index'])
        # Error name differs across Elasticsearch versions (1.x vs 2.x+).
        self.assertTrue(
            'MapperParsingException' in repr(error['index']['error'])
            or 'mapper_parsing_exception' in repr(error['index']['error']))
Exemplo n.º 9
0
def main():
    """Snapshot -> Edge-TPU classification -> Elasticsearch indexing pipeline.

    Captures a photo, classifies it with an Edge TPU interpreter found under
    ``rootdir``, appends timing/label/score strings to the module-level
    ``dummy`` list, collects a summary via ``max_group()`` plus the TPU
    temperature, bulk-indexes one document into Elasticsearch, and finally
    deletes the photo.

    NOTE(review): depends on module-level globals defined elsewhere in the
    file (rootdir, dir_path, count, top_k, threshold, dummy, INDEX_NAME,
    DOC_TYPE) and on helpers (getLabel, getInterpreter, getImage, max_group,
    check_temperature_status, initialize_elasticsearch, initialize_mapping).
    """
    # Capture a fresh image; shell=False with a plain string runs the binary
    # directly with no arguments.
    subprocess.run('/usr/bin/snapshot', shell=False)#calls "snapshot.py"
    image_file = os.listdir(rootdir) 
    
    for root, subdirs, files in os.walk(rootdir):

        # Label map and TFLite interpreter discovered per walked directory.
        labels = getLabel(root, files)

        interpreter = getInterpreter(root, files)
                
        if interpreter is not None:
            size = classify.input_size(interpreter)
            
            #image_path = getImage(root, files)
            image_path = getImage(dir_path, image_file)
            
            # NOTE(review): Image.ANTIALIAS is deprecated/removed in newer
            # Pillow releases (use Image.LANCZOS) — confirm the pinned version.
            image = Image.open(image_path).convert('RGB').resize(size, Image.ANTIALIAS)
    
            classify.set_input(interpreter, image)
    
            print('*The first inference on Edge TPU is slow because it includes',
                  'loading the model into Edge TPU memory*')
            # Run `count` inferences; each timing is recorded in `dummy`.
            for _ in range(count):
                start = time.perf_counter()
                interpreter.invoke()
                inference_time = time.perf_counter() - start
                classes = classify.get_output(interpreter, top_k, threshold)
                #print('%.1f' % (inference_time * 1000)) 
                dummy.append(f'Time(ms):{(inference_time*1000):.4}')
                print('Time(ms):', '%.1f' % (inference_time * 1000))
            print("\n")   
                
            # NOTE(review): `classes` here (and `image_path` at the bottom)
            # are only bound inside this branch — a NameError occurs later if
            # no interpreter is found in any walked directory.
            for klass in classes:
                #print('%s: %.5f' % (labels.get(klass.id, klass.id), klass.score))
                dummy.append(f'Inference:{(labels.get(klass.id, klass.id))}')
                print('Inference:', '%s' % (labels.get(klass.id, klass.id)))
                dummy.append(f'Score:{(klass.score):.5}')
                print('Score:', '%.5f' % (klass.score))
                print("\n")
    
    
    maX_group = max_group() 
      
    temperature = check_temperature_status()
    # Raw sysfs temperature is in millidegrees; convert to °C.
    maX_group.append(f'TPU_temp(°C):{int(temperature)/1000}')
    #print("maX_group:", maX_group)
    print('#####################################')
    print("\n")
    
   
    

    es=initialize_elasticsearch() 
    initialize_mapping(es)   


    # Build a single bulk action from the collected "Key:Value" strings.
    # NOTE(review): indices 0-7 (3 is skipped) assume a fixed layout of
    # max_group()'s return value — verify against its implementation.
    actions = [
        {
            '_index': INDEX_NAME,
            '_type': DOC_TYPE,
            # Naive UTC timestamp at second precision (utcnow() is deprecated
            # since Python 3.12; timezone-aware now(UTC) is preferred).
            "@timestamp": str(datetime.datetime.utcnow().strftime("%Y-%m-%d"'T'"%H:%M:%S")),
            "Labels": maX_group[0].split(":")[1],
            "Model": maX_group[1].split(":")[1],
            "Image": maX_group[2].split(":")[1],
            "Time(ms)": maX_group[4].split(":")[1],
            "Inference": maX_group[5].split(":")[1],
            "Score": maX_group[6].split(":")[1],
            "TPU_temp(°C)": maX_group[7].split(":")[1]
        
        }]

    try:
        res=helpers.bulk(client=es, index = INDEX_NAME, actions = actions) 
        print ("\nhelpers.bulk() RESPONSE:", res)
        print ("RESPONSE TYPE:", type(res))
        
    except Exception as err: 
        print("\nhelpers.bulk() ERROR:", err)
    
    print("\n")
    print("\n")
    
    # Clean up the snapshot once it has been indexed.
    os.remove(image_path)
    print("Photo has been deleted")
Exemplo n.º 10
0
        action="store",
        default=None,
        help=
        "Path to git repo. Commits used as data to load into Elasticsearch. (Default: None"
    )

    args = parser.parse_args()

    # instantiate es client, connects to localhost:9200 by default
    es = Elasticsearch(args.host)

    # we load the repo and all commits
    load_repo(es, path=args.path)

    # run the bulk operations
    success, _ = bulk(es, UPDATES, index='git')
    print('Performed %d actions' % success)

    # we can now make docs visible for searching
    es.indices.refresh(index='git')

    # now we can retrieve the documents
    initial_commit = es.get(index='git',
                            doc_type='doc',
                            id='20fbba1230cabbc0f4644f917c6c2be52b8a63e8')
    print('%s: %s' %
          (initial_commit['_id'], initial_commit['_source']['committed_date']))

    # refresh to make the documents available for search
    es.indices.refresh(index='git')