Пример #1
0
    def remove_ml_scripts_pipelines(cls, es_client: Elasticsearch,
                                    ml_type: List[str]) -> dict:
        """Delete the ML scripts and ingest pipelines of the given types.

        The type of each file is read from the second ``_``-separated
        segment of its name, lower-cased, and checked against ``ml_type``.

        Args:
            es_client: Client used to delete scripts directly and to build
                the ``IngestClient`` that deletes pipelines.
            ml_type: Types to remove. Compared against the lower-cased name
                segment, so entries are expected to be lower case.

        Returns:
            A dict with the keys ``'script'`` and ``'pipeline'``, each
            mapping every deleted name to the response of its delete call.
        """
        results = dict(script={}, pipeline={})
        ingest_client = IngestClient(es_client)

        files = cls.get_all_ml_files(es_client=es_client)
        for file_type, data in files.items():
            for name in data:
                segments = name.split('_')
                # A name without a type segment cannot match any requested
                # type, so skip it instead of failing with IndexError.
                if len(segments) < 2 or segments[1].lower() not in ml_type:
                    continue
                if file_type == 'script':
                    results[file_type][name] = es_client.delete_script(name)
                elif file_type == 'pipeline':
                    results[file_type][name] = ingest_client.delete_pipeline(
                        name)

        return results
Пример #2
0
class IngestConnector:
    """Wrap an Elasticsearch ingest pipeline and the index it feeds.

    The pipeline consists of a single ``attachment`` processor reading from
    ``field``. Documents are stored in the index named
    ``<pipeline_id>_index``.
    """

    def __init__(
            self,
            pipeline_id: str = "pdf_content",
            field: str = "data",
            pipeline_description: str = "Extracting info from pdf content"):
        """Store the pipeline settings and build the ingest client.

        Args:
            pipeline_id: Id of the ingest pipeline; also the prefix of the
                index name.
            field: Document field holding the base64-encoded content that
                the ``attachment`` processor reads.
            pipeline_description: Description stored with the pipeline.
        """
        self.pipeline_id: str = pipeline_id
        self.index_name: str = pipeline_id + "_index"
        self.field: str = field
        self.pipeline_description: str = pipeline_description

        self.ingest_client = IngestClient(current_app.elasticsearch)

    def create_pipeline(self):
        """Create (or overwrite) the pipeline with one attachment processor."""
        self.ingest_client.put_pipeline(
            id=self.pipeline_id,
            body={
                'description': self.pipeline_description,
                'processors': [{
                    "attachment": {
                        "field": self.field
                    }
                }],
            })

    def delete_pipeline(self):
        """Delete the pipeline identified by ``pipeline_id``."""
        self.ingest_client.delete_pipeline(id=self.pipeline_id)

    def get_pipeline(self):
        """Return the client response describing the pipeline."""
        return self.ingest_client.get_pipeline(id=self.pipeline_id)

    def add_to_index(self, id_: int, content: str, content_page: int,
                     content_paragraph: int):
        """Index one piece of content through the pipeline.

        Args:
            id_: Document id in the index.
            content: Text to index; it is UTF-8 encoded and then
                base64-encoded before being sent in ``field``.
            content_page: Stored as ``content_page`` in the document.
            content_paragraph: Stored as ``content_paragraph`` in the
                document.
        """
        encoded = base64.b64encode(content.encode("utf-8")).decode("utf-8")
        current_app.elasticsearch.index(
            index=self.index_name,
            id=id_,
            pipeline=self.pipeline_id,
            body={
                self.field: encoded,
                "content_page": content_page,
                "content_paragraph": content_paragraph,
            })

    def remove_from_index(self, id_: int):
        """Delete the document with id ``id_`` from the index."""
        current_app.elasticsearch.delete(index=self.index_name, id=id_)

    def api_search(self, query: str):
        """Run a ``match`` query on ``attachment.content``.

        Returns:
            The raw search response from the Elasticsearch client.
        """
        return current_app.elasticsearch.search(
            index=self.index_name,
            body={"query": {
                "match": {
                    "attachment.content": query
                }
            }})

    def search(self, query: str):
        """Return the database row for the first search hit, if any.

        Returns:
            The first ``KnowledgePdfContent`` row when rows are ordered like
            the search hits, or ``None`` when there are no hits or no
            matching rows.
        """
        response = self.api_search(query)

        ids = [int(hit['_id']) for hit in response['hits']['hits']]
        if not ids:
            return None

        # Map each id to its position in the hit list so the SQL CASE below
        # orders the rows the same way the hits were returned.
        when = [(doc_id, position) for position, doc_id in enumerate(ids)]

        res = KnowledgePdfContent.query.filter(
            KnowledgePdfContent.id.in_(ids)).order_by(
                db.case(when, value=KnowledgePdfContent.id)).all()
        return res[0] if res else None
def deletePipelines():
    """Delete the ``rename_structure_unit_description`` ingest pipeline."""
    ingest = IngestClient(get_connection())
    ingest.delete_pipeline(id='rename_structure_unit_description')
Пример #4
0
# Index the new flower document into the final index.
final_leg_index = 'flower_measurements-magic'
new_doc = es.index(index=final_leg_index, body=new_flower)

# In[ ]:

# Verify the doc was created
new_doc['result'], new_doc['_id']

# In[ ]:

# Find out what the flower was predicted to be with the _source and a nice human readable output!
res = es.get(index=final_leg_index, id=new_doc['_id'])
pprint(res['_source'])
print('\nThis flower is predicted to be a %s !' %
      res['_source']['ml']['inference']['predicted_name']['flower_name'])

# In[ ]:

# In[ ]:

# cleanup
# Build each namespaced client from `es` instead of calling its methods
# unbound with `es` passed as `self`, which only works by accident.
IngestClient(es).delete_pipeline(id=pipeline_name)
IndicesClient(es).delete_template(name=template_name)
EnrichClient(es).delete_policy(name=policy_name)
# Empty both indices by deleting every document they contain.
es.delete_by_query(index=final_leg_index, body={"query": {"match_all": {}}})
es.delete_by_query(index=mapping_index_name, body={"query": {"match_all": {}}})

# In[ ]: