def remove_ml_scripts_pipelines(cls, es_client: Elasticsearch, ml_type: List[str]) -> dict:
    """Remove all ML script and pipeline files.

    Iterates every ML file known to the cluster and deletes those whose
    type token (second ``_``-separated segment of the name, lowercased)
    appears in *ml_type*.  Returns a dict mapping
    ``{'script': {name: response}, 'pipeline': {name: response}}``.
    """
    results = {'script': {}, 'pipeline': {}}
    ingest_client = IngestClient(es_client)
    all_files = cls.get_all_ml_files(es_client=es_client)
    for file_type, data in all_files.items():
        # Snapshot the names so deletions cannot disturb iteration.
        for name in list(data):
            if name.split('_')[1].lower() not in ml_type:
                continue
            if file_type == 'script':
                results[file_type][name] = es_client.delete_script(name)
            elif file_type == 'pipeline':
                results[file_type][name] = ingest_client.delete_pipeline(name)
    return results
class IngestConnector:
    """Manage an Elasticsearch ingest pipeline (attachment processor) and its index.

    Documents are indexed with their PDF content base64-encoded into
    ``self.field``; the attachment processor extracts searchable text into
    ``attachment.content``, which is what ``search`` queries against.
    """

    def __init__(
            self,
            pipeline_id: str = "pdf_content",
            field: str = "data",
            pipeline_description: str = "Extracting info from pdf content"):
        # The backing index name is derived from the pipeline id.
        self.pipeline_id: str = pipeline_id
        self.index_name: str = pipeline_id + "_index"
        self.field: str = field
        self.pipeline_description: str = pipeline_description
        self.ingest_client = IngestClient(current_app.elasticsearch)

    def create_pipeline(self):
        """Register the ingest pipeline with a single attachment processor."""
        self.ingest_client.put_pipeline(id=self.pipeline_id, body={
            'description': self.pipeline_description,
            'processors': [{
                "attachment": {
                    "field": self.field
                }
            }]
        })

    def delete_pipeline(self):
        """Remove the ingest pipeline from the cluster."""
        self.ingest_client.delete_pipeline(id=self.pipeline_id)

    def get_pipeline(self):
        """Return the pipeline definition as stored in the cluster."""
        return self.ingest_client.get_pipeline(id=self.pipeline_id)

    def add_to_index(self, id_: int, content: str, content_page: int,
                     content_paragraph: int):
        """Index *content* (base64-encoded) through the attachment pipeline.

        ``content_page`` / ``content_paragraph`` are stored alongside the
        attachment for later reference.
        """
        current_app.elasticsearch.index(
            index=self.index_name,
            id=id_,
            pipeline=self.pipeline_id,
            body={
                self.field: base64.b64encode(content.encode("utf-8")).decode("utf-8"),
                "content_page": content_page,
                "content_paragraph": content_paragraph,
            })

    def remove_from_index(self, id_: int):
        """Delete the document with *id_* from the index."""
        current_app.elasticsearch.delete(index=self.index_name, id=id_)

    def api_search(self, query: str):
        """Run a full-text match query against the extracted PDF text."""
        return current_app.elasticsearch.search(
            index=self.index_name,
            body={"query": {
                "match": {
                    "attachment.content": query
                }
            }})

    def search(self, query: str):
        """Return the best-matching KnowledgePdfContent row, or None.

        Elasticsearch's relevance ranking is preserved by ordering the SQL
        result with a CASE expression over the returned ids, so the first
        row corresponds to the top hit.
        """
        hits = self.api_search(query)['hits']['hits']
        ids = [int(hit['_id']) for hit in hits]
        if not ids:
            return None
        # Map each id to its Elasticsearch rank for the CASE ordering.
        when = [(id_, rank) for rank, id_ in enumerate(ids)]
        rows = KnowledgePdfContent.query.filter(
            KnowledgePdfContent.id.in_(ids)).order_by(
                db.case(when, value=KnowledgePdfContent.id)).all()
        return rows[0] if rows else None
def deletePipelines():
    """Delete the 'rename_structure_unit_description' ingest pipeline."""
    ingest = IngestClient(get_connection())
    ingest.delete_pipeline(id='rename_structure_unit_description')
# Index the new flower document into the final-leg index.
final_leg_index = 'flower_measurements-magic'
new_doc = es.index(index=final_leg_index, body=new_flower)

# In[ ]:

# Verify the doc was created
new_doc['result'], new_doc['_id']

# In[ ]:

# Fetch the stored _source and show the model's prediction.
stored = es.get(index=final_leg_index, id=new_doc['_id'])
pprint(stored['_source'])
predicted = stored['_source']['ml']['inference']['predicted_name']['flower_name']
print('\nThis flower is predicted to be a %s !' % predicted)

# In[ ]:

# In[ ]:

# cleanup: drop the pipeline, index template, enrich policy and all documents.
IngestClient.delete_pipeline(es, id=pipeline_name)
IndicesClient.delete_template(es, name=template_name)
EnrichClient.delete_policy(es, name=policy_name)
es.delete_by_query(index=final_leg_index, body={"query": {"match_all": {}}})
es.delete_by_query(index=mapping_index_name, body={"query": {"match_all": {}}})

# In[ ]: