def send_to_elasticsearch(index_name, doc, delete=False, es_merge_update=False):
    """
    Write (or delete) a single document in Elasticsearch.

    This duplicates the functionality of pillowtop, but can be invoked
    directly by callers.

    :param index_name: key into ``ES_META`` selecting the target index/type
    :param doc: the document to send; its ``'_id'`` entry is used as the ES id
    :param delete: forwarded unchanged to ``send_to_es``
    :param es_merge_update: forwarded unchanged to ``send_to_es``
    :returns: whatever ``send_to_es`` returns
    """
    # Imported locally, as in the original implementation.
    from pillowtop.es_utils import ElasticsearchIndexInfo

    document_id = doc['_id']
    meta = ES_META[index_name]

    # Whether the document is already indexed decides the ``update`` flag below.
    already_indexed = doc_exists_in_es(
        ElasticsearchIndexInfo(index=meta.index, type=meta.type),
        document_id,
    )

    # Label passed to send_to_es as ``name``: "<module>.<function> <index_name>:"
    caller_label = "{}.{} <{}>:".format(
        send_to_elasticsearch.__module__,
        send_to_elasticsearch.__name__,
        index_name,
    )

    return send_to_es(
        index=meta.index,
        doc_type=meta.type,
        doc_id=document_id,
        es_getter=get_es_new,
        name=caller_label,
        data=doc,
        except_on_failure=True,
        update=already_indexed,
        delete=delete,
        es_merge_update=es_merge_update,
    )
def get_case_search_to_elasticsearch_pillow(
        pillow_id='CaseSearchToElasticsearchPillow', num_processes=1,
        process_num=0, **kwargs):
    """Populates the `case search` Elasticsearch index.

    If both ``index_name`` and ``index_alias`` are supplied as keyword
    arguments, the pillow targets a copy of ``CASE_SEARCH_INDEX_INFO`` whose
    index and alias are replaced with those values; otherwise
    ``CASE_SEARCH_INDEX_INFO`` is used as-is.

    Processors:
      - :py:class:`corehq.pillows.case_search.CaseSearchPillowProcessor`
    """
    override_keys = ('index_name', 'index_alias')
    if all(key in kwargs for key in override_keys):
        # Allow overriding index name and alias for the purposes of
        # reindexing. These can be set in localsettings.LOCAL_PILLOWTOPS
        serialized = CASE_SEARCH_INDEX_INFO.to_json()
        serialized.pop("meta")
        target_index = ElasticsearchIndexInfo.wrap(serialized)
        target_index.index = kwargs['index_name']
        target_index.alias = kwargs['index_alias']
    else:
        target_index = CASE_SEARCH_INDEX_INFO

    pillow_checkpoint = get_checkpoint_for_elasticsearch_pillow(
        pillow_id, target_index, topics.CASE_TOPICS)

    processor = CaseSearchPillowProcessor(
        elasticsearch=get_es_new(),
        index_info=target_index,
        doc_prep_fn=transform_case_for_elasticsearch,
    )

    kafka_feed = KafkaChangeFeed(
        topics=topics.CASE_TOPICS,
        client_id='cases-to-es',
        num_processes=num_processes,
        process_num=process_num,
    )

    # The checkpoint handler shares the same checkpoint and feed as the pillow.
    checkpoint_handler = KafkaCheckpointEventHandler(
        checkpoint=pillow_checkpoint,
        checkpoint_frequency=100,
        change_feed=kafka_feed,
    )

    return ConstructedPillow(
        name=pillow_id,
        checkpoint=pillow_checkpoint,
        change_feed=kafka_feed,
        processor=processor,
        change_processed_event_handler=checkpoint_handler,
    )
'tags': { 'type': 'string' }, 'title': { 'fields': { 'exact': { 'index': 'not_analyzed', 'type': 'string' }, 'title': { 'index': 'analyzed', 'type': 'string' } }, 'type': 'multi_field' }, 'use_sql_backend': { 'type': "boolean" }, 'yt_id': { 'type': 'string' } } } DOMAIN_INDEX_INFO = ElasticsearchIndexInfo(index=DOMAIN_INDEX, alias=DOMAIN_ES_ALIAS, type='hqdomain', mapping=DOMAIN_MAPPING, hq_index_name=DOMAIN_HQ_INDEX_NAME)
"type": "multi_field" }, "phone_number": { "type": "string" }, "processed": { "type": "boolean" }, "reminder_id": { "type": "string" }, "text": { "type": "string" }, "workflow": { "type": "string" }, "xforms_session_couch_id": { "type": "string" } } } SMS_INDEX_INFO = ElasticsearchIndexInfo( index=SMS_INDEX, alias=SMS_ES_ALIAS, type=SMS_TYPE, mapping=SMS_MAPPING, hq_index_name=SMS_HQ_INDEX_NAME )
'type': { 'index': 'analyzed', 'type': 'string' } }, 'type': 'multi_field' }, 'user_id': { 'type': 'string' }, 'version': { 'type': 'string' }, 'xform_ids': { 'index': 'not_analyzed', 'type': 'string' } } } REPORT_CASE_ES_ALIAS = "report_cases" REPORT_CASE_ES_TYPE = "report_case" REPORT_CASE_INDEX_INFO = ElasticsearchIndexInfo( index=REPORT_CASE_INDEX, alias=REPORT_CASE_ES_ALIAS, type=REPORT_CASE_ES_TYPE, meta=DEFAULT_META, mapping=REPORT_CASE_MAPPING, )
'type': 'string'}, 'name': {'index': 'analyzed', 'type': 'string'}}, 'type': 'multi_field'}, 'opened_by': {'type': 'string'}, 'opened_on': {'format': "yyyy-MM-dd||yyyy-MM-dd'T'HH:mm:ssZZ||yyyy-MM-dd'T'HH:mm:ss.SSSSSS||yyyy-MM-dd'T'HH:mm:ss.SSSSSS'Z'||yyyy-MM-dd'T'HH:mm:ss'Z'||yyyy-MM-dd'T'HH:mm:ssZ||yyyy-MM-dd'T'HH:mm:ssZZ'Z'||yyyy-MM-dd'T'HH:mm:ss.SSSZZ||yyyy-MM-dd'T'HH:mm:ss||yyyy-MM-dd' 'HH:mm:ss||yyyy-MM-dd' 'HH:mm:ss.SSSSSS||mm/dd/yy' 'HH:mm:ss", 'type': 'date'}, 'owner_id': {'type': 'string'}, 'referrals': {'enabled': False, 'type': 'object'}, 'server_modified_on': {'format': "yyyy-MM-dd||yyyy-MM-dd'T'HH:mm:ssZZ||yyyy-MM-dd'T'HH:mm:ss.SSSSSS||yyyy-MM-dd'T'HH:mm:ss.SSSSSS'Z'||yyyy-MM-dd'T'HH:mm:ss'Z'||yyyy-MM-dd'T'HH:mm:ssZ||yyyy-MM-dd'T'HH:mm:ssZZ'Z'||yyyy-MM-dd'T'HH:mm:ss.SSSZZ||yyyy-MM-dd'T'HH:mm:ss||yyyy-MM-dd' 'HH:mm:ss||yyyy-MM-dd' 'HH:mm:ss.SSSSSS||mm/dd/yy' 'HH:mm:ss", 'type': 'date'}, 'type': {'fields': {'exact': {'index': 'not_analyzed', 'type': 'string'}, 'type': {'index': 'analyzed', 'type': 'string'}}, 'type': 'multi_field'}, 'user_id': {'type': 'string'}, 'version': {'type': 'string'}, 'xform_ids': {'index': 'not_analyzed', 'type': 'string'}}} REPORT_CASE_ES_ALIAS = prefix_for_tests("report_cases") REPORT_CASE_ES_TYPE = "report_case" REPORT_CASE_INDEX_INFO = ElasticsearchIndexInfo( index=REPORT_CASE_INDEX, alias=REPORT_CASE_ES_ALIAS, type=REPORT_CASE_ES_TYPE, mapping=REPORT_CASE_MAPPING, hq_index_name=REPORT_CASE_HQ_INDEX_NAME )
'date': { 'format': DATE_FORMATS_STRING, 'type': 'date' }, 'direction': {'type': 'string'}, 'doc_type': {'index': 'not_analyzed', 'type': 'string'}, 'domain': { 'fields': { 'domain': {'index': 'analyzed', 'type': 'string'}, 'exact': {'index': 'not_analyzed', 'type': 'string'} }, 'type': 'multi_field' }, 'phone_number': {'type': 'string'}, 'processed': {'type': 'boolean'}, 'reminder_id': {'type': 'string'}, 'text': {'type': 'string'}, 'workflow': {'type': 'string'}, 'xforms_session_couch_id': {'type': 'string'} } } SMS_TYPE = 'sms' SMS_INDEX_INFO = ElasticsearchIndexInfo( index=SMS_INDEX, alias="smslogs", type=SMS_TYPE, mapping=SMS_MAPPING, )
}, "user_location_id": { "index": "not_analyzed", "type": "string" }, "username": { "fields": { "exact": { "include_in_all": False, "index": "not_analyzed", "type": "string" }, "username": { "analyzer": "standard", "index": "analyzed", "type": "string" } }, "type": "multi_field" } } } USER_INDEX_INFO = ElasticsearchIndexInfo( index=USER_INDEX, alias=USER_ES_ALIAS, type=ElasticUser.type, mapping=USER_MAPPING, hq_index_name=USER_HQ_INDEX_NAME )
# Index definition for the case search Elasticsearch index: the index name,
# alias, mapping, and the ElasticsearchIndexInfo object that bundles them.
from __future__ import absolute_import
from __future__ import unicode_literals
from corehq.pillows.mappings.case_mapping import CASE_ES_TYPE
from corehq.pillows.mappings.utils import mapping_from_json
from corehq.util.elastic import es_index
from pillowtop.es_utils import ElasticsearchIndexInfo

# Concrete index name, derived from a dated base name via es_index().
# NOTE(review): es_index() is defined elsewhere; what it adds to the base
# name is not visible here -- confirm before relying on the literal name.
CASE_SEARCH_INDEX = es_index("case_search_2016-03-15")
CASE_SEARCH_ALIAS = "case_search"
# NOTE(review): presumably a cap on the number of hits returned by a case
# search query; it is not used in this snippet -- verify against callers.
CASE_SEARCH_MAX_RESULTS = 100
# Mapping loaded from the 'case_search_mapping.json' file by mapping_from_json().
CASE_SEARCH_MAPPING = mapping_from_json('case_search_mapping.json')

# Bundles the index name, alias, doc type and mapping into one object.
# The doc type is CASE_ES_TYPE, imported from the case mapping module.
CASE_SEARCH_INDEX_INFO = ElasticsearchIndexInfo(
    index=CASE_SEARCH_INDEX,
    alias=CASE_SEARCH_ALIAS,
    type=CASE_ES_TYPE,
    mapping=CASE_SEARCH_MAPPING,
)
'type': 'string' }, 'title': { 'fields': { 'exact': { 'index': 'not_analyzed', 'type': 'string' }, 'title': { 'index': 'analyzed', 'type': 'string' } }, 'type': 'multi_field' }, 'use_sql_backend': { 'type': "boolean" }, 'yt_id': { 'type': 'string' } } } DOMAIN_INDEX_INFO = ElasticsearchIndexInfo( index=DOMAIN_INDEX, alias='hqdomains', type='hqdomain', mapping=DOMAIN_MAPPING, )
}, }, }, }, "dynamic_templates": [ { 'case_block': { "match": "case", "mapping": CASE_MAPPING_FRAGMENT } }, { "everything_else": { "match": "*", "match_mapping_type": "string", "mapping": {"type": "string", "index": "not_analyzed"} } } ] } REPORT_XFORM_ALIAS = "report_xforms" REPORT_XFORM_TYPE = "report_xform" REPORT_XFORM_INDEX_INFO = ElasticsearchIndexInfo( index=REPORT_XFORM_INDEX, alias=REPORT_XFORM_ALIAS, type=REPORT_XFORM_TYPE, mapping=REPORT_XFORM_MAPPING, )
# Index definition for the ledger Elasticsearch index: the index name, alias,
# doc type, mapping, and the ElasticsearchIndexInfo object that bundles them.
from corehq.pillows.base import DEFAULT_META
from corehq.pillows.mappings.utils import mapping_from_json
from corehq.util.elastic import es_index
from pillowtop.es_utils import ElasticsearchIndexInfo

# Concrete index name, derived from a dated base name via es_index().
# NOTE(review): es_index() is defined elsewhere; what it adds to the base
# name is not visible here -- confirm before relying on the literal name.
LEDGER_INDEX = es_index("ledgers_2016-03-15")
LEDGER_ALIAS = "ledgers"
LEDGER_TYPE = "ledger"
# Mapping loaded from the 'ledger_mapping.json' file by mapping_from_json().
LEDGER_MAPPING = mapping_from_json('ledger_mapping.json')

# Bundles the index name, alias, doc type, meta and mapping into one object.
# ``meta`` is the DEFAULT_META value imported from corehq.pillows.base.
LEDGER_INDEX_INFO = ElasticsearchIndexInfo(
    index=LEDGER_INDEX,
    alias=LEDGER_ALIAS,
    type=LEDGER_TYPE,
    meta=DEFAULT_META,
    mapping=LEDGER_MAPPING,
)
"name": { "index": "analyzed", "type": "string" } }, "type": "multi_field" }, "path": { "type": "string" }, "removed_users": { "type": "string" }, "reporting": { "type": "boolean" }, "users": { "type": "string" } } } GROUP_INDEX_INFO = ElasticsearchIndexInfo( index=GROUP_INDEX, alias=GROUP_ES_ALIAS, type=ElasticGroup.type, mapping=GROUP_MAPPING, hq_index_name=GROUP_HQ_INDEX_NAME )
'comment': 'You know, for tests', 'created': '2015-10-07 @czue' }, "properties": { "doc_type": { "index": "not_analyzed", "type": "string" }, } } TEST_ES_TYPE = 'test_doc' TEST_ES_INDEX = 'test_pillowtop_index' TEST_ES_ALIAS = 'pillowtop_tests' TEST_INDEX_INFO = ElasticsearchIndexInfo(index=TEST_ES_INDEX, alias=TEST_ES_ALIAS, type=TEST_ES_TYPE, mapping=TEST_ES_MAPPING) def get_doc_count(es, index, refresh_first=True): if refresh_first: # we default to calling refresh since ES might have stale data es.indices.refresh(index) stats = es.indices.stats(index) return stats['indices'][index]['total']['docs']['count'] def get_index_mapping(es, index, doc_type): def _format_mapping_for_es_version(mapping): if settings.ELASTICSEARCH_VERSION < 1.0: return mapping[doc_type]
'name': {'fields': {'exact': {'index': 'not_analyzed', 'type': 'string'}, 'name': {'index': 'analyzed', 'type': 'string'}}, 'type': 'multi_field'}, 'phone_model': {'type': 'string'}, 'platform': {'type': 'string'}, 'profile': {'dynamic': True, 'type': 'object'}, 'recipients': {'type': 'string'}, 'secure_submissions': {'type': 'boolean'}, 'short_odk_media_url': {'type': 'string'}, 'short_odk_url': {'type': 'string'}, 'short_url': {'type': 'string'}, 'success_message': {'dynamic': False, 'type': 'object'}, 'text_input': {'type': 'string'}, 'translation_strategy': {'type': 'string'}, 'translations': {'dynamic': False, 'type': 'object'}, 'use_custom_suite': {'type': 'boolean'}, 'user_type': {'type': 'string'}, 'version': {'type': 'long'}, 'vellum_case_management': {'type': 'boolean'}}} APP_ES_ALIAS = "hqapps" APP_ES_TYPE = "app" APP_INDEX_INFO = ElasticsearchIndexInfo( index=APP_INDEX, alias=APP_ES_ALIAS, type=APP_ES_TYPE, mapping=APP_MAPPING )
}, "name": { "fields": { "exact": { "index": "analyzed", "type": "string", "analyzer": "sortable_exact" }, "name": { "index": "analyzed", "type": "string" } }, "type": "multi_field" }, "reporting": {"type": "boolean"}, "path": {"type": "string"}, "case_sharing": {"type": "boolean"}, "users": {"type": "string"}, } } GROUP_INDEX_INFO = ElasticsearchIndexInfo( index=GROUP_INDEX, alias='hqgroups', type='group', meta=DEFAULT_META, mapping=GROUP_MAPPING, )
'type': 'string' } }, 'type': 'multi_field' }, '__group_ids': { 'type': 'string' }, '__group_names': { 'fields': { '__group_names': { 'index': 'analyzed', 'type': 'string' }, 'exact': { 'index': 'not_analyzed', 'type': 'string' } }, 'type': 'multi_field' } } } USER_INDEX_INFO = ElasticsearchIndexInfo( index=USER_INDEX, alias='hqusers', type='user', mapping=USER_MAPPING, )
"type": "string", "index": "not_analyzed" }, "app_build_version": { "type": "string", "index": "not_analyzed" }, "geo_point": { "type": "geo_point", "lat_lon": True, "geohash": True, "geohash_prefix": True, "geohash_precision": '10m' }, } }, }, }, } } XFORM_ES_TYPE = 'xform' XFORM_ALIAS = "xforms" XFORM_INDEX_INFO = ElasticsearchIndexInfo( index=XFORM_INDEX, alias=XFORM_ALIAS, type=XFORM_ES_TYPE, mapping=XFORM_MAPPING, )
"type": "date" }, "type": { "fields": { "exact": { "index": "not_analyzed", "type": "string" }, "type": { "index": "analyzed", "type": "string" } }, "type": "multi_field" }, "user_id": { "index": "not_analyzed", "type": "string" } } } CASE_SEARCH_INDEX_INFO = ElasticsearchIndexInfo( index=CASE_SEARCH_INDEX, alias=CASE_SEARCH_ALIAS, type=ElasticCaseSearch.type, mapping=CASE_SEARCH_MAPPING, hq_index_name=CASE_SEARCH_HQ_INDEX_NAME, )