Пример #1
0
def recreate_index_for_db(database_name):
    """Drop and rebuild the search index of one database.

    The index named ``database_name`` is deleted (a failed delete is logged
    and the function carries on), a new index with one shard and no replicas
    is created, and ``create_all_mappings`` / ``create_all_indices`` are run
    against the database manager. Exceptions raised by those two calls are
    logged with a traceback and not re-raised. The elapsed wall-clock time
    is logged at the end in every case.

    :param database_name: name used for the index, for the database manager
        lookup and for the logger.
    """
    start = time.time()
    es = get_elasticsearch_handle()
    logger = logging.getLogger(database_name)
    try:
        es.delete_index(database_name)
    except Exception as e:
        # Best effort only. Log the exception itself: ``e.message`` is
        # deprecated and is empty for exceptions built with several args.
        logger.info("Could not delete index %s", e)
    response = es.create_index(database_name,
                               settings={
                                   "number_of_shards": 1,
                                   "number_of_replicas": 0
                               })
    logger.info('%s search index created : %s',
                database_name, response.get('ok'))
    dbm = get_db_manager(database_name)
    try:
        create_all_mappings(dbm)
        create_all_indices(dbm)
    except Exception:
        # logger.exception attaches the active traceback by itself.
        logger.exception("recreate index failed for database %s for",
                         dbm.database_name)

    logger.info(
        'Time taken (seconds) for indexing {database_name} : {timetaken}'.
        format(database_name=database_name, timetaken=(time.time() - start)))
Пример #2
0
def create_ds_mapping(dbm, form_model):
    """Put the datasender mapping for ``form_model`` into the search index.

    The mapping is built from the form model's fields plus three extra text
    fields (``projects``, ``groups``, ``customgroups``) and is stored under
    ``REPORTER_ENTITY_TYPE[0]`` in the index named ``dbm.database_name``.

    :param dbm: database manager; only ``database_name`` is read here.
    :param form_model: form model whose ``fields`` seed the mapping.
    """
    es = get_elasticsearch_handle()
    # Copy before extending: appending straight onto ``form_model.fields``
    # would leak the synthetic fields into the caller's form model (and
    # duplicate them on repeated calls) if that attribute exposes the
    # underlying list.
    fields = list(form_model.fields)
    fields.extend([
        TextField(name="projects", code='projects', label='projects'),
        TextField(name="groups", code='groups', label='My Groups'),
        TextField(name="customgroups", code='customgroups', label='Custom groups'),
    ])
    doc_type = REPORTER_ENTITY_TYPE[0]
    es.put_mapping(dbm.database_name, doc_type, get_fields_mapping(doc_type, fields))
def update_submission_search_index(submission_doc, dbm, refresh_index=True):
    """Index one submission document in the database's search index.

    The form model is looked up by ``submission_doc.form_code``; the search
    document is produced by ``_meta_fields`` and then filled in place by
    ``_update_with_form_model_fields``.

    :param submission_doc: submission to index; its ``id`` is the document id.
    :param dbm: database manager; ``database_name`` is the index name.
    :param refresh_index: forwarded as ``refresh`` to the index call.
    """
    search_handle = get_elasticsearch_handle()
    questionnaire = get_form_model_by_code(dbm, submission_doc.form_code)
    document = _meta_fields(submission_doc, dbm)
    _update_with_form_model_fields(dbm, submission_doc, document, questionnaire)
    search_handle.index(
        dbm.database_name,
        questionnaire.id,
        document,
        id=submission_doc.id,
        refresh=refresh_index,
    )
Пример #4
0
def create_search_indices_for_deleted_datasender(db_name):
    """Index voided datasenders (and the ``'test'`` one) of one database.

    Iterates every reporter entity, including voided ones, skips entities
    without data, and bulk-indexes those that are voided or whose short code
    is ``'test'``. On success the database is passed to
    ``mark_as_successful``. Any exception is logged with its traceback and
    not re-raised.

    :param db_name: database name; also used as the logger name.
    """
    logger = logging.getLogger(db_name)
    try:
        logger.info('Starting indexing')
        dbm = get_db_manager(db_name)
        es = get_elasticsearch_handle(timeout=600)
        form_model = get_form_model_by_code(dbm, REGISTRATION_FORM_CODE)
        datasenders = []

        for entity in get_all_entities_include_voided(dbm,
                                                      REPORTER_ENTITY_TYPE):
            if not entity.data:
                continue
            if entity.is_void() or entity.short_code == 'test':
                datasender_dict = _create_datasender_dict(
                    dbm, entity, REPORTER, form_model)
                datasender_dict.update({'id': entity.id})
                datasenders.append(datasender_dict)
        if datasenders:
            es.bulk_index(dbm.database_name, REPORTER, datasenders)
            logger.info('Created index for datasenders with ids :' +
                        str([a.get('id') for a in datasenders]))
        logger.info('Completed Indexing')
        mark_as_successful(db_name)
    except Exception as e:
        # Log the exception object, not the deprecated ``e.message`` which
        # is empty for exceptions constructed with several arguments.
        logger.exception(e)
Пример #5
0
def create_index(dbm, form_model, logger):
    """Bulk-index every survey response of ``form_model``.

    Rows are read from the ``surveyresponse/surveyresponse`` view, keyed by
    the form code, converted to search documents and sent in one
    ``bulk_index`` call. Nothing is sent or logged when there are no rows.

    :param dbm: database manager providing ``database`` and ``database_name``.
    :param form_model: form model; ``form_code`` selects the rows and ``id``
        is passed as the second argument to ``bulk_index``.
    :param logger: logger that receives the list of indexed ids.
    """
    code = form_model.form_code
    response_rows = dbm.database.iterview(
        "surveyresponse/surveyresponse",
        1000,
        reduce=False,
        include_docs=False,
        startkey=[code],
        endkey=[code, {}],
    )
    search_handle = get_elasticsearch_handle(timeout=600)

    indexed_docs = []
    for response_row in response_rows:
        response = SurveyResponseDocument._wrap_row(response_row)
        document = _meta_fields(response, dbm)
        _update_with_form_model_fields(dbm, response, document, form_model)
        document.update({'id': response.id})
        indexed_docs.append(document)

    if not indexed_docs:
        return

    search_handle.bulk_index(dbm.database_name, form_model.id, indexed_docs)
    indexed_ids = [document.get('id') for document in indexed_docs]
    logger.info('Created index for survey response docs ' + str(indexed_ids))
Пример #6
0
def create_datasender_mapping(dbm, form_model):
    """Put the datasender mapping for ``form_model`` into the search index.

    The mapping is built from the form model's fields plus an extra
    ``projects`` text field and is stored under ``REPORTER_ENTITY_TYPE[0]``
    in the index named ``dbm.database_name``.

    :param dbm: database manager; only ``database_name`` is read here.
    :param form_model: form model providing ``fields`` and ``form_code``.
    """
    es = get_elasticsearch_handle()
    # Copy before appending so the synthetic field is not added to the
    # caller's form model (which would also duplicate it on repeated calls
    # if ``fields`` exposes the underlying list).
    fields = list(form_model.fields)
    fields.append(TextField(name="projects", code='projects',
                            label='projects'))
    es.put_mapping(dbm.database_name, REPORTER_ENTITY_TYPE[0],
                   get_fields_mapping(form_model.form_code, fields))
Пример #7
0
def update_datasender_index(contact_doc, dbm):
    """Index a contact document as a datasender, then refresh the index.

    The document is indexed only when ``contact_doc.data`` is truthy; the
    refresh is issued in either case.

    :param contact_doc: contact document; its ``id`` is the document id.
    :param dbm: database manager; ``database_name`` is the index name.
    """
    search_handle = get_elasticsearch_handle()
    if contact_doc.data:
        registration_form = get_form_model_by_code(dbm, REGISTRATION_FORM_CODE)
        contact_dict = _create_contact_dict(dbm, contact_doc, registration_form)
        search_handle.index(
            dbm.database_name,
            REPORTER_ENTITY_TYPE[0],
            contact_dict,
            id=contact_doc.id,
        )
    search_handle.refresh(dbm.database_name)
 def update_field_in_submission_index(self, document_id, fields_mapping,
                                      refresh_index=True):
     """Partially update one document in this object's index.

     :param document_id: id of the document to update.
     :param fields_mapping: passed as ``doc`` to the update call.
     :param refresh_index: forwarded as ``refresh`` to the update call.
     """
     search_handle = get_elasticsearch_handle()
     search_handle.update(
         self.index,
         self.doc_type,
         id=document_id,
         doc=fields_mapping,
         refresh=refresh_index,
     )
Пример #9
0
def update_submission_search_index(submission_doc, dbm, refresh_index=True):
    """Index one submission document in the database's search index.

    The form model is loaded through ``FormModel.get`` using
    ``submission_doc.form_model_id``; the search document comes from
    ``_meta_fields`` and is filled in place by
    ``_update_with_form_model_fields``.

    :param submission_doc: submission to index; its ``id`` is the document id.
    :param dbm: database manager; ``database_name`` is the index name.
    :param refresh_index: forwarded as ``refresh`` to the index call.
    """
    search_handle = get_elasticsearch_handle()
    questionnaire = FormModel.get(dbm, submission_doc.form_model_id)
    document = _meta_fields(submission_doc, dbm)
    _update_with_form_model_fields(dbm, submission_doc, document, questionnaire)
    search_handle.index(dbm.database_name, questionnaire.id, document,
                        id=submission_doc.id, refresh=refresh_index)
Пример #10
0
def create_subject_mapping(dbm, form_model):
    """Put the subject mapping derived from ``form_model`` into the index.

    One field definition is produced per form field, named through
    ``es_field_name(field.code, form_model.id)``. The resulting mapping is
    stored under ``form_model.entity_type[0]`` in the index named
    ``dbm.database_name``.

    :param dbm: database manager; only ``database_name`` is read here.
    :param form_model: form model providing ``fields``, ``id`` and
        ``entity_type``.
    """
    search_handle = get_elasticsearch_handle()
    definitions = [
        get_field_definition(
            form_field,
            field_name=es_field_name(form_field.code, form_model.id))
        for form_field in form_model.fields
    ]
    subject_mapping = get_fields_mapping_by_field_def(
        doc_type=form_model.id, fields_definition=definitions)
    search_handle.put_mapping(dbm.database_name, form_model.entity_type[0],
                              subject_mapping)
Пример #11
0
 def delete_organizations(modeladmin, request, queryset):
     """Purge the data, databases and search index of deactivated organizations.

     Only entries of ``queryset`` whose ``status`` is ``'Deactivated'`` are
     processed. For each one: its data is purged, its database and the
     matching ``feed_``-prefixed database are removed from their servers, and
     the search index named after the database is deleted.

     :param modeladmin: not used by this function.
     :param request: not used by this function.
     :param queryset: organizations to consider.
     """
     for org in queryset.filter(status='Deactivated'):
         org_dbm = get_database_manager_for_org(org)
         org.purge_all_data()
         del org_dbm.server[org_dbm.database_name]

         feed_name = "feed_" + org_dbm.database_name
         feed_manager = feeds_db_for(feed_name)
         del feed_manager.server[feed_name]

         search_handle = get_elasticsearch_handle()
         search_handle.delete_index(org_dbm.database_name)
    def handle(self, *args, **options):
        """Recreate the search index of the given (or of all) databases.

        Positional arguments are taken as the database names to process;
        when none are given, ``all_db_names()`` supplies them. Each database
        is handed to ``recreate_index_for_db`` together with a shared
        Elasticsearch handle and a logger named after the database.

        :param args: optional database names.
        :param options: not used by this method.
        """
        es = get_elasticsearch_handle()
        databases_to_index = args if args else all_db_names()
        for database_name in databases_to_index:
            logger = logging.getLogger(database_name)
            recreate_index_for_db(database_name, es, logger)
            logger.info('Done')

        # Parenthesized so the line parses on Python 3 as well; on Python 2
        # it still prints the bare string.
        print('Completed!')
Пример #13
0
def update_datasender_index(entity_doc, dbm):
    """Index an entity document as a datasender, then refresh the index.

    The document type is the lower-cased first element of
    ``entity_doc.aggregation_paths['_type']``. Indexing happens only when
    ``entity_doc.data`` is truthy; the refresh is issued in either case.

    :param entity_doc: entity document; its ``id`` is the document id.
    :param dbm: database manager; ``database_name`` is the index name.
    """
    search_handle = get_elasticsearch_handle()
    if entity_doc.data:
        doc_type = entity_doc.aggregation_paths['_type'][0].lower()
        registration_form = get_form_model_by_code(dbm, REGISTRATION_FORM_CODE)
        document = _create_datasender_dict(dbm, entity_doc, doc_type,
                                           registration_form)
        search_handle.index(dbm.database_name, doc_type, document,
                            id=entity_doc.id)
    search_handle.refresh(dbm.database_name)
Пример #14
0
def update_datasender_index(contact_doc, dbm, bulk=False):
    """Index a contact as a datasender, or build the bulk action for it.

    When ``contact_doc.data`` is truthy and ``bulk`` is set, the result of
    ``index_op`` is returned and nothing is sent or refreshed here. In every
    other case the document is indexed (if it has data), the index is
    refreshed, and ``None`` is returned.

    :param contact_doc: contact document; its ``id`` is the document id.
    :param dbm: database manager; ``database_name`` is the index name.
    :param bulk: return an index operation instead of indexing immediately.
    """
    search_handle = get_elasticsearch_handle()
    if contact_doc.data:
        registration_form = get_form_model_by_code(dbm, REGISTRATION_FORM_CODE)
        contact_dict = _create_contact_dict(dbm, contact_doc, registration_form)

        if bulk:
            contact_dict.update({'id': contact_doc.id})
            return search_handle.index_op(
                contact_dict,
                index=dbm.database_name,
                doc_type=REPORTER_ENTITY_TYPE[0],
                id=contact_doc.id,
            )

        search_handle.index(
            dbm.database_name,
            REPORTER_ENTITY_TYPE[0],
            contact_dict,
            id=contact_doc.id,
        )
    search_handle.refresh(dbm.database_name)
Пример #15
0
def entity_search_update(entity_doc, dbm):
    """Index an entity document, delegating reporters to the datasender path.

    When the document's ``_type`` aggregation path equals
    ``REPORTER_ENTITY_TYPE`` the work is handed to
    ``update_datasender_index`` and nothing else happens here. Otherwise the
    document is indexed (only if it has data) under its lower-cased entity
    type and the index is refreshed.

    :param entity_doc: entity document; its ``id`` is the document id.
    :param dbm: database manager; ``database_name`` is the index name.
    """
    type_path = entity_doc.aggregation_paths['_type']
    if type_path == REPORTER_ENTITY_TYPE:
        update_datasender_index(entity_doc, dbm)
        return

    search_handle = get_elasticsearch_handle()
    if entity_doc.data:
        doc_type = entity_doc.aggregation_paths['_type'][0].lower()
        subject_form = get_form_model_by_entity_type(dbm, [doc_type])
        document = subject_dict(doc_type, entity_doc, dbm, subject_form)
        search_handle.index(dbm.database_name, doc_type, document,
                            id=entity_doc.id)
    search_handle.refresh(dbm.database_name)
Пример #16
0
 def delete_organizations(modeladmin, request, queryset):
     """Purge deactivated or not-yet-activated organizations.

     Only entries of ``queryset`` whose ``status`` is ``'Deactivated'`` or
     ``"Pending Activation"`` are processed. For each one: its data is
     purged, its database and the matching ``feed_``-prefixed database are
     removed from their servers, and the search index named after the
     database is deleted. A failure to delete the index is logged and does
     not stop the loop.

     :param modeladmin: not used by this function.
     :param request: not used by this function.
     :param queryset: organizations to consider.
     """
     orgs = queryset.filter(status__in=['Deactivated', "Pending Activation"])
     for organization in orgs:
         dbm = get_database_manager_for_org(organization)
         organization.purge_all_data()
         del dbm.server[dbm.database_name]
         feed_database_name = "feed_" + dbm.database_name
         feed_dbm = feeds_db_for(feed_database_name)
         del feed_dbm.server[feed_database_name]
         es = get_elasticsearch_handle()
         try:
             es.delete_index(dbm.database_name)
         except Exception as e:
             # Log the exception itself: ``e.message`` is deprecated and is
             # empty for exceptions built with several arguments.
             logging.info("Could not delete index %s", e)
Пример #17
0
def populate_contact_index(dbm):
    """Bulk-index every contact returned by the datasender-by-mobile view.

    Each row's document is wrapped as a contact and turned into a bulk action
    by ``contact_search_update``; ``None`` results are skipped. Actions are
    flushed whenever exactly ``settings.ES_INDEX_RECREATION_BATCH`` have
    accumulated, and any remainder is flushed at the end.

    :param dbm: database manager whose ``database`` is iterated.
    """
    contact_rows = dbm.database.iterview(
        'datasender_by_mobile/datasender_by_mobile', 100,
        reduce=False, include_docs=True)
    pending = []
    search_handle = get_elasticsearch_handle()
    for contact_row in contact_rows:
        contact = Contact.__document_class__.wrap(contact_row.get('doc'))
        bulk_action = contact_search_update(contact, dbm, bulk=True)
        if bulk_action is not None:
            pending.append(bulk_action)
        if len(pending) == settings.ES_INDEX_RECREATION_BATCH:
            search_handle.bulk(pending)
            pending = []

    if pending:
        search_handle.bulk(pending)
Пример #18
0
def add_custom_group_field_to_data_sender_mapping(db_name):
    """Add the ``customgroups`` text field to the ``reporter`` mapping.

    Logs the start and end of the migration and passes the database to
    ``mark_as_completed`` afterwards.

    :param db_name: database name; used as index name and logger name.
    """
    migration_log = logging.getLogger(db_name)
    migration_log.info('Starting Migration')

    search_handle = get_elasticsearch_handle()
    custom_group_field = TextField(name="customgroups",
                                   code='customgroups',
                                   label='Custom groups')
    reporter_mapping = get_fields_mapping('reg', [custom_group_field])
    search_handle.put_mapping(db_name, 'reporter', reporter_mapping)

    migration_log.info('Completed Migration')
    mark_as_completed(db_name)
def recreate_index_for_questionnaire(database_name, form_code):
    """Rebuild the mapping and submission index of a single questionnaire.

    The form model is looked up by ``form_code``; its mapping is deleted and
    recreated and its submissions are re-indexed. Exceptions from those steps
    are logged with a traceback and not re-raised. The elapsed wall-clock
    time is logged at the end in every case.

    :param database_name: name of the database holding the questionnaire.
    :param form_code: form code identifying the questionnaire.
    """
    started_at = time.time()
    # The handle is requested here but not used again in this function.
    get_elasticsearch_handle()

    manager = get_db_manager(database_name)
    try:
        questionnaire = get_form_model_by_code(manager, form_code)
        delete_mapping(database_name, questionnaire.id)
        create_mapping_for_form_model(manager, questionnaire)
        populate_submission_index(manager, questionnaire.id)
    except Exception:
        logger.exception("recreate index failed for database %s for" % manager.database_name)

    elapsed = time.time() - started_at
    logger.info(
        'Time taken (seconds) for indexing {database_name} : {timetaken}'
        .format(database_name=database_name, timetaken=elapsed))
Пример #20
0
def populate_entity_index(dbm):
    """Bulk-index every entity returned by the by-short-codes view.

    Each row's document is wrapped as an entity and turned into a bulk action
    by ``entity_search_update``; ``None`` results are skipped. Actions are
    flushed whenever exactly ``settings.ES_INDEX_RECREATION_BATCH`` have
    accumulated, and any remainder is flushed at the end. Exceptions
    propagate to the caller.

    :param dbm: database manager whose ``database`` is iterated.
    """
    rows = dbm.database.iterview('by_short_codes/by_short_codes', 100, reduce=False, include_docs=True)
    actions = []
    es = get_elasticsearch_handle()
    for row in rows:
        # No try/except here: the previous ``except Exception as e: raise e``
        # re-raised the same exception but reset its traceback on Python 2.
        entity = Entity.__document_class__.wrap(row.get('doc'))
        action = entity_search_update(entity, dbm, bulk=True)
        if action is not None:
            actions.append(action)
        if len(actions) == settings.ES_INDEX_RECREATION_BATCH:
            es.bulk(actions)
            actions = []
    if actions:
        es.bulk(actions)
Пример #21
0
def recreate_subject_index(db_name):
    """Rebuild the subject search index for every subject form model.

    For each row returned by the ``map_form_model_for_subjects`` query the
    existing documents of that entity type are deleted (best effort), the
    subject mapping is recreated, and all entities of the type that have data
    (voided ones included) are bulk-indexed. A failure on one entity or one
    form model is logged and processing continues with the next. When the
    loop finishes the database is passed to ``mark_as_completed``. Any other
    exception is logged with its traceback and not re-raised.

    :param db_name: database name; used as index name and logger name.
    """
    logger = logging.getLogger(db_name)
    try:
        logger.info('Starting indexing')
        dbm = get_db_manager(db_name)
        form_models = dbm.database.query(map_form_model_for_subjects)
        es = get_elasticsearch_handle()
        for row in form_models:
            try:
                form_model = FormModel.get(dbm, row.id)
                entity_type = form_model.entity_type[0]
                try:
                    es.delete_all(db_name, entity_type)
                except Exception as e:
                    # Still best effort, but leave a trace instead of
                    # swallowing the failure silently.
                    logger.info("Could not delete documents of type %s: %s",
                                entity_type, e)
                create_subject_mapping(dbm, form_model)
                entity_docs = []
                for entity_doc in get_all_entities_include_voided(
                        dbm, [entity_type]):
                    try:
                        if entity_doc.data:
                            subject = subject_dict(entity_type, entity_doc,
                                                   dbm, form_model)
                            subject.update({'id': entity_doc.id})
                            entity_docs.append(subject)
                    except Exception as e:
                        logger.error("Failed to index subject with id %s" %
                                     entity_doc.id)
                        logger.error(e)

                if entity_docs:
                    es.bulk_index(dbm.database_name, entity_type, entity_docs)
                    es.refresh(dbm.database_name)
                    logger.info('Changed index for subject with codes ' +
                                str([a.get('id') for a in entity_docs]))
            except Exception as e:
                logger.error("Failed to create subject mapping for %s" %
                             row.id)
                # Log the exception itself; ``e.message`` is deprecated and
                # empty for exceptions built with several arguments.
                logger.error(e)

        logger.info('Completed Indexing')
        mark_as_completed(db_name)
    except Exception as e:
        logger.exception(e)
Пример #22
0
def populate_submission_index(dbm, form_model_id=None):
    """Bulk-index survey responses of one questionnaire, or of all of them.

    Without ``form_model_id`` the function reads the
    ``surveyresponse_by_questionnaire_id`` view and calls itself once per
    returned key. With a ``form_model_id`` it converts every survey response
    of that form model into a bulk action, flushing whenever exactly
    ``settings.ES_INDEX_RECREATION_BATCH`` actions have accumulated and once
    more at the end. Rows raising ``FormModelDoesNotExistsException`` are
    counted as ignored; rows raising any other exception are counted as
    errors; neither stops the loop. Counts and elapsed time are logged.

    :param dbm: database manager; ``database_name`` is the index name.
    :param form_model_id: id of the form model to index, or ``None`` for all.
    """
    logger = logging.getLogger()
    if form_model_id is None:
        questionnaires = dbm.load_all_rows_in_view("surveyresponse_by_questionnaire_id", reduce=True, group=True)
        for q in questionnaires:
            logger.info('Processing questionnaire id {q}'.format(q=q.key))
            populate_submission_index(dbm, q.key)
    else:
        start = time.time()
        rows = get_survey_responses_by_form_model_id(dbm, form_model_id)
        form_model = FormModel.get(dbm, form_model_id)
        logger = logging.getLogger(form_model.name)
        ignored = 0
        counter = 0
        error_count = 0
        actions = []
        es = get_elasticsearch_handle()
        for row in rows:
            try:
                survey_response = SurveyResponseDocument._wrap_row(row)
                submission_action = update_submission_search_index(survey_response, dbm, refresh_index=False,
                                                                   form_model=form_model, bulk=True)
                actions.append(submission_action)
                if len(actions) == settings.ES_INDEX_RECREATION_BATCH:
                    es.bulk(actions, index=dbm.database_name, doc_type=form_model.id)
                    actions = []
                counter += 1
                logger.info('No of submissions processed {counter}'.format(counter=counter))
            except FormModelDoesNotExistsException as e:
                ignored += 1
                # Ignore orphaned submissions left behind by a form code
                # change. Log the exception itself rather than the deprecated
                # ``e.message``.
                logger.warning(e)
            except Exception:
                logger.exception('Exception occurred')
                error_count += 1

        if actions:
            es.bulk(actions, index=dbm.database_name, doc_type=form_model.id)

        logger.warning("No of submissions ignored: {ignored}".format(ignored=ignored))
        logger.warning("No of submissions had errors:{errors}".format(errors=error_count))

        logger.info('Time taken (seconds) for indexing {counter} submissions of questionnaire {q} : {timetaken}'
                    .format(counter=counter,q=form_model_id,timetaken=(time.time()-start)))
Пример #23
0
def update_submission_search_index(submission_doc, dbm, refresh_index=True,
                                   form_model=None, bulk=False):
    """Index one submission, or build the bulk action for it.

    The search document comes from ``_meta_fields`` and is filled in place by
    ``_update_with_form_model_fields``. With ``bulk`` set, the result of
    ``index_op`` is returned and nothing is sent; otherwise the document is
    indexed immediately and ``None`` is returned.

    :param submission_doc: submission to index; its ``id`` is the document id.
    :param dbm: database manager; ``database_name`` is the index name.
    :param refresh_index: forwarded as ``refresh`` when indexing immediately.
    :param form_model: form model to use; loaded through ``FormModel.get``
        from ``submission_doc.form_model_id`` when ``None``.
    :param bulk: return an index operation instead of indexing immediately.
    """
    search_handle = get_elasticsearch_handle()
    if form_model is None:
        form_model = FormModel.get(dbm, submission_doc.form_model_id)

    document = _meta_fields(submission_doc, dbm)
    _update_with_form_model_fields(dbm, submission_doc, document, form_model)

    if bulk:
        return search_handle.index_op(
            document,
            doc_type=form_model.id,
            index=dbm.database_name,
            id=submission_doc.id,
        )

    search_handle.index(
        dbm.database_name,
        form_model.id,
        document,
        id=submission_doc.id,
        refresh=refresh_index,
    )
def create_submission_index(dbm, row):
    """Bulk-index every survey response of the project described by ``row``.

    A project is built from ``row["value"]``; its form code selects the rows
    of the ``surveyresponse/surveyresponse`` view, which are converted to
    search documents and sent in one ``bulk_index`` call. Nothing is sent
    when there are no rows.

    :param dbm: database manager providing ``database`` and ``database_name``.
    :param row: mapping whose ``"value"`` entry holds the project document.
    """
    project = Project.new_from_doc(dbm, ProjectDocument.wrap(row["value"]))
    code = project.form_code
    response_rows = dbm.database.iterview(
        "surveyresponse/surveyresponse",
        1000,
        reduce=False,
        include_docs=False,
        startkey=[code],
        endkey=[code, {}],
    )
    search_handle = get_elasticsearch_handle(timeout=600)

    indexed_docs = []
    for response_row in response_rows:
        response = SurveyResponseDocument._wrap_row(response_row)
        document = _meta_fields(response, dbm)
        _update_with_form_model_fields(dbm, response, document, project)
        document.update({'id': response.id})
        indexed_docs.append(document)

    if indexed_docs:
        search_handle.bulk_index(dbm.database_name, project.id, indexed_docs)
Пример #25
0
import sys
from datawinners.search.index_utils import get_elasticsearch_handle

if __name__ == "__main__" and __package__ is None:
    sys.path.insert(0, ".")
from datawinners.main.couchdb.utils import all_db_names
from datawinners.main.management.commands.recreate_search_indexes import recreate_index_for_db

import logging
from migration.couch.utils import migrate, mark_as_completed


def create_search_indices_for_subjects(db_name):
    """Recreate the search index of one database as a migration step.

    Uses the module-level ``es`` handle. Any exception is logged with its
    traceback and not re-raised; 'Completed Indexing' is logged only when no
    exception occurred.

    :param db_name: database name; also used as the logger name.
    """
    logger = logging.getLogger(db_name)
    try:
        # NOTE(review): the database is marked completed before indexing
        # runs, so a failed run stays marked -- confirm this is intended.
        mark_as_completed(db_name)
        logger.info('Starting indexing')
        recreate_index_for_db(db_name, es)
    except Exception as e:
        # Log the exception itself: ``e.message`` is deprecated and is empty
        # for exceptions built with several arguments.
        logger.exception("Failed DB: %s with message %s", db_name, e)
    else:
        # Previously logged unconditionally, which reported completion even
        # right after a failure.
        logger.info('Completed Indexing')


# Module-level handle read by create_search_indices_for_subjects.
es = get_elasticsearch_handle()
# Hand all database names and the indexing function to migrate().
migrate(
    all_db_names(),
    create_search_indices_for_subjects,
    version=(8, 0, 1),
    threads=1,
)
Пример #26
0
        logger.info('Starting indexing')
        dbm = get_db_manager(db_name)
        es = get_elasticsearch_handle(timeout=600)
        form_model = get_form_model_by_code(dbm, REGISTRATION_FORM_CODE)
        datasenders = []

        for entity in get_all_entities_include_voided(dbm,
                                                      REPORTER_ENTITY_TYPE):
            if not entity.data:
                continue
            if entity.is_void() or entity.short_code == 'test':
                datasender_dict = _create_datasender_dict(
                    dbm, entity, REPORTER, form_model)
                datasender_dict.update({'id': entity.id})
                datasenders.append(datasender_dict)
        if datasenders:
            es.bulk_index(dbm.database_name, REPORTER, datasenders)
            logger.info('Created index for datasenders with ids :' +
                        str([a.get('id') for a in datasenders]))
        logger.info('Completed Indexing')
        mark_as_successful(db_name)
    except Exception as e:
        logger.exception(e.message)


# Module-level Elasticsearch handle created with a 600 timeout value.
es = get_elasticsearch_handle(timeout=600)
# Hand all database names and the indexing function to migrate().
migrate(
    all_db_names(),
    create_search_indices_for_deleted_datasender,
    version=(10, 0, 2),
    threads=1,
)
Пример #27
0
def create_submission_mapping(dbm, latest_form_model, old_form_model):
    """Update the submission search store for a changed form model.

    Builds a ``SubmissionSearchStore`` from the arguments plus a fresh
    Elasticsearch handle and calls its ``update_store``.

    :param dbm: database manager passed through to the store.
    :param latest_form_model: current form model.
    :param old_form_model: previous form model.
    """
    search_handle = get_elasticsearch_handle()
    store = SubmissionSearchStore(dbm, search_handle, latest_form_model,
                                  old_form_model)
    store.update_store()
Пример #28
0
 def __init__(self, dbm, latest_form_model, old_form_model):
     """Store the collaborators and obtain an Elasticsearch handle.

     :param dbm: database manager kept as ``self.dbm``.
     :param latest_form_model: kept as ``self.latest_form_model``.
     :param old_form_model: kept as ``self.old_form_model``.
     """
     self.dbm = dbm
     self.es = get_elasticsearch_handle()
     self.latest_form_model, self.old_form_model = (latest_form_model,
                                                    old_form_model)