Exemplo n.º 1
0
def get_records_from_values(entity_name, values=None):
    """
    Fetch the records of an entity and group them by value and language script.

    Args:
        entity_name (str): Name of the entity for which records are to be fetched
        values (list, optional): List of str values to fetch; forwarded unchanged
            to DataStore.get_entity_data

    Returns:
        dict: maps each record's value to a dictionary keyed by language script.
            Sample: {
                'entity_value': {
                    'en': {
                        '_id': 'Random ES ID',
                        'value': ['Variant 1', 'Variant 2']
                    },
                    'hi': {
                        '_id': 'Random ES ID',
                        'value': ['Variant 1', 'Variant 2']
                    }
                }
            }
    """
    hits = DataStore().get_entity_data(entity_name=entity_name, values=values)

    records_by_value = {}
    for hit in hits:
        source = hit['_source']
        # One bucket per entity value; later hits for the same value add
        # (or overwrite) the entry of their own language script.
        per_language = records_by_value.setdefault(source['value'], {})
        per_language[source['language_script']] = {
            '_id': hit['_id'],
            'value': source['variants'],
        }
    return records_by_value
Exemplo n.º 2
0
def update_dictionary(request):
    """
    Update the dictionary data of an entity in the datastore.

    The POST field named by EXTERNAL_API_DATA must hold a JSON object. The entries
    stored under ENTITY_NAME, ENTITY_DATA and LANGUAGE_SCRIPT are read from it and
    handed to DataStore.update_entity_data.

    Args:
        request (HttpRequest): incoming HTTP request carrying the POST payload

    Returns:
        HttpResponse: JSON body {"success": bool, "error": str, "result": []}.
            Status is 200 on success and 500 when any step fails; on failure
            "error" holds the stringified exception.
    """
    response = {"success": False, "error": "", "result": []}
    status = 200
    try:
        external_api_data = json.loads(request.POST.get(EXTERNAL_API_DATA))
        entity_name = external_api_data.get(ENTITY_NAME)
        entity_data = external_api_data.get(ENTITY_DATA)
        language_script = external_api_data.get(LANGUAGE_SCRIPT)
        datastore_obj = DataStore()
        datastore_obj.update_entity_data(entity_name=entity_name,
                                         entity_data=entity_data,
                                         language_script=language_script)
        response['success'] = True

    # The known datastore failures stay listed explicitly; ``Exception`` is the
    # catch-all for everything else. ``BaseException`` is deliberately not caught
    # so SystemExit / KeyboardInterrupt still propagate instead of becoming a 500.
    except (DataStoreSettingsImproperlyConfiguredException,
            EngineNotImplementedException,
            EngineConnectionException,
            FetchIndexForAliasException,
            Exception) as error:
        response['error'] = str(error)
        ner_logger.exception('Error: %s' % error)
        status = 500

    return HttpResponse(json.dumps(response), content_type='application/json', status=status)
Exemplo n.º 3
0
def transfer_entities(request):
    """
    Trigger a transfer of the listed entities through the datastore.

    The POST field named by EXTERNAL_API_DATA must hold a JSON object; the entry
    stored under ENTITY_LIST is passed to DataStore.transfer_entities_elastic_search.
    Source and destination of the transfer are decided inside the datastore layer.

    Args:
        request (HttpRequest): incoming HTTP request carrying the POST payload

    Returns:
        HttpResponse: JSON body {"success": bool, "error": str, "result": []}.
            Status is 200 on success and 500 when any step fails; on failure
            "error" holds the stringified exception.
    """
    response = {"success": False, "error": "", "result": []}
    status = 200
    try:
        external_api_data = json.loads(request.POST.get(EXTERNAL_API_DATA))
        entity_list = external_api_data.get(ENTITY_LIST)

        datastore_object = DataStore()
        datastore_object.transfer_entities_elastic_search(entity_list=entity_list)
        response['success'] = True

    # The known transfer failures stay listed explicitly; ``Exception`` is the
    # catch-all for everything else. ``BaseException`` is deliberately not caught
    # so SystemExit / KeyboardInterrupt still propagate instead of becoming a 500.
    except (IndexNotFoundException, InvalidESURLException,
            SourceDestinationSimilarException, InternalBackupException, AliasNotFoundException,
            PointIndexToAliasException, FetchIndexForAliasException, DeleteIndexFromAliasException,
            AliasForTransferException, IndexForTransferException, NonESEngineTransferException,
            Exception) as error:
        response['error'] = str(error)
        ner_logger.exception('Error: %s' % error)
        status = 500

    return HttpResponse(json.dumps(response), content_type='application/json', status=status)
Exemplo n.º 4
0
    def train_model_from_es_data(self):
        """
        Train the crf model for self.entity_name from training data held in the datastore.

        The training data is fetched with DataStore.get_crf_data_for_entity_name and
        then handed to self.train_crf_model_from_list.

        Returns:
            The value returned by self.train_crf_model_from_list (the model path).

        Raises:
            ESCrfTrainingTextListNotFoundException: if the fetched sentence list is empty
            ESCrfTrainingEntityListNotFoundException: if the fetched entity list is empty
        """
        datastore = DataStore()
        ner_logger.debug('Fetch of data from ES for ENTITY: %s started' % self.entity_name)
        training_data = datastore.get_crf_data_for_entity_name(entity_name=self.entity_name)

        training_sentences = training_data.get(SENTENCE_LIST, [])
        training_entities = training_data.get(ENTITY_LIST, [])

        # Sentences are validated before entities, so a payload missing both
        # reports the missing text list.
        if not training_sentences:
            raise ESCrfTrainingTextListNotFoundException()
        if not training_entities:
            raise ESCrfTrainingEntityListNotFoundException()

        ner_logger.debug('Fetch of data from ES for ENTITY: %s completed' % self.entity_name)
        ner_logger.debug('Length of text_list %s' % len(training_sentences))

        return self.train_crf_model_from_list(entity_list=training_entities,
                                              sentence_list=training_sentences)
Exemplo n.º 5
0
def get_crf_training_data(request):
    """
    Return the crf training data stored for an entity.

    The entity name is read from the GET parameter named by ENTITY_NAME and passed
    to DataStore.get_crf_data_for_entity_name; whatever that call returns is placed
    under "result" in the response.

    Args:
        request (HttpRequest): incoming HTTP request carrying the GET parameters

    Returns:
        HttpResponse: JSON body {"success": bool, "error": str, "result": ...}.
            Status is 200 on success and 500 when any step fails; on failure
            "error" holds the stringified exception and "result" stays [].

    Examples:
        get request params
        key: "entity_name"
        value: "city"
    """
    response = {"success": False, "error": "", "result": []}
    status = 200
    try:
        entity_name = request.GET.get(ENTITY_NAME)
        datastore_obj = DataStore()
        response['result'] = datastore_obj.get_crf_data_for_entity_name(entity_name=entity_name)
        response['success'] = True

    # The known datastore failures stay listed explicitly; ``Exception`` is the
    # catch-all for everything else. ``BaseException`` is deliberately not caught
    # so SystemExit / KeyboardInterrupt still propagate instead of becoming a 500.
    except (DataStoreSettingsImproperlyConfiguredException,
            EngineNotImplementedException,
            EngineConnectionException,
            FetchIndexForAliasException,
            Exception) as error:
        response['error'] = str(error)
        ner_logger.exception('Error: %s' % error)
        status = 500

    return HttpResponse(json.dumps(response), content_type='application/json', status=status)
Exemplo n.º 6
0
def entity_supported_languages(entity_name):
    """
    Look up the languages supported by an entity.

    Thin wrapper around DataStore.get_entity_supported_languages.

    Args:
        entity_name (str): Name of the entity whose supported languages are requested
    Returns:
        list: List of language_codes
    """
    supported_languages = DataStore().get_entity_supported_languages(entity_name=entity_name)
    return supported_languages
Exemplo n.º 7
0
def delete_records_by_values(entity_name, values):
    """
    Delete the records of an entity that belong to the given values.

    Thin wrapper around DataStore.delete_entity_data_by_values.

    Args:
        entity_name (str): Name of the entity whose records are to be deleted
        values (list): List of str values whose records are to be deleted
    Returns:
        None
    """
    DataStore().delete_entity_data_by_values(entity_name=entity_name, values=values)
Exemplo n.º 8
0
def entity_update_languages(entity_name, new_language_list):
    """
    Extend the language support of an entity by creating placeholder records.

    For every language that is newly requested, one record with an empty variants
    list is created for each unique value currently present in the entity.
    Removing a language is not supported.

    Args:
        entity_name (str): Name of the entity whose language list is being updated
        new_language_list (list): Complete list of language codes the entity should support
    Returns:
        bool: True once the placeholder records have been handed to the datastore
    Raises:
        APIHandlerException (Exception): if a currently supported language is missing
            from new_language_list, if no new language is provided, or if the entity
            has no values
    """
    current_languages = entity_supported_languages(entity_name)
    new_languages = set(new_language_list) - set(current_languages)

    # Dropping a language is rejected before anything else is checked.
    if set(current_languages) - set(new_language_list):
        raise APIHandlerException(
            'Removing languages is not currently supported.')

    if not new_languages:
        raise APIHandlerException(
            'No new languages provided. Nothing changed.')

    # TODO: If possible add records in single ES query instead of
    #       two (get_entity_unique_values + db.add_entity_data)
    unique_values = get_entity_unique_values(entity_name=entity_name)
    if not unique_values:
        raise APIHandlerException(
            'This entity does not have any records. Please verify the entity name'
        )

    # One empty-variant record per (new language, existing value) pair;
    # falsy values and falsy language codes are skipped.
    placeholder_records = [
        {
            'value': value,
            'language_script': language_script,
            'variants': []
        }
        for language_script in new_languages
        for value in unique_values
        if value and language_script
    ]

    DataStore().add_entity_data(entity_name, placeholder_records)
    return True
Exemplo n.º 9
0
def get_entity_word_variants(request):
    """
    Return the dictionary of an entity as a list of value/variants pairs.

    The entity name is read from the GET parameter named by ENTITY_NAME and passed
    to DataStore.get_entity_dictionary. The returned mapping is flattened into a
    list sorted by value.

    Args:
        request (HttpRequest): incoming HTTP request carrying the GET parameters

    Returns:
        HttpResponse: JSON body {"success": bool, "error": str, "result": list} where
            each result item is {"value": <value>, "variants": <variants of that value>}.
            Status is 200 on success and 500 when any step fails; on failure
            "error" holds the stringified exception and "result" stays [].
    """
    response = {"success": False, "error": "", "result": []}
    status = 200
    try:
        entity_name = request.GET.get(ENTITY_NAME)
        datastore_obj = DataStore()
        variants_by_value = datastore_obj.get_entity_dictionary(entity_name=entity_name)

        # sorted() returns a new list, which works for both python2 key lists
        # and python3 key views.
        response['result'] = [
            {
                'value': value,
                'variants': variants_by_value[value]
            }
            for value in sorted(variants_by_value.keys())
        ]
        response['success'] = True

    # The known datastore failures stay listed explicitly; ``Exception`` is the
    # catch-all for everything else. ``BaseException`` is deliberately not caught
    # so SystemExit / KeyboardInterrupt still propagate instead of becoming a 500.
    except (DataStoreSettingsImproperlyConfiguredException,
            EngineNotImplementedException,
            EngineConnectionException,
            FetchIndexForAliasException,
            Exception) as error:
        response['error'] = str(error)
        ner_logger.exception('Error: %s' % error)
        status = 500

    return HttpResponse(json.dumps(response),
                        content_type='application/json',
                        status=status)
Exemplo n.º 10
0
def update_crf_training_data(request):
    """
    Update the crf training data of an entity in the datastore.

    The POST field named by EXTERNAL_API_DATA must hold a JSON object; the entries
    stored under SENTENCES and ENTITY_NAME are read from it and passed to
    DataStore.update_entity_crf_data.

    NOTE(review): the earlier docstring showed an example payload with the keys
    "sentence_list", "entity_list", "entity_name" and "language_script", but this
    function only reads the SENTENCES and ENTITY_NAME entries - confirm the
    expected payload format against the callers.

    Args:
        request (HttpRequest): incoming HTTP request carrying the POST payload

    Returns:
        HttpResponse: JSON body {"success": bool, "error": str, "result": []}.
            Status is 200 on success and 500 when any step fails; on failure
            "error" holds the stringified exception.
    """
    response = {"success": False, "error": "", "result": []}
    status = 200
    try:
        external_api_data = json.loads(request.POST.get(EXTERNAL_API_DATA))
        sentences = external_api_data.get(SENTENCES)
        entity_name = external_api_data.get(ENTITY_NAME)
        DataStore().update_entity_crf_data(entity_name=entity_name,
                                           sentences=sentences)
        response['success'] = True

    # The known datastore failures stay listed explicitly; ``Exception`` is the
    # catch-all for everything else. ``BaseException`` is deliberately not caught
    # so SystemExit / KeyboardInterrupt still propagate instead of becoming a 500.
    except (DataStoreSettingsImproperlyConfiguredException,
            EngineNotImplementedException,
            EngineConnectionException,
            FetchIndexForAliasException,
            Exception) as error:
        response['error'] = str(error)
        ner_logger.exception('Error: %s' % error)
        status = 500

    return HttpResponse(json.dumps(response), content_type='application/json', status=status)
Exemplo n.º 11
0
def update_entity_records(entity_name, data):
    """
    Apply edited and deleted records to the dictionary data of an entity.

    Existing records of every affected value are deleted first, then the edited
    records are added back, one record per (word, language script) pair.

    Args:
        entity_name (str): Name of the entity whose records are being updated
        data (dict): May contain 'deleted' (list of records), 'edited' (list of
            records) and 'replace' (flag). Each record is read through its 'word'
            key; edited records may also carry a 'variants' dict keyed by language
            script. If the replace flag is truthy, all existing values of the entity
            are deleted before the edited records are added.

    Returns:
        None
    """
    deleted_records = data.get('deleted', [])
    edited_records = data.get('edited', [])

    if data.get('replace'):
        # TODO: Delete everything for the `entity_name` without having to fetch values first!
        # https://www.elastic.co/guide/en/elasticsearch/reference/5.6/docs-delete-by-query.html
        stale_values = get_entity_unique_values(entity_name)
    else:
        # Edited words are deleted as well so their old records do not linger.
        stale_values = ([record['word'] for record in deleted_records] +
                        [record['word'] for record in edited_records])

    # Records with a falsy word or a falsy language script are skipped.
    new_records = [
        {
            'value': record['word'],
            'language_script': language_script,
            'variants': variants.get('value', [])
        }
        for record in edited_records
        for language_script, variants in record.get('variants', {}).items()
        if record['word'] and language_script
    ]

    # Deletion must happen before the fresh records are written.
    delete_records_by_values(entity_name=entity_name, values=stale_values)
    DataStore().add_entity_data(entity_name, new_records)
Exemplo n.º 12
0
def update_entity_records(entity_name, data):
    """
    Apply edited and deleted records to the dictionary data of an entity.

    Existing records of every affected value are deleted first, then the edited
    records are added back, one record per (word, language script) pair.

    Args:
        entity_name (str): Name of the entity whose records are being updated
        data (dict): May contain 'deleted' (list of records), 'edited' (list of
            records) and 'replace' (flag). Each record is read through its 'word'
            key; edited records may also carry a 'variants' dict keyed by language
            script. If the replace flag is truthy, all existing values of the entity
            are deleted before the edited records are added.

    Returns:
        None
    """
    deleted_records = data.get('deleted', [])
    edited_records = data.get('edited', [])

    if data.get('replace'):
        # Replace mode: every value currently stored for the entity is removed.
        stale_values = get_entity_unique_values(entity_name)
    else:
        # Edited words are deleted as well so their old records do not linger.
        stale_values = ([record['word'] for record in deleted_records] +
                        [record['word'] for record in edited_records])

    # Records with a falsy word or a falsy language script are skipped.
    new_records = [
        {
            'value': record['word'],
            'language_script': language_script,
            'variants': variants.get('value', [])
        }
        for record in edited_records
        for language_script, variants in record.get('variants', {}).items()
        if record['word'] and language_script
    ]

    # Deletion must happen before the fresh records are written.
    delete_records_by_values(entity_name=entity_name, values=stale_values)
    DataStore().add_entity_data(entity_name, new_records)
Exemplo n.º 13
0
def get_entity_unique_values(entity_name,
                             empty_variants_only=False,
                             value_search_term=None,
                             variant_search_term=None):
    """
    Fetch the unique values that belong to an entity.

    Thin wrapper around DataStore.get_entity_unique_values; every argument is
    forwarded unchanged as a keyword argument.

    Args:
        entity_name (str): Name of the entity for which unique values are to be fetched
        empty_variants_only (bool, optional): Flag to search for values with empty variants only
        value_search_term (str, optional): Search term to filter values from this entity data
        variant_search_term (str, optional): Search term to filter out variants from the entity data
    Returns:
        list: List of strings which are unique values in the entity
    """
    search_options = {
        'value_search_term': value_search_term,
        'variant_search_term': variant_search_term,
        'empty_variants_only': empty_variants_only,
    }
    return DataStore().get_entity_unique_values(entity_name=entity_name, **search_options)