示例#1
0
def get_descriptive_substitutions(syn_svc, list_desc):
    """Return the parsed substitution/synonym mapping for descriptive words.

    Args:
        syn_svc: Synonym service client exposing
            ``get_all_substitutions_synonyms``.
        list_desc: Words to look up; sent to the service flagged as
            non-distinctive.

    Returns:
        The value ``parse_dict_of_lists`` builds from the ``data`` attribute
        of the service response.
    """
    response = syn_svc.get_all_substitutions_synonyms(
        words=list_desc,
        words_are_distinctive=False,
    )
    return parse_dict_of_lists(response.data)
示例#2
0
def get_distinctive_substitutions(syn_svc, list_dist):
    """Return the parsed substitution/synonym mapping for distinctive words.

    Args:
        syn_svc: Synonym service client exposing
            ``get_all_substitutions_synonyms``.
        list_dist: Words to look up; sent to the service flagged as
            distinctive.

    Returns:
        The value ``parse_dict_of_lists`` builds from the ``data`` attribute
        of the service response.
    """
    response = syn_svc.get_all_substitutions_synonyms(
        words=list_dist,
        words_are_distinctive=True,
    )
    return parse_dict_of_lists(response.data)
    def _set_entity_type_end_designation(self):
        """Compute and store ``self._entity_type_end_designation``.

        Fetches all end designations from the synonym service, parses them
        with ``parse_dict_of_lists`` and asks the service which entity types
        correspond to the designations held in ``self._designation_end_list``.
        The ``data`` of that answer is stored on the instance; nothing is
        returned.
        """
        service = self.synonym_service
        designations_in_name = self._designation_end_list

        end_designation_lookup = parse_dict_of_lists(
            service.get_all_end_designations().data)

        lookup_result = service.get_entity_type_end_designation(
            entity_end_designation_dict=end_designation_lookup,
            all_designation_any_end_list=designations_in_name)
        self._entity_type_end_designation = lookup_result.data
示例#4
0
    def _set_entity_type_any_designation(self):
        """Compute and store ``self._entity_type_any_designation``.

        Parses the designations returned by the synonym service and asks the
        service which entity types correspond to the designations held in
        ``self._designation_any_list``. The ``data`` of that answer is stored
        on the instance; nothing is returned.
        """
        service = self.synonym_service
        designations_in_name = self._designation_any_list

        # NOTE(review): the lookup table is built from the *end* designations
        # even though this method resolves *any* designations -- confirm this
        # is intentional.
        designation_lookup = parse_dict_of_lists(
            service.get_all_end_designations().data)

        lookup_result = service.get_entity_type_any_designation(
            entity_any_designation_dict=designation_lookup,
            all_designation_any_end_list=designations_in_name)
        self._entity_type_any_designation = lookup_result.data
    def get_substitutions_distinctive(self, w_dist):
        """Return substitutions for distinctive words, each including itself.

        Args:
            w_dist: Words to look up; sent to the synonym service flagged as
                distinctive.

        Returns:
            The mapping produced by ``parse_dict_of_lists`` from the service
            response, where every key has been appended to its own value list
            when it was not already present.
        """
        response = self.synonym_service.get_all_substitutions_synonyms(
            words=w_dist,
            words_are_distinctive=True,
        )
        substitutions = parse_dict_of_lists(response.data)

        # Every word must be listed as a substitution of itself.
        for word, alternatives in substitutions.items():
            if word in alternatives:
                continue
            alternatives.append(word)

        return substitutions
    def get_substitutions_descriptive(self, w_desc):
        """Return synonyms for descriptive words, each including itself.

        Args:
            w_desc: Iterable of descriptive words. Spaces are stripped from
                each word before it is sent to the synonym service, flagged
                as non-distinctive.

        Returns:
            The mapping produced by ``parse_dict_of_lists`` from the service
            response, where every key has been appended to its own value list
            when it was not already present.
        """
        compact_words = [word.replace(" ", "") for word in w_desc]
        response = self.synonym_service.get_all_substitutions_synonyms(
            words=compact_words,
            words_are_distinctive=False,
        )
        synonyms_by_word = parse_dict_of_lists(response.data)

        # Every word must be listed as a synonym of itself.
        for word, alternatives in synonyms_by_word.items():
            if word in alternatives:
                continue
            alternatives.append(word)

        return synonyms_by_word
示例#7
0
def get_all_substitutions(syn_svc, list_dist, list_desc, list_name):
    """Collect substitutions for every word of a name, in name order.

    Args:
        syn_svc: Synonym service client exposing
            ``get_all_substitutions_synonyms``.
        list_dist: Words looked up as distinctive.
        list_desc: Words looked up as descriptive.
        list_name: Words of the name; determines which words appear in the
            combined mapping and in which order.

    Returns:
        A 3-tuple ``(all, dist, desc)``: ``all`` is an ``OrderedDict`` mapping
        each word of ``list_name`` found in the distinctive mapping (checked
        first) or the descriptive mapping to its lower-cased, stemmed
        substitutions; ``dist`` and ``desc`` are the parsed, unstemmed
        mappings.
    """
    dist_response = syn_svc.get_all_substitutions_synonyms(
        words=list_dist, words_are_distinctive=True)
    dist_lookup = parse_dict_of_lists(dist_response.data)

    desc_response = syn_svc.get_all_substitutions_synonyms(
        words=list_desc, words_are_distinctive=False)
    desc_lookup = parse_dict_of_lists(desc_response.data)

    # Distinctive substitutions win when a word appears in both mappings.
    selected = collections.OrderedDict()
    for word in list_name:
        for lookup in (dist_lookup, desc_lookup):
            if word in lookup:
                selected[word] = lookup[word]
                break

    # Stem into fresh lists so the two source mappings stay unstemmed.
    stemmed = collections.OrderedDict(
        (word, [porter.stem(entry.lower()) for entry in entries])
        for word, entries in selected.items())

    return stemmed, dist_lookup, desc_lookup
示例#8
0
    def search_conflicts(self, list_dist_words, list_desc_words, list_name,
                         name):
        """Look for existing names that conflict with the proposed name.

        The distinctive and descriptive inputs are walked in lockstep. For
        each pair, both sides are expanded through the synonym service, a
        query is run for every distinctive/descriptive substitution
        combination, and ``self.get_most_similar_names`` updates the running
        similarity counters and details with the matches found.

        Args:
            list_dist_words: Iterable of distinctive word groups.
            list_desc_words: Iterable of descriptive word groups, paired
                positionally with ``list_dist_words``.
            list_name: Forwarded to ``get_most_similar_names`` and echoed in
                the result values.
            name: Forwarded to ``get_most_similar_names``.

        Returns:
            A ``ProcedureResult``. When ``prepare_response`` yields a
            non-empty response the result is invalid with code
            ``CORPORATE_CONFLICT`` and ``values`` describing the conflict;
            otherwise it is valid with code ``CHECK_IS_VALID`` and an empty
            ``values`` list.
        """
        syn_svc = self.synonym_service

        result = ProcedureResult()
        all_matches_list = []  # Contains all the conflicts from database
        dict_highest_counter = {}
        dict_highest_detail = {}

        for w_dist, w_desc in zip(list_dist_words, list_desc_words):
            dist_substitution_dict = parse_dict_of_lists(
                syn_svc.get_all_substitutions_synonyms(
                    words=w_dist, words_are_distinctive=True).data)

            desc_synonym_dict = parse_dict_of_lists(
                syn_svc.get_all_substitutions_synonyms(
                    words=w_desc, words_are_distinctive=False).data)

            # Inject distinctive section in query
            for dist in dist_substitution_dict.values():
                criteria = Request.get_general_query()
                criteria = Request.get_query_distinctive_descriptive(
                    dist, criteria, True)
                # Inject descriptive section into query, execute and add
                # matches to list
                for desc in desc_synonym_dict.values():
                    matches = Request.get_query_distinctive_descriptive(
                        desc, criteria)
                    all_matches_list.extend(matches)
                    dict_highest_counter, dict_highest_detail = self.get_most_similar_names(
                        dict_highest_counter, dict_highest_detail, matches,
                        w_dist, w_desc, list_name, name)

        # Rank by highest counter first, shortest name as tie-breaker. The
        # keys of a dict are already unique, so a plain list keeps the ranking
        # intact; passing them through a set would scramble the order.
        ranked = sorted(dict_highest_counter.items(),
                        key=lambda item: (-item[1], len(item[0])))
        most_similar_names = [
            candidate for candidate, _ in ranked[:MAX_MATCHES_LIMIT]
        ]

        response = {}
        if most_similar_names:
            response = self.prepare_response(all_matches_list,
                                             most_similar_names,
                                             dict_highest_detail)

        if response:
            result.is_valid = False
            result.result_code = AnalysisIssueCodes.CORPORATE_CONFLICT
            result.values = {
                'list_name': list_name,
                'list_dist': list_dist_words,
                'list_desc': list_desc_words,
                'list_conflicts': response['names'],
                'corp_num': response['corp_num'],
                'consumption_date': response['consumption_date']
            }
        else:
            result.is_valid = True
            result.result_code = AnalysisIssueCodes.CHECK_IS_VALID
            result.values = []
        return result