Python stdize_case примеры использования

Язык программирования: Python

Пространство имен/Пакет: psCleanup

Метод/Функция: stdize_case

Примеров на hotexamples.com: 12

Python stdize_case — найдено 12 примеров. Это лучшие примеры Python-кода для psCleanup.stdize_case, полученные из open-source-проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: pandas_query_clean_par.py Проект: ChaoOnGitHub/psClean

 def name_clean_wrapper(name_list, clean_regex=name_address_regex, legal_regex=legal_regex):
     """Run names through the psCleanup pipeline and return the legal-id result.

     The input is passed, in order, through ``psCleanup.decoder``,
     ``psCleanup.remove_diacritics``, ``psCleanup.stdize_case`` and
     ``psCleanup.master_clean_regex`` (with ``clean_regex``); the cleaned
     value is then handed to ``psCleanup.get_legal_ids`` together with
     ``legal_regex``.

     Args:
         name_list: Raw name data accepted by ``psCleanup.decoder``.
         clean_regex: Second argument for ``psCleanup.master_clean_regex``.
         legal_regex: Second argument for ``psCleanup.get_legal_ids``.

     Returns:
         Whatever ``psCleanup.get_legal_ids`` returns for the cleaned names.
     """
     decoded = psCleanup.decoder(name_list)
     normalized = psCleanup.stdize_case(psCleanup.remove_diacritics(decoded))
     cleaned = psCleanup.master_clean_regex(normalized, clean_regex)
     return psCleanup.get_legal_ids(cleaned, legal_regex)

Пример #2

Показать файл

Файл: generate_cleaned_names_par.py Проект: ChaoOnGitHub/psClean

def clean_wrapper(name_string, dict_list=regex_dicts):
    """Print a single name, clean it with psCleanup, and return it stripped.

    Args:
        name_string: The name to clean; it is echoed to stdout first.
        dict_list: Second argument for ``psCleanup.master_clean_regex``.

    Returns:
        The first element of the ``master_clean_regex`` result with
        ``.strip()`` applied.
    """
    # Single-argument parenthesized form prints the same text as the bare
    # print statement.
    print(name_string)
    normalized = psCleanup.stdize_case(psCleanup.rem_diacritics(name_string))
    # master_clean_regex is given a one-element list; unwrap it afterwards.
    cleaned = psCleanup.master_clean_regex([normalized], dict_list)
    return cleaned[0].strip()

Пример #3

Показать файл

Файл: generate_cleaned_names_par.py Проект: zhengpingwan/psClean

def clean_wrapper(name_string, dict_list=regex_dicts):
    """Echo one name, then return its psCleanup-cleaned, stripped form.

    Args:
        name_string: Name to process; printed to stdout before cleaning.
        dict_list: Forwarded to ``psCleanup.master_clean_regex``.

    Returns:
        Element 0 of the list returned by ``master_clean_regex``, stripped.
    """
    print(name_string)
    without_marks = psCleanup.rem_diacritics(name_string)
    recased = psCleanup.stdize_case(without_marks)
    # The regex cleaner is called with a single-item list here.
    first_cleaned = psCleanup.master_clean_regex([recased], dict_list)[0]
    return first_cleaned.strip()

Пример #4

Показать файл

 def address_clean_wrapper(address_list, clean_regex=name_address_regex):
     """Run addresses through the psCleanup cleaning pipeline.

     Steps, in order: ``psCleanup.decoder``, ``psCleanup.remove_diacritics``,
     ``psCleanup.stdize_case``, then ``psCleanup.master_clean_regex`` with
     ``clean_regex``.

     Args:
         address_list: Raw address data accepted by ``psCleanup.decoder``.
         clean_regex: Second argument for ``psCleanup.master_clean_regex``.

     Returns:
         The value returned by ``psCleanup.master_clean_regex``.
     """
     decoded = psCleanup.decoder(address_list)
     without_marks = psCleanup.remove_diacritics(decoded)
     recased = psCleanup.stdize_case(without_marks)
     return psCleanup.master_clean_regex(recased, clean_regex)

Пример #5

Показать файл

Файл: clean_raw_text.py Проект: ChaoOnGitHub/psClean

def clean_wrapper(name_dict, dict_list=regex_dicts):
    """Clean the ``"person_name"`` entry of a record in place.

    The entry is decoded with ``psCleanup.decoder``, passed through
    ``rem_diacritics``, ``stdize_case`` and ``master_clean_regex``, stripped,
    re-encoded with ``psCleanup.encoder`` and written back to the same key.

    Args:
        name_dict: Mapping with a ``"person_name"`` key; it is mutated.
        dict_list: Second argument for ``psCleanup.master_clean_regex``.

    Returns:
        The same ``name_dict`` object that was passed in.
    """
    decoded = psCleanup.decoder(name_dict["person_name"])
    normalized = psCleanup.stdize_case(psCleanup.rem_diacritics(decoded))
    # The regex cleaner is given a one-element list; take its only result.
    cleaned = psCleanup.master_clean_regex([normalized], dict_list)[0].strip()
    name_dict["person_name"] = psCleanup.encoder(cleaned)
    return name_dict

Пример #6

Показать файл

def clean_wrapper(name_dict, dict_list=regex_dicts):
    """Replace ``name_dict['person_name']`` with its cleaned, re-encoded form.

    Args:
        name_dict: Mapping holding a ``'person_name'`` entry; updated in place.
        dict_list: Forwarded to ``psCleanup.master_clean_regex``.

    Returns:
        ``name_dict`` itself, after the entry has been overwritten.
    """
    raw_name = psCleanup.decoder(name_dict['person_name'])
    without_marks = psCleanup.rem_diacritics(raw_name)
    recased = psCleanup.stdize_case(without_marks)
    # Wrap in a list for master_clean_regex, then unwrap the single result.
    regex_cleaned = psCleanup.master_clean_regex([recased], dict_list)
    stripped = regex_cleaned[0].strip()
    name_dict['person_name'] = psCleanup.encoder(stripped)
    return name_dict

Пример #7

Показать файл

 def name_clean_wrapper(name_list,
                        clean_regex=name_address_regex,
                        legal_regex=legal_regex):
     """Clean names with psCleanup and return the ``get_legal_ids`` result.

     Args:
         name_list: Raw name data accepted by ``psCleanup.decoder``.
         clean_regex: Second argument for ``psCleanup.master_clean_regex``.
         legal_regex: Second argument for ``psCleanup.get_legal_ids``.

     Returns:
         The value returned by ``psCleanup.get_legal_ids``.
     """
     cleaned = psCleanup.decoder(name_list)
     # Single-argument normalization steps, applied in this order.
     for normalize in (psCleanup.remove_diacritics, psCleanup.stdize_case):
         cleaned = normalize(cleaned)
     cleaned = psCleanup.master_clean_regex(cleaned, clean_regex)
     return psCleanup.get_legal_ids(cleaned, legal_regex)

Пример #8

Показать файл

Файл: test_clean_disambig_workflow_3gram.py Проект: ChaoOnGitHub/psClean

def translate_non_alphanumerics(to_translate, translate_to=u' '):
    """Replace every ``string.punctuation`` character in a text string.

    Despite the name, only the characters listed in ``string.punctuation``
    are mapped; other non-alphanumeric characters (e.g. whitespace) are
    left untouched.

    Args:
        to_translate: Text string whose ``translate`` method accepts a
            mapping from code points to replacements (``unicode`` on
            Python 2, ``str`` on Python 3).
        translate_to: Replacement used for each punctuation character;
            defaults to a single space.

    Returns:
        The result of ``to_translate.translate`` with that mapping.
    """
    # ord() gives the same code points whether string.punctuation is a byte
    # string or text, so the table can be built without the Python-2-only
    # unicode() conversion.
    translate_table = {ord(char): translate_to for char in string.punctuation}
    return to_translate.translate(translate_table)

def strip_punc(s):
    """Return ``s`` with every ``string.punctuation`` character removed.

    The previous implementation relied on ``string.maketrans`` and the
    two-argument form of ``translate``, which only work for Python 2 byte
    strings. Filtering character by character gives the same result for
    those inputs and also works for unicode text and on Python 3.

    Args:
        s: The string to strip.

    Returns:
        A string made of the characters of ``s`` that are not in
        ``string.punctuation``, in their original order.
    """
    # Build the lookup set once so each membership test is O(1).
    punctuation = frozenset(string.punctuation)
    return "".join(ch for ch in s if ch not in punctuation)
# Original author's note: function names below are not exact.
# Stage 1: time the cleaning pipeline over every entry in `names`.
N = len(names)
t0 = time.time()
# Each step is a separate pass over the list, applied in this order.
clean_names = [psCleanup.rem_diacritics(n) for n in names]
clean_names = [psCleanup.rem_trail_spaces(n) for n in clean_names]
clean_names = [psCleanup.stdize_case(n) for n in clean_names]
clean_names = [translate_non_alphanumerics(n) for n in clean_names]
clean_names = psCleanup.master_clean_dicts(clean_names, all_dicts)
clean_names = [n.strip() for n in clean_names]
t1 = time.time()

# Original author's measurement: works out to ~ 0.05s / entry.
# Print the average wall-clock cleaning time per name.
clean_time = t1 - t0
print clean_time / N

# Stage 2: pre-cluster by the leading 3 characters of the name, timing the
# call separately (t0/t1 are reused for this measurement).
t0 = time.time()
leading_ngram_dict = psDisambig.build_leading_ngram_dict(clean_names, leading_n=3)
t1 = time.time()

leading_ngram_time = t1 - t0

Пример #9

Показать файл

Файл: test_blocking_strategies.py Проект: zhengpingwan/psClean

    translate_table = dict(
        (ord(char), translate_to) for char in not_letters_or_digits)
    return to_translate.translate(translate_table)


def strip_punc(s):
    """Remove all ``string.punctuation`` characters from ``s``.

    ``string.maketrans`` and two-argument ``translate`` exist only for
    Python 2 byte strings, so the deletion is done by filtering characters
    instead; this yields the same output for those inputs while also
    accepting unicode text and running on Python 3.

    Args:
        s: The string to strip.

    Returns:
        The characters of ``s`` that are not punctuation, joined in order.
    """
    to_drop = frozenset(string.punctuation)
    kept = [ch for ch in s if ch not in to_drop]
    return "".join(kept)


# Original author's note: function names below are not exact.
# Clean every entry in `names`, bracketing the pipeline with t0/t1 timestamps.
N = len(names)
t0 = time.time()
# Each step is a separate pass over the list, applied in this order.
clean_names = [psCleanup.rem_diacritics(n) for n in names]
clean_names = [psCleanup.rem_trail_spaces(n) for n in clean_names]
clean_names = [psCleanup.stdize_case(n) for n in clean_names]
clean_names = [translate_non_alphanumerics(n) for n in clean_names]
clean_names = psCleanup.master_clean_dicts(clean_names, all_dicts)
# Drop every space character so each name becomes one contiguous string.
clean_names = [re.sub(' ', '', n) for n in clean_names]
t1 = time.time()

## Define some blocking functions


def block_by_2_ngrams(name_string, ngram_length=2):
    """Begin building n-gram blocks over a collection of names.

    NOTE(review): this excerpt stops partway through the loop body, so how
    ``block_dict`` is filled and what the function returns are not visible.

    Args:
        name_string: Iterated item by item; each item is indexed and passed
            to ``len()`` -- presumably a list of name strings despite the
            singular name; confirm against the caller.
        ngram_length: Number of consecutive characters in each n-gram.
    """
    block_dict = {}
    for name in name_string:
        # Collect the distinct runs of `ngram_length` consecutive characters.
        # NOTE(review): range(len(name) - ngram_length) stops one start index
        # short, so the n-gram ending at the last character is never
        # produced -- confirm whether that is intended.
        these_ngrams = set([
            ''.join(name[j] for j in range(i, i + ngram_length))
            for i in range((len(name) - ngram_length))
        ])

Пример #10

Показать файл

Файл: generate_cleaned_names_par.py Проект: ChaoOnGitHub/psClean

def clean_list_wrapper(name_list, dict_list=all_dicts):
    """Clean a list of names with the psCleanup helpers.

    Every name goes through ``psCleanup.rem_diacritics`` and then
    ``psCleanup.stdize_case``; the whole list is passed to
    ``psCleanup.master_clean_dicts`` and ``.strip()`` is applied to each
    result.

    Args:
        name_list: Iterable of names to clean.
        dict_list: Second argument for ``psCleanup.master_clean_dicts``.
            Defaults to the module-level ``all_dicts``.

    Returns:
        A list of the cleaned, stripped names.
    """
    out = [psCleanup.rem_diacritics(n) for n in name_list]
    out = [psCleanup.stdize_case(n) for n in out]
    # Use the caller-supplied dict_list: previously the parameter was ignored
    # and the global all_dicts was always used. Behavior with the default
    # argument is unchanged.
    out = psCleanup.master_clean_dicts(out, dict_list)
    return [n.strip() for n in out]

Пример #11

Показать файл

Файл: pandas_query_clean_par.py Проект: ChaoOnGitHub/psClean

 def address_clean_wrapper(address_list, clean_regex=name_address_regex):
     """Decode, normalize and regex-clean addresses via psCleanup.

     Args:
         address_list: Raw address data accepted by ``psCleanup.decoder``.
         clean_regex: Forwarded to ``psCleanup.master_clean_regex``.

     Returns:
         The value returned by ``psCleanup.master_clean_regex``.
     """
     # decoder -> remove_diacritics -> stdize_case, innermost call first.
     normalized = psCleanup.stdize_case(
         psCleanup.remove_diacritics(psCleanup.decoder(address_list)))
     return psCleanup.master_clean_regex(normalized, clean_regex)

Пример #12

Показать файл

Файл: generate_cleaned_names_par.py Проект: zhengpingwan/psClean

def clean_list_wrapper(name_list, dict_list=all_dicts):
    """Run a list of names through the psCleanup cleaning steps.

    The names are passed through ``psCleanup.rem_diacritics`` and
    ``psCleanup.stdize_case`` (one pass each), then through
    ``psCleanup.master_clean_dicts`` as a whole list, and finally each
    result has ``.strip()`` applied.

    Args:
        name_list: Iterable of names to clean.
        dict_list: Passed as the second argument to
            ``psCleanup.master_clean_dicts``; defaults to the module-level
            ``all_dicts``.

    Returns:
        A list of the cleaned, stripped names.
    """
    without_marks = [psCleanup.rem_diacritics(name) for name in name_list]
    recased = [psCleanup.stdize_case(name) for name in without_marks]
    # The dict_list parameter was previously unused (the global all_dicts was
    # hard-coded); pass it through so callers can override the dictionaries.
    cleaned = psCleanup.master_clean_dicts(recased, dict_list)
    return [name.strip() for name in cleaned]