def write_anotations_to_file(lst_annotation, file_name):
    """Write annotations to *file_name* in CoNLL-style token-per-line format.

    For each annotation the output is: the tokens preceding the car name
    labelled 'O', the car-name tokens labelled 'B' (first token) / 'I'
    (rest), the tokens after it labelled 'O', then a blank line separating
    annotations.

    :param lst_annotation: iterable of annotation objects exposing ``.text``
        (full text) and ``.name`` (the car name) — assumed from usage here.
    :param file_name: path of the UTF-8 file to (over)write.
    """
    with codecs.open(file_name, 'w', 'utf-8') as f:
        for annotation in lst_annotation:
            annotation_full_text = annotation.text
            car_name = preprocessor_text(annotation.name)
            annotation_start = annotation_full_text.find(car_name)
            if annotation_start == -1:
                # Preprocessed name does not occur in the text: emitting
                # B/I labels would mislabel arbitrary tokens, so label the
                # whole text 'O' instead of producing misaligned output.
                for token in word_tokenize(preprocessor_text(annotation_full_text.strip())):
                    f.write(token + u' ' + u'O' + u'\n')
                f.write(u'\n')
                continue
            # BUG FIX: the end offset must be relative to the position just
            # found, not annotation.start (an attribute indexing some other
            # text), otherwise the 'after' slice is misaligned.
            annotation_end = annotation_start + len(car_name)
            full_text_before_annotation = preprocessor_text(
                annotation_full_text[:annotation_start].strip())
            for token in word_tokenize(full_text_before_annotation):
                f.write(token + u' ' + u'O' + u'\n')
            for idx, token in enumerate(word_tokenize(car_name)):
                # First token of the entity gets 'B', the rest 'I' (BIO).
                label = u'B' if idx == 0 else u'I'
                f.write(token + u' ' + label + u'\n')
            full_text_after_annotation = preprocessor_text(
                annotation_full_text[annotation_end:]).strip()
            for token in word_tokenize(full_text_after_annotation):
                f.write(token + u' ' + u'O' + u'\n')
            # Blank line terminates this annotation's sentence block.
            f.write(u'\n')
def write_anotations_to_file(lst_annotation, file_name):
    """Write annotations to *file_name* in CoNLL-style token-per-line format.

    For each annotation: tokens before the car name are labelled 'O', the
    car-name tokens 'B'/'I', tokens after 'O', followed by a blank separator
    line.

    :param lst_annotation: iterable of objects with ``.text`` and ``.name``
        attributes — assumed from usage here.
    :param file_name: path of the UTF-8 file to (over)write.
    """
    with codecs.open(file_name, 'w', 'utf-8') as f:
        for annotation in lst_annotation:
            annotation_full_text = annotation.text
            car_name = preprocessor_text(annotation.name)
            annotation_start = annotation_full_text.find(car_name)
            if annotation_start == -1:
                # Name absent after preprocessing: label everything 'O'
                # rather than emit misaligned B/I labels.
                for token in word_tokenize(preprocessor_text(annotation_full_text.strip())):
                    f.write(token + u' ' + u'O' + u'\n')
                f.write(u'\n')
                continue
            # BUG FIX: end offset is relative to the found position, not to
            # annotation.start, which indexes a different string.
            annotation_end = annotation_start + len(car_name)
            full_text_before_annotation = preprocessor_text(
                annotation_full_text[:annotation_start].strip())
            for token in word_tokenize(full_text_before_annotation):
                f.write(token + u' ' + u'O' + u'\n')
            for idx, token in enumerate(word_tokenize(car_name)):
                # BIO scheme: first entity token 'B', subsequent ones 'I'.
                label = u'B' if idx == 0 else u'I'
                f.write(token + u' ' + label + u'\n')
            full_text_after_annotation = preprocessor_text(
                annotation_full_text[annotation_end:]).strip()
            for token in word_tokenize(full_text_after_annotation):
                f.write(token + u' ' + u'O' + u'\n')
            # Blank line separates annotations in the CoNLL file.
            f.write(u'\n')
def predict_cars(clf, sentence):
    """Tag *sentence* with *clf* and return the car names found in it.

    The sentence is preprocessed and tokenized, written to a temporary
    CoNLL file with dummy 'O' labels (load_conll needs the file format),
    re-loaded as a feature matrix, and predicted. Runs of B/I tags are
    joined back into car-name strings.

    :param clf: trained sequence classifier with ``predict(X, lengths)``.
    :param sentence: raw input text.
    :return: list of car-name strings (possibly empty).
    """
    test_f_name = os.path.join(current_dir, './../data/test_ann')
    sentence = preprocessor_text(sentence)
    tokens = word_tokenize(sentence)
    with codecs.open(test_f_name, 'w', 'utf-8') as f:
        for t in tokens:
            # Dummy 'O' labels: only the tokens matter for prediction.
            f.write(t + u' ' + u'O' + u'\n')
        f.flush()
    X, y, lengths = load_conll(test_f_name, features)
    y_pred = clf.predict(X, lengths)
    found_cars = []
    current_car = []
    for idx, token in enumerate(y_pred):
        t = str(token)
        if t == 'B':
            # BUG FIX: 'B' starts a NEW entity — flush any entity in
            # progress first; previously two adjacent cars (B ... B ...)
            # were merged into a single name.
            if current_car:
                found_cars.append(u' '.join(current_car))
            current_car = [tokens[idx]]
        elif t == 'I':
            current_car.append(tokens[idx])
        else:
            # BUG FIX: close the entity on any 'O', even when it was
            # started by a stray 'I'; previously such tokens leaked into
            # the next entity.
            if current_car:
                found_cars.append(u' '.join(current_car))
                current_car = []
    if current_car:
        found_cars.append(u' '.join(current_car))
    return found_cars
def predict_cars(clf, sentence):
    """Predict car-name mentions in *sentence* with the trained model *clf*.

    Tokens are dumped to a temporary CoNLL file with placeholder 'O' labels
    so load_conll can build the feature matrix, then tagged; consecutive
    B/I-tagged tokens are joined into car-name strings.

    :param clf: trained sequence classifier with ``predict(X, lengths)``.
    :param sentence: raw input text.
    :return: list of car-name strings (possibly empty).
    """
    test_f_name = os.path.join(current_dir, './../data/test_ann')
    sentence = preprocessor_text(sentence)
    tokens = word_tokenize(sentence)
    with codecs.open(test_f_name, 'w', 'utf-8') as f:
        for t in tokens:
            # Placeholder 'O' labels; only the token column is used.
            f.write(t + u' ' + u'O' + u'\n')
        f.flush()
    X, y, lengths = load_conll(test_f_name, features)
    y_pred = clf.predict(X, lengths)
    found_cars = []
    current_car = []
    for idx, token in enumerate(y_pred):
        t = str(token)
        if t == 'B':
            # BUG FIX: a 'B' tag begins a new entity, so close the one in
            # progress; the original merged adjacent entities (B ... B ...)
            # into a single car name.
            if current_car:
                found_cars.append(u' '.join(current_car))
            current_car = [tokens[idx]]
        elif t == 'I':
            current_car.append(tokens[idx])
        else:
            # BUG FIX: flush on 'O' even for an entity begun by a stray
            # 'I'; the original carried those tokens into the next entity.
            if current_car:
                found_cars.append(u' '.join(current_car))
                current_car = []
    if current_car:
        found_cars.append(u' '.join(current_car))
    return found_cars