def test_wordnik_patterns_match():
    """Check that every rule in patterns.yaml matches its example sentence.

    The fixture maps rule name -> sentence that the rule is expected to fire on.
    """
    from serapis.features import match_wordnik_rules

    with open("serapis/tests/data/patterns.yaml") as f:
        # safe_load instead of yaml.load: load() without an explicit Loader is
        # deprecated since PyYAML 5.1 and can construct arbitrary Python
        # objects; the fixture is a plain mapping, so safe_load suffices.
        test_cases = yaml.safe_load(f)
    for rule, sentence in test_cases.items():
        assert rule in match_wordnik_rules(sentence), \
            "Rule {} does not match '{}'".format(rule, sentence)
def detect(message):
    """Classify every sentence of every URL in *message* as FRD or not.

    *message* must carry a list of URL objects under 'urls', each of which is
    given a 'sentences' list by batch_tag_sentences(). Each sentence dict is
    annotated in place with the model creation date, matched wordnik patterns,
    the binary FRD prediction, and its likelihood. The annotated message is
    then forwarded via write_message('save', ...).
    """
    batch_tag_sentences(message)

    # Unpack the packaged model once, up front.
    packaged = PackagedPipeline().get()
    model_created_at = packaged.metadata['created_at']
    features = packaged._feature_union
    classifier = packaged._pipeline
    # Column of the positive ('1') class in predict_proba output.
    positive_col = np.flatnonzero(classifier.classes_ == 1)[0]

    for url_object in message['urls']:
        readability_score(url_object)
        for sentence in url_object['sentences']:
            cleaned = sentence['s_clean']
            # Keep only the tag part of each "token/TAG" pair.
            pos = ' '.join(tok.split('/', 1)[-1]
                           for tok in sentence['pos_tags'].split())
            X = features.transform({
                's_clean': [sentence['s_clean']],
                'pos': [pos]
            })

            # Metadata, then model outputs.
            sentence['model_creation_date'] = model_created_at
            sentence['patterns'] = match_wordnik_rules(cleaned)
            sentence['frd'] = classifier.predict(X)[0]
            # P(classification as FRD), rounded for compact serialization.
            sentence['frd_likelihood'] = round(
                classifier.predict_proba(X)[0][positive_col], 4)

    return write_message('save', message)
def test_wordnik_patterns_match():
    """Each rule in the patterns.yaml fixture must match its paired sentence."""
    from serapis.features import match_wordnik_rules

    with open("serapis/tests/data/patterns.yaml") as f:
        # yaml.load without a Loader argument is deprecated (PyYAML >= 5.1)
        # and unsafe; the fixture is plain YAML, so use safe_load.
        test_cases = yaml.safe_load(f)
    for rule, sentence in test_cases.items():
        assert rule in match_wordnik_rules(
            sentence), "Rule {} does not match '{}'".format(rule, sentence)
def test_wordnik_patterns_perc():
    """The wordnik rules must cover more than 20% of the FRD fixture set."""
    from serapis.features import match_wordnik_rules
    from serapis.preprocess import clean_sentence

    min_coverage = 0.2
    matches = 0.0
    with open("serapis/tests/data/frds_wordnik.csv") as f:
        test_cases = list(csv.reader(f))
    for term, sentence in test_cases:
        s_clean, _ = clean_sentence(sentence, term)
        if match_wordnik_rules(s_clean):
            matches += 1
    coverage = matches / len(test_cases)
    assert coverage > min_coverage, \
        "Only matched {:.2f}% of data set".format(
            100 * matches / len(test_cases))
def test_wordnik_patterns_perc():
    """Require the wordnik rules to fire on >20% of the CSV fixture rows."""
    from serapis.features import match_wordnik_rules
    from serapis.preprocess import clean_sentence

    min_coverage = 0.2
    with open("serapis/tests/data/frds_wordnik.csv") as f:
        test_cases = list(csv.reader(f))
    # Float accumulator so the coverage ratio below is true division.
    matches = 0.0
    for term, raw_sentence in test_cases:
        cleaned, _ = clean_sentence(raw_sentence, term)
        matches += bool(match_wordnik_rules(cleaned))
    assert matches / len(test_cases) > min_coverage, (
        "Only matched {:.2f}% of data set".format(
            100 * matches / len(test_cases)
        )
    )
def detect(message):
    """Takes a message that must contain a list of URL objects, each having
    at least a doc property. This will split the doc of each URL into
    sentences, and determine whether each sentence is an FRD or not.

    Each sentence dict is mutated in place: 'model_creation_date',
    'patterns', 'frd', and 'frd_likelihood' keys are added. The annotated
    message is then handed to write_message('save', ...), whose return
    value is returned.
    """
    # Splits each URL's doc into sentences and POS-tags them, populating
    # url_object['sentences'] — presumably with 's_clean' and 'pos_tags'
    # keys, as read below; verify against batch_tag_sentences.
    batch_tag_sentences(message)
    # Load Models
    model_pipeline = PackagedPipeline().get()
    created_at = model_pipeline.metadata['created_at']
    # NOTE(review): reaching into private attributes of the packaged
    # pipeline; relies on PackagedPipeline internals staying stable.
    feature_union = model_pipeline._feature_union
    model = model_pipeline._pipeline
    class_idx = np.where(
        model.classes_ == 1)[0][0]  # index of '1' pred in .predict_proba
    for url_object in message['urls']:
        readability_score(url_object)
        for sentence in url_object['sentences']:
            sentence_clean = sentence['s_clean']
            # Drop the token and keep only the tag from each "token/TAG"
            # pair (a token with no '/' is kept whole).
            pos = ' '.join([
                i[i.find('/') + 1:] for i in sentence['pos_tags'].split()
            ])  # just pos tags
            # Single-sentence batch: the feature union expects parallel
            # lists under 's_clean' and 'pos'.
            sentence_feature_union = feature_union.transform({
                's_clean': [sentence['s_clean']],
                'pos': [pos]
            })
            # metadata
            sentence['model_creation_date'] = created_at
            # predictions from model
            sentence['patterns'] = match_wordnik_rules(sentence_clean)
            sentence['frd'] = model.predict(sentence_feature_union)[0]
            # Rounded to 4 decimals for compact storage downstream.
            sentence['frd_likelihood'] = round(
                model.predict_proba(sentence_feature_union)[0][class_idx],
                4)  # P(Classification as FRD)
    return write_message('save', message)