def feature_seven(text):
    """Flesch-Kincaid grade level of *text*.

    Combines average sentence length with average syllables per word:
    0.39 * (words/sentences) + 11.8 * (syllables/words) - 15.59
    """
    words = get_words(text)
    syllable_total = sum(count_syllables(w) for w in words)
    return (0.39 * len(words) / total_sentences(text)
            + 11.8 * syllable_total / len(words)
            - 15.59)
def feature_three(text):
    """Average number of multi-syllable words (more than one syllable) per sentence."""
    words = get_words(text)
    poly_count = sum(1 for w in words if count_syllables(w) > 1)
    return poly_count / total_sentences(text)
def feature_three(text):
    """Multi-syllable words (>1 syllable) per sentence in *text*."""
    hard_words = [w for w in get_words(text) if count_syllables(w) > 1]
    return len(hard_words) / total_sentences(text)
def feature_seven(text):
    """Flesch-Kincaid grade level of *text* (words/sentence and syllables/word)."""
    words = get_words(text)
    syllable_count = sum(map(count_syllables, words))
    grade = 0.39 * len(words) / total_sentences(text)
    grade += 11.8 * syllable_count / len(words)
    grade -= 15.59
    return grade
def feature_five(text, path_difficult_words="/Users/Ivan/PycharmProject/ReadAbility/DataSets_raw/DaleChallEasyWordList.txt"):
    """Dale-Chall readability score of *text*.

    0.0496 * (words/sentences) + 0.1579 * (% difficult words) + 3.6365,
    where a word counts as "difficult" when it is absent from the
    Dale-Chall easy-word list read from *path_difficult_words*
    (parameterized with the original hard-coded path as default).
    """
    words = get_words(text)

    with open(path_difficult_words, 'r') as f:
        # Bug fix: readlines() keeps the trailing '\n' on every entry, so the
        # original membership test `word not in difficult_words` matched
        # (almost) nothing and counted nearly every word as difficult.
        # Strip each line and use a set for O(1) lookups.
        easy_words = {line.strip() for line in f}

    difficult_words_sum = sum(1 for word in words if word not in easy_words)

    return (0.0496 * len(words) / total_sentences(text)
            + 0.1579 * difficult_words_sum / len(words) * 100 + 3.6365)
Example #6
0
def extract_features(data):
    """Named-entity density features of XML *data*: entities per 100 words
    and entities per 100 sentences (text taken from the first child element)."""
    root = ElementTree.fromstring(data)
    pure_text = root[0].text

    entity_count = extract_entities_api(data)
    word_count = len(get_words(pure_text))
    sentence_count = total_sentences(pure_text)

    return [entity_count / word_count * 100,
            entity_count / sentence_count * 100]
Example #7
0
def extract_features(data):
    """Two named-entity features for XML *data*: NE per 100 words, NE per 100 sentences."""
    document = ElementTree.fromstring(data)
    text = document[0].text

    named_entities = extract_entities_api(data)

    features = []
    features.append(named_entities / len(get_words(text)) * 100)
    features.append(named_entities / total_sentences(text) * 100)
    return features
def extract_features(data):
    """Named-entity density features of raw *data*.

    Returns [entities per 100 words, entities per 100 sentences] and
    prints both values, separated by a space.
    """
    extr_entities = extract_entities(data)

    ne = extr_entities.ne
    tw = len(get_words(data))
    ts = total_sentences(data)

    feature1 = ne / tw * 100
    feature2 = ne / ts * 100

    # Fix: the original `print str(...)` statement is a SyntaxError on
    # Python 3; the call form below prints the same text on both 2 and 3.
    print(str(feature1) + " " + str(feature2))
    return [feature1, feature2]
def extract_features(data):
    """Named-entity density features of raw *data*: [NE per 100 words,
    NE per 100 sentences]. Also prints the two values space-separated.
    """
    extr_entities = extract_entities(data)

    ne = extr_entities.ne
    tw = len(get_words(data))
    ts = total_sentences(data)

    feature1 = ne / tw * 100
    feature2 = ne / ts * 100

    # Fix: Python-2-only `print str(...)` statement replaced with the
    # print() call form, which is valid and identical on Python 2 and 3.
    print(str(feature1) + " " + str(feature2))
    return [feature1, feature2]
def feature_five(text, path_difficult_words="/Users/Ivan/PycharmProject/ReadAbility/DataSets_raw/DaleChallEasyWordList.txt"):
    """Dale-Chall readability score of *text*.

    0.0496 * (words/sentences) + 0.1579 * (% difficult words) + 3.6365.
    A word is "difficult" when it is missing from the easy-word list at
    *path_difficult_words* (hard-coded path kept as the default value).
    """
    words = get_words(text)

    with open(path_difficult_words, 'r') as f:
        # Bug fix: f.readlines() leaves '\n' on every list entry, so the
        # membership test below never matched and mis-counted nearly every
        # word as difficult. Strip entries; a set gives O(1) lookups.
        easy_words = {line.strip() for line in f}

    difficult_words_sum = sum(1 for word in words if word not in easy_words)

    return (0.0496 * len(words) / total_sentences(text)
            + 0.1579 * difficult_words_sum / len(words) * 100 + 3.6365)
def extract_features(data, pos):
    """Five POS-based features of *data*: counts of *pos*-tagged words and of
    unique ones, normalized by total words, total unique words, and sentences."""
    extracted = extract_words(data, pos)
    pos_words = extracted.words
    pos_unique = extracted.unique_words

    all_words = get_words(data)
    total_w = len(all_words)
    total_unique_w = len(np.unique(all_words))
    total_s = total_sentences(data)

    return [
        pos_words / total_w * 100,
        pos_unique / total_w * 100,
        pos_unique / total_unique_w,
        pos_words / total_s,
        pos_unique / total_s,
    ]
def extract_features(data, pos):
    """POS-tag density features of *data*: tagged words and unique tagged
    words relative to total words, unique words, and sentence count."""
    extracted = extract_words(data, pos)

    n_words = len(get_words(data))
    n_unique = len(np.unique(get_words(data)))
    n_sentences = total_sentences(data)

    f1 = extracted.words / n_words * 100
    f2 = extracted.unique_words / n_words * 100
    f3 = extracted.unique_words / n_unique
    f4 = extracted.words / n_sentences
    f5 = extracted.unique_words / n_sentences
    return [f1, f2, f3, f4, f5]
def extract_features(data, pos_type):
    """Five POS-based features of XML *data* (plain text taken from the first
    child element): *pos_type*-tagged word counts normalized by total words,
    unique words, and sentences."""
    root = ElementTree.fromstring(data)
    pure_text = root[0].text

    extracted = extract_words(data, pos_type)
    tagged = extracted.words
    tagged_unique = extracted.unique_words

    all_words = get_words(pure_text)
    total_w = len(all_words)
    total_unique_w = len(np.unique(all_words))
    total_s = total_sentences(pure_text)

    return [
        tagged / total_w * 100,
        tagged_unique / total_w * 100,
        tagged_unique / total_unique_w,
        tagged / total_s,
        tagged_unique / total_s,
    ]
def extract_features(data, pos_type):
    """POS-tag density features of XML *data*; statistics are computed on the
    plain text found in the document's first child element."""
    document = ElementTree.fromstring(data)
    text = document[0].text

    extracted = extract_words(data, pos_type)

    n_words = len(get_words(text))
    n_unique = len(np.unique(get_words(text)))
    n_sentences = total_sentences(text)

    f1 = extracted.words / n_words * 100
    f2 = extracted.unique_words / n_words * 100
    f3 = extracted.unique_words / n_unique
    f4 = extracted.words / n_sentences
    f5 = extracted.unique_words / n_sentences
    return [f1, f2, f3, f4, f5]
def feature_six(text):
    """Average sentence length of *text*: total words divided by total sentences."""
    words = get_words(text)
    return len(words) / total_sentences(text)
def feature_six(text):
    """Mean number of words per sentence in *text*."""
    n_words = len(get_words(text))
    n_sentences = total_sentences(text)
    return n_words / n_sentences