def feature_seven(text):
    """Flesch-Kincaid grade level of *text*.

    Matches the published formula:
    0.39 * (words / sentences) + 11.8 * (syllables / word) - 15.59
    """
    words = get_words(text)
    total_syllables = sum(count_syllables(w) for w in words)
    return (0.39 * len(words) / total_sentences(text)
            + 11.8 * total_syllables / len(words)
            - 15.59)
def feature_three(text):
    """Multi-syllable words per sentence.

    NOTE(review): the ``> 1`` threshold counts every word of two or more
    syllables; "polysyllabic" in SMOG-style metrics usually means three or
    more — confirm the intended threshold.
    """
    multi_syllable_count = sum(
        1 for w in get_words(text) if count_syllables(w) > 1
    )
    return multi_syllable_count / total_sentences(text)
def feature_three(text):
    """Multi-syllable words per sentence.

    NOTE(review): ``> 1`` counts all words with two or more syllables;
    "polysyllabic" conventionally means three or more — verify the threshold.
    """
    count = 0
    for w in get_words(text):
        if count_syllables(w) > 1:
            count = count + 1
    return count / total_sentences(text)
def feature_seven(text):
    """Flesch-Kincaid grade level:
    0.39 * (words/sentences) + 11.8 * (syllables/word) - 15.59.
    """
    words = get_words(text)
    syllable_total = 0
    for w in words:
        syllable_total = syllable_total + count_syllables(w)
    words_per_sentence = len(words) / total_sentences(text)
    syllables_per_word = syllable_total / len(words)
    return 0.39 * words_per_sentence + 11.8 * syllables_per_word - 15.59
def feature_five(text,
                 path_difficult_words="/Users/Ivan/PycharmProject/ReadAbility/DataSets_raw/DaleChallEasyWordList.txt"):
    """Dale-Chall readability score of *text*.

    score = 0.0496 * (words / sentences)
          + 0.1579 * (difficult words / words * 100)
          + 3.6365

    A word is "difficult" when it is not in the Dale-Chall easy-word list.

    Bug fixed: ``f.readlines()`` keeps the trailing newline on every entry,
    so no word could ever match the list and EVERY word was counted as
    difficult. Entries are now stripped. The list is loaded into a set for
    O(1) membership tests instead of an O(n) list scan per word.

    The word-list path is now a parameter (default unchanged) so the
    function works outside the original author's machine.

    NOTE(review): the +3.6365 adjustment is applied unconditionally here;
    the published formula adds it only when difficult words exceed 5% —
    confirm this is intended.
    """
    words = get_words(text)
    with open(path_difficult_words, 'r') as f:
        # strip() removes the trailing '\n' (and stray whitespace) per line
        easy_words = set(line.strip() for line in f)
    difficult_words_sum = 0
    for word in words:
        if word not in easy_words:
            difficult_words_sum += 1
    return 0.0496 * len(words)/total_sentences(text) + 0.1579 * difficult_words_sum/len(words) * 100 + 3.6365
def extract_features(data):
    """Named-entity density features from an XML document string.

    Returns [entities per 100 words, entities per 100 sentences], where the
    text is taken from the first child element of the XML root.
    """
    pure_text = ElementTree.fromstring(data)[0].text
    ne = extract_entities_api(data)
    word_count = len(get_words(pure_text))
    sentence_count = total_sentences(pure_text)
    return [ne / word_count * 100, ne / sentence_count * 100]
def extract_features(data):
    """Named-entity density features from an XML document string.

    feature1: entities per 100 words; feature2: entities per 100 sentences.
    Text is read from the root element's first child.
    """
    root = ElementTree.fromstring(data)
    pure_text = root[0].text
    ne = extract_entities_api(data)
    tw = len(get_words(pure_text))
    ts = total_sentences(pure_text)
    return [ne / tw * 100, ne / ts * 100]
def extract_features(data):
    """Named-entity density features of *data*.

    Returns [entities per 100 words, entities per 100 sentences] and prints
    the two values separated by a space (debug output preserved).

    Fix: the Python-2-only ``print str(...) + " " + str(...)`` statement is
    replaced with a form that is valid on both Python 2 and 3; the printed
    text is byte-identical (``%s`` applies ``str()``).
    """
    extr_entities = extract_entities(data)
    ne = extr_entities.ne
    tw = len(get_words(data))
    ts = total_sentences(data)
    feature1 = ne / tw * 100
    feature2 = ne / ts * 100
    print("%s %s" % (feature1, feature2))
    return [feature1, feature2]
def extract_features(data):
    """Named-entity density features of *data*.

    feature1: entities per 100 words; feature2: entities per 100 sentences.
    Prints both values space-separated (existing debug side effect kept).

    Fix: the Python-2-only ``print`` statement is rewritten in a form valid
    on both Python 2 and 3 with identical stdout.
    """
    extr_entities = extract_entities(data)
    ne = extr_entities.ne
    tw = len(get_words(data))
    ts = total_sentences(data)
    feature1 = ne / tw * 100
    feature2 = ne / ts * 100
    print("%s %s" % (feature1, feature2))
    return [feature1, feature2]
def feature_five(text,
                 path_difficult_words="/Users/Ivan/PycharmProject/ReadAbility/DataSets_raw/DaleChallEasyWordList.txt"):
    """Dale-Chall readability score of *text*.

    score = 0.0496 * (words / sentences)
          + 0.1579 * (difficult words / words * 100)
          + 3.6365

    Bug fixed: ``f.readlines()`` keeps the trailing newline on each
    easy-word entry, so membership tests never matched and every word was
    counted as difficult. Entries are now stripped, and the list is kept in
    a set for O(1) lookup. The word-list path is a parameter with the
    original value as default for backward compatibility.

    NOTE(review): the published Dale-Chall formula adds 3.6365 only when
    difficult words exceed 5%; this version adds it unconditionally —
    confirm intended.
    """
    words = get_words(text)
    with open(path_difficult_words, 'r') as f:
        easy_words = set(line.strip() for line in f)
    difficult_words_sum = 0
    for word in words:
        if word not in easy_words:
            difficult_words_sum += 1
    return 0.0496 * len(words) / total_sentences(text) + 0.1579 * difficult_words_sum / len(words) * 100 + 3.6365
def extract_features(data, pos):
    """Density features for words of part-of-speech *pos* in *data*.

    Returns five ratios: pos-words per 100 words, unique pos-words per 100
    words, unique pos-words over unique words, pos-words per sentence, and
    unique pos-words per sentence.
    """
    extracted = extract_words(data, pos)
    pos_words = extracted.words
    pos_unique = extracted.unique_words
    total_w = len(get_words(data))
    total_unique_w = len(np.unique(get_words(data)))
    total_s = total_sentences(data)
    return [
        pos_words / total_w * 100,
        pos_unique / total_w * 100,
        pos_unique / total_unique_w,
        pos_words / total_s,
        pos_unique / total_s,
    ]
def extract_features(data, pos):
    """Five density features for part-of-speech *pos*: counts and unique
    counts normalized by total words, unique words, and sentences."""
    extr_words = extract_words(data, pos)
    words = extr_words.words
    unique_words = extr_words.unique_words
    total_w = len(get_words(data))
    total_unique_w = len(np.unique(get_words(data)))
    total_s = total_sentences(data)
    feature1 = words / total_w * 100
    feature2 = unique_words / total_w * 100
    feature3 = unique_words / total_unique_w
    feature4 = words / total_s
    feature5 = unique_words / total_s
    return [feature1, feature2, feature3, feature4, feature5]
def extract_features(data, pos_type):
    """Density features for part-of-speech *pos_type* from an XML document.

    The plain text is taken from the XML root's first child; returns five
    ratios of pos-word counts (total and unique) against total words,
    unique words, and sentences.
    """
    pure_text = ElementTree.fromstring(data)[0].text
    extracted = extract_words(data, pos_type)
    pos_words = extracted.words
    pos_unique = extracted.unique_words
    total_w = len(get_words(pure_text))
    total_unique_w = len(np.unique(get_words(pure_text)))
    total_s = total_sentences(pure_text)
    return [
        pos_words / total_w * 100,
        pos_unique / total_w * 100,
        pos_unique / total_unique_w,
        pos_words / total_s,
        pos_unique / total_s,
    ]
def extract_features(data, pos_type):
    """Five part-of-speech density features from an XML document string:
    pos words (total/unique) normalized by words, unique words, sentences."""
    root = ElementTree.fromstring(data)
    pure_text = root[0].text
    extr_words = extract_words(data, pos_type)
    words = extr_words.words
    unique_words = extr_words.unique_words
    total_w = len(get_words(pure_text))
    total_unique_w = len(np.unique(get_words(pure_text)))
    total_s = total_sentences(pure_text)
    feature1 = words / total_w * 100
    feature2 = unique_words / total_w * 100
    feature3 = unique_words / total_unique_w
    feature4 = words / total_s
    feature5 = unique_words / total_s
    return [feature1, feature2, feature3, feature4, feature5]
def feature_six(text):
    """Average sentence length: total words divided by total sentences."""
    word_count = len(get_words(text))
    return word_count / total_sentences(text)
def feature_six(text):
    """Words per sentence in *text*."""
    sentence_count = total_sentences(text)
    return len(get_words(text)) / sentence_count