def run_FKGL(output_dir):
    """Compute the Flesch-Kincaid grade level of the text stored in a file.

    Despite its name, ``output_dir`` is passed straight to ``open`` and is
    therefore the path of a text file. Each line is lower-cased and stripped
    of surrounding whitespace, the lines are joined with single spaces, and
    the resulting string is handed to ``Readability``.

    Args:
        output_dir: Path of the text file to score.

    Returns:
        The value of ``Readability.FleschKincaidGradeLevel()`` for the
        normalised text.
    """
    with open(output_dir) as handle:
        normalised = " ".join(line.lower().strip() for line in handle)
    return Readability(normalised).FleschKincaidGradeLevel()
def show_stat(text):
    """Print ``text`` followed by every readability metric computed for it.

    The output is a ``Test text:`` header, the quoted text followed by a blank
    line, and then one ``<label>:  <value>`` line per metric in this order:
    ARI, FleschReadingEase, FleschKincaidGradeLevel, GunningFogIndex,
    SMOGIndex, ColemanLiauIndex, LIX, RIX.

    Every ``print`` call receives a single pre-formatted string, so the
    function emits the same bytes whether ``print`` is a statement (Python 2)
    or a function (Python 3).

    Args:
        text: The text to analyse; passed unchanged to ``Readability``.
    """
    rd = Readability(text)
    print('Test text:')
    print('"%s"\n' % text)

    metrics = (
        ('ARI', rd.ARI),
        ('FleschReadingEase', rd.FleschReadingEase),
        ('FleschKincaidGradeLevel', rd.FleschKincaidGradeLevel),
        ('GunningFogIndex', rd.GunningFogIndex),
        ('SMOGIndex', rd.SMOGIndex),
        ('ColemanLiauIndex', rd.ColemanLiauIndex),
        ('LIX', rd.LIX),
        ('RIX', rd.RIX),
    )
    for label, compute in metrics:
        # Two spaces after the colon: one belonged to the label text and one
        # was the separator a multi-argument print inserts.
        print('%s:  %s' % (label, compute()))
def get_read_stats(text):
    """Collect the readability metrics of ``text`` into a dictionary.

    Args:
        text: The text to analyse; passed unchanged to ``Readability``.

    Returns:
        A dict with the keys ``ari``, ``flesch_reading_ease``,
        ``flesch_kincaid_grade_level``, ``gunning_fog_index``, ``smog_index``,
        ``coleman_liau_index``, ``lix`` and ``rix``, each holding the result
        of the corresponding ``Readability`` method.
    """
    scorer = Readability(text)
    return {
        'ari': scorer.ARI(),
        'flesch_reading_ease': scorer.FleschReadingEase(),
        'flesch_kincaid_grade_level': scorer.FleschKincaidGradeLevel(),
        'gunning_fog_index': scorer.GunningFogIndex(),
        'smog_index': scorer.SMOGIndex(),
        'coleman_liau_index': scorer.ColemanLiauIndex(),
        'lix': scorer.LIX(),
        'rix': scorer.RIX(),
    }
def readability(id):
    """Return the readability metrics of the document identified by ``id``.

    The document text is fetched with ``getDocContent(id)`` and scored with
    ``Readability``.

    Args:
        id: Document identifier, forwarded unchanged to ``getDocContent``.

    Returns:
        A dict keyed by metric name (``ARI``, ``FleschReadingEase``,
        ``FleschKincaidGradeLevel``, ``RIX``, ``GunningFogIndex``,
        ``SMOGIndex``, ``ColemanLiauIndex``, ``LIX``) holding the result of
        the ``Readability`` method of the same name.
    """
    scorer = Readability(getDocContent(id))
    return {
        "ARI": scorer.ARI(),
        "FleschReadingEase": scorer.FleschReadingEase(),
        "FleschKincaidGradeLevel": scorer.FleschKincaidGradeLevel(),
        "RIX": scorer.RIX(),
        "GunningFogIndex": scorer.GunningFogIndex(),
        "SMOGIndex": scorer.SMOGIndex(),
        "ColemanLiauIndex": scorer.ColemanLiauIndex(),
        "LIX": scorer.LIX(),
    }
def getReadability():
    """Write the readability metrics of every author document to a CSV file.

    Scans the module-level ``directory``. Each entry whose name ends in
    ``.xml`` is loaded with ``gettext`` and treated as one author's text; the
    author id is the file name without its last four characters. Every other
    entry is read as a truth file whose lines have the form
    ``id:::field:::field...``; the fields after the id are stored per id.

    One row per author is then written to ``PANreadibility.csv`` in the
    current working directory: the author id, the first two truth fields
    (the header labels them Gender and Age) and the eight ``Readability``
    metrics of the author's text after non-ASCII characters are dropped.

    Raises:
        KeyError: If an author id has no entry in the truth data.
        IndexError: If an author's truth entry has fewer than two fields.
    """
    texts = []
    authors = []
    truth = {}

    for file_name in os.listdir(directory):
        if file_name.endswith(".xml"):
            texts.append(gettext(file_name))
            authors.append(file_name[:-4])
            continue

        with open(os.path.join(directory, file_name), 'r') as truth_file:
            # The slice drops the final element, which is the empty string
            # when the file ends with a newline.
            records = truth_file.read().split('\n')[:-1]
        for record in records:
            fields = record.split(':::')
            truth[fields[0]] = fields[1:]

    header = (
        'ID,Gender,Age,ARI,FleschReadingEase,FleschKincaidGradeLevel,'
        'GunningFogIndex,SMOGIndex,ColemanLiauIndex,LIX,RIX\n'
    )
    with open('PANreadibility.csv', 'w') as out:
        out.write(header)
        for author, text in zip(authors, texts):
            rd = Readability(text.encode('ascii', 'ignore'))
            labels = truth[author]
            row = [
                author,
                labels[0],
                labels[1],
                str(rd.ARI()),
                str(rd.FleschReadingEase()),
                str(rd.FleschKincaidGradeLevel()),
                str(rd.GunningFogIndex()),
                str(rd.SMOGIndex()),
                str(rd.ColemanLiauIndex()),
                str(rd.LIX()),
                str(rd.RIX()),
            ]
            out.write(','.join(row) + '\n')
def readability(self, text):
    """Score ``text`` with two readability metrics.

    Args:
        text: The text to analyse; passed unchanged to ``Readability``.

    Returns:
        A ``(fkg_score, smog)`` tuple holding the results of
        ``Readability.FleschKincaidGradeLevel()`` and
        ``Readability.SMOGIndex()``, in that order.
    """
    scorer = Readability(text)
    return scorer.FleschKincaidGradeLevel(), scorer.SMOGIndex()
# Stylometric feature dump: every feature value is written to `out_file` on
# its own line, in the order computed below. Relies on `tokens`, `raw`,
# `out_file`, `pos_tags` and `func_words` being defined by surrounding code.

# Create a frequency distribution for the text
text = nltk.Text(tokens)
fdist = nltk.FreqDist(text)

# Calculate the type-token ratio. float() keeps this a true ratio even where
# "/" between two ints truncates (Python 2 without `division` imported).
vocab_richness = len(set(tokens)) / float(len(tokens))
out_file.write(str(vocab_richness) + '\n')

# NOTE(review): this feature is named "average word length", but
# fdist.N() / len(fdist) is the total number of tokens divided by the number
# of distinct tokens, i.e. the mean number of occurrences per distinct token.
# The formula is left unchanged so existing outputs stay comparable; confirm
# which quantity is intended.
avg_word_len = fdist.N() / float(len(fdist))
out_file.write(str(avg_word_len) + '\n')

# Compute Readability
rd = Readability(raw)
out_file.write(str(rd.FleschKincaidGradeLevel()) + '\n')

# Calculate the distribution of parts-of-speech
tagged_text = nltk.pos_tag(text)
tag_fd = nltk.FreqDist(tag for (word, tag) in tagged_text)
for tag in pos_tags:
    out_file.write(str(tag_fd[tag]) + '\n')

# Calculate the frequency of the 50 most frequent function words
stopwords = nltk.corpus.stopwords.words('english')
# Membership is tested once per token, so look words up in a set instead of
# scanning the stopword list each time.
stopword_set = set(stopwords)
txt_stopwords = [w for w in tokens if w in stopword_set]
functionWrd_freq = nltk.FreqDist(txt_stopwords)
for func_word in func_words:
    out_file.write(str(functionWrd_freq[func_word]) + '\n')
# -*- coding: utf-8 -*-
# Script: read one hard-coded corpus file and print its readability metrics,
# one "<label>:  <value>" line per metric.
import sys

from readability import Readability

# Python 2 only: force UTF-8 as the implicit str/unicode conversion codec.
# reload(sys) is needed there to make setdefaultencoding reachable again.
# Neither name exists on Python 3, so the hack is skipped there.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('utf8')

# Alternative corpus:
# "C:\\Users\\Administrator\\Desktop\\myfolder\\sea-and-adventures\\the-old-man-and-the-sea.txt"
CORPUS_PATH = (
    "C:\\Users\\Administrator\\Desktop\\myfolder\\corpora\\"
    "An-Inquiry-into-the-Nature-and-Causes-of-the-Wealth-of-Nations.txt"
)

# The context manager closes the file even if reading fails.
with open(CORPUS_PATH) as source_file:
    text = source_file.read()

rd = Readability(text)

# Each print call gets one pre-formatted string, so the output is the same
# whether print is a statement (Python 2) or a function (Python 3). The two
# spaces after the colon reproduce "label: " plus print's argument separator.
for label, compute in (
    ('ARI', rd.ARI),
    ('FleschReadingEase', rd.FleschReadingEase),
    ('FleschKincaidGradeLevel', rd.FleschKincaidGradeLevel),
    ('GunningFogIndex', rd.GunningFogIndex),
    ('SMOGIndex', rd.SMOGIndex),
    ('ColemanLiauIndex', rd.ColemanLiauIndex),
    ('LIX', rd.LIX),
    ('RIX', rd.RIX),
):
    print('%s:  %s' % (label, compute()))
#!/usr/bin/python
# Command-line script: print the Flesch-Kincaid grade level of a text file.
# Usage: <script> <text-file>
import sys

from readability import Readability

if __name__ == '__main__':
    # Exit with a usage message instead of an IndexError traceback when the
    # input path is missing.
    if len(sys.argv) < 2:
        sys.exit('usage: %s <text-file>' % sys.argv[0])

    infile = sys.argv[1]
    # The context manager closes the file even if reading fails.
    with open(infile) as handle:
        text = handle.read()

    rd = Readability(text)
    print(rd.FleschKincaidGradeLevel())
def get_text_flesch_grade_score(inp_text):
    """Return the Flesch-Kincaid grade level of ``inp_text``.

    Leading and trailing whitespace is stripped before the text is handed to
    ``Readability``.

    Args:
        inp_text: The text to score.

    Returns:
        The value of ``Readability.FleschKincaidGradeLevel()`` for the
        stripped text.
    """
    trimmed = inp_text.strip()
    scorer = Readability(trimmed)
    return scorer.FleschKincaidGradeLevel()