def _get_reading_stats(no_code_text):
    """
    Gather reading-level scores for a piece of text.

    Every score comes from the like-named ``textstat`` function and is wrapped
    in a ``TextFeature`` filed under the 'Reading Level Analysis ' group.
    Metrics that list exceptions to absorb report a placeholder string instead
    of a score when one of those exceptions is raised; for the other metrics
    any exception propagates to the caller.

    :param no_code_text: String to analyse
    :return: list of TextFeature objects, one per metric, in table order
    """
    group_by = 'Reading Level Analysis '
    propagate = ()  # `except ():` matches nothing, so the error escapes

    # (label, textstat function name, exceptions to absorb, placeholder value)
    # IndexError is raised sporadically by some scorers; the cause has not
    # been established, so those metrics degrade to a placeholder.
    metrics = (
        # higher is better, scale 0 to 100
        ('Flesch Reading Ease', 'flesch_reading_ease', propagate, None),
        ('Flesch-Kincaid Grade Level', 'flesch_kincaid_grade', propagate, None),
        ('The Fog Scale (Gunning FOG formula)', 'gunning_fog',
         IndexError, "Undetermined"),
        ('The SMOG Index', 'smog_index', IndexError, "Undetermined"),
        ('Automated Readability Index', 'automated_readability_index',
         propagate, None),
        ('The Coleman-Liau Index', 'coleman_liau_index', propagate, None),
        ('Linsear Write Formula', 'linsear_write_formula',
         IndexError, "Undetermined"),
        ('Dale Chall Readability Score', 'dale_chall_readability_score',
         IndexError, "Undetermined"),
        ('Readability Consensus', 'readability_consensus',
         (TypeError, IndexError),
         "Undetermined; One of the tests above failed."),
    )

    results = []
    for label, scorer_name, absorbed, placeholder in metrics:
        try:
            feature = TextFeature(
                label, getattr(textstat, scorer_name)(no_code_text), group_by)
        except absorbed:
            feature = TextFeature(label, placeholder, group_by)
        results.append(feature)
    return results
def textstat_analysis(profile_text):
    """
    Run a fixed battery of textstat functions over ``profile_text``.

    :param profile_text: text to score
    :return: 11-tuple holding, in this order, the results of textstat's
        flesch_reading_ease, smog_index, flesch_kincaid_grade,
        coleman_liau_index, automated_readability_index,
        dale_chall_readability_score, difficult_words,
        linsear_write_formula, gunning_fog, readability_consensus and
        lexicon_count
    """
    # Order matters: it fixes both the call sequence and the tuple layout.
    metric_names = (
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'difficult_words',
        'linsear_write_formula',
        'gunning_fog',
        'readability_consensus',
        'lexicon_count',
    )
    return tuple(
        getattr(textstat, name)(profile_text) for name in metric_names
    )
def calculate_readability_measures(id):
    """
    Compute readability measures for a stored page and save them on it.

    Fetches the source of the ``page`` document ``id`` from the ``beek``
    index, runs the textstat scorers over its ``content`` field and writes
    the results back under the ``measures`` key with a partial update
    (``refresh=True``).

    Scoring and the update are best-effort: any exception raised there is
    logged with its traceback and suppressed.  Errors raised while creating
    the client or fetching the document are not caught.

    :param id: identifier of the document to process (the name shadows the
        builtin but is kept so keyword callers keep working)
    :return: None
    """
    import logging  # local import: only the failure path needs it

    es = elasticsearch.Elasticsearch()
    source = es.get_source(index='beek', doc_type='page', id=id)

    try:
        # Read inside the try so a missing 'content' key stays non-fatal.
        content = source['content']
        measures = {
            'flesch': textstat.flesch_reading_ease(content),
            'smog': textstat.smog_index(content),
            'flesch_kincaid': textstat.flesch_kincaid_grade(content),
            'coleman_liau': textstat.coleman_liau_index(content),
            'readability': textstat.automated_readability_index(content),
            'dale_chall': textstat.dale_chall_readability_score(content),
            'difficult_words': textstat.difficult_words(content),
            'linsear_write_formula': textstat.linsear_write_formula(content),
            'gunning_fog': textstat.gunning_fog(content),
            'consensus': textstat.readability_consensus(content),
        }
        es.update(index='beek', doc_type='page', id=id,
                  body={'doc': {'measures': measures}}, refresh=True)
    except Exception:
        # Still best-effort, but no longer silent: leave a trace for operators.
        logging.getLogger(__name__).exception(
            "Could not store readability measures for page %r", id)
def calculate_readability_measures(id):
    """
    Compute readability measures for a stored page and save them on it.

    Fetches the source of the ``page`` document ``id`` from the ``beek``
    index, runs the textstat scorers over its ``content`` field and writes
    the results back under the ``measures`` key with a partial update
    (``refresh=True``).

    Scoring and the update are best-effort: any exception raised there is
    logged with its traceback and suppressed.  Errors raised while creating
    the client or fetching the document are not caught.

    :param id: identifier of the document to process (the name shadows the
        builtin but is kept so keyword callers keep working)
    :return: None
    """
    import logging  # local import: only the failure path needs it

    es = elasticsearch.Elasticsearch()
    source = es.get_source(index='beek', doc_type='page', id=id)

    try:
        # Read inside the try so a missing 'content' key stays non-fatal.
        content = source['content']
        measures = {
            'flesch': textstat.flesch_reading_ease(content),
            'smog': textstat.smog_index(content),
            'flesch_kincaid': textstat.flesch_kincaid_grade(content),
            'coleman_liau': textstat.coleman_liau_index(content),
            'readability': textstat.automated_readability_index(content),
            'dale_chall': textstat.dale_chall_readability_score(content),
            'difficult_words': textstat.difficult_words(content),
            'linsear_write_formula': textstat.linsear_write_formula(content),
            'gunning_fog': textstat.gunning_fog(content),
            'consensus': textstat.readability_consensus(content),
        }
        es.update(index='beek', doc_type='page', id=id,
                  body={'doc': {'measures': measures}}, refresh=True)
    except Exception:
        # Still best-effort, but no longer silent: leave a trace for operators.
        logging.getLogger(__name__).exception(
            "Could not store readability measures for page %r", id)
def get_readability(self, corpus, type='ari'):
    """
    Score ``corpus`` with the textstat metric selected by ``type``.

    Recognised selectors and the textstat function each one calls are listed
    in the table below.  The historical misspellings ``'flesch_kinciad'`` and
    ``'readability_conensus'`` are still accepted so existing callers keep
    working; the correctly spelled ``'flesch_kincaid'`` and
    ``'readability_consensus'`` now work too instead of silently yielding
    None.

    :param corpus: text to score
    :param type: metric selector, defaults to ``'ari'`` (the name shadows the
        builtin but is part of the public signature)
    :return: whatever the selected textstat function returns, or None when
        the selector is not recognised
    """
    metric_methods = {
        'ari': 'automated_readability_index',
        'flesch': 'flesch_reading_ease',
        'smog': 'smog_index',
        'flesch_kincaid': 'flesch_kincaid_grade',
        'flesch_kinciad': 'flesch_kincaid_grade',  # legacy misspelling
        'coleman': 'coleman_liau_index',
        'dale_chall': 'dale_chall_readability_score',
        'difficult_words': 'difficult_words',
        'linsear': 'linsear_write_formula',
        'gunning_fog': 'gunning_fog',
        'readability_consensus': 'readability_consensus',
        'readability_conensus': 'readability_consensus',  # legacy misspelling
    }

    try:
        method_name = metric_methods.get(type)
    except TypeError:
        # Unhashable selector: the old equality chain simply never matched.
        method_name = None

    if method_name is None:
        return None
    return getattr(textstat, method_name)(corpus)
#!/bin/python
"""Print comma-separated textstat statistics for one text file.

Usage: pass the path of the text file as the first command-line argument.
The single output line holds, in order, the results of textstat's
flesch_reading_ease, smog_index, flesch_kincaid_grade, coleman_liau_index,
automated_readability_index, dale_chall_readability_score, difficult_words,
linsear_write_formula, gunning_fog, readability_consensus, syllable_count,
lexicon_count and sentence_count.
"""
import os
import string
import sys

from textstat.textstat import textstat

if len(sys.argv) < 2:
    # Fail with a usage hint rather than a bare IndexError traceback.
    sys.exit("usage: %s INPUTFILE" % sys.argv[0])

inputfile = sys.argv[1]

# Join the stripped lines with a space.  Joining with "" glued the last word
# of each line onto the first word of the next, which skews every word-,
# syllable- and sentence-based statistic.
with open(inputfile) as myfile:
    test_data = " ".join(line.rstrip() for line in myfile)

stats = (
    textstat.flesch_reading_ease(test_data),
    textstat.smog_index(test_data),
    textstat.flesch_kincaid_grade(test_data),
    textstat.coleman_liau_index(test_data),
    textstat.automated_readability_index(test_data),
    textstat.dale_chall_readability_score(test_data),
    textstat.difficult_words(test_data),
    textstat.linsear_write_formula(test_data),
    textstat.gunning_fog(test_data),
    textstat.readability_consensus(test_data),
    textstat.syllable_count(test_data),
    # NOTE(review): the second positional argument is presumably the
    # punctuation-removal flag; confirm against the installed textstat.
    textstat.lexicon_count(test_data, 1),
    textstat.sentence_count(test_data),
)

print(",".join(str(value) for value in stats))
def _get_reading_stats(no_code_text):
    """
    Build the list of reading-level features for a piece of text.

    Each feature is a ``TextFeature`` in the 'Reading Level Analysis ' group
    whose value is the score returned by the matching ``textstat`` function.
    Scorers recorded through ``record_or`` fall back to a placeholder string
    when they raise one of the tolerated exceptions; scorers recorded through
    ``record`` let any exception propagate.

    :param no_code_text: String to analyse
    :return: list of TextFeature objects in a fixed order
    """
    group_by = 'Reading Level Analysis '
    results = []

    def record(label, value):
        """Append one feature for ``label`` holding ``value``."""
        results.append(TextFeature(label, value, group_by))

    def record_or(label, scorer, placeholder, tolerated=IndexError):
        """Record ``scorer``'s result, or ``placeholder`` if it fails."""
        try:
            record(label, scorer(no_code_text))
        except tolerated:
            # Some scorers raise IndexError now and then; the cause is not
            # established, so the feature is reported as undetermined.
            record(label, placeholder)

    # higher is better, scale 0 to 100
    record('Flesch Reading Ease', textstat.flesch_reading_ease(no_code_text))
    record('Flesch-Kincaid Grade Level',
           textstat.flesch_kincaid_grade(no_code_text))
    record_or('The Fog Scale (Gunning FOG formula)', textstat.gunning_fog,
              "Undetermined")
    record_or('The SMOG Index', textstat.smog_index, "Undetermined")
    record('Automated Readability Index',
           textstat.automated_readability_index(no_code_text))
    record('The Coleman-Liau Index',
           textstat.coleman_liau_index(no_code_text))
    record_or('Linsear Write Formula', textstat.linsear_write_formula,
              "Undetermined")
    record_or('Dale Chall Readability Score',
              textstat.dale_chall_readability_score, "Undetermined")
    record_or('Readability Consensus', textstat.readability_consensus,
              "Undetermined; One of the tests above failed.",
              tolerated=(TypeError, IndexError))

    return results
# Part-of-speech profile of every essay: raw tag counts, then each tag's
# share of the essay's tokens.
print("grammer for the essay's")
# Iterate the essay column directly; indexing df.essay[i] with a positional
# counter is a label lookup and goes wrong when the index is not 0..n-1.
for essay in df.essay:
    tokens = nltk.word_tokenize(essay.lower())
    tagged = nltk.pos_tag(tokens)
    counts = Counter(tag for _, tag in tagged)
    print(counts)
    total = sum(counts.values())
    print({tag: float(count) / total for tag, count in counts.items()})
    print("")

# Readability of every essay: syllable count, then textstat's consensus.
print("readability/complexity")
for essay in df.essay:
    print(textstat.syllable_count(essay))
    print(textstat.readability_consensus(essay))
    print("")

# Example: rescale a value from the range [1, 512] onto [1, 10].
# NOTE(review): interp1d is presumably scipy.interpolate.interp1d; confirm
# against the file's imports.
print("normalizing values")
ranger = interp1d([1, 512], [1, 10])
print(ranger(256))