Exemplo n.º 1
0
def print_readability_metrics(text, file_name):
    """Print a short readability report for ``text``.

    The report starts with a banner naming ``file_name``, then prints the
    Flesch reading ease, Dale-Chall readability score and SMOG index, and
    ends with a dashed separator line.
    """
    print(file_name, " readability metrics")
    report_rows = (
        ("flesch reading ease: ", "flesch_reading_ease"),
        ("dale chall readability: ", "dale_chall_readability_score"),
        ("smog index: ", "smog_index"),
    )
    for caption, metric_name in report_rows:
        # Each score is computed right before its own line is printed.
        print(caption, getattr(textstat, metric_name)(text))
    print('------------------------------------------------')
Exemplo n.º 2
0
 def do_text_stats(self, text):
     """Collect textstat counts and readability scores for ``text``.

     Returns a dict keyed by metric name. Flesch reading ease,
     Flesch-Kincaid grade, Coleman-Liau index and the text standard are
     reported as ``None`` when textstat raises ``TypeError`` for them; any
     error from the other metrics propagates to the caller.

     Flesch reading ease bands, for reference:
       90-100 very easy, 80-89 easy, 70-79 fairly easy, 60-69 standard,
       50-59 fairly difficult, 30-49 difficult, 0-29 very confusing.
     """

     def none_on_type_error(metric_name):
         # Tolerate only TypeError for the metrics wrapped by this helper.
         try:
             return getattr(textstat, metric_name)(text)
         except TypeError:
             return None

     stats = {}
     stats["syllable_count"] = textstat.syllable_count(text)
     # Second positional argument is passed as True, as in the original call.
     stats["lexicon_count"] = textstat.lexicon_count(text, True)
     stats["sentence_count"] = textstat.sentence_count(text)
     stats["flesch_reading_ease"] = none_on_type_error("flesch_reading_ease")
     stats["flesch_kincaid_grade"] = none_on_type_error("flesch_kincaid_grade")
     # The Fog Scale (Gunning FOG formula)
     stats["gunning_fog"] = textstat.gunning_fog(text)
     stats["smog_index"] = textstat.smog_index(text)
     stats["automated_readability_index"] = (
         textstat.automated_readability_index(text))
     stats["coleman_liau_index"] = none_on_type_error("coleman_liau_index")
     stats["linsear_write_formula"] = textstat.linsear_write_formula(text)
     stats["dale_chall_readability_score"] = (
         textstat.dale_chall_readability_score(text))
     # Readability consensus based upon all the above tests
     stats["text_standard"] = none_on_type_error("text_standard")
     return stats
Exemplo n.º 3
0
def text_analytics(text):
    """Build a 27-element readability feature vector for ``text``.

    Layout of the returned list:
      * 3 counts: words, sentences, syllables;
      * 6 scores: Flesch reading ease, SMOG, Gunning FOG, Dale-Chall,
        automated readability index, Coleman-Liau;
      * 18 interaction terms: for each score (in the order above) its
        product with the word, sentence and syllable counts.

    When textstat reports zero sentences, a vector of 27 zeros is returned
    so callers always receive a list of the same length.
    """
    count_features = 3
    score_features = 6
    vector_length = count_features + score_features * (1 + count_features)

    sent = textstat.sentence_count(text)  # sentence count
    if sent == 0:
        # Previously this path fell through to `return x` with x unbound.
        return [0] * vector_length

    lexicon = textstat.lexicon_count(text)  # word count
    syll = textstat.syllable_count(text)  # syllable count
    counts = [lexicon, sent, syll]
    scores = [
        textstat.flesch_reading_ease(text),
        textstat.smog_index(text),
        textstat.gunning_fog(text),
        textstat.dale_chall_readability_score(text),
        textstat.automated_readability_index(text),
        textstat.coleman_liau_index(text),
    ]
    # Score-major order: flesch*words, flesch*sentences, flesch*syllables, ...
    interactions = [count * score for score in scores for count in counts]
    return counts + scores + interactions
Exemplo n.º 4
0
def get_readability(df2):
    """Return a copy of ``df2`` extended with readability columns.

    For every object-dtype column (numbered in the order ``select_dtypes``
    returns them) ten new columns are added, named ``<metric><index>``,
    each holding that textstat metric applied to every cell of the column.
    The input frame itself is not modified.
    """
    metric_names = (
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'difficult_words',
        'linsear_write_formula',
        'gunning_fog',
        'text_standard',
    )
    enriched = df2.copy()
    # Captured before any column is added, so new columns are never rescored.
    object_columns = enriched.select_dtypes(include=['object']).columns.values
    for position, column in enumerate(object_columns):
        for metric_name in metric_names:
            enriched[metric_name + str(position)] = enriched[column].apply(
                lambda cell: getattr(textstat, metric_name)(cell))
    return enriched
Exemplo n.º 5
0
def compareContents():
    """Render the comparison page, scoring the submitted text on POST.

    For any non-POST request the bare template is rendered. On POST the
    ``poem`` form field is scored with five textstat metrics; the text and
    each score are stored in the session and passed to the template
    together with ``metrics=True``.
    """
    if request.method != "POST":
        return render_template('compareContents.html')

    line = request.form['poem']
    # Looked up but not used below; the lookup is kept so that a request
    # without a 'poem1' field still fails exactly as before.
    poem1 = request.form['poem1']
    # TODO: metrics comparison logic goes here; keep results in the session.

    session['line'] = line
    # Every score is stored in the session as soon as it is computed.
    fre = textstat.flesch_reading_ease(line)
    session['fre'] = fre
    smog = textstat.smog_index(line)
    session['smog'] = smog
    fkg = textstat.flesch_kincaid_grade(line)
    session['fkg'] = fkg
    dcr = textstat.dale_chall_readability_score(line)
    session['dcr'] = dcr
    gf = textstat.gunning_fog(line)
    session['gf'] = gf
    return render_template(
        'compareContents.html',
        metrics=True,
        line=line,
        fre=fre,
        smog=smog,
        fkg=fkg,
        dcr=dcr,
        gf=gf,
    )
def _get_reading_stats(no_code_text):
    """
    Build the reading-level features for a piece of text.

    Metrics listed with a non-empty exception tuple are guarded: when one of
    those exceptions is raised, a placeholder string is recorded instead of
    the score. The remaining metrics let any error propagate.

    :param no_code_text: String to analyse
    :return: list of TextFeature entries, one per metric, in a fixed order
    """
    group_by = 'Reading Level Analysis '
    undetermined = "Undetermined"
    # (label, textstat function name, exceptions to absorb, placeholder).
    # An empty tuple makes `except ()` match nothing, i.e. the metric is
    # unguarded. Some metrics raise IndexError for reasons that are not
    # understood, hence the guards.
    plan = (
        # Flesch Reading Ease: higher is better, scale 0 to 100
        ('Flesch Reading Ease', 'flesch_reading_ease', (), None),
        ('Flesch-Kincaid Grade Level', 'flesch_kincaid_grade', (), None),
        ('The Fog Scale (Gunning FOG formula)', 'gunning_fog',
         (IndexError,), undetermined),
        ('The SMOG Index', 'smog_index', (IndexError,), undetermined),
        ('Automated Readability Index', 'automated_readability_index',
         (), None),
        ('The Coleman-Liau Index', 'coleman_liau_index', (), None),
        ('Linsear Write Formula', 'linsear_write_formula',
         (IndexError,), undetermined),
        ('Dale Chall Readability Score', 'dale_chall_readability_score',
         (IndexError,), undetermined),
        ('Readability Consensus', 'readability_consensus',
         (TypeError, IndexError),
         "Undetermined; One of the tests above failed."),
    )
    results = []
    for label, metric_name, absorbed, placeholder in plan:
        try:
            score = getattr(textstat, metric_name)(no_code_text)
            results.append(TextFeature(label, score, group_by))
        except absorbed:
            results.append(TextFeature(label, placeholder, group_by))
    return results
Exemplo n.º 7
0
def readability(text):
    """Print a sectioned readability report for ``text``.

    Each section prints a title with an underline, followed by the
    corresponding textstat score. All output goes through ``print(...)``
    calls with a single argument, which behave the same on Python 2 and 3
    (the original mixed in Python 2-only ``print`` statements).

    Returns ``len(adjectives)``.
    """
    print("Readability\n=================================\n\n")
    print("Flesch Reading Ease\n________________________\n\n")
    print(str(textstat.flesch_reading_ease(text)) + "\n")
    print("Smog Index\n________________________\n\n")
    print(str(textstat.smog_index(text)) + "\n")
    print("Flesch Kincaid Grade\n________________________\n\n")
    print(str(textstat.flesch_kincaid_grade(text)) + "\n")
    print("Coleman Liau Index\n________________________\n\n")
    print(str(textstat.coleman_liau_index(text)) + "\n")
    print("ARI\n________________________\n\n")
    print(str(textstat.automated_readability_index(text)) + "\n")
    print("Dale Chall\n________________________\n\n")
    print(str(textstat.dale_chall_readability_score(text)) + "\n")
    print("Difficult Words\n________________________\n\n")
    print(str(textstat.difficult_words(text)) + "\n")
    print("Linsear Write Formula\n________________________\n\n")
    print(str(textstat.linsear_write_formula(text)) + "\n")
    print("Gunning Fog\n________________________\n\n")
    print(str(textstat.gunning_fog(text)) + "\n")
    print("Compiled Score\n_____________________________\n\n")
    print(str(textstat.text_standard(text)) + "\n")

    # NOTE(review): `adjectives` is not defined in this function; unless a
    # module-level name exists this raises NameError. Confirm the intended
    # return value.
    return len(adjectives)
Exemplo n.º 8
0
def calculate_statistics(lyrics):
    """
    Annotate every song with counts and readability scores of its raw text.

    Each song is updated in place from ``song["text_raw"]``. If anything
    fails for a song, the error is logged and processing continues with the
    next one; keys written before the failure are kept.
    :return: The same ``lyrics`` collection, annotated in place
    """
    logging.info("Calculating Statistics")
    from textstat.textstat import textstat
    for _, song in tqdm(enumerate(lyrics), total=len(lyrics)):
        try:
            raw_text = song["text_raw"]
            song["num_syllables"] = textstat.syllable_count(raw_text)
            song["num_words"] = textstat.lexicon_count(raw_text)
            song["num_sentences"] = textstat.sentence_count(raw_text)
            song["flesch_score"] = textstat.flesch_reading_ease(raw_text)
            song["flesch_kincaid_level"] = textstat.flesch_kincaid_grade(
                raw_text)
            song["fog_score"] = textstat.gunning_fog(raw_text)
            # NOTE(review): the key says "difficult words" but the value is
            # the Dale-Chall score - confirm which one is intended.
            song["num_difficult_words"] = (
                textstat.dale_chall_readability_score(raw_text))
        except Exception as e:
            logging.error(
                "Something bad happened in the current song ! Skipping it... \n{}"
                .format(song))
            logging.exception(e)
    return lyrics
Exemplo n.º 9
0
def f():
    """Score each line of ``abc.txt`` and save the results as a spreadsheet.

    Writes a header row, then one row per input line holding the line
    itself and its Flesch reading ease, Flesch-Kincaid grade, Dale-Chall
    score and Gunning FOG index. The workbook is saved to
    ``Readability_Scores.xls``.
    """
    print("hello")
    book = xlwt.Workbook()
    worksheet = book.add_sheet('ReadabilityScore')
    headers = (
        "Gen_sent",
        "flesch_reading_ease",
        "flesch_kincaid_grade",
        "dale_chall_readability_score",
        "gunning_fog",
    )
    for column, title in enumerate(headers):
        worksheet.write(0, column, title)

    # The context manager closes the input file even if scoring a line
    # fails; the original left the handle open.
    with open('abc.txt') as source:  # , encoding='utf-8')
        # Row 0 is the header, so data rows start at 1.
        for row, line in enumerate(source, start=1):
            worksheet.write(row, 0, line)
            worksheet.write(row, 1, textstat.flesch_reading_ease(line))
            worksheet.write(row, 2, textstat.flesch_kincaid_grade(line))
            worksheet.write(
                row, 3, textstat.dale_chall_readability_score(line))
            worksheet.write(row, 4, textstat.gunning_fog(line))
    book.save('Readability_Scores.xls')
Exemplo n.º 10
0
    def _calculate_scores(self, docs):
        """Return one dict of text statistics per document in ``docs``.

        Each dict holds character/word/sentence counts, three averages and
        the Flesch, automated readability, Dale-Chall and LIX scores.
        Syllable count, SMOG, Coleman-Liau, Linsear, difficult words and
        Gunning FOG are intentionally not computed here.
        """
        return [
            {
                'chars': ts.char_count(doc),
                'words': ts.lexicon_count(doc),
                'sents': ts.sentence_count(doc),
                'avg_sent_length': ts.avg_sentence_length(doc),
                'avg_syllables_per_word': ts.avg_syllables_per_word(doc),
                'avg_letters_per_word': ts.avg_letter_per_word(doc),
                'flesch': ts.flesch_reading_ease(doc),
                'automated_readability': ts.automated_readability_index(doc),
                'dale_chall': ts.dale_chall_readability_score(doc),
                'lix': ts.lix(doc),
            }
            for doc in docs
        ]
Exemplo n.º 11
0
def vecify(v):
    """Return eight textstat readability values for ``v`` as a list.

    Order: Flesch reading ease, Flesch-Kincaid grade, Coleman-Liau,
    automated readability index, Dale-Chall, difficult words, Linsear
    Write, Gunning FOG. The SMOG index is deliberately left out.
    """
    metric_names = (
        'flesch_reading_ease',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'difficult_words',
        'linsear_write_formula',
        'gunning_fog',
    )
    return [getattr(ts, metric_name)(v) for metric_name in metric_names]
Exemplo n.º 12
0
def all_trad_scores(text):
    """Return eight readability scores for ``text`` as a list.

    Result order: Flesch reading ease, Flesch-Kincaid grade, SMOG,
    Coleman-Liau, automated readability index, Dale-Chall, Linsear Write,
    Gunning FOG.
    """
    return [
        textstat.flesch_reading_ease(text),
        textstat.flesch_kincaid_grade(text),
        textstat.smog_index(text),
        textstat.coleman_liau_index(text),
        textstat.automated_readability_index(text),
        textstat.dale_chall_readability_score(text),
        textstat.linsear_write_formula(text),
        textstat.gunning_fog(text),
    ]
Exemplo n.º 13
0
 def reading_difficulty(self):
     """Return ``(avg_grade, diff_words)`` for ``self.text``.

     ``diff_words`` is the textstat difficult-word count divided by
     ``self.nword``. ``avg_grade`` is the ceiling of the mean of seven grade
     scores (with 6 subtracted from the Gunning FOG value), passed through
     ``max(..., 12)``.
     """
     text = self.text
     diff_words = textstat.difficult_words(text) / self.nword

     grade_total = textstat.flesch_kincaid_grade(text)
     grade_total = grade_total + textstat.coleman_liau_index(text)
     grade_total = grade_total + textstat.automated_readability_index(text)
     grade_total = grade_total + textstat.dale_chall_readability_score(text)
     grade_total = grade_total + textstat.linsear_write_formula(text)
     grade_total = grade_total + (textstat.gunning_fog(text) - 6)
     grade_total = grade_total + textstat.smog_index(text)

     # NOTE(review): max() makes 12 a lower bound, so the result is never
     # below 12. If 12 was meant as a cap this should be min() - confirm.
     avg_grade = max(math.ceil(grade_total / 7), 12)
     return avg_grade, diff_words
def textstat_analysis(profile_text):
    """Return an 11-tuple of textstat results for ``profile_text``.

    Order: Flesch reading ease, SMOG, Flesch-Kincaid grade, Coleman-Liau,
    automated readability index, Dale-Chall, difficult words, Linsear
    Write, Gunning FOG, readability consensus, word count.
    """
    metric_names = (
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'difficult_words',
        'linsear_write_formula',
        'gunning_fog',
        'readability_consensus',
        'lexicon_count',
    )
    return tuple(
        [getattr(textstat, metric_name)(profile_text)
         for metric_name in metric_names])
Exemplo n.º 15
0
 def __readability_of_text(text, score="dale_chall"):
     """Return ``[dale_chall_score]`` for ``text``.

     Only ``score="dale_chall"`` is supported; any other string prints a
     notice and yields ``None``. A ``score`` whose type is not exactly
     ``str`` raises ``ValueError``. Any ``ValueError`` raised inside the
     body is announced with the same printed message before it is re-raised.
     """
     try:
         if type(score) is not str:
             raise ValueError
         if score != "dale_chall":
             print('Other scores are not supported yet. You wanted: ' + score + " we have only dale_chall")
             return None
         return [ts.dale_chall_readability_score(text)]
     except ValueError:
         print("the score should be of type str. You put " + str(type(score)))
         raise
Exemplo n.º 16
0
def get_readability(contents):
    """Return ten textstat readability values for ``contents`` as a list.

    Order: Flesch reading ease, SMOG, Flesch-Kincaid grade, automated
    readability index, Dale-Chall, difficult words, Linsear Write,
    Gunning FOG, Coleman-Liau, text standard.
    """
    return [
        textstat.flesch_reading_ease(contents),
        textstat.smog_index(contents),
        textstat.flesch_kincaid_grade(contents),
        textstat.automated_readability_index(contents),
        textstat.dale_chall_readability_score(contents),
        textstat.difficult_words(contents),
        textstat.linsear_write_formula(contents),
        textstat.gunning_fog(contents),
        textstat.coleman_liau_index(contents),
        textstat.text_standard(contents),
    ]
def FindDifficulty(df, TgtLang, SrcLang):
    """Add a ``Difficulty`` column holding the Dale-Chall score per sentence.

    The English side is scored: ``TgtSent`` when ``TgtLang`` is ``'eng'``,
    otherwise ``SrcSent`` when ``SrcLang`` is ``'eng'``. If neither language
    is English, an error string is returned and ``df`` is left untouched.
    Otherwise ``df`` is modified in place and returned.
    """
    if TgtLang != 'eng' and SrcLang != 'eng':
        return ('Error Message Here')

    # The target side wins when both languages are English.
    col = 'TgtSent' if TgtLang == 'eng' else 'SrcSent'

    # Placeholder column first, then the real scores replace it.
    df['Difficulty'] = ''
    scores = []
    for sentence in df[col]:
        scores.append(float(textstat.dale_chall_readability_score(sentence)))
    df['Difficulty'] = scores

    return (df)
Exemplo n.º 18
0
    def analyze_one(self, email):
        """Analyze a single email and append its results to the score lists.

        The sentence count is always recorded, with any non-positive count
        stored as 1. The five grade scores are recorded only when ``email``
        is non-empty.
        """
        sentence_total = tstat.sentence_count(email)
        if not sentence_total > 0:
            sentence_total = 1
        self.sent_count.append(sentence_total)

        if not (email and len(email) > 0):
            return

        # Each name is both a list attribute on self and a tstat function.
        grade_metrics = (
            'flesch_kincaid_grade',
            'automated_readability_index',
            'coleman_liau_index',
            'linsear_write_formula',
            'dale_chall_readability_score',
        )
        for metric_name in grade_metrics:
            getattr(self, metric_name).append(
                getattr(tstat, metric_name)(email))
Exemplo n.º 19
0
 def readability_of_text(self, score="dale_chall"):
     """Compute, store and print the Dale-Chall score of ``self.string``.

     Only ``score="dale_chall"`` is supported; the result is stored in
     ``self.readability`` and printed. Any other string only prints a
     notice. A ``score`` whose type is not exactly ``str`` raises
     ``ValueError``. Any ``ValueError`` raised inside the body is announced
     with the same printed message before it is re-raised.
     """
     try:
         if type(score) is not str:
             raise ValueError
         if score != "dale_chall":
             print('Other scores are not supported yet. You wanted: ' +
                   score + " we have only dale_chall")
             return
         self.readability = ts.dale_chall_readability_score(self.string)
         print(self.readability)
     except ValueError:
         print("the score shuld be of type str. You put " +
               str(type(score)))
         raise
Exemplo n.º 20
0
def main():
    """Write a ``<path>.readability.snip`` report for each path in argv.

    Every command-line argument is read as a text file; the report lists
    one ``name : value`` line per textstat metric, in a fixed order.
    """
    metric_names = (
        "syllable_count",
        "lexicon_count",
        "sentence_count",
        "difficult_words",
        "flesch_reading_ease",
        "flesch_kincaid_grade",
        "smog_index",
        "automated_readability_index",
        "coleman_liau_index",
        "linsear_write_formula",
        "dale_chall_readability_score",
    )
    for path in sys.argv[1:]:
        with open(path) as source:
            text = source.read()

        with open(path + '.readability.snip', 'w') as report:
            for metric_name in metric_names:
                # Each report label is the name of the textstat function.
                line_template = metric_name + " : %s\n"
                report.write(
                    line_template % getattr(textstat, metric_name)(text))
    def analyse_readbility(self, issue):
        """Score the readability of an issue body.

        :issue: object exposing the issue text as ``issue.body``
        :returns: ``(scores, message)`` where ``scores`` maps ``'flesch'``,
            ``'ari'`` and ``'dale-chall'`` to their values, and ``message``
            is a checklist line asking for improvement, or ``None`` when
            readability is acceptable.

        An issue without a body is never parsed: it gets fixed placeholder
        scores and a message asking for the body text to be improved.
        """
        dic_test_readbility = dict()

        # Check for an empty body first, so no markdown parsing is attempted
        # on missing text (the original parsed first, then checked twice).
        if not issue.body:
            message = (" - [ ] To improve the text in issue body.\n")
            # Placeholder scores; presumably chosen to read as "unreadable"
            # - confirm against _has_low_readbility.
            dic_test_readbility['flesch'] = -1
            dic_test_readbility['ari'] = 100
            dic_test_readbility['dale-chall'] = 100
            return (dic_test_readbility, message)

        gfm = GithubMarkdown(issue.body)
        str_markdown = gfm.parse(issue.body)
        str_text = self.markdown_to_text(str_markdown)

        # Flesch Reading Ease score
        dic_test_readbility['flesch'] = textstat.flesch_reading_ease(str_text)

        # Automated Readability Index (ARI)
        dic_test_readbility['ari'] = textstat.automated_readability_index(
            str_text)

        # Dale-Chall readability score
        dic_test_readbility['dale-chall'] = (
            textstat.dale_chall_readability_score(str_text))

        if self._has_low_readbility(dic_test_readbility):
            message = ' - [ ] To improve the readability of the text.\n'
        else:
            message = None

        return (dic_test_readbility, message)
Exemplo n.º 22
0
def scores_cal_ori(text):
    """Return a 14-tuple of textstat counts, averages and scores for ``text``.

    Order: character count (spaces ignored), word count (punctuation
    removed), syllable count, sentence count, average sentence length,
    average syllables per word, average letters per word, average sentences
    per word, Flesch-Kincaid grade, SMOG index, Gunning FOG, difficult
    words, Dale-Chall score, polysyllable count.
    """
    char_count_value = textstat.char_count(text, ignore_spaces=True)
    lexicon_count_value = textstat.lexicon_count(text, removepunct=True)
    syllable_count_value = textstat.syllable_count(text)
    sentence_count_value = textstat.sentence_count(text)
    avg_sentence_length_value = textstat.avg_sentence_length(text)
    avg_syllables_per_word_value = textstat.avg_syllables_per_word(text)
    avg_letter_per_word_value = textstat.avg_letter_per_word(text)
    avg_sentence_per_word_value = textstat.avg_sentence_per_word(text)
    flesch_kincaid_grade_value = textstat.flesch_kincaid_grade(text)
    smog_index_value = textstat.smog_index(text)
    gunning_fog_value = textstat.gunning_fog(text)
    difficult_words_value = textstat.difficult_words(text)
    dale_chall_value = textstat.dale_chall_readability_score(text)
    polysyllab_value = textstat.polysyllabcount(text)
    # The original had a second, unreachable `return smog_index_value` here.
    return (
        char_count_value,
        lexicon_count_value,
        syllable_count_value,
        sentence_count_value,
        avg_sentence_length_value,
        avg_syllables_per_word_value,
        avg_letter_per_word_value,
        avg_sentence_per_word_value,
        flesch_kincaid_grade_value,
        smog_index_value,
        gunning_fog_value,
        difficult_words_value,
        dale_chall_value,
        polysyllab_value,
    )
def calculate2FormulaFromFile(inputFile, isTEI=1):
    """Compute Flesch-Kincaid grade and Dale-Chall score for one file.

    The text is obtained from ``extractText.extractTextTEI(inputFile,
    isTEI)`` and underscores are replaced with spaces before scoring.

    Returns ``(inputFile, flesch_kincaid_grade, dale_chall_score)``. A
    score that cannot be computed is reported as ``-1`` after an error line
    is printed.
    """
    inputData = extractText.extractTextTEI(inputFile, isTEI)
    inputData = re.sub('_', ' ', inputData)
    # `except Exception` keeps the best-effort behaviour for scoring errors
    # while letting KeyboardInterrupt/SystemExit through, which the
    # original bare `except:` swallowed.
    try:
        r1 = textstat.flesch_kincaid_grade(inputData)
    except Exception:
        print('ERROR: cannot calculate flesch_kincaid_grade for ', inputFile)
        r1 = -1
    try:
        r2 = textstat.dale_chall_readability_score(inputData)
    except Exception:
        print('ERROR: cannot calculate dale_chall_readability_score for ', inputFile)
        r2 = -1
    print('processing file', inputFile, 'complete')
    return (inputFile, r1, r2)
Exemplo n.º 24
0
def run_textstat(text):
    """Return a 10-tuple of textstat readability results for ``text``.

    Order: Flesch reading ease, SMOG, Flesch-Kincaid grade, Coleman-Liau,
    automated readability index, Dale-Chall, difficult words, Linsear
    Write, Gunning FOG, text standard.
    """
    metric_names = (
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'difficult_words',
        'linsear_write_formula',
        'gunning_fog',
        'text_standard',
    )
    collected = []
    for metric_name in metric_names:
        collected.append(getattr(textstat, metric_name)(text))
    return tuple(collected)
Exemplo n.º 25
0
def calculate2FormulaFromFile(inputFile, isTEI=1):
    """Compute Flesch-Kincaid grade and Dale-Chall score for one file.

    The text is obtained from ``extractText.extractTextTEI(inputFile,
    isTEI)`` and underscores are replaced with spaces before scoring.

    Returns ``(inputFile, flesch_kincaid_grade, dale_chall_score)``. A
    score that cannot be computed is reported as ``-1`` after an error line
    is printed.
    """
    inputData = extractText.extractTextTEI(inputFile, isTEI)
    inputData = re.sub('_', ' ', inputData)
    # `except Exception` keeps the best-effort behaviour for scoring errors
    # while letting KeyboardInterrupt/SystemExit through, which the
    # original bare `except:` swallowed.
    try:
        r1 = textstat.flesch_kincaid_grade(inputData)
    except Exception:
        print('ERROR: cannot calculate flesch_kincaid_grade for ', inputFile)
        r1 = -1
    try:
        r2 = textstat.dale_chall_readability_score(inputData)
    except Exception:
        print('ERROR: cannot calculate dale_chall_readability_score for ',
              inputFile)
        r2 = -1
    print('processing file', inputFile, 'complete')
    return (inputFile, r1, r2)
def dale_chall_readability_score(text):
    """Map the Dale-Chall score of ``text`` to a difficulty level from 0 to 7.

    Levels use contiguous bands:
      score <= 0 (or not a number) -> 0
      0 < score < 5   -> 1
      5 <= score < 6  -> 2
      6 <= score < 7  -> 3
      7 <= score < 8  -> 4
      8 <= score < 9  -> 5
      9 <= score < 10 -> 6
      score >= 10     -> 7

    The original bands ended at x.9, so scores such as 4.95 or 5.9 matched
    no band and were reported as level 0.
    """
    score = textstat.dale_chall_readability_score(text)
    # `not score > 0` also sends NaN to level 0, as the original did.
    if not score > 0:
        return 0

    upper_bounds = (5, 6, 7, 8, 9, 10)
    for level, upper_bound in enumerate(upper_bounds, start=1):
        if score < upper_bound:
            return level
    return len(upper_bounds) + 1
Exemplo n.º 27
0
def score_statements(filename=DEFAULT_FILENAME, loglevel=logging.INFO, database=DB_PATH):
    """Compute and save a sentiment/readability Score for every Statement.

    For each statement a ``Score`` is built from the sentiment analyzer's
    polarity scores, with ``intensity`` set to the absolute compound value.
    Statements with at least one word also get Flesch, Kincaid and
    Dale-Chall values; otherwise those three are 0. The score is saved,
    attached to the statement, and the statement is saved and printed.

    ``filename``, ``loglevel`` and ``database`` are accepted but not used
    in this function.

    Returns the index of the last statement processed, or ``-1`` when there
    were no statements (the original raised UnboundLocalError in that case).
    """
    sia = SentimentIntensityAnalyzer()
    # Pre-set so `return i` is defined even if the loop body never runs.
    i = -1
    for i, statement in enumerate(Statement.objects.iterator()):
        s = sia.polarity_scores(statement.text)
        score = Score(positive=s['pos'], negative=s['neg'], neutral=s['neu'], compound=s['compound'],
                      intensity=abs(s['compound']))
        words = statement.text.split()
        # split() without a separator never yields empty strings, so a
        # non-empty list is all the original `len(words) and any(words)`
        # tested for.
        if words:
            superficial_measures = getmeasures(words)
            grades = superficial_measures['readability grades']
            score.flesch = grades['FleschReadingEase']
            score.kincaid = grades['Kincaid']
            score.dale_chall = textstat.dale_chall_readability_score(statement.text)
        else:
            score.flesch = 0
            score.kincaid = 0
            score.dale_chall = 0
        score.save()
        statement.score = score
        statement.save()
        print(statement.score)
    return i
Exemplo n.º 28
0
def lambda_handler(event, context):
    """Score ``event['text']`` and return the result through ``respond``.

    The response maps each textstat function name to its value for the
    text. ``context`` is not used.
    """
    text = event['text']

    # Response keys are the names of the textstat functions themselves.
    metric_names = (
        'flesch_reading_ease',
        'smog_index',
        'flesch_kincaid_grade',
        'coleman_liau_index',
        'automated_readability_index',
        'dale_chall_readability_score',
        'difficult_words',
        'linsear_write_formula',
        'gunning_fog',
        'text_standard',
    )
    response = {}
    for metric_name in metric_names:
        response[metric_name] = getattr(textstat, metric_name)(text)

    return respond(None, response)
Exemplo n.º 29
0
    def get_feat_readability_metrics(self):
        """Return ``(features, failed)`` for the scraped page body.

        On success ``features`` is a list of nine textstat values and
        ``failed`` is ``False``. On any exception the error is logged and
        ``(MISSING_FEATURE * 9, True)`` is returned. The text standard is
        intentionally not part of the feature list.
        """
        # https://github.com/shivam5992/textstat
        metric_names = (
            'flesch_reading_ease',
            'smog_index',
            'flesch_kincaid_grade',
            'coleman_liau_index',
            'automated_readability_index',
            'dale_chall_readability_score',
            'difficult_words',
            'linsear_write_formula',
            'gunning_fog',
        )
        try:
            body = self.webscrap.get_body()
            features = []
            for metric_name in metric_names:
                features.append(getattr(textstat, metric_name)(body))
            return features, False

        except Exception as e:
            config.logger.error(repr(e))
            return MISSING_FEATURE * 9, True
Exemplo n.º 30
0
def feature_readability(essay):
    """Return a 10-tuple of textstat readability features for ``essay``.

    Order: syllable count, Flesch reading ease, SMOG index, Flesch-Kincaid
    grade, Coleman-Liau index, automated readability index, Dale-Chall
    score, difficult words, Linsear Write formula, Gunning FOG.
    """
    return (
        # Number of syllables
        textstat.syllable_count(essay),
        # Reading-ease score of the document, between 0 and 100
        textstat.flesch_reading_ease(essay),
        # SMOG index: reflects how readable the document is
        textstat.smog_index(essay),
        # Grade score (school grade level)
        textstat.flesch_kincaid_grade(essay),
        # Grade level of the text
        textstat.coleman_liau_index(essay),
        # Approximates the grade needed to understand the text
        textstat.automated_readability_index(essay),
        # Grade level, based on the most common English words
        textstat.dale_chall_readability_score(essay),
        textstat.difficult_words(essay),
        # Grade level of the text
        textstat.linsear_write_formula(essay),
        # Fog index: reflects the reading difficulty of the text
        textstat.gunning_fog(essay),
    )
Exemplo n.º 31
0
def analyseText():
    """Return textstat counts and readability scores for the posted text.

    Expects a JSON object with an ``inputText`` key. Responds with
    ``('Missing values', 400)`` when the payload is not a JSON object or
    the key is absent; otherwise with the JSON-encoded metrics and 200.
    """
    values = request.get_json()
    required = ['inputText']
    # A missing or non-object payload (None, list, string, ...) is treated
    # like a missing key instead of raising TypeError below.
    if not isinstance(values, dict) or any(k not in values for k in required):
        return 'Missing values', 400

    text = values['inputText']
    result = {
        'syllable_count': textstat.syllable_count(text),
        'lexicon_count': textstat.lexicon_count(text),
        'sentence_count': textstat.sentence_count(text),
        'flesch_reading_ease': textstat.flesch_reading_ease(text),
        'flesch_kincaid_grade': textstat.flesch_kincaid_grade(text),
        'gunning_fog': textstat.gunning_fog(text),
        'smog_index': textstat.smog_index(text),
        'automated_readability_index': textstat.automated_readability_index(text),
        'coleman_liau_index': textstat.coleman_liau_index(text),
        'linsear_write_formula': textstat.linsear_write_formula(text),
        'dale_chall_readability_score': textstat.dale_chall_readability_score(text),
    }

    return jsonify(result), 200
Exemplo n.º 32
0
def calculate_readability_measures(id):
    """Compute readability measures for a page and store them on the document.

    Fetches the source of the ``page`` document ``id`` from the ``beek``
    index, runs the textstat metrics over its ``content`` field and writes
    them back under the ``measures`` key with a partial update.

    Scoring and updating are best-effort: any exception raised there is
    logged and swallowed, so the function returns ``None`` either way.
    Errors raised while fetching the document still propagate.

    Args:
        id: Identifier of the document to score. The name shadows the
            builtin but is kept so keyword callers keep working.
    """
    import logging

    es = elasticsearch.Elasticsearch()
    source = es.get_source(index='beek', doc_type='page', id=id)
    try:
        # Looked up inside the try so a missing field is handled like any
        # other scoring failure.
        content = source['content']
        measures = {
            'flesch':
            textstat.flesch_reading_ease(content),
            'smog':
            textstat.smog_index(content),
            'flesch_kincaid':
            textstat.flesch_kincaid_grade(content),
            'coleman_liau':
            textstat.coleman_liau_index(content),
            'readability':
            textstat.automated_readability_index(content),
            'dale_chall':
            textstat.dale_chall_readability_score(content),
            'difficult_words':
            textstat.difficult_words(content),
            'linsear_write_formula':
            textstat.linsear_write_formula(content),
            'gunning_fog':
            textstat.gunning_fog(content),
            'consensus':
            textstat.readability_consensus(content),
        }

        es.update(index='beek',
                  doc_type='page',
                  id=id,
                  body={'doc': {
                      'measures': measures
                  }},
                  refresh=True)
    except Exception:
        # Keep the best-effort contract, but leave a trace of what failed.
        logging.getLogger(__name__).exception(
            'Could not update readability measures for document %r', id)
Exemplo n.º 33
0
def calculate_readability_measures(id):
    """Score a page's readability and save the scores on the document.

    Loads the source of the ``page`` document ``id`` from the ``beek``
    index, computes the textstat metrics for its ``content`` field and
    stores them under ``measures`` through a partial update.

    Failures while scoring or updating are logged and suppressed (the
    operation is best-effort); failures while loading the document are not
    caught here.

    Args:
        id: Identifier of the document to score. Shadows the builtin; the
            name is kept for compatibility with existing callers.
    """
    import logging

    es = elasticsearch.Elasticsearch()
    source = es.get_source(index='beek', doc_type='page', id=id)
    try:
        # Read once; kept inside the try so a missing field is suppressed
        # exactly like any other scoring error.
        content = source['content']
        measures = {
            'flesch': textstat.flesch_reading_ease(content),
            'smog': textstat.smog_index(content),
            'flesch_kincaid': textstat.flesch_kincaid_grade(content),
            'coleman_liau': textstat.coleman_liau_index(content),
            'readability': textstat.automated_readability_index(content),
            'dale_chall': textstat.dale_chall_readability_score(content),
            'difficult_words': textstat.difficult_words(content),
            'linsear_write_formula': textstat.linsear_write_formula(content),
            'gunning_fog': textstat.gunning_fog(content),
            'consensus': textstat.readability_consensus(content),
        }

        es.update(index='beek', doc_type='page', id=id,
                  body={'doc': {'measures': measures}}, refresh=True)
    except Exception:
        # Still best-effort, but no longer silent.
        logging.getLogger(__name__).exception(
            'Could not update readability measures for document %r', id)
Exemplo n.º 34
0
def process(data):
    """Build a single-row numpy array of 19 text features for ``data``.

    The row holds, in order: word count, character count and punctuation
    count of the lower-cased, stripped text; the upper-case feature of the
    stripped text; five textstat scores of the raw text; and ten
    character-class tallies of the raw text, the last being their sum.
    """
    def tally(symbols):
        # Total number of occurrences in ``data`` of every entry in ``symbols``.
        return sum(data.count(symbol) for symbol in symbols)

    lowered = data.lower().strip()
    stripped = data.strip()

    word_total = numOfWords(lowered)
    char_total = numOfChar(lowered)
    punct_total = count(lowered, string.punctuation)
    upper_runs = numOfContUpperCase(stripped)

    fog = textstat.gunning_fog(data)
    ari = textstat.automated_readability_index(data)
    linsear = textstat.linsear_write_formula(data)
    hard_words = textstat.difficult_words(data)
    dale_chall = textstat.dale_chall_readability_score(data)

    # Quotes, full stops and commas, scaled to occurrences per 1000 characters.
    quote_density = tally(("'", ".", "\"", ",", "’", "‘", "”", "“"))
    quote_density = (quote_density / len(data)) * 1000
    digit_total = tally("1234567890")
    symbol_total = tally("?!@#$%&")
    colon_total = tally(":;")
    dash_total = tally(("—", "-", "_"))
    # NOTE(review): this divides the already-scaled density by the length
    # again; reproduced as-is - confirm whether that is intended.
    density_ratio = (quote_density / len(data)) * 100
    bracket_total = tally("()[]{}")
    star_slash_total = tally("*/")
    question_total = data.count("?")

    # Explicit left-to-right additions keep the floating-point result stable.
    combined = (quote_density + digit_total + symbol_total + colon_total
                + dash_total + density_ratio + bracket_total
                + star_slash_total + question_total)

    feature_row = [
        word_total, char_total, punct_total, upper_runs, fog, ari, linsear,
        hard_words, dale_chall, quote_density, digit_total, symbol_total,
        colon_total, dash_total, density_ratio, bracket_total,
        star_slash_total, question_total, combined,
    ]
    return np.array([feature_row])
Exemplo n.º 35
0
    def get_readability(self, corpus, type='ari'):
        """Return one textstat readability metric for ``corpus``.

        Args:
            corpus: Text handed to the selected textstat function.
            type: Metric selector. One of ``'ari'``, ``'flesch'``,
                ``'smog'``, ``'flesch_kincaid'``, ``'coleman'``,
                ``'dale_chall'``, ``'difficult_words'``, ``'linsear'``,
                ``'gunning_fog'`` or ``'readability_consensus'``. The
                historical misspellings ``'flesch_kinciad'`` and
                ``'readability_conensus'`` are still accepted. The
                parameter name shadows the builtin but is kept for callers.

        Returns:
            The value returned by the selected textstat function, or
            ``None`` when ``type`` is not a known selector.
        """
        # Selector -> textstat function name. The function is resolved with
        # getattr only after selection, so only the requested function has
        # to exist in the installed textstat.
        metric_names = {
            'ari': 'automated_readability_index',
            'flesch': 'flesch_reading_ease',
            'smog': 'smog_index',
            'flesch_kinciad': 'flesch_kincaid_grade',  # legacy misspelling
            'flesch_kincaid': 'flesch_kincaid_grade',
            'coleman': 'coleman_liau_index',
            'dale_chall': 'dale_chall_readability_score',
            'difficult_words': 'difficult_words',
            'linsear': 'linsear_write_formula',
            'gunning_fog': 'gunning_fog',
            'readability_conensus': 'readability_consensus',  # legacy misspelling
            'readability_consensus': 'readability_consensus',
        }

        metric_name = metric_names.get(type)
        if metric_name is None:
            return None
        return getattr(textstat, metric_name)(corpus)
Exemplo n.º 36
0
 def stats(self, text):
     """Collect textstat metrics for ``text`` into a dict.

     Each key of the returned dict is a metric label and each value is the
     result of the matching textstat function called with ``text``.
     """
     # (label in the result, textstat function name), in evaluation order.
     metric_table = (
         ('flesch_reading_ease', 'flesch_reading_ease'),
         ('smog', 'smog_index'),
         ('flesch kincaid', 'flesch_kincaid_grade'),
         ('coleman Liau', 'coleman_liau_index'),
         ('automated', 'automated_readability_index'),
         ('dale chall', 'dale_chall_readability_score'),
         ('difficult', 'difficult_words'),
         ('linsear', 'linsear_write_formula'),
         ('gunning_fog', 'gunning_fog'),
         ('standard', 'text_standard'),
         ('charcount', 'char_count'),
         ('lexicon count', 'lexicon_count'),
         ('syllable count', 'syllable_count'),
         ('sentence count', 'sentence_count'),
         ('avg sentence length', 'avg_sentence_length'),
         ('avg_syllables_per_word', 'avg_syllables_per_word'),
         ('avg_letter_per_word', 'avg_letter_per_word'),
         ('avg_sentence_per_word', 'avg_sentence_per_word'),
     )
     collected = {}
     for label, func_name in metric_table:
         collected[label] = getattr(textstat, func_name)(text)
     return collected
Exemplo n.º 37
0
    def updateData(self):
        """Append sentiment, sentence and readability rows to ``self.data``.

        Eleven lists are appended, in this order:

        0.  sentiment: compound, negative, positive and neutral scores
        1.  sentence info: sentence count, average sentence length,
            syllable count, word count (``len(self.splList)``), character
            count, character count without spaces, average letters per
            word, average syllables per word
        2.  Flesch reading ease (clamped to 0-100)
        3.  Flesch-Kincaid grade
        4.  Gunning fog index
        5.  SMOG index
        6.  automated readability index (clamped to 0-14)
        7.  Coleman-Liau index
        8.  Linsear Write index
        9.  Dale-Chall score (clamped to 0-10)
        10. overall text standard (clamped to 0-20)

        Rows 2-10 each hold the score rounded to three places, an
        approximate grade and a normalized value rounded to two places.
        The raw and adjusted scores are also kept on ``self`` (``freRaw``,
        ``freStat``, ``fkgRaw``, ...).

        Returns:
            ``self.data`` after the rows have been appended.
        """
        text = self.text

        def clamp(value, ceiling):
            # Restrict ``value`` to the range 0..ceiling.
            return min(max(value, 0), ceiling)

        def grade_row(stat):
            # Row layout shared by the metrics reported on the 0-18 scale.
            return [round(stat, 3), self.grade(stat), round(stat / 0.18, 2)]

        # Row 0: text sentiment.
        self.polscore = self.sid.polarity_scores(text)
        polarity = self.polscore
        self.data.append([
            polarity['compound'], polarity['neg'],
            polarity['pos'], polarity['neu'],
        ])

        # Row 1: sentence info.
        sentence_info = [
            textstat.sentence_count(text),
            textstat.avg_sentence_length(text),
            textstat.syllable_count(text),
            len(self.splList),
            textstat.char_count(text, False),
            textstat.char_count(text, True),
            textstat.avg_letter_per_word(text),
            textstat.avg_syllables_per_word(text),
        ]
        self.data.append(sentence_info)

        # Row 2: Flesch reading ease, scale 0-100; the normalized value is
        # the distance of the score from 100.
        self.freRaw = textstat.flesch_reading_ease(text)
        self.freStat = clamp(self.freRaw, 100)
        ease = self.freStat
        self.data.append([
            round(ease, 3),
            self.freGrade(ease),
            round(abs(ease - 100), 2),
        ])

        # Row 3: Flesch-Kincaid grade, scale 0-18.
        self.fkgRaw = textstat.flesch_kincaid_grade(text)
        self.fkgStat = self.adjustScore(self.fkgRaw)
        self.data.append(grade_row(self.fkgStat))

        # Row 4: Gunning fog index, scale 0-18.
        self.fogRaw = textstat.gunning_fog(text)
        self.fogStat = self.adjustScore(self.fogRaw)
        self.data.append(grade_row(self.fogStat))

        # Row 5: SMOG index, scale 0-18.
        self.smogRaw = textstat.smog_index(text)
        self.smogStat = self.adjustScore(self.smogRaw)
        self.data.append(grade_row(self.smogStat))

        # Row 6: automated readability index, scale 0-14; the grade lookup
        # receives the score rounded up.
        self.ariRaw = textstat.automated_readability_index(text)
        self.ariStat = clamp(self.ariRaw, 14)
        ari = self.ariStat
        self.data.append([
            round(ari, 3),
            self.ariGrade(ceil(ari)),
            round(ari / 0.14, 2),
        ])

        # Row 7: Coleman-Liau index, scale 0-18.
        self.cliRaw = textstat.coleman_liau_index(text)
        self.cliStat = self.adjustScore(self.cliRaw)
        self.data.append(grade_row(self.cliStat))

        # Row 8: Linsear Write index, scale 0-18.
        self.lwiRaw = textstat.linsear_write_formula(text)
        self.lwiStat = self.adjustScore(self.lwiRaw)
        self.data.append(grade_row(self.lwiStat))

        # Row 9: Dale-Chall readability score, scale 0-10.
        self.dcrRaw = textstat.dale_chall_readability_score(text)
        self.dcrStat = clamp(self.dcrRaw, 10)
        dale_chall = self.dcrStat
        self.data.append([
            round(dale_chall, 3),
            self.daleChallGrade(dale_chall),
            round(dale_chall / 0.1, 2),
        ])

        # Row 10: overall score, scale 0-20. text_standard is called twice:
        # once with True as second argument for the value that is clamped,
        # once without for the value handed to the grade lookup.
        self.txtRaw = textstat.text_standard(text, True)
        self.txtStd = clamp(self.txtRaw, 20)
        self.txtInfo = textstat.text_standard(text)
        overall = self.txtStd
        self.data.append([
            round(overall, 3),
            self.txtGrade(overall, self.txtInfo),
            round(overall / 0.2, 2),
        ])

        return self.data
Exemplo n.º 38
0
#! /usr/bin/env python

from textstat.textstat import textstat
import re

# Python 2 script: wait until the user confirms the lyric files are in place.
raw_input("Please copy the lyrics to the two text files song1 and song 2. \nWhen complete hit enter to analyze.")
print ""

# Song #1: read the lyrics, strip bracketed annotations and report which
# Dale-Chall band the text falls into.
# NOTE(review): the remainder of this try statement (further branches and its
# except/finally clause) is not visible in this excerpt.
try:
    f = open('song1.txt')
    f_read = str(f.read())
    # Remove everything from an opening "(" or "[" up to the next ")" or "]".
    cleaned = re.sub("[\(\[].*?[\)\]]", "", f_read)
    # The score is recomputed for every comparison and print below, and the
    # file is closed only inside whichever branch matches.
    if textstat.dale_chall_readability_score(cleaned) < 5:
        # Lowest band: unlike the other branches, no difficult-word count is printed.
        print "Song #1 | Dale Chall Score: " + str(textstat.dale_chall_readability_score(cleaned))
        print "Song #1 | " + "Easily understood by 4th-grade students or lower."
        f.close()
    elif textstat.dale_chall_readability_score(cleaned) < 6:
        print "Song #1 | Dale-Chall Score: " + str(textstat.dale_chall_readability_score(cleaned))
        print "Song #1 | # of Difficult Words: " + str(textstat.difficult_words(cleaned))
        print "Song #1 | " + "Easily understood by 5th-grade and 6th-grade students."
        f.close()
    elif textstat.dale_chall_readability_score(cleaned) < 7:
        print "Song #1 | Dale-Chall Score: " + str(textstat.dale_chall_readability_score(cleaned))
        print "Song #1 | # of Difficult Words: " + str(textstat.difficult_words(cleaned))
        print "Song #1 | " + "Easily understood by 7th-grade and 8th-grade students."
        f.close()
    elif textstat.dale_chall_readability_score(cleaned) < 8:
        print "Song #1 | Dale-Chall Score: " + str(textstat.dale_chall_readability_score(cleaned))
        print "Song #1 | # of Difficult Words: " + str(textstat.difficult_words(cleaned))
        print "Song #1 | " + "Easily understood by 9th-grade and 10th-grade students."
        f.close()
Exemplo n.º 39
0
			smog_index_grades.append(smog_index_grade)
			smog_index_total_grade += smog_index_grade

			ar_index_grade = textstat.automated_readability_index(tweet)	
			ar_index_grades.append(ar_index_grade)
			ar_index_total_grade += ar_index_grade
			
			cl_index_grade = textstat.coleman_liau_index(tweet)	
			cl_index_grades.append(cl_index_grade)
			cl_index_total_grade += cl_index_grade				

			lwf_grade = textstat.linsear_write_formula(tweet)	
			lwf_grades.append(lwf_grade)
			lwf_total_grade += lwf_grade

			dcr_grade = textstat.dale_chall_readability_score(tweet)	
			dcr_grades.append(dcr_grade)
			dcr_total_grade += dcr_grade

			num_tweets += 1



	#avg grades
	avg_flesch_kincaid_grade = flesch_kincaid_total_grade / num_tweets
	avg_gunning_fog_grade = gunning_fog_total_grade / num_tweets
	avg_smog_index_grade = smog_index_total_grade / num_tweets
	avg_ar_index_grade = ar_index_total_grade / num_tweets
	avg_cl_index_grade = cl_index_total_grade / num_tweets
	avg_lwf_grade = lwf_total_grade / num_tweets		
	avg_dcr_grade = dcr_total_grade / num_tweets		
Exemplo n.º 40
0
#main script
if __name__ == '__main__':
    # Prompt for a text and print its textstat counts and readability scores.
    # Written so the same source runs on Python 2 and Python 3: print is
    # always called with one parenthesised argument, and the line reader is
    # picked at run time.
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    print("TextStat Comparison Script")
    print("--------------------------")

    # Read in text from the command line.
    # This needs to be fixed to deal with / escape special characters.
    textToCheck = read_line("Please enter the text you would like to analyse: ")

    # Read in text from a file - but what format?

    print("\n\n")
    print("Results")
    print("==============================================")
    print("==============================================\n")

    print("Syllable Count: " + str(textstat.syllable_count(textToCheck)))
    # TRUE is the default and removes punctuation before counting.
    print("Lexicon Count: " + str(textstat.lexicon_count(textToCheck)))
    print("Sentence Count: " + str(textstat.sentence_count(textToCheck)))
    print("Flesch Reading Ease formula: " + str(textstat.flesch_reading_ease(textToCheck)))
    print("Flesch-Kincaid Grade Level: " + str(textstat.flesch_kincaid_grade(textToCheck)))
    print("Fog Scale (Gunning FOG Formula): " + str(textstat.gunning_fog(textToCheck)))
    print("SMOG Index: " + str(textstat.smog_index(textToCheck)))
    print("Automated Readability Index: " + str(textstat.automated_readability_index(textToCheck)))
    print("Coleman-Liau Index: " + str(textstat.coleman_liau_index(textToCheck)))
    print("Linsear Write Formula: " + str(textstat.linsear_write_formula(textToCheck)))
    print("Dale-Chall Readability Score: " + str(textstat.dale_chall_readability_score(textToCheck)))
    print("--------------------------------------------------------------")
    print("Readability Consensus based upon all the above tests: " + str(textstat.text_standard(textToCheck)))
    print("\n\n")
Exemplo n.º 41
0
#!/bin/python

import sys, string, os
from textstat.textstat import textstat

# Print the textstat metrics of a text file as one comma-separated line.
# Usage: <script> <inputfile>
script_name = sys.argv[0]
if len(sys.argv) < 2:
    # Fail with a usage hint instead of an IndexError traceback.
    sys.exit("usage: %s <inputfile>" % script_name)
inputfile = sys.argv[1]

# Join the lines with a space: joining them with nothing glues the last word
# of each line to the first word of the next and distorts every word-based
# metric.
with open(inputfile) as myfile:
    test_data = " ".join(line.rstrip() for line in myfile)

# Column order of the output line.
fields = [
    str(textstat.flesch_reading_ease(test_data)),
    str(textstat.smog_index(test_data)),
    str(textstat.flesch_kincaid_grade(test_data)),
    str(textstat.coleman_liau_index(test_data)),
    str(textstat.automated_readability_index(test_data)),
    str(textstat.dale_chall_readability_score(test_data)),
    str(textstat.difficult_words(test_data)),
    str(textstat.linsear_write_formula(test_data)),
    str(textstat.gunning_fog(test_data)),
    str(textstat.readability_consensus(test_data)),
    str(textstat.syllable_count(test_data)),
    str(textstat.lexicon_count(test_data, 1)),
    str(textstat.sentence_count(test_data)),
]

print(','.join(fields))