def hist_number_of_words(nfvs):
    """Plot linear and log-scale histograms of words per note.

    Reads ``note_words`` from every value of *nfvs*, summarises the counts
    with ``s`` and renders two images through ``cap``: ``n-words.png`` via
    ``cap.hist2`` and ``n-logwords.png`` via ``cap.loghist``.  Both plots use
    the same bin edges, axis labels and title.
    """
    note_count = len(nfvs)
    word_counts = [entry['note_words'] for entry in nfvs.values()]
    summary = s(word_counts)

    bin_edges = (
        0, 1, 5, 10, 20, 30, 40, 50, 100, 200, 500, 1000, 2000, 5000, 10000,
    )
    x_label = "number of words"
    y_label = "Notes (out of %d)" % note_count
    # "min" is the literal 0 in the title; only summary[1]..summary[4] are
    # interpolated (labelled max, mean, median and var by the format string).
    heading = (
        "words per note [df:%d] (min:0, max:%g, mean:%g, median:%g, var:%g)"
        % ((note_count - 1,) + tuple(summary[i] for i in range(1, 5)))
    )

    # Each renderer gets its own list of bin edges, as in separate literals.
    renderers = (
        (cap.hist2, "/var/www/listit-study/n-words.png"),
        (cap.loghist, "/var/www/listit-study/n-logwords.png"),
    )
    for render, target in renderers:
        render(
            word_counts,
            breaks=list(bin_edges),
            filename=target,
            xlab=x_label,
            title=heading,
            ylab=y_label)
def hist_edit_distance(nfs):
    """Render a log-scale histogram of edit distances to edit-distances.png.

    Collects ``edit_distance`` from every value of *nfs*, summarises the
    values with ``s`` and hands everything to ``cap.loghist``.

    NOTE(review): this file defines ``hist_edit_distance`` a second time
    right after this definition; the later one is what the name ends up
    bound to.
    """
    total = len(nfs)
    distances = [record["edit_distance"] for record in nfs.values()]
    summary = s(distances)

    # "min" is hard-coded to 0 in the title; summary[1..4] fill the rest.
    title_values = (total - 1, summary[1], summary[2], summary[3], summary[4])
    plot_options = dict(
        breaks=[0, 1, 5, 10, 20, 30, 40, 50, 100, 200, 500, 1000, 10000],
        filename="/var/www/listit-study/edit-distances.png",
        xlab="edit distance (chars)",
        title="edit distance[%d] (min:0, max:%g, mean:%g, median:%g, var:%g)"
        % title_values,
        ylab="edits (out of %d)" % total,
    )
    cap.loghist(distances, **plot_options)
def hist_edit_distance(nfs):
    """Plot the distribution of per-entry edit distances on a log histogram.

    Takes the ``edit_distance`` field of each value in *nfs*, computes
    summary statistics with ``s`` and writes ``edit-distances.png`` through
    ``cap.loghist``.

    NOTE(review): an earlier definition with the same name and body appears
    just before this one in the file; this later definition replaces it.
    """
    entry_count = len(nfs)
    edit_distances = [item["edit_distance"] for item in nfs.values()]
    summary = s(edit_distances)

    bin_edges = [0, 1, 5, 10, 20, 30, 40, 50, 100, 200, 500, 1000, 10000]
    # The title prints a literal "min:0"; indices 1-4 of the summary supply
    # the values labelled max, mean, median and var.
    heading = (
        "edit distance[%d] (min:0, max:%g, mean:%g, median:%g, var:%g)"
        % ((entry_count - 1,) + tuple(summary[i] for i in range(1, 5)))
    )
    y_label = "edits (out of %d)" % entry_count

    cap.loghist(
        edit_distances,
        breaks=bin_edges,
        filename="/var/www/listit-study/edit-distances.png",
        xlab="edit distance (chars)",
        title=heading,
        ylab=y_label)
def hist_capped_number_of_words(nfvs, cap=300):
    """Plot words-per-note histograms, ignoring notes with *cap* or more words.

    Args:
        nfvs: mapping whose values each carry a ``note_words`` count.
        cap: exclusive upper limit; only notes with ``note_words < cap`` are
            plotted.  Defaults to 300.

    The summary statistics (via ``s``) are computed over the retained counts,
    while the "df" in the title and the "out of" in the y label use the size
    of the whole of *nfvs*, including the notes that were filtered out.
    """
    # The ``cap`` parameter shadows the module-level ``cap`` plotting object
    # used by the other histogram helpers, so ``cap.hist2`` would be looked up
    # on the integer limit and raise AttributeError.  The parameter name is
    # part of the public signature, so fetch the module-level object
    # explicitly instead of renaming the parameter.
    plotter = globals()["cap"]

    total = len(nfvs)
    nwords = [
        x['note_words'] for x in nfvs.values() if x['note_words'] < cap
    ]
    stats = s(nwords)

    # "min" is the literal 0 in the title; stats[1..4] fill the other fields.
    title = (
        "words per note [df:%d] (min:0, max:%g, mean:%g, median:%g, var:%g)"
        % (total - 1, stats[1], stats[2], stats[3], stats[4])
    )
    ylab = "Notes (out of %d)" % total

    # NOTE(review): both output paths are the same ones hist_number_of_words
    # writes to, so whichever runs last overwrites the other's images --
    # confirm this is intended.
    # NOTE(review): the top bin edge is fixed at 300 regardless of ``cap``;
    # a larger ``cap`` lets values above the last edge through -- check how
    # hist2 treats them.
    plotter.hist2(
        nwords,
        breaks=[0, 1, 5, 10, 20, 30, 40, 50, 100, 200, 300],
        filename="/var/www/listit-study/n-words.png",
        xlab="number of words",
        title=title,
        ylab=ylab)
    plotter.loghist(
        nwords,
        breaks=[
            0, 1, 5, 10, 20, 30, 40, 50, 100, 200, 500, 1000, 2000, 5000,
            10000
        ],
        filename="/var/www/listit-study/n-logwords.png",
        xlab="number of words",
        title=title,
        ylab=ylab)