def plotAvgHist(column, field, bins=40): global subplot subplot +=1 plt.subplot(subplot) plt.title( '%s: \nmean=%s std=%s ' % (field, round(mean(column), 3), round(std(column), 3)), fontsize=11 ) plt.axhspan(0, 2000, color='none') plt.hist( column, bins=bins, range=(0,1)) print '%s: mean=%s std=%s items_under_0.3=%s' % (field, round(mean(column), 3), round(std(column), 3), sum([1 for c in column if c<0.3]) ) JCR = JamendoCsvReader('stats_reviewAVGs.csv') subplot = 220 columns = JCR.getColumns(['reviews_avgnote', 'avg_agreed_note', 'weighted_avg_note', 'weighted_avg_agreed_note', 'weighted_avg_agreed_note2', 'reviews_all'], \ filterfunc=filterfieldsunder(['reviews_all'],4)) plt.figure(figsize=(12,8)) for key in ['avg_agreed_note', 'weighted_avg_note', 'weighted_avg_agreed_note', 'weighted_avg_agreed_note2']: plotAvgHist(columns[key], key) plt.figure() plt.title( '%s: \nmean=%s std=%s ' % ('reviews_avgnote', round(mean(columns['reviews_avgnote']), 3), \
# Make the parent directory importable so the ``core`` package resolves.
from sys import path
path.append('../')

from core.JamendoCsvReader import JamendoCsvReader
import matplotlib.pyplot as plt
from numpy import array

JCR = JamendoCsvReader('stats_album_total.csv')

# Aggregate columns with a plain linear operation (here: a sum).
# NOTE(review): assumes getColumns returns array-like columns so that ``+``
# adds them element-wise -- confirm against JamendoCsvReader.
FBcols = JCR.getColumns(['fb_likesharecomment', 'starred'])
total_likes = FBcols['fb_likesharecomment'] + FBcols['starred']

plt.figure()
plt.plot(total_likes, 'b-', linewidth=1)
plt.title('fb total (sum of likes,comments and shares) + starred')

# Compare two curves without going through compareJoinedColumnsPlotting, which
# leaves every plotting parameter free to be set directly.
JC = JCR.getColumns(
    ['reviews_avgnote', 'weighted_avg_agreed_note'],
    sortkey='reviews_avgnote',
)
avgnote = JC['reviews_avgnote']
avgweightednote = JC['weighted_avg_agreed_note']

plt.figure()
# Make the parent directory importable so the ``core`` package resolves.
from sys import path
path.append('../')

from core.JamendoCsvReader import JamendoCsvReader
import matplotlib.pyplot as plt
import numpy as np


def _is_positive(note):
    """Return True when ``note`` is strictly greater than zero."""
    return bool(note > 0.0)


def _is_zero(note):
    """Return True when ``note`` equals zero."""
    return bool(note == 0.0)


JCR = JamendoCsvReader('stats_album_total.csv')

plt.figure(figsize=(12, 8))
plt.subplot(211)

# Three views of the same column: every value, the values above zero, and the
# number of values equal to zero.
avgnote = JCR.getColumnArray('reviews_avgnote')
avgnote_no0 = JCR.getColumnArray('reviews_avgnote', filterfunc=_is_positive)
avgnote_0 = len(JCR.getColumnArray('reviews_avgnote', filterfunc=_is_zero))

# Colours are set explicitly because the title refers to "the green one";
# the default colour of a second series is not green on matplotlib >= 2.0.
plt.hist(avgnote, bins=10, range=(0, 1), color='b')
plt.hist(avgnote_no0, bins=10, range=(0, 1), color='g')

# Ten bins over the range (0, 1) are 0.1 wide each.
plt.title(
    'Histogram with the distribution of review_avgnotes, spread on ten bins of length 0.1. \n'
    'The green one exclude items with review avg=0, so the ones that \n probably have no votes (%s)'
    % avgnote_0,
    fontsize=12)

# Same three views for the weighted/agreed note column.
avgnote = JCR.getColumnArray('weighted_avg_agreed_note')
avgnote_no0 = JCR.getColumnArray('weighted_avg_agreed_note', filterfunc=_is_positive)
avgnote_0 = len(JCR.getColumnArray('weighted_avg_agreed_note', filterfunc=_is_zero))