def plotAllRates(children, analysis):
    """Display rate curves for users with more than 20 recorded use entries.

    For every qualifying user one figure is drawn with three curves over
    ``analysis[cid].use``: the disruption-event rate (``rate_dst_event``,
    blue squares), the device-use rate (``dev_use_rate``, black) and the
    ``complete_dev_rate['ReactGood']`` rate (green circles).  A single red
    marker, placed one unit left of the smallest ``use`` value, shows
    ``month_count * night_count / 30.0`` taken from the user's
    ``children[cid].initial`` record.

    Each figure is shown with ``plt.show()``; nothing is written to disk.
    The loop stops once the number of shown figures exceeds 10.

    children -- mapping of user id to an object exposing ``initial``
    analysis -- mapping of user id to that user's analysis record
    """
    shown = 0
    for cid in analysis.keys():
        record = analysis[cid]
        if len(record.use) <= 20:
            # Too little usage history for this user; nothing to draw.
            continue

        fig, ax = plt.subplots()
        ax.set_title('rate of events for the last 7 days')
        ax.set_xlabel('number of days')
        ax.set_ylabel('rate')

        event_rate = record.rate_dst_event
        days = record.use
        react_good = record.complete_dev_rate['ReactGood']
        device_rate = record.dev_use_rate

        ax.plot(days, event_rate, 'bs-')
        ax.plot(days, device_rate, 'k,-')
        ax.plot(days, react_good, 'go-')

        # Red marker derived from the user's initial record, drawn just
        # before the first plotted use value.
        initial = children[cid].initial
        baseline = initial.month_count * initial.night_count / 30.0
        ax.plot([min(days) - 1], baseline, 'ro')

        ax.set_xlim((-2, max(days) + 1))
        # Upper limit leaves 20% headroom above the larger of the baseline
        # and the highest event rate.
        ax.set_ylim((-0.2, max([baseline] + event_rate) * 1.2))

        plt.show()

        shown += 1
        if shown > 10:
            break
def clusterLongTermUsers(analysis):
    """Cluster long-term users and plot the clusters.

    A user is included when ``biggest_improvement`` is non-empty, none of
    the ten numeric features below is NaN, and ``corr_dev_reactgood`` is
    neither ``None`` nor NaN.  Included users are clustered with 5-means on
    three values: ``corr_dev_reactgood``, ``std_offset_for_device_start``
    and ``avg_offset_for_device_start``.  Two scatter plots (correlation vs.
    std offset, correlation vs. mean offset), coloured by cluster label,
    are then displayed with ``plt.show()``.

    analysis -- mapping of user id to that user's analysis record

    Raises ValueError when fewer than 5 users pass the filters, because the
    5-cluster fit cannot be computed on fewer samples.
    """
    n_clusters = 5
    # Positions of the two offset statistics inside the feature vector.
    std_offset_col = 4
    mean_offset_col = 5

    features = []
    correlations = []
    for cid in analysis:
        ch = analysis[cid]
        if len(ch.biggest_improvement) == 0:
            continue
        vector = [ch.std_time_for_bed, ch.std_event1_time,
                  ch.std_time_to_event1, ch.avg_time_to_event1,
                  ch.std_offset_for_device_start,
                  ch.avg_offset_for_device_start,
                  ch.std_device_start_time, ch.std_device_interval,
                  ch.use_rate, len(ch.use)]
        if any(numpy.isnan(el) for el in vector):
            continue
        corr = ch.corr_dev_reactgood
        if corr is None or numpy.isnan(corr):
            continue
        features.append(vector)
        correlations.append(corr)

    if len(features) < n_clusters:
        raise ValueError(
            'need at least %d users with complete data to cluster, got %d'
            % (n_clusters, len(features)))

    X = numpy.array(features)
    # Columns: correlation, std offset, mean offset.  column_stack is used
    # instead of array(zip(...)) so the result is a 2-D float array on both
    # Python 2 and Python 3.
    X2 = numpy.column_stack(
        (correlations, X[:, std_offset_col], X[:, mean_offset_col]))

    k_means = cluster.KMeans(n_clusters=n_clusters)
    k_means.fit(X2)
    y = k_means.predict(X2)

    for column, label in ((1, 'std offset'), (2, 'mean offset')):
        plt.figure()
        plt.scatter(X2[:, 0], X2[:, column], c=y, edgecolor='face')
        plt.xlabel('correlation')
        plt.ylabel(label)
        plt.title('long term user analysis')
        plt.colorbar()
    plt.show()
def dataForML(analysis):
    """Build feature rows and target lists for machine-learning analysis.

    Users are visited in a random order (shuffled with ``random.shuffle``)
    so that row order does not follow the order of ``analysis``.  A user
    contributes a row only when ``biggest_improvement`` is non-empty, none
    of the eleven features is NaN, and ``corr_dev_reactgood`` is neither
    ``None`` nor NaN.

    analysis -- mapping of user id to that user's analysis record

    Returns a tuple ``(X, Y, Y2, Y3, Y4)`` of equally long lists:
      X  -- feature vectors (eleven values per user)
      Y  -- ``biggest_improvement[2]``
      Y2 -- ``biggest_improvement[0]``
      Y3 -- ``corr_dev_reactgood``
      Y4 -- ``int(works_flag)``
    """
    # Copy the keys into a list: random.shuffle needs a mutable sequence,
    # and on Python 3 dict.keys() is a view that cannot be shuffled.
    index = list(analysis.keys())
    random.shuffle(index)

    X = []
    Y = []
    Y2 = []
    Y3 = []
    Y4 = []
    for cid in index:
        ch = analysis[cid]
        if len(ch.biggest_improvement) == 0:
            continue
        vector = [ch.std_time_for_bed, ch.std_event1_time,
                  ch.std_time_to_event1, ch.avg_time_to_event1,
                  ch.std_offset_for_device_start,
                  ch.avg_offset_for_device_start,
                  ch.std_device_start_time, ch.std_device_interval,
                  ch.use_rate, len(ch.use), len(ch.vibration_use)]
        if any(numpy.isnan(el) for el in vector):
            continue
        corr = ch.corr_dev_reactgood
        if corr is None or numpy.isnan(corr):
            continue
        X.append(vector)
        Y.append(ch.biggest_improvement[2])
        Y2.append(ch.biggest_improvement[0])
        Y3.append(corr)
        Y4.append(int(ch.works_flag))
    return X, Y, Y2, Y3, Y4
def dataForInitialAnalysis(children, analysis):
    """Build ML data from the "initial" history that users provided.

    Users are visited in shuffled order.  For every user whose
    ``analysis[i].start_date`` is truthy, one row
    ``[month_count, night_count, s_date]`` is added to ``X`` and
    ``len(analysis[i].use)`` is added to ``Y``.  ``s_date`` is the result of
    ``dateDiff(start, start_date)``; ``start`` is a module-level name
    defined outside this function (presumably a reference date -- confirm).

    When at least one row was collected, the mean and standard deviation of
    the first column and the first ten rows are printed and
    ``plotInitData(X, Y)`` is called.  With no rows, both are skipped.

    children -- mapping of user id to an object exposing ``initial``
    analysis -- mapping of user id to that user's analysis record

    Returns ``(index, X, Y)``: the shuffled list of all user ids, an
    ``(n, 3)`` array and a length-``n`` array.  NOTE: ``index`` also holds
    users that were skipped for lacking a start date, so it is not
    row-aligned with ``X``/``Y`` unless every user has one.
    """
    # Copy the keys into a list: random.shuffle needs a mutable sequence,
    # and on Python 3 dict.keys() is a view that cannot be shuffled.
    index = list(analysis.keys())
    random.shuffle(index)

    X = []
    Y = []
    for i in index:
        init = children[i].initial
        if analysis[i].start_date:
            s_date = dateDiff(start, analysis[i].start_date)
            X.append([init.month_count, init.night_count, s_date])
            Y.append(len(analysis[i].use))

    # Force a 2-D (n, 3) shape so column indexing also works when n == 0.
    X = numpy.array(X).reshape(-1, 3)
    Y = numpy.array(Y)
    if len(X) == 0:
        # Nothing to summarise or plot.
        return index, X, Y

    # Single-argument print(...) produces the same text on Python 2 and 3.
    print('mean %s' % (numpy.mean(X[:, 0]),))
    print('sdt %s' % (numpy.std(X[:, 0]),))
    print('sample X %s' % (X[:10],))
    plotInitData(X, Y)
    return index, X, Y
def plotRate(children, analysis):
    """Save a disruption-event rate plot for each user with more than 20 use entries.

    For every qualifying user the event rate (``rate_dst_event``) is plotted
    against ``use`` as blue squares, together with one red marker placed one
    unit left of the smallest ``use`` value at
    ``month_count * night_count / 30.0`` from ``children[cid].initial``.
    The figure is written to ``figures2/dev_rate<cid>.jpg`` and then closed.

    children -- mapping of user id to an object exposing ``initial``
    analysis -- mapping of user id to that user's analysis record
    """
    saved = 0  # tally of written figures; not read elsewhere in this function
    for cid in analysis.keys():
        record = analysis[cid]
        if len(record.use) <= 20:
            # Too little usage history for this user; nothing to draw.
            continue

        fig, ax = plt.subplots()
        ax.set_title('rate of distuption events for the last 7 days')
        ax.set_xlabel('number of days')
        ax.set_ylabel('dist event rate')

        event_rate = record.rate_dst_event
        days = record.use
        ax.plot(days, event_rate, 'bs-')

        # Red marker derived from the user's initial record, drawn just
        # before the first plotted use value.
        initial = children[cid].initial
        baseline = initial.month_count * initial.night_count / 30.0
        ax.plot([min(days) - 1], baseline, 'ro')

        ax.set_xlim((-2, max(days) + 1))
        # Upper limit leaves 20% headroom above the larger of the baseline
        # and the highest event rate.
        ax.set_ylim((-0.2, max([baseline] + event_rate) * 1.2))

        target = 'figures2/dev_rate' + str(cid) + '.jpg'
        fig.savefig(target)
        plt.close(fig)
        saved += 1