Пример #1
0
def plotAllRates(children, analysis):
    """Show per-user rate curves for users with more than 20 recorded days.

    For every qualifying user three series are drawn against ``use``:
    the disruption-event rate (blue squares), the device-use rate (black)
    and the 'ReactGood' rate (green circles).  A single red dot, placed one
    step to the left of the first ``use`` value, marks the rate derived from
    the user's initial data (``month_count * night_count / 30.0``).

    Figures are displayed interactively with ``plt.show()``; writing them to
    disk is currently disabled.  The loop stops after 11 figures.

    Args:
        children: mapping of user id -> object exposing ``initial`` with
            ``month_count`` and ``night_count``.
        analysis: mapping of user id -> object exposing ``use``,
            ``rate_dst_event``, ``dev_use_rate`` and ``complete_dev_rate``.
            ``rate_dst_event`` must support ``list + rate_dst_event``.
    """
    shown = 0
    for cid in analysis.keys():
        record = analysis[cid]
        if len(record.use) <= 20:
            # Not enough history to be worth plotting.
            continue

        fig, ax = plt.subplots()
        ax.set_title('rate of events for the last 7 days')
        ax.set_xlabel('number of days')
        ax.set_ylabel('rate')

        event_rate = record.rate_dst_event
        days = record.use
        good_reaction_rate = record.complete_dev_rate['ReactGood']
        device_rate = record.dev_use_rate

        curves = (
            (event_rate, 'bs-'),
            (device_rate, 'k,-'),
            (good_reaction_rate, 'go-'),
        )
        for series, fmt in curves:
            ax.plot(days, series, fmt)

        # Reference point computed from the data the user supplied up front.
        history = children[cid].initial
        baseline = history.month_count * history.night_count / 30.0
        ax.plot([min(days) - 1], baseline, 'ro')

        ax.set_xlim((-2, max(days) + 1))
        ax.set_ylim((-0.2, max([baseline] + event_rate) * 1.2))

        # To save instead of showing:
        #   fig.savefig('figures2/dev_rate' + str(cid) + '.jpg'); plt.close(fig)
        plt.show()

        shown += 1
        if shown > 10:
            break
def clusterLongTermUsers(analysis):
    """Cluster long-term users with k-means and scatter-plot the result.

    A user is included when ``biggest_improvement`` is non-empty, none of the
    ten feature values is NaN, and ``corr_dev_reactgood`` is neither ``None``
    nor NaN.  The clustering runs on three columns: the correlation, the
    standard deviation of the device-start offset and the average
    device-start offset.  Two figures are shown (correlation vs. std offset,
    correlation vs. mean offset), coloured by cluster label.

    Changes from the previous version:
      * The "std offset" column used feature index 5 (the average offset), so
        both plots showed the same data.  It now uses index 4
        (``std_offset_for_device_start``).
      * The preliminary 3-cluster fit on the full feature matrix was dropped;
        its result was overwritten before being used.
      * ``numpy.column_stack`` replaces ``numpy.array(zip(...))``, which does
        not build a 2-D array on Python 3.

    Args:
        analysis: mapping of user id -> per-user analysis object.

    Raises:
        IndexError: if no user passes the filters (the feature matrix is
            then not two-dimensional).
        Whatever ``cluster.KMeans`` raises when there are fewer samples than
        clusters.
    """
    # Positions inside the feature vector built below.
    std_offset_col = 4
    avg_offset_col = 5

    features = []
    correlations = []
    for cid in analysis.keys():
        ch = analysis[cid]
        if len(ch.biggest_improvement) == 0:
            continue

        vector = [ch.std_time_for_bed, ch.std_event1_time,
                  ch.std_time_to_event1, ch.avg_time_to_event1,
                  ch.std_offset_for_device_start,
                  ch.avg_offset_for_device_start,
                  ch.std_device_start_time, ch.std_device_interval,
                  ch.use_rate, len(ch.use)]
        if any(numpy.isnan(el) for el in vector):
            continue

        corr = ch.corr_dev_reactgood
        if corr is None or numpy.isnan(corr):
            continue

        features.append(vector)
        correlations.append(corr)

    X = numpy.array(features)
    X2 = numpy.column_stack((correlations,
                             X[:, std_offset_col],
                             X[:, avg_offset_col]))

    k_means = cluster.KMeans(n_clusters=5)
    k_means.fit(X2)
    y = k_means.predict(X2)

    # One figure per offset statistic, both against the correlation.
    for column, ylabel in ((1, 'std offset'), (2, 'mean offset')):
        plt.figure()
        plt.scatter(X2[:, 0], X2[:, column], c=y, edgecolor='face')
        plt.xlabel('correlation')
        plt.ylabel(ylabel)
        plt.title('long term user analysis')
        plt.colorbar()

    plt.show()
def dataForML(analysis):
    """Build feature vectors and target lists for machine-learning analysis.

    Users are visited in random order so that the resulting rows carry no
    hidden trend based on dictionary order (e.g. when users joined).  A user
    contributes a row only when ``biggest_improvement`` is non-empty, none of
    the eleven feature values is NaN, and ``corr_dev_reactgood`` is neither
    ``None`` nor NaN.

    The key list is materialised with ``list(...)`` before shuffling:
    ``random.shuffle`` needs an indexable sequence, and on Python 3
    ``dict.keys()`` is a view that is not.

    Args:
        analysis: mapping of user id -> per-user analysis object.

    Returns:
        A tuple ``(X, Y, Y2, Y3, Y4)`` of parallel lists:
          X  -- 11-element feature vectors,
          Y  -- ``biggest_improvement[2]``,
          Y2 -- ``biggest_improvement[0]``,
          Y3 -- ``corr_dev_reactgood``,
          Y4 -- ``int(works_flag)``.
    """
    index = list(analysis.keys())
    random.shuffle(index)

    X = []
    Y = []
    Y2 = []
    Y3 = []
    Y4 = []
    for cid in index:
        ch = analysis[cid]
        if len(ch.biggest_improvement) == 0:
            continue

        vector = [ch.std_time_for_bed, ch.std_event1_time,
                  ch.std_time_to_event1, ch.avg_time_to_event1,
                  ch.std_offset_for_device_start,
                  ch.avg_offset_for_device_start,
                  ch.std_device_start_time, ch.std_device_interval,
                  ch.use_rate, len(ch.use),
                  len(ch.vibration_use)]

        # Rows with any missing feature or a missing correlation are dropped.
        if any(numpy.isnan(el) for el in vector):
            continue
        corr = ch.corr_dev_reactgood
        if corr is None or numpy.isnan(corr):
            continue

        X.append(vector)
        Y.append(ch.biggest_improvement[2])
        Y2.append(ch.biggest_improvement[0])
        Y3.append(corr)
        Y4.append(int(ch.works_flag))

    return X, Y, Y2, Y3, Y4
def dataForInitialAnalysis(children, analysis):
    """Build ML data from the "initial" (prior sleep-history) user answers.

    Users are visited in random order.  For each user whose analysis record
    has a truthy ``start_date`` one row is produced:
    ``[month_count, night_count, dateDiff(start, start_date)]`` with
    ``len(use)`` as the target.  Summary statistics of the first column and
    the first ten rows are printed, and the data is passed to
    ``plotInitData``.

    ``start``, ``dateDiff`` and ``plotInitData`` are module-level names
    defined outside this function.

    The key list is materialised with ``list(...)`` before shuffling
    (``dict.keys()`` is a non-indexable view on Python 3), and the print
    calls use a single pre-formatted string so they emit the same text on
    Python 2 and Python 3.

    Args:
        children: mapping of user id -> object exposing ``initial`` with
            ``month_count`` and ``night_count``.
        analysis: mapping of user id -> object exposing ``start_date`` and
            ``use``.

    Returns:
        ``(index, X, Y)`` where ``index`` is the shuffled list of *all* user
        ids, ``X`` is a 2-D numpy array of rows and ``Y`` a 1-D numpy array
        of targets.
        NOTE(review): ``index`` also contains users skipped for lacking a
        ``start_date``, so it is not row-aligned with ``X``/``Y`` -- confirm
        that callers expect this.

    Raises:
        IndexError: if no user has a ``start_date`` (``X`` is then empty and
            cannot be column-sliced).
    """
    index = list(analysis.keys())
    random.shuffle(index)

    rows = []
    targets = []
    for i in index:
        init = children[i].initial
        record = analysis[i]
        if not record.start_date:
            continue
        s_date = dateDiff(start, record.start_date)
        rows.append([init.month_count, init.night_count, s_date])
        targets.append(len(record.use))

    X = numpy.array(rows)
    Y = numpy.array(targets)
    print('mean %s' % (numpy.mean(X[:, 0]),))
    print('sdt %s' % (numpy.std(X[:, 0]),))
    print('sample X %s' % (X[:10],))
    plotInitData(X, Y)
    return index, X, Y
Пример #5
0
def plotRate(children, analysis):
    """Plot the disruption-event rate for users with more than 20 entries in ``use``.

    For each qualifying user one matplotlib figure is created, written to
    'figures2/dev_rate<cid>.jpg' and then closed.

    children -- mapping of user id -> object exposing ``initial`` with
                ``month_count`` and ``night_count``
    analysis -- mapping of user id -> object exposing ``use`` and
                ``rate_dst_event``
    """
    # Number of figures produced so far.
    count=0
    for cid in analysis.keys():
        if len(analysis[cid].use)>20:
            fig, ax=plt.subplots()
            ax.set_title('rate of distuption events for the last 7 days')
            ax.set_xlabel('number of days')
            ax.set_ylabel('dist event rate')
            rate=analysis[cid].rate_dst_event
            use=analysis[cid].use
            # Event rate against the values in `use` (blue squares joined by a line).
            ax.plot(use, rate, 'bs-')
            init=children[cid].initial
            # Reference value derived from the user's initial data; presumably a
            # per-night rate over a 30-day month -- TODO confirm units.
            value=init.month_count*init.night_count/30.0
            # Draw the reference as a red dot one step left of the first `use` value.
            ax.plot([min(use) - 1],value, 'ro')
            ax.set_xlim((-2, max(use)+1))
            # `[value] + rate` relies on `rate` being a list (list concatenation).
            ax.set_ylim((-0.2, max([value] + rate)*1.2))
            fig.savefig('figures2/dev_rate'+str(cid)+'.jpg')
            # Close the figure after saving so figures do not accumulate.
            plt.close(fig)
            #plt.show()
            count+=1