# Cluster a sample of gold subjects with divisive k-means, then invoke the
# aggregator's IBCC and ROC steps once every sampled subject is processed.
clusterAlg = DivisiveKmeans().__fit__

penguin = PenguinAggregation()
gold_subjects = penguin.__get_gold_subjects__()
# The slice is the only cap on the sample size: at most 50 subjects are
# visited, so the loop needs no separate counter-based cut-off.
gold_sample = gold_subjects[:50]
penguin.__readin_users__()

for count, zooniverse_id in enumerate(gold_sample):
    # Progress output: running index and the subject being processed.
    print("%s %s" % (count, zooniverse_id))
    penguin.__readin_subject__(zooniverse_id, read_in_gold=True)
    # NOTE(review): correctionAlg is not bound anywhere in this fragment;
    # presumably it is defined earlier in the file - confirm.
    blankImage = penguin.__cluster_subject__(
        zooniverse_id,
        clusterAlg,
        fix_distinct_clusters=True,
        correction_alg=correctionAlg,
    )
    penguin.__soy_it__(zooniverse_id)

# NOTE(review): the original indentation was lost; these two calls take no
# subject id, so they are assumed to run once after the loop - confirm.
penguin.__signal_ibcc__()
penguin.__roc__()

# Disabled off-by-one inspection (nesting below is a best-effort
# reconstruction of the lost indentation):
# one_overlap = penguin.__off_by_one__(display=True)
# last_id = None
#
# for t in one_overlap:
#     if t[0] != last_id:
#         print "*****"
#     print "====="
#     last_id = t[0]
#     penguin.__relative_confusion__(t)
# Report how the per-subject cluster counts in Z1 compare with those in Z2.
# Going by the axis labels below, Z1 holds the agglomerative counts and Z2
# the divisive k-means counts. Every fraction is taken over len(Z1).
subject_total = float(len(Z1))
count_pairs = list(zip(Z1, Z2))
print(sum(1 for (first, second) in count_pairs if first > second) / subject_total)
print(sum(1 for (first, second) in count_pairs if first < second) / subject_total)
print(sum(1 for (first, second) in count_pairs if first == second) / subject_total)

# Scatter of the two counts, one point per subject.
plt.plot(Z2, Z1, '.', color="black")
plt.xlabel("Number of Clusters Found by Divisive K-Means")
plt.ylabel("Number of Clusters Found by Agglomerative Clustering")

# Both axes share one upper bound (largest count plus a margin of 10); the
# dashed diagonal marks where the two algorithms agree.
axis_top = max(max(Z1), max(Z2)) + 10
plt.plot([0, axis_top], [0, axis_top], "--", color="black")
plt.xlim((0, axis_top))
plt.ylim((0, axis_top))
plt.show()

# Overlay the curve returned by each aggregator's __roc__ on one figure:
# agglomerative in red, divisive k-means in green.
for aggregator, curve_colour in ((agglomerative, "red"), (dkmeans, "green")):
    aggregator.__signal_ibcc__()
    X, Y = aggregator.__roc__()
    plt.plot(X, Y, color=curve_colour)
plt.show()

# Disabled runtime-versus-cluster-count plot:
# plt.plot(X1,Y1,"+",color="black",label="Agglomerative")
# plt.plot(X2,Y2,"o",color="black",label = "Divisive k-means")
# plt.xlabel("Number of Clusters")
# plt.ylabel("Runtime of Clustering Algorithm")
# plt.legend(loc="upper left")
# print len(X1),len(X2)
def _share_of_pairs(relation):
    """Return the fraction of (Z1, Z2) pairs for which relation(z1, z2) holds.

    The denominator is len(Z1), not the number of zipped pairs.
    """
    hits = [1 for (left, right) in zip(Z1, Z2) if relation(left, right)]
    return len(hits) / float(len(Z1))


# Fractions of subjects where Z1 is above, below, and equal to Z2. The axis
# labels below tie Z1 to agglomerative clustering and Z2 to divisive k-means.
print(_share_of_pairs(lambda left, right: left > right))
print(_share_of_pairs(lambda left, right: left < right))
print(_share_of_pairs(lambda left, right: left == right))

# One point per subject: divisive k-means count on x, agglomerative on y.
plt.plot(Z2, Z1, '.', color="black")
plt.xlabel("Number of Clusters Found by Divisive K-Means")
plt.ylabel("Number of Clusters Found by Agglomerative Clustering")

# Square plotting window from 0 to the largest count plus 10, with a dashed
# diagonal showing where both counts are equal.
window = (0, max(max(Z1), max(Z2)) + 10)
plt.plot(list(window), list(window), "--", color="black")
plt.xlim(window)
plt.ylim(window)
plt.show()

# Curve from the agglomerative aggregator's __roc__, drawn in red.
agglomerative.__signal_ibcc__()
X, Y = agglomerative.__roc__()
plt.plot(X, Y, color="red")

# Curve from the divisive k-means aggregator's __roc__, drawn in green on
# the same figure.
dkmeans.__signal_ibcc__()
X, Y = dkmeans.__roc__()
plt.plot(X, Y, color="green")
plt.show()

# Disabled runtime-versus-cluster-count plot:
# plt.plot(X1,Y1,"+",color="black",label="Agglomerative")
# plt.plot(X2,Y2,"o",color="black",label = "Divisive k-means")
# plt.xlabel("Number of Clusters")
# plt.ylabel("Runtime of Clustering Algorithm")
# plt.legend(loc="upper left")
# print len(X1),len(X2)
# NOTE(review): this is the interior of a loop whose header lies outside the
# visible source (the `continue` below requires one). `s`, `pts`,
# `zooniverse_id`, `penguin` and `clusterAlg` are bound outside this fragment.
# The original indentation was lost, so the statements are laid out flat;
# restore their nesting relative to the enclosing loop before running.

# Rescale each (x, y) point by the subject's recorded original size:
# x is divided by width/1000. and y by height/563., which maps the original
# pixel range onto a 1000 x 563 frame. The float literals keep the division
# from truncating.
width = s["metadata"]["original_size"]["width"]
height = s["metadata"]["original_size"]["height"]
pts = [(int(x) / (width / 1000.), int(y) / (height / 563.)) for (x, y) in pts]

# Skip any subject whose status is not "complete".
if penguin.__get_status__(zooniverse_id) != "complete":
    continue

# Read the subject in and cluster it.
penguin.__readin_subject__(zooniverse_id)
blankImage = penguin.__cluster_subject__(zooniverse_id, clusterAlg, fix_distinct_clusters=True)

# NOTE(review): takes no subject id, so presumably meant to run once after
# the loop - confirm against the original layout.
penguin.__roc__()

# Disabled analysis code (nesting is a best-effort reconstruction):
#__ibcc__2(penguin.clusterResults,penguin.users_per_subject)
# plt.plot(Xt,Yt,'.')
# plt.xlabel("Large cluster size")
# plt.ylabel("Small cluster size")
# plt.xlim((min(Xt)-0.05,max(Xt)+0.05))
# plt.ylim((min(Yt)-0.05,max(Yt)+0.05))
# plt.show()
# for i in range(1,10):
#     print sum([1 for j in Yt if i == j])
# Y = []
# yErr = []
# X = []
# for i in range(1,10):
#     y = []