# Optional image download/display, left disabled:
# url = subject["location"]["standard"]
#
# slash_index = url.rfind("/")
# object_id = url[slash_index+1:]
#
# if not(os.path.isfile(base_directory+"/Databases/condors/images/"+object_id)):
#     urllib.urlretrieve (url, base_directory+"/Databases/condors/images/"+object_id)
#
# image_file = cbook.get_sample_data(base_directory+"/Databases/condors/images/"+object_id)
# image = plt.imread(image_file)
#
# fig, ax = plt.subplots()
# im = ax.imshow(image)

# Time the divisive k-means run (a -> b).
a = datetime.datetime.now()
user_identified, clusters, users = DivisiveKmeans(1).fit2(
    annotation_list, user_list, debug=True)
b = datetime.datetime.now()
print("==")
print(len(user_identified))
#for (x,y) in user_identified:
#    plt.plot([x,],[y,],'.',color="red")

# Time the agglomerative run on the same markings (c -> d).
# Note that user_identified is overwritten with the agglomerative result.
c = datetime.datetime.now()
user_identified = agglomerativeClustering(zip(annotation_list, user_list))
d = datetime.datetime.now()
print(len(user_identified))

# Elapsed wall-clock time: divisive first, agglomerative second.
print(b - a)
print(d - c)
print("--")
#for (x,y) in user_identified:
#    plt.plot([x-3,],[y-3,],'.',color="green")
user_list.append(user) except KeyError: pass except ValueError: pass if annotation_list == []: continue if not (os.path.isfile(base_directory + "/Databases/condors/images/" + object_id)): urllib.urlretrieve( url, base_directory + "/Databases/condors/images/" + object_id) user_identified_condors, clusters = DivisiveKmeans(3).fit2(annotation_list, user_list, debug=True) f_name = base_directory + "/Databases/condors/images/" + object_id if user_identified_condors == []: pass else: l2.append(subject["classification_count"]) l1.append(subject["zooniverse_id"]) #print animals print l1 print l2 DivisiveKmeans(3).__fix__(user_identified_condors, clusters, annotation_list, user_list, 200, f_name)
import cPickle as pickle
import aggregation
import matplotlib.pyplot as plt
import numpy as np

# add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer
if os.path.exists("/home/ggdhines"):
    sys.path.append(
        "/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg")
else:
    sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg")

# These imports must come after the sys.path tweak above, which is what
# makes the clustering modules importable.
from agglomerativeClustering import Ward
from divisiveKmeans import DivisiveKmeans

# Bound fit methods of the two clustering algorithms.
clusterAlg2 = DivisiveKmeans().__fit__
clusterAlg = Ward().__fit__

penguin = PenguinAggregation()

# Load the pickled gold-standard subject ids. The with-block closes the
# file handle as soon as loading finishes (the bare open() leaked it).
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(aggregation.base_directory + "/Databases/penguin_gold.pickle",
          "rb") as gold_file:
    subject_ids = pickle.load(gold_file)

# Accumulators and counters filled in by code later in the script
# (their exact use is not visible in this excerpt).
X1 = []
Y1 = []
X2 = []
Y2 = []
Z1 = []
Z2 = []

nonEmpty = 0
index = -1

# Shuffle the subject ids in place.
random.shuffle(subject_ids)
#print annotation_list user_list.append(user) animal_list.append(animal_type) except (ValueError,KeyError): pass #print animal_list #if there were any markings on the image, use divisive kmeans to cluster the points so that each #cluster represents an image if annotation_list != []: user_identified,clusters,users = DivisiveKmeans(1).fit2(annotation_list,user_list,debug=True) #fix split clusters if necessary if user_identified != []: user_identified,clusters,users_per_cluster = DivisiveKmeans(3).__fix__(user_identified,clusters,users,200) pos = 0 neg = 0 results_dict[zooniverse_id] = [] #find out which users marked this "animal" for c,users_l in zip(user_identified,users_per_cluster): #moving on to the next animal so increase counter animal_count += 1 results_dict[zooniverse_id].append((c,animal_count,users_l,user_count))
import cPickle as pickle
import aggregation
import matplotlib.pyplot as plt
import numpy as np

# add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer
if os.path.exists("/home/ggdhines"):
    sys.path.append(
        "/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg")
else:
    sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg")

# These imports must come after the sys.path tweak above, which is what
# makes the clustering modules importable.
from agglomerativeClustering import Ward
from divisiveKmeans import DivisiveKmeans

clusterAlg2 = DivisiveKmeans().__fit__

# One aggregation object per clustering algorithm.
dkmeans = PenguinAggregation(clustering_alg=DivisiveKmeans().__fit__)
agglomerative = PenguinAggregation(clustering_alg=Ward().__fit__)

# Load the pickled gold-standard subject ids. The with-block closes the
# file handle as soon as loading finishes (the bare open() leaked it).
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(aggregation.base_directory + "/Databases/penguin_gold.pickle",
          "rb") as gold_file:
    subject_ids = pickle.load(gold_file)

# Accumulators and counters filled in by code later in the script
# (their exact use is not visible in this excerpt).
X1 = []
Y1 = []
X2 = []
Y2 = []
Z1 = []
Z2 = []

nonEmpty = 0
index = -1

# Shuffle the subject ids in place.
random.shuffle(subject_ids)
try: animal_type = animal["animal"] #if not(animal_type in ["carcassOrScale","carcass"]): if animal_type == "condor": annotation_list.append((x, y)) user_list.append(user_index) except KeyError: annotation_list.append((x, y)) user_list.append(user_index) except ValueError: pass user_identified_condors, clusters = DivisiveKmeans(3).fit2( annotation_list, user_list, debug=True ) #,jpeg_file=base_directory+"/Databases/condors/images/"+object_id) image_file = cbook.get_sample_data(base_directory + "/Databases/condors/images/" + object_id) image = plt.imread(image_file) fig, ax = plt.subplots() im = ax.imshow(image) #plt.show() print len(clusters) relations = [] for c1_index in range(len(clusters)): for c2_index in range(c1_index + 1, len(clusters)):
n = 0 xy_list = [] try: if isinstance(r["annotations"][1]["value"], dict): for marking in r["annotations"][1]["value"].values(): if marking["value"] in ["adult", "chick"]: x, y = (float(marking["x"]), float(marking["y"])) if (x, y, ip) in big_list: print "--" continue big_list.append((x, y, ip)) user_markings.append((x, y)) user_ips.append(ip) except KeyError: print r["annotations"] user_identified_condors, clusters, users = DivisiveKmeans(1).fit2( user_markings, user_ips, debug=True) #user_identified_condors,clusters,users = DivisiveKmeans_2(1).fit2(user_markings,user_ips,debug=True) #user_identified_condors,clusters,users = KMedoids(1).fit2(user_markings,user_ips,debug=True) #user_identified_condors = agglomerativeClustering(zip(user_markings,user_ips)) quadRoot = Node(0, 0, 1000, 750) for (m, u) in zip(user_markings, user_ips): quadRoot.__add_point__((m, u)) quadRoot.__ward_traverse__() break
for user in classification_history: history = classification_history[user] if len(history) < 15: continue #true positive, false positive, false negative, true negative power_users[user] = [0.,0.,0.,0.] print user for zooniverse_id in history: user_markings,user_list,found_animal ,animal_types= collect_classification(zooniverse_id,separate_users=[user]) #determine whether or not all of the other users found an animal if user_markings != []: gold,gold_clusters = DivisiveKmeans(3).fit2(user_markings,user_list,debug=True) if gold != []: gold,gold_clusters = DivisiveKmeans(3).__fix__(gold,gold_clusters,user_markings,user_list,200) else: gold = [] gold_clusters = [] #so we have found some animals - now we need to figure out what species they are #look until we have found a condor num_users = len(set(user_list)) gold_condor = False for cluster in gold_clusters: #the find the animal type corresponding to each pt in this cluster type_list = [animal_types[user_markings.index(pt)] for pt in cluster] #did at least half the people tag this animal and at least half of those classify it as a condor?
annotation_list.append((x, y)) user_list.append(user) except ValueError: pass #user_identified_condors,clusters = DivisiveKmeans(3).fit2(annotation_list,user_list,debug=True) relations = [] if not (os.path.isfile(base_directory + "/Databases/condors/images/" + object_id)): urllib.urlretrieve( url, base_directory + "/Databases/condors/images/" + object_id) user_identified_condors = DivisiveKmeans(1).fit2( annotation_list, user_list, jpeg_file=base_directory + "/Databases/condors/images/" + object_id) image_file = cbook.get_sample_data(base_directory + "/Databases/condors/images/" + object_id) print object_id image = plt.imread(image_file) fig, ax = plt.subplots() im = ax.imshow(image) x, y = zip(*annotation_list) plt.plot(x, y, '.', color='yellow') x, y = zip(*user_identified_condors) plt.plot(x, y, '.', color='blue')
X.append(float(p[0])) Y.append(float(p[1])) pt = (np.mean(X), np.mean(Y)) try: species = marking["species"] except KeyError: species = "NA" subject_results[zooniverse_id].append((pt, species, user)) for counter, (zooniverse_id, markings) in enumerate(subject_results.items()): if markings == []: continue pts, species, users = zip(*markings) if len(pts) > 100: continue plankton, clusters, users_l = DivisiveKmeans(3).fit2( pts, users, debug=True ) #,jpeg_file=base_directory+"/Databases/condors/images/"+object_id) for c in clusters: for m in c: index = pts.index(m) print species[index], users[index] print "--" if counter == 25: break
if user in max_animals[s]: max_animals[s][user] += 1 else: max_animals[s][user] = 1 except KeyError: pass except ValueError: pass #gold standard if user_markings[20] != []: gold, gold_clusters = DivisiveKmeans(3).fit2(user_markings[20], user_list[20], debug=True) if gold != []: gold, gold_clusters = DivisiveKmeans(3).__fix__( gold, gold_clusters, user_markings[20], user_list[20], 200) else: gold = [] gold_clusters = [] first_step = steps[0] if user_markings[first_step] != []: identified_animals, clusters = DivisiveKmeans(first_step).fit2( user_markings[first_step], user_list[first_step], debug=True)