# url = subject["location"]["standard"]
        #
        # slash_index = url.rfind("/")
        # object_id = url[slash_index+1:]
        #
        # if not(os.path.isfile(base_directory+"/Databases/condors/images/"+object_id)):
        #     urllib.urlretrieve (url, base_directory+"/Databases/condors/images/"+object_id)
        #
        # image_file = cbook.get_sample_data(base_directory+"/Databases/condors/images/"+object_id)
        # image = plt.imread(image_file)
        #
        # fig, ax = plt.subplots()
        # im = ax.imshow(image)

        # Time the divisive k-means clustering of this subject's markings;
        # a and b bracket the fit2 call.
        a = datetime.datetime.now()
        user_identified, clusters, users = DivisiveKmeans(1).fit2(
            annotation_list, user_list, debug=True)
        b = datetime.datetime.now()
        print "=="
        # number of results returned by divisive k-means
        print len(user_identified)
        #for (x,y) in user_identified:
        #    plt.plot([x,],[y,],'.',color="red")

        # Time agglomerativeClustering on the same markings, each paired with
        # its user; c and d bracket the call. Note that this overwrites the
        # user_identified value produced by divisive k-means above.
        c = datetime.datetime.now()
        user_identified = agglomerativeClustering(
            zip(annotation_list, user_list))
        d = datetime.datetime.now()
        # number of results returned by agglomerativeClustering
        print len(user_identified)
        # elapsed time: divisive k-means first, then agglomerative
        print b - a
        print d - c
        print "--"
        #for (x,y) in user_identified:
        #    plt.plot([x-3,],[y-3,],'.',color="green")
                        user_list.append(user)

                except KeyError:
                    pass

        except ValueError:
            pass

    # Nothing was collected for this subject - move on to the next iteration
    # of the enclosing loop.
    if annotation_list == []:
        continue

    # Download the image only if it is not already stored locally.
    if not (os.path.isfile(base_directory + "/Databases/condors/images/" +
                           object_id)):
        urllib.urlretrieve(
            url, base_directory + "/Databases/condors/images/" + object_id)
    # Cluster the collected markings.
    # NOTE(review): fit2 is unpacked into two values here - confirm this
    # matches the return shape of the DivisiveKmeans version in use.
    user_identified_condors, clusters = DivisiveKmeans(3).fit2(annotation_list,
                                                               user_list,
                                                               debug=True)
    # local path of the subject's image, passed on to __fix__ below
    f_name = base_directory + "/Databases/condors/images/" + object_id
    if user_identified_condors == []:
        # clustering returned nothing - nothing to record for this subject
        pass
    else:
        # record this subject's classification count and zooniverse id
        l2.append(subject["classification_count"])
        l1.append(subject["zooniverse_id"])
        #print animals
        print l1
        print l2
        # The return value of __fix__ is discarded here. The meaning of the
        # literal 200 is not visible in this excerpt - TODO confirm.
        DivisiveKmeans(3).__fix__(user_identified_condors, clusters,
                                  annotation_list, user_list, 200, f_name)
Пример #3
0
import cPickle as pickle
import aggregation
import matplotlib.pyplot as plt
import numpy as np

# add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer
if os.path.exists("/home/ggdhines"):
    sys.path.append(
        "/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg")
else:
    sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg")

from agglomerativeClustering import Ward
from divisiveKmeans import DivisiveKmeans

# Bound __fit__ methods of the divisive k-means and Ward clusterers.
clusterAlg2 = DivisiveKmeans().__fit__
clusterAlg = Ward().__fit__

penguin = PenguinAggregation()

# Load the contents of penguin_gold.pickle into subject_ids. The file is
# opened in a with-block so the handle is closed as soon as loading finishes
# instead of being left open for the rest of the script.
with open(aggregation.base_directory + "/Databases/penguin_gold.pickle",
          "rb") as gold_file:
    subject_ids = pickle.load(gold_file)

# Accumulators and counters; they are only initialised here.
X1 = []
Y1 = []
X2 = []
Y2 = []
Z1 = []
Z2 = []
nonEmpty = 0
index = -1
# shuffle the loaded subject ids in place
random.shuffle(subject_ids)
Пример #4
0
                    #print annotation_list
                    user_list.append(user)
                    animal_list.append(animal_type)




        except (ValueError,KeyError):
            pass

    #print animal_list

    #if there were any markings on the image, use divisive kmeans to cluster the points so that each
    #cluster represents one animal
    if annotation_list != []:
        user_identified,clusters,users = DivisiveKmeans(1).fit2(annotation_list,user_list,debug=True)

        #fix split clusters if necessary
        if user_identified != []:
            #NOTE(review): the meaning of the literal 200 passed to __fix__ is not visible here - confirm
            user_identified,clusters,users_per_cluster = DivisiveKmeans(3).__fix__(user_identified,clusters,users,200)
            pos = 0
            neg = 0

            #start a fresh list of results for this subject
            results_dict[zooniverse_id] = []

            #find out which users marked this "animal"
            for c,users_l in zip(user_identified,users_per_cluster):
                #moving on to the next animal so increase counter
                animal_count += 1
                #store the cluster result, the running animal number, the users for that cluster and user_count
                results_dict[zooniverse_id].append((c,animal_count,users_l,user_count))
Пример #5
0
import cPickle as pickle
import aggregation
import matplotlib.pyplot as plt
import numpy as np

# add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer
if os.path.exists("/home/ggdhines"):
    sys.path.append(
        "/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg")
else:
    sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg")

from agglomerativeClustering import Ward
from divisiveKmeans import DivisiveKmeans

# Bound __fit__ method of the divisive k-means clusterer.
clusterAlg2 = DivisiveKmeans().__fit__

# One PenguinAggregation per clustering algorithm: divisive k-means and Ward.
dkmeans = PenguinAggregation(clustering_alg=DivisiveKmeans().__fit__)
agglomerative = PenguinAggregation(clustering_alg=Ward().__fit__)

# Load the contents of penguin_gold.pickle into subject_ids. The file is
# opened in a with-block so the handle is closed as soon as loading finishes
# instead of being left open for the rest of the script.
with open(aggregation.base_directory + "/Databases/penguin_gold.pickle",
          "rb") as gold_file:
    subject_ids = pickle.load(gold_file)

# Accumulators and counters; they are only initialised here.
X1 = []
Y1 = []
X2 = []
Y2 = []
Z1 = []
Z2 = []
nonEmpty = 0
index = -1
# shuffle the loaded subject ids in place
random.shuffle(subject_ids)
Пример #6
0
                    try:
                        animal_type = animal["animal"]
                        #if not(animal_type in ["carcassOrScale","carcass"]):
                        if animal_type == "condor":
                            annotation_list.append((x, y))
                            user_list.append(user_index)

                    except KeyError:
                        annotation_list.append((x, y))
                        user_list.append(user_index)

            except ValueError:
                pass

        # Cluster the collected markings.
        # NOTE(review): fit2 is unpacked into two values here - confirm this
        # matches the return shape of the DivisiveKmeans version in use.
        user_identified_condors, clusters = DivisiveKmeans(3).fit2(
            annotation_list, user_list, debug=True
        )  #,jpeg_file=base_directory+"/Databases/condors/images/"+object_id)

        # Read the locally stored image for this subject.
        image_file = cbook.get_sample_data(base_directory +
                                           "/Databases/condors/images/" +
                                           object_id)
        image = plt.imread(image_file)

        # Draw the image on a new figure (plt.show() is currently disabled).
        fig, ax = plt.subplots()
        im = ax.imshow(image)
        #plt.show()

        # number of clusters found
        print len(clusters)
        relations = []
        for c1_index in range(len(clusters)):
            for c2_index in range(c1_index + 1, len(clusters)):
Пример #7
0
        n = 0
        xy_list = []
        try:
            # Only process the second annotation when its "value" is a dict
            # of markings.
            if isinstance(r["annotations"][1]["value"], dict):
                for marking in r["annotations"][1]["value"].values():
                    # keep only markings labelled "adult" or "chick"
                    if marking["value"] in ["adult", "chick"]:
                        x, y = (float(marking["x"]), float(marking["y"]))

                        # Skip a marking already recorded with the same
                        # coordinates and ip, printing "--" when that happens.
                        # NOTE(review): if big_list is a plain list this
                        # membership test is a linear scan - confirm.
                        if (x, y, ip) in big_list:
                            print "--"
                            continue

                        big_list.append((x, y, ip))
                        user_markings.append((x, y))
                        user_ips.append(ip)
        except KeyError:
            # a key was missing - dump the annotations for inspection
            print r["annotations"]

    # Cluster the collected markings together with the ip of whoever made
    # each one.
    user_identified_condors, clusters, users = DivisiveKmeans(1).fit2(
        user_markings, user_ips, debug=True)
    #user_identified_condors,clusters,users = DivisiveKmeans_2(1).fit2(user_markings,user_ips,debug=True)
    #user_identified_condors,clusters,users = KMedoids(1).fit2(user_markings,user_ips,debug=True)
    #user_identified_condors = agglomerativeClustering(zip(user_markings,user_ips))

    # Add every (marking, ip) pair to a Node and then traverse it.
    # NOTE(review): presumably Node(0, 0, 1000, 750) is a quadtree root
    # covering a 1000 x 750 region - confirm against the Node class.
    quadRoot = Node(0, 0, 1000, 750)
    for (m, u) in zip(user_markings, user_ips):
        quadRoot.__add_point__((m, u))

    quadRoot.__ward_traverse__()

    # leave the enclosing loop as soon as this point is reached
    break
Пример #8
0
# For every user with at least 15 entries in their classification history,
# build a gold standard for each subject they classified.
for user in classification_history:
    history = classification_history[user]
    # skip users with fewer than 15 entries in their history
    if len(history) < 15:
        continue

    #true positive, false positive, false negative, true negative
    power_users[user] = [0.,0.,0.,0.]

    print user
    for zooniverse_id in history:
        #collect the markings for this subject, passing this user in separate_users
        user_markings,user_list,found_animal ,animal_types= collect_classification(zooniverse_id,separate_users=[user])

        #determine whether or not all of the other users found an animal
        if user_markings != []:
            gold,gold_clusters = DivisiveKmeans(3).fit2(user_markings,user_list,debug=True)

            #only run __fix__ when the clustering returned something
            if gold != []:
                gold,gold_clusters = DivisiveKmeans(3).__fix__(gold,gold_clusters,user_markings,user_list,200)
        else:
            #no markings at all, so the gold standard is empty
            gold = []
            gold_clusters = []

        #so we have found some animals - now we need to figure out what species they are
        #look until we have found a condor
        num_users = len(set(user_list))
        gold_condor = False
        for cluster in gold_clusters:
            #then find the animal type corresponding to each pt in this cluster
            #NOTE(review): .index(pt) returns the first matching position, so duplicate points
            #would all map to the same animal type - confirm this is acceptable
            type_list = [animal_types[user_markings.index(pt)] for pt in cluster]
            #did at least half the people tag this animal and at least half of those classify it as a condor?
Пример #9
0
                    annotation_list.append((x, y))
                    user_list.append(user)

        except ValueError:
            pass

    #user_identified_condors,clusters = DivisiveKmeans(3).fit2(annotation_list,user_list,debug=True)
    relations = []

    # Download the image only if it is not already stored locally.
    if not (os.path.isfile(base_directory + "/Databases/condors/images/" +
                           object_id)):
        urllib.urlretrieve(
            url, base_directory + "/Databases/condors/images/" + object_id)

    # Cluster the markings, passing the local image path as jpeg_file. The
    # result is kept as a single value and unpacked as (x, y) pairs below.
    user_identified_condors = DivisiveKmeans(1).fit2(
        annotation_list,
        user_list,
        jpeg_file=base_directory + "/Databases/condors/images/" + object_id)

    # Read the locally stored image for this subject.
    image_file = cbook.get_sample_data(base_directory +
                                       "/Databases/condors/images/" +
                                       object_id)
    print object_id
    image = plt.imread(image_file)

    fig, ax = plt.subplots()
    im = ax.imshow(image)

    # Plot the raw markings in yellow and the clustering results in blue.
    # NOTE(review): zip(*seq) on an empty sequence yields nothing, so either
    # unpacking below raises ValueError when its input is empty - confirm
    # that both are non-empty at this point.
    x, y = zip(*annotation_list)
    plt.plot(x, y, '.', color='yellow')
    x, y = zip(*user_identified_condors)
    plt.plot(x, y, '.', color='blue')
Пример #10
0
            X.append(float(p[0]))
            Y.append(float(p[1]))

        pt = (np.mean(X), np.mean(Y))
        try:
            species = marking["species"]
        except KeyError:
            species = "NA"
        subject_results[zooniverse_id].append((pt, species, user))

# Cluster the markings of each subject and print, for every point in every
# cluster, the species and user recorded with that point.
for counter, (zooniverse_id, markings) in enumerate(subject_results.items()):
    # skip subjects with no markings
    if markings == []:
        continue

    # split the (pt, species, user) triples into three parallel tuples
    pts, species, users = zip(*markings)
    # skip subjects with more than 100 points
    if len(pts) > 100:
        continue
    plankton, clusters, users_l = DivisiveKmeans(3).fit2(
        pts, users, debug=True
    )  #,jpeg_file=base_directory+"/Databases/condors/images/"+object_id)

    for c in clusters:
        for m in c:
            # Look the point up in pts to recover its species and user.
            # tuple.index returns the first match, so identical points all
            # resolve to the first one's species and user.
            index = pts.index(m)
            print species[index], users[index]

        print "--"

    # stop once the subject at enumeration position 25 has been processed
    if counter == 25:
        break
Пример #11
0
                                if user in max_animals[s]:
                                    max_animals[s][user] += 1
                                else:
                                    max_animals[s][user] = 1

                        except KeyError:
                            pass

        except ValueError:
            pass

    #gold standard
    # Built from entry 20 of user_markings / user_list.
    # NOTE(review): presumably entry 20 holds the markings from the largest
    # number of users considered - confirm.
    if user_markings[20] != []:
        gold, gold_clusters = DivisiveKmeans(3).fit2(user_markings[20],
                                                     user_list[20],
                                                     debug=True)

        # only run __fix__ when the clustering returned something
        if gold != []:
            gold, gold_clusters = DivisiveKmeans(3).__fix__(
                gold, gold_clusters, user_markings[20], user_list[20], 200)
    else:
        # no markings at all, so the gold standard is empty
        gold = []
        gold_clusters = []

    # Cluster the markings stored under the first entry of steps; that entry
    # is also used as the DivisiveKmeans constructor argument.
    first_step = steps[0]

    if user_markings[first_step] != []:
        identified_animals, clusters = DivisiveKmeans(first_step).fit2(
            user_markings[first_step], user_list[first_step], debug=True)