#!/usr/bin/env python __author__ = 'ggdhines' from penguinAggregation import PenguinAggregation import random import os import sys # add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append("/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from divisiveKmeans import DivisiveKmeans clusterAlg = DivisiveKmeans().__fit__ penguin = PenguinAggregation() subject_ids = penguin.__get_subjects_per_site__("APZ00035mv",complete=True,remove_blanks=True) for i,subject in enumerate(random.sample(subject_ids,50)): print i penguin.__readin_subject__(subject) blankImage = penguin.__cluster_subject__(subject, clusterAlg) if not blankImage: penguin.__save_raw_markings__(subject) break
# add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append("/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") elif os.path.exists("/Users/greg"): sys.path.append("/Users/greg/Code/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from divisiveKmeans import DivisiveKmeans from multiClickCorrect import MultiClickCorrect correctionAlg = MultiClickCorrect(overlap_threshold=1,min_cluster_size=2).__fix__ clusterAlg = DivisiveKmeans().__fit__ penguin = PenguinAggregation() gold_subjects = penguin.__get_gold_subjects__() gold_sample = gold_subjects[:50] penguin.__readin_users__() for count,zooniverse_id in enumerate(gold_sample): if count == 50: break print count, zooniverse_id penguin.__readin_subject__(zooniverse_id,read_in_gold=True) blankImage = penguin.__cluster_subject__(zooniverse_id, clusterAlg,fix_distinct_clusters=True,correction_alg=correctionAlg) penguin.__soy_it__(zooniverse_id)
# NOTE(review): script fragment collapsed onto one physical line.  As written,
# the embedded "# add the paths..." comment swallows everything after the numpy
# import, so only `import numpy as np` would actually execute.  The fragment is
# also truncated: the trailing `while True: index += 1` loop has lost its body
# (the commented-out for-loop hints at the original per-subject iteration over
# the gold-standard pickle).  Left byte-identical rather than reconstructed --
# restore the original multi-line file from version control before editing.
import numpy as np # add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append( "/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from agglomerativeClustering import Ward from divisiveKmeans import DivisiveKmeans clusterAlg2 = DivisiveKmeans().__fit__ clusterAlg = Ward().__fit__ penguin = PenguinAggregation() subject_ids = pickle.load( open(aggregation.base_directory + "/Databases/penguin_gold.pickle", "rb")) X1 = [] Y1 = [] X2 = [] Y2 = [] Z1 = [] Z2 = [] nonEmpty = 0 index = -1 random.shuffle(subject_ids) while True: index += 1 #for i,subject in enumerate(random.sample(subject_ids,50)):
# NOTE(review): script fragment collapsed onto one physical line; the embedded
# "# add the paths..." comment turns the remainder of the line into a comment,
# so only the first import would execute as written.  The fragment compares a
# DivisiveKmeans aggregator against an agglomerative (Ward) one over the
# gold-standard pickle, but it is truncated: `while True: index += 1` has lost
# its loop body.  Left byte-identical -- recover the full file from version
# control rather than guessing at the missing analysis loop.
import aggregation import matplotlib.pyplot as plt import numpy as np # add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append("/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from agglomerativeClustering import Ward from divisiveKmeans import DivisiveKmeans clusterAlg2 = DivisiveKmeans().__fit__ dkmeans = PenguinAggregation(clustering_alg= DivisiveKmeans().__fit__) agglomerative = PenguinAggregation(clustering_alg = Ward().__fit__) subject_ids = pickle.load(open(aggregation.base_directory+"/Databases/penguin_gold.pickle","rb")) X1 = [] Y1 = [] X2 = [] Y2 = [] Z1 = [] Z2 = [] nonEmpty = 0 index = -1 random.shuffle(subject_ids) while True: index += 1
# NOTE(review): duplicate of the fragment above (DivisiveKmeans vs Ward
# comparison over the gold-standard pickle), also collapsed onto one physical
# line and truncated at `while True: index += 1` -- the loop body that did the
# per-subject comparison is missing.  As written, the embedded "# add the
# paths..." comment comments out everything after the first two imports.
# Left byte-identical; restore from version control before editing.
import matplotlib.pyplot as plt import numpy as np # add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append( "/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from agglomerativeClustering import Ward from divisiveKmeans import DivisiveKmeans clusterAlg2 = DivisiveKmeans().__fit__ dkmeans = PenguinAggregation(clustering_alg=DivisiveKmeans().__fit__) agglomerative = PenguinAggregation(clustering_alg=Ward().__fit__) subject_ids = pickle.load( open(aggregation.base_directory + "/Databases/penguin_gold.pickle", "rb")) X1 = [] Y1 = [] X2 = [] Y2 = [] Z1 = [] Z2 = [] nonEmpty = 0 index = -1 random.shuffle(subject_ids) while True: index += 1
import matplotlib.cbook as cbook

# NOTE(review): fragment starts mid-file -- os, sys, pymongo and
# PenguinAggregation are assumed to be imported in the (unseen) header.
# add the paths necessary for clustering algorithm and ibcc - currently only
# works on Greg's computer
if os.path.exists("/home/ggdhines"):
    sys.path.append("/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg")
elif os.path.exists("/Users/greg"):
    sys.path.append("/Users/greg/Code/reduction/experimental/clusteringAlg")
else:
    sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg")
from divisiveKmeans import DivisiveKmeans
from zeroFix import ZeroFix

cluster_fn = DivisiveKmeans().__fit__
zero_fix_fn = ZeroFix().__fix__
penguin = PenguinAggregation()

# handles into the January 2015 penguin Mongo dump
client = pymongo.MongoClient()
db = client['penguin_2015-01-18']
collection = db["penguin_classifications"]
subject_collection = db["penguin_subjects"]

# accumulators -- unused in the visible portion of this script
accuracy = []
numGold = []

# load one specific subject and show its raw (unclustered) markings
penguin.__readin_subject__("APZ00035nr")
penguin.__display_raw_markings__("APZ00035nr")
from penguinAggregation import PenguinAggregation import random import os import sys # add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append("/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from divisiveKmeans import DivisiveKmeans clusterAlg = DivisiveKmeans().__fit__ penguin = PenguinAggregation() zooniverse_id_list = random.sample(penguin.__get_subjects_per_site__("APZ0001x3p"),40) for i,zooniverse_id in enumerate(zooniverse_id_list): print i penguin.__readin_subject__(zooniverse_id) blankImage = penguin.__cluster_subject__(zooniverse_id, clusterAlg) if not blankImage: print "+--" penguin.__find_closest_neighbour__(zooniverse_id) #penguin.__plot_cluster_size__(zooniverse_id_list) penguin.__find_one__(zooniverse_id_list)
import urllib
import matplotlib.cbook as cbook

# NOTE(review): fragment starts mid-file -- os, sys, pymongo and
# PenguinAggregation are assumed imported in the (unseen) header.
# add the paths necessary for clustering algorithm and ibcc - currently only
# works on Greg's computer
for probe, extra_path in [
        ("/home/ggdhines", "/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg"),
        ("/Users/greg", "/Users/greg/Code/reduction/experimental/clusteringAlg")]:
    if os.path.exists(probe):
        sys.path.append(extra_path)
        break
else:
    sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg")
from divisiveKmeans import DivisiveKmeans
from zeroFix import ZeroFix

divisive_fit = DivisiveKmeans().__fit__
zero_fix = ZeroFix().__fix__
penguin = PenguinAggregation()

# January 2015 penguin Mongo dump
client = pymongo.MongoClient()
db = client['penguin_2015-01-18']
collection = db["penguin_classifications"]
subject_collection = db["penguin_subjects"]

# accumulators -- unused in the visible portion of this script
accuracy = []
numGold = []

# show the raw markings for one hand-picked subject
penguin.__readin_subject__("APZ00035nr")
penguin.__display_raw_markings__("APZ00035nr")
import random import os import sys # add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append( "/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from divisiveKmeans import DivisiveKmeans clusterAlg = DivisiveKmeans().__fit__ penguin = PenguinAggregation() zooniverse_id_list = random.sample( penguin.__get_subjects_per_site__("APZ0001x3p"), 40) for i, zooniverse_id in enumerate(zooniverse_id_list): print i penguin.__readin_subject__(zooniverse_id) blankImage = penguin.__cluster_subject__(zooniverse_id, clusterAlg) if not blankImage: print "+--" penguin.__find_closest_neighbour__(zooniverse_id) #penguin.__plot_cluster_size__(zooniverse_id_list) penguin.__find_one__(zooniverse_id_list)
# NOTE(review): duplicate of the Ward-vs-DivisiveKmeans gold-standard fragment
# above, collapsed onto one physical line.  The embedded "# add the paths..."
# comment turns everything after the first two imports into a comment, and the
# fragment is truncated at `while True: index += 1` -- the per-subject loop
# body (hinted at by the trailing commented-out for-loop) is missing.  Left
# byte-identical; restore the full file from version control before editing.
import matplotlib.pyplot as plt import numpy as np # add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append("/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from agglomerativeClustering import Ward from divisiveKmeans import DivisiveKmeans clusterAlg2 = DivisiveKmeans().__fit__ clusterAlg = Ward().__fit__ penguin = PenguinAggregation() subject_ids = pickle.load(open(aggregation.base_directory + "/Databases/penguin_gold.pickle", "rb")) X1 = [] Y1 = [] X2 = [] Y2 = [] Z1 = [] Z2 = [] nonEmpty = 0 index = -1 random.shuffle(subject_ids) while True: index += 1 # for i,subject in enumerate(random.sample(subject_ids,50)):
#!/usr/bin/env python __author__ = 'ggdhines' from penguinAggregation import PenguinAggregation import random import os import sys # add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append( "/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from divisiveKmeans import DivisiveKmeans clusterAlg = DivisiveKmeans().__fit__ penguin = PenguinAggregation() subject_ids = penguin.__get_subjects_per_site__("APZ00035mv", complete=True, remove_blanks=True) for i, subject in enumerate(random.sample(subject_ids, 50)): print i penguin.__readin_subject__(subject) blankImage = penguin.__cluster_subject__(subject, clusterAlg) if not blankImage: penguin.__save_raw_markings__(subject) break
# NOTE(review): script fragment collapsed onto one physical line and cut off
# mid-statement -- it ends with a dangling `if not blankImage:` that has no
# body, so the reconstructed code would not even parse.  Also, `blankImage` is
# never assigned anywhere in the visible fragment (the cluster call binds
# `numClusters, time` instead), so the trailing test would raise NameError if
# restored as-is; presumably it should test the cluster result -- confirm
# against version control.  Left byte-identical rather than reconstructed.
import sys import cPickle as pickle import aggregation # add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append( "/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from agglomerativeClustering import Ward, TooBig clusterAlg = Ward().__fit__ penguin = PenguinAggregation() subject_ids = pickle.load( open(aggregation.base_directory + "/Databases/penguin_gold.pickle", "rb")) for i, subject in enumerate(random.sample(subject_ids, 50)): #subject = "APZ000173v" print i, subject penguin.__readin_subject__(subject, users_to_skip=["caitlin.black"]) try: numClusters, time = penguin.__cluster_subject__(subject, clusterAlg) except TooBig: print "too big" continue if not blankImage:
# NOTE(review): script fragment collapsed onto one physical line; as written
# the leading "#" makes the entire line a comment, so nothing executes.  The
# fragment sets up Mongo handles for the January 2015 dump and accuracy /
# false-positive accumulators for the MAIVb2012a site, then is truncated right
# after `Xt = []` -- the analysis loop that filled these accumulators is
# missing.  It also imports the oddly named `__ibcc__2` from multiClickCorrect;
# verify that name against the module before reusing.  Left byte-identical --
# restore the full file from version control before editing.
# add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append( "/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") elif os.path.exists("/Users/greg"): sys.path.append("/Users/greg/Code/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from divisiveKmeans import DivisiveKmeans from multiClickCorrect import __ibcc__2 clusterAlg = DivisiveKmeans().__fit__ penguin = PenguinAggregation() client = pymongo.MongoClient() db = client['penguin_2015-01-18'] collection = db["penguin_classifications"] subject_collection = db["penguin_subjects"] subjects = subject_collection.find({"metadata.path": {"$regex": "MAIVb2012a"}}) accuracy = [] num_gold = 0 could_have = 0 missed = 0 false_pos = 0 overlaps = {} #overlaps2 = [] Xt = []
# NOTE(review): duplicate of the MAIVb2012a ibcc fragment above, collapsed onto
# one physical line.  The embedded "# add the paths..." comment swallows
# everything after the matplotlib import, and the fragment is truncated right
# after `overlaps = {}` -- the loop that filled the accuracy / missed /
# false_pos accumulators is missing.  Left byte-identical rather than
# reconstructed; restore the full file from version control before editing.
import matplotlib.cbook as cbook # add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append("/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") elif os.path.exists("/Users/greg"): sys.path.append("/Users/greg/Code/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from divisiveKmeans import DivisiveKmeans from multiClickCorrect import __ibcc__2 clusterAlg = DivisiveKmeans().__fit__ penguin = PenguinAggregation() client = pymongo.MongoClient() db = client['penguin_2015-01-18'] collection = db["penguin_classifications"] subject_collection = db["penguin_subjects"] subjects = subject_collection.find({"metadata.path":{"$regex":"MAIVb2012a"}}) accuracy = [] num_gold =0 could_have = 0 missed = 0 false_pos = 0 overlaps = {}
# NOTE(review): script fragment collapsed onto one physical line.  Two genuine
# problems are visible: (1) `blankImage` is never assigned anywhere in the
# fragment -- the cluster call binds `numClusters, time` -- so the trailing
# `if not blankImage:` would raise NameError on the first iteration that gets
# past the TooBig handler; presumably the test was meant to use the cluster
# result (confirm against version control).  (2) the tuple unpack rebinds the
# name `time`, which shadows the stdlib time module if the unseen header
# imports it.  `random` and PenguinAggregation are assumed imported in the
# unseen header.  Left byte-identical rather than reconstructed.
import os import sys import cPickle as pickle import aggregation # add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append("/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from agglomerativeClustering import Ward, TooBig clusterAlg = Ward().__fit__ penguin = PenguinAggregation() subject_ids = pickle.load(open(aggregation.base_directory + "/Databases/penguin_gold.pickle", "rb")) for i, subject in enumerate(random.sample(subject_ids, 50)): # subject = "APZ000173v" print i, subject penguin.__readin_subject__(subject, users_to_skip=["caitlin.black"]) try: numClusters, time = penguin.__cluster_subject__(subject, clusterAlg) except TooBig: print "too big" continue if not blankImage: penguin.__display_raw_markings__(subject)