示例#1
0
def dummy_func(args):
    """Cluster one subject's markings with an Agglomerative engine.

    Python 3 removed tuple parameters in function signatures (PEP 3113), so
    the original ``def dummy_func((count, ...))`` form is a SyntaxError in a
    snippet that otherwise uses the Python 3 ``print()`` function.  The tuple
    is now unpacked explicitly; callers (e.g. ``pool.map`` over packed task
    tuples) still pass a single tuple argument.

    Args:
        args: ``(count, zooniverse_id, markings, user_ids, tools, num_users)``.

    Returns:
        ``(zooniverse_id, clustering_results, num_users)``; when there are no
        markings, ``clustering_results`` is the sentinel ``(-1, 0)``.
    """
    count, zooniverse_id, markings, user_ids, tools, num_users = args
    # Nothing to cluster -> return the sentinel result immediately.
    if not markings:
        return zooniverse_id, (-1, 0), num_users
    clustering_engine = Agglomerative(None, None, {})
    # NOTE(review): `markings` is passed twice, matching the original call —
    # confirm against Agglomerative.__cluster__'s signature.
    clustering_results = clustering_engine.__cluster__(markings, user_ids, tools,
                                                       markings, None, None)
    print("++ " + str(count))  # progress marker for long batch runs
    return zooniverse_id, clustering_results, num_users
示例#2
0
def agglomerative_clustering(filename, n_clusters, linkage, affinity):
    """Run agglomerative clustering on the data in *filename* and print the result.

    Reads the sample via ``read_data``, fits an ``Agglomerative`` model with
    the given cluster count, linkage, and affinity, then prints the chosen
    parameters followed by the resulting cluster assignment.
    """
    data = read_data(filename)
    model = Agglomerative(n_clusters, linkage, affinity)
    model.fit(data)
    result = model.get_clusters()
    print(linkage, affinity)
    print("Cluster Result: \n", result)
示例#3
0
        path = l[0]
        t = [r.split(",") for r in l[1:] if r != ""]
        roi_dict[path] = [(int(x)/1.92,int(y)/1.92) for (x,y) in t]

# Connect to the local MongoDB instance holding the Penguin Watch data.
client = pymongo.MongoClient()
db = client['penguin']
classification_collection = db["penguin_classifications"]
subject_collection = db["penguin_subjects"]

# One-off migration kept for reference: copies each classification's
# zooniverse_id up to a top-level field so it can be queried directly.
# for c in classification_collection.find():
#     _id = c["_id"]
#     zooniverse_id = c["subjects"][0]["zooniverse_id"]
#
#     classification_collection.update_one({"_id":_id},{"$set":{"zooniverse_id":zooniverse_id}})

# Shared clustering engine; all constructor arguments are placeholders here.
clustering_engine = Agglomerative(None,None,{})

# result = db.profiles.create_index([('zooniverse_id', pymongo.ASCENDING)],unique=False)
# print result

# Collect the markings made on every penguin subject.
# NOTE(review): the loop body continues beyond this excerpt; the accumulators
# below are presumably filled from the classifications — confirm downstream.
for subject in subject_collection.find():
    # _id = c["_id"]
    zooniverse_id = subject["subjects"][0]["zooniverse_id"]
    print(zooniverse_id)

    # Per-subject accumulators: marking coordinates, the user who made each
    # marking, and the tool used for it.
    markings = []
    user_ids = []
    tools = []

    num_users = 0
    path = subject["metadata"]["path"]
示例#4
0
        ub_roi = [[0, height], [width, height]]

        return lb_roi, ub_roi


# Sample gold-standard plankton subjects: those classified by the expert user
# ("yshish") and by the listed experienced volunteers.
project = PlanktonPortal()
# project.__top_users__()
#project.__set_subjects__([u'APK00011p5', u'APK0001bw9', u'APK0001dj4', u'APK00019zu', u'APK00018ri', u'APK0001dxl', u'APK0001ana', u'APK0000ppu', u'APK0000dvx', u'APK0000pyd', u'APK00019ol', u'APK00072zo', u'APK0000h5h', u'APK00001fk', u'APK0000a69', u'APK0000km2', u'APK000175z', u'APK00019yw', u'APK0000e39', u'APK0000kga'])
# project.__random_gold_sample__(max_subjects=50)
project.__gold_sample__(["yshish"], [
    "ElisabethB", "Damon22", "MingMing", "elizabeth", "JasonJason", "rlb66xyz",
    "planetari7", "fermor332002", "artman40", "Quia"
],
                        max_subjects=200)

clustering = Agglomerative(project)
# clustering2 = GoldClustering(project)
classifier = ExpertClassification(project, clustering)
#
# Fit and classify each gold-standard subject, printing progress as we go.
# Python 2 print statements ported to the Python 3 print() function (the
# original `print subject_id` form is a SyntaxError under Python 3).
for subject_id in project.gold_standard_subjects:
    print(subject_id)
    clustering.__fit__(subject_id, gold_standard=True)
    print(clustering.goldResults[subject_id])
    classifier.__classify__([subject_id], True)
    print()  # blank separator line between subjects
    # project.__store_annotations__(subject_id,max_users=20)
    # clustering.__fit__(subject_id)
#     clustering.__fit__(subject_id,gold_standard=True)
#
# # # clustering.__check__()
# #
示例#5
0
        path = l[0]
        t = [r.split(",") for r in l[1:] if r != ""]
        roi_dict[path] = [(int(x) / 1.92, int(y) / 1.92) for (x, y) in t]

# Connect to the local MongoDB instance holding the Penguin Watch data.
client = pymongo.MongoClient()
db = client['penguin']
classification_collection = db["penguin_classifications"]
subject_collection = db["penguin_subjects"]

# One-off migration kept for reference: copies each classification's
# zooniverse_id up to a top-level field so it can be queried directly.
# for c in classification_collection.find():
#     _id = c["_id"]
#     zooniverse_id = c["subjects"][0]["zooniverse_id"]
#
#     classification_collection.update_one({"_id":_id},{"$set":{"zooniverse_id":zooniverse_id}})

# Shared clustering engine; all constructor arguments are placeholders here.
clustering_engine = Agglomerative(None, None, {})

# result = db.profiles.create_index([('zooniverse_id', pymongo.ASCENDING)],unique=False)
# print result

# Collect the markings made on every penguin subject.
# NOTE(review): the loop body continues beyond this excerpt; the accumulators
# below are presumably filled from the classifications — confirm downstream.
for subject in subject_collection.find():
    # _id = c["_id"]
    zooniverse_id = subject["subjects"][0]["zooniverse_id"]
    print(zooniverse_id)

    # Per-subject accumulators: marking coordinates, the user who made each
    # marking, and the tool used for it.
    markings = []
    user_ids = []
    tools = []

    num_users = 0
    path = subject["metadata"]["path"]
示例#6
0
    def test_agglomerative(self):
        """Compare the custom Agglomerative implementation against sklearn.

        Trains one custom model per linkage criterion on the iris data, maps
        each model's arbitrary cluster ids to true labels on the training
        split, predicts the held-out split, and prints the resulting accuracy.
        Then fits sklearn's AgglomerativeClustering for reference and prints
        its V-measure on the training labels.

        Refactor note: the original four-way copy-paste per linkage is
        collapsed into data-driven loops; printed output is identical.
        """
        df = pd.read_csv('iris.data', header=None)

        # Column 4 holds the class label; the remaining columns are features.
        x = df.drop([4], axis=1)
        y = df[4]

        X_train, X_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.2,
                                                            random_state=79)

        n_clusters = 3
        # One custom model per linkage.  The 'single' entry relies on the
        # class default linkage, exactly as the original code did.
        custom_models = [
            ('single', Agglomerative(X_train, n_clusters)),
            ('complete', Agglomerative(X_train, n_clusters, 'complete')),
            ('average', Agglomerative(X_train, n_clusters, 'average')),
            ('average_group', Agglomerative(X_train, n_clusters,
                                            'average_group')),
        ]

        expected = y_test.reset_index(drop=True)
        for linkage, model in custom_models:
            # Map cluster ids to majority true labels on the training split,
            # then score predictions on the held-out split.
            mapping = self.get_mapping_to_label(n_clusters,
                                                model.get_cluster(), y_train)
            predictions = [model.predict(row) for _, row in X_test.iterrows()]
            print(
                'Akurasi agglomerative dengan %s linkage = ' % linkage,
                self.get_accuracy(
                    self.apply_map_to_cluster(predictions, mapping), expected))

        # sklearn reference models.  NOTE(review): the original code also
        # computed get_mapping_to_label() for each sklearn model but never
        # used the results; that dead code is dropped here (the method is a
        # getter and is assumed side-effect free — confirm).
        sklearn_linkages = ["single", "complete", "average", "ward"]
        for linkage in sklearn_linkages:
            model = AgglomerativeClustering(n_clusters,
                                            linkage=linkage).fit(X_train)
            print('Akurasi agglomerative sklearn dengan %s linkage = %0.3f' %
                  (linkage, metrics.v_measure_score(y_train, model.labels_)))
示例#7
0
        subject = self.subject_collection.find_one({"zooniverse_id":subject_id})
        cutout = subject["metadata"]["cutout"]
        width = cutout["width"]
        height = cutout["height"]
        lb_roi = [[0,0],[width,0]]
        ub_roi = [[0,height],[width,height]]

        return lb_roi,ub_roi

# Sample gold-standard plankton subjects: those classified by the expert user
# ("yshish") and by the listed experienced volunteers.
project = PlanktonPortal()
# project.__top_users__()
#project.__set_subjects__([u'APK00011p5', u'APK0001bw9', u'APK0001dj4', u'APK00019zu', u'APK00018ri', u'APK0001dxl', u'APK0001ana', u'APK0000ppu', u'APK0000dvx', u'APK0000pyd', u'APK00019ol', u'APK00072zo', u'APK0000h5h', u'APK00001fk', u'APK0000a69', u'APK0000km2', u'APK000175z', u'APK00019yw', u'APK0000e39', u'APK0000kga'])
# project.__random_gold_sample__(max_subjects=50)
project.__gold_sample__(["yshish"],["ElisabethB","Damon22","MingMing","elizabeth","JasonJason","rlb66xyz","planetari7","fermor332002","artman40","Quia"],max_subjects=200)

clustering = Agglomerative(project)
# clustering2 = GoldClustering(project)
classifier = ExpertClassification(project,clustering)
#
# Fit and classify each gold-standard subject, printing progress as we go.
# Python 2 print statements ported to the Python 3 print() function (the
# original `print subject_id` form is a SyntaxError under Python 3).
for subject_id in project.gold_standard_subjects:
    print(subject_id)
    clustering.__fit__(subject_id,gold_standard=True)
    print(clustering.goldResults[subject_id])
    classifier.__classify__([subject_id],True)
    print()  # blank separator line between subjects
    # project.__store_annotations__(subject_id,max_users=20)
    # clustering.__fit__(subject_id)
#     clustering.__fit__(subject_id,gold_standard=True)
#
# # # clustering.__check__()
# #
示例#8
0
    # NOTE(review): this excerpt begins mid-function; `data` is loaded before
    # the visible lines (presumably the adult-census income table — confirm).
    data = data.dropna(how='any')  # deleted any missing value
    # del data['education']  # deleted categorial education because it's same with education-num
    # Split the numeric feature columns from the class label column.
    the_data = data.loc[:, 'age':'hours-per-week']
    label = data.loc[:, 'class']

    # Encode the string class labels as integers for comparison below.
    encoder = preprocessing.LabelEncoder()
    label = encoder.fit_transform(label)
    # for col in ["workclass", "marital-status", "occupation", "relationship", "race", "sex",
    #            "native-country"]:
    #    the_data[col] = encoder.fit_transform(the_data[col])
    # print(the_data)
    # print(the_data.shape)
    # Evaluate on only the first 1000 rows.
    testing_count = 1000
    #model = AgglomerativeClustering(linkage="ward", n_clusters=2)
    #model.fit(the_data.head(n=testing_count))
    testing = label[0:testing_count]

    model2 = Agglomerative(linkage="average")
    model2.fit(the_data.head(n=testing_count))

    print(model2.labels_)

    # print(testing)
    # print(model.labels_)
    # NOTE(review): comparing raw cluster ids to encoded class labels assumes
    # cluster id == class id; cluster ids are arbitrary, so this can
    # understate the real agreement.
    count = 0
    for idx, label in enumerate(model2.labels_):  # NOTE(review): rebinds `label`
        if label == testing[idx]:
            count += 1
    print("Correct: ", count, " From: ", testing_count)
    print("Accuracy: ", count * 100.0 / testing_count)
示例#9
0
    # ax1.imshow(image)
    # plt.show()


# Per-user weighting buckets: presumably maps a user's accuracy bucket
# (5 = best) to a probability threshold — TODO confirm semantics downstream.
user_buckets = {5: 0.05, 4: 0.25, 3: 0.5, 2: 0.75, 1: 0.95}
# Candidate algorithm thresholds to sweep over.
alg_threshold = range(100, 250, 10)

# Result accumulators, one list per threshold (filled beyond this excerpt).
results = {t: [] for t in alg_threshold}
user_results = []

# NOTE(review): the body of this with-block is truncated by the end of the
# excerpt, and this snippet uses Python 2 print statements.
with AggregationAPI(11, "development") as whales:
    whales.__setup__()
    # whales.__migrate__()

    # Clustering engines: rectangles for whale outlines, points for markers.
    rectangle_clustering = BlobClustering("rectangle", whales, {})
    point_clustering = Agglomerative("point", {})

    # Pull the subject ids for workflow 84 directly from the postgres DB.
    postgres_cursor = whales.postgres_session.cursor()
    select = "SELECT classification_subjects.subject_id from classifications INNER JOIN classification_subjects ON classification_subjects.classification_id = classifications.id where workflow_id = 84"
    postgres_cursor.execute(select)

    #

    # Process only the first 50 subjects.
    for subject_id in postgres_cursor.fetchall()[:50]:
        subject_id = subject_id[0]  # fetchall() rows are 1-tuples
        print subject_id
        # subject_id = 494953

        # T1 - rectangle outline
        # t2 - points
        classifications, markings, _, _ = whales.__sort_annotations__(