-
Notifications
You must be signed in to change notification settings - Fork 0
/
GridSearch.py
72 lines (53 loc) · 2.89 KB
/
GridSearch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
'''
Homework 4
Name : Tailin Lo
NetID : tl1720
N number : N15116873
Email : tl1720@nyu.edu
'''
import FetchFile
import FeatureExtraction
import Classification
import multiprocessing
def grid_search_for_neighbor(class_num, subsample_size, window_size, cluster_num, max_iter, rnd_number, neighbor_num_seq):
    """Run the classifier sequentially, once per candidate neighbor count.

    The data split is produced a single time by ``FetchFile.gen_data`` and
    reused for every call to ``Classification.classifiy``.

    Args:
        class_num, subsample_size, window_size, rnd_number: forwarded to
            ``FetchFile.gen_data`` and to ``Classification.classifiy``.
        cluster_num, max_iter: forwarded to ``Classification.classifiy``.
        neighbor_num_seq: iterable of neighbor counts to try, in order.

    Returns:
        None. Whatever ``Classification.classifiy`` returns is discarded.
    """
    # gen_data yields (train X, test X, train y, test y) in that order.
    x_train, x_test, y_train, y_test = FetchFile.gen_data(
        class_num, subsample_size, window_size, rnd_number
    )

    for candidate in neighbor_num_seq:
        # classifiy expects the train pair first, then the test pair.
        Classification.classifiy(
            class_num, subsample_size, window_size, cluster_num, max_iter,
            rnd_number, candidate, x_train, y_train, x_test, y_test
        )
# ============================================= end ============================================= #
def grid_search_for_neighbor_multiprocess(class_num, subsample_size, window_size, cluster_num, max_iter, rnd_number, neighbor_num_seq):
    """Run the classifier once per candidate neighbor count, one process each.

    The data split is produced a single time by ``FetchFile.gen_data`` and
    handed to every worker. One ``multiprocessing.Process`` is started per
    entry of ``neighbor_num_seq``; this function applies no cap on how many
    run at once, so the caller chooses the sequence length accordingly.

    The call blocks until every worker has finished, so results are complete
    when it returns.

    Args:
        class_num, subsample_size, window_size, rnd_number: forwarded to
            ``FetchFile.gen_data`` and to ``Classification.classifiy``.
        cluster_num, max_iter: forwarded to ``Classification.classifiy``.
        neighbor_num_seq: iterable of neighbor counts; one worker per entry.

    Returns:
        None.
    """
    train_X, test_X, train_y, test_y = FetchFile.gen_data(class_num, subsample_size, window_size, rnd_number)
    jobs = []
    for neighbor_num in neighbor_num_seq:
        p = multiprocessing.Process(
            target=Classification.classifiy,
            args=(class_num, subsample_size, window_size, cluster_num,
                  max_iter, rnd_number, neighbor_num,
                  train_X, train_y, test_X, test_y),
        )
        jobs.append(p)
        p.start()
    # end for
    print(jobs)
    # Wait for every worker: without this the function returns while the
    # classifications are still running and the caller has no handle to
    # wait on them.
    for p in jobs:
        p.join()
    # end for
# ============================================= end ============================================= #
def grid_search_for_cluster(class_num, subsample_size, window_size, max_iter, rnd_number, cluster_num_seq):
    """Run k-means feature extraction once per candidate cluster count.

    Each entry of ``cluster_num_seq`` is passed, in order, to
    ``FeatureExtraction.extract_feature_by_kmeans`` together with the other
    (fixed) arguments. Per the usage notes in this file, this step only
    generates the centroids files.

    Returns:
        None. Whatever ``extract_feature_by_kmeans`` returns is discarded.
    """
    for candidate in cluster_num_seq:
        FeatureExtraction.extract_feature_by_kmeans(
            class_num, subsample_size, window_size, candidate, max_iter, rnd_number
        )
# ============================================= end ============================================= #
# ---- Experiment configuration (passed to the grid-search functions) ----

# Data-generation settings, forwarded to FetchFile.gen_data.
class_num = 25
subsample_size = 4
window_size = 3
rnd_number = 8131985  # presumably a random seed -- confirm against FetchFile

# Clustering settings, forwarded to the classification / k-means steps.
cluster_num = 16
max_iter = 50
'''
Grid search by sweeping the cluster number and the neighbor number.
Users can call grid_search_for_neighbor_multiprocess to run the jobs in parallel (keep the number of jobs no larger than your CPU core count - 1).
Note:
1. grid_search_for_cluster only generates the centroids files; users then have to run grid_search_for_neighbor to find the optimal value.
2. A simple unit test can be run by setting class_num = 1, subsample_size = 1, cluster_num = 3.
3. Before executing the simple unit test, ensure that a corresponding centroids file exists.
'''
# Candidate values to sweep: [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024],
# or equivalently [2**i for i in range(0,11)].
#
# To sweep cluster counts instead, enable the line below:
#grid_search_for_cluster(class_num, subsample_size, window_size, max_iter, rnd_number, [32, 64, 128, 256, 512, 1024])
#
# NOTE(review): if this call is swapped for
# grid_search_for_neighbor_multiprocess, the multiprocessing docs require it
# to sit under `if __name__ == '__main__':` on platforms that use the spawn
# start method.
grid_search_for_neighbor(
    class_num=class_num,
    subsample_size=subsample_size,
    window_size=window_size,
    cluster_num=cluster_num,
    max_iter=max_iter,
    rnd_number=rnd_number,
    neighbor_num_seq=[16, 32, 64, 128, 256],
)