forked from andresantonioriveros/pyRF
-
Notifications
You must be signed in to change notification settings - Fork 0
/
forest_test.py
63 lines (51 loc) · 2.37 KB
/
forest_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# coding=utf-8
# Iterate over the sampled sets one by one, train a plain decision tree and a random forest (RF)
# on each, and compare the f_score of both models. In theory the RF should outperform
# the decision tree in most cases.
# -------------------------------------------------------------------------------------------------
from multiprocessing import Pool
from functools import partial
import argparse
import sys
import metrics
import parallel
# Number of sampled sets to process. The original code enumerated indices
# 0-99 and then kept only the first ten paths; this constant names that limit.
_N_SAMPLED_SETS = 10


def _fit_in_parallel(fit_function, paths, n_processes):
    """Apply ``fit_function`` to every path using a pool of worker processes.

    Args:
        fit_function: picklable callable taking a single path argument.
        paths: list of input file paths, one task per path.
        n_processes: number of worker processes to start.

    Returns:
        The list returned by ``Pool.map``, in the same order as ``paths``.

    Raises:
        Whatever ``Pool.map`` raises; the pool is terminated before the
        exception propagates so no worker processes are left behind.
    """
    # maxtasksperchild=2 makes each worker exit after two tasks and be
    # replaced by a fresh one (presumably to bound per-worker memory growth).
    pool = Pool(processes=n_processes, maxtasksperchild=2)
    try:
        results = pool.map(fit_function, paths)
    except BaseException:
        # Includes KeyboardInterrupt: stop the workers right away instead of
        # letting the remaining queued tasks run to completion.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        # join() is only legal after close() or terminate(); both branches
        # above guarantee one of them has been called.
        pool.join()
    return results


def _save_and_report(results, result_path, file_prefix, label):
    """Write each result to a CSV file and print its weighted f_score.

    Args:
        results: sequence of result objects exposing ``to_csv`` (presumably
            pandas DataFrames -- confirm against ``parallel``).
        result_path: prefix the output file name is appended to; it is
            concatenated verbatim, so it should end with a path separator
            when it names a directory.
        file_prefix: file name stem, e.g. ``'result_tree_'``.
        label: model name used in the printed line, e.g. ``'Tree'``.
    """
    for i, result in enumerate(results):
        result.to_csv(result_path + file_prefix + str(i) + '.csv')
        matrix = metrics.hard_matrix(result)
        f_score = metrics.weighted_f_score(matrix)
        print(label + ' ' + str(i) + ' f_score: ' + str(f_score))


if __name__ == '__main__':
    # Echo the full command line so it is recorded alongside the results.
    print(' '.join(sys.argv))

    parser = argparse.ArgumentParser()
    parser.add_argument('--n_processes', required=True, type=int)
    parser.add_argument('--catalog', default='MACHO', choices=['MACHO', 'EROS', 'OGLE'])
    parser.add_argument('--folds', required=True, type=int)
    parser.add_argument('--sets_path', required=True, type=str)
    parser.add_argument('--result_path', required=True, type=str)
    parser.add_argument('--feature_filter', nargs='*', type=str)
    args = parser.parse_args(sys.argv[1:])

    # sets_path is concatenated verbatim with the file name, so it should end
    # with a path separator when it names a directory.
    paths = [args.sets_path + args.catalog + '_sampled_' + str(i) + '.csv'
             for i in range(_N_SAMPLED_SETS)]

    # Train and classify with decision trees, then save and print the results.
    fit_tree = partial(parallel.fit_tree, feature_filter=args.feature_filter,
                       folds=args.folds)
    tree_results = _fit_in_parallel(fit_tree, paths, args.n_processes)
    _save_and_report(tree_results, args.result_path, 'result_tree_', 'Tree')

    # Train and classify with random forests, then save and print the results.
    fit_rf = partial(parallel.fit_rf, feature_filter=args.feature_filter,
                     folds=args.folds)
    rf_results = _fit_in_parallel(fit_rf, paths, args.n_processes)
    _save_and_report(rf_results, args.result_path, 'result_rf_', 'RF')