def feature_re_extract():
    """Re-encode the stored per-class features with a stacked autoencoder.

    Steps, in order:

    1. Load the model from ``model/model.out``.
    2. Gather the feature rows of every class listed in ``test_class``.
    3. Rescale every feature column to [-1, 1] using that column's
       min/max over all gathered rows. The rescaling is done in place.
    4. Train an ``mSdA`` (layers ``[39, 100]``) on the rescaled rows.
    5. Replace each class's features with the autoencoder's hidden values,
       retrain the model, and dump the model to ``model/model_sda.out`` and
       the autoencoder to ``model/sda.out``.

    Returns:
        None. The results are the two dumped files.

    Raises:
        IndexError: if the listed classes contribute no feature rows.
    """
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S',
                  'FJAZ_S', 'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S',
                  'FUAN_S', 'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S',
                  'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S',
                  'MPRA_S', 'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')

    # Construct the train set. The row objects are appended as-is (no copy),
    # so the in-place rescaling below is also visible through m.features --
    # assuming the rows are mutable sequences (lists / array views).
    train_set = []
    for c in test_class:
        train_set.extend(m.features[c])

    # Per-column bounds, seeded from the first row.
    n_features = len(train_set[0])
    up_bound = list(train_set[0])
    lower_bound = list(train_set[0])
    for row in train_set:
        for j in range(n_features):
            up_bound[j] = max(up_bound[j], row[j])
            lower_bound[j] = min(lower_bound[j], row[j])

    # Column widths used as divisors. float() forces true division even for
    # integer-valued features on Python 2; a zero width (constant column) is
    # replaced by 1.0 so the division is defined and the column maps to -1.
    spans = [float(hi - lo) or 1.0 for hi, lo in zip(up_bound, lower_bound)]

    # Put all values into -1~1.
    for row in train_set:
        for j in range(n_features):
            row[j] = 2 * ((row[j] - lower_bound[j]) / spans[j]) - 1

    # Construct the stacked autoencoder.
    sda = mSdA(
        layers=[39, 100]
    )
    # NOTE(review): setMinMax receives the raw bounds; confirm it tolerates
    # up_bound[j] == lower_bound[j] for constant columns.
    sda.setMinMax(up_bound, lower_bound)
    sda.train(train_set, 500)  # use 500 as the batch size
    for c in test_class:
        m.features[c] = sda.get_hidden_values(m.features[c])
    m.train()
    m.dump('model/model_sda.out')
    sda.dump('model/sda.out')
# Example #2
# 0
# Allocate a float64 (num_rows x num_cols) matrix. Every cell is overwritten
# with the min-max scaled value of my_data in the loop further down.
data = np.arange(total_size)
data = data.reshape(num_rows, num_cols)  # 2D matrix of data points
data = data.astype('float64')

# Per-column maximum and minimum of my_data, seeded from the first row.
max_val = [my_data[0][j] for j in range(num_cols)]
min_val = list(max_val)
for i in range(num_rows):
    for j in range(num_cols):
        max_val[j] = max(max_val[j], my_data[i][j])
        min_val[j] = min(min_val[j], my_data[i][j])

# Column widths used as divisors. float() forces true division even for
# integer input on Python 2; a zero width (constant column) is replaced by
# 1.0 so the division is defined and the column scales to 0.
spans = [float(max_val[j] - min_val[j]) or 1.0 for j in range(num_cols)]

# Min-max scale every column of my_data into [0, 1].
for i in range(num_rows):
    for j in range(num_cols):
        data[i][j] = (my_data[i][j] - min_val[j]) / spans[j]

sda = mSdA(
    layers=[3, 5, 2]
)
sda.train(data, 10)
feat = sda.get_hidden_values(data)

# Write the hidden representation, one CSV row per input row. The `with`
# block guarantees the file is flushed and closed.
with open("feat.csv", 'w') as out_file:
    writer = csv.writer(out_file)
    for row in feat:
        writer.writerow(row)