def feature_re_extract():
    """Re-extract speaker features through a stacked denoising autoencoder.

    Loads the trained model, rescales every feature dimension of the pooled
    training set to the [-1, 1] range, trains an SdA on it, replaces each
    class's raw features with the SdA hidden representation, then retrains
    the model and dumps both the updated model and the SdA to disk.

    Side effects: writes 'model/model_sda.out' and 'model/sda.out'.
    """
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 'FJAZ_S',
                  'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S', 'FUAN_S',
                  'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S', 'MLKH_S',
                  'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S', 'MPRA_S',
                  'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')

    # Pool every class's feature vectors into one flat training set.
    train_set = []
    for c in test_class:
        for vec in m.features[c]:
            train_set.append(vec)

    # Per-dimension min/max over the whole training set.
    # (The original initialized up_bound/lower_bound twice; the first
    # initialization was dead code and the min/max scan was hand-rolled.)
    num_dims = len(train_set[0])
    up_bound = [max(vec[j] for vec in train_set) for j in xrange(num_dims)]
    lower_bound = [min(vec[j] for vec in train_set) for j in xrange(num_dims)]

    # Rescale each dimension in place to [-1, 1].
    # NOTE(review): divides by (up - low); a constant feature dimension
    # would raise ZeroDivisionError — confirm the extractor never emits one.
    for vec in train_set:
        for j in xrange(num_dims):
            vec[j] = 2 * ((vec[j] - lower_bound[j]) /
                          (up_bound[j] - lower_bound[j])) - 1

    # Construct and train the stacked autoencoder on the normalized set.
    sda = mSdA(layers=[39, 100])
    sda.setMinMax(up_bound, lower_bound)
    sda.train(train_set, 500)  # use 500 as the batch size

    # Swap raw features for the SdA hidden representation and retrain.
    for c in test_class:
        m.features[c] = sda.get_hidden_values(m.features[c])
    m.train()
    m.dump('model/model_sda.out')
    sda.dump('model/sda.out')
# Normalize my_data column-wise to [0, 1], train a small stacked
# autoencoder on it, and dump the learned hidden features to feat.csv.
# NOTE(review): assumes my_data / num_rows / num_cols are defined earlier
# in the file (my_data presumably loaded from a data file) — confirm.

# Buffer for the normalized copy of my_data.  The original filled it with
# np.arange(total_size) values that were all overwritten below; an empty
# float64 matrix of the right shape is sufficient and avoids depending on
# total_size == num_rows * num_cols.
data = np.empty((num_rows, num_cols), dtype='float64')  # 2D matrix of data points

# Column-wise min/max scan over the raw data.
max_val = []
min_val = []
for j in xrange(num_cols):
    max_val.append(my_data[0][j])
    min_val.append(my_data[0][j])
for i in xrange(num_rows):
    for j in xrange(num_cols):
        max_val[j] = max(max_val[j], my_data[i][j])
        min_val[j] = min(min_val[j], my_data[i][j])

# Read through data file, assume label is in last col
# Split data in terms of 70% train, 10% val, 20% test
# Min-max scale every column to [0, 1].
# NOTE(review): a constant column makes (max - min) zero and the division
# fail — confirm the input data never contains one.
for i in xrange(num_rows):
    for j in xrange(num_cols):
        data[i][j] = (my_data[i][j] - min_val[j]) / (max_val[j] - min_val[j])

# Train the stacked autoencoder and extract the hidden-layer features.
sda = mSdA(layers=[3, 5, 2])
sda.train(data, 10)
feat = sda.get_hidden_values(data)

# Write one CSV row per sample.  The original leaked the file handle
# (open() without close); the with-block guarantees it is closed.
with open("feat.csv", 'w') as fout:
    writer = csv.writer(fout)
    for row in feat:
        writer.writerow(row)