def train(mode, inmode, model, sample_dict, iter, mini_batch_size=1000):
    """Fit ``model`` on random resamples of every mini-batch.

    For each ``(index, features, labels)`` tuple produced by
    ``tcdl.iter_mini_batches`` the learning rate ``model.eta`` is scaled by
    0.99, then ``model.partial_fit`` is called repeatedly on rows picked with
    ``np.random.choice`` (sampling with replacement).  The value returned by
    ``partial_fit`` is stored in a 32-slot ring buffer; every 8th epoch the
    buffer mean (the "indicator") is recomputed and a progress line is
    printed.  Work on a batch stops once the indicator is below 8.88 or the
    epoch counter exceeds 1024.

    Args:
        mode: Forwarded to ``tcdl.iter_mini_batches`` and echoed in the log.
        inmode: Only echoed in the log.
        model: Object exposing ``eta`` and ``partial_fit(xs, ys)``.
        sample_dict: Forwarded to ``tcdl.iter_mini_batches``.
        iter: Zero-based outer iteration number, logged as ``iter + 1``.
            The name shadows the builtin but is part of the signature.
        mini_batch_size: Batch size requested from the loader and number of
            rows drawn for every ``partial_fit`` call.
    """
    window = 32
    threshold = 8.88
    report_every = 8
    max_epoch = 1024

    batches = tcdl.iter_mini_batches(
        mode, sample_dict, mini_batch_size=mini_batch_size)
    for batch, (index, features, labels) in enumerate(batches):
        model.eta = model.eta * 0.99
        # The buffer starts far above the threshold, so the mean can only
        # drop below it after most slots hold real measurements.
        recent_rmse = [threshold * window] * window
        indicator = threshold * window
        epoch = 0
        while indicator >= threshold and epoch <= max_epoch:
            rows = np.random.choice(features.shape[0], mini_batch_size)
            rmse = model.partial_fit(features[rows, :], labels[rows, :])
            recent_rmse[epoch % window] = rmse
            epoch += 1
            if epoch % report_every == 0:
                indicator = sum(recent_rmse) / len(recent_rmse)
                print('mode: %s, inmode: %s, iter: %d, batch: %d, epoch: %d, eta: %f, rmse: %f, indicator: %f'
                      % (mode, inmode, iter + 1, batch + 1, epoch,
                         model.eta, rmse, indicator))
def main(mode, sample_list, mini_batch_size=500):
    """Write one 2x2-tiled JPEG per time step and one AVI per sample.

    Every batch from ``tcdl.iter_mini_batches`` is reshaped to
    ``(-1, 15, 4, 101, 101)``.  For each of the 15 steps of a sample, the
    four 101x101 planes are tiled into a 202x202 image (planes 0 and 1 on
    the top row, 2 and 3 on the bottom row), cast to ``uint8`` and saved
    under ``../vid/``.  The 15 image paths are then passed to
    ``img_to_vid``.

    Args:
        mode: Forwarded to the loader and used in file names and the log.
        sample_list: Forwarded to ``tcdl.iter_mini_batches``.
        mini_batch_size: Batch size requested from the loader.
    """
    # Values left by the original author; their meaning is not evident here.
    # halo = [0.0, 21.0, 35.0, 99.9]
    # helo = [3002, 3201, 3212, 3078]
    batches = tcdl.iter_mini_batches(
        mode, sample_list, mini_batch_size=mini_batch_size)
    for batch, (idx, attrs, labels) in enumerate(batches):
        samples = np.reshape(attrs, (-1, 15, 4, 101, 101))
        for k, sample in enumerate(samples):
            print('mode: %s, batch: %d, idx: %d' % (mode, batch + 1, idx[k]))
            pathlist = []
            for i, planes in enumerate(sample):
                # The empty float seeds promote the tile to at least float64
                # before the uint8 cast.
                top = np.hstack((np.zeros((101, 0)), planes[0], planes[1]))
                bottom = np.hstack((np.zeros((101, 0)), planes[2], planes[3]))
                mosaic = np.vstack((np.zeros((0, 101 * 2)), top, bottom))
                img = Image.fromarray(mosaic.astype(np.uint8))
                path = '../vid/%s_%d_%.2f_%d.jpg' % (
                    mode, idx[k], labels[k, 0], i + 1)
                img.save(path)
                pathlist.append(path)
            outpath = '../vid/%s_%d_%.2f.avi' % (mode, idx[k], labels[k, 0])
            img_to_vid(pathlist, outpath)
def valid(mode, inmode, model, sample_dict, mini_batch_size=100):
    """Predict every mini-batch and return the stacked per-sample results.

    For each ``(index, features, labels)`` tuple from
    ``tcdl.iter_mini_batches`` the model prediction is reshaped to a column,
    the batch RMSE is printed together with ``mode`` and ``inmode``, and the
    row block ``hstack((index, labels, predictions))`` is collected.

    NOTE(review): the previous implementation rebound ``ys`` on every
    iteration, so only a single batch was ever returned; all batches are now
    stacked.  Confirm that no caller relied on the truncated result.

    Args:
        mode: Forwarded to the loader and printed.
        inmode: Only printed.
        model: Object exposing ``predict(features)``.
        sample_dict: Forwarded to ``tcdl.iter_mini_batches``.
        mini_batch_size: Batch size requested from the loader.

    Returns:
        2-D array with one row per sample: index columns, label columns and
        the predicted value.

    Raises:
        ValueError: If the loader yields no batch (previously this surfaced
            as an ``UnboundLocalError`` on ``ys``).
    """
    blocks = []
    for index, features, labels in tcdl.iter_mini_batches(
            mode, sample_dict, mini_batch_size=mini_batch_size):
        pre_labs = model.predict(features).reshape(-1, 1)
        rmse = np.sqrt(mean_squared_error(labels, pre_labs))
        print(mode, inmode, rmse)
        blocks.append(np.hstack((index, labels, pre_labs)))
    if not blocks:
        raise ValueError('valid: no mini-batches were produced for mode %r' % (mode,))
    return np.vstack(blocks)
def multi_cosine_similarity(attrsX, mode, split_list):
    """Return the summed cosine similarity to all samples, per sample.

    Iterates the mini-batches of ``split_list``, adds up every entry of
    ``scidist.cosine_similarity(attrsX, attrsA)`` and divides the total by
    the number of rows seen in ``attrsA`` across all batches.

    Args:
        attrsX: Feature rows compared against every batch.
        mode: Forwarded to ``tcdl.iter_mini_batches``.
        split_list: Forwarded to ``tcdl.iter_mini_batches``.

    Returns:
        Sum of all pairwise similarities divided by the sample count.

    Raises:
        ZeroDivisionError: If the loader yields no samples.
    """
    count = 0
    sum_dist = 0
    # NOTE(review): unpacks two values per batch; confirm that
    # tcdl.iter_mini_batches yields pairs for this call.
    for attrsA, _ in tcdl.iter_mini_batches(mode, split_list, mini_batch_size=100):
        count += attrsA.shape[0]
        # Apply the format string; passing the value as a second print
        # argument emitted a literal "%d".
        print("shape: %d" % attrsA.shape[0])
        sum_dist += scidist.cosine_similarity(attrsX, attrsA).sum()
    return sum_dist / float(count)
def calc_centroid(mode, split_list_train):
    """Return the mean feature row over all samples of ``split_list_train``.

    Column sums of every mini-batch are accumulated into a
    ``(1, 15 * 4 * 101 * 101)`` float64 buffer and divided by the total
    number of rows.  Progress (batch number, running count and the running
    sum) is printed after every batch.

    Args:
        mode: Forwarded to ``tcdl.iter_mini_batches``.
        split_list_train: Forwarded to ``tcdl.iter_mini_batches``.

    Returns:
        Array of shape ``(1, 15 * 4 * 101 * 101)`` holding the centroid.
        When the loader yields no batch the division by zero produces NaN
        entries (NumPy emits a RuntimeWarning rather than raising).
    """
    count = 0
    # np.float64 replaces the np.float alias, which was removed in NumPy 1.24.
    sumarr = np.zeros((1, 15 * 4 * 101 * 101), dtype=np.float64)
    batches = tcdl.iter_mini_batches(mode, split_list_train, mini_batch_size=500)
    for idx, (_, attrsA, _labels) in enumerate(batches, start=1):
        print('haha')
        count += attrsA.shape[0]
        sumarr += attrsA.sum(axis=0)
        print('calc_centroid: %d, count: %d' % (idx, count))
        print(sumarr)
    return sumarr / float(count)
def valid(mode, inmode, crlnn, sample_dict, iter, memo=''):
    """Run ``crlnn.predict`` on the first mini-batch and return its labels.

    Only the first batch produced by ``tcdl.iter_mini_batches`` (requested
    with ``mini_batch_size=50``) is evaluated: the loop is left
    unconditionally after one pass, whatever ``memo`` is.

    NOTE(review): ``memo`` therefore has no effect; confirm whether the
    single-batch limit is intentional.

    Args:
        mode: Forwarded to the loader and logged.
        inmode: Only logged.
        crlnn: Object exposing ``eta`` and ``predict(features, labels)``
            returning ``(rmse, predicted_labels)``.
        sample_dict: Forwarded to ``tcdl.iter_mini_batches``.
        iter: Zero-based outer iteration number, logged as ``iter + 1``.
        memo: Accepted for interface compatibility; see the note above.

    Returns:
        Tuple ``(ori_labels, pre_labels)``; both are ``(0, 1)`` arrays when
        the loader yields nothing.
    """
    ori_labels = np.zeros((0, 1))
    pre_labels = np.zeros((0, 1))
    batches = tcdl.iter_mini_batches(mode, sample_dict, mini_batch_size=50)
    for batch, (index, features, labels) in enumerate(batches):
        batch_rmse, batch_pred = crlnn.predict(features, labels)
        pre_labels = np.concatenate((pre_labels, batch_pred), axis=0)
        ori_labels = np.concatenate((ori_labels, labels), axis=0)
        # The trailing 14.68 is a fixed placeholder for the "indicator" field.
        print('mode: %s, inmode: %s, iter: %d, batch: %d, epoch: %d, eta: %f, rmse: %f, indicator: %f\n'
              % (mode, inmode, iter + 1, batch + 1, 1, crlnn.eta,
                 batch_rmse, 14.68))
        break
    return ori_labels, pre_labels
def cnn2d_valid(mode, cnn, sample_dict, iter):
    """Evaluate ``cnn`` on every mini-batch of ``sample_dict[mode]``.

    Features and labels of each batch are passed through
    ``tcdpsm.tranform_feature`` / ``tcdpsm.duplicate_label`` with a factor
    of 15 before ``cnn.predict`` is called.  The returned predictions are
    averaged over consecutive groups of 15 values, giving one column entry
    per group, and appended to the running result.

    Args:
        mode: Key into ``sample_dict`` and first argument of the loader.
        cnn: Object exposing ``predict(features, labels)`` returning
            ``(cost, predictions)``.
        sample_dict: Mapping from mode to the sample collection to iterate.
        iter: Zero-based outer iteration number, logged as ``iter + 1``.

    Returns:
        Tuple ``(ori_labels, pre_labels)`` of column arrays.
    """
    ori_labels = np.zeros((0, 1))
    pre_labels = np.zeros((0, 1))
    batches = tcdl.iter_mini_batches(mode, sample_dict[mode], mini_batch_size=100)
    for batch, (index, features, labels) in enumerate(batches):
        expanded_x = tcdpsm.tranform_feature(features, 15, shape=(-1, n_input))
        expanded_y = tcdpsm.duplicate_label(labels, 15)
        cost, raw_pred = cnn.predict(expanded_x, expanded_y)
        # Collapse every run of 15 predictions into its mean.
        group_mean = np.reshape(raw_pred, (-1, 15)).mean(axis=1)
        batch_pred = np.reshape(group_mean, (-1, 1))
        pre_labels = np.concatenate((pre_labels, batch_pred), axis=0)
        ori_labels = np.concatenate((ori_labels, labels), axis=0)
        print('iter: %d, batch: %d, epoch: %d, rmse: %f'
              % (iter + 1, batch + 1, 1, np.sqrt(cost)))
    return ori_labels, pre_labels
def main(mode, sample_list, mini_batch_size=500):
    """Write one JPEG per (plane, time step) and one AVI per plane.

    Every batch from ``tcdl.iter_mini_batches`` is reshaped to
    ``(-1, 15, 4, 101, 101)`` and transposed so the axis of size 4 precedes
    the axis of size 15.  For each sample and each of the 4 planes, the 15
    frames are cast to ``uint8``, saved under ``../vid/`` and the resulting
    paths handed to ``img_to_vid``.

    Args:
        mode: Forwarded to the loader and used in file names and the log.
        sample_list: Forwarded to ``tcdl.iter_mini_batches``.
        mini_batch_size: Batch size requested from the loader.
    """
    # Values left by the original author; their meaning is not evident here.
    # halo = [0.0, 21.0, 35.0, 99.9]
    # helo = [3002, 3201, 3212, 3078]
    batches = tcdl.iter_mini_batches(
        mode, sample_list, mini_batch_size=mini_batch_size)
    for batch, (idx, attrs, labels) in enumerate(batches):
        volume = np.reshape(attrs, (-1, 15, 4, 101, 101))
        by_plane = np.transpose(volume, (0, 2, 1, 3, 4))
        for k, sample in enumerate(by_plane):
            print('mode: %s, batch: %d, idx: %d'%(mode, batch+1, idx[k]))
            for i, sequence in enumerate(sample):
                pathlist = []
                for l, frame in enumerate(sequence):
                    img = Image.fromarray(frame.astype(np.uint8))
                    path = '../vid/%s_%d_%.2f_h%d_t%d.jpg' % (
                        mode, idx[k], labels[k, 0], i + 1, l + 1)
                    img.save(path)
                    pathlist.append(path)
                outpath = '../vid/%s_%d_%.2f_h%d.avi' % (
                    mode, idx[k], labels[k, 0], i + 1)
                img_to_vid(pathlist, outpath)
def cnn2d_train(mode, cnn, sample_dict, iter):
    """Train ``cnn`` on resampled mini-batches and save the model.

    For every batch of ``sample_dict[mode]`` the network is fitted 32 times;
    each time 100 rows are drawn with ``np.random.choice`` (sampling with
    replacement), expanded through ``tcdpsm.tranform_feature`` /
    ``tcdpsm.duplicate_label`` with a factor of 15, and passed to
    ``cnn.partial_fit``.  The square root of the returned value is logged as
    RMSE.  After all batches ``cnn.saveModel(iter + 1)`` is called.

    Args:
        mode: Key into ``sample_dict`` and first argument of the loader.
        cnn: Object exposing ``partial_fit(features, labels)`` and
            ``saveModel(step)``.
        sample_dict: Mapping from mode to the sample collection to iterate.
        iter: Zero-based outer iteration number, logged and saved as
            ``iter + 1``.
    """
    rounds_per_batch = 32
    batches = tcdl.iter_mini_batches(mode, sample_dict[mode], mini_batch_size=100)
    for batch, (index, features, labels) in enumerate(batches):
        for j in range(rounds_per_batch):
            rows = np.random.choice(features.shape[0], 100)
            picked_x = features[rows, :]
            picked_y = labels[rows, :]
            expanded_x = tcdpsm.tranform_feature(picked_x, 15, shape=(-1, n_input))
            expanded_y = tcdpsm.duplicate_label(picked_y, 15)
            rmse = np.sqrt(cnn.partial_fit(expanded_x, expanded_y))
            print('iter: %d, batch: %d, epoch: %d, rmse: %f'
                  % (iter + 1, batch + 1, j + 1, rmse))
    cnn.saveModel(iter + 1)
def split_valid_data_batch_mode(mode, samplelist, centroid_lt_20, centroid_ge_20):
    """Assign every sample's label to one of two groups by centroid distance.

    The Manhattan distances from each sample to the two centroids
    (``centroid_lt_20`` first, ``centroid_ge_20`` second) are collected over
    all mini-batches and turned into weights ``1 - d / (d0 + d1)``.  A label
    goes to the "ge_20" list when the first weight is strictly smaller than
    the second, otherwise to the "lt_20" list.  One record per sample is
    printed and stored.

    Args:
        mode: Forwarded to the loader and stored in every record.
        samplelist: Forwarded to ``tcdl.iter_mini_batches``.
        centroid_lt_20: Centroid row(s) concatenated first.
        centroid_ge_20: Centroid row(s) concatenated second.

    Returns:
        Tuple ``(lt_20_list, ge_20_list, similarlist)`` where the first two
        hold labels and ``similarlist`` holds
        ``[mode, index, label, weight0, weight1]`` records.
    """
    centroids = np.concatenate((centroid_lt_20, centroid_ge_20), axis=0)

    index = []
    labels = np.zeros((0, 1))
    dismht = np.zeros((0, 2))
    batches = tcdl.iter_mini_batches(mode, samplelist, mini_batch_size=500)
    for batch, (idx, attrsA, labelsA) in enumerate(batches):
        index.extend(idx)
        labels = np.concatenate((labels, labelsA), axis=0)
        batch_dist = scidist.manhattan_distances(attrsA, centroids)
        dismht = np.concatenate((dismht, batch_dist), axis=0)
        print("split_valid_data_batch_mode: I'm alive! batch: %d" % (batch + 1))

    # Normalise each row by its own total so the two weights sum to 1.
    weight = 1 - dismht / dismht.sum(axis=1, keepdims=True)

    lt_20_list, ge_20_list, similarlist = [], [], []
    for i, sample_id in enumerate(index):
        w_first, w_second = weight[i, 0], weight[i, 1]
        label = labels[i, 0]
        target = ge_20_list if w_first < w_second else lt_20_list
        target.append(label)
        record = [mode, sample_id, label, w_first, w_second]
        print(record)
        similarlist.append(record)

    print('split_valid_data_batch_mode for %s: %d, %d' %
          (mode, len(lt_20_list), len(ge_20_list)))
    return lt_20_list, ge_20_list, similarlist
# classifier testing
# NOTE(review): takes ``self`` and is invoked below as ``rbfnn.valid(...)``,
# so this is presumably a method of RBFNN whose class header is not visible
# here.
def valid(self, trainX, trainY):
    """Return the network output for ``trainX`` and its RMSE against ``trainY``.

    The activations from ``self.activCalc`` are multiplied by ``self.W``;
    the result must have shape ``(len(trainX), self.outdim)``.

    Returns:
        Tuple ``(output, rmse)``.

    Raises:
        AssertionError: If the output shape does not match.
    """
    hidden = self.activCalc(trainX)
    output = numpy.dot(hidden, self.W)
    expected_shape = (numpy.shape(trainX)[0], self.outdim)
    assert numpy.shape(output) == expected_shape
    return output, numpy.sqrt(sklmse(output, trainY))


# Driver: fit on the first training batch, then score the first validation
# batch and dump its rows to disk.
rbfnn = RBFNN(numCenter=512, numClass=8)
sample_dict = tcdp.random_select_samples()

mode = 'train'
train_batches = tcdl.iter_mini_batches(mode, sample_dict[mode], mini_batch_size=8000)
for index, features, labels in train_batches:
    y, rmse = rbfnn.train(features, labels)
    # np.savetxt('ys_train_lsq.txt', np.hstack((index, labels, y)))
    print(y.tolist()[-6:], '\n', labels.tolist()[-6:], '\n', rmse)
    break

mode = 'valid'
# NOTE(review): the loader is called with 'train' while the samples come from
# sample_dict['valid'] -- presumably the validation ids live in the training
# data; confirm.
valid_batches = tcdl.iter_mini_batches('train', sample_dict[mode], mini_batch_size=2000)
for index, features, labels in valid_batches:
    y, rmse = rbfnn.valid(features, labels)
    numpy.savetxt('ys_valid_lsq.txt', numpy.hstack((index, labels, y)))
    print(y.tolist()[-6:], '\n', labels.tolist()[-6:], '\n', rmse)
    break