def load(path, net):
    # Restore layer weights, biases and training statistics from a
    # pickled checkpoint.
    f = open(path, 'rb')
    d = cPickle.load(f)
    f.close()
    for ld in d['layers']:
        l = net.layers[ld['ind']]
        l.weight = data_loader.copy_to_gpu(ld['weight'].copy())
        l.bias = data_loader.copy_to_gpu(ld['bias'].copy())
    net.stat = d['stat']
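# The checkpoint format consumed by load() above is a pickled dict with
# a 'layers' list (layer index, weight, bias per entry) and the 'stat'
# dict. A minimal sketch of a matching writer; `layer_inds` is a
# hypothetical parameter, and the real saver may store more fields:
def save_sketch(path, net, layer_inds):
    d = {'layers': [], 'stat': net.stat}
    for ind in layer_inds:
        l = net.layers[ind]
        d['layers'].append({'ind': ind,
                            'weight': data_loader.copy_to_cpu(l.weight),
                            'bias': data_loader.copy_to_cpu(l.bias)})
    f = open(path, 'wb')
    cPickle.dump(d, f, 2)  # protocol 2
    f.close()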
def load_net(net, path):
    d = scipy.io.loadmat(path)
    net.layers[1].weight = data_loader.copy_to_gpu(d['w1'].copy())
    net.layers[5].weight = data_loader.copy_to_gpu(d['w2'].copy())
    net.layers[9].weight = data_loader.copy_to_gpu(d['w3'].copy())
    net.layers[12].weight = data_loader.copy_to_gpu(d['w4'].copy())
    net.layers[1].bias = data_loader.copy_to_gpu(d['b1'].copy())
    net.layers[5].bias = data_loader.copy_to_gpu(d['b2'].copy())
    net.layers[9].bias = data_loader.copy_to_gpu(d['b3'].copy())
    net.layers[12].bias = data_loader.copy_to_gpu(d['b4'].copy())
def train_single(net, num_epoch, train_batches, test_batches):
    # The second-to-last layer holds the noise (confusion) matrix.
    l = net.layers[-2]
    M = l.weight.shape[0]
    # Identity matrix used to disable the noise model at test time.
    w_test = np.eye(11)
    w_test = data_loader.copy_to_gpu(w_test)
    if not hasattr(net, 'stat'):
        net.stat = dict()
    for epoch in range(num_epoch):
        train_cases = 0
        train_cost = 0
        train_correct = 0
        train_error = 0
        test_cases = 0
        test_cost = 0
        test_correct = 0
        test_error = 0
        # Visit the training batches in a fresh random order each epoch.
        N = len(train_batches)
        order = range(N)
        np.random.shuffle(order)
        for i in range(N):
            batch = train_batches[order[i]]
            net.train_batch(batch.data, batch.labels, TRAIN)
            cost, correct, num_case = net.get_batch_information()
            train_cases += num_case
            train_correct += correct * num_case
            train_cost += cost * num_case
            if l.epsW > 0:
                normalize_conf_matrix(net)
        for batch in test_batches:
            # Swap in the identity matrix for evaluation, then restore
            # the learned noise matrix.
            w_noisy = l.weight
            l.weight = w_test
            net.train_batch(batch.data, batch.labels, TEST)
            l.weight = w_noisy
            cost, correct, num_case = net.get_batch_information()
            test_cases += num_case
            test_correct += correct * num_case
            test_cost += cost * num_case
        if train_cases > 0:
            train_error = 1. - 1.0 * train_correct / train_cases
            train_cost = 1.0 * train_cost / train_cases
        if test_cases > 0:
            test_error = 1. - 1.0 * test_correct / test_cases
            test_cost = 1.0 * test_cost / test_cases
        print '%d %0.3f %0.3f %0.3f %0.3f' % (epoch, train_cost, train_error,
                                              test_cost, test_error)
        if 'test-error' not in net.stat:
            net.stat['test-error'] = list()
        net.stat['test-error'].append(test_error)
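# normalize_conf_matrix() is called above but defined elsewhere in the
# original code. A minimal sketch, under the assumption that it
# re-projects the learned noise layer onto valid confusion matrices
# (nonnegative entries, columns summing to 1):
def normalize_conf_matrix_sketch(net):
    l = net.layers[-2]
    w = data_loader.copy_to_cpu(l.weight)
    w = np.maximum(w, 0)                    # clip negative entries
    w = w / w.sum(axis=0, keepdims=True)    # renormalize each column
    l.weight = data_loader.copy_to_gpu(w)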
def train(net, num_epoch, train_batches, val_batches, test_batches=[]):
    if not hasattr(net, 'stat'):
        net.stat = dict()
        net.stat['epoch'] = list()
        net.stat['epsW'] = list()
        net.stat['train'] = dict()
        net.stat['test'] = dict()
        net.stat['val'] = dict()
    fstat = open(net.output_dir + 'stat.txt', 'a')
    # disable noise model during testing
    if hasattr(net.layers[-2], 'weight'):
        M = net.layers[-2].weight.shape[0]
        w_test = np.eye(M)
        if M == 11:
            # With 11 outputs, spread the extra class's column
            # uniformly over the 10 real classes at test time.
            w_test[:10, 10] = 0.1
            w_test[10, 10] = 0
        w_test = data_loader.copy_to_gpu(w_test)
    for n in range(1, num_epoch + 1):
        epoch = len(net.stat['epoch']) + 1
        train_data(net, train_batches, net.stat['train'], TRAIN)
        if not hasattr(net.layers[-2], 'weight'):
            train_data(net, val_batches, net.stat['val'], TEST)
            train_data(net, test_batches, net.stat['test'], TEST)
        else:
            w_noisy = net.layers[-2].weight
            net.layers[-2].weight = w_test
            train_data(net, val_batches, net.stat['val'], TEST)
            train_data(net, test_batches, net.stat['test'], TEST)
            net.layers[-2].weight = w_noisy
        net.stat['epoch'].append(epoch)
        net.stat['epsW'].append(net.layers[1].epsW)
        show_stat(net)
        msg = '%d %0.2e %0.3f %0.3f %0.3f' % (
            epoch, net.stat['epsW'][-1],
            net.stat['train']['error'][-1],
            net.stat['val']['error'][-1],
            net.stat['test']['error'][-1])
        print msg
        fstat.write(msg + '\n')
        if epoch % 10 == 0:
            net_checkpoint.save(net, net.output_dir + 'model_' + str(epoch))
    fstat.close()
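# train_data() and show_stat() are defined elsewhere in the original
# code. A minimal sketch of train_data, under the assumption that it
# runs one pass over the batches in the given phase and appends the
# averaged error (and, here, cost) to the supplied stat dict:
def train_data_sketch(net, batches, stat, phase):
    cases = 0
    correct = 0
    cost = 0
    for batch in batches:
        net.train_batch(batch.data, batch.labels, phase)
        c, corr, n = net.get_batch_information()
        cases += n
        correct += corr * n
        cost += c * n
    if 'error' not in stat:
        stat['error'] = list()
        stat['cost'] = list()
    stat['error'].append(1. - 1.0 * correct / cases if cases > 0 else 0.)
    stat['cost'].append(1.0 * cost / cases if cases > 0 else 0.)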
# shuffle data
order = range(pure_sz + back_sz + noise_sz)
np.random.shuffle(order)
train_data = train_data[:, order]
train_labels = train_labels[order]

train_batches = data_loader.prepare_batches(train_data, train_labels, batch_size)
test_batches = data_loader.prepare_batches(test_data, test_labels, batch_size)
print '# train:', train_data.shape[1], 'samples', len(train_batches), 'batches'
print '# test:', test_data.shape[1], 'samples', len(test_batches), 'batches'

# folding matrix: maps the duplicated 11-way outputs back onto the 11
# classes (rows 0-10 and rows 11-21 each carry one identity copy)
s = np.zeros((22, 11))
s[:11, :11] = np.eye(11)
s[11:, :11] = np.eye(11)
net.layers[-2].weight = data_loader.copy_to_gpu(s)

# denoising matrix: identity on the first half; on the second half,
# 0.4 on the diagonal plus 0.06 everywhere, so columns sum to 1
w = np.eye(22)
w[11:21, 11:21] = 0.4 * np.eye(10) + 0.06
net.W_denoise = data_loader.copy_to_gpu(w)
net.label_tmp = data_loader.copy_to_gpu(np.zeros((22, 128)))
net.eps1 = 0.01
net.eps2 = float(sys.argv[4])

net.adjust_learning_rate(3)
net_trainer_noisy.train_mixed(net, 200, train_batches, test_batches)
net.adjust_learning_rate(0.1)
net_trainer_noisy.train_mixed(net, 10, train_batches, test_batches)
net.adjust_learning_rate(0.1)
net_trainer_noisy.train_mixed(net, 10, train_batches, test_batches)
save_net_cifar10.save_net(net,
    'results/weights-cifar10-pure10k-noisy50k-TD-tc' + str(sys.argv[4]))
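# Sanity-check sketch (added) for the folding matrix s above: for any
# 22-way output p, s.T sums the first and second halves class-by-class:
p = np.random.rand(22)
p /= p.sum()
assert np.allclose(s.T.dot(p), p[:11] + p[11:])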
batch_size = 128
param_file = '/home/sainbar/fastnet-confussion-layer/config/cifar-10-18pct-confussion.cfg'
#param_file = '/home/sainbar/fastnet/config/cifar-10-18pct-big3.cfg'
num_epoch = 60
learning_rate = 1
image_color = 3
image_size = 32
image_shape = (image_color, image_size, image_size, batch_size)

init_model = parser.parse_config_file(param_file)
net = fastnet.net.FastNet(learning_rate, image_shape, init_model)

# confusion matrix: keep each label with probability 0.8, flip it to
# class 0 with probability 0.2 (matches the label flipping below)
l = net.layers[-2]
w = np.eye(10) * 0.8
w[0, :] = w[0, :] + 0.2
l.weight = data_loader.copy_to_gpu(w)

# prepare data
train_data, train_labels, test_data, test_labels = data_loader.load_cifar10()
data_mean = train_data.mean(axis=1, keepdims=True)
train_data = train_data - data_mean
test_data = test_data - data_mean

# flip labels: rewrite each training label to class 0 with probability p
N = 50000
p = 0.2
for i in range(N):
    if np.random.rand() < p:
        train_labels[i] = 0

order = range(train_data.shape[1])
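# Sanity-check sketch (added) for the flip model above: w is
# column-stochastic, i.e. reading w[i, j] as P(noisy = i | true = j),
# each column sums to 1:
assert np.allclose(w.sum(axis=0), 1.0)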
def train(net, num_epoch, train_batches, noisy_batches, test_batches, lrate_beta):
    l = net.layers[-2]
    M = l.weight.shape[0]
    assert M == l.weight.shape[1]
    # Identity noise matrix for the clean ("pure") batches.
    w_pure = data_loader.copy_to_gpu(np.eye(M))
    # Noise-free matrix used only at test time.
    w_test = np.eye(M)
    if M == 11:
        w_test[:10, 10] = 0.1
        w_test[10, 10] = 0
    w_test = data_loader.copy_to_gpu(w_test)
    if not hasattr(net, 'stat'):
        net.stat = dict()
    for epoch in range(num_epoch):
        train_cases = 0
        train_cost = 0
        train_correct = 0
        train_error = 0
        test_cases = 0
        test_cost = 0
        test_correct = 0
        test_error = 0
        noisy_cases = 0
        noisy_cost = 0
        noisy_correct = 0
        noisy_error = 0
        # Interleave clean and noisy batches in one shuffled stream.
        N = len(train_batches) + len(noisy_batches)
        order = range(N)
        np.random.shuffle(order)
        for i in range(N):
            if order[i] < len(train_batches):
                # Clean batch: train through the identity noise matrix
                # with the confusion layer frozen (epsW = 0).
                batch = train_batches[order[i]]
                w_noisy = l.weight
                l.weight = w_pure
                epsW_noisy = l.epsW
                l.epsW = 0
                net.train_batch(batch.data, batch.labels, TRAIN)
                l.weight = w_noisy
                l.epsW = epsW_noisy
                cost, correct, num_case = net.get_batch_information()
                train_cases += num_case
                train_correct += correct * num_case
                train_cost += cost * num_case
            else:
                # Noisy batch: train through the confusion layer with a
                # temporarily scaled learning rate.
                batch = noisy_batches[order[i] - len(train_batches)]
                net.adjust_learning_rate(lrate_beta)
                net.train_batch(batch.data, batch.labels, TRAIN)
                net.adjust_learning_rate(1. / lrate_beta)
                if l.epsW > 0:
                    normalize_conf_matrix(net)
                cost, correct, num_case = net.get_batch_information()
                noisy_cases += num_case
                noisy_correct += correct * num_case
                noisy_cost += cost * num_case
        for batch in test_batches:
            w_noisy = l.weight
            l.weight = w_test
            net.train_batch(batch.data, batch.labels, TEST)
            l.weight = w_noisy
            cost, correct, num_case = net.get_batch_information()
            test_cases += num_case
            test_correct += correct * num_case
            test_cost += cost * num_case
        if train_cases > 0:
            train_error = 1. - 1.0 * train_correct / train_cases
            train_cost = 1.0 * train_cost / train_cases
        if noisy_cases > 0:
            noisy_error = 1. - 1.0 * noisy_correct / noisy_cases
            noisy_cost = 1.0 * noisy_cost / noisy_cases
        if test_cases > 0:
            test_error = 1. - 1.0 * test_correct / test_cases
            test_cost = 1.0 * test_cost / test_cases
        print '%d %0.3f %0.3f %0.3f %0.3f %0.3f %0.3f' % (
            epoch, train_cost, train_error,
            noisy_cost, noisy_error, test_cost, test_error)
        if 'test-error' not in net.stat:
            net.stat['test-error'] = list()
        net.stat['test-error'].append(test_error)
def load_net(net, path):
    # Load pretrained weights and biases from a MATLAB .mat file, plus
    # the confusion matrix and denoising matrix when present.
    d = scipy.io.loadmat(path)
    net.layers[1].weight = data_loader.copy_to_gpu(d['w1'].copy())
    net.layers[5].weight = data_loader.copy_to_gpu(d['w2'].copy())
    net.layers[9].weight = data_loader.copy_to_gpu(d['w3'].copy())
    net.layers[12].weight = data_loader.copy_to_gpu(d['w4'].copy())
    net.layers[1].bias = data_loader.copy_to_gpu(d['b1'].copy())
    net.layers[5].bias = data_loader.copy_to_gpu(d['b2'].copy())
    net.layers[9].bias = data_loader.copy_to_gpu(d['b3'].copy())
    net.layers[12].bias = data_loader.copy_to_gpu(d['b4'].copy())
    if 'conf-w' in d and len(net.layers) == 16:
        net.layers[14].weight = data_loader.copy_to_gpu(d['conf-w'].copy())
    if 'confw' in d and len(net.layers) == 16:
        net.layers[14].weight = data_loader.copy_to_gpu(d['confw'].copy())
    if 'W_denoise' in d:
        net.W_denoise = data_loader.copy_to_gpu(d['W_denoise'].copy())
import sys

import numpy as np

# assumed import paths for the fastnet framework
import fastnet.net
from fastnet import parser

import data_loader
import confusion_matrix
import save_net_cifar10

pure_sz = int(sys.argv[1])

# settings
batch_size = 128
param_file = '/home/sainbar/fastnet-confussion-layer/config/cifar-10-18pct-confussion10.cfg'
learning_rate = 1
image_color = 3
image_size = 32
image_shape = (image_color, image_size, image_size, batch_size)

init_model = parser.parse_config_file(param_file)
net = fastnet.net.FastNet(learning_rate, image_shape, init_model)
net.layers[-2].weight = data_loader.copy_to_gpu(np.eye(10))

# prepare data
train_data, train_labels, test_data, test_labels = data_loader.load_cifar10()
data_mean = train_data.mean(axis=1, keepdims=True)
train_data = train_data - data_mean
test_data = test_data - data_mean

# add noise to labels using a precomputed mixing matrix
W = np.load('mixing-matrix-' + sys.argv[2] + '.npy')
train_labels_noisy = confusion_matrix.mix_labels(W, train_labels)

train_batches = data_loader.prepare_batches(train_data[:, :pure_sz],
                                            train_labels_noisy[:pure_sz], batch_size)
train_batches2 = data_loader.prepare_batches(train_data[:, :pure_sz],
                                             train_labels[:pure_sz], batch_size)
test_batches = data_loader.prepare_batches(test_data, test_labels, batch_size)
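# confusion_matrix.mix_labels is not shown in this section; a minimal
# sketch (hypothetical name and convention, assuming W is
# column-stochastic with W[i, j] = P(noisy = i | true = j)):
def mix_labels_sketch(W, labels):
    noisy = labels.copy()
    for n in range(len(labels)):
        # resample each label from the column of its true class
        noisy[n] = np.random.choice(W.shape[0], p=W[:, int(labels[n])])
    return noisy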
def update_confusion_matrix(net, noisy_batches, y, wc):
    # Refit the confusion layer on the CPU from the network's current
    # outputs on the noisy batches (two rounds of the matrix optimizer,
    # presumably at step sizes 1 and 0.1), then push it back to the GPU.
    w = data_loader.copy_to_cpu(net.layers[-2].weight)
    x = get_net_output(net, noisy_batches)
    w = train(w, x, y, 1, wc, 50)
    w = train(w, x, y, 0.1, wc, 10)
    net.layers[-2].weight = data_loader.copy_to_gpu(w)
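# Hypothetical usage sketch of the helper above: alternate between
# training the network (train() from this section) and refitting the
# confusion layer from its current outputs. Here y and wc are
# assumptions, standing for the one-hot noisy labels and a
# weight-decay constant:
for outer in range(5):
    train(net, 10, train_batches, noisy_batches, test_batches, lrate_beta)
    update_confusion_matrix(net, noisy_batches, y, wc)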
import sys

import numpy as np

# assumed import paths for the fastnet framework
import fastnet.net
from fastnet import parser

import data_loader
import confusion_matrix
import save_net_cifar10

pure_sz = int(sys.argv[1])

# settings
batch_size = 128
param_file = '/home/sainbar/fastnet-confussion-layer/config/svhn.cfg'
learning_rate = 1
image_color = 3
image_size = 32
image_shape = (image_color, image_size, image_size, batch_size)

init_model = parser.parse_config_file(param_file)
net = fastnet.net.FastNet(learning_rate, image_shape, init_model)
net.layers[-2].weight = data_loader.copy_to_gpu(np.eye(10))

# prepare data: rescale the raw SVHN features, then subtract the mean
train_data, train_labels, test_data, test_labels = data_loader.load_svhn100k()
train_data = train_data * 100
test_data = test_data * 100
data_mean = train_data.mean(axis=1, keepdims=True)
train_data = train_data - data_mean
test_data = test_data - data_mean

# add noise to labels using a precomputed mixing matrix
W = np.load('mixing-matrix-' + sys.argv[2] + '.npy')
train_labels_noisy = confusion_matrix.mix_labels(W, train_labels)

train_batches = data_loader.prepare_batches(train_data[:, :pure_sz],
                                            train_labels_noisy[:pure_sz], batch_size)
test_batches = data_loader.prepare_batches(test_data[:, :10000],
                                           test_labels[:10000], batch_size)
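# The 'mixing-matrix-*.npy' files loaded above are assumed to hold a
# 10x10 stochastic matrix. A sketch of how such a file could be built,
# e.g. symmetric uniform label noise at a hypothetical rate q (the
# filename below is illustrative):
q = 0.3
W_uniform = np.eye(10) * (1 - q) + np.ones((10, 10)) * (q / 10.0)
# np.save('mixing-matrix-uniform30', W_uniform)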
order = range(train_data.shape[1])
np.random.shuffle(order)
train_data = train_data[:, order]
train_labels = train_labels[order]

train_batches = data_loader.prepare_batches(train_data, train_labels, batch_size)
val_batches = data_loader.prepare_batches(val_data, val_labels, batch_size)
test_batches = data_loader.prepare_batches(test_data, test_labels, batch_size)
print '# train:', train_data.shape[1], 'samples', len(train_batches), 'batches'
print '# val:', val_data.shape[1], 'samples', len(val_batches), 'batches'
print '# test:', test_data.shape[1], 'samples', len(test_batches), 'batches'

# confusion matrix: alpha on the diagonal, the remaining mass spread
# uniformly, so every column sums to 1
w = np.eye(10) * alpha + (1 - alpha) / 10.0
net.layers[-2].weight = data_loader.copy_to_gpu(w)

if not net_checkpoint.try_load(net):
#    net.adjust_learning_rate(2)
#    net.adjust_learning_rate(1. + (1-alpha)/alpha/10)
#    net_trainer.train(net, 300, train_batches, val_batches, test_batches)
#    net.adjust_learning_rate(0.1)
#    net_trainer.train(net, 10, train_batches, val_batches, test_batches)
#    net.adjust_learning_rate(0.1)
#    net_trainer.train(net, 10, train_batches, val_batches, test_batches)
    net.adjust_learning_rate(1. + (1 - alpha) / alpha / 10)
    net.adjust_learning_rate(3)
    net_trainer.train(net, 50, train_batches, val_batches, test_batches)
    for i in range(10):
        net_trainer.train(net, 20, train_batches, val_batches, test_batches)
net = fastnet.net.FastNet(learning_rate, image_shape, init_model)

# prepare data
train_data, train_labels, test_data, test_labels = data_loader.load_cifar10()
data_mean = train_data.mean(axis=1, keepdims=True)
train_data = train_data - data_mean
test_data = test_data - data_mean

# noisy data
noisy_data, noisy_labels = data_loader.load_noisy_labeled()
noisy_data = noisy_data - data_mean

# confusion matrices: alpha (resp. beta) on the diagonal, the rest
# spread uniformly over the 9 wrong classes
alpha = float(sys.argv[3])
w = np.eye(10) * alpha + (np.ones([10, 10]) - np.eye(10)) * (1.0 - alpha) / 9.0
net.layers[-2].weight = data_loader.copy_to_gpu(w)
beta = float(sys.argv[4])
w2 = np.eye(10) * beta + (np.ones([10, 10]) - np.eye(10)) * (1.0 - beta) / 9.0

# shuffle data
order = range(pure_sz)
np.random.shuffle(order)
train_data = train_data[:, order]
train_labels = train_labels[order]
order = range(noise_sz)
np.random.shuffle(order)
noisy_data = noisy_data[:, order]
noisy_labels = noisy_labels[order]

train_batches = data_loader.prepare_batches(train_data, train_labels, batch_size)
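# Sanity-check sketch (added): both matrices above are
# column-stochastic, so each column sums to 1:
assert np.allclose(w.sum(axis=0), 1.0)
assert np.allclose(w2.sum(axis=0), 1.0)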
order = range(pure_sz + back_sz + noise_sz)
np.random.shuffle(order)
train_data = train_data[:, order]
train_labels = train_labels[order]

train_batches = data_loader.prepare_batches(train_data, train_labels, batch_size)
test_batches = data_loader.prepare_batches(test_data, test_labels, batch_size)
print '# train:', train_data.shape[1], 'samples', len(train_batches), 'batches'
print '# test:', test_data.shape[1], 'samples', len(test_batches), 'batches'

# folding matrix, as in the 50k-noisy script above
s = np.zeros((22, 11))
s[:11, :11] = np.eye(11)
s[11:, :11] = np.eye(11)
net.layers[-2].weight = data_loader.copy_to_gpu(s)

# denoising matrix: identity on the first half; on the second half,
# 0.5 on the diagonal and the rest spread over the 9 wrong classes
w = np.eye(22)
w[11:21, 11:21] = 0.5 * np.eye(10) + (np.ones((10, 10)) - np.eye(10)) * 0.5 / 9.0
net.W_denoise = data_loader.copy_to_gpu(w)
net.label_tmp = data_loader.copy_to_gpu(np.zeros((22, 128)))
net.eps1 = 0.01
net.eps2 = float(sys.argv[4])

# net_trainer_noisy.train_mixed(net, 150, train_batches, test_batches)
# net.adjust_learning_rate(0.1)
# net_trainer_noisy.train_mixed(net, 10, train_batches, test_batches)
# net.adjust_learning_rate(0.1)
# net_trainer_noisy.train_mixed(net, 10, train_batches, test_batches)
# save_net_cifar10.save_net(net, 'results/weights-cifar10-pure10k-noisy20k-TD-tc' + str(sys.argv[4]))