Пример #1
0
 def __init__(self, solver_file, softmax_layer_name, accuracy_layer_name, snapshot, gpu_idx = 0):
     """Record solver/layer configuration and activate the chosen GPU.

     solver_file, softmax_layer_name, accuracy_layer_name and snapshot are
     stored verbatim on the instance; gpu_idx selects which GPU device to
     create and make current.
     """
     (self.solver_file,
      self.softmax_layer_name,
      self.accuracy_layer_name,
      self.snapshot) = (solver_file, softmax_layer_name,
                        accuracy_layer_name, snapshot)
     # Create the requested GPU device and make it the active device.
     self.gpu = owl.create_gpu_device(gpu_idx)
     owl.set_device(self.gpu)
Пример #2
0
 def __init__(self, solver_file, snapshot, layer_name, result_path, gpu_idx = 0):
     """Store extraction configuration and bind this instance to a GPU.

     solver_file, snapshot, layer_name and result_path are kept verbatim on
     the instance; gpu_idx selects the GPU device to create and activate.
     """
     (self.solver_file,
      self.snapshot,
      self.layer_name,
      self.result_path) = (solver_file, snapshot, layer_name, result_path)
     # Create the requested GPU device and make it the active device.
     self.gpu = owl.create_gpu_device(gpu_idx)
     owl.set_device(self.gpu)
Пример #3
0
def train_network(model,
                  num_epochs=100,
                  minibatch_size=256,
                  dropout_rate=0.5,
                  eps_w=0.01,
                  eps_b=0.01,
                  mom=0.9,
                  wd=0.0005):
    gpu0 = owl.create_gpu_device(0)
    gpu1 = owl.create_gpu_device(1)
    num_layers = 20
    num_weights = 8
    count = 0
    last = time.time()

    # dp = ImageNetDataProvider(mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
    #         train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
    #         val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
    #         test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for j in xrange(300):
            count = count + 1
            data = owl.randn([227, 227, 3, minibatch_size], 0, 1)
            label = owl.randn([1, minibatch_size], 0, 1)

            weightsgrad = [None] * num_weights
            biasgrad = [None] * num_weights

            num_samples = minibatch_size
            '''
            thisimg = samples[0, :]
            print thisimg
            imgdata = np.transpose(thisimg.reshape([3, 227*227])).reshape([227, 227, 3])
            print imgdata
            img = Image.fromarray(imgdata.astype(np.uint8))
            img.save('testimg.jpg', format='JPEG')
            exit(0)
            '''

            owl.set_device(gpu0)
            out = train_one_mb(model, data, label, weightsgrad, biasgrad,
                               dropout_rate)

            for k in range(num_weights):
                model.weightsdelta[
                    k] = mom * model.weightsdelta[k] - eps_w / num_samples * (
                        weightsgrad[k] + wd * model.weights[k])
                model.biasdelta[
                    k] = mom * model.biasdelta[k] - eps_b / num_samples * (
                        biasgrad[k] + wd * model.bias[k])
                model.weights[k] += model.weightsdelta[k]
                model.weights[k].start_eval()
                model.bias[k] += model.biasdelta[k]
                model.bias[k].start_eval()
            if count % 3 == 0:
                print_training_accuracy(out, label, data.shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()
Пример #4
0
def train_network(model, num_epochs = 100, minibatch_size = 256, lr = 0.01, mom = 0.9, wd = 0.0000):
    np.set_printoptions(linewidth=200)
    owl.set_device(owl.create_gpu_device(0))
    count = 0
    # load data
    (train_data, test_data) = imageio.load_mb_from_mat("mnist_all.mat", minibatch_size)
    num_test_samples = test_data[0].shape[0]
    (test_samples, test_labels) = map(lambda npdata : owl.from_nparray(npdata), test_data)
    for i in xrange(num_epochs):
        print "---Epoch #", i
        for (mb_samples, mb_labels) in train_data:
            num_samples = mb_samples.shape[0]
            data = owl.from_nparray(mb_samples).reshape([28, 28, 1, num_samples])
            label = owl.from_nparray(mb_labels)
            out, weightgrad, biasgrad = train(model, data, label)
            for k in range(len(model.weights)):
                model.weightdelta[k] = mom * model.weightdelta[k] - lr / num_samples * weightgrad[k] - wd * model.weights[k]
                model.biasdelta[k] = mom * model.biasdelta[k] - lr / num_samples * biasgrad[k]
                model.weights[k] += model.weightdelta[k]
                model.bias[k] += model.biasdelta[k]

            count = count + 1
            if (count % 1) == 0:
                print_training_accuracy(out, label, num_samples)
            if count == 100:
                sys.exit()
Пример #5
0
def train_network(model, num_epochs = 100, minibatch_size=10,
        dropout_rate = 0.5, eps_w = 0.01, mom = 0.9, wd = 0.0005):
    """Debug driver: forward/backward a single ImageNet minibatch, then exit.

    NOTE(review): the unconditional exit(0) at the bottom stops the process
    after the first minibatch, so num_epochs/eps_w/mom/wd are effectively
    unused -- this looks like a debugging harness, not a full training loop.
    """
    gpu0 = owl.create_gpu_device(0)
    owl.set_device(gpu0)
    num_weights = 8  # NOTE(review): unused in this function
    count = 0
    last = time.time()  # NOTE(review): unused in this function
    cropped_size = 224

    dp = ImageNetDataProvider(mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
            train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
            val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
            test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')

    # Layer whose activations are used for the accuracy report.
    output_layer = 'prob'

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for (samples, labels) in dp.get_train_mb(minibatch_size, cropped_size):
            count = count + 1
            num_samples = samples.shape[0]
            # Data layout is [H, W, C, N] with 3-channel crops.
            data = owl.from_numpy(samples).reshape([cropped_size, cropped_size, 3, num_samples])
            target = owl.from_numpy(labels)
            model.ff(data, target)
            print_training_accuracy(model.layers[output_layer].get_act(), target, minibatch_size)
            model.bp(data, target)
            exit(0)
Пример #6
0
def train_network(model, num_epochs=100, minibatch_size=256, lr=0.01, mom=0.75, wd=5e-4):
    """Data-parallel MNIST training over the module-level `gpu` device list.

    Each device processes one minibatch in turn; when the device cycle wraps
    (current_gpu == 0) the per-device gradients are tree-merged and a single
    momentum SGD step with weight decay is applied. After each epoch the
    model is evaluated on the held-out test set.

    Relies on module-level names: gpu, lazy_cycle, bpprop, multi_gpu_merge,
    print_training_accuracy, mnist_io, owl, time.
    """
    # load data; each device receives an equal share of the minibatch
    (train_data, test_data) = mnist_io.load_mb_from_mat('mnist_all.mat', minibatch_size / len(gpu))
    num_test_samples = test_data[0].shape[0]
    # Test set as [H, W, C, N] device arrays, built once up front.
    test_samples = owl.from_numpy(test_data[0]).reshape([28, 28, 1, num_test_samples])
    test_labels = owl.from_numpy(test_data[1])
    for i in xrange(num_epochs):
        print "---Epoch #", i
        last = time.time()
        count = 0
        weightgrads = [None] * len(gpu)
        biasgrads = [None] * len(gpu)
        for (mb_samples, mb_labels) in train_data:
            count += 1
            # Round-robin device assignment for this minibatch.
            current_gpu = count % len(gpu)
            owl.set_device(gpu[current_gpu])
            num_samples = mb_samples.shape[0]
            data = owl.from_numpy(mb_samples).reshape([28, 28, 1, num_samples])
            label = owl.from_numpy(mb_labels)
            out, weightgrads[current_gpu], biasgrads[current_gpu] = bpprop(model, data, label)
            if current_gpu == 0:
                # All devices have produced gradients for this cycle: merge and
                # update. Normalization uses samples-per-device times device count.
                for k in range(len(model.weights)):
                    model.weightdelta[k] = mom * model.weightdelta[k] - lr / num_samples / len(gpu) * multi_gpu_merge(weightgrads, 0, k) - lr * wd * model.weights[k]
                    model.biasdelta[k] = mom * model.biasdelta[k] - lr / num_samples / len(gpu) * multi_gpu_merge(biasgrads, 0, k)
                    model.weights[k] += model.weightdelta[k]
                    model.bias[k] += model.biasdelta[k]
                if count % (len(gpu) * lazy_cycle) == 0:
                    print_training_accuracy(out, label, num_samples, 'Training')
        print '---End of Epoch #', i, 'time:', time.time() - last
        # do test
        out, _, _  = bpprop(model, test_samples, test_labels)
        print_training_accuracy(out, test_labels, num_test_samples, 'Testing')
Пример #7
0
def train_network(model, num_epochs = 100, minibatch_size=256,
        dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    """Single-GPU ImageNet training loop driven by ImageNetDataProvider.

    For each minibatch: forward/backward via model.train_one_mb, then a
    momentum/weight-decay parameter update via model.update. Accuracy and
    wall-clock time are reported every 4 minibatches.

    NOTE(review): eps_b is accepted but never passed to model.update --
    confirm whether the bias learning rate is intentionally eps_w.
    """
    gpu0 = owl.create_gpu_device(0)
    owl.set_device(gpu0)
    num_weights = 8  # NOTE(review): unused here; update logic lives in model.update
    count = 0
    last = time.time()

    dp = ImageNetDataProvider(mean_file='/home/yutian/data/config_file/google_model/imagenet_mean.binaryproto',
            train_db='/home/yutian/data/imagenet/ilsvrc12_train_lmdb',
            val_db='/home/yutian/data/imagenet/ilsvrc12_val_lmdb',
            test_db='/home/yutian/data/imagenet/ilsvrc12_test_lmdb')

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for (samples, labels) in dp.get_train_mb(minibatch_size):
            count = count + 1
            num_samples = samples.shape[0]
            # Data layout is [H, W, C, N]: 227x227 RGB images.
            data = owl.from_numpy(samples).reshape([227, 227, 3, num_samples])
            target = owl.from_numpy(labels)

            out, weightsgrad, biasgrad = model.train_one_mb(data, target, dropout_rate)
            model.update(weightsgrad, biasgrad, num_samples, mom, eps_w, wd)

            if count % 4 == 0:
                print_training_accuracy(out, target, data.shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()
Пример #8
0
def multi_gpu_merge(l, base, layer):
    """Recursively sum the `layer`-th gradient entry across the devices in `l`.

    Pairwise tree reduction: each half of `l` is merged recursively, and the
    final addition is issued on device `base`.
    """
    if len(l) == 1:
        return l[0][layer]
    half = len(l) / 2
    lo = multi_gpu_merge(l[:half], base, layer)
    hi = multi_gpu_merge(l[half:], base + half, layer)
    owl.set_device(base)
    return lo + hi
Пример #9
0
 def __init__(self, solver_file, snapshot, layer_name, result_path, gpu_idx = 0):
     """Keep extraction settings on the instance and activate the chosen GPU.

     solver_file, snapshot, layer_name and result_path are stored verbatim;
     gpu_idx selects which GPU device to create and make current.
     """
     (self.solver_file,
      self.snapshot,
      self.layer_name,
      self.result_path) = (solver_file, snapshot, layer_name, result_path)
     # Create the requested GPU device and switch to it.
     self.gpu = owl.create_gpu_device(gpu_idx)
     owl.set_device(self.gpu)
Пример #10
0
 def __init__(self, solver_file, softmax_layer_name, accuracy_layer_name, snapshot, gpu_idx = 0):
     """Keep solver/layer settings on the instance and activate the chosen GPU.

     All configuration arguments are stored verbatim; gpu_idx selects which
     GPU device to create and make current.
     """
     (self.solver_file,
      self.softmax_layer_name,
      self.accuracy_layer_name,
      self.snapshot) = (solver_file, softmax_layer_name,
                        accuracy_layer_name, snapshot)
     # Create the requested GPU device and switch to it.
     self.gpu = owl.create_gpu_device(gpu_idx)
     owl.set_device(self.gpu)
Пример #11
0
def train_network(model, num_epochs = 100, minibatch_size=256,
        dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    """Single-GPU ImageNet training loop (duplicate of the variant above).

    Forward/backward via model.train_one_mb, then momentum/weight-decay
    update via model.update; accuracy and timing printed every 4 minibatches.

    NOTE(review): eps_b is accepted but never passed to model.update --
    confirm whether the bias learning rate is intentionally eps_w.
    """
    gpu0 = owl.create_gpu_device(0)
    owl.set_device(gpu0)
    num_weights = 8  # NOTE(review): unused here
    count = 0
    last = time.time()

    dp = ImageNetDataProvider(mean_file='/home/yutian/data/config_file/google_model/imagenet_mean.binaryproto',
            train_db='/home/yutian/data/imagenet/ilsvrc12_train_lmdb',
            val_db='/home/yutian/data/imagenet/ilsvrc12_val_lmdb',
            test_db='/home/yutian/data/imagenet/ilsvrc12_test_lmdb')

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for (samples, labels) in dp.get_train_mb(minibatch_size):
            count = count + 1
            num_samples = samples.shape[0]
            # Data layout is [H, W, C, N]: 227x227 RGB images.
            data = owl.from_numpy(samples).reshape([227, 227, 3, num_samples])
            target = owl.from_numpy(labels)

            out, weightsgrad, biasgrad = model.train_one_mb(data, target, dropout_rate)
            model.update(weightsgrad, biasgrad, num_samples, mom, eps_w, wd)

            if count % 4 == 0:
                print_training_accuracy(out, target, data.shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()
Пример #12
0
def train_network(model, num_epochs=100, minibatch_size=256, lr=0.01, mom=0.75, wd=5e-4):
    """Data-parallel MNIST training over the module-level `gpu` device list.

    Variant of the loop above that additionally calls out.start_eval() after
    backprop -- presumably to kick off asynchronous evaluation of the lazy
    computation graph while the next device's minibatch is prepared (confirm
    against the owl API). Gradients are merged and applied once per device
    cycle; the model is tested after each epoch.
    """
    # load data; each device receives an equal share of the minibatch
    (train_data, test_data) = mnist_io.load_mb_from_mat('mnist_all.mat', minibatch_size / len(gpu))
    num_test_samples = test_data[0].shape[0]
    test_samples = owl.from_numpy(test_data[0]).reshape([28, 28, 1, num_test_samples])
    test_labels = owl.from_numpy(test_data[1])
    for i in xrange(num_epochs):
        print "---Epoch #", i
        last = time.time()
        count = 0
        weightgrads = [None] * len(gpu)
        biasgrads = [None] * len(gpu)
        for (mb_samples, mb_labels) in train_data:
            count += 1
            # Round-robin device assignment.
            current_gpu = count % len(gpu)
            owl.set_device(gpu[current_gpu])
            num_samples = mb_samples.shape[0]
            data = owl.from_numpy(mb_samples).reshape([28, 28, 1, num_samples])
            label = owl.from_numpy(mb_labels)
            out, weightgrads[current_gpu], biasgrads[current_gpu] = bpprop(model, data, label)
            out.start_eval()
            if current_gpu == 0:
                # All devices have gradients for this cycle: merge and update.
                for k in range(len(model.weights)):
                    model.weightdelta[k] = mom * model.weightdelta[k] - lr / num_samples / len(gpu) * multi_gpu_merge(weightgrads, 0, k) - lr * wd * model.weights[k]
                    model.biasdelta[k] = mom * model.biasdelta[k] - lr / num_samples / len(gpu) * multi_gpu_merge(biasgrads, 0, k)
                    model.weights[k] += model.weightdelta[k]
                    model.bias[k] += model.biasdelta[k]
                if count % (len(gpu) * lazy_cycle) == 0:
                    print_training_accuracy(out, label, num_samples, 'Training')
        print '---End of Epoch #', i, 'time:', time.time() - last
        # do test
        out, _, _  = bpprop(model, test_samples, test_labels)
        print_training_accuracy(out, test_labels, num_test_samples, 'Testing')
Пример #13
0
def train_network(model, num_epochs = 100, minibatch_size=256,
        dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    """Multi-GPU training on synthetic data using module-level gpu_array/num_gpu.

    The requested minibatch is split evenly across num_gpu devices; each
    iteration runs one sub-minibatch on one device. When the device cycle
    wraps (count == 0) per-device gradients are summed into slot 0 and a
    momentum SGD step normalized by the *full* minibatch size is applied.
    Relies on module-level gpu_array, num_gpu, lazy, train_one_mb,
    print_training_accuracy, owl, time.
    """
    num_layers = model.num_layers  # NOTE(review): unused in this function
    num_weights = model.num_weights
    last = time.time()
    # Keep the full minibatch size for gradient normalization, then split
    # the per-device minibatch.
    num_samples = minibatch_size
    minibatch_size = minibatch_size / num_gpu
    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        # One gradient-slot list per device.
        weightsgrad = [[None] * num_weights for z in range(num_gpu)]
        biasgrad = [[None] * num_weights for z in range(num_gpu)]
        for j in xrange(1, 1024):
            count = j % num_gpu
            owl.set_device(gpu_array[count])
            # Synthetic [H, W, C, N] data and labels.
            data = owl.randn([227, 227, 3, minibatch_size], 0, 1)
            label = owl.randn([1, minibatch_size], 0, 1)
            out = train_one_mb(model, data, label, weightsgrad[count], biasgrad[count], dropout_rate)
            out.start_eval()
            if count == 0:
                # Update: fold every device's gradients into slot 0, then
                # apply momentum SGD with weight decay.
                for k in range(num_weights):
                    for l in range(1, num_gpu):
                        weightsgrad[0][k] = weightsgrad[0][k] + weightsgrad[l][k]
                        biasgrad[0][k] = biasgrad[0][k] + biasgrad[l][k]
                    model.weightsdelta[k] = mom * model.weightsdelta[k] - eps_w / num_samples  * (weightsgrad[0][k] + wd * model.weights[k])
                    model.biasdelta[k] = mom * model.biasdelta[k] - eps_b / num_samples  * (biasgrad[0][k] + wd * model.bias[k])
                    model.weights[k] += model.weightsdelta[k]
                    model.bias[k] += model.biasdelta[k]
                if j % (lazy * num_gpu) == 0:
                    print_training_accuracy(out, label, minibatch_size)
                    print "time: %s" % (time.time() - last)
                    last = time.time()
Пример #14
0
def train_network(model, num_epochs = 100, minibatch_size=256,
        dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    """Multi-GPU gradient-computation benchmark on synthetic data.

    NOTE(review): unlike the sibling variants, this function never updates
    the model -- it only computes gradients on alternating devices, forces
    their evaluation (start_eval / wait_for_eval), and prints wall-clock
    time per device cycle. eps_w/eps_b/mom/wd are accepted but unused.
    Relies on module-level gpu_array, num_gpu, train_one_mb, owl, time.
    """
    num_layers = model.num_layers  # NOTE(review): unused in this function
    num_weights = model.num_weights
    last = time.time()
    minibatch_size = minibatch_size # / num_gpu
    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        weightsgrad = [[None] * num_weights for z in range(num_gpu)]
        biasgrad = [[None] * num_weights for z in range(num_gpu)]
        for j in xrange(1, 1024):
            count = j % num_gpu
            owl.set_device(gpu_array[count])
            # Synthetic [H, W, C, N] data and labels.
            data = owl.randn([227, 227, 3, minibatch_size], 0, 1)
            label = owl.randn([1, minibatch_size], 0, 1)
            out = train_one_mb(model, data, label, weightsgrad[count], biasgrad[count], dropout_rate)
            # Kick off asynchronous evaluation of every gradient on this device.
            for k in weightsgrad[count]:
                k.start_eval()
            for k in biasgrad[count]:
                k.start_eval()
            if count == 0:
                # End of a device cycle: block until all devices' gradients
                # have materialized, then report elapsed time.
                for k in range(0, num_gpu):
                    for l in weightsgrad[k]:
                        l.wait_for_eval()
                    for l in biasgrad[k]:
                        l.wait_for_eval()
                print "time: %s" % (time.time() - last)
                last = time.time()
Пример #15
0
def multi_gpu_merge(l, base, layer):
    """Sum the `layer`-th gradient entry of every device list in `l`.

    Works as a divide-and-conquer tree reduction; the final addition runs
    on device `base`.
    """
    if len(l) == 1:
        return l[0][layer]
    half = len(l) / 2
    first_half = multi_gpu_merge(l[:half], base, layer)
    second_half = multi_gpu_merge(l[half:], base + half, layer)
    owl.set_device(base)
    return first_half + second_half
Пример #16
0
    def test_ones(self):
        """Profile repeated allocation of 10000x10000 zero arrays on an MPI device.

        NOTE(review): despite the name, this allocates zeros, not ones --
        confirm the intended benchmark target.

        Fixes: normalized the mixed tab/space indentation (fragile in
        Python 2, a hard error in Python 3) and removed the unused
        `test` local.
        """
        owl.set_device(owl.create_mpi_device(1, 1))
        # Allocate and synchronize 1000 times so the profiler captures the cost.
        for i in range(1000):
            owl.zeros([10000, 10000])
            owl.wait_for_all()
        owl.print_profiler_result()
Пример #17
0
def multi_dev_merge(l, base, layer):
    """Tree-reduce the `layer`-th gradient entry held by each device in `l`.

    Merges the two halves of `l` recursively, then performs the final
    addition on device `base` so the result lives on a predictable device.

    Fix: removed the commented-out debug print statements (dead code) and
    hoisted the repeated `len(l) / 2` into a local.
    """
    if len(l) == 1:
        return l[0][layer]
    half = len(l) / 2
    left = multi_dev_merge(l[:half], base, layer)
    right = multi_dev_merge(l[half:], base + half, layer)
    owl.set_device(base)
    return left + right
Пример #18
0
    def run(self):
        """Train and test a two-layer MLP on MNIST using explicit matrix ops.

        Reads self.data_file / self.mb_size / self.num_epochs / self.gpu and
        the parameters self.w1, self.b1, self.w2, self.b2 (updated in place
        with learning rates self.eps_w / self.eps_b). Prints training error
        every 40 minibatches and test error after each epoch.
        """
        (train_data,
         test_data) = mnist_io.load_mb_from_mat(self.data_file, self.mb_size)
        np.set_printoptions(linewidth=200)
        num_test_samples = test_data[0].shape[0]
        (test_samples,
         test_labels) = map(lambda npdata: owl.from_numpy(npdata), test_data)
        count = 1
        owl.set_device(self.gpu)
        for epoch in range(self.num_epochs):
            print '---Start epoch #%d' % epoch
            # train
            for (mb_samples, mb_labels) in train_data:
                num_samples = mb_samples.shape[0]

                a1 = owl.from_numpy(mb_samples)
                target = owl.from_numpy(mb_labels)

                # ff: hidden layer with ReLU, then linear output layer
                a2 = ele.relu(self.w1 * a1 + self.b1)
                a3 = self.w2 * a2 + self.b2
                # softmax & error: s3 is the output-layer delta
                out = co.softmax(a3)
                s3 = out - target
                # bp: propagate delta through w2 and the ReLU
                s2 = self.w2.trans() * s3
                s2 = ele.relu_back(s2, a2)
                # grad: average over the minibatch
                gw1 = s2 * a1.trans() / num_samples
                gb1 = s2.sum(1) / num_samples
                gw2 = s3 * a2.trans() / num_samples
                gb2 = s3.sum(1) / num_samples
                # update: plain SGD, no momentum or weight decay
                self.w1 -= self.eps_w * gw1
                self.w2 -= self.eps_w * gw2
                self.b1 -= self.eps_b * gb1
                self.b2 -= self.eps_b * gb2

                if (count % 40 == 0):
                    # Error rate = fraction of minibatch where the predicted
                    # class differs from the target class.
                    correct = out.argmax(0) - target.argmax(0)
                    val = correct.to_numpy()
                    print 'Training error:', float(
                        np.count_nonzero(val)) / num_samples
                count = count + 1

            # test: single forward pass over the whole test set (no softmax
            # needed -- argmax of the logits is the prediction)
            a1 = test_samples
            a2 = ele.relu(self.w1 * a1 + self.b1)
            a3 = self.w2 * a2 + self.b2
            correct = a3.argmax(0) - test_labels.argmax(0)
            val = correct.to_numpy()
            print 'Testing error:', float(
                np.count_nonzero(val)) / num_test_samples
            print '---Finish epoch #%d' % epoch
Пример #19
0
def train_network(model, num_epochs = 100, minibatch_size=256,
        dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    gpu0 = owl.create_gpu_device(0)
    gpu1 = owl.create_gpu_device(1)
    num_layers = 20
    num_weights = 8
    count = 0
    last = time.time()

    dp = ImageNetDataProvider(mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
            train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
            val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
            test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')

    minibatch_size = minibatch_size / 2

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for j in xrange(300):
            count = count + 1
            if count % 2 == 1:
                data1 = owl.from_nparray(samples).reshape([227, 227, 3, samples.shape[0]])
                label1 = owl.from_nparray(labels)
                #data1 = owl.randn([227, 227, 3, minibatch_size], 0, 1)
                #label1 = owl.randn([1, minibatch_size], 0, 1)
                weightsgrad1 = [None] * num_weights
                biasgrad1 = [None] * num_weights
                owl.set_device(gpu0)
                out1 = train_one_mb(model, data1, label1, weightsgrad1, biasgrad1, dropout_rate)
                out1.start_eval()
                continue
            if count % 2 == 0:
                data2 = owl.from_nparray(samples).reshape([227, 227, 3, samples.shape[0]])
                label2 = owl.from_nparray(labels)
                #data2 = owl.randn([227, 227, 3, minibatch_size], 0, 1)
                #label2 = owl.randn([1, minibatch_size], 0, 1)

            weightsgrad2 = [None] * num_weights
            biasgrad2 = [None] * num_weights

            num_samples = data1.shape[-1] + data2.shape[-1]

            owl.set_device(gpu1)
            out2 = train_one_mb(model, data2, label2, weightsgrad2, biasgrad2, dropout_rate)
            out2.start_eval()

            for k in range(num_weights):
                model.weightsdelta[k] = mom * model.weightsdelta[k] - eps_w / num_samples  * (weightsgrad1[k] + weightsgrad2[k] + wd * model.weights[k])
                model.biasdelta[k] = mom * model.biasdelta[k] - eps_b / num_samples  * (biasgrad1[k] + biasgrad2[k])
                model.weights[k] += model.weightsdelta[k]
                model.bias[k] += model.biasdelta[k]
            if count % 8 == 0:
                print_training_accuracy(out1, label1, data1.shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()
Пример #20
0
def train_network(model, num_epochs = 100, minibatch_size=256,
        dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    """Two-GPU alternating ImageNet training via model.train_one_mb/update.

    Consecutive minibatches alternate between gpu[1] (odd count) and gpu[0]
    (even count); after each even minibatch the two devices' gradients are
    summed and applied in one momentum/weight-decay step normalized by the
    combined sample count, then the accumulators are reset.

    NOTE(review): eps_b is accepted but never passed to model.update --
    confirm whether the bias learning rate is intentionally eps_w.
    """
    gpu = [None] * 2
    gpu[0] = owl.create_gpu_device(0)
    gpu[1] = owl.create_gpu_device(1)
    num_layers = 20  # NOTE(review): unused in this function
    num_weights = 8
    count = 0
    last = time.time()

    dp = ImageNetDataProvider(mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
            train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
            val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
            test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')

    # Each device handles half of the requested minibatch.
    minibatch_size = minibatch_size / 2

    wgrad = [None] * 2
    bgrad = [None] * 2
    num_samples = 0

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for (samples, labels) in dp.get_train_mb(minibatch_size):
            count = count + 1
            # Alternate devices: odd counts -> gpu[1], even counts -> gpu[0].
            gpuid = count % 2
            owl.set_device(gpu[gpuid])

            # Data layout is [H, W, C, N].
            data = owl.from_numpy(samples).reshape([227, 227, 3, samples.shape[0]])
            label = owl.from_numpy(labels)
            num_samples += data.shape[-1]
            (out, wgrad[gpuid], bgrad[gpuid]) = model.train_one_mb(data, label, dropout_rate)
            # start_eval presumably kicks off asynchronous evaluation so both
            # devices work concurrently -- confirm against the owl API.
            out.start_eval()

            if count % 2 != 0:
                # Wait for the partner minibatch before updating.
                continue

            # Fold gpu[1]'s gradients into gpu[0]'s slot and update once.
            for k in range(num_weights):
                wgrad[0][k] += wgrad[1][k]
                bgrad[0][k] += bgrad[1][k]

            model.update(wgrad[0], bgrad[0], num_samples, mom, eps_w, wd)

            if count % 8 == 0:
                print_training_accuracy(out, label, data.shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()

            # Reset accumulators for the next pair of minibatches.
            num_samples = 0
            wgrad = [None] * 2
            bgrad = [None] * 2
Пример #21
0
def train_network(model, num_epochs = 100, minibatch_size=256,
        dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    """Two-GPU alternating ImageNet training (variant without out.start_eval).

    Consecutive minibatches alternate between gpu[1] (odd count) and gpu[0]
    (even count); after each even minibatch the two devices' gradients are
    summed and applied in one momentum/weight-decay step normalized by the
    combined sample count, then the accumulators are reset.

    NOTE(review): eps_b is accepted but never passed to model.update --
    confirm whether the bias learning rate is intentionally eps_w.
    """
    gpu = [None] * 2
    gpu[0] = owl.create_gpu_device(0)
    gpu[1] = owl.create_gpu_device(1)
    num_layers = 20  # NOTE(review): unused in this function
    num_weights = 8
    count = 0
    last = time.time()

    dp = ImageNetDataProvider(mean_file='/home/yutian/data/config_file/google_model/imagenet_mean.binaryproto',
            train_db='/home/yutian/data/imagenet/ilsvrc12_train_lmdb',
            val_db='/home/yutian/data/imagenet/ilsvrc12_val_lmdb',
            test_db='/home/yutian/data/imagenet/ilsvrc12_test_lmdb')

    # Each device handles half of the requested minibatch.
    minibatch_size = minibatch_size / 2

    wgrad = [None] * 2
    bgrad = [None] * 2
    num_samples = 0

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for (samples, labels) in dp.get_train_mb(minibatch_size):
            count = count + 1
            # Alternate devices: odd counts -> gpu[1], even counts -> gpu[0].
            gpuid = count % 2
            owl.set_device(gpu[gpuid])

            # Data layout is [H, W, C, N].
            data = owl.from_numpy(samples).reshape([227, 227, 3, samples.shape[0]])
            label = owl.from_numpy(labels)
            num_samples += data.shape[-1]
            (out, wgrad[gpuid], bgrad[gpuid]) = model.train_one_mb(data, label, dropout_rate)

            if count % 2 != 0:
                # Wait for the partner minibatch before updating.
                continue

            # Fold gpu[1]'s gradients into gpu[0]'s slot and update once.
            for k in range(num_weights):
                wgrad[0][k] += wgrad[1][k]
                bgrad[0][k] += bgrad[1][k]

            model.update(wgrad[0], bgrad[0], num_samples, mom, eps_w, wd)

            if count % 8 == 0:
                print_training_accuracy(out, label, data.shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()

            # Reset accumulators for the next pair of minibatches.
            num_samples = 0
            wgrad = [None] * 2
            bgrad = [None] * 2
Пример #22
0
    def run(self):
        """Train and test a two-layer MLP on MNIST (max_index variant).

        Same pipeline as the sibling `run`: explicit forward/backward matrix
        ops, plain SGD updates on self.w1/b1/w2/b2, training error every 40
        minibatches and test error after each epoch. Uses max_index instead
        of argmax for the predicted class.
        """
        (train_data, test_data) = mnist_io.load_mb_from_mat(self.data_file, self.mb_size)
        np.set_printoptions(linewidth=200)
        num_test_samples = test_data[0].shape[0]
        (test_samples, test_labels) = map(lambda npdata : owl.from_numpy(npdata), test_data)
        count = 1
        owl.set_device(self.gpu)
        for epoch in range(self.num_epochs):
            print '---Start epoch #%d' % epoch
            # train
            for (mb_samples, mb_labels) in train_data:
                num_samples = mb_samples.shape[0]

                a1 = owl.from_numpy(mb_samples)
                target = owl.from_numpy(mb_labels)

                # ff: hidden layer with ReLU, then linear output layer
                a2 = ele.relu(self.w1 * a1 + self.b1)
                a3 = self.w2 * a2 + self.b2
                # softmax & error: s3 is the output-layer delta
                out = co.softmax(a3)
                s3 = out - target
                # bp: propagate delta through w2 and the ReLU
                s2 = self.w2.trans() * s3
                s2 = ele.relu_back(s2, a2)
                # grad: average over the minibatch
                gw1 = s2 * a1.trans() / num_samples
                gb1 = s2.sum(1) / num_samples
                gw2 = s3 * a2.trans() / num_samples
                gb2 = s3.sum(1) / num_samples
                # update: plain SGD, no momentum or weight decay
                self.w1 -= self.eps_w * gw1
                self.w2 -= self.eps_w * gw2
                self.b1 -= self.eps_b * gb1
                self.b2 -= self.eps_b * gb2

                if (count % 40 == 0):
                    # Error rate = fraction of samples whose predicted class
                    # differs from the target class.
                    correct = out.max_index(0) - target.max_index(0)
                    val = correct.to_numpy()
                    print 'Training error:', float(np.count_nonzero(val)) / num_samples
                count = count + 1

            # test: single forward pass over the whole test set
            a1 = test_samples
            a2 = ele.relu(self.w1 * a1 + self.b1)
            a3 = self.w2 * a2 + self.b2
            correct = a3.max_index(0) - test_labels.max_index(0)
            val = correct.to_numpy()
            print 'Testing error:', float(np.count_nonzero(val)) / num_test_samples
            print '---Finish epoch #%d' % epoch
Пример #23
0
def train_network(model, num_epochs = 100, minibatch_size=256,
        dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    gpu0 = owl.create_gpu_device(0)
    gpu1 = owl.create_gpu_device(1)
    num_layers = 20
    num_weights = 8
    count = 0
    last = time.time()

    # dp = ImageNetDataProvider(mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
    #         train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
    #         val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
    #         test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for j in xrange(300):
            count = count + 1
            data = owl.randn([227, 227, 3, minibatch_size], 0, 1)
            label = owl.randn([1, minibatch_size], 0, 1)

            weightsgrad = [None] * num_weights
            biasgrad = [None] * num_weights

            num_samples = minibatch_size

            '''
            thisimg = samples[0, :]
            print thisimg
            imgdata = np.transpose(thisimg.reshape([3, 227*227])).reshape([227, 227, 3])
            print imgdata
            img = Image.fromarray(imgdata.astype(np.uint8))
            img.save('testimg.jpg', format='JPEG')
            exit(0)
            '''

            owl.set_device(gpu0)
            out = train_one_mb(model, data, label, weightsgrad, biasgrad, dropout_rate)

            for k in range(num_weights):
                model.weightsdelta[k] = mom * model.weightsdelta[k] - eps_w / num_samples  * (weightsgrad[k] + wd * model.weights[k])
                model.biasdelta[k] = mom * model.biasdelta[k] - eps_b / num_samples  * (biasgrad[k] + wd * model.bias[k])
                model.weights[k] += model.weightsdelta[k]
                model.weights[k].start_eval()
                model.bias[k] += model.biasdelta[k]
                model.bias[k].start_eval()
            if count % 3 == 0:
                print_training_accuracy(out, label, data.shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()
Пример #24
0
 def test(self):
     """Pairwise tree-multiply 32 all-ones matrices across CPU/MPI devices.

     Builds n = 32 matrices of ones, multiplies them together in log2(n)
     pairwise rounds (each round scattering the work across the devices in
     cpumpitestinit.devices), and checks the result against the closed-form
     value 1000**exp where exp counts the accumulated matmul contractions.
     """
     narrays = []
     n = 32
     exp = 0
     for i in range(n):
         narrays.append(owl.ones([1000,1000]))
     j = 1
     while j <= n/2:
         # Round with stride j: combine narrays[i] and narrays[i+j] in place.
         for i in range(0, n , j*2):
             # NOTE(review): set_device is given an int index here
             # (hash(i) % device count -- hash(i) == i for ints), while other
             # call sites pass device handles; confirm the API accepts both.
             owl.set_device(hash(i)%len(cpumpitestinit.devices))
             narrays[i] = narrays[i]*narrays[i+j]
         j *= 2
         # Each round doubles-and-adds the exponent: ones*ones over dim 1000
         # contributes a factor of 1000 per contraction.
         exp = exp*2+1
     test = narrays[0]
     expected = np.ones([1000,1000])*math.pow(1000,exp)
     print 'Expected\n',expected
     print "Actual\n",test.to_numpy()
     self.assertTrue(np.allclose(expected, test.to_numpy()))
Пример #25
0
def train_network(filename, model, num_epochs=5, minibatch_size=256, lr=0.1, lr_decay= 0.95, mom=0.9, wd=5e-4):
    """Multi-device MNIST training with per-epoch learning-rate decay.

    Round-robins minibatches over the module-level `devs` list; when the
    device cycle wraps (current_dev == 0) the per-device gradients are
    tree-merged via multi_dev_merge and one momentum/weight-decay SGD step
    is applied. The learning rate is multiplied by lr_decay after every
    epoch, and the model is tested on the held-out set.

    Relies on module-level names: devs, lazy_cycle, bpprop, multi_dev_merge,
    print_training_accuracy, mnist_io, owl, time.
    """
    # load data; each device receives an equal share of the minibatch
    (train_data, test_data) = mnist_io.load_mb_from_mat(filename, minibatch_size / len(devs))
    num_test_samples = test_data[0].shape[0]
    # Test set as [H, W, C, N] device arrays, built once up front.
    test_samples = owl.from_numpy(test_data[0]).reshape([28, 28, 1, num_test_samples])
    test_labels = owl.from_numpy(test_data[1])
    for i in xrange(num_epochs):
        print "---Epoch #", i
        last = time.time()
        count = 0
        weightgrads = [None] * len(devs)
        biasgrads = [None] * len(devs)
        for (mb_samples, mb_labels) in train_data:
            count += 1
            # Round-robin device assignment for this minibatch.
            current_dev = count % len(devs)
            owl.set_device(devs[current_dev])
            num_samples = mb_samples.shape[0]
            data = owl.from_numpy(mb_samples).reshape([28, 28, 1, num_samples])
            label = owl.from_numpy(mb_labels)
            out, weightgrads[current_dev], biasgrads[current_dev] = bpprop(model, data, label)
            if current_dev == 0:
                # All devices have gradients for this cycle: merge and update.
                for k in range(len(model.weights)):
                    model.weightdelta[k] = mom * model.weightdelta[k] - lr / num_samples / len(devs) * multi_dev_merge(weightgrads, 0, k) - lr * wd * model.weights[k]
                    model.biasdelta[k] = mom * model.biasdelta[k] - lr / num_samples / len(devs) * multi_dev_merge(biasgrads, 0, k)
                    model.weights[k] += model.weightdelta[k]
                    model.bias[k] += model.biasdelta[k]
                if count % (len(devs) * lazy_cycle) == 0:
                    print_training_accuracy(out, label, num_samples, 'Training ' + str(count))
                    owl.print_profiler_result()
        print '---End of Epoch #', i, 'time:', time.time() - last
        # Decay the learning rate once per epoch.
        lr = lr*lr_decay
        # do test
        out, _, _  = bpprop(model, test_samples, test_labels)
        print_training_accuracy(out, test_labels, num_test_samples, 'Testing')
Пример #26
0
def train_network(model, num_epochs = 100, minibatch_size=256,
        dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    """Train `model` on ImageNet with data parallelism across `num_gpu` GPUs.

    Each incoming minibatch is dispatched round-robin to one GPU; once every
    GPU holds a gradient (i.e. when the round wraps back to device 0), the
    per-GPU gradients are summed and a momentum-SGD update is applied.

    Relies on module-level globals: `num_gpu`, `gpu_array`, `lazy`,
    `train_one_mb`, `print_training_accuracy` (not visible in this block).

    :param model: network holding weights/bias and their delta buffers.
    :param num_epochs: passes over the training set.
    :param minibatch_size: global batch size; split evenly across GPUs below.
    :param dropout_rate: dropout probability forwarded to train_one_mb.
    :param eps_w/eps_b: learning rates for weights / biases.
    :param mom: momentum coefficient; wd: L2 weight-decay coefficient.
    """
    num_layers = model.num_layers
    num_weights = model.num_weights
    last = time.time()
    # `num_samples` keeps the *global* batch size for gradient scaling,
    # while `minibatch_size` is shrunk to the per-GPU share.
    num_samples = minibatch_size
    minibatch_size = minibatch_size / num_gpu
    dp = ImageNetDataProvider(mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
            train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
            val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
            test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')
    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        # One gradient slot per weight per GPU.
        weightsgrad = [[None] * num_weights for z in range(num_gpu)]
        biasgrad = [[None] * num_weights for z in range(num_gpu)]
        j = 0
        for (samples, labels) in dp.get_train_mb(minibatch_size):
            j += 1
            # Round-robin device selection; the whole FF/BP below is placed
            # on this device by the dataflow engine.
            count = j % num_gpu
            owl.set_device(gpu_array[count])
            # NOTE(review): reshape assumes a full minibatch; a short final
            # batch from the provider would make this reshape fail — confirm.
            data = owl.from_nparray(samples).reshape([227, 227, 3, minibatch_size])
            label = owl.from_nparray(labels)
            out = train_one_mb(model, data, label, weightsgrad[count], biasgrad[count], dropout_rate)
            # out.start_eval()
            if count == 0:
                # Update: fold all GPUs' gradients into slot 0, then apply
                # momentum SGD with weight decay, scaled by the global batch.
                for k in range(num_weights):
                    for l in range(1, num_gpu):
                        weightsgrad[0][k] = weightsgrad[0][k] + weightsgrad[l][k]
                        biasgrad[0][k] = biasgrad[0][k] + biasgrad[l][k]
                    model.weightsdelta[k] = mom * model.weightsdelta[k] - eps_w / num_samples  * (weightsgrad[0][k] + wd * model.weights[k])
                    model.biasdelta[k] = mom * model.biasdelta[k] - eps_b / num_samples  * (biasgrad[0][k] + wd * model.bias[k])
                    model.weights[k] += model.weightsdelta[k]
                    # start_eval kicks off asynchronous evaluation so the
                    # next round's FF overlaps with these updates.
                    model.weights[k].start_eval()
                    model.bias[k] += model.biasdelta[k]
                    model.bias[k].start_eval()
                if j % (lazy * num_gpu) == 0:
                    print_training_accuracy(out, label, minibatch_size)
                    print "time: %s" % (time.time() - last)
                    last = time.time()
Пример #27
0
 def test(self):
     # Expected
     cpu=owl.create_cpu_device()
     owl.set_device(cpu)
     img = np.arange(0,32, dtype=np.float32) #/32
     img = np.reshape(img,[1,2,4,4])
     expected = np.asarray([[[5,7],
                             [13,15]],
                            [[21,23],
                             [29,31]]]) #/32.0
     #expected = np.asarray([[[ 110.25,  124.25],
     #                        [ 166.25,  180.25]],
     #                       [[ 278.25,  324.25],
     #                        [ 462.25,  508.25]]])
     
     # test
     owlimg = owl.from_numpy(img)
     pooler = owl.conv.Pooler(2,2,2,2)   
     test = pooler.ff(owlimg)
     
     print 'Expected\n',expected
     print "Actual\n",test.to_numpy()
     print "This test must be run with a fractional bit width of 12"
     self.assertTrue(np.allclose(expected, test.to_numpy(), atol= 1.0/(1<<12)*4))
Пример #28
0
 def test(self):
     """Multiplying two all-ones matrices placed on different devices gives 900 everywhere."""
     owl.set_device(d[3 % len(d)])
     lhs = owl.ones([1000, 900])
     owl.set_device(d[2 % len(d)])
     rhs = owl.ones([900, 1000])
     owl.set_device(d[1 % len(d)])
     product = lhs * rhs
     expected = np.ones([1000, 1000]) * 900
     self.assertTrue(np.array_equal(expected, product.to_numpy()))
Пример #29
0
 def test(self):
     """Cross-device matmul over MPI CPU devices: ones([20,900]) * ones([900,800])."""
     owl.set_device(cpumpitestinit.devices[-3])
     lhs = owl.ones([20, 900])
     owl.set_device(cpumpitestinit.devices[-2])
     rhs = owl.ones([900, 800])
     owl.set_device(cpumpitestinit.devices[-1])
     product = lhs * rhs
     # The [800, 20] numpy shape suggests owl shapes are column-major
     # relative to numpy — TODO confirm against owl's layout docs.
     expected = np.ones([800, 20]) * 900
     self.assertTrue(np.array_equal(expected, product.to_numpy()))
Пример #30
0
            label = owl.from_numpy(labels)
            #data = owl.randn([227, 227, 3, 128], 0.0, 0.01)
            #label = owl.randn([1000, 128], 0.0, 0.01)
            num_samples += data.shape[-1]
            (out, wgrad[gpuid], bgrad[gpuid]) = model.train_one_mb(data, label, dropout_rate)

            if count % 2 != 0:
                continue

            for k in range(num_weights):
                wgrad[0][k] += wgrad[1][k]
                bgrad[0][k] += bgrad[1][k]

            model.update(wgrad[0], bgrad[0], num_samples, mom, eps_w, wd)

            if count % 8 == 0:
                print_training_accuracy(out, label, data.shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()

            num_samples = 0
            wgrad = [None] * 2
            bgrad = [None] * 2

if __name__ == '__main__':
    # Run AlexNet training entirely on the host CPU device.
    cpu_dev = owl.create_cpu_device()
    owl.set_device(cpu_dev)
    net = AlexModel()
    net.init_random()
    train_network(net)
Пример #31
0
    # Always register the local CPU device first; it is the fallback target.
    cpu = owl.create_cpu_device()
    print "owl: local CPU creation in rank {} with id {}".format(owl.rank(), cpu)
    sys.stdout.flush()
    print '''
         __   __   _   __   _   _____   ____    _    _   ___
        /  | /  | | | |  \\ | | |  ___| |  _ \\  | |  / / /   |
       /   |/   | | | |   \\| | | |__   | |_| | | | / / / /| |
      / /|   /| | | | |      | |  __|  |    /  | |/ / / /_| |
     / / |  / | | | | | |\\   | | |___  | |\\ \\  |   / / ___  |
    /_/  |_/  |_| |_| |_| \\__| |_____| |_| \\_\\ |__/ /_/   |_|
    '''
    # When CUDA is available, register every GPU and make GPU 0 current.
    if owl.has_cuda():
        print owl.get_gpu_device_count()
        gpu = [owl.create_gpu_device(i) for i in range(owl.get_gpu_device_count())]
        print '[INFO] You have %d GPU devices' % len(gpu)
        print '[INFO] Set device to gpu[0]'
        owl.set_device(gpu[0])
    # Under MPI, register one remote CPU device per non-local rank.
    if owl.has_mpi():
        n = owl.get_mpi_node_count()
        for i in range(1,n):
            id = owl.create_mpi_device(i,0)
            print "owl: created mpi cpu device on rank {} with id {}".format(i, id)
    # NOTE(review): this `else` binds to the MPI check above, yet its
    # messages talk about CUDA — it looks like it was meant to be the
    # `else` of `if owl.has_cuda()`; verify intended control flow.
    else:
        print '[INFO] CUDA disabled'
        print '[INFO] Set device to cpu'
        owl.set_device(cpu)
    print "\nREADY FOR INPUT\n"
    
    #print z.to_numpy()
    #import IPython; IPython.start_ipython(argv=[])
Пример #32
0
def train_network(model,
                  num_epochs=100,
                  minibatch_size=256,
                  dropout_rate=0.5,
                  eps_w=0.01,
                  eps_b=0.01,
                  mom=0.9,
                  wd=0.0005):
    """Train an 8-weight-layer AlexNet-style CNN on ImageNet on a single GPU.

    The forward pass (`acts`), back-propagated sensitivities (`sens`) and
    the momentum-SGD updates are all written out explicitly, one statement
    per layer, with no layer abstraction.  Layer indices in `acts`/`sens`
    must stay paired with the weight indices used in the update section.

    :param model: holds `weights`, `bias`, `weightsdelta`, `biasdelta`
        (8 entries each) plus `conv_infos` and `pooling_infos`.
    :param num_epochs: passes over the training set.
    :param minibatch_size: samples requested per minibatch.
    :param dropout_rate: dropout probability applied after fc6 and fc7.
    :param eps_w: learning rate for weights; eps_b: for biases.
    :param mom: momentum coefficient; wd: L2 weight decay (weights only).
    """
    # NOTE(review): GPU index 1 is hard-coded; fails on single-GPU hosts.
    gpu = owl.create_gpu_device(1)
    owl.set_device(gpu)
    num_layers = 20
    count = 0
    last = time.time()

    dp = ImageNetDataProvider(
        mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
        train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
        val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
        test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')

    acts = [None] * num_layers
    sens = [None] * num_layers

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        sys.stdout.flush()
        for (samples, labels) in dp.get_train_mb(minibatch_size):
            num_samples = samples.shape[0]

            # Fresh activation/sensitivity slots each minibatch so the
            # engine can free the previous iteration's arrays.
            acts = [None] * num_layers
            sens = [None] * num_layers

            # FF
            acts[0] = owl.from_nparray(samples).reshape(
                [227, 227, 3, num_samples])
            target = owl.from_nparray(labels)

            # Pre-ReLU conv outputs (acts1, acts3, ...) are kept alive
            # because relu_back below needs them.
            acts1 = conv_forward(acts[0], model.weights[0], model.bias[0],
                                 model.conv_infos[0])
            acts[1] = ele.relu(
                acts1
            )  #(conv_forward(acts[0], model.weights[0], model.bias[0], model.conv_infos[0])) # conv1
            acts[2] = pooling_forward(acts[1], model.pooling_infos[0])  # pool1
            acts3 = conv_forward(acts[2], model.weights[1], model.bias[1],
                                 model.conv_infos[1])  # conv2
            acts[3] = ele.relu(
                acts3
            )  #(conv_forward(acts[2], model.weights[1], model.bias[1], model.conv_infos[1])) # conv2
            acts[4] = pooling_forward(acts[3], model.pooling_infos[1])  # pool2
            acts5 = conv_forward(acts[4], model.weights[2], model.bias[2],
                                 model.conv_infos[2])  # conv3
            acts[5] = ele.relu(
                acts5
            )  #(conv_forward(acts[4], model.weights[2], model.bias[2], model.conv_infos[2])) # conv3
            acts6 = conv_forward(acts[5], model.weights[3], model.bias[3],
                                 model.conv_infos[3])  # conv4
            acts[6] = ele.relu(
                acts6
            )  #(conv_forward(acts[5], model.weights[3], model.bias[3], model.conv_infos[3])) # conv4
            acts7 = conv_forward(acts[6], model.weights[4], model.bias[4],
                                 model.conv_infos[4])  # conv5
            acts[7] = ele.relu(
                acts7
            )  #(conv_forward(acts[6], model.weights[4], model.bias[4], model.conv_infos[4])) # conv5
            acts[8] = pooling_forward(acts[7], model.pooling_infos[2])  # pool5
            # Flatten pool5 to a matrix for the fully-connected layers.
            re_acts8 = acts[8].reshape(
                [np.prod(acts[8].shape[0:3]), num_samples])
            acts9 = model.weights[5] * re_acts8 + model.bias[5]  # fc6
            acts[9] = ele.relu(
                acts9)  #(model.weights[5] * re_acts8 + model.bias[5]) # fc6
            mask6 = owl.randb(acts[9].shape, dropout_rate)
            acts[9] = ele.mult(acts[9], mask6)  # drop6
            acts10 = model.weights[6] * acts[9] + model.bias[6]  # fc7
            acts[10] = ele.relu(
                acts10)  #(model.weights[6] * acts[9] + model.bias[6]) # fc7
            mask7 = owl.randb(acts[10].shape, dropout_rate)
            acts[10] = ele.mult(acts[10], mask7)  # drop7
            acts[11] = model.weights[7] * acts[10] + model.bias[7]  # fc8
            acts[12] = softmax_forward(
                acts[11].reshape([1000, 1, 1, num_samples]),
                soft_op.instance).reshape([1000, num_samples])  # prob

            # error: softmax + cross-entropy gradient is (prob - target)
            sens[11] = acts[12] - target

            # BP
            sens[10] = model.weights[7].trans() * sens[11]  # fc8
            sens[10] = ele.mult(sens[10], mask7)  # drop7
            sens[10] = ele.relu_back(sens[10], acts[10], acts10)  # relu7
            sens[9] = model.weights[6].trans() * sens[10]
            sens[9] = ele.mult(sens[9], mask6)  # drop6
            sens[9] = ele.relu_back(sens[9], acts[9], acts9)  # relu6
            sens[8] = (model.weights[5].trans() * sens[9]).reshape(
                acts[8].shape)  # fc6
            sens[7] = pooling_backward(sens[8], acts[8], acts[7],
                                       model.pooling_infos[2])  # pool5
            sens[7] = ele.relu_back(sens[7], acts[7], acts7)  # relu5
            sens[6] = conv_backward_data(sens[7], model.weights[4],
                                         model.conv_infos[4])  # conv5
            sens[6] = ele.relu_back(sens[6], acts[6], acts6)  # relu4
            sens[5] = conv_backward_data(sens[6], model.weights[3],
                                         model.conv_infos[3])  # conv4
            sens[5] = ele.relu_back(sens[5], acts[5], acts5)  # relu3
            sens[4] = conv_backward_data(sens[5], model.weights[2],
                                         model.conv_infos[2])  # conv3
            sens[3] = pooling_backward(sens[4], acts[4], acts[3],
                                       model.pooling_infos[1])  # pool2
            sens[3] = ele.relu_back(sens[3], acts[3], acts3)  # relu2
            sens[2] = conv_backward_data(sens[3], model.weights[1],
                                         model.conv_infos[1])  # conv2
            sens[1] = pooling_backward(sens[2], acts[2], acts[1],
                                       model.pooling_infos[0])  # pool1
            sens[1] = ele.relu_back(sens[1], acts[1], acts1)  # relu1

            # Momentum SGD deltas per layer, scaled by the batch size;
            # weight decay applies only to the weight terms.
            model.weightsdelta[
                7] = mom * model.weightsdelta[7] - eps_w / num_samples * (
                    sens[11] * acts[10].trans() + wd * model.weights[7])
            model.biasdelta[7] = mom * model.biasdelta[
                7] - eps_b / num_samples * sens[11].sum(1)

            model.weightsdelta[
                6] = mom * model.weightsdelta[6] - eps_w / num_samples * (
                    sens[10] * acts[9].trans() + wd * model.weights[6])
            model.biasdelta[6] = mom * model.biasdelta[
                6] - eps_b / num_samples * sens[10].sum(1)

            model.weightsdelta[
                5] = mom * model.weightsdelta[5] - eps_w / num_samples * (
                    sens[9] * re_acts8.trans() + wd * model.weights[5])
            model.biasdelta[5] = mom * model.biasdelta[
                5] - eps_b / num_samples * sens[9].sum(1)

            model.weightsdelta[
                4] = mom * model.weightsdelta[4] - eps_w / num_samples * (
                    conv_backward_filter(sens[7], acts[6], model.conv_infos[4])
                    + wd * model.weights[4])
            model.biasdelta[4] = mom * model.biasdelta[
                4] - eps_b / num_samples * conv_backward_bias(sens[7])

            model.weightsdelta[
                3] = mom * model.weightsdelta[3] - eps_w / num_samples * (
                    conv_backward_filter(sens[6], acts[5], model.conv_infos[3])
                    + wd * model.weights[3])
            model.biasdelta[3] = mom * model.biasdelta[
                3] - eps_b / num_samples * conv_backward_bias(sens[6])

            model.weightsdelta[
                2] = mom * model.weightsdelta[2] - eps_w / num_samples * (
                    conv_backward_filter(sens[5], acts[4], model.conv_infos[2])
                    + wd * model.weights[2])
            model.biasdelta[2] = mom * model.biasdelta[
                2] - eps_b / num_samples * conv_backward_bias(sens[5])

            model.weightsdelta[
                1] = mom * model.weightsdelta[1] - eps_w / num_samples * (
                    conv_backward_filter(sens[3], acts[2], model.conv_infos[1])
                    + wd * model.weights[1])
            model.biasdelta[1] = mom * model.biasdelta[
                1] - eps_b / num_samples * conv_backward_bias(sens[3])

            model.weightsdelta[
                0] = mom * model.weightsdelta[0] - eps_w / num_samples * (
                    conv_backward_filter(sens[1], acts[0], model.conv_infos[0])
                    + wd * model.weights[0])
            model.biasdelta[0] = mom * model.biasdelta[
                0] - eps_b / num_samples * conv_backward_bias(sens[1])

            for k in range(8):
                model.weights[k] += model.weightsdelta[k]
                model.bias[k] += model.biasdelta[k]

            count = count + 1
            if count % 10 == 0:
                print_training_accuracy(acts[12], target, num_samples)
                print "time: %s" % (time.time() - last)
                last = time.time()
Пример #33
0
from operator import mul
import matplotlib.pyplot as plt

#The file containing the data in the format of one vector per line space separated floats
DATAFILE = "????"



if __name__ == "__main__":
    # Setup minerva: prefer the first GPU when available, else run on CPU.
    cpu = owl.create_cpu_device()
    if owl.get_gpu_device_count() > 0:
        dev = owl.create_gpu_device(0)
    else:
        dev = cpu
    owl.set_device(dev)

    # Load MNIST pixel data from the pickled gzip archive.
    # Fix: the GzipFile was previously never closed; the context manager
    # guarantees it is released as soon as both pickles are read.
    with gzip.GzipFile('/home/jlovitt/storage/mnist/mnist.dat', 'rb') as gzfile:
        # The archive stores (variable name, data); discard the stored name.
        pickle.load(gzfile)
        data = pickle.load(gzfile)
    # Scale raw byte pixels into [0, 1].
    data = data/255.0

    # training parameters
    epsilon = 0.01
    momentum = 0.9
    
Пример #34
0
import owl

# Register the local CPU device first; it is always present.
devices = []
devices.append(owl.create_cpu_device())
if owl.has_mpi():
    # One remote CPU device per additional MPI rank (rank 0 is local).
    n = owl.get_mpi_node_count()
    for i in range(1, n):
        # Fix: renamed from `id`, which shadowed the builtin.
        mpi_dev = owl.create_mpi_device(i, 0)
        devices.append(mpi_dev)
# Default to the most recently registered device: the last MPI rank's
# device when MPI is enabled, otherwise the local CPU.
owl.set_device(devices[-1])
Пример #35
0
    def run(self):
        """Train a 2-layer sigmoid MLP with softmax output on MNIST.

        Minibatches are staged on `self.cpu` and computed on `self.gpu`.
        Every 40 minibatches the training error of the current batch and
        the error on the full test set are printed.
        """
        (train_data, test_data) = imageio.load_mb_from_mat(self.data_file, self.mb_size)
        np.set_printoptions(linewidth=200)
        num_test_samples = test_data[0].shape[0]
        (test_samples, test_labels) = map(lambda npdata : owl.from_nparray(npdata), test_data)
        count = 1
        for epoch in range(self.num_epochs):
            print '---Start epoch #%d' % epoch
            # train
            for (mb_samples, mb_labels) in train_data:
                num_samples = mb_samples.shape[0]

                # Import host data on the CPU device, then switch to the
                # GPU so the math below is placed there.
                owl.set_device(self.cpu)
                a1 = owl.from_nparray(mb_samples)
                target = owl.from_nparray(mb_labels)
                owl.set_device(self.gpu)

                # ff
                a2 = owl.elewise.sigmoid((self.w1 * a1).norm_arithmetic(self.b1, owl.op.add))
                a3 = owl.elewise.sigmoid((self.w2 * a2).norm_arithmetic(self.b2, owl.op.add))
                # softmax & error
                out = owl.softmax(a3)
                s3 = out - target
                # bp
                # NOTE(review): s3*(1-s3) / s2*(1-s2) look like sigmoid
                # derivatives applied to the *error* rather than to the
                # activations (a3*(1-a3)); for softmax+cross-entropy the
                # extra factor is unusual — confirm against the intended
                # derivation before reusing this code.
                s3 = owl.elewise.mult(s3, 1 - s3)
                s2 = self.w2.trans() * s3
                s2 = owl.elewise.mult(s2, 1 - s2)
                # grad: average over the minibatch
                gw1 = s2 * a1.trans() / num_samples
                gb1 = s2.sum(1) / num_samples
                gw2 = s3 * a2.trans() / num_samples
                gb2 = s3.sum(1) / num_samples
                # update: plain SGD, no momentum or weight decay
                self.w1 -= self.eps_w * gw1
                self.w2 -= self.eps_w * gw2
                self.b1 -= self.eps_b * gb1
                self.b2 -= self.eps_b * gb2

                if (count % 40 == 0):
                    # max_index(0) gives the predicted/true class per column;
                    # a zero difference therefore means a correct prediction.
                    correct = out.max_index(0) - target.max_index(0)
                    val = correct.tolist()
                    print 'Training error:', (float(num_samples) - val.count(0.0)) / num_samples
                    # test
                    a1 = test_samples
                    a2 = owl.elewise.sigmoid((self.w1 * a1).norm_arithmetic(self.b1, owl.op.add))
                    a3 = owl.elewise.sigmoid((self.w2 * a2).norm_arithmetic(self.b2, owl.op.add))
                    correct = a3.max_index(0) - test_labels.max_index(0)
                    val = correct.tolist()
                    #print val
                    print 'Testing error:', (float(num_test_samples) - val.count(0.0)) / num_test_samples
                count = count + 1

            # test
            #a1 = test_samples
            #a2 = owl.elewise.sigmoid((self.w1 * a1).norm_arithmetic(self.b1, owl.op.add))
            #a3 = owl.elewise.sigmoid((self.w2 * a2).norm_arithmetic(self.b2, owl.op.add))
            #out = owl.softmax(a3)
            #correct = out.max_index(0) - test_labels.max_index(0)
            #val = correct.tolist()
            #print 'Testing error:', (float(num_test_samples) - val.count(0.0)) / num_test_samples
            print '---Finish epoch #%d' % epoch
Пример #36
0
    def gradient_checker(s, checklayer_name):
        ''' Numerically check the analytic weight gradient of one layer.

        Repeatedly perturbs a random weight entry of `checklayer_name` by
        `h`, measures the resulting loss change over two forward passes,
        and prints the numeric gradient next to the analytic gradient
        obtained from a backward pass.
        '''
        h = 1e-2
        # NOTE(review): `threshold` and `last` are assigned but never used
        # in this method.
        threshold = 1e-4
        checklayer = s.owl_net.units[s.owl_net.name_to_uid[checklayer_name][0]] 
        
        # Collect every softmax (loss) unit in the net.
        losslayer = []
        for i in xrange(len(s.owl_net.units)):
            if isinstance(s.owl_net.units[i], net.SoftmaxUnit):
                losslayer.append(i)
       
        last = None
        '''
        wunits = []
        for i in xrange(len(s.owl_net.units)):
            if isinstance(s.owl_net.units[i], net.WeightedComputeUnit):
                wunits.append(i)
        '''
        wunits = s.owl_net.get_weighted_unit_ids()
        accunits = s.owl_net.get_accuracy_units()
        owl.set_device(s.gpu[0])
        
        for iteridx in range(100):
            #disturb the weights: add h to one randomly chosen flat position
            oriweight = checklayer.weight
            npweight = checklayer.weight.to_numpy()
            weightshape = np.shape(npweight)
            npweight = npweight.reshape(np.prod(weightshape[0:len(weightshape)]))
            position = np.random.randint(0, np.shape(npweight)[0])
            disturb = np.zeros(np.shape(npweight), dtype = np.float32)
            disturb[position] = h
            oriposval = npweight[position]
            npweight += disturb
            newposval = npweight[position]
            npweight = npweight.reshape(weightshape)
            checklayer.weight = owl.from_numpy(npweight)

            # Loss with the perturbed weight.
            all_loss = 0
            # train on multi-gpu

            s.owl_net.forward_check()
            for i in range(len(losslayer)):
                if len(s.owl_net.units[losslayer[i]].loss_weight) == 1:
                    all_loss += (s.owl_net.units[losslayer[i]].getloss() * s.owl_net.units[losslayer[i]].loss_weight[0])
                else:
                    all_loss += s.owl_net.units[losslayer[i]].getloss()

            #get origin loss: restore the weight and run forward again
            checklayer.weight = oriweight
            ori_all_loss = 0
            # train on multi-gpu
            s.owl_net.forward_check()
            for i in range(len(losslayer)):
                if len(s.owl_net.units[losslayer[i]].loss_weight) == 1:
                    ori_all_loss += (s.owl_net.units[losslayer[i]].getloss() * s.owl_net.units[losslayer[i]].loss_weight[0])
                else:
                    ori_all_loss += s.owl_net.units[losslayer[i]].getloss()

            s.owl_net.backward('TEST')
            #get analytic gradient at the same flat position
            npgrad = checklayer.weightgrad.to_numpy()
            npgrad = npgrad.reshape(np.prod(weightshape[0:len(weightshape)]))
            # NOTE(review): `i` here is the leftover index from the loop
            # above, i.e. the *last* loss layer's batch size is used for
            # normalization — confirm this is intended with multiple losses.
            analy_grad = npgrad[position] /  s.owl_net.units[losslayer[i]].out.shape[1]
           
            # One-sided finite difference; divides by num_grad below, so a
            # zero numeric gradient would raise ZeroDivisionError.
            num_grad = (all_loss - ori_all_loss) / h
            
            info = "Gradient Check at positon: %d analy: %f num: %f ratio: %f" % (position, analy_grad, num_grad, analy_grad / num_grad)
            print info
Пример #37
0
            label = owl.from_nparray(labels)
            out = train_one_mb(model, data, label, weightsgrad[count], biasgrad[count], dropout_rate)
            # out.start_eval()
            if count == 0:
                # Update
                for k in range(num_weights):
                    for l in range(1, num_gpu):
                        weightsgrad[0][k] = weightsgrad[0][k] + weightsgrad[l][k]
                        biasgrad[0][k] = biasgrad[0][k] + biasgrad[l][k]
                    model.weightsdelta[k] = mom * model.weightsdelta[k] - eps_w / num_samples  * (weightsgrad[0][k] + wd * model.weights[k])
                    model.biasdelta[k] = mom * model.biasdelta[k] - eps_b / num_samples  * (biasgrad[0][k] + wd * model.bias[k])
                    model.weights[k] += model.weightsdelta[k]
                    model.weights[k].start_eval()
                    model.bias[k] += model.biasdelta[k]
                    model.bias[k].start_eval()
                if j % (lazy * num_gpu) == 0:
                    print_training_accuracy(out, label, minibatch_size)
                    print "time: %s" % (time.time() - last)
                    last = time.time()

if __name__ == '__main__':
    # Bring up the runtime: one host CPU device plus one device per GPU.
    owl.initialize(sys.argv)
    owl.create_cpu_device()
    gpu_array.extend(owl.create_gpu_device(idx) for idx in range(num_gpu))
    owl.set_device(gpu_array[0])
    net = AlexModel()
    net.init_random()
    train_network(net)

Пример #38
0
    def run(s):
        ''' Run the training algorithm on multiple GPUs

        The basic logic is similar to the traditional single GPU training code as follows (pseudo-code)::

            for epoch in range(MAX_EPOCH):
                for i in range(NUM_MINI_BATCHES):
                    # load i^th minibatch
                    minibatch = loader.load(i, MINI_BATCH_SIZE)
                    net.ff(minibatch.data)
                    net.bp(minibatch.label)
                    grad = net.gradient()
                    net.update(grad, MINI_BATCH_SIZE)

        With Minerva's lazy evaluation and dataflow engine, we are able to modify the above logic
        to perform data parallelism on multiple GPUs (pseudo-code)::

            for epoch in range(MAX_EPOCH):
                for i in range(0, NUM_MINI_BATCHES, NUM_GPU):
                    gpu_grad = [None for i in range(NUM_GPU)]
                    for gpuid in range(NUM_GPU):
                        # specify which gpu following codes are running on
                        owl.set_device(gpuid)
                        # each minibatch is split among GPUs
                        minibatch = loader.load(i + gpuid, MINI_BATCH_SIZE / NUM_GPU)
                        net.ff(minibatch.data)
                        net.bp(minibatch.label)
                        gpu_grad[gpuid] = net.gradient()
                    net.accumulate_and_update(gpu_grad, MINI_BATCH_SIZE)

        So each GPU will take charge of one *mini-mini batch* training, and since all their ``ff``, ``bp`` and ``gradient``
        calculations are independent among each others, they could be paralleled naturally using Minerva's DAG engine.

        The only problem let is ``accumulate_and_update`` of the the gradient from all GPUs. If we do it on one GPU,
        that GPU would become a bottleneck. The solution is to also partition the workload to different GPUs (pseudo-code)::

            def accumulate_and_update(gpu_grad, MINI_BATCH_SIZE):
                num_layers = len(gpu_grad[0])
                for layer in range(num_layers):
                    upd_gpu = layer * NUM_GPU / num_layers
                    # specify which gpu to update the layer
                    owl.set_device(upd_gpu)
                    for gid in range(NUM_GPU):
                        if gid != upd_gpu:
                            gpu_grad[upd_gpu][layer] += gpu_grad[gid][layer]
                    net.update_layer(layer, gpu_grad[upd_gpu][layer], MINI_BATCH_SIZE)

        Since the update of each layer is independent among each others, the update could be paralleled affluently. Minerva's
        dataflow engine transparently handles the dependency resolving, scheduling and memory copying among different devices,
        so users don't need to care about that.
        '''
        # Per-GPU gradient accumulators; wgrad[g][i] pairs with wunits[i].
        wgrad = [[] for i in range(s.num_gpu)]
        bgrad = [[] for i in range(s.num_gpu)]
        last = time.time()
        wunits = s.owl_net.get_weighted_unit_ids()
        last_start = time.time()

        # Resume from the snapshot'th checkpoint when s.snapshot > 0.
        for iteridx in range(s.snapshot * s.owl_net.solver.snapshot,
                             s.owl_net.solver.max_iter):
            # get the learning rate
            if s.owl_net.solver.lr_policy == "poly":
                s.owl_net.current_lr = s.owl_net.base_lr * pow(
                    1 - float(iteridx) / s.owl_net.solver.max_iter,
                    s.owl_net.solver.power)
            elif s.owl_net.solver.lr_policy == "step":
                s.owl_net.current_lr = s.owl_net.base_lr * pow(
                    s.owl_net.solver.gamma,
                    iteridx / s.owl_net.solver.stepsize)

            # train on multi-gpu: one full ff/bp per GPU; the lazy engine
            # runs them in parallel since they are data-independent.
            for gpuid in range(s.num_gpu):
                owl.set_device(s.gpu[gpuid])
                s.owl_net.forward('TRAIN')
                s.owl_net.backward('TRAIN')
                for wid in wunits:
                    wgrad[gpuid].append(s.owl_net.units[wid].weightgrad)
                    bgrad[gpuid].append(s.owl_net.units[wid].biasgrad)

            # weight update: each layer's accumulation is assigned to one
            # GPU (upd_gpu) to spread the reduction work across devices.
            # NOTE: `i * s.num_gpu / len(wunits)` relies on Python 2
            # integer division.
            for i in range(len(wunits)):
                wid = wunits[i]
                upd_gpu = i * s.num_gpu / len(wunits)
                owl.set_device(s.gpu[upd_gpu])
                for gid in range(s.num_gpu):
                    if gid == upd_gpu:
                        continue
                    wgrad[upd_gpu][i] += wgrad[gid][i]
                    bgrad[upd_gpu][i] += bgrad[gid][i]
                s.owl_net.units[wid].weightgrad = wgrad[upd_gpu][i]
                s.owl_net.units[wid].biasgrad = bgrad[upd_gpu][i]
                s.owl_net.update(wid)

            # Synchronize periodically so timing numbers are meaningful.
            if iteridx % 2 == 0:
                owl.wait_for_all()
                thistime = time.time() - last
                print "Finished training %d minibatch (time: %s)" % (iteridx,
                                                                     thistime)
                last = time.time()

            wgrad = [[] for i in range(s.num_gpu)]  # reset gradients
            bgrad = [[] for i in range(s.num_gpu)]

            # decide whether to display loss
            if (iteridx + 1) % (s.owl_net.solver.display) == 0:
                lossunits = s.owl_net.get_loss_units()
                for lu in lossunits:
                    print "Training Loss %s: %f" % (lu.name, lu.getloss())

            # decide whether to test
            if (iteridx + 1) % (s.owl_net.solver.test_interval) == 0:
                acc_num = 0
                test_num = 0
                for testiteridx in range(s.owl_net.solver.test_iter[0]):
                    s.owl_net.forward('TEST')
                    all_accunits = s.owl_net.get_accuracy_units()
                    accunit = all_accunits[len(all_accunits) - 1]
                    #accunit = all_accunits[0]
                    test_num += accunit.batch_size
                    acc_num += (accunit.batch_size * accunit.acc)
                    print "Accuracy the %d mb: %f" % (testiteridx, accunit.acc)
                    sys.stdout.flush()
                print "Testing Accuracy: %f" % (float(acc_num) / test_num)

            # decide whether to save model
            if (iteridx + 1) % (s.owl_net.solver.snapshot) == 0:
                print "Save to snapshot %d, current lr %f" % (
                    (iteridx + 1) /
                    (s.owl_net.solver.snapshot), s.owl_net.current_lr)
                s.builder.save_net_to_file(s.owl_net, s.snapshot_dir,
                                           (iteridx + 1) /
                                           (s.owl_net.solver.snapshot))
            sys.stdout.flush()
Пример #39
0
 def __init__(self, solver_file, snapshot, gpu_idx = 0):
     """Remember the solver config/snapshot and make GPU `gpu_idx` the active device."""
     self.gpu = owl.create_gpu_device(gpu_idx)
     owl.set_device(self.gpu)
     self.solver_file = solver_file
     self.snapshot = snapshot
Пример #40
0
def setfpga():
    """Make the last registered device the active owl device (presumably the FPGA — confirm with the device-setup code)."""
    target = devices[-1]
    owl.set_device(target)
Пример #41
0
 def __init__(self, solver_file, snapshot, gpu_idx=0):
     """Store the solver/snapshot configuration and bind this instance to GPU `gpu_idx`."""
     self.solver_file = solver_file
     self.snapshot = snapshot
     gpu_dev = owl.create_gpu_device(gpu_idx)
     self.gpu = gpu_dev
     owl.set_device(gpu_dev)
Пример #42
0
def train_network_n(n,
                    model,
                    num_epochs=100,
                    minibatch_size=40,
                    dropout_rate=0.5,
                    eps_w=0.0001,
                    eps_b=0.0002,
                    mom=0.9,
                    wd=0.0005):
    """Data-parallel training of ``model`` across ``n`` GPUs.

    Each GPU trains on its own slice of the minibatch; every ``n``
    minibatches the per-GPU weight/bias gradients are summed and a single
    momentum-SGD update (with weight decay) is applied to the shared model.

    n              -- number of GPU devices to create and use.
    model          -- network with weights/bias, their deltas, and
                      per-layer ff_infos ('conv'/'fully' layers carry
                      trainable parameters).
    num_epochs     -- number of passes over the training set.
    minibatch_size -- total samples per logical minibatch, divided
                      evenly (integer division) across the n GPUs.
    dropout_rate   -- not referenced in this body; presumably consumed
                      inside train_one_mb -- TODO confirm.
    eps_w, eps_b   -- learning rates for weights and biases.
    mom, wd        -- momentum and weight-decay coefficients.
    """

    gpus = []
    for i in range(0, n):
        gpus.append(owl.create_gpu_device(i))

    count = 0
    last = time.time()

    dp = ImageNetDataProvider(
        mean_file='./VGGmodel/vgg_mean.binaryproto',
        train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
        val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
        test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')

    # Each GPU processes an equal share of the logical minibatch.
    minibatch_size = minibatch_size / n
    # NOTE(review): ``correct``, ``rerun`` and ``thiscorrect`` (below) are
    # assigned but never read in this function.
    correct = 0

    rerun = False
    startepoch = 0
    # NOTE(review): ``curepoch`` is never advanced inside the epoch loop,
    # so the printed "Epoch" number stays at ``startepoch``.
    curepoch = startepoch

    # Per-GPU slots, indexed by the position of the minibatch within the
    # current group of n minibatches.
    data = [None] * n
    label = [None] * n
    out = [None] * n
    biasgrad = [None] * n
    weightsgrad = [None] * n

    for i in range(startepoch, num_epochs):
        print "---------------------Epoch %d Index %d" % (curepoch, i)
        sys.stdout.flush()
        batchidx = 0
        count = 0
        loadmodel(i, model)
        for (samples, labels) in dp.get_train_mb(minibatch_size, 224):
            # ``count`` cycles 1..n, selecting which GPU handles this
            # minibatch; data layout is [H, W, C, N] = [224, 224, 3, N].
            count = count + 1
            data[count - 1] = owl.from_numpy(samples).reshape(
                [224, 224, 3, samples.shape[0]])
            label[count - 1] = owl.from_numpy(labels)
            biasgrad[count - 1] = [None] * (model.num_layers - 1)
            weightsgrad[count - 1] = [None] * (model.num_layers - 1)
            owl.set_device(gpus[count - 1])
            out[count - 1] = train_one_mb(model, data[count - 1],
                                          label[count - 1],
                                          weightsgrad[count - 1],
                                          biasgrad[count - 1])
            # Kick off asynchronous evaluation of this GPU's pass.
            out[count - 1].start_eval()
            # Only aggregate once all n GPUs have a minibatch in flight.
            if count % n > 0:
                continue

            # Sum gradients from all GPUs for every trainable layer.
            totalweightsgrad = [None] * (model.num_layers - 1)
            totalbiasgrad = [None] * (model.num_layers - 1)
            num_samples = 0
            for gpuidx in range(0, n):
                num_samples += data[gpuidx].shape[-1]
                for k in range(model.num_layers - 1):
                    if model.ff_infos[k]['ff_type'] == 'conv' or model.ff_infos[
                            k]['ff_type'] == 'fully':
                        if gpuidx == 0:
                            totalweightsgrad[k] = weightsgrad[gpuidx][k]
                            totalbiasgrad[k] = biasgrad[gpuidx][k]
                        else:
                            totalweightsgrad[k] += weightsgrad[gpuidx][k]
                            totalbiasgrad[k] += biasgrad[gpuidx][k]

            # Momentum SGD with weight decay, normalized by the combined
            # sample count of the n minibatches.
            for k in range(model.num_layers - 1):
                if model.ff_infos[k]['ff_type'] == 'conv' or model.ff_infos[k][
                        'ff_type'] == 'fully':
                    model.weightsdelta[k] = mom * model.weightsdelta[
                        k] - eps_w / num_samples * (
                            totalweightsgrad[k] +
                            wd * num_samples * model.weights[k])
                    model.biasdelta[k] = mom * model.biasdelta[
                        k] - eps_b / num_samples * totalbiasgrad[k]
                    model.weights[k] += model.weightsdelta[k]
                    model.bias[k] += model.biasdelta[k]

            #print num_samples
            # Always true here (the earlier ``continue`` filters the rest).
            if count % n == 0:
                print 'batch %d' % (batchidx)
                batchidx = batchidx + 1
                '''
                #TODO hack
                if batchidx == 2000:
                    savemodel(i+1, model)
                    exit(0)
                '''
                thiscorrect = print_training_accuracy(out[0], label[0],
                                                      data[0].shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()
                count = 0
        savemodel(i + 1, model)
Пример #43
0
def check_weight_2gpu(owl_net, checklayer, gpu):
    """Gradient-check ``checklayer``'s weights with gradients summed over 2 GPUs.

    Repeats 10 times: perturb one randomly chosen weight entry by ``h``,
    measure the resulting change in training loss (numerical gradient),
    then run forward/backward on both devices in ``gpu``, sum the
    per-device gradients, and compare the analytic gradient at the same
    position against the numerical one.

    NOTE(review): ``losslayer`` is read below but is neither a parameter
    nor defined locally -- it must be a module-level global; confirm it
    is populated before calling.
    NOTE(review): ``threshold`` is assigned but never used in this body.
    """
    h = 1e-2          # finite-difference step size
    threshold = 1e-4
    wunits = get_weights_id(owl_net)
    wgrad = []
    bgrad = []

    for iteridx in range(10):
        #disturb the weights
        oriweight = checklayer.weight
        npweight = checklayer.weight.to_numpy()
        weightshape = np.shape(npweight)
        # Flatten so a single random index addresses one weight entry.
        npweight = npweight.reshape(np.prod(weightshape[0:len(weightshape)]))
        print np.shape(npweight)
        position = np.random.randint(0, np.shape(npweight)[0])
        print position
        disturb = np.zeros(np.shape(npweight), dtype=np.float32)
        disturb[position] = h
        oriposval = npweight[position]
        npweight += disturb
        newposval = npweight[position]
        npweight = npweight.reshape(weightshape)
        checklayer.weight = owl.from_numpy(npweight)

        #get disturbed loss
        owl_net.forward('TRAIN')
        all_loss = 0
        for i in xrange(len(losslayer)):
            all_loss += owl_net.units[losslayer[i]].getloss()
        all_loss = all_loss / owl_net.batch_size  #+ 0.5 * owl_net.base_weight_decay * newposval * newposval

        #get origin loss (restore the unperturbed weight first)
        checklayer.weight = oriweight
        owl_net.forward('TRAIN')
        ori_all_loss = 0
        for i in xrange(len(losslayer)):
            ori_all_loss += owl_net.units[losslayer[i]].getloss()
        ori_all_loss = ori_all_loss / owl_net.batch_size  #+ 0.5 * owl_net.base_weight_decay * oriposval * oriposval

        #analy_grad: run a pass on each device and sum the gradients
        owl.set_device(gpu[0])
        owl_net.forward('TRAIN')
        owl_net.backward('TRAIN')
        for wid in wunits:
            wgrad.append(owl_net.units[wid].weightgrad)
            bgrad.append(owl_net.units[wid].biasgrad)
        owl.set_device(gpu[1])
        owl_net.forward('TRAIN')
        owl_net.backward('TRAIN')
        for i in range(len(wunits)):
            wid = wunits[i]
            owl_net.units[wid].weightgrad += wgrad[i]
            owl_net.units[wid].biasgrad += bgrad[i]
        wgrad = []
        bgrad = []

        #get analytic gradient at the perturbed position; divide by the
        #number of devices because two passes were accumulated.
        npgrad = checklayer.weightgrad.to_numpy()
        npgrad = npgrad.reshape(np.prod(weightshape[0:len(weightshape)]))
        analy_grad = npgrad[position] / owl_net.batch_size / len(gpu)

        print all_loss
        print ori_all_loss
        # Numerical gradient from the one-sided finite difference.
        num_grad = (all_loss - ori_all_loss) / h

        diff = np.abs(analy_grad - num_grad)
        info = "analy: %f num: %f ratio: %f" % (analy_grad, num_grad,
                                                analy_grad / num_grad)
        print info
Пример #44
0
    def run(s):
        wgrad = [[] for i in range(s.num_gpu)]
        bgrad = [[] for i in range(s.num_gpu)]
        last = time.time()
        wunits = s.owl_net.get_weighted_unit_ids()
        last_start = time.time()

        for iteridx in range(s.snapshot * s.owl_net.solver.snapshot,
                             s.owl_net.solver.max_iter):
            # get the learning rate
            if s.owl_net.solver.lr_policy == "poly":
                s.owl_net.current_lr = s.owl_net.base_lr * pow(
                    1 - float(iteridx) / s.owl_net.solver.max_iter,
                    s.owl_net.solver.power)
            elif s.owl_net.solver.lr_policy == "step":
                s.owl_net.current_lr = s.owl_net.base_lr * pow(
                    s.owl_net.solver.gamma,
                    iteridx / s.owl_net.solver.stepsize)
            # train on multi-gpu
            for gpuid in range(s.num_gpu):
                owl.set_device(s.gpu[gpuid])
                s.owl_net.forward('TRAIN')
                s.owl_net.backward('TRAIN')
                for wid in wunits:
                    wgrad[gpuid].append(s.owl_net.units[wid].weightgrad)
                    bgrad[gpuid].append(s.owl_net.units[wid].biasgrad)

            # weight update
            for i in range(len(wunits)):
                wid = wunits[i]
                upd_gpu = i * num_gpu / len(wunits)
                owl.set_device(s.gpu[upd_gpu])
                for gid in range(s.num_gpu):
                    if gid == upd_gpu:
                        continue
                    wgrad[upd_gpu][i] += wgrad[gid][i]
                    bgrad[upd_gpu][i] += bgrad[gid][i]
                s.owl_net.units[wid].weightgrad = wgrad[upd_gpu][i]
                s.owl_net.units[wid].biasgrad = bgrad[upd_gpu][i]
                s.owl_net.update(wid)
            #s.owl_net.weight_update(num_gpu = s.num_gpu)
            if iteridx % 2 == 0:
                s.owl_net.wait_for_eval_loss()
                thistime = time.time() - last
                print "Finished training %d minibatch (time: %s)" % (iteridx,
                                                                     thistime)
                last = time.time()

            #s.owl_net.units[wunits[0]].weight.wait_for_eval()
            wgrad = [[] for i in range(s.num_gpu)]  # reset gradients
            bgrad = [[] for i in range(s.num_gpu)]

            # decide whether to display loss
            if (iteridx + 1) % (s.owl_net.solver.display) == 0:
                lossunits = s.owl_net.get_loss_units()
                for lu in lossunits:
                    print "Training Loss %s: %f" % (lu.name, lu.getloss())

            # decide whether to test
            #if True:
            if (iteridx + 1) % (s.owl_net.solver.test_interval) == 0:
                acc_num = 0
                test_num = 0
                for testiteridx in range(s.owl_net.solver.test_iter[0]):
                    s.owl_net.forward('TEST')
                    all_accunits = s.owl_net.get_accuracy_units()
                    accunit = all_accunits[len(all_accunits) - 1]
                    #accunit = all_accunits[0]
                    print accunit.name
                    test_num += accunit.batch_size
                    acc_num += (accunit.batch_size * accunit.acc)
                    print "Accuracy the %d mb: %f" % (testiteridx, accunit.acc)
                    sys.stdout.flush()
                print "Testing Accuracy: %f" % (float(acc_num) / test_num)

            # decide whether to save model
            if (iteridx + 1) % (s.owl_net.solver.snapshot) == 0:
                print "Save to snapshot %d, current lr %f" % (
                    (iteridx + 1) /
                    (s.owl_net.solver.snapshot), s.owl_net.current_lr)
                s.builder.save_net_to_file(s.owl_net, s.snapshot_dir,
                                           (iteridx + 1) /
                                           (s.owl_net.solver.snapshot))
            sys.stdout.flush()
Пример #45
0
            if tanhC_version:
                Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
            else:
                Hout[t] = ele.mult(act_og[t], C[t])

            Y = softmax(model.decoder_weights * Hout[t] + model.decoder_bias)

            # evaluation
            output = Y.to_numpy()  # Can directly get a single element from Y
            # print output[0, sent[t]]
            sent_ll += math.log(max(output[0, sent[t]], 1e-20), 2)

        test_ll += sent_ll

    test_ent = test_ll * (-1) / words
    test_ppl = 2**test_ent

    print "Test PPL =", test_ppl


if __name__ == '__main__':
    # Initialize the owl runtime and pin all computation to GPU #1.
    owl.initialize(sys.argv)
    gpu = owl.create_gpu_device(1)
    owl.set_device(gpu)
    # Build the LSTM model and load the training/test corpora.
    model, train_sents, test_sents, train_words, test_words = LSTM_init()
    learning_rate = 0.1
    # Alternate one round of training with a perplexity evaluation;
    # LSTM_train may also adjust the learning rate between rounds.
    for i in range(5):
        model, learning_rate = LSTM_train(model, train_sents, train_words,
                                          learning_rate, 1)
        LSTM_test(model, test_sents, test_words)
Пример #46
0
        out, _, _ = bpprop(model, test_samples, test_labels)
        print_training_accuracy(out, test_labels, num_test_samples, 'Testing')


def multi_gpu_merge(l, base, layer):
    """Recursively sum entry ``layer`` across the per-GPU list ``l``.

    Splits the list in half, merges each half (the right half rooted at
    the device index ``base + len(l) / 2``), switches to device ``base``
    and returns the sum of the two partial results.
    """
    if len(l) == 1:
        # Single source: nothing to merge, just pick the layer's entry.
        return l[0][layer]
    half = len(l) / 2
    left_sum = multi_gpu_merge(l[:half], base, layer)
    right_sum = multi_gpu_merge(l[half:], base + half, layer)
    # Perform the addition on the base device of this subtree.
    owl.set_device(base)
    return left_sum + right_sum


if __name__ == '__main__':
    owl.initialize(sys.argv)
    # Command line: -n/--num chooses how many GPUs to train on.
    parser = argparse.ArgumentParser(description='MNIST CNN')
    parser.add_argument('-n',
                        '--num',
                        help='number of GPUs to use',
                        action='store',
                        type=int,
                        default=1)
    args = parser.parse_args()
    assert (1 <= args.num)
    print 'Using %d GPU(s)' % args.num
    # Create one device handle per GPU; gpu[0] becomes the default device.
    gpu = [owl.create_gpu_device(i) for i in range(args.num)]
    owl.set_device(gpu[0])
    # Randomly initialize the model and start training.
    model = MNISTCNNModel()
    model.init_random()
    train_network(model)
Пример #47
0
# Minimal owl setup: initialize the runtime, register one CPU device and
# two GPU devices, and make GPU 0 the current device.
import owl
import sys

owl.initialize(sys.argv)
owl.create_cpu_device()
gpu0 = owl.create_gpu_device(0)
gpu1 = owl.create_gpu_device(1)
owl.set_device(gpu0)
Пример #48
0
def train_network(model, num_epochs = 100, minibatch_size=256,
        dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    gpu = owl.create_gpu_device(1)
    owl.set_device(gpu)
    num_layers = 20
    count = 0
    last = time.time()

    dp = ImageNetDataProvider(mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
            train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
            val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
            test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')

    acts = [None] * num_layers
    sens = [None] * num_layers

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        sys.stdout.flush()
        for (samples, labels) in dp.get_train_mb(minibatch_size):
            num_samples = samples.shape[0]

            acts = [None] * num_layers
            sens = [None] * num_layers

            '''
            thisimg = samples[0, :]
            print thisimg
            imgdata = np.transpose(thisimg.reshape([3, 227*227])).reshape([227, 227, 3])
            print imgdata
            img = Image.fromarray(imgdata.astype(np.uint8))
            img.save('testimg.jpg', format='JPEG')
            exit(0)
            '''

            # FF
            acts[0] = owl.from_nparray(samples).reshape([227, 227, 3, num_samples])
            #print np.array(acts[0].tolist())[0:227*227*3]

            target = owl.from_nparray(labels)

            #np.set_printoptions(linewidth=200)
            #print acts[0].shape, model.weights[0].shape, model.bias[0].shape
            #im = np.array(acts[0].tolist()).reshape([num_samples, 227, 227, 3])
            #print im[0,:,:,0]
            #print im[0,:,:,1]
            #print im[0,:,:,2]
            #print target.max_index(0).tolist()[0:20]
            #sys.exit()

            acts1 = conv_forward(acts[0], model.weights[0], model.bias[0], model.conv_infos[0])
            acts[1] = ele.relu(acts1)#(conv_forward(acts[0], model.weights[0], model.bias[0], model.conv_infos[0])) # conv1
            acts[2] = pooling_forward(acts[1], model.pooling_infos[0]) # pool1
            acts3 = conv_forward(acts[2], model.weights[1], model.bias[1], model.conv_infos[1]) # conv2
            acts[3] = ele.relu(acts3)#(conv_forward(acts[2], model.weights[1], model.bias[1], model.conv_infos[1])) # conv2
            acts[4] = pooling_forward(acts[3], model.pooling_infos[1]) # pool2
            acts5 = conv_forward(acts[4], model.weights[2], model.bias[2], model.conv_infos[2]) # conv3
            acts[5] = ele.relu(acts5)#(conv_forward(acts[4], model.weights[2], model.bias[2], model.conv_infos[2])) # conv3
            acts6 = conv_forward(acts[5], model.weights[3], model.bias[3], model.conv_infos[3]) # conv4
            acts[6] = ele.relu(acts6)#(conv_forward(acts[5], model.weights[3], model.bias[3], model.conv_infos[3])) # conv4
            acts7 = conv_forward(acts[6], model.weights[4], model.bias[4], model.conv_infos[4]) # conv5
            acts[7] = ele.relu(acts7)#(conv_forward(acts[6], model.weights[4], model.bias[4], model.conv_infos[4])) # conv5
            acts[8] = pooling_forward(acts[7], model.pooling_infos[2]) # pool5
            re_acts8 = acts[8].reshape([np.prod(acts[8].shape[0:3]), num_samples])
            acts9 = model.weights[5] * re_acts8 + model.bias[5] # fc6
            acts[9] = ele.relu(acts9)#(model.weights[5] * re_acts8 + model.bias[5]) # fc6
            mask6 = owl.randb(acts[9].shape, dropout_rate)
            acts[9] = ele.mult(acts[9], mask6) # drop6
            acts10 = model.weights[6] * acts[9] + model.bias[6] # fc7
            acts[10] = ele.relu(acts10)#(model.weights[6] * acts[9] + model.bias[6]) # fc7
            mask7 = owl.randb(acts[10].shape, dropout_rate)
            acts[10] = ele.mult(acts[10], mask7) # drop7
            acts[11] = model.weights[7] * acts[10] + model.bias[7] # fc8
            acts[12] = softmax_forward(acts[11].reshape([1000, 1, 1, num_samples]), soft_op.instance).reshape([1000, num_samples]) # prob

            # error
            sens[11] = acts[12] - target

            # BP
            sens[10] = model.weights[7].trans() * sens[11] # fc8
            sens[10] = ele.mult(sens[10], mask7) # drop7
            sens[10] = ele.relu_back(sens[10], acts[10], acts10) # relu7
            sens[9] = model.weights[6].trans() * sens[10]
            sens[9] = ele.mult(sens[9], mask6) # drop6
            sens[9] = ele.relu_back(sens[9], acts[9], acts9) # relu6
            sens[8] = (model.weights[5].trans() * sens[9]).reshape(acts[8].shape) # fc6
            sens[7] = pooling_backward(sens[8], acts[8], acts[7], model.pooling_infos[2]) # pool5
            sens[7] = ele.relu_back(sens[7], acts[7], acts7) # relu5
            sens[6] = conv_backward_data(sens[7], model.weights[4], model.conv_infos[4]) # conv5
            sens[6] = ele.relu_back(sens[6], acts[6], acts6) # relu4
            sens[5] = conv_backward_data(sens[6], model.weights[3], model.conv_infos[3]) # conv4
            sens[5] = ele.relu_back(sens[5], acts[5], acts5) # relu3
            sens[4] = conv_backward_data(sens[5], model.weights[2], model.conv_infos[2]) # conv3
            sens[3] = pooling_backward(sens[4], acts[4], acts[3], model.pooling_infos[1]) # pool2
            sens[3] = ele.relu_back(sens[3], acts[3], acts3) # relu2
            sens[2] = conv_backward_data(sens[3], model.weights[1], model.conv_infos[1]) # conv2
            sens[1] = pooling_backward(sens[2], acts[2], acts[1], model.pooling_infos[0]) # pool1
            sens[1] = ele.relu_back(sens[1], acts[1], acts1) # relu1

	    model.weightsdelta[7] = mom * model.weightsdelta[7] - eps_w / num_samples  * (sens[11] * acts[10].trans() + wd * model.weights[7])
            model.biasdelta[7] = mom * model.biasdelta[7] - eps_b / num_samples  * (sens[11].sum(1) + wd * model.bias[7])
            
	    model.weightsdelta[6] = mom * model.weightsdelta[6] - eps_w / num_samples  * (sens[10] * acts[9].trans() + wd * model.weights[6])
            model.biasdelta[6] = mom * model.biasdelta[6] - eps_b / num_samples  * (sens[10].sum(1) + wd * model.bias[6])
    	    
	    model.weightsdelta[5] = mom * model.weightsdelta[5] - eps_w / num_samples  * (sens[9] * re_acts8.trans() + wd * model.weights[5])
            model.biasdelta[5] = mom * model.biasdelta[5] - eps_b / num_samples  * (sens[9].sum(1) + wd * model.bias[5])
            	
            model.weightsdelta[4] = mom * model.weightsdelta[4] - eps_w / num_samples  * (conv_backward_filter(sens[7], acts[6], model.conv_infos[4]) + wd * model.weights[4])
	    model.biasdelta[4] = mom * model.biasdelta[4] - eps_b / num_samples  * (conv_backward_bias(sens[7]) + wd * model.bias[4])

	    model.weightsdelta[3] = mom * model.weightsdelta[3] - eps_w / num_samples  * (conv_backward_filter(sens[6], acts[5], model.conv_infos[3]) + wd * model.weights[3])
	    model.biasdelta[3] = mom * model.biasdelta[3] - eps_b / num_samples  * (conv_backward_bias(sens[6]) + wd * model.bias[3])

 	    model.weightsdelta[2] = mom * model.weightsdelta[2] - eps_w / num_samples  * (conv_backward_filter(sens[5], acts[4], model.conv_infos[2]) + wd * model.weights[2])
	    model.biasdelta[2] = mom * model.biasdelta[2] - eps_b / num_samples  * (conv_backward_bias(sens[5]) + wd * model.bias[2])

  	    model.weightsdelta[1] = mom * model.weightsdelta[1] - eps_w / num_samples  * (conv_backward_filter(sens[3], acts[2], model.conv_infos[1]) + wd * model.weights[1])
	    model.biasdelta[1] = mom * model.biasdelta[1] - eps_b / num_samples  * (conv_backward_bias(sens[3]) + wd * model.bias[1])

            model.weightsdelta[0] = mom * model.weightsdelta[0] - eps_w / num_samples  * (conv_backward_filter(sens[1], acts[0], model.conv_infos[0]) + wd * model.weights[0])
	    model.biasdelta[0] = mom * model.biasdelta[0] - eps_b / num_samples  * (conv_backward_bias(sens[1]) + wd * model.bias[0])

            for k in range(8):
                model.weights[k] += model.weightsdelta[k]
                model.bias[k] += model.biasdelta[k]

            count = count + 1
            #if count % 2 == 0:
                #acts[18].start_eval()
            if count % 10 == 0:
                print_training_accuracy(acts[12], target, num_samples)
                print "time: %s" % (time.time() - last)
                last = time.time()
Пример #49
0
    #prepare the net and solver
    builder = CaffeNetBuilder(sys.argv[1], sys.argv[2])
    owl_net = net.Net()
    builder.build_net(owl_net)
    builder.init_net_from_file(owl_net, sys.argv[3])
    accunitname = sys.argv[4]
    last = time.time()

    wunits = get_weights_id(owl_net)
    print len(wunits)
    wgrad = [[] for i in xrange(4)]
    bgrad = [[] for i in xrange(4)]

    for iteridx in range(owl_net.solver.max_iter):
        gpuidx = iteridx % 4
        owl.set_device(gpu[gpuidx])
        owl_net.forward('TRAIN')
        owl_net.backward('TRAIN')

        for wid in wunits:
            wgrad[gpuidx].append(owl_net.units[wid].weightgrad)
            bgrad[gpuidx].append(owl_net.units[wid].biasgrad)
        owl_net.get_units_by_name(accunitname)[0].ff_y.start_eval()

        if (iteridx + 1) % 2 == 0:
            for i in range(len(wunits)):
                wid = wunits[i]
                wgrad[gpuidx][i] += wgrad[gpuidx - 1][i]
                bgrad[gpuidx][i] += bgrad[gpuidx - 1][i]

        if (iteridx + 1) % 4 == 0:
Пример #50
0
    bgrad = [[] for i in xrange(num_gpu)]

    for iteridx in range(startsnapshot * owl_net.solver.snapshot,
                         owl_net.solver.max_iter):
        #get the learning rate
        if owl_net.solver.lr_policy == "poly":
            owl_net.current_lr = owl_net.base_lr * pow(
                1 - float(iteridx) / owl_net.solver.max_iter,
                owl_net.solver.power)
        elif owl_net.solver.lr_policy == "step":
            owl_net.current_lr = owl_net.base_lr * pow(
                owl_net.solver.gamma, iteridx / owl_net.solver.step)

        # train on multi-gpu
        for gpuid in range(0, num_gpu):
            owl.set_device(gpuid)
            owl_net.forward('TRAIN')
            owl_net.backward('TRAIN')

            for wid in wunits:
                wgrad[gpuid].append(owl_net.units[wid].weightgrad)
                bgrad[gpuid].append(owl_net.units[wid].biasgrad)
            owl_net.get_units_by_name(evallayername)[0].ff_y.start_eval()

            if gpuid % 2 == 1:
                for i in range(len(wunits)):
                    wid = wunits[i]
                    wgrad[gpuid][i] += wgrad[gpuid - 1][i]
                    bgrad[gpuid][i] += bgrad[gpuid - 1][i]

            if gpuid == 3: