Exemplo n.º 1
0
def train():
    """Train a network initialised from ``alexnet.weights_file()``.

    All settings are read from the module-level ``hyper`` dict. Each
    iteration runs ``forward``, back-propagates and applies an update with a
    step-decayed learning rate. Progress is logged, snapshots are saved and a
    smoothed loss curve is drawn at the intervals configured in ``hyper``.
    """
    net = apollo.Net()

    # A forward pass is run (and reset) before the weights are loaded --
    # presumably so that the layers exist when net.load() is called.
    forward(net)
    net.reset_forward()
    net.load(alexnet.weights_file())

    train_loss_hist = []
    for step in range(hyper['max_iter']):
        loss = forward(net)
        train_loss_hist.append(loss)
        net.backward()

        # Step decay: base_lr * gamma ** (number of completed stepsize periods).
        base_lr = hyper['base_lr']
        lr = base_lr * hyper['gamma'] ** (step // hyper['stepsize'])
        net.update(
            lr=lr,
            momentum=hyper['momentum'],
            clip_gradients=hyper.get('clip_gradients', -1),
            weight_decay=hyper['weight_decay'])

        if step % hyper['display_interval'] == 0:
            # Mean loss over the most recent display window.
            recent = train_loss_hist[-hyper['display_interval']:]
            logging.info('Iteration %d: %s' % (step, np.mean(recent)))

        if step % hyper['snapshot_interval'] == 0 and step > 0:
            filename = '%s_%d.h5' % (hyper['snapshot_prefix'], step)
            logging.info('Saving net to: %s' % filename)
            net.save(filename)
            # The whole loss history is rewritten next to every snapshot.
            with open('/tmp/log.txt', 'w') as log_file:
                log_file.write(json.dumps(train_loss_hist))

        if step % hyper['graph_interval'] == 0 and step > 0:
            # Moving average over `window` iterations; the slice trims the
            # partially-overlapping ends of the full convolution.
            window = 100
            kernel = np.ones(window) / window
            smoothed = np.convolve(train_loss_hist, kernel)[window:-window]
            plt.plot(smoothed)
            filename = '%strain_loss.jpg' % hyper['graph_prefix']
            logging.info('Saving figure to: %s' % filename)
            plt.savefig(filename)
Exemplo n.º 2
0
def eval(hyper):
    """Load the final training snapshot and print one evaluation result.

    The snapshot path is ``<snapshot_prefix>/<max_iter - 1>.h5``, built from
    the ``hyper`` dict. Whatever ``evaluate_forward`` returns is printed.
    """
    eval_net = apollo.Net()
    # The net is evaluated once so its structure exists before parameters
    # are loaded into it.
    evaluate_forward(eval_net)

    prefix = hyper['snapshot_prefix']
    last_iter = hyper['max_iter'] - 1
    weights_path = '%s/%d.h5' % (prefix, last_iter)
    eval_net.load(weights_path)

    result = evaluate_forward(eval_net)
    print(result)
Exemplo n.º 3
0
def eval_performance(net, hyper):
    """Run ``eval_forward`` on a fresh test-phase copy of ``net``.

    A new ``apollo.Net`` is built with one forward pass, switched to the
    ``'test'`` phase, given the parameters of ``net`` and then run through
    ``eval_forward`` once more. Nothing is returned.
    """
    def forward_then_reset(target):
        # One evaluation pass followed by clearing the forward state.
        eval_forward(target, hyper)
        target.reset_forward()

    test_net = apollo.Net()
    # First pass happens before the parameters are copied in -- presumably
    # to create the layers that copy_params_from() will fill.
    forward_then_reset(test_net)

    test_net.phase = 'test'
    test_net.copy_params_from(net)
    forward_then_reset(test_net)
Exemplo n.º 4
0
def run(hyper):
    """Train the prototxt-defined network described by ``hyper``.

    Two architectures are built from ``hyper['net_prototxt']``: one for
    training and one in the ``'test'`` phase. The training loop logs the mean
    training error, runs ``hyper['test_iter']`` test passes and writes
    snapshots at the intervals given in ``hyper``.
    """
    net = apollo.Net()
    arch = apollo.Architecture()
    arch.load_from_proto(hyper['net_prototxt'])

    test_net = apollo.Net(phase='test')
    test_arch = apollo.Architecture(phase='test')
    test_arch.load_from_proto(hyper['net_prototxt'])

    # Initial forward pass on the training net, then optional weight loading.
    arch.forward(net)
    if hyper['weights']:
        print('loading')
        net.load(hyper['weights'])
    net.reset_forward()

    # Same initial pass for the test net.
    test_arch.forward(test_net)
    test_net.reset_forward()

    def test_performance(src_net, dst_net):
        # Copy the current parameters into the test net and log the mean of
        # the values returned by its forward passes.
        dst_net.copy_params_from(src_net)
        test_error = []
        for _ in range(hyper['test_iter']):
            result = test_arch.forward(dst_net)
            test_error.append(result)
            dst_net.reset_forward()
        logging.info('Test Error: %f' % np.mean(test_error))

    error = []
    for step in range(hyper['max_iter']):
        error.append(arch.forward(net))
        net.backward()

        # NOTE(review): the rate is *divided* by gamma ** k here, so it only
        # decays when gamma > 1 -- confirm this is the intended convention.
        base_lr = hyper['base_lr']
        lr = base_lr / hyper.get('gamma', 1.) ** (step // hyper['stepsize'])
        net.update(
            lr=lr,
            momentum=hyper['momentum'],
            clip_gradients=hyper.get('clip_gradients', -1),
            weight_decay=hyper['weight_decay'])

        if step % hyper['display_interval'] == 0:
            logging.info('Iteration %d: %s' % (step, np.mean(error)))
            # Start a new averaging window after each report.
            error = []
        if step % hyper['test_interval'] == 0:
            test_performance(net, test_net)
        if step % hyper['snapshot_interval'] == 0 and step > 0:
            snapshot_path = '%s_%d.h5' % (hyper['snapshot_prefix'], step)
            net.save(snapshot_path)
Exemplo n.º 5
0
def train(hyper):
    """Configure apollo from ``hyper`` and run the training loop.

    Sets the random seed, device and logging verbosity, draws the network
    schematic, optionally loads ``hyper['weights']`` and then trains from
    ``hyper['start_iter']`` up to (not including) ``hyper['max_iter']``.
    Progress is logged and snapshots and a smoothed loss plot are written at
    the configured intervals.
    """
    apollo.set_random_seed(hyper['random_seed'])
    if hyper['gpu'] is not None:
        apollo.set_mode_gpu()
        apollo.set_device(hyper['gpu'])
        logging.info('Using gpu device %d' % hyper['gpu'])
    else:
        apollo.set_mode_cpu()
        logging.info('Using cpu device (pass --gpu X to train on the gpu)')
    apollo.set_logging_verbosity(hyper['loglevel'])

    net = apollo.Net()
    # One forward pass before drawing, so there is a network to draw.
    forward(net, hyper)
    network_path = '%s/network.jpg' % hyper['schematic_prefix']
    net.draw_to_file(network_path)
    logging.info('Drawing network to %s' % network_path)
    net.reset_forward()

    if 'weights' in hyper:
        logging.info('Loading weights from %s' % hyper['weights'])
        net.load(hyper['weights'])

    train_loss_hist = []
    for step in xrange(hyper['start_iter'], hyper['max_iter']):
        loss = forward(net, hyper)
        train_loss_hist.append(loss)
        net.backward()

        # Step decay of the learning rate.
        base_lr = hyper['base_lr']
        lr = base_lr * hyper['gamma'] ** (step // hyper['stepsize'])
        net.update(
            lr=lr,
            momentum=hyper['momentum'],
            clip_gradients=hyper['clip_gradients'])

        if step % hyper['display_interval'] == 0:
            recent = train_loss_hist[-hyper['display_interval']:]
            logging.info('Iteration %d: %s' % (step, np.mean(recent)))

        if step % hyper['snapshot_interval'] == 0 and step > hyper['start_iter']:
            filename = '%s/%d.h5' % (hyper['snapshot_prefix'], step)
            logging.info('Saving net to: %s' % filename)
            net.save(filename)

        if step % hyper['graph_interval'] == 0 and step > hyper['start_iter']:
            # Moving average of the loss; the slice drops the ends of the
            # full convolution where the window only partially overlaps.
            window = hyper.get('sub', 100)
            kernel = np.ones(window) / window
            smoothed = np.convolve(train_loss_hist, kernel)[window:-window]
            plt.plot(smoothed)
            filename = '%s/train_loss.jpg' % hyper['graph_prefix']
            logging.info('Saving figure to: %s' % filename)
            plt.savefig(filename)
Exemplo n.º 6
0
def main():
    """Train a prototxt-defined network using a solver module.

    The file passed as ``--solver`` is loaded as a Python module and its
    ``get_hyper()`` supplies the base settings. Every command-line argument
    that is not ``None`` overrides the corresponding setting. Training is
    delegated to ``apollo.utils.training.default_train``.
    """
    parser = apollo.utils.training.default_parser()
    parser.add_argument('--solver')
    args = parser.parse_args()

    config = imp.load_source('module.name', args.solver)

    # Solver settings first, then command-line overrides on top.
    hyper = {}
    hyper.update(config.get_hyper())
    overrides = {}
    for key, value in vars(args).iteritems():
        if value is not None:
            overrides[key] = value
    hyper.update(overrides)

    prototxt = hyper['net_prototxt']
    arch = Architecture()
    arch.load_from_proto(prototxt)

    # NOTE(review): test_net is constructed but never used below; kept
    # because the constructor's side effects are not visible here.
    test_net = apollo.Net(phase='test')
    test_arch = Architecture(phase='test')
    test_arch.load_from_proto(hyper['net_prototxt'])

    apollo.utils.training.default_train(
        hyper,
        forward=arch.forward,
        test_forward=test_arch.forward)
Exemplo n.º 7
0
def main():
    """Train on sentence batches and print a running error every 10 steps.

    Reads ``--loglevel`` from the command line, configures Caffe (fixed seed
    10, GPU device 0), runs one reshape-only pass and then 100000 training
    iterations through the module-level ``iter`` function.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--loglevel', default=0)
    args = parser.parse_args()
    # Verbosity is taken from --loglevel.
    apollo.Caffe.set_logging_verbosity(int(args.loglevel))

    sentences = get_data()
    sentence_batches = get_data_batch(sentences)

    net = apollo.Net()
    net.set_phase_train()
    apollo.Caffe.set_random_seed(10)
    apollo.Caffe.set_mode_gpu()
    apollo.Caffe.set_device(0)

    # First call runs with reshape_only set; its return value is discarded.
    # `iter` here is the file's own training-step function (it takes two
    # arguments), not the builtin.
    net.reshape_only = True
    iter(net, sentence_batches)
    net.reshape_only = False

    display_interval = 10
    running_error = 0
    for step in range(100000):
        loss = iter(net, sentence_batches)
        # Each loss is pre-divided so the accumulated sum is the average
        # that gets printed.
        running_error += loss / display_interval
        if step % display_interval == 0 and step > 0:
            print('Iteration %d: %s' % (step, np.mean(running_error)))
            running_error = 0
Exemplo n.º 8
0
def train():
    """Train a fresh network using the module-level ``hyper`` settings.

    Runs ``hyper['max_iter']`` iterations of forward, backward and update
    with a step-decayed learning rate. A snapshot is saved at every snapshot
    interval and also on the final iteration. A smoothed loss curve is saved
    at every graph interval.
    """
    net = apollo.Net()
    train_loss_hist = []

    for step in range(hyper['max_iter']):
        loss = forward(net)
        train_loss_hist.append(loss)
        net.backward()

        # Step decay of the learning rate.
        base_lr = hyper['base_lr']
        lr = base_lr * hyper['gamma'] ** (step // hyper['stepsize'])
        net.update(
            lr=lr,
            momentum=hyper['momentum'],
            clip_gradients=hyper['clip_gradients'],
            weight_decay=hyper['weight_decay'])

        if step % hyper['display_interval'] == 0:
            recent = train_loss_hist[-hyper['display_interval']:]
            logging.info('Iteration %d: %s' % (step, np.mean(recent)))

        # Snapshot periodically (never at step 0) and on the last iteration.
        if ((step % hyper['snapshot_interval'] == 0 and step > 0)
                or step == hyper['max_iter'] - 1):
            filename = '%s_%d.h5' % (hyper['snapshot_prefix'], step)
            logging.info('Saving net to: %s' % filename)
            net.save(filename)

        if step % hyper['graph_interval'] == 0 and step > 0:
            # Moving average of the loss over `window` iterations.
            window = 100
            kernel = np.ones(window) / window
            smoothed = np.convolve(train_loss_hist, kernel)[window:-window]
            plt.plot(smoothed)
            plt.savefig('%strain_loss.jpg' % hyper['graph_prefix'])
Exemplo n.º 9
0
def main():
    """Evaluate the character model snapshot ``./char/4000.h5``.

    Builds the settings dict, derives the symbol ids from the module-level
    ``vocab``, applies any non-``None`` command-line overrides and passes the
    loaded network to ``eval_performance``.
    """
    hyper = {
        'max_iter': 10000,
        'snapshot_prefix': './char/',
        'schematic_prefix': './graph/',
        'snapshot_interval': 1000,
        'random_seed': 22,
        'gamma': 0.8,
        'stepsize': 2500,
        'graph_interval': 1000,
        'graph_prefix': './graph/',
        'mem_cells': 250,
        'vocab_size': 256,  # replaced below once the vocabulary size is known
        'batch_size': 32,
        'init_range': 0.1,
        'test_interval': 100,
        'test_iter': 1,
        'base_lr': 0.2,
        'weight_decay': 0,
        'momentum': 0.0,
        'clip_gradients': 100,
        'display_interval': 100,
        'length': 2000,
        'i_temperature': 1.5,
    }

    # Symbol ids are placed just past the vocabulary.
    # NOTE(review): zero_symbol equals vocab_size, i.e. one past the largest
    # id if ids are 0-based -- confirm this is intended.
    vocab_len = len(vocab)
    hyper['unknown_symbol'] = vocab_len + 1
    hyper['zero_symbol'] = vocab_len + 2
    hyper['vocab_size'] = vocab_len + 2

    # Command-line values that were actually supplied win over the defaults.
    args = apollo.utils.training.default_parser().parse_args()
    for key, value in vars(args).iteritems():
        if value is not None:
            hyper[key] = value

    net = apollo.Net()
    # One forward pass happens before the snapshot is loaded.
    eval_forward(net, hyper)
    net.load('./char/4000.h5')
    eval_performance(net, hyper)
Exemplo n.º 10
0
    '-n',
    '--name',
    default="latest",
    type=str,
    help="Session name; prefix used to save snapshots and loss history")
# Parse the command line and seed Python's own RNG with a fixed value.
args = parser.parse_args()
random.seed(0)

# --- Use them to init caffe state ---
# The Caffe seed comes from hyper; device and verbosity come from the
# command line. This script always selects GPU mode.
apollo.Caffe.set_random_seed(hyper['random_seed'])
apollo.Caffe.set_mode_gpu()
apollo.Caffe.set_device(args.gpu)
apollo.Caffe.set_logging_verbosity(args.loglevel)

# --- Initialize network ---
net = apollo.Net()
# NOTE(review): the training and test iterators are both created with
# data_type="extra" -- confirm the test iterator should not use another split.
batch_iter = get_batch_iterator(hyper['batch_size'], data_type="extra")
test_batch_iter = get_batch_iterator(hyper['batch_size'], data_type="extra")

# The first batch is consumed here to construct the OverfeatNet wrapper.
batch = batch_iter.next()
overfeat_net = OverfeatNet(net, batch)
# Optionally resume from previously saved parameters.
if args.parameter_file:
    overfeat_net.net.load(args.parameter_file)

# --- Do the training ---
# Histories collected during training; they start empty.
train_loss_hist = []
binary_softmax_hist = []
label_softmax_hist = []
bbox_loss_hist = []

for i in range(hyper['max_iter']):
Exemplo n.º 11
0
def train():
    """Train the sequence model and periodically log a sample translation.

    Settings come from the module-level ``hyper`` dict. Each iteration runs
    ``forward`` on the sentence batches, back-propagates and applies an
    update with a step-decayed learning rate. At every display interval the
    mean recent loss is logged together with the input, output and target
    words of the first example in the batch. Snapshots (plus the loss history
    in ``/tmp/log.txt``) and a smoothed loss plot are written at their own
    intervals.
    """
    net = apollo.Net()
    # NOTE(review): test_net is never used in this function; kept because
    # the constructor's side effects are not visible here.
    test_net = apollo.Net()

    sentences = get_data()
    sentence_batches = get_data_batch(sentences)

    # One forward pass happens before optional weight loading.
    forward(net, sentence_batches)
    if hyper['weights'] is not None:
        net.load(hyper['weights'])
    net.reset_forward()
    train_loss_hist = []

    for i in range(hyper['max_iter']):
        train_loss_hist.append(forward(net, sentence_batches))
        net.backward()
        # Step decay of the learning rate.
        lr = (hyper['base_lr'] * (hyper['gamma'])**(i // hyper['stepsize']))
        net.update(lr=lr,
                   momentum=hyper['momentum'],
                   clip_gradients=hyper['clip_gradients'],
                   weight_decay=hyper['weight_decay'])
        if i % hyper['display_interval'] == 0:
            logging.info(
                'Iteration %d: %s' %
                (i, np.mean(train_loss_hist[-hyper['display_interval']:])))
            output = []
            target = []
            source = []
            for step in range(hyper['max_len']):
                # Collect per-step values for the first example until a
                # step's tops cannot be read (best-effort, as before).
                try:
                    output.append(
                        np.argmax(net.tops['softmax%d' %
                                           step].data[0].flatten()))
                    # Builtin int replaces np.int, which was only an alias
                    # for it and no longer exists in current NumPy.
                    target.append(
                        int(net.tops['word%d' % step].data[0].flatten()))
                    source.append(
                        int(net.tops['source_word%d' %
                                     step].data[0].flatten()[0]))
                except Exception:
                    # Still stops on any ordinary error, but no longer
                    # swallows KeyboardInterrupt / SystemExit.
                    break
            logging.info('input:\t' + ' '.join(es_ivocab[x] for x in source))
            logging.info('output:\t' +
                         ' '.join(en_ivocab[x]
                                  for x in padded_reverse(output)))
            logging.info('target:\t' +
                         ' '.join(en_ivocab[x]
                                  for x in padded_reverse(target)))
        if i % hyper['snapshot_interval'] == 0 and i > 0:
            filename = '%s_%d.h5' % (hyper['snapshot_prefix'], i)
            logging.info('Saving net to: %s' % filename)
            net.save(filename)
            # The whole loss history is rewritten next to every snapshot.
            with open('/tmp/log.txt', 'w') as f:
                f.write(json.dumps(train_loss_hist))
        if i % hyper['graph_interval'] == 0 and i > 0:
            # Moving average of the loss over `sub` iterations; the slice
            # trims the partially-overlapping ends of the convolution.
            sub = 100
            plt.plot(
                np.convolve(train_loss_hist,
                            np.ones(sub) / sub)[sub:-sub])
            filename = '%strain_loss.jpg' % hyper['graph_prefix']
            logging.info('Saving figure to: %s' % filename)
            plt.savefig(filename)
Exemplo n.º 12
0
        net.forward_layer(
            layers.InnerProduct(name='ip%d' % step,
                                bottoms=['dropout%d' % step],
                                param_names=['ip_weight', 'ip_bias'],
                                num_output=hyper['vocab_size'],
                                weight_filler=filler))
        loss.append(
            net.forward_layer(
                layers.SoftmaxWithLoss(
                    name='softmax_loss%d' % step,
                    ignore_label=hyper['zero_symbol'],
                    bottoms=['ip%d' % step, 'label%d' % step])))
    return np.mean(loss)


# Script-level setup: create the nets and the training data.
net = apollo.Net()
# NOTE(review): test_net is not used in the visible part of this script.
test_net = apollo.Net()

sentences = get_data()
sentence_batches = get_data_batch(sentences)

# One forward pass is run before the network is drawn to /tmp/s2s.png;
# the forward state is then reset before the training loop starts.
forward(net, sentence_batches)
net.draw_to_file('/tmp/s2s.png')
net.reset_forward()
# Per-iteration training losses; starts empty.
train_loss_hist = []

for i in range(hyper['max_iter']):
    train_loss_hist.append(forward(net, sentence_batches))
    net.backward()
    lr = (hyper['base_lr'] * (hyper['gamma'])**(i // hyper['stepsize']))
    net.update(lr=lr,
Exemplo n.º 13
0
def default_train(hyper, forward, test_forward=None):
    """Run the default apollo training loop.

    Args:
        hyper: settings dict; it is layered on top of ``default_hyper()`` and
            checked with ``validate_hyper`` before use.
        forward: callable ``forward(net, hyper)`` whose return value is
            recorded as the training loss of the iteration.
        test_forward: callable used for the test passes; defaults to
            ``forward``.

    Training runs from ``hyper['start_iter']`` up to (not including)
    ``hyper['max_iter']``. Progress logging, snapshots, the smoothed loss
    plot and test passes happen at the intervals given in ``hyper``.
    """
    if test_forward is None:
        test_forward = forward

    # The 'Agg' backend is selected before pyplot is imported.
    import matplotlib
    matplotlib.use('Agg', warn=False)
    import matplotlib.pyplot as plt

    # Caller-supplied settings override the defaults.
    settings = default_hyper()
    settings.update(hyper)
    hyper = settings
    validate_hyper(hyper)

    apollo.set_random_seed(hyper['random_seed'])
    if hyper['gpu'] is not None:
        apollo.set_mode_gpu()
        apollo.set_device(hyper['gpu'])
        logging.info('Using gpu device %d' % hyper['gpu'])
    else:
        apollo.set_mode_cpu()
        logging.info('Using cpu device (pass --gpu X to train on the gpu)')
    apollo.set_logging_verbosity(hyper['loglevel'])

    net = apollo.Net()
    # One forward pass before drawing, so there is a network to draw.
    forward(net, hyper)
    network_path = '%s/network.jpg' % hyper['schematic_prefix']
    net.draw_to_file(network_path)
    logging.info('Drawing network to %s' % network_path)
    net.reset_forward()

    # The explicit comparison with True is kept on purpose: only values
    # equal to True select a separate test net.
    wants_separate_net = hyper.get('separate_test_net', True) == True
    if wants_separate_net:
        test_net = apollo.Net()
        test_forward(test_net, hyper)
        test_net.reset_forward()
    else:
        # Testing reuses the training net itself.
        test_net = net

    if 'weights' in hyper:
        logging.info('Loading weights from %s' % hyper['weights'])
        net.load(hyper['weights'])

    train_loss_hist = []
    for step in xrange(hyper['start_iter'], hyper['max_iter']):
        loss = forward(net, hyper)
        train_loss_hist.append(loss)
        net.backward()

        # Step decay; with the fallbacks (gamma 1, stepsize sys.maxint) the
        # rate stays at base_lr.
        base_lr = hyper['base_lr']
        decay = hyper.get('gamma', 1.) ** (step // hyper.get('stepsize', sys.maxint))
        lr = base_lr * decay
        net.update(
            lr=lr,
            momentum=hyper.get('momentum', 0.0),
            clip_gradients=hyper.get('clip_gradients', -1),
            weight_decay=hyper.get('weight_decay', 0.0))

        if step % hyper['display_interval'] == 0:
            recent = train_loss_hist[-hyper['display_interval']:]
            logging.info('Iteration %d: %s' % (step, np.mean(recent)))

        if step % hyper['snapshot_interval'] == 0 and step > hyper['start_iter']:
            filename = '%s/%d.h5' % (hyper['snapshot_prefix'], step)
            logging.info('Saving net to: %s' % filename)
            net.save(filename)

        if step % hyper['graph_interval'] == 0 and step > hyper['start_iter']:
            # Moving average of the loss; the slice trims the ends of the
            # full convolution where the window only partially overlaps.
            window = hyper.get('sub', 100)
            kernel = np.ones(window) / window
            smoothed = np.convolve(train_loss_hist, kernel)[window:-window]
            plt.plot(smoothed)
            filename = '%s/train_loss.jpg' % hyper['graph_prefix']
            logging.info('Saving figure to: %s' % filename)
            plt.savefig(filename)

        test_every = hyper['test_interval']
        if test_every is not None and step % test_every == 0:
            test_loss = []
            accuracy = []
            # Switch to the test phase and refresh the parameters from the
            # training net before the test passes.
            test_net.phase = 'test'
            test_net.copy_params_from(net)
            for _ in xrange(hyper['test_iter']):
                test_loss.append(test_forward(test_net, hyper))
                test_net.reset_forward()
                # Accuracy is collected only when the net exposes such a top.
                if 'accuracy' in test_net.tops:
                    accuracy.append(test_net.tops['accuracy'].data.flatten()[0])
            if accuracy:
                logging.info('Accuracy: %.5f' % np.mean(accuracy))
            logging.info('Test loss: %f' % np.mean(test_loss))
            # Restore the training phase (matters when test_net is net).
            test_net.phase = 'train'