Example #1
  def __init__(self, num_actions, args):
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error
    self.min_reward = args.min_reward
    self.max_reward = args.max_reward
    self.batch_norm = args.batch_norm

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.input = self.be.empty(self.input_shape)
    self.input.lshape = self.input_shape # HACK: needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    # workaround: disable parallelism on every layer
    for l in self.model.layers.layers:
      l.parallelism = 'Disabled'
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate, 
          decay_rate = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    else:
      assert False, "Unknown optimizer"

    # create target model
    self.train_iterations = 0
    if args.target_steps:
      self.target_model = Model(layers = self._createLayers(num_actions))
      # workaround: disable parallelism on every layer
      for l in self.target_model.layers.layers:
        l.parallelism = 'Disabled'
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model

    self.callback = None
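
A training step is not shown here; the constructor only builds the network and picks the optimizer. A minimal sketch of how such an agent would consume them, assuming neon's fprop/bprop/optimize flow and that self.input and self.targets have already been filled for the current minibatch (the method name is hypothetical):

  def _train_step_sketch(self, epoch):
    # forward pass on the pre-filled input tensor
    qvalues = self.model.fprop(self.input, inference=False)
    # error signal from the cost, optionally clipped
    delta = self.cost.get_errors(qvalues, self.targets)
    if self.clip_error:
      self.be.clip(delta, -self.clip_error, self.clip_error, out=delta)
    # backward pass and one optimizer step
    self.model.bprop(delta)
    self.optimizer.optimize(self.model.layers_to_optimize, epoch=epoch)
    self.train_iterations += 1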
Example #2
def test_adadelta_wclip(backend_default):
    wclip = 0.5
    ada = Adadelta(param_clip_value=wclip)
    param = np.random.rand(200, 128)
    param2 = copy.deepcopy(param)
    grad = 0.01 * np.random.rand(200, 128)
    grad2 = grad / 128.
    states = [
        0.01 * np.random.rand(200, 128), 0.01 * np.random.rand(200, 128),
        0.01 * np.random.rand(200, 128)
    ]
    states2 = [
        copy.deepcopy(states[0]),
        copy.deepcopy(states[1]),
        copy.deepcopy(states[2])
    ]
    decay = ada.decay
    states2[0][:] = states2[0] * decay + (1. - decay) * grad2 * grad2
    states2[2][:] = np.sqrt(
        (states2[1] + float(ada.epsilon)) / (states2[0] + ada.epsilon)) * grad2
    states2[1][:] = states2[1] * decay + (1. - decay) * states2[2] * states2[2]
    param2[:] -= states2[2]
    np.clip(param2, -wclip, wclip, param2)
    param_list = [((wrap(param), wrap(grad)),
                   [wrap(states[0]),
                    wrap(states[1]),
                    wrap(states[2])])]
    compare_tensors(ada, param_list, param2, tol=1e-7)
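
The reference computation above mirrors the standard Adadelta recurrence. A standalone NumPy sketch of the same update, assuming the three state buffers hold, in order, the running mean of squared gradients, the running mean of squared updates, and the current update (decay=0.95 and epsilon=1e-6 are neon's defaults):

import numpy as np

def adadelta_update(param, grad, states, decay=0.95, epsilon=1e-6, wclip=None):
    # states[0]: running mean of squared gradients
    # states[1]: running mean of squared updates
    # states[2]: scratch buffer holding the current update
    states[0][:] = decay * states[0] + (1. - decay) * grad * grad
    states[2][:] = np.sqrt((states[1] + epsilon) / (states[0] + epsilon)) * grad
    states[1][:] = decay * states[1] + (1. - decay) * states[2] * states[2]
    param[:] -= states[2]
    if wclip is not None:
        np.clip(param, -wclip, wclip, out=param)
    return param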
Example #3
def run(train, test):
    init = Gaussian(scale=0.01)
    layers = [
        Conv((3, 3, 128),
             init=init,
             activation=Rectlin(),
             strides=dict(str_h=1, str_w=2)),
        Conv((3, 3, 256), init=init, batch_norm=True, activation=Rectlin()),
        Pooling(2, strides=2),
        Conv((2, 2, 512), init=init, batch_norm=True, activation=Rectlin()),
        DeepBiRNN(256,
                  init=init,
                  activation=Rectlin(),
                  reset_cells=True,
                  depth=3),
        RecurrentLast(),
        Affine(32, init=init, batch_norm=True, activation=Rectlin()),
        Affine(nout=common['nclasses'], init=init, activation=Softmax())
    ]

    model = Model(layers=layers)
    opt = Adadelta()
    metric = Misclassification()
    callbacks = Callbacks(model,
                          eval_set=test,
                          metric=metric,
                          **args.callback_args)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    model.fit(train,
              optimizer=opt,
              num_epochs=args.epochs,
              cost=cost,
              callbacks=callbacks)
    return model
Example #4
def main():
    # parse the command line arguments
    parser = NeonArgparser(__doc__)

    args = parser.parse_args()

    logger = logging.getLogger()
    logger.setLevel(args.log_thresh)

    #Set up batch iterator for training images
    train = ImgMaster(repo_dir='dataTmp_optFlow_BW',
                      set_name='train',
                      inner_size=240,
                      subset_pct=100)
    val = ImgMaster(repo_dir='dataTmp_optFlow_BW',
                    set_name='train',
                    inner_size=240,
                    subset_pct=100,
                    do_transforms=False)
    test = ImgMaster(repo_dir='dataTestTmp_optFlow_BW',
                     set_name='train',
                     inner_size=240,
                     subset_pct=100,
                     do_transforms=False)

    train.init_batch_provider()
    val.init_batch_provider()
    test.init_batch_provider()

    print "Constructing network..."
    #Create AlexNet architecture
    model = constuct_network()

    # Optimizer for the model
    opt = Adadelta()

    # configure callbacks
    valmetric = TopKMisclassification(k=5)
    callbacks = Callbacks(model,
                          train,
                          eval_set=test,
                          metric=valmetric,
                          **args.callback_args)

    cost = GeneralizedCost(costfunc=CrossEntropyMulti())

    #flag = input("Press Enter if you want to begin training process.")
    print "Training network..."
    model.fit(train,
              optimizer=opt,
              num_epochs=args.epochs,
              cost=cost,
              callbacks=callbacks)
    mets = model.eval(test, metric=valmetric)

    print 'Validation set metrics:'
    print 'LogLoss: %.2f, Accuracy: %.1f %% (Top-1), %.1f %% (Top-5)' % (
        mets[0], (1.0 - mets[1]) * 100, (1.0 - mets[2]) * 100)
    return
Example #5
def test_multi_optimizer(backend_default):
    opt_gdm = GradientDescentMomentum(learning_rate=0.001,
                                      momentum_coef=0.9,
                                      wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64),
              strides=4,
              padding=3,
              init=init_one,
              bias=Constant(0),
              activation=Rectlin())
    l2 = Affine(nout=4096,
                init=init_one,
                bias=Constant(1),
                activation=Rectlin())
    l3 = LSTM(output_size=1000,
              init=init_one,
              activation=Logistic(),
              gate_activation=Tanh())
    l4 = GRU(output_size=100,
             init=init_one,
             activation=Logistic(),
             gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    opt = MultiOptimizer({
        'default': opt_gdm,
        'Bias': opt_ada,
        'Convolution': opt_adam,
        'Linear': opt_rms,
        'LSTM': opt_rms_1,
        'GRU': opt_rms_1
    })

    map_list = opt._map_optimizers(layer_list)
    assert map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert map_list[opt_rms][0].__class__.__name__ == 'Linear'
    assert map_list[opt_gdm][0].__class__.__name__ == 'Activation'
    assert map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
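
Outside this unit test, the same class-name keys are handed to model.fit so each layer type gets its own update rule. A short sketch, assuming layers, train_set, valid_set, and args come from a training script like the ones in the other examples:

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
multi_opt = MultiOptimizer({'default': GradientDescentMomentum(0.001, 0.9),
                            'Bias': Adadelta(),    # every Bias layer
                            'Linear': RMSProp()})  # every Affine weight layer
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
model.fit(train_set, optimizer=multi_opt, cost=cost,
          num_epochs=args.epochs, callbacks=callbacks)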
Example #6
  def __init__(self, state_size, num_steers, num_speeds, args):
    # remember parameters
    self.state_size = state_size
    self.num_steers = num_steers
    self.num_speeds = num_speeds
    self.num_actions = num_steers + num_speeds
    self.num_layers = args.hidden_layers
    self.hidden_nodes = args.hidden_nodes
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.clip_error = args.clip_error

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.state_size, self.batch_size)
    self.input = self.be.empty(self.input_shape)
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    self.model = Model(layers = self._createLayers())
    self.cost = GeneralizedCost(costfunc = SumSquared())
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate, 
          decay_rate = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    else:
      assert False, "Unknown optimizer"

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self._createLayers())
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model
Example #7
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer.
    """
    opt_gdm = GradientDescentMomentum(
        learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one,
                bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)
    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()
    # separate layer_list into two, the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]
    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})
    layers_to_optimize1 = [l for l in layer_list1 if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2 if isinstance(l, ParameterLayer)]
    opt.optimize(layers_to_optimize1, 0)
    assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'
    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
Example #8
 def _set_optimizer(self):
     """ Initializes the selected optimization algorithm. """
     _logger.debug("Optimizer = %s" % str(self.args.optimizer))
     if self.args.optimizer == 'rmsprop':
         self.optimizer = RMSProp(
                 learning_rate = self.args.learning_rate,
                 decay_rate = self.args.decay_rate,
                 stochastic_round = self.args.stochastic_round)
     elif self.args.optimizer == 'adam':
         self.optimizer = Adam(
                 learning_rate = self.args.learning_rate,
                 stochastic_round = self.args.stochastic_round)
     elif self.args.optimizer == 'adadelta':
         self.optimizer = Adadelta(
                 decay = self.args.decay_rate,
                 stochastic_round = self.args.stochastic_round)
     else:
         assert False, "Unknown optimizer"
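
Only the destination names (optimizer, learning_rate, decay_rate, stochastic_round) are implied by the method above; a minimal sketch of an argument setup that would feed it, with the flag strings and default values as assumptions:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--optimizer', default='rmsprop',
                    choices=['rmsprop', 'adam', 'adadelta'])
parser.add_argument('--learning_rate', type=float, default=0.00025)
parser.add_argument('--decay_rate', type=float, default=0.95)
parser.add_argument('--stochastic_round', type=int, default=0)
args = parser.parse_args()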
Example #9
def test_adadelta(backend):
    ada = Adadelta()
    param = np.random.rand(200, 128)
    param2 = copy.deepcopy(param)
    grad = 0.01 * np.random.rand(200, 128)
    states = [0.01 * np.random.rand(200, 128),
              0.01 * np.random.rand(200, 128),
              0.01 * np.random.rand(200, 128)]
    states2 = [copy.deepcopy(states[0]),
               copy.deepcopy(states[1]),
               copy.deepcopy(states[2])]
    decay = ada.decay
    states2[0][:] = states2[0] * decay + (1. - decay) * grad * grad
    states2[2][:] = np.sqrt(
        (states2[1] + ada.epsilon) / (states2[0] + ada.epsilon)) * grad
    states2[1][:] = states2[1] * decay + (1. - decay) * states2[2] * states2[2]
    param2[:] -= states2[2]
    param_list = [
        ((wrap(param), wrap(grad)), [wrap(states[0]), wrap(states[1]), wrap(states[2])])]
    compare_tensors(ada, param_list, param2, tol=1e-7)
Example #10
    Affine(2, init=init_uni, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyBinary())

lunaModel = Model(layers=layers)

modelFileName = 'LUNA16_CADIMI_subset{}.prm'.format(SUBSET)

# If a model file exists, then load it and start from there.
# if (os.path.isfile(modelFileName)):
#   lunaModel = Model(modelFileName)

# Nesterov accelerated gradient descent with a learning rate of 0.01, a decay of 10^-3 and a momentum of 0.9
#opt = GradientDescentMomentum(0.01, 0.9, wdecay=0.001, nesterov=True)
opt = Adadelta(decay=0.95, epsilon=1e-6)

# configure callbacks
if args.callback_args['eval_freq'] is None:
    args.callback_args['eval_freq'] = 1

# configure callbacks
callbacks = Callbacks(lunaModel, eval_set=valid_set, **args.callback_args)
# add a callback that saves the best model state
callbacks.add_save_best_state_callback(modelFileName)

lunaModel.fit(train_set,
              optimizer=opt,
              num_epochs=num_epochs,
              cost=cost,
              callbacks=callbacks)
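
Because add_save_best_state_callback writes the best weights to modelFileName, a later run can pick up from that file, as the commented-out block above hints. A minimal sketch, assuming os is imported and the file exists:

if os.path.isfile(modelFileName):
    # neon's Model accepts a serialized .prm path and rebuilds the network from it
    lunaModel = Model(modelFileName)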
Example #11
parser = NeonArgparser(__doc__)
parser.add_argument('-tw',
                    '--test_data_dir',
                    default='',
                    help='directory in which to find test images')
parser.add_argument('-iw', '--image_width', default=384, help='image width')
args = parser.parse_args()
imwidth = int(args.image_width)

train = ClassifierLoader(repo_dir=args.data_dir,
                         inner_size=imwidth,
                         set_name='train',
                         do_transforms=False)
train.init_batch_provider()
init = Gaussian(scale=0.01)
opt = Adadelta(decay=0.9)
common = dict(init=init, batch_norm=True, activation=Rectlin())

layers = []
nchan = 64
layers.append(Conv((2, 2, nchan), strides=2, **common))
for idx in range(5):
    if nchan > 1024:
        nchan = 1024
    layers.append(Conv((3, 3, nchan), strides=1, **common))
    layers.append(Pooling(2, strides=2))
    nchan *= 2
#layers.append(Affine(nout=4096, init=init, activation=Rectlin(), batch_norm=True))
layers.append(DropoutBinary(keep=0.2))
layers.append(Affine(nout=447, init=init, activation=Softmax()))
Example #12
layers = [
    Conv((5, 5, 64),
         init=init,
         activation=Rectlin(),
         strides=dict(str_h=2, str_w=4)),
    Pooling(2, strides=2),
    Conv((5, 5, 64),
         init=init,
         batch_norm=True,
         activation=Rectlin(),
         strides=dict(str_h=1, str_w=2)),
    BiRNN(256, init=init, activation=Rectlin(), reset_cells=True),
    RecurrentMean(),
    Affine(128, init=init, batch_norm=True, activation=Rectlin()),
    Affine(nout=common['nclasses'], init=init, activation=Softmax())
]

model = Model(layers=layers)
opt = Adadelta()
metric = Misclassification()
callbacks = Callbacks(model, eval_set=val, metric=metric, **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train,
          optimizer=opt,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)
print('Misclassification error = %.1f%%' %
      (model.eval(val, metric=metric) * 100))
Example #13
subm_config = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           'whale_subm.cfg')
config_files = [subm_config] if os.path.exists(subm_config) else []
parser = NeonArgparser(__doc__, default_config_files=config_files)
parser.add_argument('--submission_file',
                    help='where to write prediction output')
args = parser.parse_args()

model, cost_obj = create_network()

assert 'all' in args.manifest, "Missing train manifest"
assert 'test' in args.manifest, "Missing test manifest"
assert args.submission_file is not None, "Must supply a submission file to output scores to"

neon_logger.display('Performing train and test in submission mode')
train = make_train_loader(args.manifest['all'],
                          args.manifest_root,
                          model.be,
                          noise_file=args.manifest.get('noise'))
test = make_test_loader(args.manifest['test'], args.manifest_root, model.be)

model.fit(dataset=train,
          cost=cost_obj,
          optimizer=Adadelta(),
          num_epochs=args.epochs,
          callbacks=Callbacks(model, **args.callback_args))

preds = model.get_outputs(test)
np.savetxt(args.submission_file, preds[:, 1], fmt='%.5f')