Example #1
  def __init__(self, num_actions, args):
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error
    self.min_reward = args.min_reward
    self.max_reward = args.max_reward
    self.batch_norm = args.batch_norm

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.input = self.be.empty(self.input_shape)
    self.input.lshape = self.input_shape # HACK: needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    # Bug fix
    for l in self.model.layers.layers:
      l.parallelism = 'Disabled'
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate, 
          decay_rate = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    else:
      assert False, "Unknown optimizer"

    # create target model
    self.train_iterations = 0
    if args.target_steps:
      self.target_model = Model(layers = self._createLayers(num_actions))
      # Bug fix
      for l in self.target_model.layers.layers:
        l.parallelism = 'Disabled'
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model

    self.callback = None
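The constructor above only reads a fixed set of fields from `args`. For reference, a minimal sketch of such a namespace with illustrative values (the field names come from the snippet; the values are placeholders, not recommendations):

import types
import numpy as np

args = types.SimpleNamespace(
    backend='gpu', device_id=0, random_seed=None,
    datatype='float32', stochastic_round=False,
    batch_size=32, discount_rate=0.99, history_length=4,
    screen_height=84, screen_width=84,
    clip_error=1.0, min_reward=-1.0, max_reward=1.0,
    batch_norm=False,
    optimizer='rmsprop', learning_rate=0.00025, decay_rate=0.95,
    target_steps=10000, save_weights_prefix=None)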
Example #2
def test_model_get_outputs_rnn(backend_default, data):

    dataset = PTB(50, path=data)
    dataiter = dataset.train_iter

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, activation=Logistic()),
        Affine(len(dataiter.vocab), init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    output = model.get_outputs(dataiter)

    assert output.shape == (dataiter.ndata, dataiter.seq_length, dataiter.nclass)

    # since the inits are all constant and the model is untrained,
    # along the feature dim the values should all be the same
    assert allclose_with_out(output[0, 0], output[0, 0, 0], rtol=0, atol=1e-4)
    assert allclose_with_out(output[0, 1], output[0, 1, 0], rtol=0, atol=1e-4)

    # along the time dim, the values should be increasing:
    assert np.alltrue(output[0, 2] > output[0, 1])
    assert np.alltrue(output[0, 1] > output[0, 0])
Example #3
def train_eval(
        train_set,
        valid_set,
        args,
        hidden_size = 100,
        clip_gradients = True,
        gradient_limit = 5):

    # weight initialization
    init = Uniform(low=-0.08, high=0.08)

    # model initialization
    layers = [
        LSTM(hidden_size, init, Logistic(), Tanh()),
        LSTM(hidden_size, init, Logistic(), Tanh()),
        Affine(2, init, bias=init, activation=Softmax())
    ]

    cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
    model = Model(layers=layers)
    optimizer = RMSProp(clip_gradients=clip_gradients, gradient_limit=gradient_limit, stochastic_round=args.rounding)

    # configure callbacks
    callbacks = Callbacks(model, train_set, progress_bar=args.progress_bar)

    # train model
    model.fit(train_set,
              optimizer=optimizer,
              num_epochs=args.epochs,
              cost=cost,
              callbacks=callbacks)

    pred = model.get_outputs(valid_set)
    pred_neg_rate = model.eval(valid_set, metric=Misclassification())
    return (pred[:,1], pred_neg_rate)
Example #4
    def __init__(self, depth=9):
        self.depth = depth

        train = (3, 32, 32)

        nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * depth)]
        strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

        # Now construct the network
        layers = [Conv(**self.conv_params(3, 16))]
        layers.append(self.module_s1(nfms[0], True))

        for nfm, stride in zip(nfms[1:], strides):
            res_module = self.module_s1(nfm) if stride == 1 else self.module_s2(nfm)
            layers.append(res_module)
        layers.append(BatchNorm())
        layers.append(Activation(Rectlin()))
        layers.append(Pooling('all', op='avg'))
        layers.append(Affine(10, init=Kaiming(local=False),
                             batch_norm=True, activation=Softmax()))
        self.layers = layers
        model = Model(layers=layers)
        cost = GeneralizedCost(costfunc=CrossEntropyMulti())
        model.initialize(train, cost=cost)
        self.model = model
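Example #4 relies on `conv_params`, `module_s1`, and `module_s2` helpers that the snippet does not show. A plausible sketch of `conv_params`, modeled on neon's cifar10_msra example (an assumption about the missing method, not the author's exact code):

def conv_params(fsize, nfm, stride=1, relu=True, batch_norm=True):
    # fshape is (height, width, num_filters); Kaiming init follows the MSRA ResNet recipe
    return dict(fshape=(fsize, fsize, nfm),
                strides=stride,
                padding=(1 if fsize > 1 else 0),
                activation=(Rectlin() if relu else None),
                init=Kaiming(local=True),
                batch_norm=batch_norm)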
Example #5
    def __init__(self, env, args, rng, name = "DQNNeon"):
        """ Initializes a network based on the Neon framework.

        Args:
            env (AtariEnv): The environment in which the agent actuates.
            args (argparse.Namespace): All settings either with a default value or set via command line arguments.
            rng (mtrand.RandomState): initialized Mersenne Twister pseudo-random number generator.
            name (str): The name of the network object.

        Note:
            This function should always call the base class first to initialize
            the common values for the networks.
        """
        _logger.info("Initializing new object of type " + str(type(self).__name__))
        super(DQNNeon, self).__init__(env, args, rng, name)
        self.input_shape = (self.sequence_length,) + self.frame_dims + (self.batch_size,)
        self.dummy_batch = np.zeros((self.batch_size, self.sequence_length) + self.frame_dims, dtype=np.uint8)
        self.batch_norm = args.batch_norm

        self.be = gen_backend(
                backend = args.backend,
                batch_size = args.batch_size,
                rng_seed = args.random_seed,
                device_id = args.device_id,
                datatype = np.dtype(args.datatype).type,
                stochastic_round = args.stochastic_round)

        # prepare tensors once and reuse them
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.output_shape, self.batch_size))

        # create model
        layers = self._create_layer()
        self.model = Model(layers = layers)
        self.cost_func = GeneralizedCost(costfunc = SumSquared())
        # Bug fix
        for l in self.model.layers.layers:
            l.parallelism = 'Disabled'
        self.model.initialize(self.input_shape[:-1], self.cost_func)

        self._set_optimizer()

        if self.args.load_weights is not None:
            self.load_weights(self.args.load_weights)

        # create target model
        if self.target_update_frequency:
            layers = self._create_layer()
            self.target_model = Model(layers)
            # Bug fix
            for l in self.target_model.layers.layers:
                l.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
        else:
            self.target_model = self.model

        self.callback = None
        _logger.debug("%s" % self)
Example #6
def main():
    parser = get_parser()
    args = parser.parse_args()
    print('Args:', args)

    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    ext = extension_from_parameters(args)

    loader = p1b3.DataLoader(feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             drug_features=args.drug_features,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    # initializer = Gaussian(loc=0.0, scale=0.01)
    initializer = GlorotUniform()
    activation = get_function(args.activation)()

    layers = []
    reshape = None

    if args.convolution and args.convolution[0]:
        reshape = (1, loader.input_dim, 1)
        layer_list = list(range(0, len(args.convolution), 3))
        for i in layer_list:
            nb_filter = args.convolution[i]
            filter_len = args.convolution[i+1]
            stride = args.convolution[i+2]
            # print(nb_filter, filter_len, stride)
            # fshape: (height, width, num_filters).
            layers.append(Conv((1, filter_len, nb_filter), strides={'str_h':1, 'str_w':stride}, init=initializer, activation=activation))
            if args.pool:
                layers.append(Pooling((1, args.pool)))

    for layer in args.dense:
        if layer:
            layers.append(Affine(nout=layer, init=initializer, activation=activation))
        if args.drop:
            layers.append(Dropout(keep=(1-args.drop)))
    layers.append(Affine(nout=1, init=initializer, activation=neon.transforms.Identity()))

    model = Model(layers=layers)

    train_iter = ConcatDataIter(loader, ndata=args.train_samples, lshape=reshape, datatype=args.datatype)
    val_iter = ConcatDataIter(loader, partition='val', ndata=args.val_samples, lshape=reshape, datatype=args.datatype)

    cost = GeneralizedCost(get_function(args.loss)())
    optimizer = get_function(args.optimizer)()
    callbacks = Callbacks(model, eval_set=val_iter, **args.callback_args)

    model.fit(train_iter, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
Example #7
  def __init__(self, state_size, num_steers, num_speeds, args):
    # remember parameters
    self.state_size = state_size
    self.num_steers = num_steers
    self.num_speeds = num_speeds
    self.num_actions = num_steers + num_speeds
    self.num_layers = args.hidden_layers
    self.hidden_nodes = args.hidden_nodes
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.clip_error = args.clip_error

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.state_size, self.batch_size)
    self.input = self.be.empty(self.input_shape)
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    self.model = Model(layers = self._createLayers())
    self.cost = GeneralizedCost(costfunc = SumSquared())
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate, 
          decay_rate = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    else:
      assert False, "Unknown optimizer"

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self._createLayers())
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model
Example #8
def test_model_N_S_setter(backend_default):

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, activation=Logistic()),
        Affine(100, init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    model.set_batch_size(20)
    model.set_seq_len(10)
Example #9
    def build(self):
        # setup model layers
        layers = [Affine(nout=100, init=self.init, bias=self.init, activation=Rectlin()),
                  Affine(nout=2, init=self.init, bias=self.init, activation=Softmax())]

        # initialize model object
        self.model = Model(layers=layers)
Example #10
    def load(self, model_path):
        """
        Load pre-trained model's .prm file to NpSemanticSegClassifier object

        Args:
            model_path(str): local path for loading the model
        """
        self.model = Model(model_path)
Example #11
def test_model_get_outputs(backend):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator(X_train[:backend.bsz * 3])

    init_norm = Gaussian(loc=0.0, scale=0.1)

    layers = [Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]
    mlp = Model(layers=layers)
    out_list = []
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output)
Example #12
def run(args, train, test):
    init_uni = Uniform(low=-0.1, high=0.1)
    opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)
    layers = [Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=True),
              Pooling((2, 2)),
              Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=True),
              Pooling((2, 2)),
              Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=True),
              Affine(nout=10, init=init_uni, activation=Softmax())]
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    mlp = Model(layers=layers)
    callbacks = Callbacks(mlp, train, eval_set=test, **args.callback_args)
    mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
    err = mlp.eval(test, metric=Misclassification())*100
    print('Misclassification error = %.2f%%' % err)
    return err
Example #13
def test_model_predict_rnn(backend):

    data_path = load_text('ptb-valid')

    data_set = Text(time_steps=50, path=data_path)

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, Logistic()),
        Affine(len(data_set.vocab), init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    output = model.predict(data_set)

    assert output.shape == (data_set.ndata, data_set.nclass)
Example #14
def run_once(web_input):
    """
    Run forward pass for a single input. Receives input vector from the web form.
    """

    parser = NeonArgparser(__doc__)
    
    args = parser.parse_args()
    
    num_feat = 4
    
    npzfile = np.load('./model/homeapp_preproc.npz')
    mean = npzfile['mean']
    std = npzfile['std']
    mean = np.reshape(mean, (1,mean.shape[0]))
    std = np.reshape(std, (1,std.shape[0]))
    
    # Reloading saved model
    mlp=Model("./model/homeapp_model.prm")
    
    # Horrible terrible hack that should never be needed :-(
    NervanaObject.be.bsz = 1
    
    # Actual: 275,000 Predicted: 362,177 
    #web_input = np.array([51.2246169879,-1.48577399748,223.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0])
    # Actual 185,000 Predicted: 244,526
    #web_input = np.array([51.4395375168,-1.07174234072,5.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0])
    # Actual 231,500 Predicted 281,053
    web_input = np.array([52.2010084131,-2.18181259148,218.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0])
    web_input = np.reshape(web_input, (1,web_input.shape[0]))
    
    web_input[:,:num_feat-1] -= mean[:,1:num_feat]
    web_input[:,:num_feat-1] /= std[:,1:num_feat]
    
    web_test_set = ArrayIterator(X=web_input, make_onehot=False)
    
    web_output = mlp.get_outputs(web_test_set)
    
    #Rescale the output
    web_output *= std[:,0]
    web_output += mean[:,0]
    
    return web_output[0]
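The `homeapp_preproc.npz` file loaded above presumably stores column-wise statistics computed on the training matrix, with the target in column 0 (which is why column 0 of `mean`/`std` rescales the output). A sketch of how it could have been produced, assuming a training array `data` laid out that way:

mean = data.mean(axis=0)
std = data.std(axis=0)
np.savez('./model/homeapp_preproc.npz', mean=mean, std=std)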
Example #15
class TreeModel(object):
    """
    Container for Tree style test model"
    """
    def __init__(self):
        self.in_shape = (1, 32, 32)

        init_norm = Gaussian(loc=0.0, scale=0.01)

        normrelu = dict(init=init_norm, activation=Rectlin())
        normsigm = dict(init=init_norm, activation=Logistic(shortcut=True))
        normsoft = dict(init=init_norm, activation=Softmax())

        # setup model layers
        b1 = BranchNode(name="b1")
        b2 = BranchNode(name="b2")

        p1 = [Affine(nout=100, name="main1", **normrelu),
              b1,
              Affine(nout=32, name="main2", **normrelu),
              Affine(nout=160, name="main3", **normrelu),
              b2,
              Affine(nout=32, name="main2", **normrelu),
              # make next layer big to check sizing
              Affine(nout=320, name="main2", **normrelu),
              Affine(nout=10, name="main4", **normsoft)]

        p2 = [b1,
              Affine(nout=16, name="branch1_1", **normrelu),
              Affine(nout=10, name="branch1_2", **normsigm)]

        p3 = [b2,
              Affine(nout=16, name="branch2_1", **normrelu),
              Affine(nout=10, name="branch2_2", **normsigm)]

        self.cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti()),
                              GeneralizedCost(costfunc=CrossEntropyBinary()),
                              GeneralizedCost(costfunc=CrossEntropyBinary())],
                              weights=[1, 0., 0.])

        self.layers = SingleOutputTree([p1, p2, p3], alphas=[1, .2, .2])
        self.model = Model(layers=self.layers)
        self.model.initialize(self.in_shape, cost=self.cost)
Example #16
    def __init__(self, args,  max_action_no, batch_dimension):
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem
        
        self.be = gen_backend(backend='gpu',             
                         batch_size=self.train_batch_size)

        self.input_shape = (batch_dimension[1], batch_dimension[2], batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            self.history_buffer = self.be.zeros(batch_dimension, dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix
        for l in self.train_net.layers.layers:
            l.parallelism = 'Disabled'
        self.train_net.initialize(self.input_shape[:-1], self.cost)
        
        self.target_net = Model(self.create_layers(max_action_no))
        # Bug fix
        for l in self.target_net.layers.layers:
            l.parallelism = 'Disabled'
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':        # Adam
            self.optimizer = Adam(beta_1=args.rms_decay,
                                            beta_2=args.rms_decay,
                                            learning_rate=args.learning_rate)
        else:		# Neon RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                            learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True
Example #17
  def __init__(self, num_actions, args):
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 default_dtype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.tensor = self.be.empty(self.input_shape)
    self.tensor.lshape = self.input_shape # needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self.createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    self.model.initialize(self.tensor.shape[:-1], self.cost)
    self.optimizer = RMSProp(learning_rate = args.learning_rate, 
        decay_rate = args.rmsprop_decay_rate, 
        stochastic_round = args.stochastic_round)

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self.createLayers(num_actions))
      self.target_model.initialize(self.tensor.shape[:-1])
      self.save_weights_path = args.save_weights_path
    else:
      self.target_model = self.model

    self.callback = None
Example #18
    def __init__(self):
        self.in_shape = (3, 299, 299)
        self.nout = 100
        self.pool3s1p1 = dict(fshape=3, padding=1, strides=1, op='avg')
        self.pool3s2p0 = dict(fshape=3, strides=2, op='max')

        layers = self.main_branch(nout=self.nout)
        self.cost = GeneralizedCost(costfunc=CrossEntropyMulti())
        self.model = Model(layers=layers)
        self.model.initialize(self.in_shape, cost=self.cost)
        self.layers = layers
Example #19
def run(train, test):
    init = Gaussian(scale=0.01)
    layers = [Conv((3, 3, 128), init=init, activation=Rectlin(),
                   strides=dict(str_h=1, str_w=2)),
              Conv((3, 3, 256), init=init, batch_norm=True, activation=Rectlin()),
              Pooling(2, strides=2),
              Conv((2, 2, 512), init=init, batch_norm=True, activation=Rectlin()),
              DeepBiRNN(256, init=init, activation=Rectlin(), reset_cells=True, depth=3),
              RecurrentLast(),
              Affine(32, init=init, batch_norm=True, activation=Rectlin()),
              Affine(nout=common['nclasses'], init=init, activation=Softmax())]

    model = Model(layers=layers)
    opt = Adadelta()
    metric = Misclassification()
    callbacks = Callbacks(model, eval_set=test, metric=metric, **args.callback_args)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
    return model
Example #20
def train_regressor(orig_wordvecs, w2v_W, w2v_vocab):
    """
    Return regressor to map word2vec to RNN word space

    Function modified from:
    https://github.com/ryankiros/skip-thoughts/blob/master/training/tools.py
    """
    # Gather all words from word2vec that appear in wordvecs
    d = defaultdict(lambda: 0)
    for w in w2v_vocab.keys():
        d[w] = 1
    shared = OrderedDict()
    count = 0

    for w in list(orig_wordvecs.keys())[:-2]:
        if d[w] > 0:
            shared[w] = count
            count += 1

    # Get the vectors for all words in 'shared'
    w2v = np.zeros((len(shared), 300), dtype='float32')
    sg = np.zeros((len(shared), 620), dtype='float32')
    for w in shared.keys():
        w2v[shared[w]] = w2v_W[w2v_vocab[w]]
        sg[shared[w]] = orig_wordvecs[w]

    train_set = ArrayIterator(X=w2v, y=sg, make_onehot=False)

    layers = [Linear(nout=620, init=Gaussian(loc=0.0, scale=0.1)),
              Bias(init=Constant(0.0))]
    clf = Model(layers=layers)

    # regression model is trained using default global batch size
    cost = GeneralizedCost(costfunc=SumSquared())
    opt = GradientDescentMomentum(0.1, 0.9, gradient_clip_value=5.0)
    callbacks = Callbacks(clf)

    clf.fit(train_set, num_epochs=20, optimizer=opt, cost=cost, callbacks=callbacks)
    return clf
Example #21
def create_model(model_type, model_tree, freeze, dataset_dir, model_file, img_loader):
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())

    if model_type == "alexnet":
        opt = create_alexnet_opt()
        layer_func = create_alexnet_layers
    elif model_type == "vgg":
        opt = create_vgg_opt()
        layer_func = create_vgg_layers
    else:
        raise NotImplementedError(model_type + " has not been implemented")

    if model_tree:
        ctree = ClassTaxonomy("Aves", "taxonomy_dict.p", dataset_dir)
        layers = created_branched(layer_func, ctree, img_loader)
        model = TaxonomicBranchModel(layers=layers)
    else:
        layers = layer_func(img_loader.nclass)
        model = Model(layers=layers)

    if freeze > 0:
        saved_model = Model(layers=layer_func(1000))
        saved_model.load_params(model_file)
        model.initialize(img_loader)
        model.initialized = False
        saved_lto = saved_model.layers.layers_to_optimize
        model_lto = model.layers.layers_to_optimize
        keep_length = len(saved_lto) - freeze * 2

        for i in range(len(saved_lto))[:keep_length]:
            model_lto[i].W[:] = saved_lto[i].W
            model_lto[i].optimize = False
        for i in range(len(model_lto))[keep_length:]:
            model_lto[i].optimize = True

        model.layers = FreezeSequential(layers)
        model.layers_to_optimize = model.layers.layers_to_optimize

    return model, cost, opt
Example #22
def DeepCascadeLearning(modelLayers,X_train,Y_train,callbacks,init_uni=Uniform(low=-0.1, high=0.1),
                        testIterator=None,epochs=2,
                        cost=GeneralizedCost(costfunc=CrossEntropyMulti()),
                        opt_gdm=GradientDescentMomentum(learning_rate=0.01,momentum_coef=0.9)):
  importantLayersIndexes = list()
  outputLayer = Affine(nout=10, init=init_uni, activation=Softmax())
  modelToPredict = None
  for i, currentLayer in enumerate(modelLayers):
    if np.shape(currentLayer):
      currentLayer = currentLayer[0]
    if currentLayer.classnm in ('Convolution', 'Affine'):
      importantLayersIndexes.append(i)
  boundaries = importantLayersIndexes + [len(modelLayers)]
  for idx, layerIndex in enumerate(importantLayersIndexes):
    modelToTrain = list()
    for currentLayer in modelLayers[layerIndex:boundaries[idx + 1]]:
      modelToTrain.append(currentLayer)
    modelToTrain.append(outputLayer)
    modelToTrain = Model(modelToTrain)
    if modelToPredict is None:
      trainIterator = ArrayIterator(X_train, Y_train, nclass=10, lshape=(3,32,32)) 
      x = trainIterator.__iter__()
      callbacks = Callbacks(modelToTrain)
      modelToTrain.fit(trainIterator, optimizer=opt_gdm, num_epochs=epochs, cost=GeneralizedCost(costfunc=CrossEntropyMulti()), callbacks=callbacks)
    else:
      tmpIterator = ArrayIterator(X_train,lshape=(3,32,32))
      tmpTrain = modelToPredict.get_outputs(tmpIterator)
      tmpIterator = ArrayIterator(tmpTrain[0:20],Y_train[0:20],nclass=10,lshape=(32,30,30))
      modelToTrain.fit(tmpIterator, optimizer=opt_gdm, num_epochs=epochs, cost=cost)
    if modelToPredict is None:
        modelToPredict = list()
    else:
        modelToPredict = modelToPredict.layers.layers
    for currentLayer in modelToTrain.layers.layers[0:-2]:
      modelToPredict.append(currentLayer)
    modelToPredict = Model(modelToPredict)

  return modelToPredict
Example #23
class MultistreamModel(object):
    """
    Container for a multistream test model
    """
    def __init__(self):
        self.in_shape = [1024, (2538, 38)]

        init = Constant(0)
        image_path = Sequential([Affine(20, init, bias=init),
                                 Affine(10, init, bias=init)])
        sent_path = Sequential([Affine(30, init, bias=init),
                                Affine(10, init)])

        layers = [MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
                  Dropout(keep=0.5),
                  LSTM(4, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True),
                  Affine(20, init, bias=init, activation=Softmax())]
        self.layers = layers
        self.cost = GeneralizedCostMask(CrossEntropyMulti())

        self.model = Model(layers=layers)
        self.model.initialize(self.in_shape, cost=self.cost)
Example #24
    def __init__(self):
        self.in_shape = (3, 32, 32)
        relu = Rectlin()
        init_use = Constant(0)
        conv = dict(init=init_use,
                    batch_norm=False,
                    activation=relu)
        convp1 = dict(init=init_use,
                      batch_norm=False,
                      bias=init_use,
                      activation=relu,
                      padding=1)
        convp1s2 = dict(init=init_use,
                        batch_norm=False,
                        bias=init_use,
                        padding=1, strides=2)

        layers = [Dropout(keep=.8),
                  Conv((3, 3, 96), **convp1),
                  Conv((3, 3, 96), **convp1),
                  Conv((3, 3, 96), **convp1s2),
                  Dropout(keep=.5),
                  Conv((3, 3, 192), **convp1),
                  Conv((3, 3, 192), **convp1),
                  Conv((3, 3, 192), **convp1s2),
                  Dropout(keep=.5),
                  Conv((3, 3, 192), **convp1),
                  Conv((1, 1, 192), **conv),
                  Conv((1, 1, 16), **conv),
                  Pooling(8, op="avg"),
                  Activation(Softmax())]
        self.layers = layers
        model = Model(layers=layers)
        cost = GeneralizedCost(costfunc=CrossEntropyMulti())
        model.initialize(self.in_shape, cost=cost)
        self.model = model
Example #25
    def build(self):
        """
        Build the model's layers
        """
        first_layer_dens = 64
        second_layer_dens = 64
        output_layer_dens = 2
        # setup weight initialization function
        init_norm = Gaussian(scale=0.01)
        # setup model layers
        layers = [Affine(nout=first_layer_dens, init=init_norm,
                         activation=Rectlin()),
                  Affine(nout=second_layer_dens, init=init_norm,
                         activation=Rectlin()),
                  Affine(nout=output_layer_dens, init=init_norm,
                         activation=Logistic(shortcut=True))]

        # initialize model object
        self.model = Model(layers=layers)
Example #26
def run(be, fake_dilation, fsz, stride, pad, dilation):
    K = 8
    strides = stride
    padding = pad
    be.rng = be.gen_rng(be.rng_seed)

    in_shape = 16
    while out_shape(in_shape, fsz, stride, dilation, pad) < 3:
        in_shape *= 2
    train_shape = (1, in_shape, in_shape)

    inp = be.array(be.rng.randn(np.prod(train_shape), be.bsz))
    init = Gaussian()

    layers = [Conv((5, 5, K), init=init),
              Conv((fsz, fsz, K), strides=strides, padding=padding, init=init,
                   dilation=dict(dil_d=1, dil_h=dilation, dil_w=dilation)),
              Conv((3, 3, K), init=init),
              Affine(nout=1, init=init)]
    model = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    model.initialize(train_shape, cost)

    if fake_dilation:
        # Perform regular convolution with an expanded filter.
        weights = save(model)
        new_layers = layers
        # Replace the middle layers.
        new_fsz = dilated_fsz(fsz, dilation)
        new_layers[1] = Conv((new_fsz, new_fsz, K), strides=strides, padding=padding, init=init)
        model = Model(layers=new_layers)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(train_shape, cost)
        load(weights, model, K, fsz, dilation)

    print(model)
    model.optimizer = GradientDescentMomentum(learning_rate=0.01,
                                              momentum_coef=0.9)
    outputs = fprop(model, inp)
    weights = bprop(model, outputs)
    model.optimizer.optimize(model.layers_to_optimize, epoch=0)
    return outputs.get(), weights.get()
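This test assumes `out_shape`, `dilated_fsz`, `save`, `load`, `fprop`, and `bprop` helpers defined elsewhere. `dilated_fsz` presumably computes the effective size of a dilated filter, for which the standard formula is:

def dilated_fsz(fsz, dilation):
    # a k-tap filter with dilation d covers k + (k - 1) * (d - 1) input positions
    return fsz + (fsz - 1) * (dilation - 1)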
Example #27
def test_conv_rnn(backend_default):
    train_shape = (1, 17, 142)

    be = NervanaObject.be
    inp = be.array(be.rng.randn(np.prod(train_shape), be.bsz))
    delta = be.array(be.rng.randn(10, be.bsz))

    init_norm = Gaussian(loc=0.0, scale=0.01)
    bilstm = DeepBiLSTM(128, init_norm, activation=Rectlin(), gate_activation=Rectlin(),
                        depth=1, reset_cells=True)
    birnn_1 = DeepBiRNN(128, init_norm, activation=Rectlin(),
                        depth=1, reset_cells=True, batch_norm=False)
    birnn_2 = DeepBiRNN(128, init_norm, activation=Rectlin(),
                        depth=2, reset_cells=True, batch_norm=False)
    bibnrnn = DeepBiRNN(128, init_norm, activation=Rectlin(),
                        depth=1, reset_cells=True, batch_norm=True)
    birnnsum = DeepBiRNN(128, init_norm, activation=Rectlin(),
                         depth=1, reset_cells=True, batch_norm=False, bi_sum=True)
    rnn = Recurrent(128, init=init_norm, activation=Rectlin(), reset_cells=True)
    lstm = LSTM(128, init_norm, activation=Rectlin(), gate_activation=Rectlin(), reset_cells=True)
    gru = GRU(128, init_norm, activation=Rectlin(), gate_activation=Rectlin(), reset_cells=True)

    rlayers = [bilstm, birnn_1, birnn_2, bibnrnn, birnnsum, rnn, lstm, gru]

    for rl in rlayers:
        layers = [
                    Conv((2, 2, 4), init=init_norm, activation=Rectlin(),
                         strides=dict(str_h=2, str_w=4)),
                    Pooling(2, strides=2),
                    Conv((3, 3, 4), init=init_norm, batch_norm=True, activation=Rectlin(),
                         strides=dict(str_h=1, str_w=2)),
                    rl,
                    RecurrentMean(),
                    Affine(nout=10, init=init_norm, activation=Rectlin()),
                ]
        model = Model(layers=layers)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(train_shape, cost)
        model.fprop(inp)
        model.bprop(delta)
Example #28
def test_model_get_outputs(backend_default, data):
    dataset = MNIST(path=data)
    train_set = dataset.train_iter

    init_norm = Gaussian(loc=0.0, scale=0.1)

    layers = [Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]
    mlp = Model(layers=layers)
    out_list = []
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert allclose_with_out(output, ref_output[:output.shape[0], :])

    # test model benchmark inference
    mlp.benchmark(train_set, inference=True, niterations=5)
Example #29
def test_model_get_outputs(backend_default, data):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data)
    train_set = ArrayIterator(X_train[:backend_default.bsz * 3])

    init_norm = Gaussian(loc=0.0, scale=0.1)

    layers = [Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]
    mlp = Model(layers=layers)
    out_list = []
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output)

    # test model benchmark inference
    mlp.benchmark(train_set, inference=True, niterations=5)
Example #30
class MostCommonWordSense:

    def __init__(self, rounding, callback_args, epochs):
        # setup weight initialization function
        self.init = Gaussian(loc=0.0, scale=0.01)
        # setup optimizer
        self.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9,
                                                 stochastic_round=rounding)
        # setup cost function as CrossEntropy
        self.cost = GeneralizedCost(costfunc=SumSquared())
        self.epochs = epochs
        self.model = None
        self.callback_args = callback_args

    def build(self):
        # setup model layers
        layers = [Affine(nout=100, init=self.init, bias=self.init, activation=Rectlin()),
                  Affine(nout=2, init=self.init, bias=self.init, activation=Softmax())]

        # initialize model object
        self.model = Model(layers=layers)

    def fit(self, valid_set, train_set):
        # configure callbacks
        callbacks = Callbacks(self.model, eval_set=valid_set, **self.callback_args)
        self.model.fit(train_set, optimizer=self.optimizer, num_epochs=self.epochs,
                       cost=self.cost, callbacks=callbacks)

    def save(self, save_path):
        self.model.save_params(save_path)

    def load(self, model_path):
        self.model = Model(model_path)

    def eval(self, valid_set):
        eval_rate = self.model.eval(valid_set, metric=Misclassification())
        return eval_rate

    def get_outputs(self, valid_set):
        return self.model.get_outputs(valid_set)
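A minimal usage sketch for this class, assuming `train_set` and `valid_set` are prebuilt ArrayIterator objects and `args` comes from NeonArgparser:

clf = MostCommonWordSense(args.rounding, args.callback_args, args.epochs)
clf.build()
clf.fit(valid_set, train_set)
print('validation misclassification rate:', clf.eval(valid_set))
clf.save('mcs_model.prm')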
Example #31
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)

layers = [
    Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=True),
    Pooling((2, 2)),
    Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=True),
    Pooling((2, 2)),
    Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=True),
    Affine(nout=10, init=init_uni, activation=Softmax())
]

if args.datatype in [np.float32, np.float64]:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
elif args.datatype in [np.float16]:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti(scale=cost_scale))

mlp = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(mlp, eval_set=test, **args.callback_args)

mlp.fit(train,
        optimizer=opt_gdm,
        num_epochs=num_epochs,
        cost=cost,
        callbacks=callbacks)

print('Misclassification error = %.1f%%' % (
    mlp.eval(test, metric=Misclassification()) * 100))
Example #32
    proc.communicate()


# parse the command line arguments
demo_config = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           'test.cfg')
config_files = [demo_config] if os.path.exists(demo_config) else []

parser = NeonArgparser(__doc__, default_config_files=config_files)
parser.add_argument('--input_video', help='video file')
parser.add_argument('--output_video',
                    help='Video file with overlayed inference hypotheses')
args = parser.parse_args()

assert args.model_file is not None, "need a model file for testing"
model = Model(args.model_file)

assert 'categories' in args.manifest, "Missing categories file"
category_map = {
    t[0].decode(): t[1]
    for t in np.genfromtxt(
        args.manifest['categories'], dtype=None, delimiter=',')
}

# Make a temporary directory and clean up afterwards
outdir = mkdtemp()
atexit.register(shutil.rmtree, outdir)
caption_file = os.path.join(outdir, 'caption.txt')

manifest = segment_video(args.input_video, outdir)
Example #33
def test_empty_dataset():
    model = Model(test_layers)
    b = Benchmark(model=model)
    with pytest.raises(ValueError):
        b.time([], niterations=5, inference=True)
Example #34
window_size = 64

# Set up the testset to load via aeon
image_config = dict(height=window_size, width=window_size, channels=3)
label_config = dict(binary=False)
config = dict(type="image,label",
              image=image_config,
              label=label_config,
              manifest_filename=testFileName,
              minibatch_size=args.batch_size,
              subset_fraction=1,
              cache_directory='')
test_set = DataLoader(config, be)
test_set = TypeCast(test_set, index=0, dtype=np.float32)  # cast image to float

lunaModel = Model('LUNA16_resnet.prm')


def round(arr, threshold=0.5):
    '''
    Round to an arbitrary threshold:
    values above the threshold go to 1, the rest go to 0.
    '''
    out = np.zeros(np.shape(arr))
    out[np.where(arr > threshold)[0]] = 1
    out[np.where(arr <= threshold)[0]] = 0
    return out

Example #35
    branch3 = [Conv((1, 1, p3[0]), **common), Conv((5, 5, p3[1]), **commonp2)]
    branch4 = [Pooling(op="max", **pool3s1p1), Conv((1, 1, p4[0]), **common)]
    return MergeBroadcast(layers=[branch1, branch2, branch3, branch4],
                          merge="depth")


model = Model(layers=[
    Conv((7, 7, 64), padding=3, strides=2, **common),
    Pooling(**pool3s2p1),
    Conv((1, 1, 64), **common),
    Conv((3, 3, 192), **commonp1),
    Pooling(**pool3s2p1),
    inception([(64, ), (96, 128), (16, 32), (32, )]),
    inception([(128, ), (128, 192), (32, 96), (64, )]),
    Pooling(**pool3s2p1),
    inception([(192, ), (96, 208), (16, 48), (64, )]),
    inception([(160, ), (112, 224), (24, 64), (64, )]),
    inception([(128, ), (128, 256), (24, 64), (64, )]),
    inception([(112, ), (144, 288), (32, 64), (64, )]),
    inception([(256, ), (160, 320), (32, 128), (128, )]),
    Pooling(**pool3s2p1),
    inception([(256, ), (160, 320), (32, 128), (128, )]),
    inception([(384, ), (192, 384), (48, 128), (128, )]),
    Pooling(fshape=7, strides=1, op="avg"),
    Affine(nout=1000, init=init1)
])

weight_sched = Schedule([22, 44, 65], (1 / 250.)**(1 / 3.))
opt_gdm = GradientDescentMomentum(0.01,
                                  0.0,
                                  wdecay=0.0005,
                                  schedule=weight_sched)
Example #36
def test_model_serialize(backend_default, data):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data)

    train_set = DataIterator([X_train, X_train],
                             y_train,
                             nclass=nclass,
                             lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([
        Conv((5, 5, 16),
             init=init_norm,
             bias=Constant(0),
             activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    path2 = Sequential([
        Affine(nout=100,
               init=init_norm,
               bias=Constant(0),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    layers = [
        MergeMultistream(layers=[path1, path2], merge="stack"),
        Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)
    n_test = 3
    num_epochs = 3
    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    save_obj(mlp.serialize(keep_states=True), tmp_save)

    # Load model
    mlp = Model(layers=layers)
    mlp.load_weights(tmp_save)

    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            assert type(p) == type(p_e)
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            elif isinstance(p, np.ndarray):
                assert np.allclose(p, p_e)
            else:
                assert p == p_e

    os.remove(tmp_save)
Example #37

####
# Set batch size and time_steps to 1 for generation and reset buffers
# then load up the pickle file with the saved model and weights
#
be.bsz = 1
time_steps = 1
num_predict = args.number_of_samples

layers = [
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh()),
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh()),
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]
model_new = Model(layers=layers)
model_new.load_weights(args.save_path)
model_new.initialize(dataset=(train_set.shape[0], time_steps))

####
# Generative RNN sample time!
#
text = []
seed_tokens = list(args.sample_primer_text)

x = be.zeros((len(train_set.vocab), time_steps))

for s in seed_tokens:
    x.fill(0)
    x[train_set.token_to_index[s], 0] = 1
    y = model_new.fprop(x)
Example #38
]

# setup cost function as Square Hinge Loss
cost = GeneralizedCost(costfunc=SquareHingeLoss())

# setup optimizer
LR_start = 1.65e-2


def ShiftAdaMax_with_Scale(LR=1):
    return ShiftAdaMax(learning_rate=LR_start * LR, schedule=ShiftSchedule(2, shift_size=1))


optimizer = MultiOptimizer({
    'default': ShiftAdaMax_with_Scale(),
    'BinaryLinear_0': ShiftAdaMax_with_Scale(57.038),
    'BinaryLinear_1': ShiftAdaMax_with_Scale(73.9008),
    'BinaryLinear_2': ShiftAdaMax_with_Scale(73.9008),
    'BinaryLinear_3': ShiftAdaMax_with_Scale(52.3195)
})

# initialize model object
bnn = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(bnn, eval_set=valid_set, **args.callback_args)

# run fit
bnn.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
print('Misclassification error = %.1f%%' % (bnn.eval(valid_set, metric=Misclassification())*100))
Example #39
# setup cost function as CrossEntropy
cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti()),
                        GeneralizedCost(costfunc=CrossEntropyMulti()),
                        GeneralizedCost(costfunc=CrossEntropyMulti())],
                 weights=[1, 0., 0.])  # We only want to consider the CE of the main path

if not args.resume:
    # build the model from scratch and run it

    # Now construct the model
    branch_nodes = [BranchNode(name='branch' + str(i)) for i in range(2)]
    main1 = main_branch(branch_nodes)
    aux1 = aux_branch(branch_nodes[0], ind=1)
    aux2 = aux_branch(branch_nodes[1], ind=2)

    model = Model(layers=Tree([main1, aux1, aux2], alphas=[1.0, 0.3, 0.3]))

else:
    # load up the save model
    model = Model('serialize_test_2.pkl')
    model.initialize(train, cost=cost)

# configure callbacks
callbacks = Callbacks(model, progress_bar=True, output_file='temp1.h5',
                      serialize=1, history=3, save_path='serialize_test.pkl')

lr_sched = PolySchedule(total_epochs=10, power=0.5)
opt_gdm = GradientDescentMomentum(0.01, 0.9, wdecay=0.0002, schedule=lr_sched)
opt_biases = GradientDescentMomentum(0.02, 0.9, schedule=lr_sched)

opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})
Example #40
be = gen_backend(**extract_valid_args(args, gen_backend))

# Set up the testset to load via aeon
image_config = dict(height=64, width=64, channels=3)
label_config = dict(binary=False)
config = dict(type="image,label",
              image=image_config,
              label=label_config,
              manifest_filename=testFileName,
              minibatch_size=args.batch_size,
              subset_fraction=1,
              cache_directory='')
test_set = DataLoader(config, be)
test_set = TypeCast(test_set, index=0, dtype=np.float32)  # cast image to float

lunaModel = Model('LUNA16_VGG_model_no_batch_sigmoid_pretrained.prm')


def round(arr, threshold=0.5):
    '''
    Round to an arbitrary threshold:
    values above the threshold go to 1, the rest go to 0.
    '''
    out = np.zeros(np.shape(arr))
    out[np.where(arr > threshold)[0]] = 1
    out[np.where(arr <= threshold)[0]] = 0
    return out

Example #41
class DeepQNetwork:
  def __init__(self, num_actions, args):
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error
    self.min_reward = args.min_reward
    self.max_reward = args.max_reward
    self.batch_norm = args.batch_norm

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.input = self.be.empty(self.input_shape)
    self.input.lshape = self.input_shape # HACK: needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    # Bug fix
    for l in self.model.layers.layers:
      l.parallelism = 'Disabled'
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate, 
          decay_rate = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    else:
      assert False, "Unknown optimizer"

    # create target model
    self.train_iterations = 0
    if args.target_steps:
      self.target_model = Model(layers = self._createLayers(num_actions))
      # Bug fix
      for l in self.target_model.layers.layers:
        l.parallelism = 'Disabled'
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model

    self.callback = None

  def _createLayers(self, num_actions):
    # create network
    init_xavier_conv = Xavier(local=True)
    init_xavier_affine = Xavier(local=False)
    layers = []
    # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
    layers.append(Conv((8, 8, 32), strides=4, init=init_xavier_conv, activation=Rectlin(), batch_norm=self.batch_norm))
    # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
    layers.append(Conv((4, 4, 64), strides=2, init=init_xavier_conv, activation=Rectlin(), batch_norm=self.batch_norm))
    # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
    layers.append(Conv((3, 3, 64), strides=1, init=init_xavier_conv, activation=Rectlin(), batch_norm=self.batch_norm))
    # The final hidden layer is fully-connected and consists of 512 rectifier units.
    layers.append(Affine(nout=512, init=init_xavier_affine, activation=Rectlin(), batch_norm=self.batch_norm))
    # The output layer is a fully-connected linear layer with a single output for each valid action.
    layers.append(Affine(nout=num_actions, init = init_xavier_affine))
    return layers

  def _setInput(self, states):
    # change order of axes to match what Neon expects
    states = np.transpose(states, axes = (1, 2, 3, 0))
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.input.set(states.copy())
    # normalize network input between 0 and 1
    self.be.divide(self.input, 255, self.input)

  def update_target_network(self):
      # have to serialize also states for batch normalization to work
      pdict = self.model.get_description(get_weights=True, keep_states=True)
      self.target_model.deserialize(pdict, load_states=True)

  def train(self, minibatch, epoch):
    # expand components of minibatch
    prestates, actions, rewards, poststates, terminals = minibatch
    assert len(prestates.shape) == 4
    assert len(poststates.shape) == 4
    assert len(actions.shape) == 1
    assert len(rewards.shape) == 1
    assert len(terminals.shape) == 1
    assert prestates.shape == poststates.shape
    assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]

    # feed-forward pass for poststates to get Q-values
    self._setInput(poststates)
    postq = self.target_model.fprop(self.input, inference = True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    maxpostq = self.be.max(postq, axis=0).asnumpyarray()
    assert maxpostq.shape == (1, self.batch_size)

    # feed-forward pass for prestates
    self._setInput(prestates)
    preq = self.model.fprop(self.input, inference = False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    targets = preq.asnumpyarray().copy()

    # clip rewards between -1 and 1
    rewards = np.clip(rewards, self.min_reward, self.max_reward)

    # update Q-value targets for actions taken
    for i, action in enumerate(actions):
      if terminals[i]:
        targets[action, i] = float(rewards[i])
      else:
        targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]

    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)
    #assert np.count_nonzero(deltas.asnumpyarray()) == 32

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1,1)

    # clip errors
    if self.clip_error:
      self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    # increase number of weight updates (needed for stats callback)
    self.train_iterations += 1

    # calculate statistics
    if self.callback:
      self.callback.on_train(cost[0,0])

  def predict(self, states):
    # the minibatch must be full size, because Neon doesn't allow changing the minibatch size
    assert states.shape == ((self.batch_size, self.history_length,) + self.screen_dim)

    # calculate Q-values for the states
    self._setInput(states)
    qvalues = self.model.fprop(self.input, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

    # transpose the result, so that batch size is first dimension
    return qvalues.T.asnumpyarray()

  def load_weights(self, load_path):
    self.model.load_params(load_path)

  def save_weights(self, save_path):
    self.model.save_params(save_path)
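
A minimal usage sketch for the class above (the names net and states are assumptions, not part of the example): predict() returns Q-values with the batch on the first axis, so greedy action selection is a row-wise argmax.

qvalues = net.predict(states)        # shape: (batch_size, num_actions)
action = int(np.argmax(qvalues[0]))  # greedy action for the first state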
Example #42
          Conv((3, 3, 96), **convp1),
          Conv((3, 3, 96), **convp1s2),
          Dropout(keep=.5),
          Conv((3, 3, 192), **convp1),
          Conv((3, 3, 192), **convp1),
          Conv((3, 3, 192), **convp1s2),
          Dropout(keep=.5),
          Conv((3, 3, 192), **convp1),
          Conv((1, 1, 192), **conv),
          Conv((1, 1, 16), **conv),
          Pooling(8, op="avg"),
          Activation(Softmax())]

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

mlp = Model(layers=layers)

if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_weights(args.model_file)

# configure callbacks
callbacks = Callbacks(mlp, train_set, eval_set=valid_set, **args.callback_args)

if args.deconv:
    callbacks.add_deconv_callback(train_set, valid_set)

callbacks.add_callback(
    MetricCallback(
        valid_set,
Example #43
(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
train_set = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
valid_set = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
layers = []
layers.append(
    Affine(nout=100, init=init_norm, batch_norm=True, activation=Rectlin()))
layers.append(
    Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True)))
cost = GeneralizedCost(costfunc=CrossEntropyBinary())
mlp = Model(layers=layers)

# define stopping function
# it takes as input a tuple (State, val[t]),
# where State is the cumulative validation state (generated by this function)
# and val[t] is the validation error at time t,
# and returns as output a tuple (State', Bool),
# which represents the new state and whether to stop


# Stop if validation error ever increases from epoch to epoch
def stop_func(s, v):
    if s is None:
        return (v, False)

    return (min(v, s), v > s)
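
To actually use stop_func, it would typically be registered with the callbacks before fitting. A sketch, assuming an optimizer and argument setup like the other examples here:

callbacks = Callbacks(mlp, eval_set=valid_set, **args.callback_args)
callbacks.add_early_stop_callback(stop_func)
mlp.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
        cost=cost, callbacks=callbacks)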
Example #44
layers = [
    MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
    Dropout(keep=0.5),
    LSTM(hidden_size,
         init,
         activation=Logistic(),
         gate_activation=Tanh(),
         reset_cells=True),
    Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())
]

cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))
checkpoint_model_path = "~/image_caption2.pickle"
checkpoint_schedule = range(num_epochs)

model = Model(layers=layers)

callbacks = Callbacks(model, train_set, args)

opt = RMSProp(decay_rate=0.997,
              learning_rate=0.0005,
              epsilon=1e-8,
              clip_gradients=True,
              gradient_limit=1.0)

# train model
model.fit(train_set,
          optimizer=opt,
          num_epochs=num_epochs,
          cost=cost,
          callbacks=callbacks)
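
checkpoint_model_path and checkpoint_schedule are defined but unused in this excerpt; they would normally be wired in through a serialize callback before the fit call. A sketch, assuming the add_serialize_callback API of the neon release these examples target:

callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path)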
Example #45
layers.append(Conv((1, 1, 256),  init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 256),  init=init_uni, activation=relu, strides=2,  padding=1))  # 23->12

layers.append(Conv((3, 3, 384),  init=init_uni, activation=relu, strides=1,  padding=1))
layers.append(Conv((1, 1, 384),  init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 384),  init=init_uni, activation=relu, strides=2,  padding=1))  # 12->6

layers.append(Dropout(keep=0.5))
layers.append(Conv((3, 3, 1024), init=init_uni, activation=relu, strides=1, padding=1))
layers.append(Conv((1, 1, 1024), init=init_uni, activation=relu, strides=1))
layers.append(Conv((1, 1, 1000), init=init_uni, activation=relu, strides=1))
layers.append(Pooling(6, op='avg'))

layers.append(Activation(Softmax()))

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model = Model(layers=layers)

if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    model.load_params(args.model_file)

# configure callbacks
callbacks = Callbacks(model, eval_set=test, **args.callback_args)
if args.deconv:
    callbacks.add_deconv_callback(train, test)

model.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
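
opt_gdm is defined outside this excerpt; a typical definition in neon's convnet examples would be (an assumption, not the original code):

opt_gdm = GradientDescentMomentum(0.01, momentum_coef=0.9, wdecay=0.0005)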
Example #46
# and not weight decay.
opt_bias = GradientDescentMomentum(0.002, 0.9)
opt_bias_class = GradientDescentMomentum(0.02, 0.9)

# set up the mapping of layers to optimizers
opt = MultiOptimizer({
    'default': opt_vgg,
    'Bias': opt_bias,
    'class_layer': opt_class_layer,
    'class_layer_bias': opt_bias_class
})

# use cross-entropy cost to train the network
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

lunaModel = Model(layers=vgg_layers)

# location and size of the VGG weights file
url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/VGG/'
filename = 'VGG_E.p'  # VGG_E.p is VGG19; VGG_D.p is VGG16
size = 554227541

# edit filepath below if you have the file elsewhere
_, filepath = Dataset._valid_path_append('data', '', filename)
if not os.path.exists(filepath):
    print('Need to fetch VGG pre-trained weights from cloud. Please wait...')
    Dataset.fetch_dataset(url, filename, filepath, size)

# load the weights param file
print("Loading VGG weights from {}...".format(filepath))
trained_vgg = load_obj(filepath)
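
The excerpt stops after loading the pickled weights. The usual next step in neon's VGG transfer-learning examples is to copy the pretrained parameters layer by layer, skipping the freshly initialized classifier head; a sketch under that assumption (the 'class_layer' name matches the optimizer mapping above):

param_layers = [l for l in lunaModel.layers.layers]
param_dict_list = trained_vgg['model']['config']['layers']
for layer, params in zip(param_layers, param_dict_list):
    if layer.name.startswith('class_layer'):
        break  # leave the new classification layers at their random init
    layer.load_weights(params, load_states=True)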
Example #47
# Strided conv autoencoder
bn = False
layers = [Conv((4, 4, 8), init=init_uni, activation=Rectlin(), batch_norm=bn),
          Pooling(2),
          Conv((4, 4, 32), init=init_uni, activation=Rectlin(), batch_norm=bn),
          Pooling(2),
          Deconv(fshape=(4, 4, 8), init=init_uni,
                 activation=Rectlin(), batch_norm=bn),
          Deconv(fshape=(3, 3, 8), init=init_uni,
                 activation=Rectlin(), strides=2, batch_norm=bn),
          Deconv(fshape=(2, 2, 1), init=init_uni, strides=2, padding=1)]

# Define the cost
cost = GeneralizedCost(costfunc=SumSquared())

model = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(model, **args.callback_args)

# Fit the model
model.fit(train, optimizer=opt_gdm, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)

# Plot the reconstructed digits
try:
    from matplotlib import pyplot, cm
    fi = 0
    nrows = 10
    ncols = 12
    test = np.zeros((28 * nrows, 28 * ncols))
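
The plotting loop is cut off here; a sketch of one way it could continue (assuming 28x28 inputs and that model.get_outputs returns the flattened reconstructions, one per row):

out = model.get_outputs(train)  # reconstructed images, one row per example
for row in range(nrows):
    for col in range(ncols):
        test[28 * row:28 * (row + 1), 28 * col:28 * (col + 1)] = \
            out[fi].reshape(28, 28)
        fi += 1
pyplot.matshow(test, cmap=cm.gray)
pyplot.savefig('reconstructed.png')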
Example #48
decoder1 = Affine(nout=image_size, init=init_norm, activation=Logistic(),
                  name='decoder1')
encoder2 = Affine(nout=config.encoder_size[1], init=init_norm,
                  activation=Logistic(), name='encoder2')
decoder2 = Affine(nout=config.encoder_size[0], init=init_norm,
                  activation=Logistic(), name='decoder2')
encoder3 = Affine(nout=config.encoder_size[2], init=init_norm,
                  activation=Logistic(), name='encoder3')
decoder3 = Affine(nout=config.encoder_size[1], init=init_norm,
                  activation=Logistic(), name='decoder3')
classifier = Affine(nout=config.ydim, init=init_norm, activation=Softmax())
cost_reconst = GeneralizedCost(costfunc=SumSquared()) 
cost_classification = GeneralizedCost(costfunc=CrossEntropyMulti())

# Setting model layers for AE1
AE1 = Model([encoder1, decoder1])
AE1.cost = cost_reconst
AE1.initialize(data, cost_reconst)
# AE1.optimizer = optimizer_default
measure_time(data, AE1, config, 'AE1')
            
# Setting model layers for AE2
# It has an extra encoder layer compared to what an AE would normally have;
# this avoids having to save the outputs of each AE.
AE2_mimic = Model([encoder1, encoder2, decoder2])
AE2_mimic.cost = cost_reconst
AE2_mimic.initialize(data, cost_reconst)
# Learning rates for extra layers that should not be updated are set to zero.
# opt = MultiOptimizer({'default': optimizer_default,
#                       'encoder1': optimizer_helper})
# AE2_mimic.optimizer = opt
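
The commented-out lines describe freezing encoder1 during AE2 training by mapping it to a zero-learning-rate optimizer. A sketch of what those optimizers might look like (names and values are assumptions):

optimizer_default = GradientDescentMomentum(0.1, momentum_coef=0.9)
optimizer_helper = GradientDescentMomentum(0.0, momentum_coef=0.0)  # frozen
AE2_mimic.optimizer = MultiOptimizer({'default': optimizer_default,
                                      'encoder1': optimizer_helper})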
Example #49
def create_objects(root_yaml,
                   be_type='gpu',
                   batch_size=128,
                   rng_seed=None,
                   device_id=0,
                   default_dtype=np.float32,
                   stochastic_rounding=False):
    """
    Instantiate objects as per the given specifications.

    Arguments:
        root_yaml (dict): Model definition dictionary parsed from a YAML file

        be_type (str): backend type, either 'gpu', 'mgpu' or 'cpu'

        batch_size (int): Batch size.
        rng_seed (None or int): random number generator seed

        device_id (int): for GPU backends, the id of the device to use

        default_dtype (type): default numpy data type

        stochastic_rounding (bool or int): number of bits for stochastic rounding;
                                           use False for no rounding
    Returns:
        tuple: Contains model, cost and optimizer objects.
    """

    assert NervanaObject.be is not None, 'Must generate a backend before running this function'

    # can give filename or parse dictionary
    if type(root_yaml) is str:
        with open(root_yaml, 'r') as fid:
            root_yaml = yaml.safe_load(fid.read())

    # in case references were used
    root_yaml = deepcopy(root_yaml)

    # initialize layers
    yaml_layers = root_yaml['layers']

    # currently only sequential models are supported in YAML
    layer_dict = {'layers': yaml_layers}
    layers = Sequential.gen_class(layer_dict)

    # initialize model
    model = Model(layers=layers)

    # cost (before layers for shortcut derivs)
    cost_name = root_yaml['cost']
    cost = GeneralizedCost.gen_class({'costfunc': {'type': cost_name}})

    # create optimizer
    opt = None
    if 'optimizer' in root_yaml:
        yaml_opt = root_yaml['optimizer']
        typ = yaml_opt['type']
        opt = getattr(neon.optimizers, typ).gen_class(yaml_opt['config'])

    return model, cost, opt
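
A minimal usage sketch (the YAML filename is hypothetical; a backend must exist before the call, as the assert enforces):

be = gen_backend(backend='cpu', batch_size=128)
model, cost, opt = create_objects('mnist_mlp.yaml',
                                  be_type='cpu', batch_size=128)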
Example #50
class DeepQNetwork:
  def __init__(self, state_size, num_actions, args):
    # remember parameters
    self.state_size = state_size
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.clip_error = args.clip_error

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 default_dtype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.state_size, self.batch_size)
    self.input = self.be.empty(self.input_shape)
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate, 
          decay_rate = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    else:
      assert False, "Unknown optimizer"

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self._createLayers(num_actions))
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model

  def _createLayers(self, num_actions):
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    # The hidden layer is fully-connected and consists of 16 rectifier units.
    layers.append(Affine(nout=16, init=init_norm, activation=Rectlin()))
    # The output layer is a fully-connected linear layer with a single output for each valid action.
    layers.append(Affine(nout=num_actions, init=init_norm))
    return layers

  def _setInput(self, states):
    # change order of axes to match what Neon expects
    states = np.transpose(states)
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.input.set(states.copy())
    # input normalization is disabled here (unlike the pixel-based network above)
    #self.be.divide(self.input, 255, self.input)

  def train(self, minibatch, epoch = 0):
    # expand components of minibatch
    prestates, actions, rewards, poststates, terminals = minibatch
    assert len(prestates.shape) == 2
    assert len(poststates.shape) == 2
    assert len(actions.shape) == 1
    assert len(rewards.shape) == 1
    assert len(terminals.shape) == 1
    assert prestates.shape == poststates.shape
    assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
    #print "WE ARE ACTUALLY TRAINING IN HERE"
    if self.target_steps and self.train_iterations % self.target_steps == 0:
      # HACK: serialize network to disk and read it back to clone
      filename = self.save_weights_prefix + "_target.pkl"
      save_obj(self.model.serialize(keep_states = False), filename)
      self.target_model.load_weights(filename)

    # feed-forward pass for poststates to get Q-values
    self._setInput(poststates)
    postq = self.target_model.fprop(self.input, inference = True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    maxpostq = self.be.max(postq, axis=0).asnumpyarray()
    assert maxpostq.shape == (1, self.batch_size)

    # feed-forward pass for prestates
    self._setInput(prestates)
    preq = self.model.fprop(self.input, inference = False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    targets = preq.asnumpyarray().copy()

    # update Q-value targets for actions taken
    for i, action in enumerate(actions):
      if terminals[i]:
        targets[action, i] = float(rewards[i])
      else:
        targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0, i]

    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)
    #assert np.count_nonzero(deltas.asnumpyarray()) == 32

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1,1)

    # clip errors
    if self.clip_error:
      self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

  def predict(self, states):
    # the minibatch must be full size, because Neon doesn't allow changing the minibatch size
    assert states.shape == (self.batch_size, self.state_size)

    # calculate Q-values for the states
    self._setInput(states)
    qvalues = self.model.fprop(self.input, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

    # transpose the result, so that batch size is first dimension
    return qvalues.T.asnumpyarray()

  def load_weights(self, load_path):
    self.model.load_weights(load_path)

  def save_weights(self, save_path):
    save_obj(self.model.serialize(keep_states = True), save_path)
Example #51
Y = data.train_label-1
X_test = data.test_data
Y_test = data.test_label-1
train_set = ArrayIterator(X=X, y=Y, nclass=11, lshape=(1,200,200))
test_set = ArrayIterator(X_test, None, nclass=11, lshape=(1,200,200))
init_uni = Uniform(low=-0.1, high=0.1)
layers = [Conv(fshape=(4,4,16), init=init_uni, activation=Rectlin()),
          Pooling(fshape=2, strides=2),
          Conv(fshape=(4,4,32), init=init_uni, activation=Rectlin()),
          Pooling(fshape=2, strides=2),
          Conv(fshape=(4,4,32), init=init_uni, activation=Rectlin()),
          Pooling(fshape=2, strides=2),
          Affine(nout=500, init=init_uni, activation=Rectlin()),
          Affine(nout=11, init=init_uni, activation=Softmax())]

model = Model(layers)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
optimizer = GradientDescentMomentum(learning_rate=0.005,
                                    momentum_coef=0.9)
callbacks = Callbacks(model, train_set)

model.fit(dataset=train_set, cost=cost, optimizer=optimizer, num_epochs=40, callbacks=callbacks)
model.save_params('model.pkl')
# out = model.get_outputs(test_set)
# result = out.argmax(axis=1).reshape(-1, 1)  # predicted class per test example
# np.save('result.npy', result)
Example #52
gradient_clip_value = 5

# download shakespeare text
dataset = Shakespeare(time_steps, path=args.data_dir)
train_set = dataset.train_iter
valid_set = dataset.valid_iter

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization
layers = [
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh()),
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]
model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# fit and validate
model.fit(train_set,
          optimizer=optimizer,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)
Example #53
def test_fw_bw_no_cost_or_optimizer():
    model = Model(test_layers)
    model.initialize(test_dataset)
    b = Benchmark(model=model)
    with pytest.raises(RuntimeError):
        b.time(test_dataset, niterations=1)
Example #54
    Conv((3, 3, 192), **convp1),
    Conv((1, 1, 192), **conv),
    Conv((1, 1, 16), **conv),
    Pooling(8, op="avg"),
    Affine(nout=1024, init=init_uni, activation=relu),
    Affine(nout=512, init=init_uni, activation=relu),
    Dropout(keep=.5),
    Affine(nout=128, init=init_uni, activation=relu),
    Dropout(keep=.4),
    Affine(nout=64, init=init_uni, activation=relu),
    Affine(nout=2, init=init_uni, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

lunaModel = Model(layers=layers)

if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    lunaModel.load_params(args.model_file)

# configure callbacks
#callbacks = Callbacks(lunaModel, eval_set=valid_set, **args.callback_args)
callbacks = Callbacks(lunaModel,
                      eval_set=valid_set,
                      metric=Misclassification(),
                      **args.callback_args)

if args.deconv:
    callbacks.add_deconv_callback(train_set, valid_set)
Example #55
    init = GlorotUniform()  # Uniform(low=-0.08, high=0.08)

    # define model: the model differs between the two strategies (sequence target or not)
    if return_sequences is True:
        layers = [
            LSTM(hidden, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=False),
            Affine(train_set.nfeatures, init, bias=init, activation=Identity())
        ]
    else:
        layers = [
            LSTM(hidden, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True),
            RecurrentLast(),
            Affine(train_set.nfeatures, init, bias=init, activation=Identity())
        ]

    model = Model(layers=layers)
    cost = GeneralizedCost(MeanSquared())
    optimizer = RMSProp(stochastic_round=args.rounding)

    callbacks = Callbacks(model, train_set, eval_set=valid_set, **args.callback_args)

    # fit model
    model.fit(train_set,
              optimizer=optimizer,
              num_epochs=args.epochs,
              cost=cost,
              callbacks=callbacks)

    # =======visualize how the model does on validation set==============
    # run the trained model on train and valid dataset and see how the outputs match
    train_output = model.get_outputs(train_set).reshape(-1, train_set.nfeatures)
Example #56
File: util.py  Project: yw774/neon
def load_sent_encoder(model_dict,
                      expand_vocab=False,
                      orig_vocab=None,
                      w2v_vocab=None,
                      w2v_path=None,
                      use_recur_last=False):
    """
    Custom function to load the model saved from skip-thought vector training
    and reconstruct another model using just the LUT and encoding layer for
    transferring sentence representations.

    Arguments:
        model_dict: saved s2v model dict
        expand_vocab: Bool to indicate if w2v vocab expansion should be attempted
        orig_vocab: If using expand_vocab, original vocabulary dict is needed for expansion
        w2v_vocab: If using expand_vocab, w2v vocab dict
        w2v_path: Path to trained w2v binary (GoogleNews)
        use_recur_last: If True a RecurrentLast layer is used as the final layer, if False
                        a RecurrentSum layer is used as the last layer of the returned model.
    """

    embed_dim = model_dict['model']['config']['embed_dim']
    model_train = Model(model_dict)

    # RecurrentLast should be used for semantic similarity evaluation
    if use_recur_last:
        last_layer = RecurrentLast()
    else:
        last_layer = RecurrentSum()

    if expand_vocab:
        assert orig_vocab and w2v_vocab and w2v_path, (
            "All vocabs and w2v_path " +
            "need to be specified when using expand_vocab")

        neon_logger.display("Computing vocab expansion regression...")
        # Build inverse word dictionary (word -> index)
        word_idict = dict()
        for kk, vv in orig_vocab.items():
            # Add 2 to the index to allow for padding and oov tokens as 0 and 1
            word_idict[vv + 2] = kk
        word_idict[0] = ''
        word_idict[1] = 'UNK'

        # Create dictionary of word -> vec
        orig_word_vecs = get_embeddings(
            model_train.layers.layer_dict['lookupTable'], word_idict)

        # Load GoogleNews w2v weights
        w2v_W, w2v_dim, _ = get_google_word2vec_W(w2v_path, w2v_vocab)

        # Compute the expanded vocab lookup table from a linear mapping of
        # words2vec into RNN word space
        init_embed = compute_vocab_expansion(orig_word_vecs, w2v_W, w2v_vocab,
                                             word_idict)

        init_embed_dev = model_train.be.array(init_embed)
        w2v_vocab_size = len(w2v_vocab)

        table = LookupTable(vocab_size=w2v_vocab_size,
                            embedding_dim=embed_dim,
                            init=init_embed_dev,
                            pad_idx=0)

        model = Model(layers=[
            table, model_train.layers.layer_dict['encoder'], last_layer
        ])

    else:
        model = Model(layers=[
            model_train.layers.layer_dict['lookupTable'],
            model_train.layers.layer_dict['encoder'], last_layer
        ])
    return model
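
A usage sketch (the pickle path is hypothetical; load_obj is the same neon helper used in the VGG example above):

model_dict = load_obj('s2v_model.pkl')
encoder_model = load_sent_encoder(model_dict, use_recur_last=True)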
Example #57
              activation=Rectlin(),
              batch_norm=True)
nchan = 16
layers = [
    Conv((1, 2, nchan), **common),
    Conv((1, 2, nchan), **common),
    Conv((1, 2, nchan // 2), **common),
    Conv((1, 2, nchan // 4), **common),
    Conv((1, 2, nchan // 8), **common),
    Conv((1, 2, nchan // 16), **common),
    Dropout(0.8),
    DeepBiRNN(16, init=init, activation=Rectlin(), reset_cells=True, depth=3),
    RecurrentMean(),
    Affine(nout=1, init=init, activation=None)
]

cost = GeneralizedCost(costfunc=SumSquared())
net = Model(layers=layers)
callbacks = Callbacks(net, eval_set=valid, **args.callback_args)

net.fit(train,
        optimizer=opt,
        num_epochs=args.epochs,
        cost=cost,
        callbacks=callbacks)

train_preds = net.get_outputs(train)
print('  training R %.4f' % r_score(dataset.train_y, train_preds))
valid_preds = net.get_outputs(valid)
print('validation R %.4f' % r_score(dataset.valid_y, valid_preds))
Example #58
X_val = np.hstack([X_val_info, hit_flat])

mu = np.mean(X_train, axis=0).reshape(1, -1)
s = np.std(X_train, axis=0).reshape(1, -1)
s[s == 0] = 1

X_train = (X_train - mu) / s
X_val = (X_val - mu) / s

# X = np.random.rand(10000, 100)
# y = np.sum(X, axis=1).reshape(-1, 1)
# y = np.hstack([y, y])

train_set = ArrayIterator(X=X_train, y=y_train, make_onehot=False)
val_set = ArrayIterator(X=X_val, y=y_val, make_onehot=False)

print("!!! TEST STARTED !!!")
for bs in [8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]:
    be = gen_backend('gpu', batch_size=bs)
    bnn = Model("bin_model/final_model.prm")
    tries = 3
    t_start = time.time()
    for i in range(tries):
        out = bnn.get_outputs(train_set)
    t_end = time.time()

    tot_time = t_end - t_start
    n = X_train.shape[0]
    fps = tries * n / tot_time
    print("fps {}; batch size {}; time elapsed: {}".format(fps, bs, tot_time))
Example #59
init_norm = Gaussian(loc=0.0, scale=0.01)

# setup model layers
layers = [
    Affine(nout=100, init=init_norm, activation=Rectlin()),
    Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
]

# setup cost function as CrossEntropy
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

# setup optimizer
optimizer = GradientDescentMomentum(0.1,
                                    momentum_coef=0.9,
                                    stochastic_round=args.rounding)

# initialize model object
mlp = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(mlp, eval_set=valid_set, **args.callback_args)

# run fit
mlp.fit(train_set,
        optimizer=optimizer,
        num_epochs=args.epochs,
        cost=cost,
        callbacks=callbacks)
error_rate = mlp.eval(valid_set, metric=Misclassification())
neon_logger.display('Misclassification error = %.1f%%' % (error_rate * 100))
Example #60
parser = NeonArgparser(__doc__)
args = parser.parse_args()

NervanaObject.be.enable_winograd = 4

# setup data provider
X_train = np.random.uniform(-1, 1, (128, 3 * 231 * 231))
y_train = np.random.randint(0, 999, (128, 1000))
train = ArrayIterator(X_train, y_train, nclass=1000, lshape=(3, 231, 231))

layers = [Conv((11, 11, 96), init=Gaussian(scale=0.01),
               activation=Rectlin(), padding=0, strides=4),
          Pooling(2, strides=2),
          Conv((5, 5, 256), init=Gaussian(scale=0.01), activation=Rectlin(), padding=0),
          Pooling(2, strides=2),
          Conv((3, 3, 512), init=Gaussian(scale=0.01), activation=Rectlin(), padding=1),
          Conv((3, 3, 1024), init=Gaussian(scale=0.01), activation=Rectlin(), padding=1),
          Conv((3, 3, 1024), init=Gaussian(scale=0.01), activation=Rectlin(), padding=1),
          Pooling(2, strides=2),
          Affine(nout=3072, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=1000, init=Gaussian(scale=0.01), activation=Softmax())]
model = Model(layers=layers)

weight_sched = Schedule([22, 44, 65], (1 / 250.)**(1 / 3.))
opt_gdm = GradientDescentMomentum(0.01, 0.0, wdecay=0.0005, schedule=weight_sched)
opt = MultiOptimizer({'default': opt_gdm})
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.benchmark(train, cost=cost, optimizer=opt, niterations=10, nskip=1)