示例#1
0
def create_objects(root_yaml,
                   be_type='gpu',
                   batch_size=128,
                   rng_seed=None,
                   device_id=0,
                   default_dtype=np.float32,
                   stochastic_rounding=False):
    """
    Instantiate objects as per the given specifications.

    A backend must already have been generated (``NervanaObject.be`` must be
    set) before this function is called.  The backend related arguments
    (``be_type``, ``batch_size``, ``rng_seed``, ``device_id``,
    ``default_dtype`` and ``stochastic_rounding``) are accepted for interface
    compatibility but are not read by this function.

    Arguments:
        root_yaml (dict or str): Model definition dictionary parsed from a
                                 YAML file, or the path of a YAML file to
                                 load.  Must contain the keys 'layers' and
                                 'cost'; 'optimizer' is optional.

        be_type (str): backend either 'gpu', 'mgpu' or 'cpu'

        batch_size (int): mini-batch size

        rng_seed (None or int): random number generator seed

        device_id (int): for GPU backends id of device to use

        default_dtype (type): numpy data format for default data types,

        stochastic_rounding (bool or int): number of bits for stochastic rounding
                                           use False for no rounding

    Returns:
        tuple: Contains model, cost and optimizer objects.  The optimizer is
               None when ``root_yaml`` has no 'optimizer' entry.
    """

    assert NervanaObject.be is not None, 'Must generate a backend before running this function'

    # can give filename or parsed dictionary; isinstance also accepts str subclasses
    if isinstance(root_yaml, str):
        with open(root_yaml, 'r') as fid:
            root_yaml = yaml.safe_load(fid.read())

    # in case references were used: work on a private copy so that shared
    # sub-dictionaries are not mutated
    root_yaml = deepcopy(root_yaml)

    # initialize layers
    yaml_layers = root_yaml['layers']

    # currently only support sequential in yaml
    layer_dict = {'layers': yaml_layers}
    layers = Sequential.gen_class(layer_dict)

    # initialize model
    model = Model(layers=layers)

    # cost: the YAML 'cost' entry names the cost function type
    cost_name = root_yaml['cost']
    cost = GeneralizedCost.gen_class({'costfunc': {'type': cost_name}})

    # create optimizer (optional): the class is looked up by name in neon.optimizers
    opt = None
    if 'optimizer' in root_yaml:
        yaml_opt = root_yaml['optimizer']
        typ = yaml_opt['type']
        opt = getattr(neon.optimizers, typ).gen_class(yaml_opt['config'])

    return model, cost, opt
示例#2
0
  def __init__(self, num_actions, args):
    """Create the Neon backend, the network, its optimizer and the target network.

    Arguments:
      num_actions: number of actions; passed to ``_createLayers`` and used
                   as the first dimension of the targets tensor.
      args: settings object.  The attributes read here are batch_size,
            discount_rate, history_length, screen_height, screen_width,
            clip_error, min_reward, max_reward, batch_norm, backend,
            random_seed, device_id, datatype, stochastic_round, optimizer,
            learning_rate, decay_rate, target_steps and (only when
            target_steps is truthy) save_weights_prefix.

    Raises:
      AssertionError: if ``args.optimizer`` is not one of 'rmsprop',
                      'adam' or 'adadelta'.
    """
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error
    self.min_reward = args.min_reward
    self.max_reward = args.max_reward
    self.batch_norm = args.batch_norm

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    # shape is (history_length, screen_height, screen_width, batch_size)
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.input = self.be.empty(self.input_shape)
    self.input.lshape = self.input_shape # HACK: needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    # Bug fix: parallelism is switched off on every layer before initialize
    # NOTE(review): the underlying Neon issue is not visible here - confirm
    for l in self.model.layers.layers:
      l.parallelism = 'Disabled'
    # the model is initialized with the input shape minus the batch dimension
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate,
          decay_rate = args.decay_rate,
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate,
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate,
          stochastic_round = args.stochastic_round)
    else:
      # raised explicitly so the check survives ``python -O``; the previous
      # ``assert false`` failed with NameError because ``false`` is undefined
      raise AssertionError("Unknown optimizer")

    # create target model
    self.train_iterations = 0
    if args.target_steps:
      # separate network with the same layer layout as the main model
      self.target_model = Model(layers = self._createLayers(num_actions))
      # Bug fix
      for l in self.target_model.layers.layers:
        l.parallelism = 'Disabled'
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      # no separate target network: the main model doubles as the target
      self.target_model = self.model

    self.callback = None
示例#3
0
    def __init__(self, env, args, rng, name = "DQNNeon"):
        """ Initializes a network based on the Neon framework.

        Args:
            env (AtariEnv): The environment in which the agent actuates.
            args (argparse.Namespace): All settings either with a default value or set via command line arguments.
            rng (mtrand.RandomState): initialized Mersenne Twister pseudo-random number generator.
            name (str): The name of the network object.

        Note:
            This function should always call the base class first to initialize
            the common values for the networks.
        """
        _logger.info("Initializing new object of type " + str(type(self).__name__))
        super(DQNNeon, self).__init__(env, args, rng, name)
        # NOTE(review): sequence_length, frame_dims, batch_size, output_shape,
        # target_update_frequency and args are not assigned in this method;
        # presumably the base class sets them - confirm
        self.input_shape = (self.sequence_length,) + self.frame_dims + (self.batch_size,)
        self.dummy_batch = np.zeros((self.batch_size, self.sequence_length) + self.frame_dims, dtype=np.uint8)
        self.batch_norm = args.batch_norm

        self.be = gen_backend(
                backend = args.backend,
                batch_size = args.batch_size,
                rng_seed = args.random_seed,
                device_id = args.device_id,
                datatype = np.dtype(args.datatype).type,
                stochastic_round = args.stochastic_round)

        # prepare tensors once and reuse them
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.output_shape, self.batch_size))

        # create model
        layers = self._create_layer()
        self.model = Model(layers = layers)
        self.cost_func = GeneralizedCost(costfunc = SumSquared())
        # Bug fix: parallelism is switched off on every layer before initialize
        for l in self.model.layers.layers:
            l.parallelism = 'Disabled'
        # the model is initialized with the input shape minus the batch dimension
        self.model.initialize(self.input_shape[:-1], self.cost_func)

        self._set_optimizer()

        # restore previously saved weights when a path was given;
        # identity comparison is the correct test against None
        if self.args.load_weights is not None:
            self.load_weights(self.args.load_weights)

        # create target model
        if self.target_update_frequency:
            # separate network with the same layer layout as the main model
            layers = self._create_layer()
            self.target_model = Model(layers)
            # Bug fix
            for l in self.target_model.layers.layers:
                l.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
        else:
            # no separate target network: the main model doubles as the target
            self.target_model = self.model

        self.callback = None
        _logger.debug("%s" % self)
示例#4
0
  def __init__(self, state_size, num_steers, num_speeds, args):
    """Create the Neon backend, the network, its optimizer and the target network.

    Arguments:
      state_size: size of one input state; first dimension of the input tensor.
      num_steers: number of steering outputs.
      num_speeds: number of speed outputs.  The network has
                  ``num_steers + num_speeds`` outputs in total.
      args: settings object.  The attributes read here are hidden_layers,
            hidden_nodes, batch_size, discount_rate, clip_error, backend,
            random_seed, device_id, datatype, stochastic_round, optimizer,
            learning_rate, decay_rate, target_steps and (only when
            target_steps is truthy) save_weights_prefix.

    Raises:
      AssertionError: if ``args.optimizer`` is not one of 'rmsprop',
                      'adam' or 'adadelta'.
    """
    # remember parameters
    self.state_size = state_size
    self.num_steers = num_steers
    self.num_speeds = num_speeds
    self.num_actions = num_steers + num_speeds
    self.num_layers = args.hidden_layers
    self.hidden_nodes = args.hidden_nodes
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.clip_error = args.clip_error

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.state_size, self.batch_size)
    self.input = self.be.empty(self.input_shape)
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    self.model = Model(layers = self._createLayers())
    self.cost = GeneralizedCost(costfunc = SumSquared())
    # the model is initialized with the input shape minus the batch dimension
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate,
          decay_rate = args.decay_rate,
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate,
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate,
          stochastic_round = args.stochastic_round)
    else:
      # raised explicitly so the check survives ``python -O``; the previous
      # ``assert false`` failed with NameError because ``false`` is undefined
      raise AssertionError("Unknown optimizer")

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      # separate network with the same layer layout as the main model
      self.target_model = Model(layers = self._createLayers())
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      # no separate target network: the main model doubles as the target
      self.target_model = self.model
示例#5
0
  def __init__(self, num_actions, args):
    """Set up the Neon backend, the network, an RMSProp optimizer and the target network.

    Arguments:
      num_actions: number of actions; passed to ``createLayers`` and used
                   as the first dimension of the targets tensor.
      args: settings object.  The attributes read here are batch_size,
            discount_rate, history_length, screen_height, screen_width,
            clip_error, backend, random_seed, device_id, datatype,
            stochastic_round, learning_rate, rmsprop_decay_rate,
            target_steps and (only when target_steps is truthy)
            save_weights_path.
    """
    # settings kept on the instance
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error

    # Neon backend
    backend_settings = dict(
        backend=args.backend,
        batch_size=args.batch_size,
        rng_seed=args.random_seed,
        device_id=args.device_id,
        default_dtype=np.dtype(args.datatype).type,
        stochastic_round=args.stochastic_round)
    self.be = gen_backend(**backend_settings)

    # tensors are allocated once here and reused afterwards
    shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.input_shape = shape
    self.tensor = self.be.empty(shape)
    self.tensor.lshape = shape  # needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # main network, its cost and its optimizer
    self.model = Model(layers=self.createLayers(num_actions))
    self.cost = GeneralizedCost(costfunc=SumSquared())
    self.model.initialize(self.tensor.shape[:-1], self.cost)
    self.optimizer = RMSProp(
        learning_rate=args.learning_rate,
        decay_rate=args.rmsprop_decay_rate,
        stochastic_round=args.stochastic_round)

    # target network
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if not self.target_steps:
      # no separate target network: the main model doubles as the target
      self.target_model = self.model
    else:
      self.target_model = Model(layers=self.createLayers(num_actions))
      self.target_model.initialize(self.tensor.shape[:-1])
      self.save_weights_path = args.save_weights_path

    self.callback = None
示例#6
0
    def __init__(self, args,  max_action_no, batch_dimension):
        """Create a GPU backend, the training and target networks and the optimizer.

        Arguments:
            args: settings object.  The attributes read here are
                  train_batch_size, discount_factor, use_gpu_replay_mem,
                  optimizer, rms_decay and learning_rate.
            max_action_no: number of network outputs; passed to
                           ``create_layers`` and used as the first dimension
                           of the targets tensor.
            batch_dimension: indexable with at least four entries; entry 0
                             becomes the last axis of the input tensor and
                             entries 1-3 the leading axes.
        """
        def _disable_parallelism(net):
            # Bug fix: parallelism is switched off on every layer of the net
            for layer in net.layers.layers:
                layer.parallelism = 'Disabled'

        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem

        self.be = gen_backend(backend='gpu', batch_size=self.train_batch_size)

        # move entry 0 of batch_dimension to the end
        self.input_shape = (
            batch_dimension[1],
            batch_dimension[2],
            batch_dimension[3],
            batch_dimension[0],
        )
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        # the history buffer lives on the backend or in host memory
        if not self.use_gpu_replay_mem:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)
        else:
            self.history_buffer = self.be.zeros(batch_dimension, dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)

        # network that is trained
        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        _disable_parallelism(self.train_net)
        self.train_net.initialize(self.input_shape[:-1], self.cost)

        # target network, initialized without a cost
        self.target_net = Model(self.create_layers(max_action_no))
        _disable_parallelism(self.target_net)
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':
            # Adam: rms_decay is used for both beta parameters
            self.optimizer = Adam(
                beta_1=args.rms_decay,
                beta_2=args.rms_decay,
                learning_rate=args.learning_rate)
        else:
            # any other value selects Neon RMSProp
            self.optimizer = RMSProp(
                decay_rate=args.rms_decay,
                learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True
示例#7
0
def main():
    """Train (or benchmark) the segnet model and pickle its validation outputs.

    Command-line options are parsed with NeonArgparser; ``--bench`` runs
    ``Model.benchmark`` and exits instead of training.  After training, the
    model outputs for the validation set are written to ``outputs.pkl`` in
    the current directory.

    Raises:
        TypeError: if the image height or width is not a power of 2.
    """
    # larger batch sizes may not fit on GPU
    parser = NeonArgparser(__doc__, default_overrides={'batch_size': 4})
    parser.add_argument("--bench", action="store_true", help="run benchmark instead of training")
    parser.add_argument("--num_classes", type=int, default=12, help="number of classes in the annotation")
    parser.add_argument("--height", type=int, default=256, help="image height")
    parser.add_argument("--width", type=int, default=512, help="image width")

    # the backend is created by hand below, so the parser must not generate one
    args = parser.parse_args(gen_be=False)

    # check that image dimensions are powers of 2
    # (x & (x - 1)) == 0 also holds for 0, which is rejected explicitly
    if args.height <= 0 or (args.height & (args.height - 1)) != 0:
        raise TypeError("Height must be a power of 2.")
    if args.width <= 0 or (args.width & (args.width - 1)) != 0:
        raise TypeError("Width must be a power of 2.")

    (c, h, w) = (args.num_classes, args.height, args.width)

    # need to use the backend with the new upsampling layer implementation
    be = NervanaGPU_Upsample(rng_seed=args.rng_seed,
                             device_id=args.device_id)
    # set batch size
    be.bsz = args.batch_size

    # couple backend to global neon object
    NervanaObject.be = be

    # train and validation images use the same preprocessing parameters
    shape = dict(channel_count=3, height=h, width=w, subtract_mean=False)
    train_params = ImageParams(center=True, flip=False,
                               scale_min=min(h, w), scale_max=min(h, w),
                               aspect_ratio=0, **shape)
    test_params = ImageParams(center=True, flip=False,
                              scale_min=min(h, w), scale_max=min(h, w),
                              aspect_ratio=0, **shape)
    # one target value per pixel (h*w), read from the annotation contents
    common = dict(target_size=h*w, target_conversion='read_contents',
                  onehot=False, target_dtype=np.uint8, nclasses=args.num_classes)

    train_set = PixelWiseImageLoader(set_name='train', repo_dir=args.data_dir,
                                      media_params=train_params,
                                      shuffle=False, subset_percent=100,
                                      index_file=os.path.join(args.data_dir, 'train_images.csv'),
                                      **common)
    val_set = PixelWiseImageLoader(set_name='val', repo_dir=args.data_dir, media_params=test_params,
                                   index_file=os.path.join(args.data_dir, 'val_images.csv'), **common)

    # initialize model object
    layers = gen_model(c, h, w)
    segnet_model = Model(layers=layers)

    # configure callbacks
    callbacks = Callbacks(segnet_model, eval_set=val_set, **args.callback_args)

    # separate learning rules for weights (default), biases and batch norm
    opt_gdm = GradientDescentMomentum(1.0e-6, 0.9, wdecay=0.0005, schedule=Schedule())
    opt_biases = GradientDescentMomentum(2.0e-6, 0.9, schedule=Schedule())
    opt_bn = GradientDescentMomentum(1.0e-6, 0.9, schedule=Schedule())
    opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases, 'BatchNorm': opt_bn})

    cost = GeneralizedCost(costfunc=CrossEntropyMulti())

    if args.bench:
        segnet_model.initialize(train_set, cost=cost)
        segnet_model.benchmark(train_set, cost=cost, optimizer=opt)
        sys.exit(0)
    else:
        segnet_model.fit(train_set, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

    # get the trained segnet model outputs for validation set
    outs_val = segnet_model.get_outputs(val_set)

    # protocol -1 is a binary pickle protocol, so the file must be opened in
    # binary mode ('w' fails on Python 3 and can corrupt data on Windows)
    with open('outputs.pkl', 'wb') as fid:
        pickle.dump(outs_val, fid, -1)
示例#8
0
                 default_dtype=args.datatype)

# load MNIST and wrap the train and test splits in data iterators
(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
train_set = DataIterator(X_train, y_train, nclass=nclass)
valid_set = DataIterator(X_test, y_test, nclass=nclass)

# Gaussian weight initializer shared by both layers
init_norm = Gaussian(loc=0.0, scale=0.01)

# model definition: a 100-unit batch-normalized ReLU layer followed by a
# 10-unit logistic output layer, trained with binary cross-entropy
layers = [
    Affine(nout=100, init=init_norm, batch_norm=True, activation=Rectlin()),
    Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True)),
]
cost = GeneralizedCost(costfunc=CrossEntropyBinary())
mlp = Model(layers=layers)

# define stopping function
# it takes as input a tuple (State,val[t])
# which describes the cumulative validation state (generated by this function)
# and the validation error at time t
# and returns as output a tuple (State', Bool),
# which represents the new state and whether to stop


# Stop if validation error ever increases from epoch to epoch
def stopFunc(s, v):
    if s is None:
        return (v, False)
示例#9
0
def _option_on_command_line(argv, long_opt, short_opt):
    """Return True if `long_opt` or `short_opt` was given explicitly in `argv`.

    An argument counts only when it *starts with* one of the option strings
    (e.g. ``--epochs``, ``--epochs=5``, ``-e`` or ``-e5``).  A plain substring
    test would also fire on unrelated arguments such as ``--eval_freq``
    (contains ``-e``) or ``--rounding`` (contains ``-r``).
    """
    return any(arg.startswith((long_opt, short_opt)) for arg in argv)


def main():
    """Build and train the P1B3 regression network with neon.

    Parameters come from the command line and from the configuration file
    named by ``args.config_file``; values given on the command line take
    precedence.  The network is either a stack of dense layers (when the
    consolidated parameters contain 'dense') or a stack of 1-D convolutional
    layers (from 'conv'), followed by a single linear output unit.
    """
    # Get command-line parameters
    parser = get_p1b3_parser()
    args = parser.parse_args()
    #print('Args:', args)
    # Get parameters from configuration file
    fileParameters = p1b3.read_config_file(args.config_file)
    #print ('Params:', fileParameters)

    # Correct for arguments set by default by neon parser
    # (i.e. instead of taking the neon parser default value fall back to the config file,
    # if effectively the command-line was used, then use the command-line value)
    # This applies to conflictive parameters: batch_size, epochs and rng_seed
    cli_args = sys.argv[1:]  # skip the program name
    if not _option_on_command_line(cli_args, "--batch_size", "-z"):
        args.batch_size = fileParameters['batch_size']
    if not _option_on_command_line(cli_args, "--epochs", "-e"):
        args.epochs = fileParameters['epochs']
    if not _option_on_command_line(cli_args, "--rng_seed", "-r"):
        args.rng_seed = fileParameters['rng_seed']

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')
    # Construct extension to save model
    ext = p1b3.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = p1b3.DataLoader(
        seed=seed,
        dtype=gParameters['datatype'],
        val_split=gParameters['validation_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    # Re-generate the backend after consolidating parsing and file config
    gen_backend(backend=args.backend,
                rng_seed=seed,
                device_id=args.device_id,
                batch_size=gParameters['batch_size'],
                datatype=gParameters['datatype'],
                max_devices=args.max_devices,
                compat_mode=args.compat_mode)

    # Initialize weights and learning rule
    initializer_weights = p1_common_neon.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = p1_common_neon.build_initializer(
        'constant', kerasDefaults, 0.)

    activation = p1_common_neon.get_function(gParameters['activation'])()

    # Define model architecture
    layers = []
    reshape = None

    if 'dense' in gParameters:  # Build dense layers
        for layer in gParameters['dense']:
            # falsy entries (e.g. 0) add no Affine layer
            if layer:
                layers.append(
                    Affine(nout=layer,
                           init=initializer_weights,
                           bias=initializer_bias,
                           activation=activation))
            if gParameters['drop']:
                layers.append(Dropout(keep=(1 - gParameters['drop'])))
    else:  # Build convolutional layers
        reshape = (1, loader.input_dim, 1)
        # 'conv' is a flat list read in triples: (nb_filter, filter_len, stride)
        conv_spec = gParameters['conv']
        for i in range(0, len(conv_spec), 3):
            nb_filter = conv_spec[i]
            filter_len = conv_spec[i + 1]
            stride = conv_spec[i + 2]
            # print(nb_filter, filter_len, stride)
            # fshape: (height, width, num_filters).
            layers.append(
                Conv((1, filter_len, nb_filter),
                     strides={
                         'str_h': 1,
                         'str_w': stride
                     },
                     init=initializer_weights,
                     activation=activation))
            if gParameters['pool']:
                layers.append(Pooling((1, gParameters['pool'])))

    # single linear output unit
    layers.append(
        Affine(nout=1,
               init=initializer_weights,
               bias=initializer_bias,
               activation=neon.transforms.Identity()))

    # Build model
    model = Model(layers=layers)

    # Define neon data iterators
    train_samples = int(loader.n_train)
    val_samples = int(loader.n_val)

    # explicit sample counts in the parameters override the loader's sizes
    if 'train_samples' in gParameters:
        train_samples = gParameters['train_samples']
    if 'val_samples' in gParameters:
        val_samples = gParameters['val_samples']

    train_iter = ConcatDataIter(loader,
                                ndata=train_samples,
                                lshape=reshape,
                                datatype=gParameters['datatype'])
    val_iter = ConcatDataIter(loader,
                              partition='val',
                              ndata=val_samples,
                              lshape=reshape,
                              datatype=gParameters['datatype'])

    # Define cost and optimizer
    cost = GeneralizedCost(p1_common_neon.get_function(gParameters['loss'])())
    optimizer = p1_common_neon.build_optimizer(gParameters['optimizer'],
                                               gParameters['learning_rate'],
                                               kerasDefaults)

    callbacks = Callbacks(model, eval_set=val_iter,
                          eval_freq=1)  #**args.callback_args)

    model.fit(train_iter,
              optimizer=optimizer,
              num_epochs=gParameters['epochs'],
              cost=cost,
              callbacks=callbacks)
示例#10
0
    def benchmark(self):
        """Train and evaluate the neon model once per entry of ``self.devices``.

        For every device a backend is generated, the train/validation/test
        data are wrapped in ArrayIterators, the model built by
        ``self.constructCNN()`` is trained for ``self.epoch_num`` epochs, its
        misclassification error is printed for the validation and test sets,
        its parameters are saved, and the backend is cleaned up.
        """
        for d in self.devices:
            # use "mkl" when it is listed in self.backends, otherwise the
            # device name itself is the backend name
            b = d if (self.backends is None) or (
                "mkl" not in self.backends) else "mkl"
            print("Use {} as backend.".format(b))

            # Common suffix
            suffix = "neon_{}_{}_{}by{}_{}".format(b, self.dataset,
                                                   self.resize_size[0],
                                                   self.resize_size[1],
                                                   self.preprocessing)

            # Set up backend
            # backend: 'cpu' for single cpu, 'mkl' for cpu using mkl library, and 'gpu' for gpu
            be = gen_backend(backend=b,
                             batch_size=self.batch_size,
                             rng_seed=542,
                             datatype=np.float32)

            # Prepare training/validation/testing sets
            # each sample is flattened, cast to float32 and divided by 255
            neon_train_set = ArrayIterator(X=np.asarray(
                [t.flatten().astype('float32') / 255 for t in self.x_train]),
                                           y=np.asarray(self.y_train),
                                           make_onehot=True,
                                           nclass=self.class_num,
                                           lshape=(3, self.resize_size[0],
                                                   self.resize_size[1]))
            neon_valid_set = ArrayIterator(X=np.asarray(
                [t.flatten().astype('float32') / 255 for t in self.x_valid]),
                                           y=np.asarray(self.y_valid),
                                           make_onehot=True,
                                           nclass=self.class_num,
                                           lshape=(3, self.resize_size[0],
                                                   self.resize_size[1]))
            neon_test_set = ArrayIterator(X=np.asarray([
                t.flatten().astype('float32') / 255 for t in self.testImages
            ]),
                                          y=np.asarray(self.testLabels),
                                          make_onehot=True,
                                          nclass=self.class_num,
                                          lshape=(3, self.resize_size[0],
                                                  self.resize_size[1]))

            # Initialize model object
            self.neon_model = SelfModel(layers=self.constructCNN())

            # Costs
            neon_cost = GeneralizedCost(costfunc=CrossEntropyMulti())

            # Model summary
            self.neon_model.initialize(neon_train_set, neon_cost)
            print(self.neon_model)

            # Learning rules
            neon_optimizer = SGD(0.01,
                                 momentum_coef=0.9,
                                 schedule=ExpSchedule(0.2))
            # neon_optimizer = RMSProp(learning_rate=0.0001, decay_rate=0.95)

            # # Benchmark for 20 minibatches
            # d[b] = self.neon_model.benchmark(neon_train_set, cost=neon_cost, optimizer=neon_optimizer)

            # Reset model
            # self.neon_model = None
            # self.neon_model = Model(layers=layers)
            # self.neon_model.initialize(neon_train_set, neon_cost)

            # Callbacks: validate on validation set
            # callback data goes to an HDF5 file keyed by network type and device
            callbacks = Callbacks(
                self.neon_model,
                eval_set=neon_valid_set,
                metric=Misclassification(3),
                output_file="./saved_data/{}/{}/callback_data_{}.h5".format(
                    self.network_type, d, suffix))
            callbacks.add_callback(
                SelfCallback(eval_set=neon_valid_set,
                             test_set=neon_test_set,
                             epoch_freq=1))

            # Fit
            start = time.time()
            self.neon_model.fit(neon_train_set,
                                optimizer=neon_optimizer,
                                num_epochs=self.epoch_num,
                                cost=neon_cost,
                                callbacks=callbacks)
            print("Neon training finishes in {:.2f} seconds.".format(
                time.time() - start))

            # Result
            # results = self.neon_model.get_outputs(neon_valid_set)

            # Print error on validation set
            # the eval result is scaled by 100 and printed as a percentage
            start = time.time()
            neon_error_mis = self.neon_model.eval(
                neon_valid_set, metric=Misclassification()) * 100
            print(
                'Misclassification error = {:.1f}%. Finished in {:.2f} seconds.'
                .format(neon_error_mis[0],
                        time.time() - start))

            # start = time.time()
            # neon_error_top3 = self.neon_model.eval(neon_valid_set, metric=TopKMisclassification(3))*100
            # print('Top 3 Misclassification error = {:.1f}%. Finished in {:.2f} seconds.'.format(neon_error_top3[2], time.time() - start))

            # start = time.time()
            # neon_error_top5 = self.neon_model.eval(neon_valid_set, metric=TopKMisclassification(5))*100
            # print('Top 5 Misclassification error = {:.1f}%. Finished in {:.2f} seconds.'.format(neon_error_top5[2], time.time() - start))

            # Save the trained parameters, keyed by network type and device
            self.neon_model.save_params("./saved_models/{}/{}/{}.prm".format(
                self.network_type, d, suffix))

            # Print error on test set
            start = time.time()
            neon_error_mis_t = self.neon_model.eval(
                neon_test_set, metric=Misclassification()) * 100
            print(
                'Misclassification error = {:.1f}% on test set. Finished in {:.2f} seconds.'
                .format(neon_error_mis_t[0],
                        time.time() - start))

            # start = time.time()
            # neon_error_top3_t = self.neon_model.eval(neon_test_set, metric=TopKMisclassification(3))*100
            # print('Top 3 Misclassification error = {:.1f}% on test set. Finished in {:.2f} seconds.'.format(neon_error_top3_t[2], time.time() - start))

            # start = time.time()
            # neon_error_top5_t = self.neon_model.eval(neon_test_set, metric=TopKMisclassification(5))*100
            # print('Top 5 Misclassification error = {:.1f}% on test set. Finished in {:.2f} seconds.'.format(neon_error_top5_t[2], time.time() - start))

            # Release the backend and drop the model before the next device
            cleanup_backend()
            self.neon_model = None
示例#11
0
    Conv((4, 4, 32), init=init_uni, activation=Rectlin(), batch_norm=bn),
    Pooling(2),
    Deconv(fshape=(4, 4, 8),
           init=init_uni,
           activation=Rectlin(),
           batch_norm=bn),
    Deconv(fshape=(3, 3, 8),
           init=init_uni,
           activation=Rectlin(),
           strides=2,
           batch_norm=bn),
    Deconv(fshape=(2, 2, 1), init=init_uni, strides=2, padding=1)
]

# Cost: sum of squared errors on the network output
cost = GeneralizedCost(costfunc=SumSquared())

model = Model(layers=layers)

# Callbacks are configured from the parsed command-line arguments
callbacks = Callbacks(model, **args.callback_args)

# Train the model
model.fit(
    train,
    optimizer=opt_gdm,
    num_epochs=args.epochs,
    cost=cost,
    callbacks=callbacks,
)

# Plot the reconstructed digits
try:
示例#12
0
# second and third paths of the tree, each starting from its branch node
p2 = [b1,
      Affine(nout=16, linear_name="b1_l1", **normrelu),
      Affine(nout=10, linear_name="b1_l2", **normsigm)]

p3 = [b2,
      Affine(nout=16, linear_name="b2_l1", **normrelu),
      Affine(nout=10, linear_name="b2_l2", **normsigm)]

# one cross-entropy cost per path; only the first has a non-zero weight
cost = Multicost(
    costs=[GeneralizedCost(costfunc=CrossEntropyMulti()),
           GeneralizedCost(costfunc=CrossEntropyBinary()),
           GeneralizedCost(costfunc=CrossEntropyBinary())],
    weights=[1, 0., 0.])

# gradient descent with momentum
optimizer = GradientDescentMomentum(
    0.1, momentum_coef=0.9, stochastic_round=args.rounding)

# the model is a Tree over the three paths, with one alpha per path
alphas = [1, 0.25, 0.25]
mlp = Model(layers=Tree([p1, p2, p3], alphas=alphas))

# setup standard fit callbacks
示例#13
0
def test_conv_rnn(backend_default):
    """
    Smoke test: run one fprop and one bprop through a Conv -> Pooling -> Conv
    stack feeding each of several recurrent layer types, followed by
    RecurrentMean and a 10-unit Affine layer.

    The test only checks that initialization, fprop and bprop complete; no
    numerical values are asserted.
    """
    be = backend_default
    train_shape = (1, 17, 142)

    # random input of the flattened training shape and a random error signal
    # matching the 10-unit output layer
    inp = be.array(be.rng.randn(np.prod(train_shape), be.bsz))
    delta = be.array(be.rng.randn(10, be.bsz))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # recurrent layers under test, constructed in the order they are exercised
    recurrent_variants = [
        DeepBiLSTM(128, init_norm, activation=Rectlin(),
                   gate_activation=Rectlin(), depth=1, reset_cells=True),
        DeepBiRNN(128, init_norm, activation=Rectlin(), depth=1,
                  reset_cells=True, batch_norm=False),
        DeepBiRNN(128, init_norm, activation=Rectlin(), depth=2,
                  reset_cells=True, batch_norm=False),
        DeepBiRNN(128, init_norm, activation=Rectlin(), depth=1,
                  reset_cells=True, batch_norm=True),
        DeepBiRNN(128, init_norm, activation=Rectlin(), depth=1,
                  reset_cells=True, batch_norm=False, bi_sum=True),
        Recurrent(128, init=init_norm, activation=Rectlin(),
                  reset_cells=True),
        LSTM(128, init_norm, activation=Rectlin(),
             gate_activation=Rectlin(), reset_cells=True),
        GRU(128, init_norm, activation=Rectlin(),
            gate_activation=Rectlin(), reset_cells=True),
    ]

    for recurrent_layer in recurrent_variants:
        # fresh convolutional front-end for every recurrent layer
        first_conv = Conv((2, 2, 4),
                          init=init_norm,
                          activation=Rectlin(),
                          strides={'str_h': 2, 'str_w': 4})
        pool = Pooling(2, strides=2)
        second_conv = Conv((3, 3, 4),
                           init=init_norm,
                           batch_norm=True,
                           activation=Rectlin(),
                           strides={'str_h': 1, 'str_w': 2})
        averaged = RecurrentMean()
        head = Affine(nout=10, init=init_norm, activation=Rectlin())

        net = Model(layers=[first_conv, pool, second_conv,
                            recurrent_layer, averaged, head])
        loss = GeneralizedCost(costfunc=CrossEntropyBinary())
        net.initialize(train_shape, loss)
        net.fprop(inp)
        net.bprop(delta)
示例#14
0
def main():
    """
    Train a neon autoencoder on the P1B1 data and report reconstruction errors.

    Command-line arguments are merged with the configuration file, the neon
    backend is re-generated from the merged settings, and a stack of Affine
    layers (encoder widths from the 'dense' parameter, mirrored for the
    decoder) is trained on X_train. The test-set reconstruction is then
    scored and a histogram of the errors is saved to 'histogram_neon.png'.
    """

    def flag_given(long_flag, short_flag):
        """Return True if the option appears explicitly on the command line.

        Only real option tokens are matched ('--flag', '--flag=value',
        '-f', '-fvalue'). A plain substring test would also match unrelated
        arguments such as '--eval_freq' for '-e', or the script path.
        """
        for token in sys.argv[1:]:
            if token == long_flag or token.startswith(long_flag + '='):
                return True
            if token.startswith(short_flag) and not token.startswith('--'):
                return True
        return False

    # Get command-line parameters
    parser = get_p1b1_parser()
    args = parser.parse_args()
    # Get parameters from configuration file
    fileParameters = p1b1.read_config_file(args.config_file)

    # Correct for arguments set by default by the neon parser: when one of the
    # conflicting parameters (batch_size, epochs, rng_seed) was not given on
    # the command line, take the configuration-file value instead of the neon
    # parser default.
    if not flag_given("--batch_size", "-z"):
        args.batch_size = fileParameters['batch_size']
    if not flag_given("--epochs", "-e"):
        args.epochs = fileParameters['epochs']
    if not flag_given("--rng_seed", "-r"):
        args.rng_seed = fileParameters['rng_seed']

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')
    # Construct extension to save model (only used by the disabled save below)
    ext = p1b1.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Load dataset
    X_train, X_val, X_test = p1b1.load_data(gParameters, seed)

    print("Shape X_train: ", X_train.shape)
    print("Shape X_val: ", X_val.shape)
    print("Shape X_test: ", X_test.shape)

    print("Range X_train --> Min: ", np.min(X_train), ", max: ",
          np.max(X_train))
    print("Range X_val --> Min: ", np.min(X_val), ", max: ", np.max(X_val))
    print("Range X_test --> Min: ", np.min(X_test), ", max: ", np.max(X_test))

    # The autoencoder reconstructs its input, so output width == input width
    input_dim = X_train.shape[1]
    output_dim = input_dim

    # Re-generate the backend after consolidating parsing and file config
    gen_backend(backend=args.backend,
                rng_seed=seed,
                device_id=args.device_id,
                batch_size=gParameters['batch_size'],
                datatype=gParameters['datatype'],
                max_devices=args.max_devices,
                compat_mode=args.compat_mode)

    # Set input and target to X_train
    train = ArrayIterator(X_train)
    val = ArrayIterator(X_val)
    test = ArrayIterator(X_test)

    # Initialize weights and learning rule
    initializer_weights = p1_common_neon.build_initializer(
        gParameters['initialization'], kerasDefaults)
    initializer_bias = p1_common_neon.build_initializer(
        'constant', kerasDefaults, 0.)

    activation = p1_common_neon.get_function(gParameters['activation'])()

    # Define Autoencoder architecture
    layers = []
    layers_params = gParameters['dense']

    if layers_params is not None:
        try:
            layers_params = list(layers_params)
        except TypeError:
            # A single scalar width was given instead of a sequence
            layers_params = [layers_params]

        # Encoder part: one Affine layer per configured width
        for width in layers_params:
            layers.append(
                Affine(nout=width,
                       init=initializer_weights,
                       bias=initializer_bias,
                       activation=activation))
        # Decoder part: mirror the encoder widths, skipping the last
        # (innermost) one so the bottleneck layer is not duplicated
        for width in reversed(layers_params[:-1]):
            layers.append(
                Affine(nout=width,
                       init=initializer_weights,
                       bias=initializer_bias,
                       activation=activation))

    # Output layer restores the input dimensionality
    layers.append(
        Affine(nout=output_dim,
               init=initializer_weights,
               bias=initializer_bias,
               activation=activation))

    # Build Autoencoder model
    ae = Model(layers=layers)

    # Define cost and optimizer
    cost = GeneralizedCost(p1_common_neon.get_function(gParameters['loss'])())
    optimizer = p1_common_neon.build_optimizer(gParameters['optimizer'],
                                               gParameters['learning_rate'],
                                               kerasDefaults)

    callbacks = Callbacks(ae, eval_set=val, eval_freq=1)

    # Seed random generator for training
    np.random.seed(seed)

    ae.fit(train,
           optimizer=optimizer,
           num_epochs=gParameters['epochs'],
           cost=cost,
           callbacks=callbacks)

    # model save
    #save_fname = "model_ae_W" + ext
    #ae.save_params(save_fname)

    # Compute errors
    X_pred = ae.get_outputs(test)
    scores = p1b1.evaluate_autoencoder(X_pred, X_test)
    print('Evaluation on test data:', scores)

    diff = X_pred - X_test
    # Plot histogram of errors comparing input and output of autoencoder
    plt.hist(diff.ravel(), bins='auto')
    plt.title("Histogram of Errors with 'auto' bins")
    plt.savefig('histogram_neon.png')
示例#15
0
    layers = [
        Affine(nout=50, init=w, bias=b, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=50, init=w, bias=b, activation=Rectlin()),
        Dropout(keep=0.4),
        Affine(nout=3, init=w, bias=b, activation=Softmax()),
        Dropout(keep=0.3)
    ]

    # Optimizer
    optimizer = GradientDescentMomentum(0.1,
                                        momentum_coef=0.9,
                                        stochastic_round=args.rounding)

    # Cost
    cost = GeneralizedCost(costfunc=MeanSquared())

    model = Model(layers=layers)

    callbacks = Callbacks(model, eval_set=val_iter, **args.callback_args)

    # Training
    model.fit(train_iter,
              optimizer=optimizer,
              num_epochs=1,
              cost=cost,
              callbacks=callbacks)

    # Evaluate
    evaluate(model, val_iter, Metric=Misclassification())
示例#16
0
def main():
    """
    Train a neon multilayer perceptron on the P1B2 data and report accuracy.

    Command-line arguments are merged with the configuration file, the neon
    backend is re-generated from the merged settings, and a stack of Affine
    layers (widths from the 'dense' parameter, optionally followed by
    Dropout) is trained on the labelled training set. Accuracy on the test
    set is printed both as computed by neon and by p1b2.evaluate_accuracy.
    """

    def flag_given(long_flag, short_flag):
        """Return True if the option appears explicitly on the command line.

        Only real option tokens are matched ('--flag', '--flag=value',
        '-f', '-fvalue'). A plain substring test would also match unrelated
        arguments such as '--eval_freq' for '-e', or the script path.
        """
        for token in sys.argv[1:]:
            if token == long_flag or token.startswith(long_flag + '='):
                return True
            if token.startswith(short_flag) and not token.startswith('--'):
                return True
        return False

    # Get command-line parameters
    parser = get_p1b2_parser()
    args = parser.parse_args()
    # Get parameters from configuration file
    fileParameters = p1b2.read_config_file(args.config_file)

    # Correct for arguments set by default by the neon parser: when one of the
    # conflicting parameters (batch_size, epochs, rng_seed) was not given on
    # the command line, take the configuration-file value instead of the neon
    # parser default.
    if not flag_given("--batch_size", "-z"):
        args.batch_size = fileParameters['batch_size']
    if not flag_given("--epochs", "-e"):
        args.epochs = fileParameters['epochs']
    if not flag_given("--rng_seed", "-r"):
        args.rng_seed = fileParameters['rng_seed']

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')
    # Construct extension to save model (only used by the disabled save below)
    ext = p1b2.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Load dataset
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = p1b2.load_data(
        gParameters, seed)

    print("Shape X_train: ", X_train.shape)
    print("Shape X_val: ", X_val.shape)
    print("Shape X_test: ", X_test.shape)
    print("Shape y_train: ", y_train.shape)
    print("Shape y_val: ", y_val.shape)
    print("Shape y_test: ", y_test.shape)

    print("Range X_train --> Min: ", np.min(X_train), ", max: ",
          np.max(X_train))
    print("Range X_val --> Min: ", np.min(X_val), ", max: ", np.max(X_val))
    print("Range X_test --> Min: ", np.min(X_test), ", max: ", np.max(X_test))
    print("Range y_train --> Min: ", np.min(y_train), ", max: ",
          np.max(y_train))
    print("Range y_val --> Min: ", np.min(y_val), ", max: ", np.max(y_val))
    print("Range y_test --> Min: ", np.min(y_test), ", max: ", np.max(y_test))

    # Number of classes is derived from the largest training label.
    # The backend will represent the classes using one-hot representation
    # (but requires an integer class as input !)
    num_classes = int(np.max(y_train)) + 1
    output_dim = num_classes

    # Re-generate the backend after consolidating parsing and file config
    gen_backend(backend=args.backend,
                rng_seed=seed,
                device_id=args.device_id,
                batch_size=gParameters['batch_size'],
                datatype=gParameters['data_type'],
                max_devices=args.max_devices,
                compat_mode=args.compat_mode)

    train = ArrayIterator(X=X_train, y=y_train, nclass=num_classes)
    val = ArrayIterator(X=X_val, y=y_val, nclass=num_classes)
    test = ArrayIterator(X=X_test, y=y_test, nclass=num_classes)

    # Initialize weights and learning rule
    initializer_weights = p1_common_neon.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = p1_common_neon.build_initializer(
        'constant', kerasDefaults, 0.)

    activation = p1_common_neon.get_function(gParameters['activation'])()

    # Define MLP architecture
    layers = []
    dropout = gParameters['dropout']

    for layer in gParameters['dense']:
        # A falsy width (e.g. 0 or None) adds no Affine layer
        if layer:
            layers.append(
                Affine(nout=layer,
                       init=initializer_weights,
                       bias=initializer_bias,
                       activation=activation))
        if dropout:
            # neon's Dropout takes the keep probability, not the drop rate
            layers.append(Dropout(keep=(1 - dropout)))

    # Output layer: one unit per class
    layers.append(
        Affine(nout=output_dim,
               init=initializer_weights,
               bias=initializer_bias,
               activation=activation))

    # Build MLP model
    mlp = Model(layers=layers)

    # Define cost and optimizer
    cost = GeneralizedCost(p1_common_neon.get_function(gParameters['loss'])())
    optimizer = p1_common_neon.build_optimizer(gParameters['optimizer'],
                                               gParameters['learning_rate'],
                                               kerasDefaults)

    callbacks = Callbacks(mlp, eval_set=val, metric=Accuracy(), eval_freq=1)

    # Seed random generator for training
    np.random.seed(seed)

    mlp.fit(train,
            optimizer=optimizer,
            num_epochs=gParameters['epochs'],
            cost=cost,
            callbacks=callbacks)

    # model save
    #save_fname = "model_mlp_W_" + ext
    #mlp.save_params(save_fname)

    # Evaluate model on test set
    print('Model evaluation by neon: ', mlp.eval(test, metric=Accuracy()))
    y_pred = mlp.get_outputs(test)
    scores = p1b2.evaluate_accuracy(p1_common.convert_to_class(y_pred), y_test)
    print('Evaluation on test data:', scores)
示例#17
0
# Define the encoder/decoder layers shared by the autoencoder models below,
# plus a softmax classifier layer and the two cost functions.
# Each decoderN outputs the size of the input that encoderN receives:
# decoder1 -> image_size, decoder2 -> encoder_size[0], decoder3 -> encoder_size[1].
encoder1 = Affine(nout=config.encoder_size[0], init=init_norm,
                  activation=Logistic(), name='encoder1')
decoder1 = Affine(nout=image_size, init=init_norm, activation=Logistic(),
                  name='decoder1')
encoder2 = Affine(nout=config.encoder_size[1], init=init_norm,
                  activation=Logistic(), name='encoder2')
decoder2 = Affine(nout=config.encoder_size[0], init=init_norm,
                  activation=Logistic(), name='decoder2')
encoder3 = Affine(nout=config.encoder_size[2], init=init_norm,
                  activation=Logistic(), name='encoder3')
decoder3 = Affine(nout=config.encoder_size[1], init=init_norm,
                  activation=Logistic(), name='decoder3')
classifier = Affine(nout=config.ydim, init=init_norm, activation=Softmax())
# Sum-squared error for reconstruction, multiclass cross-entropy for classification
cost_reconst = GeneralizedCost(costfunc=SumSquared())
cost_classification = GeneralizedCost(costfunc=CrossEntropyMulti())

# Build and initialize AE1 (encoder1 -> decoder1) with the reconstruction cost,
# then pass it to measure_time (defined outside this excerpt).
AE1 = Model([encoder1, decoder1])
AE1.cost = cost_reconst
AE1.initialize(data, cost_reconst)
# AE1.optimizer = optimizer_default
measure_time(data, AE1, config, 'AE1')

# Setting model layers for AE2
# It has an extra encoder layer compared to what AE should really be. This is
# done to avoid saving the outputs for each AE.
# NOTE: encoder1 is the same layer object already used in AE1.
AE2_mimic = Model([encoder1, encoder2, decoder2])
AE2_mimic.cost = cost_reconst
AE2_mimic.initialize(data, cost_reconst)
示例#18
0
from neon.backends import gen_backend
import bot_params as params
import replay_memory as mem
from enemydetector1 import model, predict

# Training script: fit the imported `model` on the replay-memory dataset and
# save its weights.

# Override the batch size in the shared params module and create a CPU backend
# with the same batch size.
params.batch_size = 64
be = gen_backend(backend='cpu', batch_size=params.batch_size)

# Load the stored dataset from replay memory
dataset = mem.load()


# Gradient descent with momentum; stochastic rounding disabled (0)
opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                  momentum_coef=0.9,
                                  stochastic_round=0)

# Multiclass cross-entropy cost with a scale factor of 10
cost = GeneralizedCost(costfunc=CrossEntropyMulti(scale=10))

(X_train, y_train), (X_test, y_test) = dataset.get_dataset()

# NOTE: Python 2 print statement; this script does not parse under Python 3
print X_train.shape, y_train.shape, X_test.shape, y_test.shape
# Labels are passed through as-is (make_onehot=False)
# presumably y is already one-hot encoded -- TODO confirm against dataset.get_dataset()
train_set = ArrayIterator(X=X_train, y=y_train, nclass=dataset.nclass, lshape=dataset.shape, make_onehot=False)
test = ArrayIterator(X=X_test, y=y_test, nclass=dataset.nclass, lshape=dataset.shape, make_onehot=False)

# Evaluate on the test iterator after every epoch
callbacks = Callbacks(model, eval_set=test, eval_freq=1,)

# Train for two epochs, then write the weights to the path configured in params
model.fit(train_set, optimizer=opt_gdm, num_epochs=2, cost=cost, callbacks=callbacks)
model.save_params(params.weigths_path)


def test_example(i):
    val = predict(X_train[i])
示例#19
0
def create_objects(root_yaml,
                   be_type='gpu',
                   batch_size=128,
                   rng_seed=None,
                   device_id=0,
                   default_dtype=np.float32,
                   stochastic_rounding=False):
    """
    Instantiate objects as per the given specifications.

    A backend must already have been generated before calling this function.
    The backend-related arguments (be_type, batch_size, rng_seed, device_id,
    default_dtype, stochastic_rounding) are accepted for interface
    compatibility but are not read by this function.

    Arguments:
        root_yaml (dict or str): Model definition dictionary parsed from a
                                 YAML file, or the path of a YAML file to load

        be_type (str): backend either 'gpu', 'mgpu' or 'cpu'

        batch_size (int): Batch size.

        rng_seed (None or int): random number generator seed

        device_id (int): for GPU backends id of device to use

        default_dtype (type): numpy data format for default data types,

        stochastic_rounding (bool or int): number of bits for stochastic rounding
                                           use False for no rounding

    Returns:
        tuple: Contains model, cost and optimizer objects. The optimizer is
               None when the definition has no 'optimizer' entry.
    """

    assert NervanaObject.be is not None, 'Must generate a backend before running this function'

    # can give filename or parse dictionary
    if isinstance(root_yaml, str):
        with open(root_yaml, 'r') as fid:
            root_yaml = yaml.safe_load(fid)

    # work on a private copy, in case YAML references made parts of the
    # definition share the same objects
    root_yaml = deepcopy(root_yaml)

    # initialize layers; currently only a sequential container is supported
    # in yaml
    layers = Sequential.gen_class({'layers': root_yaml['layers']})

    # initialize model
    model = Model(layers=layers)

    # cost: the 'cost' entry names the cost function type
    cost_name = root_yaml['cost']
    cost = GeneralizedCost.gen_class({'costfunc': {'type': cost_name}})

    # create optimizer (optional): looked up by type name in neon.optimizers
    opt = None
    if 'optimizer' in root_yaml:
        yaml_opt = root_yaml['optimizer']
        opt_class = getattr(neon.optimizers, yaml_opt['type'])
        opt = opt_class.gen_class(yaml_opt['config'])

    return model, cost, opt
示例#20
0
def main():
    """
    Build and train a two-stream LSTM model (a 'spike' path and a 'stim'
    path merged by stacking), then report the fraction of explained variance
    and save prediction-vs-target plots for the training and validation sets.

    NOTE(review): as written this function references names that are not
    defined inside it (`ts`, `train_set`, `valid_set`); see the notes at the
    points of use. Unless they exist at module level it cannot run to
    completion.
    """
    parser = NeonArgparser(__doc__)
    args = parser.parse_args(gen_be=False)

    #mat_data = sio.loadmat('../data/timeseries/02_timeseries.mat')

    #ts = V1TimeSeries(mat_data['timeseries'], mat_data['stim'], binning=10)

    seq_len = 30
    hidden = 20

    # generate the backend from the subset of args that gen_backend accepts
    be = gen_backend(**extract_valid_args(args, gen_backend))

    kohn = KohnV1Dataset(path='../tmp/')
    kohn.gen_iterators(seq_len)
    # NOTE(review): `ts` is only created in the commented-out lines above, so
    # these two calls raise NameError unless `ts` is a module-level name.
    train_spike_set = V1IteratorSequence(ts.train, seq_len, return_sequences=False)
    valid_spike_set = V1IteratorSequence(ts.test, seq_len, return_sequences=False)

    init = GlorotUniform()

    # dataset = MNIST(path=args.data_dir)
    # (X_train, y_train), (X_test, y_test), nclass = dataset.load_data()
    # train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))
    # valid_set = ArrayIterator([X_test, X_test], y_test, nclass=nclass, lshape=(1, 28, 28))

    # # weight initialization
    # init_norm = Gaussian(loc=0.0, scale=0.01)

    # # initialize model
    # path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
    #                            Affine(nout=100, init=init_norm, activation=Rectlin())])

    # path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
    #                            Affine(nout=100, init=init_norm, activation=Rectlin())])

    # layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
    #           Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    # NOTE(review): `train_set` / `valid_set` are used from here on but are not
    # assigned in this function (only train_spike_set / valid_spike_set are).
    # Confirm which iterators are intended.
    spike_rnn_path = Sequential(layers=[
        LSTM(hidden, init, activation=Logistic(),
             gate_activation=Logistic(), reset_cells=False),
        Dropout(keep=0.5),
        LSTM(hidden, init, activation=Logistic(),
             gate_activation=Logistic(), reset_cells=False),
        #Dropout(keep=0.85),
        RecurrentLast(),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity(), name='spike_in')])

    stim_rnn_path = Sequential(layers=[
        LSTM(hidden, init, activation=Logistic(),
             gate_activation=Logistic(), reset_cells=False),
        Dropout(keep=0.5),
        RecurrentLast(),
        Affine(1, init, bias=init, activation=Identity(), name='stim')])

    # Merge the two paths by stacking their outputs. The neon layer is spelled
    # `MergeMultistream` (lower-case "s").
    layers = [
        MergeMultistream(
            layers=[spike_rnn_path, stim_rnn_path],
            merge="stack"),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity(), name='spike_out'),
        Round()
    ]

    model = Model(layers=layers)

    sched = ExpSchedule(decay=0.7)

    # cost = GeneralizedCost(SumSquared())
    cost = GeneralizedCost(MeanSquared())

    optimizer_two = RMSProp(stochastic_round=args.rounding)
    optimizer_one = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9, schedule=sched)

    # per-layer-type optimizer mapping; 'default' covers everything not listed
    opt = MultiOptimizer({'default': optimizer_one,
                          'Bias': optimizer_two,
                          'special_linear': optimizer_two})

    callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
    callbacks.add_hist_callback(filter_key=['W'])
    #callbacks.add_callback(MetricCallback(eval_set=valid_set, metric=FractionExplainedVariance(), epoch_freq=args.eval_freq))
    #callbacks.add_callback(MetricCallback(eval_set=valid_set,metric=Accuracy(),  epoch_freq=args.eval_freq))

    model.fit(train_set,
              optimizer=opt,
              num_epochs=args.epochs,
              cost=cost,
              callbacks=callbacks)

    # reshape model outputs to one row per sample, one column per feature
    train_output = model.get_outputs(train_set).reshape(-1, train_set.nfeatures)
    valid_output = model.get_outputs(valid_set).reshape(-1, valid_set.nfeatures)
    train_target = train_set.y_series
    valid_target = valid_set.y_series

    tfev = fev(train_output, train_target, train_set.mean)
    vfev = fev(valid_output, valid_target, valid_set.mean)

    neon_logger.display('Train FEV: %g, Valid FEV:  %g' % (tfev, vfev))
    # neon_logger.display('Train Mean: %g, Valid Mean:  %g' % (train_set.mean, valid_set.mean))

    # scatter the first two output columns against the corresponding targets
    plt.figure()
    plt.plot(train_output[:, 0], train_output[:, 1], 'bo', label='prediction')
    plt.plot(train_target[:, 0], train_target[:, 1], 'r.', label='target')
    plt.legend()
    plt.title('Neon on training set')
    plt.savefig('neon_series_training_output.png')

    plt.figure()
    plt.plot(valid_output[:, 0], valid_output[:, 1], 'bo', label='prediction')
    plt.plot(valid_target[:, 0], valid_target[:, 1], 'r.', label='target')
    plt.legend()
    plt.title('Neon on validation set')
    plt.savefig('neon_series_validation_output.png')
示例#21
0
def test_model_serialize(backend_default, data):
    """
    Check that a model saved with its states and loaded back produces the
    same inference outputs, layer states and parameters.

    A two-stream MNIST model is trained briefly, its outputs and serialized
    layer parameters are recorded, the model is saved to a temporary pickle
    and reloaded, and the reloaded model is compared against the recorded
    values. The temporary file is removed even when the test fails.
    """
    dataset = MNIST(path=data)
    (X_train, y_train), (X_test, y_test), nclass = dataset.load_data()
    train_set = ArrayIterator([X_train, X_train],
                              y_train,
                              nclass=nclass,
                              lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([
        Conv((5, 5, 16),
             init=init_norm,
             bias=Constant(0),
             activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    path2 = Sequential([
        Affine(nout=100,
               init=init_norm,
               bias=Constant(0),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    layers = [
        MergeMultistream(layers=[path1, path2], merge="stack"),
        Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)
    n_test = 3
    num_epochs = 3
    # Train the model for num_epochs, using only the first few batches of each
    # epoch (the loop stops once the batch index exceeds n_test)
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of the first few batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    try:
        # Serialize model
        mlp.save_params(tmp_save, keep_states=True)

        # Load model
        mlp = Model(tmp_save)

        mlp.initialize(train_set)
        outputs = []
        pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
        for i, (x, t) in enumerate(train_set):
            outputs.append(mlp.fprop(x, inference=True))
            if i > n_test:
                break

        # Check outputs, states, and params are the same
        for output, output_exp in zip(outputs, outputs_exp):
            assert np.allclose(output.get(), output_exp.get())

        for pd, pd_exp in zip(pdicts, pdicts_exp):
            for s, s_e in zip(pd['states'], pd_exp['states']):
                if isinstance(s, list):  # this is the batch norm case
                    for _s, _s_e in zip(s, s_e):
                        assert np.allclose(_s, _s_e)
                else:
                    assert np.allclose(s, s_e)
            for p, p_e in zip(pd['params'], pd_exp['params']):
                assert type(p) == type(p_e)
                if isinstance(p, list):  # this is the batch norm case
                    for _p, _p_e in zip(p, p_e):
                        assert np.allclose(_p, _p_e)
                elif isinstance(p, np.ndarray):
                    assert np.allclose(p, p_e)
                else:
                    assert p == p_e
    finally:
        # Always clean up the temporary pickle, even if an assertion failed;
        # guard against the file never having been written.
        if os.path.exists(tmp_save):
            os.remove(tmp_save)
示例#22
0
class DeepQNetwork:
    """Deep Q-network on the neon backend, with an optional separate target model."""

    def __init__(self, num_actions, args):
        """
        Create the backend, reusable tensors, model(s), cost and optimizer.

        Arguments:
            num_actions (int): number of units in the output layer
            args: options object; the attributes read are batch_size,
                  discount_rate, history_length, screen_height, screen_width,
                  clip_error, min_reward, max_reward, batch_norm, backend,
                  random_seed, device_id, datatype, stochastic_round,
                  optimizer, learning_rate, decay_rate, target_steps and
                  (only when target_steps is truthy) save_weights_prefix.

        Raises:
            ValueError: if args.optimizer is not 'rmsprop', 'adam' or 'adadelta'.
        """
        # remember parameters
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.history_length = args.history_length
        self.screen_dim = (args.screen_height, args.screen_width)
        self.clip_error = args.clip_error
        self.min_reward = args.min_reward
        self.max_reward = args.max_reward
        self.batch_norm = args.batch_norm

        # create Neon backend
        self.be = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              datatype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)

        # prepare tensors once and reuse them
        self.input_shape = (self.history_length, ) + self.screen_dim + (
            self.batch_size, )
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        # create model
        layers = self._createLayers(num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix
        for l in self.model.layers.layers:
            l.parallelism = 'Disabled'
        # the model is initialized with the input shape minus the batch axis
        self.model.initialize(self.input_shape[:-1], self.cost)
        self.optimizer = self._createOptimizer(args)

        # create target model
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers=self._createLayers(num_actions))
            # Bug fix
            for l in self.target_model.layers.layers:
                l.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
            self.save_weights_prefix = args.save_weights_prefix
        else:
            # without a target update interval the online model doubles as target
            self.target_model = self.model

        self.callback = None

    def _createOptimizer(self, args):
        """Return the optimizer selected by args.optimizer.

        Raises:
            ValueError: if the optimizer name is not recognized.
        """
        if args.optimizer == 'rmsprop':
            return RMSProp(learning_rate=args.learning_rate,
                           decay_rate=args.decay_rate,
                           stochastic_round=args.stochastic_round)
        if args.optimizer == 'adam':
            return Adam(learning_rate=args.learning_rate,
                        stochastic_round=args.stochastic_round)
        if args.optimizer == 'adadelta':
            return Adadelta(decay=args.decay_rate,
                            stochastic_round=args.stochastic_round)
        raise ValueError("Unknown optimizer: %s" % (args.optimizer,))

    def _createLayers(self, num_actions):
        """Return a fresh list of layers: three Conv layers, a 512-unit Affine
        layer and a linear Affine output layer with num_actions units."""
        # create network
        init_xavier_conv = Xavier(local=True)
        init_xavier_affine = Xavier(local=False)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
        layers.append(
            Conv((8, 8, 32),
                 strides=4,
                 init=init_xavier_conv,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
        layers.append(
            Conv((4, 4, 64),
                 strides=2,
                 init=init_xavier_conv,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(
            Conv((3, 3, 64),
                 strides=1,
                 init=init_xavier_conv,
                 activation=Rectlin(),
                 batch_norm=self.batch_norm))
        # The final hidden layer is fully-connected and consists of 512 rectifier units.
        layers.append(
            Affine(nout=512,
                   init=init_xavier_affine,
                   activation=Rectlin(),
                   batch_norm=self.batch_norm))
        # The output layer is a fully-connected linear layer with a single output for each valid action.
        layers.append(Affine(nout=num_actions, init=init_xavier_affine))
        return layers

    def _setInput(self, states):
        """Copy a batch of states into the reusable input tensor and scale it by 1/255."""
        # change order of axes to match what Neon expects (batch axis last)
        states = np.transpose(states, axes=(1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.input, 255, self.input)

    def train(self, minibatch, epoch):
        """
        Perform one Q-learning update on a minibatch.

        Arguments:
            minibatch (tuple): (prestates, actions, rewards, poststates,
                               terminals); the state arrays must be 4-D and
                               of equal shape, the others 1-D, all with the
                               same first dimension
            epoch (int): passed through to the optimizer
        """
        # expand components of minibatch
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[
            0] == poststates.shape[0] == terminals.shape[0]

        # every target_steps iterations, copy the online model into the target model
        if self.target_steps and self.train_iterations % self.target_steps == 0:
            # have to serialize also states for batch normalization to work
            pdict = self.model.get_description(get_weights=True,
                                               keep_states=True)
            self.target_model.deserialize(pdict, load_states=True)

        # feed-forward pass for poststates to get Q-values
        self._setInput(poststates)
        postq = self.target_model.fprop(self.input, inference=True)
        assert postq.shape == (self.num_actions, self.batch_size)

        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)

        # feed-forward pass for prestates
        self._setInput(prestates)
        preq = self.model.fprop(self.input, inference=False)
        assert preq.shape == (self.num_actions, self.batch_size)

        # make copy of prestate Q-values as targets
        targets = preq.asnumpyarray().copy()

        # clip rewards to the configured [min_reward, max_reward] range
        rewards = np.clip(rewards, self.min_reward, self.max_reward)

        # update Q-value targets for actions taken; entries for other actions
        # keep the predicted value, so their error is zero
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(
                    rewards[i]) + self.discount_rate * maxpostq[0, i]

        # copy targets to GPU memory
        self.targets.set(targets)

        # calculate errors
        deltas = self.cost.get_errors(preq, self.targets)
        assert deltas.shape == (self.num_actions, self.batch_size)
        #assert np.count_nonzero(deltas.asnumpyarray()) == 32

        # calculate cost, just in case
        cost = self.cost.get_cost(preq, self.targets)
        assert cost.shape == (1, 1)

        # clip errors
        if self.clip_error:
            self.be.clip(deltas, -self.clip_error, self.clip_error, out=deltas)

        # perform back-propagation of gradients
        self.model.bprop(deltas)

        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)

        # increase number of weight updates (needed for target clone interval)
        self.train_iterations += 1

        # calculate statistics
        if self.callback:
            self.callback.on_train(cost[0, 0])

    def predict(self, states):
        """
        Return Q-values for a full batch of states.

        Arguments:
            states: array of shape (batch_size, history_length) + screen_dim

        Returns:
            numpy array of Q-values with the batch as the first dimension.
        """
        # minibatch is full size, because Neon doesn't let change the minibatch size
        assert states.shape == ((
            self.batch_size,
            self.history_length,
        ) + self.screen_dim)

        # calculate Q-values for the states
        self._setInput(states)
        qvalues = self.model.fprop(self.input, inference=True)
        assert qvalues.shape == (self.num_actions, self.batch_size)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:, 0]))

        # transpose the result, so that batch size is first dimension
        return qvalues.T.asnumpyarray()

    def load_weights(self, load_path):
        """Load the online model's parameters from load_path."""
        self.model.load_params(load_path)

    def save_weights(self, save_path):
        """Save the online model's parameters to save_path."""
        self.model.save_params(save_path)
示例#23
0
def main():
    """Compare one-shot training with epoch-by-epoch training and reloads.

    A model is first fitted for ``num_epochs`` in one ``fit`` call while a
    ``SerializeModelCallback`` writes checkpoints derived from
    ``test_oneshot.pkl``.  A freshly generated model is then fitted in a loop
    with ``num_epochs=epoch + 1``, and after every call the checkpoint file
    ``test_manyshot_<epoch>.pkl`` is loaded back into the model.  Finally the
    ``_<num_epochs - 1>`` checkpoints of both runs are handed to
    ``compare_model_pickles``; the process exits with status 1 if it reports
    a mismatch.

    Reads the enclosing-scope names ``args``, ``batch_size`` and
    ``num_epochs``.
    """
    # setup the model and run for num_epochs saving the last state only
    # this is at the top so that the be is generated
    model = gen_model(args.backend)

    # setup data iterators
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
    NN = batch_size*5  # avoid partial mini batches
    if args.backend == 'nervanacpu' or args.backend == 'cpu':
        # limit data since cpu backend runs slower
        train = ArrayIterator(X_train[:NN], y_train[:NN],
                              nclass=nclass, lshape=(1, 28, 28))
        valid = ArrayIterator(X_test[:NN], y_test[:NN],
                              nclass=nclass, lshape=(1, 28, 28))
    else:
        train = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
        valid = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))

    # serialization related
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    checkpoint_model_path = os.path.join('./', 'test_oneshot.pkl')
    checkpoint_schedule = 1  # save at every step

    callbacks = Callbacks(model)
    callbacks.add_callback(SerializeModelCallback(checkpoint_model_path,
                                                  checkpoint_schedule,
                                                  history=2))

    # run the fit all the way through, saving a checkpoint at every epoch
    model.fit(train,
              optimizer=opt_gdm,
              num_epochs=num_epochs,
              cost=cost,
              callbacks=callbacks)

    # setup model with same random seed run epoch by epoch
    # serializing and deserializing at each step
    model = gen_model(args.backend)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    # reset data iterators
    train.reset()
    valid.reset()

    checkpoint_model_path = os.path.join('./', 'test_manyshot.pkl')
    checkpoint_schedule = 1  # save at every step
    for epoch in range(num_epochs):
        # _0 points to state at end of epoch 0
        callbacks = Callbacks(model)
        callbacks.add_callback(SerializeModelCallback(checkpoint_model_path,
                                                      checkpoint_schedule,
                                                      history=num_epochs))
        model.fit(train,
                  optimizer=opt_gdm,
                  num_epochs=epoch+1,
                  cost=cost,
                  callbacks=callbacks)

        # load saved file
        prts = os.path.splitext(checkpoint_model_path)
        fn = prts[0] + '_%d' % epoch + prts[1]
        model.load_params(fn)  # load the saved weights

    # compare test_oneshot_<num_epochs-1>.pkl to test_manyshot_<num_epochs-1>.pkl
    # NOTE: print is called with one parenthesised argument so the output is
    # the same under Python 2 and the line is also valid Python 3.
    if not compare_model_pickles('test_oneshot_%d.pkl' % (num_epochs-1),
                                 'test_manyshot_%d.pkl' % (num_epochs-1)):
        print('No Match')
        sys.exit(1)
    else:
        print('Match')
示例#24
0
    def __init__(self, num_actions, args):
        """Create the Neon backend, reusable tensors, model(s) and optimizer.

        Args:
            num_actions: number of network outputs; also the first dimension
                of the targets tensor.
            args: settings object.  The attributes batch_size, discount_rate,
                history_length, screen_height, screen_width, clip_error,
                min_reward, max_reward, batch_norm, backend, random_seed,
                device_id, datatype, stochastic_round, optimizer,
                learning_rate, target_steps are read; decay_rate is read for
                the 'rmsprop' and 'adadelta' optimizers and
                save_weights_prefix only when target_steps is truthy.

        Raises:
            AssertionError: if ``args.optimizer`` is not one of 'rmsprop',
                'adam' or 'adadelta'.
        """
        # remember parameters
        self.num_actions = num_actions
        self.batch_size = args.batch_size
        self.discount_rate = args.discount_rate
        self.history_length = args.history_length
        self.screen_dim = (args.screen_height, args.screen_width)
        self.clip_error = args.clip_error
        self.min_reward = args.min_reward
        self.max_reward = args.max_reward
        self.batch_norm = args.batch_norm

        # create Neon backend
        self.be = gen_backend(backend=args.backend,
                              batch_size=args.batch_size,
                              rng_seed=args.random_seed,
                              device_id=args.device_id,
                              datatype=np.dtype(args.datatype).type,
                              stochastic_round=args.stochastic_round)

        # prepare tensors once and reuse them
        self.input_shape = (self.history_length, ) + self.screen_dim + (
            self.batch_size, )
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.num_actions, self.batch_size))

        # create model
        layers = self._createLayers(num_actions)
        self.model = Model(layers=layers)
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix
        for layer in self.model.layers.layers:
            layer.parallelism = 'Disabled'
        # the batch dimension (last entry of input_shape) is not passed on
        self.model.initialize(self.input_shape[:-1], self.cost)
        if args.optimizer == 'rmsprop':
            self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                     decay_rate=args.decay_rate,
                                     stochastic_round=args.stochastic_round)
        elif args.optimizer == 'adam':
            self.optimizer = Adam(learning_rate=args.learning_rate,
                                  stochastic_round=args.stochastic_round)
        elif args.optimizer == 'adadelta':
            self.optimizer = Adadelta(decay=args.decay_rate,
                                      stochastic_round=args.stochastic_round)
        else:
            # was `assert false`, which raised NameError instead of reporting
            # the unknown optimizer
            assert False, "Unknown optimizer"

        # create target model
        self.target_steps = args.target_steps
        self.train_iterations = 0
        if self.target_steps:
            self.target_model = Model(layers=self._createLayers(num_actions))
            # Bug fix
            for layer in self.target_model.layers.layers:
                layer.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
            self.save_weights_prefix = args.save_weights_prefix
        else:
            # without a clone interval the trained model doubles as target
            self.target_model = self.model

        self.callback = None
示例#25
0
# wrap the training and validation arrays in iterators; make_onehot=False is
# passed for both sets
train_set = ArrayIterator(X=X_train, y=y_train, make_onehot=False)
val_set = ArrayIterator(X=X_val, y=y_val, make_onehot=False)

# setup weight initialization function
init = Uniform(-1, 1)

# setup layers: three 4096-unit BinaryAffine layers with Sign activation
# followed by a 2-unit BinaryAffine output layer with Identity activation;
# every layer has batch_norm enabled
layers = [
    BinaryAffine(nout=4096, init=init, batch_norm=True, activation=Sign()),
    BinaryAffine(nout=4096, init=init, batch_norm=True, activation=Sign()),
    BinaryAffine(nout=4096, init=init, batch_norm=True, activation=Sign()),
    BinaryAffine(nout=2, init=init, batch_norm=True, activation=Identity())
]

# setup cost function as Square Hinge Loss
cost = GeneralizedCost(costfunc=SquareHingeLoss())

# setup optimizer
# base learning rate; ShiftAdaMax_with_Scale multiplies it by a per-layer factor
LR_start = 1.65e-2


def ShiftAdaMax_with_Scale(LR=1):
    """Build a ShiftAdaMax optimizer whose learning rate is ``LR_start * LR``.

    The optimizer always receives a ``ShiftSchedule(2, shift_size=1)``.
    """
    scaled_rate = LR_start * LR
    shift_schedule = ShiftSchedule(2, shift_size=1)
    return ShiftAdaMax(learning_rate=scaled_rate, schedule=shift_schedule)


optimizer = MultiOptimizer({
    'default': ShiftAdaMax_with_Scale(),
    'BinaryLinear_0': ShiftAdaMax_with_Scale(57.038),
    'BinaryLinear_1': ShiftAdaMax_with_Scale(73.9008),
    'BinaryLinear_2': ShiftAdaMax_with_Scale(73.9008),
示例#26
0
class DeepQNetwork:
  """Q-network made of two Affine layers, built on a Neon backend.

  Holds the trained model, an optional separate target model, a SumSquared
  cost and the selected optimizer, and exposes train / predict / load / save
  entry points.
  """
  def __init__(self, state_size, num_actions, args):
    """Create the backend, reusable tensors, model(s) and optimizer.

    Args:
      state_size: first dimension of the input tensor.
      num_actions: number of network outputs; first dimension of the targets.
      args: settings object.  The attributes batch_size, discount_rate,
        clip_error, backend, random_seed, device_id, datatype,
        stochastic_round, optimizer, learning_rate and target_steps are read;
        decay_rate is read for 'rmsprop' and 'adadelta', and
        save_weights_prefix only when target_steps is truthy.

    Raises:
      AssertionError: if args.optimizer is not 'rmsprop', 'adam' or 'adadelta'.
    """
    # remember parameters
    self.state_size = state_size
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.clip_error = args.clip_error
    # per-action counter incremented in train()
    # NOTE(review): the length 21 is hard-coded and not tied to num_actions;
    # an action index >= 21 would raise IndexError in train() - confirm
    self.action_count = np.zeros(21)

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.state_size, self.batch_size)
    self.input = self.be.empty(self.input_shape)
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    # the batch dimension (last entry of input_shape) is not passed on
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
      self.optimizer = RMSProp(learning_rate = args.learning_rate, 
          decay_rate = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adam':
      self.optimizer = Adam(learning_rate = args.learning_rate, 
          stochastic_round = args.stochastic_round)
    elif args.optimizer == 'adadelta':
      self.optimizer = Adadelta(decay = args.decay_rate, 
          stochastic_round = args.stochastic_round)
    else:
      assert False, "Unknown optimizer"

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self._createLayers(num_actions))
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      # without a clone interval the trained model doubles as target
      self.target_model = self.model

  def _createLayers(self, num_actions):
    """Return the layer list: Affine(64, Rectlin) followed by Affine(num_actions)."""
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    # The hidden layer is fully-connected and consists of 64 rectifier units.
    layers.append(Affine(nout=64, init=init_norm, bias=init_norm, activation=Rectlin()))
    # The output layer is a fully-connected linear layer with a single output for each valid action.
    layers.append(Affine(nout=num_actions, init=init_norm, bias=init_norm))
    return layers

  def _setInput(self, states):
    """Transpose ``states`` and copy the result into ``self.input``."""
    # change order of axes to match what Neon expects
    states = np.transpose(states)
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.input.set(states.copy())
    # normalize network input between 0 and 1
    # self.be.divide(self.input, 255, self.input)

  def train(self, minibatch, epoch):
    """Perform one Q-learning update on a minibatch.

    Args:
      minibatch: 6-tuple (prestates, actions, speed_actions, rewards,
        poststates, terminals).  prestates and poststates must be 2-D with
        equal shapes; actions, rewards and terminals must be 1-D with the
        same length as the first dimension of prestates.  speed_actions is
        unpacked but not used in this method.
      epoch: passed through to ``self.optimizer.optimize``.
    """
    # expand components of minibatch
    prestates, actions, speed_actions, rewards, poststates, terminals = minibatch
    assert len(prestates.shape) == 2
    assert len(poststates.shape) == 2
    assert len(actions.shape) == 1
    assert len(rewards.shape) == 1
    assert len(terminals.shape) == 1
    assert prestates.shape == poststates.shape
    assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
    #print "WE ARE ACTUALLY TRAINING IN HERE"
    # refresh the target model every target_steps weight updates
    if self.target_steps and self.train_iterations % self.target_steps == 0:
      # HACK: serialize network to disk and read it back to clone
      filename = self.save_weights_prefix + "_target.pkl"
      save_obj(self.model.serialize(keep_states = False), filename)
      self.target_model.load_weights(filename)

    # feed-forward pass for poststates to get Q-values
    self._setInput(poststates)
    postq = self.target_model.fprop(self.input, inference = True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    postq = postq.asnumpyarray()
    maxpostq = np.max(postq, axis=0)
    #print maxpostq.shape
    assert maxpostq.shape == (self.batch_size,)

    # feed-forward pass for prestates
    self._setInput(prestates)
    preq = self.model.fprop(self.input, inference = False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    targets = preq.asnumpyarray().copy()

    # update Q-value targets for actions taken
    # only the entry of the taken action changes per sample, so every other
    # entry of targets still equals preq
    for i, action in enumerate(actions):
      self.action_count[action] += 1
      if terminals[i]:
        # terminal transition: target is the reward alone
        targets[action, i] = float(rewards[i])
        if rewards[i] == -1000:
            print "######################### action ", action, "should never be sampled again"
        print "sampled_terminal"
      else:
        # non-terminal: reward plus discounted max Q-value of the poststate
        targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[i]
        #targets[i,action] = float(rewards[i]) + self.discount_rate * maxpostq[i]
    #print "action count", self.action_count
    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)
    #assert np.count_nonzero(deltas.asnumpyarray()) == 32
    print "nonzero deltas", np.count_nonzero(deltas.asnumpyarray())

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1,1)
    print "cost:", cost.asnumpyarray()

    # clip errors
    # NOTE(review): clipping is commented out, so self.clip_error is stored
    # in __init__ but has no effect here - confirm this is intended
    #if self.clip_error:
    #  self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

  def predict(self, states):
    """Return Q-values for ``states`` as a numpy array, batch dimension first.

    ``states`` must have shape (batch_size, state_size).
    """
    # minibatch is full size, because Neon doesn't let change the minibatch size
    assert states.shape == (self.batch_size, self.state_size)

    # calculate Q-values for the states
    self._setInput(states)
    qvalues = self.model.fprop(self.input, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

    # transpose the result, so that batch size is first dimension
    return qvalues.T.asnumpyarray()

  def load_weights(self, load_path):
    """Load weights into ``self.model`` from ``load_path``."""
    self.model.load_weights(load_path)

  def save_weights(self, save_path):
    """Serialize ``self.model`` with keep_states=True and save it to ``save_path``."""
    save_obj(self.model.serialize(keep_states = True), save_path)
示例#27
0
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)
elif args.datatype in [np.float16]:
    opt_gdm = GradientDescentMomentum(learning_rate=0.01 / cost_scale,
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)

# batch_norm flag shared by the two Conv layers and the first Affine layer
bn = True
# two Conv(5x5) + Pooling(2x2) stages, a 500-unit Affine layer and a 10-unit
# Softmax output layer
layers = [Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=bn),
          Pooling((2, 2)),
          Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=bn),
          Pooling((2, 2)),
          Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=bn),
          Affine(nout=10, init=init_uni, activation=Softmax())]

# for float16 the cost function is created with scale=cost_scale
# NOTE(review): there is no else branch, so unless `cost` is assigned earlier
# it stays undefined for any other datatype - confirm
if args.datatype in [np.float32, np.float64]:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
elif args.datatype in [np.float16]:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti(scale=cost_scale))

model = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(model, eval_set=test, **args.callback_args)

# train, then report the misclassification rate on `test` as a percentage
model.fit(train, optimizer=opt_gdm, num_epochs=num_epochs,
          cost=cost, callbacks=callbacks)

error_rate = model.eval(test, metric=Misclassification())
neon_logger.display('Misclassification error = %.1f%%' % (error_rate * 100))
示例#28
0
         padding=1),
    Conv((3, 3, 512),
         init=Gaussian(scale=0.01),
         activation=Rectlin(),
         padding=1),
    Pooling(2, strides=2),
    Conv((3, 3, 512),
         init=Gaussian(scale=0.01),
         activation=Rectlin(),
         padding=1),
    Conv((3, 3, 512),
         init=Gaussian(scale=0.01),
         activation=Rectlin(),
         padding=1),
    Pooling(2, strides=2),
    Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
    Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
    Affine(nout=1000, init=Gaussian(scale=0.01), activation=Softmax())
]
model = Model(layers=layers)

# schedule with steps [22, 44, 65] and factor (1/250)^(1/3)
# NOTE(review): presumably the learning rate is multiplied by the factor at
# each listed epoch, so the three steps compound to 1/250 - confirm against
# the Schedule documentation
weight_sched = Schedule([22, 44, 65], (1 / 250.)**(1 / 3.))
# positional arguments 0.01 and 0.0 - presumably learning rate and momentum
# coefficient; verify against GradientDescentMomentum's signature
opt_gdm = GradientDescentMomentum(0.01,
                                  0.0,
                                  wdecay=0.0005,
                                  schedule=weight_sched)
# the same optimizer is registered as the only ('default') entry
opt = MultiOptimizer({'default': opt_gdm})
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

# benchmark the model on `train` with niterations=10 and nskip=1
model.benchmark(train, cost=cost, optimizer=opt, niterations=10, nskip=1)
示例#29
0
# setup optimizer
# opt_w is the default optimizer (with weight decay); opt_b is registered
# under the 'Bias' key, uses twice the base learning rate and no weight decay
opt_w = GradientDescentMomentum(0.001 * learning_rate_scale, 0.9, wdecay=0.0005)
opt_b = GradientDescentMomentum(0.002 * learning_rate_scale, 0.9)

optimizer = MultiOptimizer({'default': opt_w, 'Bias': opt_b})

# setup model

model = Model(layers=Tree([frcn_layers, bb_layers]))

# if training a new model, seed the Alexnet conv layers with pre-trained weights
# otherwise, just load the model file
if args.model_file is None:
    load_imagenet_weights(model, args.data_dir)

# two costs with equal weights: CrossEntropyMulti and a masked SmoothL1Loss
cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti()),
                        GeneralizedCostMask(costfunc=SmoothL1Loss())],
                 weights=[1, 1])

callbacks = Callbacks(model, **args.callback_args)

model.fit(train_set, optimizer=optimizer,
          num_epochs=num_epochs, cost=cost, callbacks=callbacks)


# NOTE: print is called with one parenthesised argument so the output is the
# same under Python 2 and the lines are also valid Python 3.
print('running eval on the training set...')
metric_train = model.eval(train_set, metric=ObjectDetection())
print('Train: label accuracy - {}%, object deteciton SmoothL1Loss - {}'.format(
    metric_train[0]*100,
    metric_train[1]))
示例#30
0
def create_network_lrn():
    """Build the conv1..fc8 network with LRN layers and its cost.

    Returns:
        tuple: ``(Model, GeneralizedCost)`` where the model stacks five Conv
        layers (with Pooling after conv1, conv2 and conv5 and LRN after the
        first two pools) and three Affine layers (Dropout after fc6 and
        fc7), and the cost wraps ``CrossEntropyMulti``.
    """
    gauss_wide = Gaussian(scale=0.01)
    gauss_narrow = Gaussian(scale=0.005)

    # convolutional feature extractor
    feature_layers = [
        Conv((11, 11, 96), padding=0, strides=4, init=gauss_wide,
             bias=Constant(0), activation=Rectlin(), name='conv1'),
        Pooling(3, strides=2, name='pool1'),
        LRN(5, ascale=0.0001, bpower=0.75, name='norm1'),
        Conv((5, 5, 256), padding=2, init=gauss_wide,
             bias=Constant(1.0), activation=Rectlin(), name='conv2'),
        Pooling(3, strides=2, name='pool2'),
        LRN(5, ascale=0.0001, bpower=0.75, name='norm2'),
        Conv((3, 3, 384), padding=1, init=gauss_wide,
             bias=Constant(0), activation=Rectlin(), name='conv3'),
        Conv((3, 3, 384), padding=1, init=gauss_wide,
             bias=Constant(1.0), activation=Rectlin(), name='conv4'),
        Conv((3, 3, 256), padding=1, init=gauss_wide,
             bias=Constant(1.0), activation=Rectlin(), name='conv5'),
        Pooling(3, strides=2, name='pool5'),
    ]

    # fully-connected classifier head
    classifier_layers = [
        Affine(nout=4096, init=gauss_narrow,
               bias=Constant(1.0), activation=Rectlin(), name='fc6'),
        Dropout(keep=0.5, name='drop6'),
        Affine(nout=4096, init=gauss_narrow,
               bias=Constant(1.0), activation=Rectlin(), name='fc7'),
        Dropout(keep=0.5, name='drop7'),
        Affine(nout=1000, init=gauss_wide,
               bias=Constant(0.0), activation=Softmax(), name='fc8'),
    ]

    network = Model(layers=feature_layers + classifier_layers)
    loss = GeneralizedCost(costfunc=CrossEntropyMulti())
    return network, loss
示例#31
0
    DeepBiRNN(hidden_size,
              init=glorot,
              activation=Rectlinclip(),
              batch_norm=True,
              reset_cells=True,
              depth=depth),
    Affine(hidden_size, init=glorot, activation=Rectlinclip()),
    Affine(nout=nout, init=glorot, activation=Identity())
]

model = Model(layers=layers)

# Nesterov-momentum SGD with gradient norm clipping; stochastic rounding is
# switched off explicitly
opt = GradientDescentMomentumNesterov(learning_rate,
                                      momentum,
                                      gradient_clip_norm=gradient_clip_norm,
                                      stochastic_round=False)
# callbacks evaluate on the `dev` set; remaining options come from the
# command-line callback arguments
callbacks = Callbacks(model, eval_set=dev, **args.callback_args)

# Print validation set word error rate at the end of every epoch
pcb = WordErrorRateCallback(dev, argmax_decoder, max_tscrpt_len, epoch_freq=1)
callbacks.add_callback(pcb)

# CTC cost built from max_tscrpt_len and nout
cost = GeneralizedCost(costfunc=CTC(max_tscrpt_len, nout=nout))

# Fit the model
model.fit(train,
          optimizer=opt,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)
示例#32
0
class DQNNeon(Learner):
    """ This class is an implementation of the DQN network based on Neon.

    The modules that interact with the agent, the replay memory and the
    statistic calls are implemented here, taking the individual requirements
    of the Neon framework into account. The code is adapted from:
    https://github.com/tambetm/simple_dqn

    Attributes:
        input_shape (tuple[int]): Dimension of the network input.
        dummy_batch (numpy.ndarray): Dummy batch used to calculate Q-values for single states.
        batch_norm (bool): Indicates if normalization is wanted for a certain layer (default=False).
        be (neon.backends.nervanagpu.NervanaGPU): Describes the backend for the Neon implementation.
        input (neon.backends.nervanagpu.GPUTensor): Definition of network input shape.
        targets(neon.backends.nervanagpu.GPUTensor): Definition of network output shape.
        model (neon.models.model.Model): Generated Neon model.
        target_model (neon.models.model.Model): Generated target Neon model.
        cost_func (neon.layers.layer.GeneralizedCost): Cost function for model training.
        callback (Statistics): Hook for the statistics object to pass train and test information.

    Note:
        More attributes of this class are defined in the base class Learner.
    """

    def __init__(self, env, args, rng, name = "DQNNeon"):
        """ Initializes a network based on the Neon framework.

        Args:
            env (AtariEnv): The environment in which the agent actuates.
            args (argparse.Namespace): All settings either with a default value or set via command line arguments.
            rng (mtrand.RandomState): initialized Mersenne Twister pseudo-random number generator.
            name (str): The name of the network object.

        Note:
            This function should always call the base class first to initialize
            the common values for the networks.
        """
        _logger.info("Initializing new object of type " + str(type(self).__name__))
        super(DQNNeon, self).__init__(env, args, rng, name)
        self.input_shape = (self.sequence_length,) + self.frame_dims + (self.batch_size,)
        self.dummy_batch = np.zeros((self.batch_size, self.sequence_length) + self.frame_dims, dtype=np.uint8)
        self.batch_norm = args.batch_norm

        self.be = gen_backend(
                backend = args.backend,
                batch_size = args.batch_size,
                rng_seed = args.random_seed,
                device_id = args.device_id,
                datatype = np.dtype(args.datatype).type,
                stochastic_round = args.stochastic_round)

        # prepare tensors once and reuse them
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape # HACK: needed for convolutional networks
        self.targets = self.be.empty((self.output_shape, self.batch_size))

        # create model
        layers = self._create_layer()
        self.model = Model(layers = layers)
        self.cost_func = GeneralizedCost(costfunc = SumSquared())
        # Bug fix
        for layer in self.model.layers.layers:
            layer.parallelism = 'Disabled'
        # the batch dimension (last entry of input_shape) is not passed on
        self.model.initialize(self.input_shape[:-1], self.cost_func)

        self._set_optimizer()

        # optionally start from previously stored parameters
        if self.args.load_weights is not None:
            self.load_weights(self.args.load_weights)

        # create target model
        if self.target_update_frequency:
            layers = self._create_layer()
            self.target_model = Model(layers)
            # Bug fix
            for layer in self.target_model.layers.layers:
                layer.parallelism = 'Disabled'
            self.target_model.initialize(self.input_shape[:-1])
        else:
            # without an update frequency the trained model doubles as target
            self.target_model = self.model

        self.callback = None
        _logger.debug("%s" % self)

    def _create_layer(self):
        """ Build a network consistent with the DeepMind Nature paper.

        Returns:
            list: three Conv layers, a 512-unit Affine layer and an Affine
            output layer with ``self.output_shape`` units.
        """
        _logger.debug("Output shape = %d" % self.output_shape)
        # create network
        init_norm = Gaussian(loc=0.0, scale=0.01)
        layers = []
        # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
        layers.append(
                Conv((8, 8, 32),
                strides=4,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
        layers.append(
                Conv((4, 4, 64),
                strides=2,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
        layers.append(
                Conv((3, 3, 64),
                strides=1,
                init=init_norm,
                activation=Rectlin(),
                batch_norm=self.batch_norm))
        # The final hidden layer is fully-connected and consists of 512 rectifier units.
        layers.append(
                Affine(
                    nout=512,
                    init=init_norm,
                    activation=Rectlin(),
                    batch_norm=self.batch_norm))
        # The output layer is a fully-connected linear layer with a single output for each valid action.
        layers.append(
                Affine(
                    nout= self.output_shape,
                    init = init_norm))
        return layers

    def _set_optimizer(self):
        """ Initializes the selected optimization algorithm.

        Raises:
            AssertionError: if ``self.args.optimizer`` is not 'rmsprop',
                'adam' or 'adadelta'.
        """
        _logger.debug("Optimizer = %s" % str(self.args.optimizer))
        if self.args.optimizer == 'rmsprop':
            self.optimizer = RMSProp(
                    learning_rate = self.args.learning_rate,
                    decay_rate = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adam':
            self.optimizer = Adam(
                    learning_rate = self.args.learning_rate,
                    stochastic_round = self.args.stochastic_round)
        elif self.args.optimizer == 'adadelta':
            self.optimizer = Adadelta(
                    decay = self.args.decay_rate,
                    stochastic_round = self.args.stochastic_round)
        else:
            # was `assert false`, which raised NameError instead of reporting
            # the unknown optimizer
            assert False, "Unknown optimizer"

    def _prepare_network_input(self, states):
        """ Transforms and normalizes the states from one minibatch.

        Args:
            states (numpy.ndarray): 4-dimensional batch of states; axis 0 is moved to the last position.
        """
        _logger.debug("Normalizing and transforming input")
        # change order of axes to match what Neon expects
        states = np.transpose(states, axes = (1, 2, 3, 0))
        # copy() shouldn't be necessary here, but Neon doesn't work otherwise
        self.input.set(states.copy())
        # normalize network input between 0 and 1
        self.be.divide(self.input, self.grayscales, self.input)

    def train(self, minibatch, epoch):
        """ Prepare, perform and document a complete train step for one minibatch.

        Args:
            minibatch (tuple): (prestates, actions, rewards, poststates, terminals); the states are
                4-dimensional arrays of equal shape, the other entries 1-dimensional arrays whose
                length equals the first dimension of the states.
            epoch (int): Current train epoch
        """
        _logger.debug("Complete trainig step for one minibatch")
        prestates, actions, rewards, poststates, terminals = minibatch
        assert len(prestates.shape) == 4
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
        # feed-forward pass for poststates to get Q-values
        self._prepare_network_input(poststates)
        postq = self.target_model.fprop(self.input, inference = True)
        assert postq.shape == (self.output_shape, self.batch_size)
        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)
        # average maxpostq for stats
        maxpostq_avg = maxpostq.mean()
        # feed-forward pass for prestates
        self._prepare_network_input(prestates)
        preq = self.model.fprop(self.input, inference = False)
        assert preq.shape == (self.output_shape, self.batch_size)
        # make copy of prestate Q-values as targets; the explicit copy keeps
        # the edits below from writing through to preq on backends where
        # asnumpyarray() hands back the underlying buffer
        targets = preq.asnumpyarray().copy()
        # clip rewards between min_reward and max_reward
        rewards = np.clip(rewards, self.min_reward, self.max_reward)
        # update Q-value targets for each state only at actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]
        # copy targets to GPU memory
        self.targets.set(targets)
        # calculate errors
        errors = self.cost_func.get_errors(preq, self.targets)
        assert errors.shape == (self.output_shape, self.batch_size)
        # average error where there is a error (should be 1 in every row)
        #TODO: errors_avg = np.sum(errors)/np.size(errors[errors>0.])
        # clip errors
        if self.clip_error:
            self.be.clip(errors, -self.clip_error, self.clip_error, out = errors)
        # calculate cost, just in case
        cost = self.cost_func.get_cost(preq, self.targets)
        assert cost.shape == (1,1)
        # perform back-propagation of gradients
        self.model.bprop(errors)
        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)
        # increase number of weight updates (needed for target clone interval)
        self.update_iterations += 1
        if self.target_update_frequency and self.update_iterations % self.target_update_frequency == 0:
            self._copy_theta()
            _logger.info("Network update #%d: Cost = %s, Avg Max Q-value = %s" % (self.update_iterations, str(cost.asnumpyarray()[0][0]), str(maxpostq_avg)))
        # update statistics
        if self.callback:
            self.callback.from_learner(cost.asnumpyarray()[0,0], maxpostq_avg)

    def get_Q(self, state):
        """ Calculates the Q-values for a single state.

        The state is written into slot 0 of the dummy batch and a full
        minibatch is pushed through the network.

        Args:
            state(numpy.ndarray): Single state, shape=(sequence_length,frame_width,frame_height).

        Returns:
            q_values (numpy.ndarray): Results for first element of mini-batch from one forward pass through the network, shape=(self.output_shape,)
        """
        _logger.debug("State shape = %s" % str(state.shape))
        # minibatch is full size, because Neon doesn't let change the minibatch size
        # so we need to run a full batch of forward steps to get the one we actually want
        self.dummy_batch[0] = state
        states = self.dummy_batch
        assert states.shape == ((self.batch_size, self.sequence_length,) + self.frame_dims)
        # calculate Q-values for the states
        self._prepare_network_input(states)
        qvalues = self.model.fprop(self.input, inference = True)
        assert qvalues.shape == (self.output_shape, self.batch_size)
        _logger.debug("Qvalues: %s" % (str(qvalues.asnumpyarray()[:,0])))
        return qvalues.asnumpyarray()[:,0]

    def _copy_theta(self):
        """ Copies the weights of the current network to the target network. """
        _logger.debug("Copying weights")
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    def save_weights(self, target_dir, epoch):
        """ Saves the current network parameters to disk.

        The file name is ``<game>_<net_type>_<optimizer>_<epoch + 1>.prm``
        (first three parts lower-cased).

        Args:
            target_dir (str): Directory where the network parameters are stored for each episode.
            epoch (int): Current epoch.
        """
        filename = "%s_%s_%s_%d.prm" % (str(self.args.game.lower()), str(self.args.net_type.lower()), str(self.args.optimizer.lower()), (epoch + 1))
        self.model.save_params(os.path.join(target_dir, filename))

    def load_weights(self, source_file):
        """ Loads the network parameters from a given file.

        Args:
            source_file (str): Complete path to a file with network parameters.
        """
        self.model.load_params(source_file)
示例#33
0
else:
    rlayer1 = GRU(hidden_size,
                  init,
                  activation=Tanh(),
                  gate_activation=Logistic())
    rlayer2 = GRU(hidden_size,
                  init,
                  activation=Tanh(),
                  gate_activation=Logistic())

# two recurrent layers followed by a Softmax output layer with one unit per
# entry of the training-set vocabulary
layers = [
    rlayer1, rlayer2,
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)

# schedule listing every epoch from 10 up to (not including) args.epochs,
# with factor .97
# NOTE(review): presumably the learning rate is multiplied by .97 at each of
# these epochs - confirm against the Schedule documentation
learning_rate_sched = Schedule(list(range(10, args.epochs)), .97)
optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding,
                    schedule=learning_rate_sched)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set,
          optimizer=optimizer,
          num_epochs=args.epochs,
示例#34
0
class DeepQNetwork:
  """Convolutional deep Q-network built on the Neon backend.

  An online model is trained with RMSProp on a sum-squared cost. When
  ``args.target_steps`` is non-zero a second, structurally identical target
  model supplies the bootstrap Q-values and is re-cloned from the online
  model every ``target_steps`` training iterations; otherwise the online
  model doubles as the target model.
  """

  def __init__(self, num_actions, args):
    """Create the backend, both networks and the reusable device tensors.

    Args:
      num_actions: number of network outputs (one Q-value per action).
      args: settings object. Attributes read here: backend, batch_size,
        random_seed, device_id, datatype, stochastic_round, learning_rate,
        rmsprop_decay_rate, target_steps, save_weights_path (only when
        target_steps is non-zero), discount_rate, history_length,
        screen_height, screen_width and clip_error.
    """
    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 default_dtype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # create model
    layers = self.createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    self.optimizer = RMSProp(learning_rate = args.learning_rate,
        decay_rate = args.rmsprop_decay_rate,
        stochastic_round = args.stochastic_round)

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self.createLayers(num_actions))
      # directory used by train() for the temporary target-network file
      self.save_weights_path = args.save_weights_path
    else:
      self.target_model = self.model

    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error

    # prepare tensors once and reuse them
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.tensor = self.be.empty(self.input_shape)
    self.tensor.lshape = self.input_shape # needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # optional observer; train() calls callback.on_train(cost) when set
    self.callback = None

  def createLayers(self, num_actions):
    """Return a fresh list of layers: three conv layers and two affine layers.

    Args:
      num_actions: number of units in the linear output layer.
    """
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
    layers.append(Conv((8, 8, 32), strides=4, init=init_norm, activation=Rectlin()))
    # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
    layers.append(Conv((4, 4, 64), strides=2, init=init_norm, activation=Rectlin()))
    # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
    layers.append(Conv((3, 3, 64), strides=1, init=init_norm, activation=Rectlin()))
    # The final hidden layer is fully-connected and consists of 512 rectifier units.
    layers.append(Affine(nout=512, init=init_norm, activation=Rectlin()))
    # The output layer is a fully-connected linear layer with a single output for each valid action.
    layers.append(Affine(nout = num_actions, init = init_norm))
    return layers

  def setTensor(self, states):
    """Load a batch of states into the reusable input tensor, scaled by 1/255.

    Args:
      states: 4-d numpy array with the batch on axis 0; the batch axis is
        moved to the end before the data is copied into the tensor.
    """
    # change order of axes to match what Neon expects
    states = np.transpose(states, axes = (1, 2, 3, 0))
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.tensor.set(states.copy())
    # normalize network input between 0 and 1
    self.be.divide(self.tensor, 255, self.tensor)

  def train(self, minibatch, epoch):
    """Run one Q-learning update on a minibatch.

    Args:
      minibatch: tuple (prestates, actions, rewards, poststates, terminals)
        of numpy arrays; the state arrays are 4-d, the others 1-d, and all
        share the same first dimension.
      epoch: epoch number handed to the optimizer.
    """
    # expand components of minibatch
    prestates, actions, rewards, poststates, terminals = minibatch
    assert len(prestates.shape) == 4
    assert len(poststates.shape) == 4
    assert len(actions.shape) == 1
    assert len(rewards.shape) == 1
    assert len(terminals.shape) == 1
    assert prestates.shape == poststates.shape
    assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]

    if self.target_steps and self.train_iterations % self.target_steps == 0:
      # HACK: push something through network, so that weights exist
      self.model.fprop(self.tensor)
      # HACK: serialize network to disk and read it back to clone
      filename = os.path.join(self.save_weights_path, "target_network.pkl")
      save_obj(self.model.serialize(keep_states = False), filename)
      self.target_model.load_weights(filename)

    # feed-forward pass for poststates to get Q-values
    self.setTensor(poststates)
    postq = self.target_model.fprop(self.tensor, inference = True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    maxpostq = self.be.max(postq, axis=0).asnumpyarray()
    assert maxpostq.shape == (1, self.batch_size)

    # feed-forward pass for prestates
    self.setTensor(prestates)
    preq = self.model.fprop(self.tensor, inference = False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    # The explicit copy() matters: if asnumpyarray() hands back the tensor's
    # own buffer (as the CPU backend appears to do), editing it in place would
    # also change preq and every delta below would come out as zero.
    targets = preq.asnumpyarray().copy()

    # update Q-value targets for actions taken
    for i, action in enumerate(actions):
      if terminals[i]:
        targets[action, i] = float(rewards[i])
      else:
        targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]

    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1,1)

    # clip errors
    if self.clip_error:
      self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

    # calculate statistics
    if self.callback:
      self.callback.on_train(cost.asnumpyarray()[0,0])

  def predict(self, states):
    """Return the index of the highest-valued action for the first state.

    Args:
      states: numpy array of shape
        (batch_size, history_length, screen_height, screen_width); only the
        result for the first entry of the batch is returned.
    """
    # minibatch is full size, because Neon doesn't let change the minibatch size
    assert states.shape == ((self.batch_size, self.history_length,) + self.screen_dim)

    # calculate Q-values for the states
    self.setTensor(states)
    qvalues = self.model.fprop(self.tensor, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

    # find the action with highest q-value
    actions = self.be.argmax(qvalues, axis = 0)
    assert actions.shape == (1, self.batch_size)

    # take only the first result
    return actions.asnumpyarray()[0,0]

  def getMeanQ(self, states):
    """Return the mean over the batch of each state's maximum Q-value.

    Args:
      states: numpy array of shape
        (batch_size, history_length, screen_height, screen_width).
    """
    assert states.shape == ((self.batch_size, self.history_length,) + self.screen_dim)

    # calculate Q-values for the states
    self.setTensor(states)
    qvalues = self.model.fprop(self.tensor, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)

    # take maximum Q-value for each state
    maxq = self.be.max(qvalues, axis = 0)
    assert maxq.astensor().shape == (1, self.batch_size)

    # calculate mean Q-value of all states
    meanq = self.be.mean(maxq, axis = 1)
    assert meanq.astensor().shape == (1, 1)

    # return the mean
    return meanq.asnumpyarray()[0,0]

  def load_weights(self, load_path):
    """Load the online model's weights from the file at load_path."""
    self.model.load_weights(load_path)

  def save_weights(self, save_path):
    """Serialize the online model, states included, to the file at save_path."""
    save_obj(self.model.serialize(keep_states = True), save_path)
示例#35
0
class DeepQNetwork:
  """Fully-connected deep Q-network with separate steering and speed outputs.

  The network has ``num_steers + num_speeds`` outputs: the first
  ``num_steers`` rows are Q-values of the steering choices, the last
  ``num_speeds`` rows are Q-values of the speed choices. When
  ``args.target_steps`` is non-zero a separate target model supplies the
  bootstrap Q-values and is re-cloned from the online model every
  ``target_steps`` training iterations.
  """

  def __init__(self, state_size, num_steers, num_speeds, args):
    """Create the backend, both networks and the reusable device tensors.

    Args:
      state_size: length of one state vector (network input size).
      num_steers: number of steering outputs.
      num_speeds: number of speed outputs.
      args: settings object. Attributes read here: hidden_layers,
        hidden_nodes, batch_size, discount_rate, clip_error, backend,
        random_seed, device_id, datatype, stochastic_round, optimizer,
        learning_rate and/or decay_rate (depending on the optimizer),
        target_steps and, when target_steps is non-zero,
        save_weights_prefix.

    Raises:
      ValueError: if args.optimizer names an unsupported optimizer.
    """
    # remember parameters
    self.state_size = state_size
    self.num_steers = num_steers
    self.num_speeds = num_speeds
    self.num_actions = num_steers + num_speeds
    self.num_layers = args.hidden_layers
    self.hidden_nodes = args.hidden_nodes
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.clip_error = args.clip_error

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.state_size, self.batch_size)
    self.input = self.be.empty(self.input_shape)
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    self.model = Model(layers = self._createLayers())
    self.cost = GeneralizedCost(costfunc = SumSquared())
    self.model.initialize(self.input_shape[:-1], self.cost)
    self.optimizer = self._createOptimizer(args)

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self._createLayers())
      self.target_model.initialize(self.input_shape[:-1])
      # prefix of the temporary file used by train() to clone the network
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model

  def _createOptimizer(self, args):
    """Build the optimizer selected by args.optimizer.

    Supported names are 'rmsprop', 'adam' and 'adadelta'.

    Raises:
      ValueError: for any other name. A real exception is used (rather than
        an assert) so the check survives ``python -O``.
    """
    if args.optimizer == 'rmsprop':
      return RMSProp(learning_rate = args.learning_rate,
          decay_rate = args.decay_rate,
          stochastic_round = args.stochastic_round)
    if args.optimizer == 'adam':
      return Adam(learning_rate = args.learning_rate,
          stochastic_round = args.stochastic_round)
    if args.optimizer == 'adadelta':
      return Adadelta(decay = args.decay_rate,
          stochastic_round = args.stochastic_round)
    raise ValueError("Unknown optimizer: %r" % (args.optimizer,))

  def _createLayers(self):
    """Return a fresh layer list: num_layers rectifier layers plus a linear output."""
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    # range() instead of xrange() so the loop also runs on Python 3
    for _ in range(self.num_layers):
        layers.append(Affine(nout=self.hidden_nodes, init=init_norm, activation=Rectlin()))
    layers.append(Affine(nout=self.num_actions, init = init_norm))
    return layers

  def _setInput(self, states):
    """Copy a (batch_size, state_size) batch into the input tensor, transposed.

    The values are passed to the network unscaled; no normalization is
    applied here.
    """
    # change order of axes to match what Neon expects
    states = np.transpose(states)
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.input.set(states.copy())

  def train(self, minibatch, epoch = 0):
    """Run one Q-learning update on a minibatch.

    Args:
      minibatch: tuple (prestates, steers, speeds, rewards, poststates,
        terminals) of numpy arrays; the state arrays are 2-d, the others
        1-d, and all share the same first dimension. steers and speeds hold
        the indices of the chosen steering and speed outputs.
      epoch: epoch number handed to the optimizer.
    """
    # expand components of minibatch
    prestates, steers, speeds, rewards, poststates, terminals = minibatch
    assert len(prestates.shape) == 2
    assert len(poststates.shape) == 2
    assert len(steers.shape) == 1
    assert len(speeds.shape) == 1
    assert len(rewards.shape) == 1
    assert len(terminals.shape) == 1
    assert prestates.shape == poststates.shape
    assert prestates.shape[0] == steers.shape[0] == speeds.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]

    if self.target_steps and self.train_iterations % self.target_steps == 0:
      # HACK: serialize network to disk and read it back to clone
      filename = self.save_weights_prefix + "_target.pkl"
      save_obj(self.model.serialize(keep_states = False), filename)
      self.target_model.load_weights(filename)

    # feed-forward pass for poststates to get Q-values
    self._setInput(poststates)
    postq = self.target_model.fprop(self.input, inference = True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate, separately per output group
    postq = postq.asnumpyarray()
    maxsteerq = np.max(postq[:self.num_steers,:], axis=0)
    assert maxsteerq.shape == (self.batch_size,), "size: %s" % str(maxsteerq.shape)
    maxspeedq = np.max(postq[-self.num_speeds:,:], axis=0)
    assert maxspeedq.shape == (self.batch_size,)

    # feed-forward pass for prestates
    self._setInput(prestates)
    preq = self.model.fprop(self.input, inference = False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    # HACK: copy() was needed to make it work on CPU
    targets = preq.asnumpyarray().copy()

    # update Q-value targets for actions taken; speed outputs live after the
    # steering outputs, hence the num_steers offset
    for i, (steer, speed) in enumerate(zip(steers, speeds)):
      if terminals[i]:
        targets[steer, i] = float(rewards[i])
        targets[self.num_steers + speed, i] = float(rewards[i])
      else:
        targets[steer, i] = float(rewards[i]) + self.discount_rate * maxsteerq[i]
        targets[self.num_steers + speed, i] = float(rewards[i]) + self.discount_rate * maxspeedq[i]

    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1,1)

    # clip errors
    if self.clip_error:
      self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

  def predict(self, states):
    """Return the Q-values for a full batch of states.

    Args:
      states: numpy array of shape (batch_size, state_size).

    Returns:
      numpy array with the batch on the first axis and one column per
      network output.
    """
    # minibatch is full size, because Neon doesn't let change the minibatch size
    assert states.shape == (self.batch_size, self.state_size)

    # calculate Q-values for the states
    self._setInput(states)
    qvalues = self.model.fprop(self.input, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

    # transpose the result, so that batch size is first dimension
    return qvalues.T.asnumpyarray()

  def load_weights(self, load_path):
    """Load the online model's weights from the file at load_path."""
    self.model.load_weights(load_path)

  def save_weights(self, save_path):
    """Serialize the online model, states included, to the file at save_path."""
    save_obj(self.model.serialize(keep_states = True), save_path)
示例#36
0
                                          wdecay=args.weight_decay,
                                          schedule=weight_sched,
                                          stochastic_round=args.rounding)
        opt_biases = GradientDescentMomentum(args.rate_init[1],
                                             args.momentum[1],
                                             schedule=weight_sched,
                                             stochastic_round=args.rounding)
        opt_fixed = GradientDescentMomentum(0.0, 1.0, wdecay=0.0)
        opt = MultiOptimizer({
            'default': opt_gdm,
            'Bias': opt_biases,
            'DOG': opt_fixed
        })

        # configure cost and test metrics
        cost = GeneralizedCost(costfunc=(CrossEntropyBinary() \
            if train.parser.independent_labels else CrossEntropyMulti()))
        metric = EMMetric(
            oshape=test.parser.oshape,
            use_softmax=not train.parser.independent_labels) if test else None

        # configure callbacks
        if not args.neon_progress:
            args.callback_args['progress_bar'] = False
        callbacks = Callbacks(model,
                              eval_set=test,
                              metric=metric,
                              **args.callback_args)
        if not args.neon_progress:
            callbacks.add_callback(EMEpochCallback(
                args.callback_args['eval_freq'], train.nmacrobatches),
                                   insert_pos=None)
示例#37
0
class ModelRunnerNeon():
    """Runs and trains a convolutional Q-network on the Neon GPU backend.

    Two structurally identical networks are kept: ``train_net`` is optimised
    by :meth:`train`, while ``target_net`` supplies the bootstrap Q-values
    and is refreshed from ``train_net`` by :meth:`update_model`.
    """

    def __init__(self, args, max_action_no, batch_dimension):
        """Create the backend, both networks, the optimizer and the buffers.

        Args:
            args: settings object. Attributes read by this class:
                train_batch_size, discount_factor, use_gpu_replay_mem,
                optimizer, rms_decay, learning_rate, dnn_initializer,
                clip_reward, clip_reward_high, clip_reward_low,
                prioritized_replay, double_dqn and clip_loss.
            max_action_no (int): number of network outputs.
            batch_dimension: 4-element sequence describing the history
                buffer; element 0 becomes the last axis of the network input
                shape (presumably the batch size -- confirm against caller).
        """
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem

        self.be = gen_backend(backend='gpu', batch_size=self.train_batch_size)

        self.input_shape = (batch_dimension[1], batch_dimension[2],
                            batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            # history is kept on the device as uint8 and converted in set_input()
            self.history_buffer = self.be.zeros(batch_dimension,
                                                dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        # Bug fix
        for l in self.train_net.layers.layers:
            l.parallelism = 'Disabled'
        self.train_net.initialize(self.input_shape[:-1], self.cost)

        self.target_net = Model(self.create_layers(max_action_no))
        # Bug fix
        for l in self.target_net.layers.layers:
            l.parallelism = 'Disabled'
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':  # Adam
            self.optimizer = Adam(beta_1=args.rms_decay,
                                  beta_2=args.rms_decay,
                                  learning_rate=args.learning_rate)
        else:  # Neon RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                     learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True

    def get_initializer(self, input_size):
        """Return the weight initializer selected by args.dnn_initializer.

        'xavier' gives Xavier(); 'fan_in' gives a uniform distribution on
        +/- 1/sqrt(input_size); anything else gives Gaussian(0, 0.01).

        Args:
            input_size (int): fan-in of the layer, used only for 'fan_in'.
        """
        dnnInit = self.args.dnn_initializer
        if dnnInit == 'xavier':
            initializer = Xavier()
        elif dnnInit == 'fan_in':
            std_dev = 1.0 / math.sqrt(input_size)
            initializer = Uniform(low=-std_dev, high=std_dev)
        else:
            initializer = Gaussian(0, 0.01)
        return initializer

    def create_layers(self, max_action_no):
        """Return a fresh layer list: three conv layers and two affine layers.

        Each layer gets its own initializer from :meth:`get_initializer`,
        called with that layer's fan-in.

        Args:
            max_action_no (int): number of units in the linear output layer.
        """
        layers = []

        initializer = self.get_initializer(input_size=4 * 8 * 8)
        layers.append(
            Conv(fshape=(8, 8, 32),
                 strides=4,
                 init=initializer,
                 bias=initializer,
                 activation=Rectlin()))

        initializer = self.get_initializer(input_size=32 * 4 * 4)
        layers.append(
            Conv(fshape=(4, 4, 64),
                 strides=2,
                 init=initializer,
                 bias=initializer,
                 activation=Rectlin()))

        initializer = self.get_initializer(input_size=64 * 3 * 3)
        layers.append(
            Conv(fshape=(3, 3, 64),
                 strides=1,
                 init=initializer,
                 bias=initializer,
                 activation=Rectlin()))

        initializer = self.get_initializer(input_size=7 * 7 * 64)
        layers.append(
            Affine(nout=512,
                   init=initializer,
                   bias=initializer,
                   activation=Rectlin()))

        initializer = self.get_initializer(input_size=512)
        layers.append(
            Affine(nout=max_action_no, init=initializer, bias=initializer))

        return layers

    def clip_reward(self, reward):
        """Clamp reward to [args.clip_reward_low, args.clip_reward_high]."""
        if reward > self.args.clip_reward_high:
            return self.args.clip_reward_high
        elif reward < self.args.clip_reward_low:
            return self.args.clip_reward_low
        else:
            return reward

    def set_input(self, data):
        """Load a batch into the network input tensor, scaled by 1/255.

        Args:
            data: batch with the batch axis first; a device tensor when
                use_gpu_replay_mem is set, otherwise a numpy array.
        """
        if self.use_gpu_replay_mem:
            self.be.copy_transpose(data, self.input_uint8, axes=(1, 2, 3, 0))
            self.input[:] = self.input_uint8 / 255
        else:
            self.input.set(data.transpose(1, 2, 3, 0).copy())
            self.be.divide(self.input, 255, self.input)

    def predict(self, history_buffer):
        """Return the train network's Q-values for the first batch entry."""
        self.set_input(history_buffer)
        output = self.train_net.fprop(self.input, inference=True)
        return output.T.asnumpyarray()[0]

    def print_weights(self):
        """Do nothing; placeholder kept for interface compatibility."""
        pass

    def train(self, minibatch, replay_memory, learning_rate, debug):
        """Run one Q-learning update on a minibatch.

        Args:
            minibatch: tuple (prestates, actions, rewards, poststates,
                terminals), extended by (replay_indexes, heap_indexes,
                weights) when args.prioritized_replay is enabled.
            replay_memory: object whose ``update_td(heap_index, td_error)``
                is called per sample when prioritized replay is enabled.
            learning_rate: not used by this implementation.
            debug: when true, per-sample weights and deltas are printed
                (prioritized replay only).
        """
        if self.args.prioritized_replay == True:
            prestates, actions, rewards, poststates, terminals, replay_indexes, heap_indexes, weights = minibatch
        else:
            prestates, actions, rewards, poststates, terminals = minibatch

        # Get Q*(s, a) with targetNet
        self.set_input(poststates)
        post_qvalue = self.target_net.fprop(self.input,
                                            inference=True).T.asnumpyarray()

        if self.args.double_dqn == True:
            # Get Q*(s, a) with trainNet
            post_qvalue2 = self.train_net.fprop(
                self.input, inference=True).T.asnumpyarray()

        # Get Q(s, a) with trainNet
        self.set_input(prestates)
        pre_qvalue = self.train_net.fprop(self.input, inference=False)

        # work on a copy so the predictions themselves are left untouched
        label = pre_qvalue.asnumpyarray().copy()
        for i in range(0, self.train_batch_size):
            if self.args.clip_reward:
                reward = self.clip_reward(rewards[i])
            else:
                reward = rewards[i]
            if terminals[i]:
                label[actions[i], i] = reward
            else:
                if self.args.double_dqn == True:
                    # double DQN: the train network picks the action, the
                    # target network supplies its value
                    max_index = np.argmax(post_qvalue2[i])
                    label[actions[i],
                          i] = reward + self.discount_factor * post_qvalue[i][
                              max_index]
                else:
                    label[actions[i],
                          i] = reward + self.discount_factor * np.max(
                              post_qvalue[i])

        # copy targets to GPU memory
        self.targets.set(label)

        delta = self.cost.get_errors(pre_qvalue, self.targets)

        if self.args.prioritized_replay == True:
            delta_value = delta.asnumpyarray()
            for i in range(self.train_batch_size):
                if debug:
                    # single parenthesised argument: prints the same text on
                    # Python 2 and is valid syntax on Python 3
                    print('weight[%s]: %.5f, delta: %.5f, newDelta: %.5f' % (
                        i, weights[i], delta_value[actions[i], i],
                        weights[i] * delta_value[actions[i], i]))
                # the unweighted error is reported to the replay memory
                # before the sample weight is applied to it
                replay_memory.update_td(heap_indexes[i],
                                        abs(delta_value[actions[i], i]))
                delta_value[actions[i],
                            i] = weights[i] * delta_value[actions[i], i]
            delta.set(delta_value.copy())

        if self.args.clip_loss:
            self.be.clip(delta, -1.0, 1.0, out=delta)

        self.train_net.bprop(delta)
        self.optimizer.optimize(self.train_net.layers_to_optimize, epoch=0)

    def update_model(self):
        """Copy the train network's weights and states into the target network."""
        # have to serialize also states for batch normalization to work
        pdict = self.train_net.get_description(get_weights=True,
                                               keep_states=True)
        self.target_net.deserialize(pdict, load_states=True)

    def finish_train(self):
        """Clear the ``running`` flag."""
        self.running = False

    def load(self, file_name):
        """Load train-network parameters from file_name and sync the target network."""
        self.train_net.load_params(file_name)
        self.update_model()

    def save(self, file_name):
        """Save the train network's parameters to file_name."""
        self.train_net.save_params(file_name)
示例#38
0
    ]
else:
    layers = [
        LSTM(recurrent_units,
             init,
             activation=Logistic(),
             gate_activation=Tanh(),
             reset_cells=True),
        RecurrentLast(),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity())
    ]

# wrap the layer list chosen above in a neon Model
model = Model(layers=layers)

# cost and optimizer: mean squared error cost, RMSProp with its default
# settings apart from the stochastic rounding option taken from args
cost = GeneralizedCost(MeanSquared())
optimizer = RMSProp(stochastic_round=args.rounding)

# valid_set is the evaluation set; every other callback option is forwarded
# unchanged from args.callback_args
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# fit model
model.fit(train_set,
          optimizer=optimizer,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)

# ======= visualize how the model does on the validation set =======
# Run the trained model on the training data and reshape its outputs so that
# each row holds train_set.nfeatures values; the outputs are compared with
# the data further below.
train_output = model.get_outputs(train_set).reshape(-1, train_set.nfeatures)
示例#39
0
class DeepQNetwork:
  """Convolutional deep Q-network with optional batch normalization.

  An online model is trained on a sum-squared cost with a configurable
  optimizer. When ``args.target_steps`` is non-zero a second, structurally
  identical target model supplies the bootstrap Q-values and is refreshed
  from the online model every ``target_steps`` training iterations;
  otherwise the online model doubles as the target model.
  """

  def __init__(self, num_actions, args):
    """Create the backend, both networks and the reusable device tensors.

    Args:
      num_actions: number of network outputs (one Q-value per action).
      args: settings object. Attributes read here: batch_size,
        discount_rate, history_length, screen_height, screen_width,
        clip_error, min_reward, max_reward, batch_norm, backend,
        random_seed, device_id, datatype, stochastic_round, optimizer,
        learning_rate and/or decay_rate (depending on the optimizer),
        target_steps and, when target_steps is non-zero,
        save_weights_prefix.

    Raises:
      ValueError: if args.optimizer names an unsupported optimizer.
    """
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error
    self.min_reward = args.min_reward
    self.max_reward = args.max_reward
    self.batch_norm = args.batch_norm

    # create Neon backend
    self.be = gen_backend(backend = args.backend,
                 batch_size = args.batch_size,
                 rng_seed = args.random_seed,
                 device_id = args.device_id,
                 datatype = np.dtype(args.datatype).type,
                 stochastic_round = args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.input = self.be.empty(self.input_shape)
    self.input.lshape = self.input_shape # HACK: needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers = layers)
    self.cost = GeneralizedCost(costfunc = SumSquared())
    # Bug fix
    for l in self.model.layers.layers:
      l.parallelism = 'Disabled'
    self.model.initialize(self.input_shape[:-1], self.cost)
    self.optimizer = self._createOptimizer(args)

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
      self.target_model = Model(layers = self._createLayers(num_actions))
      # Bug fix
      for l in self.target_model.layers.layers:
        l.parallelism = 'Disabled'
      self.target_model.initialize(self.input_shape[:-1])
      self.save_weights_prefix = args.save_weights_prefix
    else:
      self.target_model = self.model

    # optional observer; train() calls callback.on_train(cost) when set
    self.callback = None

  def _createOptimizer(self, args):
    """Build the optimizer selected by args.optimizer.

    Supported names are 'rmsprop', 'adam' and 'adadelta'.

    Raises:
      ValueError: for any other name. A real exception is used (rather than
        an assert) so the check survives ``python -O``.
    """
    if args.optimizer == 'rmsprop':
      return RMSProp(learning_rate = args.learning_rate,
          decay_rate = args.decay_rate,
          stochastic_round = args.stochastic_round)
    if args.optimizer == 'adam':
      return Adam(learning_rate = args.learning_rate,
          stochastic_round = args.stochastic_round)
    if args.optimizer == 'adadelta':
      return Adadelta(decay = args.decay_rate,
          stochastic_round = args.stochastic_round)
    raise ValueError("Unknown optimizer: %r" % (args.optimizer,))

  def _createLayers(self, num_actions):
    """Return a fresh list of layers: three conv layers and two affine layers.

    All hidden layers take the batch_norm setting stored on the instance.

    Args:
      num_actions: number of units in the linear output layer.
    """
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    # The first hidden layer convolves 32 filters of 8x8 with stride 4 with the input image and applies a rectifier nonlinearity.
    layers.append(Conv((8, 8, 32), strides=4, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
    # The second hidden layer convolves 64 filters of 4x4 with stride 2, again followed by a rectifier nonlinearity.
    layers.append(Conv((4, 4, 64), strides=2, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
    # This is followed by a third convolutional layer that convolves 64 filters of 3x3 with stride 1 followed by a rectifier.
    layers.append(Conv((3, 3, 64), strides=1, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
    # The final hidden layer is fully-connected and consists of 512 rectifier units.
    layers.append(Affine(nout=512, init=init_norm, activation=Rectlin(), batch_norm=self.batch_norm))
    # The output layer is a fully-connected linear layer with a single output for each valid action.
    layers.append(Affine(nout=num_actions, init = init_norm))
    return layers

  def _setInput(self, states):
    """Load a batch of states into the reusable input tensor, scaled by 1/255.

    Args:
      states: 4-d numpy array with the batch on axis 0; the batch axis is
        moved to the end before the data is copied into the tensor.
    """
    # change order of axes to match what Neon expects
    states = np.transpose(states, axes = (1, 2, 3, 0))
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.input.set(states.copy())
    # normalize network input between 0 and 1
    self.be.divide(self.input, 255, self.input)

  def train(self, minibatch, epoch):
    """Run one Q-learning update on a minibatch.

    Args:
      minibatch: tuple (prestates, actions, rewards, poststates, terminals)
        of numpy arrays; the state arrays are 4-d, the others 1-d, and all
        share the same first dimension.
      epoch: epoch number handed to the optimizer.
    """
    # expand components of minibatch
    prestates, actions, rewards, poststates, terminals = minibatch
    assert len(prestates.shape) == 4
    assert len(poststates.shape) == 4
    assert len(actions.shape) == 1
    assert len(rewards.shape) == 1
    assert len(terminals.shape) == 1
    assert prestates.shape == poststates.shape
    assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]

    if self.target_steps and self.train_iterations % self.target_steps == 0:
      # have to serialize also states for batch normalization to work
      pdict = self.model.get_description(get_weights=True, keep_states=True)
      self.target_model.deserialize(pdict, load_states=True)

    # feed-forward pass for poststates to get Q-values
    self._setInput(poststates)
    postq = self.target_model.fprop(self.input, inference = True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    maxpostq = self.be.max(postq, axis=0).asnumpyarray()
    assert maxpostq.shape == (1, self.batch_size)

    # feed-forward pass for prestates
    self._setInput(prestates)
    preq = self.model.fprop(self.input, inference = False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    # The copy seems necessary for the cpu backend.
    targets = preq.asnumpyarray().copy()

    # clip rewards to the configured [min_reward, max_reward] range
    rewards = np.clip(rewards, self.min_reward, self.max_reward)

    # update Q-value targets for actions taken
    for i, action in enumerate(actions):
      if terminals[i]:
        targets[action, i] = float(rewards[i])
      else:
        targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]

    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1,1)

    # clip errors
    if self.clip_error:
      self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

    # calculate statistics
    if self.callback:
      self.callback.on_train(cost[0,0])

  def predict(self, states):
    """Return the Q-values for a full batch of states.

    Args:
      states: numpy array of shape
        (batch_size, history_length, screen_height, screen_width).

    Returns:
      numpy array with the batch on the first axis and one column per action.
    """
    # minibatch is full size, because Neon doesn't let change the minibatch size
    assert states.shape == ((self.batch_size, self.history_length,) + self.screen_dim)

    # calculate Q-values for the states
    self._setInput(states)
    qvalues = self.model.fprop(self.input, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

    # transpose the result, so that batch size is first dimension
    return qvalues.T.asnumpyarray()

  def load_weights(self, load_path):
    """Load the online model's parameters from the file at load_path."""
    self.model.load_params(load_path)

  def save_weights(self, save_path):
    """Save the online model's parameters to the file at save_path."""
    self.model.save_params(save_path)
示例#40
0
class ModelRunnerNeon(object):
    """
    Q-learning model runner built on the Neon 'gpu' backend.

    Two identically structured networks are kept: ``train_net``, which is
    optimized by :meth:`train`, and ``target_net``, which supplies the
    post-state Q-values and is refreshed from ``train_net`` by
    :meth:`update_model`.
    """

    def __init__(self, args, max_action_no, batch_dimension):
        """
        Create the backend, the device buffers, both networks and the optimizer.

        Arguments:
            args: option object. This class reads ``train_batch_size``,
                  ``discount_factor``, ``use_gpu_replay_mem``, ``optimizer``,
                  ``rms_decay``, ``learning_rate``, ``dnn_initializer``,
                  ``clip_reward``, ``clip_reward_high``, ``clip_reward_low``,
                  ``double_dqn``, ``prioritized_replay`` and ``clip_loss``.
            max_action_no (int): number of outputs of the last layer.
            batch_dimension (sequence): 4-element shape of a state batch; its
                  first element is moved to the last position to form the
                  network input shape.
        """
        self.args = args
        self.train_batch_size = args.train_batch_size
        self.discount_factor = args.discount_factor
        self.use_gpu_replay_mem = args.use_gpu_replay_mem

        self.be = gen_backend(backend='gpu',
                              batch_size=self.train_batch_size)

        # network input layout: batch_dimension with its first axis moved last
        self.input_shape = (batch_dimension[1], batch_dimension[2],
                            batch_dimension[3], batch_dimension[0])
        self.input = self.be.empty(self.input_shape)
        self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
        self.targets = self.be.empty((max_action_no, self.train_batch_size))

        if self.use_gpu_replay_mem:
            # uint8 buffers on the backend; set_input() converts them to the
            # float input on the device
            self.history_buffer = self.be.zeros(batch_dimension, dtype=np.uint8)
            self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
        else:
            self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

        # network that is trained; it is initialized together with the cost
        self.train_net = Model(self.create_layers(max_action_no))
        self.cost = GeneralizedCost(costfunc=SumSquared())
        self._disable_parallelism(self.train_net)
        self.train_net.initialize(self.input_shape[:-1], self.cost)

        # network used only for forward passes on post-states
        self.target_net = Model(self.create_layers(max_action_no))
        self._disable_parallelism(self.target_net)
        self.target_net.initialize(self.input_shape[:-1])

        if self.args.optimizer == 'Adam':
            # both Adam betas are taken from the same rms_decay option
            self.optimizer = Adam(beta_1=args.rms_decay,
                                  beta_2=args.rms_decay,
                                  learning_rate=args.learning_rate)
        else:
            # any other value selects Neon's RMSProp
            self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                     learning_rate=args.learning_rate)

        self.max_action_no = max_action_no
        self.running = True

    @staticmethod
    def _disable_parallelism(net):
        """Set ``parallelism = 'Disabled'`` on every layer of ``net``.

        The original code applied this before ``initialize()`` and labelled it
        only as a "Bug fix"; the underlying reason is not recorded here.
        """
        for layer in net.layers.layers:
            layer.parallelism = 'Disabled'

    def get_initializer(self, input_size):
        """
        Build the initializer selected by ``args.dnn_initializer``.

        Arguments:
            input_size (int): fan-in of the layer; only used by 'fan_in'.

        Returns:
            ``Xavier()`` for 'xavier'; ``Uniform(-b, b)`` with
            ``b = 1 / sqrt(input_size)`` for 'fan_in'; ``Gaussian(0, 0.01)``
            for every other value.
        """
        scheme = self.args.dnn_initializer
        if scheme == 'xavier':
            return Xavier()
        if scheme == 'fan_in':
            bound = 1.0 / math.sqrt(input_size)
            return Uniform(low=-bound, high=bound)
        return Gaussian(0, 0.01)

    def create_layers(self, max_action_no):
        """
        Build the layer list: three Conv layers, a 512-unit Affine layer and a
        linear Affine output layer with ``max_action_no`` units.

        Each layer uses one initializer object for both weights and bias.

        Arguments:
            max_action_no (int): number of units of the output layer.

        Returns:
            list: freshly constructed layers, in network order.
        """
        # (fan-in given to get_initializer, filter shape, stride)
        # NOTE(review): the fan-in numbers are hard-coded; 4 * 8 * 8 presumably
        # assumes 4 input channels and 7 * 7 * 64 a fixed screen size - confirm.
        conv_specs = (
            (4 * 8 * 8, (8, 8, 32), 4),
            (32 * 4 * 4, (4, 4, 64), 2),
            (64 * 3 * 3, (3, 3, 64), 1),
        )

        layers = []
        for fan_in, fshape, stride in conv_specs:
            initializer = self.get_initializer(input_size=fan_in)
            layers.append(Conv(fshape=fshape, strides=stride, init=initializer,
                               bias=initializer, activation=Rectlin()))

        initializer = self.get_initializer(input_size=7 * 7 * 64)
        layers.append(Affine(nout=512, init=initializer, bias=initializer,
                             activation=Rectlin()))

        # output layer has no activation
        initializer = self.get_initializer(input_size=512)
        layers.append(Affine(nout=max_action_no, init=initializer, bias=initializer))

        return layers

    def clip_reward(self, reward):
        """
        Limit ``reward`` to [``args.clip_reward_low``, ``args.clip_reward_high``].

        Returns:
            The configured bound that was exceeded, otherwise ``reward`` itself.
        """
        if reward > self.args.clip_reward_high:
            return self.args.clip_reward_high
        elif reward < self.args.clip_reward_low:
            return self.args.clip_reward_low
        else:
            return reward

    def set_input(self, data):
        """
        Copy a state batch into ``self.input`` with axes permuted by
        (1, 2, 3, 0) and every value divided by 255.

        Arguments:
            data: backend tensor when ``use_gpu_replay_mem`` is set (it is
                  handed to ``be.copy_transpose``), otherwise an object with a
                  numpy-style ``transpose`` method.
        """
        if self.use_gpu_replay_mem:
            self.be.copy_transpose(data, self.input_uint8, axes=(1, 2, 3, 0))
            self.input[:] = self.input_uint8 / 255
        else:
            self.input.set(data.transpose(1, 2, 3, 0).copy())
            self.be.divide(self.input, 255, self.input)

    def predict(self, history_buffer):
        """
        Run ``train_net`` in inference mode on ``history_buffer``.

        Returns:
            Row 0 of the transposed output, i.e. the network outputs for the
            first sample of the batch.
        """
        self.set_input(history_buffer)
        output = self.train_net.fprop(self.input, inference=True)
        return output.T.asnumpyarray()[0]

    def print_weights(self):
        """Placeholder; intentionally does nothing."""
        pass

    def train(self, minibatch, replay_memory, learning_rate, debug):
        """
        Perform one optimization step of ``train_net`` on ``minibatch``.

        Arguments:
            minibatch (tuple): ``(prestates, actions, rewards, poststates,
                  terminals)``; when ``args.prioritized_replay`` is on, three
                  more items follow: ``replay_indexes, heap_indexes, weights``.
            replay_memory: only used with prioritized replay, where
                  ``update_td(heap_index, abs_error)`` is called per sample.
            learning_rate: accepted for interface compatibility but not used;
                  the optimizer keeps the rate it was constructed with.
            debug (bool): with prioritized replay, print each sample's weight
                  and error before and after weighting.
        """
        # The flags are deliberately still compared with ``== True``, as in
        # the original code, so a non-boolean option value keeps meaning "off".
        use_prioritized = self.args.prioritized_replay == True  # noqa: E712
        use_double_dqn = self.args.double_dqn == True  # noqa: E712

        if use_prioritized:
            # replay_indexes is part of the tuple but is not needed here
            (prestates, actions, rewards, poststates, terminals,
             replay_indexes, heap_indexes, weights) = minibatch
        else:
            prestates, actions, rewards, poststates, terminals = minibatch

        # Q-values of the post-states from the target network
        self.set_input(poststates)
        post_qvalue = self.target_net.fprop(self.input, inference=True).T.asnumpyarray()

        if use_double_dqn:
            # double DQN: the train network picks the action, the target
            # network (above) provides its value
            post_qvalue2 = self.train_net.fprop(self.input, inference=True).T.asnumpyarray()

        # Q-values of the pre-states; inference=False so that bprop() can follow
        self.set_input(prestates)
        pre_qvalue = self.train_net.fprop(self.input, inference=False)

        # start from the current predictions so that only the entry of the
        # action actually taken differs from the network output
        label = pre_qvalue.asnumpyarray().copy()
        for i in range(self.train_batch_size):
            if self.args.clip_reward:
                reward = self.clip_reward(rewards[i])
            else:
                reward = rewards[i]

            if terminals[i]:
                target = reward
            elif use_double_dqn:
                max_index = np.argmax(post_qvalue2[i])
                target = reward + self.discount_factor * post_qvalue[i][max_index]
            else:
                target = reward + self.discount_factor * np.max(post_qvalue[i])
            label[actions[i], i] = target

        # copy targets to GPU memory
        self.targets.set(label)

        delta = self.cost.get_errors(pre_qvalue, self.targets)

        if use_prioritized:
            delta_value = delta.asnumpyarray()
            for i in range(self.train_batch_size):
                action = actions[i]
                td_error = delta_value[action, i]
                weighted_error = weights[i] * td_error
                if debug:
                    # single-argument print(...) emits the same text on
                    # Python 2 and Python 3
                    print('weight[%s]: %.5f, delta: %.5f, newDelta: %.5f'
                          % (i, weights[i], td_error, weighted_error))
                # the replay memory receives the unweighted absolute error
                replay_memory.update_td(heap_indexes[i], abs(td_error))
                delta_value[action, i] = weighted_error
            delta.set(delta_value.copy())

        if self.args.clip_loss:
            self.be.clip(delta, -1.0, 1.0, out=delta)

        self.train_net.bprop(delta)
        self.optimizer.optimize(self.train_net.layers_to_optimize, epoch=0)

    def update_model(self):
        """Copy weights and layer states from ``train_net`` into ``target_net``."""
        # have to serialize also states for batch normalization to work
        pdict = self.train_net.get_description(get_weights=True, keep_states=True)
        self.target_net.deserialize(pdict, load_states=True)

    def finish_train(self):
        """Clear the ``running`` flag."""
        self.running = False

    def load(self, file_name):
        """Load ``train_net`` parameters from ``file_name`` and sync ``target_net``."""
        self.train_net.load_params(file_name)
        self.update_model()

    def save(self, file_name):
        """Save ``train_net`` parameters to ``file_name``."""
        self.train_net.save_params(file_name)