Example #1
    def __init__(self, dataset_name, model_name, optimizer_name, trial_num):
        """
        :param dataset_name: name of the dataset
        :type dataset_name: str
        :param model_name: name of the model
        :type model_name: str
        :param optimizer_name: name of the optimizer
        :type optimizer_name: str
        :param trial_num: current number of repeated trials
        :type trial_num: int
        """
        # get optimized hyperparameters
        with open(
                f'../params/{dataset_name}_{model_name}_{optimizer_name}/result.json'
        ) as f:
            params = json.load(f)

        # get instances
        self.dataset = Datasets.get(dataset_name)
        self.model = Models.get(model_name, dataset=self.dataset)
        self.optimizer = Optimizers.get(optimizer_name, params=params)

        # get config
        with open('./config.json') as f:
            config = json.load(f)

        # get constants
        c = config['constants'][dataset_name][model_name]
        self.loss = c['loss']
        self.batch_size = c['batch_size']
        self.epochs = c['epochs']

        # configure and initialize directory
        d = self.main_dir = f'../data/{dataset_name}_{model_name}_{optimizer_name}/trial{trial_num}'
        if os.path.exists(d):
            shutil.rmtree(d)
        os.makedirs(d)

        # configure hyperdash experiment
        self.hd_exp = HyperdashExperiment(
            f'{dataset_name}',
            api_key_getter=lambda: config['hyperdash']['api_key'])
        self.hd_exp.param('dataset_name', dataset_name)
        self.hd_exp.param('model_name', model_name)
        self.hd_exp.param('optimizer_name', optimizer_name)
        self.hd_exp.param('trial_num', trial_num)

        for k, v in params.items():
            self.hd_exp.param(k, v)

        # set callbacks
        self.callbacks = [
            Hyperdash(['accuracy', 'loss', 'val_accuracy', 'val_loss'],
                      self.hd_exp),
            TensorBoard(log_dir=f'{self.main_dir}/tensorboard'),
            TimeLogger(filename=f'{self.main_dir}/time.csv'),
            CSVLogger(filename=f'{self.main_dir}/result.csv', append=True)
        ]
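
The constructor above only implies the layout of config.json through its key accesses (config['constants'][dataset_name][model_name] and config['hyperdash']['api_key']). Below is a minimal sketch of a compatible file; the concrete dataset/model names and values are assumptions, not taken from the project.

# Hypothetical config.json consistent with the accesses in __init__ above;
# 'mnist', 'cnn' and the constant values are placeholder assumptions.
import json

config = {
    'constants': {
        'mnist': {                                   # dataset_name (assumed example)
            'cnn': {                                 # model_name (assumed example)
                'loss': 'categorical_crossentropy',
                'batch_size': 128,
                'epochs': 100,
            }
        }
    },
    'hyperdash': {'api_key': 'YOUR_API_KEY'},
}

with open('./config.json', 'w') as f:
    json.dump(config, f, indent=2)
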
Example #2
    def objective(self, params):
        """
        objective function to optimize

        :param params: hyperparameters for the optimizer
        :return: negated maximum validation accuracy (for minimization)
        :rtype: float
        """
        # get instances
        dataset = Datasets.get(self.dataset_name)
        model = Models.get(self.model_name, dataset=dataset)
        optimizer = Optimizers.get(self.optimizer_name, params=params)

        # configure hyperdash experiment
        hd_exp = HyperdashExperiment(
            f'{self.dataset_name}',
            api_key_getter=lambda: self.config['hyperdash']['api_key'])
        hd_exp.param('dataset_name', self.dataset_name)
        hd_exp.param('model_name', self.model_name)
        hd_exp.param('optimizer_name', self.optimizer_name)

        for k, v in params.items():
            hd_exp.param(k, v)

        # set callbacks
        callbacks = [
            Hyperdash(['accuracy', 'loss', 'val_accuracy', 'val_loss'],
                      hd_exp),
            EarlyStopping('val_accuracy',
                          patience=10,
                          min_delta=0.01,
                          verbose=1),
            TerminateOnNaN()
        ]

        # get data
        (x_train, y_train), *_ = dataset.get_batch()

        # start learning
        model.compile(loss=self.loss,
                      optimizer=optimizer,
                      metrics=['accuracy'])
        history = model.fit(x_train,
                            y_train,
                            batch_size=self.batch_size,
                            epochs=self.epochs,
                            callbacks=callbacks,
                            validation_split=0.2,
                            verbose=2)

        # stop hyperdash experiment
        hd_exp.end()

        # return the negated maximum validation accuracy so that a
        # minimizing search maximizes accuracy
        val_accuracy = np.array(history.history['val_accuracy'])
        return -np.max(val_accuracy)
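
Because the objective returns the negated best validation accuracy, it is written for a minimizing search. The snippet below is a self-contained sketch of that convention with a hyperopt-style minimizer; hyperopt and the toy search space are assumptions, only the sign convention comes from the example above.

# Minimal sketch: a minimizer (here hyperopt's fmin, assumed installed) driving an
# objective that returns -accuracy, so minimization maximizes accuracy.
from hyperopt import fmin, tpe, hp

def toy_objective(params):
    # stand-in for objective(): pretend accuracy peaks at lr ~= 0.01
    accuracy = 1.0 - abs(params['lr'] - 0.01)
    return -accuracy                                  # negate, as in the snippet above

space = {'lr': hp.uniform('lr', 0.0, 0.1)}            # assumed search space
best = fmin(fn=toy_objective, space=space, algo=tpe.suggest, max_evals=50)
print(best)                                           # expect lr close to 0.01
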
Example #3
    def objective(self, params):
        """
        objective function to optimize

        :param params: hyperparameters for the optimizer
        :return: log10 of the minimum mean distance to the optimum
        :rtype: float
        """
        # get instances
        benchmark = Benchmarks.get(self.benchmark_name)
        optimizer = Optimizers.get(self.optimizer_name,
                                   benchmark=benchmark,
                                   params=params)

        # initialize coordinates
        # random seed is set to 0
        np.random.seed(0)
        coords = np.array(
            [np.random.rand(100).astype(np.float64) * 10 - 5 for _ in range(2)])
        optimum = np.array(benchmark.optimum).reshape(2, 1)

        # update coordinates
        dists_mean_min = np.inf
        wait = 0
        patience = 10
        for i in range(10000):
            coords = optimizer.update(coords)
            if i % 100 == 0:
                # Euclidean distance of each point to the optimum
                dists = np.sqrt(np.sum((coords - optimum) ** 2.0, axis=0))
                # terminate on nan
                if np.any(np.isnan(dists)):
                    break
                # early stopping
                if np.mean(dists) > dists_mean_min:
                    wait += 1
                    if wait > patience:
                        break
                else:
                    wait = 0
                    dists_mean_min = np.mean(dists)

        # return minimum distance in log 10
        return np.log10(dists_mean_min)
Example #4
    def __init__(self, benchmark_name, optimizer_name):
        """
        :param benchmark_name: name of the benchmark
        :type benchmark_name: str
        :param optimizer_name: name of the optimizer
        :type optimizer_name: str
        """
        # get optimized hyperparameters
        with open(f'../params/{benchmark_name}_{optimizer_name}/result.json') as f:
            params = json.load(f)

        # get instances
        self.benchmark = Benchmarks.get(benchmark_name)
        self.optimizer = Optimizers.get(optimizer_name, benchmark=self.benchmark, params=params)

        # configure and initialize directory
        d = self.main_dir = f'../data/{benchmark_name}_{optimizer_name}'
        if os.path.exists(d):
            shutil.rmtree(d)
        os.makedirs(d)
Example #5
    def __init__(self, name, hyper, load_params=False):

        if load_params:
            try:
                with open(name + '.params', 'rb') as f:
                    model_values, hyper, curves = pickle.load(f)
            except IOError as e:
                print("Error opening file: ", e)
                # re-raise: continuing would leave model_values/hyper/curves undefined
                raise
        else:
            model_values = {}
            curves = {
                'CD error': [],
                'MSE 1': [],
                'MSE 2': [],
                'log likelihood': [],
                'validation error': []
            }
            std_err = {}

        # initialize random number generator
        self.np_rng = np.random.RandomState(hyper['seed'])
        self.theano_rng = RandomStreams(hyper['seed'])

        self.name = name
        self.model_values = model_values
        self.hyperparameters = hyper
        self.monitoring_curves = curves
        self.params = OrderedDict()
        self.params_shp = OrderedDict()

        # Optimizer
        opt = Optimizers()
        if hyper['learner'] == 'amsgrad':
            self.update_opt = opt.adam_updates
        elif hyper['learner'] == 'momentum':
            self.update_opt = opt.momentum_updates
        elif hyper['learner'] == 'rmsprop':
            self.update_opt = opt.rmsprop_updates
        else:
            self.update_opt = opt.sgd_updates
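
The learner selection above is a plain if/elif chain. Below is a compact alternative sketch using a dict dispatch; select_update_rule is a hypothetical helper name, and it assumes only the Optimizers methods already used above (keeping the original's 'amsgrad' -> adam_updates mapping).

def select_update_rule(opt, learner):
    # Same dispatch as the if/elif chain above, including the
    # 'amsgrad' -> adam_updates mapping from the original snippet.
    update_rules = {
        'amsgrad': opt.adam_updates,
        'momentum': opt.momentum_updates,
        'rmsprop': opt.rmsprop_updates,
    }
    return update_rules.get(learner, opt.sgd_updates)

# e.g. inside __init__: self.update_opt = select_update_rule(Optimizers(), hyper['learner'])
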
Example #6
    def __init__(self, name, hyper, load_params=False):

        if load_params:
            try:
                with open(name + '.params', 'rb') as f:
                    model_values, hyper, curves = pickle.load(f)
            try:
                print("Error opening file: ", e)
            finally:
                raise
        else:
            model_values = {}
            curves = {'CD error': [], 'log likelihood': []}

        # initialize random number generator
        self.np_rng = np.random.RandomState(hyper['seed'])
        self.theano_rng = T.shared_randomstreams.RandomStreams(hyper['seed'])

        self.name = name
        self.model_values = model_values
        self.hyperparameters = hyper
        self.monitoring_curves = curves
        self.model_params = OrderedDict()
        self.model_params_shapes = OrderedDict()
        self.opt = Optimizers()
Example #7
File: test.py  Project: 2226171237/NN
                self.y[indices[have_train_num:]], self.class_num)


train_data = DataLoader(x_train, y_train, 3)
test_data = DataLoader(x_test, y_test, 3)

# build the network
model = NeuralNetwork()
model.add_layer(Layer(4, 8, 'relu'))
model.add_layer(Layer(8, 8, 'relu'))
model.add_layer(Layer(8, 3))

# build the loss function and optimizer
lr = 0.01
loss = Loss(loss='cross_entropy_with_logits')
optimizer = Optimizers(optimizer='sgd', learning_rate=lr)
model.compile(loss=loss, optimizer=optimizer)

# train on the data
num_epochs = 1600
batch_size = 64
train_loss = []
test_loss = []
for epoch in range(num_epochs):
    for x, y in train_data.get_batch(batch_size):
        loss = model.fit(x, y)
    train_loss.append(loss)
    t_loss, n, right_num = 0., 0, 0
    for x, y in test_data.get_batch(batch_size, shuffle=False):
        y_pred = model(x)
        right_num += np.sum(
Example #8
def main(data):
    # optimizer
    opt = Optimizers()

    # sampler
    theano_rng = RandomStreams(999)

    # import dataset
    n_samples = data.attrs['n_rows']
    lr = 1e-3
    batch_size = 128

    x_data = [
        data['purpose'], data['avg_speed'], data['duration'], data['trip_km'],
        data['n_coord'], data['interval'], data['dow'], data['startdistrict'],
        data['enddistrict']
    ]

    y_data = [data['mode']]

    params = OrderedDict()
    params_shp = OrderedDict()

    output = []
    input = []
    asc_params = []
    asc_params_m = []
    beta_params_f = []
    beta_params_s = []
    beta_params_sf = []
    beta_params = []
    beta_params_m = []

    for var in y_data:
        name = 'asc_' + var.name.strip('/')
        asc_shp = var['data'][:].squeeze().shape[1:]
        print('y', name, asc_shp)

        output.append(init_tensor((), name))

        mask = np.ones(asc_shp, DTYPE_FLOATX)
        mask[-1] = 0.
        asc_value = np.zeros(asc_shp, DTYPE_FLOATX) * mask

        asc_params.append(shared(asc_value, name))
        asc_params_m.append(shared(mask, name + '_mask'))

        params[name] = asc_params[-1]
        params_shp[name] = asc_shp

    for var in x_data:
        name = 'beta_' + var.name.strip('/')
        shp = var['data'].shape[1:] + asc_shp
        print('x', name, shp)

        input.append(init_tensor(var['data'].shape[1:], name))

        mask = np.ones(shp, DTYPE_FLOATX)
        mask[..., -1] = 0.
        mask = mask.flatten()
        beta_value = np.zeros(np.prod(shp), DTYPE_FLOATX) * mask
        sigma_value = np.ones(np.prod(shp), DTYPE_FLOATX) * mask

        beta_params_f.append(shared(beta_value, name))
        beta_params_sf.append(shared(sigma_value, name + '_sigma'))

        beta_params.append(T.reshape(beta_params_f[-1], shp))
        beta_params_s.append(T.reshape(beta_params_sf[-1], shp))
        beta_params_m.append(shared(mask, name + '_mask'))

        params[name] = beta_params_f[-1]
        params[name + '_sigma'] = beta_params_sf[-1]
        params_shp[name] = shp
        params_shp[name + '_sigma'] = shp

    # compute the utility function
    utility = 0.
    h_utility = 0.
    for x, b, s in zip(input, beta_params, beta_params_s):

        normal_sample = b[..., None] + T.sqr(s)[..., None] * theano_rng.normal(
            size=b.eval().shape + (1, ), avg=0., std=1., dtype=DTYPE_FLOATX)

        ax = [np.arange(x.ndim)[1:], np.arange(b.ndim)[:-1]]
        utility += T.tensordot(x, normal_sample, axes=ax)
        if x.ndim > 2:
            h_utility += T.tensordot(x, b + T.sqr(s), axes=[[1, 2], [0, 1]])
        else:
            h_utility += T.tensordot(x, b + T.sqr(s), axes=[[1], [0]])

    for y, asc in zip(output, asc_params):
        utility += asc[None, ..., None]
        h_utility += asc
        (d1, d2, d3) = utility.shape
        utility = utility.reshape((d1 * d3, d2))
        p_y_given_x = T.nnet.softmax(utility)

        hessian_prob = T.nnet.softmax(h_utility)  #!
        hessian_nll = T.log(hessian_prob)
        hessian_cr = hessian_nll[T.arange(y.shape[0]), y]
        hessian_cost = -T.sum(hessian_cr)

        nll = T.log(p_y_given_x).reshape((d3, d1, d2))
        nll = nll[:, T.arange(y.shape[0]), y]
        cost = -T.sum(T.mean(nll, axis=0))

    gparams = asc_params + beta_params_f + beta_params_sf
    grads = T.grad(cost, gparams)

    # mask gradient updates
    mask = asc_params_m + beta_params_m + beta_params_m
    for j, g in enumerate(grads):
        grads[j] = g * mask[j]

    # create list of updates to iterate over
    updates = opt.sgd_updates(gparams, grads, lr)

    # symbolic equation for the Hessian function
    stderrs = []
    hessian = T.hessian(cost=hessian_cost, wrt=gparams)
    stderr = [T.sqrt(f) for f in [T.diag(2. / h) for h in hessian]]
    stderrs.extend(stderr)

    tensors = input + output
    shared_x = [shared(var['data'][:], borrow=True) for var in x_data]
    shared_y = [T.cast(shared(var['label'][:]), 'int32') for var in y_data]
    shared_variables = shared_x + shared_y

    i = T.lscalar('index')
    start_idx = i * batch_size
    end_idx = (i + 1) * batch_size

    print('constructing Theano computational graph...')

    train = theano.function(
        inputs=[i],
        outputs=cost,
        updates=updates,
        givens={
            key: val[start_idx:end_idx]
            for key, val in zip(tensors, shared_variables)
        },
        name='train',
        allow_input_downcast=True,
    )

    std_err = theano.function(
        inputs=[],
        outputs=stderrs,
        givens={key: val[:]
                for key, val in zip(tensors, shared_variables)},
        name='std errors',
        allow_input_downcast=True,
    )

    # train model
    print('training the model...')
    curves = []
    n_batches = n_samples // batch_size
    epochs = 100
    epoch = 0
    t0 = time.time()
    while epoch < epochs:
        epoch += 1
        cost = []
        for i in range(n_batches):
            cost_items = train(i)
            cost.append(cost_items)

        epoch_cost = np.sum(cost)
        curves.append((epoch, epoch_cost))
        minutes, seconds = divmod(time.time() - t0, 60.)
        hours, minutes = divmod(minutes, 60.)
        print(("epoch {0:d} loglikelihood "
               "{1:.3f} time {hh:02d}:{mm:02d}:{ss:05.2f}").format(
                   epoch,
                   epoch_cost,
                   hh=int(hours),
                   mm=int(minutes),
                   ss=seconds))

        if (epoch % 5) == 0:
            print('checkpoint')
            param_values = {}
            for name, param in params.items():
                param_shp = params_shp[name]
                param_values[name] = param.eval().reshape(param_shp)
                np.savetxt('params/{}.csv'.format(name),
                           param_values[name].squeeze(),
                           fmt='%.3f',
                           delimiter=',')

            to_file = param_values, curves
            path = 'params/epoch_{0:d}.params'.format(epoch)
            with open(path, 'wb') as f:
                pickle.dump(to_file, f, protocol=pickle.HIGHEST_PROTOCOL)

    # save parameters and stderrs to .csv
    stderrs = std_err()
    params_list = [p for p in asc_params + beta_params_f + beta_params_sf]
    param_names = [p.name for p in asc_params + beta_params_f + beta_params_sf]
    for se, param, name in zip(stderrs, params_list, param_names):
        v = param.eval().squeeze()
        shp = v.shape
        path = 'params/stderrs_{}.csv'.format(name)
        np.savetxt(path, se.reshape(shp), fmt='%.3f', delimiter=',')
        path = 'params/tstat_{}.csv'.format(name)
        np.savetxt(path, v / se.reshape(shp), fmt='%.3f', delimiter=',')
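
main() is only handed a mapping-like data object; judging from the accesses (data.attrs['n_rows'], per-variable 'data' and 'label' datasets), an HDF5 file opened with h5py fits. A hypothetical invocation sketch follows; the file name and exact dataset layout are assumptions.

# Hypothetical entry point: assumes an HDF5 file with an 'n_rows' attribute and one
# group per variable ('purpose', 'avg_speed', ..., 'mode'), each holding a 'data'
# dataset (and, for 'mode', a 'label' dataset). The file name is an assumption.
import h5py

if __name__ == '__main__':
    with h5py.File('trips.h5', 'r') as data:
        main(data)
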
Example #9
    def __init__(self, experiment):
        self.experiment = experiment

        processing = experiment['processing']

        #config dataset
        self.dataset = DatasetFactory(
            name=experiment['dataset'],
            flat=processing['flat'],
            concat=processing['concat'],
            expand=processing['expand'],
            normalize=processing['normalize'],
        )
        
        #config state of the experiment
        self.state = LoaderState(
            id_exp=self.experiment_name(),
            epochs=experiment['epochs'],
            dataset=self.dataset,
            valid_exp=experiment['exp'],
            url=experiment['dir']
        ).state


        #compiler parameters
        optimizer = experiment['optimizer']
        opt_params = experiment['opt_params']
        loss = experiment['loss']
        metrics = [m for m in experiment['metrics'] if m != 'f1_score']
        history_metrics = [m.lower() for m in experiment['metrics'] if m != 'f1_score']
        metrics.append(f1_score)

        self.compiler_params = dict([
            ('optimizer', Optimizers(optimizer, opt_params).optimizer()),
            ('loss', loss),
            ('metrics', metrics)
        ])

        #Config training
        callbacks = []

        history_metrics.insert(0, 'loss')
        history_metrics.append('f1_score')

        cp = [m for m in history_metrics]

        for m in cp:
            history_metrics.append('val_' + m)

        callbacks.append(HistoryCheckpoint(
            self.experiment['dir'],
            self.state,
            history_metrics))
            
        callbacks.append(WeightsCheckpoint(self.experiment['dir'], self.state))

        
        
        if experiment['decay']:
            callbacks.append(ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=50, min_lr=0.1e-3))

        datagen = None
        if experiment['data_augmentation']:
            datagen = ImageDataGenerator(width_shift_range=0.1,
                                        height_shift_range=0.1,
                                        horizontal_flip=True)
        
        self.trainner = Trainner(
            epochs=experiment['epochs'],
            batch_size=experiment['batch'], 
            data_augmentation=datagen, 
            callbacks=callbacks, 
            dir_path=experiment['dir'],
            state=self.state
        )
Example #10
                                                     data['valid']['X'], data['valid']['Y'],\
                                                     data['test']['X'], data['test']['Y'],

# Initialize network
np.random.seed(1234)
network = Network(num_hidden,
                  sizes,
                  activation_choice=activation,
                  output_choice=output_choice,
                  loss_choice=loss)
model_name = '{}-{}-{}-{}-{}-{}-{}-{}.npy'.format(
    num_hidden, ','.join([str(word) for word in sizes]), activation,
    output_choice, batch_size, loss, opt, lr)
if pretrained_path is not None:
    network.load(path=pretrained_path)
optimizer = Optimizers(network.theta.shape[0], opt, lr, momentum)

# Train
print('Training has started')
num_epochs = 20
num_batches = int(float(train_X.shape[1]) / batch_size)
steps = 0
lr_min = 0.00001
loss_history = [np.inf]
prev_loss = np.inf
indices = np.arange(train_X.shape[1])
for epoch in range(num_epochs):
    steps = 0
    np.random.shuffle(indices)
    train_X, train_Y = train_X[:, indices], train_Y[indices]
    epoch_loss = []
Example #11
    def run(self,
            x,
            y,
            epochs,
            batch_size=None,
            lr=0.001,
            optimizer='adam',
            rho1=0.9,
            rho2=0.999,
            shuffle=True):
        """
        Description
        -----------
        MLP Training.

        Parameters
        ----------
        x : ndarray
        y : ndarray
        epochs : int
        batch_size : int, optional
        lr : float, optional
        optimizer : str, optional
                  One of 'sgd', 'sgd_momentum', 'adagrad', 'rmsprop' or 'adam'.
        rho1 : float, optional
              decay rate for the first-moment estimate
        rho2 : float, optional
              decay rate for the second-moment estimate
        shuffle : bool, optional
        
        Returns
        -------
        history : list
                 Loss function values throughout the training.
        """

        # if batch_size was not provided, use the full set of training images
        if batch_size is None:
            batch_size = self.__n_images

        # initializing the optimizer
        opt = Optimizers(optimizer, self.__w, self.__n_layers)

        history = []
        # Running the epochs
        for i in range(1, epochs + 1):
            total_loss = 0

            # shuffling the data
            if shuffle:
                x, y = self.__shuffling_data(x, y)

            # indexes for get samples of set of training images
            ini_batch = 0
            end_batch = batch_size

            # interval of batches until reach n_images
            while ini_batch < self.__n_images:

                # Forward pass
                s = self.__forward_pass(x[ini_batch:end_batch])

                # Calculating the Train loss
                total_loss += self.__loss.forward(s, y[ini_batch:end_batch])

                # Backward pass
                self.__backward_pass(self.__loss, y[ini_batch:end_batch], opt,
                                     lr, rho1, rho2, i)

                # updating the indexes of batch
                ini_batch += batch_size
                end_batch += batch_size

                if end_batch > self.__n_images:
                    end_batch = self.__n_images

            # loss history
            print(f'Train_Loss [{i}]: {round(total_loss/self.__n_images, 6)}')
            history.append(total_loss / self.__n_images)

        return history
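
In the usual Adam formulation, rho1 and rho2 are the exponential decay rates of the first- and second-moment estimates. The sketch below is a standard, self-contained Adam step shown only to pin down what those parameters control; it is illustrative and not necessarily the exact implementation behind Optimizers.

# Standard Adam update step (illustrative; independent of the Optimizers class above).
import numpy as np

def adam_step(w, grad, m, v, t, lr=0.001, rho1=0.9, rho2=0.999, eps=1e-8):
    m = rho1 * m + (1 - rho1) * grad                 # first-moment estimate
    v = rho2 * v + (1 - rho2) * grad ** 2            # second-moment estimate
    m_hat = m / (1 - rho1 ** t)                      # bias correction (t starts at 1)
    v_hat = v / (1 - rho2 ** t)
    return w - lr * m_hat / (np.sqrt(v_hat) + eps), m, v
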