Exemplo n.º 1
0
def fit(model_str,
        data_s,
        nb_train,
        nb_test,
        offset,
        nb_epoch=10,
        batch_size=32,
        custom_objects=None,
        callbacks=None,
        cuts_shutd=True):
    """Train a serialized model on a slice of a dataset and log the run.

    A record describing the run is inserted in the models collection before
    training starts; it is updated with the losses once training succeeds,
    flagged with ``error: 1`` on failure, or removed on ``MemoryError``.

    Args:
        model_str(str): the model dumped with the `to_json` method
        data_s(dict): the description of the dataset to load. It must hold
            the keys 'data_path', 'elec' and 'cell' and, unless 'data_path'
            is 'test', the key 'root'; the remaining entries are handed to
            the data reader's `commit`. When 'data_path' is 'test', the
            arrays are read directly from `data_s['data']`. The caller's
            dict is not modified.
        nb_train(int): the number of train datapoints to take
        nb_test(int): the number of test datapoints to take
        offset(int): how many datapoints to burn
        nb_epoch(int, optional): the number of epochs to train the model
        batch_size(int, optional): the batch size of the mini batches
        custom_objects(optional): forwarded to `train_model`; defaults to
            an empty list
        callbacks(list, optional): a list of callbacks used in the fit
        cuts_shutd(bool, optional): when true, the train and validation
            slices are split with `dr.make_datasets`; always disabled when
            'data_path' is 'test'

    Returns:
        the md5 hex digest built from the model, the data descriptor and the
        batch size, which identifies the run

    Raises:
        MemoryError: re-raised after the record of this run is deleted
        Exception: any other training error is re-raised after the record
            of this run is flagged with ``error: 1``"""

    from utils import sliced
    from databasesetup import get_models
    from datetime import datetime
    import hashlib
    import json
    import numpy as np

    if custom_objects is None:
        custom_objects = []
    if callbacks is None:
        callbacks = []

    # the model is stored and hashed exactly as it was received
    model_json = model_str

    # get the models collection
    models = get_models()

    # work on a shallow copy: the keys popped below must stay available in
    # the caller's dict so that the same description can be reused
    data_s = dict(data_s)

    # load data
    data_path = data_s.pop('data_path')
    elec = data_s.pop('elec')
    cell = data_s.pop('cell')
    if data_path != 'test':
        root = data_s.pop('root')

        reader = dr.DataReader(data_path, root)
        reader.get_files_info()
        data = reader.commit(data_s)
    else:
        # test data comes without current/bins, so it cannot be cut
        cuts_shutd = False
        data = data_s.copy()
        data['current'] = []
        data['bins'] = []
        root = None
        # keep a list version of the arrays for the database record
        data_s = {key: data_s['data'][key].tolist() for key in data_s['data']}

    # slice data
    beg, endt, endv = sliced(data['data'], nb_train, nb_test, offset)
    data_t = {n: data['data'][n][beg:endt] for n in data['data']}
    data_val = {n: data['data'][n][endt:endv] for n in data['data']}
    current_t = data['current'][beg:endt]
    current_val = data['current'][endt:endv]
    bins_t = data['bins'][beg:endt]
    bins_val = data['bins'][endt:endv]

    # cuts the data by shutdown
    if cuts_shutd:
        datas = dr.make_datasets(data_t, current_t, bins_t)
        datas_val = dr.make_datasets(data_val, current_val, bins_val)
        # pad the validation sets with the last one so that there are at
        # least as many of them as train sets
        diff = len(datas) - len(datas_val)
        if diff > 0:
            datas_val += [datas_val[-1]] * diff
    else:
        datas = [data_t]
        datas_val = [data_val]

    # TODO: implement cut dataset to match batch sizes
    if 'statefull' in model_json:
        pass

    # get a unique descriptor of the db: the mean of the first train array
    # (dict views cannot be indexed on Python 3, hence the iterator)
    first = next(iter(data_t))
    un_data = data_t[first].mean()

    # create the hash from the stringified json; hashlib only accepts bytes
    # on Python 3, so the text is encoded first
    digest_src = json.dumps(model_str) + str(un_data) + str(batch_size)
    hexdi = hashlib.md5(digest_src.encode('utf-8')).hexdigest()

    params_dump = "/parameters_h5/" + hexdi + '.h5'

    # update the full json
    full_json = {'keras_model': model_json,
                 'datetime': datetime.now(),
                 'hashed_mod': hexdi,
                 'data_id': str(un_data),
                 'params_dump': params_dump,
                 'batch_size': batch_size,
                 'trained': 0,
                 'cell': cell,
                 'elec': elec,
                 'data_path': data_path,
                 'root': root,
                 'data_s': data_s}
    mod_id = models.insert_one(full_json).inserted_id

    try:
        loss, val_loss, model = train_model(model_str, custom_objects, datas,
                                            datas_val, batch_size, nb_epoch,
                                            callbacks)
        models.update_one({"_id": mod_id}, {'$set': {
            'train_loss': loss,
            'min_tloss': np.min(loss),
            'valid_loss': val_loss,
            'min_vloss': np.min(val_loss),
            'iter_stopped': nb_epoch * len(datas),
            'trained': 1,
            'date_finished_trained': datetime.now()
        }})

        model.save_weights(params_dump, overwrite=True)

    except MemoryError:
        # remove the record of this very run; filtering on the hash could
        # delete an older record sharing the same model/data/batch size
        models.delete_one({"_id": mod_id})
        raise

    except Exception:
        # keep the record but flag it, then let the caller see the error
        models.update_one({"_id": mod_id}, {'$set': {'error': 1}})
        raise
    return hexdi
Exemplo n.º 2
0
 def hosts_for_domain(self, domain):
     """Map ``sliced(entry, 1)`` over the hosts registered for a domain.

     The entries are looked up in ``self.hosts_by_domain()`` under the key
     ``dots(domain)``; a missing key propagates the lookup error.
     """
     def _slice_from_one(entry):
         return sliced(entry, 1)

     domain_hosts = self.hosts_by_domain()[dots(domain)]
     return map(_slice_from_one, domain_hosts)
Exemplo n.º 3
0
def dot_(string, *args):
    """Return ``sliced(string, *args)``, or ``string`` if that is falsy."""
    piece = sliced(string, *args)
    if piece:
        return piece
    return string