Example #1
def run(gParameters):
    """
    Runs the model using the specified set of parameters.

    Args:
       gParameters: a Python dictionary containing the parameters
          (e.g. epochs) to run the model with.
    """
    # Normalize the 'dense' parameter to a list of layer sizes
    if 'dense' in gParameters:
        dval = gParameters['dense']
        if not isinstance(dval, list):
            if isinstance(dval, str):
                # Space-separated string, e.g. "1000 500 100"; str2lst is the
                # conversion helper referenced in the original code.
                res = str2lst(dval)
            else:
                res = [dval]
            gParameters['dense'] = res
        print(gParameters['dense'])

    if 'conv' in gParameters:
        print('Conv input', gParameters['conv'])

    # Construct extension to save model
    ext = benchmark.extension_from_parameters(gParameters, '.keras')
    logfile = gParameters['logfile'] or (
        gParameters['output_dir'] + ext + '.log')

    fh = logging.FileHandler(logfile)
    fh.setFormatter(
        logging.Formatter("[%(asctime)s %(process)d] %(message)s",
                          datefmt="%Y-%m-%d %H:%M:%S"))
    fh.setLevel(logging.DEBUG)

    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.DEBUG if gParameters['verbose'] else logging.INFO)

    benchmark.logger.setLevel(logging.DEBUG)
    benchmark.logger.addHandler(fh)
    benchmark.logger.addHandler(sh)
    benchmark.logger.info('Params: {}'.format(gParameters))

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = candle.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = benchmark.DataLoader(
        seed=seed,
        dtype=gParameters['data_type'],
        val_split=gParameters['val_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    # Initialize weights and learning rule
    initializer_weights = candle.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = candle.build_initializer('constant', kerasDefaults, 0.)

    activation = gParameters['activation']

    # Define model architecture
    gen_shape = None
    out_dim = 1

    model = Sequential()
    if 'dense' in gParameters:  # Build dense layers
        for layer in gParameters['dense']:
            if layer:
                model.add(
                    Dense(layer,
                          input_dim=loader.input_dim,
                          kernel_initializer=initializer_weights,
                          bias_initializer=initializer_bias))
                if gParameters['batch_normalization']:
                    model.add(BatchNormalization())
                model.add(Activation(gParameters['activation']))
                if gParameters['dropout']:
                    model.add(Dropout(gParameters['dropout']))
    else:  # Build convolutional layers
        gen_shape = 'add_1d'
        lc_flag = 'locally_connected' in gParameters

        for i in range(len(gParameters['conv'])):
            if i == 0:
                add_conv_layer(model,
                               gParameters['conv'][i],
                               input_dim=loader.input_dim,
                               locally_connected=lc_flag)
            else:
                add_conv_layer(model,
                               gParameters['conv'][i],
                               locally_connected=lc_flag)
            if gParameters['batch_normalization']:
                model.add(BatchNormalization())
            model.add(Activation(gParameters['activation']))
            if gParameters['pool']:
                model.add(MaxPooling1D(pool_size=gParameters['pool']))
        model.add(Flatten())

    model.add(Dense(out_dim))

    # Define optimizer
    optimizer = candle.build_optimizer(gParameters['optimizer'],
                                       gParameters['learning_rate'],
                                       kerasDefaults)

    # Compile and display model
    model.compile(loss=gParameters['loss'], optimizer=optimizer)
    model.summary()
    benchmark.logger.debug('Model: {}'.format(model.to_json()))

    train_gen = benchmark.DataGenerator(
        loader,
        batch_size=gParameters['batch_size'],
        shape=gen_shape,
        name='train_gen',
        cell_noise_sigma=gParameters['cell_noise_sigma']).flow()
    val_gen = benchmark.DataGenerator(loader,
                                      partition='val',
                                      batch_size=gParameters['batch_size'],
                                      shape=gen_shape,
                                      name='val_gen').flow()
    val_gen2 = benchmark.DataGenerator(loader,
                                       partition='val',
                                       batch_size=gParameters['batch_size'],
                                       shape=gen_shape,
                                       name='val_gen2').flow()
    test_gen = benchmark.DataGenerator(loader,
                                       partition='test',
                                       batch_size=gParameters['batch_size'],
                                       shape=gen_shape,
                                       name='test_gen').flow()

    train_steps = int(loader.n_train / gParameters['batch_size'])
    val_steps = int(loader.n_val / gParameters['batch_size'])
    test_steps = int(loader.n_test / gParameters['batch_size'])

    if 'train_steps' in gParameters:
        train_steps = gParameters['train_steps']
    if 'val_steps' in gParameters:
        val_steps = gParameters['val_steps']
    if 'test_steps' in gParameters:
        test_steps = gParameters['test_steps']

    checkpointer = ModelCheckpoint(filepath=gParameters['output_dir'] +
                                   '.model' + ext + '.h5',
                                   save_best_only=True)
    progbar = MyProgbarLogger(train_steps * gParameters['batch_size'])
    loss_history = MyLossHistory(
        progbar=progbar,
        val_gen=val_gen2,
        test_gen=test_gen,
        val_steps=val_steps,
        test_steps=test_steps,
        metric=gParameters['loss'],
        category_cutoffs=gParameters['category_cutoffs'],
        ext=ext,
        pre=gParameters['output_dir'])

    # Seed random generator for training
    np.random.seed(seed)

    candleRemoteMonitor = candle.CandleRemoteMonitor(params=gParameters)

    history = model.fit_generator(
        train_gen,
        train_steps,
        epochs=gParameters['epochs'],
        validation_data=val_gen,
        validation_steps=val_steps,
        verbose=0,
        callbacks=[checkpointer, loss_history, progbar, candleRemoteMonitor],
    )

    benchmark.logger.removeHandler(fh)
    benchmark.logger.removeHandler(sh)

    return history
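
The example above defines only run(); a minimal driver for it could look like the sketch below. initialize_parameters() here is a hypothetical helper, assumed to return the fully populated parameter dictionary (config-file defaults merged with command-line overrides).

# Minimal sketch of a driver for run(); initialize_parameters() is a
# hypothetical helper assumed to return the fully populated parameter
# dictionary (config-file defaults merged with command-line overrides).
if __name__ == '__main__':
    gParameters = initialize_parameters()
    gParameters['epochs'] = 2   # optional quick smoke-test override
    run(gParameters)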

Example #2

def main():
    # Get command-line parameters
    parser = get_p1b3_parser()
    args = parser.parse_args()
    #print('Args:', args)
    # Get parameters from configuration file
    fileParameters = p1b3.read_config_file(args.config_file)
    #print ('Params:', fileParameters)

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')
    # Construct extension to save model
    ext = p1b3.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = p1b3.DataLoader(
        seed=seed,
        dtype=gParameters['datatype'],
        val_split=gParameters['validation_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    net = mx.sym.Variable('concat_features')
    out = mx.sym.Variable('growth')

    # Initialize weights and learning rule
    initializer_weights = p1_common_mxnet.build_initializer(
        gParameters['initialization'], kerasDefaults)
    initializer_bias = p1_common_mxnet.build_initializer(
        'constant', kerasDefaults, 0.)
    init = mx.initializer.Mixed(['bias', '.*'],
                                [initializer_bias, initializer_weights])

    activation = gParameters['activation']

    # Define model architecture
    layers = []
    reshape = None

    if 'dense' in gParameters:  # Build dense layers
        for layer in gParameters['dense']:
            if layer:
                net = mx.sym.FullyConnected(data=net, num_hidden=layer)
                net = mx.sym.Activation(data=net, act_type=activation)
            if gParameters['drop']:
                net = mx.sym.Dropout(data=net, p=gParameters['drop'])
    else:  # Build convolutional layers
        net = mx.sym.Reshape(data=net,
                             shape=(gParameters['batch_size'], 1,
                                    loader.input_dim, 1))
        layer_list = list(range(0, len(gParameters['conv']), 3))
        for l, i in enumerate(layer_list):
            nb_filter = gParameters['conv'][i]
            filter_len = gParameters['conv'][i + 1]
            stride = gParameters['conv'][i + 2]
            if nb_filter <= 0 or filter_len <= 0 or stride <= 0:
                break
            net = mx.sym.Convolution(data=net,
                                     num_filter=nb_filter,
                                     kernel=(filter_len, 1),
                                     stride=(stride, 1))
            net = mx.sym.Activation(data=net, act_type=activation)
            if gParameters['pool']:
                net = mx.sym.Pooling(data=net,
                                     pool_type="max",
                                     kernel=(gParameters['pool'], 1),
                                     stride=(1, 1))
        net = mx.sym.Flatten(data=net)

    net = mx.sym.FullyConnected(data=net, num_hidden=1)
    net = mx.symbol.LinearRegressionOutput(data=net, label=out)

    # Display model
    p1_common_mxnet.plot_network(net, 'net' + ext)

    # Define mxnet data iterators
    train_samples = int(loader.n_train)
    val_samples = int(loader.n_val)

    if 'train_samples' in gParameters:
        train_samples = gParameters['train_samples']
    if 'val_samples' in gParameters:
        val_samples = gParameters['val_samples']

    train_iter = ConcatDataIter(loader,
                                batch_size=gParameters['batch_size'],
                                num_data=train_samples)
    val_iter = ConcatDataIter(loader,
                              partition='val',
                              batch_size=gParameters['batch_size'],
                              num_data=val_samples)

    devices = mx.cpu()
    if gParameters['gpus']:
        devices = [mx.gpu(i) for i in gParameters['gpus']]

    mod = mx.mod.Module(net,
                        data_names=('concat_features', ),
                        label_names=('growth', ),
                        context=devices)

    # Define optimizer
    optimizer = p1_common_mxnet.build_optimizer(gParameters['optimizer'],
                                                gParameters['learning_rate'],
                                                kerasDefaults)

    # Seed random generator for training
    mx.random.seed(seed)

    mod.fit(train_iter,
            eval_data=val_iter,
            eval_metric=gParameters['loss'],
            optimizer=optimizer,
            num_epoch=gParameters['epochs'],
            initializer=init,
            batch_end_callback=mx.callback.Speedometer(
                gParameters['batch_size'], 20))
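
Example #2 stops right after mod.fit(). If a validation score is wanted afterwards, one option (not part of the original example) is to evaluate the trained module with the MXNet Module API, roughly as sketched below.

# Hypothetical follow-up to Example #2 (not in the original): score the trained
# module on the validation iterator. mod, val_iter and gParameters refer to the
# objects built inside main() above; mod.score() returns (metric_name, value)
# pairs for the requested metric.
for name, value in mod.score(val_iter, eval_metric=gParameters['loss']):
    print('{}: {}'.format(name, value))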

Example #3
def main():
    # Get command-line parameters
    parser = get_p1b3_parser()
    args = parser.parse_args()
    #print('Args:', args)
    # Get parameters from configuration file
    fileParameters = p1b3.read_config_file(args.config_file)
    #print ('Params:', fileParameters)

    # Correct for arguments set by default by the neon parser:
    # fall back to the config-file value unless the option was explicitly
    # given on the command line, in which case the command-line value wins.
    # This applies to the conflicting parameters: batch_size, epochs and rng_seed.
    if not any("--batch_size" in ag or "-z" in ag for ag in sys.argv):
        args.batch_size = fileParameters['batch_size']
    if not any("--epochs" in ag or "-e" in ag for ag in sys.argv):
        args.epochs = fileParameters['epochs']
    if not any("--rng_seed" in ag or "-r" in ag for ag in sys.argv):
        args.rng_seed = fileParameters['rng_seed']

    # Consolidate parameter set. Command-line parameters overwrite file configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')
    # Construct extension to save model
    ext = p1b3.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = p1b3.DataLoader(
        seed=seed,
        dtype=gParameters['datatype'],
        val_split=gParameters['validation_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    # Re-generate the backend after consolidating parsing and file config
    gen_backend(backend=args.backend,
                rng_seed=seed,
                device_id=args.device_id,
                batch_size=gParameters['batch_size'],
                datatype=gParameters['datatype'],
                max_devices=args.max_devices,
                compat_mode=args.compat_mode)

    # Initialize weights and learning rule
    initializer_weights = p1_common_neon.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = p1_common_neon.build_initializer(
        'constant', kerasDefaults, 0.)

    activation = p1_common_neon.get_function(gParameters['activation'])()

    # Define model architecture
    layers = []
    reshape = None

    if 'dense' in gParameters:  # Build dense layers
        for layer in gParameters['dense']:
            if layer:
                layers.append(
                    Affine(nout=layer,
                           init=initializer_weights,
                           bias=initializer_bias,
                           activation=activation))
            if gParameters['drop']:
                layers.append(Dropout(keep=(1 - gParameters['drop'])))
    else:  # Build convolutional layers
        reshape = (1, loader.input_dim, 1)
        layer_list = list(range(0, len(gParameters['conv']), 3))
        for l, i in enumerate(layer_list):
            nb_filter = gParameters['conv'][i]
            filter_len = gParameters['conv'][i + 1]
            stride = gParameters['conv'][i + 2]
            # print(nb_filter, filter_len, stride)
            # fshape: (height, width, num_filters).
            layers.append(
                Conv((1, filter_len, nb_filter),
                     strides={
                         'str_h': 1,
                         'str_w': stride
                     },
                     init=initializer_weights,
                     activation=activation))
            if gParameters['pool']:
                layers.append(Pooling((1, gParameters['pool'])))

    layers.append(
        Affine(nout=1,
               init=initializer_weights,
               bias=initializer_bias,
               activation=neon.transforms.Identity()))

    # Build model
    model = Model(layers=layers)

    # Define neon data iterators
    train_samples = int(loader.n_train)
    val_samples = int(loader.n_val)

    if 'train_samples' in gParameters:
        train_samples = gParameters['train_samples']
    if 'val_samples' in gParameters:
        val_samples = gParameters['val_samples']

    train_iter = ConcatDataIter(loader,
                                ndata=train_samples,
                                lshape=reshape,
                                datatype=gParameters['datatype'])
    val_iter = ConcatDataIter(loader,
                              partition='val',
                              ndata=val_samples,
                              lshape=reshape,
                              datatype=gParameters['datatype'])

    # Define cost and optimizer
    cost = GeneralizedCost(p1_common_neon.get_function(gParameters['loss'])())
    optimizer = p1_common_neon.build_optimizer(gParameters['optimizer'],
                                               gParameters['learning_rate'],
                                               kerasDefaults)

    callbacks = Callbacks(model, eval_set=val_iter, eval_freq=1)

    model.fit(train_iter,
              optimizer=optimizer,
              num_epochs=gParameters['epochs'],
              cost=cost,
              callbacks=callbacks)
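
In Examples #2 and #3 the flat 'conv' parameter is consumed in groups of three integers, one (nb_filter, filter_len, stride) triple per convolutional layer; Example #2 additionally stops at the first non-positive entry. A self-contained illustration of that parsing, using a made-up spec value:

# Standalone sketch of how the flat 'conv' parameter is read in the loops
# above: every three consecutive integers describe one layer as
# (nb_filter, filter_len, stride). conv_spec is a made-up example value.
conv_spec = [32, 20, 1, 64, 10, 1]
for i in range(0, len(conv_spec), 3):
    nb_filter, filter_len, stride = conv_spec[i:i + 3]
    if nb_filter <= 0 or filter_len <= 0 or stride <= 0:
        break  # Example #2 stops at the first non-positive entry
    print('conv layer: filters={}, kernel={}, stride={}'.format(
        nb_filter, filter_len, stride))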