def get_output_dataset_and_handler(output_packet_shape, **dataset_args):
    dataset = ds.NumpyDataset(dataset_args['name'],
                              output_packet_shape,
                              item_types=dataset_args['item_types'],
                              dtype=dataset_args['dtype'])
    output_handler = fs_io.DatasetFsPersistencyHandler(
        save_dir=dataset_args['outdir'])
    return dataset, output_handler
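A minimal sketch of how this helper might be called; the keyword names mirror the lookups inside the function, while every value shown here is hypothetical:

# Hypothetical call; key names match the dataset_args lookups above.
dataset, output_handler = get_output_dataset_and_handler(
    (20, 48, 48),                 # output_packet_shape: (frames, height, width)
    name='synth_showers',
    item_types={'raw': True, 'yx': True, 'gtux': False, 'gtuy': False},
    dtype='uint8',
    outdir='/tmp/datasets',
)
output_handler.save_dataset(dataset)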
Example #2
def main(**settings):
    # load dataset
    name, srcdir = settings['name'], settings['srcdir']
    item_types = settings['item_types']
    input_handler = io_utils.DatasetFsPersistencyHandler(load_dir=srcdir)
    dataset = input_handler.load_dataset(name, item_types=item_types)

    # create dataset splitter
    fraction = settings['test_items_fraction']
    num_items = settings['test_items_count']
    splitter = netutils.DatasetSplitter('RANDOM',
                                        items_fraction=fraction,
                                        num_items=num_items)

    # import network
    net_module_name = settings['network']
    model = netutils.import_model(net_module_name, dataset.item_shapes,
                                  **settings)
    graph = model.network_graph

    # prepare network trainer
    trainer = train_utils.TfModelTrainer(
        splitter.get_data_and_targets(dataset), **settings)

    # main loop
    weights = {
        layer: model.get_layer_weights(layer)
        for layer in graph.trainable_layers
    }
    biases = {
        layer: model.get_layer_biases(layer)
        for layer in graph.trainable_layers
    }
    run_id = 'cval_{}'.format(netutils.get_default_run_id(net_module_name))
    num_crossvals = settings['num_crossvals']
    for run_idx in range(num_crossvals):
        print('Starting run {}'.format(run_idx + 1))
        data_dict = splitter.get_data_and_targets(dataset,
                                                  dict_format='PER_SET')
        tr, te = data_dict['train'], data_dict['test']
        inputs_dict = {
            'train_data': netutils.convert_to_model_inputs_dict(model, tr),
            'train_targets': netutils.convert_to_model_outputs_dict(model, tr),
            'test_data': netutils.convert_to_model_inputs_dict(model, te),
            'test_targets': netutils.convert_to_model_outputs_dict(model, te),
        }
        trainer.train_model(model, data_dict=inputs_dict, run_id=run_id)
        # restore initial weights and biases
        for layer in graph.trainable_layers:
            model.set_layer_weights(layer, weights[layer])
            model.set_layer_biases(layer, biases[layer])
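The snapshot-and-restore pattern used above can be shown in isolation; a minimal self-contained sketch with plain NumPy arrays standing in for the model's layer parameters (all names illustrative):

import copy

import numpy as np

# Illustrative per-layer parameters standing in for model weights/biases.
params = {'conv1': np.random.randn(3, 3), 'dense1': np.random.randn(8)}
initial = copy.deepcopy(params)  # snapshot taken once, before any training

for run_idx in range(3):  # one iteration per cross-validation run
    # "training": mutate the parameters in place
    for layer in params:
        params[layer] += 0.01 * np.random.randn(*params[layer].shape)
    # restore the snapshot so the next run starts from identical parameters
    for layer in params:
        params[layer] = initial[layer].copy()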
Example #3
def main(**settings):
    srcdir, outdir = settings['srcdir'], settings['outdir']
    name, outname = settings['name'], settings['outname']
    if outname is None:
        outname = name
    if outdir is None:
        outdir = srcdir
    io_handler = io_utils.DatasetFsPersistencyHandler(load_dir=srcdir,
                                                      save_dir=outdir)
    items_slice = settings['items_slice']
    old_dataset = io_handler.load_dataset(name)
    new_dataset = ds.NumpyDataset(outname, old_dataset.accepted_packet_shape,
                                  item_types=old_dataset.item_types,
                                  dtype=old_dataset.dtype)
    new_dataset.merge_with(old_dataset, items_slice)
    io_handler.save_dataset(new_dataset)
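A hypothetical invocation; the keyword names mirror the settings keys read inside the function:

# Copy items 100..199 of dataset 'full' into a new dataset named 'subset';
# outdir=None falls back to srcdir, as handled above.
main(srcdir='/data/dsets', outdir=None,
     name='full', outname='subset',
     items_slice=slice(100, 200))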
Example #4
def main(**settings):
    logdir = settings['logdir']

    # disable CUDA device access
    if settings['usecpu']:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # load dataset
    name, srcdir = settings['name'], settings['srcdir']
    item_types = settings['item_types']
    input_handler = dset_io.DatasetFsPersistencyHandler(load_dir=srcdir)
    dataset = input_handler.load_dataset(name, item_types=item_types)
    items_slice = settings.get('items_slice', slice(0, None))
    data = dataset.get_data_as_dict(items_slice)

    # import network model
    net_module_name = settings['network']
    model = netutils.import_model(net_module_name, dataset.item_shapes,
                                  **settings)
    graph = model.network_graph
    for data_path_layers in graph.data_paths.values():
        model.enable_hidden_layer_output(data_path_layers)

    # feed data to network model
    inputs = netutils.convert_dataset_items_to_model_inputs(model, data)
    activations = model.get_hidden_layer_activations(inputs)
    input_layers, hidden_layers = graph.input_layers, graph.hidden_layers
    for layer_name, layer_activations in activations.items():
        out_dir = os.path.join(logdir, layer_name)
        os.makedirs(out_dir, exist_ok=True)
        layer = hidden_layers[layer_name]['layer']
        if len(layer.shape[1:]) == 3:
            fig_creator = acviz.visualize_3d_activations
        elif len(layer.shape[1:]) == 1:
            fig_creator = acviz.visualize_1d_activations
        else:
            # no visualizer for this dimensionality; skip the layer
            continue
        print('creating activation figures for layer {}'.format(layer_name))
        for idx, item_activations in enumerate(layer_activations):
            fig = fig_creator(item_activations, layer_name)
            savefile = os.path.join(
                out_dir,
                'layer_{}_item_{}.svg'.format(layer_name,
                                              items_slice.start + idx))
            acviz.save_figure(fig, savefile)
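For context, a stripped-down stand-in for the per-item figure loop using matplotlib directly; `acviz` is not shown in this excerpt, so the visualizer below is only an assumption about its general shape:

import os

import matplotlib.pyplot as plt
import numpy as np

def visualize_1d_activations(activations, layer_name):
    # Bar plot of a flat activation vector for a single data item.
    fig, ax = plt.subplots()
    ax.bar(np.arange(len(activations)), activations)
    ax.set_title(layer_name)
    return fig

out_dir = '/tmp/activations/dense_1'
os.makedirs(out_dir, exist_ok=True)
fig = visualize_1d_activations(np.random.rand(32), 'dense_1')
fig.savefig(os.path.join(out_dir, 'layer_dense_1_item_0.svg'))
plt.close(fig)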
Example #5
if __name__ == '__main__':
    import cmdint.cmd_interface_checker as cmd

    # command line argument parsing
    cmd_int = cmd.CmdInterface()
    args = cmd_int.get_cmd_args(sys.argv[1:])

    name, srcdir = args.name, args.srcdir
    item_types = args.item_types

    # do not use the GPU
    if args.usecpu:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # load input dataset
    input_handler = io_utils.DatasetFsPersistencyHandler(load_dir=srcdir)
    dataset = input_handler.load_dataset(name, item_types=item_types)

    # load trained network model
    logdir = cutils.get_config_for_module("model_checker")['default']['logdir']
    if not os.path.exists(logdir):
        os.mkdir(logdir)
    network_module_name, model_file = args.network, args.model_file
    network_module_name = "net.samples." + network_module_name
    run_id = netutils.get_default_run_id(network_module_name)
    tb_dir = os.path.join(logdir, run_id)
    os.mkdir(tb_dir)
    model = netutils.import_model(network_module_name,
                                  dataset.item_shapes,
                                  model_file=model_file,
                                  tb_dir=tb_dir)
Example #6
    @classmethod
    def setUpClass(cls):
        super(TestDatasetFsPersistencyManager, cls).setUpClass()
        cls.meta_order = sorted(cls.metafields)

        n_data = cls.n_packets
        item_types = cls.item_types
        item_types['gtux'], item_types['gtuy'] = False, False
        items = cls.items
        items['gtux'], items['gtuy'] = False, False
        # a mock dataset to use (also serves as value container)
        m_dataset = mock.create_autospec(ds.NumpyDataset)
        m_dataset.name = 'test'
        m_dataset.dtype = 'float32'
        m_dataset.num_data = n_data
        m_dataset.accepted_packet_shape = cls.packet_shape
        m_dataset.item_types = item_types
        m_dataset.item_shapes = dat.get_data_item_shapes(
            m_dataset.accepted_packet_shape, item_types)
        m_dataset.metadata_fields = cls.metafields
        m_dataset.get_data_as_dict.return_value = items
        m_dataset.get_data_as_arraylike.return_value = tuple(
            items[k] for k in cons.ALL_ITEM_TYPES if not item_types[k])
        m_dataset.get_targets.return_value = cls.mock_targets
        m_dataset.get_metadata.return_value = cls.mock_meta
        cls.m_dataset = m_dataset

        cls.configfile_contents = (
            '[general]{}'.format(os.linesep) +
            'num_data = {}{}'.format(n_data, os.linesep) +
            'metafields = {}{}'.format(cls.metafields, os.linesep) +
            'dtype = {}{}'.format(m_dataset.dtype, os.linesep) +
            '{}[packet_shape]{}'.format(os.linesep, os.linesep) +
            'num_frames = {}{}'.format(cls.n_f, os.linesep) +
            'frame_height = {}{}'.format(cls.f_h, os.linesep) +
            'frame_width = {}{}'.format(cls.f_w, os.linesep) +
            '{}[item_types]{}'.format(os.linesep, os.linesep) +
            ''.join('{} = {}{}'.format(k, item_types[k], os.linesep)
                    for k in cons.ALL_ITEM_TYPES) +
            os.linesep
        )

        cls.loaddir, cls.savedir = '/dsets', '/test'
        suffixes = {k: '_{}_test'.format(k) for k in cons.ALL_ITEM_TYPES}
        cls.datafiles = {k: '{}{}.npy'.format(m_dataset.name, suffixes[k])
                         for k in cons.ALL_ITEM_TYPES}
        conf_suffix = '_config_test'
        cls.configfile = '{}{}.ini'.format(m_dataset.name, conf_suffix)

        with mock.patch('os.path.isdir', return_value=True),\
             mock.patch('os.path.exists', return_value=True):
            cls.handler = fs_io.DatasetFsPersistencyHandler(
                cls.loaddir,
                cls.savedir,
                configfile_suffix=conf_suffix,
                data_handler=mock.create_autospec(
                    data_io.NumpyDataPersistencyHandler
                ),
                targets_handler=mock.create_autospec(
                    targets_io.NumpyTargetsPersistencyHandler
                ),
                metadata_handler=mock.create_autospec(
                    meta_io.TSVMetadataPersistencyHandler
                )
            )
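Because the nested handlers are autospecced, tests can later assert exactly how the facade delegated to them; a small standalone illustration of `mock.create_autospec` from the standard library (the class here is hypothetical):

from unittest import mock

class DataHandler:
    def save_data(self, name, data):
        """Persist data under the given name."""

# instance=True gives the mock bound-method signatures
m = mock.create_autospec(DataHandler, instance=True)
m.save_data('test', [1, 2, 3])
m.save_data.assert_called_once_with('test', [1, 2, 3])
# calls that do not match save_data's real signature raise TypeError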
Example #7
        # main loop
        for handler in iteration_handlers:
            start, stop = handler['start'], handler['stop']
            packet_handler = handler['packet_handler']
            target = handler['target']
            # idx serves as both an index into targets and data, as well as
            # shower angle in xy projection
            for idx in range(start, stop):
                packet, meta = packet_handler(idx)
                dataset.add_data_item(packet, target, meta)
        return dataset
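The fragment above implies each entry of `iteration_handlers` is a dict with 'start', 'stop', 'packet_handler' and 'target' keys; a hypothetical entry, inferred only from the keys read in that loop, might look like:

import numpy as np

def shower_packet_handler(idx):
    # Hypothetical handler: returns a (packet, metadata) pair for item idx.
    packet = np.zeros((20, 48, 48), dtype='uint8')
    meta = {'yx_angle': idx}
    return packet, meta

# Each entry mirrors the keys read in the loop above.
iteration_handlers = (
    {'start': 0, 'stop': 1000,
     'packet_handler': shower_packet_handler,
     'target': [1, 0]},     # e.g. a one-hot "shower" class (assumption)
)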


if __name__ == '__main__':
    import cmdint.cmd_interface_generator as cmd

    # command line parsing
    ui = cmd.CmdInterface()
    args = ui.get_cmd_args(sys.argv[1:])
    print(args)

    data_generator = SimulatedDataGenerator(args.shower_template,
                                            args.bg_template)
    handler = io_utils.DatasetFsPersistencyHandler(save_dir=args.outdir)
    dataset = data_generator.create_dataset(args.name,
                                            args.num_data,
                                            item_types=args.item_types,
                                            dtype=args.dtype)
    handler.save_dataset(dataset, metafields_order=cons.SYNTH_METADATA)