Example #1
    def __init__(self):
        parser = argparse.ArgumentParser(description=(
            'Split off new dataset from range of input dataset items '
            'or shrink original dataset'))
        in_aliases = {'dataset name': 'name', 'dataset directory': 'srcdir'}
        out_aliases = {
            'dataset name': 'out_name',
            'dataset directory': 'outdir'
        }
        dset_args = dargs.DatasetArgs(input_aliases=in_aliases,
                                      output_aliases=out_aliases)

        group = parser.add_argument_group(title='Input dataset settings')
        dset_args.add_dataset_arg_double(group, dargs.arg_type.INPUT)
        # slice of dataset items to split off
        group.add_argument('--start_item',
                           default=0,
                           type=int,
                           help=('Index of the first dataset item to '
                                 'split off.'))
        group.add_argument('--stop_item',
                           default=None,
                           type=int,
                           help=('Index of the dataset item after the last '
                                 'item to split off.'))

        group = parser.add_argument_group(title='Output dataset settings')
        dset_args.add_dataset_arg_double(group,
                                         dargs.arg_type.OUTPUT,
                                         required=False)
        self._parser = parser
        self._dset_args = dset_args
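This example stores the parser and dataset-args helper under private attributes (self._parser, self._dset_args), unlike the later examples, which expose them as self.parser and self.dset_args. The --start_item/--stop_item pair maps directly onto Python's half-open slice semantics, where a stop_item of None runs through the last item; a minimal sketch with a stand-in list:

items = list(range(10))                 # stand-in for the dataset items
start_item, stop_item = 2, None         # --start_item 2, --stop_item unset
selected = items[start_item:stop_item]  # [2, 3, ..., 9]; None runs to the end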
Example #2
    def __init__(self):
        parser = argparse.ArgumentParser(
            description=('Merge multiple datasets into a single dataset by '
                         'concatenating them in the order they are passed'))
        in_aliases = {'dataset': 'dataset'}
        out_aliases = {'dataset name': 'name', 'dataset directory': 'outdir'}
        dset_args = dargs.DatasetArgs(input_aliases=in_aliases,
                                      output_aliases=out_aliases)

        group = parser.add_argument_group(title="Input dataset settings")
        dset_args.add_dataset_arg_single(group, dargs.arg_type.INPUT,
                                         short_alias='d', multiple=True)

        group = parser.add_argument_group(title="Output dataset settings")
        dset_args.add_dataset_arg_double(group, dargs.arg_type.OUTPUT,
                                         dir_short_alias='o', dir_default='.',
                                         name_short_alias='n')
        group.add_argument('--dtype',
                           help=('data type of items of the new dataset. If '
                                 'not set, uses the dtype of the first input '
                                 'dataset'))
        # group.add_argument('--delete_original', action='store_true',
        #                    help='delete the original input datasets')
        self.parser = parser
        self.dset_args = dset_args
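A hypothetical invocation of the resulting parser (the script name and dataset names are made up; whether -d is repeated per dataset or accepts several values in one flag depends on how multiple=True is implemented in dargs):

# python merge_datasets.py -d first_dset -d second_dset \
#        -n merged_dset -o ./out --dtype float32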
Example #3
    def __init__(self):
        self.default_logdir = cutils.get_config_for_module(
            "model_trainer")['default']['logdir']
        parser = argparse.ArgumentParser(
            description="Train network using provided dataset")

        # dataset input
        in_aliases = {'dataset name': 'name', 'dataset directory': 'srcdir'}
        dset_args = dargs.DatasetArgs(input_aliases=in_aliases)
        item_args = dargs.ItemTypeArgs()
        atype = dargs.arg_type.INPUT
        group = parser.add_argument_group(title="Input dataset")
        dset_args.add_dataset_arg_double(group, atype)
        item_args.add_item_type_args(group, atype)
        group.add_argument('--test_items_count',
                           type=atypes.int_range(1),
                           help='Number of dataset items to include in the '
                           'test set. Overrides test_items_fraction.')
        group.add_argument('--test_items_fraction',
                           type=float,
                           default=0.1,
                           help='Fraction of dataset items to include in '
                           'the test set. Ignored when test_items_count '
                           'is set.')
        modes = net_cons.DATASET_SPLIT_MODES
        group.add_argument('--split_mode',
                           choices=modes,
                           required=True,
                           help='Method of splitting the test items subset '
                           'from the input dataset.')

        # network to train
        group = parser.add_argument_group(title="Network configuration")
        net_args.add_network_arg(group, short_alias='n')
        net_args.add_model_file_arg(group, short_alias='m')
        group.add_argument('--tb_dir',
                           default=self.default_logdir,
                           help=('directory to store training logs for '
                                 'tensorboard.'))
        group.add_argument('--save',
                           action='store_true',
                           help=('save the model after training. Model files '
                                 'are saved under tb_dir as net.network_name/'
                                 'net.network_name.tflearn.*'))

        # training settings
        group = parser.add_argument_group(title="Training parameters")
        net_args.add_training_settings_args(group,
                                            num_epochs={
                                                'required': False,
                                                'default': 11,
                                                'short_alias': 'e'
                                            })

        self.parser = parser
        self.dset_args = dset_args
        self.item_args = item_args
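Per the help texts above, an explicit --test_items_count overrides --test_items_fraction. A minimal sketch of that resolution logic (the function name and the n_items parameter are illustrative, not taken from the trainer):

def resolve_test_items(args, n_items):
    # An explicit count wins; otherwise derive the count from the fraction.
    if args.test_items_count is not None:
        return args.test_items_count
    return int(n_items * args.test_items_fraction)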
Example #4
    def __init__(self):
        self.default_logdir = cutils.get_config_for_module(
            "model_xvalidator")['default']['logdir']
        print("default logdir set to {}".format(self.default_logdir))
        parser = argparse.ArgumentParser(
            description="Perform Kfold cross-validation on a given neural "
            "network with the given dataset.")

        # cross-validation settings
        group = parser.add_argument_group(title="Cross-validation parameters")
        group.add_argument('--num_crossvals',
                           type=atypes.int_range(1),
                           required=True,
                           help='number of cross validations to perform')

        # network to train
        group = parser.add_argument_group(title="Network to use")
        net_args.add_network_arg(group, short_alias='n')
        net_args.add_model_file_arg(group, short_alias='m')

        # training_parameters
        group = parser.add_argument_group(title="Training parameters to use")
        net_args.add_training_settings_args(group,
                                            num_epochs={
                                                'required': False,
                                                'default': 11,
                                                'short_alias': 'e'
                                            })
        group.add_argument('--tb_dir',
                           default=self.default_logdir,
                           help=('directory to store training logs for '
                                 'tensorboard.'))

        # dataset input
        in_aliases = {'dataset name': 'name', 'dataset directory': 'srcdir'}
        dset_args = dargs.DatasetArgs(input_aliases=in_aliases)
        item_args = dargs.ItemTypeArgs()
        atype = dargs.arg_type.INPUT
        group = parser.add_argument_group(title="Input dataset")
        dset_args.add_dataset_arg_double(group, atype)
        item_args.add_item_type_args(group, atype)
        group.add_argument('--test_items_count',
                           type=atypes.int_range(1),
                           help='number of dataset items to include in the '
                           'test set. Overrides test_items_fraction.')
        group.add_argument('--test_items_fraction',
                           type=float,
                           default=0.1,
                           help='fraction of dataset items to include in '
                           'the test set. Ignored when test_items_count '
                           'is set.')

        self.parser = parser
        self.dset_args = dset_args
        self.item_args = item_args
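Both this example and the previous one validate numeric options with atypes.int_range, a project helper. Its usage implies a factory that returns an argparse type callable enforcing a lower bound; a minimal stand-in under that assumption:

import argparse

def int_range(minimum):
    def check(value):
        ivalue = int(value)  # a ValueError here is reported by argparse itself
        if ivalue < minimum:
            raise argparse.ArgumentTypeError(
                '{} is below the allowed minimum of {}'.format(ivalue, minimum))
        return ivalue
    return check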
Example #5
    @classmethod
    def setUpClass(cls):
        cls.in_alss = {
            'dataset name': 'in_name', 'dataset directory': 'src_dir',
            'dataset': 'in_dset'
        }
        cls.out_alss = {
            'dataset name': 'out_name', 'dataset directory': 'out_dir',
            'dataset': 'out_dset'
        }
        cls.dset_args = dargs.DatasetArgs(input_aliases=cls.in_alss,
                                          output_aliases=cls.out_alss)
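A test method that could follow this fixture, assuming argparse and dargs are imported in the test module and that the alias values (in_name, src_dir) become the generated option names; an illustrative sketch, not code from the actual suite:

    def test_input_dataset_args(self):
        parser = argparse.ArgumentParser()
        group = parser.add_argument_group('input')
        self.dset_args.add_dataset_arg_double(group, dargs.arg_type.INPUT)
        args = parser.parse_args(['--in_name', 'test', '--src_dir', '/tmp'])
        self.assertEqual(args.in_name, 'test')
        self.assertEqual(args.src_dir, '/tmp')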
Example #6
    def __init__(self):
        parser = argparse.ArgumentParser(
            description="Evaluate trained network model with given dataset")
        parser.add_argument('outfile',
                            nargs='?',
                            type=argparse.FileType('w'),
                            default=sys.stdout,
                            help=('name of output TSV to write to. If not '
                                  'provided, output to stdout.'))

        # dataset input
        atype = dargs.arg_type.INPUT
        in_aliases = {'dataset name': 'name', 'dataset directory': 'srcdir'}
        dset_args = dargs.DatasetArgs(input_aliases=in_aliases)
        item_args = dargs.ItemTypeArgs()
        group = parser.add_argument_group(title="Input dataset")
        dset_args.add_dataset_arg_double(group, atype)
        item_args.add_item_type_args(group, atype)
        # slice of dataset items to use for evaluation
        group.add_argument('--start_item',
                           default=0,
                           type=int,
                           help=('index of first dataset item to use for '
                                 'evaluation.'))
        group.add_argument('--stop_item',
                           default=None,
                           type=int,
                           help=('index of the dataset item after the last '
                                 'item to use for evaluation.'))

        # trained neural network model
        group = parser.add_argument_group('Neural network settings')
        net_args.add_network_arg(group, short_alias='n')
        net_args.add_model_file_arg(group, short_alias='m', required=True)

        # misc
        parser.add_argument('--usecpu',
                            action='store_true',
                            help=('Use host CPU instead of the CUDA device. '
                                  'On systems without a dedicated CUDA device '
                                  'and no CUDA-enabled version of tensorflow '
                                  'installed, this flag has no effect.'))

        self.parser = parser
        self.dset_args = dset_args
        self.item_args = item_args
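The outfile idiom above (an optional positional that falls back to stdout) can be demonstrated in isolation:

import argparse
import sys

p = argparse.ArgumentParser()
p.add_argument('outfile', nargs='?', type=argparse.FileType('w'),
               default=sys.stdout)
print(p.parse_args([]).outfile is sys.stdout)   # True: falls back to stdout
print(p.parse_args(['out.tsv']).outfile.name)   # 'out.tsv' (the file is opened)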
Example #7
    def __init__(self):
        parser = argparse.ArgumentParser(
            description=('Create classification report in HTML format from '
                         'provided evaluation results in TSV format'))

        # input tsv
        parser.add_argument('infile',
                            nargs='?',
                            type=argparse.FileType('r'),
                            default=sys.stdin,
                            help=('name of input TSV to read from. If not '
                                  'provided, read from stdin.'))

        # output settings
        group = parser.add_argument_group(title="Output settings")
        group.add_argument('--tablesize',
                           type=atypes.int_range(1),
                           help=('Maximum number of table rows per html '
                                 'report file.'))
        group.add_argument('--logdir',
                           help=('Directory to store output logs. If a '
                                 'non-default directory is used, it must '
                                 'exist prior to calling this script.'))
        item_args = dargs.ItemTypeArgs(out_item_prefix='add')
        help = {
            k: 'add image placeholder for {}'.format(desc)
            for k, desc in item_args.item_descriptions.items()
        }
        item_args.add_item_type_args(group, dargs.arg_type.OUTPUT, help=help)

        # meta-information to include in report headers
        g_title = "Meta-information to include in report headers"
        group = parser.add_argument_group(title=g_title)
        nargs.add_network_arg(group, required=False)
        nargs.add_model_file_arg(group, required=False)
        in_aliases = {'dataset name': 'name', 'dataset directory': 'srcdir'}
        dset_args = dargs.DatasetArgs(input_aliases=in_aliases)
        dset_args.add_dataset_arg_double(group,
                                         dargs.arg_type.INPUT,
                                         required=False)

        self.parser = parser
        self.dset_args = dset_args
        self.item_args = item_args
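The dict comprehension above derives one help string per item type from item_args.item_descriptions; the same pattern in isolation, with an invented descriptions map:

descriptions = {'raw': 'raw flat maps'}   # invented entry for illustration
help = {k: 'add image placeholder for {}'.format(desc)
        for k, desc in descriptions.items()}
# help == {'raw': 'add image placeholder for raw flat maps'}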
Example #8
    def __init__(self):
        parser = argparse.ArgumentParser(description="Visualize dataset items")

        # input dataset settings
        group = parser.add_argument_group(title="Input dataset")
        in_aliases = {'dataset name': 'name', 'dataset directory': 'srcdir'}
        dset_args = dargs.DatasetArgs(input_aliases=in_aliases)
        dset_args.add_dataset_arg_double(group, dargs.arg_type.INPUT,
                                         required=True,
                                         dir_default=os.path.curdir)
        item_args = dargs.ItemTypeArgs()
        item_args.add_item_type_args(group, dargs.arg_type.INPUT)
        group.add_argument('--start_item', default=0, type=int,
                           help=('index of first item to visualize.'))
        group.add_argument('--stop_item', default=None, type=int,
                           help=('index of the item after the last item to '
                                 'visualize.'))

        # output settings
        group = parser.add_argument_group(title="Output settings")
        group.add_argument('--outdir', default=os.path.curdir,
                           help=('directory to store output images. If a '
                                 'non-default directory is used, it must '
                                 'exist prior to calling this script. '
                                 'Default: current directory. Images '
                                 'are stored under outdir/img/<item_type>'))
        group.add_argument('-f', '--force_overwrite', action='store_true',
                           help=('overwrite any existing items under outdir '
                                 'having the same name as generated items'))

        # metadata to text converter
        group = parser.add_argument_group(title="Metadata to text converter")
        m_conv = group.add_mutually_exclusive_group(required=False)
        m_conv.add_argument('--simu', action='store_const', const='simu',
                            help=('Simu metadata converter'))
        m_conv.add_argument('--synth', action='store_const', const='synth',
                            help=('Synth metadata converter'))
        m_conv.add_argument('--flight', action='store_const', const='flight',
                            help=('Flight metadata converter'))

        self.parser = parser
        self.dset_args = dset_args
        self.item_args = item_args
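Each converter flag above uses store_const with its own destination, so the parsed namespace ends up with three attributes (args.simu, args.synth, args.flight), at most one of them non-None. If a single attribute were preferred, store_const also accepts a shared dest; an illustrative variant, not the original code:

m_conv.add_argument('--simu', dest='converter', action='store_const',
                    const='simu', help='Simu metadata converter')
m_conv.add_argument('--synth', dest='converter', action='store_const',
                    const='synth', help='Synth metadata converter')
m_conv.add_argument('--flight', dest='converter', action='store_const',
                    const='flight', help='Flight metadata converter')
# args.converter is then 'simu', 'synth', 'flight' or None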
Example #9
    def __init__(self):
        parser = argparse.ArgumentParser(
            description="Visualize hidden layer activations of model using "
            "given dataset")
        parser.add_argument('logdir',
                            help=('Directory to output visualized activation '
                                  'images to.'))

        # trained neural network model settings
        group = parser.add_argument_group('Trained model settings')
        net_args.add_network_arg(group, short_alias='n')
        net_args.add_model_file_arg(group, short_alias='m', required=True)

        # input dataset settings
        group = parser.add_argument_group(title="Input dataset")
        in_aliases = {'dataset name': 'name', 'dataset directory': 'srcdir'}
        dset_args = dargs.DatasetArgs(input_aliases=in_aliases)
        dset_args.add_dataset_arg_double(group,
                                         dargs.arg_type.INPUT,
                                         required=True,
                                         dir_default=os.path.curdir)
        item_args = dargs.ItemTypeArgs()
        item_args.add_item_type_args(group, dargs.arg_type.INPUT)
        group.add_argument('--start_item',
                           default=0,
                           type=int,
                           help=('index of first dataset item to use.'))
        group.add_argument('--stop_item',
                           default=None,
                           type=int,
                           help=('index of the dataset item after the last '
                                 'item to use.'))

        # misc
        parser.add_argument('--usecpu',
                            action='store_true',
                            help=('Use host CPU instead of the CUDA device. '
                                  'On systems without a dedicated CUDA device '
                                  'and no CUDA-enabled version of tensorflow '
                                  'installed, this flag has no effect.'))
        self.dset_args = dset_args
        self.item_args = item_args
        self.parser = parser
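The --usecpu flag only records intent; acting on it is left to the caller. One common mechanism, shown as a sketch rather than this project's actual implementation, is hiding CUDA devices from TensorFlow before it initializes:

import os

if args.usecpu:
    # An empty CUDA_VISIBLE_DEVICES makes TensorFlow fall back to the CPU.
    os.environ['CUDA_VISIBLE_DEVICES'] = ''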
Example #10
import utils.common_utils as cutils

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=('Get attribute value distribution of metadata attribute '
                     'within a dataset or subset of it'))
    parser.add_argument('outfile',
                        nargs='?',
                        type=argparse.FileType('w'),
                        default=sys.stdout,
                        help=('name of output file to write to. If not '
                              'provided, output to stdout.'))

    group = parser.add_argument_group(title='Dataset settings')
    in_aliases = {'dataset name': 'name', 'dataset directory': 'srcdir'}
    dset_args = dargs.DatasetArgs(input_aliases=in_aliases)
    dset_args.add_dataset_arg_double(group, dargs.arg_type.INPUT)
    # slice of dataset items to use
    group.add_argument('--start_item',
                       default=0,
                       type=int,
                       help=('Index of the first dataset item to use.'))
    group.add_argument('--stop_item',
                       default=None,
                       type=int,
                       help=('Index of the dataset item after the last '
                             'item to use.'))

    group = parser.add_argument_group(title='Attribute settings')
    group.add_argument('--attribute',
                       required=True,
                       help=('name of the metadata attribute whose value '
                             'distribution to get'))
Example #11
    def __init__(self):
        parser = argparse.ArgumentParser(
            description="Create dataset from multiple files with packets",
            formatter_class=argparse.RawTextHelpFormatter)

        # global settings
        parser.add_argument('--log_level',
                            default='INFO',
                            choices=logging._nameToLevel.keys(),
                            help='global logging output level (default: '
                            '%(default)s)')
        parser.add_argument('--max_cache_size',
                            default=40,
                            type=atypes.int_range(1),
                            help=('maximum size of parsed files cache'))
        parser.add_argument('--num_evicted',
                            default=10,
                            type=atypes.int_range(1),
                            help=('number of cache entries to evict when the '
                                  'cache gets full'))

        # input settings
        group = parser.add_argument_group(title='Input settings')
        packet_args = cargs.PacketArgs()
        packet_args.add_packet_arg(group)
        group.add_argument('-f',
                           '--filelist',
                           required=True,
                           help=('input files list in TSV format'))

        # output (dataset) settings
        group = parser.add_argument_group(title='Output settings')
        out_aliases = {'dataset name': 'name', 'dataset directory': 'outdir'}
        dset_args = dargs.DatasetArgs(output_aliases=out_aliases)
        dset_args.add_dataset_arg_double(group,
                                         dargs.arg_type.OUTPUT,
                                         dir_short_alias='d',
                                         dir_default='.',
                                         name_short_alias='n')

        # output (dataset) data item settings
        group = parser.add_argument_group(title='Data item settings')
        item_args = dargs.ItemTypeArgs()
        item_args.add_item_type_args(group, dargs.arg_type.OUTPUT)
        group.add_argument('--dtype',
                           default='float32',
                           help='cast extracted items to the given numpy data '
                           'type (default: %(default)s)')

        # output (dataset) target settings
        group = parser.add_argument_group(title='Item target settings')
        group.add_argument('--target',
                           required=True,
                           nargs=2,
                           metavar=('METHOD', 'ARGS'),
                           help=_TARGET_ARG_HELP)

        # output (dataset) metadata settings
        group = parser.add_argument_group(title='Metadata settings')
        group.add_argument('--extra_metafields',
                           nargs='+',
                           default=[],
                           metavar='FIELD',
                           help=('additional fields in the event list to '
                                 'include in dataset metadata'))

        subparsers = parser.add_subparsers(
            dest="converter", help='Packet to item conversion methods')

        def_m = subparsers.add_parser("default",
                                      help=("Convert events to dataset items "
                                            "using default transformer"))
        def_m.add_argument('--gtu_range',
                           type=atypes.int_range(0),
                           nargs=2,
                           metavar=('START_GTU', 'STOP_GTU'),
                           required=True,
                           help=('range of GTUs to use'))
        def_m.add_argument('--packet_idx',
                           type=atypes.int_range(0),
                           required=True,
                           help=('index of packet to use'))

        apack = subparsers.add_parser("allpack",
                                      help=("Convert events to dataset items "
                                            "using all_packets transformer"))
        apack.add_argument('--gtu_range',
                           type=atypes.int_range(0),
                           nargs=2,
                           metavar=('START_GTU', 'STOP_GTU'),
                           required=True,
                           help=('range of GTUs containing shower.'))

        gpack = subparsers.add_parser("gtupack",
                                      help=("Convert events to dataset items "
                                            "using gtu_in_packet transformer"))
        gpack.add_argument('--num_gtu_around',
                           type=atypes.int_range(0),
                           nargs=2,
                           metavar=('NUM_BEFORE', 'NUM_AFTER'),
                           help=('number of GTU/frames before and after '
                                 'gtu_in_packet to include in dataset items'))
        gpack.add_argument('--no_bounds_adjust',
                           action='store_true',
                           help=('do not shift the frames window if part of '
                                 'it is out of packet bounds. An exception '
                                 'will be raised instead.'))

        self.parser = parser
        self.packet_args = packet_args
        self.dset_args = dset_args
        self.item_args = item_args
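After parsing, args.converter holds the name of the chosen subcommand ('default', 'allpack' or 'gtupack') thanks to dest="converter". A hypothetical invocation of the default converter (the script name, file list, and target values are made up; any required packet arguments added by cargs.PacketArgs are omitted):

# python create_dataset.py -f files.tsv --target method args \
#        -n my_dset -d ./out \
#        default --gtu_range 27 47 --packet_idx 0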
Example #12
    def __init__(self):
        self.parser = argparse.ArgumentParser(
            description="Create simulated air shower data as numpy arrays")
        out_aliases = {'dataset name': 'name', 'dataset directory': 'outdir'}
        self.packet_args = cargs.PacketArgs()
        self.dset_args = dargs.DatasetArgs(output_aliases=out_aliases)
        self.item_args = dargs.ItemTypeArgs()

        ds_group = self.parser.add_argument_group('dataset configuration')
        # packet dimensions
        self.packet_args.add_packet_arg(ds_group, required=True)
        # output dataset
        atype = dargs.arg_type.OUTPUT
        self.dset_args.add_dataset_arg_double(ds_group, atype)
        self.item_args.add_item_type_args(ds_group, atype)
        ds_group.add_argument('--num_data',
                              required=True,
                              type=atypes.int_range(1),
                              help=('Number of data items (both noise and '
                                    'shower)'))
        ds_group.add_argument('--dtype',
                              default='uint8',
                              help=('Data type of dataset items (default: '
                                    'uint8)'))

        shower_group = self.parser.add_argument_group('shower properties')
        # arguments qualifying shower property ranges
        args = [
            'shower_max', 'duration', 'track_length', 'start_gtu', 'start_y',
            'start_x'
        ]
        reqs = [True, True, True, False, False, False]
        descs = [
            'Peak relative diff. between shower track and bg pixel values',
            'Number of GTU or frames containing shower track pixels',
            'Length of shower tracks as viewed in the yx projection',
            'First GTU or packet frame containing shower pixels',
            'Start_gtu frame Y coordinate from which the shower tracks begin',
            'Start_gtu frame X coordinate from which the shower tracks begin'
        ]
        types = ([atypes.int_range(1)] * 3) + ([atypes.int_range(0)] * 3)
        for idx in range(len(args)):
            arg = args[idx]
            cargs.add_number_range_arg(shower_group,
                                       arg,
                                       arg_desc=descs[idx],
                                       required=reqs[idx],
                                       arg_type=types[idx])

        bg_group = self.parser.add_argument_group('background properties')
        # additional arguments applying to packet background
        cargs.add_number_range_arg(bg_group,
                                   'bg_lambda',
                                   required=True,
                                   arg_type=atypes.float_range(0),
                                   arg_desc=('Bg pixel values average '
                                             '(Poisson distribution lambda)'))
        cargs.add_number_range_arg(bg_group,
                                   'bad_ECs',
                                   default=(0, 0),
                                   arg_type=atypes.int_range(-1),
                                   arg_desc='Bad ECs count per data item')
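The index-based loop over the shower properties above can be written more idiomatically with zip; a behavior-preserving variant:

        for arg, req, desc, a_type in zip(args, reqs, descs, types):
            cargs.add_number_range_arg(shower_group, arg, arg_desc=desc,
                                       required=req, arg_type=a_type)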