import sys
import json
import logging as log

import pytest

import args_and_configs
import scoring_metrics


def test_skip_missing_test_files_usage():
    command_line_args = [
        '--reference-input', 'tests/data/i2b2_2016_track-1_reference',
        '--test-input', 'tests/data/i2b2_2016_track-1_test',
        '--skip-missing-files'
    ]
    args = args_and_configs.get_arguments(command_line_args)
    assert args.skip_missing_files is True
    ## Skipping missing files should also be the default behavior
    command_line_args = [
        '--reference-input', 'tests/data/i2b2_2016_track-1_reference',
        '--test-input', 'tests/data/i2b2_2016_track-1_test'
    ]
    args = args_and_configs.get_arguments(command_line_args)
    assert args.skip_missing_files is True
def test_print_counts_and_metrics_and_confusion_for_test():
    command_line_args = [
        '--print-counts', '--print-metrics', '--print-confusion-matrix',
        '--test-input', 'tests/data/i2b2_2016_track-1_test'
    ]
    with pytest.raises(SystemExit) as e_info:
        args = args_and_configs.get_arguments(command_line_args)
def test_default_ignore_whitespace_flag():
    command_line_args = [
        '--reference-input', 'tests/data/i2b2_2016_track-1_reference',
        '--test-input', 'tests/data/i2b2_2016_track-1_test'
    ]
    args = args_and_configs.get_arguments(command_line_args)
    assert args.ignore_whitespace is True
def test_score_missing_test_files_usage():
    command_line_args = [
        '--reference-input', 'tests/data/i2b2_2016_track-1_reference',
        '--test-input', 'tests/data/i2b2_2016_track-1_test',
        '--score-missing-files'
    ]
    args = args_and_configs.get_arguments(command_line_args)
    assert args.skip_missing_files is False
def test_print_counts_ref_only():
    command_line_args = [
        '--print-counts', '--no-metrics', '--no-confusion-matrix',
        '--reference-input', 'tests/data/i2b2_2016_track-1_reference'
    ]
    args = args_and_configs.get_arguments(command_line_args)
    assert args.print_counts
    assert not args.print_metrics
    assert not args.print_confusion_matrix
    assert args.reference_input is not None
    assert args.test_input is None
def initialize_for_track1():
    command_line_args = [
        '--reference-input', 'tests/data/n2c2_2018_track-1_reference',
        '--test-input', 'tests/data/n2c2_2018_track-1_test', '--empty-value',
        '0.0'
    ]
    args = args_and_configs.get_arguments(command_line_args)
    ## TODO - we duplicate the cast normally done within init_args so that
    ##        this helper is more portable, for now.
    args.empty_value = float(args.empty_value)
    file_mapping = {
        '103.xml': '103.xml',
        '203.xml': '203.xml',
        '303.xml': '303.xml'
    }
    return (args, file_mapping)
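## A minimal sketch of consuming the helper above (hypothetical test, not
## part of the original suite; the expected values follow from the helper):
def test_initialize_for_track1_sketch():
    args, file_mapping = initialize_for_track1()
    assert args.empty_value == 0.0
    assert len(file_mapping) == 3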
def init_args():
    ##
    args = args_and_configs.get_arguments(sys.argv[1:])
    ## Set up logging
    if args.verbose:
        log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG)
        log.info("Verbose output.")
        log.debug("{}".format(args))
    else:
        log.basicConfig(format="%(levelname)s: %(message)s")
    ## Configure progressbar output
    if (args.progressbar_output == 'none'):
        args.progressbar_disabled = True
        args.progressbar_file = None
    else:
        args.progressbar_disabled = False
        if (args.progressbar_output == 'stderr'):
            args.progressbar_file = sys.stderr
        elif (args.progressbar_output == 'stdout'):
            args.progressbar_file = sys.stdout
    ## F-score beta values are commonly set to 1, 2, and 0.5, but we
    ## want to support any value.  It's easiest to standardize beta
    ## values here so that they show up consistently throughout the
    ## rest of the pipeline
    if ('F' in args.metrics_list):
        f_position = args.metrics_list.index('F')
        args.metrics_list.pop(f_position)
        if (len(args.f_beta_values) == 0):
            log.warning(
                'F was included in the list of metrics to calculate but no beta values were provided (--f-beta-values <betas>)'
            )
        else:
            ## Reverse the list so that they get inserted into the metrics_list
            ## in the proper order
            args.f_beta_values.reverse()
            for beta in args.f_beta_values:
                if ('F{}'.format(beta) not in args.metrics_list):
                    args.metrics_list.insert(f_position, 'F{}'.format(beta))
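            ## For example (illustrative values): a metrics_list of
            ## ['Precision', 'F', 'Recall'] with f_beta_values of
            ## ['1', '0.5'] yields ['Precision', 'F1', 'F0.5', 'Recall']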
    else:
        if (len(args.f_beta_values) > 0):
            log.warning(
                'F beta values were provided but "F" was not included in the list of metrics to calculate (--f-beta-values <betas>)'
            )
            args.f_beta_values = []
    ## If a specific F-score (e.g., F1) was requested directly in the
    ## metrics list, make sure its beta value is tracked as well
    for common_beta in ['1', '2', '0.5']:
        if ('F{}'.format(common_beta) in args.metrics_list):
            if (common_beta not in args.f_beta_values):
                args.f_beta_values.append(common_beta)
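    ## For example, if 'F1' appears directly in the metrics list but no
    ## beta values were supplied, f_beta_values ends up as ['1'] here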
    ## The command line parameters are always initially cast as strings.
    ## That works fine for some empty values.  Sometimes we want to use
    ## 0 (int) or 0.0 (float) or -1 as empty values.  In this case,
    ## it's best to cast the string to the appropriate numerical
    ## type for formatting later.
    if (args.empty_value is not None and args.empty_value != ''):
        try:
            args.empty_value = int(args.empty_value)
        except ValueError:
            log.debug('Default empty_value is not an int')
            try:
                args.empty_value = float(args.empty_value)
            except ValueError:
                log.debug('Default empty_value is not a float')
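    ## For example (illustrative): '0' casts to the int 0, '0.0' to the
    ## float 0.0, and a non-numeric value such as 'n/a' stays a string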
    ## Resolve conflicts between the --ignore-whitespace, --heed-whitespace,
    ## and --ignore-regex flags.  Essentially, if something was set in
    ## skip_chars, use that.  Otherwise, if --ignore-whitespace was tripped,
    ## set skip_chars accordingly
    if (args.ignore_whitespace and args.skip_chars is None):
        args.skip_chars = r'[\s]'
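        ## (illustrative) downstream this pattern strips ignorable
        ## characters, e.g.:  re.sub(r'[\s]', '', 'a b\tc') == 'abc'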
    ## lstrip hack added to handle prefixes and suffixes with dashes
    ##   https://stackoverflow.com/questions/16174992/cant-get-argparse-to-read-quoted-string-with-dashes-in-it
    args.file_prefix = args.file_prefix.lstrip()
    args.file_suffix[0] = args.file_suffix[0].lstrip()
    if (len(args.file_suffix) == 2):
        args.file_suffix[1] = args.file_suffix[1].lstrip()
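    ## For example (illustrative): a suffix passed as ' -reference.xml' to
    ## dodge argparse's dash parsing becomes '-reference.xml' after lstrip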
    ## Initialize the list of annotation attributes to score
    args.attributes_list = []
    args.scorable_attributes = []
    if (isinstance(args.attributes_string, str)):
        for attribute_key in args.attributes_string.split(','):
            ## Strip off any extra whitespace before processing
            attribute_key = attribute_key.strip()
            attribute_kernel = attribute_key.split('/')
            last = len(attribute_kernel) - 1
            args.attributes_list.append(
                [attribute_kernel[0], attribute_kernel[last]])
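        ## For example (illustrative):
        ##   'polarity/negation, uncertainty' parses to
        ##   [['polarity', 'negation'], ['uncertainty', 'uncertainty']]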
    ## Initialize the list of normalization engines to score
    args.normalization_list = []
    args.scorable_engines = []
    args.normalization_synonyms = {}
    if (isinstance(args.normalization_string, str)):
        for normalization_key in args.normalization_string.split(','):
            ## Strip off any extra whitespace before processing
            normalization_key = normalization_key.strip()
            normalization_kernel = normalization_key.split('/')
            last = len(normalization_kernel) - 1
            args.normalization_list.append(
                [normalization_kernel[0], normalization_kernel[last]])
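        ## For example (illustrative): 'CUI' parses to [['CUI', 'CUI']]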
        ## Only bother to load the normalization_file if normalization
        ## scoring was requested (via the --score-normalization flag)
        args.normalization_synonyms = \
          args_and_configs.process_normalization_file( args.normalization_file )
    ## Initialize the corpus settings, values, and metrics output file
    ## if one was provided at the command line
    if (args.corpus_out):
        ## Clean out any previous corpus dictionary, in case it exists from
        ## an old run
        with open(args.corpus_out, 'w') as fp:
            json.dump({}, fp, sort_keys=True, indent=4)
        ## Add a few important arguments
        scoring_metrics.update_output_dictionary(args.corpus_out, ['args'], [
            'reference_config', 'reference_input', 'reference_out',
            'test_config', 'test_input', 'test_out', 'score_key', 'fuzzy_flags'
        ], [
            args.reference_config, args.reference_input, args.reference_out,
            args.test_config, args.test_input, args.test_out, args.score_key,
            args.fuzzy_flags
        ])
    return args
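## A minimal sketch of driving init_args() from the command line (script
## name and data paths below are illustrative only):
##   sys.argv = ['etude.py',
##               '--reference-input', 'tests/data/i2b2_2016_track-1_reference',
##               '--test-input', 'tests/data/i2b2_2016_track-1_test']
##   args = init_args()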
def test_print_counts_neither_ref_nor_test():
    command_line_args = [
        '--print-counts', '--no-metrics', '--no-confusion-matrix'
    ]
    with pytest.raises(SystemExit) as e_info:
        args = args_and_configs.get_arguments(command_line_args)
def test_required_input_flags_test_only():
    command_line_args = ['--test-input', 'tests/data/i2b2_2016_track-1_test']
    with pytest.raises(SystemExit) as e_info:
        args = args_and_configs.get_arguments(command_line_args)