Exemplo n.º 1
0
def fill_in_config_paths_for_parsing(config_template_path, values_to_fill_dict,
                                     sub_prefix):
    """
    Write a concrete config file derived from a template, overriding a fixed
    set of known options with the values supplied by the caller.

    Only options listed below are considered; an option is overridden only
    when its name is a key of ``values_to_fill_dict``.  The result is written
    next to the template as ``<prefix>_<sub_prefix>.cfg``, where ``<prefix>``
    is the part of ``config_template_path`` preceding ``.template.cfg``, and
    the path of that new file is returned.

    If ``config_template_path`` does not contain ``.template.cfg`` the regex
    does not match and an ``AttributeError`` is raised.
    """

    parser = _setup_config_parser(config_template_path)

    # Options that may be overridden, grouped by the section they belong to.
    fillable_options = (
        ('General', ('experiment_name', 'task')),
        ('Input', ('train_directory', 'train_file', 'test_directory',
                   'test_file', 'featuresets', 'featureset_names',
                   'feature_hasher', 'hasher_features', 'learners',
                   'sampler', 'shuffle', 'feature_scaling')),
        ('Tuning', ('grid_search', 'objective')),
        ('Output', ('probability', 'results', 'log', 'models',
                    'predictions')),
    )

    for section_name, option_names in fillable_options:
        for option in option_names:
            if option not in values_to_fill_dict:
                continue
            parser.set(section_name, option, values_to_fill_dict[option])

    # Everything before ".template.cfg" becomes the stem of the new file.
    template_match = re.search(r'^(.*)\.template\.cfg', config_template_path)
    (path_stem,) = template_match.groups()
    generated_path = '{}_{}.cfg'.format(path_stem, sub_prefix)

    with open(generated_path, 'w') as generated_file:
        parser.write(generated_file)

    return generated_path
Exemplo n.º 2
0
def fill_in_config_paths_for_parsing(config_template_path, values_to_fill_dict,
                                     sub_prefix):
    """
    Produce a config file from a template, replacing any of the recognised
    options for which ``values_to_fill_dict`` provides a value.

    The new file is named ``<prefix>_<sub_prefix>.cfg``, with ``<prefix>``
    being the portion of ``config_template_path`` that precedes
    ``.template.cfg``.  Returns the path of the file that was written.

    An ``AttributeError`` is raised when ``config_template_path`` does not
    contain ``.template.cfg`` (the regex search yields no match).
    """

    cfg = _setup_config_parser(config_template_path)

    # (section, option) pairs that callers are allowed to override.
    overridable = [
        ('General', 'experiment_name'),
        ('General', 'task'),
        ('Input', 'train_directory'),
        ('Input', 'train_file'),
        ('Input', 'test_directory'),
        ('Input', 'test_file'),
        ('Input', 'featuresets'),
        ('Input', 'featureset_names'),
        ('Input', 'feature_hasher'),
        ('Input', 'hasher_features'),
        ('Input', 'learners'),
        ('Input', 'sampler'),
        ('Input', 'shuffle'),
        ('Input', 'feature_scaling'),
        ('Tuning', 'grid_search'),
        ('Tuning', 'objective'),
        ('Output', 'probability'),
        ('Output', 'results'),
        ('Output', 'log'),
        ('Output', 'models'),
        ('Output', 'predictions'),
    ]

    for section, option in overridable:
        if option in values_to_fill_dict:
            cfg.set(section, option, values_to_fill_dict[option])

    # Strip the ".template.cfg" suffix to obtain the stem of the output name.
    stem = re.search(r'^(.*)\.template\.cfg',
                     config_template_path).groups()[0]
    target_path = '{}_{}.cfg'.format(stem, sub_prefix)

    with open(target_path, 'w') as target_file:
        cfg.write(target_file)

    return target_path
Exemplo n.º 3
0
def fill_in_config_paths_for_fancy_output(config_template_path):
    """
    Write a config file from the given template with the "fancy" train and
    test files and the output locations filled in.

    The train and test files are ``fancy_train.jsonlines`` and
    ``fancy_test.jsonlines`` inside the ``train`` and ``test`` directories
    under ``_my_dir``; results, log and predictions all point at the
    ``output`` directory.  The new file is the template path with
    ``.template.cfg`` replaced by ``.cfg``; its path is returned.
    """

    train_root = join(_my_dir, 'train')
    test_root = join(_my_dir, 'test')
    output_root = join(_my_dir, 'output')

    parser = _setup_config_parser(config_template_path)

    # (section, option, value) triples applied in order.
    settings = [
        ("Input", "train_file", join(train_root, "fancy_train.jsonlines")),
        ("Input", "test_file", join(test_root, "fancy_test.jsonlines")),
        ("Output", "results", output_root),
        ("Output", "log", output_root),
        ("Output", "predictions", output_root),
    ]
    for section, option, value in settings:
        parser.set(section, option, value)

    # Everything before ".template.cfg" is reused as the output file stem.
    template_match = re.search(r'^(.*)\.template\.cfg', config_template_path)
    (path_stem,) = template_match.groups()
    generated_path = '{}.cfg'.format(path_stem)

    with open(generated_path, 'w') as generated_file:
        parser.write(generated_file)

    return generated_path
Exemplo n.º 4
0
def fill_in_config_paths_for_fancy_output(config_template_path):
    """
    Generate the config file for the "fancy output" experiment from its
    template.

    Sets the train/test input files to ``fancy_train.jsonlines`` and
    ``fancy_test.jsonlines`` (in the ``train`` and ``test`` directories under
    ``_my_dir``) and directs results, log and predictions to the ``output``
    directory.  Returns the path of the written file, which is the template
    path with ``.template.cfg`` replaced by ``.cfg``.
    """

    cfg = _setup_config_parser(config_template_path)

    # Input files: (option name, sub-directory of _my_dir, file name).
    input_files = (
        ("train_file", 'train', "fancy_train.jsonlines"),
        ("test_file", 'test', "fancy_test.jsonlines"),
    )
    for option, subdir, filename in input_files:
        cfg.set("Input", option, join(join(_my_dir, subdir), filename))

    # Every output option shares the same directory.
    output_dir = join(_my_dir, 'output')
    for option in ("results", "log", "predictions"):
        cfg.set("Output", option, output_dir)

    stem = re.search(r'^(.*)\.template\.cfg',
                     config_template_path).groups()[0]
    target_path = '{}.cfg'.format(stem)

    with open(target_path, 'w') as target_file:
        cfg.write(target_file)

    return target_path
Exemplo n.º 5
0
def fill_in_config_paths_for_single_file(config_template_path,
                                         train_file,
                                         test_file,
                                         train_directory='',
                                         test_directory=''):
    """
    Write a config file from a template, filling in the train/test file
    paths and the output directories according to the template's task.

    ``train_file`` is always set (relative to the ``train`` directory under
    ``_my_dir``); ``test_file`` is set only for the ``predict`` and
    ``evaluate`` tasks.  ``train_directory`` and ``test_directory`` are set
    only when non-empty.  The ``log`` and ``predictions`` outputs are always
    filled in, ``models`` for every task except ``cross_validate``, and
    ``results`` for ``evaluate`` and ``cross_validate``.  For
    ``cross_validate``, a non-empty ``cv_folds_file`` is re-rooted under the
    ``train`` directory.

    Returns the path of the written file: the template path with
    ``.template.cfg`` replaced by ``.cfg``.
    """

    train_root = join(_my_dir, 'train')
    test_root = join(_my_dir, 'test')
    output_root = join(_my_dir, 'output')

    parser = _setup_config_parser(config_template_path)
    task = parser.get("General", "task")
    is_cross_validation = task == 'cross_validate'

    # Input locations.
    parser.set("Input", "train_file", join(train_root, train_file))
    if task in ('predict', 'evaluate'):
        parser.set("Input", "test_file", join(test_root, test_file))
    if train_directory:
        parser.set("Input", "train_directory",
                   join(train_root, train_directory))
    if test_directory:
        parser.set("Input", "test_directory",
                   join(test_root, test_directory))

    # Output locations: which ones are needed depends on the task.
    output_options = ['log', 'predictions']
    if not is_cross_validation:
        output_options.append('models')
    if task in ('evaluate', 'cross_validate'):
        output_options.append('results')

    output_value = join(output_root)
    for option in output_options:
        parser.set("Output", option, output_value)

    # A folds file given in the template is resolved against the train dir.
    if is_cross_validation:
        folds_file = parser.get("Input", "cv_folds_file")
        if folds_file:
            parser.set("Input", "cv_folds_file",
                       join(train_root, folds_file))

    template_match = re.search(r'^(.*)\.template\.cfg', config_template_path)
    (path_stem,) = template_match.groups()
    generated_path = '{}.cfg'.format(path_stem)

    with open(generated_path, 'w') as generated_file:
        parser.write(generated_file)

    return generated_path
Exemplo n.º 6
0
def fill_in_config_paths_for_single_file(config_template_path, train_file,
                                         test_file, train_directory='',
                                         test_directory=''):
    """
    Generate a config file from a template for an experiment that uses a
    single train file (and, depending on the task, a single test file).

    The task is read from the template's ``General`` section and decides
    what gets filled in: ``test_file`` only for ``predict``/``evaluate``,
    ``models`` for anything but ``cross_validate``, ``results`` for
    ``evaluate``/``cross_validate``, and a re-rooted ``cv_folds_file`` for
    ``cross_validate`` when the template specifies one.  ``log`` and
    ``predictions`` are always set.  Optional ``train_directory`` and
    ``test_directory`` are applied only when non-empty.

    Returns the path of the written file (the template path with
    ``.template.cfg`` replaced by ``.cfg``).
    """

    train_base = join(_my_dir, 'train')
    test_base = join(_my_dir, 'test')
    output_base = join(_my_dir, 'output')

    cfg = _setup_config_parser(config_template_path)

    task = cfg.get("General", "task")
    cross_validating = (task == 'cross_validate')
    evaluating = (task == 'evaluate')

    # Collect the input settings first, then apply them in order.
    input_settings = [("train_file", join(train_base, train_file))]
    if task == 'predict' or evaluating:
        input_settings.append(("test_file", join(test_base, test_file)))
    if train_directory:
        input_settings.append(
            ("train_directory", join(train_base, train_directory)))
    if test_directory:
        input_settings.append(
            ("test_directory", join(test_base, test_directory)))
    for option, value in input_settings:
        cfg.set("Input", option, value)

    # Output options required by the task, all pointing at the same place.
    output_keys = ['log', 'predictions']
    if not cross_validating:
        output_keys.append('models')
    if evaluating or cross_validating:
        output_keys.append('results')
    output_location = join(output_base)
    for key in output_keys:
        cfg.set("Output", key, output_location)

    if cross_validating:
        folds = cfg.get("Input", "cv_folds_file")
        if folds:
            cfg.set("Input", "cv_folds_file", join(train_base, folds))

    stem = re.search(r'^(.*)\.template\.cfg',
                     config_template_path).groups()[0]
    target_path = '{}.cfg'.format(stem)

    with open(target_path, 'w') as target_file:
        cfg.write(target_file)

    return target_path
Exemplo n.º 7
0
def fill_in_config_paths(config_template_path):
    """
    Write a config file from a template, filling in the train, test, and
    output locations according to the template's task.

    The ``train``, ``test`` and ``output`` directories under ``_my_dir`` are
    created when missing.  ``train_location`` is always set; ``test_location``
    only for the ``predict`` and ``evaluate`` tasks.  The ``log``, ``vocabs``
    and ``predictions`` outputs are always filled in, ``models`` for every
    task except ``cross_validate``, and ``results`` for ``evaluate`` and
    ``cross_validate``.  For ``cross_validate``, a non-empty
    ``cv_folds_location`` is re-rooted under the ``train`` directory.

    Returns the path of the written file: the part of
    ``config_template_path`` preceding ``.template.cfg``, plus ``.cfg``.

    Raises ``OSError`` if one of the directories cannot be created (including
    when a non-directory already exists at that path).
    """
    train_dir = os.path.join(_my_dir, 'train')
    test_dir = os.path.join(_my_dir, 'test')
    output_dir = os.path.join(_my_dir, 'output')

    for directory in (train_dir, test_dir, output_dir):
        # Create first and tolerate "already there" afterwards: checking for
        # existence before creating is racy when several processes set up
        # the same directories at once.
        try:
            os.makedirs(directory)
        except OSError:
            if not os.path.isdir(directory):
                raise

    config = _setup_config_parser(config_template_path)

    task = config.get("General", "task")

    config.set("Input", "train_location", train_dir)

    # Which output options are needed depends on the task.
    to_fill_in = ['log', 'vocabs', 'predictions']

    if task != 'cross_validate':
        to_fill_in.append('models')

    if task in ('evaluate', 'cross_validate'):
        to_fill_in.append('results')

    for option in to_fill_in:
        config.set("Output", option, output_dir)

    if task == 'cross_validate':
        # A folds location given in the template is resolved against the
        # train directory.
        cv_folds_location = config.get("Input", "cv_folds_location")
        if cv_folds_location:
            config.set("Input", "cv_folds_location",
                       os.path.join(train_dir, cv_folds_location))

    if task in ('predict', 'evaluate'):
        config.set("Input", "test_location", test_dir)

    config_prefix = re.search(r'^(.*)\.template\.cfg',
                              config_template_path).groups()[0]
    new_config_path = '{}.cfg'.format(config_prefix)

    with open(new_config_path, 'w') as new_config_file:
        config.write(new_config_file)

    return new_config_path
Exemplo n.º 8
0
def test_int_labels():
    """
    Check that SKLL accepts integer labels in its input.

    The experiment only needs to run to completion with int labels (as
    opposed to floats or strings).  This did not work in v1.0.0 because the
    json package doesn't know how to serialize numpy.int64 objects.
    """
    configs_dir = join(_my_dir, 'configs')
    template_path = join(configs_dir, 'test_int_labels_cv.template.cfg')
    generated_path = join(configs_dir, 'test_int_labels_cv.cfg')
    output_root = join(_my_dir, 'output')

    parser = _setup_config_parser(template_path)

    # (section, option, value) triples applied in order.
    settings = [
        ("Input", "train_file",
         join(_my_dir, 'other', 'test_int_labels_cv.jsonlines')),
        ("Output", "results", output_root),
        ("Output", "log", output_root),
        ("Output", "predictions", output_root),
    ]
    for section, option, value in settings:
        parser.set(section, option, value)

    with open(generated_path, 'w') as generated_file:
        parser.write(generated_file)

    run_configuration(generated_path, quiet=True)
Exemplo n.º 9
0
def test_int_labels():
    """
    Run a cross-validation experiment whose labels are integers.

    The test passes as long as the run completes: it guards against the
    v1.0.0 failure where int labels (rather than floats or strings) could
    not be handled because the json package doesn't know how to serialize
    numpy.int64 objects.
    """
    template = join(_my_dir, 'configs', 'test_int_labels_cv.template.cfg')
    generated = join(_my_dir, 'configs', 'test_int_labels_cv.cfg')
    train_data = join(_my_dir, 'other', 'test_int_labels_cv.jsonlines')
    output_dir = join(_my_dir, 'output')

    cfg = _setup_config_parser(template)
    cfg.set("Input", "train_file", train_data)

    # All output options share the same directory.
    for option in ("results", "log", "predictions"):
        cfg.set("Output", option, output_dir)

    with open(generated, 'w') as target_file:
        cfg.write(target_file)

    run_configuration(generated, quiet=True)