Exemplo n.º 1
0
def classify_file(train_inst_fns,
                  test_inst_fns,
                  out_fns=None,
                  log_fn=None,
                  clas_dir=None,
                  descriptor=None,
                  timbl=None,
                  options="",
                  log=False):
    """
    Train Timbl on one set of instance files and classify another set.

    @param train_inst_fns: list of instance filenames used for training

    @param test_inst_fns: list of instance filenames used for testing

    @keyword out_fns: list of classifier output files to be created

    @keyword log_fn: classifier log file to be created; ignored if keyword
    log is false

    @keyword clas_dir: directory for the classifier output files; when given
    it is passed to makedirs before classification starts; ignored if out_fns
    is given

    @keyword descriptor: a Descriptor instance from which the default Timbl
    options are derived; required when no timbl is supplied, ignored
    otherwise

    @keyword timbl: a tailored TimblFile instance; its default options must
    at least contain the verbosity options +vo, +vdb, +vdi, and the -m
    option specifying that the administrative features must be ignored

    @keyword options: additional Timbl options, excluding -f, -m, +vo, +vdb,
    +vdi

    @keyword log: log Timbl's standard output and error streams to file

    @return: list of Timbl output filenames
    """
    if clas_dir:
        makedirs(clas_dir)

    if timbl:
        # a caller-supplied TimblFile takes precedence; descriptor is ignored
        assert isinstance(timbl, TimblFile)
        for required_opt in ("+vo", "+vdb", "+vdi", "-m"):
            assert required_opt in timbl.default_opts
    else:
        # no TimblFile given: derive the default options from the descriptor
        assert descriptor
        default_opts = timbl_options_string(descriptor)
        timbl = TimblFile(default_opts=default_opts)

    run_kwargs = dict(out_fns=out_fns,
                      log_fn=log_fn,
                      options=options,
                      log=log,
                      out_dir=clas_dir)
    return timbl.train_test_multi(train_inst_fns, test_inst_fns, **run_kwargs)
Exemplo n.º 2
0
def dump_inst_base(config):
    """
    Train Timbl on the configured instance file and dump its instance base.

    @param config: configuration object. The following attributes are read:
    descriptor_inst (optional Descriptor instance), features (used to build a
    Descriptor only when descriptor_inst is absent), timbl_opts (optional
    extra Timbl options), and the settings timbl_inst_fname and
    timbl_ib_fname, which are looked up through _abspath.

    If no instance base filename is configured, it defaults to the instance
    filename with its extension replaced by ".ib".
    """
    from daeso_nl.ga.classifier import timbl_options_string
    from tt.timblfile import TimblFile

    # determine Timbl options
    # The fallback Descriptor is built lazily: passing it as the default
    # argument of getattr() would construct it on every call and would
    # require config.features even when descriptor_inst is already set.
    try:
        descriptor = config.descriptor_inst
    except AttributeError:
        descriptor = Descriptor(config.features)

    timbl_opts = getattr(config, "timbl_opts", None)
    options = timbl_options_string(descriptor,
                                   other=timbl_opts)

    # determine filenames
    # NOTE(review): _abspath presumably resolves the named setting to an
    # absolute path and returns a false value when it is not configured --
    # confirm against its definition.
    inst_fname = _abspath(config, "timbl_inst_fname")
    assert inst_fname
    inst_base_fname = _abspath(config, "timbl_ib_fname")
    if not inst_base_fname:
        inst_base_fname = splitext(inst_fname)[0] + ".ib"

    # dump instance base file
    timbl = TimblFile()
    timbl.train(
        inst_fname,
        inst_base_fname,
        options=options)
Exemplo n.º 3
0
def classify_file(train_inst_fns,
                  test_inst_fns,
                  out_fns=None,
                  log_fn=None,
                  clas_dir=None,
                  descriptor=None,
                  timbl=None,
                  options="",
                  log=False):
    """
    Run a Timbl train/test classification over lists of instance files.

    @param train_inst_fns: list of instance filenames for training

    @param test_inst_fns: list of instance filenames for testing

    @keyword out_fns: list of classifier output files to be created

    @keyword log_fn: classifier log file to be created; only relevant when
    keyword log is true

    @keyword clas_dir: directory for creating classifier output files; when
    given, makedirs is called on it first; ignored if out_fns is given

    @keyword descriptor: a Descriptor instance used to infer the Timbl
    options when no TimblFile is supplied; ignored if timbl is supplied

    @keyword timbl: a tailored TimblFile instance whose default options must
    include +vo, +vdb, +vdi (verbosity) and -m (ignore the administrative
    features)

    @keyword options: additional Timbl options, excluding -f, -m, +vo, +vdb,
    +vdi

    @keyword log: log Timbl's standard output and error streams to file

    @return: list of Timbl output filenames
    """
    if clas_dir:
        makedirs(clas_dir)

    if not timbl:
        # build a default TimblFile; this path needs a descriptor
        assert descriptor
        inferred_opts = timbl_options_string(descriptor)
        timbl = TimblFile(default_opts=inferred_opts)
    else:
        # tailored TimblFile supplied: descriptor is ignored, but the
        # mandatory options have to be present
        assert isinstance(timbl, TimblFile)
        mandatory = ("+vo", "+vdb", "+vdi", "-m")
        assert all(opt in timbl.default_opts for opt in mandatory)

    output_fns = timbl.train_test_multi(
        train_inst_fns,
        test_inst_fns,
        out_fns=out_fns,
        log_fn=log_fn,
        options=options,
        log=log,
        out_dir=clas_dir)
    return output_fns
Exemplo n.º 4
0
def classify_file_cv(inst_fns,
                     test_inst_fns=None,
                     out_fns=None,
                     log_fns=None,
                     clas_dir=None,
                     descriptor=None,
                     timbl=None,
                     options="",
                     n=None,
                     log=False):
    """
    Classify instances with Timbl using a cross-validation procedure.

    @param inst_fns: list of instance filenames for training; when no
    test_inst_fns is supplied the same files are also used for testing,
    otherwise they serve for training only

    @keyword test_inst_fns: list of instance filenames for testing; this
    makes it possible to down-sample the training instances without
    affecting the test instances

    @keyword out_fns: list of classifier output files to be created

    @keyword log_fns: list of classifier log files to be created; ignored if
    keyword log is false

    @keyword clas_dir: directory for the classifier output files; when given
    it is passed to makedirs before classification starts; ignored if out_fns
    is given

    @keyword descriptor: a Descriptor instance from which the default Timbl
    options are derived; required when no timbl is supplied, ignored
    otherwise

    @keyword timbl: a tailored TimblFile instance; its default options must
    at least contain the verbosity options +vo, +vdb, +vdi, and the -m
    option specifying that the administrative features must be ignored

    @keyword options: additional Timbl options, excluding -f, -m, +vo, +vdb,
    +vdi

    @keyword n: limit on the number of cross-validations performed (by
    default equals the number of instance filenames)

    @keyword log: log Timbl's standard output and error streams to file

    @return: list of Timbl output filenames
    """
    if clas_dir:
        makedirs(clas_dir)

    if timbl:
        # a caller-supplied TimblFile takes precedence; descriptor is ignored
        assert isinstance(timbl, TimblFile)
        for required_opt in ("+vo", "+vdb", "+vdi", "-m"):
            assert required_opt in timbl.default_opts
    else:
        # no TimblFile given: derive the default options from the descriptor
        assert descriptor
        default_opts = timbl_options_string(descriptor)
        timbl = TimblFile(default_opts=default_opts)

    cv_kwargs = dict(test_inst_fns=test_inst_fns,
                     out_fns=out_fns,
                     log_fns=log_fns,
                     options=options,
                     n=n,
                     log=log,
                     out_dir=clas_dir)
    return timbl.cross_validate(inst_fns, **cv_kwargs)
Exemplo n.º 5
0
def classify_file_cv(inst_fns,
                     test_inst_fns=None,
                     out_fns=None,
                     log_fns=None,
                     clas_dir=None,
                     descriptor=None,
                     timbl=None,
                     options="",
                     n=None,
                     log=False):
    """
    Cross-validate a Timbl classifier over a list of instance files.

    @param inst_fns: list of instance filenames for training; they double as
    test files unless test_inst_fns is supplied, in which case they are used
    for training only

    @keyword test_inst_fns: list of instance filenames for testing; allows
    the training instances to be down-sampled while the test instances stay
    untouched

    @keyword out_fns: list of classifier output files to be created

    @keyword log_fns: list of classifier log files to be created; only
    relevant when keyword log is true

    @keyword clas_dir: directory for creating classifier output files; when
    given, makedirs is called on it first; ignored if out_fns is given

    @keyword descriptor: a Descriptor instance used to infer the Timbl
    options when no TimblFile is supplied; ignored if timbl is supplied

    @keyword timbl: a tailored TimblFile instance whose default options must
    include +vo, +vdb, +vdi (verbosity) and -m (ignore the administrative
    features)

    @keyword options: additional Timbl options, excluding -f, -m, +vo, +vdb,
    +vdi

    @keyword n: limit on the number of cross-validations performed (by
    default equals the number of instance filenames)

    @keyword log: log Timbl's standard output and error streams to file

    @return: list of Timbl output filenames
    """
    if clas_dir:
        makedirs(clas_dir)

    if not timbl:
        # build a default TimblFile; this path needs a descriptor
        assert descriptor
        inferred_opts = timbl_options_string(descriptor)
        timbl = TimblFile(default_opts=inferred_opts)
    else:
        # tailored TimblFile supplied: descriptor is ignored, but the
        # mandatory options have to be present
        assert isinstance(timbl, TimblFile)
        mandatory = ("+vo", "+vdb", "+vdi", "-m")
        assert all(opt in timbl.default_opts for opt in mandatory)

    output_fns = timbl.cross_validate(
        inst_fns,
        test_inst_fns=test_inst_fns,
        out_fns=out_fns,
        log_fns=log_fns,
        options=options,
        n=n,
        log=log,
        out_dir=clas_dir)
    return output_fns