示例#1
0
def classify_file(train_inst_fns,
                  test_inst_fns,
                  out_fns=None,
                  log_fn=None,
                  clas_dir=None,
                  descriptor=None,
                  timbl=None,
                  options="",
                  log=False):
    """
    Classify instances using Timbl

    Thin wrapper that prepares (or validates) a TimblFile and delegates the
    actual work to its train_test_multi method.

    @param train_inst_fns: a list of instance filenames for training

    @param test_inst_fns: a list of instance filenames for testing

    @keyword out_fns: list of classifier output files to be created

    @keyword log_fn: classifier log file to be created;
    ignored if keyword log is false

    @keyword clas_dir: directory for creating classifier output files; ignored
    if out_fns is given. Note that makedirs() is called on it whenever it is
    supplied, regardless of out_fns.

    @keyword descriptor: a Descriptor instance, required to infer the feature
    metrics for Timbl, unless a TimblFile is supplied; ignored if timbl is
    supplied

    @keyword timbl: a tailored TimblFile instance; notice that it must at
    least set the verbosity options +vo, +vdb, +vdi, and the -m option to
    specify that the administrative features must be ignored.

    @keyword options: additional Timbl options (the default is an empty
    string), excluding -f, -m, +vo, +vdb, +vdi

    @keyword log: log Timbl's standard output and error streams to file

    @return: list of Timbl output filenames

    @raise AssertionError: if neither timbl nor descriptor is supplied, if
    timbl is not a TimblFile instance, or if its default options lack one of
    +vo, +vdb, +vdi, -m
    """
    if clas_dir:
        makedirs(clas_dir)

    # The checks below are raised explicitly instead of being written as
    # assert statements, so that they are not stripped when Python runs with
    # -O. AssertionError is kept as the exception type so that existing
    # callers observe the same exception as before.
    if not timbl:
        if not descriptor:
            raise AssertionError(
                "classify_file requires a descriptor when no timbl is "
                "supplied")
        timbl = TimblFile(default_opts=timbl_options_string(descriptor))
    else:
        # a tailored classifier was supplied, so descriptor is ignored
        if not isinstance(timbl, TimblFile):
            raise AssertionError(
                "timbl must be a TimblFile instance, got %s"
                % type(timbl).__name__)
        for required_opt in ("+vo", "+vdb", "+vdi", "-m"):
            if required_opt not in timbl.default_opts:
                raise AssertionError(
                    "timbl.default_opts must contain %s" % required_opt)

    return timbl.train_test_multi(train_inst_fns,
                                  test_inst_fns,
                                  out_fns=out_fns,
                                  log_fn=log_fn,
                                  options=options,
                                  log=log,
                                  out_dir=clas_dir)
示例#2
0
def classify_file_cv(inst_fns,
                     test_inst_fns=None,
                     out_fns=None,
                     log_fns=None,
                     clas_dir=None,
                     descriptor=None,
                     timbl=None,
                     options="",
                     n=None,
                     log=False):
    """
    Classify instances using Timbl in a cross-validation procedure.

    Thin wrapper that prepares (or validates) a TimblFile and delegates the
    actual work to its cross_validate method.

    @param inst_fns: a list of instance filenames for training; if no
    test_inst_fns is supplied, the same files will be used for testing,
    otherwise they are used for training only

    @keyword test_inst_fns: a list of instance filenames for testing; this
    allows for down-sampling of the training instances without affecting the
    test instances

    @keyword out_fns: list of classifier output files to be created

    @keyword log_fns: list of classifier log files to be created;
    ignored if keyword log is false

    @keyword clas_dir: directory for creating classifier output files; ignored
    if out_fns is given. Note that makedirs() is called on it whenever it is
    supplied, regardless of out_fns.

    @keyword descriptor: a Descriptor instance, required to infer the feature
    metrics for Timbl, unless a TimblFile is supplied; ignored if timbl is
    supplied

    @keyword timbl: a tailored TimblFile instance; notice that it must at
    least set the verbosity options +vo, +vdb, +vdi, and the -m option to
    specify that the administrative features must be ignored.

    @keyword options: additional Timbl options (the default is an empty
    string), excluding -f, -m, +vo, +vdb, +vdi

    @keyword n: limit on the number of cross-validations performed (by default
    equals the number of instance filenames)

    @keyword log: log Timbl's standard output and error streams to file

    @return: list of Timbl output filenames

    @raise AssertionError: if neither timbl nor descriptor is supplied, if
    timbl is not a TimblFile instance, or if its default options lack one of
    +vo, +vdb, +vdi, -m
    """
    if clas_dir:
        makedirs(clas_dir)

    # The checks below are raised explicitly instead of being written as
    # assert statements, so that they are not stripped when Python runs with
    # -O. AssertionError is kept as the exception type so that existing
    # callers observe the same exception as before.
    if not timbl:
        if not descriptor:
            raise AssertionError(
                "classify_file_cv requires a descriptor when no timbl is "
                "supplied")
        timbl = TimblFile(default_opts=timbl_options_string(descriptor))
    else:
        # a tailored classifier was supplied, so descriptor is ignored
        if not isinstance(timbl, TimblFile):
            raise AssertionError(
                "timbl must be a TimblFile instance, got %s"
                % type(timbl).__name__)
        for required_opt in ("+vo", "+vdb", "+vdi", "-m"):
            if required_opt not in timbl.default_opts:
                raise AssertionError(
                    "timbl.default_opts must contain %s" % required_opt)

    return timbl.cross_validate(inst_fns,
                                test_inst_fns=test_inst_fns,
                                out_fns=out_fns,
                                log_fns=log_fns,
                                options=options,
                                n=n,
                                log=log,
                                out_dir=clas_dir)