Exemplo n.º 1
0
def main():
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a backoff builder model using the given "\
        "input data. Specify a model type (ngram, etc) and a name to "\
        "identify it. The data file should be a stored SequenceIndex file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")
    parser.add_option('--opts', dest="training_opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")
    # File input options
    parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='bulk-db')
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    options, arguments = parse_args_with_config(parser)
    
    if len(arguments) < 3:
        print >>sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_type = arguments[0]
    model_name = arguments[1]
    
    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS
    
    # Load the sequence data from a dbinput file
    input_data = command_line_input(filename=filename, 
                                    filetype=options.filetype, 
                                    options=options.file_options,
                                    allowed_types=['bulk-db', 'bulk-db-annotated'])
    
    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(model_cls.TRAINING_OPTIONS, intro="Training options for %s" % model_cls.__name__)
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
                            ModuleOption.process_option_string(options.training_opts), 
                            model_cls.TRAINING_OPTIONS)
        
    if options.partitions is not None:
        parts = holdout_partition(input_data, options.partitions)
        models = [(builder_cls.partition_model_name(model_name,num),seqs) for \
                        num,seqs in enumerate(parts)]
    else:
        models = [(model_name,input_data)]
    
    for part_name,seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        # Train it with the loaded data
        model.train(seqs)
        model.save()
        print "Trained model %s" % (part_name)
Exemplo n.º 2
0
def main():
    usage = "%prog <model-type> <command> [<command-arg> ...]"
    description = "Provides administrative operations for trained "\
                "backoff models. Use the 'help' command to get a list "\
                "of commands or command usage."
    parser = OptionParser(usage=usage, description=description)
    options, arguments = parser.parse_args()
    
    if len(arguments) < 2:
        print >>sys.stderr, "You must specify a model type and a command"
        print >>sys.stderr, "Available model types: %s" % ", ".join(BUILDERS.keys())
        print >>sys.stderr, "Use the command 'help' for details of available commands"
        sys.exit(1)
    model_type = arguments[0]
    command = arguments[1].lower()
    command_args = arguments[2:]
    
    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS
    
    def _load_model(name):
        # Load the model from its file
        return model_cls.load_model(name)
        
    # Define behaviour for each command
    list_help = "Lists all the trained models available"
    def _list(args):
        # List the available models for the given model type
        models = model_cls.list_models()
        print "Available models for %s:" % model_cls.MODEL_TYPE
        print ", ".join(list(sorted(group_numerical_suffixes(models))))
        
    desc_help = "Outputs the descriptive text associated with the model at training time"
    def _desc(args):
        if len(args) == 0:
            raise CommandError, "desc requires a model name as an argument"
        try:
            model = _load_model(args[0])
        except ModelLoadError, err:
            # Try loading a model with 0 on the end - allows you to use 
            #  just the base name for crossval models
            try:
                print >>sys.stderr, "No model %s, but %s0 does exist\n" % (args[0], args[0])
                model = _load_model("%s0" % args[0])
            except ModelLoadError:
                raise err
        print "Model descriptor"
        print "================"
        print model.description
Exemplo n.º 3
0
def main():
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a backoff builder model using the given "\
        "input data. Specify a model type (ngram, etc) and a name to "\
        "identify it. The data file should be a stored SequenceIndex file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p',
        '--partitions',
        dest="partitions",
        action="store",
        type="int",
        help=
        "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number."
    )
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="store",
        help=
        "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type."
    )
    # File input options
    parser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 3:
        print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_type = arguments[0]
    model_name = arguments[1]

    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS

    # Load the sequence data from a dbinput file
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=['bulk-db', 'bulk-db-annotated'])

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(model_cls.TRAINING_OPTIONS,
                                intro="Training options for %s" %
                                model_cls.__name__)
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(options.training_opts),
            model_cls.TRAINING_OPTIONS)

    if options.partitions is not None:
        parts = holdout_partition(input_data, options.partitions)
        models = [(builder_cls.partition_model_name(model_name,num),seqs) for \
                        num,seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        # Train it with the loaded data
        model.train(seqs)
        model.save()
        print "Trained model %s" % (part_name)
Exemplo n.º 4
0
 # Check that the options are valid
 try:
     tagger_cls.check_options(topts)
 except ModuleOptionError, err:
     logger.error("Problem with tagger options (--topt): %s" % err)
     return 1
 
 ######## Backoff ########
 # Load the requested backoff model, if any
 if options.backoff is not None:
     from jazzparser.backoff import BUILDERS
     if options.backoff.lower() == "help":
         print "Available backoff model types are: %s" % ", ".join(BUILDERS)
         return 0
     try:
         backoff = get_backoff_builder(options.backoff)
     except BackoffLoadError:
         logger.error("The backoff model '%s' could not be loaded. Possible "\
             "models are: %s" % (options.backoff, ", ".join(BUILDERS)))
         return 1
 else:
     backoff = None
     
 # Get backoff options for initializing the backoff model
 if options.backoff_opts is not None:
     npoptstr = options.backoff_opts
     if "help" in [s.strip().lower() for s in npoptstr]:
         # Output this tagger's option help
         from jazzparser.utils.options import options_help_text
         print options_help_text(backoff.BUILDER_OPTIONS, intro="Available options for selected backoff module")
         return 0