示例#1
0
 def check_options(cls, options):
     """
     Validates an options dictionary against C{cls.TAGGER_OPTIONS}.

     Options are normally only validated when the tagger is instantiated;
     this lets you check them ahead of that point.

     @param options: the options dictionary to check
     @return: the result of L{ModuleOption.process_option_dict} for the
         given options

     """
     validate = ModuleOption.process_option_dict
     return validate(options, cls.TAGGER_OPTIONS)
示例#2
0
 def check_options(cls, options):
     """
     Checks tagger options before a tagger is instantiated.

     Instantiating the tagger normally performs this validation; use this
     method when the check is needed earlier.

     @param options: the options dictionary to check against
         C{cls.TAGGER_OPTIONS}
     @return: whatever L{ModuleOption.process_option_dict} returns

     """
     checked = ModuleOption.process_option_dict(options, cls.TAGGER_OPTIONS)
     return checked
示例#3
0
 def process_option_dict(cls, options):
     """
     Verifies and processes a dictionary of training option values
     against C{cls.OPTIONS}.

     @param options: the raw option values
     @return: the processed dict, as produced by
         L{ModuleOption.process_option_dict}

     """
     process = ModuleOption.process_option_dict
     return process(options, cls.OPTIONS)
示例#4
0
 def process_option_dict(cls, options):
     """
     Runs the given training option values through
     L{ModuleOption.process_option_dict}, using C{cls.OPTIONS} as the
     option definitions.

     @param options: the raw option values
     @return: the processed dict

     """
     processed = ModuleOption.process_option_dict(options, cls.OPTIONS)
     return processed
示例#5
0
    def __init__(self,
                 grammar,
                 tagger,
                 options=None,
                 backoff=None,
                 backoff_options=None,
                 logger=None):
        """
        @param grammar: the L{jazzparser.grammar.Grammar} instance to use for
            parsing
        @param tagger: the L{jazzparser.taggers.tagger.Tagger} subclass
            instance to use to tag the input
        @type options: dict
        @param options: parser-specific options, validated using the
            class' C{check_options}. Defaults to an empty dictionary.
        @param backoff: an optional
            L{jazzparser.backoff.base.BackoffBuilder} class
            to use as a fallback if the parser returns no parses. Whether
            this is used and in what circumstances depends on the type of
            parser.
        @type backoff_options: dict
        @param backoff_options: dictionary of options to pass to the backoff
            model if it gets used. Defaults to a new empty dictionary.
        @type logger: C{logging.Logger}
        @param logger: a logger to which all progress information during
            parsing will be written. By default, outputs to stderr.

        """
        # Build fresh dictionaries here instead of using mutable default
        #  arguments: backoff_options is stored on the instance, so a
        #  default dict in the signature would be shared by every parser
        #  created without explicit options
        if options is None:
            options = {}
        if backoff_options is None:
            backoff_options = {}

        self.grammar = grammar
        self.tagger = tagger
        self.backoff_options = backoff_options
        self.backoff = backoff
        if backoff is not None:
            # Pre-check the options dict
            # This will be done again by the module when instantiated, but
            #  we do it now to verify the options
            ModuleOption.process_option_dict(backoff_options,
                                             backoff.BUILDER_OPTIONS)
        # Initialize using parser-specific options
        self.options = type(self).check_options(options)

        if logger is None:
            # Output to stderr instead
            self.logger = create_plain_stderr_logger()
        else:
            self.logger = logger

        self.timed_out = False
示例#6
0
 def process_training_options(self):
     """
     Verifies and processes the training option values held in
     C{self._options_dict}, using C{self.TRAINING_OPTIONS} as the option
     definitions, and stores the result in C{self._options}.

     """
     process = ModuleOption.process_option_dict
     processed = process(self._options_dict, self.TRAINING_OPTIONS)
     self._options = processed
示例#7
0
 def check_options(cls, options):
     """
     Validates an options dictionary against C{cls.PARSER_OPTIONS}.

     In normal parser usage the options are checked when the parser is
     instantiated; this method makes the same kind of check available
     before that point.

     @param options: the options dictionary to check
     @return: the result of L{ModuleOption.process_option_dict} for the
         given options

     """
     validate = ModuleOption.process_option_dict
     return validate(options, cls.PARSER_OPTIONS)
示例#8
0
 def process_training_options(self):
     """
     Processes the raw training options in C{self._options_dict} with
     L{ModuleOption.process_option_dict}, checking them against
     C{self.TRAINING_OPTIONS}. The processed values are stored in
     C{self._options}.

     """
     process = ModuleOption.process_option_dict
     self._options = process(self._options_dict, self.TRAINING_OPTIONS)
示例#9
0
 def check_options(cls, options):
     """
     Checks parser options before a parser is instantiated.

     The options dictionary is normally checked for validity on
     instantiation; use this method when you want to check it earlier.

     @param options: the options dictionary to check against
         C{cls.PARSER_OPTIONS}
     @return: whatever L{ModuleOption.process_option_dict} returns

     """
     checked = ModuleOption.process_option_dict(options, cls.PARSER_OPTIONS)
     return checked
示例#10
0
    def __init__(self, model, options={}):
        """
        Stores the model to be trained and processes the trainer options.

        @param model: the model to train. Its exact type must appear in
            C{self.MODEL_TYPES}.
        @param options: dictionary of trainer options, processed against
            C{self.OPTIONS}
        @raise RaphstoHmmParameterError: if the model's type is not one of
            C{self.MODEL_TYPES}

        """
        self.model = model
        model_cls = type(model)
        # Only exact type matches are accepted, not subclasses
        if model_cls not in self.MODEL_TYPES:
            raise RaphstoHmmParameterError(
                "trainer %s cannot train a model of type %s" %
                (type(self).__name__, model_cls.__name__))

        self.options = ModuleOption.process_option_dict(options, self.OPTIONS)
        self.model_cls = model_cls
示例#11
0
 def __init__(self, model, options={}):
     """
     Keeps a reference to the model to train and processes the given
     options against C{self.OPTIONS}.

     @param model: the model to train. Its exact type must appear in
         C{self.MODEL_TYPES}.
     @param options: dictionary of trainer options
     @raise RaphstoHmmParameterError: if the model's type is not one of
         C{self.MODEL_TYPES}

     """
     self.model = model
     model_type = type(model)
     trainable = model_type in self.MODEL_TYPES
     if not trainable:
         # Exact type match required: subclasses are not accepted
         message = "trainer %s cannot train a model of type %s" % \
             (type(self).__name__, model_type.__name__)
         raise RaphstoHmmParameterError(message)

     self.options = ModuleOption.process_option_dict(options, self.OPTIONS)
     self.model_cls = model_type
示例#12
0
 def process_output_options(cls, optdict):
     """
     Processes a dictionary of output options against
     C{cls.output_options} and stores the result in
     C{settings.OPTIONS.OUTPUT}, keyed by the name returned by
     C{cls.get_name()}, so that it is globally available.

     @see: L{output_options}.
     @param optdict: the raw output option values

     """
     name = cls.get_name()
     settings.OPTIONS.OUTPUT[name] = ModuleOption.process_option_dict(
         optdict, cls.output_options)
示例#13
0
 def __init__(cls, name, bases, dict):
     """
     Registers default output options for a newly created class.

     For every class except the one named C{FormalismBase}, an empty
     option dictionary is processed against C{cls.output_options} and the
     result is stored in C{settings.OPTIONS.OUTPUT} under
     C{cls.get_name()}.

     """
     # Nothing to do when the base class itself is being created
     if name == "FormalismBase":
         return
     # Processing an empty dict means the default values are available
     #  even if the running script never sets any output options
     key = cls.get_name()
     defaults = ModuleOption.process_option_dict({}, cls.output_options)
     # Keep them where the formalism can reach them globally
     settings.OPTIONS.OUTPUT[key] = defaults
示例#14
0
 def __init__(cls, name, bases, dict):
     """
     Initializes the output options of a newly created class.

     Skipped for the class named C{FormalismBase}. For any other class,
     C{settings.OPTIONS.OUTPUT[cls.get_name()]} is set to the result of
     processing an empty option dictionary against C{cls.output_options}.

     """
     is_base_class = (name == "FormalismBase")
     if not is_base_class:
         # An empty dict is processed so that default values exist even
         #  when no script ever sets the output options
         formalism_name = cls.get_name()
         default_opts = ModuleOption.process_option_dict(
             {}, cls.output_options)
         # Stored globally so the formalism can read them
         settings.OPTIONS.OUTPUT[formalism_name] = default_opts
示例#15
0
 def process_output_options(cls, optdict):
     """
     Makes output options globally available: the given dictionary is
     processed against C{cls.output_options} and the result is stored in
     C{settings.OPTIONS.OUTPUT} under the name from C{cls.get_name()}.

     @see: L{output_options}.
     @param optdict: the raw output option values

     """
     key = cls.get_name()
     processed = ModuleOption.process_option_dict(
         optdict, cls.output_options)
     settings.OPTIONS.OUTPUT[key] = processed
示例#16
0
 def __init__(self, grammar, tagger, options=None, backoff=None,
                 backoff_options=None, logger=None):
     """
     @param grammar: the L{jazzparser.grammar.Grammar} instance to use for
         parsing
     @param tagger: the L{jazzparser.taggers.tagger.Tagger} subclass
         instance to use to tag the input
     @type options: dict
     @param options: parser-specific options, validated using the
         class' C{check_options}. Defaults to an empty dictionary.
     @param backoff: an optional
         L{jazzparser.backoff.base.BackoffBuilder} class
         to use as a fallback if the parser returns no parses. Whether
         this is used and in what circumstances depends on the type of
         parser.
     @type backoff_options: dict
     @param backoff_options: dictionary of options to pass to the backoff
         model if it gets used. Defaults to a new empty dictionary.
     @type logger: C{logging.Logger}
     @param logger: a logger to which all progress information during
         parsing will be written. By default, outputs to stderr.

     """
     # Build fresh dictionaries here instead of using mutable default
     #  arguments: backoff_options is stored on the instance, so a
     #  default dict in the signature would be shared by every parser
     #  created without explicit options
     if options is None:
         options = {}
     if backoff_options is None:
         backoff_options = {}

     self.grammar = grammar
     self.tagger = tagger
     self.backoff_options = backoff_options
     self.backoff = backoff
     if backoff is not None:
         # Pre-check the options dict
         # This will be done again by the module when instantiated, but
         #  we do it now to verify the options
         ModuleOption.process_option_dict(backoff_options,
                                          backoff.BUILDER_OPTIONS)
     # Initialize using parser-specific options
     self.options = type(self).check_options(options)

     if logger is None:
         # Output to stderr instead
         self.logger = create_plain_stderr_logger()
     else:
         self.logger = logger

     self.timed_out = False
示例#17
0
def main():
    """
    Command-line entry point for training backoff builder models.

    Expects three positional arguments: the model type, a model name and
    the input data file. The builder class is looked up with
    C{get_backoff_builder} and its C{MODEL_CLASS} is the class that gets
    trained. Input is read with C{command_line_input}, restricted to the
    C{bulk-db} and C{bulk-db-annotated} file types.

    If C{--partitions} is given, the data is split with
    C{holdout_partition} and one model is trained and saved per returned
    dataset, each named by C{builder_cls.partition_model_name}. Otherwise
    a single model is trained on all of the input.

    Exits with status 1 if fewer than three arguments are given and with
    status 0 after printing the option help for C{--opts help}.

    """
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = (
        "Trains a backoff builder model using the given "
        "input data. Specify a model type (ngram, etc) and a name to "
        "identify it. The data file should be a stored SequenceIndex file.")
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "-p", "--partitions", dest="partitions", action="store", type="int",
        help="train a number of partitions of the given data. Trains a "
             "model on the complement of each partition, so it can be "
             "tested on the partition. The models will be named <NAME>n, "
             "where <NAME> is the model name and n the partition number.")
    parser.add_option(
        "--opts", dest="training_opts", action="store",
        help="options to pass to the model trainer. Type '--opts help' "
             "for a list of options for a particular model type.")
    # File input options
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        default="bulk-db",
        help="select the file type for the input file. Same filetypes as "
             "jazzparser")
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file. Type '--fopt help', using "
             "'--ft <type>' to select file type, for a list of available "
             "options.")
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 3:
        sys.stderr.write(
            "You must specify a model type, a model name and an input "
            "data file as arguments" + "\n")
        sys.exit(1)
    model_type, model_name, data_path = arguments[:3]
    filename = os.path.abspath(data_path)

    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS

    # Load the sequence data from a dbinput file
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=["bulk-db", "bulk-db-annotated"])

    # Handle any training options that were given on the command line
    opt_string = options.training_opts
    if opt_string is None:
        training_opts = {}
    else:
        if opt_string.lower() == "help":
            print(options_help_text(
                model_cls.TRAINING_OPTIONS,
                intro="Training options for %s" % model_cls.__name__))
            sys.exit(0)
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(opt_string),
            model_cls.TRAINING_OPTIONS)

    # Work out the (model name, training data) pairs to train
    if options.partitions is None:
        jobs = [(model_name, input_data)]
    else:
        jobs = []
        partitions = holdout_partition(input_data, options.partitions)
        for index, dataset in enumerate(partitions):
            part_name = builder_cls.partition_model_name(model_name, index)
            jobs.append((part_name, dataset))

    for part_name, dataset in jobs:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        # Train it with the loaded data
        model.train(dataset)
        model.save()
        print("Trained model %s" % (part_name))
示例#18
0
def main():
    """
    Command-line entry point for training PCFG models.

    Expects two positional arguments: a model name and a training input
    file, which is read with C{SequenceIndex.from_file}. The model class
    is the C{PcfgModel} of the formalism of the grammar loaded with
    C{get_grammar}.

    If C{--partitions} is given, the sequences are split with
    C{holdout_partition} and one model is trained and saved per returned
    dataset, named by appending the partition number to the model name.
    Otherwise a single model is trained on all of the sequences.

    Exits with status 1 if the model name or the input file is missing
    and with status 0 after printing the option help for C{--opts help}.

    """
    usage = "%prog [<options>] <model-name> <training-input>"
    description = "Training of PCFG models."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "-p", "--partitions", dest="partitions", action="store", type="int",
        help="Number of partitions to divide the data into. "
             "For train, divides the input file, trains a model on each "
             "partition's complement and appends partition number to "
             "the model names. For del, appends partition numbers to model "
             "names and deletes all the models. Recache does similarly. "
             "Has no effect for parse.")
    parser.add_option(
        "--opts", dest="training_opts", action="store",
        help="options to pass to the model trainer. Type '--opts help' "
             "for a list of options")
    parser.add_option(
        "--debug", dest="debug", action="store_true",
        help="Output verbose logging information to stderr")
    parser.add_option(
        "-g", "--grammar", dest="grammar", action="store",
        help="use the named grammar instead of the default.")
    options, arguments = parse_args_with_config(parser)

    # Create a logger for training
    log_level = logging.DEBUG if options.debug else logging.WARN
    logger = create_logger(log_level=log_level, name="training", stderr=True)

    # Load a grammar
    grammar = get_grammar(options.grammar)
    # Get the pcfg model class for the formalism
    PcfgModel = grammar.formalism.PcfgModel

    # Parse the option string
    opt_string = options.training_opts
    if opt_string is None:
        opts = {}
    else:
        if opt_string.lower() == "help":
            print(options_help_text(PcfgModel.TRAINING_OPTIONS,
                                    intro="Training options for PCFGs"))
            sys.exit(0)
        opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(opt_string),
            PcfgModel.TRAINING_OPTIONS)

    if not arguments:
        sys.stderr.write("Specify a model name" + "\n")
        models = PcfgModel.list_models()
        sys.stderr.write("Available models: %s" % ", ".join(models) + "\n")
        sys.exit(1)
    model_name = arguments[0]
    print("Model base name: %s" % (model_name,))

    if len(arguments) < 2:
        sys.stderr.write(
            "Specify an input file to read sequence data from" + "\n")
        sys.exit(1)
    # Read in the training data from the given file
    seqs = SequenceIndex.from_file(arguments[1])

    # Pair up each model name with the data it is trained on
    if options.partitions is None:
        jobs = [(model_name, seqs.sequences)]
    else:
        # Prepare each training partition
        datasets = holdout_partition(seqs.sequences, options.partitions)
        names = ["%s%d" % (model_name, i)
                 for i in range(options.partitions)]
        jobs = zip(names, datasets)

    for part_model, dataset in jobs:
        # Train the named model on the sequence data
        model = PcfgModel.train(part_model,
                                dataset,
                                opts,
                                grammar=grammar,
                                logger=logger)
        model.save()
        print("Trained model %s" % (part_model,))
示例#19
0
 def process_option_dict(cls, optdict):
     """
     Processes an option dictionary against C{cls.FILE_INPUT_OPTIONS}.

     @param optdict: the raw option values
     @return: the result of L{ModuleOption.process_option_dict}

     """
     process = ModuleOption.process_option_dict
     return process(optdict, cls.FILE_INPUT_OPTIONS)
示例#20
0
def main():
    """
    Command-line entry point for training PCFG models.

    Expects two positional arguments: a model name and a training input
    file, which is read with C{SequenceIndex.from_file}. The model class
    is the C{PcfgModel} of the formalism of the grammar loaded with
    C{get_grammar}.

    If C{--partitions} is given, the sequences are split with
    C{holdout_partition} and one model is trained and saved per returned
    dataset, named by appending the partition number to the model name.
    Otherwise a single model is trained on all of the sequences.

    Exits with status 1 if the model name or the input file is missing
    and with status 0 after printing the option help for C{--opts help}.

    """
    usage = "%prog [<options>] <model-name> <training-input>"
    description = "Training of PCFG models."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "-p", "--partitions", dest="partitions", action="store", type="int",
        help="Number of partitions to divide the data into. "
             "For train, divides the input file, trains a model on each "
             "partition's complement and appends partition number to "
             "the model names. For del, appends partition numbers to model "
             "names and deletes all the models. Recache does similarly. "
             "Has no effect for parse.")
    parser.add_option(
        "--opts", dest="training_opts", action="store",
        help="options to pass to the model trainer. Type '--opts help' "
             "for a list of options")
    parser.add_option(
        "--debug", dest="debug", action="store_true",
        help="Output verbose logging information to stderr")
    parser.add_option(
        "-g", "--grammar", dest="grammar", action="store",
        help="use the named grammar instead of the default.")
    options, arguments = parse_args_with_config(parser)

    # Create a logger for training
    log_level = logging.DEBUG if options.debug else logging.WARN
    logger = create_logger(log_level=log_level, name="training", stderr=True)

    # Load a grammar
    grammar = get_grammar(options.grammar)
    # Get the pcfg model class for the formalism
    PcfgModel = grammar.formalism.PcfgModel

    # Parse the option string
    opt_string = options.training_opts
    if opt_string is None:
        opts = {}
    else:
        if opt_string.lower() == "help":
            print(options_help_text(PcfgModel.TRAINING_OPTIONS,
                                    intro="Training options for PCFGs"))
            sys.exit(0)
        opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(opt_string),
            PcfgModel.TRAINING_OPTIONS)

    if not arguments:
        sys.stderr.write("Specify a model name" + "\n")
        models = PcfgModel.list_models()
        sys.stderr.write("Available models: %s" % ", ".join(models) + "\n")
        sys.exit(1)
    model_name = arguments[0]
    print("Model base name: %s" % (model_name,))

    if len(arguments) < 2:
        sys.stderr.write(
            "Specify an input file to read sequence data from" + "\n")
        sys.exit(1)
    # Read in the training data from the given file
    seqs = SequenceIndex.from_file(arguments[1])

    # Pair up each model name with the data it is trained on
    if options.partitions is None:
        jobs = [(model_name, seqs.sequences)]
    else:
        # Prepare each training partition
        datasets = holdout_partition(seqs.sequences, options.partitions)
        names = ["%s%d" % (model_name, i)
                 for i in range(options.partitions)]
        jobs = zip(names, datasets)

    for part_model, dataset in jobs:
        # Train the named model on the sequence data
        model = PcfgModel.train(part_model, dataset, opts, grammar=grammar,
                                logger=logger)
        model.save()
        print("Trained model %s" % (part_model,))
示例#21
0
 def check_options(cls, options):
     """
     Processes an options dictionary against C{cls.BUILDER_OPTIONS}.

     @param options: the options dictionary to check
     @return: the result of L{ModuleOption.process_option_dict}

     """
     validate = ModuleOption.process_option_dict
     return validate(options, cls.BUILDER_OPTIONS)
示例#22
0
 def process_option_list(self, options):
     """
     Parses an option string and stores the processed options.

     The string is parsed with L{ModuleOption.process_option_string}, the
     resulting dictionary is processed against C{self.tool_options} and
     the outcome is stored in C{self.options}.

     @param options: the option specification, passed unchanged to
         L{ModuleOption.process_option_string}

     """
     parsed = ModuleOption.process_option_string(options)
     processed = ModuleOption.process_option_dict(parsed, self.tool_options)
     self.options = processed
示例#23
0
 def check_options(cls, options):
     """
     Runs the given options through L{ModuleOption.process_option_dict},
     using C{cls.BUILDER_OPTIONS} as the option definitions.

     @param options: the options dictionary to check
     @return: whatever L{ModuleOption.process_option_dict} returns

     """
     checked = ModuleOption.process_option_dict(options, cls.BUILDER_OPTIONS)
     return checked
 def process_option_dict(cls, optdict):
     """
     Runs the given option dictionary through
     L{ModuleOption.process_option_dict}, using C{cls.FILE_INPUT_OPTIONS}
     as the option definitions.

     @param optdict: the raw option values
     @return: whatever L{ModuleOption.process_option_dict} returns

     """
     processed = ModuleOption.process_option_dict(
         optdict, cls.FILE_INPUT_OPTIONS)
     return processed
示例#25
0
 def __init__(self, options={}):
     """
     Processes the given options against C{self.OPTIONS} and stores the
     result in C{self.options}.

     @param options: dictionary of raw option values

     """
     process = ModuleOption.process_option_dict
     self.options = process(options, self.OPTIONS)
示例#26
0
def main():
    """
    Command-line entry point for training backoff builder models.

    Expects three positional arguments: the model type, a model name and
    the input data file. The builder class is looked up with
    C{get_backoff_builder} and its C{MODEL_CLASS} is the class that gets
    trained. Input is read with C{command_line_input}, restricted to the
    C{bulk-db} and C{bulk-db-annotated} file types.

    If C{--partitions} is given, the data is split with
    C{holdout_partition} and one model is trained and saved per returned
    dataset, each named by C{builder_cls.partition_model_name}. Otherwise
    a single model is trained on all of the input.

    Exits with status 1 if fewer than three arguments are given and with
    status 0 after printing the option help for C{--opts help}.

    """
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = (
        "Trains a backoff builder model using the given "
        "input data. Specify a model type (ngram, etc) and a name to "
        "identify it. The data file should be a stored SequenceIndex file.")
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "-p", "--partitions", dest="partitions", action="store", type="int",
        help="train a number of partitions of the given data. Trains a "
             "model on the complement of each partition, so it can be "
             "tested on the partition. The models will be named <NAME>n, "
             "where <NAME> is the model name and n the partition number.")
    parser.add_option(
        "--opts", dest="training_opts", action="store",
        help="options to pass to the model trainer. Type '--opts help' "
             "for a list of options for a particular model type.")
    # File input options
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        default="bulk-db",
        help="select the file type for the input file. Same filetypes as "
             "jazzparser")
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file. Type '--fopt help', using "
             "'--ft <type>' to select file type, for a list of available "
             "options.")
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 3:
        sys.stderr.write(
            "You must specify a model type, a model name and an input "
            "data file as arguments" + "\n")
        sys.exit(1)
    model_type, model_name, data_path = arguments[:3]
    filename = os.path.abspath(data_path)

    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS

    # Load the sequence data from a dbinput file
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=["bulk-db", "bulk-db-annotated"])

    # Handle any training options that were given on the command line
    opt_string = options.training_opts
    if opt_string is None:
        training_opts = {}
    else:
        if opt_string.lower() == "help":
            print(options_help_text(
                model_cls.TRAINING_OPTIONS,
                intro="Training options for %s" % model_cls.__name__))
            sys.exit(0)
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(opt_string),
            model_cls.TRAINING_OPTIONS)

    # Work out the (model name, training data) pairs to train
    if options.partitions is None:
        jobs = [(model_name, input_data)]
    else:
        jobs = []
        partitions = holdout_partition(input_data, options.partitions)
        for index, dataset in enumerate(partitions):
            part_name = builder_cls.partition_model_name(model_name, index)
            jobs.append((part_name, dataset))

    for part_name, dataset in jobs:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        # Train it with the loaded data
        model.train(dataset)
        model.save()
        print("Trained model %s" % (part_name))
示例#27
0
 def process_option_list(self, options):
     """
     Sets C{self.options} from an option string.

     The string is first parsed with
     L{ModuleOption.process_option_string}; the resulting dictionary is
     then processed against C{self.tool_options}.

     @param options: the option specification, passed unchanged to
         L{ModuleOption.process_option_string}

     """
     parsed = ModuleOption.process_option_string(options)
     self.options = ModuleOption.process_option_dict(
         parsed, self.tool_options)
示例#28
0
def main():
    """
    Command-line entry point for training supertagging models.

    Expects three positional arguments: the model type, a model name and
    the input data file. The model type must be in C{TRAINABLE_MODELS}
    and C{TAGGERS}, and the tagger class returned by C{get_tagger} must
    be a subclass of C{ModelTagger}; its C{MODEL_CLASS} is the class that
    gets trained. Input is read with C{command_line_input}, restricted to
    the bulk input types.

    If C{--partitions} is given with a value greater than 1, the data is
    split with C{input_data.get_partitions} and one model is trained and
    saved per partition, each named by
    C{tagger_cls.partition_model_name}. Otherwise a single model is
    trained on all of the input. With C{--log}, each model's training
    output goes to a log file named from the given base name and the
    model name.

    Exits with status 1 on any argument error (including when no
    arguments are given at all) and with status 0 after printing the
    option help for C{--opts help}.

    """
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = (
        "Trains a supertagging model using the given "
        "input data. Specify a model type (baseline1, etc) and a name to "
        "identify it. The data file may be a stored SequenceIndex file, or "
        "any other type of bulk data file. "
        "This can only be used with the follow types of models: %s" %
        ", ".join(TRAINABLE_MODELS))
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "-p", "--partitions", dest="partitions", action="store", type="int",
        help="train a number of partitions of the given data. Trains a "
             "model on the complement of each partition, so it can be "
             "tested on the partition. The models will be named <NAME>n, "
             "where <NAME> is the model name and n the partition number.")
    parser.add_option(
        "--opts", dest="training_opts", action="store",
        help="options to pass to the model trainer. Type '--opts help' "
             "for a list of options for a particular model type.")
    # File input options
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        default="bulk-db",
        help="select the file type for the input file. Same filetypes as "
             "jazzparser")
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file. Type '--fopt help', using "
             "'--ft <type>' to select file type, for a list of available "
             "options.")
    # Logging output
    parser.add_option(
        "--log", dest="log", action="store",
        help="file to output training logs to. Specify a base filename; "
             "<modelname>.log will be added to the end")
    options, arguments = parse_args_with_config(parser)

    # NOTE(review): the grammar object is not used below; the call is
    #  kept in case constructing it has side effects - confirm
    grammar = Grammar()

    # Get the model type first: we might not need the other args
    if len(arguments) == 0:
        sys.stderr.write(
            "You must specify a model type, a model name and an input "
            "data file as arguments" + "\n")
        # Stop here: without this, indexing the empty argument list
        #  below fails with an IndexError instead of a clean exit
        sys.exit(1)
    model_type = arguments[0]

    if model_type not in TRAINABLE_MODELS:
        sys.stderr.write(
            "'%s' is not a valid model type. Available taggers are: %s" %
            (model_type, ", ".join(TRAINABLE_MODELS)) + "\n")
        sys.exit(1)
    if model_type not in TAGGERS:
        sys.stderr.write(
            "'%s' isn't a registered model type. Check that "
            "the name in TRAINABLE_MODELS is correct" % model_type + "\n")
        sys.exit(1)

    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        sys.stderr.write(
            "'%s' tagger cannot be trained with this script. Only model "
            "taggers can be." % (tagger_cls.__name__) + "\n")
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS

    # Handle any training options that were given on the command line
    opt_string = options.training_opts
    if opt_string is None:
        training_opts = {}
    else:
        if opt_string.lower() == "help":
            print(options_help_text(
                model_cls.TRAINING_OPTIONS,
                intro="Training options for %s" % model_cls.__name__))
            sys.exit(0)
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(opt_string),
            model_cls.TRAINING_OPTIONS)

    # Get the rest of the args
    if len(arguments) < 3:
        sys.stderr.write(
            "You must specify a model type, a model name and an input "
            "data file as arguments" + "\n")
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_name = arguments[1]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=get_input_type_names(single=False, bulk=True))

    if options.partitions is not None and options.partitions > 1:
        # Only the second element of the get_partitions() result is used
        parts = input_data.get_partitions(options.partitions)[1]
        models = [(tagger_cls.partition_model_name(model_name, num), seqs)
                  for num, seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print("Logging output to file %s" % logfile)
            logger = create_logger(filename=logfile)
        else:
            logger = None

        # Train the model with the loaded data
        model.train(seqs, logger=logger)
        model.save()
        print("Trained model %s" % (part_name))
示例#29
0
 def process_labeling_options(opts):
     """
     Verifies and processes a dict of labeling option values against
     C{HPChordLabeler.LABELING_OPTIONS}.

     @param opts: the raw option values
     @return: the result of L{ModuleOption.process_option_dict}

     """
     process = ModuleOption.process_option_dict
     return process(opts, HPChordLabeler.LABELING_OPTIONS)
示例#30
0
def main():
    """
    Command-line entry point for training supertagging models.

    Expects three positional arguments: the model type, a model name and
    the input data file. The model type must be in C{TRAINABLE_MODELS}
    and C{TAGGERS}, and the tagger class returned by C{get_tagger} must
    be a subclass of C{ModelTagger}; its C{MODEL_CLASS} is the class that
    gets trained. Input is read with C{command_line_input}, restricted to
    the bulk input types.

    If C{--partitions} is given with a value greater than 1, the data is
    split with C{input_data.get_partitions} and one model is trained and
    saved per partition, each named by
    C{tagger_cls.partition_model_name}. Otherwise a single model is
    trained on all of the input. With C{--log}, each model's training
    output goes to a log file named from the given base name and the
    model name.

    Exits with status 1 on any argument error (including when no
    arguments are given at all) and with status 0 after printing the
    option help for C{--opts help}.

    """
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = (
        "Trains a supertagging model using the given "
        "input data. Specify a model type (baseline1, etc) and a name to "
        "identify it. The data file may be a stored SequenceIndex file, or "
        "any other type of bulk data file. "
        "This can only be used with the follow types of models: %s" %
        ", ".join(TRAINABLE_MODELS))
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "-p", "--partitions", dest="partitions", action="store", type="int",
        help="train a number of partitions of the given data. Trains a "
             "model on the complement of each partition, so it can be "
             "tested on the partition. The models will be named <NAME>n, "
             "where <NAME> is the model name and n the partition number.")
    parser.add_option(
        "--opts", dest="training_opts", action="store",
        help="options to pass to the model trainer. Type '--opts help' "
             "for a list of options for a particular model type.")
    # File input options
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        default="bulk-db",
        help="select the file type for the input file. Same filetypes as "
             "jazzparser")
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file. Type '--fopt help', using "
             "'--ft <type>' to select file type, for a list of available "
             "options.")
    # Logging output
    parser.add_option(
        "--log", dest="log", action="store",
        help="file to output training logs to. Specify a base filename; "
             "<modelname>.log will be added to the end")
    options, arguments = parse_args_with_config(parser)

    # NOTE(review): the grammar object is not used below; the call is
    #  kept in case constructing it has side effects - confirm
    grammar = Grammar()

    # Get the model type first: we might not need the other args
    if len(arguments) == 0:
        sys.stderr.write(
            "You must specify a model type, a model name and an input "
            "data file as arguments" + "\n")
        # Stop here: without this, indexing the empty argument list
        #  below fails with an IndexError instead of a clean exit
        sys.exit(1)
    model_type = arguments[0]

    if model_type not in TRAINABLE_MODELS:
        sys.stderr.write(
            "'%s' is not a valid model type. Available taggers are: %s" %
            (model_type, ", ".join(TRAINABLE_MODELS)) + "\n")
        sys.exit(1)
    if model_type not in TAGGERS:
        sys.stderr.write(
            "'%s' isn't a registered model type. Check that "
            "the name in TRAINABLE_MODELS is correct" % model_type + "\n")
        sys.exit(1)

    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        sys.stderr.write(
            "'%s' tagger cannot be trained with this script. Only model "
            "taggers can be." % (tagger_cls.__name__) + "\n")
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS

    # Handle any training options that were given on the command line
    opt_string = options.training_opts
    if opt_string is None:
        training_opts = {}
    else:
        if opt_string.lower() == "help":
            print(options_help_text(
                model_cls.TRAINING_OPTIONS,
                intro="Training options for %s" % model_cls.__name__))
            sys.exit(0)
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(opt_string),
            model_cls.TRAINING_OPTIONS)

    # Get the rest of the args
    if len(arguments) < 3:
        sys.stderr.write(
            "You must specify a model type, a model name and an input "
            "data file as arguments" + "\n")
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_name = arguments[1]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=get_input_type_names(single=False, bulk=True))

    if options.partitions is not None and options.partitions > 1:
        # Only the second element of the get_partitions() result is used
        parts = input_data.get_partitions(options.partitions)[1]
        models = [(tagger_cls.partition_model_name(model_name, num), seqs)
                  for num, seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print("Logging output to file %s" % logfile)
            logger = create_logger(filename=logfile)
        else:
            logger = None

        # Train the model with the loaded data
        model.train(seqs, logger=logger)
        model.save()
        print("Trained model %s" % (part_name))
示例#31
0
 def process_training_options(opts):
     """
     Verifies and processes the training option values against
     C{HPChordLabeler.TRAINING_OPTIONS}.

     @param opts: the raw option values
     @return: the result of L{ModuleOption.process_option_dict}

     """
     process = ModuleOption.process_option_dict
     return process(opts, HPChordLabeler.TRAINING_OPTIONS)
示例#32
0
 def __init__(self, options={}):
     """
     Stores in C{self.options} the result of processing the given options
     against C{self.OPTIONS}.

     @param options: dictionary of raw option values

     """
     processed = ModuleOption.process_option_dict(options, self.OPTIONS)
     self.options = processed