def from_config(cls, config, name, section_key="extractors"): section = config[section_key][name] if 'module' in section: return yamlconf.import_module(section['module']) elif 'class' in section: Class = yamlconf.import_module(section['class']) return Class.from_config(config, name)
def from_config(cls, config, name, section_key="tokenizers"): section = config[section_key][name] if 'module' in section: return yamlconf.import_module(section['module']) else: Tokenizer = yamlconf.import_module(section['class']) return Tokenizer.from_config(config, name, section_key)
def from_config(self, config, name, section_key="languages"): section = config[section_key][name] if 'module' in section: return yamlconf.import_module(section['module']) elif 'class' in section: Class = yamlconf.import_module(section['class']) return Class.from_config(config, section_key)
def from_config(cls, config, name, section_key="score_caches"): logger.info("Loading ScoreCache '{0}' from config.".format(name)) section = config[section_key][name] if 'module' in section: return yamlconf.import_module(section['module']) elif 'class' in section: Class = yamlconf.import_module(section['class']) return Class.from_config(config, name)
def from_config(cls, config, name, section_key='scorer_models'):
    section = config[section_key][name]
    if 'module' in section:
        return yamlconf.import_module(section['module'])
    elif 'class' in section:
        class_path = section['class']
        Class = yamlconf.import_module(class_path)
        assert cls != Class
        return Class.from_config(config, name)
def from_config(cls, config, name, section_key="diff_engines"): """ Constructs a :class:`deltas.DiffEngine` from a configuration doc. """ section = config[section_key][name] if 'module' in section: return yamlconf.import_module(section['module']) else: Engine = yamlconf.import_module(section['class']) return Engine.from_config(config, name, section_key=section_key)
def from_config(cls, config, name, section_key='scorer_models'):
    section = config[section_key][name]
    if 'module' in section:
        return yamlconf.import_module(section['module'])
    elif 'class' in section:
        class_path = section['class']
        Class = yamlconf.import_module(class_path)
        assert cls != Class
        return Class.from_config(config, name, section_key=section_key)
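# The from_config() classmethods above all follow the same lookup pattern:
# read config[section_key][name], then either import a ready-made object named
# by 'module' or import the class named by 'class' and delegate construction
# to it. A minimal sketch of that resolution with a hypothetical config dict
# (the section contents and dotted paths below are illustrative only):
import yamlconf


def resolve_from_config(config, name, section_key='scorer_models'):
    section = config[section_key][name]
    if 'module' in section:
        # 'module' points at an already-constructed object to import as-is.
        return yamlconf.import_module(section['module'])
    elif 'class' in section:
        # 'class' points at a class that finishes its own construction.
        Class = yamlconf.import_module(section['class'])
        return Class.from_config(config, name, section_key=section_key)
    else:
        raise RuntimeError("No module or class to load.")


config = {
    'scorer_models': {
        'enwiki_damaging': {'class': 'mypackage.models.DamagingModel'},
        'prebuilt': {'module': 'mypackage.prebuilt.enwiki_damaging'},
    },
}
# resolve_from_config(config, 'enwiki_damaging') would import the class at
# 'mypackage.models.DamagingModel' and call its from_config().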
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    ScoringModel = yamlconf.import_module(args['<scoring-model>'])
    features = yamlconf.import_module(args['<features>'])
    version = args['--version']

    estimator_params = {}
    for parameter in args['--parameter']:
        key, value = parameter.split("=", 1)
        estimator_params[key] = json.loads(value)

    labels, label_weights, population_rates = \
        read_labels_and_population_rates(
            args['--labels'], args['--label-weight'], args['--pop-rate'],
            args['--labels-config'])

    multilabel = False
    if args['--multilabel']:
        multilabel = True

    model = ScoringModel(
        features, version=version,
        multilabel=multilabel,
        labels=labels,
        label_weights=label_weights,
        population_rates=population_rates,
        center=args['--center'],
        scale=args['--scale'],
        **estimator_params)

    if args['--observations'] == "<stdin>":
        observations = read_observations(sys.stdin)
    else:
        observations = read_observations(open(args['--observations']))

    label_name = args['<label>']
    value_labels = \
        [(list(solve(features, cache=ob['cache'])), ob[label_name])
         for ob in observations]

    if args['--model-file'] == "<stdout>":
        model_file = sys.stdout.buffer
    else:
        model_file = open(args['--model-file'], 'wb')

    folds = int(args['--folds'])
    workers = int(args['--workers']) if args['--workers'] is not None else None

    run(value_labels, model_file, model, folds, workers)
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    ScoringModel = yamlconf.import_module(args['<scoring-model>'])
    features = yamlconf.import_module(args['<features>'])
    version = args['--version']

    estimator_params = {}
    for parameter in args['--parameter']:
        key, value = parameter.split("=", 1)
        estimator_params[key] = json.loads(value)

    labels, label_weights, population_rates = \
        read_labels_and_population_rates(
            args['--labels'], args['--label-weight'], args['--pop-rate'],
            args['--labels-config'])

    multilabel = False
    if args['--multilabel']:
        multilabel = True

    model = ScoringModel(
        features, version=version,
        multilabel=multilabel,
        labels=labels,
        label_weights=label_weights,
        population_rates=population_rates,
        center=args['--center'],
        scale=args['--scale'],
        **estimator_params)

    if args['--observations'] == "<stdin>":
        observations = read_observations(sys.stdin)
    else:
        observations = read_observations(open(args['--observations']))

    label_name = args['<label>']
    value_labels = \
        [(list(solve(features, cache=ob['cache'])), ob[label_name])
         for ob in observations]

    if args['--model-file'] == "<stdout>":
        model_file = sys.stdout.buffer
    else:
        model_file = open(args['--model-file'], 'wb')

    folds = int(args['--folds'])
    workers = int(args['--workers']) if args['--workers'] is not None else None

    run(value_labels, model_file, model, folds, workers)
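# The --parameter handling in the two main() functions above turns repeated
# "key=json_value" command-line flags into estimator keyword arguments via
# json.loads. A small, self-contained illustration (the flag values below are
# made up for the example):
import json

raw_parameters = ['n_estimators=300', 'max_features="log2"']
estimator_params = {}
for parameter in raw_parameters:
    key, value = parameter.split("=", 1)
    estimator_params[key] = json.loads(value)

# estimator_params == {'n_estimators': 300, 'max_features': 'log2'}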
def from_config(cls, config, name, section_key="score_caches"): try: import yamlconf except ImportError: raise ImportError("Could not find yamlconf. This packages is " + "required when using yaml config files.") logger.info("Loading ScoreCache '{0}' from config.".format(name)) section = config[section_key][name] if 'module' in section: return yamlconf.import_module(section['module']) elif 'class' in section: Class = yamlconf.import_module(section['class']) return Class.from_config(config, name)
def from_config(cls, config, name, section_key="metrics_collectors"): try: import yamlconf except ImportError: raise ImportError("Could not find yamlconf. This packages is " + "required when using yaml config files.") logger.info("Loading MetricsCollector '{0}' from config.".format(name)) section = config[section_key][name] if 'module' in section: return yamlconf.import_module(section['module']) elif 'class' in section: Class = yamlconf.import_module(section['class']) return Class.from_config(config, name)
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    sys.path.insert(0, ".")  # Search local directory first
    ScorerModel = yamlconf.import_module(args['<scorer-model>'])
    features = yamlconf.import_module(args['<features>'])
    version = args['--version']

    estimator_params = {}
    for parameter in args['--parameter']:
        key, value = parameter.split("=")
        estimator_params[key] = json.loads(value)

    test_statistics = []
    for stat_str in args['--statistic']:
        test_statistics.append(TestStatistic.from_stat_str(stat_str))

    scorer_model = ScorerModel(
        features, version=version,
        balanced_sample=args['--balance-sample'],
        balanced_sample_weight=args['--balance-sample-weight'],
        center=args['--center'],
        scale=args['--scale'],
        **estimator_params)

    if args['--observations'] == "<stdin>":
        observations = read_observations(sys.stdin)
    else:
        observations = read_observations(open(args['--observations']))

    label_name = args['<label>']
    value_labels = \
        [(list(solve(features, cache=ob['cache'])), ob[label_name])
         for ob in observations]

    if args['--model-file'] == "<stdout>":
        model_file = sys.stdout.buffer
    else:
        model_file = open(args['--model-file'], 'wb')

    folds = int(args['--folds'])
    workers = int(args['--workers']) if args['--workers'] is not None else None

    run(value_labels, model_file, scorer_model, test_statistics, folds,
        workers)
def from_config(cls, config, name, section_key='scorer_models'):
    section = config[section_key][name]
    if 'module' in section:
        return yamlconf.import_module(section['module'])
    elif 'class' in section:
        class_path = section['class']
        Class = yamlconf.import_module(class_path)

        if 'model_file' in section:
            return Class.load(open(section['model_file'], 'rb'))
        else:
            return Class(
                **{k: v for k, v in section.items() if k != "class"})
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    ScorerModel = yamlconf.import_module(args['<scorer_model>'])
    features = yamlconf.import_module(args['<features>'])
    version = args['--version']

    model_kwargs = {}
    for parameter in args['--parameter']:
        key, value = parameter.split("=")
        model_kwargs[key] = json.loads(value)

    test_statistics = []
    for stat_str in args['--statistic']:
        test_statistics.append(TestStatistic.from_stat_str(stat_str))

    scorer_model = ScorerModel(
        features, version=version,
        balanced_sample=args['--balance-sample'],
        balanced_sample_weight=args['--balance-sample-weight'],
        center=args['--center'],
        scale=args['--scale'],
        **model_kwargs)

    if args['--values-labels'] == "<stdin>":
        observations_f = sys.stdin
    else:
        observations_f = open(args['--values-labels'], 'r')

    if args['--model-file'] == "<stdout>":
        model_file = sys.stdout.buffer
    else:
        model_file = open(args['--model-file'], 'wb')

    decode_label = util.DECODERS[args['--label-type']]
    observations = util.read_observations(observations_f,
                                          scorer_model.features,
                                          decode_label)

    test_prop = float(args['--test-prop'])

    run(observations, model_file, scorer_model, test_statistics, test_prop)
def from_config(cls, config, name, section_key='scorer_models'):
    section = config[section_key][name]
    if 'module' in section:
        return yamlconf.import_module(section['module'])
    elif 'class' in section:
        class_path = section['class']
        Class = yamlconf.import_module(class_path)

        if 'model_file' in section:
            # TODO: Cache the model file for reuse across workers?
            with open_file(section['model_file']) as stream:
                return Class.load(stream)
        else:
            return Class(**{k: v for k, v in section.items()
                            if k != "class"})
def from_config(cls, config, name, section_key="scoring_systems"): try: import yamlconf except ImportError: raise ImportError("Could not find yamlconf. This packages is " + "required when using yaml config files.") logger.info("Loading ScoreProcessor '{0}' from config.".format(name)) section = config[section_key][name] if 'module' in section: return yamlconf.import_module(section['module']) elif 'class' in section: Class = yamlconf.import_module(section['class']) return Class.from_config(config, name) else: raise RuntimeError("No module or class to load.")
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    ScorerModel = yamlconf.import_module(args['<scorer_model>'])
    features = yamlconf.import_module(args['<features>'])
    version = args['--version']

    model_kwargs = {}
    for parameter in args['--parameter']:
        key, value = parameter.split("=")
        model_kwargs[key] = json.loads(value)

    test_statistics = []
    for stat_str in args['--statistic']:
        test_statistics.append(TestStatistic.from_stat_str(stat_str))

    scorer_model = ScorerModel(
        features, version=version,
        balanced_sample=args['--balance-sample'],
        balanced_sample_weight=args['--balance-sample-weight'],
        center=args['--center'],
        scale=args['--scale'],
        **model_kwargs)

    if args['--values-labels'] == "<stdin>":
        observations_f = sys.stdin
    else:
        observations_f = open(args['--values-labels'], 'r')

    if args['--model-file'] == "<stdout>":
        model_file = sys.stdout.buffer
    else:
        model_file = open(args['--model-file'], 'wb')

    decode_label = util.DECODERS[args['--label-type']]
    observations = util.read_observations(observations_f,
                                          scorer_model.features,
                                          decode_label)

    test_prop = float(args['--test-prop'])

    run(observations, model_file, scorer_model, test_statistics, test_prop)
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.WARNING if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    features = yamlconf.import_module(args['<features>'])

    session = mwapi.Session(args['--host'],
                            user_agent="Revscoring feature extractor utility")
    extractor = api.Extractor(session)

    if args['--rev-labels'] == "<stdin>":
        rev_labels = read_rev_labels(sys.stdin)
    else:
        rev_labels = read_rev_labels(open(args['--rev-labels']))

    if args['--value-labels'] == "<stdout>":
        value_labels = sys.stdout
    else:
        value_labels = open(args['--value-labels'], 'w')

    include_revid = bool(args['--include-revid'])

    if args['--extractors'] == "<cpu count>":
        extractors = cpu_count()
    else:
        extractors = int(args['--extractors'])

    verbose = args['--verbose']
    debug = args['--debug']

    run(rev_labels, value_labels, features, extractor, include_revid,
        extractors, verbose, debug)
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    sys.path.insert(0, ".")  # Search local directory first
    features = yamlconf.import_module(args['<features>'])
    label_name = args['<label>']

    if args['<model>'] is not None:
        model = Model.load(open(args['<model>']))
    else:
        model = None

    additional_fields = args['<additional-field>']

    if args['--input'] == "<stdin>":
        observations = read_observations(sys.stdin)
    else:
        observations = read_observations(open(args['--input']))

    if args['--output'] == "<stdout>":
        output = sys.stdout
    else:
        output = open(args['--output'], 'w')

    verbose = args['--verbose']

    run(observations, output, features, label_name, model, additional_fields,
        verbose)
def from_config(cls, config, name, section_key="segmenters"): """ Constructs a segmenter from a configuration doc. """ section = config[section_key][name] segmenter_class_path = section['class'] Segmenter = yamlconf.import_module(segmenter_class_path) return Segmenter.from_config(config, name, section_key=section_key)
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')

    observations = read_observations(sys.stdin)

    sys.path.insert(0, ".")  # Search local directory first
    features = yamlconf.import_module(args['<features>'])
    label_name = args['<label>']
    verbose = args['--verbose']

    run(observations, features, label_name, verbose)
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')

    params_config = yamlconf.load(open(args['<params-config>']))

    features_path = args['<features>']
    features = yamlconf.import_module(features_path)

    label_decoder = util.DECODERS[args['--label-type']]
    if args['--observations'] == "<stdin>":
        observations_f = sys.stdin
    else:
        observations_f = open(args['--observations'])

    observations = util.read_observations(observations_f, features,
                                          label_decoder)

    # Get a specialized scorer if we have one
    scoring = metrics.SCORERS.get(args['--scoring'], args['--scoring'])

    folds = int(args['--folds'])

    if args['--report'] == "<stdout>":
        report = sys.stdout
    else:
        report = open(args['--report'], "w")

    if args['--processes'] == "<cpu-count>":
        processes = multiprocessing.cpu_count()
    else:
        processes = int(args['--processes'])

    if args['--cv-timeout'] == "<forever>":
        cv_timeout = None
    else:
        cv_timeout = float(args['--cv-timeout']) * 60  # Convert to seconds

    scale_features = args['--scale-features']
    verbose = args['--verbose']

    run(params_config, features_path, observations, scoring, folds, report,
        processes, cv_timeout, scale_features, verbose)
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.WARNING if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    features = yamlconf.import_module(args['<features>'])

    session = mwapi.Session(args['--host'],
                            user_agent="Revscoring feature extractor utility")

    if args['--login']:
        sys.stderr.write("Log into " + args['--host'] + "\n")
        sys.stderr.write("Username: "******"Password: "******

    if args['--rev-labels'] == "<stdin>":
        rev_labels = read_rev_labels(sys.stdin)
    else:
        rev_labels = read_rev_labels(open(args['--rev-labels']))

    if args['--value-labels'] == "<stdout>":
        value_labels = sys.stdout
    else:
        value_labels = open(args['--value-labels'], 'w')

    include_revid = bool(args['--include-revid'])

    if args['--extractors'] == "<cpu count>":
        extractors = cpu_count()
    else:
        extractors = int(args['--extractors'])

    if args['--profile'] is not None:
        profile_f = open(args['--profile'], 'w')
    else:
        profile_f = None

    verbose = args['--verbose']
    debug = args['--debug']

    run(rev_labels, value_labels, features, extractor, include_revid,
        extractors, profile_f, verbose, debug)
def _model_param_grid(params_config):
    for name, config in params_config.items():
        try:
            Model = yamlconf.import_module(config['class'])
        except Exception:
            logger.warn("Could not load model {0}".format(config['class']))
            logger.warn("Exception:\n" + traceback.format_exc())
            continue

        if not hasattr(Model, "train"):
            logger.warn("Model {0} does not have a train() method.".format(
                config['class']))
            continue

        param_grid = grid_search.ParameterGrid(config['params'])

        yield name, Model, param_grid
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')

    params_config = yamlconf.load(open(args['<params-config>']))

    features_path = args['<features>']
    features = yamlconf.import_module(features_path)

    label_decoder = util.DECODERS[args['--label-type']]
    if args['--observations'] == "<stdin>":
        observations_f = sys.stdin
    else:
        observations_f = open(args['--observations'])

    observations = util.read_observations(observations_f, features,
                                          label_decoder)

    # Get a specialized scorer if we have one
    scoring = metrics.SCORERS.get(args['--scoring'], args['--scoring'])

    folds = int(args['--folds'])

    if args['--report'] == "<stdout>":
        report = sys.stdout
    else:
        report = open(args['--report'], "w")

    if args['--processes'] == "<cpu-count>":
        processes = multiprocessing.cpu_count()
    else:
        processes = int(args['--processes'])

    if args['--cv-timeout'] == "<forever>":
        cv_timeout = None
    else:
        cv_timeout = float(args['--cv-timeout']) * 60  # Convert to seconds

    scale_features = args['--scale-features']
    verbose = args['--verbose']

    run(params_config, features_path, observations, scoring, folds, report,
        processes, cv_timeout, scale_features, verbose)
def _model_param_grid(params_config):
    for name, config in params_config.items():
        try:
            Model = yamlconf.import_module(config['class'])
        except Exception:
            logger.warn("Could not load model {0}"
                        .format(config['class']))
            logger.warn("Exception:\n" + traceback.format_exc())
            continue

        if not hasattr(Model, "train"):
            logger.warn("Model {0} does not have a train() method."
                        .format(config['class']))
            continue

        param_grid = grid_search.ParameterGrid(config['params'])

        yield name, Model, param_grid
def _estimator_param_grid(params_config):
    for name, config in params_config.items():
        try:
            EstimatorClass = yamlconf.import_module(config['class'])
            estimator = EstimatorClass()
        except Exception:
            logger.warn("Could not load estimator {0}".format(config['class']))
            logger.warn("Exception:\n" + traceback.format_exc())
            continue

        if not hasattr(estimator, "fit"):
            logger.warn("Estimator {0} does not have a fit() method.".format(
                config['class']))
            continue

        param_grid = grid_search.ParameterGrid(config['params'])

        yield name, estimator, param_grid
def _estimator_param_grid(params_config):
    for name, config in params_config.items():
        try:
            EstimatorClass = yamlconf.import_module(config['class'])
            estimator = EstimatorClass()
        except Exception:
            logger.warn("Could not load estimator {0}"
                        .format(config['class']))
            logger.warn("Exception:\n" + traceback.format_exc())
            continue

        if not hasattr(estimator, "fit"):
            logger.warn("Estimator {0} does not have a fit() method."
                        .format(config['class']))
            continue

        param_grid = grid_search.ParameterGrid(config['params'])

        yield name, estimator, param_grid
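# For context, the *_param_grid helpers above expect params_config to map a
# display name to a 'class' path plus a 'params' grid. A minimal, hypothetical
# sketch of that shape and of expanding it with ParameterGrid (shown here via
# sklearn.model_selection rather than the older grid_search module used above;
# the class path and grid values are illustrative only):
from sklearn.model_selection import ParameterGrid

params_config = {
    'gradient_boosting': {
        'class': 'sklearn.ensemble.GradientBoostingClassifier',
        'params': {
            'n_estimators': [100, 300],
            'max_depth': [3, 5],
        },
    },
}

for name, config in params_config.items():
    for params in ParameterGrid(config['params']):
        # e.g. gradient_boosting {'max_depth': 3, 'n_estimators': 100}
        print(name, params)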
def from_config(self, config, name, section_key="languages"): """ Constructs a :class:`revscoring.languages.language.Language` from a `dict`. :Parameters: config : dict A configuration dictionary name : str The name of the sub-section in which to look for configuration information section_key : str The top-level section key under which to look for `name` """ section = config[section_key][name] if 'module' in section: return yamlconf.import_module(section['module']) elif 'class' in section: raise RuntimeError("Loading a language via class construction " + \ "not yet supported")
def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.WARNING if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    features = yamlconf.import_module(args['<features>'])

    session = mwapi.Session(args['--host'],
                            user_agent="Revscoring feature extractor utility")
    extractor = APIExtractor(session)

    if args['--rev-labels'] == "<stdin>":
        rev_labels = read_rev_labels(sys.stdin)
    else:
        rev_labels = read_rev_labels(open(args['--rev-labels']))

    if args['--value-labels'] == "<stdout>":
        value_labels = sys.stdout
    else:
        value_labels = open(args['--value-labels'], 'w')

    include_revid = bool(args['--include-revid'])

    if args['--extractors'] == "<cpu count>":
        extractors = cpu_count()
    else:
        extractors = int(args['--extractors'])

    verbose = args['--verbose']
    debug = args['--debug']

    run(rev_labels, value_labels, features, extractor, include_revid,
        extractors, verbose, debug)
def from_config(cls, doc, name):
    detector_class_path = doc['detectors'][name]['class']
    Detector = yamlconf.import_module(detector_class_path)
    return Detector.from_config(doc, name)
def from_config(cls, doc, name):
    segmenter_class_path = doc['segmenters'][name]['class']
    Segmenter = yamlconf.import_module(segmenter_class_path)
    return Segmenter.from_config(doc, name)
def from_config(cls, doc, name):
    tokenizer_class_path = doc['tokenizers'][name]['class']
    Tokenizer = yamlconf.import_module(tokenizer_class_path)
    return Tokenizer.from_config(doc, name)