def create_patterns_file(ngram_range):
    """
        Build an artificial MWE pattern whose word parts are all wildcards
        and register it in the global `patterns` list. Such a pattern
        matches every ngram whose size lies in the requested range, which is
        what the -n option asks for. Going through patterns keeps a single
        candidate extraction function (treat_sentence) for both use cases.

        The parsed bounds are stored in the globals `shortest_pattern` and
        `longest_pattern`. If the range cannot be interpreted, an error is
        reported through `error` and nothing is registered.

        @param ngram_range String argument of the -n option.
    """
    global patterns, usage_string, shortest_pattern, longest_pattern

    bounds = interpret_ngram(ngram_range)
    if not bounds:
        # A false value from interpret_ngram means the range was unusable.
        error("Invalid argument for -n.")
        return

    shortest_pattern, longest_pattern = bounds
    wildcard_pattern = build_generic_pattern(shortest_pattern,
                                             longest_pattern)
    patterns.append(wildcard_pattern)
def create_patterns_file(ngram_range):
    """
        Register one all-wildcard MWE pattern covering the ngram sizes
        requested with the -n option.

        Because every word part of the generated pattern is a wildcard, it
        matches any ngram of a size in the range. This reuses the
        pattern-based extraction function (treat_sentence) instead of
        needing a separate ngram extractor.

        Side effects: sets the globals `shortest_pattern` and
        `longest_pattern` and appends the generated pattern to the global
        `patterns` list. When the range cannot be interpreted, `error` is
        called and the globals are left untouched.

        @param ngram_range String argument of the -n option.
    """
    global patterns, usage_string, shortest_pattern, longest_pattern

    parsed_range = interpret_ngram(ngram_range)
    if parsed_range:
        shortest_pattern, longest_pattern = parsed_range
        generic = build_generic_pattern(shortest_pattern, longest_pattern)
        patterns.append(generic)
        return

    # Falling through means interpret_ngram gave back a false value.
    error("Invalid argument for -n.")
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this
        script. Generic handling is first delegated to
        `treat_options_simplest`; afterwards each (option, value) pair is
        applied to the module-level configuration variables.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage message, forwarded to
        `treat_options_simplest`.

        @raise Exception If an option is not one of those handled here.
    """
    global surface_instead_lemmas
    global glue
    global base_attr
    global min_ngram
    global max_ngram
    global min_frequency
    global ngram_counts
    global selected_candidates
    global use_shelve
    global input_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-s", "--surface"):
            surface_instead_lemmas = True
            base_attr = 'surface'
        elif o in ("-f", "--freq"):
            min_frequency = int(a)
        elif o in ("-n", "--ngram"):
            # interpret_ngram can hand back a false value for a range it
            # cannot parse; unpacking that directly would raise TypeError,
            # so report the bad argument instead.
            ngram_bounds = interpret_ngram(a)
            if ngram_bounds:
                (min_ngram, max_ngram) = ngram_bounds
            else:
                error("Invalid argument for -n.")
        elif o in ("-G", "--glue"):
            if a == "scp":
                glue = scp_glue
            else:
                error("Unknown glue function '%s'" % a)
        elif o in ("-S", "--shelve"):
            use_shelve = True
        elif o == "--from":
            input_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this
        script. `treat_options_simplest` is called first with the same
        arguments; then every (option, value) pair in `opts` updates the
        corresponding module-level setting.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage message, forwarded to
        `treat_options_simplest`.

        @raise Exception If an option is not one of those handled here.
    """
    global surface_instead_lemmas
    global glue
    global base_attr
    global min_ngram
    global max_ngram
    global min_frequency
    global ngram_counts
    global selected_candidates
    global use_shelve
    global input_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-s", "--surface"):
            surface_instead_lemmas = True
            base_attr = 'surface'
        elif o in ("-f", "--freq"):
            min_frequency = int(a)
        elif o in ("-n", "--ngram"):
            # A malformed range makes interpret_ngram return a false value;
            # check it before unpacking to avoid a TypeError traceback.
            parsed_range = interpret_ngram(a)
            if parsed_range:
                (min_ngram, max_ngram) = parsed_range
            else:
                error("Invalid argument for -n.")
        elif o in ("-G", "--glue"):
            if a == "scp":
                glue = scp_glue
            else:
                error("Unknown glue function '%s'" % a)
        elif o in ("-S", "--shelve"):
            use_shelve = True
        elif o == "--from":
            input_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this
        script. Generic handling is first delegated to
        `treat_options_simplest`; afterwards each (option, value) pair is
        applied to the module-level configuration variables. Options not
        listed in the loop below are silently skipped by this function.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage message, forwarded to
        `treat_options_simplest`.
    """
    global surface_instead_lemmas
    global glue
    global corpus_from_index
    global base_attr
    global min_ngram
    global max_ngram
    global min_frequency

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-s", "--surface"):
            surface_instead_lemmas = True
            base_attr = 'surface'
        elif o in ("-f", "--freq"):
            min_frequency = int(a)
        elif o in ("-n", "--ngram"):
            # interpret_ngram can hand back a false value for a range it
            # cannot parse; unpacking that directly would raise TypeError,
            # so report the bad argument instead.
            ngram_bounds = interpret_ngram(a)
            if ngram_bounds:
                (min_ngram, max_ngram) = ngram_bounds
            else:
                error("Invalid argument for -n.")
        elif o in ("-i", "--index"):
            corpus_from_index = True
        elif o in ("-G", "--glue"):
            if a == "scp":
                glue = scp_glue
            else:
                error("Unknown glue function '%s'" % a)
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this
        script. `treat_options_simplest` is called first with the same
        arguments; then every (option, value) pair in `opts` updates the
        corresponding module-level setting. Options not listed in the loop
        below are silently skipped by this function.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage message, forwarded to
        `treat_options_simplest`.
    """
    global surface_instead_lemmas
    global glue
    global corpus_from_index
    global base_attr
    global min_ngram
    global max_ngram
    global min_frequency

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-s", "--surface"):
            surface_instead_lemmas = True
            base_attr = 'surface'
        elif o in ("-f", "--freq"):
            min_frequency = int(a)
        elif o in ("-n", "--ngram"):
            # A malformed range makes interpret_ngram return a false value;
            # check it before unpacking to avoid a TypeError traceback.
            parsed_range = interpret_ngram(a)
            if parsed_range:
                (min_ngram, max_ngram) = parsed_range
            else:
                error("Invalid argument for -n.")
        elif o in ("-i", "--index"):
            corpus_from_index = True
        elif o in ("-G", "--glue"):
            if a == "scp":
                glue = scp_glue
            else:
                error("Unknown glue function '%s'" % a)