示例#1
0
def create_patterns_file( ngram_range ) :
    """
        Create an artificial list of MWE patterns in which all the parts of
        the words are wildcards. Such artificial patterns match every ngram
        of size n, which is exactly what we want to do with the option -n. This
        may seem a weird way to extract ngrams, but it allows a single 
        transparent candidate extraction function, treat_sentence.
        
        @param ngram_range String argument of the -n option.
    """        
    global patterns, usage_string, shortest_pattern, longest_pattern
    result = interpret_ngram( ngram_range )
    if result :
        ( shortest_pattern, longest_pattern ) = result
        patterns.append(build_generic_pattern(shortest_pattern, longest_pattern))
    else :
        error("Invalid argument for -n.")
示例#2
0
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.
        
        @param opts The options parsed by getopts. Ignored.
        
        @param arg The argument list parsed by getopts.
        
        @param n_arg The number of arguments expected for this script.    
    """
    global surface_instead_lemmas
    global glue
    global base_attr
    global min_ngram
    global max_ngram
    global min_frequency
    global ngram_counts
    global selected_candidates
    global use_shelve
    global input_filetype_ext

    treat_options_simplest( opts, arg, n_arg, usage_string )

    mode = []
    for ( o, a ) in opts:
        if o in ("-s", "--surface") : 
            surface_instead_lemmas = True
            base_attr = 'surface'
        elif o in ("-f", "--freq") :
            min_frequency = int(a)
        elif o in ("-n", "--ngram") :
            (min_ngram, max_ngram) = interpret_ngram(a)
        elif o in ("-G", "--glue"):
            if a == "scp":
                glue = scp_glue
            else:
                error("Unknown glue function '%s'" % a)
        elif o in ("-S", "--shelve"):
            use_shelve = True
        elif o == "--from":
            input_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)
示例#3
0
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.
        
        @param opts The options parsed by getopts. Ignored.
        
        @param arg The argument list parsed by getopts.
        
        @param n_arg The number of arguments expected for this script.    
    """
    global surface_instead_lemmas
    global glue
    global corpus_from_index
    global base_attr
    global min_ngram
    global max_ngram
    global min_frequency

    treat_options_simplest( opts, arg, n_arg, usage_string )

    mode = []
    for ( o, a ) in opts:
        if o in ("-s", "--surface") : 
            surface_instead_lemmas = True
            base_attr = 'surface'
        elif o in ("-f", "--freq") :
            min_frequency = int(a)
        elif o in ("-n", "--ngram") :
            (min_ngram, max_ngram) = interpret_ngram(a)
        elif o in ("-i", "--index") :
            corpus_from_index = True
        elif o in ("-G", "--glue"):
            if a == "scp":
                glue = scp_glue
            else:
                error("Unknown glue function '%s'" % a)