Exemplo n.º 1
0
def tokenize(corpus_name):
    """
    Tokenize a submitted string with the named corpus's tokenizer.

    Routes:
        POST /{corpus}/tokenize
        GET  /{corpus}/tokenize?s=...

    The corpus is resolved through get_corpus_or_404 and the text to
    tokenize is read through get_string_content (a file or a string).
    The response is a JSON object with a single ``tokens`` key.
    """
    language_corpus = get_corpus_or_404(corpus_name)
    text = get_string_content()
    # mid_line=False is passed straight through to the corpus tokenizer.
    token_list = language_corpus.tokenize(text, mid_line=False)
    return jsonify(tokens=token_list)
Exemplo n.º 2
0
def cross_entropy(corpus_name):
    """
    Report the cross-entropy of uploaded content against a corpus.

    Route:
        POST /{corpus}/xentropy/

    The uploaded content is tokenized by the corpus and the resulting
    tokens are handed to the corpus's cross_entropy method. The response
    is a JSON object with a single ``cross_entropy`` key.
    """
    target = get_corpus_or_404(corpus_name)
    uploaded = get_string_content()
    token_seq = target.tokenize(uploaded)
    score = target.cross_entropy(token_seq)
    return jsonify(cross_entropy=score)
Exemplo n.º 3
0
def cross_entropy(corpus_name):
    """
    Compute how the uploaded file scores, in cross-entropy, w.r.t. a corpus.

    Route:
        POST /{corpus}/xentropy/

    Returns a JSON response whose ``cross_entropy`` field holds whatever
    the corpus's cross_entropy method produces for the tokenized upload.
    """
    corpus = get_corpus_or_404(corpus_name)
    # Tokenize the request payload with the corpus's own tokenizer.
    tokens = corpus.tokenize(get_string_content())
    result = corpus.cross_entropy(tokens)
    return jsonify(cross_entropy=result)
Exemplo n.º 4
0
def tokenize(corpus_name):
    """
    Split the request's string into tokens for this corpus's language.

    Routes:
        POST /{corpus}/tokenize
        GET  /{corpus}/tokenize?s=...

    Returns a JSON response with the tokens under the ``tokens`` key.
    """
    corpus = get_corpus_or_404(corpus_name)
    # The payload is expected to be either a file or a string.
    payload = get_string_content()
    tokenized = corpus.tokenize(payload, mid_line=False)
    return jsonify(tokens=tokenized)
Exemplo n.º 5
0
def train(corpus_name):
    """
    Train the named corpus on an uploaded file.

    Route:
        POST /{corpus}/

    The upload is tokenized by the corpus and the tokens are passed to
    the corpus's train method. The response carries HTTP status 202 and
    a JSON body whose ``tokens`` field is the number of tokens.
    """
    target = get_corpus_or_404(corpus_name)
    uploaded = get_string_content()
    token_seq = target.tokenize(uploaded)

    # train() has no useful return value, so report the token count instead.
    target.train(token_seq)
    body = jsonify(tokens=len(token_seq))
    return make_response(body, 202)
Exemplo n.º 6
0
def train(corpus_name):
    """
    Accept a file upload and feed its tokens to the corpus for training.

    Route:
        POST /{corpus}/

    Responds with status 202 and a JSON object whose ``tokens`` value is
    the length of the token sequence that was submitted for training.
    """
    corpus = get_corpus_or_404(corpus_name)
    tokens = corpus.tokenize(get_string_content())

    # Nothing meaningful comes back from train(); the call is made for
    # its effect on the corpus only.
    corpus.train(tokens)

    token_count = len(tokens)
    return make_response(jsonify(tokens=token_count), 202)
Exemplo n.º 7
0
def predict(corpus_name, token_str=""):
    """
    Return suggestions for the given token prefix.

    Routes:
        POST /{corpus}/predict/{tokens*}
        POST /{corpus}/predict/f=?

    When ``token_str`` is non-empty it is parsed with parse_tokens;
    otherwise the request content is tokenized by the corpus. The
    corpus's prediction is returned as JSON.
    """
    corpus = get_corpus_or_404(corpus_name)

    # Tokens in the URL take precedence; fall back to the request content.
    prefix = (
        parse_tokens(token_str)
        if token_str
        else corpus.tokenize(get_string_content())
    )

    # predict() already yields a JSON-serializable dictionary.
    suggestions = corpus.predict(prefix)
    return jsonify(suggestions)
Exemplo n.º 8
0
def predict(corpus_name, token_str=""):
    """
    Suggest continuations for a token prefix using the named corpus.

    Routes:
        POST /{corpus}/predict/{tokens*}
        POST /{corpus}/predict/f=?

    The prefix comes from ``token_str`` (via parse_tokens) when one is
    supplied, and from the tokenized request content otherwise. The
    response is the corpus's prediction, serialized as JSON.
    """
    corpus = get_corpus_or_404(corpus_name)

    if not token_str:
        # No tokens were supplied directly: tokenize the request content.
        raw_text = get_string_content()
        tokens = corpus.tokenize(raw_text)
    else:
        tokens = parse_tokens(token_str)

    # The prediction is a JSONable dictionary, so it is passed on as-is.
    return jsonify(corpus.predict(tokens))