示例#1
0
def demo4():
    from nltk_contrib import classify
    from nltk import detect

    from nltk.corpora import genesis
    from itertools import islice

    fd = detect.feature({
        "2-tup":
        lambda t: [' '.join(t)[n:n + 2] for n in range(len(' '.join(t)) - 1)],
        "words":
        lambda t: t
    })

    classifier = classify.NaiveBayes(fd)
    training_data = {}
    training_data["english-kjv"] = list(
        islice(genesis.raw("english-kjv"), 0, 400))
    training_data["french"] = list(islice(genesis.raw("french"), 0, 400))
    training_data["finnish"] = list(islice(genesis.raw("finnish"), 0, 400))

    classifier.train(training_data)

    result = classifier.get_class_probs(
        list(islice(genesis.raw("english-kjv"), 150, 200)))

    print 'english-kjv :', result.prob('english-kjv')
    print 'french :', result.prob('french')
    print 'finnish :', result.prob('finnish')
示例#2
0
def demo():
    from nltk.corpus import brown
    from nltk import detect

    detector = detect.feature({"initial": lambda t: [t[0]], "len": lambda t: [len(t)]})

    for sent in brown.words("a")[:10]:
        print detector(sent)
示例#3
0
def demo():
    from nltk.corpus import brown
    from nltk import detect

    detector = detect.feature({
        'initial': lambda t: [t[0]],
        'len': lambda t: [len(t)]
    })

    for sent in brown.words('a')[:10]:
        print detector(sent)
示例#4
0
def demo():
    from nltk_contrib import classify
    from nltk import detect

    fd = detect.feature({"1-tup": lambda t: list(t)})

    classifier = classify.NaiveBayes(fd)
    training_data = {"class a": "aaaaaab", "class b": "bbbbbba"}
    classifier.train(training_data)

    result = classifier.get_class_dict("a")

    for cls in result:
        print cls, ':', result[cls]
    """
示例#5
0
def demo():
    from nltk_contrib import classify
    from nltk import detect

    fd = detect.feature({"1-tup": lambda t: [t[n] for n in range(len(t))]})

    classifier = classify.spearman.Spearman(fd)
    trainning_data = {"class a": "aaaaaab", "class b": "bbbbbba"}
    classifier.train(trainning_data)

    result = classifier.get_class_dict("a")

    for cls in result:
        print cls, ':', result[cls]
    """
示例#6
0
def demo2():
    from nltk_contrib import classify
    from nltk import detect

    fd = detect.feature(
        {"2-tup": lambda t: [t[n:n + 2] for n in range(len(t))]})

    classifier = classify.NaiveBayes(fd)
    training_data = {"class a": "aaaaaab", "class b": "bbbbbba"}
    classifier.train(training_data)

    result = classifier.get_class_dict("aababb")

    for cls in result:
        print cls, ':', result[cls]
    """
示例#7
0
def demo2():
    from nltk_contrib import classify
    from nltk import detect
 
    fd = detect.feature({"2-tup": lambda t: [t[n:n+2] for n in range(len(t))]})

    classifier = classify.NaiveBayes(fd)
    training_data = {"class a": "aaaaaab",
                      "class b": "bbbbbba"}
    classifier.train(training_data)

    result = classifier.get_class_dict("aababb")

    for cls in result:
        print cls, ':', result[cls]
    """
示例#8
0
def demo():
    from nltk_contrib import classify
    from nltk import detect
    
    fd = detect.feature({"1-tup": lambda t: [t[n] for n in range(len(t))]})

    classifier = classify.spearman.Spearman(fd)
    trainning_data = {"class a": "aaaaaab",
                      "class b": "bbbbbba"}
    classifier.train(trainning_data)

    result = classifier.get_class_dict("a")

    for cls in result:
        print cls, ':', result[cls]
    """
示例#9
0
def demo():
    from nltk_contrib import classify
    from nltk import detect
  
    fd = detect.feature({"1-tup": lambda t: list(t)})

    classifier = classify.NaiveBayes(fd)
    training_data = {"class a": "aaaaaab",
                     "class b": "bbbbbba"}
    classifier.train(training_data)

    result = classifier.get_class_dict("a")

    for cls in result:
        print cls, ':', result[cls]
    
    """
示例#10
0
def demo4():
    from nltk_contrib import classify
    from nltk import detect

    from nltk.corpora import genesis
    from itertools import islice
  
    fd = detect.feature({"2-tup": lambda t: [' '.join(t)[n:n+2] for n in range(len(' '.join(t))-1)],
                     "words": lambda t: t})

    classifier = classify.NaiveBayes(fd)
    training_data = {}
    training_data["english-kjv"] = list(islice(genesis.raw("english-kjv"), 0, 400))
    training_data["french"] = list(islice(genesis.raw("french"), 0, 400))
    training_data["finnish"] = list(islice(genesis.raw("finnish"), 0, 400))

    classifier.train(training_data)

    result = classifier.get_class_probs(list(islice(genesis.raw("english-kjv"), 150, 200)))

    print 'english-kjv :', result.prob('english-kjv')
    print 'french :', result.prob('french')
    print 'finnish :', result.prob('finnish')
示例#11
0
文件: langid.py 项目: Daroth/collectr
from nltk_contrib import classify
from nltk import detect
from nltk.corpus import udhr
import string

def run(classifier, training_data, gold_data):
    """Train ``classifier`` and print how many gold samples it labels right.

    classifier    -- object providing ``train(data)`` and ``get_class(sample)``
    training_data -- handed unchanged to ``classifier.train``
    gold_data     -- mapping from the expected label to the sample to classify

    Prints "<correct> in <total> correct" to stdout and returns None.
    """
    classifier.train(training_data)
    # A prediction counts as correct when it equals the key it is stored under.
    correct = sum(
        1 for lang in gold_data
        if classifier.get_class(gold_data[lang]) == lang)
    # A single pre-formatted argument produces the same text whether ``print``
    # is the Python 2 statement or the Python 3 function; the multi-argument
    # statement form is a syntax error on Python 3.
    print("%s in %s correct" % (correct, len(gold_data)))

# features: character bigrams
fd = detect.feature({"char-bigrams" : lambda t: [string.join(t)[n:n+2] for n in range(len(t)-1)]})

training_data = udhr.langs(['English-Latin1', 'French_Francais-Latin1', 'Indonesian-Latin1', 'Zapoteco-Latin1'])
gold_data = {}
for lang in training_data:
    gold_data[lang] = training_data[lang][:50]
    training_data[lang] = training_data[lang][100:200]

print "Cosine classifier: ",
run(classify.Cosine(fd), training_data, gold_data)

print "Naivebayes classifier: ",
run(classify.NaiveBayes(fd), training_data, gold_data)

print "Spearman classifier: ",
run(classify.Spearman(fd), training_data, gold_data)
示例#12
0
import string


def run(classifier, training_data, gold_data):
    """Train ``classifier`` and print how many gold samples it labels right.

    classifier    -- object providing ``train(data)`` and ``get_class(sample)``
    training_data -- handed unchanged to ``classifier.train``
    gold_data     -- mapping from the expected label to the sample to classify

    Prints "<correct> in <total> correct" to stdout and returns None.
    """
    classifier.train(training_data)
    # A prediction counts as correct when it equals the key it is stored under.
    correct = sum(
        1 for lang in gold_data
        if classifier.get_class(gold_data[lang]) == lang)
    # A single pre-formatted argument produces the same text whether ``print``
    # is the Python 2 statement or the Python 3 function; the multi-argument
    # statement form is a syntax error on Python 3.
    print("%s in %s correct" % (correct, len(gold_data)))


# features: character bigrams
fd = detect.feature({
    "char-bigrams":
    lambda t: [string.join(t)[n:n + 2] for n in range(len(t) - 1)]
})

training_data = udhr.langs([
    'English-Latin1', 'French_Francais-Latin1', 'Indonesian-Latin1',
    'Zapoteco-Latin1'
])
gold_data = {}
for lang in training_data:
    gold_data[lang] = training_data[lang][:50]
    training_data[lang] = training_data[lang][100:200]

print "Cosine classifier: ",
run(classify.Cosine(fd), training_data, gold_data)

print "Naivebayes classifier: ",