Python tokenize示例

编程语言: Python

命名空间/包名称: scripts.twokenize

方法/功能: tokenize

hotexamples.com的示例: 4

Python tokenize - 共找到 4 个示例。以下是从开源项目中提取的、评价最高的 scripts.twokenize.tokenize 实际 Python 用法示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： extract.py 项目： AvenTu/emote-cat

def train(file):
    """Train a Naive Bayes classifier on per-token features of labelled tweets.

    Each input line is lower-cased and split on commas into nine fields; the
    second field is the tweet text and the eighth is its label. Lines whose
    label contains "no_agreement" are skipped. Every token of the tweet that
    ``ok_word`` accepts contributes one ``({'token': token}, label)`` example.

    Args:
        file: Currently unused; the data is read from ``romney_file``.
            NOTE(review): this looks unintentional -- confirm with callers
            before switching the function over to ``file``.

    Returns:
        The trained ``nltk.NaiveBayesClassifier``.

    Raises:
        ValueError: If a line does not split into exactly nine fields.
    """
    features_list = []

    # The context manager closes the file even if a line fails to parse.
    with open(romney_file) as f:
        for line in f:
            line = line.rstrip().lower()
            [HITID, tweet, W1, A1, W2, A2, Agmt, label, date] = line.split(',')
            if "no_agreement" in label:
                continue
            print(label)
            for token in twokenize.tokenize(tweet):
                if not ok_word(token):
                    continue
                # Record one example per accepted token; appending outside
                # this loop would keep only the last token and ignore the
                # ok_word filter.
                features_list.append(({'token': token}, label))

    # Feature extraction is complete: train once on the full example list
    # rather than retraining after every input line.
    random.shuffle(features_list)
    classifier = nltk.NaiveBayesClassifier.train(features_list)

    classifier.show_most_informative_features(100)
    # Accuracy here is measured on the training examples themselves.
    print(nltk.classify.accuracy(classifier, features_list))
    return classifier

示例#2

显示文件

文件： extract.py 项目： AvenTu/emote-cat

def train(file):
    """Build per-token training examples from labelled tweets and fit a classifier.

    Every line is stripped, lower-cased and split on commas into nine fields;
    field two is the tweet and field eight is the label. Lines whose label
    contains "no_agreement" are ignored. For each remaining tweet, every token
    accepted by ``ok_word`` yields one ``({'token': token}, label)`` example.

    Args:
        file: Currently unused; input comes from ``romney_file``.
            NOTE(review): probably meant to be the path to read -- verify
            against callers before changing.

    Returns:
        The trained ``nltk.NaiveBayesClassifier``.

    Raises:
        ValueError: If a line does not contain exactly nine comma-separated
            fields.
    """
    features_list = []

    # Use a context manager so the file handle is not leaked.
    with open(romney_file) as f:
        for line in f:
            line = line.rstrip().lower()
            [HITID, tweet, W1, A1, W2, A2, Agmt, label, date] = line.split(',')
            if "no_agreement" in label:
                continue
            print(label)
            for token in twokenize.tokenize(tweet):
                if ok_word(token):
                    # The append must live inside the token loop so each
                    # accepted token (not just the last one) is recorded.
                    features_list.append(({'token': token}, label))

    # Train a single model after all examples have been gathered.
    random.shuffle(features_list)
    classifier = nltk.NaiveBayesClassifier.train(features_list)

    classifier.show_most_informative_features(100)
    # Reported accuracy is computed on the same examples used for training.
    print(nltk.classify.accuracy(classifier, features_list))
    return classifier

示例#3

显示文件

文件： extract.py 项目： AvenTu/emote-cat

def test(file):
    """Score ``classifier`` on labelled tweets using a per-token majority vote.

    Each line is split on commas into nine fields; the second is the tweet and
    the eighth is the expected label. Lines whose label contains
    "no_agreement" are skipped. Every token accepted by ``ok_word`` is
    classified on its own and the most common predicted label becomes the
    prediction for the tweet. Prints the number of scored tweets and correct
    predictions, followed by their ratio.

    ``classifier`` is not defined in this function and must be provided by the
    enclosing scope.

    Args:
        file: Currently unused; the data is read from ``romney_file``.
            NOTE(review): this looks unintentional -- confirm with callers.

    Raises:
        ValueError: If a line does not split into exactly nine fields.
    """
    total_guesses = 0.0
    total_correct = 0.0

    # The context manager closes the file even if a line fails to parse.
    with open(romney_file) as f:
        for line in f:
            # NOTE(review): train() lower-cases each line before splitting
            # but this function does not -- confirm whether that is intended.
            line = line.rstrip()
            [HITID, tweet, W1, A1, W2, A2, Agmt, label, date] = line.split(',')
            if "no_agreement" in label:
                continue

            votes = Counter()
            for token in twokenize.tokenize(tweet):
                if not ok_word(token):
                    continue
                guess = classifier.classify({'token': token})
                votes[guess] += 1

            if not votes:
                # No token was usable, so there is nothing to vote on;
                # skip the tweet instead of failing on an empty tally.
                continue

            pred_label = votes.most_common(1)[0][0]
            if pred_label == label:
                total_correct += 1.0
            total_guesses += 1.0

    print("%s %s" % (total_guesses, total_correct))
    # Avoid dividing by zero when no tweet could be scored.
    if total_guesses:
        print(total_correct / total_guesses)

示例#4

显示文件

文件： extract.py 项目： AvenTu/emote-cat

def test(file):
    """Evaluate ``classifier`` on labelled tweets and print its accuracy.

    Lines are split on commas into nine fields (tweet is field two, label is
    field eight); lines whose label contains "no_agreement" are ignored. Each
    token accepted by ``ok_word`` is classified individually, and the label
    predicted most often wins for that tweet. The totals of scored tweets and
    correct predictions are printed, then the fraction correct.

    ``classifier`` is looked up outside this function; it is not created here.

    Args:
        file: Currently unused; input comes from ``romney_file``.
            NOTE(review): probably meant to be the path to read -- verify
            against callers before changing.

    Raises:
        ValueError: If a line does not contain exactly nine comma-separated
            fields.
    """
    total_guesses = 0.0
    total_correct = 0.0

    # Use a context manager so the file handle is not leaked.
    with open(romney_file) as f:
        for line in f:
            # NOTE(review): unlike train(), the line is not lower-cased
            # here -- confirm whether the mismatch is deliberate.
            line = line.rstrip()
            [HITID, tweet, W1, A1, W2, A2, Agmt, label, date] = line.split(',')
            if "no_agreement" in label:
                continue

            votes = Counter(
                classifier.classify({'token': token})
                for token in twokenize.tokenize(tweet)
                if ok_word(token)
            )
            if not votes:
                # An empty tally has no most-common entry; skip the tweet
                # rather than crash while unpacking it.
                continue

            pred_label, _ = votes.most_common(1)[0]
            if pred_label == label:
                total_correct += 1.0
            total_guesses += 1.0

    print("%s %s" % (total_guesses, total_correct))
    # Only report the ratio when at least one tweet was scored.
    if total_guesses:
        print(total_correct / total_guesses)