def main():
    '''Tag every line piped in on stdin and print the input and the result.

    Example usage:

        echo "The Fulton County Grand Jury said Friday an investigation of Atlanta's recent primary election produced no evidence that any irregularities took place." | python -m tweedr.ark.__init__

    For each input line, two lines are written to stdout: `[input]` followed
    by the stripped line, and `[output]` followed by whatever
    `TwitterNLP.predict` returned for that line.

    If stdin is a terminal (nothing was piped in), an error is logged and the
    process exits with status 1.
    '''
    if sys.stdin.isatty():
        logger.error('You must pipe in a string')
        # Use sys.exit instead of the bare `exit` helper: `exit` is injected
        # by the `site` module for interactive sessions and is not available
        # when the interpreter runs without it (e.g. `python -S`).
        sys.exit(1)

    # The tagger is imported and constructed only after the stdin check passes.
    from tweedr.ark.java import TwitterNLP
    tagger = TwitterNLP()

    # sys.stdout.write is used instead of the print statement so the function
    # parses on both Python 2 and Python 3; the emitted text is the same
    # ("<label> <value>\n").
    write = sys.stdout.write
    for line in sys.stdin:
        write('[input] %s\n' % line.strip())
        tag_line = tagger.predict(line)
        # Wrap in a 1-tuple so a tuple-valued result is formatted as a whole.
        write('[output] %s\n' % (tag_line,))
class POSTagger(Mapper):
    '''Mapper that adds tokenization and part-of-speech tags to a tweet dict.'''

    INPUT = TweetDictProtocol
    OUTPUT = TweetDictProtocol

    def __init__(self):
        '''Create the TwitterNLP instance used for every call.'''
        self.tagger = TwitterNLP()

    def __call__(self, tweet):
        '''Add `tokens` and `pos` entries to the tweet and return it.

        Only the tweet["text"] value is read. After the call the tweet looks
        like:

            {
              ...
              "tokens": "@Donnie I hear ya and I hate earthquakes in Cali too ! But I still love living in LA ! :)",
              "pos": "@ O V O & O V N P ^ R , & O R V V P ^ ,",
              ...
            }

        Splitting the `tokens` and `pos` values on whitespace yields two
        lists of strings with the same length.

        The tweet is modified in place; the same object is returned.
        '''
        text = tweet['text']
        token_string, tag_string = self.tagger.tokenize_and_tag(text)
        tweet['tokens'] = token_string
        tweet['pos'] = tag_string
        return tweet
def __init__(self):
    '''Create a TwitterNLP instance and keep it on `self.tagger`.'''
    nlp = TwitterNLP()
    self.tagger = nlp
from tweedr.ark.java import TwitterNLP
import logging

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
logger.debug('The TwitterNLP POS tagger is being loaded as a module singleton')

# Simply by importing this module, the TwitterNLP tagger will be started up
# and made available to other scripts as the module-level name `tagger`.
# Because this runs at import time, every importer shares this one instance.
tagger = TwitterNLP()