def GET(self):
    """Return the logged-in user's stored tweets as a JSON array.

    Reads an ``offset`` query parameter (page size is fixed at 20),
    fetches that page of tweets for ``session.userid``, and serializes
    one flat dict per tweet.  Newlines are stripped or escaped so the
    payload stays valid JSON for the client-side renderer.
    """
    web.header('Content-Type', 'application/json')
    data = web.input()
    limit = 20
    offset = int(data.get('offset', 0))
    usertweets = usermodel.getUserTweets(session.userid, limit, offset)
    # List comprehension instead of map(): json.dumps() needs a real
    # sequence (map() is a lazy, non-serializable iterator on Python 3);
    # on Python 2 the result is identical.
    tweets = [
        {
            'handle': x['data']['user']['screen_name'].replace('\n', ''),
            'name': x['data']['user']['name'].replace('\n', ''),
            # HTML view: newlines become <br>, double quotes are escaped
            # before ttp linkifies mentions/hashtags/URLs.
            'text_html':
                ttp.Parser().parse(x['data']['text'].replace('\n', '<br>')
                                   .replace('"', '\\"')).html,
            'text': x['data']['text'].replace('\n', '\\n'),
            'created_at': x['data']['created_at'],
            'profile_pic': x['data']['user']['profile_image_url'],
            # Tweet IDs exceed JavaScript's safe-integer range, so send
            # them as strings.
            'tweet_id': str(x['data']['id']),
            'review_id': x.get('rid', 0),
            'hidden': x.get('hidden', 0),
        }
        for x in usertweets
    ]
    return json.dumps(tweets)
def remove_hashtag(tweet_text):
    """Strip the leading '#' from every hashtag ttp finds in *tweet_text*."""
    parsed = ttp.Parser().parse(tweet_text)
    for found_tag in parsed.tags:
        tweet_text = tweet_text.replace('#' + found_tag, found_tag)
    return tweet_text
def remove_URL(tweet_text):
    """Delete every URL ttp finds in *tweet_text*."""
    found_urls = ttp.Parser().parse(tweet_text).urls
    for found in found_urls:
        tweet_text = tweet_text.replace(found, '')
    return tweet_text
def remove_username(tweet_text):
    """Remove every @mention (including the '@') from *tweet_text*."""
    mentioned = ttp.Parser().parse(tweet_text).users
    for name in mentioned:
        tweet_text = tweet_text.replace('@' + name, '')
    return tweet_text
def show_tweets(self, tweets):
    """Render main.html, passing the first URL of each tweet that has one."""
    url_parser = ttp.Parser()
    first_urls = []
    for item in tweets:
        parsed = url_parser.parse(item["text"])
        if parsed.urls:
            first_urls.append(parsed.urls[0])
    # Template expects None (not an empty list) when nothing was found.
    self.render("main.html",
                api_key=options.twitter_api_key,
                links=first_urls if first_urls else None)
def get_tweets(context, username, asvar, exclude='', max_url_length=60, limit=None):
    # Template-tag body: fetch a user's recent timeline and put it in the
    # template context under *asvar*.
    # NOTE(review): only the failure path is visible in this excerpt — the
    # success path appears to continue beyond it; confirm against the full
    # file.  Uses Python 2 ``except ..., e`` syntax.
    tweet_parser = ttp.Parser(max_url_length=max_url_length)
    cache_key = get_cache_key(username, asvar, exclude, limit)
    tweets = []
    try:
        # Retweets are included unless 'retweets' appears in *exclude*.
        user_last_tweets = twitter.Api().GetUserTimeline(
            screen_name=username,
            include_rts=('retweets' not in exclude),
            include_entities=True)
    except (twitter.TwitterError, URLError), e:
        # On any API/network failure: log, fall back to the cached copy
        # (empty list if the cache is cold), and render nothing inline.
        logging.getLogger(__name__).error(str(e))
        context[asvar] = cache.get(cache_key, [])
        return ""
def parse_twitter_status(status):
    """Flatten a raw Twitter status dict into a summary dict.

    Extracts urls/hashtags/mentions (from the API entities when present,
    otherwise via ttp), plus a keyword list built from proper nouns and
    non-stop-words in the tweet text.  Returns None for entity-less tweets
    that contain no http(s) link at all.
    """
    # "No entities" means the entities dict is empty or has no urls.
    no_entities = len(status['entities'])==0 or len(status['entities']['urls'])==0
    text = status["text"]
    author = status['user']['screen_name']
    # For retweets, attribute the status to the original author.
    author = status['retweeted_status']['user']['screen_name'] if 'retweeted_status' in status else author
    if no_entities:
        # Without entities and without any raw link, skip this status.
        if text.find("http:")<0 and text.find("https:")<0:
            return None
        # Fall back to parsing entities out of the text with ttp.
        result = ttp.Parser().parse(text)
        (mentions, urls, hashtags) = (result.users, result.urls, result.tags)
    else:
        # Prefer the API-provided entities (expanded URLs, clean names).
        urls = map(lambda x:x['expanded_url'], status['entities']['urls'])
        hashtags = map(lambda x:x['text'], status['entities']['hashtags'])
        mentions = map(lambda x:x['screen_name'], status['entities']['user_mentions'])
    # Drop self-mentions of the (possibly retweeted) author.
    mentions = [m for m in mentions if m!=author]
    # Pull out two-word proper nouns ("New York", "Jean-Luc") before
    # word-splitting, then remove them from the text so they are not
    # double-counted.
    propers = re.findall('[A-Z][a-z]+[\s-][A-Z][a-z.]*', text)
    for proper in propers:
        text=text.replace(proper,"")
    exclude = set(string.punctuation)
    # Strip punctuation from the extracted proper nouns.
    propers = [''.join(ch for ch in p if ch not in exclude) for p in propers]
    words = text.split(" ")
    words = [w if w.isalpha() else w[0:-1]+" " for w in words] #handle words that end in periods, colons, etc.
    # Keep alphabetic non-stop-words; STOP_WORDS is compared byte-wise
    # (module-level constant, defined elsewhere in this file).
    words = [w for w in words if w.isalpha() and not w.lower().encode('utf-8') in STOP_WORDS]
    words = propers + words
    vals = {
        'id'       : status['id'],
        'time'     : status['created_at'],
        'text'     : status['text'],
        'urls'     : urls,
        'hashtags' : hashtags,
        'mentions' : mentions,
        'author'   : author,
        'keywords' : words,
    }
    return vals
def setUp(self):
    # Parser configured to also report character spans for each entity,
    # so the span-aware tests can assert positions.
    self.parser = ttp.Parser(include_spans=True)
def setUp(self):
    # Default parser (no spans) shared by the tests in this case.
    self.parser = ttp.Parser()
import logging from urllib2 import URLError from django import template from django.core.cache import cache from templatetag_sugar.parser import Optional, Constant, Name, Variable from templatetag_sugar.register import tag import ttp import twitter register = template.Library() tweet_parser = ttp.Parser() def get_cache_key(*args): return 'get_tweets_%s' % ('_'.join([str(arg) for arg in args if arg])) @tag(register, [ Constant("for"), Variable(), Constant("as"), Name(), Optional([Constant("exclude"), Variable("exclude")]), Optional([Constant("limit"), Variable("limit")]) ]) def get_tweets(context, username, asvar, exclude='', limit=None): cache_key = get_cache_key(username, asvar, exclude, limit) tweets = [] try: user_last_tweets = twitter.Api().GetUserTimeline(
import ttp

# Quick manual check of ttp's mention extraction and HTML linkification.
tweet_parser = ttp.Parser()

# A mention followed by text and an email address (the email must not be
# treated as a second mention).
first = tweet_parser.parse('@loco testing @[email protected]')
print(first.html)
print(first.users)
print("========")
# A bare mention on its own.
second = tweet_parser.parse('@username')
print(second.html)
print(second.users)
def get_users(tweet):
    """Return the list of @usernames mentioned in *tweet* (without the '@')."""
    return ttp.Parser().parse(tweet).users
def GET(self):
    """Render the "my tweets" page, with three access paths:

    - anonymous visitors: show the public per-handle view or bounce home;
    - user 67 (reviewer account): show recent MBTA tweets, retweets removed;
    - any OAuth'd user: sync and show their own stored tweets.
    """
    if not logged():
        #use the generic one
        data = web.input()
        handle = data.get('handle', False)
        if handle:
            return autorender().newtweets(handle)
        else:
            raise web.seeother('%s/' % base_url)
    elif session.userid == 67:
        # NOTE(review): 67 looks like a hard-coded reviewer user id —
        # confirm; consider a named constant.
        recenttweets = reviewmodel.getMBTATweets()
        # Model returns oldest-first; presumably we want newest-first.
        recenttweets.reverse()
        #map to dict, filter out retweets for deegs.
        mbtatweets = map(
            lambda x: {
                'handle': x['td']['user']['screen_name'].replace('\n', ' '),
                'name': x['td']['user']['name'].replace('\n', ' '),
                # Prefix the author handle into the rendered text, then
                # linkify with ttp (newlines -> <br>, quotes escaped).
                'text_html':
                    ttp.Parser().parse('@%s:%s' % (x['td'][
                        'user']['screen_name'], x['td']['text'].replace(
                            '\n', '<br>').replace('"', '\\"'))).html,
                'text': '@%s:%s' % (x['td']['user']['screen_name'],
                                    x['td']['text'].replace(
                                        '\n', '\\n')),
                'created_at': x['td']['created_at'],
                'profile_pic': x['td']['user']['profile_image_url'],
                'tweet_id': x['td']['id'],
                'review_id': x.get('rid', ''),
                'hidden': x['hidden']
            },
            # Drop both API-flagged retweets and manual "RT ..." tweets.
            filter(
                lambda tweet: not (tweet['td'].get(
                    'retweeted_status', dict(retweeted=False)).get(
                        'retweeted', False) or tweet['td'].get(
                            'text', 'RT').startswith('RT')),
                recenttweets))
    elif session.twitoauth != {}:
        # Logged-in, OAuth-linked user: pull any new tweets first.
        newtweets = session.tweetclient.syncTweets()
        if len(newtweets) > 0:
            #save these new tweets to the DB
            inserts = usermodel.storeUserTweets(session.userid,
                                                session.twitoauth,
                                                newtweets)
        # NOTE(review): indentation reconstructed — re-reading from the DB
        # and building the dicts is assumed to happen whether or not new
        # tweets arrived; confirm against the original file.
        usertweets = usermodel.getUserTweets(session.userid)
        mbtatweets = map(
            lambda x: {
                'handle': x['data']['user']['screen_name'].replace('\n', ''),
                'name': x['data']['user']['name'].replace('\n', ''),
                'text_html':
                    ttp.Parser().parse(x['data']['text'].replace('\n', '<br>').
                                       replace('"', '\\"')).html,
                'text': x['data']['text'].replace('\n', '\\n'),
                'created_at': x['data']['created_at'],
                'profile_pic': x['data']['user']['profile_image_url'],
                'tweet_id': x['data']['id'],
                'review_id': x.get('rid', 0),
                'hidden': x.get('hidden', 0)
            },
            usertweets)
    else:
        # Logged in but no Twitter OAuth linked: nothing to show.
        mbtatweets = []
    return autorender().mytweets(mbtatweets)