Exemplo n.º 1
0
Arquivo: tasks.py Projeto: sp00/kral
def facebook_post(item, query, **kwargs):
    """Normalize one Facebook post and push it onto the queue for ``query``.

    Items that carry no ``message`` key are ignored.

    Args:
        item: Mapping describing a post. Keys read here: ``message``,
            ``from`` (with ``name`` and ``id``), ``id``, ``created_time``
            and, when present and truthy, ``to``, ``likes`` and
            ``application``.
        query: Passed to ``push_data`` as the ``queue`` argument.
        **kwargs: Forwarded to ``facebook_post.get_logger``.

    Raises:
        KeyError: If a required key is missing from ``item``.
        ValueError: If ``created_time`` does not match the expected format.
    """
    logger = facebook_post.get_logger(**kwargs)
    time_format = "%Y-%m-%dT%H:%M:%S+0000"
    # ``in`` works on Python 2 and 3; dict.has_key() was removed in Python 3.
    if 'message' not in item:
        return

    author = item['from']
    post_info = {
        "service": 'facebook',
        "user": {
            "name": author['name'],
            "id": author['id'],
        },
        "links": [],
        "id": item['id'],
        "text": item['message'],
        "date": str(datetime.datetime.strptime(item['created_time'], time_format)),
    }

    # Raw string: the pattern is unchanged, but the backslashes no longer
    # depend on Python passing unknown escape sequences through untouched.
    url_regex = re.compile(r'(?:http|https|ftp):\/\/[\w\-_]+(?:\.[\w\-_]+)+(?:[\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?')
    post_info['links'].extend(
        {'href': url} for url in url_regex.findall(item['message'])
    )
    post_info['user']['avatar'] = "http://graph.facebook.com/%s/picture" % author['id']

    # Optional fields are copied only when present and non-empty.
    if item.get('to'):
        post_info['to_users'] = item['to']['data']
    if item.get('likes'):
        post_info['likes'] = item['likes']['count']
    if item.get('application'):
        post_info['application'] = item['application']['name']

    push_data(post_info, queue=query)
Exemplo n.º 2
0
Arquivo: tasks.py Projeto: sp00/kral
 def run(self, item, query, **kwargs):
     """Convert one identi.ca search result into a post dict and push it.

     Args:
         item: Mapping with the keys ``created_at``, ``from_user``,
             ``from_user_id``, ``to_user``, ``to_user_id``, ``text``,
             ``profile_image_url``, ``source`` and ``id``.
         query: Passed to ``push_data`` as the ``queue`` argument.
         **kwargs: Forwarded to ``self.get_logger``.
     """
     logger = self.get_logger(**kwargs)
     created_at = datetime.datetime.strptime(
         item['created_at'], "%a, %d %b %Y %H:%M:%S +0000"
     )

     sender = {"name": item['from_user'], "id": item['from_user_id']}
     recipient = {"name": item['to_user'], "id": item['to_user_id']}

     post_info = {
         "service": "identi.ca",
         "user": sender,
         "to_user": recipient,
         "text": item['text'],
         "date": str(created_at),
         # Only the profile image is available; it is stored under slot "0".
         "pictures": {"0": {"thumbnail": item['profile_image_url']}},
         "source": item['source'],
         "id": item['id'],
     }

     print(post_info['id'])
     push_data(post_info, queue=query)
     logger.info("Saved Identica Post")
Exemplo n.º 3
0
Arquivo: tasks.py Projeto: sp00/kral
def buzz_post(item, query, **kwargs):
    """Normalize one Buzz activity and push it onto the queue for ``query``.

    Args:
        item: Mapping describing the activity. Keys read here: ``actor``
            (``name``, ``thumbnailUrl``, ``profileUrl``), ``id``,
            ``published`` and ``object`` (``links``, ``content`` and,
            optionally, ``attachments``).
        query: Passed to ``push_data`` as the ``queue`` argument.
        **kwargs: Forwarded to ``buzz_post.get_logger``.

    Raises:
        KeyError, IndexError: If a required field is missing from ``item``.
        ValueError: If ``published`` does not match the expected format.
    """
    logger = buzz_post.get_logger(**kwargs)
    time_format = "%Y-%m-%dT%H:%M:%S.%fZ"
    # FIXME should consider all pictures, not just one
    try:
        attachment_links = item["object"]["attachments"][0]["links"]
        thumbnail = attachment_links["preview"][0]["href"]
        picture = attachment_links["enclosure"][0]["href"]
    except (KeyError, IndexError, TypeError):
        # Attachment data is optional; a missing or malformed entry simply
        # means no picture. Only lookup failures are caught, so real bugs and
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        picture = ""
        thumbnail = ""
    # END FIXME
    actor = item["actor"]
    post_info = {
        "service": "buzz",
        "user": {
            "name": actor["name"],
            # NOTE(review): the id is filled from the actor's name, same as
            # "name" above -- confirm whether a real id field should be used.
            "id": actor["name"],
            "avatar": actor["thumbnailUrl"],
            "source": actor["profileUrl"],
        },
        "pictures": {  # hard-coding for only one picture. See above FIXME
            "0": {"picture": picture, "thumbnail": thumbnail}
        },
        # The id is the fourth colon-separated field of the activity id.
        "id": item["id"].split(":")[3],
        "date": str(datetime.datetime.strptime(item["published"], time_format)),
        "source": item["object"]["links"]["alternate"][0]["href"],
        "text": item["object"]["content"],
    }
    push_data(post_info, queue=query)
    logger.info("Saved Post/User")
Exemplo n.º 4
0
Arquivo: tasks.py Projeto: sp00/kral
def twitter_stream_tweet(data, queries, **kwargs):
    """Decode one streamed tweet and push it to every query it mentions.

    Args:
        data: JSON text of a single tweet.
        queries: Iterable of query strings. Each is lower-cased and stripped
            of underscores; the tweet is pushed to the queue named after the
            normalized query when that string occurs in the lower-cased
            tweet text.
        **kwargs: Forwarded to ``twitter_stream_tweet.get_logger``.
    """
    logger = twitter_stream_tweet.get_logger(**kwargs)
    tweet = json.loads(data)
    created_format = "%a %b %d %H:%M:%S +0000 %Y"

    author = tweet['user']
    author_info = {
        'id': author['id_str'],
        'utc': author['utc_offset'],
        'name': author['screen_name'],
        'description': author['description'],
        'location': author['location'],
        'avatar': author['profile_image_url'],
        'subscribers': author['followers_count'],
        'subscriptions': author['friends_count'],
        'website': author['url'],
        'language': author['lang'],
    }

    post_info = {
        'service': 'twitter',
        'user': author_info,
        'links': [],
        'id': tweet['id'],
        'application': tweet['source'],
        'date': str(datetime.datetime.strptime(tweet['created_at'], created_format)),
        'text': tweet['text'],
        'geo': tweet['coordinates'],
    }
    post_info['links'].extend(
        {'href': entity.get('url')} for entity in tweet['entities']['urls']
    )

    # Normalize every query up front, then fan the tweet out to each match.
    queue_names = [q.lower().replace('_', '') for q in queries]
    for queue_name in queue_names:
        if queue_name in tweet['text'].lower():
            push_data(post_info, queue=queue_name)
Exemplo n.º 5
0
Arquivo: tasks.py Projeto: sp00/kral
def youtube_video(item, query, **kwargs):
    """Normalize one YouTube feed entry and push it onto the queue for ``query``.

    Entries without a ``title`` key are not pushed.

    Args:
        item: Mapping for one feed entry. Keys read here: ``title``,
            ``author``, ``link`` and ``media$group`` (``yt$videoid``,
            ``yt$uploaded``, ``media$keywords``, ``media$description``,
            ``yt$duration``).
        query: Passed to ``push_data`` as the ``queue`` argument.
        **kwargs: Forwarded to ``youtube_video.get_logger``.

    Raises:
        KeyError, IndexError: If a required field is missing from ``item``.
    """
    logger = youtube_video.get_logger(**kwargs)
    # ``in`` works on Python 2 and 3; dict.has_key() was removed in Python 3.
    if 'title' in item:
        media = item['media$group']
        video_id = media['yt$videoid']['$t']
        post_info = {
            "service": 'youtube',
            "id": video_id,
            "date": media['yt$uploaded']['$t'],
            "user": item['author'][0]["name"]['$t'],
            "source": item['link'][1]['href'],
            "text": item["title"]['$t'],
            # Keywords may be absent; fall back to an empty string.
            "keywords": media['media$keywords'].get('$t', ''),
            "description": media['media$description']['$t'],
            "thumbnail": "http://i.ytimg.com/vi/%s/hqdefault.jpg" % video_id,
            "duration": media['yt$duration']['seconds'],
        }
        push_data(post_info, queue=query)
    # NOTE(review): this is logged even when the entry had no title and
    # nothing was pushed -- kept as in the original; confirm intent.
    logger.info("Saved Post/User")
Exemplo n.º 6
0
Arquivo: tasks.py Projeto: sp00/kral
def twitter_feed_tweet(item, query, **kwargs):
    """Normalize one Twitter search result and push it onto the queue for ``query``.

    Items without a ``text`` key are ignored.

    Args:
        item: Mapping for one search result. Keys read here: ``text``,
            ``from_user``, ``from_user_id_str``, ``profile_image_url``,
            ``id_str``, ``source`` and ``created_at`` (an RFC 822 style
            date string).
        query: Passed to ``push_data`` as the ``queue`` argument.
        **kwargs: Accepted for signature compatibility; not used here.

    Raises:
        KeyError: If a required key is missing from ``item``.
        TypeError: If ``created_at`` cannot be parsed as a date.
    """
    # email.utils offers the same parsedate_tz/mktime_tz as the deprecated
    # rfc822 module and exists on both Python 2 and Python 3.
    from email.utils import mktime_tz, parsedate_tz

    # ``in`` works on Python 2 and 3; dict.has_key() was removed in Python 3.
    if 'text' not in item:
        return

    post_info = {
        "service": 'twitter',
        "user": {
            "name": item['from_user'],
            "id": item['from_user_id_str'],
            'avatar': item['profile_image_url'],
        },
        "links": [],
        "id": item['id_str'],
        "text": item['text'],
        "source": item['source'],
        # Parse to a UTC timestamp, then render it as a local naive datetime.
        "date": str(datetime.datetime.fromtimestamp(mktime_tz(parsedate_tz(item['created_at'])))),
    }

    # Raw string: the pattern is unchanged, but the backslashes no longer
    # depend on Python passing unknown escape sequences through untouched.
    url_regex = re.compile(r'(?:http|https|ftp):\/\/[\w\-_]+(?:\.[\w\-_]+)+(?:[\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?')
    post_info['links'].extend(
        {'href': url} for url in url_regex.findall(item['text'])
    )
    push_data(post_info, queue=query)
Exemplo n.º 7
0
Arquivo: tasks.py Projeto: sp00/kral
def flickr_photo(photo_info, query, **kwargs):
    """Build a post dict for one Flickr photo and push it onto the queue.

    Note that ``photo_info`` is modified in place: a ``thumbnail`` entry is
    added to it before the post is assembled.

    Args:
        photo_info: Mapping with the keys ``farm``, ``server``, ``id``,
            ``secret``, ``dateupload``, ``owner``, ``ownername`` and
            ``title``.
        query: Passed to ``push_data`` as the ``queue`` argument.
        **kwargs: Forwarded to ``flickr_photo.get_logger``.
    """
    logger = flickr_photo.get_logger(**kwargs)

    # TODO: the photo page URL and the user's avatar, posting count, profile
    # and website fields are disabled because they need a ``user_info``
    # record that is not available in this function.
    thumbnail_template = "http://farm{farm}.static.flickr.com/{server}/{id}_{secret}_m.jpg"
    photo_info['thumbnail'] = thumbnail_template.format(**photo_info)

    post_info = {
        "service": 'flickr',
        "id": photo_info['id'],
        "date": photo_info['dateupload'],
        "user": {
            "id": photo_info['owner'],
            "name": photo_info['ownername'],
        },
        "text": photo_info["title"],
        "thumbnail": photo_info['thumbnail'],
    }

    # The log line is emitted before the push, as in the original ordering.
    logger.info("Saved Post/User")
    push_data(post_info, queue=query)
Exemplo n.º 8
0
Arquivo: tasks.py Projeto: sp00/kral
            for link in links:
                if link["href"].decode("utf8") == url.decode("utf8"):
                    link["count"] += 1
                    if link["count"] > 1:
                        if title:
                            link["title"] = title
                        else:
                            url_title.delay(url)
                    new_link = True
                    post_info = link
            if new_link == False:
                post_info = {"service": "links", "href": url, "count": 1, "title": title}
                links.append(post_info)
            links = sorted(links, key=lambda link: link["count"], reverse=True)
            cache.set(cache_name, pickle.dumps(links), 31556926)
            push_data(post_info, queue=query)
        else:
            url_expand.delay(current_url, query, n)


@task
def url_title(url, **kwargs):
    cache_name = base64.b64encode(url)[:250]
    httprequest = urllib2.Request(url)
    try:
        data = urllib2.urlopen(httprequest)
    except urllib2.HTTPError:
        data = None
    if data:
        for line in data:
            if "<title>" in line: