Exemplo n.º 1
0
def fetch_data(data="facebook.pickle",
               friends_count=1000,
               news_count=100,
               key='REMOVED MY KEY'):
    """Collect link statistics from friends' news posts and pickle them.

    Connects with `key`, prints the authenticated profile, then searches
    the friends of that profile and the news posts of each friend. A
    running friend counter is printed while the data is gathered.

    The pickled object is a single dictionary holding:
      "num_posts": number of posts visited
      "num_links": number of visited posts that carry a URL
      "num_texts": initialised to 0 and never changed by this function
      <url>:       [number of posts with this URL, summed likes]

    data: name of data file to save to (default: facebook.pickle)
    friends_count: count passed to the friends search (default: 1000)
    news_count: count passed to each friend's news search (default: 100)
    key: the license key handed to Facebook()

    Returns nothing; the result is only written to `data`.
    """
    # Single-argument print(...) behaves the same as the print statement
    # under Python 2 and is also valid Python 3.
    print("Fetching data...")

    # Get the data
    fb = Facebook(license=key)
    me = fb.profile()
    print(me)

    link_counts = {"num_posts": 0, "num_links": 0, "num_texts": 0}

    # me[0] is used as the profile id -- assumes profile() returns a
    # sequence whose first item is the id; TODO confirm against the
    # installed pattern version.
    my_friends = fb.search(me[0], type=FRIENDS, count=friends_count)
    for counter, friend in enumerate(my_friends, 1):
        print(counter)
        friend_news = fb.search(friend.id, type=NEWS, count=news_count)
        for news in friend_news:
            link_counts["num_posts"] += 1
            url = news.url
            if not url:
                continue
            link_counts["num_links"] += 1
            if url in link_counts:
                link_counts[url][0] += 1
                link_counts[url][1] += news.likes
            else:
                link_counts[url] = [1, news.likes]

    # Save the data to a file. Binary mode keeps the pickle bytes intact on
    # every platform, and the with-block closes the file even if dump() fails.
    with open(data, 'wb') as f:
        cPickle.dump(link_counts, f)
Exemplo n.º 2
0
def find_news():
    """Return (popularity, text) pairs for the news posts of a profile.

    The profile is looked up with the id from find_id(); `pullnum` is passed
    as the count of the news search. A post is kept only when the value
    returned by find_name() does not occur in its text. Popularity is the
    post's comments plus its likes.

    Returns a list of (popularity, text) tuples in search order.
    """
    fb = Facebook(
        license='CAAEuAis8fUgBAAZB8tJX5T9qPXpFolTmCpFQNMZBHoHuGpwuhjHYUwyIHR2Xm9lENwbewkSwM0NS3sZBXJGFcOUeiwUYBKxWqtbDnfxMzmAOfI0s48bjXjKKYZB2eSvnZBMLA0iz1HeZCMHPFNxgaqhEufsZAzQuwT4bqQ77YFz426lH1YEZCJcJ',
        throttle=1.0,
        language='en')
    person = fb.profile(id=find_id())
    feed = fb.search(person['id'], type=NEWS, count=pullnum)

    # find_name() is deliberately evaluated once per post, as before.
    return [(post.comments + post.likes, post.text)
            for post in feed
            if find_name() not in post.text]
Exemplo n.º 3
0
    # We only want to add new status updates, i.e., those we haven't seen yet.
    # With an index on the first column we can quickly check if an id already
    # exists.
    table = Datasheet.load(pd("opinions.csv"))
    index = set(table.columns[0])
except:
    table = Datasheet()
    index = set()

fb = Facebook()

# cached=False sends a "live" request to Facebook, so the most recent results
# are returned instead of whatever sits in the local cache. (A local cache is
# still handy while testing: a repeated query answers instantly.)
for status in fb.search("horrible", count=25, cached=False):
    fields = (
        "=" * 100,
        status.id,
        status.text.encode("utf-8"),
        status.author,  # Yields an (id, name)-tuple.
        status.date,
        status.likes,
        status.comments,
    )
    for field in fields:
        print(field)
    print()
    # Skip status updates whose id is already indexed; an empty table
    # always accepts the row.
    if len(table) != 0 and status.id in index:
        continue
    table.append([status.id, status.text])
    index.add(status.id)

# Write the table to opinions.csv at the path produced by pd().
table.save(pd("opinions.csv"))
Exemplo n.º 4
0
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 27 20:13:30 2014

@author: gianfranco
"""

from pattern.web import Facebook, NEWS, COMMENTS, LIKES

fb = Facebook(license='CAAEuAis8fUgBAIjddb5eck615kBpLhvmwaO9dh1ZBIv6YfsPGu0MhzdILYwAQX90qsmLTZCCeZCrwc0U07mzpE1grLUPAXfxyp6lbZCOD1xz1pzlplGbgM0gKWTKhR4nBYkwdJR9CZB0VDdS0f99IopQ7Uv6VZCyZAhMJnKz1yloCpwM4WNHoAG')
me = fb.profile(id=None)  # user info dict

# Look through the posts on my wall and take the author of the first post
# returned by the search that was not written by me.
# amigo_id stays None when no such post exists, instead of being left
# undefined (which used to end in a NameError further down).
amigo_id = None
for post in fb.search(me['id'], type=NEWS, count=1000):
    if post.author[0] != me['id']:
        amigo_id = post.author[0]
        break

# Same idea on that friend's wall: the first author who is not the friend.
# The search is skipped when no friend was found; amigo1_id is then None.
amigo1_id = None
if amigo_id is not None:
    for post in fb.search(amigo_id, type=NEWS, count=1000):
        if post.author[0] != amigo_id:
            amigo1_id = post.author[0]
            break
# Now that the Facebook ID is defined, I will use the Google URL Shortener API
# to produce the link to the friend of my friend.


import requests
import urllib, urllib2
import json


def goo_shorten_url(url):
Exemplo n.º 5
0
from pattern.web import Facebook, NEWS, COMMENTS, LIKES, FRIENDS

fb = Facebook(license='CAAEuAis8fUgBAGvbgzlR88GbrThcUGshnz9njXesze8zWbc6ha9heTCKuHNE3KmGdrZCpTitFpFQwlIA5DN9b1kfYtueew9d5N74EKGfGjqGu8E8Ux2168sfjrsJ3QFi9pNG5DL4yBvWrEEmF9aOoTCIWfW8tCpyT9LXPKK61pkH4E5R0D')
me = fb.profile(id=None)  # profile for the license above; not used in the lines below

# Walk the news posts of one hard-coded profile id and print each post,
# followed by its comments and likes when it has any. count tallies the
# posts seen. Single-argument print(...) matches the Python 2 print
# statement output.
count = 0
for post in fb.search(100000008147254, type=NEWS, count=1000):
    count += 1
    print(post.description)
    for field in (post.id, post.date, post.text, post.url):
        print(repr(field))
    if post.comments > 0:
        print('%i comments' % post.comments)
        print([(r.text, r.author) for r in fb.search(post.id, type=COMMENTS)])
    if post.likes > 0:
        print('%i likes' % post.likes)
        print([r.author for r in fb.search(post.id, type=LIKES)])
Exemplo n.º 6
0
    # In the first column, we'll store a unique id for each status update.
    # We only want to add new status updates, i.e., those we haven't seen yet.
    # With an index on the first column we can quickly check if an id already exists.
    table = Datasheet.load(pd("opinions.csv"))
    index = set(table.columns[0])
except:
    table = Datasheet()
    index = set()

fb = Facebook()

# cached=False sends a "live" request to Facebook, so the most recent results
# are returned instead of whatever sits in the local cache. (A local cache is
# still handy while testing: a repeated query answers instantly.)
for status in fb.search("horrible", count=25, cached=False):
    print("=" * 100)
    fields = (
        status.id,
        status.text,
        status.author,  # Yields an (id, name)-tuple.
        status.date,
        status.likes,
        status.comments,
    )
    for field in fields:
        print(field)
    print("")  # blank separator line
    # Skip status updates whose id is already indexed; an empty table
    # always accepts the row.
    if len(table) != 0 and status.id in index:
        continue
    table.append([status.id, status.text])
    index.add(status.id)

# Write the table to opinions.csv at the path produced by pd().
table.save(pd("opinions.csv"))
Exemplo n.º 7
0
    # We store status in a Table that can be saved as a text file.
    # In the first column, we'll store a unique ID for each tweet.
    # We only want to add the latest facebook status, i.e. those we haven't previously encountered.
    # With an index on the first column we can quickly check if an ID already exists.
    # The index becomes important once more and more rows are added to the table (speed).
    table = Datasheet.load("travel.txt")
    index = dict.fromkeys(table.columns[0], True)
except:
    table = Datasheet()
    index = {}

engine = Facebook()

# cached=False sends a live request to Facebook, so the latest results for
# the query are returned rather than those in the local cache.
for status in engine.search("Travelling to", count=25, cached=False):
    for field in (status.description, status.author, status.date):
        print(field)
    print("")  # blank separator line
    # The status URL serves as the row's unique key.
    id = status.url
    # Skip rows whose key is already indexed; an empty table always
    # accepts the row.
    if len(table) != 0 and id in index:
        continue
    table.append([id, status.description])
    index[id] = True

table.save("travel.txt")

print("%s %d" % ("Total results:", len(table)))
print("")
Exemplo n.º 8
0
    # In the first column, we'll store a unique id for each status update.
    # We only want to add new status updates, i.e., those we haven't seen yet.
    # With an index on the first column we can quickly check if an id already exists.
    table = Datasheet.load("opinions.txt")
    index = set(table.columns[0])
except:
    table = Datasheet()
    index = set()

fb = Facebook()

# cached=False sends a "live" request to Facebook, so the most recent results
# are returned instead of whatever sits in the local cache. (A local cache is
# still handy while testing: a repeated query answers instantly.)
for status in fb.search("horrible", count=25, cached=False):
    print("=" * 100)
    fields = (
        status.id,
        status.text,
        status.author,  # Yields an (id, name)-tuple.
        status.date,
        status.likes,
        status.comments,
    )
    for field in fields:
        print(field)
    print("")  # blank separator line
    # Skip status updates whose id is already indexed; an empty table
    # always accepts the row.
    if len(table) != 0 and status.id in index:
        continue
    table.append([status.id, status.text])
    index.add(status.id)

table.save("opinions.txt")
# -*- coding: utf-8 -*-

"""
Pattern.web can be used to extract information from Wikipedia, Facebook, Twitter, etc
Find out more: https://www.clips.uantwerpen.be/pages/pattern-web
"""

from pattern.web import Facebook
from pattern.en import sentiment
from pattern.web import Facebook, NEWS, COMMENTS, LIKES

fb = Facebook(license='568752540312810')
me = fb.profile(id=568752540312810)  # user info dict

# Print id, text and URL of each news post for that profile, then the
# comments and likes of every post that has any. Single-argument print(...)
# matches the Python 2 print statement output.
for post in fb.search(me['id'], type=NEWS, count=100):
    for field in (post.id, post.text, post.url):
        print(repr(field))
    if post.comments > 0:
        print('%i comments' % post.comments)
        print([(r.text, r.author) for r in fb.search(post.id, type=COMMENTS)])
    if post.likes > 0:
        print('%i likes' % post.likes)
        print([r.author for r in fb.search(post.id, type=LIKES)])
"""
facebook = Facebook()
someTweet = None
feeling = ""

for j in range(1):
    for post in facebook.search('Teressa May', start=someTweet, count=1):
Exemplo n.º 10
0
Created on Fri Sep 26 14:13:12 2014

@author: abigail
"""
import time
start = time.time()

#def newsFeeds():

from pattern.web import Facebook, NEWS, SEARCH, FRIENDS

fb = Facebook(
    license='CAAEuAis8fUgBALDf0mJZAQrXiOCN01f3DmCoz9vtGmpgt7qYtMeIDwDRC9yPSxZBFg53HPLd9hVQuU6YmWvk0HDYIayKla2RTgIkk50dbsENw6n9KvLZCqwUWG2PrvCXy8fp2KHZCcKYUefAFO4CsTufwlWPmSzo1Pm9e5ywmWdRxaSBS5e5'
)
me = fb.profile()

# Friends of the authenticated profile (count=100 is passed to the search).
my_friends = fb.search(me['id'], type=FRIENDS, count=100)

# UTF-8 encoded id of every friend, in search order.
result_ids = [
    friend.id.encode('utf-8')
    for friend in my_friends
]

# Number of friends returned by the search.
print(len(my_friends))

# Fetch the news posts of each friend; friend_news holds the most recent
# search result after every pass.
for friend in my_friends:
    friend_news = fb.search(friend.id, type=NEWS, count=100)
Exemplo n.º 11
0
@author: abigail
"""

import time # import package
start = time.time() #start keeps track of time

#import pickle

from pattern.web import Facebook, NEWS, LIKES, FRIENDS # import pattern from Facebook API

fb = Facebook(
    license='CAAEuAis8fUgBALDf0mJZAQrXiOCN01f3DmCoz9vtGmpgt7qYtMeIDwDRC9yPSxZBFg53HPLd9hVQuU6YmWvk0HDYIayKla2RTgIkk50dbsENw6n9KvLZCqwUWG2PrvCXy8fp2KHZCcKYUefAFO4CsTufwlWPmSzo1Pm9e5ywmWdRxaSBS5e5'
)  # license key for Facebook
me = fb.profile()  # profile belonging to the license key

# Friends of that profile (count=100 is passed to the search).
my_friends = fb.search(me['id'], type=FRIENDS, count=100)

# UTF-8 encoded id of every friend.
result_ids = [
    friend.id.encode('utf-8')
    for friend in my_friends
]

word_counter = {}  # starts out as an empty dictionary

for friend in my_friends:
    # News posts of the current friend.
    news = fb.search(friend['id'], type=NEWS, count=100)
    for post in news:
        feed = post.text.lower()  # lower-cased so the word checks ignore case
        if post.likes > 0:  # only posts with at least one like are examined
            # Posts mentioning any of these activity words are treated as
            # not relevant; a single space is printed for them.
            if any(word in feed for word in (
                    'event', 'added', 'changed', 'birthday',
                    'shared', 'tagged', 'updated')):
                print(" ")
Exemplo n.º 12
0
"""
Created on Tue Sep 30 08:32:09 2014

@author: abigail
"""

import time
start = time.time()

from pattern.web import Facebook, NEWS, SEARCH, FRIENDS #imports data from Facebook

# Connect with a hard-coded license key.
# NOTE(review): the key is committed in source -- consider loading it from
# configuration instead.
fb = Facebook(license='CAAEuAis8fUgBALDf0mJZAQrXiOCN01f3DmCoz9vtGmpgt7qYtMeIDwDRC9yPSxZBFg53HPLd9hVQuU6YmWvk0HDYIayKla2RTgIkk50dbsENw6n9KvLZCqwUWG2PrvCXy8fp2KHZCcKYUefAFO4CsTufwlWPmSzo1Pm9e5ywmWdRxaSBS5e5')
me = fb.profile()
#print me #check to see if fb was printing my info

my_friends = fb.search(me['id'], type=FRIENDS, count=100) # friends of the profile above (count=100 passed to the search)

# NOTE(review): result_ids starts as a list but is rebound to a single
# UTF-8 encoded id string on every pass, so after the loop it holds only
# the last friend's id. The commented-out comprehension below suggests a
# list of all ids was intended -- confirm before relying on result_ids.
result_ids = []
for friend in my_friends:
    result_ids = friend.id.encode('utf-8')
    print result_ids # prints the id of the current friend

#result_ids = [friend.id.encode('utf-8') for friend in my_friends] #condensed version of code above

    friend_news = fb.search(friend.id, type=NEWS, count=100) # news posts of the current friend
    for news in friend_news:
        # Case-sensitive substring checks against the raw post text.
        if 'listed' in news.text or 'BIRTHDAY' in news.text or 'Birthday' in news.text or 'birthday' in news.text or 'invited' in news.text or 'updated' in news.text or 'likes' in news.text or 'shared' in news.text or 'commented' in news.text or 'event' in news.text or 'tagged' in news.text or 'timeline' in news.text or 'changed' in news.text or 'added' in news.text:
# or news.author != friend
            print " "
            # when any of the words appears, only a single space is printed
        else: