/
tweetFeeder.py
executable file
·72 lines (67 loc) · 3.68 KB
/
tweetFeeder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/python
import twitter
import re
import pinboard
import urllib2
from BeautifulSoup import BeautifulSoup
from StringIO import StringIO
import gzip
# Pinboard login: the account name and the API token that goes with it
pyAccount = 'aUserAccount'
pyToken = 'anAwesomeToken'

# Twitter API client. The keys below are placeholders. You need your own.
api = twitter.Api(
    consumer_key='gibberish',
    consumer_secret='moregibberish',
    access_token_key='evenlongergibberish',
    access_token_secret='thelastbitofgibberish'
)
# Debugging aid, uncomment to check the credentials:
#print api.VerifyCredentials()

# Twitter users with interesting links (screen names, one per entry)
feedList = [
    'binaryghost',
    'viticci',
    'marksiegal',
    'ttscoff',
    'gromble',
    'drbunsen',
    'waltonjones',
    'TJLuoma',
    'eddie_smith',
    'chewingpencils',
    'jeffhunsberger',
    'nateboateng',
    'macdrifter',
    'themindfulbit',
]
# The adventure begins

# A reasonably generous URL regex pattern to match. Compiled once here
# instead of being rebuilt for every tweet.
URL_PATTERN = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
# We don't want image links
IMAGE_TYPES = ('image/png', 'image/jpeg', 'image/gif')
# Seconds to wait on an unresponsive server before giving up on a link
FETCH_TIMEOUT = 30


def _fetch_page(url):
    """Visit url and return (final_url, body), or None for an image link.

    The final URL is the one reported by the response after any redirects,
    so shortened links come back as the address they point to. A gzip
    encoded body is decompressed before it is returned. Errors raised while
    fetching or decompressing are left for the caller to handle.
    """
    request = urllib2.Request(url)
    # Lots of sites send gzip now, so ask for it and unpack it below
    request.add_header('Accept-encoding', 'gzip')
    response = urllib2.urlopen(request, timeout=FETCH_TIMEOUT)
    try:
        headers = response.info()
        # A missing Content-Type header is treated as "not an image"
        maintype = headers.get('Content-Type', '').split(';')[0].strip().lower()
        if maintype in IMAGE_TYPES:
            return None
        body = response.read()
        if headers.get('Content-Encoding') == 'gzip':
            body = gzip.GzipFile(fileobj=StringIO(body)).read()
        return response.url, body
    finally:
        # Always release the connection, also when reading fails
        response.close()


def _page_title(markup, fallback):
    """Return the <html><head><title> text of markup, or fallback if absent."""
    # This could be done with lxml but BeautifulSoup is easy
    soup = BeautifulSoup(markup)
    try:
        title = soup.html.head.title.string
    except AttributeError:
        # A missing <html>, <head> or <title> yields None somewhere in the chain
        title = None
    return title or fallback


for user in feedList:
    # Get all of the user tweets
    statuses = api.GetUserTimeline(user)
    # Loop through all tweets and look for URLs
    for status in statuses:
        tweetMsg = status.text
        sourceURL = 'https://twitter.com/' + user + '/status/' + str(status.id)
        # Everytime we find a URL, do some magic
        for url in URL_PATTERN.findall(tweetMsg):
            rssTag = ['rss_' + user, 'rss_tweets']
            try:
                # Let's make sure the URL is valid by trying to visit it
                page = _fetch_page(url)
            except IOError:
                # urllib2.URLError (and so HTTPError such as a 404) and socket
                # errors are IOError subclasses in Python 2, and the gzip module
                # reports a bad stream as IOError. Just skip the link. We don't
                # have time for this junk.
                continue
            if page is None:
                # Image link, nothing to bookmark
                continue
            # I guess I just prefer a real link to shortened t.co links
            fullURL, data = page
            if fullURL is None:
                continue
            print(fullURL)
            # Get the title for the page, falling back to the URL itself so the
            # bookmark always has a description
            pyTitle = _page_title(data, fullURL)
            print(pyTitle)
            # Assemble the bookmark notes. Create Twitter link for RSS viewing
            bookmarkExtended = '<p>'+user+'</p>\n<p>' + tweetMsg + '</p>\n\n' + '<a href="'+sourceURL+'">Twitter Source</a>'
            try:
                # NOTE(review): a new Pinboard session is opened for every link;
                # opening it once before the loops looks possible - confirm that
                # the object returned by pinboard.open can be reused.
                p = pinboard.open(username=pyAccount, token=pyToken)
                p.add(url=fullURL, description=pyTitle, extended=bookmarkExtended, tags=rssTag)
            except (RuntimeError, TypeError, NameError) as err:
                # Report the error that actually happened and keep going
                print('Could not bookmark ' + fullURL + ': ' + repr(err))