forked from thisisparker/linkarchiver
-
Notifications
You must be signed in to change notification settings - Fork 0
/
archivelinks.py
executable file
·125 lines (101 loc) · 3.97 KB
/
archivelinks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env python3
# Listens to a Twitter timeline and sends tweeted URLs to the Internet Archive.
import datetime
import http
import os
import requests
import sqlite3
import yaml
from twython import Twython, TwythonStreamer, TwythonError
# Directory containing this script; config.yaml and the database path are
# resolved relative to it, not to the current working directory.
fullpath = os.path.dirname(os.path.realpath(__file__))

CONFIGFILE = os.path.join(fullpath, "config.yaml")

with open(CONFIGFILE, 'r') as c:
    # safe_load only builds plain Python objects. yaml.load() without an
    # explicit Loader is deprecated in PyYAML 5.1 and raises TypeError in
    # PyYAML 6, and the default loader could construct arbitrary objects.
    CONFIG = yaml.safe_load(c)

# Database file named by the 'db' config entry, next to this script.
DB = os.path.join(fullpath, CONFIG['db'])

# Module-wide connection and cursor shared by the functions below.
conn = sqlite3.connect(DB)
cur = conn.cursor()

# Screen name of the bot account, read from the 'twitter_bot_name' entry.
SCREEN_NAME = CONFIG['twitter_bot_name']
def get_twitter_creds():
    """Return the Twitter credentials stored in CONFIG.

    Returns:
        A 4-tuple ``(app_key, app_secret, oauth_token, oauth_token_secret)``.

    Raises:
        KeyError: if any of the four entries is missing from CONFIG.
    """
    credential_keys = (
        'twitter_app_key',
        'twitter_app_secret',
        'twitter_oauth_token',
        'twitter_oauth_token_secret',
    )
    return tuple(CONFIG[key] for key in credential_keys)
def get_stream_instance():
    """Build a TwythonStreamer from the configured credentials."""
    # get_twitter_creds() yields the four values in the positional order
    # the constructor is called with here.
    return TwythonStreamer(*get_twitter_creds())
def get_twitter_instance():
    """Build a Twython REST client from the configured credentials."""
    # get_twitter_creds() yields the four values in the positional order
    # the constructor is called with here.
    return Twython(*get_twitter_creds())
def check_tweet(data):
    """Handle one message delivered by the stream.

    * A message with an 'entities' key is treated as a tweet: every expanded
      URL in it is sent to the archive, and if the bot is among the mentioned
      users a reply is tweeted for each URL.
    * A message with an 'event' key is treated as an event: on a 'follow'
      event whose source is not the bot itself, the source is followed.
    * Anything else is printed as-is.

    Args:
        data: the decoded stream message (a dict).
    """
    if 'entities' in data:
        links = grab_urls(data)
        mentioned = [mention['screen_name']
                     for mention in data['entities']['user_mentions']]
        status_id = data['id_str']
        author = data['user']['screen_name']

        for link in links:
            archived = send_to_archive(link, status_id, author)
            # Only answer when the bot itself was mentioned in the tweet.
            if SCREEN_NAME in mentioned:
                tweet_reply(archived, status_id, author)
        return

    if 'event' not in data:
        print(data)
        return

    print("Some kind of event!")
    if data['event'] != 'follow':
        return

    # 'source' is only read for follow events, as in the combined condition.
    follower = data['source']['screen_name']
    if follower == SCREEN_NAME:
        return

    print("I'm gonna follow {}.".format(follower))
    twitter_follow(follower)
def log_failure(status_code, data):
    """Print a diagnostic line for a stream error.

    Args:
        status_code: the status code reported with the error.
        data: the accompanying payload; it is printed via its string form.
    """
    print("Something's gone terribly wrong: {} {}".format(status_code, data))
def twitter_follow(screen_name):
    """Follow the account with the given screen name.

    A TwythonError raised by the API call is printed rather than propagated.

    Args:
        screen_name: screen name of the account to follow.
    """
    client = get_twitter_instance()
    try:
        client.create_friendship(screen_name=screen_name)
    except TwythonError as exc:
        print("Had this error, bud: {}".format(exc))
def tweet_reply(archive_link, tweet_id, screen_name):
    """Reply to a tweet with its archived link, or an apology if there is none.

    Args:
        archive_link: URL of the archived copy; a falsy value (e.g. None)
            means archiving failed and an apology is sent instead.
        tweet_id: ID of the tweet being replied to.
        screen_name: screen name of the user to address the reply to.
    """
    twitter = get_twitter_instance()

    if archive_link:
        message = "Here's your archived link: " + archive_link
    else:
        message = "Sorry, something went wrong :("

    try:
        twitter.update_status(
            status="@" + screen_name + " " + message,
            in_reply_to_status_id=tweet_id)
    except TwythonError as err:
        # Replying is best-effort, so a failed reply must not stop the bot.
        # Log it instead of silently discarding it, as twitter_follow does.
        print("Couldn't reply to @{}: {}".format(screen_name, err))
def grab_urls(tweet):
    """Return the non-empty expanded URLs found in a tweet.

    Args:
        tweet: a dict with an ``['entities']['urls']`` list whose items each
            carry an 'expanded_url' entry.

    Returns:
        A list of the truthy 'expanded_url' values, in their original order.
    """
    return [entity['expanded_url']
            for entity in tweet['entities']['urls']
            if entity['expanded_url']]
def send_to_archive(link, tweet_id, tweeter):
    """Ask the Internet Archive to save a URL and record the request.

    Sends a GET to ``https://web.archive.org/save/<link>``, inserts a row
    into the ``links`` table and commits it, then builds the archive URL
    from the response's Content-Location header.

    Args:
        link: the URL to archive.
        tweet_id: ID of the tweet the URL came from.
        tweeter: screen name of the user who tweeted it.

    Returns:
        The archived URL as a string, or None if the HTTP request, the
        database write, or the header lookup failed.
    """
    print("Sending {} to the Internet Archive.".format(link))
    try:
        res = requests.get(
            "https://web.archive.org/save/{}".format(link),
            headers={'user-agent': '@{} twitter bot'.format(SCREEN_NAME)})

        # Naive UTC timestamp, same string form as the deprecated utcnow().
        nowstring = str(
            datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None))

        # link and tweeter come straight from tweets, so bind them as
        # parameters: interpolating them into the SQL text allowed injection
        # and broke on any value containing a single quote.
        cur.execute(
            "insert into links (url, tweeter, tweet_id, time) "
            "values (?, ?, ?, ?)",
            (link, tweeter, tweet_id, nowstring))
        print("attempting to write to db")
        conn.commit()

        return "https://web.archive.org" + res.headers['Content-Location']
    except (requests.RequestException, sqlite3.Error, KeyError) as err:
        # Still best-effort (callers treat None as failure), but report what
        # went wrong instead of hiding it behind a bare except.
        print("Couldn't archive {}: {!r}".format(link, err))
        return None
def do_the_streaming():
    """Listen to the user stream, reconnecting after an incomplete read.

    Each attempt builds a fresh streamer, wires check_tweet and log_failure
    in as its success and error callbacks, and blocks in ``streamer.user``.
    If the connection ends with ``http.client.IncompleteRead`` a new attempt
    is made; any other exit from ``streamer.user`` ends this function.
    """
    # Import the submodule explicitly: a bare `import http` does not
    # guarantee that `http.client` is loaded.
    import http.client

    # Loop rather than recurse so that repeated reconnects cannot grow the
    # call stack until RecursionError.
    while True:
        streamer = get_stream_instance()
        streamer.on_success = check_tweet
        streamer.on_error = log_failure
        try:
            # The value must be the string 'all'; the bare name `all` is the
            # builtin function, not a valid parameter value.
            streamer.user(replies='all')
        except http.client.IncompleteRead:
            continue
        return
def main():
    """Script entry point: start listening to the stream."""
    do_the_streaming()
# Run the bot only when executed as a script, not when imported.
if __name__ == '__main__':
    main()