-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
132 lines (110 loc) · 3.85 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import sqlite3
from datetime import datetime, timedelta
import youtube
import reddit
from pprint import pprint
# Set up the SQLite schema (video + comment tables) used by this script.
def create_tables(conn):
    """Create the ``video`` and ``comment`` tables when they are missing.

    Foreign-key enforcement is switched on for the connection first; the
    two tables themselves declare no foreign keys.  ``comment`` rows are
    keyed by the pair (id, site).

    conn -- open database connection; only its ``execute`` method is used.
    """
    schema_statements = (
        "PRAGMA foreign_keys = 1",
        (
            "\n"
            "CREATE TABLE IF NOT EXISTS video\n"
            "(id TEXT PRIMARY KEY,\n"
            "channel TEXT,\n"
            "date TEXT, title TEXT, url TEXT)\n"
        ),
        (
            "\n"
            "CREATE TABLE IF NOT EXISTS comment\n"
            "(id TEXT, site TEXT, video TEXT,\n"
            "date TEXT, author TEXT,\n"
            "comment TEXT, url TEXT,\n"
            "up_votes INTEGER, down_votes INTEGER,\n"
            "PRIMARY KEY (id, site))\n"
        ),
    )
    # Order matters: the pragma is issued before any table is created.
    for statement in schema_statements:
        conn.execute(statement)
# Store a single comment; a rejected (duplicate) row is reported, not raised.
def store_comment(comment, connection=None):
    """Insert one comment into the ``comment`` table.

    comment    -- dict providing the keys 'id', 'site', 'video', 'date',
                  'author', 'comment', 'url', 'up_votes' and 'down_votes'.
    connection -- sqlite3 connection to write to.  Optional so existing
                  one-argument callers keep working: when omitted, the
                  module-level ``conn`` (created when this file runs as a
                  script) is used, exactly as before.

    Returns the same ``comment`` dict when the row was inserted, or None
    when the database raised IntegrityError (typically because a row with
    the same (id, site) key is already stored).

    Raises KeyError if ``comment`` lacks one of the keys above, and
    NameError if no connection is passed and no module-level ``conn``
    exists.
    """
    # Only fall back to the global when the caller did not supply one, so
    # the function can be used (and tested) without the script entry point.
    db = conn if connection is None else connection

    columns = ('id', 'site', 'video', 'date', 'author',
               'comment', 'url', 'up_votes', 'down_votes')
    # Built outside the try block so a missing key surfaces as KeyError
    # instead of being confused with a database error.
    row = tuple(comment[column] for column in columns)

    try:
        db.execute(
            "INSERT INTO comment\nVALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)\n",
            row,
        )
    except sqlite3.IntegrityError:
        return None
    return comment
# Fetch the comments for every recent video on the configured channels
# (YouTube comments plus Reddit threads found for the video URL) and
# store them in the database.
def fetch_store_comments(conn, max_age_days=14):
    """Download and store recent videos and their comments, then close ``conn``.

    For each channel in ``youtube.channels`` the videos returned by
    ``youtube.get_videos`` are processed in order until one published more
    than ``max_age_days`` ago is met; the rest of that channel is then
    skipped (presumably the listing is newest-first -- confirm against
    ``youtube.get_videos``).

    conn         -- open sqlite3 connection.  Video rows are inserted
                    through it, it is committed after each finished video,
                    and it is always closed before this function returns,
                    even when a download fails.
    max_age_days -- age limit for videos, in days (default 14, the value
                    that used to be hard-coded).

    NOTE: comments are written by store_comment(), which is called with
    the comment only and therefore uses the module-level connection.  In
    the script entry point that is the same object as ``conn``.
    """
    # Published dates carry a "Z" (UTC) suffix and are parsed as naive
    # datetimes, so the cutoff must be computed in UTC as well.
    cutoff = datetime.utcnow() - timedelta(days=max_age_days)

    try:
        for channel in youtube.channels:
            print("Channel: %s" % channel)
            for item in youtube.get_videos(channel):
                video_id = item['resourceId']['videoId']
                video_url = youtube.video_url(video_id)
                video = {
                    'id': video_id,
                    'channel': item['channelTitle'],
                    'date': item['publishedAt'],
                    'title': item['title'],
                    'url': video_url,
                }

                if _parse_utc_timestamp(video['date']) < cutoff:
                    break
                print("Video: %s" % video['title'])

                try:
                    conn.execute(
                        "INSERT INTO video\nVALUES (?, ?, ?, ?, ?)\n",
                        (video['id'], video['channel'], video['date'],
                         video['title'], video['url']),
                    )
                except sqlite3.IntegrityError:
                    # Video already stored by an earlier run; its comments
                    # are still refreshed below.
                    pass

                _store_youtube_comments(video_id)
                _store_reddit_comments(video_id, video_url)

                # Commit only once a video is completely processed.  The
                # comment loops stop at the first already-stored comment,
                # so persisting a half-downloaded video would make the
                # next run skip the comments that were never fetched.
                conn.commit()
    finally:
        # Closing without a commit discards whatever the interrupted
        # video had written so far.
        conn.close()


def _parse_utc_timestamp(text):
    """Parse a ``YYYY-MM-DDTHH:MM:SS[.ffffff]Z`` string into a naive datetime.

    Raises ValueError when ``text`` matches neither form.
    """
    for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"):
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    raise ValueError("unrecognised timestamp: %r" % (text,))


def _store_youtube_comments(video_id):
    """Store the YouTube comments of one video, stopping at the first duplicate."""
    print(" Downloading Youtube comments")
    fetched = 0
    for raw in youtube.get_comments(video_id):
        fetched += 1
        stored = store_comment({
            'id': raw['id'],
            'site': 'youtube',
            'video': video_id,
            'date': raw['updatedAt'],
            'author': raw['authorDisplayName'],
            'comment': raw['textDisplay'],
            'url': raw['authorGoogleplusProfileUrl'],
            'up_votes': raw['likeCount'],
            'down_votes': 0,  # no down-vote figure is read from YouTube
        })
        if not stored:
            # Already in the database: stop downloading this video's list.
            break
    print(" Finished downloading ({}) Youtube comments".format(fetched))


def _store_reddit_comments(video_id, video_url):
    """Store Reddit comments from every thread found for ``video_url``.

    A duplicate comment ends the current thread only; later threads are
    still processed.
    """
    print(" Downloading Reddit comments")
    fetched = 0
    for thread in reddit.get_threads(video_url):
        for raw in reddit.get_comments(thread['id']):
            fetched += 1
            if 'body' not in raw:
                # Entries without a body carry no comment text to store.
                continue

            if 'created_utc' in raw:
                date = datetime.utcfromtimestamp(raw['created_utc']).isoformat()
            else:
                date = None

            stored = store_comment({
                'id': raw['id'],
                'site': 'reddit',
                'video': video_id,
                'date': date,
                'author': raw['author'],
                'comment': raw['body'],
                'url': 'https://reddit.com/r/{}/comments/{}/{}/{}'.format(
                    raw['subreddit'], thread['id'], raw['author'], raw['id']),
                'up_votes': raw['ups'],
                'down_votes': raw['downs'],
            })
            if not stored:
                break
    print(" Finished downloading ({}) Reddit comments".format(fetched))
if __name__ == '__main__':
    # `conn` is intentionally a module-level name: store_comment() reads
    # it as a global when it is called without an explicit connection.
    conn = sqlite3.connect('comments.db')

    # The schema must exist before any row is inserted.
    create_tables(conn)

    # Downloads everything; the connection is closed by this call.
    fetch_store_comments(conn)