def _save_data(url, session, db):
    """Store newly listed movies into ``movie_new`` for tweeting later.

    Walks every detail-page URL produced by ``parse_sourcelist`` for *url*
    and inserts one row per movie that passes ``check_update(upload_date)``
    and whose download-URL MD5 is not yet present in ``movie_new``.

    Args:
        url: Source-list URL handed to ``parse_sourcelist``.
        session: Session object handed to ``parse_sourcelist``.
        db: Database argument handed to ``sqlite3.connect``.
    """
    # Function-scope import keeps this block independent of the file header.
    from contextlib import closing

    insert = ("INSERT INTO movie_new (title, download_url, movie_url, "
              "douban_url, douban_title, douban_id, lpic_url, "
              "download_url_md5, upload_date) VALUES (?,?,?,?,?,?,?,?,?)")
    query = """SELECT * from movie_new WHERE download_url_md5=?"""

    # A sqlite3 connection used directly as a context manager only commits
    # or rolls back; closing() makes sure the connection is released too.
    with closing(sqlite3.connect(db)) as conn:
        conn.text_factory = str
        cursor = conn.cursor()

        for movie_url in parse_sourcelist(session=session, url=url):
            title, upload_date, download_url = parse_detail(movie_url)
            # The MD5 of the download URL is the duplicate-detection key.
            download_url_md5 = md5(download_url).hexdigest()

            # Run the cheap checks first so rejected items never trigger
            # the Douban lookup below.
            already_stored = lambda: cursor.execute(
                query, (download_url_md5,)).fetchall()
            if not check_update(upload_date) or already_stored():
                print("Can't insert data for duplicate content or no update")
                continue

            douban_title, douban_url, douban_id, lpic_url = \
                parse_douban(title, api_key=API_KEY)
            data = (title, download_url, movie_url, douban_url, douban_title,
                    douban_id, lpic_url, download_url_md5, upload_date)

            print("Insert data %s into database" % douban_title)
            cursor.execute(insert, data)
            # Commit per row so earlier inserts survive a later failure.
            conn.commit()
def _save_data(url, session, db):
    """Store newly listed movies into ``movie_new`` for tweeting later.

    Walks every detail-page URL produced by ``parse_sourcelist`` for *url*
    and inserts one row per movie that passes ``check_update(upload_date)``
    and whose download-URL MD5 is not yet present in ``movie_new``.

    Args:
        url: Source-list URL handed to ``parse_sourcelist``.
        session: Session object handed to ``parse_sourcelist``.
        db: Database argument handed to ``sqlite3.connect``.
    """
    # Function-scope import keeps this block independent of the file header.
    from contextlib import closing

    insert = ("INSERT INTO movie_new (title, download_url, movie_url, "
              "douban_url, douban_title, douban_id, lpic_url, "
              "download_url_md5, upload_date) VALUES (?,?,?,?,?,?,?,?,?)")
    query = """SELECT * from movie_new WHERE download_url_md5=?"""

    # A sqlite3 connection used directly as a context manager only commits
    # or rolls back; closing() makes sure the connection is released too.
    with closing(sqlite3.connect(db)) as conn:
        conn.text_factory = str
        cursor = conn.cursor()

        for movie_url in parse_sourcelist(session=session, url=url):
            title, upload_date, download_url = parse_detail(movie_url)
            # The MD5 of the download URL is the duplicate-detection key.
            download_url_md5 = md5(download_url).hexdigest()

            # Run the cheap checks first so rejected items never trigger
            # the Douban lookup below.
            already_stored = lambda: cursor.execute(
                query, (download_url_md5,)).fetchall()
            if not check_update(upload_date) or already_stored():
                print("Can't insert data for duplicate content or no update")
                continue

            douban_title, douban_url, douban_id, lpic_url = \
                parse_douban(title, api_key=API_KEY)
            data = (title, download_url, movie_url, douban_url, douban_title,
                    douban_id, lpic_url, download_url_md5, upload_date)

            print("Insert data %s into database" % douban_title)
            cursor.execute(insert, data)
            # Commit per row so earlier inserts survive a later failure.
            conn.commit()
# Execute the virtualenv activation script before the third-party imports
# (presumably so they resolve from ./venv -- confirm the working directory).
activate_this = 'venv/bin/activate_this.py'
execfile(activate_this, dict(__file__=activate_this))

import requests

from strs2weibo import parse_douban, parse_detail, parse_sourcelist,\
    retrieve_image, ROOT

# NOTE(review): the original imported `contruct_status` but called
# `construct_status`, so the script could not run with either spelling.
# Which name strs2weibo actually exports is not visible from here, so
# accept both and bind the result to the correctly spelled name.
try:
    from strs2weibo import construct_status
except ImportError:
    from strs2weibo import contruct_status as construct_status


if __name__ == '__main__':
    session = requests.Session()
    # Request ROOT once on the shared session before reading the list page.
    session.get(ROOT)

    url = 'http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=1&page=1'
    # Loop-invariant, so it is built once instead of on every iteration.
    topic = u'电影传送门'

    for detail_url in parse_sourcelist(session, url):
        # Parse the detail page once; index 0 is the title and index 2 the
        # download URL.
        detail = parse_detail(detail_url)
        title = detail[0]
        download_url = detail[2]

        (douban_title, douban_url, douban_id, lpic_url) = parse_douban(title)
        # Only the disabled upload call below consumes `pic`; the fetch is
        # kept so the script performs the same work as before.
        pic = retrieve_image(lpic_url)

        status = construct_status(topic, douban_title, download_url,
                                  douban_url)
        print(status)
        # weibo_upload(status, pic)
#!/usr/bin/env python # -*- coding: utf-8 -*- activate_this = 'venv/bin/activate_this.py' execfile(activate_this, dict(__file__=activate_this)) import requests from config import ROOT from strs2weibo import parse_detail, parse_sourcelist base_url = 'http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=1&page=%d' #base_url = 'http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=3&&FirstTypeID=3391&page=%d' session = requests.Session() session.get(ROOT) with open('detail.txt', 'w') as f: for page in xrange(1, 521): url = base_url % (page) for detail_url in parse_sourcelist(session, url): title = parse_detail(detail_url)[0].encode('utf8') download_url = parse_detail(detail_url)[2] print title, download_url, detail_url f.write('%s|%s|%s\n' % (title, download_url, detail_url.encode('utf8')))
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Execute the virtualenv activation script before the third-party imports
# (presumably so they resolve from ./venv -- confirm the working directory).
activate_this = 'venv/bin/activate_this.py'
execfile(activate_this, dict(__file__=activate_this))

import requests

from strs2weibo import parse_douban, parse_detail, parse_sourcelist,\
    retrieve_image, ROOT

# NOTE(review): the original imported `contruct_status` but called
# `construct_status`, so the script could not run with either spelling.
# Which name strs2weibo actually exports is not visible from here, so
# accept both and bind the result to the correctly spelled name.
try:
    from strs2weibo import construct_status
except ImportError:
    from strs2weibo import contruct_status as construct_status


if __name__ == '__main__':
    session = requests.Session()
    # Request ROOT once on the shared session before reading the list page.
    session.get(ROOT)

    url = 'http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=1&page=1'
    # Loop-invariant, so it is built once instead of on every iteration.
    topic = u'电影传送门'

    for detail_url in parse_sourcelist(session, url):
        # Parse the detail page once; index 0 is the title and index 2 the
        # download URL.
        detail = parse_detail(detail_url)
        title = detail[0]
        download_url = detail[2]

        (douban_title, douban_url, douban_id, lpic_url) = parse_douban(title)
        # Only the disabled upload call below consumes `pic`; the fetch is
        # kept so the script performs the same work as before.
        pic = retrieve_image(lpic_url)

        status = construct_status(topic, douban_title, download_url,
                                  douban_url)
        print(status)
        # weibo_upload(status, pic)