Example #1
0
def _save_data(url, session, db):
    """Store data into movie_new for tweeting message later"""

    with sqlite3.connect(db) as conn:
        conn.text_factory = str
        cursor = conn.cursor()
        insert = """INSERT INTO movie_new (title, download_url, movie_url,
        douban_url, douban_title, douban_id, lpic_url, download_url_md5, upload_date) VALUES
        (?,?,?,?,?,?,?,?,?)"""
        query = """SELECT * from movie_new WHERE download_url_md5=?"""

        for item in parse_sourcelist(session=session, url=url):
            movie_url = item
            title, upload_date, download_url = parse_detail(item)
            download_url_md5 = md5(download_url).hexdigest()
            douban_title, douban_url, douban_id, lpic_url = \
                    parse_douban(title, api_key=API_KEY)
            data = (title, download_url, movie_url, douban_url, douban_title,\
                    douban_id, lpic_url, download_url_md5, upload_date)

            #test = datetime.strptime('2013-05-28', '%Y-%m-%d').date() == \
            #        datetime.strptime(upload_date, '%Y-%m-%d').date()
            #if test and not cursor.execute(query, (download_url_md5,)).fetchall():
            if check_update(upload_date) and not cursor.execute(query,
                    (download_url_md5, )).fetchall():
                print "Insert data", douban_title, "into database"
                cursor.execute(insert, data)
                conn.commit()
            else:
                print "Can't insert data for duplicate content or no update"
Example #2
0
def _save_data(url, session, db):
    """Store data into movie_new for tweeting message later"""

    with sqlite3.connect(db) as conn:
        conn.text_factory = str
        cursor = conn.cursor()
        insert = """INSERT INTO movie_new (title, download_url, movie_url,
        douban_url, douban_title, douban_id, lpic_url, download_url_md5, upload_date) VALUES
        (?,?,?,?,?,?,?,?,?)"""
        query = """SELECT * from movie_new WHERE download_url_md5=?"""

        for item in parse_sourcelist(session=session, url=url):
            movie_url = item
            title, upload_date, download_url = parse_detail(item)
            download_url_md5 = md5(download_url).hexdigest()
            douban_title, douban_url, douban_id, lpic_url = \
                    parse_douban(title, api_key=API_KEY)
            data = (title, download_url, movie_url, douban_url, douban_title,\
                    douban_id, lpic_url, download_url_md5, upload_date)

            #test = datetime.strptime('2013-05-28', '%Y-%m-%d').date() == \
            #        datetime.strptime(upload_date, '%Y-%m-%d').date()
            #if test and not cursor.execute(query, (download_url_md5,)).fetchall():
            if check_update(upload_date) and not cursor.execute(
                    query, (download_url_md5, )).fetchall():
                print "Insert data", douban_title, "into database"
                cursor.execute(insert, data)
                conn.commit()
            else:
                print "Can't insert data for duplicate content or no update"
Example #3
0
activate_this = 'venv/bin/activate_this.py'
execfile(activate_this, dict(__file__=activate_this))

import requests
from strs2weibo import parse_douban, parse_detail, parse_sourcelist,\
    contruct_status, retrieve_image, ROOT



if __name__ == '__main__':
    session = requests.Session()
    session.get(ROOT)
    url = 'http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=1&page=1'
    for detail_url in parse_sourcelist(session, url):
        
        title = parse_detail(detail_url)[0] 
        download_url = parse_detail(detail_url)[2]
        (douban_title, douban_url, douban_id, lpic_url ) = parse_douban(title)
        pic = retrieve_image(lpic_url)
        topic = u'电影传送门'
        status = construct_status(topic, douban_title, download_url, douban_url)
        print status
#        weibo_upload(status, pic)

    




Example #4
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

activate_this = 'venv/bin/activate_this.py'
execfile(activate_this, dict(__file__=activate_this))


import requests
from config import ROOT
from strs2weibo import parse_detail, parse_sourcelist

base_url = 'http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=1&page=%d'
#base_url = 'http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=3&&FirstTypeID=3391&page=%d'
session = requests.Session()
session.get(ROOT)
with open('detail.txt', 'w') as f:
    for page in xrange(1, 521):
        url = base_url % (page)
        for detail_url in parse_sourcelist(session, url):
           title = parse_detail(detail_url)[0].encode('utf8')
           download_url = parse_detail(detail_url)[2]
           print title, download_url, detail_url
           f.write('%s|%s|%s\n' % (title, download_url, detail_url.encode('utf8')))
    
Example #5
0
#!usr/bin/env python
# -*- coding: utf-8 -*-

activate_this = 'venv/bin/activate_this.py'
execfile(activate_this, dict(__file__=activate_this))

import requests
from strs2weibo import parse_douban, parse_detail, parse_sourcelist,\
    contruct_status, retrieve_image, ROOT

if __name__ == '__main__':
    session = requests.Session()
    session.get(ROOT)
    url = 'http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=1&page=1'
    for detail_url in parse_sourcelist(session, url):

        title = parse_detail(detail_url)[0]
        download_url = parse_detail(detail_url)[2]
        (douban_title, douban_url, douban_id, lpic_url) = parse_douban(title)
        pic = retrieve_image(lpic_url)
        topic = u'电影传送门'
        status = construct_status(topic, douban_title, download_url,
                                  douban_url)
        print status
#        weibo_upload(status, pic)