Пример #1
0
def _save_data(url, session, db):
    """Store data into movie_new for tweeting message later"""

    with sqlite3.connect(db) as conn:
        conn.text_factory = str
        cursor = conn.cursor()
        insert = """INSERT INTO movie_new (title, download_url, movie_url,
        douban_url, douban_title, douban_id, lpic_url, download_url_md5, upload_date) VALUES
        (?,?,?,?,?,?,?,?,?)"""
        query = """SELECT * from movie_new WHERE download_url_md5=?"""

        for item in parse_sourcelist(session=session, url=url):
            movie_url = item
            title, upload_date, download_url = parse_detail(item)
            download_url_md5 = md5(download_url).hexdigest()
            douban_title, douban_url, douban_id, lpic_url = \
                    parse_douban(title, api_key=API_KEY)
            data = (title, download_url, movie_url, douban_url, douban_title,\
                    douban_id, lpic_url, download_url_md5, upload_date)

            #test = datetime.strptime('2013-05-28', '%Y-%m-%d').date() == \
            #        datetime.strptime(upload_date, '%Y-%m-%d').date()
            #if test and not cursor.execute(query, (download_url_md5,)).fetchall():
            if check_update(upload_date) and not cursor.execute(query,
                    (download_url_md5, )).fetchall():
                print "Insert data", douban_title, "into database"
                cursor.execute(insert, data)
                conn.commit()
            else:
                print "Can't insert data for duplicate content or no update"
Пример #2
0
def create_db():
    with sqlite3.connect(_database) as conn:
        # print 'Creating schema'
        # with open(_schema, 'rt') as f:
        #     schema = f.read()
        # conn.executescript(schema)
        conn.text_factory = str
        cursor = conn.cursor()
        f = open(_detail)
        for line in f.readlines()[1083:]:
            (title, download_url, movie_url) = line.split('|')
            try:
                (douban_title, douban_url, douban_id,
                 lpic_url) = parse_douban(title, api_key=API_KEY)
                print title, '<><><>', douban_title
                data = (title, download_url, movie_url, douban_url,\
                        douban_title, douban_id, lpic_url)
                insert = """INSERT INTO movie (title, download_url, movie_url,
                douban_url, douban_title, douban_id, lpic_url) VALUES(?,?,?,?,?,?,?)"""
                cursor.execute(insert, data)
                #douban API request limit 40 request per second per IP
                time.sleep(1.5)
                conn.commit()
            except TypeError:
                print "%s not found in Douban" % title
Пример #3
0
def _save_data(url, session, db):
    """Store data into movie_new for tweeting message later"""

    with sqlite3.connect(db) as conn:
        conn.text_factory = str
        cursor = conn.cursor()
        insert = """INSERT INTO movie_new (title, download_url, movie_url,
        douban_url, douban_title, douban_id, lpic_url, download_url_md5, upload_date) VALUES
        (?,?,?,?,?,?,?,?,?)"""
        query = """SELECT * from movie_new WHERE download_url_md5=?"""

        for item in parse_sourcelist(session=session, url=url):
            movie_url = item
            title, upload_date, download_url = parse_detail(item)
            download_url_md5 = md5(download_url).hexdigest()
            douban_title, douban_url, douban_id, lpic_url = \
                    parse_douban(title, api_key=API_KEY)
            data = (title, download_url, movie_url, douban_url, douban_title,\
                    douban_id, lpic_url, download_url_md5, upload_date)

            #test = datetime.strptime('2013-05-28', '%Y-%m-%d').date() == \
            #        datetime.strptime(upload_date, '%Y-%m-%d').date()
            #if test and not cursor.execute(query, (download_url_md5,)).fetchall():
            if check_update(upload_date) and not cursor.execute(
                    query, (download_url_md5, )).fetchall():
                print "Insert data", douban_title, "into database"
                cursor.execute(insert, data)
                conn.commit()
            else:
                print "Can't insert data for duplicate content or no update"
Пример #4
0
def create_db():
    with sqlite3.connect(_database) as conn:
        # print 'Creating schema'
        # with open(_schema, 'rt') as f:
        #     schema = f.read()
        # conn.executescript(schema)
        conn.text_factory = str
        cursor = conn.cursor()
        f = open(_detail)
        for line in f.readlines()[1083:]:
            (title, download_url, movie_url) = line.split("|")
            try:
                (douban_title, douban_url, douban_id, lpic_url) = parse_douban(title, api_key=API_KEY)
                print title, "<><><>", douban_title
                data = (title, download_url, movie_url, douban_url, douban_title, douban_id, lpic_url)
                insert = """INSERT INTO movie (title, download_url, movie_url,
                douban_url, douban_title, douban_id, lpic_url) VALUES(?,?,?,?,?,?,?)"""
                cursor.execute(insert, data)
                # douban API request limit 40 request per second per IP
                time.sleep(1.5)
                conn.commit()
            except TypeError:
                print "%s not found in Douban" % title
Пример #5
0
activate_this = 'venv/bin/activate_this.py'
execfile(activate_this, dict(__file__=activate_this))

import requests
from strs2weibo import parse_douban, parse_detail, parse_sourcelist,\
    contruct_status, retrieve_image, ROOT



if __name__ == '__main__':
    session = requests.Session()
    session.get(ROOT)
    url = 'http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=1&page=1'
    for detail_url in parse_sourcelist(session, url):
        
        title = parse_detail(detail_url)[0] 
        download_url = parse_detail(detail_url)[2]
        (douban_title, douban_url, douban_id, lpic_url ) = parse_douban(title)
        pic = retrieve_image(lpic_url)
        topic = u'电影传送门'
        status = construct_status(topic, douban_title, download_url, douban_url)
        print status
#        weibo_upload(status, pic)

    




Пример #6
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

activate_this = 'venv/bin/activate_this.py'
execfile(activate_this, dict(__file__=activate_this))

import requests
from strs2weibo import parse_douban, parse_detail, parse_sourcelist,\
    contruct_status, retrieve_image, ROOT

if __name__ == '__main__':
    session = requests.Session()
    session.get(ROOT)
    url = 'http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=1&page=1'
    for detail_url in parse_sourcelist(session, url):

        title = parse_detail(detail_url)[0]
        download_url = parse_detail(detail_url)[2]
        (douban_title, douban_url, douban_id, lpic_url) = parse_douban(title)
        pic = retrieve_image(lpic_url)
        topic = u'电影传送门'
        status = construct_status(topic, douban_title, download_url,
                                  douban_url)
        print status
#        weibo_upload(status, pic)