Exemplo n.º 1
0
def write_predictdb():
    """Write the scores in ``scoredict`` to one row of UpdateFilm.

    Reads the module-level ``scoredict`` and ``filmid``, runs a single
    parameterized UPDATE through ``cursor`` and then commits on ``db``.
    """
    # Keys are listed in the same order as the placeholders in the statement.
    score_columns = ('linear_predict', 'linear_test',
                     'lasso_predict', 'lasso_test',
                     'knn_predict', 'knn_test',
                     'poly_predict', 'poly_test')
    statement = '''UPDATE UpdateFilm SET linear_predict=%s,linear_test=%s,
        lasso_predict=%s,lasso_test=%s,knn_predict=%s,knn_test=%s,poly_predict=%s,poly_test=%s 
        WHERE imdb_filmID = %s'''
    cursor.execute(
        statement,
        tuple(scoredict[name] for name in score_columns) + (filmid,))
    db.commit()
from crawler_util import page_read
from film_update import moviescrawler
from db_helper.save import cursor, db


def get_exist_list():
    """Return the first column of every row pending on the shared cursor.

    This only fetches: a query must already have been executed on
    ``cursor`` before the call.
    """
    return [row[0] for row in cursor.fetchall()]


# Walk IMDb's list of Oscar best-picture winners: ids already present in
# FilmDB get Oscar = 1, every other id is passed to moviescrawler.crawl_imdb.
cursor.execute('SELECT imdb_filmID FROM FilmDB')
exist_films = get_exist_list()

soup = page_read.page_read_nolog(
    'http://www.imdb.com/search/title?count=100&'
    'groups=oscar_best_picture_winners&title_type=feature&sort=release_date,desc'
)
imdb_href = 'http://www.imdb.com'

for item in soup.select('.lister-item-header'):
    # The film id is the path segment that follows 'title/' in the link.
    the_filmid = item.a.get('href').split('title/')[1].split('/')[0]

    if the_filmid not in exist_films:
        # Not stored yet: print the id and hand it to the crawler.
        print(the_filmid)
        moviescrawler.crawl_imdb(the_filmid, 'Oscar')
        continue

    cursor.execute('''UPDATE FilmDB SET Oscar = 1 WHERE imdb_filmID=%s''',
                   (the_filmid, ))
    db.commit()

db.commit()
def get_exist_list():
    """Query FilmDB and return the imdb_filmID column as a list."""
    cursor.execute('SELECT imdb_filmID FROM FilmDB')
    # Iterating the cursor yields the result rows; keep the first column.
    return [row[0] for row in cursor]
from crawler_util import page_read
from film_update import moviescrawler
from db_helper.save import cursor, db


def get_exist_list():
    """Query FilmDB and return the imdb_filmID column as a list."""
    cursor.execute('SELECT imdb_filmID FROM FilmDB')
    # Iterating the cursor yields the result rows; keep the first column.
    return [row[0] for row in cursor]


# Walk the IMDb Top 250 chart: ids already present in FilmDB get
# filmType = 'Top250', every other id is passed to moviescrawler.crawl_imdb.
exist_films = get_exist_list()
soup = page_read.page_read_nolog(
    'http://www.imdb.com/chart/top/?ref_=nv_mv_250_6')

for item in soup.select('.titleColumn'):
    ref = item.a.get('href').strip()
    # The film id is the path segment that follows 'title/' in the link.
    film_id = ref.split('title/')[1].split('/')[0]

    if film_id not in exist_films:
        # Not stored yet: hand the id to the crawler.
        moviescrawler.crawl_imdb(film_id, 'Top250')
        continue

    cursor.execute(
        '''UPDATE FilmDB SET filmType = 'Top250' WHERE imdb_filmID=%s''',
        (film_id, ))
    print(film_id)
    db.commit()

db.commit()
Exemplo n.º 5
0

def write_predictdb():
    """Write the scores in ``scoredict`` to one row of UpdateFilm.

    Reads the module-level ``scoredict`` and ``filmid``, runs a single
    parameterized UPDATE through ``cursor`` and then commits on ``db``.
    """
    # Keys are listed in the same order as the placeholders in the statement.
    score_columns = ('linear_predict', 'linear_test',
                     'lasso_predict', 'lasso_test',
                     'knn_predict', 'knn_test',
                     'poly_predict', 'poly_test')
    statement = '''UPDATE UpdateFilm SET linear_predict=%s,linear_test=%s,
        lasso_predict=%s,lasso_test=%s,knn_predict=%s,knn_test=%s,poly_predict=%s,poly_test=%s 
        WHERE imdb_filmID = %s'''
    cursor.execute(
        statement,
        tuple(scoredict[name] for name in score_columns) + (filmid,))
    db.commit()


# Fetch (click_times, gross) pairs for films that appear in both FilmDB and
# TrailerClick, keeping only rows whose gross is greater than 4x the clicks.
cursor.execute('SELECT click_times,gross '
               'FROM FilmDB,TrailerClick '
               'WHERE FilmDB.imdb_filmID=TrailerClick.imdb_filmID '
               'AND gross>4*TrailerClick.click_times')
clicks = cursor.fetchall()
# Split the rows into two parallel lists: click counts (X) and gross (y).
X_R1 = list()
y_R1 = list()
for click_time, gross in clicks:
    X_R1.append(click_time)
    y_R1.append(gross)
# Reshape each list into a single-column 2-D array.
X_data = np.array(X_R1).reshape(-1, 1)
y_data = np.array(y_R1).reshape(-1, 1)
# NOTE(review): train_test_split is presumably scikit-learn's; its import is
# not visible here. test_size=0.2 and random_state=4 are passed as shown.
X_train, X_test, y_train, y_test = train_test_split(X_data,
                                                    y_data,
                                                    test_size=0.2,
                                                    random_state=4)
cursor.execute(
    'SELECT TrailerClick.imdb_filmID,max(click_times) FROM UpdateFilm,TrailerClick WHERE TrailerClick.imdb_filmID=UpdateFilm.imdb_filmID GROUP BY UpdateFilm.imdb_filmID'