def write_predictdb():
    """Write the values held in ``scoredict`` to the UpdateFilm row of ``filmid``.

    Reads the module-level ``scoredict`` and ``filmid``, sends a single
    parameterized UPDATE through ``cursor`` and then commits on ``db``.
    """
    # Same order as the placeholders in the statement below.
    score_keys = (
        'linear_predict', 'linear_test',
        'lasso_predict', 'lasso_test',
        'knn_predict', 'knn_test',
        'poly_predict', 'poly_test',
    )
    statement = '''UPDATE UpdateFilm SET linear_predict=%s,linear_test=%s, lasso_predict=%s,lasso_test=%s,knn_predict=%s,knn_test=%s,poly_predict=%s,poly_test=%s WHERE imdb_filmID = %s'''
    values = tuple(scoredict[key] for key in score_keys) + (filmid,)
    cursor.execute(statement, values)
    db.commit()
from db_helper.init_cursor import cursor
from db_helper.init_cursor import db


def get_exist_list():
    """Return the first column of every row currently pending on ``cursor``.

    Must be called right after a ``cursor.execute(...)``; it consumes the
    whole result set with ``fetchall()``.
    """
    return [row[0] for row in cursor.fetchall()]


# Count how often each '/'-separated tag occurs across all FilmDB rows.
cursor.execute('SELECT tags FROM FilmDB')
res = dict()
tags = get_exist_list()
for dd in tags:
    if not dd:
        # A NULL (None) or empty tags value has nothing to count; calling
        # .split() on None would raise AttributeError.
        continue
    for tag in dd.split('/'):
        # Leading, trailing or doubled '/' yields empty pieces; skip them.
        if tag != '':
            res[tag] = res.get(tag, 0) + 1
# Parenthesized form prints the same dict on Python 2 and also parses on
# Python 3.
print(res)
# NOTE(review): the original text had no line breaks; the layout below is a
# reconstruction of the most plausible nesting.  The next line is the tail of
# a definition whose beginning is not visible here (presumably the get_num
# helper used further down); it is reproduced untouched.
return int(res) else: return 0


# Scan the elements of `s` and, for the first one whose h4 text starts with
# 'Opening Weekend', return get_num() of that element's full text.
# Falls off the end (implicitly returning None) when no element matches.
def get_weekgross(s):
    for item in s:
        # Each guard protects the attribute access in the next one.
        if item.h4:
            if item.h4.string:
                if item.h4.string.startswith('Opening Weekend'):
                    return get_num(item.get_text())


# Select the ids of USA/UK films with gross above 1000000 whose
# openweek_gross is still NULL.
cursor.execute(
    'SELECT FilmDB.imdb_filmID '
    'FROM FilmDB,TrailerClick '
    'WHERE FilmDB.imdb_filmID=TrailerClick.imdb_filmID '
    'AND (country=\'USA\'OR country=\'UK\') AND gross>1000000 and openweek_gross is null'
)
# NOTE(review): get_exist_list, cursor, db and page_read are defined outside
# the visible part of this script.
filmids = get_exist_list()
print filmids
for filmid in filmids:
    # Load the film's IMDb title page; page_read_nolog presumably returns a
    # parsed page object, since .select() is called on the result.
    soup = page_read.page_read_nolog('http://www.imdb.com/title/' + filmid + '/')
    if soup.select('.txt-block'):
        weekgross = get_weekgross(soup.select('.txt-block'))
        # Only write back when a truthy, non-zero value was found.
        if weekgross and weekgross != 0:
            cursor.execute(
                '''UPDATE TrailerClick SET openweek_gross=%s WHERE imdb_filmID=%s''',
                (weekgross, filmid))
            db.commit()
# NOTE(review): the original text had no line breaks; the layout below is a
# reconstruction of the most plausible nesting.
#
# Store the eight values held in the module-level `scoredict` on the
# UpdateFilm row whose imdb_filmID equals the module-level `filmid`, then
# commit.  `cursor`, `db`, `scoredict` and `filmid` are not defined in the
# visible part of this script.
def write_predictdb():
    cursor.execute(
        '''UPDATE UpdateFilm SET linear_predict=%s,linear_test=%s, lasso_predict=%s,lasso_test=%s,knn_predict=%s,knn_test=%s,poly_predict=%s,poly_test=%s WHERE imdb_filmID = %s''',
        (scoredict['linear_predict'], scoredict['linear_test'],
         scoredict['lasso_predict'], scoredict['lasso_test'],
         scoredict['knn_predict'], scoredict['knn_test'],
         scoredict['poly_predict'], scoredict['poly_test'],
         filmid))
    db.commit()


# Load (click_times, gross) pairs for films whose gross exceeds four times
# their click count.
cursor.execute('SELECT click_times,gross '
               'FROM FilmDB,TrailerClick '
               'WHERE FilmDB.imdb_filmID=TrailerClick.imdb_filmID '
               'AND gross>4*TrailerClick.click_times')
clicks = cursor.fetchall()
# Split the rows into a feature list (clicks) and a target list (gross).
X_R1 = list()
y_R1 = list()
for click_time, gross in clicks:
    X_R1.append(click_time)
    y_R1.append(gross)
# Reshape both lists into single-column 2-D arrays.
X_data = np.array(X_R1).reshape(-1, 1)
y_data = np.array(y_R1).reshape(-1, 1)
# Split into train and test parts with test_size=0.2 and a fixed random_state.
# NOTE(review): train_test_split and np are imported outside the visible code
# (presumably scikit-learn and numpy) - confirm.
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2, random_state=4)
# NOTE(review): the statement below is cut off in the visible source (its
# closing parenthesis is not shown); it is reproduced untouched.
cursor.execute( 'SELECT TrailerClick.imdb_filmID,max(click_times) FROM UpdateFilm,TrailerClick WHERE TrailerClick.imdb_filmID=UpdateFilm.imdb_filmID GROUP BY UpdateFilm.imdb_filmID'
from db_helper.init_cursor import db
from db_helper.init_cursor import cursor


def get_exist_list():
    """Return the first column of every row currently pending on ``cursor``.

    Must be called right after a ``cursor.execute(...)``; it consumes the
    whole result set with ``fetchall()``.
    """
    return [row[0] for row in cursor.fetchall()]


# Remove TrailerClick rows whose film id has no counterpart in UpdateFilm.
cursor.execute('SELECT imdb_filmID FROM UpdateFilm ')
filmids = get_exist_list()
cursor.execute('SELECT imdb_filmID FROM TrailerClick')
all_ids = get_exist_list()
# Set difference gives constant-time membership tests and collapses ids that
# occur on several TrailerClick rows, so each orphan id is deleted only once
# (one DELETE already removes every row carrying that id).
orphan_ids = set(all_ids) - set(filmids)
for a_id in orphan_ids:
    cursor.execute('DELETE FROM TrailerClick WHERE imdb_filmID=%s', (a_id, ))
    # Commit per id so progress already made survives a later failure.
    db.commit()