def update_db():
    """Scrape search results for every query and store one scored row per video.

    For each entry in ``queries`` result pages 1 through 3 are downloaded and
    parsed. Every video listed on a page is then downloaded, parsed and scored
    with ``compute_prediction``. Videos whose parsed data lacks the
    ``'watch-time-text'`` key are skipped. Each remaining video is inserted
    into the ``videos`` table as (title, video_id, score, update_time) and
    committed right away.

    Returns:
        True once every query and page has been processed.
    """
    with sql.connect(db_name) as conn:
        cursor = conn.cursor()
        for query in queries:
            for page in range(1, 4):
                print(query, page)
                search_page = download_search_page(query, page)
                video_list = parse_search_page(search_page)

                for video in video_list:
                    video_page = download_video_page(video['link'])
                    video_json_data = parse_video_page(video_page)

                    if 'watch-time-text' not in video_json_data:
                        continue

                    p = compute_prediction(video_json_data)
                    video_id = video_json_data.get('og:video:url', '')
                    # Apostrophes are still stripped so stored titles keep the
                    # same form as before; parameter binding below no longer
                    # depends on it for safety.
                    watch_title = video_json_data['watch-title'].replace(
                        "'", "")

                    data_front = {
                        "title": watch_title,
                        "score": float(p),
                        "video_id": video_id,
                    }
                    data_front['update_time'] = time.time_ns()
                    print(video_id, json.dumps(data_front))

                    # Bind values as parameters instead of formatting them
                    # into the statement: the title and video id come from
                    # scraped pages and must never be parsed as SQL.
                    # NOTE(review): "?" placeholders assume `sql` is sqlite3
                    # (qmark paramstyle) -- confirm against the file's imports.
                    cursor.execute(
                        "INSERT INTO videos VALUES (?, ?, ?, ?)",
                        (
                            data_front["title"],
                            data_front["video_id"],
                            data_front["score"],
                            data_front["update_time"],
                        ),
                    )
                    # Commit per row so earlier inserts are already saved if
                    # a later download or parse raises.
                    conn.commit()
    return True
def update_db():
    """Scrape search results for every keyword and store one scored row per video.

    For each entry in ``keywords`` result pages 1 through 3 are downloaded and
    parsed through ``get_data``. Every video listed on a page is then
    downloaded, parsed and scored with ``compute_prediction``. Videos whose
    parsed data lacks the ``'watch-time-text'`` key are skipped. Each
    remaining video is inserted into the ``videos`` table as
    (title, video_id, score) and committed right away.

    Returns:
        True once every keyword and page has been processed.
    """
    with sql.connect(db_name) as conn:
        cursor = conn.cursor()
        for keyword in keywords:
            for page in range(1, 4):
                search_page = get_data.download_search_page(keyword, page)
                videos_list = get_data.parse_search_page(search_page)

                for video in videos_list:
                    video_page = get_data.download_video_page(video['link'])
                    video_json_data = get_data.parse_video_page(video_page)

                    # Discard pages whose parsed data has no
                    # 'watch-time-text' entry.
                    if 'watch-time-text' not in video_json_data:
                        continue

                    p = compute_prediction(video_json_data)
                    video_id = video_json_data.get('og:video:url', '')
                    # The raw title is used as-is: parameter binding below
                    # makes manual quote doubling unnecessary, and the value
                    # stored in the table is the same as before.
                    video_title = video_json_data['watch-title']

                    data_front = {
                        "title": video_title,
                        "score": float(p),
                        "video_id": video_id,
                    }
                    print(video_id, json.dumps(data_front))

                    # Bind values as parameters instead of formatting them
                    # into the statement: the title and video id come from
                    # scraped pages and must never be parsed as SQL.
                    # NOTE(review): "?" placeholders assume `sql` is sqlite3
                    # (qmark paramstyle) -- confirm against the file's imports.
                    cursor.execute(
                        "INSERT INTO videos VALUES (?, ?, ?)",
                        (
                            data_front["title"],
                            data_front["video_id"],
                            data_front["score"],
                        ),
                    )
                    # Commit per row so earlier inserts are already saved if
                    # a later download or parse raises.
                    conn.commit()
    return True
def predict_api():
    """Score the video identified by the ``yt_video_id`` request argument.

    The page at ``/watch?v=<yt_video_id>`` is downloaded and parsed through
    ``get_data``; a missing argument falls back to an empty id.

    Returns:
        The string ``"not found"`` when the parsed data has no
        ``'watch-time-text'`` key. Otherwise a JSON string holding the
        video's ``"title"`` and the ``"score"`` produced by
        ``ml_utils.compute_prediction``.
    """
    requested_id = request.args.get("yt_video_id", default='')
    page = get_data.download_video_page(f"/watch?v={requested_id}")
    parsed = get_data.parse_video_page(page)

    if 'watch-time-text' in parsed:
        # Score first, then read the title, matching the original call order.
        score = ml_utils.compute_prediction(parsed)
        return json.dumps({"title": parsed['watch-title'], "score": score})

    return "not found"