def official_method():
    """Crawl Maoyan base info, cast/staff and awards for every film id.

    Resumes from the persisted 'baseinfo_offset'; after each film is
    written to the DB the offset is advanced so a restart continues where
    the previous run stopped.  On any error the account is rotated and the
    same film is retried every 30 seconds until it succeeds.
    """
    crawler = FilmBaseInfoCrawler()
    getor = get_url.GetUrl()
    film_id_list = crawler.read_film_id()
    cfo = config_operator.ConfigOperator()
    offset = int(cfo.get_maoyan_film('baseinfo_offset'))
    interval = int(cfo.get_maoyan_film('baseinfo_interval'))
    for num in range(offset, len(film_id_list)):
        film_id = int(film_id_list[num][0])
        url = crawler.base_url + str(film_id) + '#award'
        try:
            response = getor.get_response(url)
            dict_base_info = crawler.get_base_info(response.text, film_id)
            dict_cast_staff = crawler.get_staff_info(response.text)
            dict_awards = crawler.get_award_info(response.text)
            crawler.write_db(dict_base_info, dict_cast_staff, dict_awards,
                             film_id)
            cfo.write_maoyan_film('baseinfo_offset', str(num))
        except Exception as e:
            # Endless retry with account rotation.  FIX: use
            # 'except Exception' instead of a bare 'except' so that
            # KeyboardInterrupt/SystemExit can still abort the loop.
            while True:
                try:
                    print('出现错误,30s后重试\n' + str(e))
                    getor.change_account()
                    time.sleep(30)
                    response = getor.get_response(url)
                    dict_base_info = crawler.get_base_info(response.text,
                                                           film_id)
                    dict_cast_staff = crawler.get_staff_info(response.text)
                    dict_awards = crawler.get_award_info(response.text)
                    crawler.write_db(dict_base_info, dict_cast_staff,
                                     dict_awards, film_id)
                    cfo.write_maoyan_film('baseinfo_offset', str(num))
                    break
                except Exception:
                    pass  # best effort: wait and retry again
        time.sleep(interval)
def official_method():
    """Crawl Douban film info for every stored film id.

    Resumes from the persisted 'filminfo_offset'; on any error the account
    is rotated and the same film is retried every 30 seconds until it
    succeeds.
    """
    crawler = DoubanFilmInfoCrawler()
    getor = get_url.GetUrl()
    film_id_list = crawler.read_db()
    cfo = config_operator.ConfigOperator()
    offset = int(cfo.get_douban_film('filminfo_offset'))
    interval = int(cfo.get_douban_film('filminfo_interval'))
    for num in range(offset, len(film_id_list)):
        film_id = int(film_id_list[num][0])
        url = crawler.base_url.replace('-', str(film_id))
        try:
            response = getor.get_response(url)
            dict_film_info = crawler.get_film_info(response.text, film_id)
            crawler.write_db(dict_film_info, film_id)
            # BUG FIX: the offset is read with get_douban_film() but was
            # written with write_maoyan_film() (copy-paste from the Maoyan
            # crawlers), so progress landed in the wrong config section and
            # restarts never resumed.  Assumes ConfigOperator provides
            # write_douban_film() symmetric to get_douban_film() — verify.
            cfo.write_douban_film('filminfo_offset', str(num))
        except Exception as e:
            # FIX: 'except Exception' instead of bare 'except' so Ctrl-C
            # can still abort the retry loop.
            while True:
                try:
                    print('出现异常,30s后重试\n' + str(e))
                    getor.change_account()
                    time.sleep(30)
                    response = getor.get_response(url)
                    dict_film_info = crawler.get_film_info(response.text,
                                                           film_id)
                    crawler.write_db(dict_film_info, film_id)
                    cfo.write_douban_film('filminfo_offset', str(num))
                    break
                except Exception:
                    pass  # best effort: wait and retry again
        time.sleep(interval)
def official_method():
    """Crawl Maoyan rating info and "want to see" counts for every film id.

    Resumes from the persisted 'rating_offset'; on any error the account is
    rotated and the same film is retried every 30 seconds until it succeeds.
    """
    crawler = FilmRatingCrawler()
    getor = get_url.GetUrl()
    film_id_list = crawler.read_film_id()
    cfo = config_operator.ConfigOperator()
    offset = int(cfo.get_maoyan_film('rating_offset'))
    interval = int(cfo.get_maoyan_film('rating_interval'))
    for num in range(offset, len(film_id_list)):
        film_id = int(film_id_list[num][0])
        # URLs and referer are pure string constructions and cannot raise,
        # so they are built once here instead of being duplicated inside
        # both the first attempt and the retry loop.
        rating_url = crawler.rating_url.replace('-', str(film_id))
        wanted_url = crawler.wanted_url.replace('-', str(film_id))
        referer = ('https://piaofang.maoyan.com/movie/%s/promotion/trailers'
                   % film_id)
        try:
            # Fetch rating info.
            response = getor.get_response(rating_url, referer=referer)
            dict_rating_info = crawler.get_ratings(response.text)
            # Fetch the "want to see" count.
            response_wanted = getor.get_response(wanted_url, referer=referer)
            dict_data_wanted = crawler.get_datadict_fromscript(
                response_wanted.text)
            want_num = crawler.get_wanted(dict_data_wanted)
            crawler.write_db(dict_rating_info, want_num, film_id)
            cfo.write_maoyan_film('rating_offset', str(num))
        except Exception as e:
            # Endless retry with account rotation.
            while True:
                try:
                    print('出现问题,30s后重试\n' + str(e))
                    getor.change_account()
                    time.sleep(30)
                    response = getor.get_response(rating_url, referer=referer)
                    dict_rating_info = crawler.get_ratings(response.text)
                    response_wanted = getor.get_response(wanted_url,
                                                         referer=referer)
                    dict_data_wanted = crawler.get_datadict_fromscript(
                        response_wanted.text)
                    want_num = crawler.get_wanted(dict_data_wanted)
                    crawler.write_db(dict_rating_info, want_num, film_id)
                    cfo.write_maoyan_film('rating_offset', str(num))
                    break
                except Exception as e:
                    continue  # retry again with the latest error bound to e
        time.sleep(interval)
def official_method():
    """Crawl Maoyan summary and daily box-office data for every film id.

    Resumes from the persisted 'boxoffice_offset'; films with no data are
    skipped (the offset still advances).  On any error the account is
    rotated and the same film is retried every 30 seconds until it succeeds.
    """
    crawler = FilmBoxOfficeCrawler()
    getor = get_url.GetUrl()
    film_id_list = crawler.read_film_id()
    cfo = config_operator.ConfigOperator()
    offset = int(cfo.get_maoyan_film('boxoffice_offset'))
    interval = int(cfo.get_maoyan_film('boxoffice_interval'))
    for num in range(offset, len(film_id_list)):
        try:
            film_id = int(film_id_list[num][0])
            url = crawler.base_url.replace('-', str(film_id))
            response = getor.get_response(url)
            dict_data = crawler.get_datadict_fromscript(response.text)
            if crawler.check_data(dict_data):
                # No box-office data for this film: record progress and
                # move on to the next id.
                print(str(film_id) + '无数据')
                cfo.write_maoyan_film('boxoffice_offset', str(num))
                time.sleep(interval)
                continue
            dict_summary_info = crawler.get_summary_boxoffice(dict_data)
            dict_day_boxoffice = crawler.get_day_boxoffice(dict_data)
            crawler.write_db(dict_summary_info, dict_day_boxoffice, film_id)
            cfo.write_maoyan_film('boxoffice_offset', str(num))
        except Exception as e:
            # Endless retry with account rotation.  FIX: 'except Exception'
            # instead of a bare 'except' so Ctrl-C can abort the loop.
            while True:
                try:
                    print('出现错误,30s后重试\n' + str(e))
                    getor.change_account()
                    time.sleep(30)
                    response = getor.get_response(url)
                    dict_data = crawler.get_datadict_fromscript(response.text)
                    if crawler.check_data(dict_data):
                        print(str(film_id) + '无数据')
                        cfo.write_maoyan_film('boxoffice_offset', str(num))
                        # BUG FIX: the original used 'continue' here, which
                        # restarted this retry loop forever even though the
                        # film simply has no data.  Break out; the trailing
                        # interval sleep below still runs.
                        break
                    dict_summary_info = crawler.get_summary_boxoffice(
                        dict_data)
                    dict_day_boxoffice = crawler.get_day_boxoffice(dict_data)
                    crawler.write_db(dict_summary_info, dict_day_boxoffice,
                                     film_id)
                    cfo.write_maoyan_film('boxoffice_offset', str(num))
                    # BUG FIX: the original had no 'break' after a
                    # successful write, so the retry loop re-crawled the
                    # same film forever.
                    break
                except Exception:
                    pass  # best effort: wait and retry again
        time.sleep(interval)
def official_method():
    """Crawl Baidu age/gender info for every stored celebrity name.

    Resumes from the persisted 'gender_offset'; celebrities with no data
    are skipped.  On any error the account is rotated and the same entry
    is retried every 30 seconds until it succeeds or turns out empty.
    """
    crawler = BaiduIndexActorAgeGenderCrawler()
    getor = get_url.GetUrl()
    actor_name_list = crawler.read_db_name()
    cfo = config_operator.ConfigOperator()
    offset = int(cfo.get_baidu_celebrity('gender_offset'))
    interval = int(cfo.get_baidu_celebrity('gender_interval'))
    for num in range(offset, len(actor_name_list)):
        name = actor_name_list[num][1]
        celebrity_id = actor_name_list[num][0]
        url = crawler.base_url + name
        try:
            response = getor.get_response(url)
            if crawler.check_response_available(response.text):
                # No data for this celebrity; skip it.
                # NOTE(review): this 'continue' also skips the offset write
                # and the interval sleep — confirm that is intentional.
                print('%s无数据' % celebrity_id)
                continue
            dict_result = crawler.get_info(response.text)
            crawler.write_db(dict_result, celebrity_id)
            cfo.write_baidu_celebrity('gender_offset', str(num))
        except Exception as e:
            # Endless retry with account rotation.  FIX: 'except Exception'
            # instead of a bare 'except' so Ctrl-C can abort the loop.
            while True:
                try:
                    print('出现错误,30s后重试\n' + str(e))
                    time.sleep(30)
                    getor.change_account()
                    response = getor.get_response(url)
                    if crawler.check_response_available(response.text):
                        print('%s无数据' % celebrity_id)
                        break
                    dict_result = crawler.get_info(response.text)
                    crawler.write_db(dict_result, celebrity_id)
                    cfo.write_baidu_celebrity('gender_offset', str(num))
                    break
                except Exception:
                    pass  # best effort: wait and retry again
        time.sleep(interval)