def reload_and_config():
    """Re-import the DB-insert hook and proxy getter, then reconfigure the spider factory.

    Intended to be called when the crawl configuration must be refreshed at
    runtime (the module-level init only runs once at import).

    NOTE(review): ``insert_db`` is bound to the ``common.insert_db`` *module*
    object, not a callable — presumably ``config_spider`` accepts that;
    confirm against its signature.
    """
    from common.common import get_proxy
    import common.insert_db

    # Hand the module object over as the insert hook.
    insert_db = common.insert_db
    # Fix: dropped the original's redundant no-op ``get_proxy = get_proxy``
    # self-assignment (the name is already bound by the import above).
    debug = False
    # Relies on a module-level ``spider_factory`` import elsewhere in this file.
    spider_factory.config_spider(insert_db, get_proxy, debug)
import mioji.common.pool
import mioji.common.pages_store
from toolbox import Common

# Size the shared worker/connection pool for the hotel crawl.
mioji.common.pool.pool.set_size(128)
# Celery task logger, also injected into the mioji library's global logger slot.
logger = get_task_logger('daodaoHotel')
mioji.common.logger.logger = logger
# Page-cache location and backend type; ``list_cache_path`` and ``cache_type``
# are presumably defined elsewhere in this file — confirm.
mioji.common.pages_store.cache_dir = list_cache_path
mioji.common.pages_store.STORE_TYPE = cache_type

# Initialization (only needs to run once at program startup).
insert_db = None
# get_proxy = simple_get_socks_proxy  (previous proxy source, kept for reference)
get_proxy = proxy_pool.get_proxy
debug = False
spider_factory.config_spider(insert_db, get_proxy, debug, need_flip_limit=False)


def hotel_url_to_database(tid, used_times, source, keyword, spider_tag, need_cache=False):
    # Crawl hotel data for ``keyword`` using the spider registered under
    # ``spider_tag`` (looked up via the old-source factory).
    # NOTE(review): the definition appears to continue past this chunk —
    # ``error_code`` is assigned but unused in the visible portion, and
    # ``tid``/``used_times``/``source``/``need_cache`` are not yet referenced here.
    task = Task()
    task.ticket_info['hotel_name'] = keyword
    spider = factory.get_spider_by_old_source(spider_tag)
    spider.task = task
    error_code = spider.crawl(required=['hotel'], cache_config=none_cache_config)
import json
import traceback
import dataset
import common.common
from mioji.spider_factory import factory
from mioji.common.task_info import Task
from proj.celery import app
from proj.my_lib.BaseTask import BaseTask
from proj.my_lib.task_module.task_func import update_task, insert_task, get_task_id
from mioji import spider_factory

# Initialization (only needs to run once at program startup).
insert_db = None
get_proxy = common.common.get_proxy
debug = False
spider_factory.config_spider(insert_db, get_proxy, debug)

# Canned hotel-query payloads with a hard-coded check-in date (2017-12-03)
# and a one-night stay. ``hotel_default`` requests a single unspecified room.
hotel_default = {'check_in': '20171203', 'nights': 1, 'rooms': [{}]}
# One room with 1 adult and 3 children.
hotel_rooms = {
    'check_in': '20171203',
    'nights': 1,
    'rooms': [{
        'adult': 1,
        'child': 3
    }]
}
# NOTE(review): this literal is cut off at the edge of the visible chunk and
# continues past the end of this view.
hotel_rooms_c = {
    'check_in': '20171203',
    'nights': 1,
    'rooms': [{
        'adult': 1,
from proj.mysql_pool import service_platform_pool
from proj.my_lib.Common.Browser import proxy_pool

logger = get_logger("poiDaodao")
# Disable the mioji spider's page-flip limit so list crawls are not truncated.
mioji.common.spider.NEED_FLIP_LIMIT = False
mioji.common.pool.pool.set_size(2024)
# Page-cache location/backend; ``list_cache_path`` and ``cache_type`` are
# presumably defined elsewhere in this file — confirm.
mioji.common.pages_store.cache_dir = list_cache_path
mioji.common.pages_store.STORE_TYPE = cache_type

# Initialization (only needs to run once at program startup).
insert_db = None
# get_proxy = simple_get_socks_proxy  (previous proxy source, kept for reference)
get_proxy = proxy_pool.get_proxy
# NOTE(review): ``debug`` is left True here and ``None`` is passed below in
# place of the ``get_proxy`` bound above — confirm both are intentional
# before relying on this in production.
debug = True
spider_factory.config_spider(insert_db, None, debug)
mioji.common.spider.NEED_FLIP_LIMIT = False
mioji.common.logger.logger = logger

# MongoDB sink for crawl results (hard-coded internal host).
client = pymongo.MongoClient(host='10.10.213.148')
collections = client['data_result']['daodao']

URL = 'https://www.tripadvisor.cn'
# ``{table_name}`` is substituted via str.format before execution; the five
# %s placeholders are bound by the DB driver.
SQL = """insert ignore into {table_name} (source, source_id, city_id, country_id, hotel_url) values(%s, %s, %s, %s, %s)"""
# Map short POI type keys to source type names and spider-name suffixes.
type_dict = {'attr': 'view', 'rest': 'restaurant'}
spider_name = {'attr': 'View', 'rest': 'Rest'}

# NOTE(review): this signature is cut off at the edge of the visible chunk and
# continues past the end of this view.
def hotel_list_database(source, url, required,