Пример #1
0
    def __init__(
            self,
            driver_path: str = DRIVER_PATH,
            proxy: dict = None,
            ua: str = USER_AGENT.DEFAULT_UA,
            headless: bool = False):
        """Start a Chrome WebDriver and expose this wrapper's helpers on it.

        :param driver_path: passed to Chrome as the driver executable path.
        :param proxy: optional mapping with ``host`` and ``port`` keys; when
            given, Chrome is pointed at ``http://host:port`` and told to
            ignore certificate errors.
        :param ua: user-agent string; no user-agent switch is added when it
            is empty or ``None``.
        :param headless: when true, add ``--headless`` and ``--disable-gpu``.
        """
        options = webdriver.ChromeOptions()
        add_switch = options.add_argument

        if proxy is not None:
            proxy_url = "http://{}:{}".format(proxy["host"], proxy["port"])
            add_switch("--proxy-server={}".format(proxy_url))
            add_switch("--ignore-certificate-errors")
        if ua:
            add_switch("user-agent=" + ua)
        if headless:
            for flag in ("--headless", "--disable-gpu"):
                add_switch(flag)

        self.driver = webdriver.Chrome(executable_path=driver_path,
                                       chrome_options=options)

        # (attribute set on the driver, method of this wrapper it points to)
        driver_aliases = (
            ("click", "click"),
            ("tap", "tap"),
            ("send", "send_keys"),
            ("execute_js", "execute_js"),
            ("flick", "flick"),
            ("scroll", "scroll"),
            ("request", "request"),
            ("drag_and_drop", "drag_and_drop"),
        )
        for alias, method_name in driver_aliases:
            setattr(self.driver, alias, getattr(self, method_name))

        # The wrapper and the raw driver share one logger.
        self.logger = logger_util.get_logger(ChromeDriver)
        self.driver.logger = self.logger
Пример #2
0
    def __init__(self, urls, headers, proxies, timeout=8, log_file_path=""):
        """Store the request settings and attach log handlers.

        :param urls: stored as ``self.urls``.
        :param headers: stored as ``self.headers``.
        :param proxies: stored as ``self.proxies``.
        :param timeout: stored as ``self.timeout``.
        :param log_file_path: file that log records are appended to. When
            empty (the default) no file handler is attached and records go
            to the stream handler only.
        """
        self.urls = urls
        self.headers = headers
        self.proxies = proxies
        self.timeout = timeout
        self.logger = get_logger(SingleThreadApiTestUnit)

        formatter = logging.Formatter(
            "[%(asctime)s]-[%(name)s]-[%(levelname)s]: %(message)s")
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(formatter)

        # logging.FileHandler("") resolves the empty name to the current
        # directory and fails when opening it, so the default empty path
        # used to make this constructor raise. Only build the file handler
        # when a path was actually supplied.
        file_handler = None
        if log_file_path:
            file_handler = logging.FileHandler(log_file_path, "a")
            file_handler.setFormatter(formatter)

        self.logger.addHandler(stream_handler)
        if file_handler is not None:
            self.logger.addHandler(file_handler)
Пример #3
0
    def __init__(
            self,
            proxy: dict = None,
            ua: str = USER_AGENT.DEFAULT_UA,
            executable_path: str = (
                "/opt/package/phantomjs-2.1.1-linux-x86_64/bin/phantomjs")):
        """Start a PhantomJS WebDriver.

        :param proxy: optional mapping with ``host``, ``port`` and ``type``
            keys; when given, PhantomJS is started with the matching
            ``--proxy``/``--proxy-type`` arguments and SSL errors ignored.
        :param ua: user-agent string written into the PhantomJS page
            settings; when empty or ``None`` the capability is left
            untouched.
        :param executable_path: location of the PhantomJS binary; defaults
            to the path that was previously hard-coded.
        """
        self.logger = logger_util.get_logger(PhantomjsDriver)

        dcap = dict(DesiredCapabilities.PHANTOMJS)

        service_args = []
        if proxy is not None:
            service_args = [
                "--proxy={}:{}".format(proxy["host"], proxy["port"]),
                "--proxy-type={}".format(proxy["type"]),
                '--ignore-ssl-errors=true',
            ]

        # Both branches of the former `if not ua / else` assigned `ua`, so an
        # empty value was written into the capabilities. Only override the
        # user agent when one was actually provided.
        if ua:
            dcap["phantomjs.page.settings.userAgent"] = ua

        self.driver = webdriver.PhantomJS(
            executable_path=executable_path,
            service_args=service_args,
            desired_capabilities=dcap)
        self.driver.logger = self.logger
import traceback

import pymongo

from dio_core.network.downloader import Downloader
from dio_core.network.downloader.downloader import Setting
from dio_core.utils import logger_util, time_util, md5_util, url_util
from dio_core.utils.file_util import csv_util

# Module-level logger; this file's path is passed as the logger identifier.
logger = logger_util.get_logger(__file__)

# Comma-separated field names (presumably the export columns - confirm
# against the code that consumes `fields`).
fields = (
    "_id,productList,name,monthSalesTip,"
    "wmPoiScore,distance,shippingFeeTip,minPriceTip,"
    "deliveryTimeTip,averagePriceTip,thirdCategory,"
    "recommendInfo,activityList,labelInfoList,keyword,url"
)

# Endpoint URLs: home page, search, shop list, food list and comments.
MAIN_URL = "http://h5.waimai.meituan.com/waimai/mindex/home"
SEARCH_URL = "http://i.waimai.meituan.com/openh5/search/poi"
SHOP_SEARCH_URL = "http://i.waimai.meituan.com/openh5/homepage/poilist?_={}"
FOOD_URL = "http://i.waimai.meituan.com/openh5/poi/food"
COMMENT_URL = "http://i.waimai.meituan.com/openh5/poi/comments"

# MongoDB configuration: one client, one database, one handle per collection.
client = pymongo.MongoClient(host="localhost", port=27017)
db = client["meituan"]
meituanwaimai_shop_list = db["meituanwaimai_shop_list_v1"]
meituanwaimai_search_list = db["meituanwaimai_search_list"]
meituanwaimai_food_list = db["meituanwaimai_food_list_v1"]
meituanwaimai_comment_list = db["meituanwaimai_comment_list_v1"]
decrypt_collection = db["meituanwaimai_decrypt"]
Пример #5
0
 def __new__(cls, *args, **kwargs):
     """Create the instance, initialising the class-level logger on first use.

     The logger is stored on ``cls`` the first time ``cls.logger`` is found
     to be ``None`` and reused afterwards.
     """
     if cls.logger is None:
         # `cls` is already the class object; `cls.__class__` would be its
         # metaclass (normally `type`), which gave the logger the wrong name.
         cls.logger = logger_util.get_logger(cls.__name__)
     return super().__new__(cls)
Пример #6
0
import pymysql

from dio_core.utils import json_util
from dio_core.utils.logger_util import get_logger

# Module-level logger; this file's path is passed as the logger identifier.
logger = get_logger(__file__)


def getRhino() -> pymysql.Connection:
    """Open a new connection to the rhino MySQL database.

    Rows are returned as dictionaries because the connection uses
    ``DictCursor``. The caller owns the connection.

    :return: a freshly opened ``pymysql`` connection.
    """
    # Keyword arguments are required: PyMySQL 1.0+ no longer accepts
    # host/user/password/database positionally.
    # NOTE(review): host and credentials are hard-coded here - consider
    # moving them to configuration.
    return pymysql.connect(host="devrhino1",
                           user="rhino",
                           password="rhino",
                           database="db_datatub_rhino",
                           port=3306,
                           cursorclass=pymysql.cursors.DictCursor,
                           charset='utf8')


def updateSourceCrawlId(taskId: int, mapping: dict):
    """
    更新
    mapping = {
       "redis-link": [1, 3]
    }
    """
    conn = getRhino()
    cur = conn.cursor()

    # 获取taskConfig
    querySql = "SELECT t.* FROM t_rhino_task_config t WHERE id = {};".format(
Пример #7
0
 def __init__(self):
     """Attach a logger obtained for this instance's class."""
     owner = self.__class__
     self.logger = logger_util.get_logger(owner)
Пример #8
0
# @Time         : 18-5-26 下午8:13
# @Author       : DioMryang
# @File         : mysql_util.py
# @Description  :

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from dio_core.utils import logger_util


# Module-level logger requested under the fixed name "mysql_util".
logger = logger_util.get_logger("mysql_util")


def create_connection(**config):
    """
    Create a MySQL session.

    :param config: must provide ``driver``, ``user``, ``password``, ``host``,
        ``port`` and ``db_name``; they are interpolated into the engine URL.
    :return: a new session bound to an engine created for this call
    :raises KeyError: if one of the required keys is missing
    """
    logger.info("create mysql connect {host}:{port}/{db_name}".format(**config))
    # NOTE(review): user/password are interpolated unescaped; values that
    # contain characters such as '@' or '/' would need URL-quoting - confirm
    # with callers before changing.
    url = 'mysql+{driver}://{user}:{password}@{host}:{port}/{db_name}?charset=utf8'.format(**config)
    # The former `encoding='utf-8'` argument is dropped: utf-8 was already the
    # default where the argument existed, and SQLAlchemy 2.0 rejects it.
    engine = create_engine(url)
    return sessionmaker(bind=engine)()