Exemplo n.º 1
0
def get_config(config, key, default_value, mode):
    """Read one option from the "setting" section and coerce it according to mode.

    Args:
        config: parser-like object providing has_option(section, key) and
            get(section, key).
        key: option name looked up in the "setting" section.
        default_value: used when the option is missing, and when mode 1
            rejects the configured value.
        mode: 0 - return the value untouched;
            1 - integer (an int, or a string made only of digits);
            2 - boolean (empty values, "0" and "false" in any case are False);
            3 - filesystem path, see below.
            Any other mode returns the value untouched.

    Mode 3 treats a leading backslash as a marker that is stripped: two
    leading backslashes anchor the rest at os.path.abspath(""), a single one
    anchors it at tool.PROJECT_ROOT_PATH. The result always goes through
    os.path.realpath. An empty value no longer raises IndexError; it is
    resolved like any other relative path.

    Returns:
        The coerced value.
    """
    if config.has_option("setting", key):
        # NOTE(review): encode() assumes a Python 2 text value - confirm before porting.
        value = config.get("setting", key).encode("UTF-8")
    else:
        tool.print_msg("配置文件config.ini中没有找到key为'" + key + "'的参数,使用程序默认设置")
        value = default_value
    if mode == 0:
        pass
    elif mode == 1:
        if isinstance(value, int):
            pass
        elif isinstance(value, str) and value.isdigit():
            value = int(value)
        else:
            tool.print_msg("配置文件config.ini中key为'" + key + "'的值必须是一个整数,使用程序默认设置")
            value = default_value
    elif mode == 2:
        if not value or value == "0" or (isinstance(value, str) and value.lower() == "false"):
            value = False
        else:
            value = True
    elif mode == 3:
        # Slices are used instead of value[0] so that an empty value cannot
        # raise IndexError.
        if value[:2] == "\\\\":  # starts with \\ : relative to os.path.abspath("")
            value = os.path.join(os.path.abspath(""), value[2:])  # the \\ is only a marker and is removed
        elif value[:1] == "\\":  # starts with \ : relative to tool.PROJECT_ROOT_PATH
            value = os.path.join(tool.PROJECT_ROOT_PATH, value[1:])  # the \ is only a marker and is removed
        value = os.path.realpath(value)
    return value
Exemplo n.º 2
0
def get_config(config, key, default_value, mode):
    """Fetch an option from the "setting" section and convert it as mode requests.

    Args:
        config: parser-like object with has_option(section, key) and
            get(section, key).
        key: name of the option inside the "setting" section.
        default_value: fallback when the option is absent, or when mode 1
            finds a value that is not an integer.
        mode: 0 - no conversion; 1 - integer; 2 - boolean; 3 - path.
            Unknown modes leave the value unchanged.

    In mode 3 a single leading backslash is a marker: it is removed and the
    remainder is joined onto os.path.abspath(""). The value is then passed
    through os.path.realpath. An empty value is handled without raising
    IndexError.

    Returns:
        The converted value.
    """
    if config.has_option("setting", key):
        # NOTE(review): encode() assumes a Python 2 text value - confirm before porting.
        value = config.get("setting", key).encode("utf-8")
    else:
        tool.print_msg("配置文件config.ini中没有找到key为'" + key + "'的参数,使用程序默认设置")
        value = default_value
    if mode == 0:
        pass
    elif mode == 1:
        if isinstance(value, int):
            pass
        elif isinstance(value, str) and value.isdigit():
            value = int(value)
        else:
            tool.print_msg("配置文件config.ini中key为'" + key + "'的值必须是一个整数,使用程序默认设置")
            value = default_value
    elif mode == 2:
        if not value or value == "0" or (isinstance(value, str) and value.lower() == "false"):
            value = False
        else:
            value = True
    elif mode == 3:
        # value[:1] rather than value[0]: an empty value must not raise IndexError.
        if value[:1] == "\\":
            value = os.path.join(os.path.abspath(""), value[1:])  # the first \ is only a marker and is removed
        value = os.path.realpath(value)
    return value
Exemplo n.º 3
0
    def __init__(self):
        """Run robot.Robot.__init__ with both system switches set to True, then report completion."""
        # SYS_DOWNLOAD_IMAGE and SYS_NOT_CHECK_SAVE_DATA are both switched on.
        enabled_switches = (robot.SYS_DOWNLOAD_IMAGE, robot.SYS_NOT_CHECK_SAVE_DATA)
        robot.Robot.__init__(self, dict.fromkeys(enabled_switches, True))

        tool.print_msg("配置文件读取完成")
Exemplo n.º 4
0
def trace(msg):
    """Record a trace-level message.

    The message is prefixed with tool.get_time(). It is printed when
    IS_SHOW_TRACE is truthy and written to TRACE_LOG_PATH when that path is
    not the empty string.

    Args:
        msg: any object; it is converted with str().
    """
    msg = tool.get_time() + " " + str(msg)
    if IS_SHOW_TRACE:
        tool.print_msg(msg, False)
    if TRACE_LOG_PATH != "":
        # The with-statement releases the lock even when the write raises,
        # and the exception still propagates to the caller.
        # NOTE(review): assumes thread_lock supports the context-manager
        # protocol (e.g. threading.Lock) - confirm.
        with thread_lock:
            tool.write_file(msg, TRACE_LOG_PATH)
Exemplo n.º 5
0
def step(msg):
    """Record a step-level message.

    The message is prefixed with tool.get_time(). It is printed when
    IS_SHOW_STEP is truthy and written to STEP_LOG_PATH when that path is
    not the empty string.

    Args:
        msg: any object; it is converted with str().
    """
    msg = tool.get_time() + " " + str(msg)
    if IS_SHOW_STEP:
        tool.print_msg(msg, False)
    if STEP_LOG_PATH != "":
        # The with-statement releases the lock even when the write raises,
        # and the exception still propagates to the caller.
        # NOTE(review): assumes thread_lock supports the context-manager
        # protocol (e.g. threading.Lock) - confirm.
        with thread_lock:
            tool.write_file(msg, STEP_LOG_PATH)
Exemplo n.º 6
0
def error(msg):
    """Record an error-level message.

    The message is prefixed with tool.get_time() and the "[Error]" tag. It is
    printed when IS_SHOW_ERROR is truthy and written to ERROR_LOG_PATH when
    that path is not the empty string.

    Args:
        msg: any object; it is converted with str().
    """
    msg = tool.get_time() + " [Error] " + str(msg)
    if IS_SHOW_ERROR:
        tool.print_msg(msg, False)
    if ERROR_LOG_PATH != "":
        # The with-statement releases the lock even when the write raises,
        # and the exception still propagates to the caller.
        # NOTE(review): assumes thread_lock supports the context-manager
        # protocol (e.g. threading.Lock) - confirm.
        with thread_lock:
            tool.write_file(msg, ERROR_LOG_PATH)
Exemplo n.º 7
0
def get_member_list():
    """Print every member found on the blog index page, one "path / name" line each.

    Nothing is printed when the request result is falsy. When at least one
    member was found, a closing hint is printed as well. Always returns None.
    """
    page_status, page_html = tool.http_request("http://blog.nogizaka46.com/")[:2]
    if not page_status:
        return None

    # Each match is a (relative path, alt text) pair.
    members = re.findall(
        '<div class="unit"><a href="./([^"]*)"><img src="[^>]*alt="([^"]*)" />', page_html
    )
    for member_path, member_name in members:
        # Spaces are stripped from the displayed name.
        tool.print_msg("%s\t\t\t%s" % (member_path, member_name.replace(" ", "")), False)
    if members:
        tool.print_msg("复制以上内容到save.data中,删除不需要的行,即可开始运行", False)
    return None
Exemplo n.º 8
0
def get_member_list():
    """Print the "ct" id and name of every member listed on the diary index page.

    Nothing is printed when the request result is falsy or when the
    thumbnail list cannot be located in the page. When at least one entry
    was found, a closing hint is printed as well. Always returns None.
    """
    index_url = "http://www.keyakizaka46.com/mob/news/diarShw.php?cd=member"
    index_return_code, index_page = tool.http_request(index_url)[:2]
    if not index_return_code:
        return None

    member_list_data = tool.find_sub_string(index_page, '<ul class="thumb">', "</ul>")
    if not member_list_data:
        return None

    # Raw string: the pattern is unchanged, but the regex escapes are no
    # longer invalid string escape sequences.
    member_list_find = re.findall(r"<li ([\S|\s]*?)</li>", member_list_data)
    for member_info in member_list_find:
        ct = tool.find_sub_string(member_info, "&ct=", '">')
        name = tool.find_sub_string(member_info, '<p class="name">', "</p>").strip().replace(" ", "")
        tool.print_msg("%s\t\t\t%s" % (ct, name), False)
    if member_list_find:
        tool.print_msg("复制以上内容到save.data中,删除不需要的行,即可开始运行", False)
    return None
Exemplo n.º 9
0
def save_net_file(file_url, file_path, need_content_type=False, header_list=None, cookies_list=None):
    """Download file_url and store the response body at file_path.

    The request is attempted up to 5 times. After a successful response the
    body is written to disk and its size is compared with the Content-Length
    header; a mismatch, like a 500/502/503/504 status, triggers another
    attempt. Any file written by a failed attempt is removed before a
    failure is returned.

    Args:
        file_url: address of the remote file.
        file_path: local destination path.
        need_content_type: when true, the file extension is replaced by the
            subtype of the response Content-Type ("image/png" gives ".png").
            Header parameters such as "; charset=utf-8" are ignored, and an
            "octet-stream" subtype leaves the extension untouched.
        header_list: forwarded to http_request.
        cookies_list: forwarded to http_request.

    Returns:
        False when tool.make_dir fails for the target directory. Otherwise a
        dict: {"status": 1, "code": 0, "file_path": <final path>} on success,
        or {"status": 0, "code": c} on failure, where c is -1 for
        HTTP_RETURN_CODE_URL_INVALID, -2 for HTTP_RETURN_CODE_RETRY, -3 when
        every attempt was used up, or the unexpected response status.
    """
    file_path = tool.change_path_encoding(file_path)
    # Make sure the target directory exists.
    if not tool.make_dir(os.path.dirname(file_path), 0):
        return False
    create_file = False
    for _ in range(5):
        response = http_request(file_url, header_list=header_list, cookies_list=cookies_list, read_timeout=60)
        if response.status == HTTP_RETURN_CODE_SUCCEED:
            # Use the Content-Type of the response as the file extension.
            if need_content_type:
                content_type = response.getheader("Content-Type")
                if content_type is not None:
                    # Drop parameters ("; charset=...") and keep the subtype only.
                    sub_type = content_type.split(";")[0].strip().split("/")[-1]
                    if sub_type and sub_type != "octet-stream":
                        file_path = os.path.splitext(file_path)[0] + "." + sub_type
            # Download.
            with open(file_path, "wb") as file_handle:
                file_handle.write(response.data)
            create_file = True
            # Check the size on disk against the Content-Length of the response.
            content_length = response.getheader("Content-Length")
            if content_length is None:
                return {"status": 1, "code": 0, "file_path": file_path}
            file_size = os.path.getsize(file_path)
            if int(content_length) == file_size:
                return {"status": 1, "code": 0, "file_path": file_path}
            else:
                # Local path goes with the local size, URL with the announced size.
                tool.print_msg("本地文件%s:%s和网络文件%s:%s不一致" % (file_path, file_size, file_url, content_length))
        elif response.status == HTTP_RETURN_CODE_URL_INVALID:
            if create_file:
                os.remove(file_path)
            return {"status": 0, "code": -1}
        # http_request gave up after its own retries: stop immediately.
        elif response.status == HTTP_RETURN_CODE_RETRY:
            if create_file:
                os.remove(file_path)
            return {"status": 0, "code": -2}
        # 5xx server errors: try again.
        elif response.status in [500, 502, 503, 504]:
            pass
        # Any other HTTP status: give up.
        else:
            if create_file:
                os.remove(file_path)
            return {"status": 0, "code": response.status}
    if create_file:
        os.remove(file_path)
    return {"status": 0, "code": -3}
Exemplo n.º 10
0
 def print_msg(self, msg):
     """Send msg to self.print_function when one is set, otherwise to tool.print_msg."""
     custom_printer = self.print_function
     if custom_printer is not None:
         custom_printer(msg)
         return
     # No custom printer configured: fall back to the shared tool printer.
     tool.print_msg(msg, True)
Exemplo n.º 11
0
# Send an HTTP request through the shared connection pool (HTTP_CONNECTION_POOL).
#
# Parameters:
#   url                 must start with "http://" or "https://"
#   method              one of GET, POST, HEAD, PUT, DELETE, OPTIONS, TRACE (case-insensitive)
#   post_data           sent as "fields" for a POST when binary_data is None
#   binary_data         sent as the raw "body" for a POST when given
#   header_list         dict of request headers; a dict passed by the caller is modified in place
#   cookies_list        when truthy, turned into a Cookie header by build_header_cookie_string
#   connection_timeout  connect timeout; 0 means no connect timeout
#   read_timeout        read timeout; 0 means no read timeout
#   is_random_ip        when true, X-Forwarded-For and X-Real-Ip are set to a random address
#   json_decode         when true, a successful response body is parsed into response.json_data
#   encode_multipart    forwarded to the pool for POST requests
#   redirect            forwarded to the pool
#   exception_return    when non-empty and contained in the text of a caught exception,
#                       ErrorResponse(HTTP_RETURN_CODE_EXCEPTION_CATCH) is returned
#
# Returns the response object of the pool, or an ErrorResponse for an invalid
# url/method, an unresolved domain, or a matched exception_return. When
# json_decode is set and the body cannot be parsed, response.status is replaced
# by HTTP_RETURN_CODE_JSON_DECODE_ERROR.
#
# NOTE(review): nothing in this block increments retry_count or leaves the loop
# after a handled exception; confirm that the retry limit is enforced after the
# except handlers.
def http_request(url, method="GET", post_data=None, binary_data=None, header_list=None, cookies_list=None, connection_timeout=HTTP_CONNECTION_TIMEOUT,
                 read_timeout=HTTP_CONNECTION_TIMEOUT, is_random_ip=True, json_decode=False, encode_multipart=False, redirect=True, exception_return=""):
    # Reject anything that is not an absolute http(s) URL.
    if not (url.find("http://") == 0 or url.find("https://") == 0):
        return ErrorResponse(HTTP_RETURN_CODE_URL_INVALID)
    method = method.upper()
    # An unsupported method is reported with the same code as an invalid URL.
    if method not in ["GET", "POST", "HEAD", "PUT", "DELETE", "OPTIONS", "TRACE"]:
        return ErrorResponse(HTTP_RETURN_CODE_URL_INVALID)
    # Create the connection pool on first use.
    if HTTP_CONNECTION_POOL is None:
        init_http_connection_pool()

    retry_count = 0
    while True:
        # Wait while the process is paused; exit when it has been stopped.
        while process.PROCESS_STATUS == process.PROCESS_STATUS_PAUSE:
            time.sleep(10)
        if process.PROCESS_STATUS == process.PROCESS_STATUS_STOP:
            tool.process_exit(0)

        if header_list is None:
            header_list = {}

        # Set the User-Agent
        if "User-Agent" not in header_list:
            header_list["User-Agent"] = _random_user_agent()

        # Set a random IP
        if is_random_ip:
            random_ip = _random_ip_address()
            header_list["X-Forwarded-For"] = random_ip
            header_list["X-Real-Ip"] = random_ip

        # Set the cookie
        if cookies_list:
            header_list["Cookie"] = build_header_cookie_string(cookies_list)

        try:
            # A timeout of 0 disables that part of the timeout.
            if connection_timeout == 0 and read_timeout == 0:
                timeout = None
            elif connection_timeout == 0:
                timeout = urllib3.Timeout(read=read_timeout)
            elif read_timeout == 0:
                timeout = urllib3.Timeout(connect=connection_timeout)
            else:
                timeout = urllib3.Timeout(connect=connection_timeout, read=read_timeout)
            if method == "POST":
                # Form fields unless the caller supplied a raw body.
                if binary_data is None:
                    response = HTTP_CONNECTION_POOL.request(method, url, headers=header_list, redirect=redirect, timeout=timeout, fields=post_data, encode_multipart=encode_multipart)
                else:
                    response = HTTP_CONNECTION_POOL.request(method, url, headers=header_list, redirect=redirect, timeout=timeout, body=binary_data, encode_multipart=encode_multipart)
            else:
                response = HTTP_CONNECTION_POOL.request(method, url, headers=header_list, redirect=redirect, timeout=timeout)
            if response.status == HTTP_RETURN_CODE_SUCCEED and json_decode:
                try:
                    response.json_data = json.loads(response.data)
                except ValueError:
                    # Second attempt: decode the body with the charset named
                    # in the Content-Type header before parsing.
                    is_error = True
                    content_type = response.getheader("Content-Type")
                    if content_type is not None:
                        charset = tool.find_sub_string(content_type, "charset=", None)
                        if charset:
                            # gb2312 bodies are decoded with the GBK codec.
                            if charset == "gb2312":
                                charset = "GBK"
                            try:
                                response.json_data = json.loads(response.data.decode(charset))
                            except:
                                pass
                            else:
                                is_error = False
                    if is_error:
                        response.status = HTTP_RETURN_CODE_JSON_DECODE_ERROR
            return response
        except urllib3.exceptions.ProxyError:
            # The proxy is unreachable: ask the user whether to continue or stop.
            notice = "无法访问代理服务器,请检查代理设置。检查完成后输入(C)ontinue继续程序或者(S)top退出程序:"
            input_str = tool.console_input(notice).lower()
            if input_str in ["c", "continue"]:
                pass
            elif input_str in ["s", "stop"]:
                tool.process_exit(0)
        except urllib3.exceptions.ReadTimeoutError:
            pass
        except urllib3.exceptions.ConnectTimeoutError, e:
            # The domain name cannot be resolved
            if str(e).find("[Errno 11004] getaddrinfo failed") >= 0:
                return ErrorResponse(HTTP_RETURN_CODE_DOMAIN_NOT_RESOLVED)
            pass
        # except urllib3.exceptions.MaxRetryError, e:
        #     print_msg(url)
        #     print_msg(str(e))
        #     # infinite redirects
        #     # if str(e).find("Caused by ResponseError('too many redirects',)") >= 0:
        #     #     return ErrorResponse(-1)
        # except urllib3.exceptions.ConnectTimeoutError, e:
        #     print_msg(str(e))
        #     print_msg(url + " 访问超时,稍后重试")
        #     # the domain name cannot be resolved
        #     # if str(e).find("[Errno 11004] getaddrinfo failed") >= 0:
        #     #     return ErrorResponse(-2)
        # except urllib3.exceptions.ProtocolError, e:
        #     print_msg(str(e))
        #     print_msg(url + " 访问超时,稍后重试")
        #     # the connection was terminated
        #     # if str(e).find("'Connection aborted.', error(10054,") >= 0:
        #     #     return ErrorResponse(-3)
        except Exception, e:
            if exception_return and str(e).find(exception_return) >= 0:
                return ErrorResponse(HTTP_RETURN_CODE_EXCEPTION_CATCH)
            elif str(e).find("EOF occurred in violation of protocol") >=0:
                # Back off for 30 seconds after this SSL error.
                time.sleep(30)
            tool.print_msg(str(e))
            tool.print_msg(url + " 访问超时,稍后重试")
            traceback.print_exc()
Exemplo n.º 12
0
def set_proxy(ip, port):
    """Replace the module-level connection pool with a urllib3.ProxyManager for http://ip:port."""
    global HTTP_CONNECTION_POOL
    proxy_address = "http://%s:%s" % (ip, port)
    # retries=False is passed straight through to the manager.
    HTTP_CONNECTION_POOL = urllib3.ProxyManager(proxy_address, retries=False)
    tool.print_msg("设置代理成功")
Exemplo n.º 13
0
        #     print_msg(url + " 访问超时,稍后重试")
        #     # 链接被终止
        #     # if str(e).find("'Connection aborted.', error(10054,") >= 0:
        #     #     return ErrorResponse(-3)
        except Exception, e:
            if exception_return and str(e).find(exception_return) >= 0:
                return ErrorResponse(HTTP_RETURN_CODE_EXCEPTION_CATCH)
            elif str(e).find("EOF occurred in violation of protocol") >=0:
                time.sleep(30)
            tool.print_msg(str(e))
            tool.print_msg(url + " 访问超时,稍后重试")
            traceback.print_exc()

        retry_count += 1
        if retry_count >= HTTP_REQUEST_RETRY_COUNT:
            tool.print_msg("无法访问页面:" + url)
            return ErrorResponse(HTTP_RETURN_CODE_RETRY)


# 随机生成一个合法的user agent
def _random_user_agent():
    # "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0"
    # "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
    firefox_version_max = 55
    # https://zh.wikipedia.org/zh-cn/Google_Chrome
    chrome_version_list = ["51.0.2704", "52.0.2743", "53.0.2785", "54.0.2840", "55.0.2883",
                           "56.0.2924", "57.0.2987", "58.0.3029", "59.0.3071", "60.0.3080"]
    windows_version_list = ["6.1", "6.3", "10.0"]
    browser_type = random.choice(["firefox", "chrome"])
    os_type = random.choice(windows_version_list)
    if browser_type == "firefox":
Exemplo n.º 14
0
 def print_msg(self, msg):
     """Print msg with the instance's print_function, or with tool.print_msg when none is set."""
     if self.print_function is not None:
         # A custom output function was supplied: use it.
         self.print_function(msg)
     else:
         tool.print_msg(msg, True)