Exemplo n.º 1
0
    def step2(self, keyword: str, html_args: dict, *, page: int = 1, page_size: int = 50) -> None:
        """Request the data endpoint for a trademark search and print each record.

        Args:
            keyword: Trademark name to search for; also placed, followed by
                ``*``, in the ``mnoc`` query expression.
            html_args: Mapping that must contain ``"request:mi"`` and
                ``"request:tlong"``; both are forwarded unchanged.
            page: Page number to request; page 1 starts at row 1.
            page_size: Number of rows requested per page.

        Each ``<record>`` element of the response is printed as a dict. A field
        whose element is missing or has no text is reported as ``None``.

        Raises:
            KeyError: If ``html_args`` lacks one of the required keys.
            Exception: With the HTTP status code as its argument when the
                response status is not 200.
        """
        url = "http://wsjs.saic.gov.cn/txnRead02.ajax"
        path = parse.urlparse(url).path

        request_args = {
            "locale": "zh_CN",  # language
            "request:queryCom": "1",
            "request:nc": "",  # international classification
            "request:sn": "",  # application/registration number
            "request:mn": keyword,  # trademark name
            "request:hnc": "",  # applicant name (Chinese)
            "request:hne": "",  # applicant name (English)
            "request:imf": "",
            "request:maxHint": "",
            "request:ncs": "",
            "request:queryAuto": "",
            "request:queryExp": f"mnoc = {keyword}*",
            "request:queryMode": "",
            "request:queryType": "",
            "request:mi": html_args["request:mi"],
            "request:tlong": html_args["request:tlong"],
            "attribute-node:record_cache-flag": "false",
            "attribute-node:record_page": page,
            "attribute-node:record_page-row": page_size,
            "attribute-node:record_sort-column": "RELEVANCE",
            # Row offset of the first record on the requested page.
            "attribute-node:record_start-row": (page - 1) * page_size + 1,
        }

        # Local encryption (self.local_encrypt) no longer works (TODO), so the
        # request keyword arguments come from the online encryption helper.
        kwargs = online_encrypt(url=API, path=path, request_args=request_args)
        response = self.session.post(url, **kwargs)

        if response.status_code != 200:
            raise Exception(response.status_code)

        def first_text(node, field):
            # An empty or absent element yields no text nodes; return None
            # rather than aborting the whole listing with an IndexError.
            values = node.xpath(f"{field}/text()")
            return values[0] if values else None

        # Extract the data.
        for tag in html.fromstring(response.content).xpath("//record"):
            print({
                "tid": first_text(tag, "tid"),
                "index": first_text(tag, "index"),
                "申请/注册号": first_text(tag, "tmid"),
                "国际分类": first_text(tag, "nc"),
                "申请日期": first_text(tag, "fd"),
                "商标名称": first_text(tag, "mno"),
                "申请人名称": first_text(tag, "hnc"),
            })
Exemplo n.º 2
0
    def _request(self, url: str, request_args: dict, *, method="POST", api: str = None) -> requests.Response:
        """Send an encrypted request to ``url`` and return the response.

        The keyword arguments for the HTTP call are produced by
        ``online_encrypt`` when ``api`` is truthy, otherwise by
        ``local_encrypt``.

        Raises:
            Exception: With the HTTP status code as its argument when the
                response status is not 200.
        """
        url_path = parse.urlparse(url).path

        if api:
            # Online encryption.
            send_kwargs = online_encrypt(api=api, path=url_path, request_args=request_args)
        else:
            # Local encryption. TODO: no longer works.
            send_kwargs = local_encrypt(path=url_path, request_args=request_args)

        resp = self.session.request(method=method, url=url, **send_kwargs)
        if resp.status_code == 200:
            return resp

        raise Exception(resp.status_code)
Exemplo n.º 3
0
    def _request(self, url: str) -> requests.Response:
        """POST this instance's ``tid`` to ``url`` and return the response.

        Raises:
            Exception: With the HTTP status code as its argument when the
                response status is not 200.
        """
        url_path = parse.urlparse(url).path
        payload = {"request:tid": self.tid}

        # Local encryption (self.local_encrypt) no longer works (TODO), so the
        # online encryption helper builds the keyword arguments for the POST.
        post_kwargs = online_encrypt(
            url=API,
            path=url_path,
            request_args=payload,
        )

        resp = self.session.post(url, **post_kwargs)
        if resp.status_code == 200:
            return resp

        raise Exception(resp.status_code)
Exemplo n.º 4
0
    def step1(self, keyword: str) -> dict:
        """Fetch the HTML search page and return its hidden-input parameters.

        Args:
            keyword: Trademark name to search for.

        Returns:
            Mapping of ``name`` to ``value`` for every ``<input>`` produced by
            the ``get_hidden_input_v2`` script call. The mapping is also
            printed.

        Raises:
            Exception: With the HTTP status code when the response is not 200,
                or with the captcha message when the page title is "请继续".
        """
        url = "http://wsjs.saic.gov.cn/txnRead01.do"
        url_path = parse.urlparse(url).path

        form = {
            "locale": "zh_CN",  # language
            "request:queryCom": "1",  # purpose unknown
            "request:nc": "",  # international classification
            "request:sn": "",  # application/registration number
            "request:mn": keyword,  # trademark name
            "request:hnc": "",  # applicant name (Chinese)
            "request:hne": "",  # applicant name (English)
            "request:md5": None,  # md5 signature, filled in below
        }
        # get_md5 receives the dict with the placeholder entry still present.
        form["request:md5"] = self.get_md5(form)

        # Local encryption (self.local_encrypt) no longer works (TODO), so the
        # online encryption helper builds the keyword arguments for the POST.
        post_kwargs = online_encrypt(url=API, path=url_path, request_args=form)

        resp = self.session.post(url, **post_kwargs)
        if resp.status_code != 200:
            raise Exception(resp.status_code)

        document = html.fromstring(resp.content)
        page_title = document.xpath("//title/text()")[0]
        if page_title == "请继续":
            # Captcha page was served; switch IP.
            raise Exception("出现验证码")

        # Extract the parameters needed by the follow-up request: the content
        # of the fourth <meta> tag is handed to the script context.
        meta_content = document.xpath("//meta")[3].get("content")
        # Obtain the hidden <input> fields.
        inputs_markup = ctx.call("get_hidden_input_v2", meta_content)

        html_args = {}
        for node in html.fromstring(inputs_markup).xpath("//input"):
            html_args[node.get("name")] = node.get("value")

        print(html_args)
        return html_args
Exemplo n.º 5
0
    def step1(self, keyword: str) -> dict:
        """Fetch the HTML search page and return its hidden-input parameters.

        Args:
            keyword: Trademark name to search for.

        Returns:
            Mapping of ``name`` to ``value`` for every ``<input>`` produced by
            the ``get_hidden_input`` script call. The mapping is also printed.

        Raises:
            Exception: With the HTTP status code as its argument when the
                response status is not 200.
        """
        url = "http://wsjs.saic.gov.cn/txnRead01.do"
        url_path = parse.urlparse(url).path

        form = {
            "locale": "zh_CN",  # language
            "request:queryCom": "1",  # purpose unknown
            "request:nc": "",  # international classification
            "request:sn": "",  # application/registration number
            "request:mn": keyword,  # trademark name
            "request:hnc": "",  # applicant name (Chinese)
            "request:hne": "",  # applicant name (English)
            "request:md5": None,  # md5 signature, filled in below
        }
        # get_md5 receives the dict with the placeholder entry still present.
        form["request:md5"] = ListPageExample.get_md5(form)

        # Local encryption (self.local_encrypt) no longer works (TODO), so the
        # online encryption helper builds the keyword arguments for the POST.
        post_kwargs = online_encrypt(url=API, path=url_path, request_args=form)
        resp = self.session.post(url, **post_kwargs)

        if resp.status_code != 200:
            raise Exception(resp.status_code)

        # Parse the meta tag whose id is 9DhefwqGPrzGxEp9hPaoag.
        document = html.fromstring(resp.content)
        meta_node = document.xpath("//*[@id='9DhefwqGPrzGxEp9hPaoag']")[0]
        meta_content = meta_node.get("content")
        # Obtain the hidden <input> fields.
        inputs_markup = ctx.call("get_hidden_input", meta_content)

        html_args = {}
        for node in html.fromstring(inputs_markup).xpath("//input"):
            html_args[node.get("name")] = node.get("value")

        print(html_args)
        return html_args