Пример #1
0
 def startProgress(self):
     """Announce the start of the crawl and open the target URL.

     Any exception raised while connecting is recorded through
     ``Config.writeException`` instead of being propagated.
     """
     start_message = "开始爬取进程"
     print(start_message)
     Config.writeLog(start_message)
     try:
         self.__connection.connectUrl()
     except Exception as error:
         Config.writeException(error)
Пример #2
0
    def __isElementLoadingSuccess(self):
        """Check that every control of the query form is on screen.

        Returns:
            True when the search button and the four inputs are all
            displayed; False when any of them is hidden or when locating
            or inspecting an element raises.
        """
        try:
            driver = self.__driver
            # Lookup order matches the visibility-check order below.
            controls = [
                driver.find_element_by_xpath(Query.search_button_xpath),
                driver.find_element_by_id(Query.inventor_input_id),
                driver.find_element_by_id(Query.proposer_input_id),
                driver.find_element_by_id(Query.time_select_id),
                driver.find_element_by_id(Query.time_input_id),
            ]
            # all() stops at the first hidden control.
            if not all(control.is_displayed() for control in controls):
                print("元素没显示")
                Config.writeLog("元素没显示")
                return False
            return True
        except Exception as error:
            print("元素抛异常")
            Config.writeLog("元素抛异常")
            Config.writeException(error)
            return False
Пример #3
0
 def __wait_for_law_state_loading(self):
     """Wait for the law-state view to finish loading.

     Returns:
         True when ``wait_state.wait_for_loading()`` succeeds; False,
         after logging and printing a timeout message, otherwise.
     """
     if self.wait_state.wait_for_loading():
         return True
     Config.writeLog("等待超时")
     print("等待超时")
     return False
 def changePageSuccessfully(self):
     """React to a successful page change.

     Logs the event, pauses three seconds, then waits for the page to
     load. On success collection starts for the current patent type; on
     timeout the page-change failure handler is invoked instead.
     """
     Config.writeLog("换页成功")
     time.sleep(3)
     page_ready = WaitEngine(self.__driver).wait_for_loading()
     if not page_ready:
         self.changePageUnsuccessfully()
         return
     current_type = self.__progressInfo.getPatentTypeIndex()
     self.__pageCollection.startCollecting(current_type)
Пример #5
0
 def __check_if_lost(self):
     """Report a law-data failure when the query result state is falsy."""
     if self.wait_state.query_result_state():
         return
     print("加载异常")
     Config.writeLog("加载异常")
     # TODO: add a dedicated handler for load failures
     self.__itemCollection.collectingLawDataUnsuccessfully()
 def changePageUnsuccessfully(self):
     """Log a failed page change and re-issue the current query.

     The query is rebuilt from the stored progress: current inventor,
     proposer, start date and patent type index.
     """
     Config.writeLog("换页失败")
     print("换页失败")
     progress_info = self.__progressInfo
     query_info = progress_info.getQueryInfo()
     inventors = query_info.getInventorList()
     self.__query.queryTarget(
         inventors[progress_info.getInventorIndex()],
         query_info.getProposer(),
         query_info.getStartDate(),
         progress_info.getPatentTypeIndex())
 def collectingUnsuccessfully(self, itemIndex):
     """Record the failed item index and re-issue the current query.

     Args:
         itemIndex: index of the item whose collection failed; stored
             via ``setItemIndex`` before the query is repeated.
     """
     Config.writeLog("收集信息失败")
     print("收集信息失败")
     progress_info = self.__progressInfo
     progress_info.setItemIndex(itemIndex)
     query_info = progress_info.getQueryInfo()
     inventors = query_info.getInventorList()
     self.__query.queryTarget(
         inventors[progress_info.getInventorIndex()],
         query_info.getProposer(),
         query_info.getStartDate(),
         progress_info.getPatentTypeIndex())
Пример #8
0
 def collectingItemSuccessfullyWithOutData(self):
     """Advance past an item that produced no data.

     Increments the item index; when items remain on the page the next
     one is collected, otherwise the progress controller is told the
     page is finished.
     """
     Config.writeLog("采集空item成功")
     self.__itemIndex += 1
     if self.__itemIndex >= self.__itemLength:
         self.__progressController.collectingSuccessfully()
         return
     next_item = ItemCollection(
         self.__driver,
         self,
         CollectionResult.PATENT_TYPE[self.__patentTypeIndex],
         self.__itemIndex)
     next_item.collectingData()
Пример #9
0
 def loadUrlUnsuccessfully(self):
     """Retry after a failed URL load, escalating as failures pile up.

     Most failures just refresh the page. Every fifth failure reconnects
     instead, and every second reconnect first quits the browser, waits
     ten seconds and creates a fresh web driver.
     """
     Config.writeLog("url连接失败")
     self.__refreshLostTime += 1
     if self.__refreshLostTime % 5 != 0:
         self.__connection.refreshUrl()
         return

     self.__rConnectLostTime += 1
     restart_browser = self.__rConnectLostTime % 2 == 0
     if restart_browser:
         self.__driver.quit()
         time.sleep(10)
         self.__driver = self.__generateWebDriver(self.__browser)
     self.__connection.connectUrl()
Пример #10
0
 def collectingItemSuccessfully(self, itemData):
     """Store a collected item and move on to the next one.

     Args:
         itemData: the item record to add to the collection result.

     After incrementing the item index, the next item on the page is
     collected if one remains; otherwise the progress controller is told
     the page is finished.
     """
     Config.writeLog("采集item成功")
     self.__collectionResult.addItem(itemData)
     self.__itemIndex += 1
     progress_message = "采集item成功itemIndex = {0}, itemLength = {1}".format(
         self.__itemIndex, self.__itemLength)
     Config.writeLog(progress_message)
     if self.__itemIndex >= self.__itemLength:
         self.__progressController.collectingSuccessfully()
         return
     next_item = ItemCollection(
         self.__driver,
         self,
         CollectionResult.PATENT_TYPE[self.__patentTypeIndex],
         self.__itemIndex)
     next_item.collectingData()
 def loadUrlSuccessfully(self):
     """Start the current query once the URL has loaded.

     If the page source contains the "operating too frequently" notice,
     the rejection hint is printed and the process is ended. Otherwise
     both failure counters are reset to 1 and the query for the current
     inventor / proposer / start date / patent type is issued.
     """
     Config.writeLog("成功连接url")
     throttled = "您的操作太过频繁" in self.__driver.page_source
     if throttled:
         Config.writeLog("操作太过频繁")
         print(Config.REJECT_WAY)
         self.endProgress()
         return

     self.__refreshLostTime = 1
     self.__rConnectLostTime = 1
     progress_info = self.__progressInfo
     query_info = progress_info.getQueryInfo()
     inventors = query_info.getInventorList()
     self.__query.queryTarget(
         inventors[progress_info.getInventorIndex()],
         query_info.getProposer(),
         query_info.getStartDate(),
         progress_info.getPatentTypeIndex())
Пример #12
0
    def queryTargetSuccessfully(self, pageSum):
        """Handle a successful search and decide what to crawl next.

        Args:
            pageSum: number of result pages reported for the query; it is
                stored on the progress info.

        Both failure counters are reset to 1. When ``pageSum`` is 0 the
        progress moves to the next patent type (indices 0..2) or, after
        the last type, to the next inventor, and a new query is issued;
        when no inventor remains the process is ended. When ``pageSum``
        is non-zero, crawling resumes either by jumping to the stored
        page index or by collecting from the stored item index on page 1.
        """
        Config.writeLog("检索成功")
        self.__refreshLostTime = 1
        self.__rConnectLostTime = 1
        self.__progressInfo.setPageSum(pageSum)

        if pageSum == 0:
            Config.writeLog("pageSum = 0")
            self.__progressInfo.setItemIndex(0)
            self.__progressInfo.setPageIndex(1)
            pt = self.__progressInfo.getPatentTypeIndex()
            if pt >= 2:
                Config.writeLog("pt >= 2")
                self.__progressInfo.setPatentTypeIndex(0)
                inventorIndex = self.__progressInfo.getInventorIndex() + 1
                if inventorIndex >= len(
                        self.__progressInfo.getQueryInfo().getInventorList()):
                    self.endProgress()
                    # Stop here explicitly (as loadUrlSuccessfully does)
                    # instead of relying on endProgress never returning;
                    # falling through would re-query the last inventor.
                    return
                self.__progressInfo.setInventorIndex(inventorIndex)
                queryInfo = self.__progressInfo.getQueryInfo()
                print(queryInfo.getInventorList()[
                    self.__progressInfo.getInventorIndex()])
            else:
                Config.writeLog("pt < 2")
                self.__progressInfo.setPatentTypeIndex(pt + 1)

            queryInfo = self.__progressInfo.getQueryInfo()
            inventor = queryInfo.getInventorList()[
                self.__progressInfo.getInventorIndex()]
            proposer = queryInfo.getProposer()
            startDate = queryInfo.getStartDate()
            patentTypeIndex = self.__progressInfo.getPatentTypeIndex()
            self.__query.queryTarget(inventor, proposer, startDate,
                                     patentTypeIndex)
        else:
            Config.writeLog("pageSum != 0")
            if self.__progressInfo.getPageIndex() != 1:
                self.__query.changePage(self.__progressInfo.getPageIndex())
            else:
                self.__pageCollection.startCollecting(
                    self.__progressInfo.getPatentTypeIndex(),
                    self.__progressInfo.getItemIndex())
Пример #13
0
 def collectingData(self):
     """Collect the fields of one result item and then its law state.

     The item's name and type are read first. If either is empty the
     item is reported as collected without data. Otherwise every child
     of the item's ``item-content-body`` element is read through
     JavaScript, the recognised labels are stored on the item data by
     slicing the label prefix off the text, and law-state collection is
     started.

     Returns:
         False when an exception occurred (the failure is logged and
         reported to the page collection); None otherwise.
     """
     try:
         name = self.collecting_name()
         self.__item_data.set_name(name)
         patent_type = self.collecting_type()
         self.__item_data.set_type(patent_type)
         if name == "" or patent_type == "":
             self.__pageCollection.collectingItemSuccessfullyWithOutData()
             return

         # JavaScript expression selecting this item's content body.
         body_js = ("document.getElementsByClassName(\"item-content-body\")["
                    + str(self.__whichItem) + "]")
         child_count = self.__driver.execute_script(
             "return " + body_js + ".children.length;")
         for child_index in range(child_count):
             raw_text = self.__driver.execute_script(
                 "return " + body_js + ".children[" + str(child_index) +
                 "].innerText;")
             text = str(raw_text)
             # Branch order matters: the more specific labels come first.
             if "申请号" in text:
                 self.__item_data.set_request_number(text[7:])
             elif "申请日" in text:
                 self.__item_data.set_request_date(text[6:])
             elif "公告" in text and "日" in text:
                 self.__item_data.set_announcement_date(text[10:])
             elif "申请" in text and "人" in text:
                 self.__item_data.set_proposer_name(text[11:-2])
             elif "发明人" in text:
                 self.__item_data.set_inventor_name(
                     text[6:-2].replace('\n', ''))
         Config.writeLog("准备收集法律信息")
         law_state = LawState(self.__driver, self)
         law_state.collectingLawState(self.__whichItem)
     except Exception as error:
         Config.writeException(error)
         self.__pageCollection.collectingItemUnsuccessfully()
         return False
Пример #14
0
 def __writeToExcel(self, index, patentType, name, lawState, lawStateDate,
                    aDate, requestNumber, requestDate, proposerName,
                    inventorName):
     """Write one patent record into row ``index`` of sheet 0.

     The nine values fill columns 0-8 in parameter order. Any exception
     is printed and logged rather than propagated.
     """
     row_values = (patentType, name, lawState, lawStateDate, aDate,
                   requestNumber, requestDate, proposerName, inventorName)
     try:
         editor = ExcelUtil(Config.FILE_NAME).edit()
         sheet = editor.getSheet(0)
         for column, value in enumerate(row_values):
             sheet.write(index, column, value)
         editor.commit()
     except Exception as error:
         print("写excel报错")
         Config.writeLog("写excel报错")
         Config.writeException(error)
Пример #15
0
 def startCollecting(self, patentTypeIndex, startItemIndex=0):
     """Start collecting the items of the current result page.

     Args:
         patentTypeIndex: index into ``CollectionResult.PATENT_TYPE`` for
             the patent type being crawled.
         startItemIndex: index of the first item to collect (default 0).

     Returns:
         False when counting the page's ``item`` elements raised; True
         otherwise, including when the start index is past the last item
         and the failure was reported to the progress controller.
     """
     self.__itemLength = 0
     self.__patentTypeIndex = patentTypeIndex
     self.__itemIndex = startItemIndex
     try:
         self.__itemLength = self.__driver.execute_script(
             "return document.getElementsByClassName(\"item\").length;")
     except Exception as e:
         Config.writeException(e)
         print(e)
         self.__itemLength = 0
         # The handler requires the item index; calling it without one
         # would raise TypeError from inside this except block.
         self.__progressController.collectingUnsuccessfully(
             self.__itemIndex)
         return False
     if self.__itemIndex < self.__itemLength:
         Config.writeLog("开始收集")
         itemCollectiong = ItemCollection(
             self.__driver, self,
             CollectionResult.PATENT_TYPE[patentTypeIndex],
             self.__itemIndex)
         itemCollectiong.collectingData()
     else:
         Config.writeLog("收集失败")
         print("收集失败")
         self.__progressController.collectingUnsuccessfully(
             self.__itemIndex)
     Config.writeLog("itemIndex = {0}".format(self.__itemIndex))
     return True
Пример #16
0
 def queryTarget(self, inventor, proposer, startDate, patentTypeIndex):
     """Run one search and report the outcome to the progress controller.

     Steps, each of which must succeed: wait for the page to load, check
     that the form controls are displayed, fill in and submit the form,
     wait for the results to load, then read the page count.

     Args:
         inventor: inventor name to search for.
         proposer: proposer (applicant) to search for.
         startDate: start date entered into the form.
         patentTypeIndex: index of the patent type to select.

     Returns:
         True when the page count was obtained and reported via
         ``queryTargetSuccessfully``; False when any step failed and
         ``queryTargetUnsuccessfully`` was called.
     """
     def report_failure(message):
         # Shared failure path: print, log, notify the controller.
         print(message)
         Config.writeLog(message)
         self.__progressController.queryTargetUnsuccessfully()
         return False

     if not self.__waitEngine.wait_for_loading():
         return report_failure("url加载超时")
     if not self.__isElementLoadingSuccess():
         return report_failure("元素未加载")
     if not self.__inputQueryTargetData(inventor, proposer, startDate,
                                        patentTypeIndex):
         return report_failure("查询失败")
     if not self.__waitEngine.wait_for_loading():
         return report_failure("查询等待超时")

     self.__waitEngine.waitForSeconds(1)
     page_sum = self.__getPageSum()
     if page_sum is None:
         return report_failure("页码为零")
     self.__progressController.queryTargetSuccessfully(page_sum)
     return True
Пример #17
0
 def collectingSuccessfully(self):
     """Advance the crawl after a page has been fully collected.

     Order of preference: next page of the current query; else the next
     patent type (indices 0..2) for the same inventor; else the first
     patent type of the next inventor; else end the process.
     """
     Config.writeLog("收集信息成功")
     info = self.__progressInfo
     next_page = info.getPageIndex() + 1

     # More pages remain for the current query.
     if next_page <= info.getPageSum():
         Config.writeLog("pageIndex = {0}".format(next_page))
         info.setPageIndex(next_page)
         info.setItemIndex(0)
         self.__query.changePage(next_page)
         return

     info.setPageIndex(1)
     info.setItemIndex(0)
     patent_type = info.getPatentTypeIndex()

     # Same inventor, next patent type.
     if patent_type < 2:
         info.setPatentTypeIndex(patent_type + 1)
         query_info = info.getQueryInfo()
         self.__query.queryTarget(
             query_info.getInventorList()[info.getInventorIndex()],
             query_info.getProposer(),
             query_info.getStartDate(),
             info.getPatentTypeIndex())
         return

     info.setPatentTypeIndex(0)
     inventor_index = info.getInventorIndex()
     inventor_count = len(info.getQueryInfo().getInventorList())

     # The last inventor has been processed.
     if inventor_index >= inventor_count - 1:
         Config.writeLog("InventorIndex = {0}".format(inventor_index))
         self.endProgress()
         return

     # Next inventor, starting again from the first patent type.
     info.setInventorIndex(inventor_index + 1)
     query_info = info.getQueryInfo()
     inventor = query_info.getInventorList()[info.getInventorIndex()]
     proposer = query_info.getProposer()
     start_date = query_info.getStartDate()
     type_index = info.getPatentTypeIndex()
     print(inventor)
     self.__query.queryTarget(inventor, proposer, start_date, type_index)
Пример #18
0
    def __wait_for_law_state(self):
        """Wait until the law-state view is loaded and usable.

        Three checks run in order: the loading wait, the close button
        wait, and the query result state. The first check that fails is
        logged, printed and reported through
        ``collectingLawDataUnsuccessfully``, and the method returns at
        once so a single item is never reported as failed more than once.

        Returns:
            None in every case.
        """
        if not self.wait_state.wait_for_loading():
            Config.writeLog("等待超时")
            print("等待超时")
            self.__itemCollection.collectingLawDataUnsuccessfully()
            return

        if not self.__wait_for_close_button():
            Config.writeLog("关闭按钮没出来")
            print("关闭按钮没出来")
            self.__itemCollection.collectingLawDataUnsuccessfully()
            return

        if not self.wait_state.query_result_state():
            Config.writeLog("加载异常")
            print("加载异常")
            # TODO: add a dedicated handler for load failures
            self.__itemCollection.collectingLawDataUnsuccessfully()
        return
Пример #19
0
 def collectingLawDataSuccessfully(self, lawUpdate, lawState):
     """Store the collected law data and report the item as collected.

     Args:
         lawUpdate: value stored as the item's law state date.
         lawState: value stored as the item's law state.
     """
     Config.writeLog("采集法律信息成功")
     item_data = self.__item_data
     item_data.set_law_state(lawState)
     item_data.set_law_state_date(lawUpdate)
     self.__pageCollection.collectingItemSuccessfully(item_data)
Пример #20
0
 def collectingItemUnsuccessfully(self):
     """Report the current item index to the controller as failed."""
     failure_message = "采集item失败"
     print(failure_message)
     Config.writeLog(failure_message)
     self.__progressController.collectingUnsuccessfully(self.__itemIndex)
Пример #21
0
 def endProgress(self):
     """Quit the browser and terminate the process with exit status 0.

     ``os._exit`` ends the interpreter immediately, so this method does
     not return to its caller.
     """
     end_message = "结束进程"
     Config.writeLog(end_message)
     print(end_message)
     self.__driver.quit()
     time.sleep(1)
     os._exit(0)
Пример #22
0
def init_excel_config():
    """Write the column titles into row 0 of sheet 0 of the output file."""
    column_titles = (
        "专利类型",
        "专利名称",
        "法律状态",
        "法律状态最后修改日期",
        "申请公布日/授权公告日",
        "申请号",
        "申请日",
        "申请人/专利权人",
        "发明人",
    )
    editor = ExcelUtil(Config.FILE_NAME).edit()
    sheet = editor.getSheet(0)
    for column, title in enumerate(column_titles):
        sheet.write(0, column, title)
    editor.commit()


# Script entry point: initialise, ask for the start date, prepare the Excel
# header row, then create the progress controller and start crawling.
if __name__ == '__main__':
    initProgress()
    # This line is very important: it raises Python's recursion limit,
    # otherwise the program blows up after about 900 levels of recursion.
    sys.setrecursionlimit(1000000)  # e.g. set to one million here
    # Prompt for the publication start date, showing today's date as the
    # expected format.
    startDate = input("请输入公布日开始日期,如{0}:".format(
        TimeUtil.getFormatTime("%Y-%m-%d")))
    Config.writeLog("程序启动,输入的公布开始日期为{0}".format(startDate))
    init_excel_config()

    progress = ProgressController(Config.BROSWER_NAME)
    Config.writeLog("启动{0}浏览器".format(Config.BROSWER_NAME))
    # Hand the entered start date to the controller's query info before
    # the crawl begins.
    queryInfo = progress.getQueryInfo()
    queryInfo.setStartDate(startDate)

    progress.startProgress()

    # print(excel)
Пример #23
0
 def collecting_law_state(self, which_item):
     """Open an item's law-state view, read it and report the result.

     The law-state button is clicked up to two times; each attempt waits
     two seconds first and then waits for loading. If both attempts time
     out the item is reported as failed. Otherwise the law state is read
     (and the update date, unless the state contains "无数据"), the view
     is closed and the data is reported as collected. Any exception is
     logged and reported as a failure.

     Args:
         which_item: index of the item whose law state is collected.
     """
     try:
         for _attempt in range(2):
             Config.writeLog("点击按钮")
             WaitEngine.waitForSeconds(2)
             self.__click_law_state_button(which_item)
             if self.__wait_for_law_state_loading() is True:
                 self.__check_for_colse_button()
                 self.__check_if_lost()
                 break
         else:
             # Neither attempt finished loading.
             self.__itemCollection.collectingLawDataUnsuccessfully()
             return

         Config.writeLog("法律状态")
         state_text = self.__get_law_state()
         if state_text.find("无数据") != -1:
             update_text = "无数据"
         else:
             Config.writeLog("法律日期")
             update_text = self.__get_law_update()
         Config.writeLog("关闭按钮")
         self.__close_law_state()
         Config.writeLog("采集成功")
         self.__itemCollection.collectingLawDataSuccessfully(
             update_text, state_text)
     except Exception as error:
         print("采集异常")
         Config.writeLog("采集异常")
         Config.writeException(error)
         self.__itemCollection.collectingLawDataUnsuccessfully()
     return
Пример #24
0
 def __check_for_colse_button(self):
     """Report a law-data failure when the close button wait fails."""
     if self.__wait_for_close_button():
         return
     Config.writeLog("关闭按钮没出来")
     self.__itemCollection.collectingLawDataUnsuccessfully()
Пример #25
0
 def __inputQueryTargetData(self, inventor, proposer, startDate,
                            patentTypeIndex):
     """Fill in the search form through JavaScript and submit it.

     Args:
         inventor: value written into the inventor input.
         proposer: value written into the proposer (applicant) input.
         startDate: value written into the date input.
         patentTypeIndex: patent type passed to ``__choosePatentType``.

     Returns:
         True when every script ran and the search button was clicked;
         False when any step raised (the exception is logged and printed).
     """
     # The element id and the value are passed as script arguments
     # instead of being concatenated into the JavaScript source, so a
     # quote or backslash in a name or in the user-entered date can
     # neither break the script nor inject code into it.
     set_value_script = (
         "document.getElementById(arguments[0])"
         ".setAttribute(\"value\", arguments[1]);")
     try:
         # Fill in the inventor.
         self.__driver.execute_script(set_value_script,
                                      Query.inventor_input_id, inventor)
         Config.writeLog("发明人")
         # Fill in the proposer (applicant).
         self.__driver.execute_script(set_value_script,
                                      Query.proposer_input_id, proposer)
         Config.writeLog("申请人")
         # Open the date selector's check list ...
         self.__driver.execute_script(
             "document.getElementById(\"" + Query.time_select_id +
             "\").firstElementChild.firstElementChild.click();")
         WaitEngine.waitForSeconds(2)  # wait two seconds
         # ... and click the entry inside it.
         self.__driver.execute_script(
             "document.getElementById(\"" + Query.time_select_id +
             "\").firstElementChild.childNodes[2].childNodes[2].firstElementChild.click();"
         )
         Config.writeLog("点击时间")
         # Fill in the date.
         self.__driver.execute_script(set_value_script,
                                      Query.time_input_id, startDate)
         Config.writeLog("填写时间")
         # Choose the patent type.
         self.__choosePatentType(patentTypeIndex)
         Config.writeLog("专利类型")
         WaitEngine.waitForSeconds(3)  # wait three seconds
         # Click the search button.
         self.__driver.execute_script(
             "document.getElementsByClassName(\"box-content-bottom\").item(0).childNodes.item(5).click();"
         )
         Config.writeLog("点击按钮")
         return True
     except Exception as e:
         Config.writeException(e)
         print(e)
         return False
Пример #26
0
 def collectingLawDataUnsuccessfully(self):
     """Log the law-data failure and report the item as failed."""
     failure_message = "收集法律信息失败"
     Config.writeLog(failure_message)
     self.__pageCollection.collectingItemUnsuccessfully()