示例#1
0
 def crawl(self):
     """Crawl picture posts from the Moments list until the stop flag is set.

     The method loads the stored ``md5_pic`` checkpoint, calls
     ``self.enter()`` and then loops forever:

     1. Every ``crawl_max_count`` processed records (including the very
        first pass) the per-round md5 list is cleared and the list is
        scrolled back to the top.
     2. The visible list items are collected. For each item, adverts and
        items without text are skipped, the text md5 is computed, and the
        round ends (inner loop only) when that md5 is contained in the
        stored checkpoint.
     3. Items with pictures are opened in the picture viewer; up to nine
        pictures are saved through the long-press "保存图片" menu, and the
        timestamps parsed from the "saved" toast become ``start``/``end``
        of the record. Items without pictures produce a text-only record.
     4. Pending records are optionally uploaded (config ``wx_content`` /
        ``select`` equal to ``'True'``) and appended to an Excel sheet,
        then the checkpoint is persisted.

     The outer loop only terminates when ``wx_stop.stopFlag`` is truthy.

     :return: None
     """
     self.config = MonitorConfig()
     self.wx_content_md5 = self.config.get_value("wx_content", "md5_pic")
     self.md5_contents = []
     self.enter()
     index = 0
     isFirst = True
     contents = []
     lastItem = None
     while True:
         # Re-read the limit on every pass so a changed setting is honoured.
         self.crawl_max_count = int(WxConfig.get_crawl_max_count())
         # A limit of 0 would raise ZeroDivisionError in the modulo; in that
         # case the periodic "back to top" refresh is simply skipped.
         if self.crawl_max_count != 0 and index % self.crawl_max_count == 0:
             self.md5_contents.clear()
             Logger.println(
                 f"【======={index},抓取数退出{self.crawl_max_count},本轮抓取已完成,开始滑动到顶部下拉刷新继续==========】"
             )
             self.swipe_to_top()
         if wx_stop.stopFlag:
             break
         # Scroll up, except on the very first pass.
         if not isFirst:
             self.swipe_up_slow()
         isFirst = False
         top_element = self.wait_find_element(
             By.XPATH, '//android.support.v7.widget.LinearLayoutCompat')
         if lastItem and top_element:
             self.scrollElement(lastItem, top_element)
         sleep(3)
         items = self.wait_find_elements(
             By.XPATH,
             '//android.widget.ListView/android.widget.RelativeLayout')
         if items is None:
             Logger.println("【============未获取到列表================】")
             continue
         for item in items:
             # Remember the last item that shows an avatar ('头像'); it is
             # the scroll anchor for the next pass.
             accessibility_id = self.find_element_by_accessibility_id(
                 '头像', item)
             if accessibility_id:
                 lastItem = item
             b_e_content = None
             last_pic_md5 = None
             # NOTE(review): last_md5_ is reset for every item and is only
             # assigned after the checkpoint test below, so that test always
             # sees None and reduces to `md5_ in self.wx_content_md5`.
             # Kept as-is; confirm the intended two-md5 semantics before
             # hoisting this out of the loop.
             last_md5_ = None
             advise = self.find_element_by_xpath(
                 "//android.widget.TextView[contains(@text,'广告')]", item)
             if advise:
                 Logger.println(
                     "【============检测到广告忽略进入下一条================】")
                 continue
             message_text_container = self.config.get_value(
                 "wx_content_ids", "message_text_container")
             content_element = self.find_element_by_id(
                 message_text_container, item)
             if content_element:
                 # Try the detail page first to obtain the full text.
                 content_element.click()
                 b_e_content = self.waitContentTextById(
                     "com.tencent.mm:id/fpu")
                 if b_e_content:
                     Logger.println(f"【获取到详情页面全文内容={b_e_content}】")
                     self.go_back()
             if b_e_content is None:
                 # Fall back to the text shown inline in the list item.
                 message_text = self.config.get_value(
                     "wx_content_ids", "message_text")
                 b_e_content = self.getContentTextById(message_text, item)
             if b_e_content is None:
                 Logger.println("【============该条说说没有文本,忽略===========】")
                 continue
             nickName = self.getNickName(item)
             phone = self.get_phone_from_txt(b_e_content)
             md5_ = self.MD5(b_e_content)
             if len(b_e_content) > 3 and b_e_content[-3:] == '...':
                 # Text ends with '...': dump every text element of the item
                 # to the log.
                 elment_datas = self.scan_all_text_elment(item)
                 LogUtil.info_jsonformat(elment_datas)
             if last_md5_:
                 contition = (last_md5_ in self.wx_content_md5) and (
                     md5_ in self.wx_content_md5)
             else:
                 contition = md5_ in self.wx_content_md5
             if contition:
                 Logger.println(
                     f"【crawl{index}已经抓取到上一次位置md5_=({md5_},last_md5_={last_md5_}).data={b_e_content}】"
                 )
                 self._crawl_save_md5_checkpoint()
                 Logger.println("【============开始滑动到顶部===========】")
                 Logger.dingdingException("本轮抓取已完成,开始滑动到顶部下拉刷新继续")
                 self.swipe_to_top()
                 # Leaves only the item loop; the outer while keeps running.
                 break
             if md5_ in self.today_md5_contents:
                 last_md5_ = md5_
                 Logger.println("【============该条说说已经抓取过,忽略===========】")
                 continue
             image0 = self.find_element_by_xpath("//*[@content-desc='图片']",
                                                 item)
             if image0:
                 Logger.println(f"【crawl({index}).开始点击图片】")
                 image0.click()
                 sleep(2)
                 start = '0'
                 end = '0'
                 for index_img in range(9):
                     image_detail = self.find_element_by_id(
                         'com.tencent.mm:id/c9h')
                     if not image_detail:
                         continue
                     # Element whose content-desc contains '当前'
                     # (presumably a "current picture" position indicator -
                     # TODO confirm). Its md5 is compared with the previous
                     # one to detect that the swipe no longer advances.
                     position_element = self.wait_find_element(
                         By.XPATH, "//*[contains(@content-desc,'当前')]")
                     if not position_element:
                         # Previously this dereferenced the lookup result
                         # unconditionally and crashed the crawl; now the
                         # viewer is left and this post's pictures are
                         # abandoned.
                         Logger.println(
                             f"【crawl({index}.{index_img}).未找到图片位置标识,放弃本条图片抓取】"
                         )
                         self.go_back()
                         break
                     text_content = position_element.get_attribute(
                         'content-desc')
                     Logger.println(
                         f"【crawl.{index} text_content ={text_content}】"
                     )
                     pic_md5 = self.MD5(text_content)
                     if last_pic_md5 == pic_md5:
                         Logger.println(
                             f"【crawl({index}.{index_img}).前后图片一致退出】")
                         if not end:
                             sleep(2)
                             end = time_util.get_time()
                         data = self._crawl_build_record(
                             md5_, nickName, b_e_content, phone, start,
                             end, str(index_img))
                         # Only keep the record when the first picture was
                         # saved successfully (start was parsed).
                         if start != '0':
                             contents.append(data)
                             self.md5_contents.append(md5_)
                             self.today_md5_contents.append(md5_)
                             last_md5_ = md5_
                         self.go_back()
                         break
                     try:
                         action1 = TouchAction(self.driver)
                         action1.long_press(el=image_detail,
                                            duration=500).perform()
                         saveBtn = self.wait_find_element(
                             By.XPATH, "//*[contains(@text,'保存图片')]")
                         if saveBtn:
                             saveBtn.click()
                             element = self.wait_find_element(
                                 By.XPATH,
                                 "//*[contains(@text,'图片已保存')]")
                             attribute = element.get_attribute('text')
                             Logger.println(
                                 f"【crawl.{index} text_content ={attribute}】"
                             )
                             if attribute and ".jpg" in attribute:
                                 # start = time of the first saved picture,
                                 # end = time of the latest saved picture.
                                 if index_img == 0:
                                     start = get_time_from_text(
                                         attribute)
                                 end = get_time_from_text(attribute)
                         else:
                             Logger.dingdingException("找不到保存按钮,保存图片失败")
                     except Exception as e:
                         Logger.dingdingException(f"保存图片失败{e}")
                         Logger.println(f'保存图片失败 Exception{e}')
                         self.go_back()
                         continue
                     last_pic_md5 = pic_md5
                     if index_img == 8:
                         # Ninth picture reached: finish this post.
                         if not end:
                             sleep(2)
                             end = time_util.get_time()
                         data = self._crawl_build_record(
                             md5_, nickName, b_e_content, phone, start,
                             end, str(index_img + 1))
                         if start != '0':
                             Logger.println(
                                 f"【crawl({index}.{index_img}).已保存图片=mmexport{end}.jpg】"
                             )
                             contents.append(data)
                             self.md5_contents.append(md5_)
                             self.today_md5_contents.append(md5_)
                             last_md5_ = md5_
                         self.go_back()
                         break
                     self.swipeLeft()
             else:
                 # Text-only post.
                 data = self._crawl_build_record(
                     md5_, nickName, b_e_content, phone, "0", "0", "0")
                 contents.append(data)
             if len(contents) > 0:
                 value = self.config.get_value("wx_content", "select")
                 if value == 'True':
                     Logger.println(f"开始上传第{index}条数据")
                     res = WxUploader.uploadItems(contents)
                     # Result '20003': original comment calls this the
                     # "listing refreshed" case.
                     if '20003' == res:
                         contents[0]['content'] = ""
                         date = time_util.now_to_date('%Y%m%d')
                         full_dir = FilePathUtil.get_full_dir(
                             "wxfriend", "excel", "text",
                             date + "wx_pic_update_moments.xls")
                         excel_util.write_excel_append(filename=full_dir,
                                                       worksheet_name=date,
                                                       items=contents)
                         contents.clear()
                 # Anything still pending goes to the "new listing" sheet.
                 if len(contents) > 0:
                     contents[0]['content'] = ""
                     date = time_util.now_to_date('%Y%m%d')
                     full_dir = FilePathUtil.get_full_dir(
                         "wxfriend", "excel", "pic",
                         date + "wx_pic_moments.xls")
                     excel_util.write_excel_append(filename=full_dir,
                                                   worksheet_name=date,
                                                   items=contents)
                     contents.clear()
                     index += 1
             else:
                 Logger.println("【没有数据不处理】")
             self._crawl_save_md5_checkpoint()

 def _crawl_save_md5_checkpoint(self):
     """Store the first one or two md5s of this round as the ``md5_pic`` checkpoint."""
     md5 = None
     if len(self.md5_contents) > 1:
         md5 = ','.join(self.md5_contents[0:2])
     elif len(self.md5_contents) > 0:
         md5 = self.md5_contents[0]
     if md5:
         self.config.set_value("wx_content", "md5_pic", md5)

 def _crawl_build_record(self, content_md5, nick_name, content, phone,
                         start, end, count):
     """Build one crawl record dict, stamped with the current crawl time."""
     return {
         'content_md5': content_md5,
         'nick_name': nick_name,
         'wx_number': "",
         'content': content,
         'phone': phone,
         'start': start,
         'end': end,
         'crawl_time': time_util.now_to_date(),
         'count': count
     }
示例#2
0
    def __init__(self, parent=None):
        """Build the configuration dialog and pre-fill it with current settings.

        The form shows, in order: device name, Android version, server URL,
        maximum number of friends to add, resume interval after a pause,
        crawl limit per task, batch picture upload interval, phone-number
        file path, the stored picture/text md5 values, a "sync to cloud"
        checkbox, a "log output" checkbox and the button box.

        :param parent: optional parent widget forwarded to the base class.
        """
        super(ConfigDialog, self).__init__(parent)
        form = QFormLayout()
        self.config = MonitorConfig()

        def add_edit_row(lead_widget, text, placeholder=None):
            # Create a line edit showing `text` (with an optional
            # placeholder), put it next to `lead_widget` on a new form row
            # and return the edit.
            edit = QLineEdit()
            if placeholder is not None:
                edit.setPlaceholderText(placeholder)
            edit.setText(text)
            form.addRow(lead_widget, edit)
            return edit

        # --- values taken from the Appium configuration -------------------
        self.label = QLabel("已连接设备名称:")
        self.le0 = add_edit_row(self.label,
                                WxConfig.getAppiumConfig()["deviceName"],
                                "未检测到设备")
        self.label = QLabel("已连接安卓版本:")
        self.le1 = add_edit_row(self.label,
                                WxConfig.getAppiumConfig()["platformVersion"],
                                "未检测到版本")

        # --- server and task limits ---------------------------------------
        self.label = QLabel("服务器地址:")
        self.le2 = add_edit_row(self.label, WxConfig.getServerUrl())

        self.label = QLabel("最大添加好友上限(人数):")
        self.le3 = add_edit_row(self.label,
                                WxConfig.get_add_friend_max_count())

        self.label = QLabel("任务暂停后继续执行的时间间隔(秒):")
        self.le31 = add_edit_row(self.label,
                                 WxConfig.get_addfriend_inte_seconds())

        self.label = QLabel("每次抓取任务上线数量(条):")
        self.le32 = add_edit_row(self.label, WxConfig.get_crawl_max_count())

        self.label = QLabel("批量导出图片上传脚本时间间隔(秒):")
        self.le41 = add_edit_row(
            self.label,
            self.config.get_value('appiumConfig', 'batch_pic_seconds'))

        # --- phone-number file: the row is led by a button, not a label ---
        self.addbtn = QPushButton("设置手机号文件地址")
        # Clicking the button invokes self.add_phone_excel.
        self.addbtn.clicked.connect(self.add_phone_excel)
        self.le4 = add_edit_row(self.addbtn, WxConfig.getPhoneExcel())

        # --- stored md5 checkpoints ---------------------------------------
        # MonitorConfig is re-created before each read (presumably to pick
        # up the latest stored value - TODO confirm).
        self.label5 = QLabel("最新朋友圈图片md5值")
        self.config = MonitorConfig()
        self.le5 = add_edit_row(
            self.label5, self.config.get_value("wx_content", "md5_pic"))

        self.label6 = QLabel("最新朋友圈文本md5值")
        self.config = MonitorConfig()
        self.le6 = add_edit_row(
            self.label6, self.config.get_value("wx_content", "md5"))

        # --- checkboxes ---------------------------------------------------
        self.label7 = QLabel("抓取文本时同时同步到云端")
        self.select_checkbox = QCheckBox("")
        # The setting is stored as the string 'True' when enabled.
        sync_enabled = self.config.get_value("wx_content", "select") == 'True'
        self.select_checkbox.setChecked(sync_enabled)
        form.addRow(self.label7, self.select_checkbox)

        self.label8 = QLabel("是否输出日志")
        self.log_checkbox = QCheckBox("")
        self.log_checkbox.setChecked(bool(Logger.debug))
        form.addRow(self.label8, self.log_checkbox)

        # --- buttons ------------------------------------------------------
        # cacelButton is captioned "重新检测" and triggers self.reconnect;
        # saveButton triggers self.save.
        self.cacelButton = QPushButton("重新检测")
        self.saveButton = QPushButton("保存")
        self.cacelButton.clicked.connect(self.reconnect)
        self.saveButton.clicked.connect(self.save)
        self.buttonBox = QDialogButtonBox(QtCore.Qt.Horizontal)
        self.buttonBox.addButton(self.saveButton, QDialogButtonBox.RejectRole)
        self.buttonBox.addButton(self.cacelButton, QDialogButtonBox.YesRole)
        form.addRow(self.buttonBox)

        self.setLayout(form)
        self.setWindowTitle("配置服务器地址")
        self.setWindowIcon(QIcon(IconConfig.LOGO_DIR))