def follow_user_list(self, keyword):
    """Search DouYin for *keyword*, switch to the user tab, and tap every
    visible "关注" (follow) button in the result list.

    Returns True on success, False if any Appium step raised.
    NOTE(review): the tap coordinates and resource ids are device- and
    app-version-specific — confirm against the target device.
    """
    try:
        logger.info(("搜索关键字:%s" % keyword))
        # Focus the search box, type the keyword, then tap the search button.
        self._appium_driver.tap(269, 147)
        self._appium_driver.find_element_by_id(
            "com.ss.android.ugc.aweme:id/a8g").send_keys(keyword)
        self._appium_driver.find_element_by_id(
            "com.ss.android.ugc.aweme:id/d7r").click()
        time.sleep(2)
        # Switch to the "users" tab of the search results.
        self._appium_driver.tap(450, 284)
        # Long wait so the mitmproxy hook can capture the user_list response.
        time.sleep(15)
        # Collect all follow buttons currently on screen.
        els = self._appium_driver.find_elements_by_id(
            "com.ss.android.ugc.aweme:id/n2")
        count = len(els)
        logger.info("获取搜索到的关注按钮个数:%d" % count)
        # user_list was filled by the proxy capture (see the response hook).
        user_list = item.getInstance().user_list
        logger.debug("======================user_list: %s" % str(user_list))
        if len(user_list) == 0 or len(user_list) < count:
            # Fewer captured users than on-screen buttons: capture went wrong.
            logger.info("获取关键字搜索的结果出现异常")
        else:
            # At most 7 entries fit on one screen; keep only the visible ones.
            # Scrolling up would be needed to reach more.
            item.getInstance().user_list = user_list[:count]
            for el in els:
                # Only tap buttons still reading "关注" (i.e. not yet followed).
                if el.text == "关注":
                    el.click()
                    time.sleep(5)
        logger.info("新关注用户数:%d" % count)
    except Exception as e:
        logger.error("根据关键字搜索用户出现异常, %s" % str(e))
        return False
    return True
def response(self, flow: mitmproxy.http.HTTPFlow) -> None:
    """mitmproxy response hook.

    Inspects every intercepted response; when the URL matches one of the
    configured DouYin API endpoints, parses the JSON body and stores the
    relevant list on the shared ``item`` singleton for the crawler to read.
    """
    url = flow.request.url
    hits_posts = (self._capture_user_post_url in url
                  or self._capture_search_video_url in url)
    if hits_posts:
        # User-post / video-search endpoint: publish the aweme list.
        logger.info("捕捉到的用户搜索的http请求url: %s" % url)
        logger.info("捕捉到的用户搜索的http请求响应text: %s" % flow.response.text)
        payload = json.loads(flow.response.text)
        aweme_list = payload.get('aweme_list', [])
        item.getInstance().aweme_list = aweme_list
        logger.info("用户发布的作品: %s" % str(aweme_list))
    elif self._capture_search_user_url in url:
        # User-search endpoint: publish the user list.
        logger.info("捕捉到的用户搜索的http请求url: %s" % url)
        logger.info("捕捉到的用户搜索的http请求响应text: %s" % flow.response.text)
        payload = json.loads(flow.response.text)
        user_list = payload.get('user_list', [])
        item.getInstance().user_list = user_list
        logger.info("用户搜索结果: %s" % str(user_list))
def follow_accounts(self):
    """Run the follow-by-keyword automation for every configured website.

    For each entry in ``self.ws`` that is due (its ``periods`` interval, in
    minutes, has elapsed since its last ``time``), search its keyword via
    Appium, upsert every captured user into the accounts collection, then
    stamp the website's last-fetch time.
    """
    logger.info("开始执行自动化脚本,关注根据关键字搜索出来的用户")
    self._appium_douyin.search_input()
    now = time.time()
    for w in self.ws:
        # Skip websites that are disabled (periods == 0) or whose refresh
        # interval has not elapsed yet. Entries without 'time' always run.
        if 'time' in w:
            if w['periods'] == 0:
                logger.info('website %s not fetch !!!' % w['website'])
                continue
            if now < w['time'] + w['periods'] * 60:
                logger.info(
                    'website %s not fetch because of time limit !!!' %
                    w['website'])
                continue
        self._appium_douyin.follow_user_list(w['url'])
        # Shared fields for every account produced by this keyword.
        d = {
            'crawler': item.getInstance().crawler,
            'crawlerName': item.getInstance().crawlerName,
            'periods': w['periods'],
            'website': w['website'],
            'query': w['query'],
        }
        for user_info in item.getInstance().user_list:
            if 'user_info' not in user_info:
                # was logging.warning — use the module logger consistently
                logger.warning("不是有效的用户信息")
                continue
            d['url'] = user_info['user_info']['uid']
            d['name'] = "抖音-" + user_info['user_info'][
                'nickname'] + "-" + user_info['user_info']['unique_id']
            d['time'] = time.time()
            logger.info("update account: %s" % d)
            # Upsert: $setOnInsert only writes when no account with this
            # url exists yet, so existing records are left untouched.
            self._accounts.update({'url': d['url']}, {'$setOnInsert': d},
                                  True)
        # Record when this website was last fetched.
        self._websites.update_one({'website': w['website']},
                                  {'$set': {
                                      'time': time.time()
                                  }})
        # Clear the search box to prepare for the next keyword.
        self._appium_douyin.search_clear_input()
    self._ws.close()
    logger.info("执行关注用户的自动化脚本结束")
def parse_content(self, w):
    """Upsert every captured video for website config *w*.

    Reads the aweme list captured by the proxy hook and stores each entry
    via ``self.content_upsert``. Entries without video data are skipped
    (matching the behavior of ``user_posts``).

    Returns True on success, False if an exception was raised.
    """
    try:
        aweme_list = item.getInstance().aweme_list
        logger.debug('===========user search===========aweme_list : %s' %
                     str(aweme_list))
        for aweme in aweme_list:
            if 'video' not in aweme:
                # BUGFIX: was "return False", which aborted the whole batch
                # on the first non-video entry and silently dropped every
                # remaining valid video. Skip just this entry instead,
                # consistent with user_posts.
                logger.warning("没有视频信息")
                continue
            self.content_upsert(aweme, w['crawlerName'], w['query'])
    except Exception as e:
        # Include the exception detail — it was captured but never logged.
        logger.error("抓取视频失败, %s" % str(e))
        return False
    return True
def user_posts(self):
    """For each user this account follows, upsert the account record once
    (from the first valid post's author) and then upsert every captured
    video post of that user."""
    for unique_id in self._appium_douyin.my_follow_user():
        posts = item.getInstance().aweme_list
        logger.debug('===========user psot===========aweme_list : %s' %
                     str(posts))
        account = {}
        for post in posts:
            # Entries without video data cannot be stored as content.
            if 'video' not in post:
                logging.warning("没有视频信息")
                continue
            author_id = post['author']['uid']
            author_name = post['author']['nickname']
            # The account record is created from the first valid post only;
            # subsequent posts reuse it.
            if len(account) == 0:
                account = self.account_upsert(author_id, author_name,
                                              unique_id)
            self.content_upsert(post, account['crawlerName'],
                                account['query'])