def runner_pool(getDevices):
    """Build one Appium capability dict per attached device and run the
    test cases for each of them through a process pool.

    :param getDevices: list of dicts, each holding "devices" (serial/udid)
        and "port" for one connected Android device.
    """
    devices_pool = []
    for device in getDevices:
        serial = device["devices"]
        # Desired capabilities for one device / appium-server pair.
        init_app = {
            "udid": serial,
            "port": device["port"],
            "deviceName": serial,
            # The Android OS version is read from the device itself.
            "platformVersion": getPhoneInfo(devices=serial)["release"],
            "platformName": "android",
            "appPackage": 'com.aixuetang.online',
            "appActivity": 'com.aixuetang.mobile.activities.LaunchActivity'
        }
        devices_pool.append(init_app)
    # Bug fix: the original also built a per-device `_pool` list that was
    # appended to but never read — removed.  The old comment also claimed
    # 3 worker processes while the code uses 1; it really is a single worker.
    pools = Pool(processes=1)
    res = pools.map(runner_case_app, devices_pool)  # dispatch the test cases
    LOG.info(res)
    pools.close()
    pools.join()
def extract(system: str, kinetics: str):
    """Re-fit stored parameter sets by re-sampling the source rate constant
    until the wild-type error drops below the save cutoff.

    :param system: system identifier passed to the ODE helpers.
    :param kinetics: kinetics label (logged only).
    """
    LOG.info(json.dumps({
        "UID": CURRENT_JOB,
        "System": system,
        "Kinetics": kinetics,
        "TotalLipid": total_lipid_concentration,
        "Analysis": "Converting Old Parameters",
        "version": "3.0",
    }, sort_keys=True))

    all_para = get_parameters()
    for done, para in enumerate(all_para):
        initial_con = get_random_concentrations(total_lipid_concentration,
                                                system)
        update_progress(done / len(all_para), "Extracting Old Parameters")
        # Re-sample k at most 10000 times before giving up on this set.
        for _ in range(10000):
            para[E_SOURCE].k = np.random.uniform(min_k, max_k)
            output = get_concentration_profile(system, initial_con, para,
                                               ode_end_time, ode_slices)
            err = calculate_wild_type_error(output[-1])
            if err < save_cutoff:
                save_para(para, err)
                break
def getPhoneInfo(devices):
    """Read basic properties of one device from /system/build.prop via adb.

    :param devices: device serial number.
    :return: dict that always contains "release" and, when found on the
        device, "model", "brand" and "device".
    """
    cmd = "adb -s " + devices + " shell cat /system/build.prop"
    LOG.info("执行命令:%s" % cmd)
    proc = subprocess.Popen(cmd, shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    phone_info = proc.stdout.readlines()
    # build.prop keys of interest.
    release = "ro.build.version.release="  # OS version
    model = "ro.product.model="            # model
    brand = "ro.product.brand="            # brand
    device = "ro.product.device="          # device name
    # The raw prefix is kept as the fallback "release" value (original quirk).
    result = {"release": release}
    prefixes = (("release", release), ("model", model),
                ("brand", brand), ("device", device))
    for raw_line in phone_info:
        for token in raw_line.split():
            text = token.decode()
            for key, prefix in prefixes:
                if text.find(prefix) >= 0:
                    result[key] = text[len(prefix):]
                    break
            else:
                continue
            # A match consumes the rest of this line, as in the original.
            break
    LOG.info("移动设备信息:%s" % result)
    return result
def parse_acct_search(self, content="", save=True):
    """Parse a Sogou account-search result page, remember the account id,
    display name and login query-string, and optionally persist them.

    :param content: raw HTML of the search result page.
    :param save: when true, upsert a new account record into mongo.
    :return: the login query-string extracted from the result link.
    """
    soup = BeautifulSoup(content, 'html')
    self.wechat_acct = soup.find("label", {"name": "em_weixinhao"}).text
    self.wechat_name = soup.find("em").text
    result_div = soup.find("div", {"class": "results"})
    login_param = urlparse(result_div.div["href"]).query
    self.login_info = login_param
    if self.wechat_acct and self.login_info and save:
        with MongodbUtils(config.WECHAT_DB_IP, config.WECHAT_DB_PORT,
                          config.WECHAT_COLLECTION,
                          config.WECHAT_ACCOUNT_TABLE) as connect_db:
            existing = connect_db.find_one({"account": self.wechat_acct})
            if existing:
                LOG.info("%s is already in the database" % (self.wechat_acct))
            else:
                connect_db.update(
                    {"account": self.wechat_acct},
                    {"$set": {
                        "keyword": self.keyword,
                        "account": self.wechat_acct,
                        "url_param": login_param,
                        "account_name": self.wechat_name,
                        "real_acct": self.wechat_acct,
                        "category": "",
                        "tag": [],
                    }},
                    upsert=True)
                LOG.info("%s is add to the database" % (self.wechat_acct))
    return login_param
def __init__(self):
    """Verify the APK under test, collect attached devices, install the
    APK on each of them in parallel, and start the Appium servers."""
    # --- APK file check ---
    if not target_app:
        raise BaseErorr("未指定待测试的APK文件!")
    target_app_path = os.path.join(app_path, target_app)
    if not os.path.exists(target_app_path):
        raise BaseErorr("未找到指定测试的APK文件!")
    LOG.info("开始测试APP:%s..." % target_app)
    # --- attached-device check ---
    devices = AndroidDebugBridge().attached_devices()
    if not devices:
        raise BaseErorr("没有可用的安卓设备!")
    self.l_devices = [{"devices": dev, "port": "4723"} for dev in devices]
    # --- APK meta information ---
    self.app_info = getApkBaseInfo(target_app_path)
    # --- install the APK on every device, one process per device ---
    pool_app_list = [
        {"device": item["devices"],
         "info": self.app_info,
         "path": target_app_path}
        for item in self.l_devices
    ]
    pool = Pool(len(pool_app_list))
    pool.map(installApp, pool_app_list)
    pool.close()
    pool.join()
    # --- start the appium servers ---
    appium_server = AppiumServer(self.l_devices)
    appium_server.start_server()
def runnerPool(self):
    """Build one capability dict per connected device and run the test
    cases across a process pool sized to the number of devices."""
    devices_pool = []
    for dev in self.l_devices:
        serial = dev["devices"]
        devices_pool.append({
            "deviceName": serial,
            "udid": serial,
            "platformVersion": getPhoneInfo(devices=serial)["release"],
            "platformName": "android",
            "port": dev["port"],
            "appPackage": self.app_info["packageName"],
            "appActivity": self.app_info["appActivity"],
        })
    LOG.info("进程数量:%s" % len(devices_pool))
    pool = Pool(len(devices_pool))
    # Run the test cases, one worker process per device.
    pool.map(runnerCaseApp, devices_pool)
    pool.close()
    pool.join()
def parse_acct_search(self, content="", save=True):
    """Extract account id/name and the login query-string from a Sogou
    search page; persist a new account record when `save` is true.

    :return: the login query-string for the account.
    """
    soup = BeautifulSoup(content, 'html')
    wechat_acct = soup.find("label", {"name": "em_weixinhao"}).text
    wechat_name = soup.find("em").text
    self.wechat_acct = wechat_acct
    self.wechat_name = wechat_name
    login_param = urlparse(
        soup.find("div", {"class": "results"}).div["href"]).query
    self.login_info = login_param
    # Guard clause: nothing to persist.
    if not (self.wechat_acct and self.login_info and save):
        return login_param
    record = {
        "keyword": self.keyword,
        "account": wechat_acct,
        "url_param": login_param,
        "account_name": wechat_name,
        "real_acct": wechat_acct,
        "category": "",
        "tag": [],
    }
    with MongodbUtils(config.WECHAT_DB_IP, config.WECHAT_DB_PORT,
                      config.WECHAT_COLLECTION,
                      config.WECHAT_ACCOUNT_TABLE) as connect_db:
        if connect_db.find_one({"account": wechat_acct}):
            LOG.info("%s is already in the database" % (wechat_acct))
        else:
            connect_db.update({"account": wechat_acct}, {"$set": record},
                              upsert=True)
            LOG.info("%s is add to the database" % (wechat_acct))
    return login_param
def _check_process(self): try: out = os.popen(self._cmd, 'r').read() return [tuple(line.split()) for line in out.splitlines()] except KeyboardInterrupt as e: pass except Exception as e: LOG.info(str(e))
def test_web(url):
    """Probe `url` after a 10 s warm-up delay and report whether it
    answered with a 2xx status code.

    :param url: address to probe.
    :return: True on a 2xx response, False otherwise.
    """
    time.sleep(10)  # give the server time to come up before probing
    LOG.info("测试访问:%s..." % url)
    flag = False
    # Bug fix: the response was never closed; the context manager releases
    # the underlying socket deterministically.
    with urllib.request.urlopen(url, timeout=5) as response:
        flag = str(response.getcode()).startswith("2")
    return flag
def reg(self, **kwargs):
    """Execute a case via ``self.open`` and return its assertion data.

    Returns None (implicitly) when the case reports code 1, i.e. the
    assertion data could not be obtained.
    """
    result = self.open.exce_case(**kwargs)
    if result['code'] != 1:
        return result['data']
    LOG.info('无法获取断言')
    return
def test_swipe(self):
    """Swipe right-to-left across the middle of the screen."""
    # Log the screen dimensions first.
    size = self.driver.get_window_size()
    width = size["width"]
    height = size["height"]
    LOG.info("印屏幕大小 window={}".format(size))
    # 100 ms gesture from 6/7 of the width down to 1/7, at mid-height.
    # NOTE(review): true division yields float coordinates — confirm the
    # appium client accepts non-integer positions.
    self.driver.swipe(6 / 7 * width, 1 / 2 * height,
                      1 / 7 * width, 1 / 2 * height, 100)
def open_da(path):
    """Parse a YAML document from disk.

    :param path: path of the YAML file.
    :return: {'code': 0, 'data': parsed} on success,
             {'code': 1, 'data': exception} on failure.
    """
    try:
        # Idiom fix: `'{}'.format(path)` was a no-op wrapper; pass the
        # path straight to open().
        with open(path, 'r', encoding='utf-8') as file:
            data = yaml.load(file, Loader=yaml.FullLoader)
        return {'code': 0, 'data': data}
    except Exception as e:
        LOG.info('yaml文档解析失败!原因:{}'.format(e))
        return {'code': 1, 'data': e}
def write_recording(cpu, neicun, devices):
    """Append one performance sample (cpu / memory for one device) to the
    recording file.

    :param cpu: cpu usage value to record.
    :param neicun: memory usage value to record.
    :param devices: device identifier the sample belongs to.
    """
    try:
        # Bug fix: the explicit f.close() inside the `with` block was
        # redundant and has been removed.
        with open(recording, 'a', encoding='utf-8') as f:
            line = '%s:cpu:%s,内存:%s' % (devices, cpu, neicun)
            f.write(line + '\n')
    except Exception as e:
        LOG.info('写入性能数据失败!失败原因:%s' % e)
def exec_rt(self, cmd, out_func=None):
    '''Run `cmd` on the remote host and stream its output in near real
    time, pushing each raw output line through `out_func` as it arrives
    (e.g. a tornado websocket ``write_message``).

    :param cmd: shell command to execute remotely.
    :param out_func: callable fed each raw line; defaults to ``print``.
    :return: (out, err) lists of utf-8 decoded stdout / stderr lines.

    Usage::

        >>> out, err = ssh.exec_rt('top -b -n 5', self.write_message)  # tornado websocket
    '''
    LOG.info('SSH RT_CMD: %s' % cmd)
    stdin, stdout, stderr = self._client.exec_command(cmd, get_pty=True)
    channel = stdout.channel
    # `pending` / `err_pending` hold a trailing partial line carried over
    # between reads so lines are only emitted once complete.
    pending = err_pending = None
    if not out_func:
        out_func = print
    out, err = [], []
    # Poll until the channel is closed AND both streams are drained.
    while not channel.closed or channel.recv_ready(
    ) or channel.recv_stderr_ready():
        readq, _, _ = select.select([channel], [], [], 1)
        for c in readq:
            if c.recv_ready():
                chunk = c.recv(len(c.in_buffer))
                if pending is not None:
                    chunk = pending + chunk
                lines = chunk.splitlines()
                # If the chunk did not end on a line boundary, keep the
                # last fragment for the next read.
                if lines and lines[-1] and lines[-1][-1] == chunk[-1]:
                    pending = lines.pop()
                else:
                    pending = None
                for line in lines:
                    out_func(line)
                    line = line.decode('utf-8')
                    out.append(line)
            if c.recv_stderr_ready():
                chunk = c.recv_stderr(len(c.in_stderr_buffer))
                if err_pending is not None:
                    chunk = err_pending + chunk
                lines = chunk.splitlines()
                if lines and lines[-1] and lines[-1][-1] == chunk[-1]:
                    err_pending = lines.pop()
                else:
                    err_pending = None
                for line in lines:
                    out_func(line)
                    line = line.decode('utf-8')
                    err.append(line)
    # A stderr of exactly ['[', ']'] is discarded (presumably a pty
    # artifact — TODO confirm).
    if err == ['[', ']']:
        err = []
    if err:
        self._log(err, 'RT_ERR')
    else:
        self._log(out, 'RT_OUT')
    return out, err
def on_submitBtn_clicked(self):
    """Collect the case form fields, build a Cases record, persist it
    and, on success, emit the success signal and reset the dialog."""
    caseName = self.caseNameEdit.text()
    testPoint = self.testPointEdit.text()
    httpStatus = self.httpStatusEdit.text()
    # Pre-treatment key/value table (columns 0 and 1).
    preDict = {}
    rowCount = self.preTableView.rowCount()
    for i in range(rowCount):
        key = self.preTableView.cellWidget(i, 0).text()
        value = self.preTableView.cellWidget(i, 1).text()
        preDict.setdefault(key, value)
    # Request parameter table.
    # NOTE(review): this table reads columns 1 and 2, unlike the
    # pre-treatment table above (0 and 1) — confirm the column layout.
    reqestDict = {}
    rowCount = self.reTableView.rowCount()
    for i in range(rowCount):
        key = self.reTableView.cellWidget(i, 1).text()
        value = self.reTableView.cellWidget(i, 2).text()
        reqestDict.setdefault(key, value)
    # Expected result.
    expected = self.expectedEdit.toPlainText()
    # Post-treatment.
    postposition = self.postpositionEdit.toPlainText()
    # Build the request signature from the configured key list, if any.
    temp = []
    signvalue = ""
    if len(keyList) > 0:
        for mykey in keyList:
            val = reqestDict.get(mykey)
            temp.append(val)
        signvalue = md5Encode(temp)
        reqestDict.setdefault("sign", signvalue)
        LOG.info('签名:%s,%s' % (keyList, signvalue))
    # Assemble and persist the case record.
    caseObj = Cases()
    caseObj.casename = caseName
    caseObj.faceid = self.faceid
    caseObj.userid = self.userid
    caseObj.testpoint = testPoint
    caseObj.httpstatus = httpStatus
    caseObj.sign = signvalue
    caseObj.pretreat = preDict
    caseObj.request = reqestDict
    caseObj.expected = expected
    caseObj.postposition = postposition
    caseObj.updatetime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    caseObj.keylist = keyList
    count = self.casedb.createCase(caseObj)
    if count > 0:
        QMessageBox.information(self, "提示", "用例添加成功",
                                QMessageBox.Yes, QMessageBox.Yes)
        self.add_case_success_signal.emit()
        self.close()
        self.clearEdit()
def getTotalRecordCount(self, id=None):
    """Return the total number of rows in tc_interface, optionally
    restricted to one project.

    NOTE(review): the SQL is built by string interpolation; `id` must be
    trusted/numeric or this is open to SQL injection.
    """
    sql = "select * from tc_interface where 1=1"
    if id is not None:
        sql += " and projectId=%s" % id
    recordCount = self.mydb.executeQuery_count(sql)
    LOG.info(sql)
    return recordCount
def get_url_content(self, url, head=None):
    """Fetch `url` and return the raw response body.

    :param url: address to fetch.
    :param head: optional dict of request headers; falls back to
        ``self.head`` when empty/None (backward compatible with the old
        ``head={}`` default).
    :return: response body bytes.
    """
    LOG.info("Visit URL: %s" % url)
    # Bug fix: `head={}` was a mutable default argument shared across
    # calls; use the None sentinel instead.
    _headers = head if head else self.head
    req = urllib2.Request(url, None, _headers)
    response = urllib2.urlopen(req)
    _content = response.read()
    return _content
def runner_case_app(devices):
    """Assemble the test suite for one device and run it with unittest.

    :param devices: capability dict for the device under test.
    """
    LOG.info(devices)
    suite = unittest.TestSuite()
    # Additional test cases are registered the same way.
    suite.addTest(Parmer.parametrize(Regtest, param=devices))
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(suite)
def tearDown(self):
    """Restore the environment after a case: uninstall the app under test
    (when present) and shut down the appium session."""
    LOG.info("tearDown....")
    driver = self.driver
    if driver.is_app_installed(AppPackage):
        driver.remove_app(AppPackage)
    LOG.info('测试用例执行完毕,测试环境正在还原!')
    time.sleep(5)
    driver.quit()
def main():
    """Start the tornado application and block on the IO loop, logging any
    startup/runtime failure."""
    try:
        app = Application()
        app.listen(address=options.address, port=options.port,
                   max_body_size=TORNADO_MAX_BODY_SIZE)
        LOG.info('Sever Listen {port}...'.format(port=options.port))
        tornado.ioloop.IOLoop.instance().start()
    except Exception:
        # Bug fix: the bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt; catch Exception only.
        LOG.error(traceback.format_exc())
def make_case_files(case_name, desc, funtion_name):
    """Generate a test-case file skeleton under cases/ unless it already
    exists.

    :param case_name: base name used for the file name and header template.
    :param desc: case description written into the content template.
    :param funtion_name: function name written into the content template.
    """
    LOG.info("开始生成测试用例文件")
    file_path = os.path.join(base_dir,
                             'cases/{}_case_test.py'.format(case_name))
    # Never overwrite an existing case file.  (The original dead
    # `else: pass` branch was removed.)
    if not os.path.exists(file_path):
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(read_header().format(case_name, case_name))
            file.write(reader_conntent().format(funtion_name, desc))
def save_result(data):
    """Write `data` (plus a newline) to the recording file, replacing any
    previous content.

    Bug fix: both branches of the original exists-check opened the file in
    'w+' mode — which creates and truncates either way — but only one of
    them passed an encoding.  Collapsed to a single consistent open().

    :param data: text line to record.
    """
    with open(recording, 'w+', encoding='utf-8') as f:
        f.write(data + '\n')
    LOG.info('记录测试结果完毕')
def kill_process(self):
    """Kill the process described by ``self._l`` (a header row followed by
    one data row from a process listing).

    :return: True when the process was killed, None when there is nothing
        to kill or the pid was already gone.
    """
    if not self._l:
        return None
    header, row = self._l[0], self._l[1]
    pid = int(dict(zip(header, row)).get("PID"))
    try:
        os.kill(pid, signal.SIGKILL)
    except OSError:
        LOG.info('没有appium此进程!!!')
    else:
        LOG.info('已杀死pid为{}的进程'.format(pid))
        return True
def get_pid(port):
    """Find the pid LISTENING on `port` using netstat (Windows findstr
    syntax).

    :param port: port number to look up.
    :return: pid string from the last matching LISTENING line, or None.
    """
    pid = None
    cmd_find = 'netstat -aon | findstr "%s"' % port
    LOG.info("执行命令 '%s' ..." % cmd_find)
    # `with` reaps the popen pipe instead of leaking it.
    with os.popen(cmd_find) as result:
        pid_list = result.readlines()
    # Bug fix: the original guard was `pid_list != ""`, which is always
    # true for a list; iterate the (possibly empty) list directly.
    for pid_info in pid_list:
        pid_info = pid_info.strip()
        if "LISTENING" in pid_info:
            # The last whitespace-separated field is the pid; split()
            # also tolerates runs of multiple spaces.
            pid = pid_info.split()[-1]
    return pid
def send_report_mail():
    """E-mail the HTML test report, with the report file attached, and log
    the outcome."""
    file_path_tuple = (report_file_name, )
    send_conf = SendEmail(email_smtp, email_username, email_passwd)
    # Bug fix: the report was read via open(...).read() without closing
    # the handle; read it once inside a `with` block instead.
    with open(report_file_name, 'rb') as report:
        content = report.read()
    if send_conf.send_email(mail_to_list, mail_to_cc,
                            sub=mail_title + now,
                            content=content,
                            file_path=file_path_tuple):
        LOG.info('测试邮件发送成功,测试时间{0}'.format(now))
    else:
        LOG.info('测试邮件发送失败,测试时间{0}'.format(now))
def catch_error(*args, **kwargs): try: res = func(*args, **kwargs) except Exception as e: LOG.error(traceback.format_exc()) LOG.info("================content start================") _content = kwargs.get("content") _content = _content if _content else args[0] print _content LOG.info("================content end================") return None return res
def main():
    """Entry point: read the test type and VM count from argv, then fetch
    the matching server list and save it to csv."""
    os.chdir(sys.path[0])
    test_type = sys.argv[1]
    # Guard clause: nothing to do without a test type.
    if not test_type:
        return
    try:
        vm_num = int(sys.argv[2])
    except (IndexError, ValueError):
        vm_num = 1  # default to a single VM when the count is absent/bad
    LOG.info(u"测试%s性能, 获取 %s VM, Time: %s"
             % (test_type, vm_num, gen_local_time()))
    csv_file = "test_vm_%s_%s.csv" % (vm_num, test_type)
    db.get_servers_and_save(csv_file, server_name_prefix=test_type,
                            server_count=vm_num)
def getInterFaceByCondition(self, id=-1, index=0, pageRecord=20):
    """Query interface records with an optional project filter and paging.

    :param id: project id filter; 0 disables the filter.
        NOTE(review): the default of -1 therefore filters on
        projectId=-1 — confirm this is intended.
    :param index: offset of the first row.
    :param pageRecord: page size.
    :return: rows from tc_interface, newest first.
    """
    sql = ("select id,faceName,address,requestType,dataType,contentType,"
           "caseCount,createTime,projectName,projectId,userId "
           "from tc_interface where 1=1")
    if id != 0:
        sql += " and projectId=%d" % int(id)
    sql += " order by createTime desc limit %d,%d" % (index, pageRecord)
    rsData = self.mydb.executeQuery_all(sql)
    LOG.info(sql)
    return rsData
def exec(self, cmd):
    """Run `cmd` on the remote host and return its output.

    :param cmd: shell command to execute remotely.
    :return: (stdout_lines, stderr_lines); stderr is logged when present,
        otherwise stdout is logged.
    """
    LOG.info('SSH CMD: %s' % cmd)
    stdin, stdout, stderr = self._client.exec_command(cmd, get_pty=True)
    out = stdout.readlines()
    err = stderr.readlines()
    if err:
        self._log(err, 'ERR')
    else:
        self._log(out, 'OUT')
    return out, err
def attached_devices(self):
    """List serial numbers of the devices currently attached to adb.

    :return: list of device serial strings (possibly empty).
    """
    LOG.info("执行命令 'adb devices' ...")
    devices = []
    # Bug fix: read the output via communicate() so the child process is
    # waited on and reaped instead of being left behind.
    proc = subprocess.Popen("adb devices", shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout_data, _ = proc.communicate()
    for item in stdout_data.splitlines():
        # Device lines look like "<serial>\tdevice".
        parts = item.decode().split("\tdevice")
        if len(parts) >= 2:
            devices.append(parts[0])
    LOG.info("设备信息: %s" % ",".join(devices))
    return devices
def browser_search(self, keyword="data"):
    """Search Sogou Weixin for `keyword` and return the raw result page.

    :param keyword: search term; also remembered on self.keyword.
    :return: raw body of the search result page.
    """
    self.keyword = keyword
    url = ("http://weixin.sogou.com/weixin?type=1&query={keyword}&ie=utf8"
           .format(keyword=keyword))
    LOG.info("Search Keyword [%s]: %s" % (keyword, url))
    return self.browser.get_url_content(url)
def do_sweep(system: str, kinetics: str, total_lipid=total_lipid_concentration):
    """Simplex-style sweep over parameter space.

    For each of `outer_iterations` rounds, builds an initial 3-vertex set
    and repeatedly reflects the worst vertex through the best/good
    midpoint, keeping the three lowest-error candidates, until `best`
    stops improving for `para_skip_threshold` consecutive steps.  Any set
    whose error beats the best seen so far is recorded.

    :param system: system identifier passed to the ODE helpers.
    :param kinetics: kinetics label (used for logging and the initial set).
    :param total_lipid: total lipid concentration for the sweep.
    """
    # Initial setup to start sweep
    log_data = {
        "UID": CURRENT_JOB,
        "System": system,
        "Kinetics": kinetics,
        "TotalLipid": total_lipid,
        "Analysis": "Nelder-Mead Simplex Sweep",
        "version": "3.0"}
    LOG.info(json.dumps(log_data, sort_keys=True))
    lowest_error = 1000000  # sentinel: any realistic error should beat this
    progress_counter = 0
    update_progress(progress_counter / outer_iterations,
                    "lowest_error : %s" % lowest_error)
    for i in range(outer_iterations):
        initial_condition = get_random_concentrations(total_lipid, system)
        best, good, worst = make_initial_set(initial_condition, system,
                                             kinetics, total_lipid)
        para_skip = 0  # consecutive rounds without a new best
        while para_skip < para_skip_threshold:
            # Midpoint of the two better vertices, and its error.
            mid = mid_point(best, good)
            mid_out = get_concentration_profile(system, initial_condition,
                                                mid, ode_end_time,
                                                ode_slices)
            mid_error = Error(total_lipid, mid_out[-1], mid)
            # Reflect the worst vertex through that midpoint.
            reflection = get_reflection(mid_error, worst)
            reflection_out = get_concentration_profile(system,
                                                       initial_condition,
                                                       reflection,
                                                       ode_end_time,
                                                       ode_slices)
            reflection_error = Error(total_lipid, reflection_out[-1],
                                     reflection)
            # Keep the three lowest-error candidates as the new set.
            analysis_list = [best, good, worst, mid_error, reflection_error]
            before_sort = best
            analysis_list.sort(key=lambda x: x.total_error)
            best, good, worst = analysis_list[0], analysis_list[1], \
                analysis_list[2]
            if before_sort == best:
                para_skip += 1
            if best.total_error < lowest_error:
                lowest_error = best.total_error
                best.record()
        progress_counter += 1
        update_progress(progress_counter / outer_iterations,
                        "lowest_error : %s" % lowest_error)
def download_article_content(self, item):
    """Download an article body to disk unless the article is hosted on a
    referring site.

    :param item: article summary dict with "url", "site" and "uuid" keys.
    :return: "download" when the body was fetched and written,
        "refer" when a referring site exists and nothing was fetched.
    """
    refer_url = item.get("site", "")
    if refer_url:
        LOG.info("Article is refered. No need to download the content")
        return "refer"
    LOG.info("There is not refered URL. Try to download the content through original url")
    full_url = self.domain + item.get("url", "")
    article_content = self.browser.get_url_content(full_url)
    with open(config.APACHE_DIR + item.get("uuid", "") + ".html", "wb") as f:
        f.write(article_content)
    return "download"
def load_account_info(self, account):
    """Load a stored wechat account record and prepare the URL generators
    for its article pages.

    Sets self.wechat_acct / self.wechat_name and the two url generators;
    all of them are None when the account (or its login param) is missing.

    :param account: account id to look up in mongo.
    """
    url_generator_js = None
    url_generator = None
    # Bug fix: these were previously unbound when the account was not
    # found, raising UnboundLocalError at the assignments below.
    wechat_acct = None
    wechat_name = None
    with MongodbUtils(config.WECHAT_DB_IP, config.WECHAT_DB_PORT,
                      config.WECHAT_COLLECTION,
                      config.WECHAT_ACCOUNT_TABLE) as connect_db:
        _res = connect_db.find_one({"account": account})
        if _res:
            _acct_param = _res.get("url_param", "")
            wechat_acct = _res.get("account")
            wechat_name = _res.get("account_name")
            if _acct_param:
                _param_ = "&cb=sogou.weixin_gzhcb&tsn=3&page="
                LOG.info("Infomation of account [%s] was loaded." % (account))
                # Page-number -> URL factories for the JS and HTML endpoints.
                url_generator_js = lambda x: (self.domain + "/gzhjs?" +
                                              _acct_param + _param_ + str(x))
                url_generator = lambda x: (self.domain + "/gzh?" +
                                           _acct_param + _param_ + str(x))
            else:
                LOG.info("Account [%s] does not have login param." % (account))
        else:
            LOG.info("Account [%s] was not found." % (account))
    self.wechat_acct = wechat_acct
    self.wechat_name = wechat_name
    self.url_generator = url_generator
    self.url_generator_js = url_generator_js
def browser_account_page(self, page=1):
    """Fetch one JS article-list page for the currently loaded account.

    :param page: 1-based page number.
    :return: raw page body.
    """
    target = self.url_generator_js(page)
    LOG.info("Visit account page: %s " % (target))
    return self.browser.get_url_content(target)
def record_time(*args, **kwargs):
    """Decorator body: call `func`, then sleep for a chi-square(seed)
    distributed random time plus `offset` to mimic a human pause."""
    res = func(*args, **kwargs)
    nap = round(np.random.chisquare(seed), 2) + offset
    LOG.info("I'm gona to sleep for %s sec." % (str(nap)))
    time.sleep(nap)
    return res
def parse_account_page(self, content):
    """Parse one sogou.weixin_gzhcb(...) JSONP article-list page, store
    unseen article summaries in mongo, and report whether crawling is done.

    :param content: raw JSONP body of an account article-list page.
    :return: True when the last page was reached or (outside 'init' mode)
        an already-stored article was met; False otherwise.
    """
    _end_of_process = False
    # Strip the JSONP wrapper so the payload can be evaluated as a dict.
    _tmp = content.replace("sogou.weixin_gzhcb(", "")
    _tmp = _tmp.replace(")\n\n\n\n\n\n\n\n\n", "")
    _tmp = _tmp.replace("\\/", "/")
    # NOTE(review): eval() on downloaded content is unsafe on untrusted
    # input — consider json.loads instead.
    _res_dict = eval(_tmp)
    # Get pagination infomation
    total_pages = _res_dict.get("totalPages")
    page = _res_dict.get("page")
    _end_of_process = True if page >= total_pages else False
    LOG.info("Page: %s/%s" % (str(page), str(total_pages)))
    # Get articles on page
    _items = _res_dict.get("items")
    num_of_articles_downloaded = 0
    for _item in _items:
        # Each item is an XML fragment describing one article.
        xmlp = ET.XMLParser(encoding="utf-8")
        _item_xml = ET.fromstring(_item, parser=xmlp)
        _uuid = make_uuid()
        _res = dict(
            uuid=_uuid,
            title=self.browser.get_xml_node(_item_xml, "item/display/title"),
            content=_item_xml.find("item").find("display").find("content168").text,
            site=self.browser.get_xml_node(_item_xml, "item/display/site"),
            url=self.browser.get_xml_node(_item_xml, "item/display/url"),
            sourcename=self.browser.get_xml_node(_item_xml, "item/display/sourcename"),
            release_date=self.browser.get_xml_node(_item_xml, "item/display/date"),
            wechat_acct=self.wechat_acct,
            timestamp=date_helper.current_timestamp(),
            unread=True,
            tag=[]
        )
        LOG.info("Reading article: %s" % (_res.get("title")))
        LOG.info("Source site: %s" % (_res.get("site")))
        LOG.info("From: %s" % (_res.get("wechat_acct")))
        with MongodbUtils(config.WECHAT_DB_IP, config.WECHAT_DB_PORT,
                          config.WECHAT_COLLECTION,
                          config.WECHAT_ARTICLE_SUMMARY_TABLE) as connect_db:
            _query = dict(wechat_acct=self.wechat_acct,
                          title=_res.get("title"))
            exist_record = connect_db.find_one(_query)
            if not exist_record or self.mode == 'init':
                if self.mode == 'init' and exist_record:
                    # In init mode keep the old uuid and replace the record.
                    _old_uuid = exist_record.get("uuid", "")
                    _res["uuid"] = _old_uuid
                    connect_db.remove(_query)
                connect_db.insert(_res)
                if not _res.get("site"):
                    # No referring site: fetch and store the article body.
                    self.download_article_content(_res)
            else:
                # Met an already-stored article — stop crawling.
                _end_of_process = True
                break
    return _end_of_process
def simulate_sleep(self, m=5, offset=10):
    """Sleep for a chi-square(m) distributed random time plus `offset`
    seconds, to mimic human browsing pauses."""
    nap = round(np.random.chisquare(m), 2) + offset
    LOG.info("I'm gona to sleep for %s sec." % (str(nap)))
    time.sleep(nap)
def get_articles_on_page(self, page=1):
    """Recursively crawl the JSONP article-list pages of the current
    account (Python 2 code), inserting unseen article summaries into
    mongo.

    :param page: 1-based page number to start from.
    :return: the `articles` list on the final recursion level (it is never
        appended to), None on intermediate levels.
    """
    _end_of_process = False
    url = self.url_generator_js(page)
    print url  # debug output (Python 2 print statement)
    _content = self.get_url_content(url)
    print _content
    # Strip the sogou.weixin_gzhcb(...) JSONP wrapper.
    _tmp = _content.replace("sogou.weixin_gzhcb(", "")
    _tmp = _tmp.replace(")\n\n\n\n\n\n\n\n\n", "")
    _tmp = _tmp.replace("\\/", "/")
    # NOTE(review): eval() on downloaded content is unsafe on untrusted
    # input — consider json.loads instead.
    _res_dict = eval(_tmp)
    total_pages = _res_dict.get("totalPages")
    page = _res_dict.get("page")
    _items = _res_dict.get("items")
    LOG.info("Page: %s/%s" % (str(page), str(total_pages)))
    articles = []  # NOTE(review): never appended to; returned as-is below
    num_of_articles_downloaded = 0  # NOTE(review): never incremented
    for _item in _items:
        # Each item is an XML fragment describing one article.
        xmlp = ET.XMLParser(encoding="utf-8")
        _item_xml = ET.fromstring(_item, parser=xmlp)
        ref_url = _item_xml.find("item").find("display").find("url").text
        _uuid = make_uuid()
        print "--------start test------"
        print self.get_xml_node(_item_xml, "item/display/title")
        print self.get_xml_node(_item_xml, "item/display/site")
        print "--------end test------"
        _res = dict(
            uuid=_uuid,
            title=self.get_xml_node(_item_xml, "item/display/title"),
            content=_item_xml.find("item").find("display").find("content168").text,
            site=self.get_xml_node(_item_xml, "item/display/site"),
            url=self.get_xml_node(_item_xml, "item/display/url"),
            sourcename=self.get_xml_node(_item_xml, "item/display/sourcename"),
            release_date=self.get_xml_node(_item_xml, "item/display/date"),
            wechat_acct=self.wechat_acct,
            timestamp=date_helper.current_timestamp(),
            unread=True,
            tag=[]
        )
        with MongodbUtils(config.WECHAT_DB_IP, config.WECHAT_DB_PORT,
                          config.WECHAT_COLLECTION,
                          config.WECHAT_ARTICLE_SUMMARY_TABLE) as connect_db:
            _query = dict(wechat_acct=self.wechat_acct,
                          title=_res.get("title"))
            exist_record = connect_db.find_one(_query)
            if not exist_record or self.mode == 'override':
                if self.mode == 'override':
                    # NOTE(review): when the record does not exist but mode
                    # is 'override', exist_record is None and .get() would
                    # raise — confirm intended behaviour.
                    _old_uuid = exist_record.get("uuid", "")
                    _res["uuid"] = _old_uuid
                    connect_db.remove(_query)
                connect_db.insert(_res)
                # _article_url = self.get_article_url(_res.get("site"), _uuid)
                # if _article_url:
                #     num_of_articles_downloaded += 1
            else:
                if self.mode in ["init"]:
                    _end_of_process = False
                    LOG.info("Already exist")
                else:
                    # Met the newest already-stored article — stop here.
                    _end_of_process = True
                    LOG.info("Meet the last article in DB")
                    break
    LOG.info("%s pages are downloaded on this page" % num_of_articles_downloaded)
    if (page < total_pages) and not _end_of_process:
        LOG.info("Go to next page")
        self.get_articles_on_page(page + 1)
    else:
        return articles
def get_wechat_articles(self, keyword=""):
    """End-to-end crawl for one account: obtain the login param, then
    fetch the account's articles starting from page 1."""
    LOG.info("Start to get data of account [%s]" % (keyword))
    creds = self.login(keyword)
    LOG.info("Finished getting login param of account [%s]" % (keyword))
    self.get_articles(1, creds)
    LOG.info("End process for account [%s]" % (keyword))
def run_wechat_crawler(self):
    """Pick a random account and crawl its articles in the current mode."""
    account = self.random_select_account()
    LOG.info("Select account: %s" % account)
    WechatCrawler(mode=self.mode).run_crawl_account_articles(account)