def _test_links_in_dict(self, origin_url=None):
    """Verify that clicking each link text navigates to its expected URL.

    The link map is looked up as ``getattr(self.m, get_menu())`` and is a
    ``{text: link}`` dict: for every entry, the element with that link text
    is clicked and the resulting URL is compared to the expected link.
    If the click opens a new window, the test switches to it to read the
    URL, closes it, and switches back to the original window.

    origin_url: the original URL of the page under test; navigation is
        reset to it whenever the browser has drifted to another domain.
    """
    menu_name = get_menu()
    # {text: link} mapping for the current menu, stored on self.m.
    text_links = getattr(self.m, menu_name)
    for k, v in text_links.items():
        # subTest keeps iterating remaining links even if one fails.
        with self.subTest(text=k, link=v):
            current_url = format_url(self.session.current_url)
            target_url = format_url(v)
            # Remember the current window so we can return to it later.
            window_handle = self.session.current_window_handle
            if urlparse(current_url).netloc != urlparse(origin_url).netloc:
                # A previous click left us on another domain; go back to
                # the designated page before clicking the next link.
                self.session.get(origin_url)
            self.session.click(By.LINK_TEXT, k)
            if len(self.session.window_handles) > 1:
                # The click opened a new window: switch to it, capture its
                # URL, close it, and return to the original window.
                windows = self.session.window_handles
                windows.remove(window_handle)
                self.session.switch_to_window(windows[0])
                result_url = format_url(self.session.current_url)
                self.session.close()
                self.session.switch_to_window(window_handle)
            elif target_url == origin_url == self.session.current_url:
                # The click navigates to the very same page — no URL
                # change to wait for.
                result_url = self.session.current_url
            else:
                # Same window, different page: wait for the URL to change
                # before reading it.
                # NOTE(review): "wait_unitl" looks like a typo for
                # "wait_until" — confirm against the session wrapper API.
                self.session.wait_unitl(EC.url_changes(origin_url))
                result_url = format_url(self.session.current_url)
            self.assertEqual(result_url, format_url(v))
def test_format_url(self):
    """format_url should join params sorted by key and append key= last."""
    # Single parameter.
    single = {"123": "123"}
    self.assertEqual(format_url(single), "123=123")
    # Two parameters: output is ordered by key, not insertion order.
    multi = {"abc": "abc", "123": "123"}
    self.assertEqual(format_url(multi), "123=123&abc=abc")
    # With a signing key, "key=<key>" is appended after the params.
    secret = "12345678901234567890"
    self.assertEqual(
        format_url(multi, secret),
        "123=123&abc=abc&key=12345678901234567890")
def __get_same_site_link(self, soup, url):
    """Collect every new same-site page URL referenced by *soup*.

    Each qualifying ``<a href>`` is resolved against *url*, normalized
    with ``format_url``, and written back into the tag so downstream
    consumers see the absolute form.

    :param soup: BeautifulSoup document to scan for ``<a>`` tags.
    :param url: base URL used to resolve relative hrefs.
    :return: list of unique absolute page URLs in discovery order;
        empty when full-site crawling is disabled.
    """
    new_url = []
    if not self.is_full_site:
        return new_url
    # find_all() always returns a (possibly empty) list, never None,
    # so no None guard is needed.  A companion set gives O(1) duplicate
    # checks while new_url preserves discovery order.
    seen = set()
    for a in soup.find_all("a"):
        try:
            raw_link = a.get("href")
            # Skip tags without href, in-page anchors, and non-page links.
            if raw_link is None or raw_link.startswith(
                    "#") or not is_page_url(raw_link):
                continue
            abs_link = format_url(get_abs_url(url, raw_link))  # newly produced url
            # Rewrite the document in place with the absolute form.
            a['href'] = abs_link
            if is_page_url(abs_link) and not is_img_ext(
                    abs_link) and abs_link not in seen:
                seen.add(abs_link)
                new_url.append(abs_link)
        except Exception as e:
            # Best effort: log the offending tag and keep crawling.
            self.logger.info("%s: %s", a, e)
            self.logger.exception(e)
            continue
    return new_url
def _create_oauth_url_for_openid(self, code):
    """Build the OAuth2 access-token URL used to exchange *code* for an openid."""
    query = format_url({
        "appid": self.appid,
        "secret": self.app_secret,
        "code": code,
        "grant_type": "authorization_code",
    })
    return OAUTH2_ACCESS_TOKEN_URL % query
def create_oauth_url_for_code(self, redirect_uri):
    """Build the OAuth2 authorize URL that redirects back with a code.

    redirect_uri: callback address, usually the current page.
    """
    params = {
        "appid": self.appid,
        "redirect_uri": redirect_uri,
        "response_type": "code",
        "scope": "snsapi_base",
        "state": "STATE#wechat_redirect"
    }
    query = format_url(params)
    return OAUTH2_AUTHORIZE_URL % query
def get_menus_of_tb(self):
    """Populate self.var with the CSDN toolbar menu entries.

    The toolbar markup is rendered client-side: it lives inside a JS
    file as a template string, so we download the script, extract the
    embedded HTML with a regex, and parse it with BeautifulSoup.
    Stores ``{menu text: formatted href}`` into ``self.var``.
    """
    resp = requests.request(
        'get',
        'https://csdnimg.cn/public/common/toolbar/js/content_toolbar.js')
    # The template is assigned as: var tpl = '...</div>';
    # Lookbehind/lookahead capture just the HTML between the quotes.
    template_re = re.compile(r"(?<=var tpl = \').*?</div>(?=\';)",
                             flags=re.DOTALL)
    toolbar_html = template_re.search(resp.text).group(0)
    soup = BeautifulSoup(toolbar_html, 'html.parser')
    # First <li> is skipped (not a real menu entry).
    items = soup.find(
        'ul', class_='pull-left left-menu clearfix').find_all('li')[1:]
    for item in items:
        self.var[item.a.text] = format_url(item.a['href'])
def __set_dup_url(self, url, file_save_path):
    """Record the normalized url -> save-path mapping in self.dl_urls."""
    normalized = format_url(url)
    if file_save_path:
        # Collapse accidental double slashes in the path.
        file_save_path = file_save_path.replace("//", '/')
    self.dl_urls[normalized] = file_save_path
def update(self, manga):
    """Advance *manga* to its next volume/chapter and notify by email.

    When the manga's countdown has expired (< 1), probe the source site
    for the next available chapter by trying several volume/chapter
    candidates in order, persist the result, reset the countdown, and
    send an update email.  Otherwise just decrement the countdown.
    """
    if manga.countdown < 1:
        name = manga.name
        original_volume = manga.volume
        original_chapter = manga.chapter
        # URL of the currently-known chapter, used in the email body.
        url = format_url(manga.url_scheme, original_volume,
                         original_chapter)
        # Try current volume, next chapter
        volume = original_volume
        chapter = original_chapter + 1
        content = Content(manga.url_scheme, volume, chapter)
        # Candidates are tried in a fixed order; each check only fires
        # if the previous probe came back not-found.
        if volume >= 0 and content.is_not_found():
            # Try next volume next chapter
            volume = original_volume + 1
            chapter = original_chapter + 1
            content.update(volume, chapter)
        if volume >= 0 and content.is_not_found():
            # Try next volume first chapter
            volume = original_volume + 1
            chapter = 1
            content.update(volume, chapter)
        if content.is_not_found():
            # Try current volume next next chapter
            volume = original_volume
            chapter = original_chapter + 2
            content.update(volume, chapter)
        if volume >= 0 and content.is_not_found():
            # Try next volume next next chapter
            volume = original_volume + 1
            chapter = original_chapter + 2
            content.update(volume, chapter)
        if content.is_not_found():
            # Something's goofed, try again tomorrow.
            # -inf is a sentinel meaning "all probes failed"; it is
            # translated to countdown = -1.0 below.
            manga.countdown = float('-inf')
            logging.error(
                'Unable to update volume/chapter for {name}'.format(
                    name=name))
        else:
            # Record the transition old -> new position.
            manga.prev_volume = original_volume
            manga.prev_chapter = original_chapter
            manga.volume = volume
            manga.chapter = chapter
            logging.info('Updating {name} from volume:{original_volume}, ' \
                'chapter:{original_chapter} to volume:{volume}, chapter:{chapter}'
                .format(name=name,
                        original_volume=original_volume,
                        original_chapter=original_chapter,
                        volume=volume,
                        chapter=chapter))
            # The next page checks out
            # Reset the countdown according to the reading frequency
            # units: per-page, per-day, or (default) per-day.
            if manga.freq_units == 'pages':
                logging.info('{}, {}'.format(name, content.page_count()))
                manga.countdown += manga.frequency * content.page_count()
            elif manga.freq_units == 'days':
                manga.countdown += manga.frequency
            else:
                logging.warning(
                    'Manga {manga} missing freq_units'.format(manga=name))
                # Default acts like days.
                manga.countdown += manga.frequency
        # Assert 0 <= countdown <= 18
        if manga.countdown != float('-inf'):
            manga.countdown = min(max(manga.countdown, 0.0), 18.0)
        else:
            # Failed probe: -1.0 keeps the countdown expired so the
            # update is retried on the next run.
            manga.countdown = -1.0
        manga.put()
        # Send the email
        message = self.render_template('update_email.html',
                                       manga=manga,
                                       url=url,
                                       write=False)
        # Volume < 0 means the series has no volume numbering; omit it
        # from the subject line.
        if manga.volume >= 0:
            subject = 'Time for {manga.name} v{manga.volume}c{manga.chapter}'.format(
                manga=manga)
        else:
            subject = 'Time for {manga.name} c{manga.chapter}'.format(
                manga=manga)
        send_mail(subject, html=message)
    else:
        # Countdown still running: tick it down and persist.
        manga.countdown -= 1
        manga.put()
def open_comment(self, el):
    """Open the focused post's comment page in a new browser tab."""
    focused = self.get_focus_post()
    webbrowser.open_new_tab(utils.format_url(focused['comment_url']))
def on_click_title(self, el, url):
    """Open the clicked title's target url in a new browser tab."""
    target = utils.format_url(url)
    webbrowser.open_new_tab(target)