def filter_urlquery(url, keys=None, keys_status=False):
    """Remove unwanted query parameters from an URL.

    :param url: an URL
    :param keys: list of query names; ``None`` (the default) is treated
                 as an empty list
    :param keys_status: False = removes querys that are in keys
                        True = allow only querys that are in keys
                        (only the literal ``True`` enables this mode)
    :return: URL with filtered query
    """
    # ``None`` replaces the former mutable ``[]`` default argument.
    if keys is None:
        keys = []

    parts = urlparse(url)
    query_dict = dict(parse_qsl(parts.query))

    if keys_status is True:
        # Whitelist mode: keep only the requested names that are actually
        # present in the URL; names missing from the URL are skipped.
        query_dict = {key: query_dict[key] for key in keys if key in query_dict}
    else:
        # Blacklist mode: drop the requested names, ignoring absent ones.
        for key in keys:
            query_dict.pop(key, None)

    new_parts = list(parts)
    # Index 4 of the urlparse() result is the query component.
    # urlencode() escapes the values; unquote() undoes the percent-escapes
    # so the resulting query stays human readable.
    new_parts[4] = unquote(urlencode(query_dict))
    return urlunparse(new_parts)
def test_unescape_iframe_re(self):
    """Check that an iframe hidden inside ``unescape('...')`` is found.

    Each sample is first matched with ``_unescape_iframe_re``; the captured
    ``data`` group is percent-decoded and then matched with ``_iframe_re``,
    whose ``url`` group must equal the expected result.
    """
    from livecli.compat import unquote

    regex_test_list = [
        {
            "data": """ <div id="player"> <script language='javascript'> document.write(unescape('%3Ciframe%20width%3D%22730%22%20height%3D%22440%22%20src%3D%22https%3A%2F%2Fwww.youtube.com%2Fembed%2Faqz-KE-bpKQ%3Fautoplay%3D1%22%20frameborder%3D%220%22%20gesture%3D%22media%22%20allow%3D%22encrypted-media%22%20allowfullscreen%3E%3C%2Fiframe%3E'));</script> </div> """,
            "result": "https://www.youtube.com/embed/aqz-KE-bpKQ?autoplay=1"
        },
        {
            # Same markup, but the letters of "iframe" are percent-encoded too.
            "data": """ <div id="player"> <script language='javascript'> document.write(unescape('%3C%69%66%72%61%6d%65%20width%3D%22730%22%20height%3D%22440%22%20src%3D%22https%3A%2F%2Fwww.youtube.com%2Fembed%2Faqz-KE-bpKQ%3Fautoplay%3D1%22%20frameborder%3D%220%22%20gesture%3D%22media%22%20allow%3D%22encrypted-media%22%20allowfullscreen%3E%3C%2Fiframe%3E'));</script> </div> """,
            "result": "https://www.youtube.com/embed/aqz-KE-bpKQ?autoplay=1"
        },
    ]

    rr = Resolve("https://example.com")
    for test_dict in regex_test_list:
        m = rr._unescape_iframe_re.search(test_dict.get("data"))
        self.assertIsNotNone(m)
        data = unquote(m.group("data"))

        m = rr._iframe_re.search(data)
        # Assert on the second match as well, so a regex regression is
        # reported as a test failure instead of an AttributeError on None.
        self.assertIsNotNone(m)
        self.assertEqual(test_dict.get("result"), m.group("url"))
def _iframe_src(self, res):
    """Search the page content for iframe urls and follow the first one.

    Every url that was found is written to the log; the first entry of the
    list returned by self._make_url_list becomes the new self.url.

    Args:
        res: Content from self._res_text

    Returns:
        True if self.url was changed with an iframe url.
        None if no iframe was found.
    """
    found = self._iframe_re.findall(res)

    # Fallback for unescape('%3Ciframe%20
    escaped = self._unescape_iframe_re.findall(res)
    if escaped:
        # Decode every escaped snippet and scan the joined text once.
        decoded = ",".join([unquote(chunk) for chunk in escaped])
        escaped = self._iframe_re.findall(decoded)
        if escaped:
            found = found + escaped

    if not found:
        return None

    urls = self._make_url_list(found, self.url, url_type="iframe")
    if not urls:
        return None

    self.logger.info("Found iframes: {0}".format(", ".join(urls)))
    self.url = urls[0]
    return True
def production_id(self):
    """Scrape the production ID from the page behind self.url.

    The page is only fetched when self._stream is not listed in
    CHANNEL_MAP.

    Returns:
        The unquoted value of the first ``&productionId=`` occurrence,
        or None when the stream is in CHANNEL_MAP or nothing matched
        (the latter is logged as an error).
    """
    if self._stream in CHANNEL_MAP:
        return None

    # NOTE(review): verify=False presumably turns off TLS certificate
    # verification for this request - confirm that this is intended.
    page = http.get(self.url, verify=False)
    found = re.search(r"&productionId=(.*?)['&\"]", page.text, flags=re.DOTALL)
    if found is None:
        self.logger.error(u"No production ID found, has the page layout changed?")
        return None

    return unquote(found.group(1))
def production_id(self):
    """Scrape the production ID from the page behind self.url.

    The page is only fetched when self._stream is not listed in
    CHANNEL_MAP.

    Returns:
        The unquoted value of the first ``data-video-production-id``
        attribute, or None when the stream is in CHANNEL_MAP or nothing
        matched (the latter is logged as an error).
    """
    if self._stream in CHANNEL_MAP:
        return None

    # NOTE(review): verify=False presumably turns off TLS certificate
    # verification for this request - confirm that this is intended.
    page = http.get(self.url, verify=False)
    found = re.search(r'data-video-production-id="(.+?)"', page.text)
    if found is None:
        self.logger.error(
            u"No production ID found, has the page layout changed?")
        return None

    return unquote(found.group(1))
def follow_vk_redirect(cls, url):
    """Resolve a 'videos' catalog URL to the video named in its query.

    If the path starts with ``/videos-`` and the query string carries a
    non-empty ``z`` parameter, the part of the unquoted ``z`` value before
    the first ``/`` is used as the new path.

    Args:
        url: the URL to inspect

    Returns:
        ``scheme://netloc/<video path>`` when a redirect target was found,
        otherwise the unchanged url (so that the caller can fail later).
    """
    parsed_url = urlparse(url)
    if not parsed_url.path.startswith('/videos-'):
        return url

    target = None
    for pair in parsed_url.query.split('&'):
        # partition() never raises: a bare "z" without "=" yields an empty
        # value instead of the IndexError the former split('=')[1] produced.
        name, _, value = pair.partition('=')
        if name == 'z' and value:
            target = value

    if not target:
        # No redirect found in query string, so return the catalog url and fail later
        return url

    true_path = unquote(target).split('/')[0]
    return parsed_url.scheme + '://' + parsed_url.netloc + '/' + true_path
def _iframe_unescape(self, res):
    """Look for iframes that are hidden inside unescape('%3Ciframe%20

    Args:
        res: Content from self._res_text

    Returns:
        (list) A list of iframe urls
        or
        False
            if no iframe was found
    """
    escaped = self._unescape_iframe_re.findall(res)
    if not escaped:
        return False

    # Decode every escaped snippet and scan the joined text once.
    decoded = ",".join([unquote(chunk) for chunk in escaped])

    # NOTE(review): _iframe_re is looked up as a module-level name here,
    # not as self._iframe_re - confirm that such a global exists.
    urls = _iframe_re.findall(decoded)
    if not urls:
        return False
    return urls
def filter_urlquery(url, keys=None, keys_status=False, new_dict=None):
    """Manipulate the query parameters of an URL.

    Examples:

        All examples use this url.

            url = "http://example.com/z/manifest.f4m?FOO=BAR&n=20&b=1896"

        1. allow only the specified parameters and remove all others

            filter_urlquery(url, ["FOO"], True)

            http://example.com/z/manifest.f4m?FOO=BAR

        2. same as 1. and add a custom parameter

            filter_urlquery(url, ["FOO"], True, {'2FOO2': '2BAR2'})

            http://example.com/z/manifest.f4m?FOO=BAR&2FOO2=2BAR2

        3. remove only the specified parameters

            filter_urlquery(url, ["FOO"], False)

            http://example.com/z/manifest.f4m?n=20&b=1896

        4. remove all parameters

            filter_urlquery(url, keys_status=True)

            http://example.com/z/manifest.f4m

        5. add new parameters

            filter_urlquery(url, new_dict={'QFOO': 'QBAR', 'AFOO': 'ABAR'})

            http://example.com/z/manifest.f4m?FOO=BAR&n=20&b=1896&QFOO=QBAR&AFOO=ABAR

    :param url: an URL
    :param keys: list of query names; ``None`` (the default) is treated
                 as an empty list
    :param keys_status: False = removes querys that are in keys
                        True = allow only querys that are in keys
                        (only the literal ``True`` enables this mode)
    :param new_dict: dict of new custom urlquerys; ``None`` (the default)
                     adds nothing
    :return: URL with filtered query
    """
    # ``None`` replaces the former mutable ``[]`` / ``{}`` default arguments.
    if keys is None:
        keys = []
    if new_dict is None:
        new_dict = {}

    parts = urlparse(url)
    query_dict = dict(parse_qsl(parts.query))

    if keys_status is True:
        # Whitelist mode: keep only the requested names that are actually
        # present in the URL; names missing from the URL are skipped.
        query_dict = {key: query_dict[key] for key in keys if key in query_dict}
    else:
        # Blacklist mode: drop the requested names, ignoring absent ones.
        for key in keys:
            query_dict.pop(key, None)

    # Custom parameters are applied last, so they override existing values.
    query_dict.update(new_dict)

    new_parts = list(parts)
    # Index 4 of the urlparse() result is the query component.
    # urlencode() escapes the values; unquote() undoes the percent-escapes
    # so the resulting query stays human readable.
    new_parts[4] = unquote(urlencode(query_dict))
    return urlunparse(new_parts)
def _websocket_data(self, username, chat_servers):
    """Get data from the websocket.

    Connects to a randomly chosen chat server (up to three attempts),
    performs the guest login and reads messages until the wanted answer
    has arrived.

    Args:
        username: Model Username
        chat_servers: servername from self._get_servers

    Returns:
        message: data to create a video url.
        php_message: data for self._php_fallback

    Raises:
        Exception: the error of the last connection attempt is re-raised
            after the third failed attempt.
        IndexError: raised by random.choice if chat_servers is empty.
    """
    max_attempts = 3
    ws = None
    for attempt in range(1, max_attempts + 1):
        # Chosen outside of the try block, so the name is always bound
        # when the failure is logged below.
        xchat = str(random.choice(chat_servers))
        host = "wss://{0}.myfreecams.com/fcsl".format(xchat)
        try:
            ws = create_connection(host)
            ws.send("hello fcserver\n\0")
            r_id = str(uuid.uuid4().hex[0:32])
            ws.send("1 0 0 20071025 0 {0}@guest:guest\n".format(r_id))
        except Exception:
            self.logger.debug(
                "Failed to connect to WS server: {0} - try {1}".format(
                    xchat, attempt))
            if attempt == max_attempts:
                # The former check compared against 5, which the loop could
                # never reach, so a total failure ended in a NameError on
                # the unbound socket instead of this error.
                self.logger.error("can't connect to the websocket")
                raise
        else:
            self.logger.debug(
                "Websocket server {0} connected".format(xchat))
            break

    buff = ""
    php_message = ""
    finished = False
    try:
        while not finished:
            # Prepend the incomplete rest of the previous read.
            socket_buffer = buff + ws.recv()
            buff = ""
            while True:
                ws_answer = self._socket_re.search(socket_buffer)
                if not ws_answer:
                    break

                # Group 1: the first four characters are the message
                # length, the remaining ones the message type.
                header = ws_answer.group(1)
                fc_type = int(header[4:])
                message_length = int(header[0:4])

                message = socket_buffer[4:4 + message_length]
                if len(message) < message_length:
                    # Message is not complete yet, wait for more data.
                    buff = socket_buffer
                    break

                message = unquote(message)

                if fc_type == 1 and username:
                    ws.send("10 0 0 20 0 {0}\n".format(username))
                elif fc_type == 81:
                    php_message = message
                    if username is None:
                        finished = True
                elif fc_type == 10:
                    finished = True

                # Continue with whatever follows the handled message.
                socket_buffer = socket_buffer[4 + message_length:]
                if len(socket_buffer) == 0:
                    break

        ws.send("99 0 0 0 0")
    finally:
        # Close the socket even if reading or sending failed.
        ws.close()

    return message, php_message