def request(flow: mitmproxy.http.HTTPFlow) -> None:
    """mitmproxy addon hook: wrap each intercepted request into a JSON
    payload and forward it to a randomly chosen SCF relay server.

    :param flow: intercepted HTTP flow; its request is replaced in place
        with a POST to the relay.
    """
    # Bug fix: the original did `scf_servers = choice(scf_servers)`,
    # shadowing the module-level server list with a single element.
    scf_server = choice(scf_servers)
    r = flow.request
    data = {
        "url": r.pretty_url,
        "method": r.method,
        "headers": dict(r.headers),
        "cookies": dict(r.cookies),
        "params": dict(r.query),
        # Raw body is base64-encoded so arbitrary bytes survive JSON.
        "data": b64encode(r.raw_content).decode("ascii"),
    }
    flow.request = flow.request.make(
        "POST",
        url=scf_server,
        content=json.dumps(data),
        headers={
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Encoding": "gzip, deflate, compress",
            "Accept-Language": "en-us;q=0.8",
            "Cache-Control": "max-age=0",
            "Connection": "close",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36",
            # Host must match the chosen relay, not the original target.
            "host": urlparse(scf_server).netloc,
            # NOTE(review): SCF_TOKRN looks like a typo for SCF_TOKEN, but
            # the constant is defined elsewhere in the project — rename it
            # only together with its definition.
            "SCF-Token": SCF_TOKRN,
        },
    )
def register_node(self, address): """ Add a new node to the list of nodes :param address: <str> Address a node. Eg. 'http://192.168.0.5:50000' :return: None """ parsed_url = urlparse(address) self.nodes.add(parsed_url.netloc)
def download(url, timeout=180):
    """Fetch the resource at *url* and wrap it in an Image.

    A random pre-configured delay is slept before the request to
    throttle scraping.

    :param url: resource URL; the file name is taken from its path.
    :param timeout: seconds passed to requests.get.
    :return: Image built from the URL's basename and the response body.
    """
    name = path.basename(parse.urlparse(url).path)
    time.sleep(random.choice(RANDOM_SLEEP_TIMES))
    response = requests.get(url, timeout=timeout)
    return Image(file_name=name, file_content=response.content)
def _add_to_con(self, con):
    """Add connection information to connection data.

    Marks the connection visible, optionally collapses its endpoint URLs
    to their network locations, records weak locations for both ends,
    and appends the connection unless an identical one already exists.
    """
    con["display"] = True
    if self.simplify:
        # Collapse full endpoint URLs down to their netloc when present.
        for endpoint in ("source", "destination"):
            netloc = urlparse.urlparse(con[endpoint]).netloc
            if netloc:
                con[endpoint] = netloc
    self._add_weak_loc(con["source"])
    self._add_weak_loc(con["destination"])
    # Drop exact duplicates of an already-recorded connection.
    for existing in self.data["connections"]:
        if not DictDiffer(existing, con).anychange():
            return
    self.data["connections"].append(con)
def test_add_entry(self):
    """POST a new entry, then verify the redirect and the persisted row."""
    payload = {"title": "Test Entry", "content": "Test content"}
    response = self.client.post("/entry/add", data=payload)
    # A successful add redirects back to the index page.
    self.assertEqual(response.status_code, 302)
    self.assertEqual(urlparse(response.location).path, "/")
    stored = session.query(Entry).all()
    self.assertEqual(len(stored), 1)
    saved_entry = stored[0]
    self.assertEqual(saved_entry.title, "Test Entry")
    self.assertEqual(saved_entry.content, "Test content")
    self.assertEqual(saved_entry.author, self.user)
def _add_to_loc(self, loc):
    """Add location information to location data.

    Marks the location visible, optionally collapses its URL to the
    network location, and appends it unless an identical location is
    already recorded.
    """
    loc["display"] = True
    if self.simplify:
        # Collapse a full URL down to its netloc when present.
        netloc = urlparse.urlparse(loc["url"]).netloc
        if netloc:
            loc["url"] = netloc
    # Drop exact duplicates of an already-recorded location.
    for existing in self.data["locations"]:
        if not DictDiffer(existing, loc).anychange():
            return
    self.data["locations"].append(loc)
def add_node(self, address):
    """Register a peer node by its 'host:port' network location.

    :param address: <str> node URL, e.g. 'http://192.168.0.5:5000'.
    :return: None
    """
    parsed_url = urlparse(address)
    # Bug fix: the original referenced the undefined name
    # `parsed_url_netloc` (NameError at runtime); attribute access
    # on the parse result was clearly intended.
    self.nodes.add(parsed_url.netloc)
def parse_youtube_url(url):
    """Classify a YouTube URL.

    :param url: <str> URL to classify.
    :return: a (kind, payload) tuple where kind is one of "home",
        "video", "channel", "user", "irrelevant" or "error", and payload
        is the extracted video/channel/user id (None when not applicable).
    :raises Exception: for a youtube.googleapis.com URL not containing
        "/v/".
    """
    url = clean_url(url)
    u = normalize_url(url,
                      strip_lang_subdomains=True,
                      strip_trailing_slash=True)
    parsed = urlparse(url)

    # URL pattern youtu.be/VIDEO_ID
    if parsed.netloc == 'youtu.be':
        if "/" not in u:
            return "home", None
        # Bug fix: the original re-split `u` (the whole URL) on every
        # line, discarding the id extracted just before; the splits must
        # chain to strip "?" query and "%"-escaped suffixes from the id.
        url_id = u.split("/")[1]
        url_id = url_id.split("?")[0]
        url_id = url_id.split("%")[0]
        return "video", url_id

    # URL pattern youtube.googleapis.com/v/VIDEO_ID
    if parsed.netloc == 'youtube.googleapis.com':
        if "/v/" in u:
            url_id = u.split("/")[2]
        else:
            raise (Exception("Wrong url format %s" % u))
        return "video", url_id

    if parsed.netloc in [
        'img.youtube.com', 'gaming.youtube.com', 'music.youtube.com',
        'studio.youtube.com'
    ]:
        return "irrelevant", None

    if parsed.netloc.endswith('youtube.com'):
        if u in ["youtube.com"] and not parsed.fragment:
            return "home", None
        stem0 = parsed.path.split("/")[1]
        stem1 = parsed.path.split("/")[2] if "/" in parsed.path.lstrip(
            "/") else None
        queryargs = parsed.query.split("&")
        if stem0 in [
            "t", "yt", "results", "playlist", "artist", "channels",
            "audiolibrary", "feed", "intl", "musicpremium", "premium",
            "show", "watch_videos", "comment", "creators",
            "profile_redirector", "static", "view_play_list", "index"
        ]:
            return "irrelevant", None
        # URL pattern youtube.com/channel/CHANNEL_ID
        if stem0 == "channel":
            return "channel", stem1
        # URL pattern youtube.com/user/USER_ID
        if stem0 in ["user", "c"]:
            return "user", stem1
        # URL pattern youtube.com/attribution_link?u=ENCODED_PATH:
        # recurse on the decoded target.
        if stem0 == "attribution_link":
            uarg = [arg for arg in queryargs if arg.startswith("u=")]
            if len(uarg):
                return parse_youtube_url("http://youtube.com" +
                                         unquote(uarg[0].split("=")[1]))
        # URL pattern youtube.com/profile_videos?user=USER_ID
        if stem0 in ["profile_videos", "subscription_center"]:
            # NOTE(review): this span arrived corrupted in the source
            # (string literals masked with ******); reconstructed as
            # matching "user=" / "add_user=" query arguments — confirm
            # against upstream history.
            uarg = [
                arg for arg in queryargs
                if arg.startswith("user=") or arg.startswith("add_user=")
            ]
            if len(uarg):
                return "user", uarg[0].split("=")[1]
        # URL pattern youtube.com/v/VIDEO_ID
        if stem0 in ["v", "embed", "video"]:
            return "video", stem1
        # URL pattern youtube.com/watch?v=VIDEO_ID
        if stem0 in [
            "watch", "redirect", "comment_servlet", "all_comments",
            "watch_popup"
        ]:
            varg = [arg for arg in queryargs if arg.startswith("v=")]
            if len(varg):
                return "video", varg[0].split("=")[1]
            return "video", None
        if stem0 in ["edit", "swf"]:
            varg = [arg for arg in queryargs if arg.startswith("video_id=")]
            if len(varg):
                return "video", varg[0].split("=")[1]
            return "video", None
        # URL pattern youtube.com/#%2Fwatch%3Fv%3DVIDEO_ID (encoded
        # watch URL carried in the query or fragment).
        if "v%3D" in parsed.query:
            fquery = unquote(parsed.query)
            queryargs = fquery.split("?")[1].split("&")
            varg = [arg for arg in queryargs if arg.startswith("v=")]
            if len(varg):
                return "video", varg[0].split("=")[1]
        if "v%3D" in parsed.fragment:
            fquery = unquote(parsed.fragment)
            queryargs = fquery.split("?")[1].split("&")
            varg = [arg for arg in queryargs if arg.startswith("v=")]
            if len(varg):
                return "video", varg[0].split("=")[1]
        # Consent/redirect wrapper: recurse on the continue= target.
        if "continue=" in parsed.query:
            urlarg = [arg for arg in queryargs
                      if arg.startswith("continue=")][0].split("=")[1]
            return parse_youtube_url(unquote(urlarg))
        # Bare youtube.com/NAME with no query/fragment: legacy user page.
        if not stem1 and (not parsed.query or parsed.query in
                          ["sub_confirmation=1"]) and not parsed.fragment:
            return "user", stem0
    return "error", None