def ham_download_image_gg(filePath, folder_save, keyword, number_scroll):
    """Download Google Images results for *keyword* into *folder_save*.

    Opens a browser through ``initDriver(filePath)``, loads the image search
    page for *keyword*, scrolls *number_scroll* times so more results load,
    then extracts the original image URL from every result link and hands it
    to ``downloadImg``. Files are named ``<keyword-with-dashes>-<position>``.

    Failures on individual images are reported and skipped (best effort).
    """
    # Local import: the original called ``urllib.parse_qs``, which is not an
    # attribute of the ``urllib`` package, so every item fell into the
    # error branch.
    from urllib.parse import parse_qs, urlparse

    nameFile = "-".join(keyword.split(" "))
    driver = initDriver(filePath)
    # NOTE(review): the driver is never quit here; confirm whether the caller
    # or initDriver owns its lifetime.
    driver.get('https://www.google.com/')
    driver.get('https://www.google.com/search?tbm=isch&q=' + keyword.replace(" ", "+"))
    time.sleep(10)
    for _ in range(number_scroll):
        driver.execute_script("window.scrollBy(0,0.7*window.innerHeight);")
        time.sleep(10)

    # Collect the direct result links of the images (the first three matches
    # are skipped, as in the original).
    list_elements = driver.find_elements_by_css_selector('a[jsname="hSRGPd"]')
    list_href = [image.get_attribute('href') for image in list_elements[3:]]
    print('len(list_href)', len(list_href))

    # Split each result URL, pull out the real image URL and download it.
    for pos, href in enumerate(list_href):
        print(pos, href)
        try:
            # The original image address is carried in the ``imgurl`` query
            # parameter of the ``/imgres`` link.
            img_url = parse_qs(urlparse(href).query)['imgurl'][0]
            print('href', img_url)
            downloadImg(img_url, nameFile + '-' + str(pos), folder_save)
        except Exception as exc:
            # Keep going on a bad item, but show why it failed.
            print('error at %s' % pos, exc)
def authenticate(self, host):
    """Return an access token obtained from *host*, or ``None``.

    Calls ``self.startup(host)`` and reads ``client_id`` from the query string
    of the resulting URL, then issues a GET to ``host + '/dialog/authorize'``
    with ``self.cookie`` as the ``Cookie`` header. The token is taken from the
    ``access_token`` entry in the fragment of the final response URL.

    Returns ``None`` when the startup URL is empty, when it carries no
    ``client_id``, or when no ``access_token`` is present. Any exception is
    printed, ``self.handleError()`` is invoked, and ``None`` is returned.
    """
    try:
        start = self.startup(host)
        if not start.url:
            return None

        start_params = parse.parse_qs(parse.urlparse(start.url).query)
        if 'client_id' not in start_params:
            return None

        uri = '/dialog/authorize'
        payload = {
            'redirect_uri': 'https://cms.lingotek.com/tms-ui/html/portal/sso_redirect.html',
            'response_type': 'token',
            # parse_qs yields a list of values; it is passed through as-is.
            'client_id': start_params['client_id'],
        }
        r = requests.get(host + uri, headers={'Cookie': self.cookie}, params=payload)
        log_api('GET', uri, r)

        # The token is read from the fragment of the URL the request ended at.
        fragment = parse.parse_qs(parse.urlparse(r.url).fragment)
        tokens = fragment.get('access_token')
        if tokens:
            return tokens[0]
        return None
    except Exception as e:
        print("authenticate", e)
        self.handleError()
        return None
def get_title(vid_id):
    """
    Get title of video from ID.

    Fetches ``http://youtube.com/get_video_info`` for the given ID and reads
    the ``title`` field of the URL-encoded response.

    @param vid_id: YouTube ID for the video.
    @type vid_id: str
    @return: The video title, or None (after printing a warning) when the
        response reports ``status=fail`` or carries no title.
    """
    from contextlib import closing
    # The original mixed ``urlparse.parse_qs`` with ``urllib.parse_qs`` (the
    # latter does not exist); resolve the helpers for either Python line.
    try:
        from urllib.parse import parse_qs
        from urllib.request import urlopen
    except ImportError:  # Python 2
        from urlparse import parse_qs
        from urllib import urlopen

    # closing() releases the connection on both Python 2 and 3.
    with closing(urlopen('http://youtube.com/get_video_info?video_id=' + vid_id)) as response:
        raw = response.read()
    if not isinstance(raw, str):
        # Python 3 returns bytes; parse_qs on bytes would yield bytes keys.
        raw = raw.decode('utf-8')

    # Parse once and reuse the result for both lookups.
    info = parse_qs(raw)
    status = info.get('status', [None])[0]
    title = info.get('title', [None])[0]
    if status == 'fail' or title is None:
        print(
            "WARNING: Couldn't get video title. This probably means you specified an invalid URL."
        )
        return None
    return title
def add_qs_parameter(url, key, value):
    """
    Return *url* with the query parameter *key* set to *value*.

    An existing *key* is replaced. All other parameters are kept, including
    repeated ones and ones with an empty value (the previous implementation
    kept only the first value of each key and dropped blank values).

    >>> add_qs_parameter('http://localhost', 'foo', 'bar')
    'http://localhost?foo=bar'

    >>> add_qs_parameter('http://localhost/', 'foo', 'bar')
    'http://localhost/?foo=bar'

    >>> add_qs_parameter('http://localhost/example', 'foo', 'bar')
    'http://localhost/example?foo=bar'

    >>> add_qs_parameter('http://localhost/example?test=example', 'foo', 'bar')
    'http://localhost/example?test=example&foo=bar'

    >>> add_qs_parameter('http://localhost/?a=1&a=2', 'foo', 'bar')
    'http://localhost/?a=1&a=2&foo=bar'

    >>> add_qs_parameter('http://localhost/?foo=old', 'foo', 'new')
    'http://localhost/?foo=new'
    """
    parts = urlparse(url)
    # keep_blank_values so that ``?flag=`` survives the round trip.
    query = parse_qs(parts.query, keep_blank_values=True)
    query[key] = [value]
    # doseq=True emits one ``k=v`` pair per list element, so repeated
    # parameters are preserved.
    query_string = urlencode(query, doseq=True)
    return urlunparse(
        (parts.scheme, parts.netloc, parts.path, parts.params, query_string, parts.fragment)
    )
def __parse_query_string(self, url):
    """Return the query string of *url* in canonical, sorted form.

    Parameters are ordered by key, and the values of a repeated key are
    ordered among themselves. Each key and value is passed through
    ``self.__urlencode`` and the pairs are joined with ``&``. Returns an
    empty string when *url* has no query parameters.
    """
    if sys.version_info < (3, 0):
        query = urlparse.parse_qs(urlparse.urlsplit(url).query)
    else:
        # ``urllib`` itself has no parse_qs/urlsplit; they live in
        # ``urllib.parse`` on Python 3.
        from urllib.parse import parse_qs, urlsplit
        query = parse_qs(urlsplit(url).query)

    pairs = []
    for k, v in sorted(query.items(), key=lambda item: item[0]):
        if isinstance(v, list):
            for item in sorted(v):
                pairs.append(self.__urlencode(k) + '=' + self.__urlencode(item))
        else:
            pairs.append(self.__urlencode(k) + '=' + self.__urlencode(v))
    return '&'.join(pairs)
def parse_url(url):
    """Extract the YouTube video ID from *url*.

    For ``youtube.com`` hosts the ID is read from the ``v`` query parameter,
    or from a ``/v/<id>`` path when no ``v`` parameter is given. For any
    other host (e.g. ``youtu.be`` short links) the ID is the path without its
    leading slash.

    Raises:
        WrongUrlFormatException: if no video ID can be found in *url*.
    """
    # ``urllib`` itself has no urlparse/parse_qs; they live in urllib.parse.
    from urllib.parse import parse_qs, urlparse

    # TODO use "t" parameter to start at a certain time
    parsed_url = urlparse(url)
    url_params = parse_qs(parsed_url.query)

    # Start from None so a URL without an ID raises the documented exception
    # instead of an UnboundLocalError.
    youtube_id = None
    if parsed_url.netloc in ('www.youtube.com', 'youtube.com', 'm.youtube.com'):
        if parsed_url.path.startswith('/v/'):
            youtube_id = parsed_url.path[3:]
        if 'v' in url_params:
            # An explicit ``v`` parameter takes precedence over the path.
            youtube_id = url_params['v'][0]
    else:
        # Short-link style: the ID is the whole path. The original excluded
        # exactly ``www.youtu.be`` here, leaving the ID unset for that host.
        youtube_id = parsed_url.path[1:]

    if not youtube_id:
        raise WrongUrlFormatException("Not a valid format")
    return youtube_id
def add_qs_parameter(url, key, value):
    """
    Return *url* with the query parameter *key* set to *value*.

    If *key* is already present its value(s) are replaced. Every other
    parameter is preserved, including repeated parameters and parameters with
    an empty value; the earlier code kept only the first value per key and
    lost blank-valued parameters.

    >>> add_qs_parameter('http://localhost', 'foo', 'bar')
    'http://localhost?foo=bar'

    >>> add_qs_parameter('http://localhost/', 'foo', 'bar')
    'http://localhost/?foo=bar'

    >>> add_qs_parameter('http://localhost/example', 'foo', 'bar')
    'http://localhost/example?foo=bar'

    >>> add_qs_parameter('http://localhost/example?test=example', 'foo', 'bar')
    'http://localhost/example?test=example&foo=bar'

    >>> add_qs_parameter('http://localhost/?a=1&a=2', 'foo', 'bar')
    'http://localhost/?a=1&a=2&foo=bar'

    >>> add_qs_parameter('http://localhost/?flag=', 'foo', 'bar')
    'http://localhost/?flag=&foo=bar'
    """
    split_url = urlparse(url)
    # Blank values are kept so that ``?flag=`` is not silently removed.
    params = parse_qs(split_url.query, keep_blank_values=True)
    params[key] = [value]
    # doseq=True expands each value list into repeated ``key=value`` pairs.
    encoded = urlencode(params, doseq=True)
    return urlunparse((
        split_url.scheme,
        split_url.netloc,
        split_url.path,
        split_url.params,
        encoded,
        split_url.fragment,
    ))
def is_dynamic(request):
    """Return True if the URL *request* has a ``_`` query parameter.

    The parameter counts as present even when its value is empty
    (``?_=``).
    """
    # ``urllib`` itself has no urlparse/parse_qs; they live in urllib.parse.
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(request)
    # keep_blank_values: this is a presence check, so ``_=`` must not be
    # discarded by the parser.
    query = parse_qs(parsed.query, keep_blank_values=True)
    return '_' in query
def extract_item_id(request):
    """Return the first ``item`` query parameter of the URL *request*.

    Returns None when the URL has no ``item`` parameter or its value is
    empty.
    """
    # ``urllib`` itself has no urlparse/parse_qs; they live in urllib.parse.
    from urllib.parse import parse_qs, urlparse

    query = parse_qs(urlparse(request).query)
    values = query.get('item')
    if not values:
        return None
    return values[0]