Exemplo n.º 1
0
def ham_download_image_gg(filePath, folder_save, keyword, number_scroll):
    """Download Google Images search results for ``keyword``.

    Opens a browser via ``initDriver(filePath)``, loads the image-search page
    for ``keyword``, scrolls ``number_scroll`` times, collects the result
    links, extracts each link's ``imgurl`` query parameter and passes it to
    ``downloadImg``. Files are named ``<keyword-with-dashes>-<position>``.

    Args:
        filePath: Passed unchanged to ``initDriver``.
        folder_save: Passed unchanged to ``downloadImg`` as the destination.
        keyword: Search phrase; spaces become ``+`` in the search URL and
            ``-`` in the saved file names.
        number_scroll: Number of scroll steps performed before the links
            are collected.

    A link that cannot be parsed or downloaded is reported on stdout and
    skipped; the remaining links are still processed.
    """
    # Imported locally so the block does not depend on how the enclosing
    # file aliases ``urllib``.
    try:
        from urllib.parse import parse_qs, urlparse
    except ImportError:  # Python 2
        from urlparse import parse_qs, urlparse

    nameFile = "-".join(keyword.split(" "))
    driver = initDriver(filePath)
    try:
        driver.get('https://www.google.com/')
        driver.get('https://www.google.com/search?tbm=isch&q=' +
                   keyword.replace(" ", "+"))
        time.sleep(10)

        for _ in range(number_scroll):
            driver.execute_script("window.scrollBy(0,0.7*window.innerHeight);")
            time.sleep(10)

        # Get the direct links of the images. ``find_elements(by, value)``
        # is used because the ``find_elements_by_*`` helpers were removed in
        # Selenium 4.3; "css selector" is the value of ``By.CSS_SELECTOR``.
        list_elements = driver.find_elements('css selector',
                                             'a[jsname="hSRGPd"]')
        # The first three matches are skipped, as in the original code.
        list_href = [image.get_attribute('href')
                     for image in list_elements[3:]]
    finally:
        # All hrefs are plain strings by now, so the browser can be released
        # before the downloads start.
        driver.quit()

    print('len(list_href)', len(list_href))

    # Split each result URL and download the image it points to.
    for pos, href in enumerate(list_href):
        print(pos, href)
        try:
            image_url = parse_qs(urlparse(href).query)['imgurl'][0]
            print('href', image_url)
            downloadImg(image_url, nameFile + '-' + str(pos), folder_save)
        except Exception as exc:
            # Best effort: one bad link must not abort the whole batch, but
            # Ctrl-C is no longer swallowed and the cause is shown.
            print('error at %s: %s' % (pos, exc))
Exemplo n.º 2
0
 def authenticate(self, host):
     """Return an access token obtained from ``host``, or None.

     Calls ``self.startup(host)``, reads ``client_id`` from the query string
     of the resulting URL, then requests ``/dialog/authorize`` on ``host``
     with ``self.cookie`` as the Cookie header. The token is taken from the
     ``access_token`` entry of the final URL's fragment.

     Returns None when the startup URL is empty, when it carries no
     ``client_id``, when the fragment has no ``access_token``, or when any
     exception is raised (in which case ``self.handleError()`` is called).
     """
     try:
         start = self.startup(host)
         if not start.url:
             return None

         query = parse.parse_qs(parse.urlparse(start.url).query)
         if 'client_id' not in query:
             return None

         uri = '/dialog/authorize'
         payload = {
             'redirect_uri': 'https://cms.lingotek.com/tms-ui/html/portal/sso_redirect.html',
             'response_type': 'token',
             # parse_qs yields a list of values; it is passed on as-is.
             'client_id': query['client_id'],
         }
         r = requests.get(host + uri,
                          headers={'Cookie': self.cookie},
                          params=payload)
         log_api('GET', uri, r)

         tokens = parse.parse_qs(parse.urlparse(r.url).fragment).get('access_token')
         return tokens[0] if tokens else None
     except Exception as e:
         print("authenticate", e)
         self.handleError()
         return None
Exemplo n.º 3
0
def get_title(vid_id):
    """
    Get title of video from ID.

    Fetches ``http://youtube.com/get_video_info`` for the given ID and parses
    the URL-encoded response body.

    @param vid_id: YouTube ID for the video.
    @type vid_id: str
    @return: The first ``title`` value of the response, or None (after
        printing a warning) when the response reports ``status=fail`` or
        contains no title.
    @rtype: str or None
    """
    # Imported locally so the same names work on Python 2 and 3 and do not
    # depend on how the enclosing file imports ``urllib``.
    try:
        from urllib.parse import parse_qs
        from urllib.request import urlopen
    except ImportError:  # Python 2
        from urllib import urlopen
        from urlparse import parse_qs

    response = urlopen('http://youtube.com/get_video_info?video_id=' + vid_id)
    try:
        video_info = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    # On Python 3 the body is bytes; parse it as text so the keys are str.
    if not isinstance(video_info, str):
        video_info = video_info.decode('utf-8')

    # Parse once and reuse the result for both lookups.
    fields = parse_qs(video_info)
    titles = fields.get('title')
    if fields.get('status', [None])[0] == 'fail' or not titles:
        print(
            "WARNING: Couldn't get video title. This probably means you specified an invalid URL."
        )
        return None
    return titles[0]
Exemplo n.º 4
0
def add_qs_parameter(url, key, value):
    """
    Return ``url`` with the query parameter ``key`` set to ``value``.

    Existing values of ``key`` are replaced by the single new value. All
    other parameters are kept, including repeated parameters and parameters
    with a blank value.

    >>> add_qs_parameter('http://localhost', 'foo', 'bar')
    'http://localhost?foo=bar'
    >>> add_qs_parameter('http://localhost/', 'foo', 'bar')
    'http://localhost/?foo=bar'
    >>> add_qs_parameter('http://localhost/example', 'foo', 'bar')
    'http://localhost/example?foo=bar'
    >>> add_qs_parameter('http://localhost/example?test=example', 'foo', 'bar')
    'http://localhost/example?test=example&foo=bar'
    >>> add_qs_parameter('http://localhost/?a=1&a=2', 'foo', 'bar')
    'http://localhost/?a=1&a=2&foo=bar'
    """

    parts = urlparse(url)
    # keep_blank_values: do not silently drop parameters such as "?a=".
    query = parse_qs(parts.query, keep_blank_values=True)
    query[key] = [value]
    # doseq: emit every value of a repeated parameter, not just the first.
    query_string = urlencode(query, doseq=True)

    return urlunparse((parts.scheme, parts.netloc, parts.path, parts.params,
                       query_string, parts.fragment))
Exemplo n.º 5
0
    def __parse_query_string(self, url):
        """Return the query string of ``url`` in a sorted, re-encoded form.

        Parameters are ordered by name and the values of a repeated name are
        ordered by value. Every name and value is passed through
        ``self.__urlencode`` and the ``name=value`` pairs are joined with
        ``&``. Parameters with a blank value are not included, because
        ``parse_qs`` drops them by default. Returns an empty string when the
        URL has no query parameters.
        """
        if sys.version_info < (3, 0):
            query = urlparse.parse_qs(urlparse.urlsplit(url).query)
        else:
            # On Python 3 these functions live in ``urllib.parse``; import
            # them locally so the branch does not rely on file-level aliases.
            from urllib.parse import parse_qs, urlsplit
            query = parse_qs(urlsplit(url).query)

        # parse_qs always maps each name to a list of values, so no separate
        # scalar case is needed.
        pairs = []
        for name, values in sorted(query.items(), key=lambda item: item[0]):
            for value in sorted(values):
                pairs.append(
                    self.__urlencode(name) + '=' + self.__urlencode(value))

        return '&'.join(pairs)
Exemplo n.º 6
0
def parse_url(url):
    """Extract the video ID from a YouTube URL.

    For ``www.youtube.com``, ``youtube.com`` and ``m.youtube.com`` the ID is
    the ``v`` query parameter if present, otherwise the remainder of a path
    that starts with ``/v/``. For any other host (for example ``youtu.be``
    short links) the ID is the path without its leading slash.

    Args:
        url: The URL to inspect.

    Returns:
        The video ID as a string.

    Raises:
        WrongUrlFormatException: If no non-empty ID can be extracted.
    """
    # Imported locally so the block does not depend on how the enclosing
    # file aliases ``urllib``.
    try:
        from urllib.parse import parse_qs, urlparse
    except ImportError:  # Python 2
        from urlparse import parse_qs, urlparse

    # TODO use "t" parameter to start at a certain time
    parsed_url = urlparse(url)
    url_params = parse_qs(parsed_url.query)

    # Start from None so an unmatched URL reaches the explicit error below
    # instead of failing with UnboundLocalError.
    youtube_id = None
    if parsed_url.netloc in ('www.youtube.com', 'youtube.com', 'm.youtube.com'):
        if "v" in url_params:
            # The query parameter wins over the path form, as before.
            youtube_id = url_params["v"][0]
        elif parsed_url.path.startswith("/v/"):
            youtube_id = parsed_url.path[3:]
    else:
        youtube_id = parsed_url.path[1:]

    if not youtube_id:
        raise WrongUrlFormatException("Not a valid format")

    return youtube_id
Exemplo n.º 7
0
def add_qs_parameter(url, key, value):
    """
    Return ``url`` with the query parameter ``key`` set to ``value``.

    Existing values of ``key`` are replaced by the single new value. All
    other parameters are kept, including repeated parameters and parameters
    with a blank value.

    >>> add_qs_parameter('http://localhost', 'foo', 'bar')
    'http://localhost?foo=bar'
    >>> add_qs_parameter('http://localhost/', 'foo', 'bar')
    'http://localhost/?foo=bar'
    >>> add_qs_parameter('http://localhost/example', 'foo', 'bar')
    'http://localhost/example?foo=bar'
    >>> add_qs_parameter('http://localhost/example?test=example', 'foo', 'bar')
    'http://localhost/example?test=example&foo=bar'
    >>> add_qs_parameter('http://localhost/?a=1&a=2', 'foo', 'bar')
    'http://localhost/?a=1&a=2&foo=bar'
    """

    parts = urlparse(url)
    # keep_blank_values: do not silently drop parameters such as "?a=".
    query = parse_qs(parts.query, keep_blank_values=True)
    query[key] = [value]
    # doseq: emit every value of a repeated parameter, not just the first.
    query_string = urlencode(query, doseq=True)

    return urlunparse((parts.scheme,
                       parts.netloc,
                       parts.path,
                       parts.params,
                       query_string,
                       parts.fragment))
def is_dynamic(request):
    """Return True when the URL ``request`` has a non-blank ``_`` query parameter.

    Blank values are dropped by ``parse_qs``, so ``?_=`` counts as absent.
    """
    # Imported locally: the standard ``urllib`` package does not expose
    # ``urlparse``/``parse_qs`` as top-level attributes.
    try:
        from urllib.parse import parse_qs, urlparse
    except ImportError:  # Python 2
        from urlparse import parse_qs, urlparse

    query = parse_qs(urlparse(request).query)
    return '_' in query
def extract_item_id(request):
    """Return the first ``item`` query parameter of the URL ``request``.

    Returns None when the URL has no ``item`` parameter or only a blank one.
    """
    # Imported locally: the standard ``urllib`` package does not expose
    # ``urlparse``/``parse_qs`` as top-level attributes.
    try:
        from urllib.parse import parse_qs, urlparse
    except ImportError:  # Python 2
        from urlparse import parse_qs, urlparse

    query = parse_qs(urlparse(request).query)
    return (query.get('item') or [None])[0]