Пример #1
0
 def download_all(self):
     """Download the whole resource at ``self.url`` into ``self.directory``.

     Streams the response in ``self.chunk``-sized pieces, reporting
     progress either to stdout (when ``self.progress_queue`` is None) or
     to the queue as ``(name, chunk_index, total_chunks)`` tuples.  On a
     timeout the request is retried after a one-second pause until it
     succeeds.  Once complete, the temporary ``self.file_name`` is
     renamed to ``self.true_file_name``.
     """
     while True:
         try:
             with contextlib.closing(requests.get(self.url, stream=True, headers=self.headers, timeout=self.timeout)) as request:
                 with open(os.path.join(self.directory, self.file_name), "wb") as file:
                     true_count = 1
                     count = self.chunk
                     # NOTE(review): attribute is spelled "content_lenght"
                     # where it is defined, so the typo must be kept here.
                     total_chunks = int(self.content_lenght / self.chunk)
                     for chunk in request.iter_content(self.chunk):
                         if self.progress_queue is None:
                             print("Downloaded {:10.3f}% from {}".format(count / self.content_lenght * 100, self.name))
                         else:
                             self.progress_queue.put((self.name, true_count, total_chunks,))
                         file.write(chunk)
                         true_count += 1
                         count += self.chunk
             break
         except (requests.exceptions.Timeout, socket.timeout):
             # Transient timeout: back off briefly and restart the download.
             # (The old `load = False` assignments here were dead code —
             # `load` is never read in this method.)
             time.sleep(1)

     os.rename(
         os.path.join(self.directory, self.file_name),
         os.path.join(self.directory, self.true_file_name)
         )
Пример #2
0
    def download_image(self, src, website):
        """Fetch the image at *src* and store it for *website*."""
        # Derive a unique local file name from the URL path, prefixed
        # with the website id so images from different sites cannot collide.
        file_name = str(website.id) + "_" + os.path.basename(urlparse(src).path)

        response = requests.get(src, stream=True)

        # Bail out early when the fetch did not succeed.
        if response.status_code != requests.codes.ok:
            logger.error("Couldn't download image {}".format(src))
            return

        # Spool the streamed body into a temporary file in 8 KiB sections.
        tmp = tempfile.NamedTemporaryFile()
        for section in response.iter_content(1024 * 8):
            if not section:
                break
            tmp.write(section)

        # Persist the image to the database (the model save also stores the
        # file under /media/images).
        image = Image()
        image.website = website
        image.image_file.save(file_name, files.File(tmp))

        # Record the source url for this website.
        self.append_url_to_file(src, website)
Пример #3
0
def store(url, filename):
    """Stream *url* into *filename*, initialising the module SESSION on first use."""
    if SESSION is None:
        setup()
    print('Downloading {0}'.format(url))
    response = SESSION.get(url, stream=True)
    # Write in 1 KiB chunks so large files never sit fully in memory.
    with open(filename, 'wb') as out:
        for piece in response.iter_content(1024):
            out.write(piece)
Пример #4
0
 def _download_part(url, headers, in_queue, progress_queue, timeout, cookies):
     """
     Independent partial downloader built for this class.  It feeds off a
     queue of (start, end) byte ranges and is best run from a separate
     thread.

     :param str url: the url to download from
     :param dict headers: the header information for the request
     :param object in_queue: the input queue supplying (start, end) ranges
     :param object progress_queue: the queue receiving (start, bytes) results
     :param int timeout: the timeout for each request
     :param object cookies: cookie jar installed on the session
     """
     load = True
     chunk = None
     chunk_size = 1024  # hoisted: loop-invariant
     with requests.Session() as session:
         session.mount("https://", MyAdapter())
         session.cookies = cookies
         session.headers.update(headers)
         while True:
             # Only fetch a new range when the previous one succeeded;
             # otherwise retry the same `chunk`.
             # (The old trailing `except: raise` clauses were no-ops —
             # unhandled exceptions propagate anyway — and were removed.)
             if load is True:
                 try:
                     chunk = in_queue.get(True, 0.5)
                 except queue.Empty:
                     # Feeder queue has drained: no more work.
                     break

             load = True
             header = {"Range": "bytes={}-{}".format(chunk[0], chunk[1])}

             try:
                 with contextlib.closing(session.get(url, stream=True, timeout=timeout, verify=r"Drivers/certs.pem", headers=header)) as request:
                     # Download the requested byte range.
                     parts = []
                     for part in request.iter_content(chunk_size):
                         parts.append(part)
                     parts = b"".join(parts)

                     # Accept the range only if it arrived complete;
                     # otherwise flag it for a retry.
                     if len(parts) == int(request.headers["Content-Length"]):
                         progress_queue.put((chunk[0], parts))
                     else:
                         load = False

             except (requests.exceptions.Timeout,
                     requests.exceptions.ConnectionError,
                     socket.timeout):
                 # Transient network failure: back off and retry the range.
                 load = False
                 time.sleep(1)
Пример #5
0
    def __download_jre(self):
        """Download and unpack the Oracle JRE for this os/arch.

        Returns the directory the JRE was extracted to; if that directory
        already exists the download is skipped entirely.
        """
        version = '8u152'
        build = 'b16'
        downloadId = 'aa0333dd3019491ca4f6ddbe78cdb6d0'
        urlPrefix = 'http://download.oracle.com/otn-pub/java/jdk/{0}-{1}/{2}/jre-{0}-'.format(
            version, build, downloadId)
        extension = 'tar.gz'
        jreOs = self.os.jreOs
        jreArch = self.arch.jreArch

        location = _to_storage_location(path.join('jre', version, build))
        makedirs(location, exist_ok=True)

        fileName = '{}-{}.{}'.format(jreOs, jreArch, extension)
        filePath = path.join(location, fileName)

        dirName = '{}-{}'.format(jreOs, jreArch)
        dirPath = path.join(location, dirName)

        # Already downloaded and extracted by a previous run.
        if path.isdir(dirPath):
            return dirPath

        url = '{}{}'.format(urlPrefix, fileName)
        print('Downloading JRE from {}'.format(url))
        # Oracle requires these cookies to accept the license terms.
        cookies = dict(gpw_e24='http%3A%2F%2Fwww.oracle.com%2F',
                       oraclelicense='accept-securebackup-cookie')
        request = requests.get(url, cookies=cookies)
        # Fail loudly on an HTTP error instead of "extracting" an error page.
        request.raise_for_status()
        with open(filePath, 'wb') as file:
            for chunk in request.iter_content(1024):
                file.write(chunk)

        print('Extracting JRE to {}'.format(dirPath))
        # NOTE(review): extractall is vulnerable to path traversal for
        # untrusted archives; acceptable here only because the archive
        # comes from Oracle's own download server.
        with tarfile.open(filePath, 'r') as tar:
            tar.extractall(path=dirPath)
            # Flatten the archive's single top-level directory into dirPath.
            rootName = _common_prefix(tar.getnames())
            rootDir = path.join(dirPath, rootName)
            for name in listdir(rootDir):
                move(path.join(rootDir, name), path.join(dirPath, name))
            rmdir(rootDir)

        # Delete AppleDouble ("._*") files created on macOS.
        # Bug fix: the old substring test ('._' in name) also removed
        # legitimate files that merely contained "._"; match the prefix.
        for walkDirPath, dirs, files in os.walk(dirPath):
            for walkFileName in files:
                if walkFileName.startswith('._'):
                    os.remove(os.path.join(walkDirPath, walkFileName))

        remove(filePath)

        return dirPath
Пример #6
0
  def __DownloadJre(self):
    """Download and extract the Oracle JRE matching self.config; return its directory."""
    version = '8u92'
    urlPrefix = 'http://download.oracle.com/otn-pub/java/jdk/8u92-b14/jre-8u92-'

    extension = 'tar.gz'

    # Translate the build configuration into Oracle's naming scheme.
    osNames = {'macosx': 'macosx', 'linux': 'linux', 'win32': 'windows'}
    if self.config.os not in osNames:
      raise Exception('Unsupported JRE OS {}'.format(self.config.os))
    jreOS = osNames[self.config.os]

    archNames = {'x86_64': 'x64', 'x86': 'i586'}
    if self.config.arch not in archNames:
      raise Exception('Unsupported JRE architecture {}'.format(self.config.arch))
    jreArch = archNames[self.config.arch]

    location = self.__ToStorageLocation(path.join('jre', version))
    makedirs(location, exist_ok=True)

    fileName = '{}-{}.{}'.format(jreOS, jreArch, extension)
    filePath = path.join(location, fileName)

    dirName = '{}-{}'.format(jreOS, jreArch)
    dirPath = path.join(location, dirName)

    # Nothing to do when a previous run already extracted it.
    if path.isdir(dirPath):
      return dirPath

    url = '{}{}'.format(urlPrefix, fileName)
    print('Downloading JRE from {}'.format(url))
    # Cookies required by Oracle to accept the license agreement.
    cookies = dict(gpw_e24='http%3A%2F%2Fwww.oracle.com%2F', oraclelicense='accept-securebackup-cookie')
    response = requests.get(url, cookies=cookies)
    with open(filePath, 'wb') as out:
      for chunk in response.iter_content(1024):
        out.write(chunk)

    print('Extracting JRE to {}'.format(dirPath))
    with tarfile.open(filePath, 'r') as tar:
      tar.extractall(path=dirPath)
      # Hoist the archive's single root folder up into dirPath.
      rootDir = path.join(dirPath, CommonPrefix(tar.getnames()))
      for entry in listdir(rootDir):
        move(path.join(rootDir, entry), path.join(dirPath, entry))
      rmdir(rootDir)

    remove(filePath)

    return dirPath
Пример #7
0
def download(url, path, filename):
    """Stream *url* to *path*, showing a tqdm progress bar labelled *filename*.

    :param url: remote file to fetch
    :param path: local destination path (made absolute before writing)
    :param filename: label shown on the progress bar
    """
    request = requests.get(url, stream=True)
    # Content-Length may be absent; fall back to 0 (tqdm then shows no total).
    total_size = int(request.headers.get('content-length', 0))
    block_size = 1024

    location = os.path.abspath(path)
    with open(location, 'wb') as file:
        # Bug fix: `total_size // block_size` already floored, making
        # math.ceil a no-op and dropping the final partial chunk from the
        # total; true division gives the correct chunk count.
        # (The unused `wrote` counter was also removed.)
        for data in tqdm(request.iter_content(block_size),
                         desc=filename,
                         total=math.ceil(total_size / block_size),
                         unit='KB',
                         unit_scale=True):
            file.write(data)
Пример #8
0
def main():
    """Download every case_zip archive listed in test_data.json.

    Each archive is saved under D:\\case\\<case_type>, keeping the remote
    file name (percent-escapes decoded).
    """
    # Bug fix: the json file and every output file were opened without
    # `with`/close, leaking handles whenever a download raised.
    with open('test_data.json', encoding='utf-8') as f:
        data = json.loads(f.read())
    for item in data:
        user = data[item]
        for case in user["cases"]:
            address = case["case_zip"]
            dirpath = "D:\\case\\" + case["case_type"]
            # exist_ok avoids the check-then-create race of the old code.
            os.makedirs(dirpath, exist_ok=True)
            # Decode percent-escapes so the local file keeps its real name.
            name = dirpath + "\\" + urllib.parse.unquote(
                os.path.basename(case["case_zip"]))
            request = requests.get(address, stream=True)
            with open(name, "wb") as file:
                for chunk in request.iter_content(chunk_size=1024):
                    if chunk:
                        file.write(chunk)
Пример #9
0
def downloadupload(user_id):
    """Download every upload-record code archive for *user_id*.

    Archives are read from test_data.json and saved under D:\\<user_id>,
    keeping the remote file name (percent-escapes decoded).
    """
    # Bug fix: the json file and every output file were opened without
    # `with`/close, leaking handles whenever a download raised.
    with open('test_data.json', encoding='utf-8') as f:
        data = json.loads(f.read())
    user = data[user_id]
    for case in user["cases"]:
        for upload_record in case["upload_records"]:
            address = upload_record["code_url"]
            dirpath = "D:\\" + user_id
            # exist_ok avoids the check-then-create race of the old code.
            os.makedirs(dirpath, exist_ok=True)
            # Decode percent-escapes so the local file keeps its real name.
            name = dirpath + "\\" + urllib.parse.unquote(
                os.path.basename(upload_record["code_url"]))
            request = requests.get(address, stream=True)
            with open(name, "wb") as file:
                for chunk in request.iter_content(chunk_size=1024):
                    if chunk:
                        file.write(chunk)
Пример #10
0
    def download(self, path='downloads'):
        """Fetch self.src into *path* and flag the record as downloaded.

        Any failure is printed and swallowed, leaving the record unmarked.
        """
        try:
            headers = {
                'User-Agent':
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
            }
            # NOTE(review): verify=False disables TLS certificate checks and
            # timeout=10000 is in seconds — both look worth revisiting.
            response = requests.get(self.src,
                                    headers=headers,
                                    timeout=10000,
                                    stream=True,
                                    verify=False)
            # Local file keeps the (unquoted) remote base name.
            target = os.path.join(path, unquote(os.path.basename(self.src)))

            with open(target, 'wb') as fh:
                for chunk in response.iter_content(1024 * 1024):
                    fh.write(chunk)

            self.downloaded = True
            self.save()
        except Exception as ex:
            print(ex)
Пример #11
0
def file_download(url) -> File:
    """Stream *url* into a NamedTemporaryFile and wrap it as a ``files.File``."""
    response = requests.get(url, stream=True)

    # Abort on any non-OK HTTP status.
    if response.status_code != requests.codes.ok:
        raise Exception("Something went wrong")

    # Spool the body into a temporary file in 8 KiB sections.
    tmp = tempfile.NamedTemporaryFile()
    for section in response.iter_content(1024 * 8):
        if not section:
            break
        tmp.write(section)

    return files.File(tmp)
Пример #12
0
def http_retrieve(url, filename=None, **kwargs):
    """Download *filename* from *url* over HTTP(S).

    :return: A 3-tuple ``(status, target, saveas)`` — *status* is an
             integer download status (anything other than 0 denotes an
             error), *target* the remote URL requested, *saveas* the
             local path written.  (The old docstring wrongly claimed a
             bare integer was returned.)

    kwargs:
    username:  Use this username to access the url.
    password:  Use this password to access the url.
    save_as:  'foobar' Save remote file as 'foobar' (can include path)
    save_dir: 'foo/bar' Directory to save remote file; if both save_dir and
               save_as are given, then the local file will be the concatenation
               of these two, aka os.path.join(save_dir, save_as)
    fail_error: True/False Throw exception if download fails. By default
               the function will throw if the download fails
    """
    if filename is None:
        url, filename = url_split(url)
    saveas = kwargs.get('save_as', filename)
    if 'save_dir' in kwargs:
        if not os.path.isdir(kwargs['save_dir']):
            msg = '[ERROR] retrieve::http_retrieve Directory does not exist {:}'.format(
                kwargs['save_dir'])
            raise RuntimeError(msg)
        saveas = os.path.join(kwargs['save_dir'], saveas)
    if 'fail_error' not in kwargs:
        kwargs['fail_error'] = True

    # Credentials are used only when at least one of username/password is
    # present and non-empty.
    use_credentials = False
    if {'username', 'password'} & set(kwargs):
        use_credentials = True
        username = kwargs.get('username', '')
        password = kwargs.get('password', '')
        if not username and not password:
            use_credentials = False

    target = '{:}/{:}'.format(url, filename)

    status = 0
    if not use_credentials:  ## download with no credentials
        try:
            ## allow timeout with requests
            request = requests.get(target, timeout=20, stream=True)
            if request.status_code == 200:
                with open(saveas, 'wb') as fh:
                    for chunk in request.iter_content(1024 * 1024):
                        fh.write(chunk)
            if not os.path.isfile(saveas):
                status += 1
        except Exception:
            # Bug fix: the old bare `except:` also swallowed SystemExit
            # and KeyboardInterrupt; catch only ordinary errors.
            status = 1
    else:  ## download with credentials (not sure if this works for python 2)
        try:
            with requests.get(target, auth=(username, password), timeout=20) as r:
                r.raise_for_status()
                if r.status_code == 200:
                    with open(saveas, 'wb') as f:
                        #shutil.copyfileobj(r.raw, f)
                        f.write(r.content)
                if not os.path.isfile(saveas):
                    status += 1
        except Exception:
            status = 1

    if status > 0 and kwargs['fail_error'] == True:
        msg = '[ERROR] retrieve::http_retrieve Failed to download file {:}'.format(
            target)
        raise RuntimeError(msg)

    return status, target, saveas
Пример #13
0
# Output directory named after the input file (extension stripped).
root = './' + sys.argv[1][:sys.argv[1].rfind('.')]
print(root)
if not exists(root):
    mkdir(root)

# `file` is the url-list file opened earlier in the script.
urls = file.read().split('\n')

file.close()

# Make the actual request, set the timeout for no data to 10 seconds and enable streaming responses so we don't have to keep the large files in memory

# Now for each url, load the image
downloads = 0
for url in urls:
    img_name = url[url.rfind('/') + 1:]
    try:
        request = requests.get(url, timeout=3, stream=True)
        # Open the output file and make sure we write in binary mode
        with open(join(root, img_name), 'wb') as fh:
            # Walk through the request response in chunks of 1024 * 1024 bytes, so 1MiB
            for chunk in request.iter_content(1024 * 1024):
                # Write the chunk to the file
                fh.write(chunk)
                # Optionally we can check here if the download is taking too long
        downloads += 1
        print(f'Downloaded {downloads}')
    except SystemExit:
        raise
    except Exception:
        # Bug fix: the old bare `except:` also caught KeyboardInterrupt,
        # making the loop impossible to interrupt; catch only real errors.
        print('Missing file, continuing...')
# Scrape Terapeak search results page-by-page behind a login session.
with session() as c:  #Create a cookie session to login to the protected page
    page_offset = 0
    result_list = []
    # `payload` (the login form data) is defined earlier in the script.
    c.post('https://data.terapeak.com/verify/', payload)
    while page_offset <= 50:
        # Results are paged 25 at a time via the `offset` query parameter.
        url = "http://data.terapeak.com/?id=0&search=1&view=item_browse&query=iphone+5c&date=2014-09-31&date_range=3&buyer_country_id=1&condition=rollup_3&type%5Bfixed%5D=1&from_start_price=100&to_start_price=1000&from_end_price=100&to_end_price=1000&seller_country_id=1&numPages=18&siteID=0&offset={0}".format(
            page_offset)
        #print (url)
        request = c.get(url)
        #print (request.headers)
        #print (request.text)

        # NOTE(review): on a failed page only "error" is printed and the
        # loop continues with whatever body came back — confirm intended.
        if not request.ok:
            print("error")
            # Something went wrong

        # Accumulate the response body in 1 KiB blocks across all pages.
        for block in request.iter_content(1024):
            if not block:
                break

            result_list.append(block)
        page_offset += 25
        #print (page_offset)
        #print (result_list)
        # NOTE(review): end_data is recomputed from the full result_list on
        # every page; only the final value after the loop is ever used.
        end_data = ','.join([i.decode() for i in result_list])

    with open("baby.txt", 'w') as text_file:
        text_file.write(end_data.strip())

print("It's done d00dz")