def download_all(self):
    while True:
        try:
            with contextlib.closing(requests.get(self.url, stream=True,
                                                 headers=self.headers,
                                                 timeout=self.timeout)) as request:
                with open(os.path.join(self.directory, self.file_name), "wb") as file:
                    true_count = 1
                    count = self.chunk
                    total_chunks = int(self.content_lenght / self.chunk)
                    for chunk in request.iter_content(self.chunk):
                        # report progress either to stdout or to the progress queue
                        if self.progress_queue is None:
                            print("Downloaded {:10.3f}% from {}".format(
                                count / self.content_lenght * 100, self.name))
                        else:
                            self.progress_queue.put((self.name, true_count, total_chunks))
                        file.write(chunk)
                        true_count += 1
                        count += self.chunk
            break
        except (requests.exceptions.Timeout, socket.timeout):
            # wait a moment, then restart the download from scratch
            time.sleep(1)
    os.rename(
        os.path.join(self.directory, self.file_name),
        os.path.join(self.directory, self.true_file_name)
    )
def download_image(self, src, website):
    # build the output path
    url_path = urlparse(src).path
    # prepend the website id to make the file name unique
    file_name = str(website.id) + "_" + os.path.basename(url_path)
    request = requests.get(src, stream=True)
    # check that the resource was fetched properly
    if request.status_code != requests.codes.ok:
        logger.error("Couldn't download image {}".format(src))
        return
    lf = tempfile.NamedTemporaryFile()
    for block in request.iter_content(1024 * 8):
        if not block:
            break
        lf.write(block)
    # save the image to the database; the file itself is stored under /media/images
    image = Image()
    image.website = website
    image.image_file.save(file_name, files.File(lf))
    # append the url to the file
    self.append_url_to_file(src, website)
def store(url, filename):
    if SESSION is None:
        setup()
    print('Downloading {0}'.format(url))
    request = SESSION.get(url, stream=True)
    with open(filename, 'wb') as file:
        for chunk in request.iter_content(1024):
            file.write(chunk)
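# store() relies on a module-level SESSION that setup() is expected to
# initialise; neither is shown in this collection. A minimal sketch of that
# assumed pair, using a plain requests.Session (the User-Agent value is
# illustrative):
SESSION = None

def setup():
    global SESSION
    # one shared session reuses TCP connections across store() calls
    SESSION = requests.Session()
    SESSION.headers.update({'User-Agent': 'downloader-sketch/0.1'})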
def _download_part(url, headers, in_queue, progress_queue, timeout, cookies):
    """
    An independent partial downloader built for this class. It feeds off a
    queue and is best run from a separate thread.

    :param str url: the url to download from
    :param dict headers: the header information for the request
    :param object in_queue: the input queue of (start, end) byte ranges
    :param object progress_queue: the queue where the downloaded parts go
    :param int timeout: the timeout for the request
    :param object cookies: the cookies to attach to the session
    """
    load = True
    chunk = None
    with requests.Session() as session:
        session.mount("https://", MyAdapter())
        session.cookies = cookies
        session.headers.update(headers)
        while True:
            # only fetch a new byte range if the previous one succeeded
            if load is True:
                try:
                    chunk = in_queue.get(True, 0.5)
                except queue.Empty:
                    break
            load = True
            chunk_size = 1024
            header = {"Range": "bytes={}-{}".format(chunk[0], chunk[1])}
            try:
                with contextlib.closing(session.get(url, stream=True, timeout=timeout,
                                                    verify=r"Drivers/certs.pem",
                                                    headers=header)) as request:
                    # download the requested byte range in pieces
                    parts = []
                    for part in request.iter_content(chunk_size):
                        parts.append(part)
                    parts = b"".join(parts)
                    # check that the whole range arrived; if not, retry this chunk
                    if len(parts) == int(request.headers["Content-Length"]):
                        progress_queue.put((chunk[0], parts))
                    else:
                        load = False
            except (requests.exceptions.Timeout,
                    requests.exceptions.ConnectionError,
                    socket.timeout):
                load = False
                time.sleep(1)
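# A hedged usage sketch for _download_part(): split the file into byte ranges,
# feed them through in_queue, and run the worker in a thread. The (start, end)
# tuples and the (offset, bytes) results match what the function above reads
# and writes; the URL, sizes, and the helper name itself are illustrative.
import queue
import threading

def sketch_parallel_download(url, headers, cookies, total_size, part_size=1024 * 1024):
    in_queue = queue.Queue()
    progress_queue = queue.Queue()
    n_parts = 0
    for start in range(0, total_size, part_size):
        in_queue.put((start, min(start + part_size, total_size) - 1))
        n_parts += 1
    worker = threading.Thread(target=_download_part,
                              args=(url, headers, in_queue, progress_queue, 30, cookies))
    worker.start()
    # reassemble the parts in offset order once every range has arrived
    parts = sorted(progress_queue.get() for _ in range(n_parts))
    worker.join()
    return b"".join(data for _, data in parts)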
def __download_jre(self):
    # http://download.oracle.com/otn-pub/java/jdk/8u152-b16/aa0333dd3019491ca4f6ddbe78cdb6d0/jdk-8u152-windows-i586.exe
    # http://download.oracle.com/otn-pub/java/jdk/8u152-b16/aa0333dd3019491ca4f6ddbe78cdb6d0/jdk-8u152-windows-x64.exe
    # http://download.oracle.com/otn-pub/java/jdk/8u152-b16/aa0333dd3019491ca4f6ddbe78cdb6d0/jdk-8u152-linux-i586.tar.gz
    # http://download.oracle.com/otn-pub/java/jdk/8u152-b16/aa0333dd3019491ca4f6ddbe78cdb6d0/jdk-8u152-linux-x64.rpm
    # http://download.oracle.com/otn-pub/java/jdk/8u152-b16/aa0333dd3019491ca4f6ddbe78cdb6d0/jdk-8u152-macosx-x64.dmg
    version = '8u152'
    build = 'b16'
    downloadId = 'aa0333dd3019491ca4f6ddbe78cdb6d0'
    urlPrefix = 'http://download.oracle.com/otn-pub/java/jdk/{0}-{1}/{2}/jre-{0}-'.format(
        version, build, downloadId)
    extension = 'tar.gz'
    jreOs = self.os.jreOs
    jreArch = self.arch.jreArch
    location = _to_storage_location(path.join('jre', version, build))
    makedirs(location, exist_ok=True)
    fileName = '{}-{}.{}'.format(jreOs, jreArch, extension)
    filePath = path.join(location, fileName)
    dirName = '{}-{}'.format(jreOs, jreArch)
    dirPath = path.join(location, dirName)
    if path.isdir(dirPath):
        return dirPath
    url = '{}{}'.format(urlPrefix, fileName)
    print('Downloading JRE from {}'.format(url))
    cookies = dict(gpw_e24='http%3A%2F%2Fwww.oracle.com%2F',
                   oraclelicense='accept-securebackup-cookie')
    request = requests.get(url, cookies=cookies)
    with open(filePath, 'wb') as file:
        for chunk in request.iter_content(1024):
            file.write(chunk)
    print('Extracting JRE to {}'.format(dirPath))
    with tarfile.open(filePath, 'r') as tar:
        tar.extractall(path=dirPath)
        rootName = _common_prefix(tar.getnames())
    rootDir = path.join(dirPath, rootName)
    for name in listdir(rootDir):
        move(path.join(rootDir, name), path.join(dirPath, name))
    rmdir(rootDir)
    # Delete ._ files found on macOS
    for walkDirPath, dirs, files in os.walk(dirPath):
        for walkFileName in files:
            if walkFileName.startswith('._'):
                os.remove(os.path.join(walkDirPath, walkFileName))
    remove(filePath)
    return dirPath
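# _common_prefix() is referenced above but not defined in this collection.
# A minimal sketch under the assumption that it returns the single top-level
# directory shared by all archive member names:
def _common_prefix(names):
    # os.path.commonprefix works on raw strings, so trim back to a whole path component
    return os.path.commonprefix(names).rstrip('/').split('/')[0]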
def __DownloadJre(self):
    version = '8u92'
    urlPrefix = 'http://download.oracle.com/otn-pub/java/jdk/8u92-b14/jre-8u92-'
    extension = 'tar.gz'
    if self.config.os == 'macosx':
        jreOS = 'macosx'
    elif self.config.os == 'linux':
        jreOS = 'linux'
    elif self.config.os == 'win32':
        jreOS = 'windows'
    else:
        raise Exception('Unsupported JRE OS {}'.format(self.config.os))
    if self.config.arch == 'x86_64':
        jreArch = 'x64'
    elif self.config.arch == 'x86':
        jreArch = 'i586'
    else:
        raise Exception('Unsupported JRE architecture {}'.format(self.config.arch))
    location = self.__ToStorageLocation(path.join('jre', version))
    makedirs(location, exist_ok=True)
    fileName = '{}-{}.{}'.format(jreOS, jreArch, extension)
    filePath = path.join(location, fileName)
    dirName = '{}-{}'.format(jreOS, jreArch)
    dirPath = path.join(location, dirName)
    if path.isdir(dirPath):
        return dirPath
    url = '{}{}'.format(urlPrefix, fileName)
    print('Downloading JRE from {}'.format(url))
    cookies = dict(gpw_e24='http%3A%2F%2Fwww.oracle.com%2F',
                   oraclelicense='accept-securebackup-cookie')
    request = requests.get(url, cookies=cookies)
    with open(filePath, 'wb') as file:
        for chunk in request.iter_content(1024):
            file.write(chunk)
    print('Extracting JRE to {}'.format(dirPath))
    with tarfile.open(filePath, 'r') as tar:
        tar.extractall(path=dirPath)
        rootName = CommonPrefix(tar.getnames())
    rootDir = path.join(dirPath, rootName)
    for name in listdir(rootDir):
        move(path.join(rootDir, name), path.join(dirPath, name))
    rmdir(rootDir)
    remove(filePath)
    return dirPath
def download(url, path, filename):
    request = requests.get(url, stream=True)
    total_size = int(request.headers.get('content-length', 0))
    block_size = 1024
    location = os.path.abspath(path)
    with open(location, 'wb') as file:
        # total counts the number of 1 KiB blocks expected
        for data in tqdm(request.iter_content(block_size), desc=filename,
                         total=math.ceil(total_size / block_size),
                         unit='KB', unit_scale=True):
            file.write(data)
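# Hypothetical call to download() above; the URL and local path are placeholders.
download('https://example.com/archive.zip', './archive.zip', 'archive.zip')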
def main():
    with open('test_data.json', encoding='utf-8') as f:
        data = json.loads(f.read())
    for item in data:
        user = data[item]
        cases = user["cases"]
        for case in cases:
            address = case["case_zip"]
            dirpath = "D:\\case\\" + case["case_type"]
            if not os.path.exists(dirpath):
                os.makedirs(dirpath)
            name = dirpath + "\\" + urllib.parse.unquote(
                os.path.basename(case["case_zip"]))
            request = requests.get(address, stream=True)
            # write the archive to disk chunk by chunk
            with open(name, "wb") as file:
                for chunk in request.iter_content(chunk_size=1024):
                    if chunk:
                        file.write(chunk)
def downloadupload(user_id):
    with open('test_data.json', encoding='utf-8') as f:
        data = json.loads(f.read())
    user = data[user_id]
    cases = user["cases"]
    for case in cases:
        upload_records = case["upload_records"]
        for upload_record in upload_records:
            address = upload_record["code_url"]
            dirpath = "D:\\" + user_id
            if not os.path.exists(dirpath):
                os.makedirs(dirpath)
            name = dirpath + "\\" + urllib.parse.unquote(
                os.path.basename(upload_record["code_url"]))
            request = requests.get(address, stream=True)
            # write the uploaded code archive to disk chunk by chunk
            with open(name, "wb") as file:
                for chunk in request.iter_content(chunk_size=1024):
                    if chunk:
                        file.write(chunk)
def download(self, path='downloads'):
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
        }
        # requests timeouts are given in seconds
        request = requests.get(self.src, headers=headers, timeout=10,
                               stream=True, verify=False)
        src = unquote(os.path.basename(self.src))
        path = os.path.join(path, src)
        with open(path, 'wb') as fh:
            for chunk in request.iter_content(1024 * 1024):
                fh.write(chunk)
        self.downloaded = True
        self.save()
    except Exception as ex:
        print(ex)
def file_download(url) -> File:
    # stream the image from the url
    request = requests.get(url, stream=True)
    # was the request OK?
    if request.status_code != requests.codes.ok:
        raise Exception("Something went wrong")
    # create a temporary file
    lf = tempfile.NamedTemporaryFile()
    # read the streamed image in sections
    for block in request.iter_content(1024 * 8):
        # if there is no more data, stop
        if not block:
            break
        # write the image block to the temporary file
        lf.write(block)
    return files.File(lf)
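# A sketch of how file_download() might feed a Django FileField; the document
# argument and its attachment field are hypothetical names, and os/urlparse
# are assumed to be imported as in the snippets above.
def attach_remote_file(document, url):
    # FileField.save() stores the file under MEDIA_ROOT and updates the model
    document.attachment.save(
        os.path.basename(urlparse(url).path), file_download(url))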
def http_retrieve(url, filename=None, **kwargs):
    """
    :return: A tuple (status, target, saveas); a status other than 0 denotes
        an error.

    kwargs:
        username:   Use this username to access the url.
        password:   Use this password to access the url.
        save_as:    'foobar' Save the remote file as 'foobar' (can include a path).
        save_dir:   'foo/bar' Directory to save the remote file in; if both
                    save_dir and save_as are given, the local file will be the
                    concatenation of the two, i.e. os.path.join(save_dir, save_as).
        fail_error: True/False Throw an exception if the download fails. By
                    default the function throws if the download fails.
    """
    if filename is None:
        url, filename = url_split(url)
    saveas = kwargs['save_as'] if 'save_as' in kwargs else filename
    if 'save_dir' in kwargs:
        if not os.path.isdir(kwargs['save_dir']):
            msg = '[ERROR] retrieve::http_retrieve Directory does not exist {:}'.format(
                kwargs['save_dir'])
            raise RuntimeError(msg)
        saveas = os.path.join(kwargs['save_dir'], saveas)
    if 'fail_error' not in kwargs:
        kwargs['fail_error'] = True

    use_credentials = False
    if set(['username', 'password']).intersection(set(kwargs)):
        use_credentials = True
        username = kwargs['username'] if 'username' in kwargs else ''
        password = kwargs['password'] if 'password' in kwargs else ''
        if (not username or username == '') and (not password or password == ''):
            use_credentials = False

    target = '{:}/{:}'.format(url, filename)
    status = 0
    if not use_credentials:
        ## download with no credentials
        try:
            ## allow timeout with requests
            request = requests.get(target, timeout=20, stream=True)
            if request.status_code == 200:
                with open(saveas, 'wb') as fh:
                    for chunk in request.iter_content(1024 * 1024):
                        fh.write(chunk)
            if not os.path.isfile(saveas):
                status += 1
        except Exception:
            status = 1
    else:
        ## download with credentials (not sure if this works for python 2)
        try:
            with requests.get(target, auth=(username, password), timeout=20) as r:
                r.raise_for_status()
                if r.status_code == 200:
                    with open(saveas, 'wb') as f:
                        # shutil.copyfileobj(r.raw, f)
                        f.write(r.content)
            if not os.path.isfile(saveas):
                status += 1
        except Exception:
            status = 1

    if status > 0 and kwargs['fail_error'] is True:
        msg = '[ERROR] retrieve::http_retrieve Failed to download file {:}'.format(
            target)
        raise RuntimeError(msg)
    return status, target, saveas
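# Hedged usage sketch for http_retrieve(); the host, file, and directory are
# placeholders, and url_split() is the helper referenced above. Passing
# fail_error=False turns the RuntimeError into a nonzero status code.
status, target, saveas = http_retrieve('https://example.com/data/file.gz',
                                       save_dir='/tmp/downloads',
                                       fail_error=False)
if status != 0:
    print('download of {} failed'.format(target))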
root = './' + sys.argv[1][:sys.argv[1].rfind('.')]
print(root)
if not exists(root):
    mkdir(root)

urls = file.read().split('\n')
file.close()

# Now, for each url, load the image
downloads = 0
for url in urls:
    img_name = url[url.rfind('/') + 1:]
    try:
        # Make the actual request; set the timeout for no data to 3 seconds and
        # enable streaming responses so we don't keep the large files in memory
        request = requests.get(url, timeout=3, stream=True)
        # Open the output file and make sure we write in binary mode
        with open(join(root, img_name), 'wb') as fh:
            # Walk through the response in chunks of 1024 * 1024 bytes, so 1 MiB
            for chunk in request.iter_content(1024 * 1024):
                # Write the chunk to the file
                fh.write(chunk)
                # Optionally we could check here if the download is taking too long
        downloads += 1
        print(f'Downloaded {downloads}')
    except SystemExit:
        raise
    except:
        print('Missing file, continuing...')
with session() as c:
    # create a cookie session to log in to the protected page
    page_offset = 0
    result_list = []
    c.post('https://data.terapeak.com/verify/', payload)
    while page_offset <= 50:
        url = "http://data.terapeak.com/?id=0&search=1&view=item_browse&query=iphone+5c&date=2014-09-31&date_range=3&buyer_country_id=1&condition=rollup_3&type%5Bfixed%5D=1&from_start_price=100&to_start_price=1000&from_end_price=100&to_end_price=1000&seller_country_id=1&numPages=18&siteID=0&offset={0}".format(
            page_offset)
        request = c.get(url)
        if not request.ok:
            # something went wrong with this page
            print("error")
        for block in request.iter_content(1024):
            if not block:
                break
            result_list.append(block)
        page_offset += 25
    end_data = ','.join([i.decode() for i in result_list])
    with open("baby.txt", 'w') as text_file:
        text_file.write(end_data.strip())
    print("It's done d00dz")