def embedded_fetch(url):
    """Download *url* into ``self.storage_dir`` and return the stored file name.

    NOTE(review): `self` is not a parameter -- this function appears to be
    defined inside a method and captures `self` as a closure variable; confirm.

    Fix: the original leaked the open file handle if urlopen()/read()/write()
    raised; the handle is now closed in a ``finally`` block.
    """
    name = utils.file_name_from_url(url)
    path, handle = utils.file_path_and_handle(self.storage_dir, name)
    try:
        body = urllib2.urlopen(url).read()
        handle.write(body)
    finally:
        handle.close()
    return name
def fetch_multiple(self, url_list):
    """Download each URL in *url_list* sequentially into ``self.storage_dir``.

    Returns the list of absolute file paths that were written, in input order.

    Fix: the original accumulated yielded byte counts into an unused
    `total_bytes_written` variable purely to silence a linter warning; the
    generator is now exhausted with the idiomatic throwaway name ``_``.
    """
    files = []
    for url in url_list:
        file_name = utils.file_name_from_url(url)
        file_path = os.path.abspath(os.path.join(self.storage_dir, file_name))
        files.append(file_path)
        # Drive the download to completion; the per-chunk byte counts the
        # generator yields are not needed here.
        for _ in fetch_url_generator(url, file_path, BUFFER_SIZE):
            pass
    return files
def fetch_multiple(self, url_list):
    """Fetch every URL in *url_list* one after another into ``self.storage_dir``.

    Returns the absolute paths of the downloaded files, ordered like the input.

    Fix: dropped the dead `total_bytes_written` counter (kept in the original
    only "to keep pycharm from complaining" about an unused loop variable);
    exhausting the generator with ``_`` expresses the intent directly.
    """
    files = []
    for url in url_list:
        file_name = utils.file_name_from_url(url)
        file_path = os.path.abspath(
            os.path.join(self.storage_dir, file_name))
        files.append(file_path)
        # The generator performs the actual transfer as it is iterated;
        # its yielded chunk sizes are irrelevant to this caller.
        for _ in fetch_url_generator(url, file_path, BUFFER_SIZE):
            pass
    return files
def fetch_multiple(self, url_list):
    """Download all URLs concurrently and return their file names.

    One download generator is built per URL, then a DownloadGeneratorLoop
    drives them with at most MAX_CONCURRENT active at a time.

    NOTE(review): this returns bare file *names* although it computes an
    absolute `file_path` for each -- the sequential variants of
    fetch_multiple return paths instead; confirm which is intended.
    """
    names = []
    pending = []
    for url in url_list:
        file_name = utils.file_name_from_url(url)
        file_path = os.path.abspath(os.path.join(self.storage_dir, file_name))
        names.append(file_name)
        pending.append(fetch_url_generator(url, file_path, BUFFER_SIZE))
    DownloadGeneratorLoop(pending, MAX_CONCURRENT).loop()
    return names
def fetch_multiple(self, url_list):
    """Concurrently fetch every URL in *url_list*; return the file names.

    Each URL gets its own fetch_url_generator; DownloadGeneratorLoop runs
    them, keeping no more than MAX_CONCURRENT in flight simultaneously.

    NOTE(review): the method appends `file_name` to the result even though
    it derives an absolute `file_path` per URL -- sibling implementations
    return full paths; verify the intended contract.
    """
    result_names = []
    download_gens = []
    for url in url_list:
        file_name = utils.file_name_from_url(url)
        target = os.path.join(self.storage_dir, file_name)
        file_path = os.path.abspath(target)
        result_names.append(file_name)
        download_gens.append(
            fetch_url_generator(url, file_path, BUFFER_SIZE))
    runner = DownloadGeneratorLoop(download_gens, MAX_CONCURRENT)
    runner.loop()
    return result_names
def fetch_multiple(self, url_list):
    """Download all URLs in parallel via pycurl's multi interface.

    Each transfer streams straight into its target file through the curl
    WRITEFUNCTION callback. Returns the list of file paths written.

    Fixes over the original:
      * curl easy handles were never removed from the multi handle nor
        closed, and the multi handle itself was never closed (resource leak);
      * file handles were only closed on the success path -- an exception in
        the transfer loop leaked every open file. All cleanup now runs in a
        ``finally`` block.
    """
    multi_handle = pycurl.CurlMulti()
    requests = []
    files = []
    try:
        for url in url_list:
            file_name = utils.file_name_from_url(url)
            file_path, file_handle = utils.file_path_and_handle(
                self.storage_dir, file_name)
            files.append(file_path)
            easy_handle = pycurl.Curl()
            easy_handle.setopt(pycurl.URL, url)
            easy_handle.setopt(pycurl.WRITEFUNCTION, file_handle.write)
            multi_handle.add_handle(easy_handle)
            requests.append((url, file_handle, easy_handle))
        num_handles = len(requests)
        while num_handles:
            # Wait for socket activity on any transfer; -1 means the
            # timeout elapsed with nothing ready -- just retry.
            ret = multi_handle.select(SELECT_TIMEOUT)
            if ret == -1:
                continue
            # perform() must be called repeatedly while it reports
            # E_CALL_MULTI_PERFORM (more work immediately available).
            while True:
                ret, num_handles = multi_handle.perform()
                if ret != pycurl.E_CALL_MULTI_PERFORM:
                    break
    finally:
        for _url, file_handle, easy_handle in requests:
            multi_handle.remove_handle(easy_handle)
            easy_handle.close()
            file_handle.close()
        multi_handle.close()
    return files
def requests_from_urls(storage_dir, url_list):
    """Build one DownloadRequest per URL, targeting a file inside *storage_dir*.

    The target file name for each request is derived from the URL itself.
    Returns the requests in the same order as *url_list*.
    """
    requests = []
    for url in url_list:
        target = os.path.join(storage_dir, demo_utils.file_name_from_url(url))
        requests.append(DownloadRequest(url, target))
    return requests