def download_entry(self, entry, path):
    """Download the resource at ``entry.url`` into the directory ``path``.

    The local file name comes from ``get_safe_local_id(entry.url)``. After
    the download, ``entry.local_paths`` is set to a one-element list holding
    the value ``get_relative_url`` returns for the saved file, and
    ``entry.downloaded`` is set to ``True``.

    Args:
        entry: Object exposing ``url``; its ``local_paths`` and
            ``downloaded`` attributes are overwritten.
        path: Directory the file is joined onto with ``os.path.join``.
    """
    target = os.path.join(path, get_safe_local_id(entry.url))
    download_file(entry.url, target)
    entry.local_paths = [get_relative_url(target)]
    entry.downloaded = True
def make_copy(self, url_to_copy, binary=False):
    """Download ``url_to_copy`` to its local counterpart and return that URL.

    The destination is computed by ``get_local_url(self.output_url,
    url_to_copy)``. ``ensure_path_exists`` is called on it before the
    download is attempted.

    Args:
        url_to_copy: URL of the resource to copy.
        binary: Forwarded unchanged to ``download_file`` as ``binary``.

    Returns:
        The destination URL produced by ``get_local_url``.

    Raises:
        Exception: If ``ensure_path_exists`` fails for the destination.
    """
    destination_url = get_local_url(self.output_url, url_to_copy)
    try:
        ensure_path_exists(destination_url)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and
        # SystemExit must propagate untouched instead of being disguised
        # as a generic copy failure.
        raise Exception('Could not make copy of {0} in {1}'.format(
            url_to_copy, destination_url))
    # force=False is passed through as in the original call; its exact
    # meaning is defined by download_file (presumably "do not re-download
    # an existing file" -- confirm against that helper).
    download_file(url_to_copy, get_path_from_url(destination_url),
                  force=False, binary=binary)
    return destination_url
def download_entry(self, entry, path):
    """Download ``entry.url`` and every follow-up page into ``path``.

    Starting from ``entry.url``, each page is saved under a name built by
    ``get_safe_local_id`` with a ``_page<N>`` suffix. The saved file is then
    parsed with ``download_html_tree`` and handed to
    ``self._get_next_entry_url`` to obtain the next URL. The loop ends when
    that method returns ``None``.

    Args:
        entry: Object exposing ``url``; its ``downloaded`` and
            ``local_paths`` attributes are overwritten on completion.
        path: Directory each page file is joined onto with ``os.path.join``.
    """
    saved_pages = []
    current_url = entry.url
    page_number = 0
    while current_url is not None:
        suffix = '_page{0}'.format(page_number)
        file_name = get_safe_local_id(current_url, suffix)
        target = os.path.join(path, file_name)
        download_file(current_url, target)
        saved_pages.append(get_relative_url(target))
        page_tree = download_html_tree(target)
        # The next-URL lookup receives the index of the page being looked
        # for, not the one just saved, so bump the counter first.
        page_number += 1
        current_url = self._get_next_entry_url(
            current_url, page_number, page_tree)
    entry.downloaded = True
    entry.local_paths = saved_pages
def download_entry(self, entry, path):
    """Download a possibly multi-page entry into the directory ``path``.

    Pages are fetched one at a time beginning with ``entry.url``. After each
    page is saved it is parsed with ``download_html_tree`` and passed to
    ``self._get_next_entry_url``; a ``None`` result stops the loop. On
    completion ``entry.downloaded`` is set to ``True`` and
    ``entry.local_paths`` to the list of ``get_relative_url`` values, one
    per saved page, in download order.

    Args:
        entry: Object exposing ``url``; its ``downloaded`` and
            ``local_paths`` attributes are overwritten.
        path: Directory each page file is joined onto with ``os.path.join``.
    """

    def fetch_page(url, index):
        # Save one page locally and return the path it was written to.
        name = get_safe_local_id(url, "_page{0}".format(index))
        destination = os.path.join(path, name)
        download_file(url, destination)
        return destination

    relative_urls = []
    url = entry.url
    index = 0
    while url is not None:
        destination = fetch_page(url, index)
        relative_urls.append(get_relative_url(destination))
        html_tree = download_html_tree(destination)
        # Increment before the lookup: the callee is given the index of the
        # upcoming page.
        index += 1
        url = self._get_next_entry_url(url, index, html_tree)
    entry.downloaded = True
    entry.local_paths = relative_urls