def __init__(self):
    self.model = None
    self.output_size = -1
    if not os.path.exists(img2emo_checkpoint):
        print("Downloading image to emotion classifier...")
        download(
            "https://www.dropbox.com/s/8dfj3b36q15iieo/best_model.pt?dl=1",
            img2emo_checkpoint)
    if not os.path.exists(speaker_checkpoint):
        print("Downloading emotion-grounded speaker...")
        path, _ = download(
            "https://www.dropbox.com/s/0erh464wag8ods1/emo_grounded_sat_speaker_cvpr21.zip?dl=1",
            speaker_checkpoint)
        with zipfile.ZipFile(path, "r") as f:
            f.extractall("cache/")
        shutil.move("cache/03-17-2021-20-32-19/checkpoints/best_model.pt",
                    speaker_checkpoint)
        shutil.move("cache/03-17-2021-20-32-19/config.json.txt",
                    speaker_saved_args)
        shutil.rmtree("cache/03-17-2021-20-32-19")
    self.normalize = tv.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
def prepare(self):
    if "version" in self.settings:
        version = self.settings["version"]
        download(self.url % (version, version), self.zipfile)
        unzip(self.zipfile, 'temp')
    else:
        git_clone(self.repo, 'master', 'src')
def find_threads(course, forum_folder, forum_id):
    """
    Find all threads in current forum.
    Note: forum 0 has every thread!
    """
    # download the 1st page of given forum
    query = 'sort=firstposted&page=1'
    url = '{}/api/forum/forums/{}/threads?{}'
    url = url.format(course.get_url(), forum_id, query)
    path = forum_folder + '/temp.json'
    util.download(url, path, course.get_cookie_file())

    # download a huge page with all threads
    forum = util.read_json(path)
    num_threads = forum['total_threads']
    url += '&page_size={}'.format(num_threads)
    util.download(url, path, course.get_cookie_file())

    # add each thread's id to forum info
    threads = util.read_json(path)['threads']
    util.remove(path)

    path = forum_folder + '/info.json'
    forum = util.read_json(path)

    forum_threads = []
    for thread in reversed(threads):
        forum_threads.append({'id': thread['id']})

    forum['num_threads'] = num_threads
    forum['threads'] = forum_threads
    util.write_json(path, forum)
def fetch_file(filename, path, patchline='live', platform='mac', region='NA'):
    """ fetches a file from the game client """
    rman = PatcherManifest(download(patchline, platform, region))
    file = rman.files[filename]

    bundle_ids = {}
    for chunk in file.chunks:
        bundle_ids[chunk.bundle.bundle_id] = True
    bundle_ids = list(bundle_ids.keys())

    for bundle_id in bundle_ids:
        name = f'{bundle_id:016X}.bundle'
        url = os.path.join(constants.riotcdn_url, 'channels', 'public',
                           'bundles', name)
        util.download(url, os.path.join(bundle_dir, name))

    f = open(path, 'wb')
    for chunk in file.chunks:
        bundle_id = chunk.bundle.bundle_id
        bundle = open(os.path.join(bundle_dir, f'{bundle_id:016X}.bundle'), 'rb')
        bundle.seek(chunk.offset)
        f.write(zstd.decompress(bundle.read(chunk.size)))
        bundle.close()
    f.close()
def download(patchline='live', platform='mac', region='NA'):
    """ downloads a manifest file """
    manifest_path = os.path.join(
        manifest_dir, '{region}.manifest'.format(region=region.lower()))
    if os.path.exists(manifest_path):
        return manifest_path

    data = patchlines_conf
    patchline_key = 'keystone.products.league_of_legends.patchlines.' + patchline
    if patchline_key not in data:
        print("patchline '{patchline}' not found".format(patchline=patchline))
        exit(1)

    data = data[patchline_key]['platforms']
    if platform not in data:
        print("platform '{platform}' not found".format(platform=platform))
        exit(1)

    data = data[platform]['configurations']
    data = list(filter(lambda x: x['id'].lower() == region.lower(), data))
    if len(data) != 1:
        print("region '{region}' not found".format(region=region))
        exit(1)

    data = list(
        filter(lambda x: x['id'].lower() == 'game_patch',
               data[0]['secondary_patchlines']))
    util.download(data[0]['url'], manifest_path)
    return manifest_path
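# Hedged illustration only: the nested shape of patchlines_conf that the
# lookups above assume. The keys and URL below are made up for the example,
# not taken from Riot's real configuration.
example_patchlines_conf = {
    'keystone.products.league_of_legends.patchlines.live': {
        'platforms': {
            'mac': {
                'configurations': [{
                    'id': 'NA',
                    'secondary_patchlines': [
                        {'id': 'game_patch',
                         'url': 'https://example.invalid/NA.manifest'},
                    ],
                }],
            },
        },
    },
}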
def build_files():
    try:
        kwargs = {'top': folderid, 'by_name': False}
        for path, root, dirs, files in util.walk(service, **kwargs):
            path = [
                "".join([
                    c for c in dirname
                    if c.isalpha() or c.isdigit() or c == ' '
                ]).rstrip() for dirname in path
            ]
            for f in files:
                dest = os.path.join(destination, os.path.join(*path))
                file_dest.append((service, f, dest, skip))
        if file_dest != []:
            # First valid account found, break to prevent further searches
            return True
    except ValueError:
        # mimetype is not a folder
        dlfile = service.files().get(fileId=folderid,
                                     supportsAllDrives=True).execute()
        print(f"\nNot a valid folder ID. \nDownloading the file : {dlfile['name']}")
        # Only use a single process for downloading 1 file
        util.download(service, dlfile, destination, skip)
        sys.exit(0)
    except HttpError:
        print(f"{Fore.RED}File not found in account: {acc}{Style.RESET_ALL}")
        return False
def download_cache(directory, url):
    if not len(glob(f"{directory}/*")) > 0:
        os.makedirs(directory, exist_ok=True)
        tgz_name = f"{directory}.tgz"
        download(url, tgz_name)
        extract_tgz(tgz_name, directory)
        os.remove(tgz_name)
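# The extract_tgz helper used above is not shown; a minimal sketch, assuming
# it simply unpacks a gzipped tarball into the target directory:
import tarfile

def extract_tgz(tgz_name, directory):
    # "r:gz" opens a gzip-compressed tar archive for reading
    with tarfile.open(tgz_name, "r:gz") as tar:
        tar.extractall(path=directory)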
def download_thread(course, threads_folder, thread_id, page=1, post_id=None):
    """
    Download a thread.
    """
    # Download 1st page
    url = '{}/api/forum/threads/{}'.format(course.get_url(), thread_id)
    if post_id:
        url = '{}?post_id={}&position=after'.format(url, post_id)

    path = '{}/{}/{}.json'.format(threads_folder, thread_id, page)
    util.download(url, path, course.get_cookie_file())

    thread = util.read_json(path)
    download_images(course, threads_folder, thread)
    util.write_json(path, thread)

    # Download rest pages
    page = thread['start_page']
    num_page = thread['num_pages']
    if page < num_page:
        page += 1
        print 'thread page {}/{}'.format(page, num_page)
        post_id = get_next_post_id(thread['posts'])
        if post_id:
            download_thread(course, threads_folder, thread_id, page, post_id)
def _find_files(url, folder, cookie):
    """
    Recursively find all files in current page.

    :param url: A URL to given page.
    :param folder: A destination folder for this page.
    :param cookie: A cookie file used for downloading.
    :return: A list of files (URL, path) in current page.
    """
    files = []

    path = '{}/temp.html'.format(folder)
    util.download(url, path, cookie)
    page = util.read_file(path)
    util.remove(path)

    # recursively find all files in sub-folders
    pattern = r'<tr><td colspan="4"><a href="(.*?)">(.*?)</a>'
    for find in re.finditer(pattern, page, re.DOTALL):
        url = find.group(1)
        sub_folder = '{}/{}'.format(folder, find.group(2))
        files += _find_files(url, sub_folder, cookie)

    # find all files in this page
    pattern = r'<tr><td>(.*?)</td>.*?Embed.*?<a href="(.*?)\?.*?">Download</a>'
    for find in re.finditer(pattern, page, re.DOTALL):
        url = find.group(2)
        file_name = find.group(1)
        path = u'{}/{}'.format(folder, file_name)
        files.append((url, path))

    return files
def download_images(course, threads_folder, thread):
    """
    Download images in given thread. The given thread object will be mutated.
    """
    posts = thread['posts']
    comments = thread['comments']
    thread_id = thread['id']
    thread_page = thread['start_page']

    images = []

    last_post_is_full = False
    for post in reversed(posts):
        if 'post_text' in post:
            text = post['post_text']
            text = find_images(text, images, thread_id, thread_page)
            post['post_text'] = text
            last_post_is_full = True
        elif last_post_is_full:
            break

    for comment in comments:
        text = comment['comment_text']
        text = find_images(text, images, thread_id, thread_page)
        comment['comment_text'] = text

    for url, path in images:
        path = '{}/{}'.format(threads_folder, path)
        util.download(url, path, course.get_cookie_file(), resume=True)
def get_languages_by_patch(patch):
    """ fetches the languages by patch from CommunityDragon """
    if len(lang_cache.keys()) > 0:
        return lang_cache

    tmp_path = tempfile.mkdtemp()
    data = requests.get(
        '{base}/json/{patch}/{path}'.format(base=constants.cdragon_url,
                                            patch=patch,
                                            path='game/data/menu')).json()
    data = list(filter(lambda x: x['name'].startswith('fontconfig_'), data))

    for _, item in enumerate(data):
        if item['name'] != 'fontconfig_en_us.txt':
            continue
        export_path = os.path.join(tmp_path, item['name'])
        util.download(
            '{base}/{patch}/{path}'.format(
                base=constants.cdragon_url,
                patch=patch,
                path='game/data/menu/' + item['name'],
            ), export_path)
        country, lang = item['name'].replace('fontconfig_', '').replace(
            '.txt', '').split('_')
        lang_cache[country + '_' + lang.upper()] = RstFile(export_path)

    return lang_cache
def _download_old_quizzes(course, item, path):
    """ Download old version in-video quizzes. """
    url = '{}/admin/quiz/quiz_load?quiz_id={}'
    url = url.format(course.get_url(), item['quiz']['parent_id'])
    util.download(url, path, course.get_cookie_file())
    util.write_json(path, util.read_json(path))
def download(untar_dir):
    valgrind_release_url = VALGRIND_URL_FORMAT.replace('VER', VALGRIND_VER)
    util.download(valgrind_release_url)
    valgrind_tarball = 'valgrind-VER.tar.bz2'.replace('VER', VALGRIND_VER)
    util.untar(valgrind_tarball)
    shutil.move('valgrind-VER'.replace('VER', VALGRIND_VER), untar_dir)
def download_stats(self):
    url = self.url + '/data/stats'
    path = self.info_folder + '/stats.html'
    util.download(url, path, self.cookie_file)

    content = util.read_file(path)
    pattern = r'<h1.*?</table>'
    content = re.search(pattern, content, re.DOTALL).group(0)
    util.write_file(path, content)
def _check_pyparsing(self):
    if self.check_module("pyparsing"):
        return
    url, name = URLS['pyparsing']
    util.download(url, name)
    self.run_ez(name)
def _check_setuptools(self):
    if self.check_module("setuptools"):
        return
    url, name = URLS['ez_setup']
    util.download(url)
    self.run_py(name)
def prepare(self):
    if "version" in self.settings:
        version = self.settings["version"]
        download(self.url % (version), self.zipfile)
        unzip(self.zipfile, 'temp')
        cp('temp/variant-%s/' % (version), 'temp/')  # TODO: mv would be cleaner
    else:
        git_clone(self.repo, 'master', 'temp')
def __init__(self, split):
    assert split in MNIST.urls
    # check if downloads exist, and download otherwise
    image_file = util.download(MNIST.urls[split]['image'])
    label_file = util.download(MNIST.urls[split]['label'])
    # parse mats and read into tf.data.Dataset
    self.x, self.y = self._load(image_file, label_file)
def __init__(self, split, one_hot=False):
    assert split in MNIST.urls
    self.one_hot = one_hot
    # check if downloads exist, and download otherwise
    image_file = util.download(MNIST.urls[split]['image'])
    label_file = util.download(MNIST.urls[split]['label'])
    self.x, self.y = self._load(image_file, label_file)
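# The dataset constructors above rely on a util.download(url) that caches by
# filename and returns the local path; a minimal sketch of that contract,
# assuming a simple cache directory (the 'data' default is illustrative):
import os
import urllib.request

def download(url, cache_dir='data'):
    os.makedirs(cache_dir, exist_ok=True)
    path = os.path.join(cache_dir, os.path.basename(url))
    if not os.path.exists(path):  # only fetch when the file is missing
        urllib.request.urlretrieve(url, path)
    return path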
def do_chunk(file_list, verbose):
    for item in file_list:
        url = item[0]
        filename = item[1]
        if not url:
            print("There is no url available for '%s', so cannot download it" % filename)
            continue
        util.download(session, url, filename, verbose=verbose)
def test_download(self):
    # Create a temporary file name, then delete it...
    # Of course, never do this in non-testing code!!
    f = tempfile.NamedTemporaryFile()
    f.close()
    # Make sure it doesn't exist.
    assert not os.path.exists(f.name)
    # Now download, using the deleted temporary file name.
    util.download(self.url, f.name)
    assert util.sha1file(f.name) == self.sha1_gtfs
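# util.sha1file is assumed by the test above but not shown; a minimal sketch
# that streams the file in chunks and returns the hex digest:
import hashlib

def sha1file(filename, blocksize=65536):
    h = hashlib.sha1()
    with open(filename, 'rb') as f:
        for block in iter(lambda: f.read(blocksize), b''):
            h.update(block)
    return h.hexdigest()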
def prepare_negative_dataset(dataset_directory):
    negative_dataset_url = \
        'http://www.ics.uci.edu/~dramanan/papers/parse/people.zip'
    data_filepath = os.path.join(dataset_root,
                                 os.path.basename(negative_dataset_url))
    if not os.path.exists(data_filepath):
        download(negative_dataset_url, path=data_filepath)
    unzip(data_filepath, dataset_root)
    shutil.move(os.path.join(dataset_root, 'people_all'), dataset_directory)
def _check_python(self):
    if os.path.exists(os.path.join(PYDIR, "python.exe")):
        return True
    url, name = URLS['python']
    util.download(url)
    print "Extracting the python installer..."
    os.system('msiexec /a %s /qn TARGETDIR="%s"' % (name, PYDIR))
def download(url, name):
    downloads = __addon__.getSetting('downloads')
    if '' == downloads:
        xbmcgui.Dialog().ok(__scriptname__, __language__(30031))
        return
    stream = resolve(url)
    if stream:
        util.reportUsage(__scriptid__, __scriptid__ + '/download')
        name += '.flv'
        util.download(__addon__, name, stream['url'],
                      os.path.join(downloads, name))
def _load_from_url(url, to_filename):
    """
    First downloads the connectome file to to_filename, then loads it and
    returns the reference to the connectome object.

    Not tested.
    """
    from util import download
    download(url, to_filename)
    return _load_from_cff(to_filename)
def prepare(self):
    if "version" in self.settings:
        version = self.settings["version"]
        download(self.url % (version), self.zipfile)
        unzip(self.zipfile, 'temp')
        cp('temp/imgui-%s/' % (version), 'temp/')  # TODO: mv would be cleaner
    else:
        git_clone(self.repo, 'master', 'temp')
    if "patch" in self.settings:
        with cd('temp/'):
            patch(self.settings["patch"])
def get_bulk_data(data_type='default_cards', dest_uri="data"):
    bulk_data = Request('bulk-data')
    bulk_data_list = bulk_data.data
    data_to_get = [d for d in bulk_data_list if d['type'] == data_type][0]

    uri = data_to_get['permalink_uri']
    dest = os.path.join(dest_uri, uri.split('/')[-1])

    # Download on the first run, when nothing is cached yet
    if not os.path.isfile(dest):
        util.download(uri, dest_uri)
        return dest

    # Check if bulk data has any changes from last download
    last_dl = datetime.fromtimestamp(os.path.getmtime(dest), timezone.utc)
    last_update = datetime.fromisoformat(data_to_get['updated_at'])
    if last_dl < last_update:
        util.download(uri, dest_uri)
    return dest
def download_email_blacklist(self):
    url = self.url + '/data/export/pii'
    path = self.info_folder + '/temp.html'
    util.download(url, path, self.cookie_file)

    content = util.read_file(path)
    pattern = r'href="(https://coursera-reports.*?)"'
    url = re.search(pattern, content).group(1)
    util.remove(path)

    path = self.info_folder + '/email_blacklist.csv'
    util.download(url, path, self.cookie_file)
def run():
    parser = ArgumentParser(
        prog='download_file',
        description='download and cache the jar, then create a hard link '
        'specified by \'-o\'. Background: with the jar to create genrule '
        'used for prebuilt_jar.')
    parser.add_argument(
        '--repo',
        nargs='?',
        type=lambda alias: map_of_sites[alias]
        if alias in map_of_sites.keys() else alias,
        default=npmjs_defs.MAVEN,
        help='web site from where to download (default: %s). Can be one of '
        'the key of %s' % (npmjs_defs.MAVEN, __builtin__.str(map_of_sites)),
        metavar='web site')
    parser.add_argument(
        '--cache-path',
        nargs='?',
        type=util.path_of,
        default=util.path_of(npmjs_defs.CACHED_PATH),
        help='path to cached jar (default: %s) ' % npmjs_defs.CACHED_PATH,
        metavar='cached path')
    required = parser.add_argument_group('required input')
    required.add_argument(
        '-u',
        required=True,
        help='part of URL work with, not including the content of \'-w\'',
        metavar='tail of URL')
    required.add_argument(
        '-o',
        required=True,
        help='the local hard link name',
        metavar='file name')
    parser.add_argument(
        '--sha1',
        help='jar\'s SHA-1, with it to verify content integration',
        metavar='SHA-1')
    args = parser.parse_args()

    cache_entry = cached_file(args.sha1, args.o, args.u, args.cache_path)
    url = npmjs_defs.get_url(args.repo, args.u)
    if not os.path.isfile(cache_entry):
        util.download(url, cache_entry, True)
    if args.sha1 and util.is_integrated(cache_entry, args.sha1) is False:
        print('error download %s' % url, file=sys.stderr)
        delete_wrong_file(cache_entry)
        sys.exit(1)

    cache_entry = npmjs_defs.make_sure_deps(args.u, cache_entry, args.repo)
    check_dir_of(args.o)
    hard_link(cache_entry, args.o)
    return 0
def download_info(self):
    url = self.url
    temp = self.info_folder + '/temp.html'
    util.download(url, temp, self.cookie_file)
    page_html = util.read_file(temp)
    util.remove(temp)

    info_files = ['user.json', 'course.json', 'sidebar.json']
    matches = re.findall(r'JSON\.parse\("(.*?)"\);', page_html)

    for match, info_file in zip(matches, info_files)[1:]:
        info = util.unicode_unescape(match).replace('\\\\', '')
        path = '{}/{}'.format(self.info_folder, info_file)
        util.write_json(path, util.read_json(info, True))
def prepare_gold(version):
    curdir = os.getcwd()
    binutils_tarball = binutils_tarball_format.replace('VERSION', version)
    binutils_url = binutils_url_format.replace('TARBALL', binutils_tarball)
    util.download(binutils_url)
    util.untar(binutils_tarball)
    os.chdir('binutils-VERSION'.replace('VERSION', version))
    util.configure(
        '--prefix=%s --enable-gold --enable-plugins --disable-werror' % curdir)
    util.make('-j4')
    util.make('-j4 all-gold')
    util.make('install')
    os.chdir(curdir)
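# util.configure and util.make above are assumed to be thin wrappers around
# the shell; a minimal sketch under that assumption (they run in whatever
# directory the caller has chdir'd into, as prepare_gold relies on):
import subprocess

def configure(flags=''):
    # run ./configure with the given flag string in the current directory
    subprocess.check_call('./configure ' + flags, shell=True)

def make(flags=''):
    # run make with the given flag string in the current directory
    subprocess.check_call('make ' + flags, shell=True)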
def pip_download_install():
    url = 'https://pypi.python.org/packages/source/p/pip/pip-6.0.8.tar.gz'
    target = 'pip-6.0.8.tar.gz'
    targetdir = 'pip-6.0.8'
    print('============ downloading ' + target + ' from: ' + url)
    util.download(url, target)
    print('============ extracting ' + target)
    util.decompress(target, '.')
    os.chdir(targetdir)
    print('============ installing pip')
    cmdResult = os.popen('python setup.py install').readlines()
    util.printCommandResult(cmdResult)
    print('============ installed, please add pip to your path')
def setup(self):
    # check model path
    if self._check_model_path():
        print("{} path already exists!".format(self._get_model_dir_path()))
        return

    # download model data
    model_dir_path = self._get_model_dir_path()
    os.makedirs(model_dir_path, exist_ok=True)
    for url in self._url_list:
        file_name = os.path.basename(url)
        file_path = os.path.join(model_dir_path, file_name)
        util.download(url, file_path)
def setup(self):
    # check model path
    if self._check_model_path():
        print("{} path already exists!".format(self._get_model_dir_path()))
        return

    # download archived model
    zip_file_name = self.__get_zip_filename()
    util.download(self._url, os.path.join(self._root_path, zip_file_name))

    # extract archive
    archive_path = self.__get_archive_path()
    util.extract_all(archive_path, self._root_path)
    os.remove(archive_path)
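# util.extract_all above is assumed to unpack the downloaded archive into the
# root path; a minimal sketch for the zip archive case used here:
import zipfile

def extract_all(archive_path, dest_dir):
    with zipfile.ZipFile(archive_path, 'r') as zf:
        zf.extractall(dest_dir)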
def _check_pywin32(self):
    if self.check_module("pywintypes"):
        return
    url, name = URLS['pywin32']
    util.download(url, name)
    util.unzip(name, 'tmp_pyw32')
    os.system("xcopy /q /y /e tmp_pyw32\\PLATLIB\\* \"%s\\Lib\\site-packages\"" % PYDIR)
    os.system("copy /y \"%s\\Lib\\site-packages\\pywin32_system32\\*\" \"%s\"" % (PYDIR, PYDIR))
    os.system("copy /y \"%s\\Lib\\site-packages\\win32\\*.exe\" \"%s\"" % (PYDIR, PYDIR))
    os.system("copy /y \"%s\\Lib\\site-packages\\win32\\*.dll\" \"%s\"" % (PYDIR, PYDIR))
    os.system("rmdir /s /q tmp_pyw32")
def download(course, item):
    """
    Download quiz XML.

    :param course: A Course object.
    :param item: {
        "last_updated": 1409275771,
        "authentication_required": 1,
        "proctoring_requirement": "none",
        "open_time": 1409263200,
        "parent_id": 87,
        "soft_close_time": 1409752800,
        "duration": 0,
        "maximum_submissions": 1,
        "deleted": 0,
        "section_id": "6",
        "__type": "quiz",
        "order": "8",
        "item_type": "quiz",
        "quiz_type": "survey",
        "hard_close_time": 1409925600,
        "item_id": "87",
        "title": "Welcome Survey",
        "__published": 1,
        "id": 88,
        "uid": "quiz88"
    }
    :return: None.
    """
    # path = '{}/quiz/info/{}.json'
    # path = path.format(course.get_folder(), item['item_id'])
    #
    # util.make_folder(path, True)
    # util.write_json(path, item)

    url = '{}/admin/quiz/raw_edit?quiz_id={}'
    url = url.format(course.get_url(), item['item_id'])

    path = '{}/quiz/{}.xml'
    path = path.format(course.get_folder(), item['item_id'])

    util.download(url, path, course.get_cookie_file())

    pattern = r'<textarea.*?>(.*)</textarea>'
    xml = re.search(pattern, util.read_file(path), re.DOTALL).group(1)
    xml = util.remove_coursera_bad_formats(xml)
    xml = '<?xml version="1.0" encoding="UTF-8"?>\n' + xml

    util.write_file(path, xml)
def download(course, item):
    """
    Download a wiki page.

    :param course: A Course object.
    :param item: {
        "uid": "coursepageEYJIs_YAEeKNdCIACugoiw",
        "section_id": "27",
        "order": "1",
        "item_type": "coursepage",
        "__type": "coursepage",
        "item_id": "EYJIs_YAEeKNdCIACugoiw",
        "id": "EYJIs_YAEeKNdCIACugoiw",
        "metadata": {
            "openTime": 1373785724930,
            "locked": true,
            "creator": 726142,
            "title": "Home",
            "modified": 1405321775510,
            "canonicalName": "home",
            "created": 1374849092873,
            "visible": true,
            "version": 11
        }
    }
    :return: None.
    """
    # path = '{}/wiki/info/{}.json'
    # path = path.format(course.get_folder(), item['metadata']['canonicalName'])
    #
    # util.make_folder(path, True)
    # util.write_json(path, item)

    url = '{}/admin/api/pages/{}?fields=content'
    url = url.format(course.get_url(), item['item_id'])

    path = '{}/wiki/{}.html'
    path = path.format(course.get_folder(), item['metadata']['canonicalName'])

    util.download(url, path, course.get_cookie_file())

    wiki = util.read_json(path)
    content = wiki['content']
    if content:
        content = util.remove_coursera_bad_formats(content)
    else:
        content = ''

    util.write_file(path, content)
def __getitem__(self, index):
    path = self.paths[index]
    cache_path = self.cache_paths[index]
    main_label = self.main_labels[index]
    aux_label = self.aux_labels[index]

    if (cache_path in self.is_cached
            or (self.cache_images_on_disk and os.path.isfile(cache_path))):
        # Set path to cache path so we will read from disk
        path = cache_path
        self.is_cached.add(cache_path)

    if path.startswith('http'):
        raw_data = util.download(path)
        # Write to disk if we are caching
        if self.cache_images_on_disk:
            with open(cache_path, 'wb') as f:
                f.write(raw_data)
            self.is_cached.add(cache_path)
        data = torch.tensor(list(raw_data), dtype=torch.uint8)
        image = io.decode_image(data, mode=io.image.ImageReadMode.RGB)
    else:
        image = io.read_image(path, mode=io.image.ImageReadMode.RGB)

    image = self.transform(image) if self.transform else image
    return image, main_label, aux_label
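# The util.download(path) used in __getitem__ above is assumed to return the
# raw image bytes for an http(s) URL; a minimal sketch of that contract using
# requests (the timeout value is illustrative):
import requests

def download(url, timeout=30):
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # surface HTTP errors instead of caching junk
    return response.content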
def upgrade(self, version, force=False):
    if not mkdir(self.path):
        return version

    if self.updater == "bukkitdev":
        uver, urlh = self.__bukkitdev_info()
    elif self.updater == "bukkitdl":
        uver, urlh = self.__bukkitdl_info()
    elif self.updater == "github":
        uver, urlh = self.__github_info()
    elif self.updater == "jenkins":
        uver, urlh = self.__jenkins_info()
    else:
        log.error("%s: package upgrade failed: invalid updater `%s'",
                  self.package, self.updater)
        return version

    if not urlh:
        log.error("%s: package upgrade failed", self.package)
        return version

    out = os.path.join(self.path, "%s.%s" % (self.package, self.type))

    if uver and uver == version and not force:
        log.info("%s: package already up-to-date", self.package)
        return version

    if not self.dryrun and not download(urlh, out):
        return version

    log.info("%s: package upgraded: %s -> %s", self.package, version, uver)

    if self.dryrun or (self.type != "zip"):
        return uver

    if len(self.extract) < 1:
        return uver

    zf = ZipFile(out, "r")
    nl = zf.namelist()

    for path in self.extract:
        if not path.endswith("/") and path in nl:
            zf.extract(path, self.path)
            continue
        for zpath in nl:
            if zpath.endswith("/"):
                continue
            if not zpath.startswith(path):
                continue
            zf.extract(zpath, self.path)

    zf.close()
    unlink(out)
    return uver
def download(course):
    """
    Download course assets.

    :param course: A Coursera course object.
    :return: None
    """
    url = course.get_url() + '/admin/assets'
    folder = course.get_folder() + '/../assets'
    cookie = course.get_cookie_file()

    files = _find_files(url, folder, cookie)
    num_file = len(files)

    for idx, (url, path) in enumerate(files):
        print '{}/{}'.format(idx + 1, num_file)
        util.download(url, path, cookie, resume=True)
def action(header, message, ops):
    # <a href=3D"http://www.qmags.com/R/?i=3D2374a8&=\ne=3D2278786&doi=3D52256083&uk=3D2FE1171B167127DE131449DD111622C5882FF14=\nF115.htm" target=3D"_blank">Download</a>'
    oneline = message.replace('=\r\n', '').replace('=3D', '=')
    m = re.search('(http://[^"]*)[^>]*>(?=Download)', oneline)
    if not m:
        m = re.search('(http://[^"]*)[^>]*>(?=DOWNLOAD)', oneline)
    if not m:
        m = re.search('(http://[^"]*)[^>]*>(?=<b>DOWNLOAD)', oneline)
    if not m:
        m = re.search('(http://[^"]*)[^>]*>(?=Click here</a> to download)',
                      oneline)  # , re.DOTALL)
    try:
        if not m:
            # Qmags PDF....http://"
            pdfinx = message.index('Qmags PDF')
            ro = re.compile('(http://.+?)["|\s]')
            m = ro.search(message, pdfinx)
    except ValueError:
        m = None
    if not m:
        ops.move('Downloaded Qmags/Error')
        return "SKIP"

    link = m.group(1)
    print "Opening link", link
    r = urllib2.urlopen(urllib2.Request(link))
    text = r.read()

    # TODO: check for error/expired link, and move to expired folder
    # extract the download link
    m = re.search('(http://delivery.+?)["|\s]', text)
    if not m:
        ops.move('Downloaded Qmags/Error')
        return 'NOT FOUND'

    found = m.group(1)
    print "Downloading", found
    download(found, '~/Dropbox/Qmags')
    ops.move('Downloaded Qmags')
def dealOneWord(wordPageURL):
    """Process a single word.

    :param wordPageURL: example:
        'http://www.51voa.com/Voa_English_Learning/1949-minority-54577.html'
    """
    try:
        print wordPageURL
        content = util.getPageContent(wordPageURL)
        mp3URL = getMp3URL(content)
        mp3Text = getMp3Text(content)
        util.download(mp3URL)
        title = re.findall(r'[\w-]+.mp3', mp3URL).pop()
        util.insertToFile(title + ':\n' + mp3Text)
    except IndexError:
        print traceback.format_exc()
def __init__(self, split):
    assert split in SVHN.urls
    # check if downloads exist, and download otherwise
    file = util.download(SVHN.urls[split])
    # parse mats and read into tf.data.Dataset
    self.x, self.y = self._load(file)
def download(course):
    """
    Download grade book.

    :param course: A Coursera course object.
    :return: None.
    """
    path = course.get_info_folder() + '/temp.html'
    url = course.get_url() + '/admin/course_grade/export_grades'
    util.download(url, path, course.get_cookie_file())

    pattern = r'graded. <a href="(.*?)">'
    find = re.search(pattern, util.read_file(path), re.DOTALL)
    util.remove(path)

    if find:
        url = find.group(1)
        path = course.get_info_folder() + '/grades.csv'
        util.download(url, path, course.get_cookie_file())
def download_original_video(course, item):
    """
    Download original (high-quality) video.
    """
    if item['source_video']:
        url = 'https://spark-public.s3.amazonaws.com/{}/source_videos/{}'
        url = url.format(course.get_name(), item['source_video'])

        path = '{}/../original_videos/{}'
        file_name = item['source_video'].replace('.mp4.mpg', '.mp4')
        path = path.format(course.get_folder(), file_name)

        # if item['__published'] == 1:
        #     title = item['title'].replace('"', '\\"')
        #     text = ' "{}": ["{}", "{}"], '.format(file_name, path, title)
        #     util.write_log(text)

        util.download(url, path, course.get_cookie_file(), resume=True)
def install_talib_for_linux():
    url = 'http://downloads.sourceforge.net/project/ta-lib/ta-lib/0.4.0/ta-lib-0.4.0-src.tar.gz'
    target = 'ta-lib-0.4.0-src.tar.gz'
    util.download(url, target)
    util.decompress(target, '.')
    os.chdir('ta-lib')
    print('==========configure ta-lib============')
    result = os.popen('./configure').readlines()
    util.printCommandResult(result)
    print('==========configure end ============')
    print('==========make ta-lib ================')
    result = os.popen('make').readlines()
    util.printCommandResult(result)
    print('==========make ta-lib end ============')
    print('==========make install ta-lib =======')
    result = os.popen('make install').readlines()
    util.printCommandResult(result)
    print('==========make install ta-lib end =======')
def download(self, filename=None, cache=True, verify=True, sha1=None):
    """Download the GTFS feed to a file. Return filename."""
    if cache and self.verify_sha1(filename, sha1):
        return filename
    filename = util.download(self.url(), filename)
    if verify and sha1 and not self.verify_sha1(filename, sha1):
        raise errors.InvalidChecksumError(
            "Incorrect checksum: %s, expected %s" %
            (util.sha1file(filename), sha1))
    return filename
def download(course, item):
    """
    Download assignment HTML.

    :param course: A Course object.
    :param item: {
        "maximum_submissions": 0,
        "open_time": 1409234400,
        "parent_id": 5,
        "soft_close_time": 1409965200,
        "title": "Module 1: Circles",
        "deleted": 0,
        "section_id": "6",
        "order": "9",
        "item_type": "assignment",
        "__type": "assignment",
        "hard_close_time": 1410138000,
        "item_id": "5",
        "last_updated": 1409236863,
        "__published": 1,
        "id": 6,
        "uid": "assignment6"
    }
    :return: None.
    """
    # path = '{}/assignment/info/{}.json'
    # path = path.format(course.get_folder(), item['item_id'])
    #
    # util.make_folder(path, True)
    # util.write_json(path, item)

    url = '{}/admin/assignment?assignment_id={}'
    url = url.format(course.get_url(), item['item_id'])

    path = '{}/assignment/{}.html'
    path = path.format(course.get_folder(), item['item_id'])

    util.download(url, path, course.get_cookie_file())

    pattern = r'<textarea.*?>(.*)</textarea>'
    content = re.search(pattern, util.read_file(path), re.DOTALL).group(1)
    content = util.remove_coursera_bad_formats(content)

    util.write_file(path, content)
def mask_tiles(tcd_tile):
    vrt = 'tile.vrt'
    tcd_s3 = 's3://gfw2-data/forest_cover/2000_treecover/{}'
    masked_output = 's3://gfw2-data/alerts-tsv/sofi/raster-analysis/elevation/tif/'

    if len(tcd_tile) != 0 and '.tif' in tcd_tile:
        # download tcd tile
        util.download(tcd_s3.format(tcd_tile), 'data/tcd/')

        # clip elevation raster to extent box
        clipped_elevation = util.clip_raster(vrt, 'data/tcd/{}'.format(tcd_tile))

        # mask to 30% tcd
        masked_30tcd = util.mask_raster(clipped_elevation, 'data/tcd/{}'.format(tcd_tile))

        # upload to s3
        util.upload_to_s3(masked_30tcd, masked_output)

        # clean workspace
        util.clean_workspace(tcd_tile.replace('Hansen_GFC2014_treecover2000_', ''))
def download_paper_pdf_if_needed(meta):
    info = util.get_info_fn('[{}] '.format(meta['title']))

    if os.path.isfile(meta['pdf-path']):
        info('file "{}" already exists, exiting'.format(meta['pdf-path']))
        return meta

    try:
        url = get_paper_pdf_url(meta['url'])
    except ValueError:
        info('ERROR: could not get pdf url for paper url "{}"'.format(
            meta['url']))
        return meta

    try:
        path = get_local_pdf_path(meta, cfg.paths['pdfs-dir'])
        info('downloading pdf from "{}"'.format(url))
        util.download(url, path)
        meta['pdf-path'] = path
    except Exception as e:
        info('ERROR downloading pdf: "{}"'.format(e))

    return meta
def sendImage(self, job):
    """
    Send an image to a contact.
    """
    # create db session
    _session = self.session()
    # get the message
    message = _session.query(Message).get(job.message_id)
    caption = message.text
    logger.debug('Retrieved the message with caption %s of type %s' %
                 (caption, message.message_type))
    asset = _session.query(Asset).get(message.asset_id)
    if asset is not None:
        url = asset.url
        logger.debug('About to download %s' % url)
        if url.startswith("//"):
            url = "https:%s" % url
        # download the file
        path = download(url)
        logger.debug('File downloaded to %s' % path)
        if path is not None:
            # get whatsapp username from targets
            target = normalizeJid(job.targets)
            # create the upload request entity
            entity = RequestUploadIqProtocolEntity(
                RequestUploadIqProtocolEntity.MEDIA_TYPE_IMAGE, filePath=path)
            # the success callback
            successFn = lambda successEntity, originalEntity: self.onRequestUploadSuccess(
                target, path, successEntity, originalEntity, caption)
            # The on error callback
            errorFn = lambda errorEntity, originalEntity: self.onRequestUploadError(
                target, path, errorEntity, originalEntity)
            logger.debug('About to call send the image send iq')
            self._sendIq(entity, successFn, errorFn)
            job.runs += 1
            job.sent = True
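# The module-level download(url) used by sendImage above is assumed to fetch
# the asset to a temporary file and return its local path, or None on
# failure; a minimal sketch of that contract (the '.jpg' fallback suffix is
# illustrative):
import os
import tempfile
import urllib.request

def download(url):
    try:
        suffix = os.path.splitext(url)[1] or '.jpg'
        fd, path = tempfile.mkstemp(suffix=suffix)
        os.close(fd)
        urllib.request.urlretrieve(url, path)
        return path
    except Exception:
        return None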