def process_track(root_dir, track_url):
    print("======= Process track: " + track_url)
    f = urlopen(track_url)
    # f = codecs.open("test_chapter.html", "r", "utf-8")
    data = f.read()
    f.close()
    parsed_html = BeautifulSoup(data)
    param = parsed_html.find("param", {"name": "flashvars"})
    songs_urls = re.findall(
        "http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
        param["value"]
    )
    for songs_url in songs_urls:
        f2 = urlopen(songs_url)
        data2 = f2.read()
        parsed_html2 = BeautifulSoup(data2)
        tracks = parsed_html2.findAll("track")
        for track in tracks:
            song_url = track["location"]
            o = urlparse(song_url)
            file_name = basename(song_url)
            file_path = root_dir + "/" + file_name
            if os.path.exists(file_path):
                print("File exists. Skip: " + file_path)
            else:
                print("Download: " + file_name)
                urlretrieve(song_url, file_path)
def _download_reference_files(conn):
    print('Downloading reference files')
    if not exists(reference_base_dir):
        mkdir(reference_base_dir)

    files = {'tree': (get_reference_fp('gg_13_8-97_otus.tree'),
                      'ftp://ftp.microbio.me/greengenes_release/'
                      'gg_13_8_otus/trees/97_otus.tree'),
             'taxonomy': (get_reference_fp('gg_13_8-97_otu_taxonomy.txt'),
                          'ftp://ftp.microbio.me/greengenes_release/'
                          'gg_13_8_otus/taxonomy/97_otu_taxonomy.txt'),
             'sequence': (get_reference_fp('gg_13_8-97_otus.fasta'),
                          'ftp://ftp.microbio.me/greengenes_release/'
                          'gg_13_8_otus/rep_set/97_otus.fasta')}

    for file_type, (local_fp, url) in viewitems(files):
        # Do not download the file if it exists already
        if exists(local_fp):
            print("SKIPPING %s: file already exists at %s. To "
                  "download the file again, erase the existing file first" %
                  (file_type, local_fp))
        else:
            try:
                urlretrieve(url, local_fp)
            except Exception:
                raise IOError("Error: Could not fetch %s file from %s" %
                              (file_type, url))

    ref = Reference.create('Greengenes', '13_8', files['sequence'][0],
                           files['taxonomy'][0], files['tree'][0])

    _insert_processed_params(conn, ref)
def maybe_download_pretrained_vgg(data_dir):
    """
    Download and extract pretrained vgg model if it doesn't exist
    :param data_dir: Directory to download the model to
    """
    vgg_filename = 'vgg.zip'
    vgg_path = os.path.join(data_dir, 'vgg')
    vgg_files = [
        os.path.join(vgg_path, 'variables/variables.data-00000-of-00001'),
        os.path.join(vgg_path, 'variables/variables.index'),
        os.path.join(vgg_path, 'saved_model.pb')]

    missing_vgg_files = [vgg_file for vgg_file in vgg_files if not os.path.exists(vgg_file)]
    if missing_vgg_files:
        # Clean vgg dir
        if os.path.exists(vgg_path):
            shutil.rmtree(vgg_path)
        os.makedirs(vgg_path)

        # Download vgg
        print('Downloading pre-trained vgg model...')
        with DLProgress(unit='B', unit_scale=True, miniters=1) as pbar:
            urlretrieve(
                'https://s3-us-west-1.amazonaws.com/udacity-selfdrivingcar/vgg.zip',
                os.path.join(vgg_path, vgg_filename),
                pbar.hook)

        # Extract vgg
        print('Extracting model...')
        zip_ref = zipfile.ZipFile(os.path.join(vgg_path, vgg_filename), 'r')
        zip_ref.extractall(data_dir)
        zip_ref.close()

        # Remove zip file to save space
        os.remove(os.path.join(vgg_path, vgg_filename))
def retrieveDNAPDBonServer(self,path,name=None,pathTo=None): done = False cut= 0 dnafile = None print ("http://w3dna.rutgers.edu/"+path[1:-1]+"/s0.pdb") if name is None : name = "s0.pdb" if pathTo is None : pathTo = self.vf.rcFolder+os.sep+"pdbcache"+os.sep tmpFileName = pathTo+name while not done : if cut > 100 : break try : # dnafile = urllib2.urlopen("http://w3dna.rutgers.edu/data/usr/"+path+"/rebuild/s0.pdb") # dnafile = urllib2.urlopen("http://w3dna.rutgers.edu/"+path[1:-1]+"/s0.pdb") urllib.urlretrieve("http://w3dna.rutgers.edu/"+path[1:-1]+"/s0.pdb", tmpFileName) done = True except : cut+=1 continue if done : #should download in the rcFolder # # output = open(pathTo+name,'w') # output.write(dnafile.read()) # output.close() return name,pathTo return None,None
def _load_mnist():
    data_dir = os.path.dirname(os.path.abspath(__file__))
    data_file = os.path.join(data_dir, "mnist.pkl.gz")
    print("Looking for data file: ", data_file)

    if not os.path.isfile(data_file):
        import urllib.request as url
        origin = "http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz"
        print("Downloading data from: ", origin)
        url.urlretrieve(origin, data_file)

    print("Loading MNIST data")
    f = gzip.open(data_file, "rb")
    u = pickle._Unpickler(f)
    u.encoding = "latin1"
    train_set, valid_set, test_set = u.load()
    f.close()

    train_x, train_y = train_set
    valid_x, valid_y = valid_set
    testing_x, testing_y = test_set

    training_x = np.vstack((train_x, valid_x))
    training_y = np.concatenate((train_y, valid_y))

    training_x = training_x.reshape((training_x.shape[0], 1, 28, 28))
    testing_x = testing_x.reshape((testing_x.shape[0], 1, 28, 28))

    return training_x, training_y, testing_x, testing_y
def main(target_dir, target_arch, major_py_version, miniconda_version='3.8.3', install_obvci=True): system = platform.system() URL = miniconda_url(system, target_arch, major_py_version, miniconda_version) basename = URL.rsplit('/', 1)[1] if system in ['Linux', 'Darwin']: cmd = ['bash', basename, '-b', '-p', target_dir] bin_dir = 'bin' elif system in ['Windows']: cmd = ['powershell', 'Start-Process', '-FilePath', basename, '-ArgumentList', '/S,/D=' + target_dir, '-Wait', ]#'-Passthru'] bin_dir = 'scripts' else: raise ValueError('Unsupported operating system.') if not os.path.exists(basename): print('Downloading from {}'.format(URL)) urlretrieve(URL, basename) else: print('Using cached version of {}'.format(URL)) # Install with powershell. if os.path.exists(target_dir): raise IOError('Installation directory already exists') subprocess.check_call(cmd) if not os.path.isdir(target_dir): raise RuntimeError('Failed to install miniconda :(') if install_obvci: conda_path = os.path.join(target_dir, bin_dir, 'conda') subprocess.check_call([conda_path, 'install', '--yes', '--quiet', '-c', 'pelson', 'obvious-ci'])
def getArtwork(self, mp4Path, filename='cover', thumbnail=False): # Check for local cover.jpg or cover.png artwork in the same directory as the mp4 extensions = valid_poster_extensions poster = None for e in extensions: head, tail = os.path.split(os.path.abspath(mp4Path)) path = os.path.join(head, filename + os.extsep + e) if (os.path.exists(path)): poster = path self.log.info("Local artwork detected, using %s." % path) break # Pulls down all the poster metadata for the correct season and sorts them into the Poster object if poster is None: if thumbnail: try: poster = urlretrieve(self.episodedata['filename'], os.path.join(tempfile.gettempdir(), "poster-%s.jpg" % self.title))[0] except Exception as e: self.log.error("Exception while retrieving poster %s.", str(e)) poster = None else: posters = posterCollection() try: for bannerid in self.showdata['_banners']['season']['season'].keys(): if str(self.showdata['_banners']['season']['season'][bannerid]['season']) == str(self.season): poster = Poster() poster.ratingcount = int(self.showdata['_banners']['season']['season'][bannerid]['ratingcount']) if poster.ratingcount > 0: poster.rating = float(self.showdata['_banners']['season']['season'][bannerid]['rating']) poster.bannerpath = self.showdata['_banners']['season']['season'][bannerid]['_bannerpath'] posters.addPoster(poster) poster = urlretrieve(posters.topPoster().bannerpath, os.path.join(tempfile.gettempdir(), "poster-%s.jpg" % self.title))[0] except: poster = None return poster
def get_sources(itemList, output=srcDir, verb=None):
    '''Get source files from local and internet.

    Args:
        itemList: A list of source files.
        output: A string of temp directory.
        verb: A bool of verbose.
    '''
    for item in itemList:
        if not os.path.exists(os.path.join(output, item[0].split('/')[-1])):
            if item[0].split('://')[0] in ['http', 'https', 'ftp']:
                if verb:
                    echo('cyan', 'verb:', ' downloading {} file.'.format(item[0]))
                try:
                    urlretrieve(item[0], '{}/{}'.format(output, item[0].split('/')[-1]))
                    #call(['wget', '-q', '-P', output, item[0]])
                except Exception as e:
                    echo('red', 'erro:', ' downloading error. {}'.format(e))
                    sys.exit(1)
            else:
                for src in find_files(item[0], 'rpms'):
                    if verb:
                        echo('cyan', 'verb:', ' copy {} file to build directory.'.format(src))
                    shutil.copy(src, output)
def install_from_source(setuptools_source, pip_source): setuptools_temp_dir = tempfile.mkdtemp('-setuptools', 'ptvs-') pip_temp_dir = tempfile.mkdtemp('-pip', 'ptvs-') cwd = os.getcwd() try: os.chdir(setuptools_temp_dir) print('Downloading setuptools from ' + setuptools_source) sys.stdout.flush() setuptools_package, _ = urlretrieve(setuptools_source, 'setuptools.tar.gz') package = tarfile.open(setuptools_package) try: safe_members = [m for m in package.getmembers() if not m.name.startswith(('..', '\\'))] package.extractall(setuptools_temp_dir, members=safe_members) finally: package.close() extracted_dirs = [d for d in os.listdir(setuptools_temp_dir) if os.path.exists(os.path.join(d, 'setup.py'))] if not extracted_dirs: raise OSError("Failed to find setuptools's setup.py") extracted_dir = extracted_dirs[0] print('\nInstalling from ' + extracted_dir) sys.stdout.flush() os.chdir(extracted_dir) subprocess.check_call( EXECUTABLE + ['setup.py', 'install', '--single-version-externally-managed', '--record', 'setuptools.txt'] ) os.chdir(pip_temp_dir) print('Downloading pip from ' + pip_source) sys.stdout.flush() pip_package, _ = urlretrieve(pip_source, 'pip.tar.gz') package = tarfile.open(pip_package) try: safe_members = [m for m in package.getmembers() if not m.name.startswith(('..', '\\'))] package.extractall(pip_temp_dir, members=safe_members) finally: package.close() extracted_dirs = [d for d in os.listdir(pip_temp_dir) if os.path.exists(os.path.join(d, 'setup.py'))] if not extracted_dirs: raise OSError("Failed to find pip's setup.py") extracted_dir = extracted_dirs[0] print('\nInstalling from ' + extracted_dir) sys.stdout.flush() os.chdir(extracted_dir) subprocess.check_call( EXECUTABLE + ['setup.py', 'install', '--single-version-externally-managed', '--record', 'pip.txt'] ) print('\nInstallation Complete') sys.stdout.flush() finally: os.chdir(cwd) shutil.rmtree(setuptools_temp_dir, ignore_errors=True) shutil.rmtree(pip_temp_dir, ignore_errors=True)
def download_feed():
    """ Grab feed to local file, make sure it actually has some data. """
    advertiser = sys.argv[1]
    print("Grabbing Feed For Advertiser: {0}<br>".format(parse.unquote(advertiser)))
    start_time = time.time()
    if os.path.isfile("feed.xml"):
        os.remove("feed.xml")
    try:
        feed_url = ("https://api.rewardstyle.com/v1/product_feed?"
                    "oauth_token={0}&advertiser={1}").format(TOKEN, advertiser)
        request.urlretrieve(feed_url, "feed.xml")
        print("Feed download time: {0}ms<br>".format(str(round((time.time() - start_time) * 1000))))
    except error.HTTPError as e:
        print("Feed most likely does not exist: {0}".format(e))
        sys.exit()
    file = open("feed.xml", "rb")
    file.seek(0, 2)
    size = file.tell()
    file.close()
    if size <= 64:
        print("Feed looks empty")
        sys.exit()
def download_results(self, savedir=None, only_raw=True, only_calib=False, index=None): """Download the previously found and stored Opus obsids. Parameters ========== savedir: str or pathlib.Path, optional If the database root folder as defined by the config.ini should not be used, provide a different savedir here. It will be handed to PathManager. """ obsids = self.obsids if index is None else [self.obsids[index]] for obsid in obsids: pm = io.PathManager(obsid.img_id, savedir=savedir) pm.basepath.mkdir(exist_ok=True) if only_raw is True: to_download = obsid.raw_urls elif only_calib is True: to_download = obsid.calib_urls else: to_download = obsid.all_urls for url in to_download: basename = Path(url).name print("Downloading", basename) store_path = str(pm.basepath / basename) try: urlretrieve(url, store_path) except Exception as e: urlretrieve(url.replace('https', 'http'), store_path) return str(pm.basepath)
def download_grocery_data(): base_folder = os.path.dirname(os.path.abspath(__file__)) dataset_folder = os.path.join(base_folder, "..") if not os.path.exists(os.path.join(dataset_folder, "Grocery", "testImages")): filename = os.path.join(dataset_folder, "Grocery.zip") if not os.path.exists(filename): url = "https://www.cntk.ai/DataSets/Grocery/Grocery.zip" print('Downloading data from ' + url + '...') urlretrieve(url, filename) try: print('Extracting ' + filename + '...') with zipfile.ZipFile(filename) as myzip: myzip.extractall(dataset_folder) if platform != "win32": testfile = os.path.join(dataset_folder, "Grocery", "test.txt") unixfile = os.path.join(dataset_folder, "Grocery", "test_unix.txt") out = open(unixfile, 'w') with open(testfile) as f: for line in f: out.write(line.replace('\\', '/')) out.close() shutil.move(unixfile, testfile) finally: os.remove(filename) print('Done.') else: print('Data already available at ' + dataset_folder + '/Grocery')
def get_tool(tool): sys_name = platform.system() archive_name = tool['archiveFileName'] local_path = dist_dir + archive_name url = tool['url'] #real_hash = tool['checksum'].split(':')[1] if not os.path.isfile(local_path): print('Downloading ' + archive_name); sys.stdout.flush() if 'CYGWIN_NT' in sys_name: ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE urlretrieve(url, local_path, report_progress, context=ctx) elif 'Windows' in sys_name: r = requests.get(url) f = open(local_path, 'wb') f.write(r.content) f.close() else: urlretrieve(url, local_path, report_progress) sys.stdout.write("\rDone\n") sys.stdout.flush() else: print('Tool {0} already downloaded'.format(archive_name)) sys.stdout.flush() #local_hash = sha256sum(local_path) #if local_hash != real_hash: # print('Hash mismatch for {0}, delete the file and try again'.format(local_path)) # raise RuntimeError() unpack(local_path, '.')
def run(params):
    bucket_name, prefix_name, key_name = params
    s3_key_name = '{}/{}'.format(prefix_name, key_name)
    git_key_url = 'http://data.githubarchive.org/{}'.format(key_name)
    print('Processing {} to s3...'.format(s3_key_name))
    s3_conn = S3Connection()
    bucket = s3_conn.get_bucket(bucket_name)
    key = bucket.get_key(s3_key_name)
    if key:
        print('{} is already in the bucket'.format(key))
    elif exists_url(git_key_url) is False:
        print('{} does not exist'.format(git_key_url))
    else:
        urlretrieve(git_key_url, key_name)
        # pre-process data
        preprocess.process_file(key_name)
        retry_count = 0
        while not upload_to_s3(key_name, bucket, s3_key_name) and retry_count <= MAX_RETRIES:
            retry_count += 1
            print('Failed to upload {} !'.format(s3_key_name))
        else:
            print('File {} is uploaded to {}/{}!'.format(key_name, bucket_name, prefix_name))
        os.remove(key_name)
def download(url):
    """ saves url to current folder. returns filename """
    #TODO: check that download was successful
    filename = os.path.basename(url)
    print('downloading', filename)
    urlretrieve(url, filename)
    return filename
def install_script(self, context, name, url): _, _, path, _, _, _ = urlparse(url) fn = os.path.split(path)[-1] binpath = context.bin_path distpath = os.path.join(binpath, fn) # Download script into the env's binaries folder urlretrieve(url, distpath) progress = self.progress if self.verbose: term = '\n' else: term = '' if progress is not None: progress('Installing %s ...%s' % (name, term), 'main') else: sys.stderr.write('Installing %s ...%s' % (name, term)) sys.stderr.flush() # Install in the env args = [context.env_exe, fn] p = Popen(args, stdout=PIPE, stderr=PIPE, cwd=binpath) t1 = Thread(target=self.reader, args=(p.stdout, 'stdout')) t1.start() t2 = Thread(target=self.reader, args=(p.stderr, 'stderr')) t2.start() p.wait() t1.join() t2.join() if progress is not None: progress('done.', 'main') else: sys.stderr.write('done.\n') # Clean up - no longer needed os.unlink(distpath)
def GetOsmTileData(z, x, y):
    """Download OSM data for the region covering a slippy-map tile"""
    if(x < 0 or y < 0 or z < 0 or z > 25):
        print("Disallowed (%d,%d) at zoom level %d" % (x, y, z))
        return
    directory = 'cache/%d/%d/%d' % (z, x, y)
    filename = '%s/data.osm.pkl' % (directory)
    if(not os.path.exists(directory)):
        os.makedirs(directory)
    if(z == DownloadLevel()):
        # Download the data
        s, w, n, e = tileEdges(x, y, z)
        # /api/0.6/map?bbox=left,bottom,right,top
        URL = 'http://api.openstreetmap.org/api/0.6/map?bbox={},{},{},{}'.format(w, s, e, n)
        if(not os.path.exists(filename)):
            # TODO: allow expiry of old data
            urlretrieve(URL, filename)
        return(filename)
    elif(z > DownloadLevel()):
        # use larger tile
        while(z > DownloadLevel()):
            z = z - 1
            x = int(x / 2)
            y = int(y / 2)
        return(GetOsmTileData(z, x, y))
    return(None)
def save_images(self, word, max_count=100, resize=False):
    """ Fetch images for the given word. """
    dir_name = IMAGE_DIR + "/" + word + "/"
    if not os.path.exists(dir_name):
        os.mkdir(dir_name)
    for start in range(1, max_count + 1):
        image_info = self.get_image_info(word, start)
        for image in image_info:
            file_name = self.create_file_name(
                dir_name, image["link"], image["extension"])
            file_path = dir_name + file_name
            print(image["link"])
            try:
                connection_test = request.urlopen(image["link"])
            except (HTTPError, OSError) as e:
                continue
            request.urlretrieve(image["link"], file_path)
            if resize is True:
                self.resize_image(file_path, image["extension"])
            # The API is rate limited, so wait 5 seconds between requests
            time.sleep(5)
def get_files():
    """
    Gets files of specified extension through user input from a specified full URL path;
    downloads each file to the user's specified local directory.
    """
    while True:
        url = input("Enter the URL you want to scrape from: ")
        suffix = input("\nWhat type of file do you want to scrape? \nExamples: .png, .pdf, .doc - ")
        filepath = input("Specify a file path to save to: ")
        if not url.startswith('http://') and not url.startswith('https://'):
            url = 'http://' + url
        response = requests.get(url, stream=True)
        soup = bs(response.text)
        list_of_links = [link.get('href') for link in soup.find_all('a') if suffix in str(link)]
        for link in list_of_links:
            file_name = link.rpartition('/')[-1]
            urlretrieve(url.rsplit('/', 1)[0] + '/' + link, filepath + '\\' + file_name)
        print_message(list_of_links, suffix)
        if not repeat(input("\nScrape from another URL? ")):
            break
def get_remote(url='http://www.trainingimages.org/uploads/3/4/7/0/34703305/ti_strebelle.sgems',
               local_file='ti.dat', is_zip=0, filename_in_zip=''):
    #import os
    if (is_zip == 1):
        local_file_zip = local_file + '.zip'
    if not (os.path.exists(local_file)):
        if (is_zip == 1):
            import zipfile
            # download zip file
            print('Beginning download of ' + url + ' to ' + local_file_zip)
            urlretrieve(url, local_file_zip)
            # unzip file
            print('Unziping %s to %s' % (local_file_zip, local_file))
            zip_ref = zipfile.ZipFile(local_file_zip, 'r')
            zip_ref.extractall('.')
            zip_ref.close()
            # rename unzipped file
            if len(filename_in_zip) > 0:
                os.rename(filename_in_zip, local_file)
        else:
            print('Beginning download of ' + url + ' to ' + local_file)
            urlretrieve(url, local_file)
    return local_file
def main(): os.makedirs('imgs') Product.objects.all().delete() with open('./scripts/condoms.csv') as csv_file: csv_content = csv.reader(csv_file, delimiter=',') counter = 0 for row in csv_content: img_url = row[3] img_type = img_url.split('.')[-1] img_dest = './media/imgs/product_img{0}.{1}'.format(counter, img_type) try: request.urlretrieve(img_url, img_dest) except error.HTTPError as e: print(counter, e) product = Product.objects.create( name=row[0], desc=row[1], price=row[2], amount=50, pic= ('/imgs/product_img{0}.{1}'.format(counter, img_type)) ) product.save() counter += 1 print(Product.objects.all())
def main(): scripts_path = os.path.dirname(os.path.abspath(__file__)) repo = os.path.dirname(scripts_path) cldr_dl_path = os.path.join(repo, 'cldr') cldr_path = os.path.join(repo, 'cldr', os.path.splitext(FILENAME)[0]) zip_path = os.path.join(cldr_dl_path, FILENAME) changed = False while not is_good_file(zip_path): log('Downloading \'%s\'', FILENAME) if os.path.isfile(zip_path): os.remove(zip_path) urlretrieve(URL, zip_path, reporthook) changed = True print() common_path = os.path.join(cldr_path, 'common') if changed or not os.path.isdir(common_path): if os.path.isdir(common_path): log('Deleting old CLDR checkout in \'%s\'', cldr_path) shutil.rmtree(common_path) log('Extracting CLDR to \'%s\'', cldr_path) with contextlib.closing(zipfile.ZipFile(zip_path)) as z: z.extractall(cldr_path) subprocess.check_call([ sys.executable, os.path.join(scripts_path, 'import_cldr.py'), common_path])
def navigate_dl(): browser.get('https://www.urltodlownloadfrom.com/specificaddress') while True: wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "body > div.course-mainbar.lecture-content > " "div:nth-child(2) > div.video-options > a"))) dl_url = browser.find_element_by_css_selector("body > div.course-mainbar.lecture-content > " "div:nth-child(2) > div.video-options > a").get_attribute("href") next_btn = browser.find_element_by_css_selector("#lecture_complete_button > span") title = get_title() try: dl_extras = browser.find_element_by_css_selector("body > div.course-mainbar.lecture-content > " "div:nth-child(4) > div:nth-child(3) > a").get_attribute("href") print(dl_extras) urlretrieve(dl_extras, save_path + title + '.pptx', reporthook) except NoSuchElementException: pass try: print(dl_url) urlretrieve(dl_url, save_path+title+'.mp4', reporthook) next_btn.click() except NoSuchElementException: break
def retrieve_nxml_abstract(pmid, outfile=None):
    """ Retrieves nxml file of the abstract associated with the provided pmid """
    query = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id={}&rettype=abstract".format(pmid)
    nxml_file = outfile or "{}.nxml".format(pmid)
    urlretrieve(query, nxml_file)
def retrieve_nxml_paper(pmcid, outfile=None):
    """ Retrieves nxml file for the provided pmcid """
    query = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id={}".format(pmcid)
    nxml_file = outfile or "{}.nxml".format(pmcid)
    urlretrieve(query, nxml_file)
def _download_episode(self, url, title):
    """Save the video stream to the disk.

    The filename will be sanitized(title).mp4."""
    filename = self._sanitize_filename(title) + '.mp4'
    print('Downloading {}...'.format(title))
    filename = '{}/{}'.format(self.folder, filename)
    urlretrieve(url, filename=filename)
def fetch(self, directory):
    if not os.path.exists(directory):
        os.makedirs(directory)
    self.path = os.path.join(directory, self.filename)
    url = self.artifact

    if self.check_sum():
        logger.info("Using cached artifact for %s" % self.filename)
        return

    logger.info("Fetching %s from %s." % (self.filename, url))
    try:
        if os.path.basename(url) == url:
            raise CCTError("Artifact is referenced by filename - can't download it.")
        urlrequest.urlretrieve(url, self.path)
    except Exception as ex:
        if self.hint:
            raise CCTError('artifact: "%s" was not found. %s' % (self.path, self.hint))
        else:
            raise CCTError("cannot download artifact from url %s, error: %s" % (url, ex))

    if not self.check_sum():
        if self.hint:
            raise CCTError('hash is not correct for artifact: "%s". %s' % (self.path, self.hint))
        else:
            raise CCTError("artifact from %s doesn't match required chksum" % url)
def do_local(self):
    attrs_dict = [tag[1] for tag in self.list_tags]
    for attrs in attrs_dict:
        if 'src' in attrs and 'http://' in attrs['src']:
            filename = attrs['src'].split('/')[-1]
            urlretrieve(attrs['src'], filename)
            attrs['src'] = filename
def _fetch_remote(remote, dirname=None):
    """Helper function to download a remote dataset into path

    Fetch a dataset pointed by remote's url, save into path using remote's
    filename and ensure its integrity based on the SHA256 Checksum of the
    downloaded file.

    Parameters
    ----------
    remote : RemoteFileMetadata
        Named tuple containing remote dataset meta information: url, filename
        and checksum
    dirname : string
        Directory to save the file to.

    Returns
    -------
    file_path: string
        Full path of the created file.
    """
    file_path = (remote.filename if dirname is None
                 else join(dirname, remote.filename))
    urlretrieve(remote.url, file_path)
    checksum = _sha256(file_path)
    if remote.checksum != checksum:
        raise IOError("{} has an SHA256 checksum ({}) "
                      "differing from expected ({}), "
                      "file may be corrupted.".format(file_path, checksum,
                                                      remote.checksum))
    return file_path
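# Note: _sha256 is called above but not defined in this snippet. A minimal
# sketch of what such a helper usually looks like follows; the 8192-byte chunk
# size is an assumption. Reading in blocks keeps memory use flat for large files.
import hashlib

def _sha256(path):
    """Return the SHA256 hex digest of the file at ``path``."""
    sha256hash = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            sha256hash.update(chunk)
    return sha256hash.hexdigest()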
def download(self):
    if 'rc' in self.version:
        base_url = 'https://git.kernel.org/torvalds/t'
        url = '{0}/linux-{1}.tar.gz'.format(base_url, self.version)
    else:
        base_url = 'https://cdn.kernel.org/pub/linux/kernel'
        major = 'v' + self.version[0] + '.x'
        url = '{0}/{1}/linux-{2}.tar.gz'.format(
            base_url, major, self.version
        )

    destination = '{0}/archives/linux-{1}.tar.gz'.format(
        self.build_dir, self.version
    )

    if os.path.isfile(destination):
        self.log('Kernel already downloaded: {0}'.format(self.version))
        return

    self.log('Downloading kernel: {0}'.format(self.version))

    if self.verbose:
        hook = download_progress
    else:
        hook = None

    try:
        urlretrieve(
            url,
            filename=destination,
            reporthook=hook
        )
    except Exception:
        os.remove(destination)
        raise
def download_file(url, filename, folder):
    print("Downloading file : ", filename)
    urlretrieve(url, os.path.join(folder, filename), reporthook)
    print("Download Complete")
    return True
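# Note: reporthook is passed to urlretrieve above but is not defined in this
# snippet. A minimal sketch of a compatible hook follows (the name and output
# format are assumptions); urlretrieve invokes it as
# hook(block_number, block_size, total_size).
def reporthook(block_number, block_size, total_size):
    if total_size > 0:
        percent = min(block_number * block_size * 100 / total_size, 100)
        print("\r%.1f%%" % percent, end="", flush=True)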
def main(): headers = { 'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,zh;q=0.9', 'Cache-Control': 'no-cache', 'DNT': '1', 'Host': 'c.dun.163yun.com', 'Referer': 'https://dun.163.com/trial/jigsaw', 'Pragma': 'no-cache', 'Proxy-Connection': 'keep-alive', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36' } with open('mm.js', 'r', encoding='utf-8') as f: content = f.read() ctx = execjs.compile(content) fp = ctx.call('get_fp') callback = ctx.call('get_callback') with open('tnp.js', 'r') as f: content = f.read() ctx = execjs.compile(content) cb = ctx.call('cb') data = { "id": "07e2387ab53a4d6f930b8d9a9be71bdf", "fp": fp, "https": "true", "type": "2", "version": "2.13.6", "dpr": "1", "dev": "1", "cb": cb, "ipv6": "false", "runEnv": "10", "group": "", "scene": "", "width": "320", "token": "", "referer": "https://dun.163.com/trial/jigsaw", "callback": callback } r = requests.get('https://c.dun.163.com/api/v2/get', params=data, headers=headers) data = json.loads(re.findall('.*?\((.*?)\);', r.text)[0]) token = data['data']['token'] request.urlretrieve(data['data']['front'][0], 'img/1.png') request.urlretrieve(data['data']['bg'][0], 'img/2.jpg') distance = get_gap() + 5 trace = get_track(distance) left = trace[-1][0] - 10 data = ctx.call('get_data', token, trace, left) cb = ctx.call('cb') get_data = { "id": "07e2387ab53a4d6f930b8d9a9be71bdf", "token": token, "acToken": "", "data": data, "width": "320", "type": "2", "version": "2.13.6", "cb": cb, "extraData": "", "runEnv": "10", "referer": "https://dun.163.com/trial/jigsaw", "callback": "__JSONP_hhjwbon_4" } r = requests.get('https://c.dun.163.com/api/v2/check', headers=headers, params=get_data) print(r.text)
sys.stdout.write(
    "\r [%i/%i] %s %s..." %
    (tweet_count, tweet_length,
     "Copying" if can_be_copied else "Downloading", url))
sys.stdout.write("\033[K")  # Clear the end of the line
sys.stdout.flush()

if can_be_copied:
    copyfile(earlier_archive_path + local_filename, local_filename)
else:
    while not downloaded:
        # Actually download the file!
        try:
            urlretrieve(better_url, local_filename)
        except Exception:
            download_tries = download_tries - 1
            if download_tries == 0:
                print("")
                print("Failed to download %s after 3 tries." % better_url)
                print("Please try again later?")
                sys.exit()
            time.sleep(5)  # Wait 5 seconds before retrying
        else:
            downloaded = True

# Change the URL so that the archive's index.html will now point to the
# just-downloaded local file...
if not os.path.isdir(args.directory):
    print('The specified output directory could not be accessed.')
    sys.exit(1)

chromedriver_version = get_chromedriver_version(chrome_version)
system = get_system()
url = 'https://chromedriver.storage.googleapis.com/{}/chromedriver_{}.zip' \
    .format(chromedriver_version, system)

print('Downloading chromedriver v{} for Chrome v{} on {}...'.format(
    chromedriver_version, chrome_version, system))
try:
    file, headers = urlretrieve(url)
except URLError as e:
    print('The chromedriver download URL could not be accessed: {}'.format(e))
    sys.exit(1)

# Unzip chromedriver
print('Extracting chromedriver...')
found = False
fp = open(file, 'rb')
z = zipfile.ZipFile(fp)
for name in z.namelist():
    if (system == 'win32' and name == 'chromedriver.exe') or \
            (system != 'win32' and name == 'chromedriver'):
        z.extract(name, args.directory)
        found = True
if len(sys.argv) > 1 and sys.argv[1] == 'sdist': # exclude the weight files in sdist weight_files = [] else: if not os.path.exists(model_dir): os.makedirs(model_dir) # in all other cases, download and decompress weight files for weight_file in weight_files: weight_path = os.path.join(model_dir, weight_file) if not os.path.isfile(weight_path): compressed_file = weight_file + '.gz' compressed_path = os.path.join(model_dir, compressed_file) if not os.path.isfile(compressed_file): print('Downloading weight file {} ...'.format(compressed_file)) urlretrieve(base_url + compressed_file, compressed_path) print('Decompressing ...') with open(weight_path, 'wb') as target: try: with gzip.open(compressed_path, 'rb') as source: target.write(source.read()) except OSError: # Handle symlinks with open(compressed_path) as symlink: # Github raw stores symlinks as text files, so we need # to read it to check the text real_compressed_file = symlink.read() os.remove(compressed_path) msg = '{} is symlink, downloading {} ...' print(msg.format(compressed_file, real_compressed_file))
def download(self):
    urlretrieve(self.uri, self.destination, self._progress_callback)
    if self.progress_bar:
        self.progress_bar.finish()
async def speedtest(event): arg_from_event = event.pattern_match.group(1) chat = await event.get_chat() share_as_pic = True if arg_from_event.lower() == "pic" else False if share_as_pic: # if speedtest is send to a group and send media is # not allowed then skip 'pic' argument if (hasattr(chat, "default_banned_rights") and not chat.creator and not chat.admin_rights and chat.default_banned_rights.send_media): share_as_pic = False # disable process = None all_test_passed = False check_mark = u"\u2705" warning = u"\u26A0" try: process = (f"**Speedtest by Ookla**\n\n" f"- {msgRep.SPD_TEST_SELECT_SERVER}...") await event.edit(process) s = Speedtest() s.get_best_server() process = (f"**Speedtest by Ookla**\n\n" f"- {msgRep.SPD_TEST_SELECT_SERVER} {check_mark}\n" f"- {msgRep.SPD_TEST_DOWNLOAD}...") await event.edit(process) s.download() process = (f"**Speedtest by Ookla**\n\n" f"- {msgRep.SPD_TEST_SELECT_SERVER} {check_mark}\n" f"- {msgRep.SPD_TEST_DOWNLOAD} {check_mark}\n" f"- {msgRep.SPD_TEST_UPLOAD}...") await event.edit(process) s.upload() process = (f"**Speedtest by Ookla**\n\n" f"- {msgRep.SPD_TEST_SELECT_SERVER} {check_mark}\n" f"- {msgRep.SPD_TEST_DOWNLOAD} {check_mark}\n" f"- {msgRep.SPD_TEST_UPLOAD} {check_mark}") all_test_passed = True if share_as_pic: s.results.share() result = s.results.dict() if not result: await event.edit(process + "\n\n" + f"`{msgRep.SPD_FAILED}: {msgRep.SPD_NO_RESULT}`") return except MemoryError as me: log.error(me) if not all_test_passed: process = process[:-3] + f" {warning}" await event.edit(process + "\n\n" + f"`{msgRep.SPD_FAILED}: {msgRep.SPD_NO_MEMORY}`") else: await event.edit(process + "\n\n" + f"`{msgRep.SPD_FAILED}: {msgRep.SPD_NO_MEMORY}`") return except Exception as e: log.error(e) if not all_test_passed: process = process[:-3] + f" {warning}" await event.edit(process + "\n\n" + msgRep.SPD_FAILED) else: await event.edit(process + "\n\n" + msgRep.SPD_FAILED) return if share_as_pic: try: await event.edit(process + "\n\n" + f"{msgRep.SPD_PROCESSING}...") png_file = path.join(getConfig("TEMP_DL_DIR"), "speedtest.png") urlretrieve(result["share"], png_file) await event.client.send_file(chat.id, png_file) await event.delete() remove(png_file) except Exception as e: log.error(e) await event.edit(msgRep.SPD_FAIL_SEND_RESULT) else: # Convert speed to Mbit/s down_in_mbits = round(result["download"] / 10**6, 2) up_in_mbits = round(result["upload"] / 10**6, 2) # Convert speed to MB/s (real speed?) down_in_mb = round(result["download"] / ((10**6) * 8), 2) up_in_mb = round(result["upload"] / ((10**6) * 8), 2) time = parse(result["timestamp"]) ping = result["ping"] isp = result["client"]["isp"] host = result["server"]["sponsor"] host_cc = result["server"]["cc"] text = "<b>Speedtest by Ookla</b>\n\n" text += (f"<b>{msgRep.SPD_TIME}</b>: " f"<code>{time.strftime('%B %d, %Y')} - " f"{time.strftime('%H:%M:%S')} {time.tzname()}</code>\n") text += (f"<b>{msgRep.SPD_DOWNLOAD}</b>: " f"<code>{down_in_mbits}</code> " f"{msgRep.SPD_MEGABITS} (<code>{down_in_mb}</code> " f"{msgRep.SPD_MEGABYTES})\n") text += (f"<b>{msgRep.SPD_UPLOAD}</b>: " f"<code>{up_in_mbits}</code> {msgRep.SPD_MEGABITS} " f"(<code>{up_in_mb}</code> {msgRep.SPD_MEGABYTES})\n") text += f"<b>{msgRep.SPD_PING}</b>: <code>{ping}</code> ms\n" text += f"<b>{msgRep.SPD_ISP}</b>: {isp}\n" text += f"<b>{msgRep.SPD_HOSTED_BY}</b>: {host} ({host_cc})\n" await event.edit(text, parse_mode="html") return
) # a new folder where everything for today's date will eventually be stored write_folder = "C:/Users/samihoch/" + today os.mkdir(write_folder) arcpy.management.CreateFileGDB(write_folder, "Drought.gdb") arcpy.env.workspace = os.path.join(write_folder, "Drought.gdb") # copy a blank map template into the new folder blank_map = "C:/Users/samihoch/BlankMap/BlankMap.aprx" shutil.copyfile(blank_map, write_folder + "/DroughtMap.aprx") # download the raw data into the folder for today write_filename = write_folder + "/Drought.gdb/most_recent.geojson" response = request.urlretrieve(url, write_filename) print("Downloading data from noaa") json_file = open(write_filename) data_raw = json.load(json_file) # create a dictionary so that the json data can be properly read with open(write_filename) as json_file: data_raw = json.load(json_file) if not os.path.exists('C:\Temp'): os.makedirs('C:\Temp') arcpy.management.CreateFileGDB(r'C:\Temp', 'Live.gdb') arcpy.env.workspace = os.path.join(r'C:\Temp', 'Live.gdb')
def get_mkdir(self): jsonobj = json.loads(self.get_html().decode('utf-8')) # 列表页 - 图片 imgList = jsonpath.jsonpath(jsonobj, '$..img') # 列表页 - 价格 pricelist = jsonpath.jsonpath(jsonobj, '$..price') # 列表页 - 商品名 titleList = jsonpath.jsonpath(jsonobj, '$..title') # 列表页 - 商品id -- skuId skuIdList = jsonpath.jsonpath(jsonobj, '$..promotionInfo.skuId') # 商品价格 priceList = jsonpath.jsonpath(jsonobj, '$..price') # 商品品牌 brandList = jsonpath.jsonpath(jsonobj, '$..brandName') # 商品分类 categoryList = jsonpath.jsonpath(jsonobj, '$..thirdCatName') listdata = zip(titleList, imgList, pricelist, skuIdList, priceList, brandList, categoryList) for item in listdata: print(item) # 替换'/' import re strinfo = re.compile('/') itemdir = strinfo.sub('-', item[0]) print(itemdir) time.sleep(1) # 商品名称目录 if not os.path.exists(itemdir): os.makedirs(itemdir) else: print(itemdir + ' -- 目录已存在!') self.dataurl = '' # 存储本地主页图片链接地址 self.pimg = '' # 列表页 - 图片 # 文件夹和文件命名不能出现这9个字符:/ \ : * " < > | ? if os.path.exists(itemdir + '/' + item[1][-20:].replace( '/', '-').replace('\\', '-').replace(':', '-').replace( '*', '-').replace('"', '-').replace('<', '-').replace( '>', '-').replace('|', '-').replace('?', '-') + '.webp'): print('文件已存在!') # return 0 else: if item[1].startswith('//'): self.dataurl = "http:" + item[1] else: self.dataurl = item[1] try: req = request.Request(self.dataurl, headers=self.headers) reponse = request.urlopen(req) get_img = reponse.read() self.pimg = '/pimgs/' + itemdir + '/' + self.dataurl[ -20:].replace('/', '-').replace('\\', '-').replace( ':', '-').replace('*', '-').replace( '"', '-').replace('<', '-').replace( '>', '-').replace('|', '-').replace( '?', '-') + '.webp' with open( itemdir + '/' + self.dataurl[-20:].replace('/', '-').replace( '\\', '-').replace(':', '-').replace( '*', '-').replace('"', '-').replace( '<', '-').replace('>', '-').replace( '|', '-').replace('?', '-') + '.webp', 'wb') as fp: fp.write(get_img) except Exception as e: print(e) # 详情目录 if not os.path.exists(itemdir + '/详情'): os.makedirs(itemdir + '/详情') else: print('详情' + ' -- 目录已存在!') driver = webdriver.PhantomJS( executable_path='./phantomjs-2.1.1-macosx/bin/phantomjs') time.sleep(5) driver.get(self.detailurl + str(item[3])) time.sleep(5) driver.find_element_by_class_name('tipinfo').click() time.sleep(5) html = etree.HTML(driver.page_source) imglist = html.xpath('//img/@src') print(self.detailurl + str(item[3])) # 轮番图 lunfantu = html.xpath('//img[@class="detail-img"]/@src') # 猜你喜欢 # like = html.xpath('//img[@class="J_ItemImage recommend-img"]/@src') # 商品宣传图 xuanchuan = html.xpath( '//div[@class="J_descriptionDetail parameter"]//img/@src') # 规格 # 左边的参数名 leftspec = html.xpath( '//div[@class="left attr_key border-1px border-r border-b"]/text()' ) # 右边的参数值 rightspec = html.xpath( '//div[@class="left attr_value border-1px border-b"]/span/text()' ) spec = zip(leftspec, rightspec) # time.sleep(5) # print(driver.page_source) print(str(item[3])) print( "-------------------------- 轮播图 --------------------------------" ) print(lunfantu) print( "--------------------------- 规格 ---------------------------------" ) print(spec) print( "-------------------------- 介绍图 ---------------------------------" ) print(xuanchuan) print( "-------------------------- 主页图 ---------------------------------" ) print(self.dataurl) for simple in imglist: if not os.path.exists( itemdir + '/详情/' + simple[-20:].replace('/', '-'). replace('\\', '-').replace(':', '-').replace('*', '-'). replace('"', '-').replace('<', '-').replace('>', '-'). 
replace('|', '-').replace('?', '-') + '.webp'): request.urlretrieve( simple, itemdir + '/详情' + '/' + simple[-20:].replace('/', '-').replace( '\\', '-').replace(':', '-').replace( '*', '-').replace('"', '-').replace( '<', '-').replace('>', '-').replace( '|', '-').replace('?', '-') + ".webp") print("正在下载......") else: print('文件已存在!') # NOT # NULL # AUTO_INCREMENT, title # VARCHAR(1000), img # VARCHAR(1000), lunfanimg # VARCHAR(1000), spec # VARCHAR(1000), xcimg # VARCHAR(1000), # 插入数据库l # 判断数据库是否有skuId,有就不插入,无则插入 result = self.cur.execute( "select skuid from duodian WHERE skuid=" + str(item[3])) print(str(result) + '-----------------------') if result: print("数据库里面存在此数据") else: # 不存在,存数据 lunfantu1 = {} specpagram = {} xuanchuan1 = {} # 轮番图 for index1, item1 in enumerate(lunfantu): lunfantu1[index1] = item1 # 规格 speckey = 0 for itemspec in spec: specvalue = str(itemspec[0]) + '-' + str(itemspec[1]) specpagram[str(speckey)] = specvalue speckey += 1 # 介绍图 for index3, item3 in enumerate(xuanchuan): xuanchuan1[index3] = item3 # 存储本地图片链接地址 plunfantu = {} pxuanchuan = {} for pindex1, pitem1 in enumerate(lunfantu): plunfantu[pindex1] = '/pimgs/' + itemdir + '/详情/' + pitem1[ -20:].replace('/', '-').replace('\\', '-').replace( ':', '-').replace('*', '-').replace( '"', '-').replace('<', '-').replace( '>', '-').replace('|', '-').replace( '?', '-') + '.webp' for pindex2, pitem2 in enumerate(xuanchuan): pxuanchuan[ pindex2] = '/pimgs/' + itemdir + '/详情/' + pitem2[ -20:].replace('/', '-').replace('\\', '-').replace( ':', '-').replace('*', '-').replace( '"', '-').replace('<', '-').replace( '>', '-').replace('|', '-').replace( '?', '-') + '.webp' self.cur.execute( 'INSERT INTO ' + self.tablename + ' (title, img, lunfanimg, spec, xcimg,skuid,pimg, plunfanimg, pxcimg,categoryid,price,brandname,categoryname) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s,%s, %s, %s,%s)', (itemdir, self.dataurl, json.dumps(lunfantu1, ensure_ascii=False), json.dumps(specpagram, ensure_ascii=False), json.dumps(xuanchuan1, ensure_ascii=False), str(item[3]), self.pimg, json.dumps(plunfantu, ensure_ascii=False), json.dumps(pxuanchuan, ensure_ascii=False), '11386', '%.2f' % (item[4] / 100), str(item[5]), str(item[6]))) self.cur.connection.commit() print( "------------------------ 插入成功 ----------------------------------" )
def retrieve_to_file(url):
    filename, headers = urlretrieve(url)
    return filename, headers
import sys
import io
import os.path
import urllib.request as req
from bs4 import BeautifulSoup

sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding='utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.detach(), encoding='utf-8')

url = 'https://www.weather.go.kr/weather/forecast/mid-term-rss3.jsp?stnId=159'
rawdata = 'D:/6_PWork/5_inflearn/01_Python_Automation_and_GUI/Section4/forecast_hw.xml'

if not os.path.exists(rawdata):
    req.urlretrieve(url, rawdata)

xml = open(rawdata, 'r', encoding='utf-8').read()
soup = BeautifulSoup(xml, 'html.parser')
#print(soup.find_all('city'))

info = {}
for location in soup.find_all('location'):
    city = location.find('city').string
    #print(city)
    weather = location.find_all('tmn')
    #print(tmns)
    if not city in info:
        info[city] = []
    for tmn in weather:
        info[city].append(tmn.string)
title = title_soup.get_text('title') #print(title) rename_title = title for symbol in cannot_mk_dir_list: rename_title = rename_title.replace(symbol, '') concert_path = f'{out_dir}/{rename_title}' if not os.path.exists(concert_path): os.mkdir(concert_path) pic_soup = li_soup.find('a', {'class': 'box-img'}) #print(pic_soup) img_soup = pic_soup.find('img') #print(img_soup) link = img_soup.get('data-src') print(link) urlretrieve('http://www.thaiticketmajor.com' + link, f"{concert_path}/img.jpg") date_soup = detail_soup.find('span') #print(date_soup) date = date_soup.get_text('span') print(date) detail = {} detail['title'] = title detail['date'] = date # record[] = .... with codecs.open(f'{concert_path}/detail.json', 'w', 'utf-8') as outfile: json.dump(detail, outfile, ensure_ascii=False)
#Imports
from urllib.request import urlretrieve
import re
import datetime

URL_PATH = 'https://s3.amazonaws.com/tcmg476/http_access_log'
LOCAL_FILE = 'local_copy.log'
total_requests = 0
year_count = 0

local_file, headers = urlretrieve(URL_PATH, LOCAL_FILE)
FILE_NAME = 'path/to/file'

#counts and matches for dates
oct94_count = 0
nov94_count = 0
dec94_count = 0
jan94_count = 0
feb94_count = 0
mar94_count = 0
apr94_count = 0
may94_count = 0
jun94_count = 0
jul94_count = 0
aug94_count = 0
sep94_count = 0
oct95_count = 0
jan_match = 0
feb_match = 0
mar_match = 0
from urllib import request
import datetime
from bs4 import BeautifulSoup
import ctypes

today = datetime.datetime.today()

code = request.urlopen('http://cn.bing.com').read().decode('utf-8')
soup = BeautifulSoup(code, 'html.parser')
imgLink = 'http://cn.bing.com' + soup.find(name='link', attrs={'id': 'bgLink'})['href']
filename = str(today.year) + ',' + str(today.month) + ',' + str(today.day) + '.png'
request.urlretrieve(imgLink, filename)
ctypes.windll.user32.SystemParametersInfoW(
    20, 0, __import__('os').path.abspath('.') + '/' + filename, 0)
def kang(update: Update, context: CallbackContext): msg = update.effective_message user = update.effective_user args = context.args packnum = 0 packname = "a" + str(user.id) + "_by_" + context.bot.username packname_found = 0 max_stickers = 120 while packname_found == 0: try: stickerset = context.bot.get_sticker_set(packname) if len(stickerset.stickers) >= max_stickers: packnum += 1 packname = ( "a" + str(packnum) + "_" + str(user.id) + "_by_" + context.bot.username ) else: packname_found = 1 except TelegramError as e: if e.message == "Stickerset_invalid": packname_found = 1 kangsticker = "kangsticker.png" is_animated = False file_id = "" if msg.reply_to_message: if msg.reply_to_message.sticker: if msg.reply_to_message.sticker.is_animated: is_animated = True file_id = msg.reply_to_message.sticker.file_id elif msg.reply_to_message.photo: file_id = msg.reply_to_message.photo[-1].file_id elif msg.reply_to_message.document: file_id = msg.reply_to_message.document.file_id else: msg.reply_text("Yea, I can't kang that.") kang_file = context.bot.get_file(file_id) if not is_animated: kang_file.download("kangsticker.png") else: kang_file.download("kangsticker.tgs") if args: sticker_emoji = str(args[0]) elif msg.reply_to_message.sticker and msg.reply_to_message.sticker.emoji: sticker_emoji = msg.reply_to_message.sticker.emoji else: sticker_emoji = "🤔" if not is_animated: try: im = Image.open(kangsticker) maxsize = (512, 512) if (im.width and im.height) < 512: size1 = im.width size2 = im.height if im.width > im.height: scale = 512 / size1 size1new = 512 size2new = size2 * scale else: scale = 512 / size2 size1new = size1 * scale size2new = 512 size1new = math.floor(size1new) size2new = math.floor(size2new) sizenew = (size1new, size2new) im = im.resize(sizenew) else: im.thumbnail(maxsize) if not msg.reply_to_message.sticker: im.save(kangsticker, "PNG") context.bot.add_sticker_to_set( user_id=user.id, name=packname, png_sticker=open("kangsticker.png", "rb"), emojis=sticker_emoji, ) msg.reply_text( f"Sticker successfully added to [pack](t.me/addstickers/{packname})" + f"\nEmoji is: {sticker_emoji}", parse_mode=ParseMode.MARKDOWN, ) except OSError as e: msg.reply_text("I can only kang images m8.") print(e) return except TelegramError as e: if e.message == "Stickerset_invalid": makepack_internal( update, context, msg, user, sticker_emoji, packname, packnum, png_sticker=open("kangsticker.png", "rb"), ) elif e.message == "Sticker_png_dimensions": im.save(kangsticker, "PNG") context.bot.add_sticker_to_set( user_id=user.id, name=packname, png_sticker=open("kangsticker.png", "rb"), emojis=sticker_emoji, ) msg.reply_text( f"Sticker successfully added to [pack](t.me/addstickers/{packname})" + f"\nEmoji is: {sticker_emoji}", parse_mode=ParseMode.MARKDOWN, ) elif e.message == "Invalid sticker emojis": msg.reply_text("Invalid emoji(s).") elif e.message == "Stickers_too_much": msg.reply_text("Max packsize reached. 
Press F to pay respecc.") elif e.message == "Internal Server Error: sticker set not found (500)": msg.reply_text( "Sticker successfully added to [pack](t.me/addstickers/%s)" % packname + "\n" "Emoji is:" + " " + sticker_emoji, parse_mode=ParseMode.MARKDOWN, ) print(e) else: packname = "animated" + str(user.id) + "_by_" + context.bot.username packname_found = 0 max_stickers = 50 while packname_found == 0: try: stickerset = context.bot.get_sticker_set(packname) if len(stickerset.stickers) >= max_stickers: packnum += 1 packname = ( "animated" + str(packnum) + "_" + str(user.id) + "_by_" + context.bot.username ) else: packname_found = 1 except TelegramError as e: if e.message == "Stickerset_invalid": packname_found = 1 try: context.bot.add_sticker_to_set( user_id=user.id, name=packname, tgs_sticker=open("kangsticker.tgs", "rb"), emojis=sticker_emoji, ) msg.reply_text( f"Sticker successfully added to [pack](t.me/addstickers/{packname})" + f"\nEmoji is: {sticker_emoji}", parse_mode=ParseMode.MARKDOWN, ) except TelegramError as e: if e.message == "Stickerset_invalid": makepack_internal( update, context, msg, user, sticker_emoji, packname, packnum, tgs_sticker=open("kangsticker.tgs", "rb"), ) elif e.message == "Invalid sticker emojis": msg.reply_text("Invalid emoji(s).") elif e.message == "Internal Server Error: sticker set not found (500)": msg.reply_text( "Sticker successfully added to [pack](t.me/addstickers/%s)" % packname + "\n" "Emoji is:" + " " + sticker_emoji, parse_mode=ParseMode.MARKDOWN, ) print(e) elif args: try: try: urlemoji = msg.text.split(" ") png_sticker = urlemoji[1] sticker_emoji = urlemoji[2] except IndexError: sticker_emoji = "🤔" urllib.urlretrieve(png_sticker, kangsticker) im = Image.open(kangsticker) maxsize = (512, 512) if (im.width and im.height) < 512: size1 = im.width size2 = im.height if im.width > im.height: scale = 512 / size1 size1new = 512 size2new = size2 * scale else: scale = 512 / size2 size1new = size1 * scale size2new = 512 size1new = math.floor(size1new) size2new = math.floor(size2new) sizenew = (size1new, size2new) im = im.resize(sizenew) else: im.thumbnail(maxsize) im.save(kangsticker, "PNG") msg.reply_photo(photo=open("kangsticker.png", "rb")) context.bot.add_sticker_to_set( user_id=user.id, name=packname, png_sticker=open("kangsticker.png", "rb"), emojis=sticker_emoji, ) msg.reply_text( f"Sticker successfully added to [pack](t.me/addstickers/{packname})" + f"\nEmoji is: {sticker_emoji}", parse_mode=ParseMode.MARKDOWN, ) except OSError as e: msg.reply_text("I can only kang images m8.") print(e) return except TelegramError as e: if e.message == "Stickerset_invalid": makepack_internal( update, context, msg, user, sticker_emoji, packname, packnum, png_sticker=open("kangsticker.png", "rb"), ) elif e.message == "Sticker_png_dimensions": im.save(kangsticker, "PNG") context.bot.add_sticker_to_set( user_id=user.id, name=packname, png_sticker=open("kangsticker.png", "rb"), emojis=sticker_emoji, ) msg.reply_text( "Sticker successfully added to [pack](t.me/addstickers/%s)" % packname + "\n" + "Emoji is:" + " " + sticker_emoji, parse_mode=ParseMode.MARKDOWN, ) elif e.message == "Invalid sticker emojis": msg.reply_text("Invalid emoji(s).") elif e.message == "Stickers_too_much": msg.reply_text("Max packsize reached. 
Press F to pay respecc.") elif e.message == "Internal Server Error: sticker set not found (500)": msg.reply_text( "Sticker successfully added to [pack](t.me/addstickers/%s)" % packname + "\n" "Emoji is:" + " " + sticker_emoji, parse_mode=ParseMode.MARKDOWN, ) print(e) else: packs = "Please reply to a sticker, or image to kang it!\nOh, by the way. here are your packs:\n" if packnum > 0: firstpackname = "a" + str(user.id) + "_by_" + context.bot.username for i in range(0, packnum + 1): if i == 0: packs += f"[pack](t.me/addstickers/{firstpackname})\n" else: packs += f"[pack{i}](t.me/addstickers/{packname})\n" else: packs += f"[pack](t.me/addstickers/{packname})" msg.reply_text(packs, parse_mode=ParseMode.MARKDOWN) try: if os.path.isfile("kangsticker.png"): os.remove("kangsticker.png") elif os.path.isfile("kangsticker.tgs"): os.remove("kangsticker.tgs") except: pass
def run(self):
    while True:
        link, filename = self.links_filenames.get()
        urlretrieve(link, filename)
        self.links_filenames.task_done()
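# Note: a minimal usage sketch (not from the original source) showing how a
# worker with a run() loop like the one above is typically wired up. The
# DownloadWorker class name, the pool size, and the example URL are assumptions
# for illustration only.
import threading
import queue
from urllib.request import urlretrieve

class DownloadWorker(threading.Thread):
    def __init__(self, links_filenames):
        super().__init__(daemon=True)   # daemon threads exit with the main thread
        self.links_filenames = links_filenames

    def run(self):
        while True:
            link, filename = self.links_filenames.get()
            urlretrieve(link, filename)
            self.links_filenames.task_done()

jobs = queue.Queue()
for _ in range(4):                      # small fixed-size worker pool
    DownloadWorker(jobs).start()
for url, name in [("https://example.com/a.txt", "a.txt")]:  # placeholder job
    jobs.put((url, name))
jobs.join()                             # block until every queued download is done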
def download(from_url, to_file):
    if not os.path.isfile(to_file):
        req.urlretrieve(from_url, to_file)
from config import get_config
from util.pointcloud import combine_point_clouds
from util.misc import get_random_color

BASE_URL = "http://node2.chrischoy.org/data/"
DOWNLOAD_LIST = [(BASE_URL + "datasets/registration/", "redkitchen_000.ply"),
                 (BASE_URL + "datasets/registration/", "redkitchen_010.ply"),
                 (BASE_URL + "projects/DGR/", "ResUNetBN2C-feat32-3dmatch-v0.05.pth")]

# Check if the weights and file exist and download
if not os.path.isfile('redkitchen_000.ply'):
    print('Downloading weights and pointcloud files...')
    for f in DOWNLOAD_LIST:
        print(f"Downloading {f}")
        urlretrieve(f[0] + f[1], f[1])

if __name__ == '__main__':
    config = get_config()
    if config.weights is None:
        config.weights = DOWNLOAD_LIST[-1][-1]

    # preprocessing
    pcd0 = o3d.io.read_point_cloud(config.pcd0)
    pcd0.estimate_normals()
    pcd1 = o3d.io.read_point_cloud(config.pcd1)
    pcd1.estimate_normals()

    # registration
    dgr = DeepGlobalRegistration(config)
    T01 = dgr.register(pcd0, pcd1)
from urllib.request import urlretrieve
from urllib.request import urlopen
from bs4 import BeautifulSoup

html = urlopen("http://www.pythonscraping.com")
soup = BeautifulSoup(html, 'html.parser')
imageLocation = soup.find('a', id='logo').find('img')['src']
# Download logo.jpg into the current directory
urlretrieve(imageLocation, 'logo.jpg')
def extract_data(url, file_name):
    urlretrieve(url, file_name)
    json_data = open(file_name)
    return json.load(json_data)
opener.addheaders = [('User-agent', UserAgent().ie)]
request.install_opener(opener)

base = "https://search.naver.com/search.naver?where=image&sm=tab_jum&query="
quote = parse.quote_plus("벤츠")
url = base + quote
res = request.urlopen(url)

savePath = BASE_DIR + ('/5-2/')

try:
    if not (os.path.isdir(savePath)):
        os.makedirs(os.path.join(savePath))
except OSError as e:
    print("Folder creation failed!")
else:
    print('Folder creation success!')

soup = BeautifulSoup(res, "html.parser")
imageSources = soup.select("div.img_area > a.thumb._thumb > img")

for i, imageSource in enumerate(imageSources, 1):
    fullFileName = os.path.join(savePath, str(i) + '.png')
    print(i, imageSource)
    request.urlretrieve(imageSource['data-source'], fullFileName)

print("download succeeded!")
def download_url(url, filename):
    """Download a file from url to filename, with a progress bar."""
    with TqdmUpTo(unit="B", unit_scale=True, unit_divisor=1024, miniters=1) as t:
        urlretrieve(url, filename, reporthook=t.update_to, data=None)  # nosec
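# Note: TqdmUpTo is used above but not defined in this snippet. The common tqdm
# recipe that adapts a tqdm bar to urlretrieve's reporthook signature looks
# roughly like this -- treat it as an assumed definition, not the original code.
from tqdm import tqdm

class TqdmUpTo(tqdm):
    def update_to(self, b=1, bsize=1, tsize=None):
        # b: blocks transferred so far, bsize: size of each block, tsize: total size
        if tsize is not None:
            self.total = tsize
        self.update(b * bsize - self.n)  # advance the bar by the progress delta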
from bs4 import BeautifulSoup
import urllib.request as req
import os.path

# Download the XML
url = "http://www.kma.go.kr/weather/forecast/mid-term-rss3.jsp?stnId=108"
# Save it to a file
savename = "forecast.xml"
if not os.path.exists(savename):
    req.urlretrieve(url, savename)

# Parse with BeautifulSoup
xml = open(savename, "r", encoding="utf-8").read()
soup = BeautifulSoup(xml, "html.parser")

# Check each region
info = {}
for location in soup.find_all("location"):
    name = location.find('city').string
    weather = location.find('wf').string
    if not (weather in info):
        info[weather] = []
    info[weather].append(name)

# Print the result
for weather in info.keys():
    print("+", weather)
    for name in info[weather]:
        print("- ", name)
def _main(i, all_num, url, filename):
    print("[{}/{}] Downloading {} -> {}".format(i, all_num, url, filename))
    request.urlretrieve(url, filename)
def download_files(self, folder, action=DOWNLOAD_UPDATED_REQUESTS, flat=False): run_time = time.strftime('%d%m%y%H%M%S', time.localtime()) log_file = os.path.join(folder, 'log-%s.txt' % run_time) self.log_infos, self.log_errors, self.log_links = '', '', '' paths = set() name = "" counter = 0 existing_update_time, new_update_time = None, {} request_updates_path = os.path.join(folder, 'requests-update-time.json') if os.path.isfile(request_updates_path): existing_update_time = json.load(open(request_updates_path, 'r', encoding='utf-8'), parse_int=True) if action >= self.DOWNLOAD_UPDATED_REQUESTS: os.rename(request_updates_path, request_updates_path.replace('.', '-bkp-%s.' % run_time)) for row in self.data: prev_name = name request_id, update_time, nom, num, title, file_type, file = row name = self.to_filename(title if title else 'No title').replace(' ', ' ') nom, file_type = self.to_filename(nom), self.to_filename(file_type) download_skipped_by_preprocessor, dir_name, file_name = self.preprocess(int(num), name, file_type) display_path = ' | '.join([nom, dir_name, file_name]) if download_skipped_by_preprocessor: self.log_info('SKIP: ' + display_path) continue dir_path = os.path.join(folder, dir_name) if flat else os.path.join(folder, nom, dir_name) try: is_img = False if not file: self.log_error('No file for %s.' % display_path) continue file = json.loads(file) new_update_time[request_id] = update_time # assuming it's the same for all request files if 'link' in file.keys(): # External site file_exists = file_name in [name.split('.', 1)[0] for name in os.listdir(dir_path)] \ if os.path.exists(dir_path) else False request_up_to_date = existing_update_time \ and str(request_id) in existing_update_time \ and existing_update_time[str(request_id)] == update_time if file_exists: self.log_info(display_path + ' exists. ', inline=True) if request_up_to_date: self.log_info('And the request did not update. Skipping...', head=False) else: self.log_info('And the request updated. You should update it!', head=False) if not file_exists or (file_exists and not request_up_to_date): link_dir_path = os.path.join(folder, dir_name) if flat else os.path.join(folder, nom, dir_name) if not os.path.exists(dir_path) \ and not os.path.exists(link_dir_path): os.makedirs(link_dir_path) successful_download = False if action >= self.DOWNLOAD_UPDATED_REQUESTS: self.log_info(("DL: " + file['link'] + " -> " + display_path)) successful_download = CloudDownloader.get(file['link'], os.path.join(dir_path, file_name)) if successful_download: self.log_info("[CLOUD OK] " + display_path) else: self.log_info("[CLOUD FAIL] " + display_path) self.log_link("%s -> %s" % (file['link'], display_path)) continue else: src_filename = file['filename'] if 'fileext' in file: file_ext = file['fileext'] else: file_ext = '.jpg' is_img = True if prev_name == name: counter += 1 file_name += '-' + str(counter) file_name += file_ext path = os.path.join(dir_path, file_name) file_url = 'http://' + parse.quote('%s.cosplay2.ru/uploads/%d/%d/%s' % (self.event_name, self.event_id, request_id, src_filename)) if is_img: file_url += '.jpg' # Yes, it works this way download_required = True if os.path.isfile(path) or os.path.isfile(path + '_'): # This makes a file invisible for extractor self.log_info(display_path + ' exists. 
', inline=True) if action in (self.CHECK_UPDATES_ONLY, self.DOWNLOAD_UPDATED_REQUESTS) and existing_update_time: if str(request_id) in existing_update_time \ and existing_update_time[str(request_id)] == update_time: self.log_info('And the request did not update. Skipping...', head=False) download_required = False else: self.log_info('And the request updated. Updating...', head=False) else: self.log_info('Configured not to check or no data on updates. Skipping...', head=False) download_required = False if download_required: if path not in paths: paths.add(path) else: self.log_error("!!!! %s was about to overwrite. Check your SQL query!!!" % path) break self.log_info(("DL: " + file_url + " -> " + path), inline=True) if action >= self.DOWNLOAD_UPDATED_REQUESTS: if not os.path.isdir(dir_path): os.makedirs(dir_path) request.urlretrieve(file_url, path) self.log_info(' [OK]', head=False) else: self.log_info(' [READY]', head=False) except (TypeError, AttributeError, request.HTTPError) as e: print("[FAIL]", name + ":", e) if not os.path.isdir(folder): os.makedirs(folder) if action >= self.DOWNLOAD_UPDATED_REQUESTS: json.dump(new_update_time, open(request_updates_path, 'w', encoding='utf-8'), indent=4) with open(log_file, 'w', encoding='utf-8') as f: f.write("ERRORS:" + os.linesep + self.log_errors + os.linesep) f.write("LINKS:" + os.linesep + self.log_links + os.linesep) f.write("INFO:" + os.linesep + self.log_infos + os.linesep) if self.log_errors: print("\n--- ERRORS ---") print(self.log_errors) if self.log_links: print("\n--- LINKS ---") print(self.log_links)
import os
from pathlib import Path
from urllib.request import urlretrieve
from collections import defaultdict
import xml.etree.ElementTree as ET

# import the countries xml file
tmp = Path(os.getenv("TMP", "/tmp"))
countries = tmp / 'countries.xml'

if not countries.exists():
    urlretrieve('https://bites-data.s3.us-east-2.amazonaws.com/countries.xml', countries)


def get_income_distribution(xml=countries):
    """
    - Read in the countries xml as stored in the countries variable.
    - Parse the XML.
    - Return a dict of:
      - keys = incomes (wb:incomeLevel)
      - values = list of country names (wb:name)
    """
    dist = defaultdict(list)
    tree = ET.parse(xml)
    root = tree.getroot()
    for child in root:
        dist[child[4].text].append(child[1].text)
    return dist
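A quick, hedged usage example for get_income_distribution (assuming the countries.xml download above succeeded):

if __name__ == '__main__':
    distribution = get_income_distribution()
    # Print how many countries fall under each World Bank income level
    for income_level, country_names in distribution.items():
        print('%s: %d countries' % (income_level, len(country_names)))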
    time_values = []
    for i in range(time_slots):
        if i == (time_slots - 1):
            new_data = data[counter:]
            time_values.append(dataFourier(new_data, fourier_slots))
        else:
            new_data = data[counter:counter + data_slots]
            counter += data_slots
            time_values.append(dataFourier(new_data, fourier_slots))
    return time_values


for i in range(len(autisminput)):
    item = autisminput[i]
    dtft_output = dtft_output + item + "|"
    filename = item + ".wav"
    urlretrieve("http://api.voicerss.org/?key=04f49802d32d442ca997d4d2ea76d3d5"
                "&hl=en-us&c=wav&src=" + item, filename)
    rate, data = wav.read(filename)
    realitem = parentinput[i]
    timefingers = dataTimeDivandFourier(data, 300, 10)
    time_str_output = ','.join(str(x) for x in timefingers)
    dtft_output += time_str_output
    dtft_output += "|"
    dtft_output += realitem
    dtft_output += "&"
dtft_output = dtft_output[:len(dtft_output) - 1]
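dataFourier is not defined in this fragment. Purely as an assumption about its intent, a sketch that reduces a chunk's FFT magnitude spectrum to fourier_slots coarse values might look like:

import numpy as np

def dataFourier(samples, fourier_slots):
    # Hypothetical sketch: the real implementation is not shown in this snippet.
    # Average the magnitude spectrum of the chunk into `fourier_slots` coarse
    # bins, yielding a fixed-length spectral "fingerprint" for the chunk.
    spectrum = np.abs(np.fft.rfft(samples))
    bins = np.array_split(spectrum, fourier_slots)
    return [float(b.mean()) for b in bins]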
def urllib_download(IMAGE_URL, ID, imgtype):
    from urllib.request import urlretrieve
    urlretrieve(IMAGE_URL, './image/' + ID + imgtype)
    return True
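urlretrieve raises FileNotFoundError if the ./image/ directory is missing, so a hedged usage sketch (placeholder URL, ID, and extension) creates it first:

import os

os.makedirs('./image', exist_ok=True)
# Placeholder values purely for illustration
urllib_download('https://example.com/sample.jpg', 'sample-001', '.jpg')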
def checkDependencies():
    # Check git
    retcode = subprocess.Popen(subprocess.list2cmdline(["git", "--version"]),
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True).wait()
    if retcode != 0:
        logging.error('Lime requires git. Get it from http://git-scm.com/download')
        sys.exit(1)

    # Closure Library
    if not (os.path.exists(closure_dir) and os.path.exists(closure_deps_file)):
        print('Closure Library not found. Downloading to %s' % closure_dir)
        print('Please wait...')
        retcode = subprocess.Popen(subprocess.list2cmdline([
            "git", "clone", "https://github.com/google/closure-library.git", closure_dir
        ]), shell=True).wait()
        if retcode != 0:
            print('Failed to clone Closure Library via Git. Discontinuing.')
            sys.exit(1)

    # Box2D
    if not os.path.exists(box2d_dir):
        print('Box2DJS not found. Downloading to %s' % box2d_dir)
        print('Please wait...')
        retcode = subprocess.Popen(subprocess.list2cmdline([
            "git", "clone", "https://github.com/thinkpixellab/pl.git", box2d_dir
        ]), shell=True).wait()
        if retcode != 0:
            logging.error('Error while downloading Box2D. Discontinuing.')
            sys.exit(1)

    # External tools dir
    if not os.path.exists(extdir):
        os.mkdir(extdir)

    # Closure compiler
    if not os.path.exists(compiler_path):
        zip_path = os.path.join(extdir, 'compiler.zip')
        print('Downloading Closure Compiler: ')
        urlretrieve("http://closure-compiler.googlecode.com/files/compiler-20130411.zip",
                    zip_path, rephook)
        print('\nUnzipping...')
        zippedFile = zipfile.ZipFile(zip_path)
        zippedFile.extract('compiler.jar', extdir)
        zippedFile.close()
        print('Cleanup')
        os.unlink(zip_path)
        os.rename(os.path.join(extdir, 'compiler.jar'), compiler_path)

    # Closure Templates
    if not os.path.exists(soy_path):
        zip_path = os.path.join(extdir, 'soy.zip')
        print('Downloading Closure Templates(Soy):')
        urlretrieve("http://closure-templates.googlecode.com/files/closure-templates-for-javascript-latest.zip",
                    zip_path, rephook)
        print('\nUnzipping...')
        zippedFile = zipfile.ZipFile(zip_path)
        zippedFile.extract('SoyToJsSrcCompiler.jar', extdir)
        zippedFile.close()
        print('Cleanup')
        os.unlink(zip_path)

    if not os.path.exists(projects_path):
        open(projects_path, 'w').close()

    makeProjectPaths('')
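rephook is passed to urlretrieve as its reporthook but is not defined in this snippet. A minimal progress hook with the expected (block_num, block_size, total_size) signature could look like this (an assumption, not the project's actual implementation):

import sys

def rephook(block_num, block_size, total_size):
    # urlretrieve calls this after each block; report rough progress.
    downloaded = block_num * block_size
    if total_size > 0:
        percent = min(100, downloaded * 100 // total_size)
        sys.stdout.write('\r%d%%' % percent)
    else:
        sys.stdout.write('\r%d bytes' % downloaded)
    sys.stdout.flush()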
def PolaRxS_batchDataDownloadToLocal(input_datetime):
    # start timer
    start_timer = time.time()

    # local path where the script and spreadsheet exist
    local_pwd = '/home/kibrom/kwork/sw-GNSS/fdl18_Frontiers/GNSS_data_derived_products'
    # sub directory to put raw data
    level1 = '/level1/'
    # CHAIN data labels spreadsheet
    data_labels = '/CHAIN_data_labels.xlsx'
    # how long the downloading takes
    download_time = 'data_download_runtimes/'

    # Python makes directories if they don't exist
    if not os.path.exists(local_pwd + level1):
        os.makedirs(local_pwd + level1)
    if not os.path.exists(local_pwd + level1 + download_time):
        os.makedirs(local_pwd + level1 + download_time)

    df_labels_PolaRxS = pd.read_excel(local_pwd + data_labels,
                                      sheet_name='PolaRxS_labels',
                                      header=None, usecols=[1])

    # input python function to generate daily data here
    print('-------> working on datetime = {0}'.format(input_datetime))
    thisdatetime = input_datetime
    thisdoy = thisdatetime.timetuple().tm_yday
    thisyr = thisdatetime.year
    thisdy = thisdatetime.day
    thismon = thisdatetime.month

    # Initialize the daily dataframe
    df_save = pd.DataFrame()

    # SET DESIRED DIRECTORY HERE
    save_data = local_pwd + level1

    # File name to save the full day of data to local disk
    filename_save = save_data + 'PolaRxS_CHAINdata__' + format(thisyr, '04') + '_' + format(thisdoy, '03') + '.csv'
    if os.path.isfile(filename_save):
        print('The data for this doy is available at: ')
        print(filename_save)
        sys.exit('We have data for this doy')

    # for h in range(0, 1):
    for h in range(0, 24):
        print('this date = {0}'.format(thisdatetime))
        hour_dir = '/gps/ismr/' + '{:04}'.format(int(thisyr)) + '/' + '{:03}'.format(int(thisdoy)) + '/' \
                   + '{:02}'.format(int(thisdatetime.hour + h)) + '/'
        print('this hour directory = {0}'.format(hour_dir))

        # Get files for current hour
        try:
            ftp = ftplib.FTP("chain.physics.unb.ca")
            ftp.login("*****@*****.**", "4Kindahafti4")
            ftp.cwd(hour_dir)
            # List the files in the current directory
            files_thishour = ftp.nlst()
        except Exception as e:
            print('\n-------unable to login, change to directory, or list files {0}'.format(hour_dir))
            print('with error {0}--------\n'.format(e))
            continue

        for s in range(len(files_thishour)):
            # print('this station file = {0}'.format(files_thishour[s]))
            # establish and make, if necessary, a local directory for the data
            local_dir = save_data
            # local_fn_and_dir = local_dir + files_thishour[s]
            local_fn_and_dir = local_dir + files_thishour[s][-18:]
            # print('local_fn_and_dir = {0}'.format(local_fn_and_dir))
            # if not os.path.exists(local_dir):
            #     os.makedirs(local_dir)

            # clean up the cache that may have been created by previous calls to urlretrieve
            urlcleanup()

            # download the data for the current hour
            if not os.path.isfile(local_fn_and_dir):
                urlretrieve('ftp://[email protected]:[email protected]/' + hour_dir[1:] + files_thishour[s],
                            local_fn_and_dir)

            try:
                txt_thishour_thisfile = np.genfromtxt(local_fn_and_dir, delimiter=",", filling_values=99)
                # df_thishour_thisfile = pd.DataFrame(np.genfromtxt(local_fn_and_dir, delimiter=",", filling_values=99), columns=df_labels_PolaRxS[1].tolist())
                # print(np.shape(txt_thishour_thisfile))
            except:
                print('\n\n ***unable to read {} ***\n\n'.format(local_fn_and_dir))
                continue

            thisabbr = local_fn_and_dir[-18:-15]

            # Remove KUG station due to bias
            if thisabbr == 'kug':
                # print('\n\n skipping kugc... \n\n')
                os.remove(local_fn_and_dir)
                continue
            if len(txt_thishour_thisfile) == 0:
                print('\n\n ***file is empty, continuing***\n\n')
                os.remove(local_fn_and_dir)
                continue

            os.remove(local_fn_and_dir)
            df_thishour_thisfile = pd.DataFrame(data=txt_thishour_thisfile,
                                                columns=df_labels_PolaRxS[0].tolist())
            df_thishour_thisfile['CHAIN station'] = pd.Series(
                np.full((len(txt_thishour_thisfile[:, 0])), thisabbr))

            # Concatenate the new dataframe to the existing dataframe
            df_save = pd.concat([df_save, df_thishour_thisfile])
            del df_thishour_thisfile

    # Save the full day of data to local disk
    pd.DataFrame.to_csv(df_save, filename_save, na_rep='NaN')
    del filename_save

    # end timer
    end_timer = time.time()
    runtime_thisday = end_timer - start_timer
    np.savetxt(save_data + download_time + 'runtime__' + format(thisyr, '04') + '_' + format(thisdoy, '03') + '.txt',
               np.array(runtime_thisday).reshape(1, ), fmt='%.2f')
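Assuming the imports and hard-coded paths above are in place, a hedged example of driving the batch downloader for a single day (the date is arbitrary) would be:

import datetime

if __name__ == '__main__':
    # Download and assemble all CHAIN PolaRxS data for an arbitrary example date
    PolaRxS_batchDataDownloadToLocal(datetime.datetime(2015, 3, 17))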