import re
import urllib

def Img(html):
    # Collect every .jpg URL from src="..." attributes and download each one.
    photo = r'src="(.*?\.jpg)" width'
    image = re.compile(photo)
    imagelist = re.findall(image, html)
    x = 0
    for j in imagelist:
        urllib.urlretrieve(j, '%s.jpg' % x)  # urlretrieve lives in urllib, not urllib2
        x += 1
import sys
import urllib.request as urlrequest  # assumed alias for the urlrequest name used below

def DownloadFile(url, save_file):
    # Progress callback: urlretrieve calls it with (block number, block size, total size).
    def reporthook(bnum, bsize, tsize):
        progress = bnum * bsize
        sys.stdout.write("\rDownloaded: {n} bytes{blank}".format(
            n=progress, blank=" " * (15 - len(str(progress)))))
        sys.stdout.flush()
    try:
        urlrequest.urlretrieve(url, save_file, reporthook)
        print("")
        return True
    except Exception:
        return False
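A quick usage sketch for the helper above; the URL and filename are placeholders, not part of the original snippet:

# Placeholder URL and filename; prints an in-place progress counter while downloading.
if DownloadFile("https://example.com/archive.zip", "archive.zip"):
    print("download succeeded")
else:
    print("download failed")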
import re
import urllib

def getContent(content):
    # Use a regular expression to match image URLs in the page source.
    pattern = re.compile(r'src="(.*?)" pic_ext=')
    items = re.findall(pattern, content)
    count = 0
    for item in items:
        count += 1  # Python has no ++ operator
        urllib.urlretrieve(item, "%s.jpg" % count)  # urlretrieve is in urllib, not urllib2
def fetch_arrhythmia(data_home=None, download_if_missing=True):
    """Fetcher for the Arrhythmia dataset.

    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        the original datasets for this `data_balance` study are stored at
        `../data/raw/` subfolders.

    download_if_missing : optional, True by default
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.
    """
    data_home = get_dataset_home(data_home=data_home)
    if not exists(data_home):
        makedirs(data_home)
    print('downloading Arrhythmia data from %s to %s' % (DATA_URL, data_home))
    urlretrieve(DATA_URL, join(data_home, 'data.csv'))
def downloadSources(self):
    if os.path.exists(self.installPath):
        return
    trymakedir(self.installPath + '/sources')
    os.chdir(os.path.dirname(self.installPath))

    import urllib  # urlretrieve lives in urllib (Python 2), not urllib2

    if self.version == '2005':
        # cernlib fix from Harald Vogt: http://www-zeuthen.desy.de/~hvogt/
        urllib.urlretrieve("http://www-zeuthen.desy.de/linear_collider/cernlib/new/cernlib-2005-all-new.tgz",
                           "cernlib-2005-all-new.tgz")
        urllib.urlretrieve("http://www-zeuthen.desy.de/linear_collider/cernlib/new/cernlib.2005.corr.2009.06.13.tgz",
                           "cernlib.2005.corr.2009.06.13.tgz")
        if os.system("tar xzf cernlib-2005-all-new.tgz") != 0:
            self.abort("failed to extract cernlib sources")
        # use more recent corrections (64 bit compatible)
        os.system("mv cernlib.2005.corr.tgz cernlib.2005.corr.tgz-old && "
                  "ln -s cernlib.2005.corr.2009.06.13.tgz cernlib.2005.corr.tgz")

    elif self.version == '2006':
        # binary tarballs
        #if platform.architecture()[0] == '64bit':
        #    if Version( self.parent.debugInfo['GCC_VERSION'] )[:2] == (4,1) :
        #        ...
        ## download index.html
        #if( os.system( "wget " + self.download.url ) != 0 ):
        #    self.abort( "Problems occurred downloading sources!!")
        ## parse index.html for extracting source tarballs
        #src_tarballs = getoutput( r"grep tar.gz index.html | sed -e 's/.*href=\"\(.*\)\".*/\1/'" ).split('\n')
        ## index.html no longer needed
        #os.unlink( "index.html" )
        #index_html = urllib.urlopen( self.download.url ).read()
        #import re
        #regex = re.compile( 'href="(.*)"' , re.VERBOSE )
        #hrefs = regex.findall( index_html )
        #src_tarballs = [ i.strip() for i in hrefs if i.strip()[-7:] == '.tar.gz' ]
        #for tarball in src_tarballs:
        #    print 'downloading:', self.download.url + tarball
        #    urllib.urlretrieve( self.download.url + tarball, tarball )
        #    print 'extracting:', tarball
        #    os.system( "tar xzf " + tarball )
        #    os.system( "mv %s %s/sources" % (tarball, self.installPath) )
        tarballs = ['2006_src.tar.gz', 'include.tar.gz']
        for tarball in tarballs:
            print 'downloading:', tarball
            urllib.urlretrieve(self.download.url + tarball, tarball)
            if os.system("tar xzf " + tarball) != 0:
                self.abort('failed to extract ' + tarball)
            os.system("mv " + tarball + " " + self.installPath + '/sources')
def check_fetch_data(dataset_raw_home=None, base_url='', target_filenames=[],
                     dataset_name='', download_if_missing=True):
    """Helper function for downloading any missing data of the dataset.

    Parameters
    ----------
    dataset_raw_home :
        Specify the folder for downloading the data of the dataset. The
        original datasets for this `data_balance` study are stored at
        `../data/raw/` subfolders.

    base_url : string containing the base url for fetching.

    target_filenames : list of the files that need to be downloaded.

    download_if_missing : optional, True by default
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    #TODO: create a test for download_if_missing
    """
    #TODO: assert url directory
    #TODO: assert no empty list
    if not exists(dataset_raw_home):
        makedirs(dataset_raw_home)

    for target in target_filenames:
        path = join(dataset_raw_home, target)
        if not exists(path):
            if download_if_missing:
                full_url = join(base_url, target)
                print('downloading %s data from %s to %s'
                      % (RAW_DATA_LABEL, full_url, dataset_raw_home))
                urlretrieve(full_url, path)
            else:
                raise IOError('%s is missing' % path)
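A hypothetical call for the helper above; the folder, URL, and filenames are illustrative only and do not come from the original code:

# Hypothetical usage of check_fetch_data; every value below is made up for illustration.
check_fetch_data(dataset_raw_home='../data/raw/example',
                 base_url='http://example.com/datasets',
                 target_filenames=['data.csv', 'labels.csv'],
                 dataset_name='example',
                 download_if_missing=True)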
def fetch_coil_2000(data_home=None, download_if_missing=True):
    """Fetcher for the CoIL 2000 dataset.

    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        the original datasets for this `data_balance` study are stored at
        `../data/raw/` subfolders.

    download_if_missing : optional, True by default
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.
    """
    data_home = join(get_data_home(data_home=data_home), 'coil_2000')
    if not exists(data_home):
        makedirs(data_home)
    for target_filename in TARGET_FILENAME_:
        path = join(data_home, target_filename)
        if not exists(path):
            url = join(DATA_URL, target_filename)
            print('downloading Coil 2000 from %s to %s' % (url, data_home))
            urlretrieve(url, path)
sql_query = """
    SELECT DISTINCT *
    FROM photo_data_table
    WHERE datetaken >= '{day}'::date
      AND datetaken < ('{day}'::date + '1 day'::interval)
      AND latitude > {lat_min} AND latitude < {lat_max}
      AND longitude > {lon_min} AND longitude < {lon_max}
    ORDER BY views;
""".format(day='10-%02d-11' % date,
           lon_min=-74.3, lat_min=40.5, lon_max=-73.64, lat_max=40.94)

photos = pd.read_sql_query(sql_query, con)
# print 'hour: ', i, photo_data_from_sql.shape[0], 'hits'

for url in photos['url_s']:
    print url
    photo_name = url, '10-%d/%s' % (date, url.split('/')[-1])
    if not os.path.exists(photo_name[1]):
        urllib.urlretrieve(*photo_name)

# correction
# 12: 1
# 13: 1
# 14: 2
# 15: 10
# 23: 129
# 30: 87
url01 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp01totals.zip"
url02 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp02totals.zip"
url03 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp03totals.zip"
url04 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp04totals.zip"
url05 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp05totals.zip"
url06 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp06totals.zip"
url07 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp07totals.zip"
url08 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp08totals.zip"
url09 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp09totals.zip"
url10 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp10totals.zip"
url11 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp11totals.zip"
url12 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp12totals.zip"
url13 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp13totals.zip"
url14 = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/zbp14totals.zip"

urllib.urlretrieve(url94, "zbp94totals.zip")
urllib.urlretrieve(url95, "zbp95totals.zip")
urllib.urlretrieve(url96, "zbp96totals.zip")
urllib.urlretrieve(url97, "zbp97totals.zip")
urllib.urlretrieve(url98, "zbp98totals.zip")
urllib.urlretrieve(url99, "zbp99totals.zip")
urllib.urlretrieve(url00, "zbp00totals.zip")
urllib.urlretrieve(url01, "zbp01totals.zip")
urllib.urlretrieve(url02, "zbp02totals.zip")
urllib.urlretrieve(url03, "zbp03totals.zip")
urllib.urlretrieve(url04, "zbp04totals.zip")
urllib.urlretrieve(url05, "zbp05totals.zip")
urllib.urlretrieve(url06, "zbp06totals.zip")
urllib.urlretrieve(url07, "zbp07totals.zip")
urllib.urlretrieve(url08, "zbp08totals.zip")
urllib.urlretrieve(url09, "zbp09totals.zip")
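The repetitive block above can also be written as a loop. A minimal sketch, assuming the earlier years (url94 through url00, defined elsewhere and not shown here) follow the same zbpYYtotals.zip naming pattern on the same repository:

# Sketch only: assumes every year's file follows the zbpYYtotals.zip pattern shown above.
import urllib

base = "https://github.com/fedhere/PUI2018_fb55/raw/master/HW12_fb55/"
for year in range(1994, 2015):
    fname = "zbp%02dtotals.zip" % (year % 100)  # '94' .. '99', '00' .. '14'
    urllib.urlretrieve(base + fname, fname)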
import urllib
from PIL import Image  # assumed source of Image.open

def load_photo(url):
    # urlretrieve returns (local filename, headers); open the temporary file with PIL.
    file, mime = urllib.urlretrieve(url)
    photo = Image.open(file)
    return photo
    # (fragment: tail of the request-headers dict passed to urllib2.Request)
    'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko'
})
rep = urllib2.urlopen(req)
cont = rep.read().decode('utf-8')
pat = re.compile('<div.*?id="title">(.*?)</h1>', re.S)
fname = re.search(pat, cont).group(1)
fname = fname.strip().replace('<h1>', '')
# decode HTML entities, e.g. J&#39;Ai Deux Amours -> J'Ai Deux Amours
fname = html.parser.unescape(fname)
fname = fname.split("<a target")[0]
fname = str(i + 1) + "_" + fname
print("Song title: " + fname + ", starting download")
xi = XiamiDownload(url)
if xi.url_location == "exception":
    continue
url_download = xi.get_url()
url_pic = xi.pic
url_lyc = xi.lyc
print('Download URL: ' + url_download)
try:
    # urlretrieve is provided by urllib, not urllib2
    urllib.urlretrieve(url_download, fname + '.mp3')
    urllib.urlretrieve(url_pic, fname + '.jpg')
    urllib.urlretrieve(url_lyc, fname + '.lyc')
except Exception:
    continue
print("Download finished...")
import urllib
import urllib2

i = 1  # change it to any value according to your url
while True:
    num = str(i)  # avoid shadowing the built-in chr()
    ret = urllib2.urlopen("your URL" + num + ".jpg")  # this checks for a response
    if ret.code == 200:
        # urlretrieve is in urllib, not urllib2; this saves to the current directory
        urllib.urlretrieve("your url" + num + ".jpg", num + ".jpg")
    i = i + 1  # this loop is infinite unless you add a stopping condition
import urllib

def download(links, url):
    for link in links:
        # urlretrieve is in urllib, not urllib2; with no filename it saves to a temp file
        urllib.urlretrieve(url + link)
import tweepy
import urllib
import urllib2

# Twitter credentials
consumer_key = "uGrShu4GnN5TOTQHLU7d61aIm"
consumer_secret = "RWpcnM191iY0zsxeXeENZ3O2PayhNtuH54hKWo4xrgH8tz9XJe"
access_token = "4717423181-ZiIHqKgOyxQ66MXSdTGORTXgICphU047mIdVy8W"
access_token_secret = "WUzTfkG7KdAYFYsv3o1iPg2hUYapS0qf9IdspgzQ1h3oS"

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

# Get the image: the API response body is used as the image URL
req = urllib2.Request('http://inspirobot.me/api?generate=true',
                      headers={'User-Agent': 'Mozilla/5.0'})
img_src = urllib2.urlopen(req).read()
# urlretrieve lives in urllib, not urllib2
urllib.urlretrieve(img_src, "new_image.jpg")
api.update_with_media("new_image.jpg")
import os
import shutil
import urllib

def loadFile(url, destination):
    print("Loading file " + url)
    # Download to a temporary file, copy it to the destination, then clean up.
    file, message = urllib.urlretrieve(url)
    shutil.copy(file, destination)
    os.remove(file)
import re
import urllib

def getImg(html, i):
    # Find relative .jpg paths (starting with /U) and download each from the site root.
    imgre = re.compile(r'(?i)src="(\/U.+?\.jpg)')
    imglist = imgre.findall(html)
    my_str = str(i) + '_'
    for num in range(len(imglist)):
        # urlretrieve is in urllib, not urllib2
        urllib.urlretrieve("http://www.ulux.cn/" + imglist[num],
                           '%s.jpg' % (my_str + str(num)))
import os
import sys
import urllib

if len(sys.argv) == 3 and sys.argv[1] == '-u':
    # urlretrieve is in urllib, not urllib2
    response = urllib.urlretrieve(sys.argv[2], 'tmp.pdf')
    filename = 'tmp.pdf'
elif len(sys.argv) == 3:
    print("Invalid option.")
    sys.exit()
else:
    filename = sys.argv[1]

try:
    os.system("pdftotext -raw -enc UTF-8 " + filename + " tmp.txt")
    os.system("python3 process.py tmp.txt")
except IOError:
    sys.exit()
import urllib

def urlsave(link, filename):
    # urlretrieve is in urllib, not urllib2
    urllib.urlretrieve(link, filename)
    return "Saved " + link + " as " + filename
import os
from urllib.request import urlretrieve  # assuming Python 3; on Python 2 use `from urllib import urlretrieve`

def download(link, dirname):
    if not os.path.isdir(dirname):
        os.makedirs(dirname)
    # Strip any #fragment, then use the last path component as the local filename.
    urlretrieve(link, dirname + "/" + link.split("#")[0].split("/")[-1])
import ssl
from urllib.request import urlretrieve  # assuming Python 3; on Python 2 use `from urllib import urlretrieve`

def download(url, filePath):
    try:
        urlretrieve(url, filePath)
    except IOError:
        # Fall back to an unverified SSL context when certificate verification fails.
        ssl._create_default_https_context = ssl._create_unverified_context
        urlretrieve(url, filePath)