def download_all(vol_num, music_num):
    try:
        all_url = "http://luoo-mp3.kssws.ks-cdn.com/low/luoo/radio" + vol_num + "/" + music_num + ".mp3"
        wget.download(all_url)
        wget.filename_from_url(all_url)
        print("Downloading Luoo vol. " + vol_num + ", track " + music_num + "...")
        print("Download finished!")
    except:
        print("Failed to download track " + music_num)
def download_music(vol_num, download_num):
    try:
        down_url = "http://luoo-mp3.kssws.ks-cdn.com/low/luoo/radio" + vol_num + "/" + download_num + ".mp3"
        wget.download(down_url)
        wget.filename_from_url(down_url)
        print("Downloading Luoo vol. " + vol_num + ", track " + download_num)
        print("Download finished!")
    except:
        print("Invalid track number!!!")
def processWget(argDict):
    url = argDict['url']
    savePath = argDict['-p']
    argDict['isrecord'] = False

    #### check dir exists, if not create
    if os.path.exists(savePath) == False:
        os.mkdir(savePath)

    if os.path.exists(savePath):
        file_name = wget.filename_from_url(url)
        file_full_path = '%s/%s' % (savePath, file_name)
        if os.path.exists(file_full_path):
            argDict['file_name'] = file_full_path
            return False

        if file_name.find('.Z') != -1 or file_name.find('gz') != -1:
            file_full_path = '%s/%s' % (savePath, file_name.replace('.Z', ''))
            if file_full_path.find('.gz') != -1:
                file_full_path = '%s/%s' % (savePath, file_name.replace('.gz', ''))
            if os.path.exists(file_full_path):
                argDict['file_name'] = file_full_path
                return False

        argDict['isrecord'] = True
        print '[url=%s, savePath=%s]' % (url, savePath)
        file_name = wget.download(url, savePath)
        argDict['file_name'] = file_name
        return True
    else:
        print 'can not create save path'
        return False
def get_titles(self):
    """ Return a list of titles grabbed from a Topix Newswire page. """
    # grab topix content
    filename = wget.filename_from_url(self.topix_url)  # get filename
    print "[DEBUG] Downloading from topix..."
    with open(wget.download(self.topix_url)) as raw:  # download and open
        content = raw.readlines()  # save content as list
    print "[DEBUG] Content saved."
    try:
        remove(filename)  # remove downloaded file, if exist
    except:
        print "[DEBUG] Cannot download topix page."
        return 0

    # filter results
    titles = []  # container for titles
    for line in content:
        if "<a t=\"artclick\"" in line:
            # find and filter out title
            titles.append(self.rmtags(line[:line.find("<img")]).strip())

    pp(titles)  # pretty print titles to console

    # return list of titles
    return titles
def _download_graphviz(url, verbose=3):
    """Import example dataset from github.

    Parameters
    ----------
    url : str, optional
        url-Link to graphviz. The default is 'https://erdogant.github.io/datasets/graphviz-2.38.zip'.
    verbose : int, optional
        Print message to screen. The default is 3.

    Returns
    -------
    tuple : (gfile, curpath).
        gfile : filename
        curpath : currentpath

    """
    curpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'RESOURCES')
    gfile = wget.filename_from_url(url)
    PATH_TO_DATA = os.path.join(curpath, gfile)
    if not os.path.isdir(curpath):
        if verbose >= 3: print('[treeplot] >Downloading graphviz..')
        os.makedirs(curpath, exist_ok=True)

    # Check file exists.
    if not os.path.isfile(PATH_TO_DATA):
        # Download data from URL
        if verbose >= 3: print('[treeplot] >Downloading graphviz..')
        wget.download(url, curpath)

    return (gfile, curpath)
def get_geo_names(url='https://erdogant.github.io/datasets/country_and_code.zip', verbose=3):
    """Import dataset from github.

    Parameters
    ----------
    url : str
        url-Link to dataset.
    verbose : int, optional
        Print message to screen. The default is 3.

    Returns
    -------
    tuple containing import status and resources.

    """
    import wget
    import os

    curpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    PATH_TO_DATA = os.path.join(curpath, wget.filename_from_url(url))

    # Check file exists.
    if not os.path.isfile(PATH_TO_DATA):
        if verbose >= 3: print('[googletrends] Downloading resources..')
        wget.download(url, curpath)

    # Extract and import local dataset
    df = pd.read_csv(PATH_TO_DATA)
    df['code'] = df['code'].str.upper()
    # Return
    return df
def download_resources(url='https://erdogant.github.io/datasets/SVG_MAPS.zip', verbose=3):
    """Import example dataset from github.

    Parameters
    ----------
    url : str
        url-Link to dataset. The default is 'https://erdogant.github.io/datasets/SVG_MAPS.zip'.
    verbose : int, optional
        Print message to screen. The default is 3.

    Returns
    -------
    tuple containing import status and resources.

    """
    import wget

    curpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    PATH_TO_DATA = os.path.join(curpath, wget.filename_from_url(url))

    # Check file exists.
    if not os.path.isfile(PATH_TO_DATA):
        if verbose >= 3: print('[worldmap] Downloading resources..')
        wget.download(url, curpath)

    # Extract and import local dataset
    [DIROK, DIRMAP] = _extract_zip_files(PATH_TO_DATA)
    # Return
    return DIROK, DIRMAP
def get_image_name(self, image_url: Optional[str] = None, delimiter_key: Optional[str] = None,
                   use_wget: bool = False) -> str:
    """
    Builds image name from URL

    :param image_url: (str) Image URL
    :param delimiter_key: (str) - Token used to determine start of image name in url
    :param use_wget: (boolean) - For testing purposes or specific need, force use of wget.

    :return: (str) Name of URL
    """
    image_url = image_url or self.image_url
    delimiter_key = delimiter_key or self.url_split_token

    LOG.debug(f"Image URL: {image_url}")
    if image_url is None:
        msg = 'Image Url is None.'
        self.status = Status.ERROR
        self.image_info.error_info = msg
        LOG.error(msg)
        return ''

    # Build regexp from key
    sig_comp = re.compile(delimiter_key)

    # Check if url has key (try two different ways)
    match = sig_comp.search(image_url)
    if match is not None and not use_wget:
        LOG.debug("Image name found via regex")
        image_name = image_url.split(delimiter_key, 1)[1]
    else:
        LOG.debug("Image name found via wget")
        image_name = wget.filename_from_url(image_url)
        if image_name is not None:
            image_name = image_name.split(delimiter_key, 1)[1]

    LOG.debug(f'Image URL: {image_url} Image_name: {image_name} '
              f'delimiter: {delimiter_key}')

    # Didn't find the url or something bad happened
    if image_name is None:
        msg = f"Unable to get image_name from url: {image_url}"
        self.status = Status.ERROR
        self.image_info.error_info = msg
        LOG.error(msg)

    # Append the extension if it is not present (and image name is not an empty string)
    elif image_name != '' and not image_name.endswith(self.EXTENSION):
        image_name += f'.{self.EXTENSION}'

    LOG.debug(f"Image Name: {image_name}")
    return image_name
def getfilename_fromurl(url):
    """Get destination filename for a downloaded file."""
    # Try to get the destination filename from the Content-Disposition header
    tmprequest = urllib2.urlopen(url)
    filename = wget.filename_from_headers(tmprequest.info())
    # Fall back to deriving the filename from the url
    if filename is None:
        filename = wget.filename_from_url(url)
    return filename
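The same header-first, URL-second fallback can be written for Python 3; the sketch below is a minimal illustration assuming the PyPI wget module and urllib.request, and the function name guess_filename is hypothetical, not code from the project above.

import urllib.request
import wget

def guess_filename(url):
    # Prefer the name suggested by the Content-Disposition header, if any.
    with urllib.request.urlopen(url) as response:
        filename = wget.filename_from_headers(response.info())
    # Otherwise fall back to the last path segment of the URL.
    return filename or wget.filename_from_url(url)

# usage (hypothetical URL): guess_filename("https://example.com/archive/data.tar.gz")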
def install_lastools():
    file_name = wget.filename_from_url(LASTOOLS_URL)
    if not os.path.exists(BLAST2DEM_EXE):
        print('lastools missing, downloading...')
        with closing(request.urlopen(LASTOOLS_URL)) as r:
            with open(file_name, 'wb') as f:
                shutil.copyfileobj(r, f)
        with zipfile.ZipFile(file_name, "r") as zip_ref:
            zip_ref.extractall("")
        os.remove(file_name)
def _get_wav_file(self, ix):
    print(ix)
    pod = self.pods_info[ix]
    pod_name = wget.filename_from_url(pod['content_url'])
    # pod_name = 'a182f2b0-e229-4035-82cd-77a7447068f9_2.wav'
    pod_name = os.path.join(self.config['PODS_DIR'], pod_name)
    # pod_aud = load_audio(pod_name, self.config['SAMPLING_RATE'])
    ads = len(pod['ads'])
    return pod_name, ads
def get_episode(self, episode_number):
    filename = wget.filename_from_url(self.file_urls[episode_number])
    full_file_path = self.destination_dir + filename
    if not os.path.isfile(full_file_path):
        print "Downloading... " + filename
        file_url = self.file_urls[episode_number]
        print file_url
        wget.download(file_url, out=full_file_path)
        print "Done... Next!"
    else:
        print "Skipping ... " + filename
def get_pdf(link, directory='./'):
    global file_count
    file_name = wget.filename_from_url(link)
    file = s.get(link, stream=True)
    local_file = open(os.path.join(directory, file_name), 'wb')
    local_file.write(file.content)
    local_file.close()
    print("downloaded\x1b[1;32m", file_name, "\x1b[0m")
    file_count += 1
def guess_ContentType_from_url(url):
    filename = filename_from_url(url)
    # exclude urls that point to php pages (filename not at the end of the url)
    filename = filename if filename and url.endswith(filename) else None
    if filename and "." in filename:
        filetype = filename.rsplit(".", 1)[-1]
        debug_suffix.add(filetype)
    else:
        filetype = "html"
    filetype = "html" if "htm" in filetype else filetype
    return (filename, filetype, -1, -1)  # filename, filetype, filesize, connect_time
def fetch_aal3_vascular_atlas(target_affine=np.diag((5, 5, 5))):
    """ Fetch the AAL3 brain atlas given its resolution.

    Parameters
    ----------
    target_affine : np.array, (default=np.diag((5, 5, 5))), affine matrix for
        the produced Nifti images

    Return
    ------
    mask_full_brain : Nifti Image, full mask brain
    atlas_rois : Nifti Image, ROIs atlas
    """
    data_dir = os.path.join(os.path.expanduser('~'), 'hemolearn_data')
    aal3_dir = os.path.join(data_dir, 'AAL3v1')

    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    if not os.path.exists(aal3_dir):
        os.makedirs(aal3_dir)

    url = 'https://www.gin.cnrs.fr/wp-content/uploads/AAL3v1_for_SPM12.zip'
    dest_filename = os.path.join(aal3_dir, wget.filename_from_url(url))

    if not os.path.exists(os.path.join(aal3_dir, 'AAL3')):

        # download files
        wget.download(url, out=aal3_dir)

        # extract files
        with zipfile.ZipFile(dest_filename, 'r') as zip_ref:
            zip_ref.extractall(aal3_dir)

        # clean directory
        cmd = (f"find {data_dir} -type f \( -iname \*.m -o "  # noqa: W605
               f"-iname \*.zip -o -iname \*.rtf -o -iname "  # noqa: W605
               f"\*.pdf \) -delete")  # noqa: W605
        subprocess.call(cmd, shell=True, stdout=subprocess.DEVNULL)

    atlas_fname = os.path.join(aal3_dir, 'AAL3', 'AAL3v1.nii.gz')
    atlas_to_return = image.load_img(atlas_fname)

    atlas_to_return = image.resample_img(atlas_to_return, target_affine,
                                         interpolation='nearest')

    brain_mask = image_nilearn.binarize_img(atlas_to_return, threshold=0)

    return brain_mask, atlas_to_return
def get_xml():
    url = cfg.GOOGLE_FILE
    filename = wget.filename_from_url(url)
    for file in os.listdir(os.getcwd()):
        if file == filename:
            print('File, ' + file + ', already exists!')
            os.remove(file)
            print('File, ' + file + ', has been removed')
    xml = wget.download(url)
    print('File, ' + filename + ', has been downloaded')
def import_example(self, data='USA', verbose=3):
    """Import example dataset from github source.

    Description
    -----------
    Import one of the few datasets from github source.

    Parameters
    ----------
    data : str
        * 'USA'
        * 'RUS'
    verbose : int, (default: 3)
        Print message to screen.

    Returns
    -------
    pd.DataFrame()
        Dataset containing mixed features.

    """
    if data == 'USA':
        url = 'https://erdogant.github.io/datasets/USA_2016_elections.zip'
    elif data == 'RUS':
        url = 'https://erdogant.github.io/datasets/RUS_2018_elections.zip'
    else:
        if verbose >= 3: print('[benfordslaw] >[%s] does not exists. Try "USA" or "RUS" <return>' % (data))

    curpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    PATH_TO_DATA = os.path.join(curpath, wget.filename_from_url(url))
    if not os.path.isdir(curpath):
        os.makedirs(curpath, exist_ok=True)

    # Check file exists.
    if not os.path.isfile(PATH_TO_DATA):
        if verbose >= 3: print('[benfordslaw] >Downloading [%s] dataset from github source..' % (data))
        wget.download(url, curpath)

    # Import local dataset
    if verbose >= 3: print('[benfordslaw] >Import dataset [%s]' % (data))
    df = pd.read_csv(PATH_TO_DATA, sep=',')
    # Return
    return df
def import_example(data='sprinkler', n=10000, verbose=3):
    """Load example dataset.

    Parameters
    ----------
    data : str, (default: sprinkler)
        Pre-defined examples.
        'titanic', 'sprinkler', 'alarm', 'andes', 'asia', 'pathfinder', 'sachs'
    n : int, optional
        Number of samples to generate. The default is 1000.
    verbose : int, (default: 3)
        Print progress to screen.
        0: None, 1: ERROR, 2: WARN, 3: INFO, 4: DEBUG, 5: TRACE

    Returns
    -------
    df : pd.DataFrame()

    """
    import wget

    url = 'https://erdogant.github.io/datasets/'
    if data == 'sprinkler':
        url = url + 'sprinkler.zip'
    elif data == 'titanic':
        url = url + 'titanic_train.zip'
    else:
        try:
            DAG = import_DAG(data, verbose=2)
            df = sampling(DAG, n=n, verbose=2)
        except:
            print('[bnlearn] >Oops! Example dataset not found!')
            df = None
        return df

    curpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    PATH_TO_DATA = os.path.join(curpath, wget.filename_from_url(url))
    if not os.path.isdir(curpath):
        os.mkdir(curpath)

    # Check file exists.
    if not os.path.isfile(PATH_TO_DATA):
        if verbose >= 3: print('[bnlearn] >Downloading example dataset..')
        wget.download(url, curpath)

    # Import local dataset
    if verbose >= 3: print('[bnlearn] >Import dataset..')
    df = pd.read_csv(PATH_TO_DATA)

    return df
def download_pod(pod_url):
    """
    Download a single pod based on pod_url
    Args:
    Returns:
    """
    fname = wget.filename_from_url(pod_url)
    if not os.path.exists(os.path.join(PODS_DIR, fname)):
        try:
            print("Downloading ", fname)
            wget.download(pod_url, PODS_DIR, bar=None)
            # break
        except Exception as e:
            print(e, fname)
def get_targz(self):
    archivefile_payload = {'id': self.pmcid}
    archivefile_locator = requests.get(
        'http://www.pubmedcentral.nih.gov/utils/oa/oa.fcgi',
        params=archivefile_payload)
    record = BeautifulSoup(archivefile_locator.content)

    # parse response for archive file location
    archivefile_url = record.oa.records.record.find(format='tgz')['href']
    archivefile_name = wget.filename_from_url(archivefile_url)
    complete_path_targz = os.path.join(self.parameters["data_dir"], archivefile_name)
    urllib.urlretrieve(archivefile_url, complete_path_targz)
    self.complete_path_targz = complete_path_targz

    # @TODO For some reason, wget hangs and doesn't finish, using
    # urllib.urlretrieve() instead for this for now.
    # archivefile = wget.download(archivefileurl, wget.bar_thermometer)

    self.phase['get_targz'] = True
def get_rawfile_from_github(raw_url: str, outdir: str = None) -> None:
    """
    Download raw script from github. URL should point to the raw script off github

    Args:
        raw_url (str): url of raw script off github (should be publicly accessible)
        outdir (str): directory for saving the script; defaults to current path otherwise

    Example:
        >>> get_rawfile_from_github('https://raw.githubusercontent.com/beoutbreakprepared/nCoV2019/master/entry-checker.R', 'other_authors')
    """
    from wget import download, filename_from_url

    filename = filename_from_url(raw_url)
    if outdir:
        outpath = outdir + '/' + filename
    else:
        outpath = filename
    download(raw_url, outpath)
def process_file(tasks, dem_files, dem_hs_files, process_name, options_dict, zf,
                 multiDirectional, tile_pyramid_levels, tar_dir, dem_dir):
    '''
    Download file using wget, extract dem from tar archive, and calculate stats
    '''
    while True:
        url = tasks.get()
        if not isinstance(url, str):
            print('[%s] evaluation routine quits' % process_name)
            # Indicate finished
            dem_files.put(0)
            dem_hs_files.put(0)
            break
        else:
            out_file = join(tar_dir, wget.filename_from_url(url))
            m_file = basename(out_file)
            root, ext = splitext(m_file)
            if ext == '.gz':
                root, ext = splitext(root)
            m_file = join(dem_dir, root + '_reg_dem.tif')
            m_ovr_file = join(m_file, ".ovr")
            m_hs_file = join(dem_dir, root + '_reg_dem_hs.tif')
            m_hs_ovr_file = join(m_hs_file, ".ovr")

            if options_dict['download']:
                if not exists(out_file) or overwrite:
                    print('Processing file {}'.format(url))
                    out_file = wget.download(url, out=tar_dir)

            if options_dict['extract']:
                # Only extract if DEM file does not exists
                # FIXME this extracts all files??
                extract_tar(out_file, dem_dir=dem_dir)

            if options_dict['build_tile_overviews']:
                if not exists(m_ovr_file) or overwrite:
                    calc_stats_and_overviews(m_file, tile_pyramid_levels)

            if options_dict['build_tile_hillshade']:
                if not exists(m_hs_file) or overwrite:
                    create_hillshade(m_file, m_hs_file, zf, multiDirectional)

            if options_dict['build_tile_hillshade_overviews']:
                if not exists(m_hs_ovr_file) or overwrite:
                    calc_stats_and_overviews(m_hs_file, tile_pyramid_levels)

            dem_files.put(m_file)
            dem_hs_files.put(m_hs_file)
    return
def get_targz(self):
    try:
        archivefile_payload = {'id': self.pmcid}
        archivefile_locator = requests.get(
            'http://www.pubmedcentral.nih.gov/utils/oa/oa.fcgi',
            params=archivefile_payload)
        record = BeautifulSoup(archivefile_locator.content)

        # parse response for archive file location
        archivefile_url = record.oa.records.record.find(format='tgz')['href']
        archivefile_name = wget.filename_from_url(archivefile_url)
        complete_path_targz = os.path.join(self.dirs.data_dir, archivefile_name)
        urllib.urlretrieve(archivefile_url, complete_path_targz)
        self.complete_path_targz = complete_path_targz

        # @TODO For some reason, wget hangs and doesn't finish, using
        # urllib.urlretrieve() instead for this for now.
        # archivefile = wget.download(archivefileurl, wget.bar_thermometer)
    except:
        raise ConversionError(message='could not get the tar.gz file from the pubmed',
                              doi=self.doi)
def down_info(self):
    assert not self.is_html(), "Can't get down_info from html url!"
    filename = wget.filename_from_headers(self.headers) or wget.filename_from_url(self.url)
    filename = urllib.parse.unquote(filename)
    if filename and "." in filename:
        filetype = filename.rsplit(".", 1)[-1]
    else:
        # suffixes under application/ are mostly not text files (pdf, doc, etc. excluded)
        suffix = re.search(r"plain|rar|zip|7z|mobi|epub|application", self.ContentType)
        if suffix:
            suffix = suffix.group()
            suffix = suffix.replace("plain", "txt").replace("application", "app")
        else:
            assert False, "Unknown filetype:" + self.ContentType
        filetype = None if suffix is None else suffix
    filesize = int(self.headers.get('Content-Length', -1))
    connect_time = self.elapse
    return filename, filetype, filesize, connect_time
def download_image(url, filepath):
    """Downloads an image; the output filepath is a hash of the url

    :url: The url to download
    :returns: The filename. If an error occurred or the url passed does not
        point to an image, None is returned
    """
    check = r'[^/\\&\?]+\.\w{3,4}(?=([\?&].*$|$))'
    match = re.search(check, url)
    if match:
        filename = wget.filename_from_url(url)
        if filepath[-1] != '/':
            filepath += '/'
        wget.download(url, filepath + filename, bar=lambda current, total, width: None)
        return filename
    return None
def downbakupfile(AccessKeyId, AccessKeySecret, args):
    client = AcsClient(AccessKeyId, AccessKeySecret, 'cn-shanghai', timeout=600)
    start_time = args.st
    end_time = args.et
    request = DescribeBackupsRequest()
    request.set_accept_format('json')
    request.set_DBInstanceId(args.i)

    # Create the download directory if it does not exist yet
    if os.path.exists(args.d):
        pass
    else:
        os.mkdir(args.d)

    # Get the total number of backups and the page size, then compute the page count
    request.set_StartTime(start_time)
    request.set_EndTime(end_time)
    response = client.do_action_with_exception(request)
    pages = math.ceil(eval(str(response, encoding='utf-8'))['TotalRecordCount']
                      / eval(str(response, encoding='utf-8'))['PageRecordCount'])

    for page in range(1, pages + 1):
        request.set_PageNumber(page)
        response = client.do_action_with_exception(request)
        backupdetail = eval(str(response, encoding='utf-8'))['Items']['Backup']
        for i in range(len(backupdetail)):
            bakfile_url = backupdetail[i]['BackupDownloadURL']  # public (Internet) download URL
            # bakfile_url = backupdetail[i]['BackupIntranetDownloadURL']  # intranet download URL
            re_result = wget.filename_from_url(bakfile_url)
            bakfile = os.path.join(os.path.join(os.path.dirname(os.path.abspath(__file__)), args.d), re_result)
            if os.path.exists(bakfile):
                pass
            else:
                wget.download(bakfile_url, out=bakfile)
                print('Downloaded file %s successfully' % bakfile)

    deletefile(args.d)
def download_nndb(self):
    """ Download fullnames from NNDB website, save into fullnames.dat. """
    for i in xrange(26):
        print "\n[DEBUG] %d pages left..." % (26 - i)
        # url pattern for all NNDB pages based on last names
        url = "http://www.nndb.com/lists/%d/000063%d" % (493 + i, 304 + i)
        # download page, get raw data.
        fn = wget.filename_from_url(url)
        with open(wget.download(url)) as raw:
            content = raw.readlines()
        os.remove(fn)
        for line in content:
            if "nndb.com/people" in line:
                name = self.rmtags(line).replace("\n", "")
                with open(self.nndb_file, 'a') as w:
                    w.write(name + "\n")
    print "[DEBUG] NNDB Download complete!"
def download(url: str, path: str, name: str = None, ext: str = None, timestamp: bool = False):
    working_directory = os.getcwd()

    if name is None:
        name = wget.filename_from_url(url)
    if name is None:
        name = str(uuid.uuid4())
    if timestamp:
        name = f'{MediaTools.generate_date_string()}_{name}'

    if not os.path.exists(path):
        os.makedirs(path)

    file_path = os.path.join(path, name)
    filename = wget.download(url, file_path)
    os.chdir(working_directory)
def import_example(data='titanic', verbose=3):
    """Import example dataset from github source.

    Parameters
    ----------
    data : str, optional
        Name of the dataset 'sprinkler' or 'titanic' or 'student'.
    verbose : int, optional
        Print message to screen. The default is 3.

    Returns
    -------
    pd.DataFrame()
        Dataset containing mixed features.

    """
    if data == 'sprinkler':
        url = 'https://erdogant.github.io/datasets/sprinkler.zip'
    elif data == 'titanic':
        url = 'https://erdogant.github.io/datasets/titanic_train.zip'
    elif data == 'student':
        url = 'https://erdogant.github.io/datasets/student_train.zip'

    curpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    PATH_TO_DATA = os.path.join(curpath, wget.filename_from_url(url))
    if not os.path.isdir(curpath):
        os.mkdir(curpath)

    # Check file exists.
    if not os.path.isfile(PATH_TO_DATA):
        if verbose >= 3: print('[pca] >Downloading example dataset from github source..')
        wget.download(url, curpath)

    # Import local dataset
    if verbose >= 3: print('[pca] >Import dataset [%s]' % (data))
    df = pd.read_csv(PATH_TO_DATA)
    # Return
    return df
def get_targz(self):
    # make request for archive file location
    archivefile_payload = {'id': self.pmcid}
    archivefile_locator = requests.get(
        'http://www.pubmedcentral.nih.gov/utils/oa/oa.fcgi',
        params=archivefile_payload)
    record = BeautifulSoup(archivefile_locator.content)

    # parse response for archive file location
    archivefile_url = record.oa.records.record.find(format='tgz')['href']
    archivefile_name = wget.filename_from_url(archivefile_url)
    complete_path_targz = os.path.join(self.parameters["data_dir"], archivefile_name)

    # @TODO For some reason, wget hangs and doesn't finish
    # archivefile = wget.download(archivefileurl, wget.bar_thermometer)
    # Using urllib.urlretrieve() instead of wget for now:

    # Download targz
    urllib.urlretrieve(archivefile_url, complete_path_targz)
    self.complete_path_targz = complete_path_targz
    self.phase['get_targz'] = True
def get_project(browser, project, to_git=False):
    name = project.rpartition("/")[-1]
    if name.startswith("exam"):
        return
    if not os.path.isdir(name):
        os.mkdir(name)
    os.chdir(name)
    browser.get(project)
    while not page_is_loaded(browser, "projects"):
        continue
    for links in browser.find_elements_by_tag_name("a"):
        link = links.get_attribute("href")
        if link.startswith("https://ceph.assistants.epita.fr/") and not os.path.exists(wget.filename_from_url(link)):
            wget.download(link)
    while rows(browser) is True:
        for links in browser.find_elements_by_tag_name("a"):
            link = links.get_attribute("href")
            if link.startswith("https://ceph.assistants.epita.fr/") and not os.path.exists(wget.filename_from_url(link)):
                try:
                    wget.download(link)
                except Exception as e:
                    print()
                    print(e)
                    print("Error:")
                    print(link)
                    print(wget.filename_from_url(link))
                    print()
    if to_git:
        for git_links in browser.find_elements_by_tag_name("input"):
            git_link = git_links.get_attribute("value")
            if git_link.startswith("git@"):
                git.Git(".").clone(git_link)
    os.chdir("../")
#!/usr/bin/env python3
import os, wget
from ase import Atom, Atoms
from ase.build import bulk
from ase.calculators.lammpsrun import LAMMPS     # file-based ASE calculator
from ase.calculators.lammpslib import LAMMPSlib  # ASE calculator built on LAMMPS's native Python interface

# Download the interatomic potential
url = "https://openkim.org/files/MO_418978237058_005/NiAlH_jea.eam.alloy"
pot_fname = wget.filename_from_url(url)
if not os.path.exists(pot_fname):
    pot_fname = wget.download(url)

# Build the model
Ni = bulk('Ni', cubic=True)
H = Atom('H', position=Ni.cell.diagonal() / 2)
NiH = Ni + H
NiH.pbc = True

# Run the calculation
lammps = LAMMPS(files=[pot_fname],
                parameters={'pair_style': 'eam/alloy',
                            'pair_coeff': ['* * {} H Ni'.format(pot_fname)]})
lammps.set(command="/usr/bin/lmp")
NiH.calc = lammps

print("Energy ", NiH.get_potential_energy())
def _import_example(data='2dpeaks', url=None, sep=';', verbose=3):
    """Import example dataset from github source.

    Description
    -----------
    Import one of the few datasets from github source or specify your own download url link.

    Parameters
    ----------
    data : str
        Name of datasets: "2dpeaks" or "2dpeaks_image"
    url : str
        url link to dataset.
    Verbose : int (default : 3)
        Print to screen. 0: None, 1: Error, 2: Warning, 3: Info, 4: Debug, 5: Trace.

    Returns
    -------
    pd.DataFrame()
        Dataset containing mixed features.

    """
    if url is not None:
        data = wget.filename_from_url(url)
    elif data == '2dpeaks_image':
        url = 'https://erdogant.github.io/datasets/' + data + '.png'
    elif data == '2dpeaks':
        url = 'https://erdogant.github.io/datasets/' + data + '.zip'
    elif data == '1dpeaks':
        x = [0, 13, 22, 30, 35, 38, 42, 51, 57, 67, 73, 75, 89, 126, 141, 150, 200]
        y = [1.5, 0.8, 1.2, 0.2, 0.4, 0.39, 0.42, 0.22, 0.23, 0.1, 0.11, 0.1, 0.14,
             0.09, 0.04, 0.02, 0.01]
        # X = np.c_[x, y]
        return y
    else:
        if verbose >= 3: print('[findpeaks] >Nothing to download <return>.')
        return None

    curpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    PATH_TO_DATA = os.path.join(curpath, wget.filename_from_url(url))
    if not os.path.isdir(curpath):
        os.makedirs(curpath, exist_ok=True)

    # Check file exists.
    if not os.path.isfile(PATH_TO_DATA):
        if verbose >= 3: print('[findpeaks] >Downloading from github source: [%s]' % (url))
        wget.download(url, curpath)

    # Import local dataset
    if verbose >= 3: print('[findpeaks] >Import [%s]' % (PATH_TO_DATA))
    if data == '2dpeaks_image':
        cv2 = stats._import_cv2()
        X = cv2.imread(PATH_TO_DATA)
    else:
        X = pd.read_csv(PATH_TO_DATA, sep=sep).values

    # Return
    return X
def main():
    try:
        # standard flags
        parser = argparse.ArgumentParser(description='Command-line interface to jats-to-mediawiki.xslt, a script to ' +
                                         'manage conversion of articles (documents) from JATS xml format ' +
                                         'to MediaWiki markup, based on DOI or PMCID')
        parser.add_argument('-d', '--destination',
                            default='articles/',
                            help='path to destination directory for purposes of this script')
        parser.add_argument('-x', '--xmlcatalogfiles',
                            default='dtd/catalog-test-jats-v1.xml',
                            help='path to xml catalog files for xsltproc')
        # includes arbitrarily long list of keywords, or an input file
        parser.add_argument('-i', '--infile', nargs='?',
                            type=argparse.FileType('r'), default=sys.stdin,
                            help='path to input file', required=False)
        parser.add_argument('-o', '--outfile', nargs='?',
                            type=argparse.FileType('w'), default=sys.stdout,
                            help='path to output file', required=False)
        parser.add_argument('-a', '--articleids', nargs='+', default=None,
                            help='an article ID or article IDs, either as DOIs or PMCIDs')
        args = parser.parse_args()
        # print args  # debug

        # Handle and convert input values
        destination = args.destination
        xmlcatalogfiles = args.xmlcatalogfiles
        infile = args.infile
        outfile = args.outfile
        articleids = []

        # add articleids if passed as option values
        if args.articleids:
            articleids.extend([to_unicode_or_bust(articleid)
                               for articleid in args.articleids])

        # add articleids from file or STDIN
        if not sys.stdin.isatty() or infile.name != "<stdin>":
            articleids.extend([to_unicode_or_bust(line.strip())
                               for line in infile.readlines()])

        # De-duplicate by converting to set (unique) then back to list again
        articleids = list(set(articleids))

        # set environment variable for xsltproc and jats dtd
        try:
            cwd = to_unicode_or_bust(os.getcwd())
            if xmlcatalogfiles.startswith("/"):
                os.environ["XML_CATALOG_FILES"] = xmlcatalogfiles
            else:
                os.environ["XML_CATALOG_FILES"] = (cwd
                                                   + to_unicode_or_bust("/")
                                                   + to_unicode_or_bust(xmlcatalogfiles))
        except:
            print 'Unable to set XML_CATALOG_FILES environment variable'
            sys.exit(-1)

        # print "\n" + os.environ.get('XML_CATALOG_FILES') + "\n"  # debug

        # create destination directory
        destination = cwd + "/" + to_unicode_or_bust(destination)
        try:
            if not os.path.exists(destination):
                os.makedirs(destination)
        except:
            print 'Unable to find or create temporary directory'
            sys.exit(-1)

        # separate DOIs and PMCIDs
        articledois = [i for i in articleids if re.match('^10*', i)]
        articlepmcids = [i for i in articleids if re.match('^PMC', i)]
        articlepmcidsfromdois = []

        # Send DOIs through PMC ID converter API:
        # http://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
        if articledois:
            articledois = ",".join(articledois)
            idpayload = {'ids': articledois, 'format': 'json'}
            idconverter = requests.get(
                'http://www.pubmedcentral.nih.gov/utils/idconv/v1.0/',
                params=idpayload)
            print idconverter.text
            records = idconverter.json()['records']
            if records:
                articlepmcidsfromdois = [i['pmcid'] for i in records]

        # Extend PMCIDs with those from converted DOIs
        articlepmcids.extend(articlepmcidsfromdois)

        # De-duplicate with set to list conversion
        articlepmcids = list(set(articlepmcids))

        print "\nArticle IDs to convert:\n"  # debug
        print articlepmcids  # debug

        # Main loop to grab the archive file, get the .nxml file, and convert
        for articlepmcid in articlepmcids:

            # @TODO make flag an alternative to .tar.gz archive download
            # use instead the regular API for xml document
            # http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=PMC2953622
            # unclear if this front-facing XML is updated frequently
            # I recall from plos that updates are made via packaged archives

            # request archive file location
            archivefilepayload = {'id': articlepmcid}
            archivefilelocator = requests.get(
                'http://www.pubmedcentral.nih.gov/utils/oa/oa.fcgi',
                params=archivefilepayload)
            record = BeautifulSoup(archivefilelocator.content)

            # parse response for archive file location
            archivefileurl = record.oa.records.record.find(format='tgz')['href']

            # download the file
            archivefilename = wget.filename_from_url(archivefileurl)
            if not os.path.exists(destination + archivefilename):
                urllib.urlretrieve(archivefileurl, destination + archivefilename)
                print "\nDownloading file..."
            else:
                print "\nFound local file, skipping download..."

            # @TODO For some reason, wget hangs and doesn't finish, using
            # urllib.urlretrieve() instead for this for now.
            # archivefile = wget.download(archivefileurl, wget.bar_thermometer)

            # open the archive
            archivedirectoryname, archivefileextension = archivefilename.split('.tar.gz')
            if not os.path.exists(destination + archivedirectoryname):
                print "\nExtracting " + archivedirectoryname + " ..."
                tfile = tarfile.open(destination + archivefilename, 'r:gz')
                tfile.extractall(destination)
            else:
                print "\nFound local directory, skipping extraction..."

            # run xsltproc
            # @TODO use list comprehension instead
            for n in glob.glob(destination + archivedirectoryname + "/*.nxml"):
                nxmlfilepath = n
                print "\nConverting... "
                print nxmlfilepath
                xsltoutputfile = open(destination + articlepmcid + ".mw.xml", 'w')
                xslt_file = os.path.abspath(os.path.dirname(__file__)) + '/' + 'jats-to-mediawiki.xsl'
                xsltcommand = call(['xsltproc', xslt_file, nxmlfilepath],
                                   stdout=xsltoutputfile)
                print "\nReturning results..."
                if xsltcommand == 0:
                    print xsltoutputfile.name + "\n"
                else:
                    print "xslt conversion: failure"
                    sys.exit(-1)

    except KeyboardInterrupt:
        print "Killed script with keyboard interrupt, exiting..."
    except Exception:
        traceback.print_exc(file=sys.stdout)

    sys.exit(0)
import wget
import tempfile
import os

url = 'https://p0.ifengimg.com/2019_30/1106F5849B0A2A2A03AAD4B14374596C76B2BDAB_w1000_h626.jpg'

# Get the filename from the url
file_name = wget.filename_from_url(url)
print(file_name)  # 1106F5849B0A2A2A03AAD4B14374596C76B2BDAB_w1000_h626.jpg

# Download the file under its default name; the filename is returned
file_name = wget.download(url)
print(file_name)  # 1106F5849B0A2A2A03AAD4B14374596C76B2BDAB_w1000_h626.jpg

# Download the file and rename the output file
target_name = 't1.jpg'
file_name = wget.download(url, out=target_name)
print(file_name)  # t1.jpg

# Get the temporary directory and download into it
tmpdir = tempfile.gettempdir()
target_name = 't2.jpg'
file_name = wget.download(url, out=os.path.join(tmpdir, target_name))
print(file_name)  # /tmp/t2.jpg
url = str(link.get('href'))
if "http://www.fab.mil.br/cabine/voos/" in url and ".pdf" in url:
    str_date = re.search('\d+', url).group(0)
    date = datetime.datetime.strptime(str_date, "%Y%m%d").date()
    flights.append(FileFlights(url, date))

# remove all files and create downloads directory
if os.path.exists('downloads/'):
    os.popen('rm -f downloads/*')
else:
    os.mkdir('downloads')

# download all files
for flight in flights:
    filename = wget.download(flight.url, 'downloads')
    filename = wget.filename_from_url(flight.url)
    filepath = 'downloads/' + filename
    fileobj = open(filepath, 'rb')
    doc = PDFDocument(fileobj)
    result = get_tables_from_document(doc)
    # print result
    for r in result:
        for i in r:
            # print i
            res = [x for x in i if '' not in i]
            if len(res) > 0:
                print len(res), res
                # print [t.encode('utf-8') for t in i]
                # print i
# a = []
def extract_ad(pod_info, create_ads=False, create_non_ads=False):
    """
    extract ad from a given audio file based on timestamps
    Args:
    Returns:
    """
    pod_info['fname'] = wget.filename_from_url(pod_info['content_url'])
    if os.path.exists(os.path.join(PODS_DIR, pod_info['fname'])):
        pod_meta = MP3_META(os.path.join(PODS_DIR, pod_info['fname']))
        pod_file_length = float(pod_meta.info.length)
        try:
            pod_length_json = pod_info['content_duration']
            if pod_length_json.count(':') == 1:
                pod_length = [float(x) for x in pod_length_json.split(':')]
                pod_length_json = pod_length[0]*60 + pod_length[1]
            elif pod_length_json.count(':') == 2:
                pod_length = [float(x) for x in pod_length_json.split(':')]
                pod_length_json = pod_length[0]*3600 + \
                    pod_length[1]*60 + pod_length[2]
            else:
                pod_length_json = float(pod_length_json)
        except ValueError as e:
            print(e, pod_info['fname'])
            pod_length_json = pod_file_length

        # if abs(pod_file_length-pod_length_json) < 5:
        if pod_file_length <= pod_length_json:
            # if True:
            # print('Extracting ad from {}'.format(pod_info['fname']))
            if create_non_ads or create_ads:
                pod_aud, pod_sr = librosa.load(os.path.join(
                    PODS_DIR, pod_info['fname']), sr=None)
                pod_aud, pod_sr = preprocess_aud(pod_aud, pod_sr)
            # return 1
            aud_len = len(pod_aud)
            ad_slices = []
            non_ad_aud = np.array([])
            ad_stop = 0
            for i, ad in enumerate(pod_info['ads']):
                ad_slice = slice(
                    floor(int(ad['ad_start'])*pod_sr),
                    ceil((int(ad['ad_end'])+1)*pod_sr))
                ad_aud = pod_aud[ad_slice.start:ad_slice.stop]
                ad_slices.append(ad_slice)
                non_ad_aud = np.append(
                    non_ad_aud, pod_aud[ad_stop:ad_slice.start])
                ad_stop = ad_slice.stop
                ad_fname = os.path.join(ADS_DIR, "{}_{}.wav".format(
                    pod_info['fname'].split('.')[0], i))
                if not os.path.exists(ad_fname) and create_ads:
                    soundfile.write(ad_fname, ad_aud, pod_sr, format='WAV')
            if ad_slice.stop < aud_len:
                non_ad_aud = np.append(non_ad_aud, pod_aud[ad_slice.stop:])
            # print(ad_slices)
            ad_ranges = [x for y in [list(range(ad_slice.start, ad_slice.stop))
                                     for ad_slice in ad_slices] for x in y]
            try:
                assert len(ad_ranges)+len(non_ad_aud) <= aud_len
                non_ad_fname = os.path.join(NON_ADS_DIR, "{}_content.wav".format(
                    pod_info['fname'].split('.')[0]))
                if not os.path.exists(non_ad_fname) and create_non_ads:
                    soundfile.write(non_ad_fname, non_ad_aud, pod_sr, format='WAV')
            except AssertionError as ae:
                print("{} Aud,ad length mismatch".format(pod_info['fname']),
                      len(ad_ranges), len(non_ad_aud), aud_len,
                      aud_len-len(ad_ranges)+len(non_ad_aud))
        else:
            print('Skipping {} length mismatch'.format(pod_info['fname']))
# print(file_name)
# audio_request = requests.get(obj_link)
# with open("D:\\Downloads\\" + file_name, "wb") as file:
#     file.write(audio_request.content)
# print(audio_request.status_code)
# print(audio_request.headers["content-type"])
# print(audio_request.encoding)
# ====================================

dir_to_save = "/"
if sys.platform == "win32":
    dir_to_save = "D:\\Downloads\\"
elif sys.platform == "linux":
    dir_to_save = "/storage/sdcard0/Download/"

print("Directory to save: ", dir_to_save)

if not os.path.exists(dir_to_save):
    print("Directory to save not found!")
    quit()

file_name = wget.filename_from_url(obj_link)

if not os.path.exists(dir_to_save + file_name):
    wget.download(obj_link, dir_to_save + file_name)
else:
    print("This file already downloaded!")
    quit()
def path_and_filename_from_url(url, data_dir):
    """ Given a URL and directory, return the filename in that dir """
    filename = wget.filename_from_url(url)
    filename = os.path.join(data_dir, filename)
    return filename
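A minimal, self-contained usage sketch of the helper above; the URL and directory name are hypothetical examples, not values from the original project.

import os
import wget

def path_and_filename_from_url(url, data_dir):
    """ Given a URL and directory, return the filename in that dir """
    filename = wget.filename_from_url(url)
    filename = os.path.join(data_dir, filename)
    return filename

# Hypothetical example: maps the URL's last path segment into a local data directory.
print(path_and_filename_from_url("https://example.com/files/report.csv", "data"))
# e.g. data/report.csv on POSIX (os.path.join uses the platform separator)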