def site_reachable(url):
    """Return True if `url` answers within 1 second, False otherwise.

    Parameters
    ----------
    url : str
        The URL to probe.

    Returns
    -------
    bool
        True when the URL could be opened, False on URLError or timeout.
    """
    try:
        opn = urlopen(url, timeout=1)
    except (URLError, socket.timeout):
        return False
    else:
        # fix: the original never closed the response, leaking the socket
        opn.close()
        return True
def url_exists(url, timeout=2):
    """
    Checks whether a url is online.

    Parameters
    ----------
    url: str
        A string containing a URL

    Returns
    -------
    value: bool

    Examples
    --------
    >>> from sunpy.util.net import url_exists
    >>> url_exists('http://www.google.com')
    True
    >>> url_exists('http://aslkfjasdlfkjwerf.com')
    False
    """
    try:
        opn = urlopen(url, timeout=timeout)
    except URLError:
        # HTTPError is a subclass of URLError, so this one clause replaces
        # the original's redundant HTTPError/URLError pair.
        return False
    else:
        # fix: the original never closed the response, leaking the socket
        opn.close()
        return True
def url_exists(url, timeout=2):
    """
    Checks whether a url is online.

    Parameters
    ----------
    url: `str`
        A string containing a URL

    Returns
    -------
    value: `bool`

    Examples
    --------
    >>> from sunpy.util.net import url_exists
    >>> url_exists('http://www.google.com') #doctest: +REMOTE_DATA
    True
    >>> url_exists('http://aslkfjasdlfkjwerf.com') #doctest: +REMOTE_DATA
    False
    """
    try:
        opn = urlopen(url, timeout=timeout)
    except URLError:
        # HTTPError is a subclass of URLError, so this one clause replaces
        # the original's redundant HTTPError/URLError pair.
        return False
    else:
        # fix: the original never closed the response, leaking the socket
        opn.close()
        return True
def site_reachable(url):
    """Return True if `url` answers within 1 second, False otherwise.

    Parameters
    ----------
    url : str
        The URL to probe.

    Returns
    -------
    bool
        True when the URL could be opened, False on URLError or timeout.
    """
    try:
        opn = urlopen(url, timeout=1)
    except (URLError, socket.timeout):
        # fix: also catch socket.timeout, matching the sibling
        # site_reachable/get_base_url implementations in this file --
        # a raw timeout previously escaped as an unhandled exception.
        return False
    else:
        # fix: the original never closed the response, leaking the socket
        opn.close()
        return True
def get_base_url():
    """
    Find the first mirror which is online

    Returns
    -------
    str
        The first entry of ``data_servers`` that answered within 1 second.

    Raises
    ------
    IOError
        If no server in ``data_servers`` is reachable.
    """
    for server in data_servers:
        try:
            opn = urlopen(server, timeout=1)
        except (URLError, socket.timeout):
            continue
        else:
            # fix: the original returned without closing the probe
            # connection, leaking the socket
            opn.close()
            return server
    raise IOError('Unable to find an online HESSI server from {0}'.format(data_servers))
def get_base_url():
    """
    Find the first mirror which is online

    Returns
    -------
    str or None
        The first entry of ``data_servers`` that answered within 1 second.
    """
    for server in data_servers:
        try:
            opn = urlopen(server, timeout=1)
        except (URLError, socket.timeout):
            pass
        else:
            # fix: the original returned without closing the probe
            # connection, leaking the socket
            opn.close()
            return server
    # NOTE(review): falls through and returns None when no mirror responds;
    # a sibling version of this function raises IOError instead -- confirm
    # callers handle the None case before unifying the two.
def filelist(self, timerange):
    """
    Returns the list of existent files in the archive for the
    given time range.

    Parameters
    ----------
    timerange : `~sunpy.time.TimeRange`
        Time interval where to find the directories for a given pattern.

    Returns
    -------
    filesurls : list of strings
        List of all the files found between the time range given.

    Examples
    --------
    >>> from sunpy.time import TimeRange
    >>> timerange = TimeRange('2015-01-01','2015-01-01T16:00:00')
    >>> print(solmon.filelist(timerange))
    ['http://solarmonitor.org/data/2015/01/01/fits/swap/swap_00174_fd_20150101_025423.fts.gz']

    Note
    ----
    The search is strict with the time range, so if the archive scraped
    contains daily files, but the range doesn't start from the beginning
    of the day, then the file for that day won't be selected. The end of
    the timerange will normally be OK as includes the file on such end.
    """
    directories = self.range(timerange)
    filesurls = []
    # fix: the original indexed directories[0] unconditionally and raised
    # IndexError when the range produced no directories
    if not directories:
        return filesurls
    if directories[0][0:3] == "ftp":  # TODO use urlsplit from pr #1807
        return self._ftpfileslist(timerange)
    for directory in directories:
        # NOTE: the original wrapped this in `try: ... except: raise`,
        # which is a no-op and has been removed.
        opn = urlopen(directory)
        try:
            soup = BeautifulSoup(opn, "lxml")
            for link in soup.find_all("a"):
                href = link.get("href")
                # fix: <a> tags without an href attribute yield None,
                # which crashed on .endswith()
                if href is None:
                    continue
                if href.endswith(self.pattern.split('.')[-1]):
                    fullpath = directory + href
                    if self._URL_followsPattern(fullpath):
                        datehref = self._extractDateURL(fullpath)
                        if timerange.start <= datehref <= timerange.end:
                            filesurls.append(fullpath)
        finally:
            opn.close()
    return filesurls
def download_file(url, directory, default=u'file', overwrite=False):
    """Download the resource at `url` into `directory`.

    The filename is taken from the Content-Disposition header when
    present, otherwise from the URL path, and falls back to `default`
    when neither yields one. Existing files are only replaced when
    `overwrite` is True. Returns the path of the downloaded file.
    """
    response = urlopen(url)
    try:
        # delegate naming and writing; the connection is always released
        return download_fileobj(response, directory, url, default, overwrite)
    finally:
        response.close()
def download_file(url, directory, default="file", overwrite=False):
    """Fetch `url` and save it under `directory`.

    Naming precedence: Content-Disposition header, then the URL path,
    then `default`. An existing file is clobbered only if `overwrite`
    is True. Returns the resulting file path.
    """
    stream = urlopen(url)
    try:
        saved_path = download_fileobj(stream, directory, url, default, overwrite)
    finally:
        stream.close()
    return saved_path
def filelist(self, timerange):
    """
    Returns the list of existent files in the archive for the
    given time range.

    Parameters
    ----------
    timerange : `~sunpy.time.TimeRange`
        Time interval where to find the directories for a given pattern.

    Returns
    -------
    filesurls : list of strings
        List of all the files found between the time range given.

    Examples
    --------
    >>> from sunpy.time import TimeRange
    >>> timerange = TimeRange('2015-01-01','2015-01-01T16:00:00')
    >>> print(solmon.filelist(timerange))
    ['http://solarmonitor.org/data/2015/01/01/fits/swap/swap_00174_fd_20150101_025423.fts.gz']
    """
    directories = self.range(timerange)
    filesurls = []
    for directory in directories:
        try:
            opn = urlopen(directory)
            try:
                soup = BeautifulSoup(opn)
                for link in soup.find_all("a"):
                    href = link.get("href")
                    # fix: <a> tags without an href attribute yield None,
                    # which crashed on .endswith()
                    if href is None:
                        continue
                    if href.endswith(self.pattern.split('.')[-1]):
                        fullpath = directory + href
                        if self._URL_followsPattern(fullpath):
                            datehref = self._extractDateURL(fullpath)
                            if (datehref >= timerange.start and
                                    datehref <= timerange.end):
                                filesurls.append(fullpath)
            finally:
                opn.close()
        except Exception:
            # fix: was a bare `except:` which also swallowed SystemExit
            # and KeyboardInterrupt; the deliberate best-effort skip of
            # unreachable directories is preserved.
            pass
    return filesurls
def filelist(self, timerange):
    """
    Returns the list of existent files in the archive for the
    given time range.

    Parameters
    ----------
    timerange : `~sunpy.time.TimeRange`
        Time interval where to find the directories for a given pattern.

    Returns
    -------
    filesurls : list of strings
        List of all the files found between the time range given.

    Examples
    --------
    >>> from sunpy.util.scraper import Scraper
    >>> solmon_pattern = ('http://solarmonitor.org/data/'
    ...                   '%Y/%m/%d/fits/{instrument}/'
    ...                   '{instrument}_{wave:05d}_fd_%Y%m%d_%H%M%S.fts.gz')
    >>> solmon = Scraper(solmon_pattern, instrument = 'swap', wave = 174)
    >>> from sunpy.time import TimeRange
    >>> timerange = TimeRange('2015-01-01','2015-01-01T16:00:00')
    >>> print(solmon.filelist(timerange))  # doctest: +REMOTE_DATA
    ['http://solarmonitor.org/data/2015/01/01/fits/swap/swap_00174_fd_20150101_025423.fts.gz',
     'http://solarmonitor.org/data/2015/01/01/fits/swap/swap_00174_fd_20150101_061145.fts.gz',
     'http://solarmonitor.org/data/2015/01/01/fits/swap/swap_00174_fd_20150101_093037.fts.gz',
     'http://solarmonitor.org/data/2015/01/01/fits/swap/swap_00174_fd_20150101_124927.fts.gz']

    Note
    ----
    The search is strict with the time range, so if the archive scraped
    contains daily files, but the range doesn't start from the beginning
    of the day, then the file for that day won't be selected. The end of
    the timerange will normally be OK as includes the file on such end.
    """
    directories = self.range(timerange)
    filesurls = []
    # fix: the original indexed directories[0] unconditionally and raised
    # IndexError when the range produced no directories
    if not directories:
        return filesurls
    if directories[0][0:3] == "ftp":  # TODO use urlsplit from pr #1807
        return self._ftpfileslist(timerange)
    for directory in directories:
        # NOTE: the original wrapped this in `try: ... except: raise`,
        # which is a no-op and has been removed.
        opn = urlopen(directory)
        try:
            soup = BeautifulSoup(opn, "html.parser")
            for link in soup.find_all("a"):
                href = link.get("href")
                # fix: <a> tags without an href attribute yield None,
                # which crashed on .endswith()
                if href is None:
                    continue
                if href.endswith(self.pattern.split('.')[-1]):
                    fullpath = directory + href
                    if self._URL_followsPattern(fullpath):
                        datehref = self._extractDateURL(fullpath)
                        if timerange.start <= datehref <= timerange.end:
                            filesurls.append(fullpath)
        finally:
            opn.close()
    return filesurls
def _is_url(arg): try: urlopen(arg) except: return False return True