def init_album(self):
    # album json
    js = self.handler.read_link(url_album % self.album_id).json()['album']
    # name
    self.album_name = util.decode_html(js['name'])
    # album logo
    self.logo = js['picUrl']
    # artist_name
    self.artist_name = js['artists'][0]['name']
    # handle songs
    for jsong in js['songs']:
        song = NeteaseSong(self.handler, song_json=jsong)
        song.group_dir = self.artist_name + u'_' + self.album_name
        song.group_dir = song.group_dir.replace('/', '_')
        song.post_set()
        self.songs.append(song)
    d = path.dirname(self.songs[-1].abs_path)
    # creating the dir
    LOG.debug(msg.head_163 + msg.fmt_create_album_dir % d)
    util.create_dir(d)
    # download album logo image
    LOG.debug(msg.head_163 + msg.fmt_dl_album_cover % self.album_name)
    downloader.download_url(self.logo,
                            path.join(d, 'cover.' + self.logo.split('.')[-1]))

def init_album(self):
    resp_json = self.handler.read_link(url_album % self.album_id).json()
    j = resp_json['data']['trackList']
    if not j:
        LOG.error(resp_json['message'])
        return
    # description
    html = self.handler.read_link(self.url).text
    soup = BeautifulSoup(html, 'html.parser')
    if soup.find('meta', property="og:title"):
        self.album_desc = soup.find('span', property="v:summary").text
        # name
        self.album_name = soup.find('meta', property="og:title")['content']
        # album logo
        self.logo = soup.find('meta', property="og:image")['content']
        # artist_name
        self.artist_name = soup.find('meta', property="og:music:artist")['content']
    else:
        aSong = j[0]
        self.album_name = aSong['album_name']
        self.logo = aSong['album_pic']
        self.artist_name = aSong['artistVOs'][0]['artistName']
        self.album_desc = None
    # handle songs
    for jsong in j:
        song = XiamiSong(self.handler, song_json=jsong)
        song.song_name = jsong['name']  # name or songName
        song.group_dir = self.artist_name + u'_' + self.album_name
        song.group_dir = song.group_dir.replace('/', '_')
        song.post_set()
        self.songs.append(song)
    d = path.dirname(self.songs[-1].abs_path)
    # creating the dir
    LOG.debug(msg.head_xm + msg.fmt_create_album_dir % d)
    util.create_dir(d)
    # download album logo image
    LOG.debug(msg.head_xm + msg.fmt_dl_album_cover % self.album_name)
    if self.logo:
        self.logo = self.handler.add_http_prefix(self.logo)
        downloader.download_url(self.logo,
                                path.join(d, 'cover.' + self.logo.split('.')[-1]))
    LOG.debug(msg.head_xm + msg.fmt_save_album_desc % self.album_name)
    if self.album_desc:
        self.album_desc = re.sub(r'<\s*[bB][rR]\s*/>', '\n', self.album_desc)
        self.album_desc = re.sub(r'<.*?>', '', self.album_desc)
        self.album_desc = util.decode_html(self.album_desc)
        import codecs
        with codecs.open(path.join(d, 'album_description.txt'), 'w', 'utf-8') as f:
            f.write(self.album_desc)

def download_handler(event, context):
    config = _get_configuration()
    wowc = wow.WoWCommunityAPIClient(config['wow_client_id'],
                                     config['wow_client_secret'],
                                     endpoint=config['wow_api_endpoint'])
    for batch in wowc.get_auction_data_status(config['wow_realm'], config['wow_locale']):
        s3key = _keyname_from_datetime(config, batch.last_modified)
        downloader.download_url(batch.url, s3key,
                                s3bucket=config['s3_bucket_name'],
                                s3region=config['aws_region'])
    return

def download_grib(self, url_base, rel_path, max_retries=3):
    """
    Download a GRIB file from a GRIB service and stream to <rel_path> in ingest_dir.

    :param url_base: the base URL part of the GRIB service
    :param rel_path: the relative path of the file (w.r.t. the GRIB base URL and w.r.t. self.ingest_dir)
    :param max_retries: how many times we may retry downloading the file
    """
    url = url_base + '/' + rel_path
    grib_path = osp.join(self.ingest_dir, rel_path)
    try:
        download_url(url, grib_path, max_retries)
    except DownloadError as e:
        raise GribError('GribSource: failed to download file %s' % url)

def download_hdf(self, url_base, rel_path, max_retries=3):
    """
    Download an HDF file from an HDF service and stream to <rel_path> in ingest_dir.

    :param url_base: the base URL part of the HDF service
    :param rel_path: the relative path of the file
    :param max_retries: how many times we may retry downloading the file
    """
    url = url_base + '/' + rel_path
    hdf_path = osp.join(self.ingest_dir, rel_path)
    try:
        download_url(url, hdf_path, max_retries)
    except DownloadError as e:
        raise HdfError('HDFSource: failed to download file %s' % url)

def download_file(ingest_dir, url, rel_path, max_retries=3):
    """
    Download a file and stream to <rel_path> in ingest_dir.

    :param ingest_dir: the directory in which to store the downloaded file
    :param url: the URL where the file is hosted
    :param rel_path: the relative path of the file
    :param max_retries: how many times we may retry downloading the file
    """
    path = osp.join(ingest_dir, rel_path)
    try:
        download_url(url, path, max_retries)
    except DownloadError as e:
        raise data_sourceError('data_source: failed to download file %s' % url)

def retrieve_rtma(self, cycle):
    """
    Attempts to retrieve the variables passed in during initialization.
    Any files already downloaded are not modified.
    Returns a list of variables that have not been downloaded.

    :param cycle: the cycle (UTC) for which to retrieve the RTMA
    :return: tuple with the list of all variables that are not ready yet
             and a dictionary with paths to the stored files
    """
    ts = cycle.replace(minute=0, second=0, microsecond=0)
    logging.info('RTMA retrieving variables %s for cycle %s.' % (self.var_list, str(ts)))

    # materialize as lists so the sequences can be traversed more than once
    vars_paths = [(x, self._local_var_path(ts, x)) for x in self.var_list]
    ready = dict(x for x in vars_paths if self._is_var_cached(x[1]))
    nonlocals = [x for x in vars_paths if not self._is_var_cached(x[1])]
    if nonlocals:
        nl_vars = [x[0] for x in nonlocals]
        logging.info('RTMA variables %s are not available locally, trying to download.' % nl_vars)

    not_ready = []
    for var, local_path in nonlocals:
        var_ready = False
        for i in range(0, max_retries):
            try:
                if self._is_var_ready(ts, var):
                    download_url(self._remote_var_url(cycle.hour, var), local_path)
                    num = grib_messages(local_path, print_messages=True, max_messages=9999)
                    logging.info('file %s contains %s message(s)' % (local_path, num))
                    if num == 0:
                        raise ValueError('file %s contains no GRIB messages' % local_path)
                    var_ready = True
                    break
            except Exception as e:
                logging.error(str(e))
                time.sleep(sleep_seconds)
        if var_ready:
            ready[var] = local_path
        else:
            not_ready.append(var)

    if not_ready:
        # unless a file was downloaded, it makes no sense to check the server immediately again
        logging.info('RTMA: variables %s for hour %d are not ready.' % (not_ready, cycle.hour))
    else:
        # if all files are available, return
        logging.info('RTMA success obtaining variables %s for hour %d.' % (self.var_list, cycle.hour))

    return not_ready, ready

def download_file(self, url_base, rel_path, max_retries=3):
    """
    Download a file and stream to <rel_path> in ingest_dir.

    :param url_base: the base URL where the file is hosted
    :param rel_path: the relative path of the file
    :param max_retries: how many times we may retry downloading the file
    """
    url = url_base + '/' + rel_path
    path = osp.join(self.ingest_dir, rel_path)
    try:
        download_url(url, path, max_retries)
    except DownloadError as e:
        raise data_sourceError('data_source: failed to download file %s' % url)

def download_grib(self, url_base, rel_path):
    """
    Download a GRIB file from a GRIB service and stream to <rel_path> in ingest_dir.

    :param url_base: the base URL part of the GRIB service
    :param rel_path: the relative path of the file (w.r.t. the GRIB base URL and w.r.t. self.ingest_dir)
    """
    url = url_base + '/' + rel_path
    logging.info('downloading %s grib from %s' % (self.id, url))
    grib_path = osp.join(self.ingest_dir, rel_path)
    try:
        download_url(url, grib_path)
    except DownloadError as e:
        logging.error('%s cannot download grib file %s' % (self.id, url))
        logging.warning('Please check %s for %s' % (self.info_url, self.info))
        raise GribError('GribSource: failed to download file %s' % url)

def init_album(self):
    j = self.handler.read_link(url_album % self.album_id).json()['data']['trackList']
    j_first_song = j[0]
    # name
    self.album_name = util.decode_html(j_first_song['album_name'])
    # album logo
    self.logo = j_first_song['album_pic']
    # artist_name
    self.artist_name = j_first_song['artist']
    # description
    html = self.handler.read_link(self.url).text
    soup = BeautifulSoup(html, 'html.parser')
    self.album_desc = soup.find('span', property="v:summary").text
    # handle songs
    for jsong in j:
        song = XiamiSong(self.handler, song_json=jsong)
        song.group_dir = self.artist_name + u'_' + self.album_name
        song.post_set()
        self.songs.append(song)
    d = path.dirname(self.songs[-1].abs_path)
    # creating the dir
    LOG.debug(msg.head_xm + msg.fmt_create_album_dir % d)
    util.create_dir(d)
    # download album logo image
    LOG.debug(msg.head_xm + msg.fmt_dl_album_cover % self.album_name)
    downloader.download_url(self.logo,
                            path.join(d, 'cover.' + self.logo.split('.')[-1]))
    LOG.debug(msg.head_xm + msg.fmt_save_album_desc % self.album_name)
    if self.album_desc:
        self.album_desc = re.sub(r'<\s*[bB][rR]\s*/>', '\n', self.album_desc)
        self.album_desc = re.sub(r'<.*?>', '', self.album_desc)
        self.album_desc = util.decode_html(self.album_desc)
        import codecs
        with codecs.open(path.join(d, 'album_description.txt'), 'w', 'utf-8') as f:
            f.write(self.album_desc)

def getYarnMappings(version: str):
    DOWNLOAD_LINK = f"https://github.com/FabricMC/yarn/archive/{version}.zip"
    ZIP_PATH = f"stich_yarn_tmp_{version}.zip"
    if os.path.exists(f"yarn-{version}"):
        print("Found existing mappings, will use those")
    else:
        print("Could not find existing mappings, this might take a minute")
        print(f"Getting {DOWNLOAD_LINK}")
        downloader.download_url(DOWNLOAD_LINK, ZIP_PATH)
        print("Extracting mappings from zip (Could take upwards of 2 minutes)")
        archive = ZipFile(ZIP_PATH, 'r')
        for zippedFile in archive.namelist():
            if zippedFile.startswith(f"yarn-{version}/mappings/"):
                archive.extract(zippedFile)
        archive.close()
        print("Cleaning up zip")
        os.remove(ZIP_PATH)

def retrieve_rtma(self, cycle):
    """
    Attempts to retrieve the variables passed in during initialization.
    Any files already downloaded are not modified.
    Returns a list of variables that have not been downloaded.

    :param cycle: the cycle (UTC) for which to retrieve the RTMA
    :return: tuple with the list of all variables that are not ready yet
             and a dictionary with paths to the stored files
    """
    ts = cycle.replace(minute=0, second=0, microsecond=0)
    logging.info('RTMA retrieving variables %s for cycle %s.' % (self.var_list, str(ts)))

    # materialize as lists so the sequences can be traversed more than once
    vars_paths = [(x, self._local_var_path(ts, x)) for x in self.var_list]
    ready = dict(x for x in vars_paths if self._is_var_cached(x[1]))
    nonlocals = [x for x in vars_paths if not self._is_var_cached(x[1])]
    if nonlocals:
        nl_vars = [x[0] for x in nonlocals]
        logging.info('RTMA variables %s are not available locally, trying to download.' % nl_vars)

    not_ready = []
    for var, local_path in nonlocals:
        if self._is_var_ready(ts, var):
            download_url(self._remote_var_url(cycle.hour, var), local_path)
            ready[var] = local_path
        else:
            not_ready.append(var)

    if not_ready:
        # unless a file was downloaded, it makes no sense to check the server immediately again
        logging.info('RTMA: variables %s for hour %d are not ready.' % (not_ready, cycle.hour))
    else:
        # if all files are available, return
        logging.info('RTMA success obtaining variables %s for hour %d.' % (self.var_list, cycle.hour))

    return not_ready, ready

# today's data is not available yet; we want yesterday's, which has recently become available
yesterday = datetime.utcnow() - timedelta(days=1)
julian_day = (yesterday - datetime(yesterday.year, 1, 1)).days + 1
year = yesterday.year

urls = ['ftp://*****:*****@nrt3.modaps.eosdis.nasa.gov/FIRMS/c6/USA_contiguous_and_Hawaii/',
        'ftp://*****:*****@nrt3.modaps.eosdis.nasa.gov/FIRMS/c6/Alaska/',
        'ftp://*****:*****@nrt3.modaps.eosdis.nasa.gov/FIRMS/viirs/USA_contiguous_and_Hawaii/',
        'ftp://*****:*****@nrt3.modaps.eosdis.nasa.gov/FIRMS/viirs/Alaska/']
filenames = ['MODIS_C6_USA_contiguous_and_Hawaii_MCD14DL_NRT_%04d%03d.txt' % (year, julian_day),
             'MODIS_C6_Alaska_MCD14DL_NRT_%04d%03d.txt' % (year, julian_day),
             'VIIRS_I_USA_contiguous_and_Hawaii_VNP14IMGTDL_NRT_%04d%03d.txt' % (year, julian_day),
             'VIIRS_I_Alaska_VNP14IMGTDL_NRT_%04d%03d.txt' % (year, julian_day)]

for url, fname in zip(urls, filenames):
    download_url(url + fname, ingest_dir + '/' + fname)

logging.info('SUCCESS, the following files are now available:')
print('')
for f in filenames:
    print(osp.join(ingest_dir, f))

params.filter_by_user("RaidyHD") params.set_purity(True, True, True) latest_scrape = wallhaven.search(params) if os.path.exists("walls.csv"): with open("walls.csv", newline="") as f: reader = csv.reader(f) local_data = np.array(list(reader)) else: local_data = np.array([ ['id', 'downloaded', 'url'], ]) new_downloads = list() for wall in reversed(latest_scrape[:10]): if wall["id"] not in local_data[:, 0]: download_path = wall["path"] status = download_url(download_path) status = True local_data = np.vstack( (local_data, [wall["id"], status, download_path])) new_downloads.append([wall["id"], status, download_path]) print(new_downloads) with open("walls.csv", "w", newline="") as f: writer = csv.writer(f) writer.writerows(local_data.tolist())