def download_kegg_info_files(kegg_set_ids, species_ini_file):
    """Download the KEGG info files for the given KEGG set IDs.

    These files contain information about the KEGG sets, such as their
    title, abstract, supporting publications, etc.

    Arguments:
    kegg_set_ids -- List of KEGG set identifiers (e.g. hsa00010) for
    which info files will be downloaded.

    species_ini_file -- Path to the species INI config file (a string).

    Returns:
    Nothing; files are saved to the keggset_info folder, i.e.
    SPECIES_DOWNLOAD_FOLDER + 'KEGG/keggset_info_folder'.
    """
    config = SafeConfigParser()
    config.read(species_ini_file)

    download_root = config.get('species_info', 'SPECIES_DOWNLOAD_FOLDER')
    info_folder = os.path.join(download_root, KEGGSET_INFO_FOLDER)
    check_create_folder(info_folder)

    # One info file per KEGG set, fetched from <root><set-info-dir><id>.
    base_url = (config.get('KEGG', 'KEGG_ROOT_URL') +
                config.get('KEGG', 'SET_INFO_DIR'))
    for set_id in kegg_set_ids:
        download_from_url(base_url + set_id, info_folder)
def __init__(self, verbose=False, download_dir=None):
    """Set up the downloader.

    :param verbose: accepted for interface compatibility; note it is not
        stored on the instance here.
    :param download_dir: target folder for downloads; falls back to
        settings.DOWNLOAD_DIR when not given.
    """
    if download_dir:
        self.download_dir = download_dir
    else:
        self.download_dir = settings.DOWNLOAD_DIR
    self.google = settings.GOOGLE_STORAGE
    self.s3 = settings.S3_LANDSAT

    # Make sure the download directory exists.
    check_create_folder(self.download_dir)
def __init__(self, player, *args, **kwargs):
    """Build the podcast page UI and load saved podcasts.

    player -- playback backend used by the selection handlers.
    Remaining args/kwargs are forwarded to the Gtk parent (a container
    with attach()/set_column_spacing(), i.e. a Grid -- TODO confirm).
    """
    super(PodcastPage, self).__init__(*args, **kwargs)
    self.player = player

    # Entry for typing a new podcast feed URL, plus add/refresh buttons.
    self.podcast_entry = Gtk.Entry()
    self.podcast_entry.set_placeholder_text('Insert a new podcast feed.')
    add_button = Gtk.Button.new_from_stock('gtk-add')
    add_button.connect('clicked', self.add_podcast)
    update_button = Gtk.Button.new_from_stock('gtk-refresh')
    update_button.connect('clicked', self.on_update_all)

    episodes = Gtk.Label('Episodes')
    all_button = Gtk.Button('All')
    all_button.connect('clicked', self.all_button_pressed)

    # List of subscribed podcasts inside a scrolled window.
    self.podcast_box = Gtk.ListBox()
    self.podcast_box.connect('row-activated', self.on_podcast_selected)
    podcast_sw = create_scrolled_window(self.podcast_box)

    # List of episodes for the selected podcast; 'edge-overshot' fires
    # when the user scrolls past the edge (handled by on_scroll_overshot).
    self.episode_box = Gtk.ListBox()
    self.episode_box.connect('row-activated', self.on_episode_selected)
    self.episode_scroll = create_scrolled_window(self.episode_box)
    self.episode_scroll.connect('edge-overshot', self.on_scroll_overshot)

    # Vertical paned container; only the top pane is filled here -- the
    # bottom pane is presumably populated elsewhere (TODO confirm).
    self.paned = Gtk.Paned(orientation=Gtk.Orientation.VERTICAL)
    self.paned.set_position(300)
    self.paned.add1(self.episode_scroll)

    # Row holding the feed entry and its action buttons.
    pod_hbox = Gtk.HBox()
    pod_hbox.pack_start(self.podcast_entry, True, True, 0)
    pod_hbox.pack_start(add_button, False, True, 0)
    pod_hbox.pack_start(update_button, False, True, 0)

    # Grid layout: entry row + podcast list on the left, episode header,
    # 'All' button and the paned episode area on the right.
    self.attach(pod_hbox, 0, 0, 1, 1)
    self.attach(podcast_sw, 0, 1, 1, 1)
    self.attach(episodes, 2, 0, 2, 1)
    self.attach(all_button, 4, 0, 1, 1)
    self.attach(self.paned, 1, 1, 4, 1)
    self.set_column_spacing(3)

    # Local working folder for temporary/downloaded files.
    check_create_folder('./temp')

    self.database = PodcastDB()
    self.on_load()

    # Selection state: currently selected podcast row, currently
    # selected episode link, and cached episode info.
    self.pod_row_selected = None
    self.ep_selected_link = None
    self.episode_info = None
def _unzip(self, src, dst, scene): """ Unzip tar files """ self.output("Unzipping %s - It might take some time" % scene, normal=True, arrow=True) try: tar = tarfile.open(src, 'r') tar.extractall(path=dst) tar.close() except tarfile.ReadError: check_create_folder(dst) subprocess.check_call(['tar', '-xf', src, '-C', dst])
def download(self, scenes, bands=None):
    """Download scenes from Google Storage, or from Amazon S3 when bands
    are provided for a newer scene.

    :param scenes: a list of sceneID strings
    :param bands: optional list of bands
    :returns: True on success
    :raises Exception: if scenes (or bands, when used) is not a list
    """
    if not isinstance(scenes, list):
        raise Exception('Expected sceneIDs list')

    for scene in scenes:
        # If bands are provided and the image is from 2015 or later
        # (sensor digit > 4), use Amazon S3.
        if bands and int(scene[12]) > 4:
            if not isinstance(bands, list):
                raise Exception('Expected bands list')
            # Create a folder to download the specific bands into.
            path = check_create_folder(join(self.download_dir, scene))
            try:
                # Always grab MTL.txt if bands are specified. Build a
                # fresh list: the original appended 'MTL' to the
                # caller's list, growing it on every scene iteration.
                for band in bands + ['MTL']:
                    self.amazon_s3(scene, band, path)
            except RemoteFileDoesntExist:
                # Band missing on S3 -- fall back to Google Storage.
                self.google_storage(scene, self.download_dir)
        else:
            self.google_storage(scene, self.download_dir)
    return True
def _unzip(self, src, dst, scene, force_unzip=False): """ Unzip tar files """ self.output("Unzipping %s - It might take some time" % scene, normal=True, arrow=True) try: # check if file is already unzipped, skip if isdir(dst) and not force_unzip: self.output("%s is already unzipped." % scene, normal=True, arrow=True) return else: tar = tarfile.open(src, 'r') tar.extractall(path=dst) tar.close() except tarfile.ReadError: check_create_folder(dst) subprocess.check_call(['tar', '-xf', src, '-C', dst])
def __init__(self, path, bands=None, dst_path=None, verbose=False):
    """Prepare a scene for processing.

    :param path: path to the source image bundle (may be zipped)
    :param bands: band sequence for the final image; must be a list,
        otherwise the default [4, 3, 2] is used
    :param dst_path: destination folder; defaults to
        settings.PROCESSED_IMAGE
    :param verbose: whether to show verbose output
    """
    self.projection = {'init': 'epsg:3857'}
    self.dst_crs = {'init': u'epsg:3857'}

    bundle_name = get_file(path)
    self.scene = bundle_name.split('.')[0]
    if isinstance(bands, list):
        self.bands = bands
    else:
        self.bands = [4, 3, 2]

    # Landsat source path (bundle folder without the file name).
    self.src_path = path.replace(bundle_name, '')

    # Build the destination folder if it doesn't exist.
    base_dst = dst_path if dst_path else settings.PROCESSED_IMAGE
    self.dst_path = check_create_folder(join(base_dst, self.scene))

    self.verbose = verbose

    # Path to the unzipped folder.
    self.scene_path = join(self.src_path, self.scene)

    if self._check_if_zipped(path):
        self._unzip(join(self.src_path, bundle_name),
                    join(self.src_path, self.scene),
                    self.scene)

    self.bands_path = [join(self.scene_path, self._get_full_filename(b))
                       for b in self.bands]
def _unzip(self, src, dst, scene, force_unzip=False): """ Unzip tar files """ self.output("Unzipping %s - It might take some time" % scene, normal=True, arrow=True) try: # check if file is already unzipped, skip if isdir(dst) and not force_unzip: self.output("%s is already unzipped." % scene, normal=True, arrow=True) return else: tar = tarfile.open(src, "r") tar.extractall(path=dst) tar.close() except tarfile.ReadError: check_create_folder(dst) subprocess.check_call(["tar", "-xf", src, "-C", dst])
def download(self, scenes, bands=None):
    """Download scenes from Google Storage, or from Amazon S3 when a
    list of bands is provided.

    @params
        scenes - A list of sceneIDs
        bands - A list of bands
    """
    if not isinstance(scenes, list):
        raise Exception('Expected sceneIDs list')

    for scene in scenes:
        if not bands:
            # No specific bands requested: grab the whole bundle.
            self.google_storage(scene, self.download_dir)
            continue
        if not isinstance(bands, list):
            raise Exception('Expected bands list')
        # Create a folder to download the specific bands into.
        band_folder = check_create_folder(join(self.download_dir, scene))
        for band in bands:
            self.amazon_s3(scene, band, band_folder)
    return True
def _unzip(self, src, dst, scene, force_unzip=False): """ Unzip tar files """ self.output("Unzipping %s - It might take some time" % scene, normal=True, arrow=True) try: # check if file is already unzipped, skip if isdir(dst) and not force_unzip: self.output('%s is already unzipped.' % scene, normal=True, color='green', indent=1) return else: tar = tarfile.open(src, 'r') tar.extractall(path=dst) tar.close() except tarfile.ReadError: check_create_folder(dst) subprocess.check_call(['tar', '-xf', src, '-C', dst])
def amazon_s3(self, scene, bands):
    """Amazon S3 downloader.

    :param scene: sceneID string
    :param bands: list of bands to fetch; QA and MTL are fetched as well
    :returns: path of the folder the files were downloaded into
    """
    sat = self.scene_interpreter(scene)

    # Work on a copy -- the original appended to the caller's list,
    # leaving 'QA'/'MTL' behind as a side effect on every call.
    # NOTE(review): the guard checks 'BQA' but appends 'QA' -- looks
    # intentional (band name vs. file suffix) but worth confirming.
    bands = list(bands)
    if 'BQA' not in bands:
        bands.append('QA')
    if 'MTL' not in bands:
        bands.append('MTL')

    # Resolve and validate every URL before downloading anything, so a
    # missing band fails fast instead of after partial downloads.
    urls = []
    for band in bands:
        url = self.amazon_s3_url(sat, band)
        self.remote_file_exists(url)
        urls.append(url)

    # Create the per-scene download folder.
    path = check_create_folder(join(self.download_dir, scene))

    self.output('Source: AWS S3', normal=True, arrow=True)
    for url in urls:
        self.fetch(url, path)

    return path
def __init__(self, path, bands=None, dst_path=None, verbose=False,
             force_unzip=False, bounds=None):
    """Prepare a scene for processing, optionally clipping to bounds.

    :param path: path to the source image bundle (may be zipped)
    :param bands: band sequence for the final image; must be a list,
        otherwise the default [4, 3, 2] is used
    :param dst_path: destination folder; defaults to the current
        working directory
    :param verbose: whether to show verbose output
    :param force_unzip: re-extract the bundle even if already unzipped
    :param bounds: optional bounding box; when given, images are clipped
        and self.clipped is set
    """
    self.projection = {'init': 'epsg:3857'}
    self.dst_crs = {'init': u'epsg:3857'}

    bundle_name = get_file(path)
    self.scene = bundle_name.split('.')[0]
    self.bands = bands if isinstance(bands, list) else [4, 3, 2]
    self.clipped = False

    # Landsat source path (bundle folder without the file name).
    self.src_path = path.replace(bundle_name, '')

    # Build destination folder if it doesn't exist.
    if not dst_path:
        dst_path = os.getcwd()
    self.dst_path = check_create_folder(join(dst_path, self.scene))

    self.verbose = verbose

    # Path to the unzipped folder.
    self.scene_path = join(self.src_path, self.scene)

    # Unzip files.
    if self._check_if_zipped(path):
        self._unzip(join(self.src_path, bundle_name),
                    join(self.src_path, self.scene),
                    self.scene, force_unzip)

    if bounds:
        self.bounds = bounds
        self.scene_path = self.clip()
        self.clipped = True

    self.bands_path = []
    for band in self.bands:
        self.bands_path.append(join(self.scene_path,
                                    self._get_full_filename(band)))
def clip(self):
    """Clip band images to ``self.bounds``.

    Windowed-read implementation borrowed from
    https://github.com/brendan-ward/rasterio/blob/e3687ce0ccf8ad92844c16d913a6482d5142cf48/rasterio/rio/convert.py

    Writes clipped copies of every band (plus the QA band) and the MTL
    metadata file into a 'clipped' subfolder of the scene path.

    :returns: path of the folder holding the clipped images
    """
    self.output("Clipping", normal=True)

    # Create a new folder for the clipped images.
    path = check_create_folder(join(self.scene_path, 'clipped'))

    try:
        temp_bands = copy(self.bands)
        temp_bands.append('QA')
        # (enumerate index in the original was unused; dropped.)
        for band in temp_bands:
            band_name = self._get_full_filename(band)
            band_path = join(self.scene_path, band_name)
            self.output("Band %s" % band, normal=True, color='green',
                        indent=1)
            with rasterio.open(band_path) as src:
                # Reproject the lon/lat bounds into the raster's CRS.
                bounds = transform_bounds(
                    {
                        'proj': 'longlat',
                        'ellps': 'WGS84',
                        'datum': 'WGS84',
                        'no_defs': True
                    },
                    src.crs, *self.bounds)

                # If the requested box doesn't intersect the raster,
                # clamp it to the raster's extent.
                if disjoint_bounds(bounds, src.bounds):
                    bounds = adjust_bounding_box(src.bounds, bounds)

                window = src.window(*bounds)

                out_kwargs = src.meta.copy()
                out_kwargs.update({
                    'driver': 'GTiff',
                    # window is ((row_start, row_stop), (col_start, col_stop))
                    'height': window[0][1] - window[0][0],
                    'width': window[1][1] - window[1][0],
                    'transform': src.window_transform(window)
                })

                with rasterio.open(join(path, band_name), 'w',
                                   **out_kwargs) as out:
                    out.write(src.read(window=window))

        # Copy MTL to the clipped folder.
        copyfile(join(self.scene_path, self.scene + '_MTL.txt'),
                 join(path, self.scene + '_MTL.txt'))

        return path
    except IOError as e:
        # Fixed: IOError has no .message on Python 3 (deprecated since
        # 2.6); str(e) works on both.
        # NOTE(review): `exit` here takes (message, code) so it is the
        # project's helper, not the builtin -- confirm.
        exit(str(e), 1)
def clip(self):
    """Clip band images to ``self.bounds``.

    Windowed-read implementation borrowed from
    https://github.com/brendan-ward/rasterio/blob/e3687ce0ccf8ad92844c16d913a6482d5142cf48/rasterio/rio/convert.py

    Writes clipped copies of every band (plus the QA band) and the MTL
    metadata file into a 'clipped' subfolder of the scene path.

    :returns: path of the folder holding the clipped images
    """
    self.output("Clipping", normal=True)

    # Create a new folder for the clipped images.
    path = check_create_folder(join(self.scene_path, 'clipped'))

    try:
        temp_bands = copy(self.bands)
        temp_bands.append('QA')
        # (enumerate index in the original was unused; dropped.)
        for band in temp_bands:
            band_name = self._get_full_filename(band)
            band_path = join(self.scene_path, band_name)
            self.output("Band %s" % band, normal=True, color='green',
                        indent=1)
            with rasterio.open(band_path) as src:
                # Reproject the lon/lat bounds into the raster's CRS.
                bounds = transform_bounds(
                    {
                        'proj': 'longlat',
                        'ellps': 'WGS84',
                        'datum': 'WGS84',
                        'no_defs': True
                    },
                    src.crs, *self.bounds
                )

                # If the requested box doesn't intersect the raster,
                # clamp it to the raster's extent.
                if disjoint_bounds(bounds, src.bounds):
                    bounds = adjust_bounding_box(src.bounds, bounds)

                window = src.window(*bounds)

                out_kwargs = src.meta.copy()
                out_kwargs.update({
                    'driver': 'GTiff',
                    # window is ((row_start, row_stop), (col_start, col_stop))
                    'height': window[0][1] - window[0][0],
                    'width': window[1][1] - window[1][0],
                    'transform': src.window_transform(window)
                })

                with rasterio.open(join(path, band_name), 'w',
                                   **out_kwargs) as out:
                    out.write(src.read(window=window))

        # Copy MTL to the clipped folder.
        copyfile(join(self.scene_path, self.scene + '_MTL.txt'),
                 join(path, self.scene + '_MTL.txt'))

        return path
    except IOError as e:
        # Fixed: IOError has no .message on Python 3 (deprecated since
        # 2.6); str(e) works on both.
        # NOTE(review): `exit` here takes (message, code) so it is the
        # project's helper, not the builtin -- confirm.
        exit(str(e), 1)
def download(self, scenes, bands=None):
    """Download scenes from Google Storage, or from Amazon S3 when bands
    are provided for a newer scene.

    :param scenes: A list of scene IDs
    :type scenes: List
    :param bands: A list of bands. Default value is None.
    :type bands: List

    :returns: (Dict) mapping each downloaded scene ID to its source
        ('aws' or 'google')
    :raises Exception: if scenes (or bands, when used) is not a list
    """
    if not isinstance(scenes, list):
        raise Exception('Expected sceneIDs list')

    output = {}
    for scene in scenes:
        # Validates the scene ID format (raises on malformed IDs).
        self.scene_interpreter(scene)

        # If bands are provided and the image is from 2015 or later
        # (sensor digit > 4), use Amazon S3.
        if bands and int(scene[12]) > 4:
            if not isinstance(bands, list):
                raise Exception('Expected bands list')
            # Create a folder to download the specific bands into.
            path = check_create_folder(join(self.download_dir, scene))
            try:
                # Always grab the QA band and MTL.txt when bands are
                # specified. Work on a copy -- the original appended to
                # the caller's list as a lasting side effect.
                scene_bands = list(bands)
                if 'BQA' not in scene_bands:
                    scene_bands.append('QA')
                if 'MTL' not in scene_bands:
                    scene_bands.append('MTL')
                for band in scene_bands:
                    self.amazon_s3(scene, band, path)
                output[scene] = 'aws'
            except RemoteFileDoesntExist:
                # Band missing on S3 -- fall back to Google Storage.
                self.google_storage(scene, self.download_dir)
                output[scene] = 'google'
        else:
            self.google_storage(scene, self.download_dir)
            output[scene] = 'google'

    return output
def __init__(self, path, bands=None, dst_path=None, verbose=False,
             force_unzip=False, bounds=None):
    """Prepare a scene for processing, optionally clipping to bounds.

    :param path: path to the source image bundle (may be zipped)
    :param bands: band sequence for the final image; must be a list,
        otherwise the default [4, 3, 2] is used
    :param dst_path: destination folder; defaults to the current
        working directory
    :param verbose: whether to show verbose output
    :param force_unzip: re-extract the bundle even if already unzipped
    :param bounds: optional bounding box; when given, images are clipped
        and self.clipped is set
    """
    self.projection = {'init': 'epsg:3857'}
    self.dst_crs = {'init': u'epsg:3857'}
    self.scene = get_file(path).split('.')[0]
    self.bands = bands if isinstance(bands, list) else [4, 3, 2]
    self.clipped = False

    # Landsat source path (bundle folder without the file name).
    self.src_path = path.replace(get_file(path), '')

    # Build destination folder if it doesn't exist.
    self.dst_path = check_create_folder(
        join(dst_path if dst_path else os.getcwd(), self.scene))

    self.verbose = verbose

    # Path to the unzipped folder.
    self.scene_path = join(self.src_path, self.scene)

    # Unzip files.
    if self._check_if_zipped(path):
        self._unzip(join(self.src_path, get_file(path)),
                    join(self.src_path, self.scene),
                    self.scene, force_unzip)

    if bounds:
        self.bounds = bounds
        self.scene_path = self.clip()
        self.clipped = True

    self.bands_path = [join(self.scene_path, self._get_full_filename(band))
                       for band in self.bands]
def __init__(self, path, bands=None, dst_path=None, verbose=False,
             force_unzip=False):
    """Prepare a scene for processing.

    :param path: path to the source image bundle (may be zipped)
    :param bands: band sequence for the final image; must be a list,
        otherwise the default [4, 3, 2] is used
    :param dst_path: destination folder; defaults to
        settings.PROCESSED_IMAGE
    :param verbose: whether to show verbose output
    :param force_unzip: re-extract the bundle even if already unzipped
    """
    self.projection = {"init": "epsg:3857"}
    self.dst_crs = {"init": u"epsg:3857"}

    bundle_name = get_file(path)
    self.scene = bundle_name.split(".")[0]
    self.bands = bands if isinstance(bands, list) else [4, 3, 2]

    # Landsat source path (bundle folder without the file name).
    self.src_path = path.replace(bundle_name, "")

    # Build the destination folder if it doesn't exist.
    target = dst_path if dst_path else settings.PROCESSED_IMAGE
    self.dst_path = check_create_folder(join(target, self.scene))

    self.verbose = verbose

    # Path to the unzipped folder.
    self.scene_path = join(self.src_path, self.scene)

    if self._check_if_zipped(path):
        self._unzip(join(self.src_path, bundle_name),
                    join(self.src_path, self.scene),
                    self.scene, force_unzip)

    self.bands_path = [join(self.scene_path, self._get_full_filename(b))
                       for b in self.bands]
def download_all_files(species_ini_file, base_download_folder,
                       secrets_location=None):
    """
    Reads config INI file for a species, which contains the files (and
    their locations, or URLs) that must be loaded for this species, and
    calls the download_from_url function for each of those files.

    Arguments:
    species_ini_file -- Path to the particular species INI file. This
    is a string.

    base_download_folder -- A string. Path of the root folder where
    download folders for other species will be created and where
    common downloaded files will be saved. This is stored in the main
    configuration INI file.

    secrets_location -- Optional string of location of the secrets INI
    file.

    Returns:
    Nothing, just downloads and saves files to download_folder
    """
    check_create_folder(base_download_folder)

    species_file = SafeConfigParser()
    species_file.read(species_ini_file)

    # Per-species download folder, read from the species INI file.
    sd_folder = species_file.get('species_info', 'SPECIES_DOWNLOAD_FOLDER')
    check_create_folder(sd_folder)

    # Gene Ontology: the OBO file is shared across species (saved in the
    # base folder); the association files are species-specific.
    if species_file.has_section('GO'):
        if species_file.getboolean('GO', 'DOWNLOAD'):
            obo_url = species_file.get('GO', 'GO_OBO_URL')
            download_from_url(obo_url, base_download_folder)

            go_dir = os.path.join(sd_folder, 'GO')
            check_create_folder(go_dir)

            goa_urls = species_file.get('GO', 'ASSOC_FILE_URLS')
            # Strip ALL whitespace (including newlines inside the INI
            # value) before splitting the comma-separated URL list.
            goa_urls = re.sub(r'\s', '', goa_urls).split(',')

            for goa_url in goa_urls:
                download_from_url(goa_url, go_dir)

    # KEGG: the database info file goes to the base folder (saved as
    # 'kegg_db_info'); gene-set files go to the species KEGG folder.
    if species_file.has_section('KEGG'):
        if species_file.getboolean('KEGG', 'DOWNLOAD'):
            kegg_root_url = species_file.get('KEGG', 'KEGG_ROOT_URL')

            kegg_info_url = kegg_root_url + species_file.get(
                'KEGG', 'DB_INFO_URL')
            download_from_url(kegg_info_url, base_download_folder,
                              'kegg_db_info')

            kegg_dir = os.path.join(sd_folder, 'KEGG')
            check_create_folder(kegg_dir)

            ks_urls = species_file.get('KEGG', 'SETS_TO_DOWNLOAD')
            kegg_urls = [kegg_root_url + url.strip()
                         for url in ks_urls.split(',')]

            for kegg_url in kegg_urls:
                download_from_url(kegg_url, kegg_dir)

    # Disease Ontology: requires an OMIM API secret key (taken from the
    # secrets INI file) to build the genemap download URL.
    if species_file.has_section('DO'):
        if species_file.getboolean('DO', 'DOWNLOAD'):
            do_dir = os.path.join(sd_folder, 'DO')
            check_create_folder(do_dir)

            obo_url = species_file.get('DO', 'DO_OBO_URL')
            download_from_url(obo_url, do_dir)

            mim2gene_url = species_file.get('DO', 'MIM2GENE_URL')
            download_from_url(mim2gene_url, do_dir)

            # The genemap_file needs a special Secret Key, which must be
            # retrieved from the secrets file if the user wishes to
            # download the genemap_file
            genemap_url = species_file.get('DO', 'GENEMAP_URL')

            if not secrets_location:
                logger.error('Secrets file was not passed to '
                             'download_all_files() function. A secrets file '
                             'containing an OMIM API secret key is required to'
                             ' download the genemap file to process Disease '
                             'Ontology.')
                sys.exit(1)

            secrets_file = SafeConfigParser()
            secrets_file.read(secrets_location)

            if not secrets_file.has_section('OMIM API secrets'):
                logger.error('Secrets file has no "OMIM API secrets" section,'
                             'which is required to download the genemap file '
                             ' to process Disease Ontology.')
                sys.exit(1)

            omim_secret_key = secrets_file.get('OMIM API secrets',
                                               'SECRET_KEY')
            # Substitute the secret key into the URL template.
            genemap_url = genemap_url.replace('<SecretKey>', omim_secret_key)
            download_from_url(genemap_url, do_dir)