def plot_some(tags, n=15):
    """Write the label layer for the `n` most discrepant tags.

    Each entry of `tags` is indexed as ``r[1]`` (a geometry offering
    ``difference``/``union``/``area``) and ``r[2]`` (the tag label).
    Entries are processed in order; every polygon is clipped by the union
    of the polygons that came before it, so the written shapes do not
    overlap.

    Side effects:
      * registers the ``top_disc`` layer and the data bounds in KARTO_CONFIG,
      * writes ``disc/top_disc.shp``, ``disc/photos.json`` and
        ``disc/photos.css``,
      * prints the bounding box and the covered area.

    :param tags: sequence of tag records, most discrepant first.
    :param n: number of leading records to plot.
    """
    schema = {'geometry': 'Polygon', 'properties': {'tag': 'str'}}
    style = []
    # SF_BBOX is consumed with each coordinate pair swapped, both for the
    # map bounds and for the box printed at the end.
    bounds = (SF_BBOX[1], SF_BBOX[0], SF_BBOX[3], SF_BBOX[2])
    KARTO_CONFIG['bounds']['data'] = list(bounds)
    # TODO cluster polys by their area so label's size can depend of it
    # polys = [{'geometry': mapping(r[1]),
    #           'properties': {'tag': r[2]}} for r in tags[:n]]
    polys = []
    cover = None
    for r in tags[:n]:
        region, tag = r[1], r[2]
        # Keep only the part of this region not already claimed by a
        # higher-ranked tag.
        visible = region if cover is None else region.difference(cover)
        polys.append({'geometry': mapping(visible),
                      'properties': {'tag': tag}})
        cover = region if cover is None else cover.union(region)
    name = u'top_disc'
    KARTO_CONFIG['layers'][name] = {'src': name+'.shp',
                                    'labeling': {'key': 'tag'}}
    color = '#ffa873'
    style.append(CSS.format(name, color, 'black'))
    with fiona.collection(mkpath('disc', name+'.shp'),
                          "w", "ESRI Shapefile", schema) as f:
        f.writerecords(polys)

    with open(mkpath('disc', 'photos.json'), 'w') as f:
        json.dump(KARTO_CONFIG, f)
    style.append('#top_disc-label {font-family: OpenSans; font-size: 14px}')
    with open(mkpath('disc', 'photos.css'), 'w') as f:
        f.write('\n'.join(style))
    sf = box(*bounds)
    print(sf.bounds)
    # With no tags (or n == 0) nothing was covered; report an area of 0
    # instead of failing on `None.area` after the files were written.
    covered_area = 0 if cover is None else cover.area
    print(100*covered_area, sf.area)
def get_files(directory_location: 'FolderStructure', file_names: Union[List[str], str] = None, match_all: bool = True):
    """
    | Helper function used to get the paths of files contained directly within a directory of the system.
    | Sub-directories are never listed and are not searched.

    :param directory_location: A location within the system's root directory structure.
    :param file_names: default = None<br/>
        None - retrieves a list of the paths of all files within the specified directory location.<br/>
        List of file names - retrieves a list of file paths that match the file names listed. Check match_all for
        more details on matching criteria. Names must include file extension.<br/>
        string - retrieves a single path to a file with the given name. Name must include file extension.<br/>
    :param match_all: default = True<br/>
        True - if a list of file names is given, all files must match to return a list of paths.<br/>
        False - if a list of file names is given, only file names that have a match will have their paths returned
        (at least one must match).
    :return: Returns:<br/>
        - If file_names is None, a list of all file paths in the directory location.<br/>
        - If file_names is a string, the path (a single string) to that file.<br/>
        - If file_names is a list, the paths of the matching names, in the order the names were given.<br/>
        - If no files were found, or the matching criteria is not met, an empty list is returned.
    """
    location = directory_location.get_path()
    all_files = [f_name for f_name in os.listdir(location) if filesys.isfile(mkpath(location, f_name))]

    if file_names is None:
        return [mkpath(location, f_name) for f_name in all_files]

    # A single name is answered here in every case, so a missing name is never
    # treated as an iterable of characters further down.
    if isinstance(file_names, str):
        return mkpath(location, file_names) if file_names in all_files else []

    available = set(all_files)
    match = all if match_all else any
    existing_files = [(f_name in available) for f_name in file_names]
    if isinstance(file_names, list) and match(existing_files):
        return [mkpath(location, f_name) for f_name, exists in zip(file_names, existing_files) if exists]

    # Documented contract: nothing found -> empty list (previously an implicit None).
    return []
def plot_regions(regions, bbox, tag):
    """Write the discrepancy layer(s) for `regions` plus the map config/CSS.

    Every entry of `regions` is indexed as ``entry[0]`` (its discrepancy)
    and ``entry[1]`` (its geometry). A colormap is built over the range of
    discrepancies, and a layer named ``disc_<tag>_<NNN>`` is registered in
    KARTO_CONFIG and written under ``disc/``. Finally ``disc/photos.json``
    and ``disc/photos.css`` are written.

    NOTE(review): as the code stands, the loop stops after the first region,
    that single shapefile receives *all* polygons, and the colormap colour is
    overridden by 'red'. This looks like a debugging state - confirm.
    NOTE(review): the `bbox` argument is not used; the bounds come from the
    module-level BBOX.
    """
    # TODO not unicode safe
    scores = [entry[0] for entry in regions]
    normalizer = mcolor.Normalize(min(scores), max(scores))
    colormap = cm.ScalarMappable(normalizer, 'YlOrBr')
    schema = {'geometry': 'Polygon', 'properties': {}}
    css_rules = []
    KARTO_CONFIG['bounds']['data'] = [BBOX[1], BBOX[0], BBOX[3], BBOX[2]]
    polys = [{'geometry': mapping(entry[1]), 'properties': {}}
             for entry in regions]
    for rank, entry in enumerate(regions, 1):
        color = to_css_hex(colormap.to_rgba(entry[0]))
        name = u'disc_{}_{:03}'.format(tag, rank)
        shapefile = name+'.shp'
        KARTO_CONFIG['layers'][name] = {'src': shapefile}
        # The colour computed above is discarded in favour of a fixed one.
        color = 'red'
        css_rules.append(CSS.format(name, color, 'black'))
        # css_rules.append(CSS.format(name, color, color))
        with fiona.collection(mkpath('disc', shapefile),
                              "w", "ESRI Shapefile", schema) as out:
            out.writerecords(polys)
        # Only the first region is processed.
        break

    with open(mkpath('disc', 'photos.json'), 'w') as out:
        json.dump(KARTO_CONFIG, out)
    with open(mkpath('disc', 'photos.css'), 'w') as out:
        out.write('\n'.join(css_rules))
def plot_regions(regions, bbox, tag):
    """Write the discrepancy layer(s) for `regions` plus the map config/CSS.

    Every entry of `regions` is indexed as ``entry[0]`` (its discrepancy)
    and ``entry[1]`` (its geometry). A colormap is built over the range of
    discrepancies, and a layer named ``disc_<tag>_<NNN>`` is registered in
    KARTO_CONFIG and written under ``disc/``. Finally ``disc/photos.json``
    and ``disc/photos.css`` are written.

    NOTE(review): as the code stands, the loop stops after the first region,
    that single shapefile receives *all* polygons, and the colormap colour is
    overridden by 'red'. This looks like a debugging state - confirm.
    NOTE(review): the `bbox` argument is not used; the bounds come from the
    module-level BBOX.
    """
    # TODO not unicode safe
    scores = [entry[0] for entry in regions]
    normalizer = mcolor.Normalize(min(scores), max(scores))
    colormap = cm.ScalarMappable(normalizer, 'YlOrBr')
    schema = {'geometry': 'Polygon', 'properties': {}}
    css_rules = []
    KARTO_CONFIG['bounds']['data'] = [BBOX[1], BBOX[0], BBOX[3], BBOX[2]]
    polys = [{'geometry': mapping(entry[1]), 'properties': {}}
             for entry in regions]
    for rank, entry in enumerate(regions, 1):
        color = to_css_hex(colormap.to_rgba(entry[0]))
        name = u'disc_{}_{:03}'.format(tag, rank)
        shapefile = name + '.shp'
        KARTO_CONFIG['layers'][name] = {'src': shapefile}
        # The colour computed above is discarded in favour of a fixed one.
        color = 'red'
        css_rules.append(CSS.format(name, color, 'black'))
        # css_rules.append(CSS.format(name, color, color))
        with fiona.collection(mkpath('disc', shapefile),
                              "w", "ESRI Shapefile", schema) as out:
            out.writerecords(polys)
        # Only the first region is processed.
        break

    with open(mkpath('disc', 'photos.json'), 'w') as out:
        json.dump(KARTO_CONFIG, out)
    with open(mkpath('disc', 'photos.css'), 'w') as out:
        out.write('\n'.join(css_rules))
def _script_output_paths(script_name, label=None):
    """
    Given a script path, return the paths of its (stdout, stderr) files.

    This uses a scheme mirroring how PBS actually works, where jobs have
    '$PBS_O_WORKDIR/<script-name>.[eo]<$PBS_JOBID>'. The file names are
    prefixed with 'hod-' and, when given, the label. The '$PBS_O_WORKDIR'
    and '${PBS_JOBID}' placeholders are returned verbatim, not expanded.
    """
    basename = os.path.basename(script_name)
    if label is not None:
        prefix = 'hod-{}-{}'.format(label, basename)
    else:
        prefix = 'hod-{}'.format(basename)

    stdout_path, stderr_path = [
        mkpath('$PBS_O_WORKDIR', '{}.{}${{PBS_JOBID}}'.format(prefix, stream))
        for stream in ('o', 'e')
    ]
    return (stdout_path, stderr_path)
def make_root_directory(root_dir: str = None):
    """
    | Constructs the system's working root environment if the root environment has been changed, or if the root
      environment does not exist.
    |
    | **Different cases:**
    | - if **root_dir** is None, then the root environment is created at the default location.
    | - if the root directory is being changed and a current root already exists, the location is changed and the old
      file tree is copied over.
    | - if the root directory is not changed, then the file structure of the current root directory is ensured to match
      with the directory tree structure defined in the FolderStructure Enum.
    |
    | **Notes:**
    | If a new location for the root directory is being set, the new directory should be empty. Otherwise, the
      directory structure will be made within an additional sub-directory contained in **root_dir**.

    :param root_dir: A path to the new working root directory.
    :raises NotADirectoryError: raised if root_dir does not lead to an existing directory location or is a file.
    """
    root = FStruct.ROOT_DIR.get_path()
    location = root if root_dir is None else filesys.abspath(root_dir)

    # Only the default root is created on demand; an explicit root_dir must already exist.
    if root_dir is None and not filesys.exists(location):
        os.mkdir(location)

    # Checked *after* the mkdir above: the previous code tested a flag captured before the
    # directory was created and therefore raised right after creating the default root.
    if not filesys.isdir(location):
        raise NotADirectoryError(f"'{location}' - path is not a directory. ")

    if location != root:
        # A non-empty target gets its own sub-directory so existing content is left alone.
        if os.listdir(location):
            location = mkpath(location, DEFAULT_ROOT_NAME)
        if filesys.exists(root):
            shutil.copytree(root, location, dirs_exist_ok=True)
        FStruct.ROOT_DIR.set_path(location)

    # Make sure every directory declared in the enum exists under the (possibly new) root.
    for dir_path in FStruct.__members__.values():
        os.makedirs(dir_path.get_path(), exist_ok=True)
def resolve_dist_path(dist):
    """
    Return the path to the hod.conf of a distribution.

    `dist` is a distribution name like Hadoop-2.3.0-cdh5.0.0; the result is
    '<dists dir>/<dist>/hod.conf', where the dists dir comes from
    resolve_dists_dir().
    """
    return mkpath(resolve_dists_dir(), dist, 'hod.conf')
def find_files():
    """
    Yield the path of every file below RESOURCES_ROOT whose name ends
    with FILE_EXT.

    This is a simple recursive glob: `glob.glob` doesn't support the
    recursive argument in Python 2.
    """
    pattern = '*' + FILE_EXT
    for folder, _subfolders, names in os.walk(RESOURCES_ROOT):
        for name in names:
            if fnmatch.fnmatch(name, pattern):
                yield mkpath(folder, name)
def prepare_work_cfg(self):
    """Create the 'controldir' directory under the configured local
    work directory and remember its path in ``self.controldir``.

    A directory that already exists is not an error; any other OSError
    is propagated.
    """
    self.controldir = mkpath(self._config.localworkdir, 'controldir')
    try:
        os.makedirs(self.controldir)
    except OSError as err:
        # Only "already exists" is tolerated.
        if err.errno != EEXIST:
            raise
def mklocalworkdir(workdir):
    '''
    Build the pathname of a work directory local to this host/job/user.

    The result is '<workdir>/hod/<$PBS_JOBID>/<user>.<fqdn>.<pid>'. Nothing
    is created on disk. Raises RuntimeError when $PBS_JOBID is not set.
    '''
    user = _current_user()
    pid = os.getpid()
    jobid = os.environ.get('PBS_JOBID')
    if jobid is None:
        raise RuntimeError('$PBS_JOBID must be defined to create a localworkdir.')
    hostname = socket.getfqdn()
    leaf = '.'.join((user, hostname, str(pid)))
    return mkpath(workdir, 'hod', jobid, leaf)
def mklocalworkdir(workdir):
    '''
    Build the pathname of a work directory local to this host/job/user.

    The result is '<workdir>/hod/<$PBS_JOBID>/<user>.<fqdn>.<pid>'. Nothing
    is created on disk. Raises RuntimeError when $PBS_JOBID is not set.
    '''
    user = _current_user()
    pid = os.getpid()
    jobid = os.environ.get('PBS_JOBID')
    if jobid is None:
        raise RuntimeError(
            '$PBS_JOBID must be defined to create a localworkdir.')
    hostname = socket.getfqdn()
    leaf = '.'.join((user, hostname, str(pid)))
    return mkpath(workdir, 'hod', jobid, leaf)
def plot_some(tags, n=15):
    """Write the label layer for the `n` most discrepant tags.

    Each entry of `tags` is indexed as ``r[1]`` (a geometry offering
    ``difference``/``union``/``area``) and ``r[2]`` (the tag label).
    Entries are processed in order; every polygon is clipped by the union
    of the polygons that came before it, so the written shapes do not
    overlap.

    Side effects:
      * registers the ``top_disc`` layer and the data bounds in KARTO_CONFIG,
      * writes ``disc/top_disc.shp``, ``disc/photos.json`` and
        ``disc/photos.css``,
      * prints the bounding box and the covered area.

    :param tags: sequence of tag records, most discrepant first.
    :param n: number of leading records to plot.
    """
    schema = {'geometry': 'Polygon', 'properties': {'tag': 'str'}}
    style = []
    # SF_BBOX is consumed with each coordinate pair swapped, both for the
    # map bounds and for the box printed at the end.
    bounds = (SF_BBOX[1], SF_BBOX[0], SF_BBOX[3], SF_BBOX[2])
    KARTO_CONFIG['bounds']['data'] = list(bounds)
    # TODO cluster polys by their area so label's size can depend of it
    # polys = [{'geometry': mapping(r[1]),
    #           'properties': {'tag': r[2]}} for r in tags[:n]]
    polys = []
    cover = None
    for r in tags[:n]:
        region, tag = r[1], r[2]
        # Keep only the part of this region not already claimed by a
        # higher-ranked tag.
        visible = region if cover is None else region.difference(cover)
        polys.append({
            'geometry': mapping(visible),
            'properties': {'tag': tag},
        })
        cover = region if cover is None else cover.union(region)
    name = u'top_disc'
    KARTO_CONFIG['layers'][name] = {
        'src': name + '.shp',
        'labeling': {
            'key': 'tag'
        }
    }
    color = '#ffa873'
    style.append(CSS.format(name, color, 'black'))
    with fiona.collection(mkpath('disc', name + '.shp'),
                          "w", "ESRI Shapefile", schema) as f:
        f.writerecords(polys)

    with open(mkpath('disc', 'photos.json'), 'w') as f:
        json.dump(KARTO_CONFIG, f)
    style.append('#top_disc-label {font-family: OpenSans; font-size: 14px}')
    with open(mkpath('disc', 'photos.css'), 'w') as f:
        f.write('\n'.join(style))
    sf = box(*bounds)
    print(sf.bounds)
    # With no tags (or n == 0) nothing was covered; report an area of 0
    # instead of failing on `None.area` after the files were written.
    covered_area = 0 if cover is None else cover.area
    print(100 * covered_area, sf.area)
def _abspath(filepath, working_dir):
    '''
    Resolve `filepath` against `working_dir`.

    An empty filepath yields the real path of working_dir itself; a filepath
    starting with '/' is already absolute and is returned untouched;
    anything else is joined onto working_dir and passed through realpath.

    >>> _abspath('somedir/file', '/tmp')
    '/tmp/somedir/file'
    >>> _abspath('', '/tmp')
    '/tmp'
    >>> _abspath('/not-tmp/somedir/file', '/tmp')
    '/not-tmp/somedir/file'
    '''
    if len(filepath) == 0:
        return realpath(working_dir)
    if filepath[0] != '/':
        return realpath(mkpath(working_dir, filepath))
    # filepath is already absolute
    return filepath
def _setup_config_paths(precfg, resolver):
    """
    Create the work and config directories and write the target service
    (i.e. hadoop xml) config files into the config dir.

    Besides precfg.workdir and precfg.configdir, every entry of
    precfg.directories is passed through `resolver` and created. Each
    (file name, config) pair of precfg.service_configs is then written to
    the config dir with the writer looked up from precfg.config_writer.

    This needs to happen on master and slave nodes.
    """
    _ignore_eexist(lambda: os.makedirs(precfg.workdir))
    _ignore_eexist(lambda: os.makedirs(precfg.configdir))
    for directory in precfg.directories:
        # Bind the loop value explicitly so the callable never depends on
        # when it gets invoked.
        _ignore_eexist(lambda target=directory: os.makedirs(resolver(target)))

    # NOTE(review): this logs the *length* of config_writer, not the value -
    # presumably the name itself was intended; confirm before changing.
    _log.info("Looking up config_writer %s", len(precfg.config_writer))
    writer_fn = service_config_fn(precfg.config_writer)

    _log.info("Copying %d config files to %s", len(precfg.service_configs), precfg.configdir)
    for file_name, service_cfg in precfg.service_configs.items():
        _log.info("Copying config %s file to '%s'", service_cfg, precfg.configdir)
        target_path = mkpath(precfg.configdir, file_name)
        write_service_config(target_path, service_cfg, writer_fn, resolver)
def configdir(self):
    """Return the path of the 'conf' directory inside the local work dir."""
    base_dir = self.localworkdir
    return mkpath(base_dir, 'conf')
class FolderStructure(Enum):
    """
    | Contains a list of directory locations maintained by the system.
    | Specifically, the FolderStructure Enum lists out the root directory and sub-directories maintained by the system.
    | Use this Enum to get paths to specific containing directories.
    |
    | Each member's value is a ``(parent, directory)`` tuple: the name of the parent enumeration ('' for a base
      directory) and the directory's location relative to that parent.
    |
    | **Developer Notes:**
    | When adding new enumerations to FolderStructure, order matters. Enumerations in FolderStructure are initialized
      in the order they appear, thus when defining the 'parent' parameter, the parent enumeration must already be
      defined and initialized. Additionally, when defining the 'parent' parameter, provide a string with the name of
      the enumeration that is the parent. An empty string as the parent can be used to represent a base directory.
    | Parents are resolved every time get_path() is called, so relocating the root with set_path() relocates every
      sub-directory with it.
    """

    # Base directory: '../<DEFAULT_ROOT_NAME>' (made absolute by get_path) until set_path() relocates it.
    ROOT_DIR = ('', mkpath('..', DEFAULT_ROOT_NAME))

    # Location: <root>/data/modeling
    CLIMATE_DATA_DIR = ('ROOT_DIR', mkpath('data', 'modeling'))

    # Location: <root>/data/weather
    WEATHER_DATA_DIR = ('ROOT_DIR', mkpath('data', 'weather'))

    # Location: <root>/logs
    LOGS_DIR = ('ROOT_DIR', mkpath('logs'))

    # Location: <root>/models/schema
    MODEL_SCHEMA_DIR = ('ROOT_DIR', mkpath('models', 'schema'))

    # Location: <root>/models/trained
    TRAINED_MODELS_DIR = ('ROOT_DIR', mkpath('models', 'trained'))

    def __init__(self, parent, directory):
        """
        :param parent: Name of the parent enumeration, or '' for a base directory.
        :param directory: Location of this directory relative to its parent.
        :raises AttributeError: raised if the named parent is not an already-defined enumeration.
        """
        if parent != '' and parent not in self._member_map_:
            raise AttributeError(f'Enumeration {parent} has either not been initialized yet, or does not exist.')
        # Keep the parent's *name* rather than its resolved path: a path captured here would go
        # stale as soon as the root directory is moved with set_path().
        self.__parent = parent
        self.__location = directory

    def set_path(self, location):
        """
        Sets the path to the ROOT directory.

        :param location: The new location of the root directory.
        :raises ModificationError: raised if an operation attempts to modify the path of a directory other than the
            ROOT directory.
        """
        if self.name != 'ROOT_DIR':
            raise ModificationError(f'{self.name} is a sub-directory of the root directory and should not be modified.')
        self.__location = location

    def get_path(self):
        """
        :return: Returns the absolute path to the directory location, resolved against the current path of its
            parent (if any).
        """
        parent_path = '' if self.__parent == '' else type(self)[self.__parent].get_path()
        return filesys.abspath(mkpath(parent_path, self.__location))
def get_path(self):
    """
    Join the stored parent component with this entry's own location.

    :return: Returns the absolute path to the directory location.
    """
    combined = mkpath(self.__parent, self.__location)
    return filesys.abspath(combined)
def find_files(*dirs):
    """Collect the files below each of the given directories.

    Every directory in `dirs` is walked recursively with os.walk.

    :param dirs: directories to walk.
    :return: a list of ``(directory, [file paths])`` tuples, one per visited
        directory, in os.walk order. The file paths are a real list (not a
        one-shot ``map`` iterator as on Python 3 before), so the result can
        be iterated more than once.
    """
    results = []
    for src_dir in dirs:
        # `_subdirs` deliberately does not reuse the name of the `dirs`
        # parameter, which the original inner loop shadowed.
        for root, _subdirs, files in os.walk(src_dir):
            results.append((root, [mkpath(root, name) for name in files]))
    return results
def __init__(self, filename, ext, path_to):
    """
    Record a file's name, extension, containing directory and full path.

    :param filename: The file's name without its extension.
    :param ext: The file's extension, without the leading dot.
    :param path_to: The directory that contains the file.
    """
    full_name = f'{filename}.{ext}'
    self.__name = filename
    self.__ext = ext
    self.__filename = full_name
    self.__containing_dir = path_to
    self.__path = mkpath(path_to, full_name)
# -*- coding: utf-8 from __future__ import unicode_literals, absolute_import import fnmatch import json import os import unittest from os.path import join as mkpath, dirname, abspath import requests_mock from six.moves.urllib.parse import urljoin HOST_ROOT = 'https://api.deezer.com/' RESOURCES_ROOT = mkpath(abspath(dirname(__file__)), 'resources') FILE_EXT = '.json' # Override a local path -> URL path PATH_OVERRIDES = { mkpath('album', '302127', 'tracks14'): 'album/302127/tracks?index=14', mkpath('genre', 'noid'): 'genre', mkpath('radio', 'noid'): 'radio', mkpath('search', 'noid'): 'search?q=Billy+Jean', mkpath('search_1', 'noid'): 'search?q=Billy Jean&limit=2&index=2', } def find_files(): """ Simple recursive glob for Python 2, `glob.glob`doesn't support recursive argument in Python 2. """
# -*- coding: utf-8 from __future__ import unicode_literals, absolute_import import fnmatch import json import os import unittest from os.path import join as mkpath, dirname, abspath import requests_mock from six.moves.urllib.parse import urljoin HOST_ROOT = 'https://api.deezer.com/' RESOURCES_ROOT = mkpath(abspath(dirname(__file__)), 'resources') FILE_EXT = '.json' # Override a local path -> URL path PATH_OVERRIDES = { '/album/302127/tracks14': '/album/302127/tracks?index=14', '/genre/noid': '/genre', '/radio/noid': '/radio', '/search/noid': '/search?q=Billy+Jean', '/search_1/noid': '/search?q=Billy Jean&limit=2&index=2', } def find_files(): """ Simple recursive glob for Python 2, `glob.glob`doesn't support recursive argument in Python 2. """
def resource(path):
    """Return the filename of `path` under the 'resources' directory of
    the 'dazzle' distribution, as located by pkg_resources."""
    # Imported lazily, as in the original, so pkg_resources is only needed
    # when a resource is actually requested.
    import pkg_resources

    requirement = pkg_resources.Requirement.parse('dazzle')
    relative_path = mkpath('resources', path)
    return pkg_resources.resource_filename(requirement, relative_path)