def add_key_to_index(self, key):
    dir_name, key_basename = split_path(key.name)
    if key_basename == "index.html":
        return
    directory = self.dirs.get(dir_name)
    if directory is None:
        directory = Directory(dir_name)
        self.dirs[dir_name] = directory
    directory.add_content(key)
    # Write a subdirectory entry into each parent directory as needed.
    while dir_name != "":
        parent_dirname, tail = split_path(dir_name)
        parent = self.dirs.get(parent_dirname)
        if parent is None:
            parent = Directory(parent_dirname)
            self.dirs[parent_dirname] = parent
        parent.add_subdir(tail, directory)
        dir_name = parent_dirname
        directory = parent
    return
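# Illustrative trace (hypothetical key name, not from the original source):
# for key.name == 'a/b/c.txt', split_path yields ('a/b', 'c.txt'), so the
# content lands in Directory('a/b'); the loop then registers 'b' as a subdir
# of Directory('a') and 'a' as a subdir of the root Directory('').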
def read_image_stack(fn, *args, **kwargs):
    """Read a 3D volume of images in image or .h5 format into a numpy.ndarray.

    The format is automatically detected from the (first) filename.

    A 'crop' keyword argument is supported, as a list of
    [xmax, xmin, ymax, ymin, zmax, zmin]. Use 'None' for no crop in that
    coordinate.

    If reading in .h5 format, keyword arguments are passed through to
    read_h5_stack().
    """
    if os.path.isdir(fn):
        fn += '/'
    d, fn = split_path(os.path.expanduser(fn))
    if len(d) == 0:
        d = '.'
    crop = kwargs.get('crop', [None] * 6)
    if len(crop) == 4:
        crop.extend([None] * 2)
    elif len(crop) == 2:
        crop = [None] * 4 + crop
    kwargs['crop'] = crop
    if any([fn.endswith(ext) for ext in supported_image_extensions]):
        # image types, such as a set of pngs or a multi-page tiff
        xmin, xmax, ymin, ymax, zmin, zmax = crop
        if len(args) > 0 and type(args[0]) == str and args[0].endswith(fn[-3:]):
            # input is a list of filenames
            fns = [fn] + [split_path(f)[1] for f in args]
        else:
            # input is a filename pattern to match
            fns = fnfilter(os.listdir(d), '*' + fn[-4:])
        if len(fns) == 1 and fns[0].endswith('.tif'):
            stack = read_multi_page_tif(join_path(d, fns[0]), crop)
        else:
            fns.sort(key=alphanumeric_key)  # sort filenames numerically
            fns = fns[zmin:zmax]
            im0 = pil_to_numpy(Image.open(join_path(d, fns[0])))
            ars = (pil_to_numpy(Image.open(join_path(d, fn))) for fn in fns)
            im0 = im0[xmin:xmax, ymin:ymax]
            dtype = im0.dtype
            stack = zeros((len(fns),) + im0.shape, dtype)
            for i, im in enumerate(ars):
                stack[i] = im[xmin:xmax, ymin:ymax]
    elif fn.endswith('_boundpred.h5') or fn.endswith('_prediction.h5'):
        # Ilastik batch prediction output file
        stack = read_prediction_from_ilastik_batch(os.path.join(d, fn), **kwargs)
    elif fn.endswith('.h5'):
        # other HDF5 file
        stack = read_h5_stack(join_path(d, fn), *args, **kwargs)
    elif os.path.isfile(os.path.join(d, 'superpixel_to_segment_map.txt')):
        # Raveler export
        stack = raveler_to_labeled_volume(d, *args, **kwargs)
    return squeeze(stack)
def main(args):
    argp = ARGPARSER.parse_args(args[1:])

    id_to_ann_files = defaultdict(list)
    for file_path in (l.strip() for l in stdin):
        if not any((file_path.endswith(suff) for suff in UNMERGED_SUFFIXES)):
            if not argp.no_warn:
                import sys
                print >> sys.stderr, (
                    'WARNING: invalid file suffix for %s, ignoring') % (
                        file_path, )
            continue
        dirname, basename = split_path(file_path)
        id = join_path(dirname, basename.split('.')[0])
        id_to_ann_files[id].append(file_path)

    for id, ann_files in id_to_ann_files.iteritems():
        lines = []
        for ann_file_path in ann_files:
            with open(ann_file_path, 'r') as ann_file:
                for line in ann_file:
                    lines.append(line)
        with open(id + '.' + MERGED_SUFFIX, 'w') as merged_ann_file:
            for line in lines:
                merged_ann_file.write(line)
def make_module(self, module):
    '''
    Create a module directory under `testdir`.

    Each of the intermediate directories (if there are any) will also be
    usable as modules (i.e., they'll have __init__.py files in them).

    Parameters
    ----------
    module : str
        Path to the module directory. Must be a relative path

    Returns
    -------
    str
        The full path to the module directory
    '''
    if isabs(module):
        raise ValueError('Must use a relative path. Given ' + str(module))
    modpath = p(self.testdir, module)
    os.makedirs(modpath)
    last_dname = None
    dname = modpath
    while last_dname != dname and dname != self.testdir:
        open(p(dname, '__init__.py'), 'x').close()
        base = ''
        while not base and last_dname != dname:
            last_dname = dname
            # Split the directory we are currently at (not the leaf module
            # path) so the walk actually ascends toward `testdir`.
            dname, base = split_path(dname)
    return modpath
def main(args):
    argp = ARGPARSER.parse_args(args[1:])

    # ID is the stem of a file
    id_to_ann_files = defaultdict(list)
    # Index all IDs before we merge so that we can do a little magic
    for file_path in (l.strip() for l in stdin):
        if not any((file_path.endswith(suff) for suff in UNMERGED_SUFFIXES)):
            if not argp.no_warn:
                import sys
                print >> sys.stderr, (
                    'WARNING: invalid file suffix for %s, ignoring'
                ) % (file_path, )
            continue
        dirname, basename = split_path(file_path)
        id = join_path(dirname, basename.split('.')[0])
        id_to_ann_files[id].append(file_path)

    for id, ann_files in id_to_ann_files.iteritems():
        # XXX: Check if output file exists
        lines = []
        for ann_file_path in ann_files:
            with open(ann_file_path, 'r') as ann_file:
                for line in ann_file:
                    lines.append(line)
        with open(id + '.' + MERGED_SUFFIX, 'w') as merged_ann_file:
            for line in lines:
                merged_ann_file.write(line)
def main(args):
    argp = ARGPARSER.parse_args(args[1:])

    # ID is the stem of a file
    id_to_ann_files = defaultdict(list)
    # Index all IDs before we merge so that we can do a little magic
    for file_path in (l.strip() for l in stdin):
        if not any((file_path.endswith(suff) for suff in UNMERGED_SUFFIXES)):
            if not argp.no_warn:
                import sys
                print(('WARNING: invalid file suffix for %s, ignoring')
                      % (file_path, ), file=sys.stderr)
            continue
        dirname, basename = split_path(file_path)
        id = join_path(dirname, basename.split('.')[0])
        id_to_ann_files[id].append(file_path)

    for id, ann_files in id_to_ann_files.items():
        # XXX: Check if output file exists
        lines = []
        for ann_file_path in ann_files:
            with open(ann_file_path, 'r') as ann_file:
                for line in ann_file:
                    lines.append(line)
        with open(id + '.' + MERGED_SUFFIX, 'w') as merged_ann_file:
            for line in lines:
                merged_ann_file.write(line)
def __init__(self, path, repo, origin):
    clean_repo = Target.clean_path(repo)
    if path is not None:
        self.name = clean_repo
        self.path = join(path, self.name)
    else:
        self.path = clean_repo
        self.name = split_path(self.path)[1]
    self.origin = origin
def set(self, data: Union[str, bytes],
        content_type: str = "application/json",
        cache: str = "no-store") -> NoReturn:
    mode = "w" if isinstance(data, str) else "wb"
    dir_path, _ = split_path(self.path)
    makedirs(dir_path, exist_ok=True)
    with open(self.path, mode=mode) as output_file:
        # write() accepts both str and bytes; print() would raise TypeError
        # on a binary handle (and append a stray trailing newline).
        output_file.write(data)
async def get_fresh_data(repository_url: str,
                         excluded: Iterable) -> AsyncIterator[str]:
    """
    Retrieve a fresh batch of data from the repository.

    Parameters
    ----------
    repository_url: str
        URL for the repository (the zip file).

    excluded: Iterable

    Returns
    -------
    AsyncIterator[str]
        An async iterator of relative paths to the file.
    """
    url = BASE_REPOSITORY + repository_url.lstrip(processor_settings.URL_SEPARATOR)

    # Requesting the latest files from the repository.
    async with Lock():
        response = get_request(url=url)

    logging.info(f"> Download request completed with "
                 f"status {response.status_code}: {repository_url}")

    if response.status_code != HTTPStatus.OK:
        raise RuntimeError(f"Failed to download the data from {url}: {response.text}")

    # `ZipFile` only understands files.
    data_bin = BytesIO(response.content)

    async with Lock():
        with ZipFile(data_bin, mode="r") as zip_obj:
            paths = zip_obj.namelist()
            # Extracting the contents into the temp directory.
            zip_obj.extractall(TEMP_DIR_PATH)

    logging.info("> Successfully extracted and stored the data")

    for path in paths:
        _, filename = split_path(path)

        if any(map(lambda p: p in path, excluded)):
            continue

        full_path = join_path(TEMP_DIR_PATH, path)

        # Discard directories
        if not isfile(full_path):
            continue

        logging.info(f"> Processing file '{path}'")
        yield path
def archive_path(path, files=None):
    """Archives a folder or some files in this folder"""
    split = split_path(path)
    chdir = join_path(*split[:-1])
    print(chdir)
    dest = split[-1]
    dest_path = '/tmp/' + dest + str(random.randint(0, 200000)).zfill(6)
    print(dest)
    return make_archive(dest_path, 'zip', chdir, dest)
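# Usage sketch (illustrative; assumes split_path is os.path.split): for
# path == '/home/user/project', split == ('/home/user', 'project'), so the
# archive is rooted at '/home/user' and its entries are prefixed 'project/'.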
def send_report(
    report=None, url=None, user=None, key=None, prompt=True, force=False,
    quiet=False
):
    """Send a crash report to a Breakpad Server URL endpoint."""

    if DISABLED and not force:
        return

    if not report:
        if not hasattr(sys, 'last_traceback'):
            return
        report = format_exception(
            sys.last_type, sys.last_value, sys.last_traceback, as_html=True
        )

    if prompt:
        try:
            confirm = raw_input('Do you want to send a crash report [Y/n]? ')
        except EOFError:
            return
        if confirm.lower() in ['n', 'no']:
            return

    if not user:
        config = SCMConfig()
        user = config.get('codereview.email')
        key = config.get('codereview.key')
        url = config.get('codereview.breakpad_url')
        if not (user and key and url):
            exit("Sorry, you need to configure your codereview settings.")

    if not quiet:
        print
        print "Sending crash report ... "

    payload = {
        'args': ' '.join(sys.argv[1:]),
        'command': split_path(sys.argv[0])[1],
        'report': ''.join(report),
        'user': user,
    }

    payload['sig'], payload = sign_payload(payload, key)

    try:
        response = urlopen(url, urlencode(payload))
        if not quiet:
            print
            print response.read()
        response.close()
    except Exception:
        if not quiet:
            print
            print "Sorry, couldn't send the crash report for some reason."
def read_image_stack(fn, *args, **kwargs):
    """Read a 3D volume of images in image or .h5 format into a numpy.ndarray.

    The format is automatically detected from the (first) filename.

    A 'crop' keyword argument is supported, as a list of
    [xmax, xmin, ymax, ymin, zmax, zmin]. Use 'None' for no crop in that
    coordinate.

    If reading in .h5 format, keyword arguments are passed through to
    read_h5_stack().
    """
    d, fn = split_path(os.path.expanduser(fn))
    if len(d) == 0:
        d = '.'
    crop = kwargs.get('crop', [None] * 6)
    if len(crop) == 4:
        crop.extend([None] * 2)
    elif len(crop) == 2:
        crop = [None] * 4 + crop
    kwargs['crop'] = crop
    if any([fn.endswith(ext) for ext in supported_image_extensions]):
        xmin, xmax, ymin, ymax, zmin, zmax = crop
        if len(args) > 0 and type(args[0]) == str and args[0].endswith(fn[-3:]):
            # input is a list of filenames
            fns = [fn] + [split_path(f)[1] for f in args]
        else:
            # input is a filename pattern to match
            fns = fnfilter(os.listdir(d), fn)
        if len(fns) == 1 and fns[0].endswith('.tif'):
            stack = read_multi_page_tif(join_path(d, fns[0]), crop)
        else:
            fns.sort(key=alphanumeric_key)  # sort filenames numerically
            fns = fns[zmin:zmax]
            im0 = pil_to_numpy(Image.open(join_path(d, fns[0])))
            ars = (pil_to_numpy(Image.open(join_path(d, fn))) for fn in fns)
            im0 = im0[xmin:xmax, ymin:ymax]
            dtype = im0.dtype
            stack = zeros((len(fns),) + im0.shape, dtype)
            for i, im in enumerate(ars):
                stack[i] = im[xmin:xmax, ymin:ymax]
    if fn.endswith('.h5'):
        stack = read_h5_stack(join_path(d, fn), *args, **kwargs)
    return squeeze(stack)
def resource_path(filename):
    paths = appdirs.site_data_dir(appname=__app_id__, appauthor=False,
                                  multipath=True).split(pathsep)
    if getattr(sys, 'frozen', False):
        # we are inside a single-file pyinstaller bundle
        paths.insert(0, sys._MEIPASS)
    else:
        paths.append(split_path(__file__)[0])
    for dir_ in paths:
        path = join(dir_, filename)
        if exists(path):
            return path
    dir_ = appdirs.user_data_dir(appname=__app_id__, appauthor=__author__)
    return join(dir_, filename)
async def process_and_upload_data(path: str, get_file_data: FileFetcherType,
                                  container: str, base_path: str) -> NoReturn:
    """
    Uploads processed files to the storage using the correct caching
    and ``content-type`` specs.

    Parameters
    ----------
    path: str
        Path (within the storage container) in which the file is to be stored.

    get_file_data: FileFetcherType

    container: str
        Storage container in which the file is to be stored.

    base_path: str

    Returns
    -------
    NoReturn
    """
    _, file_name = split_path(path)

    # Files are stored as JSON - the extension must be updated:
    file_name, _ = splitext(file_name)
    json_name = f"{file_name}.json"
    yaml_name = f"{file_name}.yaml"

    json_path = str.join(processor_settings.URL_SEPARATOR, [STORAGE_PATH, json_name])
    yaml_path = str.join(processor_settings.URL_SEPARATOR, [STORAGE_PATH, yaml_name])

    if ".github" in path:
        return None

    raw_data = await get_file_data(path, base_path)
    data = await prepare_data(raw_data)

    # Uploading the data
    with StorageClient(container=container, path=json_path) as client:
        async with Lock():
            client.upload(data=data.json_data)

    with StorageClient(container=container, path=yaml_path,
                       content_type="application/x-yaml") as client:
        async with Lock():
            client.upload(data=data.yaml_data)
def splitall(self, path):
    allparts = list()
    while True:
        parts = split_path(path)
        if parts[0] == path:  # sentinel for absolute paths
            allparts.insert(0, parts[0])
            break
        elif parts[1] == path:  # sentinel for relative paths
            allparts.insert(0, parts[1])
            break
        else:
            path = parts[0]
            allparts.insert(0, parts[1])
    return allparts
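# Usage sketch (illustrative; assumes split_path is os.path.split on POSIX):
#
#     self.splitall('/usr/local/bin')  # -> ['/', 'usr', 'local', 'bin']
#     self.splitall('local/bin')       # -> ['local', 'bin']
#
# os.path.split('/') returns ('/', ''), so parts[0] == path only once the
# root is reached, which is why it serves as the absolute-path sentinel.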
def load_ids(dir, langs):
    if not langs:
        langs = LANGS
    # root     bsnlp/sample_pl_cs_ru_bg/raw/cs
    # filename brexit_cs.txt_file_100.txt
    for root, subdirs, filenames in walk(dir):
        tail, lang = split_path(root)
        if lang not in langs:
            continue
        tail, type = split_path(tail)
        if type not in (ANNOTATED, RAW):
            # raw/nord_stream/ru/nord_stream_ru.txt_file_44.txt
            tail, type = split_path(tail)
        assert type in (ANNOTATED, RAW), root
        for filename in filenames:
            name, ext = split_ext(filename)
            if ext not in (TXT, OUT):
                continue
            path = join_path(root, filename)
            yield BsnlpId(lang, type, name, path)
def __init__(self, steps, parent=None):
    super(ProgressDialog, self).__init__(parent)
    form, _ = uic.loadUiType(split_path(__file__)[0] + "/progressDialog.ui")
    self.ui = form()
    self.ui.setupUi(self)
    self.steps = steps
    self.current = 0
    self._add_pending()
    self.ui.cancel.clicked.connect(self.cancel)
    self.trigger_popup.connect(self.popup)
    self.trigger_update.connect(self.update_step)
def make_module(self, module):
    if isabs(module):
        raise Exception('Must use a relative path. Given ' + str(module))
    modpath = p(self.testdir, module)
    os.makedirs(modpath)
    last_dname = None
    dname = modpath
    while last_dname != dname and dname != self.testdir:
        open(p(dname, '__init__.py'), 'x').close()
        base = ''
        while not base and last_dname != dname:
            last_dname = dname
            # Split the current directory so the walk ascends toward `testdir`.
            dname, base = split_path(dname)
    return modpath
def generate_thumbnail_name(self, raw_name, thumb_name, size):
    """
    Return a thumbnail file path like::

        `path/to/thumb_name-raw_name-size[0]xsize[1].jpg`
    """
    filepath, filename = split_path(raw_name)
    fn, ext = split_ext(filename)
    thumbnail_filename = "%s-%s-%sx%s%s" % (thumb_name, fn,
                                            size[0], size[1], ext)
    # join path and new filename
    thumbnail_full_path = join_path(filepath, thumbnail_filename)
    return thumbnail_full_path
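# Illustrative example (hypothetical arguments; assumes split_path/split_ext
# are os.path.split/os.path.splitext):
#
#     self.generate_thumbnail_name('photos/cat.jpg', 'thumb', (64, 48))
#     # -> 'photos/thumb-cat-64x48.jpg'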
def run(self, **kwargs):
    parent, file_name = self._get_current_file()
    python = self._get_python()
    if file_name.startswith("test_"):
        kwargs["working_dir"] = parent
        _, parent_name = split_path(parent)
        file_name = parent_name + path_separator + file_name
        flags = "-m unittest"
    else:
        flags = "-u"
    shell_cmd = " ".join([python, flags, file_name])
    kwargs["shell_cmd"] = shell_cmd
    self.window.run_command("exec", kwargs)
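# Illustrative: for an open file /repo/tests/test_foo.py this sets
# working_dir to /repo/tests and builds shell_cmd
# '<python> -m unittest tests/test_foo.py'; any other file runs as
# '<python> -u <file>'.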
def parse_otool_output(output):
    """Search otool output for MKL dependencies. Return (mkl_dirs, mkl_libs)."""
    from re import compile
    from os.path import join, dirname, split as split_path, abspath, basename
    import numpy
    import sys

    # like "@rpath/libmkl_intel.dylib (compatibility version 0.0.0, current version 0.0.0)"
    re1 = compile(r"\s*@rpath/lib/(.+) \(.+\)")
    # like "@loader_path/libmkl_intel.dylib (compatibility version 0.0.0, current version 0.0.0)"
    re2 = compile(r"\s*@loader_path/(.+) \(.+\)")
    # like "/usr/lib/libSystem.B.dylib (compatibility version 1.0.0, current version 111.0.0)"
    re3 = compile(r"\s*(.+) \(.+\)")
    re_fname = compile(r"lib(mkl.*|iomp.*)\.(so|dylib)(\.[^ ]*)?")

    # we assume for now that @rpath == <sys.prefix>/lib
    prefix_dir = getattr(sys, 'base_prefix', sys.prefix)
    sys_lib_dir = join(prefix_dir, "lib")

    mkl_dirs, mkl_libs = [], []

    re1_match, output_lines = split_regex(output.splitlines(), re1)
    for m in re1_match:
        fname = m.group(1)
        m_fname = re_fname.match(fname)
        if m_fname:
            # we assume that @rpath is equal to sys.prefix
            mkl_libs.append(join(sys_lib_dir, m.group(1)))
            mkl_dirs.append(sys_lib_dir)

    re2_match, output_lines = split_regex(output_lines, re2)
    for m in re2_match:
        full_path = join(dirname(numpy.__file__), 'linalg', m.group(1))
        fpath, fname = split_path(abspath(full_path))
        m_fname = re_fname.match(fname)
        if m_fname:
            mkl_libs.append(full_path)
            mkl_dirs.append(fpath)

    for m in split_regex(output_lines, re3)[0]:
        path = m.group(1)
        fname = basename(path)
        m_fname = re_fname.match(fname)
        if m_fname:
            mkl_libs.append(path)
            mkl_dirs.append(dirname(path))

    return set(mkl_dirs), set(mkl_libs)
def list_dir(path):
    """Return folder content or filename"""
    try:
        if isdir(path) and exists(path):
            ls = prep_ls(path)
            return ls
        elif isfile(path):
            if DEBUG:
                root, filename = split_path(path)
                return static_file(filename, root=root, download=True)
            else:
                response.headers['X-Accel-Redirect'] = path
                return ''
    except OSError:
        abort(404)
def read_image_stack(fn):
    """Read a 3D volume of images in .tif or .h5 formats into a numpy.ndarray.

    This function attempts to automatically determine input file types and
    wraps specific image-reading functions.
    Adapted from gala.imio (https://github.com/janelia-flyem/gala)
    """
    if os.path.isdir(fn):
        fn += '/'
    d, fn = split_path(os.path.expanduser(fn))
    if len(d) == 0:
        d = '.'
    fns = fnfilter(os.listdir(d), fn)
    if len(fns) == 1 and fns[0].endswith('.tif'):
        stack = read_multi_page_tif(join_path(d, fns[0]))
    elif fn.endswith('.h5'):
        data = h5py.File(join_path(d, fn), 'r')
        stack = data[group_name].value
    return squeeze(stack)
def get_emotion_number_from_filename(filename):
    return EMOTION_NUMBERS[split_path(filename)[-1].split('.')[0].split('_')[1]]
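# Illustrative parse chain (hypothetical '<speaker>_<emotion>_<take>' naming;
# the EMOTION_NUMBERS keys are assumed): for 'clips/DC_happy_01.wav',
# split_path(...)[-1] -> 'DC_happy_01.wav', .split('.')[0] -> 'DC_happy_01',
# .split('_')[1] -> 'happy', which is then looked up in EMOTION_NUMBERS.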
def get_path(url):
    u = urlparse(url)
    return split_path(u.path)
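# Usage sketch (illustrative; assumes urlparse from urllib.parse and
# split_path as os.path.split):
#
#     get_path('https://example.com/static/img/logo.png')
#     # -> ('/static/img', 'logo.png')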
def main(argv, genfiles=None):

    op = OptionParser()
    op.add_option('-a', dest='authors', default='',
                  help="Set the path for a special authors file (optional)")
    op.add_option('-c', dest='package', default='',
                  help="Generate documentation for the Python package (optional)")
    op.add_option('-d', dest='data_file', default='',
                  help="Set the path for a persistent data file (optional)")
    op.add_option('-e', dest='output_encoding', default='utf-8',
                  help="Set the output encoding (default: utf-8)")
    op.add_option('-f', dest='format', default='html',
                  help="Set the output format (default: html)")
    op.add_option('-i', dest='input_encoding', default='utf-8',
                  help="Set the input encoding (default: utf-8)")
    op.add_option('-o', dest='output_path', default=HOME,
                  help="Set the output directory for files (default: $PWD)")
    op.add_option('-p', dest='pattern', default='',
                  help="Generate index files for the path pattern (optional)")
    op.add_option('-r', dest='root_path', default='',
                  help="Set the path to the root working directory (optional)")
    op.add_option('-t', dest='template', default='',
                  help="Set the path to a template file (optional)")
    op.add_option('--quiet', dest='quiet', default=False, action='store_true',
                  help="Flag to suppress output")
    op.add_option('--stdout', dest='stdout', default=False, action='store_true',
                  help="Flag to redirect to stdout instead of to a file")

    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        return

    authors = options.authors
    if authors:
        if not isfile(authors):
            raise IOError("%r is not a valid path!" % authors)
        authors = parse_authors_file(authors)
    else:
        authors = {}

    email2author = {'unknown': 'unknown'}
    author2link = {'unknown': ''}

    for author, author_info in authors.iteritems():
        for _info in author_info:
            if _info.startswith('http://') or _info.startswith('https://'):
                if author not in author2link:
                    author2link[author] = _info
            elif '@' in _info:
                email2author[_info] = author

    authors['unknown'] = ['unknown']

    output_path = options.output_path.rstrip('/')
    if not isdir(output_path):
        raise IOError("%r is not a valid directory!" % output_path)

    root_path = options.root_path

    siteinfo = join_path(output_path, '.siteinfo')
    if isfile(siteinfo):
        env = {}
        execfile(siteinfo, env)
        siteinfo = env['INFO']
    else:
        siteinfo = {
            'site_url': '',
            'site_nick': '',
            'site_description': '',
            'site_title': ''
        }

    stdout = sys.stdout if options.stdout else None
    verbose = False if stdout else (not options.quiet)

    format = options.format
    if format not in ('html', 'tex'):
        raise ValueError("Unknown format: %s" % format)

    if (format == 'tex') or (not options.template):
        template = False
    elif not isfile(options.template):
        raise IOError("%r is not a valid template!" % options.template)
    else:
        template_path = abspath(options.template)
        template_root = dirname(template_path)
        template_loader = TemplateLoader([template_root])
        template_file = open(template_path, 'rb')
        template = MarkupTemplate(
            template_file.read(), loader=template_loader, encoding='utf-8'
        )
        template_file.close()

    data_file = options.data_file
    if data_file:
        if isfile(data_file):
            data_file_obj = open(data_file, 'rb')
            data_dict = load_pickle(data_file_obj)
            data_file_obj.close()
        else:
            data_dict = {}

    input_encoding = options.input_encoding
    output_encoding = options.output_encoding

    if genfiles:
        files = genfiles
    elif options.package:
        package_root = options.package
        files = []
        add_file = files.append
        package = None
        for part in reversed(package_root.split(SEP)):
            if part:
                package = part
                break
        if package is None:
            raise ValueError("Couldn't find the package name from %r" % package_root)
        for dirpath, dirnames, filenames in walk(package_root):
            for filename in filenames:
                if not filename.endswith('.py'):
                    continue
                filename = join_path(dirpath, filename)
                module = package + filename[len(package_root):]
                if module.endswith('__init__.py'):
                    module = module[:-12]
                else:
                    module = module[:-3]
                module = '.'.join(module.split(SEP))
                module_file = open(filename, 'rb')
                module_source = module_file.read()
                module_file.close()
                docstring = docstring_regex.search(module_source)
                if docstring:
                    docstring = docstring.group(0)
                    if docstring.startswith('r'):
                        docstring = docstring[4:-3]
                    else:
                        docstring = docstring[3:-3]
                    if docstring and docstring.strip().startswith('=='):
                        docstring = strip_leading_indent(docstring)
                    module_source = docstring_regex.sub('', module_source, 1)
                else:
                    docstring = ''
                info = {}
                if root_path and isabs(filename) and filename.startswith(root_path):
                    info['__path__'] = filename[len(root_path)+1:]
                else:
                    info['__path__'] = filename
                info['__updated__'] = datetime.utcfromtimestamp(
                    stat(filename).st_mtime
                )
                info['__outdir__'] = output_path
                info['__name__'] = 'package.' + module
                info['__type__'] = 'py'
                info['__title__'] = module
                info['__source__'] = highlight(module_source, PythonLexer(),
                                               SYNTAX_FORMATTER)
                add_file((docstring, '', info))
    else:
        files = []
        add_file = files.append
        for filename in args:
            if not isfile(filename):
                raise IOError("%r doesn't seem to be a valid file!" % filename)
            if root_path and isabs(filename) and filename.startswith(root_path):
                path = filename[len(root_path)+1:]
            else:
                path = filename
            info = get_git_info(filename, path)
            # old svn support:
            # info = get_svn_info(path.split(SEP)[0], '*.txt')[path]
            source_file = open(filename, 'rb')
            source = source_file.read()
            source_file.close()
            if MORE_LINE in source:
                source_lead = source.split(MORE_LINE)[0]
                source = source.replace(MORE_LINE, '')
            else:
                source_lead = ''
            filebase, filetype = splitext(basename(filename))
            info['__outdir__'] = output_path
            info['__name__'] = filebase.lower()
            info['__type__'] = 'txt'
            info['__title__'] = filebase.replace('-', ' ')
            add_file((source, source_lead, info))

    for source, source_lead, info in files:
        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s in [%s]' % (
                info['__type__'], info['__path__'], split_path(output_path)[1]
            )
            print LINE
            print
        if template:
            output, props = render_rst(source, format, input_encoding, True)
            # output = output.encode(output_encoding)
            info['__text__'] = output.encode(output_encoding)
            info.update(props)
            if source_lead:
                info['__lead__'] = render_rst(
                    source_lead, format, input_encoding, True
                )[0].encode(output_encoding)
            output = template.generate(
                content=output,
                info=info,
                authors=authors,
                email2author=email2author,
                author2link=author2link,
                **siteinfo
            ).render('xhtml', encoding=output_encoding)
        else:
            output, props = render_rst(
                source, format, input_encoding, True, as_whole=True
            )
            info.update(props)
            output = output.encode(output_encoding)
            info['__text__'] = output
            if source_lead:
                info['__lead__'] = render_rst(
                    source_lead, format, input_encoding, True, as_whole=True
                )[0].encode(output_encoding)
        if data_file:
            data_dict[info['__path__']] = info
        if stdout:
            print output
        else:
            output_filename = join_path(
                output_path, '%s.%s' % (info['__name__'], format)
            )
            output_file = open(output_filename, 'wb')
            output_file.write(output)
            output_file.close()

    if verbose:
        print 'Done!'

    if data_file:
        data_file_obj = open(data_file, 'wb')
        dump_pickle(data_dict, data_file_obj)
        data_file_obj.close()

    if options.pattern:
        pattern = options.pattern
        items = [
            item for item in data_dict.itervalues()
            if item['__outdir__'] == pattern
        ]
        # index.js/json
        import json
        index_js_template = join_path(output_path, 'index.js.template')
        if isfile(index_js_template):
            index_json = json.dumps([
                [_art['__name__'], _art['title'].encode('utf-8')]
                for _art in sorted(
                    [item for item in items
                     if item.get('x-created') and
                        item.get('x-type', 'blog') == 'blog'],
                    key=lambda i: i['x-created']
                )
            ])
            index_js_template = open(index_js_template, 'rb').read()
            index_js = open(join_path(output_path, 'index.js'), 'wb')
            index_js.write(index_js_template % index_json)
            index_js.close()
        for name, mode, format in INDEX_FILES:
            pname = name.split('.', 1)[0]
            template_file = None
            if siteinfo['site_nick']:
                template_path = join_path(
                    template_root, '%s.%s.genshi' % (pname, siteinfo['site_nick'])
                )
                if isfile(template_path):
                    template_file = open(template_path, 'rb')
            if not template_file:
                template_path = join_path(template_root, '%s.genshi' % pname)
                template_file = open(template_path, 'rb')
            page_template = MarkupTemplate(
                template_file.read(), loader=template_loader, encoding='utf-8'
            )
            template_file.close()
            poutput = page_template.generate(
                items=items[:],
                authors=authors,
                email2author=email2author,
                author2link=author2link,
                root_path=output_path,
                **siteinfo
            ).render(format)
            poutput = unicode(poutput, output_encoding)
            if mode:
                output = template.generate(
                    alternative_content=poutput,
                    authors=authors,
                    **siteinfo
                ).render(format)
            else:
                output = poutput
            # @/@ wtf is this needed???
            if isinstance(output, unicode):
                output = output.encode(output_encoding)
            output_file = open(join_path(output_path, name), 'wb')
            output_file.write(output)
            output_file.close()
def create_file_form(data, file):
    """
    Creates a `multipart/form-data` form from the message's data and from the
    file data. If there is no files to send, will return `None` to tell the
    caller, that nothing is added to the overall data.

    Parameters
    ----------
    data : `dict` of `Any`
        The data created by the ``.message_create`` method.
    file : `dict` of (`file-name`, `io`) items, `list` of (`file-name`, `io`)
            elements, tuple (`file-name`, `io`), `io`
        The files to send.

    Returns
    -------
    form : `None` or `Formdata`
        Returns a `Formdata` of the files and from the message's data. If
        there are no files to send, returns `None` instead.

    Raises
    ------
    ValueError
        If more than `10` file is registered to be sent.

    Notes
    -----
    Accepted `io` types with check order are:
    - ``BodyPartReader`` instance
    - `bytes`, `bytearray`, `memoryview` instance
    - `str` instance
    - `BytesIO` instance
    - `StringIO` instance
    - `TextIOBase` instance
    - `BufferedReader`, `BufferedRandom` instance
    - `IOBase` instance
    - ``AsyncIO`` instance
    - `async-iterable`

    Raises `TypeError` at the case of invalid `io` type.

    There are two predefined data types specialized to send files:
    - ``ReuBytesIO``
    - ``ReuAsyncIO``

    If a buffer is sent, then when the request is done, it is closed. So if
    the request fails, we would not be able to resend the file, except if we
    have a data type, what instead of closing on `.close()` just seeks to `0`
    (or later if needed) on close, instead of really closing instantly. These
    data types implement a `.real_close()` method, but they do `real_close`
    on `__exit__` as well.
    """
    form = Formdata()
    form.add_field('payload_json', to_json(data))
    files = []

    # checking structure

    # case 1: dict-like
    if hasattr(type(file), 'items'):
        files.extend(file.items())
    # case 2: tuple => (file-name, io) pair
    elif isinstance(file, tuple):
        files.append(file)
    # case 3: list-like
    elif isinstance(file, (list, deque)):
        for element in file:
            if type(element) is tuple:
                name, io = element
            else:
                io = element
                name = ''
            if not name:
                # guessing name
                name = getattr(io, 'name', '')
                if name:
                    _, name = split_path(name)
                else:
                    name = str(random_id())
            files.append((name, io),)
    # case 4: the file itself
    else:
        name = getattr(file, 'name', '')
        # guessing name
        if name:
            _, name = split_path(name)
        else:
            name = str(random_id())
        files.append((name, file),)

    # checking the amount of files
    # case 1: one file
    if len(files) == 1:
        name, io = files[0]
        form.add_field('file', io, filename=name,
                       content_type='application/octet-stream')
    # case 2: no files -> return None, we should use the already existing data
    elif len(files) == 0:
        return None
    # case 3: maximum 10 files
    elif len(files) < 11:
        for index, (name, io) in enumerate(files):
            form.add_field(f'file{index}s', io, filename=name,
                           content_type='application/octet-stream')
    # case 4: more than 10 files
    else:
        raise ValueError('You can send maximum 10 files at once.')

    return form
def main():
    global progress_bar

    # get the list of resolvers
    res = requests.get(NS_LIST_URL)
    if res.status_code == 200:
        # perform a baseline test to compare against
        sanity_check = perform_lookup(config.baseline_server,
                                      config.query_domain, tries=5)
        if sanity_check is not None:
            sanity_check = set(sanity_check)

        all_resolvers = res.content.decode().splitlines()
        initial_resolvers = []
        if config.no_clean:
            # skip cleaning
            initial_resolvers = all_resolvers
        else:
            # remove false positives
            for line in all_resolvers:
                replace_result = [
                    bool(re.sub(regex, '', line))
                    for regex in config.clean_regex
                ]
                if all(replace_result):
                    initial_resolvers.append(line)

        # remove any existing output_file
        if path_exists(config.output_file):
            if config.keep_old:
                name, ext = split_path(config.output_file)
                backup_name = '{}-{}{}'.format(name, uuid4().hex, ext)
                print('[*] Output file already exists, renaming {} to {}'.format(
                    config.output_file, backup_name))
                rename_file(config.output_file, backup_name)
                # path still exists, rename failed
                if path_exists(config.output_file):
                    print('[!] Rename failed, outputting to {} instead!'.format(
                        backup_name))
                    config.output_file = backup_name
            else:
                print('[*] Overwriting existing output file {}'.format(
                    config.output_file))
                remove_file(config.output_file)

        # create progress bar if not verbose mode
        if not config.verbose:
            progress_bar = tqdm(total=len(initial_resolvers), unit='resolvers')

        # create a thread pool and start the workers
        thread_pool = ThreadPool(config.job_count)
        workers = []
        for resolver in initial_resolvers:
            w = thread_pool.apply_async(check_resolver,
                                        (resolver, sanity_check),
                                        callback=callback)
            workers.append(w)

        # ensure all workers complete
        for w in workers:
            w.get()
        thread_pool.close()
        thread_pool.join()

        if not config.verbose:
            progress_bar.close()
    else:
        print('Error performing baseline sanity check! (DNS lookup {} using {})'
              .format(config.query_domain, config.baseline_server))
ms_figures_dir = join_path(ms_dir, 'figures')
zip_fname = '/Users/yoavram/Dropbox/Sunnyvale/figures.zip'
tex_files = ['ms_sunnyvale.tex']
pattern = re.compile(r'\\includegraphics(?:\[.*\])?\{(.*\.\w{3})\}')

if os.path.exists(ms_figures_dir):
    shutil.rmtree(ms_figures_dir)
    os.mkdir(ms_figures_dir)
if not os.path.exists(ms_figures_dir):
    os.mkdir(ms_figures_dir)

figures = []
for fn in tex_files:
    with open(join_path(ms_dir, fn)) as f:
        matches = (pattern.match(line) for line in f)
        matches = (m for m in matches if m is not None)
        filenames = (m.groups()[0] for m in matches)
        filenames = (split_path(fn)[-1] for fn in filenames)
        filenames = (join_path(figures_dir, fn) for fn in filenames)
        figures.extend(filenames)

with ZipFile(zip_fname, 'w') as z:
    for fn in figures:
        fn = fn.replace('{', '').replace('}', '')
        print(fn)
        shutil.copy(fn, join_path(ms_figures_dir, split_path(fn)[-1]))
        z.write(fn)

print("{} figures copied to {} and zipped to {}".format(
    len(figures), ms_figures_dir, zip_fname))
from datetime import datetime
from os.path import abspath, split as split_path, join as join_path

# 3rd party:

# Internal:
from ..common.visualisation import plot_thumbnail

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

metrics = [
    'newAdmissions',
    'newCasesByPublishDate',
    'newDeaths28DaysByPublishDate',
    'newVirusTests'
]

curr_dir, _ = split_path(abspath(__file__))
queries_dir = join_path(curr_dir, "queries")

with open(join_path(queries_dir, "time_series_data.sql")) as fp:
    time_series_data_query = fp.read()

with open(join_path(queries_dir, "latest_change_data.sql")) as fp:
    latest_change_data_query = fp.read()


async def get_timeseries(conn, timestamp):
    ts = datetime.fromisoformat(timestamp.replace("5Z", ""))
    partition = f"{ts:%Y_%-m_%-d}_other"
    partition_id = f"{ts:%Y_%-m_%-d}|other"
    values_query = time_series_data_query.format(partition=partition)
    change_query = latest_change_data_query.format(partition=partition)
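# Illustrative (assumes a strftime supporting '%-m'/'%-d'): a timestamp of
# '2021-05-03T16:30:15.1234565Z' is trimmed to '...15.123456' so that
# fromisoformat can parse it, and the partition name becomes
# '2021_5_3_other' with partition_id '2021_5_3|other'.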
def read_image_stack(fn, *args, **kwargs):
    """Read a 3D volume of images in image or .h5 format into a numpy.ndarray.

    This function attempts to automatically determine input file types and
    wraps specific image-reading functions.

    Parameters
    ----------
    fn : filename (string)
        A file path or glob pattern specifying one or more valid image files.
        The file format is automatically determined from this argument.

    *args : filenames (string, optional)
        More than one positional argument will be interpreted as a list of
        filenames pointing to all the 2D images in the stack.

    **kwargs : keyword arguments (optional)
        Arguments to be passed to the underlying functions. A 'crop'
        keyword argument is supported, as a list of length 6:
        [xmin, xmax, ymin, ymax, zmin, zmax].
        Use 'None' for no crop in that coordinate.

    Returns
    -------
    stack : 3-dimensional numpy ndarray

    Notes
    -----
    If reading in .h5 format, keyword arguments are passed through to
    read_h5_stack().

    Automatic file type detection may be deprecated in the future.
    """
    # TODO: Refactor. Rather than have implicit designation of stack format
    # based on filenames (*_boundpred.h5, etc), require explicit parameters
    # in config JSON files.
    if os.path.isdir(fn):
        fn += '/'
    d, fn = split_path(os.path.expanduser(fn))
    if len(d) == 0:
        d = '.'
    crop = kwargs.get('crop', [None] * 6)
    if crop is None:
        crop = [None] * 6
    if len(crop) == 4:
        crop.extend([None] * 2)
    elif len(crop) == 2:
        crop = [None] * 4 + crop
    kwargs['crop'] = crop
    if any(fn.endswith(ext) for ext in supported_image_extensions):
        # image types, such as a set of pngs or a multi-page tiff
        xmin, xmax, ymin, ymax, zmin, zmax = crop
        if len(args) > 0 and type(args[0]) == str and args[0].endswith(fn[-3:]):
            # input is a list of filenames
            fns = [fn] + [split_path(f)[1] for f in args]
        else:
            # input is a filename pattern to match
            fns = fnfilter(os.listdir(d), fn)
        if len(fns) == 1 and fns[0].endswith('.tif'):
            stack = read_multi_page_tif(join_path(d, fns[0]), crop)
        else:
            fns.sort(key=alphanumeric_key)  # sort filenames numerically
            fns = fns[zmin:zmax]
            im0 = imread(join_path(d, fns[0]))
            ars = (imread(join_path(d, fn)) for fn in fns)
            im0 = im0[xmin:xmax, ymin:ymax]
            dtype = im0.dtype
            stack = zeros((len(fns),) + im0.shape, dtype)
            for i, im in enumerate(ars):
                stack[i] = im[xmin:xmax, ymin:ymax]
    elif fn.endswith('_boundpred.h5') or fn.endswith('_processed.h5'):
        # Ilastik batch prediction output file
        stack = read_prediction_from_ilastik_batch(os.path.join(d, fn), **kwargs)
    elif fn.endswith('.h5'):
        # other HDF5 file
        stack = read_h5_stack(join_path(d, fn), *args, **kwargs)
    elif os.path.isfile(os.path.join(d, 'superpixel_to_segment_map.txt')):
        # Raveler export
        stack = raveler_to_labeled_volume(d, *args, **kwargs)
    return squeeze(stack)
def split_path_file_extension(full_path):
    path, file = split_path(full_path)
    file, extension = splitext(file)
    return path, file, extension
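# Usage sketch (illustrative; assumes split_path/splitext are
# os.path.split/os.path.splitext):
#
#     split_path_file_extension('/data/run1/image.tif')
#     # -> ('/data/run1', 'image', '.tif')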
def _file_for(self, instance, resource):
    if instance.definition.name in resource:
        _, path = split_path(resource)
        return join_paths(self._directory_for(instance), path)
    else:
        return join_paths(self._output_directory, resource)
def _get_current_file(self):
    return split_path(self._get_view().file_name())
def get_emotion_from_filename(filename):
    return split_path(filename)[-1].split('.')[0].split('_')[1]
def root(self):
    return split_path(self.path)[0]
def guess_config(python_exe_full_path):
    """
    Then gets the path like how conda activate defines path
    If not found : MSG about downloading and installing miniconda
    if installed still ask for manual file chooser
    if found return CPYTHON and EXTRA_PATH fields
    """
    # to open a new window with cmd prompt with activated miniconda environment:
    # start %windir%\System32\cmd.exe "/K" %USERPROFILE%\Miniconda2\Scripts\activate.bat JTutils
    CPYTHON = python_exe_full_path
    import subprocess
    OS = get_os_version()
    if 'win' in OS:
        CONDA_EXE = ['condabin', 'conda.bat']
    else:  # unix (linux or mac)
        CONDA_EXE = ['condabin', 'conda']

    # split path into folder list
    splited_path_to_python = []
    head = python_exe_full_path
    i = 0
    tail = "start"
    while tail != "":
        head, tail = split_path(head)
        if tail != "":
            splited_path_to_python.append(tail)
    splited_path_to_python.append(head)
    splited_path_to_python.reverse()

    # test if provided file is from conda distribution and initialize
    # conda_base and conda_exe
    conda_exe = False
    for neg_index_base in range(1, len(splited_path_to_python)):
        tmp = splited_path_to_python[:-neg_index_base] + CONDA_EXE
        tested_file = join_path(*tmp)
        if is_exe(tested_file):
            conda_exe = tested_file
            conda_base = join_path(*splited_path_to_python[:-neg_index_base])
            break
    if conda_exe == False:
        MSG("This doesn't seem to be a conda (miniconda/anaconda) distribution. "
            "Can be fine: check the report.")
        return {'CPYTHON': python_exe_full_path}

    # now I have the base and conda_exe and we are in conda distribution
    # which environement is the python_exe_full_path from ?
    # check if JTutils environement exists
    if 'win' in OS:
        if neg_index_base < 2:
            conda_env = 'base'
        else:
            if splited_path_to_python[-3].lower() == 'envs':
                conda_env = splited_path_to_python[-2]
            else:
                raise ValueError("Conda distribution seems inconsistent !")
    else:  # macosx or linux
        if neg_index_base < 3:
            conda_env = 'base'
        else:
            if splited_path_to_python[-4].lower() == 'envs':
                conda_env = splited_path_to_python[-3]
            else:
                raise ValueError("Conda distribution seems inconsistent !")

    # if the JTutils environment does not exist, ask for automatic installation
    if conda_env != 'JTutils':
        if 'win' in OS:
            new_python_exe = join_path(conda_base, 'envs', 'JTutils', 'python.exe')
        else:
            new_python_exe = join_path(conda_base, 'envs', 'JTutils', 'bin', 'python')
        select_val = SELECT(
            title="Create JTutils environnent (recommended)",
            message="""JTutils conda environment not found.
%s is found. Do you want to create JTutils python environment (recommended)?
If Yes, new python exe file will be selected : %s
Please CLICK on one button (enter on keyboard does not work)""" % (
                conda_env, new_python_exe),
            buttons=["Yes", "No"],
            mnemonics=["y", "n"])
        if select_val == 0:
            # first check JTutils does not exists already:
            if is_exe(new_python_exe):
                pass
            else:
                # run install script
                if 'win' in OS:
                    cmd = " ".join([conda_exe, "activate &",
                                    conda_exe, "create -y -n JTutils numpy&",
                                    conda_exe, "env list"])
                else:  # unix
                    # default jython shell is /bin/sh : source command is "."
                    # first source the conda initialisation script
                    # once done conda is available as a shell internal command
                    shell_init_file = join_path(conda_base, 'etc', 'profile.d', 'conda.sh')
                    cmd = " ".join([".", shell_init_file, ";",
                                    "conda create -y -n JTutils numpy ;",
                                    "conda env list"])
                MSG(subprocess.check_output(cmd, shell=True))
                MSG("JTutils environment created")
            CPYTHON = new_python_exe
            conda_env = 'JTutils'

    if 'win' in OS:
        # batfile = join_path(dirname(abspath(sys.argv[0])), "..", "condat_env_setup.bat")
        # cmd = ["start", "%windir%\system32\cmd.exe", "/k", batfile + " " + found_path + " JTutils"]
        cmd = " ".join([join_path(conda_base, 'condabin', 'activate.bat'),
                        conda_env, '& set CONDA', '& set PATH'])
    elif ('linux' in OS) or ('mac' in OS):
        # default shell is /bin/sh : source command is "."
        # first source the conda initialisation script
        # once done conda is available as a shell internal command
        shell_init_file = join_path(conda_base, 'etc', 'profile.d', 'conda.sh')
        cmd = " ".join([".", shell_init_file, ";",
                        "conda activate", conda_env, ";"
                        "env |grep -i CONDA ; echo PATH=$PATH"])
    try:
        # MSG(cmd)
        res = subprocess.check_output(cmd, shell=True)
        if 'win' in OS:
            res = res.decode('cp850')
    except subprocess.CalledProcessError as grepexc:
        # decode the captured output, not the exception object itself
        output = grepexc.output
        if 'win' in OS:
            output = output.decode('cp850')
        print("error code", grepexc.returncode, output)
        MSG("Error during retrieval of conda environment.")
        raise

    # now parse the returned string to extract conda env vars
    conf_dict = dict()
    for line in res.split('\n'):
        line = line.strip()
        if '=' in line:
            key, val = line.split('=', 1)
            if 'PATH' == key.upper():
                path_list = val.split(';')
                extra_path = []
                # windows is not case sensitive : required for valid string comparison
                if 'win' in OS:
                    cb = conda_base.lower()
                else:
                    cb = conda_base
                for one_path in path_list:
                    if 'win' in OS:
                        op = one_path.lower()
                    else:
                        op = one_path
                    if cb in op:
                        extra_path.append(one_path)
                conf_dict['EXTRA_PATH'] = extra_path
            else:
                if 'CONDA' in key.upper():
                    conf_dict[key] = val
    conf_dict['CPYTHON'] = CPYTHON
    return conf_dict
def convert_to_compactsplit(the_iter=False,
                            dir_out=False,
                            do_sort=True,
                            pre_split_num=32,
                            max_num_per_split=1000000,
                            num_digits=4,
                            max_num=0,
                            confirm_clear=True,
                            cache_images_now=False,
                            doing_run_num=False,
                            doing_job_num=False,
                            via_cli=False,
                            ):
    """
    Post-processing step to convert getty images dataset to the new
    single-file format. Requires GNU `sort`.

    Optionally sort the file to ensure that any contiguous sample of the
    output file will be representative of the overall dataset.

    Args:
        the_iter:           Input iterator that outputs dicts containing, at a
                            minimum, an '_id' key.
        dir_out:            Prefix for output file name. A suffix of the form
                            "-split-0001.gz" will be appended. Can also be a
                            path to a prefix name, e.g. 'output_prefix' or
                            'a/b/c/output_prefix'.
        do_sort:            Sort output files by ID afterward.
        pre_split_num:      Pre-split output into at least `pre_split_num`
                            files, for easy parallel loading. Probably best
                            to error on the high side here.
        max_num_per_split:  If any of the splits have more than
                            `max_num_per_split` records, then multiple files
                            per split will be created.
        num_digits:         How much to zero-pad numbers in output. Should
                            probably leave this at `4`.
        max_num:            Terminate early after `max_num` records.
        confirm_clear:      Prompt for confirmation before clearing output
                            directory.
    """
    from mc_ingest import cache_image, decode_image

    assert pre_split_num <= (10 ** num_digits - 1), (pre_split_num, num_digits)
    assert the_iter, ('REQUIRED_ARG: the_iter',)
    assert the_iter is not False
    assert dir_out is not False

    the_path, the_dir = split_path(dir_out)
    assert exists(the_path), ('PATH_DOES_NOT_EXIST', the_path)
    assert the_dir, ('SPECIFY_OUTPUT_DIR', the_dir)

    if not exists(dir_out):
        makedirs(dir_out)

    fn_out = join(dir_out, the_dir)
    fn_out_temp = (fn_out + '-tempfile-' + str(doing_run_num) +
                   ('-%03d' % doing_job_num) + '-' + str(randint(1, 1000000000000)))
    fn_out_temp_2 = fn_out_temp + '-2'
    fn_out_temp_3 = fn_out_temp + '-3'

    try:
        with open(fn_out_temp, 'w') as f:
            for hh_batch in igroup(the_iter, 50):
                for hh in hh_batch:
                    xid = hh['_id']
                    if type(xid) == unicode:
                        xid = xid.encode('utf8')
                    new_id = hashlib.md5(xid).hexdigest()
                    #assert len(new_id) == 24, new_id  ## Fixed-length makes sorting easier.
                    assert '\t' not in new_id
                    assert '\n' not in new_id
                    if cache_images_now:
                        assert False
                        if n_jobs != 1:
                            assert False, 'TODO'
                            inner_args.append((hh,))
                        else:
                            worker_convert_to_compactsplit((hh,))
                    dd = json.dumps(hh, separators=(',', ':'))
                    assert '\t' not in dd
                    assert '\n' not in dd
                    f.write(new_id + '\t' + dd + '\n')

        if not do_sort:
            rename(fn_out_temp, fn_out_temp_2)
        else:
            assert exists(fn_out_temp), (fn_out_temp,)
            ## Sort via gnu `sort`:
            print ('FILES', fn_out_temp, fn_out_temp_2)
            cmd = "LC_ALL=C sort --temporary-directory=%s %s > %s" % (
                pipes_quote(dir_out), pipes_quote(fn_out_temp),
                pipes_quote(fn_out_temp_2))
            print ('SORTING', cmd)
            rr = check_output(cmd, shell=True, executable="/bin/bash")
            unlink(fn_out_temp)

        assert exists(fn_out_temp_2), (fn_out_temp_2,)
        print ('DONE_STEP_1', fn_out_temp_2)

        if pre_split_num == 1:
            print ('WRITE_AND_COMPRESS')
            with open(fn_out_temp_2) as src, gzip.open(fn_out_temp_3, 'wb') as dst:
                dst.writelines(src)
            unlink(fn_out_temp_2)
            rename(fn_out_temp_3,
                   fn_out + (('-split-%0' + str(int(num_digits)) + 'd.gz') % 1))
        else:
            print ('SPLITTING_AND_COMPRESS', pre_split_num)
            ## [output_file, this_split_num, file_num_this_split, record_count_this_split]
            hh = {x: [False, x, 0, 0]
                  for x in xrange(pre_split_num)}
            with open(fn_out_temp_2) as f:
                for c, line in enumerate(f):
                    xx = hh[c % pre_split_num]
                    if (xx[0] is False) or (xx[3] > max_num_per_split):
                        if xx[0] is not False:
                            xx[0].close()
                        fn = fn_out + (('-compactsplit-v' + VERSION_COMPACTSPLIT +
                                        '-%0' + str(int(num_digits)) + 'd' +
                                        '-%0' + str(int(num_digits)) + 'd' +
                                        '-%0' + str(int(num_digits)) + 'd.gz')
                                       % (doing_job_num, xx[1], xx[2]))
                        print ('NEW_FILE', fn)
                        xx[0] = GzipFile(fn, 'w')
                        xx[2] += 1
                        xx[3] = 0
                    xx[0].write(line)
                    xx[3] += 1
            unlink(fn_out_temp_2)
            for xx in hh.values():
                if xx[0] is not False:
                    print ('CLOSING_FILE', xx[0])
                    xx[0].close()
        print ('DONE', dir_out)
    finally:
        pass
def add_content(self, key):
    self.contents[split_path(key.name)[-1]] = key
    return
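# Illustrative (hypothetical key name): for key.name == 'docs/guide/intro.html',
# split_path(...)[-1] is 'intro.html', so the key is indexed under its
# basename alone.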