Example #1
    def add_key_to_index(self, key):
        dir_name, key_basename = split_path(key.name)

        if key_basename == "index.html":
            return

        directory = self.dirs.get(dir_name)
        if directory is None:
            directory = Directory(dir_name)
            self.dirs[dir_name] = directory

        directory.add_content(key)

        # Write a subdirectory entry into each parent directory as needed.
        while dir_name != "":
            parent_dirname, tail = split_path(dir_name)
            parent = self.dirs.get(parent_dirname)

            if parent is None:
                parent = Directory(parent_dirname)
                self.dirs[parent_dirname] = parent

            parent.add_subdir(tail, directory)

            dir_name = parent_dirname
            directory = parent

        return
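Assuming `split_path` here is `os.path.split`, the parent walk above peels one component per iteration until the path is exhausted; a minimal standalone sketch:

from os.path import split as split_path

dir_name = 'docs/guides/intro'
while dir_name != '':
    parent, tail = split_path(dir_name)
    print(repr(parent), '<-', repr(tail))
    dir_name = parent
# 'docs/guides' <- 'intro'
# 'docs' <- 'guides'
# '' <- 'docs'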
Example #2
def read_image_stack(fn, *args, **kwargs):
    """Read a 3D volume of images in image or .h5 format into a numpy.ndarray.

    The format is automatically detected from the (first) filename.

    A 'crop' keyword argument is supported, as a list of
    [xmin, xmax, ymin, ymax, zmin, zmax]. Use 'None' for no crop in that
    coordinate.

    If reading in .h5 format, keyword arguments are passed through to
    read_h5_stack().
    """
    if os.path.isdir(fn):
        fn += '/'
    d, fn = split_path(os.path.expanduser(fn))
    if len(d) == 0: d = '.'
    crop = kwargs.get('crop', [None] * 6)
    if len(crop) == 4: crop.extend([None] * 2)
    elif len(crop) == 2: crop = [None] * 4 + crop
    kwargs['crop'] = crop
    if any([fn.endswith(ext) for ext in supported_image_extensions]):
        # image types, such as a set of pngs or a multi-page tiff
        xmin, xmax, ymin, ymax, zmin, zmax = crop
        if len(args) > 0 and type(args[0]) == str and args[0].endswith(
                fn[-3:]):
            # input is a list of filenames
            fns = [fn] + [split_path(f)[1] for f in args]
        else:
            # input is a filename pattern to match
            fns = fnfilter(os.listdir(d), '*' + fn[-4:])
        if len(fns) == 1 and fns[0].endswith('.tif'):
            stack = read_multi_page_tif(join_path(d, fns[0]), crop)
        else:
            fns.sort(key=alphanumeric_key)  # sort filenames numerically
            fns = fns[zmin:zmax]
            im0 = pil_to_numpy(Image.open(join_path(d, fns[0])))
            ars = (pil_to_numpy(Image.open(join_path(d, fn))) for fn in fns)
            im0 = im0[xmin:xmax, ymin:ymax]
            dtype = im0.dtype
            stack = zeros((len(fns), ) + im0.shape, dtype)
            for i, im in enumerate(ars):
                stack[i] = im[xmin:xmax, ymin:ymax]
    elif fn.endswith('_boundpred.h5') or fn.endswith('_prediction.h5'):
        # Ilastik batch prediction output file
        stack = read_prediction_from_ilastik_batch(os.path.join(d, fn),
                                                   **kwargs)
    elif fn.endswith('.h5'):
        # other HDF5 file
        stack = read_h5_stack(join_path(d, fn), *args, **kwargs)
    elif os.path.isfile(os.path.join(d, 'superpixel_to_segment_map.txt')):
        # Raveler export
        stack = raveler_to_labeled_volume(d, *args, **kwargs)
    return squeeze(stack)
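A hypothetical call against this version (directory, file names, and crop bounds are all made up; PNG is assumed to be among supported_image_extensions). The six crop values unpack as xmin, xmax, ymin, ymax, zmin, zmax:

# Reads every '*.png' beside the named file, keeping slices 5..9 and a
# 100x100 window in each slice.
stack = read_image_stack('slices/im_000.png', crop=[0, 100, 0, 100, 5, 10])
print(stack.shape)  # (5, 100, 100), given at least ten slices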
Example #3
def main(args):
    argp = ARGPARSER.parse_args(args[1:])

    id_to_ann_files = defaultdict(list)

    for file_path in (l.strip() for l in stdin):
        if not any((file_path.endswith(suff) for suff in UNMERGED_SUFFIXES)):
            if not argp.no_warn:
                import sys
                print >> sys.stderr, (
                    'WARNING: invalid file suffix for %s, ignoring') % (
                        file_path, )
            continue

        dirname, basename = split_path(file_path)
        id = join_path(dirname, basename.split('.')[0])
        id_to_ann_files[id].append(file_path)

    for id, ann_files in id_to_ann_files.iteritems():

        lines = []
        for ann_file_path in ann_files:
            with open(ann_file_path, 'r') as ann_file:
                for line in ann_file:
                    lines.append(line)

        with open(id + '.' + MERGED_SUFFIX, 'w') as merged_ann_file:
            for line in lines:
                merged_ann_file.write(line)
Example #4
    def make_module(self, module):
        '''
        Create a module directory under `testdir`. Each of the intermediate directories
        (if there are any) will also be usable as modules (i.e., they'll have __init__.py
        files in them).

        Parameters
        ----------
        module : str
            Path to the module directory. Must be a relative path

        Returns
        -------
        str
            The full path to the module directory
        '''
        if isabs(module):
            raise ValueError('Must use a relative path. Given ' + str(module))
        modpath = p(self.testdir, module)
        os.makedirs(modpath)
        last_dname = None
        dname = modpath
        while last_dname != dname and dname != self.testdir:
            open(p(dname, '__init__.py'), 'x').close()
            base = ''
            # Walk up one level; the inner loop skips empty components
            # produced by trailing separators.
            while not base and last_dname != dname:
                last_dname = dname
                dname, base = split_path(dname)

        return modpath
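Hypothetical usage inside a test, assuming `self.testdir` is a temporary fixture directory:

# Creates testdir/pkg/sub/mod plus __init__.py files in pkg, sub and
# mod, so 'pkg.sub.mod' is importable once testdir is on sys.path.
modpath = self.make_module('pkg/sub/mod')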
Example #5
def main(args):
    argp = ARGPARSER.parse_args(args[1:])
    # ID is the stem of a file
    id_to_ann_files = defaultdict(list)
    # Index all IDs before we merge so that we can do a little magic
    for file_path in (l.strip() for l in stdin):
        if not any((file_path.endswith(suff) for suff in UNMERGED_SUFFIXES)):
            if not argp.no_warn:
                import sys
                print >> sys.stderr, (
                        'WARNING: invalid file suffix for %s, ignoring'
                        ) % (file_path, )
            continue
        
        dirname, basename = split_path(file_path)
        id = join_path(dirname, basename.split('.')[0])
        id_to_ann_files[id].append(file_path)

    for id, ann_files in id_to_ann_files.iteritems():
        #XXX: Check if output file exists
        lines = []
        for ann_file_path in ann_files:
            with open(ann_file_path, 'r') as ann_file:
                for line in ann_file:
                    lines.append(line)

        with open(id + '.' + MERGED_SUFFIX, 'w') as merged_ann_file:
            for line in lines:
                merged_ann_file.write(line)
Example #6
def main(args):
    argp = ARGPARSER.parse_args(args[1:])
    # ID is the stem of a file
    id_to_ann_files = defaultdict(list)
    # Index all IDs before we merge so that we can do a little magic
    for file_path in (l.strip() for l in stdin):
        if not any((file_path.endswith(suff) for suff in UNMERGED_SUFFIXES)):
            if not argp.no_warn:
                import sys
                print(('WARNING: invalid file suffix for %s, ignoring') %
                      (file_path, ),
                      file=sys.stderr)
            continue

        dirname, basename = split_path(file_path)
        id = join_path(dirname, basename.split('.')[0])
        id_to_ann_files[id].append(file_path)

    for id, ann_files in id_to_ann_files.items():
        # XXX: Check if output file exists
        lines = []
        for ann_file_path in ann_files:
            with open(ann_file_path, 'r') as ann_file:
                for line in ann_file:
                    lines.append(line)

        with open(id + '.' + MERGED_SUFFIX, 'w') as merged_ann_file:
            for line in lines:
                merged_ann_file.write(line)
Example #7
    def __init__(self, path, repo, origin):
        clean_repo = Target.clean_path(repo)
        if path is not None:
            self.name = clean_repo
            self.path = join(path, self.name)
        else:
            self.path = clean_repo
            self.name = split_path(self.path)[1]
        self.origin = origin
Example #8
    def set(self, data: Union[str, bytes], content_type: str = "application/json",
            cache: str = "no-store") -> NoReturn:
        mode = "w" if isinstance(data, str) else "wb"

        dir_path, _ = split_path(self.path)
        makedirs(dir_path, exist_ok=True)

        with open(self.path, mode=mode) as output_file:
            print(data, file=output_file)
Example #9
async def get_fresh_data(repository_url: str,
                         excluded: Iterable) -> AsyncIterator[str]:
    """
    Retrieve a fresh batch of data from the repository.

    Parameters
    ----------
    repository_url: str
        URL for the repository (the zip file).

    excluded: Iterable

    Returns
    -------
    AsyncIterator[str]
        An async iterator of relative paths to the file.
    """
    url = BASE_REPOSITORY + repository_url.lstrip(
        processor_settings.URL_SEPARATOR)

    # Requesting the latest files from the repository.
    async with Lock():
        response = get_request(url=url)

        logging.info(f"> Download request completed with "
                     f"status {response.status_code}: {repository_url}")

    if response.status_code != HTTPStatus.OK:
        raise RuntimeError(
            f"Failed to download the data from {url}: {response.text}")

    # `ZipFile` only understands files.
    data_bin = BytesIO(response.content)

    async with Lock():
        with ZipFile(data_bin, mode="r") as zip_obj:
            paths = zip_obj.namelist()

            # Extracting the contents into the temp directory.
            zip_obj.extractall(TEMP_DIR_PATH)
        logging.info("> Successfully extracted and stored the data")

    for path in paths:
        _, filename = split_path(path)

        if any(map(lambda p: p in path, excluded)):
            continue

        full_path = join_path(TEMP_DIR_PATH, path)

        # Discard directories
        if not isfile(full_path):
            continue

        logging.info(f"> Processing file '{path}'")

        yield path
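A sketch of how the iterator might be consumed (the repository URL is made up):

async def sync_repository():
    # Relative paths are yielded one at a time as they pass the filters.
    async for path in get_fresh_data('archive/main.zip', excluded=('.github',)):
        print(f'processing {path}')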
Example #10
def archive_path(path, files=None):
    """Archives a folder or some files in this folder"""
    split = split_path(path)
    chdir = join_path(*split[:-1])
    print(chdir)
    dest = split[-1]
    dest_path = '/tmp/' + dest + str(random.randint(0, 200000)).zfill(6)
    print(dest)
    return make_archive(dest_path, 'zip', chdir, dest)
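For a made-up input, split_path('/data/projects/demo') yields ('/data/projects', 'demo'), so the archive is built from inside /data/projects:

archive = archive_path('/data/projects/demo')
# prints '/data/projects' and 'demo', then returns something like
# '/tmp/demo042137.zip' containing the 'demo' folder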
Example #11
def send_report(
    report=None, url=None, user=None, key=None, prompt=True, force=False,
    quiet=False
    ):
    """Send a crash report to a Breakpad Server URL endpoint."""

    if DISABLED and not force:
        return

    if not report:
        if not hasattr(sys, 'last_traceback'):
            return
        report = format_exception(
            sys.last_type, sys.last_value, sys.last_traceback, as_html=True
            )

    if prompt:
        try:
            confirm = raw_input('Do you want to send a crash report [Y/n]? ')
        except EOFError:
            return
        if confirm.lower() in ['n', 'no']:
            return

    if not user:
        config = SCMConfig()
        user = config.get('codereview.email')
        key = config.get('codereview.key')
        url = config.get('codereview.breakpad_url')
        if not (user and key and url):
            exit("Sorry, you need to configure your codereview settings.")

    if not quiet:
        print
        print "Sending crash report ... "

    payload = {
        'args': ' '.join(sys.argv[1:]),
        'command': split_path(sys.argv[0])[1],
        'report': ''.join(report),
        'user': user,
        }

    payload['sig'], payload = sign_payload(payload, key)

    try:
        response = urlopen(url, urlencode(payload))
        if not quiet:
            print
            print response.read()
        response.close()
    except Exception:
        if not quiet:
            print
            print "Sorry, couldn't send the crash report for some reason."
Example #12
def read_image_stack(fn, *args, **kwargs):
    """Read a 3D volume of images in image or .h5 format into a numpy.ndarray.

    The format is automatically detected from the (first) filename.

    A 'crop' keyword argument is supported, as a list of
    [xmin, xmax, ymin, ymax, zmin, zmax]. Use 'None' for no crop in that
    coordinate.

    If reading in .h5 format, keyword arguments are passed through to
    read_h5_stack().
    """
    d, fn = split_path(os.path.expanduser(fn))
    if len(d) == 0: d = '.'
    crop = kwargs.get('crop', [None] * 6)
    if len(crop) == 4: crop.extend([None] * 2)
    elif len(crop) == 2: crop = [None] * 4 + crop
    kwargs['crop'] = crop
    if any([fn.endswith(ext) for ext in supported_image_extensions]):
        xmin, xmax, ymin, ymax, zmin, zmax = crop
        if len(args) > 0 and type(args[0]) == str and args[0].endswith(
                fn[-3:]):
            # input is a list of filenames
            fns = [fn] + [split_path(f)[1] for f in args]
        else:
            # input is a filename pattern to match
            fns = fnfilter(os.listdir(d), fn)
        if len(fns) == 1 and fns[0].endswith('.tif'):
            stack = read_multi_page_tif(join_path(d, fns[0]), crop)
        else:
            fns.sort(key=alphanumeric_key)  # sort filenames numerically
            fns = fns[zmin:zmax]
            im0 = pil_to_numpy(Image.open(join_path(d, fns[0])))
            ars = (pil_to_numpy(Image.open(join_path(d, fn))) for fn in fns)
            im0 = im0[xmin:xmax, ymin:ymax]
            dtype = im0.dtype
            stack = zeros((len(fns), ) + im0.shape, dtype)
            for i, im in enumerate(ars):
                stack[i] = im[xmin:xmax, ymin:ymax]
    if fn.endswith('.h5'):
        stack = read_h5_stack(join_path(d, fn), *args, **kwargs)
    return squeeze(stack)
Example #13
def read_image_stack(fn, *args, **kwargs):
    """Read a 3D volume of images in image or .h5 format into a numpy.ndarray.

    The format is automatically detected from the (first) filename.

    A 'crop' keyword argument is supported, as a list of
    [xmin, xmax, ymin, ymax, zmin, zmax]. Use 'None' for no crop in that
    coordinate.

    If reading in .h5 format, keyword arguments are passed through to
    read_h5_stack().
    """
    d, fn = split_path(os.path.expanduser(fn))
    if len(d) == 0: d = '.'
    crop = kwargs.get('crop', [None]*6)
    if len(crop) == 4: crop.extend([None]*2)
    elif len(crop) == 2: crop = [None]*4 + crop
    kwargs['crop'] = crop
    if any([fn.endswith(ext) for ext in supported_image_extensions]):
        xmin, xmax, ymin, ymax, zmin, zmax = crop
        if len(args) > 0 and type(args[0]) == str and args[0].endswith(fn[-3:]):
            # input is a list of filenames
            fns = [fn] + [split_path(f)[1] for f in args]
        else:
            # input is a filename pattern to match
            fns = fnfilter(os.listdir(d), fn)
        if len(fns) == 1 and fns[0].endswith('.tif'):
            stack = read_multi_page_tif(join_path(d,fns[0]), crop)
        else:
            fns.sort(key=alphanumeric_key) # sort filenames numerically
            fns = fns[zmin:zmax]
            im0 = pil_to_numpy(Image.open(join_path(d,fns[0])))
            ars = (pil_to_numpy(Image.open(join_path(d,fn))) for fn in fns)
            im0 = im0[xmin:xmax,ymin:ymax]
            dtype = im0.dtype
            stack = zeros((len(fns),)+im0.shape, dtype)
            for i, im in enumerate(ars):
                stack[i] = im[xmin:xmax,ymin:ymax]
    if fn.endswith('.h5'):
        stack = read_h5_stack(join_path(d,fn), *args, **kwargs)
    return squeeze(stack)
Example #14
def resource_path(filename):
    paths = appdirs.site_data_dir(appname=__app_id__, appauthor=False, multipath=True).split(pathsep)
    if getattr(sys, 'frozen', False):
        # we are inside a single-file pyinstaller bundle
        paths.insert(0, sys._MEIPASS)
    else:
        paths.append(split_path(__file__)[0])
    for dir_ in paths:
        path = join(dir_, filename)
        if exists(path):
            return path
    dir_ = appdirs.user_data_dir(appname=__app_id__, appauthor=__author__)
    return join(dir_, filename)
Example #15
async def process_and_upload_data(path: str, get_file_data: FileFetcherType,
                                  container: str, base_path: str) -> NoReturn:
    """
    Uploads processed files to the storage using the correct
    caching and ``content-type`` specs.

    Parameters
    ----------
    path: str
        Path (within the storage container) in which the
        file is to be stored.

    get_file_data: FileFetcherType

    base_path: str

    container: str
        Storage container in which the file is to be stored.

    Returns
    -------
    NoReturn
    """
    _, file_name = split_path(path)
    # Files are stored as JSON - the extension must be updated:
    file_name, _ = splitext(file_name)
    json_name = f"{file_name}.json"
    yaml_name = f"{file_name}.yaml"

    json_path = str.join(processor_settings.URL_SEPARATOR,
                         [STORAGE_PATH, json_name])
    yaml_path = str.join(processor_settings.URL_SEPARATOR,
                         [STORAGE_PATH, yaml_name])

    if ".github" in path:
        return None

    raw_data = await get_file_data(path, base_path)
    data = await prepare_data(raw_data)

    # Uploading the data
    with StorageClient(container=container, path=json_path) as client:
        async with Lock():
            client.upload(data=data.json_data)

    with StorageClient(container=container,
                       path=yaml_path,
                       content_type="application/x-yaml") as client:
        async with Lock():
            client.upload(data=data.yaml_data)
Example #16
    def splitall(self, path):
        allparts = list()
        while True:
            parts = split_path(path)
            if parts[0] == path:  # sentinel for absolute paths
                allparts.insert(0, parts[0])
                break
            elif parts[1] == path:  # sentinel for relative paths
                allparts.insert(0, parts[1])
                break
            else:
                path = parts[0]
                allparts.insert(0, parts[1])
        return allparts
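Assuming `split_path` is `os.path.split`, the two sentinels stop the loop once splitting no longer shrinks the path; expected results on a hypothetical instance `obj`:

obj.splitall('/usr/local/bin')   # ['/', 'usr', 'local', 'bin']
obj.splitall('docs/readme.txt')  # ['docs', 'readme.txt']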
Example #17
def load_ids(dir, langs):
    if not langs:
        langs = LANGS

    # root bsnlp/sample_pl_cs_ru_bg/raw/cs
    # filename brexit_cs.txt_file_100.txt
    for root, subdirs, filenames in walk(dir):
        tail, lang = split_path(root)
        if lang not in langs:
            continue

        tail, type = split_path(tail)
        if type not in (ANNOTATED, RAW):
            # raw/nord_stream/ru/nord_stream_ru.txt_file_44.txt
            tail, type = split_path(tail)
        assert type in (ANNOTATED, RAW), root

        for filename in filenames:
            name, ext = split_ext(filename)
            if ext not in (TXT, OUT):
                continue
            path = join_path(root, filename)
            yield BsnlpId(lang, type, name, path)
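Each `split_path` call peels one directory level off the walk root, which is how the language and the type are recovered even when they sit at different depths; for the sample path from the comment above:

from os.path import split as split_path

tail, lang = split_path('bsnlp/sample_pl_cs_ru_bg/raw/cs')  # lang == 'cs'
tail, type_ = split_path(tail)                              # type_ == 'raw'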
Example #19
    def __init__(self, steps, parent=None):
        super(ProgressDialog, self).__init__(parent)

        form, _ = uic.loadUiType(split_path(__file__)[0] + "/progressDialog.ui")
        self.ui = form()
        self.ui.setupUi(self)

        self.steps = steps
        self.current = 0

        self._add_pending()

        self.ui.cancel.clicked.connect(self.cancel)
        self.trigger_popup.connect(self.popup)
        self.trigger_update.connect(self.update_step)
Example #20
    def make_module(self, module):
        if isabs(module):
            raise Exception('Must use a relative path. Given ' + str(module))
        modpath = p(self.testdir, module)
        os.makedirs(modpath)
        last_dname = None
        dname = modpath
        while last_dname != dname and dname != self.testdir:
            open(p(dname, '__init__.py'), 'x').close()
            base = ''
            # Walk up one level; the inner loop skips empty components
            # produced by trailing separators.
            while not base and last_dname != dname:
                last_dname = dname
                dname, base = split_path(dname)

        return modpath
Example #21
    def generate_thumbnail_name(self, raw_name, thumb_name, size):
        """
        Return a thumbnail file path like::

            `path/to/thumb_name-raw_name-sizes[0]xsizes[1].jpg`
        """
        filepath, filename = split_path(raw_name)
        fn, ext = split_ext(filename)
        thumbnail_filename = "%s-%s-%sx%s%s" % (thumb_name, fn, size[0],
                                                size[1], ext)

        # join path and new filename
        thumbnail_full_path = join_path(filepath, thumbnail_filename)

        return thumbnail_full_path
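A worked example of the naming scheme (file names made up): split_path keeps the directory, split_ext keeps the suffix, and the size is interpolated between them:

path = self.generate_thumbnail_name('photos/cat.jpg', 'thumb', (100, 80))
# -> 'photos/thumb-cat-100x80.jpg'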
Example #23
    def run(self, **kwargs):
        parent, file_name = self._get_current_file()
        python = self._get_python()

        if file_name.startswith("test_"):
            kwargs["working_dir"] = parent
            _, parent_name = split_path(parent)
            file_name = parent_name + path_separator + file_name
            flags = "-m unittest"
        else:
            flags = "-u"

        shell_cmd = " ".join([python, flags, file_name])
        kwargs["shell_cmd"] = shell_cmd
        self.window.run_command("exec", kwargs)
Example #24
    def generate_thumbnail_name(self, raw_name, thumb_name, size):
        """
        Return a thumbnail file path like::

            `path/to/thumb_name-raw_name-sizes[0]xsizes[1].jpg`
        """
        filepath, filename = split_path(raw_name)
        fn, ext = split_ext(filename)
        thumbnail_filename = "%s-%s-%sx%s%s" % (
            thumb_name, fn, size[0], size[1], ext)

        # join path and new filename
        thumbnail_full_path = join_path(filepath, thumbnail_filename)

        return thumbnail_full_path
Example #25
def parse_otool_output(output):
    """Search otool output for MKL dependencies.

    Return (mkl_dirs, mkl_libs)."""
    from re import compile
    from os.path import join, dirname, split as split_path, abspath, basename
    import numpy
    import sys

    # like "@rpath/libmkl_intel.dylib (compatibility version 0.0.0, current version 0.0.0)"
    re1 = compile(r"\s*@rpath/lib/(.+) \(.+\)")
    # like "@loader_path/libmkl_intel.dylib (compatibility version 0.0.0, current version 0.0.0)"
    re2 = compile(r"\s*@loader_path/(.+) \(.+\)")
    # like "/usr/lib/libSystem.B.dylib (compatibility version 1.0.0, current version 111.0.0)"
    re3 = compile(r"\s*(.+) \(.+\)")
    re_fname = compile(r"lib(mkl.*|iomp.*)\.(so|dylib)(\.[^ ]*)?")
    # we assume for now that @rpath == <sys.prefix>/lib
    prefix_dir = getattr(sys, 'base_prefix', sys.prefix)
    sys_lib_dir = join(prefix_dir, "lib")

    mkl_dirs, mkl_libs = [], []
    re1_match, output_lines = split_regex(output.splitlines(), re1)
    for m in re1_match:
        fname = m.group(1)
        m_fname = re_fname.match(fname)
        if m_fname:
            # we assume that @rpath is equal to sys.prefix
            mkl_libs.append(join(sys_lib_dir, m.group(1)))
            mkl_dirs.append(sys_lib_dir)

    re2_match, output_lines = split_regex(output_lines, re2)
    for m in re2_match:
        full_path = join(dirname(numpy.__file__), 'linalg', m.group(1))
        fpath, fname = split_path(abspath(full_path))
        m_fname = re_fname.match(fname)
        if m_fname:
            mkl_libs.append(full_path)
            mkl_dirs.append(fpath)

    for m in split_regex(output_lines, re3)[0]:
        path = m.group(1)
        fname = basename(path)
        m_fname = re_fname.match(fname)
        if m_fname:
            mkl_libs.append(path)
            mkl_dirs.append(dirname(path))

    return set(mkl_dirs), set(mkl_libs)
Example #26
def list_dir(path):
    """Return folder content or filename
    """
    try:
        if isdir(path) and exists(path):
            ls = prep_ls(path)
            return ls
        elif isfile(path):
            if DEBUG:
                root, filename = split_path(path)
                return static_file(filename, root=root, download=True)
            else:
                response.headers['X-Accel-Redirect'] = path
                return ''
    except OSError:
        abort(404)
Example #27
def read_image_stack(fn):
    """Read a 3D volume of images in .tif or .h5 formats into a numpy.ndarray.
    This function attempts to automatically determine input file types and
    wraps specific image-reading functions.
    Adapted from gala.imio (https://github.com/janelia-flyem/gala)
    """
    if os.path.isdir(fn):
        fn += '/'
    d, fn = split_path(os.path.expanduser(fn))
    if len(d) == 0: d = '.'
    fns = fnfilter(os.listdir(d), fn)
    if len(fns) == 1 and fns[0].endswith('.tif'):
        stack = read_multi_page_tif(join_path(d, fns[0]))
    elif fn.endswith('.h5'):
        data = h5py.File(join_path(d, fn), 'r')
        stack = data[group_name].value
    return squeeze(stack)
Example #28
def get_emotion_number_from_filename(filename):
    return EMOTION_NUMBERS[split_path(filename)[-1].split('.')[0].split('_')
                           [1]]
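The chain takes the basename, drops the extension, and keys the mapping on the second underscore-separated token; a sketch with a made-up mapping and filename:

EMOTION_NUMBERS = {'happy': 3}  # hypothetical mapping
get_emotion_number_from_filename('clips/actor01_happy_take2.wav')  # -> 3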
Example #29
def get_path(url):
    u = urlparse(url)
    return split_path(u.path)
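For example (URL made up):

get_path('https://example.com/static/img/logo.png')
# -> ('/static/img', 'logo.png')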
Example #30
def main(argv, genfiles=None):

    op = OptionParser()

    op.add_option('-a', dest='authors', default='',
                  help="Set the path for a special authors file (optional)")

    op.add_option('-c', dest='package', default='',
                  help="Generate documentation for the Python package (optional)")

    op.add_option('-d', dest='data_file', default='',
                  help="Set the path for a persistent data file (optional)")

    op.add_option('-e', dest='output_encoding', default='utf-8',
                  help="Set the output encoding (default: utf-8)")

    op.add_option('-f', dest='format', default='html',
                  help="Set the output format (default: html)")

    op.add_option('-i', dest='input_encoding', default='utf-8',
                  help="Set the input encoding (default: utf-8)")

    op.add_option('-o', dest='output_path', default=HOME,
                  help="Set the output directory for files (default: $PWD)")

    op.add_option('-p', dest='pattern', default='',
                  help="Generate index files for the path pattern (optional)")

    op.add_option('-r', dest='root_path', default='',
                  help="Set the path to the root working directory (optional)")

    op.add_option('-t', dest='template', default='',
                  help="Set the path to a template file (optional)")

    op.add_option('--quiet', dest='quiet', default=False, action='store_true',
                  help="Flag to suppress output")

    op.add_option('--stdout', dest='stdout', default=False, action='store_true',
                  help="Flag to redirect to stdout instead of to a file")

    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        return

    authors = options.authors

    if authors:
        if not isfile(authors):
            raise IOError("%r is not a valid path!" % authors)
        authors = parse_authors_file(authors)
    else:
        authors = {}

    email2author = {'unknown': 'unknown'}
    author2link = {'unknown': ''}

    for author, author_info in authors.iteritems():
        for _info in author_info:
            if _info.startswith('http://') or _info.startswith('https://'):
                if author not in author2link:
                    author2link[author] = _info
            elif '@' in _info:
                email2author[_info] = author

    authors['unknown'] = ['unknown']

    output_path = options.output_path.rstrip('/')

    if not isdir(output_path):
        raise IOError("%r is not a valid directory!" % output_path)

    root_path = options.root_path

    siteinfo = join_path(output_path, '.siteinfo')
    if isfile(siteinfo):
        env = {}
        execfile(siteinfo, env)
        siteinfo = env['INFO']
    else:
        siteinfo = {
            'site_url': '',
            'site_nick': '',
            'site_description': '',
            'site_title': ''
            }

    stdout = sys.stdout if options.stdout else None
    verbose = False if stdout else (not options.quiet)

    format = options.format

    if format not in ('html', 'tex'):
        raise ValueError("Unknown format: %s" % format)

    if (format == 'tex') or (not options.template):
        template = False
    elif not isfile(options.template):
        raise IOError("%r is not a valid template!" % options.template)
    else:
        template_path = abspath(options.template)
        template_root = dirname(template_path)
        template_loader = TemplateLoader([template_root])
        template_file = open(template_path, 'rb')
        template = MarkupTemplate(
            template_file.read(), loader=template_loader, encoding='utf-8'
            )
        template_file.close()

    data_file = options.data_file

    if data_file:
        if isfile(data_file):
            data_file_obj = open(data_file, 'rb')
            data_dict = load_pickle(data_file_obj)
            data_file_obj.close()
        else:
            data_dict = {}

    input_encoding = options.input_encoding
    output_encoding = options.output_encoding

    if genfiles:

        files = genfiles

    elif options.package:

        package_root = options.package
        files = []
        add_file = files.append
        package = None
        for part in reversed(package_root.split(SEP)):
            if part:
                package = part
                break
        if package is None:
            raise ValueError("Couldn't find the package name from %r" % package_root)

        for dirpath, dirnames, filenames in walk(package_root):
            for filename in filenames:

                if not filename.endswith('.py'):
                    continue

                filename = join_path(dirpath, filename)
                module = package + filename[len(package_root):]
                if module.endswith('__init__.py'):
                    module = module[:-12]
                else:
                    module = module[:-3]

                module = '.'.join(module.split(SEP))
                module_file = open(filename, 'rb')
                module_source = module_file.read()
                module_file.close()

                docstring = docstring_regex.search(module_source)

                if docstring:
                    docstring = docstring.group(0)
                    if docstring.startswith('r'):
                        docstring = docstring[4:-3]
                    else:
                        docstring = docstring[3:-3]

                if docstring and docstring.strip().startswith('=='):
                    docstring = strip_leading_indent(docstring)
                    module_source = docstring_regex.sub('', module_source, 1)
                else:
                    docstring = ''

                info = {}

                if root_path and isabs(filename) and filename.startswith(root_path):
                    info['__path__'] = filename[len(root_path)+1:]
                else:
                    info['__path__'] = filename

                info['__updated__'] = datetime.utcfromtimestamp(
                    stat(filename).st_mtime
                    )

                info['__outdir__'] = output_path
                info['__name__'] = 'package.' + module
                info['__type__'] = 'py'
                info['__title__'] = module
                info['__source__'] = highlight(module_source, PythonLexer(), SYNTAX_FORMATTER)
                add_file((docstring, '', info))

    else:

        files = []
        add_file = files.append

        for filename in args:

            if not isfile(filename):
                raise IOError("%r doesn't seem to be a valid file!" % filename)

            if root_path and isabs(filename) and filename.startswith(root_path):
                path = filename[len(root_path)+1:]
            else:
                path = filename

            info = get_git_info(filename, path)

            # old svn support:
            # info = get_svn_info(path.split(SEP)[0], '*.txt')[path]

            source_file = open(filename, 'rb')
            source = source_file.read()
            source_file.close()

            if MORE_LINE in source:
                source_lead = source.split(MORE_LINE)[0]
                source = source.replace(MORE_LINE, '')
            else:
                source_lead = ''

            filebase, filetype = splitext(basename(filename))
            info['__outdir__'] = output_path
            info['__name__'] = filebase.lower()
            info['__type__'] = 'txt'
            info['__title__'] = filebase.replace('-', ' ')
            add_file((source, source_lead, info))

    for source, source_lead, info in files:

        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s in [%s]' % (
                info['__type__'], info['__path__'], split_path(output_path)[1]
                )
            print LINE
            print

        if template:
            output, props = render_rst(
                source, format, input_encoding, True
                )
            # output = output.encode(output_encoding)
            info['__text__'] = output.encode(output_encoding)
            info.update(props)
            if source_lead:
                info['__lead__'] = render_rst(
                    source_lead, format, input_encoding, True
                    )[0].encode(output_encoding)
            output = template.generate(
                content=output,
                info=info,
                authors=authors,
                email2author=email2author,
                author2link=author2link,
                **siteinfo
                ).render('xhtml', encoding=output_encoding)
        else:
            output, props = render_rst(
                source, format, input_encoding, True, as_whole=True
                )
            info.update(props)
            output = output.encode(output_encoding)
            info['__text__'] = output
            if source_lead:
                info['__lead__'] = render_rst(
                    source_lead, format, input_encoding, True, as_whole=True
                    )[0].encode(output_encoding)

        if data_file:
            data_dict[info['__path__']] = info

        if stdout:
            print output
        else:
            output_filename = join_path(
                output_path, '%s.%s' % (info['__name__'], format)
                )
            output_file = open(output_filename, 'wb')
            output_file.write(output)
            output_file.close()
            if verbose:
                print 'Done!'

    if data_file:
        data_file_obj = open(data_file, 'wb')
        dump_pickle(data_dict, data_file_obj)
        data_file_obj.close()

    if options.pattern:

        pattern = options.pattern

        items = [
            item
            for item in data_dict.itervalues()
            if item['__outdir__'] == pattern
            ]

        # index.js/json

        import json

        index_js_template = join_path(output_path, 'index.js.template')

        if isfile(index_js_template):

            index_json = json.dumps([
                [_art['__name__'], _art['title'].encode('utf-8')]
                for _art in sorted(
                    [item for item in items if item.get('x-created') and
                     item.get('x-type', 'blog') == 'blog'],
                    key=lambda i: i['x-created']
                    )
                ])

            index_js_template = open(index_js_template, 'rb').read()
            index_js = open(join_path(output_path, 'index.js'), 'wb')
            index_js.write(index_js_template % index_json)
            index_js.close()

        for name, mode, format in INDEX_FILES:

            pname = name.split('.', 1)[0]
            template_file = None

            if siteinfo['site_nick']:
                template_path = join_path(
                    template_root, '%s.%s.genshi' % (pname, siteinfo['site_nick'])
                    )
                if isfile(template_path):
                    template_file = open(template_path, 'rb')

            if not template_file:
                template_path = join_path(template_root, '%s.genshi' % pname)

            template_file = open(template_path, 'rb')
            page_template = MarkupTemplate(
                template_file.read(), loader=template_loader, encoding='utf-8'
                )
            template_file.close()

            poutput = page_template.generate(
                items=items[:],
                authors=authors,
                email2author=email2author,
                author2link=author2link,
                root_path=output_path,
                **siteinfo
                ).render(format)

            poutput = unicode(poutput, output_encoding)

            if mode:
                output = template.generate(
                    alternative_content=poutput,
                    authors=authors,
                    **siteinfo
                    ).render(format)
            else:
                output = poutput

            # @/@ wtf is this needed???
            if isinstance(output, unicode):
                output = output.encode(output_encoding)

            output_file = open(join_path(output_path, name), 'wb')
            output_file.write(output)
            output_file.close()
Example #31
def create_file_form(data, file):
    """
    Creates a `multipart/form-data` form from the message's data and from the file data. If there are no files to
    send, returns `None` to tell the caller that nothing was added to the overall data.
    
    Parameters
    ----------
    data : `dict` of `Any`
        The data created by the ``.message_create`` method.
    file : `dict` of (`file-name`, `io`) items, `list` of (`file-name`, `io`) elements, tuple (`file-name`, `io`), `io`
        The files to send.
    
    Returns
    -------
    form : `None` or `Formdata`
        Returns a `Formdata` of the files and from the message's data. If there are no files to send, returns `None`
        instead.
    
    Raises
    ------
    ValueError
        If more than `10` files are registered to be sent.
    
    Notes
    -----
    Accepted `io` types with check order are:
    - ``BodyPartReader`` instance
    - `bytes`, `bytearray`, `memoryview` instance
    - `str` instance
    - `BytesIO` instance
    - `StringIO` instance
    - `TextIOBase` instance
    - `BufferedReader`, `BufferedRandom` instance
    - `IOBase` instance
    - ``AsyncIO`` instance
    - `async-iterable`
    
    Raises `TypeError` at the case of invalid `io` type.
    
    There are two predefined data types specialized to send files:
    - ``ReuBytesIO``
    - ``ReuAsyncIO``
    
    If a buffer is sent, it is closed when the request is done, so if the request fails, we would not be able to
    resend the file, unless we have a data type that, instead of really closing on `.close()`, just seeks back to
    `0` (or later if needed). These data types implement a `.real_close()` method for closing for real, and they
    call it on `__exit__` as well.
    """
    form = Formdata()
    form.add_field('payload_json', to_json(data))
    files = []
    
    # checking structure
    
    # case 1 dict like
    if hasattr(type(file), 'items'):
        files.extend(file.items())
    
    # case 2 tuple => file, filename pair
    elif isinstance(file, tuple):
        files.append(file)
    
    # case 3 list like
    elif isinstance(file, (list, deque)):
        for element in file:
            if type(element) is tuple:
                name, io = element
            else:
                io = element
                name = ''
            
            if not name:
                #guessing name
                name = getattr(io, 'name', '')
                if name:
                    _, name = split_path(name)
                else:
                    name = str(random_id())
            
            files.append((name, io),)
    
    #case 4 file itself
    else:
        name = getattr(file, 'name', '')
        #guessing name
        if name:
            _, name = split_path(name)
        else:
            name = str(random_id())
        
        files.append((name, file),)
    
    # checking the amount of files
    # case 1 one file
    if len(files) == 1:
        name, io = files[0]
        form.add_field('file', io, filename=name, content_type='application/octet-stream')
    # case 2, no files -> return None, we should use the already existing data
    elif len(files) == 0:
        return None
    # case 3 maximum 10 files
    elif len(files) < 11:
        for index, (name, io) in enumerate(files):
            form.add_field(f'file{index}s', io, filename=name, content_type='application/octet-stream')
    
    # case 4 more than 10 files
    else:
        raise ValueError('You can send maximum 10 files at once.')
    
    return form
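A sketch of a call with two attachments (payload and file names made up); the first name comes from the tuple, the second is guessed from the stream's `.name`:

form = create_file_form(
    {'content': 'see attached'},
    [('notes.txt', b'hello'), open('report.pdf', 'rb')],
)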
Example #32
def main():
    global progress_bar

    # get the list of resolvers
    res = requests.get(NS_LIST_URL)
    if res.status_code == 200:
        # perform a baseline test to compare against
        sanity_check = perform_lookup(config.baseline_server,
                                      config.query_domain,
                                      tries=5)

        if sanity_check is not None:
            sanity_check = set(sanity_check)

            all_resolvers = res.content.decode().splitlines()
            initial_resolvers = []

            if config.no_clean:
                # skip cleaning
                initial_resolvers = all_resolvers
            else:
                # remove false positives
                for line in all_resolvers:
                    replace_result = [
                        bool(re.sub(regex, '', line))
                        for regex in config.clean_regex
                    ]
                    if all(replace_result):
                        initial_resolvers.append(line)

            # remove any existing output_file
            if path_exists(config.output_file):
                if config.keep_old:
                    name, ext = split_path(config.output_file)
                    backup_name = '{}-{}{}'.format(name, uuid4().hex, ext)
                    print('[*] Output file already exists, renaming {} to {}'.
                          format(config.output_file, backup_name))

                    rename_file(config.output_file, backup_name)

                    # path still exists, rename failed
                    if path_exists(config.output_file):
                        print('[!] Rename failed, outputting to {} instead!'.
                              format(backup_name))
                        config.output_file = backup_name
                else:
                    print('[*] Overwriting existing output file {}'.format(
                        config.output_file))
                    remove_file(config.output_file)

            # create progress bar if not verbose mode
            if not config.verbose:
                progress_bar = tqdm(total=len(initial_resolvers),
                                    unit='resolvers')

            # create a thread pool and start the workers
            thread_pool = ThreadPool(config.job_count)
            workers = []
            for resolver in initial_resolvers:
                w = thread_pool.apply_async(check_resolver,
                                            (resolver, sanity_check),
                                            callback=callback)
                workers.append(w)

            # ensure all workers complete
            for w in workers:
                w.get()

            thread_pool.close()
            thread_pool.join()

            if not config.verbose:
                progress_bar.close()
        else:
            print(
                'Error performing baseline sanity check! (DNS lookup {} using {})'
                .format(config.query_domain, config.baseline_server))
Example #33
def get_emotion_number_from_filename(filename):
    return EMOTION_NUMBERS[split_path(filename)[-1].split('.')[0].split('_')[1]]
Example #34
	ms_figures_dir = join_path(ms_dir, 'figures')
	zip_fname = '/Users/yoavram/Dropbox/Sunnyvale/figures.zip'
	tex_files = ['ms_sunnyvale.tex']
	pattern = re.compile(r'\\includegraphics(?:\[.*\])?\{(.*\.\w{3})\}')

	if os.path.exists(ms_figures_dir):
		shutil.rmtree(ms_figures_dir)
		os.mkdir(ms_figures_dir)
	if not os.path.exists(ms_figures_dir):
		os.mkdir(ms_figures_dir)

	figures = []
	for fn in tex_files:
		with open(join_path(ms_dir, fn)) as f:
			matches = (pattern.match(line) for line in f)
			matches = (m for m in matches if m is not None)
			filenames = (m.groups()[0] for m in matches)
			filenames = (split_path(fn)[-1] for fn in filenames)
			filenames = (join_path(figures_dir, fn) for fn in filenames)
			figures.extend(filenames)

	with ZipFile(zip_fname, 'w') as z:
		for fn in figures:
			fn = fn.replace('{', '').replace('}', '')
			print(fn)
			shutil.copy(fn, join_path(ms_figures_dir, split_path(fn)[-1]))
			z.write(fn)

	print("{} figures copied to {} and zipped to {}".format(
		len(figures), ms_figures_dir, zip_fname))
Example #35
from datetime import datetime
from os.path import abspath, split as split_path, join as join_path

# 3rd party:

# Internal:
from ..common.visualisation import plot_thumbnail

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

metrics = [
    'newAdmissions', 'newCasesByPublishDate', 'newDeaths28DaysByPublishDate',
    'newVirusTests'
]

curr_dir, _ = split_path(abspath(__file__))
queries_dir = join_path(curr_dir, "queries")

with open(join_path(queries_dir, "time_series_data.sql")) as fp:
    time_series_data_query = fp.read()

with open(join_path(queries_dir, "latest_change_data.sql")) as fp:
    latest_change_data_query = fp.read()


async def get_timeseries(conn, timestamp):
    ts = datetime.fromisoformat(timestamp.replace("5Z", ""))
    partition = f"{ts:%Y_%-m_%-d}_other"
    partition_id = f"{ts:%Y_%-m_%-d}|other"
    values_query = time_series_data_query.format(partition=partition)
    change_query = latest_change_data_query.format(partition=partition)
Example #36
def read_image_stack(fn, *args, **kwargs):
    """Read a 3D volume of images in image or .h5 format into a numpy.ndarray.

    This function attempts to automatically determine input file types and
    wraps specific image-reading functions.

    Parameters
    ----------
    fn : filename (string)
        A file path or glob pattern specifying one or more valid image files.
        The file format is automatically determined from this argument.

    *args : filenames (string, optional)
        More than one positional argument will be interpreted as a list of
        filenames pointing to all the 2D images in the stack.

    **kwargs : keyword arguments (optional)
        Arguments to be passed to the underlying functions. A 'crop'
        keyword argument is supported, as a list of length 6:
        [xmin, xmax, ymin, ymax, zmin, zmax]. Use 'None' for no crop in
        that coordinate.

    Returns
    -------
    stack : 3-dimensional numpy ndarray

    Notes
    -----
        If reading in .h5 format, keyword arguments are passed through to
        read_h5_stack().

        Automatic file type detection may be deprecated in the future.
    """
    # TODO: Refactor.  Rather than have implicit designation of stack format
    # based on filenames (*_boundpred.h5, etc), require explicit parameters
    # in config JSON files.
    if os.path.isdir(fn):
        fn += '/'
    d, fn = split_path(os.path.expanduser(fn))
    if len(d) == 0: d = '.'
    crop = kwargs.get('crop', [None] * 6)
    if crop is None:
        crop = [None] * 6
    if len(crop) == 4: crop.extend([None] * 2)
    elif len(crop) == 2: crop = [None] * 4 + crop
    kwargs['crop'] = crop
    if any(fn.endswith(ext) for ext in supported_image_extensions):
        # image types, such as a set of pngs or a multi-page tiff
        xmin, xmax, ymin, ymax, zmin, zmax = crop
        if len(args) > 0 and type(args[0]) == str and args[0].endswith(
                fn[-3:]):
            # input is a list of filenames
            fns = [fn] + [split_path(f)[1] for f in args]
        else:
            # input is a filename pattern to match
            fns = fnfilter(os.listdir(d), fn)
        if len(fns) == 1 and fns[0].endswith('.tif'):
            stack = read_multi_page_tif(join_path(d, fns[0]), crop)
        else:
            fns.sort(key=alphanumeric_key)  # sort filenames numerically
            fns = fns[zmin:zmax]
            im0 = imread(join_path(d, fns[0]))
            ars = (imread(join_path(d, fn)) for fn in fns)
            im0 = im0[xmin:xmax, ymin:ymax]
            dtype = im0.dtype
            stack = zeros((len(fns), ) + im0.shape, dtype)
            for i, im in enumerate(ars):
                stack[i] = im[xmin:xmax, ymin:ymax]
    elif fn.endswith('_boundpred.h5') or fn.endswith('_processed.h5'):
        # Ilastik batch prediction output file
        stack = read_prediction_from_ilastik_batch(os.path.join(d, fn),
                                                   **kwargs)
    elif fn.endswith('.h5'):
        # other HDF5 file
        stack = read_h5_stack(join_path(d, fn), *args, **kwargs)
    elif os.path.isfile(os.path.join(d, 'superpixel_to_segment_map.txt')):
        # Raveler export
        stack = raveler_to_labeled_volume(d, *args, **kwargs)
    return squeeze(stack)
Example #37
def split_path_file_extension(full_path):
    path, file = split_path(full_path)
    file, extension = splitext(file)
    return path, file, extension
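For example (note that splitext only splits off the last extension):

split_path_file_extension('/srv/data/report.tar.gz')
# -> ('/srv/data', 'report.tar', '.gz')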
Example #38
def main(argv, genfiles=None):

    op = OptionParser()

    op.add_option('-a',
                  dest='authors',
                  default='',
                  help="Set the path for a special authors file (optional)")

    op.add_option(
        '-c',
        dest='package',
        default='',
        help="Generate documentation for the Python package (optional)")

    op.add_option('-d',
                  dest='data_file',
                  default='',
                  help="Set the path for a persistent data file (optional)")

    op.add_option('-e',
                  dest='output_encoding',
                  default='utf-8',
                  help="Set the output encoding (default: utf-8)")

    op.add_option('-f',
                  dest='format',
                  default='html',
                  help="Set the output format (default: html)")

    op.add_option('-i',
                  dest='input_encoding',
                  default='utf-8',
                  help="Set the input encoding (default: utf-8)")

    op.add_option('-o',
                  dest='output_path',
                  default=HOME,
                  help="Set the output directory for files (default: $PWD)")

    op.add_option('-p',
                  dest='pattern',
                  default='',
                  help="Generate index files for the path pattern (optional)")

    op.add_option('-r',
                  dest='root_path',
                  default='',
                  help="Set the path to the root working directory (optional)")

    op.add_option('-t',
                  dest='template',
                  default='',
                  help="Set the path to a template file (optional)")

    op.add_option('--quiet',
                  dest='quiet',
                  default=False,
                  action='store_true',
                  help="Flag to suppress output")

    op.add_option('--stdout',
                  dest='stdout',
                  default=False,
                  action='store_true',
                  help="Flag to redirect to stdout instead of to a file")

    try:
        options, args = op.parse_args(argv)
    except SystemExit:
        return

    authors = options.authors

    if authors:
        if not isfile(authors):
            raise IOError("%r is not a valid path!" % authors)
        authors = parse_authors_file(authors)
    else:
        authors = {}

    email2author = {'unknown': 'unknown'}
    author2link = {'unknown': ''}

    for author, author_info in authors.iteritems():
        for _info in author_info:
            if _info.startswith('http://') or _info.startswith('https://'):
                if author not in author2link:
                    author2link[author] = _info
            elif '@' in _info:
                email2author[_info] = author

    authors['unknown'] = ['unknown']

    output_path = options.output_path.rstrip('/')

    if not isdir(output_path):
        raise IOError("%r is not a valid directory!" % output_path)

    root_path = options.root_path

    siteinfo = join_path(output_path, '.siteinfo')
    if isfile(siteinfo):
        env = {}
        execfile(siteinfo, env)
        siteinfo = env['INFO']
    else:
        siteinfo = {
            'site_url': '',
            'site_nick': '',
            'site_description': '',
            'site_title': ''
        }

    stdout = sys.stdout if options.stdout else None
    verbose = False if stdout else (not options.quiet)

    format = options.format

    if format not in ('html', 'tex'):
        raise ValueError("Unknown format: %s" % format)

    if (format == 'tex') or (not options.template):
        template = False
    elif not isfile(options.template):
        raise IOError("%r is not a valid template!" % options.template)
    else:
        template_path = abspath(options.template)
        template_root = dirname(template_path)
        template_loader = TemplateLoader([template_root])
        template_file = open(template_path, 'rb')
        template = MarkupTemplate(template_file.read(),
                                  loader=template_loader,
                                  encoding='utf-8')
        template_file.close()

    data_file = options.data_file

    if data_file:
        if isfile(data_file):
            data_file_obj = open(data_file, 'rb')
            data_dict = load_pickle(data_file_obj)
            data_file_obj.close()
        else:
            data_dict = {}

    input_encoding = options.input_encoding
    output_encoding = options.output_encoding

    if genfiles:

        files = genfiles

    elif options.package:

        package_root = options.package
        files = []
        add_file = files.append
        package = None
        for part in reversed(package_root.split(SEP)):
            if part:
                package = part
                break
        if package is None:
            raise ValueError("Couldn't find the package name from %r" %
                             package_root)

        for dirpath, dirnames, filenames in walk(package_root):
            for filename in filenames:

                if not filename.endswith('.py'):
                    continue

                filename = join_path(dirpath, filename)
                module = package + filename[len(package_root):]
                if module.endswith('__init__.py'):
                    module = module[:-12]
                else:
                    module = module[:-3]

                module = '.'.join(module.split(SEP))
                module_file = open(filename, 'rb')
                module_source = module_file.read()
                module_file.close()

                docstring = docstring_regex.search(module_source)

                if docstring:
                    docstring = docstring.group(0)
                    if docstring.startswith('r'):
                        docstring = docstring[4:-3]
                    else:
                        docstring = docstring[3:-3]

                if docstring and docstring.strip().startswith('=='):
                    docstring = strip_leading_indent(docstring)
                    module_source = docstring_regex.sub('', module_source, 1)
                else:
                    docstring = ''

                info = {}

                if root_path and isabs(filename) and filename.startswith(
                        root_path):
                    info['__path__'] = filename[len(root_path) + 1:]
                else:
                    info['__path__'] = filename

                info['__updated__'] = datetime.utcfromtimestamp(
                    stat(filename).st_mtime)

                info['__outdir__'] = output_path
                info['__name__'] = 'package.' + module
                info['__type__'] = 'py'
                info['__title__'] = module
                info['__source__'] = highlight(module_source, PythonLexer(),
                                               SYNTAX_FORMATTER)
                add_file((docstring, '', info))

    else:

        files = []
        add_file = files.append

        for filename in args:

            if not isfile(filename):
                raise IOError("%r doesn't seem to be a valid file!" % filename)

            if root_path and isabs(filename) and filename.startswith(
                    root_path):
                path = filename[len(root_path) + 1:]
            else:
                path = filename

            info = get_git_info(filename, path)

            # old svn support:
            # info = get_svn_info(path.split(SEP)[0], '*.txt')[path]

            source_file = open(filename, 'rb')
            source = source_file.read()
            source_file.close()

            if MORE_LINE in source:
                source_lead = source.split(MORE_LINE)[0]
                source = source.replace(MORE_LINE, '')
            else:
                source_lead = ''

            filebase, filetype = splitext(basename(filename))
            info['__outdir__'] = output_path
            info['__name__'] = filebase.lower()
            info['__type__'] = 'txt'
            info['__title__'] = filebase.replace('-', ' ')
            add_file((source, source_lead, info))

    for source, source_lead, info in files:

        if verbose:
            print
            print LINE
            print 'Converting: [%s] %s in [%s]' % (
                info['__type__'], info['__path__'], split_path(output_path)[1])
            print LINE
            print

        if template:
            output, props = render_rst(source, format, input_encoding, True)
            # output = output.encode(output_encoding)
            info['__text__'] = output.encode(output_encoding)
            info.update(props)
            if source_lead:
                info['__lead__'] = render_rst(source_lead, format,
                                              input_encoding,
                                              True)[0].encode(output_encoding)
            output = template.generate(content=output,
                                       info=info,
                                       authors=authors,
                                       email2author=email2author,
                                       author2link=author2link,
                                       **siteinfo).render(
                                           'xhtml', encoding=output_encoding)
        else:
            output, props = render_rst(source,
                                       format,
                                       input_encoding,
                                       True,
                                       as_whole=True)
            info.update(props)
            output = output.encode(output_encoding)
            info['__text__'] = output
            if source_lead:
                info['__lead__'] = render_rst(
                    source_lead, format, input_encoding, True,
                    as_whole=True)[0].encode(output_encoding)

        if data_file:
            data_dict[info['__path__']] = info

        if stdout:
            print output
        else:
            output_filename = join_path(output_path,
                                        '%s.%s' % (info['__name__'], format))
            output_file = open(output_filename, 'wb')
            output_file.write(output)
            output_file.close()
            if verbose:
                print 'Done!'

    if data_file:
        data_file_obj = open(data_file, 'wb')
        dump_pickle(data_dict, data_file_obj)
        data_file_obj.close()

    if options.pattern:

        pattern = options.pattern

        items = [
            item for item in data_dict.itervalues()
            if item['__outdir__'] == pattern
        ]

        # index.js/json

        import json

        index_js_template = join_path(output_path, 'index.js.template')

        if isfile(index_js_template):

            index_json = json.dumps(
                [[_art['__name__'], _art['title'].encode('utf-8')]
                 for _art in sorted([
                     item for item in items if item.get('x-created')
                     and item.get('x-type', 'blog') == 'blog'
                 ],
                                    key=lambda i: i['x-created'])])

            template_source = open(index_js_template, 'rb').read()
            index_js = open(join_path(output_path, 'index.js'), 'wb')
            index_js.write(template_source % index_json)
            index_js.close()

        for name, mode, format in INDEX_FILES:

            pname = name.split('.', 1)[0]
            template_path = None

            # Prefer a site-specific template when one exists.
            if siteinfo['site_nick']:
                nick_path = join_path(
                    template_root,
                    '%s.%s.genshi' % (pname, siteinfo['site_nick']))
                if isfile(nick_path):
                    template_path = nick_path

            if not template_path:
                template_path = join_path(template_root, '%s.genshi' % pname)

            template_file = open(template_path, 'rb')
            page_template = MarkupTemplate(template_file.read(),
                                           loader=template_loader,
                                           encoding='utf-8')
            template_file.close()

            poutput = page_template.generate(items=items[:],
                                             authors=authors,
                                             email2author=email2author,
                                             author2link=author2link,
                                             root_path=output_path,
                                             **siteinfo).render(format)

            poutput = unicode(poutput, output_encoding)

            if mode:
                output = template.generate(alternative_content=poutput,
                                           authors=authors,
                                           **siteinfo).render(format)
            else:
                output = poutput

            # render() may return unicode depending on the template output;
            # normalise to encoded bytes before writing.
            if isinstance(output, unicode):
                output = output.encode(output_encoding)

            output_file = open(join_path(output_path, name), 'wb')
            output_file.write(output)
            output_file.close()
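The package-walking branch above relies on a module-level docstring_regex that is defined elsewhere in the source. Purely as an illustration, here is a minimal sketch of a pattern that would satisfy the [3:-3] / [4:-3] slicing logic used there; the actual pattern in the source may well differ:

import re

# A sketch only, not the source's actual pattern: match an optionally
# r-prefixed, triple-quoted docstring at the very start of the module,
# including the quote characters, so the [3:-3] / [4:-3] slicing above
# strips the delimiters correctly.
docstring_regex = re.compile(r'\A(r?)("""|\'\'\')[\s\S]*?\2')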
Пример #39
0
    def _file_for(self, instance, resource):
        if instance.definition.name in resource:
            _, path = split_path(resource)
            return join_paths(self._directory_for(instance), path)
        else:
            return join_paths(self._output_directory, resource)
Пример #40
0
    def _get_current_file(self):
        return split_path(self._get_view().file_name())
Пример #41
0
def get_emotion_from_filename(filename):
    return split_path(filename)[-1].split('.')[0].split('_')[1]
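This assumes a corpus naming convention of the form <speaker>_<emotion>.<ext>; a quick, purely illustrative check (the filename is hypothetical):

# Hypothetical filename following a <speaker>_<emotion>.<ext> convention.
print get_emotion_from_filename('/data/corpus/speaker03_happy.wav')
# -> 'happy'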
Пример #42
0
    def root(self):
        return split_path(self.path)[0]
Пример #43
0
def read_image_stack(fn, *args, **kwargs):
    """Read a 3D volume of images in image or .h5 format into a numpy.ndarray.

    This function attempts to automatically determine input file types and
    wraps specific image-reading functions.

    Parameters
    ----------
    fn : filename (string)
        A file path or glob pattern specifying one or more valid image files.
        The file format is automatically determined from this argument.

    *args : filenames (string, optional)
        More than one positional argument will be interpreted as a list of
        filenames pointing to all the 2D images in the stack.

    **kwargs : keyword arguments (optional)
        Arguments to be passed to the underlying functions. A 'crop'
        keyword argument is supported, as a list of length 6:
        [xmin, xmax, ymin, ymax, zmin, zmax]. Use 'None' for no crop in
        that coordinate.

    Returns
    -------
    stack : 3-dimensional numpy ndarray

    Notes
    -----
        If reading in .h5 format, keyword arguments are passed through to
        read_h5_stack().

        Automatic file type detection may be deprecated in the future.
    """
    # TODO: Refactor.  Rather than have implicit designation of stack format
    # based on filenames (*_boundpred.h5, etc), require explicit parameters
    # in config JSON files.
    if os.path.isdir(fn):
        fn += '/'
    d, fn = split_path(os.path.expanduser(fn))
    if len(d) == 0: d = '.'
    crop = kwargs.get('crop', [None]*6)
    if crop is None:
        crop = [None]*6
    if len(crop) == 4: crop.extend([None]*2)
    elif len(crop) == 2: crop = [None]*4 + crop
    kwargs['crop'] = crop
    if any([fn.endswith(ext) for ext in supported_image_extensions]):
        # image types, such as a set of pngs or a multi-page tiff
        xmin, xmax, ymin, ymax, zmin, zmax = crop
        if len(args) > 0 and type(args[0]) == str and args[0].endswith(fn[-3:]):
            # input is a list of filenames
            fns = [fn] + [split_path(f)[1] for f in args]
        else:
            # input is a filename pattern to match
            fns = fnfilter(os.listdir(d), fn)
        if len(fns) == 1 and fns[0].endswith('.tif'):
            stack = read_multi_page_tif(join_path(d,fns[0]), crop)
        else:
            fns.sort(key=alphanumeric_key) # sort filenames numerically
            fns = fns[zmin:zmax]
            im0 = imread(join_path(d, fns[0]))
            ars = (imread(join_path(d, fn)) for fn in fns)
            im0 = im0[xmin:xmax, ymin:ymax]
            dtype = im0.dtype
            stack = zeros((len(fns),)+im0.shape, dtype)
            for i, im in enumerate(ars):
                stack[i] = im[xmin:xmax,ymin:ymax]
    elif fn.endswith('_boundpred.h5') or fn.endswith('_processed.h5'):
        # Ilastik batch prediction output file
        stack = read_prediction_from_ilastik_batch(os.path.join(d,fn), **kwargs)
    elif fn.endswith('.h5'):
        # other HDF5 file
        stack = read_h5_stack(join_path(d,fn), *args, **kwargs)
    elif os.path.isfile(os.path.join(d, 'superpixel_to_segment_map.txt')):
        # Raveler export
        stack = raveler_to_labeled_volume(d, *args, **kwargs)
    return squeeze(stack)
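A hedged usage sketch, assuming a directory of PNG slices and that 'png' is among supported_image_extensions (the path and pattern are illustrative). Note the crop order is [xmin, xmax, ymin, ymax, zmin, zmax], with None meaning no crop on that axis:

# Illustrative only: read all slices matching the pattern, cropping the
# x and y axes to [0, 512) and keeping every z slice.
stack = read_image_stack('/data/volume/slice_*.png',
                         crop=[0, 512, 0, 512, None, None])
print stack.shape  # e.g. (n_slices, 512, 512)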
Пример #44
0
def guess_config(python_exe_full_path):
    """
    Then gets the path like how conda activate defines path
    If not found : MSG about downloading and installing miniconda
        if installed still ask for manual file chooser
    if found return CPYTHON and EXTRA_PATH fields
    """

#    to open a new window with cmd prompt with activated miniconda environment:
#        start %windir%\System32\cmd.exe "/K"  %USERPROFILE%\Miniconda2\Scripts\activate.bat  JTutils

    CPYTHON = python_exe_full_path
    import subprocess

    OS = get_os_version()
    if 'win' in OS:
        CONDA_EXE = ['condabin', 'conda.bat']
    else: #unix (linux or mac)
        CONDA_EXE = ['condabin', 'conda']

    # Split the path into a list of folders.
    splited_path_to_python = []
    head = python_exe_full_path
    tail = "start"  # sentinel value to enter the loop
    while tail != "":
        head, tail = split_path(head)
        if tail != "":
            splited_path_to_python.append(tail)
    splited_path_to_python.append(head)
    splited_path_to_python.reverse()
    
    # Test whether the provided file comes from a conda distribution and
    # initialise conda_base and conda_exe.
    conda_exe = False
    for neg_index_base in range(1,len(splited_path_to_python)):
        tmp = splited_path_to_python[:-neg_index_base]+CONDA_EXE
        tested_file = join_path(*tmp)
        if is_exe(tested_file):
            conda_exe = tested_file
            conda_base = join_path(*splited_path_to_python[:-neg_index_base])
            break
    if conda_exe is False:
        MSG("This doesn't seem to be a conda (miniconda/anaconda) distribution. Can be fine: check the report.")
        return { 'CPYTHON': python_exe_full_path}
        
    # We now have conda_base and conda_exe, so this is a conda distribution.
    # Determine which environment python_exe_full_path belongs to and
    # whether the JTutils environment exists.
    if 'win' in OS:
        if neg_index_base < 2:
            conda_env = 'base'
        else:
            if splited_path_to_python[-3].lower() == 'envs':
                conda_env = splited_path_to_python[-2]
            else:
                raise ValueError("Conda distribution seems inconsistent!")
    else: # macosx or linux
        if neg_index_base < 3:
            conda_env = 'base'
        else:
            if splited_path_to_python[-4].lower() == 'envs':
                conda_env = splited_path_to_python[-3]
            else:
                raise ValueError("Conda distribution seems inconsistent!")
    # If the python executable is not from the JTutils environment,
    # offer an automatic installation.
    if conda_env != 'JTutils':
        if 'win' in OS:
            new_python_exe = join_path(conda_base, 'envs', 'JTutils', 'python.exe')
        else:
            new_python_exe = join_path(conda_base, 'envs', 'JTutils', 'bin', 'python')  
            
        select_val = SELECT(title="Create JTutils environment (recommended)",
                            message="""JTutils conda environment not found. The current environment is %s.
Do you want to create the JTutils python environment (recommended)?
If Yes, a new python exe file will be selected: %s
Please CLICK on one button (enter on keyboard does not work)""" % (conda_env, new_python_exe),
                            buttons=["Yes", "No"],
                            mnemonics=["y", "n"] )
        if select_val == 0:
            # First check whether JTutils already exists:
            if is_exe(new_python_exe):
                pass
            else: # run install script
                if 'win' in OS:
                    cmd = " ".join([conda_exe, "activate &",
                                   conda_exe, "create -y -n JTutils numpy&",
                                   conda_exe, "env list"])
                else: # unix
                    # default jython shell is /bin/sh : source command is "."
                    # first source the conda initialisation script
                    # once done conda is available as a shell internal command
                    shell_init_file = join_path(conda_base,'etc', 'profile.d', 'conda.sh')
                    cmd = " ".join([".", shell_init_file, ";",
                                  "conda create -y -n JTutils numpy ;", 
                                  "conda env list"])
                MSG(subprocess.check_output(cmd, shell=True))
                MSG("JTutils environment created")
            CPYTHON = new_python_exe
            conda_env = 'JTutils'

    if 'win' in OS:
#        batfile = join_path(dirname(abspath(sys.argv[0])), "..", "condat_env_setup.bat")
#        cmd = ["start", "%windir%\system32\cmd.exe", "/k",  batfile + " " + found_path + " JTutils"]
        cmd = " ".join([
                      join_path(conda_base,'condabin', 'activate.bat'), conda_env,
                        '& set CONDA',
                        '& set PATH'])
                        
    elif ('linux' in OS) or ('mac' in OS):
        # default shell is /bin/sh : source command is "."
        # first source the conda initialisation script
        # once done conda is available as a shell internal command
        shell_init_file = join_path(conda_base,'etc', 'profile.d', 'conda.sh')
        cmd = " ".join([
                        ".", shell_init_file, ";",
                        "conda activate", conda_env, ";"
                        "env |grep -i CONDA ; echo PATH=$PATH" ])
    try:
#        MSG(cmd)
        res = subprocess.check_output(cmd, shell=True)
        if 'win' in OS:
            res = res.decode('cp850')
    except subprocess.CalledProcessError as grepexc:
        error_output = grepexc.output
        if 'win' in OS:
            error_output = error_output.decode('cp850')
        print("error code", grepexc.returncode, error_output)
        MSG("Error during retrieval of conda environment.")
        raise

    # Now parse the returned string to extract the conda env vars.
    conf_dict = dict()
    for line in res.split('\n'):
        line = line.strip()
        if '=' in line:
            key, val = line.split('=', 1)
            if 'PATH' == key.upper():
                # The PATH separator differs between windows and unix.
                path_list = val.split(';' if 'win' in OS else ':')
                extra_path = []
                # Windows paths are case-insensitive: lowercase both sides
                # for a valid string comparison.
                cb = conda_base.lower() if 'win' in OS else conda_base
                for one_path in path_list:
                    op = one_path.lower() if 'win' in OS else one_path
                    if cb in op:
                        extra_path.append(one_path)
                conf_dict['EXTRA_PATH'] = extra_path
            elif 'CONDA' in key.upper():
                conf_dict[key] = val
    conf_dict['CPYTHON'] = CPYTHON
        
    return conf_dict
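A hedged sketch of consuming the returned dict; the miniconda path below is hypothetical, and the key names follow the code above:

# Hypothetical miniconda installation path: adjust to the local setup.
conf = guess_config('/home/user/miniconda2/envs/JTutils/bin/python')
print conf['CPYTHON']
for extra in conf.get('EXTRA_PATH', []):
    print extra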
Пример #46
0
def convert_to_compactsplit(the_iter = False,
                            dir_out = False,
                            do_sort = True,
                            pre_split_num = 32,
                            max_num_per_split = 1000000,
                            num_digits = 4,
                            max_num = 0,
                            confirm_clear = True,
                            cache_images_now = False,
                            doing_run_num = False,
                            doing_job_num = False,
                            via_cli = False,
                            ):
    """
    Post-processing step to convert getty images dataset to the new single-file format. Requires GNU `sort`.
    
    Optionally sort the file to ensure that any contiguous sample of the output file will be representative 
    of the overall dataset.
    
    Args:
        the_iter:           Input iterator that outputs dicts containing, at a minimum, an '_id' key.
        dir_out:            Prefix for output file name. A suffix of the form "-split-0001.gz" will be appended.
                            Can also be a path to a prefix name, e.g. 'output_prefix' or 'a/b/c/output_prefix'.
        do_sort:            Sort output files by ID afterward.
        pre_split_num:      Pre-split output into at least `pre_split_num` files, for easy parallel loading. Probably best to
                            err on the high side here.
        max_num_per_split:  If any of the splits have more than `max_num_per_split` records, then multiple files per split will
                            be created.
        num_digits:         How many digits to zero-pad numbers to in output filenames. Should probably leave this at `4`.
        max_num:            Terminate early after `max_num` records.
        confirm_clear:      Prompt for confirmation before clearing output directory.
    """

    from mc_ingest import cache_image, decode_image
    
    assert pre_split_num <= (10 ** num_digits - 1),(pre_split_num, num_digits)
    
    assert the_iter,('REQUIRED_ARG: the_iter',)
    
    assert the_iter is not False
    assert dir_out is not False
        
    the_path, the_dir = split_path(dir_out)
    assert exists(the_path),('PATH_DOES_NOT_EXIST', the_path)
    assert the_dir,('SPECIFY_OUTPUT_DIR',the_dir)
    if not exists(dir_out):
        makedirs(dir_out)
    
    fn_out = join(dir_out, the_dir)
    
    fn_out_temp = fn_out + '-tempfile-' + str(doing_run_num) + ('-%03d' % doing_job_num) + '-' + str(randint(1,1000000000000))
    fn_out_temp_2 = fn_out_temp + '-2'
    fn_out_temp_3 = fn_out_temp + '-3'
    
    try:
        with open(fn_out_temp, 'w') as f:
            
            for hh_batch in igroup(the_iter, 50):
                
                for hh in hh_batch:
                    xid = hh['_id']
                    if type(xid) == unicode:
                        xid = xid.encode('utf8')

                    new_id = hashlib.md5(xid).hexdigest()

                    #assert len(new_id) == 24,new_id ## Fixed-length makes sorting easier.
                    assert '\t' not in new_id
                    assert '\n' not in new_id            

                    if cache_images_now:
                        # Not implemented yet: everything below this assert
                        # is unreachable placeholder code.
                        assert False
                        if n_jobs != 1:
                            assert False, 'TODO'
                            inner_args.append((hh,))
                        else:
                            worker_convert_to_compactsplit((hh, ))

                    dd = json.dumps(hh, separators=(',', ':'))

                    assert '\t' not in dd
                    assert '\n' not in dd
                    
                    f.write(new_id + '\t' + dd + '\n')

        if not do_sort:
            rename(fn_out_temp, fn_out_temp_2)
        
        else:

            assert exists(fn_out_temp),(fn_out_temp,)
            
            ## Sort via gnu `sort`:
            
            print ('FILES', fn_out_temp, fn_out_temp_2)
            
            cmd = "LC_ALL=C sort --temporary-directory=%s %s > %s" % (pipes_quote(dir_out),
                                                                      pipes_quote(fn_out_temp),
                                                                      pipes_quote(fn_out_temp_2))
            
            print ('SORTING',cmd)
            
            rr = check_output(cmd,
                              shell = True,
                              executable = "/bin/bash",
                              )
            
            unlink(fn_out_temp)

        assert exists(fn_out_temp_2),(fn_out_temp_2,)
        
        print ('DONE_STEP_1', fn_out_temp_2)
        
        if pre_split_num == 1:
            print ('WRITE_AND_COMPRESS')
            
            with open(fn_out_temp_2) as src, gzip.open(fn_out_temp_3, 'wb') as dst:
                dst.writelines(src)

            unlink(fn_out_temp_2)
            
            rename(fn_out_temp_3, fn_out + (('-split-%0' + str(int(num_digits)) + 'd.gz') % 1))
            
        else:

            print ('SPLITTING_AND_COMPRESS', pre_split_num)

            hh = {x:[False, x, 0, 0] ## [output_file, this_split_num, file_num_this_split, record_count_this_split]
                  for x
                  in xrange(pre_split_num)
                  }
            
            with open(fn_out_temp_2) as f:
                for c,line in enumerate(f):
                    
                    xx = hh[c % pre_split_num]
                    
                    if (xx[0] is False) or (xx[3] > max_num_per_split):
                        if xx[0] is not False:
                            xx[0].close()
                        fn = fn_out + (('-compactsplit-v' + VERSION_COMPACTSPLIT + \
                                        '-%0' + str(int(num_digits)) + 'd' + \
                                        '-%0' + str(int(num_digits)) + 'd' + \
                                        '-%0' + str(int(num_digits)) + 'd.gz') % (doing_job_num,
                                                                                  xx[1],
                                                                                  xx[2],
                                                                                  ))
                        print ('NEW_FILE',fn)
                        xx[0] = GzipFile(fn, 'w')
                        xx[2] += 1
                        xx[3] = 0
                    
                    xx[0].write(line)
                    xx[3] += 1
            
            unlink(fn_out_temp_2)
            
            for xx in hh.values():
                if xx[0] is not False:
                    print ('CLOSING_FILE',xx[0])
                    xx[0].close()
        
        print ('DONE',dir_out)
    
    finally:
        pass
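A minimal driving sketch, assuming an in-memory iterator of records; the output prefix is illustrative and its parent directory must already exist:

# Illustrative records: each dict needs at least an '_id' key.
records = ({'_id': u'item-%d' % i, 'value': i} for i in xrange(100))

# '/tmp/compact' must exist; split files land under '/tmp/compact/out/'.
convert_to_compactsplit(the_iter=records,
                        dir_out='/tmp/compact/out',
                        pre_split_num=4,
                        max_num_per_split=50)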
Пример #47
0
    def add_content(self, key):
        self.contents[split_path(key.name)[-1]] = key
        return