Example #1
def generate_neurons_blocks(input_filename,
                            output_filename,
                            num_processes=multiprocessing.cpu_count(),
                            block_shape=None,
                            num_blocks=None,
                            half_window_shape=None,
                            half_border_shape=None,
                            use_drmaa=False,
                            num_drmaa_cores=16,
                            debug=False,
                            **parameters):
    # TODO: Move function into new module with its own command line interface.
    # TODO: Heavy refactoring required on this function.

    # Extract and validate file extensions.

    # Parse input filename and validate that the name is acceptable
    input_filename_ext, input_dataset_name = hdf5.serializers.split_hdf5_path(
        input_filename)

    # Parse output filename and validate that the name is acceptable
    output_filename_ext, output_group_name = hdf5.serializers.split_hdf5_path(
        output_filename)

    # Directory where individual block runs will be stored.
    intermediate_output_dir = output_filename_ext.rsplit(
        os.path.splitext(output_filename_ext)[1], 1)[0] + "_blocks"
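    # (For illustration: an output file "out.h5" yields "out_blocks".)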

    # Read the input data.
    original_images_shape_array = None
    with h5py.File(input_filename_ext, "r") as input_file_handle:
        original_images_shape_array = numpy.array(
            input_file_handle[input_dataset_name].shape)

    # Get the amount of the border to slice
    half_border_shape_array = None
    if half_border_shape is None:
        half_border_shape_array = numpy.zeros(len(original_images_shape_array),
                                              dtype=int)
    else:
        assert (len(half_border_shape) == len(original_images_shape_array))

        half_border_shape_array = numpy.array(half_border_shape)

        # Should be of type integer
        assert (issubclass(half_border_shape_array.dtype.type, numpy.integer))

        # Should not cut along temporal portion.
        # Maybe replace with a warning.
        assert (half_border_shape[0] == 0)

    # TODO: Refactor to expanded_numpy.
    # Cuts boundaries from original_images_shape
    original_images_pared_shape_array = original_images_shape_array - \
                                        2*half_border_shape_array
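    # (For illustration: a stack of shape (1000, 512, 512) with a
    # half_border_shape of (0, 16, 16) pares down to (1000, 480, 480).)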

    # At least one of block_shape and num_blocks must be specified,
    # possibly as a mixture of both (-1 marks an unspecified dimension).
    assert ((block_shape is not None) or (num_blocks is not None))

    # Size of the block to use by pixels
    block_shape_array = None
    block_shape_array_undefined = None
    if block_shape is None:
        block_shape_array = -numpy.ones(
            original_images_pared_shape_array.shape, dtype=int)
        block_shape_array_undefined = numpy.ones(
            original_images_pared_shape_array.shape, dtype=bool)
    else:
        # Should have the same number of values in each
        assert (len(original_images_pared_shape_array) == len(block_shape))

        block_shape_array = numpy.array(block_shape, dtype=int)

        # Should be of type integer
        assert issubclass(block_shape_array.dtype.type, numpy.integer)

        block_shape_array_undefined = (block_shape_array == -1)

    # Number of blocks to break each dimension into
    num_blocks_array = None
    num_blocks_array_undefined = None
    if num_blocks is None:
        num_blocks_array = - \
            numpy.ones(original_images_pared_shape_array.shape, dtype=int)
        num_blocks_array_undefined = numpy.ones(
            original_images_pared_shape_array.shape, dtype=bool)
    else:
        # Should have the same number of values in each
        assert (len(original_images_pared_shape_array) == len(num_blocks))

        num_blocks_array = numpy.array(num_blocks, dtype=int)

        # Should be of type integer
        assert issubclass(num_blocks_array.dtype.type, numpy.integer)

        num_blocks_array_undefined = (num_blocks_array == -1)

    # Ensure that block_shape and num_blocks aren't both fully defined
    # in every dimension.
    assert ~(~block_shape_array_undefined & ~num_blocks_array_undefined).all()
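    # (For illustration: block_shape=(-1, 64, 64) with num_blocks=(20, 8, 8)
    # passes, since the first dimension's block shape is left to be derived;
    # specifying both fully in every dimension fails here.)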

    # If both are undefined, then the block should span that dimension
    missing_both = (block_shape_array_undefined & num_blocks_array_undefined)
    block_shape_array[missing_both] = original_images_pared_shape_array[
        missing_both]
    num_blocks_array[missing_both] = 1
    # Thus, we have resolved these values and can continue.
    block_shape_array_undefined[missing_both] = False
    num_blocks_array_undefined[missing_both] = False

    # Replace undefined values in block_shape_array
    missing_block_shape_array, block_shape_array_remainder = divmod(
        original_images_pared_shape_array[block_shape_array_undefined],
        num_blocks_array[block_shape_array_undefined])
    # Block shape must be well defined.
    assert (block_shape_array_remainder == 0).all()
    missing_block_shape_array = missing_block_shape_array.astype(int)
    block_shape_array[block_shape_array_undefined] = missing_block_shape_array

    # Replace undefined values in num_blocks_array
    missing_num_blocks_array, num_blocks_array_remainder = divmod(
        original_images_pared_shape_array[num_blocks_array_undefined],
        block_shape_array[num_blocks_array_undefined])
    # Allow some blocks to be smaller
    missing_num_blocks_array += (num_blocks_array_remainder != 0).astype(int)
    num_blocks_array[num_blocks_array_undefined] = missing_num_blocks_array
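    # (For illustration: an axis of length 100 with block length 30 gives
    # divmod(100, 30) = (3, 10); the nonzero remainder bumps the count to
    # 4 blocks, the last one smaller.)
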
    # Get the overlap window
    half_window_shape_array = None
    if half_window_shape is None:
        # Default to half of the block shape; integer division keeps the
        # window offsets integral.
        half_window_shape_array = block_shape_array // 2
    else:
        assert (
            len(half_window_shape) == len(original_images_pared_shape_array))

        half_window_shape_array = numpy.array(half_window_shape)

        assert issubclass(half_window_shape_array.dtype.type, numpy.integer)

    # Make sure our window size is at least as large as the one used for
    # the f0 calculation.
    if "extract_f0" in parameters["generate_neurons"]["preprocess_data"]:
        #assert (parameters["generate_neurons"]["preprocess_data"]["extract_f0"]["half_window_size"] == half_window_shape_array[0])
        assert (parameters["generate_neurons"]["preprocess_data"]["extract_f0"]
                ["half_window_size"] <= half_window_shape_array[0])

    # Estimate bounds for each slice. Uses typical python [begin, end) for the
    # indices.
    estimated_bounds = numpy.zeros(
        tuple(num_blocks_array),
        dtype=(int, original_images_pared_shape_array.shape + (2, )))

    for each_block_indices in iters.index_generator(*num_blocks_array):
        for each_dim, each_block_dim_index in enumerate(each_block_indices):
            estimated_lower_bound = each_block_dim_index * block_shape_array[
                each_dim]
            estimated_upper_bound = (each_block_dim_index +
                                     1) * block_shape_array[each_dim]

            estimated_bounds[each_block_indices][each_dim] = numpy.array(
                [estimated_lower_bound, estimated_upper_bound])

    original_images_pared_slices = numpy.zeros(
        estimated_bounds.shape[:-2],
        dtype=[("actual", int, estimated_bounds.shape[-2:]),
               ("windowed", int, estimated_bounds.shape[-2:]),
               ("windowed_stack_selection", int, estimated_bounds.shape[-2:]),
               ("windowed_block_selection", int, estimated_bounds.shape[-2:])])

    # Get the slice that is within bounds
    original_images_pared_slices["actual"] = estimated_bounds
    original_images_pared_slices["actual"][..., 0] = numpy.where(
        0 < original_images_pared_slices["actual"][..., 0],
        original_images_pared_slices["actual"][..., 0], 0)
    original_images_pared_slices["actual"][..., 1] = numpy.where(
        original_images_pared_slices["actual"][..., 1] <
        original_images_pared_shape_array,
        original_images_pared_slices["actual"][..., 1],
        original_images_pared_shape_array)

    # Gets the defined half_window_size.
    window_addition = numpy.zeros(estimated_bounds.shape, dtype=int)
    window_addition[..., 0] = -half_window_shape_array
    window_addition[..., 1] = half_window_shape_array

    # Get the slice with a window added.
    original_images_pared_slices[
        "windowed"] = estimated_bounds + window_addition
    original_images_pared_slices["windowed"][..., 0] = numpy.where(
        0 < original_images_pared_slices["windowed"][..., 0],
        original_images_pared_slices["windowed"][..., 0], 0)
    original_images_pared_slices["windowed"][..., 1] = numpy.where(
        original_images_pared_slices["windowed"][..., 1] <
        original_images_pared_shape_array,
        original_images_pared_slices["windowed"][..., 1],
        original_images_pared_shape_array)

    # Get the slice information to get the windowed block from the original
    # image stack.
    original_images_pared_slices[
        "windowed_stack_selection"] = original_images_pared_slices["windowed"]
    original_images_pared_slices[
        "windowed_stack_selection"] += xnumpy.expand_view(
            half_border_shape_array, reps_after=2)

    # Get slice information for the portion within
    # `original_images_pared_slices["windowed"]`, which corresponds to
    # `original_images_pared_slices["actual"]`.
    #original_images_pared_slices["windowed_block_selection"][..., 0] = 0
    original_images_pared_slices["windowed_block_selection"][..., 1] = (
        original_images_pared_slices["actual"][..., 1] -
        original_images_pared_slices["actual"][..., 0])
    original_images_pared_slices[
        "windowed_block_selection"][:] += xnumpy.expand_view(
            original_images_pared_slices["actual"][..., 0] -
            original_images_pared_slices["windowed"][..., 0],
            reps_after=2)

    # Get a directory for intermediate results.
    try:
        os.mkdir(intermediate_output_dir)
    except OSError:
        # If it already exists, that is fine.
        pass

    intermediate_config = intermediate_output_dir + "/" + "config.json"

    # Overwrite the config file always
    with open(intermediate_config, "w") as fid:
        json.dump(
            dict(list(parameters.items()) + list({"debug": debug}.items())),
            fid,
            indent=4,
            separators=(",", " : "))
        fid.write("\n")

    # Construct an HDF5 file for each block
    input_filename_block = []
    output_filename_block = []
    stdout_filename_block = []
    stderr_filename_block = []
    with h5py.File(output_filename_ext, "a") as output_file_handle:
        # Create the output group if it doesn't exist.
        output_file_handle.require_group(output_group_name)

        output_group = output_file_handle[output_group_name]

        if "original_images" not in output_group:
            if input_filename_ext == output_filename_ext:
                output_group["original_images"] = h5py.SoftLink(
                    input_dataset_name)
            else:
                output_group["original_images"] = h5py.ExternalLink(
                    input_filename_ext, "/" + input_dataset_name)

        output_group.require_group("blocks")

        output_group_blocks = output_group["blocks"]

        input_file_handle = None
        try:
            # Skipping using region refs.
            input_file_handle = h5py.File(input_filename_ext, "r")
        except IOError:
            # File is already open
            input_file_handle = output_file_handle

        for i, i_str, sequential_block_i in iters.filled_stringify_enumerate(
                original_images_pared_slices.flat):
            intermediate_basename_i = intermediate_output_dir + "/" + i_str

            # Hold redirected stdout and stderr for each subprocess.
            stdout_filename_block.append(intermediate_basename_i + os.extsep +
                                         "out")
            stderr_filename_block.append(intermediate_basename_i + os.extsep +
                                         "err")

            # Ensure that the blocks are corrected to deal with trimming of the image stack
            # Must be done after the calculation of
            # original_images_pared_slices["windowed_block_selection"].
            sequential_block_i_windowed = sequential_block_i[
                "windowed_stack_selection"]
            slice_i = tuple(
                slice(_1, _2, 1) for _1, _2 in sequential_block_i_windowed)

            if i_str not in output_group_blocks:
                output_group_blocks[i_str] = []
                output_group_blocks[i_str].attrs[
                    "filename"] = input_file_handle.filename
                output_group_blocks[i_str].attrs[
                    "dataset"] = input_dataset_name
                output_group_blocks[i_str].attrs["slice"] = str(slice_i)

            block_i = output_group_blocks[i_str]

            with h5py.File(intermediate_basename_i + os.extsep + "h5",
                           "a") as each_block_file_handle:
                # Link this block file's "original_images" back to its
                # block entry in the output file via a relative external
                # link.
                if "original_images" not in each_block_file_handle:
                    each_block_file_handle[
                        "original_images"] = h5py.ExternalLink(
                            os.path.relpath(block_i.file.filename,
                                            intermediate_output_dir),
                            block_i.name)

                input_filename_block.append(each_block_file_handle.filename +
                                            "/" + "original_images")
                output_filename_block.append(each_block_file_handle.filename +
                                             "/")

        if input_file_handle != output_file_handle:
            input_file_handle.close()

    cur_module_dirpath = os.path.dirname(os.path.dirname(nanshe.__file__))
    cur_module_filepath = os.path.splitext(os.path.abspath(__file__))[0]
    cur_module_name = os.path.relpath(cur_module_filepath, cur_module_dirpath)
    cur_module_name = cur_module_name.replace(os.path.sep, ".")
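    # (For illustration: a module file ".../nanshe/learner.py" would give
    # cur_module_name == "nanshe.learner"; the path is hypothetical.)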
    cur_module_filepath += os.extsep + "py"

    import sys

    python = sys.executable

    executable_run = ""
    executable_run += "from sys import argv, path, exit; "

    executable_run += "path[:] = [\"%s\"] + [_ for _ in path if _ != \"%s\"]; " % \
                      (cur_module_dirpath, cur_module_dirpath,)
    executable_run += "from %s import main; exit(main(*argv))" % \
                      (cur_module_name,)

    block_process_args_gen = iters.izip(itertools.repeat(python),
                                        itertools.repeat("-c"),
                                        itertools.repeat(executable_run),
                                        itertools.repeat(intermediate_config),
                                        input_filename_block,
                                        output_filename_block,
                                        stdout_filename_block,
                                        stderr_filename_block)

    if use_drmaa:
        # Attempt to import drmaa.
        # If it fails to import, either the user has no intent in using it or
        # forgot to install it. If it imports, but fails to find symbols,
        # then the user has not set DRMAA_LIBRARY_PATH or
        # does not have libdrmaa.so.
        try:
            import drmaa
        except ImportError:
            # python-drmaa is not installed.
            logger.error(
                "Was not able to import drmaa. " +
                "If this is meant to be run using the OpenGrid submission " +
                "system, then drmaa needs to be installed via pip or " +
                "easy_install.")
            raise
        except RuntimeError:
            # The drmaa library was not specified, but python-drmaa is
            # installed.
            logger.error(
                "Was able to import drmaa. " +
                "However, the drmaa library could not be found. Please " +
                "either specify the location of libdrmaa.so using the " +
                "DRMAA_LIBRARY_PATH environment variable or disable/remove " +
                "use_drmaa from the config file.")
            raise

        s = drmaa.Session()
        s.initialize()

        ready_processes = []
        for each_arg_pack in block_process_args_gen:
            ready_processes.append((each_arg_pack, s.createJobTemplate()))
            ready_processes[-1][1].jobName = os.path.basename(
                os.path.splitext(cur_module_filepath)
                [0]) + "-" + os.path.basename(
                    os.path.dirname(each_arg_pack[3].split(".h5")[0])
                ) + "-" + os.path.basename(each_arg_pack[3].split(".h5")[0])
            ready_processes[-1][1].remoteCommand = each_arg_pack[0]
            ready_processes[-1][1].args = each_arg_pack[1:-2]
            ready_processes[-1][1].jobEnvironment = os.environ
            ready_processes[-1][1].inputPath = "localhost:" + os.devnull
            ready_processes[-1][
                1].outputPath = "localhost:" + each_arg_pack[-2]
            ready_processes[-1][1].errorPath = "localhost:" + each_arg_pack[-1]
            ready_processes[-1][1].workingDirectory = os.getcwd()
            ready_processes[-1][1].nativeSpecification = "-pe batch " + str(
                num_drmaa_cores)

        running_processes = []
        for each_arg_pack, each_process_template in ready_processes:
            each_process_id = s.runJob(each_process_template)
            running_processes.append(
                (each_arg_pack, each_process_id, each_process_template))
            logger.info("Started new process ( \"" + " ".join(each_arg_pack) +
                        "\" ).")

        start_queue_time = time.time()
        logger.info("Waiting for queued jobs to complete.")

        #finished_processes = []
        for each_arg_pack, each_process_id, each_process_template in running_processes:
            each_process_status = s.wait(each_process_id)

            if not each_process_status.hasExited:
                raise RuntimeError("The process (\"" +
                                   " ".join(each_arg_pack) +
                                   "\") has exited prematurely.")

            logger.info("Finished process ( \"" + " ".join(each_arg_pack) +
                        "\" ).")
            s.deleteJobTemplate(each_process_template)
            #finished_processes.append((each_arg_pack, each_process_id))

        s.exit()

        end_queue_time = time.time()
        diff_queue_time = end_queue_time - start_queue_time

        logger.info("Run time for queued jobs to complete is \"" +
                    str(diff_queue_time) + " s\".")
    else:
        # TODO: Refactor into a separate class (have it return futures somehow)
        #finished_processes = []
        running_processes = []
        pool_tasks_empty = False
        while (not pool_tasks_empty) or len(running_processes):
            while (not pool_tasks_empty) and (len(running_processes) <
                                              num_processes):
                try:
                    each_arg_pack = next(block_process_args_gen)
                    each_stdout_filename = each_arg_pack[-2]
                    each_stderr_filename = each_arg_pack[-1]
                    each_arg_pack = each_arg_pack[:-2]
                    each_process = subprocess.Popen(
                        each_arg_pack,
                        stdout=open(each_stdout_filename, "w"),
                        stderr=open(each_stderr_filename, "w"))

                    running_processes.append((
                        each_arg_pack,
                        each_process,
                    ))

                    logger.info("Started new process ( \"" +
                                " ".join(each_arg_pack) + "\" ).")
                except StopIteration:
                    pool_tasks_empty = True

            while ((not pool_tasks_empty) and
                       (len(running_processes) >= num_processes)) or \
                    (pool_tasks_empty and len(running_processes)):
                time.sleep(1)

                i = 0
                while i < len(running_processes):
                    if running_processes[i][1].poll() is not None:
                        logger.info("Finished process ( \"" +
                                    " ".join(running_processes[i][0]) +
                                    "\" ).")

                        #finished_processes.append(running_processes[i])
                        del running_processes[i]
                    else:
                        time.sleep(1)
                        i += 1

        # finished_processes = None

    start_time = time.time()
    logger.info("Starting merge over all blocks.")

    with h5py.File(output_filename_ext, "a") as output_file_handle:
        output_group = output_file_handle[output_group_name]

        new_neurons_set = segment.get_empty_neuron(shape=tuple(
            original_images_shape_array[1:]),
                                                   dtype=float)

        for i, i_str, (output_filename_block_i,
                       sequential_block_i) in iters.filled_stringify_enumerate(
                           iters.izip(output_filename_block,
                                      original_images_pared_slices.flat)):
            windowed_slice_i = tuple(
                slice(_1, _2, 1) for _1, _2 in [(None, None)] +
                sequential_block_i["windowed_stack_selection"].tolist()[1:])
            window_trimmed_i = tuple(
                slice(_1, _2, 1) for _1, _2 in
                sequential_block_i["windowed_block_selection"].tolist())
            output_filename_block_i = output_filename_block_i.rstrip("/")

            with h5py.File(output_filename_block_i,
                           "r") as each_block_file_handle:
                if "neurons" in each_block_file_handle:
                    neurons_block_i_smaller = hdf5.serializers.read_numpy_structured_array_from_HDF5(
                        each_block_file_handle, "/neurons")

                    neurons_block_i_windowed_count = numpy.squeeze(
                        numpy.apply_over_axes(
                            numpy.sum,
                            neurons_block_i_smaller["mask"].astype(float),
                            tuple(
                                iters.irange(
                                    1, neurons_block_i_smaller["mask"].ndim))))

                    if neurons_block_i_windowed_count.shape == tuple():
                        neurons_block_i_windowed_count = numpy.array(
                            [neurons_block_i_windowed_count])

                    neurons_block_i_non_windowed_count = numpy.squeeze(
                        numpy.apply_over_axes(
                            numpy.sum, neurons_block_i_smaller["mask"]
                            [window_trimmed_i].astype(float),
                            tuple(
                                iters.irange(
                                    1, neurons_block_i_smaller["mask"].ndim))))

                    if neurons_block_i_non_windowed_count.shape == tuple():
                        neurons_block_i_non_windowed_count = numpy.array(
                            [neurons_block_i_non_windowed_count])

                    if len(neurons_block_i_non_windowed_count):
                        # Find ones that are inside the margins by more than
                        # half
                        neurons_block_i_acceptance = (
                            (neurons_block_i_non_windowed_count /
                             neurons_block_i_windowed_count) > 0.5)

                        logger.info(
                            "Accepted the following neurons %s from block %s."
                            % (str(neurons_block_i_acceptance.nonzero()
                                   [0].tolist()), i_str))

                        # Take a subset of our previous neurons that are within
                        # the margins by half
                        neurons_block_i_accepted = neurons_block_i_smaller[
                            neurons_block_i_acceptance]

                        neurons_block_i = numpy.zeros(
                            neurons_block_i_accepted.shape,
                            dtype=new_neurons_set.dtype)
                        neurons_block_i["mask"][
                            windowed_slice_i] = neurons_block_i_accepted[
                                "mask"]
                        neurons_block_i["contour"][
                            windowed_slice_i] = neurons_block_i_accepted[
                                "contour"]
                        neurons_block_i["image"][
                            windowed_slice_i] = neurons_block_i_accepted[
                                "image"]

                        # Copy other properties
                        neurons_block_i["area"] = neurons_block_i_accepted[
                            "area"]
                        neurons_block_i["max_F"] = neurons_block_i_accepted[
                            "max_F"]
                        neurons_block_i[
                            "gaussian_mean"] = neurons_block_i_accepted[
                                "gaussian_mean"]
                        neurons_block_i[
                            "gaussian_cov"] = neurons_block_i_accepted[
                                "gaussian_cov"]
                        # TODO: Correct centroid to larger block position.
                        neurons_block_i["centroid"] = neurons_block_i_accepted[
                            "centroid"]
                        neurons_block_i["centroid"] += sequential_block_i[
                            "windowed_stack_selection"][1:, 0]

                        array_debug_recorder = hdf5.record.generate_HDF5_array_recorder(
                            output_group,
                            group_name="debug",
                            enable=debug,
                            overwrite_group=False,
                            recorder_constructor=hdf5.record.
                            HDF5EnumeratedArrayRecorder)

                        segment.merge_neuron_sets.recorders.array_debug_recorder = array_debug_recorder
                        new_neurons_set = segment.merge_neuron_sets(
                            new_neurons_set, neurons_block_i,
                            **parameters["generate_neurons"]
                            ["postprocess_data"]["merge_neuron_sets"])
                    else:
                        logger.info(
                            "Accepted the following neurons %s from block %s."
                            % (str([]), i_str))
                else:
                    logger.info(
                        "No neurons accepted as none were found for block"
                        " %s." % i_str)

        hdf5.serializers.create_numpy_structured_array_in_HDF5(output_group,
                                                               "neurons",
                                                               new_neurons_set,
                                                               overwrite=True)

        if "parameters" not in output_group["neurons"].attrs:
            output_group["neurons"].attrs["parameters"] = repr(
                dict(
                    list(parameters.items()) +
                    [("block_shape", block_shape), ("num_blocks", num_blocks),
                     ("half_window_shape", half_window_shape),
                     ("half_border_shape",
                      half_border_shape), ("use_drmaa", use_drmaa),
                     ("num_drmaa_cores", num_drmaa_cores), ("debug", debug)]))

    logger.info("Finished merge over all blocks.")
    end_time = time.time()

    diff_time = end_time - start_time

    logger.info("Run time for merge over all blocks is \"" + str(diff_time) +
                " s\".")
Example #2
def main(*argv):
    """
        Simple main function (like in C). Takes all arguments (as from
        sys.argv) and returns an exit status.

        Args:
            argv(list):     arguments (includes command line call).

        Returns:
            int:            exit code (0 if success)
    """

    # Only necessary if running main (normally if calling command line). No
    # point in importing otherwise.
    import argparse

    argv = list(argv)

    # Creates command line parser
    parser = argparse.ArgumentParser(
        description="Parses input from the command line " +
                    "for a registration job."
    )

    parser.add_argument("config_filename",
                        metavar="CONFIG_FILE",
                        type=str,
                        help="JSON file that provides configuration options " +
                             "for how to import TIFF(s)."
    )
    parser.add_argument("input_filenames",
                        metavar="INPUT_FILE",
                        type=str,
                        nargs=1,
                        help="HDF5 file to import (this should include a " +
                             "path to where the internal dataset should be " +
                             "stored)."
    )

    parser.add_argument("output_filenames",
                        metavar="OUTPUT_FILE",
                        type=str,
                        nargs=1,
                        help="HDF5 file to export (this should include a " +
                             "path to where the internal dataset should be " +
                             "stored)."
    )

    # Results of parsing arguments
    # (ignore the first one as it is the command line call).
    parsed_args = parser.parse_args(argv[1:])

    # Go ahead and stuff in parameters with the other parsed_args
    parsed_args.parameters = xjson.read_parameters(parsed_args.config_filename)

    parsed_args.input_file_components = []
    for each_input_filename in parsed_args.input_filenames:
        parsed_args.input_file_components.append(
            hdf5.serializers.split_hdf5_path(each_input_filename)
        )

    parsed_args.output_file_components = []
    for each_output_filename in parsed_args.output_filenames:
        parsed_args.output_file_components.append(
            hdf5.serializers.split_hdf5_path(each_output_filename)
        )

    for each_input_filename_components, each_output_filename_components in iters.izip(
            parsed_args.input_file_components, parsed_args.output_file_components):
        with h5py.File(each_input_filename_components[0], "r") as input_file:
            with h5py.File(each_output_filename_components[0], "a") as output_file:
                data = input_file[each_input_filename_components[1]]
                result_filename = registration.register_mean_offsets(
                    data, to_truncate=True, **parsed_args.parameters
                )
                with h5py.File(result_filename, "r") as result_file:
                    result_file.copy(
                        "reg_frames",
                        output_file[os.path.dirname(each_output_filename_components[1])],
                        name=each_output_filename_components[1]
                    )

                    if parsed_args.parameters.get("include_shift", False):
                        result_file.copy(
                            "space_shift",
                            output_file[os.path.dirname(each_output_filename_components[1])],
                            name=each_output_filename_components[1] + "_shift"
                        )

                # Copy all attributes from raw data to the final result.
                output = output_file[
                    each_output_filename_components[1]
                ]
                for each_attr_name in data.attrs:
                    output.attrs[each_attr_name] = data.attrs[each_attr_name]

                os.remove(result_filename)
                # Only remove the temporary directory if our input or output
                # files are not stored there.
                in_out_dirnames = set(
                    os.path.dirname(os.path.abspath(_.filename)) for _ in [
                        input_file, output_file
                    ]
                )
                result_dirname = os.path.dirname(result_filename)
                if result_dirname not in in_out_dirnames:
                    os.rmdir(result_dirname)

    return 0
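
For reference, this main follows the same exit(main(*argv)) convention that the block runner in Example #1 builds into its subprocess command; a typical entry point (a sketch, not part of the original) is:

# Sketch of the usual entry-point wiring for this main signature.
if __name__ == "__main__":
    import sys
    sys.exit(main(*sys.argv))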
Example #3
def main(*argv):
    """
        Simple main function (like in C). Takes all arguments (as from
        sys.argv) and returns an exit status.

        Args:
            argv(list):     arguments (includes command line call).

        Returns:
            int:            exit code (0 if success)
    """

    # Only necessary if running main (normally if calling command line). No
    # point in importing otherwise.
    import argparse

    argv = list(argv)

    # Creates command line parser
    parser = argparse.ArgumentParser(
        description="Parses input from the command line " +
        "for a registration job.")

    parser.add_argument("config_filename",
                        metavar="CONFIG_FILE",
                        type=str,
                        help="JSON file that provides configuration options " +
                        "for how to import TIFF(s).")
    parser.add_argument("input_filenames",
                        metavar="INPUT_FILE",
                        type=str,
                        nargs=1,
                        help="HDF5 file to import (this should include a " +
                        "path to where the internal dataset should be " +
                        "stored).")

    parser.add_argument("output_filenames",
                        metavar="OUTPUT_FILE",
                        type=str,
                        nargs=1,
                        help="HDF5 file to export (this should include a " +
                        "path to where the internal dataset should be " +
                        "stored).")

    # Results of parsing arguments
    # (ignore the first one as it is the command line call).
    parsed_args = parser.parse_args(argv[1:])

    # Go ahead and stuff in parameters with the other parsed_args
    parsed_args.parameters = xjson.read_parameters(parsed_args.config_filename)

    parsed_args.input_file_components = []
    for each_input_filename in parsed_args.input_filenames:
        parsed_args.input_file_components.append(
            hdf5.serializers.split_hdf5_path(each_input_filename))

    parsed_args.output_file_components = []
    for each_output_filename in parsed_args.output_filenames:
        parsed_args.output_file_components.append(
            hdf5.serializers.split_hdf5_path(each_output_filename))

    for each_input_filename_components, each_output_filename_components in iters.izip(
            parsed_args.input_file_components,
            parsed_args.output_file_components):
        with h5py.File(each_input_filename_components[0], "r") as input_file:
            with h5py.File(each_output_filename_components[0],
                           "a") as output_file:
                data = input_file[each_input_filename_components[1]]
                result_filename = registration.register_mean_offsets(
                    data, to_truncate=True, **parsed_args.parameters)
                with h5py.File(result_filename, "r") as result_file:
                    result_file.copy("reg_frames",
                                     output_file[os.path.dirname(
                                         each_output_filename_components[1])],
                                     name=each_output_filename_components[1])

                    if parsed_args.parameters.get("include_shift", False):
                        result_file.copy(
                            "space_shift",
                            output_file[os.path.dirname(
                                each_output_filename_components[1])],
                            name=each_output_filename_components[1] + "_shift")

                # Copy all attributes from raw data to the final result.
                output = output_file[each_output_filename_components[1]]
                for each_attr_name in data.attrs:
                    output.attrs[each_attr_name] = data.attrs[each_attr_name]

                os.remove(result_filename)
                # Only remove the temporary directory if our input or output
                # files are not stored there.
                in_out_dirnames = set(
                    os.path.dirname(os.path.abspath(_.filename))
                    for _ in [input_file, output_file])
                result_dirname = os.path.dirname(result_filename)
                if result_dirname not in in_out_dirnames:
                    os.rmdir(result_dirname)

    return 0
Example #4
def register_mean_offsets(frames2reg,
                          max_iters=-1,
                          block_frame_length=-1,
                          include_shift=False,
                          to_truncate=False,
                          float_type=numpy.dtype(float).type):
    """
        This algorithm registers the given image stack against its mean
        projection. This is done by computing translations needed to put each
        frame in alignment. Then the translation is performed and new
        translations are computed. This is repeated until no further
        improvement can be made.

        The code for translations can be found in find_mean_offsets.

        Notes:
            Adapted from code provided by Wenzhi Sun with speed improvements
            provided by Uri Dubin.

        Args:
            frames2reg(numpy.ndarray):           Image stack to register (time
                                                 is the first dimension; uses
                                                 C-order tyx or tzyx).

            max_iters(int):                      Number of iterations to allow
                                                 before forcing termination if
                                                 stable point is not found yet.
                                                 Set to -1 if no limit.
                                                 (Default -1)

            block_frame_length(int):             Number of frames to work with
                                                 at a time. By default all.
                                                 (Default -1)

            include_shift(bool):                 Whether to return the shifts
                                                 used, as well. (Default False)

            to_truncate(bool):                   Whether to truncate the frames
                                                 to remove all masked portions.
                                                 (Default False)

            float_type(type):                    Type of float to use for
                                                 calculation. (Default
                                                 numpy.float64).

        Returns:
            (numpy.ndarray):                     the registered frames (or,
                                                 with include_shift, a tuple
                                                 of the frames and per-frame
                                                 shifts; for HDF5 input, the
                                                 filename of a results file).

        Examples:
            >>> a = numpy.zeros((5, 3, 4)); a[:,0] = 1; a[2,0] = 0; a[2,2] = 1
            >>> a
            array([[[ 1.,  1.,  1.,  1.],
                    [ 0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0.,  0.]],
            <BLANKLINE>
                   [[ 1.,  1.,  1.,  1.],
                    [ 0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0.,  0.]],
            <BLANKLINE>
                   [[ 0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0.,  0.],
                    [ 1.,  1.,  1.,  1.]],
            <BLANKLINE>
                   [[ 1.,  1.,  1.,  1.],
                    [ 0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0.,  0.]],
            <BLANKLINE>
                   [[ 1.,  1.,  1.,  1.],
                    [ 0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0.,  0.]]])

            >>> register_mean_offsets(a, include_shift=True)
            (masked_array(data =
             [[[1.0 1.0 1.0 1.0]
              [0.0 0.0 0.0 0.0]
              [0.0 0.0 0.0 0.0]]
            <BLANKLINE>
             [[1.0 1.0 1.0 1.0]
              [0.0 0.0 0.0 0.0]
              [0.0 0.0 0.0 0.0]]
            <BLANKLINE>
             [[-- -- -- --]
              [0.0 0.0 0.0 0.0]
              [0.0 0.0 0.0 0.0]]
            <BLANKLINE>
             [[1.0 1.0 1.0 1.0]
              [0.0 0.0 0.0 0.0]
              [0.0 0.0 0.0 0.0]]
            <BLANKLINE>
             [[1.0 1.0 1.0 1.0]
              [0.0 0.0 0.0 0.0]
              [0.0 0.0 0.0 0.0]]],
                         mask =
             [[[False False False False]
              [False False False False]
              [False False False False]]
            <BLANKLINE>
             [[False False False False]
              [False False False False]
              [False False False False]]
            <BLANKLINE>
             [[ True  True  True  True]
              [False False False False]
              [False False False False]]
            <BLANKLINE>
             [[False False False False]
              [False False False False]
              [False False False False]]
            <BLANKLINE>
             [[False False False False]
              [False False False False]
              [False False False False]]],
                   fill_value = 0.0)
            , array([[0, 0],
                   [0, 0],
                   [1, 0],
                   [0, 0],
                   [0, 0]]))
    """

    float_type = numpy.dtype(float_type).type

    # Must be of type float and must be at least 32-bit (smallest complex type
    # uses two 32-bit floats).
    assert issubclass(float_type, numpy.floating)
    assert numpy.dtype(float_type).itemsize >= 4

    # Sadly, there is no easier way to map the two types; so, this is it.
    float_complex_mapping = {
        numpy.float32 : numpy.complex64,
        numpy.float64 : numpy.complex128,
        numpy.float128 : numpy.complex256
    }
    complex_type = float_complex_mapping[float_type]
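    # (For illustration: float_type=numpy.float32 selects numpy.complex64.
    # numpy.float128/numpy.complex256 are only available on some platforms.)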

    if block_frame_length == -1:
        block_frame_length = len(frames2reg)

    tempdir_name = ""
    temporaries_filename = ""
    if isinstance(frames2reg, h5py.Dataset):
        tempdir_name, temporaries_filename = os.path.split(
            os.path.abspath(frames2reg.file.filename)
        )

        temporaries_filename = os.path.splitext(temporaries_filename)[0]
        temporaries_filename += "_".join(
            [
                frames2reg.name.replace("/", "_"),
                "temporaries.h5"
            ]
        )
        temporaries_filename = os.path.join(
            tempdir_name,
            temporaries_filename
        )
    elif (block_frame_length != len(frames2reg)):
        tempdir_name = tempfile.mkdtemp()
        temporaries_filename = os.path.join(tempdir_name, "temporaries.h5")

    frames2reg_fft = None
    space_shift = None
    this_space_shift = None
    if tempdir_name:
        temporaries_file = h5py.File(temporaries_filename, "w")

        frames2reg_fft = temporaries_file.create_dataset(
            "frames2reg_fft", shape=frames2reg.shape, dtype=complex_type
        )
        space_shift = temporaries_file.create_dataset(
            "space_shift",
            shape=(len(frames2reg), len(frames2reg.shape)-1),
            dtype=int
        )
        this_space_shift = temporaries_file.create_dataset(
            "this_space_shift",
            shape=space_shift.shape,
            dtype=space_shift.dtype
        )
    else:
        frames2reg_fft = numpy.empty(frames2reg.shape, dtype=complex_type)
        space_shift = numpy.zeros(
            (len(frames2reg), len(frames2reg.shape)-1), dtype=int
        )
        this_space_shift = numpy.empty_like(space_shift)

    for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
        frames2reg_fft[range_ij] = fft.fftn(
            frames2reg[range_ij], axes=range(1, len(frames2reg.shape))
        )

    template_fft = numpy.empty(frames2reg.shape[1:], dtype=complex_type)

    this_space_shift_mean = numpy.empty(
        this_space_shift.shape[1:],
        dtype=this_space_shift.dtype
    )

    # Repeat shift calculation until there is no further adjustment.
    num_iters = 0
    squared_magnitude_delta_space_shift = 1.0
    while (squared_magnitude_delta_space_shift != 0.0):
        squared_magnitude_delta_space_shift = 0.0

        template_fft[:] = 0
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            template_fft += translate_fourier(
                frames2reg_fft[range_ij] / len(frames2reg),
                space_shift[range_ij]
            ).sum(axis=0)

        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift[range_ij] = find_offsets(
                frames2reg_fft[range_ij], template_fft
            )

        # Remove global shifts.
        this_space_shift_mean[...] = 0
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift_mean += this_space_shift[range_ij].sum(axis=0)
        this_space_shift_mean[...] = numpy.round(
            this_space_shift_mean.astype(float_type) / len(this_space_shift)
        ).astype(this_space_shift_mean.dtype)
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift[range_ij] = xnumpy.find_relative_offsets(
                this_space_shift[range_ij],
                this_space_shift_mean
            )

        # Find the shortest roll possible (i.e. if it is going over halfway,
        # switch direction so it will go less than half).
        # Note all indices by definition were non-negative and bounded above
        # by the shape. This change bounds them by half the shape instead,
        # but with either sign.
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift[range_ij] = xnumpy.find_shortest_wraparound(
                this_space_shift[range_ij],
                frames2reg_fft.shape[1:]
            )

        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            delta_space_shift_ij = this_space_shift[range_ij] - \
                                   space_shift[range_ij]
            # Sum of squares (the squared Frobenius norm), so shifts in
            # opposite directions cannot cancel each other out.
            squared_magnitude_delta_space_shift += (
                delta_space_shift_ij ** 2).sum()

        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            space_shift[range_ij] = this_space_shift[range_ij]

        num_iters += 1
        logger.info(
            "Completed iteration, %i, " %
            num_iters
            + "where the L_2 norm squared of the relative shift was, %f." %
            squared_magnitude_delta_space_shift
        )
        if (max_iters != -1) and (num_iters >= max_iters):
            logger.info("Hit maximum number of iterations.")
            break

    reg_frames_shape = frames2reg.shape
    if to_truncate:
        space_shift_max = numpy.zeros(
            space_shift.shape[1:], dtype=space_shift.dtype
        )
        space_shift_min = numpy.zeros(
            space_shift.shape[1:], dtype=space_shift.dtype
        )
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            numpy.maximum(
                space_shift_max,
                space_shift[range_ij].max(axis=0),
                out=space_shift_max
            )
            numpy.minimum(
                space_shift_min,
                space_shift[range_ij].min(axis=0),
                out=space_shift_min
            )
        reg_frames_shape = numpy.asarray(reg_frames_shape)
        reg_frames_shape[1:] -= space_shift_max
        reg_frames_shape[1:] += space_shift_min
        reg_frames_shape = tuple(reg_frames_shape)

        space_shift_max = tuple(space_shift_max)
        space_shift_min = space_shift_min.astype(object)
        space_shift_min[space_shift_min == 0] = None
        space_shift_min = tuple(space_shift_min)
        reg_frames_slice = tuple(
            slice(_1, _2) for _1, _2 in iters.izip(
                space_shift_max, space_shift_min
            )
        )
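        # (For illustration: space_shift_max=(2, 3) and space_shift_min=(-1, 0)
        # produce reg_frames_slice=(slice(2, -1), slice(3, None)); the zero
        # minimum becomes None so the slice runs to the end of the axis.)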

    # Adjust the registered frames using the translations found.
    # Mask rolled values.
    reg_frames = None
    if tempdir_name:
        if to_truncate:
            reg_frames = temporaries_file.create_dataset(
                "reg_frames",
                shape=reg_frames_shape,
                dtype=frames2reg.dtype,
                chunks=True
            )
        else:
            reg_frames = temporaries_file.create_group("reg_frames")
            reg_frames = hdf5.serializers.HDF5MaskedDataset(
                reg_frames, shape=frames2reg.shape, dtype=frames2reg.dtype
            )
    else:
        if to_truncate:
            reg_frames = numpy.empty(reg_frames_shape, dtype=frames2reg.dtype)
        else:
            reg_frames = numpy.ma.empty_like(frames2reg)
            reg_frames.mask = numpy.ma.getmaskarray(reg_frames)
            reg_frames.set_fill_value(reg_frames.dtype.type(0))

    for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
        for k in range_ij:
            if to_truncate:
                reg_frames[k] = xnumpy.roll(
                    frames2reg[k], space_shift[k]
                )[reg_frames_slice]
            else:
                reg_frames[k] = xnumpy.roll(
                    frames2reg[k], space_shift[k], to_mask=True
                )

    result = None
    results_filename = ""
    if tempdir_name:
        results_filename = os.path.join(tempdir_name, "results.h5")
        results_file = h5py.File(results_filename, "w")
        if to_truncate:
            temporaries_file.copy(reg_frames.name, results_file)
        else:
            temporaries_file.copy(reg_frames.group, results_file)
        if include_shift:
            temporaries_file.copy(space_shift.name, results_file)
        frames2reg_fft = None
        reg_frames = None
        space_shift = None
        this_space_shift = None
        temporaries_file.close()
        os.remove(temporaries_filename)
        temporaries_filename = ""
        result = results_filename
    else:
        result = reg_frames
        if include_shift:
            result = (reg_frames, space_shift)

    if tempdir_name:
        results_file.close()
        results_file = None

    return result
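
The doctest above covers the in-memory path, where the function returns the registered frames (plus shifts) directly. When frames2reg is an h5py.Dataset, the function instead writes its output next to the input file and returns the filename of a results file; a sketch of that path follows (the file and dataset names are hypothetical):

# Hypothetical sketch of the HDF5-backed path.
import h5py
with h5py.File("movie.h5", "r") as f:
    results_filename = register_mean_offsets(
        f["images"], block_frame_length=100,
        to_truncate=True, include_shift=True)
with h5py.File(results_filename, "r") as results_file:
    reg_frames = results_file["reg_frames"][...]
    space_shift = results_file["space_shift"][...]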
Example #5
def register_mean_offsets(frames2reg,
                          max_iters=-1,
                          block_frame_length=-1,
                          include_shift=False,
                          to_truncate=False,
                          float_type=numpy.dtype(float).type):
    """
        This algorithm registers the given image stack against its mean
        projection. This is done by computing translations needed to put each
        frame in alignment. Then the translation is performed and new
        translations are computed. This is repeated until no further
        improvement can be made.

        The code for translations can be found in find_mean_offsets.

        Notes:
            Adapted from code provided by Wenzhi Sun with speed improvements
            provided by Uri Dubin.

        Args:
            frames2reg(numpy.ndarray):           Image stack to register (time
                                                 is the first dimension; uses
                                                 C-order tyx or tzyx).

            max_iters(int):                      Number of iterations to allow
                                                 before forcing termination if
                                                 stable point is not found yet.
                                                 Set to -1 if no limit.
                                                 (Default -1)

            block_frame_length(int):             Number of frames to work with
                                                 at a time. By default all.
                                                 (Default -1)

            include_shift(bool):                 Whether to return the shifts
                                                 used, as well. (Default False)

            to_truncate(bool):                   Whether to truncate the frames
                                                 to remove all masked portions.
                                                 (Default False)

            float_type(type):                    Type of float to use for
                                                 calculation. (Default
                                                 numpy.float64).

        Returns:
            (numpy.ndarray):                     the registered frames (or,
                                                 with include_shift, a tuple
                                                 of the frames and per-frame
                                                 shifts; for HDF5 input, the
                                                 filename of a results file).

        Examples:
            >>> a = numpy.zeros((5, 3, 4)); a[:,0] = 1; a[2,0] = 0; a[2,2] = 1
            >>> a
            array([[[ 1.,  1.,  1.,  1.],
                    [ 0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0.,  0.]],
            <BLANKLINE>
                   [[ 1.,  1.,  1.,  1.],
                    [ 0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0.,  0.]],
            <BLANKLINE>
                   [[ 0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0.,  0.],
                    [ 1.,  1.,  1.,  1.]],
            <BLANKLINE>
                   [[ 1.,  1.,  1.,  1.],
                    [ 0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0.,  0.]],
            <BLANKLINE>
                   [[ 1.,  1.,  1.,  1.],
                    [ 0.,  0.,  0.,  0.],
                    [ 0.,  0.,  0.,  0.]]])

            >>> register_mean_offsets(a, include_shift=True)
            (masked_array(data =
             [[[1.0 1.0 1.0 1.0]
              [0.0 0.0 0.0 0.0]
              [0.0 0.0 0.0 0.0]]
            <BLANKLINE>
             [[1.0 1.0 1.0 1.0]
              [0.0 0.0 0.0 0.0]
              [0.0 0.0 0.0 0.0]]
            <BLANKLINE>
             [[-- -- -- --]
              [0.0 0.0 0.0 0.0]
              [0.0 0.0 0.0 0.0]]
            <BLANKLINE>
             [[1.0 1.0 1.0 1.0]
              [0.0 0.0 0.0 0.0]
              [0.0 0.0 0.0 0.0]]
            <BLANKLINE>
             [[1.0 1.0 1.0 1.0]
              [0.0 0.0 0.0 0.0]
              [0.0 0.0 0.0 0.0]]],
                         mask =
             [[[False False False False]
              [False False False False]
              [False False False False]]
            <BLANKLINE>
             [[False False False False]
              [False False False False]
              [False False False False]]
            <BLANKLINE>
             [[ True  True  True  True]
              [False False False False]
              [False False False False]]
            <BLANKLINE>
             [[False False False False]
              [False False False False]
              [False False False False]]
            <BLANKLINE>
             [[False False False False]
              [False False False False]
              [False False False False]]],
                   fill_value = 0.0)
            , array([[0, 0],
                   [0, 0],
                   [1, 0],
                   [0, 0],
                   [0, 0]]))
    """

    float_type = numpy.dtype(float_type).type

    # Must be of type float and must be at least 32-bit (smallest complex type
    # uses two 32-bit floats).
    assert issubclass(float_type, numpy.floating)
    assert numpy.dtype(float_type).itemsize >= 4

    # Sadly, there is no easier way to map the two types; so, this is it.
    float_complex_mapping = {
        numpy.float32: numpy.complex64,
        numpy.float64: numpy.complex128,
        numpy.float128: numpy.complex256
    }
    complex_type = float_complex_mapping[float_type]

    if block_frame_length == -1:
        block_frame_length = len(frames2reg)

    tempdir_name = ""
    temporaries_filename = ""
    if isinstance(frames2reg, h5py.Dataset):
        tempdir_name, temporaries_filename = os.path.split(
            os.path.abspath(frames2reg.file.filename))

        temporaries_filename = os.path.splitext(temporaries_filename)[0]
        temporaries_filename += "_".join(
            [frames2reg.name.replace("/", "_"), "temporaries.h5"])
        temporaries_filename = os.path.join(tempdir_name, temporaries_filename)
    elif (block_frame_length != len(frames2reg)):
        tempdir_name = tempfile.mkdtemp()
        temporaries_filename = os.path.join(tempdir_name, "temporaries.h5")

    frames2reg_fft = None
    space_shift = None
    this_space_shift = None
    if tempdir_name:
        temporaries_file = h5py.File(temporaries_filename, "w")

        frames2reg_fft = temporaries_file.create_dataset(
            "frames2reg_fft", shape=frames2reg.shape, dtype=complex_type)
        space_shift = temporaries_file.create_dataset(
            "space_shift",
            shape=(len(frames2reg), len(frames2reg.shape) - 1),
            dtype=int)
        this_space_shift = temporaries_file.create_dataset(
            "this_space_shift",
            shape=space_shift.shape,
            dtype=space_shift.dtype)
    else:
        frames2reg_fft = numpy.empty(frames2reg.shape, dtype=complex_type)
        space_shift = numpy.zeros((len(frames2reg), len(frames2reg.shape) - 1),
                                  dtype=int)
        this_space_shift = numpy.empty_like(space_shift)

    for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
        frames2reg_fft[range_ij] = fft.fftn(frames2reg[range_ij],
                                            axes=range(1,
                                                       len(frames2reg.shape)))

    template_fft = numpy.empty(frames2reg.shape[1:], dtype=complex_type)

    this_space_shift_mean = numpy.empty(this_space_shift.shape[1:],
                                        dtype=this_space_shift.dtype)

    # Repeat shift calculation until there is no further adjustment.
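    # Each pass below:
    #   1. builds a template as the mean of all frames, each translated by
    #      its current shift (done in Fourier space via the shift theorem,
    #      so no resampling is needed);
    #   2. re-estimates every frame's offset against that template with
    #      find_offsets;
    #   3. subtracts the rounded mean shift so the stack does not drift as
    #      a whole, and wraps shifts to the shortest equivalent roll;
    #   4. repeats until the shifts stop changing or max_iters is reached.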
    num_iters = 0
    squared_magnitude_delta_space_shift = 1.0
    while (squared_magnitude_delta_space_shift != 0.0):
        squared_magnitude_delta_space_shift = 0.0

        template_fft[:] = 0
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            template_fft += translate_fourier(
                frames2reg_fft[range_ij] / len(frames2reg),
                space_shift[range_ij]).sum(axis=0)

        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift[range_ij] = find_offsets(frames2reg_fft[range_ij],
                                                      template_fft)

        # Remove global shifts.
        this_space_shift_mean[...] = 0
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift_mean += this_space_shift[range_ij].sum(axis=0)
        this_space_shift_mean[...] = numpy.round(
            this_space_shift_mean.astype(float_type) /
            len(this_space_shift)).astype(this_space_shift_mean.dtype)
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift[range_ij] = xnumpy.find_relative_offsets(
                this_space_shift[range_ij], this_space_shift_mean)

        # Find the shortest roll possible (i.e. if a shift goes over halfway,
        # switch direction so it moves less than half the axis length).
        # Note all shifts were by definition non-negative and bounded above
        # by the shape. This change bounds them by half the shape, with
        # either sign.
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            this_space_shift[range_ij] = xnumpy.find_shortest_wraparound(
                this_space_shift[range_ij], frames2reg_fft.shape[1:])

        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            delta_space_shift_ij = this_space_shift[range_ij] - \
                                   space_shift[range_ij]
            # Accumulate the squared L2 norm of the change in shifts.
            squared_magnitude_delta_space_shift += (
                delta_space_shift_ij ** 2).sum()

        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            space_shift[range_ij] = this_space_shift[range_ij]

        num_iters += 1
        logger.info(
            "Completed iteration %i, where the squared L2 norm of the "
            "change in shifts was %f." %
            (num_iters, squared_magnitude_delta_space_shift))
        if (max_iters != -1) and (num_iters >= max_iters):
            logger.info("Hit maximum number of iterations.")
            break

    reg_frames_shape = frames2reg.shape
    if to_truncate:
        space_shift_max = numpy.zeros(space_shift.shape[1:],
                                      dtype=space_shift.dtype)
        space_shift_min = numpy.zeros(space_shift.shape[1:],
                                      dtype=space_shift.dtype)
        for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
            numpy.maximum(space_shift_max,
                          space_shift[range_ij].max(axis=0),
                          out=space_shift_max)
            numpy.minimum(space_shift_min,
                          space_shift[range_ij].min(axis=0),
                          out=space_shift_min)
        reg_frames_shape = numpy.asarray(reg_frames_shape)
        reg_frames_shape[1:] -= space_shift_max
        reg_frames_shape[1:] += space_shift_min
        reg_frames_shape = tuple(reg_frames_shape)

        space_shift_max = tuple(space_shift_max)
        space_shift_min = space_shift_min.astype(object)
        space_shift_min[space_shift_min == 0] = None
        space_shift_min = tuple(space_shift_min)
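        # Trim space_shift_max from the leading edge and |space_shift_min|
        # from the trailing edge of every spatial axis. Zero minima were
        # mapped to None above because slice(start, 0) selects nothing,
        # whereas slice(start, None) keeps the axis through its end.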
        reg_frames_slice = tuple(
            slice(_1, _2)
            for _1, _2 in iters.izip(space_shift_max, space_shift_min))

    # Adjust the registered frames using the translations found.
    # Mask rolled values.
    reg_frames = None
    if tempdir_name:
        if to_truncate:
            reg_frames = temporaries_file.create_dataset(
                "reg_frames",
                shape=reg_frames_shape,
                dtype=frames2reg.dtype,
                chunks=True)
        else:
            reg_frames = temporaries_file.create_group("reg_frames")
            reg_frames = hdf5.serializers.HDF5MaskedDataset(
                reg_frames, shape=frames2reg.shape, dtype=frames2reg.dtype)
    else:
        if to_truncate:
            reg_frames = numpy.empty(reg_frames_shape, dtype=frames2reg.dtype)
        else:
            reg_frames = numpy.ma.empty_like(frames2reg)
            reg_frames.mask = numpy.ma.getmaskarray(reg_frames)
            reg_frames.set_fill_value(reg_frames.dtype.type(0))

    for range_ij in iters.subrange(0, len(frames2reg), block_frame_length):
        for k in range_ij:
            if to_truncate:
                reg_frames[k] = xnumpy.roll(frames2reg[k],
                                            space_shift[k])[reg_frames_slice]
            else:
                reg_frames[k] = xnumpy.roll(frames2reg[k],
                                            space_shift[k],
                                            to_mask=True)

    result = None
    results_filename = ""
    if tempdir_name:
        results_filename = os.path.join(tempdir_name, "results.h5")
        results_file = h5py.File(results_filename, "w")
        if to_truncate:
            temporaries_file.copy(reg_frames.name, results_file)
        else:
            temporaries_file.copy(reg_frames.group, results_file)
        if include_shift:
            temporaries_file.copy(space_shift.name, results_file)
        frames2reg_fft = None
        reg_frames = None
        space_shift = None
        this_space_shift = None
        temporaries_file.close()
        os.remove(temporaries_filename)
        temporaries_filename = ""
        result = results_filename
    else:
        result = reg_frames
        if include_shift:
            result = (reg_frames, space_shift)

    if tempdir_name:
        results_file.close()
        results_file = None

    return result
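
A minimal usage sketch for register_mean_offsets, mirroring the doctest in
its docstring. The import path is an assumption (the function lives in
nanshe's registration module in recent layouts) and may differ between
versions:

    import numpy

    # Hypothetical import path; adjust to wherever register_mean_offsets
    # lives in your nanshe checkout.
    from nanshe.imp.registration import register_mean_offsets

    frames = numpy.zeros((5, 3, 4))
    frames[:, 0] = 1   # a bright first row in every frame...
    frames[2] = 0
    frames[2, 2] = 1   # ...except frame 2, where it sits in the last row.

    reg, shifts = register_mean_offsets(frames, include_shift=True)
    print(shifts)      # frame 2 gets a nonzero translation; the rest are 0.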
Example No. 6
def generate_neurons_blocks(input_filename,
                            output_filename,
                            num_processes=multiprocessing.cpu_count(),
                            block_shape=None,
                            num_blocks=None,
                            half_window_shape=None,
                            half_border_shape=None,
                            use_drmaa=False,
                            num_drmaa_cores=16,
                            debug=False,
                            **parameters):
    # TODO: Move function into new module with its own command line interface.
    # TODO: Heavy refactoring required on this function.

    # Extract and validate file extensions.

    # Parse input filename and validate that the name is acceptable
    input_filename_ext, input_dataset_name = hdf5.serializers.split_hdf5_path(
        input_filename)

    # Parse output filename and validate that the name is acceptable
    output_filename_ext, output_group_name = hdf5.serializers.split_hdf5_path(
        output_filename)

    # Directory where individual block runs will be stored.
    intermediate_output_dir = output_filename_ext.rsplit(
        os.path.splitext(output_filename_ext)[1], 1)[0] + "_blocks"

    # Read the input data.
    original_images_shape_array = None
    with h5py.File(input_filename_ext, "r") as input_file_handle:
        original_images_shape_array = numpy.array(
            input_file_handle[input_dataset_name].shape
        )

    # Get the amount of the border to slice
    half_border_shape_array = None
    if half_border_shape is None:
        half_border_shape_array = numpy.zeros(
            len(original_images_shape_array), dtype=int
        )
    else:
        assert (len(half_border_shape) == len(original_images_shape_array))

        half_border_shape_array = numpy.array(half_border_shape)

        # Should be of type integer
        assert (issubclass(half_border_shape_array.dtype.type, numpy.integer))

        # Should not cut along temporal portion.
        # Maybe replace with a warning.
        assert (half_border_shape[0] == 0)

    # TODO: Refactor to expanded_numpy.
    # Cuts boundaries from original_images_shape
    original_images_pared_shape_array = original_images_shape_array - \
                                        2*half_border_shape_array

    # At least one of them must be specified. If not some mixture of both.
    assert ((block_shape is not None) or (num_blocks is not None))

    # Size of the block to use by pixels
    block_shape_array = None
    block_shape_array_undefined = None
    if block_shape is None:
        block_shape_array = -numpy.ones(
            original_images_pared_shape_array.shape, dtype=int
        )
        block_shape_array_undefined = numpy.ones(
            original_images_pared_shape_array.shape, dtype=bool
        )
    else:
        # Should have the same number of values in each
        assert (len(original_images_pared_shape_array) == len(block_shape))

        block_shape_array = numpy.array(block_shape, dtype=int)

        # Should be of type integer
        assert issubclass(block_shape_array.dtype.type, numpy.integer)

        block_shape_array_undefined = (block_shape_array == -1)

    # Number of blocks to use along each dimension.
    num_blocks_array = None
    num_blocks_array_undefined = None
    if num_blocks is None:
        num_blocks_array = - \
            numpy.ones(original_images_pared_shape_array.shape, dtype=int)
        num_blocks_array_undefined = numpy.ones(
            original_images_pared_shape_array.shape, dtype=bool)
    else:
        # Should have the same number of values in each
        assert (len(original_images_pared_shape_array) == len(num_blocks))

        num_blocks_array = numpy.array(num_blocks, dtype=int)

        # Should be of type integer
        assert issubclass(num_blocks_array.dtype.type, numpy.integer)

        num_blocks_array_undefined = (num_blocks_array == -1)

    # Ensure block_shape and num_blocks are not both fully specified;
    # at least one dimension must be left for the other to determine.
    assert ~(~block_shape_array_undefined & ~num_blocks_array_undefined).all()

    # If both are undefined, then the block should span that dimension
    missing_both = (block_shape_array_undefined & num_blocks_array_undefined)
    block_shape_array[
        missing_both] = original_images_pared_shape_array[missing_both]
    num_blocks_array[missing_both] = 1
    # Thus, we have resolved these values and can continue.
    block_shape_array_undefined[missing_both] = False
    num_blocks_array_undefined[missing_both] = False

    # Replace undefined values in block_shape_array
    missing_block_shape_array, block_shape_array_remainder = divmod(
        original_images_pared_shape_array[block_shape_array_undefined],
        num_blocks_array[block_shape_array_undefined]
    )
    # Block shape must be well defined.
    assert (block_shape_array_remainder == 0).all()
    missing_block_shape_array = missing_block_shape_array.astype(int)
    block_shape_array[block_shape_array_undefined] = missing_block_shape_array

    # Replace undefined values in num_blocks_array
    missing_num_blocks_array, num_blocks_array_remainder = divmod(
        original_images_pared_shape_array[num_blocks_array_undefined],
        block_shape_array[num_blocks_array_undefined]
    )
    # Allow some blocks to be smaller
    missing_num_blocks_array += (num_blocks_array_remainder != 0).astype(int)
    num_blocks_array[num_blocks_array_undefined] = missing_num_blocks_array
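    # For example, a pared shape of (1000, 200, 200) with num_blocks of
    # (1, 2, 2) and block_shape left as None yields a block_shape of
    # (1000, 100, 100); conversely, a block_shape of (1000, 128, 128) yields
    # num_blocks of (1, 2, 2), with trailing blocks allowed to be smaller.
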
    # Get the overlap window
    half_window_shape_array = None
    if half_window_shape is None:
        half_window_shape_array = block_shape_array / 2.0
    else:
        assert (len(half_window_shape) == len(
            original_images_pared_shape_array))

        half_window_shape_array = numpy.array(half_window_shape)

        assert issubclass(half_window_shape_array.dtype.type, numpy.integer)

    # Want to make sure our window size is at least as large as the one used
    # for the f0 calculation.
    if "extract_f0" in parameters["generate_neurons"]["preprocess_data"]:
        #assert (parameters["generate_neurons"]["preprocess_data"]["extract_f0"]["half_window_size"] == half_window_shape_array[0])
        assert (parameters["generate_neurons"]["preprocess_data"][
            "extract_f0"]["half_window_size"] <= half_window_shape_array[0])

    # Estimate bounds for each slice. Uses typical python [begin, end) for the
    # indices.
    estimated_bounds = numpy.zeros(
        tuple(num_blocks_array),
        dtype=(int, original_images_pared_shape_array.shape + (2,))
    )

    for each_block_indices in iters.index_generator(*num_blocks_array):
        for each_dim, each_block_dim_index in enumerate(each_block_indices):
            estimated_lower_bound = each_block_dim_index * block_shape_array[each_dim]
            estimated_upper_bound = (each_block_dim_index + 1) * block_shape_array[each_dim]

            estimated_bounds[each_block_indices][each_dim] = numpy.array([
                estimated_lower_bound, estimated_upper_bound
            ])

    original_images_pared_slices = numpy.zeros(
        estimated_bounds.shape[:-2],
        dtype=[("actual", int, estimated_bounds.shape[-2:]),
               ("windowed", int, estimated_bounds.shape[-2:]),
               ("windowed_stack_selection", int, estimated_bounds.shape[-2:]),
               ("windowed_block_selection", int, estimated_bounds.shape[-2:])])

    # Get the slice that is within bounds
    original_images_pared_slices["actual"] = estimated_bounds
    original_images_pared_slices["actual"][..., 0] = numpy.where(
        0 < original_images_pared_slices["actual"][..., 0],
        original_images_pared_slices["actual"][..., 0],
        0
    )
    original_images_pared_slices["actual"][..., 1] = numpy.where(
        original_images_pared_slices["actual"][..., 1] < original_images_pared_shape_array,
        original_images_pared_slices["actual"][..., 1],
        original_images_pared_shape_array
    )

    # Build the +/- half-window additions applied to each bound.
    window_addition = numpy.zeros(estimated_bounds.shape, dtype=int)
    window_addition[..., 0] = -half_window_shape_array
    window_addition[..., 1] = half_window_shape_array

    # Get the slice with a window added.
    original_images_pared_slices[
        "windowed"] = estimated_bounds + window_addition
    original_images_pared_slices["windowed"][..., 0] = numpy.where(
        0 < original_images_pared_slices["windowed"][..., 0],
        original_images_pared_slices["windowed"][..., 0],
        0
    )
    original_images_pared_slices["windowed"][..., 1] = numpy.where(
        original_images_pared_slices["windowed"][..., 1] < original_images_pared_shape_array,
        original_images_pared_slices["windowed"][..., 1],
        original_images_pared_shape_array
    )

    # Get the slice information to get the windowed block from the original
    # image stack.
    original_images_pared_slices["windowed_stack_selection"] = original_images_pared_slices["windowed"]
    original_images_pared_slices["windowed_stack_selection"] += xnumpy.expand_view(
        half_border_shape_array, reps_after=2
    )

    # Get slice information for the portion within
    # `original_images_pared_slices["windowed"]`, which corresponds to
    # `original_images_pared_slices["actual"]`.
    #original_images_pared_slices["windowed_block_selection"][..., 0] = 0
    original_images_pared_slices["windowed_block_selection"][..., 1] = (
        original_images_pared_slices["actual"][..., 1] - original_images_pared_slices["actual"][..., 0]
    )
    original_images_pared_slices["windowed_block_selection"][:] += xnumpy.expand_view(
        original_images_pared_slices["actual"][..., 0] - original_images_pared_slices["windowed"][..., 0],
        reps_after=2
    )

    # Get a directory for intermediate results.
    try:
        os.mkdir(intermediate_output_dir)
    except OSError:
        # If it already exists, that is fine.
        pass

    intermediate_config = os.path.join(intermediate_output_dir, "config.json")

    # Overwrite the config file always
    with open(intermediate_config, "w") as fid:
        json.dump(
            dict(list(parameters.items()) + list({"debug" : debug}.items())),
            fid,
            indent=4,
            separators=(",", " : ")
        )
        fid.write("\n")

    # Construct an HDF5 file for each block
    input_filename_block = []
    output_filename_block = []
    stdout_filename_block = []
    stderr_filename_block = []
    with h5py.File(output_filename_ext, "a") as output_file_handle:
        # Create the output group if it doesn't exist.
        output_file_handle.require_group(output_group_name)

        output_group = output_file_handle[output_group_name]

        if "original_images" not in output_group:
            if input_filename_ext == output_filename_ext:
                output_group["original_images"] = h5py.SoftLink(
                    input_dataset_name
                )
            else:
                output_group["original_images"] = h5py.ExternalLink(
                    input_filename_ext,
                    "/" + input_dataset_name
                )

        output_group.require_group("blocks")

        output_group_blocks = output_group["blocks"]

        input_file_handle = None
        try:
            # Skipping using region refs.
            input_file_handle = h5py.File(
                input_filename_ext, "r"
            )
        except IOError:
            # File is already open
            input_file_handle = output_file_handle

        for i, i_str, sequential_block_i in iters.filled_stringify_enumerate(
                original_images_pared_slices.flat
        ):
            intermediate_basename_i = os.path.join(
                intermediate_output_dir, i_str)

            # Hold redirected stdout and stderr for each subprocess.
            stdout_filename_block.append(
                intermediate_basename_i + os.extsep + "out")
            stderr_filename_block.append(
                intermediate_basename_i + os.extsep + "err")

            # Ensure that the blocks are corrected to deal with trimming of
            # the image stack. Must be done after the calculation of
            # original_images_pared_slices["windowed_block_selection"].
            sequential_block_i_windowed = sequential_block_i["windowed_stack_selection"]
            slice_i = tuple(
                slice(_1, _2, 1) for _1, _2 in sequential_block_i_windowed
            )

            if i_str not in output_group_blocks:
                output_group_blocks[i_str] = []
                output_group_blocks[i_str].attrs["filename"] = input_file_handle.filename
                output_group_blocks[i_str].attrs["dataset"] = input_dataset_name
                output_group_blocks[i_str].attrs["slice"] = str(slice_i)

            block_i = output_group_blocks[i_str]

            with h5py.File(intermediate_basename_i + os.extsep + "h5", "a") as each_block_file_handle:
                # Link each block file back to its corresponding entry in
                # the output file via an external link (with a path relative
                # to the intermediate directory).
                if "original_images" not in each_block_file_handle:
                    each_block_file_handle["original_images"] = h5py.ExternalLink(
                        os.path.relpath(
                            block_i.file.filename, intermediate_output_dir
                        ),
                        block_i.name
                    )

                input_filename_block.append(
                    each_block_file_handle.filename + "/" + "original_images"
                )
                output_filename_block.append(
                    each_block_file_handle.filename + "/"
                )

        if input_file_handle != output_file_handle:
            input_file_handle.close()

    cur_module_dirpath = os.path.dirname(os.path.dirname(nanshe.__file__))
    cur_module_filepath = os.path.splitext(os.path.abspath(__file__))[0]
    cur_module_name = os.path.relpath(cur_module_filepath, cur_module_dirpath)
    cur_module_name = cur_module_name.replace(os.path.sep, ".")
    cur_module_filepath += os.extsep + "py"

    import sys

    python = sys.executable

    executable_run = ""
    executable_run += "from sys import argv, path, exit; "

    executable_run += "path[:] = [\"%s\"] + [_ for _ in path if _ != \"%s\"]; " % \
                      (cur_module_dirpath, cur_module_dirpath,)
    executable_run += "from %s import main; exit(main(*argv))" % \
                      (cur_module_name,)

    block_process_args_gen = iters.izip(
        itertools.repeat(python),
        itertools.repeat("-c"),
        itertools.repeat(executable_run),
        itertools.repeat(intermediate_config),
        input_filename_block,
        output_filename_block,
        stdout_filename_block,
        stderr_filename_block
    )

    if use_drmaa:
        # Attempt to import drmaa.
        # If the import fails, the user either does not intend to use it or
        # forgot to install it. If it imports but fails to find symbols,
        # then the user has not set DRMAA_LIBRARY_PATH or
        # does not have libdrmaa.so.
        try:
            import drmaa
        except ImportError:
            # python-drmaa is not installed.
            logger.error(
                "Was not able to import drmaa. " +
                "If this is meant to be run using the OpenGrid submission " +
                "system, then drmaa needs to be installed via pip or " +
                "easy_install."
            )
            raise
        except RuntimeError:
            # The drmaa library was not specified, but python-drmaa is
            # installed.
            logger.error(
                "Was able to import drmaa. " +
                "However, the drmaa library could not be found. Please " +
                "either specify the location of libdrmaa.so using the " +
                "DRMAA_LIBRARY_PATH environment variable or disable/remove " +
                "use_drmaa from the config file."
            )
            raise

        s = drmaa.Session()
        s.initialize()
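
        # One DRMAA job per block: build a job template for each argument
        # pack, submit them all, then block on each job id in turn. Each
        # job's stdout/stderr are redirected to its per-block .out/.err
        # files via outputPath/errorPath.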

        ready_processes = []
        for each_arg_pack in block_process_args_gen:
            ready_processes.append((each_arg_pack, s.createJobTemplate()))
            ready_processes[-1][1].jobName = os.path.basename(
                os.path.splitext(cur_module_filepath)[0]
            ) + "-" + os.path.basename(
                os.path.dirname(each_arg_pack[3].split(".h5")[0])
            ) + "-" + os.path.basename(each_arg_pack[3].split(".h5")[0])
            ready_processes[-1][1].remoteCommand = each_arg_pack[0]
            ready_processes[-1][1].args = each_arg_pack[1:-2]
            ready_processes[-1][1].jobEnvironment = os.environ
            ready_processes[-1][1].inputPath = "localhost:" + os.devnull
            ready_processes[-1][1].outputPath = "localhost:" + each_arg_pack[-2]
            ready_processes[-1][1].errorPath = "localhost:" + each_arg_pack[-1]
            ready_processes[-1][1].workingDirectory = os.getcwd()
            ready_processes[-1][1].nativeSpecification = \
                "-pe batch " + str(num_drmaa_cores)

        running_processes = []
        for each_arg_pack, each_process_template in ready_processes:
            each_process_id = s.runJob(each_process_template)
            running_processes.append(
                (each_arg_pack, each_process_id, each_process_template)
            )
            logger.info(
                "Started new process ( \"" + " ".join(each_arg_pack) + "\" )."
            )

        start_queue_time = time.time()
        logger.info("Waiting for queued jobs to complete.")

        #finished_processes = []
        for each_arg_pack, each_process_id, each_process_template in running_processes:
            each_process_status = s.wait(each_process_id)

            if not each_process_status.hasExited:
                raise RuntimeError(
                    "The process (\"" + " ".join(each_arg_pack) +
                    "\") has exited prematurely."
                )

            logger.info(
                "Finished process ( \"" + " ".join(each_arg_pack) + "\" )."
            )
            s.deleteJobTemplate(each_process_template)
            #finished_processes.append((each_arg_pack, each_process_id))

        s.exit()

        end_queue_time = time.time()
        diff_queue_time = end_queue_time - start_queue_time

        logger.info(
            "Run time for queued jobs to complete is \""
            + str(diff_queue_time) + " s\"."
        )
    else:
        # TODO: Refactor into a separate class (have it return futures somehow)
        #finished_processes = []
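        # A hand-rolled process pool: keep up to num_processes subprocesses
        # alive at once; whenever one finishes, log and drop it, then start
        # the next queued block until the argument generator is exhausted.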
        running_processes = []
        pool_tasks_empty = False
        while (not pool_tasks_empty) or len(running_processes):
            while (not pool_tasks_empty) and (len(running_processes) < num_processes):
                try:
                    each_arg_pack = next(block_process_args_gen)
                    each_arg_pack, each_stdout_filename, each_stderr_filename = (
                        each_arg_pack[:-2], each_arg_pack[-2], each_arg_pack[-1]
                    )
                    each_process = subprocess.Popen(
                        each_arg_pack,
                        stdout=open(each_stdout_filename, "w"),
                        stderr=open(each_stderr_filename, "w")
                    )

                    running_processes.append((each_arg_pack, each_process,))

                    logger.info(
                        "Started new process ( \"" + " ".join(each_arg_pack) + "\" )."
                    )
                except StopIteration:
                    pool_tasks_empty = True

            while ((not pool_tasks_empty) and
                       (len(running_processes) >= num_processes)) or \
                    (pool_tasks_empty and len(running_processes)):
                time.sleep(1)

                i = 0
                while i < len(running_processes):
                    if running_processes[i][1].poll() is not None:
                        logger.info(
                            "Finished process ( \"" +
                            " ".join(running_processes[i][0]) + "\" )."
                        )

                        #finished_processes.append(running_processes[i])
                        del running_processes[i]
                    else:
                        time.sleep(1)
                        i += 1

        # finished_processes = None

    start_time = time.time()
    logger.info("Starting merge over all blocks.")

    with h5py.File(output_filename_ext, "a") as output_file_handle:
        output_group = output_file_handle[output_group_name]

        new_neurons_set = segment.get_empty_neuron(
            shape=tuple(original_images_shape_array[1:]), dtype=float
        )
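
        # Merge policy for every block: keep a neuron only if more than half
        # of its mask lies inside the block proper (outside the halo window),
        # paste the survivors into full-stack coordinates, shift centroids to
        # match, and fold them into the running set with merge_neuron_sets.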

        for i, i_str, (output_filename_block_i, sequential_block_i) in iters.filled_stringify_enumerate(
                iters.izip(output_filename_block, original_images_pared_slices.flat)):
            windowed_slice_i = tuple(
                slice(_1, _2, 1) for _1, _2 in [(None, None)] + sequential_block_i["windowed_stack_selection"].tolist()[1:]
            )
            window_trimmed_i = tuple(
                slice(_1, _2, 1) for _1, _2 in sequential_block_i["windowed_block_selection"].tolist()
            )
            output_filename_block_i = output_filename_block_i.rstrip("/")

            with h5py.File(output_filename_block_i, "r") as each_block_file_handle:
                if "neurons" in each_block_file_handle:
                    neurons_block_i_smaller = hdf5.serializers.read_numpy_structured_array_from_HDF5(
                        each_block_file_handle, "/neurons"
                    )

                    neurons_block_i_windowed_count = numpy.squeeze(
                        numpy.apply_over_axes(
                            numpy.sum,
                            neurons_block_i_smaller["mask"].astype(float),
                            tuple(iters.irange(1, neurons_block_i_smaller["mask"].ndim))
                        )
                    )

                    if neurons_block_i_windowed_count.shape == tuple():
                        neurons_block_i_windowed_count = numpy.array(
                            [neurons_block_i_windowed_count])

                    neurons_block_i_non_windowed_count = numpy.squeeze(
                        numpy.apply_over_axes(
                            numpy.sum,
                            neurons_block_i_smaller["mask"][window_trimmed_i].astype(float),
                            tuple(iters.irange(1, neurons_block_i_smaller["mask"].ndim))
                        )
                    )

                    if neurons_block_i_non_windowed_count.shape == tuple():
                        neurons_block_i_non_windowed_count = numpy.array(
                            [neurons_block_i_non_windowed_count]
                        )

                    if len(neurons_block_i_non_windowed_count):
                        # Find ones that are inside the margins by more than
                        # half
                        neurons_block_i_acceptance = (
                            (neurons_block_i_non_windowed_count / neurons_block_i_windowed_count) > 0.5
                        )

                        logger.info(
                            "Accepted the following neurons %s from block %s."
                            % (
                                str(neurons_block_i_acceptance.nonzero()[0].tolist()),
                                i_str
                            )
                        )

                        # Take a subset of our previous neurons that are within
                        # the margins by half
                        neurons_block_i_accepted = neurons_block_i_smaller[neurons_block_i_acceptance]

                        neurons_block_i = numpy.zeros(
                            neurons_block_i_accepted.shape, dtype=new_neurons_set.dtype
                        )
                        neurons_block_i["mask"][windowed_slice_i] = neurons_block_i_accepted["mask"]
                        neurons_block_i["contour"][windowed_slice_i] = neurons_block_i_accepted["contour"]
                        neurons_block_i["image"][windowed_slice_i] = neurons_block_i_accepted["image"]

                        # Copy other properties
                        neurons_block_i["area"] = neurons_block_i_accepted["area"]
                        neurons_block_i["max_F"] = neurons_block_i_accepted["max_F"]
                        neurons_block_i["gaussian_mean"] = neurons_block_i_accepted["gaussian_mean"]
                        neurons_block_i["gaussian_cov"] = neurons_block_i_accepted["gaussian_cov"]
                        # TODO: Correct centroid to larger block position.
                        neurons_block_i["centroid"] = neurons_block_i_accepted["centroid"]
                        neurons_block_i["centroid"] += sequential_block_i["windowed_stack_selection"][1:, 0]

                        array_debug_recorder = hdf5.record.generate_HDF5_array_recorder(
                            output_group,
                            group_name="debug",
                            enable=debug,
                            overwrite_group=False,
                            recorder_constructor=hdf5.record.HDF5EnumeratedArrayRecorder
                        )

                        segment.merge_neuron_sets.recorders.array_debug_recorder = array_debug_recorder
                        new_neurons_set = segment.merge_neuron_sets(
                            new_neurons_set,
                            neurons_block_i,
                            **parameters["generate_neurons"]["postprocess_data"]["merge_neuron_sets"]
                        )
                    else:
                        logger.info(
                            "Accepted the following neurons %s from block %s." %
                            (
                                str([]),
                                i_str
                            )
                        )
                else:
                    logger.info(
                        "No neurons accepted as none were found for block"
                        " %s." %
                        i_str
                    )

        hdf5.serializers.create_numpy_structured_array_in_HDF5(
            output_group, "neurons", new_neurons_set, overwrite=True)

        if "parameters" not in output_group["neurons"].attrs:
            output_group["neurons"].attrs["parameters"] = repr(dict(
                list(parameters.items()) +
                [("block_shape", block_shape),
                 ("num_blocks", num_blocks),
                 ("half_window_shape", half_window_shape),
                 ("half_border_shape", half_border_shape),
                 ("use_drmaa", use_drmaa),
                 ("num_drmaa_cores", num_drmaa_cores),
                 ("debug", debug)]
            ))

    logger.info("Finished merge over all blocks.")
    end_time = time.time()

    diff_time = end_time - start_time

    logger.info(
        "Run time for merge over all blocks is \"" + str(diff_time) + " s\"."
    )
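
A hedged invocation sketch for generate_neurons_blocks. Every path, dataset
name, and parameter value below is a placeholder; only the nested key
structure (generate_neurons -> preprocess_data / postprocess_data ->
merge_neuron_sets) is taken from the function body above:

    # Placeholder paths and settings for illustration only. Shapes must
    # satisfy the function's asserts (e.g. no border or window along the
    # time axis, and blocks dividing the pared spatial shape evenly when
    # only num_blocks is given).
    generate_neurons_blocks(
        "data.h5/images",               # input HDF5 file + dataset
        "results.h5/analysis",          # output HDF5 file + group
        num_processes=4,
        num_blocks=[1, 2, 2],           # one block in time, 2x2 in space
        half_window_shape=[0, 20, 20],  # halo added around each block
        half_border_shape=[0, 10, 10],  # border pared off the full stack
        **{"generate_neurons": {
            "preprocess_data": {},
            "postprocess_data": {"merge_neuron_sets": {}}
        }}
    )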