import json
import os

import luigi
import numpy as np
import vigra
import z5py

# Helper functions (node_labels, write_default_global_config, set_default_qos,
# set_default_roi, set_default_block_shape, copy_watersheds, find_uniques,
# serialize_assignments, serialize_merged_segmentation, ...), the workflow
# classes (DownscalingWorkflow, GraphWorkflow, NodeLabelWorkflow) and the
# upper-case path constants (RAW_PATH, PAINTERA_PATH, SEG_PATH, TMP_PATH, ...)
# are assumed to be imported from the surrounding repository.


def make_new_fragment_segment_assignment():
    path1 = '/g/kreshuk/data/arendt/platyneris_v1/data.n5'
    k1 = 'volumes/paintera/proofread_cells_multiset/data/s0'
    path2 = './data.n5'
    k2 = 'volumes/segmentation2/s0'

    tmp_folder = './tmp_subdivision_labels'
    config_folder = os.path.join(tmp_folder, 'configs')
    set_default_qos('high')
    write_default_global_config(config_folder)

    target = 'slurm'
    max_jobs = 200
    assignments = node_labels(path1, k1, path2, k2, 'new-frag-seg', tmp_folder,
                              target, max_jobs)
    assert assignments.ndim == 1
    assignments = np.concatenate(
        [np.arange(len(assignments), dtype='uint64')[:, None],
         assignments[:, None]],
        axis=1)

    with z5py.File(path2) as f:
        f.create_dataset('node_labels/fragment-segment-assignment2',
                         data=assignments,
                         compression='gzip',
                         chunks=assignments.shape)
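
# Illustration only (not part of the original pipeline): a toy version of the
# (n_fragments, 2) table built above, where column 0 is the fragment id and
# column 1 the segment id it is assigned to.
def _example_assignment_table():
    seg = np.array([0, 3, 3, 7], dtype='uint64')  # toy fragment -> segment vector
    table = np.concatenate(
        [np.arange(len(seg), dtype='uint64')[:, None], seg[:, None]], axis=1)
    # table == [[0, 0], [1, 3], [2, 3], [3, 7]]
    return table
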
def cell_segmentation_workflow(path,
                               aff_path,
                               mask_path,
                               mask_key,
                               region_path,
                               region_key,
                               use_curated_affs,
                               use_lmc,
                               tmp_folder,
                               target,
                               max_jobs,
                               roi_begin=None,
                               roi_end=None):
    # number of jobs and threads for target
    assert target in ('slurm', 'local')
    if target == 'local':
        max_jobs_mc = 1
        max_threads = 16
    else:
        max_jobs_mc = 15
        max_threads = 8

    config_dir = os.path.join(tmp_folder, 'configs')
    write_default_global_config(config_dir, roi_begin, roi_end)

    run_watershed(path, aff_path, use_curated_affs, mask_path, mask_key,
                  tmp_folder, target, max_jobs)
    if use_lmc:
        run_lmc(path, aff_path, use_curated_affs, region_path, region_key,
                tmp_folder, target, max_threads, max_jobs, max_jobs_mc)
    else:
        run_mc(path, aff_path, use_curated_affs, tmp_folder, target,
               max_threads, max_jobs, max_jobs_mc)

    # postprocessing:
    # 1.) compute sizes for size threshold
    run_morphology(path, use_curated_affs, use_lmc, tmp_folder, target,
                   max_jobs)

    identifier = 'result'
    # we unmerge only if we also use the lmc, because it takes the nuclei into account
    if use_lmc:
        # 2.) unmerge cells with more than one assigned nucleus
        unmerge_nuclei(path, use_curated_affs, tmp_folder, target, max_jobs,
                       max_threads)
        identifier = 'filtered_unmerge'

    # 3.) filter sizes with graph watershed
    filter_size(path, use_curated_affs, use_lmc, identifier, target,
                tmp_folder, max_jobs, max_threads)
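
# Hypothetical invocation; all paths and flags below are placeholders, not
# values from the original pipeline:
#
#   cell_segmentation_workflow('./seg.n5', './affinities.n5',
#                              './mask.n5', 'mask',
#                              './regions.n5', 'regions/s0',
#                              use_curated_affs=True, use_lmc=True,
#                              tmp_folder='./tmp_cells', target='local',
#                              max_jobs=8)
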
def downscale(path, in_key, out_key, tmp_folder, max_jobs, target, n_scales=5):
    task = DownscalingWorkflow

    config_folder = os.path.join(tmp_folder, 'configs')
    write_default_global_config(config_folder)
    configs = task.get_config()

    config = configs['downscaling']
    config.update({
        'mem_limit': 8,
        'time_limit': 120,
        'library_kwargs': {
            'order': 0  # nearest-neighbor interpolation, appropriate for labels
        }
    })
    with open(os.path.join(config_folder, 'downscaling.config'), 'w') as f:
        json.dump(config, f)

    scale_factors = [[2, 2, 2]] * n_scales
    halos = [[0, 0, 0]] * n_scales

    t = task(tmp_folder=tmp_folder,
             config_dir=config_folder,
             target=target,
             max_jobs=max_jobs,
             input_path=path,
             input_key=in_key,
             output_key_prefix=out_key,
             scale_factors=scale_factors,
             halos=halos,
             metadata_format='paintera')
    ret = luigi.build([t], local_scheduler=True)
    if not ret:
        raise RuntimeError("Downscaling the segmentation failed")

    with z5py.File(path, 'r') as f:
        ds = f[in_key]
        max_id = ds.attrs['maxId']

    for scale in range(n_scales + 1):
        scale_key = '%s/s%i' % (out_key, scale)
        add_max_id(path, scale_key, max_id=max_id)
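
# Hypothetical usage: downscales a label dataset n_scales times by a factor
# of 2 per axis and writes paintera-style scale keys out_key/s0 ... s<n>:
#
#   downscale('./data.n5', 'volumes/segmentation/s0',
#             'volumes/segmentation_downscaled', './tmp_downscale',
#             max_jobs=50, target='local')
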
def compute_baseline_tables(version, target, max_jobs):
    path = BASELINE_ROOT
    folder = os.path.join(ROOT, version, 'images', 'local')
    for name in BASELINE_NAMES:
        key = 'volumes/cells/%s/filtered_size' % name
        out_path = '%s.csv' % name

        if os.path.exists(out_path):
            continue

        tmp_folder = './tmp_regions_%s' % name
        config_folder = os.path.join(tmp_folder, 'configs')
        write_default_global_config(config_folder)
        label_ids = get_label_ids(path, key)
        region_attributes(path,
                          out_path,
                          folder,
                          label_ids,
                          tmp_folder,
                          target=target,
                          max_jobs=max_jobs,
                          key_seg=key)
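
# Hypothetical call; BASELINE_ROOT, BASELINE_NAMES and ROOT are module-level
# constants assumed to be defined elsewhere in the repository:
#
#   compute_baseline_tables('1.0.0', target='slurm', max_jobs=200)
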
def make_proofreading_project(project_folder, tmp_folder, assignments,
                              block_labels, block_roi, target, max_jobs):

    if len(block_labels) == 0:
        return
    # don't do anything if we have a paintera project already
    if os.path.exists(os.path.join(project_folder, 'attributes.json')):
        return

    os.makedirs(project_folder, exist_ok=True)
    config_dir = os.path.join(tmp_folder, 'configs')

    roi_begin, roi_end = block_roi
    write_default_global_config(config_dir, roi_begin, roi_end)
    with open(os.path.join(config_dir, 'global.config'), 'r') as f:
        block_shape = json.load(f)['block_shape']

    data_path = os.path.join(project_folder, 'data.n5')
    f = z5py.File(data_path)
    f.require_group('volumes')

    # make a link to the raw data
    raw_out_key = 'volumes/raw'
    if raw_out_key not in f:
        print("Make raw symlink")
        raw_in = os.path.join(RAW_PATH, RAW_KEY)
        raw_out = os.path.join(data_path, raw_out_key)
        os.symlink(raw_in, raw_out)

    # get the relevant fragment segment assignments for this block
    print("Get assignment mask")
    assignment_mask = np.isin(assignments[:, 1], block_labels)
    assert assignment_mask.sum() > 0
    block_assignments = assignments[assignment_mask]
    assert block_assignments.shape[0] == assignment_mask.sum()
    assert block_assignments.shape[1] == 2
    print("Sub assignments have the shape:", block_assignments.shape)

    # copy the relevant part of the fragment segment assignment
    print("Copy the assignments")
    g_out = f.require_group('volumes/paintera')
    save_assignments = block_assignments.T
    ds_ass = g_out.require_dataset('fragment-segment-assignment',
                                   shape=save_assignments.shape,
                                   chunks=save_assignments.shape,
                                   compression='gzip',
                                   dtype='uint64')
    ds_ass[:] = save_assignments

    # copy the relevant parts of the watersheds
    print("Copy the watersheds")
    ws_ids = block_assignments[:, 0]
    copy_watersheds(PAINTERA_PATH, os.path.join(PAINTERA_KEY, 'data/s0'),
                    data_path, 'volumes/watershed', ws_ids,
                    tmp_folder, target, max_jobs)

    # make the paintera data
    # resolution of the raw data (zyx), assumed to be in micrometer
    res = [0.025, 0.01, 0.01]
    # per-scale restriction of the label-multiset entries; -1 = no restriction
    restrict_sets = [-1, -1, 5, 4, 4, 3, 3, 1]
    print("Make new paintera data")
    set_default_roi(roi_begin, roi_end)
    set_default_block_shape(block_shape)
    convert_to_paintera_format(data_path,
                               raw_out_key,
                               'volumes/watershed',
                               'volumes/paintera',
                               label_scale=1,
                               resolution=res,
                               tmp_folder=tmp_folder,
                               target=target,
                               max_jobs=max_jobs,
                               max_threads=16,
                               convert_to_label_multisets=True,
                               restrict_sets=restrict_sets)
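
# Hypothetical invocation with a toy block. 'assignments' is the global
# (n_fragments, 2) fragment-segment table and 'block_roi' a (begin, end)
# pair of voxel coordinates:
#
#   make_proofreading_project('./project01', './tmp_project01', assignments,
#                             block_labels=[101, 102],
#                             block_roi=([0, 0, 0], [256, 2048, 2048]),
#                             target='local', max_jobs=8)
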
def export_selected_projects(projects, rois_to_blocks, target, max_jobs):
    """ Export only selected projects and fill in the rest with the
    old global paintera project. This means we need to keep ids consistent
    between projects.
    """
    project_folders = [
        os.path.join(PROJECT_ROOT, 'project%02i' % project_id)
        for project_id in projects
    ]
    assert all(os.path.exists(pfolder) for pfolder in project_folders)

    tmp_folder = './tmp_export'
    tmp_path = os.path.join(tmp_folder, 'data.n5')

    #
    # load the original paintera data
    #

    # copy the watershed segmentation
    ws_in_key = os.path.join(PAINTERA_KEY, 'data', 's0')
    ws_out_key = 'volumes/watershed'
    copy_watersheds(PAINTERA_PATH, ws_in_key, tmp_path, ws_out_key, None,
                    tmp_folder, target, max_jobs)
    with z5py.File(tmp_path, 'r') as f:
        max_id = f[ws_out_key].attrs['maxId']

    # load the fragment segment assignments
    ass_key = os.path.join(PAINTERA_KEY, 'fragment-segment-assignment')
    with z5py.File(PAINTERA_PATH, 'r') as f:
        assignments = f[ass_key][:].T

    #
    # load corrections from the projects and insert them
    #

    for project_folder in project_folders:
        proj_id = int(project_folder[-2:])
        tmp_project = os.path.join(tmp_folder, 'tmp_proj%i' % proj_id)
        project_path = os.path.join(project_folder, 'data.n5')
        project_in_root = 'volumes/paintera'
        project_in_key = os.path.join(project_in_root, 'data', 's0')

        # set the bounding box for this project
        config_dir = os.path.join(tmp_project, 'configs')
        rb, re = rois_to_blocks[proj_id]
        set_default_block_shape([50, 512, 512])
        write_default_global_config(config_dir, rb, re)

        # copy this project's watersheds, offsetting everything by the current max id
        copy_watersheds(project_path,
                        project_in_key,
                        tmp_path,
                        ws_out_key,
                        None,
                        tmp_project,
                        target,
                        max_jobs,
                        offset=max_id,
                        insert_mode=True)

        # update the fragment segment assignment
        project_ass_key = os.path.join(project_in_root,
                                       'fragment-segment-assignment')
        with z5py.File(project_path, 'r') as f:
            this_assignments = f[project_ass_key][:].T
        # offset the assignments
        this_assignments += max_id
        assignments = np.concatenate([assignments, this_assignments], axis=0)

        # update the max id
        max_id = int(assignments.max())

    # write the new segmentation
    seg_out_key = 'volumes/segmentation2/s0'
    serialize_segmentation(tmp_path, ws_out_key, TMP_PATH, seg_out_key,
                           assignments, tmp_folder, target, max_jobs)
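
# Illustration only (not part of the original pipeline): the id-offset trick
# used above. Shifting a project's fragment-segment table by the running
# max id keeps its ids disjoint from the global table before concatenation.
def _example_id_offset():
    global_table = np.array([[1, 10], [2, 10]], dtype='uint64')
    project_table = np.array([[1, 3], [2, 3]], dtype='uint64')
    project_table += global_table.max()  # offset by the current max id
    return np.concatenate([global_table, project_table], axis=0)
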
def make_root_seg(tmp_folder, target, max_jobs):
    in_path = SEG_PATH
    in_key = SEG_KEY + '/s0'
    ws_path = PAINTERA_PATH
    ws_key = PAINTERA_KEY + "/data/s0"
    out_path = TMP_PATH
    out_key = 'volumes/segmentation'
    assignment_out_key = 'node_labels/fragment_segment_assignment'

    config_dir = os.path.join(tmp_folder, 'configs')
    write_default_global_config(config_dir)
    tmp_path = os.path.join(tmp_folder, 'data.n5')

    # get the current fragment segment assignment
    assignments = node_labels(ws_path,
                              ws_key,
                              in_path,
                              in_key,
                              'rootseg',
                              tmp_folder,
                              target=target,
                              max_jobs=max_jobs,
                              max_overlap=True,
                              ignore_label=None)

    # find the unique ids of the watersheds
    unique_key = 'uniques'
    find_uniques(ws_path, ws_key, tmp_path, unique_key, tmp_folder, config_dir,
                 max_jobs, target)

    with z5py.File(tmp_path, 'r') as f:
        ds = f[unique_key]
        ws_ids = ds[:]

    # convert to paintera fragment segment assignments
    id_offset = int(ws_ids.max()) + 1
    # print("Max ws id:", id_offset)
    # print("Ws  len  :", ws_ids.shape)
    # print("Ass len  :", assignments.shape)
    # print(ws_ids[-10:])
    assignments = assignments[ws_ids]
    assignments = vigra.analysis.relabelConsecutive(assignments,
                                                    start_label=id_offset,
                                                    keep_zeros=True)[0]
    assert len(assignments) == len(ws_ids), "%i, %i" % (len(assignments),
                                                        len(ws_ids))
    paintera_assignments = np.concatenate(
        [ws_ids[:, None], assignments[:, None]], axis=1).T

    assignment_tmp_key = 'tmp_assignments'
    with z5py.File(tmp_path) as f:
        ds = f.require_dataset(assignment_tmp_key,
                               shape=paintera_assignments.shape,
                               compression='gzip',
                               chunks=paintera_assignments.shape,
                               dtype='uint64')
        ds[:] = paintera_assignments

    # make and serialize new assignments
    print("Serializing assignments ...")
    serialize_assignments(tmp_folder,
                          tmp_path,
                          assignment_tmp_key,
                          tmp_path,
                          unique_key,
                          out_path,
                          assignment_out_key,
                          locked_segments=None,
                          relabel_output=False,
                          map_to_background=None)

    # write the new segmentation
    print("Serializing new segmentation ...")
    serialize_merged_segmentation(ws_path, ws_key, out_path, out_key, out_path,
                                  assignment_out_key, tmp_folder, max_jobs,
                                  target)
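
# Note on the relabeling in make_root_seg: vigra.analysis.relabelConsecutive
# maps the labels to a consecutive range starting at start_label, and
# keep_zeros=True leaves the zero (background) label untouched, e.g.
# [0, 5, 5, 9] with start_label=100 becomes [0, 100, 100, 101].
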
def compute_connected_components(ws_path,
                                 ws_key,
                                 seg_path,
                                 seg_key,
                                 out_path,
                                 node_label_key,
                                 cc_key,
                                 tmp_folder,
                                 target,
                                 max_jobs,
                                 graph_key='graph',
                                 ignore_label=True):

    config_folder = os.path.join(tmp_folder, 'configs')
    write_default_global_config(config_folder)

    #
    # compute the graph
    #
    task = GraphWorkflow
    configs = task.get_config()
    conf = configs['initial_sub_graphs']
    conf.update({'ignore_label': ignore_label})

    with open(os.path.join(config_folder, 'initial_sub_graphs.config'),
              'w') as f:
        json.dump(conf, f)

    n_threads = 8
    task_names = ['merge_sub_graphs', 'map_edge_ids']
    for tt in task_names:
        conf = configs[tt]
        conf.update({'threads_per_job': n_threads, 'mem_limit': 128})
        with open(os.path.join(config_folder, '%s.config' % tt), 'w') as f:
            json.dump(conf, f)

    t = task(tmp_folder=tmp_folder,
             max_jobs=max_jobs,
             config_dir=config_folder,
             target=target,
             input_path=ws_path,
             input_key=ws_key,
             graph_path=out_path,
             output_key=graph_key)

    ret = luigi.build([t], local_scheduler=True)
    assert ret, "Graph computation failed"

    #
    # compute the node labels
    #
    task = NodeLabelWorkflow

    t = task(tmp_folder=tmp_folder,
             max_jobs=max_jobs,
             target=target,
             config_dir=config_folder,
             ws_path=ws_path,
             ws_key=ws_key,
             input_path=seg_path,
             input_key=seg_key,
             output_path=out_path,
             output_key=node_label_key,
             ignore_label=0 if ignore_label else None)
    ret = luigi.build([t], local_scheduler=True)
    assert ret, "Node label computation failed"

    with z5py.File(out_path, 'r') as f:
        node_labels = f[node_label_key][:]

    #
    # load the graph and check for connected components
    #
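    # _cc_nifty is assumed to be defined elsewhere in this module; it
    # presumably loads the serialized graph from out_path/graph_key and runs
    # connected components over the node labeling with nifty.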
    ccs = _cc_nifty(out_path, graph_key, node_labels, ignore_label)
    return node_labels, ccs