Exemplo n.º 1
0
def run_glia_prediction(e3=False):
    """
    Submit the glia prediction batch jobs for all supervoxels (SVs) and check
    that no SV contained in the pruned RAG is reported as missing afterwards.

    The SV storage paths (``sd.so_dir_paths``) are split into
    ``global_params.NGPU_TOTAL * 2`` chunks; every chunk becomes one job
    submitted via ``qu.QSUB_script``. Predictions are stored under the key
    ``"glia_probas"``.

    Parameters
    ----------
    e3 : bool
        Evaluated by truthiness. If true, the script
        ``predict_sv_views_chunked_e3`` is used and the model is referenced by
        the string ``'get_glia_model_e3'``; otherwise
        ``predict_sv_views_chunked`` is run with the properties of the model
        returned by ``get_glia_model()``.

    Raises
    ------
    ValueError
        If ``find_missing_sv_views`` reports SVs which are part of the RAG.
    """
    log = initialize_logging('glia_prediction',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    # only append to this key if needed (for e.g. different versions, change accordingly in 'axoness_mapping.py')
    pred_key = "glia_probas"
    # Decide the mode once. The original mixed `e3 == True` and `e3 is True`,
    # which disagreed for values such as 1 or numpy.True_ and ended in a
    # TypeError when "init_gpu" was assigned on the e3 model string.
    use_e3 = bool(e3)

    # Load initial RAG from  Knossos mergelist text file.
    g = nx.read_edgelist(global_params.config.pruned_rag_path,
                         nodetype=np.uint)
    all_sv_ids_in_rag = np.array(list(g.nodes()), dtype=np.uint)

    log.debug('Found {} CCs with a total of {} SVs in inital RAG.'.format(
        nx.number_connected_components(g), g.number_of_nodes()))
    # chunk them
    sd = SegmentationDataset("sv",
                             working_dir=global_params.config.working_dir)
    multi_params = chunkify(sd.so_dir_paths, global_params.NGPU_TOTAL * 2)
    # get model properties
    if use_e3:
        model_kwargs = 'get_glia_model_e3'
    else:
        m = get_glia_model()
        model_kwargs = dict(model_path=m._path,
                            normalize_data=m.normalize_data,
                            imposed_batch_size=m.imposed_batch_size,
                            nb_labels=m.nb_labels,
                            channels_to_load=m.channels_to_load)
        # GPUs are made available for every job via slurm, no need for random
        # assignments. All jobs share this one dict, so set the key once.
        model_kwargs["init_gpu"] = 0
    # all other kwargs like obj_type='sv' and version are the current SV SegmentationDataset by default
    so_kwargs = dict(working_dir=global_params.config.working_dir)
    # for glia views set woglia to False (because glia are included),
    #  raw_only to True
    pred_kwargs = dict(woglia=False,
                       pred_key=pred_key,
                       verbose=False,
                       raw_only=True)

    multi_params = [[par, model_kwargs, so_kwargs, pred_kwargs]
                    for par in multi_params]
    if use_e3:
        # TODO: using two GPUs on a single node seems to be error-prone
        #  -> wb13 froze when processing example_cube=2
        n_cores = global_params.NCORES_PER_NODE // global_params.NGPUS_PER_NODE
        if 'example_cube' in global_params.config.working_dir:
            n_cores = global_params.NCORES_PER_NODE  # do not run two predictions in parallel
        qu.QSUB_script(multi_params,
                       "predict_sv_views_chunked_e3",
                       log=log,
                       n_max_co_processes=global_params.NGPU_TOTAL,
                       script_folder=None,
                       n_cores=n_cores,
                       suffix="_glia",
                       additional_flags="--gres=gpu:1",
                       remove_jobfolder=True)
    else:
        qu.QSUB_script(multi_params,
                       "predict_sv_views_chunked",
                       log=log,
                       n_max_co_processes=global_params.NGPU_TOTAL,
                       n_cores=global_params.NCORES_PER_NODE //
                       global_params.NGPUS_PER_NODE,
                       suffix="_glia",
                       additional_flags="--gres=gpu:1",
                       remove_jobfolder=True)
    log.info('Finished glia prediction. Checking completeness.')
    res = find_missing_sv_views(sd,
                                woglia=False,
                                n_cores=global_params.NCORES_PER_NODE)
    # Set lookup instead of scanning the whole ID array for every element.
    rag_sv_ids = set(all_sv_ids_in_rag.tolist())
    # Missing SVs which are not part of the RAG are ignored here.
    # TODO: decide whether to use or not
    missing_contained_in_rag = [el for el in res if el in rag_sv_ids]
    if missing_contained_in_rag:
        msg = "Not all SVs were predicted! {}/{} missing:\n" \
              "{}".format(len(missing_contained_in_rag), len(all_sv_ids_in_rag),
                          missing_contained_in_rag[:100])
        log.error(msg)
        raise ValueError(msg)
    else:
        log.info('Success.')
Exemplo n.º 2
0
def run_glia_rendering(max_n_jobs=None):
    """
    Render the views required for glia prediction for every connected
    component (CC) of the pruned RAG and verify that no SV of the RAG is
    reported as missing afterwards.

    Uses the pruned RAG (stored as edge list .bz2 file) which is computed
     in `init_cell_subcell_sds`.

    CCs with more than ``RENDERING_MAX_NB_SV`` nodes are handed to
    ``_run_huge_ssv_render_worker`` processes. CCs whose entry in the CC size
    dictionary is below ``global_params.min_cc_size_ssv`` are skipped. All
    remaining CCs are shuffled, chunked and rendered by
    ``render_views_glia_removal`` batch jobs.

    Parameters
    ----------
    max_n_jobs : int, optional
        Number of chunks the small CCs are split into. Defaults to
        ``global_params.NGPU_TOTAL * 4`` if
        ``global_params.PYOPENGL_PLATFORM == 'egl'`` and to
        ``global_params.NCORE_TOTAL * 4`` otherwise.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the number of results of the huge-SSV workers does not match the
        number of huge SSVs, or if ``find_missing_sv_views`` reports SVs which
        are part of the RAG.
    """
    if max_n_jobs is None:
        max_n_jobs = global_params.NGPU_TOTAL * 4 if global_params.PYOPENGL_PLATFORM == 'egl' \
            else global_params.NCORE_TOTAL * 4
    log = initialize_logging('glia_view_rendering',
                             global_params.config.working_dir + '/logs/',
                             overwrite=True)
    log.info("Preparing RAG.")
    np.random.seed(0)

    # view rendering prior to glia removal, choose SSD accordingly
    # glia removal is based on the initial RAG and does not require explicitly stored SSVs
    # TODO: refactor how splits are stored, currently those are stored at ssv_tmp
    version = "tmp"

    G = nx.read_edgelist(global_params.config.pruned_rag_path,
                         nodetype=np.uint)

    # `nx.connected_component_subgraphs` was removed in networkx 2.4;
    # `G.subgraph(c).copy()` is its documented, equivalent replacement.
    cc_gs = sorted((G.subgraph(c).copy() for c in nx.connected_components(G)),
                   key=len,
                   reverse=True)
    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)

    # generate parameter for view rendering of individual SSV
    # TODO: remove SVs below minimum size (-> global_params.min_cc_size_ssv)
    sds = SegmentationDataset("sv",
                              working_dir=global_params.config.working_dir)
    # cached bounding boxes multiplied by the dataset scaling, keyed by SV ID
    bbs = sds.load_cached_data('bounding_box') * sds.scaling
    sv_size_dict = dict(zip(sds.ids, bbs))
    ccsize_dict = create_ccsize_dict(cc_gs,
                                     sv_size_dict,
                                     is_connected_components=True)

    big_ssv = []
    small_ssv = []
    for g in cc_gs:
        if g.number_of_nodes() > RENDERING_MAX_NB_SV:
            big_ssv.append(g)
        elif ccsize_dict[next(iter(g.nodes()))] < global_params.min_cc_size_ssv:
            pass  # ignore this CC
        else:
            small_ssv.append(g)

    log.info("View rendering for glia separation started.")
    # # identify huge SSVs and process them on the entire cluster
    if len(big_ssv) > 0:
        n_threads = 2
        log.info("Processing {} huge SSVs in {} threads on the entire cluster"
                 ".".format(len(big_ssv), n_threads))
        q_in = Queue()
        q_out = Queue()
        for kk, g in enumerate(big_ssv):
            q_in.put((kk, g, version))
        # one -1 per worker; presumably the stop signal for
        # `_run_huge_ssv_render_worker` -- confirm against the worker.
        for _ in range(n_threads):
            q_in.put(-1)
        ps = [
            Process(target=_run_huge_ssv_render_worker, args=(q_in, q_out))
            for _ in range(n_threads)
        ]
        for p in ps:
            p.start()
            time.sleep(0.5)
        q_in.close()
        q_in.join_thread()
        # NOTE(review): the workers are joined before `q_out` is read. The
        # multiprocessing docs warn that this can block if a worker still has
        # buffered queue items -- confirm the results put by the workers stay
        # small.
        for p in ps:
            p.join()
        # NOTE(review): `Queue.qsize()` is documented as approximate and is
        # not implemented on some platforms (e.g. macOS).
        if q_out.qsize() != len(big_ssv):
            raise ValueError(
                'Not all `_run_huge_ssv_render_worker` jobs completed successfully.'
            )
    # render small SSV without overhead and single cpus on whole cluster
    np.random.shuffle(small_ssv)
    multi_params = chunkify(small_ssv, max_n_jobs)
    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    multi_params = [(ixs, global_params.config.working_dir, version)
                    for ixs in multi_params]
    qu.QSUB_script(multi_params,
                   "render_views_glia_removal",
                   log=log,
                   n_max_co_processes=global_params.NGPU_TOTAL,
                   n_cores=global_params.NCORES_PER_NODE //
                   global_params.NGPUS_PER_NODE,
                   additional_flags="--gres=gpu:1",
                   remove_jobfolder=True)

    # check completeness
    log.info(
        'Finished view rendering for glia separation. Checking completeness.')
    sd = SegmentationDataset("sv",
                             working_dir=global_params.config.working_dir)
    res = find_missing_sv_views(sd,
                                woglia=False,
                                n_cores=global_params.NCORES_PER_NODE)
    # Set lookup instead of scanning the whole ID array for every element.
    rag_sv_ids = set(all_sv_ids_in_rag.tolist())
    # Missing SVs which are not part of the RAG are ignored here.
    # TODO: decide whether to use or not
    missing_contained_in_rag = [el for el in res if el in rag_sv_ids]
    if missing_contained_in_rag:
        msg = "Not all SVs were rendered completely! {}/{} missing:\n" \
              "{}".format(len(missing_contained_in_rag), len(all_sv_ids_in_rag),
                          missing_contained_in_rag[:100])
        log.error(msg)
        raise ValueError(msg)
    else:
        log.info('All SVs now contain views required for glia prediction.')
Exemplo n.º 3
0
def run_glia_rendering():
    """
    Render the views required for glia prediction for all connected
    components (CCs) of the initial RAG and check completeness.

    The initial RAG is read from ``global_params.config.init_rag_path`` (the
    first two integers of every line form one edge). SVs of the "sv"
    SegmentationDataset which do not occur in the RAG are added as single-node
    CCs, and the resulting CCs are written to ``initial_rag.txt`` via
    ``knossos_ml_from_ccs``. CCs with more than ``RENDERING_MAX_NB_SV`` SVs
    are rendered one by one via ``SuperSegmentationObject.render_views``; all
    other CCs are shuffled, split into 2000 chunks and rendered by
    ``render_views_glia_removal`` batch jobs.

    Raises
    ------
    RuntimeError
        If ``find_missing_sv_views`` reports SVs which are part of the RAG.
    """
    log = initialize_logging('glia_view_rendering',
                             global_params.config.working_dir + '/logs/',
                             overwrite=False)
    np.random.seed(0)

    # view rendering prior to glia removal, choose SSD accordingly
    version = "tmp"  # glia removal is based on the initial RAG and does not require explicitly stored SSVs

    G = nx.Graph()  # TODO: Add factory method for initial RAG
    with open(global_params.config.init_rag_path, 'r') as f:
        for line in f:
            # raw string: '\d' in a normal string literal is an invalid escape
            edge = [int(v) for v in re.findall(r'(\d+)', line)]
            if not edge:
                continue  # skip lines without any ID, e.g. blank lines
            G.add_edge(edge[0], edge[1])

    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG.".format(len(all_sv_ids_in_rag)))

    # add single SV connected components to initial graph
    sd = SegmentationDataset(obj_type='sv',
                             working_dir=global_params.config.working_dir)
    sv_ids = sd.ids
    diff = np.array(list(set(sv_ids).difference(set(all_sv_ids_in_rag))))
    log.info('Found {} single connected component SVs which were missing'
             ' in initial RAG.'.format(len(diff)))

    for ix in diff:
        G.add_node(ix)

    all_sv_ids_in_rag = np.array(list(G.nodes()), dtype=np.uint)
    log.info("Found {} SVs in initial RAG after adding size-one connected "
             "components. Writing kml text file".format(
                 len(all_sv_ids_in_rag)))

    # Compute the connected component subgraphs once and reuse them below.
    # `nx.connected_component_subgraphs` was removed in networkx 2.4;
    # `G.subgraph(c).copy()` is its documented, equivalent replacement.
    cc_graphs = [G.subgraph(c).copy() for c in nx.connected_components(G)]

    # write out readable format for 'glia_prediction.py'
    ccs = [list(cc.nodes()) for cc in cc_graphs]
    kml = knossos_ml_from_ccs([np.sort(cc)[0] for cc in ccs], ccs)
    # NOTE(review): no path separator is inserted here, while the log folder
    # above is built with '/logs/' -- confirm `working_dir` ends with '/'.
    with open(global_params.config.working_dir + "initial_rag.txt", 'w') as f:
        f.write(kml)

    # generate parameter for view rendering of individual SSV
    log.info("Starting view rendering.")
    # Plain lists instead of `np.array(list_of_graphs)`: graphs are sized,
    # indexable containers, so numpy may try to unpack them as nested
    # sequences instead of storing them as objects.
    big_ssv = [g for g in cc_graphs
               if g.number_of_nodes() > RENDERING_MAX_NB_SV]
    small_ssv = [g for g in cc_graphs
                 if g.number_of_nodes() <= RENDERING_MAX_NB_SV]

    # identify huge SSVs and process them individually on whole cluster
    for kk, g in enumerate(big_ssv[::-1]):
        # Create SSV object
        sv_ixs = np.sort(list(g.nodes()))
        log.info("Processing SSV [{}/{}] with {} SVs on whole cluster.".format(
            kk + 1, len(big_ssv), len(sv_ixs)))
        sso = SuperSegmentationObject(
            sv_ixs[0],
            working_dir=global_params.config.working_dir,
            version=version,
            create=False,
            sv_ids=sv_ixs)
        # nodes of sso._rag need to be SV
        new_G = nx.Graph()
        for e in g.edges():
            new_G.add_edge(sso.get_seg_obj("sv", e[0]),
                           sso.get_seg_obj("sv", e[1]))
        sso._rag = new_G
        sso.render_views(add_cellobjects=False,
                         cellobjects_only=False,
                         skip_indexviews=True,
                         woglia=False,
                         qsub_pe="openmp",
                         overwrite=True,
                         qsub_co_jobs=global_params.NCORE_TOTAL)

    # render small SSV without overhead and single cpus on whole cluster
    np.random.shuffle(small_ssv)
    multi_params = chunkify(small_ssv, 2000)

    # list of SSV IDs and SSD parameters need to be given to a single QSUB job
    multi_params = [(ixs, global_params.config.working_dir, version)
                    for ixs in multi_params]
    qu.QSUB_script(multi_params,
                   "render_views_glia_removal",
                   n_max_co_processes=global_params.NCORE_TOTAL,
                   pe="openmp",
                   queue=None,
                   script_folder=None,
                   suffix="")

    # check completeness
    sd = SegmentationDataset("sv",
                             working_dir=global_params.config.working_dir)
    res = find_missing_sv_views(sd, woglia=False, n_cores=10)
    # Set lookup instead of scanning the whole ID array for every element.
    rag_sv_ids = set(all_sv_ids_in_rag.tolist())
    missing_not_contained_in_rag = []
    missing_contained_in_rag = []
    for el in res:
        if el in rag_sv_ids:
            missing_contained_in_rag.append(el)
        else:
            missing_not_contained_in_rag.append(el)
    if missing_not_contained_in_rag:
        # The original mixed a '%d' placeholder with str.format (the count was
        # never substituted) and lacked the space before "RAG".
        log.info("{} SVs were not rendered but also not part of the initial "
                 "RAG: {}".format(len(missing_not_contained_in_rag),
                                  missing_not_contained_in_rag))
    if missing_contained_in_rag:
        msg = "Not all SSVs were rendered completely! Missing:\n" \
              "{}".format(missing_contained_in_rag)
        log.error(msg)
        raise RuntimeError(msg)