示例#1
0
文件: merge.py 项目: tomka/fafbseg-py
def merge_into_catmaid(x,
                       target_instance,
                       tag,
                       min_node_overlap=4,
                       min_overlap_size=1,
                       merge_limit=1,
                       min_upload_size=0,
                       min_upload_nodes=1,
                       update_radii=True,
                       import_tags=False,
                       label_joins=True,
                       sid_from_nodes=True,
                       mesh=None):
    """Merge neuron into target CATMAID instance.

    This function will attempt to:

        1. Find fragments in ``target_instance`` that overlap with ``x``
           using whatever segmentation data source you have set using
           ``fafbseg.use_...``.
        2. Generate a union of these fragments and ``x``.
        3. Make a differential upload of the union leaving existing nodes
           untouched.
        4. Join uploaded and existing tracings into a single continuous
           neuron. This will also upload connectors but no node tags.

    Parameters
    ----------
    x :                 pymaid.CatmaidNeuron/List | navis.TreeNeuron/List
                        Neuron(s)/fragment(s) to commit to ``target_instance``.
    target_instance :   pymaid.CatmaidInstance
                        Target Catmaid instance to commit the neuron to.
    tag :               str
                        A tag to be added as part of a ``{URL} upload {tag}``
                        annotation. This should be something identifying your
                        group - e.g. ``tag='WTCam'`` for the Cambridge Wellcome
                        Trust group.
    min_node_overlap :  int, optional
                        Minimal overlap between `x` and a potentially
                        overlapping neuron in ``target_instance``. If
                        the fragment has less total nodes than `min_overlap`,
                        the threshold will be lowered to:
                        ``min_overlap = min(min_overlap, fragment.n_nodes)``
    min_overlap_size :  int, optional
                        Minimum node count for potentially overlapping neurons
                        in ``target_instance``. Use this to e.g. exclude
                        single-node synapse orphans.
    merge_limit :       int, optional
                        Distance threshold [um] for collapsing nodes of ``x``
                        into overlapping fragments in target instance. Decreasing
                        this will help if your neuron has complicated branching
                        patterns (e.g. uPN dendrites) at the cost of potentially
                        creating duplicate parallel tracings in the neuron's
                        backbone.
    min_upload_size :   float, optional
                        Minimum size in microns for upload of new branches:
                        branches found in ``x`` but not in the overlapping
                        neuron(s) in ``target_instance`` are uploaded in
                        fragments. Use this parameter to exclude small branches
                        that might not be worth the additional review time.
    min_upload_nodes :  int, optional
                        As ``min_upload_size`` but for number of nodes instead
                        of cable length.
    update_radii :      bool, optional
                        If True, will use radii in ``x`` to update radii of
                        overlapping fragments if (and only if) the nodes
                        do not currently have a radius (i.e. radius<=0).
    import_tags :       bool, optional
                        If True, will import node tags. Please note that this
                        will NOT import tags of nodes that have been collapsed
                        into manual tracings.
    label_joins :       bool, optional
                        If True, will label nodes at which old and new
                        tracings have been joined with tags ("Joined from ..."
                        and "Joined with ...") and with a lower confidence of
                        1.
    sid_from_nodes :    bool, optional
                        If True and the to-be-merged neuron has a "skeleton_id"
                        column it will be used to set the ``source_id`` upon
                        uploading new branches. This is relevant if your neuron
                        is a virtual chimera of several neurons: in order to
                        preserve provenance (i.e. correctly associating each
                        node with a ``source_id`` origin).
    mesh :              Volume | MeshNeuron | mesh-like object | list thereof
                        Mesh representation of ``x``. If provided, will use to
                        improve merging. If ``x`` is a list of neurons, must
                        provide a mesh for each of them.

    Returns
    -------
    Nothing
                        If all went well.
    dict
                        If something failed, returns server responses with
                        error logs.

    Examples
    --------
    Setup

    >>> import fafbseg
    >>> import pymaid

    >>> # Set up connections to manual and autoseg CATMAID
    >>> manual = pymaid.CatmaidInstance('URL', 'HTTP_USER', 'HTTP_PW', 'API_TOKEN')
    >>> auto = pymaid.CatmaidInstance('URL', 'HTTP_USER', 'HTTP_PW', 'API_TOKEN')

    >>> # Set a segmentation data source
    >>> fafbseg.use_google_storage("https://storage.googleapis.com/fafb-ffn1-20190805/segmentation")

    Merge a neuron from autoseg into v14

    >>> # Fetch the autoseg neuron to transfer to v14
    >>> x = pymaid.get_neuron(267355161, remote_instance=auto)

    >>> # Get the neuron's annotations so that they can be merged too
    >>> x.get_annotations(remote_instance=auto)

    >>> # Start the commit
    >>> # See online documentation for video of merge process
    >>> resp = fafbseg.merge_neuron(x, target_instance=manual)

    """
    if not isinstance(x, navis.NeuronList):
        if not isinstance(x, navis.TreeNeuron):
            raise TypeError('Expected TreeNeuron/List, got "{}"'.format(
                type(x)))
        x = navis.NeuronList(x)

    if not isinstance(mesh, (np.ndarray, list)):
        if isinstance(mesh, type(None)):
            mesh = [mesh] * len(x)
        else:
            mesh = [mesh]

    if len(mesh) != len(x):
        raise ValueError(f'Got {len(mesh)} meshes for {len(x)} neurons.')

    # Make a copy - in case we make any changes to the neurons
    # (like changing duplicate skeleton IDs)
    x = x.copy()

    if not isinstance(tag, (str, type(None))):
        raise TypeError('Tag must be string, got "{}"'.format(type(tag)))

    # Check user permissions
    perm = target_instance.fetch(target_instance.make_url('permissions'))
    requ_perm = ['can_annotate', 'can_annotate_with_token', 'can_import']
    miss_perm = [
        p for p in requ_perm
        if target_instance.project_id not in perm[0].get(p, [])
    ]

    if miss_perm:
        msg = 'You lack permissions: {}. Please contact an administrator.'
        raise PermissionError(msg.format(', '.join(miss_perm)))

    pymaid.set_loggers('WARNING')

    # Throttle requests just to play it safe
    # On a bad connection one might have to decrease max_threads further
    target_instance.max_threads = min(target_instance.max_threads, 50)

    # For user convenience, we will do all the stuff that needs user
    # interaction first and then run the automatic merge:

    # Start by find all overlapping fragments
    overlapping = []
    for n, m in tqdm(zip(x, mesh),
                     desc='Pre-processing neuron(s)',
                     leave=False,
                     disable=not use_pbars,
                     total=len(x)):
        ol = find_fragments(n,
                            min_node_overlap=min_node_overlap,
                            min_nodes=min_overlap_size,
                            mesh=m,
                            remote_instance=target_instance)

        if ol:
            # Add number of samplers to each neuron
            n_samplers = pymaid.get_sampler_counts(
                ol, remote_instance=target_instance)

            for nn in ol:
                nn.sampler_count = n_samplers[str(nn.id)]

        overlapping.append(ol)

    # Now have the user confirm merges before we actually make them
    viewer = navis.Viewer(title='Confirm merges')
    viewer.clear()
    overlap_cnf = []
    base_neurons = []
    try:
        for n, ol in zip(x, overlapping):
            # This asks user a bunch of questions prior to merge and upload
            ol, bn = confirm_overlap(n, ol, viewer=viewer)
            overlap_cnf.append(ol)
            base_neurons.append(bn)
    except BaseException:
        raise
    finally:
        viewer.close()

    for i, (n, ol, bn, m) in enumerate(zip(x, overlap_cnf, base_neurons,
                                           mesh)):
        print(f'Processing neuron "{n.name}" ({n.id}) [{i}/{len(x)}]',
              flush=True)
        # If no overlapping neurons proceed with just uploading.
        if not ol:
            print(
                'No overlapping fragments found. Uploading without merging...',
                end='',
                flush=True)
            resp = pymaid.upload_neuron(n,
                                        import_tags=import_tags,
                                        import_annotations=True,
                                        import_connectors=True,
                                        remote_instance=target_instance)
            if 'error' in resp:
                return resp

            # Add annotations
            _ = __merge_annotations(n, resp['skeleton_id'], tag,
                                    target_instance)

            msg = '\nNeuron "{}" successfully uploaded to target instance as "{}" #{}'
            print(msg.format(n.name, n.name, resp['skeleton_id']), flush=True)
            continue

        # Check if there is a duplicate skeleton ID between the to-be-merged
        # neuron and the to-merge-into neurons
        original_skid = None
        if n.id in ol.id:
            print('Fixing duplicate skeleton IDs.', flush=True)
            # Keep track of old skid
            original_skid = n.id
            # Skeleton ID must stay convertable to integer
            n.id = str(random.randint(1, 1000000))
            n._clear_temp_attr()

        # Check if there are any duplicate node IDs between neuron ``x`` and the
        # overlapping fragments and create new IDs for ``x`` if necessary
        duplicated = n.nodes[n.nodes.node_id.isin(ol.nodes.node_id.values)]
        if not duplicated.empty:
            print('Duplicate node IDs found. Regenerating node tables... ',
                  end='',
                  flush=True)
            max_ix = max(ol.nodes.node_id.max(), n.nodes.node_id.max()) + 1
            new_ids = range(max_ix, max_ix + duplicated.shape[0])
            id_map = {
                old: new
                for old, new in zip(duplicated.node_id, new_ids)
            }
            n.nodes['node_id'] = n.nodes.node_id.map(
                lambda n: id_map.get(n, n))
            n.nodes['parent_id'] = n.nodes.parent_id.map(
                lambda n: id_map.get(n, n))
            if n.has_connectors:
                n.connectors['node_id'] = n.connectors.node_id.map(
                    lambda n: id_map.get(n, n))
            n._clear_temp_attr()
            print('Done.', flush=True)

        # Combining the fragments into a single neuron is actually non-trivial:
        # 1. Collapse nodes of our input neuron `x` into within-distance nodes
        #    in the overlapping fragments (never the other way around!)
        # 2. At the same time keep connectivity (i.e. edges) of the input-neuron
        # 3. Keep track of the nodes' provenance (i.e. the contractions)
        #
        # In addition there are a lot of edge-cases to consider. For example:
        # - multiple nodes collapsing onto the same node
        # - nodes of overlapping fragments that are close enough to be collapsed
        #   (e.g. orphan synapse nodes)

        # Keep track of original skeleton IDs
        for a in ol + n:
            # Original skeleton of each node
            a.nodes['origin_skeletons'] = a.id
            if a.has_connectors:
                # Original skeleton of each connector
                a.connectors['origin_skeletons'] = a.id

        print('Generating union of all fragments... ', end='', flush=True)
        union, new_edges, collapsed_into = collapse_nodes(n,
                                                          ol,
                                                          limit=merge_limit,
                                                          base_neuron=bn,
                                                          mesh=m)
        print('Done.', flush=True)

        print('Extracting new nodes to upload... ', end='', flush=True)
        # Now we have to break the neuron into "new" fragments that we can upload
        # First get the new and old nodes
        new_nodes = union.nodes[union.nodes.origin_skeletons ==
                                n.id].node_id.values
        old_nodes = union.nodes[
            union.nodes.origin_skeletons != n.id].node_id.values

        # Now remove the already existing nodes from the union
        only_new = navis.subset_neuron(union, new_nodes)

        # And then break into continuous fragments for upload
        frags = navis.break_fragments(only_new)
        print('Done.', flush=True)

        # Also get the new edges we need to generate
        to_stitch = new_edges[~new_edges.parent_id.isnull()]

        # We need this later -> no need to compute this for every uploaded fragment
        cond1b = to_stitch.node_id.isin(old_nodes)
        cond2b = to_stitch.parent_id.isin(old_nodes)

        # Now upload each fragment and keep track of new node IDs
        tn_map = {}
        for f in tqdm(frags,
                      desc='Merging new arbors',
                      leave=False,
                      disable=not use_pbars):
            # In cases of complete merging into existing neurons, the fragment
            # will have no nodes
            if f.n_nodes < 1:
                continue

            # Check if fragment is a "linker" and as such can not be skipped
            lcond1 = np.isin(f.nodes.node_id.values, new_edges.node_id.values)
            lcond2 = np.isin(f.nodes.node_id.values,
                             new_edges.parent_id.values)

            # If not linker, check skip conditions
            if sum(lcond1) + sum(lcond2) <= 1:
                if f.cable_length < min_upload_size:
                    continue
                if f.n_nodes < min_upload_nodes:
                    continue

            # Collect origin info for this neuron if it's a CatmaidNeuron
            if isinstance(n, pymaid.CatmaidNeuron):
                source_info = {'source_type': 'segmentation'}

                if not sid_from_nodes or 'origin_skeletons' not in f.nodes.columns:
                    # If we had to change the skeleton ID due to duplication, make
                    # sure to pass the original skid as source ID
                    if original_skid:
                        source_info['source_id'] = int(original_skid)
                    else:
                        source_info['source_id'] = int(n.id)
                else:
                    if f.nodes.origin_skeletons.unique().shape[0] == 1:
                        skid = f.nodes.origin_skeletons.unique()[0]
                    else:
                        print(
                            'Warning: uploading chimera fragment with multiple '
                            'skeleton IDs! Using largest contributor ID.')
                        # Use the skeleton ID that has the most nodes
                        by_skid = f.nodes.groupby('origin_skeletons').x.count()
                        skid = by_skid.sort_values(
                            ascending=False).index.values[0]

                    source_info['source_id'] = int(skid)

                if not isinstance(getattr(n, '_remote_instance', None),
                                  type(None)):
                    source_info[
                        'source_project_id'] = n._remote_instance.project_id
                    source_info['source_url'] = n._remote_instance.server
            else:
                # Unknown source
                source_info = {}

            resp = pymaid.upload_neuron(f,
                                        import_tags=import_tags,
                                        import_annotations=False,
                                        import_connectors=True,
                                        remote_instance=target_instance,
                                        **source_info)

            # Stop if there was any error while uploading
            if 'error' in resp:
                return resp

            # Collect old -> new node IDs
            tn_map.update(resp['node_id_map'])

            # Now check if we can create any of the new edges by joining nodes
            # Both treenode and parent ID have to be either existing nodes or
            # newly uploaded
            cond1a = to_stitch.node_id.isin(tn_map)
            cond2a = to_stitch.parent_id.isin(tn_map)

            to_gen = to_stitch.loc[(cond1a | cond1b) & (cond2a | cond2b)]

            # Join nodes
            for node in to_gen.itertuples():
                # Make sure our base_neuron always come out as winner on top
                if node.node_id in bn.nodes.node_id.values:
                    winner, looser = node.node_id, node.parent_id
                else:
                    winner, looser = node.parent_id, node.node_id

                # We need to map winner and looser to the new node IDs
                winner = tn_map.get(winner, winner)
                looser = tn_map.get(looser, looser)

                # And now do the join
                resp = pymaid.join_nodes(winner,
                                         looser,
                                         no_prompt=True,
                                         tag_nodes=label_joins,
                                         remote_instance=target_instance)

                # See if there was any error while uploading
                if 'error' in resp:
                    print('Skipping joining nodes '
                          '{} and {}: {} - '.format(node.node_id,
                                                    node.parent_id,
                                                    resp['error']))
                    # Skip changing confidences
                    continue

                # Pop this edge from new_edges and from condition
                new_edges.drop(node.Index, inplace=True)
                cond1b.drop(node.Index, inplace=True)
                cond2b.drop(node.Index, inplace=True)

                # Change node confidences at new join
                if label_joins:
                    new_conf = {looser: 1}
                    resp = pymaid.update_node_confidence(
                        new_conf, remote_instance=target_instance)

        # Add annotations
        if n.has_annotations:
            _ = __merge_annotations(n, bn, tag, target_instance)

        # Update node radii
        if update_radii and 'radius' in n.nodes.columns and np.all(
                n.nodes.radius):
            print('Updating radii of existing nodes... ', end='', flush=True)
            resp = update_node_radii(source=n,
                                     target=ol,
                                     remote_instance=target_instance,
                                     limit=merge_limit,
                                     skip_existing=True)
            print('Done.', flush=True)

        print(
            'Neuron "{}" successfully merged into target instance as "{}" #{}'.
            format(n.name, bn.name, bn.id),
            flush=True)

    return
示例#2
0
def autoreview_edges(x, conf_threshold=1, vol=None, remote_instance=None):
    """Automatically review (low-confidence) edges between nodes.

    The way this works:
      1. Fetch the live version of the neuron(s) from the CATMAID instance
      2. Use raycasting to test (low-confidence) edges
      3. Edge confidence is set to ``5`` if test is passed and to ``1`` if not

    You *can* use this function to test all edges in a neuron by increasing
    ``conf_threshold`` to 5. Please note that this could produce a lot of false
    positives (i.e. edges will be flagged as incorrect even though they
    aren't). Part of the problem is that mitochondria are segmented as
    separate entities and hence introduce membranes inside a neuron.

    Parameters
    ----------
    x :                 skeleton ID(s) | pymaid.CatmaidNeuron/List
                        Neuron(s) to review.
    conf_threshold :    int, optional
                        Confidence threshold for edges to be tested. By
                        default only reviews edges with confidence <= 1.
    vol :               cloudvolume.CloudVolume, optional
                        CloudVolume pointing to segmentation data.
    remote_instance :   pymaid.CatmaidInstance, optional
                        CATMAID instance. If ``None``, will use globally
                        define instance.

    Returns
    -------
    server response
                        CATMAID server response from updating node
                        confidences.

    See Also
    --------
    :func:`fafbseg.test_edges`
                        If you only need to test without changing confidences.

    Examples
    --------
    >>> # Set up CloudVolume from the publicly hosted FAFB segmentation data
    >>> # (if you have a local copy, use that instead)
    >>> from cloudvolume import CloudVolume
    >>> vol = CloudVolume('https://storage.googleapis.com/fafb-ffn1-20190805/segmentation',
    ...                   cache=True,
    ...                   progress=False)
    >>> # Autoreview edges
    >>> _ = fafbseg.autoreview_edges(14401884, vol=vol, remote_instance=manual)

    """
    # Fetch neuron(s)
    n = pymaid.get_neurons(x, remote_instance=remote_instance)

    # Extract low confidence edges
    not_root = ~n.nodes.parent_id.isnull()
    is_low_conf = n.nodes.confidence <= conf_threshold
    to_test = n.nodes[is_low_conf & not_root]

    if to_test.empty:
        print('No low-confidence edges to test in neuron(s) '
              '{} found'.format(n.skeleton_id))
        return

    # Test edges
    verdict = test_edges(n,
                         edges=to_test[['treenode_id', 'parent_id']].values,
                         vol=vol)

    # Update node confidences
    new_confidences = {n: 5 for n in to_test[verdict].treenode_id.values}
    new_confidences.update(
        {n: 1
         for n in to_test[~verdict].treenode_id.values})
    resp = pymaid.update_node_confidence(new_confidences,
                                         remote_instance=remote_instance)

    msg = '{} of {} tested low-confidence edges were found to be correct.'
    msg = msg.format(sum(verdict), to_test.shape[0])
    print(msg)

    return resp