Пример #1
0
    def splitdata(self, exp_filepath):
        '''Split one experiment's stacked data into per-curve x/y arrays.

        The data returned by ``self.extractdata`` holds all curves stacked
        one after another (column 0 is x, column 1 is y).  It is cut into
        equally sized curves, and any curve whose y-values run from high to
        low is reversed so that all curves are aligned the same way.
        ------
        Args:
            exp_filepath: path for experiment CSV file (passed unchanged to
                ``self.extractdata``)
        Returns:
            flat list ``[x0, y0, x1, y1, ...]`` with one x-array and one
            y-array per curve; at most ``len(self.curvenames)`` curves are
            returned.  An empty list is returned when the data has no rows.
        Raises:
            ValueError: if there are fewer data rows than ``self.ncurves``,
                so that not even one point per curve is available.
        '''
        # extract array of data, which holds all curves stacked row-wise
        data = self.extractdata(exp_filepath)

        totpts = len(data[:, 0])
        if totpts == 0:
            return []

        # Assumes the same number of points per curve; surplus rows that do
        # not fill a whole curve are dropped by the zip() below.
        curvepts = totpts // self.ncurves
        if curvepts == 0:
            raise ValueError(
                "cannot split %d data rows into %d curves"
                % (totpts, self.ncurves))

        result = []
        # zip() against the curve names caps the number of curves returned
        for start, _ in zip(range(0, totpts, curvepts), self.curvenames):
            x = data[start:start + curvepts, 0]
            y = data[start:start + curvepts, 1]

            if y[0] > y[-1]:  # flip datasets so they align
                # reversed slices replace the deprecated sp.flip alias
                x = x[::-1]
                y = y[::-1]
            result += [x, y]
        return result
Пример #2
0
def distance_transform_lin(im, axis=0, mode='both'):
    r"""
    Replaces each void voxel with the linear distance to the nearest solid
    voxel along the specified axis.

    Parameters
    ----------
    im : ND-array
        The image of the porous material with ``True`` values indicating the
        void phase (or phase of interest).  Any number of dimensions is
        accepted.

    axis : scalar
        The direction along which the distance should be measured, the default
        is 0 (i.e. along the x-direction)

    mode : string
        Controls how the distance is measured.  Options are:

        *'forward'* - Distances are measured in the increasing direction along
        the specified axis

        *'reverse'* - Distances are measured in the reverse direction.
        *'backward'* is also accepted.

        *'both'* - Distances are calculated in both directions (by recursively
        calling itself), then reporting the minimum value of the two results.

        Any other value is treated like *'forward'*.

    Returns
    -------
    image : ND-array
        An array shaped like ``im`` in which solid voxels are 0 and each void
        voxel holds the number of void voxels counted since the most recent
        solid voxel along ``axis``.  Void voxels with no solid voxel behind
        them are counted from the edge of the image.
    """
    # Function-scope import: the NumPy aliases formerly reachable through the
    # scipy namespace (sp.flip, sp.pad, ...) are deprecated and missing from
    # newer SciPy releases.
    import numpy as np

    if mode in ['backward', 'reverse']:
        # Measure forward on the mirrored image, then mirror the result back
        mirrored = np.flip(im, axis)
        dist = distance_transform_lin(im=mirrored, axis=axis, mode='forward')
        return np.flip(dist, axis)
    if mode in ['both']:
        im_f = distance_transform_lin(im=im, axis=axis, mode='forward')
        im_b = distance_transform_lin(im=im, axis=axis, mode='backward')
        return np.minimum(im_f, im_b)

    # b: running count of void voxels along the axis
    b = np.cumsum(im > 0, axis=axis)
    # c: change of that count as sampled at solid voxels only
    c = np.diff(b * (im == 0), axis=axis)
    # d: running minimum, i.e. minus the void count at the last solid voxel
    d = np.minimum.accumulate(c, axis=axis)
    # diff() shortened the axis by one; restore it with a leading zero.
    # Building pad_width from im.ndim supports any dimensionality (the
    # previous lookup tables stopped at 3-D and left `e` undefined beyond).
    pad_width = [(0, 0)] * im.ndim
    pad_width[axis] = (1, 0)
    e = np.pad(d, pad_width=pad_width, mode='constant', constant_values=0)
    return im * (b + e)
Пример #3
0
    def parse(self, r1files, *, r2files=None, add_cols=None):
        """Parse barcodes from files.

        Parameters
        ----------
        r1files : str or list
            Name of R1 FASTQ file, or list of such files. Can be gzipped.
        r2files : None, str, or list
            `None` or empty list if not using R2, or like `r1files` for R2.
        add_cols : None or dict
            If dict, specify names and values (i.e., sample or library names)
            to be added to returned data frames.

        Returns
        -------
        tuple
            The 2-tuple `(barcodes, fates)`, where:
                - `barcodes` is pandas DataFrame giving number of observations
                  of each barcode (columns are "barcode" and "count").
                - `fates` is pandas DataFrame giving total number of reads with
                  each fate (columns "fate" and "count"). Possible fates:
                  - "failed chastity filter"
                  - "valid barcode"
                  - "invalid barcode": not in barcode whitelist
                  - "R1 / R2 disagree" (if using `r2files`)
                  - "low quality barcode": sequencing quality low
                  - "unparseable barcode": invalid flank sequence, N in barcode

            Note that these data frames also include any columns specified by
            `add_cols`.

        Raises
        ------
        ValueError
            If `r1files` and `r2files` differ in length, if `add_cols` uses
            one of the reserved column names ("barcode", "count", "fate"),
            or if a read is too short to reach the end of the barcode.

        """
        # Function-scope import: `scipy.flip` / `scipy.maximum` were NumPy
        # aliases in the scipy namespace; they are deprecated and missing
        # from newer SciPy releases, so call NumPy directly.
        import numpy

        if isinstance(r1files, str):
            r1files = [r1files]
        if isinstance(r2files, str):
            r2files = [r2files]

        if not r2files:
            reads = ["R1"]
            r2files = None
            fileslist = [r1files]
            r1only = True
        else:
            reads = ["R1", "R2"]
            if len(r1files) != len(r2files):
                raise ValueError("`r1files` and `r2files` different length")
            fileslist = [r1files, r2files]
            r1only = False

        # Validate `add_cols` before any file is read, so that a bad
        # argument is reported without first parsing every FASTQ file.
        if add_cols is None:
            add_cols = {}
        existing_cols = {"barcode", "count", "fate"}
        if set(add_cols).intersection(existing_cols):
            raise ValueError(f"`add_cols` cannot contain {existing_cols}")

        # Pre-seed every whitelisted barcode with 0 when all valid barcodes
        # should be listed; otherwise count only barcodes actually observed.
        if self.valid_barcodes and self.list_all_valid_barcodes:
            barcodes = {bc: 0 for bc in self.valid_barcodes}
        else:
            barcodes = collections.defaultdict(int)

        fates = {
            "failed chastity filter": 0,
            "unparseable barcode": 0,
            "low quality barcode": 0,
            "invalid barcode": 0,
            "valid barcode": 0,
        }
        if not r1only:
            fates["R1 / R2 disagree"] = 0

        # max length of interest for reads
        max_len = self.bclen + len(self.upstream) + len(self.downstream)

        for filetup in zip(*fileslist):
            if r1only:
                assert len(filetup) == 1
                iterator = iterate_fastq(filetup[0],
                                         check_pair=1,
                                         trim=max_len)
            else:
                assert len(filetup) == 2, f"{filetup}\n{fileslist}"
                iterator = iterate_fastq_pair(filetup[0],
                                              filetup[1],
                                              r1trim=max_len,
                                              r2trim=max_len)

            for entry in iterator:

                # Entry layout as consumed here: single-end entries carry
                # (_, read, quality, fail); paired entries carry
                # (_, r1, r2, q1, q2, fail).
                if r1only:
                    readlist = [entry[1]]
                    qlist = [entry[2]]
                    fail = entry[3]

                else:
                    readlist = [entry[1], entry[2]]
                    qlist = [entry[3], entry[4]]
                    fail = entry[5]

                if fail and self.chastity_filter:
                    fates["failed chastity filter"] += 1
                    continue

                matches = {}
                for read, r in zip(reads, readlist):
                    rlen = len(r)

                    # get or build matcher for read of this length
                    len_past_bc = rlen - self._bcend[read]
                    if len_past_bc < 0:
                        raise ValueError(f"{read} too short: {rlen}")
                    elif rlen in self._matches[read]:
                        matcher = self._matches[read][rlen]
                    else:
                        # Fuzzy pattern: leading flank, barcode of exactly
                        # `bclen` unambiguous bases, then as much of the
                        # trailing flank as the read length covers.
                        if read == "R1":
                            match_str = (f"^({self._rcdownstream})"
                                         f"{{s<={self.downstream_mismatch}}}"
                                         f"(?P<bc>[ACTG]{{{self.bclen}}})"
                                         f"({self._rcupstream[: len_past_bc]})"
                                         f"{{s<={self.upstream_mismatch}}}")
                        else:
                            assert read == "R2"
                            match_str = (f"^({self.upstream})"
                                         f"{{s<={self.upstream_mismatch}}}"
                                         f"(?P<bc>[ACTG]{{{self.bclen}}})"
                                         f"({self.downstream[: len_past_bc]})"
                                         f"{{s<={self.downstream_mismatch}}}")
                        matcher = regex.compile(match_str,
                                                flags=regex.BESTMATCH)
                        self._matches[read][rlen] = matcher

                    m = matcher.match(r)
                    if m:
                        matches[read] = m
                    else:
                        # one unparseable read makes the whole entry
                        # unparseable; no need to test the mate
                        break

                if len(matches) == len(reads):
                    bc = {}
                    bc_q = {}
                    for read, q in zip(reads, qlist):
                        bc[read] = matches[read].group("bc")
                        bc_q[read] = qual_str_to_array(
                            q[matches[read].start("bc"):matches[read].end("bc"
                                                                          )])
                    # Bring the barcode (and its qualities) from the other
                    # read into the orientation named by `bc_orientation`.
                    if self.bc_orientation == "R1":
                        if not r1only:
                            bc["R2"] = reverse_complement(bc["R2"])
                            bc_q["R2"] = numpy.flip(bc_q["R2"], axis=0)
                    else:
                        assert self.bc_orientation == "R2"
                        bc["R1"] = reverse_complement(bc["R1"])
                        bc_q["R1"] = numpy.flip(bc_q["R1"], axis=0)
                    if r1only:
                        if (bc_q["R1"] >= self.minq).all():
                            if self.valid_barcodes and (
                                    bc["R1"] not in self.valid_barcodes):
                                fates["invalid barcode"] += 1
                            else:
                                barcodes[bc["R1"]] += 1
                                fates["valid barcode"] += 1
                        else:
                            fates["low quality barcode"] += 1
                    else:
                        if bc["R1"] == bc["R2"]:
                            if self.valid_barcodes and (
                                    bc["R1"] not in self.valid_barcodes):
                                fates["invalid barcode"] += 1
                            # a site passes if either read calls it with
                            # sufficient quality
                            elif (numpy.maximum(bc_q["R1"], bc_q["R2"]) >=
                                  self.minq).all():
                                barcodes[bc["R1"]] += 1
                                fates["valid barcode"] += 1
                            else:
                                fates["low quality barcode"] += 1
                        else:
                            fates["R1 / R2 disagree"] += 1
                else:
                    # invalid flanking sequence or N in barcode
                    fates["unparseable barcode"] += 1

        # Most frequent first; ties broken alphabetically.
        barcodes = (pd.DataFrame(
            list(barcodes.items()), columns=["barcode", "count"]).sort_values(
                ["count", "barcode"],
                ascending=[False,
                           True]).assign(**add_cols).reset_index(drop=True))

        fates = (pd.DataFrame(list(fates.items()), columns=[
            "fate", "count"
        ]).sort_values(["count", "fate"],
                       ascending=[False, True
                                  ]).assign(**add_cols).reset_index(drop=True))

        return (barcodes, fates)
Пример #4
0
 def FlipBoundary(self):
     """Reverse the traversal direction of the stored boundary.

     The boundary is flipped along axis 0 and then rolled by one position;
     for a 1-D sequence this keeps the first entry in place while the
     remaining entries appear in reverse order.
     """
     reversedBoundary = flip(self.__boundary, 0)
     self.__boundary = roll(reversedBoundary, 1)
Пример #5
0
    def __CalculateBoundary(self):
        """Trace the boundary loop of the mesh and store it in ``self.__boundary``.

        Steps:
          1. Scan the cells of ``self.__originalPolyData`` for the first
             triangle edge that has no neighbouring cell; its two end points
             seed the boundary.
          2. Walk from point to point along edges without a neighbouring
             cell until the starting point is reached again.
          3. Orient the loop: if the normal spanned by the first two boundary
             points (relative to the boundary centroid) points away from the
             apex, the loop is reversed while keeping its first point.

        Cells are assumed to be triangles (three point ids are read per
        cell), and ``self.__pointData`` is indexed as one column per point,
        presumably 3 x N -- confirm against the code that fills it.

        Raises:
            Exception: if the mesh has no boundary edge, if the boundary walk
                cannot advance (the boundary is not a simple closed loop), or
                if the boundary centroid has an unexpected shape.
        """
        startingPoint = None
        currentPoint = None
        foundBoundary = False
        boundary = []
        visitedEdges = set()
        visitedBoundaryEdges = []

        # --- 1. find a seed edge on the boundary ---------------------------
        for cellId in xrange(self.__originalPolyData.GetNumberOfCells()):
            cellPointIdList = vtkIdList()
            self.__originalPolyData.GetCellPoints(cellId, cellPointIdList)

            cellEdges = [[cellPointIdList.GetId(0), cellPointIdList.GetId(1)],
                         [cellPointIdList.GetId(1), cellPointIdList.GetId(2)],
                         [cellPointIdList.GetId(2), cellPointIdList.GetId(0)]]

            for edge in cellEdges:
                # directed edges already tested are skipped
                if tuple(edge) in visitedEdges:
                    continue
                visitedEdges.add(tuple(edge))

                singleCellEdgeNeighborIds = vtkIdList()
                self.__originalPolyData.GetCellEdgeNeighbors(
                    cellId, edge[0], edge[1], singleCellEdgeNeighborIds)

                # an edge without a neighbouring cell lies on the boundary
                if singleCellEdgeNeighborIds.GetNumberOfIds() == 0:
                    foundBoundary = True

                    startingPoint = edge[0]
                    currentPoint = edge[1]

                    boundary.append(edge[0])
                    boundary.append(edge[1])

                    visitedBoundaryEdges.append([currentPoint, startingPoint])
                    visitedBoundaryEdges.append([startingPoint, currentPoint])

                    # Stop at the first boundary edge. Continuing would let a
                    # triangle with two boundary edges append duplicate
                    # points and overwrite the start of the walk.
                    break

            if foundBoundary:
                break

        if not foundBoundary:
            raise Exception("The mesh provided has no boundary; not possible to do Quasi-Conformal Mapping on this dataset.")

        # --- 2. walk along the boundary until the loop closes --------------
        while currentPoint != startingPoint:
            neighboringCells = vtkIdList()
            self.__originalPolyData.GetPointCells(currentPoint, neighboringCells)

            for i in xrange(neighboringCells.GetNumberOfIds()):
                cell = neighboringCells.GetId(i)
                triangle = self.__originalPolyData.GetCell(cell)

                # the two edges of this triangle that start at currentPoint
                for j in xrange(triangle.GetNumberOfPoints()):
                    if triangle.GetPointId(j) == currentPoint:
                        j1 = (j + 1) % 3
                        j2 = (j + 2) % 3

                        edge1 = [triangle.GetPointId(j),
                                 triangle.GetPointId(j1)]
                        edge2 = [triangle.GetPointId(j),
                                 triangle.GetPointId(j2)]

                edgeNeighbors1 = vtkIdList()
                edgeNeighbors2 = vtkIdList()

                self.__originalPolyData.GetCellEdgeNeighbors(cell, edge1[0], edge1[1], edgeNeighbors1)
                self.__originalPolyData.GetCellEdgeNeighbors(cell, edge2[0], edge2[1], edgeNeighbors2)

                if edgeNeighbors1.GetNumberOfIds() == 0:
                    if [edge1[1], edge1[0]] not in visitedBoundaryEdges:
                        if edge1[1] not in boundary:
                            boundary.append(edge1[1])
                        visitedBoundaryEdges.append([edge1[0], edge1[1]])
                        visitedBoundaryEdges.append([edge1[1], edge1[0]])
                        currentPoint = edge1[1]
                        break

                if edgeNeighbors2.GetNumberOfIds() == 0:
                    if [edge2[1], edge2[0]] not in visitedBoundaryEdges:
                        if edge2[1] not in boundary:
                            boundary.append(edge2[1])
                        visitedBoundaryEdges.append([edge2[0], edge2[1]])
                        visitedBoundaryEdges.append([edge2[1], edge2[0]])
                        currentPoint = edge2[1]
                        break
            else:
                # No unvisited boundary edge leaves currentPoint; without
                # this guard the while-loop would spin forever.
                raise Exception("Could not continue tracing the mesh boundary; the boundary does not form a simple closed loop.")

        boundary = asarray(boundary, dtype=int)

        # --- 3. orient the loop relative to the apex -----------------------
        center = mean(self.__pointData[:, boundary], axis=1)

        # Sanity check on the centroid before it is used (value comparison;
        # the original compared ints with `is not`, an identity test).
        if len(center.shape) != 1 and center.shape[0] != 3:
            raise Exception("Something went wrong. Probably forgot to transpose this. Contact maintainer.")

        vector1 = asarray(self.__pointData[:, boundary[0]] - center)
        vector2 = asarray(self.__pointData[:, boundary[1]] - center)
        vectorNormal = cross(vector1, vector2)
        vectorApex = self.__pointData[:, self.__apex] - center

        if dot(vectorApex, vectorNormal) < 0:
            # reverse direction but keep the same first point
            boundary = flip(boundary, 0)
            boundary = roll(boundary, 1)

        self.__boundary = boundary
Пример #6
0
 def FlipBoundary(self):
     """Reverse the traversal direction of the stored boundary.

     The boundary is flipped along axis 0 and then rolled by one position;
     for a 1-D sequence this keeps the first entry in place while the
     remaining entries appear in reverse order.
     """
     # Function-scope import: scipy.flip / scipy.roll were NumPy aliases in
     # the scipy namespace; they are deprecated and missing from newer
     # SciPy releases.
     import numpy

     self.__boundary = numpy.flip(self.__boundary, 0)
     self.__boundary = numpy.roll(self.__boundary, 1)
Пример #7
0
def distance_transform_lin(im, axis=0, mode='both'):
    r"""
    Replaces each void voxel with the linear distance to the nearest solid
    voxel along the specified axis.

    Parameters
    ----------
    im : ND-array
        The image of the porous material with ``True`` values indicating the
        void phase (or phase of interest).  Any number of dimensions is
        accepted.

    axis : int
        The direction along which the distance should be measured, the default
        is 0 (i.e. along the x-direction)

    mode : string
        Controls how the distance is measured.  Options are:

        'forward' - Distances are measured in the increasing direction along
        the specified axis

        'reverse' - Distances are measured in the reverse direction.
        *'backward'* is also accepted.

        'both' - Distances are calculated in both directions (by recursively
        calling itself), then reporting the minimum value of the two results.

        Any other value is treated like 'forward'.

    Returns
    -------
    image : ND-array
        A copy of ``im`` with each foreground voxel containing the distance to
        the nearest background along the specified axis.  Foreground voxels
        with no background voxel behind them are counted from the edge of
        the image.

    Notes
    -----
    A warning is issued when ``im`` contains a singleton axis; because of the
    recursive calls it is issued once per direction evaluated.
    """
    # Function-scope import: the NumPy aliases formerly reachable through the
    # scipy namespace (sp.flip, sp.pad, ...) are deprecated and missing from
    # newer SciPy releases.
    import numpy as np

    if im.ndim != im.squeeze().ndim:
        warnings.warn('Input image conains a singleton axis:' + str(im.shape) +
                      ' Reduce dimensionality with np.squeeze(im) to avoid' +
                      ' unexpected behavior.')
    if mode in ['backward', 'reverse']:
        # Measure forward on the mirrored image, then mirror the result back
        mirrored = np.flip(im, axis)
        dist = distance_transform_lin(im=mirrored, axis=axis, mode='forward')
        return np.flip(dist, axis)
    if mode in ['both']:
        im_f = distance_transform_lin(im=im, axis=axis, mode='forward')
        im_b = distance_transform_lin(im=im, axis=axis, mode='backward')
        return np.minimum(im_f, im_b)

    # b: running count of foreground voxels along the axis
    b = np.cumsum(im > 0, axis=axis)
    # c: change of that count as sampled at background voxels only
    c = np.diff(b*(im == 0), axis=axis)
    # d: running minimum, i.e. minus the count at the last background voxel
    d = np.minimum.accumulate(c, axis=axis)
    # diff() shortened the axis by one; restore it with a leading zero.
    # Building pad_width from im.ndim supports any dimensionality (the
    # previous lookup tables stopped at 3-D and left `e` undefined beyond).
    pad_width = [(0, 0)] * im.ndim
    pad_width[axis] = (1, 0)
    e = np.pad(d, pad_width=pad_width, mode='constant', constant_values=0)
    return im*(b + e)
Пример #8
0
 def FlipBoundary(self):
     """Reverse the traversal direction of the stored boundary.

     The boundary is flipped along axis 0 and then rolled by one position;
     for a 1-D sequence this keeps the first entry in place while the
     remaining entries appear in reverse order.
     """
     # Function-scope import: scipy.flip / scipy.roll were NumPy aliases in
     # the scipy namespace; they are deprecated and missing from newer
     # SciPy releases.
     import numpy

     self.boundary = numpy.flip(self.boundary, 0)
     self.boundary = numpy.roll(self.boundary, 1)