def splitdata(self, exp_filepath):
    """Split an experiment's stacked data into per-curve x/y arrays.

    The array returned by ``self.extractdata`` is indexed as a 2-D array:
    column 0 is read as x and column 1 as y, with the points of all curves
    stacked one after another along the rows. Every curve is assumed to
    hold the same number of points (``rows / self.ncurves``).

    Args:
        exp_filepath: path of the experiment file; passed unchanged to
            ``self.extractdata``.

    Returns:
        list: flat list ``[x0, y0, x1, y1, ...]`` holding one x array and
        one y array per curve. A curve whose first y value is larger than
        its last is reversed (x and y together) so that all curves run in
        the same direction.
    """
    # NumPy is called directly: the ``scipy.arange`` / ``scipy.flip``
    # aliases were deprecated in SciPy 1.4 and are no longer provided by
    # current SciPy releases.
    import numpy as np

    data = self.extractdata(exp_filepath)

    # Row index at which each curve starts (same point count per curve).
    totpts = len(data[:, 0])
    curvepts = int(totpts / self.ncurves)
    curvestarts = np.arange(0, totpts, curvepts)

    result = []
    # Zipping with ``self.curvenames`` caps the number of curves at the
    # number of names, which drops a trailing partial chunk when the rows
    # do not divide evenly.
    for start, _ in zip(curvestarts, self.curvenames):
        x = data[start:start + curvepts, 0]
        y = data[start:start + curvepts, 1]
        if y[0] > y[-1]:
            # Flip descending curves so all datasets align.
            x = np.flip(x)
            y = np.flip(y)
        result += [x, y]
    return result
def distance_transform_lin(im, axis=0, mode='both'):
    r"""
    Replaces each void voxel with the linear distance to the nearest solid
    voxel along the specified axis.

    Parameters
    ----------
    im : ND-array
        The image of the porous material with ``True`` values indicating the
        void phase (or phase of interest).  The image is also used as a
        multiplicative mask on the result, so it is expected to be boolean
        or 0/1 valued.  Any number of dimensions is accepted.

    axis : int
        The direction along which the distance should be measured, the
        default is 0 (i.e. along the x-direction).  Negative values count
        from the last axis.

    mode : string
        Controls how the distance is measured.  Options are:

        *'forward'* - Distances are measured in the increasing direction
        along the specified axis

        *'reverse'* - Distances are measured in the reverse direction.
        *'backward'* is also accepted.

        *'both'* - Distances are calculated in both directions (by
        recursively calling itself), then reporting the minimum value of
        the two results.

        Any other value is treated as *'forward'*.

    Returns
    -------
    image : ND-array
        An array of the same shape as ``im`` in which each foreground voxel
        holds its distance along ``axis`` and background voxels are 0.

    """
    # NumPy is called directly: the NumPy aliases in the ``scipy``
    # namespace (``scipy.flip``, ``scipy.cumsum``, ...) were deprecated in
    # SciPy 1.4 and are no longer provided by current SciPy releases.
    import numpy as np

    if mode in ['backward', 'reverse']:
        # Reverse direction == forward transform of the mirrored image.
        im = np.flip(im, axis)
        im = distance_transform_lin(im=im, axis=axis, mode='forward')
        return np.flip(im, axis)
    if mode in ['both']:
        im_f = distance_transform_lin(im=im, axis=axis, mode='forward')
        im_b = distance_transform_lin(im=im, axis=axis, mode='backward')
        return np.minimum(im_f, im_b)

    # Running count of foreground voxels along the axis.
    b = np.cumsum(im > 0, axis=axis)
    # Keep the count only at background voxels; the difference is negative
    # (minus that count) on the step that leaves a background voxel.
    c = np.diff(b * (im == 0), axis=axis)
    # Carry the most negative offset forward along the axis.
    d = np.minimum.accumulate(c, axis=axis)
    # ``diff`` shortened the axis by one: restore it with a leading zero.
    # Built per dimension so images of any rank are supported.
    pad_width = [(0, 0)] * im.ndim
    pad_width[axis] = (1, 0)
    e = np.pad(d, pad_width=pad_width, mode='constant', constant_values=0)
    # Count minus offset gives the run length; ``im`` zeroes the background.
    return im * (b + e)
def parse(self, r1files, *, r2files=None, add_cols=None):
    """Parse barcodes from files.

    Parameters
    ----------
    r1files : str or list
        Name of R1 FASTQ file, or list of such files. Can be gzipped.
    r2files : None, str, or list
        `None` or empty list if not using R2, or like `r1files` for R2.
    add_cols : None or dict
        If dict, specify names and values (i.e., sample or library
        names) to be added to returned data frames.

    Returns
    -------
    tuple
        The 2-tuple `(barcodes, fates)`, where:

            - `barcodes` is pandas DataFrame giving number of observations
              of each barcode (columns are "barcode" and "count").

            - `fates` is pandas DataFrame giving total number of reads
              with each fate (columns "fate" and "count"). Possible fates:

                - "failed chastity filter"
                - "valid barcode"
                - "invalid barcode": not in barcode whitelist
                - "R1 / R2 disagree" (if using `r2files`)
                - "low quality barcode": sequencing quality low
                - "unparseable barcode": invalid flank sequence, N in
                  barcode

        Note that these data frames also include any columns specified by
        `add_cols`. Both are sorted by descending count, ties broken by
        barcode / fate name.

    Raises
    ------
    ValueError
        If `r1files` and `r2files` differ in length, if `add_cols` uses
        one of the reserved column names ("barcode", "count", "fate"), or
        if a read is shorter than the barcode end position stored in
        `self._bcend`.

    """
    # NumPy is called directly: the ``scipy.flip`` / ``scipy.maximum``
    # aliases were deprecated in SciPy 1.4 and are no longer provided by
    # current SciPy releases.
    import numpy as np

    if isinstance(r1files, str):
        r1files = [r1files]
    if isinstance(r2files, str):
        r2files = [r2files]

    if not r2files:
        reads = ["R1"]
        fileslist = [r1files]
        r1only = True
    else:
        reads = ["R1", "R2"]
        if len(r1files) != len(r2files):
            raise ValueError("`r1files` and `r2files` different length")
        fileslist = [r1files, r2files]
        r1only = False

    # Validate `add_cols` before reading any file so that a bad argument
    # fails immediately instead of after all reads were processed.
    if add_cols is None:
        add_cols = {}
    existing_cols = {"barcode", "count", "fate"}
    if set(add_cols).intersection(existing_cols):
        raise ValueError(f"`add_cols` cannot contain {existing_cols}")

    if self.valid_barcodes and self.list_all_valid_barcodes:
        # Pre-seed so every whitelisted barcode is reported, even at 0.
        barcodes = {bc: 0 for bc in self.valid_barcodes}
    else:
        barcodes = collections.defaultdict(int)

    fates = {
        "failed chastity filter": 0,
        "unparseable barcode": 0,
        "low quality barcode": 0,
        "invalid barcode": 0,
        "valid barcode": 0,
    }
    if not r1only:
        fates["R1 / R2 disagree"] = 0

    # max length of interest for reads
    max_len = self.bclen + len(self.upstream) + len(self.downstream)

    for filetup in zip(*fileslist):
        if r1only:
            assert len(filetup) == 1
            iterator = iterate_fastq(filetup[0], check_pair=1, trim=max_len)
        else:
            assert len(filetup) == 2, f"{filetup}\n{fileslist}"
            iterator = iterate_fastq_pair(filetup[0], filetup[1],
                                          r1trim=max_len, r2trim=max_len)

        for entry in iterator:
            if r1only:
                readlist = [entry[1]]
                qlist = [entry[2]]
                fail = entry[3]
            else:
                readlist = [entry[1], entry[2]]
                qlist = [entry[3], entry[4]]
                fail = entry[5]

            if fail and self.chastity_filter:
                fates["failed chastity filter"] += 1
                continue

            matches = {}
            for read, r in zip(reads, readlist):
                rlen = len(r)

                # get or build matcher for read of this length
                len_past_bc = rlen - self._bcend[read]
                if len_past_bc < 0:
                    raise ValueError(f"{read} too short: {rlen}")
                elif rlen in self._matches[read]:
                    matcher = self._matches[read][rlen]
                else:
                    # The trailing flank is truncated to what fits in a
                    # read of this length; each flank allows up to the
                    # configured number of substitutions.
                    if read == "R1":
                        match_str = (f"^({self._rcdownstream})"
                                     f"{{s<={self.downstream_mismatch}}}"
                                     f"(?P<bc>[ACTG]{{{self.bclen}}})"
                                     f"({self._rcupstream[: len_past_bc]})"
                                     f"{{s<={self.upstream_mismatch}}}")
                    else:
                        assert read == "R2"
                        match_str = (f"^({self.upstream})"
                                     f"{{s<={self.upstream_mismatch}}}"
                                     f"(?P<bc>[ACTG]{{{self.bclen}}})"
                                     f"({self.downstream[: len_past_bc]})"
                                     f"{{s<={self.downstream_mismatch}}}")
                    matcher = regex.compile(match_str, flags=regex.BESTMATCH)
                    # Cache per read and length for later reads.
                    self._matches[read][rlen] = matcher

                m = matcher.match(r)
                if m:
                    matches[read] = m
                else:
                    # One unmatched read makes the entry unparseable.
                    break

            if len(matches) == len(reads):
                bc = {}
                bc_q = {}
                for read, q in zip(reads, qlist):
                    bc[read] = matches[read].group("bc")
                    bc_q[read] = qual_str_to_array(
                        q[matches[read].start("bc"):matches[read].end("bc")])

                # Bring both reads into the configured barcode orientation.
                if self.bc_orientation == "R1":
                    if not r1only:
                        bc["R2"] = reverse_complement(bc["R2"])
                        bc_q["R2"] = np.flip(bc_q["R2"], axis=0)
                else:
                    assert self.bc_orientation == "R2"
                    bc["R1"] = reverse_complement(bc["R1"])
                    bc_q["R1"] = np.flip(bc_q["R1"], axis=0)

                if r1only:
                    if (bc_q["R1"] >= self.minq).all():
                        if self.valid_barcodes and (
                                bc["R1"] not in self.valid_barcodes):
                            fates["invalid barcode"] += 1
                        else:
                            barcodes[bc["R1"]] += 1
                            fates["valid barcode"] += 1
                    else:
                        fates["low quality barcode"] += 1
                else:
                    if bc["R1"] == bc["R2"]:
                        if self.valid_barcodes and (
                                bc["R1"] not in self.valid_barcodes):
                            fates["invalid barcode"] += 1
                        elif (np.maximum(bc_q["R1"], bc_q["R2"])
                              >= self.minq).all():
                            # Each site passes if either read is confident.
                            barcodes[bc["R1"]] += 1
                            fates["valid barcode"] += 1
                        else:
                            fates["low quality barcode"] += 1
                    else:
                        fates["R1 / R2 disagree"] += 1
            else:
                # invalid flanking sequence or N in barcode
                fates["unparseable barcode"] += 1

    barcodes = (
        pd.DataFrame(list(barcodes.items()), columns=["barcode", "count"])
        .sort_values(["count", "barcode"], ascending=[False, True])
        .assign(**add_cols)
        .reset_index(drop=True)
    )

    fates = (
        pd.DataFrame(list(fates.items()), columns=["fate", "count"])
        .sort_values(["count", "fate"], ascending=[False, True])
        .assign(**add_cols)
        .reset_index(drop=True)
    )

    return (barcodes, fates)
def FlipBoundary(self):
    """Reverse the traversal order of the stored boundary.

    The boundary sequence is reversed along axis 0 and then rolled by one
    position, so the element that was first stays first while all the
    others are visited in the opposite order. The result replaces
    ``self.__boundary``; nothing is returned.
    """
    reversedBoundary = flip(self.__boundary, 0)
    # The roll moves the original first element back to index 0.
    self.__boundary = roll(reversedBoundary, 1)
def __CalculateBoundary(self):
    """Trace the boundary loop of the mesh and store it in ``self.__boundary``.

    The method works in three phases:

    1. Scan the cells of ``self.__originalPolyData`` for an edge that has no
       neighbouring cell (a boundary edge). Cells are read as triangles:
       only point ids 0, 1 and 2 of each cell are used.
    2. Starting from that edge, walk from boundary edge to boundary edge
       through the cells around the current point until the walk returns
       to the starting point, collecting the visited point ids.
    3. Orient the loop: the cross product of the first two boundary points
       (taken relative to the mean of the boundary points) is compared with
       the direction to ``self.__apex``; if their dot product is negative
       the loop is reversed while keeping its first point in place.

    ``self.__pointData`` is indexed as ``[:, ids]``, i.e. one column per
    point (presumably 3 x N coordinates; verify against the caller).

    Raises:
        Exception: if the mesh has no boundary edge, if the boundary walk
            stops making progress before closing the loop, or if the
            computed centre has an unexpected shape.
    """
    startingPoint = None
    currentPoint = None
    foundBoundary = False
    boundary = []
    # Directed edges stored as tuples in sets for O(1) membership tests.
    visitedEdges = set()
    visitedBoundaryEdges = set()

    # Phase 1: find one edge that belongs to a single cell only.
    for cellId in range(self.__originalPolyData.GetNumberOfCells()):
        cellPointIdList = vtkIdList()
        self.__originalPolyData.GetCellPoints(cellId, cellPointIdList)
        cellEdges = [(cellPointIdList.GetId(0), cellPointIdList.GetId(1)),
                     (cellPointIdList.GetId(1), cellPointIdList.GetId(2)),
                     (cellPointIdList.GetId(2), cellPointIdList.GetId(0))]

        for edge in cellEdges:
            if edge in visitedEdges:
                continue
            visitedEdges.add(edge)

            singleCellEdgeNeighborIds = vtkIdList()
            self.__originalPolyData.GetCellEdgeNeighbors(
                cellId, edge[0], edge[1], singleCellEdgeNeighborIds)

            if singleCellEdgeNeighborIds.GetNumberOfIds() == 0:
                foundBoundary = True
                startingPoint, currentPoint = edge
                boundary.append(startingPoint)
                boundary.append(currentPoint)
                visitedBoundaryEdges.add((currentPoint, startingPoint))
                visitedBoundaryEdges.add((startingPoint, currentPoint))
                # Stop at the first boundary edge so that a cell with two
                # boundary edges cannot seed the loop twice.
                break

        if foundBoundary:
            break

    if not foundBoundary:
        raise Exception("The mesh provided has no boundary; not possible to do Quasi-Conformal Mapping on this dataset.")

    # Phase 2: follow boundary edges until the loop closes.
    while currentPoint != startingPoint:
        edgesBeforePass = len(visitedBoundaryEdges)

        neighboringCells = vtkIdList()
        self.__originalPolyData.GetPointCells(currentPoint, neighboringCells)

        for i in range(neighboringCells.GetNumberOfIds()):
            cell = neighboringCells.GetId(i)
            triangle = self.__originalPolyData.GetCell(cell)

            for j in range(triangle.GetNumberOfPoints()):
                if triangle.GetPointId(j) != currentPoint:
                    continue

                # The two edges of this triangle that touch currentPoint.
                j1 = (j + 1) % 3
                j2 = (j + 2) % 3
                edge1 = (triangle.GetPointId(j), triangle.GetPointId(j1))
                edge2 = (triangle.GetPointId(j), triangle.GetPointId(j2))

                edgeNeighbors1 = vtkIdList()
                edgeNeighbors2 = vtkIdList()
                self.__originalPolyData.GetCellEdgeNeighbors(
                    cell, edge1[0], edge1[1], edgeNeighbors1)
                self.__originalPolyData.GetCellEdgeNeighbors(
                    cell, edge2[0], edge2[1], edgeNeighbors2)

                if edgeNeighbors1.GetNumberOfIds() == 0:
                    if (edge1[1], edge1[0]) not in visitedBoundaryEdges:
                        # The closing step reaches a point that is already
                        # listed, so only new points are appended.
                        if edge1[1] not in boundary:
                            boundary.append(edge1[1])
                        visitedBoundaryEdges.add((edge1[0], edge1[1]))
                        visitedBoundaryEdges.add((edge1[1], edge1[0]))
                        currentPoint = edge1[1]
                        break

                if edgeNeighbors2.GetNumberOfIds() == 0:
                    if (edge2[1], edge2[0]) not in visitedBoundaryEdges:
                        if edge2[1] not in boundary:
                            boundary.append(edge2[1])
                        visitedBoundaryEdges.add((edge2[0], edge2[1]))
                        visitedBoundaryEdges.add((edge2[1], edge2[0]))
                        currentPoint = edge2[1]
                        break

        # Every step forward records at least one new edge. A pass that
        # records none would repeat identically forever, so fail loudly.
        if len(visitedBoundaryEdges) == edgesBeforePass:
            raise Exception("Boundary tracing stalled before returning to the starting point; the mesh boundary may be non-manifold.")

    # Phase 3: orient the loop relative to the apex.
    boundary = asarray(boundary, dtype=int)
    center = mean(self.__pointData[:, boundary], axis=1)

    vector1 = asarray(self.__pointData[:, boundary[0]] - center)
    vector2 = asarray(self.__pointData[:, boundary[1]] - center)
    vectorNormal = cross(vector1, vector2)
    vectorApex = self.__pointData[:, self.__apex] - center

    # Value comparison (not identity) for the shape sanity check.
    if len(center.shape) != 1 and center.shape[0] != 3:
        raise Exception("Something went wrong. Probably forgot to transpose this. Contact maintainer.")

    if dot(vectorApex, vectorNormal) < 0:
        # Reverse the loop direction but keep the same first point.
        boundary = flip(boundary, 0)
        boundary = roll(boundary, 1)

    self.__boundary = boundary
def FlipBoundary(self):
    """Reverse the traversal order of the stored boundary.

    ``self.__boundary`` is reversed along axis 0 and then rolled by one
    position, so the element that was first stays first while the rest
    are visited in the opposite order. The attribute is updated in place;
    nothing is returned.
    """
    # NumPy is called directly: the ``scipy.flip`` / ``scipy.roll`` aliases
    # were deprecated in SciPy 1.4 and are no longer provided by current
    # SciPy releases.
    import numpy as np

    # The roll moves the original first element back to index 0.
    self.__boundary = np.roll(np.flip(self.__boundary, 0), 1)
def distance_transform_lin(im, axis=0, mode='both'):
    r"""
    Replaces each void voxel with the linear distance to the nearest solid
    voxel along the specified axis.

    Parameters
    ----------
    im : ND-array
        The image of the porous material with ``True`` values indicating the
        void phase (or phase of interest).  The image is also used as a
        multiplicative mask on the result, so it is expected to be boolean
        or 0/1 valued.  Any number of dimensions is accepted.

    axis : int
        The direction along which the distance should be measured, the
        default is 0 (i.e. along the x-direction).  Negative values count
        from the last axis.

    mode : string
        Controls how the distance is measured.  Options are:

        'forward' - Distances are measured in the increasing direction
        along the specified axis

        'reverse' - Distances are measured in the reverse direction.
        *'backward'* is also accepted.

        'both' - Distances are calculated in both directions (by
        recursively calling itself), then reporting the minimum value of
        the two results.

        Any other value is treated as 'forward'.

    Returns
    -------
    image : ND-array
        A copy of ``im`` with each foreground voxel containing the distance
        to the nearest background along the specified axis.

    Notes
    -----
    A warning is issued when ``im`` has a singleton axis.  Because the
    'reverse' and 'both' modes call this function recursively, the check
    runs again on each nested call.

    """
    # NumPy is called directly: the NumPy aliases in the ``scipy``
    # namespace (``scipy.flip``, ``scipy.cumsum``, ...) were deprecated in
    # SciPy 1.4 and are no longer provided by current SciPy releases.
    import numpy as np

    if im.ndim != im.squeeze().ndim:
        warnings.warn('Input image conains a singleton axis:' + str(im.shape) +
                      ' Reduce dimensionality with np.squeeze(im) to avoid' +
                      ' unexpected behavior.')

    if mode in ['backward', 'reverse']:
        # Reverse direction == forward transform of the mirrored image.
        im = np.flip(im, axis)
        im = distance_transform_lin(im=im, axis=axis, mode='forward')
        return np.flip(im, axis)
    if mode in ['both']:
        im_f = distance_transform_lin(im=im, axis=axis, mode='forward')
        im_b = distance_transform_lin(im=im, axis=axis, mode='backward')
        return np.minimum(im_f, im_b)

    # Running count of foreground voxels along the axis.
    b = np.cumsum(im > 0, axis=axis)
    # Keep the count only at background voxels; the difference is negative
    # (minus that count) on the step that leaves a background voxel.
    c = np.diff(b*(im == 0), axis=axis)
    # Carry the most negative offset forward along the axis.
    d = np.minimum.accumulate(c, axis=axis)
    # ``diff`` shortened the axis by one: restore it with a leading zero.
    # Built per dimension so images of any rank are supported.
    pad_width = [(0, 0)] * im.ndim
    pad_width[axis] = (1, 0)
    e = np.pad(d, pad_width=pad_width, mode='constant', constant_values=0)
    # Count minus offset gives the run length; ``im`` zeroes the background.
    return im*(b + e)
def FlipBoundary(self):
    """Reverse the traversal order of the stored boundary.

    ``self.boundary`` is reversed along axis 0 and then rolled by one
    position, so the element that was first stays first while the rest
    are visited in the opposite order. The attribute is updated in place;
    nothing is returned.
    """
    # NumPy is called directly: the ``scipy.flip`` / ``scipy.roll`` aliases
    # were deprecated in SciPy 1.4 and are no longer provided by current
    # SciPy releases.
    import numpy as np

    # The roll moves the original first element back to index 0.
    self.boundary = np.roll(np.flip(self.boundary, 0), 1)