def _getCisDistance(self, chrom=None):
    """
    Collect the cis distance of every cis line, grouped by chromosome.

    Params:
    --------
    chrom: `str` or `list`, optional
        chromosome(s) to keep; when None or empty every chromosome
        yielded by `self.getCisLine()` is kept [default: None]

    Returns:
    --------
    out: `OrderedDict`
        chromosome name -> list of cis distances, keyed in order of
        first appearance; the same object is stored on `self.cis_dist_db`

    Examples:
    --------
    >>> vp.getCisDistance()
    {'Chr1': [32, 4434, 23223, ...], 'Chr2': [2342, ...]}
    """
    # Normalise the filter once, not on every line of the input.
    wanted = listify(chrom) if chrom else None

    cis_dist_db = OrderedDict()
    for vpl in self.getCisLine():
        if wanted is not None and vpl.chr1 not in wanted:
            continue
        cis_dist_db.setdefault(vpl.chr1, []).append(vpl.getCisDistance())

    self.cis_dist_db = cis_dist_db
    return self.cis_dist_db
def __init__(self, filename, exclude=None,
             exclude_contig=['tig', 'Un', 'Sy', 'scaffold', 'ctg', 'Pt', 'Mt'],
             mem_cache='.'):
    """
    Check the input file, record the exclusion settings, load the
    chromosome sizes and set up the on-disk cache for `getGCBin`.

    Params:
    --------
    filename: `str`
        path checked with `check_file_exists` and stored on `self.filename`
    exclude: passed through `listify` and stored on `self.exclude`
        [default: None]
    exclude_contig: `list` or `str`
        passed through `listify` and stored on `self.exclude_contig`
        [default: ['tig', 'Un', 'Sy', 'scaffold', 'ctg', 'Pt', 'Mt']]
    mem_cache: `str`
        location handed to `Memory` [default: '.']
    """
    check_file_exists(filename)
    self.filename = filename
    self.exclude = listify(exclude)

    # Copy list arguments so the instance never aliases the shared default
    # list (or the caller's list); later mutation would otherwise leak
    # between instances.
    if isinstance(exclude_contig, list):
        exclude_contig = list(exclude_contig)
    self.exclude_contig = listify(exclude_contig)

    # NOTE(review): getChrSizes() presumably populates self.chromLabels,
    # which is read right below -- confirm against its definition.
    self.getChrSizes()
    self.idx2label = dict(enumerate(self.chromLabels))
    self.label2idx = {chrom: i for i, chrom in enumerate(self.chromLabels)}

    self.mem_cache = mem_cache
    self.memory = Memory(mem_cache, verbose=0)
    # Public entry point is the cached wrapper around _getGCBin.
    self.getGCBin = self.memory.cache(self._getGCBin)
def _getGCBin(self, window, chr=[], correct=True, thread=24):
    """
    Calculate the GC content of consecutive windows on each chromosome.

    Params:
    --------
    window: `int`
        size of each bin
    chr: `list` or `str`
        chromosome label(s) to process; when empty every label in
        `self.chromLabels` is used [default: `[]`]
    correct: `bool`
        forwarded unchanged to `self.getGC` [default: `True`]
    thread: `int`
        kept for backward compatibility; the computation runs serially
        and this value is not used [default: `24`]

    Returns:
    --------
    out: `list`
        one float array per requested chromosome, in request order, each
        holding `chromSize // window + 1` values; the same list is stored
        on `self.gcBin`

    Examples:
    --------
    >>> getGCbin(1000000)
    [[0.5, 0.2, 0.5 ...], ...]
    """
    self.gcBin = []
    chroms = listify(chr) if chr else self.chromLabels
    chrom_indices = [self.label2idx[label] for label in chroms]

    for chrom in chrom_indices:
        # Use the `window` argument (not a stale self.window) so the result
        # always matches what the caller asked for. The extra bin covers
        # the partial window at the end of the chromosome.
        n_bins = int(self.chromSizes[chrom] // window) + 1
        # Builtin `float` replaces the `np.float` alias removed in NumPy 1.24.
        gc = np.ones(n_bins, dtype=float)
        for i in range(n_bins):
            gc[i] = self.getGC(chrom, i * window, (i + 1) * window,
                               correct=correct)
        # Append the filled array instead of indexing self.gcBin by the
        # chromosome index, which is out of range when only a subset of
        # chromosomes is requested.
        self.gcBin.append(gc)

    logging.debug('Successful getGCBin')
    return self.gcBin
def savefig(figname, dpi=300, bbox_inches=None, formats=['pdf', 'png'], cleanup=True):
    """
    Save the current matplotlib figure in one or more formats.

    Params:
    -------
    figname: `str`
        output name; its extension (if any) is added to `formats`,
        otherwise "pdf" is added
    dpi: `int`
        dpi for figure [default: 300]
    bbox_inches: `str`
        bbox_inches param for plt.savefig [default: None]
    formats: `list` or `str`
        picture formats [default: ["pdf", "png"]]
    cleanup: `bool`
        if True, reset rcParams with plt.rcdefaults() after saving
        [default: True]

    Returns:
    --------
    out: None; one file `<prefix>.<format>` is written per format

    Examples:
    --------
    >>> savefig('out')
    """
    # Work on a copy so neither the shared default list nor the caller's
    # list grows when the extension below is appended.
    formats = list(listify(formats))

    figprefix, ext = op.splitext(figname)
    # splitext keeps the leading dot (".png"); strip it so the value can be
    # used as a format name. Fall back to pdf when there is no extension.
    fmt = ext[1:].lower() or "pdf"
    if fmt not in formats:
        formats.append(fmt)

    for fmt in formats:
        outname = "{}.{}".format(figprefix, fmt)
        plt.savefig(outname, dpi=dpi, format=fmt, bbox_inches=bbox_inches)
        msg = "Figure saved to `{}`".format(outname)
        logging.debug(msg)

    # Reset rcParams after savefig.
    if cleanup:
        plt.rcdefaults()
def plotMultiLR(data, species, output='out_multiSpecies', workdir='./', ncols=5):
    """
    Draw one linear-regression scatter panel per entry of `data` and save
    the resulting grid of panels.

    Params:
    --------
    data: `dict`
        maps a pair key to two objects exposing a `score` attribute;
        the first two items of the key are used as x and y labels
    species:
        accepted but not used by this function
    output: `str`
        output figname [default: out_multiSpecies]
    workdir: `str`
        path to workdir [default: ./]
    ncols: `int`
        maximum number of columns of the picture [default: 5]

    Returns:
    --------
    out: None; the picture is written through `savefig`

    Examples:
    --------
    >>> data = getPCValues(species)
    >>> plotMultiLR(data)
    """
    n_panels = len(data)
    nrows = int(math.ceil(float(n_panels) / ncols))
    ncols = min(ncols, n_panels)
    figsize = (ncols * 5.2, nrows * 5)

    # Both layouts build the figure the same way; only the handling of the
    # returned axes differs.
    fig, grid = plt.subplots(nrows, ncols, figsize=figsize)
    if ncols < 2:
        axes = listify(grid)
    else:
        plt.subplots_adjust(wspace=0.28)
        axes = trim_axes(grid, n_panels)

    for idx, pairs in enumerate(data):
        bg1, bg2 = data[pairs]
        plotLineRegress(axes[idx], bg1.score, bg2.score,
                        xlabel=pairs[0], ylabel=pairs[1])

    savefig("{}/{}".format(workdir, output), bbox_inches='tight')
def __init__(self, cmds, threads=4):
    """
    Record the commands and the thread count, then call `self.run()`.

    Params:
    --------
    cmds: command(s), passed through `listify` and stored on `self.cmds`
    threads: `int`
        value stored on `self.threads` [default: 4]
    """
    self.cmds, self.threads = listify(cmds), threads
    # Constructing the object triggers the run immediately.
    self.run()