def get_sequences(self, locations, width=200):
    """Return one genomic window per location.

    Each location is indexed positionally: ``loc[0]`` is the key used to
    look up a sequence in ``self.genome``, ``loc[1]`` and ``loc[2]`` are
    positions (converted with ``int()``), and ``loc[3]`` is the strand.

    * ``loc[3] == '+'``: the window is centred on ``loc[1]``.
    * any other strand: the window is centred on ``loc[2] + 1`` and the
      extracted sequence is passed through ``utils.reverse_complement``.

    The window covers ``2 * (width // 2)`` positions.

    Returns a list with one extracted sequence per location, in input order.
    """
    # Floor division keeps the slice bounds integral even when ``/`` means
    # true division (Python 3, or ``from __future__ import division``).
    half = width // 2

    # need to ensure that most locations on the forward
    # and reverse strands are mappable
    seqs = []
    for loc in locations:
        forward = loc[3] == '+'
        # Non-forward windows are shifted one position to the right.
        center = int(loc[1]) if forward else int(loc[2]) + 1
        # NOTE(review): if center < half the start index is negative; how
        # self.genome treats negative slice bounds is not visible here.
        window = utils.makestr(self.genome[loc[0]][center - half:center + half])
        seqs.append(window if forward else utils.reverse_complement(window))
    return seqs
def getnull(self, locations, sample='', width=200):
    """Compute a normalised sequence-based null for each location.

    Each location is indexed positionally: ``loc[0]`` is the key into
    ``self.genome``, ``loc[1]`` / ``loc[2]`` are positions (converted with
    ``int()``) and ``loc[3]`` is the strand.

    locations : iterable of location records as described above.
    sample    : key into ``self.cutrate``; with the default ``''`` the
                whole ``self.cutrate`` object is passed on unchanged.
    width     : window size handed to ``sequence_null.getnull``.

    Returns the array produced by ``sequence_null.getnull`` with exact
    zeros replaced by 1e-8 and then divided by ``utils.insum(null, [1])``.
    """
    # Floor division keeps the slice bounds integral even when ``/`` means
    # true division (Python 3, or ``from __future__ import division``).
    half = width // 2
    left = self.k // 2
    right = self.k // 2 - 1

    cutrate = self.cutrate if sample == '' else self.cutrate[sample]

    # 1 marks '+' strand records, 0 marks everything else.
    strand = np.array([1 if loc[3] == '+' else 0 for loc in locations])

    # '+' records are centred on loc[1], all others on loc[2].
    # removed a +1 for the - strand
    windows = []
    for loc in locations:
        center = int(loc[1]) if loc[3] == '+' else int(loc[2])
        # The window is padded by `left` before and `right` after the
        # central region (presumably so k-mers at the edges are complete).
        start = center - half - left
        stop = center + half + right
        windows.append(utils.makestr(self.genome[loc[0]][start:stop]))
    sequences = np.array(windows)

    null = sequence_null.getnull(sequences, strand, cutrate, width, self.k)
    # Replace exact zeros with a small value before normalising.
    null[null == 0] = 1e-8
    # presumably utils.insum sums over axis 1 so each row sums to one --
    # confirm against utils.
    null = null / utils.insum(null, [1])
    return null
def _make_cutrate(self, data):
    """Turn per-key counts into per-key [forward, reverse] rates.

    data : mapping whose values are indexed as ``val[0]``, ``val[1]`` and
           ``val[2]``; the first two are divided by the third.

    Returns a dict keyed by ``utils.makestr(key)`` holding
    ``[val[0] / val[2], val[1] / val[2]]`` for every entry, plus a
    ``'mean'`` entry built from the sums of each column over all entries.
    """
    # items()/values() exist on both Python 2 and Python 3 mappings,
    # whereas iteritems()/itervalues() are Python 2 only.
    # NOTE(review): with plain integer counts on Python 2, ``/`` would
    # floor-divide -- confirm the counts are floating point.
    cutrate = {}
    for key, val in data.items():
        cutrate[utils.makestr(key)] = [val[0] / val[2], val[1] / val[2]]

    counts = list(data.values())
    fwdtotal = np.sum([val[0] for val in counts])
    revtotal = np.sum([val[1] for val in counts])
    total = np.sum([val[2] for val in counts])

    # Pooled rates over all keys, stored under the reserved key 'mean'.
    cutrate['mean'] = [fwdtotal / total, revtotal / total]
    return cutrate
def getnull(self, locations, sample='', width=200):
    """Compute a normalised sequence-based null for each location.

    Each location is indexed positionally: ``loc[0]`` is the key into
    ``self.genome``, ``loc[1]`` / ``loc[2]`` are positions (converted with
    ``int()``) and ``loc[3]`` is the strand.

    locations : iterable of location records as described above.
    sample    : key into ``self.cutrate``; with the default ``''`` the
                whole ``self.cutrate`` object is passed on unchanged.
    width     : window size handed to ``sequence_null.getnull``.

    Returns the array produced by ``sequence_null.getnull`` with exact
    zeros replaced by 1e-8 and then divided by ``utils.insum(null, [1])``.
    """
    # Floor division keeps the slice bounds integral even when ``/`` means
    # true division (Python 3, or ``from __future__ import division``).
    half = width // 2
    left = self.k // 2
    right = self.k // 2 - 1

    cutrate = self.cutrate if sample == '' else self.cutrate[sample]

    # 1 marks '+' strand records, 0 marks everything else.
    strand = np.array([1 if loc[3] == '+' else 0 for loc in locations])

    # '+' records are centred on loc[1], all others on loc[2].
    # removed a +1 for the - strand
    windows = []
    for loc in locations:
        center = int(loc[1]) if loc[3] == '+' else int(loc[2])
        # The window is padded by `left` before and `right` after the
        # central region (presumably so k-mers at the edges are complete).
        start = center - half - left
        stop = center + half + right
        windows.append(utils.makestr(self.genome[loc[0]][start:stop]))
    sequences = np.array(windows)

    null = sequence_null.getnull(sequences, strand, cutrate, width, self.k)
    # Replace exact zeros with a small value before normalising.
    null[null == 0] = 1e-8
    # presumably utils.insum sums over axis 1 so each row sums to one --
    # confirm against utils.
    null = null / utils.insum(null, [1])
    return null
def _make_cutrate(self, data):
    """Turn per-key counts into per-key [forward, reverse] rates.

    data : mapping whose values are indexed as ``val[0]``, ``val[1]`` and
           ``val[2]``; the first two are divided by the third.

    Returns a dict keyed by ``utils.makestr(key)`` holding
    ``[val[0] / val[2], val[1] / val[2]]`` for every entry, plus a
    ``'mean'`` entry built from the sums of each column over all entries.
    """
    # items()/values() exist on both Python 2 and Python 3 mappings,
    # whereas iteritems()/itervalues() are Python 2 only.
    # NOTE(review): with plain integer counts on Python 2, ``/`` would
    # floor-divide -- confirm the counts are floating point.
    cutrate = {}
    for key, val in data.items():
        cutrate[utils.makestr(key)] = [val[0] / val[2], val[1] / val[2]]

    counts = list(data.values())
    fwdtotal = np.sum([val[0] for val in counts])
    revtotal = np.sum([val[1] for val in counts])
    total = np.sum([val[2] for val in counts])

    # Pooled rates over all keys, stored under the reserved key 'mean'.
    cutrate['mean'] = [fwdtotal / total, revtotal / total]
    return cutrate