Python sentinelize примеры, bbcflib.gfminer.common.sentinelize Python примеры использования

Пример #1

0

Показать файл

Файл: test_gfminer.py Проект: MolbioUnige/bbcflib

 def test_sentinelize(self):
     """The last item produced by a sentinelized stream is the sentinel."""
     rows = [(10, 12, 0.5), (14, 15, 1.2)]
     guarded = sentinelize(fstream(rows, fields=['start', 'end', 'score']), 'Z')
     # Drain the stream completely; only the final item is checked.
     emitted = [item for item in guarded]
     self.assertEqual(emitted[-1], 'Z')

Пример #2

0

Показать файл

 def _stream(ts, tf):
     """Yield every item of *tf* extended with one combined score per track of *ts*.

     For each feature, the score items of each track overlapping it are
     expanded to one value per overlapping position and handed to the
     averaging function together with ``1.0 / feature_length``.
     `method`, `_score_functions` and `_arithmetic_mean` are taken from the
     enclosing scope.
     """
     sources = [common.sentinelize(track, [sys.maxint] * len(track.fields)) for track in ts]
     # One buffer of pending score items per track, primed with a dummy item
     windows = [[(-sys.maxint, -sys.maxint, 0.0)] for _ in ts]
     start_idx = tf.fields.index('start')
     end_idx = tf.fields.index('end')
     if hasattr(method, '__call__'):
         def mean_fn(scores, denom):
             return method(scores)
     else:
         mean_fn = _score_functions.get(method, _arithmetic_mean)
     for feature in tf:
         ystart = feature[start_idx]
         yend = feature[end_idx]
         means = []
         for k in range(len(ts)):
             window = windows[k]
             latest = window[-1]
             # Read score items until one starts at or after the feature end
             while latest[0] < yend:
                 latest = sources[k].next()
                 if latest[1] > ystart:
                     window.append(latest)
             # Skip leading items that end at or before the feature start
             drop = 0
             while window[drop][1] <= ystart:
                 drop += 1
             window = window[drop:]
             windows[k] = window
             per_base = []
             for item in window:
                 if yend <= item[0]:
                     continue
                 lo = max(item[0], ystart)
                 hi = min(item[1], yend)
                 per_base.extend([item[2]] * (hi - lo))
             means.append(mean_fn(per_base, 1.0 / (yend - ystart)))
         yield tuple(feature) + tuple(means)

Пример #3

0

Показать файл

Файл: scores.py Проект: bbcf/bbcflib

def merge_scores(trackList, method="arithmetic"):
    """
    Build a stream holding the per-base combined score of several score tracks::

        X1: __________666666666______
        X2: _____2222222222__________
        R:  _____11111444443333______

    :param trackList: list of FeatureStream objects.
    :param method: (str) type of average: one of 'arithmetic','geometric', or 'sum' (no average).
        Names not found in `_score_functions` fall back to `_arithmetic_mean`;
        a callable is applied directly to the list of scores.
    :rtype: FeatureStream
    """
    guarded = [FeatureStream(common.sentinelize(src, [sys.maxint] * len(src.fields)), src.fields)
               for src in trackList]
    tracks = [common.reorder(g, ["start", "end", "score"]) for g in guarded]
    # Fields of the first track that every track also provides
    fields = []
    for name in tracks[0].fields:
        if all([name in t.fields for t in tracks]):
            fields.append(name)
    elements = [list(t.next()) for t in tracks]
    track_denom = 1.0 / len(trackList)

    if hasattr(method, "__call__"):
        def mean_fn(scores, denom):
            return method(scores)
    else:
        mean_fn = _score_functions.get(method, _arithmetic_mean)

    # Discard tracks whose very first item is already the sentinel
    for pos in reversed(xrange(len(tracks))):
        if elements[pos][0] == sys.maxint:
            tracks.pop(pos)
            elements.pop(pos)

    def _stream(tracks):
        while tracks:
            start = min([e[0] for e in elements])
            later_starts = [e[0] for e in elements if e[0] > start]
            end = min(later_starts + [e[1] for e in elements])
            scores = [e[2] for e in elements if e[1] > start and e[0] < end]
            extras = ()
            if len(fields) > 3:
                merged = []
                for col in range(3, len(fields)):
                    values = [str(e[col]) for e in elements
                              if e[col] is not None and e[1] > start and e[0] < end]
                    if all([v == values[0] for v in values]):
                        merged.append(values[0])
                    else:
                        merged.append("|".join(values))
                extras = tuple(merged)
            yield (start, end, mean_fn(scores, track_denom)) + extras
            # Advance every track past `end`, going backwards so pops are safe
            for pos in reversed(xrange(len(tracks))):
                item = elements[pos]
                if item[0] < end:
                    item[0] = end
                if item[1] <= end:
                    item = list(tracks[pos].next())
                    elements[pos] = item
                if item[0] == sys.maxint:
                    tracks.pop(pos)
                    elements.pop(pos)

    return FeatureStream(_stream(tracks), fields)

Пример #4

0

Показать файл

Файл: scores.py Проект: bbcf/bbcflib

 def _stream(ts, tf):
     """Yield the portions of the items of *ts* that overlap items of *tf*.

     Each result is ``(start, end) + tuple(x[2:]) + info`` where *x* is the
     item of *ts* and *info* holds the values of the *tf* fields absent from
     *ts* (only when `annotate` is set). `stranded`, `annotate` and `strict`
     are taken from the enclosing scope.
     """
     tf = common.sentinelize(tf, [sys.maxint] * len(tf.fields))
     info_idx = []
     for pos, name in enumerate(tf.fields):
         if name not in ts.fields:
             info_idx.append(pos)
     if stranded:
         ts_strand_idx = ts.fields.index("strand")
         tf_strand_idx = tf.fields.index("strand")

         def same_strand(a, b):
             return a[ts_strand_idx] == b[tf_strand_idx]
     else:
         def same_strand(a, b):
             return True
     pending = []
     upcoming = (-sys.maxint, -sys.maxint, 0.0)
     for x in ts:
         xstart, xend = x[0], x[1]
         # Read features until one starts at or after the end of x
         while upcoming[0] < xend:
             if upcoming[1] > xstart:
                 pending.append(upcoming)
             upcoming = tf.next()
         # Drop the leading features that end at or before the start of x
         if pending:
             for first_kept, feat in enumerate(pending):
                 if feat[1] > xstart:
                     pending = pending[first_kept:]
                     break
             else:
                 pending = [upcoming]
         # Emit the intersections
         for y in pending:
             if not same_strand(x, y):
                 continue
             if annotate:
                 info = tuple([y[k] for k in info_idx])
             else:
                 info = ()
             if strict and (y[0] > xstart or y[1] < xend):
                 continue
             if y[0] >= xend:
                 continue  # keep for next iteration
             start = max(y[0], xstart)
             end = min(y[1], xend)
             yield (start, end) + tuple(x[2:]) + info

Пример #5

0

Показать файл

Файл: scores.py Проект: bbcf/bbcflib

 def _stream(ts, tf):
     """Append to each item of *tf* one combined score for every track in *ts*.

     The score items of a track overlapping the feature are repeated once per
     overlapping position, then reduced by the averaging function, which also
     receives ``1.0 / (end - start)`` of the feature. `method`,
     `_score_functions` and `_arithmetic_mean` come from the enclosing scope.
     """
     readers = [common.sentinelize(trk, [sys.maxint] * len(trk.fields)) for trk in ts]
     # Per-track buffer of score items still relevant, seeded with a dummy item
     buffers = [[(-sys.maxint, -sys.maxint, 0.0)] for _ in ts]
     start_idx = tf.fields.index("start")
     end_idx = tf.fields.index("end")
     if hasattr(method, "__call__"):
         def mean_fn(scores, denom):
             return method(scores)
     else:
         mean_fn = _score_functions.get(method, _arithmetic_mean)
     for feat in tf:
         ystart = feat[start_idx]
         yend = feat[end_idx]
         combined = []
         for idx in range(len(ts)):
             buf = buffers[idx]
             newest = buf[-1]
             # Load score items until one starts at or after the feature end
             while newest[0] < yend:
                 newest = readers[idx].next()
                 if newest[1] > ystart:
                     buf.append(newest)
             # Forget the leading items ending at or before the feature start
             skip = 0
             while buf[skip][1] <= ystart:
                 skip += 1
             buf = buf[skip:]
             buffers[idx] = buf
             expanded = []
             for piece in buf:
                 if yend <= piece[0]:
                     continue
                 left = max(piece[0], ystart)
                 right = min(piece[1], yend)
                 expanded.extend([piece[2]] * (right - left))
             combined.append(mean_fn(expanded, 1.0 / (yend - ystart)))
         yield tuple(feat) + tuple(combined)

Пример #6

0

Показать файл

 def _stream(ts, tf):
     """Generate the overlaps between the items of *ts* and the items of *tf*.

     Results have the form ``(start, end) + tuple(x[2:]) + info``: *x* is the
     current item of *ts*; *info* carries the values of the *tf* fields that
     *ts* lacks, and is empty unless `annotate` is set. `stranded`,
     `annotate` and `strict` are read from the enclosing scope.
     """
     tf = common.sentinelize(tf, [sys.maxint] * len(tf.fields))
     info_idx = []
     for col, fname in enumerate(tf.fields):
         if fname not in ts.fields:
             info_idx.append(col)
     if stranded:
         ts_strand_idx = ts.fields.index('strand')
         tf_strand_idx = tf.fields.index('strand')

         def same_strand(left, right):
             return left[ts_strand_idx] == right[tf_strand_idx]
     else:
         def same_strand(left, right):
             return True
     held = []
     lookahead = (-sys.maxint, -sys.maxint, 0.0)
     for x in ts:
         xstart, xend = x[0], x[1]
         # Pull features until the lookahead starts at or after the end of x
         while lookahead[0] < xend:
             if lookahead[1] > xstart:
                 held.append(lookahead)
             lookahead = tf.next()
         # Discard leading features that end at or before the start of x
         if held:
             for keep_from, candidate in enumerate(held):
                 if candidate[1] > xstart:
                     held = held[keep_from:]
                     break
             else:
                 held = [lookahead]
         # Emit the intersections
         for y in held:
             if not same_strand(x, y):
                 continue
             if annotate:
                 info = tuple([y[k] for k in info_idx])
             else:
                 info = ()
             if strict and (y[0] > xstart or y[1] < xend):
                 continue
             if y[0] >= xend:
                 continue  # keep for next iteration
             start = max(y[0], xstart)
             end = min(y[1], xend)
             yield (start, end) + tuple(x[2:]) + info

Пример #7

0

Показать файл

def merge_scores(trackList, method='arithmetic'):
    """
    Return a stream with the per-base combination of several score tracks::

        X1: __________666666666______
        X2: _____2222222222__________
        R:  _____11111444443333______

    :param trackList: list of FeatureStream objects.
    :param method: (str) type of average: one of 'arithmetic','geometric', or 'sum' (no average).
        A name missing from `_score_functions` falls back to `_arithmetic_mean`;
        a callable is invoked on the list of scores instead.
    :rtype: FeatureStream
    """
    wrapped = [
        FeatureStream(common.sentinelize(trk, [sys.maxint] * len(trk.fields)), trk.fields)
        for trk in trackList
    ]
    tracks = [common.reorder(w, ['start', 'end', 'score']) for w in wrapped]
    # Common fields, in the order of the first track
    fields = []
    for fname in tracks[0].fields:
        if all([fname in t.fields for t in tracks]):
            fields.append(fname)
    elements = [list(t.next()) for t in tracks]
    track_denom = 1.0 / len(trackList)

    if hasattr(method, '__call__'):
        def mean_fn(scores, denom):
            return method(scores)
    else:
        mean_fn = _score_functions.get(method, _arithmetic_mean)

    # Remove tracks that start directly with the sentinel
    for idx in reversed(xrange(len(tracks))):
        if elements[idx][0] == sys.maxint:
            tracks.pop(idx)
            elements.pop(idx)

    def _stream(tracks):
        while tracks:
            start = min([el[0] for el in elements])
            candidates = [el[0] for el in elements if el[0] > start]
            candidates = candidates + [el[1] for el in elements]
            end = min(candidates)
            scores = [el[2] for el in elements if el[1] > start and el[0] < end]
            tail = ()
            if len(fields) > 3:
                joined = []
                for col in range(3, len(fields)):
                    vals = [
                        str(el[col]) for el in elements
                        if el[col] is not None and el[1] > start and el[0] < end
                    ]
                    if all([v == vals[0] for v in vals]):
                        joined.append(vals[0])
                    else:
                        joined.append("|".join(vals))
                tail = tuple(joined)
            yield (start, end, mean_fn(scores, track_denom)) + tail
            # Move each track beyond `end`; iterate backwards so pops are safe
            for idx in reversed(xrange(len(tracks))):
                cur = elements[idx]
                if cur[0] < end:
                    cur[0] = end
                if cur[1] <= end:
                    cur = list(tracks[idx].next())
                    elements[idx] = cur
                if cur[0] == sys.maxint:
                    tracks.pop(idx)
                    elements.pop(idx)

    return FeatureStream(_stream(tracks), fields)

Пример #8

0

Показать файл

def concatenate(trackList, fields=None, remove_duplicates=False, group_by=None, aggregate={}):
    """
    Returns one stream containing all features from a list of tracks, ordered by *fields*.

    When *trackList* holds a single track, that track is returned unchanged.
    Otherwise only the fields present in every track are kept, and the output
    field order is: 'chr' (if kept), 'start', 'end', 'name' (if kept), then the rest.

    :param trackList: list of FeatureStream objects.
    :param fields: (list of str) list of fields to keep in the output (at least ['start','end']).
        Defaults to the fields of the first track.
    :param remove_duplicates: (bool) whether to remove items that are identical in several
        of the tracks in *trackList*. [False]
    :param group_by: (list of str) if specified, elements having all values for these fields in
        common will be merged into a single element. Other fields are merged according to *aggregate*
        if specified, or `common.generic_merge` by default.
    :param aggregate: (dict) for each field name given as a key, its value is the function
        to apply to the vector containing all different values for this field in order to merge them.
        E.g. ``{'score': lambda x: sum(x)}`` will return the sum of all scores in the output.
        The default dict is only read (via ``.get``) in this function, never modified.
    :rtype: FeatureStream
    """
    def _find_min(feat_tuple):
        """Return the index of the 'smallest' element amongst a tuple of features from
        different tracks. Priority is given to the first field; if the first field items
        are equal amongst several elements, it looks at the second field, a.s.o.
        Field values are compared through their ``hash()``, not directly."""
        nmin = 0
        xmin = feat_tuple[0]
        for n,x in enumerate(feat_tuple[1:]):
            # Items whose first field is sys.maxint (the sentinel) never become the minimum
            if x[0] == sys.maxint: continue
            for k in range(len(x)):
                if cmp(hash(x[k]),hash(xmin[k]))<0:
                    xmin = x
                    nmin = n+1  # n indexes feat_tuple[1:], hence the offset
                    break
                elif cmp(hash(x[k]),hash(xmin[k]))>0:
                    break
                # equal hashes: fall through to compare the next field
        return nmin

    def _weave(_t,N):
        """Generator yielding all features represented in a list of tracks *_t*,
        sorted w.r.t the *N* first fields. Reads `remove_duplicates`, `group_by`
        and `aggregate` from the enclosing call."""
        current = [x.next()[:N] for x in _t] # first item of each track, cut to N fields
        allfields = [t.fields for t in _t]
        n = _find_min(current)
        last = current[n]
        current[n] = _t[n].next()[:N]
        # Without grouping the first item can be emitted right away;
        # with grouping it is held in `last` until a different group shows up.
        if not group_by: yield last
        while 1:
            # Remove duplicates: advance every track whose current item also
            # appears as the current item of another track, until all differ
            if remove_duplicates:
                while not all([current.count(x)==1 for x in current]):
                    for k in range(len(current)):
                        if current.count(current[k]) > 1:
                            current[k] = _t[k].next()[:N]
            n = _find_min(current)
            # The minimum being the sentinel ends the loop
            if current[n][0] == sys.maxint: break
            if group_by:
                idx = [allfields[n].index(f) for f in group_by]
                if all(current[n][i] == last[i] for i in idx):
                    # Same group: keep the group_by values, merge every other field
                    last = tuple(current[n][i] if i in idx \
                            else aggregate.get(allfields[n][i],common.generic_merge)((last[i],current[n][i])) \
                            for i in range(len(allfields[n]))) # merge last and current
                else:
                    yield last
                    last = current[n]
            else:
                yield current[n]
            current[n] = _t[n].next()[:N]
        # Flush the group still being accumulated
        if group_by: yield last

    if len(trackList) == 1: return trackList[0]
    if fields is None:
        fields = trackList[0].fields
    # Keep only the fields that every track provides
    fields = [f for f in fields if all(f in t.fields for t in trackList)]
    # Output field order
    _of = ['start','end']
    if 'chr' in fields: _of = ['chr']+_of
    if 'name' in fields: _of += ['name']
    _of += [f for f in fields if not(f in _of)]
    tl = [common.reorder(t,_of) for t in trackList]
    # Terminate each track with an all-sys.maxint item so _weave can detect its end
    tl = [FeatureStream(common.sentinelize(x,(sys.maxint,)*len(x.fields)),x.fields) for x in tl]
    return FeatureStream(_weave(tl,len(_of)),fields=_of)

Пример #9

0

Показать файл

def _combine(trackList,fn,win_size,aggregate):
    """Generator - see function `combine` below.

    Sweeps over the start and end coordinates of the items of all tracks,
    flipping a per-track 'activity' flag at each coordinate, and yields
    ``(start, end) + merged_fields`` for every interval on which
    ``fn(activity)`` is true. Items are read ahead window by window, each
    window being *win_size* wide.

    :param trackList: list of streams exposing ``.fields``; the fields of the
        first one are used for the output.
    :param fn: callable receiving the list of per-track booleans.
    :param win_size: width of the read-ahead window.
    :param aggregate: dict mapping a field name to the function merging its values;
        fields not listed use `common.generic_merge`.
    """
    N = len(trackList)
    fields = trackList[0].fields
    trackList = [common.sentinelize(t, [sys.maxint]*len(t.fields)) for t in trackList]
    init = [trackList[i].next() for i in range(N)] # the first element of each track
    # activity and z are sized with the original N, before empty tracks are removed
    activity = [False]*N # a vector of boolean values for the N tracks at a given position
    z = [None]*N
    # If there are empty tracks, remove them, and their index from init
    for i in xrange(N-1,-1,-1):
        if init[i][0] == sys.maxint:
            N-=1
            trackList.pop(i)
            init.pop(i)
    if N == 0: return
    # Must be a mutable list: indices are removed from it below (Python 2 range())
    available_tracks = range(N-1,-1,-1)
    # Sort starts and ends of all init elements indifferently; record the origin track index.
    # Start entries carry the remaining fields of the item; end entries are just (end, index).
    current = [(init[i][0],i)+init[i][2:] for i in range(N)]+[(init[i][1],i) for i in range(N)]
    current.sort()

    # Init step: set all tracks beginning at the starting point as 'active'
    is_chr = 'chr' in fields
    if is_chr:
        empty = (current[0][2],)+('0',)*len(fields[3:]) # write chr name if a region has no other annotation
    else:
        empty = ('0',)*len(fields[2:])
    start = current[0][0]
    while current[0][0] == start:
        i = current[0][1]          # track index
        activity[i] = True         # set this track to 'active'
        z[i] = current.pop(0)[2:]  # z records all meta info

    k=1  # index of the window being processed; its upper bound is k*win_size
    while available_tracks or current:
        # Load all elements within *win_size* bp in *current*
        to_remove = []
        limit = k * win_size
        # Skip windows that contain no pending coordinate.
        # NOTE(review): indexes current[0] without checking that current is non-empty - confirm
        while current[0][0] >= limit:
            k+=1
            limit = k * win_size
        for i in available_tracks:
            a = [0,0]
            # Read track i until an item ends at or beyond the window limit
            while a[1] < limit:
                a = trackList[i].next()
                if a[0] == sys.maxint:  # track i is completely read:
                    to_remove.append(i) # remove it from the tracks list
                else:
                    current.append((a[0],i)+a[2:])
                    current.append((a[1],i))
        for i in to_remove:
            available_tracks.remove(i)
        if not current: continue
        current.sort()
        # Calculate boolean values for this window
        while current and current[0][0] < limit:
            # NOTE: this local shadows the builtin next() for the rest of the function
            next = current[0][0]
            if fn(activity):
                feat_aggreg = [None]*len(fields[2:])
                for n,f in enumerate(fields[2:]):
                    # Values of field f for every track currently holding meta info
                    feats = tuple(zi[n] for zi in z if zi)
                    try: feat_aggreg[n] = aggregate.get(f,common.generic_merge)(feats)
                    # On IndexError the whole result is replaced by the placeholder tuple
                    except IndexError: feat_aggreg = empty
                yield (start,next) + tuple(feat_aggreg)
            # Consume every entry located at this coordinate
            while current and current[0][0] == next:
                i = current[0][1]               # track index
                activity[i] = not(activity[i])  # reverse activity
                zi = current.pop(0)[2:]         # record meta info
                z[i] = zi if activity[i] else None
            start = next
        k+=1

Пример #10

0

Показать файл

Файл: test_gfminer.py Проект: JoseEspinosa/bbcflib

 def test_sentinelize(self):
     """Iterating a sentinelized stream must end on the sentinel value."""
     sentinel = 'Z'
     source = fstream([(10,12,0.5), (14,15,1.2)], fields=['start','end','score'])
     final = None
     # Walk through the whole stream, remembering only the most recent item.
     for final in sentinelize(source, sentinel):
         pass
     self.assertEqual(final, sentinel)

Пример #11

0

Показать файл

 def _get_feature(_t, _a):
     """Yield every peak of *_t* extended with ``(gene, typeLoc, dist)``.

     *_a* is the annotation stream; its items are read as
     ``annot[0]`` = start, ``annot[1]`` = end, ``annot[2]`` = gene identifier
     and ``annot[3]`` = strand (compared against ``1`` and ``-1``).
     Peaks are read as ``peak[0]`` = start, ``peak[1]`` = end and must be
     tuples, since the result is built with ``peak + (...)``.

     ``thresholdInter``, ``thresholdPromot`` and ``thresholdUTR`` are taken
     from the enclosing scope (not visible in this block).

     A peak with no gene within ``thresholdInter`` on either side and not
     nested with any gene is yielded as ``peak + ('', 'Intergenic', thresholdInter)``.
     When a peak is nested with a gene, that gene, its distance and the
     ``Included`` label are appended to the values found for the neighbours.
     """
     F = []  # annotations buffered across peaks
     _a = common.sentinelize(_a, [sys.maxint] * len(_a.fields))
     for peak in _t:
         distMinBefore = distMinAfter = thresholdInter + 1
         gene = dist = typeLoc = ""
         geneBefore = geneAfter = strandBefore = strandAfter = None
         included = 0
         # keep only genes which don't start too far
         for annot in _a:
             F.append(annot)
             if annot[0] > peak[1] + thresholdInter:
                 break
         # remove genes that end too far
         fpop = -1  # always keep one gene before
         for annot in F:
             if annot[1] > peak[0] - thresholdInter:
                 break
             fpop += 1
         if fpop > 0:
             F = F[fpop:]
         for annot in F:
             # if the peak is totally included in the gene
             if (peak[0] >= annot[0]) and (annot[1] >= peak[1]):
                 includedGene = annot[2]
                 # A real conditional is required here: the former
                 # `cond and a or b` form fell through to the forward-strand
                 # distance whenever the reverse-strand distance was 0.
                 if annot[3] == -1:
                     includedDist = annot[1] - peak[1]
                 else:
                     includedDist = peak[0] - annot[0]
                 included = 1
             # if the gene is totally included in the peak
             elif (annot[0] > peak[0]) and (peak[1] > annot[1]):
                 includedGene = annot[2]
                 includedDist = 0
                 included = 1
             else:
                 # if annot is not too far 3' and no intersection
                 if 0 < (peak[0] - annot[1]) < distMinBefore:
                     distMinBefore = peak[0] - annot[1]
                     geneBefore = annot[2]
                     strandBefore = annot[3]
                 # if intersection (annot is before)
                 elif annot[0] < peak[0] < annot[1]:
                     distMinBefore = 0
                     geneBefore = annot[2]
                     strandBefore = annot[3]
                 # if annot is not too far 5' and no intersection
                 if 0 < (annot[0] - peak[1]) < distMinAfter:
                     distMinAfter = annot[0] - peak[1]
                     geneAfter = annot[2]
                     strandAfter = annot[3]
                 # if intersection (annot is after)
                 elif annot[0] < peak[1] < annot[1]:
                     distMinAfter = 0
                     geneAfter = annot[2]
                     strandAfter = annot[3]
         # detect intergenic peak
         if not included and distMinBefore > thresholdInter and distMinAfter > thresholdInter:
             yield peak + ('', 'Intergenic', thresholdInter)
             continue
         # detect peak before the first or after the last gene on the chromosome
         if geneBefore is None:
             if distMinAfter <= thresholdInter:
                 gene = geneAfter
                 dist = distMinAfter
                 typeLoc = "Upstream" if strandAfter == 1 else "Downstream"
         elif geneAfter is None:
             if distMinBefore <= thresholdInter:
                 gene = geneBefore
                 dist = distMinBefore
                 typeLoc = "Upstream" if strandBefore == -1 else "Downstream"
         # detect peak between two genes on the same strand
         elif strandBefore == strandAfter:
             if strandBefore == 1:
                 if thresholdUTR * distMinAfter > 100 * distMinBefore:
                     gene = geneBefore
                     dist = distMinBefore
                     if distMinAfter < thresholdPromot:
                         typeLoc = "3UTR"
                     else:
                         typeLoc = "Downstream"
                 else:
                     gene = geneAfter
                     dist = distMinAfter
                     if dist < thresholdPromot:
                         typeLoc = "Promot"
                     else:
                         typeLoc = "Upstream"
             else:
                 if thresholdUTR * distMinBefore > 100 * distMinAfter:
                     gene = geneAfter
                     dist = distMinAfter
                     if distMinBefore < thresholdPromot:
                         typeLoc = "3UTR"
                     else:
                         typeLoc = "Downstream"
                 else:
                     gene = geneBefore
                     dist = distMinBefore
                     if dist < thresholdPromot:
                         typeLoc = "Promot"
                     else:
                         typeLoc = "Upstream"
         # detect peak between two genes on different strands
         else:
             # detect peak between 2 promoters
             if strandBefore == -1:
                 typeLoc = "Upstream"
                 if distMinBefore < distMinAfter:
                     gene = geneBefore
                     dist = distMinBefore
                     if dist < thresholdPromot:
                         typeLoc = "Promot"
                         if distMinAfter < thresholdPromot:
                             typeLoc += "_Promot"
                             gene += "_" + geneAfter
                             dist = str(dist) + "_" + str(distMinAfter)
                 else:
                     gene = geneAfter
                     dist = distMinAfter
                     if dist < thresholdPromot:
                         typeLoc = "Promot"
                         if distMinBefore < thresholdPromot:
                             typeLoc += "_Promot"
                             gene += "_" + geneBefore
                             dist = str(dist) + "_" + str(distMinBefore)
             # detect peak between 2 3UTR
             else:
                 typeLoc = "Downstream"
                 # detect peak overlapping the 2 3UTR
                 if distMinBefore == distMinAfter:
                     if thresholdUTR * thresholdPromot > 100 * distMinBefore:
                         typeLoc = "3UTR"
                     # the label is doubled: one half per neighbouring gene
                     typeLoc += "_" + typeLoc
                     gene = geneBefore + "_" + geneAfter
                     dist = str(distMinBefore) + "_" + str(distMinAfter)
                 elif distMinBefore < distMinAfter:
                     dist = distMinBefore
                     gene = geneBefore
                     if thresholdUTR * thresholdPromot > 100 * dist:
                         typeLoc = "3UTR"
                 else:
                     dist = distMinAfter
                     gene = geneAfter
                     if thresholdUTR * thresholdPromot > 100 * dist:
                         typeLoc = "3UTR"
         if included == 1:
             # append the nested gene to whatever was found for the neighbours
             gene += "_" + includedGene if gene else includedGene
             dist = str(dist)
             dist = dist + "_" + str(includedDist) if dist else str(includedDist)
             typeLoc += "_Included" if typeLoc else "Included"
         yield peak + (gene, typeLoc, dist)

Пример #12

0

Показать файл

Файл: annotate.py Проект: JoseEspinosa/bbcflib

 def _get_feature(_t,_a):
     """Annotate each peak of *_t* with its closest gene(s) from *_a*.

     Yields ``peak + (gene, typeLoc, dist)`` for every peak; peaks must be
     tuples. Items are accessed by position: ``peak[0]``/``peak[1]`` are the
     peak start/end, ``annot[0]``/``annot[1]`` the gene start/end,
     ``annot[2]`` the gene identifier and ``annot[3]`` the strand (compared
     against ``1`` and ``-1``).

     ``thresholdInter``, ``thresholdPromot`` and ``thresholdUTR`` come from
     the enclosing scope (not visible in this block).

     A peak that is not nested with any gene and has no gene within
     ``thresholdInter`` on either side is reported as
     ``peak + ('', 'Intergenic', thresholdInter)``.
     """
     F = []  # buffer of annotations, kept from one peak to the next
     _a = common.sentinelize(_a, [sys.maxint]*len(_a.fields))
     for peak in _t:
         distMinBefore = distMinAfter = thresholdInter+1
         gene = dist = typeLoc = ""
         geneBefore = geneAfter = strandBefore = strandAfter = None
         included = 0
         # keep only genes which don't start too far
         for annot in _a:
             F.append(annot)
             if annot[0] > peak[1]+thresholdInter:
                 break
         # remove genes that end too far
         fpop = -1 # always keep one gene before
         for annot in F:
             if annot[1] > peak[0]-thresholdInter:
                 break
             fpop += 1
         if fpop > 0:
             F = F[fpop:]
         for annot in F:
             # if the peak is totally included in the gene
             if (peak[0] >= annot[0]) and (annot[1] >= peak[1]):
                 includedGene = annot[2]
                 # Explicit branches instead of `cond and a or b`: with the old
                 # form a reverse-strand distance of 0 was falsy and silently
                 # replaced by the forward-strand distance.
                 if annot[3] == -1:
                     includedDist = annot[1]-peak[1]
                 else:
                     includedDist = peak[0]-annot[0]
                 included = 1
             # if the gene is totally included in the peak
             elif (annot[0] > peak[0]) and (peak[1] > annot[1]):
                 includedGene = annot[2]
                 includedDist = 0
                 included = 1
             else:
                 # if annot is not too far 3' and no intersection
                 if 0 < (peak[0]-annot[1]) < distMinBefore:
                     distMinBefore = peak[0]-annot[1]
                     geneBefore = annot[2]
                     strandBefore = annot[3]
                 # if intersection (annot is before)
                 elif annot[0] < peak[0] < annot[1]:
                     distMinBefore = 0
                     geneBefore = annot[2]
                     strandBefore = annot[3]
                 # if annot is not too far 5' and no intersection
                 if 0 < (annot[0]-peak[1]) < distMinAfter:
                     distMinAfter = annot[0]-peak[1]
                     geneAfter = annot[2]
                     strandAfter = annot[3]
                 # if intersection (annot is after)
                 elif annot[0] < peak[1] < annot[1]:
                     distMinAfter = 0
                     geneAfter = annot[2]
                     strandAfter = annot[3]
         # detect intergenic peak
         if not included and distMinBefore > thresholdInter and distMinAfter > thresholdInter:
             yield peak+('','Intergenic',thresholdInter)
             continue
         # detect peak before the first or after the last gene on the chromosome
         if geneBefore is None:
             if distMinAfter <= thresholdInter:
                 gene = geneAfter
                 dist = distMinAfter
                 typeLoc = "Upstream" if strandAfter == 1 else "Downstream"
         elif geneAfter is None:
             if distMinBefore <= thresholdInter:
                 gene = geneBefore
                 dist = distMinBefore
                 typeLoc = "Upstream" if strandBefore == -1 else "Downstream"
         # detect peak between two genes on the same strand
         elif strandBefore == strandAfter:
             if strandBefore == 1:
                 if thresholdUTR*distMinAfter > 100*distMinBefore:
                     gene = geneBefore
                     dist = distMinBefore
                     if distMinAfter < thresholdPromot:
                         typeLoc = "3UTR"
                     else:
                         typeLoc = "Downstream"
                 else:
                     gene = geneAfter
                     dist = distMinAfter
                     if dist < thresholdPromot:
                         typeLoc = "Promot"
                     else:
                         typeLoc = "Upstream"
             else:
                 if thresholdUTR*distMinBefore > 100*distMinAfter:
                     gene = geneAfter
                     dist = distMinAfter
                     if distMinBefore < thresholdPromot:
                         typeLoc = "3UTR"
                     else:
                         typeLoc = "Downstream"
                 else:
                     gene = geneBefore
                     dist = distMinBefore
                     if dist < thresholdPromot:
                         typeLoc = "Promot"
                     else:
                         typeLoc = "Upstream"
         # detect peak between two genes on different strands
         else:
             # detect peak between 2 promoters
             if strandBefore == -1:
                 typeLoc = "Upstream"
                 if distMinBefore < distMinAfter:
                     gene = geneBefore
                     dist = distMinBefore
                     if dist < thresholdPromot:
                         typeLoc = "Promot"
                         if distMinAfter < thresholdPromot:
                             typeLoc += "_Promot"
                             gene += "_"+geneAfter
                             dist = str(dist)+"_"+str(distMinAfter)
                 else:
                     gene = geneAfter
                     dist = distMinAfter
                     if dist < thresholdPromot:
                         typeLoc = "Promot"
                         if distMinBefore < thresholdPromot:
                             typeLoc += "_Promot"
                             gene += "_"+geneBefore
                             dist = str(dist)+"_"+str(distMinBefore)
             # detect peak between 2 3UTR
             else:
                 typeLoc = "Downstream"
                 # detect peak overlapping the 2 3UTR
                 if distMinBefore == distMinAfter:
                     if thresholdUTR*thresholdPromot > 100*distMinBefore:
                         typeLoc = "3UTR"
                     # the label is doubled: one half per neighbouring gene
                     typeLoc += "_"+typeLoc
                     gene = geneBefore+"_"+geneAfter
                     dist = str(distMinBefore)+"_"+str(distMinAfter)
                 elif distMinBefore < distMinAfter:
                     dist = distMinBefore
                     gene = geneBefore
                     if thresholdUTR*thresholdPromot > 100*dist:
                         typeLoc = "3UTR"
                 else:
                     dist = distMinAfter
                     gene = geneAfter
                     if thresholdUTR*thresholdPromot > 100*dist:
                         typeLoc = "3UTR"
         if included == 1:
             # append the nested gene to whatever was found for the neighbours
             gene += "_"+includedGene if gene else includedGene
             dist = str(dist)
             dist = dist+"_"+str(includedDist) if dist else str(includedDist)
             typeLoc += "_Included" if typeLoc else "Included"
         yield peak+(gene,typeLoc,dist)

Python sentinelize примеры использования