Exemplo n.º 1
0
class AslrOracle:
  """Query counter plus a cache of address ranges already known to be good or bad.

  Known-good ranges live in `good_regions`, known-bad ranges in `bad_regions`
  (both interval trees). `CheckRange`, used by `CheckAddress`, is not defined
  in this class as shown - presumably a subclass supplies it.
  """

  def __init__(self):
    # Number of oracle queries issued so far.
    self.queries = 0
    self.InitCache()

  def CheckAddress(self, address):
    """Check the 0x1000-sized range that starts at `address`."""
    return self.CheckRange(address, 0x1000)

  def InitCache(self):
    """Reset the cache-hit counter and start with two empty interval trees."""
    self.cached_queries = 0
    self.good_regions = IntervalTree()
    self.bad_regions = IntervalTree()

  def InsertToCache(self, start, end, valid):
    """Record the outcome of a range check.

    Invalid ranges are stored as Interval(start, end). Valid ranges are stored
    as Interval(start, end + 1) and the good tree is merged afterwards.
    """
    if not valid:
      self.bad_regions.add(Interval(start, end))
      return

    self.good_regions.add(Interval(start, end + 1))
    self.good_regions.merge_overlaps()

  def CheckCache(self, start, end):
    """Answer a range check from the cache when possible.

    Returns True when a cached good interval spans [start, end], False when a
    cached bad interval is enveloped by (start, end), and None when the cache
    cannot decide. Every decided answer increments `cached_queries`.
    """
    covered = any(
        region[0] <= start and region[1] >= end
        for region in self.good_regions.overlap(start, end))
    if covered:
      self.cached_queries += 1
      return True

    enveloped_bad = self.bad_regions.envelop(start, end)
    if len(enveloped_bad) > 0:
      self.cached_queries += 1
      return False

    return None
Exemplo n.º 2
0
class SimpleDnMedium(DnMedium):
    """Downlink medium that keeps pending messages in an interval tree.

    Each message is stored under the tick interval computed in `add_dn`.
    """

    def __init__(self) -> None:
        # Pending downlink messages, keyed by tick interval.
        self.msgs = IntervalTree()

    def add_dn(self, msg: LoraMsg) -> None:
        """Store `msg` over the interval from its begin tick to begin + preamble ticks."""
        t0 = Simulation.time2ticks(msg.xbeg)
        t1 = t0 + Simulation.time2ticks(msg.tpreamble())
        self.msgs[t0:t1] = msg

    @staticmethod
    def overlap(i1: Interval, i2: Interval) -> int:
        """Return the length of the intersection of two intervals.

        The value is negative when the intervals do not intersect.
        """
        return min(i1.end, i2.end) - max(i1.begin, i2.begin)  # type: ignore

    def get_dn(self,
               rxon: int,
               rxtout: int,
               freq: int,
               rps: int,
               nsym: int = 4) -> Optional[LoraMsg]:
        """Take one message receivable in the window [rxon, rxon + rxtout].

        A message qualifies when it matches `freq`/`rps` and overlaps the
        receive window by at least the tick length of `nsym` symbols. The
        first qualifying message is removed from the medium and returned;
        None is returned when nothing qualifies.
        """
        rxw = Interval(rxon, rxon + rxtout)
        tpn = Simulation.time2ticks(LoraMsg.symtime(rps, nsym))
        for i in self.msgs.overlap(rxw[0], rxw[1]):
            m = i.data  # type: LoraMsg
            if m.match(freq, rps) and SimpleDnMedium.overlap(i, rxw) >= tpn:
                self.msgs.remove(i)
                return m
        return None

    def prune(self, ticks: int) -> set:
        """Drop every stored interval enveloped by 0..ticks and return them.

        The return annotation previously said None although the set of
        expired intervals has always been returned.
        """
        exp = self.msgs.envelop(0, ticks)
        if exp:
            self.msgs.remove_envelop(0, ticks)
        return exp
Exemplo n.º 3
0
def find_candidate(Interval_list,
                   window=10,
                   min_primary=0,
                   min_support=0,
                   secondary_thres=0.0,
                   primary_thres=1.0):
    '''
    Group boundary intervals around their best-supported member.

    The intervals are copied into a fresh IntervalTree. Repeatedly, the
    interval with the largest `data` (its support count) is taken out as the
    primary, and every remaining interval lying within `window` of it on both
    ends is taken out as well.

    Parameter:
        Interval_list:
            iterable of objects exposing `begin`, `end` and `data`
            (`data` is compared and summed as the support count)
        window:
            maximum distance allowed between a neighbour's begin/end and the
            primary's begin/end
        min_primary:
            the search stops, returning what was collected so far, as soon as
            the best remaining support is below this value
        min_support:
            a group is kept only when the summed support of its recorded
            members reaches this value
        secondary_thres:
            a neighbour is recorded only when its support exceeds
            secondary_thres * support of the primary; neighbours below that
            are still removed from the search
        primary_thres:
            a group is kept only when primary support / group support does
            not exceed this value
    Return:
        list of (primary, member) tuples; each kept group also contains the
        pair (primary, primary). A primary without any recorded neighbour
        contributes nothing.
    '''
    pending = IntervalTree()
    for item in Interval_list:
        pending.addi(item.begin, item.end, item.data)

    candidate_boundaries = []
    while pending:
        primary = max(pending, key=lambda iv: iv.data)
        best_support = primary.data
        if best_support < min_primary:  # nothing left is supported well enough
            return candidate_boundaries

        pending.remove(primary)

        # Intervals fully inside the primary widened by `window` on both sides.
        nearby = pending.envelop(primary.begin - window, primary.end + window)
        neighbour_found = []
        for other in nearby:
            if other.begin > primary.begin + window or \
                    other.end < primary.end - window:
                continue
            if other.data > secondary_thres * best_support:
                neighbour_found.append((primary, other))
            pending.remove(other)

        if not neighbour_found:
            continue

        neighbour_found.append((primary, primary))
        count = sum(member.data for _, member in neighbour_found)
        if count >= min_support and best_support / count <= primary_thres:
            candidate_boundaries.extend(neighbour_found)
    return candidate_boundaries
Exemplo n.º 4
0
def original_print():
    """Print two kinds of lookups for each interval of a small demo tree."""
    tree = IntervalTree()
    for begin, end, label in ((1, 3, "dude"), (2, 4, "sweet"), (6, 9, "rad")):
        tree.addi(begin, end, label)

    for iv in tree:
        # Indexed with a tuple rather than a slice; the original author's note
        # says this prints set() and that `:` should be used instead.
        print(tree[iv.begin, iv.end])

    for iv in tree:
        print(tree.envelop(iv.begin, iv.end))
Exemplo n.º 5
0
class SimpleMedium(Medium):
    """Medium that keeps pending downlink messages in an interval tree."""

    def __init__(self, put_up: Optional[Callable[[LoraMsg], None]]) -> None:
        # Callback stored for later use; it is not invoked anywhere in this class.
        self._put_up = put_up
        # Pending downlink messages, keyed by tick interval.
        self.msgs = IntervalTree()

    def reset_medium(self) -> None:
        """Forget every pending message."""
        self.msgs.clear()

    def add_dn(self, msg: LoraMsg) -> None:
        """Store `msg` over the interval from its begin tick to begin + preamble ticks."""
        begin_tick = Simulation.time2ticks(msg.xbeg)
        end_tick = begin_tick + Simulation.time2ticks(msg.tpreamble())
        self.msgs[begin_tick:end_tick] = msg

    @staticmethod
    def overlap(i1: Interval, i2: Interval) -> int:
        """Return the intersection length of two intervals (negative if disjoint)."""
        latest_begin = max(i1.begin, i2.begin)
        earliest_end = min(i1.end, i2.end)
        return earliest_end - latest_begin  # type: ignore

    def get_dn(self,
               rxon: int,
               rxtout: int,
               freq: int,
               rps: int,
               nsym: int = 4,
               peek=False) -> Optional[LoraMsg]:
        """Find one message receivable in the window [rxon, rxon + rxtout].

        A message qualifies when it matches `freq`/`rps` and, unless `peek`
        is set, overlaps the window by at least the tick length of `nsym`
        symbols. With `peek` the message stays in the medium; otherwise it is
        removed. Returns None when nothing qualifies.
        """
        window = Interval(rxon, rxon + rxtout)
        min_ticks = Simulation.time2ticks(LoraMsg.symtime(rps, nsym))
        for candidate in self.msgs.overlap(window.begin, window.end):
            msg = candidate.data  # type: LoraMsg
            if not msg.match(freq, rps):
                continue
            if peek or SimpleMedium.overlap(candidate, window) >= min_ticks:
                if not peek:
                    self.msgs.remove(candidate)
                return msg
        return None

    def prune(self, ticks: int) -> List[LoraMsg]:
        """Remove the intervals enveloped by 0..ticks and return their messages."""
        expired = cast(List[Interval], self.msgs.envelop(0, ticks))
        if expired:
            self.msgs.remove_envelop(0, ticks)
        return [iv.data for iv in expired]
Exemplo n.º 6
0
def test_empty_queries():
    """Every query on a freshly created tree must report emptiness."""
    tree = IntervalTree()
    nothing = set()

    # Size and bounds.
    assert len(tree) == 0
    assert tree.is_empty()
    # Point and slice lookups.
    assert tree[3] == nothing
    assert tree[4:6] == nothing
    assert tree.begin() == 0
    assert tree.end() == 0
    # Range queries over the tree's own (degenerate) extent.
    assert tree[tree.begin():tree.end()] == nothing
    assert tree.overlap(tree.begin(), tree.end()) == nothing
    assert tree.envelop(tree.begin(), tree.end()) == nothing
    # Whole-tree views.
    assert tree.items() == nothing
    assert set(tree) == nothing
    assert set(tree.copy()) == nothing
    assert tree.find_nested() == {}
    assert tree.range().is_null()
    assert tree.range().length() == 0
    tree.verify()
Exemplo n.º 7
0
def test_empty_queries():
    """An IntervalTree with no intervals answers all queries with empty results."""
    empty_tree = IntervalTree()
    expected = set()

    assert len(empty_tree) == 0
    assert empty_tree.is_empty()

    # Lookups by point and by slice.
    assert empty_tree[3] == expected
    assert empty_tree[4:6] == expected

    # The extent of an empty tree collapses to 0..0.
    assert empty_tree.begin() == 0
    assert empty_tree.end() == 0
    assert empty_tree[empty_tree.begin():empty_tree.end()] == expected
    assert empty_tree.overlap(empty_tree.begin(), empty_tree.end()) == expected
    assert empty_tree.envelop(empty_tree.begin(), empty_tree.end()) == expected

    # Views, copies and derived structures.
    assert empty_tree.items() == expected
    assert set(empty_tree) == expected
    assert set(empty_tree.copy()) == expected
    assert empty_tree.find_nested() == {}
    assert empty_tree.range().is_null()
    assert empty_tree.range().length() == 0
    empty_tree.verify()
Exemplo n.º 8
0
class AmpliconSet:
    """A named set of amplicons indexed by position and by primer sequence."""

    def __init__(
        self,
        name,
        amplicons,
        tolerance=5,
        shortname=None,
    ):
        """AmpliconSet supports various membership operations

        name: label of the set; also used to derive `shortname` when none is
            given.
        amplicons: mapping of amplicon name to amplicon object. Each amplicon
            is read for `shortname`, `start`, `end` and its `left` / `right`
            primer collections (each primer exposing `seq`).
        tolerance: number of positions by which each amplicon interval is
            widened on both sides before it is stored in the tree.
        shortname: optional tag for the set.
        """
        if not shortname:
            # One-character tag derived from the code points of the name.
            shortname = chr(((sum(map(ord, name)) - ord("A")) % 54) + 65)
        # Assigned unconditionally: an explicitly passed shortname used to be
        # dropped, leaving the attribute undefined.
        self.shortname = shortname
        self.tree = IntervalTree()
        self.name = name
        self.seqs = {}
        self.amplicons = amplicons
        self.amplicon_ids = {}

        primer_lengths = set()
        sequences = {}
        for amplicon in amplicons.values():
            self.amplicon_ids[amplicon.shortname] = amplicon

            for primer in amplicon.left:
                sequences[primer.seq] = amplicon
                primer_lengths.add(len(primer.seq))
            for primer in amplicon.right:
                # note: you may want to reverse complement this
                sequences[primer.seq] = amplicon
                primer_lengths.add(len(primer.seq))

            # interval containment tolerance
            start = amplicon.start - tolerance
            end = amplicon.end + tolerance
            self.tree[start:end] = amplicon

        # A set without any primer gets length 0 instead of a ValueError.
        self.min_primer_length = min(primer_lengths, default=0)
        # the internal sequences table allows lookup by primer sequence,
        # truncated to the shortest primer length
        for seq, amplicon in sequences.items():
            self.seqs[seq[:self.min_primer_length]] = amplicon

    def __eq__(self, other):
        """Two sets are equal when they share the exact type and all attributes."""
        return type(other) is type(self) and self.__dict__ == other.__dict__

    @classmethod
    def from_json(cls, fn, tolerance=5):
        """Not implemented."""
        raise NotImplementedError

    @classmethod
    def from_tsv(cls, fn, name=None, **kwargs):
        """Build an AmpliconSet from a tab-separated primer table.

        fn: path of the TSV file. It must have the columns Amplicon_name,
            Primer_name, Left_or_right, Sequence and Position.
        name: name of the set; defaults to `fn`.
        kwargs: forwarded to the constructor.

        Raises Exception when required columns are missing, including the
        case of a file without a header row.
        """
        amplicons = {}
        required_cols = {
            "Amplicon_name",
            "Primer_name",
            "Left_or_right",
            "Sequence",
            "Position",
        }
        n = 0
        # newline="" is what the csv module asks for on files it reads.
        with open(fn, newline="") as f:
            reader = csv.DictReader(f, delimiter="\t")
            # fieldnames is None for an empty file; treat that as "no columns"
            # so the caller gets the descriptive error below.
            missing_cols = required_cols.difference(reader.fieldnames or [])
            if missing_cols:
                missing_cols = ",".join(sorted(missing_cols))
                raise Exception(
                    f"Amplicon scheme TSV missing these columns: {missing_cols}. Got these columns: {reader.fieldnames}"
                )

            for d in reader:
                if d["Amplicon_name"] not in amplicons:
                    # amplicons are numbered in order of first appearance
                    amplicons[d["Amplicon_name"]] = Amplicon(
                        d["Amplicon_name"], shortname=n)
                    n += 1

                left = d["Left_or_right"].lower() == "left"
                # We assume that primer is always left+forward, or right+reverse
                forward = left
                pos = int(d["Position"])
                primer = Primer(d["Primer_name"], d["Sequence"], left, forward,
                                pos)
                amplicons[d["Amplicon_name"]].add(primer)

        name = fn if not name else name
        return cls(name, amplicons, **kwargs)

    def match(self, start, end):
        """Identify a template's mapped interval based on the start and end
        positions

        returns a list of the matching amplicons (one or two), or None when
        nothing matches or when the span envelops a whole amplicon interval.
        Raises Exception when more than two amplicons match.
        """

        # amplicons which contain the start and end
        hits = self.tree[start].intersection(self.tree[end])

        # amplicons contained by the start and end
        # this should never happen in tiled amplicons
        enveloped = self.tree.envelop(start, end)

        if enveloped:
            return None

        if len(hits) == 0:
            return None
        elif len(hits) > 2:
            # there should not be any more than 2 ambiguous matches under any
            # known primer set. The interval tree can confirm this at the time
            # the primer set is parsed
            raise Exception(
                f"{len(hits)} amplicons match the interval {start}-{end}; at most 2 are expected"
            )
        else:
            return [hit.data for hit in hits]

    def get_tags(self, read):
        """Placeholder; does nothing."""
        pass

    def set_tags(self, read):
        """Placeholder; does nothing."""
        pass
Exemplo n.º 9
0
class Protein:
    """
	We will represent a protein as its domains
	3 ways:
	1) protein can have overlapping domains
	2) protein has only no-overlapping domains
	3) protein has known length so gap domain can be added
	"""
    def __init__(self,
                 with_overlap,
                 with_redundant,
                 with_gap,
                 hit_line="",
                 proteins_id_len="",
                 interpro_local_format=False):
        """
        Protein Class init

        Parameters
        ----------
        with_overlap : bool
            output overlapping domain annotation (True), otherwise not overlapping domain annotation will be created (False)
        with_redundant : bool
            if with_overlap is False then create non overlapping (but possibly redundant) domains (True),
            otherwise create non overlapping and non redundant domain annotation (False)
        with_gap : bool
            add GAP domain for each protein subsequence >30 amino acids without domain hit (True),
            otherwise don't add GAP domain (False)
        hit_line : str
            first domain hits line of the protein; when empty, the protein
            starts without an id and without domains
        proteins_id_len :  file
            proteins id length file handle; only read when with_gap is set
            and the line is in protein2ipr format
        interpro_local_format : bool
            preprocess output format produced by local interproscan run (True),
            otherwise preprocess Interpro downloaded protein2ipr format (False)

        Returns
        -------
        None
        """
        self.with_overlap = with_overlap
        self.with_redundant = with_redundant
        self.with_gap = with_gap
        self.domain_interval_tree = IntervalTree()
        self.domains_with_gaps = []
        self.gap_min_size = 30
        self.length = 0
        self.interpro_exist_all_domains = []
        # Always defined, so that add_domain()/to_tabs() also work on a
        # protein that was created without a first hit line.
        self.domains = {}
        self.uniprot_id = ""
        if hit_line == "":
            return

        if interpro_local_format:  # interpro local run format
            # get the interpro annotation of protein line based on:
            # https://github.com/ebi-pf-team/interproscan/wiki/OutputFormats
            assert len(
                hit_line.split("\t")
            ) >= 11, "AssertionError: line: {} has less than 11 tabs.".format(
                hit_line)
            self.uniprot_id = self.get_uniprot_id(hit_line)
            self.add_domain(hit_line)
            if with_gap:
                # the third tab-separated field is read as the sequence length
                self.length = int(hit_line.split("\t")[2])
                # the message reports the protein id (it used to print the length)
                assert self.length > 0, "AssertionError: protein with id {} has length <=0.".format(
                    self.uniprot_id)
        else:  # prot2ipr format
            assert isinstance(
                hit_line, str
            ), "AssertionError: Input of protein should be a String line."
            hit_line = hit_line.strip()
            self.uniprot_id = self.get_uniprot_id(hit_line)
            self.add_domain(hit_line)
            if with_gap:
                self.length = self.get_prot_length(proteins_id_len)
                # the message reports the protein id (it used to print the length)
                assert self.length > 0, "AssertionError: protein with id {} has length <= 0.".format(
                    self.uniprot_id)

    def get_prot_length(self, proteins_id_len):
        """
		Get protein length

		Parameters
		----------
		proteins_id_len : file
			protein id length file handle

		Returns
		-------
		prot_len : int
			protein length
		"""
        prot_len = -1
        prot_found = False
        try:
            while prot_found == False:
                prot_id_len = next(proteins_id_len)
                # print("current len:{}".format(prot_id_len))
                if prot_id_len.strip().split("\t")[0] == self.uniprot_id:
                    prot_len = int(prot_id_len.strip().split("\t")[1])
                    prot_found = True

        except (StopIteration):
            print("EOF")
        return prot_len

    @staticmethod
    def get_prot_id(hit_line):
        """
		Get protein id

		Parameters
		----------
		hit_line : str
			domain hit line

		Returns
		-------
		str
			protein id
		"""
        return hit_line.split("\t")[0]

    def get_uniprot_id(self, hit_line):
        """
		Get uniprot id

		Parameters
		----------
		hit_line : str
			domain hit line

		Returns
		-------
		str
			protein id
		"""
        return hit_line.split("\t")[0]

    def add_domain(self, hit_line):
        """
		Add domain in Protein object

		Parameters
		----------
		hit_line : str
			domain hit line

		Returns
		-------
		None
		"""
        if self.with_overlap:
            self.add_overlap(hit_line)
        elif self.with_overlap is False or self.with_redundant:
            self.add_no_overlap(hit_line)

    def add_overlap(self, hit_line):
        """
        Add domain hit in overlapping fashion

        The hit is parsed into a Domain and, when its end lies after its
        start, stored in self.domains under a float key built from the digits
        of the start position followed by the digits of the end position.
        Hits that produce an already used key are stored under the next free
        key in steps of 0.01, so none of them replaces an earlier one
        (previously only a single 0.01 step was tried, which let a third hit
        overwrite the second).

        Parameters
        ----------
        hit_line : str
            domain hit line
        Returns
        -------
        None
        """
        domain = Domain(hit_line)
        self.interpro_exist_all_domains.append(domain.interpro_id_exists)
        if domain.end_pos <= domain.start_pos:
            return

        # construct start_stop index
        base_key = float(str(domain.start_pos) + str(domain.end_pos))
        start_stop = base_key
        duplicates = 0
        while start_stop in self.domains:
            # step from the base key so the first duplicate still lands on
            # base + 0.01, exactly as before
            duplicates += 1
            start_stop = base_key + 0.01 * duplicates
        self.domains[start_stop] = domain

    def add_no_overlap(self, hit_line):
        """
        Store a domain hit in the interval tree

        The hit is parsed into a Domain, its interpro_id_exists flag is
        recorded, and the domain is inserted into the tree under its
        start/end positions unless the end does not lie after the start.

        Parameters
        ----------
        hit_line : str
            domain hit

        Returns
        -------
        None
        """
        hit = Domain(hit_line)
        self.interpro_exist_all_domains.append(hit.interpro_id_exists)
        if hit.end_pos <= hit.start_pos:
            return
        self.domain_interval_tree.addi(hit.start_pos, hit.end_pos, hit)

    def to_tabs(self):
        """
		Convert saved domain hits for a protein to output tabular line

		Parameters
		----------

		Returns
		-------
		str
		"""
        if self.with_overlap:
            # print("Overlap")
            return self.to_tabs_overlap()
        elif self.with_redundant is False:
            # print("No overlap")
            return self.to_tabs_no_overlap()
        else:
            # print("No redundant")
            return self.to_tabs_no_redundant()

    def find_strong_no_overlap_domains(self, parent_domain, already_resolved):
        """
		Find all no strong overlap domains with maximum length
		1) Resolve overlapping domains that overlap for less than 0.99% of their length
		to no strong overlap domains
		No strong overlap: |-----"--|-----"
		Strong overlap: |----"--"--|
		2) Find enveloppe domains
		3) From the rest of the domains, find the one with maximum length

		Parameters
		----------
		parent_domain : str
			anchor domain to start overlapping search
		candidate_overlap_domains : list of str
			list of overlapping domains

		Returns
		-------
		strong_overlap_domains, no_strong_overlap_domains
		lists of strong overlapping domains (resolved), no strong overlapping (not (yet) resolved)
		"""
        envelopped_domains = self.domain_interval_tree.envelop(
            parent_domain.begin, parent_domain.end)
        overlapping_domains = self.domain_interval_tree.overlap(
            parent_domain.begin, parent_domain.end)
        candidate_domains = overlapping_domains - envelopped_domains - already_resolved

        strong_overlap_domains = set()
        no_strong_overlap_domains = set()
        for candidate_domain in list(candidate_domains):
            # As parent has the maximum length, there are two choices:
            # 1) candidate domain is strongly overlapping with the parent => add it to strong_overlap_domains (resolved)
            # 2) candidate domain is no strongly overlapping so => add it to no_strong_overlap_domains (not_resolved)
            candidate_domain_len = candidate_domain.end - candidate_domain.begin + 1
            if candidate_domain.begin >= parent_domain.begin:
                # |---parent---|
                #          |---child---|
                overlap_len = parent_domain.end - candidate_domain.begin + 1
            else:
                #    |---parent---|
                # |---child---|
                overlap_len = candidate_domain.end - parent_domain.begin + 1

            if float(overlap_len
                     ) / candidate_domain_len >= 0.8:  # Strong overlap
                strong_overlap_domains.add(candidate_domain)
                assert candidate_domain.data.length <= parent_domain.data.length, "AssertionError: prot:{} candidate domain {} is longer than parent domain {}".format(
                    self.uniprot_id, candidate_domain.data.evidence_db_id,
                    parent_domain.data.evidence_db_id)
            else:  # no strong overlap
                if candidate_domain.data.interpro_id == parent_domain.data.interpro_id:  # if no strong overlap but the same interpro id take the longest one
                    assert candidate_domain.data.length <= parent_domain.data.length, "AssertionError: prot:{} candidate domain {} is longer than parent domain {}".format(
                        self.uniprot_id, candidate_domain.data.evidence_db_id,
                        parent_domain.data.evidence_db_id)
                    strong_overlap_domains.add(candidate_domain)
                else:
                    no_strong_overlap_domains.add(candidate_domain)

        strong_overlap_domains.update(
            envelopped_domains
        )  # add envelopped domains to strong_overlap domains
        return strong_overlap_domains, no_strong_overlap_domains

    def find_no_redundant_domains(self, parent_domain, already_resolved):
        """
		Find no redundant domains

		Parameters
		----------
		parent_domain : str
			anchor domain to start overlapping search
		already_resolved : set of str
			set of already resolved for redundancy domains
		Returns
		-------
		"""
        overlapping_domains = self.domain_interval_tree.overlap(
            parent_domain.begin, parent_domain.end)
        candidate_domains = overlapping_domains - already_resolved
        redundant_domains = set()
        no_redundant_domains = set()

        for candidate_domain in list(candidate_domains):

            # As parent has the maximum length, there are two choices:
            # 1) candidate domain has the same interpro id => add it to redundant (resolved)
            # 2) candidate domain has not the same interpro id => add it to no redundant (not_resolved)
            if candidate_domain.data.interpro_id == parent_domain.data.interpro_id:
                redundant_domains.add(candidate_domain)
            else:
                no_redundant_domains.add(candidate_domain)

        return redundant_domains, no_redundant_domains

    def find_no_redundant_max_len(self):
        """
		Find all domains that are not redundant (having unique interpro id) and are maximally long

		Parameters
		----------

		Returns
		-------
		list of IntervalTree.node
			list of IntervalTree nodes as the no redundant maximum length domains
		"""
        resolved = set()
        domains_no_redundant_max = []

        domains_len_srt = [domain for domain in self.domain_interval_tree]
        domains_len_srt.sort(key=lambda dom_node: dom_node.data.length,
                             reverse=True)

        for domain_node in domains_len_srt:
            if domain_node not in resolved:
                redundant_domains, no_redundant_domains = self.find_no_redundant_domains(
                    domain_node, resolved)
                domains_no_redundant_max.append(domain_node)
                resolved.update(redundant_domains)
        return domains_no_redundant_max

    def find_no_overlap_max_len(self):
        """
		Find all domains that are not overlapping and are maximally long

		Parameters
		----------

		Returns
		-------
		list of IntervalTree.node
			list of not overlapping maximum length domains
		"""
        resolved = set()
        domains_no_overlap_max = []

        domains_len_srt = [domain for domain in self.domain_interval_tree]
        domains_len_srt.sort(key=lambda dom_node: dom_node.data.length,
                             reverse=True)
        """
		Idea: After sorting the domains by length in descending order, then
		pick each domain and check for 
		envelopped domains -> resolved
		strong overlap domains -> resolved
		no strong overlap domains -> not resolved, the for loop will either add it as max no overlap or as resolved
		"""
        for domain_node in domains_len_srt:
            if domain_node not in resolved:
                strong_overlap_domains, strong_no_overlap_domains = self.find_strong_no_overlap_domains(
                    domain_node, resolved)
                domains_no_overlap_max.append(domain_node.data)

                resolved.update(strong_overlap_domains)
        return domains_no_overlap_max

    def construct_gap_hitline(self, gap_start, gap_stop):
        """
		Construct GAP domain tabular line

		Parameters
		----------
		gap_start : int
			GAP start position in protein amino sequence
		gap_stop : int
			GAP end position in protein amino sequence

		Returns
		-------
		str
			GAP domain tabular line
		"""
        return "\t".join([
            self.uniprot_id, "GAP", "gap", "gap_no_evid",
            str(gap_start),
            str(gap_stop)
        ])

    def add_gaps_no_redundant(self, domains_srt):
        """
		Add GAP domains in no redundant domain annotations

		Parameters
		----------
		domains_srt : list of Domain
			domains sorted per start/end position

		Returns
		-------
		None
		"""
        start_gap = 1
        previous_domain = None  # interval tree node
        is_first_domain = True
        for domain_interval in domains_srt:
            if is_first_domain:  # first domain
                if domain_interval.begin - start_gap + 1 > self.gap_min_size:  # add start GAP
                    assert domain_interval.begin > 1, "AssertionError: Start gap can be added if the very first domain is not starting at 1."
                    self.domains_with_gaps.append(
                        Domain(
                            self.construct_gap_hitline(
                                start_gap, domain_interval.begin - 1)))
                    start_gap = domain_interval.end + 1
                is_first_domain = False
            else:
                # check if the current domain and the previous are overlapping if yes then you can't add a gap
                # if no check the space between them
                overlap_domains = self.domain_interval_tree.overlap(
                    domain_interval.begin, domain_interval.end)
                no_redundant_overlap_domains = overlap_domains.intersection(
                    set(domains_srt))

                if previous_domain not in no_redundant_overlap_domains:  # not overlapping domains => check for space to add a GAP
                    if domain_interval.begin - start_gap + 1 > self.gap_min_size:  # add middle GAP
                        self.domains_with_gaps.append(
                            Domain(
                                self.construct_gap_hitline(
                                    start_gap, domain_interval.begin - 1)))
            # adding gap or no append current domain interval and update start_gap
            self.domains_with_gaps.append(domain_interval.data)
            start_gap = domain_interval.end + 1
            previous_domain = domain_interval

        # To check for end GAP, you should get the maximum end_pos of non redundant domain
        max_end_pos = max([dom.end for dom in domains_srt])
        max_end_pos = max_end_pos + 1
        if self.length - max_end_pos + 1 > self.gap_min_size:
            self.domains_with_gaps.append(
                Domain(self.construct_gap_hitline(start_gap, self.length)))

    def add_gaps(self, domains_srt):
        """
		Add gaps in domain annotations

		Parameters
		----------
		domains_srt : list of Domain
			domain sorted per start/end position

		Returns
		-------
		None
		"""
        start_gap = 1
        for domain in domains_srt:  # check for GAP in the start and middle of the protein
            # |--- --- protein --- ---|
            #     |--dom1--| |--dom2--|
            # |GAP|
            if domain.start_pos - start_gap + 1 > self.gap_min_size:
                self.domains_with_gaps.append(
                    Domain(
                        self.construct_gap_hitline(start_gap,
                                                   domain.start_pos)))
            start_gap = domain.end_pos + 1
            self.domains_with_gaps.append(domain)

        # check for gap in the end of the protein seq
        # |--- --- protein --- ---|
        # |--dom1--| |--dom2--|
        #                     |GAP|
        if self.length - domain.end_pos + 1 > self.gap_min_size:
            self.domains_with_gaps.append(
                Domain(
                    self.construct_gap_hitline(domain.end_pos + 1,
                                               self.length)))

    def to_tabs_no_redundant(self):
        """
		Convert tabular info for protein in no redundant domain annotations (tabular output as well)

		Parameters
		----------

		Returns
		-------
		str
			no redundant domain tabular output line
		"""
        # find no redundant domains with maximum length
        domains_no_redundant_max_len = self.find_no_redundant_max_len()
        # sort by start position
        domains_no_redundant_max_len.sort(key=lambda domain: domain.begin,
                                          reverse=False)
        if self.with_gap:
            self.add_gaps_no_redundant(domains_no_redundant_max_len)
            self.domains_with_gaps.sort(
                key=lambda domain: domain.start_pos,
                reverse=False)  # sort by start position
            domains_no_redundant = " ".join(
                [domain.interpro_id for domain in self.domains_with_gaps])
            domains_evidence_db_ids = " ".join(
                [domain.evidence_db_id for domain in self.domains_with_gaps])
        else:
            domains_no_redundant = " ".join([
                domain.data.interpro_id
                for domain in domains_no_redundant_max_len
            ])
            domains_evidence_db_ids = " ".join([
                domain.data.evidence_db_id
                for domain in domains_no_redundant_max_len
            ])

        return self.uniprot_id + "\t" + domains_no_redundant + "\t" + domains_evidence_db_ids + "\n"

    def to_tabs_no_overlap(self):
        """
		Convert tabular info for protein in no overlapping domain annotations (tabular output as well)

		Parameters
		----------

		Returns
		-------
		str
			no overlapping domain tabular output line
		"""
        # find non overlaping domains with maximum length
        domains_no_overlap_max_len = self.find_no_overlap_max_len()
        # sort by start position
        domains_no_overlap_max_len.sort(key=lambda domain: domain.start_pos,
                                        reverse=False)
        if self.with_gap:
            self.add_gaps(domains_no_overlap_max_len)
            domains_no_overlap = " ".join(
                [domain.interpro_id for domain in self.domains_with_gaps])
            domains_evidence_db_ids = " ".join(
                [domain.evidence_db_id for domain in self.domains_with_gaps])
        else:
            domains_no_overlap = " ".join(
                [domain.interpro_id for domain in domains_no_overlap_max_len])
            domains_evidence_db_ids = " ".join([
                domain.evidence_db_id for domain in domains_no_overlap_max_len
            ])
        return self.uniprot_id + "\t" + domains_no_overlap + "\t" + domains_evidence_db_ids + "\n"

    def to_tabs_overlap(self):
        """
		Convert tabular info for protein in overlapping domain annotations (tabular output as well)

		Parameters
		----------

		Returns
		-------
		str
			overlapping domain tabular output line
		"""
        # for gaps you shall give a list out of the sorted dictionary sorted(self.domains)
        if self.with_gap:
            self.add_gaps([
                self.domains[start_stop]
                for start_stop in sorted(self.domains.keys())
            ])
            domains_overlap = " ".join(
                [domain.interpro_id for domain in self.domains_with_gaps])
            domains_evid_db_ids = " ".join(
                [domain.evidence_db_id for domain in self.domains_with_gaps])
        else:
            domains_overlap = " ".join([
                self.domains[start_stop].interpro_id
                for start_stop in self.domains
            ])
            domains_evid_db_ids = " ".join([
                self.domains[start_stop].evidence_db_id
                for start_stop in self.domains
            ])
        return self.uniprot_id + "\t" + domains_overlap + "\t" + domains_evid_db_ids + "\n"