Пример #1
0
    def _make_pbs_sequence(nuclease, reference, pbs_min_length, pbs_max_length,
                           cloning_strategy, cloning_options, **options):
        """Pick a primary PBS and list every expressible PBS candidate.

        Candidates are the suffixes of ``reference`` with lengths from
        ``pbs_min_length`` to ``pbs_max_length`` (inclusive), tried shortest
        first. Candidates whose reverse complement the cloning strategy cannot
        express are skipped. The first remaining candidate with a GC content in
        the range [0.4, 0.6] becomes the primary PBS. If there is none, the
        candidate with a GC content closest to 0.5 is used (the shortest one on
        ties). If no candidate is expressible at all, the suffix of length
        ``pbs_min_length`` is used.

        Returns a tuple ``(pbs, alternatives)`` where ``alternatives`` is the
        list of all expressible suffixes in the length range, shortest first.
        """
        strategy = nuclease.get_cloning_strategy(cloning_strategy)

        def expressible(seq):
            # The strategy is asked about the reverse complement of the PBS.
            return strategy.can_express(reverse_complement(seq),
                                        **cloning_options)

        # (distance of GC content from 0.5, sequence) for candidates that are
        # expressible but fall outside the preferred GC window.
        fallbacks = []
        for length in range(pbs_min_length, pbs_max_length + 1):
            pbs = reference[-length:]
            if not expressible(pbs):
                continue
            gc_content = gc(pbs)
            if 0.4 <= gc_content <= 0.6:
                break
            fallbacks.append((abs(0.5 - gc_content), pbs))
        else:
            # No candidate hit the GC window. min() returns the first minimal
            # entry, and entries were added shortest first.
            if fallbacks:
                pbs = min(fallbacks, key=lambda entry: entry[0])[1]
            else:
                pbs = reference[-pbs_min_length:]

        # Every expressible PBS within the length limits.
        suffixes = (reference[-length:]
                    for length in range(pbs_min_length, pbs_max_length + 1))
        alt_lengths = [seq for seq in suffixes if expressible(seq)]
        return pbs, alt_lengths
Пример #2
0
    def make_extension_sequence(cls, nuclease, reference_sequence,
                                altered_sequence, spacer_strand,
                                spacer_cut_site, cut_dist, alteration_length,
                                pbs_min_length, pbs_max_length, rt_min_length,
                                rt_max_length, cloning_strategy,
                                cloning_options, nuclease_options, **options):
        """Create the pegRNA extension sequence and its alternatives.

        A pegRNA extension consists of a PBS and an RT template. The PBS is
        taken from the reference sequence on one side of the cut site, the RT
        template from the altered sequence on the other side. For any strand
        other than 1 both source sequences are reverse complemented first.

        Returns a tuple ``(pbs_length, rt_length, extension, alternates)``:
        ``extension`` is the reverse complement of PBS + RT template, and
        ``alternates`` is a list of dicts (keys ``pbs_length``,
        ``rt_template_length``, ``sequence``, ``pbs_gc``, ``rt_gc``), one for
        every PBS/RT combination whose two lengths are not both equal to those
        of the primary suggestion.
        """
        size_change = len(altered_sequence) - len(reference_sequence)
        if spacer_strand == 1:
            # On this strand only a net shortening is carried forward.
            size_change = min(0, size_change)
            pbs_source = reference_sequence[:spacer_cut_site]
            rt_source = altered_sequence[spacer_cut_site:]
        else:
            pbs_source = reverse_complement(
                reference_sequence[spacer_cut_site:])
            rt_source = reverse_complement(
                altered_sequence[:spacer_cut_site + size_change])

        primary_pbs, pbs_candidates = cls._make_pbs_sequence(
            nuclease, pbs_source.upper(), pbs_min_length, pbs_max_length,
            cloning_strategy, cloning_options, **options)
        primary_rt, rt_candidates = cls._make_rt_sequence(
            nuclease, rt_source, cut_dist, size_change, alteration_length,
            rt_min_length, rt_max_length, cloning_strategy, cloning_options,
            nuclease_options, **options)
        primary_lengths = (len(primary_pbs), len(primary_rt))

        # Pair every PBS candidate with every RT candidate, leaving out the
        # pair that has the same lengths as the primary suggestion.
        alternate_extensions = []
        for candidate_pbs in pbs_candidates:
            candidate_pbs_gc = round(gc(candidate_pbs), 2)
            for candidate_rt in rt_candidates:
                candidate_rt_gc = round(gc(candidate_rt), 2)
                lengths = (len(candidate_pbs), len(candidate_rt))
                if lengths == primary_lengths:
                    continue
                alternate_extensions.append({
                    'pbs_length': lengths[0],
                    'rt_template_length': lengths[1],
                    'sequence': reverse_complement(candidate_pbs +
                                                   candidate_rt),
                    'pbs_gc': candidate_pbs_gc,
                    'rt_gc': candidate_rt_gc,
                })

        extension = reverse_complement(primary_pbs + primary_rt)
        return (primary_lengths[0], primary_lengths[1], extension,
                alternate_extensions)
Пример #3
0
    def _make_extension_sequence(self, **options):
        """Build the pegRNA extension and the attributes derived from it.

        Delegates to ``self.design_strategy.make_extension_sequence`` and
        stores ``pbs_length``, ``rt_template_length``, ``extension``,
        ``rt_template``, ``pbs`` and ``alternate_extensions`` on the instance.

        Also creates a visual extension, which can be used by the frontend for
        visualization: a slice of the tracker sequence with the same length as
        the extension. It is stored under the attribute name
        ``visual_exension``.
        """
        tracker = self.tracker
        cut_site = self.spacer_cut_site
        strand = self.spacer_strand

        reference_sequence = tracker.original_sequence
        altered_sequence = str(tracker)

        # Distance handed to the design strategy as ``cut_dist``.
        edit_start = sum(tracker.alterations[0])
        if edit_start < cut_site:
            cut_dist = cut_site - edit_start
        else:
            cut_dist = edit_start + tracker.alteration_length - cut_site

        if self.repair:
            # Repair mode designs from the edited sequence back to the original.
            reference_sequence, altered_sequence = (altered_sequence,
                                                    reference_sequence)

        design = self.design_strategy.make_extension_sequence(
            self.nuclease,
            reference_sequence,
            altered_sequence,
            strand,
            cut_site,
            cut_dist,
            tracker.number_of_alterations,
            cloning_strategy=self.cloning_strategy,
            cloning_options=self.cloning_options,
            nuclease_options=self.nuclease_options,
            **options)
        pbs_length, rt_template_length, extension, alternates = design

        self.pbs_length = pbs_length
        self.rt_template_length = rt_template_length
        self.extension = extension
        # The extension holds the RT template first, followed by the PBS.
        self.rt_template = extension[:rt_template_length]
        self.pbs = extension[rt_template_length:]
        self.alternate_extensions = alternates

        # Start coordinate of the displayed slice.
        if strand == -1:
            indel_shift = (self.tracker.number_of_deletions -
                           self.tracker.number_of_insertions)
            if not self.repair:
                indel_shift = -indel_shift
            start = cut_site - rt_template_length + indel_shift
        else:
            start = cut_site - pbs_length
        stop = start + len(extension)

        if self.repair:
            visual = self.tracker.seq_from_original_coordinates(start, stop)
        else:
            visual = self.tracker.seq_from_new_coordinates(start, stop)
        if strand == 1:
            visual = reverse_complement(visual)

        # NOTE(review): the attribute name is spelled 'visual_exension'. It is
        # kept unchanged because readers of it are not visible from here.
        self.visual_exension = visual
Пример #4
0
 def make_spacer_oligos(cls, spacer_sequence: str,
                        scaffold: str) -> OligoDict:
     """Build the top and bottom oligos for the spacer.

     The spacer is first converted with ``cls._spacer_to_cloning``. The top
     oligo is ``'cacc'``, the converted spacer and the lower-cased first five
     scaffold characters. The bottom oligo is the reverse complement of the
     converted spacer followed by the lower-cased first nine scaffold
     characters.
     """
     cloning_spacer = cls._spacer_to_cloning(spacer_sequence)
     top_oligo = 'cacc' + cloning_spacer + scaffold[:5].lower()
     bottom_oligo = reverse_complement(cloning_spacer + scaffold[:9].lower())
     return {'top': top_oligo, 'bottom': bottom_oligo}
Пример #5
0
    def _make_rt_sequence(nuclease, reference, cut_dist, nucleotide_difference,
                          alteration_length, rt_min_length, rt_max_length,
                          cloning_strategy, cloning_options, nuclease_options,
                          **options):
        """Suggest an RT template and list the alternative RT templates.

        Every template is a prefix of ``reference`` ending at
        ``cut_dist + n + nucleotide_difference``. The suggestion starts at
        ``n = rt_min_length`` and is grown one nucleotide at a time while the
        nuclease filters the current template out, or while ``n`` is at most
        twice ``alteration_length``; the most recent grown template that passes
        both the nuclease filter and the cloning strategy is kept.

        Returns a tuple ``(rt_template, alternatives)`` where ``alternatives``
        lists every accepted template for ``n`` in
        ``[rt_min_length, rt_max_length]``.
        """
        strategy = nuclease.get_cloning_strategy(cloning_strategy)

        def filtered_out(seq):
            return nuclease.filter_extension(seq, **nuclease_options)

        def accepted(seq):
            # The cloning strategy is only consulted for unfiltered templates.
            return not filtered_out(seq) and strategy.can_express(
                reverse_complement(seq), **cloning_options)

        extra = rt_min_length
        end = cut_dist + extra + nucleotide_difference
        candidate = reference[:end]
        # NOTE(review): this initial fallback is never validated, so it can be
        # returned even when it would be filtered out.
        suggestion = candidate
        # For large alterations, longer template is probably preferred
        while True:
            keep_growing = (filtered_out(candidate)
                            or extra <= alteration_length * 2)
            # NOTE(review): growth is still allowed when extra equals
            # rt_max_length, so the suggestion can be one nucleotide longer
            # than the longest alternative. Confirm this is intended.
            if not keep_growing or extra > rt_max_length:
                break
            try:
                candidate += reference[end]
            except IndexError:
                # Ran off the end of the reference.
                break
            if accepted(candidate):
                suggestion = candidate
            end += 1
            extra += 1

        # Create all possible RT templates within range limits.
        prefixes = (reference[:cut_dist + extra + nucleotide_difference]
                    for extra in range(rt_min_length, rt_max_length + 1))
        alternatives = [seq for seq in prefixes if accepted(seq)]
        return suggestion, alternatives
Пример #6
0
 def make_nicking_oligos(cls, spacer_sequence: str, scaffold: str):
     """Build the top and bottom oligos for a nicking spacer.

     The spacer is first converted with ``cls._spacer_to_cloning``. The top
     oligo is ``'cacc'`` followed by the converted spacer. The bottom oligo
     is the reverse complement of the converted spacer followed by the
     lower-cased first four scaffold characters.
     """
     cloning_spacer = cls._spacer_to_cloning(spacer_sequence)
     top_oligo = 'cacc' + cloning_spacer
     bottom_oligo = reverse_complement(cloning_spacer + scaffold[:4].lower())
     return {'top': top_oligo, 'bottom': bottom_oligo}
Пример #7
0
 def make_extension_oligos(cls, extension_sequence: str,
                           scaffold: str) -> OligoDict:
     """Build the top and bottom oligos for the extension.

     The top oligo is the lower-cased last four scaffold characters followed
     by the extension. The bottom oligo is the reverse complement of the
     extension followed by ``'tttt'``.
     """
     scaffold_tail = scaffold[-4:].lower()
     top_oligo = scaffold_tail + extension_sequence
     bottom_oligo = reverse_complement(extension_sequence + 'tttt')
     return {'top': top_oligo, 'bottom': bottom_oligo}
Пример #8
0
 def make_scaffold_oligos(cls, scaffold: str) -> OligoDict:
     """Build the top and bottom oligos for the scaffold.

     The top oligo is the scaffold without its first five and last four
     characters. The bottom oligo is the reverse complement of the scaffold
     without its first nine characters.
     """
     top_oligo = scaffold[5:-4]
     bottom_oligo = reverse_complement(scaffold[9:])
     return {'top': top_oligo, 'bottom': bottom_oligo}
Пример #9
0
    def find_spacers(nuclease, reference_sequence, altered_sequence, start,
                     end, spacer_search_range, cloning_method, cloning_options,
                     **options):
        """Find candidate spacers for pegRNA selection.

        Finds all spacers with a cut site within spacer_search_range of the edit.
        Sorts spacers according to pam disruption, distance to edit and score.

        Arguments:
        nuclease -- object providing target_motif, pam_motif, cut_site_position,
                    downstream_from_cut_site, get_cloning_strategy and score_spacers
        reference_sequence -- sequence that is searched for spacers
        altered_sequence -- sequence used to check whether the PAM is disrupted
        start -- start of the edit; slices reference_sequence for the sense search
        end -- end of the edit; slices reference_sequence for the antisense search
        spacer_search_range -- size of the search window around the edit
        cloning_method -- passed to nuclease.get_cloning_strategy
        cloning_options -- keyword options passed to can_express

        Returns a sorted list of dicts with the keys 'spacer', 'position',
        'cut_site', 'strand' (1 or -1), 'pam' (a tuple of PAM sequence and
        position), 'pam_disrupted', 'distance' and 'score'.
        """
        cloning_method = nuclease.get_cloning_strategy(cloning_method)
        spacers = []
        # Full motif matches (upper-cased) handed to the scorer; kept in the
        # same order as `spacers` so scores can be assigned by index below.
        scoring_spacers = []
        # Sense search text: the reference up to the edit start plus
        # downstream_from_cut_site further nucleotides.
        sense = reference_sequence[:start + nuclease.downstream_from_cut_site]
        # Added to match positions in the searched sense window to obtain
        # coordinates in reference_sequence; never negative.
        sense_offset = max(
            0, start - spacer_search_range - nuclease.cut_site_position)
        nucleotide_difference = len(altered_sequence) - len(reference_sequence)

        # Antisense search text: reverse complement of the reference from
        # shortly before the edit end to the end of the sequence.
        antisense = reverse_complement(
            reference_sequence[end - nuclease.downstream_from_cut_site -
                               max(0, nucleotide_difference):])
        # Base coordinate used below to map antisense match positions back to
        # forward coordinates; the min(..., 0) term only contributes when the
        # antisense text is shorter than the search window.
        antisense_offset = end - max(0, nucleotide_difference) + min(
            len(antisense) -
            (spacer_search_range + nuclease.cut_site_position +
             nuclease.downstream_from_cut_site), 0)

        # PAM regex anchored at the end of the string it is matched against.
        pam_motif = dgn_to_regex(nuclease.pam_motif) + '$'
        # Sense strand: search only the tail of `sense`, i.e. the window
        # closest to the edit.
        for match in regex.finditer(
                nuclease.target_motif,
                sense[-spacer_search_range - nuclease.cut_site_position -
                      nuclease.downstream_from_cut_site:],
                regex.IGNORECASE,
                overlapped=True):
            spacer = match.group('spacer')
            pam = match.group('PAM')
            pos = sense_offset + match.start() + len(match.group('upstream'))
            # The PAM counts as disrupted when the motif no longer matches the
            # corresponding slice of the altered sequence.
            pam_disrupted = not regex.search(
                pam_motif, altered_sequence[pos + len(spacer):pos +
                                            len(spacer) + len(pam)],
                regex.IGNORECASE)
            cut_site = pos + nuclease.cut_site_position - len(
                match.group('upstream'))
            distance = start - cut_site
            # Keep only spacers the cloning strategy can express.
            if cloning_method.can_express(spacer, **cloning_options):
                spacers.append({
                    'spacer': spacer,
                    'position': pos,
                    'cut_site': cut_site,
                    'strand': 1,
                    'pam': (pam, pos + len(spacer)),
                    'pam_disrupted': pam_disrupted,
                    'distance': distance,
                })

                scoring_spacers.append(match.group().upper())

        # Antisense strand: same search on the reverse-complemented text, with
        # positions converted back to forward coordinates.
        for match in regex.finditer(
                nuclease.target_motif,
                antisense[-spacer_search_range - nuclease.cut_site_position -
                          nuclease.downstream_from_cut_site:],
                regex.IGNORECASE,
                overlapped=True):
            spacer = match.group('spacer')
            pam = match.group('PAM')
            pos = antisense_offset + spacer_search_range - match.start() - len(
                match.group('upstream')) - 1 + nuclease.cut_site_position
            # The PAM slice of the altered sequence is shifted by the length
            # difference and reverse complemented before it is tested.
            pam_disrupted = not regex.search(
                pam_motif,
                reverse_complement(
                    altered_sequence[pos - len(spacer) - len(pam) + 1 +
                                     nucleotide_difference:pos + 1 +
                                     nucleotide_difference - len(spacer)]),
                regex.IGNORECASE)
            cut_site = pos - nuclease.cut_site_position + len(
                match.group('upstream')) + 1
            distance = cut_site - end + max(0, nucleotide_difference)
            if cloning_method.can_express(spacer, **cloning_options):
                spacers.append({
                    'spacer':
                    spacer,
                    'position':
                    pos,
                    'cut_site':
                    cut_site,
                    'strand':
                    -1,
                    'pam':
                    (pam,
                     pos - len(spacer) - len(pam) + 1 + nucleotide_difference),
                    'pam_disrupted':
                    pam_disrupted,
                    'distance':
                    distance,
                })

                scoring_spacers.append(match.group().upper())

        # Scores are attached by index.
        # NOTE(review): assumes score_spacers yields exactly one score per
        # input, in input order; otherwise the sort below raises KeyError on
        # 'score' or pairs scores with the wrong spacer. Confirm.
        for i, score in enumerate(nuclease.score_spacers(scoring_spacers)):
            spacers[i]['score'] = score

        # PAM-disrupting spacers first (False sorts before True), then
        # ascending distance, then ascending score.
        return sorted(spacers,
                      key=lambda x:
                      (not x['pam_disrupted'], x['distance'], x['score']))
Пример #10
0
    def find_nicking_spacers(nuclease, reference_sequence, altered_sequence,
                             spacer_strand, cut_site, scaffold, nicking_range,
                             cloning_method, cloning_options, **options):
        """Find spacers for nicking the opposite strand.

        Searches a window of the altered sequence around the cut site for
        target motif matches. Each match is classified as kind '3' or '3b',
        its nicking oligos are built, and it is kept if the cloning strategy
        can express the spacer.

        Arguments:
        nuclease -- object providing target_motif, pam_motif, cut_site_position,
                    downstream_from_cut_site, score_spacers, _is3b and
                    _calc_wt_score
        reference_sequence -- sequence used for the wild-type binding site
        altered_sequence -- sequence that is searched for spacers
        spacer_strand -- strand of the pegRNA spacer; for strand 1 both
                         sequences are reverse complemented before searching
        cut_site -- cut site of the pegRNA spacer
        scaffold -- passed on to make_nicking_oligos
        nicking_range -- size of the search window on each side of the cut site
        cloning_method -- passed to nuclease.get_cloning_strategy
        cloning_options -- keyword options passed to can_express

        Returns a sorted list of dicts: the result of make_nicking_oligos
        extended with the keys 'position', 'spacer', 'kind', 'wt_score',
        'offset' and 'score'.
        """
        cloning_method = nuclease.get_cloning_strategy(cloning_method)
        spacers = []
        # Full motif matches (upper-cased) handed to the scorer; kept in the
        # same order as `spacers` so scores can be assigned by index below.
        scoring_spacers = []

        nt_difference = len(altered_sequence) - len(reference_sequence)

        # For strand 1 the search runs on the reverse complement, so the cut
        # site is mirrored as well.
        if spacer_strand == 1:
            reference_sequence = reverse_complement(reference_sequence)
            altered_sequence = reverse_complement(altered_sequence)
            cut_site = len(altered_sequence) - cut_site
        # Search window in the altered sequence, clamped to its bounds.
        sequence = altered_sequence[max(
            0, cut_site - nuclease.cut_site_position - nicking_range
        ):min(len(altered_sequence), cut_site +
              nuclease.downstream_from_cut_site + nicking_range)].upper()
        # Window in the reference sequence with the same start; its end is
        # shifted by the length difference between the two sequences.
        ref = reference_sequence[
            max(0, cut_site - nuclease.cut_site_position - nicking_range):min(
                len(reference_sequence), cut_site +
                nuclease.downstream_from_cut_site + nicking_range -
                nt_difference)].upper()

        # When the window does not start at index 0, express the cut site
        # relative to the start of the window.
        if cut_site - nuclease.cut_site_position - nicking_range > 0:
            cut_site = nuclease.cut_site_position + nicking_range

        for match in regex.finditer(nuclease.target_motif,
                                    sequence,
                                    regex.IGNORECASE,
                                    overlapped=True):
            spacer = match.group('spacer')
            pos = match.start() + len(match.group('upstream'))
            wt_pos = pos
            cut = match.start() + nuclease.cut_site_position
            # Difference between the pegRNA cut site and this match's cut
            # position; stored as 'position' below.
            nick_location = cut_site - cut
            # For a negative nick location the wild-type position is shifted
            # by the length difference between the sequences.
            if nick_location < 0:
                wt_pos -= nt_difference

            # Default classification; upgraded to '3b' below when the
            # nuclease reports it for the altered/wild-type site pair.
            kind = '3'
            wt_score = 1
            alt_bind = sequence[pos:pos + len(spacer) +
                                len(nuclease.pam_motif)].upper()
            wt_bind = ref[wt_pos:wt_pos + len(spacer) +
                          len(nuclease.pam_motif)].upper()
            if nuclease._is3b(alt_bind, wt_bind):
                kind = '3b'
                wt_score = nuclease._calc_wt_score(alt_bind, wt_bind)

            # Oligos are built for every match, before the can_express check.
            info = cloning_method.make_nicking_oligos(spacer, scaffold)
            info['position'] = nick_location
            info['spacer'] = spacer
            info['kind'] = kind
            info['wt_score'] = wt_score
            info['offset'] = nuclease.cut_site_position - len(
                match.group('upstream'))
            if cloning_method.can_express(spacer, **cloning_options):
                spacers.append(info)
                scoring_spacers.append(match.group().upper())

        # Scores are attached by index.
        # NOTE(review): assumes score_spacers yields exactly one score per
        # input, in input order; otherwise the sort below raises KeyError on
        # 'score' or pairs scores with the wrong spacer. Confirm.
        for i, score in enumerate(nuclease.score_spacers(scoring_spacers)):
            spacers[i]['score'] = score

        # Ascending wt_score, then spacers with |position| > 50 before the
        # others (False sorts before True), then descending score.
        return sorted(
            spacers,
            key=lambda x:
            (x['wt_score'], not (abs(x['position']) > 50), -x['score']))
Пример #11
0
    def find_best_spacers(self,
                          repair=False,
                          nuclease=None,
                          cloning_strategy=None,
                          design_strategy=None,
                          nuclease_options=None,
                          cloning_options=None,
                          **options):
        """Find pegRNA spacers and wrap each one in an OligoSet.

                Optional arguments:
                repair -- If true, designs pegRNAs from edited sequence -> wild type sequence, defaults to False.
                nuclease -- Which nuclease to design pegRNAs for, if None uses default nuclease.
                cloning_strategy -- passed to find_spacers (as cloning_method) and to every OligoSet.
                design_strategy -- resolved through nuclease.get_design_strategy.
                nuclease_options -- passed to every OligoSet.
                cloning_options -- passed to find_spacers and to every OligoSet.
                options -- any further keyword arguments are forwarded to find_spacers.

                Each spacer dict returned by find_spacers gets an extra
                'visual_spacer' entry before it is wrapped.
                """
        reference_sequence = self.original_sequence
        altered_sequence = str(self)
        edit_start = sum(self.alterations[0])

        if nuclease is None:
            nuclease = django.conf.settings.DESIGN_CONF['default_nuclease']
        nuclease = NUCLEASES[nuclease]
        design_strategy = nuclease.get_design_strategy(design_strategy)

        if repair:
            # Repair mode designs from the edited sequence back to the original.
            reference_sequence, altered_sequence = (altered_sequence,
                                                    reference_sequence)

        spacers = design_strategy.find_spacers(
            nuclease,
            reference_sequence,
            altered_sequence,
            edit_start,
            edit_start + self.alteration_length,
            cloning_method=cloning_strategy,
            cloning_options=cloning_options,
            **options)

        for sp in spacers:
            span = len(sp['spacer'])
            on_antisense = sp['strand'] == -1
            begin = sp['position']
            if on_antisense:
                # Shift the position back by the spacer length (minus one) so
                # the slice below starts at the spacer's other end.
                begin = begin - span + 1
            if repair:
                shown = self.seq_from_new_coordinates(begin, begin + span)
            else:
                shown = self.seq_from_original_coordinates(begin, begin + span)
            if on_antisense:
                shown = reverse_complement(shown)
            sp['visual_spacer'] = shown

        oligo_sets = []
        for sp in spacers:
            oligo_sets.append(
                OligoSet(tracker=self,
                         spacer=sp,
                         repair=repair,
                         nuclease=nuclease,
                         nuclease_options=nuclease_options,
                         cloning_strategy=cloning_strategy,
                         cloning_options=cloning_options))
        return oligo_sets
Пример #12
0
    def make_oligos(self, degenerate_sequence, silence_pam=False, **options):
        """Make oligos for cloning pegRNAs

        Steps, in order:
        1. If the PAM is not already disrupted and silence_pam is set, try to
           substitute nucleotides at the PAM positions on a copy of the tracker.
        2. Build the extension sequence (self._make_extension_sequence).
        3. Design the cloning oligos and record whether spacer and extension
           can be expressed (self.oligos, self.can_express).
        4. Keep only the expressible alternate extensions and attach oligos
           to each of them.
        5. If the cloning strategy can design nicking spacers, find them and
           annotate each with 'visual_spacer' and 'push'
           (self.nicking_spacers; not assigned otherwise).

        Arguments:
        degenerate_sequence -- indexable by sequence position; each entry is
                               looked up in degenerate_to_nucleotides
        silence_pam -- whether to attempt PAM silencing, defaults to False
        options -- forwarded to the design and cloning strategy calls

        Returns self.oligos.
        """
        if not self.pam_disrupted and silence_pam:
            # Work on a copy so the substitutions do not touch the tracker
            # object this instance was given.
            self.tracker = self.tracker.copy()
            if self.spacer_strand == 1:
                pam = self.nuclease.pam_motif
            else:
                pam = reverse_complement(self.nuclease.pam_motif)

            # self.pam[1] is used as the PAM start index and len(self.pam[0])
            # as its length: i walks the motif, j the sequence positions.
            for i, j in enumerate(
                    range(self.pam[1], self.pam[1] + len(self.pam[0]))):
                dgn = degenerate_sequence[j]
                pam_nt = pam[i]
                pam_dgn = degenerate_to_nucleotides[pam_nt]
                nt_dgn = degenerate_to_nucleotides[dgn]
                # NOTE(review): presumably a proper-subset test between sets
                # of nucleotides; confirm the value type of
                # degenerate_to_nucleotides.
                if pam_dgn < nt_dgn:
                    # Substitute the first nucleotide allowed at this position
                    # that is not allowed by the PAM motif. The break leaves
                    # only the inner loop, so later PAM positions are still
                    # examined.
                    for nt in nt_dgn:
                        if nt not in pam_dgn:
                            self.tracker.substitute(nt.lower(), j)
                            self.pam_silenced = True
                            break

        # Read after any silencing substitution has been applied.
        reference_sequence = self.tracker.original_sequence
        altered_sequence = str(self.tracker)
        self._make_extension_sequence(**options)

        upstream = reference_sequence[:self.spacer_cut_site]
        downstream = reference_sequence[self.spacer_cut_site:]

        # Distance passed to the cloning strategy as cut_dist.
        alteration_position = sum(self.tracker.alterations[0])
        if alteration_position >= self.spacer_cut_site:
            cut_dist = alteration_position + self.tracker.alteration_length - self.spacer_cut_site
        else:
            cut_dist = self.spacer_cut_site - alteration_position

        # On the -1 strand the two flanks swap roles and are reverse
        # complemented.
        if self.spacer_strand == -1:
            upstream, downstream = reverse_complement(
                downstream), reverse_complement(upstream)
        self.oligos = self.cloning_strategy.design_cloning(
            spacer_sequence=self.spacer_sequence,
            scaffold=self.scaffold,
            extension_sequence=self.extension,
            upstream=upstream,
            downstream=downstream,
            cut_dist=cut_dist,
            cloning_options=self.cloning_options,
            **options)
        # Expressible only if both the spacer and the extension are.
        self.can_express = self.cloning_strategy.can_express(
            self.spacer_sequence, **
            self.cloning_options) and self.cloning_strategy.can_express(
                self.extension, **self.cloning_options)

        # Drop alternate extensions that cannot be expressed and attach
        # oligos to the remaining ones.
        extensions = []
        for extension in self.alternate_extensions:
            if self.cloning_strategy.can_express(extension['sequence'],
                                                 **self.cloning_options):
                extension[
                    'oligos'] = self.cloning_strategy.alternate_extension(
                        spacer_sequence=self.spacer_sequence,
                        scaffold=self.scaffold,
                        extension_sequence=extension['sequence'],
                        upstream=upstream,
                        downstream=downstream,
                        cut_dist=cut_dist,
                        cloning_options=self.cloning_options,
                        **options)
                extensions.append(extension)
        self.alternate_extensions = extensions

        # Repair mode designs from the edited sequence back to the original.
        if self.repair:
            reference_sequence, altered_sequence = altered_sequence, reference_sequence
        if self.cloning_strategy.can_design_nicking:
            spacers = self.design_strategy.find_nicking_spacers(
                nuclease=self.nuclease,
                reference_sequence=reference_sequence,
                altered_sequence=altered_sequence,
                spacer_strand=self.spacer_strand,
                cut_site=self.spacer_cut_site,
                scaffold=self.scaffold,
                cloning_method=self.cloning_strategy,
                cloning_options=self.cloning_options,
                **options)

            for spacer in spacers:

                visual_spacer = spacer['spacer']
                position = spacer['position']
                # Only the length of visual_spacer is used before it is
                # replaced by a slice of the tracker sequence below.
                if self.spacer_strand == 1:
                    visual_spacer = reverse_complement(visual_spacer)

                # Convert the cut-site-relative 'position' into a start
                # coordinate for the tracker lookups.
                pos = self.spacer_cut_site
                if self.spacer_strand == 1:
                    pos += position - len(spacer['spacer']) + spacer['offset']
                else:
                    pos -= position + spacer['offset']

                # NOTE(review): 'push' looks like a display shift derived from
                # the tracker index; its exact meaning is not visible here.
                if self.repair:
                    visual_spacer = self.tracker.seq_from_original_coordinates(
                        pos, pos + len(visual_spacer))
                    spacer['push'] = 0
                    if self.tracker.number_of_insertions:
                        spacer['push'] = (pos - self.tracker.index[pos][0]) + (
                            self.spacer_cut_site -
                            self.tracker.index[self.spacer_cut_site][0])
                else:
                    visual_spacer = self.tracker.seq_from_new_coordinates(
                        pos, pos + len(visual_spacer))
                    spacer['push'] = 0
                    if self.tracker.number_of_deletions:
                        spacer['push'] = (self.tracker.index[pos][0] - pos) + (
                            self.tracker.index[self.spacer_cut_site][0] -
                            self.spacer_cut_site)
                if self.spacer_strand == 1:
                    spacer[
                        'push'] -= self.tracker.number_of_insertions + self.tracker.number_of_deletions

                if self.spacer_strand == 1:
                    visual_spacer = reverse_complement(visual_spacer)

                spacer['visual_spacer'] = visual_spacer

            self.nicking_spacers = spacers
        return self.oligos