Exemplo n.º 1
0
    def test_006_dbtodat(self):
        """Compare features from the BioSQL server with the default reference.

        Looks up HLA-A*01:01:01:01 both through a server-backed
        ``ReferenceData`` and through a default-constructed one, then checks
        that every feature of the latter is present in the former with an
        identical string representation.
        """
        allele = 'HLA-A*01:01:01:01'
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb,
                                              port=biosqlport)
        # Release the connection even when an assertion below fails.
        try:
            refdata1 = ReferenceData(server=server)
            refdata2 = ReferenceData()

            datseqs = refdata2.hlaref[allele]

            db = refdata1.server[refdata1.dbversion + "_" + 'A']
            expected = db.lookup(name=allele)

            expected_seqs1 = get_features(datseqs)
            expected_seqs2 = get_features(expected)

            for feat in expected_seqs1:
                # A feature missing from the server record is a failure.
                self.assertIn(feat, expected_seqs2)
                self.assertEqual(str(expected_seqs1[feat]),
                                 str(expected_seqs2[feat]))
        finally:
            server.close()
Exemplo n.º 2
0
 def test_012_exact(self):
     """Annotate each 'exact' input sequence and compare it to the reference.

     For every entry in ``self.expected['exact']`` the sequence at the
     given index of ``exact_seqs.fasta`` is annotated, and the resulting
     annotation is checked against the features of the reference allele.
     """
     seqann = BioSeqAnn(verbose=False, verbosity=verbosity, pid="007_exact")
     input_seq = self.data_dir + '/exact_seqs.fasta'
     # Parse the FASTA file once rather than once per expected entry.
     in_seqs = list(SeqIO.parse(input_seq, "fasta"))
     for ex in self.expected['exact']:
         in_seq = in_seqs[int(ex['index'])]
         locus = ex['locus']
         allele = ex['name']
         annotation = seqann.annotate(in_seq, locus)
         self.assertTrue(annotation.exact)
         self.assertIsNone(annotation.features)
         self.assertEqual(annotation.method, "match")
         self.assertIsInstance(annotation, Annotation)
         self.assertTrue(annotation.complete_annotation)
         self.assertGreater(len(annotation.annotation.keys()), 1)
         expected = seqann.refdata.hlaref[allele]
         expected_seqs = get_features(expected)
         self.assertGreater(len(annotation.structure), 1)
         for feat in annotation.structure:
             self.assertIsInstance(feat, Feature)
         self.assertEqual(annotation.gfe, ex['gfe'])
         self.assertGreater(len(expected_seqs.keys()), 1)
         for feat in expected_seqs:
             # Every reference feature must have been annotated.
             self.assertIn(feat, annotation.annotation)
             self.assertEqual(str(expected_seqs[feat]),
                              str(annotation.annotation[feat]))
Exemplo n.º 3
0
 def test_005_select(self):
     """Search the default reference data for each 'exact' input sequence.

     Checks that a default-constructed ``ReferenceData`` has ``hlaref`` and
     ``seqref`` populated, and that ``search_refdata`` returns a complete
     'match' annotation whose features equal those of the reference allele.
     """
     refdata = ReferenceData()
     input_seq = self.data_dir + '/exact_seqs.fasta'
     self.assertTrue(refdata.hlaref)
     self.assertTrue(refdata.seqref)
     # Parse the FASTA file once rather than once per expected entry.
     in_seqs = list(SeqIO.parse(input_seq, "fasta"))
     for ex in self.expected['exact']:
         in_seq = in_seqs[int(ex['index'])]
         locus = ex['locus']
         allele = ex['name']
         annotation = refdata.search_refdata(in_seq, locus)
         self.assertIsNone(annotation.features)
         self.assertEqual(annotation.method, "match")
         self.assertIsInstance(annotation, Annotation)
         self.assertTrue(annotation.complete_annotation)
         self.assertGreater(len(annotation.annotation.keys()), 1)
         expected = refdata.hlaref[allele]
         expected_seqs = get_features(expected)
         for feat in expected_seqs:
             # Every reference feature must have been annotated.
             self.assertIn(feat, annotation.annotation)
             self.assertEqual(str(expected_seqs[feat]),
                              str(annotation.annotation[feat]))
Exemplo n.º 4
0
    def test_004_selectserv(self):
        """Search server-backed reference data for each 'exact' sequence.

        With a BioSQL server supplied, ``hlaref`` and ``seqref`` are expected
        to be empty; the expected record is looked up in the database
        instead and compared feature by feature with the annotation
        returned by ``search_refdata``.
        """
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb,
                                              port=biosqlport)
        # Release the connection even when an assertion below fails.
        try:
            refdata = ReferenceData(server=server)
            input_seq = self.data_dir + '/exact_seqs.fasta'
            self.assertFalse(refdata.hlaref)
            self.assertFalse(refdata.seqref)
            # Parse the FASTA file once rather than once per expected entry.
            in_seqs = list(SeqIO.parse(input_seq, "fasta"))
            for ex in self.expected['exact']:
                in_seq = in_seqs[int(ex['index'])]
                locus = ex['locus']
                allele = ex['name']
                _, loc = locus.split("-")
                annotation = refdata.search_refdata(in_seq, locus)
                self.assertIsNone(annotation.features)
                self.assertEqual(annotation.method, "match")
                self.assertIsInstance(annotation, Annotation)
                self.assertTrue(annotation.complete_annotation)
                self.assertGreater(len(annotation.annotation.keys()), 1)
                db = refdata.server[refdata.dbversion + "_" + loc]
                expected = db.lookup(name=allele)
                expected_seqs = get_features(expected)

                for feat in expected_seqs:
                    # Every reference feature must have been annotated.
                    self.assertIn(feat, annotation.annotation)
                    self.assertEqual(str(expected_seqs[feat]),
                                     str(annotation.annotation[feat]))
        finally:
            server.close()
Exemplo n.º 5
0
    def test_009_partialambigserv(self):
        """Annotate partial/ambiguous sequences against the BioSQL server.

        For every entry in ``self.expected['partial_ambig']`` the annotation
        must be complete, use the expected method and GFE, contain none of
        the listed missing features, and match (or, for features listed in
        ``ex['diff']``, differ from) the database record's feature sequences.
        """
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb,
                                              port=biosqlport)
        # Release the connection even when an assertion below fails.
        try:
            seqann = BioSeqAnn(server=server,
                               verbose=False,
                               verbosity=verbosity,
                               pid="006_partialambig")
            input_seq = self.data_dir + '/partial_ambig.fasta'
            # Parse the FASTA file once rather than once per expected entry.
            in_seqs = list(SeqIO.parse(input_seq, "fasta"))

            for ex in self.expected['partial_ambig']:
                i = int(ex['index'])
                locus = ex['locus']
                allele = ex['name']
                _, loc = locus.split("-")
                print(str(i), allele)
                ann = seqann.annotate(in_seqs[i], locus)
                self.assertTrue(ann.complete_annotation)
                self.assertEqual(ann.method, ex['method'])
                self.assertFalse(ann.blocks)
                self.assertIsInstance(ann, Annotation)
                self.assertGreater(len(ann.annotation.keys()), 1)
                db = seqann.refdata.server[seqann.refdata.dbversion + "_" +
                                           loc]
                expected = db.lookup(name=allele)
                expected_seqs = get_features(expected)
                self.assertGreater(len(expected_seqs.keys()), 1)
                self.assertEqual(ann.gfe, ex['gfe'])

                self.assertGreater(len(ann.structure), 1)
                for feat in ann.structure:
                    self.assertIsInstance(feat, Feature)
                # Make sure only mapped feats exist
                for mf in ex['missing_feats']:
                    self.assertNotIn(mf, ann.annotation)

                for feat in ex['feats']:
                    if feat in ex['diff']:
                        self.assertNotEqual(str(expected_seqs[feat]),
                                            str(ann.annotation[feat].seq))
                    else:
                        self.assertEqual(str(expected_seqs[feat]),
                                         str(ann.annotation[feat].seq))
        finally:
            server.close()
Exemplo n.º 6
0
 def test_005_insertionserv(self):
     """Annotate sequences with insertions against the BioSQL server.

     Each annotation must be complete, produced by "nt_search", and equal
     to the database record except for the features listed in
     ``ex['diff']``, whose length difference must equal
     ``ex['lengths'][feat]``.
     """
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb,
                                           port=biosqlport)
     # Release the connection even when an assertion below fails.
     try:
         seqann = BioSeqAnn(server=server,
                            verbose=False,
                            verbosity=verbosity,
                            pid="004_insertion")
         input_seq = self.data_dir + '/insertion_seqs.fasta'
         # Parse the FASTA file once rather than once per expected entry.
         in_seqs = list(SeqIO.parse(input_seq, "fasta"))
         for ex in self.expected['insertion']:
             in_seq = in_seqs[int(ex['index'])]
             locus = ex['locus']
             allele = ex['name']
             _, loc = locus.split("-")
             ann = seqann.annotate(in_seq, locus)
             self.assertEqual(ann.method, "nt_search")
             self.assertFalse(ann.missing)
             self.assertFalse(ann.blocks)
             self.assertIsInstance(ann, Annotation)
             self.assertTrue(ann.complete_annotation)
             self.assertGreater(len(ann.annotation.keys()), 1)
             db = seqann.refdata.server[seqann.refdata.dbversion + "_" +
                                        loc]
             expected = db.lookup(name=allele)
             self.assertEqual(ann.gfe, ex['gfe'])
             self.assertGreater(len(ann.structure), 1)
             for feat in ann.structure:
                 self.assertIsInstance(feat, Feature)
             n_diffs = 0
             expected_seqs = get_features(expected)
             self.assertGreater(len(expected_seqs.keys()), 1)
             for feat in expected_seqs:
                 # Every reference feature must have been annotated.
                 self.assertIn(feat, ann.annotation)
                 ref_str = str(expected_seqs[feat])
                 ann_str = str(ann.annotation[feat].seq)
                 if feat in ex['diff']:
                     n_diffs += 1
                     self.assertNotEqual(ref_str, ann_str)
                     # Annotated length minus reference length.
                     diff_len = len(ann_str) - len(ref_str)
                     self.assertEqual(diff_len, ex['lengths'][feat])
                 else:
                     self.assertEqual(ref_str, ann_str)
             # All features flagged as different must have been seen.
             self.assertEqual(n_diffs, len(ex['diff']))
     finally:
         server.close()
Exemplo n.º 7
0
 def test_021_stringseq(self):
     """Check that ``annotate`` accepts both a plain string and a ``Seq``.

     The first record of ``exact_seqs.fasta`` is annotated once as ``str``
     and once as its ``.seq`` object; both annotations must be exact
     matches equal to the database record of the expected allele.
     """
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb,
                                           port=biosqlport)
     # Release the connection even when an assertion below fails.
     try:
         seqann = BioSeqAnn(server=server,
                            verbose=False,
                            verbosity=verbosity,
                            pid="015_stringseq")
         input_seq = self.data_dir + '/exact_seqs.fasta'
         ex = self.expected['exact'][0]
         locus = ex['locus']
         allele = ex['name']
         _, loc = locus.split("-")
         in_seqrec = list(SeqIO.parse(input_seq, "fasta"))[0]
         in_str = str(in_seqrec.seq)
         in_seq = in_seqrec.seq
         ann_str = seqann.annotate(in_str, locus)
         ann_seq = seqann.annotate(in_seq, locus)

         # The expected record is the same for both annotations, so it is
         # fetched once instead of once per loop iteration.
         db = seqann.refdata.server[seqann.refdata.dbversion + "_" + loc]
         expected = db.lookup(name=allele)
         expected_seqs = get_features(expected)
         self.assertGreater(len(expected_seqs.keys()), 1)

         for annotation in [ann_str, ann_seq]:
             self.assertTrue(annotation.exact)
             self.assertIsNone(annotation.features)
             self.assertEqual(annotation.method, "match")
             self.assertIsInstance(annotation, Annotation)
             self.assertTrue(annotation.complete_annotation)
             self.assertGreater(len(annotation.annotation.keys()), 1)
             self.assertEqual(annotation.gfe, ex['gfe'])
             self.assertGreater(len(annotation.structure), 1)
             for feat in annotation.structure:
                 self.assertIsInstance(feat, Feature)
             for feat in expected_seqs:
                 # Every reference feature must have been annotated.
                 self.assertIn(feat, annotation.annotation)
                 self.assertEqual(str(expected_seqs[feat]),
                                  str(annotation.annotation[feat]))
     finally:
         server.close()
Exemplo n.º 8
0
    def seqannotation(self, seqrecord, allele, loc):
        """
        Builds a complete 'match' Annotation from a sequence record.

        :param seqrecord: Record whose features are extracted with
            ``get_features``
        :param allele: Allele key used to look up the annotated alignments
        :param loc: Locus key used to look up the annotated alignments
        :return: The Annotation from the found sequence
        :rtype: Annotation
        """
        result = Annotation(annotation=get_features(seqrecord),
                            method='match',
                            complete_annotation=True)

        # Without alignments there is nothing more to attach.
        if not self.alignments:
            return result

        # Map each feature name to its aligned sequence for this allele.
        result.aligned = {
            name: self.annoated_alignments[loc][allele][name]['Seq']
            for name in self.annoated_alignments[loc][allele].keys()
        }
        return result
Exemplo n.º 9
0
 def test_006_insertion(self):
     """Annotate sequences with insertions using the default reference data.

     Each annotation must be complete, produced by "nt_search", and equal
     to the reference allele except for the features listed in
     ``ex['diff']``, whose length difference must equal
     ``ex['lengths'][feat]``.
     """
     seqann = BioSeqAnn(verbosity=verbosity, pid="004_insertion")
     input_seq = self.data_dir + '/insertion_seqs.fasta'
     # Parse the FASTA file once rather than once per expected entry.
     in_seqs = list(SeqIO.parse(input_seq, "fasta"))
     for ex in self.expected['insertion']:
         in_seq = in_seqs[int(ex['index'])]
         locus = ex['locus']
         allele = ex['name']
         ann = seqann.annotate(in_seq, locus)
         self.assertEqual(ann.method, "nt_search")
         self.assertFalse(ann.missing)
         self.assertFalse(ann.blocks)
         self.assertIsInstance(ann, Annotation)
         self.assertTrue(ann.complete_annotation)
         self.assertGreater(len(ann.annotation.keys()), 1)
         expected = seqann.refdata.hlaref[allele]
         self.assertEqual(ann.gfe, ex['gfe'])
         self.assertGreater(len(ann.structure), 1)
         for feat in ann.structure:
             self.assertIsInstance(feat, Feature)
         n_diffs = 0
         expected_seqs = get_features(expected)
         self.assertGreater(len(expected_seqs.keys()), 1)
         for feat in expected_seqs:
             # Every reference feature must have been annotated.
             self.assertIn(feat, ann.annotation)
             ref_str = str(expected_seqs[feat])
             ann_str = str(ann.annotation[feat].seq)
             if feat in ex['diff']:
                 n_diffs += 1
                 self.assertNotEqual(ref_str, ann_str)
                 # Annotated length minus reference length.
                 diff_len = len(ann_str) - len(ref_str)
                 self.assertEqual(diff_len, ex['lengths'][feat])
             else:
                 self.assertEqual(ref_str, ann_str)
         # All features flagged as different must have been seen.
         self.assertEqual(n_diffs, len(ex['diff']))
Exemplo n.º 10
0
    def test_010_partialambig(self):
        """Annotate partial/ambiguous sequences with the default reference.

        For every entry in ``self.expected['partial_ambig']`` the annotation
        must be complete, use the expected method and GFE, contain none of
        the listed missing features, and match (or, for features listed in
        ``ex['diff']``, differ from) the reference allele's feature
        sequences.
        """
        seqann = BioSeqAnn(verbose=False,
                           verbosity=verbosity,
                           pid="006_partialambig")
        input_seq = self.data_dir + '/partial_ambig.fasta'
        # Parse the FASTA file once rather than once per expected entry.
        in_seqs = list(SeqIO.parse(input_seq, "fasta"))
        for ex in self.expected['partial_ambig']:
            in_seq = in_seqs[int(ex['index'])]
            locus = ex['locus']
            allele = ex['name']
            ann = seqann.annotate(in_seq, locus)
            self.assertTrue(ann.complete_annotation)
            self.assertEqual(ann.method, ex['method'])
            self.assertFalse(ann.blocks)
            self.assertIsInstance(ann, Annotation)
            self.assertGreater(len(ann.annotation.keys()), 1)
            expected = seqann.refdata.hlaref[allele]
            expected_seqs = get_features(expected)
            self.assertGreater(len(expected_seqs.keys()), 1)
            self.assertEqual(ann.gfe, ex['gfe'])
            self.assertGreater(len(ann.structure), 1)
            for feat in ann.structure:
                self.assertIsInstance(feat, Feature)
            # Make sure only mapped feats exist
            for mf in ex['missing_feats']:
                self.assertNotIn(mf, ann.annotation)

            for feat in ex['feats']:
                if feat in ex['diff']:
                    self.assertNotEqual(str(expected_seqs[feat]),
                                        str(ann.annotation[feat].seq))
                else:
                    self.assertEqual(str(expected_seqs[feat]),
                                     str(ann.annotation[feat].seq))
Exemplo n.º 11
0
    def search_seqs(self, seqrec, in_seq, locus, run=0, partial_ann=None):
        """
        search_seqs - method for annotating a BioPython sequence without alignment

        :param seqrec: The reference sequence
        :type seqrec: SeqRecord
        :param locus: The gene locus associated with the sequence.
        :type locus: str
        :param in_seq: The input sequence
        :type in_seq: SeqRecord
        :param run: The number of runs that have been done
        :type run: int
        :param partial_ann: A partial annotation from a previous step
        :type partial_ann: :ref:`ann`
        :rtype: :ref:`ann`

        Example usage:

            >>> from Bio.Seq import Seq
            >>> from seqann.seq_search import SeqSearch
            >>> inseq = Seq('AGAGACTCTCCCGAGGATTTCGTGTACCAGTTTAAGGCCATGTGCTACTTCACC')
            >>> sqsrch = SeqSearch()
            >>> ann = sqsrch.search_seqs(refseqs, inseq)

        """
        # Extract out the sequences and feature names
        # from the reference sequences

        # The mapped features will be subtracted from seq_covered
        # so the final seq_covered number will reflect the remaining
        # number of base pairs that haven't been mapped.
        #
        # The coordinates and mapping will help determine what positions
        # in the sequence have been mapped and to what features. The
        # missing blocks variable will be generated using these.
        structures = get_structures()
        seq_covered = len(in_seq.seq)
        coordinates = dict(
            map(lambda x: [x, 1], [i for i in range(0,
                                                    len(in_seq.seq) + 1)]))

        mapping = dict(
            map(lambda x: [x, 1], [i for i in range(0,
                                                    len(in_seq.seq) + 1)]))

        ambig_map = {}
        found_feats = {}
        feat_missing = {}

        method = "nt_search" if not partial_ann else partial_ann.method

        # If the partial annotation is provided
        # then make the found_feats equal to
        # what has already been annotated
        feats = get_features(seqrec)
        if partial_ann:

            found_feats = partial_ann.features

            if self.verbose and self.verbosity > 4:
                self.logger.info("Found partial features:")
                for f in found_feats:
                    self.logger.info(f)

            # Skip references that only have features
            # that have already been annoated
            if len([f for f in feats if f in found_feats]) == len(feats):
                if self.verbose:
                    self.logger.info("Skipping incomplete refseq")
                return partial_ann

            if self.verbose and self.verbosity > 1:
                self.logger.info("Using partial annotation | " + locus + " " +
                                 str(len(partial_ann.features)))

            coordinates = dict(
                map(lambda l: [l, 1], [
                    item for sublist in partial_ann.blocks for item in sublist
                ]))
            seq_covered = partial_ann.covered
            mapping = partial_ann.mapping

            if self.verbose and self.verbosity > 2:
                self.logger.info("Partial sequence coverage = " +
                                 str(seq_covered))
                self.logger.info("Partial sequence metho = " + method)

        added_feat = {}
        deleted_coords = {}
        for feat_name in sorted(feats, key=lambda k: structures[locus][k]):

            # skip if partial annotation is provided
            # and the feat name is not one of the
            # missing features
            if partial_ann and feat_name not in partial_ann.refmissing:
                if self.verbose and self.verbosity > 1:
                    self.logger.info("Skipping " + feat_name +
                                     " - Already annotated")
                continue

            if self.verbose and self.verbosity > 1:
                self.logger.info("Running seqsearch for " + feat_name)

            # Search for the reference feature sequence in the
            # input sequence. Record the coordinates if it's
            # found and if it's found in multiple spots. If it
            # is not found, then record that feature as missing.
            seq_search = nt_search(str(in_seq.seq), str(feats[feat_name]))

            if len(seq_search) == 2:

                if self.verbose and self.verbosity > 0:
                    self.logger.info("Found exact match for " + feat_name)

                seq_covered -= len(str(feats[feat_name]))
                end = int(len(str(feats[feat_name])) + seq_search[1])

                if feat_name == 'three_prime_UTR' \
                        and len(str(in_seq.seq)) > end:
                    end = len(str(in_seq.seq))

                # If the feature is found and it's a five_prime_UTR then
                # the start should always be 0, so insertions at the
                # beinging of the sequence will be found.
                start = seq_search[1] if feat_name != 'five_prime_UTR' else 0
                si = seq_search[1]+1 if seq_search[1] != 0 and \
                    feat_name != 'five_prime_UTR' else 0

                # check if this features has already been mapped
                mapcheck = set(
                    [0 if i in coordinates else 1 for i in range(si, end + 1)])

                # Dont map features if they are out of order
                skip = False
                if found_feats and len(found_feats) > 0:
                    for f in found_feats:
                        o1 = structures[locus][feat_name]
                        o2 = structures[locus][f]
                        loctyp = loctype(found_feats[f].location.start,
                                         found_feats[f].location.end, start,
                                         end)

                        if o1 < o2 and loctyp:
                            skip = True
                            if self.verbose:
                                self.logger.info("Skipping map for " +
                                                 feat_name)
                        elif o2 < o1 and not loctyp:
                            skip = True
                            if self.verbose:
                                self.logger.info("Skipping map for " +
                                                 feat_name)

                if 1 not in mapcheck and not skip:
                    for i in range(si, end + 1):
                        if i in coordinates:
                            if feat_name == "exon_8" or feat_name == 'three_prime_UTR':
                                deleted_coords.update({i: coordinates[i]})
                            del coordinates[i]
                        else:
                            if self.verbose:
                                self.logger.error(
                                    "seqsearch - should't be here " + locus +
                                    " - " + " - " + feat_name)
                        mapping[i] = feat_name

                    found_feats.update({
                        feat_name:
                        SeqFeature(FeatureLocation(ExactPosition(start),
                                                   ExactPosition(end),
                                                   strand=1),
                                   type=feat_name)
                    })

                    if feat_name == "exon_8" or feat_name == 'three_prime_UTR':
                        added_feat.update({feat_name: feats[feat_name]})
                    if self.verbose and self.verbosity > 3:
                        self.logger.info("Coordinates | Start = " +
                                         str(start) + " - End = " + str(end))

            elif (len(seq_search) > 2):
                if self.verbose and self.verbosity > 1:
                    self.logger.info("Found " + str(len(seq_search)) +
                                     " matches for " + feat_name)

                new_seq = [seq_search[0]]
                for i in range(1, len(seq_search)):
                    tnp = seq_search[i] + 1
                    if seq_search[i] in coordinates or tnp in coordinates:
                        new_seq.append(seq_search[i])

                seq_search = new_seq
                if (partial_ann and feat_name == "exon_8" and run > 0):
                    missing_feats = sorted(list(partial_ann.missing.keys()))

                    # * HARD CODED LOGIC * #
                    # > exon8 in class I maps to multiple spots in a sequence,
                    #   often in the 3' UTR. These features need to be mapped
                    #   last to make sure it's not mapping exon8 incorrectly.
                    if (missing_feats == ['exon_8', 'three_prime_UTR']
                            and len(seq_search) <= 3):
                        if self.verbose and self.verbosity > 0:
                            self.logger.info("Resolving exon_8")

                        seq_covered -= len(str(feats[feat_name]))
                        end = int(len(str(feats[feat_name])) + seq_search[1])

                        # If the feature is found and it's a five_prime_UTR then
                        # the start should always be 0, so insertions at the
                        # beinging of the sequence will be found.
                        start = seq_search[1]
                        si = seq_search[1] + 1 if seq_search[1] != 0 else 0

                        # check if this features has already been mapped
                        mapcheck = set([
                            0 if i in coordinates else 1
                            for i in range(si, end + 1)
                        ])

                        for i in range(si, end + 1):
                            if i in coordinates:
                                del coordinates[i]
                            else:
                                if self.verbose:
                                    self.logger.error(
                                        "seqsearch - should't be here " +
                                        locus + " - " + " - " + feat_name)
                            mapping[i] = feat_name

                        found_feats.update({
                            feat_name:
                            SeqFeature(FeatureLocation(ExactPosition(start),
                                                       ExactPosition(end),
                                                       strand=1),
                                       type=feat_name)
                        })

                        if self.verbose and self.verbosity > 0:
                            self.logger.info("Coordinates | Start = " +
                                             str(start) + " - End = " +
                                             str(end))
                    else:
                        if self.verbose and self.verbosity > 0:
                            self.logger.info("Adding ambig feature " +
                                             feat_name)
                        feat_missing.update({feat_name: feats[feat_name]})
                        ambig_map.update(
                            {feat_name: seq_search[1:len(seq_search)]})
                else:
                    if self.verbose and self.verbosity > 0:
                        self.logger.info("Adding ambig feature " + feat_name)
                    feat_missing.update({feat_name: feats[feat_name]})
                    ambig_map.update(
                        {feat_name: seq_search[1:len(seq_search)]})
            else:
                if self.verbose and self.verbosity > 1:
                    self.logger.info("No match for " + feat_name)
                feat_missing.update({feat_name: feats[feat_name]})

        blocks = getblocks(coordinates)
        exact_matches = list(found_feats.keys())

        # * HARD CODED LOGIC * #
        # >
        #
        #  HLA-DRB1 exon3 exact match - with intron1 and 3 missing
        if ('exon_3' in exact_matches and run == 99 and locus == 'HLA-DRB1'
                and 'exon_2' in feat_missing
                and (len(blocks) == 1 or len(blocks) == 2)):

            for b in blocks:
                x = b[len(b) - 1]
                if x == max(list(mapping.keys())):
                    featname = "intron_3"
                    found_feats.update({
                        featname:
                        SeqFeature(FeatureLocation(ExactPosition(b[0] - 1),
                                                   ExactPosition(b[len(b) -
                                                                   1]),
                                                   strand=1),
                                   type=featname)
                    })
                else:
                    featname = "exon_2"
                    found_feats.update({
                        featname:
                        SeqFeature(FeatureLocation(ExactPosition(b[0]),
                                                   ExactPosition(b[len(b) -
                                                                   1]),
                                                   strand=1),
                                   type=featname)
                    })
                    seq_covered -= len(b)

                if self.verbose and self.verbosity > 1:
                    self.logger.info(
                        "Successfully annotated class DRB1 II sequence")

                return Annotation(features=found_feats,
                                  covered=seq_covered,
                                  seq=in_seq,
                                  missing=feat_missing,
                                  ambig=ambig_map,
                                  method=method,
                                  mapping=mapping,
                                  exact_match=exact_matches)

        # If it's a class II sequence and
        # exon_2 is an exact match
        # * HARD CODED LOGIC * #
        # > It's common for exon2 to be fully sequenced
        #   but intron_2 and intron_1 to be partially sequenced,
        #   which can make it hard to annotate those to features.
        #   If there are two missing blocks that is small enough
        #   and they are before and after exon2, then it's very
        #   very likely to be intron_2 and intron_1.
        if 'exon_2' in exact_matches and len(blocks) == 2 \
                and is_classII(locus) and seq_covered < 300:

            if self.verbose and self.verbosity > 1:
                self.logger.info("Running search for class II sequence")

            r = True
            for b in blocks:
                x = b[len(b) - 1]
                if x == max(list(mapping.keys())):
                    x = b[0] - 1
                else:
                    x += 1
                f = mapping[x]
                if f != 'exon_2':
                    r = False
            if r:
                for b in blocks:
                    x = b[len(b) - 1]
                    if x == max(list(mapping.keys())):
                        featname = "intron_2"
                        found_feats.update({
                            featname:
                            SeqFeature(FeatureLocation(ExactPosition(b[0] - 1),
                                                       ExactPosition(b[len(b) -
                                                                       1]),
                                                       strand=1),
                                       type=featname)
                        })
                    else:
                        featname = "intron_1"
                        found_feats.update({
                            featname:
                            SeqFeature(FeatureLocation(ExactPosition(b[0]),
                                                       ExactPosition(b[len(b) -
                                                                       1]),
                                                       strand=1),
                                       type=featname)
                        })
                    seq_covered -= len(b)

                if self.verbose and self.verbosity > 1:
                    self.logger.info(
                        "Successfully annotated class II sequence")

                return Annotation(features=found_feats,
                                  covered=seq_covered,
                                  seq=in_seq,
                                  missing=feat_missing,
                                  ambig=ambig_map,
                                  method=method,
                                  mapping=mapping,
                                  exact_match=exact_matches)

        annotated_feats, mb, mapping = self._resolve_unmapped(
            blocks, feat_missing, ambig_map, mapping, found_feats, locus,
            seq_covered)

        # * HARD CODED LOGIC * #
        if (not mb and blocks and len(feat_missing.keys()) == 0
                and len(ambig_map.keys()) == 0):
            mb = blocks

        if mb:

            # Unmap exon 8
            if locus in ['HLA-C', 'HLA-A'] and len(in_seq.seq) < 3000 \
                    and 'exon_8' in exact_matches:
                for i in deleted_coords:
                    mapping[i] = 1
                coordinates.update(deleted_coords)
                mb = getblocks(coordinates)
                feat_missing.update(added_feat)

                # Delete from found features
                del exact_matches[exact_matches.index('exon_8')]
                del found_feats['exon_8']

                if 'exon_8' in annotated_feats:
                    del annotated_feats['exon_8']
                if 'three_prime_UTR' in found_feats:
                    del found_feats['three_prime_UTR']
                if 'three_prime_UTR' in annotated_feats:
                    del annotated_feats['three_prime_UTR']

            refmissing = [
                f for f in structures[locus] if f not in annotated_feats
            ]

            if self.verbose and self.verbosity > 1:
                self.logger.info("* Annotation not complete *")

            # Print out what features were missing by the ref
            if self.verbose and self.verbosity > 2:
                self.logger.info("Refseq was missing these features = " +
                                 ",".join(list(refmissing)))

            # Print out what features were ambig matches
            if self.verbose and self.verbosity > 1 and len(ambig_map) > 1:
                self.logger.info("Features with ambig matches = " +
                                 ",".join(list(ambig_map)))

            # Print out what features were exact matches
            if self.verbose and self.verbosity > 2 and len(exact_matches) > 1:
                self.logger.info("Features exact matches = " +
                                 ",".join(list(exact_matches)))

            # Print out what features have been annotated
            if self.verbose and self.verbosity > 1 and len(
                    annotated_feats) > 1:
                self.logger.info("Features annotated = " +
                                 ",".join(list(annotated_feats)))

            # Print out what features are missing
            if self.verbose and self.verbosity > 1 and len(feat_missing) > 1:
                self.logger.info("Features missing = " +
                                 ",".join(list(feat_missing)))

            annotation = Annotation(features=annotated_feats,
                                    covered=seq_covered,
                                    seq=in_seq,
                                    missing=feat_missing,
                                    ambig=ambig_map,
                                    blocks=mb,
                                    method=method,
                                    refmissing=refmissing,
                                    mapping=mapping,
                                    exact_match=exact_matches,
                                    annotation=None)
        else:

            mb = None
            # Unmap exon 8
            if locus in ['HLA-C', 'HLA-A'] and len(in_seq.seq) < 600 \
                    and 'exon_8' in exact_matches \
                    and 'three_prime_UTR' in annotated_feats\
                    and 'three_prime_UTR' not in exact_matches:

                for i in deleted_coords:
                    mapping[i] = 1

                coordinates.update(deleted_coords)
                mb = getblocks(coordinates)
                feat_missing.update(added_feat)
                del exact_matches[exact_matches.index('exon_8')]
                del found_feats['exon_8']
                if 'exon_8' in annotated_feats:
                    del annotated_feats['exon_8']
                if 'three_prime_UTR' in found_feats:
                    del found_feats['three_prime_UTR']
                if 'three_prime_UTR' in annotated_feats:
                    del annotated_feats['three_prime_UTR']

            if self.verbose:
                self.logger.info("* No missing blocks after seq_search *")

            # Print out what features were ambig matches
            if self.verbose and self.verbosity > 0 and len(ambig_map) > 1:
                self.logger.info("Features with ambig matches = " +
                                 ",".join(list(ambig_map)))

            # Print out what features were exact matches
            if self.verbose and self.verbosity > 0 and len(exact_matches) > 1:
                self.logger.info("Features exact matches = " +
                                 ",".join(list(exact_matches)))

            # Print out what features have been annotated
            if self.verbose and self.verbosity > 0 and len(
                    annotated_feats) > 1:
                self.logger.info("Features annotated = " +
                                 ",".join(list(annotated_feats)))

            # Print out what features are missing
            if self.verbose and self.verbosity > 0 and len(feat_missing) > 1:
                self.logger.info("Features missing = " +
                                 ",".join(list(feat_missing)))

            annotation = Annotation(features=annotated_feats,
                                    covered=seq_covered,
                                    seq=in_seq,
                                    missing=feat_missing,
                                    ambig=ambig_map,
                                    method=method,
                                    blocks=mb,
                                    mapping=mapping,
                                    exact_match=exact_matches,
                                    annotation=None)

        return annotation