Exemplo n.º 1
0
    def test_020_skip(self):
        """Skipping the matching reference must not change the annotation.

        For a fixed sample of alleles present in the reference data, the
        reference record is annotated twice: once normally and once with
        the allele itself listed in ``skip``. Both runs have to agree on
        the GFE, the number of annotated features and every feature
        sequence.
        """
        annotator = BioSeqAnn(verbose=False)
        reference = annotator.refdata
        wanted = frozenset((
            'HLA-C*07:241', 'HLA-A*01:07', 'HLA-A*01:01:59',
            'HLA-A*01:09:01:01', 'HLA-A*02:545', 'HLA-A*29:13',
            'HLA-A*24:03:02', 'HLA-A*02:544', 'HLA-DQA1*04:01:01:01',
            'HLA-A*01:217', 'HLA-A*01:22N', 'HLA-B*51:42', 'HLA-C*03:04:05',
            'HLA-A*01:01:01:04', 'HLA-B*82:01',
        ))

        # Walk the reference in its own order, visiting only sampled alleles.
        for allele in (name for name in reference.hlaref if name in wanted):
            print(allele)
            record = reference.hlaref[allele]
            locus = record.description.split("*")[0]
            plain = annotator.annotate(record, locus=locus)
            skipped = annotator.annotate(record, locus=locus, skip=[allele])

            self.assertTrue(plain.exact)
            self.assertEqual(len(skipped.annotation), len(plain.annotation))
            self.assertEqual(plain.gfe, skipped.gfe)
            self.assertGreater(len(skipped.structure), 1)
            for item in skipped.structure:
                self.assertIsInstance(item, Feature)

            for name in plain.annotation:
                self.assertTrue(name in skipped.annotation)
                self.assertEqual(str(plain.annotation[name]),
                                 str(skipped.annotation[name].seq))
Exemplo n.º 2
0
 def test_012_exact(self):
     """Check annotation of sequences that exactly match a reference allele.

     Every entry of ``self.expected['exact']`` must be annotated with the
     ``"match"`` method, be complete, carry the expected GFE and agree
     feature by feature with the reference record of the named allele.
     """
     seqann = BioSeqAnn(verbose=False, verbosity=verbosity, pid="007_exact")
     input_seq = self.data_dir + '/exact_seqs.fasta'
     # Parse the FASTA file once instead of once per expected entry.
     in_seqs = list(SeqIO.parse(input_seq, "fasta"))
     for ex in self.expected['exact']:
         in_seq = in_seqs[int(ex['index'])]
         annotation = seqann.annotate(in_seq, ex['locus'])
         self.assertTrue(annotation.exact)
         self.assertIsNone(annotation.features)
         self.assertEqual(annotation.method, "match")
         self.assertIsInstance(annotation, Annotation)
         self.assertTrue(annotation.complete_annotation)
         self.assertGreater(len(annotation.annotation.keys()), 1)
         expected = seqann.refdata.hlaref[ex['name']]
         expected_seqs = get_features(expected)
         self.assertGreater(len(annotation.structure), 1)
         for feat in annotation.structure:
             self.assertIsInstance(feat, Feature)
         self.assertEqual(annotation.gfe, ex['gfe'])
         self.assertGreater(len(expected_seqs.keys()), 1)
         for feat in expected_seqs:
             # Every reference feature must be present in the annotation.
             self.assertIn(feat, annotation.annotation)
             self.assertEqual(str(expected_seqs[feat]),
                              str(annotation.annotation[feat]))
Exemplo n.º 3
0
    def test_001_align(self):
        """Compare produced alignments with the expected ones, column by column.

        For each entry of ``self.expected['align']`` the aligned feature
        strings of the annotation are concatenated and checked against
        ``ex['alignment']``; columns listed in ``ex['diffs']`` must equal
        the character recorded there instead.
        """
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb,
                                              port=biosqlport)
        # Close the connection even when an assertion fails.
        try:
            # TODO *** NOT WORKING WITH NO LOCUS           ***
            # TODO *** FIX 3290 Alignments                 ***
            # TODO *** GET ALIGNMENTS WORKING WITH DB SEQS ***
            seqann = BioSeqAnn(server=server, align=True)
            input_seq = self.data_dir + '/align_tests.fasta'
            # Parse the FASTA file once instead of once per expected entry.
            in_seqs = list(SeqIO.parse(input_seq, "fasta"))
            for ex in self.expected['align']:
                i = int(ex['index'])
                ex = self.expected['align'][i]
                allele = ex['name']
                # The locus is hard-coded; see the TODOs above.
                annotation = seqann.annotate(in_seqs[i], "HLA-A")
                align = "".join(
                    annotation.aligned[s] for s in annotation.aligned)
                expected_align = ex['alignment']
                # NOTE(review): the range stops at len(align) - 1, so the
                # last column is never compared -- confirm this is intended.
                for pos in range(len(align) - 1):
                    if str(pos) in ex['diffs']:
                        self.assertEqual(align[pos], ex['diffs'][str(pos)])
                    else:
                        if align[pos] != expected_align[pos]:
                            print("FAILED:", allele, pos,
                                  align[pos],
                                  expected_align[pos])
                        self.assertEqual(align[pos], expected_align[pos])
        finally:
            server.close()
Exemplo n.º 4
0
def gfeNotation_post(sequence, locus, gene):
    """Annotate a sequence and return its GFE notation with its features.

    :param sequence: mapping whose ``'sequence'`` entry holds the sequence
    :param locus: locus handed to ``GFE.get_gfe`` with the annotation
    :param gene: truthy to build the annotator with ``kir=True``
    :return: ``{'gfe': ..., 'feature': [...]}`` on success; otherwise a
        tuple ``(Error, 404)`` whose ``log`` holds the captured log lines
    """
    sequence = SeqRecord(seq=Seq(sequence['sequence']))
    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Capture everything logged during this call so it can be returned
    # to the caller alongside an error.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        gfe = GFE()
        seqann = BioSeqAnn(kir=True) if gene else BioSeqAnn()

        try:
            annotation = seqann.annotate(sequence)
        except Exception as e:
            print(e)
            log_contents = log_capture_string.getvalue()
            return Error("An error occured during the annotation",
                         log=log_contents.split("\n")), 404
        try:
            res_feature, res_gfe = gfe.get_gfe(annotation, locus)
        except Exception as e:
            print(e)
            log_contents = log_capture_string.getvalue()
            return Error("An error occurred in getting the gfe of annotation",
                         log=log_contents.split("\n")), 404

        feats = [Feature(accession=f.accession,
                         rank=f.rank,
                         term=f.term,
                         sequence=f.sequence)
                 for f in res_feature]
        return {'gfe': res_gfe, 'feature': feats}
    finally:
        # Detach the per-call handler; otherwise every request would leave
        # one more handler on the root logger.
        logger.removeHandler(ch)
Exemplo n.º 5
0
    def test_009_partialambigserv(self):
        """Partial, ambiguous sequences annotated against the database server.

        For each entry of ``self.expected['partial_ambig']`` the annotation
        must be complete, use the expected method, leave no blocks, yield
        the expected GFE, omit the listed missing features and match (or,
        for features in ``ex['diff']``, differ from) the database record.
        """
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb,
                                              port=biosqlport)
        # Close the connection even when an assertion fails.
        try:
            seqann = BioSeqAnn(server=server,
                               verbose=False,
                               verbosity=verbosity,
                               pid="006_partialambig")
            input_seq = self.data_dir + '/partial_ambig.fasta'
            # Parse the FASTA file once instead of once per expected entry.
            in_seqs = list(SeqIO.parse(input_seq, "fasta"))

            for ex in self.expected['partial_ambig']:
                i = int(ex['index'])
                locus = ex['locus']
                allele = ex['name']
                _, loc = locus.split("-")
                print(str(i), allele)
                ann = seqann.annotate(in_seqs[i], locus)
                self.assertTrue(ann.complete_annotation)
                self.assertEqual(ann.method, ex['method'])
                self.assertFalse(ann.blocks)
                self.assertIsInstance(ann, Annotation)
                self.assertGreater(len(ann.annotation.keys()), 1)
                db = seqann.refdata.server[
                    seqann.refdata.dbversion + "_" + loc]
                expected = db.lookup(name=allele)
                expected_seqs = get_features(expected)
                self.assertGreater(len(expected_seqs.keys()), 1)
                self.assertEqual(ann.gfe, ex['gfe'])

                self.assertGreater(len(ann.structure), 1)
                for feat in ann.structure:
                    self.assertIsInstance(feat, Feature)
                # Make sure only mapped feats exist
                for mf in ex['missing_feats']:
                    self.assertFalse(mf in ann.annotation)

                for feat in ex['feats']:
                    if feat in ex['diff']:
                        self.assertNotEqual(str(expected_seqs[feat]),
                                            str(ann.annotation[feat].seq))
                    else:
                        self.assertEqual(str(expected_seqs[feat]),
                                         str(ann.annotation[feat].seq))
        finally:
            server.close()
Exemplo n.º 6
0
 def test_005_insertionserv(self):
     """Sequences with insertions annotated against the database server.

     For each entry of ``self.expected['insertion']`` the annotation must
     use ``"nt_search"``, be complete with nothing missing, and carry the
     expected GFE. Features named in ``ex['diff']`` must differ from the
     database record by exactly ``ex['lengths'][feat]`` characters; every
     other feature must be identical.
     """
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb,
                                           port=biosqlport)
     # Close the connection even when an assertion fails.
     try:
         seqann = BioSeqAnn(server=server,
                            verbose=False,
                            verbosity=verbosity,
                            pid="004_insertion")
         input_seq = self.data_dir + '/insertion_seqs.fasta'
         # Parse the FASTA file once instead of once per expected entry.
         in_seqs = list(SeqIO.parse(input_seq, "fasta"))
         for ex in self.expected['insertion']:
             i = int(ex['index'])
             locus = ex['locus']
             allele = ex['name']
             _, loc = locus.split("-")
             ann = seqann.annotate(in_seqs[i], locus)
             self.assertEqual(ann.method, "nt_search")
             self.assertFalse(ann.missing)
             self.assertFalse(ann.blocks)
             self.assertIsInstance(ann, Annotation)
             self.assertTrue(ann.complete_annotation)
             self.assertGreater(len(ann.annotation.keys()), 1)
             db = seqann.refdata.server[
                 seqann.refdata.dbversion + "_" + loc]
             expected = db.lookup(name=allele)
             self.assertEqual(ann.gfe, ex['gfe'])
             self.assertGreater(len(ann.structure), 1)
             for feat in ann.structure:
                 self.assertIsInstance(feat, Feature)
             n_diffs = 0
             expected_seqs = get_features(expected)
             self.assertGreater(len(expected_seqs.keys()), 1)
             for feat in expected_seqs:
                 # Every reference feature must be present in the annotation.
                 self.assertIn(feat, ann.annotation)
                 expected_str = str(expected_seqs[feat])
                 actual_str = str(ann.annotation[feat].seq)
                 if feat in ex['diff']:
                     n_diffs += 1
                     self.assertNotEqual(expected_str, actual_str)
                     diff_len = len(actual_str) - len(expected_str)
                     self.assertEqual(diff_len, ex['lengths'][feat])
                 else:
                     self.assertEqual(expected_str, actual_str)
             self.assertEqual(n_diffs, len(ex['diff']))
     finally:
         server.close()
Exemplo n.º 7
0
 def test_021_stringseq(self):
     """``annotate`` must accept a plain string and a ``Seq`` object alike.

     The first exact-match sequence is annotated twice, once passed as
     ``str`` and once as the record's ``.seq``; both results must be exact
     ``"match"`` annotations that agree with the database record.
     """
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb,
                                           port=biosqlport)
     # Close the connection even when an assertion fails.
     try:
         seqann = BioSeqAnn(server=server,
                            verbose=False,
                            verbosity=verbosity,
                            pid="015_stringseq")
         input_seq = self.data_dir + '/exact_seqs.fasta'
         ex = self.expected['exact'][0]
         locus = ex['locus']
         allele = ex['name']
         _, loc = locus.split("-")
         in_seqrec = list(SeqIO.parse(input_seq, "fasta"))[0]
         in_str = str(in_seqrec.seq)
         in_seq = in_seqrec.seq
         ann_str = seqann.annotate(in_str, locus)
         ann_seq = seqann.annotate(in_seq, locus)

         # The reference record is the same for both annotations, so it is
         # looked up once rather than inside the loop.
         db = seqann.refdata.server[seqann.refdata.dbversion + "_" + loc]
         expected = db.lookup(name=allele)
         expected_seqs = get_features(expected)
         self.assertGreater(len(expected_seqs.keys()), 1)

         for annotation in [ann_str, ann_seq]:
             self.assertTrue(annotation.exact)
             self.assertIsNone(annotation.features)
             self.assertEqual(annotation.method, "match")
             self.assertIsInstance(annotation, Annotation)
             self.assertTrue(annotation.complete_annotation)
             self.assertGreater(len(annotation.annotation.keys()), 1)
             self.assertEqual(annotation.gfe, ex['gfe'])
             self.assertGreater(len(annotation.structure), 1)
             for feat in annotation.structure:
                 self.assertIsInstance(feat, Feature)
             for feat in expected_seqs:
                 # Every reference feature must be present in the annotation.
                 self.assertIn(feat, annotation.annotation)
                 self.assertEqual(str(expected_seqs[feat]),
                                  str(annotation.annotation[feat]))
     finally:
         server.close()
Exemplo n.º 8
0
 def test_015_fail(self):
     """A sequence that cannot be annotated yields a falsy result.

     Also checks that, with a database server supplied, the annotator's
     reference data has empty ``seqref`` and ``hlaref``.
     """
     input_seq = self.data_dir + '/failed_seqs.fasta'
     in_seq = list(SeqIO.parse(input_seq, "fasta"))[0]
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb,
                                           port=biosqlport)
     # Close the connection even when an assertion fails.
     try:
         seqann = BioSeqAnn(server=server,
                            verbose=False,
                            verbosity=verbosity,
                            pid="011_fail")
         self.assertFalse(seqann.refdata.seqref)
         self.assertFalse(seqann.refdata.hlaref)
         annotation = seqann.annotate(in_seq)
         self.assertFalse(annotation)
     finally:
         server.close()
Exemplo n.º 9
0
 def test_013_nomatch(self):
     """The first sequence of ``nomatch_seqs.fasta`` is fully annotated.

     Annotating it for locus ``"HLA-A"`` must return an ``Annotation`` with
     more than one annotated feature and ``complete_annotation`` set.
     """
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb,
                                           port=biosqlport)
     # Close the connection even when an assertion fails.
     try:
         seqann = BioSeqAnn(server=server,
                            verbose=False,
                            verbosity=verbosity,
                            pid="009_nomatch")
         self.assertIsInstance(seqann, BioSeqAnn)
         input_seq = self.data_dir + '/nomatch_seqs.fasta'
         in_seq = list(SeqIO.parse(input_seq, "fasta"))[0]
         annotation = seqann.annotate(in_seq, "HLA-A")
         self.assertIsInstance(annotation, Annotation)
         self.assertGreater(len(annotation.annotation.keys()), 1)
         self.assertTrue(annotation.complete_annotation)
     finally:
         server.close()
Exemplo n.º 10
0
 def test_006_insertion(self):
     """Sequences with insertions annotated against the local reference.

     For each entry of ``self.expected['insertion']`` the annotation must
     use ``"nt_search"``, be complete with nothing missing, and carry the
     expected GFE. Features named in ``ex['diff']`` must differ from the
     reference record by exactly ``ex['lengths'][feat]`` characters; every
     other feature must be identical.
     """
     seqann = BioSeqAnn(verbosity=verbosity, pid="004_insertion")
     input_seq = self.data_dir + '/insertion_seqs.fasta'
     # Parse the FASTA file once instead of once per expected entry.
     in_seqs = list(SeqIO.parse(input_seq, "fasta"))
     for ex in self.expected['insertion']:
         in_seq = in_seqs[int(ex['index'])]
         ann = seqann.annotate(in_seq, ex['locus'])
         self.assertEqual(ann.method, "nt_search")
         self.assertFalse(ann.missing)
         self.assertFalse(ann.blocks)
         self.assertIsInstance(ann, Annotation)
         self.assertTrue(ann.complete_annotation)
         self.assertGreater(len(ann.annotation.keys()), 1)
         expected = seqann.refdata.hlaref[ex['name']]
         self.assertEqual(ann.gfe, ex['gfe'])
         self.assertGreater(len(ann.structure), 1)
         for feat in ann.structure:
             self.assertIsInstance(feat, Feature)
         n_diffs = 0
         expected_seqs = get_features(expected)
         self.assertGreater(len(expected_seqs.keys()), 1)
         for feat in expected_seqs:
             # Every reference feature must be present in the annotation.
             self.assertIn(feat, ann.annotation)
             expected_str = str(expected_seqs[feat])
             actual_str = str(ann.annotation[feat].seq)
             if feat in ex['diff']:
                 n_diffs += 1
                 self.assertNotEqual(expected_str, actual_str)
                 diff_len = len(actual_str) - len(expected_str)
                 self.assertEqual(diff_len, ex['lengths'][feat])
             else:
                 self.assertEqual(expected_str, actual_str)
         self.assertEqual(n_diffs, len(ex['diff']))
Exemplo n.º 11
0
    def test_017_logging(self):
        """A failed annotation must end its log with the locus error.

        The last captured log record has to be at ``ERROR`` level and
        carry the message ``Locus could not be determined!``.
        """
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb,
                                              port=biosqlport)
        # Close the connection even when an assertion fails.
        try:
            with self.assertLogs(level='INFO') as cm:
                seqann = BioSeqAnn(server=server, verbose=True)
                input_seq = self.data_dir + '/failed_seqs.fasta'
                in_seq = list(SeqIO.parse(input_seq, "fasta"))[0]
                annotation = seqann.annotate(in_seq)
                self.assertFalse(annotation)

            self.assertGreater(len(cm.output), 1)
            last_record = cm.output[-1]
            # The level name precedes the first ":"; the message is taken
            # as the text between the first and second "-".
            error = last_record.split(":")[0]
            error_msg = last_record.split("-")[1]
            self.assertEqual(error, "ERROR")
            self.assertEqual(error_msg, " Locus could not be determined!")
        finally:
            server.close()
Exemplo n.º 12
0
    def test_010_partialambig(self):
        """Partial, ambiguous sequences annotated against the local reference.

        For each entry of ``self.expected['partial_ambig']`` the annotation
        must be complete, use the expected method, leave no blocks, yield
        the expected GFE, omit the listed missing features and match (or,
        for features in ``ex['diff']``, differ from) the reference record.
        """
        seqann = BioSeqAnn(verbose=False,
                           verbosity=verbosity,
                           pid="006_partialambig")
        input_seq = self.data_dir + '/partial_ambig.fasta'
        # Parse the FASTA file once instead of once per expected entry.
        in_seqs = list(SeqIO.parse(input_seq, "fasta"))
        for ex in self.expected['partial_ambig']:
            in_seq = in_seqs[int(ex['index'])]
            ann = seqann.annotate(in_seq, ex['locus'])
            self.assertTrue(ann.complete_annotation)
            self.assertEqual(ann.method, ex['method'])
            self.assertFalse(ann.blocks)
            self.assertIsInstance(ann, Annotation)
            self.assertGreater(len(ann.annotation.keys()), 1)
            expected = seqann.refdata.hlaref[ex['name']]
            expected_seqs = get_features(expected)
            self.assertGreater(len(expected_seqs.keys()), 1)
            self.assertEqual(ann.gfe, ex['gfe'])
            self.assertGreater(len(ann.structure), 1)
            for feat in ann.structure:
                self.assertIsInstance(feat, Feature)
            # Make sure only mapped feats exist
            for mf in ex['missing_feats']:
                self.assertFalse(mf in ann.annotation)

            for feat in ex['feats']:
                if feat in ex['diff']:
                    self.assertNotEqual(str(expected_seqs[feat]),
                                        str(ann.annotation[feat].seq))
                else:
                    self.assertEqual(str(expected_seqs[feat]),
                                     str(ann.annotation[feat].seq))
Exemplo n.º 13
0
    def test_018_nogfe(self):
        """A sequence with non-DNA characters is annotated but gets no GFE.

        The second record of ``failed_seqs.fasta`` must produce an
        annotation without ``gfe`` or ``structure``, and the first captured
        log record must be the non-DNA alphabet ``WARNING``.
        """
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb,
                                              port=biosqlport)
        # Close the connection even when an assertion fails.
        try:
            with self.assertLogs(level='INFO') as cm:
                seqann = BioSeqAnn(server=server, verbose=True)
                input_seq = self.data_dir + '/failed_seqs.fasta'
                in_seq = list(SeqIO.parse(input_seq, "fasta"))[1]
                annotation = seqann.annotate(in_seq)
                self.assertFalse(annotation.gfe)
                self.assertFalse(annotation.structure)
                self.assertTrue(annotation.annotation)

            self.assertGreater(len(cm.output), 2)
            first_record = cm.output[0]
            # The level name precedes the first ":"; the message is taken
            # as the text between the first and second "-".
            error = first_record.split(":")[0]
            error_msg = first_record.split("-")[1]
            self.assertEqual(error, "WARNING")
            self.assertEqual(error_msg, " Sequence alphabet contains non DNA")
        finally:
            server.close()
def gfeAnnotation_post(sequence, locus, gene=None, imgtdb_version="3.31.0"):
    """Annotate a sequence and return its GFE typing.

    :param sequence: mapping whose ``'sequence'`` entry holds the sequence
    :param locus: locus passed to the annotator
    :param gene: ``'KIR'`` (any case) selects a KIR annotator; any other
        value, including ``None``, selects the default HLA annotator
    :param imgtdb_version: database version, dotted (``"3.31.0"``) or as
        four compact characters (``"3310"``)
    :return: a ``Typing`` with ``features``, ``gfe`` and
        ``imgtdb_version`` set; on failure a tuple ``(Error, 404)`` whose
        ``log`` holds the captured log lines
    """
    global seqanns

    typing = Typing()
    sequence = SeqRecord(seq=Seq(sequence['sequence']))

    # Expand a compact version such as "3310" to "3.31.0". A regex test
    # with the pattern "." would match any character, so check for a
    # literal dot instead; other shapes are left untouched.
    if "." not in imgtdb_version and len(imgtdb_version) == 4:
        imgtdb_version = ".".join([imgtdb_version[0],
                                   imgtdb_version[1:3],
                                   imgtdb_version[3]])

    db = "".join(imgtdb_version.split("."))
    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    # Capture everything logged during this call so it can be returned
    # to the caller alongside an error.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        # TODO: Use `gene` or locus to figure out the gene-family
        is_kir = bool(gene) and gene.upper() == 'KIR'
        # HLA annotators stay keyed by the bare db version; KIR annotators
        # get their own key so the two never shadow each other.
        cache_key = db + "_KIR" if is_kir else db
        if cache_key in seqanns:
            seqann = seqanns[cache_key]
        else:
            if is_kir:
                seqann = BioSeqAnn(verbose=True, safemode=True,
                                   dbversion=db, verbosity=3, kir=True)
            else:
                # Defaults to HLA, including for unrecognised gene values.
                seqann = BioSeqAnn(verbose=True, safemode=True,
                                   dbversion=db, verbosity=3)
            seqanns[cache_key] = seqann

        try:
            annotation = seqann.annotate(sequence, locus)
        except Exception as e:
            print(e)
            log_contents = log_capture_string.getvalue()
            return Error("An error occurred during the annotation",
                         log=log_contents.split("\n")), 404

        if not annotation:
            log_contents = log_capture_string.getvalue()
            return Error("No annotation could be produced",
                         log=log_contents.split("\n")), 404

        if not hasattr(annotation, 'structure'):
            log_contents = log_capture_string.getvalue()
            return Error("No structure was produced",
                         log=log_contents.split("\n")), 404

        typing.features = [Feature(accession=f.accession, rank=f.rank,
                                   term=f.term, sequence=f.sequence)
                           for f in annotation.structure]
        typing.gfe = annotation.gfe
        typing.imgtdb_version = imgtdb_version
        return typing
    finally:
        # Detach the per-call handler; otherwise every request would leave
        # one more handler on the root logger.
        logger.removeHandler(ch)
Exemplo n.º 15
0
def annotate_get(sequence, locus=None, imgthla_version="3.31.0"):  # noqa: E501
    """Annotate a sequence and return its GFE typing.

    :param sequence: Valid consensus sequence
    :type sequence: str
    :param locus: Valid locus
    :type locus: str
    :param imgthla_version: IMGT/HLA DB version, dotted (``"3.31.0"``) or
        as four compact characters (``"3310"``)
    :type imgthla_version: str

    :return: a ``Typing`` with ``features``, ``gfe`` and
        ``imgtdb_version`` set; on failure a tuple ``(Error, 404)`` whose
        ``log`` holds the captured log lines
    :rtype: Typing
    """
    global seqanns

    typing = Typing()
    sequence = SeqRecord(seq=Seq(sequence))

    # Expand a compact version such as "3310" to "3.31.0". A regex test
    # with the pattern "." would match any character, so check for a
    # literal dot instead; other shapes are left untouched.
    if "." not in imgthla_version and len(imgthla_version) == 4:
        imgthla_version = ".".join([
            imgthla_version[0], imgthla_version[1:3], imgthla_version[3]
        ])

    db = "".join(imgthla_version.split("."))
    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Capture everything logged during this call so it can be returned
    # to the caller alongside an error.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s - %(funcName)s %(lineno)d: - %(message)s'
    )
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    try:
        # Reuse one annotator per database version across calls.
        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True,
                               safemode=True,
                               dbversion=db,
                               verbosity=3)
            seqanns.update({db: seqann})

        try:
            annotation = seqann.annotate(sequence, locus)
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt
            # and SystemExit still propagate.
            log_contents = log_capture_string.getvalue()
            return Error("An error occured during the annotation",
                         log=log_contents.split("\n")), 404

        if not annotation:
            log_contents = log_capture_string.getvalue()
            return Error("No annotation could be produced",
                         log=log_contents.split("\n")), 404

        if not hasattr(annotation, 'structure'):
            log_contents = log_capture_string.getvalue()
            return Error("No structure was produced",
                         log=log_contents.split("\n")), 404

        typing.features = [Feature(accession=f.accession,
                                   rank=f.rank,
                                   term=f.term,
                                   sequence=f.sequence)
                           for f in annotation.structure]
        typing.gfe = annotation.gfe
        typing.imgtdb_version = imgthla_version
        return typing
    finally:
        # Detach the per-call handler; otherwise every request would leave
        # one more handler on the root logger.
        logger.removeHandler(ch)