示例#1
0
 def test_001_load_features(self):
     """Build a pyGFE with ``load_features=True`` and check what it loaded.

     Connects to the Neo4j graph and the BioSQL database named by the
     module-level connection settings, then checks that structures and
     features were loaded for HLA-A and that no 'HLA-Z' entry exists.
     """
     graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass,
                   bolt=False)
     server = BioSeqDatabase.open_database(
         driver="pymysql",
         user=biosqluser,
         passwd=biosqlpass,
         host=biosqlhost,
         db=biosqldb,
         port=3307,
     )
     # Release the BioSQL connection even when an assertion below fails.
     self.addCleanup(server.close)
     seqann = BioSeqAnn(server=server)
     pygfe = pyGFE(graph=graph,
                   seqann=seqann,
                   verbose=True,
                   load_features=True,
                   verbosity=2,
                   loci=["HLA-A"])
     self.assertIsInstance(pygfe, pyGFE)
     self.assertGreater(len(pygfe.gfe.structures), 1)
     self.assertGreater(len(pygfe.gfe.all_feats), 1)
     # assertIn / assertNotIn show the container contents when they fail.
     self.assertIn('HLA-A', pygfe.gfe.structures)
     self.assertNotIn('HLA-Z', pygfe.gfe.structures)
示例#2
0
    def test_005_A(self):
        """Type the second record of ``A_fail.fasta`` with pickled lookups.

        The lookup tables are read from pickle files in the current working
        directory and handed to pyGFE instead of being loaded from the graph.

        NOTE(review): the fixture is named ``A_fail.fasta`` but the sequence
        is typed as HLA-DQB1 -- confirm this pairing is intentional.
        """
        graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass,
                      bolt=False)
        server = BioSeqDatabase.open_database(
            driver="pymysql",
            user=biosqluser,
            passwd=biosqlpass,
            host=biosqlhost,
            db=biosqldb,
            port=3307,
        )
        # Release the BioSQL connection even when the test fails.
        self.addCleanup(server.close)
        seqann = BioSeqAnn(server=server, dbversion="3200", verbose=True)

        def load_pickle(filename):
            # Fixture files are produced by the project itself; pickle must
            # never be pointed at untrusted data.
            with open(filename, 'rb') as handle:
                return pickle.load(handle)

        gfe_feats = load_pickle("gfe2feat.pickle")
        feats = load_pickle("unique_db-feats.pickle")
        cached_feats = load_pickle("feature-service.pickle")
        gfe2hla = load_pickle("gfe2hla.pickle")
        seq2hla = load_pickle("seq2hla.pickle")

        pygfe = pyGFE(graph=graph,
                      seqann=seqann,
                      load_features=False,
                      verbose=True,
                      features=feats,
                      seq2hla=seq2hla,
                      gfe2hla=gfe2hla,
                      gfe_feats=gfe_feats,
                      cached_features=cached_feats,
                      loci=["HLA-DQB1"])
        self.assertIsInstance(pygfe, pyGFE)
        seqs = list(SeqIO.parse(self.data_dir + "/A_fail.fasta", "fasta"))
        typing1 = pygfe.type_from_seq("HLA-DQB1", str(seqs[1].seq), "3.20.0")
        print(typing1)
        self.assertIsInstance(typing1, Typing)
示例#3
0
 def test_004_loader3(self):
     """Type a known HLA-A sequence with lookup tables loaded by pyGFE.

     pyGFE is asked to load the gfe2hla, seq2hla and gfe2feat tables itself.
     The first record of ``known_A.fasta`` is typed against release 3.31.0
     and must come back as the documented allele HLA-A*01:01:01:01. The
     wall-clock time of the whole run is printed.
     """
     started = time.time()
     neo_graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass,
                       bolt=False)
     biosql = BioSeqDatabase.open_database(
         driver="pymysql",
         user=biosqluser,
         passwd=biosqlpass,
         host=biosqlhost,
         db=biosqldb,
         port=3307,
     )
     annotator = BioSeqAnn(server=biosql, verbose=True)
     gfe_engine = pyGFE(
         graph=neo_graph,
         seqann=annotator,
         verbose=False,
         load_features=False,
         load_gfe2hla=True,
         load_seq2hla=True,
         load_gfe2feat=True,
         loci=["HLA-A"],
     )
     self.assertIsInstance(gfe_engine, pyGFE)

     fasta_path = self.data_dir + "/known_A.fasta"
     records = list(SeqIO.parse(fasta_path, "fasta"))
     first_sequence = str(records[0].seq)
     result = gfe_engine.type_from_seq("HLA-A", first_sequence, "3.31.0")

     # The database is no longer needed once the typing has been computed.
     biosql.close()
     elapsed = time.time() - started
     print("TIME TAKEN: " + str(elapsed))

     self.assertEqual(result.hla, 'HLA-A*01:01:01:01')
     self.assertEqual(result.status, "documented")
     self.assertIsInstance(result, Typing)
示例#4
0
 def test_000_pygfe(self):
     """Type the second record of ``unknown_A.fasta`` as a novel HLA-A allele.

     pyGFE is built with ``load_all=True`` and the BioSQL-backed annotator.
     The typing must report HLA-A*01:01:01:01 with status "novel".

     NOTE(review): unlike the sibling tests, no ``port`` is passed to
     ``open_database`` here -- confirm the default port is intended.
     """
     graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass,
                   bolt=False)
     server = BioSeqDatabase.open_database(
         driver="pymysql",
         user=biosqluser,
         passwd=biosqlpass,
         host=biosqlhost,
         db=biosqldb,
     )
     # Release the BioSQL connection even when the test fails.
     self.addCleanup(server.close)
     # The annotator used to be overwritten with the placeholder string "X"
     # right after construction, so pyGFE never received a real BioSeqAnn.
     seqann = BioSeqAnn(server=server, verbose=False)
     pygfe = pyGFE(graph=graph,
                   seqann=seqann,
                   load_features=False,
                   verbose=False,
                   load_all=True,
                   loci=["HLA-A"])
     self.assertIsInstance(pygfe, pyGFE)
     seqs = list(SeqIO.parse(self.data_dir + "/unknown_A.fasta", "fasta"))
     typing = pygfe.type_from_seq("HLA-A", str(seqs[1].seq))
     self.assertEqual(typing.hla, 'HLA-A*01:01:01:01')
     self.assertEqual(typing.status, "novel")
     self.assertIsInstance(typing, Typing)
示例#5
0
    def test_001_pygfe(self):
        """Type the second record of ``unknown_A.fasta`` without BioSQL.

        Every lookup table comes from a pickle file in the current working
        directory. The annotator is built from the cached features with
        alignment enabled. The typing is printed and must report
        HLA-A*01:01:01:01 with status "novel".
        """
        neo_graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass,
                          bolt=False)

        # (name used below, fixture file), read in this order.
        fixture_files = (
            ("gfe_feats", "gfe2feat.pickle"),
            ("feats", "unique_db-feats.pickle"),
            ("cached_feats", "feature-service.pickle"),
            ("gfe2hla", "gfe2hla.pickle"),
            ("seq2hla", "seq2hla.pickle"),
        )
        fixtures = {}
        for key, filename in fixture_files:
            with open(filename, 'rb') as stream:
                fixtures[key] = pickle.load(stream)

        annotator = BioSeqAnn(verbose=False,
                              cached_features=fixtures["cached_feats"],
                              align=True)

        gfe_engine = pyGFE(
            graph=neo_graph,
            seqann=annotator,
            gfe_feats=fixtures["gfe_feats"],
            gfe2hla=fixtures["gfe2hla"],
            seq2hla=fixtures["seq2hla"],
            features=fixtures["feats"],
            verbose=False,
        )
        self.assertIsInstance(gfe_engine, pyGFE)

        fasta_path = self.data_dir + "/unknown_A.fasta"
        records = list(SeqIO.parse(fasta_path, "fasta"))
        result = gfe_engine.type_from_seq("HLA-A", str(records[1].seq))
        print(result)

        self.assertEqual(result.hla, 'HLA-A*01:01:01:01')
        self.assertEqual(result.status, "novel")
        self.assertIsInstance(result, Typing)
def gfecreate_post(locus,
                   sequence,
                   imgt_version,
                   neo4j_url=neo_dict['neo4j_url'],
                   user=neo_dict['user'],
                   password=neo_dict['password']):  # noqa: E501
    """gfecreate_post

    Create the GFE notation for a sequence and return it with its features.

    The annotator for each db version is cached in the module-level
    ``seqanns`` dict, and the lookup tables loaded by the first pyGFE
    instance are cached in ``gfe_feats``, ``seq2hla`` and ``gfe2hla``.
    Log records emitted while the request runs are captured and attached to
    any returned ``Error``.

    :param locus: Valid HLA locus
    :param sequence: Mapping whose ``'sequence'`` entry holds the sequence
    :param imgt_version: db version, dotted ("3.31.0") or compact ("3310")
    :param neo4j_url: URL for the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph
    :rtype: dict with the keys ``gfe``, ``feature`` and
        ``annotation_feature`` on success, otherwise an ``(Error, 404)`` tuple
    """
    global seqanns
    global gfe_feats
    global gfe2hla
    global seq2hla
    sequence = sequence['sequence']
    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Capture everything logged during this request in an in-memory buffer.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter('%(asctime)s - %(name)-35s - %(levelname)-5s'
                                  ' - %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def captured_log():
        # Lines logged so far, in the list form Error(log=...) is given.
        return log_capture_string.getvalue().split("\n")

    try:
        # Only the dot-free form of the version is ever used. The previous
        # re.match(".") check matched any character, so it never normalised
        # anything and raised IndexError on an empty version string.
        db = imgt_version.replace(".", "")
        if not db:
            return Error("Invalid IMGT/HLA version", log=captured_log()), 404

        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True,
                               safemode=True,
                               dbversion=db,
                               verbosity=3)
            seqanns.update({db: seqann})

        try:
            graph = Graph(neo4j_url, user=user, password=password, bolt=False)
        except ServiceUnavailable as err:
            log_data = captured_log()
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # First request: let pyGFE load the tables, then cache them.
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          load_gfe2hla=True,
                          load_seq2hla=True,
                          load_gfe2feat=True,
                          verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          gfe2hla=gfe2hla,
                          gfe_feats=gfe_feats,
                          seq2hla=seq2hla,
                          verbose=True)

        try:
            typing = pygfe.gfe_create(locus=locus,
                                      sequence=sequence,
                                      imgtdb_version=db)
        except Exception as e:
            # Log before reading the buffer so the cause reaches the caller.
            logger.error("gfe_create failed: %s", e)
            return Error("Type with alignment failed",
                         log=captured_log()), 404

        if isinstance(typing, Error):
            typing.log = captured_log()
            return typing, 404

        if not typing:
            return Error("Type with alignment failed",
                         log=captured_log()), 404

        structure_feats = [
            Feature(accession=f.accession,
                    rank=f.rank,
                    term=f.term,
                    sequence=f.sequence)
            for f in typing['structure']
        ]
        anno_feats = [
            Feature(accession=f.accession,
                    rank=f.rank,
                    term=f.term,
                    sequence=f.sequence)
            for f in typing['annotation'].structure
        ]
        return {
            'gfe': typing['gfe'],
            'feature': structure_feats,
            'annotation_feature': anno_feats
        }
    finally:
        # Without this every request leaves another handler (and its buffer)
        # attached to the root logger for the lifetime of the process.
        logger.removeHandler(ch)
示例#7
0
# Every byte value outside 7-bit ASCII; these are deleted from .dat files.
_NON_ASCII_BYTES = bytes(range(0x80, 0x100))


def _strip_non_ascii(path, chunk_size=1024 * 1024):
    """Rewrite the file at *path* in place with all bytes >= 0x80 removed."""
    tmp_path = path + ".tmp"
    with open(path, "rb") as src, open(tmp_path, "wb") as dst:
        for chunk in iter(lambda: src.read(chunk_size), b""):
            dst.write(chunk.translate(None, _NON_ASCII_BYTES))
    os.replace(tmp_path, path)


def main():
    """This is run if file is directly executed, but not if imported as
    module. Having this in a separate function  allows importing the file
    into interactive python, and still able to execute the
    function for testing.

    Parses the command line, turns the requested IMGT/HLA releases (and
    optionally IMGT/KIR) into node and edge rows via ``build_graph`` and
    writes seven CSV files into the output directory.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("-k",
                        "--kir",
                        required=False,
                        help="Bool for KIR",
                        action='store_true')

    parser.add_argument("-a",
                        "--align",
                        required=False,
                        help="Bool for loading alignments",
                        action='store_true')

    parser.add_argument("-d",
                        "--debug",
                        required=False,
                        help="Bool for debugging",
                        action='store_true')

    parser.add_argument("-o",
                        "--outdir",
                        required=True,
                        help="Output directory",
                        type=str)

    parser.add_argument("-n",
                        "--number",
                        required=False,
                        help="Number of IMGT/DB releases",
                        default=1,
                        type=int)

    parser.add_argument("-r",
                        "--releases",
                        required=False,
                        help="IMGT/DB releases",
                        type=str)

    parser.add_argument("-v",
                        "--verbose",
                        help="Option for running in verbose",
                        action='store_true')

    # abspath first: when __file__ is a bare relative name, dirname() is ''
    # and '' + '/hla.X.dat' would point at the filesystem root.
    data_dir = os.path.dirname(os.path.abspath(__file__))
    args = parser.parse_args()

    outdir = args.outdir
    # Create the directory up front so a missing path fails (or is fixed)
    # before the long build instead of at the final to_csv calls.
    os.makedirs(outdir, exist_ok=True)

    release_n = args.number
    releases = args.releases
    verbosity = 1

    align = args.align
    kir = args.kir
    verbose = args.verbose
    debug = False

    load_loci = hla_loci + kir_loci if kir else hla_loci

    if args.debug:
        # Debug mode restricts the run to HLA-A of a single release.
        logging.info("Running in debug mode")
        load_loci = ["HLA-A"]
        kir = False
        debug = True
        verbose = True
        verbosity = 2
        release_n = 1

    gfe_e = []
    seq_e = []
    seq_n = []
    cds_n = []
    grp_e = []
    trs_e = []
    allele_n = []

    def collect(graph_parts):
        """Append one build_graph() result to the running row lists."""
        (allelenode, gfeedge, seq_nodes, cds_nodes, seq_edges, trans_edge,
         grp_edges) = graph_parts
        gfe_e.extend(gfeedge)
        seq_e.extend(seq_edges)
        seq_n.extend(seq_nodes)
        allele_n.extend(allelenode)
        grp_e.extend(grp_edges)
        trs_e.extend(trans_edge)
        cds_n.extend(cds_nodes)

    # Use the releases given on the command line, otherwise the first
    # `release_n` entries of the IMGT/HLA release table.
    if releases:
        dbversions = releases.split(",")
    else:
        dbversions = pd.read_html(imgt_hla)[0]['Release'][0:release_n].tolist()

    gfe_maker = pyGFE(verbose=verbose,
                      verbosity=verbosity,
                      load_features=True,
                      store_features=True,
                      loci=load_loci)

    if kir:
        if verbose:
            logging.info("Adding KIR to GFE DB")

        # Latest IMGT/KIR release; only fetched when KIR is requested.
        kir_release = pd.read_html(imgt_kir)[0][0][1]

        kir_file = os.path.join(data_dir, '..', 'data', 'KIR.dat')

        if align:
            aligned = kir_alignments()

        # Downloading KIR
        if not os.path.isfile(kir_file):
            if verbose:
                logging.info("Downloading KIR dat file from " + kir_url)
            urllib.request.urlretrieve(kir_url, kir_file)

        kir_gen = SeqIO.parse(kir_file, "imgt")
        if verbose:
            logging.info("Finished parsing KIR dat file")

        for allele in kir_gen:
            if not hasattr(allele, 'seq'):
                continue

            kir_name = allele.description.split(",")[0]
            loc = kir_name.split("*")[0]
            if loc not in kir_loci or len(str(allele.seq)) <= 5:
                continue

            if verbose:
                logging.info("KIR = " + kir_name + " " + kir_release)

            groups = []
            complete_annotation = get_features(allele)
            # Feature names containing "/" stand for two alternative terms.
            ambigs = [a for a in complete_annotation if re.search("/", a)]

            aligned_seq = ''
            if align and kir_name in aligned[loc]:
                aligned_seq = aligned[loc][kir_name]

            if ambigs:
                logging.info("AMBIGS " + kir_name + " " + kir_release)
                unambiguous = {
                    a: complete_annotation[a]
                    for a in complete_annotation if a not in ambigs
                }
                annotations = []
                for ambig in ambigs:
                    logging.info("AMBIG = " + ambig)
                    first_term, second_suffix = ambig.split("/")[0:2]
                    aterm = first_term.split("_")[0]

                    # One annotation per reading of the ambiguous name.
                    anno = dict(unambiguous)
                    anno[first_term] = complete_annotation[ambig]
                    annotations.append(anno)

                    anno2 = dict(unambiguous)
                    anno2[aterm + "_" + second_suffix] = \
                        complete_annotation[ambig]
                    annotations.append(anno2)
            else:
                annotations = [complete_annotation]

            for annotation in annotations:
                ann = Annotation(annotation=annotation,
                                 method='match',
                                 complete_annotation=True)
                features, gfe = gfe_maker.get_gfe(ann, loc)
                collect(
                    build_graph(groups, gfe, allele, features, kir_release,
                                aligned_seq, '', '', "IMGT_KIR", align))

    # Loop through DB versions
    for dbversion in dbversions:

        db_striped = ''.join(dbversion.split("."))

        if align:
            gen_aln, nuc_aln, prot_aln = hla_alignments(db_striped)

        ard = ARD(db_striped)

        dat_url = 'https://raw.githubusercontent.com/ANHIG/IMGTHLA/' \
                  + db_striped + '/hla.dat'
        dat_file = os.path.join(data_dir, 'hla.' + str(db_striped) + ".dat")

        # Downloading DAT file
        if not os.path.isfile(dat_file):
            if verbose:
                logging.info("Downloading dat file from " + dat_url)
            urllib.request.urlretrieve(dat_url, dat_file)

        # Drop non-ASCII bytes before parsing; done in Python so the build
        # does not depend on perl or on shell quoting of the path.
        _strip_non_ascii(dat_file)

        a_gen = SeqIO.parse(dat_file, "imgt")
        if verbose:
            logging.info("Finished parsing dat file")

        processed = 0
        for allele in a_gen:
            if not hasattr(allele, 'seq'):
                continue

            hla_name = allele.description.split(",")[0]
            loc = hla_name.split("*")[0]
            if hla_name in skip_alleles:
                logging.info("SKIPPING = " + hla_name + " " + dbversion)
                continue

            if debug and loc != "HLA-A" and processed > 20:
                continue

            if not ((loc in hla_loci or loc == "DRB5")
                    and len(str(allele.seq)) > 5):
                continue

            if verbose:
                logging.info("HLA = " + hla_name + " " + dbversion)

            a_name = hla_name.split("-")[1]
            # Keep only the ARD groups whose reduction differs from the
            # allele name itself, then add the two-field name.
            groups = []
            for grp in ard_groups:
                reduced = ard.redux(a_name, grp)
                if reduced != a_name:
                    groups.append(["HLA-" + reduced, grp])
            groups.append([to_second(a_name), "2nd_FIELD"])

            complete_annotation = get_features(allele)
            ann = Annotation(annotation=complete_annotation,
                             method='match',
                             complete_annotation=True)
            features, gfe = gfe_maker.get_gfe(ann, loc)

            aligned_gen = ''
            aligned_nuc = ''
            aligned_prot = ''

            if align:
                if hla_name in gen_aln[loc]:
                    aligned_gen = gen_aln[loc][hla_name]

                if hla_name in nuc_aln[loc]:
                    aligned_nuc = nuc_aln[loc][hla_name]

                if hla_name in prot_aln[loc]:
                    aligned_prot = prot_aln[loc][hla_name]

            collect(
                build_graph(groups, gfe, allele, features, dbversion,
                            aligned_gen, aligned_nuc, aligned_prot,
                            "IMGT_HLA", align))
            processed += 1

        if verbose:
            logging.info("Finished loading IMGT DB " + dbversion)
    if verbose:
        logging.info("Finished loading ALL DB versions")

    gfe_df = pd.DataFrame(
        gfe_e,
        columns=":START_ID(ALLELE),:END_ID(ALLELE),imgt_release,:TYPE".split(
            ","))
    seq_df = pd.DataFrame(
        seq_e,
        columns=
        ":START_ID(ALLELE),:END_ID(SEQUENCE),imgt_release,accession,:TYPE".
        split(","))
    seqn_df = pd.DataFrame(
        seq_n,
        columns=
        "sequenceId:ID(SEQUENCE),sequence,name,feature:LABEL,rank,length,seq:string[]"
        .split(","))
    allele_df = pd.DataFrame(
        allele_n,
        columns="alleleId:ID(ALLELE),name,alleletype:LABEL,locus".split(","))
    group_df = pd.DataFrame(
        grp_e,
        columns=":START_ID(ALLELE),:END_ID(ALLELE),imgtdb,:TYPE".split(","))
    cdsn_df = pd.DataFrame(
        cds_n,
        columns="cdsId:ID(CDS),name,cdstype:LABEL,cds,protein".split(","))
    trs_df = pd.DataFrame(
        trs_e, columns=":START_ID(SEQUENCE),:END_ID(CDS),:TYPE".split(","))

    if verbose:
        logging.info("GFE Edges    = " + str(len(gfe_df)))
        logging.info("Seq Edges    = " + str(len(seq_df)))
        logging.info("Group Edges  = " + str(len(group_df)))
        logging.info("CDS Edges    = " + str(len(trs_df)))
        logging.info("Seq Nodes    = " + str(len(seqn_df)))
        logging.info("CDS Nodes    = " + str(len(cdsn_df)))
        logging.info("Allele Nodes = " + str(len(allele_df)))

    # Output file name -> table written to it.
    outputs = (
        ("gfe_edges.csv", gfe_df),
        ("seq_edges.csv", seq_df),
        ("sequence_nodes.csv", seqn_df),
        ("allele_nodes.csv", allele_df),
        ("cds_nodes.csv", cdsn_df),
        ("group_edges.csv", group_df),
        ("cds_edges.csv", trs_df),
    )
    for filename, frame in outputs:
        frame.to_csv(os.path.join(outdir, filename), header=True, index=False)

    if verbose:
        logging.info("** Finshed build **")
示例#8
0
def releases_locus_get(imgt_releases,
                       locus,
                       neo4j_url=neo_dict['neo4j_url'],
                       user=neo_dict['user'],
                       password=neo_dict['password']):
    """releases_locus_get

    Return what ``pyGFE.list_db_by_locus_imgt`` reports for a locus and
    IMGT release.

    The annotator for each db version is cached in the module-level
    ``seqanns`` dict, and the lookup tables loaded by the first pyGFE
    instance are cached in ``gfe_feats``, ``seq2hla`` and ``gfe2hla``.
    Log records emitted while the request runs are captured and attached to
    any returned ``Error``.

    :param imgt_releases: Valid imgt release version (dots are stripped to
        select the annotation db)
    :param locus: Valid locus
    :param neo4j_url: URL for the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph
    :rtype: list of available db on success, otherwise an ``(Error, 404)``
        tuple
    """
    global seqanns
    global gfe_feats
    global gfe2hla
    global seq2hla
    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Capture everything logged during this request in an in-memory buffer.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def captured_log():
        # Lines logged so far, in the list form Error(log=...) is given.
        return log_capture_string.getvalue().split("\n")

    try:
        db = "".join(imgt_releases.split("."))
        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True,
                               safemode=True,
                               dbversion=db,
                               verbosity=3)
            seqanns.update({db: seqann})

        try:
            graph = Graph(neo4j_url, user=user, password=password, bolt=False)
        except ServiceUnavailable as err:
            log_data = captured_log()
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # First request: let pyGFE load the tables, then cache them.
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          load_gfe2hla=True,
                          load_seq2hla=True,
                          load_gfe2feat=True,
                          verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          gfe2hla=gfe2hla,
                          gfe_feats=gfe_feats,
                          seq2hla=seq2hla,
                          verbose=True)

        try:
            hla_list = pygfe.list_db_by_locus_imgt(locus, imgt_releases)
        except Exception as e:
            # Log before reading the buffer so the cause reaches the caller.
            logger.error("list_db_by_locus_imgt failed: %s", e)
            return Error("hla list failed", log=captured_log()), 404

        if isinstance(hla_list, Error):
            hla_list.log = captured_log()
            return hla_list, 404

        if not hla_list:
            return Error("no data record found", log=captured_log()), 404
        return hla_list
    finally:
        # Without this every request leaves another handler (and its buffer)
        # attached to the root logger for the lifetime of the process.
        logger.removeHandler(ch)
示例#9
0
    def test_006_align(self):
        """Type the first four records of ``align_tests.fasta`` with alignment.

        The annotator is created with ``align=True`` against db version 3310
        and the lookup tables come from pickle files in the current working
        directory. Every sequence must type as HLA-A*02:01:01:12.
        """
        graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass,
                      bolt=False)
        server = BioSeqDatabase.open_database(
            driver="pymysql",
            user=biosqluser,
            passwd=biosqlpass,
            host=biosqlhost,
            db=biosqldb,
            port=3307,
        )
        # Release the BioSQL connection even when the test fails.
        self.addCleanup(server.close)
        seqann = BioSeqAnn(align=True, server=server, dbversion="3310",
                           verbose=True)

        def load_pickle(filename):
            # Fixture files are produced by the project itself; pickle must
            # never be pointed at untrusted data.
            with open(filename, 'rb') as handle:
                return pickle.load(handle)

        gfe_feats = load_pickle("gfe2feat.pickle")
        feats = load_pickle("unique_db-feats.pickle")
        cached_feats = load_pickle("feature-service.pickle")
        gfe2hla = load_pickle("gfe2hla.pickle")
        seq2hla = load_pickle("seq2hla.pickle")

        pygfe = pyGFE(graph=graph,
                      seqann=seqann,
                      load_features=False,
                      verbose=True,
                      features=feats,
                      seq2hla=seq2hla,
                      gfe2hla=gfe2hla,
                      gfe_feats=gfe_feats,
                      cached_features=cached_feats,
                      loci=["HLA-A"])
        self.assertIsInstance(pygfe, pyGFE)
        seqs = list(SeqIO.parse(self.data_dir + "/align_tests.fasta", "fasta"))
        # Type all four sequences first, then check each result.
        typings = [
            pygfe.type_from_seq("HLA-A", str(seqs[idx].seq), "3.31.0")
            for idx in range(4)
        ]
        for typing in typings:
            self.assertEqual(typing.hla, 'HLA-A*02:01:01:12')
示例#10
0
def typeseq_get(sequence,
                locus=None,
                imgthla_version="3.31.0",
                neo4j_url="http://neo4j.b12x.org:80",
                user='******',
                password='******'):  # noqa: E501
    """typeseq_get

    Get HLA and GFE from consensus sequence or GFE notation.

    The IMGT/HLA version may be given dotted (``"3.31.0"``) or undotted
    (``"3310"``); an undotted value of at least four characters is expanded
    to the dotted form before it is handed to ``pyGFE.type_from_seq``.
    The undotted form is used as the key into the module-level ``seqanns``
    cache of ``BioSeqAnn`` instances.

    The module-level ``gfe_feats``, ``seq2hla`` and ``gfe2hla`` tables are
    populated from the first ``pyGFE`` instance that loads them and are
    passed to later instances instead of being reloaded.

    :param sequence: Consensus sequence
    :type sequence: str
    :param locus: Valid HLA locus
    :type locus: str
    :param imgthla_version: IMGT/HLA DB Version
    :type imgthla_version: str
    :param neo4j_url: URL for the neo4j graph
    :type neo4j_url: str
    :param user: Username for the neo4j graph
    :type user: str
    :param password: Password for the neo4j graph
    :type password: str

    :return: The typing result on success; on any failure a
        ``(Error, 404)`` tuple whose ``log`` holds the log lines captured
        while handling this call
    :rtype: Typing
    """
    global seqanns
    global gfe_feats
    global gfe2hla
    global seq2hla

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Per-call handler: collects everything logged while this request is
    # processed so it can be attached to an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s - %(funcName)s %(lineno)d: - %(message)s'
    )
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def _captured_log():
        """Return the log lines captured so far for this call."""
        return log_capture_string.getvalue().split("\n")

    try:
        # Expand an undotted version such as "3310" into "3.31.0".  A plain
        # substring test is used because the regex "." matches any
        # character and would never detect a missing dot.
        if "." not in imgthla_version and len(imgthla_version) >= 4:
            imgthla_version = ".".join([
                imgthla_version[0], imgthla_version[1:3], imgthla_version[3]
            ])

        # Reuse one BioSeqAnn per database version across calls.
        db = "".join(imgthla_version.split("."))
        if db in seqanns:
            seqann = seqanns[db]
        else:
            seqann = BioSeqAnn(verbose=True,
                               safemode=True,
                               dbversion=db,
                               verbosity=3)
            seqanns.update({db: seqann})

        try:
            graph = Graph(neo4j_url, user=user, password=password, bolt=False)
        except ServiceUnavailable as err:
            log_data = _captured_log()
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # Lookup tables not cached yet: let pyGFE load them, then keep
            # them at module level for subsequent calls.
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          load_gfe2hla=True,
                          load_seq2hla=True,
                          load_gfe2feat=True,
                          verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          gfe2hla=gfe2hla,
                          gfe_feats=gfe_feats,
                          seq2hla=seq2hla,
                          verbose=True)

        try:
            typing = pygfe.type_from_seq(locus, sequence, imgthla_version)
        except Exception as err:
            # Record the cause in the captured log instead of discarding it.
            logger.error("%s", err)
            return Error("Type with alignment failed",
                         log=_captured_log()), 404

        if isinstance(typing, Error):
            typing.log = _captured_log()
            return typing, 404

        if not typing:
            return Error("Type sequence failed", log=_captured_log()), 404

        typing.gfedb_version = "2.0.0"
        return typing
    finally:
        # Detach the per-call handler; otherwise every request would leave
        # another handler (and its buffer) attached to the root logger.
        logger.removeHandler(ch)
def findkir_get(gfe,
                neo4j_url=neo_dict['neo4j_url'],
                user=neo_dict['user'],
                password=neo_dict['password']):  # noqa: E501
    """findkir_get

    Get all kir associated with a GFE.

    The module-level ``gfe_feats``, ``seq2hla`` and ``gfe2hla`` tables are
    populated from the first ``pyGFE`` instance that loads them and are
    passed to later instances instead of being reloaded.

    :param gfe: Valid gfe of locus
    :param neo4j_url: URL for the neo4j graph
    :type neo4j_url: str
    :param user: Username for the neo4j graph
    :type user: str
    :param password: Password for the neo4j graph
    :type password: str

    :return: The result of ``pyGFE.find_gfe_kir`` on success; on any
        failure a ``(Error, 404)`` tuple whose ``log`` holds the log lines
        captured while handling this call
    :rtype: Typing
    """
    global gfe_feats
    global gfe2hla
    global seq2hla

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)

    # Per-call handler: collects everything logged while this request is
    # processed so it can be attached to an Error response.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def _captured_log():
        """Return the log lines captured so far for this call."""
        return log_capture_string.getvalue().split("\n")

    try:
        seqann = BioSeqAnn()

        try:
            graph = Graph(neo4j_url, user=user, password=password, bolt=False)
        except ServiceUnavailable as err:
            log_data = _captured_log()
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # Lookup tables not cached yet: let pyGFE load them, then keep
            # them at module level for subsequent calls.
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          load_gfe2hla=True,
                          load_seq2hla=True,
                          load_gfe2feat=True,
                          verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          gfe2hla=gfe2hla,
                          gfe_feats=gfe_feats,
                          seq2hla=seq2hla,
                          verbose=True)

        try:
            typing = pygfe.find_gfe_kir(gfe, pygfe.breakup_gfe(gfe))
        except Exception as err:
            # Route the failure through logging (rather than print) so it
            # also appears in the log returned to the caller.
            logger.error("%s", err)
            return Error("Type with alignment failed",
                         log=_captured_log()), 404

        if isinstance(typing, Error):
            typing.log = _captured_log()
            return typing, 404

        if not typing:
            return Error("Type with alignment failed",
                         log=_captured_log()), 404
        return typing
    finally:
        # Detach the per-call handler; otherwise every request would leave
        # another handler (and its buffer) attached to the root logger.
        logger.removeHandler(ch)