def test_001_load_features(self):
    """Check that ``pyGFE`` loads feature structures when asked to.

    Builds a ``pyGFE`` with ``load_features=True`` limited to ``HLA-A`` and
    asserts that structures/features were loaded, that ``HLA-A`` is among
    the loaded structures and that the bogus locus ``HLA-Z`` is not.
    """
    graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass, bolt=False)
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=3307)
    # Release the BioSQL connection even when an assertion below fails.
    self.addCleanup(server.close)
    seqann = BioSeqAnn(server=server)

    pygfe = pyGFE(graph=graph,
                  seqann=seqann,
                  verbose=True,
                  load_features=True,
                  verbosity=2,
                  loci=["HLA-A"])

    self.assertIsInstance(pygfe, pyGFE)
    self.assertGreater(len(pygfe.gfe.structures), 1)
    self.assertGreater(len(pygfe.gfe.all_feats), 1)
    # assertIn/assertNotIn report the container contents on failure.
    self.assertIn('HLA-A', pygfe.gfe.structures)
    self.assertNotIn('HLA-Z', pygfe.gfe.structures)
def test_005_A(self):
    """Type one sequence from ``A_fail.fasta`` using pickled lookup tables.

    The lookup tables are read from pickle files in the current working
    directory and handed to ``pyGFE`` instead of being loaded from the
    graph. Only the type of the returned object is asserted.
    """
    # NOTE(review): despite the test name and the fasta file name, the
    # locus typed here is HLA-DQB1 -- confirm that this is intended.
    graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass, bolt=False)
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=3307)
    # Release the BioSQL connection even when the test fails.
    self.addCleanup(server.close)
    seqann = BioSeqAnn(server=server, dbversion="3200", verbose=True)

    def load_pickle(path):
        """Return the object stored in the local fixture pickle *path*."""
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    gfe_feats = load_pickle("gfe2feat.pickle")
    feats = load_pickle("unique_db-feats.pickle")
    cached_feats = load_pickle("feature-service.pickle")
    gfe2hla = load_pickle("gfe2hla.pickle")
    seq2hla = load_pickle("seq2hla.pickle")

    pygfe = pyGFE(graph=graph,
                  seqann=seqann,
                  load_features=False,
                  verbose=True,
                  features=feats,
                  seq2hla=seq2hla,
                  gfe2hla=gfe2hla,
                  gfe_feats=gfe_feats,
                  cached_features=cached_feats,
                  loci=["HLA-DQB1"])
    self.assertIsInstance(pygfe, pyGFE)

    seqs = list(SeqIO.parse(self.data_dir + "/A_fail.fasta", "fasta"))
    typing1 = pygfe.type_from_seq("HLA-DQB1", str(seqs[1].seq), "3.20.0")
    print(typing1)
    self.assertIsInstance(typing1, Typing)
def test_004_loader3(self):
    """Type a known HLA-A sequence with lookup tables loaded by ``pyGFE``.

    ``pyGFE`` is asked to load the gfe2hla, seq2hla and gfe2feat tables
    itself. The first record of ``known_A.fasta`` must type as the
    documented allele ``HLA-A*01:01:01:01``. The elapsed wall-clock time
    (including closing the BioSQL connection) is printed.
    """
    start = time.time()
    graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass, bolt=False)
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=3307)
    try:
        seqann = BioSeqAnn(server=server, verbose=True)
        pygfe = pyGFE(graph=graph,
                      seqann=seqann,
                      verbose=False,
                      load_features=False,
                      load_gfe2hla=True,
                      load_seq2hla=True,
                      load_gfe2feat=True,
                      loci=["HLA-A"])
        self.assertIsInstance(pygfe, pyGFE)
        seqs = list(SeqIO.parse(self.data_dir + "/known_A.fasta", "fasta"))
        typing2 = pygfe.type_from_seq("HLA-A", str(seqs[0].seq), "3.31.0")
    finally:
        # Close the connection even when construction or typing raises.
        server.close()

    time_taken = time.time() - start
    print("TIME TAKEN: " + str(time_taken))

    self.assertEqual(typing2.hla, 'HLA-A*01:01:01:01')
    self.assertEqual(typing2.status, "documented")
    self.assertIsInstance(typing2, Typing)
def test_000_pygfe(self):
    """Type an unknown HLA-A sequence with everything loaded by ``pyGFE``.

    ``pyGFE`` is built with ``load_all=True``. The second record of
    ``unknown_A.fasta`` must type as ``HLA-A*01:01:01:01`` with status
    ``novel``.
    """
    graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass, bolt=False)
    # NOTE(review): the other tests in this file pass port=3307 here;
    # confirm whether the default port is intended for this test.
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb)
    # Release the BioSQL connection even when the test fails.
    self.addCleanup(server.close)
    # The annotator was previously overwritten with the string "X" right
    # after being built, so pyGFE received a str instead of a BioSeqAnn.
    seqann = BioSeqAnn(server=server, verbose=False)

    pygfe = pyGFE(graph=graph,
                  seqann=seqann,
                  load_features=False,
                  verbose=False,
                  load_all=True,
                  loci=["HLA-A"])
    self.assertIsInstance(pygfe, pyGFE)

    seqs = list(SeqIO.parse(self.data_dir + "/unknown_A.fasta", "fasta"))
    typing = pygfe.type_from_seq("HLA-A", str(seqs[1].seq))
    # GFE check left disabled by the original author:
    # self.assertEqual(typing.gfe, 'HLA-Aw770-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-4')
    self.assertEqual(typing.hla, 'HLA-A*01:01:01:01')
    self.assertEqual(typing.status, "novel")
    self.assertIsInstance(typing, Typing)
def test_001_pygfe(self):
    """Type an unknown HLA-A sequence using only pickled lookup tables.

    No BioSQL server is used: the annotator gets its cached features from
    a pickle and runs with ``align=True``. The second record of
    ``unknown_A.fasta`` must type as ``HLA-A*01:01:01:01`` with status
    ``novel``.
    """
    graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass, bolt=False)

    # (table name, pickle file) pairs, read in this order.
    fixtures = (
        ("gfe_feats", "gfe2feat.pickle"),
        ("feats", "unique_db-feats.pickle"),
        ("cached_feats", "feature-service.pickle"),
        ("gfe2hla", "gfe2hla.pickle"),
        ("seq2hla", "seq2hla.pickle"),
    )
    tables = {}
    for table_name, pickle_path in fixtures:
        with open(pickle_path, 'rb') as stream:
            tables[table_name] = pickle.load(stream)

    seqann = BioSeqAnn(verbose=False,
                       cached_features=tables["cached_feats"],
                       align=True)
    pygfe = pyGFE(graph=graph,
                  seqann=seqann,
                  gfe_feats=tables["gfe_feats"],
                  gfe2hla=tables["gfe2hla"],
                  seq2hla=tables["seq2hla"],
                  features=tables["feats"],
                  verbose=False)
    self.assertIsInstance(pygfe, pyGFE)

    fasta_path = self.data_dir + "/unknown_A.fasta"
    records = list(SeqIO.parse(fasta_path, "fasta"))
    typing = pygfe.type_from_seq("HLA-A", str(records[1].seq))
    print(typing)
    # GFE check left disabled by the original author:
    # self.assertEqual(typing.gfe, 'HLA-Aw770-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-4')
    self.assertEqual(typing.hla, 'HLA-A*01:01:01:01')
    self.assertEqual(typing.status, "novel")
    self.assertIsInstance(typing, Typing)
def gfecreate_post(locus, sequence, imgt_version,
                   neo4j_url=neo_dict['neo4j_url'],
                   user=neo_dict['user'],
                   password=neo_dict['password']):  # noqa: E501
    """gfecreate_post

    Create the GFE for a sequence and return it with its features.

    :param locus: Valid HLA locus
    :param sequence: Mapping whose ``'sequence'`` entry holds the sequence
    :param imgt_version: db version; dots are stripped to form the db key
        (``"3.31.0"`` and ``"3310"`` both give ``"3310"``)
    :param neo4j_url: URL for the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph
    :return: on success a dict with the keys ``gfe``, ``feature`` and
        ``annotation_feature``; on failure a tuple ``(Error, 404)`` whose
        ``Error`` carries the log lines captured during the call
    """
    global seqanns
    global gfe_feats
    global gfe2hla
    global seq2hla

    sequence = sequence['sequence']

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    # Per-request handler that captures the log so it can be returned
    # to the caller on failure.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter('%(asctime)s - %(name)-35s - %(levelname)-5s'
                                  ' - %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def captured_log():
        """Return the log lines captured so far for this request."""
        return log_capture_string.getvalue().split("\n")

    try:
        # The former `re.match(".", ...)` guard matched any character, so
        # its branch never ran for a non-empty version; stripping the dots
        # is all that was ever applied.
        db = imgt_version.replace(".", "")

        if db not in seqanns:
            seqanns[db] = BioSeqAnn(verbose=True, safemode=True,
                                    dbversion=db, verbosity=3)
        seqann = seqanns[db]

        try:
            graph = Graph(neo4j_url, user=user, password=password, bolt=False)
        except ServiceUnavailable as err:
            log_data = captured_log()
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # First call: let pyGFE load the lookup tables and cache them
            # in the module-level globals for later requests.
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          load_gfe2hla=True,
                          load_seq2hla=True,
                          load_gfe2feat=True,
                          verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          gfe2hla=gfe2hla,
                          gfe_feats=gfe_feats,
                          seq2hla=seq2hla,
                          verbose=True)

        try:
            typing = pygfe.gfe_create(locus=locus,
                                      sequence=sequence,
                                      imgtdb_version=db)
        except Exception as e:
            # Log instead of print so the reason is part of the returned log.
            logger.error("gfe_create failed: %s", e)
            return Error("Type with alignment failed",
                         log=captured_log()), 404

        if isinstance(typing, Error):
            typing.log = captured_log()
            return typing, 404

        if not typing:
            return Error("Type with alignment failed",
                         log=captured_log()), 404

        structure_feats = [
            Feature(accession=f.accession, rank=f.rank,
                    term=f.term, sequence=f.sequence)
            for f in typing['structure']
        ]
        anno_feats = [
            Feature(accession=f.accession, rank=f.rank,
                    term=f.term, sequence=f.sequence)
            for f in typing['annotation'].structure
        ]
        return {
            'gfe': typing['gfe'],
            'feature': structure_feats,
            'annotation_feature': anno_feats
        }
    finally:
        # Without this every request leaves one more handler (and its
        # buffer) attached to the root logger.
        logger.removeHandler(ch)
def main():
    """Build the GFE graph CSV files from IMGT/HLA (and optionally KIR) data.

    This is run if file is directly executed, but not if imported as
    module. Having this in a separate function allows importing the file
    into interactive python, and still able to execute the
    function for testing.

    Command line options: ``-o/--outdir`` (required) output directory,
    ``-k/--kir`` also load KIR, ``-a/--align`` load alignments,
    ``-n/--number`` number of IMGT/DB releases (default 1),
    ``-r/--releases`` comma separated releases, ``-d/--debug`` and
    ``-v/--verbose``.

    Writes ``gfe_edges.csv``, ``seq_edges.csv``, ``sequence_nodes.csv``,
    ``allele_nodes.csv``, ``cds_nodes.csv``, ``group_edges.csv`` and
    ``cds_edges.csv`` into the output directory.
    """
    # Local import: only needed for the non-ASCII clean-up step below.
    import subprocess

    parser = argparse.ArgumentParser()
    parser.add_argument("-k", "--kir",
                        required=False,
                        help="Bool for KIR",
                        action='store_true')
    parser.add_argument("-a", "--align",
                        required=False,
                        help="Bool for loading alignments",
                        action='store_true')
    parser.add_argument("-d", "--debug",
                        required=False,
                        help="Bool for debugging",
                        action='store_true')
    parser.add_argument("-o", "--outdir",
                        required=True,
                        help="Output directory",
                        type=str)
    parser.add_argument("-n", "--number",
                        required=False,
                        help="Number of IMGT/DB releases",
                        default=1,
                        type=int)
    parser.add_argument("-r", "--releases",
                        required=False,
                        help="IMGT/DB releases",
                        type=str)
    parser.add_argument("-v", "--verbose",
                        help="Option for running in verbose",
                        action='store_true')
    args = parser.parse_args()

    # abspath: dirname(__file__) can be '' for a relative script path,
    # which would turn the data paths below into filesystem-root paths.
    data_dir = os.path.dirname(os.path.abspath(__file__))

    outdir = args.outdir
    release_n = args.number
    releases = args.releases
    kir = args.kir
    align = args.align
    verbose = args.verbose
    debug = False
    verbosity = 1

    load_loci = hla_loci + kir_loci if kir else hla_loci

    if args.debug:
        # Debug mode: HLA-A only, a single release, no KIR, chatty output.
        logging.info("Running in debug mode")
        load_loci = ["HLA-A"]
        kir = False
        debug = True
        verbose = True
        verbosity = 2
        release_n = 1

    # Rows accumulated over all alleles; one list per output CSV.
    gfe_e = []
    seq_e = []
    seq_n = []
    cds_n = []
    grp_e = []
    trs_e = []
    allele_n = []

    def collect(graph_parts):
        """Append one ``build_graph`` result to the accumulated rows."""
        (allelenode, gfeedge, seq_nodes, cds_nodes,
         seq_edges, trans_edge, grp_edges) = graph_parts
        gfe_e.extend(gfeedge)
        seq_e.extend(seq_edges)
        seq_n.extend(seq_nodes)
        allele_n.extend(allelenode)
        grp_e.extend(grp_edges)
        trs_e.extend(trans_edge)
        cds_n.extend(cds_nodes)

    # Explicit releases if given, otherwise the first `release_n`
    # releases listed on the IMGT/HLA page.
    if releases:
        dbversions = releases.split(",")
    else:
        dbversions = pd.read_html(imgt_hla)[0]['Release'][0:release_n].tolist()

    gfe_maker = pyGFE(verbose=verbose,
                      verbosity=verbosity,
                      load_features=True,
                      store_features=True,
                      loci=load_loci)

    if kir:
        # Get latest IMGT/KIR release. Only fetched when KIR is loaded so
        # that HLA-only builds do not depend on the KIR page.
        kir_release = pd.read_html(imgt_kir)[0][0][1]

        if verbose:
            logging.info("Adding KIR to GFE DB")

        kir_file = os.path.join(data_dir, '..', 'data', 'KIR.dat')

        if align:
            aligned = kir_alignments()

        # Downloading KIR
        if not os.path.isfile(kir_file):
            if verbose:
                logging.info("Downloading KIR dat file from " + kir_url)
            urllib.request.urlretrieve(kir_url, kir_file)

        kir_gen = SeqIO.parse(kir_file, "imgt")
        if verbose:
            logging.info("Finished parsing KIR dat file")

        for allele in kir_gen:
            if not hasattr(allele, 'seq'):
                continue
            kir_name = allele.description.split(",")[0]
            loc = kir_name.split("*")[0]
            if not (loc in kir_loci and len(str(allele.seq)) > 5):
                continue

            if verbose:
                logging.info("KIR = " + kir_name + " " + kir_release)

            groups = []
            complete_annotation = get_features(allele)
            # Feature names containing "/" are ambiguous between two terms.
            ambigs = [a for a in complete_annotation if re.search("/", a)]

            aligned_seq = ''
            if align:
                if kir_name in aligned[loc]:
                    aligned_seq = aligned[loc][kir_name]

            if ambigs:
                logging.info("AMBIGS " + kir_name + " " + kir_release)
                # For every ambiguous feature build two annotations, one
                # per alternative name, each without the other ambiguous
                # features.
                annotations = []
                for ambig in ambigs:
                    logging.info("AMBIG = " + ambig)
                    aterm = ambig.split("/")[0].split("_")[0]
                    anno = {
                        a: complete_annotation[a]
                        for a in complete_annotation if a not in ambigs
                    }
                    anno.update({
                        ambig.split("/")[0]: complete_annotation[ambig]
                    })
                    annotations.append(anno)

                    anno2 = {
                        a: complete_annotation[a]
                        for a in complete_annotation if a not in ambigs
                    }
                    anno2.update({
                        aterm + "_" + ambig.split("/")[1]:
                        complete_annotation[ambig]
                    })
                    annotations.append(anno2)
            else:
                annotations = [complete_annotation]

            for annotation in annotations:
                ann = Annotation(annotation=annotation,
                                 method='match',
                                 complete_annotation=True)
                features, gfe = gfe_maker.get_gfe(ann, loc)
                collect(build_graph(groups, gfe, allele, features,
                                    kir_release, aligned_seq, '', '',
                                    "IMGT_KIR", align))

    # Loop through DB versions
    for dbversion in dbversions:
        db_striped = ''.join(dbversion.split("."))

        if align:
            gen_aln, nuc_aln, prot_aln = hla_alignments(db_striped)

        ard = ARD(db_striped)

        dat_url = 'https://raw.githubusercontent.com/ANHIG/IMGTHLA/' \
            + db_striped + '/hla.dat'
        dat_file = os.path.join(data_dir, 'hla.' + str(db_striped) + ".dat")

        # Downloading DAT file
        if not os.path.isfile(dat_file):
            if verbose:
                logging.info("Downloading dat file from " + dat_url)
            urllib.request.urlretrieve(dat_url, dat_file)

        # Strip non-ASCII bytes in place. Argument list instead of a shell
        # string: safe for paths with spaces and for arbitrary release
        # strings coming from the command line.
        try:
            subprocess.run(["perl", "-p", "-i", "-e",
                            "s/[^\\x00-\\x7F]//g", dat_file],
                           check=False)
        except OSError as err:
            # Stay best-effort, as os.system was, when perl is unavailable.
            logging.warning("Could not run perl clean-up on " + dat_file
                            + ": " + str(err))

        a_gen = SeqIO.parse(dat_file, "imgt")
        if verbose:
            logging.info("Finished parsing dat file")

        i = 0  # number of alleles loaded for this release
        for allele in a_gen:
            if not hasattr(allele, 'seq'):
                continue
            hla_name = allele.description.split(",")[0]
            loc = hla_name.split("*")[0]

            if hla_name in skip_alleles:
                logging.info("SKIPPING = " + hla_name + " " + dbversion)
                continue

            if debug and (loc != "HLA-A" and i > 20):
                continue

            if not ((loc in hla_loci or loc == "DRB5")
                    and len(str(allele.seq)) > 5):
                continue

            if verbose:
                logging.info("HLA = " + hla_name + " " + dbversion)

            a_name = hla_name.split("-")[1]
            # Keep only the ARD groups whose reduction differs from the
            # allele name itself; redux is evaluated once per group.
            groups = []
            for grp in ard_groups:
                reduced = ard.redux(a_name, grp)
                if reduced != a_name:
                    groups.append(["HLA-" + reduced, grp])
            groups.append([to_second(a_name), "2nd_FIELD"])

            complete_annotation = get_features(allele)
            ann = Annotation(annotation=complete_annotation,
                             method='match',
                             complete_annotation=True)
            features, gfe = gfe_maker.get_gfe(ann, loc)

            aligned_gen = ''
            aligned_nuc = ''
            aligned_prot = ''
            if align:
                if hla_name in gen_aln[loc]:
                    aligned_gen = gen_aln[loc][hla_name]
                if hla_name in nuc_aln[loc]:
                    aligned_nuc = nuc_aln[loc][hla_name]
                if hla_name in prot_aln[loc]:
                    aligned_prot = prot_aln[loc][hla_name]

            collect(build_graph(groups, gfe, allele, features, dbversion,
                                aligned_gen, aligned_nuc, aligned_prot,
                                "IMGT_HLA", align))
            i += 1

        if verbose:
            logging.info("Finished loading IMGT DB " + dbversion)

    if verbose:
        logging.info("Finished loading ALL DB versions")

    gfe_df = pd.DataFrame(
        gfe_e,
        columns=":START_ID(ALLELE),:END_ID(ALLELE),imgt_release,:TYPE".split(","))
    seq_df = pd.DataFrame(
        seq_e,
        columns=":START_ID(ALLELE),:END_ID(SEQUENCE),imgt_release,accession,:TYPE".split(","))
    seqn_df = pd.DataFrame(
        seq_n,
        columns="sequenceId:ID(SEQUENCE),sequence,name,feature:LABEL,rank,length,seq:string[]".split(","))
    allele_df = pd.DataFrame(
        allele_n,
        columns="alleleId:ID(ALLELE),name,alleletype:LABEL,locus".split(","))
    group_df = pd.DataFrame(
        grp_e,
        columns=":START_ID(ALLELE),:END_ID(ALLELE),imgtdb,:TYPE".split(","))
    cdsn_df = pd.DataFrame(
        cds_n,
        columns="cdsId:ID(CDS),name,cdstype:LABEL,cds,protein".split(","))
    trs_df = pd.DataFrame(
        trs_e,
        columns=":START_ID(SEQUENCE),:END_ID(CDS),:TYPE".split(","))

    if verbose:
        logging.info("GFE Edges    = " + str(len(gfe_df)))
        logging.info("Seq Edges    = " + str(len(seq_df)))
        logging.info("Group Edges  = " + str(len(group_df)))
        logging.info("CDS Edges    = " + str(len(trs_df)))
        logging.info("Seq Nodes    = " + str(len(seqn_df)))
        logging.info("CDS Nodes    = " + str(len(cdsn_df)))
        logging.info("Allele Nodes = " + str(len(allele_df)))

    outputs = (
        (gfe_df, "gfe_edges.csv"),
        (seq_df, "seq_edges.csv"),
        (seqn_df, "sequence_nodes.csv"),
        (allele_df, "allele_nodes.csv"),
        (cdsn_df, "cds_nodes.csv"),
        (group_df, "group_edges.csv"),
        (trs_df, "cds_edges.csv"),
    )
    for frame, filename in outputs:
        frame.to_csv(os.path.join(outdir, filename), header=True, index=False)

    if verbose:
        logging.info("** Finshed build **")
def releases_locus_get(imgt_releases, locus,
                       neo4j_url=neo_dict['neo4j_url'],
                       user=neo_dict['user'],
                       password=neo_dict['password']):
    """releases_locus_get

    Return what ``pyGFE.list_db_by_locus_imgt`` reports for a locus in an
    IMGT release.

    :param imgt_releases: Valid imgt release version; dots are stripped to
        form the db key used for the cached annotator
    :param locus: Valid locus
    :param neo4j_url: URL for the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph
    :return: the value returned by ``list_db_by_locus_imgt`` on success; on
        failure (including an empty result) a tuple ``(Error, 404)`` whose
        ``Error`` carries the log lines captured during the call
    """
    global seqanns
    global gfe_feats
    global gfe2hla
    global seq2hla

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    # Per-request handler that captures the log so it can be returned
    # to the caller on failure.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def captured_log():
        """Return the log lines captured so far for this request."""
        return log_capture_string.getvalue().split("\n")

    try:
        db = "".join(imgt_releases.split("."))
        if db not in seqanns:
            seqanns[db] = BioSeqAnn(verbose=True, safemode=True,
                                    dbversion=db, verbosity=3)
        seqann = seqanns[db]

        try:
            graph = Graph(neo4j_url, user=user, password=password, bolt=False)
        except ServiceUnavailable as err:
            log_data = captured_log()
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # First call: let pyGFE load the lookup tables and cache them
            # in the module-level globals for later requests.
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          load_gfe2hla=True,
                          load_seq2hla=True,
                          load_gfe2feat=True,
                          verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          gfe2hla=gfe2hla,
                          gfe_feats=gfe_feats,
                          seq2hla=seq2hla,
                          verbose=True)

        try:
            hla_list = pygfe.list_db_by_locus_imgt(locus, imgt_releases)
        except Exception as e:
            # Log instead of print so the reason is part of the returned log.
            logger.error("list_db_by_locus_imgt failed: %s", e)
            return Error("hla list failed", log=captured_log()), 404

        if isinstance(hla_list, Error):
            hla_list.log = captured_log()
            return hla_list, 404

        if not hla_list:
            return Error("no data record found", log=captured_log()), 404

        return hla_list
    finally:
        # Without this every request leaves one more handler (and its
        # buffer) attached to the root logger.
        logger.removeHandler(ch)
def test_006_align(self):
    """Type four sequences with an alignment-enabled annotator.

    The annotator is built with ``align=True`` against db version
    ``3310``; lookup tables come from pickle files in the current working
    directory. The first four records of ``align_tests.fasta`` must all
    type as ``HLA-A*02:01:01:12``.
    """
    graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass, bolt=False)
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=3307)
    # Release the BioSQL connection even when the test fails.
    self.addCleanup(server.close)
    seqann = BioSeqAnn(align=True, server=server,
                       dbversion="3310", verbose=True)

    def load_pickle(path):
        """Return the object stored in the local fixture pickle *path*."""
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    gfe_feats = load_pickle("gfe2feat.pickle")
    feats = load_pickle("unique_db-feats.pickle")
    cached_feats = load_pickle("feature-service.pickle")
    gfe2hla = load_pickle("gfe2hla.pickle")
    seq2hla = load_pickle("seq2hla.pickle")

    pygfe = pyGFE(graph=graph,
                  seqann=seqann,
                  load_features=False,
                  verbose=True,
                  features=feats,
                  seq2hla=seq2hla,
                  gfe2hla=gfe2hla,
                  gfe_feats=gfe_feats,
                  cached_features=cached_feats,
                  loci=["HLA-A"])
    self.assertIsInstance(pygfe, pyGFE)

    seqs = list(SeqIO.parse(self.data_dir + "/align_tests.fasta", "fasta"))
    # The fixture must provide the four records that are typed below.
    self.assertGreaterEqual(len(seqs), 4)

    # Type everything first, then assert, so a typing error surfaces
    # before any comparison is made.
    typings = [
        pygfe.type_from_seq("HLA-A", str(record.seq), "3.31.0")
        for record in seqs[:4]
    ]
    for typing in typings:
        self.assertEqual(typing.hla, 'HLA-A*02:01:01:12')
def typeseq_get(sequence, locus=None, imgthla_version="3.31.0",
                neo4j_url="http://neo4j.b12x.org:80",
                user='******', password='******'):  # noqa: E501
    """typeseq_get

    Get HLA and GFE from consensus sequence or GFE notation

    :param sequence: Consensus sequence
    :type sequence: str
    :param locus: Valid HLA locus
    :type locus: str
    :param imgthla_version: IMGT/HLA DB Version, dotted (``"3.31.0"``) or
        as four digits (``"3310"``, converted to the dotted form)
    :type imgthla_version: str
    :param neo4j_url: URL for the neo4j graph
    :type neo4j_url: str
    :param user: Username for the neo4j graph
    :type user: str
    :param password: Password for the neo4j graph
    :type password: str
    :return: the typing with ``gfedb_version`` set to ``"2.0.0"`` on
        success; on failure a tuple ``(Error, 404)`` whose ``Error``
        carries the log lines captured during the call
    :rtype: Typing
    """
    global seqanns
    global gfe_feats
    global gfe2hla
    global seq2hla

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    # Per-request handler that captures the log so it can be returned
    # to the caller on failure.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s - %(funcName)s %(lineno)d: - %(message)s'
    )
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def captured_log():
        """Return the log lines captured so far for this request."""
        return log_capture_string.getvalue().split("\n")

    try:
        # Convert a compact four-digit version ("3310") to the dotted form
        # ("3.31.0"). The former `re.match(".", ...)` test matched any
        # character, so this conversion never ran.
        if ("." not in imgthla_version and len(imgthla_version) == 4
                and imgthla_version.isdigit()):
            imgthla_version = ".".join([
                imgthla_version[0],
                imgthla_version[1:3],
                imgthla_version[3]
            ])

        db = "".join(imgthla_version.split("."))
        if db not in seqanns:
            seqanns[db] = BioSeqAnn(verbose=True, safemode=True,
                                    dbversion=db, verbosity=3)
        seqann = seqanns[db]

        try:
            graph = Graph(neo4j_url, user=user, password=password, bolt=False)
        except ServiceUnavailable as err:
            log_data = captured_log()
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # First call: let pyGFE load the lookup tables and cache them
            # in the module-level globals for later requests.
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          load_gfe2hla=True,
                          load_seq2hla=True,
                          load_gfe2feat=True,
                          verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          gfe2hla=gfe2hla,
                          gfe_feats=gfe_feats,
                          seq2hla=seq2hla,
                          verbose=True)

        try:
            typing = pygfe.type_from_seq(locus, sequence, imgthla_version)
        except Exception as e:
            # `except Exception` rather than a bare except, so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            logger.error("type_from_seq failed: %s", e)
            return Error("Type with alignment failed",
                         log=captured_log()), 404

        if isinstance(typing, Error):
            typing.log = captured_log()
            return typing, 404

        if not typing:
            return Error("Type sequence failed", log=captured_log()), 404

        typing.gfedb_version = "2.0.0"
        return typing
    finally:
        # Without this every request leaves one more handler (and its
        # buffer) attached to the root logger.
        logger.removeHandler(ch)
def findkir_get(gfe,
                neo4j_url=neo_dict['neo4j_url'],
                user=neo_dict['user'],
                password=neo_dict['password']):  # noqa: E501
    """findkir_get

    Get all kir associated with a GFE

    :param gfe: Valid gfe of locus
    :param neo4j_url: URL for the neo4j graph
    :param user: Username for the neo4j graph
    :param password: Password for the neo4j graph
    :return: the value returned by ``pyGFE.find_gfe_kir`` on success; on
        failure (including an empty result) a tuple ``(Error, 404)`` whose
        ``Error`` carries the log lines captured during the call
    :rtype: Typing
    """
    global seqanns
    global gfe_feats
    global gfe2hla
    global seq2hla

    log_capture_string = io.StringIO()
    logger = logging.getLogger('')
    logging.basicConfig(datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    # Per-request handler that captures the log so it can be returned
    # to the caller on failure.
    ch = logging.StreamHandler(log_capture_string)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)-35s - %(levelname)-5s '
        '- %(funcName)s %(lineno)d: - %(message)s')
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    def captured_log():
        """Return the log lines captured so far for this request."""
        return log_capture_string.getvalue().split("\n")

    try:
        seqann = BioSeqAnn()

        try:
            graph = Graph(neo4j_url, user=user, password=password, bolt=False)
        except ServiceUnavailable as err:
            log_data = captured_log()
            log_data.append(str(err))
            return Error("Failed to connect to graph", log=log_data), 404

        if (not isinstance(gfe_feats, DataFrame)
                or not isinstance(seq2hla, DataFrame)):
            # First call: let pyGFE load the lookup tables and cache them
            # in the module-level globals for later requests.
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          load_gfe2hla=True,
                          load_seq2hla=True,
                          load_gfe2feat=True,
                          verbose=True)
            gfe_feats = pygfe.gfe_feats
            seq2hla = pygfe.seq2hla
            gfe2hla = pygfe.gfe2hla
        else:
            pygfe = pyGFE(graph=graph,
                          seqann=seqann,
                          gfe2hla=gfe2hla,
                          gfe_feats=gfe_feats,
                          seq2hla=seq2hla,
                          verbose=True)

        try:
            typing = pygfe.find_gfe_kir(gfe, pygfe.breakup_gfe(gfe))
        except Exception as e:
            # Log instead of print so the reason is part of the returned log.
            logger.error("find_gfe_kir failed: %s", e)
            return Error("Type with alignment failed",
                         log=captured_log()), 404

        if isinstance(typing, Error):
            typing.log = captured_log()
            return typing, 404

        if not typing:
            return Error("Type with alignment failed",
                         log=captured_log()), 404

        return typing
    finally:
        # Without this every request leaves one more handler (and its
        # buffer) attached to the root logger.
        logger.removeHandler(ch)