def test_genbank_utility_gp():
    """
    Check whether the high-level utility functions return the expected
    content of a known GenPept file.
    """
    gp_file = gb.GenBankFile.read(
        join(data_dir("sequence"), "bt_lysozyme.gp")
    )
    assert gb.get_locus(gp_file) \
        == ("AAC37312", 147, "", False, "MAM", "27-APR-1993")
    assert gb.get_definition(gp_file) == "lysozyme [Bos taurus]."
    assert gb.get_version(gp_file) == "AAC37312.1"
    assert gb.get_gi(gp_file) == 163334
    annotation = gb.get_annotation(gp_file)
    feature = seq.Feature(
        "Site",
        [seq.Location(start, stop) for start, stop in zip(
            [52,55,62,76,78,81,117,120,125],
            [53,55,62,76,78,81,117,120,126]
        )],
        {"note": "lysozyme catalytic cleft [active]", "site_type": "active"}
    )
    # The reference feature must appear in the parsed annotation:
    # same key, same locations and every reference qualifier present
    # with the same value.
    # NOTE: the (key, val) tuple must be built *before* the membership
    # test; `(key, val in ...)` would yield an always-truthy tuple and
    # silently skip the qualifier comparison.
    assert any(
        f.key == feature.key
        and f.locs == feature.locs
        and all(
            (key, val) in f.qual.items()
            for key, val in feature.qual.items()
        )
        for f in annotation
    )
    assert len(gb.get_sequence(gp_file, format="gp")) == 147
def test_genbank_utility_gb():
    """
    Check whether the high-level utility functions return the expected
    content of a known GenBank file.
    """
    gb_file = gb.GenBankFile.read(
        join(data_dir("sequence"), "ec_bl21.gb")
    )
    assert gb.get_locus(gb_file) \
        == ("CP001509", 4558953, "DNA", True, "BCT", "16-FEB-2017")
    assert gb.get_definition(gb_file) \
        == ("Escherichia coli BL21(DE3), complete genome.")
    assert gb.get_version(gb_file) == "CP001509.3"
    assert gb.get_gi(gb_file) == 296142109
    assert gb.get_db_link(gb_file) \
        == {"BioProject" : "PRJNA20713", "BioSample" : "SAMN02603478"}
    annotation = gb.get_annotation(gb_file, include_only=["CDS"])
    feature = seq.Feature(
        "CDS",
        [seq.Location(5681, 6457, seq.Location.Strand.REVERSE)],
        {"gene": "yaaA", "transl_table": "11"}
    )
    # The reference feature must appear in the parsed annotation:
    # same key, same locations and every reference qualifier present
    # with the same value.
    # NOTE: the (key, val) tuple must be built *before* the membership
    # test; `(key, val in ...)` would yield an always-truthy tuple and
    # silently skip the qualifier comparison.
    assert any(
        f.key == feature.key
        and f.locs == feature.locs
        and all(
            (key, val) in f.qual.items()
            for key, val in feature.qual.items()
        )
        for f in annotation
    )
    assert len(gb.get_sequence(gb_file, format="gb")) == 4558953
"R": -4.5 } # Look for the Swiss-Prot entry contaning the human HCN1 channel query = entrez.SimpleQuery("HCN1", "Gene Name") \ & entrez.SimpleQuery("h**o sapiens", "Organism") \ & entrez.SimpleQuery("srcdb_swiss-prot", "Properties") uids = entrez.search(query, db_name="protein") file_name = entrez.fetch(uids[0], biotite.temp_dir(), "gp", db_name="protein", ret_type="gp") gp_file = gb.GenBankFile.read(file_name) hcn1 = seq.ProteinSequence(gb.get_sequence(gp_file, format="gp")) print(hcn1) ######################################################################## # The positional hydropathy is calculated and smoothened using # a moving average for clearer visualization. hydropathies = np.array([hydropathy_dict[symbol] for symbol in hcn1]) def moving_average(data_set, window_size): weights = np.full(window_size, 1 / window_size) return np.convolve(data_set, weights, mode='valid') # Apply moving average over 15 amino acids for clearer visualization
import biotite.structure.graphics as strucgraphics
import biotite.application.viennarna as viennarna


# Fetch the E. coli BL21 and S. enterica genomes as a single
# multi-record GenBank file and split it into its two records
gb_file = gb.MultiFile.read(
    entrez.fetch_single_file(
        ["CP001509", "CP019649"], None, "nuccore", "gb"
    )
)
ec_file, se_file = tuple(gb_file)

# Locate the M1 gene in the E. coli genome:
# it is the ncRNA feature whose product mentions RNase P
annot_seq = gb.get_annotated_sequence(ec_file, include_only=["ncRNA"])
for feature in annot_seq.annotation:
    if "product" not in feature.qual:
        continue
    if "RNase P" in feature.qual["product"]:
        m1_sequence = annot_seq[feature]

# The S. enterica genome is searched on both strands,
# hence the reverse complement is added to the search space
se_genome = gb.get_sequence(se_file)
genomic_seqs = [
    se_genome,
    se_genome.reverse().complement(),
]

########################################################################
# In an initial fast matching step, we look for matching *k-mers*
# between *M1* and the *S. enterica* genome.
# A matching *k-mer* is a length *k* subsequence, that appears in both
# sequences.
# In *Biotite* this task is carried out by a :class:`KmerTable`.
#
# Later we will perform costly gapped alignments at the match positions.
# To reduce the number of triggered gapped alignments at unspecific
# matches, we add some additional filters:
# One extra condition is that two non-overlapping matches must appear