示例#1
0
def t_write_format():
    """Round-trip every file in ``write_format_files`` through the RecordParser.

    Each file (looked up under the ``GenBank`` directory) is read twice in
    lockstep: once through a ``RecordParser`` and once through a bare
    ``GenBank.Iterator``.  The string form of each parsed record is handed,
    together with the corresponding item from the bare iterator, to
    ``do_comparison``.
    """
    record_parser = GenBank.RecordParser(debug_level=0)

    for file in write_format_files:
        print("Testing GenBank writing for %s..." % os.path.basename(file))
        path = os.path.join("GenBank", file)

        # Context managers guarantee both handles are closed even when
        # parsing or the comparison raises part-way through a file.
        with open(path, "r") as cur_handle, open(path, "r") as compare_handle:
            iterator = GenBank.Iterator(cur_handle, record_parser)
            compare_iterator = GenBank.Iterator(compare_handle)

            while True:
                cur_record = next(iterator)
                compare_record = next(compare_iterator)

                # The loop treats None from either iterator as end of input.
                if cur_record is None or compare_record is None:
                    break

                print("\tTesting for %s" % cur_record.version)

                output_record = str(cur_record) + "\n"
                do_comparison(compare_record, output_record)
示例#2
0
def write_format(file):
    """Write a GenBank record from a GenBank file and compare them.

    The file is read twice in lockstep: once through a ``RecordParser`` and
    once through a bare ``GenBank.Iterator``.  The string form of each parsed
    record is passed with the corresponding item from the bare iterator to
    ``do_comparison``.  A failed comparison is reported on stdout and does
    not stop the run.

    Args:
        file: Path of the GenBank file; a path containing ".gz" is opened
            with ``gzip.open``, anything else with the builtin ``open``.
    """
    record_parser = GenBank.RecordParser(debug_level=2)

    print("Testing GenBank writing for %s..." % os.path.basename(file))
    # be able to handle gzipped files
    opener = gzip.open if ".gz" in file else open

    # Context managers guarantee both handles are closed even when parsing
    # raises something other than the AssertionError handled below.
    with opener(file, "r") as cur_handle, opener(file, "r") as compare_handle:
        iterator = GenBank.Iterator(cur_handle, record_parser)
        compare_iterator = GenBank.Iterator(compare_handle)

        while True:
            cur_record = next(iterator)
            compare_record = next(compare_iterator)

            # The loop treats None from either iterator as end of input.
            if cur_record is None or compare_record is None:
                break

            output_record = str(cur_record) + "\n"
            try:
                do_comparison(compare_record, output_record)
            except AssertionError as msg:
                # Only name the record when its comparison fails.
                print("\tTesting for %s" % cur_record.version)
                print(msg)
示例#3
0
def write_format(file):
    """Parse a GenBank file, write each record back out and compare.

    The file is read twice in lockstep: once through a ``RecordParser`` and
    once through a bare ``GenBank.Iterator``.  The string form of each parsed
    record is passed with the corresponding item from the bare iterator to
    ``do_comparison``.  A failed comparison is reported on stdout and does
    not stop the run.

    Only syntax valid on both Python 2.6+ and Python 3 is used.

    Args:
        file: Path of the GenBank file; a path containing '.gz' is opened
            with ``gzip.open``, anything else with the builtin ``open``.
    """
    record_parser = GenBank.RecordParser(debug_level=2)

    print("Testing GenBank writing for %s..." % os.path.basename(file))
    # be able to handle gzipped files
    opener = gzip.open if '.gz' in file else open

    # try/finally (rather than ``with``) so both handles are always closed
    # without requiring context-manager support from the gzip file object.
    cur_handle = opener(file, "r")
    try:
        compare_handle = opener(file, "r")
        try:
            iterator = GenBank.Iterator(cur_handle, record_parser)
            compare_iterator = GenBank.Iterator(compare_handle)

            while True:
                cur_record = next(iterator)
                compare_record = next(compare_iterator)

                # The loop treats None from either iterator as end of input.
                if cur_record is None or compare_record is None:
                    break

                output_record = str(cur_record) + "\n"
                try:
                    do_comparison(compare_record, output_record)
                except AssertionError as msg:
                    # Only name the record when its comparison fails.
                    print("\tTesting for %s" % cur_record.version)
                    print(msg)
        finally:
            compare_handle.close()
    finally:
        cur_handle.close()
示例#4
0
    def setUp(self):
        """Prepare an empty "biosql-test" sub-database and a GenBank iterator.

        Creates TESTDB, connects to it, drops any existing "biosql-test"
        sub-database, creates a new one as ``self.db`` and stores an iterator
        over ``GenBank/cor6_6.gb`` (relative to the current working
        directory) as ``self.iterator``.
        """
        # start from a freshly created TESTDB
        create_database()

        biodb_name = "biosql-test"
        connection_settings = dict(driver=DBDRIVER,
                                   user=DBUSER,
                                   passwd=DBPASSWD,
                                   host=DBHOST,
                                   db=TESTDB)
        server = BioSeqDatabase.open_database(**connection_settings)

        # drop a leftover sub-database; a KeyError from either the lookup or
        # the removal is ignored
        try:
            server[biodb_name]
            server.remove_database(biodb_name)
        except KeyError:
            pass

        self.db = server.new_database(biodb_name)

        # the GenBank file whose records will be loaded; the handle is left
        # open because the iterator stored on self reads from it later
        gb_path = os.path.join(os.getcwd(), "GenBank", "cor6_6.gb")
        self.iterator = GenBank.Iterator(open(gb_path, "r"),
                                         GenBank.FeatureParser())
示例#5
0
 def loadData(self, data, dbtype):
     """Load the records read from ``data`` into this object's database.

     Args:
         data: Handle passed to ``GenBank.Iterator`` as the record source.
         dbtype: Format of ``data``; only "GenBank" is supported.

     Returns:
         "" when the load succeeded and was committed, otherwise the
         formatted traceback of the failure (the transaction is rolled
         back in that case).

     Raises:
         ValueError: If ``dbtype`` is not "GenBank".
     """
     if dbtype != "GenBank":
         # A bare string cannot be raised (that is itself a TypeError on
         # Python 2.6+), so report the problem with a real exception.
         raise ValueError("Unknown dbtype: %r" % (dbtype,))

     # get the GenBank file we are going to put into it
     parser = GenBank.FeatureParser()
     iterator = GenBank.Iterator(data, parser)
     # finally put it in the database
     try:
         self.getDatabase().load(iterator)
     except Exception:
         # Exception (not a bare except) so KeyboardInterrupt/SystemExit
         # still propagate instead of being turned into a return value.
         self.getBioSQLRoot().getDBServer().adaptor.conn.rollback()
         return traceback.format_exc()
     self.getBioSQLRoot().getDBServer().adaptor.conn.commit()
     return ""
示例#6
0
def t_cleaning_features():
    """Test the ability to clean up feature values.

    Parses the first record of ``GenBank/arab1.gb`` with a ``FeatureParser``
    configured with ``utils.FeatureValueCleaner`` and checks that the
    translation qualifier of the record's second feature contains neither
    spaces nor newlines.
    """
    gb_parser = GenBank.FeatureParser(
        feature_cleaner=utils.FeatureValueCleaner())

    # Only the first record is needed; the context manager closes the file
    # even when parsing fails or one of the assertions below trips.
    with open(os.path.join("GenBank", "arab1.gb")) as handle:
        first_record = next(GenBank.Iterator(handle, gb_parser))

    # test for cleaning of translation
    translation_feature = first_record.features[1]
    test_trans = translation_feature.qualifiers["translation"][0]
    assert " " not in test_trans, "Did not clean spaces out of the translation"
    assert "\012" not in test_trans, "Did not clean newlines out of the translation"
示例#7
0
def t_cleaning_features():
    """Test the ability to clean up feature values.

    Parses the first record of ``GenBank/arab1.gb`` with a ``FeatureParser``
    configured with ``utils.FeatureValueCleaner`` and checks that the
    translation qualifier of the record's second feature contains neither
    spaces nor newlines.
    """
    parser = GenBank.FeatureParser(feature_cleaner=utils.FeatureValueCleaner())

    # The file used to be left open; close it as soon as the one record that
    # is needed has been read.  The next() builtin works with iterators on
    # both Python 2.6+ and Python 3, unlike the .next() method.
    with open(os.path.join("GenBank", "arab1.gb")) as handle:
        first_record = next(GenBank.Iterator(handle, parser))

    # test for cleaning of translation
    translation_feature = first_record.features[1]
    test_trans = translation_feature.qualifiers["translation"][0]
    assert " " not in test_trans, \
        "Did not clean spaces out of the translation"
    assert "\012" not in test_trans, \
        "Did not clean newlines out of the translation"
示例#8
0
def load_database(gb_handle):
    """Load a GenBank file into a BioSQL database.

    This is useful for running tests against a newly created database.

    Creates the test database, opens a connection to it, creates the
    "biosql-test" sub-database, loads every record read from ``gb_handle``
    and commits.  The connection is closed afterwards whether or not the
    load succeeded; on failure nothing is committed.

    Args:
        gb_handle: Handle passed to ``GenBank.Iterator`` as the record source.
    """
    create_database()
    # now open a connection to load the database
    db_name = "biosql-test"
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          user=DBUSER,
                                          passwd=DBPASSWD,
                                          host=DBHOST,
                                          db=TESTDB)
    try:
        db = server.new_database(db_name)

        # get the GenBank file we are going to put into it
        parser = GenBank.FeatureParser()
        iterator = GenBank.Iterator(gb_handle, parser)
        # finally put it in the database
        db.load(iterator)
        server.adaptor.conn.commit()
    finally:
        # Do not leave the connection open when creating or loading fails.
        server.adaptor.conn.close()
示例#9
0
# files_to_parse += [os.path.join(os.getcwd(), 'GenBank', 'biojava_test.gb')]

# test the parsers
# Every input file is run through both a FeatureParser and a RecordParser.
feature_parser = GenBank.FeatureParser(debug_level=0)
record_parser = GenBank.RecordParser(debug_level=0)

all_parsers = [feature_parser, record_parser]
print("Testing parsers...")
for parser in all_parsers:
    for filename in files_to_parse:
        # Report, but otherwise skip, inputs that are not present on disk.
        if not os.path.isfile(filename):
            print("Missing test input file: %s" % filename)
            continue

        # NOTE(review): no close() for this handle is visible in this part of
        # the loop - confirm it is closed further down.
        handle = open(filename, 'r')
        iterator = GenBank.Iterator(handle, parser)

        while True:
            # Parser warnings are silenced only while fetching the record.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", BiopythonParserWarning)
                # e.g. BiopythonParserWarning: Premature end of file in sequence data
                cur_record = next(iterator)

            # The loop treats None from the iterator as end of input.
            if cur_record is None:
                break

            # Records produced by the FeatureParser are reported with the
            # base name of the file they came from, their seq and their id.
            if isinstance(parser, GenBank.FeatureParser):
                print("***Record from %s with the FeatureParser" %
                      filename.split(os.path.sep)[-1])
                print("Seq: %r" % cur_record.seq)
                print("Id: %s" % cur_record.id)
示例#10
0
import sys

# GenBank
from Bio import GenBank

verbose = 0

# Exactly one argument is expected: the GenBank file to check.
if len(sys.argv) != 2:
    print("Usage ./find_parser_problems <GenBank file to parse>")
    sys.exit()

# Wrap the feature parser in an ErrorParser; parse failures are caught below
# as GenBank.ParserFailureError.
feature_parser = GenBank.FeatureParser(debug_level=0)
parser = GenBank.ErrorParser(feature_parser)

handle = open(sys.argv[1], 'r')
iterator = GenBank.Iterator(handle, parser, has_header=1)

while 1:
    have_record = 0

    while have_record == 0:
        try:
            # next() works on both Python 2.6+ and Python 3 iterators.
            cur_record = next(iterator)
            have_record = 1
        except GenBank.ParserFailureError as msg:
            # print() with a single pre-formatted string produces the same
            # output on Python 2 and 3; the old print statement was a
            # SyntaxError on Python 3.
            print("Parsing Problem: %s" % (msg,))
            sys.exit()

    # The loop treats None from the iterator as end of input.
    if cur_record is None:
        break
示例#11
0
#!/usr/bin/env python

from Bio import GenBank
from Bio import Entrez
from BioSQL import BioSeqDatabase
import sys

# Should read these from settings at some point
dbpath = 'biosql.sqlite3'
dbname = 'local_db'
Entrez.email = '*****@*****.**'

server = BioSeqDatabase.open_database(driver='sqlite3', db=dbpath)
db = server[dbname]

parser = GenBank.FeatureParser()


def loadgb(_id):
    """Fetch the GenBank record for ``_id`` from Entrez and load it into ``db``.

    Returns whatever ``db.load`` returns.  The network handle is closed once
    loading has finished or failed.
    """
    handle = Entrez.efetch(db='nucleotide', id=_id, rettype='gb',
                           retmode='text')
    try:
        return db.load(GenBank.Iterator(handle, parser))
    finally:
        handle.close()


# The accession list is one identifier per line; the file defaults to
# 'accession.lst' unless a path is given as the first argument.
ACCESSIONS_FILE = 'accession.lst' if len(sys.argv) < 2 else sys.argv[1]
with open(ACCESSIONS_FILE) as accessions:
    for line in accessions:
        # Drop the trailing newline so it is neither sent to Entrez nor
        # printed, and skip blank lines instead of fetching an empty id.
        accession = line.strip()
        if not accession:
            continue
        print("Loading %s" % accession)
        loadgb(accession)
server.adaptor.commit()
示例#12
0
    "Leptocephalus sp. 'type II larva' (Smith, 1989)":
    "Leptocephalus sp. 'type II larva'",
    "Humphaplotropis culaishanensis":
    "Humphaplotropis culaishanensis (nomen nudum)",
    "Paraglypturus tonganus": "Paraglypturus tonganus (nomen nudum)",
    "Hoploplana elisabelloi": "Hoploplana elisabelloi (nomen nudum)",
    "Palpitomonas bilix Eukaryota.": "Palpitomonas bilix",
    "Eukaryota sp. BB2 Eukaryota.": "Eukaryota sp. BB2",
    "Ancoracysta twista Eukaryota.": "Ancoracysta twista"
}

# --- load a parser and iterator for our GenBank file
# The first command-line argument is the path of a gzip-compressed input.
# NOTE(review): on Python 3, gzip.open mode "r" is binary; confirm the parser
# accepts bytes or whether "rt" is needed.
gb_handle = gzip.open(sys.argv[1], "r")
# -- a parser that will give you back SeqFeature objects
feature_parser = GenBank.FeatureParser()
iterator = GenBank.Iterator(gb_handle, feature_parser)

# load taxonomy for taxids
# NOTE(review): NCBITaxa is not defined in the visible code - presumably
# imported from a taxonomy library above; confirm.
ncbi = NCBITaxa()

# output using prefix
# The second command-line argument is the prefix of both output files.
# NOTE(review): neither output handle is closed in the visible code - confirm
# they are closed after the processing loop.
out_1 = open("%s.cdna.fasta" % sys.argv[2], "w")
out_2 = open("%s.codons.tab" % sys.argv[2], "w")

# Accumulators; presumably filled by the record loop that follows this setup.
strands = []
stop_codons = []
prot_ids = []
excluded = []
missing_id = []

# begin iterating through the file and getting GenBank records
示例#13
0
def loadDB(catalog):
    """Load the non-plasmid records of several GenBank files into BioSQL.

    Prompts for the user name and password of the "rnammer" PostgreSQL
    database, then walks every file in ``catalog``.  Records whose
    description mentions "plasmid" (case-insensitively) are skipped.  Every
    other record is loaded into the sub-database named after its second
    taxonomy entry, or after its third entry when the second one is
    "Proteobacteria".  Missing sub-databases are created on demand.

    Args:
        catalog: Iterable of paths to GenBank flat files.
    """
    from BioSQL import BioSeqDatabase

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    username = read_line("Please enter user name: ")
    password = read_line("and password: ")
    host = "dbpg-ifi-utv.uio.no"
    db_name = "rnammer"

    server = BioSeqDatabase.open_database(driver="psycopg2", user=username,
                                          passwd=password, host=host,
                                          db=db_name)

    parser = GenBank.FeatureParser()

    # gi_rep is a running 1-based count of the files, printed for progress.
    for gi_rep, gbff in enumerate(catalog, 1):
        print(gi_rep)
        print(gbff)

        # Close each GenBank file once all of its records have been loaded.
        with open(gbff) as handle:
            for record in GenBank.Iterator(handle, parser):
                if re.search("plasmid", record.description, re.IGNORECASE):
                    continue
                print("Record name:")
                print(record.id)

                taxonomy = record.annotations["taxonomy"]
                print(taxonomy[1])
                if taxonomy[1] == "Proteobacteria":
                    # Proteobacteria are split one taxonomy level further.
                    print(taxonomy[2])
                    biodb_name = taxonomy[2]
                else:
                    biodb_name = taxonomy[1]

                # Create the sub-database on first use; using the object
                # returned by new_database avoids re-querying in a loop
                # that could spin forever if the lookup kept failing.
                try:
                    db = server[biodb_name]
                except KeyError:
                    db = server.new_database(biodb_name)
                    server.commit()
                db.load([record])

    server.adaptor.commit()
示例#14
0
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 17 15:02:37 2016

@author: rgilmore
"""

import sqlite3
from BioSQL import BioSeqDatabase
from Bio import GenBank
import os

server = BioSeqDatabase.open_database(driver="sqlite3", db="biosql.db")

try:
    db = server.new_database("HTR1A")

    # Show what is in the current working directory.
    dir_list1 = os.listdir()
    print(dir_list1)

    parser = GenBank.FeatureParser()
    # Close the GenBank file as soon as its records have been loaded.
    with open("HTR1A_Ailuropoda melanoleuca.gbk") as handle:
        iterator = GenBank.Iterator(handle, parser)
        db.load(iterator)
    db.adaptor.commit()

    server.commit()
finally:
    # Release the connection even when creating or loading fails.
    server.close()