def main(argv=None):
    '''Parse the command line and, when run as a script, build the database.

    argv -- optional list of command line arguments (without the program
            name). When None, argparse falls back to sys.argv[1:].

    The download and database creation steps only run when this module is
    executed as a script (__name__ == '__main__'); otherwise the function
    just parses and validates the arguments. Invalid arguments make
    argparse exit with SystemExit.
    '''
    prog = os.path.basename(sys.argv[0])

    # Setup argument parser
    parser = ArgumentParser(
        description='%s -- create actual bacteria databases from NCBI Sources' % (prog),
        epilog='created by Philipp Sehnert',
        add_help=True)
    parser.add_argument('--version',
                        action='version',
                        version='%s 1.0' % (prog))
    # a tuple (not a set) keeps the order shown in help/error text stable
    parser.add_argument("-type", dest="type",
                        default='nucl',
                        choices=('nucl', 'prot'),
                        help="set type of blastdb")
    parser.add_argument('-metacv', dest='metacv',
                        action='store_true',
                        default=False,
                        help='create metacv database')
    parser.add_argument('-exe', dest='exe',
                        help="if not installed, specify path to executable of 'makeblastdb' or 'metacv'")
    # NOTE(review): required=True means the default below is never used;
    # confirm whether -name was meant to be optional.
    parser.add_argument('-name', dest='name',
                        default='bacterial',
                        required=True,
                        help='outname for the databases')
    # NOTE(review): store_false with default=True -- passing the flag turns
    # the option OFF, which reads oddly next to the help text.
    parser.add_argument('-parse_seqids', dest='parse_seqids',
                        action='store_false',
                        default=True,
                        help='Remove duplicated GI numbers from downloaded files and run "makeblastdb" with -parse_seqids statement ')

    # Process arguments (honour an explicitly passed argv)
    args = parser.parse_args(argv)
    DB_TYPE = args.type
    METACV = args.metacv
    DB_NAME = args.name
    EXECUTABLE = args.exe
    PARSE_SEQIDS = args.parse_seqids

    if __name__ == '__main__':
        # check for protein or nucleotide database
        DB_TYPE = check_db_type(METACV, DB_TYPE)
        # verify executable for external scripts
        EXECUTABLE = check_executable(EXECUTABLE, METACV)
        # create dir for sources
        create_folder(DOWNLOAD_FOLDER)
        # init FTP functions
        ftp = ftp_functions(FTP_SERVER, FTP_ROOT, DOWNLOAD_FOLDER, DEBUG)
        # connect to Blast FTP Server
        ftp.connect()
        try:
            ftp.go_to_root()
            # start Downloading
            for ftp_folder in SOURCES:
                sys.stdout.write("Downloading files from %s \n" % (ftp_folder))
                ftp.download_folder(ftp_folder, DB_TYPE)
        finally:
            # close ftp connection, even when a download failed
            ftp.close()
        # run external database creation scripts
        DBCreate = DBCreation(DB_OUT, DOWNLOAD_FOLDER, DB_TYPE, PARSE_SEQIDS,
                              DEBUG, EXECUTABLE)
        if METACV:
            DBCreate.set_METACV(True)
            # select the subfolder for MetaCV database
            DBCreate.createMetaCVDB(DB_NAME, ['Bacteria', 'Bacteria_DRAFT'])
        else:
            DBCreate.set_METACV(False)
            DBCreate.createBlastDB(DB_NAME)
def get_functional_annotation(self):
    '''Fetch the UniProt id mapping used for functional annotation.

    Connects to the UniProt FTP server, changes into the id mapping
    directory and hands the file name 'idmapping.dat.gz' to the FTP
    helper's get_idmapping(). Whether an existing up-to-date local copy
    is reused is decided inside that helper (not visible here).

    Returns whatever get_idmapping() returns (presumably the location of
    the extracted file -- confirm against ftp_functions).
    '''
    # FTP Server information
    uniprot_ftp = 'ftp.uniprot.org'
    functional = 'pub/databases/uniprot/current_release/knowledgebase/idmapping'
    archive_name = 'idmapping.dat.gz'
    # establish connection
    # NOTE(review): this uses the module-level DOWNLOAD_FOLDER while
    # get_taxonomy passes self.DOWNLOAD_FOLDER -- confirm which is intended.
    uniprot = ftp_functions(uniprot_ftp, functional, DOWNLOAD_FOLDER, self.DEBUG)
    uniprot.connect()
    try:
        # go to functional dir
        uniprot.go_down(functional)
        # download file and extract it
        return uniprot.get_idmapping(archive_name)
    finally:
        # close connection, even when the download failed
        uniprot.close()
def get_taxonomy(self):
    '''Fetch the NCBI taxonomy files needed for the MetaCV database.

    Connects to the NCBI FTP server, changes into the taxonomy directory
    and asks the FTP helper for 'gi_taxid_prot.dmp.gz' (via get_gi_map)
    and 'taxdump.tar.gz' (via get_taxdump). Whether existing up-to-date
    local copies are reused is decided inside those helpers (not visible
    here).

    Returns a list holding the get_gi_map() result first, followed by
    every item yielded by get_taxdump().
    '''
    # FTP Server information
    ncbi_ftp = 'ftp.ncbi.nih.gov'
    taxonomy = '/pub/taxonomy/'
    gi_map_archive = 'gi_taxid_prot.dmp.gz'
    taxdump_archive = 'taxdump.tar.gz'
    # establish connection
    ncbi = ftp_functions(ncbi_ftp, taxonomy, self.DOWNLOAD_FOLDER, self.DEBUG)
    ncbi.connect()
    try:
        # go to taxonomy dir
        ncbi.go_down(taxonomy)
        # download actual files and extract needed files
        files = [ncbi.get_gi_map(gi_map_archive)]
        files.extend(ncbi.get_taxdump(taxdump_archive))
    finally:
        # close connection, even when a download failed
        ncbi.close()
    return files