Пример #1
0
 def alleleretriever(self):
     """
     Retrieve the required alleles from a file of all alleles, and create organism-specific allele files.

     For every organism in self.alleledict, the folder <self.path>/outputalleles/<organism>/ is deleted and
     recreated. One <gene>.tfa file is written per gene, and every record written to a gene file is also written to
     a combined gdcs_alleles.fasta file in the same folder. A MetadataObject recording the organism, the paths and
     the list of per-gene files is appended to self.samples for each organism.
     Alleles whose '<gene>_<allele>' identifier is absent from the indexed allele file are skipped silently.
     """
     logging.info('Retrieving alleles')
     # Index all the records in the allele file
     logging.info('Loading rMLST records')
     recorddict = SeqIO.index(self.allelefile, 'fasta')
     try:
         logging.info('Creating allele output files')
         # Create the organism-specific files of alleles
         for organism in sorted(self.alleledict):
             # Make an object to store information for each strain
             metadata = MetadataObject()
             metadata.organism = organism
             metadata.path = self.path
             metadata.outpath = os.path.join(self.path, 'outputalleles', organism, '')
             # Delete and recreate the output path - as the files are appended to each time, they will be too
             # large if this script is run more than once. A missing folder is not an error.
             shutil.rmtree(metadata.outpath, ignore_errors=True)
             make_path(metadata.outpath)
             metadata.combined = os.path.join(metadata.outpath, 'gdcs_alleles.fasta')
             metadata.allelefiles = list()
             with open(metadata.combined, 'w') as combined:
                 for gene, alleles in sorted(self.alleledict[organism].items()):
                     genefile = os.path.join(metadata.outpath, '{}.tfa'.format(gene))
                     metadata.allelefiles.append(genefile)
                     # Open the gene-specific file to append
                     with open(genefile, 'a') as allelefile:
                         for allele in sorted(alleles):
                             # Look the record up once; only the lookup is expected to fail
                             try:
                                 record = recorddict['{}_{}'.format(gene, allele)]
                             except KeyError:
                                 # Skip adding alleles that are no longer in the database
                                 continue
                             # Write the allele record to both the gene-specific and the combined file
                             SeqIO.write(record, allelefile, 'fasta')
                             SeqIO.write(record, combined, 'fasta')
             # Add the populated metadata to the list
             self.samples.append(metadata)
     finally:
         # Release the file handle held by the index, even if writing the output failed part-way
         recorddict.close()
Пример #2
0
 def rmlst(self):
     """
     Download the most up-to-date rMLST profiles and alleles from pubmlst, unless an earlier download finished.

     Note that you will need the necessary access token and secret for this to work. A marker file named
     'complete' inside <self.databasepath>/rMLST records a finished download; if it already exists, the method
     returns after logging without downloading anything.
     """
     printtime('Downloading rMLST database', self.start)
     rmlst_dir = os.path.join(self.databasepath, 'rMLST')
     # The presence of this file is how a previously successful download and set-up is detected
     marker = os.path.join(rmlst_dir, 'complete')
     if os.path.isfile(marker):
         return
     # Build the object handed to the rMLST download script: it carries the path and start time attributes
     download_args = MetadataObject()
     download_args.path = self.databasepath
     download_args.start = self.start
     # Run the rMLST download
     get_rmlst.Get(download_args)
     # Populate the marker file with the folder contents. The folder is listed only after the marker has been
     # opened for writing, so the marker file itself is part of the listing
     with open(marker, 'w') as marker_handle:
         contents = glob(os.path.join(rmlst_dir, '*'))
         marker_handle.write('\n'.join(contents))
Пример #3
0
 def __init__(self, inputobject, extension='fasta', light=False):
     """
     Build the argument object expected by CLARK from the supplied input object, then run CLARK.

     :param inputobject: object providing the path, reffilepath, cpus, runmetadata, commit, starttime and homepath
     attributes read below
     :param extension: value stored on inputobject.runmetadata.extension (default 'fasta')
     :param light: value passed through to CLARK as the 'light' setting (default False)
     """
     # This object mimics the command line arguments necessary for the script
     settings = MetadataObject()
     # The same input path is used as both the working path and the sequence path
     settings.path = settings.sequencepath = inputobject.path
     # The CLARK database lives in a 'clark' folder under the reference file path; create it if necessary
     settings.databasepath = os.path.join(inputobject.reffilepath, 'clark')
     make_path(settings.databasepath)
     # The CLARK installation folder is addressed relative to the folder holding the CLARK executable
     executable_dir = os.path.dirname(which('CLARK'))
     settings.clarkpath = executable_dir + '/../opt/clark/'
     # Fixed analysis settings
     settings.cutoff = 0.005
     settings.database = 'bacteria'
     settings.rank = 'species'
     settings.filter = False
     # Settings taken from the input object
     settings.threads = inputobject.cpus
     settings.runmetadata = inputobject.runmetadata
     settings.clean_seqs = False
     settings.reffilepath = inputobject.reffilepath
     # NOTE: this sets the extension on the runmetadata object shared with inputobject
     settings.runmetadata.extension = extension
     settings.light = light
     # Run CLARK
     CLARK(settings,
           inputobject.commit,
           inputobject.starttime,
           inputobject.homepath)
Пример #4
0
 def mlst(self, genera=None):
     """
     Download the necessary up-to-date MLST profiles and alleles

     Each genus is downloaded into <self.databasepath>/MLST/<genus>. A marker file named 'complete' in that folder
     records a finished download; genera whose marker file already exists are skipped.

     :param genera: iterable of genus names to process. When None (the default), Escherichia, Vibrio,
     Campylobacter, Listeria, Bacillus, Staphylococcus and Salmonella are used
     """
     if genera is None:
         # A None sentinel replaces the former mutable set default; the tuple also fixes the processing order
         genera = ('Escherichia', 'Vibrio', 'Campylobacter', 'Listeria', 'Bacillus', 'Staphylococcus',
                   'Salmonella')
     printtime('Downloading MLST databases', self.start)
     for genus in genera:
         # Create an object to pass to the get_mlst script
         args = MetadataObject()
         # Populate the object with the necessary attributes
         args.species = genus
         args.repository_url = 'http://pubmlst.org/data/dbases.xml'
         args.force_scheme_name = False
         args.path = os.path.join(self.databasepath, 'MLST', genus)
         # Create the name of the file to be used to determine if the database download and setup was successful
         completefile = os.path.join(args.path, 'complete')
         # Only download the files if the download was not previously successful
         if os.path.isfile(completefile):
             continue
         # Run the download
         get_mlst.main(args)
         # Create and populate the marker file. The folder is listed after the marker has been opened for
         # writing, so the marker file itself appears in the listing
         with open(completefile, 'w') as complete:
             complete.write('\n'.join(glob(os.path.join(args.path, '*'))))