def cli():
    """
    Command-line entry point that finds alleles and then profiles them.

    The parser returned by allele_finder.cli() is used as the parent parser; this function adds
    no options of its own. AlleleFinder is run first, and the records it exposes are passed on
    to ProfileAlleles.
    """
    # Reuse the argument definitions supplied by allele_finder.py
    parser = ArgumentParser(parents=[allele_finder.cli()])
    args = parser.parse_args()
    SetupLogging(debug=args.verbose)
    # Stage one: allele finding
    allele_pipeline = allele_finder.AlleleFinder(path=args.path,
                                                 targetfile=args.targetfile,
                                                 analysis_type=args.blast,
                                                 fasta_path=args.fasta_path,
                                                 genesippr=args.genesippr,
                                                 metadata_file=args.metadatafile,
                                                 cutoff=args.cutoff,
                                                 target_alleles=args.no_target_alleles,
                                                 allele_hashing=args.allele_hashing,
                                                 amino_acid=args.amino_acid,
                                                 one_based=args.one_based)
    allele_pipeline.main()
    # Keep hold of the records produced by the allele finding for the profiling stage
    allele_records = allele_pipeline.records
    logging.info('Allele finding complete')
    # Stage two: allele profiling
    profile_pipeline = ProfileAlleles(
        path=args.path,
        fasta_path=args.fasta_path,
        records=allele_records,
        amino_acid=args.amino_acid,
    )
    profile_pipeline.main()
    logging.info('Allele Profiling complete')
def cli():
    """
    Command-line entry point for translating nucleotide allele files to amino acid.

    Parses --path, --profile and --one_based, calls SetupLogging with debug=True, and runs
    Translate with the parsed values.
    """
    arg_parser = ArgumentParser(
        description='Translate allele files in nucleotide format to amino acid. '
                    'Remove duplicates. Keep notes.')
    arg_parser.add_argument(
        '-p', '--path',
        required=True,
        help='Specify path containing allele files.')
    arg_parser.add_argument(
        '--profile',
        action='store_true',
        help='Optionally parse the nucleic acid profile, and create the corresponding reduced amino '
             'acid profile')
    arg_parser.add_argument(
        '-o', '--one_based',
        action='store_true',
        help='Use 1-based indexing rather than the default 0-based')
    # Collect the parsed options
    options = arg_parser.parse_args()
    SetupLogging(debug=True)
    translator = Translate(
        path=options.path,
        profile=options.profile,
        one_based=options.one_based,
    )
    translator.main()
    logging.info('Allele translation complete!')
Пример #3
0
def supremacy(args):
    """
    Run PrimerFinder with the 'ePCR' analysis type.

    :param args: Object providing the debug, sequencepath, primerfile, mismatches, kmerlength
    and cpus attributes read below
    """
    SetupLogging(debug=args.debug)
    # Build the PrimerFinder object from the supplied arguments
    primer_finder = PrimerFinder(
        sequence_path=args.sequencepath,
        primer_file=args.primerfile,
        mismatches=args.mismatches,
        kmer_length=args.kmerlength,
        cpus=args.cpus,
        analysistype='ePCR',
    )
    # Launch the analysis
    primer_finder.main()
Пример #4
0
def ultimatum(args):
    """
    Run the Ultimatum analysis on the samples found by Filer.filer.

    A MetadataObject is attached to args as args.runmetadata, and its samples attribute is
    populated from Filer.filer(args) before Ultimatum is created.

    :param args: Object providing the debug, sequencepath, primerfile, primer_format, mismatches
    and export_amplicons attributes read below
    """
    SetupLogging(debug=args.debug)
    # Attach the sample metadata to the args object
    args.runmetadata = MetadataObject()
    args.runmetadata.samples = Filer.filer(args)
    ultimatum_finder = Ultimatum(
        metadataobject=args.runmetadata.samples,
        sequencepath=args.sequencepath,
        reportpath=os.path.join(args.sequencepath, 'reports'),
        primerfile=args.primerfile,
        primer_format=args.primer_format,
        mismatches=args.mismatches,
        export_amplicons=args.export_amplicons,
    )
    ultimatum_finder.main()
def cli():
    """
    Command-line entry point for downloading FASTA assemblies from the NCBI FTP.

    Parses the path, output path, accession table, thread count and sleep time, then runs
    AssemblyDownload with the parsed values.
    """
    # Parser for arguments
    parser = ArgumentParser(
        description=
        'Downloads and decompresses FASTA assemblies from the NCBI FTP')
    parser.add_argument('-p',
                        '--path',
                        required=True,
                        help='Path to folder containing necessary tables')
    parser.add_argument(
        '-o',
        '--outputpath',
        help=
        'Path in which files are to be downloaded. Default is "path/downloads"'
    )
    # argparse applies %-formatting to help strings, so the URL-encoded characters in the
    # example link must be written as '%%' to be displayed as a literal '%'
    parser.add_argument(
        '-a',
        '--accessiontable',
        default='pathogens.csv',
        help=
        'Name of metadata table from NCBI (must be in the supplied path). Generate the table '
        'from NCBI pathogens '
        'e.g. https://www.ncbi.nlm.nih.gov/pathogens/isolates/#/search/taxgroup_name:%%22Salmonella'
        '%%20enterica%%22 '
        'Select Download: -> Data type: Metadata -> Download. Default name is pathogens.csv'
    )
    parser.add_argument(
        '-n',
        '--numthreads',
        default=3,
        type=int,
        choices=[1, 2, 3, 4, 5, 6],
        help='Number of concurrent downloads to perform. Default is 3')
    parser.add_argument(
        '-s',
        '--sleeptime',
        default=0,
        type=int,
        help=
        'Amount of time in seconds you would like the script to sleep until it starts the '
        'download. Default is 0. NOTE: There are 3600 seconds in an hour.')
    # Get the arguments into an object
    arguments = parser.parse_args()
    SetupLogging()
    download = AssemblyDownload(path=arguments.path,
                                outputpath=arguments.outputpath,
                                accessiontable=arguments.accessiontable,
                                threads=arguments.numthreads,
                                sleeptime=arguments.sleeptime)
    download.main()
    logging.info('NCBI assembly download complete!')
Пример #6
0
 def __init__(self, spectra_path, filename, start_time, outputpath, classic,
              extensions):
     """
     Resolve the spectra folder and the Excel renaming file, and store the run settings.

     :param spectra_path: Path to .spa/.spc files
     :param filename: Path to .xls(x) file with renaming information.
     :param start_time: Time the analyses started
     :param outputpath: Path to folder in which the renamed files are to be stored
     :param classic: BOOL whether to use the "classic" method of file renaming.
     :param extensions: Extensions of the files to rename
     :raises AssertionError: if the spectra folder or the Excel file cannot be found
     """
     SetupLogging()
     # expanduser only alters paths with a leading '~', so it can be applied unconditionally
     self.spectra_path = os.path.abspath(os.path.expanduser(spectra_path))
     assert os.path.isdir(self.spectra_path), 'Supplied sequence path is not a valid directory {0!r:s}'\
         .format(self.spectra_path)
     self.file = os.path.abspath(os.path.expanduser(filename))
     # If the path to the file wasn't provided, check the spectra folder
     if not os.path.isfile(self.file):
         self.file = os.path.join(self.spectra_path, filename)
     # If the file still can't be found, check the parental folder of the spectra folder
     if not os.path.isfile(self.file):
         self.file = os.path.join(os.path.dirname(self.spectra_path),
                                  filename)
     self.start = start_time
     assert os.path.isfile(self.file), 'Cannot find the supplied Excel file ({0!r:s}) with the file information. ' \
                                       'Please ensure that this file is in the path, and there\'s no spelling ' \
                                       'mistakes'.format(self.file)
     # Set the output path
     self.outputpath = os.path.join(outputpath)
     # Create the output path as required
     make_path(self.outputpath)
     # Determine the naming scheme
     self.classic = classic
     # Variable for extensions of files to rename
     self.extensions = extensions
     # Create class variable
     self.metadata = list()
def main():
    """
    Command-line entry point that runs VirusTypeDB on a folder of database files.

    Parses --dbpath and --debug, calls SetupLogging with the debug flag, and runs VirusTypeDB
    with the supplied database path.
    """
    arg_parser = ArgumentParser(description='Perform virus typing')
    arg_parser.add_argument(
        '-db', '--dbpath',
        required=True,
        help='Path of folder containing .gb database files to process.')
    arg_parser.add_argument(
        '-d', '--debug',
        action='store_true',
        help='Allow debug-level logging to be printed to the terminal')
    # Collect the parsed options
    options = arg_parser.parse_args()
    SetupLogging(debug=options.debug)
    database_builder = VirusTypeDB(db_path=options.dbpath)
    database_builder.main()
def cli():
    """
    Command-line entry point for the allele/profile Updater.

    Parses --path and --amino_acid, calls SetupLogging with debug=True, and runs Updater with
    the parsed values.
    """
    arg_parser = ArgumentParser(
        description='Determines profiles of strains against previously calculated allele database '
                    'and profile. Creates and/or updates both the database of allele definitions '
                    'and the profile based on novel alleles and/or profiles discovered')
    arg_parser.add_argument(
        '-p', '--path',
        required=True,
        help='Specify path. Note that due to code reuse, the query sequence files must be in the '
             '"query" sub-folder, the alleles must be in the "alleles" sub-folder')
    arg_parser.add_argument(
        '-aa', '--amino_acid',
        action='store_true',
        help='The query sequences are protein.')
    # Collect the parsed options
    options = arg_parser.parse_args()
    SetupLogging(debug=True)
    # Launch the update
    profile_updater = Updater(
        path=options.path,
        amino_acid=options.amino_acid,
    )
    profile_updater.main()
    logging.info('Allele Updating complete')
def cli():
    """
    Command-line entry point for ProfileReduce.

    Parses --profile and --names (both required), calls SetupLogging with debug=True, and runs
    ProfileReduce with the parsed values.
    """
    arg_parser = ArgumentParser(
        description='Extract the genes of interest from a profile file')
    arg_parser.add_argument(
        '-p', '--profile',
        required=True,
        help='Name and path of profile file.')
    arg_parser.add_argument(
        '-n', '--names',
        required=True,
        help='Name and path to a file containing the gene names (one per line) to be extracted '
             'from the profile')
    # Collect the parsed options
    options = arg_parser.parse_args()
    SetupLogging(debug=True)
    reducer = ProfileReduce(profile=options.profile,
                            names=options.names)
    reducer.main()
    logging.info('Profile reduction complete!')
def cli():
    """
    Command-line entry point for downloading FASTQ files from SRA.

    Parses the path, run info table, naming column and thread count, then runs SRAdownload with
    the parsed values.
    """
    # Parser for arguments
    parser = ArgumentParser(
        description='Downloads and compresses FASTQ files from SRA')
    parser.add_argument('-p',
                        '--path',
                        required=True,
                        help='Path to folder containing necessary tables')
    parser.add_argument(
        '-r',
        '--runinfotable',
        default='SraRunInfo.csv',
        help=
        'Name of SRA accession table from NCBI (must be in the supplied path). Generate the table '
        'from NCBI SRA '
        'e.g. https://www.ncbi.nlm.nih.gov/sra?LinkName=bioproject_sra_all&from_uid=309770 '
        'Select Send to: -> File -> RunInfo. Default is SraRunInfo.csv')
    parser.add_argument(
        '-n',
        '--name',
        choices=['Run', 'LibraryName', 'Sample', 'BioSample', 'SampleName'],
        default='SampleName',
        help=
        'Column name to use for the final naming of the FASTQ files. Default is SampleName'
    )
    # type=int keeps a user-supplied value the same type as the integer default; without it
    # argparse would hand over the raw command-line string
    parser.add_argument(
        '-t',
        '--threads',
        default=multiprocessing.cpu_count() - 1,
        type=int,
        help=
        'Number of threads. Default is the number of cores in the system minus one'
    )
    # Get the arguments into an object
    arguments = parser.parse_args()
    SetupLogging()
    download = SRAdownload(path=arguments.path,
                           runinfotable=arguments.runinfotable,
                           column_name=arguments.name,
                           threads=arguments.threads)
    download.main()
    logging.info('SRA download complete!')
Пример #11
0
def identity(args):
    """
    Run either VtyperIP or CustomIP on the samples found by Filer.filer.

    A MetadataObject is attached to args as args.runmetadata, and its samples attribute is
    populated from Filer.filer(args). VtyperIP is used when args.analysistype is 'vtyper';
    every other analysis type is handled by CustomIP.

    :param args: Object providing the attributes read below
    """
    SetupLogging(debug=args.debug)
    # Attach the sample metadata to the args object
    args.runmetadata = MetadataObject()
    args.runmetadata.samples = Filer.filer(args)
    # Both analyses write their reports to the same sub-folder of the sequence path
    report_dir = os.path.join(args.sequencepath, 'reports')
    if args.analysistype == 'vtyper':
        vtyper_analysis = VtyperIP(
            metadataobject=args.runmetadata.samples,
            analysistype=args.analysistype,
            reportpath=report_dir,
        )
        vtyper_analysis.vtyper()
        return
    custom_analysis = CustomIP(
        metadataobject=args.runmetadata.samples,
        sequencepath=args.sequencepath,
        reportpath=report_dir,
        primerfile=args.primerfile,
        min_amplicon_size=args.minampliconsize,
        max_amplicon_size=args.maxampliconsize,
        primer_format=args.primer_format,
        mismatches=args.mismatches,
        export_amplicons=args.export_amplicons,
        contigbreaks=args.contigbreaks,
    )
    custom_analysis.main()
Пример #12
0
def legacy(args):
    """
    Run the legacy Vtyper or Custom analysis, passing the args object itself as the input object.

    The args object is extended with reportpath and runmetadata attributes before use. Vtyper is
    used when args.analysistype is 'vtyper'; every other analysis type is handled by Custom.

    :param args: Object providing the attributes read below
    """
    SetupLogging(debug=args.debug)
    # Add the attributes the legacy classes read from the input object
    args.reportpath = os.path.join(args.sequencepath, 'reports')
    args.runmetadata = MetadataObject()
    # Populate the sample metadata
    args.runmetadata.samples = Filer.filer(args)
    if args.analysistype == 'vtyper':
        # vtx typing
        legacy_vtyper = Vtyper(
            inputobject=args,
            analysistype='vtyper_legacy',
            mismatches=args.mismatches,
        )
        legacy_vtyper.vtyper()
        return
    legacy_epcr = Custom(
        inputobject=args,
        analysistype='custom_epcr',
        primerfile=args.primerfile,
        ampliconsize=args.maxampliconsize,
        mismatches=args.mismatches,
        primer_format=args.primer_format,
        export_amplicons=args.export_amplicons,
    )
    legacy_epcr.main()
def main():
    """
    Command-line entry point that runs VirusTyping on a folder of sequence files.

    Parses --sequencepath, --reportpath and --debug, calls SetupLogging with the debug flag, and
    runs VirusTyping with the two supplied paths.
    """
    arg_parser = ArgumentParser(description='Perform virus typing')
    arg_parser.add_argument(
        '-s', '--sequencepath',
        required=True,
        help='Path of folder containing .ab1 files to process.')
    arg_parser.add_argument(
        '-r', '--reportpath',
        required=True,
        help='Path in which reports are to be created')
    arg_parser.add_argument(
        '-d', '--debug',
        action='store_true',
        help='Allow debug-level logging to be printed to the terminal')
    # Collect the parsed options
    options = arg_parser.parse_args()
    SetupLogging(debug=options.debug)
    typer = VirusTyping(
        sequencepath=options.sequencepath,
        reportpath=options.reportpath,
    )
    typer.main()
Пример #14
0
 def __init__(self, start, sequencepath, referencefilepath, scriptpath,
              debug):
     """
     Store the run settings for the assembly typing analysis and validate the supplied folders.

     :param start: Start time of the analyses (stored as both starttime and start)
     :param sequencepath: Folder to process; must be an existing directory
     :param referencefilepath: Folder of reference files; must be an existing directory
     :param scriptpath: Stored as homepath
     :param debug: Passed on to SetupLogging
     """
     self.debug = debug
     SetupLogging(self.debug)
     logging.info('Welcome to the CFIA bacterial typing pipeline {version}'.format(
         version=__version__))
     # Each supplied path is stored under two attribute names
     self.sequencepath = self.path = os.path.join(sequencepath)
     self.targetpath = self.reffilepath = os.path.join(referencefilepath)
     # The start time is likewise stored under two attribute names
     self.starttime = self.start = start
     # Always use one fewer than the number of cpus in the system
     self.cpus = multiprocessing.cpu_count() - 1
     # Both folders must already exist
     assert os.path.isdir(self.sequencepath), \
         'Supplied path location is not a valid directory {0!r:s}'.format(self.sequencepath)
     self.reportpath = os.path.join(self.sequencepath, 'reports')
     assert os.path.isdir(self.targetpath), \
         'Reference file path is not a valid directory {0!r:s}'.format(self.targetpath)
     self.commit = __version__
     self.homepath = scriptpath
     self.analysistype = 'assembly_typing'
     self.genus_specific = False
     self.logfile = os.path.join(self.sequencepath, 'logfile')
     self.pipeline = True
     # Containers for the metadata
     self.metadata = []
     self.runmetadata = MetadataObject()
Пример #15
0
def cli():
    """
    Command-line entry point for the Attribute pipeline.

    Defines the four required options (--allelepath, --targetpath, --reportpath, --gene), calls
    SetupLogging, parses the command line, and runs Attribute with the parsed values.
    """
    arg_parser = ArgumentParser(
        description='Finds the target sequences in allele files. Useful if you have an allele '
                    'database, and want to attribute subtypes e.g. STEC subtyping to your newly '
                    'expanded alleles')
    arg_parser.add_argument(
        '-a', '--allelepath',
        required=True,
        help='Name and path of folder containing generated allele files')
    arg_parser.add_argument(
        '-t', '--targetpath',
        required=True,
        help='Name and path of folder containing sequencing target sequences')
    arg_parser.add_argument(
        '-r', '--reportpath',
        required=True,
        help='Name and path of folder in which reports are to be created')
    arg_parser.add_argument(
        '-g', '--gene',
        required=True,
        choices=['stx1A', 'stx1B', 'stx2A', 'stx2B'],
        help='Name of gene being profiled')
    # Logging is set up before the command line is parsed
    SetupLogging()
    options = arg_parser.parse_args()
    # Launch the attribution
    attribution = Attribute(
        allelepath=options.allelepath,
        targetpath=options.targetpath,
        reportpath=options.reportpath,
        gene=options.gene,
    )
    attribution.main()
    logging.info('Allele Attribution complete!')
Пример #16
0
def cli():
    """
    Command-line entry point for the AlleleFinder pipeline.

    The options are defined on a parser created with add_help=False so that it can be used as a
    parent parser; a second parser built from it performs the actual parsing. After the
    AlleleFinder pipeline has run, the help-less parser is returned.

    :return: The ArgumentParser (add_help=False) holding the option definitions
    """
    # Parser for arguments
    parser = ArgumentParser(add_help=False)
    parser.add_argument('-p', '--path', required=True, help='Specify path.')
    parser.add_argument(
        '-t',
        '--targetfile',
        required=True,
        help=
        'Name of file containing probe sequence to search. The file can be a multi-FASTA. The '
        'header for each sequence must be unique, as it will be used as the name of the gene. '
        'This file must be located in the supplied path folder.')
    parser.add_argument('-f',
                        '--fasta_path',
                        help='Path to folder containing local files to BLAST.')
    parser.add_argument(
        '-g',
        '--genesippr',
        action='store_true',
        help=
        'Enable mode to specifically create alleles for the defined set of genes used in '
        'the GeneSippr analysis')
    parser.add_argument(
        '-m',
        '--metadatafile',
        help=
        'Name of combined metadata file used to parse the genus of each local assembly. This '
        'file must be located in the supplied path folder. NOTE: This is only required if '
        'performing the "GeneSippr-specific" analysis')
    parser.add_argument(
        '-b',
        '--blast',
        choices=['local', 'remote', 'both'],
        default='local',
        help=
        'Choose whether to run either local or remote BLAST, or both. Default is local'
    )
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='Enable verbose mode')
    parser.add_argument(
        '-c',
        '--cutoff',
        default=80,
        type=int,
        help='Percent identity cutoff to use when parsing BLAST outputs')
    # store_false: the value is True unless the flag is supplied
    parser.add_argument(
        '-n',
        '--no_target_alleles',
        action='store_false',
        help=
        'Do not include the target alleles in the output allele. If the alleles are stored, they '
        'will be the first allele in the multi-FASTA file (allele_0 or '
        'allele_COMPUTED_HASH - see below)')
    parser.add_argument(
        '-a',
        '--allele_hashing',
        action='store_true',
        help=
        'Use the first eight digits of the computed hash of the allele sequence as the allele '
        'identifier (e.g. _503e35061a) rather than the arbitrary _0, _1, etc.')
    parser.add_argument(
        '-aa',
        '--amino_acid',
        choices=['targets_nt', 'targets_aa'],
        help=
        'Find the amino acid sequence of alleles. The target alleles supplied can either be '
        'nucleotide or amino acid. Default is nucleotide')
    parser.add_argument(
        '-o',
        '--one_based',
        action='store_true',
        help='Use 1-based indexing rather than the default 0-based')
    # Wrap the help-less parser in a regular one to do the parsing
    arg_parser = ArgumentParser(parents=[parser])
    # Get the arguments into an object
    arguments = arg_parser.parse_args()
    SetupLogging(debug=arguments.verbose)
    # Run the pipeline
    pipeline = AlleleFinder(path=arguments.path,
                            targetfile=arguments.targetfile,
                            analysis_type=arguments.blast,
                            fasta_path=arguments.fasta_path,
                            genesippr=arguments.genesippr,
                            metadata_file=arguments.metadatafile,
                            cutoff=arguments.cutoff,
                            target_alleles=arguments.no_target_alleles,
                            allele_hashing=arguments.allele_hashing,
                            amino_acid=arguments.amino_acid,
                            one_based=arguments.one_based)
    pipeline.main()
    logging.info('Allele finding complete')
    return parser
Пример #17
0
            .format(tf=self.test_folder)
        self.assembly_typer = assembly_typer
        self.validate_pass = False


# Script entry point: compare the reports in a test folder against those in a reference folder
# using ValidateCowbat, and log whether the validation passed
if __name__ == '__main__':
    # Parser for arguments
    parser = ArgumentParser(description='Run integration tests on COWBAT pipeline')
    parser.add_argument('-r', '--reference_folder',
                        required=True,
                        help='Path to reference folder with CSV reports with expected results.')
    parser.add_argument('-t', '--test_folder',
                        required=True,
                        help='Path to test folder with CSV reports with observed results .')
    parser.add_argument('-a', '--assembly',
                        action='store_true',
                        help='The assembly typing pipeline was used to process the run, rather than full COWBAT')
    # Get the arguments into an object
    args = parser.parse_args()
    # Pretty logging!
    SetupLogging()
    # Test the reports.
    validate_outputs = ValidateCowbat(reference_folder=args.reference_folder,
                                      test_folder=args.test_folder,
                                      assembly_typer=args.assembly)
    validate_outputs.validate_cowbat()
    # validate_pass is read after validate_cowbat() has run to choose the final log message
    if validate_outputs.validate_pass:
        logging.info('COWBAT successfully validated! :D')
    else:
        logging.error('COWBAT not successfully validated.')
Пример #18
0
def cli():
    """
    Command-line entry point for the probe-finding pipeline.

    The parser returned by allele_finder.cli() is used as the parent parser, and the
    probe-specific options are added on top. When --runblast is supplied, AlleleFinder is run
    before Probes.
    """
    # Import the argument parser from allele_finder.py
    parent_parser = allele_finder.cli()
    # Some options below (-c, -aa, -o) may already be defined by the parent parser;
    # 'resolve' lets the definitions in this function replace them instead of raising an
    # argparse conflicting-option error
    parser = ArgumentParser(parents=[parent_parser],
                            conflict_handler='resolve')
    parser.add_argument('-min',
                        '--min',
                        default=20,
                        type=int,
                        help='Minimum size of probe to create')
    parser.add_argument('-max',
                        '--max',
                        default=50,
                        type=int,
                        help='Maximum size of probe to create')
    # type=int keeps a user-supplied cutoff the same type as the integer default
    parser.add_argument(
        '-c',
        '--cutoff',
        default=70,
        type=int,
        help='Cutoff percent identity of a nucleotide location to use')
    parser.add_argument('-gc',
                        '--percentgc',
                        default=50,
                        type=int,
                        help='Desired percent GC of the probe')
    parser.add_argument(
        '-r',
        '--runblast',
        action='store_true',
        help=
        'Run BLAST analyses on the supplied target file. If not enabled, then the program assumes '
        'that the supplied file includes all the desired alleles to use to create the probe'
    )
    parser.add_argument(
        '-aa',
        '--amino_acid',
        choices=['targets_nt', 'targets_aa'],
        help=
        'Find the amino acid sequence of alleles. The target alleles supplied can either be '
        'nucleotide or amino acid. Default is nucleotide')
    parser.add_argument(
        '-o',
        '--one_based',
        action='store_true',
        help='Use 1-based indexing rather than the default 0-based')
    # Get the arguments into an object
    arguments = parser.parse_args()
    SetupLogging(debug=arguments.verbose)
    if arguments.runblast:
        # Run the allele-finding pipeline
        finder = allele_finder.AlleleFinder(
            path=arguments.path,
            targetfile=arguments.targetfile,
            analysis_type=arguments.blast,
            fasta_path=arguments.fasta_path,
            genesippr=arguments.genesippr,
            metadata_file=arguments.metadatafile,
            cutoff=arguments.cutoff,
            amino_acid=arguments.amino_acid,
            one_based=arguments.one_based)
        finder.main()
    # Run the pipeline
    probes = Probes(path=arguments.path,
                    targetfile=arguments.targetfile,
                    min_length=arguments.min,
                    max_length=arguments.max,
                    cutoff=arguments.cutoff,
                    perc_gc=arguments.percentgc,
                    blast=arguments.runblast,
                    one_based=arguments.one_based)
    probes.main()
    logging.info('Probe finding complete')
Пример #19
0
 def __init__(self, args):
     """
     Initialises the variables required for this class

     The sequence path and report path are created if they do not exist; the reference file
     path and (when supplied) the custom sample sheet must already exist.

     :param args: object of arguments passed to the script; the attributes read are debug,
     sequencepath, referencefilepath, numreads, preprocess, startingtime, customsamplesheet,
     basicassembly, threads and homepath
     :raises AssertionError: if the custom sample sheet, the sequence path or the reference
     file path cannot be found
     """
     self.debug = args.debug
     SetupLogging(self.debug)
     logging.info(
         'Welcome to the CFIA OLC Workflow for Bacterial Assembly and Typing (COWBAT) version {version}'
         .format(version=__version__))
     # Define variables from the arguments - there may be a more streamlined way to do this
     self.args = args
     # expanduser must run before abspath: abspath would prefix the working directory to a
     # leading '~', after which expanduser no longer recognises it. expanduser leaves paths
     # without a leading '~' untouched, so it can be applied unconditionally
     self.path = os.path.abspath(os.path.expanduser(args.sequencepath))
     self.sequencepath = self.path
     self.reffilepath = os.path.abspath(
         os.path.expanduser(args.referencefilepath))
     self.numreads = args.numreads
     self.preprocess = args.preprocess
     # Define the start time
     self.starttime = args.startingtime
     if args.customsamplesheet:
         # Read the value from args: the attribute on self is only being created here
         self.customsamplesheet = os.path.abspath(
             os.path.expanduser(args.customsamplesheet))
     else:
         self.customsamplesheet = args.customsamplesheet
     if self.customsamplesheet:
         assert os.path.isfile(self.customsamplesheet), 'Cannot find custom sample sheet as specified {css}' \
             .format(css=self.customsamplesheet)
     self.basicassembly = args.basicassembly
     # Without any sample sheet, fall back to the basic assembly
     if not self.customsamplesheet and not os.path.isfile(
             os.path.join(self.path, 'SampleSheet.csv')):
         self.basicassembly = True
         logging.warning(
             'Could not find a sample sheet. Performing basic assembly (no run metadata captured)'
         )
     # Use the argument for the number of threads to use, or default to the number of cpus in the system
     # minus one
     self.cpus = args.threads if args.threads else multiprocessing.cpu_count(
     ) - 1
     # Assertions to ensure that the provided variables are valid
     make_path(self.path)
     assert os.path.isdir(
         self.path
     ), 'Supplied path location is not a valid directory {0!r:s}'.format(
         self.path)
     self.reportpath = os.path.join(self.path, 'reports')
     make_path(self.reportpath)
     assert os.path.isdir(self.reffilepath), 'Reference file path is not a valid directory {0!r:s}' \
         .format(self.reffilepath)
     self.commit = __version__
     self.homepath = args.homepath
     self.logfile = os.path.join(self.path, 'logfile')
     self.runinfo = str()
     self.pipeline = True
     self.qualityobject = MetadataObject()
     # Initialise the metadata object
     self.runmetadata = MetadataObject()