Exemplo n.º 1
0
 def setUpClass( self ):
     """Parse the wrairlib configuration once and keep it on ``config_copy``.

     ``parse_config`` is called with no arguments, so whatever default it
     uses decides which configuration gets loaded.
     """
     # NOTE(review): unittest invokes setUpClass on the class itself, which
     # requires @classmethod; no decorator is visible in this excerpt - confirm.
     from wrairlib.settings import parse_config
     self.config_copy = parse_config()
Exemplo n.º 2
0
def main():
    """Map every enabled sample listed in the runfile given on the command line.

    Steps:
      1. Run ``global_setup`` with the config parsed from ``args.configpath``
         when it is supplied, otherwise with ``settings.config``.
      2. Parse ``args.runfile`` into a ``RunFile``; a ``ValueError`` from the
         parser is logged and the process exits with status 1.
      3. For each sample that is not disabled and has a reference location,
         create its project (``newMapping``, ``setRef``, ``addRun``) and queue
         a job dictionary for it.
      4. Run the queued jobs through a multiprocessing pool and log every
         sample that failed, either during setup or with a non-zero return code.

    ``numCPU`` and ``readsbysampledir`` are read from module scope; they are
    presumably populated by ``global_setup`` - verify against the rest of the
    module.
    """
    args = get_args()
    if args.configpath is not None:
        config = settings.parse_config(args.configpath)
        config_msg = "Using config file {}".format(args.configpath)
    else:
        config = settings.config
        config_msg = "Using default config file from settings"
    global_setup(config)
    logger.info("=== mapSamples.py (pyWrairLib {}) ===".format(__version__))
    logger.info(config_msg)

    runfile = args.runfile
    with open(runfile) as fh:
        logger.debug("Parsing Runfile %s" % runfile)
        try:
            rf = RunFile(fh)
        except ValueError as e:
            logger.critical(
                "There is an error with the runfile {}".format(runfile))
            logger.error("Error was {}".format(e))
            sys.exit(1)

        numSamples = len(rf.samples)
        logger.info("%d samples to map" % numSamples)
        jobs = []
        failed_samples = []

        # Try to maximize the cpu processing.
        if numSamples >= numCPU:
            # At least as many samples as cpus: one cpu per project.
            processes = numCPU
            cpu_per_project = 1
            idlecpu = 0
        else:
            # Fewer samples than cpus: split the cpus evenly and hand the
            # remainder out one at a time below. max() keeps an empty runfile
            # from raising ZeroDivisionError.
            cpu_per_project, idlecpu = divmod(numCPU, max(numSamples, 1))
            processes = numCPU // cpu_per_project

        p = Pool(processes)
        try:
            # Report the real pool size, which can be smaller than numCPU.
            logger.info(
                "Starting a Pool of {} workers to process {} samples".format(
                    processes, numSamples))
            logger.info("Each sample's project will utilize {} cpus".format(
                cpu_per_project))

            for sample in rf.samples:
                if sample.disabled or not sample.refgenomelocation:
                    logger.info(
                        "Skipping %s because either commented out or missing reference path"
                        % sample.name)
                    continue

                project_directory = "%s__%s__%s" % (
                    sample.name, sample.midkeyname, sample.genotype)
                # If any setup command fails, bail on the sample and do not
                # try to run its project.
                try:
                    newMapping(project_directory, force=True)
                    setRef(project_directory, sample.refgenomelocation)
                    addRun(project_directory,
                           os.path.join(readsbysampledir, sample.name),
                           args.includeplats)
                except FailedCommand as e:
                    failed_samples.append((sample.name, str(e)))
                    logger.critical(
                        "Leaving sample %s unmapped due to errors" %
                        sample.name)
                    continue

                # Allocate idle cpu to projects until none are left.
                cpu = cpu_per_project
                if idlecpu > 0:
                    cpu += 1
                    idlecpu -= 1
                jobops = {
                    'projectdir': project_directory,
                    'cpu': cpu,
                    'bam': True,
                    'numn': 0,
                }
                # If the sample has primers listed then set them to be trimmed.
                if sample.primers:
                    jobops.update(vt=sample.primers, tr=True, trim=True)
                jobs.append(jobops)

            for pdir, output, retcode in p.map(runProjectParallel, jobs):
                if retcode != 0:
                    failed_samples.append((pdir, output))
        finally:
            # Always release the worker processes, even when setup or
            # mapping raised.
            p.close()
            p.join()

        for sample, err in failed_samples:
            logger.error("%s failed due to %s" % (sample, err))

    logger.info("=== {} mapSamples.py has finished ===".format(__version__))
Exemplo n.º 3
0
def main( ):
    """Entry point: set up configuration, parse the runfile and map its samples.

    The configuration comes from ``args.configpath`` when given, otherwise
    from ``settings.config``. A ``ValueError`` raised while building the
    ``RunFile`` is logged and ends the process with exit status 1. Samples
    that are disabled or lack a reference location are skipped; every other
    sample gets a project prepared and a job queued for the worker pool.
    Failures from project preparation (``FailedCommand``) and from jobs
    returning a non-zero code are collected and logged at the end.

    ``numCPU`` and ``readsbysampledir`` are module-level names not defined in
    this function - presumably set up by ``global_setup``; confirm.
    """
    args = get_args()
    banner = "=== mapSamples.py (pyWrairLib {}) ===".format( __version__ )
    if args.configpath is not None:
        global_setup( settings.parse_config( args.configpath ) )
        logger.info( banner )
        logger.info( "Using config file {}".format( args.configpath ) )
    else:
        global_setup( settings.config )
        logger.info( banner )
        logger.info( "Using default config file from settings" )

    runfile = args.runfile
    with open( runfile ) as fh:
        logger.debug( "Parsing Runfile %s" % runfile )
        try:
            rf = RunFile( fh )
        except ValueError as e:
            logger.critical( "There is an error with the runfile {}".format(runfile) )
            logger.error( "Error was {}".format(e) )
            sys.exit( 1 )
        logger.info( "%d samples to map" % len( rf.samples ) )
        jobs = []
        failed_samples = []

        numSamples = len( rf.samples )

        # Try to maximize the cpu processing
        # if there are more samples than cpus, then just run each project with a single cpu
        if numSamples >= numCPU:
            processes = numCPU
            cpu_per_project = 1
            idlecpu = 0
        # If there are fewer samples than cpus, then run each sample with as many cpus as possible
        else:
            # An empty runfile would otherwise divide by zero here
            cpu_per_project, idlecpu = divmod( numCPU, max( numSamples, 1 ) )
            processes = numCPU // cpu_per_project

        p = Pool( processes )
        try:
            # Log the number of workers actually started, not the raw cpu count
            logger.info( "Starting a Pool of {} workers to process {} samples".format(processes,numSamples) )
            logger.info( "Each sample's project will utilize {} cpus".format(cpu_per_project) )

            for sample in rf.samples:
                if not sample.disabled and sample.refgenomelocation:
                    project_directory = "%s__%s__%s" % (sample.name, sample.midkeyname, sample.genotype)
                    # If any of the commands fail, then bail on the sample; don't try to run the project
                    try:
                        newMapping( project_directory, force=True )
                        setRef( project_directory, sample.refgenomelocation )
                        addRun( project_directory,
                            os.path.join( readsbysampledir, sample.name ),
                            args.includeplats
                        )
                    except FailedCommand as e:
                        failed_samples.append( (sample.name, str(e)) )
                        logger.critical( "Leaving sample %s unmapped due to errors" % sample.name )
                        continue
                    # Allocate idle cpu to project until none are left
                    cpu = cpu_per_project
                    if idlecpu > 0:
                        cpu += 1
                        idlecpu -= 1
                    jobops = {'projectdir':project_directory, 'cpu': cpu, 'bam': True, 'numn': 0}
                    # If the sample has primers listed then set them to be trimmed
                    if sample.primers:
                        jobops.update( vt=sample.primers, tr=True, trim=True )
                    jobs.append( jobops )
                else:
                    logger.info( "Skipping %s because either commented out or missing reference path" % sample.name )

            for pdir, output, retcode in p.map( runProjectParallel, jobs ):
                if retcode != 0:
                    failed_samples.append( (pdir, output) )
        finally:
            # Shut the workers down whether or not mapping succeeded
            p.close()
            p.join()

        for sample, err in failed_samples:
            logger.error( "%s failed due to %s" % (sample, err) )

    logger.info( "=== {} mapSamples.py has finished ===".format( __version__ ) )
Exemplo n.º 4
0
 def setUp(self):
     """Load the configuration found at ``path_to_config`` into ``self.config``."""
     from wrairlib import settings
     self.config = settings.parse_config(settings.path_to_config)
Exemplo n.º 5
0
    def setUp(self):
        """Parse the file at ``wrairlib.settings.path_to_config`` before each test.

        The parsed result is stored on ``self.config``.
        """
        from wrairlib import settings

        config_path = settings.path_to_config
        self.config = settings.parse_config(config_path)
Exemplo n.º 6
0
 def setUpClass(self):
     """Store the result of ``parse_config()`` (called without arguments) on ``config_copy``."""
     # NOTE(review): unittest calls setUpClass on the class, so it must be a
     # @classmethod; the decorator is not visible in this excerpt - confirm.
     from wrairlib.settings import parse_config
     self.config_copy = parse_config()