def setUpClass(self):
    """Call settings.parse_config() once and keep the result as config_copy.

    The import is done lazily inside the method, so wrairlib.settings is
    only loaded when this fixture runs.

    NOTE(review): unittest invokes setUpClass on the class itself, so this
    def presumably sits under a @classmethod decorator that is not visible
    here -- confirm.
    """
    from wrairlib import settings

    parsed_config = settings.parse_config()
    self.config_copy = parsed_config
def _allocate_cpus(num_cpu, num_samples):
    """Return (processes, cpu_per_project, idlecpu) for a mapping run.

    num_samples must be greater than zero.

    With at least as many samples as cpus, every project runs on a single
    cpu and the pool gets one worker per cpu.  With fewer samples than cpus,
    the cpus are divided evenly between the samples and the remainder is
    returned as idlecpu so the caller can hand it out one cpu at a time.
    """
    if num_samples >= num_cpu:
        return num_cpu, 1, 0
    cpu_per_project, idlecpu = divmod(num_cpu, num_samples)
    return num_cpu // cpu_per_project, cpu_per_project, idlecpu


def _build_jobs(samples, includeplats, cpu_per_project, idlecpu):
    """Set up a mapping project per usable sample.

    Returns (jobs, failed) where jobs is the list of option dictionaries to
    pass to runProjectParallel and failed is a list of
    (sample name, error text) tuples for samples whose project setup raised
    FailedCommand.  Disabled samples and samples without a reference genome
    location are skipped with an info message.
    """
    jobs = []
    failed = []
    for sample in samples:
        if sample.disabled or not sample.refgenomelocation:
            logger.info(
                "Skipping %s because either commented out or missing reference path"
                % sample.name)
            continue

        project_directory = "%s__%s__%s" % (
            sample.name, sample.midkeyname, sample.genotype)

        # If any of the commands fail, bail on the sample and do not try to
        # run the project
        try:
            newMapping(project_directory, force=True)
            setRef(project_directory, sample.refgenomelocation)
            addRun(project_directory,
                   os.path.join(readsbysampledir, sample.name),
                   includeplats)
        except FailedCommand as e:
            failed.append((sample.name, str(e)))
            logger.critical(
                "Leaving sample %s unmapped due to errors" % sample.name)
            continue

        # Allocate idle cpu to projects until none are left
        cpu = cpu_per_project
        if idlecpu > 0:
            cpu += 1
            idlecpu -= 1

        jobops = {
            'projectdir': project_directory,
            'cpu': cpu,
            'bam': True,
            'numn': 0
        }
        # If the sample has primers listed then set them to be trimmed
        if sample.primers:
            jobops.update(vt=sample.primers, tr=True, trim=True)
        jobs.append(jobops)
    return jobs, failed


def main():
    """Entry point: map every usable sample listed in the runfile.

    Steps:
      1. Parse the command line and call global_setup with either the config
         parsed from args.configpath or settings.config.
      2. Parse args.runfile with RunFile; if that raises ValueError the error
         is logged and the process exits with status 1.
      3. Create a mapping project for each usable sample and run all of them
         through a multiprocessing Pool via runProjectParallel.
      4. Log every sample that failed during setup or during the run.

    numCPU and readsbysampledir are read as module-level names (presumably
    populated by global_setup -- confirm).

    A runfile with no samples is handled by skipping the pool entirely;
    previously it raised ZeroDivisionError while dividing the cpus.
    """
    args = get_args()
    if args.configpath is not None:
        global_setup(settings.parse_config(args.configpath))
        config_message = "Using config file {}".format(args.configpath)
    else:
        global_setup(settings.config)
        config_message = "Using default config file from settings"
    logger.info("=== mapSamples.py (pyWrairLib {}) ===".format(__version__))
    logger.info(config_message)

    runfile = args.runfile
    with open(runfile) as fh:
        logger.debug("Parsing Runfile %s" % runfile)
        try:
            rf = RunFile(fh)
        except ValueError as e:
            logger.critical(
                "There is an error with the runfile {}".format(runfile))
            logger.error("Error was {}".format(e))
            sys.exit(1)

    numSamples = len(rf.samples)
    logger.info("%d samples to map" % numSamples)

    failed_samples = []
    # Nothing to divide the cpus between when the runfile lists no samples
    if numSamples > 0:
        # Try to maximize the cpu processing
        processes, cpu_per_project, idlecpu = _allocate_cpus(
            numCPU, numSamples)
        p = Pool(processes)
        try:
            # Report the real worker count, which can differ from numCPU
            logger.info(
                "Starting a Pool of {} workers to process {} samples".format(
                    processes, numSamples))
            logger.info("Each sample's project will utilize {} cpus".format(
                cpu_per_project))

            jobs, failed_samples = _build_jobs(
                rf.samples, args.includeplats, cpu_per_project, idlecpu)

            for pdir, output, retcode in p.map(runProjectParallel, jobs):
                if retcode != 0:
                    failed_samples.append((pdir, output))
        finally:
            # Always release the worker processes, even on error
            p.close()
            p.join()

    for sample, err in failed_samples:
        logger.error("%s failed due to %s" % (sample, err))
    logger.info("=== {} mapSamples.py has finished ===".format(__version__))
def _allocate_cpus(num_cpu, num_samples):
    """Return (processes, cpu_per_project, idlecpu) for a mapping run.

    num_samples must be greater than zero.

    With at least as many samples as cpus, every project runs on a single
    cpu and the pool gets one worker per cpu.  With fewer samples than cpus,
    the cpus are divided evenly between the samples and the remainder is
    returned as idlecpu so the caller can hand it out one cpu at a time.
    """
    if num_samples >= num_cpu:
        return num_cpu, 1, 0
    cpu_per_project, idlecpu = divmod(num_cpu, num_samples)
    return num_cpu // cpu_per_project, cpu_per_project, idlecpu


def _build_jobs(samples, includeplats, cpu_per_project, idlecpu):
    """Set up a mapping project per usable sample.

    Returns (jobs, failed) where jobs is the list of option dictionaries to
    pass to runProjectParallel and failed is a list of
    (sample name, error text) tuples for samples whose project setup raised
    FailedCommand.  Disabled samples and samples without a reference genome
    location are skipped with an info message.
    """
    jobs = []
    failed = []
    for sample in samples:
        if sample.disabled or not sample.refgenomelocation:
            logger.info(
                "Skipping %s because either commented out or missing reference path"
                % sample.name)
            continue

        project_directory = "%s__%s__%s" % (
            sample.name, sample.midkeyname, sample.genotype)

        # If any of the commands fail, bail on the sample and do not try to
        # run the project
        try:
            newMapping(project_directory, force=True)
            setRef(project_directory, sample.refgenomelocation)
            addRun(project_directory,
                   os.path.join(readsbysampledir, sample.name),
                   includeplats)
        except FailedCommand as e:
            failed.append((sample.name, str(e)))
            logger.critical(
                "Leaving sample %s unmapped due to errors" % sample.name)
            continue

        # Allocate idle cpu to projects until none are left
        cpu = cpu_per_project
        if idlecpu > 0:
            cpu += 1
            idlecpu -= 1

        jobops = {
            'projectdir': project_directory,
            'cpu': cpu,
            'bam': True,
            'numn': 0
        }
        # If the sample has primers listed then set them to be trimmed
        if sample.primers:
            jobops.update(vt=sample.primers, tr=True, trim=True)
        jobs.append(jobops)
    return jobs, failed


def main():
    """Entry point: map every usable sample listed in the runfile.

    Steps:
      1. Parse the command line and call global_setup with either the config
         parsed from args.configpath or settings.config.
      2. Parse args.runfile with RunFile; if that raises ValueError the error
         is logged and the process exits with status 1.
      3. Create a mapping project for each usable sample and run all of them
         through a multiprocessing Pool via runProjectParallel.
      4. Log every sample that failed during setup or during the run.

    numCPU and readsbysampledir are read as module-level names (presumably
    populated by global_setup -- confirm).

    A runfile with no samples is handled by skipping the pool entirely;
    previously it raised ZeroDivisionError while dividing the cpus.
    """
    args = get_args()
    if args.configpath is not None:
        global_setup(settings.parse_config(args.configpath))
        config_message = "Using config file {}".format(args.configpath)
    else:
        global_setup(settings.config)
        config_message = "Using default config file from settings"
    logger.info("=== mapSamples.py (pyWrairLib {}) ===".format(__version__))
    logger.info(config_message)

    runfile = args.runfile
    with open(runfile) as fh:
        logger.debug("Parsing Runfile %s" % runfile)
        try:
            rf = RunFile(fh)
        except ValueError as e:
            logger.critical(
                "There is an error with the runfile {}".format(runfile))
            logger.error("Error was {}".format(e))
            sys.exit(1)

    numSamples = len(rf.samples)
    logger.info("%d samples to map" % numSamples)

    failed_samples = []
    # Nothing to divide the cpus between when the runfile lists no samples
    if numSamples > 0:
        # Try to maximize the cpu processing
        processes, cpu_per_project, idlecpu = _allocate_cpus(
            numCPU, numSamples)
        p = Pool(processes)
        try:
            # Report the real worker count, which can differ from numCPU
            logger.info(
                "Starting a Pool of {} workers to process {} samples".format(
                    processes, numSamples))
            logger.info("Each sample's project will utilize {} cpus".format(
                cpu_per_project))

            jobs, failed_samples = _build_jobs(
                rf.samples, args.includeplats, cpu_per_project, idlecpu)

            for pdir, output, retcode in p.map(runProjectParallel, jobs):
                if retcode != 0:
                    failed_samples.append((pdir, output))
        finally:
            # Always release the worker processes, even on error
            p.close()
            p.join()

    for sample, err in failed_samples:
        logger.error("%s failed due to %s" % (sample, err))
    logger.info("=== {} mapSamples.py has finished ===".format(__version__))
def setUp(self):
    """Parse the config at wrairlib.settings.path_to_config into self.config.

    The import is done lazily inside the method, so wrairlib.settings is
    only loaded when this fixture runs.
    """
    from wrairlib.settings import path_to_config, parse_config

    loaded_config = parse_config(path_to_config)
    self.config = loaded_config
def setUpClass(self):
    """Store the result of settings.parse_config() as config_copy.

    parse_config is called with no arguments; the settings module is
    imported inside the method rather than at module level.

    NOTE(review): unittest invokes setUpClass on the class itself, so this
    def presumably sits under a @classmethod decorator that is not visible
    here -- confirm.
    """
    from wrairlib import settings

    config_snapshot = settings.parse_config()
    self.config_copy = config_snapshot