Example #1
	def __init__(self, cmd, path_stdout=os.getcwd(), logger=False, jobname='NoJobName'): # alternative default: file_output=os.path.join(os.getcwd(), __name__+'.tmp.log')
		self.path_stdout = HelperUtils.check_if_writable(path_stdout)
		if logger: #TODO: check that logger is of class Logger?
			self.logger = logger
		else: # create a new logger, with a name like LaunchSubprocess_NoLoggerParsed_2014-05-15_23.08.59.log
			#self.logger = Logger(self.__class__.__name__+"_NoLoggerParsed", path_stdout).get() # BEFORE JUNE 2014
			self.logger = Logger(name=self.__class__.__name__+"_NoLoggerParsed"+HelperUtils.gen_timestamp(), log_dir=path_stdout, log_format=1, enabled=True).get()
			
		self.jobname = jobname
		self.cmd = cmd
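
A minimal, self-contained sketch of the fallback-logger pattern above, assuming only the standard library; the real Logger and HelperUtils classes are not shown in this excerpt, so gen_timestamp below is a hypothetical stand-in:

import logging
import os
import time

def gen_timestamp():
	# hypothetical stand-in for HelperUtils.gen_timestamp()
	return time.strftime("_%Y-%m-%d_%H.%M.%S")

def fallback_logger(name, log_dir):
	# build a timestamped file logger when the caller did not pass one in
	logger = logging.getLogger(name + gen_timestamp())
	handler = logging.FileHandler(os.path.join(log_dir, logger.name + ".log"))
	handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
	logger.addHandler(handler)
	logger.setLevel(logging.INFO)
	return logger

log = fallback_logger("LaunchSubprocess_NoLoggerParsed", os.getcwd())
log.info("fallback logger ready")
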
def submit():
	files = glob.glob(path_snplist+'/*.txt') # NB: the folder also contains "not_mapped.log"
	#files = ['/home/unix/ptimshel/git/snpsnap/samples/sample_10randSNPs_fewmatches.list']
	files.sort()
	processes = []
	for (counter, filename) in enumerate(files, start=1):
		pheno = os.path.splitext(os.path.basename(filename))[0]
		logger.info( "processing file #%d/%d: %s" % (counter, len(files), pheno) )
		user_snps_file = filename # full path
		output_dir = path_output_sub+"/"+pheno
		HelperUtils.mkdirs(output_dir)
		#TODO: consider the potential problems with the 'use' environment
		#TODO: reuse Python-2.7 && bsub [...] -- put this inside LaunchBsub!
		command_shell = "python {program:s} --user_snps_file {snplist:s} --output_dir {outputdir:s} --distance_type ld --distance_cutoff 0.5 match --N_sample_sets {N} --max_freq_deviation {freq} --max_distance_deviation {dist} --max_genes_count_deviation {gene_count}".format(program=script2call, snplist=filename, outputdir=output_dir, N=10000, freq=1, dist=5, gene_count=5)
		processes.append( LaunchBsub(cmd=command_shell, queue_name=queue_name, walltime=walltime, mem=mem, jobname=pheno, projectname='snpsnp', path_stdout=path_stdout, file_output=pheno+'.txt', no_output=False, email=email, logger=logger) )
	for p in processes:
		p.run()
		time.sleep(args.pause)
	return processes
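
The function above queues all LaunchBsub objects first and only then launches them, pausing between submissions. A runnable sketch of the same throttled-launch pattern using plain subprocess (note that Popen launches immediately, whereas LaunchBsub defers the launch to .run()):

import subprocess
import time

commands = ["echo job-%d" % i for i in range(1, 4)]  # stand-ins for the bsub calls
procs = []
for cmd in commands:
	procs.append(subprocess.Popen(cmd, shell=True))
	time.sleep(0.5)  # throttle submissions, like time.sleep(args.pause) above
for p in procs:
	p.wait()  # block until every job has finished
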
def submit():
	files = glob.glob(path_snplist+'/*.txt') # (slice with [0:2] for testing); NB: the folder also contains "not_mapped.log"
	#files = ['/home/unix/ptimshel/git/snpsnap/samples/sample_10randSNPs_fewmatches.list']
	files.sort()
	processes = []
	for (counter, filename) in enumerate(files, start=1):
		filename = re.sub(r'[()]', '', filename) #### NB: rewrites the filename string (parentheses stripped)!
		pheno = os.path.splitext(os.path.basename(filename))[0]
		logger.info( "processing file #%d/%d: %s" % (counter, len(files), pheno) )
		user_snps_file = filename # full path
		output_dir = path_output_sub+"/"+pheno
		HelperUtils.mkdirs(output_dir)
		command_shell = "python {program:s} --user_snps_file {snplist:s} --output_dir {outputdir:s} --distance_type ld --distance_cutoff 0.5 match --N_sample_sets {N} --max_freq_deviation {freq} --max_distance_deviation {dist} --max_genes_count_deviation {gene_count}".format(program=script2call, snplist=filename, outputdir=output_dir, N=N_sample_sets, freq=freq, dist=dist, gene_count=gene_count)
		#command_seq = "--user_snps_file {snplist:s} --output_dir {outputdir:s} --distance_type ld --distance_cutoff 0.5 match --N_sample_sets {N} --max_freq_deviation {freq} --max_distance_deviation {dist} --max_genes_count_deviation {gene_count}".format(snplist=filename, outputdir=output_dir, N=1000, freq=5, dist=20, gene_count=20)
		#print command_shell
		processes.append( LaunchSubprocess(cmd=command_shell, path_stdout=path_stdout, logger=logger, jobname=pheno) )
		#time.sleep(1)
		#p.run_Log(file_output=pheno+'.txt') # writes stdout and stderr to "file_output" in path_stdout; no waiting, since output goes to the file

	for p in processes:
		p.run_Pipe()
	return processes
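
Note that re.sub above rewrites only the path string, not the file on disk, so the cleaned name must correspond to a file that actually exists. A small runnable illustration of the sanitizing step (the input path is made up):

import os
import re

filename = "/data/lists/height_(GIANT).txt"  # hypothetical input path
clean = re.sub(r'[()]', '', filename)        # strips parentheses from the string only
pheno = os.path.splitext(os.path.basename(clean))[0]
print(pheno)  # height_GIANT
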
###################################### Global params ######################################
queue_name = "hour" # queue options include [bhour, bweek, priority]
#queue_name = "priority"
walltime = "59" # hh:mm, e.g. [24:00 = 1 day | 10:00 = 10 hrs | 120 = 2 hrs | 1:0 = 1 hr]
mem = "1" # GB
#email='*****@*****.**'
email = False

script2call = "/home/unix/ptimshel/git/snpsnap/snpsnap_query.py" # Updated path
current_script_name = os.path.basename(__file__).replace('.py','')

path_snplist = "/cvar/jhlab/snpsnap/data/input_lists/gwascatalog_140201_listsBIGbim"
path_output_main = "/cvar/jhlab/snpsnap/data/query/gwascatalog"

path_output_sub = path_output_main + "/output"
HelperUtils.mkdirs(path_output_sub)
path_stdout = path_output_main + "/stdout"
HelperUtils.mkdirs(path_stdout)

###################################### ARGUMENTS ######################################
args = ParseArguments()

###################################### LOGGER ######################################
## Setup-logger
#logger = Logger(__name__, path_stdout).get() # would give __name__ == '__main__'
logger = Logger(current_script_name, path_stdout).get()
#loglevel = getattr(logging, args.logger_lvl.upper(), logging.INFO) # safer: falls back to INFO on unknown level names
loglevel = getattr(logging, args.logger_lvl.upper()) # numeric level, e.g. logging.INFO == 20; raises AttributeError on unknown names
logger.setLevel(loglevel)
#logger.setLevel(logging.INFO)
#logger.setLevel(logging.WARNING)
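
The getattr lookup maps a level name from the command line to its numeric value; the commented-out three-argument form is safer because it falls back to logging.INFO on unrecognized names. A runnable illustration:

import logging

level_name = "info"  # e.g. from args.logger_lvl
print(getattr(logging, level_name.upper()))                # 20 == logging.INFO
print(getattr(logging, "nonsense".upper(), logging.INFO))  # falls back to 20 instead of raising
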
Example #5
	def __init__(self, cmd, queue_name, mem, shell_script_path, parallel=False, num_parallel=None, proc=None, shared_mem=None, runlimit=None, app=None, jobname='NoJobName', projectname='NoProjectName', path_stdout=os.getcwd(), join_outputs='yes', file_output=None, no_output=False, email=None, email_status_notification=False, email_report=False, logger=False, cmd_custom=None): # alternative default: file_output=os.path.join(os.getcwd(), __name__+'.tmp.log')
		LaunchQsub.LB_job_counter += 1 # count the number of jobs submitted
		self.job_number = LaunchQsub.LB_job_counter
		
		self.path_stdout = HelperUtils.check_if_writable(path_stdout)
		if logger: #TODO: check that logger is of class Logger?
			self.logger = logger
		else: # create a new logger, with a name like LaunchQsub_NoLoggerParsed_2014-05-15_23.08.59.log
			self.logger = Logger(name=self.__class__.__name__+"_NoLoggerParsed"+HelperUtils.gen_timestamp(), log_dir=path_stdout, log_format=1, enabled=True).get()
		# NB: the file_output variable is updated below
		if no_output:
			self.file_output = '/dev/null'
		elif file_output is None:
			file_output = "qsub_outfile_ID{job_number}_{jobname}.{ext}".format(job_number=self.job_number, jobname=jobname, ext='out')
			self.file_output = os.path.join(self.path_stdout, file_output) # NB: overwrites the parameter; consider renaming
		else:
			self.file_output = os.path.join(self.path_stdout, file_output) # NB: overwrites the parameter; consider renaming

		self.jobname = jobname
		self.projectname = projectname
		self.status = ""
		self.attempts = 0
		#TODO: overwrite output files with -oo?

		#TODO: self.mem_per_process -- -M sets a per-process (soft) memory limit
		#TODO: span[ptile=<x>], i.e. -R "span[ptile=<x>]":
		#	allocates exactly x job slots on each host; if the total process count
		#	is not divisible by x, the remainder is placed on one host.
		#	(Does this option exist on the Broad LSF 7.0.6?)
		#TODO: "-w" option to qsub to set up dependencies
		#TODO: I/O resource requests --> qsub -R "rusage[indium_io=3]" ...
		#	"df -k ." --> gets the filesystem; see /broad/tools/scripts/io_resource_for_file .
		#TODO: bsub -x -- request exclusive access to the nodes


		self.p_queue_name = queue_name # string
		self.p_mem = mem # in GB
		self.p_proc = proc 
		self.p_runlimit = runlimit 
		self.p_shared_mem = shared_mem # boolean value. 
		self.p_app = app
		self.shell_script_path = shell_script_path
		
		if parallel:
			parallel_processing = '-pe smp %d' % int(num_parallel)
		else:
			parallel_processing = ''

		self.join_outputs = join_outputs

		if join_outputs == 'no':
			join_outputs = 'n'
		elif join_outputs == 'yes':
			join_outputs = 'y'

		self.cmd = cmd # the command/program to run; appended to the qsub command below
		self.bcmd = "qsub -cwd -N {jobname} -o {output} -j {join_outputs} -q {queue} -l m_mem_free={mem}g -b n {parallel_processing} {shell_script_path}".format(jobname=self.jobname, join_outputs=join_outputs, output=self.file_output, queue=self.p_queue_name, mem=self.p_mem, parallel_processing=parallel_processing, shell_script_path=self.shell_script_path) # NB: projectname is not currently used in the qsub command
		

		if self.p_shared_mem:
			addon = "-R 'span[hosts=1]'" # NB: the quotes are required!
			self.bcmd = "{base} {addon}".format(base=self.bcmd, addon=addon)
		
		if self.p_proc:
			addon = "-n {}".format(self.p_proc)
			self.bcmd = "{base} {addon}".format(base=self.bcmd, addon=addon)

		if self.p_runlimit:
			addon = "-W {}".format(self.p_runlimit) 
			self.bcmd = "{base} {addon}".format(base=self.bcmd, addon=addon)

		if self.p_app:
			addon = "-app {}".format(self.p_app)
			self.bcmd = "{base} {addon}".format(base=self.bcmd, addon=addon)

		if email:
			# Note: the job report goes either to email or to the stdout file, not both.
			addon = "-u {}".format(email) 
			self.bcmd = "{base} {addon}".format(base=self.bcmd, addon=addon)
		if email_status_notification: # -B
			# -B: Sends email to the job submitter when the job is dispatched and begins running
			addon = "-B"
			self.bcmd = "{base} {addon}".format(base=self.bcmd, addon=addon)
		if email_report: # -N
			# -N: If you want to separate the job report information from the job output, use the -N option to specify that the job report information should be sent by email.
			addon = "-N"
			self.bcmd = "{base} {addon}".format(base=self.bcmd, addon=addon)



		### GENERATING CALL
		self.call = ''
		if not cmd_custom:
			self.call = self.bcmd + " " + self.cmd
		else:
			self.logger.warning( "NB: custom command passed to LaunchQsub(). The command is: %s" % cmd_custom )
			self.call = cmd_custom
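
A minimal sketch of the base-command-plus-conditional-addons pattern used above, in isolation; the queue, memory, and demo command are illustrative values, not from the source:

def build_qsub_call(cmd, queue="hour", mem="1", jobname="NoJobName", proc=None, runlimit=None):
	# base command, mirroring the structure of self.bcmd above
	bcmd = "qsub -cwd -N {jobname} -j y -q {queue} -l m_mem_free={mem}g -b n".format(jobname=jobname, queue=queue, mem=mem)
	# conditional addons, appended one flag at a time as in the constructor
	if proc:
		bcmd = "{base} -n {proc}".format(base=bcmd, proc=proc)
	if runlimit:
		bcmd = "{base} -W {runlimit}".format(base=bcmd, runlimit=runlimit)
	return bcmd + " " + cmd

print(build_qsub_call("python snpsnap_query.py --help", runlimit="59"))
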