def process(self):
    """
    Convert every input biom file into a tab-separated file.

    ``self.input_files`` may be a single path or a list of paths; it is
    normalised to a list in place. For each input, a file named
    ``<input basename without extension>.tsv`` is produced in the current
    working directory by running ``biom convert ... -b`` through
    ``self.submit_cmd``. Any pre-existing file with that name is removed
    first. When ``self.remove_hdr`` is true, the first line of the
    converted file is dropped.

    The produced file names are stored in ``self.output_files``.
    """
    # isinstance (rather than an exact type comparison) keeps list
    # subclasses intact instead of wrapping them in another list.
    if not isinstance(self.input_files, list):
        self.input_files = [self.input_files]

    self.output_files = []
    for input_biom in self.input_files:
        base_name = os.path.splitext(os.path.basename(input_biom))[0]
        output_tsv = '%s.tsv' % base_name
        if os.path.isfile(output_tsv):
            os.remove(output_tsv)

        self.log.info('Converting %s into %s', input_biom, output_tsv)
        # For now only allow tsv conversion using option '-b'
        cmd = '%s convert -i %s -o %s -b' % (which('biom'), input_biom, output_tsv)
        self.submit_cmd(cmd)

        if self.remove_hdr:
            tmp_tsv = '%s.tmp' % output_tsv
            # Context managers guarantee both handles are closed even if
            # the copy fails half-way.
            with open(output_tsv, 'r') as src, open(tmp_tsv, 'w') as dst:
                # Skip the header line; the default keeps an empty file
                # from raising StopIteration.
                next(src, None)
                for line in src:
                    dst.write(line)
            os.rename(tmp_tsv, output_tsv)

        self.output_files.append(output_tsv)
def process(self):
    """
    Convert every input biom file into a tab-separated file.

    ``self.input_files`` may be a single path or a list of paths; it is
    normalised to a list in place. For each input, a file named
    ``<input basename without extension>.tsv`` is produced in the current
    working directory by running ``biom convert ... -b`` through
    ``self.submit_cmd``. Any pre-existing file with that name is removed
    first. When ``self.remove_hdr`` is true, the first line of the
    converted file is dropped.

    The produced file names are stored in ``self.output_files``.
    """
    # isinstance (rather than an exact type comparison) keeps list
    # subclasses intact instead of wrapping them in another list.
    if not isinstance(self.input_files, list):
        self.input_files = [self.input_files]

    self.output_files = []
    for input_biom in self.input_files:
        base_name = os.path.splitext(os.path.basename(input_biom))[0]
        output_tsv = '%s.tsv' % base_name
        if os.path.isfile(output_tsv):
            os.remove(output_tsv)

        self.log.info('Converting %s into %s', input_biom, output_tsv)
        # For now only allow tsv conversion using option '-b'
        cmd = '%s convert -i %s -o %s -b' % (which('biom'), input_biom, output_tsv)
        self.submit_cmd(cmd)

        if self.remove_hdr:
            tmp_tsv = '%s.tmp' % output_tsv
            # Context managers guarantee both handles are closed even if
            # the copy fails half-way.
            with open(output_tsv, 'r') as src, open(tmp_tsv, 'w') as dst:
                # Skip the header line; the default keeps an empty file
                # from raising StopIteration.
                next(src, None)
                for line in src:
                    dst.write(line)
            os.rename(tmp_tsv, output_tsv)

        self.output_files.append(output_tsv)
def resume(user, cfg, run_id, pids):
    """
    Resume, as user 'user', the pipeline described by the config 'cfg'.

    The pid of the current process is stored in pids[run_id] before the
    command is launched through run_as.

    Returns the tuple (err, out) taken from the run_as result when the
    exit code is 0; raises Exception otherwise.
    """
    pids[run_id] = mp.current_process().pid

    cmd = [which('np_submit.py'), cfg]
    (ec, err, out) = run_as(cmd=cmd, user=user)

    # Fail fast on a non-zero exit code, reporting the command and 'err'
    if ec != 0:
        raise Exception('Unable to execute cmd %s:\n %s' % (cmd, err))
    return (err, out)
def submit(config, user, run_id, pids):
    """
    Submits pipeline defined by 'config' as user 'user'.

    Dumps the config as JSON in a temp. file that is removed after
    successful completion; on failure the file is left in place.
    The pid of the current process is stored in pids[run_id] before the
    command is launched through run_as.

    Returns the tuple (err, out) taken from the run_as result when the
    exit code is 0; raises Exception otherwise.
    """
    pids[run_id] = mp.current_process().pid

    (fd, tmp_cfg) = tempfile.mkstemp(prefix='pypers_', suffix='.cfg', text=True)
    # mkstemp creates the file readable by its owner only; widen it to
    # rw-r--r-- (presumably so the process run as 'user' can read it).
    # The 0o prefix is valid on Python 2.6+ and Python 3, whereas the
    # legacy 0644 spelling is a syntax error on Python 3.
    os.fchmod(fd, 0o644)
    with os.fdopen(fd, 'w') as fh:
        json.dump(config, fh)

    cmd = [which('np_submit.py'), '-i', tmp_cfg]
    (ec, err, out) = run_as(cmd=cmd, user=user)
    if ec != 0:
        raise Exception('Unable to execute cmd %s:\n%s\n%s' % (cmd, err, out))

    os.unlink(tmp_cfg)
    return (err, out)
def exec_monitoring(self):
    """
    Submit the demultiplexing pipeline for every finished flow cell that
    has no matching entry in the demultiplexed directory.

    A flow cell directory is considered when its name matches the
    four-part underscore pattern, does not contain "Temp" and holds an
    "RTAComplete.txt" file. Directories without a sample sheet are only
    reported, not submitted.
    """
    banner = "******************************************************"

    # Maps each eligible flow cell ID to its directory
    fw_cell_dirs = {}
    missing_ss_list = []
    for hiseq_dir in self.hiseq_dirs:
        for fwcell in os.listdir(hiseq_dir):
            fwcell_path = os.path.join(hiseq_dir, fwcell)
            # Skip anything that is not a completed flow cell directory
            if not re.search(".+_.+_.+_.+", fwcell):
                continue
            if "Temp" in fwcell:
                continue
            if not os.path.exists(os.path.join(fwcell_path, "RTAComplete.txt")):
                continue

            # Look for a sample sheet among the directory entries
            has_sample_sheet = any(
                ("SampleSheet" in entry) and (".csv" in entry)
                for entry in os.listdir(fwcell_path)
            )
            if has_sample_sheet:
                fw_cell_dirs[fwcell] = fwcell_path
            else:
                missing_ss_list.append(fwcell_path)

    # Report every directory in which no sample sheet was found
    if missing_ss_list:
        print(banner)
        for missing_ss in missing_ss_list:
            print("Missing sample sheet in %s " % missing_ss)

    # Flow cells seen on the sequencer side but absent from the demux dir
    hiseq_flow_cells = set(fw_cell_dirs.keys())
    demu_flow_cells = set(os.listdir(self.demu_dir))
    pending = hiseq_flow_cells.difference(demu_flow_cells)

    for fwcell_id in pending:
        submit_cmd = which('np_submit.py')
        input_dir = fw_cell_dirs[fwcell_id]
        output_dir = os.path.join(self.demu_dir, fwcell_id)
        cmd = [
            submit_cmd,
            pipeline_names['demultiplexing'],
            'pipeline.output_dir=%s' % output_dir,
            'pipeline.project_name=Demux',
            'pipeline.description=Demultiplexing',
            'steps.inputs.input_dir=%s' % input_dir,
        ]
        run_as(cmd=cmd, user=self.user)

        print(banner)
        print(" %s Queued demux with:" % time.ctime())
        print(" Input dir : %s" % input_dir)
        print(" Output dir : %s" % output_dir)
        print(" Cmd : %s" % ' '.join(cmd))
        print(banner)
#!/usr/bin/env python
# Submits every test pipeline config through np_submit.py, giving each one
# its own output directory under a time-stamped scratch folder.
import glob
import subprocess
import time
import os
import getpass
import sys

from pypers.utils.utils import which

NP_SUBMIT = which("np_submit.py")

if __name__ == '__main__':
    testdir = os.path.dirname(os.path.realpath(__file__))
    user = getpass.getuser()
    output_root = '/scratch/%s/pypers/test_suite/%.0f' % (user, time.time())

    # Configs given on the command line win; otherwise run every json
    # file sitting next to this script.
    tests = sys.argv[1:] or glob.glob('%s/*.json' % testdir)

    for test in tests:
        # The output folder is named after the config file, cut at its first dot
        test_name = os.path.basename(test).split('.')[0]
        output_dir = os.path.join(output_root, test_name)
        subprocess.call([NP_SUBMIT, test, 'pipeline.output_dir=%s' % output_dir])
def exec_monitoring(self):
    """
    Submit the demultiplexing pipeline for every finished flow cell that
    has no matching entry in the demultiplexed directory.

    A flow cell directory is considered when its name matches the
    four-part underscore pattern, does not contain "Temp" and holds an
    "RTAComplete.txt" file. Directories without a sample sheet are only
    reported, not submitted.
    """
    banner = "******************************************************"

    # Maps each eligible flow cell ID to its directory
    fw_cell_dirs = {}
    missing_ss_list = []
    for hiseq_dir in self.hiseq_dirs:
        for fwcell in os.listdir(hiseq_dir):
            fwcell_path = os.path.join(hiseq_dir, fwcell)
            # Skip anything that is not a completed flow cell directory
            if not re.search(".+_.+_.+_.+", fwcell):
                continue
            if "Temp" in fwcell:
                continue
            if not os.path.exists(os.path.join(fwcell_path, "RTAComplete.txt")):
                continue

            # Look for a sample sheet among the directory entries
            has_sample_sheet = any(
                ("SampleSheet" in entry) and (".csv" in entry)
                for entry in os.listdir(fwcell_path)
            )
            if has_sample_sheet:
                fw_cell_dirs[fwcell] = fwcell_path
            else:
                missing_ss_list.append(fwcell_path)

    # Report every directory in which no sample sheet was found
    if missing_ss_list:
        print(banner)
        for missing_ss in missing_ss_list:
            print("Missing sample sheet in %s " % missing_ss)

    # Flow cells seen on the sequencer side but absent from the demux dir
    hiseq_flow_cells = set(fw_cell_dirs.keys())
    demu_flow_cells = set(os.listdir(self.demu_dir))
    pending = hiseq_flow_cells.difference(demu_flow_cells)

    for fwcell_id in pending:
        submit_cmd = which('np_submit.py')
        input_dir = fw_cell_dirs[fwcell_id]
        output_dir = os.path.join(self.demu_dir, fwcell_id)
        cmd = [
            submit_cmd,
            pipeline_names['demultiplexing'],
            'pipeline.output_dir=%s' % output_dir,
            'pipeline.project_name=Demux',
            'pipeline.description=Demultiplexing',
            'steps.inputs.input_dir=%s' % input_dir,
        ]
        run_as(cmd=cmd, user=self.user)

        print(banner)
        print(" %s Queued demux with:" % time.ctime())
        print(" Input dir : %s" % input_dir)
        print(" Output dir : %s" % output_dir)
        print(" Cmd : %s" % ' '.join(cmd))
        print(banner)