def run(config_file, subject_list_file, p_name=None):
    # Load and verify the pipeline configuration file
    try:
        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))
    except IOError:
        print "config file %s doesn't exist" % config_file
        raise
    except Exception:
        raise Exception("Error reading config file - %s" % config_file)

    #do some validation
    validate(c)

    # Load the subject list (YAML)
    try:
        sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        raise Exception("Subject list is not in proper YAML format. Please check your file")

    strategies = sorted(build_strategies(c))
    print "strategies ---> ", strategies

    # Build the list of subject identifiers for the group log
    sub_ids = []
    for sub in sublist:
        if sub['unique_id']:
            sub_ids.append(sub['subject_id'] + "_" + sub["unique_id"])
        else:
            sub_ids.append(sub['subject_id'])

    create_group_log_template(sub_ids, os.path.join(c.outputDirectory, 'logs'))

    # Create seed ROIs if a seed specification file was provided
    seeds_created = []
    if not (c.seedSpecificationFile is None):
        try:
            if os.path.exists(c.seedSpecificationFile):
                seeds_created = create_seeds_(c.seedOutputLocation,
                                              c.seedSpecificationFile, c.FSLDIR)
                print 'seeds created %s -> ' % seeds_created
        except:
            raise IOError('Problem in seedSpecificationFile')

    if 1 in c.runVoxelTimeseries:
        if 2 in c.useSeedInAnalysis:
            c.maskSpecificationFile = append_seeds_to_file(c.workingDirectory,
                                                           seeds_created,
                                                           c.maskSpecificationFile)

    if 1 in c.runROITimeseries:
        if 1 in c.useSeedInAnalysis:
            c.roiSpecificationFile = append_seeds_to_file(c.workingDirectory,
                                                          seeds_created,
                                                          c.roiSpecificationFile)

    if 1 in c.runNetworkCentrality:
        if 3 in c.useSeedInAnalysis:
            c.templateSpecificationFile = append_seeds_to_file(c.workingDirectory,
                                                               seeds_created,
                                                               c.templateSpecificationFile)

    if not c.runOnGrid:
        # Run locally: launch one process per subject
        from CPAC.pipeline.cpac_pipeline import prep_workflow

        procss = [Process(target=prep_workflow, args=(sub, c, strategies, p_name))
                  for sub in sublist]

        pid = open(os.path.join(c.outputDirectory, 'pid.txt'), 'w')

        import subprocess
        jobQueue = []
        if len(sublist) <= c.numSubjectsAtOnce:
            """
            Stream all the subjects as sublist is less than or equal to the
            number of subjects that need to run
            """
            for p in procss:
                p.start()
                print >>pid, p.pid
        else:
            """
            Stream the subject workflows for preprocessing.
            At any time in the pipeline c.numSubjectsAtOnce will run, unless
            the number remaining is less than the value of the parameter
            stated above
            """
            idx = 0
            while (idx < len(sublist)):
                if len(jobQueue) == 0 and idx == 0:
                    idc = idx
                    for p in procss[idc: idc + c.numSubjectsAtOnce]:
                        p.start()
                        print >>pid, p.pid
                        jobQueue.append(p)
                        idx += 1
                else:
                    # Replace finished jobs in the queue with waiting subjects
                    for job in jobQueue:
                        if not job.is_alive():
                            print 'found dead job ', job
                            loc = jobQueue.index(job)
                            del jobQueue[loc]
                            procss[idx].start()
                            jobQueue.append(procss[idx])
                            idx += 1
        pid.close()

    else:
        # Run on a cluster: pickle the strategies and submit via the
        # configured resource manager
        import commands
        import pickle
        from time import strftime

        temp_files_dir = os.path.join(os.getcwd(), 'cluster_temp_files')
        print commands.getoutput("mkdir -p %s" % temp_files_dir)

        strategies_file = os.path.join(temp_files_dir, 'strategies.obj')
        f = open(strategies_file, 'w')
        pickle.dump(strategies, f)
        f.close()

        if 'sge' in c.resourceManager.lower():
            run_sge_jobs(c, config_file, strategies_file, subject_list_file, p_name)
        elif 'pbs' in c.resourceManager.lower():
            run_pbs_jobs(c, config_file, strategies_file, subject_list_file, p_name)
        elif 'condor' in c.resourceManager.lower():
            run_condor_jobs(c, config_file, strategies_file, subject_list_file, p_name)

    return 1
def run(config_file, subject_list_file, p_name=None):
    # Import packages
    import time

    # take date+time stamp for run identification purposes
    unique_pipeline_id = time.strftime("%Y%m%d%H%M%S")
    pipeline_start_stamp = time.strftime("%Y-%m-%d_%H:%M:%S")

    try:
        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))
    except IOError:
        print("config file %s doesn't exist" % config_file)
        raise
    except Exception:
        print("Error reading config file - %s" % config_file)
        raise Exception

    #do some validation
    validate(c)

    # get the pipeline name
    p_name = c.pipelineName

    try:
        sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        print("Subject list is not in proper YAML format. Please check your file")
        raise Exception

    # NOTE: strategies list is only needed in cpac_pipeline prep_workflow for
    # creating symlinks
    strategies = sorted(build_strategies(c))
    print("strategies ---> ")
    print(strategies)

    sub_scan_map = {}
    print("subject list: ")
    print(sublist)

    try:
        for sub in sublist:
            if sub['unique_id']:
                s = sub['subject_id'] + "_" + sub["unique_id"]
            else:
                s = sub['subject_id']
            scan_ids = ['scan_anat']
            for id in sub['rest']:
                scan_ids.append('scan_' + str(id))
            sub_scan_map[s] = scan_ids
    except:
        print("\n\n" + "ERROR: Subject list file not in proper format - check if you loaded the correct file?" + "\n" +
              "Error name: cpac_runner_0001" + "\n\n")
        raise Exception

    create_group_log_template(sub_scan_map, os.path.join(c.outputDirectory, 'logs'))

    seeds_created = []
    if not (c.seedSpecificationFile is None):
        try:
            if os.path.exists(c.seedSpecificationFile):
                seeds_created = create_seeds_(c.seedOutputLocation,
                                              c.seedSpecificationFile, c.FSLDIR)
                print('seeds created %s -> ' % seeds_created)
        except:
            raise IOError('Problem in seedSpecificationFile')

    if 1 in c.runVoxelTimeseries:
        if 'roi_voxelwise' in c.useSeedInAnalysis:
            c.maskSpecificationFile = append_seeds_to_file(c.workingDirectory,
                                                           seeds_created,
                                                           c.maskSpecificationFile)

    if 1 in c.runROITimeseries:
        if 'roi_average' in c.useSeedInAnalysis:
            c.roiSpecificationFile = append_seeds_to_file(c.workingDirectory,
                                                          seeds_created,
                                                          c.roiSpecificationFile)

    if 1 in c.runSCA:
        if 'roi_average' in c.useSeedInAnalysis:
            c.roiSpecificationFileForSCA = append_seeds_to_file(c.workingDirectory,
                                                                seeds_created,
                                                                c.roiSpecificationFileForSCA)

    if 1 in c.runNetworkCentrality:
        if 'centrality_outputs_smoothed' in c.useSeedInAnalysis:
            c.templateSpecificationFile = append_seeds_to_file(c.workingDirectory,
                                                               seeds_created,
                                                               c.templateSpecificationFile)

    pipeline_timing_info = []
    pipeline_timing_info.append(unique_pipeline_id)
    pipeline_timing_info.append(pipeline_start_stamp)
    pipeline_timing_info.append(len(sublist))

    if not c.runOnGrid:
        # Import packages
        from CPAC.pipeline.cpac_pipeline import prep_workflow

        # Init variables
        procss = [Process(target=prep_workflow,
                          args=(sub, c, strategies, 1, pipeline_timing_info, p_name))
                  for sub in sublist]
        pid = open(os.path.join(c.outputDirectory, 'pid.txt'), 'w')

        # Init job queue
        jobQueue = []

        # If we're allocating more processes than are subjects, run them all
        if len(sublist) <= c.numSubjectsAtOnce:
            """
            Stream all the subjects as sublist is less than or equal to the
            number of subjects that need to run
            """
            for p in procss:
                p.start()
                print(p.pid, file=pid)
        # Otherwise manage resources to run processes incrementally
        else:
            """
            Stream the subject workflows for preprocessing.
            At any time in the pipeline c.numSubjectsAtOnce will run, unless
            the number remaining is less than the value of the parameter
            stated above
            """
            idx = 0
            while (idx < len(sublist)):
                # If the job queue is empty and we haven't started indexing
                if len(jobQueue) == 0 and idx == 0:
                    # Init subject process index
                    idc = idx
                    # Launch processes (one for each subject)
                    for p in procss[idc: idc + c.numSubjectsAtOnce]:
                        p.start()
                        print(p.pid, file=pid)
                        jobQueue.append(p)
                        idx += 1
                # Otherwise, jobs are running - check them
                else:
                    # Check every job in the queue's status
                    for job in jobQueue:
                        # If the job is not alive
                        if not job.is_alive():
                            # Find job and delete it from queue
                            print('found dead job ', job)
                            loc = jobQueue.index(job)
                            del jobQueue[loc]
                            # ...and start the next available process (subject)
                            procss[idx].start()
                            # Append this to job queue and increment index
                            jobQueue.append(procss[idx])
                            idx += 1
                # Add sleep so while loop isn't consuming 100% of CPU
                time.sleep(2)
        pid.close()

    else:
        import subprocess
        import pickle

        temp_files_dir = os.path.join(os.getcwd(), 'cluster_temp_files')
        print(subprocess.getoutput("mkdir -p %s" % temp_files_dir))

        strategies_file = os.path.join(temp_files_dir, 'strategies.obj')
        f = open(strategies_file, 'wb')
        pickle.dump(strategies, f)
        f.close()

        if 'sge' in c.resourceManager.lower():
            run_sge_jobs(c, config_file, strategies_file, subject_list_file, p_name)
        elif 'pbs' in c.resourceManager.lower():
            run_pbs_jobs(c, config_file, strategies_file, subject_list_file, p_name)
        elif 'condor' in c.resourceManager.lower():
            run_condor_jobs(c, config_file, strategies_file, subject_list_file, p_name)
def run(config_file, subject_list_file, p_name=None):
    # Import packages
    import time

    # take date+time stamp for run identification purposes
    unique_pipeline_id = strftime("%Y%m%d%H%M%S")
    pipeline_start_stamp = strftime("%Y-%m-%d_%H:%M:%S")

    try:
        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(
                yaml.load(open(os.path.realpath(config_file), 'r')))
    except IOError:
        print "config file %s doesn't exist" % config_file
        raise
    except Exception:
        print "Error reading config file - %s" % config_file
        raise Exception

    #do some validation
    validate(c)

    # get the pipeline name
    p_name = c.pipelineName

    try:
        sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        print "Subject list is not in proper YAML format. Please check your file"
        raise Exception

    # NOTE: strategies list is only needed in cpac_pipeline prep_workflow for
    # creating symlinks
    strategies = sorted(build_strategies(c))
    print "strategies ---> "
    print strategies

    sub_scan_map = {}
    print "subject list: "
    print sublist

    try:
        for sub in sublist:
            if sub['unique_id']:
                s = sub['subject_id'] + "_" + sub["unique_id"]
            else:
                s = sub['subject_id']
            scan_ids = ['scan_anat']
            for id in sub['rest']:
                scan_ids.append('scan_' + str(id))
            sub_scan_map[s] = scan_ids
    except:
        print "\n\n" + "ERROR: Subject list file not in proper format - check if you loaded the correct file?" + "\n" + \
              "Error name: cpac_runner_0001" + "\n\n"
        raise Exception

    create_group_log_template(sub_scan_map,
                              os.path.join(c.outputDirectory, 'logs'))

    seeds_created = []
    if not (c.seedSpecificationFile is None):
        try:
            if os.path.exists(c.seedSpecificationFile):
                seeds_created = create_seeds_(c.seedOutputLocation,
                                              c.seedSpecificationFile,
                                              c.FSLDIR)
                print 'seeds created %s -> ' % seeds_created
        except:
            raise IOError('Problem in seedSpecificationFile')

    if 1 in c.runVoxelTimeseries:
        if 'roi_voxelwise' in c.useSeedInAnalysis:
            c.maskSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.maskSpecificationFile)

    if 1 in c.runROITimeseries:
        if 'roi_average' in c.useSeedInAnalysis:
            c.roiSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.roiSpecificationFile)

    if 1 in c.runSCA:
        if 'roi_average' in c.useSeedInAnalysis:
            c.roiSpecificationFileForSCA = append_seeds_to_file(
                c.workingDirectory, seeds_created,
                c.roiSpecificationFileForSCA)

    if 1 in c.runNetworkCentrality:
        if 'centrality_outputs_smoothed' in c.useSeedInAnalysis:
            c.templateSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created,
                c.templateSpecificationFile)

    pipeline_timing_info = []
    pipeline_timing_info.append(unique_pipeline_id)
    pipeline_timing_info.append(pipeline_start_stamp)
    pipeline_timing_info.append(len(sublist))

    if not c.runOnGrid:
        # Import packages
        from CPAC.pipeline.cpac_pipeline import prep_workflow

        # Init variables
        procss = [Process(target=prep_workflow,
                          args=(sub, c, strategies, 1,
                                pipeline_timing_info, p_name))
                  for sub in sublist]
        pid = open(os.path.join(c.outputDirectory, 'pid.txt'), 'w')

        # Init job queue
        jobQueue = []

        # If we're allocating more processes than are subjects, run them all
        if len(sublist) <= c.numSubjectsAtOnce:
            """
            Stream all the subjects as sublist is less than or equal to the
            number of subjects that need to run
            """
            for p in procss:
                p.start()
                print >> pid, p.pid
        # Otherwise manage resources to run processes incrementally
        else:
            """
            Stream the subject workflows for preprocessing.
            At Any time in the pipeline c.numSubjectsAtOnce will run, unless
            the number remaining is less than the value of the parameter
            stated above
            """
            idx = 0
            while (idx < len(sublist)):
                # If the job queue is empty and we haven't started indexing
                if len(jobQueue) == 0 and idx == 0:
                    # Init subject process index
                    idc = idx
                    # Launch processes (one for each subject)
                    for p in procss[idc:idc + c.numSubjectsAtOnce]:
                        p.start()
                        print >> pid, p.pid
                        jobQueue.append(p)
                        idx += 1
                # Otherwise, jobs are running - check them
                else:
                    # Check every job in the queue's status
                    for job in jobQueue:
                        # If the job is not alive
                        if not job.is_alive():
                            # Find job and delete it from queue
                            print 'found dead job ', job
                            loc = jobQueue.index(job)
                            del jobQueue[loc]
                            # ...and start the next available process (subject)
                            procss[idx].start()
                            # Append this to job queue and increment index
                            jobQueue.append(procss[idx])
                            idx += 1
                # Add sleep so while loop isn't consuming 100% of CPU
                time.sleep(2)
        pid.close()

    else:
        import commands
        import pickle

        temp_files_dir = os.path.join(os.getcwd(), 'cluster_temp_files')
        print commands.getoutput("mkdir -p %s" % temp_files_dir)

        strategies_file = os.path.join(temp_files_dir, 'strategies.obj')
        f = open(strategies_file, 'w')
        pickle.dump(strategies, f)
        f.close()

        if 'sge' in c.resourceManager.lower():
            run_sge_jobs(c, config_file, strategies_file, subject_list_file, p_name)
        elif 'pbs' in c.resourceManager.lower():
            run_pbs_jobs(c, config_file, strategies_file, subject_list_file, p_name)
        elif 'condor' in c.resourceManager.lower():
            run_condor_jobs(c, config_file, strategies_file, subject_list_file, p_name)
def run(config_file, subject_list_file, p_name=None):
    # take date+time stamp for run identification purposes
    unique_pipeline_id = strftime("%Y%m%d%H%M%S")
    pipeline_start_stamp = strftime("%Y-%m-%d_%H:%M:%S")

    try:
        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))
    except IOError:
        print "config file %s doesn't exist" % config_file
        raise
    except Exception:
        print "Error reading config file - %s" % config_file
        raise Exception

    #do some validation
    validate(c)

    try:
        sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        print "Subject list is not in proper YAML format. Please check your file"
        raise Exception

    strategies = sorted(build_strategies(c))
    print "strategies ---> "
    print strategies

    sub_scan_map = {}
    print "subject list: "
    print sublist

    try:
        for sub in sublist:
            if sub['unique_id']:
                s = sub['subject_id'] + "_" + sub["unique_id"]
            else:
                s = sub['subject_id']
            scan_ids = ['scan_anat']
            for id in sub['rest']:
                scan_ids.append('scan_' + str(id))
            sub_scan_map[s] = scan_ids
    except:
        print "\n\n" + "ERROR: Subject list file not in proper format - check if you loaded the correct file?" + "\n" + \
              "Error name: cpac_runner_0001" + "\n\n"
        raise Exception

    create_group_log_template(sub_scan_map, os.path.join(c.outputDirectory, 'logs'))

    seeds_created = []
    if not (c.seedSpecificationFile is None):
        try:
            if os.path.exists(c.seedSpecificationFile):
                seeds_created = create_seeds_(c.seedOutputLocation,
                                              c.seedSpecificationFile, c.FSLDIR)
                print 'seeds created %s -> ' % seeds_created
        except:
            raise IOError('Problem in seedSpecificationFile')

    if 1 in c.runVoxelTimeseries:
        if 'roi_voxelwise' in c.useSeedInAnalysis:
            c.maskSpecificationFile = append_seeds_to_file(c.workingDirectory,
                                                           seeds_created,
                                                           c.maskSpecificationFile)

    if 1 in c.runROITimeseries:
        if 'roi_average' in c.useSeedInAnalysis:
            c.roiSpecificationFile = append_seeds_to_file(c.workingDirectory,
                                                          seeds_created,
                                                          c.roiSpecificationFile)

    if 1 in c.runNetworkCentrality:
        if 'centrality_outputs_smoothed' in c.useSeedInAnalysis:
            c.templateSpecificationFile = append_seeds_to_file(c.workingDirectory,
                                                               seeds_created,
                                                               c.templateSpecificationFile)

    pipeline_timing_info = []
    pipeline_timing_info.append(unique_pipeline_id)
    pipeline_timing_info.append(pipeline_start_stamp)
    pipeline_timing_info.append(len(sublist))

    if not c.runOnGrid:
        from CPAC.pipeline.cpac_pipeline import prep_workflow

        procss = [Process(target=prep_workflow,
                          args=(sub, c, strategies, 1, pipeline_timing_info, p_name))
                  for sub in sublist]
        pid = open(os.path.join(c.outputDirectory, 'pid.txt'), 'w')

        jobQueue = []
        if len(sublist) <= c.numSubjectsAtOnce:
            """
            Stream all the subjects as sublist is less than or equal to the
            number of subjects that need to run
            """
            for p in procss:
                p.start()
                print >>pid, p.pid
        else:
            """
            Stream the subject workflows for preprocessing.
            At Any time in the pipeline c.numSubjectsAtOnce will run, unless
            the number remaining is less than the value of the parameter
            stated above
            """
            idx = 0
            while (idx < len(sublist)):
                if len(jobQueue) == 0 and idx == 0:
                    idc = idx
                    for p in procss[idc: idc + c.numSubjectsAtOnce]:
                        p.start()
                        print >>pid, p.pid
                        jobQueue.append(p)
                        idx += 1
                else:
                    for job in jobQueue:
                        if not job.is_alive():
                            print 'found dead job ', job
                            loc = jobQueue.index(job)
                            del jobQueue[loc]
                            procss[idx].start()
                            jobQueue.append(procss[idx])
                            idx += 1
        pid.close()

    else:
        import commands
        import pickle

        temp_files_dir = os.path.join(os.getcwd(), 'cluster_temp_files')
        print commands.getoutput("mkdir -p %s" % temp_files_dir)

        strategies_file = os.path.join(temp_files_dir, 'strategies.obj')
        f = open(strategies_file, 'w')
        pickle.dump(strategies, f)
        f.close()

        if 'sge' in c.resourceManager.lower():
            run_sge_jobs(c, config_file, strategies_file, subject_list_file, p_name)
        elif 'pbs' in c.resourceManager.lower():
            run_pbs_jobs(c, config_file, strategies_file, subject_list_file, p_name)
        elif 'condor' in c.resourceManager.lower():
            run_condor_jobs(c, config_file, strategies_file, subject_list_file, p_name)
def run(config_file, subject_list_file, p_name=None):
    try:
        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(
                yaml.load(open(os.path.realpath(config_file), 'r')))
    except IOError:
        print "config file %s doesn't exist" % config_file
        raise
    except Exception:
        print "Error reading config file - %s" % config_file
        raise Exception

    #do some validation
    validate(c)

    try:
        sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        print "Subject list is not in proper YAML format. Please check your file"
        raise Exception

    strategies = sorted(build_strategies(c))
    print "strategies ---> "
    print strategies

    sub_scan_map = {}
    print "subject list: "
    print sublist

    try:
        for sub in sublist:
            if sub['unique_id']:
                s = sub['subject_id'] + "_" + sub["unique_id"]
            else:
                s = sub['subject_id']
            scan_ids = ['scan_anat']
            for id in sub['rest']:
                scan_ids.append('scan_' + str(id))
            sub_scan_map[s] = scan_ids
    except:
        print "\n\n" + "ERROR: Subject list file not in proper format - check if you loaded the correct file?" + "\n" + \
              "Error name: cpac_runner_0001" + "\n\n"
        raise Exception

    create_group_log_template(sub_scan_map,
                              os.path.join(c.outputDirectory, 'logs'))

    seeds_created = []
    if not (c.seedSpecificationFile is None):
        try:
            if os.path.exists(c.seedSpecificationFile):
                seeds_created = create_seeds_(c.seedOutputLocation,
                                              c.seedSpecificationFile,
                                              c.FSLDIR)
                print 'seeds created %s -> ' % seeds_created
        except:
            raise IOError('Problem in seedSpecificationFile')

    if 1 in c.runVoxelTimeseries:
        if 'roi_voxelwise' in c.useSeedInAnalysis:
            c.maskSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.maskSpecificationFile)

    if 1 in c.runROITimeseries:
        if 'roi_average' in c.useSeedInAnalysis:
            c.roiSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.roiSpecificationFile)

    if 1 in c.runNetworkCentrality:
        if 'centrality_outputs_smoothed' in c.useSeedInAnalysis:
            c.templateSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.templateSpecificationFile)

    if not c.runOnGrid:
        from CPAC.pipeline.cpac_pipeline import prep_workflow

        procss = [
            Process(target=prep_workflow, args=(sub, c, strategies, 1, p_name))
            for sub in sublist
        ]
        pid = open(os.path.join(c.outputDirectory, 'pid.txt'), 'w')

        jobQueue = []
        if len(sublist) <= c.numSubjectsAtOnce:
            """
            Stream all the subjects as sublist is less than or equal to the
            number of subjects that need to run
            """
            for p in procss:
                p.start()
                print >> pid, p.pid
        else:
            """
            Stream the subject workflows for preprocessing.
            At Any time in the pipeline c.numSubjectsAtOnce will run, unless
            the number remaining is less than the value of the parameter
            stated above
            """
            idx = 0
            while (idx < len(sublist)):
                if len(jobQueue) == 0 and idx == 0:
                    idc = idx
                    for p in procss[idc:idc + c.numSubjectsAtOnce]:
                        p.start()
                        print >> pid, p.pid
                        jobQueue.append(p)
                        idx += 1
                else:
                    for job in jobQueue:
                        if not job.is_alive():
                            print 'found dead job ', job
                            loc = jobQueue.index(job)
                            del jobQueue[loc]
                            procss[idx].start()
                            jobQueue.append(procss[idx])
                            idx += 1
        pid.close()

    else:
        import commands
        import pickle

        temp_files_dir = os.path.join(os.getcwd(), 'cluster_temp_files')
        print commands.getoutput("mkdir -p %s" % temp_files_dir)

        strategies_file = os.path.join(temp_files_dir, 'strategies.obj')
        f = open(strategies_file, 'w')
        pickle.dump(strategies, f)
        f.close()

        if 'sge' in c.resourceManager.lower():
            run_sge_jobs(c, config_file, strategies_file, subject_list_file, p_name)
        elif 'pbs' in c.resourceManager.lower():
            run_pbs_jobs(c, config_file, strategies_file, subject_list_file, p_name)
        elif 'condor' in c.resourceManager.lower():
            run_condor_jobs(c, config_file, strategies_file, subject_list_file, p_name)
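A minimal invocation sketch for any of the run() variants above. The call signature comes from the source; the module path (CPAC.pipeline.cpac_runner) and the YAML file paths are assumptions for illustration only.

# Hypothetical usage sketch -- paths and module location are placeholders,
# not taken from the source:
#
#     from CPAC.pipeline.cpac_runner import run
#
#     run('/path/to/pipeline_config.yml',
#         '/path/to/subject_list.yml',
#         p_name='my_pipeline')
#
# run() loads and validates the pipeline configuration, reads the YAML
# subject list, then either launches one multiprocessing.Process per subject
# locally (throttled by c.numSubjectsAtOnce) or submits jobs through
# SGE/PBS/Condor when c.runOnGrid is set.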