def download_s3_sublist(s3_sublist, inputs_dir, study_name, creds_path):
    '''
    Function to download the imaging data from S3 based on an
    S3-path-formatted C-PAC subject list; it then uses the locally
    downloaded files as the image paths in the resulting subject list

    Parameters
    ----------
    s3_sublist : list
        a C-PAC-compatible subject list with S3 filepaths instead of
        local filepaths
    inputs_dir : string
        filepath to the directory where all of the subjects' folders,
        sub-folders, and niftis will be written to
    study_name : string
        the name of the study/site that all of the subjects will be
        placed in
    creds_path : string
        path to the csv file with 'ACCESS_KEY_ID' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'SECRET_ACCESS_KEY' string and ASCII text

    Returns
    -------
    s3_sublist : list
        a modified version of the input s3_sublist where the filepaths
        for each image type point to a downloaded file
    '''

    # Import packages
    import os
    import fetch_creds

    # Init variables
    aws_access_key_id, aws_secret_access_key = \
        fetch_creds.return_aws_keys(creds_path)

    # Go through sublist to create filepaths; download data via ndar_unpack
    no_subs = len(s3_sublist)
    for idx, sub in enumerate(s3_sublist):
        # First create subject directories
        unique_sub_dir = os.path.join(inputs_dir, study_name,
                                      str(sub['subject_id']),
                                      str(sub['unique_id']))
        # If the directory doesn't exist already, create it
        if not os.path.exists(unique_sub_dir):
            print 'creating subject/session directories: %s' % unique_sub_dir
            os.makedirs(unique_sub_dir)

        # ndar_unpack the anatomical
        anat_dir = os.path.join(unique_sub_dir, 'anat_1')
        if not os.path.exists(anat_dir):
            print 'creating anatomical directory: %s' % anat_dir
            os.makedirs(anat_dir)

        # Set nifti file output
        s3_path = sub['anat']
        out_nii = os.path.join(anat_dir, 'anat.nii.gz')
        if not os.path.exists(out_nii):
            # And try to extract the image
            try:
                print 'attempting to download and extract %s to %s' \
                      % (s3_path, out_nii)
                run_ndar_unpack(s3_path, out_nii, aws_access_key_id,
                                aws_secret_access_key)
                print 'Success!'
                # If it is successful, replace s3_path with out_nii
                s3_sublist[idx]['anat'] = out_nii
            except OSError as e:
                print e
                print 'Failed anatomical image %s extraction for %s.\n' \
                      'Trying functional...' % (s3_path, sub['subject_id'])
        else:
            print 'Anatomical file %s exists! Skipping...' % out_nii

        # ndar_unpack each functional
        for folder, s3_path in sub['rest'].items():
            rest_dir = os.path.join(unique_sub_dir,
                                    folder.split('_rest')[0])
            if not os.path.exists(rest_dir):
                print 'creating functional directory: %s' % rest_dir
                os.makedirs(rest_dir)
            out_nii = os.path.join(rest_dir, 'rest.nii.gz')
            # And try to extract the image
            if not os.path.exists(out_nii):
                try:
                    print 'attempting to download and extract %s to %s' \
                          % (s3_path, out_nii)
                    run_ndar_unpack(s3_path, out_nii, aws_access_key_id,
                                    aws_secret_access_key)
                    print 'Success!'
                    # If it is successful, replace s3_path with out_nii
                    s3_sublist[idx]['rest'][folder] = out_nii
                except OSError as e:
                    print e
                    print 'Failed functional image %s extraction for %s' \
                          % (s3_path, sub['subject_id'])
            else:
                print 'Functional file %s exists! Skipping...' % out_nii

        # Print % complete
        i = idx + 1
        per = 100 * (float(i) / no_subs)
        print 'Done extracting %d/%d\n%f%% complete' % (i, no_subs, per)

    # Return the new s3_sublist
    return s3_sublist
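
# run_ndar_unpack() is called above but defined elsewhere in the project.
# The minimal sketch below is an assumption, not the project's actual
# helper: it wraps the same './ndar_unpack' command-line invocation used
# in main() further down (the flag names are copied from that call) and
# raises OSError on failure, which is the exception the caller traps.
def run_ndar_unpack(s3_path, out_nii, aws_access_key_id,
                    aws_secret_access_key):
    '''
    Hypothetical sketch: download and extract one image from S3 via the
    ndar_unpack command-line tool
    '''
    import subprocess
    cmd_list = ['./ndar_unpack', '--aws-access-key-id', aws_access_key_id,
                '--aws-secret-access-key', aws_secret_access_key,
                '-v', out_nii, s3_path]
    p = subprocess.Popen(cmd_list, stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    stdout, stderr = p.communicate()
    # Surface a non-zero exit as OSError so the caller's except clause fires
    if p.returncode != 0:
        raise OSError('ndar_unpack failed: %s' % stdout)
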
def main(sub_list, sub_idx):
    '''
    Method to preprocess a subject's image (nifti) data using ANTs and
    upload it to a miNDAR database. The first argument to the script
    specifies the index of the subject to process from the subject list.

    Parameters
    ----------
    sub_list : list
        a python list of tuples, as loaded from the subject-list yaml
        file; each tuple is of the form (img03_id, s3_path), where
        img03_id is an integer corresponding to the image03_id of the
        image and s3_path is a string corresponding to the path of the
        image on S3, e.g. (123, 's3://NDAR_Bucket/subject/image01.nii')
    sub_idx : integer
        index of the subject to process from the sub_list yaml file

    Returns
    -------
    None
        The function doesn't return any value; it processes and uploads
        data to S3 and creates a log file of the overall progress.
    '''

    # Import packages
    import boto
    import cx_Oracle
    import fetch_creds
    import logging
    from nipype import logging as np_logging
    from nipype import config
    import os
    import re
    import subprocess
    import sys
    import time
    import yaml

    # Start timing
    start = time.time()

    # Init variables
    base_path = '/data/act_run/'
    creds_path = '/data/creds/Daniels_credentials.csv'
    # Oasis template paths
    oasis_path = '/data/OASIS-30_Atropos_template/'
    oasis_roi_yaml = oasis_path + 'oasis_roi_map.yml'
    # Load in OASIS ROI map
    oasis_roi_map = yaml.load(open(oasis_roi_yaml, 'r'))

    # Setup s3 bucket, RDS cursor connections for uploading
    aws_access_key_id, aws_secret_access_key = \
        fetch_creds.return_aws_keys(creds_path)
    bucket = fetch_creds.return_bucket(creds_path, 'ndar-data')
    cursor = fetch_creds.return_cursor(creds_path)

    # Get subject info
    subject = sub_list[sub_idx-1]
    img03_id_str = str(subject[0])
    s3_path = subject[1]

    # Change bucket name to always be 'NDAR_Central' (case-sensitive)
    s3_list = s3_path.split('/')
    s3_list[2] = 'NDAR_Central'
    s3_path = '/'.join(s3_list)

    # --- Set up log file ---
    log_file = base_path + 'logs/' + img03_id_str + '.log'
    setup_logger('log1', log_file, logging.INFO)
    ndar_log = logging.getLogger('log1')
    # Log input image stats
    ndar_log.info('-------- RUNNING SUBJECT NO. #%d --------' % sub_idx)
    ndar_log.info('Start time: %s ' % time.ctime(start))
    ndar_log.info('Input S3 path: %s' % s3_path)
    ndar_log.info('Input IMAGE03 ID: %s' % img03_id_str)

    # --- Search results_stats table for previous entries of that img03_id ---
    cmd = '''
          select rs_id, wf_status from results_stats
          where img03_id = :arg_1
          '''
    cursor.execute(cmd, arg_1=int(img03_id_str))
    result = cursor.fetchall()

    # If the record already exists, check to see if it was successful
    wkflow_flag = 0
    for record in result:
        wkflow_status = record[1]
        if wkflow_status == 'PASS':
            wkflow_flag = 1
            rs_id = record[0]
    # Log if already found and exit
    if wkflow_flag:
        ndar_log.info('Image already successfully ran, found at RS_ID: %d' \
                      % rs_id)
        sys.exit()

    # --- Download and extract data from NDAR_Central S3 bucket ---
    nifti_file = base_path + 'inputs-ef/' + img03_id_str + '.nii.gz'
    # Execute ndar_unpack for that subject
    cmd = './ndar_unpack'
    if not os.path.exists(nifti_file):
        cmd_list = [cmd, '--aws-access-key-id', aws_access_key_id,
                    '--aws-secret-access-key', aws_secret_access_key,
                    '-v', nifti_file, s3_path]
        cmd_str = ' '.join(cmd_list)
        ndar_log.info('Executing command: %s ' % cmd_str)
        p = subprocess.Popen(cmd_list, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        # Use communicate() (not wait() first) to avoid deadlocking on a
        # full stdout pipe buffer
        stdout, stderr = p.communicate()
        ndar_log.info(stdout)
    else:
        ndar_log.info('Nifti file already present for IMAGE03 ID %s' \
                      % img03_id_str)
        ndar_log.info('ndar_unpack did not need to run')

    extract_status_str = 'PASS'
    # If the file was never created, log the failure, upload the log,
    # record the failure in the database, and exit
    if not os.path.exists(nifti_file):
        ndar_log.info('File extraction FAILED for IMAGE03 ID %s' \
                      % img03_id_str)
        extract_status_str = 'FAIL'
        # Upload the log file
        time_str = time.strftime('%Y-%m-%d_%H%M-%S',
                                 time.localtime(time.time()))
        s3_filename = time_str + '_' + img03_id_str
        up_log_list = []
        s3_log_list = []
        s3_log_path = 'logs/' + s3_filename + '.log'
        up_log_list.append(log_file)
        s3_log_list.append(s3_log_path)
        upload_to_s3(bucket, up_log_list, s3_log_list)
        # Finally upload the record to the database
        add_db_record(cursor, img03_id_str, 'N/A', extract_status_str,
                      'https://s3.amazonaws.com/ndar-data/' + s3_log_path,
                      'N/A', 'N/A')
        # And quit
        sys.exit()

    # Create the nipype workflow
    wf, crash_dir = create_workflow(base_path, img03_id_str, nifti_file,
                                    oasis_path)

    # --- Run the workflow ---
    wf_base_dir = base_path + 'work-dirs/' + img03_id_str
    up_nifti_path = wf_base_dir + \
        '/output/OUTPUT_CorticalThicknessNormalizedToTemplate.nii.gz'
    up_roi_path = wf_base_dir + '/output/ROIstats.txt'
    if os.path.exists(up_nifti_path) and os.path.exists(up_roi_path):
        wf_status = 1
    else:
        wf_status = 0
    if wf_status == 0:
        try:
            ndar_log.info('Running the workflow...')
            wf.run()
            # We're successful at this point
            ndar_log.info('Workflow completed successfully for IMAGE03 ID %s' \
                          % img03_id_str)
            wf_status = 1
            finish_str = 'Finish time: %s'
        # If the workflow run fails
        except:
            ndar_log.info('ACT Workflow failed for IMAGE03 ID %s' \
                          % img03_id_str)
            finish_str = 'Crash time: %s'
    else:
        finish_str = 'Workflow did not need to run as files were ' \
                     'already there: %s'

    # Log finish and total computation time
    fin = time.time()
    elapsed = (fin - start) / 60
    ndar_log.info(finish_str % time.ctime(fin))
    ndar_log.info('Total time running IMAGE03 ID %s is: %s minutes' \
                  % (img03_id_str, str(elapsed)))

    up_list = []
    s3_list = []
    time_str = time.strftime('%Y-%m-%d_%H-%M-%S', time.localtime(fin))
    s3_filename = time_str + '_' + img03_id_str

    # If the workflow completed successfully
    if wf_status:
        # Define cloud data and status
        wf_status_str = 'PASS'
        s3_nifti_path = 'outputs/' + img03_id_str + '/' + img03_id_str + \
                        '_corticalthickness_normd.nii.gz'
        s3_roi_path = 'outputs/' + img03_id_str + '/' + img03_id_str + \
                      '_ROIstats.txt'
        full_s3_nifti_path = 's3://ndar_data/' + s3_nifti_path
        full_s3_roi_path = 's3://ndar_data/' + s3_roi_path
        # Append upload/s3 lists with path names
        up_list.append(up_nifti_path)
        up_list.append(up_roi_path)
        s3_list.append(s3_nifti_path)
        s3_list.append(s3_roi_path)
        # Log nifti and roi files upload
        ndar_log.info('Uploading nifti and roi files...')
        # Create dictionary of ROIs for that subject
        sub_roi_dic = create_roi_dic(up_roi_path)
        try:
            # Insert the ROIs into the unorm'd and norm'd databases
            ndar_log.info('uploading rois...')
            insert_unormd(cursor, img03_id_str, roi_dic=sub_roi_dic)
            # Insert the act nifti into the unorm'd and norm'd databases
            ndar_log.info('uploading imgs...')
            insert_unormd(cursor, img03_id_str, s3_path=full_s3_nifti_path)
        except:
            e = sys.exc_info()[0]
            ndar_log.info('Error inserting results to MINDAR, message: %s' \
                          % str(e))
            wf_status_str = 'Error inserting results into MINDAR database'
    # Otherwise, there were crash files; upload those
    else:
        # Define cloud data and status
        wf_status_str = 's3://ndar-data/crashes/' + s3_filename + '/'
        full_s3_nifti_path = 'N/A'
        full_s3_roi_path = 'N/A'
        # Find crash file names/paths
        for root, dirs, files in os.walk(crash_dir):
            root_path = os.path.abspath(root)
            crash_files = files
        # Append crash file and s3 path lists
        for f in crash_files:
            crash_path = root_path + '/' + f
            s3_crash_path = 'crashes/' + s3_filename + '/' + f
            up_list.append(crash_path)
            s3_list.append(s3_crash_path)
        # Log crash file upload
        ndar_log.info('Uploading crash files into %s ...' % wf_status_str)

    # Call the upload function
    upload_to_s3(bucket, up_list, s3_list)
    ndar_log.info('Done')

    # Upload the log file
    up_log_list = []
    s3_log_list = []
    s3_log_path = 'logs/' + s3_filename + '.log'
    up_log_list.append(log_file)
    s3_log_list.append(s3_log_path)
    upload_to_s3(bucket, up_log_list, s3_log_list)

    # Finally upload the record to the database
    add_db_record(cursor, img03_id_str, wf_status_str, extract_status_str,
                  's3://ndar-data/' + s3_log_path, full_s3_nifti_path,
                  full_s3_roi_path)
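

# upload_to_s3() and setup_logger() are called in main() but defined
# elsewhere in the project. The sketches below are assumptions, not the
# project's actual helpers: upload_to_s3 assumes boto 2's bucket/key API
# (which main() already imports), and setup_logger assumes the standard
# library logging module; the log format string is hypothetical.
def upload_to_s3(bucket, up_list, s3_list):
    '''
    Hypothetical sketch: upload each local file in up_list to the
    corresponding S3 key name in s3_list
    '''
    for local_path, s3_key_path in zip(up_list, s3_list):
        # new_key/set_contents_from_filename are the boto 2 upload calls
        key = bucket.new_key(s3_key_path)
        key.set_contents_from_filename(local_path)


def setup_logger(logger_name, log_file, level):
    '''
    Hypothetical sketch: configure a named logger that writes
    timestamped messages to log_file
    '''
    import logging
    logger = logging.getLogger(logger_name)
    logger.setLevel(level)
    file_handler = logging.FileHandler(log_file)
    file_handler.setFormatter(logging.Formatter('%(asctime)s : %(message)s'))
    logger.addHandler(file_handler)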