def get_roi_dict(creds_path, bucket_name, datasetid):
    """
    Function to read the text stream from the URL of an ROI stats file
    generated by an ANTs cortical thickness run

    Parameters
    ----------
    creds_path : string
        path to the csv file with 'Access Key Id' as the header and the
        corresponding ASCII text for the key underneath; same with the
        'Secret Access Key' string and ASCII text
    bucket_name : string
        the name of the bucket to get the ROI txt file from
    datasetid : string
        the dataset id of interest

    Returns
    -------
    sub_dict : dictionary {str : str}
        the ROI dictionary with the ROI label (key) mapped to its ROI value
    """

    # Import packages
    import fetch_creds

    # Init variables
    bucket = fetch_creds.return_bucket(creds_path, bucket_name)
    key_path = "outputs/" + datasetid + "/" + datasetid + "_ROIstats.txt"
    key = bucket.get_key(key_path)

    # Get file contents and split into list of lines
    kstring = key.get_contents_as_string()
    temp_list = kstring.split("\n")

    # Form subject ROI dictionary from the label row and the value row
    key = temp_list[0].split()[2:]
    val = temp_list[1].split()[2:]
    sub_dict = dict(zip(key, val))

    # Return the subject ROI dictionary
    return sub_dict
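
# Usage sketch (an assumption, not part of the original script): a minimal
# example of how get_roi_dict() might be called. The credentials csv path and
# dataset id below are hypothetical placeholders; 'ndar-data' mirrors the
# bucket name used elsewhere in these scripts.
def _example_get_roi_dict():
    creds_path = '/path/to/aws_credentials.csv'  # hypothetical credentials csv
    roi_dict = get_roi_dict(creds_path, 'ndar-data', '12345')
    # Print each ROI label alongside its value
    for label, value in sorted(roi_dict.items()):
        print '%s: %s' % (label, value)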
def main(creds_path, creds_path2, bucket, b_prefix, pipeline, num_res):
    '''
    Function that analyzes data in an S3 bucket and then uploads it in a
    tabular format as an entry in a database table

    Parameters
    ----------
    creds_path : string
        filepath to the S3 bucket credentials as a csv file
    creds_path2 : string
        filepath to the database instance credentials as a csv file
    bucket : string
        name of the S3 bucket to analyze data from
    b_prefix : string
        prefix filepath within the S3 bucket to parse for data
    pipeline : string
        name of the pipeline to gather outputs from for tabulating in the DB
    num_res : integer
        the number of results the pipeline is expected to have per
        derivative, used when checking if the information was already entered

    Returns
    -------
    src_list : list (boto Keys)
        a list of the keys that were inserted into the database
    '''

    # Import packages
    import fetch_creds

    # ANTs
    if pipeline == 'ants':
        import ants_insert as db_insert
    # CIVET
    elif pipeline == 'civet':
        import civet_insert as db_insert
    # Freesurfer
    elif pipeline == 'freesurfer':
        import freesurfer_insert as db_insert
    # Otherwise, assume it's ccs, cpac, dparsf, or niak
    else:
        import insert_utils as db_insert

    # Init variables
    prefix = 'https://s3.amazonaws.com/' + bucket

    # Get AWS keys
    b = fetch_creds.return_bucket(creds_path, bucket)
    cursor = fetch_creds.return_cursor(creds_path2)

    # Set up lists of keys, keeping only those that match the pipeline
    src_list = b.list(prefix=b_prefix)
    file_list = [s for s in src_list if pipeline in str(s.name)]
    no_files = len(file_list)
    print 'done creating file list, it has %d elements' % no_files

    # Iterate through the list, inserting any keys not already in the database
    i = 0
    for f in file_list:
        url_path = prefix + str(f.name)
        exists = check_existing(cursor, url_path, 'abide_img_results', num_res)
        if not exists:
            db_insert.upload_results(cursor, url_path)
            print 'uploaded file %s successfully!' % url_path
        else:
            print 'already loaded file %s, skipping...' % url_path
        i += 1
        per = 100*(float(i)/no_files)
        print 'done with file %d/%d\n%f%% complete\n' % \
              (i, no_files, per)

    # Return the src_list
    return src_list
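
# Usage sketch (an assumption, not part of the original script): how the
# tabulating main() above might be invoked for ANTs outputs under an S3 prefix.
# Every path, bucket name, prefix, and the num_res value here is a hypothetical
# placeholder.
def _example_tabulate_pipeline_outputs():
    s3_creds = '/path/to/aws_credentials.csv'  # hypothetical S3 credentials csv
    db_creds = '/path/to/db_credentials.csv'   # hypothetical database credentials csv
    keys = main(s3_creds, db_creds, bucket='abide-outputs',
                b_prefix='outputs/', pipeline='ants', num_res=2)
    print 'processed %d keys under the prefix' % len(list(keys))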
def main(sub_list, sub_idx):
    '''
    Method to preprocess a subject's image (nifti) data using ANTs and
    upload it to a miNDAR database. The first argument to the script
    specifies the index of the subject to process from the subject list.

    Parameters
    ----------
    sub_list : list
        a python list of tuples loaded from the subject-list yaml file;
        each tuple is of the form (img03_id, s3_path), where img03_id is
        an integer corresponding to the image03_id of the image and
        s3_path is a string corresponding to the path of the image on S3,
        e.g. (123, 's3://NDAR_Bucket/subject/image01.nii')
    sub_idx : integer
        index of the subject to process from sub_list

    Returns
    -------
    None
        The function doesn't return any value; it processes and uploads
        data to S3 and creates a log file of the overall progress.
    '''

    # Import packages
    import boto
    import cx_Oracle
    import fetch_creds
    import logging
    from nipype import logging as np_logging
    from nipype import config
    import os
    import re
    import subprocess
    import sys
    import time
    import yaml

    # Start timing
    start = time.time()

    # Init variables
    base_path = '/data/act_run/'
    creds_path = '/data/creds/Daniels_credentials.csv'
    # Oasis template paths
    oasis_path = '/data/OASIS-30_Atropos_template/'
    oasis_roi_yaml = oasis_path + 'oasis_roi_map.yml'
    # Load in OASIS ROI map
    oasis_roi_map = yaml.load(open(oasis_roi_yaml, 'r'))

    # Setup s3 bucket, RDS cursor connections for uploading
    aws_access_key_id, aws_secret_access_key = \
        fetch_creds.return_aws_keys(creds_path)
    bucket = fetch_creds.return_bucket(creds_path, 'ndar-data')
    cursor = fetch_creds.return_cursor(creds_path)

    # Get subject info
    subject = sub_list[sub_idx-1]
    img03_id_str = str(subject[0])
    s3_path = subject[1]

    # Change bucket name to always be 'NDAR_Central' (case-sensitive)
    s3_list = s3_path.split('/')
    s3_list[2] = 'NDAR_Central'
    s3_path = '/'.join(s3_list)

    # --- Set up log file ---
    log_file = base_path + 'logs/' + img03_id_str + '.log'
    setup_logger('log1', log_file, logging.INFO)
    ndar_log = logging.getLogger('log1')

    # Log input image stats
    ndar_log.info('-------- RUNNING SUBJECT NO. #%d --------' % (sub_idx))
    ndar_log.info('Start time: %s ' % time.ctime(start))
    ndar_log.info('Input S3 path: %s' % s3_path)
    ndar_log.info('Input IMAGE03 ID: %s' % img03_id_str)

    # --- Search results_stats table for previous entries of that img03_id ---
    cmd = '''
          select rs_id, wf_status
          from results_stats
          where img03_id = :arg_1
          '''
    cursor.execute(cmd, arg_1=int(img03_id_str))
    result = cursor.fetchall()

    # If the record already exists, check to see if it was successful
    wkflow_flag = 0
    for record in result:
        wkflow_status = record[1]
        if wkflow_status == 'PASS':
            wkflow_flag = 1
            rs_id = record[0]

    # Log if already found and exit
    if wkflow_flag:
        ndar_log.info('Image already successfully ran, found at RS_ID: %d' % rs_id)
        sys.exit()

    # --- Download and extract data from NDAR_Central S3 bucket ---
    nifti_file = base_path + 'inputs-ef/' + img03_id_str + '.nii.gz'
    # Execute ndar_unpack for that subject
    cmd = './ndar_unpack'
    if not os.path.exists(nifti_file):
        cmd_list = [cmd, '--aws-access-key-id', aws_access_key_id,
                    '--aws-secret-access-key', aws_secret_access_key,
                    '-v', nifti_file, s3_path]
        cmd_str = ' '.join(cmd_list)
        ndar_log.info('Executing command: %s ' % cmd_str)
        p = subprocess.Popen(cmd_list, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        # Wait for ndar_unpack to finish and capture its output
        stdout, stderr = p.communicate()
        ndar_log.info(stdout)
    else:
        ndar_log.info('Nifti file already present for IMAGE03 ID %s' % img03_id_str)
        ndar_log.info('ndar_unpack did not need to run')

    extract_status_str = 'PASS'
    # If the file was never created, log the failure, upload the log, and exit
    if not os.path.exists(nifti_file):
        ndar_log.info('File extraction FAILED for IMAGE03 ID %s' % img03_id_str)
        extract_status_str = 'FAIL'
        # Upload the log file
        time_str = time.strftime('%Y-%m-%d_%H%M-%S', time.localtime(time.time()))
        s3_filename = time_str + '_' + img03_id_str
        up_log_list = []
        s3_log_list = []
        s3_log_path = 'logs/' + s3_filename + '.log'
        up_log_list.append(log_file)
        s3_log_list.append(s3_log_path)
        upload_to_s3(bucket, up_log_list, s3_log_list)
        # Finally upload the record to the database
        add_db_record(cursor, img03_id_str, 'N/A', extract_status_str,
                      'https://s3.amazonaws.com/ndar-data/' + s3_log_path,
                      'N/A', 'N/A')
        # And quit
        sys.exit()

    # Create the nipype workflow
    wf, crash_dir = create_workflow(base_path, img03_id_str, nifti_file,
                                    oasis_path)

    # --- Run the workflow ---
    wf_base_dir = base_path + 'work-dirs/' + img03_id_str
    up_nifti_path = wf_base_dir + \
        '/output/OUTPUT_CorticalThicknessNormalizedToTemplate.nii.gz'
    up_roi_path = wf_base_dir + '/output/ROIstats.txt'
    # Check whether the outputs are already there
    if os.path.exists(up_nifti_path) and os.path.exists(up_roi_path):
        wf_status = 1
    else:
        wf_status = 0
    # Only run the workflow if the outputs do not already exist
    if wf_status == 0:
        try:
            ndar_log.info('Running the workflow...')
            wf.run()
            # We're successful at this point, add it as a file to the completed path
            ndar_log.info('Workflow completed successfully for IMAGE03 ID %s'
                          % img03_id_str)
            wf_status = 1
            finish_str = 'Finish time: %s'
        # If the workflow run fails
        except:
            ndar_log.info('ACT Workflow failed for IMAGE03 ID %s' % img03_id_str)
            finish_str = 'Crash time: %s'
    else:
        finish_str = 'Workflow did not need to run as files were already there: %s'

    # Log finish and total computation time
    fin = time.time()
    elapsed = (fin - start)/60
    ndar_log.info(finish_str % time.ctime(fin))
    ndar_log.info('Total time running IMAGE03 ID %s is: %s minutes'
                  % (img03_id_str, str(elapsed)))

    up_list = []
    s3_list = []
    time_str = time.strftime('%Y-%m-%d_%H-%M-%S', time.localtime(fin))
    s3_filename = time_str + '_' + img03_id_str
    # If workflow completed successfully
    if wf_status:
        # Define cloud data and status
        wf_status_str = 'PASS'
        s3_nifti_path = 'outputs/' + img03_id_str + '/' + img03_id_str + \
            '_corticalthickness_normd.nii.gz'
        s3_roi_path = 'outputs/' + img03_id_str + '/' + img03_id_str + \
            '_ROIstats.txt'
        # Full S3 paths in the ndar-data bucket
        full_s3_nifti_path = 's3://ndar-data/' + s3_nifti_path
        full_s3_roi_path = 's3://ndar-data/' + s3_roi_path
        # Upload paths
        #wf_base_dir = base_path + 'work-dirs/' + img03_id_str
        #up_nifti_path = wf_base_dir + \
        #    '/output/OUTPUT_CorticalThicknessNormalizedToTemplate.nii.gz'
        #up_roi_path = wf_base_dir + '/output/ROIstats.txt'
        # Append upload/s3 lists with path names
        up_list.append(up_nifti_path)
        up_list.append(up_roi_path)
        s3_list.append(s3_nifti_path)
        s3_list.append(s3_roi_path)
        # Log nifti and roi files upload
        ndar_log.info('Uploading nifti and roi files...')
        # Create dictionary of ROIs for that subject
        sub_roi_dic = create_roi_dic(up_roi_path)
        try:
            # Insert the ROIs into the unorm'd and norm'd databases
            ndar_log.info('uploading rois...')
            print '----------------------------------'
            insert_unormd(cursor, img03_id_str, roi_dic=sub_roi_dic)
            ndar_log.info('uploading imgs...')
            # Insert the act nifti into the unorm'd and norm'd databases
            insert_unormd(cursor, img03_id_str, s3_path=full_s3_nifti_path)
        except:
            e = sys.exc_info()[0]
            ndar_log.info('Error inserting results to MINDAR, message: %s' % str(e))
            wf_status_str = 'Error inserting results into MINDAR database'
    # Otherwise, there were crash files, upload those
    else:
        # Define cloud data and status
        wf_status_str = 's3://ndar-data/crashes/' + s3_filename + '/'
        full_s3_nifti_path = 'N/A'
        full_s3_roi_path = 'N/A'
        # Find crash file names/paths
        for root, dirs, files in os.walk(crash_dir):
            root_path = os.path.abspath(root)
            crash_files = files
        # Append crash file and s3 path lists
        for f in crash_files:
            crash_path = root_path + '/' + f
            s3_crash_path = 'crashes/' + s3_filename + '/' + f
            up_list.append(crash_path)
            s3_list.append(s3_crash_path)
        # Log crash file upload
        ndar_log.info('Uploading crash files into %s ...' % wf_status_str)

    # Call the upload function
    upload_to_s3(bucket, up_list, s3_list)
    ndar_log.info('Done')

    # Upload the log file
    up_log_list = []
    s3_log_list = []
    s3_log_path = 'logs/' + s3_filename + '.log'
    up_log_list.append(log_file)
    s3_log_list.append(s3_log_path)
    upload_to_s3(bucket, up_log_list, s3_log_list)

    # Finally upload the record to the database
    add_db_record(cursor, img03_id_str, wf_status_str, extract_status_str,
                  's3://ndar-data/' + s3_log_path, full_s3_nifti_path,
                  full_s3_roi_path)
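
# Driver sketch (an assumption, not part of the original script): the per-subject
# main() above is described as being driven by a subject index passed as the first
# command-line argument. This shows how the subject-list yaml might be loaded and
# passed in; the yaml path below is a hypothetical placeholder.
if __name__ == '__main__':
    import sys
    import yaml
    sub_list_yaml = '/data/act_run/sub_list.yml'    # hypothetical subject-list yaml
    sub_list = yaml.load(open(sub_list_yaml, 'r'))  # list of (img03_id, s3_path) tuples
    sub_idx = int(sys.argv[1])                      # 1-based index into sub_list
    main(sub_list, sub_idx)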