def read_report_file_yaml(report_file, date_obj, utag):
    """
    Extract information from a run_log.yaml file and return it as a dictionary.
    """
    try:
        with open(report_file, 'r') as rf:
            try:
                data = yaml.safe_load(rf)
            except yaml.YAMLError as exc:
                APP_LOGGER.error("YAMLError %s received" % exc)
                return None
            if not data:
                APP_LOGGER.debug("YAML file, %s, is empty." % report_file)
                return None

            data[DATETIME] = date_obj
            data[FILE_TYPE] = 'yaml'
            data[UTAG] = utag
            if USER in data and isinstance(data[USER], str):
                data[USER] = [user.strip() for user in data[USER].split(',')]

            # distinguish reports from Web UI and Client UI
            if CARTRIDGE_BC not in data:
                report_obj = RunReportWebUI.from_dict(**data)
            else:
                report_obj = RunReportClientUI.from_dict(**data)
            return report_obj.as_dict()
    except:
        APP_LOGGER.error("Error raised for report %s: %s" %
                         (report_file, traceback.format_exc()))
        return None
def read_report_file_txt(report_file, date_obj, utag):
    """
    Extract information from a run_log.txt file and return it as a dictionary.
    """
    try:
        with open(report_file, 'r') as rf:
            lines = rf.readlines()
        if not lines:
            APP_LOGGER.error("The log file, %s, is empty." % report_file)
            return None

        data = {FILE_TYPE: 'txt', DATETIME: date_obj, UTAG: utag}
        for i, line in enumerate(lines):
            if line.strip():
                try:
                    # split on the first colon only so values containing ':'
                    # are not discarded
                    key, value = line.split(':', 1)
                    key, value = key.strip(), value.strip()
                    if key == USER_TXT and value:
                        data[key] = [user.strip() for user in value.split(',')]
                    elif key in [RUN_DESCRIPTION_TXT, EXIT_NOTES_TXT,
                                 TDI_STACKS_TXT]:
                        # these fields may span several lines; collect until
                        # the next "key: value" line
                        values = [value]
                        j = i + 1
                        while j < len(lines) and ':' not in lines[j]:
                            values.append(lines[j].strip())
                            j += 1
                        if key == TDI_STACKS_TXT:
                            regex = ARCHIVES_PATH + '/[^/]+'
                            data[key] = re.findall(regex, ''.join(values))
                        else:
                            data[key] = ', '.join(values)
                    elif value:
                        data[key] = value
                except:
                    continue

        report_obj = RunReportWebUI.from_dict(**data)
        return report_obj.as_dict()
    except:
        APP_LOGGER.error("Error raised for report %s: %s" %
                         (report_file, traceback.format_exc()))
        return None
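# Illustrative sketch of the run_log.txt layout that read_report_file_txt
# expects: "key: value" lines, comma-separated users, and multi-line values
# for the description/notes/TDI-stack fields that continue until the next
# "key: value" line. The field names and values below are hypothetical
# examples (the real keys come from USER_TXT, RUN_DESCRIPTION_TXT,
# EXIT_NOTES_TXT, and TDI_STACKS_TXT), not taken from a real report:
#
#     user: jdoe, asmith
#     run_description: titration run
#         second line of the description
#     exit_notes: completed without errors
#     tdi_stacks: <ARCHIVES_PATH>/stack_0001
#         <ARCHIVES_PATH>/stack_0002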
def update_archives():
    '''
    Update the database with available primary analysis archives. It is not
    an error if zero archives are available at this moment.

    @return True if database is successfully updated, False otherwise
    '''
    APP_LOGGER.info("Updating database with available archives...")
    exist_archives = _DB_CONNECTOR.distinct(ARCHIVES_COLLECTION, ARCHIVE)
    if os.path.isdir(ARCHIVES_PATH):
        # Remove archives named similarly (same name, different capitalization)
        archives = io_utilities.get_subfolders(ARCHIVES_PATH)

        # Check yyyy_mm/dd/HHMM_pilotX location
        run_folders = get_run_folders()
        for folder in run_folders:
            archives.extend(io_utilities.get_subfolders(folder))

        new_archives = [x for x in archives
                        if os.path.basename(x) not in exist_archives]
        records = [{ARCHIVE: os.path.basename(archive),
                    ARCHIVE_PATH: remove_disk_directory(archive)}
                   for archive in new_archives]
        APP_LOGGER.info("Found %d archives" % (len(records)))
        if len(records) > 0:
            # There is a possible race condition here. Ideally these operations
            # would be performed in concert atomically
            _DB_CONNECTOR.insert(ARCHIVES_COLLECTION, records)
    else:
        APP_LOGGER.error(
            "Couldn't locate archives path '%s', to update database." %
            ARCHIVES_PATH)
        return False

    APP_LOGGER.info("Database successfully updated with available archives.")
    return True
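# For reference, a rough sketch of the on-disk layout update_archives scans;
# the directory names are illustrative only, and the actual roots come from
# ARCHIVES_PATH and get_run_folders():
#
#     <ARCHIVES_PATH>/
#         SomeArchive/              -> inserted as ARCHIVE "SomeArchive"
#     <run folder>/2019_06/20/1130_pilot3/
#         AnotherArchive/           -> picked up via get_run_folders()
#
# Each new subfolder becomes one record holding its basename (ARCHIVE) and
# its path with the disk prefix stripped (ARCHIVE_PATH).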
def handle_request(cls, query_params, path_fields):
    '''
    Example API call:
    http://<hostname>:<port>/api/v1/MeltingTemperatures/<user>/IDT?name=foo&sequence=bar

    In the above example, query_params would be {"name": "foo",
    "sequence": "bar"} and path_fields would be [<user>]. After collecting
    input parameters, call process_request(). Then return the results in the
    requested format.
    '''
    (params_dict, _) = cls._parse_query_params(query_params)
    cls._handle_path_fields(path_fields, params_dict)

    response = {}
    http_status_code = None
    try:
        response, http_status_code = cls.process_request(params_dict)
    except:
        APP_LOGGER.error("Failed to process request: %s" %
                         traceback.format_exc())
        http_status_code = 500
        response[ERROR] = str(sys.exc_info()[1])

    return (make_clean_response(response, http_status_code), None, None)
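# Worked example based on the docstring above. For the request
#     http://<hostname>:<port>/api/v1/MeltingTemperatures/jdoe/IDT?name=foo&sequence=bar
# (user "jdoe" is illustrative), handle_request receives query_params with
# name=foo and sequence=bar and path_fields == ["jdoe"]; _parse_query_params
# and _handle_path_fields merge these into a single params_dict, roughly:
#
#     params_dict = {"name": ["foo"], "sequence": ["bar"], "user": ["jdoe"]}
#
# (whether values are list-wrapped depends on _parse_query_params; the
# handlers below index with [0], so list-wrapping is assumed here). On
# success the tuple (make_clean_response(response, http_status_code), None,
# None) carries whatever process_request returned; on any exception the
# status code is forced to 500 and the exception text is stored under ERROR.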
def update_run_reports(date_folders=None):
    '''
    Update the database with available run reports. It is not an error
    if zero reports are available at this moment.

    @return True if database is successfully updated, False otherwise
    '''
    APP_LOGGER.info("Updating database with available run reports...")

    # fetch utags from run report collection
    db_utags = _DB_CONNECTOR.distinct(RUN_REPORT_COLLECTION, UTAG)

    if os.path.isdir(RUN_REPORT_PATH):
        if date_folders is None:
            try:
                latest_date = _DB_CONNECTOR.find_max(RUN_REPORT_COLLECTION,
                                                     DATETIME)[DATETIME]
            except TypeError:
                latest_date = datetime.now()

            def valid_date(folder):
                date_obj = get_date_object(folder)
                return date_obj >= latest_date - timedelta(days=6)

            date_folders = [folder for folder in os.listdir(RUN_REPORT_PATH)
                            if re.match(r'\d{2}_\d{2}_\d{2}', folder)
                            and valid_date(folder)]

            # New file location
            new_date_folders = get_date_folders()
            date_folders.extend(f for f in new_date_folders if valid_date(f))

        date_folders = [os.path.join(RUN_REPORT_PATH, f) for f in date_folders]
        date_folders = [f for f in date_folders if os.path.isdir(f)]

        reports = list()
        for folder in date_folders:
            for sf in os.listdir(folder):
                report_file_path = get_run_info_path(folder, sf)
                if report_file_path is None:
                    continue
                date_obj = get_date_object(folder)
                data_folder = os.path.join(RUN_REPORT_PATH, folder, sf)
                utag = set_utag(date_obj, sf)
                if utag not in db_utags:
                    # if it does not exist, insert it into the collection
                    log_data = read_report_file(report_file_path, date_obj,
                                                utag)
                    if log_data is None or all(
                            not log_data[DEVICE_NAME].lower().startswith(x)
                            for x in ['pilot', 'beta']):
                        log_data = {DATETIME: date_obj, UTAG: utag}
                    if IMAGE_STACKS in log_data:
                        # add image stacks to archive collection
                        update_image_stacks(log_data, data_folder)
                        # find HDF5 datasets and add them to HDF5 collection
                        hdf5_datasets = get_hdf5_datasets(log_data, data_folder)
                        log_data[IMAGE_STACKS].extend(hdf5_datasets)
                    # add report directory path
                    log_data[DIR_PATH] = remove_disk_directory(
                        os.path.dirname(report_file_path))
                    reports.append(log_data)
                else:
                    # if it exists, check the HDF5 collection for new datasets
                    log_data = _DB_CONNECTOR.find_one(RUN_REPORT_COLLECTION,
                                                      UTAG, utag)
                    # If a run report was previously missing or had the wrong
                    # format, the mongo document only has three or four fields:
                    # _id, datetime, unique_tag, and maybe dir_path. If this
                    # occurs, try reading the run report again.
                    if not set(log_data.keys()) - set([ID, DATETIME, UTAG,
                                                       DIR_PATH]):
                        log_data = read_report_file(report_file_path, date_obj,
                                                    utag)
                        if log_data is None or all(
                                not log_data[DEVICE_NAME].lower().startswith(x)
                                for x in ['pilot', 'beta']):
                            continue
                        # add report directory path
                        log_data[DIR_PATH] = remove_disk_directory(
                            os.path.dirname(report_file_path))
                        # add image stacks to archive collection
                        update_image_stacks(log_data, data_folder)
                    if IMAGE_STACKS in log_data:
                        # find HDF5 datasets and add new records to the HDF5
                        # collection
                        new_datasets = set(get_hdf5_datasets(log_data,
                                                             data_folder))
                        if new_datasets:
                            # exclude already uploaded HDF5 datasets
                            exist_datasets = set(
                                d for d in log_data[IMAGE_STACKS]
                                if isinstance(d, (str, unicode)))
                            new_datasets = list(new_datasets - exist_datasets)
                            if new_datasets:
                                _DB_CONNECTOR.update(
                                    RUN_REPORT_COLLECTION,
                                    {UTAG: utag},
                                    {"$addToSet": {
                                        IMAGE_STACKS: {'$each': new_datasets}}})
                                APP_LOGGER.info(
                                    'Updated run report utag=%s with %d '
                                    'datasets' % (utag, len(new_datasets)))

        APP_LOGGER.info("Found %d run reports" % (len(reports)))
        if len(reports) > 0:
            # There is a possible race condition here. Ideally these operations
            # would be performed in concert atomically
            _DB_CONNECTOR.insert(RUN_REPORT_COLLECTION, reports)
    else:
        APP_LOGGER.error(
            "Couldn't locate run report path '%s', to update database." %
            RUN_REPORT_PATH)
        return False

    APP_LOGGER.info(
        "Database successfully updated with available run reports.")
    return True
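# Note on the date-folder filter in update_run_reports: the regex
# r'\d{2}_\d{2}_\d{2}' matches legacy folder names such as "19_06_20"
# (yy_mm_dd is assumed here; get_date_object defines the actual parsing),
# and valid_date() keeps only folders no older than six days before the
# newest DATETIME already stored in RUN_REPORT_COLLECTION, so a rescan of
# RUN_REPORT_PATH stays cheap. Folders from the newer get_date_folders()
# location are filtered by the same date check.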
def process_request(cls, params_dict):
    image_stack_tgz = params_dict[cls._file_param][0]
    stack_type = params_dict[cls._stack_type_param][0]
    img_stack_name = params_dict[cls._name_param][0]
    short_desc = params_dict[cls._short_desc_param][0]
    http_status_code = 200
    uuid = str(uuid4())
    tmp_archive_path = os.path.join(TMP_PATH, uuid + '.tar.gz')
    archive_path = os.path.join(RESULTS_PATH, uuid + '.tar.gz')

    json_response = {
        FILENAME: image_stack_tgz.filename,
        UUID: uuid,
        DATESTAMP: datetime.today(),
    }

    try:
        # check tar file
        image_stack_tgz.save(tmp_archive_path)
        image_stack_tgz.close()
        tar_error, nimgs = check_mon_tar_structure(tmp_archive_path, stack_type)

        # check for existing image stacks
        existing_stacks = cls._DB_CONNECTOR.find(
            IMAGES_COLLECTION,
            {NAME: img_stack_name, STACK_TYPE: stack_type},
            [NAME])

        if existing_stacks:
            http_status_code = 403
            json_response[ERROR] = 'Image stack with given name already ' \
                                   'exists.'
        elif tar_error:
            APP_LOGGER.error(tar_error)
            http_status_code = 415
            json_response[ERROR] = tar_error
        else:
            url = 'http://%s/results/%s/%s' % (HOSTNAME, PORT,
                                               os.path.basename(archive_path))
            shutil.copy(tmp_archive_path, archive_path)
            json_response[RESULT] = archive_path
            json_response[URL] = url
            json_response[NAME] = img_stack_name
            json_response[DESCRIPTION] = short_desc
            json_response[NUM_IMAGES] = nimgs
            json_response[STACK_TYPE] = stack_type
            cls._DB_CONNECTOR.insert(IMAGES_COLLECTION, [json_response])
    except IOError:
        APP_LOGGER.exception(traceback.format_exc())
        http_status_code = 415
        json_response[ERROR] = str(sys.exc_info()[1])
    except:
        APP_LOGGER.exception(traceback.format_exc())
        http_status_code = 500
        json_response[ERROR] = str(sys.exc_info()[1])
    finally:
        if ID in json_response:
            del json_response[ID]
        silently_remove_file(tmp_archive_path)

    return make_clean_response(json_response, http_status_code)
def update_hdf5s():
    APP_LOGGER.info("Updating database with available HDF5 files...")

    # check if run report path exists
    if not os.path.isdir(RUN_REPORT_PATH):
        APP_LOGGER.error(
            "Couldn't locate run report path '%s', to update database." %
            RUN_REPORT_PATH)
        return False

    # Find new HDF5 files using nested listdirs, which is much faster than
    # glob, os.walk, or scandir. Only search two directory levels within the
    # run report folder; each HDF5 file is assumed to live in a subfolder of
    # the run report folder.
    database_paths = set(
        _DB_CONNECTOR.distinct_sorted(HDF5_COLLECTION, HDF5_PATH))
    current_paths = set()
    for par_ in os.listdir(RUN_REPORT_PATH):
        report_dir = os.path.join(RUN_REPORT_PATH, par_)
        if os.path.isdir(report_dir):
            for sub_ in os.listdir(report_dir):
                subdir = os.path.join(report_dir, sub_)
                if os.path.isdir(subdir):
                    hdf5s = [f for f in os.listdir(subdir)
                             if os.path.splitext(f)[-1] in
                             VALID_HDF5_EXTENSIONS]
                    hdf5_paths = [os.path.join(subdir, f) for f in hdf5s]
                    current_paths.update(hdf5_paths)

    # Check yyyy_mm/dd/HHMM_pilotX location
    run_folders = get_run_folders()
    for folder in run_folders:
        hdf5s = [f for f in os.listdir(folder)
                 if os.path.splitext(f)[-1] in VALID_HDF5_EXTENSIONS]
        hdf5_paths = [os.path.join(folder, f) for f in hdf5s]
        current_paths.update(hdf5_paths)

    # update the database with any new files
    new_hdf5_paths = current_paths - database_paths
    new_records = list()
    for hdf5_path in new_hdf5_paths:
        try:
            with h5py.File(hdf5_path, 'r') as h5_file:
                dataset_names = h5_file.keys()
                for dsname in dataset_names:
                    if any(re.match(pat, dsname) for pat in
                           [r'^\d{4}-\d{2}-\d{2}_\d{4}\.\d{2}',
                            r'^Pilot\d+_\d{4}-\d{2}-\d{2}_\d{4}\.\d{2}']):
                        new_records.append({
                            HDF5_PATH: remove_disk_directory(hdf5_path),
                            HDF5_DATASET: dsname,
                        })
        except:
            APP_LOGGER.exception(
                'Unable to get dataset information from HDF5 file: %s' %
                hdf5_path)

    if new_records:
        # There is a possible race condition here. Ideally these operations
        # would be performed in concert atomically
        _DB_CONNECTOR.insert(HDF5_COLLECTION, new_records)
        APP_LOGGER.info('Updated database with %s new HDF5 files' %
                        len(new_records))
    else:
        APP_LOGGER.info('Unable to find any new HDF5 files')
    return True
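# For reference, dataset names accepted by the two patterns above look like
# "2019-03-14_1130.25" or "Pilot3_2019-03-14_1130.25" (a date, a four-digit
# run time, and a two-digit suffix); any other dataset in an HDF5 file is
# ignored. These example names are illustrative, not taken from real data.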
def process_request(cls, params_dict):
    users = params_dict[cls._users_param]
    date = params_dict[cls._date_param][0]
    archive_name = params_dict[cls._archive_param][0]
    beta = params_dict[cls._beta_param][0]
    device = params_dict[cls._device_param][0]
    dye_prof_metrics = params_dict[cls._dye_profile_metrics_param]
    surfactant = params_dict[cls._surfactant_param][0]

    json_response = {}

    # Ensure archive directory is valid
    try:
        archives = get_archive_dirs(archive_name)
    except:
        APP_LOGGER.exception(traceback.format_exc())
        json_response[ERROR] = str(sys.exc_info()[1])
        return make_clean_response(json_response, 500)

    # Ensure only one valid archive is found
    if len(archives) != 1:
        APP_LOGGER.warning("Expected 1 archive, found %d" % len(archives))
        return make_clean_response(json_response, 404)

    response = {
        USERS: users,
        DATE: date,
        ARCHIVE: archives[0],
        BETA: beta,
        DEVICE: device,
        DYE_PROFILE_METRICS: dye_prof_metrics,
        SURFACTANT: surfactant,
        STATUS: JOB_STATUS.submitted,                # @UndefinedVariable
        JOB_TYPE_NAME: JOB_TYPE.dye_profile_images,  # @UndefinedVariable
        SUBMIT_DATESTAMP: datetime.today(),
    }

    status_code = 200
    try:
        # # Create helper functions
        # callable = PaProcessCallable(archive, dyes, device,
        #                              major, minor,
        #                              offset, use_iid,
        #                              outfile_path,
        #                              config_path,
        #                              response[UUID],
        #                              cls._DB_CONNECTOR)
        # callback = make_process_callback(response[UUID],
        #                                  outfile_path,
        #                                  config_path,
        #                                  cls._DB_CONNECTOR)
        #
        # # Add to queue and update DB
        # cls._DB_CONNECTOR.insert(PA_PROCESS_COLLECTION, [response])
        # cls._EXECUTION_MANAGER.add_job(response[UUID],
        #                                abs_callable, callback)
        pass
    except:
        APP_LOGGER.exception(traceback.format_exc())
        response[ERROR] = str(sys.exc_info()[1])
        status_code = 500
    finally:
        if ID in response:
            del response[ID]

    return make_clean_response(response, status_code)


def process_request(cls, params_dict):
    image_stack_tgz = params_dict[cls._file_param][0]
    exp_def_name = params_dict[cls._exp_defs_param][0]
    img_stack_name = params_dict[cls._name_param][0]
    short_desc = params_dict[cls._short_desc_param][0]
    http_status_code = 200
    uuid = str(uuid4())
    tmp_archive_path = os.path.join(TMP_PATH, uuid + ".tar.gz")
    archive_path = os.path.join(RESULTS_PATH, uuid + ".tar.gz")

    json_response = {
        FILENAME: image_stack_tgz.filename,
        UUID: uuid,
        DATESTAMP: datetime.today(),
    }

    try:
        # check tar file
        image_stack_tgz.save(tmp_archive_path)
        image_stack_tgz.close()
        tar_error, nimgs = check_ham_tar_structure(tmp_archive_path, HAM)

        # check for existing image stacks
        existing_stacks = cls._DB_CONNECTOR.find(
            IMAGES_COLLECTION,
            {NAME: img_stack_name, STACK_TYPE: HAM},
            [NAME])

        # check for exp def
        exp_defs = ExperimentDefinitions()
        exp_def_uuid = exp_defs.get_experiment_uuid(exp_def_name)

        if existing_stacks:
            http_status_code = 403
            json_response[ERROR] = "Image stack with given name already " \
                                   "exists."
        elif not exp_def_uuid:
            http_status_code = 404
            json_response[ERROR] = "Couldn't locate UUID for " \
                                   "experiment definition."
        elif tar_error:
            APP_LOGGER.error(tar_error)
            http_status_code = 415
            json_response[ERROR] = tar_error
        else:
            url = "http://%s/results/%s/%s" % (HOSTNAME, PORT,
                                               os.path.basename(archive_path))
            shutil.copy(tmp_archive_path, archive_path)
            json_response[RESULT] = archive_path
            json_response[URL] = url
            json_response[NAME] = img_stack_name
            json_response[DESCRIPTION] = short_desc
            json_response[EXP_DEF_NAME] = exp_def_name
            json_response[EXP_DEF_UUID] = exp_def_uuid
            json_response[NUM_IMAGES] = nimgs
            json_response[STACK_TYPE] = HAM
            cls._DB_CONNECTOR.insert(IMAGES_COLLECTION, [json_response])
    except IOError:
        APP_LOGGER.exception(traceback.format_exc())
        http_status_code = 415
        json_response[ERROR] = str(sys.exc_info()[1])
    except:
        APP_LOGGER.exception(traceback.format_exc())
        http_status_code = 500
        json_response[ERROR] = str(sys.exc_info()[1])
    finally:
        if ID in json_response:
            del json_response[ID]
        silently_remove_file(tmp_archive_path)

    return make_clean_response(json_response, http_status_code)
def process_request(cls, params_dict):
    image_stack_tgz = params_dict[cls._file_param][0]
    exp_def_name = params_dict[cls._exp_defs_param][0]
    img_stack_name = params_dict[cls._name_param][0]
    short_desc = params_dict[cls._short_desc_param][0]
    http_status_code = 200
    uuid = str(uuid4())
    tmp_archive_path = os.path.join(TMP_PATH, uuid + ".tar.gz")
    archive_path = os.path.join(RESULTS_PATH, uuid + ".tar.gz")

    json_response = {
        FILENAME: image_stack_tgz.filename,
        UUID: uuid,
        DATESTAMP: datetime.today(),
    }

    try:
        # check tar file
        image_stack_tgz.save(tmp_archive_path)
        image_stack_tgz.close()
        tar_error, nimgs = check_ham_tar_structure(tmp_archive_path, HAM)

        # check for existing image stacks
        existing_stacks = cls._DB_CONNECTOR.find(
            IMAGES_COLLECTION,
            {NAME: img_stack_name, STACK_TYPE: HAM},
            [NAME])

        # check for exp def
        exp_def_fetcher = ExpDefHandler()
        exp_def_uuid = exp_def_fetcher.get_experiment_uuid(exp_def_name)

        if existing_stacks:
            http_status_code = 403
            json_response[ERROR] = "Image stack with given name already " \
                                   "exists."
        elif not exp_def_uuid:
            http_status_code = 404
            json_response[ERROR] = "Couldn't locate UUID for " \
                                   "experiment definition."
        elif tar_error:
            APP_LOGGER.error(tar_error)
            http_status_code = 415
            json_response[ERROR] = tar_error
        else:
            url = "http://%s/results/%s/%s" % (HOSTNAME, PORT,
                                               os.path.basename(archive_path))
            shutil.copy(tmp_archive_path, archive_path)
            json_response[RESULT] = archive_path
            json_response[URL] = url
            json_response[NAME] = img_stack_name
            json_response[DESCRIPTION] = short_desc
            json_response[EXP_DEF_NAME] = exp_def_name
            json_response[EXP_DEF_UUID] = exp_def_uuid
            json_response[NUM_IMAGES] = nimgs
            json_response[STACK_TYPE] = HAM
            cls._DB_CONNECTOR.insert(IMAGES_COLLECTION, [json_response])
    except IOError:
        APP_LOGGER.exception(traceback.format_exc())
        http_status_code = 415
        json_response[ERROR] = str(sys.exc_info()[1])
    except:
        APP_LOGGER.exception(traceback.format_exc())
        http_status_code = 500
        json_response[ERROR] = str(sys.exc_info()[1])
    finally:
        if ID in json_response:
            del json_response[ID]
        silently_remove_file(tmp_archive_path)

    return make_clean_response(json_response, http_status_code)
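# Response codes produced by the HAM image-stack upload handler above:
#   403 - an image stack with the requested name already exists
#   404 - no experiment-definition UUID found for exp_def_name
#   415 - the tar archive failed check_ham_tar_structure, or an IOError
#         occurred while saving/copying it
#   500 - any other unexpected exception
#   200 - archive copied to RESULTS_PATH and its record inserted into
#         IMAGES_COLLECTION
# The temporary upload under TMP_PATH is removed in every case.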