def lasif_process_data(parser, args):
    """
    Launch data processing.

    This function works with MPI. Don't use too many cores, as I/O quickly
    becomes the limiting factor. It also works without MPI, but then only
    one core actually does any work.
    """
    parser.add_argument(
        "events",
        help="One or more events. If none given, all will be done.",
        nargs="*",
    )
    parser.add_argument(
        "--iteration",
        help="Take all events used in iteration",
        default=None,
    )

    args = parser.parse_args(args)

    api.process_data(
        lasif_root=".",
        events=args.events if args.events else None,
        iteration=args.iteration,
    )
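# A minimal usage sketch for the entry point above, assuming only that
# `lasif_process_data` and LASIF's `api` module are importable as in the
# surrounding code. The event name "GCMT_event_EXAMPLE" is a placeholder.
import argparse


def run_process_data_cli_example():
    parser = argparse.ArgumentParser(prog="lasif process_data")
    # Process a single named event; passing no positional events would
    # process every event in the project.
    lasif_process_data(parser, ["GCMT_event_EXAMPLE"])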
def process_data(self, event: str):
    """
    Process the data for the periods specified in the Lasif project.

    :param event: Name of event to be processed
    :type event: str
    """
    if self._already_processed(event):
        return

    lapi.process_data(self.lasif_comm, events=[event])
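# Hypothetical sketch of the `_already_processed` check that both variants
# of process_data rely on. The real helper is defined elsewhere in the
# class; this stand-in only tests for the processed waveform file, using
# the same naming convention as the remote-processing variant below.
import os


def already_processed_sketch(lasif_root, min_period, max_period, event):
    proc_filename = (
        f"preprocessed_{int(min_period)}s_to_{int(max_period)}s.h5"
    )
    proc_file = os.path.join(
        lasif_root, "PROCESSED_DATA", "EARTHQUAKES", event, proc_filename
    )
    return os.path.exists(proc_file)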
def process_data(self, event: str):
    """
    Process the data for the periods specified in the Lasif project.

    If remote data processing is enabled and the processed file already
    exists on the remote machine, it is downloaded instead of being
    recomputed locally.

    :param event: Name of event to be processed
    :type event: str
    """
    if self._already_processed(event):
        return

    if self.comm.project.remote_data_processing:
        # Build the local path where the processed file should end up.
        lasif_root = self.comm.project.lasif_root
        proc_filename = (
            f"preprocessed_{int(self.comm.project.min_period)}s_"
            f"to_{int(self.comm.project.max_period)}s.h5"
        )
        local_proc_folder = os.path.join(
            lasif_root, "PROCESSED_DATA", "EARTHQUAKES", event
        )
        local_proc_file = os.path.join(local_proc_folder, proc_filename)
        if not os.path.exists(local_proc_folder):
            os.makedirs(local_proc_folder)

        # If the processed file already exists on the remote machine,
        # fetch it. Download to a temporary path first and rename only
        # once the transfer is complete, so an interrupted download never
        # looks like a finished file.
        remote_proc_file_name = f"{event}_{proc_filename}"
        hpc_cluster = get_site(self.comm.project.site_name)
        remote_processed_dir = os.path.join(
            self.comm.project.remote_inversionson_dir, "PROCESSED_DATA"
        )
        remote_proc_path = os.path.join(
            remote_processed_dir, remote_proc_file_name
        )
        tmp_local_path = local_proc_file + "_tmp"
        if hpc_cluster.remote_exists(remote_proc_path):
            hpc_cluster.remote_get(remote_proc_path, tmp_local_path)
            os.rename(tmp_local_path, local_proc_file)
            # The processed data was retrieved from the remote machine,
            # so there is nothing left to do.
            return

    # Fall back to processing locally through LASIF.
    lapi.process_data(self.lasif_comm, events=[event])
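# The download-then-rename pattern above is worth isolating. A minimal
# standalone sketch, assuming only that `remote_get(src, dst)` copies a
# remote file to a local path (as hpc_cluster.remote_get does above):
import os


def fetch_atomically(remote_get, remote_path, local_path):
    tmp_path = local_path + "_tmp"
    remote_get(remote_path, tmp_path)
    # os.rename is atomic within a single POSIX filesystem, so any
    # concurrent existence check sees either no file or a complete one.
    os.rename(tmp_path, local_path)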
def process_random_unprocessed_event(self) -> bool:
    """
    Instead of sleeping while we wait in the HPC queue, we can also
    process a random unprocessed event. That is what this function does.

    It first tries to process a high-priority event, from the validation
    dataset or the current iteration; otherwise it tries to process any
    other event that may be used in the future. The function returns as
    soon as one event has been processed, or when there is nothing left
    to process.

    :return: True if an event was processed, otherwise False
    :rtype: bool
    """
    events_in_iteration = self.comm.project.events_in_iteration
    events = self.comm.lasif.list_events()
    validation_events = self.comm.project.validation_dataset
    msg = (
        "Seems like there is nothing to do now. "
        "I might as well process some random event."
    )

    if not self.everything_processed:
        self.everything_processed = True
        # First give the most urgent events a try: validation data,
        # then the current iteration, then any remaining event.
        if not self.validation_data_processed:
            self.validation_data_processed = True
            for event in validation_events:
                if self._already_processed(event):
                    continue
                self.print(msg)
                self.print(f"Processing validation {event}...")
                self.validation_data_processed = False
                lapi.process_data(self.lasif_comm, events=[event])
                return True
        for event in events_in_iteration:
            if self._already_processed(event):
                continue
            self.print(msg)
            self.print(f"Processing {event} from current iteration...")
            self.everything_processed = False
            lapi.process_data(self.lasif_comm, events=[event])
            return True
        for event in events:
            if self._already_processed(event):
                continue
            self.print(msg)
            self.print(f"Processing random other {event}...")
            self.everything_processed = False
            lapi.process_data(self.lasif_comm, events=[event])
            return True
    return False
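# Hedged sketch of the intended call site: while jobs sit in the HPC
# queue, keep the local core busy instead of sleeping outright. Both
# `jobs_still_pending` and `poll_interval` are illustrative placeholders,
# not part of Inversionson.
import time


def wait_for_queue(self, poll_interval=60.0):
    while self.jobs_still_pending():
        if not self.process_random_unprocessed_event():
            # Nothing left to preprocess; plain sleeping is all that
            # remains until the queued jobs finish.
            time.sleep(poll_interval)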