def preprocess_from_files(self, shot_files, use_shots):
    """Preprocess a random selection of shots in a process pool.

    Args:
        shot_files: shot list file objects, passed to
            ``ShotList.load_from_shot_list_files_objects``.
        use_shots: forwarded to ``ShotList.random_sublist`` (presumably
            the number of shots to pick -- confirm against ShotList).

    Returns:
        A ShotList of the preprocessed shots accepted by
        ``append_if_valid``. It is empty when every shot was omitted, in
        which case a warning is printed.
    """
    # all shots, including invalid ones
    all_signals = self.conf['paths']['all_signals']
    shot_list = ShotList()
    shot_list.load_from_shot_list_files_objects(shot_files, all_signals)
    shot_list_picked = shot_list.random_sublist(use_shots)
    num_picked = len(shot_list_picked)

    # empty
    used_shots = ShotList()

    # Leave two cores free, but always run at least one worker.
    use_cores = max(1, mp.cpu_count() - 2)
    pool = mp.Pool(use_cores)
    print('running in parallel on {} processes'.format(use_cores))
    start_time = time.time()
    try:
        # For serial debugging, swap pool.imap_unordered for builtin map.
        results = pool.imap_unordered(self.preprocess_single_file,
                                      shot_list_picked)
        # Count from 1 so the progress display ends at N/N.
        for i, shot in enumerate(results, 1):
            sys.stdout.write('\r{}/{}'.format(i, num_picked))
            # '\r' alone does not trigger a line-buffered flush.
            sys.stdout.flush()
            used_shots.append_if_valid(shot)
        pool.close()
    except BaseException:
        # Do not leave worker processes behind if anything fails
        # (including KeyboardInterrupt); the error is re-raised.
        pool.terminate()
        raise
    finally:
        pool.join()

    # Leading newline moves past the '\r' progress line.
    print('\nFinished Preprocessing {} files in {} seconds'.format(
        num_picked, time.time() - start_time))
    print('Omitted {} shots of {} total.'.format(
        num_picked - len(used_shots), num_picked))
    print('{}/{} disruptive shots'.format(
        used_shots.num_disruptive(), len(used_shots)))
    if len(used_shots) == 0:
        print("WARNING: All shots were omitted, please ensure raw data "
              "is complete and available at {}.".format(
                  self.conf['paths']['signal_prepath']))
    return used_shots
def preprocess_from_files(self, shot_files, use_shots):
    """Preprocess a random selection of shots in a process pool.

    Args:
        shot_files: shot list file objects, passed to
            ``ShotList.load_from_shot_list_files_objects``.
        use_shots: forwarded to ``ShotList.random_sublist`` (presumably
            the number of shots to pick -- confirm against ShotList).

    Returns:
        A ShotList of the preprocessed shots accepted by
        ``append_if_valid``. It is empty when every shot was omitted, in
        which case a warning is printed.
    """
    # all shots, including invalid ones
    all_signals = self.conf['paths']['all_signals']
    shot_list = ShotList()
    shot_list.load_from_shot_list_files_objects(shot_files, all_signals)
    shot_list_picked = shot_list.random_sublist(use_shots)
    num_picked = len(shot_list_picked)

    # empty
    used_shots = ShotList()

    # TODO(KGF): generalize the following line to perform well on
    # architectures other than CPUs, e.g. KNLs
    # min( <desired-maximum-process-count>, max(1,mp.cpu_count()-2) )
    use_cores = max(1, mp.cpu_count() - 2)
    pool = mp.Pool(use_cores)
    print('Running in parallel on {} processes'.format(use_cores))
    start_time = time.time()
    try:
        # For serial debugging, swap pool.imap_unordered for builtin map.
        results = pool.imap_unordered(self.preprocess_single_file,
                                      shot_list_picked)
        # Count from 1 so the progress display ends at N/N.
        for i, shot in enumerate(results, 1):
            sys.stdout.write('\r{}/{}'.format(i, num_picked))
            # '\r' alone does not trigger a line-buffered flush.
            sys.stdout.flush()
            used_shots.append_if_valid(shot)
        pool.close()
    except BaseException:
        # Do not leave worker processes behind if anything fails
        # (including KeyboardInterrupt); the error is re-raised.
        pool.terminate()
        raise
    finally:
        pool.join()

    num_used = len(used_shots)
    used_disruptive = used_shots.num_disruptive()
    picked_disruptive = shot_list_picked.num_disruptive()

    print('\nFinished preprocessing {} files in {} seconds'.format(
        num_picked, time.time() - start_time))
    print('Using {} shots ({} disruptive shots)'.format(
        num_used, used_disruptive))
    print('Omitted {} shots of {} total shots'.format(
        num_picked - num_used, num_picked))
    print(
        'Omitted {} disruptive shots of {} total disruptive shots'.format(
            picked_disruptive - used_disruptive, picked_disruptive))

    if num_used == 0:
        # Adjacent literals are concatenated; keep exactly one space at
        # the join so the message does not contain a double space.
        print("WARNING: All shots were omitted, please ensure raw data "
              "is complete and available at {}.".format(
                  self.conf['paths']['signal_prepath']))
    return used_shots