def _load_mapfiles(self):
    """
    Load data map file, instrument map file, and sky map file.
    Update the 'skip' fields in these map files: if 'skip' is True in
    any of the maps, then 'skip' must be set to True in all maps.
    """
    self.logger.debug(
        "Loading map files:"
        "\n\tdata map: %s\n\tinstrument map: %s\n\tsky map: %s" % (
            self.inputs['args'][0],
            self.inputs['instrument_mapfile'],
            self.inputs['sky_mapfile']
        )
    )
    self.data_map = DataMap.load(self.inputs['args'][0])
    self.inst_map = DataMap.load(self.inputs['instrument_mapfile'])
    self.sky_map = DataMap.load(self.inputs['sky_mapfile'])

    if not validate_data_maps(self.data_map, self.inst_map, self.sky_map):
        self.logger.error("Validation of input data mapfiles failed")
        return False

    # Update the skip fields of the three maps. If 'skip' is True in any
    # of these maps, then 'skip' must be set to True in all maps.
    for x, y, z in zip(self.data_map, self.inst_map, self.sky_map):
        x.skip = y.skip = z.skip = (x.skip or y.skip or z.skip)

    return True
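
# A minimal sketch (not part of the pipeline) of the skip-synchronisation
# rule applied in _load_mapfiles above, using only the DataMap/DataProduct
# API already used in this file; the hosts and file names are made up.
from lofarpipe.support.data_map import DataMap, DataProduct

data_map = DataMap([])
inst_map = DataMap([])
data_map.data.append(DataProduct('node1', 'obs.MS', False))
inst_map.data.append(DataProduct('node1', 'obs.instrument', True))

# If 'skip' is True in any map, it must become True in all of them.
for x, y in zip(data_map, inst_map):
    x.skip = y.skip = (x.skip or y.skip)

assert data_map[0].skip and inst_map[0].skip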
def plugin_main(args, **kwargs):
    """
    Matches the hosts in one datamap with those in another

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to adjust
    mapfile_to_match : str
        Filename of datamap to match
    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_to_match = kwargs['mapfile_to_match']

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_to_match = DataMap.load(mapfile_to_match)
    map_to_match.iterator = DataMap.SkipIterator

    hosts_to_match = []
    for item in map_to_match:
        hosts_to_match.append(item.host)

    for item, host in zip(map_in, hosts_to_match):
        item.host = host

    map_in.save(mapfile_in)
def go(self):
    super(imager_create_dbs, self).go()

    # get assoc_theta, convert from empty string if needed
    assoc_theta = self.inputs["assoc_theta"]
    if assoc_theta == "":
        assoc_theta = None

    # Load mapfile data from files
    self.logger.info(self.inputs["slice_paths_mapfile"])
    slice_paths_map = MultiDataMap.load(self.inputs["slice_paths_mapfile"])
    input_map = DataMap.load(self.inputs['args'][0])
    source_list_map = DataMap.load(self.inputs['source_list_map_path'])

    if self._validate_input_data(input_map, slice_paths_map):
        return 1

    # Run the node scripts with the collected inputs
    jobs, output_map = self._run_create_dbs_node(
        input_map, slice_paths_map, assoc_theta, source_list_map)

    # Collect the output of the node scripts and write it to (map) files
    return self._collect_and_assign_outputs(jobs, output_map,
                                            slice_paths_map)
def plugin_main(args, **kwargs):
    """
    Takes in a list of targets and an h5parm solution set and returns a
    filter for the stations in the target data which are missing from the
    calibrator solutions' antenna table

    Parameters
    ----------
    mapfile_in : str
        Mapfile for input measurement sets
    h5parmdb : str
        Location of the solution h5parm set
    solset_name : str
        Name of the solution set of the corresponding h5parm set to
        compare with
    filter : str
        Default filter constraints for the ndppp_prep_target step
        (usually removing International Baselines)

    Returns
    -------
    result : dict
        Output station names to filter
    """
    mapfile_in = kwargs['mapfile_in']
    h5parmdb = kwargs['h5parmdb']
    solset_name = kwargs['solset_name']
    filter = kwargs['filter']

    data = DataMap.load(mapfile_in)
    mslist = [data[i].file for i in xrange(len(data))]

    if len(mslist) == 0:
        raise ValueError(
            "Did not find any existing directory in input MS list!")
    MS = mslist[0]

    ## reading ANTENNA table of MS
    antennaFile = MS + "/ANTENNA"
    logging.info('Collecting information from the ANTENNA table.')
    antennaTable = pt.table(antennaFile, ack=False)
    antennaNames = antennaTable.getcol('NAME')

    ## reading ANTENNA information of h5parm
    solution_data = h5parm(h5parmdb, readonly=True)
    solset = solution_data.getSolset(solset_name)
    station_names = solset.getAnt().keys()

    ## check whether there are stations in the target that are missing
    ## from the calibrator solutions
    missing_stations = list(set(antennaNames) - set(station_names))
    for missing_station in missing_stations:
        filter += ';!' + missing_station + '*'

    ## return results
    result = {'filter': str(filter)}
    return result
def plugin_main(args, **kwargs):
    """
    Prunes entries from a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to trim
    prune_str : str
        Entries starting with this string will be removed.
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename
    """
    mapfile_in = kwargs['mapfile_in']
    prune_str = kwargs['prune_str'].lower()
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    prunelen = len(prune_str)

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        if item.file[:prunelen].lower() != prune_str:
            map_out.data.append(DataProduct(item.host, item.file, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs):
    """
    Reads the LOFAR target name from the OBSERVATION table of the first
    measurement set in the input mapfile

    Parameters
    ----------
    mapfile_in : str
        Mapfile for input measurement sets

    Returns
    -------
    result : dict
        Name of the observation target
    """
    mapfile_in = kwargs['mapfile_in']

    data = DataMap.load(mapfile_in)
    mslist = [data[i].file for i in xrange(len(data))]
    msfile = mslist[0]

    observationTable = pyrap.tables.table(msfile + '::OBSERVATION')
    targetName = observationTable.getcol('LOFAR_TARGET')['array'][0]

    ## return results
    result = {'targetName': targetName}
    return result
def plugin_main(args, **kwargs):
    """
    Appends a string to filenames in a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to append to
    append : str
        String to append
    append_index : bool
        If True, append a unique index to each file
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename
    """
    mapfile_in = kwargs['mapfile_in']

    if 'append_index' in kwargs:
        append_index = kwargs['append_index']
        if type(append_index) is str:
            append_index = append_index.lower() == 'true'
    else:
        append_index = False

    append_str = kwargs['append']
    if append_str == 'None':
        append_str = ''
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        if append_index:
            map_out.data.append(DataProduct(
                item.host, item.file + append_str + '_{}'.format(i),
                item.skip))
        else:
            map_out.data.append(DataProduct(
                item.host, item.file + append_str, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs):
    """
    Copies each entry of mapfile_in as often as the length of the
    corresponding group into a new mapfile

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile to be expanded. (E.g. with the skymodels
        for the different groups.)
    mapfile_groups : str
        Name of the multi-mapfile with the given groups. The number of
        groups needs to be the same as the number of files in mapfile_in.
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    ignore_dummies : str (optional)
        If true, do not count dummy entries when expanding

    Returns
    -------
    result : dict
        Output datamap filename
    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    try:
        # if the user has defined a dummy preference, follow it;
        # otherwise count dummies as usual
        ignore_dummies = str(kwargs['ignore_dummies'])
        ignore_dummies = ignore_dummies in ['true', 'True', '1', 'T', 't']
    except KeyError:
        ignore_dummies = False

    inmap = DataMap.load(kwargs['mapfile_in'])
    groupmap = MultiDataMap.load(kwargs['mapfile_groups'])

    if len(inmap) != len(groupmap):
        raise ValueError(
            'PipelineStep_mapfileSingleToGroup: length of {0} and {1} '
            'differ'.format(kwargs['mapfile_in'], kwargs['mapfile_groups']))

    map_out = DataMap([])
    if ignore_dummies:
        for groupID in xrange(len(groupmap)):
            for fileID in xrange(len(groupmap[groupID].file)):
                if groupmap[groupID].file[fileID] != 'dummy_entry':
                    map_out.data.append(DataProduct(
                        inmap[groupID].host, inmap[groupID].file,
                        (inmap[groupID].skip or groupmap[groupID].skip)))
    else:
        for groupID in xrange(len(groupmap)):
            for fileID in xrange(len(groupmap[groupID].file)):
                map_out.data.append(DataProduct(
                    inmap[groupID].host, inmap[groupID].file,
                    (inmap[groupID].skip or groupmap[groupID].skip)))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs):
    fileid = kwargs['mapfile_in']
    datamap = DataMap.load(fileid)
    hdf5File = os.path.join(kwargs['hdf5_dir'], kwargs['hdf5file'])
    if 'instrument' in kwargs:
        instrument = kwargs['instrument']
    else:
        instrument = '/instrument'
    if 'compression' in kwargs:
        compression = int(kwargs['compression'])
    else:
        compression = 5
    if 'solset' in kwargs:
        solsetName = kwargs['solset']
    else:
        solsetName = None

    # Check if all the necessary files are available
    antennaFile = os.path.join(datamap[0].file, 'ANTENNA')
    if not os.path.isdir(antennaFile):
        logging.critical('Missing ANTENNA table.')
        sys.exit(1)
    fieldFile = os.path.join(datamap[0].file, 'FIELD')
    if not os.path.isdir(fieldFile):
        logging.critical('Missing FIELD table.')
        sys.exit(1)
    skydbFile = os.path.join(datamap[0].file, 'sky')
    if not os.path.isdir(skydbFile):
        logging.critical('Missing sky table.')
        sys.exit(1)

    # generate list of parmDB filenames
    parmDBnames = [MS.file + instrument for MS in datamap]

    # create and fill the hdf5 file
    solset = parmDBs2h5parm(hdf5File, parmDBnames, antennaFile, fieldFile,
                            skydbFile, compression=compression,
                            solsetName=solsetName)

    # Add CREATE entry to history
    h5parmDB = h5parm(hdf5File, readonly=False)
    soltabs = h5parmDB.getSoltabs(solset=solset)
    for st in soltabs:
        sw = solWriter(soltabs[st])
        sw.addHistory('CREATE (by PipelineStep_losotoImporter from %s / '
                      '%s - %s)' % (os.path.abspath(''),
                                    os.path.basename(parmDBnames[0]),
                                    os.path.basename(parmDBnames[-1])))
    h5parmDB.close()

    # generate mapfile and wrap up
    mapfileentry = {}
    mapfileentry['host'] = 'localhost'
    mapfileentry['file'] = hdf5File
    mapfileentry['skip'] = False
    outfileid = os.path.join(kwargs['mapfile_dir'], kwargs['filename'])
    with open(outfileid, 'w') as outmap:
        outmap.write(repr([mapfileentry]))

    result = {}
    result['mapfile'] = outfileid
    return result
def plugin_main(args, **kwargs):
    """
    Checks a "check" mapfile for values of 'None' and, if found, changes
    the input mapfile "file" to "empty".

    Note: the check and input mapfiles must have the same length

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile from which to select files.
    mapfile_check : str
        Name of the mapfile to check for None
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename
    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    inmap = DataMap.load(kwargs['mapfile_in'])
    checkmap = DataMap.load(kwargs['mapfile_check'])
    if len(inmap) != len(checkmap):
        raise ValueError('Input and check mapfiles must have the same length')

    map_out = DataMap([])
    for checkitem, item in zip(checkmap, inmap):
        if checkitem.file.lower() == 'none':
            map_out.data.append(DataProduct(item.host, 'empty', item.skip))
        else:
            map_out.append(item)

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def finalize(self):
    """
    Finalize this operation
    """
    # Add output datamaps to direction object for later reference
    self.direction.input_files_single_mapfile = os.path.join(
        self.pipeline_mapfile_dir, 'input_files_single.mapfile')
    self.direction.verify_subtract_mapfile = os.path.join(
        self.pipeline_mapfile_dir, 'verify_subtract.break.mapfile')
    self.direction.dir_dep_parmdb_mapfile = os.path.join(
        self.pipeline_mapfile_dir,
        'merge_normalized_selfcal_parmdbs.mapfile')
    self.direction.converted_parmdb_mapfile = os.path.join(
        self.pipeline_mapfile_dir,
        'convert_normalized_merged_selfcal_parmdbs.mapfile')
    self.direction.dir_indep_skymodels_mapfile = os.path.join(
        self.pipeline_mapfile_dir, 'full_skymodels.mapfile')
    self.direction.selfcal_plots_mapfile = os.path.join(
        self.pipeline_mapfile_dir, 'make_selfcal_plots.mapfile')
    if self.direction.create_preapply_h5parm:
        self.direction.preapply_parmdb_mapfile = os.path.join(
            self.pipeline_mapfile_dir, 'create_preapply_h5parm.mapfile')
    self.direction.sourcedb_new_facet_sources = os.path.join(
        self.pipeline_mapfile_dir,
        'make_sourcedb_new_facet_sources_for_facet_imaging.mapfile')
    self.direction.diff_models_field_mapfile = os.path.join(
        self.pipeline_mapfile_dir, 'predict_and_difference_models.mapfile')

    # Store results of verify_subtract check. This will work if the
    # verification was done using multiple bands, although we use only one
    # at the moment
    if (os.path.exists(self.direction.verify_subtract_mapfile) and
            not self.parset['calibration_specific']['skip_selfcal_check']):
        ok_mapfile = DataMap.load(self.direction.verify_subtract_mapfile)
        ok_flags = [ast.literal_eval(item.file) for item in ok_mapfile]
        if all(ok_flags):
            self.direction.selfcal_ok = True
        else:
            self.direction.selfcal_ok = False
    elif self.parset['calibration_specific']['skip_selfcal_check']:
        self.direction.selfcal_ok = True
    else:
        self.direction.selfcal_ok = False

    # Delete temp data
    self.direction.cleanup_mapfiles = [
        os.path.join(self.pipeline_mapfile_dir, 'shift_cal.mapfile'),
        os.path.join(self.pipeline_mapfile_dir, 'concat_data.mapfile'),
        os.path.join(self.pipeline_mapfile_dir, 'apply_dir_dep.mapfile'),
        os.path.join(self.pipeline_mapfile_dir, 'average_pre.mapfile'),
        os.path.join(self.pipeline_mapfile_dir, 'average_post.mapfile'),
        os.path.join(self.pipeline_mapfile_dir,
                     'sorted_groups.mapfile_groups')]
    self.log.debug('Cleaning up files (direction: {})'.format(
        self.direction.name))
    self.direction.cleanup()
    self.cleanup()
def plugin_main(args, **kwargs):
    """
    Selects those files from mapfile_in that have the same filename-base
    as the one in mapfile_reference.

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile from which to select files.
    mapfile_reference : str
        Name of the reference mapfile
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename
    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    inmap = DataMap.load(kwargs['mapfile_in'])
    refmap = DataMap.load(kwargs['mapfile_reference'])

    map_out = DataMap([])
    basenames = [os.path.splitext(os.path.basename(item.file))[0]
                 for item in inmap]
    for refitem in refmap:
        refbase = os.path.splitext(os.path.basename(refitem.file))[0]
        idx = basenames.index(refbase)
        map_out.append(inmap[idx])

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs):
    """
    Makes a mapfile for a list of files

    Parameters
    ----------
    files : list or str
        List of files or mapfile with such a list as the only entry. May
        be given as a list of strings or as a string
        (e.g., '[s1.skymodel, s2.skymodel]')
    hosts : list or str
        List of hosts/nodes. May be given as a list or as a string
        (e.g., '[host1, host2]')
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename
    """
    if type(kwargs['files']) is str:
        try:
            # Check if input is a mapfile containing the list as a string
            map_in = DataMap.load(kwargs['files'])
            in_files = [item.file for item in map_in]
            files = []
            for f in in_files:
                files += f.strip('[]').split(',')
        except Exception:
            files = kwargs['files']
            files = files.strip('[]').split(',')
        files = [f.strip() for f in files]
    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    # Cycle the host list until it is at least as long as the file list
    for i in range(len(files) - len(hosts)):
        hosts.append(hosts[i])

    map_out = DataMap([])
    for h, f in zip(hosts, files):
        map_out.data.append(DataProduct(h, f, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs):
    mapfile_in = kwargs['mapfile_in']
    station_filter = kwargs['station_filter']

    data = DataMap.load(mapfile_in)
    mslist = [data[i].file for i in range(len(data))]

    ## derive the fraction of flagged data of the entire observation
    print('Reading data.')
    logging.info('Reading data.')
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    flagged_fraction_dict = pool.map(find_flagged_fraction, mslist)

    print('Apply station filter ' + str(station_filter))
    logging.info('Apply station filter ' + str(station_filter))
    flagged_fraction_data = {}
    for entry in flagged_fraction_dict:
        antennas = entry.keys()
        selected_stations = [station_name for station_name in antennas
                             if re.match(station_filter, station_name)]
        if len(selected_stations) == 0:
            logging.error('No stations left after filtering.')
            return 1
        for antenna in selected_stations:
            try:
                flagged_fraction_data[antenna].append(float(entry[antenna]))
            except KeyError:
                flagged_fraction_data[antenna] = [float(entry[antenna])]

    # Average the flagged fraction per station; the same dict is then also
    # keyed on the averaged fraction to look up the matching station(s)
    flagged_fraction_list = []
    sorted_stations = sorted(flagged_fraction_data.keys())
    for antenna in sorted_stations:
        flagged_fraction = (sum(flagged_fraction_data[antenna]) /
                            len(flagged_fraction_data[antenna]))
        flagged_fraction_list.append(flagged_fraction)
        try:
            flagged_fraction_data[flagged_fraction].append(antenna)
        except KeyError:
            flagged_fraction_data[flagged_fraction] = [antenna]

    # Pick the station with the least flagged data as reference antenna
    min_flagged_fraction = min(flagged_fraction_list)
    refant = flagged_fraction_data[min_flagged_fraction][0]
    logging.info('Selected station ' + str(refant) +
                 ' as reference antenna. Fraction of flagged data is ' +
                 '{:>3}'.format('{:.1f}'.format(min_flagged_fraction) + '%'))
    print('Selected station ' + str(refant) +
          ' as reference antenna. Fraction of flagged data is ' +
          '{:>3}'.format('{:.1f}'.format(min_flagged_fraction) + '%'))

    ## return results
    result = {'refant': str(refant)}
    return result
def go(self):
    # TODO: Remove dependency on mapfile_dir
    self.logger.info("Starting copier run")
    super(copier, self).go()

    # Load data from mapfiles
    self.source_map = DataMap.load(self.inputs['mapfile_source'])
    self.target_map = DataMap.load(self.inputs['mapfile_target'])

    # validate data in mapfiles
    if not self._validate_mapfiles(self.inputs['allow_rename']):
        return 1

    # Run the compute nodes with the node specific mapfiles
    for source, target in zip(self.source_map, self.target_map):
        args = [source.host, source.file, target.file]
        self.append_job(target.host, args)

    # start the jobs, return the exit status.
    return self.run_jobs()
def _create_mapfile_ato(inmap):
    maps = DataMap([])
    mapsin = DataMap.load(inmap)
    mapsin.iterator = DataMap.SkipIterator

    # Collect all unskipped files into one bracketed, comma-separated list
    newlist = ''
    for i, item in enumerate(mapsin):
        newlist = newlist + item.file + ','
    newlist = newlist.rstrip(',')
    newlist = '[' + newlist + ']'
    maps.data.append(DataProduct('localhost', newlist, False))

    return maps
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by compressing input mapfile items into one item

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    list_format : bool, optional
        If True, the compressed item will use a Python list format (e.g.,
        '[file1, file2, ...]'). If False, it will be a space-separated
        list (e.g., 'file1 file2 ...')

    Returns
    -------
    result : dict
        New parmdb datamap filename
    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'list_format' in kwargs:
        list_format = kwargs['list_format']
    else:
        list_format = True
    if type(list_format) is str:
        list_format = list_format.lower() == 'true'

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])
    map_in.iterator = DataMap.SkipIterator
    file_list = [item.file for item in map_in]
    if list_format:
        newlist = '[{0}]'.format(','.join(file_list))
    else:
        newlist = '{0}'.format(' '.join(file_list))

    # Just assign host of first file to compressed file
    hosts = [item.host for item in map_in]
    map_out.data.append(DataProduct(hosts[0], newlist, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
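
# Hypothetical usage sketch for the compression plugin above; the hosts and
# MS names are made up. An input map with items node1:/data/t1.MS and
# node2:/data/t2.MS collapses into one item whose 'file' field is the
# bracketed list and whose host is that of the first input item.
from lofarpipe.support.data_map import DataMap, DataProduct

demo_in = DataMap([])
demo_in.data.append(DataProduct('node1', '/data/t1.MS', False))
demo_in.data.append(DataProduct('node2', '/data/t2.MS', False))

file_list = [item.file for item in demo_in]
compressed = DataProduct(demo_in[0].host,
                         '[{0}]'.format(','.join(file_list)), False)
assert compressed.file == '[/data/t1.MS,/data/t2.MS]'
assert compressed.host == 'node1'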
def update_state(dir_input):
    """
    Updates the paths in mapfiles or state files

    Parameters
    ----------
    dir_input : str
        Directory containing files to update
    """
    file_list = glob.glob(os.path.join(dir_input, '*'))

    if dir_input.endswith('mapfiles'):
        # Assume path is a pipeline mapfiles directory. In this case, we
        # can simply substitute the new working_dir for the old one in each
        # of the mapfiles
        working_dir = dir_input.split('results/')[0]
        for f in file_list:
            datamap = DataMap.load(f)
            for item in datamap:
                if '/' in item.file:
                    old_working_dir = item.file.split('results/')[0]
                    item.file = item.file.replace(old_working_dir,
                                                  working_dir)
            datamap.save(f)
    elif dir_input.endswith('state'):
        # Assume path is the Factor state directory. In this case, we can
        # try to load files as pickled state files and look for paths
        # inside. If found, substitute the new working_dir for the old one
        working_dir = os.path.dirname(dir_input)
        for f in file_list:
            try:
                with open(f, "rb") as fp:
                    d = pickle.load(fp)
                for k, v in d.iteritems():
                    if type(v) is str:
                        if k == 'working_dir':
                            d[k] = working_dir
                        if '/' in v:
                            for infix in ['results/', 'state/', 'chunks/']:
                                parts = v.split(infix)
                                if len(parts) > 1:
                                    d[k] = os.path.join(working_dir, infix,
                                                        parts[-1])
                    elif type(v) is list:
                        for i, l in enumerate(v):
                            if '/' in l:
                                for infix in ['results/', 'state/',
                                              'chunks/']:
                                    parts = l.split(infix)
                                    if len(parts) > 1:
                                        v[i] = os.path.join(working_dir,
                                                            infix,
                                                            parts[-1])
                        d[k] = v
                with open(f, "w") as fp:
                    pickle.dump(d, fp)
            except Exception:
                pass
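
# Path-substitution sketch for the mapfile branch of update_state above;
# the directory names are made up. Everything before 'results/' is treated
# as the (old) working directory and swapped for the new one.
old_file = '/old/work/results/facetselfcal/d1/chunk0.MS'
working_dir = '/new/work/results/mapfiles'.split('results/')[0]  # '/new/work/'
old_working_dir = old_file.split('results/')[0]                  # '/old/work/'
assert old_file.replace(old_working_dir, working_dir) == \
    '/new/work/results/facetselfcal/d1/chunk0.MS'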
def plugin_main(args, **kwargs):
    """
    Trims a string from filenames in a mapfile

    Note that everything from the last instance of the matching string to
    the end is trimmed.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to trim
    trim_str : str
        String to remove
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    counter : int
        If counter is greater than 0, replace "image32" with "image42".
        This is a special argument for facetselfcal looping only

    Returns
    -------
    result : dict
        New datamap filename
    """
    mapfile_in = kwargs['mapfile_in']
    trim_str = kwargs['trim']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'counter' in kwargs:
        counter = int(kwargs['counter'])
    else:
        counter = 0

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    # Items whose file does not contain trim_str are dropped from the output
    for i, item in enumerate(map_in):
        index = item.file.rfind(trim_str)
        if index >= 0:
            item_trim = item.file[:index]
            if counter > 0:
                item_trim = item_trim.replace('image32', 'image42')
            map_out.data.append(DataProduct(item.host, item_trim,
                                            item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
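
# Behaviour sketch for the trim plugin above, with a made-up file name:
# everything from the last occurrence of trim_str onwards is dropped.
fname = '/data/d1_image32-MFS-image.fits'
trim_str = '-MFS'
assert fname[:fname.rfind(trim_str)] == '/data/d1_image32'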
def plugin_main(args, **kwargs):
    """
    Takes in mapfile_in, containing a single item, and copies it as often
    as there are entries in mapfile_comp

    Parameters
    ----------
    mapfile_in : str
        Parmdbs containing phase solutions
    mapfile_dir : str
        mapfile directory
    filename : str
        output filename
    mapfile_comp : str
        target MSs

    Returns
    -------
    result : dict
        Output datamap filename
    """
    mapfile_dir = kwargs['mapfile_dir']
    mapfile_in = kwargs['mapfile_in']
    mapfile_comp = kwargs['mapfile_comp']
    filename = kwargs['filename']

    value = DataMap.load(mapfile_in)[0]  # this is the single item to be expanded
    n = len(DataMap.load(mapfile_comp))  # these are actual MS files

    map_out = DataMap([])
    for i in range(n):
        map_out.data.append(DataProduct(value.host, value.file, value.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by expanding a single input mapfile item into many items

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing single item
    mapfile_to_match : str
        Filename of datamap containing multiple items
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        New parmdb datamap filename
    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_to_match = kwargs['mapfile_to_match']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_in = DataMap.load(mapfile_in)
    map_match = DataMap.load(mapfile_to_match)
    map_out = DataMap([])

    map_match.iterator = DataMap.SkipIterator
    for item in map_match:
        map_out.data.append(DataProduct(item.host, map_in[0].file,
                                        item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def _bbs(self, timeslice_map_path, parmdbs_map_path, sourcedb_map_path,
         skip=False):
    """
    Perform a calibration step. First with a set of sources from the gsm
    and in later iterations also on the found sources
    """
    # create parset for bbs run
    parset = self.parset.makeSubset("BBS.")
    parset_path = self._write_parset_to_file(
        parset, "bbs", "Parset for calibration with a local sky model")

    # create the output file path
    output_mapfile = self._write_datamap_to_file(
        None, "bbs_output", "Mapfile with calibrated measurement sets.")
    converted_sourcedb_map_path = self._write_datamap_to_file(
        None, "source_db", "correctly shaped mapfile for input sourcedbs")

    if skip:
        return output_mapfile

    # The create db step produces a mapfile with a single sourcelist for
    # the different timeslices. Generate a mapfile with copies of the
    # sourcelist location: This allows validation of maps in combination

    # get the original map data
    sourcedb_map = DataMap.load(sourcedb_map_path)
    parmdbs_map = MultiDataMap.load(parmdbs_map_path)
    converted_sourcedb_map = []

    # sanity check for correct output from previous recipes
    if not validate_data_maps(sourcedb_map, parmdbs_map):
        self.logger.error("The input files for bbs do not contain "
                          "matching host names for each entry content:")
        self.logger.error(repr(sourcedb_map))
        self.logger.error(repr(parmdbs_map))
        raise PipelineException("Invalid input data for imager_bbs recipe")

    self.run_task("imager_bbs",
                  timeslice_map_path,
                  parset=parset_path,
                  instrument_mapfile=parmdbs_map_path,
                  sourcedb_mapfile=sourcedb_map_path,
                  mapfile=output_mapfile,
                  working_directory=self.scratch_directory)

    return output_mapfile
def _combine_local_map(inmap):
    map_out = DataMap([])
    map_in = DataMap.load(inmap)
    map_in.iterator = DataMap.SkipIterator

    # Group the files per host into comma-separated lists
    local_files = {}
    for item in map_in:
        if item.host in local_files:
            local_files[item.host] += item.file + ','
        else:
            local_files[item.host] = item.file + ','

    # Emit one bracketed list item per host
    for k, v in local_files.iteritems():
        v = v.rstrip(',')
        v = '[' + v + ']'
        map_out.data.append(DataProduct(k, v, False))

    return map_out
def verify_subtract(direction):
    """
    Checks selfcal success
    """
    verify_subtract_mapfile = os.path.join(
        direction.working_dir, 'results', 'facetselfcal', direction.name,
        'mapfiles', 'verify_subtract.break.mapfile')
    if os.path.exists(verify_subtract_mapfile):
        ok_mapfile = DataMap.load(verify_subtract_mapfile)
        ok_flags = [ast.literal_eval(item.file) for item in ok_mapfile]
        if all(ok_flags):
            return True
        else:
            return False
    else:
        return False
def plugin_main(args, **kwargs):
    """
    Copies each entry of mapfile_in as often as the length of the
    corresponding group into a new mapfile

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile to be expanded. (E.g. with the skymodels
        for the different groups.)
    mapfile_groups : str
        Name of the multi-mapfile with the given groups. The number of
        groups needs to be the same as the number of files in mapfile_in.
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename
    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    inmap = DataMap.load(kwargs['mapfile_in'])
    groupmap = MultiDataMap.load(kwargs['mapfile_groups'])
    if len(inmap) != len(groupmap):
        raise ValueError(
            'PipelineStep_mapfileSingleToGroup: length of {0} and {1} '
            'differ'.format(kwargs['mapfile_in'], kwargs['mapfile_groups']))

    map_out = DataMap([])
    for groupID in xrange(len(groupmap)):
        for fileID in xrange(len(groupmap[groupID].file)):
            map_out.data.append(DataProduct(
                inmap[groupID].host, inmap[groupID].file,
                (inmap[groupID].skip or groupmap[groupID].skip)))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by uncompressing an input mapfile list item into
    separate items

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing list of MS files
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    hosts : list or str
        List of hosts/nodes. May be given as a list or as a string
        (e.g., '[host1, host2]')

    Returns
    -------
    result : dict
        New parmdb datamap filename
    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])
    files = map_in[0].file.strip('[]').split(',')
    files = [f.strip() for f in files]

    # Cycle the host list until it is at least as long as the file list
    for i in range(len(files) - len(hosts)):
        hosts.append(hosts[i])

    for file, host in zip(files, hosts):
        map_out.data.append(DataProduct(host, file, False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def finalize(self):
    """
    Finalize this operation
    """
    # Add output datamaps to direction object for later reference
    self.direction.input_files_single_mapfile = os.path.join(
        self.pipeline_mapfile_dir, 'input_files_single.mapfile')
    self.direction.verify_subtract_mapfile = os.path.join(
        self.pipeline_mapfile_dir, 'verify_subtract.break.mapfile')
    self.direction.dir_dep_parmdb_mapfile = os.path.join(
        self.pipeline_mapfile_dir,
        'merge_normalized_selfcal_parmdbs.mapfile')
    self.direction.dir_indep_skymodels_mapfile = os.path.join(
        self.pipeline_mapfile_dir, 'full_skymodels.mapfile')
    self.direction.selfcal_plots_mapfile = os.path.join(
        self.pipeline_mapfile_dir, 'make_selfcal_plots.mapfile')

    # Store results of verify_subtract check. This will work if the
    # verification was done using multiple bands, although we use only one
    # at the moment
    if (os.path.exists(self.direction.verify_subtract_mapfile) and
            not self.parset['calibration_specific']['skip_selfcal_check']):
        ok_mapfile = DataMap.load(self.direction.verify_subtract_mapfile)
        ok_flags = [ast.literal_eval(item.file) for item in ok_mapfile]
        if all(ok_flags):
            self.direction.selfcal_ok = True
        else:
            self.direction.selfcal_ok = False
    elif self.parset['calibration_specific']['skip_selfcal_check']:
        self.direction.selfcal_ok = True
    else:
        self.direction.selfcal_ok = False

    # Delete temp data
    self.direction.cleanup_mapfiles = [
        os.path.join(self.pipeline_mapfile_dir,
                     'add_all_facet_sources.mapfile'),
        os.path.join(self.pipeline_mapfile_dir,
                     'shift_and_average.mapfile'),
        os.path.join(self.pipeline_mapfile_dir, 'concat_data.mapfile'),
        os.path.join(self.pipeline_mapfile_dir,
                     'concat_blavg_data.mapfile'),
        os.path.join(self.pipeline_mapfile_dir,
                     'predict_outlier_model.mapfile'),
        os.path.join(self.pipeline_mapfile_dir,
                     'corrupt_outlier_model.mapfile'),
        os.path.join(self.pipeline_mapfile_dir, 'average_pre.mapfile'),
        os.path.join(self.pipeline_mapfile_dir, 'average_post.mapfile'),
        os.path.join(self.pipeline_mapfile_dir,
                     'sorted_groups.mapfile_groups')]
    self.log.debug('Cleaning up files (direction: {})'.format(
        self.direction.name))
    self.direction.cleanup()
def finalize(self):
    """
    Finalize this operation
    """
    # Add output datamaps to direction object for later reference
    self.direction.input_files_single_mapfile = os.path.join(
        self.pipeline_mapfile_dir, 'input_files_single.mapfile')
    self.direction.verify_subtract_mapfile = os.path.join(
        self.pipeline_mapfile_dir, 'verify_subtract.break.mapfile')

    # Store results of verify_subtract check. This will work if the
    # verification was done using multiple bands, although we use only one
    # at the moment
    if (os.path.exists(self.direction.verify_subtract_mapfile) and
            not self.parset['skip_selfcal_check']):
        ok_mapfile = DataMap.load(self.direction.verify_subtract_mapfile)
        ok_flags = [ast.literal_eval(item.file) for item in ok_mapfile]
        if all(ok_flags):
            self.direction.selfcal_ok = True
        else:
            self.direction.selfcal_ok = False
    elif self.parset['skip_selfcal_check']:
        self.direction.selfcal_ok = True
    else:
        self.direction.selfcal_ok = False

    # Delete temp data
    self.direction.cleanup_mapfiles = [
        os.path.join(self.pipeline_mapfile_dir,
                     'add_all_facet_sources.mapfile'),
        os.path.join(self.pipeline_mapfile_dir,
                     'shift_and_average.mapfile'),
        os.path.join(self.pipeline_mapfile_dir, 'concat_data.mapfile'),
        os.path.join(self.pipeline_mapfile_dir,
                     'concat_blavg_data.mapfile'),
        os.path.join(self.pipeline_mapfile_dir,
                     'predict_outlier_model.mapfile'),
        os.path.join(self.pipeline_mapfile_dir,
                     'corrupt_outlier_model.mapfile'),
        os.path.join(self.pipeline_mapfile_dir, 'average_pre.mapfile'),
        os.path.join(self.pipeline_mapfile_dir, 'average_post.mapfile')]
    self.log.debug('Cleaning up files (direction: {})'.format(
        self.direction.name))
    self.direction.cleanup()
def plugin_main(args, **kwargs):
    print 'PLUGIN KWARG: ', kwargs
    result = {}
    fileid = kwargs['mapfile_in']
    datamap = DataMap.load(fileid)

    if kwargs['join_files']:
        for item in datamap:
            item.file = os.path.join(item.file, kwargs['join_files'])

    if kwargs['newname']:
        fileid = os.path.join(os.path.dirname(fileid), kwargs['newname'])

    if datamap:
        print 'Writing mapfile: ', fileid
        datamap.save(fileid)
    result['mapfile'] = fileid

    return result
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by repeating the max size over the input mapfile items

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename
    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])

    # Find max size in x and y; each item's file field holds 'xsize ysize'
    xsize_list = []
    ysize_list = []
    for item in map_in:
        xsize, ysize = [int(s) for s in item.file.split(' ')]
        xsize_list.append(xsize)
        ysize_list.append(ysize)
    maxsize = '{0} {1}'.format(max(xsize_list), max(ysize_list))

    for item in map_in:
        map_out.data.append(DataProduct(item.host, maxsize, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
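
# Illustration of the size-reduction rule above, with made-up size strings:
# the maxima of the x and y sizes are taken independently.
sizes = ['4096 3072', '2048 5120']
pairs = [[int(s) for s in size.split(' ')] for size in sizes]
maxsize = '{0} {1}'.format(max(p[0] for p in pairs), max(p[1] for p in pairs))
assert maxsize == '4096 5120'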
def plugin_main(args, **kwargs):
    """
    Takes in mapfiles and changes host names to allow for efficient MPI
    reduction

    Parameters
    ----------
    mapfiles : list of strs
        List of the names of the input mapfiles. WILL BE MODIFIED!
    mapfile_dir : str
        Name of the directory containing the mapfile
    head_node_only : str
        String: either True or False. Describes whether to use just the
        head node or not.

    Returns
    -------
    result : empty dictionary
    """
    result = {}
    # read in list of mapfiles from string (separated by commas)
    mapfiles = (kwargs['mapfiles'][1:-1]).split(',')
    mapfile_dir = kwargs['mapfile_dir']
    head_node_only = (kwargs['head_node_only'] in
                      ['True', 'true', 'T', 't', '1'])

    fn_list = []
    for mf in mapfiles:
        fn_list.append(os.path.join(mapfile_dir, mf))

    # caution: remember to reload the compute node iterable for every
    # mapfile to ensure corresponding entries have the same node set as host
    for fn in fn_list:
        if head_node_only:
            # Read in head node. Set up iterator (unnecessary with just one
            # node, but better to have less code!)
            cn_cycle = it.cycle(get_head_node(
                ClusterDesc(str(os.environ['cluster_desc_file']))))
        else:
            # Read in list of compute nodes. Set up iterator to cyclically
            # iterate over them.
            cn_cycle = it.cycle(get_compute_nodes(
                ClusterDesc(str(os.environ['cluster_desc_file']))))
        # read in current data map file (probably with all host values set
        # to "localhost")
        data = DataMap.load(fn)
        # set up iterator for all values in mapfile
        iterator = DataMap.SkipIterator(data)
        # iterate through the map file, assigning each entry a host from
        # the available compute nodes in a cyclical fashion
        for value in iterator:
            value.host = cn_cycle.next()
        # overwrite original file
        data.save(fn)

    return result
def plugin_main(args, **kwargs):
    """
    Updates the hosts in an input datamap

    Parameters
    ----------
    mapfile_in : str, optional
        Filename of datamap
    mapfile_dir : str, optional
        Directory containing mapfiles. All mapfiles in this directory will
        be updated
    hosts : list or str
        List of hosts/nodes. May be given as a list or as a string
        (e.g., '[host1, host2]')
    """
    if 'mapfile_dir' in kwargs:
        mapfiles_in = glob.glob(os.path.join(kwargs['mapfile_dir'], '*'))
    else:
        mapfiles_in = [kwargs['mapfile_in']]
    if len(mapfiles_in) == 0:
        return

    if type(kwargs['hosts']) is str:
        hosts = kwargs['hosts'].strip('[]').split(',')
        hosts = [h.strip() for h in hosts]

    for mapfile_in in mapfiles_in:
        try:
            datamap = DataMap.load(mapfile_in)
            # Cycle the host list until it is at least as long as the map
            for i in range(len(datamap) - len(hosts)):
                hosts.append(hosts[i])
            for item, host in zip(datamap, hosts):
                item.host = host
            datamap.save(mapfile_in)
        except Exception:
            print('File {} does not appear to be a mapfile. '
                  'Skipping it.'.format(mapfile_in))
def input2strlist(invar):
    str_list = None
    if type(invar) is str:
        if invar.startswith('[') and invar.endswith(']'):
            str_list = [f.strip(' \'\"')
                        for f in invar.strip('[]').split(',')]
        else:
            # Assume the string is the name of a mapfile
            map_in = DataMap.load(invar)
            map_in.iterator = DataMap.SkipIterator
            str_list = []
            for item in map_in:
                # item is a DataProduct; the file names live in item.file
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        str_list.append(f.strip(' \'\"'))
                else:
                    str_list.append(fname.strip(' \'\"'))
    elif type(invar) is list:
        str_list = [str(f).strip(' \'\"') for f in invar]
    else:
        raise TypeError('input2strlist: Type ' + str(type(invar)) +
                        ' unknown!')
    return str_list
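
# Quick self-checks for input2strlist above; the first two input forms need
# no mapfile on disk, and the file names are made up.
assert input2strlist("[s1.skymodel, 's2.skymodel']") == \
    ['s1.skymodel', 's2.skymodel']
assert input2strlist(['a.MS', 'b.MS']) == ['a.MS', 'b.MS']
# The third form (a mapfile path) reads entries via DataMap.load() and
# flattens any bracketed file fields into individual names.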
def cleanup(self):
    """
    Cleans up unneeded data
    """
    from lofarpipe.support.data_map import DataMap

    for mapfile in self.cleanup_mapfiles:
        try:
            datamap = DataMap.load(mapfile)
            for item in datamap:
                # Handle case in which item.file is a Python list
                if item.file[0] == '[' and item.file[-1] == ']':
                    files = item.file.strip('[]').split(',')
                else:
                    files = [item.file]
                for f in files:
                    if os.path.exists(f):
                        os.system('rm -rf {0}'.format(f))
        except IOError:
            pass
def plugin_main(args, **kwargs):
    """
    Makes a mapfile by filtering input mapfile items into one item (by
    default, the middle one)

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile
    index : int, optional
        Index of the item to keep (defaults to the middle item)

    Returns
    -------
    result : dict
        New parmdb datamap filename
    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_in = DataMap.load(mapfile_in)
    map_out = DataMap([])
    map_in.iterator = DataMap.SkipIterator
    files = [item.file for item in map_in]
    hosts = [item.host for item in map_in]
    if 'index' in kwargs:
        index = int(kwargs['index'])
    else:
        # integer division so the index is valid in both Python 2 and 3
        index = len(files) // 2
    map_out.data.append(DataProduct(hosts[index], files[index], False))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def plugin_main(args, **kwargs):
    """
    Appends a string to filenames in a mapfile

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap to append to
    append : str
        String to append
    mapfile_dir : str
        Directory for output mapfile
    filename : str
        Name of output mapfile

    Returns
    -------
    result : dict
        New datamap filename
    """
    mapfile_in = kwargs['mapfile_in']
    append_str = kwargs['append']
    if append_str == 'None':
        append_str = ''
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']

    map_out = DataMap([])
    map_in = DataMap.load(mapfile_in)

    for i, item in enumerate(map_in):
        map_out.data.append(DataProduct(
            item.host, item.file + append_str, item.skip))

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}

    return result
def check_existing_files(mapfile):
    """
    Checks if files in input mapfile exist

    Parameters
    ----------
    mapfile : str
        Filename of mapfile to check

    Returns
    -------
    files : list
        List of files
    """
    all_exist = True
    all_files = []
    log.info('Checking for existing files...')
    try:
        datamap = DataMap.load(mapfile)
        for item in datamap:
            # Handle case in which item.file is a Python list
            if item.file[0] == '[' and item.file[-1] == ']':
                files = item.file.strip('[]').split(',')
            else:
                files = [item.file]
            for f in files:
                if not os.path.exists(f):
                    all_exist = False
            all_files.extend(files)
        if all_exist:
            log.info('...all files exist')
        else:
            log.warning('...one or more files not found')
        return all_files
    except IOError:
        return []
def check_existing_files(self, mapfile):
    """
    Checks if files in input mapfile exist

    Parameters
    ----------
    mapfile : str
        Filename of mapfile to check

    Returns
    -------
    all_exist : bool
        True if all files in mapfile exist, False if not
    """
    from lofarpipe.support.data_map import DataMap

    all_exist = True
    self.log.debug('Checking for existing files...')
    try:
        datamap = DataMap.load(mapfile)
        for item in datamap:
            # Handle case in which item.file is a Python list
            if item.file[0] == '[' and item.file[-1] == ']':
                files = item.file.strip('[]').split(',')
            else:
                files = [item.file]
            for f in files:
                if not os.path.exists(f):
                    all_exist = False
        if all_exist:
            self.log.debug('...all files exist')
        else:
            self.log.debug('...one or more files not found')
        return all_exist
    except IOError:
        self.log.debug('Could not read mapfile {}. Skipping it'.format(
            mapfile))
        return False