def test_append_item_non_skip(self):
    data_map = DataMap(self.new_style_map)
    data_map.append(("host", "file", False))
    data_map.iterator = DataMap.TupleIterator
    tuples = [item for item in data_map]
    self.assertEqual(len(tuples), 5)
    self.assertTrue(all(isinstance(item, tuple) for item in tuples))
    self.assertTrue(all(len(item) == 2 for item in tuples))
    self.assertEqual(tuples[-1], ('host', 'file'))
def test_append_item_skip(self):
    data_map = DataMap(self.new_style_map)
    data_map.append(("host", "file", True))
    data_map.iterator = DataMap.SkipIterator
    dataProducts = [item for item in data_map]
    # the default map contains 2 non-skipped items
    self.assertEqual(len(dataProducts), 2)
    self.assertTrue(
        all(isinstance(item, DataProduct) for item in dataProducts))
    # The map already contains 2 skipped items and the appended item is also
    # skipped, so the final non-skipped item is tested here
    self.assertEqual(dataProducts[-1].host, 'locus004')
    self.assertEqual(dataProducts[-1].file, 'L12345_SB104.MS')
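# The two tests above assume a `new_style_map` fixture defined in the test
# class's setUp. A minimal sketch of a fixture consistent with the
# assertions (four entries, two of them skipped, with 'locus004' holding the
# last non-skipped file) -- hypothetical, not the actual fixture:
new_style_map_example = [
    {'host': 'locus001', 'file': 'L12345_SB101.MS', 'skip': True},
    {'host': 'locus002', 'file': 'L12345_SB102.MS', 'skip': False},
    {'host': 'locus003', 'file': 'L12345_SB103.MS', 'skip': True},
    {'host': 'locus004', 'file': 'L12345_SB104.MS', 'skip': False},
]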
def plugin_main(args, **kwargs):
    """
    Checks a "check" mapfile for values of 'None' and, if found, changes the
    input mapfile "file" to "empty".

    Note: the check and input mapfiles must have the same length

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile from which to select files.
    mapfile_check : str
        Name of the mapfile to check for None
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    inmap = DataMap.load(kwargs['mapfile_in'])
    checkmap = DataMap.load(kwargs['mapfile_check'])
    if len(inmap) != len(checkmap):
        raise ValueError('Input and check mapfiles must have the same length')

    map_out = DataMap([])
    for checkitem, item in zip(checkmap, inmap):
        if checkitem.file.lower() == 'none':
            map_out.data.append(DataProduct(item.host, 'empty', item.skip))
        else:
            map_out.append(item)

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}
    return result
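# A hypothetical invocation of the None-check plugin above (the mapfile
# names are made up; both input mapfiles must already exist on disk and be
# equally long):
#
#     result = plugin_main([],
#                          mapfile_in='/path/to/input.mapfile',
#                          mapfile_check='/path/to/check.mapfile',
#                          mapfile_dir='/path/to/mapfiles',
#                          filename='checked.mapfile')
#     print result['mapfile']   # -> '/path/to/mapfiles/checked.mapfile'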
def plugin_main(args, **kwargs):
    """
    Selects those files from mapfile_in that have the same filename-base as
    the ones in mapfile_reference.

    Parameters
    ----------
    mapfile_in : str
        Name of the input mapfile from which to select files.
    mapfile_reference : str
        Name of the reference mapfile
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile

    Returns
    -------
    result : dict
        Output datamap filename

    """
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    inmap = DataMap.load(kwargs['mapfile_in'])
    refmap = DataMap.load(kwargs['mapfile_reference'])

    map_out = DataMap([])
    basenames = [
        os.path.splitext(os.path.basename(item.file))[0] for item in inmap
    ]
    for refitem in refmap:
        refbase = os.path.splitext(os.path.basename(refitem.file))[0]
        idx = basenames.index(refbase)
        map_out.append(inmap[idx])

    fileid = os.path.join(mapfile_dir, filename)
    map_out.save(fileid)
    result = {'mapfile': fileid}
    return result
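# The core of the selection above is plain basename matching; a standalone
# sketch with made-up paths. Note that basenames.index() raises ValueError
# if a reference basename has no counterpart in the input map:
import os.path

_in_files = ['/data/run1/L1_SB000_uv.MS', '/data/run1/L1_SB001_uv.MS']
_ref_file = '/data/run2/L1_SB001_uv.MS'
_basenames = [os.path.splitext(os.path.basename(f))[0] for f in _in_files]
_refbase = os.path.splitext(os.path.basename(_ref_file))[0]
print _basenames.index(_refbase)   # -> 1 (the matching input file)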
def _calc_edge_chans(inmap, numch, edgeFactor=32):
    """
    Generates a map with strings that can be used as input for NDPPP to flag
    the edges of the input MSs during (or after) concatenation.

    inmap      - MultiDataMap (not mapfilename!) with the files to be
                 concatenated.
    numch      - Number of channels per input file (all files are assumed to
                 have the same number of channels).
    edgeFactor - Divisor to compute how many channels are to be flagged at
                 the beginning and end. (numch=64 and edgeFactor=32 means
                 "flag two channels at the beginning and two at the end")
    """
    outmap = DataMap([])
    for group in inmap:
        flaglist = []
        for i in xrange(len(group.file)):
            flaglist.extend(range(i * numch, i * numch + numch / edgeFactor))
            flaglist.extend(range((i + 1) * numch - numch / edgeFactor,
                                  (i + 1) * numch))
        outmap.append(DataProduct(group.host,
                                  str(flaglist).replace(' ', ''),
                                  group.skip))
        print str(flaglist).replace(' ', '')
    return outmap
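# Worked example of the edge-flag computation above for a single input file
# (i = 0) with numch=64 and edgeFactor=32: flag channels 0-1 and 62-63.
# (Illustrative only; integer division written as // for clarity.)
numch, edgeFactor = 64, 32
_flags = (list(range(0, numch // edgeFactor)) +
          list(range(numch - numch // edgeFactor, numch)))
print str(_flags).replace(' ', '')   # -> [0,1,62,63]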
def _sort_ObsSB(Mapfile, SBperGroup, filllNDPPPdummies=False, mergeLastGroup=False,
                startFromZero=False, truncateLastSBs=False, cdir=''):
    """
    Sorts the MSs in "inmap" into groups with "SBperGroup" consecutive subband
    numbers for each observation number. Expects files that contain a string
    like: "L123456_SAP007_SB890" or: "L123456_SB890". The files in the groups
    will be sorted by subband number. The hostname is taken from the first
    file found for each group.

    Mapfile           - Name of the mapfile with the input data, should
                        contain single MSs
    SBperGroup        - Group that many subbands into one group.
    filllNDPPPdummies - If True, add dummy file-names for missing subbands,
                        so that NDPPP can fill the data with flagged dummy
                        data.
    mergeLastGroup    - True: put the files from the last group that doesn't
                        have SBperGroup subbands into the second-to-last
                        group (which will then have more than SBperGroup
                        entries).
                        False: keep the incomplete last group, or - with
                        filllNDPPPdummies=True - fill the last group with
                        dummies.
    startFromZero     - Start grouping with 0, even if there is no SB000 in
                        the map.
    """
    if mergeLastGroup and truncateLastSBs:
        raise ValueError('_sort_ObsSB: Can either merge the last partial group '
                         'or truncate at last full group, not both!')
    inmap = MiniMapfileManager.load(Mapfile)
    # I'm going to need that:
    PatternReg = re.compile(r'(L\d+)(_SAP\d+)?_(SB\d+)')
    # get smallest and largest subband number: sort by subband and get the
    # SB-number of the first and last entry
    inmap.sortMS_by_SBnum()
    if not startFromZero:
        minSB = int(PatternReg.search(inmap.data[0].file).group(3)[2:])
    else:
        minSB = 0
    maxSB = int(PatternReg.search(inmap.data[-1].file).group(3)[2:])

    # sort data into dictionary
    sortDict = {}
    for item in inmap.data:
        if not item.skip:
            regerg = PatternReg.search(item.file)
            Obs = regerg.group(1)
            SBnum = int(regerg.group(3)[2:])
            SBgroup = int((SBnum - minSB) / SBperGroup)
            if not Obs in sortDict:
                sortDict[Obs] = {}
            if not SBgroup in sortDict[Obs]:
                replacestring = Obs + '_SBgr%03d-%d_%s' % (SBgroup, SBperGroup, cdir)
                reffile = PatternReg.sub(replacestring, item.file, 1)
                sortDict[Obs][SBgroup] = {'host': item.host, 'files': [],
                                          'firstfile': reffile}
            # the data is sorted by SBnum, so if files with lower SBnum are
            # not already in the list, then they are missing!
            while filllNDPPPdummies and \
                    len(sortDict[Obs][SBgroup]['files']) < ((SBnum - minSB) % SBperGroup):
                sortDict[Obs][SBgroup]['files'].append('dummy_entry')
            sortDict[Obs][SBgroup]['files'].append(item.file)

    # now go through the dictionary and put the data into the new map
    newmap = MultiDataMap([])
    firstfileMap = DataMap([])
    numGroups = (maxSB - minSB + 1) / SBperGroup
    SBs_in_last = (maxSB - minSB + 1) % SBperGroup
    obsNames = sortDict.keys()
    obsNames.sort()
    for obs in obsNames:
        obsDict = sortDict[obs]
        for SBgroup in xrange(numGroups - 1):
            if SBgroup in obsDict:
                while filllNDPPPdummies and len(obsDict[SBgroup]['files']) < SBperGroup:
                    obsDict[SBgroup]['files'].append('dummy_entry')
                newmap.append(MultiDataProduct(obsDict[SBgroup]['host'],
                                               obsDict[SBgroup]['files'], False))
                firstfileMap.append(DataProduct(obsDict[SBgroup]['host'],
                                                obsDict[SBgroup]['firstfile'], False))
        # work on the last full group
        SBgroup = numGroups - 1
        if SBgroup in obsDict:
            while filllNDPPPdummies and len(obsDict[SBgroup]['files']) < SBperGroup:
                obsDict[SBgroup]['files'].append('dummy_entry')
            if mergeLastGroup and SBs_in_last != 0:
                lastList = []
                if numGroups in obsDict:
                    lastList = obsDict[numGroups]['files']
                while filllNDPPPdummies and len(lastList) < SBs_in_last:
                    lastList.append('dummy_entry')
                obsDict[SBgroup]['files'].extend(lastList)
            newmap.append(MultiDataProduct(obsDict[SBgroup]['host'],
                                           obsDict[SBgroup]['files'], False))
            firstfileMap.append(DataProduct(obsDict[SBgroup]['host'],
                                            obsDict[SBgroup]['firstfile'], False))
        # need to process the incomplete group
        if SBs_in_last != 0 and not mergeLastGroup and not truncateLastSBs:
            if numGroups in obsDict:
                while filllNDPPPdummies and \
                        len(obsDict[numGroups]['files']) < SBs_in_last:
                    obsDict[numGroups]['files'].append('dummy_entry')
                newmap.append(MultiDataProduct(obsDict[numGroups]['host'],
                                               obsDict[numGroups]['files'], False))
                firstfileMap.append(DataProduct(obsDict[numGroups]['host'],
                                                obsDict[numGroups]['firstfile'], False))
    # That should be it!
    return (newmap, firstfileMap)
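# How the PatternReg regex above carves up a file name (illustrative,
# made-up name): group(1) is the observation, group(3) the subband, and
# sub() builds the group's reference file name.
import re

_PatternReg = re.compile(r'(L\d+)(_SAP\d+)?_(SB\d+)')
_m = _PatternReg.search('L123456_SAP007_SB890_uv.MS')
print _m.group(1), _m.group(3)   # -> L123456 SB890
print int(_m.group(3)[2:])       # -> 890 (the subband number)
print _PatternReg.sub('L123456_SBgr027-10_cdir', 'L123456_SAP007_SB890_uv.MS', 1)
# -> L123456_SBgr027-10_cdir_uv.MS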
def plugin_main(args, **kwargs):
    """
    Reads in closure phase file and returns the best delay calibrator mapfile

    Parameters
    ----------
    mapfile_dir : str
        Directory for output mapfile
    mapfile_out : str
        Name of output mapfile
    closurePhaseMap : str
        Name of the mapfile whose first entry points at the file with
        closure phase scatter

    Returns
    -------
    result : dict
        Output datamap closurePhaseFile

    """
    mapfile_dir = kwargs['mapfile_dir']
    mapfile_out = kwargs['mapfile_out']
    closurePhaseMap = kwargs['closurePhaseMap']
    fileid = os.path.join(mapfile_dir, mapfile_out)

    # this file holds all the output measurement sets
    with open(closurePhaseMap, 'r') as f:
        lines = f.readlines()
    closurePhaseFile = lines[0].split(',')[1].split(':')[1].strip().strip("'")

    # read the file
    with open(closurePhaseFile, 'r') as f:
        lines = f.readlines()

    ## get lists of directions and scatter
    direction = []
    scatter = []
    for l in lines:
        direction.append(l.split()[4])
        scatter.append(np.float(l.split()[6]))

    ## convert to numpy arrays
    direction = np.asarray(direction)
    scatter = np.asarray(scatter)

    ## find the minimum scatter
    if len(scatter) > 1:
        min_scatter_index = np.where(scatter == np.min(scatter))[0]
        best_calibrator = direction[min_scatter_index[0]]
    else:
        best_calibrator = direction[0]

    job_dir = closurePhaseFile.replace('closure_phases.txt', '')
    delayCal = job_dir + best_calibrator + '*' + 'concat'
    delay_ms = glob.glob(delayCal)[0]

    map_out = DataMap([])
    map_out.append(DataProduct('localhost', delay_ms, False))
    map_out.save(fileid)
    result = {'mapfile': fileid}
    return result
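# The parsing above takes whitespace-separated fields 4 and 6 (zero-based)
# from each line of the closure-phase file. A hypothetical line layout
# consistent with that indexing (the real file format is not shown in this
# snippet):
_line = "closure phase scatter for S123 : 0.042"
_fields = _line.split()
print _fields[4], float(_fields[6])   # -> S123 0.042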
def main(ms_input, filename=None, mapfile_dir=None, numSB=-1, hosts=None,
         NDPPPfill=True, target_path=None, stepname=None,
         mergeLastGroup=False, truncateLastSBs=True, firstSB=None):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional
        How many files should go into one frequency group. Values <= 0 mean
        put all files of the same time-step into one group.
        default = -1
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output
        files into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files.
    mergeLastGroup, truncateLastSBs : bool, optional
        mergeLastGroup = True, truncateLastSBs = True:
          not allowed
        mergeLastGroup = True, truncateLastSBs = False:
          put the files from the last group that doesn't have SBperGroup
          subbands into the second-to-last group (which will then have more
          than SBperGroup entries).
        mergeLastGroup = False, truncateLastSBs = True:
          ignore the last files that don't make for a full group (not all
          files are used).
        mergeLastGroup = False, truncateLastSBs = False:
          keep the incomplete last group, or - with NDPPPfill=True - fill
          the last group with dummies.
    firstSB : int, optional
        If set, then reference the grouping of files to this station-subband,
        as if a file with this station-subband were included in the input
        files. (For HBA-low, i.e. 0 -> 100MHz, 55 -> 110.74MHz,
        512 -> 200MHz)

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile

    """
    NDPPPfill = input2bool(NDPPPfill)
    mergeLastGroup = input2bool(mergeLastGroup)
    truncateLastSBs = input2bool(truncateLastSBs)
    firstSB = input2int(firstSB)
    numSB = int(numSB)

    if not filename or not mapfile_dir:
        raise ValueError('sort_times_into_freqGroups: filename and mapfile_dir are needed!')
    if mergeLastGroup and truncateLastSBs:
        raise ValueError('sort_times_into_freqGroups: Can either merge the last partial group or truncate at last full group, not both!')
#    if mergeLastGroup:
#        raise ValueError('sort_times_into_freqGroups: mergeLastGroup is not (yet) implemented!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for fname in map_in:
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('sort_times_into_freqGroups: type of "ms_input" unknown!')

    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print "sort_times_into_freqGroups: Working on", len(ms_list), "files (including flagged files)."

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # work only on files selected by a previous step
        if ms.lower() != 'none':
            # use the slower but more reliable way:
            obstable = pt.table(ms, ack=False)
            timestamp = int(round(np.min(obstable.getcol('TIME'))))
            #obstable = pt.table(ms+'::OBSERVATION', ack=False)
            #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
            obstable.close()
            if timestamp in time_groups:
                time_groups[timestamp]['files'].append(ms)
            else:
                time_groups[timestamp] = {'files': [ms],
                                          'basename': os.path.splitext(ms)[0]}
    print "sort_times_into_freqGroups: found", len(time_groups), "time-groups"

    # sort time-groups by frequency
    timestamps = time_groups.keys()
    timestamps.sort()   # not needed now, but later
    first = True
    nchans = 0
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms+'::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]
            if first:
                file_bandwidth = sw.col('TOTAL_BANDWIDTH')[0]
                nchans = sw.col('CHAN_WIDTH')[0].shape[0]
                chwidth = sw.col('CHAN_WIDTH')[0][0]
                freqset = set([freq])
                first = False
            else:
                assert file_bandwidth == sw.col('TOTAL_BANDWIDTH')[0]
                assert nchans == sw.col('CHAN_WIDTH')[0].shape[0]
                assert chwidth == sw.col('CHAN_WIDTH')[0][0]
                freqset.add(freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = zip(freqs, time_groups[time]['files'])
        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print "sort_times_into_freqGroups: Collected the frequencies for the time-groups"

    freqliste = np.array(list(freqset))
    freqliste.sort()
    freq_width = np.min(freqliste[1:] - freqliste[:-1])
    if file_bandwidth > freq_width:
        raise ValueError("Bandwidth of files is larger than minimum frequency step between two files!")
    if file_bandwidth < (freq_width/2.):
        raise ValueError("Bandwidth of files is smaller than half the minimum frequency step between two files! (More than half the data is missing.)")

    # the new output map
    filemap = MultiDataMap()
    groupmap = DataMap()
    # add 1% of the SB bandwidth in case maxfreq might be "exactly" on a group-border
    maxfreq = np.max(freqliste) + freq_width*0.51
    if firstSB != None:
        minfreq = (float(firstSB)/512.*100e6) + 100e6 - freq_width/2.
        if np.min(freqliste) < minfreq:
            raise ValueError('sort_times_into_freqGroups: Frequency of lowest input data is lower than reference frequency!')
    else:
        minfreq = np.min(freqliste) - freq_width/2.
    groupBW = freq_width*numSB
    if groupBW < 1e6:
        print 'sort_times_into_freqGroups: ***WARNING***: Bandwidth of concatenated MS is lower than 1 MHz. This may cause conflicts with the concatenated file names!'
    freqborders = np.arange(minfreq, maxfreq, groupBW)
    if mergeLastGroup:
        freqborders[-1] = maxfreq
    elif truncateLastSBs:
        pass    # nothing to do! (left to make the logic more clear)
    elif not truncateLastSBs and NDPPPfill:
        freqborders = np.append(freqborders, (freqborders[-1]+groupBW))
    elif not truncateLastSBs and not NDPPPfill:
        freqborders = np.append(freqborders, maxfreq)

    freqborders = freqborders[freqborders > (np.min(freqliste)-groupBW)]
    ngroups = len(freqborders)-1
    if ngroups == 0:
        raise ValueError('sort_times_into_freqGroups: Not enough input subbands to create at least one full (frequency-)group!')
    print "sort_times_into_freqGroups: Will create", ngroups, "group(s) with", numSB, "file(s) each."

    hostID = 0
    for time in timestamps:
        (freq, fname) = time_groups[time]['freq_names'].pop(0)
        for groupIdx in xrange(ngroups):
            files = []
            skip_this = True
            filefreqs_low = np.arange(freqborders[groupIdx], freqborders[groupIdx+1], freq_width)
            for lower_freq in filefreqs_low:
                if freq > lower_freq and freq < lower_freq+freq_width:
                    assert freq != 1e12
                    files.append(fname)
                    if len(time_groups[time]['freq_names']) > 0:
                        (freq, fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        (freq, fname) = (1e12, 'This_shouldn\'t_show_up')
                    skip_this = False
                elif NDPPPfill:
                    files.append('dummy.ms')
            if not skip_this:
                filemap.append(MultiDataProduct(hosts[hostID % numhosts], files, skip_this))
                freqID = int((freqborders[groupIdx]+freqborders[groupIdx+1])/2e6)
                groupname = time_groups[time]['basename'] + '_%Xt_%dMHz.ms' % (time, freqID)
                if type(stepname) is str:
                    groupname += stepname
                if type(target_path) is str:
                    groupname = os.path.join(target_path, os.path.basename(groupname))
                groupmap.append(DataProduct(hosts[hostID % numhosts], groupname, skip_this))
        orphan_files = len(time_groups[time]['freq_names'])
        if freq < 1e12:
            orphan_files += 1
        if orphan_files > 0:
            print "sort_times_into_freqGroups: Had %d unassigned files in time-group %xt." % (orphan_files, time)

    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename+'_groups')
    groupmap.save(groupmapname)
    # generate map with edge-channels to flag
    flagmap = _calc_edge_chans(filemap, nchans)
    flagmapname = os.path.join(mapfile_dir, filename+'_flags')
    flagmap.save(flagmapname)
    result = {'mapfile': filemapname, 'groupmapfile': groupmapname, 'flagmapfile': flagmapname}
    return result
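# Sketch of how the frequency group borders above come about (illustrative
# numbers: 195.3125 kHz LOFAR subband width, 10 subbands per group). A file
# whose REF_FREQUENCY falls in [freqborders[i], freqborders[i+1]) joins
# group i; gaps become 'dummy.ms' entries when NDPPPfill is set.
import numpy as np

_freq_width, _numSB = 195312.5, 10
_minfreq, _maxfreq = 120.0e6, 125.0e6
_freqborders = np.arange(_minfreq, _maxfreq, _freq_width * _numSB)
print _freqborders   # group edges in Hz, one group per consecutive pair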
def main(ms_input, outmapname=None, mapfile_dir=None,
         cellsize_highres_deg=0.00208, cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5, fieldsize_lowres=6.5, image_padding=1.,
         y_axis_stretch=1.):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        How many FWHMs the high-res images should be.
    fieldsize_lowres : float, optional
        How many FWHMs the low-res images should be.
    image_padding : float, optional
        How much padding shall we add to the padded image sizes.
    y_axis_stretch : float, optional
        How much the y-axis should be stretched or compressed.

    Returns
    -------
    result : dict
        Dict with the names of the generated mapfiles

    """
    if not outmapname or not mapfile_dir:
        raise ValueError(
            'InitSubtract_deep_sort_and_compute.py: outmapname and mapfile_dir are needed!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [
                f.strip(' \'\"') for f in ms_input.strip('[]').split(',')
            ]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for fname in map_in:
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('InitSubtract_deep_sort_and_compute.py: type of "ms_input" unknown!')

    cellsize_highres_deg = float(cellsize_highres_deg)
    cellsize_lowres_deg = float(cellsize_lowres_deg)
    fieldsize_highres = float(fieldsize_highres)
    fieldsize_lowres = float(fieldsize_lowres)
    image_padding = float(image_padding)
    y_axis_stretch = float(y_axis_stretch)

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]

    bands = []
    bandfreqs = []
    print "InitSubtract_deep_sort_and_compute.py: Putting files into bands."
    for MSkey in msdict.keys():
        bands.append(Band(msdict[MSkey]))
        bandfreqs.append(Band(msdict[MSkey]).freq)

    ## min freq gives largest image size for deep image
    bandfreqs = np.array(bandfreqs)
    minfreq = np.min(bandfreqs)
    bandmin = np.argmin(bandfreqs)
    ## need to map the output from wsclean channels to the right frequencies
    ## just put the bands in the right freq order
    wsclean_channum = np.argsort(bandfreqs)
    bands = np.array(bands)
    bands = bands[wsclean_channum]
    #minfreq = 1e9
    #for ib, band in enumerate(bands):
    #    if band.freq < minfreq:
    #        minfreq = band.freq
    #        bandmin = ib

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    high_paddedsize_map = DataMap([])
    low_paddedsize_map = DataMap([])
    numfiles = 0
    nbands = len(bands)
    if nbands > 8:
        nchansout_clean1 = np.int(nbands / 4)
    elif nbands > 4:
        nchansout_clean1 = np.int(nbands / 2)
    else:
        nchansout_clean1 = np.int(nbands)

    (freqstep, timestep) = bands[0].get_averaging_steps()
    # timestep_sec gets added to the band object in get_averaging_steps()
    int_time_sec = bands[0].timestep_sec * timestep
    nwavelengths_high = bands[0].get_nwavelengths(cellsize_highres_deg, int_time_sec)
    nwavelengths_low = bands[0].get_nwavelengths(cellsize_lowres_deg, int_time_sec)

    for band in bands:
        print "InitSubtract_deep_sort_and_compute.py: Working on Band:", band.name
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(
            cellsize_highres_deg, cellsize_lowres_deg,
            fieldsize_highres, fieldsize_lowres)
        imsize_high_res_stretch = band.get_optimum_size(
            int(imsize_high_res * y_axis_stretch))
        high_size_map.append(
            DataProduct('localhost',
                        str(imsize_high_res) + " " + str(imsize_high_res_stretch),
                        False))
        imsize_low_res_stretch = band.get_optimum_size(
            int(imsize_low_res * y_axis_stretch))
        low_size_map.append(
            DataProduct('localhost',
                        str(imsize_low_res) + " " + str(imsize_low_res_stretch),
                        False))
        imsize_high_pad = band.get_optimum_size(
            int(imsize_high_res * image_padding))
        imsize_high_pad_stretch = band.get_optimum_size(
            int(imsize_high_res * image_padding * y_axis_stretch))
        high_paddedsize_map.append(
            DataProduct('localhost',
                        str(imsize_high_pad) + " " + str(imsize_high_pad_stretch),
                        False))
        imsize_low_pad = band.get_optimum_size(
            int(imsize_low_res * image_padding))
        imsize_low_pad_stretch = band.get_optimum_size(
            int(imsize_low_res * image_padding * y_axis_stretch))
        low_paddedsize_map.append(
            DataProduct('localhost',
                        str(imsize_low_pad) + " " + str(imsize_low_pad_stretch),
                        False))
        print band.freq / 1e6, imsize_high_res, imsize_high_res_stretch, \
            imsize_high_pad, imsize_high_pad_stretch, imsize_low_res, \
            imsize_low_res_stretch, imsize_low_pad, imsize_low_pad_stretch, \
            nwavelengths_high, nwavelengths_low
        if band.freq == minfreq:
            deep_imsize_high_res = imsize_high_res
            deep_imsize_high_res_stretch = imsize_high_res_stretch
            deep_imsize_high_pad = imsize_high_pad
            deep_imsize_high_pad_stretch = imsize_high_pad_stretch
            deep_imsize_low_res = imsize_low_res
            deep_imsize_low_res_stretch = imsize_low_res_stretch
            deep_imsize_low_pad = imsize_low_pad
            deep_imsize_low_pad_stretch = imsize_low_pad_stretch
            print '*', band.freq / 1e6, imsize_high_res, imsize_high_res_stretch, \
                imsize_high_pad, imsize_high_pad_stretch, imsize_low_res, \
                imsize_low_res_stretch, imsize_low_pad, imsize_low_pad_stretch

    deep_high_size_map = DataMap([
        DataProduct('localhost',
                    str(deep_imsize_high_res) + " " + str(deep_imsize_high_res_stretch),
                    False)
    ])
    deep_high_paddedsize_map = DataMap([
        DataProduct('localhost',
                    str(deep_imsize_high_pad) + " " + str(deep_imsize_high_pad_stretch),
                    False)
    ])
    deep_low_size_map = DataMap([
        DataProduct('localhost',
                    str(deep_imsize_low_res) + " " + str(deep_imsize_low_res_stretch),
                    False)
    ])
    deep_low_paddedsize_map = DataMap([
        DataProduct('localhost',
                    str(deep_imsize_low_pad) + " " + str(deep_imsize_low_pad_stretch),
                    False)
    ])
    nbands_map = DataMap([DataProduct('localhost', str(nbands), False)])
    nchansout_clean1_map = DataMap(
        [DataProduct('localhost', str(nchansout_clean1), False)])

    print "InitSubtract_deep_sort_and_compute.py: Computing averaging steps."
    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([])
    nwavelengths_high_map = DataMap([])
    nwavelengths_low_map = DataMap([])
    for index in xrange(numfiles):
        freqstep_map.append(DataProduct('localhost', str(freqstep), False))
        timestep_map.append(DataProduct('localhost', str(timestep), False))
        nwavelengths_high_map.append(
            DataProduct('localhost', str(nwavelengths_high), False))
        nwavelengths_low_map.append(
            DataProduct('localhost', str(nwavelengths_low), False))

    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname + '_single')
    file_single_map.save(file_single_mapname)
    high_sizename = os.path.join(mapfile_dir, outmapname + '_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname + '_low_size')
    low_size_map.save(low_sizename)
    high_padsize_name = os.path.join(mapfile_dir, outmapname + '_high_padded_size')
    high_paddedsize_map.save(high_padsize_name)
    low_padsize_name = os.path.join(mapfile_dir, outmapname + '_low_padded_size')
    low_paddedsize_map.save(low_padsize_name)
    deep_high_sizename = os.path.join(mapfile_dir, outmapname + '_deep_high_size')
    deep_high_size_map.save(deep_high_sizename)
    deep_low_sizename = os.path.join(mapfile_dir, outmapname + '_deep_low_size')
    deep_low_size_map.save(deep_low_sizename)
    deep_high_padsize_name = os.path.join(mapfile_dir, outmapname + '_deep_high_padded_size')
    deep_high_paddedsize_map.save(deep_high_padsize_name)
    deep_low_padsize_name = os.path.join(mapfile_dir, outmapname + '_deep_low_padded_size')
    deep_low_paddedsize_map.save(deep_low_padsize_name)
    nbands_mapname = os.path.join(mapfile_dir, outmapname + '_nbands')
    nbands_map.save(nbands_mapname)
    nchansout_clean1_mapname = os.path.join(mapfile_dir, outmapname + '_nchansout_clean1')
    nchansout_clean1_map.save(nchansout_clean1_mapname)
    freqstepname = os.path.join(mapfile_dir, outmapname + '_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname + '_timestep')
    timestep_map.save(timestepname)
    nwavelengths_high_name = os.path.join(mapfile_dir, outmapname + '_nwavelengths_high')
    nwavelengths_high_map.save(nwavelengths_high_name)
    nwavelengths_low_name = os.path.join(mapfile_dir, outmapname + '_nwavelengths_low')
    nwavelengths_low_map.save(nwavelengths_low_name)

    result = {'groupmap': groupmapname,
              'single_mapfile': file_single_mapname,
              'high_size_mapfile': high_sizename,
              'low_size_mapfile': low_sizename,
              'high_padsize_mapfile': high_padsize_name,
              'low_padsize_mapfile': low_padsize_name,
              'deep_high_size_mapfile': deep_high_sizename,
              'deep_low_size_mapfile': deep_low_sizename,
              'deep_high_padsize_mapfile': deep_high_padsize_name,
              'deep_low_padsize_mapfile': deep_low_padsize_name,
              'nbands': nbands_mapname,
              'nchansout_clean1': nchansout_clean1_mapname,
              'freqstep': freqstepname,
              'timestep': timestepname,
              'nwavelengths_high_mapfile': nwavelengths_high_name,
              'nwavelengths_low_mapfile': nwavelengths_low_name}
    return result
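# The nchansout_clean1 selection above in isolation: quarter the band count
# when there are more than 8 bands, halve it above 4, else keep it
# (illustrative standalone version using integer division):
def _nchansout_clean1_for(nbands):
    if nbands > 8:
        return nbands // 4
    elif nbands > 4:
        return nbands // 2
    return nbands

print [(n, _nchansout_clean1_for(n)) for n in (3, 6, 12)]
# -> [(3, 3), (6, 3), (12, 3)]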
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with only the MSs at the middle frequency

    Quite a bit of a hack for a plugin, but right now I don't care.

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    index: int, optional
        Index of the frequency band to use.

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap()

    # do not re-run if we already ran, and input files are deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print 'PipelineStep_selectMiddleFreq: Not re-running because output file exists, but input files don\'t!'
        return {'mapfile': fileid}

    # sort into frequency groups
    freq_groups = {}
    hosts = []
    for item in map_in:
        # Get the frequency info
        sw = pt.table(item.file + '::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if freq in freq_groups:
            freq_groups[freq].append(item.file)
        else:
            freq_groups[freq] = [item.file]
        if not item.host in hosts:
            hosts.append(item.host)

    # find the maximum number of files per frequency-group
    maxfiles = max([len(group) for group in freq_groups.values()])
    # find the center-frequency
    freqs = freq_groups.keys()
    freqs.sort()
    selfreq = freqs[len(freqs) / 2]
    if 'index' in kwargs:
        selfreq = int(kwargs['index'])
    else:
        # make sure that the chosen frequency has maxfiles entries
        while len(freq_groups[selfreq]) < maxfiles:
            freqs.remove(selfreq)
            selfreq = freqs[len(freqs) / 2]

    # extend the hosts-list
    for i in range(len(freq_groups[selfreq]) - len(hosts)):
        hosts.append(hosts[i])
    # fill the output-map
    for (host, fname) in zip(hosts, freq_groups[selfreq]):
        map_out.append(DataProduct(host, fname, False))

    map_out.save(fileid)
    result = {'mapfile': fileid}
    return result
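# The centre-frequency pick above reduces to indexing the sorted frequency
# keys at the midpoint (illustrative frequencies in MHz; the plugin then
# re-picks if that group has fewer than maxfiles entries):
_freqs = sorted([120, 125, 130, 135, 140])
print _freqs[len(_freqs) // 2]   # -> 130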
def main(ms_input, outmapname=None, mapfile_dir=None,
         cellsize_highres_deg=0.00208, cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5, fieldsize_lowres=6.5, image_padding=1.,
         y_axis_stretch=1., calc_y_axis_stretch=False,
         apply_y_axis_stretch_highres=True, apply_y_axis_stretch_lowres=True):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        How many FWHMs the high-res images should be.
    fieldsize_lowres : float, optional
        How many FWHMs the low-res images should be.
    image_padding : float, optional
        How much padding shall we add to the padded image sizes.
    y_axis_stretch : float, optional
        How much the y-axis should be stretched or compressed.
    calc_y_axis_stretch : bool, optional
        Adjust the image sizes returned by this script for the mean
        elevation. If True, the value of y_axis_stretch above is ignored
    apply_y_axis_stretch_highres : bool, optional
        Apply the y-axis stretch to the high-res image sizes
    apply_y_axis_stretch_lowres : bool, optional
        Apply the y-axis stretch to the low-res image sizes

    Returns
    -------
    result : dict
        Dict with the names of the generated mapfiles

    """
    if not outmapname or not mapfile_dir:
        raise ValueError(
            'InitSubtract_sort_and_compute.py: outmapname and mapfile_dir are needed!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [
                f.strip(' \'\"') for f in ms_input.strip('[]').split(',')
            ]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for fname in map_in:
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('InitSubtract_sort_and_compute.py: type of "ms_input" unknown!')

    cellsize_highres_deg = float(cellsize_highres_deg)
    cellsize_lowres_deg = float(cellsize_lowres_deg)
    fieldsize_highres = float(fieldsize_highres)
    fieldsize_lowres = float(fieldsize_lowres)
    image_padding = float(image_padding)
    y_axis_stretch = float(y_axis_stretch)
    calc_y_axis_stretch = input2bool(calc_y_axis_stretch)
    apply_y_axis_stretch_highres = input2bool(apply_y_axis_stretch_highres)
    apply_y_axis_stretch_lowres = input2bool(apply_y_axis_stretch_lowres)

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]

    bands = []
    print("InitSubtract_sort_and_compute.py: Putting files into bands.")
    for MSkey in msdict.keys():
        bands.append(Band(msdict[MSkey]))

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    high_paddedsize_map = DataMap([])
    low_paddedsize_map = DataMap([])
    numfiles = 0
    for i, band in enumerate(bands):
        print("InitSubtract_sort_and_compute.py: Working on Band:", band.name)
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(
            cellsize_highres_deg, cellsize_lowres_deg,
            fieldsize_highres, fieldsize_lowres)

        # Calculate y_axis_stretch if desired
        if calc_y_axis_stretch:
            if i == 0:
                y_axis_stretch = 1.0 / np.sin(band.mean_el_rad)
                print("InitSubtract_sort_and_compute.py: Using y-axis stretch of:",
                      y_axis_stretch)
            y_axis_stretch_lowres = y_axis_stretch
            y_axis_stretch_highres = y_axis_stretch
        else:
            y_axis_stretch_lowres = 1.0
            y_axis_stretch_highres = 1.0

        # Adjust sizes so that we get the correct ones below
        if not apply_y_axis_stretch_highres:
            y_axis_stretch_highres = 1.0
        if not apply_y_axis_stretch_lowres:
            y_axis_stretch_lowres = 1.0
        imsize_low_res /= y_axis_stretch_lowres
        imsize_high_res /= y_axis_stretch_highres

        imsize_high_res = band.get_optimum_size(int(imsize_high_res))
        imsize_high_res_stretch = band.get_optimum_size(
            int(imsize_high_res * y_axis_stretch_highres))
        high_size_map.append(
            DataProduct('localhost',
                        str(imsize_high_res) + " " + str(imsize_high_res_stretch),
                        False))
        imsize_low_res = band.get_optimum_size(int(imsize_low_res))
        imsize_low_res_stretch = band.get_optimum_size(
            int(imsize_low_res * y_axis_stretch_lowres))
        low_size_map.append(
            DataProduct('localhost',
                        str(imsize_low_res) + " " + str(imsize_low_res_stretch),
                        False))
        imsize_high_pad = band.get_optimum_size(
            int(imsize_high_res * image_padding))
        imsize_high_pad_stretch = band.get_optimum_size(
            int(imsize_high_res * image_padding * y_axis_stretch_highres))
        high_paddedsize_map.append(
            DataProduct('localhost',
                        str(imsize_high_pad) + " " + str(imsize_high_pad_stretch),
                        False))
        imsize_low_pad = band.get_optimum_size(
            int(imsize_low_res * image_padding))
        imsize_low_pad_stretch = band.get_optimum_size(
            int(imsize_low_res * image_padding * y_axis_stretch_lowres))
        low_paddedsize_map.append(
            DataProduct('localhost',
                        str(imsize_low_pad) + " " + str(imsize_low_pad_stretch),
                        False))

    print("InitSubtract_sort_and_compute.py: Computing averaging steps.")
    (freqstep, timestep) = bands[0].get_averaging_steps()
    (nwavelengths_high, nwavelengths_low) = bands[0].nwavelengths(
        cellsize_highres_deg, cellsize_lowres_deg, timestep)

    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([])
    nwavelengths_high_map = DataMap([])
    nwavelengths_low_map = DataMap([])
    for index in range(numfiles):
        freqstep_map.append(DataProduct('localhost', str(freqstep), False))
        timestep_map.append(DataProduct('localhost', str(timestep), False))
        nwavelengths_high_map.append(
            DataProduct('localhost', str(nwavelengths_high), False))
        nwavelengths_low_map.append(
            DataProduct('localhost', str(nwavelengths_low), False))

    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname + '_single')
    file_single_map.save(file_single_mapname)
    high_sizename = os.path.join(mapfile_dir, outmapname + '_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname + '_low_size')
    low_size_map.save(low_sizename)
    high_padsize_name = os.path.join(mapfile_dir, outmapname + '_high_padded_size')
    high_paddedsize_map.save(high_padsize_name)
    low_padsize_name = os.path.join(mapfile_dir, outmapname + '_low_padded_size')
    low_paddedsize_map.save(low_padsize_name)
    freqstepname = os.path.join(mapfile_dir, outmapname + '_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname + '_timestep')
    timestep_map.save(timestepname)
    nwavelengths_high_name = os.path.join(mapfile_dir, outmapname + '_nwavelengths_high')
    nwavelengths_high_map.save(nwavelengths_high_name)
    nwavelengths_low_name = os.path.join(mapfile_dir, outmapname + '_nwavelengths_low')
    nwavelengths_low_map.save(nwavelengths_low_name)

    result = {'groupmap': groupmapname,
              'single_mapfile': file_single_mapname,
              'high_size_mapfile': high_sizename,
              'low_size_mapfile': low_sizename,
              'high_padsize_mapfile': high_padsize_name,
              'low_padsize_mapfile': low_padsize_name,
              'freqstep': freqstepname,
              'timestep': timestepname,
              'nwavelengths_high_mapfile': nwavelengths_high_name,
              'nwavelengths_low_mapfile': nwavelengths_low_name}
    return result
def main(ms_input, filename=None, mapfile_dir=None, numSB=-1, hosts=None,
         NDPPPfill=True, target_path=None, stepname=None, nband_pad=0):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional
        How many files should go into one frequency group. Values <= 0 mean
        put all files of the same time-step into one group.
        default = -1
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output
        files into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files
    nband_pad : int, optional
        Add this number of bands of dummy data to the high-frequency end
        of the list

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile

    """
    if not filename or not mapfile_dir:
        raise ValueError('sort_times_into_freqGroups: filename and mapfile_dir are needed!')

    # convert input to needed types
    ms_list = input2strlist(ms_input)
    NDPPPfill = input2bool(NDPPPfill)
    nband_pad = int(nband_pad)
    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print "sort_times_into_freqGroups: Working on", len(ms_list), "files"

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # use the slower but more reliable way:
        obstable = pt.table(ms, ack=False)
        timestamp = int(round(np.min(obstable.getcol('TIME'))))
        #obstable = pt.table(ms+'::OBSERVATION', ack=False)
        #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
        obstable.close()
        if timestamp in time_groups:
            time_groups[timestamp]['files'].append(ms)
        else:
            time_groups[timestamp] = {'files': [ms],
                                      'basename': os.path.splitext(ms)[0]}
    print "sort_times_into_freqGroups: found", len(time_groups), "time-groups"

    # sort time-groups by frequency
    timestamps = time_groups.keys()
    timestamps.sort()   # not needed now, but later
    first = True
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms+'::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]
            if first:
                freq_width = sw.col('TOTAL_BANDWIDTH')[0]
                maxfreq = freq
                minfreq = freq
                first = False
            else:
                assert freq_width == sw.col('TOTAL_BANDWIDTH')[0]
                maxfreq = max(maxfreq, freq)
                minfreq = min(minfreq, freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = zip(freqs, time_groups[time]['files'])
        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print "sort_times_into_freqGroups: Collected the frequencies for the time-groups"

    # the new output map
    filemap = MultiDataMap()
    groupmap = DataMap()
    maxfreq = maxfreq + freq_width/2.
    minfreq = minfreq - freq_width/2.
    numFiles = round((maxfreq-minfreq)/freq_width)
    if numSB > 0:
        ngroups = int(np.ceil(numFiles/numSB))
    else:
        ngroups = 1
        numSB = int(numFiles)
    hostID = 0
    for time in timestamps:
        (freq, fname) = time_groups[time]['freq_names'].pop(0)
        for fgroup in range(ngroups):
            files = []
            skip_this = True
            for fIdx in range(numSB):
                if freq > (fIdx+fgroup*numSB+1)*freq_width+minfreq:
                    files.append('dummy.ms')
                else:
                    files.append(fname)
                    if len(time_groups[time]['freq_names']) > 0:
                        (freq, fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        (freq, fname) = (1e12, 'This_shouldn\'t_show_up')
                    skip_this = False
            for i in range(nband_pad):
                files.append('dummy.ms')
            filemap.append(MultiDataProduct(hosts[hostID % numhosts], files, skip_this))
            groupname = time_groups[time]['basename'] + '_%Xt_%dg.ms' % (time, fgroup)
            if type(stepname) is str:
                groupname += stepname
            if type(target_path) is str:
                groupname = os.path.join(target_path, os.path.basename(groupname))
            groupmap.append(DataProduct(hosts[hostID % numhosts], groupname, skip_this))
        hostID += 1
        assert freq == 1e12

    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename+'_groups')
    groupmap.save(groupmapname)
    result = {'mapfile': filemapname, 'groupmapfile': groupmapname}
    return result
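# The group naming above encodes the timestamp in hex ('%X') and the group
# index in decimal (illustrative values; 'L123456_uv' is a made-up basename):
_time, _fgroup = 0x123456789, 3
print 'L123456_uv' + '_%Xt_%dg.ms' % (_time, _fgroup)
# -> L123456_uv_123456789t_3g.ms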
def main(ms_input, outmapname=None, mapfile_dir=None,
         cellsize_highres_deg=0.00208, cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5, fieldsize_lowres=6.5, image_padding=1.,
         y_axis_stretch=1.):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        How many FWHMs the high-res images should be.
    fieldsize_lowres : float, optional
        How many FWHMs the low-res images should be.
    image_padding : float, optional
        How much padding shall we add to the padded image sizes.
    y_axis_stretch : float, optional
        How much the y-axis should be stretched or compressed.

    Returns
    -------
    result : dict
        Dict with the names of the generated mapfiles

    """
    if not outmapname or not mapfile_dir:
        raise ValueError(
            'InitSubtract_sort_and_compute.py: outmapname and mapfile_dir are needed!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [
                f.strip(' \'\"') for f in ms_input.strip('[]').split(',')
            ]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for fname in map_in:
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('InitSubtract_sort_and_compute.py: type of "ms_input" unknown!')

    cellsize_highres_deg = float(cellsize_highres_deg)
    cellsize_lowres_deg = float(cellsize_lowres_deg)
    fieldsize_highres = float(fieldsize_highres)
    fieldsize_lowres = float(fieldsize_lowres)
    image_padding = float(image_padding)
    y_axis_stretch = float(y_axis_stretch)

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]

    bands = []
    print "InitSubtract_sort_and_compute.py: Putting files into bands."
    for MSkey in msdict.keys():
        bands.append(Band(msdict[MSkey]))

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    high_paddedsize_map = DataMap([])
    low_paddedsize_map = DataMap([])
    numfiles = 0
    for band in bands:
        print "InitSubtract_sort_and_compute.py: Working on Band:", band.name
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(
            cellsize_highres_deg, cellsize_lowres_deg,
            fieldsize_highres, fieldsize_lowres)
        imsize_high_res_stretch = band.get_optimum_size(
            int(imsize_high_res * y_axis_stretch))
        high_size_map.append(
            DataProduct('localhost',
                        str(imsize_high_res) + " " + str(imsize_high_res_stretch),
                        False))
        imsize_low_res_stretch = band.get_optimum_size(
            int(imsize_low_res * y_axis_stretch))
        low_size_map.append(
            DataProduct('localhost',
                        str(imsize_low_res) + " " + str(imsize_low_res_stretch),
                        False))
        imsize_high_pad = band.get_optimum_size(
            int(imsize_high_res * image_padding))
        imsize_high_pad_stretch = band.get_optimum_size(
            int(imsize_high_res * image_padding * y_axis_stretch))
        high_paddedsize_map.append(
            DataProduct('localhost',
                        str(imsize_high_pad) + " " + str(imsize_high_pad_stretch),
                        False))
        imsize_low_pad = band.get_optimum_size(
            int(imsize_low_res * image_padding))
        imsize_low_pad_stretch = band.get_optimum_size(
            int(imsize_low_res * image_padding * y_axis_stretch))
        low_paddedsize_map.append(
            DataProduct('localhost',
                        str(imsize_low_pad) + " " + str(imsize_low_pad_stretch),
                        False))

    print "InitSubtract_sort_and_compute.py: Computing averaging steps."
    (freqstep, timestep) = bands[0].get_averaging_steps()
    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([])
    for index in xrange(numfiles):
        freqstep_map.append(DataProduct('localhost', str(freqstep), False))
        timestep_map.append(DataProduct('localhost', str(timestep), False))

    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname + '_single')
    file_single_map.save(file_single_mapname)
    high_sizename = os.path.join(mapfile_dir, outmapname + '_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname + '_low_size')
    low_size_map.save(low_sizename)
    high_padsize_name = os.path.join(mapfile_dir, outmapname + '_high_padded_size')
    high_paddedsize_map.save(high_padsize_name)
    low_padsize_name = os.path.join(mapfile_dir, outmapname + '_low_padded_size')
    low_paddedsize_map.save(low_padsize_name)
    freqstepname = os.path.join(mapfile_dir, outmapname + '_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname + '_timestep')
    timestep_map.save(timestepname)

    result = {'groupmap': groupmapname,
              'single_mapfile': file_single_mapname,
              'high_size_mapfile': high_sizename,
              'low_size_mapfile': low_sizename,
              'high_padsize_mapfile': high_padsize_name,
              'low_padsize_mapfile': low_padsize_name,
              'freqstep': freqstepname,
              'timestep': timestepname}
    return result
def test_run_dppp(self):
    """
    This unit test borders on a functional test: the framework is mocked
    out by using a mockable function
    """
    working_dir = ""
    time_slice_dir_path = tempfile.mkdtemp(
        suffix=".%s" % (os.path.basename(__file__), ))
    slices_per_image = 2
    input_map = [("lce072", "test_file_path1"),
                 ("lce072", "test_file_path2"),
                 ("lce072", "test_file_path3"),
                 ("lce072", "test_file_path4")]
    input_datamap = DataMap()
    for entry in input_map:
        input_datamap.append(entry)

    subbands_per_image = 2
    collected_ms_dir_name = ""
    fp = open(os.path.join(self.test_path, "parset"), 'w')
    fp.write("key=value\n")
    fp.close()
    parset = os.path.join(self.test_path, "parset")
    ndppp = ""
    init_script = ""

    sut = ImagerPrepareTestWrapper()
    output = sut._run_dppp(working_dir, time_slice_dir_path,
                           slices_per_image, input_datamap,
                           subbands_per_image, collected_ms_dir_name,
                           parset, ndppp)

    # The output should contain two timeslice MSs prepended with the
    # time_slice_dir_path
    expected_output = [
        os.path.join(time_slice_dir_path, "time_slice_0.dppp.ms"),
        os.path.join(time_slice_dir_path, "time_slice_1.dppp.ms")
    ]
    self.assertTrue(
        output == expected_output,
        "_run_dppp did not return timeslice ms: {0} != {1}".format(
            output, expected_output))

    # Two parsets should be written in the time_slice_dir_path
    parset_1_content_expected = [
        ('replace', 'uselogger', 'True'),
        ('replace', 'avg1.freqstep', '4'),
        ('replace', 'msin', "['test_file_path1', 'test_file_path2']"),
        ('replace', 'msout', '{0}'.format(
            os.path.join(time_slice_dir_path, "time_slice_0.dppp.ms")))
    ]
    parset_1_output = eval(open(os.path.join(
        time_slice_dir_path, "time_slice_0.dppp.ms.ndppp.par")).read())
    self.assertTrue(
        parset_1_output == parset_1_content_expected,
        "\n{0} != \n{1}".format(parset_1_output, parset_1_content_expected))

    parset_2_content_expected = [
        ('replace', 'uselogger', 'True'),
        ('replace', 'avg1.freqstep', '4'),
        ('replace', 'msin', "['test_file_path3', 'test_file_path4']"),
        ('replace', 'msout', '{0}'.format(
            os.path.join(time_slice_dir_path, "time_slice_1.dppp.ms")))
    ]
    parset_2_output = eval(open(os.path.join(
        time_slice_dir_path, "time_slice_1.dppp.ms.ndppp.par")).read())
    self.assertTrue(
        parset_2_output == parset_2_content_expected,
        "\n{0} != \n{1}".format(parset_2_output, parset_2_content_expected))

    shutil.rmtree(time_slice_dir_path)
def plugin_main(args, **kwargs):
    """
    Makes a mapfile with the MSs spread across the full bandwidth

    Parameters
    ----------
    mapfile_in : str
        Filename of datamap containing MS files
    mapfile_dir : str
        Directory for output mapfile
    filename: str
        Name of output mapfile
    num: int, optional
        Number of frequencies in output mapfile

    Returns
    -------
    result : dict
        New parmdb datamap filename

    """
    mapfile_in = kwargs['mapfile_in']
    mapfile_dir = kwargs['mapfile_dir']
    filename = kwargs['filename']
    if 'num' in kwargs:
        num = int(kwargs['num'])
    else:
        num = 6
    fileid = os.path.join(mapfile_dir, filename)

    map_in = DataMap.load(mapfile_in)
    map_in.iterator = DataMap.SkipIterator
    map_out = DataMap()
    map_out.data = []
    map_out._data = []

    # do not re-run if we already ran, and input files are deleted.
    if os.path.exists(fileid) and not os.path.exists(map_in[0].file):
        print 'PipelineStep_selectDistFreqs: Not re-running because output file exists, but input files don\'t!'
        return {'mapfile': fileid}

    # sort into frequency groups
    freq_groups = {}
    hosts = []
    for item in map_in:
        # Get the frequency info from the MS file
        sw = pt.table(item.file+'::SPECTRAL_WINDOW', ack=False)
        freq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if freq in freq_groups:
            freq_groups[freq].append(item.file)
        else:
            freq_groups[freq] = [item.file]
        if not item.host in hosts:
            hosts.append(item.host)

    # select frequencies
    freqs = freq_groups.keys()
    freqs.sort()
    num_freqs = len(freqs)
    if num > num_freqs:
        print 'PipelineStep_selectDistFreqs: fewer than %d frequency groups found, continuing with %d groups.' % (num, num_freqs)
        num = num_freqs
    dist_ind = get_distributed_indices(0, num_freqs-1, num)
    selfreqs = [freqs[ind] for ind in dist_ind]
    if len(selfreqs) < 1:
        print "PipelineStep_selectDistFreqs: Selected fewer than one frequency band."
        raise ValueError("Selected fewer than one frequency band.")

    all_files = []
    for selfreq in selfreqs:
        all_files.extend(freq_groups[selfreq])

    # extend the hosts-list
    for i in range(len(all_files) - len(hosts)):
        hosts.append(hosts[i])
    # fill the output-map
    for (host, fname) in zip(hosts, all_files):
        map_out.append(DataProduct(host, fname, False))

    map_out.save(fileid)
    del map_in
    del map_out
    result = {'mapfile': fileid}
    return result
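# get_distributed_indices() is not defined in this snippet; a plausible
# sketch (an assumption, not necessarily the pipeline's actual helper) that
# spreads `num` indices evenly over [first, last] inclusive:
import numpy as np

def _get_distributed_indices(first, last, num):
    return [int(round(x)) for x in np.linspace(first, last, num)]

print _get_distributed_indices(0, 9, 4)   # -> [0, 3, 6, 9]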
def main(ms_input, filename=None, mapfile_dir=None, numSB=-1, hosts=None,
         NDPPPfill=True, target_path=None, stepname=None,
         mergeLastGroup=False, truncateLastSBs=True):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional
        How many files should go into one frequency group. Values <= 0 mean
        put all files of the same time-step into one group.
        default = -1
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output
        files into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files.
    mergeLastGroup, truncateLastSBs : bool, optional
        mergeLastGroup = True, truncateLastSBs = True:
          not allowed
        mergeLastGroup = True, truncateLastSBs = False:
          put the files from the last group that doesn't have SBperGroup subbands
          into the second last group (which will then have more than SBperGroup entries).
        mergeLastGroup = False, truncateLastSBs = True:
          ignore last files, that don't make for a full group (not all files are used).
        mergeLastGroup = False, truncateLastSBs = False:
          keep incomplete last group, or - with NDPPPfill=True - fill
          last group with dummies.

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile
    """
    if not filename or not mapfile_dir:
        raise ValueError('sort_times_into_freqGroups: filename and mapfile_dir are needed!')
    if mergeLastGroup and truncateLastSBs:
        raise ValueError('sort_times_into_freqGroups: Can either merge the last partial group or truncate at last full group, not both!')
    if mergeLastGroup:
        raise ValueError('sort_times_into_freqGroups: mergeLastGroup is not (yet) implemented!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                # The SkipIterator yields DataProduct objects, so the
                # filename has to be taken from the "file" attribute.
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('sort_times_into_freqGroups: type of "ms_input" unknown!')

    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print "sort_times_into_freqGroups: Working on", len(ms_list), "files"

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # use the slower but more reliable way:
        obstable = pt.table(ms, ack=False)
        timestamp = int(round(np.min(obstable.getcol('TIME'))))
        #obstable = pt.table(ms+'::OBSERVATION', ack=False)
        #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
        obstable.close()
        if timestamp in time_groups:
            time_groups[timestamp]['files'].append(ms)
        else:
            time_groups[timestamp] = {'files': [ms], 'basename': os.path.splitext(ms)[0]}
    print "sort_times_into_freqGroups: found", len(time_groups), "time-groups"

    # sort time-groups by frequency
    timestamps = time_groups.keys()
    timestamps.sort()  # not needed now, but later
    first = True
    nchans = 0
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms+'::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]
            if first:
                freq_width = sw.col('TOTAL_BANDWIDTH')[0]
                nchans = sw.col('CHAN_WIDTH')[0].shape[0]
                chwidth = sw.col('CHAN_WIDTH')[0][0]
                maxfreq = freq
                minfreq = freq
                first = False
            else:
                assert freq_width == sw.col('TOTAL_BANDWIDTH')[0]
                assert nchans == sw.col('CHAN_WIDTH')[0].shape[0]
                assert chwidth == sw.col('CHAN_WIDTH')[0][0]
                maxfreq = max(maxfreq, freq)
                minfreq = min(minfreq, freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = zip(freqs, time_groups[time]['files'])
        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print "sort_times_into_freqGroups: Collected the frequencies for the time-groups"

    # the new output map
    filemap = MultiDataMap()
    groupmap = DataMap()
    maxfreq = maxfreq+freq_width/2.
    minfreq = minfreq-freq_width/2.
    numFiles = round((maxfreq-minfreq)/freq_width)
    if numSB > 0:
        if truncateLastSBs:
            ngroups = int(np.floor(numFiles/numSB))
        else:
            ngroups = int(np.ceil(numFiles/numSB))
    else:
        ngroups = 1
        numSB = int(numFiles)
    hostID = 0
    for time in timestamps:
        (freq, fname) = time_groups[time]['freq_names'].pop(0)
        for fgroup in range(ngroups):
            files = []
            skip_this = True
            for fIdx in range(numSB):
                if freq > (fIdx+fgroup*numSB+1)*freq_width+minfreq:
                    if NDPPPfill:
                        files.append('dummy.ms')
                else:
                    files.append(fname)
                    if len(time_groups[time]['freq_names']) > 0:
                        (freq, fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        (freq, fname) = (1e12, 'This_shouldn\'t_show_up')
                    skip_this = False
            filemap.append(MultiDataProduct(hosts[hostID % numhosts], files, skip_this))
            groupname = time_groups[time]['basename']+'_%Xt_%dg.ms' % (time, fgroup)
            if type(stepname) is str:
                groupname += stepname
            if type(target_path) is str:
                groupname = os.path.join(target_path, os.path.basename(groupname))
            groupmap.append(DataProduct(hosts[hostID % numhosts], groupname, skip_this))
            # distribute the groups over the available hosts
            hostID += 1
        assert freq == 1e12

    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename+'_groups')
    groupmap.save(groupmapname)
    # generate map with edge-channels to flag
    flagmap = _calc_edge_chans(filemapname, nchans)
    flagmapname = os.path.join(mapfile_dir, filename+'_flags')
    flagmap.save(flagmapname)
    result = {'mapfile': filemapname, 'groupmapfile': groupmapname, 'flagmapfile': flagmapname}
    return result
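# The slot arithmetic above is easiest to see with numbers. A minimal,
# self-contained sketch (all values hypothetical): minfreq has already been
# shifted down by half a subband, so slot fIdx of group fgroup covers
# frequencies up to (fIdx + fgroup*numSB + 1)*freq_width + minfreq, and a
# file lands in the first slot whose upper edge exceeds its frequency;
# empty slots are padded with 'dummy.ms'.
def demo_slot_assignment():
    freq_width = 0.195e6                          # subband bandwidth in Hz
    minfreq = 120.0e6 - freq_width / 2.
    numSB = 4
    filefreqs = [120.0e6, 120.39e6, 120.585e6]    # subband 1 is missing
    slots = ['dummy.ms'] * numSB
    for freq in filefreqs:
        for fIdx in range(numSB):
            if freq <= (fIdx + 1) * freq_width + minfreq:
                slots[fIdx] = 'SB%03d.ms' % fIdx
                break
    return slots   # -> ['SB000.ms', 'dummy.ms', 'SB002.ms', 'SB003.ms']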
def main(ms_input, filename=None, mapfile_dir=None, numSB=-1, enforce_numSB=True, hosts=None, NDPPPfill=True, target_path=None, stepname=None, nband_pad=0, make_dummy_files=False, skip_flagged_groups=True): """ Check a list of MS files for missing frequencies Parameters ---------- ms_input : list or str List of MS filenames, or string with list, or path to a mapfile filename: str Name of output mapfile mapfile_dir : str Directory for output mapfile numSB : int, optional How many files should go into one frequency group. Values <= 0 mean put all files of the same time-step into one group. default = -1 enforce_numSB : bool, optional If True and numSB > 0, then add flagged dummy data to ensure that the last block has exactly numSB files. If False, then the last block can have fewer files (as long as there are no gaps in frequency) hosts : list or str List of hostnames or string with list of hostnames NDPPPfill : bool, optional Add dummy file-names for missing frequencies, so that NDPPP can fill the data with flagged dummy data. default = True target_path : str, optional Change the path of the "groups" files to this. (I.e. write output files into this directory with the subsequent NDPPP call.) default = keep path of input files stepname : str, optional Add this step-name into the file-names of the output files nband_pad : int, optional Add this number of bands of dummy data to the high-frequency end of the list make_dummy_files : bool, optional If True, make MS files for all dummy data skip_flagged_groups : bool, optional If True, groups that are missing have their skip flag set to True. If False, these groups are filled with dummy data and their skip flag set to False Returns ------- result : dict Dict with the name of the generated mapfile """ if not filename or not mapfile_dir: raise ValueError( 'sort_times_into_freqGroups: filename and mapfile_dir are needed!') # convert input to needed types ms_list = input2strlist(ms_input) NDPPPfill = input2bool(NDPPPfill) numSB = int(numSB) nband_pad = int(nband_pad) enforce_numSB = input2bool(enforce_numSB) make_dummy_files = input2bool(make_dummy_files) skip_flagged_groups = input2bool(skip_flagged_groups) if type(hosts) is str: hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')] if not hosts: hosts = ['localhost'] numhosts = len(hosts) print "sort_times_into_freqGroups: Working on", len(ms_list), "files" dirname = os.path.dirname(ms_list[0]) time_groups = {} # sort by time for i, ms in enumerate(ms_list): # use the slower but more reliable way: obstable = pt.table(ms, ack=False) timestamp = int(round(np.min(obstable.getcol('TIME')))) #obstable = pt.table(ms+'::OBSERVATION', ack=False) #timestamp = int(round(obstable.col('TIME_RANGE')[0][0])) obstable.close() if timestamp in time_groups: time_groups[timestamp]['files'].append(ms) else: time_groups[timestamp] = { 'files': [ms], 'basename': os.path.splitext(ms)[0] } print "sort_times_into_freqGroups: found", len(time_groups), "time-groups" # sort time-groups by frequency timestamps = time_groups.keys() timestamps.sort() # not needed now, but later first = True for time in timestamps: freqs = [] for ms in time_groups[time]['files']: # Get the frequency info sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False) freq = sw.col('REF_FREQUENCY')[0] if first: freq_width = sw.col('TOTAL_BANDWIDTH')[0] maxfreq = freq minfreq = freq first = False else: assert freq_width == sw.col('TOTAL_BANDWIDTH')[0] maxfreq = max(maxfreq, freq) minfreq = min(minfreq, freq) freqs.append(freq) sw.close() 
time_groups[time]['freq_names'] = zip(freqs, time_groups[time]['files']) time_groups[time]['freq_names'].sort(key=lambda pair: pair[0]) #time_groups[time]['files'] = [name for (freq,name) in freq_names] #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names] print "sort_times_into_freqGroups: Collected the frequencies for the time-groups" #the new output map filemap = MultiDataMap() groupmap = DataMap() maxfreq = maxfreq + freq_width / 2. minfreq = minfreq - freq_width / 2. numFiles = round((maxfreq - minfreq) / freq_width) if numSB > 0: ngroups = int(np.ceil(numFiles / numSB)) else: ngroups = 1 numSB = int(numFiles) hostID = 0 for time in timestamps: (freq, fname) = time_groups[time]['freq_names'].pop(0) nbands = 0 all_group_files = [] for fgroup in range(ngroups): files = [] skip_this = True for fIdx in range(numSB): thisfreq = (fIdx + fgroup * numSB + 1) * freq_width + minfreq if freq > thisfreq: if enforce_numSB or thisfreq - freq_width / 2. < maxfreq: files.append('dummy.ms') else: files.append(fname) skip_this = False if len(time_groups[time]['freq_names']) > 0: (freq, fname) = time_groups[time]['freq_names'].pop(0) else: # Set freq to high value to pad the rest of the group # with dummy data (freq, fname) = (1e12, 'This_shouldn\'t_show_up') if fgroup == ngroups - 1: # Append dummy data to last frequency group only for i in range(nband_pad): files.append('dummy.ms') if not skip_this: nbands += len(files) if make_dummy_files: for i, ms in enumerate(files): if ms == 'dummy.ms': # Replace dummy.ms in files list with new filename files[i] = os.path.join( dirname, '{0}_{1}.ms'.format( os.path.splitext(ms)[0], uuid.uuid4().urn.split('-')[-1])) if not skip_flagged_groups: # Don't set skip flag to True, even if group is missing all files if not make_dummy_files: raise ValueError( 'skip_flagged_groups cannot be False if make_dummy_files is also False' ) else: skip_this = False filemap.append( MultiDataProduct(hosts[hostID % numhosts], files, skip_this)) groupname = time_groups[time]['basename'] + '_%Xt_%dg.ms' % ( time, fgroup) if type(stepname) is str: groupname += stepname if type(target_path) is str: groupname = os.path.join(target_path, os.path.basename(groupname)) groupmap.append( DataProduct(hosts[hostID % numhosts], groupname, skip_this)) hostID += 1 all_group_files.extend(files) assert freq == 1e12 if make_dummy_files: # Find at least one existing ms for this timestamp ms_exists = None for ms in all_group_files: if os.path.exists(ms): ms_exists = ms sw = pt.table('{}::SPECTRAL_WINDOW'.format(ms)) ms_exists_ref_freq = sw.getcol('REF_FREQUENCY')[0] sw.close() break for i, ms in enumerate(all_group_files): if 'dummy' in ms: # Alter SPECTRAL_WINDOW subtable as appropriate to fill gap ref_freq = minfreq + freq_width * (i + 0.5) pt.tableutil.tablecopy(ms_exists, ms) sw = pt.table('{}::SPECTRAL_WINDOW'.format(ms), readonly=False) chan_freq = sw.getcol( 'CHAN_FREQ') - ms_exists_ref_freq + ref_freq sw.putcol('REF_FREQUENCY', ref_freq) sw.putcol('CHAN_FREQ', chan_freq) sw.close() # Flag all data t = pt.table(ms, readonly=False) t.putcol('FLAG_ROW', np.ones(len(t), dtype=bool)) f = t.getcol('FLAG') t.putcol('FLAG', np.ones(f.shape, dtype=bool)) t.close() filemapname = os.path.join(mapfile_dir, filename) filemap.save(filemapname) groupmapname = os.path.join(mapfile_dir, filename + '_groups') groupmap.save(groupmapname) result = { 'mapfile': filemapname, 'groupmapfile': groupmapname, 'nbands': nbands } return result
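# input2bool() and input2strlist() are used above but defined elsewhere in
# the pipeline (arguments arrive as strings when a step is driven from a
# parset). A minimal sketch of the conversion such a helper has to do; the
# exact accepted spellings are an assumption:
def input2bool(invar):
    if invar is None:
        return None
    if isinstance(invar, bool):
        return invar
    if isinstance(invar, str):
        if invar.strip().lower() in ('true', 't', '1', 'yes'):
            return True
        elif invar.strip().lower() in ('false', 'f', '0', 'no'):
            return False
        else:
            raise ValueError('input2bool: cannot convert string "' + invar + '" to bool!')
    if isinstance(invar, (int, float)):
        return bool(invar)
    raise TypeError('input2bool: unsupported data type: ' + str(type(invar)))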
def main(ms_input, outmapname=None, mapfile_dir=None, cellsize_highres_deg=0.00208, cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5, fieldsize_lowres=6.5, image_padding=1., y_axis_stretch=1.):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        How many FWHM's shall the high-res images be.
    fieldsize_lowres : float, optional
        How many FWHM's shall the low-res images be.
    image_padding : float, optional
        How much padding shall we add to the padded image sizes.
    y_axis_stretch : float, optional
        How much shall the y-axis be stretched or compressed.

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfiles
    """
    if not outmapname or not mapfile_dir:
        raise ValueError('InitSubtract_sort_and_compute: outmapname and mapfile_dir are needed!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                # The SkipIterator yields DataProduct objects; take the
                # filename from the "file" attribute.
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('InitSubtract_sort_and_compute: type of "ms_input" unknown!')

    cellsize_highres_deg = float(cellsize_highres_deg)
    cellsize_lowres_deg = float(cellsize_lowres_deg)
    fieldsize_highres = float(fieldsize_highres)
    fieldsize_lowres = float(fieldsize_lowres)
    image_padding = float(image_padding)
    y_axis_stretch = float(y_axis_stretch)

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms+'::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]
    bands = []
    print "InitSubtract_sort_and_compute.py: Putting files into bands."
    for MSkey in msdict.keys():
        bands.append(Band(msdict[MSkey]))

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    high_paddedsize_map = DataMap([])
    low_paddedsize_map = DataMap([])
    numfiles = 0
    for band in bands:
        print "InitSubtract_sort_and_compute.py: Working on Band:", band.name
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(cellsize_highres_deg, cellsize_lowres_deg,
                                                                 fieldsize_highres, fieldsize_lowres)
        imsize_high_res_stretch = band.get_optimum_size(int(imsize_high_res*y_axis_stretch))
        high_size_map.append(DataProduct('localhost', str(imsize_high_res)+" "+str(imsize_high_res_stretch), False))
        imsize_low_res_stretch = band.get_optimum_size(int(imsize_low_res*y_axis_stretch))
        low_size_map.append(DataProduct('localhost', str(imsize_low_res)+" "+str(imsize_low_res_stretch), False))
        imsize_high_pad = band.get_optimum_size(int(imsize_high_res*image_padding))
        imsize_high_pad_stretch = band.get_optimum_size(int(imsize_high_res*image_padding*y_axis_stretch))
        high_paddedsize_map.append(DataProduct('localhost', str(imsize_high_pad)+" "+str(imsize_high_pad_stretch), False))
        imsize_low_pad = band.get_optimum_size(int(imsize_low_res*image_padding))
        imsize_low_pad_stretch = band.get_optimum_size(int(imsize_low_res*image_padding*y_axis_stretch))
        low_paddedsize_map.append(DataProduct('localhost', str(imsize_low_pad)+" "+str(imsize_low_pad_stretch), False))

    print "InitSubtract_sort_and_compute.py: Computing averaging steps."
    (freqstep, timestep) = bands[0].get_averaging_steps()
    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([])
    for index in xrange(numfiles):
        freqstep_map.append(DataProduct('localhost', str(freqstep), False))
        timestep_map.append(DataProduct('localhost', str(timestep), False))

    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname+'_single')
    file_single_map.save(file_single_mapname)
    high_sizename = os.path.join(mapfile_dir, outmapname+'_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname+'_low_size')
    low_size_map.save(low_sizename)
    high_padsize_name = os.path.join(mapfile_dir, outmapname+'_high_padded_size')
    high_paddedsize_map.save(high_padsize_name)
    low_padsize_name = os.path.join(mapfile_dir, outmapname+'_low_padded_size')
    low_paddedsize_map.save(low_padsize_name)
    freqstepname = os.path.join(mapfile_dir, outmapname+'_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname+'_timestep')
    timestep_map.save(timestepname)

    result = {'groupmap': groupmapname, 'single_mapfile': file_single_mapname,
              'high_size_mapfile': high_sizename, 'low_size_mapfile': low_sizename,
              'high_padsize_mapfile': high_padsize_name, 'low_padsize_mapfile': low_padsize_name,
              'freqstep': freqstepname, 'timestep': timestepname}
    return result
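# Band.get_optimum_size() is called above but not shown in this file. It is
# assumed to round an image size up to the next integer that factorizes into
# small primes, which FFT-based imagers handle efficiently; a brute-force
# sketch of that assumed behaviour:
def get_optimum_size(size):
    def is_smooth(n):
        for p in (2, 3, 5, 7):
            while n % p == 0:
                n //= p
        return n == 1
    size = max(int(size), 1)
    while not is_smooth(size):
        size += 1
    return size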
def plugin_main(args, **kwargs): """ Matches a mapfile with one in which the MSs are distributed Parameters ---------- mapfile_in : str Filename of datamap containing MS files mapfile_dist : str Filename of mapfile with distributed MS files mapfile_full : str Filename of mapfile with all MS files from which distributed one was made mapfile_dir : str Directory for output mapfile filename: str Name of output mapfile num: int, optional Number of frequencies in output mapfile Returns ------- result : dict New parmdb datamap filename """ mapfile_in = kwargs['mapfile_in'] mapfile_dist = kwargs['mapfile_dist'] mapfile_full = kwargs['mapfile_full'] mapfile_dir = kwargs['mapfile_dir'] filename = kwargs['filename'] fileid = os.path.join(mapfile_dir, filename) map_in = DataMap.load(mapfile_in) map_in.iterator = DataMap.SkipIterator map_dist = DataMap.load(mapfile_dist) map_dist.iterator = DataMap.SkipIterator map_full = DataMap.load(mapfile_full) map_full.iterator = DataMap.SkipIterator map_out = DataMap() map_out.data = [] map_out._data = [] # do not re-run if we already ran, and input files are deleted. if os.path.exists(fileid) and not os.path.exists(map_in[0].file): print 'PipelineStep_matchDistFreqs: Not re-running because output file exists, but input files don\'t!' return {'mapfile': fileid} # find matches all_files_hosts = [(item.file, item.host) for item in map_full] dist_files = [item.file for item in map_dist] for i, (f, h) in enumerate(all_files_hosts): if f in dist_files: map_out.append(DataProduct(h, map_in[i].file, False)) map_out.save(fileid) del(map_in) del(map_out) result = {'mapfile': fileid} return result
def test_run_dppp(self):
    """
    This unittest borders on a functional test: the framework is mocked
    out by using a mockable wrapper function
    """
    working_dir = ""
    time_slice_dir_path = tempfile.mkdtemp(suffix=".%s" % (os.path.basename(__file__),))
    slices_per_image = 2
    input_map = [("lce072", "test_file_path1"),
                 ("lce072", "test_file_path2"),
                 ("lce072", "test_file_path3"),
                 ("lce072", "test_file_path4")]
    input_datamap = DataMap()
    for entry in input_map:
        input_datamap.append(entry)
    subbands_per_image = 2
    collected_ms_dir_name = ""
    fp = open(os.path.join(self.test_path, "parset"), 'w')
    fp.write("key=value\n")
    fp.close()
    parset = os.path.join(self.test_path, "parset")
    ndppp = ""
    init_script = ""

    sut = ImagerPrepareTestWrapper()
    output = sut._run_dppp(working_dir, time_slice_dir_path, slices_per_image,
                           input_datamap, subbands_per_image,
                           collected_ms_dir_name, parset, ndppp)

    # The output should contain two timeslice MSs prepended with the
    # time_slice_dir_path
    expected_output = [os.path.join(time_slice_dir_path, "time_slice_0.dppp.ms"),
                       os.path.join(time_slice_dir_path, "time_slice_1.dppp.ms")]
    self.assertTrue(output == expected_output,
                    "_run_dppp did not return timeslice ms: {0} != {1}".format(
                        output, expected_output))

    # Two parsets should be written in the time_slice_dir_path
    parset_1_content_expected = [('replace', 'uselogger', 'True'),
                                 ('replace', 'avg1.freqstep', '4'),
                                 ('replace', 'msin', "['test_file_path1', 'test_file_path2']"),
                                 ('replace', 'msout', '{0}'.format(
                                     os.path.join(time_slice_dir_path, "time_slice_0.dppp.ms")))]
    parset_1_output = eval(open(os.path.join(time_slice_dir_path,
                                             "time_slice_0.dppp.ms.ndppp.par")).read())
    self.assertTrue(parset_1_output == parset_1_content_expected,
                    "\n{0} != \n{1}".format(parset_1_output, parset_1_content_expected))

    parset_2_content_expected = [('replace', 'uselogger', 'True'),
                                 ('replace', 'avg1.freqstep', '4'),
                                 ('replace', 'msin', "['test_file_path3', 'test_file_path4']"),
                                 ('replace', 'msout', '{0}'.format(
                                     os.path.join(time_slice_dir_path, "time_slice_1.dppp.ms")))]
    parset_2_output = eval(open(os.path.join(time_slice_dir_path,
                                             "time_slice_1.dppp.ms.ndppp.par")).read())
    self.assertTrue(parset_2_output == parset_2_content_expected,
                    "\n{0} != \n{1}".format(parset_2_output, parset_2_content_expected))

    shutil.rmtree(time_slice_dir_path)
def plugin_main(args, **kwargs): """ Matches a mapfile with one in which the MSs are distributed Parameters ---------- mapfile_in : str Filename of datamap containing MS files mapfile_dist : str Filename of mapfile with distributed MS files mapfile_full : str Filename of mapfile with all MS files from which distributed one was made mapfile_dir : str Directory for output mapfile filename: str Name of output mapfile num: int, optional Number of frequencies in output mapfile Returns ------- result : dict New parmdb datamap filename """ mapfile_in = kwargs['mapfile_in'] mapfile_dist = kwargs['mapfile_dist'] mapfile_full = kwargs['mapfile_full'] mapfile_dir = kwargs['mapfile_dir'] filename = kwargs['filename'] fileid = os.path.join(mapfile_dir, filename) map_in = DataMap.load(mapfile_in) map_in.iterator = DataMap.SkipIterator map_dist = DataMap.load(mapfile_dist) map_dist.iterator = DataMap.SkipIterator map_full = DataMap.load(mapfile_full) map_full.iterator = DataMap.SkipIterator map_out = DataMap() map_out.data = [] map_out._data = [] # do not re-run if we already ran, and input files are deleted. if os.path.exists(fileid) and not os.path.exists(map_in[0].file): print 'PipelineStep_matchDistFreqs: Not re-running because output file exists, but input files don\'t!' return {'mapfile': fileid} # find matches all_files_hosts = [(item.file, item.host) for item in map_full] dist_files = [item.file for item in map_dist] for i, (f, h) in enumerate(all_files_hosts): if f in dist_files: map_out.append(DataProduct(h, map_in[i].file, False)) map_out.save(fileid) del (map_in) del (map_out) result = {'mapfile': fileid} return result
def plugin_main(args, **kwargs): """ Makes a mapfile with the MSs spread across the full bandwidth Parameters ---------- mapfile_in : str Filename of datamap containing MS files mapfile_dir : str Directory for output mapfile filename: str Name of output mapfile num: int, optional Number of frequencies in output mapfile Returns ------- result : dict New parmdb datamap filename """ mapfile_in = kwargs['mapfile_in'] mapfile_dir = kwargs['mapfile_dir'] filename = kwargs['filename'] if 'num' in kwargs: num = int(kwargs['num']) else: num = 6 fileid = os.path.join(mapfile_dir, filename) map_in = DataMap.load(mapfile_in) map_in.iterator = DataMap.SkipIterator map_out = DataMap() map_out.data = [] map_out._data = [] # do not re-run if we already ran, and input files are deleted. if os.path.exists(fileid) and not os.path.exists(map_in[0].file): print 'PipelineStep_selectDistFreqs: Not re-running because output file exists, but input files don\'t!' return {'mapfile': fileid} #sort into frequency groups freq_groups = {} hosts = [] for item in map_in: # Get the frequency info from the MS file sw = pt.table(item.file + '::SPECTRAL_WINDOW', ack=False) freq = int(sw.col('REF_FREQUENCY')[0]) sw.close() if freq in freq_groups: freq_groups[freq].append(item.file) else: freq_groups[freq] = [item.file] if not item.host in hosts: hosts.append(item.host) # select frequencies freqs = freq_groups.keys() freqs.sort() num_freqs = len(freqs) if num > num_freqs: print 'PipelineStep_selectDistFreqs: fewer than %d frequency groups found, continuig with %d groups.' % ( num, num_freqs) num = num_freqs dist_ind = get_distributed_indices(0, num_freqs - 1, num) selfreqs = [freqs[ind] for ind in dist_ind] if len(selfreqs) < 1: print "PipelineStep_selectDistFreqs: Selected fewer than one frequency band." raise ValueError("Selected fewer than one frequency band.") all_files = [] for selfreq in selfreqs: all_files.extend(freq_groups[selfreq]) # extend the hosts-list for i in range(len(all_files) - len(hosts)): hosts.append(hosts[i]) # fill the output-map for (host, fname) in zip(hosts, all_files): map_out.append(DataProduct(host, fname, False)) map_out.save(fileid) del (map_in) del (map_out) result = {'mapfile': fileid} return result
def main(ms_input, outmapname=None, mapfile_dir=None, cellsize_highres_deg=0.00208, cellsize_lowres_deg=0.00694,
         fieldsize_highres=2.5, fieldsize_lowres=6.5, image_padding=1., y_axis_stretch=1.):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    outmapname: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    cellsize_highres_deg : float, optional
        cellsize for the high-res images in deg
    cellsize_lowres_deg : float, optional
        cellsize for the low-res images in deg
    fieldsize_highres : float, optional
        How many FWHM's shall the high-res images be.
    fieldsize_lowres : float, optional
        How many FWHM's shall the low-res images be.
    image_padding : float, optional
        How much padding shall we add to the padded image sizes.
    y_axis_stretch : float, optional
        How much shall the y-axis be stretched or compressed.

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfiles
    """
    if not outmapname or not mapfile_dir:
        raise ValueError('InitSubtract_sort_and_compute: outmapname and mapfile_dir are needed!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                # The SkipIterator yields DataProduct objects; take the
                # filename from the "file" attribute.
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('InitSubtract_sort_and_compute: type of "ms_input" unknown!')

    cellsize_highres_deg = float(cellsize_highres_deg)
    cellsize_lowres_deg = float(cellsize_lowres_deg)
    fieldsize_highres = float(fieldsize_highres)
    fieldsize_lowres = float(fieldsize_lowres)
    image_padding = float(image_padding)
    y_axis_stretch = float(y_axis_stretch)

    msdict = {}
    for ms in ms_list:
        # group all MSs by frequency
        sw = pt.table(ms+'::SPECTRAL_WINDOW', ack=False)
        msfreq = int(sw.col('REF_FREQUENCY')[0])
        sw.close()
        if msfreq in msdict:
            msdict[msfreq].append(ms)
        else:
            msdict[msfreq] = [ms]
    bands = []
    bandfreqs = []
    print "InitSubtract_sort_and_compute.py: Putting files into bands."
    for MSkey in msdict.keys():
        band = Band(msdict[MSkey])
        bands.append(band)
        bandfreqs.append(band.freq)

    ## min freq gives largest image size for deep image
    bandfreqs = np.array(bandfreqs)
    minfreq = np.min(bandfreqs)
    bandmin = np.argmin(bandfreqs)
    ## need to map the output from wsclean channels to the right frequencies
    ## just put the bands in the right freq order
    wsclean_channum = np.argsort(bandfreqs)
    bands = np.array(bands)
    bands = bands[wsclean_channum]
    #minfreq = 1e9
    #for ib, band in enumerate(bands):
    #    if band.freq < minfreq:
    #        minfreq = band.freq
    #        bandmin = ib

    group_map = MultiDataMap()
    file_single_map = DataMap([])
    high_size_map = DataMap([])
    low_size_map = DataMap([])
    high_paddedsize_map = DataMap([])
    low_paddedsize_map = DataMap([])
    numfiles = 0
    nbands = len(bands)
    if nbands > 8:
        nchansout_clean1 = np.int(nbands/4)
    elif nbands > 4:
        nchansout_clean1 = np.int(nbands/2)
    else:
        nchansout_clean1 = np.int(nbands)

    (freqstep, timestep) = bands[0].get_averaging_steps()
    # The original referenced "self.timestep_sec" here, which is undefined
    # inside a plain function; as an assumption, use the integration time of
    # the input data (the INTERVAL column of the main MS table) instead.
    ms0 = pt.table(bands[0].files[0], ack=False)
    timestep_sec = ms0.getcell('INTERVAL', 0)
    ms0.close()
    int_time_sec = timestep_sec * timestep
    nwavelengths_high = bands[0].get_nwavelengths(cellsize_highres_deg, int_time_sec)
    nwavelengths_low = bands[0].get_nwavelengths(cellsize_lowres_deg, int_time_sec)

    for band in bands:
        print "InitSubtract_sort_and_compute.py: Working on Band:", band.name
        group_map.append(MultiDataProduct('localhost', band.files, False))
        numfiles += len(band.files)
        for filename in band.files:
            file_single_map.append(DataProduct('localhost', filename, False))
        (imsize_high_res, imsize_low_res) = band.get_image_sizes(cellsize_highres_deg, cellsize_lowres_deg,
                                                                 fieldsize_highres, fieldsize_lowres)
        imsize_high_res_stretch = band.get_optimum_size(int(imsize_high_res*y_axis_stretch))
        high_size_map.append(DataProduct('localhost', str(imsize_high_res)+" "+str(imsize_high_res_stretch), False))
        imsize_low_res_stretch = band.get_optimum_size(int(imsize_low_res*y_axis_stretch))
        low_size_map.append(DataProduct('localhost', str(imsize_low_res)+" "+str(imsize_low_res_stretch), False))
        imsize_high_pad = band.get_optimum_size(int(imsize_high_res*image_padding))
        imsize_high_pad_stretch = band.get_optimum_size(int(imsize_high_res*image_padding*y_axis_stretch))
        high_paddedsize_map.append(DataProduct('localhost', str(imsize_high_pad)+" "+str(imsize_high_pad_stretch), False))
        imsize_low_pad = band.get_optimum_size(int(imsize_low_res*image_padding))
        imsize_low_pad_stretch = band.get_optimum_size(int(imsize_low_res*image_padding*y_axis_stretch))
        low_paddedsize_map.append(DataProduct('localhost', str(imsize_low_pad)+" "+str(imsize_low_pad_stretch), False))
        print band.freq/1e6, imsize_high_res, imsize_high_res_stretch, imsize_high_pad, imsize_high_pad_stretch, imsize_low_res, imsize_low_res_stretch, imsize_low_pad, imsize_low_pad_stretch, nwavelengths_high, nwavelengths_low
        if band.freq == minfreq:
            deep_imsize_high_res = imsize_high_res
            deep_imsize_high_res_stretch = imsize_high_res_stretch
            deep_imsize_high_pad = imsize_high_pad
            deep_imsize_high_pad_stretch = imsize_high_pad_stretch
            deep_imsize_low_res = imsize_low_res
            deep_imsize_low_res_stretch = imsize_low_res_stretch
            deep_imsize_low_pad = imsize_low_pad
            deep_imsize_low_pad_stretch = imsize_low_pad_stretch
            print '*', band.freq/1e6, imsize_high_res, imsize_high_res_stretch, imsize_high_pad, imsize_high_pad_stretch, imsize_low_res, imsize_low_res_stretch, imsize_low_pad, imsize_low_pad_stretch

    deep_high_size_map = DataMap([DataProduct('localhost', str(deep_imsize_high_res)+" "+str(deep_imsize_high_res_stretch), False)])
    deep_high_paddedsize_map = DataMap([DataProduct('localhost', str(deep_imsize_high_pad)+" "+str(deep_imsize_high_pad_stretch), False)])
    deep_low_size_map = DataMap([DataProduct('localhost', str(deep_imsize_low_res)+" "+str(deep_imsize_low_res_stretch), False)])
    deep_low_paddedsize_map = DataMap([DataProduct('localhost', str(deep_imsize_low_pad)+" "+str(deep_imsize_low_pad_stretch), False)])
    nbands_map = DataMap([DataProduct('localhost', str(nbands), False)])
    nchansout_clean1_map = DataMap([DataProduct('localhost', str(nchansout_clean1), False)])

    print "InitSubtract_sort_and_compute.py: Computing averaging steps."
    # get mapfiles for freqstep and timestep with the length of single_map
    freqstep_map = DataMap([])
    timestep_map = DataMap([])
    nwavelengths_high_map = DataMap([])
    nwavelengths_low_map = DataMap([])
    for index in xrange(numfiles):
        freqstep_map.append(DataProduct('localhost', str(freqstep), False))
        timestep_map.append(DataProduct('localhost', str(timestep), False))
        nwavelengths_high_map.append(DataProduct('localhost', str(nwavelengths_high), False))
        nwavelengths_low_map.append(DataProduct('localhost', str(nwavelengths_low), False))

    groupmapname = os.path.join(mapfile_dir, outmapname)
    group_map.save(groupmapname)
    file_single_mapname = os.path.join(mapfile_dir, outmapname+'_single')
    file_single_map.save(file_single_mapname)
    high_sizename = os.path.join(mapfile_dir, outmapname+'_high_size')
    high_size_map.save(high_sizename)
    low_sizename = os.path.join(mapfile_dir, outmapname+'_low_size')
    low_size_map.save(low_sizename)
    high_padsize_name = os.path.join(mapfile_dir, outmapname+'_high_padded_size')
    high_paddedsize_map.save(high_padsize_name)
    low_padsize_name = os.path.join(mapfile_dir, outmapname+'_low_padded_size')
    low_paddedsize_map.save(low_padsize_name)
    deep_high_sizename = os.path.join(mapfile_dir, outmapname+'_deep_high_size')
    deep_high_size_map.save(deep_high_sizename)
    deep_low_sizename = os.path.join(mapfile_dir, outmapname+'_deep_low_size')
    deep_low_size_map.save(deep_low_sizename)
    deep_high_padsize_name = os.path.join(mapfile_dir, outmapname+'_deep_high_padded_size')
    deep_high_paddedsize_map.save(deep_high_padsize_name)
    deep_low_padsize_name = os.path.join(mapfile_dir, outmapname+'_deep_low_padded_size')
    deep_low_paddedsize_map.save(deep_low_padsize_name)
    nbands_mapname = os.path.join(mapfile_dir, outmapname+'_nbands')
    nbands_map.save(nbands_mapname)
    nchansout_clean1_mapname = os.path.join(mapfile_dir, outmapname+'_nchansout_clean1')
    nchansout_clean1_map.save(nchansout_clean1_mapname)
    freqstepname = os.path.join(mapfile_dir, outmapname+'_freqstep')
    freqstep_map.save(freqstepname)
    timestepname = os.path.join(mapfile_dir, outmapname+'_timestep')
    timestep_map.save(timestepname)
    nwavelengths_high_name = os.path.join(mapfile_dir, outmapname+'_nwavelengths_high')
    nwavelengths_high_map.save(nwavelengths_high_name)
    nwavelengths_low_name = os.path.join(mapfile_dir, outmapname+'_nwavelengths_low')
    nwavelengths_low_map.save(nwavelengths_low_name)

    result = {'groupmap': groupmapname, 'single_mapfile': file_single_mapname,
              'high_size_mapfile': high_sizename, 'low_size_mapfile': low_sizename,
              'high_padsize_mapfile': high_padsize_name, 'low_padsize_mapfile': low_padsize_name,
              'deep_high_size_mapfile': deep_high_sizename, 'deep_low_size_mapfile': deep_low_sizename,
              'deep_high_padsize_mapfile': deep_high_padsize_name, 'deep_low_padsize_mapfile': deep_low_padsize_name,
              'nbands': nbands_mapname, 'nchansout_clean1': nchansout_clean1_mapname,
              'freqstep': freqstepname, 'timestep': timestepname,
              'nwavelengths_high_mapfile': nwavelengths_high_name,
              'nwavelengths_low_mapfile': nwavelengths_low_name}
    return result
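# Hypothetical usage of the function above (file names are placeholders).
# Every entry of the result dict is the path of a saved mapfile; the size
# maps store the two image dimensions as a single "x y" string, so they
# have to be split on read-back:
#
#   result = main('bands.mapfile', outmapname='initsub', mapfile_dir='/path/to/mapfiles')
#   sizes = DataMap.load(result['high_size_mapfile'])
#   for item in sizes:
#       xsize, ysize = item.file.split(' ')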
def plugin_main(args, **kwargs): """ Makes a mapfile with only the MSs at the middle Frequency Quite a bit of a hack for a plugin, but right now I don't care. Parameters ---------- mapfile_in : str Filename of datamap containing MS files mapfile_dir : str Directory for output mapfile filename: str Name of output mapfile index: int, optional Index of the frequency band to use. Returns ------- result : dict New parmdb datamap filename """ mapfile_in = kwargs['mapfile_in'] mapfile_dir = kwargs['mapfile_dir'] filename = kwargs['filename'] fileid = os.path.join(mapfile_dir, filename) map_in = DataMap.load(mapfile_in) map_in.iterator = DataMap.SkipIterator map_out = DataMap() # do not re-run if we already ran, and input files are deleted. if os.path.exists(fileid) and not os.path.exists(map_in[0].file): print 'PipelineStep_selectMiddleFreq: Not re-running because output file exists, but input files don\'t!' return {'mapfile': fileid} #sort into frequency groups freq_groups = {} hosts = [] for item in map_in: # Get the frequency info sw = pt.table(item.file+'::SPECTRAL_WINDOW', ack=False) freq = int(sw.col('REF_FREQUENCY')[0]) sw.close() if freq in freq_groups: freq_groups[freq].append(item.file) else: freq_groups[freq] = [item.file] if not item.host in hosts: hosts.append(item.host) # find maximum number of files per frequency-group maxfiles = max([len(group) for group in freq_groups.values()]) # find the center-frequency freqs = freq_groups.keys() freqs.sort() selfreq = freqs[len(freqs)/2] if 'index' in kwargs: selfreq = int(kwargs['index']) else: # make sure that chosen frequncy has maxfiles entries while len(freq_groups[selfreq]) < maxfiles: freqs.remove(selfreq) selfreq = freqs[len(freqs)/2] # extend the hosts-list for i in range(len(freq_groups[selfreq])-len(hosts)): hosts.append(hosts[i]) # fill the output-map for (host,fname) in zip(hosts,freq_groups[selfreq]): map_out.append(DataProduct(host, fname, False)) map_out.save(fileid) result = {'mapfile': fileid} return result
def main(ms_input, filename=None, mapfile_dir=None, numSB=-1, hosts=None,
         NDPPPfill=True, target_path=None, stepname=None,
         mergeLastGroup=False, truncateLastSBs=True, firstSB=None):
    """
    Check a list of MS files for missing frequencies

    Parameters
    ----------
    ms_input : list or str
        List of MS filenames, or string with list, or path to a mapfile
    filename: str
        Name of output mapfile
    mapfile_dir : str
        Directory for output mapfile
    numSB : int, optional
        How many files should go into one frequency group. Values <= 0 mean
        put all files of the same time-step into one group.
        default = -1
    hosts : list or str
        List of hostnames or string with list of hostnames
    NDPPPfill : bool, optional
        Add dummy file-names for missing frequencies, so that NDPPP can
        fill the data with flagged dummy data.
        default = True
    target_path : str, optional
        Change the path of the "groups" files to this. (I.e. write output
        files into this directory with the subsequent NDPPP call.)
        default = keep path of input files
    stepname : str, optional
        Add this step-name into the file-names of the output files.
    mergeLastGroup, truncateLastSBs : bool, optional
        mergeLastGroup = True, truncateLastSBs = True:
          not allowed
        mergeLastGroup = True, truncateLastSBs = False:
          put the files from the last group that doesn't have SBperGroup subbands
          into the second last group (which will then have more than SBperGroup entries).
        mergeLastGroup = False, truncateLastSBs = True:
          ignore last files, that don't make for a full group (not all files are used).
        mergeLastGroup = False, truncateLastSBs = False:
          keep incomplete last group, or - with NDPPPfill=True - fill
          last group with dummies.
    firstSB : int, optional
        If set, then reference the grouping of files to this station-subband. As if a file
        with this station-subband would be included in the input files.
        (For HBA-low, i.e. 0 -> 100MHz, 55 -> 110.74MHz, 512 -> 200MHz)

    Returns
    -------
    result : dict
        Dict with the name of the generated mapfile
    """
    NDPPPfill = input2bool(NDPPPfill)
    mergeLastGroup = input2bool(mergeLastGroup)
    truncateLastSBs = input2bool(truncateLastSBs)
    firstSB = input2int(firstSB)
    numSB = int(numSB)

    if not filename or not mapfile_dir:
        raise ValueError('sort_times_into_freqGroups: filename and mapfile_dir are needed!')
    if mergeLastGroup and truncateLastSBs:
        raise ValueError('sort_times_into_freqGroups: Can either merge the last partial group or truncate at last full group, not both!')
    # if mergeLastGroup:
    #     raise ValueError('sort_times_into_freqGroups: mergeLastGroup is not (yet) implemented!')
    if type(ms_input) is str:
        if ms_input.startswith('[') and ms_input.endswith(']'):
            ms_list = [f.strip(' \'\"') for f in ms_input.strip('[]').split(',')]
        else:
            map_in = DataMap.load(ms_input)
            map_in.iterator = DataMap.SkipIterator
            ms_list = []
            for item in map_in:
                # The SkipIterator yields DataProduct objects; take the
                # filename from the "file" attribute.
                fname = item.file
                if fname.startswith('[') and fname.endswith(']'):
                    for f in fname.strip('[]').split(','):
                        ms_list.append(f.strip(' \'\"'))
                else:
                    ms_list.append(fname.strip(' \'\"'))
    elif type(ms_input) is list:
        ms_list = [str(f).strip(' \'\"') for f in ms_input]
    else:
        raise TypeError('sort_times_into_freqGroups: type of "ms_input" unknown!')

    if type(hosts) is str:
        hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')]
    if not hosts:
        hosts = ['localhost']
    numhosts = len(hosts)
    print("sort_times_into_freqGroups: Working on", len(ms_list), "files (including flagged files).")

    time_groups = {}
    # sort by time
    for i, ms in enumerate(ms_list):
        # work only on files selected by a previous step
        if ms.lower() != 'none':
            # use the slower but more reliable way:
            obstable = pt.table(ms, ack=False)
            timestamp = int(round(np.min(obstable.getcol('TIME'))))
            #obstable = pt.table(ms+'::OBSERVATION', ack=False)
            #timestamp = int(round(obstable.col('TIME_RANGE')[0][0]))
            obstable.close()
            if timestamp in time_groups:
                time_groups[timestamp]['files'].append(ms)
            else:
                time_groups[timestamp] = {'files': [ms], 'basename': os.path.splitext(ms)[0]}
    print("sort_times_into_freqGroups: found", len(time_groups), "time-groups")

    # sort time-groups by frequency
    timestamps = list(time_groups.keys())
    timestamps.sort()  # not needed now, but later
    first = True
    nchans = 0
    for time in timestamps:
        freqs = []
        for ms in time_groups[time]['files']:
            # Get the frequency info
            sw = pt.table(ms + '::SPECTRAL_WINDOW', ack=False)
            freq = sw.col('REF_FREQUENCY')[0]
            if first:
                file_bandwidth = sw.col('TOTAL_BANDWIDTH')[0]
                nchans = sw.col('CHAN_WIDTH')[0].shape[0]
                chwidth = sw.col('CHAN_WIDTH')[0][0]
                freqset = set([freq])
                first = False
            else:
                assert file_bandwidth == sw.col('TOTAL_BANDWIDTH')[0]
                assert nchans == sw.col('CHAN_WIDTH')[0].shape[0]
                assert chwidth == sw.col('CHAN_WIDTH')[0][0]
                freqset.add(freq)
            freqs.append(freq)
            sw.close()
        time_groups[time]['freq_names'] = list(zip(freqs, time_groups[time]['files']))
        time_groups[time]['freq_names'].sort(key=lambda pair: pair[0])
        #time_groups[time]['files'] = [name for (freq,name) in freq_names]
        #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names]
    print("sort_times_into_freqGroups: Collected the frequencies for the time-groups")

    freqliste = np.array(list(freqset))
    freqliste.sort()
    freq_width = np.min(freqliste[1:] - freqliste[:-1])
    if file_bandwidth > freq_width:
        raise ValueError("Bandwidth of files is larger than minimum frequency step between two files!")
    if file_bandwidth < (freq_width * 0.51):
        #raise ValueError("Bandwidth of files is smaller than 51% of the minimum frequency step between two files! (More than about half the data is missing.)")
        logging.warning("Bandwidth of files is smaller than 51% of the minimum frequency step between two files! (More than about half the data is missing.)")

    # the new output map
    filemap = MultiDataMap()
    groupmap = DataMap()
    # add half of the SB bandwidth plus a 1% margin, in case maxfreq lies
    # "exactly" on a group border
    maxfreq = np.max(freqliste) + freq_width * 0.51
    if firstSB is not None:
        if freqliste[0] < 100e6:
            # LBA Data
            minfreq = (float(firstSB) / 512. * 100e6) - freq_width / 2.
        elif freqliste[0] > 100e6 and freqliste[0] < 200e6:
            # HBA-Low
            minfreq = (float(firstSB) / 512. * 100e6) + 100e6 - freq_width / 2.
        elif freqliste[0] > 200e6 and freqliste[0] < 300e6:
            # HBA-High
            minfreq = (float(firstSB) / 512. * 100e6) + 200e6 - freq_width / 2.
        else:
            raise ValueError('sort_times_into_freqGroups: Frequency of lowest input data is higher than 300MHz!')
        if np.min(freqliste) < minfreq:
            raise ValueError('sort_times_into_freqGroups: Frequency of lowest input data is lower than reference frequency!')
    else:
        minfreq = np.min(freqliste) - freq_width / 2.
    groupBW = freq_width * numSB
    if groupBW < 1e6 and groupBW > 0:
        print('sort_times_into_freqGroups: ***WARNING***: Bandwidth of concatenated MS is lower than 1 MHz. This may cause conflicts with the concatenated file names!')
    if groupBW < 0:
        # this is the case when concatenating all subbands
        groupBW = maxfreq - minfreq
        truncateLastSBs = input2bool(False)
        NDPPPfill = input2bool(True)
    freqborders = np.arange(minfreq, maxfreq, groupBW)
    if mergeLastGroup:
        freqborders[-1] = maxfreq
    elif truncateLastSBs:
        pass  # nothing to do! (left to make the logic more clear)
    elif not truncateLastSBs and NDPPPfill:
        freqborders = np.append(freqborders, (freqborders[-1] + groupBW))
    elif not truncateLastSBs and not NDPPPfill:
        freqborders = np.append(freqborders, maxfreq)

    freqborders = freqborders[freqborders > (np.min(freqliste) - groupBW)]
    ngroups = len(freqborders) - 1
    if ngroups == 0:
        raise ValueError('sort_times_into_freqGroups: Not enough input subbands to create at least one full (frequency-)group!')

    print("sort_times_into_freqGroups: Will create", ngroups, "group(s) with", numSB, "file(s) each.")

    hostID = 0
    for time in timestamps:
        (freq, fname) = time_groups[time]['freq_names'].pop(0)
        for groupIdx in range(ngroups):
            files = []
            skip_this = True
            filefreqs_low = np.arange(freqborders[groupIdx], freqborders[groupIdx + 1], freq_width)
            for lower_freq in filefreqs_low:
                if freq > lower_freq and freq < lower_freq + freq_width:
                    assert freq != 1e12
                    files.append(fname)
                    if len(time_groups[time]['freq_names']) > 0:
                        (freq, fname) = time_groups[time]['freq_names'].pop(0)
                    else:
                        (freq, fname) = (1e12, 'This_shouldn\'t_show_up')
                    skip_this = False
                elif NDPPPfill:
                    files.append('dummy.ms')
            if not skip_this:
                filemap.append(MultiDataProduct(hosts[hostID % numhosts], files, skip_this))
                freqID = int((freqborders[groupIdx] + freqborders[groupIdx + 1]) / 2e6)
                groupname = time_groups[time]['basename'] + '_%Xt_%dMHz.ms' % (time, freqID)
                if type(stepname) is str:
                    groupname += stepname
                if type(target_path) is str:
                    groupname = os.path.join(target_path, os.path.basename(groupname))
                groupmap.append(DataProduct(hosts[hostID % numhosts], groupname, skip_this))
        orphan_files = len(time_groups[time]['freq_names'])
        if freq < 1e12:
            orphan_files += 1
        if orphan_files > 0:
            print("sort_times_into_freqGroups: Had %d unassigned files in time-group %xt." % (orphan_files, time))

    filemapname = os.path.join(mapfile_dir, filename)
    filemap.save(filemapname)
    groupmapname = os.path.join(mapfile_dir, filename + '_groups')
    groupmap.save(groupmapname)
    # generate map with edge-channels to flag
    flagmap = _calc_edge_chans(filemap, nchans)
    flagmapname = os.path.join(mapfile_dir, filename + '_flags')
    flagmap.save(flagmapname)
    result = {'mapfile': filemapname, 'groupmapfile': groupmapname, 'flagmapfile': flagmapname}
    return result
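# input2int() mirrors the input2bool() sketch given earlier: convert the
# string (or number) form of a pipeline argument to int and let None pass
# through. The exact behaviour is an assumption:
def input2int(invar):
    if invar is None:
        return None
    if isinstance(invar, int):
        return invar
    if isinstance(invar, (float, str)):
        return int(float(invar))
    raise TypeError('input2int: unsupported data type: ' + str(type(invar)))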
def main(ms_input, filename=None, mapfile_dir=None, numSB=-1, hosts=None, NDPPPfill=True, target_path=None, stepname=None): """ Check a list of MS files for missing frequencies Parameters ---------- ms_input : list or str List of MS filenames, or string with list, or path to a mapfile filename: str Name of output mapfile mapfile_dir : str Directory for output mapfile numSB : int, optional How many files should go into one frequency group. Values <= 0 mean put all files of the same time-step into one group. default = -1 hosts : list or str List of hostnames or string with list of hostnames NDPPPfill : bool, optional Add dummy file-names for missing frequencies, so that NDPPP can fill the data with flagged dummy data. default = True target_path : str, optional Change the path of the "groups" files to this. (I.e. write output files into this directory with the subsequent NDPPP call.) default = keep path of input files stepname : str, optional Add this step-name into the file-names of the output files. Returns ------- result : dict Dict with the name of the generated mapfile """ if not filename or not mapfile_dir: raise ValueError('sort_times_into_freqGroups: filename and mapfile_dir are needed!') # convert input to needed types ms_list = input2strlist(ms_input) NDPPPfill = input2bool(NDPPPfill) if type(hosts) is str: hosts = [h.strip(' \'\"') for h in hosts.strip('[]').split(',')] if not hosts: hosts = ['localhost'] numhosts = len(hosts) print "sort_times_into_freqGroups: Working on",len(ms_list),"files" time_groups = {} # sort by time for i, ms in enumerate(ms_list): # use the slower but more reliable way: obstable = pt.table(ms, ack=False) timestamp = int(round(np.min(obstable.getcol('TIME')))) #obstable = pt.table(ms+'::OBSERVATION', ack=False) #timestamp = int(round(obstable.col('TIME_RANGE')[0][0])) obstable.close() if timestamp in time_groups: time_groups[timestamp]['files'].append(ms) else: time_groups[timestamp] = {'files': [ ms ], 'basename' : os.path.splitext(ms)[0] } print "sort_times_into_freqGroups: found",len(time_groups),"time-groups" # sort time-groups by frequency timestamps = time_groups.keys() timestamps.sort() # not needed now, but later first = True for time in timestamps: freqs = [] for ms in time_groups[time]['files']: # Get the frequency info sw = pt.table(ms+'::SPECTRAL_WINDOW', ack=False) freq = sw.col('REF_FREQUENCY')[0] if first: freq_width = sw.col('TOTAL_BANDWIDTH')[0] maxfreq = freq minfreq = freq first = False else: assert freq_width == sw.col('TOTAL_BANDWIDTH')[0] maxfreq = max(maxfreq,freq) minfreq = min(minfreq,freq) freqs.append(freq) sw.close() time_groups[time]['freq_names'] = zip(freqs,time_groups[time]['files']) time_groups[time]['freq_names'].sort(key=lambda pair: pair[0]) #time_groups[time]['files'] = [name for (freq,name) in freq_names] #time_groups[time]['freqs'] = [freq for (freq,name) in freq_names] print "sort_times_into_freqGroups: Collected the frequencies for the time-groups" #the new output map filemap = MultiDataMap() groupmap = DataMap() maxfreq = maxfreq+freq_width/2. minfreq = minfreq-freq_width/2. 
numFiles = round((maxfreq-minfreq)/freq_width) if numSB > 0: ngroups = int(np.ceil(numFiles/numSB)) else: ngroups = 1 numSB = int(numFiles) hostID = 0 for time in timestamps: (freq,fname) = time_groups[time]['freq_names'].pop(0) for fgroup in range(ngroups): files = [] skip_this = True for fIdx in range(numSB): if freq > (fIdx+fgroup*numSB+1)*freq_width+minfreq: files.append('dummy.ms') else: files.append(fname) if len(time_groups[time]['freq_names'])>0: (freq,fname) = time_groups[time]['freq_names'].pop(0) else: (freq,fname) = (1e12,'This_shouldn\'t_show_up') skip_this = False filemap.append(MultiDataProduct(hosts[hostID%numhosts], files, skip_this)) groupname = time_groups[time]['basename']+'_%Xt_%dg.ms'%(time,fgroup) if type(stepname) is str: groupname += stepname if type(target_path) is str: groupname = os.path.join(target_path,os.path.basename(groupname)) groupmap.append(DataProduct(hosts[hostID%numhosts],groupname, skip_this)) hostID += 1 assert freq==1e12 filemapname = os.path.join(mapfile_dir, filename) filemap.save(filemapname) groupmapname = os.path.join(mapfile_dir, filename+'_groups') groupmap.save(groupmapname) result = {'mapfile': filemapname, 'groupmapfile': groupmapname} return result
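# The group-count arithmetic above, worked with hypothetical numbers:
# 40 subbands of 195.3125 kHz bandwidth, grouped 6 subbands per output file.
def demo_group_count():
    import numpy as np
    freq_width = 195312.5                  # subband bandwidth in Hz
    minfreq = 120.0e6                      # lower band edge (already padded)
    maxfreq = minfreq + 40 * freq_width    # upper band edge
    numFiles = round((maxfreq - minfreq) / freq_width)   # -> 40.0
    numSB = 6
    ngroups_fill = int(np.ceil(numFiles / numSB))    # 7: last group padded with dummies
    ngroups_trunc = int(np.floor(numFiles / numSB))  # 6: what truncateLastSBs=True would keep
    return ngroups_fill, ngroups_trunc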